#!/usr/bin/perl -w -s
# PODNAME: tmxgrep
# ABSTRACT: grep translation units in a TMX file
## use utf8::all;
use Data::Dumper;
# Command-line switches. They are package globals because the script is run
# with perl's -s switch (see the shebang line), which assigns "-name" or
# "-name=value" arguments to $main::name before the program starts.
our ($all,$w,$max,$pdf, $o, $n, $a,$debug, $i, $html);

# -pdf and -html turn annotation on: unless -a was given explicitly, the
# tag name defaults to "p".
$a //= "p" if $pdf || $html;

my ($ab,$ae);    ## emphasis markers: text put before/after every match

# Suffix appended to the "-v" switch of tmx2html: "=NAME" when -html=NAME
# was given, empty for a bare -html.
my $browser="";
$browser = "=$html" if ($html and $html ne "1");

if ($a) {
    # A bare -a arrives as "1" and selects the plain-text markers; any other
    # value is used as an element name.  The closing marker must be a real
    # end tag ("</xx>"), otherwise the annotated segment is unbalanced.
    ($ab, $ae) = $a eq "1" ? ("=(", ")=") : ("<$a>", "</$a>");
}

if ($html) {
    # NOTE(review): with -html these markers replace whatever -a selected:
    # a bare -html yields empty markers, and -html=NAME reuses NAME (which
    # is also forwarded as the browser, see $browser) as the element name.
    # Confirm that this is really intended.
    ($ab, $ae) = $html eq "1" ? ("", "") : ("<$html>", "</$html>");
}

# Maximum number of translation units to extract: 500 by default;
# -max=0 or -all lift the limit by using a very large number.
$max //= 500;
$max = 1000000000 if ($max == 0 or $all);
use strict;
use warnings;
# First positional argument: the search pattern.  It can be
#   RegExp                 -- one pattern
#   l1:RegExp              -- a pattern tied to language l1
#   l1:RegExp;l2:RegExp    -- one pattern per language
# The language forms are turned into a hash { lang => pattern }.
my $p = shift or die("Usage: $0 [options] pattern f.tmx\n");

# Apply the -i (ignore case) and -w (whole word) switches to one raw
# pattern.  Without either switch the raw string is returned untouched.
my $compile = sub {
    my ($raw) = @_;
    return qr{(?i)\b$raw\b} if $i and $w;
    return qr{(?i)$raw}     if $i;
    return qr{\b$raw\b}     if $w;
    return $raw;
};

if ($p =~ m{(\w+):(.+?);(\w+):(.+)}) {
    # Copy the captures before doing anything else with them.
    my ($lang1, $re1, $lang2, $re2) = ($1, $2, $3, $4);
    $p = +{ $lang1 => $compile->($re1), $lang2 => $compile->($re2) };
}
elsif ($p =~ m{(\w+):(.+)}) {
    my ($lang, $re) = ($1, $2);
    $p = +{ $lang => $compile->($re) };
}
else {
    $p = $compile->($p);
}
use XML::TMX;
use XML::TMX::Reader;
# With -pdf or -html the result is handed to an external converter as a
# file, so give the output a default name when -o was not supplied.
$o //= "__.tmx" if $pdf || $html;
our ($icons);    # declared here; not used by the code visible in this script

# Second positional argument: the TMX file to search.
my $tmx = shift;
defined $tmx or die("Usage: $0 [options] pattern f.tmx\n");

# Fail with a clear message instead of an opaque method-call error later on
# if the reader could not be created for this file.
my $tmx_obj = XML::TMX::Reader->new($tmx)
    or die("$0: cannot open TMX file '$tmx'\n");

print STDERR Dumper($p) if $main::debug;
# Run the search.  The first argument carries the pattern, the maximum
# number of units to generate, the -n switch and the output file; the
# callback receives each unit handed over by the reader, optionally
# annotates the matches in it, and returns it.
$tmx_obj->for_tu(
    { patt   => $p,
      gen_tu => $max,
      n      => $n || 0,
      output => $o || undef },    #### "__tmxgrep.tmx"
    sub {
        my ($tu, $at) = @_;
        print STDERR Dumper($tu,$at,$p) if $main::debug;

        # No annotation requested: pass the unit through unchanged.
        return $tu unless $main::a;

        # $p is a hash only for the "lang:RegExp" forms.  A plain pattern
        # compiled with -i/-w is a qr// object, for which ref() is also
        # true, so test for HASH explicitly: dereferencing a Regexp as a
        # hash dies with "Not a HASH reference".
        my $per_lang = ref($p) eq 'HASH';

        for my $li (keys %$tu) {
            next if $li =~ /^-/;    # keys starting with "-" are not languages

            # Pattern that applies to this language, if any.
            my $pat = $per_lang ? $p->{lc($li)} : $p;
            next unless $pat;

            for my $field (keys %{$tu->{$li}}) {
                # Leave reference values (if any) alone: s/// would work on
                # their stringified form and could replace the reference
                # with a plain string.
                next if ref $tu->{$li}{$field};
                $tu->{$li}{$field} =~ s!($pat)!$ab$1$ae!g;
            }
        }
        return $tu;
    }
);
# Hand the generated TMX file to the external converter/viewer.  The list
# form of system() bypasses the shell, so an output name containing spaces
# or shell metacharacters reaches the tool as a single, unmodified argument.
if ($pdf) {
    system("tmx2pdf-prince", "-v", $o) == 0
        or warn("$0: tmx2pdf-prince failed (status $?)\n");
}
if ($html) {
    # $browser is either empty or "=NAME", giving "-v" or "-v=NAME".
    system("tmx2html", "-v$browser", $o) == 0
        or warn("$0: tmx2html failed (status $?)\n");
}
__END__
=head1 SYNOPSIS

  tmxgrep patt file.tmx

  options:
    -max=300       -- extract up to 300 TU (def=500)
    -max=0         -- all matches
    -all           -- all matches
    -a             -- annotate matches with '=(...)='
    -a=xx          -- annotate matches with '<xx>...</xx>'
    -o=out.tmx     -- define output file (default= STDOUT)
    -n             -- print original TU number
    -i             -- ignore case
    -w             -- whole words
    -pdf           -- output is transformed into a PDF file and shown
    -html          -- output is transformed into an HTML file and shown
    -html=chromium -- idem, and show it with chromium (def: firefox)

  Where patt can be:
    RegExp
    l1:RegExp
    l1:RegExp;l2:RegExp

=head1 DESCRIPTION

Creates a TMX file with the translation units that match the provided
regular expression.

=cut