#!/usr/bin/perl -w -s

# PODNAME: tmxgrep
# ABSTRACT: grep translation units in a TMX file

use strict;
use warnings;

## use utf8::all;
use Data::Dumper;
use XML::TMX;
use XML::TMX::Reader;

# Command-line switches. They are package globals because perl's -s option
# (see the shebang line) stores "-name[=value]" switches in $main::name.
our ($all, $w, $max, $pdf, $o, $n, $a, $debug, $i, $html);
our ($icons);

my $usage = "Usage: $0 [options] pattern f.tmx\n";

# When a rendered view is requested, annotate matches by default.
$a //= "p" if $pdf || $html;

# Optional "=browser" suffix handed to tmx2html (only for -html=NAME).
my $browser = "";
$browser = "=$html" if ($html and $html ne "1");

# Markers written before/after each annotated match.
my ($ab, $ae);
if ($a) {
    # Bare -a uses the textual '=(...)=' marker; -a=xx wraps the match in an
    # <xx> element, which must be closed to keep the output well-formed.
    ($ab, $ae) = $a eq "1" ? ("=(", ")=") : ("<$a>", "</$a>");
}
if ($html) {
    # NOTE(review): -html overrides whatever -a selected. With a bare -html
    # the markers are reset to empty strings, so matches are not visibly
    # marked; with -html=NAME the NAME is also used as the element name.
    # Confirm both are intended.
    ($ab, $ae) = $html eq "1" ? ("", "") : ("<$html>", "</$html>");
}

# Default cap on extracted TUs; -max=0 or -all means "effectively unlimited".
$max //= 500;
$max = 1000000000 if ($max == 0 or $all);

# Apply the -i (ignore case) and -w (whole word) switches to a raw pattern.
# Returns a compiled regex when either switch is set; otherwise the text is
# returned unchanged.
sub _compile_pattern {
    my ($text) = @_;
    return qr{(?i)\b$text\b} if $i and $w;
    return qr{(?i)$text}     if $i;
    return qr{\b$text\b}     if $w;
    return $text;
}

my $p = shift or die($usage);

# The pattern is either "l1:RegExp;l2:RegExp", "l1:RegExp" or a bare RegExp.
# The per-language forms become a hash keyed by the language as typed.
if ($p =~ m{(\w+):(.+?);(\w+):(.+)}) {
    # Copy the captures before calling anything else.
    my ($lang1, $re1, $lang2, $re2) = ($1, $2, $3, $4);
    $p = +{
        $lang1 => _compile_pattern($re1),
        $lang2 => _compile_pattern($re2),
    };
}
elsif ($p =~ m{(\w+):(.+)}) {
    my ($lang, $re) = ($1, $2);
    $p = +{ $lang => _compile_pattern($re) };
}
else {
    $p = _compile_pattern($p);
}

# The rendering tools need a file to read, so force an output file name.
$o //= "__.tmx" if $pdf || $html;

my $tmx = shift;
defined $tmx or die($usage);

my $tmx_obj = XML::TMX::Reader->new($tmx);

print STDERR Dumper($p) if $debug;

$tmx_obj->for_tu(
    {
        patt   => $p,
        gen_tu => $max,
        n      => $n || 0,
        output => $o || undef,
    },
    # Callback run with each translation unit and its attributes; it returns
    # the (possibly annotated) unit.
    sub {
        my ($tu, $at) = @_;
        print STDERR Dumper($tu, $at, $p) if $debug;

        if ($a) {
            for my $li (keys %$tu) {
                next if $li =~ /^-/;    # keys starting with '-' are skipped

                # A compiled qr// is also a reference, so test for a hash
                # explicitly before treating $p as a per-language table.
                my $pli = ref($p) eq 'HASH' ? $p->{ lc($li) } : $p;
                next unless $pli;

                for my $field (keys %{ $tu->{$li} }) {
                    $tu->{$li}{$field} =~ s!($pli)!$ab$1$ae!g;
                }
            }
        }
        return $tu;
    }
);

# List-form system() keeps the file and browser names away from the shell.
if ($pdf) {
    system('tmx2pdf-prince', '-v', $o) == 0
        or warn "tmx2pdf-prince failed (status $?)\n";
}
if ($html) {
    system('tmx2html', "-v$browser", $o) == 0
        or warn "tmx2html failed (status $?)\n";
}

__END__

=head1 SYNOPSIS

 tmxgrep patt file.tmx

 options:
   -max=300        -- extract up to 300 TU (def=500)
   -max=0          -- all matches
   -all            -- all matches
   -a              -- annotate matches with '=(...)='
   -a=xx           -- annotate matches with '<xx>...</xx>'
   -o=out.tmx      -- define output file (default=STDOUT)
   -n              -- print original TU number
   -i              -- ignore case
   -w              -- whole words
   -pdf            -- output is transformed into a PDF file and shown
   -html           -- output is transformed into an HTML file and shown
   -html=chromium  -- idem, and shown with chromium (def: firefox)

 Where patt can be:
   RegExp
   l1:RegExp
   l1:RegExp;l2:RegExp

=head1 DESCRIPTION

Creates a TMX file with the translation units that match the provided
regular expression.

=cut