#!/usr/bin/perl -w -s

# PODNAME: tmxgrep
# ABSTRACT: grep translation units in a TMX file

## use utf8::all;
use Data::Dumper;

# Command-line switches. They are filled in by perl's -s option (see the
# shebang line): "-name" sets $name to 1, "-name=value" sets it to "value".
our ($all,$w,$max,$pdf, $o, $n, $a,$debug, $i, $html);

# -pdf and -html imply annotation of the matches: give $a a true default
# so that the annotation step runs even when -a was not given.
$a //= "p" if $pdf || $html;

# _emphasis_markers($annot, $as_html) -> ($begin, $end)
#
# Chooses the strings written before and after each annotated match.
#   $annot   -- value of -a   ("1" for a bare -a, otherwise a tag name)
#   $as_html -- value of -html (true when HTML output was requested)
# Returns an empty list when no annotation is requested.
sub _emphasis_markers {
  my ($annot, $as_html) = @_;

  # The value of -html is a browser name, not a tag name: HTML output is
  # always emphasised with <b>, whatever browser was selected.
  return ("<b>", "</b>")  if $as_html;
  return ()               unless $annot;
  return ("=(", ")=")     if $annot eq "1";
  return ("<$annot>", "</$annot>");
}

my ($ab,$ae) = _emphasis_markers($a, $html); ## emphasis markers

# "-html=NAME" selects the browser; it is later appended to the "-v"
# option of tmx2html (a bare -html leaves the suffix empty).
my $browser="";
$browser = "=$html" if ($html and $html ne "1");

# Maximum number of translation units to extract: 500 by default;
# -max=0 or -all lift the limit (in practice: a very large number).
$max //= 500;
$max = 1000000000 if ($max == 0 or $all);

use strict;
use warnings;

# _compile_pattern($re, $nocase, $words) -> string or qr// object
#
# Applies the -i (ignore case) and -w (whole words) switches to one
# regular expression given as a string.
#   $re     -- regular expression source, as typed by the user
#   $nocase -- true to match case-insensitively
#   $words  -- true to require word boundaries around the match
# When neither switch is set the string is returned unchanged.
sub _compile_pattern {
  my ($re, $nocase, $words) = @_;
  return $re unless $nocase or $words;

  # Group the expression before adding \b: without the group, "a|b"
  # would become "\ba|b\b" and the boundaries would only apply to the
  # first and last alternatives.
  my $body = $words ? "\\b(?:$re)\\b" : $re;
  return $nocase ? qr{(?i)$body} : qr{$body};
}

# First command-line argument: the pattern. Test for definedness and
# length rather than truth so that the pattern "0" is accepted.
my $p = shift;
die("Usage: $0 [options] pattern f.tmx\n") unless defined($p) and length($p);

# Accepted forms:
#   l1:RegExp;l2:RegExp  -> { l1 => pattern, l2 => pattern }
#   l1:RegExp            -> { l1 => pattern }
#   RegExp               -> pattern
if   ($p =~ m{(\w+):(.+?);(\w+):(.+)}){
     # Copy the captures before doing anything else with them.
     my ($l1,$re1,$l2,$re2) = ($1,$2,$3,$4);
     $p = +{ $l1 => _compile_pattern($re1,$i,$w),
             $l2 => _compile_pattern($re2,$i,$w) };
}
elsif($p =~ m{(\w+):(.+)}            ){
     my ($l1,$re1) = ($1,$2);
     $p = +{ $l1 => _compile_pattern($re1,$i,$w) };
}
else{
     $p = _compile_pattern($p,$i,$w);
}

use XML::TMX;
use XML::TMX::Reader;

# -pdf / -html post-process a file, so they need a named output file.
$o //= "__.tmx" if $pdf || $html;

our ($icons);

# Second command-line argument: the TMX file to search.
my $tmx = shift;
die("Usage: $0 [options] pattern f.tmx\n") unless defined $tmx;
my $tmx_obj = XML::TMX::Reader->new($tmx)
  or die("$0: could not create a reader for '$tmx'\n");

print STDERR  Dumper($p) if $main::debug;

# For per-language patterns, index them by lower-cased language code,
# because the annotation loop below looks them up with lc().
# A qr// object is also a reference, so test for a HASH explicitly.
my $patt_for = ref($p) eq "HASH"
             ? +{ map { lc($_) => $p->{$_} } keys %$p }
             : undef;

 $tmx_obj->for_tu(
    { patt => $p,
      gen_tu=> $max,
      n => $n || 0,
      output => $o || undef }, #### "__tmxgrep.tmx"
    # Callback invoked by for_tu with a translation unit and a second
    # argument that is only used for debugging output here. It wraps
    # every match in the emphasis markers when annotation was requested
    # and returns the (possibly modified) translation unit.
    sub { my($tu,$at) = @_;
          print STDERR  Dumper($tu,$at,$p) if $main::debug;
#         print STDERR  ".";
          if($main::a ){
            for my $li(keys %$tu){
              next if $li =~ /^-/;    # keys starting with "-" are skipped
              my $patt = $patt_for ? $patt_for->{lc($li)} : $p;
              next unless defined($patt) and length($patt);
              for my $field (keys %{$tu->{$li}}){
                # s/// on a reference would replace it with a string;
                # only plain, defined values are annotated.
                next if !defined($tu->{$li}{$field}) or ref($tu->{$li}{$field});
                $tu->{$li}{$field} =~ s!($patt)!$ab$1$ae!g;
              }
            }
          }

          return $tu;
    }
 );

# Hand the generated TMX file to the external converters. The list form
# of system() bypasses the shell, so an output file name containing
# spaces or shell metacharacters is passed through unchanged.
if($pdf){
  system("tmx2pdf-prince", "-v", $o) == 0
    or warn("$0: tmx2pdf-prince failed (status $?)\n");
}
if($html){
  system("tmx2html", "-v$browser", $o) == 0
    or warn("$0: tmx2html failed (status $?)\n");
}


__END__


=head1 SYNOPSIS

 tmxgrep patt file.tmx
 options:
   -max=300    -- extract up to 300 TU (def=500)
   -max=0      -- all matches
   -all        -- all matches
   -a          -- annotate matches with '=(...)='
   -a=xx       -- annotate matches with '<xx>...</xx>'
   -o=out.tmx  -- define output file (default: STDOUT)
   -n          -- print original TU number (eg: <tu n="35">)
   -i          -- ignore case
   -w          -- whole words
   -pdf        -- output is transformed into a PDF file and shown
   -html       -- output is transformed into an HTML file and shown
   -html=chromium  -- idem and show it with chromium (def:firefox)

Where patt can be:

  RegExp
  l1:RegExp
  l1:RegExp;l2:RegExp

=head1 DESCRIPTION

Creates a TMX file with the translation units that match the provided
regular expression.

=cut