#!/usr/bin/perl -s use XML::DT ; use Data::Dumper ; $Data::Dumper::Terse =1; $Data::Dumper::Deepcopy =1; ## use encoding "utf8"; use strict; use utf8; use File::Copy; our ($r,$tex,$l,$s,$html,$img,$dici,$o,$abs,$diciold,$langtag,$t); $o ||= "_output"; $dici //= 1 unless($tex or $html or $abs or $diciold or $t); if($img == "1"){ $img = "IMG"; mkdir("IMG")} elsif($img) { mkdir($img) } my $langs; $l ||= "PT"; $s ||= ""; use POSIX qw(locale_h); setlocale(&POSIX::LC_ALL, "pt_PT"); use locale; m2html({output=>$o}, @ARGV) if $html; m2term({output=>$o}, @ARGV) if $diciold ; m2tex ({output=>$o, baselang=> $l, style => $s},@ARGV) if $tex; print Dumper(m2abs ({style => $s},@ARGV)) if $abs; m2dici2({output=>$o}, @ARGV) if $dici ; m2r({output=>$o}, @ARGV) if $r ; if ($t==1){ dici2abs({output=>$o}, @ARGV) } if ($t==2){ my ($T1,$nc,$langs,$ind)=dici2abs({output=>$o}, @ARGV); abs2dici($T1,$nc,$langs,$ind); } if ($t==3){ my ($T1,$nc,$langs,$ind)=dici2abs({output=>$o}, @ARGV); abs2tex({output=>$o,img=>$img,baselang=>$l},$T1,$nc,$langs,$ind); } sub m2r{ my %opt =(output => "_output"); if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})}; my @A=@_; #my %subrel=(); my %rel=(); my %lang=(); my %tran=(); my $con=0; my $lin=0; for my $filename (@A){ my %handler=( '-default' => sub{""}, # -type => { transacGrp => "ZERO", }, 'concept' => sub{ }, 'conceptGrp' => sub{ $con++; }, 'descrip' => sub{ if( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++; } else{ $rel{$v{type}}{L}++; }}, 'descripGrp' => sub{ }, 'language' => sub{ father->{lang}=$v{lang} }, 'languageGrp' => sub{ }, 'mtf' => sub{ }, 'system' => sub{ }, 'term' => sub{ $lang{gfather->{lang}}++ }, 'termGrp' => sub{ }, 'transac' => sub{ $tran{$c}++ }, 'xref' => sub{ $lin++ }, ); eval {dt($filename,%handler)}; warn ("######Error $filename:$@\n") if $@; } if($r){ print "concepts: $con\n"; print "links: $lin\n"; for (sort ( keys (%lang), )){ print "lang: $_ $lang{$_}\n"; } for (sort ( keys (%rel), )){ print "rel:", lunaccent($_), "($rel{$_}{G}+$rel{$_}{L}) '$_'\n"; } for (sort ( keys (%tran), )){ print "user: $_ $tran{$_}\n"; } } return($con,\%rel,$lin) } sub m2html{ my %opt =(output => "_output",baselang=>"PT"); if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})}; my @A=@_; #my %subrel=(); my %rel=(); my $con=0; open(F,">:utf8","$opt{output}.html") or die; for my $filename (@A){ my %handler=( '-default' => sub{""}, -type => { transacGrp => "ZERO", }, 'concept' => sub{ }, 'conceptGrp' => sub{ $con++; print F "
\n"; }, 'descrip' => sub{ if( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++; print F "$v{type} = ".n($c)."
\n"} else{ $rel{$v{type}}{L}++; push(@{gfather->{_att}}, "+$v{type} = ".n($c)."
\n")}}, 'descripGrp' => sub{ }, 'language' => sub{ father->{lang}=$v{lang} }, 'languageGrp' => sub{ print F join("",@{$v{_att}||[]})}, 'mtf' => sub{ }, 'system' => sub{ }, 'term' => sub{ print F "",gfather->{lang}, " = $c
\n" }, 'termGrp' => sub{ print F join("",@{$v{_att}||[]})}, 'xref' => sub{ "$c ($v{Tlink})"}, ); eval {dt($filename,%handler)}; warn ("######Error $filename:$@\n") if $@; } if($r){ print F "

## concepts - $con

\n"; for (sort ( keys (%rel), )){ print F "## $_ - ($rel{$_}{G}+$rel{$_}{L})
\n"; } } close F; } sub m2term{ my %opt =(output => "_output",baselang=>"PT"); if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})}; my @A=@_; #my %subrel=(); my %rel=(); my $con=0; open(F,">:utf8","$opt{output}.dici") or die; for my $filename (@A){ my %handler=( '-default' => sub{""}, -type => { transacGrp => "ZERO", }, 'concept' => sub{ }, 'conceptGrp' => sub{ $con++; print F "\n#--------------------------------------------------------------($con)---\n"; }, 'descrip' => sub{ if( imagem($v{type},$c) ){ my $f; if($img){$f = $img} else {$f="media=$filename"; $f =~ s/\.xml$//i; } print F "!",n($v{type})," = ", n("$c"), "\n"} elsif( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++; print F "$v{type} = ".n($c)."\n"} else{ $rel{$v{type}}{L}++; push(@{gfather->{_att}}, "+$v{type} = ".n($c)."\n")}}, 'descripGrp' => sub{ }, 'language' => sub{ father->{lang}=$v{lang} }, 'languageGrp' => sub{ print F join("",@{$v{_att}||[]})}, 'mtf' => sub{ }, # 1 occurrences; 'system' => sub{ }, 'term' => sub{ print F gfather->{lang}, " = $c\n" }, 'termGrp' => sub{ print F join("",@{$v{_att}||[]})}, 'xref' => sub{ "$c ($v{Tlink})"}, ); eval {dt($filename,%handler)}; warn ("######Error $filename:$@\n") if $@; } if($r){ print F "## concepts - $con\n"; for (sort ( keys (%rel), )){ print F "## $_ - ($rel{$_}{G}+$rel{$_}{L})\n"; } } close F; } sub m2abs{ my %opt =(output => "_output",baselang=>"PT"); if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})}; my @A=@_; my %rel=(); my $langs={}; my $con=0; ## Concept number my $T; my $Ind; my $inf; my @term; for my $filename (@A){ my %handler=( '-default' => sub{""}, -type => { transacGrp => "ZERO", }, -pcdata => sub{ $c =~ s/\s*$//; $c =~ s/([\n\r]+)/\n\t/g; $c}, 'conceptGrp' => sub{ $con++; $T->{$con} = $inf; for (@term){ push(@{$Ind->{$_}}, $con); } @term=(); $inf=[];}, ## con ## $T : con -> inf ## $Ind : term -> con* ## ciclo @term = term* ## $inf = p* ## p : !id -> v ## | id -> v 'descrip' => sub{ $v{type} = lunaccent($v{type}); if( imagem($v{type},$c) ){ ## imagem push(@$inf, ["!$v{type}",lunaccent($c)]); } elsif( ctxt(2) eq "conceptGrp"){ ## at-con $rel{$v{type}}{G}++; push(@$inf,[$v{type},$c])} elsif( ctxt(2) eq "termGrp"){ ## at-con $rel{$v{type}}{L}++; push(@{gfather->{_att}}, [$v{type},$c])} elsif( ctxt(2) eq "languageGrp"){ ## at-con $rel{$v{type}}{G}++; push(@$inf,[gfather->{lang}."-$v{type}",$c])} else{ ## sub-at $rel{$v{type}}{L}++; push(@$inf, ["??(".ctxt(2).")$v{type}","'$c'"])} }, 'language' => sub{ father->{lang}= ($langtag?"=":"").$v{lang}; $langs->{$v{lang}}++}, 'languageGrp' => sub{ for my $at(@{$v{_att}}){ $at->[0]="$v{lang}+$at->[0]"; push(@$inf, $at) } }, 'term' => sub{ push(@term, gfather->{lang}.":$c"); # push(@$inf ,[term => gfather->{lang}.":$c"]); push(@$inf ,gfather->{lang}.":$c");} , 'termGrp' => sub{ my $aux = pop(@$inf); if(ref($aux)){ my $aux2 = pop(@$inf); push(@$inf, $aux, {$aux2 => $v{_att} || []}); } else { push(@$inf, {$aux =>$v{_att} || []}) } }, 'xref' => sub{ "$c ($v{Tlink})"}, ); eval {dt($filename,%handler)}; warn ("######Error $filename:$@\n") if $@; } # print Dumper($T,$Ind); ($T,$con,$langs,$Ind); } sub dici2abs{ my %opt =(output => "_output",baselang=>"PT"); if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})}; my @A=@_; my %rel=(); my $langs; local $/=""; my $con=0; ## Concept number my $T; my $Ind; my $inf=[]; my @term; binmode(STDOUT,":utf8"); for my $filename (@A){ open(F,$filename) or die; while(){ $inf=[]; chomp; s/\n#.*//g; s/^#.*\n?//g; while(s/%enc(oding)?\s*(\S+).*//){ binmode(F,":$2"); } while(s/%lang(uage)?\s*(.*)//){ my $x=$2; for($x =~ m/(\w+)/g){ $langs->{$_} ++ }} next unless /\w/; $con++; while( /(?:^|\n)(\S+)\s*=?\s*((?:.|\n[ \t])*)/g){ my ($a1,$b1) = ($1,$2); if ($langs->{$a1}){ push(@{$Ind->{"$a1:$b1"}},$con); push(@$inf, {"$a1:$b1"=>[]}) } elsif($a1 =~ /^\+/) { $a1=~ s/^\+//; my $aux = pop(@$inf); my ($a2,$b2) = each(%$aux); push(@$b2, [$a1,$b1]) ; push(@$inf, {$a2=>$b2}) } elsif($a1 =~ /^\!/) { push(@$inf, [$a1,$b1]) } else { push(@$inf, [$a1,$b1]) } } $T->{$con} = [@$inf]; } } ($T,$con,$langs,$Ind) } sub m2dici2{ my ($T,$con,$langs,$Ind)=m2abs(@_); abs2dici($T,$con,$langs,$Ind); } sub abs2dici{ my ($T,$con,$langs,$Ind)=@_; binmode(STDOUT,":utf8"); print "%lang ", join(" ",keys %$langs),"\n%encoding utf8\n\n"; print "%img $img\n" if $img; for(1..$con){ print "\n#--------------------------------------------------------------($_)---\n"; for my $co(@{$T->{$_}}){ if(ref($co) eq "ARRAY"){ print "$co->[0] = $co->[1]\n"; } elsif(ref($co) eq "HASH"){ my ($a1,$a2)=(%$co); $a1 =~ s/:/ = /; print "$a1\n"; for(@$a2){print "+$_->[0] = $_->[1]\n"; } } else {print "??{(".Dumper($co).")}" } } } } sub abs2tex{ my %opt =(output => "_output",baselang=>"PT"); if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})}; my ($T,$con,$langs,$Ind)=@_; binmode(STDOUT,":utf8"); my @A=@_; #my %subrel=(); my %rel=(); my $inf; my @term; my $foto; my $fotoopt=q{\includegraphics[width=0.7\columnwidth,height=1\columnwidth,keepaspectratio]}; my $nl = "\n\\\\\n"; # -pcdata => sub{ $c =~ s/\s*$//; $c}, # 'conceptGrp' => sub{ # $con++; # for (@term){ $T->{$_} = $inf . $foto; } # @term=(); $inf="";$foto ="";}, # 'descrip' => sub{ # if( imagem($v{type},$c) ){ # my $f; # # if($img){$f = $img} # else {$f="media=$filename"; $f =~ s/\.xml$//i; } # # $foto .= "\\begin{center}$fotoopt\{$f/$c\}\n\\end{center}\n" } # # elsif( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++; # $inf .= "$nl\\emph{$v{type}}: ".n($c)} # else{ $rel{$v{type}}{L}++; # push(@{gfather->{_att}}, "$nl-- \\emph{$v{type}}: ".n($c))}}, # 'language' => sub{ father->{lang}=$v{lang} }, # 'languageGrp' => sub{ $inf .= join(";",@{$v{_att}||[]})}, # 'term' => sub{ # if (gfather->{lang} eq $opt{baselang}){ # push(@term, $c); # $inf .= "$nl\\textbf{\\sc ".lc(gfather->{lang}) ."}: " . "$c"; ##duplications # } # else { $inf .= "$nl\\textbf{\\sc ".lc(gfather->{lang}) ."}: " . "$c" ;} # }, # 'termGrp' => sub{ $inf .= join("",@{$v{_att}||[]})}, # ); # dt($filename,%handler); # } open(F,">:utf8","$opt{output}.tex") or die; if($opt{style} eq "agenda"){ print F q{ \documentclass[portuges,a4paper,twoside]{article} \usepackage{agbook} } } else { print F q{ \documentclass[portuges,a4paper,twocolumn]{book} \RequirePackage[a4paper,top=2.5cm,left=2cm,right=2cm,bottom=1.5cm,nofoot]{geometry} \parindent 0pt \parskip 3pt } } print F q{ \usepackage{ucs} \usepackage[utf8x]{inputenc} \usepackage[T1]{fontenc} \usepackage{babel} \usepackage{dict} \usepackage{graphicx} \usepackage{url} \begin{document} \begin{dictionary} }; my $last=""; for my $t (sort {unaccent($a) cmp unaccent($b)} ( keys %$Ind)){ next unless $t =~ /^$opt{baselang}:/; my $k=$t; $t =~ s/.*?://; my $fl = uunaccent(substr($t,0,1)); if($fl ne $last){print F "\\bigletterc{$fl}\n"; $last = $fl } print F "\n\\term{",ppttex($t),"}{\\", absentry2tex({%opt},$T->{$Ind->{$k}[0]}), "}\n"; } print F "\\end{dictionary}\n\\end{document}"; close F } sub open_term{ my $t = shift;; if(ref($t) eq "HASH"){ my ($k,$v) = each(%$t) ; if($k =~ /(\w+?):(.*)/) { return ("term",$1,$2,$v) } else { die "Wrong Key:term format\n". Dumper($t) } } if(ref($t) eq "ARRAY"){ if($t->[0] =~ /^!(.*)/) { return ("img",$1,$t->[1]); } if($t->[0] =~ /(.*?)\+(.*)/){ return ("attrlang",$1,$2,$t->[1]); } else { return ("attr", $t->[0],$t->[1]) } } else { return ("str",$t) } } sub absentry2tex { my %opt =(type => "tex"); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my $nl = "\n\\\\\n"; my $a=shift; my $fotoopt=q{\includegraphics[width=0.7\columnwidth,height=1\columnwidth,keepaspectratio]}; my $r=""; return "" unless @$a; for(@$a){ my @b=open_term($_); if($b[0] eq "img") {$r .= "\\begin{center}$fotoopt\{$opt{img}/$b[2]\}\n\\end{center}$nl"} if($b[0] eq "term") {$r .= "\\textbf{\\sc $b[1]}: $b[2]$nl"; } if($b[0] eq "attr") {$r .= "\\emph{$b[1]}: $b[2]$nl"; } if($b[0] eq "attrlan") {$r .= "\\emph{$b[2]}({\\scriptsize $b[1]}):$b[3]$nl"; } } return $r; } sub m2tex{ my %opt =(output => "_output",baselang=>"PT"); if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})}; my @A=@_; #my %subrel=(); my %rel=(); my $con=0; my $T; my $inf; my @term; my $foto; my $fotoopt=q{\includegraphics[width=0.7\columnwidth,height=1\columnwidth,keepaspectratio]}; my $nl = "\n\\\\\n"; for my $filename (@A){ my %handler=( '-default' => sub{""}, -type => { transacGrp => "ZERO", }, -pcdata => sub{ $c =~ s/\s*$//; $c}, 'concept' => sub{ }, 'conceptGrp' => sub{ $con++; for (@term){ $T->{$_} = $inf . $foto; } @term=(); $inf="";$foto ="";}, 'descrip' => sub{ if( imagem($v{type},$c) ){ my $f; if($img){$f = $img} else {$f="media=$filename"; $f =~ s/\.xml$//i; } $foto .= "\\begin{center}$fotoopt\{$f/$c\}\n\\end{center}\n" } elsif( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++; $inf .= "$nl\\emph{$v{type}}: ".n($c)} else{ $rel{$v{type}}{L}++; push(@{gfather->{_att}}, "$nl-- \\emph{$v{type}}: ".n($c))}}, 'descripGrp' => sub{ }, 'language' => sub{ father->{lang}=$v{lang} }, 'languageGrp' => sub{ $inf .= join(";",@{$v{_att}||[]})}, 'mtf' => sub{ }, 'system' => sub{ }, 'term' => sub{ if (gfather->{lang} eq $opt{baselang}){ push(@term, $c); $inf .= "$nl\\textbf{\\sc ".lc(gfather->{lang}) ."}: " . "$c"; ##duplications } else{$inf .= "$nl\\textbf{\\sc ".lc(gfather->{lang}) ."}: " . "$c";} }, 'termGrp' => sub{ $inf .= join("",@{$v{_att}||[]})}, 'xref' => sub{ "$c (→$v{Tlink})"}, ); eval {dt($filename,%handler)}; warn ("######Error $filename:$@\n") if $@; } binmode(STDOUT,":utf8"); if($r){ print "concepts - $con\n"; for (sort ( keys (%rel), )){ print " $_ - ($rel{$_}{G}+$rel{$_}{L})\n"; } } open(F,">:utf8","$opt{output}.tex") or die; if($opt{style} eq "agenda"){ print F q{ \documentclass[portuges,a4paper,twoside]{article} \usepackage{agbook} } } else { print F q{ \documentclass[portuges,a4paper,twocolumn]{book} \RequirePackage[a4paper,top=2.5cm,left=2cm,right=2cm,bottom=1.5cm,nofoot]{geometry} \parindent 0pt \parskip 3pt } } print F q{ \usepackage{ucs} \usepackage[utf8x]{inputenc} \usepackage[T1]{fontenc} \usepackage{babel} \usepackage{dict} \usepackage{graphicx} \usepackage{url} \begin{document} }; print F "\\begin{dictionary}\n"; my $last=""; for my $t (sort {unaccent($a) cmp unaccent($b)} ( keys %$T)){ my $fl = uunaccent(substr($t,0,1)); if($fl ne $last){print F "\\bigletterc{$fl}\n"; $last = $fl } print F "\n\\term{",ppttex($t),"}{",ppttex($T->{$t}),"}\n"; # for my $r (keys %{$T->{$t}}){ # print F "\\\\\\textbf{",ppttex($r),"} "; # for my $t2 (sort( keys %{$T->{$t}{$r}})){ # print F ppttex($t2),", "; # } # } # print F "}\n"; } print F "\\end{dictionary}\n\\end{document}"; close F } sub n{ my $a=shift; $a =~ s/([\n\r]+)/\n\t/g; $a } sub lunaccent{ my $b = shift; $b =~ s/[ ()\[\]]/_/g; lc(unaccent($b))} sub uunaccent{ unaccent(uc($_[0]))} sub unaccent{ my $a=shift; $a =~ y/áéíóúàèìòùâêîôûÁÉÍÓÚÂÊÎÔÛÈãõç/aeiouaeiouaeiouAEIOUAEIOUEaoc/; $a; } sub ppttex{ my $a=shift; $a =~ s/(http:[^\\ ]*|www\.[^\\ ]*)/\\url{$1}/g; $a =~ s/([_\$\%\#\&])/\\$1/g; $a =~ s/media=/media_/g; $a =~ s{(includegraphics)\[(.*?)\]\{(.*?)\}}{ sprintf('includegraphics[%s]{%s}',$2,putunder($3))}ge; $a =~ s{(includegraphics)\[(.*?)\]\{(.*?)\}}{ sprintf('%sincludegraphics[%s]{%s}',((-f $3)?"":"# Image missing\n%%"),$2,$3)}ge; $a; } sub putunder{ ## prepare images and image names to LaTeX my $b=shift; $b =~ s/\\_/_/g; my $c=$b; $b =~ s/[ ()\[\]]/-/g; $b = unaccent($b); if($b ne $c){copy($c,$b);} $c=$b; if($b =~ s/\.(gif|bmp)$/.png/i){system("convert '$c' '$b'") ;} $b; } sub imagem{ ## is-a imagem my ($n,$v)=@_; return 0 if $v =~ m{http://}; $v =~ m/\.(png|gif|jpe?g|bmp)$/i; } __END__ =head1 NAME term2any - Show the contents of a multiterm lexical db file =head1 SYNOPSIS term2any [-r] file.xml+ =head1 DESCRIPTION C command is used to show the contents of a multiterm lexical db file, or to generate a LaTeX dictionary. If a field has image filename as value (filename with extension png, gif, or jpe?g), the image is includes in the LaTeX dictionary. =head2 Options -html writes a very simple HTML output -r (also) print the set of fields found and their number of occ. Latex oriented options: -tex to generate LaTeX -s=agenda style = agenda: to print a LaTeX smallbook dictionary -l=EN to chose the baselanguage = EN -img[=DIR] define image directory (def: IMG) -dici outpur is in dici format -abs -dici2 -langtag -t -o ## Fix me: defaulte _output... =head1 AUTHOR J.Joao Almeida, jj@di.uminho.pt =head1 SEE ALSO perl(1). =cut