#!/usr/bin/perl -s
use XML::DT ;
use Data::Dumper ;
$Data::Dumper::Terse =1;
$Data::Dumper::Deepcopy =1;
## use encoding "utf8";
use strict;
use utf8;
use File::Copy;
our ($r,$tex,$l,$s,$html,$img,$dici,$o,$abs,$diciold,$langtag,$t);
$o ||= "_output";
$dici //= 1 unless($tex or $html or $abs or $diciold or $t);
if($img == "1"){ $img = "IMG"; mkdir("IMG")}
elsif($img) { mkdir($img) }
my $langs;
$l ||= "PT";
$s ||= "";
use POSIX qw(locale_h);
setlocale(&POSIX::LC_ALL, "pt_PT");
use locale;
m2html({output=>$o}, @ARGV) if $html;
m2term({output=>$o}, @ARGV) if $diciold ;
m2tex ({output=>$o, baselang=> $l, style => $s},@ARGV) if $tex;
print Dumper(m2abs ({style => $s},@ARGV)) if $abs;
m2dici2({output=>$o}, @ARGV) if $dici ;
m2r({output=>$o}, @ARGV) if $r ;
if ($t==1){ dici2abs({output=>$o}, @ARGV) }
if ($t==2){ my ($T1,$nc,$langs,$ind)=dici2abs({output=>$o}, @ARGV);
abs2dici($T1,$nc,$langs,$ind);
}
if ($t==3){ my ($T1,$nc,$langs,$ind)=dici2abs({output=>$o}, @ARGV);
abs2tex({output=>$o,img=>$img,baselang=>$l},$T1,$nc,$langs,$ind);
}
sub m2r{
my %opt =(output => "_output");
if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})};
my @A=@_;
#my %subrel=();
my %rel=();
my %lang=();
my %tran=();
my $con=0;
my $lin=0;
for my $filename (@A){
my %handler=(
'-default' => sub{""},
# -type => { transacGrp => "ZERO", },
'concept' => sub{ },
'conceptGrp' => sub{ $con++; },
'descrip' => sub{
if( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++; }
else{ $rel{$v{type}}{L}++; }},
'descripGrp' => sub{ },
'language' => sub{ father->{lang}=$v{lang} },
'languageGrp' => sub{ },
'mtf' => sub{ },
'system' => sub{ },
'term' => sub{ $lang{gfather->{lang}}++ },
'termGrp' => sub{ },
'transac' => sub{ $tran{$c}++ },
'xref' => sub{ $lin++ },
);
eval {dt($filename,%handler)};
warn ("######Error $filename:$@\n") if $@;
}
if($r){
print "concepts: $con\n";
print "links: $lin\n";
for (sort ( keys (%lang), )){
print "lang: $_ $lang{$_}\n";
}
for (sort ( keys (%rel), )){
print "rel:", lunaccent($_), "($rel{$_}{G}+$rel{$_}{L}) '$_'\n";
}
for (sort ( keys (%tran), )){
print "user: $_ $tran{$_}\n";
}
}
return($con,\%rel,$lin)
}
sub m2html{
my %opt =(output => "_output",baselang=>"PT");
if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})};
my @A=@_;
#my %subrel=();
my %rel=();
my $con=0;
open(F,">:utf8","$opt{output}.html") or die;
for my $filename (@A){
my %handler=(
'-default' => sub{""},
-type => { transacGrp => "ZERO",
},
'concept' => sub{ },
'conceptGrp' => sub{ $con++; print F "
\n"; },
'descrip' => sub{
if( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++;
print F "$v{type} = ".n($c)."
\n"}
else{ $rel{$v{type}}{L}++;
push(@{gfather->{_att}}, "+$v{type} = ".n($c)."
\n")}},
'descripGrp' => sub{ },
'language' => sub{ father->{lang}=$v{lang} },
'languageGrp' => sub{ print F join("",@{$v{_att}||[]})},
'mtf' => sub{ },
'system' => sub{ },
'term' => sub{
print F "",gfather->{lang}, " = $c
\n" },
'termGrp' => sub{ print F join("",@{$v{_att}||[]})},
'xref' => sub{ "$c ($v{Tlink})"},
);
eval {dt($filename,%handler)};
warn ("######Error $filename:$@\n") if $@;
}
if($r){
print F "## concepts - $con
\n";
for (sort ( keys (%rel), )){
print F "## $_ - ($rel{$_}{G}+$rel{$_}{L})
\n";
}
}
close F;
}
sub m2term{
my %opt =(output => "_output",baselang=>"PT");
if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})};
my @A=@_;
#my %subrel=();
my %rel=();
my $con=0;
open(F,">:utf8","$opt{output}.dici") or die;
for my $filename (@A){
my %handler=(
'-default' => sub{""},
-type => { transacGrp => "ZERO",
},
'concept' => sub{ },
'conceptGrp' => sub{ $con++;
print F "\n#--------------------------------------------------------------($con)---\n"; },
'descrip' => sub{
if( imagem($v{type},$c) ){
my $f;
if($img){$f = $img}
else {$f="media=$filename"; $f =~ s/\.xml$//i; }
print F "!",n($v{type})," = ", n("$c"), "\n"}
elsif( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++;
print F "$v{type} = ".n($c)."\n"}
else{ $rel{$v{type}}{L}++;
push(@{gfather->{_att}}, "+$v{type} = ".n($c)."\n")}},
'descripGrp' => sub{ },
'language' => sub{ father->{lang}=$v{lang} },
'languageGrp' => sub{ print F join("",@{$v{_att}||[]})},
'mtf' => sub{ }, # 1 occurrences;
'system' => sub{ },
'term' => sub{
print F gfather->{lang}, " = $c\n" },
'termGrp' => sub{ print F join("",@{$v{_att}||[]})},
'xref' => sub{ "$c ($v{Tlink})"},
);
eval {dt($filename,%handler)};
warn ("######Error $filename:$@\n") if $@;
}
if($r){
print F "## concepts - $con\n";
for (sort ( keys (%rel), )){
print F "## $_ - ($rel{$_}{G}+$rel{$_}{L})\n";
}
}
close F;
}
sub m2abs{
my %opt =(output => "_output",baselang=>"PT");
if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})};
my @A=@_;
my %rel=();
my $langs={};
my $con=0; ## Concept number
my $T;
my $Ind;
my $inf;
my @term;
for my $filename (@A){
my %handler=(
'-default' => sub{""},
-type => { transacGrp => "ZERO",
},
-pcdata => sub{ $c =~ s/\s*$//; $c =~ s/([\n\r]+)/\n\t/g; $c},
'conceptGrp' => sub{
$con++;
$T->{$con} = $inf;
for (@term){ push(@{$Ind->{$_}}, $con); }
@term=(); $inf=[];},
## con
## $T : con -> inf
## $Ind : term -> con*
## ciclo @term = term*
## $inf = p*
## p : !id -> v
## | id -> v
'descrip' => sub{
$v{type} = lunaccent($v{type});
if( imagem($v{type},$c) ){ ## imagem
push(@$inf, ["!$v{type}",lunaccent($c)]); }
elsif( ctxt(2) eq "conceptGrp"){ ## at-con
$rel{$v{type}}{G}++;
push(@$inf,[$v{type},$c])}
elsif( ctxt(2) eq "termGrp"){ ## at-con
$rel{$v{type}}{L}++;
push(@{gfather->{_att}}, [$v{type},$c])}
elsif( ctxt(2) eq "languageGrp"){ ## at-con
$rel{$v{type}}{G}++;
push(@$inf,[gfather->{lang}."-$v{type}",$c])}
else{ ## sub-at
$rel{$v{type}}{L}++;
push(@$inf, ["??(".ctxt(2).")$v{type}","'$c'"])}
},
'language' => sub{ father->{lang}= ($langtag?"=":"").$v{lang};
$langs->{$v{lang}}++},
'languageGrp' => sub{
for my $at(@{$v{_att}}){
$at->[0]="$v{lang}+$at->[0]";
push(@$inf, $at) } },
'term' => sub{
push(@term, gfather->{lang}.":$c");
# push(@$inf ,[term => gfather->{lang}.":$c"]);
push(@$inf ,gfather->{lang}.":$c");} ,
'termGrp' => sub{
my $aux = pop(@$inf);
if(ref($aux)){
my $aux2 = pop(@$inf);
push(@$inf, $aux, {$aux2 => $v{_att} || []});
}
else { push(@$inf, {$aux =>$v{_att} || []}) }
},
'xref' => sub{ "$c ($v{Tlink})"},
);
eval {dt($filename,%handler)};
warn ("######Error $filename:$@\n") if $@;
}
# print Dumper($T,$Ind);
($T,$con,$langs,$Ind);
}
sub dici2abs{
my %opt =(output => "_output",baselang=>"PT");
if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})};
my @A=@_;
my %rel=();
my $langs;
local $/="";
my $con=0; ## Concept number
my $T;
my $Ind;
my $inf=[];
my @term;
binmode(STDOUT,":utf8");
for my $filename (@A){
open(F,$filename) or die;
while(){
$inf=[];
chomp;
s/\n#.*//g;
s/^#.*\n?//g;
while(s/%enc(oding)?\s*(\S+).*//){ binmode(F,":$2"); }
while(s/%lang(uage)?\s*(.*)//){ my $x=$2; for($x =~ m/(\w+)/g){ $langs->{$_} ++ }}
next unless /\w/;
$con++;
while( /(?:^|\n)(\S+)\s*=?\s*((?:.|\n[ \t])*)/g){
my ($a1,$b1) = ($1,$2);
if ($langs->{$a1}){
push(@{$Ind->{"$a1:$b1"}},$con);
push(@$inf, {"$a1:$b1"=>[]}) }
elsif($a1 =~ /^\+/) {
$a1=~ s/^\+//;
my $aux = pop(@$inf);
my ($a2,$b2) = each(%$aux);
push(@$b2, [$a1,$b1]) ;
push(@$inf, {$a2=>$b2}) }
elsif($a1 =~ /^\!/) { push(@$inf, [$a1,$b1]) }
else { push(@$inf, [$a1,$b1]) }
}
$T->{$con} = [@$inf];
}
}
($T,$con,$langs,$Ind)
}
sub m2dici2{
my ($T,$con,$langs,$Ind)=m2abs(@_);
abs2dici($T,$con,$langs,$Ind);
}
sub abs2dici{
my ($T,$con,$langs,$Ind)=@_;
binmode(STDOUT,":utf8");
print "%lang ", join(" ",keys %$langs),"\n%encoding utf8\n\n";
print "%img $img\n" if $img;
for(1..$con){
print "\n#--------------------------------------------------------------($_)---\n";
for my $co(@{$T->{$_}}){
if(ref($co) eq "ARRAY"){ print "$co->[0] = $co->[1]\n"; }
elsif(ref($co) eq "HASH"){
my ($a1,$a2)=(%$co);
$a1 =~ s/:/ = /;
print "$a1\n";
for(@$a2){print "+$_->[0] = $_->[1]\n"; }
}
else {print "??{(".Dumper($co).")}" }
}
}
}
sub abs2tex{
my %opt =(output => "_output",baselang=>"PT");
if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})};
my ($T,$con,$langs,$Ind)=@_;
binmode(STDOUT,":utf8");
my @A=@_;
#my %subrel=();
my %rel=();
my $inf;
my @term;
my $foto;
my $fotoopt=q{\includegraphics[width=0.7\columnwidth,height=1\columnwidth,keepaspectratio]};
my $nl = "\n\\\\\n";
# -pcdata => sub{ $c =~ s/\s*$//; $c},
# 'conceptGrp' => sub{
# $con++;
# for (@term){ $T->{$_} = $inf . $foto; }
# @term=(); $inf="";$foto ="";},
# 'descrip' => sub{
# if( imagem($v{type},$c) ){
# my $f;
#
# if($img){$f = $img}
# else {$f="media=$filename"; $f =~ s/\.xml$//i; }
#
# $foto .= "\\begin{center}$fotoopt\{$f/$c\}\n\\end{center}\n" }
#
# elsif( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++;
# $inf .= "$nl\\emph{$v{type}}: ".n($c)}
# else{ $rel{$v{type}}{L}++;
# push(@{gfather->{_att}}, "$nl-- \\emph{$v{type}}: ".n($c))}},
# 'language' => sub{ father->{lang}=$v{lang} },
# 'languageGrp' => sub{ $inf .= join(";",@{$v{_att}||[]})},
# 'term' => sub{
# if (gfather->{lang} eq $opt{baselang}){
# push(@term, $c);
# $inf .= "$nl\\textbf{\\sc ".lc(gfather->{lang}) ."}: " . "$c"; ##duplications
# }
# else { $inf .= "$nl\\textbf{\\sc ".lc(gfather->{lang}) ."}: " . "$c" ;}
# },
# 'termGrp' => sub{ $inf .= join("",@{$v{_att}||[]})},
# );
# dt($filename,%handler);
# }
open(F,">:utf8","$opt{output}.tex") or die;
if($opt{style} eq "agenda"){
print F q{
\documentclass[portuges,a4paper,twoside]{article}
\usepackage{agbook}
}
}
else {
print F q{
\documentclass[portuges,a4paper,twocolumn]{book}
\RequirePackage[a4paper,top=2.5cm,left=2cm,right=2cm,bottom=1.5cm,nofoot]{geometry}
\parindent 0pt
\parskip 3pt
}
}
print F q{
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{babel}
\usepackage{dict}
\usepackage{graphicx}
\usepackage{url}
\begin{document}
\begin{dictionary}
};
my $last="";
for my $t (sort {unaccent($a) cmp unaccent($b)} ( keys %$Ind)){
next unless $t =~ /^$opt{baselang}:/;
my $k=$t;
$t =~ s/.*?://;
my $fl = uunaccent(substr($t,0,1));
if($fl ne $last){print F "\\bigletterc{$fl}\n"; $last = $fl }
print F "\n\\term{",ppttex($t),"}{\\",
absentry2tex({%opt},$T->{$Ind->{$k}[0]}),
"}\n";
}
print F "\\end{dictionary}\n\\end{document}";
close F
}
sub open_term{
my $t = shift;;
if(ref($t) eq "HASH"){
my ($k,$v) = each(%$t) ;
if($k =~ /(\w+?):(.*)/) { return ("term",$1,$2,$v) }
else { die "Wrong Key:term format\n". Dumper($t) }
}
if(ref($t) eq "ARRAY"){
if($t->[0] =~ /^!(.*)/) { return ("img",$1,$t->[1]); }
if($t->[0] =~ /(.*?)\+(.*)/){ return ("attrlang",$1,$2,$t->[1]); }
else { return ("attr", $t->[0],$t->[1]) }
}
else { return ("str",$t) }
}
sub absentry2tex {
my %opt =(type => "tex");
if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ;
my $nl = "\n\\\\\n";
my $a=shift;
my $fotoopt=q{\includegraphics[width=0.7\columnwidth,height=1\columnwidth,keepaspectratio]};
my $r="";
return "" unless @$a;
for(@$a){
my @b=open_term($_);
if($b[0] eq "img") {$r .= "\\begin{center}$fotoopt\{$opt{img}/$b[2]\}\n\\end{center}$nl"}
if($b[0] eq "term") {$r .= "\\textbf{\\sc $b[1]}: $b[2]$nl"; }
if($b[0] eq "attr") {$r .= "\\emph{$b[1]}: $b[2]$nl"; }
if($b[0] eq "attrlan") {$r .= "\\emph{$b[2]}({\\scriptsize $b[1]}):$b[3]$nl"; }
}
return $r;
}
sub m2tex{
my %opt =(output => "_output",baselang=>"PT");
if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})};
my @A=@_;
#my %subrel=();
my %rel=();
my $con=0;
my $T;
my $inf;
my @term;
my $foto;
my $fotoopt=q{\includegraphics[width=0.7\columnwidth,height=1\columnwidth,keepaspectratio]};
my $nl = "\n\\\\\n";
for my $filename (@A){
my %handler=(
'-default' => sub{""},
-type => { transacGrp => "ZERO",
},
-pcdata => sub{ $c =~ s/\s*$//; $c},
'concept' => sub{ },
'conceptGrp' => sub{
$con++;
for (@term){ $T->{$_} = $inf . $foto; }
@term=(); $inf="";$foto ="";},
'descrip' => sub{
if( imagem($v{type},$c) ){
my $f;
if($img){$f = $img}
else {$f="media=$filename"; $f =~ s/\.xml$//i; }
$foto .= "\\begin{center}$fotoopt\{$f/$c\}\n\\end{center}\n" }
elsif( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++;
$inf .= "$nl\\emph{$v{type}}: ".n($c)}
else{ $rel{$v{type}}{L}++;
push(@{gfather->{_att}}, "$nl-- \\emph{$v{type}}: ".n($c))}},
'descripGrp' => sub{ },
'language' => sub{ father->{lang}=$v{lang} },
'languageGrp' => sub{ $inf .= join(";",@{$v{_att}||[]})},
'mtf' => sub{ },
'system' => sub{ },
'term' => sub{
if (gfather->{lang} eq $opt{baselang}){
push(@term, $c);
$inf .= "$nl\\textbf{\\sc ".lc(gfather->{lang}) ."}: " . "$c"; ##duplications
}
else{$inf .= "$nl\\textbf{\\sc ".lc(gfather->{lang}) ."}: " . "$c";}
},
'termGrp' => sub{ $inf .= join("",@{$v{_att}||[]})},
'xref' => sub{ "$c (→$v{Tlink})"},
);
eval {dt($filename,%handler)};
warn ("######Error $filename:$@\n") if $@;
}
binmode(STDOUT,":utf8");
if($r){
print "concepts - $con\n";
for (sort ( keys (%rel), )){
print " $_ - ($rel{$_}{G}+$rel{$_}{L})\n";
}
}
open(F,">:utf8","$opt{output}.tex") or die;
if($opt{style} eq "agenda"){
print F q{
\documentclass[portuges,a4paper,twoside]{article}
\usepackage{agbook}
}
}
else {
print F q{
\documentclass[portuges,a4paper,twocolumn]{book}
\RequirePackage[a4paper,top=2.5cm,left=2cm,right=2cm,bottom=1.5cm,nofoot]{geometry}
\parindent 0pt
\parskip 3pt
}
}
print F q{
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{babel}
\usepackage{dict}
\usepackage{graphicx}
\usepackage{url}
\begin{document}
};
print F "\\begin{dictionary}\n";
my $last="";
for my $t (sort {unaccent($a) cmp unaccent($b)} ( keys %$T)){
my $fl = uunaccent(substr($t,0,1));
if($fl ne $last){print F "\\bigletterc{$fl}\n"; $last = $fl }
print F "\n\\term{",ppttex($t),"}{",ppttex($T->{$t}),"}\n";
# for my $r (keys %{$T->{$t}}){
# print F "\\\\\\textbf{",ppttex($r),"} ";
# for my $t2 (sort( keys %{$T->{$t}{$r}})){
# print F ppttex($t2),", ";
# }
# }
# print F "}\n";
}
print F "\\end{dictionary}\n\\end{document}";
close F
}
sub n{ my $a=shift;
$a =~ s/([\n\r]+)/\n\t/g;
$a
}
sub lunaccent{ my $b = shift; $b =~ s/[ ()\[\]]/_/g; lc(unaccent($b))}
sub uunaccent{ unaccent(uc($_[0]))}
sub unaccent{ my $a=shift;
$a =~ y/áéíóúàèìòùâêîôûÁÉÍÓÚÂÊÎÔÛÈãõç/aeiouaeiouaeiouAEIOUAEIOUEaoc/; $a; }
sub ppttex{ my $a=shift;
$a =~ s/(http:[^\\ ]*|www\.[^\\ ]*)/\\url{$1}/g;
$a =~ s/([_\$\%\#\&])/\\$1/g;
$a =~ s/media=/media_/g;
$a =~ s{(includegraphics)\[(.*?)\]\{(.*?)\}}{
sprintf('includegraphics[%s]{%s}',$2,putunder($3))}ge;
$a =~ s{(includegraphics)\[(.*?)\]\{(.*?)\}}{
sprintf('%sincludegraphics[%s]{%s}',((-f $3)?"":"# Image missing\n%%"),$2,$3)}ge;
$a;
}
sub putunder{ ## prepare images and image names to LaTeX
my $b=shift;
$b =~ s/\\_/_/g;
my $c=$b;
$b =~ s/[ ()\[\]]/-/g;
$b = unaccent($b);
if($b ne $c){copy($c,$b);}
$c=$b;
if($b =~ s/\.(gif|bmp)$/.png/i){system("convert '$c' '$b'") ;}
$b;
}
sub imagem{ ## is-a imagem
my ($n,$v)=@_;
return 0 if $v =~ m{http://};
$v =~ m/\.(png|gif|jpe?g|bmp)$/i;
}
__END__
=head1 NAME
term2any - Show the contents of a multiterm lexical db file
=head1 SYNOPSIS
term2any [-r] file.xml+
=head1 DESCRIPTION
C command is used to show the contents of a multiterm lexical db file,
or to generate a LaTeX dictionary.
If a field has image filename as value (filename with extension png, gif, or jpe?g),
the image is includes in the LaTeX dictionary.
=head2 Options
-html writes a very simple HTML output
-r (also) print the set of fields found and their number of occ.
Latex oriented options:
-tex to generate LaTeX
-s=agenda style = agenda: to print a LaTeX smallbook dictionary
-l=EN to chose the baselanguage = EN
-img[=DIR] define image directory (def: IMG)
-dici outpur is in dici format
-abs
-dici2
-langtag
-t
-o ## Fix me: defaulte _output...
=head1 AUTHOR
J.Joao Almeida, jj@di.uminho.pt
=head1 SEE ALSO
perl(1).
=cut