#!/usr/bin/perl -s
use XML::DT ;
use Data::Dumper ;
## use encoding "utf8";
use strict;
use utf8;
use File::Copy;
## Command-line switches, filled in by perl's -s option (see the #! line):
##   -r     also print the counts of concepts, languages and fields
##   -tex   additionally write the LaTeX dictionary (m2tex)
##   -l     base language handed to m2tex (default "PT")
##   -s     LaTeX style handed to m2tex (default "")
##   -html  print the listing with m2html instead of m2term
##   -img   image directory (see below)
our ($r,$tex,$l,$s,$html,$img);
## Image paths m2tex has already emitted, so no picture is included twice.
my %velhas=();
## A bare -img is stored by -s as the value 1: fall back to the directory "IMG".
## The comparison is a string one; a numeric == would also treat a directory
## name such as "1st" as the bare switch.
if(defined($img) and $img eq "1"){ $img = "IMG"; mkdir("IMG")}
elsif($img) { mkdir($img) }
$l ||= "PT";
$s ||= "";
use POSIX qw(locale_h);
setlocale(&POSIX::LC_ALL, "pt_PT");
use locale;
## The listing always goes to STDOUT; -tex adds the LaTeX file on top of it.
if($html){ m2html( @ARGV) }
else { m2term( @ARGV) }
m2tex({baselang=> $l, style => $s},@ARGV) if $tex;
sub m2html{ ## xml multiterm to html
    ## Walks every file named in @_ with dt() and prints one "name = value"
    ## entry per descrip/term element on STDOUT.  When the -r switch ($r) is
    ## set, the number of concepts, terms per language and fields per type
    ## is printed afterwards.
    ## NOTE(review): $c, %v, ctxt, father and gfather are presumably the
    ## XML::DT content/attribute/context helpers -- confirm against XML::DT.
    ## Several strings below hold a literal line break followed by \n; the
    ## break is part of the output and must stay exactly where it is.
    my @files = @_;
    #my %subrel=();
    my %rel     = ();   # $rel{TYPE}{G} / $rel{TYPE}{L}: fields seen directly under conceptGrp / elsewhere
    my %rellang = ();   # $rellang{LANG}: number of terms per language
    my $con     = 0;    # number of conceptGrp elements

    binmode(STDOUT, ":utf8");

    foreach my $filename (@files) {
        my %handler = (
            '-default'    => sub { "" },
            -type         => { transacGrp => "ZERO", },
            'concept'     => sub { },
            'conceptGrp'  => sub { $con++; print "
\n"; },
            'descrip'     => sub {
                # Fields directly under conceptGrp are printed at once;
                # the others are queued on the grandparent's _att list.
                my $where = ctxt(2) eq "conceptGrp" ? "G" : "L";
                $rel{$v{type}}{$where}++;
                if ($where eq "G") {
                    print "$v{type} = ".n($c)."
\n"
                }
                else {
                    push(@{gfather->{_att}}, "+$v{type} = ".n($c)."
\n")
                }
            },
            'descripGrp'  => sub { },
            'language'    => sub { father->{lang} = $v{lang} },
            'languageGrp' => sub { print join("", @{ $v{_att} || [] }) },
            'mtf'         => sub { },
            'system'      => sub { },
            'term'        => sub {
                my $lang = gfather->{lang};
                $rellang{$lang}++;
                print "", $lang, " = $c
\n"
            },
            'termGrp'     => sub { print join("", @{ $v{_att} || [] }) },
            'xref'        => sub { "$c" },
        );

        # A file that cannot be processed is reported and skipped.
        eval { dt($filename, %handler) };
        warn ("######Error $filename:$@\n") if $@;
    }

    if ($r) {
        print "## concepts - $con
\n";
        print "## $_ - ($rellang{$_})
\n" for sort keys %rellang;
        print "## $_ - ($rel{$_}{G}+$rel{$_}{L})
\n" for sort keys %rel;
    }
}
sub m2term{ ## xml multiterm to DICI term notation
    ## Walks every file named in @_ with dt() and prints the entries in
    ## "name = value" notation on STDOUT, one dashed line per conceptGrp.
    ## Field names are normalised with lunaccent(); a field whose value
    ## imagem() accepts is printed with a leading "!".  When the -r switch
    ## ($r) is set, the counters are printed at the end.
    ## NOTE(review): $c, %v, ctxt, father and gfather are presumably the
    ## XML::DT content/attribute/context helpers -- confirm against XML::DT.
    my @files = @_;
    #my %subrel=();
    my %rel     = ();   # $rel{NAME}{G} / $rel{NAME}{L}: fields printed directly / queued
    my %rellang = ();   # $rellang{LANG}: number of terms per language
    my $con     = 0;    # number of conceptGrp elements

    binmode(STDOUT, ":utf8");

    foreach my $filename (@files) {
        my %handler = (
            '-default'    => sub { "" },
            -type         => { transacGrp => "ZERO", },
            'concept'     => sub { },
            'conceptGrp'  => sub { $con++; print "-----------------\n"; },
            'descrip'     => sub {
                my $rn = lunaccent($v{type});
                if (imagem($rn, $c)) {
                    $rel{$rn}{G}++;
                    print "!$rn = ".n($c)."\n"
                }
                elsif (ctxt(2) eq "conceptGrp") {
                    $rel{$rn}{G}++;
                    print "$rn = ".n($c)."\n"
                }
                else {
                    # Queued on the grandparent; printed by termGrp/languageGrp.
                    $rel{$rn}{L}++;
                    push(@{gfather->{_att}}, "+$rn = ".n($c)."\n")
                }
            },
            'descripGrp'  => sub { },
            'language'    => sub { father->{lang} = $v{lang} },
            'languageGrp' => sub { print join("", @{ $v{_att} || [] }) },
            'mtf'         => sub { }, # 1 occurrences;
            'system'      => sub { },
            'term'        => sub {
                my $lang = gfather->{lang};
                $rellang{$lang}++;
                print $lang, " = ", n($c), "\n"
            },
            'termGrp'     => sub { print join("", @{ $v{_att} || [] }) },
            'xref'        => sub { "$c ($v{Tlink})" },
        );

        # A file that cannot be processed is reported and skipped.
        eval { dt($filename, %handler) };
        warn ("######Error $filename:$@\n") if $@;
    }

    if ($r) {
        print "## concepts - $con\n";
        print "## $_ - ($rellang{$_})\n" for sort keys %rellang;
        print "## $_ - ($rel{$_}{G}+$rel{$_}{L})\n" for sort keys %rel;
    }
}
sub m2tex{
    ## Builds a LaTeX dictionary from the files named in @_.
    ## An optional leading hash reference overrides the defaults:
    ##   output   => file to write (default "_output.tex")
    ##   baselang => language whose terms become the dictionary entries (default "PT")
    ##   style    => "agenda" selects the agbook preamble, anything else the book one
    ## Every term in the base language becomes a \term entry holding all the
    ## text ($inf) and pictures ($foto) collected for its conceptGrp.
    ## With the -r switch ($r) the counters are printed on STDOUT as LaTeX comments.
    ## Dies when the output file cannot be opened or closed.
    ## NOTE(review): $c, %v, ctxt, father and gfather are presumably the
    ## XML::DT content/attribute/context helpers -- confirm against XML::DT.
    my %opt =(output => "_output.tex",baselang=>"PT");
    if(ref($_[0]) eq "HASH") {%opt = (%opt, %{shift(@_)})};
    my @A=@_;
    #my %subrel=();
    my %rel=();      # $rel{TYPE}{G} / $rel{TYPE}{L}: fields under conceptGrp / elsewhere
    my %rellang=();  # $rellang{LANG}: number of terms per language
    my $con=0;       # number of conceptGrp elements
    my $T = {};      # base-language term => LaTeX body of its entry
    my $inf;         # text collected for the current conceptGrp
    my @term;        # base-language terms of the current conceptGrp
    my $foto;        # picture blocks collected for the current conceptGrp
    my $fotoopt=q{\includegraphics[width=0.7\columnwidth,height=1\columnwidth,keepaspectratio]};
    my $nl = "\n\\\\\n";
    binmode(STDOUT,":utf8");
    for my $filename (@A){
        my %handler=(
            '-default' => sub{""},
            -type => { transacGrp => "ZERO",
            },
            -pcdata => sub{ $c =~ s/\s*$//; $c},
            'concept' => sub{ },
            'conceptGrp' => sub{
                $con++;
                # Store the collected entry under each of its terms, then reset.
                for (@term){ $T->{$_} = $inf . $foto; }
                @term=(); $inf="";$foto ="";},
            'descrip' => sub{
                if( imagem($v{type},$c) ){
                    my $f;
                    ###FIXME: repeated images
                    if($img){$f = $img}
                    else {$f="media=$filename"; $f =~ s/\.xml$//i; }
                    # %velhas remembers every picture already emitted.
                    if(not $velhas{"$f/$c"}){
                        $foto .= "\\begin{center}$fotoopt\{$f/$c\}\n\\end{center}\n" ;
                        $velhas{"$f/$c"} = 1;
                    }
                }
                elsif( ctxt(2) eq "conceptGrp"){ $rel{$v{type}}{G}++;
                    $inf .= "$nl\\emph{$v{type}}: ".n($c)}
                else{ $rel{$v{type}}{L}++;
                    push(@{gfather->{_att}}, "$nl-- \\emph{$v{type}}: ".n($c))}},
            'descripGrp' => sub{ },
            'language' => sub{ father->{lang}=$v{lang} },
            'languageGrp' => sub{ $inf .= join(";",@{$v{_att}||[]})},
            'mtf' => sub{ },
            'system' => sub{ },
            'term' => sub{
                $rellang{gfather->{lang}}++;
                # Base-language terms are also the keys of the dictionary.
                push(@term, $c) if gfather->{lang} eq $opt{baselang};
                $inf .= "$nl\\textbf{\\sc ".lc(gfather->{lang}) ."}: " . n($c); ##duplications
            },
            'termGrp' => sub{ $inf .= join("",@{$v{_att}||[]})},
            # 'xref' => sub{ " [$c] {\\scriptsize (→$v{Tlink})} "},
            'xref' => sub{ " [".n($c)."] "},
        );
        # A file that cannot be processed is reported and skipped.
        eval {dt($filename,%handler)};
        warn ("######Error $filename:$@\n") if $@;
    }
    if($r){
        print "%## concepts - $con\n";
        for (sort ( keys (%rellang), )){
            print "%## $_ - ($rellang{$_})\n";
        }
        for (sort ( keys (%rel), )){
            print "%## $_ - ($rel{$_}{G}+$rel{$_}{L})\n";
        }
    }
    # Three-argument open on a lexical handle: the file name is never parsed
    # for a mode, and a failure stops here instead of being silently lost.
    open(my $out, ">", $opt{output})
        or die "Cannot write $opt{output}: $!\n";
    binmode($out,":utf8");
    if($opt{style} eq "agenda"){
        print {$out} q{
\documentclass[portuges,a4paper,twoside]{article}
\usepackage{agbook}
}
    }
    else {
        print {$out} q{
\documentclass[portuges,a4paper,twocolumn]{book}
\RequirePackage[a4paper,top=2.5cm,left=2cm,right=2cm,bottom=1.5cm,nofoot]{geometry}
\parindent 0pt
\parskip 3pt
}
    }
    print {$out} q{
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{babel}
\usepackage{dict}
\usepackage{graphicx}
\usepackage{url}
\begin{document}
};
    print {$out} "\\begin{dictionary}\n";
    my $last="";
    for my $t (sort {unaccent($a) cmp unaccent($b)} ( keys %$T)){
        # A new first letter (uppercased, accents removed) starts a new section.
        my $fl = uunaccent(substr($t,0,1));
        if($fl ne $last){print {$out} "\\bigletterc{$fl}\n"; $last = $fl }
        print {$out} "\n\\term{",ppttex($t),"}{",ppttex($T->{$t}),"}\n";
        # for my $r (keys %{$T->{$t}}){
        # print F "\\\\\\textbf{",ppttex($r),"} ";
        # for my $t2 (sort( keys %{$T->{$t}{$r}})){
        # print F ppttex($t2),", ";
        # }
        # }
        # print F "}\n";
    }
    print {$out} "\\end{dictionary}\n\\end{document}";
    # Buffered write errors only show up when the handle is closed.
    close($out)
        or die "Cannot finish writing $opt{output}: $!\n";
}
sub n{
    ## Normalise a value for printing: drop leading and trailing white space
    ## and turn every run of line breaks into one newline followed by a tab.
    my ($text) = @_;
    for ($text) {
        s/^\s+//;
        s/\s+$//;
        s/([\n\r]+)/\n\t/g;
    }
    return $text;
}
sub lunaccent{
    ## Turn a field name into a plain identifier: blanks, parentheses and
    ## square brackets become "_", accents are removed with unaccent() and
    ## the result is lowercased.
    my ($label) = @_;
    $label =~ s/[ ()\[\]]/_/g;
    my $plain = unaccent($label);
    return lc($plain);
}
sub uunaccent{
    ## Uppercase the first argument, then remove its accents with unaccent().
    my $upper = uc($_[0]);
    return unaccent($upper);
}
sub unaccent{
    ## Return the argument with accented letters replaced by their plain
    ## ASCII counterparts; every other character is left untouched.
    ## Each accented vowel, a/o with tilde and c with cedilla is mapped in
    ## both cases, so text that was uppercased first (see uunaccent) is
    ## stripped as reliably as lowercase text.
    my $text=shift;
    $text =~ y/áéíóúàèìòùâêîôûãõçÁÉÍÓÚÀÈÌÒÙÂÊÎÔÛÃÕÇ/aeiouaeiouaeiouaocAEIOUAEIOUAEIOUAOC/;
    $text;
}
sub ppttex{
    ## Prepare a piece of text for the LaTeX file and return it:
    ##  - http:, https: and www. addresses are wrapped in \url{...};
    ##  - the characters _ $ % # & get a backslash in front;
    ##  - a double quote closing the text gets a full stop after it;
    ##  - "media=" becomes "media_";
    ##  - every includegraphics file name goes through putunder(), and a
    ##    file that does not exist is flagged and commented out.
    my $text=shift;
    # https? so that secure links are wrapped whole instead of from "www." on.
    $text =~ s/(https?:[^\\ ]*|www\.[^\\ ]*)/\\url{$1}/g;
    $text =~ s/([_\$\%\#\&])/\\$1/g;
    $text =~ s/(["])(\s*)$/$1.$2/g;
    $text =~ s/media=/media_/g;
    $text =~ s{(includegraphics)\[(.*?)\]\{(.*?)\}}{
        sprintf('includegraphics[%s]{%s}',$2,putunder($3))}ge;
    # The pattern starts after the backslash of \includegraphics (as m2tex
    # writes it), so for a missing file that backslash ends up in front of
    # the "#" and the command itself lands behind a LaTeX comment sign.
    $text =~ s{(includegraphics)\[(.*?)\]\{(.*?)\}}{
        sprintf('%sincludegraphics[%s]{%s}',((-f $3)?"":"# Image missing\n%%"),$2,$3)}ge;
    $text;
}
sub putunder{ ## prepare images and image names to LaTeX
    ## Takes an image path as written in the LaTeX text and returns the path
    ## LaTeX should use instead:
    ##  - "\_" is turned back into "_";
    ##  - blanks, parentheses and square brackets become "-" and accents are
    ##    removed; when that changes the name the file is copied to it;
    ##  - a .gif or .bmp file is converted to .png with the external
    ##    "convert" command.
    ## Failures of copy() and convert are not reported here.
    my $path=shift;
    $path =~ s/\\_/_/g;
    my $source=$path;
    $path =~ s/[ ()\[\]]/-/g;
    $path = unaccent($path);
    if($path ne $source){copy($source,$path);}
    $source=$path;
    # List form of system(): no shell is involved, so a quote or any other
    # shell metacharacter in a file name cannot alter the command.
    if($path =~ s/\.(gif|bmp)$/.png/i){system("convert", $source, $path);}
    $path;
}
sub imagem{ ## is the value a local image file?
    ## Arguments: field name (not used) and field value.
    ## Returns 1 when the value ends in .png, .gif, .jpg, .jpeg or .bmp
    ## (any letter case) and is not a web address, 0 otherwise.
    my ($n,$v)=@_;
    # Remote pictures, secure links included, are not local files.
    return 0 if $v =~ m{https?://};
    return $v =~ m/\.(?:png|gif|jpe?g|bmp)$/i ? 1 : 0;
}
__END__
=head1 NAME
Multiterm2term - Show the contents of a multiterm lexical db file
=head1 SYNOPSIS
multiterm2term [-r] file.xml+
=head1 DESCRIPTION
Multiterm2term command is used to show the contents of a multiterm lexical db file,
or to generate a LaTeX dictionary.
If a field has an image filename as its value (a filename with extension png, gif, bmp, jpg, or jpeg),
the image is included in the LaTeX dictionary.
=head2 Options
-html writes a very simple HTML output
-r (also) print the set of fields found and their number of occ.
Latex oriented options:
-tex to generate LaTeX
-s=agenda style = agenda: to print a LaTeX smallbook dictionary
-l=EN to choose the base language = EN (default: PT)
-img=dir directory where the images are looked up (plain -img uses IMG)
=head1 AUTHOR
J.Joao Almeida, jj@di.uminho.pt
=head1 SEE ALSO
perl(1).
=cut