#!/usr/bin/perl -s

use strict;
use warnings;
use utf8;
use locale;

use Data::Dumper;
use File::Copy;
use POSIX qw(locale_h);
use XML::DT;
## use encoding "utf8";

# Command-line switches, filled in by perl's -s switch processing.
our ($r, $tex, $l, $s, $html, $img);

# Image paths already emitted into the LaTeX output (each image is included
# only once per run).
my %velhas = ();

# A bare "-img" arrives as the value 1 and selects the default directory
# "IMG".  The comparison is a string comparison so that directory names that
# merely look numeric (e.g. "1photos", "01") are kept as given.
# mkdir is best-effort: the directory may already exist.
if (defined $img && $img eq "1") {
    $img = "IMG";
    mkdir("IMG");
}
elsif ($img) {
    mkdir($img);
}

$l ||= "PT";
$s ||= "";

setlocale(LC_ALL, "pt_PT");

# The term (or HTML) listing is always produced; LaTeX generation is an
# additional step when -tex is given.
if ($html) { m2html(@ARGV) }
else       { m2term(@ARGV) }

m2tex({ baselang => $l, style => $s }, @ARGV) if $tex;

# _gl_counts(\%slot)
# Formats the concept-level (G) and language-level (L) counters of one field
# as "G+L" for the -r report.  A counter that was never incremented is shown
# as an empty string.
sub _gl_counts {
    my ($slot) = @_;
    my $g = defined $slot->{G} ? $slot->{G} : '';
    my $k = defined $slot->{L} ? $slot->{L} : '';
    return "$g+$k";
}

# m2html(@files)
# XML multiterm to HTML: walks every file with XML::DT and prints the
# concepts to STDOUT.
#
# NOTE(review): the strings printed here contain no HTML markup in this copy
# of the file, only newlines; the tags may have been lost somewhere.  The
# output below reproduces exactly the text visible in the source -- confirm
# against the upstream version before adding markup.
sub m2html {
    my @files = @_;

    my %rel     = ();    # field name -> { G => count, L => count }
    my %rellang = ();    # language   -> number of terms
    my $con     = 0;     # number of conceptGrp elements seen

    binmode(STDOUT, ":utf8");

    for my $filename (@files) {
        my %handler = (
            '-default' => sub { "" },
            -type      => { transacGrp => "ZERO", },
            'concept'    => sub { },
            'conceptGrp' => sub {
                $con++;
                print "\n\n";
            },
            'descrip' => sub {
                # Concept-level fields are printed at once; language/term
                # level fields are queued on the grandparent element and
                # printed when that element closes.
                if (ctxt(2) eq "conceptGrp") {
                    $rel{ $v{type} }{G}++;
                    print "$v{type} = " . n($c) . "\n\n";
                }
                else {
                    $rel{ $v{type} }{L}++;
                    push(@{gfather->{_att}}, "+$v{type} = " . n($c) . "\n\n");
                }
            },
            'descripGrp'  => sub { },
            'language'    => sub { father->{lang} = $v{lang} },
            'languageGrp' => sub { print join("", @{ $v{_att} || [] }) },
            'mtf'         => sub { },
            'system'      => sub { },
            'term'        => sub {
                my $lang = gfather->{lang};
                $rellang{$lang}++;
                print "", $lang, " = $c\n\n";
            },
            'termGrp' => sub { print join("", @{ $v{_att} || [] }) },
            'xref'    => sub { "$c" },
        );
        eval { dt($filename, %handler) };
        warn("######Error $filename:$@\n") if $@;
    }

    if ($r) {
        print "\n\n## concepts - $con\n\n\n";
        for my $lang (sort keys %rellang) {
            print "## $lang - ($rellang{$lang})\n\n";
        }
        for my $field (sort keys %rel) {
            print "## $field - (" . _gl_counts($rel{$field}) . ")\n\n";
        }
    }
}

# m2term(@files)
# XML multiterm to DICI term notation, printed to STDOUT.  Field names are
# normalised with lunaccent(); fields whose value looks like an image file
# name are printed with a leading "!".
sub m2term {
    my @files = @_;

    my %rel     = ();    # field name -> { G => count, L => count }
    my %rellang = ();    # language   -> number of terms
    my $con     = 0;     # number of conceptGrp elements seen

    binmode(STDOUT, ":utf8");

    for my $filename (@files) {
        my %handler = (
            '-default' => sub { "" },
            -type      => { transacGrp => "ZERO", },
            'concept'    => sub { },
            'conceptGrp' => sub {
                $con++;
                print "-----------------\n";
            },
            'descrip' => sub {
                my $rn = lunaccent($v{type});
                if (imagem($rn, $c)) {
                    $rel{$rn}{G}++;
                    print "!$rn = " . n($c) . "\n";
                }
                elsif (ctxt(2) eq "conceptGrp") {
                    $rel{$rn}{G}++;
                    print "$rn = " . n($c) . "\n";
                }
                else {
                    $rel{$rn}{L}++;
                    push(@{gfather->{_att}}, "+$rn = " . n($c) . "\n");
                }
            },
            'descripGrp'  => sub { },
            'language'    => sub { father->{lang} = $v{lang} },
            'languageGrp' => sub { print join("", @{ $v{_att} || [] }) },
            'mtf'         => sub { },    # 1 occurrences;
            'system'      => sub { },
            'term'        => sub {
                my $lang = gfather->{lang};
                $rellang{$lang}++;
                print $lang, " = ", n($c), "\n";
            },
            'termGrp' => sub { print join("", @{ $v{_att} || [] }) },
            'xref'    => sub { "$c ($v{Tlink})" },
        );
        eval { dt($filename, %handler) };
        warn("######Error $filename:$@\n") if $@;
    }

    if ($r) {
        print "## concepts - $con\n";
        for my $lang (sort keys %rellang) {
            print "## $lang - ($rellang{$lang})\n";
        }
        for my $field (sort keys %rel) {
            print "## $field - (" . _gl_counts($rel{$field}) . ")\n";
        }
    }
}

# m2tex([\%options,] @files)
# Builds a LaTeX dictionary from the given files and writes it to
# $options{output}.
#
# Options (all optional):
#   output   - file to write                     (default "_output.tex")
#   baselang - language whose terms are headwords (default "PT")
#   style    - "agenda" selects the agbook article preamble; anything else
#              selects the two-column book preamble
#
# Dies if the output file cannot be opened or closed.
sub m2tex {
    my %opt = (output => "_output.tex", baselang => "PT");
    if (ref($_[0]) eq "HASH") { %opt = (%opt, %{ shift(@_) }) }
    my @files = @_;

    my %rel     = ();    # field name -> { G => count, L => count }
    my %rellang = ();    # language   -> number of terms
    my $con     = 0;     # number of conceptGrp elements seen
    my $T       = {};    # headword -> LaTeX body of its entry
    my $inf     = "";    # text collected for the concept being read
    my @term;            # base-language terms of the concept being read
    my $foto    = "";    # image markup collected for the concept being read
    my $fotoopt = q{\includegraphics[width=0.7\columnwidth,height=1\columnwidth,keepaspectratio]};
    my $nl      = "\n\\\\\n";

    binmode(STDOUT, ":utf8");

    for my $filename (@files) {
        my %handler = (
            '-default' => sub { "" },
            -type      => { transacGrp => "ZERO", },
            -pcdata    => sub { $c =~ s/\s*$//; $c },
            'concept'    => sub { },
            'conceptGrp' => sub {
                $con++;
                # Every base-language term becomes a headword carrying the
                # whole concept text (a repeated headword overwrites the
                # earlier entry).
                for (@term) {
                    $T->{$_} = $inf . $foto;
                }
                @term = ();
                $inf  = "";
                $foto = "";
            },
            'descrip' => sub {
                if (imagem($v{type}, $c)) {
                    my $f;
                    ### FIXME: repeated images
                    if ($img) { $f = $img }
                    else {
                        $f = "media=$filename";
                        $f =~ s/\.xml$//i;
                    }
                    if (not $velhas{"$f/$c"}) {
                        $foto .= "\\begin{center}$fotoopt\{$f/$c\}\n\\end{center}\n";
                        $velhas{"$f/$c"} = 1;
                    }
                }
                elsif (ctxt(2) eq "conceptGrp") {
                    $rel{ $v{type} }{G}++;
                    $inf .= "$nl\\emph{$v{type}}: " . n($c);
                }
                else {
                    $rel{ $v{type} }{L}++;
                    push(@{gfather->{_att}}, "$nl-- \\emph{$v{type}}: " . n($c));
                }
            },
            'descripGrp'  => sub { },
            'language'    => sub { father->{lang} = $v{lang} },
            'languageGrp' => sub { $inf .= join(";", @{ $v{_att} || [] }) },
            'mtf'         => sub { },
            'system'      => sub { },
            'term'        => sub {
                my $lang = gfather->{lang};
                $rellang{$lang}++;
                # Only base-language terms become headwords; every term is
                # listed in the entry text.
                push(@term, $c) if $lang eq $opt{baselang};    ## duplications
                $inf .= "$nl\\textbf{\\sc " . lc($lang) . "}: " . n($c);
            },
            'termGrp' => sub { $inf .= join("", @{ $v{_att} || [] }) },
            # 'xref' => sub{ " [$c] {\\scriptsize (→$v{Tlink})} "},
            'xref' => sub { " [" . n($c) . "] " },
        );
        eval { dt($filename, %handler) };
        warn("######Error $filename:$@\n") if $@;
    }

    if ($r) {
        print "%## concepts - $con\n";
        for my $lang (sort keys %rellang) {
            print "%## $lang - ($rellang{$lang})\n";
        }
        for my $field (sort keys %rel) {
            print "%## $field - (" . _gl_counts($rel{$field}) . ")\n";
        }
    }

    open(my $out, '>:utf8', $opt{output})
        or die "Cannot write $opt{output}: $!\n";

    if ($opt{style} eq "agenda") {
        print {$out} q{
\documentclass[portuges,a4paper,twoside]{article}
\usepackage{agbook}
};
    }
    else {
        print {$out} q{
\documentclass[portuges,a4paper,twocolumn]{book}
\RequirePackage[a4paper,top=2.5cm,left=2cm,right=2cm,bottom=1.5cm,nofoot]{geometry}
\parindent 0pt
\parskip 3pt
};
    }

    print {$out} q{
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{babel}
\usepackage{dict}
\usepackage{graphicx}
\usepackage{url}
\begin{document}
};

    print {$out} "\\begin{dictionary}\n";

    my $last = "";
    for my $t (sort { unaccent($a) cmp unaccent($b) } (keys %$T)) {
        # Start a new letter section whenever the unaccented, upper-cased
        # first letter changes.
        my $fl = uunaccent(substr($t, 0, 1));
        if ($fl ne $last) {
            print {$out} "\\bigletterc{$fl}\n";
            $last = $fl;
        }
        print {$out} "\n\\term{", ppttex($t), "}{", ppttex($T->{$t}), "}\n";
        #    for my $r (keys %{$T->{$t}}){
        #       print F "\\\\\\textbf{",ppttex($r),"} ";
        #       for my $t2 (sort( keys %{$T->{$t}{$r}})){
        #          print F ppttex($t2),", ";
        #       }
        #    }
        #    print F "}\n";
    }

    print {$out} "\\end{dictionary}\n\\end{document}";
    close($out) or die "Cannot close $opt{output}: $!\n";
}

# n($text)
# Trims leading and trailing whitespace and turns every run of line breaks
# into a newline followed by a tab.
sub n {
    my $text = shift;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    $text =~ s/([\n\r]+)/\n\t/g;
    $text;
}

# lunaccent($name)
# Field-name normalisation: spaces, parentheses and square brackets become
# "_", accents are removed and the result is lower-cased.
sub lunaccent {
    my $name = shift;
    $name =~ s/[ ()\[\]]/_/g;
    lc(unaccent($name));
}

# uunaccent($text)
# Upper-cases $text and removes accents.
sub uunaccent { unaccent(uc($_[0])) }

# unaccent($text)
# Replaces accented Portuguese vowels and c-cedilla by their plain letters.
# Both cases of every letter are listed: uunaccent() upper-cases before
# calling this, so the upper-case forms must be present as well.
sub unaccent {
    my $text = shift;
    $text =~ tr/áéíóúàèìòùâêîôûãõçÁÉÍÓÚÀÈÌÒÙÂÊÎÔÛÃÕÇ/aeiouaeiouaeiouaocAEIOUAEIOUAEIOUAOC/;
    $text;
}

# ppttex($text)
# Prepares entry text for LaTeX: wraps URLs in \url{}, escapes special
# characters, renames "media=" directories to "media_", normalises image file
# names through putunder(), and comments out \includegraphics for files that
# do not exist.
sub ppttex {
    my $text = shift;
    $text =~ s/(https?:[^\\ ]*|www\.[^\\ ]*)/\\url{$1}/g;
    $text =~ s/([_\$\%\#\&])/\\$1/g;
    $text =~ s/(["])(\s*)$/$1.$2/g;
    $text =~ s/media=/media_/g;
    # Captures are copied into strings before putunder() runs its own
    # pattern matches.
    $text =~ s{(includegraphics)\[(.*?)\]\{(.*?)\}}{
        sprintf('includegraphics[%s]{%s}', "$2", putunder("$3"))}ge;
    # The match starts after the backslash, so for a missing file the result
    # reads "\# Image missing" followed by a commented-out include.
    $text =~ s{(includegraphics)\[(.*?)\]\{(.*?)\}}{
        sprintf('%sincludegraphics[%s]{%s}',
            ((-f $3) ? "" : "# Image missing\n%%"), "$2", "$3")}ge;
    $text;
}

# putunder($path)
# Prepares images and image names for LaTeX.  Undoes "\_" escaping, replaces
# spaces/brackets by "-" and removes accents (copying the file to the new
# name when it changed), and converts gif/bmp files to png with the external
# "convert" program.  Returns the file name to use in \includegraphics.
sub putunder {
    my $name = shift;
    $name =~ s/\\_/_/g;
    my $source = $name;
    $name =~ s/[ ()\[\]]/-/g;
    $name = unaccent($name);
    if ($name ne $source) {
        copy($source, $name)
            or warn "Cannot copy '$source' to '$name': $!\n";
    }
    $source = $name;
    if ($name =~ s/\.(gif|bmp)$/.png/i) {
        # List form: the file names come from the XML data and must not be
        # interpreted by a shell.
        system('convert', $source, $name) == 0
            or warn "convert '$source' '$name' failed\n";
    }
    $name;
}

# imagem($name, $value)
# Is-an-image test: true when $value ends in an image extension and is not a
# URL.  $name is accepted for the callers' convenience and is not inspected.
sub imagem {
    my ($n, $v) = @_;
    return 0 if $v =~ m{https?://};
    return $v =~ m/\.(png|gif|jpe?g|bmp)$/i ? 1 : 0;
}

__END__

=head1 NAME

Multiterm2term - Show the contents of a multiterm lexical db file

=head1 SYNOPSIS

  multiterm2term [-r] [-html] [-tex [-s=agenda] [-l=EN] [-img[=dir]]] file.xml+

=head1 DESCRIPTION

The multiterm2term command is used to show the contents of a multiterm
lexical db file, or to generate a LaTeX dictionary.

If a field has an image filename as value (a filename with extension png,
gif, bmp or jpe?g), the image is included in the LaTeX dictionary.

=head2 Options

  -html      writes a very simple HTML output
  -r         (also) print the set of fields found and their number of
             occurrences

LaTeX oriented options:

  -tex       to generate LaTeX (written to _output.tex)
  -s=agenda  style = agenda: to print a LaTeX smallbook dictionary
  -l=EN      to choose the base language = EN (default PT)
  -img=dir   use dir as the image directory (created if missing);
             a plain -img uses the directory IMG.  Without this option
             images are looked up under "media_" followed by the XML file
             name without its .xml extension.

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut