%{ use strict; use utf8::all; my %config=(style => 1); my %sem=(); ###GRAM:state our %A=(ac=>{},li=>{},img=>{},atli=>{}); # estatisticas dos atributos our %meta=(inline=>{}, ignore=>{}, macro=>{}, # macro-structure(dom,subdom,subsubdom) rename=>{}, crel=>{qw(bt 2 nt 2 dom 2 voc 2 subdom 2 supdom 2)}, #### conceptual relations inverse=> { "subdom" => "supdom", "supdom"=> "subdom" }, #### {bt => "nt", nt => "bt"}, basedir => "./", lang=>{qw(PT 2 EN 2 FR 2 IT 2 ES 2 RU 2 DE 2)}, img=> [] ); our @dic=(); our %voc_dom=(); our %func=(); our ($dom1, $dom2, $subdom1, $subdom2, $subsubdom1); ###GRAM %} %token LB IAC IMG ILI ALI V TAB INLINE CREL IGNORE JOIN RENAME LANG RELLANG INV DOM DOMPAIR SUBSUBDOM SUBDOM SUBDOMPAIR NODOM NOSUBDOM NOSUBSUBDOM NEWDIC %% Axioma: LB Dic | Dic ; Dic : | Dic dirs LB { } | Dic ent LB { push (@dic, $_[2] ); } | Dic entl LB { push (@dic,@{$_[2]}); } | Dic LB | Dic error LB { $_[0]->YYErrok; } | ent { push (@dic, $_[1] ); } | entl { push (@dic,@{$_[1]}); } | dirs ; dirs: dir | dirs dir ; dir : DIR V { push (@{$meta{$_[1]}}, $_[2]) } | TIT V { push (@{$meta{title}}, $_[2]) } | AUT V { push (@{$meta{author}}, split(/\s*;\s*/,$_[2])) } | IMG V { push (@{$meta{img}}, split(/\s+/,$_[2])) } | LANG V { my($fl,@ls)= split(/\s+/,$_[2]); for my $l (($fl, @ls)){ $meta{lang}{$l} = 2} $meta{rellang}=$fl; } | RELLANG V { my @ls= split(/\s+/,$_[2]); $meta{rellang}=$ls[0]; $meta{rellang2}=$ls[1] if $ls[1]} | NEWDIC { $subsubdom1=$subdom1=$subdom2=$dom1=$dom2="" } | INLINE V { for my $l (split(/\s+/,$_[2])){ $meta{inline}{$l} = $_[1]} } | IGNORE V { for my $l (split(/\s+/,$_[2])){ $meta{ignore}{$l} = 2} } | JOIN V { for my $l (split(/\s+/,$_[2])){ $meta{joinv}{$l} = $_[1]} } | CREL V { for my $l (split(/\s+/,$_[2])){ $meta{crel}{$l} = 2 } } # is concept rel. | INV V { my ($a1,$a2)=(split(/\s+/,$_[2])); $meta{inverse}{$a1} = $a2; $meta{crel}{$a1}=2; $meta{crel}{$a2}=2; } | RENAME V { my ($a1,$a2)=(split(/\s+/,$_[2])); $meta{rename}{$a1} = $a2 } | DOMPAIR { ($dom1,$dom2,$subdom1,$subdom2,$subsubdom1) = ($_[1][0], $_[1][1], undef, undef,undef); macro1($_[1][0])} | DOM { ($dom1,$dom2,$subdom1,$subdom2,$subsubdom1) = ($_[1] , undef , undef, undef,undef); macro1($_[1])} | NODOM { ($dom1,$dom2,$subdom1,$subdom2,$subsubdom1) = (undef , undef , undef, undef,undef); } | SUBDOMPAIR { ($subdom1,$subdom2,$subsubdom1) = ($_[1][0] , $_[1][1],undef) ; macro2($dom1,$_[1][0]); } | SUBDOM { ($subdom1,$subdom2,$subsubdom1) = ($_[1] , undef ,undef ) ; macro2($dom1,$_[1]); } | NOSUBDOM { ($subdom1,$subdom2,$subsubdom1) = (undef , undef ,undef ) ; } | SUBSUBDOM { $subsubdom1 = $_[1] ; macro3($dom1,$subdom1,$_[1]); } | NOSUBSUBDOM { $subsubdom1 = undef ; } ; ent : sent { #WAS: if($subdom1){[$_[1],_at(_ren('dom'),$dom1,[]),_at(_ren('subdom'),$subdom1,[])]} if($subsubdom1){[$_[1],_at(_ren('dom'),$subsubdom1,[])]} elsif($subdom1){[$_[1],_at(_ren('dom'),$subdom1,[])]} elsif($dom1 ){[$_[1],_at(_ren('dom'),$dom1,[])]} else {[$_[1]]} } | ent sent { [ @{$_[1]}, $_[2] ] } ; entl : ent '*' TAB { mapent($_[1],$_[3]) } | '(' entseq ')' '*' TAB { mapents($_[2],$_[5]) } | TAB '*' ent { mapent($_[3],$_[1]) } ; entseq : entseq LB ent { [@{$_[1]}, $_[3] ] } | entseq LB { $_[1] } | ent { [$_[1]] } ; sent: IAC '=' V lat { $A{at}{$_[1]}++; _at ( _ren($_[1]),$_[3], $_[4])} | IMG '=' V lat { $A{img}{$_[1]}++; _img ( _ren($_[1]),$_[3], $_[4])} | ILI '=' V lat { $A{li}{$_[1]}++; _term( _ren($_[1]),$_[3], $_[4])} | IAC '=' { +{} } | IMG '=' { +{} } | ILI '=' { +{} } ; lat : lat ALI '=' V { $A{atli}{$_[2]}++; [ @{$_[1]}, [_ren($_[2]), $_[4]]] } | { [ ] } ; %% sub _at {+{ISA=>"At", AN=>$_[0],AV=>$_[1], Ats=>$_[2]}} sub _img {+{ISA=>"Img", AN=>$_[0],AV=>$_[1], Ats=>$_[2]}} sub _term{ my($lang, $term, $ats) = @_; if($subdom1){ $voc_dom{"$dom1#$subdom1"}{"$lang#$term"} ++ } elsif($dom1){ $voc_dom{$dom1}{"$lang#$term"} ++ }; +{ISA=>"Term", AN=>$lang, AV=>$term, Ats=>$ats} } sub _ren { $meta{rename}{$_[0]} || $_[0]} sub mapents{ my ($ents,$tab)=@_; [ map {my $e=$_ ; @{mapent($e,$tab)} } @$ents ] } sub mapent{ my ($ent,$tab)=@_; [ map {subrec($ent,$_)} @$tab ] } sub macro1{ my ($d)=@_; $meta{macro}{$d}={} unless $meta{macro}{$d}; } sub macro2{ my ($d,$sd)=@_; $meta{macro}{$d}{$sd}={} unless $meta{macro}{$d}{$sd}; } sub macro3{ my ($d,$sd,$ssd)=@_; $meta{macro}{$d}{$sd}{$ssd}={} unless $meta{macro}{$d}{$sd}{$ssd}; } sub subrecv{ my ($v,$l)=@_; if($v=~/\$(\d+)\b/) { my $rep=$l->[$1-1]; if($rep =~ /\S/){ return [ map {$v =~ s/\$(\d+)\b/$_/r } split(/\s*\|\s*/,$rep)] } else { return [] } } elsif($v=~/\@(\d+)\b/) { my $rep=$l->[$1-1]; return ( map { [ $v =~ s/\@(\d+)\b/$_/r ] } split(/\s*\|\s*/,$rep)) } else { return [$v] } } sub subrec{ ## template, tup my ($ent,$l)=@_; my @out=(); for my $item (@$ent){ if(ref($item) eq "HASH") { my @outats=(); for my $pair (@{$item->{Ats}}){ my $hs = subrecv($pair->[1],$l); push(@outats, ( map { [ $pair->[0], $_] } @$hs )); } my @hs = subrecv($item->{AV},$l); ### FIXME -- wrong semantics of @1 ? for my $vs (@hs){ push(@out, ( map { +{ %$item, AV => $_ , Ats => [@outats]} } @$vs )); } } else { die("BUG on um-naterm: invalid subrec $item\n".join("\n",@$item)) } } return [@out] } ### MAIN package main; use Data::Dumper; use utf8::all; use Lingua::StarDict::Gen; our ($lua,$name,$lang, $html, $tex, $stardict, $xdxf, $debug,$skel,$lexdebug,$p2,$nop2); $p2 = 1 unless $nop2; $lang ||= "PT"; $tex=1 unless ($html or $stardict or $xdxf); ## default output my $yyst = 0; my $yylineno = 0; my $n; my $File = ""; binmode(STDOUT,":utf8"); if($lexdebug){lexdebug()} my $t = parseFile(); if($debug){ open(F,">debug-dic"); print F Dumper(\@naterm::dic, \%naterm::meta); close F;} $naterm::meta{rellang} ||= "PT"; ## perhaps EN? FIXME: -rl=EN $naterm::meta{name}[0] = $name if $name; gera_output(\@naterm::dic, \%naterm::meta, $lang); ### COMMON sub lexdebug { my $p = new naterm(); print "Init file:\n"; yygetmore(); print "Init lex:\n"; my ($a,$b)=lex(); while($a){ print "={$a}={$b} (",pos($File) ,")\n"; ($a,$b)=lex(); } print "end lex:\n"; exit 0; } sub parseFile { my $p = new naterm(); yygetmore(); $p->YYParse( yylex => \&lex, yyerror => \&yyerror); } sub yyerror { if ($_[0]->YYCurtok) { my $x= pos($File)<20 ? 0 : pos($File)-20; my $y= pos($File)<20 ? pos($File) : 20; my $m= substr($File,$x,$y)."(#)".substr($File,pos($File),20); $m =~ s/\n/\\n/g; my $k = substr($File,0,pos($File)); my $yylineno = ($k =~ s/\n/\n/g); printf STDERR ('Error %d(%d): a "%s" (%s) was found where %s was expected'."\n", $yylineno,pos($File), $_[0]->YYCurtok, $_[0]->YYCurval, $_[0]->YYExpect); print STDERR ".....$m \n" ; } else { print STDERR "Expecting one of ",join(", ",$_[0]->YYExpect),"\n"; } } sub _basedir{ my @a = @_; for (@a){ s/^/$naterm::meta{basedir}/ unless m!^[/.]!; } return @a } sub _slurp8{ my $f = shift; my ($f2) = _basedir( $f ); # warn("Debug: $f ... $f2\n"); open(F,"<:utf8",$f2) or warn("###!!##cant open $f ... $f2\n"); my $c=join("",); close F; $c; } ###COMMON:input-slurp sub yygetmore{ ## for compilers # local $/; undef $/; binmode(ARGV,":utf8"); my $prev=undef; binmode(STDIN,":utf8"); if(@ARGV == 1 and $ARGV[0] =~ /((.*\/)?(.*))\.(?:nat|term|naterm|dici?)$/) {#print "(1)$1 (2)$2 (3)$3\n"; if($tex ){open(STDOUT,">:utf8","$1.tex" ) or die("Cant reopen STDOUT\n"); } if($xdxf){open(STDOUT,">:utf8","$1.xdxf") or die("Cant reopen STDOUT\n"); } if($html){open(STDOUT,">:utf8","$1.html") or die("Cant reopen STDOUT\n"); } open(STDERR,">:utf8","$1.err") or die("Cant reopen STDERR\n"); $naterm::meta{basedir} = $2 || "./"; } while(<>){ if ($ARGV ne $prev){ $File.="\n\n__NEWDIC__\n$_"; $prev=$ARGV; } else{ $File.=$_; } } $File.= "\n\n__EOF__"; $File =~ s/[ \t]*(\r\n|\n\r|\n|\r)/\n/g; ## remove sp before \n $File =~ s/(\xFF\xFE|\xFE\xFF|\xEF\xBB\xBF|\x{FEFF})//; ## remove BOM ! close F; } ###SEM:functions sub _langis{ $_[0]->{AN} eq $_[1] && $_[0]->{ISA} eq "Term"} ## _langis(subj,l)=subj is in lang l sub _inv{ $_[1]->{inverse}{$_[0]->{AN}} } ## _inv(meta, trpl)= meta.inverse(trpl.AN) or undef ### # D1 = [entry] # entry=[trpl] # trpl=( ISA: {Term,Img,At}, # AN: str, // att. name - Rel ou Lingua # AV: , // att. val - Obj term ou text # Ats: [ [ name, str] ] // subatributes of AV # ) # # D2 = term → [entry] // term in output language src # # rl -- rellang // language used in conceptual rels # src -- srclang // language used as src in output dict sub gera_output{ my ($d, $m, $lang)=@_; ## dict, meta, language my $sd; ## The DIC! sd: D2 my %dic_rl_src=(); ## ( rl → src ) my %dic_src_rl=(); ## ( src → rel ) my $main_trpl={}; ## ( src → rel → rl ) my $inverse_trpl=[]; ## [( rl , rel, src)] my $rellang = $m->{rellang} ; # add term "TOP" $sd->{TOP}=[[naterm::_term($rellang,"TOP",[]),naterm::_at("dom","TOP",[])]] if %{$m->{macro}} ; for my $e (@$d){ ## for all entry //d: D1 warn("'$e' is not the rigth type...\n") unless ref($e) eq "ARRAY"; my $termsrc = undef; ## term in the source language my $termrl = undef; ## term in the conceptual rel lang for my $t (@$e){ ## for all trpl next unless $t->{AV} =~ /\S/; if (_langis($t,$lang)){ ## if is a term in srclang $termsrc ||= $t->{AV}; push ( @{$sd->{$t->{AV}}},$e) ; } if (_langis($t,$rellang)) { $termrl ||= $t->{AV}; } } next unless $termsrc; ## skip if no term in srclang (FIXME) if( $lang ne $rellang ){ $dic_rl_src{$termrl}=$termsrc; $dic_src_rl{$termsrc}=$termrl; } # build inverse_trpl for my $t (@$e){ next unless $t->{AV} =~ /\S/; next if $t->{ISA} eq "Term"; next if $t->{ISA} eq "Img"; $main_trpl->{$termsrc}{$t->{AN}}{$t->{AV}}=1; my $in = _inv($t,$m); if ($in) { ## if rel with a defined inverse push (@{$inverse_trpl},[$t->{AV},$in,$termsrc]); } } } # add macrostructure triples for my $daux( keys(%{$m->{macro}}) ){ if( defined $sd->{$daux}){ push ( @{$sd->{$daux}[0]},naterm::_term($rellang,$daux,[]),naterm::_at("supdom","TOP",[])) ; } else{ $sd->{$daux}=[[naterm::_term($rellang,$daux,[]),naterm::_at("supdom","TOP",[])]] ; } push ( @{$sd->{TOP}[0]},naterm::_at("subdom",$daux,[])) ; for my $sdaux( keys(%{$m->{macro}{$daux}}) ){ if( defined $sd->{$sdaux}){ push ( @{$sd->{$sdaux}[0]},naterm::_term($rellang,$sdaux,[]),naterm::_at("supdom",$daux,[])) ; } else{ $sd->{$sdaux}=[[naterm::_term($rellang,$sdaux,[]),naterm::_at("supdom",$daux,[])]] ; } push ( @{$sd->{$daux}[0]},naterm::_at("subdom",$sdaux,[])) unless $daux eq $sdaux ; for my $ssdaux( keys(%{$m->{macro}{$daux}{$sdaux}} ) ){ ### ???? if( defined $sd->{$ssdaux}){ push ( @{$sd->{$ssdaux}[0]},naterm::_term($rellang,$ssdaux,[]),naterm::_at("supdom",$sdaux,[])) ; } else{ $sd->{$ssdaux}=[[naterm::_term($rellang,$ssdaux,[]),naterm::_at("supdom",$sdaux,[])]] ; } push ( @{$sd->{$sdaux}[0]},naterm::_at("subdom",$ssdaux,[])) unless $sdaux eq $ssdaux ; } } } ## Done: add triples for TOP and macro (dom, subdom) for my $trpl(@$inverse_trpl){ ## termrl rel termsrc my ($termrl, $rel, $termsrc) = @$trpl; my ($subj, $obj); if( $lang ne $rellang ){ $subj = $dic_rl_src{$termrl} || "$termrl*"; $obj = $dic_src_rl{$termsrc} || $termsrc; } else { $subj = $termrl; $obj = $termsrc; } if($sd->{$subj}){ push( @{$sd->{$subj}[0]},naterm::_at($rel,$obj,[])); } else { $sd->{$subj}=[[naterm::_term($lang,$subj,[]), naterm::_at($rel,$obj,[])]]; } } if($debug) { open(F,">debug-sd"); print F Dumper ($sd); print F Dumper($m->{macro}); close F; } print Dumper(\%naterm::A) if $skel; TexGen::gen( $sd,{asep=>"\n\\\\⋄", vsep=>" | ", baselang=>$lang,%$m }) if $tex; XdxfGen::gen($sd,{asep=>"\n⋄", baselang=>$lang,%$m }) if $xdxf; HtmlGen::gen($sd,{asep=>"
─────────────
", baselang=>$lang,%$m }) if $html; StardictGen::gen($sd,{asep=>"\n------", baselang=>$lang,%$m }) if $stardict; } sub _lang{ $naterm::meta{lang}{$_[0]} } sub slurpinlinetab{ # my %opt =(rs=>qr(\s*\n\s*), fs=>"::", comm=>1 , Dom => "Dom:" ); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my $text=shift; my @r; my @extra=(); for my $aa ( split($opt{rs} , $text)){ $aa=~ s/#.*// if $opt{comm}; if($aa=~ s/^\+\+\s*(?:\w+\s*=\s*)?(.*)//) { my $aux = $1; if($aux =~ /\S/){ @extra = split(/\s*$opt{fs}\s*/,$aux)} else { @extra = ();}} next unless $aa=~ /\S/; push(@r,[split(/\s*$opt{fs}\s*/,$aa,-1),@extra])} print "#", Dumper(\@r) if $lexdebug; \@r; } sub slurptab{ my $f= shift; my ($f2) = _basedir($f); my %opt =(fs=>"::", comm=>1 , Dom => "Dom:" ); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my @r; my @extra=(); open(F1,"<:utf8",$f2) or warn("###cant open $_[0]\n");; my $aa; while($aa=){ chomp($aa); $aa=~ s/#.*// if $opt{comm}; if($aa=~ s/^\+\+\s*(?:\w+\s*=\s*)?(.*)//) { my $aux = $1; if($aux =~ /\S/){ @extra = split(/\s*$opt{fs}\s*/,$aux)} else { @extra = ();}} next unless $aa=~ /\S/; push(@r,[split(/\s*$opt{fs}\s*/,$aa,-1),@extra])} close F1; print "#", Dumper(\@r) if $lexdebug; \@r; } ###LEX sub lex{ ## %x VALORES=1 ==>( = ...\n) for($File){ m!\G[ \t]+!gc; ## avançar brancos m!\G(#.*\n)+!gc; ## avança comentários if( m!\G__EOF__!gc) { return("","iii") } if( m!\G__NEWDIC__!gc) { return("NEWDIC","iii") } if($yyst==0){ if( m!\Gtab\{(.*?)\}!gcs) { $yyst=1; return("TAB",slurpinlinetab($1)); } if( m!\Gtab\((.*?)\)!gc) { $yyst=1; return("TAB",slurptab($1)); } if( m!\G(\w[\w\-]+)!gc) { if(_lang($1)){return("ILI",$1);} else {return("IAC",$1);}} if( m!\G\+(\w[\w\-]*)!gc) { return("ALI",$1); } if( m!\G\!(\w[\w\-]*)!gc) { return("IMG",$1); } if( m!\G====\h*(.+?)\h*====!gc) { return("SUBSUBDOM",$1); } if( m!\G====\h*([^= \t\n].*)!gc) { return("SUBSUBDOM",$1); } if( m!\G===\h*(.+?)\h*[#:=]\h*(.+?)\h*===!gc) { return("SUBDOMPAIR",[$1,$2]); } if( m!\G===\h*(.+?)\h*===!gc) { return("SUBDOM",$1); } if( m!\G===\h*([^= \t\n].*)!gc) { return("SUBDOM",$1); } if( m!\G==\h*(.+?)\h*[#:=]\h*(.+?)\h*==!gc) { return("DOMPAIR",[$1,$2]); } if( m!\G==\h*(.+?)\h*==!gc) { return("DOM",$1); } if( m!\G==\h*(\w.*)!gc) { return("DOM",$1); } if( m!\G====!gc) { return("NOSUBSUBDOM",$1); } if( m!\G===!gc) { return("NOSUBDOM",$1); } if( m!\G==!gc) { return("NODOM",$1); } if( m!\G\%lang(uage)?s?:?\s+!gc){ $yyst=1; return("LANG","lang"); } if( m!\G\%inv(erse)?:?\s+!gc) { $yyst=1; return("INV","inverse"); } if( m!\G\%rellang:?\s+!gc) { $yyst=1; return("RELLANG","rellang")} if( m!\G\%tit(le)?:?\s+!gc) { $yyst=1; return("TIT","title"); } if( m!\G\%aut(hor)?:?\s+!gc) { $yyst=1; return("AUT","author");} if( m!\G\%img:?\s+!gc) { $yyst=1; return("IMG","img");} if( m!\G\%inline(\d*):?\s+!gc) { $yyst=1; return("INLINE", $1 || 1);} if( m!\G\%crel:?\s+!gc) { $yyst=1; return("CREL", "crel");} if( m!\G\%ignore:?\s+!gc) { $yyst=1; return("IGNORE","ignore");} if( m!\G\%join:?\((.*?)\)\s+!gc){ $yyst=1; return("JOIN",$1);} if( m!\G\%rename:?\s+!gc) { $yyst=1; return("RENAME","rename");} if( m!\G\%(\w+)\:?!gc) { $yyst=1; return("DIR",$1); } if( m!\G([(])\s*!gc) { $yyst=0 ; return($1,$1); } if( m!\G\s*([)])\s*!gc) { $yyst=0 ; return($1,$1); } if( m!\G([=*])!gc) { $yyst=1 ; return($1,$1); } if( $p2 and m!\G([:])!gc) { $yyst=1 ; return('=',$1); } if( m!\G(---+\n)!gc) { $yyst=0 ; return("LB",""); } if( m!\G(___+\n)!gc) { $yyst=0 ; return("LB",""); } if( m!\G(\n)!gc) { $yyst=0 ; return("LB",""); } } if($yyst==1){ if( m!\G([*])\n!gc) { $yyst=0 ; return($1,$1); } if( m!\Gtab\{(.*?)\}!gcs) { $yyst=1; return("TAB",slurpinlinetab($1)); } if( m!\Gtab\((.*?)\)!gc) { $yyst=0; return("TAB",slurptab($1)); } if( m!\G\{\h*([^\}]*?)\h*\}\h*\n!gc){ $yyst=0 ; return("V",$1); } if( m!\G<\s*(\S+)\h*\n!gc) { $yyst=0 ; return("V",_slurp8($1)); } if( m!\G\h*([^#\n]+)(?=#.*\n)!gc){ $yyst=0 ; return("V",$1); } if( m!\G\h*([^\n\{\}+]+?(\n\h+[^\n\{\}+]*)*)\h*\n!gc) { $yyst=0 ; return("V",$1); } if( m!\G(---+\n)!gc) { $yyst=0 ; return("LB",""); } if( m!\G(___+\n)!gc) { $yyst=0 ; return("LB",""); } if( m!\G(\n)!gc) { $yyst=0 ; return("LB",""); } } if( m!\G(.)!gc) { print STDERR "Simbolos desconhecidos '", substr($File,pos($File),20),"...'\n" ; return(lex()); } } } ###--------------------------------------------------------------------- package TexGen; use utf8::all; use Lingua::NATerm::FTemplates; use Image::Size; use Data::Dumper; my %deja_vu; sub gen{ skimport(); my($d,$h)=@_; my %op=( %$h); $op{author} = join(' \and ',@{$op{author}}) if ref $op{author}; my $param = { map { ( $_ => _op($op{$_})) } keys %op}; if($lua){ print STDOUT LUALATEX_BEGIN( $param );} else { print STDOUT LATEX_BEGIN( $param );} my $prevle=""; for (sort {lc1($a) cmp lc1($b)} keys %$d){ my $le = uc(lc1(substr($_,0,1))); if($le ne $prevle){ if($le =~ /[A-Z]/){ print STDOUT "\\bigletterc{$le}\n";} $prevle=$le;} my $aux=texprot($_); print STDOUT LATEX_ENTRY({ term => $aux, def => gg($d->{$_},$h,$_) } ) ; } print STDOUT LATEX_END($param ); } sub gg{ my($e,$h,$te)=@_; ## join ga(acepção) join($h->{asep}|| "\n===", map {ga($_,$h,$te,$h->{vsep})} @$e) } sub texprot{ my $a = shift; $a =~ s/\s*$//; while( $a =~ s/([^\\])([_\%\@\#\$\&])/$1\\$2/ ){ } ; $a =~ s/^([_\%\@\#\$\&])/\\$1/g; $a } sub lc1{ my $x=lc($_[0]); $x =~ tr{áéíóúãõçâêôûäëïöüñàèìòù}{aeiouaocaeouaeiounaeiou}; $x } sub multimedia{ my ($p,$l)=(@_); return "" if $deja_vu{$p}++; if($p =~ m/\.(gif|bmp|bpm)$/) {return "(img-gif)"} if($p =~ m/\.(mp3|wav)$/) { ## Som for(@$l,"MEDIA","SND","snd","."){ if(-f "$_/$p"){ { return "\\playsnd{$_/$p}";} } } warn("Error: missing sound file:'$p'\n"); } else { ## imagem for(@$l,"MEDIA","IMG","img","."){ if(-f "$_/$p"){ my ($g_x, $g_y) = imgsize("$_/$p"); if($g_x <200 and $g_y < 200){ return "\\milust{$_/$p}";} else { return "\\ilust{$_/$p}";} } } warn("Error: missing image:'$p'\n"); } return ""; } ##FIXME sub g_ats{ my ($ats,$h)= @_; if($ats){ my $inli=""; my $ra=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if( $h->{inline}{$at->[0]} == 1){ $inli .= " \\textit{{\\scriptsize $at->[1] }} " } elsif($h->{inline}{$at->[0]} == 2){ $inli .= " \\textit{{\\scriptsize ($at->[1]) }} " } else{$ra .= "\\\\--$at->[0]: $at->[1]";} } return $inli . $ra; } return "" } ## generate aceptions sub ga{ my($ac,$h,$te,$vsep)=@_; ## treat each aception: ac* ,meta, term, value-sep my $r = ""; my $atdict={}; $vsep ||= " | "; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an and $av eq $te); my $atstex = g_ats($ats,$h); if ($t eq "Term"){ if ($istheterm){ $r = "$atstex $r" } else { $r .= "\\\\\n\t\\textbf{\\textsc{".lc($an)."}}: $av $atstex";} } elsif($t eq "At") { push(@{$atdict->{$an}},[$av,$atstex]) if $av =~ /\S/; } elsif($t eq "Img" ){ $r .= "\n ".multimedia($av,$h->{img})."\\mbox{}"; } else { $r .= "\\\\\n\t==$an: $av $atstex\\\\" if $av =~ /\S/; } # if($ats){ ## FIXME: ordem errada nos ats dos "At" # my $ra=""; # for my $at(@$ats){ # next if $h->{ignore}{$at->[0]}; # if($h->{inline}{$at->[0]} == 1){ # $ra .= " \\textit{{\\scriptsize $at->[1]}} " } # elsif($h->{inline}{$at->[0]} == 2){ # $ra .= " \\textit{{\\scriptsize ($at->[1]) }} " } # else{ $ra.="\\\\--$at->[0]: $at->[1]";} # } # if($istheterm){$r = $ra.$r;} else {$r .= $ra} # } } my $at_r= ""; for my $an (sort keys %$atdict){ $at_r .= "\\\\\n\t\\textbf{\\textit{$an}}: "; if( defined( $h->{crel}{$an}) ){ $at_r .= join($vsep, map {qq{\\termref{$_->[0]}$_->[1]}} (sort {lc1($a->[0]) cmp lc1($b->[0])} uniq1( @{$atdict->{$an}}))); }else{ $at_r .= join($vsep, map {qq{$_->[0]$_->[1]}} (sort {lc1($a->[0]) cmp lc1($b->[0])} uniq1( @{$atdict->{$an}}))); } } $r .= "$at_r"; $r = texprot($r); $r =~ s/\\\\$//; $r =~ s/(\\m?ilust\{.*?\})/ppath($1)/ge; $r; } sub ppath{ my $p=shift; $p =~ s/\\([_&])/$1/g; $p;} sub uniq1 { my %seen; grep !$seen{$_->[0]}++, @_; } sub uniq { my %seen; grep !$seen{$_}++, @_; } sub _op{ my ($v,$h)=@_; if(ref($v) eq "ARRAY"){ return join( ($h->{sep} || ", ") , @$v ) } return $v; } ###--------------------------------------------------------------------- package XdxfGen; use utf8::all; use Lingua::NATerm::FTemplates; use Image::Size; my %deja_vu; sub gen{ skimport(); my($d,$h)=@_; my %op=( %$h); $op{author} = join(' \and ',@{$op{author}}) if ref $op{author}; my $param = { map { ( $_ => _op($op{$_})) } keys %op}; print STDOUT XDXF_BEGIN( $param ); for (sort {lc1($a) cmp lc1($b)} keys %$d){ my $aux=texprot($_); print STDOUT "$aux\n", gg($d->{$_},$h,$_), "\n"; } print STDOUT XDXF_END($param ); } sub texprot{ my $a = shift; $a =~ s/\s*$//; # while( $a =~ s/([^\\])([_\%\@\#\$\&])/$1\\$2/ ){ } ; $a =~ s/& /&/g; $a =~ s/<([^A-Za-z0-9\/])/<$1/g; $a } sub lc1{ my $x=lc($_[0]); $x =~ tr{áéíóúãõçâêôûäëïöüñàèìòù}{aeiouaocaeouaeiounaeiou}; $x } sub multimedia{ my ($p,$l)=(@_); return "" if $deja_vu{$p}++; if($p =~ m/\.(gif|bmp|bpm)$/) {return ""} if($p =~ m/\.(mp3|wav)$/) { ## Som for(@$l,"MEDIA","SND","snd","."){ if(-f "$_/$p"){ { return "";} } } warn("Error: missing sound file:'$p'\n"); } else { ## imagem for(@$l,"MEDIA","IMG","img","."){ if(-f "$_/$p"){ my ($g_x, $g_y) = imgsize("$_/$p"); if($g_x <200 and $g_y < 200){ return "pequena"; } else { return "grande"; } } } warn("Error: missing image:'$p'\n"); } return ""; } sub ga{ my($ac,$h,$te,$vsep)=@_; ## treat each aception: ac* ,meta, term, value-sep my $r = ""; my $atdict={}; $vsep ||= " | "; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an && $av eq $te); if ($t eq "Term"){ $r .= "\t$an: $av
\n" unless $istheterm; } elsif($t eq "At") { push(@{$atdict->{$an}},$av) if $av =~ /\S/; } elsif($t eq "Img" ){ $r .= "\n ".multimedia($av,$h->{img}); } else { $r .= "\t$an: $av\n" if $av =~ /\S/; } if($ats){ my $ra=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if($h->{inline}{$at->[0]} == 1){ $ra .= "$at->[1]
\n" } elsif($h->{inline}{$at->[0]} == 2){ $ra .= "( $at->[1] )
\n" } else{ $ra.="\t-- $at->[0]: $at->[1]\n";} } if($istheterm){$r = $ra.$r;} else {$r .= $ra} } } my $at_r= ""; for my $an (sort keys %$atdict){ $at_r .= "\t$an:"; if( defined( $h->{crel}{$an}) ){ $at_r .= join("", map {"\n\t\t$_"} (sort {lc1($a) cmp lc1($b)} uniq( @{$atdict->{$an}}))); }else{ $at_r .= join("", map {"\n\t\t$_"} (sort {lc1($a) cmp lc1($b)} uniq( @{$atdict->{$an}}))); } $at_r .= "\t\n"; } $r .= $at_r; $r=texprot($r); $r =~ s/\\\\$//; $r =~ s/(\\m?ilust\{.*?\})/ppath($1)/ge; $r; } sub uniq { my %seen; grep !$seen{$_}++, @_; } sub ppath{ my $p=shift; $p =~ s/\\([_&])/$1/g; $p;} sub gg{ my($e,$h,$te)=@_; ## join ga(acepção) join($h->{asep}|| "\n===", map {ga($_,$h,$te,$h->{vsep})} @$e) } sub _op{ my ($v,$h)=@_; if(ref($v) eq "ARRAY"){ return join( ($h->{sep} || ", ") , @$v ) } return $v; } ###--------------------------------------------------------------------- package HtmlGen; use utf8::all; use Lingua::NATerm::FTemplates; use Image::Size; use Data::Dumper; my %deja_vu; sub gen{ skimport(); my($d,$h)=@_; my %op=( %$h); $op{author} = join(' || ',@{$op{author}}) if ref $op{author}; my $param = { map { ( $_ => _op($op{$_})) } keys %op}; print STDOUT HTML_BEGIN( $param ); for (sort {lc1($a) cmp lc1($b)} keys %$d){ my $aux=texprot($_); print STDOUT HTML_TERM( +{"key"=>$aux, "def"=> gg($d->{$_},$h,$_) }); } print STDOUT HTML_END($param ); } sub gg{ my($e,$h,$te)=@_; ## join ga(acepção) join($h->{asep}|| "\n===", map {ga($_,$h,$te,$h->{vsep})} @$e) } sub texprot{ my $a = shift; $a =~ s/\s*$//; $a =~ s/& /&/g; $a =~ s/<([^A-Za-z0-9\/])/<$1/g; $a } sub lc1{ my $x=lc($_[0]); $x =~ tr{áéíóúãõçâêôûäëïöüñàèìòù}{aeiouaocaeouaeiounaeiou}; $x } sub multimedia{ my ($p,$l)=(@_); # return "" if $deja_vu{$p}++; if($p =~ m/\.(mp3|wav)$/) { ## Som for(@$l,"MEDIA","SND","snd","."){ if(-f "$_/$p"){ { return "";} } } warn("Error: missing sound file:'$p'\n"); } else { ## imagem for(@$l,"MEDIA","IMG","img","."){ if(/^http/){ return "remota"; } my ($path) = main::_basedir("$_/$p"); if(-f $path or -f "$_/$p"){ my ($g_x, $g_y) = imgsize($path); if($g_x <300 and $g_y < 300){ return ""; } else { return ""; } } } warn("Error: missing image:'$p' in ",join(";",@$l),"\n"); } return "fixme $p not found"; } ## generate aceptions sub ga{ my($ac,$h,$te,$vsep)=@_; ## treat each aception: ac* ,meta, term, value-sep my $r = ""; my $maininline = ""; my $atdict={}; $vsep ||= " | "; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an and $av eq $te); $r .= "
  • "; if ($t eq "Term"){ $r .= "\t$an: $av \n" unless $istheterm; } elsif($t eq "At") { push(@{$atdict->{$an}},$av) if $av =~ /\S/; } elsif($t eq "Img" ){ $r .= "\n ".multimedia($av,$h->{img}); } else { $r .= "\t$an: $av | \n" if $av =~ /\S/; } if($ats){ my $ra=""; my $inli=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if($h->{inline}{$at->[0]} == 1 ){ $inli .= "$at->[1],\n" } elsif($h->{inline}{$at->[0]} == 2){ $inli .= "( $at->[1] ),\n" ==2 } else{ $ra.="\t
  • $at->[0]: $at->[1]
  • \n";} } $ra = "" if $ra ; $inli = "$inli" if $inli ; if($istheterm){ $r = "$ra $r"; $maininline = $inli} else { $r .= "$inli $ra"} } $r .= ""; } $r =~ s!
  • \s*
  • !!g; my $at_r= ""; for my $an (sort keys %$atdict){ $at_r .= "\t
  • $an: "; if( defined( $h->{crel}{$an}) ){ $at_r .= join("", map {qq{\n\t\t$_ | }} (sort {lc1($a) cmp lc1($b)} uniq( @{$atdict->{$an}}))); }else{ $at_r .= join("", map {"\n\t\t$_ |"} (sort {lc1($a) cmp lc1($b)} uniq( @{$atdict->{$an}}))); } $at_r .= "
  • \n"; } $r .= "$at_r" if $at_r; $r=texprot($r); $r =~ s/\\\\$//; $r =~ s/(\\m?ilust\{.*?\})/ppath($1)/ge; "$maininline"; } sub uniq { my %seen; grep !$seen{$_}++, @_; } sub ppath{ my $p=shift; $p =~ s/\\([_&])/$1/g; "$p";} sub _op{ my ($v,$h)=@_; if(ref($v) eq "ARRAY"){ return join( ($h->{sep} || ", ") , @$v ) } return $v; } ###--------------------------------------------------------------------- package StardictGen; use utf8::all; sub gen{ my($d,$h)=@_; my $d1 = {}; # my %op=(title=>"Dicionário", # author=>"", # date=>'\today', # introduction=>"...",%$h); # $op{author} = join("\n",@{$op{author}}) if ref $op{author}; for (keys %$d){ $d1->{$_} = gg($d->{$_},$h,$_); } Lingua::StarDict::Gen::writeDict($d1,$h->{name}[0] || "dicname"); } sub ga{ my($ac,$h,$te)=@_; my $r = ""; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an && $av eq $te); if ($t eq "Term"){ $r .= "\n\t$an: $av" unless $istheterm; } elsif($t eq "At") { $r .= "\n\t$an: $av" if $av =~ /\S/; } elsif($t eq "Img" ){ $r .= "(img)" } else { $r .= "\n\t==$an: $av" if $av =~ /\S/; } if($ats){ my $ra=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if($h->{inline}{$at->[0]}){$ra .= " $at->[1] " } else{ $ra.="\n--$at->[0]: $at->[1]";} } if($istheterm){$r = $ra.$r;} else {$r .= $ra} } } # $r =~ s/([\&_\$\#\%])/\\$1/g; # $r =~ s/\\\\$//; $r; } sub gg{ my($e,$h,$te)=@_; join($h->{asep}|| "\n===", map {ga($_,$h,$te)} @$e) } package main; __DATA__ __LUALATEX_BEGIN__ [%default:{author=>"No author provided", title=> "Dictionary", date => '\today', introduction => "", pre => "", usepackage => "", }%] \documentclass[twoside,portuges]{book} \usepackage{textcomp} \parindent 0pt \usepackage[russian,greek,portuguese]{babel} \usepackage{luanaterm} \usepackage{graphicx} \usepackage{url} [% usepackage %] \def\milust#1{\begin{center}\includegraphics{#1}\end{center}} \def\ilust#1{\begin{center}\includegraphics[ width=0.7\columnwidth, height=1\columnwidth, keepaspectratio]{#1}\end{center}} \newcommand{\termref}[1]{\hyperlink{#1}{#1}} \newcommand{\termlabel}[1]{\phantomsection\hypertarget{#1}{}} \usepackage{hyperref} \hypersetup{ colorlinks, linktocpage, citecolor=black, filecolor=black, linkcolor=blue, urlcolor=black } \begin{document} \title{[% title %]} \author{[% author %]} \date{[% date %]} \frontmatter \maketitle \mbox{} \vfill Data:[% date %]\\ Tiragem: 1 exemplar\\ (gerado automaticamente por \textbf{naterm}) [% introduction %][% pre %] \newpage \mainmatter \twocolumn \begin{dictionary} __LATEX_BEGIN__ [%default:{author=>"No author provided", title=> "Dictionary", date => '\today', introduction => "", pre => "", usepackage => "", }%] \documentclass[twoside,portuges]{book} \usepackage{naterm} \usepackage{textcomp} \parindent 0pt \usepackage[russian,greek,portuguese]{babel} %\usepackage[mathletters]{ucs} %\usepackage[utf8x]{inputenc} \usepackage{graphicx} \usepackage{url} [% usepackage %] \usepackage[T1,T2A]{fontenc} %\usepackage[T1]{fontenc} \def\milust#1{\begin{center}\includegraphics{#1}\end{center}} \def\ilust#1{\begin{center}\includegraphics[ width=0.7\columnwidth, height=1\columnwidth, keepaspectratio]{#1}\end{center}} \newcommand{\termref}[1]{\hyperlink{#1}{#1}} \newcommand{\termlabel}[1]{\phantomsection\hypertarget{#1}{}} \usepackage{hyperref} \hypersetup{ colorlinks, linktocpage, citecolor=black, filecolor=black, linkcolor=blue, urlcolor=black } \begin{document} \title{[% title %]} \author{[% author %]} \date{[% date %]} \frontmatter \maketitle \mbox{} \vfill Data:[% date %]\\ Tiragem: 1 exemplar\\ (gerado automaticamente por \textbf{naterm}) [% introduction %][% pre %] \newpage \mainmatter \twocolumn \begin{dictionary} __LATEX_END__ [%default:{pos => "", }%] \end{dictionary} [% pos %] \end{document} __LATEX_ENTRY__ \termlabel{[% term %]} \term{[% term %]}{[% def %]} __HTML_BEGIN__ [%default:{author=>"No author provided", title=> "Dictionary", date => '', introduction => "", pre => "", usepackage => "", }%] [% title %]

    [% title %]

    [% author %] [%date%]

    [% introduction %][% pre %] __HTML_END__ [%default:{pos => "", }%] [% pos %] __HTML_TERM__ [%default:{'def' => "", }%]
    [% key %] [% def %]
    __XDXF_BEGIN__ [%default:{author=>"No author provided", title=> "Dictionary", date => '\today', introduction => "", pre => "", usepackage => "", }%] [% title %] [% title %] 001 [%date%] [% introduction %][% pre %] __XDXF_END__ [%default:{pos => "", }%] [% pos %] __END__ =encoding UTF-8 =head1 NAME naterm - a dictionary DSL =head1 SYNOPSIS naterm [options] file.naterm -lang=EN # default PT -html -tex -stardict -debug -skel -lua -- Output is Latex:lualatex (def: Latex:pdflatex) -nop2 -- ":" not a valid sep. (deft: PT: gato and PT=gato are equiv) -p2 -- (this is the default) -- PT: gato and PT=gato are equiv -lexdebug -- lex debuger while(<>){($t,$v)=lex(); print "=$t=$v\n"} =head1 DESCRIPTION =head1 Dici Language =head2 Metadata section %name %title %author aut1 ; aut2 ; autn %pre < file.tex //chapters before the dictionary %introdution < file.tex // ... idem %pre { inline text ... } // ... idem %rename attrib1 attrib2 %ignore attrib %inline attrib %inv nt bt // inverse conceptual relation %inv dom voc %rellang PT // language in relations objects %crel aaa // aaa is a conceptual relation %img image/directory %lang PT EN RU %pos //chapters after the dictionary %join // not yet implemented =head2 Entries Entries (concepts) are separated by emtpy lines !img : gato.jpg // a multimedia file ( MEDIA/gato.jpg ) PT : gato // a term +gen : m // atribute of a term def : domestic feline // atribute of the full entry EN : cat EN : pussy-cat =head3 Values Values normally are simple single line strings, but can also be more complex: EN : domestic cat // simple single line strings (usual) def: a very long definition // multi line value (continuation lines must with continuation lines // start with spaces) doc: { asd // a curly bracket block ... } att: < file // a value read from "file =head2 Entries from an external / inline tables Both External and inline tables follow a CSV-like format, where: Register separator is newline Field separator is "::" (spaces adjacent to FS are removed) Sub field separator is "|" (spaces adj. are ignored) empty lines are ignored empty fields lines started by "#" are comments (ignored) ++ fiels are concatenated with the following section =head3 External tables tab(list-of-plants)* PT : $2 EN : $1 dom : plants PT : $1 EN : $2 dom : zoologia *tab(tab1) =head3 Inline tables PT : $1 EN : $2 dom : zoologia *tab{ gato::cat cão::dog } =head2 Macro structure -- Sections with a common domain == domainA # dom=domainA; subdom = subsubdom = none === subdomainB # subdom=subdomainB subsubdom = none ==== subsubdomainC # subsubdom=subsubdomainP == # domain = subdomain = subsubdomain = none === # subdomain = subsubdomain = none ==== # subsubdomain = none =head1 AUTHOR J.Joao Almeida, jj@di.uminho.pt =head1 SEE ALSO Stardict Lingua::StarDict::Gen tbx2naterm LaTeX =cut