%{ use strict; use utf8; use Skel::Data; my %config=(style => 1); my %sem=(); ###GRAM:state our %A=(ac=>{},li=>{},img=>{},atli=>{}); our %meta=(inline=>{},ignore=>{},rename=>{}); our @dic=(); our %func=(); ###GRAM %} %token LB IAC IMG ILI ALI V TAB INLINE IGNORE RENAME %% Dic : Dic LB dirs | Dic LB ent { push (@dic,$_[3]); } | Dic LB entl { push (@dic,@{$_[3]}); } | Dic LB error { $_[0]->YYErrok; } | Dic LB | ent { push (@dic,$_[1]); } | entl { push (@dic,@{$_[1]}); } | dirs ; dirs: dir | dirs dir ; dir : DIR V { push (@{$meta{$_[1]}}, $_[2]) } | TIT V { push (@{$meta{title}}, $_[2]) } | AUT V { push (@{$meta{author}}, split(/\s*;\s*/,$_[2])) } | IMG V { push (@{$meta{img}}, split(/\s+/,$_[2])) } | LANG V { for my $l (split(/\s+/,$_[2])){ $meta{lang}{$l} = 2} } | INLINE V { for my $l (split(/\s+/,$_[2])){ $meta{inline}{$l} = 2} } | IGNORE V { for my $l (split(/\s+/,$_[2])){ $meta{ignore}{$l} = 2} } | RENAME V { my ($a1,$a2)=(split(/\s+/,$_[2])); $meta{rename}{$a1} = $a2 } ; ent : sent { [$_[1]] } | ent sent { [ @{$_[1]}, $_[2] ] } ; entl : ent '*' TAB { mapent($_[1],$_[3]) } | TAB '*' ent { mapent($_[3],$_[1]) } ; sent: IAC '=' V { $A{at}{$_[1]}++; _at ( _ren($_[1]),$_[3]) } | IMG '=' V lat { $A{img}{$_[1]}++; _img ( _ren($_[1]),$_[3], $_[4])} | ILI '=' V lat { $A{li}{$_[1]}++; _term( _ren($_[1]),$_[3], $_[4])} ; lat : lat ALI '=' V { $A{atli}{$_[2]}++; [ @{$_[1]}, [_ren($_[2]), $_[4]]] } | { [ ] } ; %% sub _at {+{ISA=>"At", AN=>$_[0],AV=>$_[1] }} sub _img {+{ISA=>"Img", AN=>$_[0],AV=>$_[1], Ats=>$_[2]}} sub _term{+{ISA=>"Term",AN=>$_[0],AV=>$_[1], Ats=>$_[2]}} sub _ren { $meta{rename}{$_[0]} || $_[0]} sub mapent{ my ($p,$l)=@_; [ map {subrec($p,$_)} @$l ] } sub subrec{ my ($a,$l)=@_; if (ref($a) eq "ARRAY"){ return [ map { subrec($_,$l) } @$a] ; } elsif(ref($a) eq "HASH") { return +{ map {($_,subrec($a->{$_},$l))} keys %$a};} elsif($a=~/\$(\d+)\b/) { return $l->[$1-1] } else { return $a} } ### MAIN package main; use Data::Dumper; use Lingua::StarDict::Gen; our ($lang,$html,$tex,$stardict,$debug,$skel,$lexdebug); $lang ||= "PT"; my $yyst = 0; my $yylineno = 0; my $n; my $filename=shift or die("Error: no file provided\n"); my $File = ""; binmode(STDOUT,":utf8"); if($lexdebug){lexdebug()} my $t = parseFile(); if($debug){ open(F,">debug-dic"); print F Dumper(\@dici::dic, \%dici::meta); close F;} gera_output(\@dici::dic, \%dici::meta,$lang); ### COMMON sub lexdebug { my $p = new dici(); print "Init file:\n"; yygetmore(); print "Init lex:\n"; my ($a,$b)=lex(); while($a){ print "={$a}={$b} (",pos $File ,")\n"; ($a,$b)=lex(); } print "end lex:\n"; exit 0; } sub parseFile { my $p = new dici(); yygetmore(); $p->YYParse( yylex => \&lex, yyerror => \&yyerror); } sub yyerror { if ($_[0]->YYCurtok) { my $m= substr($File,pos($File)-20,20)."#".substr($File,pos($File),20); $m =~ s/\n/\\n/g; printf STDERR ('Error (%d): a "%s" (%s) was found where %s was expected'."\n", pos($File), $_[0]->YYCurtok, $_[0]->YYCurval, $_[0]->YYExpect); print STDERR ".....$m \n" ; } else { print STDERR "Expecting one of ",join(", ",$_[0]->YYExpect),"\n"; } } sub _slurp8{ open(F,"<:utf8",$_[0]) or warn("###cant open $_[0]\m"); my $c=join("",);close F; $c;} ###COMMON:input-slurp sub yygetmore{ ## for compilers local $/; undef $/; open (F,$filename) or die("cant open $filename\n"); binmode(F,":utf8"); $File = . "__EOF__"; close F; } #sub yygetmore{ ## for compilers # local $/; $/="\n"; # $yylineno++; # $File = <> || "__EOF__"; #} ###SEM:functions sub _langis{ $_[0]->{AN} eq $_[1] && $_[0]->{ISA} eq "Term"} sub gera_output{ my ($d,$m,$lang)=@_; my $sd; for my $e(@$d){ if(ref($e) eq "ARRAY"){ for my $t (@$e){ next unless $t->{AV} =~ /\S/; push ( @{$sd->{$t->{AV}}},$e) if _langis($t,$lang)} } else {warn("$e not the rigth type...\n")} } if($debug) { open(F,">debug-sd"); print F Dumper ($sd); close F;} print Dumper(\%dici::A) if $skel; TexGen::gen($sd,{asep=>"\n\\\\⋄", baselang=>$lang,%$m } ) if $tex; HtmlGen::gen($sd,{asep=>"
", baselang=>$lang,%$m } ) if $html; StardictGen::gen($sd,{asep=>"\n------", baselang=>$lang,%$m } ) if $stardict; } sub _lang{ $dici::meta{lang}{$_[0]} } sub slurptab{ my %opt =(fs=>"::", comm=>1 , Dom => "Dom:" ); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my @r; my @extra=(); open(F1,"<:utf8",$_[0]) or warn("###cant open $_[0]\n");; my $aa; while($aa=){ chomp($aa); $aa=~ s/#.*// if $opt{comm}; if($aa=~ s/^\+\+\s*(?:\w+\s*=\s*)?(.*)//) { my $aux = $1; if($aux =~ /\S/){ @extra = split(/\s*$opt{fs}\s*/,$aux)} else { @extra = ();}} next unless $aa=~ /\S/; push(@r,[split(/\s*$opt{fs}\s*/,$aa),@extra])} close F1; print "#", Dumper(\@r) if $lexdebug; \@r; } ###LEX sub lex{ ## %x VALORES=1 ==>( = ...\n) for($File){ m!\G[ \t]+!gc; ## avançar brancos m!\G#.+\n!gc; ## avança comentários if( m!\G__EOF__!gc) { return("","iii") } if($yyst==0){ if( m!\Gtab\((.*?)\)!gc) { $yyst=1; return("TAB",slurptab($1)); } if( m!\G([\w\-]+)!gc) { if(_lang($1)){return("ILI",$1);} else {return("IAC",$1);}} if( m!\G\+(\w[\w\-]*)!gc) { return("ALI",$1); } if( m!\G\!(\w+)!gc) { return("IMG",$1); } if( m!\G\%lang(uage)?\s+!gc) { $yyst=1; return("LANG","lang"); } if( m!\G\%tit(le)?\s+!gc) { $yyst=1; return("TIT","title"); } if( m!\G\%aut(hor)?\s+!gc) { $yyst=1; return("AUT","author");} if( m!\G\%img\s+!gc) { $yyst=1; return("IMG","img");} if( m!\G\%inline\s+!gc) { $yyst=1; return("INLINE","inline");} if( m!\G\%ignore\s+!gc) { $yyst=1; return("IGNORE","ignore");} if( m!\G\%rename\s+!gc) { $yyst=1; return("RENAME","rename");} if( m!\G\%(\w+)!gc) { $yyst=1; return("DIR",$1); } if( m!\G([=*])!gc) { $yyst=1 ; return($1,$1); } if( m!\G(\n)!gc) { $yyst=0 ; return("LB",""); } } if($yyst==1){ if( m!\G([*])\n!gc) { $yyst=0 ; return($1,$1); } if( m!\Gtab\((.*?)\)!gc) { $yyst=0; return("TAB",slurptab($1)); } if( m!\G\{([^\}]*?)\}[ \t]*\n!gc){ $yyst=0 ;return("V",$1); } if( m!\G<\s*(\S+)[ \t]*\n!gc) { $yyst=0 ; return("V",_slurp8($1)); } if( m!\G([^#\n]+)(?=#.*\n)!gc){ $yyst=0 ; return("V",$1); } if( m!\G(.+(\n[ \t]+.*)*)\n!gc) { $yyst=0 ; return("V",$1); } if( m!\G(\n)!gc) { $yyst=0 ; return("LB",""); } } if( m!\G(.)!gc) { print STDERR "Simbolos desconhecidos '", substr($File,pos($File),20),"...'\n" ; return(lex()); } } } ###--------------------------------------------------------------------- package TexGen; use Skel::Data; my %deja_vu; sub gen{ skimport(); my($d,$h)=@_; my %op=(title=>"Dicionário", author=>"", date=>'\today', introduction=>"...", %$h); $op{author} = join(' \and ',@{$op{author}}) if ref $op{author}; print LATEX_BEGIN({-default => sub{ _op($op{$_[0]})}}); my $prevle=""; for (sort {lc1($a) cmp lc1($b)} keys %$d){ my $le = uc(lc1(substr($_,0,1))); if($le ne $prevle){print "\\bigletterc{$le}\n"; $prevle=$le;} print "\\term{$_}{", gg($d->{$_},$h,$_), "\n}\n"; } print LATEX_END(); } sub lc1{ my $x=lc($_[0]); $x =~ tr{áéíóúãõçâêôûäëïöüñàèìòù}{aeiouaocaeouaeiounaeiou}; $x } sub foto{ my ($p,$l)=(@_); return "" if $deja_vu{$p}++; if($p =~ m/\.(gif)$/) {return "(img-gif)"} for(@$l,"IMG","img"){if(-f "$_/$p"){ return "\\ilust{$_/$p}";}} return "(img)"; } sub ga{ my($ac,$h,$te)=@_; my $r = ""; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an && $av eq $te); if ($t eq "Term"){ $r .= "\\\\\n\t\\textsc{$an}: $av" unless $istheterm; } elsif($t eq "At") { $r .= "\\\\\n\t\\textit{$an}: $av" if $av =~ /\S/; } elsif($t eq "Img" ){ $r .= foto($av,$h->{img})."\\mbox{}"; } else { $r .= "\\\\\n\t==$an: $av\\\\" if $av =~ /\S/; } if($ats){ my $ra=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if($h->{inline}{$at->[0]}){$ra .= " \\textit{{\\scriptsize $at->[1]}} " } else{ $ra.="\\\\--$at->[0]: $at->[1]";} } if($istheterm){$r = $ra.$r;} else {$r .= $ra} } } $r =~ s/([\&_\$\#\%])/\\$1/g; $r =~ s/\\\\$//; $r =~ s/(\\ilust\{.*?\})/ppath($1)/ge; $r; } sub ppath{ my $p=shift; $p =~ s/\\([_&])/$1/g; $p;} sub gg{ my($e,$h,$te)=@_; join($h->{asep}|| "\n===", map {ga($_,$h,$te)} @$e) } sub _op{ my ($v,$h)=@_; if(ref($v) eq "ARRAY"){ return join( ($h->{sep} || ", ") , @$v ) } return $v; } ###--------------------------------------------------------------------- package HtmlGen; ###--------------------------------------------------------------------- package StardictGen; sub gen{ my($d,$h)=@_; my $d1 = {}; # my %op=(title=>"Dicionário", # author=>"", # date=>'\today', # introduction=>"...",%$h); # $op{author} = join("\n",@{$op{author}}) if ref $op{author}; for (keys %$d){ $d1->{$_} = gg($d->{$_},$h,$_); } # Lingua::StarDict::Gen::writeDict($d,$h->{name}[0] || "dicname"); Lingua::StarDict::Gen::writeDict($d1,$h->{name}[0] || "dicname"); } sub ga{ my($ac,$h,$te)=@_; my $r = ""; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an && $av eq $te); if ($t eq "Term"){ $r .= "\n\t$an: $av" unless $istheterm; } elsif($t eq "At") { $r .= "\n\t$an: $av" if $av =~ /\S/; } elsif($t eq "Img" ){ $r .= "(img)" } else { $r .= "\n\t==$an: $av" if $av =~ /\S/; } if($ats){ my $ra=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if($h->{inline}{$at->[0]}){$ra .= " $at->[1] " } else{ $ra.="\n--$at->[0]: $at->[1]";} } if($istheterm){$r = $ra.$r;} else {$r .= $ra} } } # $r =~ s/([\&_\$\#\%])/\\$1/g; # $r =~ s/\\\\$//; $r; } sub gg{ my($e,$h,$te)=@_; join($h->{asep}|| "\n===", map {ga($_,$h,$te)} @$e) } package main; __DATA__ __LATEX_BEGIN__ \documentclass[twoside,portuges]{book} \RequirePackage[a4paper,top=3cm,left=2cm,right=2cm,bottom=1.5cm,nofoot]{geometry} \parindent 0pt \usepackage[russian,greek,portuguese]{babel} \usepackage[mathletters]{ucs} \usepackage[utf8x]{inputenc} \usepackage{graphicx} %\usepackage[T1,T2A]{fontenc} \usepackage[T1]{fontenc} \usepackage{dict} \def\ilust#1{\begin{center}\includegraphics[width=0.7\columnwidth,height=1\columnwidth,keepaspectratio]{#1}\end{center}} \begin{document} \title{[% title %]} \author{[% author %]} \date{[% date %]} \frontmatter \maketitle \mbox{} \vfill Data:[% date %]\\ Tiragem: 1 exemplar\\ (gerado automaticamente por \textbf{dici}) [% introduction %] \newpage \mainmatter \twocolumn \begin{dictionary} __LATEX_END__ \end{dictionary} \end{document} __END__ =head1 NAME dici - a dictionary DSL =head1 SYNOPSIS dici [options] file.dici lang=EN html tex stardict debug skel lexdebug -- lex debuger while(<>){($t,$v)=lex(); print "=$t=$v\n"} =head1 DESCRIPTION =head2 Dici Language =head3 Metadata section %name %title %author aut1 ; aut2 ; autn %rename attrib1 attrib2 %ignore attrib %inline attrib %lang PT EN RU =head3 Entries !img = gato.jpg PT = gato +gen = m def = domestic feline EN = cat EN = pussy-cat =head3 Entries from an external table tab(list-of-plants)* PT = $2 EN = $1 dom = plants =head1 AUTHOR J.Joao Almeida, jj@di.uminho.pt =head1 SEE ALSO Stardict LaTeX =cut