===GRAM:state %{ ==>use strict; ==>use utf8; my %config=(style => 1); my %sem=(); our %A=(ac=>{},li=>{},img=>{},atli=>{}); our %meta=(inline=>{},ignore=>{},rename=>{}); our @dic=(); our %func=(); %} ===GRAM %token LB IAC IMG ILI ALI V TAB INLINE IGNORE RENAME %% Dic : Dic LB dirs | Dic LB ent { push (@dic,$_[3]); } | Dic LB entl { push (@dic,@{$_[3]}); } | Dic LB error { $_[0]->YYErrok; } | Dic LB | ent { push (@dic,$_[1]); } | entl { push (@dic,@{$_[1]}); } | dirs ; dirs: dir | dirs dir ; dir : DIR V { push (@{$meta{$_[1]}}, $_[2]) } | TIT V { push (@{$meta{title}}, $_[2]) } | AUT V { push (@{$meta{author}}, split(/\s*;\s*/,$_[2])) } | LANG V { for my $l (split(/\s+/,$_[2])){ $meta{lang}{$l} = 2} } | INLINE V { for my $l (split(/\s+/,$_[2])){ $meta{inline}{$l} = 2} } | IGNORE V { for my $l (split(/\s+/,$_[2])){ $meta{ignore}{$l} = 2} } | RENAME V { my ($a1,$a2)=(split(/\s+/,$_[2])); $meta{rename}{$a1} = $a2 } ; ent : sent { [$_[1]] } | ent sent { [ @{$_[1]}, $_[2] ] } ; entl : ent '*' TAB { mapent($_[1],$_[3]) } ; sent: IAC '=' V { $A{at}{$_[1]}++; +{ISA=> "At", AN=> _ren($_[1]), AV=> $_[3]} } | IMG '=' V lat { $A{img}{$_[1]}++; +{ISA=> "Img", AN=> _ren($_[1]), AV=> $_[3], Ats=> $_[4]}} | ILI '=' V lat { $A{li}{$_[1]}++; +{ISA=> "Term", AN=> _ren($_[1]), AV=> $_[3], Ats=> $_[4]}} ; lat : lat ALI '=' V { $A{atli}{$_[2]}++; [ @{$_[1]}, [_ren($_[2]), $_[4]]] } | { [ ] } ; ===GRAM:func %% sub _ren{ $meta{rename}{$_[0]} || $_[0]} sub mapent{ my ($p,$l)=@_; [ map {subrec($p,$_)} @$l ] } sub subrec{ my ($a,$l)=@_; if (ref($a) eq "ARRAY"){ return [ map { subrec($_,$l) } @$a] ; } elsif(ref($a) eq "HASH") { return +{ map {($_,subrec($a->{$_},$l))} keys %$a};} elsif($a=~/\$(\d+)\b/) { return $l->[$1-1] } else { return $a} } ===MAIN package main; use Data::Dumper; use Lingua::StarDict::Gen; our ($lang,$html,$tex,$stardict,$debug,$skel,$lexdebug); $lang ||= "PT"; my $yyst = 0; my $yylineno = 0; my $n; my $filename=shift or die("Error: no file provided\n"); my $File = ""; binmode(STDOUT,":utf8"); if($lexdebug){lexdebug()} my $t = parseFile(); if($debug){ open(F,">debug-dic"); print F Dumper(\@dici::dic, \%dici::meta); close F;} gera_output(\@dici::dic, \%dici::meta,$lang); sub lexdebug { my $p = new dici(); print "Init file:\n"; yygetmore(); print "Init lex:\n"; my ($a,$b)=lex(); while($a){ print "=$a=$b (",pos $File ,")\n"; ($a,$b)=lex(); } print "end lex:\n"; exit 0; } ===FIX sub parseFile { my $p = new dici(); yygetmore(); $p->YYParse( yylex => \&lex, yyerror => \&yyerror); } sub yyerror { if ($_[0]->YYCurtok) { printf STDERR ('Error (%d): a "%s" (%s) was found where %s was expected'."\n", $yylineno, $_[0]->YYCurtok, $_[0]->YYCurval, $_[0]->YYExpect) } else { print STDERR "Expecting one of ",join(", ",$_[0]->YYExpect),"\n"; } } sub _langis{ $_[0]->{AN} eq $_[1] && $_[0]->{ISA} eq "Term"} sub gera_output{ my ($d,$m,$lang)=@_; my $sd; for my $e(@$d){ if(ref($e) eq "ARRAY"){ for my $t (@$e){ next unless $t->{AV} =~ /\S/; push ( @{$sd->{$t->{AV}}},$e) if _langis($t,$lang)} } else {warn("$e not the rigth type...\n")} } if($debug) { open(F,">debug-sd"); print F Dumper ($sd); close F;} print Dumper(\%dici::A) if $skel; TexGen::gen($sd,{asep=>"\n\\\\----", baselang=>$lang,%$m } ) if $tex; HtmlGen::gen($sd,{asep=>"
", baselang=>$lang,%$m } ) if $html; StardictGen::gen($sd,{asep=>"\n------", baselang=>$lang,%$m } ) if $stardict; } sub yygetmore{ ## for compilers local $/; undef $/; open (F,$filename) or die("cant open $filename\n"); binmode(F,":utf8"); $File = . "__EOF__"; close F; } #sub yygetmore{ ## for compilers # local $/; $/="\n"; # $yylineno++; # $File = <> || "__EOF__"; #} sub _lang{ $dici::meta{lang}{$_[0]} } sub _slurp8{ open(F,"<:utf8",$_[0]) or warn("###cant open $_[0]\m"); my $c=join("",);close F; $c;} sub slurptab{ my %opt =(fs=>"::", comm=>1 , Dom => "Dom:" ); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my @r; my @extra=(); open(F1,"<:utf8",$_[0]) or warn("###cant open $_[0]\n");; my $aa; while($aa=){ chomp($aa); $aa=~ s/#.*// if $opt{comm}; if($aa=~ s/^\+\+\s*(?:\w+\s*=\s*)?(.*)//) { my $aux = $1; @extra = split(/\s*$opt{fs}\s*/,$aux)} next unless $aa=~ /\S/; push(@r,[split(/\s*$opt{fs}\s*/,$aa),@extra])} close F1; print "#", Dumper(\@r) if $lexdebug; \@r; } sub lex{ ## %x VALORES=1 ==>( = ...\n) for($File){ m!\G[ \t]+!gc; ## avançar brancos m!\G#.+\n!gc; ## avança comentários if( m!\G__EOF__!gc) { return("","iii") } if($yyst==0){ if( m!\G([\w\-]+)!gc) { if(_lang($1)){return("ILI",$1);} else {return("IAC",$1);}} if( m!\G\+(\w[\w\-]*)!gc) { return("ALI",$1); } if( m!\G\!(\w+)!gc) { return("IMG",$1); } if( m!\G\%lang(uage)?\s+!gc) { $yyst=1; return("LANG","lang"); } if( m!\G\%tit(le)?\s+!gc) { $yyst=1; return("TIT","title"); } if( m!\G\%aut(thor)?\s+!gc) { $yyst=1; return("AUT","author");} if( m!\G\%inline\s+!gc) { $yyst=1; return("INLINE","inline");} if( m!\G\%ignore\s+!gc) { $yyst=1; return("IGNORE","ignore");} if( m!\G\%rename\s+!gc) { $yyst=1; return("RENAME","rename");} if( m!\G\%(\w+)!gc) { $yyst=1; return("DIR",$1); } if( m!\G([=*])!gc) { $yyst=1 ; return($1,$1); } if( m!\G(\n)!gc) { $yyst=0 ; return("LB",""); } } if($yyst==1){ if( m!\Gtab\((.*?)\)!gc) { $yyst=0; return("TAB",slurptab($1)); } if( m!\G\{([^\}]*?)\}[ \t]*\n!gc){ $yyst=0 ;return("V",$1); } if( m!\G<\s*(\S+)[ \t]*\n!gc) { $yyst=0 ; return("V",_slurp8($1)); } if( m!\G([^#\n]+)(?=#.*\n)!gc){ $yyst=0 ; return("V",$1); } if( m!\G(.+)\n!gc) { $yyst=0 ; return("V",$1); } if( m!\G(\n)!gc) { $yyst=0 ; return("LB",""); } } if( m!\G(.)!gc) { print STDERR "Simbolos desconhecidos '", substr($File,pos($File),20),"...'\n" ; return(lex()); } } } ##--------------------------------------------------------------------- package TexGen; use Inline::Files; sub gen{ my($d,$h)=@_; my %op=(title=>"Dicionário", author=>"", date=>'\today', introduction=>"...",%$h); ### {open (F,"<:utf8", $op{headpat}) or die; $head=join("",); close F;} my $head = join("",); my $foot = join("", ); $head =~ s/###([\w-]+)###/_op($op{$1})/ge; print "$head\n"; my $prevle=""; for (sort {lc($a) cmp lc($b)} keys %$d){ my $le = uc(substr($_,0,1)); if($le ne $prevle){print "\\bigletterc{$le}\n"; $prevle=$le;} print "\\term{$_}{", gg($d->{$_},$h,$_), "\n}\n"; } print $foot; } sub ga{ my($ac,$h,$te)=@_; my $r = ""; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an && $av eq $te); if ($t eq "Term"){ $r .= "\\\\\n\t\\textsc{$an}: $av" unless $istheterm; } elsif($t eq "At") { $r .= "\\\\\n\t\\textit{$an}: $av"; } elsif($t eq "Img" ){ $r .= "(img)" } else { $r .= "\\\\\n\t==$an: $av\\\\"; } if($ats){ my $ra=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if($h->{inline}{$at->[0]}){$ra .= " \\textit{{\\scriptsize $at->[1]}} " } else{ $ra.="\\\\--$at->[0]: $at->[1]";} } if($istheterm){$r = $ra.$r;} else {$r .= $ra} } } $r =~ s/([\&_\$\#\%])/\\$1/g; $r =~ s/\\\\$//; $r; } sub gg{ my($e,$h,$te)=@_; join($h->{asep}|| "\n===", map {ga($_,$h,$te)} @$e) } sub _op{ my ($v,$h)=@_; if(ref($v) eq "ARRAY"){ return join( ($h->{sep} || ", ") , @$v ) } return $v; } ##--------------------------------------------------------------------- package HtmlGen; ##--------------------------------------------------------------------- package StardictGen; sub gen{ my($d,$h)=@_; Lingua::StarDict::Gen::writeDict($d,$h->{name}[0] || "dicname"); } __LATEX_BEGIN__ \documentclass[twoside,portuges]{book} \RequirePackage[a4paper,top=3cm,left=2cm,right=2cm,bottom=1.5cm,nofoot]{geometry} \parindent 0pt \usepackage[russian,greek,portuguese]{babel} \usepackage{ucs} \usepackage[utf8x]{inputenc} %\usepackage[T1,T2A]{fontenc} \usepackage[T1]{fontenc} \usepackage{dict} \begin{document} \title{###title###} \author{###author###} \date{###date###} \frontmatter \maketitle \mbox{} \vfill Data:###date###\\ Tiragem: 1 exemplar\\ (gerado automaticamente por \textbf{dici}) ###introduction### \newpage \mainmatter \twocolumn \begin{dictionary} __LATEX_END__ \end{dictionary} \end{document} __END__