===GRAM:state
%{
==>use strict;
==>use utf8;
my %config=(style => 1);
my %sem=();
our %A=(ac=>{},li=>{},img=>{},atli=>{});
our %meta=(inline=>{},ignore=>{},rename=>{});
our @dic=();
our %func=();
%}
===GRAM
%token LB IAC IMG ILI ALI V TAB INLINE IGNORE RENAME
%%
Dic : Dic LB dirs
| Dic LB ent { push (@dic,$_[3]); }
| Dic LB entl { push (@dic,@{$_[3]}); }
| Dic LB error { $_[0]->YYErrok; }
| Dic LB
| ent { push (@dic,$_[1]); }
| entl { push (@dic,@{$_[1]}); }
| dirs
;
dirs: dir
| dirs dir
;
dir : DIR V { push (@{$meta{$_[1]}}, $_[2]) }
| TIT V { push (@{$meta{title}}, $_[2]) }
| AUT V { push (@{$meta{author}}, split(/\s*;\s*/,$_[2])) }
| LANG V { for my $l (split(/\s+/,$_[2])){ $meta{lang}{$l} = 2} }
| INLINE V { for my $l (split(/\s+/,$_[2])){ $meta{inline}{$l} = 2} }
| IGNORE V { for my $l (split(/\s+/,$_[2])){ $meta{ignore}{$l} = 2} }
| RENAME V { my ($a1,$a2)=(split(/\s+/,$_[2])); $meta{rename}{$a1} = $a2 }
;
ent : sent { [$_[1]] }
| ent sent { [ @{$_[1]}, $_[2] ] }
;
entl : ent '*' TAB { mapent($_[1],$_[3]) }
;
sent: IAC '=' V { $A{at}{$_[1]}++; +{ISA=> "At", AN=> _ren($_[1]), AV=> $_[3]} }
| IMG '=' V lat { $A{img}{$_[1]}++; +{ISA=> "Img", AN=> _ren($_[1]), AV=> $_[3], Ats=> $_[4]}}
| ILI '=' V lat { $A{li}{$_[1]}++; +{ISA=> "Term", AN=> _ren($_[1]), AV=> $_[3], Ats=> $_[4]}}
;
lat : lat ALI '=' V { $A{atli}{$_[2]}++; [ @{$_[1]}, [_ren($_[2]), $_[4]]] }
| { [ ] }
;
===GRAM:func
%%
sub _ren{ $meta{rename}{$_[0]} || $_[0]}
sub mapent{
my ($p,$l)=@_;
[ map {subrec($p,$_)} @$l ]
}
sub subrec{
my ($a,$l)=@_;
if (ref($a) eq "ARRAY"){ return [ map { subrec($_,$l) } @$a] ; }
elsif(ref($a) eq "HASH") { return +{ map {($_,subrec($a->{$_},$l))} keys %$a};}
elsif($a=~/\$(\d+)\b/) { return $l->[$1-1] }
else { return $a}
}
===MAIN
package main;
use Data::Dumper;
use Lingua::StarDict::Gen;
our ($lang,$html,$tex,$stardict,$debug,$skel,$lexdebug);
$lang ||= "PT";
my $yyst = 0;
my $yylineno = 0;
my $n;
my $filename=shift or die("Error: no file provided\n");
my $File = "";
binmode(STDOUT,":utf8");
if($lexdebug){lexdebug()}
my $t = parseFile();
if($debug){ open(F,">debug-dic"); print F Dumper(\@dici::dic, \%dici::meta); close F;}
gera_output(\@dici::dic, \%dici::meta,$lang);
sub lexdebug {
my $p = new dici();
print "Init file:\n";
yygetmore();
print "Init lex:\n";
my ($a,$b)=lex();
while($a){
print "=$a=$b (",pos $File ,")\n";
($a,$b)=lex(); }
print "end lex:\n";
exit 0;
}
===FIX
sub parseFile {
my $p = new dici();
yygetmore();
$p->YYParse( yylex => \&lex,
yyerror => \&yyerror);
}
sub yyerror {
if ($_[0]->YYCurtok) {
printf STDERR ('Error (%d): a "%s" (%s) was found where %s was expected'."\n",
$yylineno, $_[0]->YYCurtok, $_[0]->YYCurval, $_[0]->YYExpect)
}
else { print STDERR "Expecting one of ",join(", ",$_[0]->YYExpect),"\n";
}
}
sub _langis{ $_[0]->{AN} eq $_[1] && $_[0]->{ISA} eq "Term"}
sub gera_output{
my ($d,$m,$lang)=@_;
my $sd;
for my $e(@$d){
if(ref($e) eq "ARRAY"){
for my $t (@$e){
next unless $t->{AV} =~ /\S/;
push ( @{$sd->{$t->{AV}}},$e) if _langis($t,$lang)}
}
else {warn("$e not the rigth type...\n")}
}
if($debug) { open(F,">debug-sd"); print F Dumper ($sd); close F;}
print Dumper(\%dici::A) if $skel;
TexGen::gen($sd,{asep=>"\n\\\\----", baselang=>$lang,%$m } ) if $tex;
HtmlGen::gen($sd,{asep=>"
", baselang=>$lang,%$m } ) if $html;
StardictGen::gen($sd,{asep=>"\n------", baselang=>$lang,%$m } ) if $stardict;
}
sub yygetmore{ ## for compilers
local $/; undef $/;
open (F,$filename) or die("cant open $filename\n");
binmode(F,":utf8");
$File = . "__EOF__";
close F;
}
#sub yygetmore{ ## for compilers
# local $/; $/="\n";
# $yylineno++;
# $File = <> || "__EOF__";
#}
sub _lang{ $dici::meta{lang}{$_[0]} }
sub _slurp8{ open(F,"<:utf8",$_[0]) or warn("###cant open $_[0]\m");
my $c=join("",);close F; $c;}
sub slurptab{
my %opt =(fs=>"::", comm=>1 , Dom => "Dom:" );
if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ;
my @r;
my @extra=();
open(F1,"<:utf8",$_[0]) or warn("###cant open $_[0]\n");;
my $aa;
while($aa=){ chomp($aa);
$aa=~ s/#.*// if $opt{comm};
if($aa=~ s/^\+\+\s*(?:\w+\s*=\s*)?(.*)//) { my $aux = $1;
@extra = split(/\s*$opt{fs}\s*/,$aux)}
next unless $aa=~ /\S/;
push(@r,[split(/\s*$opt{fs}\s*/,$aa),@extra])}
close F1;
print "#", Dumper(\@r) if $lexdebug;
\@r;
}
sub lex{ ## %x VALORES=1 ==>( = ...\n)
for($File){
m!\G[ \t]+!gc; ## avançar brancos
m!\G#.+\n!gc; ## avança comentários
if( m!\G__EOF__!gc) { return("","iii") }
if($yyst==0){
if( m!\G([\w\-]+)!gc) { if(_lang($1)){return("ILI",$1);}
else {return("IAC",$1);}}
if( m!\G\+(\w[\w\-]*)!gc) { return("ALI",$1); }
if( m!\G\!(\w+)!gc) { return("IMG",$1); }
if( m!\G\%lang(uage)?\s+!gc) { $yyst=1; return("LANG","lang"); }
if( m!\G\%tit(le)?\s+!gc) { $yyst=1; return("TIT","title"); }
if( m!\G\%aut(thor)?\s+!gc) { $yyst=1; return("AUT","author");}
if( m!\G\%inline\s+!gc) { $yyst=1; return("INLINE","inline");}
if( m!\G\%ignore\s+!gc) { $yyst=1; return("IGNORE","ignore");}
if( m!\G\%rename\s+!gc) { $yyst=1; return("RENAME","rename");}
if( m!\G\%(\w+)!gc) { $yyst=1; return("DIR",$1); }
if( m!\G([=*])!gc) { $yyst=1 ; return($1,$1); }
if( m!\G(\n)!gc) { $yyst=0 ; return("LB",""); }
}
if($yyst==1){
if( m!\Gtab\((.*?)\)!gc) { $yyst=0; return("TAB",slurptab($1)); }
if( m!\G\{([^\}]*?)\}[ \t]*\n!gc){ $yyst=0 ;return("V",$1); }
if( m!\G<\s*(\S+)[ \t]*\n!gc) { $yyst=0 ; return("V",_slurp8($1)); }
if( m!\G([^#\n]+)(?=#.*\n)!gc){ $yyst=0 ; return("V",$1); }
if( m!\G(.+)\n!gc) { $yyst=0 ; return("V",$1); }
if( m!\G(\n)!gc) { $yyst=0 ; return("LB",""); }
}
if( m!\G(.)!gc) {
print STDERR "Simbolos desconhecidos '",
substr($File,pos($File),20),"...'\n" ;
return(lex()); }
}
}
##---------------------------------------------------------------------
package TexGen;
use Inline::Files;
sub gen{
my($d,$h)=@_;
my %op=(title=>"Dicionário",
author=>"",
date=>'\today',
introduction=>"...",%$h);
### {open (F,"<:utf8", $op{headpat}) or die; $head=join("",); close F;}
my $head = join("",);
my $foot = join("", );
$head =~ s/###([\w-]+)###/_op($op{$1})/ge;
print "$head\n";
my $prevle="";
for (sort {lc($a) cmp lc($b)} keys %$d){
my $le = uc(substr($_,0,1));
if($le ne $prevle){print "\\bigletterc{$le}\n"; $prevle=$le;}
print "\\term{$_}{", gg($d->{$_},$h,$_), "\n}\n";
}
print $foot;
}
sub ga{ my($ac,$h,$te)=@_;
my $r = "";
for( @$ac ){
my $t = $_->{ISA};
my $an = $_->{AN};
my $av = $_->{AV};
my $ats = $_->{Ats};
next if $h->{ignore}{$an};
my $istheterm = ($h->{baselang} eq $an && $av eq $te);
if ($t eq "Term"){ $r .= "\\\\\n\t\\textsc{$an}: $av" unless $istheterm; }
elsif($t eq "At") { $r .= "\\\\\n\t\\textit{$an}: $av"; }
elsif($t eq "Img" ){ $r .= "(img)" }
else { $r .= "\\\\\n\t==$an: $av\\\\"; }
if($ats){
my $ra="";
for my $at(@$ats){
next if $h->{ignore}{$at->[0]};
if($h->{inline}{$at->[0]}){$ra .= " \\textit{{\\scriptsize $at->[1]}} " }
else{ $ra.="\\\\--$at->[0]: $at->[1]";} }
if($istheterm){$r = $ra.$r;} else {$r .= $ra}
}
}
$r =~ s/([\&_\$\#\%])/\\$1/g;
$r =~ s/\\\\$//;
$r;
}
sub gg{ my($e,$h,$te)=@_;
join($h->{asep}|| "\n===", map {ga($_,$h,$te)} @$e)
}
sub _op{
my ($v,$h)=@_;
if(ref($v) eq "ARRAY"){ return join( ($h->{sep} || ", ") , @$v ) }
return $v;
}
##---------------------------------------------------------------------
package HtmlGen;
##---------------------------------------------------------------------
package StardictGen;
sub gen{
my($d,$h)=@_;
Lingua::StarDict::Gen::writeDict($d,$h->{name}[0] || "dicname");
}
__LATEX_BEGIN__
\documentclass[twoside,portuges]{book}
\RequirePackage[a4paper,top=3cm,left=2cm,right=2cm,bottom=1.5cm,nofoot]{geometry}
\parindent 0pt
\usepackage[russian,greek,portuguese]{babel}
\usepackage{ucs}
\usepackage[utf8x]{inputenc}
%\usepackage[T1,T2A]{fontenc}
\usepackage[T1]{fontenc}
\usepackage{dict}
\begin{document}
\title{###title###}
\author{###author###}
\date{###date###}
\frontmatter
\maketitle
\mbox{}
\vfill
Data:###date###\\
Tiragem: 1 exemplar\\
(gerado automaticamente por \textbf{dici})
###introduction###
\newpage
\mainmatter
\twocolumn
\begin{dictionary}
__LATEX_END__
\end{dictionary}
\end{document}
__END__