#!/usr/bin/perl -s #################################################################### # # This file was generated using Parse::Yapp version 1.21. # # Don't edit this file, use source file instead. # # ANY CHANGE MADE HERE WILL BE LOST ! # #################################################################### package naterm; use vars qw ( @ISA ); use strict; @ISA= qw ( Parse::Yapp::Driver ); #Included Parse/Yapp/Driver.pm file---------------------------------------- { # # Module Parse::Yapp::Driver # # This module is part of the Parse::Yapp package available on your # nearest CPAN # # Any use of this module in a standalone parser make the included # text under the same copyright as the Parse::Yapp module itself. # # This notice should remain unchanged. # # Copyright © 1998, 1999, 2000, 2001, Francois Desarmenien. # Copyright © 2017 William N. Braswell, Jr. # (see the pod text in Parse::Yapp module for use and distribution rights) # package Parse::Yapp::Driver; require 5.004; use strict; use vars qw ( $VERSION $COMPATIBLE $FILENAME ); # CORRELATION #py001: $VERSION must be changed in both Parse::Yapp & Parse::Yapp::Driver $VERSION = '1.21'; $COMPATIBLE = '0.07'; $FILENAME=__FILE__; use Carp; #Known parameters, all starting with YY (leading YY will be discarded) my(%params)=(YYLEX => 'CODE', 'YYERROR' => 'CODE', YYVERSION => '', YYRULES => 'ARRAY', YYSTATES => 'ARRAY', YYDEBUG => ''); #Mandatory parameters my(@params)=('LEX','RULES','STATES'); sub new { my($class)=shift; my($errst,$nberr,$token,$value,$check,$dotpos); my($self)={ ERROR => \&_Error, ERRST => \$errst, NBERR => \$nberr, TOKEN => \$token, VALUE => \$value, DOTPOS => \$dotpos, STACK => [], DEBUG => 0, CHECK => \$check }; _CheckParams( [], \%params, \@_, $self ); exists($$self{VERSION}) and $$self{VERSION} < $COMPATIBLE and croak "Yapp driver version $VERSION ". "incompatible with version $$self{VERSION}:\n". "Please recompile parser module."; ref($class) and $class=ref($class); bless($self,$class); } sub YYParse { my($self)=shift; my($retval); _CheckParams( \@params, \%params, \@_, $self ); if($$self{DEBUG}) { _DBLoad(); $retval = eval '$self->_DBParse()';#Do not create stab entry on compile $@ and die $@; } else { $retval = $self->_Parse(); } $retval } sub YYData { my($self)=shift; exists($$self{USER}) or $$self{USER}={}; $$self{USER}; } sub YYErrok { my($self)=shift; ${$$self{ERRST}}=0; undef; } sub YYNberr { my($self)=shift; ${$$self{NBERR}}; } sub YYRecovering { my($self)=shift; ${$$self{ERRST}} != 0; } sub YYAbort { my($self)=shift; ${$$self{CHECK}}='ABORT'; undef; } sub YYAccept { my($self)=shift; ${$$self{CHECK}}='ACCEPT'; undef; } sub YYError { my($self)=shift; ${$$self{CHECK}}='ERROR'; undef; } sub YYSemval { my($self)=shift; my($index)= $_[0] - ${$$self{DOTPOS}} - 1; $index < 0 and -$index <= @{$$self{STACK}} and return $$self{STACK}[$index][1]; undef; #Invalid index } sub YYCurtok { my($self)=shift; @_ and ${$$self{TOKEN}}=$_[0]; ${$$self{TOKEN}}; } sub YYCurval { my($self)=shift; @_ and ${$$self{VALUE}}=$_[0]; ${$$self{VALUE}}; } sub YYExpect { my($self)=shift; sort keys %{$self->{STATES}[$self->{STACK}[-1][0]]{ACTIONS}} } sub YYLexer { my($self)=shift; $$self{LEX}; } ################# # Private stuff # ################# sub _CheckParams { my($mandatory,$checklist,$inarray,$outhash)=@_; my($prm,$value); my($prmlst)={}; while(($prm,$value)=splice(@$inarray,0,2)) { $prm=uc($prm); exists($$checklist{$prm}) or croak("Unknow parameter '$prm'"); ref($value) eq $$checklist{$prm} or croak("Invalid value for parameter '$prm'"); $prm=unpack('@2A*',$prm); $$outhash{$prm}=$value; } for (@$mandatory) { exists($$outhash{$_}) or croak("Missing mandatory parameter '".lc($_)."'"); } } sub _Error { print "Parse error.\n"; } sub _DBLoad { { no strict 'refs'; exists(${__PACKAGE__.'::'}{_DBParse})#Already loaded ? and return; } my($fname)=__FILE__; my(@drv); open(DRV,"<$fname") or die "Report this as a BUG: Cannot open $fname"; while() { /^\s*sub\s+_Parse\s*{\s*$/ .. /^\s*}\s*#\s*_Parse\s*$/ and do { s/^#DBG>//; push(@drv,$_); } } close(DRV); $drv[0]=~s/_P/_DBP/; eval join('',@drv); } #Note that for loading debugging version of the driver, #this file will be parsed from 'sub _Parse' up to '}#_Parse' inclusive. #So, DO NOT remove comment at end of sub !!! sub _Parse { my($self)=shift; my($rules,$states,$lex,$error) = @$self{ 'RULES', 'STATES', 'LEX', 'ERROR' }; my($errstatus,$nberror,$token,$value,$stack,$check,$dotpos) = @$self{ 'ERRST', 'NBERR', 'TOKEN', 'VALUE', 'STACK', 'CHECK', 'DOTPOS' }; #DBG> my($debug)=$$self{DEBUG}; #DBG> my($dbgerror)=0; #DBG> my($ShowCurToken) = sub { #DBG> my($tok)='>'; #DBG> for (split('',$$token)) { #DBG> $tok.= (ord($_) < 32 or ord($_) > 126) #DBG> ? sprintf('<%02X>',ord($_)) #DBG> : $_; #DBG> } #DBG> $tok.='<'; #DBG> }; $$errstatus=0; $$nberror=0; ($$token,$$value)=(undef,undef); @$stack=( [ 0, undef ] ); $$check=''; while(1) { my($actions,$act,$stateno); $stateno=$$stack[-1][0]; $actions=$$states[$stateno]; #DBG> print STDERR ('-' x 40),"\n"; #DBG> $debug & 0x2 #DBG> and print STDERR "In state $stateno:\n"; #DBG> $debug & 0x08 #DBG> and print STDERR "Stack:[". #DBG> join(',',map { $$_[0] } @$stack). #DBG> "]\n"; if (exists($$actions{ACTIONS})) { defined($$token) or do { ($$token,$$value)=&$lex($self); #DBG> $debug & 0x01 #DBG> and print STDERR "Need token. Got ".&$ShowCurToken."\n"; }; $act= exists($$actions{ACTIONS}{$$token}) ? $$actions{ACTIONS}{$$token} : exists($$actions{DEFAULT}) ? $$actions{DEFAULT} : undef; } else { $act=$$actions{DEFAULT}; #DBG> $debug & 0x01 #DBG> and print STDERR "Don't need token.\n"; } defined($act) and do { $act > 0 and do { #shift #DBG> $debug & 0x04 #DBG> and print STDERR "Shift and go to state $act.\n"; $$errstatus and do { --$$errstatus; #DBG> $debug & 0x10 #DBG> and $dbgerror #DBG> and $$errstatus == 0 #DBG> and do { #DBG> print STDERR "**End of Error recovery.\n"; #DBG> $dbgerror=0; #DBG> }; }; push(@$stack,[ $act, $$value ]); $$token ne '' #Don't eat the eof and $$token=$$value=undef; next; }; #reduce my($lhs,$len,$code,@sempar,$semval); ($lhs,$len,$code)=@{$$rules[-$act]}; #DBG> $debug & 0x04 #DBG> and $act #DBG> and print STDERR "Reduce using rule ".-$act." ($lhs,$len): "; $act or $self->YYAccept(); $$dotpos=$len; unpack('A1',$lhs) eq '@' #In line rule and do { $lhs =~ /^\@[0-9]+\-([0-9]+)$/ or die "In line rule name '$lhs' ill formed: ". "report it as a BUG.\n"; $$dotpos = $1; }; @sempar = $$dotpos ? map { $$_[1] } @$stack[ -$$dotpos .. -1 ] : (); $semval = $code ? &$code( $self, @sempar ) : @sempar ? $sempar[0] : undef; splice(@$stack,-$len,$len); $$check eq 'ACCEPT' and do { #DBG> $debug & 0x04 #DBG> and print STDERR "Accept.\n"; return($semval); }; $$check eq 'ABORT' and do { #DBG> $debug & 0x04 #DBG> and print STDERR "Abort.\n"; return(undef); }; #DBG> $debug & 0x04 #DBG> and print STDERR "Back to state $$stack[-1][0], then "; $$check eq 'ERROR' or do { #DBG> $debug & 0x04 #DBG> and print STDERR #DBG> "go to state $$states[$$stack[-1][0]]{GOTOS}{$lhs}.\n"; #DBG> $debug & 0x10 #DBG> and $dbgerror #DBG> and $$errstatus == 0 #DBG> and do { #DBG> print STDERR "**End of Error recovery.\n"; #DBG> $dbgerror=0; #DBG> }; push(@$stack, [ $$states[$$stack[-1][0]]{GOTOS}{$lhs}, $semval ]); $$check=''; next; }; #DBG> $debug & 0x04 #DBG> and print STDERR "Forced Error recovery.\n"; $$check=''; }; #Error $$errstatus or do { $$errstatus = 1; &$error($self); $$errstatus # if 0, then YYErrok has been called or next; # so continue parsing #DBG> $debug & 0x10 #DBG> and do { #DBG> print STDERR "**Entering Error recovery.\n"; #DBG> ++$dbgerror; #DBG> }; ++$$nberror; }; $$errstatus == 3 #The next token is not valid: discard it and do { $$token eq '' # End of input: no hope and do { #DBG> $debug & 0x10 #DBG> and print STDERR "**At eof: aborting.\n"; return(undef); }; #DBG> $debug & 0x10 #DBG> and print STDERR "**Dicard invalid token ".&$ShowCurToken.".\n"; $$token=$$value=undef; }; $$errstatus=3; while( @$stack and ( not exists($$states[$$stack[-1][0]]{ACTIONS}) or not exists($$states[$$stack[-1][0]]{ACTIONS}{error}) or $$states[$$stack[-1][0]]{ACTIONS}{error} <= 0)) { #DBG> $debug & 0x10 #DBG> and print STDERR "**Pop state $$stack[-1][0].\n"; pop(@$stack); } @$stack or do { #DBG> $debug & 0x10 #DBG> and print STDERR "**No state left on stack: aborting.\n"; return(undef); }; #shift the error token #DBG> $debug & 0x10 #DBG> and print STDERR "**Shift \$error token and go to state ". #DBG> $$states[$$stack[-1][0]]{ACTIONS}{error}. #DBG> ".\n"; push(@$stack, [ $$states[$$stack[-1][0]]{ACTIONS}{error}, undef ]); } #never reached croak("Error in driver logic. Please, report it as a BUG"); }#_Parse #DO NOT remove comment 1; } #End of include-------------------------------------------------- use strict; use utf8::all; my %config=(style => 1); my %sem=(); ###GRAM:state our %A=(ac=>{},li=>{},img=>{},atli=>{}); # estatisticas dos atributos our %meta=(inline=>{}, ignore=>{}, macro=>{}, # macro-structure(dom,subdom,subsubdom) rename=>{}, crel=>{qw(bt 2 nt 2 dom 2 voc 2 subdom 2 supdom 2)}, #### conceptual relations inverse=> { "subdom" => "supdom", "supdom"=> "subdom" }, #### {bt => "nt", nt => "bt"}, basedir => "./", lang=>{qw(PT 2 EN 2 FR 2 IT 2 ES 2 RU 2 DE 2)}, img=> [] ); our @dic=(); our %voc_dom=(); our %func=(); our ($dom1, $dom2, $subdom1, $subdom2, $subsubdom1); ###GRAM sub new { my($class)=shift; ref($class) and $class=ref($class); my($self)=$class->SUPER::new( yyversion => '1.21', yystates => [ {#State 0 ACTIONS => { "(" => 1, 'AUT' => 2, 'CREL' => 4, 'DIR' => 5, 'DOM' => 6, 'DOMPAIR' => 7, 'IAC' => 9, 'IGNORE' => 10, 'ILI' => 11, 'IMG' => 12, 'INLINE' => 13, 'INV' => 14, 'JOIN' => 15, 'LANG' => 16, 'LB' => 17, 'NEWDIC' => 18, 'NODOM' => 19, 'NOSUBDOM' => 20, 'NOSUBSUBDOM' => 21, 'RELLANG' => 22, 'RENAME' => 23, 'SUBDOM' => 24, 'SUBDOMPAIR' => 25, 'SUBSUBDOM' => 26, 'TAB' => 27, 'TIT' => 28 }, DEFAULT => -3, GOTOS => { 'Axioma' => 3, 'Dic' => 8, 'dir' => 29, 'dirs' => 30, 'ent' => 31, 'entl' => 32, 'sent' => 33 } }, {#State 1 ACTIONS => { 'IAC' => 9, 'ILI' => 11, 'IMG' => 34 }, GOTOS => { 'ent' => 35, 'entseq' => 36, 'sent' => 33 } }, {#State 2 ACTIONS => { 'V' => 37 } }, {#State 3 ACTIONS => { '' => 38 } }, {#State 4 ACTIONS => { 'V' => 39 } }, {#State 5 ACTIONS => { 'V' => 40 } }, {#State 6 DEFAULT => -28 }, {#State 7 DEFAULT => -27 }, {#State 8 ACTIONS => { '' => -2, "(" => 1, 'AUT' => 2, 'CREL' => 4, 'DIR' => 5, 'DOM' => 6, 'DOMPAIR' => 7, 'IAC' => 9, 'IGNORE' => 10, 'ILI' => 11, 'IMG' => 12, 'INLINE' => 13, 'INV' => 14, 'JOIN' => 15, 'LANG' => 16, 'LB' => 41, 'NEWDIC' => 18, 'NODOM' => 19, 'NOSUBDOM' => 20, 'NOSUBSUBDOM' => 21, 'RELLANG' => 22, 'RENAME' => 23, 'SUBDOM' => 24, 'SUBDOMPAIR' => 25, 'SUBSUBDOM' => 26, 'TAB' => 27, 'TIT' => 28, 'error' => 45 }, GOTOS => { 'dir' => 29, 'dirs' => 42, 'ent' => 43, 'entl' => 44, 'sent' => 33 } }, {#State 9 ACTIONS => { "=" => 46 } }, {#State 10 ACTIONS => { 'V' => 47 } }, {#State 11 ACTIONS => { "=" => 48 } }, {#State 12 ACTIONS => { "=" => 49, 'V' => 50 } }, {#State 13 ACTIONS => { 'V' => 51 } }, {#State 14 ACTIONS => { 'V' => 52 } }, {#State 15 ACTIONS => { 'V' => 53 } }, {#State 16 ACTIONS => { 'V' => 54 } }, {#State 17 ACTIONS => { "(" => 1, 'AUT' => 2, 'CREL' => 4, 'DIR' => 5, 'DOM' => 6, 'DOMPAIR' => 7, 'IAC' => 9, 'IGNORE' => 10, 'ILI' => 11, 'IMG' => 12, 'INLINE' => 13, 'INV' => 14, 'JOIN' => 15, 'LANG' => 16, 'NEWDIC' => 18, 'NODOM' => 19, 'NOSUBDOM' => 20, 'NOSUBSUBDOM' => 21, 'RELLANG' => 22, 'RENAME' => 23, 'SUBDOM' => 24, 'SUBDOMPAIR' => 25, 'SUBSUBDOM' => 26, 'TAB' => 27, 'TIT' => 28 }, DEFAULT => -3, GOTOS => { 'Dic' => 55, 'dir' => 29, 'dirs' => 30, 'ent' => 31, 'entl' => 32, 'sent' => 33 } }, {#State 18 DEFAULT => -20 }, {#State 19 DEFAULT => -29 }, {#State 20 DEFAULT => -32 }, {#State 21 DEFAULT => -34 }, {#State 22 ACTIONS => { 'V' => 56 } }, {#State 23 ACTIONS => { 'V' => 57 } }, {#State 24 DEFAULT => -31 }, {#State 25 DEFAULT => -30 }, {#State 26 DEFAULT => -33 }, {#State 27 ACTIONS => { "*" => 58 } }, {#State 28 ACTIONS => { 'V' => 59 } }, {#State 29 DEFAULT => -12 }, {#State 30 ACTIONS => { 'AUT' => 2, 'CREL' => 4, 'DIR' => 5, 'DOM' => 6, 'DOMPAIR' => 7, 'IGNORE' => 10, 'IMG' => 60, 'INLINE' => 13, 'INV' => 14, 'JOIN' => 15, 'LANG' => 16, 'NEWDIC' => 18, 'NODOM' => 19, 'NOSUBDOM' => 20, 'NOSUBSUBDOM' => 21, 'RELLANG' => 22, 'RENAME' => 23, 'SUBDOM' => 24, 'SUBDOMPAIR' => 25, 'SUBSUBDOM' => 26, 'TIT' => 28 }, DEFAULT => -11, GOTOS => { 'dir' => 61 } }, {#State 31 ACTIONS => { "*" => 62, 'IAC' => 9, 'ILI' => 11, 'IMG' => 34 }, DEFAULT => -9, GOTOS => { 'sent' => 63 } }, {#State 32 DEFAULT => -10 }, {#State 33 DEFAULT => -35 }, {#State 34 ACTIONS => { "=" => 49 } }, {#State 35 ACTIONS => { 'IAC' => 9, 'ILI' => 11, 'IMG' => 34 }, DEFAULT => -42, GOTOS => { 'sent' => 63 } }, {#State 36 ACTIONS => { ")" => 64, 'LB' => 65 } }, {#State 37 DEFAULT => -16 }, {#State 38 DEFAULT => 0 }, {#State 39 DEFAULT => -24 }, {#State 40 DEFAULT => -14 }, {#State 41 DEFAULT => -7 }, {#State 42 ACTIONS => { 'AUT' => 2, 'CREL' => 4, 'DIR' => 5, 'DOM' => 6, 'DOMPAIR' => 7, 'IGNORE' => 10, 'IMG' => 60, 'INLINE' => 13, 'INV' => 14, 'JOIN' => 15, 'LANG' => 16, 'LB' => 66, 'NEWDIC' => 18, 'NODOM' => 19, 'NOSUBDOM' => 20, 'NOSUBSUBDOM' => 21, 'RELLANG' => 22, 'RENAME' => 23, 'SUBDOM' => 24, 'SUBDOMPAIR' => 25, 'SUBSUBDOM' => 26, 'TIT' => 28 }, GOTOS => { 'dir' => 61 } }, {#State 43 ACTIONS => { "*" => 62, 'IAC' => 9, 'ILI' => 11, 'IMG' => 34, 'LB' => 67 }, GOTOS => { 'sent' => 63 } }, {#State 44 ACTIONS => { 'LB' => 68 } }, {#State 45 ACTIONS => { 'LB' => 69 } }, {#State 46 ACTIONS => { 'V' => 70 }, DEFAULT => -46 }, {#State 47 DEFAULT => -22 }, {#State 48 ACTIONS => { 'V' => 71 }, DEFAULT => -48 }, {#State 49 ACTIONS => { 'V' => 72 }, DEFAULT => -47 }, {#State 50 DEFAULT => -17 }, {#State 51 DEFAULT => -21 }, {#State 52 DEFAULT => -25 }, {#State 53 DEFAULT => -23 }, {#State 54 DEFAULT => -18 }, {#State 55 ACTIONS => { '' => -1, "(" => 1, 'AUT' => 2, 'CREL' => 4, 'DIR' => 5, 'DOM' => 6, 'DOMPAIR' => 7, 'IAC' => 9, 'IGNORE' => 10, 'ILI' => 11, 'IMG' => 12, 'INLINE' => 13, 'INV' => 14, 'JOIN' => 15, 'LANG' => 16, 'LB' => 41, 'NEWDIC' => 18, 'NODOM' => 19, 'NOSUBDOM' => 20, 'NOSUBSUBDOM' => 21, 'RELLANG' => 22, 'RENAME' => 23, 'SUBDOM' => 24, 'SUBDOMPAIR' => 25, 'SUBSUBDOM' => 26, 'TAB' => 27, 'TIT' => 28, 'error' => 45 }, GOTOS => { 'dir' => 29, 'dirs' => 42, 'ent' => 43, 'entl' => 44, 'sent' => 33 } }, {#State 56 DEFAULT => -19 }, {#State 57 DEFAULT => -26 }, {#State 58 ACTIONS => { 'IAC' => 9, 'ILI' => 11, 'IMG' => 34 }, GOTOS => { 'ent' => 73, 'sent' => 33 } }, {#State 59 DEFAULT => -15 }, {#State 60 ACTIONS => { 'V' => 50 } }, {#State 61 DEFAULT => -13 }, {#State 62 ACTIONS => { 'TAB' => 74 } }, {#State 63 DEFAULT => -36 }, {#State 64 ACTIONS => { "*" => 75 } }, {#State 65 ACTIONS => { 'IAC' => 9, 'ILI' => 11, 'IMG' => 34 }, DEFAULT => -41, GOTOS => { 'ent' => 76, 'sent' => 33 } }, {#State 66 DEFAULT => -4 }, {#State 67 DEFAULT => -5 }, {#State 68 DEFAULT => -6 }, {#State 69 DEFAULT => -8 }, {#State 70 DEFAULT => -50, GOTOS => { 'lat' => 77 } }, {#State 71 DEFAULT => -50, GOTOS => { 'lat' => 78 } }, {#State 72 DEFAULT => -50, GOTOS => { 'lat' => 79 } }, {#State 73 ACTIONS => { 'IAC' => 9, 'ILI' => 11, 'IMG' => 34 }, DEFAULT => -39, GOTOS => { 'sent' => 63 } }, {#State 74 DEFAULT => -37 }, {#State 75 ACTIONS => { 'TAB' => 80 } }, {#State 76 ACTIONS => { 'IAC' => 9, 'ILI' => 11, 'IMG' => 34 }, DEFAULT => -40, GOTOS => { 'sent' => 63 } }, {#State 77 ACTIONS => { 'ALI' => 81 }, DEFAULT => -43 }, {#State 78 ACTIONS => { 'ALI' => 81 }, DEFAULT => -45 }, {#State 79 ACTIONS => { 'ALI' => 81 }, DEFAULT => -44 }, {#State 80 DEFAULT => -38 }, {#State 81 ACTIONS => { "=" => 82 } }, {#State 82 ACTIONS => { 'V' => 83 } }, {#State 83 DEFAULT => -49 } ], yyrules => [ [#Rule 0 '$start', 2, undef ], [#Rule 1 'Axioma', 2, undef ], [#Rule 2 'Axioma', 1, undef ], [#Rule 3 'Dic', 0, undef ], [#Rule 4 'Dic', 3, sub { } ], [#Rule 5 'Dic', 3, sub { push (@dic, $_[2] ); } ], [#Rule 6 'Dic', 3, sub { push (@dic,@{$_[2]}); } ], [#Rule 7 'Dic', 2, undef ], [#Rule 8 'Dic', 3, sub { $_[0]->YYErrok; } ], [#Rule 9 'Dic', 1, sub { push (@dic, $_[1] ); } ], [#Rule 10 'Dic', 1, sub { push (@dic,@{$_[1]}); } ], [#Rule 11 'Dic', 1, undef ], [#Rule 12 'dirs', 1, undef ], [#Rule 13 'dirs', 2, undef ], [#Rule 14 'dir', 2, sub { push (@{$meta{$_[1]}}, $_[2]) } ], [#Rule 15 'dir', 2, sub { push (@{$meta{title}}, $_[2]) } ], [#Rule 16 'dir', 2, sub { push (@{$meta{author}}, split(/\s*;\s*/,$_[2])) } ], [#Rule 17 'dir', 2, sub { push (@{$meta{img}}, split(/\s+/,$_[2])) } ], [#Rule 18 'dir', 2, sub { my($fl,@ls)= split(/\s+/,$_[2]); for my $l (($fl, @ls)){ $meta{lang}{$l} = 2} $meta{rellang}=$fl; } ], [#Rule 19 'dir', 2, sub { my @ls= split(/\s+/,$_[2]); $meta{rellang}=$ls[0]; $meta{rellang2}=$ls[1] if $ls[1]} ], [#Rule 20 'dir', 1, sub { $subsubdom1=$subdom1=$subdom2=$dom1=$dom2="" } ], [#Rule 21 'dir', 2, sub { for my $l (split(/\s+/,$_[2])){ $meta{inline}{$l} = $_[1]} } ], [#Rule 22 'dir', 2, sub { for my $l (split(/\s+/,$_[2])){ $meta{ignore}{$l} = 2} } ], [#Rule 23 'dir', 2, sub { for my $l (split(/\s+/,$_[2])){ $meta{joinv}{$l} = $_[1]} } ], [#Rule 24 'dir', 2, sub { for my $l (split(/\s+/,$_[2])){ $meta{crel}{$l} = 2 } } ], [#Rule 25 'dir', 2, sub { my ($a1,$a2)=(split(/\s+/,$_[2])); $meta{inverse}{$a1} = $a2; $meta{crel}{$a1}=2; $meta{crel}{$a2}=2; } ], [#Rule 26 'dir', 2, sub { my ($a1,$a2)=(split(/\s+/,$_[2])); $meta{rename}{$a1} = $a2 } ], [#Rule 27 'dir', 1, sub { ($dom1,$dom2,$subdom1,$subdom2,$subsubdom1) = ($_[1][0], $_[1][1], undef, undef,undef); macro1($_[1][0])} ], [#Rule 28 'dir', 1, sub { ($dom1,$dom2,$subdom1,$subdom2,$subsubdom1) = ($_[1] , undef , undef, undef,undef); macro1($_[1])} ], [#Rule 29 'dir', 1, sub { ($dom1,$dom2,$subdom1,$subdom2,$subsubdom1) = (undef , undef , undef, undef,undef); } ], [#Rule 30 'dir', 1, sub { ($subdom1,$subdom2,$subsubdom1) = ($_[1][0] , $_[1][1],undef) ; macro2($dom1,$_[1][0]); } ], [#Rule 31 'dir', 1, sub { ($subdom1,$subdom2,$subsubdom1) = ($_[1] , undef ,undef ) ; macro2($dom1,$_[1]); } ], [#Rule 32 'dir', 1, sub { ($subdom1,$subdom2,$subsubdom1) = (undef , undef ,undef ) ; } ], [#Rule 33 'dir', 1, sub { $subsubdom1 = $_[1] ; macro3($dom1,$subdom1,$_[1]); } ], [#Rule 34 'dir', 1, sub { $subsubdom1 = undef ; } ], [#Rule 35 'ent', 1, sub { #WAS: if($subdom1){[$_[1],_at(_ren('dom'),$dom1,[]),_at(_ren('subdom'),$subdom1,[])]} if($subsubdom1){[$_[1],_at(_ren('dom'),$subsubdom1,[])]} elsif($subdom1){[$_[1],_at(_ren('dom'),$subdom1,[])]} elsif($dom1 ){[$_[1],_at(_ren('dom'),$dom1,[])]} else {[$_[1]]} } ], [#Rule 36 'ent', 2, sub { [ @{$_[1]}, $_[2] ] } ], [#Rule 37 'entl', 3, sub { mapent($_[1],$_[3]) } ], [#Rule 38 'entl', 5, sub { mapents($_[2],$_[5]) } ], [#Rule 39 'entl', 3, sub { mapent($_[3],$_[1]) } ], [#Rule 40 'entseq', 3, sub { [@{$_[1]}, $_[3] ] } ], [#Rule 41 'entseq', 2, sub { $_[1] } ], [#Rule 42 'entseq', 1, sub { [$_[1]] } ], [#Rule 43 'sent', 4, sub { $A{at}{$_[1]}++; _at ( _ren($_[1]),$_[3], $_[4])} ], [#Rule 44 'sent', 4, sub { $A{img}{$_[1]}++; _img ( _ren($_[1]),$_[3], $_[4])} ], [#Rule 45 'sent', 4, sub { $A{li}{$_[1]}++; _term( _ren($_[1]),$_[3], $_[4])} ], [#Rule 46 'sent', 2, sub { +{} } ], [#Rule 47 'sent', 2, sub { +{} } ], [#Rule 48 'sent', 2, sub { +{} } ], [#Rule 49 'lat', 4, sub { $A{atli}{$_[2]}++; [ @{$_[1]}, [_ren($_[2]), $_[4]]] } ], [#Rule 50 'lat', 0, sub { [ ] } ] ], @_); bless($self,$class); } sub _at {+{ISA=>"At", AN=>$_[0],AV=>$_[1], Ats=>$_[2]}} sub _img {+{ISA=>"Img", AN=>$_[0],AV=>$_[1], Ats=>$_[2]}} sub _term{ my($lang, $term, $ats) = @_; if($subdom1){ $voc_dom{"$dom1#$subdom1"}{"$lang#$term"} ++ } elsif($dom1){ $voc_dom{$dom1}{"$lang#$term"} ++ }; +{ISA=>"Term", AN=>$lang, AV=>$term, Ats=>$ats} } sub _ren { $meta{rename}{$_[0]} || $_[0]} sub mapents{ my ($ents,$tab)=@_; [ map {my $e=$_ ; @{mapent($e,$tab)} } @$ents ] } sub mapent{ my ($ent,$tab)=@_; [ map {subrec($ent,$_)} @$tab ] } sub macro1{ my ($d)=@_; $meta{macro}{$d}={} unless $meta{macro}{$d}; } sub macro2{ my ($d,$sd)=@_; $meta{macro}{$d}{$sd}={} unless $meta{macro}{$d}{$sd}; } sub macro3{ my ($d,$sd,$ssd)=@_; $meta{macro}{$d}{$sd}{$ssd}={} unless $meta{macro}{$d}{$sd}{$ssd}; } sub subrecv{ my ($v,$l)=@_; if($v=~/\$(\d+)\b/) { my $rep=$l->[$1-1]; if($rep =~ /\S/){ return [ map {$v =~ s/\$(\d+)\b/$_/r } split(/\s*\|\s*/,$rep)] } else { return [] } } elsif($v=~/\@(\d+)\b/) { my $rep=$l->[$1-1]; return ( map { [ $v =~ s/\@(\d+)\b/$_/r ] } split(/\s*\|\s*/,$rep)) } else { return [$v] } } sub subrec{ ## template, tup my ($ent,$l)=@_; my @out=(); for my $item (@$ent){ if(ref($item) eq "HASH") { my @outats=(); for my $pair (@{$item->{Ats}}){ my $hs = subrecv($pair->[1],$l); push(@outats, ( map { [ $pair->[0], $_] } @$hs )); } my @hs = subrecv($item->{AV},$l); ### FIXME -- wrong semantics of @1 ? for my $vs (@hs){ push(@out, ( map { +{ %$item, AV => $_ , Ats => [@outats]} } @$vs )); } } else { die("BUG on um-naterm: invalid subrec $item\n".join("\n",@$item)) } } return [@out] } ### MAIN package main; use Data::Dumper; use utf8::all; use Lingua::StarDict::Gen; our ($lua,$name,$lang, $html, $tex, $stardict, $xdxf, $debug,$skel,$lexdebug,$p2,$nop2); $p2 = 1 unless $nop2; $lang ||= "PT"; $tex=1 unless ($html or $stardict or $xdxf); ## default output my $yyst = 0; my $yylineno = 0; my $n; my $File = ""; binmode(STDOUT,":utf8"); if($lexdebug){lexdebug()} my $t = parseFile(); if($debug){ open(F,">debug-dic"); print F Dumper(\@naterm::dic, \%naterm::meta); close F;} $naterm::meta{rellang} ||= "PT"; ## perhaps EN? FIXME: -rl=EN $naterm::meta{name}[0] = $name if $name; gera_output(\@naterm::dic, \%naterm::meta, $lang); ### COMMON sub lexdebug { my $p = new naterm(); print "Init file:\n"; yygetmore(); print "Init lex:\n"; my ($a,$b)=lex(); while($a){ print "={$a}={$b} (",pos($File) ,")\n"; ($a,$b)=lex(); } print "end lex:\n"; exit 0; } sub parseFile { my $p = new naterm(); yygetmore(); $p->YYParse( yylex => \&lex, yyerror => \&yyerror); } sub yyerror { if ($_[0]->YYCurtok) { my $x= pos($File)<20 ? 0 : pos($File)-20; my $y= pos($File)<20 ? pos($File) : 20; my $m= substr($File,$x,$y)."(#)".substr($File,pos($File),20); $m =~ s/\n/\\n/g; my $k = substr($File,0,pos($File)); my $yylineno = ($k =~ s/\n/\n/g); printf STDERR ('Error %d(%d): a "%s" (%s) was found where %s was expected'."\n", $yylineno,pos($File), $_[0]->YYCurtok, $_[0]->YYCurval, $_[0]->YYExpect); print STDERR ".....$m \n" ; } else { print STDERR "Expecting one of ",join(", ",$_[0]->YYExpect),"\n"; } } sub _basedir{ my @a = @_; for (@a){ s/^/$naterm::meta{basedir}/ unless m!^[/.]!; } return @a } sub _slurp8{ my $f = shift; my ($f2) = _basedir( $f ); # warn("Debug: $f ... $f2\n"); open(F,"<:utf8",$f2) or warn("###!!##cant open $f ... $f2\n"); my $c=join("",); close F; $c; } ###COMMON:input-slurp sub yygetmore{ ## for compilers # local $/; undef $/; binmode(ARGV,":utf8"); my $prev=undef; binmode(STDIN,":utf8"); if(@ARGV == 1 and $ARGV[0] =~ /((.*\/)?(.*))\.(?:nat|term|naterm|dici?)$/) {#print "(1)$1 (2)$2 (3)$3\n"; if($tex ){open(STDOUT,">:utf8","$1.tex" ) or die("Cant reopen STDOUT\n"); } if($xdxf){open(STDOUT,">:utf8","$1.xdxf") or die("Cant reopen STDOUT\n"); } if($html){open(STDOUT,">:utf8","$1.html") or die("Cant reopen STDOUT\n"); } open(STDERR,">:utf8","$1.err") or die("Cant reopen STDERR\n"); $naterm::meta{basedir} = $2 || "./"; } while(<>){ if ($ARGV ne $prev){ $File.="\n\n__NEWDIC__\n$_"; $prev=$ARGV; } else{ $File.=$_; } } $File.= "\n\n__EOF__"; $File =~ s/[ \t]*(\r\n|\n\r|\n|\r)/\n/g; ## remove sp before \n $File =~ s/(\xFF\xFE|\xFE\xFF|\xEF\xBB\xBF|\x{FEFF})//; ## remove BOM ! close F; } ###SEM:functions sub _langis{ $_[0]->{AN} eq $_[1] && $_[0]->{ISA} eq "Term"} ## _langis(subj,l)=subj is in lang l sub _inv{ $_[1]->{inverse}{$_[0]->{AN}} } ## _inv(meta, trpl)= meta.inverse(trpl.AN) or undef ### # D1 = [entry] # entry=[trpl] # trpl=( ISA: {Term,Img,At}, # AN: str, // att. name - Rel ou Lingua # AV: , // att. val - Obj term ou text # Ats: [ [ name, str] ] // subatributes of AV # ) # # D2 = term → [entry] // term in output language src # # rl -- rellang // language used in conceptual rels # src -- srclang // language used as src in output dict sub gera_output{ my ($d, $m, $lang)=@_; ## dict, meta, language my $sd; ## The DIC! sd: D2 my %dic_rl_src=(); ## ( rl → src ) my %dic_src_rl=(); ## ( src → rel ) my $main_trpl={}; ## ( src → rel → rl ) my $inverse_trpl=[]; ## [( rl , rel, src)] # add term "TOP" $sd->{TOP}=[[naterm::_term($m->{rellang},"TOP",[]),naterm::_at("dom","TOP",[])]] if %{$m->{macro}} ; for my $e (@$d){ ## for all entry //d: D1 warn("'$e' is not the rigth type...\n") unless ref($e) eq "ARRAY"; my $termsrc = undef; my $termrl = undef; for my $t (@$e){ ## for all trpl next unless $t->{AV} =~ /\S/; if (_langis($t,$lang)){ ## if is a term in srclang $termsrc ||= $t->{AV}; push ( @{$sd->{$t->{AV}}},$e) ; } if (_langis($t,$m->{rellang})) { $termrl ||= $t->{AV}; } } next unless $termsrc; ## skip if no term in srclang if( $lang ne $m->{rellang} ){ $dic_rl_src{$termrl}=$termsrc; $dic_src_rl{$termsrc}=$termrl; } for my $t (@$e){ next unless $t->{AV} =~ /\S/; next if $t->{ISA} eq "Term"; next if $t->{ISA} eq "Img"; $main_trpl->{$termsrc}{$t->{AN}}{$t->{AV}}=1; my $in = _inv($t,$m); if ($in) { ## if rel with a defined inverse push (@{$inverse_trpl},[$t->{AV},$in,$termsrc]); } } } # add macrostructure triples for my $daux( keys(%{$m->{macro}}) ){ if( defined $sd->{$daux}){ push ( @{$sd->{$daux}[0]},naterm::_term($m->{rellang},$daux,[]),naterm::_at("supdom","TOP",[])) ; } else{ $sd->{$daux}=[[naterm::_term($m->{rellang},$daux,[]),naterm::_at("supdom","TOP",[])]] ; } push ( @{$sd->{TOP}[0]},naterm::_at("subdom",$daux,[])) ; for my $sdaux( keys(%{$m->{macro}{$daux}}) ){ if( defined $sd->{$sdaux}){ push ( @{$sd->{$sdaux}[0]},naterm::_term($m->{rellang},$sdaux,[]),naterm::_at("supdom",$daux,[])) ; } else{ $sd->{$sdaux}=[[naterm::_term($m->{rellang},$sdaux,[]),naterm::_at("supdom",$daux,[])]] ; } push ( @{$sd->{$daux}[0]},naterm::_at("subdom",$sdaux,[])) unless $daux eq $sdaux ; for my $ssdaux( keys(%{$m->{macro}{$daux}{$sdaux}} ) ){ ### ???? if( defined $sd->{$ssdaux}){ push ( @{$sd->{$ssdaux}[0]},naterm::_term($m->{rellang},$ssdaux,[]),naterm::_at("supdom",$sdaux,[])) ; } else{ $sd->{$ssdaux}=[[naterm::_term($m->{rellang},$ssdaux,[]),naterm::_at("supdom",$sdaux,[])]] ; } push ( @{$sd->{$sdaux}[0]},naterm::_at("subdom",$ssdaux,[])) unless $sdaux eq $ssdaux ; } } } ## Done: add triples for TOP and macro (dom, subdom) for my $trpl(@$inverse_trpl){ ## termrl rel termsrc my ($termrl, $rel, $termsrc) = @$trpl; my($subj, $obj); if( $lang ne $m->{rellang} ){ $subj = $dic_rl_src{$termrl} || "$termrl*"; $obj = $dic_src_rl{$termsrc} || $termsrc; } else { $subj = $termrl; $obj = $termsrc; } if($sd->{$subj}){ push( @{$sd->{$subj}[0]},naterm::_at($rel,$obj,[])); } else { $sd->{$subj}=[[naterm::_term($lang,$subj,[]), naterm::_at($rel,$obj,[])]]; } } if($debug) { open(F,">debug-sd"); print F Dumper ($sd); print F Dumper($m->{macro}); close F; } print Dumper(\%naterm::A) if $skel; TexGen::gen( $sd,{asep=>"\n\\\\⋄", vsep=>" | ", baselang=>$lang,%$m }) if $tex; XdxfGen::gen($sd,{asep=>"\n⋄", baselang=>$lang,%$m }) if $xdxf; HtmlGen::gen($sd,{asep=>"
─────────────
", baselang=>$lang,%$m }) if $html; StardictGen::gen($sd,{asep=>"\n------", baselang=>$lang,%$m }) if $stardict; } sub _lang{ $naterm::meta{lang}{$_[0]} } sub slurpinlinetab{ # my %opt =(rs=>qr(\s*\n\s*), fs=>"::", comm=>1 , Dom => "Dom:" ); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my $text=shift; my @r; my @extra=(); for my $aa ( split($opt{rs} , $text)){ $aa=~ s/#.*// if $opt{comm}; if($aa=~ s/^\+\+\s*(?:\w+\s*=\s*)?(.*)//) { my $aux = $1; if($aux =~ /\S/){ @extra = split(/\s*$opt{fs}\s*/,$aux)} else { @extra = ();}} next unless $aa=~ /\S/; push(@r,[split(/\s*$opt{fs}\s*/,$aa,-1),@extra])} print "#", Dumper(\@r) if $lexdebug; \@r; } sub slurptab{ my $f= shift; my ($f2) = _basedir($f); my %opt =(fs=>"::", comm=>1 , Dom => "Dom:" ); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my @r; my @extra=(); open(F1,"<:utf8",$f2) or warn("###cant open $_[0]\n");; my $aa; while($aa=){ chomp($aa); $aa=~ s/#.*// if $opt{comm}; if($aa=~ s/^\+\+\s*(?:\w+\s*=\s*)?(.*)//) { my $aux = $1; if($aux =~ /\S/){ @extra = split(/\s*$opt{fs}\s*/,$aux)} else { @extra = ();}} next unless $aa=~ /\S/; push(@r,[split(/\s*$opt{fs}\s*/,$aa,-1),@extra])} close F1; print "#", Dumper(\@r) if $lexdebug; \@r; } ###LEX sub lex{ ## %x VALORES=1 ==>( = ...\n) for($File){ m!\G[ \t]+!gc; ## avançar brancos m!\G(#.*\n)+!gc; ## avança comentários if( m!\G__EOF__!gc) { return("","iii") } if( m!\G__NEWDIC__!gc) { return("NEWDIC","iii") } if($yyst==0){ if( m!\Gtab\{(.*?)\}!gcs) { $yyst=1; return("TAB",slurpinlinetab($1)); } if( m!\Gtab\((.*?)\)!gc) { $yyst=1; return("TAB",slurptab($1)); } if( m!\G(\w[\w\-]+)!gc) { if(_lang($1)){return("ILI",$1);} else {return("IAC",$1);}} if( m!\G\+(\w[\w\-]*)!gc) { return("ALI",$1); } if( m!\G\!(\w[\w\-]*)!gc) { return("IMG",$1); } if( m!\G====\h*(.+?)\h*====!gc) { return("SUBSUBDOM",$1); } if( m!\G====\h*([^= \t\n].*)!gc) { return("SUBSUBDOM",$1); } if( m!\G===\h*(.+?)\h*[#:=]\h*(.+?)\h*===!gc) { return("SUBDOMPAIR",[$1,$2]); } if( m!\G===\h*(.+?)\h*===!gc) { return("SUBDOM",$1); } if( m!\G===\h*([^= \t\n].*)!gc) { return("SUBDOM",$1); } if( m!\G==\h*(.+?)\h*[#:=]\h*(.+?)\h*==!gc) { return("DOMPAIR",[$1,$2]); } if( m!\G==\h*(.+?)\h*==!gc) { return("DOM",$1); } if( m!\G==\h*(\w.*)!gc) { return("DOM",$1); } if( m!\G====!gc) { return("NOSUBSUBDOM",$1); } if( m!\G===!gc) { return("NOSUBDOM",$1); } if( m!\G==!gc) { return("NODOM",$1); } if( m!\G\%lang(uage)?s?:?\s+!gc){ $yyst=1; return("LANG","lang"); } if( m!\G\%inv(erse)?:?\s+!gc) { $yyst=1; return("INV","inverse"); } if( m!\G\%rellang:?\s+!gc) { $yyst=1; return("RELLANG","rellang")} if( m!\G\%tit(le)?:?\s+!gc) { $yyst=1; return("TIT","title"); } if( m!\G\%aut(hor)?:?\s+!gc) { $yyst=1; return("AUT","author");} if( m!\G\%img:?\s+!gc) { $yyst=1; return("IMG","img");} if( m!\G\%inline(\d*):?\s+!gc) { $yyst=1; return("INLINE", $1 || 1);} if( m!\G\%crel:?\s+!gc) { $yyst=1; return("CREL", "crel");} if( m!\G\%ignore:?\s+!gc) { $yyst=1; return("IGNORE","ignore");} if( m!\G\%join:?\((.*?)\)\s+!gc){ $yyst=1; return("JOIN",$1);} if( m!\G\%rename:?\s+!gc) { $yyst=1; return("RENAME","rename");} if( m!\G\%(\w+)\:?!gc) { $yyst=1; return("DIR",$1); } if( m!\G([(])\s*!gc) { $yyst=0 ; return($1,$1); } if( m!\G\s*([)])\s*!gc) { $yyst=0 ; return($1,$1); } if( m!\G([=*])!gc) { $yyst=1 ; return($1,$1); } if( $p2 and m!\G([:])!gc) { $yyst=1 ; return('=',$1); } if( m!\G(---+\n)!gc) { $yyst=0 ; return("LB",""); } if( m!\G(___+\n)!gc) { $yyst=0 ; return("LB",""); } if( m!\G(\n)!gc) { $yyst=0 ; return("LB",""); } } if($yyst==1){ if( m!\G([*])\n!gc) { $yyst=0 ; return($1,$1); } if( m!\Gtab\{(.*?)\}!gcs) { $yyst=1; return("TAB",slurpinlinetab($1)); } if( m!\Gtab\((.*?)\)!gc) { $yyst=0; return("TAB",slurptab($1)); } if( m!\G\{\h*([^\}]*?)\h*\}\h*\n!gc){ $yyst=0 ; return("V",$1); } if( m!\G<\s*(\S+)\h*\n!gc) { $yyst=0 ; return("V",_slurp8($1)); } if( m!\G\h*([^#\n]+)(?=#.*\n)!gc){ $yyst=0 ; return("V",$1); } if( m!\G\h*([^\n\{\}+]+?(\n\h+[^\n\{\}+]*)*)\h*\n!gc) { $yyst=0 ; return("V",$1); } if( m!\G(---+\n)!gc) { $yyst=0 ; return("LB",""); } if( m!\G(___+\n)!gc) { $yyst=0 ; return("LB",""); } if( m!\G(\n)!gc) { $yyst=0 ; return("LB",""); } } if( m!\G(.)!gc) { print STDERR "Simbolos desconhecidos '", substr($File,pos($File),20),"...'\n" ; return(lex()); } } } ###--------------------------------------------------------------------- package TexGen; # use Skel::Data; use utf8::all; use Lingua::NATerm::FTemplates; use Image::Size; use Data::Dumper; my %deja_vu; sub gen{ skimport(); my($d,$h)=@_; my %op=( %$h); $op{author} = join(' \and ',@{$op{author}}) if ref $op{author}; my $param = { map { ( $_ => _op($op{$_})) } keys %op}; if($lua){ print STDOUT LUALATEX_BEGIN( $param );} else { print STDOUT LATEX_BEGIN( $param );} my $prevle=""; for (sort {lc1($a) cmp lc1($b)} keys %$d){ my $le = uc(lc1(substr($_,0,1))); if($le ne $prevle){ if($le =~ /[A-Z]/){ print STDOUT "\\bigletterc{$le}\n";} $prevle=$le;} my $aux=texprot($_); print STDOUT LATEX_ENTRY({ term => $aux, def => gg($d->{$_},$h,$_) } ) ; # print STDOUT "\\term{$aux}{", gg($d->{$_},$h,$_), "}\n"; } print STDOUT LATEX_END($param ); } sub texprot{ my $a = shift; $a =~ s/\s*$//; while( $a =~ s/([^\\])([_\%\@\#\$\&])/$1\\$2/ ){ } ; $a =~ s/^([_\%\@\#\$\&])/\\$1/g; $a } sub lc1{ my $x=lc($_[0]); $x =~ tr{áéíóúãõçâêôûäëïöüñàèìòù}{aeiouaocaeouaeiounaeiou}; $x } sub multimedia{ my ($p,$l)=(@_); return "" if $deja_vu{$p}++; if($p =~ m/\.(gif|bmp|bpm)$/) {return "(img-gif)"} if($p =~ m/\.(mp3|wav)$/) { ## Som for(@$l,"MEDIA","SND","snd","."){ if(-f "$_/$p"){ { return "\\playsnd{$_/$p}";} } } warn("Error: missing sound file:'$p'\n"); } else { ## imagem for(@$l,"MEDIA","IMG","img","."){ if(-f "$_/$p"){ my ($g_x, $g_y) = imgsize("$_/$p"); if($g_x <200 and $g_y < 200){ return "\\milust{$_/$p}";} else { return "\\ilust{$_/$p}";} } } warn("Error: missing image:'$p'\n"); } return ""; } ##FIXME sub g_ats{ my ($ats,$h)= @_; if($ats){ my $inli=""; my $ra=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if( $h->{inline}{$at->[0]} == 1){ $inli .= " \\textit{{\\scriptsize $at->[1] }} " } elsif($h->{inline}{$at->[0]} == 2){ $inli .= " \\textit{{\\scriptsize ($at->[1]) }} " } else{$ra .= "\\\\--$at->[0]: $at->[1]";} } return $inli . $ra; } return "" } sub ga{ my($ac,$h,$te,$vsep)=@_; ## treat each aception: ac* ,meta, term, value-sep my $r = ""; my $atdict={}; $vsep ||= " | "; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an and $av eq $te); my $atstex = g_ats($ats,$h); if ($t eq "Term"){ if ($istheterm){ $r = "$atstex $r" } else { $r .= "\\\\\n\t\\textbf{\\textsc{".lc($an)."}}: $av $atstex";} } elsif($t eq "At") { push(@{$atdict->{$an}},[$av,$atstex]) if $av =~ /\S/; } elsif($t eq "Img" ){ $r .= "\n ".multimedia($av,$h->{img})."\\mbox{}"; } else { $r .= "\\\\\n\t==$an: $av $atstex\\\\" if $av =~ /\S/; } # if($ats){ ## FIXME: ordem errada nos ats dos "At" # my $ra=""; # for my $at(@$ats){ # next if $h->{ignore}{$at->[0]}; # if($h->{inline}{$at->[0]} == 1){ # $ra .= " \\textit{{\\scriptsize $at->[1]}} " } # elsif($h->{inline}{$at->[0]} == 2){ # $ra .= " \\textit{{\\scriptsize ($at->[1]) }} " } # else{ $ra.="\\\\--$at->[0]: $at->[1]";} # } # if($istheterm){$r = $ra.$r;} else {$r .= $ra} # } } my $at_r= ""; for my $an (sort keys %$atdict){ $at_r .= "\\\\\n\t\\textbf{\\textit{$an}}: "; if( defined( $h->{crel}{$an}) ){ $at_r .= join($vsep, map {qq{\\termref{$_->[0]}$_->[1]}} (sort {lc1($a->[0]) cmp lc1($b->[0])} uniq1( @{$atdict->{$an}}))); }else{ $at_r .= join($vsep, map {qq{$_->[0]$_->[1]}} (sort {lc1($a->[0]) cmp lc1($b->[0])} uniq1( @{$atdict->{$an}}))); } } $r .= $at_r; $r = texprot($r); $r =~ s/\\\\$//; $r =~ s/(\\m?ilust\{.*?\})/ppath($1)/ge; $r; } sub ppath{ my $p=shift; $p =~ s/\\([_&])/$1/g; $p;} sub uniq1 { my %seen; grep !$seen{$_->[0]}++, @_; } sub uniq { my %seen; grep !$seen{$_}++, @_; } sub gg{ my($e,$h,$te)=@_; ## join ga(acepção) join($h->{asep}|| "\n===", map {ga($_,$h,$te,$h->{vsep})} @$e) } sub _op{ my ($v,$h)=@_; if(ref($v) eq "ARRAY"){ return join( ($h->{sep} || ", ") , @$v ) } return $v; } ###--------------------------------------------------------------------- package XdxfGen; use utf8::all; use Lingua::NATerm::FTemplates; use Image::Size; my %deja_vu; sub gen{ skimport(); my($d,$h)=@_; my %op=( %$h); $op{author} = join(' \and ',@{$op{author}}) if ref $op{author}; my $param = { map { ( $_ => _op($op{$_})) } keys %op}; print STDOUT XDXF_BEGIN( $param ); for (sort {lc1($a) cmp lc1($b)} keys %$d){ my $aux=texprot($_); print STDOUT "$aux\n", gg($d->{$_},$h,$_), "\n"; } print STDOUT XDXF_END($param ); } sub texprot{ my $a = shift; $a =~ s/\s*$//; # while( $a =~ s/([^\\])([_\%\@\#\$\&])/$1\\$2/ ){ } ; $a =~ s/& /&/g; $a =~ s/<([^A-Za-z0-9\/])/<$1/g; $a } sub lc1{ my $x=lc($_[0]); $x =~ tr{áéíóúãõçâêôûäëïöüñàèìòù}{aeiouaocaeouaeiounaeiou}; $x } sub multimedia{ my ($p,$l)=(@_); return "" if $deja_vu{$p}++; if($p =~ m/\.(gif|bmp|bpm)$/) {return ""} if($p =~ m/\.(mp3|wav)$/) { ## Som for(@$l,"MEDIA","SND","snd","."){ if(-f "$_/$p"){ { return "";} } } warn("Error: missing sound file:'$p'\n"); } else { ## imagem for(@$l,"MEDIA","IMG","img","."){ if(-f "$_/$p"){ my ($g_x, $g_y) = imgsize("$_/$p"); if($g_x <200 and $g_y < 200){ return "pequena"; } else { return "grande"; } } } warn("Error: missing image:'$p'\n"); } return ""; } sub ga{ my($ac,$h,$te,$vsep)=@_; ## treat each aception: ac* ,meta, term, value-sep my $r = ""; my $atdict={}; $vsep ||= " | "; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an && $av eq $te); if ($t eq "Term"){ $r .= "\t$an: $av
\n" unless $istheterm; } elsif($t eq "At") { push(@{$atdict->{$an}},$av) if $av =~ /\S/; } elsif($t eq "Img" ){ $r .= "\n ".multimedia($av,$h->{img}); } else { $r .= "\t$an: $av\n" if $av =~ /\S/; } if($ats){ my $ra=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if($h->{inline}{$at->[0]} == 1){ $ra .= "$at->[1]
\n" } elsif($h->{inline}{$at->[0]} == 2){ $ra .= "( $at->[1] )
\n" } else{ $ra.="\t-- $at->[0]: $at->[1]\n";} } if($istheterm){$r = $ra.$r;} else {$r .= $ra} } } my $at_r= ""; for my $an (sort keys %$atdict){ $at_r .= "\t$an:"; if( defined( $h->{crel}{$an}) ){ $at_r .= join("", map {"\n\t\t$_"} (sort {lc1($a) cmp lc1($b)} uniq( @{$atdict->{$an}}))); }else{ $at_r .= join("", map {"\n\t\t$_"} (sort {lc1($a) cmp lc1($b)} uniq( @{$atdict->{$an}}))); } $at_r .= "\t\n"; } $r .= $at_r; $r=texprot($r); $r =~ s/\\\\$//; $r =~ s/(\\m?ilust\{.*?\})/ppath($1)/ge; $r; } sub uniq { my %seen; grep !$seen{$_}++, @_; } sub ppath{ my $p=shift; $p =~ s/\\([_&])/$1/g; $p;} sub gg{ my($e,$h,$te)=@_; ## join ga(acepção) join($h->{asep}|| "\n===", map {ga($_,$h,$te,$h->{vsep})} @$e) } sub _op{ my ($v,$h)=@_; if(ref($v) eq "ARRAY"){ return join( ($h->{sep} || ", ") , @$v ) } return $v; } ###--------------------------------------------------------------------- package HtmlGen; use utf8::all; use Lingua::NATerm::FTemplates; use Image::Size; use Data::Dumper; my %deja_vu; sub gen{ skimport(); my($d,$h)=@_; my %op=( %$h); $op{author} = join(' || ',@{$op{author}}) if ref $op{author}; my $param = { map { ( $_ => _op($op{$_})) } keys %op}; print STDOUT HTML_BEGIN( $param ); for (sort {lc1($a) cmp lc1($b)} keys %$d){ my $aux=texprot($_); print STDOUT HTML_TERM( +{"key"=>$aux, "def"=> gg($d->{$_},$h,$_) }); } print STDOUT HTML_END($param ); } sub texprot{ my $a = shift; $a =~ s/\s*$//; $a =~ s/& /&/g; $a =~ s/<([^A-Za-z0-9\/])/<$1/g; $a } sub lc1{ my $x=lc($_[0]); $x =~ tr{áéíóúãõçâêôûäëïöüñàèìòù}{aeiouaocaeouaeiounaeiou}; $x } sub multimedia{ my ($p,$l)=(@_); # return "" if $deja_vu{$p}++; if($p =~ m/\.(mp3|wav)$/) { ## Som for(@$l,"MEDIA","SND","snd","."){ if(-f "$_/$p"){ { return "";} } } warn("Error: missing sound file:'$p'\n"); } else { ## imagem for(@$l,"MEDIA","IMG","img","."){ if(/^http/){ return "remota"; } my ($path) = main::_basedir("$_/$p"); if(-f $path or -f "$_/$p"){ my ($g_x, $g_y) = imgsize($path); if($g_x <300 and $g_y < 300){ return ""; } else { return ""; } } } warn("Error: missing image:'$p' in ",join(";",@$l),"\n"); } return "fixme $p not found"; } sub ga{ my($ac,$h,$te,$vsep)=@_; ## treat each aception: ac* ,meta, term, value-sep my $r = ""; my $maininline = ""; my $atdict={}; $vsep ||= " | "; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an and $av eq $te); $r .= "
  • "; if ($t eq "Term"){ $r .= "\t$an: $av \n" unless $istheterm; } elsif($t eq "At") { push(@{$atdict->{$an}},$av) if $av =~ /\S/; } elsif($t eq "Img" ){ $r .= "\n ".multimedia($av,$h->{img}); } else { $r .= "\t$an: $av | \n" if $av =~ /\S/; } if($ats){ my $ra=""; my $inli=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if($h->{inline}{$at->[0]} == 1 ){ $inli .= "$at->[1],\n" } elsif($h->{inline}{$at->[0]} == 2){ $inli .= "( $at->[1] ),\n" ==2 } else{ $ra.="\t
  • $at->[0]: $at->[1]
  • \n";} } $ra = "
      $ra
    " if $ra ; $inli = "$inli" if $inli ; if($istheterm){ $r = "$ra $r"; $maininline = $inli} else { $r .= "$inli $ra"} } $r .= ""; } $r =~ s!
  • \s*
  • !!g; my $at_r= ""; for my $an (sort keys %$atdict){ $at_r .= "\t
  • $an: "; if( defined( $h->{crel}{$an}) ){ $at_r .= join("", map {qq{\n\t\t$_ | }} (sort {lc1($a) cmp lc1($b)} uniq( @{$atdict->{$an}}))); }else{ $at_r .= join("", map {"\n\t\t$_ |"} (sort {lc1($a) cmp lc1($b)} uniq( @{$atdict->{$an}}))); } $at_r .= "
  • \n"; } $r .= "$at_r" if $at_r; $r=texprot($r); $r =~ s/\\\\$//; $r =~ s/(\\m?ilust\{.*?\})/ppath($1)/ge; "$maininline
      $r
    "; } sub uniq { my %seen; grep !$seen{$_}++, @_; } sub ppath{ my $p=shift; $p =~ s/\\([_&])/$1/g; "$p";} sub gg{ my($e,$h,$te)=@_; ## join ga(acepção) join($h->{asep}|| "\n===", map {ga($_,$h,$te,$h->{vsep})} @$e) } sub _op{ my ($v,$h)=@_; if(ref($v) eq "ARRAY"){ return join( ($h->{sep} || ", ") , @$v ) } return $v; } ###--------------------------------------------------------------------- package StardictGen; use utf8::all; sub gen{ my($d,$h)=@_; my $d1 = {}; # my %op=(title=>"Dicionário", # author=>"", # date=>'\today', # introduction=>"...",%$h); # $op{author} = join("\n",@{$op{author}}) if ref $op{author}; for (keys %$d){ $d1->{$_} = gg($d->{$_},$h,$_); } Lingua::StarDict::Gen::writeDict($d1,$h->{name}[0] || "dicname"); } sub ga{ my($ac,$h,$te)=@_; my $r = ""; for( @$ac ){ my $t = $_->{ISA}; my $an = $_->{AN}; my $av = $_->{AV}; my $ats = $_->{Ats}; next if $h->{ignore}{$an}; my $istheterm = ($h->{baselang} eq $an && $av eq $te); if ($t eq "Term"){ $r .= "\n\t$an: $av" unless $istheterm; } elsif($t eq "At") { $r .= "\n\t$an: $av" if $av =~ /\S/; } elsif($t eq "Img" ){ $r .= "(img)" } else { $r .= "\n\t==$an: $av" if $av =~ /\S/; } if($ats){ my $ra=""; for my $at(@$ats){ next if $h->{ignore}{$at->[0]}; if($h->{inline}{$at->[0]}){$ra .= " $at->[1] " } else{ $ra.="\n--$at->[0]: $at->[1]";} } if($istheterm){$r = $ra.$r;} else {$r .= $ra} } } # $r =~ s/([\&_\$\#\%])/\\$1/g; # $r =~ s/\\\\$//; $r; } sub gg{ my($e,$h,$te)=@_; join($h->{asep}|| "\n===", map {ga($_,$h,$te)} @$e) } package main; __DATA__ __LUALATEX_BEGIN__ [%default:{author=>"No author provided", title=> "Dictionary", date => '\today', introduction => "", pre => "", usepackage => "", }%] \documentclass[twoside,portuges]{book} \usepackage{textcomp} \parindent 0pt \usepackage[russian,greek,portuguese]{babel} \usepackage{luanaterm} \usepackage{graphicx} \usepackage{url} [% usepackage %] \def\milust#1{\begin{center}\includegraphics{#1}\end{center}} \def\ilust#1{\begin{center}\includegraphics[ width=0.7\columnwidth, height=1\columnwidth, keepaspectratio]{#1}\end{center}} \newcommand{\termref}[1]{\hyperlink{#1}{#1}} \newcommand{\termlabel}[1]{\phantomsection\hypertarget{#1}{}} \usepackage{hyperref} \hypersetup{ colorlinks, linktocpage, citecolor=black, filecolor=black, linkcolor=blue, urlcolor=black } \begin{document} \title{[% title %]} \author{[% author %]} \date{[% date %]} \frontmatter \maketitle \mbox{} \vfill Data:[% date %]\\ Tiragem: 1 exemplar\\ (gerado automaticamente por \textbf{naterm}) [% introduction %][% pre %] \newpage \mainmatter \twocolumn \begin{dictionary} __LATEX_BEGIN__ [%default:{author=>"No author provided", title=> "Dictionary", date => '\today', introduction => "", pre => "", usepackage => "", }%] \documentclass[twoside,portuges]{book} \usepackage{naterm} \usepackage{textcomp} \parindent 0pt \usepackage[russian,greek,portuguese]{babel} \usepackage[mathletters]{ucs} \usepackage[utf8x]{inputenc} \usepackage{graphicx} \usepackage{url} [% usepackage %] %\usepackage[T1,T2A]{fontenc} %\usepackage[T1]{fontenc} \def\milust#1{\begin{center}\includegraphics{#1}\end{center}} \def\ilust#1{\begin{center}\includegraphics[ width=0.7\columnwidth, height=1\columnwidth, keepaspectratio]{#1}\end{center}} \newcommand{\termref}[1]{\hyperlink{#1}{#1}} \newcommand{\termlabel}[1]{\phantomsection\hypertarget{#1}{}} \usepackage{hyperref} \hypersetup{ colorlinks, linktocpage, citecolor=black, filecolor=black, linkcolor=blue, urlcolor=black } \begin{document} \title{[% title %]} \author{[% author %]} \date{[% date %]} \frontmatter \maketitle \mbox{} \vfill Data:[% date %]\\ Tiragem: 1 exemplar\\ (gerado automaticamente por \textbf{naterm}) [% introduction %][% pre %] \newpage \mainmatter \twocolumn \begin{dictionary} __LATEX_END__ [%default:{pos => "", }%] \end{dictionary} [% pos %] \end{document} __LATEX_ENTRY__ \termlabel{[% term %]} \term{[% term %]}{[% def %]} __HTML_BEGIN__ [%default:{author=>"No author provided", title=> "Dictionary", date => '', introduction => "", pre => "", usepackage => "", }%] [% title %]

    [% title %]

    [% author %] [%date%]

    [% introduction %][% pre %] __HTML_END__ [%default:{pos => "", }%] [% pos %] __HTML_TERM__ [%default:{'def' => "", }%]
    [% key %] [% def %]
    __XDXF_BEGIN__ [%default:{author=>"No author provided", title=> "Dictionary", date => '\today', introduction => "", pre => "", usepackage => "", }%] [% title %] [% title %] 001 [%date%] [% introduction %][% pre %] __XDXF_END__ [%default:{pos => "", }%] [% pos %] __END__ =encoding UTF-8 =head1 NAME naterm - a dictionary DSL =head1 SYNOPSIS naterm [options] file.naterm -lang=EN # default PT -html -tex -stardict -debug -skel -lua -- Output is Latex:lualatex (def: Latex:pdflatex) -nop2 -- ":" not a valid sep. (deft: PT: gato and PT=gato are equiv) -p2 -- (this is the default) -- PT: gato and PT=gato are equiv -lexdebug -- lex debuger while(<>){($t,$v)=lex(); print "=$t=$v\n"} =head1 DESCRIPTION =head1 Dici Language =head2 Metadata section %name %title %author aut1 ; aut2 ; autn %pre < file.tex //chapters before the dictionary %introdution < file.tex // ... idem %rename attrib1 attrib2 %ignore attrib %inline attrib %inv nt bt // inverse conceptual relation %inv dom voc %rellang PT // language in relations objects %crel aaa // aaa is a conceptual relation %img image/directory %lang PT EN RU %pos //chapters after the dictionary %join // not yet implemented =head2 Entries Entries (concepts) are separated by emtpy lines !img : gato.jpg // a multimedia file ( MEDIA/gato.jpg ) PT : gato // a term +gen : m // atribute of a term def : domestic feline // atribute of the full entry EN : cat EN : pussy-cat =head3 Values Values normally are simple single line strings, but can also be more complex: EN : domestic cat // simple single line strings (usual) def: a very long definition // multi line value (continuation lines must with continuation lines // start with spaces) doc: { asd // a curly bracket block ... } att: < file // a value read from "file =head2 Entries from an external / inline tables Both External and inline tables follow a CSV-like format, where: Register separator is newline Field separator is "::" (spaces adjacent to FS are removed) Sub field separator is "|" (spaces adj. are ignored) empty lines are ignored empty fields lines started by "#" are comments (ignored) ++ fiels are concatenated with the following section =head3 External tables tab(list-of-plants)* PT : $2 EN : $1 dom : plants PT : $1 EN : $2 dom : zoologia *tab(tab1) =head3 Inline tables PT : $1 EN : $2 dom : zoologia *tab{ gato::cat cão::dog } =head2 Macro structure -- Sections with a common domain == domain # dom=domain; subdom = subsubdom = none == domain == # idem === subdomain # subdom=subdomain subsubdom = none === subdomain === # idem ==== subsubdomain # subsubdom=subsubdomain ==== subsubdomain ==== # idem == # no domain === # no subdomain ==== # no subsubdomain =head1 AUTHOR J.Joao Almeida, jj@di.uminho.pt =head1 SEE ALSO Stardict Lingua::StarDict::Gen tbx2naterm LaTeX =cut 1;