#!/usr/bin/perl use locale; use Data::Dumper; ## binmode(DATA, ":utf8"); binmode(STDOUT, ":utf8"); my $G; sub init{ local $/; undef $/; $G = eval (); warn ($@) if $@; } init(); =head1 aps aps: regraAt x pal-set -> pal-set Aplica uma regra atomica a um conjunto de palavras. =cut sub aps{ my($r,@pal)=@_; my @r=(); $r = $G->{$r} unless ref($r); my $fi="$r->[0]$r->[1]"; $fi =~ s/(.)/[.$1]/g; @r = map {if (s/$fi$/$r->[0]$r->[1]/i){ s/$r->[1]$/$r->[2]/i;$_ } else {()};} @pal; } =head1 ap ap: regra x pal-set -> pal-set regra = u(regra) - dá a primeira aplicação possivel | a(regra) - dá o conjunto de todas as aplicações possiveis | c(regra) - dá a composição dos conjuntos | regraAt - regra atomica Aplica uma regra estruturada a um conjunto de palavras. =cut sub ap{ my($rid,@pal)=@_; $r = ref($rid) ? $rid: $G->{$rid} ; my @r=(); if (ref($r) eq "u"){ @r = grep (/./, map { aps($_,@pal) } @$r); @r = (@r[0]); } elsif(ref($r) eq "a"){ @r = map { ap($_,@pal) } @$r; } elsif(ref($r) eq "c"){ @r=@pal; for(@$r){ @r=grep(/./,ap($_,@r) ); } } elsif(ref($r) eq "ARRAY"){ @r= ( aps($r,@pal))} elsif(defined $G->{$r}) { print "::"; @r= ( ap($G->{$r},@pal))} else { print sdterr "???$r???\n" } @r; } =head1 prefixos prefixos: regra -> pal-set calcula os sufixos de entrada associada a uma regra. =cut sub prefixos{ my($r)=@_; my @r=(); if (ref($r) eq "u"){ @r = map { $_->[0].$_->[1] } @$r; } elsif(ref($r) eq "a"){ @r = map { prefixos($_) } @$r; } elsif(ref($r) eq "c"){ @r = prefixos($r->[0]) ; } else { @r= ( $r->[0].$r->[1] ) } @r; } =head1 concatenacao de regras: regraqs = regraAt | regraU conc: regra -> regrads-set dado um esquema da um conjunto de regras simples equivalente concaux - concatena regras simples concaux: regraAt x regraAt -> regraAt|() concsets: regraqs-set x regraqs-set -> regraqs-set concsets - concatena conjuntos de regrasqs concsetsaux: regraqs x regraqs -> regraqs|() aps1 - dada uma regra simples calcula a informacao minima das palavras entrada, saida ## concu: regraAt* -> segraAt* u2at : regraAt* -> segraAt* =cut sub aps1{ my($r,$pal)=@_; my @r=(); my $fi="$r->[0]$r->[1]"; $fi =~ s/(.)/[.$1]/g; my $p =$pal; if ( $p =~ s/$fi$/$r->[0]$r->[1]/i){ $pal=$p; $pal =~ s/$r->[1]$/$r->[2]/i; @r=($p,$pal) } @r; } sub concaux{ my($r1,$r2)=@_; my ($p1,$p2,$p3,$p4)=("...........","","",""); ($p1,$p2)=aps1($r1,$p1); return () unless $p2; ($p3,$p4)=aps1($r2,$p2); return () unless $p4; print "$p1/$p2/$p3/$p4\n" if $debug; $p1 =~ s/\.//g; $p2 =~ s/\.//g; $p3 =~ s/\.//g; $p4 =~ s/\.//g; $p3 =~ s/$p2$// if $p2 ; $p1 = $p3 . $p1; for (reverse( (1..length($p4)))){ if (substr($p1,0,$_) eq substr($p4,0,$_)) { return [substr($p1,0,$_),substr($p1,$_),substr($p4,$_),{%{$r1->[3]},%{$r2->[3]}}]} } ["",$p1,$p4,{%{$r1->[3]},%{$r2->[3]}}]; } sub conc{ my $r=shift; my @a=""; if (ref($r) eq "u"){ @a = u2at(@$r); } elsif(ref($r) eq "a"){ @a = ( map { conc($_) } @$r); } elsif(ref($r) eq "c"){ my ($l1,@l)=@$r; @a= ( conc($l1)) ; for(@l){ @a=(grep(defined($_),(concsets([@a],[conc($_)])))); } } elsif(ref($r)){ @a= ($r)} elsif(defined $G->{$r}){ @a = conc($G->{$r}) } else {@a=("????")} @a; } sub concsets{ my ($r1,$r2)=@_; my $aux; map { $aux = $_; ( map {concsetsaux($aux,$_) } @$r2 ) } @$r1 ; } sub concsetsaux{ my ($r1,$r2)=@_; my $aux; my @a=(); if(ref($r1) eq "u" && ref($r2) eq "u"){ @a = (map { $aux = $_; ( map {concaux($aux,$_) } @$r2 ) } @$r1) ; if(scalar @a >=2 ) {u(@a)} else {@a} } elsif(ref($r1) eq "u"){ @a = (map {concaux($_,$r2) } @$r1) ; if(scalar @a >=2 ) {u(@a)} else {@a} } elsif(ref($r2) eq "u"){ @a = (map {concaux($r1,$_) } @$r2) ; if(scalar @a >=2 ) {u(@a)} else {@a} } else{ concaux($r1,$r2); } } ### Prints flag *x : ..... def ... (in jspell.aff format) sub pflag{ my ($f,$ef)=@_; print "\n\nflag *$f:\n"; PP(conc($ef)); } sub PP{ for my $a (@_){ my $x= Dumper($a->[3]); $x =~ s/[\{\}]//g; $x =~ s/.*?=\s*//; $x =~ s/=>/=/g; $x =~ s/\s+//g; $x =~ s/'//g; if ($a->[1]){ print "$a->[0]$a->[1]\t> -$a->[1],$a->[2]\t;\"$x\" #\n" } else { print "$a->[0]\t> $a->[2]\t;\"$x\" #\n" } }; } sub u {bless([@_],"u");} sub a {bless([@_],"a");} sub c {bless([@_],"c");} sub _nnot{ "[".join("",( "^",@_))."]" } sub _in0{ my ($p,$t)=@_; if(!ref($t)){ return $t} if(@$p){ my ($f1,@p1) = @$p; $t->{$f1} = _in0([@p1],$t->{$f1} || {} ); return $t; } else {return 1} } sub in{ _in0([ reverse(split(//,$_[0]))],$_[1]); } sub _d0{ my ($p,$t)=@_; if(@$p){ my ($f,@tail) = @$p; if(defined $t->{$f} ){ return (map { [$f, @$_ ]} _d0([@tail],$t->{$f} ) ); } else { return ( $p ); } } else{ if(not ref($t)) { return ()}; my @l = keys %$t; return ( [ _nnot(@l)] , (map { my @l1 = _d0($p,$t->{$_}); my $f = $_; (map { [$f, @$_ ]} @l1) } @l)) } } sub d{ map { join ("", reverse @$_) } _d0([reverse(split(//,$_[0]))],$_[1]); } sub u2at{ my @l=@_; my @r = (); my $t = {}; for my $a (@l){ my $aux = "$a->[0]$a->[1]"; for ( d($aux,$t)){ s/$a->[1]//; push (@r,[$_,$a->[1],$a->[2],$a->[3]]); } $t = in($aux,$t); } @r; } # exemplos #print Dumper(concaux( ["a","r","va",{n=>'p'}],["","","s",{n=>'p'}])); #print Dumper(concaux( ["a","r","va",{n=>'p'}],["ia","va","bas",{n=>'p'}])); #print Dumper(concsets([["a","r","va",{n=>'p'}]],[["ia","va","bas",{n=>'p'}]])); #print Dumper(conc( ["a","r","va",{n=>'p'}])); #print Dumper(conc( "GN")); #print Dumper(conc( u( ["a","r","va",{n=>'p'}],["ia","va","bas",{n=>'p'}]))); #print Dumper(conc( a( ["a","r","va",{n=>'p'}],["ia","va","bas",{n=>'p'}]))); #print Dumper(conc( c( ["a","r","va",{n=>'p'}],["ia","va","bas",{n=>'p'}]))); #print Dumper(conc( c( ["a","r","va",{n=>'p'}],a(["ia","va","bas",{n=>'p'}],["","","qqq",{n=>'p'}])))); #print Dumper(conc("pp")); #print Dumper(conc("verbo")); #print Dumper(conc("av_izar")); #PP(conc("av_izar")); ## pflag(p=>"p"); ## pflag(z=>"av_izar"); ## pflag("a","pp"); ## pflag("b","GN"); ## pflag(X=>"verbo"); ## pflag(M=>"vs_mento"); ## pflag(C=>"vs_cao"); # pflag(ZZ1 =>"mudaae"); OK # pflag(ZZ2 =>"mudaeio"); OK # pflag(ZZ3 =>"jj1"); notok print "--mudaae\n"; PP(conc("mudaae")); print "--jj1\n"; PP(conc("jj1")); print "--id\n"; PP(conc("id")); print "--c(id,mudaae)\n"; PP(conc(c("id","mudaae"))); ##u2a(qw{ão al r m },"" ); #print Dumper($G->{GN}); #print join("...", ap("GN","lindo"))," (GN(lindo))\n" ; #print join("...", ap("p","lindo"))," (p(lindo))\n" ; #print join("...", ap("p","leão"))," (p(leão))\n" ; #print join("...", ap("verbo","lavar"))," (verbo(lavar))\n" ; #print join("...",prefixos("verbo")); #print "-" x 20, "\n"; #while(<>){ # chop; # if(/(\w+)\s*\+\s*(\w+)/){ # print join("...", ap($2,$1))," ($2($1))\n" ; A } # else { # print join("...", ap("verbo",$_))," (verbo($_))\n" ; # } #} __DATA__ { p =>u( # plural ["","ão","ões",{n=>'p'}], ["a","l","is",{n=>'p'}], ["r","","es",{n=>'p'}], ["","m","ns",{n=>'p'}], ["","","s",{n=>'p'}], ), pnfi => a( ["","","ei",{n=>'s',p=>'1',tempo=>'futuroInd'}], ["","","ás",{n=>'s',p=>'2',tempo=>'futuroInd'}], ["","","á",{n=>'s',p=>'3',tempo=>'futuroInd'}], ["","","emos",{n=>'p',p=>'1',tempo=>'futuroInd'}], ["","","eis",{n=>'p',p=>'2',tempo=>'futuroInd'}], ["","","rão",{n=>'p',p=>'3',tempo=>'futuroInd'}], ), pnfc => a( ["","","",{n=>'s',p=>'1_3',tempo=>'futuroCon'}], ["","","es",{n=>'s',p=>'2',tempo=>'futuroCon'}], ["","","mos",{n=>'p',p=>'1',tempo=>'futuroCon'}], ["","","des",{n=>'p',p=>'2',tempo=>'futuroCon'}], ["","","em",{n=>'p',p=>'3',tempo=>'futuroCon'}], ), pnppi => a( c("mudaae", ["","","i",{n=>'s',p=>'1'}]), ["","e","i",{n=>'s',p=>'1'}], ["","i","i",{n=>'s',p=>'1'}], ["","","ste",{n=>'s',p=>'2'}], ["","a","ou",{n=>'s',p=>'3'}], ["","e","eu",{n=>'s',p=>'3'}], ["","i","iu",{n=>'s',p=>'3'}], ["","a","amos",{n=>'p',p=>'1'}], ["","e","emos",{n=>'p',p=>'1'}], ["","i","imos",{n=>'p',p=>'1'}], ["","","steis",{n=>'p',p=>'2'}], ["","","ram",{n=>'p',p=>'3'}], ), pnpi =>a( ["","a","o",{n=>'s',p=>'1'}], c("mudaeio", ["","","",{n=>'s',p=>'1'}]), ["","a","as",{n=>'s',p=>'2'}], ["","e","es",{n=>'s',p=>'2'}], ["","i","es",{n=>'s',p=>'2'}], ["","a","a",{n=>'s',p=>'3'}], ["","e","e",{n=>'s',p=>'3'}], ["","i","e",{n=>'s',p=>'3'}], ["","a","amos",{n=>'p',p=>'1'}], ["","e","emos",{n=>'p',p=>'1'}], ["","i","imos",{n=>'p',p=>'1'}], ["","a","ais",{n=>'p',p=>'2'}], ["","e","eis",{n=>'p',p=>'2'}], ["","i","is",{n=>'p',p=>'2'}], ["","a","am",{n=>'p',p=>'3'}], ["","e","em",{n=>'p',p=>'3'}], ["","i","em",{n=>'p',p=>'3'}], ), mudaeio => u( ["","ci","ço",{}], ["","gi","jo",{}], ["","i","o",{}], ["","ce","ço",{}], ["","ge","jo",{}], ["","e","o",{}], ), mudaeia => u( ["","ci","ça",{}], ["","gi","ja",{}], ["","i","a",{}], ["","ce","ça",{}], ["","ge","ja",{}], ["","e","a",{}], ), mudaae => u( ["","ça","ce",{}], ["","ca","que",{}], ["","ga","gue",{}], ["","a","e",{}], ), jj1 => c("mudaae", ["","","mos",{n=>'p',p=>'1'}]), pnpc =>a( c("mudaae", ["","","",{n=>'s',p=>'1_3'}]), c("mudaeia", ["","","",{n=>'s',p=>'1_3'}]), c("mudaae", ["","","s",{n=>'s',p=>'2'}]), c("mudaeia", ["","","s",{n=>'s',p=>'2'}]), c("mudaae", ["","","mos",{n=>'p',p=>'1'}]), c("mudaeia", ["","","mos",{n=>'p',p=>'1'}]), c("mudaae", ["","","m",{n=>'p',p=>'3'}]), c("mudaeia", ["","","m",{n=>'p',p=>'3'}]), ), pn =>a( ["","","",{n=>'s',p=>'1_3'}], ["","","s",{n=>'s',p=>'2'}], ["","","mos",{n=>'p',p=>'1'}], ["","a","eis",{n=>'p',p=>'2'}], ["","e","eis",{n=>'p',p=>'2'}], ["i","","eis",{n=>'p',p=>'2'}], ["","","m",{n=>'p',p=>'3'}], ), id =>["","","",{}], # identidade f =>u( # feminino ["","ão","oa",{g=>'f'}], ["","o","a",{g=>'f'}], ["or","","a",{g=>'f'}], ), GN => a( "id", # genero numero "f", "p", c("f","p") ), aa_mente => u( ["","o","amente",{cat=>adv_modo}], ["a","","mente",{cat=>adv_modo}], ["e","","mente",{cat=>adv_modo}], ["","ável","avelmente",{cat=>adv_modo}], ["","ível","ivelmente",{cat=>adv_modo}], ), pp =>c(a(["","er","ido",{tempo=>'pp'}], #participio passado ["a","r","do",{tempo=>'pp'}], ["i","r","do",{tempo=>'pp'}]), "GN"), verbo=> c( ["","","",{cat=>'v'}], a( #verbos c(["","r","", {tempo=>'presentInd'}], # presente indicativo "pnpi"), c(u( # imperfeito do indicativo ["a","r","va",{tempo=>'imperfInd'}], ["","er","ia",{tempo=>'imperfInd'}], ["i","r","a", {tempo=>'imperfInd'}]), "pn"), c(["","r","", {tempo=>'presentPerf'}], # pret. perfeito indicatico "pnppi"), c(["r","","a", {tempo=>'maisqueperfInd'}],# mais-que-perfeito do ind "pn"), "pnfi", c(["","r","", {tempo=>'presentConj'}], # presente conjuntivo "pnpc"), c(["","r","sse", {tempo=>'imperfConj'}], # imperfeito do conjuntivo "pn"), "pnfc", c(["r","","ia", {tempo=>'condicional'}], # condicional "pn"), "pp", ["","r","ndo",{tempo=>'gerundio'}], ) # participio ), av_izar=> c(u( ["a","","tizar",{cat=>"v",sem=>"izar"}], ["","e","izar",{cat=>"v",sem=>"izar"}], ["","o","izar",{cat=>"v",sem=>"izar"}], ["","ável","abilizar",{cat=>"v",sem=>"izar"}], ["l","","izar",{cat=>"v",sem=>"izar"}], ["r","","izar",{cat=>"v",sem=>"izar"}], ), "verbo" ), vs_mento=> c(a( ["a","r","mento",{cat=>"n",g=>"m",sem=>"mento"}], ["","er","imento",{cat=>"n",g=>"m",sem=>"mento"}], ["i","r","mento",{cat=>"n",g=>"m",sem=>"mento"}], ), a("id","p") ), vs_cao => c(a( ["a","r","ção",{cat=>"n",g=>"m",sem=>"cao"}], ["","er","ição",{cat=>"n",g=>"m",sem=>"cao"}], ["i","r","ção",{cat=>"n",g=>"m",sem=>"cao"}], ), a("id","p") ), };