#!/usr/bin/perl  ## -CIO
#use POSIX qw(locale_h);
#setlocale(&POSIX::LC_ALL, "pt_PT");
#use locale;

# Runs Lingua::PT::ProperNames::forPN over the file named on the command
# line, handing it two callbacks (procPn and procPn2) that look each proper
# name up in the Portuguese and English Jspell dictionaries and in a local
# exception table ("tagPnTable").  Names that cannot be classified are
# reported on STDERR.  Output is written to "<file>.out".
#
# Usage: script FILE

use strict;
use warnings;
use Lingua::Jspell;
use utf8::all;
use Lingua::PT::ProperNames;

my $pt_dict = Lingua::Jspell->new("port");
my $en_dict = Lingua::Jspell->new("eng");

# NOTE(review): these two hashes are never used in the visible code; kept in
# case something outside this view relies on them.
my %pnlist  = ();
my %pnlistD = ();

my $file = shift;
die "usage: $0 FILE\n" unless defined $file;

# Load the table of known proper names.  It is used below as a hash
# reference keyed by the first word of a name.
#
# Since Perl 5.26 "." is no longer in @INC, so a bare relative name given to
# do() is not found in the current directory; prefer an explicit "./" path
# and fall back to the @INC search otherwise.
my $table_file = "tagPnTable";
my $table_path = -e "./$table_file" ? "./$table_file" : $table_file;
my $tab        = do $table_path;
if (!defined $tab) {
    # Check $@ first (compile/run error), then $! (file could not be read).
    # $! alone must not be tested after a successful do(): it is only
    # meaningful right after a failure and may hold a stale value.
    die "cannot parse $table_file: $@\n" if $@;
    die "cannot read $table_file: $!\n";
}
die "$table_file did not return a hash reference\n"
    unless ref $tab eq 'HASH';

# The original option list gave "out" twice ("$file.out.txt" and then
# "$file.out"); the later value always won, so only that one is kept.
# NOTE(review): with t => "double" forPN takes a second callback, presumably
# for names in sentence-initial position -- confirm against the module docs.
forPN(
    {
        in       => $file,
        sep      => '>',
        encoding => "utf8",
        out      => "$file.out",
        t        => "double",
    },
    \&procPn,
    \&procPn2,
);

# procPn($name)
#
# Callback for forPN.  Looks up the first word of $name:
#   1. as a proper noun (CAT => "np") in the Portuguese dictionary,
#   2. as a key of the tagPnTable hash,
#   3. as a proper noun in the English dictionary.
# When only step 3 could match, a diagnostic is printed on STDERR if the
# English entry has no SEM feature or if there is no entry at all.
#
# Returns: $name as a string, unchanged, in every case.
sub procPn {
    my $name = shift;

    # No word characters at all: nothing to look up.
    return "$name" unless $name =~ /(\w+)/;
    my $first_word = $1;

    my %pt_np = Lingua::Jspell::onethat({ CAT => "np" },
                                        $pt_dict->fea($first_word));
    return "$name" if %pt_np;
    return "$name" if defined $tab->{$first_word};

    my %en_np = Lingua::Jspell::onethat({ CAT => "np" },
                                        $en_dict->fea($first_word));
    if (!%en_np) {
        print STDERR "?? $name is undefined\n";
    }
    elsif (!defined $en_np{SEM}) {
        print STDERR "?? ", n($name), " -- english but no SEM\n";
    }

    return "$name";
}

# procPn2($name)
#
# Second callback for forPN.  If the first word of $name is a Portuguese
# proper noun, or is unknown to the Portuguese dictionary altogether, the
# whole name is handed to procPn.  Otherwise the first word is taken to be an
# ordinary word: it is kept verbatim (with its trailing whitespace) and only
# the remainder is handed to procPn.
#
# Returns: the processed string, or undef when the first word is a known
# non-proper-noun and nothing follows it.
sub procPn2 {
    my $name = shift;

    return "$name" unless $name =~ /(\w+)/;

    my @analyses = $pt_dict->fea($1);
    my %pt_np    = Lingua::Jspell::onethat({ CAT => "np" }, @analyses);

    return procPn($name) if %pt_np || @analyses == 0;

    if ($name =~ /(\w+\s+)(\S.*)/) {
        # Copy the captures before calling procPn, which runs its own match.
        my ($head, $rest) = ($1, $2);
        return $head . procPn($rest);
    }

    # An explicit undef (not a bare return) is kept on purpose so the caller
    # receives exactly one value even in list context, as before.
    return undef;
}

# n($text)
#
# Whitespace normaliser used for diagnostics: collapses every run of
# whitespace to a single space and strips one leading/trailing space.
#
# Returns: the normalised copy; the argument is not modified.
sub n {
    my $text = shift;
    $text =~ s/\s+/ /g;
    $text =~ s/^ | $//g;
    return $text;
}