#!/usr/bin/perl -s
# jj-trans-equiv - print word pairs whose best translations agree.
#
# Command-line switches are parsed by perl itself (-s on the shebang line),
# which stores them in the package variables declared with `our` below.

use strict;
use warnings;

our ($oco, $cor, $dup, $equiv, $debug, $requiv);

use NAT::Client;
use Data::Dumper;

my $id = shift @ARGV;
die "usage: $0 [-equiv|-requiv|-dup] [-debug] [-cor=N] [-oco=N] corpus-dir\n"
    unless defined $id && length $id;

#my $client = NAT::Client->new( LocalDumper => "$id" );
my $client = NAT::Client->new( Local => $id );

# Thresholds. An explicit 0 is honoured (the former `$oco || 20` silently
# replaced it with the default); only a missing or empty switch falls back.
my $minimum_oco = ( defined $oco && length $oco ) ? $oco : 20;
my $minimum_cor = ( defined $cor && length $cor ) ? $cor : 0.70;

# Source word => its best translation, filled by the first pass.
my %mem;

# best_translation(%entry)
#
# %entry is the key/value list handed to the iterate callback; this script
# reads its `word`, `count` and `trans` (translation => score) fields.
# Returns the highest-scoring translation, or nothing when the entry is
# filtered out: no translation, score below $minimum_cor, count below
# $minimum_oco, translation identical to the word, or either side matching
# /unicode/ or containing a digit.
sub best_translation {
    my (%entry) = @_;

    my $score_of = $entry{trans} || {};

    # Highest score first; ties are broken alphabetically so the result does
    # not depend on hash ordering.
    my ($best) = sort { $score_of->{$b} <=> $score_of->{$a} || $a cmp $b }
                 keys %{$score_of};

    return unless defined $best;
    return if $score_of->{$best} < $minimum_cor;
    return if $entry{count} < $minimum_oco;
    return if $entry{word} eq $best;
    return if $entry{word} =~ /unicode/ || $best =~ /unicode/;
    return if $entry{word} =~ /\d/      || $best =~ /\d/;

    return $best;
}

# Pass 1: remember the best translation of every accepted source word.
$client->iterate(
    { Language => 'source' },
    sub {
        my %p = @_;

        my $bestkey = best_translation(%p);
        return unless defined $bestkey;

        if ($dup)   { print "$p{word} $bestkey\n" }
        if ($debug) { print "$p{word} $bestkey=$p{count}=$p{trans}{$bestkey}\n" }

        $mem{ $p{word} } = $bestkey;
    }
);

print "==============================================\n";

# Pass 2: for every accepted target word, check whether its best translation
# maps back to it in %mem, i.e. the pair is the best choice in both directions.
$client->iterate(
    { Language => 'target' },
    sub {
        my %p = @_;

        my $bestkey = best_translation(%p);
        return unless defined $bestkey;

        if ($dup)   { print "$p{word} $bestkey\n" }
        if ($debug) { print "$p{word} $bestkey=$p{count}=$p{trans}{$bestkey}\n" }

        # $bestkey may never have been recorded in pass 1; do not compare
        # against undef.
        my $mutual = defined $mem{$bestkey} && $p{word} eq $mem{$bestkey};

        if ( $equiv  and $mutual ) { print "$bestkey $p{word}\n" }
        if ( $requiv and $mutual ) { print "$p{word} $bestkey\n" }
    }
);

__END__

# Leftover draft of another script. It sits after __END__, so perl never
# compiles or runs it. Its die message is Portuguese for
# "for now I only work with db files".

#!/usr/bin/perl -w
use strict;
use NAT;
use NAT::Dict;
use Data::Dumper;

my $dict_name = shift;
unless ($dict_name =~ m!\.db!) {
  die 'de momento so funciono com ficheiros dbs';
}

my $dict = NAT::Dict::open(dbfile => $dict_name);

__END__

=head1 NAME

jj-trans-equiv - extract word equivalents from PTD

=head1 SYNOPSIS

 jj-trans-equiv -equiv  corpus-dir   (equiv source-target)
 jj-trans-equiv -requiv corpus-dir   (reverse target-source)
 jj-trans-equiv -dup    corpus-dir   (source-target + target-source)

=head1 DESCRIPTION

The script walks the dictionary found in I<corpus-dir> twice. In the first
pass it visits the source-language entries and remembers, for each word, its
highest-scoring translation. In the second pass it visits the target-language
entries and checks whether the best translation of each target word has that
same target word as its own best translation.

An entry is ignored when its best translation scores below C<-cor>, when the
word occurs fewer than C<-oco> times, when word and translation are identical,
or when either of them matches C<unicode> or contains a digit.

With C<-equiv> each mutual pair is printed as C<source target>; with
C<-requiv> it is printed as C<target source>. With C<-dup> every accepted
C<word best-translation> pair is printed in both passes, separated by a line
of C<=> characters.

=head1 Options

 -cor=0.6   minimum percentage of the best word translation (0.7)
 -oco=10    minimum number of occurrences of the source word (20)
 -debug     also print each accepted pair with its count and score

A threshold of 0 is accepted and disables the corresponding filter.

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut