#!/usr/bin/perl -s

# jj-5: for every word read from the input, chain two probabilistic
# translation dictionary (ptd) lookups and print the words reached that way,
# ranked by combined probability.  User documentation is in the POD after
# __END__.

use strict;
use warnings;

# Filled in from the command line by perl's -s switch: -inv, -id=N, -max=N.
our ($inv, $id, $max);

# Options passed to the first and to the second ptd lookup.  By default only
# the second lookup asks for direction "<~"; -inv swaps the two.
my @first_opts  = ();
my @second_opts = (direction => "<~");
if ($inv) {
    @first_opts  = (direction => "<~");
    @second_opts = ();
}

$max ||= 11;    # highest result index printed (so up to $max + 1 lines)
$id  ||= 1;     # corpus id handed to the server as "crp"

use NAT::Client;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Terse  = 1;

#my $client = NAT::Client->new( Local => "/home/extra2/CORPORA/EuroParl-PT-EN");
my $client = NAT::Client->new(
    crp      => $id,
    PeerAddr => 'eremita.di.uminho.pt',
);

while (my $word = <>) {
    chomp $word;

    my %score;        # candidate => sum over pivots of p(word,pivot) * p(pivot,candidate)
    my %pivots_of;    # candidate => space-terminated list of pivots that led to it

    # ptd() answers with an array ref: element 0 is printed as the "#=" count,
    # element 1 is a hash of related word => weight.
    my $first = $client->ptd({ @first_opts }, $word);
    my $count = defined $first->[0] ? $first->[0] : '';
    print "$word (#=$count)\n";

    my $weight_of_pivot = $first->[1] || {};
    for my $pivot (keys %$weight_of_pivot) {
        my $second              = $client->ptd({ @second_opts }, $pivot);
        my $weight_of_candidate = $second->[1] || {};

        for my $candidate (keys %$weight_of_candidate) {
            next if $candidate =~ /\(none\)/;    # placeholder entry, not a word

            $pivots_of{$candidate} .= "$pivot ";
            $score{$candidate}
                += $weight_of_pivot->{$pivot} * $weight_of_candidate->{$candidate};
        }
    }

    # Highest score first; the string comparison only makes ties deterministic.
    my @ranked = sort { $score{$b} <=> $score{$a} or $a cmp $b } keys %score;

    # Clamp the slice to what is available.  Slicing past the end yields undef
    # slots, and filtering those by truthiness would also discard a genuine
    # candidate spelled "0".
    my $last = $max < $#ranked ? $max : $#ranked;
    for my $candidate (@ranked[0 .. $last]) {
        next unless length $candidate;
        printf " %s (%.3f) {%s}\n",
            $candidate, $score{$candidate} * 100, $pivots_of{$candidate};
    }
}

__END__

=head1 NAME

jj-5 - the set of similar words (ptd) + (ptd)

=head1 SYNOPSIS

 jj-5 [-id=3] [-inv] [-max=11] [file ...]

=head1 DESCRIPTION

If you are using a local corpus, you need to run jj-1 first to create it.
By default it is using a remote corpus.

 simil(y)  = { x | x in trans(z) /\ z in trans(y) }
 prob(X,Y) = Sum {prob(X,I) * prob(I,Y) | I in trans(X) }

Words are read one per line from the given files or from standard input.

Print the words and number of occurrences.

Example of output:

 país (#=19922)
   país (79.336) {país allí países turquía }
   países (2.122) {país países }
   estado (0.656) {estado país }
   nação (0.135) {país }
   estado-membro (0.053) {país }
   ali (0.041) {allí }
   lá (0.033) {allí }
   turquia (0.031) {turquía }
 povo (#=4202)
   pessoas (36.158)
   povo (9.914)
   cidadãos (5.934)
   população (5.321)
   popular (3.872)
   povos (3.237)
   nação (1.830)
   os (1.748)
   nacionais (0.388)

In order to have good results a big corpus may be necessary...

=head1 OPTIONS

=over 4

=item B<-id=N>

Corpus identifier sent to the server (default 1).

=item B<-inv>

Swap the direction used by the two dictionary lookups.

=item B<-max=N>

Index of the last result printed for each word (default 11); results are
numbered from 0.

=back

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut