#!/usr/bin/perl -s

# Command-line switches, filled in by perl's -s option parsing (see shebang):
#   -inv     give the reversed direction to the first lookup instead of the second
#   -id=N    value passed to NAT::Client as "crp" (defaults to 1)
#   -max=N   last index of the ranked result list to print (defaults to 11)
our ($inv, $id, $max);

# Exactly one of the two ptd() lookups receives the direction => "<~" option:
# the second one by default (@inv), the first one when -inv is given (@l).
my @reversed = (direction => "<~");
my @l   = $inv ? @reversed : ();
my @inv = $inv ? ()        : @reversed;

# Fall back to the defaults when a switch is absent (or false).
$id  = 1  unless $id;
$max = 11 unless $max;

use NAT::Client;
use Data::Dumper;
$Data::Dumper::Indent=1;
$Data::Dumper::Terse=1;

# Connect to the NAT server at the given address; "crp" is taken from -id.
# Alternative kept for reference, using a local corpus instead:
#   my $client = NAT::Client->new( Local => "/home/extra2/CORPORA/EuroParl-PT-EN");
my @client_args = (
    crp      => $id,
    PeerAddr => 'eremita.di.uminho.pt',
);
my $client = NAT::Client->new(@client_args);

# Main loop: every input line is taken as one word.  The word is looked up
# with ptd(); each entry returned is looked up again with the opposite
# direction setting; the words reached that way are scored and printed as a
# ranked list.
#
# As used here, ptd() is expected to return an array ref whose element 0 is
# printed as the word's count and whose element 1 is a hash ref mapping each
# related word to a numeric weight.
while (my $word = <>) {
    chomp $word;

    my %score = ();   # candidate => sum over pivots of weight(word,pivot) * weight(pivot,candidate)
    my %via   = ();   # candidate => space-terminated list of the pivots that led to it

    my $first = $client->ptd( { @l }, $word );
    print "$word (#=$first->[0])\n";

    for my $pivot (keys %{ $first->[1] }) {
        my $second = $client->ptd( { @inv }, $pivot );
        for my $cand (keys %{ $second->[1] }) {
            # Skip entries whose text contains the literal "(none)".
            next if $cand =~ /\(none\)/;
            $via{$cand}   .= "$pivot ";
            $score{$cand} += $first->[1]{$pivot} * $second->[1]{$cand};
        }
    }

    # Highest score first.  Ties fall back to string order so that the output
    # does not depend on Perl's randomised hash ordering.
    my @ranked = sort { $score{$b} <=> $score{$a} || $a cmp $b } keys %score;

    # -max is the LAST index printed, so up to $max+1 entries appear.  The
    # slice is clamped to the entries that exist, which removes the need for
    # a truthiness filter that used to drop a legitimate candidate "0".
    my $last = $#ranked < $max ? $#ranked : $max;
    for my $cand (@ranked[0 .. $last]) {
        next unless defined $cand && length $cand;
        printf "  %s (%.3f)   {%s}\n", $cand, $score{$cand} * 100, $via{$cand};
    }
}
__END__
  
=head1 NAME

jj-5 - the set of similar words  (ptd) + (ptd)

=head1 SYNOPSIS

 jj-5 [-id=3] [-inv] [-max=11] [file ...]

=head1 DESCRIPTION

If you are using a local corpus, you first need to run jj-1 to create it.
By default, a remote corpus is used.

 simil(y) = { x | x in trans(z) /\ z in trans(y) }
 prob(X,Y) = Sum {prob(X,I) * prob(I,Y) | I in trans(X) }

Print the words and number of occurrences. Example of output:

 país (#=19922)
  país (79.336)   {país allí países turquía }
  países (2.122)   {país países }
  estado (0.656)   {estado país }
  nação (0.135)   {país }
  estado-membro (0.053)   {país }
  ali (0.041)   {allí }
  lá (0.033)   {allí }
  turquia (0.031)   {turquía }

 povo (#=4202)
  pessoas (36.158)
  povo (9.914)
  cidadãos (5.934)
  população (5.321)
  popular (3.872)
  povos (3.237)
  nação (1.830)
  os (1.748)
  nacionais (0.388)

In order to have good results a big corpus may be necessary...

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut