#!/usr/bin/perl -s
#
# jj-7 - print compact tetragrams for every 1- or 2-word line read from input.
#
# Each input line is used as the prefix of n-gram queries against the local
# NAT corpus "./__jj1-constituicao".  For every prefix the most frequent
# continuations are printed, followed by the most frequent completions of the
# resulting tetragram.  See the POD after __END__ for an example of the output.

use strict;
use warnings;

use NAT::Client;
use Data::Dumper;
use constant TOP_N => 10;    # how many continuations are shown per query

# Debugging aid only: Dumper is not called by the code below.
$Data::Dumper::Indent = 1;
$Data::Dumper::Terse  = 1;

my $id = "__jj1-constituicao";

# Set by the -s switch on the shebang line (e.g. "jj-7 -lang=ES").
our ($lang);

# Direction options selected by -lang.  They are only referenced by the
# disabled ptd() call inside the main loop, so today they do not affect output.
my @l   = ();
my @inv = ( direction => "<~" );
if ( defined $lang && $lang eq "ES" ) {
    @l   = ( direction => "<~" );
    @inv = ();
}

my $client = NAT::Client->new( Local => "./$id" );

# top_ngrams($client, $query, $count_idx)
#
# Runs $query through $client->ngrams and returns (as a list) at most TOP_N
# rows, ordered by the numeric value found at $count_idx, highest first.
# False rows are dropped, and a false result from ngrams() is treated as
# "no rows".
#
# NOTE(review): the callers assume a row looks like [ token, ..., count ], so
# that the count sits at the index equal to the number of tokens in the
# query - confirm against the NAT::Client documentation.
sub top_ngrams {
    my ( $nat, $query, $count_idx ) = @_;

    my $rows   = $nat->ngrams($query) || [];
    my @ranked = sort { $b->[$count_idx] <=> $a->[$count_idx] }
                 grep { $_ } @$rows;

    # Bound the slice explicitly instead of slicing [0..9] and then skipping
    # the undef padding.
    my $last = $#ranked < TOP_N - 1 ? $#ranked : TOP_N - 1;
    return @ranked[ 0 .. $last ];
}

while ( my $line = <> ) {
    chomp $line;

    # my $a1 = $client->ptd( { @l }, $line);
    my $word = $line;

    if ( $word =~ /\w \w/ ) {

        # Two (or more) words: extend with a 3rd token, then list 4th tokens.
        for my $tri ( top_ngrams( $client, "$word *", 3 ) ) {
            printf( "%13s |", "$tri->[3] $tri->[2]" );

            # The original sorted this four-token query on index 5, which is
            # past the count column used everywhere else (index == number of
            # tokens), so the rows were never actually ranked.
            for my $tetra ( top_ngrams( $client, "$word $tri->[2] *", 4 ) ) {
                print "| $tetra->[3] ";
            }
            print "\n";
        }
    }
    else {

        # Single word: extend with a 2nd token, then list (3rd, 4th) pairs.
        for my $bi ( top_ngrams( $client, "$word *", 2 ) ) {
            printf( "%13s |", "$bi->[2] $bi->[1]" );
            for my $tetra ( top_ngrams( $client, "$word $bi->[1] * *", 4 ) ) {
                print "| $tetra->[2] $tetra->[3]";
            }
            print "\n";
        }
    }
}

__END__

=head1 NAME

jj-7 - compact tetragrams from 1 or 2 words

=head1 SYNOPSIS

 jj-7 [-lang=ES] [file ...]

Reads one or two words per line from the given files (or standard input).
The C<-lang> switch is accepted but currently has no effect on the output.

=head1 DESCRIPTION

(Previously: jj-1 -> to create the corpus)

Print compact tetragrams.

Example of output:

 == os tribunais
 3 de || primeira | segunda | comarca
 2 da || relacao
 2 sao || os | independentes
 2 judiciais || de | sao
 2 comuns || . | em
 1 . || (null)
 1 tem || direito
 1 previstos || nos
 1 aplicar || normas

 == tribunais
 10 judiciais || de primeira| de segunda| e o| sao os| ou de| formam um
 5 de || qualquer instancia| conflitos .| primeira instancia| comarca ,
 5 administrativos || e fiscais
 4 sao || os orgaos| publicas ,| independentes e| obrigatorias para
 3 militares || com competencia
 2 e || do ministerio| os demais

In order to be accurate a big corpus is necessary...

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut