#!/usr/bin/perl -s

# jj-9: for every input line holding 1 to 3 words, print the most frequent
# n-grams that end with those words (left context), a "===== words ====="
# separator, and the most frequent n-grams that start with them (right
# context). All n-gram data comes from a NAT::Client connection.

use strict;
use warnings;

# Filled in by perl's -s switch (e.g. "-id=name"); must be a package variable.
our ($id);
$id ||= "__jj1-constituicao";

use NAT::Client;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Terse  = 1;

my $client = NAT::Client->new( Local => $id );

LINE:
while ( my $line = <> ) {
    chomp $line;

    # split ' ' ignores leading whitespace, so the word count (and therefore
    # the column indexes used below) stays right for lines such as " foo".
    my @words = split ' ', $line;
    next LINE unless @words;

    if ( @words > 3 ) {
        # The longest query built below is a 4-gram, so there is no room
        # left for a wildcard once the input already has four words.
        warn "jj-9: skipping '$line': at most 3 words are supported\n";
        next LINE;
    }

    my $ws   = @words;
    my $word = join ' ', @words;    # whitespace-normalised query text

    print "\n";
    print_left_context( $word, $ws );
    print "===== $word =====\n";
    print_right_context( $word, $ws );
    print "\n";
}

# Print one row per n-gram matching "* $word": the words found further to the
# left (when the 4-gram still has room for them), then "count word".
#   $word - whitespace-normalised input words
#   $ws   - number of words in $word (1..3)
sub print_left_context {
    my ( $word, $ws ) = @_;

    # The query has $ws + 1 tokens; the value printed as the count is read
    # from the column right after them.
    my $count_idx = $ws + 1;

    for my $row ( sngrams( { max => 10 }, "* $word" ) ) {
        my $label = "$row->[$count_idx] $row->[0]";

        if ( $ws == 3 ) {
            # Already a 4-gram: nothing further to the left to look up.
            printf( "%-13s |", $label );
            print "\n";
            next;
        }

        # Pad the query with wildcards up to a 4-gram and collect the words
        # that matched them.
        my $wildcards = $ws == 2 ? "*" : "* *";
        my $last_wild = 2 - $ws;    # index of the last wildcard column
        my $context   = "";
        for my $ext ( sngrams( { max => 10 }, "$wildcards $row->[0] $word" ) ) {
            $context .= join( " ", @{$ext}[ 0 .. $last_wild ] ) . " |";
        }
        printf( "%85s| %-10s\n", substr( $context, 0, 85 ), $label );
    }
    return;
}

# Print one row per n-gram matching "$word *": "count word", followed by the
# words found further to the right (when the 4-gram still has room for them).
#   $word - whitespace-normalised input words
#   $ws   - number of words in $word (1..3)
sub print_right_context {
    my ( $word, $ws ) = @_;

    my $count_idx = $ws + 1;    # column right after the query's tokens

    for my $row ( sngrams( { max => 10 }, "$word *" ) ) {
        my $next = $row->[$ws];    # the word that matched the wildcard
        printf( "%-13s |", "$row->[$count_idx] $next" );

        if ( $ws == 2 ) {
            for my $ext ( sngrams( { max => 10 }, "$word $next *" ) ) {
                print "| $ext->[3] ";
            }
        }
        elsif ( $ws == 1 ) {
            for my $ext ( sngrams( { max => 10 }, "$word $next * *" ) ) {
                print "| $ext->[2] $ext->[3]";
            }
        }
        print "\n";
    }
    return;
}

# Query $client for the n-grams matching a pattern and return them sorted by
# descending value of the column that follows the pattern's tokens
# (presumably the occurrence count -- confirm against NAT::Client).
#   optional first argument: hash ref of options; "max" caps the number of
#       rows returned (default 50, a false value means no cap)
#   next argument: the pattern, whitespace-separated tokens with "*" wildcards
# Returns a list of array refs; entries that are not references are dropped,
# and the list is never padded with undef.
sub sngrams {
    my %opt = ( max => 50 );
    if ( ref( $_[0] ) eq "HASH" ) { %opt = ( %opt, %{ shift @_ } ) }
    my $exp = shift;

    my @tokens = split ' ', $exp;
    my $ng     = @tokens;

    # "|| []" keeps an undefined reply from dying under strict refs.
    my @rows = sort { $b->[$ng] <=> $a->[$ng] }
               grep { ref } @{ $client->ngrams($exp) || [] };

    # Truncate instead of slicing, so short results are not padded with undef.
    $#rows = $opt{max} - 1 if $opt{max} && @rows > $opt{max};
    return @rows;
}

__END__

=encoding utf8

=head1 NAME

jj-9 - compact tetragrams from 1 or 2 words Pro e pre

=head1 SYNOPSIS

 jj-9 [-id=ID] [file ...]

Reads lines of 1 to 3 words from the given files or from standard input.
ID is passed as the C<Local> argument of C<< NAT::Client->new >> and
defaults to C<__jj1-constituicao>.

=head1 DESCRIPTION

(Previously: jj-1 -> to create the corpus)

Print compact tetragrams.
Example of output:

 itleart audiências |titleart decisões |ii org| 24 dos
 |mandato perante |governo e |judiciais são |3| 16 os
 justiça incumbe |- compete |disposto quanto || 5 aos
 egorias |a existência |seguintes categorias || 3 de
 juízes dos || 3 restantes
 | 2 titleart
 | 1 constituídos
 - podem || 1 existir
 conjuntamente , || 1 em
 tribunais marítimos || 1 ,
 ===== tribunais =====
 10 judiciais || de primeira| de segunda| , sem| e o| são o
 6 de || conflitos .| qualquer instância| primeira i
 6 administrativos || e fiscais
 4 são |
 3 da || relação .| relação e
 3 . |
 3 : |
 3 militares || , é| com competência
 2 e || do ministério| os demais
 2 que || não sejam| apliquem norma

In order to be accurate, a big corpus is necessary.

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut