#!/usr/bin/perl -s
#
# Generate a StarDict dictionary from the entries served by a NAT::Client.
#
# Usage:  <this script> [-name=DICTNAME] [CORPUS]
#
#   CORPUS    identifier handed to NAT::Client->new(Local => ...);
#             defaults to "EuroParl-PT-EN".
#   -name=... name passed to Lingua::StarDict::Gen::writeDict; defaults to
#             CORPUS.  Because of perl's -s switch it must appear before
#             CORPUS on the command line.
#
# Every entry delivered by $client->iterate is filtered and formatted by
# proc(); the accumulated entries are written out in a single writeDict call.

use strict;
use warnings;

use NAT::Client;
use Data::Dumper;
use Lingua::StarDict::Gen;

# Filled in by "perl -s" from a -name=VALUE switch, so it has to remain a
# package variable (a lexical would never see the switch value).
our ($name);

my $id = shift || "EuroParl-PT-EN";

my $client = NAT::Client->new(Local => $id);
$name ||= $id;

# word => formatted entry text.  Starts as an empty hash so writeDict always
# receives a hash reference, even when no word passes the filters in proc().
my $dic = {};

# Number of entries stored so far; only used for the progress dots.
my $i = 0;

$client->iterate(\&proc);

Lingua::StarDict::Gen::writeDict($dic, $name);

# proc(%param)
#
# Callback for $client->iterate.  Reads three keys from its argument list:
#   word  - the headword
#   count - a number printed as the first line of the entry
#           (presumably the corpus frequency of the word - TODO confirm)
#   trans - hash ref translation => score; scores are compared with 0.03 and
#           shown multiplied by 100 (presumably probabilities - TODO confirm)
#
# Words that are empty, shorter than 3 characters, or whose count is not
# greater than 1 are ignored.  For every remaining word an entry is stored in
# $dic listing each translation with score >= 0.03, best first, followed by
# up to 4 truncated concordance pairs requested from $client->conc.
sub proc {
    my %param = @_;

    my $word = $param{word};
    return unless $word;
    return unless length($word) > 2;

    # A missing count is treated like a low count instead of raising an
    # "uninitialized value" warning in the comparison.
    my $count = $param{count};
    return unless defined $count && $count > 1;

    my $string = "$count\n";

    # A missing translation table yields an entry with no translations
    # instead of a fatal "undefined value as a HASH reference" error.
    my %trads = %{ $param{trans} || {} };

    # Disabled in the original script (cap on the number of entries and an
    # alternative entry header), kept for reference:
    # return if $i > 2000;
    ## $string = "Palavra: $word\n\n";

    # Best translation first; ties are ordered alphabetically so that the
    # generated dictionary is the same from one run to the next.
    my @candidates = sort { $trads{$b} <=> $trads{$a} or $a cmp $b } keys %trads;

    for my $t (@candidates) {
        next if $trads{$t} < 0.03;

        my $p = sprintf("%5.2f", $trads{$t} * 100);
        ## $string.= " - P(Trad( $word ) = $t) = $p\n\n";
        $string .= "==> $t ($p):\n";
        ## $string.= " - Exemplos de uso:\n\n";

        # Each concordance is used as a pair: element 0 is searched for
        # $word and element 1 for $t (presumably source and target
        # sentence - verify against NAT::Client).
        my $concs = $client->conc({ count => 4 }, $word, $t) || [];
        for my $c (@$concs) {
            $string .= " - " . trunca($word, $c->[0]) . "\n";
            $string .= " - " . trunca($t,    $c->[1]) . "\n\n";
        }

        # Disabled in the original script (terminology entries obtained by
        # grepping a corpus file), kept for reference:
        # $string.= " - Entradas Terminologicas:\n\n";
        # my @terms = `grep -E '\\<$word\\>' EuroParl-PT-EN/___|grep '\\<$t\\>'`;
        # for my $term (@terms) {
        #   chomp($term);
        #   my ($count,$l,$r) = ($term =~ m/\s*(\d+)\s+(.*?)\s=!.+?!=\s(.*)/);
        #   $string .=" - $l | $r ($count)\n";
        # }

        $string .= "\n\n";
    }

    $dic->{$word} = $string;

    # One progress dot on STDERR per 1000 stored entries.
    $i++;
    print STDERR "." unless $i % 1000;

    return;
}

# trunca($word, $sentence)
#
# Returns $sentence shortened around $word (matched literally and
# case-insensitively): text more than 30 characters before a match is
# replaced by a leading "...", and text more than 30 characters after a
# match by a trailing "...".  A side is left untouched when fewer than 30
# characters surround the word there, and the whole sentence is returned
# unchanged when $word does not occur in it.  An undefined sentence yields
# the empty string.
sub trunca {
    my ($word, $sentence) = @_;

    # Concordance pairs may lack one side; avoid warnings on undef.
    return '' unless defined $sentence;

    # Left cut: the greedy .* settles on the last occurrence of $word that
    # has at least 30 characters in front of it.
    $sentence =~ s/^.*(.{30}\Q$word\E)/...$1/is;

    # Right cut: first occurrence of $word in what remains, provided at
    # least 30 characters follow it.
    $sentence =~ s/(\Q$word\E.{30}).*$/$1.../is;

    return $sentence;
}