#!/usr/bin/perl -w use locale; use Data::Dumper; use Lingua::PT::PLNbase; use MLDBM qw/DB_File Storable/; use Fcntl; use Memoize; memoize('getB'); memoize('getA'); my $db = shift; my %DB; my $f1 = shift; my $f2 = shift; die "Can't open '$f1'" unless -f $f1; die "Can't open '$f2'" unless -f $f2; my @s1 = atomiza(lc(`cat $f1`)); my @s2 = atomiza(lc(`cat $f2`)); my $Size1 = scalar @s1; my $Size2 = scalar @s2; tie %DB, 'MLDBM', $db, O_RDONLY, 0640, or die $!; my $TOT = 0; for $i (0..$Size1-1) { next if length($s1[$i]) < 2; for $j (0..$Size2-1) { next if length($s2[$j]) < 2; if (abs($i/$Size1-$j/$Size2) < .05) { $TOT += prob(\%DB, $s1[$i], $s2[$j]) } } } $TOT = $TOT / (($Size1 + $Size2)/2); printf("%.5f\n", $TOT); untie %DB; sub getA { my ($db,$a) = @_; if (exists($db->{"A_$a"})) { $db->{"A_$a"}{trans} } else { +{} } } sub getB { my ($db,$b) = @_; if (exists($db->{"B_$b"})) { $db->{"B_$b"}{trans} } else { +{} } } sub prob { my ($db,$l,$r) = @_; my $x; my $ret = 0; $x = getA($db,$l); if (exists($x->{$r})) { $ret += 100*$x->{$r}; } $x = getB($db,$r); if (exists($x->{$l})) { $ret += 100*$x->{$l}; } $ret /= 2; $ret *= (1-abs($i/$Size1 - $j/$Size2)); $ret; }