#!/usr/bin/perl
#
# Builds two word-to-word mappings from a pair of NAT probabilistic
# translation dictionaries and dumps them with Data::Dumper.
#
# Usage: SCRIPT dicfile1 lexicon1 dicfile2 lexicon2
#
# A pair (a, b) is kept when the first translation listed for a in one
# dictionary is b, the first translation listed for b in the other dictionary
# is a, and both listed probabilities reach the threshold.
#
# NOTE(review): only the first (id, probability) pair of each entry is
# examined, so this presumably relies on NAT::Dict listing translations
# by decreasing probability -- confirm against the NAT::Dict documentation.

use strict;
use warnings;

use constant DEFAULT_THRESHOLD => 0.7;

use Data::Dumper;

# Run as a script only when executed directly; this keeps the helpers
# loadable (and testable) from another file without side effects.
main(@ARGV) unless caller;

# Entry point: opens the four resources named on the command line, computes
# the mapping in both directions and prints both hashes to STDOUT.
# Dies with a usage message when fewer than four arguments are given.
sub main {
    my @args = @_;

    die "Usage: $0 dicfile1 lexicon1 dicfile2 lexicon2\n" if @args < 4;
    my ($dicfile1, $lexicon1, $dicfile2, $lexicon2) = @args;

    # Loaded at run time: only fully qualified names are used, so nothing
    # needs to be imported at compile time.
    require NAT::Dict;
    require NAT::Lexicon;

    my $dic1 = NAT::Dict::open($dicfile1);
    die "Cannot open dictionary '$dicfile1'\n" unless defined $dic1;
    my $lex1 = NAT::Lexicon::open($lexicon1);
    die "Cannot open lexicon '$lexicon1'\n" unless defined $lex1;
    my $dic2 = NAT::Dict::open($dicfile2);
    die "Cannot open dictionary '$dicfile2'\n" unless defined $dic2;
    my $lex2 = NAT::Lexicon::open($lexicon2);
    die "Cannot open lexicon '$lexicon2'\n" unless defined $lex2;

    print STDERR "Calculating dictionary 1\n";
    my $dicX = mutual_best_translations($dic1, $dic2, $lex1, $lex2);

    print STDERR "Calculating dictionary 2\n";
    my $dicY = mutual_best_translations($dic2, $dic1, $lex2, $lex1);

    # Sorted keys make the dump reproducible from one run to the next.
    local $Data::Dumper::Sortkeys = 1;
    print Dumper($dicX, $dicY);

    return;
}

# Walks every entry of $dic_from and keeps the words whose first listed
# translation points back at them in $dic_to.
#
#   $dic_from, $dic_to  - objects providing for_each(CODE) and vals(ID)
#   $lex_from, $lex_to  - objects providing word_from_id(ID)
#   $threshold          - minimum probability required in both directions
#                         (optional, defaults to DEFAULT_THRESHOLD)
#
# Returns a hash reference mapping a word of $lex_from to a word of $lex_to.
sub mutual_best_translations {
    my ($dic_from, $dic_to, $lex_from, $lex_to, $threshold) = @_;
    $threshold = DEFAULT_THRESHOLD unless defined $threshold;

    my %translation_of;

    $dic_from->for_each(
        sub {
            # The callback receives word => id, occ => count and
            # vals => [ id, probability, id, probability, ... ].
            my %entry = @_;

            my ($target_id, $forward_prob) = _first_pair($entry{vals});
            return unless defined $target_id;
            return unless $forward_prob >= $threshold;

            # scalar(): the translation list is consumed as one reference.
            my ($back_id, $back_prob)
                = _first_pair(scalar $dic_to->vals($target_id));
            return unless defined $back_id;
            return unless $back_prob >= $threshold;

            # Identifiers are compared numerically, as the lexicon ids are.
            return unless $back_id == $entry{word};

            $translation_of{ $lex_from->word_from_id($entry{word}) }
                = $lex_to->word_from_id($target_id);
            return;
        }
    );

    return \%translation_of;
}

# Returns the first (id, probability) pair of a flat translation list, or an
# empty list when the argument is not an array reference holding a complete,
# defined pair (word without translations, identifier unknown to the
# dictionary, ...).
sub _first_pair {
    my ($vals) = @_;

    return unless ref $vals eq 'ARRAY';
    return unless @{$vals} >= 2;

    my ($id, $prob) = @{$vals}[0, 1];
    return unless defined $id && defined $prob;

    return ($id, $prob);
}