#!/usr/bin/perl
#
# Usage: PTD1 PTD2 cp1 cp2
#
# Reads two dictionary files (PTD1, PTD2) and two corpus files (cp1, cp2)
# whose units are terminated by a line containing only "$".  The corpus
# files are read in lock-step, one unit from each per iteration.  For
# every pair of units:
#
#   * each distinct lower-cased word of the cp1 unit that is present in
#     PTD1 is paired with its highest-scoring translation that also occurs
#     in the cp2 unit; the pair is written as two one-word units, the word
#     to "cp1.new" and the translation to "cp2.new";
#   * the same is done in the other direction using PTD2;
#   * words missing from their dictionary are reported on STDERR;
#   * finally the lower-cased units themselves are written to both outputs.
#
# Each dictionary file is evaluated as Perl code and must return a hash
# reference shaped like  { word => { trans => { translation => score } } }.
# NOTE(review): "PTD" presumably stands for a probabilistic translation
# dictionary and the scores for probabilities -- confirm with the producer
# of these files.

use warnings;
use strict;

use File::Spec;
use POSIX qw(locale_h);

# Case folding (lc) below follows the Portuguese locale's character types.
setlocale(LC_CTYPE, "pt_PT");
use locale;

if (scalar(@ARGV) != 4) {
    print "Usage: PTD1 PTD2 cp1 cp2\n";
    exit;
}

for my $path (@ARGV) {
    die "Can't read $path\n" unless -f $path;
}

my ($ptd1_file, $ptd2_file, $cp1, $cp2) = @ARGV;

my $ptd1 = load_ptd($ptd1_file);
my $ptd2 = load_ptd($ptd2_file);
print STDERR "Corpus loaded\n";

# A unit ends with a line holding a single "$".  Set only after the
# dictionaries are loaded, and localized so the change is not permanent.
local $/ = "\n\$\n";

open my $in1,  '<', $cp1       or die "Can't open file $cp1\n";
open my $in2,  '<', $cp2       or die "Can't open file $cp2\n";
open my $out1, '>', "$cp1.new" or die "Can't create file $cp1.new\n";
open my $out2, '>', "$cp2.new" or die "Can't create file $cp2.new\n";

my $unit_no = 0;
while (defined(my $line1 = <$in1>)) {
    $unit_no++;

    # cp1 drives the loop; any units of cp2 beyond the end of cp1 are
    # never read.  If cp2 runs out first, keep the outputs aligned by
    # treating the missing unit as empty instead of operating on undef.
    my $line2 = <$in2>;
    if (!defined $line2) {
        warn "$cp2 has no unit $unit_no; treating it as empty\n";
        $line2 = '';
    }

    $line1 = lc $line1;
    $line2 = lc $line2;
    chomp($line1);
    chomp($line2);

    # split ' ' skips leading whitespace, so no empty-string "word" is
    # produced when a unit starts with blanks or a newline.
    my %w1 = map { $_ => 1 } split ' ', $line1;
    my %w2 = map { $_ => 1 } split ' ', $line2;

    emit_pairs($ptd1, \%w1, \%w2, $out1, $out2, 'L1');
    emit_pairs($ptd2, \%w2, \%w1, $out2, $out1, 'L2');

    print {$out1} "$line1\n\$\n";
    print {$out2} "$line2\n\$\n";
}

# Buffered write errors only surface at close time, so check them.
close $out2 or die "Can't close file $cp2.new: $!\n";
close $out1 or die "Can't close file $cp1.new: $!\n";
close $in2;
close $in1;

# Evaluate a dictionary file and return the hash reference it yields.
#
# The path is made absolute first: "do FILE" looks relative names up in
# @INC, which does not contain the current directory on perl >= 5.26.
# Dies if the file cannot be read, fails to compile, or does not return
# a hash reference.
sub load_ptd {
    my ($path) = @_;

    my $abs = File::Spec->rel2abs($path);
    my $ptd = do $abs;

    if (!defined $ptd) {
        die "Can't parse $path: $@" if $@;
        die "Can't load $path: $!\n";
    }
    die "$path did not return a hash reference\n"
        unless UNIVERSAL::isa($ptd, 'HASH');

    return $ptd;
}

# For every word of one unit, write the word and its best translation
# found in the other unit.
#
#   $ptd         dictionary for the source side
#   $src_words   hash ref whose keys are the distinct words of the source unit
#   $tgt_words   hash ref whose keys are the distinct words of the target unit
#   $src_out     handle that receives the source word
#   $tgt_out     handle that receives the chosen translation
#   $label       side name used in the "New word" diagnostic (L1 or L2)
#
# The best translation is the one with the highest score among those that
# occur in the target unit; equal scores are ordered by string comparison
# so the choice does not depend on hash ordering.  A word with no
# translation present in the target unit produces no output.
sub emit_pairs {
    my ($ptd, $src_words, $tgt_words, $src_out, $tgt_out, $label) = @_;

    for my $word (keys %{$src_words}) {
        if (!exists $ptd->{$word}) {
            print STDERR " - New word $label: $word\n";
            next;
        }

        my $score_of = $ptd->{$word}{trans} or next;

        my ($best) =
            sort { $score_of->{$b} <=> $score_of->{$a} or $a cmp $b }
            grep { exists $tgt_words->{$_} }
            keys %{$score_of};
        next unless defined $best;

        print {$src_out} "$word\n\$\n";
        print {$tgt_out} "$best\n\$\n";
    }

    return;
}