#!/usr/bin/perl -s
#
# Alignment driver built on NAT::Client, NAT::Matrix and NAT::PatternRules.
#
# Usage (as far as the visible code shows): SCRIPT [-ua] FILE1 FILE2
#   - FILE1 and FILE2 are taken from @ARGV and opened for reading.
#   - Output goes to "FILE1.out" and "FILE2.out", one "TOKEN<TAB>TRANSLATION"
#     line per token.
#   - The -s switch on the shebang line lets a "-ua" command-line switch
#     set the package variable $ua, which selects the input format.
#
# NOTE(review): this file appears to have lost its original line breaks, and
# several spans that began with "<" and ended with ">" look as if they were
# stripped out (the filehandle reads in the main loop, the tag regex in both
# output loops, and the first substitution in clean_ua). The damaged
# statements are kept verbatim below and individually flagged; they must be
# restored from a pristine copy before this script can run.

use POSIX qw(locale_h);
# Select the "pt_PT" character-type locale; together with "use locale" this
# makes lc() below follow that locale's case rules.
setlocale(LC_CTYPE, "pt_PT");
use locale;
use warnings;
use strict;
use Data::Dumper;   # not referenced anywhere in the visible code
use NAT::Client;
use NAT::Matrix;
use NAT::PatternRules;

# Set by the -ua command-line switch (see the header comment).
our ($ua);

my ($f1, $f2) = @ARGV;

# Both resource locations are hard-coded absolute paths.
my $rules = NAT::PatternRules->parseFile("/home/ambs/Natura/main/NATools/rules.test-case");
my $client = NAT::Client->new( Local => "/home/extra2/CORPORA/COMPARA/COMPARINHA");
print STDERR "Dictionary loaded.\n";

# Choose the input record separator: without -ua a record ends at a "$"
# immediately followed by a newline.
# NOTE(review): with -ua the separator is a bare newline, yet the code below
# splits each record on newlines again -- part of this string literal may
# have been lost together with the other stripped spans; confirm.
if ($ua) { $/ = "\n"; } else { $/ = "\$\n"; }

# Two-argument opens on bareword handles: F1/F2 are the inputs, O1/O2 the
# outputs. No explicit close appears in the visible code.
open F1, $f1 or die $!;
open F2, $f2 or die $!;
open O1, ">$f1.out" or die $!;
open O2, ">$f2.out" or die $!;

my ($l1, $l2);
# NOTE(review): the right-hand sides of the next two assignments are missing.
# Presumably they were the readline operators on F1 and F2 (the two handles
# opened above and never read elsewhere) -- confirm against a pristine copy.
while ($l1 = ) {
    $l2 = ;
    # Keep the untouched records; the -ua output loops iterate over their lines.
    my ($left, $right) = ($l1,$l2);
    my (@w1,@w2);
    if ($ua) {
        # -ua mode: clean and lowercase each record, then keep the first
        # whitespace-separated field of every remaining line.
        $l1 = lc(clean_ua($l1));
        $l2 = lc(clean_ua($l2));
        @w1 = map { my @a = split /\s+/; $a[0] } split /\n/, $l1;
        @w2 = map { my @a = split /\s+/; $a[0] } split /\n/, $l2;
    } else {
        # Plain mode: lowercase, strip the trailing record separator, and
        # split the record into whitespace-separated words.
        $l1 = lc($l1);
        $l2 = lc($l2);
        chomp($l1);
        chomp($l2);
        @w1 = split /\s+/, $l1;
        @w2 = split /\s+/, $l2;
    }

    # Build the matrix for this record pair and collect its blocks:
    # findDiagonal, then grep_blocks, then combine_blocks with a second
    # argument of 1; each combined block is converted with dump_block.
    my $matrix = NAT::Matrix->new($client, $rules, \@w1, \@w2);
    $matrix->findDiagonal;
    my $blocks = $matrix->grep_blocks;
    my $bs = $matrix->combine_blocks($blocks, 1);
    my @blocks = ();
    for my $b (@$bs) { push @blocks, $matrix->dump_block($b); }

    ## Alignment Matrix ready, now output the alignment
    #
    # Two symmetric passes follow: the first walks the left-hand items and
    # writes to O1, the second resets $block to 0, walks the right-hand items
    # and writes to O2. In -ua mode the items are the lines of the untouched
    # record; otherwise they are the words in @w1 / @w2.
    #
    # The surviving tail of each pass walks the keys of $data->[1] in
    # descending numeric order of their values and keeps the first key that
    # exists in %words as $tr. If $tr is true it prints "ITEM<TAB>$tr";
    # otherwise it prints the item followed by element 1 (first pass) or
    # element 0 (second pass) of $blocks[$block].
    #
    # NOTE(review): in both passes the text between "m!^" and "ptd(" is
    # missing. It presumably held the rest of the regex, the branch taken for
    # matching lines, the setup of $word, %words and $tr, any advancing of
    # $block, and the start of a "$data = ...->ptd(...)" call -- none of that
    # can be recovered from what is visible here.
    my $block = 0;
    my @linha = $ua? (split /\n/, $left) : @w1;
    for my $linha (@linha) {
        if ($ua && $linha =~ m!^ptd({direction => '~>'}, $word);
            for my $t (sort {$data->[1]{$b} <=> $data->[1]{$a}} keys %{$data->[1]}) {
                if (exists($words{$t})) { $tr = $t; last; }
            }
            if ($tr) { print O1 "$linha\t$tr\n" } else { print O1 "$linha\t$blocks[$block][1]\n" }
        }
    }

    $block = 0;
    @linha = $ua? 
(split /\n/, $right) : @w2;
    for my $linha (@linha) {
        if ($ua && $linha =~ m!^ptd({direction => '<~'}, $word);
            for my $t (sort {$data->[1]{$b} <=> $data->[1]{$a}} keys %{$data->[1]}) {
                if (exists($words{$t})) { $tr = $t; last; }
            }
            if ($tr) { print O2 "$linha\t$tr\n" } else { print O2 "$linha\t$blocks[$block][0]\n" }
        }
    }

    # In plain mode, terminate each record's output with an empty line.
    if (!$ua) { print O1 "\n"; print O2 "\n"; }
}

print STDERR "** DONE **\n";

# clean_ua(TEXT) -> cleaned copy of TEXT.
#
# Applies a fixed sequence of substitutions to its first argument and returns
# the result. The work is done in the global $_, which is assigned without
# being localized, so the caller's $_ is overwritten.
sub clean_ua {
    $_ = shift;
    # NOTE(review): the pattern here is empty, which looks like another
    # stripped span. As written, Perl reuses the last successfully matched
    # pattern instead of matching "nothing" -- restore the original pattern.
    s!!!gs;
    # Blank out every line that starts with "<" (the newline itself stays).
    s!^<.*$!!mg;
    # Collapse runs of newlines into a single newline.
    s!\n+!\n!g;
    # No /m on the last two: "^" only anchors at the very start of the text,
    # so they drop leading blank space up to a newline, and then a first line
    # that begins with "*".
    s!^\s*\n!!g;
    s!^\*.*\n!!g;
    return $_;
}