#!/usr/bin/perl -w -s
#
# Filter a pair of sentence-aligned files by sentence-length ratio.
#
# Usage:
#   script [-batch] LEFT_FILE RIGHT_FILE
#
# Both files are read record by record; a record ends with a line that
# contains only "$".  For every pair of records the number of words on each
# side is counted and the ratio shorter/longer is computed.  A pair whose
# ratio is below $RATIO is considered "bad".
#
#   * With -batch, bad pairs go to LEFT.wrong / RIGHT.wrong and the others
#     to LEFT.ok / RIGHT.ok; one "(left,right) ratio" line is printed per
#     bad pair.
#   * Without -batch, every bad pair is shown to the user, who may accept,
#     delete or edit it, or quit reviewing (all remaining pairs are then
#     accepted).  The result is written to LEFT.new / RIGHT.new.
#
# A short summary (smallest ratio seen among bad pairs, longest sentence,
# number of bad pairs) is printed at the end.

use strict;
use warnings;

our ($batch);    # set by the -s switch when the script is run with -batch

my ($f1, $f2) = @ARGV;

my $RATIO      = 0.19;       # pairs with a length ratio below this are bad
my $RECORD_SEP = "\n\$\n";   # record terminator used by the input files

die "Need two aligned files in NATools format\n"
    unless defined $f1 && defined $f2 && -f $f1 && -f $f2;

open my $in_left,  '<', $f1 or die "Cannot open file $f1\n";
open my $in_right, '<', $f2 or die "Cannot open file $f2\n";

# Output handles, stored as [ left_handle, right_handle ].
my ($wrong_out, $ok_out, $new_out);
if ($batch) {
    $wrong_out = [ open_for_write("$f1.wrong"), open_for_write("$f2.wrong") ];
    $ok_out    = [ open_for_write("$f1.ok"),    open_for_write("$f2.ok") ];
}
else {
    $new_out = [ open_for_write("$f1.new"), open_for_write("$f2.new") ];

    # Prompts such as "New left: " do not end in a newline; make sure they
    # are visible before we block waiting for the answer.
    $| = 1;
}

local $/ = $RECORD_SEP;

my ($left, $right);
my $min  = 1;    # smallest ratio seen among bad pairs
my $max  = 0;    # largest word count seen on either side
my $bad  = 0;    # number of bad pairs
my $quit = 0;    # true once the user stopped reviewing

PAIR:
while (defined($left = <$in_left>) && defined($right = <$in_right>)) {
    # Remember the record number now: reading STDIN later changes "$.".
    my $pair_no = $.;

    chomp $left;
    chomp $right;

    my $cl = count_words($left);
    my $cr = count_words($right);

    $max = max($max, max($cl, $cr));

    # Both counts are at least 1, so the division is always safe.
    my $ratio  = min($cl, $cr) / max($cl, $cr);
    my $is_bad = $ratio < $RATIO;

    if ($batch) {
        if ($is_bad) {
            $min = $ratio if $min > $ratio;
            $bad++;
            print "($cl,$cr) $ratio\n";
            write_pair($wrong_out, $left, $right);
        }
        else {
            write_pair($ok_out, $left, $right);
        }
        next PAIR;
    }

    if (!$is_bad) {
        write_pair($new_out, $left, $right);
        next PAIR;
    }

    print STDERR "$ratio, $cl, $cr\n";
    $min = $ratio if $min > $ratio;
    $bad++;

    if ($quit) {
        write_pair($new_out, $left, $right);
        next PAIR;
    }

    print "\n\n-[$pair_no]----------- ($cl,$cr) $ratio\n";
    print "\n$left\n---\n$right";
    print "\n *** (A)ccept (D)elete (E)dit (Q)uit *** \n";

    my $ans = read_answer();

    if (!defined $ans) {
        # STDIN is exhausted: nobody can answer further prompts, so behave
        # as if the user had chosen (Q)uit.
        print "Accepting all remaining pairs...\n";
        write_pair($new_out, $left, $right);
        $quit = 1;
    }
    elsif ($ans =~ m!^a!i) {
        print "Accepting pair...\n";
        write_pair($new_out, $left, $right);
    }
    elsif ($ans =~ m!^d!i) {
        print "Deleting pair...\n";
    }
    elsif ($ans =~ m!^q!i) {
        print "Accepting all remaining pairs...\n";
        write_pair($new_out, $left, $right);
        $quit = 1;
    }
    elsif ($ans =~ m!^e!i) {
        print "New left: ";
        my $nl = read_answer();
        print "New right: ";
        my $nr = read_answer();

        # On end of input keep the original text rather than writing an
        # empty record.
        $nl = $left  unless defined $nl;
        $nr = $right unless defined $nr;

        write_pair($new_out, $nl, $nr);
    }
    else {
        print "What? $ans? Bah.. accepting...\n";
        write_pair($new_out, $left, $right);
    }
}

close $in_left;
close $in_right;

# Buffered write errors only show up at close time, so check them.
for my $out (grep { defined } $wrong_out, $ok_out, $new_out) {
    for my $fh (@$out) {
        close $fh or die "Cannot close output file: $!\n";
    }
}

print "Min: $min\n";
print "Max Sentence size in words: $max\n";
print "Number of bad pairs: $bad\n";

# Open $path for writing (truncating it) and return the file handle.
# Dies if the file cannot be opened.
sub open_for_write {
    my ($path) = @_;
    open my $fh, '>', $path or die "Cannot open file $path\n";
    return $fh;
}

# Write one aligned pair; $out is [ left_handle, right_handle ].  Each side
# is followed by the record terminator.
sub write_pair {
    my ($out, $left_text, $right_text) = @_;
    print { $out->[0] } "$left_text$RECORD_SEP";
    print { $out->[1] } "$right_text$RECORD_SEP";
    return;
}

# Read one newline-terminated line from STDIN, regardless of the record
# separator currently in effect.  Returns the line without its newline, or
# undef at end of input.
sub read_answer {
    local $/ = "\n";
    my $line = <STDIN>;
    return unless defined $line;
    chomp $line;
    return $line;
}

# Number of words in $text, computed as (runs of whitespace) + 1.
sub count_words {
    my ($text) = @_;
    my $gaps = () = $text =~ /\s+/g;
    return $gaps + 1;
}

# Larger of two numbers.
sub max { $_[0] > $_[1] ? $_[0] : $_[1] }

# Smaller of two numbers.
sub min { $_[0] < $_[1] ? $_[0] : $_[1] }