#!/usr/bin/perl -s
# langcheck - keep only the filename pairs whose files are in the wanted
# languages.
#
# Input (files named on the command line, or STDIN): one pair per line, two
# filenames separated by a TAB.  A pair is echoed to STDOUT only when the
# language reported for the first file equals -l1 and the one reported for
# the second file equals -l2.
#
# Switches (parsed by perl's -s, which is why they are package variables):
#   -l1=LANG  language required for the first file of a pair  (mandatory)
#   -l2=LANG  language required for the second file of a pair (mandatory)
#   -html     dump each file through `w3m -dump -T text/html` before
#             handing it to langident
#   -v        report rejected pairs on STDERR
#   -debug    implies -v
use strict;
use warnings;

our ($l1,$l2,$html,$debug,$v);
$v=1 if $debug;
die ("usage $0 -l1=en -l2=pt [file_which_listOfPathPaisr]\n")
    unless $l1 && $l2;

# Quote a string so that a POSIX shell sees it as exactly one literal word.
# Embedded single quotes are closed, escaped and reopened ('\''), so a
# filename can never terminate the quoting and inject shell syntax.
sub shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Run the external `langident` program on $file and return its chomped
# output (the empty string when nothing could be read back).
# With -html the file is first rendered to plain text by w3m.
sub detect_language {
    my ($file) = @_;
    my $quoted = shell_quote($file);
    my $cmd = $html
        ? "w3m -dump -T text/html $quoted |langident"
        : "cat $quoted | langident";
    my $res = `$cmd`;
    $res = '' unless defined $res;    # backticks yield undef if the shell cannot start
    chomp $res;
    return $res;
}

PAIR:
while (my $line = <>) {
    chomp $line;
    my @fs = split /\t/, $line;

    # A usable line carries two non-empty filenames.  Anything else could
    # never pass the language check, so reject it before spawning commands.
    unless (defined $fs[0] && length $fs[0] && defined $fs[1] && length $fs[1]) {
        print STDERR "\t##malformed line skipped: $line\n" if $v;
        next PAIR;
    }

    # Check the first file against -l1, then the second against -l2; the
    # first mismatch rejects the whole pair (the second file is then not
    # examined at all).
    for my $check ([$fs[0], $l1], [$fs[1], $l2]) {
        my ($file, $wanted) = @{$check};
        my $res = detect_language($file);
        next if $res eq $wanted;
        print STDERR "\t##unwanted languages $file($res not $wanted)\n" if $v;
        next PAIR;
    }

    print "$fs[0]\t$fs[1]\n";
}
__END__
=head1 NAME
langcheck - Perl script for language pairs checking
=head1 SYNOPSIS
langcheck -debug -l1=en -l2=pt [file]
=head1 DESCRIPTION
C<file> (or stdin) should contain lines with 2 filenames separated by a TAB.
The output is the list of filename pairs whose files are in the proper languages.
=head1 AUTHOR
J.Joao Almeida, jj@di.uminho.pt
=head1 SEE ALSO
perl(1).
=cut