#!/usr/bin/perl -s

# langcheck - filter a list of file pairs, keeping only the pairs whose
# first file is identified as language -l1 and whose second file is
# identified as language -l2.
#
# Input  : lines of "file1<TAB>file2", read from the files named on the
#          command line or from stdin.
# Output : the accepted pairs, one "file1<TAB>file2" line each, on stdout.
#
# Language identification is delegated to the external `langident`
# command; with -html each file is first rendered to text by `w3m`.

use strict;
use warnings;

# Populated by perl's -s switch parsing (-l1=en -l2=pt -html -debug -v).
our ($l1, $l2, $html, $debug, $v);

$v = 1 if $debug;    # -debug implies verbose diagnostics

die ("usage $0 -l1=en -l2=pt [file_which_listOfPathPaisr]\n") unless $l1 && $l2;

# Quote a string so that the shell treats it as exactly one literal word.
# Filenames come from the input and may contain quotes, spaces or shell
# metacharacters; every embedded single quote is rewritten as '\''.
sub _shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Return the chomped output of `langident` for one file ('' when the
# command yields nothing).  With -html the file is dumped as text by w3m
# first; otherwise its raw content is piped in.
sub _identify_language {
    my ($file) = @_;

    my $quoted  = _shell_quote($file);
    my $command = $html
        ? "w3m -dump -T text/html $quoted |langident"
        : "cat $quoted | langident";

    my $language = `$command`;
    $language = '' unless defined $language;    # command could not be run
    chomp $language;
    return $language;
}

LINE:
while (my $line = <>) {
    chomp $line;
    my ($first, $second) = split /\t/, $line;

    # A usable line needs two non-empty filenames.
    unless (defined $first && length $first && defined $second && length $second) {
        print STDERR "\t##malformed line ($line)\n" if $v;
        next LINE;
    }

    # Check the first file against -l1, then the second against -l2.
    # The second file is only examined when the first one matched.
    for my $check ([$first, $l1], [$second, $l2]) {
        my ($file, $wanted) = @{$check};
        my $found = _identify_language($file);
        next if $found eq $wanted;

        print STDERR "\t##unwanted languages $file($found not $wanted)\n" if $v;
        next LINE;
    }

    print "$first\t$second\n";
}

__END__

=head1 NAME

langcheck - Perl script for language pairs checking

=head1 SYNOPSIS

 langcheck -debug -l1=en -l2=pt [file]

=head1 DESCRIPTION

C<file> (or stdin) should have lines with 2 filenames separated with a TAB.

Output is the list of filename pairs with the proper languages: the first
file of each pair must be identified as C<-l1> and the second as C<-l2>.
Lines that do not contain two filenames are skipped.

=head1 OPTIONS

=over 4

=item B<-l1=>I<lang>, B<-l2=>I<lang>

Expected languages of the first and second file of each pair. Both are
required. The values are compared with the output of C<langident>.

=item B<-html>

Render each file with C<w3m -dump -T text/html> before identifying its
language.

=item B<-v>, B<-debug>

Report rejected pairs on stderr. C<-debug> implies C<-v>.

=back

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut