#!/usr/bin/perl -s
# htmlaligner - align two HTML files of a bitext and write the result as TMX.
#
# Command-line switches are parsed by perl's own -s option (see the shebang
# line): -l1=..., -l2=..., -o=... and -local end up in the package variables
# declared with `our` below.  The two remaining arguments are the HTML files.

use strict;
use warnings;

#undef $/;
#$/='';

our ( $l1, $l2, $o, $local );    ## options

my $f1 = shift;
my $f2 = shift;

# Both input files are mandatory; without them every later command would be
# built from undefined values.
die "usage: $0 [-l1=LANG] [-l2=LANG] [-o=OUT.tmx] [-local] file1.html file2.html\n"
    unless defined $f1 && defined $f2;

# Language labels default to the file names, the output name to a name
# derived from the two labels.
$l1 ||= $f1;
$l2 ||= $f2;
$o  ||= "_$l1-$l2.tmx";
$local = ( $local ? "-local" : "" );

# Intermediate files; all of them are removed at the end of the run.
my $pml1      = "_$l1.pml";
my $pml2      = "_$l2.pml";
my $align     = "${pml1}-${pml2}.align";
my $align_rev = "${pml2}-${pml1}.align";

## html2pml -latin1output -listofpairs -noimg p$G.pairs p$N.$O p$N.$T
## LANG=pt_PT xmlalign2cqp p$N.$O p$N.$T

# Step 1: convert each HTML file with html2pml, capturing its output.
_run( "html2pml -latin1output -noimg " . _shell_quote($f1) . " > " . _shell_quote($pml1) );
_run( "html2pml -latin1output -noimg " . _shell_quote($f2) . " > " . _shell_quote($pml2) );

# Step 2: align the two converted files.  $local is either "-local" or the
# empty string, so it is interpolated unquoted on purpose.
_run( "xmlalign2cqp $local " . _shell_quote($pml1) . " " . _shell_quote($pml2) );

# Step 3: turn the alignment into TMX and echo whatever the tool printed.
my $tmx_cmd = "cqpalign2tmx $local " . _shell_quote($align) . " " . _shell_quote($o);
my $report  = `$tmx_cmd`;
if ( !defined $report ) {
    warn "cannot run cqpalign2tmx: $!\n";
    $report = '';
}
elsif ( $? != 0 ) {
    warn "command failed (wait status $?): $tmx_cmd\n";
}
print $report;

# The report may contain a figure of the form R=<number>; anything above 80
# is treated as a bad alignment and the output file is renamed to flag it.
if ( $report =~ /R=(\d+)/ ) {
    my $score = $1;
    if ( $score > 80 ) {
        warn("problems: bad alignment($score)\n");
        rename( $o, "$o.BADALIGNMENT" )
            or warn "cannot rename $o to $o.BADALIGNMENT: $!\n";
    }
}

# Clean up the intermediate files.  Both orderings of the .align name are
# removed; files that do not exist are silently skipped by unlink.
unlink $pml1, $pml2, $align, $align_rev;

# Quote a string so that /bin/sh sees it as exactly one literal word.
sub _shell_quote {
    my ($word) = @_;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

# Run a shell command line.  A non-zero status is reported on STDERR but does
# not abort the script, so the remaining steps and the cleanup still run.
# Returns true when the command succeeded.
sub _run {
    my ($cmd) = @_;
    my $status = system($cmd);
    warn "command failed (wait status $status): $cmd\n" if $status != 0;
    return $status == 0;
}

__END__

=head1 NAME

htmlaligner - align html bitexts

=head1 SYNOPSIS

 htmlaligner -l1=en -l2=pt -o=en.pt.tmx f_en.html f_pt.html

=head1 DESCRIPTION

Converts the two HTML files with C<html2pml>, aligns the results with
C<xmlalign2cqp> and writes a TMX file with C<cqpalign2tmx>. When the
report printed by C<cqpalign2tmx> contains C<R=E<lt>nE<gt>> with a value
above 80, the output file is renamed to F<E<lt>outputE<gt>.BADALIGNMENT>
and a warning is printed.

=head2 Options

=over 4

=item B<-l1=>I<name>, B<-l2=>I<name>

Labels for the first and second file; they default to the file names.

=item B<-o=>I<file>

Output TMX file; defaults to F<_E<lt>l1E<gt>-E<lt>l2E<gt>.tmx>.

=item B<-local>

Passes C<-local> to C<xmlalign2cqp> and C<cqpalign2tmx>.

=back

=head2 Prerequisites

 html2pml, xmlalign2cqp, cqpalign2tmx
 cwb-utils
 cwb -- corpora workbench (Stuttgart)

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut