#!/usr/bin/perl -s

use Lingua::NATools;
use warnings;
use strict;

# Populated by perl's -s switch parsing when the script is invoked with -h.
our ($h);

# Print a short usage summary on STDOUT and terminate with exit status 0.
sub usage {
    print "nat-pair2tmx: join two files in NATools input format into a TMX.\n\n";
    print "\tnat-pair2tmx <file1> <lang1> <file2> <lang2>\n\n";
    print "For more help, please run 'perldoc nat-pair2tmx'\n";
    exit 0;
}

usage() if $h;

# Four positional arguments are required: a file and a language descriptor
# for each side of the corpus.  A missing (or false) argument shows the
# usage text instead.
my $sourceF = shift or usage();
my $sourceL = shift or usage();
my $targetF = shift or usage();
my $targetL = shift or usage();

# The language descriptors end up inside XML attributes, so escape them.
my $srcLang = attr($sourceL);
my $tgtLang = attr($targetL);

# NOTE(review): the TMX markup below was reconstructed (the literal tags
# were missing from the script); it follows the TMX 1.4 header layout.
# The unit text is copied through without any decoding, so the declared
# encoding assumes UTF-8 corpora -- adjust it if yours differ.
print qq{<?xml version="1.0" encoding="UTF-8"?>\n};
print qq{<!DOCTYPE tmx SYSTEM "tmx14.dtd">\n};
print qq{<tmx version="1.4">\n};
print qq{<header creationtool="nat-pair2tmx" creationtoolversion="1.0"\n};
print qq{        segtype="sentence" o-tmf="NATools" adminlang="en"\n};
print qq{        srclang="$srcLang" datatype="plaintext">\n};
print qq{</header>\n};
print qq{ <body>\n};

open my $src, '<', $sourceF or die "nat-pair2tmx: cannot open '$sourceF': $!\n";
open my $tgt, '<', $targetF or die "nat-pair2tmx: cannot open '$targetF': $!\n";

{
    # Translation units are separated by a line holding a single dollar
    # sign.  The separator is localized so that it only affects this block.
    local $/ = "\n\$\n";

    my $missing = 0;    # source units that had no target counterpart

    while (!eof($src)) {
        my $l0 = <$src>;
        my $l1 = <$tgt>;

        # Test definedness rather than truth, so that a unit consisting
        # only of "0" is not turned into an empty string.
        $l0 = "" unless defined $l0;
        if (!defined $l1) {
            $l1 = "";
            $missing++;
        }

        chomp($l0);
        chomp($l1);
        ($l0, $l1) = (n($l0), n($l1));

        print qq{  <tu>\n};
        print qq{   <tuv xml:lang="$srcLang">\n};
        print qq{    <seg>$l0</seg>\n};
        print qq{   </tuv>\n};
        print qq{   <tuv xml:lang="$tgtLang">\n};
        print qq{    <seg>$l1</seg>\n};
        print qq{   </tuv>\n};
        print qq{  </tu>\n};
    }

    # Both files are expected to hold the same number of units; report a
    # mismatch on STDERR but still finish the (best-effort) TMX.
    warn "nat-pair2tmx: '$targetF' ran out of units; "
       . "$missing unit(s) have an empty target segment\n"
        if $missing;
    warn "nat-pair2tmx: '$targetF' has more units than '$sourceF'; "
       . "the extra units were ignored\n"
        unless eof($tgt);
}

print qq{ </body>\n};
print qq{</tmx>\n};

close $tgt;
close $src;

# Normalize one translation unit for use as <seg> content: replace inline
# markup with a space, collapse whitespace runs into a single space, and
# escape the XML special characters.  The ampersand is escaped first so
# that the entities produced for < and > are not escaped a second time.
sub n {
    my $str = shift;
    $str =~ s/<[^>]+>/ /g;
    $str =~ s/\s+/ /g;
    $str =~ s/&/&amp;/g;
    $str =~ s/</&lt;/g;
    $str =~ s/>/&gt;/g;
    return $str;
}

# Escape a value for use inside a double-quoted XML attribute: the same
# normalization as n(), plus double quotes.
sub attr {
    my $value = n(shift);
    $value =~ s/"/&quot;/g;
    return $value;
}

__END__

=encoding UTF-8

=head1 NAME

nat-pair2tmx - join two files in NATools input format into a TMX file.

=head1 SYNOPSIS

  nat-pair2tmx <file1> <lang1> <file2> <lang2>

=head1 DESCRIPTION

This script is used to convert a pair of files in NATools input format
(translation units separated by a dollar sign) into a TMX file.

To use it supply two NATools input files (with same number of
translation units) and two language descriptors. For instance,

  nat-pair2tmx corpus.pt pt corpus.en en > corpus-pt-en.tmx

Note that the TMX will be output to STDOUT.

=head1 SEE ALSO

NATools documentation, perl(1)

=head1 AUTHOR

Alberto Manuel Brandão Simões, E<lt>ambs@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006-2009 by Alberto Manuel Brandão Simões

=cut