#!/usr/bin/perl

# For every TMX file named on the command line, run each translation-unit
# segment through a per-language FreeLing analyser and write the annotated
# result through an XML::TMX::Writer opened with OUTPUT => "<file>.freeling".

use strict;
use warnings;
use utf8;

use XML::TMX::Reader;
use XML::TMX::Writer;
use Lingua::FreeLing::Simple;
use Data::Dumper;

# One analyser per language code. Declared outside the file loop so an
# analyser built for one input file is reused for the following ones.
my %analysers = ();

for my $file (@ARGV) {
    my $tmin  = XML::TMX::Reader->new($file);
    my $tmout = XML::TMX::Writer->new();
    my @langs = $tmin->languages;

    $tmout->start_tmx(DATATYPE => 'xml', OUTPUT => "$file.freeling");

    # initialize analysers
    # (plain loop rather than map: this is done purely for its side effect)
    for my $lang (@langs) {
        next if $analysers{$lang};
        $analysers{$lang} = Lingua::FreeLing::Simple->new($lang);
    }

    # process TMX file
    # The callback receives one translation unit as a hash reference that is
    # indexed here by language code; languages absent from a unit are skipped.
    $tmin->for_tu(
        { },
        sub {
            my $tu  = shift;
            my %tus = ();

            for my $lang (@langs) {
                next unless defined $tu->{$lang};

                my $sentences = $analysers{$lang}->analyse($tu->{$lang});

                # One to_xml() string per sentence, newline-separated.
                $tus{$lang} = join "\n",
                    Lingua::FreeLing::Simple::forall(
                        $sentences,
                        { sentence => \&to_xml, returns => 1 },
                    );
            }

            $tmout->add_tu(%tus);
        }
    );

    $tmout->end_tmx();
}

# to_xml($sentence)
#
# Serialise one analysed sentence as text: one self-closing element per word,
# carrying every field of the word tuple (lemma and tag are requested) as a
# key="value" attribute. Words are separated by newlines; the result starts
# with "\n\t" and ends with "\n".
#
# NOTE(review): the original body of this sub was corrupted (unbalanced
# braces, apparently a stripped "<...>" span). The element name "w" and the
# key="value" attribute form are a reconstruction -- confirm against the
# intended output format. A sentence-level wrapper element may also have been
# lost around the leading "\n\t" / trailing "\n"; those literals are kept
# exactly as found.
# NOTE(review): attribute values are interpolated without XML escaping, as the
# surviving code suggests the original did -- verify whether the TMX writer
# escapes segment content.
sub to_xml {
    my $sentence = shift;

    my $ws    = $sentence->get_words;
    my @words = Lingua::FreeLing::Simple::get_tuple($ws, { lemma => 1, tag => 1 });

    my @elements = map {
        my $w = $_;

        # Sorted keys keep the attribute order stable between runs
        # (Perl's hash order is not).
        my @attributes = map { qq{$_="$w->{$_}"} } sort keys %$w;

        '<w ' . join(' ', @attributes) . ' />';
    } @words;

    return "\n\t" . join("\n", @elements) . "\n";
}