#!/usr/bin/perl
use strict;
use warnings;
use utf8;
use Encode;
use Lingua::FreeLing;
use Lingua::FreeLing::Simple;
# Encoding experiment: run the same Portuguese sentence through the FreeLing
# analyser twice -- once as the string written in this UTF-8 source file and
# once after encoding it to ISO-8859-1 -- and print both results.

# One Lingua::FreeLing::Simple analyser per language code.
my %analysers = ();
my @langs = qw|pt|;

# Initialize analysers. A plain loop replaces map in void context, and the
# constructor is called as Class->new rather than with indirect object syntax.
for my $lang (@langs) {
    $analysers{$lang} ||= Lingua::FreeLing::Simple->new($lang);
}

# Disabled snippet kept for reference. It refers to $tu, $l1 and $l2, which
# are not defined anywhere in the visible part of this script.
# Encode->from_to($tu->{$l1}, "utf8", "iso-8859-1");
# Encode->from_to($tu->{$l2}, "utf8", "iso-8859-1");
# $tu->{$l1} = to_string($analysers{$l1}->analyse($tu->{$l1}));
# $tu->{$l2} = to_string($analysers{$l2}->analyse($tu->{$l2}));
# Encode->from_to($tu->{$l1}, "iso-8859-1", "utf8");
# Encode->from_to($tu->{$l2}, "iso-8859-1", "utf8");

# Character strings printed to STDOUT are written out as UTF-8.
binmode(STDOUT, ":utf8");

my $str1 = "mudanças é que é bonito.";
my $str2 = $str1;

# Disabled alternatives for the conversion of $str2, kept for reference.
##Encode::decode_utf8(Foo::Bar::escape_html(Encode::encode_utf8($what)));
##Encode::from_to($str2, "utf8", "iso-8859-1");
#Encode->from_to($str2, "utf8", "iso-8859-1");

# First run: the string exactly as written in the source.
my $s1 = $analysers{"pt"}->analyse($str1);

# Second run: the same text converted to ISO-8859-1 octets beforehand.
$str2 = encode("iso-8859-1", $str2);
print "'$str1'\n";
#print "'$str2'\n";
my $s2 = $analysers{"pt"}->analyse($str2);

# Show both analyses, separated by a rule, for comparison.
print to_string($s1);
print "----------\n";
print to_string($s2);
# Render analysed sentences as plain text, one word per line.
#
# Arguments:
#   $sentences - array reference of sentence objects. Each sentence must
#                provide get_words (returning an array reference of word
#                objects); each word must provide get_form, get_lemma and
#                get_parole.
#
# Returns:
#   A string holding one line of the form "\t<form> <lemma> <tag>\n" per
#   word, in input order; the empty string when there are no words. The
#   three values are decoded from ISO-8859-1 before being inserted.
sub to_string {
    my ($sentences) = @_;

    my $template_word = qq|\t#FORM# #LEMMA# #TAG#|;
    my $text = "";

    for my $sentence (@$sentences) {
        my $words = $sentence->get_words;

        for my $word (@$words) {
            my %value_for = (
                FORM  => scalar decode("iso-8859-1", $word->get_form),
                LEMMA => scalar decode("iso-8859-1", $word->get_lemma),
                TAG   => scalar decode("iso-8859-1", $word->get_parole),
            );

            # Fill every placeholder in a single pass. Substituting one
            # placeholder at a time would re-scan text that was already
            # inserted, so a form such as "#LEMMA#" would be overwritten by
            # the lemma in the next pass.
            (my $line = $template_word)
                =~ s/#(FORM|LEMMA|TAG)#/$value_for{$1}/g;

            $text .= "$line\n";
        }
    }

    return $text;
}
# Render analysed sentences through a per-word template, wrapping each
# sentence in an opening and a closing "\t\n" line.
#
# Arguments:
#   $sentences - array reference of sentence objects providing get_words
#                (an array reference of word objects); each word provides
#                get_form, get_lemma and get_parole.
#
# Returns:
#   The concatenated text for all sentences; the empty string when the
#   list is empty.
#
# NOTE(review): the word template is an empty string in this file, so the
# #FORM#/#LEMMA#/#TAG# substitutions never match and every word contributes
# just "\t\t\n". The surrounding markup may have been lost -- confirm.
sub to_xml {
    my ($sentences) = @_;

    my $word_template = qq||;
    my @chunks;

    for my $sent (@$sentences) {
        push @chunks, "\t\n";

        my $tokens = $sent->get_words;
        for my $token (@$tokens) {
            my $form  = $token->get_form;
            my $lemma = $token->get_lemma;
            my $tag   = $token->get_parole;

            # Placeholders are filled in the order FORM, LEMMA, TAG.
            my $rendered = $word_template;
            $rendered =~ s/#FORM#/$form/g;
            $rendered =~ s/#LEMMA#/$lemma/g;
            $rendered =~ s/#TAG#/$tag/g;

            push @chunks, "\t\t$rendered\n";
        }

        push @chunks, "\t\n";
    }

    return join "", @chunks;
}