package XML::TMX::Metrics;

use warnings;
use strict;
use NAT::PTD;
use Lingua::StopWords qw( getStopWords );

use base 'Exporter';
our @EXPORT = 'ptd_m';

=head1 NAME

XML::TMX::Metrics - Calculate Metrics for TMX files.

=cut

our $VERSION = '0.01';

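=head1 SYNOPSIS

    use XML::TMX::Metrics;

    # init is not exported, so it is called with its full name
    XML::TMX::Metrics::init($lang1, $lang2, $ptd1, $ptd2);

    my $score = ptd_m(\@words1, \@words2);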

=head1 EXPORT

C<ptd_m> is exported by default. C<init> is not exported and must be
called as C<XML::TMX::Metrics::init>.

=head1 FUNCTIONS

=cut

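=head2 init

    XML::TMX::Metrics::init($l1, $l2, $ptd1, $ptd2);

Stores the state used by C<ptd_m>: the stop-word lists obtained from
C<getStopWords> for the languages C<$l1> and C<$l2>, the two language
identifiers, and the two dictionaries C<$ptd1> and C<$ptd2>. It must be
called before C<ptd_m>.

=cut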

# Module-wide state: filled in by init() and read by ptd_m(). It stays
# undef until init() runs, which is how ptd_m() detects a missing init().
my $base = undef;

# init($l1, $l2, $ptd1, $ptd2)
#
# Records the module state that ptd_m() reads later:
#   $l1, $l2     - language identifiers, passed to getStopWords()
#   $ptd1, $ptd2 - dictionaries, stored as given
sub init {
	my ($l1, $l2, $ptd1, $ptd2) = @_;

	# Stop-word tables, fetched in language order and stored one by one.
	for my $slot ([sw1 => $l1], [sw2 => $l2]) {
		my ($key, $lang) = @$slot;
		$base->{$key} = getStopWords($lang);
	}

	# Remaining fields are stored verbatim.
	@{$base}{qw(l1 l2 ptd1)} = ($l1, $l2, $ptd1);

	# Kept as the final statement so the implicit return value is unchanged.
	$base->{ptd2} = $ptd2;
}


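=head2 ptd_m

    my $score = ptd_m(\@words1, \@words2);

Returns a score for a pair of segments, each given as a reference to an
array of words. The score is computed from the translation probabilities
found in the dictionaries passed to C<init>; stop words are weighted .25
and all other words .75, and the two directional scores are averaged.

Dies with C<Module not initialized!> if C<init> has not been called.
Diagnostic lines are printed to STDERR.

=cut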

# ptd_m(\@f1, \@f2)
#
# Scores how well two word lists translate each other, using the
# dictionaries and stop-word lists stored by init().
#
#   $f1 - array ref with the words of the first-language segment
#   $f2 - array ref with the words of the second-language segment
#
# Each direction is scored independently by _direction_score(): words of
# $f1 are looked up in the first dictionary and matched against $f2, then
# words of $f2 are looked up in the second dictionary and matched against
# $f1. The result is the mean of the two directional scores.
#
# Dies with "Module not initialized!" when init() has not been called.
# Prints the segments and the per-direction averages to STDERR.
sub ptd_m {
	my ($f1, $f2) = @_;

	die "Module not initialized!\n" unless ref $base;

	printf STDERR "[%s] [%s]\n", join(" ",@$f1), join(" ",@$f2);

	### TODO: check for words or non-words that are identical in both languages

	# Each call uses its own accumulators, so the target score is no longer
	# polluted by the source probabilities.
	my $ST = _direction_score('Source', $f1, $f2, $base->{ptd1}, $base->{sw1});
	my $TS = _direction_score('Target', $f2, $f1, $base->{ptd2}, $base->{sw2});

	return .5*$ST + .5*$TS
}

# _direction_score($label, \@words, \@other_words, $dict, $stop)
#
# Scores one translation direction. Every word of @words receives the
# highest probability among its translations in $dict that also occur in
# @other_words, or 0 when none does. Probabilities are averaged separately
# for stop words (keys of $stop) and for the remaining words, reported on
# STDERR under $label, and combined as .25 * stop-word average +
# .75 * other-word average.
sub _direction_score {
	my ($label, $words, $other_words, $dict, $stop) = @_;

	# Set of words in the opposite segment, for direct membership tests.
	my %in_other;
	@in_other{@$other_words} = ();

	my (@sword_probs, @word_probs);
	for my $w (@$words) {
		# Unknown words, or entries without a translation table, have no candidates.
		my $trans = (exists $dict->{$w} && $dict->{$w}{trans}) || {};

		# Best probability among the translations present in the other segment.
		my $prob;
		for my $t (keys %$trans) {
			next unless exists $in_other{$t};
			$prob = $trans->{$t}
				if !defined($prob) || $trans->{$t} > $prob;
		}
		$prob = 0 unless defined $prob;

		if (exists $stop->{$w}) {
			push @sword_probs, $prob;
		}
		else {
			push @word_probs, $prob;
		}
	}

	my $sw_avg  = _average(@sword_probs);
	my $nsw_avg = _average(@word_probs);
	printf STDERR "%s: SW: %.3f NSW: %.3f\n", $label, $sw_avg, $nsw_avg;

	return .25*$sw_avg + .75*$nsw_avg;
}

# _average(@numbers)
#
# Arithmetic mean of the arguments; an empty argument list yields 0.
sub _average {
	my @values = @_;

	# Nothing to average: avoid dividing by zero.
	return 0 unless @values;

	my $sum = 0;
	foreach my $value (@values) {
		$sum += $value;
	}

	return $sum / @values;
}

=head1 AUTHOR

Natura, C<< <ambs at di.uminho.pt> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-xml-tmx-metrics at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=XML-TMX-Metrics>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.




=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc XML::TMX::Metrics


You can also look for information at:

=over 4

=item * RT: CPAN's request tracker

L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=XML-TMX-Metrics>

=item * AnnoCPAN: Annotated CPAN documentation

L<http://annocpan.org/dist/XML-TMX-Metrics>

=item * CPAN Ratings

L<http://cpanratings.perl.org/d/XML-TMX-Metrics>

=item * Search CPAN

L<http://search.cpan.org/dist/XML-TMX-Metrics/>

=back


=head1 ACKNOWLEDGEMENTS


=head1 COPYRIGHT & LICENSE

Copyright 2009 Natura, all rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.


=cut

1; # End of XML::TMX::Metrics
