#!/usr/bin/env perl

use strict; use warnings;
use Text::Perfide::PartialAlign qw/
		usage 
		subcorpora2files 
		calc_pairs 
		_log
		get_corpus
		calc_common_tokens
		build_chain/;
use Data::Dumper;
use Getopt::Long;

# Command-line driver: parse the options, read the two corpora, compute the
# chain of anchor pairs and write the resulting sub-corpora files.
#
# Positional arguments: file1 file2 output_prefix lang1 lang2 [max_chunk_size]

# NOTE(review): the POD documents --cf=... as taking a file name, but it is
# declared below as a plain flag -- confirm which form the library expects.
my $options = {};

# Getopt::Long reports malformed options on STDERR by itself; its return
# value is not acted upon here, so the run continues with whatever parsed.
GetOptions ($options,
	'rs=s'		,
	'sec'		,
	'debug'		,
	'v'			,
	'cf'		,
	'max=i'		,
	'all'		,
);

# Record separator: --sec forces '_sec', otherwise the --rs value is used,
# falling back to a newline. It is assigned globally (not with local) so it
# stays in effect for the rest of the run.
$options->{rs} //= "\n";
$options->{rs}   = '_sec' if $options->{sec};
$/ = $options->{rs};

# --sec implies --all, and --all implies --max=1.
$options->{all} = 1 if $options->{sec};
$options->{max} = 1 if $options->{all};

# Validate the positional arguments on their own. --max is no longer pushed
# onto @ARGV, so it can neither stand in for a missing positional argument
# nor turn a valid six-argument call into a usage error.
usage(@ARGV) unless (@ARGV == 5 or @ARGV == 6);
my ($huFilename,$enFilename,$outputFilename,$huLangName,$enLangName) = @ARGV[0..4];

# Chunk size precedence: a true --max value (also set by --all/--sec), then
# the optional sixth positional argument, then the default of 5000.
my $maximalChunkSize = $options->{max} ? $options->{max}
                     : @ARGV == 6      ? $ARGV[5]
                     :                   5000;

_log("Reading corpora...");
my ($huCorpus, $huOffsets, $huTextRef) = get_corpus($huFilename);
my ($enCorpus, $enOffsets, $enTextRef) = get_corpus($enFilename);
_log("Done.");

# Pipeline: common tokens -> candidate pairs -> chain -> output files.
my ($commonHap,$huPositions,$enPositions) = calc_common_tokens($huCorpus,$enCorpus);
my $pairs = calc_pairs($commonHap,$huPositions,$enPositions,$huCorpus,$enCorpus);
my $chain = build_chain($pairs,$maximalChunkSize,$options);
subcorpora2files($chain,$huTextRef,$enTextRef,$huOffsets,$enOffsets,$outputFilename,$huLangName,$enLangName);


__END__

=head1 NAME

partial_align2 - partial aligner that splits two corpora into matching sub-corpus files

=head1 SYNOPSIS

 partial_align2 [options] file1 file2 output_prefix l1 l2 [max_size]
 
=head1 DESCRIPTION

=head2 Options

 --rs=...	Define record separator (Perl's $/). Default is the newline character (\n).
 --sec   	Split by section annotations added by Text::Perfide::BookCleaner (same as --rs=_sec --all).
 -v		Create several files with dumps of auxiliary structures.
 --cf=... 	Pass an additional file containing correspondences between the two languages.
 		File must follow the format

				term(,term)* = term(,term)*

 --all		Try to split into as many files as possible (same as --max=1).
 --max=...	Maximum size of the split files (in bytes). Default is 5000.

=head2 EXPORT

=head1 AUTHOR

Andre Santos, andrefs@cpan.org

J.Joao Almeida, jj@di.uminho.pt


=head1 SEE ALSO

perl(1).

=cut