#!/usr/bin/env perl

use strict;
use warnings;

use RENA::LocalConfiguration qw($BASE_DIR);
use RENA::JSpellFilter;
use RENA::JSpellFilters qw(&default_pipeline);
use RENA::Support::Classes;

use File::Spec;
use Getopt::Long qw(:config no_auto_abbrev bundling);
use Pod::Usage;

# Command-line state.  The switches and the verbosity counter start at zero;
# the two output targets stay undefined until the user supplies them.
my ($man, $help, $blank, $xml, $ents, $unify, $harem, $verbose, $hinput)
    = (0) x 9;
my ($multiout, $outname);
my @inputnames;

# Option table handed to Getopt::Long.  Non-option arguments are collected as
# input names through the special "<>" entry, and every occurrence of
# --verbose / -v raises the verbosity level by one.
my @option_spec = (
    "help|?"     => \$help,
    "man"        => \$man,
    "blank|b"    => \$blank,
    "<>"         => sub { my ($arg) = @_; push @inputnames, $arg; },
    "out|O:s"    => \$multiout,
    "output|o=s" => \$outname,
    "xml|x"      => \$xml,
    "ents|e"     => \$ents,
    "unify|u"    => \$unify,
    "harem|h"    => \$harem,
    "H"          => \$hinput,
    "verbose|v"  => sub { $verbose += 1; },
);

GetOptions(@option_spec)
    or die "Specify the --help (or -?) option for usage information.\n";

# Usage summary on --help, full manual page on --man.
if ($help) {
    pod2usage(2);
}
if ($man) {
    pod2usage(-exitstatus => 0, -verbose => 2);
}

# Option consistency checks.
#
# --out takes an optional value, so a bare -O leaves $multiout set to the
# empty string, which is false in boolean context.  Both checks therefore
# test definedness rather than truth; otherwise "-O -o file" and "-O" with no
# input files would slip through unnoticed.
if (defined $multiout and defined $outname) {
    print STDERR "--out and --output options are mutually exclusive.\n";
    die "Specify the --help (or -?) option for usage information.\n"
}
if (defined $multiout and @inputnames == 0) {
    print STDERR "When using --out you must specify at least one input file.\n";
    die "Specify the --help (or -?) option for usage information.\n"
}
# At most one output format may be requested.
if (($xml + $unify + $ents + $harem) > 1) {
    print STDERR "XML, unified output, plain entity listing and HAREM markup are mutually exclusive.\n";
    die "Specify the --help (or -?) option for usage information.\n"
}

# Default processing: XML markup when no other format was selected.
if (($unify + $ents + $harem) == 0) {
    $xml = 1;
}


# Template text for the HAREM markup stage.  The stage is given a reference
# to this variable, so the value assigned later (while reading -H input) is
# the one visible through that reference.
my $TEMPLATE = "";

# Start from the default pipeline and append the output stage that matches
# the selected format.
my @pipeline = @{ default_pipeline("$BASE_DIR") };

if ($ents) {
    push @pipeline, { name => "toents", options => {} };
} elsif ($unify) {
    push @pipeline, { name => "unified_yaml", options => {} };
} elsif ($harem) {
    push @pipeline, { name => "harem_markup", options => {template => \$TEMPLATE} };
} elsif ($xml) {
    push @pipeline, { name => "toxml", options => {} };
} else {
    # Not expected to happen: option handling selects XML by default.
    die "Nooooooooo...";
}


# Class tree handed to the filter as the global "cl" entry.
my $cl = RENA::Support::Classes::load_from_file("$BASE_DIR/tree.yaml");

# Direct method call instead of the indirect-object form "new Class (...)".
my $filter = RENA::JSpellFilter->new(pipeline => \@pipeline,
                                     global   => {cl => $cl});

# Output handle shared by the rest of the script.
my $output;

# Single-output mode (no --out): write to the file named by --output, or to
# standard output when no name (or "-") was given.  $outname is normalised to
# "-" in the latter case.
unless (defined $multiout) {
    if (defined $outname and $outname ne "-") {
        open $output, ">", $outname or die "Can't write to '$outname': $!\n";
    } else {
        $output = \*STDOUT;
        $outname = "-";
    }
}

# Inputs to process: the named files, or standard input when none were given.
my @input = @inputnames ? @inputnames : (\*STDIN);

# Main loop: feed every input to the filter.  When $outname is undefined
# (one output file per input), the result for each input is written and
# closed inside the loop.
for my $name (@input) {

    # A plain string is a file name to open; a reference is an already-open
    # handle (standard input when no input names were given).
    my $in;
    if (not ref($name)) {
        open $in, "<", $name or die "Can't read from '$name': $!\n";
        print STDERR "$name\n" if defined $outname and $verbose;
    } else {
        $in = $name;
    }

    # Path of the per-input output file; only set when $outname is undefined.
    my $target;

    if (not defined $outname) {
        # One output file per input.  A handle has no file name to derive
        # the output name from, so refuse it explicitly.
        die "When using --out you must specify at least one input file.\n"
            if ref($name);

        # Split the input path portably instead of assuming "/" separators.
        my ($volume, $directories, $fname) = File::Spec->splitpath($name);
        die "Can't derive an output file name from '$name'.\n"
            if $fname eq '';

        # Target directory: the one given to --out, else the input's own
        # directory, else the current directory.
        my $dir = (defined $multiout and $multiout ne '')
                ? $multiout
                : File::Spec->catpath($volume, $directories, '');
        $dir = File::Spec->curdir() if $dir eq '';

        my $ext = $ents  ? ".ents"
                : $unify ? ".unif"
                : $harem ? ".har"
                : $xml   ? ".xml" : ".rena";

        $target = File::Spec->catfile($dir, "$fname$ext");
        open($output, ">", $target)
            or die "Não foi possível abrir $target: $!.\n";
        print STDERR "$name -> $target\n" if $verbose;
    }

    if ($hinput) {
        # -H: slurp the whole input, cut the contents of the <TEXTO> element
        # out of it and keep the remainder as the output template.
        local $/ = undef;
        my $input = <$in>;
        $input = "" if not defined $input;

        my $texto;
        $input =~ s!<TEXTO>(.*)</TEXTO>!$texto = $1; "<TEXTO></TEXTO>"!se;

        # Process the extracted text line by line.  The second alternative
        # picks up a final line that has no trailing newline, which would
        # otherwise be dropped.  Nothing is processed when the input has no
        # <TEXTO> element.
        if (defined $texto) {
            for my $line ($texto =~ m![^\n]*\n|[^\n]+!g) {
                $filter->process(text => $line);
            }
        }
        $TEMPLATE = $input;
    } else {
        # Paragraph mode with --blank, otherwise one line at a time.
        local $/ = $blank ? "" : "\n";
        while (defined(my $chunk = <$in>)) {
            $filter->process(text => $chunk);
        }
    }

    # Only close handles opened here; never standard input.
    close $in if not ref($name);

    if (not defined $outname) {
        $filter->apply(global => {output => $output},
                       begin => {debug => 1},
                       oneword => {debug => 2},
                       begin_unified => {debug => 3},
                       );
        # Buffered write errors only surface when the handle is closed.
        close $output or die "Can't close '$target': $!\n";
    }

}

# Single-output mode: everything was accumulated in the filter, so run the
# output stage once, after all inputs have been processed.
if (defined $outname) {
    $filter->apply(global => {output => $output},
                   begin => {debug => 1},
                   oneword => {debug => 2},
                   begin_unified => {debug => 3},
                   );
    # Standard output ("-") is left open.  For a real file, check close():
    # buffered write errors are only reported at that point.
    if ($outname ne "-") {
        close $output or die "Can't close '$outname': $!\n";
    }
}

__END__

=head1 NAME

B<rena> - Analyze and markup named entities

=head1 SYNOPSIS

    rena [options] file ...
    
  Options:
    --help (-?)             brief help message
    --man                   full documentation
    --verbose (-v)          verbose operation
    --blank (-b)            separate input by one or more empty lines
    --xml (-x)              output XML markup (the default)
    --ents (-e)             produce a plain listing of entities
    --unify (-u)            listing of unified entities
    --harem (-h)            HAREM markup
    --output=target (-o)    write all output to a single file
    --out[=targetdir] (-O)  one output file for each input file

=head1 OPTIONS

=over 8

=item B<--help (-?)>

Prints a brief help message and exits.

=item B<--man>

Prints the manual page and exits.

=item B<--verbose (-v)>

Verbose operation.

=item B<--blank (-b)>

Process the input in parts separated by one or more empty lines.

By default, the input is processed one line at a time.

=item B<--xml (-x)>

Produce an XML markup of the input.

=item B<--ents (-e)>

Instead of XML markup, dump a listing of entities for each input file
(the extension for created files will be I<.ents>).

=item B<--unify (-u)>

Unify entities before outputting a listing.

=item B<--harem (-h)>

Instead of the default XML markup output a markup format specifically created
to be used in the HAREM joint evaluation project for NER systems.
See <url here>.

=item B<--output=>I<target>B< (-o)>

Write the markup to I<target> file instead of I<stdout>.

=item B<--out[=>I<target>B<] (-O)>

Write the markup corresponding to each input file to a separate file, either
in the same directory as the input file or to I<target> directory, if supplied.
The extension I<.xml> is appended to each input file name to produce the name of
the corresponding output file (or I<.ents>, I<.unif> or I<.har> if the option
B<-e>, B<-u> or B<-h>, respectively, is used).

=back

=head1 DESCRIPTION

B<This program> does stuff ...

=head2 XML OUTPUT

=head3 ARRAY

=head3 HASH

=head1 EXAMPLES

=head1 BUGS

Neither input text nor properties (names and respective values) are escaped, so
the outputted XML may be invalid.

There is no way to tell the program to use a MIME type other than I<text/css>
for the stylesheet.

=head1 AUTHOR

Edgar Alves, <edgaralves@di.uminho.pt>

=cut
