#!/usr/bin/perl

use strict;
use warnings;
use File::Find;
use Template;
use Cwd;
use XML::DT;
use Data::Dumper;
use File::Slurp 'slurp';

use DBI;

my $SIZE = 500;
my $global;
chomp(my $date = `date +%Y-%m-%d`);

my $warn = 0;
if (-f "make.aux") {
    print STDERR "Loading auxiliary index\n";
    $warn = 1;
    $global = do "make.aux";
}


my $DB = DBI->connect("dbi:SQLite:dbname=../xml.db", "", "");
die unless $DB;
$DB->{sqlite_unicode} = 1;

print STDERR "Checking current dictionary range\n";
my ($first,$last) = get_range($DB);

my $TT = Template->new(INCLUDE_PATH => getcwd(),
                       ENCODING => 'utf8',
                       RELATIVE => 1);

print STDERR "Checking ranges per letter\n";
my $alphabet = [ map {
    my ($f, $t) = get_range($DB, $_);
    my $c = count($DB, $_);
    +{name => $_, from=>$f, count => $c, to=>$t}
} ('A'..'Z') ];

my $ID = 0;
my @extras;
my @allparts;
for my $k (@$alphabet) {
    print STDERR "Checking letter indexes ($k->{name})\n";
    if ($k->{count} > $SIZE * 1.1) {
        $k->{filename} = "index$k->{name}";
        my $thissize = $SIZE;
        my $v = $k->{count} % $thissize;
        while ($v > 0 && $v < 20) {
            $thissize++;
            $v = $k->{count} % $thissize;
        }
        my $parts = int($k->{count} / $thissize + .5);
        my $sth = $DB->prepare("SELECT word FROM entry WHERE normalized LIKE \"$k->{name}\%\" ORDER BY normalized ASC, n ASC LIMIT $thissize OFFSET ?");
        my @parts;
        for my $p (0 .. $parts-1) {
            ++$ID;
            $sth->execute($p* $thissize);
            my @row = $sth->fetchrow_array();
            my $f = { offset => $p*$thissize, limit => $thissize,
                      letter => $k->{name}, from => $row[0], id => $ID };
            while (@row = $sth->fetchrow_array()) {
                $f->{to} = $row[0];
            }
            push @parts, $f;
            push @allparts, $f;
        }

        $TT->process("OPS/indexA.xml-mt",
                     { parts => \@parts },
                     "OPS/index$k->{name}.xml",
                     binmode => ':utf8');
        push @extras, "index$k->{name}";
    } else {
        ++$ID;
        $k->{filename} = "words$ID";
        push @allparts, { letter => $k->{name}, id => $ID };
    }
}

for my $part (@allparts) {
    print STDERR "Creating file $part->{id}\n";
    my $select = "SELECT * FROM entry WHERE normalized LIKE \"$part->{letter}\%\" ORDER BY normalized ASC, n ASC";
    $select .= " LIMIT $part->{limit} OFFSET $part->{offset}" if ($part->{limit});

    my $sth = $DB->prepare($select);
    $sth->execute;
    my @row;
    my @vs;
    while (@row = $sth->fetchrow_array) {
        my $thisid = "$row[0]:$row[1]";
        $global->{$thisid} = "words$part->{id}";
        push @vs, { id => $thisid,
                    word => $row[1]?"$row[0]<sup>$row[1]</sup>":$row[0],
                    def  => format_xml($row[2]) }
    }
    my $vars = { letra => $part->{letter}, words => \@vs };
    if ($part->{from}) {
        $vars->{from} = $part->{from};
        $vars->{to}   = $part->{to};
    }

    $TT->process("OPS/words.xml-mt", $vars,
                 "OPS/words$part->{id}.xml",
                 binmode => ':utf8');
    push @extras, "words$part->{id}";

}

open O, ">", "make.aux";
print O Dumper($global);
close O;

find( { wanted => sub {
            return unless /-tt$/;
            print STDERR "Processing $_\n";
            my $out = $_;
            $out =~ s/-tt$//;
            $TT->process($_,
                         {
                          extrafiles => [@extras],
                          date => $date,
                          from => $first, to=> $last,
                          alphabet => $alphabet,
                         },
                         $out, binmode => ':utf8') || die $TT->error();
        },
        no_chdir => 1,
      }, ".");

my @files = slurp("MANIFEST");
my $files = join(" ", @files, map { "OPS/$_.xml"} @extras);
$files =~ s/\n//g;
`zip foo.zip $files`;
`mv foo.zip dicaberto.epub`;

sub count {
    my ($DB, $l) = @_;
    my $sth = $DB->prepare("SELECT COUNT(*) FROM entry WHERE normalized LIKE \"$l\%\"");
    $sth->execute;
    my ($c) = $sth->fetchrow_array;
    return $c;
}

sub _link {
    my $word = shift;
    my $id = shift || 0;
    ($word, $id) = split /:/, $word if $word =~ /:/;
    my $key = "$word:$id";
    if (exists($global->{$key})) {
        return "$global->{$key}.xml#$key"
    } elsif ($warn) {
        print "WARNING: Could not find [$key]\n";
        return "";
    }
}

sub get_range {
    my $DB = shift;
    my $l = shift;
    my ($from, $to);
    if ($l) {
        my $sth = $DB->prepare("SELECT word, n FROM entry WHERE normalized LIKE \"$l\%\" ORDER BY normalized,n ASC LIMIT 1");
        $sth->execute;
        my @row = $sth->fetchrow_array;
        $from = $row[1]?"$row[0]<sup>$row[1]</sup>":$row[0];

        $sth = $DB->prepare("SELECT word, n FROM entry WHERE normalized LIKE \"$l\%\" ORDER BY normalized DESC LIMIT 1");
        $sth->execute;
        @row = $sth->fetchrow_array;
        $to = $row[1]?"$row[0]<sup>$row[1]</sup>":$row[0];
    } else {
        my $sth = $DB->prepare("SELECT word, n FROM entry ORDER BY normalized,n ASC LIMIT 1");
        $sth->execute;
        my @row = $sth->fetchrow_array;
        $from = $row[1]?"$row[0]<sup>$row[1]</sup>":$row[0];

        $sth = $DB->prepare("SELECT word, n FROM entry ORDER BY normalized DESC LIMIT 1");
        $sth->execute;
        @row = $sth->fetchrow_array;
        $to = $row[1]?"$row[0]<sup>$row[1]</sup>":$row[0];
    }
    return ($from, $to);
}

sub trim {
    my $x = shift;
    for ($x) {
        s/^\s*//;
        s/\s*$//;
    }
    return $x;
}

sub format_xml {
    my $xml = shift;
    $xml = dtstring($xml,
                    -default => sub {
                        $c
                    },
                    phon => sub {
                        father("phon", $c); ""
                    },
                    form => sub {
                        $c = trim($c);
                        $c .= ", (<i>$v{phon}</i>)" if $v{phon};
                        $c;
                    },
                    orth => sub {
                        my $n = gfather("n") || "";
                        $n && ($n = "<sup>$n</sup>");
                        "<b>$c$n</b>"
                    },
                    usg     => sub {
                        my $f = father("usg") || "";
                        father("usg", "$f $c"); ""
                    },
                    sense => sub {
                        $q = "span";
                        if ($v{gramgrp} || $v{usg}) {
                            $v{gramgrp} ||= "";
                            $v{usg}     ||= "";
                            $a = "<i>$v{gramgrp} $v{usg}</i>";
                            delete($v{gramgrp});
                            delete($v{usg});
                            $c = "$a. $c"
                        }
                        toxml
                    },
                    gramGrp => sub {
                        father("gramgrp", $c); ""
                    },
                    etym => sub {
                        for ($c) {
                            # Links
                            # s!De\s*_([a-záéíóúàèìòùãõêîôâûç][^_]+)_!De <i><a href="$query$1">$1</a></i>!g;
                            # s!De\s*_([a-záéíóúàèìòùãõêîôâûç][^_]+)_ \+ _([a-záéíóúàèìòùãõêîôâûç][^_]+)_!De <i><a href="$query$1">$1</a></i> + <i><a href="$query$2">$2</a></i>!g;
                            s!_([^_]+)_( \*)?!<i>$1</i>!g;
                            s!\^(\d)!<sup>$1</sup>!g;
                            s!\^\{([^\}]+)\}!<sup>$1</sup>!g;
                 }
                 $c
             },
                    def => sub {
                        for ($c) {
                            s!\[\)e\]!&#277;!g;
                            s!^[\s\n]*!!;
                            s![\s\n]*$!!;
                            s!\n! !g;
                            # Links
                            s{\[\[([^]|]+)\|([^]]+)\]\]}
                             {"<i><a href='"._link($2)."'>$1</a></i>"}ge;

                            s{\[\[([^]]+)\]\]}
                             {"<i><a href='"._link($1)."'>$1</a></i>"}ge;

                            s{O mesmo que\s*_([^_]+)_}
                              {"O mesmo que <i><a href='"._link($1)."'>$1</a></i>"}ge;

                            s{De\s*_([a-záéíóúàèìòùãõêîôâûç][^_]+)_ \+ _([a-záéíóúàèìòùãõêîôâûç][^_]+)_}
                             {"De <i><a href='"._link($1)."'>$1</a></i> + <i><a href='"._link($2)."'>$2</a></i>"}ge;

                            s{De\s*_([a-záéíóúàèìòùãõêîôâûç][^_]+)_}
                              {"De <i><a href='"._link($1)."'>$1</a></i>"}ge;

                            s{Cp\.\s*_([^_]+)_}
                             {"Cp. <i><a href='"._link($1)."'>$1</a></i>"}ge;

                            s{V\.\s*_([^_]+)_}
                             {"V. <i><a href='"._link($1)."'>$1</a></i>"}ge;

                            s!_([^_]+)_( \*)?!<i>$1</i>!g;
                            s!\^\{([^\}]+)\}!<sup>$1</sup>!g;
                            s!\^(\d|[a])!<sup>$1</sup>!g;
                        }
                        $c
                    }
                   );
    return $xml;
}
