#!/usr/bin/env perl

use common::sense;
use Lingua::Identify::CLD;


# Accuracy check for Lingua::Identify::CLD.
#
# Every directory in the current working directory is treated as one language
# sample set: the directory name is the expected language code and each plain
# file inside it is identified and compared against that code.  A per-folder
# accuracy line and a global summary line are printed to STDOUT.

my %map = langmap();    # identifier's language name => language code
my $identifier = Lingua::Identify::CLD->new();

my @folders = grep { -d } <*>;

my $total = 0;
my $ok = 0;

for my $folder (@folders) {

#    next if $folder =~ /sr-cyr/;

    my ($lang) = ( $folder =~ m{([^/]*)$} );
    print ":: $lang\n";

    # Every "sr-*" folder is scored against the plain "sr" code.
    $lang = "sr" if $lang =~ /^sr-/;

    # Only plain files can be slurped; nested directories are not samples.
    my @files = grep { -f } <"$folder/*">;

    # Without samples there is nothing to score, and the percentage below
    # would be a division by zero.
    if (!@files) {
        print "    no files found\n";
        next;
    }

    my ($partial_total, $partial_ok) = (0, 0);
    for my $file (@files) {
        $partial_total++;

        # Slurp the whole file at once.
        my $contents;
        {
            local $/ = undef;
            open my $fh, "<:utf8", $file
                or die "Cannot open '$file' for reading: $!";
            $contents = <$fh>;
            close $fh;
        }

        my $guess = $identifier->identify($contents);

        # A guess that is missing from the map can never count as a match.
        my $guessed_code = $map{$guess};
        if (defined $guessed_code && $lang eq $guessed_code) {
            ++$partial_ok;
        } else {
            printf "    got %10s [%s]\n", $guess, $file;
        }
    }

    # Folders without a single correct guess are left out of the global
    # totals.  NOTE(review): presumably these are languages the identifier
    # does not support at all -- confirm this exclusion is intended.
    if ($partial_ok) {
        $total += $partial_total;
        $ok    += $partial_ok;
    }
    printf "    %.4f%% of accuracy (%d files)\n", 100*$partial_ok/$partial_total, $partial_total;
}

# The global ratio is undefined when no folder contributed any file.
if ($total) {
    printf "----[ Accuracy: %.4f ]---[ %d files tested ]-------------------------\n", 100*($ok/$total), $total;
} else {
    print "----[ Accuracy: n/a ]---[ 0 files tested ]-------------------------\n";
}


#----

# Returns a flat, ordered list of NAME => code pairs: an upper-case language
# name followed by its short language code.  Assign the result to a hash to
# look a code up by name.
sub langmap {
    return qw(
        AFRIKAANS            af
        ALBANIAN             sq
        ARABIC               ar
        ARMENIAN             hy
        BASQUE               eu
        BENGALI              bn
        BULGARIAN            bg
        BURMESE              my
        CATALAN              ca
        CHINESE              zh-cn
        CHINESET             zh-tw
        CROATIAN             hr
        CZECH                cs
        DANISH               da
        DUTCH                nl
        ENGLISH              en
        ESTONIAN             et
        FINNISH              fi
        FRENCH               fr
        GALICIAN             gl
        GEORGIAN             ka
        GERMAN               de
        GREEK                el
        GUJARATI             gu
        HEBREW               he
        HINDI                hi
        HUNGARIAN            hu
        INDONESIAN           id
        ITALIAN              it
        JAPANESE             ja
        KANNADA              kn
        KOREAN               ko
        LATVIAN              lv
        LITHUANIAN           lt
        MACEDONIAN           mk
        MALAY                ms
        MALAYALAM            ml
        MALTESE              mt
        NORWEGIAN            nn
        PERSIAN              fa
        POLISH               pl
        PORTUGUESE           pt
        ROMANIAN             ro
        RUSSIAN              ru
        SERBIAN              sr
        SLOVAK               sk
        SLOVENIAN            sl
        SPANISH              es
        SWAHILI              sw
        SWEDISH              sv
        TAGALOG              tl
        TAMIL                ta
        TG_UNKNOWN_LANGUAGE  ??
        THAI                 th
        TURKISH              tr
        UKRAINIAN            uk
        URDU                 ur
        VIETNAMESE           vi
    );
}