package Klass;

use strict;
use vars qw($VERSION @ISA);
use Exporter;
use Carp;

# debug only
use Data::Dumper;


# Module version string.
$VERSION = '0.05';
# Inherit from Exporter and AutoLoader.
# NOTE(review): only Exporter is loaded with 'use' in the visible source;
# AutoLoader is never loaded here and no @EXPORT/@EXPORT_OK list is
# declared -- confirm both parents are still needed.
@ISA = qw(Exporter AutoLoader);

#our $debug="0";

# Constructor: build an empty classifier object.
#
# The object is a blessed hash with a single 'klass' slot, an initially
# empty hash in which loadkltable() stores one table per theme.
#
# Returns the new object, blessed into the invoking class.
sub new {
   my ($class) = @_;

   my $self = bless { klass => {} }, $class;

   return $self;
}

# Load a classification table from a Perl data file and register it under
# a theme name.
#
# Named parameters:
#   theme    - name under which the table is stored (required, must be true)
#   filename - path of a file whose evaluation yields a hash reference
#              mapping patterns to { topic => weight } hashes (required)
#
# The file is evaluated with do(), i.e. executed as Perl code: only load
# table files from trusted sources.
#
# Returns 1 on success. Croaks when a parameter is missing, the theme is
# already loaded, the file does not exist, the file cannot be read or
# compiled, or it does not evaluate to a hash reference. Nothing is stored
# when loading fails, so the same theme can be retried afterwards.
sub loadkltable{
   my $obj = shift;
   my %opts = @_;

   croak "'theme' parameter missing" if !exists $opts{theme} || !$opts{theme};
   # Tables live under $obj->{klass}, so that is where duplicates are looked up.
   croak $opts{theme}." 'theme' already defined" if exists $obj->{klass}{$opts{theme}};
   croak "'filename' parameter missing" if !exists $opts{filename} || !$opts{filename};
   croak "file ".$opts{filename}." doesn't exist" if !(-e $opts{filename});

   # do() searches @INC for relative paths that do not start with ./ or ../;
   # an absolute path guarantees the file checked by -e is the one loaded.
   require File::Spec;
   my $path = File::Spec->rel2abs($opts{filename});

   my $table = do($path);
   if (!defined $table) {
      # $@ is checked first: a compile failure may also leave $! set.
      croak "error: $@\n" if $@;
      croak "error: $!\n" if $!;
      croak "error: file ".$opts{filename}." did not return a table\n";
   }
   # $! is deliberately not consulted on success: it may hold a stale value
   # left behind by an unrelated earlier system call.
   croak "error: file ".$opts{filename}." did not return a hash reference\n"
      if ref($table) ne 'HASH';

   $obj->{klass}{$opts{theme}} = $table;
   return 1;
}

# Classify the contents of a file.
#
# Named parameters:
#   filename - path of the file to read (required)
#
# The whole file is read into memory and handed to klassString(), whose
# return value is returned unchanged.
#
# Croaks when 'filename' is missing or the file does not exist; dies when
# the file cannot be opened.
sub klassFile{
   my $obj = shift;
   my %opts = @_;

   croak "'filename' parameter missing" if !exists $opts{filename} || !$opts{filename};
   croak "file ".$opts{filename}." doesn't exist" if !(-e $opts{filename});

   # Three-argument open with a lexical handle: the filename can never be
   # interpreted as a mode or a command, and the handle cannot leak.
   open(my $fh, '<', $opts{filename})
      or die("error: opening file '".$opts{filename}."': $!");

   # Slurp mode is limited to this block so the caller's $/ is untouched.
   my $str = do { local $/; <$fh> };
   close $fh;

   return $obj->klassString( str => $str );
}

# Classify a string against every loaded table.
#
# Named parameters:
#   str - text to classify (required, must be a non-empty string)
#
# Each key of a table is used as a regular expression and matched as a
# whole word; every occurrence adds the weights attached to that key to
# the running totals of its topics.
#
# Returns a reference to a hash of the form
#   { theme => { topic => accumulated_score, ... }, ... }
# Themes with no match at all do not appear in the result.
#
# Croaks when 'str' is missing, undefined or empty.
sub klassString{
   my $obj = shift;
   my %opts = @_;

   # Test definedness and length rather than truth, so the text "0" is accepted.
   croak "'str' parameter missing"
      if !exists $opts{str} || !defined $opts{str} || $opts{str} eq '';

   my $string = $opts{str};
   my %c;

   foreach my $hashT (keys %{$obj->{klass}}) {
      my $kltable = $obj->{klass}{$hashT};   # used by reference, no copy

      for my $word (keys %$kltable) {
         # Group the pattern so both word boundaries apply to all of it,
         # including patterns with top-level alternation such as 'a|b'.
         my $re = qr{\b(?:$word)\b};

         # Count in scalar context: list-context matching would return
         # one element per capture group and inflate the count.
         my $hits = 0;
         $hits++ while $string =~ m{$re}g;
         next if !$hits;

         my $weights = $kltable->{$word};
         for my $tema (keys %$weights) {
            $c{$hashT}{$tema} += $weights->{$tema} * $hits;
         }
      }
   }

   return \%c;
}

#sub biggest{ my %sc = @_;
## my $bi=-1;
# my $a,$b;
# my @what=("ignore");
# while (($a,$b) = each(%sc)){
#    push(@what,$a)      if ($b == $bi);
#    ($bi,@what)=($b,$a) if ($b >  $bi);
# }
# (@what);
#}

#sub fileGrp{
#  my @fs = @_;
#  (map { ($_ => +{klassFile($_)} )} @fs)
#}

#sub fileGrpBig{
#  my @fs = @_;
#  (map { ($_ => [biggest(klassFile($_))] )} @fs)
#}

1;
__END__

=head1 NAME

Klass - Perl extension for table-based classification of documents

=head1 SYNOPSIS

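  use Klass;

  my $k = Klass->new;
  $k->loadkltable(theme => "topics", filename => "tablefile");
  my $c1 = $k->klassString(str => "string");
  my $c2 = $k->klassFile(filename => "filename");
  # biggest() is currently commented out in the module source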

=head1 DESCRIPTION

Reads a classification table and makes the classification of a string 
or file based on that table.

=head2 Format of the tablefiles

Example of a pattern table file:

 {
  'score'     => { 'music' => 0.1, 'sport' => 0.9}, 
  '[Pp]iano'  => { 'music' => 1},
  'parsers?'  => { 'cc' => 1},
  'Camões'    => { 'literature'=> 1 },
 } 

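=head2 Classification returned by klassString and klassFile

The value returned by the method klassString is a reference to a hash that
maps each loaded theme to a hash of keyword => value pairs.

Example (for a table loaded under the theme 'topics'):

 { 'topics' => { 'cc'    => 0.9 ,
                 'sport' => 0.9 ,
                 'music' => 1.1 } }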

The user can select the most voted classification(s) with C<klass::biggest> or
try to build a more clever function.

=head2 Classification of a group of files

 klass::fileGrp(f1,f2,...)
 klass::fileGrpBig(f1,f2,...)

=head1 AUTHOR

J.Joao C<jj@di.uminho.pt>

=head1 SEE ALSO

perl(1).

=cut
