package text::translate;

use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

require Exporter;
require AutoLoader;

@ISA = qw(Exporter);
@EXPORT= qw(trans  trans_dic  trans_prefix trans_und trans_dont_touch trans_ppp 
  trans_userdic trans_und_function);

$VERSION = '0.04';

=head1 NAME

text::translate - a perl extention for naif translation

=head1  SYNOPSIS

   use text::translate;
   trans_dic(<dictname>*)
   trans_userdic(<dictname>)              # where new words are saved
   trans_prefix(<undefWordPrefix>);
   trans(<string1>):string
   trans_ppp(<PrePostProcessingFunction>);# pre and pos processing
   trans_und(<filename>)
   trans_dont_touch(<patternstring>*)
   trans_und_function(<function or ID or JUSTPREFIX or INTERACTIVE>)

=head1  DESCRIPTION

=cut

use locale ;
my @newwords=();
my $prefix="@";
my %dict;

sub trans_interactive_mode{
	my ($p,$contexto)=@_;
        open(I,"+< /dev/tty");
	print I "$contexto$p?[=?\\n!] ";
        my $r = <I>;
        close(I);
        chop($r);
        if( $r eq "?") {
          $r=$prefix.$r;
        }
        elsif( $r eq "") {
          $r=$p ;
        }
        elsif( $r eq "2") {
          $r=$p ;
          $dict{lc($p)} = lc($r);
          push (@newwords,lc($p));
        }
        elsif( $r eq "=") {
          $r=$p ;
          $dict{lc($p)} = lc($r);
          push (@newwords,lc($p));
        }
        elsif( $r eq "!") {
          trans_und_function("ID");
          $r=$p ;
        }
        else {
          $dict{lc($p)} = lc($r);
          push (@newwords,lc($p));
        }
        $r };

my $status;
my $und_log;
my $undeffun = \&trans_interactive_mode ; 
my $dont_touch="#";
my $w='\w+(?:[\'-]\w+)*';  # word definition:
my $user_dict_name= "$ENV{HOME}/.text_translate";

sub trans_userdic{
  $user_dict_name=shift;
  if($status=="DICLOAD" and -e $user_dict_name){
    my $a,$b;
    open(DICT,"< $user_dict_name") || die("cant open $user_dict_name\n");
    while(<DICT>){
      chop;
      if(! /^#|^$/){
        ($a,$b)= split (/=/);
        $dict{$a}=$b;
      }
    }
   close(DICT);                        
  }
}

sub trad::rs::posProc {};
sub preProc {};

BEGIN { $status = "INIT";}
END   { save_new_dict();}

=head2 Setting dictionary(s)

In order to define the list of dictionaries to be used, call

 trans_dic(<filename1>, ... , <filenameN>);

=cut

sub trans_dic {
 my @dictlist=@_;
 my $old=$/;
 $/ = "\n";
 
 if(not @dictlist) {@dictlist= ("dict") }

# carregar o dicionário (todo em minúsculas)
# regras com sequencias de nao mais que 5 palavras
# $dict_name=$dictlist[0];
 if (-e $user_dict_name){ push (@dictlist, $user_dict_name);}
 my ($a,$b,$type);
 for(@dictlist){
   open(DICT,"< $_") || die("cant open $_\n");
   while(<DICT>){
     chop;
     if(! /^#|^$/){
       ($a,$b,$type)= split (/=/);
       $dict{lc($a)}=$b;
     }
   }
   close(DICT);
   if (-f "$_.pp") {require "$_.pp";}
 }
 $status="DICLOAD";
 $/ = $old;
}

=head2 Text not to be touched

In order to define the parts of the text not to be touched, call

 trans_dont_touch(<regExpressionString>*);

=cut

sub trans_dont_touch { $dont_touch = '('.
                                     join(')|(?:',@_) . 
                                     ')'; }

=head2 Undefined word processor function

 trans_und_function(function)

=cut

sub trans_und_function {
    my $a=shift;
    if($a eq "INTERACTIVE"){$undeffun= \&trans_interactive_mode}
    elsif($a eq "JUSTPREFIX"){undef $undeffun;}
    elsif($a eq "ID"){$prefix = "";
                      undef $undeffun;}
    elsif($a eq "ASKALTAVISTA"){}
    else{ $undeffun = $a;}
 }

=head2 Translating a phrase

In order to translate a phrase call

  trans(<phrase>)

The translated phrase will be returned

=cut

sub trans{
  die "Dict nod loaded :-( (missing a trans_dict call)" if ($status eq "INIT");
  my $r=$_[0];
  my $aux2;

  for($r){
    &preProc;

    s/=/@@/g;            # para guardar os '='
    $_ = '=' . $_ ;      # poe um = no inicio da linha.
                         # "=" define a posicao actual na tradução
   while(/=/){
   s/=(([{}0-9,.;?!\s-]+|$dont_touch)+)/$1=/;
   if (/=($w $w $w $w $w)(\s*)/ and defined $dict{lc($1)}) { #regra de 5 palav
      s/=($w $w $w $w $w)(\s*)/traduz($1,$2)/e}
   elsif (/=($w $w $w $w)(\s*)/ and defined $dict{lc($1)}) { #regra de 4 palav
      s/=($w $w $w $w)(\s*)/traduz($1,$2)/e}
   elsif (/=($w $w $w)(\s*)/    and defined $dict{lc($1)}) { #regra de 3 palav
      s/=($w $w $w)(\s*)/traduz($1,$2)/e}
   elsif(/=($w $w)(\s*)/        and defined $dict{lc($1)}) { #regra de 2 palav
      s/=($w $w)(\s*)/traduz($1,$2)/e}
   elsif(/=($w)(\s*)/           and defined $dict{lc($1)}) { #regra de 1 palav
      s/=($w)(\s*)/traduz($1,$2)/e}
   elsif(/=($w)(\s*)/) {                                # palavra desconhecida
      if($undeffun){
        s/=($w)(\s*)/($aux2=&{$undeffun}($1,$_)) ."$2="/e ;
        my $aux1=$1;
 # print "$aux1=$aux2\n"  if ($und_log);
        print UND "$aux1=$aux2\n"  if ($und_log);}
      else {
        s/=($w)(\s*)/$prefix$1$2=/   ; 
        print UND "$1=\n"  if ($und_log);}}
   else {
      s/=\s*$//;                           # chegou-se ao fim da linha
      s/==/@@=/;                           # cuidar de =!!
      s/=([^\w])/$1=/;}                    # avancar outros caracteres  
   }

   &trad::rs::posProc;
   s/@@/=/g;             # volta a por os =

   return($_);
   }
}

=head2 Undefined word prefix

in order to define the prefix to tag undefined words, call:

  trans_prefix(<newprefix>)

by default the prefix is "@".

=cut 

sub trans_prefix{ $prefix=shift; } 

=head2 Pos and pre processing

user can define o preprocessing function

  preProc();

and a postprocessing function

  trad::rs::posProc();

to adapt notations.

In order to do that, create a new file with those functions and
call

  trans_ppp(<filename>);

to activate them.

If filename has ".rr" suffix, then the rewriting system notation will be assumed
and trans_mkpp function will be called.

=cut

sub trans_ppp { 
  my $a=shift;
  my $b;
  if($a =~ /^(.*)\.rr$/){
     $b=$1; 
     trans_mkpp($a ,"$b.pos");
     $a ="$b.pos";
  }
  require $a ;
} 

sub trans_mkpp{
  my($f1,$f2)=@_;
  open(F1,"<$f1") or die("cant open $f1\n");
  open(F2,">$f2") or die("cant open $f2\n");
  
  print F2 "package trad::rs;\n# dont edit this file; edit $f1\n";
  while(<F1>){
    if(/RULES/){print F2 "\nsub posProc{\n  while(\n";}
    elsif(/(.*)==>(.*)/){print F2 " s/".$1."/".$2."/go ||\n";}
    else{print F2 $_;}
  }
  print F2 "  0){}\n} 1; \n";
  close F2;
  close F1;
}

=head2 Undefined word log file

In order to have a undefined word log file, call:

  trans_und(<filename>);

=cut

sub trans_und {
 ($und_log) = @_;
 `cp $und_log _${$}_`;
 open (UND, "|sort -u > $und_log") || die "cant open $und_log :-( \n";
 open X, "_${$}_" or die "_${$}_ not exists?";
 while(<X>) {
   print UND;
 }
 close X;
 unlink "_${$}_";
}
 
sub traduz{                                        
  my ($a,$s)= @_ ;
  if     (defined $dict{$a})
              {$dict{$a}."$s="}                   # =gato  -> cat =
  elsif  (defined $dict{lcfirst($a)})
              {ucfirst($dict{lcfirst($a)})."$s="} # =Gato  -> Cat =
  elsif  (defined $dict{lc($a)})
              {uc($dict{lc($a)})."$s="}           # =GATO  -> CAT =
  else   {"=$a$s"}                                # =ggg   -> =ggg
}

sub simplifynewrule{
  my ($a,$b)=@_; 
  }

=head1 Example

=head1 Dictionary format

  # Dicionario portugues ingles
  #  #1 - usado para adjectivos (para trocar a ordem)
  #  #2 - so para o "a" (an elephant)
  #  #3 - Para nomes proprios (O Joao => Joao)
  
  Português=Portuguese
  a partir de=from
  a=the
  abrir=open
  aceder=access
  acrescentado=added#1
  actividades de investigação=research activities
  algumas=some
  alterado=changed#1
  alterar o nome de=rename
  desconhecida=unknown#1
  não alterado=unchanged#1
  não foi alterado=is unchanged
  não reconhecida=unrecognized#1
  não se conseguiu=could not
  não é um=not a#3
  não é=is not
  não=not
  um=a#3
  uma=a#3

=head1 Postprocessing function


  sub posProc{

  while(
    # o gato bonito -> the beatifull cat
    # é bonito      -> is beautifull
    s/\b(the|a#3|some|all) (\w+) (\w+)#1/$1 $3 $2/g ||
    s/\b(is|are|were) (\w+)#1/$1 $2/g ||
  
    # O Joao -> Joao
    s/\b([Tt]he) (\w+)#2/$2/g ||
    s/#2//g ||

    # an elefant
    # a table
    s/#3 ([aeiouAEIOU]\w*)/n $1/g ||
    s/#3//g) {}
  }

=head1 A script example


  #!/usr/local/bin/perl
  use text::translate;

  trans_dic("dict","personal");
  trans_prefix("@@@");
  trans_und("dict.und");
  trans_ppp("dict.pos");
  trans_dont_touch('\\\\(\w+)', '<.*?>'); #LaTeX comands and SGML
  
  while(<>){
    print(trans($_));
  }


	if (defined $proc_desc) {
		&{$proc_desc}($p)
	}
	else { "#$p" }

=cut

sub save_new_dict{
#  print "saving\n";
  open(NEWDICT,">> $user_dict_name") or 
  open(NEWDICT,"> $user_dict_name") or 
  die("cant save new words in ($user_dict_name)");
  for (@newwords) {
#	print "$_=$dict{$_}\n";
	print NEWDICT "$_=$dict{$_}\n";
  }
  close(NEWDICT);
}

=head1 AUTHOR

J.Joao Almeida jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut

1; # para por feliz o perl...

__END__

