=head1 Name Package trad - a perl Module for naif translation =head1 Synopsis use trad; trans_dic() trans_prefix(); trans():string trans_ppp(); # pre and pos processing trans_und() trans_dont_touch() =head1 Description =cut package trad; require Exporter; @ISA = (Exporter); @EXPORT= (trans, trans_dic, trans_prefix,trans_und,trans_dont_touch,trans_ppp); #use Fcntl; use POSIX qw(setlocale); &POSIX::setlocale(POSIX::LC_CTYPE,"iso_8859_1"); my $status; my $und_log; my $dont_touch; my $w='\w+(?:[\'-]\w+)*'; # word definition: my $prefix="@"; sub posProc {}; sub preProc {}; BEGIN { $status = "INIT";} =head2 Setting dictionary(s) In order to define the list of dictionaries to be used, call trans_dic(, ... , ); =cut sub trans_dic { @dictlist=@_; if($#dictlist == 0) {$dictlist= ("dict") } # carregar o dicionário (todo em minúsculas) # regras com sequencias de nao mais que 5 palavras my $a,$b; for(@dictlist){ open(DICT,"< $_") || die "cant open $_\n"; while(){ chop; if(! /^#|^$/){ ($a,$b)= split (/=/); $dict{$a}=$b; } } close(DICT); if (-f "$_.pp") {require "$_.pp";} } $status="DICLOAD"; } =head2 Text not to be touched In order to define the parts of the text not to be touched, call trans_dont_touch(); =cut sub trans_dont_touch { ($dont_touch)=@_; } =head2 Translating a phrase In order to translate a phrase call trans() The translated phrase will be returned =cut sub trans{ die "Dict nod loaded :-( (faltou invocar trans_set)" if ($status eq "INIT"); my $r=$_[0]; for($r){ &preProc; s/=/@@/g; # para guardar os '=' $_ = '=' . $_ ; # poe um = no inicio da linha. # "=" define a posicao actual na tradução while(/=/){ s/=(([{}0-9,.;?!\s-]+|($dont_touch))+)/$1=/; if (/=($w $w $w $w $w)(\s*)/ and defined $dict{lc($1)}) { #regra de 5 palav s/=($w $w $w $w $w)(\s*)/traduz($1,$2)/e} elsif (/=($w $w $w $w)(\s*)/ and defined $dict{lc($1)}) { #regra de 4 palav s/=($w $w $w $w)(\s*)/traduz($1,$2)/e} elsif (/=($w $w $w)(\s*)/ and defined $dict{lc($1)}) { #regra de 3 palav s/=($w $w $w)(\s*)/traduz($1,$2)/e} elsif(/=($w $w)(\s*)/ and defined $dict{lc($1)}) { #regra de 2 palav s/=($w $w)(\s*)/traduz($1,$2)/e} elsif(/=($w)(\s*)/ and defined $dict{lc($1)}) { #regra de 1 palav s/=($w)(\s*)/traduz($1,$2)/e} elsif(/=($w)(\s*)/) { # palavra desconhecida s/=($w)(\s*)/$prefix$1$2=/; print UND "$1\n" if ($und_log);} else { s/=\s*$//; # chegou-se ao fim da linha s/=([^\w])/$1=/;} # avancar outros caracteres } &posProc; s/@@/=/g; # volta a por os = return($_); } } =head2 Undefined word prefix in order to define the prefix to tag undefined words, call: trans_prefix() by default the prefix is "@". =cut sub trans_prefix{ $prefix=shift; } =head2 Pos and pre processing user can define o preprocessing function preProc(); and a postprocessing function posProc(); to adapt notations. In order to do that, create a new file with those functions and call trans_ppp(); to activate them. =cut sub trans_ppp { require $_[0] ;} =head2 Undefined word log file In order to have a undefined word log file, call: trans_und(); =cut sub trans_und { ($und_log) = @_; open (UND, "|sort -u > $und_log") || die "cant open $und_log :-( \n"; } sub traduz{ my ($a,$s)= @_ ; if (defined $dict{$a}) {$dict{$a}."$s="} # =gato -> cat = elsif (defined $dict{lcfirst($a)}) {ucfirst($dict{lcfirst($a)})."$s="} # =Gato -> Cat = elsif (defined $dict{lc($a)}) {uc($dict{lc($a)})."$s="} # =GATO -> CAT = else {"=$a$s"} # =ggg -> =ggg } sub simplifynewrule{ my ($a,$b)=@_; } =head1 Example =head1 Dictionary format # Dicionario portugues ingles # #1 - usado para adjectivos (para trocar a ordem) # #2 - so para o "a" (an elephant) # #3 - Para nomes proprios (O Joao => Joao) Português=Portuguese a partir de=from a=the abrir=open aceder=access acrescentado=added#1 actividades de investigação=research activities algumas=some alterado=changed#1 alterar o nome de=rename desconhecida=unknown#1 não alterado=unchanged#1 não foi alterado=is unchanged não reconhecida=unrecognized#1 não se conseguiu=could not não é um=not a#3 não é=is not não=not um=a#3 uma=a#3 =head1 Postprocessing function sub posProc{ while( # o gato bonito -> the beatifull cat # é bonito -> is beautifull s/\b(the|a#3|some|all) (\w+) (\w+)#1/$1 $3 $2/g || s/\b(is|are|were) (\w+)#1/$1 $2/g || # O Joao -> Joao s/\b([Tt]he) (\w+)#2/$2/g || s/#2//g || # an elefant # a table s/#3 ([aeiouAEIOU]\w*)/n $1/g || s/#3//g) {} } 1; # to keep perl happy =head1 A script example #!/usr/local/bin/perl use trad; trans_dic("dict"); trans_prefix("@@@"); trans_und("dict.und"); trans_ppp("dict.pos"); trans_dont_touch('\\\\(\w+)'); #LaTeX comands while(<>){ print(trans($_)); } =cut 1; # para por feliz o perl...