#!/usr/bin/perl 

use POSIX qw(locale_h);
setlocale(&POSIX::LC_ALL, "pt_PT");
use locale;

use Text::RewriteRules;

my $v  = qr{[aeiouáéíóúàãõâêôäëïöüyw]}i;            ##vowels
my $c  = qr{[bcçdfghjklmñnpqrstvwyxz]}i;            ##consonant
my $f  = qr{(?:[çdfgjkqtv]|ss|rr|[nlcs]h|bs|cc)}i;  ##strong consonant
my $ac = qr{[áéíóúãõâêô]}i;                         ##accents

my @br = qw{sl sm sn sc rn bc lr bc bd bj bp pt pc dj pç zm tp};
my %br = ();
@br{@br} = @br;

while(<>){chomp; 
  s/(\w+)/vowelaccent(wordaccent(divide($1)))/ge;
  print "$_\n";
}

##                                               division in sylables
RULES/i divide
êm$==>ê|_nhem
($v)($c)($c)($v)==>$1$2|$3$4!! $br{"$2$3"}
($v)($c)($c)([lr])==>$1$2|$3$4!! $br{"$2$3"}
($v)($f)(?![|])==>$1|$2
(.[bclnprsx])($f)(?![|])==>$1|$2
($v)([bclmnprsxzh]$v)==>$1|$2
(\w)([lmnrsx])([bclmnprszx])($v)==>$1$2|$3$4!!"$2$3" ne "ss" && "$2$3" ne rr
($v|[lmnrsx])([bcp][lr]$v)==>$1|$2
($v)($c)($c)($v)==>$1|$2$3$4

###                                 quebra de ditongos / tritongos
([a])(i[ru])==>$1|$2
([ioeê])([aoe])==>$1|$2
u(ai|ou)==>u|$1
([^qg]u)(ei|iu|ir|$ac)==>$1|$2
([aeio])($ac)==>$1|$2
([íúô])($v)==>$1|$2

ENDRULES

##                                            mark the tonic sylab
RULES/i wordaccent
"=last=>
(\w*$ac)==>"$1
(\w*([zlr]|[iu]s?))$==>"$1
(\w+\|\w+)$==>"$1
(^\w+$)==>"$1
ENDRULES

##                          mark the tonic vowel in the tonic sylab
RULES/i vowelaccent
:($ac)==>$1:
"(\w*?($v|[yw]))==>$1:
([gq]u):($v|[yw])==>$1$2:
"==>
:=last=>
ENDRULES

### TODO
##Trata prefixo não misturados
#anti
#antropo
#aero
#sub
#super
#supra
#
##Tratar sufixos não misturados
#mente

__END__

=head1 NAME

ptsyl - division in syllagbles and mark principal accent

=head1 DESCRIPTION

Makes division in sylables according to the portuguese language rules.

=head1 AUTHOR

J.Joao Almeira, jj@di.uminho.pt

=cut      
