#!/usr/bin/perl -s use Text::RewriteRules; $/=''; use utf8; our ($f,$p); binmode(STDIN,":utf8"); binmode(STDOUT,":utf8"); use POSIX qw(locale_h); setlocale(&POSIX::LC_ALL, "pt_PT"); use locale; while(<>){ if ($p){ print d(c(b(a($_)))) ;} elsif ($f){ print f(d(c(b(a($_))))) ;} else { print g(f(d(c(b(a($_))))));} } RULES a ^[.«»]==>@ \*==>@ ENDRULES RULES b om==>ou ==> ==> ==>_< ==>>_ ==>*< ==>>* ([*_]<)([@,.\)]+)==>$2$1 >_\s*\.==>.>_ ([@,\)])(>[*_])==>$2$1 ([.])(>[*])==>$2$1 ENDRULES RULES/m c (\*<.*?>\*)\s*(_<.*?>_)\s*==>::TERMO: $1\n::CAT: $2\n::DEF: (\*<.*?>\*)\s*,\s*(_<.*?>_)\s*==>::TERMO: $1 ,\n::CAT: $2\n::DEF: (\*<.*?>\*)\s*(\^\{.\})\s*,\s*(_<.*?>_)\s*==>::TERMO: $1\n::MARCA2: $2 \,\n::CAT: $3\n::DEF: ENDRULES RULES d ^[@]\s*==>::MARCA1: *\n::START: (?:^|::START:\s*)(\w(?:-|\w)+),\s*(_<.*?>_)==>::TERMO: *<$1>* ,\n::DEF: $2\n::DEF: (?:^|::START:\s*)(\*<.*?>\*),==>::TERMO: $1 ,\n::CATDEF: ::CATDEF:\s*(n\.|f\.|adj\.|v\. t\.)==>::CAT: $1\n::DEF: (::CAT:\s*_<)\s*(\^\{.\})(\s*,?\s*)==>::MARCA2: $2$3\n$1 ::START:\s*==> @==>* /'\.==>_f._ /\.==>_f._ v\. 1\.==>v. t. ENDRULES RULES f (\*,\n)(m|f|adj|v\. t)\.==>${1}_$2._ ([*_])<==>$1 _\(==>(_ \)_==>_) >([*_])==>$1 \^\{(.)\}==>^$1 +,\n==>,\n \* +(\^\d)==>*,$1 ::MARCA1:\s*(.*)\n==>$1 \n::MARCA2:\s*(.*)==> $1 ::\w+:\s*==> ENDRULES RULES/m g G[eé]nero\b==>Gênero Mulher\b==>Mulhér mulher\b==>mulhér jogo\b==>jôgo Brás\.==>Bras. coxa\b==>côxa boca\b==>bôca flores\b==>flôres sobre\b==>sôbre moeda\b==>moéda fo(lhas?|orça?)\b==>fô$1 dobro\b==>dôbro soldo\b==>sôldo pôde\b==>póde eom\b==>com vm\b==>VIII Anl\.==>Ant. (\w*qa)==>$1qu (\w+ç)âo==>$1ão (\w+)==>$1 \n==>\n ENDRULES
#
# ---------------------------------------------------------------------------
# Overview (review notes for the script text above)
# ---------------------------------------------------------------------------
# Purpose: a text filter that reads input one paragraph at a time ($/ = ''),
# pushes each paragraph through a chain of rewrite-rule functions
# (a, b, c, d, f, g) and prints the result. STDIN/STDOUT use the :utf8
# layer and the locale is set to "pt_PT".
#
# Command-line switches (the interpreter is started with -s, so "-p" and
# "-f" on the command line set the package variables $p and $f):
#   -p        print d(c(b(a($_))))          -- stop after rule set d
#   -f        print f(d(c(b(a($_)))))       -- additionally apply f
#   (none)    print g(f(d(c(b(a($_))))))    -- full chain, including g
#
# Rule sets, as far as the visible patterns show:
#   a  replaces a leading '.', '«' or '»', and every '*', with '@'.
#   b  rewrites "om" to "ou" and reorders punctuation relative to the
#      '*<' ... '>*' and '_<' ... '>_' markers.
#   c  turns a '*<...>*' span followed by a '_<...>_' span into labelled
#      "::TERMO:", "::CAT:" (optionally "::MARCA2:") and "::DEF:" fields.
#   d  adds "::MARCA1:"/"::START:" for a leading '@', labels further
#      entry shapes, maps '@' back to '*', and rewrites e.g. "v. 1." to
#      "v. t.".
#   f  strips the '<' / '>' halves of the markers, rewrites '^{x}' to
#      '^x', and removes the "::LABEL:" prefixes again.
#   g  applies word-level spelling substitutions (e.g. "jogo" to "jôgo",
#      "eom" to "com", "vm" to "VIII").
#
# NOTE(review): the whole script sits on the '#!' line, so Perl reads all
# of it as a comment and nothing executes. This looks like a line-break
# loss in transit; Text::RewriteRules expects the section header, each
# "pattern==>replacement" rule and the section terminator on their own
# lines. Restore the layout from the original file rather than by
# guessing: several patterns/replacements contain significant spaces
# (e.g. " +,\n", "v\. 1\.", "::DEF: ") that cannot be told apart from
# rule separators here.
#
# NOTE(review): in rule set b, six consecutive rules show "==>" with no
# visible left-hand side (" ==> ==> ==>_< ==>>_ ==>*< ==>>*"). Presumably
# the patterns were non-printing/control characters that were lost --
# confirm against the original; an empty pattern would not be usable.
#
# NOTE(review): rule sets c and g are declared with the "/m" flag; its
# exact semantics are defined by Text::RewriteRules -- TODO confirm in the
# module documentation before editing rules such as "(\w+)==>$1", which
# only make sense if matching advances through the text.
#
# NOTE(review): "use strict" / "use warnings" are absent and $/ is
# assigned globally rather than with local; acceptable for a one-off
# filter, but worth revisiting once the line structure is restored.