#!/usr/bin/perl -s use locale; $/ = "<"; $pro = '[\w_.-]+\@[\w_.-]+|((https?|ftp|gopher)://|www)[\w_./~-]+'; $abrv = join '|', qw( srt?a? dra? [A-Z] etc exa? jr profs? arq av estr? tv lgo pr Oliv ig mrs? min rep); while(<>) { $n=0; s/(.*>)\n*/ / and print $1; s/($pro)/$p{++$n}=$1; " __MARCA__$n "/ge; s#([»\]])#$1 #g; s#([«\[])# $1#g; s#\"# \" #g; s/(\b|\s)/\n/g; s/(.)\n-\n/$1-/g; s/\n+/\n/g; s/\n(\.?[ºª])\b/$1/g; {} while ( s#\b([0-9]+)\n([\,.])\n([0-9]+\n)#$1$2$3#g ); # s/\b([a-z]+)\n-\n/$1-/g; s#\n($abrv)\n\.\n#\n$1\.\n#ig; # s/(\w)[-\xad]\n/$1/g if $junta; s/\n? out =head1 DESCRIPTION =head1 AUTHOR Paulo Rocha, paulo.rocha@alfa.di.uminho.pt J.Joao Almeida, jj@di.uminho.pt =head1 SEE ALSO perl(1). cqp(1) =cut __END__