#!/usr/bin/perl -s

use strict;
use warnings;

use Lingua::PT::PLN;

# Command-line switches, filled in by perl's -s option processing:
#   -latin1    do not put the :utf8 layer on STDOUT
#   -text      plain-text mode (see textSentence)
#   -tag=NAME  tag name; defaults to "s"
our ($latin1, $text, $tag);

binmode(STDOUT, ":utf8") unless $latin1;
$tag ||= "s";

if ($text) {
    # NOTE(review): textSentence() only honours a hash-ref argument, so this
    # plain string is ignored and the default sentence tag "s" is used.
    # Confirm whether textSentence({ st => $tag }) was the intent.
    textSentence($tag);
}
else {
    # Pass the -tag value.  A bare `shift` here would take the first input
    # file name from @ARGV and use it as the tag.
    contextualSentence($tag);
}

# textSentence([\%options])
#
# Reads the files named in @ARGV (or STDIN) one record at a time, trims
# leading and trailing whitespace from each record, and prints the result of
# Lingua::PT::PLN::xmlsentences() for it, preceded by one newline and
# followed by two.
#
# Options (optional hash ref as first argument):
#   st   - value forwarded to xmlsentences() as its `st` option
#          (default "s"; presumably the sentence tag name -- see the
#          Lingua::PT::PLN documentation)
#   psep - input record separator (default "", i.e. perl's paragraph mode,
#          where records are separated by empty lines)
sub textSentence {
    my %opt = (st => "s", psep => "");
    if (ref $_[0] eq "HASH") {
        # Merge caller options over the defaults.
        %opt = (%opt, %{ shift(@_) });
    }

    local $/ = $opt{psep};
    while (defined(my $paragraph = <>)) {
        $paragraph =~ s/^\s*//;
        $paragraph =~ s/\s+$//;
        print "\n",
            Lingua::PT::PLN::xmlsentences({ st => $opt{st} }, $paragraph),
            "\n\n";
    }
    return;
}

# contextualSentence($tag)
#
# Reads the files named in @ARGV (or STDIN) in records that end at each
# closing </$tag>.  In every record, the text between the first <$tag ...>
# opening tag and the closing tag is replaced by the result of
# Lingua::PT::PLN::xmlsentences() on that text; everything else is printed
# unchanged.
sub contextualSentence {
    my $tag       = shift;
    my $close_tag = "</$tag>";

    # Keep the separator change scoped to this sub.
    local $/ = $close_tag;

    while (defined(my $chunk = <>)) {
        # \Q..\E keeps characters such as "." or "-" in the tag name literal.
        $chunk =~ s{ ( <\Q$tag\E\b .*? > ) (.*?) ( \Q$close_tag\E ) }
                   { $1 . Lingua::PT::PLN::xmlsentences($2) . $3 }xse;
        print $chunk;
    }
    return;
}

__END__

=head1 NAME

quebraxmlsent - breaks text in sentences

=head1 SYNOPSIS

 quebraxmlsent -text textfile*
 quebraxmlsent -tag=ttt xmlfile*

 Options:
   -text      input is plain text
   -tag=ttt   element whose content is split into sentences (def: s)
   -latin1    (output encoding -- def:utf8)

=head1 DESCRIPTION

In text mode paragraphs are separated by empty lines.

Otherwise the content of each C<ttt> element of the input is split into
sentences; the rest of the input is copied unchanged.

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

Lingua::PT::PLN(3pm)

=cut