~~SLIDESHOW~~
<?xml version="1.0" encoding="ISO-8859-1"?>
<text>
<body>
<entry>
<url>http://natura.di.uminho.pt</url>
<url>http://naturaaaa.di.uminho.pt</url>
<url2>www.di.uminho.pt</url2>
<orth>aaron</orth>
<domain xml:lang="pt">gato cat 33</domain>
<translation>Aaron, Aarão (nome próprio)</translation>
</entry>
<entry>
<orth>aback</orth>
<domain xml:lang="en">gato cat 33</domain>
<pos>z.</pos>
</entry>
<entry>
<orth>abaft</orth>
<pos>adv.</pos>
<translation>à popa, à ré</translation>
</entry>
use XML::TX;

# Type table: each key is paired with the type used for it.
# The keys look like XML element names (url and domain appear in the sample
# document) -- TODO confirm against XML::TX.
my $types = {
    sentencePt => text("pt"),
    sentenceEn => text("en"),

    # The language is read from $v{'xml:lang'}; "pt" is used when that value
    # is missing or otherwise false.
    # NOTE(review): %v presumably holds the current element's attributes.
    domain => sub {
        my $lang = $v{'xml:lang'} || "pt";
        text($lang);
    },

    # A plain string refers to a type by name.
    url => "urlActive",
};
# LWP::Simple must be loaded before LWP::Simple::head can be called;
# otherwise the handler dies with "Undefined subroutine".
use LWP::Simple;

# Register the "urlActive" type.
# Its markit handler issues a HEAD request for the current content ($c);
# when head() returns a false value the content is replaced by
# markAsErr($c). The handler always finishes by returning toxml().
addType(
    urlActive => {
        markit => sub {
            $c = markAsErr($c)
                unless ( LWP::Simple::head($c) );
            toxml();
        },
    },
);
# Process $filename with the type table in $types.
# NOTE(review): markit/fixit are presumably exported by XML::TX and apply the
# per-type "markit" / "fixit" handlers respectively -- confirm.
markit($filename,$types);
fixit( $filename, $types );
isvalid( value, type )
url2 url
href urlActive
pos enumFromFile("POS")
orth text("en")
translation text("pt")
domain text(@xml:lang)
fig@url urlActive
%%
use LWP::Simple;

# Register the "urlActive" type.
# markit: issue a HEAD request for the current content ($c); if head()
# yields a false value, replace the content with markAsErr($c).
# The handler's result is whatever toxml() returns.
addType(
    urlActive => {
        markit => sub {
            if ( !LWP::Simple::head($c) ) {
                $c = markAsErr($c);
            }
            toxml();
        },
    },
);
# Schematic shape of a type registration: a type name paired with a hash of
# "markit" and "fixit" handlers. The "..." / "...." parts are placeholders for
# the handler code, so this snippet is not meant to run as written.
addType(
typename => { markit => sub {...},
fixit => .... },
)
# Register the "url" type with a checking (markit) and a repairing (fixit)
# handler. Both operate on the current content $c and return toxml().
# NOTE(review): the scheme pattern only accepts "http://" and "file://";
# "https://" content is therefore marked as an error -- confirm this is wanted.
addType(
    url => {
        # markit: flag content that does not start with http:// or file://.
        markit => sub {
            if ( $c !~ m{^(http|file)://} ) {
                $c = markAsErr($c);
            }
            toxml();
        },

        # fixit: content starting with "www." gets "http://" prepended first;
        # anything still lacking an accepted scheme is then flagged.
        fixit => sub {
            if ( $c =~ /^www\./ ) {
                $c = "http://" . $c;
            }
            if ( $c !~ m{^(http|file)://} ) {
                $c = markAsErr($c);
            }
            toxml();
        },
    },
);
use Date::Manip;
# Register the "date" type with a markit and a fixit handler.
addType(
# NOTE(review): the markit condition is elided ("....") in this snippet; it
# has to be filled in before the code can run.
date => { markit => sub{ $c = markAsErr($c) unless ....
toxml()},
fixit => sub{
# Parse the current content; a false result is treated as "not a date".
my $aux = ParseDate($c);
# On success the content is replaced by pp($aux); otherwise it is flagged.
# NOTE(review): pp is not defined in this snippet -- presumably a date
# formatter, confirm.
if ($aux){ $c = pp($aux); }
else { $c = markAsErr($c); }
toxml()}},
);
tx -correct x.tx y.xml > output
extract + process + rebuild
Correcção=ext_proc_rec(CorrectorInteractivo,....)
XML-DT based validators
final post-processor
facet-oriented processor