~~SLIDESHOW~~
<?xml version="1.0" encoding="ISO-8859-1"?> <text> <body> <entry> <url>http://natura.di.uminho.pt</url> <url>http://naturaaaa.di.uminho.pt</url> <url2>www.di.uminho.pt</url2> <orth>aaron</orth> <domain xml:lang="pt">gato cat 33</domain> <translation>Aaron, Aarão (nome próprio)</translation> </entry> <entry> <orth>aback</orth> <domain xml:lang="en">gato cat 33</domain> <pos>z.</pos> </entry> <entry> <orth>abaft</orth> <pos>adv.</pos> <translation>à popa, à ré</translation> </entry> </body> </text>
use XML::TX;
use LWP::Simple;    # head() is called below, so load the module explicitly

# Map each element name to the type used to check its content.
my $types = {
    sentencePt => text("pt"),
    sentenceEn => text("en"),

    # Type depends on the xml:lang value, falling back to "pt" when it is
    # empty or missing (%v presumably holds the element's attributes -- confirm).
    domain => sub { text($v{'xml:lang'} || "pt") },

    # Refers by name to the type registered with addType() below.
    url => "urlActive",
};

# "urlActive": the content is marked as an error when LWP::Simple::head()
# returns a false value for it.
addType(
    urlActive => {
        markit => sub {
            $c = markAsErr($c) unless LWP::Simple::head($c);
            toxml();
        },
    },
);

markit($filename, $types);
fixit( $filename, $types );
isvalid( value, type )
url2 url href urlActive pos enumFromFile("POS") orth text("en") translation text("pt") domain text(@xml:lang) fig@url urlActive %% use LWP::Simple; addType( urlActive => { markit => sub{ $c = markAsErr($c) unless (LWP::Simple::head($c)); toxml()}, } );
addType( typename => { markit => sub {...}, fixit => .... }, )
# Register the "url" type.
#   markit: marks the content as an error unless it starts with
#           http://, https:// or file://.
#   fixit:  first prefixes "http://" to content that starts with "www.",
#           then applies the same scheme check as markit.
addType(
    url => {
        markit => sub {
            # https? accepts both http:// and https://; the group is
            # non-capturing because the match result is not used.
            $c = markAsErr($c) unless $c =~ m{^(?:https?|file)://};
            toxml();
        },
        fixit => sub {
            $c = "http://" . $c if $c =~ /^www\./;
            $c = markAsErr($c) unless $c =~ m{^(?:https?|file)://};
            toxml();
        },
    },
);
use Date::Manip;
# Register the "date" type.
#   markit: marks the content as an error unless a condition holds; the
#           condition is elided ("....") in this example.
#   fixit:  parses the content with ParseDate(); when that yields a true
#           value the content is replaced by pp() of the parsed date,
#           otherwise the content is marked as an error.
# NOTE(review): pp() is not defined in this snippet -- presumably a
# date pretty-printer; confirm where it comes from.
addType(
date => { markit => sub{ $c = markAsErr($c) unless .... toxml()}, fixit => sub{ my $aux = ParseDate($c); if ($aux){ $c = pp($aux); } else { $c = markAsErr($c); } toxml()}},
);
tx -correct x.tx y.xml > output
extract + process + rebuild
Correcção=ext_proc_rec(CorrectorInteractivo,....)
XML-DT based validators
final post-processor
facet-oriented processor