#!/usr/bin/perl use utf8; use warnings; use strict; use DBI; use XML::DT; use POSIX qw(locale_h); setlocale(LC_CTYPE, "pt_PT.UTF8"); -f "xml.db" and unlink "xml.db"; my $dbh = DBI->connect("dbi:SQLite:dbname=xml.db","",""); $dbh->do(q{CREATE TABLE entry (word NOT NULL, n INT, xml TEXT NOT NULL, normalized NOT NULL, PRIMARY KEY (word,n));}); $dbh->{sqlite_unicode} = 1; $dbh->begin_work(); my $sth = $dbh->prepare("INSERT INTO entry VALUES(?,?,?,?);"); my %handler = ( -pcdata => sub { for ($c) { s/&/&/g; s//>/g; } $c }, orth => sub { root->{orth} = lc($c); toxml; }, -default => sub { toxml }, entry => sub { my $orth = $v{orth}; delete $v{orth}; my $xml = toxml; my $n = $v{n} || 0; $sth->execute($orth, $n, $xml, n($orth)) or warn "** FAILED: [$orth, $n]\n"; } ); my $letter; print STDERR "xml2db ["; for my $file () { $file =~ /\/(.*)\.xml/ and $letter = $1; print STDERR "$letter,"; local $/ = "\n"; open XML, $file or die "Can't open $file: $!\n"; while () { next if m{}; s{<\?xml.*commit(); $dbh->do(q{CREATE INDEX entry_word ON entry (word);}); $dbh->do(q{CREATE INDEX entry_normalized ON entry (normalized);}); sub n { my $word = shift; $word = lc $word; $word =~ s!\.!!g; $word =~ y{áéíóúàèìòùãõâêîôûçÿïýĩẽüũ} {aeiouaeiouaoaeioucyiyieuu}; return $word; }
#
# ---------------------------------------------------------------------------
# Review notes for the line above (placed here so that `#!` stays on line 1).
#
# Purpose, as far as the visible statements show:
#   * removes an existing "xml.db" file, then opens it again through DBI/SQLite;
#   * creates table `entry` (word, n, xml, normalized) with primary key
#     (word, n), and prepares a four-placeholder INSERT inside a transaction
#     started with begin_work();
#   * defines a %handler table with -pcdata, orth, -default and entry
#     callbacks; the entry callback inserts (orth, n, xml, n(orth)) and only
#     warns "** FAILED: [...]" when execute() returns false;
#   * loops over input files, taking $letter from the part of the path between
#     a "/" and ".xml" and printing it to STDERR as progress;
#   * after the load, creates indexes on entry(word) and entry(normalized).
#
# sub n($word): returns the word lower-cased, with every "." removed and the
# listed accented letters transliterated to their unaccented ASCII letters.
# It produces the value stored in the `normalized` column.
#
# In the entry callback, `$v{n} || 0` stores 0 when the entry has no (or a
# false) `n` value.
#
# NOTE(review): every statement shares one physical line with the `#!`
#   marker. In Perl a `#` starts a comment that runs to the end of the line,
#   so as written none of these statements are compiled or run. Presumably
#   the line breaks were lost in transit -- restore them from the original.
#
# NOTE(review): several constructs look truncated, as if text between angle
#   brackets was stripped. Confirm each against the original script:
#     - `s/&/&/g; s//>/g;` in the -pcdata callback (presumably the XML entity
#       escapes for "&", "<" and ">");
#     - `for my $file ()` has an empty list, so the loop would never run;
#     - `while ()` has no filehandle read (presumably reads from XML);
#     - `next if m{};` has an empty pattern;
#     - `s{<\?xml.*` runs straight into `commit();` -- the end of the
#       substitution, the rest of the loop body (including whatever call hands
#       each record to %handler), the closing braces and the `$dbh->` in front
#       of `commit()` are not present.
#
# NOTE(review): `local $/ = "\n";` sets the record separator to what is
#   already Perl's default; part of the intended value may have been lost --
#   TODO confirm.
#
# NOTE(review): `open XML, $file` is a two-argument open on a bareword handle,
#   and no close of XML is visible in the surviving text.
# ---------------------------------------------------------------------------