#!/usr/bin/perl -s

use strict;
use warnings;

use DBI;

# Populated by perl's -s switch when the script is invoked with -h.
our ($h);

if ($h) {
    print STDERR "nat-ngramsIdx: Indexes a ngrams SQLite database\n";
    print STDERR "\n";
    print STDERR "For more help, please run 'perldoc nat-ngramsIdx'\n";
    exit;
}

my $file = shift;
die "Can't find that file\n" if !$file || !-f $file;

# Work out the n-gram order before touching the disk, so that a bad
# filename does not leave a temporary database behind.
my $size = ngram_size($file);
die "Strange ngrams filename\n" unless $size > 1;

# Lookup tables indexed by n-gram order (2..4) or by word position (1..4).
my @TBS   = (undef, undef, "bigrams", "trigrams", "tetragrams");
my @SCOLS = (undef, "a", "b", "c", "d");
my @IDX   = (undef, undef, "bi", "tri", "tet");

my $table = $TBS[$size];
my $tmp   = "${file}_";

# A leftover temporary database from an interrupted run would already
# contain a "tmp" (or final) table and make the rebuild fail, so start clean.
if (-e $tmp) {
    unlink $tmp or die "Can't remove stale temporary file $tmp: $!\n";
}

# RaiseError makes every failed statement fatal. This is essential here:
# the last step replaces the original file, so we must never reach it
# after a failed copy.
my $dbh = DBI->connect("dbi:SQLite:dbname=$tmp", "", "",
                       { RaiseError => 1, PrintError => 0 });

print STDERR "Creating new table...\n";
$dbh->do("CREATE TABLE tmp ("
       . join(",", map { "word$_ INTEGER" } 1 .. $size)
       . ",occs INTEGER);");

print STDERR "Attaching old table...\n";
# quote() turns the filename into a safe SQL string literal.
$dbh->do("ATTACH " . $dbh->quote($file) . " AS O;");

print STDERR "Selecting ordered values into new table...\n";
$dbh->do("INSERT INTO tmp SELECT * FROM O.$table ORDER BY occs DESC;");

print STDERR "Detaching old table...\n";
$dbh->do("DETACH O;");

print STDERR "Renaming temporary table...\n";
$dbh->do("ALTER TABLE tmp RENAME TO $table;");

# One index per word column: bi_a, bi_b, tri_a, ...
for my $pos (1 .. $size) {
    my $idx = $IDX[$size] . "_" . $SCOLS[$pos];
    print STDERR "Creating idx $idx\n";
    $dbh->do("CREATE INDEX $idx ON $table(word$pos);");
}

# Close the database before the file is moved into place.
$dbh->disconnect;

print STDERR "Moving file...\n";
# rename() avoids the shell entirely, so filenames containing spaces or
# metacharacters are handled correctly, and failures are reported.
rename($tmp, $file) or die "Can't move $tmp over $file: $!\n";

# ngram_size($path)
#
# Returns the n-gram order (2, 3 or 4) encoded in the filename, or -1 when
# none can be determined. The documented "name.N.ngrams" form is matched
# first, against the last path component only, so digits in directory
# names or in the corpus name cannot mislead it. Filenames that do not
# follow that form fall back to the historical rule: the highest of the
# digits 2, 3, 4 appearing anywhere in the path wins.
sub ngram_size {
    my ($path) = @_;

    (my $base = $path) =~ s{.*/}{}s;
    return $1 if $base =~ m{ [.] ([234]) [.] ngrams \z }x;

    my $found = -1;
    for my $n (2, 3, 4) {
        $found = $n if $path =~ /$n/;
    }
    return $found;
}

=encoding UTF-8

=head1 NAME

nat-ngramsIdx - Indexes a ngrams SQLite file

=head1 SYNOPSIS

  nat-ngramsIdx source.2.ngrams

=head1 DESCRIPTION

This is a utility tool to create indexes in ngrams SQLite files.
Normally you do not need to use it directly.

=head1 SEE ALSO

NATools documentation

=head1 AUTHOR

Alberto Manuel Brandão Simões, E<lt>ambs@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2007-2012 by Alberto Manuel Brandão Simões

=cut