#!/usr/bin/perl -s
# tax2thesaurus - convert an indentation-based taxonomy into thesaurus
# entries (term / "BT" broader term / "#lev" depth).  See the POD below.
use strict;
use warnings;

# Stack of indentation widths of the currently open ancestors.  The -1
# sentinel belongs to the artificial root and is never popped, because a
# real indentation is always >= 0.
my @indent_stack = (-1);

# Stack of ancestor terms, parallel to @indent_stack; "_top" is the root.
my @term_stack = ("_top");

# thesaurus_entry($line)
#
# Turns one (already chomped) taxonomy line into the text of its thesaurus
# entry and updates the ancestor stacks.
#
#   $line   - input line; its indentation is the run of leading ' ', '.'
#             and '-' characters.
#   returns - the entry text ("\nTERM\nBT PARENT\n#lev N\n"), or undef
#             (empty list in list context) when the line holds no term.
sub thesaurus_entry {
    my ($line) = @_;

    # Lines without any word character carry no term.
    return unless $line =~ /\w/;
    return unless $line =~ /^([ .-]*)(\S.*)/;
    my $depth = length $1;
    my $term  = $2;

    # Close every open term that is not shallower than this one: what is
    # left on top of the stack is the nearest ancestor.
    while (@indent_stack and $depth <= $indent_stack[-1]) {
        pop @indent_stack;
        pop @term_stack;
    }
    push @indent_stack, $depth;
    push @term_stack,   $term;

    # Test definedness rather than truth so that a parent term "0" is
    # reported as "0" and not as "?".
    my $parent = $term_stack[-2];
    $parent = "?" unless defined $parent;

    return "\n$term\nBT $parent\n#lev " . (@term_stack - 1) . "\n";
}

# main()
#
# Reads every file named in @ARGV ("-" or no argument means STDIN) and
# prints the thesaurus to STDOUT.  The ancestor stacks are not reset
# between files.  Dies if a file cannot be opened.
sub main {
    binmode(STDOUT, ":utf8");
    @ARGV = ("-") unless @ARGV;

    for my $file (@ARGV) {
        my $fh;
        if ($file eq "-") {
            $fh = \*STDIN;
        }
        else {
            # Three-argument open: the name is never interpreted as a
            # mode or as a command to run.
            open($fh, "<", $file) or die("cant open $file: $!\n");
        }

        # Each input file's output starts with the encoding header.
        print "%enc utf8\n\n";

        while (my $line = <$fh>) {
            chomp $line;

            # Encoding directives are consumed, never emitted as terms.
            next if $line =~ /%enc(?:oding)?\s+latin1/i;
            if ($line =~ /%enc(?:oding)?\s+utf8/i) {
                # From here on the rest of this input is read as UTF-8.
                binmode($fh, ":utf8");
                next;
            }

            my $entry = thesaurus_entry($line);
            print $entry if defined $entry;
        }

        close $fh unless $file eq "-";
    }
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (do/require) to call thesaurus_entry() directly.
main() unless caller;

__END__

=head1 NAME

tax2thesaurus - indented taxonomy to thesaurus

=head1 SYNOPSIS

 tax2thesaurus file.tax > file.the

=head1 DESCRIPTION

A top term C<_top> is added to the taxonomy.

Each input line holds one term. The indentation of a term is the number of
leading space, C<.> or C<-> characters; a term is placed under the nearest
preceding term that has a smaller indentation. Lines without any word
character are ignored.

A line containing C<%enc utf8> (or C<%encoding utf8>) makes the rest of
that input be read as UTF-8; a C<%enc latin1> line is skipped. The output
is written as UTF-8 and starts with a C<%enc utf8> line.

=head2 Example of the taxonomy format

 science
    physics
       dynamics
    math
       algebra
       trigonometry
 literature

=head2 Corresponding output

 science
 BT _top
 #lev 1

 physics
 BT science
 #lev 2

 dynamics
 BT physics
 #lev 3

 math
 BT science
 #lev 2

 algebra
 BT math
 #lev 3

 trigonometry
 BT math
 #lev 3

 literature
 BT _top
 #lev 1

A similar output will be obtained with

 .science
 ...physics
 ......dynamics
 ...math
 ......algebra
 ......trigonometry
 .literature

=head2 Corresponding output

The same entries as shown above: the leading dots are only counted as
indentation and are not part of the terms.

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut

__END__
Geografia Política
  América
    América Central
      Belize
      Costa Rica
      El Salvador
      Guatemala
      Honduras
      Nicarágua
      Panamá
    América Do Norte
      Canadá
        Alberta