#!/usr/bin/perl -w -s
use Lingua::StarDict::Gen;
use Biblio::Thesaurus;
use utf8;
use strict;
# Command-line switches, filled in by perl's -s option (see the #! line):
#   -name=NAME   dictionary name
#   -dir=DIR     output directory
#   -i           call writeDict without an output directory (install mode)
#   -langs=A,B   extra languages whose terms also become dictionary entries
our ($name, $dir, $i, $langs);

# -langs is optional: only split it when it was given, so that a run without
# it does not raise an "uninitialized value" warning under -w.
my @extralang = defined $langs ? split(/\W+/, $langs) : ();
# Lookup table used by terms2xdxf() to recognise the extra languages.
my %extralang = map { ($_ => $_) } @extralang;

$dir ||= "D/";

my $t = shift
    or die("usage thesaurus2stardict [-name=NNN] thesaurus_file\n");

# Default dictionary name is derived from the thesaurus file name; a trailing
# ".the" is removed from the name in either case.
$name ||= "Thesaurus_$t";
$name =~ s/\.the$//;

my $s   = "/";
my $obj = thesaurusLoad($t);

if ($i) {
    Lingua::StarDict::Gen::writeDict(terms2xdxf($obj), $name);
}
else {
    # An already existing directory is fine; any other failure is reported
    # but not fatal, so the flow stays the same as before.
    -d $dir or mkdir($dir) or warn "cannot create directory '$dir': $!\n";

    # The separator is appended unconditionally (the default thus gives "D//").
    if ($^O eq "MSWin32") { $s = "\\" }
    Lingua::StarDict::Gen::writeDict(terms2xdxf($obj), $name, "$dir$s");
}
# terms2xdxf($thesaurus)
#
# Walks a loaded thesaurus with downtr() and builds the hash that is handed
# to Lingua::StarDict::Gen::writeDict: every term is mapped to a text block
# listing its relations, one "REL: term, term" paragraph per relation.
#
# Terms belonging to a language selected with -langs (file-level %extralang)
# become entries of their own, sharing the text of the term they belong to.
#
# The callbacks read $rel, @terms and $term, which are expected to be set by
# downtr() while it runs (presumably exported by Biblio::Thesaurus -- confirm
# against its documentation).
#
# Returns a hash reference (term => description text).
sub terms2xdxf {
    my $t    = shift;
    my $o    = {};
    my @l    = sort $t->languages();
    my @okey = ();    # extra-language terms collected for the current term

    $t->downtr(
        {
            -default => sub {
                my $list = join(",\n\t", @terms);

                if ($t->isLanguage($rel)) {
                    # Remember translations in the requested extra languages
                    # so that -eachTerm can index them as well.
                    push @okey, @terms if $extralang{$rel};
                    return "\n$rel: \t$list";
                }
                return "\n$rel: \t$list" if $t->isExternal($rel);
                return "\n$rel→\t$list";
            },
            -eachTerm => sub {
                my $text = "$_";
                $o->{$term} = $text;

                # Every collected extra-language term gets the same text.
                # The list is reset only AFTER the loop: emptying it inside
                # the loop body stopped the iteration after the first term.
                for my $extra_term (@okey) {
                    $o->{$extra_term} = $text;
                }
                @okey = ();

                # The result of downtr() is discarded below, so the callback
                # value only has to be defined.
                return '';
            },
            # -end => sub { "Thesaurus :\n $_ \nFIM\n"},
            -order => [ "DEF", "DOM", @l, "IOF", "BT", "NT", "RT" ],
        }
    );

    return $o;
}
__END__
=encoding utf8
=head1 NAME
thesaurus2stardict - generates a StarDict dictionary from a thesaurus
=head1 SYNOPSIS
thesaurus2stardict [...] file.the
=head1 DESCRIPTION
By default, output is sent to directory D
-dir=outputdir (def: D)
-name=NAME (def: Thesaurus_FILE, without a trailing ".the")
-i build and install (def: no install)
-l=EN use EN as the def. language
-langs=RU,FR also use the RU and FR languages
With C<-l=PT>, language will be "PT"
By default, language is the first language of the first concept.
With C<-i> (install) -- output directory is "/usr/share/stardict/dic/",
"c:\ProgramFiles\stardict\dic" or similar.
=head2 thesaurus format
Concepts separated by empty lines
Each line:
relation termlist (separated by "," or ";")
Example:
%encoding utf8
%baselang PT
%languages EN
%languages FR
%desc[EN] NT Narrower Term
%desc NT Termo Específico
%desc BT Termo Genérico
%desc INST Instâncias
%desc IOF instância de
%inverse IOF INST
_top_
NT animal
animal
NT mamífero, animal doméstico, réptil
EN animal
mamífero
INST gato, leão
réptil
INST crocodilo , cobra
leão
EN lion
gato
EN cat
FR chat
IOF animal doméstico
SN Animal que tem quatro vidas e m(e)ia
sapo
IOF réptil
EN frog
=head1 AUTHOR
J.Joao Almeida, jj@di.uminho.pt
=head1 SEE ALSO
perl(1).
Lingua::StarDict::Gen
term2stardict
Biblio::Thesaurus(3pm)
=cut