#!/usr/bin/perl -s use strict; our ($fs,$f,$the,$baselang,$nfs,$e); #$fs ||=":"; $f ||= 'term'; $f="the" if $the; $f="tab" if $nfs; use encoding "utf8"; tabpp({format=>$f,fs=>$fs,bl=>$baselang,nfs=>$nfs,($e?(e=>$e):())}); sub tabpp{ my %opt =(format => "term", fs=>":"); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; $opt{format}="perl" if $opt{e}; my $fs=$opt{fs}; my ($f1)=@_; my ($c1,$rc1); my $e1 = <>; chomp $e1; $fs = guess_fs($e1) unless defined $fs; my $n=0; for (rsplit($fs,$e1)){ $rc1->{$n} = $_; $c1->{$_} = $n++; } # for (rsplit($fs,$e)){ $rc->{$n} = $_; $c->{$_} = $n++; } my $tup; if($opt{format} eq "term"){ print "%enc utf8\n"; while ($tup = <>){ chomp $tup; my @a=rsplit($fs,$tup); for (0..scalar @a -1){ next unless $a[$_]; print "$rc1->{$_}\t$a[$_]\n"; } print "\n"; } } elsif($opt{format} eq "tab" and defined $nfs){ print join($nfs,rsplit($fs,$e1)),"\n"; while ($tup = <>){ chomp $tup; print join($nfs,rsplit($fs,$tup)),"\n"; } } elsif($opt{format} eq "perl"){ my $exp = $opt{e}; $exp =~ s{\$F(\d+)}{ '$F__['.($1-1).']'}eg; $exp =~ s{\$([#\w]+)}{ if($1 eq "F__"){"\$$1"} else {'$F__['. (defined ($c1->{$1})?$c1->{$1}:die("Error: $1 not found\n")) .']'}}eg; while(<>){ chomp; my @F__ = rsplit($fs,$_); my $r = eval $exp; warn("bad exp...: $@ ($_)\n") if $@; print "$r\n" if $r; } } elsif($opt{format} eq "thesaurus" or $opt{format} eq "the"){ print "%enc utf8\n"; my $blp=0; if($opt{bl}){ $blp = $c1->{$opt{bl}}; die("no $opt{bl} found in schema\n") unless defined $blp; print "%baselang $opt{bl}\n"; } while ($tup = <>){ chomp $tup; my @a=rsplit($fs,$tup); print "$a[$blp]\n"; for (0..scalar @a -1){ next unless $a[$_]; next if $_ == $blp; print "$rc1->{$_}\t$a[$_]\n"; } print "\n"; } } close F1; } sub rsplit{ my ($fs,$tup)=@_; $tup=~ s/\r//; $tup=~ s/\xFF\xFE//; if($fs =~ /(\s|\\t|\\n|\\s)/){} else {$tup=~ s/^\s+//; $tup=~ s/\s*$//;$fs = qr{\s*$fs\s*} } return split(/$fs/,$tup); } sub guess_fs{ my $e = shift; my %o; for(split(/[ #\w]+/,$e)){$o{$_}++ } return (sort {$o{$b} <=> $o{$a}} (keys %o))[0] || ":"; } __END__ =head1 NAME tabpp - pretty-print textual relational databases =head1 SYNOPSIS tabpp [-fs=#] [-f=term] tab* > f iconv -f latin -t utf8 tab1 | tabpp -t=term > f tabpp -nfs=# tab1 (change fs to "#") tabpp -e='perlexp' tab1 =head1 DESCRIPTION Input should be Unicode UTF8 If no tab is provided, stdin is used. =head1 FORMATS each input table: country:capital first line - name of the fields Portugal:Lisboa tuple 1 France:Paris tuple 2 Spain:Madrid tuple n Output in format term: country Portugal capital Lisboa country France capital Paris country Spain capital Madrid =head1 Options -fs='!' changes the field separator (default calculated from schema (first line) or ":") -f=format choose the output format (default "term") -nfs=# changes the field separator to # -e='$F1+$F2 . "=$F4"' format: thesaurus -f=the -the -baselang=en to produce a format of thesaurus ISO... with baselang= en =head1 AUTHOR J.Joao Almeida, jj@di.uminho.pt =head1 SEE ALSO perl(1). =cut