#!/usr/bin/perl -s
use strict;
our ($fs,$f,$the,$baselang,$nfs,$e);
#$fs ||=":";
$f  ||= 'term';

$f="the" if $the;
$f="tab" if $nfs;

use encoding "utf8";

tabpp({format=>$f,fs=>$fs,bl=>$baselang,nfs=>$nfs,($e?(e=>$e):())});

sub tabpp{
  my %opt =(format => "term", fs=>":");
  if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ;
  $opt{format}="perl" if $opt{e};
  my $fs=$opt{fs};
  my ($f1)=@_; 
  my ($c1,$rc1);

  my $e1 = <>;
  chomp $e1;
  $fs = guess_fs($e1) unless defined $fs;
  my $n=0;
  for (rsplit($fs,$e1)){ $rc1->{$n} = $_; $c1->{$_} = $n++; }
#  for (rsplit($fs,$e)){ $rc->{$n} = $_; $c->{$_} = $n++; }

  my $tup;
  if($opt{format} eq "term"){
    print "%enc utf8\n";
    while ($tup = <>){ chomp $tup; 
      my @a=rsplit($fs,$tup);
      for (0..scalar @a -1){
        next unless $a[$_];
        print "$rc1->{$_}\t$a[$_]\n";
      }
      print "\n";
    } }
  elsif($opt{format} eq "tab" and defined $nfs){
    print join($nfs,rsplit($fs,$e1)),"\n";
    while ($tup = <>){ chomp $tup; 
      print join($nfs,rsplit($fs,$tup)),"\n";
    } }
  elsif($opt{format} eq "perl"){
    my $exp = $opt{e};
    $exp =~ s{\$F(\d+)}{ '$F__['.($1-1).']'}eg;
    $exp =~ s{\$([#\w]+)}{
     if($1 eq "F__"){"\$$1"}
     else           {'$F__['.
        (defined ($c1->{$1})?$c1->{$1}:die("Error: $1 not found\n")) .']'}}eg;
    while(<>){
       chomp;
       my @F__ = rsplit($fs,$_);
       my $r = eval $exp;
       warn("bad exp...: $@ ($_)\n") if $@;
       print "$r\n" if $r;
    }
  }
  elsif($opt{format} eq "thesaurus" or $opt{format} eq "the"){
    print "%enc utf8\n";
    my $blp=0;
    if($opt{bl}){ 
      $blp = $c1->{$opt{bl}};
      die("no $opt{bl} found in schema\n") unless defined $blp;
      print "%baselang $opt{bl}\n";
    }
    while ($tup = <>){ chomp $tup; 
      my @a=rsplit($fs,$tup);
      print "$a[$blp]\n";
      for (0..scalar @a -1){
        next unless $a[$_];
        next if $_ == $blp;
        print "$rc1->{$_}\t$a[$_]\n";
      }
      print "\n";
    } }
  close F1;
}

sub rsplit{
  my ($fs,$tup)=@_;
  $tup=~ s/\r//;
  $tup=~ s/\xFF\xFE//;
  if($fs =~ /(\s|\\t|\\n|\\s)/){}
  else          {$tup=~ s/^\s+//; $tup=~ s/\s*$//;$fs = qr{\s*$fs\s*} }
  return split(/$fs/,$tup);
}


sub guess_fs{
  my $e = shift;
  my %o;
  for(split(/[ #\w]+/,$e)){$o{$_}++ }
  return (sort {$o{$b} <=> $o{$a}} (keys %o))[0] || ":";
}
__END__

=head1 NAME

tabpp - pretty-print textual relational databases

=head1 SYNOPSIS

 tabpp [-fs=#] [-f=term]  tab* > f
 iconv -f latin -t utf8 tab1 | tabpp -t=term > f
 tabpp -nfs=#  tab1     (change fs to "#")
 tabpp -e='perlexp'       tab1     

=head1 DESCRIPTION

Input should be Unicode UTF8

If no tab is provided, stdin is used.

=head1 FORMATS

each input table: 

  country:capital       first line - name of the fields
  Portugal:Lisboa       tuple 1
  France:Paris          tuple 2
  Spain:Madrid          tuple n

Output in format term:

  country Portugal
  capital Lisboa

  country France
  capital Paris  

  country Spain
  capital Madrid 

=head1  Options

 -fs='!'   changes the field separator (default calculated 
              from schema (first line) or ":")
 -f=format  choose the output format  (default "term")

 -nfs=#    changes the field separator to #

 -e='$F1+$F2 . "=$F4"' 

format: thesaurus

 -f=the 
 -the     
 -baselang=en
    to produce a format of thesaurus ISO... with baselang= en

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut