#!/usr/bin/perl -s
use strict;

use encoding "utf8";
our($fs);
 
my $esq = shift or die("Usage: $0 schema file*\n");

tabproj({fs=>$fs},$esq);

sub tabproj{
  my %opt =();
  if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ;
  my $fs=$opt{fs};
  my ($esq)=@_; 
  my (%c,$rc);
  @c{1..30} = ( 0..29);

  my $e = <>;
  chomp $e;
  $fs = guess_fs($e) unless defined $fs;
  my $n=0;
  for (rsplit($fs,$e)){ $c{$_} = $n++; }

  my @cmps= map {defined($c{$_})?$c{$_} : die("Error: no $_ field\n")} rsplit($fs,$esq);
  my @F = rsplit($fs,$e);
  print join ($fs,  @F[@cmps]),"\n";

  while(<>){
     chomp;
     @F = rsplit($fs,$_);
     print join ($fs,  @F[@cmps]),"\n";
  }
}

sub guess_fs{
  my $e = shift;
  my %o;
  for(split(/[ #\w]+/,$e)){$o{$_}++ }
  return (sort {$o{$b} <=> $o{$a}} (keys %o))[0] || ":";
}

sub rsplit{
  my ($fs,$tup)=@_;
  $tup=~ s/\r//;
  $tup=~ s/\xFF\xFE//;
  if($fs =~ /(\s|\\t|\\n|\\s)/){}
  else          {$tup=~ s/^\s+//; $tup=~ s/\s*$//;$fs = qr{\s*$fs\s*} }
  return split(/$fs/,$tup);
}
__END__

=head1 NAME

tabproj - textual relational databases project

=head1 SYNOPSIS

 tabproj [-fs=#] sckema tab* > f
 iconv -f latin -t utf8 tab1 | tabproj 'f1:f3:f9'

=head1 DESCRIPTION

input should be Unicode UTF8.

If no input is provided, stdin is used.

=head1 FORMATS

each input table: 

  country:capital       first line - name of the fields
  Portugal:Lisboa       tuple 1
  France:Paris          tuple 2
  Spain:Madrid          tuple n

=head1 skema syntax

  fieldK:fieldj:...   (if FS = ":")

=head1  Options

 -fs='!'   changes the field separator (default calculated 
              from schema (first line) or ":")

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut