#!/usr/bin/perl -s
use strict;
our ($fs);
my $ks = shift or die("$0  [-fs=...] key tab*\n");
##my $f1 = shift;
#$fs ||= ':';

jkeys({fs=>$fs},$ks);

sub jkeys{
  my %opt =();
  if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ;
  my ($ks)=@_; 
  my (%c,@ja);
  my $fs=$opt{fs};

  my $e1 = <>;
  chomp $e1;
  $fs = guess_fs($e1) unless defined $fs;
  my $n=0;
  for (rsplit($fs,$e1)){ $c{$_} = $n++ }

  $n=0;
  for (rsplit($fs,$ks)){ 
    if(/^\d+$/){ push(@ja,$_-1);
         }
    else { die("Field '$_' not found in ($e1) \n".join("=",keys %c)."=\n") unless defined $c{$_};
         push(@ja,$c{$_});}
    }
  print "$e1\n";
  my %engolef=(); 
  my %engole=(); 
  my $lin=0;
  while(<>){ chomp; 
    $lin++;
    my @a=rsplit($fs,$_);
    my $k = join("$fs", @a[@ja]);
    if(defined $engole{$k}) { 
        warn ("## Removed repetition of '$k' in ($lin)($engole{$k}) $_\n");
    }
    else { push @{$engolef{$k}}, [$_]; 
           $engole{$k}=$lin;
           print "$_\n"
    }
  }
}

sub rsplit{
  my ($fs,$tup)=@_;
  $tup=~ s/\r//;
  $tup=~ s/\xFF\xFE//;
  if($fs =~ /(\s|\\t|\\n|\\s)/){}
  else          {$tup=~ s/^\s+//; $tup=~ s/\s*$//;$fs = qr{\s*$fs\s*} }
  return split(/$fs/,$tup);
}


sub guess_fs{
  my $e = shift;
  my %o;
  for(split(/[ #\w]+/,$e)){$o{$_}++ }
  return (sort {$o{$b} <=> $o{$a}} (keys %o))[0] || ":";
}

__END__

=head1 NAME

tabkeys - check keys in (textual) table

=head1 SYNOPSIS

tabkeys [-fs=::]  "field1::field2" tab*

=head1 DESCRIPTION


If no tab is provided, stdin in used.

=head1 FORMATS

each table: 

  country:capital       first line - name of the fields
  Portugal:Lisboa       tuple 1
  France:Paris          tuple 2
  Spain:Madrid          tuple n

=head1  Options

 -fs='!'   changes the field separator (default calculated 
              from schema (first line) or ":")

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut