#!/usr/bin/perl -s
use strict;

use encoding "utf8";
our($fs);
#$fs ||= ":";

my $cond = shift or die("usage: tabselect [-fs=#] condition file*\n");

tabselect({fs=>$fs},$cond);

sub tabselect{
  my %opt =(fs=>":");
  if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ;
  my $fs=$opt{fs};
  my ($cond)=@_; 
  my ($c,$rc);

  my $e = <>;
  chomp $e;
  $fs = guess_fs($e) unless defined $fs;
  my $n=0;
  for (rsplit($fs,$e)){ $rc->{$n} = $_; $c->{$_} = $n++; }

  $cond =~ s{\$F(\d+)}{ '$F__['.($1-1).']'}eg;
  $cond =~ s{\$([#\w]+)}{ 
     if($1 eq "F__"){"\$$1"} 
     else           {'$F__['.
        (defined ($c->{$1})?$c->{$1}:die("Error: $1 not found\n")) .']'}}eg;

  print "$e\n";

  while(<>){
     chomp;
     my @F__ = rsplit($fs,$_);
     my $r = eval $cond;
     warn("bad condition...: $@ ($_)\n") if $@;
     print "$_\n" if $r;
  }
}

sub rsplit{
  my ($fs,$tup)=@_;
  $tup=~ s/\r//;
  $tup=~ s/\xFF\xFE//;
  if($fs =~ /(\s|\\t|\\n|\\s)/){}
  else          {$tup=~ s/^\s+//; $tup=~ s/\s*$//;$fs = qr{\s*$fs\s*} }
  return split(/$fs/,$tup);
}

sub guess_fs{
  my $e = shift;
  my %o;
  for(split(/[ #\w]+/,$e)){$o{$_}++ }
  return (sort {$o{$b} <=> $o{$a}} (keys %o))[0] || ":";
}

__END__


=head1 NAME

tabselect - textual relational databases select

=head1 SYNOPSIS

 tabselect [-fs=#] condition tab* > f
 iconv -f latin -t utf8 tab1 | tabselect '$F1 eq $F4'

=head1 DESCRIPTION

input should be Unicode UTF8.

If no tab are provided stdin is used.

=head1 FORMATS

each input table: 

  country:capital       first line - name of the fields
  Portugal:Lisboa       tuple 1
  France:Paris          tuple 2
  Spain:Madrid          tuple n

=head1 Conditions syntax

Condition follows the Perl syntax for conditions; C<$F1> is the first field;
C<$id> is the field corresponding to first line C<id>

=head1  Options

 -fs='!'   changes the field separator (default calculated 
              from schema (first line) or ":")

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut