#!/usr/bin/perl -s use strict; use encoding "utf8"; our($fs); #$fs ||= ":"; my $cond = shift or die("usage: tabselect [-fs=#] condition file*\n"); tabselect({fs=>$fs},$cond); sub tabselect{ my %opt =(fs=>":"); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my $fs=$opt{fs}; my ($cond)=@_; my ($c,$rc); my $e = <>; chomp $e; $fs = guess_fs($e) unless defined $fs; my $n=0; for (rsplit($fs,$e)){ $rc->{$n} = $_; $c->{$_} = $n++; } $cond =~ s{\$F(\d+)}{ '$F__['.($1-1).']'}eg; $cond =~ s{\$([#\w]+)}{ if($1 eq "F__"){"\$$1"} else {'$F__['. (defined ($c->{$1})?$c->{$1}:die("Error: $1 not found\n")) .']'}}eg; print "$e\n"; while(<>){ chomp; my @F__ = rsplit($fs,$_); my $r = eval $cond; warn("bad condition...: $@ ($_)\n") if $@; print "$_\n" if $r; } } sub rsplit{ my ($fs,$tup)=@_; $tup=~ s/\r//; $tup=~ s/\xFF\xFE//; if($fs =~ /(\s|\\t|\\n|\\s)/){} else {$tup=~ s/^\s+//; $tup=~ s/\s*$//;$fs = qr{\s*$fs\s*} } return split(/$fs/,$tup); } sub guess_fs{ my $e = shift; my %o; for(split(/[ #\w]+/,$e)){$o{$_}++ } return (sort {$o{$b} <=> $o{$a}} (keys %o))[0] || ":"; } __END__ =head1 NAME tabselect - textual relational databases select =head1 SYNOPSIS tabselect [-fs=#] condition tab* > f iconv -f latin -t utf8 tab1 | tabselect '$F1 eq $F4' =head1 DESCRIPTION input should be Unicode UTF8. If no tab are provided stdin is used. =head1 FORMATS each input table: country:capital first line - name of the fields Portugal:Lisboa tuple 1 France:Paris tuple 2 Spain:Madrid tuple n =head1 Conditions syntax Condition follows the Perl syntax for conditions; C<$F1> is the first field; C<$id> is the field corresponding to first line C =head1 Options -fs='!' changes the field separator (default calculated from schema (first line) or ":") =head1 AUTHOR J.Joao Almeida, jj@di.uminho.pt =head1 SEE ALSO perl(1). =cut