#!/usr/bin/perl -s use strict; use encoding "utf8"; our($fs); my $esq = shift or die("Usage: $0 schema file*\n"); tabproj({fs=>$fs},$esq); sub tabproj{ my %opt =(); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my $fs=$opt{fs}; my ($esq)=@_; my (%c,$rc); @c{1..30} = ( 0..29); my $e = <>; chomp $e; $fs = guess_fs($e) unless defined $fs; my $n=0; for (rsplit($fs,$e)){ $c{$_} = $n++; } my @cmps= map {defined($c{$_})?$c{$_} : die("Error: no $_ field\n")} rsplit($fs,$esq); my @F = rsplit($fs,$e); print join ($fs, @F[@cmps]),"\n"; while(<>){ chomp; @F = rsplit($fs,$_); print join ($fs, @F[@cmps]),"\n"; } } sub guess_fs{ my $e = shift; my %o; for(split(/[ #\w]+/,$e)){$o{$_}++ } return (sort {$o{$b} <=> $o{$a}} (keys %o))[0] || ":"; } sub rsplit{ my ($fs,$tup)=@_; $tup=~ s/\r//; $tup=~ s/\xFF\xFE//; if($fs =~ /(\s|\\t|\\n|\\s)/){} else {$tup=~ s/^\s+//; $tup=~ s/\s*$//;$fs = qr{\s*$fs\s*} } return split(/$fs/,$tup); } __END__ =head1 NAME tabproj - textual relational databases project =head1 SYNOPSIS tabproj [-fs=#] sckema tab* > f iconv -f latin -t utf8 tab1 | tabproj 'f1:f3:f9' =head1 DESCRIPTION input should be Unicode UTF8. If no input is provided, stdin is used. =head1 FORMATS each input table: country:capital first line - name of the fields Portugal:Lisboa tuple 1 France:Paris tuple 2 Spain:Madrid tuple n =head1 skema syntax fieldK:fieldj:... (if FS = ":") =head1 Options -fs='!' changes the field separator (default calculated from schema (first line) or ":") =head1 AUTHOR J.Joao Almeida, jj@di.uminho.pt =head1 SEE ALSO perl(1). =cut