#!/usr/bin/perl -s use strict; our ($fs); my $ks = shift or die("$0 [-fs=...] key tab*\n"); ##my $f1 = shift; #$fs ||= ':'; jkeys({fs=>$fs},$ks); sub jkeys{ my %opt =(); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my ($ks)=@_; my (%c,@ja); my $fs=$opt{fs}; my $e1 = <>; chomp $e1; $fs = guess_fs($e1) unless defined $fs; my $n=0; for (rsplit($fs,$e1)){ $c{$_} = $n++ } $n=0; for (rsplit($fs,$ks)){ if(/^\d+$/){ push(@ja,$_-1); } else { die("Field '$_' not found in ($e1) \n".join("=",keys %c)."=\n") unless defined $c{$_}; push(@ja,$c{$_});} } print "$e1\n"; my %engolef=(); my %engole=(); my $lin=0; while(<>){ chomp; $lin++; my @a=rsplit($fs,$_); my $k = join("$fs", @a[@ja]); if(defined $engole{$k}) { warn ("## Removed repetition of '$k' in ($lin)($engole{$k}) $_\n"); } else { push @{$engolef{$k}}, [$_]; $engole{$k}=$lin; print "$_\n" } } } sub rsplit{ my ($fs,$tup)=@_; $tup=~ s/\r//; $tup=~ s/\xFF\xFE//; if($fs =~ /(\s|\\t|\\n|\\s)/){} else {$tup=~ s/^\s+//; $tup=~ s/\s*$//;$fs = qr{\s*$fs\s*} } return split(/$fs/,$tup); } sub guess_fs{ my $e = shift; my %o; for(split(/[ #\w]+/,$e)){$o{$_}++ } return (sort {$o{$b} <=> $o{$a}} (keys %o))[0] || ":"; } __END__ =head1 NAME tabkeys - check keys in (textual) table =head1 SYNOPSIS tabkeys [-fs=::] "field1::field2" tab* =head1 DESCRIPTION If no tab is provided, stdin in used. =head1 FORMATS each table: country:capital first line - name of the fields Portugal:Lisboa tuple 1 France:Paris tuple 2 Spain:Madrid tuple n =head1 Options -fs='!' changes the field separator (default calculated from schema (first line) or ":") =head1 AUTHOR J.Joao Almeida, jj@di.uminho.pt =head1 SEE ALSO perl(1). =cut