#!/usr/bin/perl -s use strict; our ($fs,$all,$err); my $f1 = shift; my $f2 = shift or die("join [-fs=...] ta1 ta2\n"); $fs ||= ':'; $err||=""; jjoin($f1,$f2); sub jjoin{ my ($f1,$f2)=@_; my ($c1,$c2); open(F1,$f1) or die; my $e1 = ; chomp $e1; my $n=0; for (split(/\s*$fs\s*/,$e1)){ $c1->{$_} = $n++ } open(F2,$f2) or die; my $e2 = ; chomp $e2; $n=0; for (split(/\s*$fs\s*/,$e2)){ $c2->{$_} = $n++ } my $ns=[sort { $c1->{$a} <=> $c1->{$b}} keys %$c1]; my ($ja1,$ja2,$ja3)=([],[],[]); for(sort { $c2->{$a} <=> $c2->{$b}} keys %$c2){ if (defined($c1->{$_})){ push(@$ja1,$c1->{$_}); push(@$ja2,$c2->{$_});} else { push @$ns, $_; push(@$ja3,$c2->{$_}); } } die("Error: cant find any common fields\n") unless @$ja2; my %engole=(); my %used=(); ### engole f2 while(){ chomp; my @a=split(/\s*$fs\s*/,$_); push @{$engole{join(" $fs ", @a[@$ja2])}}, [@a[@$ja3]]; } close F2; ### engole f1 print join (" $fs ",@$ns),"\n"; my $tup; while ($tup = ){ chomp $tup; my @a=split(/\s*$fs\s*/,$tup); my $chave = join(" $fs ", @a[@$ja1]); if($engole{$chave}){ for (@{$engole{$chave}}){print join(" $fs ",$tup, @$_),"\n" } $used{$chave}++ if $all; } else {print $err, join(" $fs ",$tup,(map {""} @$ja3)),"\n" if $all} } if ($all){ for(keys %used) { delete $engole{$_} } for my $chave (keys %engole ){ for (@{$engole{$chave}}) {my @a=(map {""} keys %$c1); push (@a, @$_); my @keyl = split($fs,$chave); for my $i (0..(scalar @keyl) -1){ $a[$ja1->[$i]] = $keyl[$1] } print $err,join(" $fs ", @a), "\n"; } } } close F1; } __END__ =head1 NAME tabjoin - join of textual relational databases =head1 SYNOPSIS tabjoin [-fs=#] tab1 tab2 > tab3 =head1 DESCRIPTION At list there should be a common field name. =head1 FORMATS each table: country:capital first line - name of the fields Portugal:Lisboa tuple 1 France:Paris tuple 2 Spain:Madrid tuple n =head1 Options -fs='!' changes the field separator (default ":") -all also prints the "non-joined" tuples =head1 AUTHOR J.Joao Almeida, jj@di.uminho.pt =head1 SEE ALSO perl(1). =cut