#!/usr/bin/perl -w -s
# term2tab - transform a tag-format file into a table (full manual in the POD
# at the end of this file).
#
# usage: term2tab [options] File
#   options: -fs=.. -rs=.. -fss=.. -nfs=.. -nfs2=.. -nfs3=..

use strict;
use warnings;

# Command-line switches: perl's -s option stores "-name=value" in $main::name.
our ($rs, $fs, $fss, $nfs, $nfs2, $nfs3);

$rs ||= '';                  # input record separator (default: empty lines)
$rs =~ s/\\n/\n/g;           # a literal "\n" typed on the command line means newline

$nfs  ||= '::';              # output field separator
$nfs2 ||= '§§';              # output separator between repeated values of a field
$nfs3 ||= '§';               # output marker replacing a "\n+" continuation line

$fs  ||= qr{\s*\n(?=\w)};    # input field separator
$fss ||= qr{[ \t=:]+};       # input tag/value separator

# Parse one (already chomped) record into its fields.
#
# Returns a hash reference with:
#   cells    - { tag => cell text }; repeated tags are joined with " $nfs2 "
#   names    - tags in order of first appearance, including tags whose value
#              is empty (those still define a column)
#   comments - text of the fields starting with '#' or '%', one per line
#   unparsed - fields that match no known shape, each prefixed with "???: "
sub parse_record {
    my ($record) = @_;

    my (%values_of, %seen, @names);
    my ($comments, $unparsed) = ('', '');

    # The separators are grouped so that a user-supplied alternation such as
    # -fss='=|:' cannot change the meaning of the surrounding pattern.
    for my $field (split /(?:$fs)\s*/, $record) {
        if ($field =~ /^([#%].+)/) {
            # No /s here: only the first line of a comment field is kept.
            $comments .= "$1\n";
        }
        elsif ($field =~ /^([^\s=:]+)(?:$fss)\s*(.+)/s) {
            my ($id, $value) = ($1, $2);
            push @names, $id unless $seen{$id}++;
            $value =~ s/\n\+/ $nfs3/g;    # fold "+" continuation lines
            push @{ $values_of{$id} }, $value;
        }
        elsif ($field =~ /^([^\s=:]+)(?:$fss)\s*/s) {
            # Tag with an empty value: register the column, store nothing.
            my $id = $1;
            push @names, $id unless $seen{$id}++;
        }
        else {
            $unparsed .= "???: $field\n";
        }
    }

    my %cell_of = map { $_ => join(" $nfs2 ", @{ $values_of{$_} }) }
        keys %values_of;

    return {
        cells    => \%cell_of,
        names    => \@names,
        comments => $comments,
        unparsed => $unparsed,
    };
}

# Build the final table text.
#
#   $columns - array ref with the column names, in output order
#   $items   - array ref of output items in input order; a plain string is
#              copied verbatim, a hash ref ({ tag => cell }) becomes one row
#
# Rows are rendered only here, once every column is known, so each row has
# exactly as many cells as the header; missing cells are left empty.
sub render_table {
    my ($columns, $items) = @_;

    my $table = join(" $nfs ", @{$columns}) . "\n";
    for my $item (@{$items}) {
        if (ref($item)) {
            $table .= join(" $nfs ", map { $_ // '' } @{$item}{ @{$columns} })
                . "\n";
        }
        else {
            $table .= $item;
        }
    }
    return $table;
}

# Read the records from the files named on the command line (or from STDIN)
# and print the resulting table on STDOUT.
sub main {
    local $/ = $rs;    # '' selects paragraph mode: records end at empty lines

    my (@columns, %is_column, @items);
    while (my $record = <>) {
        next unless $record =~ /\w/;

        # Records starting with '%' are directives: copied through untouched.
        if ($record =~ /^\%/) {
            push @items, $record;
            next;
        }

        chomp $record;
        my $parsed = parse_record($record);

        for my $id (@{ $parsed->{names} }) {
            push @columns, $id unless $is_column{$id}++;
        }

        # Same relative order as before: unparsed fields, comments, the row.
        push @items, grep { length } $parsed->{unparsed}, $parsed->{comments};
        push @items, $parsed->{cells};
    }

    print render_table(\@columns, \@items);
    return;
}

# Run only when executed as a script, so the helpers can be loaded and tested.
main() unless caller;

__END__

=encoding utf8

=head1 NAME

term2tab - transform a tag-format into a table

=head1 SYNOPSIS

 term2tab [option] tagdictionary
   -rs     input record separator    (def: empty lines)
   -fs     input field separator     (def: \s*\n(?=\w), a newline followed
                                      by a word character)
   -fss    input tag-value separator (def: [ \t=:]+ )
   -nfs    output field separator    (def "::")
   -nfs2   output subfield separator 1 (for multiple values) (def "§§")
   -nfs3   output subfield separator 2 (for "\n+" continuation lines
                                      in values) (def "§")

=head1 DESCRIPTION

term2tab converts a tag-format file into a table; the default record
separator is an empty line.

The input file should follow this tag-format:

 lang1 t11
 lang2 t12
 rel1 ...

 lang1 t21
 lang2 t22
 lang2 moret22
 rel2 !!!

The output looks like:

 lang1 :: lang2 :: rel1 :: rel2
 t11 :: t12 :: ... :: 
 t21 :: t22 §§ moret22 ::  :: !!!

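=head2 Options

 -fs='::'     field separator     (def: a newline followed by a word character)
 -rs='\n'     record separator    (def: empty lines)
 -fss='='     tag/value separator (def [ \t=:]+)

Example: if the file has the following format

 L1=v11 :: L2= v21 :: Ln= vn1
 L1=v12 :: L2= v22 :: Ln= vn2

the command

 term2tab -rs='\n' -fs='::' -fss='=' file

would produce a table like this (modulo extra spaces carried over from the
input):

 L1 :: L2 :: Ln
 v11 :: v21 :: vn1
 v12 :: v22 :: vn2

=head2 Errors and Warnings

A field that cannot be parsed as a tag followed by a value is copied to the
output on a line prefixed with C<???:>.

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut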