#!/usr/bin/perl use strict; use CGI qw(:standard); use CWB; use CWB::CQP; my $regis = CWB::RegistryDirectory(); my @rs = (); my $ctxt = ["1 tu","1 s","1 p","7 words","20 words","0 words"]; my $corpus = lc(param('c')) || ''; my $n = param('n') || '1 tu'; my $max = param('max') || '1000'; my $cqp ; my (@sats,@aligs,@poats); $cqp = CWB::CQP->new("-b 10 "); print header(-charset=>'utf-8'); binmode(STDOUT,":utf8"); # print header; binmode(STDOUT); print start_html("-title"=>"Pesquisa no corpus '$corpus'"); unless ($cqp->ok() ) { print p(join(br,$cqp->error_message)); } my @corpora = map {s/^\s*//;ucfirst(lc($_))} cq("show corpora;"); shift @corpora; ## first line is garbage.. is it? my $registry= param('r') || $regis ; my ($paral,$reg); if ($corpus) { $reg = new CWB::RegistryFile("$registry/$corpus"); @aligs = $reg->list_attributes("a"); $paral = shift(@aligs); ### print "ALIGS",ol(li([@aligs])); print p("ALIGS = ",$paral); print p("Name =",$reg->name); print p("Info =",$reg->info); print p("Id =",$reg->id); print p("Home =",$reg->home); @sats = $reg->list_attributes("s"); ###print "S-atrib",ol(li([@sats])); $n = fix_ctxt($n,$ctxt,@sats); } print h1("Pesquisa no corpus '$corpus/$paral'"), hr, p("registry = $regis"), start_form, table(Tr(td(["procurar"," contexto", " no corpus ", "max"])), Tr(td([textfield('k'), popup_menu('n',poss_ctxt($ctxt ,@sats)), popup_menu('c',[@corpora]), popup_menu('max',[20,200,1000,2000])]))), end_form; if (param()) { my $pat = param("k"); $pat = doyourbest($pat); print "
\n$pat\n
\n"; cq(uc($corpus).";"); cq("set HardBoundary 15;"); cq("set LeftContext $n ;"); cq("set RightContext $n ;"); cq("set LeftKWICDelim '' ;"); cq("set RightKWICDelim '' ;"); cq("show +$paral ;") if $paral; # cq("show -cpos ;"); # hide position in the corpus cq("A = $pat"); @rs = cq("cat A 1 $max;"); print "número de ocorrências encontradas: ",b(cq("size A;")),hr; if($paral){ print "\n"; while(@rs){ my $a=shift(@rs); my $b=shift(@rs); $a =~ s/_unicode_(\d+)_/pack("U",$1)/ge; $b =~ s/_unicode_(\d+)_/pack("U",$1)/ge; $b =~ s/-->$paral?://; #remove paralell corpus name print Tr(td([$a,$b])); } print "
\n"; } else{ print ol(li([@rs])); } } print hr,end_html; # undef $q; sub doyourbest { my $pat = shift; $pat =~ s/^\s*(\w+)\s*$/"$1";/; $pat =~ s/\b(alema|apos)\s*=\s*"(.*?)"/$1 contains "$2"/g; $pat } sub fix_ctxt { my ($n,$a,@b)=@_; poss_ctxt([$n,@$a],@b)->[0]; } sub poss_ctxt { my ($a,@b)=@_; return $a unless $corpus; my %a; @a{@b}=@b; my @c = grep { /words/ or /\d+\s+(\S+)/ && defined $a{$1} } @$a; [@c]; } sub cq { my $a=shift; my @r = $cqp->exec($a); unless ($cqp->ok() ){ print p($a,join(br,$cqp->error_message)); } @r; } __END__