#!/usr/bin/perl -w -s

use strict;
use Data::Dumper;
use File::MMagic;
use FileHandle;

# Command-line switches, populated by perl's -s option:
#   -debug          print every HTML file examined to STDERR
#   -base=PREFIX    prefix prepended to links that start with "/"
#   -redirect=FILE  Perl file whose last expression is a hash ref of
#                   link-target replacements
our ($debug, $base, $redirect);

# Redirect table: a link target found as a key here is recorded under the
# mapped value instead (see adicionatabela).
my $red = {};

# Regex alternations that recognise each language in a link's text.
my $l1 = 'portug|pt';
my $l2 = 'english';

# Default link filter: keep only links whose text mentions either language.
my $filtro = sub {
    my $texto = shift;
    return $texto =~ m{$l1|$l2}xi;
};

if ($redirect) {
    # "do FILE" searches @INC for relative names, and "." is no longer in
    # @INC on perl >= 5.26, so anchor an existing relative file explicitly.
    my $path = $redirect;
    $path = "./$path" if $path !~ m{^(?:/|\.\.?/)} && -e $path;

    $red = do $path;

    # $@ and $! are only meaningful when do() returned undef; $! in
    # particular keeps stale values from earlier, unrelated system calls.
    unless (defined $red) {
        die "cannot load redirect file '$redirect': "
          . ($@ || $! || 'file returned undef') . "\n";
    }
    die "redirect file '$redirect' must return a hash reference\n"
      unless ref($red) eq 'HASH';
}

$base ||= "";

# Global link table: $tabela{source file}{link target} = accumulated link text.
my %tabela = ();
my $mm = File::MMagic->new;

my $file = shift;
defined $file
  or die "usage: $0 [-debug] [-base=PREFIX] [-redirect=FILE] DIR [PATTERN]\n";

my $grafo = detecta($file, @ARGV);
savein("_tmp4", $grafo);

# Print "page<TAB>page" for every symmetric pair whose combined link text
# mentions language 1 before the "===" separator and language 2 after it.
# /s is required because link text may span several lines.
for my $origem (keys %$grafo) {
    for my $destino (keys %{ $grafo->{$origem} }) {
        print "$origem\t$destino\n"
          if $grafo->{$origem}{$destino} =~ m{(?:$l1).*===.*(?:$l2)}xis;
    }
}

# Extract the graph of symmetric (mutual) links from the global %tabela.
#
# %tabela is read as source => target => link text.  For every pair of
# distinct nodes $q and $t that link to each other, the result contains
#   $simet{$q}{$t} = "(<text of link t->q>===<text of link q->t>)"
#
# Returns a reference to the newly built hash.  %tabela is not modified.
sub tiraconclusoes {
    my %simet = ();
    for my $q (keys %tabela) {
        for my $t (keys %{ $tabela{$q} }) {
            next if $t eq $q;    # a self-link is not a symmetric pair

            # Check that $t is a known source before looking at its links:
            # reading $tabela{$t}{$q} directly would autovivify $tabela{$t}
            # and fill the global table with empty entries.
            next unless exists $tabela{$t} && $tabela{$t}{$q};

            $simet{$q}{$t} = "($tabela{$t}{$q}===$tabela{$q}{$t})";
        }
    }
    return \%simet;
}

# Record the links extracted from one file in the global %tabela.
#
#   $f    - path of the file the links came from
#   $base - prefix prepended to links that start with "/"
#   @ll   - list of [href, text] array refs
#
# Each href is normalised before being stored:
#   * a link starting with "/" gets $base prepended;
#   * a link starting with "http:" or "https:" is kept as is;
#   * any other link is taken as relative to the directory of $f;
#   * if the result is a key of the redirect table $red with a true value,
#     that value is used instead.
# The link text is appended to $tabela{$f}{href}, so several links to the
# same target accumulate their texts.
sub adicionatabela {
    my ($f, $base, @ll) = @_;

    # Directory part of $f, trailing slash included; empty when $f has no
    # directory component (avoids interpolating undef below).
    my $dir = $f =~ m{(.*/)} ? $1 : '';

    for my $link (@ll) {
        my ($href, $texto) = @{$link};

        if ($href =~ m{^/}) {
            $href = "$base$href";
        }
        elsif ($href !~ m{^https?:}i) {
            # Anchored test: a relative name that merely contains "http"
            # (e.g. "http-notes.html") is still a relative link.
            $href = "$dir$href";
        }

        $href = $red->{$href} if $red->{$href};
        $tabela{$f}{$href} .= $texto;
    }
    return;
}

# Replace the link filter stored in $filtro with the first argument and
# return the newly stored value.
sub set_filtro {
    my $novo = $_[0];
    $filtro = $novo;
    return $filtro;
}

# Scan a directory tree and build the graph of symmetric links.
#
#   $d    - directory (or file) handed to find(1)
#   $patt - optional regex a path must match to be considered (default ".")
#
# Every path listed by find that matches $patt and is reported as HTML by
# ishtml() has its links extracted (filtered by $filtro) and added to the
# global %tabela.  The full table is then dumped to the file "_grafogeral"
# and the result of tiraconclusoes() is returned.
sub detecta {
    my $d    = shift;
    my $patt = shift || ".";

    # Run find without a shell (list-form pipe open) so directory names
    # containing spaces or shell metacharacters are passed through intact.
    my @cmd = ('find');
    push @cmd, $d if defined $d && length $d;
    open(my $find, '-|', @cmd) or die "detecta: cannot run find: $!";
    chomp(my @paths = <$find>);
    close($find)
      or warn "detecta: find exited with status " . ($? >> 8) . "\n";

    for my $path (@paths) {
        next unless $path =~ m/$patt/;
        next unless ishtml($path);
        print STDERR "$path\n" if $debug;    # one path per line
        adicionatabela($path, $base, extrailink($path, $filtro));
    }

    savein("_grafogeral", \%tabela);
    return tiraconclusoes();
}

# True when the File::MMagic object $mm reports the type of the given file
# name as "text/html".
sub ishtml {
    my ($path) = @_;
    my $tipo = $mm->checktype_filename($path);
    return $tipo eq "text/html";
}

# Extract the links of an HTML file.
#
#   $f    - path of the file to read
#   $prec - optional predicate (code ref) called with the link text; when
#           given, only links for which it returns true are kept
#
# Returns a list of [href, text] array refs, in document order.  Only anchors
# of the form <a href="..." ...>text</a> are recognised, i.e. href must be the
# first attribute and must be quoted.  Dies if the file cannot be opened.
sub extrailink {
    my ($f, $prec) = @_;

    # Three-argument open with a lexical handle: the two-argument form would
    # strip whitespace from the name and treat a leading ">" or a trailing
    # "|" in it as an open mode.
    open(my $fh, '<', $f) or die("erro: cannot open '$f': $!");
    my $html = do { local $/; <$fh> };    # slurp the whole file
    close($fh);
    $html = '' unless defined $html;

    my @ll;
    while ($html =~ m{ <a \s+ href=['"] ([^'"]+) ['"] (?:.*?) > (.*?) </a> }gxis) {
        # Copy the captures before calling the predicate, which may run
        # regex matches of its own.
        my ($href, $texto) = ($1, $2);
        next if defined $prec && !$prec->($texto);
        push @ll, [$href, $texto];
    }
    return @ll;
}

# Dump a data structure to a file as Data::Dumper text.
#
#   savein($file, $what)
#   savein(\%options, $file, $what)
#
# An optional leading hash ref of options is accepted and skipped; no option
# currently changes the output.  The file is overwritten.
#
# Returns 1 on success.  On failure a warning is emitted and a false value
# is returned; the caller is not aborted.
sub savein {
    shift if ref($_[0]) eq 'HASH';
    my ($file, $what) = @_;

    # Three-argument open: the file name is used exactly as given.
    my $fh;
    unless (open($fh, '>', $file)) {
        warn "savein: cannot write '$file': $!\n";
        return;
    }
    print {$fh} Dumper($what);

    # Buffered write errors only surface when the handle is closed.
    unless (close($fh)) {
        warn "savein: error closing '$file': $!\n";
        return;
    }
    return 1;
}
