use Lingua::PTD qw(bws ucts);

# Cut-off compared by filter() against the cl/tl and cr/tr ratios of a
# candidate pair: the pair is rejected when either ratio is greater than this
# value. gen_biwords() assigns its 't' option to this variable when a true
# value is supplied.
# NOTE(review): the origin of the 0.00144 figure is not recorded here -- confirm.
our $max_freq = 0.00144;

# Decide whether one candidate pair should be kept.
#
# Takes no arguments: the candidate is read from $_, which must be a hash
# reference with the keys 'l', 'r', 'cl', 'tl', 'cr' and 'tr'.
# NOTE(review): presumably bws() sets $_ to each candidate before invoking
# its filter callback -- confirm against the Lingua::PTD documentation.
#
# Returns 0 (reject) when
#   * 'l' and 'r' are equal ignoring case,
#   * 'l' or 'r' contains any character outside a-z / A-Z, or
#   * cl/tl or cr/tr is greater than $max_freq;
# returns 1 (keep) otherwise.
#
# Declared at file level on purpose: Perl installs a named sub in the package
# even when it is written inside another sub, so nesting it only hides its
# real scope.
sub filter {
    my $pair = $_;

    return 0 if lc( $pair->{'l'} ) eq lc( $pair->{'r'} );

    # A single negated class is enough: digits are non-letters too, so this
    # also rejects the digit-only entries that used to have a separate test.
    return 0
        if $pair->{'l'} =~ m/[^a-zA-Z]/ || $pair->{'r'} =~ m/[^a-zA-Z]/;

    return 0 if $pair->{'cl'} / $pair->{'tl'} > $max_freq;
    return 0 if $pair->{'cr'} / $pair->{'tr'} > $max_freq;

    return 1;
}

# Build the list of bi-word candidates for a pair of PTDs.
#
# Parameters:
#   1st, 2nd - each is handed unchanged to Lingua::PTD->new() to obtain the
#              left and right dictionary objects.
#   %opts    - 't' => cut-off to use instead of the current $max_freq for
#              this call. A false value (undef, 0, '') keeps the current one.
#
# Returns a reference to a fresh array holding what bws() produced when given
# the two dictionaries and \&filter as its 'filter' option. The entries are
# returned in the order bws() yields them: no sorting and no rank threshold
# is applied here.
sub gen_biwords {
    my ( $left_source, $right_source, %opts ) = @_;

    # local() makes the override visible to filter() while bws() runs and
    # restores the previous value on return, so a cut-off passed to one call
    # does not leak into later calls.
    local $max_freq = $opts{'t'} || $max_freq;

    my $left  = Lingua::PTD->new($left_source);
    my $right = Lingua::PTD->new($right_source);

    # Copy into our own array so the caller never shares bws()'s array.
    my @result = @{ bws( $left, $right, filter => \&filter ) };

    return \@result;
}

# True value so that loading this file with require/use succeeds.
1;
