#!/usr/bin/perl -s

use common::sense;
# Switches filled in by `perl -s` (e.g. `-n=2 -ssize=100`); a missing or
# false value falls back to the default.
#   $n     - prefix of the dump files read from out/ and of the output
#            file name; presumably the n-gram order -- TODO confirm.
#   $ssize - cap on how many keys are kept per dump file and per language.
our $n ||= 3;
our $ssize ||= 50;

# Languages to process: the command-line arguments when any are given.
my @langs = @ARGV;

unless (@langs) {
	# Otherwise derive them from the dump files on disk, which are named
	# out/<n>-<language>_<rest>.dmp.
	my %langs;
	for my $file (glob "out/$n-*.dmp") {
		# Anchor on the "out/<n>-" prefix, quote $n, and require the "_"
		# separator that the per-language glob relies on.  Files that do
		# not fit are skipped instead of reusing a stale $1.
		next unless $file =~ m{\Aout/\Q$n\E-([^_]+)_};
		$langs{$1}++;
	}
	# Sorted so that the processing order (and the log) is reproducible.
	@langs = sort keys %langs;
}


# Feature selection.
#
# $used collects every key that ends up in the feature list; its values are
# only counters, the keys are what matters.  For each language:
#   1. every dump file out/<n>-<language>_*.dmp is loaded; it must evaluate
#      to a hash reference whose values are compared numerically;
#   2. keys starting with "_" are always kept;
#   3. the $ssize highest-valued remaining keys of each file get one vote;
#   4. the $ssize keys with the most votes across the language's files are
#      kept.
my $used = {};

# Return at most $limit of @keys, ordered by descending $count_of->{key}.
# Ties are broken by key name so the selection is reproducible from run to
# run instead of depending on hash order.  A negative $limit keeps all keys.
sub _top_keys {
	my ($count_of, $limit, @keys) = @_;
	my @ranked = sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b } @keys;
	splice @ranked, $limit if $limit >= 0 && @ranked > $limit;
	return @ranked;
}

for my $l (@langs) {
	print STDERR "Processing language $l\n";
	my @files = <out/$n-${l}_*.dmp>;
	my $local_used = {};    # key => number of files that ranked it in their top $ssize

	for my $f (@files) {

		print STDERR "  $f...\n";

		# NOTE: `do FILE` executes the dump as Perl code, so the files in
		# out/ must be trusted.  The "./" prefix is required: without it
		# `do` searches @INC for a relative path, and "." is no longer in
		# @INC since Perl 5.26, so the load would fail.
		my $hash = do "./$f";
		unless (ref($hash) eq 'HASH') {
			my $why = $@            ? "parse error: $@"
			        : defined $hash ? "did not return a hash reference"
			        :                 "cannot read: $!";
			$why =~ s/\s+\z//;
			warn "  skipping $f: $why\n";
			next;
		}

		# Keys starting with "_" are always part of the feature list.
		$used->{$_}++ for grep { /^_/ } keys %$hash;

		# The top-valued ordinary keys of this file each get one vote.
		my @candidates = grep { !/^_/ } keys %$hash;
		$local_used->{$_}++ for _top_keys($hash, $ssize, @candidates);
	}

	# Keep the keys that made it into the top list of the most files.
	$used->{$_}++ for _top_keys($local_used, $ssize, keys %$local_used);

}


# Write the selected feature names, one per line in sorted order, to
# <n>-features.txt in the current directory.
my $out_file = "$n-features.txt";
open my $fh, ">:utf8", $out_file
	or die "Cannot open $out_file for writing: $!\n";
print $fh "$_\n" for sort keys %$used;
# Buffered write errors (e.g. a full disk) only surface at close time.
close $fh
	or die "Cannot write $out_file: $!\n";

print STDERR "Done\n";
