#!/usr/bin/env perl
#
# Filter a slash-separated word list read from the files named on the
# command line (or STDIN) and print the cleaned entries to STDOUT.
#
# Each entry line is split on "/" into three fields: word, morph, flags.
#
#   * Lines starting with "##" are passed through unchanged.
#   * Lines with more than two slashes are re-emitted as "## FIXME: ...".
#   * Words containing non-word characters (other than hyphen-joined word
#     parts) are re-emitted as "## WEIRD ENTRY: ...".
#   * Repeated words are counted; with --dups/-d the repeats are re-emitted
#     as "## DUP: ..." instead of being printed as entries.
#   * In a trailing "pt=..." part of morph, spaces become "_" and commas
#     become "-".
#   * Words starting with "oku" get "#v," prepended to morph and "v"
#     appended to flags when those markers are not already present.
#
# A per-category count of everything noticed is written to STDERR at the end.
#
# Usage: script [--dups|-d] [file ...]

use warnings;
use strict;
use v5.26;

use Getopt::Long;
use utf8::all;

my $dups;    # flag: turn duplicated entries into "## DUP:" comment lines
GetOptions("dups|d" => \$dups)
    or die "Usage: $0 [--dups|-d] [file ...]\n";

my %warnings;      # warning category => number of affected entries
my %duplicates;    # word => number of times it has been seen so far

LINE:
while (my $line = <>) {
    chomp $line;

    # Comment lines go through untouched.
    if ($line =~ /^##/) {
        say $line;
        next LINE;
    }

    # more than two slashes?
    my $slash_count = ($line =~ tr{/}{});
    if ($slash_count > 2) {
        $warnings{'Too many slashes'}++;
        say "## FIXME: $line";
        next LINE;
    }

    # A limit of 3 keeps trailing empty fields, and the defaults cover lines
    # with fewer than two slashes, so no field is ever undef further down.
    my ($word, $morph, $flags) = split m{/}, $line, 3;
    $word  //= '';
    $morph //= '';
    $flags //= '';

    # Do we have strange things in the word?
    if ($word =~ /\W/ && $word !~ /^\w+(-\w+)+$/) {
        $warnings{'Entries with non-word chars'}++;
        say "## WEIRD ENTRY: $line";
        next LINE;
    }

    # Every occurrence after the first counts as a duplicate; it is only
    # dropped from the entry output when --dups was given.
    $duplicates{$word}++;
    if ($duplicates{$word} >= 2) {
        $warnings{'Duplicated entries'}++;
        if ($dups) {
            say "## DUP: $line";
            next LINE;
        }
    }

    # Rewrite the value of a trailing "pt=": space -> "_", comma -> "-".
    # A lexical is used so the package variable $a is left alone.
    $morph =~ s{pt=([^=]+)$}{
        my $value = $1;
        $value =~ tr/ ,/_-/;
        "pt=$value";
    }ge;

    if ($word =~ /^oku/) {
        $warnings{'Analyzed verbs'}++;
        $morph = "#v,$morph" unless $morph =~ /#v/;
        $flags .= "v" unless $flags =~ /v/;
    }

    say join("/", $word, $morph, $flags);
}

# Sorted so the report order is the same on every run.
for my $category (sort keys %warnings) {
    printf STDERR "%30s - %d entries\n", $category, $warnings{$category};
}