#!/usr/bin/env perl
#
# Line-oriented text filter: reads lines from the files named on the command
# line (or STDIN) and prints them back with structural markers inserted.
#
#   * A line that starts with a list/numbering marker gets "\n==item(MARKER)"
#     in place of that marker.
#   * A line consisting solely of a subdivision keyword plus a number
#     (e.g. "Chapter 3", "Artigo 1.º") becomes "\n==SUBDIV= ...\n\n".
#   * A word hyphenated across a line break is glued back together.
#   * Letter + spacing-accent pairs are folded into precomposed letters.
#
use strict;
use warnings;
use utf8::all;

# Roman numeral made only of i, v and x (either case).
my $rom = qr{(?i:[ivx]+)};

# Ordinal number such as "1o", "2.º", "3ª." (digits, optional dot, ordinal
# indicator, optional dot).
my $no = qr{(?:\d+(?:\.?[oºaª]\.?))};

# Subdivision keywords: first letter must be upper case, the remainder is
# matched case-insensitively.
my $subdiv_keyword = qr{
    (?: C(?i:hapter)      | C(?i:apítulo)
      | P(?i:arte?)
      | S(?i:ection)      | S(?i:ec?ção)
      | A(?i:rticle)      | A(?i:rtigo)
      | A(?i:nnex)        | A(?i:nexo)
      | A(?i:ppendix)     | A(?i:pêndice)
      | C(?i:onsiderando)
      | W(?i:hereas)
    )
}x;

# Letter followed by a stand-alone accent character => precomposed letter.
# (Presumably artifacts of an upstream text extraction step -- not verifiable
# from this script alone.)
my %composed_for = (
    'eˆ' => 'ê', 'aˆ' => 'â', 'oˆ' => 'ô',
    'e´' => 'é', 'a´' => 'á', 'o´' => 'ó', 'u´' => 'ú',
    'a˜' => 'ã', 'o˜' => 'õ', 'n˜' => 'ñ',
    'ı´' => 'í',
);
my $decomposed_re = do {
    my $alternatives = join '|', map { quotemeta } sort keys %composed_for;
    qr{($alternatives)};
};

# True when the previous line ended in a hyphenated word break, i.e. the
# current line is the continuation of that word.
my $is_continuation = 0;

while (<>) {
    # Normalise trailing whitespace (including the line terminator) to "\n".
    s{\s+$}{\n};

    unicode_fix();

    # The previous line lost its "-\n"; drop this line's indentation so the
    # two word halves join without a gap.
    s{^[ \t]+}{} if $is_continuation;
    $is_continuation = 0;

    # "1)", "(1)", "iv)", "(a)", "B)" ...
    s{^\s*(\(?(?:\d+|$rom|[a-z])\))}{\n==item($1)}i;

    # Bullet or dash followed by whitespace.
    s{^\s*([.*►•‐‑–—\-])(?=\s)}{\n==item($1)};

    # Dotted numbering with one to three levels followed by whitespace:
    # "1. ", "1.2. ", "10.2.3. ".  Each level may have several digits.
    s{^\s*(\d+(?:\.\d+){0,2}\.)(?=\s)}{\n==item($1)};

    # A line holding nothing but "<keyword> <number>".
    s{^ \s* ( $subdiv_keyword \s+ (?: \d+ | $rom | $no | [A-Z] ) ) \s* $}
     {\n==SUBDIV= $1\n\n}x;

    # Word split with a hyphen at the line break: remove "-\n" and remember
    # that the next line continues this word.
    $is_continuation = 1 if s{\b-\n}{};

    print $_;
}

# unicode_fix()
#
# Rewrites $_ in place, replacing every letter+accent pair listed in
# %composed_for with its single precomposed character.  Takes no arguments;
# the return value is not meaningful.
sub unicode_fix {
    s{$decomposed_re}{$composed_for{$1}}g;
}