#!/usr/bin/perl -s
#
# Extract the text of a PDF with `pdftotext -raw` and repair the artefacts
# that typically show up in the output: typographic ligatures and accents
# emitted as a separate spacing character before (or, for the cedilla,
# around) the base letter.
#
# Usage:  script [-debug] file.pdf  > file.txt
#
#   -debug   (perl -s switch) mark every leftover stray accent character
#            in the output as ###(x) so unhandled cases are easy to spot.
#
# The intermediate text is written to "<file>.__.txt" next to the input and
# is left in place afterwards.

our ($debug);    # set by the -s switch on the shebang line

use strict;
use warnings;
use utf8;        # this source contains UTF-8 literals

# Ordered list of [ pattern, replacement ] pairs; every pair is applied
# globally, in this order, to each line.
my @SUBSTITUTIONS = (

    # Ligatures. The code points are spelled out (U+FB00..U+FB03) so the
    # patterns cannot silently degrade into plain "fi" => "fi" no-ops when
    # the file passes through a tool that normalises ligatures.
    [ qr/\x{FB01}/ => 'fi'  ],
    [ qr/\x{FB00}/ => 'ff'  ],
    [ qr/\x{FB02}/ => 'fl'  ],
    [ qr/\x{FB03}/ => 'ffi' ],

    # Lower-case letters preceded by a detached accent.
    [ qr/[ˆ^]a/ => 'â' ],
    [ qr/[ˆ^]e/ => 'ê' ],
    [ qr/[ˆ^]o/ => 'ô' ],
    [ qr/´a/    => 'á' ],
    [ qr/`a/    => 'à' ],
    [ qr/˜a/    => 'ã' ],
    [ qr/´e/    => 'é' ],
    [ qr/´ı/    => 'í' ],    # dotless i, as produced for accented i
    ## [ qr/´i/ => 'í' ],    # disabled in the original
    [ qr/´o/    => 'ó' ],
    [ qr/˜o/    => 'õ' ],
    [ qr/´u/    => 'ú' ],
    [ qr/¨u/    => 'ü' ],
    [ qr/¨o/    => 'ö' ],
    ## [ qr/→/  => '->' ],   # disabled in the original
    [ qr/¸c/    => 'ç' ],
    [ qr/c¸/    => 'ç' ],

    # Upper-case counterparts.
    [ qr/[ˆ^]A/ => 'Â' ],
    [ qr/[ˆ^]E/ => 'Ê' ],
    [ qr/[ˆ^]O/ => 'Ô' ],
    [ qr/´A/    => 'Á' ],
    [ qr/`A/    => 'À' ],
    [ qr/˜A/    => 'Ã' ],
    [ qr/´E/    => 'É' ],
    [ qr/´I/    => 'Í' ],
    [ qr/´O/    => 'Ó' ],
    [ qr/˜O/    => 'Õ' ],
    [ qr/´U/    => 'Ú' ],
    [ qr/¨U/    => 'Ü' ],
    [ qr/¨O/    => 'Ö' ],
    ## [ qr/→/  => '->' ],   # disabled in the original
    [ qr/¸C/    => 'Ç' ],
    [ qr/C¸/    => 'Ç' ],
);

# Return a repaired copy of one line of pdftotext output (including its
# trailing newline, if any). The result has no newline when the line ended
# in a hyphen, so that printing it joins the word with the next line.
sub fix_line {
    my ($line) = @_;

    for my $rule (@SUBSTITUTIONS) {
        my ( $pattern, $replacement ) = @{$rule};
        $line =~ s/$pattern/$replacement/g;
    }

    # Undo end-of-line hyphenation (en dash first, then ASCII hyphen).
    $line =~ s/–\n//;
    $line =~ s/-\n//;

    # Drop the page-break form feed.
    # NOTE(review): the collapsed original reads s/ //, which would delete
    # the first space of every line; a literal form feed is presumed to have
    # been lost in transit -- confirm against the upstream script.
    $line =~ s/\f//;

    # Re-attach a tilde vowel that got separated from the preceding letter
    # by a space (e.g. "ç ã" => "çã").
    $line =~ s/([çÇS]) ([ãõÃÕ])/$1$2/g;

    # In debug mode, flag accent characters that no rule consumed.
    $line =~ s/([ˆ¸˜ı])/###($1)/g if $debug;

    return $line;
}

# Script entry point: convert the PDF named on the command line and print
# the repaired text to STDOUT. Dies on a missing argument, on pdftotext
# failure, or when the intermediate text file cannot be read.
sub main {
    my $pdf = shift @ARGV
        or die "usage: $0 [-debug] file.pdf\n";
    my $txt = "$pdf.__.txt";

    # List form: the file name is passed verbatim, never through a shell.
    system( 'pdftotext', '-raw', $pdf, $txt ) == 0
        or die "pdftotext failed on '$pdf' (status $?)\n";

    open my $in, '<:encoding(UTF-8)', $txt
        or die "cannot open '$txt': $!\n";
    binmode STDOUT, ':encoding(UTF-8)';

    while ( my $line = <$in> ) {
        print fix_line($line);
    }

    close $in
        or die "cannot close '$txt': $!\n";
    return;
}

# Run only when executed as a script, so the file can also be loaded (e.g.
# by a test) just to call fix_line().
main() unless caller;

1;

__END__
¸˜
ˆ
¸
˜
ı