#!/usr/bin/perl -s

# rem-dup-lines - remove (or comment out) duplicated non-empty lines.
#
# Switches are parsed by perl's own -s option; each one sets the package
# variable of the same name (a bare switch is set to 1):
#   -w                 also remove dup lines that have no word chars (def: keep)
#   -s                 don't ignore spaces (def: ignore spaces when comparing)
#   -p=pat             only remove dup lines that match /pat/
#   -c                 case sensitive (def: ignore case)
#   -com               comment duplicates instead of removing them
#   -par               work on paragraphs instead of lines
#   -key='\d+\s*(.+)'  compare $1 instead of the whole line

use strict;
use warnings;
use utf8::all;

our ($w, $s, $p, $c, $com, $par, $key);

# Values set through -s arrive as the raw bytes of the command line, while
# the input is decoded to characters.  Decode the patterns too, otherwise a
# non-ASCII pattern can never match.  Invalid UTF-8 is left untouched.
for my $pattern ($p, $key) {
    utf8::decode($pattern) if defined $pattern;
}

my $use_pattern = defined $p && length $p;
my $use_key     = defined $key;

if ($use_key) {
    # $#+ is the number of capture groups in the pattern of the last
    # successful match; the empty alternative makes the match always succeed.
    # Without a group $1 would be undefined and every matching line would be
    # treated as a duplicate of the first one.
    my $has_group = ( '' =~ /|(?:$key)/ ) && $#+ >= 1;
    die "$0: -key pattern must contain a capture group\n" unless $has_group;
}

$/ = "" if $par;    # paragraph mode: records are separated by empty lines

# Comparison key => record number ($.) of its first occurrence.
my %first_seen;

RECORD:
while ( my $record = <> ) {

    # Blank records are always kept, so paragraph structure survives.
    if ( $record !~ /\S/ ) {
        print $record;
        next RECORD;
    }

    # Records without word characters (rules such as "---") are kept
    # unless -w was given.
    if ( !$w && $record !~ /\w/ ) {
        print $record;
        next RECORD;
    }

    # With -p only records matching the pattern are candidates for removal.
    if ( $use_pattern && $record !~ /$p/ ) {
        print $record;
        next RECORD;
    }

    my $cmp = $record;
    if ($use_key) {

        # The captured text is compared verbatim.  Records that do not match
        # the key (or whose group did not take part in the match) are kept.
        if ( $record =~ /$key/ && defined $1 ) {
            $cmp = $1;
        }
        else {
            print $record;
            next RECORD;
        }
    }
    else {
        if ($s) {

            # Whitespace is significant, but the record terminator is not:
            # a final record without a newline must still compare equal.
            $cmp =~ s/\n+\z//;
        }
        else {
            $cmp =~ s/\s//g;
        }
        $cmp = lc $cmp unless $c;
    }

    if ( my $first = $first_seen{$cmp} ) {
        next RECORD unless $com;

        # Mark the duplicate with the record number of its first occurrence.
        print "###($first)$record";
    }
    else {
        $first_seen{$cmp} = $.;
        print $record;
    }
}

__END__

=encoding utf8

=head1 NAME

rem-dup-lines - remove (or comment) non empty duplicated lines

=head1 SYNOPSIS

 rem-dup-lines [-w] [-s] [-c] [-com] [-par] [-p=pat] [-key=regex] [file ...]

Input is read from the given files, or from standard input when no file
is given. The result is written to standard output.

=head1 DESCRIPTION

All empty lines are kept. Otherwise paragraph information would be lost.

All lines that don't have alpha-num chars are kept (unless -w).
Otherwise horizontal rules "---" and similar would be lost.

For all the lines that contain at least one alpha-num char, remove
duplicated lines, ignoring spaces (unless -s).

By default it behaves case insensitive (unless -c).

=head2 Options

 -w      also remove dup lines that don't have any alpha-num chars
 -p=pat  just remove dup lines that match /pat/
 -s      don't ignore spaces in comparisons (def: ignore)
 -c      case sensitive (def: ignore case)
 -com    comment duplicate lines with ###(N), where N is the number of
         the line (or paragraph, with -par) of the first occurrence;
         with -par only the first line of the paragraph is marked
 -par    remove dup paragraphs instead of lines (separated by empty lines)
 -key='\d+\s*(.+)'
         use $1 instead of line. Key must have 1 capture group!
         The captured text is compared as is (spaces and case are
         significant); lines that do not match the key are kept.

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut