#!/usr/bin/perl -s

our ($w, $s, $p, $c, $com, $par, $key);
    # Switches filled in by perl's -s option (see the #! line):
    # -w      : also remove duplicated lines that have no word characters
    #           (default: keep them)
    # -s      : do not ignore whitespace when comparing (default: ignore it)
    # -p=pat  : only lines matching /pat/ are candidates for removal
    # -c      : case-sensitive comparison (default: ignore case)
    # -com    : comment duplicates out instead of removing them
    # -par    : work on paragraphs instead of lines
    # -key='\d+\s*(.+)' : compare $1 instead of the whole line

use strict;
use warnings;
use utf8::all;

# Comparison key => record number ($.) of the record that first produced it.
my %first_seen_at;

# Compile the user patterns once.  A pattern only counts when it is
# non-empty: an empty pattern would make m// silently reuse the last
# successful regex, and a plain truth test would wrongly discard "0".
my $filter_re = (defined $p   and length $p)   ? qr/$p/   : undef;
my $key_re    = (defined $key and length $key) ? qr/$key/ : undef;

# Paragraph mode: records are separated by runs of blank lines.
$/ = "" if $par;

RECORD:
while (my $original = <>) {

    # Records that are never candidates for removal are echoed untouched:
    # blank ones (they carry the paragraph structure) ...
    if ($original !~ /\S/) {
        print $original;
        next RECORD;
    }

    # ... ones without any word character, e.g. "-----" (unless -w) ...
    if (!$w and $original !~ /\w/) {
        print $original;
        next RECORD;
    }

    # ... and, when -p is given, ones that do not match the filter.
    if (defined $filter_re and $original !~ $filter_re) {
        print $original;
        next RECORD;
    }

    # Build the string that decides whether two records are "the same".
    my $cmp = $original;
    if (defined $key_re) {
        if ($original =~ $key_re) {
            # Use capture group 1.  If the pattern matched without setting
            # $1 (no group, or an optional group that did not take part),
            # fall back to the matched text instead of collapsing every
            # such record onto one undefined key.
            $cmp = defined $1
                 ? $1
                 : substr($original, $-[0], $+[0] - $-[0]);
        }
        else {
            # Records the key pattern does not apply to are kept as they are.
            print $original;
            next RECORD;
        }
        # In key mode the key is compared verbatim: no whitespace or case
        # normalisation is applied.
    }
    else {
        $cmp =~ s/\s//g unless $s;
        $cmp = lc $cmp  unless $c;
    }

    my $first = $first_seen_at{$cmp};
    if (defined $first) {
        # Duplicate: drop it, or with -com tag it with the record number
        # of its first occurrence.
        next RECORD unless $com;
        print "###($first)$original";
    }
    else {
        $first_seen_at{$cmp} = $.;
        print $original;
    }
}

__END__

=encoding utf8

=head1 NAME 

rem-dup-lines - remove (or comment out) non-empty duplicated lines

=head1 SYNOPSIS

 rem-dup-lines [-w] [-s] [-c] [-com] [-par] [-p=pat] [-key=regex] [file ...]

=head1 DESCRIPTION

All empty lines are kept. Otherwise paragraph information would be lost.

All lines that don't have alpha-num chars are kept (unless -w). Otherwise
horizontal rules "---" and similar would be lost.

For all lines that contain at least one alpha-num char, duplicated lines
are removed, ignoring spaces (unless -s).

By default the comparison is case-insensitive (unless -c).


=head2 Options

 -w       also remove dup lines that don't have any alpha-num chars
 -p=pat   only remove duplicates among lines that match /pat/
 -s       don't ignore spaces in comparisons (def: ignore)
 -c       case sensitive (def:ignore case)
 -com     comment duplicate lines with ###(line)
 -par     remove dup paragraphs instead of lines (separated by empty lines)
 -key='\d+\s*(.+)'   use $1 instead of line. Key must have 1 capture group!

=head1 AUTHOR

J.Joao Almeida, jj@di.uminho.pt

=head1 SEE ALSO

perl(1).

=cut