#!/usr/bin/perl -s

our($d,$e,$s,$name,$prefix,$dipp,$dislexskel,$dippskel,$script);
use strict;
use utf8::all;
use Parse::DSLUtils qw(:all);
my %lopt = ();
$lopt{name}=$name     if $name;
$lopt{prefix}=$prefix if $prefix;
$lopt{eval}=1         if ($e || $d);
$lopt{stdout}=1       if $s;

#if($dislexskel){ print DISLEXSKEL(); exit 0; }
#if($dippskel  ){ print DIPPSKEL()  ; exit 0; }

my $lexer = shift or die("usage:\n $0 file.plex\n $0 -e file.plex teste\n");

my $parse_lib =init_parse();
my $lex_lib   =init_lex();
my $parse_decl=init_parse_decl();

if($lexer =~ /\.dipp$/ or $dipp){
  my $f = _slurp8($lexer);
  $f =~ s/%dislex\s+(\S+)/dislex(_slurp8($1))/e;
  $f =~ s/%dislex\((\w+)\)\s+(\S+)/dislex({prefix=>$1},_slurp8($2))/e;
  $f =~ s/\n\%dislex\{(.*?)\n\%\}/dislex($1)/se;
  $f =~ s{\n%parse_decl\n}{$parse_decl};
  $f =~ s{\n%parse_lib\n}{$parse_lib};
  if($script){ 
     my $out = "a.out";
     $out = $1 if($lexer =~ m/(.+)\.dipp$/);
   
     open(my $file,">","$out.yp") or die ("cant create '$1.yp'\n");
     print $file $f;
     close $f;
     system("yapp -s -m main -b '/usr/bin/perl -s' -o $out $out.yp");

     chmod(0755,$out);
  }
  else { print $f; }
}
elsif($d){
   my $text=shift ;
   $text = join("",<>) unless $text;     
   dislex({%lopt},_slurp8($lexer)) ;
   yylexdebug(\&Parse::DSLUtils::lex,$text);
}
elsif($s){   dislex({%lopt}, _slurp8($lexer)); }
elsif($script) { 
  my $out = "a.out";
  $out = $1 if($lexer =~ m/(.+)\.dislex$/);

  my $cont= dislex({%lopt}, _slurp8($lexer)); 
  $cont =~ s{\n%lex_lib\n}{\n$lex_lib\n};

  open(my $f,">",$out) or die ("cant create $1\n");
  print $f q{#!/usr/bin/perl},"\n", dislex({%lopt}, _slurp8($lexer)); 
  close $f;
  chmod(0755,$out);
  }
else { print dislex({%lopt}, _slurp8($lexer)); }

sub init_lex{ 
 my $lexaux=q{
 our $yyst;
 our $yyFile; 

 sub _ppincludes{
   my %opt = (cpp => 0);
   if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ;
   my $t=shift;
   while($t =~ s{#?\binclude\(\s*?(\S+?)\s*?\)}{_slurp8($1) }e ){
      print STDERR "INC $1\n";}
   if($opt{cpp}){
      while($t =~ s{#include\s*["<](\S+?)[">]}{ _slurp8($1) }e ){
        print STDERR "INC $1\n";} }
   $t =~ s/[ \t\r]+\n/\n/g;
   $t =~ s/\n\r/\n/g;
 $t;
}

sub _slurp8{
  open(my $F,"<:utf8",$_[0]) or
      warn("###cant open $_[0]\n")&& return undef;
  my $c=join("",<$F>);
  close $F;
  $c =~  s/[ \t]*(\r\n|\n\r|\n|\r)/\n/g;   ## remove sp before \n
  $c =~  s/^(\xFF\xFE|\xFE\xFF|\xEF\xBB\xBF|\x{FEFF})//;  ## rem BOM!
  $c;
}

sub yylex{ my $f=shift;
   $yyst=undef;
   if(not $f){ $yyFile = join("",<>); }
   elsif(-f $f){
       open(my $F,"<:utf8",$f) 
           or warn("###cant open $f\n")&& return undef;
       $yyFile =join("",<$F>);
       close $F; }
   else      { $yyFile = $f; }

   $yyFile .= "__EOF__";
   $yyFile =~  s/^(\xFF\xFE|\xFE\xFF|\xEF\xBB\xBF|\x{FEFF})//; # BOM!
   $yyFile =~  s/[ \t]*(\r\n|\n\r|\n|\r)/\n/g;   ## rem sp before \n

   #noincludes# $yyFile = _ppincludes($yyFile);        ## from DSLUtils
   #nopp# $yyFile = _ppincludes($yyFile);        ## from DSLUtils
   $p->YYParse( yylex   => \&lex,
                yyerror  => \&_yyerror);  ## from DSLUtils
 }

};
 return $lexaux;
}

sub init_parse{
 my $parseaux=q{
 our $yyst;
 our $yyFile; 
 sub yyparse{ my $f=shift;
   $yyst=undef;
   if(not $f){ $yyFile = join("",<>); }
   elsif(-f $f){
       open(my $F,"<:utf8",$f) 
           or warn("###cant open $f\n")&& return undef;
       $yyFile =join("",<$F>);
       close $F; }
   else      { $yyFile = $f; }

   $yyFile .= "__EOF__";
   $yyFile =~  s/^(\xFF\xFE|\xFE\xFF|\xEF\xBB\xBF|\x{FEFF})//; # BOM!
   $yyFile =~  s/[ \t]*(\r\n|\n\r|\n|\r)/\n/g;   ## rem sp before \n

   my $p = new (__PACKAGE__ );
   $yyFile = _ppincludes($yyFile);        ## from DSLUtils
   $p->YYParse( yylex   => \&lex,
                yyerror  => \&_yyerror);  ## from DSLUtils
 }

 sub _ppincludes{
   my %opt = (cpp => 0);
   if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ;
   my $t=shift;
   while($t =~ s{#?\binclude\(\s*?(\S+?)\s*?\)}{_slurp8($1) }e ){
      print STDERR "INC $1\n";}
   if($opt{cpp}){
      while($t =~ s{#include\s*["<](\S+?)[">]}{ _slurp8($1) }e ){
        print STDERR "INC $1\n";} }
   $t =~ s/[ \t\r]+\n/\n/g;
   $t =~ s/\n\r/\n/g;
 $t;
}

sub _slurp8{
  open(my $F,"<:utf8",$_[0]) or
      warn("###cant open $_[0]\n")&& return undef;
  my $c=join("",<$F>);
  close $F;
  $c =~  s/[ \t]*(\r\n|\n\r|\n|\r)/\n/g;   ## remove sp before \n
  $c =~  s/^(\xFF\xFE|\xFE\xFF|\xEF\xBB\xBF|\x{FEFF})//;  ## rem BOM!
  $c;
}

sub _yyerror {   ### global used: $yyFile
  my $e= 20;
  my $start= pos($yyFile)-20;
  ($start,$e)=(0,pos($yyFile)) if $start < 0;
  my $m= substr($yyFile,$start,$e)."<<?>>".substr($yyFile,pos($yyFile),20);
  $m =~ s/\n/\\n/g;
  if ($_[0]->YYCurtok) {
     printf STDERR ('Error (%d): a "%s" (%s) was found where %s was expected'."\n",
        pos($yyFile), $_[0]->YYCurtok, $_[0]->YYCurval, $_[0]->YYExpect);
     print STDERR ".....$m \n" ;
  }
  else {
     print STDERR "Expecting one of ",join(", ",$_[0]->YYExpect),"\n";
     print STDERR ".....$m \n" ;
  }
}

};
return $parseaux;
}

sub init_parse_decl{
 return qq{
sub parseFile {
   yyinitfromfile(\$_[0]); parsecom(); }
sub parseStr  {
   yyinitfromstr(\$_[0]) ; parsecom(); }
sub parsecom {
  my \$p = new (__PACKAGE__ );
  \$yyFile = _ppincludes(\$yyFile);       ## from DSLUtils
  \$p->YYParse( yylex   => \\&lex,
               yyerror  => \\&_yyerror);  ## from DSLUtils
}
}
}

__END__

=head1 NAME

dislex - Perl lexer analyser/processor generator

=head1 SYNOPSIS

 dislex file.dislex   (creates a lex function)
 dislex file.dipp     (preproc. for yapp)

 dislex -d file.dislex example   (create lex funcion and test is with example)

 dislex -script f.dislex   (creates a "f" command)

=head1 DESCRIPTION

=head1 Options

 -d  degub mode
 -e  (the same as -s)  execute
 -name=lexer         ##default lex
 -prefix=zz          ##default yy
 -script             ##

=head1 Dislex Files

=head2 Start Conditions (STATES)

Start Conditions (STATES) use the global variable $yyst

   BEGIN 5                      // change to state 5

   STATE:5{                     // rules for state 5
    ...
   }
   
   %initstate 3                 // defines initial state

The structure of a normal state is

 STATE:n{                ## rules for state n
 RegExp1     { action...  return(TYPE,VALUE) }
 RegExp2     { BEGIN 3... return(TYPE,VALUE) }
 }

=head3 state stack

In a similar way as flex, there is a stack of States that can be used as

 REC S1                 -- push Current and BEGIN S1
 DONE                   -- BEGIN pop

or in a equivalent way:

 yy_push_state("S1")    -- push Current and BEGIN S1
 yy_pop_state()         -- BEGIN pop

=head2 global variable yyFile

dislex uses the global variable $yyFile  (the contents to be analysed)

 yyinitfromfile($file)   -- slurps file to $yyFile
 yyinitfromstr($string)  -- copys string do $yyFile


if no $yyFile available "join('', <>)" is used;


=head2 Example of the dislex input format

   %initstate  3       ## define inicial STATE ($yyst)
   %white   [\ \t]+    ## white-spaces are skiped
   %comments ##.+      ## comments are skiped

   %%

   STATE:3{   ### Block level
   \n*#(let:?)\s*        { BEGIN 0; return("LET",$1);      }
   \n*#(\w+):?           {          return("BLOCKID",lc($1));  }
   (.+?)(?=\n#\w)        {          return("TEXBLOCK",__remsp($1)); }
   (.+?)(?=__EOF__)      {          return("TEXBLOCK",__remsp($1)); }
   } 

   STATE:0{   ### let lhs
   ([\w]+)\[(\d+)\]      {          return("IDs",{id=>$1,n=>$2});}
   ([\w\-]+)             {          return("ID",$1);}
   
   (=)                   { BEGIN 1; return($1,$1);     }
   (\~)                  { BEGIN 0; return($1,$1);     }
   \n\s*\n               { BEGIN 0; return("LB","");   }
   ;\s*\n                { BEGIN 0; return("LB","");   }
   \n(?=\S)              { BEGIN 0; return("LB","");   }
   \s*#(\w+):?           { BEGIN 3; return("BLOCKID",lc($1));  }
   }

=head1 dipp input format

 Use Parse::DSLUtils

 ...normal yapp parser

 %parse_decl           --> imports definition of parseFile and 
                               parseStr

 %dislex file.dislex    --> compiles file.dislex and imports
                               generated lex function

or 

 %dislex{
 ....states and similar
 %}


=head2 Example of dipp input format

 %{
 use strict;
 use Parse::DSLUtils qw(:all);
 %}

 %token INT
 %left '+' '-'
 %left '*' '/'

 %%
 z   : c EOF {return 0;} ;
 
 c   :           { }
     | c exp ';' { print " = $_[2]\n"; } ;
 
 exp : exp '+' exp {$_[1]+$_[3] }
     | exp '-' exp {$_[1]-$_[3] }
     | exp '*' exp {$_[1]*$_[3] }
     | exp '/' exp {$_[1]/$_[3] }
     | '(' exp ')' {$_[2]       }
     | INT         {$_[1]       } ;
 
 %%
 
 ##----------------------------(lex)
 %dislex{
 %white    [\ \t\n]+
 %comments  ##.+
 %%
 (\d+)               { return("INT",$1); }
 ([\-\/+*();])     { return($1,$1);    }
 (\w+)               { return("ID",$1);  }
 
 %}

 ##----------------------------(declares parseFile parseStr))
 %parse_decl

 package main;
 my $f=shift or die("usage: $0 file\n");
 ex3::parseFile($f);

=head2 Parser using yapp and dislex -- Makefile example

 ex3: ex3.dipp
	dislex ex3.dipp > ex3.yp
	yapp -s -b '/usr/bin/perl -s' -o ex3 ex3.yp

=head2 Standalone dislex comand -- Makefile example

 ex3: ex3.dislex
	dislex -script ex3.dislex