#!/usr/bin/perl -s our($d,$e,$s,$name,$prefix,$dipp,$dislexskel,$dippskel,$script); use strict; use utf8::all; use Parse::DSLUtils qw(:all); my %lopt = (); $lopt{name}=$name if $name; $lopt{prefix}=$prefix if $prefix; $lopt{eval}=1 if ($e || $d); $lopt{stdout}=1 if $s; #if($dislexskel){ print DISLEXSKEL(); exit 0; } #if($dippskel ){ print DIPPSKEL() ; exit 0; } my $lexer = shift or die("usage:\n $0 file.plex\n $0 -e file.plex teste\n"); my $parse_lib =init_parse(); my $lex_lib =init_lex(); my $parse_decl=init_parse_decl(); if($lexer =~ /\.dipp$/ or $dipp){ my $f = _slurp8($lexer); $f =~ s/%dislex\s+(\S+)/dislex(_slurp8($1))/e; $f =~ s/%dislex\((\w+)\)\s+(\S+)/dislex({prefix=>$1},_slurp8($2))/e; $f =~ s/\n\%dislex\{(.*?)\n\%\}/dislex($1)/se; $f =~ s{\n%parse_decl\n}{$parse_decl}; $f =~ s{\n%parse_lib\n}{$parse_lib}; if($script){ my $out = "a.out"; $out = $1 if($lexer =~ m/(.+)\.dipp$/); open(my $file,">","$out.yp") or die ("cant create '$1.yp'\n"); print $file $f; close $f; system("yapp -s -m main -b '/usr/bin/perl -s' -o $out $out.yp"); chmod(0755,$out); } else { print $f; } } elsif($d){ my $text=shift ; $text = join("",<>) unless $text; dislex({%lopt},_slurp8($lexer)) ; yylexdebug(\&Parse::DSLUtils::lex,$text); } elsif($s){ dislex({%lopt}, _slurp8($lexer)); } elsif($script) { my $out = "a.out"; $out = $1 if($lexer =~ m/(.+)\.dislex$/); my $cont= dislex({%lopt}, _slurp8($lexer)); $cont =~ s{\n%lex_lib\n}{\n$lex_lib\n}; open(my $f,">",$out) or die ("cant create $1\n"); print $f q{#!/usr/bin/perl},"\n", dislex({%lopt}, _slurp8($lexer)); close $f; chmod(0755,$out); } else { print dislex({%lopt}, _slurp8($lexer)); } sub init_lex{ my $lexaux=q{ our $yyst; our $yyFile; sub _ppincludes{ my %opt = (cpp => 0); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my $t=shift; while($t =~ s{#?\binclude\(\s*?(\S+?)\s*?\)}{_slurp8($1) }e ){ print STDERR "INC $1\n";} if($opt{cpp}){ while($t =~ s{#include\s*["<](\S+?)[">]}{ _slurp8($1) }e ){ print STDERR "INC $1\n";} } $t =~ s/[ \t\r]+\n/\n/g; $t =~ s/\n\r/\n/g; $t; } sub _slurp8{ open(my $F,"<:utf8",$_[0]) or warn("###cant open $_[0]\n")&& return undef; my $c=join("",<$F>); close $F; $c =~ s/[ \t]*(\r\n|\n\r|\n|\r)/\n/g; ## remove sp before \n $c =~ s/^(\xFF\xFE|\xFE\xFF|\xEF\xBB\xBF|\x{FEFF})//; ## rem BOM! $c; } sub yylex{ my $f=shift; $yyst=undef; if(not $f){ $yyFile = join("",<>); } elsif(-f $f){ open(my $F,"<:utf8",$f) or warn("###cant open $f\n")&& return undef; $yyFile =join("",<$F>); close $F; } else { $yyFile = $f; } $yyFile .= "__EOF__"; $yyFile =~ s/^(\xFF\xFE|\xFE\xFF|\xEF\xBB\xBF|\x{FEFF})//; # BOM! $yyFile =~ s/[ \t]*(\r\n|\n\r|\n|\r)/\n/g; ## rem sp before \n #noincludes# $yyFile = _ppincludes($yyFile); ## from DSLUtils #nopp# $yyFile = _ppincludes($yyFile); ## from DSLUtils $p->YYParse( yylex => \&lex, yyerror => \&_yyerror); ## from DSLUtils } }; return $lexaux; } sub init_parse{ my $parseaux=q{ our $yyst; our $yyFile; sub yyparse{ my $f=shift; $yyst=undef; if(not $f){ $yyFile = join("",<>); } elsif(-f $f){ open(my $F,"<:utf8",$f) or warn("###cant open $f\n")&& return undef; $yyFile =join("",<$F>); close $F; } else { $yyFile = $f; } $yyFile .= "__EOF__"; $yyFile =~ s/^(\xFF\xFE|\xFE\xFF|\xEF\xBB\xBF|\x{FEFF})//; # BOM! $yyFile =~ s/[ \t]*(\r\n|\n\r|\n|\r)/\n/g; ## rem sp before \n my $p = new (__PACKAGE__ ); $yyFile = _ppincludes($yyFile); ## from DSLUtils $p->YYParse( yylex => \&lex, yyerror => \&_yyerror); ## from DSLUtils } sub _ppincludes{ my %opt = (cpp => 0); if(ref($_[0]) eq "HASH") {%opt = (%opt , %{shift(@_)}) } ; my $t=shift; while($t =~ s{#?\binclude\(\s*?(\S+?)\s*?\)}{_slurp8($1) }e ){ print STDERR "INC $1\n";} if($opt{cpp}){ while($t =~ s{#include\s*["<](\S+?)[">]}{ _slurp8($1) }e ){ print STDERR "INC $1\n";} } $t =~ s/[ \t\r]+\n/\n/g; $t =~ s/\n\r/\n/g; $t; } sub _slurp8{ open(my $F,"<:utf8",$_[0]) or warn("###cant open $_[0]\n")&& return undef; my $c=join("",<$F>); close $F; $c =~ s/[ \t]*(\r\n|\n\r|\n|\r)/\n/g; ## remove sp before \n $c =~ s/^(\xFF\xFE|\xFE\xFF|\xEF\xBB\xBF|\x{FEFF})//; ## rem BOM! $c; } sub _yyerror { ### global used: $yyFile my $e= 20; my $start= pos($yyFile)-20; ($start,$e)=(0,pos($yyFile)) if $start < 0; my $m= substr($yyFile,$start,$e)."<>".substr($yyFile,pos($yyFile),20); $m =~ s/\n/\\n/g; if ($_[0]->YYCurtok) { printf STDERR ('Error (%d): a "%s" (%s) was found where %s was expected'."\n", pos($yyFile), $_[0]->YYCurtok, $_[0]->YYCurval, $_[0]->YYExpect); print STDERR ".....$m \n" ; } else { print STDERR "Expecting one of ",join(", ",$_[0]->YYExpect),"\n"; print STDERR ".....$m \n" ; } } }; return $parseaux; } sub init_parse_decl{ return qq{ sub parseFile { yyinitfromfile(\$_[0]); parsecom(); } sub parseStr { yyinitfromstr(\$_[0]) ; parsecom(); } sub parsecom { my \$p = new (__PACKAGE__ ); \$yyFile = _ppincludes(\$yyFile); ## from DSLUtils \$p->YYParse( yylex => \\&lex, yyerror => \\&_yyerror); ## from DSLUtils } } } __END__ =head1 NAME dislex - Perl lexer analyser/processor generator =head1 SYNOPSIS dislex file.dislex (creates a lex function) dislex file.dipp (preproc. for yapp) dislex -d file.dislex example (create lex funcion and test is with example) dislex -script f.dislex (creates a "f" command) =head1 DESCRIPTION =head1 Options -d degub mode -e (the same as -s) execute -name=lexer ##default lex -prefix=zz ##default yy -script ## =head1 Dislex Files =head2 Start Conditions (STATES) Start Conditions (STATES) use the global variable $yyst BEGIN 5 // change to state 5 STATE:5{ // rules for state 5 ... } %initstate 3 // defines initial state The structure of a normal state is STATE:n{ ## rules for state n RegExp1 { action... return(TYPE,VALUE) } RegExp2 { BEGIN 3... return(TYPE,VALUE) } } =head3 state stack In a similar way as flex, there is a stack of States that can be used as REC S1 -- push Current and BEGIN S1 DONE -- BEGIN pop or in a equivalent way: yy_push_state("S1") -- push Current and BEGIN S1 yy_pop_state() -- BEGIN pop =head2 global variable yyFile dislex uses the global variable $yyFile (the contents to be analysed) yyinitfromfile($file) -- slurps file to $yyFile yyinitfromstr($string) -- copys string do $yyFile if no $yyFile available "join('', <>)" is used; =head2 Example of the dislex input format %initstate 3 ## define inicial STATE ($yyst) %white [\ \t]+ ## white-spaces are skiped %comments ##.+ ## comments are skiped %% STATE:3{ ### Block level \n*#(let:?)\s* { BEGIN 0; return("LET",$1); } \n*#(\w+):? { return("BLOCKID",lc($1)); } (.+?)(?=\n#\w) { return("TEXBLOCK",__remsp($1)); } (.+?)(?=__EOF__) { return("TEXBLOCK",__remsp($1)); } } STATE:0{ ### let lhs ([\w]+)\[(\d+)\] { return("IDs",{id=>$1,n=>$2});} ([\w\-]+) { return("ID",$1);} (=) { BEGIN 1; return($1,$1); } (\~) { BEGIN 0; return($1,$1); } \n\s*\n { BEGIN 0; return("LB",""); } ;\s*\n { BEGIN 0; return("LB",""); } \n(?=\S) { BEGIN 0; return("LB",""); } \s*#(\w+):? { BEGIN 3; return("BLOCKID",lc($1)); } } =head1 dipp input format Use Parse::DSLUtils ...normal yapp parser %parse_decl --> imports definition of parseFile and parseStr %dislex file.dislex --> compiles file.dislex and imports generated lex function or %dislex{ ....states and similar %} =head2 Example of dipp input format %{ use strict; use Parse::DSLUtils qw(:all); %} %token INT %left '+' '-' %left '*' '/' %% z : c EOF {return 0;} ; c : { } | c exp ';' { print " = $_[2]\n"; } ; exp : exp '+' exp {$_[1]+$_[3] } | exp '-' exp {$_[1]-$_[3] } | exp '*' exp {$_[1]*$_[3] } | exp '/' exp {$_[1]/$_[3] } | '(' exp ')' {$_[2] } | INT {$_[1] } ; %% ##----------------------------(lex) %dislex{ %white [\ \t\n]+ %comments ##.+ %% (\d+) { return("INT",$1); } ([\-\/+*();]) { return($1,$1); } (\w+) { return("ID",$1); } %} ##----------------------------(declares parseFile parseStr)) %parse_decl package main; my $f=shift or die("usage: $0 file\n"); ex3::parseFile($f); =head2 Parser using yapp and dislex -- Makefile example ex3: ex3.dipp dislex ex3.dipp > ex3.yp yapp -s -b '/usr/bin/perl -s' -o ex3 ex3.yp =head2 Standalone dislex comand -- Makefile example ex3: ex3.dislex dislex -script ex3.dislex