perlgrok.pl
changeset 8 31c4ce4d9b73
parent 7 c4085453688b
child 9 f1cb959b4603
equal deleted inserted replaced
7:c4085453688b 8:31c4ce4d9b73
     1 #!/usr/bin/perl
     1 #!/usr/bin/perl
     2 # $Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $
     2 # $Id: perlgrok.pl,v d6e64daafdc1 2021/09/29 23:46:59 grin $
     3 #
     3 #
     4 # grok parser in perl
     4 # grok parser in perl
     5 #
     5 #
     6 # (c)Peter 'grin' Gervai, 2021; CC_BY-SA-4.0 / GPLv3+
     6 # (c)Peter 'grin' Gervai, 2021; CC_BY-SA-4.0 / GPLv3+
     7 # grin*grin.hu
     7 # grin*grin.hu
    12 use strict;
    12 use strict;
    13 use IO::File;
    13 use IO::File;
    14 use Getopt::Long;
    14 use Getopt::Long;
    15 
    15 
    16 # pattern filename
    16 # pattern filename
    17 my $fn_pattern = "patterns";
    17 my @fn_patterns = ();
    18 
    18 
    19 # top pattern we resolve
    19 # top pattern we resolve
    20 my $top = "TEST";
    20 my $top = "TEST";
    21 
    21 
    22 # debug level
    22 # debug level
    26 my $fn_fail;
    26 my $fn_fail;
    27 my $fn_parsed = "-"; # SDTOUT
    27 my $fn_parsed = "-"; # SDTOUT
    28 my %input;
    28 my %input;
    29 my %processing;
    29 my %processing;
    30 my $help = 0;
    30 my $help = 0;
       
    31 my $re_trace = 1; # check all regexp
       
    32 
       
    33 my %xlate;  # perl names don't support period in name, so we need to translate
    31 
    34 
    32 GetOptions( 
    35 GetOptions( 
    33     "input=s"       => \$fn_test,
    36     "input=s"       => \$fn_test,
    34     "pattern=s"     => \$fn_pattern,
    37     "pattern=s"     => \@fn_patterns,
    35     "top=s"         => \$top,
    38     "top=s"         => \$top,
    36     "parsed=s"      => \$fn_parsed,
    39     "parsed=s"      => \$fn_parsed,
    37     "failed=s"      => \$fn_fail,
    40     "failed=s"      => \$fn_fail,
    38     "debug=i"       => \$DEBUG,
    41     "debug=i"       => \$DEBUG,
    39     "help"          => \$help,
    42     "help"          => \$help,
    40 ) or die "error in command line arguments";
    43 ) or die "error in command line arguments";
    41 
    44 
    42 if( $help ) {
    45 if( $help ) {
    43     my $VER = '$Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $';
    46     my $VER = '$Id: perlgrok.pl,v d6e64daafdc1 2021/09/29 23:46:59 grin $';
    44     print "$0 $VER (c) Peter 'grin' Gervai, 2021\n\n";
    47     print "$0 $VER (c) Peter 'grin' Gervai, 2021\n\n";
    45     print "Usage: $0 [--input input_file] [--pattern pattern_file] [--top final_macro] \n";
    48     print "Usage: $0 [--input input_file] [--pattern pattern_file] [--pattern another] [--top final_macro] \n";
    46     print "          [--parsed parsed_outfile] [--failed failed_lines_file] [--debug n] [--help]\n";
    49     print "          [--parsed parsed_outfile] [--failed failed_lines_file] [--debug n] [--help]\n";
    47     print "       defaults are stdin, stdout, 'patterns' as patternfile and 'TEST' as final macro.\n\n";
    50     print "       defaults are stdin, stdout, 'patterns' as patternfile and 'TEST' as final macro.\n\n";
    48     exit;
    51     exit;
    49 }
    52 }
    50 
    53 
       
    54 if( $#fn_patterns < 0 ) {
       
    55     push @fn_patterns, "patterns";
       
    56 }
       
    57 
       
    58 
    51 # read patterns
    59 # read patterns
    52 my $f = new IO::File "< $fn_pattern" or die "cannot open patterns: $!";
    60 foreach my $fn_pattern (@fn_patterns) {
    53 while( <$f> ) {
    61     my $f = new IO::File "< $fn_pattern" or die "cannot open pattern $fn_pattern: $!";
    54     chomp;
    62     &d(7, "Reading $fn_pattern pattern file contents");
    55     # skip empty and comment lines
    63     while( <$f> ) {
    56     next if /^ *(#|$)/;
    64         chomp;
    57     
    65         # skip empty and comment lines
    58     # parse <name><space><content>
    66         next if /^ *(#|$)/;
    59     if( /^(\S+) +(.+)$/ ) {
    67         
    60         $input{$1} = $2;
    68         # parse <name><space><content>
    61     } else {
    69         if( /^(\S+) +(.+)$/ ) {
    62         die "Patternfile parse error:\n$_\n\n";
    70             if( defined( $input{$1} ) ) {
    63     }
    71                 &d(1, "Warning: duplicate macro $1 in $fn_pattern; overwrites previous version!");
    64 }
    72             }
    65 $f->close;
    73             $input{$1} = $2;
       
    74         } else {
       
    75             die "Patternfile $fn_pattern parse error:\n$_\n\n";
       
    76         }
       
    77     }
       
    78     $f->close;
       
    79 }
    66 
    80 
    67 &d(5, "Start program");
    81 &d(5, "Start program");
    68 &resolve( $top, '<root>' );
    82 &resolve( $top, '<root>' );
    69 
    83 
    70 my $res = $input{$top};
    84 my $res = $input{$top};
    71 &d(9, "RESULT\n========\n${top} = $res" );
    85 &d(9, "RESULT\n========\n${top} = $res" );
    72 &d(9, "Pattern length=" . length($res) );
    86 &d(9, "Pattern length=" . length($res) );
    73 
    87 
       
    88 eval {
       
    89     my $test = "abc123";
       
    90     $test =~ /$res/;
       
    91 };
       
    92 if( $@ ) {
       
    93     print "ERR: $@\n\n";
       
    94     exit;
       
    95 }
       
    96 
    74 ### read file
    97 ### read file
    75 $f = new IO::File "< $fn_test" or die "cannot open $fn_test: $!";
    98 my $f = new IO::File "< $fn_test" or die "cannot open $fn_test: $!";
    76 my $ff;
    99 my $ff;
    77 if( $fn_fail ) { $ff = new IO::File "> $fn_fail" or die "cannot open $fn_fail for writing: $!"; }
   100 if( $fn_fail ) { $ff = new IO::File "> $fn_fail" or die "cannot open $fn_fail for writing: $!"; }
    78 my $fout = new IO::File "> $fn_parsed" or die "cannot open output for parsed: $!";
   101 my $fout = new IO::File "> $fn_parsed" or die "cannot open output for parsed: $!";
    79 
   102 
    80 while( <$f> ) {
   103 while( <$f> ) {
    82     &d(5, "LINE:$_");
   105     &d(5, "LINE:$_");
    83     if( /$res/ ) {
   106     if( /$res/ ) {
    84         # there is a match, print parsed values
   107         # there is a match, print parsed values
    85         print $fout "$_\n";
   108         print $fout "$_\n";
    86         for my $k ( sort keys %+ ) {
   109         for my $k ( sort keys %+ ) {
    87             print $fout "\t$k => '$+{$k}'\n";
   110             print $fout "\t".&unxlate($k)." => '$+{$k}'\n";
    88         }
   111         }
    89     } else {
   112     } else {
    90         if( $fn_fail ) {
   113         if( $fn_fail ) {
    91             print $ff "$_\n"  if $fn_fail ;
   114             print $ff "$_\n"  if $fn_fail ;
    92         } else {
   115         } else {
   119     }
   142     }
   120     $processing{$key} = $caller_key;
   143     $processing{$key} = $caller_key;
   121     
   144     
   122     &d( 7, "Resolve_all_macros loop in $key");
   145     &d( 7, "Resolve_all_macros loop in $key");
   123     # while we have unresolved macros, walk and resolve them all
   146     # while we have unresolved macros, walk and resolve them all
   124     while( $data =~ /%\{(?<k>[A-Z_0-9]+)(:(?<v>[_\w\d]+))?\}/ ) {
   147     while( $data =~ /%\{(?<k>[A-Z_0-9]+)(:(?<v>[_\w.\d]+))?\}/ ) {
   125         my $r_key = $+{k};
   148         my $r_key = $+{k};
   126         my $r_name = $+{v} || ""; # this is just informational here
   149         my $r_name = $+{v} || ""; # this is just informational here
   127         
   150         
   128         &d(7, "Resolve macro in $key for '$r_key' (name $r_name) in data /$data/" );
   151         &d(7, "Resolve macro in $key for '$r_key' (name $r_name) in data /$data/" );
   129         
   152         
   141     
   164     
   142     # update macro in %input with fully resolved line
   165     # update macro in %input with fully resolved line
   143     $input{$key} = $data;
   166     $input{$key} = $data;
   144     &d(7, "RESOLVED '$key'! Result=/$data/");
   167     &d(7, "RESOLVED '$key'! Result=/$data/");
   145 
   168 
       
   169     if( $re_trace ) {
       
   170         # die if regexp is bad
       
   171         "abc" =~ /$data/;
       
   172     }
       
   173 
   146     # release loop protection for $key
   174     # release loop protection for $key
   147     delete $processing{$key};
   175     delete $processing{$key};
   148 }
   176 }
   149 
   177 
   150 
   178 
   152 sub replace_all {
   180 sub replace_all {
   153     my ($data, $key) = @_;
   181     my ($data, $key) = @_;
   154     
   182     
   155     &d(7, "Replace_all $key in /$$data/");
   183     &d(7, "Replace_all $key in /$$data/");
   156     # while we have this macro in $data somewhere, repeat...
   184     # while we have this macro in $data somewhere, repeat...
   157     while( $$data =~ /\{${key}(:(?<v>[_\w\d]+))?\}/ ) {
   185     while( $$data =~ /\{${key}(:(?<v>[_\w.\d]+))?\}/ ) {
   158         my $mname = $+{v};
   186         my $mname = $+{v};
   159         my $inner = $input{$key};	# shall be already resolved, may be checked (FIXME)
   187         my $inner = $input{$key};	# shall be already resolved, may be checked (FIXME)
   160         
   188         
   161         &d(7, " Found '$key'");
   189         &d(7, " Found '$key'");
   162         
   190         
       
   191         my $xname = $mname;
   163         if( $mname ) {
   192         if( $mname ) {
       
   193             # if it's not perl compatible, create a translation entry
       
   194             if( $mname !~ /^[_A-Za-z][_A-Za-z0-9]*\z/ ) {
       
   195                 $xname = &gen_xlate_entry($mname);
       
   196             }
   164             # named macro, create a named capture group
   197             # named macro, create a named capture group
   165             $$data =~ s/%\{${key}:${mname}\}/(?<${mname}>${inner})/g;
   198             $$data =~ s/%\{${key}:${mname}\}/(?<${xname}>${inner})/g;
   166             &d(7, "  Replace $key:$mname with named capture group /$inner/");
   199             &d(7, "  Replace $key:$mname with named capture group /$inner/");
   167         } else {
   200         } else {
   168             # unnamed macro, we just throw it in (trust no1 and parenthesize it)
   201             # unnamed macro, we just throw it in (trust no1 and parenthesize it)
   169             $$data =~  s/%\{${key}\}/(?:${inner})/g;
   202             $$data =~  s/%\{${key}\}/(?:${inner})/g;
   170             &d(7, "  Replace $key with /$inner/");
   203             &d(7, "  Replace $key with /$inner/");
   171         }
   204         }
   172         
   205         
       
   206         &sanitize_labels( $data );
       
   207 
   173         &d(9, "   Replacement result is /$$data/");
   208         &d(9, "   Replacement result is /$$data/");
   174     }
   209     }
   175     &d(7, "REPLACED ALL '$key' in /$$data/");
   210     &d(7, "REPLACED ALL '$key' in /$$data/");
       
   211 }
       
   212 
       
   213 
       
   214 sub gen_xlate_entry {
       
   215     my ($mname) = @_;
       
   216     &d(7, "Generate a translation variable name for $mname");
       
   217     my $xname = "xlate_" . int(rand(1_000_000));
       
   218     my $count = 100_000;
       
   219     while( defined( $xlate{$xname} ) ) {
       
   220         $xname = "xlate_" . int(rand(1_000_000));
       
   221         die "Cannot generate unique translation for $mname in a _lot_ of steps" if --$count<=0;
       
   222     }
       
   223     
       
   224     &d(7, "Generated $xname as the name for $mname");
       
   225     $xlate{$xname} = $mname;
       
   226     return $xname;
       
   227 }
       
   228 
       
   229 
       
   230 # find ?<names> which perl incompatible, and xlate them forcibly
       
   231 sub sanitize_labels {
       
   232     my ($data) = @_;
       
   233 
       
   234     if( $$data =~ /(?<!\\)\((?!\\)\?<([_A-Za-z][_A-Za-z0-9]*[^>_A-Za-z0-9])/ ) {
       
   235         &d(7, "Irregular name in $$data, create translation");
       
   236         # we have at least one irregular, get all labels
       
   237         while( $$data =~ /(?<!\\)\((?!\\)\?<(.+?)>/g ) {
       
   238             my $label = $1;
       
   239             &d(9, "Got label $label (***BEFORE $` ##<->## $' AFTER***)");
       
   240             if( $label !~ /^[_A-Za-z][_A-Za-z0-9]*\z/ ) {
       
   241                 # not perl compatible, translate and replace
       
   242                 my $xname = &gen_xlate_entry($label);
       
   243                 $$data =~ s/(?<!\\)\((?!\\)\?<${label}>/(?<${xname}>/g;
       
   244                 &d(5, "ForceReplaced label '$label' with '$xname' in /$$data/");
       
   245             }
       
   246         }
       
   247     }
       
   248 }
       
   249 
       
   250 
       
   251 sub unxlate {
       
   252     my ($xkey) = @_;
       
   253     return $xlate{$xkey};
   176 }
   254 }
   177 
   255 
   178 
   256 
   179 # extremely low-key debug subsystem :-)
   257 # extremely low-key debug subsystem :-)
   180 sub d {
   258 sub d {