perlgrok.pl
changeset 6 78c000fd2ee7
child 7 c4085453688b
equal deleted inserted replaced
5:98bc7c6e581a 6:78c000fd2ee7
       
     1 #!/usr/bin/perl
       
     2 # $Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $
       
     3 #
       
     4 # grok parser in perl
       
     5 #
       
     6 # (c)Peter 'grin' Gervai, 2021; CC_BY-SA-4.0 / GPLv3+
       
     7 # grin*grin.hu
       
     8 #
       
     9 # This has been thrown together within half an hour, so... beware of the rough edges.
       
    10 
       
    11 use warnings;
       
    12 use strict;
       
    13 use IO::File;
       
    14 use Getopt::Long;
       
    15 
       
    16 # pattern filename
       
    17 my $fn_pattern = "patterns";
       
    18 
       
    19 # top pattern we resolve
       
    20 my $top = "TEST";
       
    21 
       
    22 # debug level
       
    23 my $DEBUG=1;
       
    24 
       
    25 my $fn_test = "-"; # STDIN
       
    26 my $fn_fail;
       
    27 my $fn_parsed = "-"; # SDTOUT
       
    28 my %input;
       
    29 my %processing;
       
    30 my $help = 0;
       
    31 
       
    32 GetOptions( 
       
    33     "input=s"       => \$fn_test,
       
    34     "pattern=s"     => \$fn_pattern,
       
    35     "top=s"         => \$top,
       
    36     "parsed=s"      => \$fn_parsed,
       
    37     "failed=s"      => \$fn_fail,
       
    38     "debug=i"       => \$DEBUG,
       
    39     "help"          => \$help,
       
    40 ) or die "error in command line arguments";
       
    41 
       
    42 if( $help ) {
       
    43     my $VER = '$Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $';
       
    44     print "$0 $VER (c) Peter 'grin' Gervai, 2021\n\n";
       
    45     print "Usage: $0 [--input input_file] [--pattern pattern_file] [--top final_macro] \n";
       
    46     print "          [--parsed parsed_outfile] [--failed failed_lines_file] [--debug n] [--help]\n";
       
    47     print "       defaults are stdin, stdout, 'patterns' as patternfile and 'TEST' as final macro.\n\n";
       
    48     exit;
       
    49 }
       
    50 
       
    51 # read patterns
       
    52 my $f = new IO::File "< patterns" or die "cannot open patterns: $!";
       
    53 while( <$f> ) {
       
    54     chomp;
       
    55     # skip empty and comment lines
       
    56     next if /^ *(#|$)/;
       
    57     
       
    58     # parse <name><space><content>
       
    59     if( /^(\S+) +(.+)$/ ) {
       
    60         $input{$1} = $2;
       
    61     } else {
       
    62         die "Patternfile parse error:\n$_\n\n";
       
    63     }
       
    64 }
       
    65 $f->close;
       
    66 
       
    67 &d(5, "Start program");
       
    68 &resolve( $top, '<root>' );
       
    69 
       
    70 my $res = $input{$top};
       
    71 &d(9, "RESULT\n========\n${top} = $res" );
       
    72 &d(9, "Pattern length=" . length($res) );
       
    73 
       
    74 ### read file
       
    75 $f = new IO::File "< $fn_test" or die "cannot open $fn_test: $!";
       
    76 my $ff;
       
    77 if( $fn_fail ) { $ff = new IO::File "> $fn_fail" or die "cannot open $fn_fail for writing: $!"; }
       
    78 my $fout = new IO::File "> $fn_parsed" or die "cannot open output for parsed: $!";
       
    79 
       
    80 while( <$f> ) {
       
    81     chomp;
       
    82     &d(5, "LINE:$_");
       
    83     if( /$res/ ) {
       
    84         # there is a match, print parsed values
       
    85         print $fout "$_\n";
       
    86         for my $k ( sort keys %+ ) {
       
    87             print $fout "\t$k => '$+{$k}'\n";
       
    88         }
       
    89     } else {
       
    90         if( $fn_fail ) {
       
    91             print $ff "$_\n"  if $fn_fail ;
       
    92         } else {
       
    93             print "$_\n";
       
    94             print "\t-no match-\n";
       
    95         }
       
    96     }
       
    97 }
       
    98 
       
    99 $fout->close;
       
   100 $f->close;
       
   101 $ff->close if $fn_fail;
       
   102 
       
   103 exit;
       
   104 
       
   105 
       
   106 #########################################################################################################
       
   107 
       
   108 # fully resolve a macro line (recursively)
       
   109 sub resolve {
       
   110     my ($key, $caller_key) = @_;
       
   111     
       
   112     &d( 7, "Called resolve for $key from $caller_key");
       
   113     
       
   114     # get macro data from %input
       
   115     my $data = $input{$key};
       
   116     if( $processing{$key} ) {
       
   117         # already processing this key: looping
       
   118         die "Resolving loop in $key: already processing for $processing{$key}! Called from $caller_key.";
       
   119     }
       
   120     $processing{$key} = $caller_key;
       
   121     
       
   122     &d( 7, "Resolve_all_macros loop in $key");
       
   123     # while we have unresolved macros, walk and resolve them all
       
   124     while( $data =~ /%\{(?<k>[A-Z_0-9]+)(:(?<v>[_\w\d]+))?\}/ ) {
       
   125         my $r_key = $+{k};
       
   126         my $r_name = $+{v} || ""; # this is just informational here
       
   127         
       
   128         &d(7, "Resolve macro in $key for '$r_key' (name $r_name) in data /$data/" );
       
   129         
       
   130         # do we know the macro?
       
   131         if( defined( $input{ $r_key } ) ) {
       
   132             # resolve everything in the macro
       
   133             &resolve( $r_key, $key );
       
   134             # replace fully resolved macro in $data
       
   135             &replace_all( \$data, $r_key );
       
   136             
       
   137         } else {
       
   138             die "Unknown macro '$r_key'! Called from $caller_key.";
       
   139         }
       
   140     }
       
   141     
       
   142     # update macro in %input with fully resolved line
       
   143     $input{$key} = $data;
       
   144     &d(7, "RESOLVED '$key'! Result=/$data/");
       
   145 
       
   146     # release loop protection for $key
       
   147     delete $processing{$key};
       
   148 }
       
   149 
       
   150 
       
   151 ## replace all macros with replacement [macro-free] regexp
       
   152 sub replace_all {
       
   153     my ($data, $key) = @_;
       
   154     
       
   155     &d(7, "Replace_all $key in /$$data/");
       
   156     # while we have this macro in $data somewhere, repeat...
       
   157     while( $$data =~ /\{${key}(:(?<v>[_\w\d]+))?\}/ ) {
       
   158         my $mname = $+{v};
       
   159         my $inner = $input{$key};	# shall be already resolved, may be checked (FIXME)
       
   160         
       
   161         &d(7, " Found '$key'");
       
   162         
       
   163         if( $mname ) {
       
   164             # named macro, create a named capture group
       
   165             $$data =~ s/%\{${key}:${mname}\}/(?<${mname}>${inner})/g;
       
   166             &d(7, "  Replace $key:$mname with named capture group /$inner/");
       
   167         } else {
       
   168             # unnamed macro, we just throw it in (trust no1 and parenthesize it)
       
   169             $$data =~  s/%\{${key}\}/(?:${inner})/g;
       
   170             &d(7, "  Replace $key with /$inner/");
       
   171         }
       
   172         
       
   173         &d(9, "   Replacement result is /$$data/");
       
   174     }
       
   175     &d(7, "REPLACED ALL '$key' in /$$data/");
       
   176 }
       
   177 
       
   178 
       
   179 # extremely low-key debug subsystem :-)
       
   180 sub d {
       
   181     my ($lvl, $s) = @_;
       
   182     return if $lvl > $DEBUG;
       
   183     print scalar(localtime) . " [$$] ($lvl) $s\n";
       
   184 }