diff -r 98bc7c6e581a -r 78c000fd2ee7 perlgrok.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/perlgrok.pl Wed Sep 29 23:17:35 2021 +0200 @@ -0,0 +1,184 @@ +#!/usr/bin/perl +# $Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $ +# +# grok parser in perl +# +# (c)Peter 'grin' Gervai, 2021; CC_BY-SA-4.0 / GPLv3+ +# grin*grin.hu +# +# This has been thrown together within half an hour, so... beware of the rough edges. + +use warnings; +use strict; +use IO::File; +use Getopt::Long; + +# pattern filename +my $fn_pattern = "patterns"; + +# top pattern we resolve +my $top = "TEST"; + +# debug level +my $DEBUG=1; + +my $fn_test = "-"; # STDIN +my $fn_fail; +my $fn_parsed = "-"; # SDTOUT +my %input; +my %processing; +my $help = 0; + +GetOptions( + "input=s" => \$fn_test, + "pattern=s" => \$fn_pattern, + "top=s" => \$top, + "parsed=s" => \$fn_parsed, + "failed=s" => \$fn_fail, + "debug=i" => \$DEBUG, + "help" => \$help, +) or die "error in command line arguments"; + +if( $help ) { + my $VER = '$Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $'; + print "$0 $VER (c) Peter 'grin' Gervai, 2021\n\n"; + print "Usage: $0 [--input input_file] [--pattern pattern_file] [--top final_macro] \n"; + print " [--parsed parsed_outfile] [--failed failed_lines_file] [--debug n] [--help]\n"; + print " defaults are stdin, stdout, 'patterns' as patternfile and 'TEST' as final macro.\n\n"; + exit; +} + +# read patterns +my $f = new IO::File "< patterns" or die "cannot open patterns: $!"; +while( <$f> ) { + chomp; + # skip empty and comment lines + next if /^ *(#|$)/; + + # parse + if( /^(\S+) +(.+)$/ ) { + $input{$1} = $2; + } else { + die "Patternfile parse error:\n$_\n\n"; + } +} +$f->close; + +&d(5, "Start program"); +&resolve( $top, '' ); + +my $res = $input{$top}; +&d(9, "RESULT\n========\n${top} = $res" ); +&d(9, "Pattern length=" . length($res) ); + +### read file +$f = new IO::File "< $fn_test" or die "cannot open $fn_test: $!"; +my $ff; +if( $fn_fail ) { $ff = new IO::File "> $fn_fail" or die "cannot open $fn_fail for writing: $!"; } +my $fout = new IO::File "> $fn_parsed" or die "cannot open output for parsed: $!"; + +while( <$f> ) { + chomp; + &d(5, "LINE:$_"); + if( /$res/ ) { + # there is a match, print parsed values + print $fout "$_\n"; + for my $k ( sort keys %+ ) { + print $fout "\t$k => '$+{$k}'\n"; + } + } else { + if( $fn_fail ) { + print $ff "$_\n" if $fn_fail ; + } else { + print "$_\n"; + print "\t-no match-\n"; + } + } +} + +$fout->close; +$f->close; +$ff->close if $fn_fail; + +exit; + + +######################################################################################################### + +# fully resolve a macro line (recursively) +sub resolve { + my ($key, $caller_key) = @_; + + &d( 7, "Called resolve for $key from $caller_key"); + + # get macro data from %input + my $data = $input{$key}; + if( $processing{$key} ) { + # already processing this key: looping + die "Resolving loop in $key: already processing for $processing{$key}! Called from $caller_key."; + } + $processing{$key} = $caller_key; + + &d( 7, "Resolve_all_macros loop in $key"); + # while we have unresolved macros, walk and resolve them all + while( $data =~ /%\{(?[A-Z_0-9]+)(:(?[_\w\d]+))?\}/ ) { + my $r_key = $+{k}; + my $r_name = $+{v} || ""; # this is just informational here + + &d(7, "Resolve macro in $key for '$r_key' (name $r_name) in data /$data/" ); + + # do we know the macro? + if( defined( $input{ $r_key } ) ) { + # resolve everything in the macro + &resolve( $r_key, $key ); + # replace fully resolved macro in $data + &replace_all( \$data, $r_key ); + + } else { + die "Unknown macro '$r_key'! Called from $caller_key."; + } + } + + # update macro in %input with fully resolved line + $input{$key} = $data; + &d(7, "RESOLVED '$key'! Result=/$data/"); + + # release loop protection for $key + delete $processing{$key}; +} + + +## replace all macros with replacement [macro-free] regexp +sub replace_all { + my ($data, $key) = @_; + + &d(7, "Replace_all $key in /$$data/"); + # while we have this macro in $data somewhere, repeat... + while( $$data =~ /\{${key}(:(?[_\w\d]+))?\}/ ) { + my $mname = $+{v}; + my $inner = $input{$key}; # shall be already resolved, may be checked (FIXME) + + &d(7, " Found '$key'"); + + if( $mname ) { + # named macro, create a named capture group + $$data =~ s/%\{${key}:${mname}\}/(?<${mname}>${inner})/g; + &d(7, " Replace $key:$mname with named capture group /$inner/"); + } else { + # unnamed macro, we just throw it in (trust no1 and parenthesize it) + $$data =~ s/%\{${key}\}/(?:${inner})/g; + &d(7, " Replace $key with /$inner/"); + } + + &d(9, " Replacement result is /$$data/"); + } + &d(7, "REPLACED ALL '$key' in /$$data/"); +} + + +# extremely low-key debug subsystem :-) +sub d { + my ($lvl, $s) = @_; + return if $lvl > $DEBUG; + print scalar(localtime) . " [$$] ($lvl) $s\n"; +}