--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/perlgrok.pl Wed Sep 29 23:17:35 2021 +0200
@@ -0,0 +1,184 @@
+#!/usr/bin/perl
+# $Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $
+#
+# grok parser in perl
+#
+# (c)Peter 'grin' Gervai, 2021; CC_BY-SA-4.0 / GPLv3+
+# grin*grin.hu
+#
+# This has been thrown together within half an hour, so... beware of the rough edges.
+
+use warnings;
+use strict;
+use IO::File;
+use Getopt::Long;
+
+# pattern filename
+my $fn_pattern = "patterns";
+
+# top pattern we resolve
+my $top = "TEST";
+
+# debug level
+my $DEBUG=1;
+
+my $fn_test = "-"; # STDIN
+my $fn_fail;
+my $fn_parsed = "-"; # SDTOUT
+my %input;
+my %processing;
+my $help = 0;
+
+GetOptions(
+ "input=s" => \$fn_test,
+ "pattern=s" => \$fn_pattern,
+ "top=s" => \$top,
+ "parsed=s" => \$fn_parsed,
+ "failed=s" => \$fn_fail,
+ "debug=i" => \$DEBUG,
+ "help" => \$help,
+) or die "error in command line arguments";
+
+if( $help ) {
+ my $VER = '$Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $';
+ print "$0 $VER (c) Peter 'grin' Gervai, 2021\n\n";
+ print "Usage: $0 [--input input_file] [--pattern pattern_file] [--top final_macro] \n";
+ print " [--parsed parsed_outfile] [--failed failed_lines_file] [--debug n] [--help]\n";
+ print " defaults are stdin, stdout, 'patterns' as patternfile and 'TEST' as final macro.\n\n";
+ exit;
+}
+
+# read patterns
+my $f = new IO::File "< patterns" or die "cannot open patterns: $!";
+while( <$f> ) {
+ chomp;
+ # skip empty and comment lines
+ next if /^ *(#|$)/;
+
+ # parse <name><space><content>
+ if( /^(\S+) +(.+)$/ ) {
+ $input{$1} = $2;
+ } else {
+ die "Patternfile parse error:\n$_\n\n";
+ }
+}
+$f->close;
+
+&d(5, "Start program");
+&resolve( $top, '<root>' );
+
+my $res = $input{$top};
+&d(9, "RESULT\n========\n${top} = $res" );
+&d(9, "Pattern length=" . length($res) );
+
+### read file
+$f = new IO::File "< $fn_test" or die "cannot open $fn_test: $!";
+my $ff;
+if( $fn_fail ) { $ff = new IO::File "> $fn_fail" or die "cannot open $fn_fail for writing: $!"; }
+my $fout = new IO::File "> $fn_parsed" or die "cannot open output for parsed: $!";
+
+while( <$f> ) {
+ chomp;
+ &d(5, "LINE:$_");
+ if( /$res/ ) {
+ # there is a match, print parsed values
+ print $fout "$_\n";
+ for my $k ( sort keys %+ ) {
+ print $fout "\t$k => '$+{$k}'\n";
+ }
+ } else {
+ if( $fn_fail ) {
+ print $ff "$_\n" if $fn_fail ;
+ } else {
+ print "$_\n";
+ print "\t-no match-\n";
+ }
+ }
+}
+
+$fout->close;
+$f->close;
+$ff->close if $fn_fail;
+
+exit;
+
+
+#########################################################################################################
+
+# fully resolve a macro line (recursively)
+sub resolve {
+ my ($key, $caller_key) = @_;
+
+ &d( 7, "Called resolve for $key from $caller_key");
+
+ # get macro data from %input
+ my $data = $input{$key};
+ if( $processing{$key} ) {
+ # already processing this key: looping
+ die "Resolving loop in $key: already processing for $processing{$key}! Called from $caller_key.";
+ }
+ $processing{$key} = $caller_key;
+
+ &d( 7, "Resolve_all_macros loop in $key");
+ # while we have unresolved macros, walk and resolve them all
+ while( $data =~ /%\{(?<k>[A-Z_0-9]+)(:(?<v>[_\w\d]+))?\}/ ) {
+ my $r_key = $+{k};
+ my $r_name = $+{v} || ""; # this is just informational here
+
+ &d(7, "Resolve macro in $key for '$r_key' (name $r_name) in data /$data/" );
+
+ # do we know the macro?
+ if( defined( $input{ $r_key } ) ) {
+ # resolve everything in the macro
+ &resolve( $r_key, $key );
+ # replace fully resolved macro in $data
+ &replace_all( \$data, $r_key );
+
+ } else {
+ die "Unknown macro '$r_key'! Called from $caller_key.";
+ }
+ }
+
+ # update macro in %input with fully resolved line
+ $input{$key} = $data;
+ &d(7, "RESOLVED '$key'! Result=/$data/");
+
+ # release loop protection for $key
+ delete $processing{$key};
+}
+
+
+## replace all macros with replacement [macro-free] regexp
+sub replace_all {
+ my ($data, $key) = @_;
+
+ &d(7, "Replace_all $key in /$$data/");
+ # while we have this macro in $data somewhere, repeat...
+ while( $$data =~ /\{${key}(:(?<v>[_\w\d]+))?\}/ ) {
+ my $mname = $+{v};
+ my $inner = $input{$key}; # shall be already resolved, may be checked (FIXME)
+
+ &d(7, " Found '$key'");
+
+ if( $mname ) {
+ # named macro, create a named capture group
+ $$data =~ s/%\{${key}:${mname}\}/(?<${mname}>${inner})/g;
+ &d(7, " Replace $key:$mname with named capture group /$inner/");
+ } else {
+ # unnamed macro, we just throw it in (trust no1 and parenthesize it)
+ $$data =~ s/%\{${key}\}/(?:${inner})/g;
+ &d(7, " Replace $key with /$inner/");
+ }
+
+ &d(9, " Replacement result is /$$data/");
+ }
+ &d(7, "REPLACED ALL '$key' in /$$data/");
+}
+
+
+# extremely low-key debug subsystem :-)
+sub d {
+ my ($lvl, $s) = @_;
+ return if $lvl > $DEBUG;
+ print scalar(localtime) . " [$$] ($lvl) $s\n";
+}