|
1 #!/usr/bin/perl |
|
2 # $Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $ |
|
3 # |
|
4 # grok parser in perl |
|
5 # |
|
6 # (c)Peter 'grin' Gervai, 2021; CC_BY-SA-4.0 / GPLv3+ |
|
7 # grin*grin.hu |
|
8 # |
|
9 # This has been thrown together within half an hour, so... beware of the rough edges. |
|
10 |
|
11 use warnings; |
|
12 use strict; |
|
13 use IO::File; |
|
14 use Getopt::Long; |
|
15 |
|
16 # pattern filename |
|
17 my $fn_pattern = "patterns"; |
|
18 |
|
19 # top pattern we resolve |
|
20 my $top = "TEST"; |
|
21 |
|
22 # debug level |
|
23 my $DEBUG=1; |
|
24 |
|
25 my $fn_test = "-"; # STDIN |
|
26 my $fn_fail; |
|
27 my $fn_parsed = "-"; # SDTOUT |
|
28 my %input; |
|
29 my %processing; |
|
30 my $help = 0; |
|
31 |
|
32 GetOptions( |
|
33 "input=s" => \$fn_test, |
|
34 "pattern=s" => \$fn_pattern, |
|
35 "top=s" => \$top, |
|
36 "parsed=s" => \$fn_parsed, |
|
37 "failed=s" => \$fn_fail, |
|
38 "debug=i" => \$DEBUG, |
|
39 "help" => \$help, |
|
40 ) or die "error in command line arguments"; |
|
41 |
|
42 if( $help ) { |
|
43 my $VER = '$Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $'; |
|
44 print "$0 $VER (c) Peter 'grin' Gervai, 2021\n\n"; |
|
45 print "Usage: $0 [--input input_file] [--pattern pattern_file] [--top final_macro] \n"; |
|
46 print " [--parsed parsed_outfile] [--failed failed_lines_file] [--debug n] [--help]\n"; |
|
47 print " defaults are stdin, stdout, 'patterns' as patternfile and 'TEST' as final macro.\n\n"; |
|
48 exit; |
|
49 } |
|
50 |
|
51 # read patterns |
|
52 my $f = new IO::File "< patterns" or die "cannot open patterns: $!"; |
|
53 while( <$f> ) { |
|
54 chomp; |
|
55 # skip empty and comment lines |
|
56 next if /^ *(#|$)/; |
|
57 |
|
58 # parse <name><space><content> |
|
59 if( /^(\S+) +(.+)$/ ) { |
|
60 $input{$1} = $2; |
|
61 } else { |
|
62 die "Patternfile parse error:\n$_\n\n"; |
|
63 } |
|
64 } |
|
65 $f->close; |
|
66 |
|
67 &d(5, "Start program"); |
|
68 &resolve( $top, '<root>' ); |
|
69 |
|
70 my $res = $input{$top}; |
|
71 &d(9, "RESULT\n========\n${top} = $res" ); |
|
72 &d(9, "Pattern length=" . length($res) ); |
|
73 |
|
74 ### read file |
|
75 $f = new IO::File "< $fn_test" or die "cannot open $fn_test: $!"; |
|
76 my $ff; |
|
77 if( $fn_fail ) { $ff = new IO::File "> $fn_fail" or die "cannot open $fn_fail for writing: $!"; } |
|
78 my $fout = new IO::File "> $fn_parsed" or die "cannot open output for parsed: $!"; |
|
79 |
|
80 while( <$f> ) { |
|
81 chomp; |
|
82 &d(5, "LINE:$_"); |
|
83 if( /$res/ ) { |
|
84 # there is a match, print parsed values |
|
85 print $fout "$_\n"; |
|
86 for my $k ( sort keys %+ ) { |
|
87 print $fout "\t$k => '$+{$k}'\n"; |
|
88 } |
|
89 } else { |
|
90 if( $fn_fail ) { |
|
91 print $ff "$_\n" if $fn_fail ; |
|
92 } else { |
|
93 print "$_\n"; |
|
94 print "\t-no match-\n"; |
|
95 } |
|
96 } |
|
97 } |
|
98 |
|
99 $fout->close; |
|
100 $f->close; |
|
101 $ff->close if $fn_fail; |
|
102 |
|
103 exit; |
|
104 |
|
105 |
|
106 ######################################################################################################### |
|
107 |
|
108 # fully resolve a macro line (recursively) |
|
109 sub resolve { |
|
110 my ($key, $caller_key) = @_; |
|
111 |
|
112 &d( 7, "Called resolve for $key from $caller_key"); |
|
113 |
|
114 # get macro data from %input |
|
115 my $data = $input{$key}; |
|
116 if( $processing{$key} ) { |
|
117 # already processing this key: looping |
|
118 die "Resolving loop in $key: already processing for $processing{$key}! Called from $caller_key."; |
|
119 } |
|
120 $processing{$key} = $caller_key; |
|
121 |
|
122 &d( 7, "Resolve_all_macros loop in $key"); |
|
123 # while we have unresolved macros, walk and resolve them all |
|
124 while( $data =~ /%\{(?<k>[A-Z_0-9]+)(:(?<v>[_\w\d]+))?\}/ ) { |
|
125 my $r_key = $+{k}; |
|
126 my $r_name = $+{v} || ""; # this is just informational here |
|
127 |
|
128 &d(7, "Resolve macro in $key for '$r_key' (name $r_name) in data /$data/" ); |
|
129 |
|
130 # do we know the macro? |
|
131 if( defined( $input{ $r_key } ) ) { |
|
132 # resolve everything in the macro |
|
133 &resolve( $r_key, $key ); |
|
134 # replace fully resolved macro in $data |
|
135 &replace_all( \$data, $r_key ); |
|
136 |
|
137 } else { |
|
138 die "Unknown macro '$r_key'! Called from $caller_key."; |
|
139 } |
|
140 } |
|
141 |
|
142 # update macro in %input with fully resolved line |
|
143 $input{$key} = $data; |
|
144 &d(7, "RESOLVED '$key'! Result=/$data/"); |
|
145 |
|
146 # release loop protection for $key |
|
147 delete $processing{$key}; |
|
148 } |
|
149 |
|
150 |
|
151 ## replace all macros with replacement [macro-free] regexp |
|
152 sub replace_all { |
|
153 my ($data, $key) = @_; |
|
154 |
|
155 &d(7, "Replace_all $key in /$$data/"); |
|
156 # while we have this macro in $data somewhere, repeat... |
|
157 while( $$data =~ /\{${key}(:(?<v>[_\w\d]+))?\}/ ) { |
|
158 my $mname = $+{v}; |
|
159 my $inner = $input{$key}; # shall be already resolved, may be checked (FIXME) |
|
160 |
|
161 &d(7, " Found '$key'"); |
|
162 |
|
163 if( $mname ) { |
|
164 # named macro, create a named capture group |
|
165 $$data =~ s/%\{${key}:${mname}\}/(?<${mname}>${inner})/g; |
|
166 &d(7, " Replace $key:$mname with named capture group /$inner/"); |
|
167 } else { |
|
168 # unnamed macro, we just throw it in (trust no1 and parenthesize it) |
|
169 $$data =~ s/%\{${key}\}/(?:${inner})/g; |
|
170 &d(7, " Replace $key with /$inner/"); |
|
171 } |
|
172 |
|
173 &d(9, " Replacement result is /$$data/"); |
|
174 } |
|
175 &d(7, "REPLACED ALL '$key' in /$$data/"); |
|
176 } |
|
177 |
|
178 |
|
179 # extremely low-key debug subsystem :-) |
|
180 sub d { |
|
181 my ($lvl, $s) = @_; |
|
182 return if $lvl > $DEBUG; |
|
183 print scalar(localtime) . " [$$] ($lvl) $s\n"; |
|
184 } |