26 my $fn_fail; |
26 my $fn_fail; |
27 my $fn_parsed = "-"; # SDTOUT |
27 my $fn_parsed = "-"; # SDTOUT |
28 my %input; |
28 my %input; |
29 my %processing; |
29 my %processing; |
30 my $help = 0; |
30 my $help = 0; |
|
31 my $re_trace = 1; # check all regexp |
|
32 |
|
33 my %xlate; # perl names don't support period in name, so we need to translate |
31 |
34 |
32 GetOptions( |
35 GetOptions( |
33 "input=s" => \$fn_test, |
36 "input=s" => \$fn_test, |
34 "pattern=s" => \$fn_pattern, |
37 "pattern=s" => \@fn_patterns, |
35 "top=s" => \$top, |
38 "top=s" => \$top, |
36 "parsed=s" => \$fn_parsed, |
39 "parsed=s" => \$fn_parsed, |
37 "failed=s" => \$fn_fail, |
40 "failed=s" => \$fn_fail, |
38 "debug=i" => \$DEBUG, |
41 "debug=i" => \$DEBUG, |
39 "help" => \$help, |
42 "help" => \$help, |
40 ) or die "error in command line arguments"; |
43 ) or die "error in command line arguments"; |
41 |
44 |
42 if( $help ) { |
45 if( $help ) { |
43 my $VER = '$Id: perlgrok.pl,v bca12968c007 2021/09/29 20:31:56 grin $'; |
46 my $VER = '$Id: perlgrok.pl,v d6e64daafdc1 2021/09/29 23:46:59 grin $'; |
44 print "$0 $VER (c) Peter 'grin' Gervai, 2021\n\n"; |
47 print "$0 $VER (c) Peter 'grin' Gervai, 2021\n\n"; |
45 print "Usage: $0 [--input input_file] [--pattern pattern_file] [--top final_macro] \n"; |
48 print "Usage: $0 [--input input_file] [--pattern pattern_file] [--pattern another] [--top final_macro] \n"; |
46 print " [--parsed parsed_outfile] [--failed failed_lines_file] [--debug n] [--help]\n"; |
49 print " [--parsed parsed_outfile] [--failed failed_lines_file] [--debug n] [--help]\n"; |
47 print " defaults are stdin, stdout, 'patterns' as patternfile and 'TEST' as final macro.\n\n"; |
50 print " defaults are stdin, stdout, 'patterns' as patternfile and 'TEST' as final macro.\n\n"; |
48 exit; |
51 exit; |
49 } |
52 } |
50 |
53 |
|
54 if( $#fn_patterns < 0 ) { |
|
55 push @fn_patterns, "patterns"; |
|
56 } |
|
57 |
|
58 |
51 # read patterns |
59 # read patterns |
52 my $f = new IO::File "< $fn_pattern" or die "cannot open patterns: $!"; |
60 foreach my $fn_pattern (@fn_patterns) { |
53 while( <$f> ) { |
61 my $f = new IO::File "< $fn_pattern" or die "cannot open pattern $fn_pattern: $!"; |
54 chomp; |
62 &d(7, "Reading $fn_pattern pattern file contents"); |
55 # skip empty and comment lines |
63 while( <$f> ) { |
56 next if /^ *(#|$)/; |
64 chomp; |
57 |
65 # skip empty and comment lines |
58 # parse <name><space><content> |
66 next if /^ *(#|$)/; |
59 if( /^(\S+) +(.+)$/ ) { |
67 |
60 $input{$1} = $2; |
68 # parse <name><space><content> |
61 } else { |
69 if( /^(\S+) +(.+)$/ ) { |
62 die "Patternfile parse error:\n$_\n\n"; |
70 if( defined( $input{$1} ) ) { |
63 } |
71 &d(1, "Warning: duplicate macro $1 in $fn_pattern; overwrites previous version!"); |
64 } |
72 } |
65 $f->close; |
73 $input{$1} = $2; |
|
74 } else { |
|
75 die "Patternfile $fn_pattern parse error:\n$_\n\n"; |
|
76 } |
|
77 } |
|
78 $f->close; |
|
79 } |
66 |
80 |
67 &d(5, "Start program"); |
81 &d(5, "Start program"); |
68 &resolve( $top, '<root>' ); |
82 &resolve( $top, '<root>' ); |
69 |
83 |
70 my $res = $input{$top}; |
84 my $res = $input{$top}; |
71 &d(9, "RESULT\n========\n${top} = $res" ); |
85 &d(9, "RESULT\n========\n${top} = $res" ); |
72 &d(9, "Pattern length=" . length($res) ); |
86 &d(9, "Pattern length=" . length($res) ); |
73 |
87 |
|
88 eval { |
|
89 my $test = "abc123"; |
|
90 $test =~ /$res/; |
|
91 }; |
|
92 if( $@ ) { |
|
93 print "ERR: $@\n\n"; |
|
94 exit; |
|
95 } |
|
96 |
74 ### read file |
97 ### read file |
75 $f = new IO::File "< $fn_test" or die "cannot open $fn_test: $!"; |
98 my $f = new IO::File "< $fn_test" or die "cannot open $fn_test: $!"; |
76 my $ff; |
99 my $ff; |
77 if( $fn_fail ) { $ff = new IO::File "> $fn_fail" or die "cannot open $fn_fail for writing: $!"; } |
100 if( $fn_fail ) { $ff = new IO::File "> $fn_fail" or die "cannot open $fn_fail for writing: $!"; } |
78 my $fout = new IO::File "> $fn_parsed" or die "cannot open output for parsed: $!"; |
101 my $fout = new IO::File "> $fn_parsed" or die "cannot open output for parsed: $!"; |
79 |
102 |
80 while( <$f> ) { |
103 while( <$f> ) { |
119 } |
142 } |
120 $processing{$key} = $caller_key; |
143 $processing{$key} = $caller_key; |
121 |
144 |
122 &d( 7, "Resolve_all_macros loop in $key"); |
145 &d( 7, "Resolve_all_macros loop in $key"); |
123 # while we have unresolved macros, walk and resolve them all |
146 # while we have unresolved macros, walk and resolve them all |
124 while( $data =~ /%\{(?<k>[A-Z_0-9]+)(:(?<v>[_\w\d]+))?\}/ ) { |
147 while( $data =~ /%\{(?<k>[A-Z_0-9]+)(:(?<v>[_\w.\d]+))?\}/ ) { |
125 my $r_key = $+{k}; |
148 my $r_key = $+{k}; |
126 my $r_name = $+{v} || ""; # this is just informational here |
149 my $r_name = $+{v} || ""; # this is just informational here |
127 |
150 |
128 &d(7, "Resolve macro in $key for '$r_key' (name $r_name) in data /$data/" ); |
151 &d(7, "Resolve macro in $key for '$r_key' (name $r_name) in data /$data/" ); |
129 |
152 |
152 sub replace_all { |
180 sub replace_all { |
153 my ($data, $key) = @_; |
181 my ($data, $key) = @_; |
154 |
182 |
155 &d(7, "Replace_all $key in /$$data/"); |
183 &d(7, "Replace_all $key in /$$data/"); |
156 # while we have this macro in $data somewhere, repeat... |
184 # while we have this macro in $data somewhere, repeat... |
157 while( $$data =~ /\{${key}(:(?<v>[_\w\d]+))?\}/ ) { |
185 while( $$data =~ /\{${key}(:(?<v>[_\w.\d]+))?\}/ ) { |
158 my $mname = $+{v}; |
186 my $mname = $+{v}; |
159 my $inner = $input{$key}; # shall be already resolved, may be checked (FIXME) |
187 my $inner = $input{$key}; # shall be already resolved, may be checked (FIXME) |
160 |
188 |
161 &d(7, " Found '$key'"); |
189 &d(7, " Found '$key'"); |
162 |
190 |
|
191 my $xname = $mname; |
163 if( $mname ) { |
192 if( $mname ) { |
|
193 # if it's not perl compatible, create a translation entry |
|
194 if( $mname !~ /^[_A-Za-z][_A-Za-z0-9]*\z/ ) { |
|
195 $xname = &gen_xlate_entry($mname); |
|
196 } |
164 # named macro, create a named capture group |
197 # named macro, create a named capture group |
165 $$data =~ s/%\{${key}:${mname}\}/(?<${mname}>${inner})/g; |
198 $$data =~ s/%\{${key}:${mname}\}/(?<${xname}>${inner})/g; |
166 &d(7, " Replace $key:$mname with named capture group /$inner/"); |
199 &d(7, " Replace $key:$mname with named capture group /$inner/"); |
167 } else { |
200 } else { |
168 # unnamed macro, we just throw it in (trust no1 and parenthesize it) |
201 # unnamed macro, we just throw it in (trust no1 and parenthesize it) |
169 $$data =~ s/%\{${key}\}/(?:${inner})/g; |
202 $$data =~ s/%\{${key}\}/(?:${inner})/g; |
170 &d(7, " Replace $key with /$inner/"); |
203 &d(7, " Replace $key with /$inner/"); |
171 } |
204 } |
172 |
205 |
|
206 &sanitize_labels( $data ); |
|
207 |
173 &d(9, " Replacement result is /$$data/"); |
208 &d(9, " Replacement result is /$$data/"); |
174 } |
209 } |
175 &d(7, "REPLACED ALL '$key' in /$$data/"); |
210 &d(7, "REPLACED ALL '$key' in /$$data/"); |
|
211 } |
|
212 |
|
213 |
|
214 sub gen_xlate_entry { |
|
215 my ($mname) = @_; |
|
216 &d(7, "Generate a translation variable name for $mname"); |
|
217 my $xname = "xlate_" . int(rand(1_000_000)); |
|
218 my $count = 100_000; |
|
219 while( defined( $xlate{$xname} ) ) { |
|
220 $xname = "xlate_" . int(rand(1_000_000)); |
|
221 die "Cannot generate unique translation for $mname in a _lot_ of steps" if --$count<=0; |
|
222 } |
|
223 |
|
224 &d(7, "Generated $xname as the name for $mname"); |
|
225 $xlate{$xname} = $mname; |
|
226 return $xname; |
|
227 } |
|
228 |
|
229 |
|
230 # find ?<names> which perl incompatible, and xlate them forcibly |
|
231 sub sanitize_labels { |
|
232 my ($data) = @_; |
|
233 |
|
234 if( $$data =~ /(?<!\\)\((?!\\)\?<([_A-Za-z][_A-Za-z0-9]*[^>_A-Za-z0-9])/ ) { |
|
235 &d(7, "Irregular name in $$data, create translation"); |
|
236 # we have at least one irregular, get all labels |
|
237 while( $$data =~ /(?<!\\)\((?!\\)\?<(.+?)>/g ) { |
|
238 my $label = $1; |
|
239 &d(9, "Got label $label (***BEFORE $` ##<->## $' AFTER***)"); |
|
240 if( $label !~ /^[_A-Za-z][_A-Za-z0-9]*\z/ ) { |
|
241 # not perl compatible, translate and replace |
|
242 my $xname = &gen_xlate_entry($label); |
|
243 $$data =~ s/(?<!\\)\((?!\\)\?<${label}>/(?<${xname}>/g; |
|
244 &d(5, "ForceReplaced label '$label' with '$xname' in /$$data/"); |
|
245 } |
|
246 } |
|
247 } |
|
248 } |
|
249 |
|
250 |
|
251 sub unxlate { |
|
252 my ($xkey) = @_; |
|
253 return $xlate{$xkey}; |
176 } |
254 } |
177 |
255 |
178 |
256 |
179 # extremely low-key debug subsystem :-) |
257 # extremely low-key debug subsystem :-) |
180 sub d { |
258 sub d { |