Mercurial > bins
annotate bins_edit @ 9:0f248ad86f9f 1.1.29.e2
bins_edit: try to get encoding stuff right, move it into main
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Thu, 16 Oct 2008 00:11:14 +0200 |
parents | c28af937b9bd |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/perl -w |
2 | |
3 # bins_edit for BINS Photo Album version 1.1.29 | |
4 # Copyright (C) 2001-2004 Jérôme Sautret (Jerome@Sautret.org) | |
5 # | |
6 # $Id: bins_edit,v 1.21 2004/10/24 13:19:16 jerome Exp $ | |
7 # | |
8 # This program is free software; you can redistribute it and/or modify | |
9 # it under the terms of the GNU General Public License as published by | |
10 # the Free Software Foundation; either version 2 of the License, or | |
11 # (at your option) any later version. | |
12 # | |
13 # This program is distributed in the hope that it will be useful, | |
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 # GNU General Public License for more details. | |
17 # | |
18 # You should have received a copy of the GNU General Public License | |
19 # along with this program; see the file COPYING. If not, write to | |
20 # the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 # Boston, MA 02111-1307, USA. | |
22 | |
23 # Type "bins_edit -h" on command line for usage information. | |
24 | |
25 use strict; | |
26 | |
27 use Getopt::Long; | |
28 use IO::File; | |
29 use UNIVERSAL qw(isa); | |
30 | |
31 # XML parsing & writing | |
32 use XML::Grove; | |
33 use XML::Grove::Builder; | |
34 use XML::Grove::Path; | |
35 use XML::Grove::PerlSAX; | |
36 use XML::Parser::PerlSAX; | |
37 #use XML::Handler::XMLWriter; | |
38 use XML::Handler::YAWriter; | |
39 use Text::Iconv; | |
40 use HTML::Entities; | |
41 | |
# Verbosity level; starts at 1, raised by each -v and set to 0 by -q (see main).
my $verbose = 1;
# Set by -m/--html; when true, setField() skips entity-encoding of the value.
my $html=0;

# Character set name of the current locale; assigned by determine_encoding().
my $localEncoding;
46 | |
# Decode the HTML (ISO-8859-1) named entities that do not exist in XML.
#
# Takes a string and returns a copy in which every "&name;" reference whose
# name appears in the table below is replaced by the corresponding Latin-1
# character.  References that are not in the table (for instance the XML
# built-ins &amp; and &lt;) are returned unchanged.
#
# The replacement characters are written as \xNN escapes so the table does
# not depend on the encoding this source file is stored or displayed in.
sub decodeEntites{
    my $s = shift;

    my %entities = (
        AElig  => "\xC6", # capital AE diphthong (ligature)
        Aacute => "\xC1", # capital A, acute accent
        Acirc  => "\xC2", # capital A, circumflex accent
        Agrave => "\xC0", # capital A, grave accent
        Aring  => "\xC5", # capital A, ring
        Atilde => "\xC3", # capital A, tilde
        Auml   => "\xC4", # capital A, dieresis or umlaut mark
        Ccedil => "\xC7", # capital C, cedilla
        ETH    => "\xD0", # capital Eth, Icelandic
        Eacute => "\xC9", # capital E, acute accent
        Ecirc  => "\xCA", # capital E, circumflex accent
        Egrave => "\xC8", # capital E, grave accent
        Euml   => "\xCB", # capital E, dieresis or umlaut mark
        Iacute => "\xCD", # capital I, acute accent
        Icirc  => "\xCE", # capital I, circumflex accent
        Igrave => "\xCC", # capital I, grave accent
        Iuml   => "\xCF", # capital I, dieresis or umlaut mark
        Ntilde => "\xD1", # capital N, tilde
        Oacute => "\xD3", # capital O, acute accent
        Ocirc  => "\xD4", # capital O, circumflex accent
        Ograve => "\xD2", # capital O, grave accent
        Oslash => "\xD8", # capital O, slash
        Otilde => "\xD5", # capital O, tilde
        Ouml   => "\xD6", # capital O, dieresis or umlaut mark
        THORN  => "\xDE", # capital THORN, Icelandic
        Uacute => "\xDA", # capital U, acute accent
        Ucirc  => "\xDB", # capital U, circumflex accent
        Ugrave => "\xD9", # capital U, grave accent
        Uuml   => "\xDC", # capital U, dieresis or umlaut mark
        Yacute => "\xDD", # capital Y, acute accent
        aacute => "\xE1", # small a, acute accent
        acirc  => "\xE2", # small a, circumflex accent
        aelig  => "\xE6", # small ae diphthong (ligature)
        agrave => "\xE0", # small a, grave accent
        aring  => "\xE5", # small a, ring
        atilde => "\xE3", # small a, tilde
        auml   => "\xE4", # small a, dieresis or umlaut mark
        ccedil => "\xE7", # small c, cedilla
        eacute => "\xE9", # small e, acute accent
        ecirc  => "\xEA", # small e, circumflex accent
        egrave => "\xE8", # small e, grave accent
        eth    => "\xF0", # small eth, Icelandic
        euml   => "\xEB", # small e, dieresis or umlaut mark
        iacute => "\xED", # small i, acute accent
        icirc  => "\xEE", # small i, circumflex accent
        igrave => "\xEC", # small i, grave accent
        iuml   => "\xEF", # small i, dieresis or umlaut mark
        ntilde => "\xF1", # small n, tilde
        oacute => "\xF3", # small o, acute accent
        ocirc  => "\xF4", # small o, circumflex accent
        ograve => "\xF2", # small o, grave accent
        oslash => "\xF8", # small o, slash
        otilde => "\xF5", # small o, tilde
        ouml   => "\xF6", # small o, dieresis or umlaut mark
        szlig  => "\xDF", # small sharp s, German (sz ligature)
        thorn  => "\xFE", # small thorn, Icelandic
        uacute => "\xFA", # small u, acute accent
        ucirc  => "\xFB", # small u, circumflex accent
        ugrave => "\xF9", # small u, grave accent
        uuml   => "\xFC", # small u, dieresis or umlaut mark
        yacute => "\xFD", # small y, acute accent
        yuml   => "\xFF", # small y, dieresis or umlaut mark

        # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
        copy   => "\xA9", # copyright sign
        reg    => "\xAE", # registered sign
        nbsp   => "\xA0", # non breaking space

        # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
        iexcl   => "\xA1",
        cent    => "\xA2",
        pound   => "\xA3",
        curren  => "\xA4",
        yen     => "\xA5",
        brvbar  => "\xA6",
        sect    => "\xA7",
        uml     => "\xA8",
        ordf    => "\xAA",
        laquo   => "\xAB",
        'not'   => "\xAC", # not is a keyword in perl
        shy     => "\xAD",
        macr    => "\xAF",
        deg     => "\xB0",
        plusmn  => "\xB1",
        sup1    => "\xB9",
        sup2    => "\xB2",
        sup3    => "\xB3",
        acute   => "\xB4",
        micro   => "\xB5",
        para    => "\xB6",
        middot  => "\xB7",
        cedil   => "\xB8",
        ordm    => "\xBA",
        raquo   => "\xBB",
        frac14  => "\xBC",
        frac12  => "\xBD",
        frac34  => "\xBE",
        iquest  => "\xBF",
        'times' => "\xD7", # times is a keyword in perl
        divide  => "\xF7",
    );

    # One pass over the string: look every "&name;" candidate up in the table
    # instead of running a separate substitution for each table entry.
    # Unknown names are put back exactly as they were found.
    $s =~ s/&([A-Za-z0-9]+);/exists $entities{$1} ? $entities{$1} : "&$1;"/ge;
    return $s;
}
159 | |
# Build the whitespace text node used to lay out the XML output:
# a newline followed by one space per requested indentation level.
# Returns the text wrapped in an XML::Grove::Characters object.
sub charac_indent{
    my ($level) = @_;
    my $padding = "\n" . (" " x $level);
    return XML::Grove::Characters->new ( Data => $padding );
}
168 | |
# Set one <field name="..."> entry inside /<fileType>/description.
#
# Arguments:
#   $field    - name of the field to add or modify
#   $value    - value to store in the field
#   $fileType - root element name of the document ("image" or "album")
#   $document - the XML document as a Grove (modified in place)
#
# Unless the global $html flag is set, $value is first escaped with
# encode_entities(); the result is then run through decodeEntites().
# If a field with the requested name already exists its contents are
# replaced, otherwise a new <field> element is appended to the description.
# Dies if the document has no /<fileType>/description element.
sub setField{
    my ($field, $value, $fileType, $document) = @_;

    if (! $html) {
        $value = encode_entities($value, '\00-\31<&"');
    }

    my $characters =
        XML::Grove::Characters->new( Data => decodeEntites($value));

    # Look the description element up once and fail with a readable message
    # instead of dereferencing a missing element.
    my $description = $document->at_path('/'.$fileType.'/description');
    die("No '/$fileType/description' element found in document\n")
        unless defined $description;
    my $contents = ($description->{Contents} ||= []);

    foreach my $element (@$contents) {
        # Called fully qualified so the check does not rely on isa() having
        # been imported into this package.
        next unless UNIVERSAL::isa($element, 'XML::Grove::Element');
        next unless $element->{Name} eq "field";

        # A <field> without a name attribute can never be the one we want.
        my $fieldName = $element->{Attributes}{'name'};
        next unless defined $fieldName && $fieldName eq $field;

        print " Modifying field '$fieldName' to '$value'... "
            if ($verbose >= 3);
        @{$element->{Contents}} = ( charac_indent(3),
                                    $characters,
                                    charac_indent(2));
        print "OK.\n" if ($verbose >= 3);
        return;
    }

    # No existing field with that name: append a new one.
    print " Adding field '$field' with value '$value'... " if ($verbose >= 2);
    my $element = XML::Grove::Element->new ( Name => 'field',
                                             Contents => [charac_indent(3),
                                                          $characters,
                                                          charac_indent(2)],
                                             Attributes => {"name" => $field});
    push @$contents, (charac_indent(2), $element, charac_indent(1));

    print "OK.\n" if ($verbose >= 2);
}
214 | |
# Apply a set of field values to one description file and write it back.
#
# Arguments:
#   $file   - path of the XML description file; it is parsed if it exists,
#             otherwise a new document skeleton is built
#   $fields - hash ref of field name => value; undefined values are skipped
#   $album  - type of file (0 if image or 1 if album)
#
# Dies if the file cannot be opened for writing or cannot be closed.
sub setFields{
    my ($file, $fields, $album) = @_;

    my $fileType = $album ? "album" : "image";
    my $document;

    if (-e $file) {
        # Get XML document as a Grove
        print " Reading file '$file'... " if ($verbose >= 2);
        my $grove_builder = XML::Grove::Builder->new;
        my $parser = XML::Parser::PerlSAX->new ( Handler => $grove_builder );
        $document = $parser->parse ( Source => { SystemId => $file } );
        print "OK.\n" if ($verbose >= 2);
    } else {
        print " Creating file '$file'... " if ($verbose >= 2);
        # Skeleton: <description> and <bins> always, <exif> for images only.
        my @sections = ('description', 'bins');
        push @sections, 'exif' if (!$album);

        my @elements;
        foreach my $section (@sections) {
            push @elements, (charac_indent(1),
                             XML::Grove::Element->new ( Name => $section,
                                                        Contents => [charac_indent(1)]));
        }
        push @elements, charac_indent(0);

        my $element =
            XML::Grove::Element->new ( Name => $fileType,
                                       Contents => \@elements);
        $document = XML::Grove::Document->new ( Contents => [ $element ] );
        # Same threshold as the "Creating file" message above, so the
        # progress line is always completed.
        print "OK.\n" if ($verbose >= 2);
    }

    # Sorted so that newly added fields are written in a stable order.
    foreach my $fieldName (sort keys %$fields) {
        my $fieldValue = $fields->{$fieldName};
        next unless defined $fieldValue;
        setField($fieldName, $fieldValue, $fileType, $document);
    }

    print " Writing file '$file'... " if ($verbose >= 2);
    # Write the Grove to the desc file
    my $fileHandler = IO::File->new;
    open($fileHandler, '>', $file)
        or die("Cannot open file $file to write Exif tag ($!)");
    if (!defined $localEncoding || $localEncoding !~ /utf-?8/i) {
        # if input is UTF-8 do not re-convert it again
        # $] is compared numerically; the :utf8 layer needs Perl 5.8.
        binmode($fileHandler, ":utf8") if $] >= 5.008;
    }

    # A custom 'Escape' map and an XML::Handler::XMLWriter handler were
    # commented out here in the original source.
    my $my_handler = XML::Handler::YAWriter->new( 'Output' => $fileHandler,
                                                  'Encoding' => "UTF-8",
                                                );
    $document->parse(DocumentHandler => $my_handler);
    # Buffered write errors surface at close time, so this must be fatal.
    close ($fileHandler) or die("can't close $file ($!)");
    print "OK.\n" if ($verbose >= 2);
}
291 | |
# Print the program banner with copyright and warranty notice to STDOUT.
# The text is kept in plain ASCII so it displays the same way whatever
# the terminal encoding is.
sub copyleft{
    print "\nbins_edit for BINS Photo Album 1.1.29 (http://bins.sautret.org/)\n";
    print "Copyright (C) 2001-2004 Jerome Sautret (Jerome\@Sautret.org)\n";
    print "This is free software with ABSOLUTELY NO WARRANTY.\n";
    print "See COPYING file for details.\n\n";
}
298 | |
# Print the banner and the command-line synopsis.
# When the argument is true, also print a pointer to --help and exit
# with that value as the exit status.
sub usage{
    my ($exit) = @_;   # should we exit after usage information ?
    copyleft();

    my $synopsis = <<'EoF';
bins_edit is a script to set fields in XML pictures description files for BINS.

usage:
bins_edit [-a|--album] [-m|--html]
[-t|--title title] [-e|--event event] [-l|--location location]
[-p|--people people] [-y|--date date] [-d|--description description]
[--longdesc longDescription] [--shortdesc shortDesription]
[--sample pictureFileName]
[-g|--generic tag=value]
[-h|--help] [-v|--verbose] [-q|--quiet] file [files...]

EoF
    print $synopsis;

    if ($exit){
        print "Type bins_edit --help for complete help.\n";
        exit ($exit);
    }
}
322 | |
323 | |
# Print the full help text (synopsis from usage() followed by the option
# descriptions and examples), then terminate the program.
# Help that was explicitly requested is not an error, so the exit status
# is 0.
sub help{
    usage(0);
    print <<'EoF';
Options:
  -t, --title, -e, --event, -l, --location,
  -p, --people, -y, --date, -d, --description :
                  these switches are used to set a value to a picture
                  description field.
  -t, --title, --longdesc, --shortdesc, --sample :
                  these switches are used to set a value to an album
                  description field (with --album option)
  -g, --generic tag=value :
                  set the field named 'tag' to 'value'.
  -a, --album   : edit album description.
                  (default is editing image description)
                  In this case, the file parameter must be the
                  source directory of the album.
                  Only the --title, --longdesc, --shortdesc and --sample
                  switches make sense with this option.
  -m, --html    : input value will be considered as HTML code, thus
                  no HTML encoding will be done.
  -v, --verbose : this switch can appear several times to increase
                  verbosity level.
  -q, --quiet   : suppress output

If filenames have no .xml suffix, it is added, so you can directly give
picture names on the command line.
Spaces and other special characters (even newlines) can be used in values
given as parameters as long as they are enclosed between quotes.

Examples:
  Set the title of the Image.jpg file to "My picture":
    bins_edit -t "My picture" Image.jpg

  Set the title and location of all JPEG pictures in the directory:
    bins_edit --title Holiday --location Paris *.jpg

  Use of HTML values:
    bins_edit --html --description '<b>BINS</b> is cool' file.jpg

  Set the title short description and sample image of the album
  in the current directory (note the dot as final parameter):
    bins_edit -a -t "My Album" --sample image.jpg --shortdesc "This is my album" .

EoF

    exit 0;
}
370 | |
371 | |
9
0f248ad86f9f
bins_edit: try to get encoding stuff right, move it into main
Peter Gervai <grin@grin.hu>
parents:
8
diff
changeset
|
# Work out the character set of the current locale and store its name in
# the file-level $localEncoding.
#
# The name comes from the output of `locale charmap`.  "LATIN1" is used
# instead when the command fails, prints nothing, or reports plain ASCII
# ("ANSI_X3.4-1968").
sub determine_encoding {
    my $charmap = `locale charmap`;
    my $status  = $?;

    # Backticks yield undef when the command could not be run at all;
    # normalise that before chomp so no warning is raised under -w.
    $charmap = '' unless defined $charmap;
    chomp $charmap;

    if ($status != 0) {
        $localEncoding = "LATIN1";
    } elsif (! $charmap or ($charmap eq "ANSI_X3.4-1968")) {
        # ANSI is unspeakably primitive, promote it.
        $localEncoding = "LATIN1";
        print "Forcing encoding to $localEncoding\n" if ($verbose >=2);
    } else {
        $localEncoding = $charmap;
    }

    # The converter object is not used any further in this sub.
    # NOTE(review): constructing it may be the only place where an unknown
    # charmap name is rejected - confirm before removing this line.
    my $converter = Text::Iconv->new($localEncoding, "UTF-8");
    print "Using $localEncoding encoding on input\n" if ($verbose >=2);
}
0f248ad86f9f
bins_edit: try to get encoding stuff right, move it into main
Peter Gervai <grin@grin.hu>
parents:
8
diff
changeset
|
388 |
0f248ad86f9f
bins_edit: try to get encoding stuff right, move it into main
Peter Gervai <grin@grin.hu>
parents:
8
diff
changeset
|
389 |
# Program entry point: parse the command line, determine the input
# encoding, then apply the requested field values to every description
# file named on the command line.
#
# With --album each argument is a directory and "<dir>/album.xml" is
# edited; with no argument the current directory is used.  Otherwise
# ".xml" is appended to every argument that does not already end in it.
sub main{
    my %values;
    my $album = 0; # 1 if it a album description file

    # process args
    Getopt::Long::Configure("bundling");
    GetOptions('t|title:s'       => \$values{title},
               'e|event:s'       => \$values{event},
               'l|location:s'    => \$values{location},
               'p|people:s'      => \$values{people},
               'y|date:s'        => \$values{date},
               'd|description:s' => \$values{description},
               'longdesc:s'      => \$values{longdesc},
               'shortdesc:s'     => \$values{shortdesc},
               'sample:s'        => \$values{sampleimage},
               'g|generic=s%'    => \%values,
               'm|html'          => \$html,
               'a|album'         => \$album,
               'v|verbose+'      => \$verbose,
               'q|quiet'         => sub { $verbose = 0 },
               'h|help'          => sub { help() },
               'copyright'       => sub { copyleft() },
              )
        or usage(1);

    print "Verbosity is $verbose\n" if $verbose>1;

    # Plain call with parentheses: "&determine_encoding;" would pass the
    # caller's @_ along implicitly.
    determine_encoding();

    my @files = @ARGV;
    if (!@files) {
        if ($album) {
            @files = (".");
        } else {
            print "No files specified.\n";
            usage(1);
        }
    }

    copyleft() if ($verbose >=2);

    foreach my $file (@files) {
        if ($album) {
            $file .= "/album.xml";
        }
        # The dot is escaped so that only a real ".xml" suffix counts;
        # a name such as "fooxml" still gets the suffix appended.
        if ($file !~ m/\.xml$/) {
            $file .= ".xml";
        }
        print "Processing file '$file'... " if ($verbose >= 1);
        print "\n" if ($verbose >= 2);
        setFields($file, \%values, $album);
        print "OK.\n" if ($verbose == 1);
    }
}
447 | |
# Run the script.
main();