view bins_edit @ 6:3021deef1599

Encoding related fixes * chomp encoding string * reformat else (seems someone hacked it in) * debug print encoding * do not use :utf8 binmode ("re-encode") on written file if the input is _already_ UTF-8
author Peter Gervai <grin@grin.hu>
date Thu, 16 Oct 2008 00:02:14 +0200
parents a84c32f131df
children 69923c621cfe
line wrap: on
line source

#!/usr/bin/perl -w

#    bins_edit for BINS Photo Album version 1.1.29
#    Copyright (C) 2001-2004 J?r?me Sautret (Jerome@Sautret.org)
#
#    $Id: bins_edit,v 1.21 2004/10/24 13:19:16 jerome Exp $
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; see the file COPYING.  If not, write to
#    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
#    Boston, MA 02111-1307, USA.

#    Type "bins_edit -h" on command line for usage information.

use strict;

use Getopt::Long;
use IO::File;
use UNIVERSAL qw(isa);

# XML parsing & writing
use XML::Grove;
use XML::Grove::Builder;
use XML::Grove::Path;
use XML::Grove::PerlSAX;
use XML::Parser::PerlSAX;
#use XML::Handler::XMLWriter;
use XML::Handler::YAWriter;
use Text::Iconv;
use HTML::Entities;

my $verbose = 1;
my $html=0;

my $localEncoding;
$localEncoding = `locale charmap`;
chomp $localEncoding;
if ($? != 0 ) {
  $localEncoding = "LATIN1";
} else {
  if (! $localEncoding or ($localEncoding eq "ANSI_X3.4-1968")) {
    chop($localEncoding);
    # ANSI is unspeakably primitive, promote it.
    $localEncoding = "LATIN1";
    print "Forcing encoding to $localEncoding\n" if ($verbose >=2);
  }
}
my $converter = Text::Iconv->new($localEncoding, "UTF-8");
print "Using $localEncoding encoding on input\n" if ($verbose >=2);

# decode HTML entites which doesn't exist in XML
sub decodeEntites{
  my $s = shift;

  my %entities = (
		 AElig	=> '?',	# capital AE diphthong (ligature)
		 Aacute	=> '?',	# capital A, acute accent
		 Acirc	=> '?',	# capital A, circumflex accent
		 Agrave	=> '?',	# capital A, grave accent
		 Aring	=> '?',	# capital A, ring
		 Atilde	=> '?',	# capital A, tilde
		 Auml	=> '?',	# capital A, dieresis or umlaut mark
		 Ccedil	=> '?',	# capital C, cedilla
		 ETH	=> '?',	# capital Eth, Icelandic
		 Eacute	=> '?',	# capital E, acute accent
		 Ecirc	=> '?',	# capital E, circumflex accent
		 Egrave	=> '?',	# capital E, grave accent
		 Euml	=> '?',	# capital E, dieresis or umlaut mark
		 Iacute	=> '?',	# capital I, acute accent
		 Icirc	=> '?',	# capital I, circumflex accent
		 Igrave	=> '?',	# capital I, grave accent
		 Iuml	=> '?',	# capital I, dieresis or umlaut mark
		 Ntilde	=> '?',	# capital N, tilde
		 Oacute	=> '?',	# capital O, acute accent
		 Ocirc	=> '?',	# capital O, circumflex accent
		 Ograve	=> '?',	# capital O, grave accent
		 Oslash	=> '?',	# capital O, slash
		 Otilde	=> '?',	# capital O, tilde
		 Ouml	=> '?',	# capital O, dieresis or umlaut mark
		 THORN	=> '?',	# capital THORN, Icelandic
		 Uacute	=> '?',	# capital U, acute accent
		 Ucirc	=> '?',	# capital U, circumflex accent
		 Ugrave	=> '?',	# capital U, grave accent
		 Uuml	=> '?',	# capital U, dieresis or umlaut mark
		 Yacute	=> '?',	# capital Y, acute accent
		 aacute	=> '?',	# small a, acute accent
		 acirc	=> '?',	# small a, circumflex accent
		 aelig	=> '?',	# small ae diphthong (ligature)
		 agrave	=> '?',	# small a, grave accent
		 aring	=> '?',	# small a, ring
		 atilde	=> '?',	# small a, tilde
		 auml	=> '?',	# small a, dieresis or umlaut mark
		 ccedil	=> '?',	# small c, cedilla
		 eacute	=> '?',	# small e, acute accent
		 ecirc	=> '?',	# small e, circumflex accent
		 egrave	=> '?',	# small e, grave accent
		 eth	=> '?',	# small eth, Icelandic
		 euml	=> '?',	# small e, dieresis or umlaut mark
		 iacute	=> '?',	# small i, acute accent
		 icirc	=> '?',	# small i, circumflex accent
		 igrave	=> '?',	# small i, grave accent
		 iuml	=> '?',	# small i, dieresis or umlaut mark
		 ntilde	=> '?',	# small n, tilde
		 oacute	=> '?',	# small o, acute accent
		 ocirc	=> '?',	# small o, circumflex accent
		 ograve	=> '?',	# small o, grave accent
		 oslash	=> '?',	# small o, slash
		 otilde	=> '?',	# small o, tilde
		 ouml	=> '?',	# small o, dieresis or umlaut mark
		 szlig	=> '?',	# small sharp s, German (sz ligature)
		 thorn	=> '?',	# small thorn, Icelandic
		 uacute	=> '?',	# small u, acute accent
		 ucirc	=> '?',	# small u, circumflex accent
		 ugrave	=> '?',	# small u, grave accent
		 uuml	=> '?',	# small u, dieresis or umlaut mark
		 yacute	=> '?',	# small y, acute accent
		 yuml	=> '?',	# small y, dieresis or umlaut mark

		 # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
		 copy   => '?',	# copyright sign
		 reg    => '?',	# registered sign
		 nbsp   => "\240", # non breaking space

		 # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
		 iexcl  => '?',
		 cent   => '?',
		 pound  => '?',
		 curren => '?',
		 yen    => '?',
		 brvbar => '?',
		 sect   => '?',
		 uml    => '?',
		 ordf   => '?',
		 laquo  => '?',
		 'not'   => '?', # not is a keyword in perl
		 shy    => '?',
		 macr   => '?',
		 deg    => '?',
		 plusmn => '?',
		 sup1   => '?',
		 sup2   => '?',
		 sup3   => '?',
		 acute  => '?',
		 micro  => '?',
		 para   => '?',
		 middot => '?',
		 cedil  => '?',
		 ordm   => '?',
		 raquo  => '?',
		 frac14 => '?',
		 frac12 => '?',
		 frac34 => '?',
		 iquest => '?',
		 'times' => '?', # times is a keyword in perl
		 divide => '?',
		);

  while (my($entity, $char) = each(%entities)) {
    $s =~ s/\&$entity\;/$char/g;
  }
  return $s;
}

sub charac_indent{
  my $n = shift(@_);
  my $s="\n";
  for (1..$n){
    $s .= "   ";
  }
  return XML::Grove::Characters->new ( Data => $s );
}

sub setField{
  my $field = shift(@_);  # field to add or modify
  my $value = shift(@_);  # value to set to field
  my $fileType  = shift(@_);  # type of file (iamge or album)
  my $document  = shift(@_);  # XML document as a Grove

  if (! $html) {
    $value = encode_entities($value, '\00-\31<&"');
  }

  my $characters =
    XML::Grove::Characters->new( Data =>
				 decodeEntites($value));
  #my $characters = XML::Grove::Characters->new ( Data => $value );

  my $fieldName;
  my $fieldValue;
  foreach my $element
    (@{$document->at_path('/'.$fileType.'/description')->{Contents}}) {
      if (isa($element, 'XML::Grove::Element') && $element->{Name} eq "field") {
	$fieldName = $element->{Attributes}{'name'};
	$fieldValue = "";
	if ($fieldName eq $field) {
	  print "  Modifying field '$fieldName' to '$value'... "
	    if ($verbose >= 3);
	  @{$element->{Contents}} = ( charac_indent(3),
				      $characters,
				      charac_indent(2));
	  print "OK.\n" if ($verbose >= 3);
	  return;
	}
      }
    }

  print "  Adding field '$field' with value '$value'... " if ($verbose >= 2);
  my $element = XML::Grove::Element->new ( Name => 'field',
					   Contents => [charac_indent(3),
							$characters,
							charac_indent(2)],
					   Attributes => {"name" => $field});
  push @{$document->at_path('/'.$fileType.'/description')->{Contents}},
    (charac_indent(2), $element, charac_indent(1));

  print "OK.\n" if ($verbose >= 2);
}

sub setFields{
  my $file   = shift(@_);
  my $fields = shift(@_);
  my $album  = shift(@_);  # type of file (0 if image or 1 if album)
  my $document;

  my $fileType;
  if ($album) {
    $fileType = "album";
  } else{
    $fileType = "image";
  }

  if (-e $file) {
    # Get XML document as a Grove
    print " Reading file '$file'... " if ($verbose >= 2);
    my $grove_builder = XML::Grove::Builder->new;
    my $parser = XML::Parser::PerlSAX->new ( Handler => $grove_builder );
    $document = $parser->parse ( Source => { SystemId => $file } );
    print "OK.\n" if ($verbose >= 2);
  } else {
    print " Creating file '$file'... " if ($verbose >= 2);
    my @elements;
    push @elements, (charac_indent(1),
		     XML::Grove::Element->new ( Name => 'description',
						Contents => [charac_indent(1)]),
		     charac_indent(1),
		     XML::Grove::Element->new ( Name => 'bins',
						Contents => [charac_indent(1)]),
		    );
    if (!$album) {
      push @elements, ( charac_indent(1),
			XML::Grove::Element->new ( Name => 'exif',
						   Contents =>
						   [charac_indent(1)]),
		      );
    }
    push @elements, charac_indent(0);
    my $element =
      XML::Grove::Element->new ( Name => $fileType,
				 Contents => \@elements);
    $document = XML::Grove::Document->new ( Contents => [ $element ] );
    print "OK.\n" if ($verbose >= 3);
  }

  my $fieldName;
  my $fieldValue;
  while ( ($fieldName, $fieldValue) = each(%$fields) ) {
    if (defined $fieldValue) {
      setField($fieldName, $fieldValue, $fileType, $document);
    }
  }

  print " Writing file '$file'... " if ($verbose >= 2);
  # Write the Grove to the desc file
  my $fileHandler = new IO::File;
  open($fileHandler, '>', $file)
    or die("Cannot open file $file to write Exif tag ($!)");
  if( $localEncoding !~ /utf-?8/i ) {
      # if input is UTF-8 do not re-convert it again
      binmode($fileHandler, ":utf8") if $^V ge v5.8.0;
  }

  my $my_handler = new XML::Handler::YAWriter( 'Output' => $fileHandler,
					 #      'Escape' => {
					#		    '--'  => '&mdash;',
							    #'&'   => '&amp;',
					#		   },
					       'Encoding' => "UTF-8",
					     );
#  my $my_handler = XML::Handler::XMLWriter->new( Output => $fileHandler,
#  						 Newlines => 0);
  $document->parse(DocumentHandler => $my_handler);
  close ($fileHandler) || bail ("can't close $file ($!)");
  print "OK.\n" if ($verbose >= 2);
}

sub copyleft{
print "\nbins_edit for BINS Photo Album 1.1.29 (http://bins.sautret.org/)\n";
print "Copyright ? 2001-2004 J?r?me Sautret (Jerome\@Sautret.org)\n";
print "This is free software with ABSOLUTELY NO WARRANTY.\n";
print "See COPYING file for details.\n\n";
}

sub usage{
  my $exit=shift; # should we exit after usage information ?
  copyleft();

  print <<EoF ;
bins_edit is a script to set fields in XML pictures description files for BINS.

usage:
bins_edit [-a|--album] [-m|--html]
          [-t|--title title] [-e|--event event] [-l|--location location]
          [-p|--people people] [-y|--date date] [-d|--description description]
	  [--longdesc longDescription] [--shortdesc shortDesription]
          [--sample pictureFileName]
          [-g|--generic tag=value]
          [-h|--help] [-v|--verbose] [-q|--quiet] file [files...]

EoF

  if ($exit){
    print "Type bins_edit --help for complete help.\n";
    exit ($exit);
  }
}


sub help{
  usage(0);
  print <<EoF ;
Options:
  -t, --title, -e, --event event, -l, --location,
  -p, --people, -y, --date, -d, --description :
                   these switchs are used to set a value to a picture 
                   desciption field.
  -t, --title, --longdesc, --shortdesc, --sample :
                   these switchs are used to set a value to an album
                   desciption field (with --album option)
  -a, --album :    edit album description.
                   (default is editing image description)
                   In this case, the file parameter must be the
                   source directory of the album.
                   Only the --title, --longdesc, --shortdesc and --sample
                   switchs have sense with this option.
  -m, --html :     input value will be considering as HTML code, thus,
                   no HTML encoding will be done.
  -v, --verbose :  this switch can appear several times to increase
                   verbosity level.
  -q, --quiet :    suppress output

If filenames have no .xml suffix, it is added, so you can directly give
picture names on the command line.
Spaces and other special characters (even newlines) can be used in values
given as parameters as long as they are enclosed between quotes.

Examples:
Set the title of the Image.jpg file to "My picture":
bins_edit -t "My picture" Image.jpg

Set the title and location of all JPEG pictures in the directory:
bins_edit --title Holiday --location Paris *.jpg

Use of HTML values:
bins_edit --html --description '<b>BINS</b> is cool' file.jpg

Set the title short description and sample image of the album 
in the current directory (note the dot as final parameter):
bins_edit -a -t "My Album" --sample image.jpg --shortdesc "This is my album" .

EoF

  exit 1;
}


sub main{
  my %values;
  my $album = 0;  # 1 if it a album description file


  # process args
  Getopt::Long::Configure("bundling");
  GetOptions('t|title:s'       => \$values{title},
	     'e|event:s'       => \$values{event},
	     'l|location:s'    => \$values{location},
	     'p|people:s'      => \$values{people},
	     'y|date:s'        => \$values{date},
	     'd|description:s' => \$values{description},
	     'longdesc:s'      => \$values{longdesc},
	     'shortdesc:s'     => \$values{shortdesc},
	     'sample:s'        => \$values{sampleimage},
	     'g|generic=s%'    => \%values,
	     'm|html'          => \$html,
	     'a|album'         => \$album,
	     'v|verbose+'      => \$verbose,
	     'q|quiet'         => sub { $verbose = 0 },
	     'h|help'          => sub { help() },
	     'copyright'       => sub { copyleft() },
	    )
    or usage(1);

  my @files;
  if ($#ARGV < 0) {
    if ($album) {
      @files = (".");
    } else {
      print "No files specified.\n";
      usage(1)
    }
  } else {
    @files = @ARGV;
  }

  copyleft() if ($verbose >=2);

  foreach my $file (@files) {
    if ($album) {
      $file .= "/album.xml";
    }
    if ($file !~ m/.xml$/) {
      $file .= ".xml";
    }
    print "Processing file '$file'... " if ($verbose >= 1);
    print "\n" if ($verbose >= 2);
    setFields($file, \%values, $album);
    print "OK.\n" if ($verbose == 1);
  }
}

main();