view misc/fetch-testmsg-whitelist.in @ 5:0a7a5940ee3a

Change description per license
author Peter Gervai <grin@grin.hu>
date Tue, 10 Mar 2009 15:03:24 +0100 (2009-03-10)
parents c7f6b056b673
children
line wrap: on
line source
#!/bin/sh

# Fetch a list of "empty" mail messages for whitelisting.  Many free mail
#   service providers add HTML or other text to mail.  That causes empty
#   and nearly empty mail messages to have valid DCC checksums and not be
#   ignored by DCC clients.

# The fetched file can be included in whiteclnt files.  For example, the
#   following line in @prefix@/whiteclnt would whitelist many common
#   empty messages
#   include @prefix@/testmsg-whitelist

# By default the script fetches http://www.iecc.com/dcc-testmsg-whitelist.txt
#   to @prefix@/testmsg-whitelist

# The script should be run at most once a day.



# Rhyolite Software DCC 1.3.103-1.31 $Revision$
# @configure_input@

# Detach standard input; nothing below reads from it.
exec </dev/null

# Identify this script to child processes through the environment.
HTTP_REFERER=DCC-1.3.103-script
export HTTP_REFERER

# Defaults.  The -f, -h, -p and -s options parsed below override FORCE,
#   DCC_HOMEDIR, PGM and URL.  The log file is named after the target file.
FORCE=
TGT=testmsg-whitelist
LOG=${TGT}.log
URL=http://www.iecc.com/dcc-testmsg-whitelist.txt
DCC_HOMEDIR=@prefix@
PGM=@FETCH_CMD@

# Parse the command line: -x turns on tracing, -f forces a fetch, and
#   -h, -p and -s replace the home directory, the fetch program and the
#   source URL.  An unknown option, a missing option argument or any
#   leftover operand prints the usage line on stderr and exits with 1.
USAGE="`basename $0`: [-xf] [-h homedir] [-p fetch-pgm] [-s src-URL]"
while getopts "xfh:p:s:" opt; do
    case "$opt" in
	h) DCC_HOMEDIR="$OPTARG";;
	p) PGM="$OPTARG";;
	s) URL="$OPTARG";;
	f) FORCE=yes;;
	x) set -x;;
	*) echo 1>&2 "$USAGE"; exit 1;;
    esac
done
# expr exits non-zero when its result is 0, hence the "|| true"
shift `expr $OPTIND - 1 || true`
[ "$#" -eq 0 ] || {
    echo 1>&2 "$USAGE"
    exit 1
}


# Name the local copy after the URL: take the run of non-'/' characters
#   that follows the last '/' having such a run after it.  When nothing
#   matches, use the URL itself as the file name; an empty URL is fatal.
FNAME=`expr "$URL" : '.*/\([^/][^/]*\)'`
case "$FNAME" in
    "") FNAME="$URL";;
esac
case "$FNAME" in
    "")
	echo 1>&2 "source file not specified"
	exit 1
	;;
esac

# Work inside the DCC home directory: the relative names used below ($TGT,
#   $LOG and the fetched file) all resolve against it.  Stop if it cannot
#   be entered, because continuing would remove, fetch and rename files in
#   whatever directory the script happened to be started from.  The name
#   is quoted so that a directory containing blanks still works.
cd "$DCC_HOMEDIR" || {
    echo 1>&2 "cannot cd to $DCC_HOMEDIR"
    exit 1
}

# Unless -f was given, leave an existing copy alone when find reports it
#   as modified within the last 7 days (-mtime -7): note that in the log
#   and exit successfully without fetching.
if [ -z "$FORCE" ] && [ -f "$TGT" ]; then
    RECENT=`find $TGT -mtime -7 -type f`
    if [ -n "$RECENT" ]; then
	date "+%n%x %X: $TGT is too recent to fetch again" >>$LOG
	exit 0
    fi
fi

# Delay for an arbitrary, somewhat random number of seconds to try to spread
#   the load on the HTTP server for the list. Some versions of cksum yield
#   10 digit numbers that some versions of expr think are negative.
#   To cope with that, the first sed expression replaces the first run of
#   blanks in the cksum output with " + ", and the second inserts " + "
#   after every group of six digits that is followed by another digit, so
#   that expr is handed a sum of shorter numbers instead of one long one.
RND=`ps | cksum | sed -e 's/  */ + /' -e 's/\([0-9]\{6\}\)\([0-9]\)/\1 + \2/g'`
# Reduce the sum modulo 123 and sleep for that many seconds.
RND=`expr \( $RND \) % 123`
sleep $RND

# The fetch recipe is chosen below from the base name of the fetch program:
#   fetch, wget, curl, or an ftp that understands URLs.  Also remove any
#   copy of the file left in this directory before fetching.
PGM_B=`basename $PGM`
rm -f $FNAME
case "$PGM_B" in
wget)
    # Clearing PGM_B marks the program as handled, so that the generic
    #   fallback at the end does not run it a second time.
    PGM_B=
    # --mirror is avoided because its implied -r makes wget exit with 0
    #   even on failure.
    # --no-remove-listing, -nr and --dont-remove-listing are avoided
    #   because no single one of them is accepted by every wget version.
    # A 0 exit status is not proof of a download either: at least some
    #   versions exit 0 after only printing a usage message, so the file
    #   itself is checked as well.
    if $PGM -nd --retr-symlinks -N --no-host-directories --passive-ftp	\
	    @FETCH_WGET_OPTS@ $URL >$LOG 2>&1 && test -s $FNAME; then
	# Only the first line is examined (sed quits at line 2).  A file
	#   whose first line mentions DOCTYPE, <HEAD> or <head> is HTML
	#   rather than the expected list, and is discarded.
	SNIFF=`sed -n -e 2q -e 's/.*DOCTYPE.*/HTML/p'			\
		-e 's/<HEAD>/HTML/p' -e 's/<head>/HTML/p' $FNAME`
	if [ -n "$SNIFF" ]; then
	    rm $FNAME
	fi
    fi
    ;;
esac

# A program named "fetch" is run with -p -q -m and the URL; whatever it
#   prints, errors included, replaces the contents of $LOG.  Clearing
#   PGM_B keeps the generic fallback at the end from running it again.
case "$PGM_B" in
fetch)
    PGM_B=
    $PGM -p -q -m $URL >$LOG 2>&1
    ;;
esac

case "$PGM_B" in
curl)
    # Clearing PGM_B marks the program as handled, so that the generic
    #   fallback at the end does not run it a second time.
    PGM_B=
    $PGM -s -S --connect-timeout 30 --max-time 600 @FETCH_CURL_OPTS@	\
	$URL -o $FNAME >$LOG 2>&1
    # --fail cannot be relied on (it does not work on at least some
    #   versions of curl), so look at what was saved instead.  Only the
    #   first line is examined (sed quits at line 2); a file whose first
    #   line mentions DOCTYPE, <HEAD> or <head> is HTML rather than the
    #   expected list, and is discarded.
    if [ -s $FNAME ]; then
	SNIFF=`sed -n -e 2q -e 's/.*DOCTYPE.*/HTML/p'			\
		-e 's/<HEAD>/HTML/p' -e 's/<head>/HTML/p' $FNAME`
	if [ -n "$SNIFF" ]; then
	    rm $FNAME
	fi
    fi
    ;;
esac

# ftp: first hand the whole URL to the ftp program, for versions that
#   understand URLs.  If that leaves no usable file, fall back to a
#   scripted anonymous login.  Clearing PGM_B keeps the generic fallback
#   at the end from running the program again.
if test "$PGM_B" = ftp; then
    PGM_B=
    $PGM -p "$URL"  >"$LOG" 2>&1
    # if that did not work, try ancient anonymous FTP
    if test ! -s "$FNAME"; then
	HOST=`expr "$URL" : "ftp://\([^/]*\)/"`
	RFILE=`expr "$URL" : "ftp://[^/]*/\(.*\)"`
	# Both are empty unless the URL is an ftp:// URL naming a host and
	#   a remote file; without them there is nothing to connect to.
	if test -n "$HOST" && test -n "$RFILE"; then
	    echo "try old anonymous FTP"
	    # Use the configured program rather than whichever "ftp" comes
	    #   first in $PATH, which may be missing or a different client.
	    (echo "user anonymous `hostname`"; echo "get $RFILE $FNAME")	\
		| $PGM -n "$HOST"
	fi
    fi
    # some versions of ftp like to leave empty files
    if test ! -s "$FNAME"; then
	rm -f "$FNAME"
    fi
fi
# PGM_B is still set only when none of the recipes above recognized the
#   program.  In that case run it with nothing but the URL and hope that
#   is enough for it to fetch the file.
test -z "$PGM_B" || $PGM $URL

# Whichever program ran, a missing or empty file means the fetch failed.
test -s "$FNAME" || {
    echo 1>&2 "failed to fetch $FNAME with $PGM"
    exit 1
}

# Undo wget's timestamping.  `wget -N` is used so that no ".1" copies are
#   created, but the age check near the top relies on the file's mtime to
#   limit how often the list is fetched, so stamp the file with the
#   current time.
touch $FNAME

# Give the fetched file its target name unless it already has it.
case "$FNAME" in
"$TGT") ;;
*) mv -f "$FNAME" "$TGT";;
esac

date "+%n%x %X: fetched $TGT" >>$LOG