Mercurial > notdcc
comparison misc/fetch-testmsg-whitelist.in @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c7f6b056b673 |
---|---|
1 #!/bin/sh | |
2 | |
3 # Fetch a list of "empty" mail messages for whitelisting. Many free mail | |
4 # service providers add HTML or other text to mail. That causes empty | |
5 # and nearly empty mail messages to have valid DCC checksums and not be | |
6 # ignored by DCC clients. | |
7 | |
8 # The fetched file can be included in whiteclnt files. For example, the | |
9 # following line in @prefix@/whiteclnt would whitelist many common | |
10 # empty messages | |
11 # include @prefix@/testmsg-whitelist | |
12 | |
13 # By default the script fetches http://www.iecc.com/dcc-testmsg-whitelist.txt | |
14 # to @prefix@/testmsg-whitelist | |
15 | |
16 # The script should be run at most once a day. | |
17 | |
18 | |
19 | |
20 # Rhyolite Software DCC 1.3.103-1.31 $Revision$ | |
21 # @configure_input@ | |
22 | |
23 exec </dev/null | |
24 | |
25 HTTP_REFERER=DCC-1.3.103-script; export HTTP_REFERER | |
26 | |
27 DCC_HOMEDIR=@prefix@ | |
28 URL=http://www.iecc.com/dcc-testmsg-whitelist.txt | |
29 TGT=testmsg-whitelist | |
30 LOG=$TGT.log | |
31 PGM=@FETCH_CMD@ | |
32 FORCE= | |
33 | |
34 USAGE="`basename $0`: [-xf] [-h homedir] [-p fetch-pgm] [-s src-URL]" | |
35 while getopts "xfh:p:s:" c; do | |
36 case $c in | |
37 x) set -x;; | |
38 f) FORCE=yes;; | |
39 h) DCC_HOMEDIR="$OPTARG";; | |
40 p) PGM="$OPTARG";; | |
41 s) URL="$OPTARG";; | |
42 *) echo 1>&2 "$USAGE"; exit 1;; | |
43 esac | |
44 done | |
45 shift `expr $OPTIND - 1 || true` | |
46 if test "$#" -ne 0; then | |
47 echo 1>&2 "$USAGE" | |
48 exit 1 | |
49 fi | |
50 | |
51 | |
52 FNAME=`expr "$URL" : '.*/\([^/][^/]*\)'` | |
53 if test -z "$FNAME"; then | |
54 FNAME="$URL" | |
55 if test -z "$FNAME"; then | |
56 echo 1>&2 "source file not specified" | |
57 exit 1 | |
58 fi | |
59 fi | |
60 | |
61 cd $DCC_HOMEDIR | |
62 | |
63 # don't bother if the file exists and is less than 7 days old | |
64 if test -f "$TGT" -a -z "$FORCE"; then | |
65 if test "`find $TGT -mtime -7 -type f`"; then | |
66 date "+%n%x %X: $TGT is too recent to fetch again" >>$LOG | |
67 exit 0 | |
68 fi | |
69 fi | |
70 | |
71 # Delay for an arbitrary, somewhat random number of seconds to try to spread | |
72 # the load on the HTTP server for the list. Some versions of cksum yield | |
73 # 10 digit numbers that some versions of expr think are negative. | |
74 RND=`ps | cksum | sed -e 's/ */ + /' -e 's/\([0-9]\{6\}\)\([0-9]\)/\1 + \2/g'` | |
75 RND=`expr \( $RND \) % 123` | |
76 sleep $RND | |
77 | |
78 # use fetch, wget, curl, or ftp that understands URLs | |
79 rm -f $FNAME | |
80 PGM_B=`basename $PGM` | |
81 if test "$PGM_B" = wget; then | |
82 PGM_B= | |
83 # Do not use --mirror because -r results in a 0 exit status | |
84 # even on failure. | |
85 # Do not use --no-remove-listing, -nr, or --dont-remove-listing | |
86 # because none of them are supported by all versions of wget. | |
87 # At least some versions of wget exit with 0 after having done | |
88 # nothing but emitting a usage message. | |
89 if $PGM -nd --retr-symlinks -N --no-host-directories \ | |
90 --passive-ftp @FETCH_WGET_OPTS@ $URL >$LOG 2>&1; then | |
91 if test -s $FNAME; then | |
92 if test -n "`sed -n -e 2q \ | |
93 -e 's/.*DOCTYPE.*/HTML/p' \ | |
94 -e 's/<HEAD>/HTML/p' -e 's/<head>/HTML/p' $FNAME`"; then | |
95 rm $FNAME | |
96 fi | |
97 fi | |
98 fi | |
99 fi | |
100 | |
101 if test "$PGM_B" = fetch; then | |
102 PGM_B= | |
103 $PGM -p -q -m $URL >$LOG 2>&1 | |
104 fi | |
105 | |
106 if test "$PGM_B" = curl; then | |
107 PGM_B= | |
108 $PGM -s -S --connect-timeout 30 --max-time 600 \ | |
109 @FETCH_CURL_OPTS@ $URL -o $FNAME >$LOG 2>&1 | |
110 # --fail does not work on at least some versions of curl | |
111 if test -s $FNAME; then | |
112 if test -n "`sed -n -e 2q \ | |
113 -e 's/.*DOCTYPE.*/HTML/p' \ | |
114 -e 's/<HEAD>/HTML/p' -e 's/<head>/HTML/p' $FNAME`"; then | |
115 rm $FNAME | |
116 fi | |
117 fi | |
118 fi | |
119 | |
120 if test "$PGM_B" = ftp; then | |
121 PGM_B= | |
122 $PGM -p $URL >$LOG 2>&1 | |
123 # if that did not work, try ancient anonymous FTP | |
124 if test ! -s $FNAME; then | |
125 HOST=`expr "$URL" : "ftp://\([^/]*\)/"` | |
126 RFILE=`expr "$URL" : "ftp://[^/]*/\(.*\)"` | |
127 echo "try old anonymous FTP" | |
128 (echo "user anonymous `hostname`"; echo "get $RFILE $FNAME") \ | |
129 | ftp -n $HOST | |
130 fi | |
131 # some versions of ftp like to leave empty files | |
132 if test ! -s $FNAME; then | |
133 rm -f $FNAME | |
134 fi | |
135 fi | |
136 # if some other program was specified, use it and hope it is simple enough | |
137 if test -n "$PGM_B"; then | |
138 $PGM $URL | |
139 fi | |
140 | |
141 if test ! -s "$FNAME"; then | |
142 echo 1>&2 "failed to fetch $FNAME with $PGM" | |
143 exit 1 | |
144 fi | |
145 | |
146 # Work around wget timestamping. wget stamps the fetched file with the | |
147 # server's modification time, but we use the mtime of the file to reduce | |
148 # fetching of the file, so reset it to now. We use `wget -N` to prevent .1 files. | |
149 touch $FNAME | |
150 | |
151 if test "$FNAME" != "$TGT"; then | |
152 mv -f "$FNAME" "$TGT" | |
153 fi | |
154 | |
155 date "+%n%x %X: fetched $TGT" >>$LOG |