annotate dcclib/ckfuz2.c @ 0:c7f6b056b673

First import of vendor version
author Peter Gervai <grin@grin.hu>
date Tue, 10 Mar 2009 13:49:58 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
1 /* Distributed Checksum Clearinghouse
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
2 *
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
3 * compute fuzzy body checksum #2
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
4 *
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
5 * Copyright (c) 2008 by Rhyolite Software, LLC
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
6 *
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
7 * This agreement is not applicable to any entity which sells anti-spam
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
8 * solutions to others or provides an anti-spam solution as part of a
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
9 * security solution sold to other entities, or to a private network
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
10 * which employs the DCC or uses data provided by operation of the DCC
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
11 * but does not provide corresponding data to other users.
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
12 *
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
13 * Permission to use, copy, modify, and distribute this software without
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
14 * changes for any purpose with or without fee is hereby granted, provided
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
15 * that the above copyright notice and this permission notice appear in all
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
16 * copies and any distributed versions or copies are either unchanged
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
17 * or not called anything similar to "DCC" or "Distributed Checksum
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
18 * Clearinghouse".
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
19 *
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
20 * Parties not eligible to receive a license under this agreement can
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
21 * obtain a commercial license to use DCC by contacting Rhyolite Software
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
22 * at sales@rhyolite.com.
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
23 *
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
24 * A commercial license would be for Distributed Checksum and Reputation
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
25 * Clearinghouse software. That software includes additional features. This
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
26 * free license for Distributed ChecksumClearinghouse Software does not in any
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
28 * software
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
29 *
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
37 * SOFTWARE.
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
38 *
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
39 * Rhyolite Software DCC 1.3.103-1.52 $Revision$
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
40 */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
41
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
42 #include "dcc_ck.h"
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
43
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
44 #include "ckfuz2_tbl.h"
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
45 struct {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
46 const char **words;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
47 u_int len;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
48 const u_char *cset;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
49 } tbls[FUZ2_LAN_NUM] = {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
50 {word_tbl0, word_tbl0_LEN, 0},
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
51 {word_tbl1, word_tbl1_LEN, 0},
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
52 {word_tbl2, word_tbl2_LEN, dcc_cset_2},
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
53 };
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
54
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
55
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
56 #define FZ2 cks->fuz2
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
57
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
58 #define BUF_LEN 1024
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
59 typedef struct {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
60 char buf[BUF_LEN+sizeof(DCC_FUZ2_WORD)+1];
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
61 int blen;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
62 } LBUF;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
63
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
64
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
65 #ifdef DCC_DEBUG_CKSUM
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
66 #define FUZ2(lp, b, l) (dcc_clnt_debug == 5 \
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
67 ? (write(1, b, l), MD5Update(&lp->md5, b, l)) \
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
68 : MD5Update(&lp->md5, b, l))
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
69 #else
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
70 #define FUZ2(lp, b, l) MD5Update(&lp->md5, b, l)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
71 #endif
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
72
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
73
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
74 void
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
75 dcc_ck_fuz2_init(DCC_GOT_CKS *cks)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
76 {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
77 FUZ2_LANG *lp;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
78
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
79 FZ2.wlen = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
80 DCC_FUZ2_WORD_CLEAR(&FZ2.w);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
81 FZ2.st = DCC_FUZ2_ST_WORD;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
82 FZ2.url_cp = FZ2.url_buf;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
83 FZ2.urls = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
84
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
85 cks->sums[DCC_CK_FUZ2].type = DCC_CK_FUZ2;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
86
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
87 FZ2.btotal = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
88 FZ2.xsummed = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
89 for (lp = FZ2.lang; lp <= LAST(FZ2.lang); ++lp) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
90 lp->wsummed = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
91 lp->wtotal = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
92 MD5Init(&lp->md5);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
93 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
94 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
95
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
96
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
97
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
98 static inline u_char /* 1=found it, 0=not a known word */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
99 lookup_word(const DCC_FUZ2_WORD *w, u_int wlen,
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
100 const char **word_tbl, u_int word_tbl_len)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
101 {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
102 const char *p;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
103 u_int n;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
104
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
105 p = word_tbl[fuz2_word_hash(w, word_tbl_len)];
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
106 if (!p)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
107 return 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
108 for (;;) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
109 n = *p++;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
110 if (!n)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
111 return 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
112 if (n == wlen && !memcmp(w->b, p, n))
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
113 return 1;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
114 p += n;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
115 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
116 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
117
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
118
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
119
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
120 static void
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
121 add_word(DCC_GOT_CKS *cks, LBUF *lbp)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
122 {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
123 FUZ2_LANG *lp;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
124 int tbl;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
125
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
126 tbl = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
127 for (lp = FZ2.lang; lp < &FZ2.lang[FUZ2_LAN_NUM]; ++lp, ++lbp, ++tbl) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
128 if (tbls[tbl].cset != 0
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
129 && tbls[tbl].cset != cks->mime_cset)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
130 continue;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
131 if (lookup_word(&FZ2.w, FZ2.wlen,
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
132 tbls[tbl].words, tbls[tbl].len)) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
133 ++lp->wtotal;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
134 memcpy(&lbp->buf[lbp->blen], &FZ2.w, FZ2.wlen);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
135 if ((lbp->blen += FZ2.wlen) >= BUF_LEN) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
136 lp->wsummed += lbp->blen;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
137 FUZ2(lp, lbp->buf, lbp->blen);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
138 lbp->blen = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
139 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
140 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
141 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
142 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
143
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
144
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
145
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
146 /* resolve an HTML characater reference */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
147 static u_char /* 0 or found value */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
148 lookup_cref(DCC_FUZ2_WORD *w, u_int clen)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
149 {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
150 const char *p;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
151 u_int n;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
152
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
153 if (clen > sizeof(DCC_FUZ2_WORD)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
154 || clen == 0)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
155 return 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
156 p = cref_tbl[fuz2_word_hash(w, cref_tbl_LEN)];
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
157 if (!p)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
158 return 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
159 for (;;) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
160 n = *p++;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
161 if (!n)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
162 return 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
163 if (n == clen && !memcmp(w->b, p, n))
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
164 return p[clen];
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
165 p += n+1;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
166 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
167 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
168
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
169
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
170
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
171 static int
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
172 adv_cref(DCC_GOT_CKS *cks, u_char c)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
173 {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
174 switch (FZ2.cref_st) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
175 case DCC_CREF_ST_IDLE:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
176 dcc_logbad(EX_SOFTWARE, "impossible fuz2 cref state");
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
177 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
178
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
179 case DCC_CREF_ST_START:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
180 /* start to get a possible HTML character reference
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
181 * We have already passed the '&' */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
182 if (c == '#') {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
183 FZ2.cref_st = DCC_CREF_ST_NUM;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
184 } else if ((c >= 'a' && c <= 'z')
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
185 || (c >= 'A' && c <= 'Z')) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
186 DCC_FUZ2_WORD_CLEAR(&FZ2.cref_w);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
187 FZ2.cref_w.b[0] = c;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
188 FZ2.cref_cnt = 1;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
189 FZ2.cref_st = DCC_CREF_ST_NAME;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
190 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
191 FZ2.cref_st = DCC_CREF_ST_IDLE;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
192 return -2;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
193 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
194 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
195
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
196 case DCC_CREF_ST_NUM:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
197 /* look for 'x' or the first digit */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
198 if (c == 'x' || c == 'X') {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
199 FZ2.cref_st = DCC_CREF_ST_HEX;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
200 FZ2.cref_cnt = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
201 } else if (c >= '0' && c <= '9') {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
202 FZ2.cref_cnt = c - '0';
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
203 FZ2.cref_st = DCC_CREF_ST_DEC;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
204 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
205 FZ2.cref_st = DCC_CREF_ST_IDLE;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
206 return -2;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
207 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
208 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
209
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
210 case DCC_CREF_ST_DEC:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
211 if (c >= '0' && c <= '9') {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
212 FZ2.cref_cnt = FZ2.cref_cnt*10 + (c - '0');
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
213 } else if (cks->mime_cset[c] == FC_SP) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
214 FZ2.cref_st = DCC_CREF_ST_IDLE;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
215 return c == ';';
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
216 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
217 FZ2.cref_st = DCC_CREF_ST_IDLE;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
218 return -2;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
219 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
220 if (FZ2.cref_cnt > 256)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
221 FZ2.cref_cnt = 256;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
222 ++FZ2.btotal;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
223 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
224
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
225 case DCC_CREF_ST_HEX:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
226 if ((c >= 'a' && c <= 'f')
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
227 || (c >= 'A' && c <= 'F')) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
228 FZ2.cref_cnt = (FZ2.cref_cnt<<4) + (c & 0xf) + 9;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
229 } else if (c >= '0' && c <= '9') {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
230 FZ2.cref_cnt = (FZ2.cref_cnt<<4) + (c - '0');
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
231 } else if (cks->mime_cset[c] == FC_SP) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
232 FZ2.cref_st = DCC_CREF_ST_IDLE;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
233 return c == ';';
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
234 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
235 FZ2.cref_st = DCC_CREF_ST_IDLE;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
236 return -2;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
237 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
238 if (FZ2.cref_cnt > 256)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
239 FZ2.cref_cnt = 256;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
240 ++FZ2.btotal;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
241 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
242
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
243 case DCC_CREF_ST_NAME:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
244 if ((c >= 'a' && c <= 'z')
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
245 || (c >= 'A' && c <= 'Z')) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
246 /* If the word is too long, the final match will fail.
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
247 * This will consume it */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
248 if (FZ2.cref_cnt < sizeof(DCC_FUZ2_WORD))
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
249 FZ2.cref_w.b[FZ2.cref_cnt++] = c;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
250 } else if (cks->mime_cset[c] == FC_SP) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
251 /* this character ends the cref */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
252 FZ2.cref_cnt = lookup_cref(&FZ2.cref_w, FZ2.cref_cnt);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
253 FZ2.cref_st = DCC_CREF_ST_IDLE;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
254 return c == ';';
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
255 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
256 FZ2.cref_st = DCC_CREF_ST_IDLE;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
257 return -2;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
258 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
259 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
260 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
261 return -1;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
262 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
263
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
264
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
265 void
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
266 dcc_ck_fuz2(DCC_GOT_CKS *cks, const char *bp, u_int bp_len)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
267 {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
268 #define SKIP_WORD() (FZ2.wlen = sizeof(DCC_FUZ2_WORD)+1)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
269 #define JUNK() (SKIP_WORD(), FZ2.st = DCC_FUZ2_ST_WORD)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
270 LBUF *lbp, lbufs[FUZ2_LAN_NUM];
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
271 FUZ2_LANG *lp;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
272 int i;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
273 char *p;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
274 u_char c;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
275
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
276 if (cks->sums[DCC_CK_FUZ2].type != DCC_CK_FUZ2)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
277 return;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
278
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
279 for (lbp = lbufs; lbp <= LAST(lbufs); ++lbp)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
280 lbp->blen = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
281
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
282 while (bp_len != 0) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
283 switch (FZ2.st) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
284 case DCC_FUZ2_ST_WORD:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
285 /* gathering a word */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
286 do {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
287 if (FZ2.cref_st == DCC_CREF_ST_IDLE) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
288 --bp_len;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
289 c = *bp++;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
290 c = cks->mime_cset[c];
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
291 if (c == FC_CF) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
292 if (cks->mime_ct == DCC_CK_CT_HTML){
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
293 FZ2.cref_st = DCC_CREF_ST_START;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
294 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
295 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
296 c = FC_SP;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
297 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
298 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
299 i = adv_cref(cks, *bp);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
300 if (i == -2) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
301 JUNK(); /* bogus cref */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
302 continue;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
303 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
304 if (i < 0) { /* get more of cref */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
305 --bp_len;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
306 ++bp;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
307 continue;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
308 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
309 bp += i; /* use complete cref */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
310 bp_len -= i;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
311 c = cks->mime_cset[FZ2.cref_cnt];
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
312
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
313 if (c == FC_SK)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
314 continue; /* ignore accent mark */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
315 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
316
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
317 if (c >= FC_A) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
318 ++FZ2.btotal;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
319 if (FZ2.wlen < sizeof(DCC_FUZ2_WORD))
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
320 FZ2.w.b[FZ2.wlen++] = c;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
321 else
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
322 SKIP_WORD();
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
323 continue;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
324 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
325
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
326 if (c == FC_SP) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
327 if (FZ2.wlen >= MIN_WLEN
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
328 && FZ2.wlen <=sizeof(DCC_FUZ2_WORD))
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
329 add_word(cks, lbufs);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
330 FZ2.wlen = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
331 DCC_FUZ2_WORD_CLEAR(&FZ2.w);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
332 continue;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
333 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
334 ++FZ2.btotal;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
335
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
336 if (c == FC_LT) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
337 FZ2.tag_len = 0;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
338 DCC_FUZ2_WORD_CLEAR(&FZ2.tag);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
339 FZ2.st = DCC_FUZ2_ST_START_TAG;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
340 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
341 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
342
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
343 JUNK();
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
344 } while (bp_len != 0);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
345 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
346
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
347 case DCC_FUZ2_ST_START_TAG:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
348 /* collecting an HTML tag or comment
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
349 * We've passed the '<' */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
350 c = *bp;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
351 #define SAVE_TAG(_c) (FZ2.tag.b[FZ2.tag_len++] = _c, \
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
352 ++FZ2.btotal, ++bp, --bp_len)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
353 if (((c >= 'a' && c <= 'z') /* tag */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
354 || (c >= '0' && c <= '9'))
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
355 && FZ2.tag_len < sizeof(FZ2.tag)) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
356 SAVE_TAG(c);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
357 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
358 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
359 if (c >= 'A' && c <= 'Z'
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
360 && FZ2.tag_len < sizeof(FZ2.tag)) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
361 SAVE_TAG(c - ('A'-'a'));
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
362 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
363 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
364 if ((c == '/' /* end-tag */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
365 || c == '!') /* start of comment */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
366 && FZ2.tag_len == 0) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
367 SAVE_TAG(c);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
368 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
369 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
370 if (c == '-' /* comment */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
371 && FZ2.tag_len >= 1 && FZ2.tag_len <= 2) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
372 SAVE_TAG(c);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
373 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
374 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
375 #undef SAVE_TAG
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
376
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
377 /* notice an <html> tag while in text/plain
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
378 * and switch to text/html */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
379 if (FZ2.tag_len == 4
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
380 && cks->mime_ct != DCC_CK_CT_HTML
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
381 && !memcmp(FZ2.tag.b, "html", 4))
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
382 cks->mime_ct = DCC_CK_CT_HTML;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
383
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
384 if (cks->mime_ct == DCC_CK_CT_HTML
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
385 && FZ2.tag_len > 0) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
386 /* if we are in an HTML document and we
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
387 * have at least one character after '<',
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
388 * assume it is some kind of HTML tag */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
389 FZ2.xsummed += FZ2.tag_len+1; /* count '<' */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
390 if (c == '>') {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
391 /* optimize common simple tags */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
392 ++FZ2.xsummed;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
393 ++FZ2.btotal;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
394 ++bp, --bp_len;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
395 FZ2.st = DCC_FUZ2_ST_WORD;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
396 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
397 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
398 if (FZ2.tag_len >= 3
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
399 && !memcmp(FZ2.tag.b, "!--", 3)) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
400 FZ2.st = DCC_FUZ2_ST_SKIP_COMMENT;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
401 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
402 FZ2.url.st = DCC_URL_ST_IDLE;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
403 FZ2.st = DCC_FUZ2_ST_SKIP_TAG;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
404 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
405 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
406 /* assume it is not an HTML tag and
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
407 * mark the whole word as junk */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
408 JUNK();
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
409 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
410 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
411
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
412 case DCC_FUZ2_ST_SKIP_TAG:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
413 /* Skip rest of boring HTML tag
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
414 * We ought to ignore '>' in quotes */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
415 do {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
416 if (FZ2.cref_st == DCC_CREF_ST_IDLE) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
417 --bp_len;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
418 c = *bp++;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
419 if (c == FC_CF) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
420 FZ2.cref_st = DCC_CREF_ST_START;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
421 continue;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
422 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
423 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
424 i = adv_cref(cks, *bp);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
425 if (i == -2)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
426 continue;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
427 if (i < 0) { /* get more of cref */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
428 --bp_len;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
429 ++bp;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
430 continue;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
431 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
432 bp += i; /* use complete cref */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
433 bp_len -= i;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
434 c = FZ2.cref_cnt;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
435 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
436
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
437 /* capture URLs */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
438 i = dcc_ck_url(&FZ2.url, c, &FZ2.url_cp);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
439 c = i>>DCC_CK_URL_SHIFT;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
440 switch ((DCC_CK_URL)(i & DCC_CK_URL_MASK)) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
441 case DCC_CK_URL_CHAR:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
442 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
443 case DCC_CK_URL_CK_LEN:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
444 /* Make room before starting a URL
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
445 * if we are too close to
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
446 * end of buffer for a maximum size URL.
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
447 * Discard the first URL in the buffer.
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
448 * This relies on dcc_ck_url() limiting
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
449 * the URL to DCC_URL_MAX bytes */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
450 while (FZ2.url_cp
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
451 >= &FZ2.url_buf[ISZ(FZ2.url_buf)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
452 - DCC_FUZ2_URL_MAX]) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
453 p = memchr(FZ2.url_buf, '\0',
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
454 FZ2.url_cp-FZ2.url_buf);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
455 if (!p) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
456 /* if this was the first URL,
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
457 * discard half of it */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
458 p = &FZ2.url_buf[DCC_URL_MAX/2];
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
459 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
460 ++p;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
461 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
462 memmove(FZ2.url_buf, p,
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
463 FZ2.url_cp - p);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
464 FZ2.url_cp -= p - FZ2.url_buf;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
465 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
466 if (FZ2.url_cp != FZ2.url_buf)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
467 *FZ2.url_cp++ = '\0';
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
468 ++FZ2.urls;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
469 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
470 case DCC_CK_URL_HOST:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
471 case DCC_CK_URL_DOT:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
472 if (FZ2.url_cp
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
473 < &FZ2.url_buf[ISZ(FZ2.url_buf)])
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
474 *FZ2.url_cp++ = c;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
475 continue;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
476 case DCC_CK_URL_HOST_END:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
477 case DCC_CK_URL_HOST_RESET:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
478 case DCC_CK_URL_SKIP:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
479 continue;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
480 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
481
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
482 if (c == '>') {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
483 ++FZ2.xsummed;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
484 ++FZ2.btotal;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
485 FZ2.st = DCC_FUZ2_ST_WORD;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
486 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
487 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
488 if (cks->mime_cset[c] != FC_SP) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
489 ++FZ2.xsummed;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
490 ++FZ2.btotal;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
491 /* don't let wild tags run forever */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
492 if (++FZ2.tag_len > DCC_URL_FAILSAFE) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
493 JUNK();
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
494 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
495 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
496 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
497 } while (bp_len != 0);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
498 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
499
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
500 case DCC_FUZ2_ST_SKIP_COMMENT:
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
501 /* HTML comments can include HTML tags,
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
502 * but spammers don't understand HTML comment syntax
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
503 * and Netscape and IE treat (and ignore) broken
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
504 * comments like strange tags. */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
505 do {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
506 --bp_len;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
507 c = *bp++;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
508 if (c == '>') {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
509 ++FZ2.xsummed;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
510 ++FZ2.btotal;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
511 FZ2.st = DCC_FUZ2_ST_WORD;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
512 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
513 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
514 if (cks->mime_cset[c] != FC_SP) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
515 ++FZ2.xsummed;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
516 ++FZ2.btotal;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
517 /* don't let wild tags run forever */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
518 if (++FZ2.tag_len > DCC_URL_FAILSAFE) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
519 JUNK();
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
520 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
521 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
522 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
523 } while (bp_len != 0);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
524 break;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
525 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
526 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
527 for (lbp = lbufs, lp = FZ2.lang; lbp <= LAST(lbufs); ++lbp, ++lp) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
528 if (lbp->blen != 0) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
529 lp->wsummed += lbp->blen;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
530 FUZ2(lp, lbp->buf, lbp->blen);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
531 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
532 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
533 #undef SKIP_WORD
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
534 #undef JUNK
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
535 #undef BUF_LEN
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
536 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
537
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
538
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
539
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
540 void
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
541 dcc_ck_fuz2_fin(DCC_GOT_CKS *cks)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
542 {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
543 FUZ2_LANG *lp, *lp1;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
544
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
545 if (cks->sums[DCC_CK_FUZ2].type != DCC_CK_FUZ2)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
546 return;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
547
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
548 /* pick the language checksum of the most words */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
549 lp = FZ2.lang;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
550 for (lp1 = lp+1; lp1 <= LAST(FZ2.lang); ++lp1) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
551 if (lp->wtotal < lp1->wtotal)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
552 lp = lp1;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
553 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
554
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
555 #ifdef DCC_DEBUG_CKSUM
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
556 if (dcc_clnt_debug > 3)
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
557 printf("\n***fuz2: wtotal[%d]=%d summed=%d+%d btotal=%d\n",
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
558 (int)(lp-FZ2.lang),
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
559 lp->wtotal, lp->wsummed, FZ2.xsummed, FZ2.btotal);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
560 #endif
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
561 /* The FUZ2 checksum is not valid if it is on a few words and
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
562 * less than 10% of a big, binary file */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
563 if (lp->wtotal < 100
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
564 && (lp->wsummed+FZ2.xsummed)*10 < FZ2.btotal) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
565 cks->sums[DCC_CK_FUZ2].type = DCC_CK_INVALID;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
566 return;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
567 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
568 /* We cannot compute a checksum on a nearly empty message */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
569 if (lp->wtotal < 8) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
570 if (lp->wtotal + FZ2.urls*4 >= 8) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
571 /* use URLs if we lack words */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
572 FUZ2(lp, FZ2.url_buf, FZ2.url_cp - FZ2.url_buf);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
573 } else {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
574 /* Compute a checksum for local blacklising on messages
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
575 * that look empty to the FUZ2 checksum but are not and
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
576 * are not too binary. The resulting checksum is zero.
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
577 * Do not report it to the DCC server. */
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
578 if ((lp->wsummed+FZ2.xsummed) >= 120) {
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
579 memset(cks->sums[DCC_CK_FUZ2].sum, 0,
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
580 sizeof(cks->sums[DCC_CK_FUZ2].sum));
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
581 return;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
582 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
583 cks->sums[DCC_CK_FUZ2].type = DCC_CK_INVALID;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
584 return;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
585 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
586 }
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
587
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
588 MD5Final(cks->sums[DCC_CK_FUZ2].sum, &lp->md5);
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
589 cks->sums[DCC_CK_FUZ2].rpt2srvr = 1;
c7f6b056b673 First import of vendor version
Peter Gervai <grin@grin.hu>
parents:
diff changeset
590 }