0
|
1 /* Distributed Checksum Clearinghouse |
|
2 * |
|
3 * compute fuzzy body checksum #2 |
|
4 * |
|
5 * Copyright (c) 2008 by Rhyolite Software, LLC |
|
6 * |
|
7 * This agreement is not applicable to any entity which sells anti-spam |
|
8 * solutions to others or provides an anti-spam solution as part of a |
|
9 * security solution sold to other entities, or to a private network |
|
10 * which employs the DCC or uses data provided by operation of the DCC |
|
11 * but does not provide corresponding data to other users. |
|
12 * |
|
13 * Permission to use, copy, modify, and distribute this software without |
|
14 * changes for any purpose with or without fee is hereby granted, provided |
|
15 * that the above copyright notice and this permission notice appear in all |
|
16 * copies and any distributed versions or copies are either unchanged |
|
17 * or not called anything similar to "DCC" or "Distributed Checksum |
|
18 * Clearinghouse". |
|
19 * |
|
20 * Parties not eligible to receive a license under this agreement can |
|
21 * obtain a commercial license to use DCC by contacting Rhyolite Software |
|
22 * at sales@rhyolite.com. |
|
23 * |
|
24 * A commercial license would be for Distributed Checksum and Reputation |
|
25 * Clearinghouse software. That software includes additional features. This |
|
26 * free license for Distributed ChecksumClearinghouse Software does not in any |
|
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse |
|
28 * software |
|
29 * |
|
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL |
|
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES |
|
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC |
|
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES |
|
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
|
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
|
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS |
|
37 * SOFTWARE. |
|
38 * |
|
39 * Rhyolite Software DCC 1.3.103-1.52 $Revision$ |
|
40 */ |
|
41 |
|
42 #include "dcc_ck.h" |
|
43 |
|
44 #include "ckfuz2_tbl.h" |
|
45 struct { |
|
46 const char **words; |
|
47 u_int len; |
|
48 const u_char *cset; |
|
49 } tbls[FUZ2_LAN_NUM] = { |
|
50 {word_tbl0, word_tbl0_LEN, 0}, |
|
51 {word_tbl1, word_tbl1_LEN, 0}, |
|
52 {word_tbl2, word_tbl2_LEN, dcc_cset_2}, |
|
53 }; |
|
54 |
|
55 |
|
56 #define FZ2 cks->fuz2 |
|
57 |
|
58 #define BUF_LEN 1024 |
|
59 typedef struct { |
|
60 char buf[BUF_LEN+sizeof(DCC_FUZ2_WORD)+1]; |
|
61 int blen; |
|
62 } LBUF; |
|
63 |
|
64 |
|
/* Add l bytes at b to the MD5 context of language entry lp.
 * When built with DCC_DEBUG_CKSUM and dcc_clnt_debug is 5, the bytes are
 * also copied to file descriptor 1.
 * The parameters are parenthesized so that any expression can be passed
 * safely; b and l are expanded more than once in the debugging version,
 * so they should not have side effects. */
#ifdef DCC_DEBUG_CKSUM
#define FUZ2(lp, b, l)	(dcc_clnt_debug == 5				\
			 ? (write(1, (b), (l)),				\
			    MD5Update(&(lp)->md5, (b), (l)))		\
			 : MD5Update(&(lp)->md5, (b), (l)))
#else
#define FUZ2(lp, b, l)	MD5Update(&(lp)->md5, (b), (l))
#endif
|
72 |
|
73 |
|
74 void |
|
75 dcc_ck_fuz2_init(DCC_GOT_CKS *cks) |
|
76 { |
|
77 FUZ2_LANG *lp; |
|
78 |
|
79 FZ2.wlen = 0; |
|
80 DCC_FUZ2_WORD_CLEAR(&FZ2.w); |
|
81 FZ2.st = DCC_FUZ2_ST_WORD; |
|
82 FZ2.url_cp = FZ2.url_buf; |
|
83 FZ2.urls = 0; |
|
84 |
|
85 cks->sums[DCC_CK_FUZ2].type = DCC_CK_FUZ2; |
|
86 |
|
87 FZ2.btotal = 0; |
|
88 FZ2.xsummed = 0; |
|
89 for (lp = FZ2.lang; lp <= LAST(FZ2.lang); ++lp) { |
|
90 lp->wsummed = 0; |
|
91 lp->wtotal = 0; |
|
92 MD5Init(&lp->md5); |
|
93 } |
|
94 } |
|
95 |
|
96 |
|
97 |
|
98 static inline u_char /* 1=found it, 0=not a known word */ |
|
99 lookup_word(const DCC_FUZ2_WORD *w, u_int wlen, |
|
100 const char **word_tbl, u_int word_tbl_len) |
|
101 { |
|
102 const char *p; |
|
103 u_int n; |
|
104 |
|
105 p = word_tbl[fuz2_word_hash(w, word_tbl_len)]; |
|
106 if (!p) |
|
107 return 0; |
|
108 for (;;) { |
|
109 n = *p++; |
|
110 if (!n) |
|
111 return 0; |
|
112 if (n == wlen && !memcmp(w->b, p, n)) |
|
113 return 1; |
|
114 p += n; |
|
115 } |
|
116 } |
|
117 |
|
118 |
|
119 |
|
120 static void |
|
121 add_word(DCC_GOT_CKS *cks, LBUF *lbp) |
|
122 { |
|
123 FUZ2_LANG *lp; |
|
124 int tbl; |
|
125 |
|
126 tbl = 0; |
|
127 for (lp = FZ2.lang; lp < &FZ2.lang[FUZ2_LAN_NUM]; ++lp, ++lbp, ++tbl) { |
|
128 if (tbls[tbl].cset != 0 |
|
129 && tbls[tbl].cset != cks->mime_cset) |
|
130 continue; |
|
131 if (lookup_word(&FZ2.w, FZ2.wlen, |
|
132 tbls[tbl].words, tbls[tbl].len)) { |
|
133 ++lp->wtotal; |
|
134 memcpy(&lbp->buf[lbp->blen], &FZ2.w, FZ2.wlen); |
|
135 if ((lbp->blen += FZ2.wlen) >= BUF_LEN) { |
|
136 lp->wsummed += lbp->blen; |
|
137 FUZ2(lp, lbp->buf, lbp->blen); |
|
138 lbp->blen = 0; |
|
139 } |
|
140 } |
|
141 } |
|
142 } |
|
143 |
|
144 |
|
145 |
|
146 /* resolve an HTML characater reference */ |
|
147 static u_char /* 0 or found value */ |
|
148 lookup_cref(DCC_FUZ2_WORD *w, u_int clen) |
|
149 { |
|
150 const char *p; |
|
151 u_int n; |
|
152 |
|
153 if (clen > sizeof(DCC_FUZ2_WORD) |
|
154 || clen == 0) |
|
155 return 0; |
|
156 p = cref_tbl[fuz2_word_hash(w, cref_tbl_LEN)]; |
|
157 if (!p) |
|
158 return 0; |
|
159 for (;;) { |
|
160 n = *p++; |
|
161 if (!n) |
|
162 return 0; |
|
163 if (n == clen && !memcmp(w->b, p, n)) |
|
164 return p[clen]; |
|
165 p += n+1; |
|
166 } |
|
167 } |
|
168 |
|
169 |
|
170 |
|
171 static int |
|
172 adv_cref(DCC_GOT_CKS *cks, u_char c) |
|
173 { |
|
174 switch (FZ2.cref_st) { |
|
175 case DCC_CREF_ST_IDLE: |
|
176 dcc_logbad(EX_SOFTWARE, "impossible fuz2 cref state"); |
|
177 break; |
|
178 |
|
179 case DCC_CREF_ST_START: |
|
180 /* start to get a possible HTML character reference |
|
181 * We have already passed the '&' */ |
|
182 if (c == '#') { |
|
183 FZ2.cref_st = DCC_CREF_ST_NUM; |
|
184 } else if ((c >= 'a' && c <= 'z') |
|
185 || (c >= 'A' && c <= 'Z')) { |
|
186 DCC_FUZ2_WORD_CLEAR(&FZ2.cref_w); |
|
187 FZ2.cref_w.b[0] = c; |
|
188 FZ2.cref_cnt = 1; |
|
189 FZ2.cref_st = DCC_CREF_ST_NAME; |
|
190 } else { |
|
191 FZ2.cref_st = DCC_CREF_ST_IDLE; |
|
192 return -2; |
|
193 } |
|
194 break; |
|
195 |
|
196 case DCC_CREF_ST_NUM: |
|
197 /* look for 'x' or the first digit */ |
|
198 if (c == 'x' || c == 'X') { |
|
199 FZ2.cref_st = DCC_CREF_ST_HEX; |
|
200 FZ2.cref_cnt = 0; |
|
201 } else if (c >= '0' && c <= '9') { |
|
202 FZ2.cref_cnt = c - '0'; |
|
203 FZ2.cref_st = DCC_CREF_ST_DEC; |
|
204 } else { |
|
205 FZ2.cref_st = DCC_CREF_ST_IDLE; |
|
206 return -2; |
|
207 } |
|
208 break; |
|
209 |
|
210 case DCC_CREF_ST_DEC: |
|
211 if (c >= '0' && c <= '9') { |
|
212 FZ2.cref_cnt = FZ2.cref_cnt*10 + (c - '0'); |
|
213 } else if (cks->mime_cset[c] == FC_SP) { |
|
214 FZ2.cref_st = DCC_CREF_ST_IDLE; |
|
215 return c == ';'; |
|
216 } else { |
|
217 FZ2.cref_st = DCC_CREF_ST_IDLE; |
|
218 return -2; |
|
219 } |
|
220 if (FZ2.cref_cnt > 256) |
|
221 FZ2.cref_cnt = 256; |
|
222 ++FZ2.btotal; |
|
223 break; |
|
224 |
|
225 case DCC_CREF_ST_HEX: |
|
226 if ((c >= 'a' && c <= 'f') |
|
227 || (c >= 'A' && c <= 'F')) { |
|
228 FZ2.cref_cnt = (FZ2.cref_cnt<<4) + (c & 0xf) + 9; |
|
229 } else if (c >= '0' && c <= '9') { |
|
230 FZ2.cref_cnt = (FZ2.cref_cnt<<4) + (c - '0'); |
|
231 } else if (cks->mime_cset[c] == FC_SP) { |
|
232 FZ2.cref_st = DCC_CREF_ST_IDLE; |
|
233 return c == ';'; |
|
234 } else { |
|
235 FZ2.cref_st = DCC_CREF_ST_IDLE; |
|
236 return -2; |
|
237 } |
|
238 if (FZ2.cref_cnt > 256) |
|
239 FZ2.cref_cnt = 256; |
|
240 ++FZ2.btotal; |
|
241 break; |
|
242 |
|
243 case DCC_CREF_ST_NAME: |
|
244 if ((c >= 'a' && c <= 'z') |
|
245 || (c >= 'A' && c <= 'Z')) { |
|
246 /* If the word is too long, the final match will fail. |
|
247 * This will consume it */ |
|
248 if (FZ2.cref_cnt < sizeof(DCC_FUZ2_WORD)) |
|
249 FZ2.cref_w.b[FZ2.cref_cnt++] = c; |
|
250 } else if (cks->mime_cset[c] == FC_SP) { |
|
251 /* this character ends the cref */ |
|
252 FZ2.cref_cnt = lookup_cref(&FZ2.cref_w, FZ2.cref_cnt); |
|
253 FZ2.cref_st = DCC_CREF_ST_IDLE; |
|
254 return c == ';'; |
|
255 } else { |
|
256 FZ2.cref_st = DCC_CREF_ST_IDLE; |
|
257 return -2; |
|
258 } |
|
259 break; |
|
260 } |
|
261 return -1; |
|
262 } |
|
263 |
|
264 |
|
265 void |
|
266 dcc_ck_fuz2(DCC_GOT_CKS *cks, const char *bp, u_int bp_len) |
|
267 { |
|
268 #define SKIP_WORD() (FZ2.wlen = sizeof(DCC_FUZ2_WORD)+1) |
|
269 #define JUNK() (SKIP_WORD(), FZ2.st = DCC_FUZ2_ST_WORD) |
|
270 LBUF *lbp, lbufs[FUZ2_LAN_NUM]; |
|
271 FUZ2_LANG *lp; |
|
272 int i; |
|
273 char *p; |
|
274 u_char c; |
|
275 |
|
276 if (cks->sums[DCC_CK_FUZ2].type != DCC_CK_FUZ2) |
|
277 return; |
|
278 |
|
279 for (lbp = lbufs; lbp <= LAST(lbufs); ++lbp) |
|
280 lbp->blen = 0; |
|
281 |
|
282 while (bp_len != 0) { |
|
283 switch (FZ2.st) { |
|
284 case DCC_FUZ2_ST_WORD: |
|
285 /* gathering a word */ |
|
286 do { |
|
287 if (FZ2.cref_st == DCC_CREF_ST_IDLE) { |
|
288 --bp_len; |
|
289 c = *bp++; |
|
290 c = cks->mime_cset[c]; |
|
291 if (c == FC_CF) { |
|
292 if (cks->mime_ct == DCC_CK_CT_HTML){ |
|
293 FZ2.cref_st = DCC_CREF_ST_START; |
|
294 break; |
|
295 } |
|
296 c = FC_SP; |
|
297 } |
|
298 } else { |
|
299 i = adv_cref(cks, *bp); |
|
300 if (i == -2) { |
|
301 JUNK(); /* bogus cref */ |
|
302 continue; |
|
303 } |
|
304 if (i < 0) { /* get more of cref */ |
|
305 --bp_len; |
|
306 ++bp; |
|
307 continue; |
|
308 } |
|
309 bp += i; /* use complete cref */ |
|
310 bp_len -= i; |
|
311 c = cks->mime_cset[FZ2.cref_cnt]; |
|
312 |
|
313 if (c == FC_SK) |
|
314 continue; /* ignore accent mark */ |
|
315 } |
|
316 |
|
317 if (c >= FC_A) { |
|
318 ++FZ2.btotal; |
|
319 if (FZ2.wlen < sizeof(DCC_FUZ2_WORD)) |
|
320 FZ2.w.b[FZ2.wlen++] = c; |
|
321 else |
|
322 SKIP_WORD(); |
|
323 continue; |
|
324 } |
|
325 |
|
326 if (c == FC_SP) { |
|
327 if (FZ2.wlen >= MIN_WLEN |
|
328 && FZ2.wlen <=sizeof(DCC_FUZ2_WORD)) |
|
329 add_word(cks, lbufs); |
|
330 FZ2.wlen = 0; |
|
331 DCC_FUZ2_WORD_CLEAR(&FZ2.w); |
|
332 continue; |
|
333 } |
|
334 ++FZ2.btotal; |
|
335 |
|
336 if (c == FC_LT) { |
|
337 FZ2.tag_len = 0; |
|
338 DCC_FUZ2_WORD_CLEAR(&FZ2.tag); |
|
339 FZ2.st = DCC_FUZ2_ST_START_TAG; |
|
340 break; |
|
341 } |
|
342 |
|
343 JUNK(); |
|
344 } while (bp_len != 0); |
|
345 break; |
|
346 |
|
347 case DCC_FUZ2_ST_START_TAG: |
|
348 /* collecting an HTML tag or comment |
|
349 * We've passed the '<' */ |
|
350 c = *bp; |
|
351 #define SAVE_TAG(_c) (FZ2.tag.b[FZ2.tag_len++] = _c, \ |
|
352 ++FZ2.btotal, ++bp, --bp_len) |
|
353 if (((c >= 'a' && c <= 'z') /* tag */ |
|
354 || (c >= '0' && c <= '9')) |
|
355 && FZ2.tag_len < sizeof(FZ2.tag)) { |
|
356 SAVE_TAG(c); |
|
357 break; |
|
358 } |
|
359 if (c >= 'A' && c <= 'Z' |
|
360 && FZ2.tag_len < sizeof(FZ2.tag)) { |
|
361 SAVE_TAG(c - ('A'-'a')); |
|
362 break; |
|
363 } |
|
364 if ((c == '/' /* end-tag */ |
|
365 || c == '!') /* start of comment */ |
|
366 && FZ2.tag_len == 0) { |
|
367 SAVE_TAG(c); |
|
368 break; |
|
369 } |
|
370 if (c == '-' /* comment */ |
|
371 && FZ2.tag_len >= 1 && FZ2.tag_len <= 2) { |
|
372 SAVE_TAG(c); |
|
373 break; |
|
374 } |
|
375 #undef SAVE_TAG |
|
376 |
|
377 /* notice an <html> tag while in text/plain |
|
378 * and switch to text/html */ |
|
379 if (FZ2.tag_len == 4 |
|
380 && cks->mime_ct != DCC_CK_CT_HTML |
|
381 && !memcmp(FZ2.tag.b, "html", 4)) |
|
382 cks->mime_ct = DCC_CK_CT_HTML; |
|
383 |
|
384 if (cks->mime_ct == DCC_CK_CT_HTML |
|
385 && FZ2.tag_len > 0) { |
|
386 /* if we are in an HTML document and we |
|
387 * have at least one character after '<', |
|
388 * assume it is some kind of HTML tag */ |
|
389 FZ2.xsummed += FZ2.tag_len+1; /* count '<' */ |
|
390 if (c == '>') { |
|
391 /* optimize common simple tags */ |
|
392 ++FZ2.xsummed; |
|
393 ++FZ2.btotal; |
|
394 ++bp, --bp_len; |
|
395 FZ2.st = DCC_FUZ2_ST_WORD; |
|
396 break; |
|
397 } |
|
398 if (FZ2.tag_len >= 3 |
|
399 && !memcmp(FZ2.tag.b, "!--", 3)) { |
|
400 FZ2.st = DCC_FUZ2_ST_SKIP_COMMENT; |
|
401 } else { |
|
402 FZ2.url.st = DCC_URL_ST_IDLE; |
|
403 FZ2.st = DCC_FUZ2_ST_SKIP_TAG; |
|
404 } |
|
405 } else { |
|
406 /* assume it is not an HTML tag and |
|
407 * mark the whole word as junk */ |
|
408 JUNK(); |
|
409 } |
|
410 break; |
|
411 |
|
412 case DCC_FUZ2_ST_SKIP_TAG: |
|
413 /* Skip rest of boring HTML tag |
|
414 * We ought to ignore '>' in quotes */ |
|
415 do { |
|
416 if (FZ2.cref_st == DCC_CREF_ST_IDLE) { |
|
417 --bp_len; |
|
418 c = *bp++; |
|
419 if (c == FC_CF) { |
|
420 FZ2.cref_st = DCC_CREF_ST_START; |
|
421 continue; |
|
422 } |
|
423 } else { |
|
424 i = adv_cref(cks, *bp); |
|
425 if (i == -2) |
|
426 continue; |
|
427 if (i < 0) { /* get more of cref */ |
|
428 --bp_len; |
|
429 ++bp; |
|
430 continue; |
|
431 } |
|
432 bp += i; /* use complete cref */ |
|
433 bp_len -= i; |
|
434 c = FZ2.cref_cnt; |
|
435 } |
|
436 |
|
437 /* capture URLs */ |
|
438 i = dcc_ck_url(&FZ2.url, c, &FZ2.url_cp); |
|
439 c = i>>DCC_CK_URL_SHIFT; |
|
440 switch ((DCC_CK_URL)(i & DCC_CK_URL_MASK)) { |
|
441 case DCC_CK_URL_CHAR: |
|
442 break; |
|
443 case DCC_CK_URL_CK_LEN: |
|
444 /* Make room before starting a URL |
|
445 * if we are too close to |
|
446 * end of buffer for a maximum size URL. |
|
447 * Discard the first URL in the buffer. |
|
448 * This relies on dcc_ck_url() limiting |
|
449 * the URL to DCC_URL_MAX bytes */ |
|
450 while (FZ2.url_cp |
|
451 >= &FZ2.url_buf[ISZ(FZ2.url_buf) |
|
452 - DCC_FUZ2_URL_MAX]) { |
|
453 p = memchr(FZ2.url_buf, '\0', |
|
454 FZ2.url_cp-FZ2.url_buf); |
|
455 if (!p) { |
|
456 /* if this was the first URL, |
|
457 * discard half of it */ |
|
458 p = &FZ2.url_buf[DCC_URL_MAX/2]; |
|
459 } else { |
|
460 ++p; |
|
461 } |
|
462 memmove(FZ2.url_buf, p, |
|
463 FZ2.url_cp - p); |
|
464 FZ2.url_cp -= p - FZ2.url_buf; |
|
465 } |
|
466 if (FZ2.url_cp != FZ2.url_buf) |
|
467 *FZ2.url_cp++ = '\0'; |
|
468 ++FZ2.urls; |
|
469 break; |
|
470 case DCC_CK_URL_HOST: |
|
471 case DCC_CK_URL_DOT: |
|
472 if (FZ2.url_cp |
|
473 < &FZ2.url_buf[ISZ(FZ2.url_buf)]) |
|
474 *FZ2.url_cp++ = c; |
|
475 continue; |
|
476 case DCC_CK_URL_HOST_END: |
|
477 case DCC_CK_URL_HOST_RESET: |
|
478 case DCC_CK_URL_SKIP: |
|
479 continue; |
|
480 } |
|
481 |
|
482 if (c == '>') { |
|
483 ++FZ2.xsummed; |
|
484 ++FZ2.btotal; |
|
485 FZ2.st = DCC_FUZ2_ST_WORD; |
|
486 break; |
|
487 } |
|
488 if (cks->mime_cset[c] != FC_SP) { |
|
489 ++FZ2.xsummed; |
|
490 ++FZ2.btotal; |
|
491 /* don't let wild tags run forever */ |
|
492 if (++FZ2.tag_len > DCC_URL_FAILSAFE) { |
|
493 JUNK(); |
|
494 break; |
|
495 } |
|
496 } |
|
497 } while (bp_len != 0); |
|
498 break; |
|
499 |
|
500 case DCC_FUZ2_ST_SKIP_COMMENT: |
|
501 /* HTML comments can include HTML tags, |
|
502 * but spammers don't understand HTML comment syntax |
|
503 * and Netscape and IE treat (and ignore) broken |
|
504 * comments like strange tags. */ |
|
505 do { |
|
506 --bp_len; |
|
507 c = *bp++; |
|
508 if (c == '>') { |
|
509 ++FZ2.xsummed; |
|
510 ++FZ2.btotal; |
|
511 FZ2.st = DCC_FUZ2_ST_WORD; |
|
512 break; |
|
513 } |
|
514 if (cks->mime_cset[c] != FC_SP) { |
|
515 ++FZ2.xsummed; |
|
516 ++FZ2.btotal; |
|
517 /* don't let wild tags run forever */ |
|
518 if (++FZ2.tag_len > DCC_URL_FAILSAFE) { |
|
519 JUNK(); |
|
520 break; |
|
521 } |
|
522 } |
|
523 } while (bp_len != 0); |
|
524 break; |
|
525 } |
|
526 } |
|
527 for (lbp = lbufs, lp = FZ2.lang; lbp <= LAST(lbufs); ++lbp, ++lp) { |
|
528 if (lbp->blen != 0) { |
|
529 lp->wsummed += lbp->blen; |
|
530 FUZ2(lp, lbp->buf, lbp->blen); |
|
531 } |
|
532 } |
|
533 #undef SKIP_WORD |
|
534 #undef JUNK |
|
535 #undef BUF_LEN |
|
536 } |
|
537 |
|
538 |
|
539 |
|
540 void |
|
541 dcc_ck_fuz2_fin(DCC_GOT_CKS *cks) |
|
542 { |
|
543 FUZ2_LANG *lp, *lp1; |
|
544 |
|
545 if (cks->sums[DCC_CK_FUZ2].type != DCC_CK_FUZ2) |
|
546 return; |
|
547 |
|
548 /* pick the language checksum of the most words */ |
|
549 lp = FZ2.lang; |
|
550 for (lp1 = lp+1; lp1 <= LAST(FZ2.lang); ++lp1) { |
|
551 if (lp->wtotal < lp1->wtotal) |
|
552 lp = lp1; |
|
553 } |
|
554 |
|
555 #ifdef DCC_DEBUG_CKSUM |
|
556 if (dcc_clnt_debug > 3) |
|
557 printf("\n***fuz2: wtotal[%d]=%d summed=%d+%d btotal=%d\n", |
|
558 (int)(lp-FZ2.lang), |
|
559 lp->wtotal, lp->wsummed, FZ2.xsummed, FZ2.btotal); |
|
560 #endif |
|
561 /* The FUZ2 checksum is not valid if it is on a few words and |
|
562 * less than 10% of a big, binary file */ |
|
563 if (lp->wtotal < 100 |
|
564 && (lp->wsummed+FZ2.xsummed)*10 < FZ2.btotal) { |
|
565 cks->sums[DCC_CK_FUZ2].type = DCC_CK_INVALID; |
|
566 return; |
|
567 } |
|
568 /* We cannot compute a checksum on a nearly empty message */ |
|
569 if (lp->wtotal < 8) { |
|
570 if (lp->wtotal + FZ2.urls*4 >= 8) { |
|
571 /* use URLs if we lack words */ |
|
572 FUZ2(lp, FZ2.url_buf, FZ2.url_cp - FZ2.url_buf); |
|
573 } else { |
|
574 /* Compute a checksum for local blacklising on messages |
|
575 * that look empty to the FUZ2 checksum but are not and |
|
576 * are not too binary. The resulting checksum is zero. |
|
577 * Do not report it to the DCC server. */ |
|
578 if ((lp->wsummed+FZ2.xsummed) >= 120) { |
|
579 memset(cks->sums[DCC_CK_FUZ2].sum, 0, |
|
580 sizeof(cks->sums[DCC_CK_FUZ2].sum)); |
|
581 return; |
|
582 } |
|
583 cks->sums[DCC_CK_FUZ2].type = DCC_CK_INVALID; |
|
584 return; |
|
585 } |
|
586 } |
|
587 |
|
588 MD5Final(cks->sums[DCC_CK_FUZ2].sum, &lp->md5); |
|
589 cks->sums[DCC_CK_FUZ2].rpt2srvr = 1; |
|
590 } |