Mercurial > notdcc
comparison dcclib/ckbody.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c7f6b056b673 |
---|---|
1 /* Distributed Checksum Clearinghouse | |
2 * | |
3 * compute simple body checksum | |
4 * | |
5 * Copyright (c) 2008 by Rhyolite Software, LLC | |
6 * | |
7 * This agreement is not applicable to any entity which sells anti-spam | |
8 * solutions to others or provides an anti-spam solution as part of a | |
9 * security solution sold to other entities, or to a private network | |
10 * which employs the DCC or uses data provided by operation of the DCC | |
11 * but does not provide corresponding data to other users. | |
12 * | |
13 * Permission to use, copy, modify, and distribute this software without | |
14 * changes for any purpose with or without fee is hereby granted, provided | |
15 * that the above copyright notice and this permission notice appear in all | |
16 * copies and any distributed versions or copies are either unchanged | |
17 * or not called anything similar to "DCC" or "Distributed Checksum | |
18 * Clearinghouse". | |
19 * | |
20 * Parties not eligible to receive a license under this agreement can | |
21 * obtain a commercial license to use DCC by contacting Rhyolite Software | |
22 * at sales@rhyolite.com. | |
23 * | |
24 * A commercial license would be for Distributed Checksum and Reputation | |
25 * Clearinghouse software. That software includes additional features. This | |
26 * free license for Distributed ChecksumClearinghouse Software does not in any | |
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse | |
28 * software | |
29 * | |
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL | |
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES | |
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC | |
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES | |
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, | |
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS | |
37 * SOFTWARE. | |
38 * | |
39 * Rhyolite Software DCC 1.3.103-1.56 $Revision$ | |
40 */ | |
41 | |
42 #include "dcc_ck.h" | |
43 | |
44 | |
45 void | |
46 dcc_ck_body0(DCC_GOT_CKS *cks, const char *bp, u_int bp_len) | |
47 { | |
48 # define BUF_LEN 1024 | |
49 char buf[BUF_LEN+5]; | |
50 u_char flen; | |
51 int blen; | |
52 char c; | |
53 | |
54 if (cks->sums[DCC_CK_BODY].type != DCC_CK_BODY) | |
55 return; | |
56 | |
57 flen = cks->ctx_body.flen; | |
58 blen = 0; | |
59 for (;;) { | |
60 if (bp_len == 0) { | |
61 if (blen != 0) { | |
62 cks->ctx_body.total += blen; | |
63 MD5Update(&cks->ctx_body.md5, buf, blen); | |
64 } | |
65 cks->ctx_body.flen = flen; | |
66 return; | |
67 } | |
68 --bp_len; | |
69 c = *bp++; | |
70 | |
71 /* Ignore the '>' in the sequence "\n>From" because | |
72 * it is sometimes added for old UNIX MUAs. | |
73 * As a side effect, ignore '\n' */ | |
74 if (flen != 0) { | |
75 if (c == "\n>From"[flen]) { | |
76 if (++flen >= 6) { | |
77 memcpy(&buf[blen], "From", 4); | |
78 if ((blen += 4) >= BUF_LEN) { | |
79 cks->ctx_body.total += blen; | |
80 MD5Update(&cks->ctx_body.md5, buf, | |
81 blen); | |
82 blen = 0; | |
83 } | |
84 flen = 0; | |
85 } | |
86 continue; | |
87 } | |
88 if (--flen != 0) { | |
89 memcpy(&buf[blen], ">From", flen); | |
90 if ((blen += flen) >= BUF_LEN) { | |
91 cks->ctx_body.total += blen; | |
92 MD5Update(&cks->ctx_body.md5, buf, | |
93 blen); | |
94 blen = 0; | |
95 } | |
96 flen = 0; | |
97 } | |
98 } | |
99 if (c == '\n') { | |
100 flen = 1; | |
101 continue; | |
102 } | |
103 | |
104 /* Ignore whitespace to avoid being confused by | |
105 * varying line endings added and removed by | |
106 * various MUAs and MTAs. | |
107 * As a side effect, ignore entirely blank messages. */ | |
108 if (c == ' ' || c == '\t' || c == '\r') | |
109 continue; | |
110 | |
111 /* Ignore '=' to minimize but not entirely avoid being | |
112 * confused by some some sequences that look like | |
113 * quoted-printable triples but that are not. | |
114 */ | |
115 if (c == '=') | |
116 continue; | |
117 | |
118 buf[blen] = c; | |
119 if (++blen >= BUF_LEN) { | |
120 cks->ctx_body.total += blen; | |
121 MD5Update(&cks->ctx_body.md5, buf, blen); | |
122 blen = 0; | |
123 } | |
124 } | |
125 } | |
126 | |
127 | |
128 | |
129 static void | |
130 dcc_ck_body0_fin(DCC_GOT_CKS *cks) | |
131 { | |
132 /* always generate the MD5 checksum so that grey listing has it */ | |
133 MD5Final(cks->sums[DCC_CK_BODY].sum, &cks->ctx_body.md5); | |
134 | |
135 if (cks->sums[DCC_CK_BODY].type != DCC_CK_BODY) | |
136 return; | |
137 | |
138 if (cks->ctx_body.total < 30) { | |
139 cks->sums[DCC_CK_BODY].type = DCC_CK_INVALID; | |
140 return; | |
141 } | |
142 | |
143 cks->sums[DCC_CK_BODY].rpt2srvr = 1; | |
144 } | |
145 | |
146 | |
147 | |
148 static void | |
149 decoders_init(DCC_GOT_CKS *cks) | |
150 { | |
151 cks->mime_bnd_matches = 0; | |
152 | |
153 cks->flags |= DCC_CKS_MIME_BOL; | |
154 cks->mime_ct = DCC_CK_CT_TEXT; | |
155 cks->mime_cset = dcc_cset_1; | |
156 cks->mime_ce = DCC_CK_CE_ASCII; | |
157 cks->qp.state = DCC_CK_QP_IDLE; | |
158 cks->b64.quantum_cnt = 0; | |
159 } | |
160 | |
161 | |
162 | |
163 /* start all of the checksums */ | |
164 void | |
165 dcc_cks_init(DCC_GOT_CKS *cks) | |
166 { | |
167 DCC_GOT_SUM *g; | |
168 | |
169 for (g = cks->sums; g <= LAST(cks->sums); ++g) { | |
170 CLR_GOT_SUM(g); | |
171 } | |
172 | |
173 cks->flags = 0; | |
174 cks->mime_nest = 0; | |
175 cks->mhdr_st = CK_MHDR_ST_IDLE; | |
176 cks->mp_st = CK_MP_ST_TEXT; | |
177 decoders_init(cks); | |
178 | |
179 cks->sums[DCC_CK_BODY].type = DCC_CK_BODY; | |
180 cks->ctx_body.total = 0; | |
181 cks->ctx_body.flen = 1; | |
182 MD5Init(&cks->ctx_body.md5); | |
183 | |
184 dcc_ck_fuz1_init(cks); | |
185 dcc_ck_fuz2_init(cks); | |
186 } | |
187 | |
188 | |
189 | |
190 /* decode quoted-printable and base64 and then compute the body checksums */ | |
191 static void | |
192 decode_sum(DCC_GOT_CKS *cks, const char *bp, u_int bp_len) | |
193 { | |
194 char tbuf[1024]; | |
195 const char *tbufp; | |
196 int len; | |
197 | |
198 /* Decode quoted-printable and base64 and make fuzzy sumes | |
199 * only while in the body of a MIME entity. | |
200 * Changing from the text, image, html, etc. requires a '\n' | |
201 * to flush the URL and other decoders in the checksummers. | |
202 * None of the checksums count whitespace. */ | |
203 if (cks->mp_st != CK_MP_ST_TEXT) { | |
204 if (bp_len == 0) | |
205 return; | |
206 #ifdef DCC_DEBUG_CKSUM | |
207 if (dcc_clnt_debug == 4) | |
208 write(1, bp, bp_len); | |
209 #endif | |
210 dcc_ck_body0(cks, bp, bp_len); | |
211 dcc_ck_fuz1(cks, "\n", 1); | |
212 dcc_ck_fuz2(cks, "\n", 1); | |
213 return; | |
214 } | |
215 | |
216 while (bp_len != 0) { | |
217 switch (cks->mime_ce) { | |
218 case DCC_CK_CE_ASCII: | |
219 default: | |
220 len = bp_len; | |
221 tbufp = bp; | |
222 bp_len = 0; | |
223 break; | |
224 case DCC_CK_CE_QP: | |
225 tbufp = tbuf; | |
226 len = dcc_ck_qp_decode(cks, &bp, &bp_len, | |
227 tbuf, sizeof(tbuf)); | |
228 break; | |
229 case DCC_CK_CE_B64: | |
230 tbufp = tbuf; | |
231 len = dcc_ck_b64_decode(cks, &bp, &bp_len, | |
232 tbuf, sizeof(tbuf)); | |
233 break; | |
234 } | |
235 | |
236 if (len != 0) { | |
237 #ifdef DCC_DEBUG_CKSUM | |
238 if (dcc_clnt_debug == 4) | |
239 write(1, tbufp, len); | |
240 #endif | |
241 dcc_ck_body0(cks, tbufp, len); | |
242 dcc_ck_fuz1(cks, tbufp, len); | |
243 if (cks->mime_ct != DCC_CK_CT_BINARY) | |
244 dcc_ck_fuz2(cks, tbufp, len); | |
245 } | |
246 } | |
247 } | |
248 | |
249 | |
250 | |
251 /* compute all of the body checksums on a chunk of raw text */ | |
252 void | |
253 dcc_ck_body(DCC_GOT_CKS *cks, const void *bp, u_int bp_len) | |
254 { | |
255 DCC_CK_BND *bndp; | |
256 const char *sum; /* 1st input byte not swallowed */ | |
257 const char *cmp; /* 1st not parsed for MIME */ | |
258 const char *cp; | |
259 char c; | |
260 int len, matched_len, i, j; | |
261 | |
262 sum = bp; | |
263 cmp = sum; | |
264 while (bp_len != 0) { | |
265 /* if we have no multipart hassles | |
266 * then pass buffer to qp/base64 decoder and quit */ | |
267 if (cks->mime_nest == 0) { | |
268 decode_sum(cks, sum, bp_len); | |
269 return; | |
270 } | |
271 | |
272 /* look for start of next line to start matching boundaries */ | |
273 if (cks->mime_bnd_matches == 0) { | |
274 cp = memchr(cmp, '\n', bp_len); | |
275 if (!cp) { | |
276 cp = cmp+bp_len; | |
277 } else { | |
278 ++cp; | |
279 } | |
280 | |
281 /* look for a MIME entity header in the text before | |
282 * the next line and possible start of a boundary */ | |
283 i = cp - cmp; | |
284 if (cks->mp_st == CK_MP_ST_HDRS) { | |
285 if (parse_mime_hdr(cks, cmp, i, 0)) { | |
286 /* blank header line ends the headers */ | |
287 j = cp-sum; | |
288 if (j) { | |
289 decode_sum(cks, sum, j); | |
290 sum = cp; | |
291 } | |
292 cks->mp_st = CK_MP_ST_TEXT; | |
293 } | |
294 } | |
295 /* We found the end of a line. Reset positions to | |
296 * start looking for a MIME boundary after it */ | |
297 if (*(cp-1) == '\n') { | |
298 cks->flags |= DCC_CKS_MIME_BOL; | |
299 cks->mime_bnd_matches = cks->mime_nest; | |
300 for (bndp = cks->mime_bnd; | |
301 bndp <= LAST(cks->mime_bnd); | |
302 ++bndp) { | |
303 bndp->cmp_len = 0; | |
304 } | |
305 } | |
306 cmp = cp; | |
307 if ((bp_len -= i) == 0) | |
308 break; | |
309 } | |
310 | |
311 /* look for (rest of) one of the active MIME boundaries */ | |
312 matched_len = 0; | |
313 for (bndp = cks->mime_bnd; | |
314 bndp < &cks->mime_bnd[cks->mime_nest]; | |
315 ++bndp) { | |
316 | |
317 if (bndp->cmp_len == DCC_CK_BND_MISS) | |
318 continue; /* already mismatched boundary */ | |
319 | |
320 j = bndp->bnd_len - bndp->cmp_len; | |
321 len = bp_len; | |
322 if (j > len) | |
323 j = len; | |
324 cp = cmp; | |
325 if (j > 0) { | |
326 if (memcmp(cp, &bndp->bnd[bndp->cmp_len], j)) { | |
327 bndp->cmp_len = DCC_CK_BND_MISS; | |
328 --cks->mime_bnd_matches; | |
329 continue; | |
330 } | |
331 /* this boundary matches so far */ | |
332 bndp->cmp_len += j; | |
333 cp += j; | |
334 if ((len -= j) <= 0) { | |
335 matched_len = bp_len; | |
336 continue; | |
337 } | |
338 /* since we did not exhaust len, we know | |
339 * we matched the entire boundary */ | |
340 j = 0; | |
341 } | |
342 | |
343 /* look for 1st '-' of trailing "--" */ | |
344 if (j == 0 | |
345 && *cp == '-') { | |
346 ++bndp->cmp_len; | |
347 if (--len <= 0) { | |
348 matched_len = bp_len; | |
349 continue; | |
350 } | |
351 ++cp; | |
352 j = -1; | |
353 } | |
354 /* look for 2nd '-' of trailing "--" */ | |
355 if (j == -1) { | |
356 if (*cp == '-') { | |
357 ++bndp->cmp_len; | |
358 if (--len <= 0) { | |
359 matched_len = bp_len; | |
360 continue; | |
361 } | |
362 ++cp; | |
363 } else { | |
364 bndp->cmp_len = DCC_CK_BND_MISS; | |
365 --cks->mime_bnd_matches; | |
366 continue; | |
367 } | |
368 } | |
369 /* check for trailing whitespace & '\n' */ | |
370 if ((c = *cp) == ' ' || c == '\t' || c == '\r') { | |
371 do { | |
372 ++cp; | |
373 } while (--len > 0 | |
374 && ((c = *cp) == ' ' || c == '\t' | |
375 || c == '\r')); | |
376 if (len <= 0) { | |
377 matched_len = bp_len; | |
378 continue; | |
379 } | |
380 } | |
381 if (*cp != '\n') { | |
382 /* mismatch after the end of the boundary */ | |
383 bndp->cmp_len = DCC_CK_BND_MISS; | |
384 --cks->mime_bnd_matches; | |
385 continue; | |
386 } | |
387 | |
388 /* We have found a MIME boundary. | |
389 * Flush b64 & qp decoders and fuzzy checksummers */ | |
390 j = cmp-sum; | |
391 if (j) | |
392 decode_sum(cks, sum, j); | |
393 | |
394 /* pass the boundary in the buffer */ | |
395 matched_len = ++cp - cmp; | |
396 cmp = sum = cp; | |
397 | |
398 /* Body checksum the boundary */ | |
399 cks->mp_st = CK_MP_ST_BND; | |
400 decode_sum(cks, bndp->bnd, bndp->bnd_len); | |
401 if (bndp->cmp_len != bndp->bnd_len) { | |
402 /* checksum trailing "--" of final boundary */ | |
403 decode_sum(cks, "--", 2); | |
404 /* end the current & inner entities */ | |
405 cks->mp_st = CK_MP_ST_EPILOGUE; | |
406 } else { | |
407 /* intermediate boundaries end inner entities */ | |
408 cks->mp_st = CK_MP_ST_HDRS; | |
409 ++bndp; | |
410 } | |
411 cks->mime_nest = bndp - cks->mime_bnd; | |
412 decoders_init(cks); | |
413 break; | |
414 } | |
415 bp_len -= matched_len; | |
416 } | |
417 | |
418 j = cmp-sum; | |
419 if (j) | |
420 decode_sum(cks, sum, j); | |
421 } | |
422 | |
423 | |
424 | |
425 /* finish all of the body checksums */ | |
426 void | |
427 dcc_cks_fin(DCC_GOT_CKS *cks) | |
428 { | |
429 dcc_ck_fuz1(cks, "\n", 1); /* flush URL decoders & line buffers */ | |
430 dcc_ck_fuz2(cks, "\n", 1); | |
431 | |
432 dcc_ck_body0_fin(cks); | |
433 dcc_ck_fuz1_fin(cks); | |
434 dcc_ck_fuz2_fin(cks); | |
435 } |