comparison dcclib/ckbody.c @ 0:c7f6b056b673

First import of vendor version
author Peter Gervai <grin@grin.hu>
date Tue, 10 Mar 2009 13:49:58 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c7f6b056b673
1 /* Distributed Checksum Clearinghouse
2 *
3 * compute simple body checksum
4 *
5 * Copyright (c) 2008 by Rhyolite Software, LLC
6 *
7 * This agreement is not applicable to any entity which sells anti-spam
8 * solutions to others or provides an anti-spam solution as part of a
9 * security solution sold to other entities, or to a private network
10 * which employs the DCC or uses data provided by operation of the DCC
11 * but does not provide corresponding data to other users.
12 *
13 * Permission to use, copy, modify, and distribute this software without
14 * changes for any purpose with or without fee is hereby granted, provided
15 * that the above copyright notice and this permission notice appear in all
16 * copies and any distributed versions or copies are either unchanged
17 * or not called anything similar to "DCC" or "Distributed Checksum
18 * Clearinghouse".
19 *
20 * Parties not eligible to receive a license under this agreement can
21 * obtain a commercial license to use DCC by contacting Rhyolite Software
22 * at sales@rhyolite.com.
23 *
24 * A commercial license would be for Distributed Checksum and Reputation
25 * Clearinghouse software. That software includes additional features. This
26 * free license for Distributed ChecksumClearinghouse Software does not in any
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
28 * software
29 *
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
37 * SOFTWARE.
38 *
39 * Rhyolite Software DCC 1.3.103-1.56 $Revision$
40 */
41
42 #include "dcc_ck.h"
43
44
45 void
46 dcc_ck_body0(DCC_GOT_CKS *cks, const char *bp, u_int bp_len)
47 {
48 # define BUF_LEN 1024
49 char buf[BUF_LEN+5];
50 u_char flen;
51 int blen;
52 char c;
53
54 if (cks->sums[DCC_CK_BODY].type != DCC_CK_BODY)
55 return;
56
57 flen = cks->ctx_body.flen;
58 blen = 0;
59 for (;;) {
60 if (bp_len == 0) {
61 if (blen != 0) {
62 cks->ctx_body.total += blen;
63 MD5Update(&cks->ctx_body.md5, buf, blen);
64 }
65 cks->ctx_body.flen = flen;
66 return;
67 }
68 --bp_len;
69 c = *bp++;
70
71 /* Ignore the '>' in the sequence "\n>From" because
72 * it is sometimes added for old UNIX MUAs.
73 * As a side effect, ignore '\n' */
74 if (flen != 0) {
75 if (c == "\n>From"[flen]) {
76 if (++flen >= 6) {
77 memcpy(&buf[blen], "From", 4);
78 if ((blen += 4) >= BUF_LEN) {
79 cks->ctx_body.total += blen;
80 MD5Update(&cks->ctx_body.md5, buf,
81 blen);
82 blen = 0;
83 }
84 flen = 0;
85 }
86 continue;
87 }
88 if (--flen != 0) {
89 memcpy(&buf[blen], ">From", flen);
90 if ((blen += flen) >= BUF_LEN) {
91 cks->ctx_body.total += blen;
92 MD5Update(&cks->ctx_body.md5, buf,
93 blen);
94 blen = 0;
95 }
96 flen = 0;
97 }
98 }
99 if (c == '\n') {
100 flen = 1;
101 continue;
102 }
103
104 /* Ignore whitespace to avoid being confused by
105 * varying line endings added and removed by
106 * various MUAs and MTAs.
107 * As a side effect, ignore entirely blank messages. */
108 if (c == ' ' || c == '\t' || c == '\r')
109 continue;
110
111 /* Ignore '=' to minimize but not entirely avoid being
112 * confused by some some sequences that look like
113 * quoted-printable triples but that are not.
114 */
115 if (c == '=')
116 continue;
117
118 buf[blen] = c;
119 if (++blen >= BUF_LEN) {
120 cks->ctx_body.total += blen;
121 MD5Update(&cks->ctx_body.md5, buf, blen);
122 blen = 0;
123 }
124 }
125 }
126
127
128
129 static void
130 dcc_ck_body0_fin(DCC_GOT_CKS *cks)
131 {
132 /* always generate the MD5 checksum so that grey listing has it */
133 MD5Final(cks->sums[DCC_CK_BODY].sum, &cks->ctx_body.md5);
134
135 if (cks->sums[DCC_CK_BODY].type != DCC_CK_BODY)
136 return;
137
138 if (cks->ctx_body.total < 30) {
139 cks->sums[DCC_CK_BODY].type = DCC_CK_INVALID;
140 return;
141 }
142
143 cks->sums[DCC_CK_BODY].rpt2srvr = 1;
144 }
145
146
147
148 static void
149 decoders_init(DCC_GOT_CKS *cks)
150 {
151 cks->mime_bnd_matches = 0;
152
153 cks->flags |= DCC_CKS_MIME_BOL;
154 cks->mime_ct = DCC_CK_CT_TEXT;
155 cks->mime_cset = dcc_cset_1;
156 cks->mime_ce = DCC_CK_CE_ASCII;
157 cks->qp.state = DCC_CK_QP_IDLE;
158 cks->b64.quantum_cnt = 0;
159 }
160
161
162
163 /* start all of the checksums */
164 void
165 dcc_cks_init(DCC_GOT_CKS *cks)
166 {
167 DCC_GOT_SUM *g;
168
169 for (g = cks->sums; g <= LAST(cks->sums); ++g) {
170 CLR_GOT_SUM(g);
171 }
172
173 cks->flags = 0;
174 cks->mime_nest = 0;
175 cks->mhdr_st = CK_MHDR_ST_IDLE;
176 cks->mp_st = CK_MP_ST_TEXT;
177 decoders_init(cks);
178
179 cks->sums[DCC_CK_BODY].type = DCC_CK_BODY;
180 cks->ctx_body.total = 0;
181 cks->ctx_body.flen = 1;
182 MD5Init(&cks->ctx_body.md5);
183
184 dcc_ck_fuz1_init(cks);
185 dcc_ck_fuz2_init(cks);
186 }
187
188
189
190 /* decode quoted-printable and base64 and then compute the body checksums */
191 static void
192 decode_sum(DCC_GOT_CKS *cks, const char *bp, u_int bp_len)
193 {
194 char tbuf[1024];
195 const char *tbufp;
196 int len;
197
198 /* Decode quoted-printable and base64 and make fuzzy sumes
199 * only while in the body of a MIME entity.
200 * Changing from the text, image, html, etc. requires a '\n'
201 * to flush the URL and other decoders in the checksummers.
202 * None of the checksums count whitespace. */
203 if (cks->mp_st != CK_MP_ST_TEXT) {
204 if (bp_len == 0)
205 return;
206 #ifdef DCC_DEBUG_CKSUM
207 if (dcc_clnt_debug == 4)
208 write(1, bp, bp_len);
209 #endif
210 dcc_ck_body0(cks, bp, bp_len);
211 dcc_ck_fuz1(cks, "\n", 1);
212 dcc_ck_fuz2(cks, "\n", 1);
213 return;
214 }
215
216 while (bp_len != 0) {
217 switch (cks->mime_ce) {
218 case DCC_CK_CE_ASCII:
219 default:
220 len = bp_len;
221 tbufp = bp;
222 bp_len = 0;
223 break;
224 case DCC_CK_CE_QP:
225 tbufp = tbuf;
226 len = dcc_ck_qp_decode(cks, &bp, &bp_len,
227 tbuf, sizeof(tbuf));
228 break;
229 case DCC_CK_CE_B64:
230 tbufp = tbuf;
231 len = dcc_ck_b64_decode(cks, &bp, &bp_len,
232 tbuf, sizeof(tbuf));
233 break;
234 }
235
236 if (len != 0) {
237 #ifdef DCC_DEBUG_CKSUM
238 if (dcc_clnt_debug == 4)
239 write(1, tbufp, len);
240 #endif
241 dcc_ck_body0(cks, tbufp, len);
242 dcc_ck_fuz1(cks, tbufp, len);
243 if (cks->mime_ct != DCC_CK_CT_BINARY)
244 dcc_ck_fuz2(cks, tbufp, len);
245 }
246 }
247 }
248
249
250
251 /* compute all of the body checksums on a chunk of raw text */
/*
 *	cks	checksum and MIME parsing state, updated in place
 *	bp	start of the chunk
 *	bp_len	number of bytes in the chunk
 *
 * While cks->mime_nest is 0, the rest of the chunk goes straight to
 * decode_sum().  Otherwise the chunk is walked a line at a time:
 * each line is offered to parse_mime_hdr() while cks->mp_st is
 * CK_MP_ST_HDRS, and the bytes that follow each '\n' are compared with the
 * boundaries in cks->mime_bnd[0] through cks->mime_bnd[mime_nest-1].
 * The progress of each comparison is kept in bndp->cmp_len, which lives in
 * cks, so that a later pass can resume where an earlier one stopped.
 * Text that is not a recognized boundary line is handed to decode_sum().
 */
252 void
253 dcc_ck_body(DCC_GOT_CKS *cks, const void *bp, u_int bp_len)
254 {
255 DCC_CK_BND *bndp;
256 const char *sum; /* 1st input byte not swallowed */
257 const char *cmp; /* 1st not parsed for MIME */
258 const char *cp;
259 char c;
260 int len, matched_len, i, j;
261
262 sum = bp;
263 cmp = sum;
/* From here on bp_len counts the bytes remaining starting at cmp. */
264 while (bp_len != 0) {
265 /* if we have no multipart hassles
266 * then pass buffer to qp/base64 decoder and quit */
/* NOTE(review): sum is passed with a length counted from cmp, so this
 * appears to rely on sum == cmp whenever mime_nest is 0 here -- confirm
 * that parse_mime_hdr() cannot reduce mime_nest to 0. */
267 if (cks->mime_nest == 0) {
268 decode_sum(cks, sum, bp_len);
269 return;
270 }
271
272 /* look for start of next line to start matching boundaries */
273 if (cks->mime_bnd_matches == 0) {
/* No boundary is still a candidate, so skip to just past the next '\n',
 * or to the end of the chunk if it contains no '\n'. */
274 cp = memchr(cmp, '\n', bp_len);
275 if (!cp) {
276 cp = cmp+bp_len;
277 } else {
278 ++cp;
279 }
280
281 /* look for a MIME entity header in the text before
282 * the next line and possible start of a boundary */
283 i = cp - cmp;
284 if (cks->mp_st == CK_MP_ST_HDRS) {
285 if (parse_mime_hdr(cks, cmp, i, 0)) {
286 /* blank header line ends the headers */
/* Sum the header text while mp_st is still CK_MP_ST_HDRS and only then
 * switch to CK_MP_ST_TEXT. */
287 j = cp-sum;
288 if (j) {
289 decode_sum(cks, sum, j);
290 sum = cp;
291 }
292 cks->mp_st = CK_MP_ST_TEXT;
293 }
294 }
295 /* We found the end of a line. Reset positions to
296 * start looking for a MIME boundary after it */
/* Every active boundary becomes a candidate again and every cmp_len,
 * including a DCC_CK_BND_MISS mark, goes back to 0. */
297 if (*(cp-1) == '\n') {
298 cks->flags |= DCC_CKS_MIME_BOL;
299 cks->mime_bnd_matches = cks->mime_nest;
300 for (bndp = cks->mime_bnd;
301 bndp <= LAST(cks->mime_bnd);
302 ++bndp) {
303 bndp->cmp_len = 0;
304 }
305 }
306 cmp = cp;
307 if ((bp_len -= i) == 0)
308 break;
309 }
310
311 /* look for (rest of) one of the active MIME boundaries */
/* matched_len stays 0 when every candidate mismatches; the next pass of
 * the outer loop then scans for the next '\n' from the same cmp. */
312 matched_len = 0;
313 for (bndp = cks->mime_bnd;
314 bndp < &cks->mime_bnd[cks->mime_nest];
315 ++bndp) {
316
317 if (bndp->cmp_len == DCC_CK_BND_MISS)
318 continue; /* already mismatched boundary */
319
/* Meaning of j from here on:
 *	j > 0	boundary bytes that still have to be compared
 *	j == 0	the boundary text itself has been matched completely
 *	j == -1	the first trailing '-' has also been matched
 *	j < -1	both trailing '-' have been matched
 * It can start out negative because cmp_len also counts matched '-'. */
320 j = bndp->bnd_len - bndp->cmp_len;
321 len = bp_len;
322 if (j > len)
323 j = len;
324 cp = cmp;
325 if (j > 0) {
326 if (memcmp(cp, &bndp->bnd[bndp->cmp_len], j)) {
327 bndp->cmp_len = DCC_CK_BND_MISS;
328 --cks->mime_bnd_matches;
329 continue;
330 }
331 /* this boundary matches so far */
332 bndp->cmp_len += j;
333 cp += j;
/* The chunk ended inside a candidate boundary.  Treat the whole remainder
 * as matched so that the outer loop stops; cmp_len remembers the progress. */
334 if ((len -= j) <= 0) {
335 matched_len = bp_len;
336 continue;
337 }
338 /* since we did not exhaust len, we know
339 * we matched the entire boundary */
340 j = 0;
341 }
342
343 /* look for 1st '-' of trailing "--" */
344 if (j == 0
345 && *cp == '-') {
346 ++bndp->cmp_len;
347 if (--len <= 0) {
348 matched_len = bp_len;
349 continue;
350 }
351 ++cp;
352 j = -1;
353 }
354 /* look for 2nd '-' of trailing "--" */
/* A single '-' that is not followed by another '-' is a mismatch. */
355 if (j == -1) {
356 if (*cp == '-') {
357 ++bndp->cmp_len;
358 if (--len <= 0) {
359 matched_len = bp_len;
360 continue;
361 }
362 ++cp;
363 } else {
364 bndp->cmp_len = DCC_CK_BND_MISS;
365 --cks->mime_bnd_matches;
366 continue;
367 }
368 }
369 /* check for trailing whitespace & '\n' */
370 if ((c = *cp) == ' ' || c == '\t' || c == '\r') {
371 do {
372 ++cp;
373 } while (--len > 0
374 && ((c = *cp) == ' ' || c == '\t'
375 || c == '\r'));
376 if (len <= 0) {
377 matched_len = bp_len;
378 continue;
379 }
380 }
381 if (*cp != '\n') {
382 /* mismatch after the end of the boundary */
383 bndp->cmp_len = DCC_CK_BND_MISS;
384 --cks->mime_bnd_matches;
385 continue;
386 }
387
388 /* We have found a MIME boundary.
389 * Flush b64 & qp decoders and fuzzy checksummers */
/* Only the text in front of cmp is summed here; the bytes of the boundary
 * line in the chunk are stepped over below. */
390 j = cmp-sum;
391 if (j)
392 decode_sum(cks, sum, j);
393
394 /* pass the boundary in the buffer */
395 matched_len = ++cp - cmp;
396 cmp = sum = cp;
397
398 /* Body checksum the boundary */
/* The stored boundary string is summed rather than the bytes seen in
 * the chunk. */
399 cks->mp_st = CK_MP_ST_BND;
400 decode_sum(cks, bndp->bnd, bndp->bnd_len);
/* cmp_len differs from bnd_len only when trailing '-' were counted. */
401 if (bndp->cmp_len != bndp->bnd_len) {
402 /* checksum trailing "--" of final boundary */
403 decode_sum(cks, "--", 2);
404 /* end the current & inner entities */
405 cks->mp_st = CK_MP_ST_EPILOGUE;
406 } else {
407 /* intermediate boundaries end inner entities */
408 cks->mp_st = CK_MP_ST_HDRS;
409 ++bndp;
410 }
/* Keep the boundaries in front of bndp.  After the ++bndp above that
 * includes the boundary just matched; after a final boundary it does not. */
411 cks->mime_nest = bndp - cks->mime_bnd;
412 decoders_init(cks);
413 break;
414 }
415 bp_len -= matched_len;
416 }
417
/* Sum whatever lies between sum and cmp and has not been passed on yet. */
418 j = cmp-sum;
419 if (j)
420 decode_sum(cks, sum, j);
421 }
422
423
424
425 /* finish all of the body checksums */
426 void
427 dcc_cks_fin(DCC_GOT_CKS *cks)
428 {
429 dcc_ck_fuz1(cks, "\n", 1); /* flush URL decoders & line buffers */
430 dcc_ck_fuz2(cks, "\n", 1);
431
432 dcc_ck_body0_fin(cks);
433 dcc_ck_fuz1_fin(cks);
434 dcc_ck_fuz2_fin(cks);
435 }