0
|
1 /* Distributed Checksum Clearinghouse |
|
2 * |
|
3 * compute simple body checksum |
|
4 * |
|
5 * Copyright (c) 2008 by Rhyolite Software, LLC |
|
6 * |
|
7 * This agreement is not applicable to any entity which sells anti-spam |
|
8 * solutions to others or provides an anti-spam solution as part of a |
|
9 * security solution sold to other entities, or to a private network |
|
10 * which employs the DCC or uses data provided by operation of the DCC |
|
11 * but does not provide corresponding data to other users. |
|
12 * |
|
13 * Permission to use, copy, modify, and distribute this software without |
|
14 * changes for any purpose with or without fee is hereby granted, provided |
|
15 * that the above copyright notice and this permission notice appear in all |
|
16 * copies and any distributed versions or copies are either unchanged |
|
17 * or not called anything similar to "DCC" or "Distributed Checksum |
|
18 * Clearinghouse". |
|
19 * |
|
20 * Parties not eligible to receive a license under this agreement can |
|
21 * obtain a commercial license to use DCC by contacting Rhyolite Software |
|
22 * at sales@rhyolite.com. |
|
23 * |
|
24 * A commercial license would be for Distributed Checksum and Reputation |
|
25 * Clearinghouse software. That software includes additional features. This |
|
26 * free license for Distributed ChecksumClearinghouse Software does not in any |
|
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse |
|
28 * software |
|
29 * |
|
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL |
|
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES |
|
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC |
|
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES |
|
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
|
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
|
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS |
|
37 * SOFTWARE. |
|
38 * |
|
39 * Rhyolite Software DCC 1.3.103-1.56 $Revision$ |
|
40 */ |
|
41 |
|
42 #include "dcc_ck.h" |
|
43 |
|
44 |
|
45 void |
|
46 dcc_ck_body0(DCC_GOT_CKS *cks, const char *bp, u_int bp_len) |
|
47 { |
|
48 # define BUF_LEN 1024 |
|
49 char buf[BUF_LEN+5]; |
|
50 u_char flen; |
|
51 int blen; |
|
52 char c; |
|
53 |
|
54 if (cks->sums[DCC_CK_BODY].type != DCC_CK_BODY) |
|
55 return; |
|
56 |
|
57 flen = cks->ctx_body.flen; |
|
58 blen = 0; |
|
59 for (;;) { |
|
60 if (bp_len == 0) { |
|
61 if (blen != 0) { |
|
62 cks->ctx_body.total += blen; |
|
63 MD5Update(&cks->ctx_body.md5, buf, blen); |
|
64 } |
|
65 cks->ctx_body.flen = flen; |
|
66 return; |
|
67 } |
|
68 --bp_len; |
|
69 c = *bp++; |
|
70 |
|
71 /* Ignore the '>' in the sequence "\n>From" because |
|
72 * it is sometimes added for old UNIX MUAs. |
|
73 * As a side effect, ignore '\n' */ |
|
74 if (flen != 0) { |
|
75 if (c == "\n>From"[flen]) { |
|
76 if (++flen >= 6) { |
|
77 memcpy(&buf[blen], "From", 4); |
|
78 if ((blen += 4) >= BUF_LEN) { |
|
79 cks->ctx_body.total += blen; |
|
80 MD5Update(&cks->ctx_body.md5, buf, |
|
81 blen); |
|
82 blen = 0; |
|
83 } |
|
84 flen = 0; |
|
85 } |
|
86 continue; |
|
87 } |
|
88 if (--flen != 0) { |
|
89 memcpy(&buf[blen], ">From", flen); |
|
90 if ((blen += flen) >= BUF_LEN) { |
|
91 cks->ctx_body.total += blen; |
|
92 MD5Update(&cks->ctx_body.md5, buf, |
|
93 blen); |
|
94 blen = 0; |
|
95 } |
|
96 flen = 0; |
|
97 } |
|
98 } |
|
99 if (c == '\n') { |
|
100 flen = 1; |
|
101 continue; |
|
102 } |
|
103 |
|
104 /* Ignore whitespace to avoid being confused by |
|
105 * varying line endings added and removed by |
|
106 * various MUAs and MTAs. |
|
107 * As a side effect, ignore entirely blank messages. */ |
|
108 if (c == ' ' || c == '\t' || c == '\r') |
|
109 continue; |
|
110 |
|
111 /* Ignore '=' to minimize but not entirely avoid being |
|
112 * confused by some some sequences that look like |
|
113 * quoted-printable triples but that are not. |
|
114 */ |
|
115 if (c == '=') |
|
116 continue; |
|
117 |
|
118 buf[blen] = c; |
|
119 if (++blen >= BUF_LEN) { |
|
120 cks->ctx_body.total += blen; |
|
121 MD5Update(&cks->ctx_body.md5, buf, blen); |
|
122 blen = 0; |
|
123 } |
|
124 } |
|
125 } |
|
126 |
|
127 |
|
128 |
|
129 static void |
|
130 dcc_ck_body0_fin(DCC_GOT_CKS *cks) |
|
131 { |
|
132 /* always generate the MD5 checksum so that grey listing has it */ |
|
133 MD5Final(cks->sums[DCC_CK_BODY].sum, &cks->ctx_body.md5); |
|
134 |
|
135 if (cks->sums[DCC_CK_BODY].type != DCC_CK_BODY) |
|
136 return; |
|
137 |
|
138 if (cks->ctx_body.total < 30) { |
|
139 cks->sums[DCC_CK_BODY].type = DCC_CK_INVALID; |
|
140 return; |
|
141 } |
|
142 |
|
143 cks->sums[DCC_CK_BODY].rpt2srvr = 1; |
|
144 } |
|
145 |
|
146 |
|
147 |
|
148 static void |
|
149 decoders_init(DCC_GOT_CKS *cks) |
|
150 { |
|
151 cks->mime_bnd_matches = 0; |
|
152 |
|
153 cks->flags |= DCC_CKS_MIME_BOL; |
|
154 cks->mime_ct = DCC_CK_CT_TEXT; |
|
155 cks->mime_cset = dcc_cset_1; |
|
156 cks->mime_ce = DCC_CK_CE_ASCII; |
|
157 cks->qp.state = DCC_CK_QP_IDLE; |
|
158 cks->b64.quantum_cnt = 0; |
|
159 } |
|
160 |
|
161 |
|
162 |
|
163 /* start all of the checksums */ |
|
164 void |
|
165 dcc_cks_init(DCC_GOT_CKS *cks) |
|
166 { |
|
167 DCC_GOT_SUM *g; |
|
168 |
|
169 for (g = cks->sums; g <= LAST(cks->sums); ++g) { |
|
170 CLR_GOT_SUM(g); |
|
171 } |
|
172 |
|
173 cks->flags = 0; |
|
174 cks->mime_nest = 0; |
|
175 cks->mhdr_st = CK_MHDR_ST_IDLE; |
|
176 cks->mp_st = CK_MP_ST_TEXT; |
|
177 decoders_init(cks); |
|
178 |
|
179 cks->sums[DCC_CK_BODY].type = DCC_CK_BODY; |
|
180 cks->ctx_body.total = 0; |
|
181 cks->ctx_body.flen = 1; |
|
182 MD5Init(&cks->ctx_body.md5); |
|
183 |
|
184 dcc_ck_fuz1_init(cks); |
|
185 dcc_ck_fuz2_init(cks); |
|
186 } |
|
187 |
|
188 |
|
189 |
|
190 /* decode quoted-printable and base64 and then compute the body checksums */ |
|
191 static void |
|
192 decode_sum(DCC_GOT_CKS *cks, const char *bp, u_int bp_len) |
|
193 { |
|
194 char tbuf[1024]; |
|
195 const char *tbufp; |
|
196 int len; |
|
197 |
|
198 /* Decode quoted-printable and base64 and make fuzzy sumes |
|
199 * only while in the body of a MIME entity. |
|
200 * Changing from the text, image, html, etc. requires a '\n' |
|
201 * to flush the URL and other decoders in the checksummers. |
|
202 * None of the checksums count whitespace. */ |
|
203 if (cks->mp_st != CK_MP_ST_TEXT) { |
|
204 if (bp_len == 0) |
|
205 return; |
|
206 #ifdef DCC_DEBUG_CKSUM |
|
207 if (dcc_clnt_debug == 4) |
|
208 write(1, bp, bp_len); |
|
209 #endif |
|
210 dcc_ck_body0(cks, bp, bp_len); |
|
211 dcc_ck_fuz1(cks, "\n", 1); |
|
212 dcc_ck_fuz2(cks, "\n", 1); |
|
213 return; |
|
214 } |
|
215 |
|
216 while (bp_len != 0) { |
|
217 switch (cks->mime_ce) { |
|
218 case DCC_CK_CE_ASCII: |
|
219 default: |
|
220 len = bp_len; |
|
221 tbufp = bp; |
|
222 bp_len = 0; |
|
223 break; |
|
224 case DCC_CK_CE_QP: |
|
225 tbufp = tbuf; |
|
226 len = dcc_ck_qp_decode(cks, &bp, &bp_len, |
|
227 tbuf, sizeof(tbuf)); |
|
228 break; |
|
229 case DCC_CK_CE_B64: |
|
230 tbufp = tbuf; |
|
231 len = dcc_ck_b64_decode(cks, &bp, &bp_len, |
|
232 tbuf, sizeof(tbuf)); |
|
233 break; |
|
234 } |
|
235 |
|
236 if (len != 0) { |
|
237 #ifdef DCC_DEBUG_CKSUM |
|
238 if (dcc_clnt_debug == 4) |
|
239 write(1, tbufp, len); |
|
240 #endif |
|
241 dcc_ck_body0(cks, tbufp, len); |
|
242 dcc_ck_fuz1(cks, tbufp, len); |
|
243 if (cks->mime_ct != DCC_CK_CT_BINARY) |
|
244 dcc_ck_fuz2(cks, tbufp, len); |
|
245 } |
|
246 } |
|
247 } |
|
248 |
|
249 |
|
250 |
|
251 /* compute all of the body checksums on a chunk of raw text */ |
|
252 void |
|
253 dcc_ck_body(DCC_GOT_CKS *cks, const void *bp, u_int bp_len) |
|
254 { |
|
255 DCC_CK_BND *bndp; |
|
256 const char *sum; /* 1st input byte not swallowed */ |
|
257 const char *cmp; /* 1st not parsed for MIME */ |
|
258 const char *cp; |
|
259 char c; |
|
260 int len, matched_len, i, j; |
|
261 |
|
262 sum = bp; |
|
263 cmp = sum; |
|
264 while (bp_len != 0) { |
|
265 /* if we have no multipart hassles |
|
266 * then pass buffer to qp/base64 decoder and quit */ |
|
267 if (cks->mime_nest == 0) { |
|
268 decode_sum(cks, sum, bp_len); |
|
269 return; |
|
270 } |
|
271 |
|
272 /* look for start of next line to start matching boundaries */ |
|
273 if (cks->mime_bnd_matches == 0) { |
|
274 cp = memchr(cmp, '\n', bp_len); |
|
275 if (!cp) { |
|
276 cp = cmp+bp_len; |
|
277 } else { |
|
278 ++cp; |
|
279 } |
|
280 |
|
281 /* look for a MIME entity header in the text before |
|
282 * the next line and possible start of a boundary */ |
|
283 i = cp - cmp; |
|
284 if (cks->mp_st == CK_MP_ST_HDRS) { |
|
285 if (parse_mime_hdr(cks, cmp, i, 0)) { |
|
286 /* blank header line ends the headers */ |
|
287 j = cp-sum; |
|
288 if (j) { |
|
289 decode_sum(cks, sum, j); |
|
290 sum = cp; |
|
291 } |
|
292 cks->mp_st = CK_MP_ST_TEXT; |
|
293 } |
|
294 } |
|
295 /* We found the end of a line. Reset positions to |
|
296 * start looking for a MIME boundary after it */ |
|
297 if (*(cp-1) == '\n') { |
|
298 cks->flags |= DCC_CKS_MIME_BOL; |
|
299 cks->mime_bnd_matches = cks->mime_nest; |
|
300 for (bndp = cks->mime_bnd; |
|
301 bndp <= LAST(cks->mime_bnd); |
|
302 ++bndp) { |
|
303 bndp->cmp_len = 0; |
|
304 } |
|
305 } |
|
306 cmp = cp; |
|
307 if ((bp_len -= i) == 0) |
|
308 break; |
|
309 } |
|
310 |
|
311 /* look for (rest of) one of the active MIME boundaries */ |
|
312 matched_len = 0; |
|
313 for (bndp = cks->mime_bnd; |
|
314 bndp < &cks->mime_bnd[cks->mime_nest]; |
|
315 ++bndp) { |
|
316 |
|
317 if (bndp->cmp_len == DCC_CK_BND_MISS) |
|
318 continue; /* already mismatched boundary */ |
|
319 |
|
320 j = bndp->bnd_len - bndp->cmp_len; |
|
321 len = bp_len; |
|
322 if (j > len) |
|
323 j = len; |
|
324 cp = cmp; |
|
325 if (j > 0) { |
|
326 if (memcmp(cp, &bndp->bnd[bndp->cmp_len], j)) { |
|
327 bndp->cmp_len = DCC_CK_BND_MISS; |
|
328 --cks->mime_bnd_matches; |
|
329 continue; |
|
330 } |
|
331 /* this boundary matches so far */ |
|
332 bndp->cmp_len += j; |
|
333 cp += j; |
|
334 if ((len -= j) <= 0) { |
|
335 matched_len = bp_len; |
|
336 continue; |
|
337 } |
|
338 /* since we did not exhaust len, we know |
|
339 * we matched the entire boundary */ |
|
340 j = 0; |
|
341 } |
|
342 |
|
343 /* look for 1st '-' of trailing "--" */ |
|
344 if (j == 0 |
|
345 && *cp == '-') { |
|
346 ++bndp->cmp_len; |
|
347 if (--len <= 0) { |
|
348 matched_len = bp_len; |
|
349 continue; |
|
350 } |
|
351 ++cp; |
|
352 j = -1; |
|
353 } |
|
354 /* look for 2nd '-' of trailing "--" */ |
|
355 if (j == -1) { |
|
356 if (*cp == '-') { |
|
357 ++bndp->cmp_len; |
|
358 if (--len <= 0) { |
|
359 matched_len = bp_len; |
|
360 continue; |
|
361 } |
|
362 ++cp; |
|
363 } else { |
|
364 bndp->cmp_len = DCC_CK_BND_MISS; |
|
365 --cks->mime_bnd_matches; |
|
366 continue; |
|
367 } |
|
368 } |
|
369 /* check for trailing whitespace & '\n' */ |
|
370 if ((c = *cp) == ' ' || c == '\t' || c == '\r') { |
|
371 do { |
|
372 ++cp; |
|
373 } while (--len > 0 |
|
374 && ((c = *cp) == ' ' || c == '\t' |
|
375 || c == '\r')); |
|
376 if (len <= 0) { |
|
377 matched_len = bp_len; |
|
378 continue; |
|
379 } |
|
380 } |
|
381 if (*cp != '\n') { |
|
382 /* mismatch after the end of the boundary */ |
|
383 bndp->cmp_len = DCC_CK_BND_MISS; |
|
384 --cks->mime_bnd_matches; |
|
385 continue; |
|
386 } |
|
387 |
|
388 /* We have found a MIME boundary. |
|
389 * Flush b64 & qp decoders and fuzzy checksummers */ |
|
390 j = cmp-sum; |
|
391 if (j) |
|
392 decode_sum(cks, sum, j); |
|
393 |
|
394 /* pass the boundary in the buffer */ |
|
395 matched_len = ++cp - cmp; |
|
396 cmp = sum = cp; |
|
397 |
|
398 /* Body checksum the boundary */ |
|
399 cks->mp_st = CK_MP_ST_BND; |
|
400 decode_sum(cks, bndp->bnd, bndp->bnd_len); |
|
401 if (bndp->cmp_len != bndp->bnd_len) { |
|
402 /* checksum trailing "--" of final boundary */ |
|
403 decode_sum(cks, "--", 2); |
|
404 /* end the current & inner entities */ |
|
405 cks->mp_st = CK_MP_ST_EPILOGUE; |
|
406 } else { |
|
407 /* intermediate boundaries end inner entities */ |
|
408 cks->mp_st = CK_MP_ST_HDRS; |
|
409 ++bndp; |
|
410 } |
|
411 cks->mime_nest = bndp - cks->mime_bnd; |
|
412 decoders_init(cks); |
|
413 break; |
|
414 } |
|
415 bp_len -= matched_len; |
|
416 } |
|
417 |
|
418 j = cmp-sum; |
|
419 if (j) |
|
420 decode_sum(cks, sum, j); |
|
421 } |
|
422 |
|
423 |
|
424 |
|
425 /* finish all of the body checksums */ |
|
426 void |
|
427 dcc_cks_fin(DCC_GOT_CKS *cks) |
|
428 { |
|
429 dcc_ck_fuz1(cks, "\n", 1); /* flush URL decoders & line buffers */ |
|
430 dcc_ck_fuz2(cks, "\n", 1); |
|
431 |
|
432 dcc_ck_body0_fin(cks); |
|
433 dcc_ck_fuz1_fin(cks); |
|
434 dcc_ck_fuz2_fin(cks); |
|
435 } |