Mercurial > notdcc
diff dcclib/ckbody.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dcclib/ckbody.c Tue Mar 10 13:49:58 2009 +0100
@@ -0,0 +1,435 @@
+/* Distributed Checksum Clearinghouse
+ *
+ * compute simple body checksum
+ *
+ * Copyright (c) 2008 by Rhyolite Software, LLC
+ *
+ * This agreement is not applicable to any entity which sells anti-spam
+ * solutions to others or provides an anti-spam solution as part of a
+ * security solution sold to other entities, or to a private network
+ * which employs the DCC or uses data provided by operation of the DCC
+ * but does not provide corresponding data to other users.
+ *
+ * Permission to use, copy, modify, and distribute this software without
+ * changes for any purpose with or without fee is hereby granted, provided
+ * that the above copyright notice and this permission notice appear in all
+ * copies and any distributed versions or copies are either unchanged
+ * or not called anything similar to "DCC" or "Distributed Checksum
+ * Clearinghouse".
+ *
+ * Parties not eligible to receive a license under this agreement can
+ * obtain a commercial license to use DCC by contacting Rhyolite Software
+ * at sales@rhyolite.com.
+ *
+ * A commercial license would be for Distributed Checksum and Reputation
+ * Clearinghouse software. That software includes additional features. This
+ * free license for Distributed ChecksumClearinghouse Software does not in any
+ * way grant permision to use Distributed Checksum and Reputation Clearinghouse
+ * software
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
+ * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Rhyolite Software DCC 1.3.103-1.56 $Revision$
+ */
+
+#include "dcc_ck.h"
+
+/* Add a chunk of body text to the simple MD5 body checksum.
+ *
+ *	cks	checksum state; nothing is done unless
+ *		cks->sums[DCC_CK_BODY].type is still DCC_CK_BODY
+ *	bp	text to add
+ *	bp_len	number of bytes at bp
+ *
+ * '\n', ' ', '\t', '\r' and '=' are never given to the MD5 context, and
+ * the '>' of a "\n>From" sequence is dropped.  Progress through
+ * "\n>From" is loaded from and stored back to cks->ctx_body.flen, so the
+ * sequence is still recognized when it is split between two calls.
+ * Bytes that are kept are collected in a local buffer and handed to
+ * MD5Update() in batches; cks->ctx_body.total counts them.
+ */
+void
+dcc_ck_body0(DCC_GOT_CKS *cks, const char *bp, u_int bp_len)
+{
+# define BUF_LEN 1024
+	char buf[BUF_LEN+5];		/* slack: up to 4 bytes are appended
+					 * before the flush check below */
+	u_char flen;			/* chars of "\n>From" seen; 0=none */
+	int blen;			/* bytes waiting in buf */
+	char c;
+
+	if (cks->sums[DCC_CK_BODY].type != DCC_CK_BODY)
+		return;
+
+	flen = cks->ctx_body.flen;
+	blen = 0;
+	for (;;) {
+		if (bp_len == 0) {
+			/* end of this chunk: hash what is buffered and
+			 * remember how far into "\n>From" we are */
+			if (blen != 0) {
+				cks->ctx_body.total += blen;
+				MD5Update(&cks->ctx_body.md5, buf, blen);
+			}
+			cks->ctx_body.flen = flen;
+			return;
+		}
+		--bp_len;
+		c = *bp++;
+
+		/* Ignore the '>' in the sequence "\n>From" because
+		 * it is sometimes added for old UNIX MUAs.
+		 * As a side effect, ignore '\n'.
+		 *
+		 * While flen != 0 the characters seen since the last '\n'
+		 * are held back.  A complete match emits "From" without
+		 * the '>'.  A mismatch emits the held-back characters
+		 * after the '\n' (a prefix of ">From") and then falls
+		 * through so that c is handled as ordinary input. */
+		if (flen != 0) {
+			if (c == "\n>From"[flen]) {
+				if (++flen >= 6) {
+					memcpy(&buf[blen], "From", 4);
+					if ((blen += 4) >= BUF_LEN) {
+						cks->ctx_body.total += blen;
+						MD5Update(&cks->ctx_body.md5, buf,
+							  blen);
+						blen = 0;
+					}
+					flen = 0;
+				}
+				continue;
+			}
+			if (--flen != 0) {	/* now the count after '\n' */
+				memcpy(&buf[blen], ">From", flen);
+				if ((blen += flen) >= BUF_LEN) {
+					cks->ctx_body.total += blen;
+					MD5Update(&cks->ctx_body.md5, buf,
+						  blen);
+					blen = 0;
+				}
+				flen = 0;
+			}
+		}
+		if (c == '\n') {
+			flen = 1;	/* start watching for ">From" */
+			continue;
+		}
+
+		/* Ignore whitespace to avoid being confused by
+		 * varying line endings added and removed by
+		 * various MUAs and MTAs.
+		 * As a side effect, ignore entirely blank messages. */
+		if (c == ' ' || c == '\t' || c == '\r')
+			continue;
+
+		/* Ignore '=' to minimize but not entirely avoid being
+		 * confused by some sequences that look like
+		 * quoted-printable triples but that are not.
+		 */
+		if (c == '=')
+			continue;
+
+		buf[blen] = c;
+		if (++blen >= BUF_LEN) {
+			cks->ctx_body.total += blen;
+			MD5Update(&cks->ctx_body.md5, buf, blen);
+			blen = 0;
+		}
+	}
+}
+
+
+/* Finish the simple body checksum.
+ *
+ * The MD5 digest is always written to cks->sums[DCC_CK_BODY].sum.  If
+ * the checksum type is still DCC_CK_BODY, it is changed to
+ * DCC_CK_INVALID when fewer than 30 bytes were summed; otherwise
+ * rpt2srvr is set.
+ */
+static void
+dcc_ck_body0_fin(DCC_GOT_CKS *cks)
+{
+	/* always generate the MD5 checksum so that grey listing has it */
+	MD5Final(cks->sums[DCC_CK_BODY].sum, &cks->ctx_body.md5);
+
+	if (cks->sums[DCC_CK_BODY].type != DCC_CK_BODY)
+		return;
+
+	if (cks->ctx_body.total < 30) {
+		cks->sums[DCC_CK_BODY].type = DCC_CK_INVALID;
+		return;
+	}
+
+	cks->sums[DCC_CK_BODY].rpt2srvr = 1;
+}
+
+
+/* Reset the MIME decoding state kept in cks: clear mime_bnd_matches, set
+ * the DCC_CKS_MIME_BOL flag, select text content, character set
+ * dcc_cset_1 and ASCII transfer encoding, put the quoted-printable
+ * decoder in its idle state and zero the base64 quantum count.
+ * Called from dcc_cks_init() and after each boundary found by
+ * dcc_ck_body(). */
+static void
+decoders_init(DCC_GOT_CKS *cks)
+{
+	cks->mime_bnd_matches = 0;
+
+	cks->flags |= DCC_CKS_MIME_BOL;
+	cks->mime_ct = DCC_CK_CT_TEXT;
+	cks->mime_cset = dcc_cset_1;
+	cks->mime_ce = DCC_CK_CE_ASCII;
+	cks->qp.state = DCC_CK_QP_IDLE;
+	cks->b64.quantum_cnt = 0;
+}
+
+
+
+/* start all of the checksums
+ *
+ * Clears every entry of cks->sums, resets the flags and the MIME
+ * nesting, header and multipart state, resets the decoders, arms the
+ * simple body checksum with a fresh MD5 context, and initializes the
+ * two fuzzy checksums. */
+void
+dcc_cks_init(DCC_GOT_CKS *cks)
+{
+	DCC_GOT_SUM *g;
+
+	for (g = cks->sums; g <= LAST(cks->sums); ++g) {
+		CLR_GOT_SUM(g);
+	}
+
+	cks->flags = 0;
+	cks->mime_nest = 0;
+	cks->mhdr_st = CK_MHDR_ST_IDLE;
+	cks->mp_st = CK_MP_ST_TEXT;
+	decoders_init(cks);
+
+	cks->sums[DCC_CK_BODY].type = DCC_CK_BODY;
+	cks->ctx_body.total = 0;
+	cks->ctx_body.flen = 1;		/* same state as just after a '\n' */
+	MD5Init(&cks->ctx_body.md5);
+
+	dcc_ck_fuz1_init(cks);
+	dcc_ck_fuz2_init(cks);
+}
+
+
+
+/* decode quoted-printable and base64 and then compute the body checksums
+ *
+ *	cks	checksum and MIME state
+ *	bp	raw text to decode and sum
+ *	bp_len	number of bytes at bp
+ */
+static void
+decode_sum(DCC_GOT_CKS *cks, const char *bp, u_int bp_len)
+{
+	char tbuf[1024];		/* output of the qp & base64 decoders */
+	const char *tbufp;		/* text to checksum on this pass */
+	int len;			/* number of bytes at tbufp */
+
+	/* Decode quoted-printable and base64 and make fuzzy sums
+	 * only while in the body of a MIME entity.
+	 * Changing from the text, image, html, etc. requires a '\n'
+	 * to flush the URL and other decoders in the checksummers.
+	 * None of the checksums count whitespace.
+	 *
+	 * So outside of entity text the raw bytes go only to the simple
+	 * body checksum, and the fuzzy checksums are given a lone "\n". */
+	if (cks->mp_st != CK_MP_ST_TEXT) {
+		if (bp_len == 0)
+			return;
+#ifdef DCC_DEBUG_CKSUM
+		if (dcc_clnt_debug == 4)
+			write(1, bp, bp_len);
+#endif
+		dcc_ck_body0(cks, bp, bp_len);
+		dcc_ck_fuz1(cks, "\n", 1);
+		dcc_ck_fuz2(cks, "\n", 1);
+		return;
+	}
+
+	while (bp_len != 0) {
+		switch (cks->mime_ce) {
+		case DCC_CK_CE_ASCII:
+		default:
+			/* no decoding; checksum the input in place */
+			len = bp_len;
+			tbufp = bp;
+			bp_len = 0;
+			break;
+		case DCC_CK_CE_QP:
+			/* bp and bp_len are passed by address; presumably
+			 * the decoder advances them past what it consumed */
+			tbufp = tbuf;
+			len = dcc_ck_qp_decode(cks, &bp, &bp_len,
+					       tbuf, sizeof(tbuf));
+			break;
+		case DCC_CK_CE_B64:
+			tbufp = tbuf;
+			len = dcc_ck_b64_decode(cks, &bp, &bp_len,
+						tbuf, sizeof(tbuf));
+			break;
+		}
+
+		if (len != 0) {
+#ifdef DCC_DEBUG_CKSUM
+			if (dcc_clnt_debug == 4)
+				write(1, tbufp, len);
+#endif
+			dcc_ck_body0(cks, tbufp, len);
+			dcc_ck_fuz1(cks, tbufp, len);
+			if (cks->mime_ct != DCC_CK_CT_BINARY)
+				dcc_ck_fuz2(cks, tbufp, len);
+		}
+	}
+}
+
+
+
+/* compute all of the body checksums on a chunk of raw text
+ *
+ *	cks	checksum and MIME state carried from call to call
+ *	bp	next chunk of the raw message body
+ *	bp_len	number of bytes at bp
+ *
+ * With no active multipart nesting (cks->mime_nest == 0) the chunk goes
+ * straight to decode_sum().  Otherwise the chunk is walked a line at a
+ * time: while cks->mp_st is CK_MP_ST_HDRS each line is given to
+ * parse_mime_hdr(), and the text after each '\n' is compared with the
+ * active boundaries cks->mime_bnd[0..mime_nest).  Text in front of a
+ * boundary is passed to decode_sum(); the boundary itself is summed from
+ * the stored boundary string with cks->mp_st set to CK_MP_ST_BND.
+ * Each boundary's cmp_len records how much of it has matched, so a
+ * boundary line split between two calls can still be recognized.
+ *
+ * NOTE(review): when a chunk ends in the middle of a possible boundary,
+ * the tentatively matched bytes are counted as consumed but are not
+ * passed to decode_sum() by this call.  If the match fails in the next
+ * call they appear never to be summed -- confirm this is intended.
+ */
+void
+dcc_ck_body(DCC_GOT_CKS *cks, const void *bp, u_int bp_len)
+{
+	DCC_CK_BND *bndp;
+	const char *sum;		/* 1st input byte not swallowed */
+	const char *cmp;		/* 1st not parsed for MIME */
+	const char *cp;
+	char c;
+	int len, matched_len, i, j;
+
+	sum = bp;
+	cmp = sum;
+	while (bp_len != 0) {
+		/* if we have no multipart hassles
+		 * then pass buffer to qp/base64 decoder and quit */
+		if (cks->mime_nest == 0) {
+			decode_sum(cks, sum, bp_len);
+			return;
+		}
+
+		/* look for start of next line to start matching boundaries */
+		if (cks->mime_bnd_matches == 0) {
+			cp = memchr(cmp, '\n', bp_len);
+			if (!cp) {
+				cp = cmp+bp_len;	/* no '\n' in the rest */
+			} else {
+				++cp;			/* just past the '\n' */
+			}
+
+			/* look for a MIME entity header in the text before
+			 * the next line and possible start of a boundary */
+			i = cp - cmp;
+			if (cks->mp_st == CK_MP_ST_HDRS) {
+				if (parse_mime_hdr(cks, cmp, i, 0)) {
+					/* blank header line ends the headers */
+					j = cp-sum;
+					if (j) {
+						decode_sum(cks, sum, j);
+						sum = cp;
+					}
+					cks->mp_st = CK_MP_ST_TEXT;
+				}
+			}
+			/* We found the end of a line. Reset positions to
+			 * start looking for a MIME boundary after it */
+			if (*(cp-1) == '\n') {
+				cks->flags |= DCC_CKS_MIME_BOL;
+				cks->mime_bnd_matches = cks->mime_nest;
+				for (bndp = cks->mime_bnd;
+				     bndp <= LAST(cks->mime_bnd);
+				     ++bndp) {
+					bndp->cmp_len = 0;
+				}
+			}
+			cmp = cp;
+			if ((bp_len -= i) == 0)
+				break;
+		}
+
+		/* look for (rest of) one of the active MIME boundaries */
+		matched_len = 0;
+		for (bndp = cks->mime_bnd;
+		     bndp < &cks->mime_bnd[cks->mime_nest];
+		     ++bndp) {
+
+			if (bndp->cmp_len == DCC_CK_BND_MISS)
+				continue;	/* already mismatched boundary */
+
+			/* j > 0: boundary bytes still to be matched
+			 * j == 0: whole boundary matched
+			 * j == -1: 1st trailing '-' already matched
+			 * j < -1: both trailing '-' already matched */
+			j = bndp->bnd_len - bndp->cmp_len;
+			len = bp_len;
+			if (j > len)
+				j = len;
+			cp = cmp;
+			if (j > 0) {
+				if (memcmp(cp, &bndp->bnd[bndp->cmp_len], j)) {
+					bndp->cmp_len = DCC_CK_BND_MISS;
+					--cks->mime_bnd_matches;
+					continue;
+				}
+				/* this boundary matches so far */
+				bndp->cmp_len += j;
+				cp += j;
+				if ((len -= j) <= 0) {
+					/* chunk ends inside a possible
+					 * boundary; resume in next call */
+					matched_len = bp_len;
+					continue;
+				}
+				/* since we did not exhaust len, we know
+				 * we matched the entire boundary */
+				j = 0;
+			}
+
+			/* look for 1st '-' of trailing "--" */
+			if (j == 0
+			    && *cp == '-') {
+				++bndp->cmp_len;
+				if (--len <= 0) {
+					matched_len = bp_len;
+					continue;
+				}
+				++cp;
+				j = -1;
+			}
+			/* look for 2nd '-' of trailing "--" */
+			if (j == -1) {
+				if (*cp == '-') {
+					++bndp->cmp_len;
+					if (--len <= 0) {
+						matched_len = bp_len;
+						continue;
+					}
+					++cp;
+				} else {
+					bndp->cmp_len = DCC_CK_BND_MISS;
+					--cks->mime_bnd_matches;
+					continue;
+				}
+			}
+			/* check for trailing whitespace & '\n' */
+			if ((c = *cp) == ' ' || c == '\t' || c == '\r') {
+				do {
+					++cp;
+				} while (--len > 0
+					 && ((c = *cp) == ' ' || c == '\t'
+					     || c == '\r'));
+				if (len <= 0) {
+					matched_len = bp_len;
+					continue;
+				}
+			}
+			if (*cp != '\n') {
+				/* mismatch after the end of the boundary */
+				bndp->cmp_len = DCC_CK_BND_MISS;
+				--cks->mime_bnd_matches;
+				continue;
+			}
+
+			/* We have found a MIME boundary.
+			 * Flush b64 & qp decoders and fuzzy checksummers */
+			j = cmp-sum;
+			if (j)
+				decode_sum(cks, sum, j);
+
+			/* pass the boundary in the buffer */
+			matched_len = ++cp - cmp;
+			cmp = sum = cp;
+
+			/* Body checksum the boundary */
+			cks->mp_st = CK_MP_ST_BND;
+			decode_sum(cks, bndp->bnd, bndp->bnd_len);
+			if (bndp->cmp_len != bndp->bnd_len) {
+				/* checksum trailing "--" of final boundary */
+				decode_sum(cks, "--", 2);
+				/* end the current & inner entities */
+				cks->mp_st = CK_MP_ST_EPILOGUE;
+			} else {
+				/* intermediate boundaries end inner entities */
+				cks->mp_st = CK_MP_ST_HDRS;
+				++bndp;		/* keep this boundary active */
+			}
+			cks->mime_nest = bndp - cks->mime_bnd;
+			decoders_init(cks);
+			break;
+		}
+		bp_len -= matched_len;
+	}
+
+	/* checksum whatever was parsed for MIME but not yet summed */
+	j = cmp-sum;
+	if (j)
+		decode_sum(cks, sum, j);
+}
+
+
+
+/* finish all of the body checksums
+ *
+ * Gives both fuzzy checksums a final "\n" and then finishes the simple
+ * body checksum and the two fuzzy checksums. */
+void
+dcc_cks_fin(DCC_GOT_CKS *cks)
+{
+	dcc_ck_fuz1(cks, "\n", 1);	/* flush URL decoders & line buffers */
+	dcc_ck_fuz2(cks, "\n", 1);
+
+	dcc_ck_body0_fin(cks);
+	dcc_ck_fuz1_fin(cks);
+	dcc_ck_fuz2_fin(cks);
+}