Mercurial > notdcc
diff dcclib/ckfuz1.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dcclib/ckfuz1.c Tue Mar 10 13:49:58 2009 +0100 @@ -0,0 +1,243 @@ +/* Distributed Checksum Clearinghouse + * + * compute fuzzy body checksum #1 + * + * Copyright (c) 2008 by Rhyolite Software, LLC + * + * This agreement is not applicable to any entity which sells anti-spam + * solutions to others or provides an anti-spam solution as part of a + * security solution sold to other entities, or to a private network + * which employs the DCC or uses data provided by operation of the DCC + * but does not provide corresponding data to other users. + * + * Permission to use, copy, modify, and distribute this software without + * changes for any purpose with or without fee is hereby granted, provided + * that the above copyright notice and this permission notice appear in all + * copies and any distributed versions or copies are either unchanged + * or not called anything similar to "DCC" or "Distributed Checksum + * Clearinghouse". + * + * Parties not eligible to receive a license under this agreement can + * obtain a commercial license to use DCC by contacting Rhyolite Software + * at sales@rhyolite.com. + * + * A commercial license would be for Distributed Checksum and Reputation + * Clearinghouse software. That software includes additional features. This + * free license for Distributed ChecksumClearinghouse Software does not in any + * way grant permision to use Distributed Checksum and Reputation Clearinghouse + * software + * + * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC + * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES + * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Rhyolite Software DCC 1.3.103-1.59 $Revision$ + */ + +#include "dcc_ck.h" + +#define FZ1 cks->fuz1 + +#define MAX_FUZ1_LEN (4*1024) + + +void +dcc_ck_fuz1_init(DCC_GOT_CKS *cks) +{ + cks->sums[DCC_CK_FUZ1].type = DCC_CK_FUZ1; + FZ1.total = 0; /* bytes summed */ + FZ1.eol = FZ1.cp = FZ1.buf; + FZ1.url.st = DCC_URL_ST_IDLE; + + MD5Init(&FZ1.md5); +} + + + +static inline u_char /* 0=keep the line, 1=discard it */ +dear_sucker(const char *cp, u_int llen) +{ +#define CK_WORD(w) (llen >= sizeof(w) && !strncmp(cp, w, LITZ(w))) + + if (CK_WORD("dear")) + return 1; + if (CK_WORD("hello")) + return 1; + if (CK_WORD("greeting")) + return 1; + if (CK_WORD("date")) + return 1; + + return 0; +#undef CKWORD +} + + + +static inline u_char +add_sum(DCC_GOT_CKS *cks, int len) +{ + int i; + + if (!len) + return 1; + + /* ignore the end of very long spam, since + * it is likely to make the checksum differ */ + i = MAX_FUZ1_LEN - (FZ1.total + len); + if (i < 0) + len += i; + MD5Update(&FZ1.md5, FZ1.buf, len); + return (FZ1.total += len) < MAX_FUZ1_LEN; +} + + + +void +dcc_ck_fuz1(DCC_GOT_CKS *cks, const char *bp, u_int bp_len) +{ + char *cp; + DNSBL_WORK *blw; + int i, len, c; + + if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1) + return; + + if (FZ1.total >= MAX_FUZ1_LEN) + return; + + cp = FZ1.cp; + + for (;;) { + if (bp_len == 0) { + /* Sum the buffer if it ends with a line. Note that + * every message always ends with an artificial "\n". */ + if (FZ1.eol == cp) { + add_sum(cks, cp - FZ1.buf); + FZ1.eol = cp = FZ1.buf; + } + FZ1.cp = cp; + return; + } + --bp_len; + c = *bp++; + + i = dcc_ck_url(&FZ1.url, c, &cp); + c = i>>DCC_CK_URL_SHIFT; + switch ((DCC_CK_URL)(i & DCC_CK_URL_MASK)) { + case DCC_CK_URL_CHAR: + break; + case DCC_CK_URL_CK_LEN: + /* Make room before starting a URL + * if we are too close to the end of + * the buffer for a maximum size URL */ + if (cp >= &FZ1.buf[sizeof(FZ1.buf)-DCC_URL_MAX]) { + if (!FZ1.eol + || FZ1.eol < cp-DCC_FUZ1_MAX_LINE) { + if (!add_sum(cks, cp - FZ1.buf)) + return; + FZ1.eol = 0; + cp = FZ1.buf; + } else { + len = FZ1.eol - FZ1.buf; + if (!add_sum(cks, len)) + return; + memmove(FZ1.buf, FZ1.eol, cp - FZ1.eol); + FZ1.eol = FZ1.buf; + cp -= len; + } + } + blw = cks->dnsbl; + if (blw != 0) { + blw->tgt.dom.c[0] = '\0'; + blw->tgt_dom_len = 0; + } + continue; + case DCC_CK_URL_HOST: + case DCC_CK_URL_DOT: + blw = cks->dnsbl; + if (blw != 0 + && blw->unhit.url != 0 + && blw->tgt_dom_len < ISZ(blw->tgt.dom)) + blw->tgt.dom.c[blw->tgt_dom_len++] = c; + break; + case DCC_CK_URL_HOST_END: + blw = cks->dnsbl; + if (blw && blw->tgt_dom_len > 0 + && blw->tgt_dom_len < ISZ(blw->tgt.dom)) { + blw->tgt.dom.c[blw->tgt_dom_len] = '\0'; + url_dnsbl(blw); + } + break; + case DCC_CK_URL_HOST_RESET: + blw = cks->dnsbl; + if (blw != 0) { + blw->tgt.dom.c[0] = '\0'; + blw->tgt_dom_len = 0; + } + break; + case DCC_CK_URL_SKIP: + continue; + } + + /* collect only ASCII letters */ + if (c >= 'a' && c <= 'z') { + /* Collect more of a new line */ + *cp = c; + if (++cp < &FZ1.buf[sizeof(FZ1.buf)]) + continue; + + /* We are at the end of the buffer, + * so add it to the checksum */ + if (!add_sum(cks, cp - FZ1.buf)) + return; + cp = FZ1.buf; + FZ1.eol = 0; + continue; + } + + if (c == '\n') { + /* Ignore short lines starting with some strings */ + if (FZ1.eol + && (len = cp - FZ1.eol) > 0 + && len <= DCC_FUZ1_MAX_LINE + && dear_sucker(FZ1.eol, len)) { + cp = FZ1.eol; + continue; + } + + /* Add the line to the checksum if we do not + * have room in the buffer for another line */ + if (cp >= &FZ1.buf[sizeof(FZ1.buf) - (DCC_FUZ1_MAX_LINE + + DCC_HTTPS_LEN)]) { + if (!add_sum(cks, cp - FZ1.buf)) + return; + cp = FZ1.buf; + } + FZ1.eol = cp; + } + } +} + + + +void +dcc_ck_fuz1_fin(DCC_GOT_CKS *cks) +{ + if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1) + return; + + /* we cannot compute a checksum on an empty or nearly empty message */ + if (FZ1.total < 30) { + cks->sums[DCC_CK_FUZ1].type = DCC_CK_INVALID; + return; + } + + MD5Final(cks->sums[DCC_CK_FUZ1].sum, &FZ1.md5); + cks->sums[DCC_CK_FUZ1].rpt2srvr = 1; +}