Mercurial > notdcc
view dcclib/ckfuz1.c @ 5:0a7a5940ee3a
Change description per license
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 15:03:24 +0100 |
parents | c7f6b056b673 |
children |
line wrap: on
line source
/* Distributed Checksum Clearinghouse
 *
 * compute fuzzy body checksum #1
 *
 * Copyright (c) 2008 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software without
 * changes for any purpose with or without fee is hereby granted, provided
 * that the above copyright notice and this permission notice appear in all
 * copies and any distributed versions or copies are either unchanged
 * or not called anything similar to "DCC" or "Distributed Checksum
 * Clearinghouse".
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC by contacting Rhyolite Software
 * at sales@rhyolite.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software. That software includes additional features. This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.103-1.59 $Revision$
 */

#include "dcc_ck.h"

/* Shorthand for the fuzzy-checksum-#1 state of the DCC_GOT_CKS that is
 * named `cks` in the enclosing function. */
#define FZ1 cks->fuz1

/* Stop adding to the checksum once this many bytes have been summed. */
#define MAX_FUZ1_LEN (4*1024)


/* Prepare `cks` for computing fuzzy checksum #1.
 *
 * Marks the DCC_CK_FUZ1 slot as in use, zeroes the count of summed bytes,
 * points both the write position and the end-of-last-line marker at the
 * start of the collection buffer, idles the URL scanner state, and
 * initializes the MD5 context. */
void
dcc_ck_fuz1_init(DCC_GOT_CKS *cks)
{
    cks->sums[DCC_CK_FUZ1].type = DCC_CK_FUZ1;
    FZ1.total = 0;                      /* bytes summed */
    FZ1.eol = FZ1.cp = FZ1.buf;
    FZ1.url.st = DCC_URL_ST_IDLE;
    MD5Init(&FZ1.md5);
}



/* Decide whether a collected line starts with one of a few salutation-like
 * words and so should be left out of the checksum.
 *
 *  cp      start of the collected line (not NUL terminated)
 *  llen    number of collected bytes in the line
 *
 * A word matches only when the line is strictly longer than the word
 * (llen >= sizeof(w), and sizeof counts the literal's trailing NUL) and
 * the line begins with it.
 * NOTE(review): assumes LITZ(w) is the literal's length without the NUL;
 * confirm against its definition in the project headers. */
static inline u_char                    /* 0=keep the line, 1=discard it */
dear_sucker(const char *cp, u_int llen)
{
#define CK_WORD(w) (llen >= sizeof(w) && !strncmp(cp, w, LITZ(w)))
    if (CK_WORD("dear"))
        return 1;
    if (CK_WORD("hello"))
        return 1;
    if (CK_WORD("greeting"))
        return 1;
    if (CK_WORD("date"))
        return 1;
    return 0;
    /* The original undefined the misspelled name CKWORD, which left
     * CK_WORD (and its references to the locals cp and llen) defined
     * for the rest of the file. */
#undef CK_WORD
}



/* Feed the first `len` bytes of the collection buffer to the MD5 context.
 *
 * The amount is trimmed so that the running total never exceeds
 * MAX_FUZ1_LEN.  A zero `len` is a no-op.
 *
 * Returns 1 while the running total is still below MAX_FUZ1_LEN (including
 * the zero-length case) and 0 once the limit has been reached, which tells
 * the caller to stop collecting. */
static inline u_char
add_sum(DCC_GOT_CKS *cks, int len)
{
    int i;

    if (!len)
        return 1;

    /* ignore the end of very long spam, since
     * it is likely to make the checksum differ */
    i = MAX_FUZ1_LEN - (FZ1.total + len);
    if (i < 0)
        len += i;                       /* i is negative: trims len */
    MD5Update(&FZ1.md5, FZ1.buf, len);
    return (FZ1.total += len) < MAX_FUZ1_LEN;
}



/* Add `bp_len` bytes of message text at `bp` to fuzzy checksum #1.
 *
 * Does nothing if the DCC_CK_FUZ1 slot is not active or if MAX_FUZ1_LEN
 * bytes have already been summed.
 *
 * Each input byte is passed through dcc_ck_url().  The low bits of its
 * result (DCC_CK_URL_MASK) select one of the cases below and the bits
 * above DCC_CK_URL_SHIFT replace the character being processed.
 * NOTE(review): dcc_ck_url() is also handed &cp, so it can presumably
 * move the write position itself; confirm against its definition.
 *
 * After the URL handling, only characters in 'a'..'z' are stored in the
 * buffer, '\n' ends a line, and everything else is dropped.  The buffer
 * is flushed into the MD5 context through add_sum() whenever it fills or
 * gets too close to full, and collection stops for good as soon as
 * add_sum() reports that the limit has been reached.
 *
 * FZ1.eol marks the end of the last complete line in the buffer, or is
 * null when the buffer was flushed in the middle of a line. */
void
dcc_ck_fuz1(DCC_GOT_CKS *cks, const char *bp, u_int bp_len)
{
    char *cp;
    DNSBL_WORK *blw;
    int i, len, c;

    if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1)
        return;

    if (FZ1.total >= MAX_FUZ1_LEN)
        return;

    cp = FZ1.cp;
    for (;;) {
        if (bp_len == 0) {
            /* Sum the buffer if it ends with a line. Note that
             * every message always ends with an artificial "\n".
             */
            if (FZ1.eol == cp) {
                add_sum(cks, cp - FZ1.buf);
                FZ1.eol = cp = FZ1.buf;
            }
            /* remember where to resume on the next call */
            FZ1.cp = cp;
            return;
        }
        --bp_len;

        c = *bp++;
        i = dcc_ck_url(&FZ1.url, c, &cp);
        c = i>>DCC_CK_URL_SHIFT;
        switch ((DCC_CK_URL)(i & DCC_CK_URL_MASK)) {
        case DCC_CK_URL_CHAR:
            break;

        case DCC_CK_URL_CK_LEN:
            /* Make room before starting a URL
             * if we are too close to the end of
             * the buffer for a maximum size URL */
            if (cp >= &FZ1.buf[sizeof(FZ1.buf)-DCC_URL_MAX]) {
                if (!FZ1.eol
                    || FZ1.eol < cp-DCC_FUZ1_MAX_LINE) {
                    /* no line start is known, or the
                     * current line is already too long
                     * to be discarded later, so flush
                     * everything */
                    if (!add_sum(cks, cp - FZ1.buf))
                        return;
                    FZ1.eol = 0;
                    cp = FZ1.buf;
                } else {
                    /* flush only the complete lines and
                     * slide the current partial line to
                     * the start of the buffer */
                    len = FZ1.eol - FZ1.buf;
                    if (!add_sum(cks, len))
                        return;
                    memmove(FZ1.buf, FZ1.eol, cp - FZ1.eol);
                    FZ1.eol = FZ1.buf;
                    cp -= len;
                }
            }
            /* forget any partially collected host name */
            blw = cks->dnsbl;
            if (blw != 0) {
                blw->tgt.dom.c[0] = '\0';
                blw->tgt_dom_len = 0;
            }
            continue;

        case DCC_CK_URL_HOST:
        case DCC_CK_URL_DOT:
            /* append the character to the host name being
             * collected, as long as it still fits */
            blw = cks->dnsbl;
            if (blw != 0 && blw->unhit.url != 0
                && blw->tgt_dom_len < ISZ(blw->tgt.dom))
                blw->tgt.dom.c[blw->tgt_dom_len++] = c;
            break;

        case DCC_CK_URL_HOST_END:
            /* terminate a non-empty host name that left room
             * for the NUL and hand it to url_dnsbl() */
            blw = cks->dnsbl;
            if (blw && blw->tgt_dom_len > 0
                && blw->tgt_dom_len < ISZ(blw->tgt.dom)) {
                blw->tgt.dom.c[blw->tgt_dom_len] = '\0';
                url_dnsbl(blw);
            }
            break;

        case DCC_CK_URL_HOST_RESET:
            blw = cks->dnsbl;
            if (blw != 0) {
                blw->tgt.dom.c[0] = '\0';
                blw->tgt_dom_len = 0;
            }
            break;

        case DCC_CK_URL_SKIP:
            continue;
        }

        /* collect only ASCII letters */
        if (c >= 'a' && c <= 'z') {
            /* Collect more of a new line */
            *cp = c;
            if (++cp < &FZ1.buf[sizeof(FZ1.buf)])
                continue;

            /* We are at the end of the buffer,
             * so add it to the checksum */
            if (!add_sum(cks, cp - FZ1.buf))
                return;
            cp = FZ1.buf;
            FZ1.eol = 0;
            continue;
        }

        if (c == '\n') {
            /* Ignore short lines starting with some strings */
            if (FZ1.eol
                && (len = cp - FZ1.eol) > 0
                && len <= DCC_FUZ1_MAX_LINE
                && dear_sucker(FZ1.eol, len)) {
                /* rewind over the discarded line */
                cp = FZ1.eol;
                continue;
            }

            /* Add the line to the checksum if we do not
             * have room in the buffer for another line */
            if (cp >= &FZ1.buf[sizeof(FZ1.buf)
                               - (DCC_FUZ1_MAX_LINE
                                  + DCC_HTTPS_LEN)]) {
                if (!add_sum(cks, cp - FZ1.buf))
                    return;
                cp = FZ1.buf;
            }
            FZ1.eol = cp;
        }
    }
}



/* Finish fuzzy checksum #1.
 *
 * Does nothing if the DCC_CK_FUZ1 slot is not active.  If fewer than 30
 * bytes were summed, the slot is marked DCC_CK_INVALID instead of being
 * given a checksum.  Otherwise the MD5 digest is stored in the slot and
 * its rpt2srvr flag is set. */
void
dcc_ck_fuz1_fin(DCC_GOT_CKS *cks)
{
    if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1)
        return;

    /* we cannot compute a checksum on an empty or nearly empty message */
    if (FZ1.total < 30) {
        cks->sums[DCC_CK_FUZ1].type = DCC_CK_INVALID;
        return;
    }

    MD5Final(cks->sums[DCC_CK_FUZ1].sum, &FZ1.md5);
    cks->sums[DCC_CK_FUZ1].rpt2srvr = 1;
}