diff dcclib/ckfuz1.c @ 0:c7f6b056b673

First import of vendor version
author Peter Gervai <grin@grin.hu>
date Tue, 10 Mar 2009 13:49:58 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dcclib/ckfuz1.c	Tue Mar 10 13:49:58 2009 +0100
@@ -0,0 +1,243 @@
+/* Distributed Checksum Clearinghouse
+ *
+ * compute fuzzy body checksum #1
+ *
+ * Copyright (c) 2008 by Rhyolite Software, LLC
+ *
+ * This agreement is not applicable to any entity which sells anti-spam
+ * solutions to others or provides an anti-spam solution as part of a
+ * security solution sold to other entities, or to a private network
+ * which employs the DCC or uses data provided by operation of the DCC
+ * but does not provide corresponding data to other users.
+ *
+ * Permission to use, copy, modify, and distribute this software without
+ * changes for any purpose with or without fee is hereby granted, provided
+ * that the above copyright notice and this permission notice appear in all
+ * copies and any distributed versions or copies are either unchanged
+ * or not called anything similar to "DCC" or "Distributed Checksum
+ * Clearinghouse".
+ *
+ * Parties not eligible to receive a license under this agreement can
+ * obtain a commercial license to use DCC by contacting Rhyolite Software
+ * at sales@rhyolite.com.
+ *
+ * A commercial license would be for Distributed Checksum and Reputation
+ * Clearinghouse software.  That software includes additional features.  This
+ * free license for Distributed ChecksumClearinghouse Software does not in any
+ * way grant permision to use Distributed Checksum and Reputation Clearinghouse
+ * software
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
+ * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Rhyolite Software DCC 1.3.103-1.59 $Revision$
+ */
+
+#include "dcc_ck.h"
+
+#define FZ1  cks->fuz1		/* fuzzy #1 state; needs a variable named cks in scope */
+
+#define MAX_FUZ1_LEN	(4*1024)	/* add_sum() never sums more bytes than this */
+
+
+/* Get ready to compute fuzzy body checksum #1 for a new message.
+ *	cks	checksum state of the message about to be scanned
+ * The MD5 sum is restarted, the count of summed bytes is cleared, the
+ * line buffer is emptied, the URL state is set to DCC_URL_ST_IDLE, and
+ * the checksum slot is marked DCC_CK_FUZ1. */
+void
+dcc_ck_fuz1_init(DCC_GOT_CKS *cks)
+{
+	/* nothing has been summed yet */
+	MD5Init(&FZ1.md5);
+	FZ1.total = 0;
+
+	/* an empty buffer ends at a line boundary */
+	FZ1.cp = FZ1.buf;
+	FZ1.eol = FZ1.buf;
+
+	FZ1.url.st = DCC_URL_ST_IDLE;
+
+	/* dcc_ck_fuz1() and dcc_ck_fuz1_fin() return at once without this */
+	cks->sums[DCC_CK_FUZ1].type = DCC_CK_FUZ1;
+}
+
+
+
+/* Decide whether a collected line is a salutation or date line that
+ * should be left out of the checksum.
+ *	cp	start of the line; it need not be NUL terminated
+ *	llen	number of bytes in the line
+ * Returns 1 to discard the line or 0 to keep it.
+ *
+ * The caller in this file collects only lower case ASCII letters, so the
+ * words are lower case and contain no white space.  Only the first
+ * LITZ(word) bytes are compared, so the word need only start the line.
+ * The line must be longer than the word, because sizeof() counts
+ * the trailing NUL of the string literal. */
+static inline u_char			/* 0=keep the line, 1=discard it */
+dear_sucker(const char *cp, u_int llen)
+{
+#define CK_WORD(w) (llen >= sizeof(w) && !strncmp(cp, w, LITZ(w)))
+
+	if (CK_WORD("dear")
+	    || CK_WORD("hello")
+	    || CK_WORD("greeting")
+	    || CK_WORD("date"))
+		return 1;
+
+	return 0;
+
+	/* undefine the helper under its real name so that it does not
+	 * leak into the rest of the file */
+#undef CK_WORD
+}
+
+
+
+/* Add the first len bytes of FZ1.buf to the MD5 sum.
+ *	cks	checksum state; FZ1 is cks->fuz1
+ *	len	number of bytes at the start of FZ1.buf to add
+ * Returns 1 if more text can be summed later or 0 once MAX_FUZ1_LEN
+ * bytes have been summed.  A length of 0 changes nothing and returns 1. */
+static inline u_char
+add_sum(DCC_GOT_CKS *cks, int len)
+{
+	int room;
+
+	if (len == 0)
+		return 1;
+
+	/* Very long spam is likely to differ near its end, so trim
+	 * whatever would push the total past the limit.
+	 * A negative value is the number of excess bytes. */
+	room = MAX_FUZ1_LEN - (FZ1.total + len);
+	if (room < 0)
+		len += room;
+
+	MD5Update(&FZ1.md5, FZ1.buf, len);
+	FZ1.total += len;
+
+	return FZ1.total < MAX_FUZ1_LEN;
+}
+
+
+
+void			/* Add a piece of message body to fuzzy checksum #1.
+			 *	cks	checksum state; FZ1 is cks->fuz1
+			 *	bp	the next bytes of the body
+			 *	bp_len	number of bytes at bp
+			 * Every byte is first given to dcc_ck_url().  Lower
+			 * case ASCII letters are then collected in FZ1.buf
+			 * and the buffer is added to the MD5 sum a line or a
+			 * buffer full at a time.  The position in the buffer
+			 * is kept in FZ1.cp between calls.
+			 * Nothing happens unless the checksum is active and
+			 * fewer than MAX_FUZ1_LEN bytes have been summed. */
+dcc_ck_fuz1(DCC_GOT_CKS *cks, const char *bp, u_int bp_len)
+{
+	char *cp;			/* next free byte in FZ1.buf */
+	DNSBL_WORK *blw;		/* cks->dnsbl, which can be null */
+	int i, len, c;
+
+	if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1)
+		return;			/* not set up by dcc_ck_fuz1_init() */
+
+	if (FZ1.total >= MAX_FUZ1_LEN)
+		return;			/* already summed as much as allowed */
+
+	cp = FZ1.cp;			/* resume where the last call stopped */
+
+	for (;;) {
+		if (bp_len == 0) {
+			/* Sum the buffer if it ends with a line.  Note that
+			 * every message always ends with an artificial "\n". */
+			if (FZ1.eol == cp) {
+				add_sum(cks, cp - FZ1.buf);
+				FZ1.eol = cp = FZ1.buf;
+			}
+			FZ1.cp = cp;	/* save the position for the next call */
+			return;
+		}
+		--bp_len;
+		c = *bp++;
+
+		/* The result has an action code in its low bits and a
+		 * character above DCC_CK_URL_SHIFT.  dcc_ck_url() is given
+		 * &cp and so may move cp itself; what it does with the
+		 * buffer is not visible here. */
+		i = dcc_ck_url(&FZ1.url, c, &cp);
+		c = i>>DCC_CK_URL_SHIFT;
+		switch ((DCC_CK_URL)(i & DCC_CK_URL_MASK)) {
+		case DCC_CK_URL_CHAR:
+			break;		/* go on to collect the character */
+		case DCC_CK_URL_CK_LEN:
+			/* Make room before starting a URL
+			 * if we are too close to the end of
+			 * the buffer for a maximum size URL */
+			if (cp >= &FZ1.buf[sizeof(FZ1.buf)-DCC_URL_MAX]) {
+				if (!FZ1.eol
+				    || FZ1.eol < cp-DCC_FUZ1_MAX_LINE) {
+					/* There is no line end in the buffer
+					 * or the current line is already
+					 * long, so sum all of the buffer.
+					 * add_sum() fails only at the limit,
+					 * after which later calls return at
+					 * once and FZ1.cp is not needed. */
+					if (!add_sum(cks, cp - FZ1.buf))
+					    return;
+					FZ1.eol = 0;
+					cp = FZ1.buf;
+				} else {
+					/* Sum the complete lines and slide
+					 * the current partial line to the
+					 * start of the buffer. */
+					len = FZ1.eol - FZ1.buf;
+					if (!add_sum(cks, len))
+					    return;
+					memmove(FZ1.buf, FZ1.eol, cp - FZ1.eol);
+					FZ1.eol = FZ1.buf;
+					cp -= len;
+				}
+			}
+			blw = cks->dnsbl;
+			if (blw != 0) {	/* forget any previous target domain */
+				blw->tgt.dom.c[0] = '\0';
+				blw->tgt_dom_len = 0;
+			}
+			continue;	/* this byte is not collected */
+		case DCC_CK_URL_HOST:
+		case DCC_CK_URL_DOT:
+			blw = cks->dnsbl;
+			if (blw != 0
+			    && blw->unhit.url != 0
+			    && blw->tgt_dom_len < ISZ(blw->tgt.dom))
+				blw->tgt.dom.c[blw->tgt_dom_len++] = c;
+			break;		/* the character is also collected */
+		case DCC_CK_URL_HOST_END:
+			blw = cks->dnsbl;
+			if (blw && blw->tgt_dom_len > 0
+			    && blw->tgt_dom_len < ISZ(blw->tgt.dom)) {
+				/* terminate the saved host name and
+				 * hand it to url_dnsbl() */
+				blw->tgt.dom.c[blw->tgt_dom_len] = '\0';
+				url_dnsbl(blw);
+			}
+			break;
+		case DCC_CK_URL_HOST_RESET:
+			blw = cks->dnsbl;
+			if (blw != 0) {	/* throw away the saved host name */
+				blw->tgt.dom.c[0] = '\0';
+				blw->tgt_dom_len = 0;
+			}
+			break;
+		case DCC_CK_URL_SKIP:
+			continue;	/* drop this byte */
+		}
+
+		/* collect only ASCII letters
+		 * Only 'a' through 'z' are stored.  A '\n' ends a line
+		 * below and every other byte is dropped. */
+		if (c >= 'a' && c <= 'z') {
+			/* Collect more of a new line */
+			*cp = c;
+			if (++cp < &FZ1.buf[sizeof(FZ1.buf)])
+				continue;
+
+			/* We are at the end of the buffer,
+			 * so add it to the checksum */
+			if (!add_sum(cks, cp - FZ1.buf))
+				return;
+			cp = FZ1.buf;
+			FZ1.eol = 0;	/* no line starts in the buffer now */
+			continue;
+		}
+
+		if (c == '\n') {
+			/* Ignore short lines starting with some strings */
+			if (FZ1.eol
+			    && (len = cp - FZ1.eol) > 0
+			    && len <= DCC_FUZ1_MAX_LINE
+			    && dear_sucker(FZ1.eol, len)) {
+				cp = FZ1.eol;	/* back up over the line */
+				continue;
+			}
+
+			/* Add the line to the checksum if we do not
+			 * have room in the buffer for another line */
+			if (cp >= &FZ1.buf[sizeof(FZ1.buf) - (DCC_FUZ1_MAX_LINE
+							+ DCC_HTTPS_LEN)]) {
+				if (!add_sum(cks, cp - FZ1.buf))
+					return;
+				cp = FZ1.buf;
+			}
+			FZ1.eol = cp;	/* the next line starts here */
+		}
+	}
+}
+
+
+
+/* Finish fuzzy body checksum #1 after all of the message has been seen.
+ *	cks	checksum state of the message
+ * With at least 30 summed bytes, the MD5 result is stored in the
+ * DCC_CK_FUZ1 slot and its rpt2srvr flag is set.  With fewer bytes the
+ * slot is marked DCC_CK_INVALID.  Nothing is done when the slot is not
+ * of type DCC_CK_FUZ1. */
+void
+dcc_ck_fuz1_fin(DCC_GOT_CKS *cks)
+{
+	if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1)
+		return;
+
+	if (FZ1.total >= 30) {
+		MD5Final(cks->sums[DCC_CK_FUZ1].sum, &FZ1.md5);
+		cks->sums[DCC_CK_FUZ1].rpt2srvr = 1;
+	} else {
+		/* an empty or nearly empty message
+		 * cannot give a useful checksum */
+		cks->sums[DCC_CK_FUZ1].type = DCC_CK_INVALID;
+	}
+}