comparison dcclib/ckfuz1.c @ 0:c7f6b056b673

First import of vendor version
author Peter Gervai <grin@grin.hu>
date Tue, 10 Mar 2009 13:49:58 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c7f6b056b673
1 /* Distributed Checksum Clearinghouse
2 *
3 * compute fuzzy body checksum #1
4 *
5 * Copyright (c) 2008 by Rhyolite Software, LLC
6 *
7 * This agreement is not applicable to any entity which sells anti-spam
8 * solutions to others or provides an anti-spam solution as part of a
9 * security solution sold to other entities, or to a private network
10 * which employs the DCC or uses data provided by operation of the DCC
11 * but does not provide corresponding data to other users.
12 *
13 * Permission to use, copy, modify, and distribute this software without
14 * changes for any purpose with or without fee is hereby granted, provided
15 * that the above copyright notice and this permission notice appear in all
16 * copies and any distributed versions or copies are either unchanged
17 * or not called anything similar to "DCC" or "Distributed Checksum
18 * Clearinghouse".
19 *
20 * Parties not eligible to receive a license under this agreement can
21 * obtain a commercial license to use DCC by contacting Rhyolite Software
22 * at sales@rhyolite.com.
23 *
24 * A commercial license would be for Distributed Checksum and Reputation
25 * Clearinghouse software. That software includes additional features. This
26 * free license for Distributed ChecksumClearinghouse Software does not in any
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
28 * software
29 *
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
37 * SOFTWARE.
38 *
39 * Rhyolite Software DCC 1.3.103-1.59 $Revision$
40 */
41
42 #include "dcc_ck.h"
43
44 #define FZ1 cks->fuz1 /* fuzzy checksum #1 state; requires a DCC_GOT_CKS pointer named cks in scope */
45
46 #define MAX_FUZ1_LEN (4*1024) /* most bytes ever fed to the MD5; anything past this total is ignored */
47
48
49 void
50 dcc_ck_fuz1_init(DCC_GOT_CKS *cks)
51 {
52 cks->sums[DCC_CK_FUZ1].type = DCC_CK_FUZ1;
53 FZ1.total = 0; /* bytes summed */
54 FZ1.eol = FZ1.cp = FZ1.buf;
55 FZ1.url.st = DCC_URL_ST_IDLE;
56
57 MD5Init(&FZ1.md5);
58 }
59
60
61
62 static inline u_char /* 0=keep the line, 1=discard it */
63 dear_sucker(const char *cp, u_int llen)
64 {
65 #define CK_WORD(w) (llen >= sizeof(w) && !strncmp(cp, w, LITZ(w)))
66
67 if (CK_WORD("dear"))
68 return 1;
69 if (CK_WORD("hello"))
70 return 1;
71 if (CK_WORD("greeting"))
72 return 1;
73 if (CK_WORD("date"))
74 return 1;
75
76 return 0;
77 #undef CKWORD
78 }
79
80
81
82 static inline u_char
83 add_sum(DCC_GOT_CKS *cks, int len)
84 {
85 int i;
86
87 if (!len)
88 return 1;
89
90 /* ignore the end of very long spam, since
91 * it is likely to make the checksum differ */
92 i = MAX_FUZ1_LEN - (FZ1.total + len);
93 if (i < 0)
94 len += i;
95 MD5Update(&FZ1.md5, FZ1.buf, len);
96 return (FZ1.total += len) < MAX_FUZ1_LEN;
97 }
98
99
100
101 void
102 dcc_ck_fuz1(DCC_GOT_CKS *cks, const char *bp, u_int bp_len)
103 {
104 char *cp;
105 DNSBL_WORK *blw;
106 int i, len, c;
107
108 if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1)
109 return;
110
111 if (FZ1.total >= MAX_FUZ1_LEN)
112 return;
113
114 cp = FZ1.cp;
115
116 for (;;) {
117 if (bp_len == 0) {
118 /* Sum the buffer if it ends with a line. Note that
119 * every message always ends with an artificial "\n". */
120 if (FZ1.eol == cp) {
121 add_sum(cks, cp - FZ1.buf);
122 FZ1.eol = cp = FZ1.buf;
123 }
124 FZ1.cp = cp;
125 return;
126 }
127 --bp_len;
128 c = *bp++;
129
130 i = dcc_ck_url(&FZ1.url, c, &cp);
131 c = i>>DCC_CK_URL_SHIFT;
132 switch ((DCC_CK_URL)(i & DCC_CK_URL_MASK)) {
133 case DCC_CK_URL_CHAR:
134 break;
135 case DCC_CK_URL_CK_LEN:
136 /* Make room before starting a URL
137 * if we are too close to the end of
138 * the buffer for a maximum size URL */
139 if (cp >= &FZ1.buf[sizeof(FZ1.buf)-DCC_URL_MAX]) {
140 if (!FZ1.eol
141 || FZ1.eol < cp-DCC_FUZ1_MAX_LINE) {
142 if (!add_sum(cks, cp - FZ1.buf))
143 return;
144 FZ1.eol = 0;
145 cp = FZ1.buf;
146 } else {
147 len = FZ1.eol - FZ1.buf;
148 if (!add_sum(cks, len))
149 return;
150 memmove(FZ1.buf, FZ1.eol, cp - FZ1.eol);
151 FZ1.eol = FZ1.buf;
152 cp -= len;
153 }
154 }
155 blw = cks->dnsbl;
156 if (blw != 0) {
157 blw->tgt.dom.c[0] = '\0';
158 blw->tgt_dom_len = 0;
159 }
160 continue;
161 case DCC_CK_URL_HOST:
162 case DCC_CK_URL_DOT:
163 blw = cks->dnsbl;
164 if (blw != 0
165 && blw->unhit.url != 0
166 && blw->tgt_dom_len < ISZ(blw->tgt.dom))
167 blw->tgt.dom.c[blw->tgt_dom_len++] = c;
168 break;
169 case DCC_CK_URL_HOST_END:
170 blw = cks->dnsbl;
171 if (blw && blw->tgt_dom_len > 0
172 && blw->tgt_dom_len < ISZ(blw->tgt.dom)) {
173 blw->tgt.dom.c[blw->tgt_dom_len] = '\0';
174 url_dnsbl(blw);
175 }
176 break;
177 case DCC_CK_URL_HOST_RESET:
178 blw = cks->dnsbl;
179 if (blw != 0) {
180 blw->tgt.dom.c[0] = '\0';
181 blw->tgt_dom_len = 0;
182 }
183 break;
184 case DCC_CK_URL_SKIP:
185 continue;
186 }
187
188 /* collect only ASCII letters */
189 if (c >= 'a' && c <= 'z') {
190 /* Collect more of a new line */
191 *cp = c;
192 if (++cp < &FZ1.buf[sizeof(FZ1.buf)])
193 continue;
194
195 /* We are at the end of the buffer,
196 * so add it to the checksum */
197 if (!add_sum(cks, cp - FZ1.buf))
198 return;
199 cp = FZ1.buf;
200 FZ1.eol = 0;
201 continue;
202 }
203
204 if (c == '\n') {
205 /* Ignore short lines starting with some strings */
206 if (FZ1.eol
207 && (len = cp - FZ1.eol) > 0
208 && len <= DCC_FUZ1_MAX_LINE
209 && dear_sucker(FZ1.eol, len)) {
210 cp = FZ1.eol;
211 continue;
212 }
213
214 /* Add the line to the checksum if we do not
215 * have room in the buffer for another line */
216 if (cp >= &FZ1.buf[sizeof(FZ1.buf) - (DCC_FUZ1_MAX_LINE
217 + DCC_HTTPS_LEN)]) {
218 if (!add_sum(cks, cp - FZ1.buf))
219 return;
220 cp = FZ1.buf;
221 }
222 FZ1.eol = cp;
223 }
224 }
225 }
226
227
228
229 void
230 dcc_ck_fuz1_fin(DCC_GOT_CKS *cks)
231 {
232 if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1)
233 return;
234
235 /* we cannot compute a checksum on an empty or nearly empty message */
236 if (FZ1.total < 30) {
237 cks->sums[DCC_CK_FUZ1].type = DCC_CK_INVALID;
238 return;
239 }
240
241 MD5Final(cks->sums[DCC_CK_FUZ1].sum, &FZ1.md5);
242 cks->sums[DCC_CK_FUZ1].rpt2srvr = 1;
243 }