Mercurial > notdcc
comparison dcclib/ckfuz1.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c7f6b056b673 |
---|---|
1 /* Distributed Checksum Clearinghouse | |
2 * | |
3 * compute fuzzy body checksum #1 | |
4 * | |
5 * Copyright (c) 2008 by Rhyolite Software, LLC | |
6 * | |
7 * This agreement is not applicable to any entity which sells anti-spam | |
8 * solutions to others or provides an anti-spam solution as part of a | |
9 * security solution sold to other entities, or to a private network | |
10 * which employs the DCC or uses data provided by operation of the DCC | |
11 * but does not provide corresponding data to other users. | |
12 * | |
13 * Permission to use, copy, modify, and distribute this software without | |
14 * changes for any purpose with or without fee is hereby granted, provided | |
15 * that the above copyright notice and this permission notice appear in all | |
16 * copies and any distributed versions or copies are either unchanged | |
17 * or not called anything similar to "DCC" or "Distributed Checksum | |
18 * Clearinghouse". | |
19 * | |
20 * Parties not eligible to receive a license under this agreement can | |
21 * obtain a commercial license to use DCC by contacting Rhyolite Software | |
22 * at sales@rhyolite.com. | |
23 * | |
24 * A commercial license would be for Distributed Checksum and Reputation | |
25 * Clearinghouse software. That software includes additional features. This | |
26 * free license for Distributed ChecksumClearinghouse Software does not in any | |
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse | |
28 * software | |
29 * | |
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL | |
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES | |
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC | |
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES | |
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, | |
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS | |
37 * SOFTWARE. | |
38 * | |
39 * Rhyolite Software DCC 1.3.103-1.59 $Revision$ | |
40 */ | |
41 | |
42 #include "dcc_ck.h" | |
43 | |
44 #define FZ1 cks->fuz1 | |
45 | |
46 #define MAX_FUZ1_LEN (4*1024) | |
47 | |
48 | |
49 void | |
50 dcc_ck_fuz1_init(DCC_GOT_CKS *cks) | |
51 { | |
52 cks->sums[DCC_CK_FUZ1].type = DCC_CK_FUZ1; | |
53 FZ1.total = 0; /* bytes summed */ | |
54 FZ1.eol = FZ1.cp = FZ1.buf; | |
55 FZ1.url.st = DCC_URL_ST_IDLE; | |
56 | |
57 MD5Init(&FZ1.md5); | |
58 } | |
59 | |
60 | |
61 | |
62 static inline u_char /* 0=keep the line, 1=discard it */ | |
63 dear_sucker(const char *cp, u_int llen) | |
64 { | |
65 #define CK_WORD(w) (llen >= sizeof(w) && !strncmp(cp, w, LITZ(w))) | |
66 | |
67 if (CK_WORD("dear")) | |
68 return 1; | |
69 if (CK_WORD("hello")) | |
70 return 1; | |
71 if (CK_WORD("greeting")) | |
72 return 1; | |
73 if (CK_WORD("date")) | |
74 return 1; | |
75 | |
76 return 0; | |
77 #undef CKWORD | |
78 } | |
79 | |
80 | |
81 | |
82 static inline u_char | |
83 add_sum(DCC_GOT_CKS *cks, int len) | |
84 { | |
85 int i; | |
86 | |
87 if (!len) | |
88 return 1; | |
89 | |
90 /* ignore the end of very long spam, since | |
91 * it is likely to make the checksum differ */ | |
92 i = MAX_FUZ1_LEN - (FZ1.total + len); | |
93 if (i < 0) | |
94 len += i; | |
95 MD5Update(&FZ1.md5, FZ1.buf, len); | |
96 return (FZ1.total += len) < MAX_FUZ1_LEN; | |
97 } | |
98 | |
99 | |
100 | |
101 void | |
102 dcc_ck_fuz1(DCC_GOT_CKS *cks, const char *bp, u_int bp_len) | |
103 { | |
104 char *cp; | |
105 DNSBL_WORK *blw; | |
106 int i, len, c; | |
107 | |
108 if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1) | |
109 return; | |
110 | |
111 if (FZ1.total >= MAX_FUZ1_LEN) | |
112 return; | |
113 | |
114 cp = FZ1.cp; | |
115 | |
116 for (;;) { | |
117 if (bp_len == 0) { | |
118 /* Sum the buffer if it ends with a line. Note that | |
119 * every message always ends with an artificial "\n". */ | |
120 if (FZ1.eol == cp) { | |
121 add_sum(cks, cp - FZ1.buf); | |
122 FZ1.eol = cp = FZ1.buf; | |
123 } | |
124 FZ1.cp = cp; | |
125 return; | |
126 } | |
127 --bp_len; | |
128 c = *bp++; | |
129 | |
130 i = dcc_ck_url(&FZ1.url, c, &cp); | |
131 c = i>>DCC_CK_URL_SHIFT; | |
132 switch ((DCC_CK_URL)(i & DCC_CK_URL_MASK)) { | |
133 case DCC_CK_URL_CHAR: | |
134 break; | |
135 case DCC_CK_URL_CK_LEN: | |
136 /* Make room before starting a URL | |
137 * if we are too close to the end of | |
138 * the buffer for a maximum size URL */ | |
139 if (cp >= &FZ1.buf[sizeof(FZ1.buf)-DCC_URL_MAX]) { | |
140 if (!FZ1.eol | |
141 || FZ1.eol < cp-DCC_FUZ1_MAX_LINE) { | |
142 if (!add_sum(cks, cp - FZ1.buf)) | |
143 return; | |
144 FZ1.eol = 0; | |
145 cp = FZ1.buf; | |
146 } else { | |
147 len = FZ1.eol - FZ1.buf; | |
148 if (!add_sum(cks, len)) | |
149 return; | |
150 memmove(FZ1.buf, FZ1.eol, cp - FZ1.eol); | |
151 FZ1.eol = FZ1.buf; | |
152 cp -= len; | |
153 } | |
154 } | |
155 blw = cks->dnsbl; | |
156 if (blw != 0) { | |
157 blw->tgt.dom.c[0] = '\0'; | |
158 blw->tgt_dom_len = 0; | |
159 } | |
160 continue; | |
161 case DCC_CK_URL_HOST: | |
162 case DCC_CK_URL_DOT: | |
163 blw = cks->dnsbl; | |
164 if (blw != 0 | |
165 && blw->unhit.url != 0 | |
166 && blw->tgt_dom_len < ISZ(blw->tgt.dom)) | |
167 blw->tgt.dom.c[blw->tgt_dom_len++] = c; | |
168 break; | |
169 case DCC_CK_URL_HOST_END: | |
170 blw = cks->dnsbl; | |
171 if (blw && blw->tgt_dom_len > 0 | |
172 && blw->tgt_dom_len < ISZ(blw->tgt.dom)) { | |
173 blw->tgt.dom.c[blw->tgt_dom_len] = '\0'; | |
174 url_dnsbl(blw); | |
175 } | |
176 break; | |
177 case DCC_CK_URL_HOST_RESET: | |
178 blw = cks->dnsbl; | |
179 if (blw != 0) { | |
180 blw->tgt.dom.c[0] = '\0'; | |
181 blw->tgt_dom_len = 0; | |
182 } | |
183 break; | |
184 case DCC_CK_URL_SKIP: | |
185 continue; | |
186 } | |
187 | |
188 /* collect only ASCII letters */ | |
189 if (c >= 'a' && c <= 'z') { | |
190 /* Collect more of a new line */ | |
191 *cp = c; | |
192 if (++cp < &FZ1.buf[sizeof(FZ1.buf)]) | |
193 continue; | |
194 | |
195 /* We are at the end of the buffer, | |
196 * so add it to the checksum */ | |
197 if (!add_sum(cks, cp - FZ1.buf)) | |
198 return; | |
199 cp = FZ1.buf; | |
200 FZ1.eol = 0; | |
201 continue; | |
202 } | |
203 | |
204 if (c == '\n') { | |
205 /* Ignore short lines starting with some strings */ | |
206 if (FZ1.eol | |
207 && (len = cp - FZ1.eol) > 0 | |
208 && len <= DCC_FUZ1_MAX_LINE | |
209 && dear_sucker(FZ1.eol, len)) { | |
210 cp = FZ1.eol; | |
211 continue; | |
212 } | |
213 | |
214 /* Add the line to the checksum if we do not | |
215 * have room in the buffer for another line */ | |
216 if (cp >= &FZ1.buf[sizeof(FZ1.buf) - (DCC_FUZ1_MAX_LINE | |
217 + DCC_HTTPS_LEN)]) { | |
218 if (!add_sum(cks, cp - FZ1.buf)) | |
219 return; | |
220 cp = FZ1.buf; | |
221 } | |
222 FZ1.eol = cp; | |
223 } | |
224 } | |
225 } | |
226 | |
227 | |
228 | |
229 void | |
230 dcc_ck_fuz1_fin(DCC_GOT_CKS *cks) | |
231 { | |
232 if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1) | |
233 return; | |
234 | |
235 /* we cannot compute a checksum on an empty or nearly empty message */ | |
236 if (FZ1.total < 30) { | |
237 cks->sums[DCC_CK_FUZ1].type = DCC_CK_INVALID; | |
238 return; | |
239 } | |
240 | |
241 MD5Final(cks->sums[DCC_CK_FUZ1].sum, &FZ1.md5); | |
242 cks->sums[DCC_CK_FUZ1].rpt2srvr = 1; | |
243 } |