0
|
1 /* Distributed Checksum Clearinghouse |
|
2 * |
|
3 * compute fuzzy body checksum #1 |
|
4 * |
|
5 * Copyright (c) 2008 by Rhyolite Software, LLC |
|
6 * |
|
7 * This agreement is not applicable to any entity which sells anti-spam |
|
8 * solutions to others or provides an anti-spam solution as part of a |
|
9 * security solution sold to other entities, or to a private network |
|
10 * which employs the DCC or uses data provided by operation of the DCC |
|
11 * but does not provide corresponding data to other users. |
|
12 * |
|
13 * Permission to use, copy, modify, and distribute this software without |
|
14 * changes for any purpose with or without fee is hereby granted, provided |
|
15 * that the above copyright notice and this permission notice appear in all |
|
16 * copies and any distributed versions or copies are either unchanged |
|
17 * or not called anything similar to "DCC" or "Distributed Checksum |
|
18 * Clearinghouse". |
|
19 * |
|
20 * Parties not eligible to receive a license under this agreement can |
|
21 * obtain a commercial license to use DCC by contacting Rhyolite Software |
|
22 * at sales@rhyolite.com. |
|
23 * |
|
24 * A commercial license would be for Distributed Checksum and Reputation |
|
25 * Clearinghouse software. That software includes additional features. This |
|
26 * free license for Distributed ChecksumClearinghouse Software does not in any |
|
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse |
|
28 * software |
|
29 * |
|
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL |
|
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES |
|
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC |
|
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES |
|
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
|
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
|
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS |
|
37 * SOFTWARE. |
|
38 * |
|
39 * Rhyolite Software DCC 1.3.103-1.59 $Revision$ |
|
40 */ |
|
41 |
|
42 #include "dcc_ck.h" |
|
43 |
|
/* shorthand for the fuzzy checksum #1 state of the `cks` that is in scope */
#define FZ1		(cks->fuz1)

/* at most this many collected bytes are fed to the checksum */
#define MAX_FUZ1_LEN	(4 * 1024)
|
47 |
|
48 |
|
49 void |
|
50 dcc_ck_fuz1_init(DCC_GOT_CKS *cks) |
|
51 { |
|
52 cks->sums[DCC_CK_FUZ1].type = DCC_CK_FUZ1; |
|
53 FZ1.total = 0; /* bytes summed */ |
|
54 FZ1.eol = FZ1.cp = FZ1.buf; |
|
55 FZ1.url.st = DCC_URL_ST_IDLE; |
|
56 |
|
57 MD5Init(&FZ1.md5); |
|
58 } |
|
59 |
|
60 |
|
61 |
|
62 static inline u_char /* 0=keep the line, 1=discard it */ |
|
63 dear_sucker(const char *cp, u_int llen) |
|
64 { |
|
65 #define CK_WORD(w) (llen >= sizeof(w) && !strncmp(cp, w, LITZ(w))) |
|
66 |
|
67 if (CK_WORD("dear")) |
|
68 return 1; |
|
69 if (CK_WORD("hello")) |
|
70 return 1; |
|
71 if (CK_WORD("greeting")) |
|
72 return 1; |
|
73 if (CK_WORD("date")) |
|
74 return 1; |
|
75 |
|
76 return 0; |
|
77 #undef CKWORD |
|
78 } |
|
79 |
|
80 |
|
81 |
|
82 static inline u_char |
|
83 add_sum(DCC_GOT_CKS *cks, int len) |
|
84 { |
|
85 int i; |
|
86 |
|
87 if (!len) |
|
88 return 1; |
|
89 |
|
90 /* ignore the end of very long spam, since |
|
91 * it is likely to make the checksum differ */ |
|
92 i = MAX_FUZ1_LEN - (FZ1.total + len); |
|
93 if (i < 0) |
|
94 len += i; |
|
95 MD5Update(&FZ1.md5, FZ1.buf, len); |
|
96 return (FZ1.total += len) < MAX_FUZ1_LEN; |
|
97 } |
|
98 |
|
99 |
|
100 |
|
101 void |
|
102 dcc_ck_fuz1(DCC_GOT_CKS *cks, const char *bp, u_int bp_len) |
|
103 { |
|
104 char *cp; |
|
105 DNSBL_WORK *blw; |
|
106 int i, len, c; |
|
107 |
|
108 if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1) |
|
109 return; |
|
110 |
|
111 if (FZ1.total >= MAX_FUZ1_LEN) |
|
112 return; |
|
113 |
|
114 cp = FZ1.cp; |
|
115 |
|
116 for (;;) { |
|
117 if (bp_len == 0) { |
|
118 /* Sum the buffer if it ends with a line. Note that |
|
119 * every message always ends with an artificial "\n". */ |
|
120 if (FZ1.eol == cp) { |
|
121 add_sum(cks, cp - FZ1.buf); |
|
122 FZ1.eol = cp = FZ1.buf; |
|
123 } |
|
124 FZ1.cp = cp; |
|
125 return; |
|
126 } |
|
127 --bp_len; |
|
128 c = *bp++; |
|
129 |
|
130 i = dcc_ck_url(&FZ1.url, c, &cp); |
|
131 c = i>>DCC_CK_URL_SHIFT; |
|
132 switch ((DCC_CK_URL)(i & DCC_CK_URL_MASK)) { |
|
133 case DCC_CK_URL_CHAR: |
|
134 break; |
|
135 case DCC_CK_URL_CK_LEN: |
|
136 /* Make room before starting a URL |
|
137 * if we are too close to the end of |
|
138 * the buffer for a maximum size URL */ |
|
139 if (cp >= &FZ1.buf[sizeof(FZ1.buf)-DCC_URL_MAX]) { |
|
140 if (!FZ1.eol |
|
141 || FZ1.eol < cp-DCC_FUZ1_MAX_LINE) { |
|
142 if (!add_sum(cks, cp - FZ1.buf)) |
|
143 return; |
|
144 FZ1.eol = 0; |
|
145 cp = FZ1.buf; |
|
146 } else { |
|
147 len = FZ1.eol - FZ1.buf; |
|
148 if (!add_sum(cks, len)) |
|
149 return; |
|
150 memmove(FZ1.buf, FZ1.eol, cp - FZ1.eol); |
|
151 FZ1.eol = FZ1.buf; |
|
152 cp -= len; |
|
153 } |
|
154 } |
|
155 blw = cks->dnsbl; |
|
156 if (blw != 0) { |
|
157 blw->tgt.dom.c[0] = '\0'; |
|
158 blw->tgt_dom_len = 0; |
|
159 } |
|
160 continue; |
|
161 case DCC_CK_URL_HOST: |
|
162 case DCC_CK_URL_DOT: |
|
163 blw = cks->dnsbl; |
|
164 if (blw != 0 |
|
165 && blw->unhit.url != 0 |
|
166 && blw->tgt_dom_len < ISZ(blw->tgt.dom)) |
|
167 blw->tgt.dom.c[blw->tgt_dom_len++] = c; |
|
168 break; |
|
169 case DCC_CK_URL_HOST_END: |
|
170 blw = cks->dnsbl; |
|
171 if (blw && blw->tgt_dom_len > 0 |
|
172 && blw->tgt_dom_len < ISZ(blw->tgt.dom)) { |
|
173 blw->tgt.dom.c[blw->tgt_dom_len] = '\0'; |
|
174 url_dnsbl(blw); |
|
175 } |
|
176 break; |
|
177 case DCC_CK_URL_HOST_RESET: |
|
178 blw = cks->dnsbl; |
|
179 if (blw != 0) { |
|
180 blw->tgt.dom.c[0] = '\0'; |
|
181 blw->tgt_dom_len = 0; |
|
182 } |
|
183 break; |
|
184 case DCC_CK_URL_SKIP: |
|
185 continue; |
|
186 } |
|
187 |
|
188 /* collect only ASCII letters */ |
|
189 if (c >= 'a' && c <= 'z') { |
|
190 /* Collect more of a new line */ |
|
191 *cp = c; |
|
192 if (++cp < &FZ1.buf[sizeof(FZ1.buf)]) |
|
193 continue; |
|
194 |
|
195 /* We are at the end of the buffer, |
|
196 * so add it to the checksum */ |
|
197 if (!add_sum(cks, cp - FZ1.buf)) |
|
198 return; |
|
199 cp = FZ1.buf; |
|
200 FZ1.eol = 0; |
|
201 continue; |
|
202 } |
|
203 |
|
204 if (c == '\n') { |
|
205 /* Ignore short lines starting with some strings */ |
|
206 if (FZ1.eol |
|
207 && (len = cp - FZ1.eol) > 0 |
|
208 && len <= DCC_FUZ1_MAX_LINE |
|
209 && dear_sucker(FZ1.eol, len)) { |
|
210 cp = FZ1.eol; |
|
211 continue; |
|
212 } |
|
213 |
|
214 /* Add the line to the checksum if we do not |
|
215 * have room in the buffer for another line */ |
|
216 if (cp >= &FZ1.buf[sizeof(FZ1.buf) - (DCC_FUZ1_MAX_LINE |
|
217 + DCC_HTTPS_LEN)]) { |
|
218 if (!add_sum(cks, cp - FZ1.buf)) |
|
219 return; |
|
220 cp = FZ1.buf; |
|
221 } |
|
222 FZ1.eol = cp; |
|
223 } |
|
224 } |
|
225 } |
|
226 |
|
227 |
|
228 |
|
229 void |
|
230 dcc_ck_fuz1_fin(DCC_GOT_CKS *cks) |
|
231 { |
|
232 if (cks->sums[DCC_CK_FUZ1].type != DCC_CK_FUZ1) |
|
233 return; |
|
234 |
|
235 /* we cannot compute a checksum on an empty or nearly empty message */ |
|
236 if (FZ1.total < 30) { |
|
237 cks->sums[DCC_CK_FUZ1].type = DCC_CK_INVALID; |
|
238 return; |
|
239 } |
|
240 |
|
241 MD5Final(cks->sums[DCC_CK_FUZ1].sum, &FZ1.md5); |
|
242 cks->sums[DCC_CK_FUZ1].rpt2srvr = 1; |
|
243 } |