0
|
1 /* Distributed Checksum Clearinghouse |
|
2 * |
|
3 * decode MIME for checksums |
|
4 * |
|
5 * Copyright (c) 2008 by Rhyolite Software, LLC |
|
6 * |
|
7 * This agreement is not applicable to any entity which sells anti-spam |
|
8 * solutions to others or provides an anti-spam solution as part of a |
|
9 * security solution sold to other entities, or to a private network |
|
10 * which employs the DCC or uses data provided by operation of the DCC |
|
11 * but does not provide corresponding data to other users. |
|
12 * |
|
13 * Permission to use, copy, modify, and distribute this software without |
|
14 * changes for any purpose with or without fee is hereby granted, provided |
|
15 * that the above copyright notice and this permission notice appear in all |
|
16 * copies and any distributed versions or copies are either unchanged |
|
17 * or not called anything similar to "DCC" or "Distributed Checksum |
|
18 * Clearinghouse". |
|
19 * |
|
20 * Parties not eligible to receive a license under this agreement can |
|
21 * obtain a commercial license to use DCC by contacting Rhyolite Software |
|
22 * at sales@rhyolite.com. |
|
23 * |
|
24 * A commercial license would be for Distributed Checksum and Reputation |
|
25 * Clearinghouse software. That software includes additional features. This |
|
26 * free license for Distributed ChecksumClearinghouse Software does not in any |
|
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse |
|
28 * software |
|
29 * |
|
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL |
|
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES |
|
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC |
|
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES |
|
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
|
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
|
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS |
|
37 * SOFTWARE. |
|
38 * |
|
39 * Rhyolite Software DCC 1.3.103-1.39 $Revision$ |
|
40 */ |
|
41 |
|
42 #include "dcc_ck.h" |
|
43 |
|
44 /* Notice MIME headers */ |
|
45 void |
|
46 dcc_ck_mime_hdr(DCC_GOT_CKS *cks, |
|
47 const char *hdr, /* entire header line or name only */ |
|
48 const char *str) /* header value if not after name */ |
|
49 { |
|
50 /* parse at least the header name */ |
|
51 cks->mhdr_st = CK_MHDR_ST_CE_CT; |
|
52 cks->mhdr_pos = 0; |
|
53 parse_mime_hdr(cks, hdr, strlen(hdr), 1); |
|
54 |
|
55 /* parse the header value if present and we care about the header */ |
|
56 if (str |
|
57 && cks->mhdr_st != CK_MHDR_ST_IDLE) { |
|
58 parse_mime_hdr(cks, ":", 1, 1); |
|
59 parse_mime_hdr(cks, str, strlen(str), 1); |
|
60 } |
|
61 |
|
62 /* force the end of the line */ |
|
63 if (cks->mhdr_st != CK_MHDR_ST_IDLE) |
|
64 parse_mime_hdr(cks, "\n", 1, 1); |
|
65 |
|
66 if (cks->mime_nest != 0) |
|
67 cks->mp_st = CK_MP_ST_PREAMBLE; |
|
68 |
|
69 cks->flags |= DCC_CKS_MIME_BOL; |
|
70 } |
|
71 |
|
72 |
|
73 |
|
74 static u_char /* 1=matched */ |
|
75 match(DCC_GOT_CKS *cks, |
|
76 enum CK_MHDR_ST ok, enum CK_MHDR_ST fail, |
|
77 const char *tgt_str, u_int tgt_len, |
|
78 const char **bp, u_int *bp_len) |
|
79 { |
|
80 u_int len; |
|
81 |
|
82 len = min(tgt_len - cks->mhdr_pos, *bp_len); |
|
83 if (strncasecmp(tgt_str + cks->mhdr_pos, *bp, len)) { |
|
84 /* switch to failure state if there is enough of the |
|
85 * string to know it does not match */ |
|
86 cks->mhdr_st = fail; |
|
87 return 0; |
|
88 } |
|
89 |
|
90 *bp += len; |
|
91 *bp_len -= len; |
|
92 if ((u_int)(cks->mhdr_pos += len) >= tgt_len) { |
|
93 /* switch to the success state on a match */ |
|
94 cks->mhdr_st = ok; |
|
95 cks->mhdr_pos = 0; |
|
96 return 1; |
|
97 } |
|
98 |
|
99 /* wait for more input */ |
|
100 return 0; |
|
101 } |
|
102 |
|
103 |
|
104 |
|
105 /* ignore white space */ |
|
106 static u_char /* 0=buffer empty */ |
|
107 span_ws(const char **bp, u_int *bp_len) |
|
108 { |
|
109 char c; |
|
110 while ((c = **bp) == ' ' || c == '\t' || c == '\r' || c == '\n') { |
|
111 ++*bp; |
|
112 if (--*bp_len == 0) |
|
113 return 0; |
|
114 } |
|
115 return 1; |
|
116 } |
|
117 |
|
118 |
|
119 |
|
120 /* skip to white space or after semicolon that precedes the next parameter */ |
|
121 static u_char /* 0=buffer empty */ |
|
122 skip_param(const char **bp, u_int *bp_len) |
|
123 { |
|
124 char c; |
|
125 while ((c = **bp) != ' ' && c != '\t' && c != '\r' && c != '\n') { |
|
126 ++*bp; |
|
127 if (c == ';') { |
|
128 --*bp_len; |
|
129 return 1; |
|
130 } |
|
131 if (--*bp_len == 0) |
|
132 return 0; |
|
133 } |
|
134 return 1; |
|
135 } |
|
136 |
|
137 |
|
138 |
|
139 /* Parse MIME headers |
|
140 * Look for (parts of) Content-Type and Content-Transfer-Encoding |
|
141 * headers in a buffer. There can be at most one significant (not part of |
|
142 * folded whitespace) '\n' in the buffer and only as the last byte */ |
|
143 u_char /* 1=blank line */ |
|
144 parse_mime_hdr(DCC_GOT_CKS *cks, |
|
145 const char *bp, u_int bp_len, |
|
146 u_char in_hdrs) /* 1=in RFC 822 headers */ |
|
147 { |
|
148 #define MMATCH(str,ok,fail) match(cks,CK_MHDR_ST_##ok,CK_MHDR_ST_##fail, \ |
|
149 str,sizeof(str)-1, &bp, &bp_len) |
|
150 char c; |
|
151 DCC_CK_BND *bndp; |
|
152 |
|
153 if ((cks->flags & DCC_CKS_MIME_BOL) |
|
154 && !in_hdrs) { |
|
155 c = *bp; |
|
156 if (c == '\r') { |
|
157 /* ignore CR to ease detecting blank line */ |
|
158 if (--bp_len == 0) |
|
159 return 0; |
|
160 c = *++bp; |
|
161 } |
|
162 if (c == '\n') |
|
163 return 1; /* this line is blank */ |
|
164 |
|
165 /* reset parser line without folded whitespace */ |
|
166 if (c != ' ' && c != '\t') { |
|
167 cks->mhdr_st = CK_MHDR_ST_CE_CT; |
|
168 cks->mhdr_pos = 0; |
|
169 } |
|
170 cks->flags &= ~DCC_CKS_MIME_BOL; |
|
171 } |
|
172 |
|
173 do { |
|
174 switch (cks->mhdr_st) { |
|
175 case CK_MHDR_ST_IDLE: |
|
176 return 0; |
|
177 |
|
178 case CK_MHDR_ST_CE_CT: |
|
179 /* This state always preceeds the following states */ |
|
180 if (MMATCH("Content-T", CT_WS, IDLE)) { |
|
181 switch (*bp) { |
|
182 case 'r': |
|
183 case 'R': |
|
184 cks->mhdr_st = CK_MHDR_ST_CE; |
|
185 break; |
|
186 case 'y': |
|
187 case 'Y': |
|
188 cks->mhdr_st = CK_MHDR_ST_CT; |
|
189 break; |
|
190 default: |
|
191 cks->mhdr_st = CK_MHDR_ST_IDLE; |
|
192 return 0; |
|
193 } |
|
194 } |
|
195 break; |
|
196 |
|
197 case CK_MHDR_ST_CE: |
|
198 MMATCH("ransfer-Encoding:", CE_WS, IDLE); |
|
199 break; |
|
200 case CK_MHDR_ST_CE_WS: |
|
201 if (!span_ws(&bp, &bp_len)) |
|
202 return 0; |
|
203 switch (*bp) { |
|
204 case 'b': |
|
205 case 'B': |
|
206 cks->mhdr_st = CK_MHDR_ST_B64; |
|
207 break; |
|
208 case 'q': |
|
209 case 'Q': |
|
210 cks->mhdr_st = CK_MHDR_ST_QP; |
|
211 break; |
|
212 default: |
|
213 cks->mhdr_st = CK_MHDR_ST_IDLE; |
|
214 return 0; |
|
215 } |
|
216 break; |
|
217 case CK_MHDR_ST_QP: |
|
218 if (MMATCH("quoted-printable", IDLE, IDLE)) |
|
219 cks->mime_ce = DCC_CK_CE_QP; |
|
220 break; |
|
221 case CK_MHDR_ST_B64: |
|
222 if (MMATCH("base64", IDLE, IDLE)) |
|
223 cks->mime_ce = DCC_CK_CE_B64; |
|
224 break; |
|
225 |
|
226 case CK_MHDR_ST_CT: |
|
227 MMATCH("ype:", CT_WS, IDLE); |
|
228 break; |
|
229 case CK_MHDR_ST_CT_WS: |
|
230 /* We have matched "Content-type:" */ |
|
231 if (!span_ws(&bp, &bp_len)) |
|
232 return 0; |
|
233 switch (*bp) { |
|
234 case 't': |
|
235 case 'T': |
|
236 cks->mhdr_st = CK_MHDR_ST_TEXT; |
|
237 break; |
|
238 case 'm': |
|
239 case 'M': |
|
240 /* do not nest too deeply */ |
|
241 if (in_hdrs |
|
242 || cks->mime_nest < DIM(cks->mime_bnd)) { |
|
243 cks->mhdr_st = CK_MHDR_ST_MULTIPART; |
|
244 } else { |
|
245 cks->mhdr_st = CK_MHDR_ST_TEXT; |
|
246 cks->mhdr_st = CK_MHDR_ST_IDLE; |
|
247 } |
|
248 break; |
|
249 default: |
|
250 /* assume it is binary noise if it does |
|
251 * not match "Content-type: [tTmM]" */ |
|
252 cks->mime_ct = DCC_CK_CT_BINARY; |
|
253 cks->mhdr_st = CK_MHDR_ST_IDLE; |
|
254 return 0; |
|
255 } |
|
256 break; |
|
257 case CK_MHDR_ST_TEXT: |
|
258 /* we are looking for "Text" in "Content-type: Text" */ |
|
259 if (MMATCH("text", HTML, IDLE)) |
|
260 cks->mime_ct = DCC_CK_CT_TEXT; |
|
261 break; |
|
262 case CK_MHDR_ST_HTML: |
|
263 /* look for "Content-type: Text/html" */ |
|
264 if (MMATCH("/html", CSET_SKIP_PARAM, CSET_SKIP_PARAM)) |
|
265 cks->mime_ct = DCC_CK_CT_HTML; |
|
266 break; |
|
267 case CK_MHDR_ST_CSET_SKIP_PARAM: |
|
268 /* Look for semicolon or whitespace preceding next |
|
269 * parameter after "Content-type: Text/html" */ |
|
270 if (skip_param(&bp, &bp_len)) |
|
271 cks->mhdr_st = CK_MHDR_ST_CSET_SPAN_WS; |
|
272 break; |
|
273 case CK_MHDR_ST_CSET_SPAN_WS: |
|
274 /* skip optional whitespace before next parameter */ |
|
275 if (span_ws(&bp, &bp_len)) |
|
276 cks->mhdr_st = CK_MHDR_ST_CSET; |
|
277 break; |
|
278 case CK_MHDR_ST_CSET: |
|
279 /* have matched "Content-Type: text...;" |
|
280 * and are looking for a "charset=" parameter */ |
|
281 MMATCH("charset=", CSET_ISO_8859, CSET_SKIP_PARAM); |
|
282 break; |
|
283 case CK_MHDR_ST_CSET_ISO_8859: |
|
284 /* We have matched "Content-Type: text...charset=" |
|
285 * and are looking for "ISO-8859-*". |
|
286 * Ignore leading '"' */ |
|
287 if (cks->mhdr_pos == 0 |
|
288 && bp_len > 0 && *bp == '"') { |
|
289 ++bp; |
|
290 --bp_len; |
|
291 } |
|
292 MMATCH("iso-8859-", CSET_ISO_X, IDLE); |
|
293 break; |
|
294 case CK_MHDR_ST_CSET_ISO_X: |
|
295 for (;;) { |
|
296 if (bp_len == 0) |
|
297 return 0; |
|
298 --bp_len; |
|
299 c = *bp++; |
|
300 if (c < '0' || c > '9') { |
|
301 if ((c == '"' || c == ' ' || c == '\t' |
|
302 || c == ';' |
|
303 || c == '\r' || c == '\n') |
|
304 && cks->mhdr_pos == 2) |
|
305 cks->mime_cset = dcc_cset_2; |
|
306 else |
|
307 cks->mime_cset = dcc_cset_1; |
|
308 cks->mhdr_st = CK_MHDR_ST_IDLE; |
|
309 return 0; |
|
310 } |
|
311 cks->mhdr_pos = cks->mhdr_pos*10 + c - '0'; |
|
312 if (cks->mhdr_pos > 99) { |
|
313 cks->mhdr_st = CK_MHDR_ST_IDLE; |
|
314 return 0; |
|
315 } |
|
316 } |
|
317 case CK_MHDR_ST_MULTIPART: |
|
318 /* We are looking for "Content-type: Multipart" |
|
319 * after having seen "Content-type: M". |
|
320 * If it is not "ultipart", assume "essage" and that |
|
321 * it is text. */ |
|
322 cks->mhdr_st = CK_MHDR_ST_TEXT; |
|
323 MMATCH("multipart", BND_SKIP_PARAM, IDLE); |
|
324 break; |
|
325 case CK_MHDR_ST_BND_SKIP_PARAM: |
|
326 /* Look for semicolon or whitespace preceding next |
|
327 * parameter after "Content-type: M" */ |
|
328 if (skip_param(&bp, &bp_len)) |
|
329 cks->mhdr_st = CK_MHDR_ST_BND_SPAN_WS; |
|
330 break; |
|
331 case CK_MHDR_ST_BND_SPAN_WS: |
|
332 /* skip optional whitespace before next parameter */ |
|
333 if (span_ws(&bp, &bp_len)) |
|
334 cks->mhdr_st = CK_MHDR_ST_BND; |
|
335 break; |
|
336 case CK_MHDR_ST_BND: |
|
337 /* we have matched "Content-type: multipart" |
|
338 * and are looking for the "boundary" parameter */ |
|
339 if (MMATCH("boundary=", BND_VALUE, BND_SKIP_PARAM)) { |
|
340 if (in_hdrs) { |
|
341 cks->mime_nest = 0; |
|
342 /* allow missing initial blank line */ |
|
343 cks->mime_bnd_matches = 1; |
|
344 } |
|
345 bndp = &cks->mime_bnd[cks->mime_nest]; |
|
346 cks->flags &= ~DCC_CKS_MIME_QUOTED; |
|
347 bndp->bnd[0] = '-'; |
|
348 bndp->bnd[1] = '-'; |
|
349 cks->mhdr_pos = 2; |
|
350 } |
|
351 break; |
|
352 case CK_MHDR_ST_BND_VALUE: |
|
353 /* collect the bounary string */ |
|
354 bndp = &cks->mime_bnd[cks->mime_nest]; |
|
355 /* this accepts a lot more than RFC 2046 allows, |
|
356 * but spamware written by idiots doesn't comply */ |
|
357 for (;;) { |
|
358 if (bp_len == 0) |
|
359 return 0; |
|
360 --bp_len; |
|
361 c = *bp++; |
|
362 if (c == '\n') |
|
363 break; |
|
364 if (c == '\r') |
|
365 continue; |
|
366 if ((c == ' ' || c == '\t' || c == ';') |
|
367 && !(cks->flags & DCC_CKS_MIME_QUOTED)) |
|
368 break; |
|
369 if (c == '"') { |
|
370 cks->flags ^= DCC_CKS_MIME_QUOTED; |
|
371 continue; |
|
372 } |
|
373 bndp->bnd[cks->mhdr_pos] = c; |
|
374 if (++cks->mhdr_pos >= DCC_CK_BND_MAX) { |
|
375 cks->mhdr_st = CK_MHDR_ST_IDLE; |
|
376 return 0; |
|
377 } |
|
378 } |
|
379 bndp->bnd_len = cks->mhdr_pos; |
|
380 bndp->cmp_len = 0; |
|
381 ++cks->mime_nest; |
|
382 cks->mhdr_st = CK_MHDR_ST_IDLE; |
|
383 break; |
|
384 } |
|
385 } while (bp_len != 0); |
|
386 return 0; |
|
387 |
|
388 #undef MMATCH |
|
389 #undef MKSIP_WS |
|
390 } |
|
391 |
|
392 |
|
393 |
|
394 /* fetch bytes and convert from quoted-printable */ |
|
395 u_int /* output length */ |
|
396 dcc_ck_qp_decode(DCC_GOT_CKS *cks, const char **ibufp, u_int *ibuf_lenp, |
|
397 char *obuf, u_int obuf_len) |
|
398 { |
|
399 # define GC(c) do {if (!ibuf_len) return result; \ |
|
400 --ibuf_len; (c) = *ibuf; ++ibuf;} while (0) |
|
401 u_int ibuf_len, result; |
|
402 const char *ibuf; |
|
403 u_char c = 0; |
|
404 |
|
405 if (obuf_len == 0) |
|
406 return 0; |
|
407 ibuf_len = *ibuf_lenp; |
|
408 ibuf = *ibufp; |
|
409 result = 0; |
|
410 while (ibuf_len != 0) { |
|
411 switch (cks->qp.state) { |
|
412 case DCC_CK_QP_IDLE: |
|
413 GC(c); |
|
414 if (c != '=') |
|
415 break; |
|
416 cks->qp.state = DCC_CK_QP_EQ; |
|
417 continue; |
|
418 |
|
419 case DCC_CK_QP_EQ: |
|
420 /* Consider first character after '=' */ |
|
421 GC(c); |
|
422 cks->qp.x = c; |
|
423 if (c == '\r') { |
|
424 ; |
|
425 } else if (c == '\n') { |
|
426 /* delete "=\n" like "=\r\n" |
|
427 * so that dccproc and dccm agree */ |
|
428 cks->qp.state = DCC_CK_QP_IDLE; |
|
429 continue; |
|
430 } else if (c >= '0' && c <= '9') { |
|
431 cks->qp.n = c-'0'; |
|
432 } else if (c >= 'a' && c <= 'f') { |
|
433 cks->qp.n = c-('a'-10); |
|
434 } else if (c >= 'A' && c <= 'F') { |
|
435 cks->qp.n = c-('A'-10); |
|
436 } else { |
|
437 cks->qp.state = DCC_CK_QP_FAIL1; |
|
438 c = '='; |
|
439 break; |
|
440 } |
|
441 cks->qp.state = DCC_CK_QP_1; |
|
442 continue; |
|
443 |
|
444 case DCC_CK_QP_1: |
|
445 /* consider second character after '=' */ |
|
446 GC(c); |
|
447 cks->qp.y = c; |
|
448 if (cks->qp.x == '\r') { |
|
449 if (c == '\n') { |
|
450 /* delete soft line-break */ |
|
451 cks->qp.state = DCC_CK_QP_IDLE; |
|
452 continue; |
|
453 } |
|
454 cks->qp.state = DCC_CK_QP_FAIL2; |
|
455 c = '='; |
|
456 break; |
|
457 } else if (c >= '0' && c <= '9') { |
|
458 c -= '0'; |
|
459 } else if (c >= 'a' && c <= 'f') { |
|
460 c -= ('a'-10); |
|
461 } else if (c >= 'A' && c <= 'F') { |
|
462 c -= ('A'-10); |
|
463 } else { |
|
464 cks->qp.state = DCC_CK_QP_FAIL2; |
|
465 c = '='; |
|
466 break; |
|
467 } |
|
468 cks->qp.state = DCC_CK_QP_IDLE; |
|
469 c = (cks->qp.n << 4) | c; |
|
470 break; |
|
471 |
|
472 case DCC_CK_QP_FAIL1: |
|
473 /* output character after '=' of a 2-character |
|
474 * sequence that was not quoted-printable after all */ |
|
475 cks->qp.state = DCC_CK_QP_IDLE; |
|
476 c = cks->qp.x; |
|
477 break; |
|
478 |
|
479 case DCC_CK_QP_FAIL2: |
|
480 /* output character after '=' of a 3-character |
|
481 * sequence that was not quoted-printable after all */ |
|
482 cks->qp.state = DCC_CK_QP_FAIL3; |
|
483 c = cks->qp.x; |
|
484 break; |
|
485 |
|
486 case DCC_CK_QP_FAIL3: |
|
487 /* output third character of a 3-character |
|
488 * sequence that was not quoted-printable after all */ |
|
489 cks->qp.state = DCC_CK_QP_IDLE; |
|
490 c = cks->qp.y; |
|
491 break; |
|
492 } |
|
493 |
|
494 *obuf++ = c; |
|
495 if (++result >= obuf_len) |
|
496 break; |
|
497 } |
|
498 *ibuf_lenp = ibuf_len; |
|
499 *ibufp = ibuf; |
|
500 return result; |
|
501 #undef GC |
|
502 } |
|
503 |
|
504 |
|
505 |
|
506 |
|
507 #define B64B 0100 /* bad */ |
|
508 #define B64EQ 0101 /* '=' */ |
|
509 static u_char base64_decode[128] = { |
|
510 B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x00 */ |
|
511 B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x08 */ |
|
512 B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x10 */ |
|
513 B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x18 */ |
|
514 |
|
515 B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x20 ! " # $ % & ' */ |
|
516 B64B, B64B, B64B, 62, B64B, B64B, B64B, 63, /* 0x28 ( ) * + , - . / */ |
|
517 |
|
518 52, 53, 54, 55, 56, 57, 58, 59, /* 0x30 0 1 2 3 4 5 6 7 */ |
|
519 60, 61, B64B, B64B, B64B, B64EQ,B64B, B64B, /* 0x38 8 9 : ; < = > ? */ |
|
520 |
|
521 B64B, 0, 1, 2, 3, 4, 5, 6, /* 0x40 @ A B C D E F G */ |
|
522 7, 8, 9, 10, 11, 12, 13, 14, /* 0x48 H I J K L M N O */ |
|
523 |
|
524 15, 16, 17, 18, 19, 20, 21, 22, /* 0x50 P Q R S T U V W */ |
|
525 23, 24, 25, B64B, B64B, B64B, B64B, B64B, /* 0x58 X Y Z [ \ ] ^ _ */ |
|
526 |
|
527 B64B, 26, 27, 28, 29, 30, 31, 32, /* 0x60 ` a b c d e f g */ |
|
528 33, 34, 35, 36, 37, 38, 39, 40, /* 0x68 h i j k l m n o */ |
|
529 |
|
530 41, 42, 43, 44, 45, 46, 47, 48, /* 0x70 p q r s t u v w */ |
|
531 49, 50, 51, B64B, B64B, B64B, B64B, B64B, /* 0x78 x y z { | } ~ del */ |
|
532 }; |
|
533 |
|
534 u_int /* output length */ |
|
535 dcc_ck_b64_decode(DCC_GOT_CKS *cks, const char **ibufp, u_int *ibuf_lenp, |
|
536 char *obuf, u_int obuf_len) |
|
537 { |
|
538 u_char c; |
|
539 const char *ibuf; |
|
540 u_int ibuf_len, result; |
|
541 |
|
542 if (obuf_len < 3) |
|
543 return 0; |
|
544 obuf_len -= 3; |
|
545 ibuf_len = *ibuf_lenp; |
|
546 ibuf = *ibufp; |
|
547 result = 0; |
|
548 while (ibuf_len != 0) { |
|
549 --ibuf_len; |
|
550 c = *ibuf++; |
|
551 c = base64_decode[c]; |
|
552 if (c == B64B) |
|
553 continue; |
|
554 |
|
555 if (c == B64EQ) { |
|
556 switch (cks->b64.quantum_cnt) { |
|
557 case 2: |
|
558 *obuf++ = cks->b64.quantum>>4; |
|
559 ++result; |
|
560 break; |
|
561 case 3: |
|
562 *obuf++ = cks->b64.quantum>>10; |
|
563 *obuf++ = cks->b64.quantum>>2; |
|
564 result += 2; |
|
565 break; |
|
566 } |
|
567 cks->b64.quantum_cnt = 0; |
|
568 if (result >= obuf_len) |
|
569 break; |
|
570 } |
|
571 |
|
572 cks->b64.quantum = (cks->b64.quantum << 6) | c; |
|
573 if (++cks->b64.quantum_cnt >= 4) { |
|
574 cks->b64.quantum_cnt = 0; |
|
575 *obuf++ = cks->b64.quantum>>16; |
|
576 *obuf++ = cks->b64.quantum>>8; |
|
577 *obuf++ = cks->b64.quantum; |
|
578 result += 3; |
|
579 if (result >= obuf_len) |
|
580 break; |
|
581 } |
|
582 } |
|
583 *ibuf_lenp = ibuf_len; |
|
584 *ibufp = ibuf; |
|
585 return result; |
|
586 } |
|
587 |
|
588 |
|
589 |
|
590 /* skip parts of URLs */ |
|
591 int |
|
592 dcc_ck_url(DCC_URL_SKIP *url, char c, char **pbufp) |
|
593 { |
|
594 #define RET_C(s) return ((c<<DCC_CK_URL_SHIFT) | s) |
|
595 |
|
596 /* Continue skipping a URL to its end. |
|
597 * Assume the end is the next blank, comma, '>', or '\n' |
|
598 * unless the URL is quoted. Then continue to the quote |
|
599 * or until the length has become silly. */ |
|
600 |
|
601 /* convert ASCII upper to lower case */ |
|
602 if (c >= 'A' && c <= 'Z') |
|
603 c -= 'A' - 'a'; |
|
604 |
|
605 switch (url->st) { |
|
606 case DCC_URL_ST_IDLE: |
|
607 if (c == 'h') { |
|
608 /* start looking for 't' after 'h' in "http" */ |
|
609 url->flags = 0; |
|
610 url->st = DCC_URL_ST_T1; |
|
611 } else if (c == '=') { |
|
612 /* look for the '=' in "href=" or "img src=" */ |
|
613 url->st = DCC_URL_ST_QUOTE; |
|
614 } |
|
615 RET_C(DCC_CK_URL_CHAR); |
|
616 |
|
617 case DCC_URL_ST_QUOTE: |
|
618 /* look for '"' or 'H' after "href=" or "img src= */ |
|
619 if (c == 'h') { |
|
620 url->flags &= ~DCC_URL_QUOTES; |
|
621 url->st = DCC_URL_ST_T1; |
|
622 } else if (c == '"') { |
|
623 url->flags |= DCC_URL_DQUOTED; |
|
624 url->st = DCC_URL_ST_QH; |
|
625 } else if (c == '\'') { |
|
626 url->flags |= DCC_URL_SQUOTED; |
|
627 url->st = DCC_URL_ST_QH; |
|
628 } else { |
|
629 url->st = DCC_URL_ST_IDLE; |
|
630 } |
|
631 RET_C(DCC_CK_URL_CHAR); |
|
632 |
|
633 case DCC_URL_ST_QH: |
|
634 /* seen quote; looking for start of URL */ |
|
635 if (c == 'h') { |
|
636 url->st = DCC_URL_ST_T1; |
|
637 } else { |
|
638 url->st = DCC_URL_ST_IDLE; |
|
639 } |
|
640 RET_C(DCC_CK_URL_CHAR); |
|
641 |
|
642 case DCC_URL_ST_T1: |
|
643 if (c == 't') |
|
644 url->st = DCC_URL_ST_T2; |
|
645 else |
|
646 url->st = DCC_URL_ST_IDLE; |
|
647 RET_C(DCC_CK_URL_CHAR); |
|
648 |
|
649 case DCC_URL_ST_T2: |
|
650 if (c == 't') |
|
651 url->st = DCC_URL_ST_P; |
|
652 else |
|
653 url->st = DCC_URL_ST_IDLE; |
|
654 RET_C(DCC_CK_URL_CHAR); |
|
655 |
|
656 case DCC_URL_ST_P: |
|
657 if (c == 'p') |
|
658 url->st = DCC_URL_ST_S; |
|
659 else |
|
660 url->st = DCC_URL_ST_IDLE; |
|
661 RET_C(DCC_CK_URL_CHAR); |
|
662 |
|
663 case DCC_URL_ST_S: |
|
664 /* we are expecting the ':' or 's' after http */ |
|
665 if (c == 's') |
|
666 url->st = DCC_URL_ST_COLON; |
|
667 else if (c == ':') |
|
668 url->st = DCC_URL_ST_SLASH1; |
|
669 else |
|
670 url->st = DCC_URL_ST_IDLE; |
|
671 RET_C(DCC_CK_URL_CHAR); |
|
672 |
|
673 case DCC_URL_ST_COLON: |
|
674 /* we are expecting the ':' after http or https */ |
|
675 if (c == ':') |
|
676 url->st = DCC_URL_ST_SLASH1; |
|
677 else |
|
678 url->st = DCC_URL_ST_IDLE; |
|
679 RET_C(DCC_CK_URL_CHAR); |
|
680 |
|
681 case DCC_URL_ST_SLASH1: |
|
682 /* we are expecting the first '/' after http: */ |
|
683 if (c == '/') |
|
684 url->st = DCC_URL_ST_SLASH2; |
|
685 else |
|
686 url->st = DCC_URL_ST_IDLE; |
|
687 RET_C(DCC_CK_URL_CHAR); |
|
688 |
|
689 case DCC_URL_ST_SLASH2: |
|
690 /* we are expecting the second '/' after http:/" */ |
|
691 if (c != '/') { |
|
692 url->st = DCC_URL_ST_IDLE; |
|
693 RET_C(DCC_CK_URL_CHAR); |
|
694 } |
|
695 url->st = DCC_URL_ST_SLASH3_START; |
|
696 RET_C(DCC_CK_URL_CK_LEN); |
|
697 |
|
698 case DCC_URL_ST_SLASH3_START: |
|
699 url->dot = 0; |
|
700 url->start = *pbufp; |
|
701 url->total = 0; |
|
702 url->flags &= ~(DCC_URL_DEL_DOMAIN |
|
703 | DCC_URL_PERCENT1 | DCC_URL_PERCENT2); |
|
704 url->st = DCC_URL_ST_SLASH3; |
|
705 /* fall into DCC_URL_ST_SLASH3 */ |
|
706 case DCC_URL_ST_SLASH3: |
|
707 /* look for the end of the host name */ |
|
708 ++url->total; |
|
709 again: |
|
710 if (c == '.') { |
|
711 /* keep only 1st and 2nd level domain names */ |
|
712 url->flags &= ~DCC_URL_DEL_DOMAIN; |
|
713 if (!url->dot) { |
|
714 /* do nothing at first '.' unless the name |
|
715 * was too long */ |
|
716 if (*pbufp >= url->start+DCC_URL_MAX) { |
|
717 *pbufp = url->start; |
|
718 } else { |
|
719 url->dot = *pbufp; |
|
720 } |
|
721 } else { |
|
722 url->flags |= DCC_URL_DEL_DOMAIN; |
|
723 } |
|
724 RET_C(DCC_CK_URL_DOT); |
|
725 } |
|
726 /* delay deleting third level domains to not be |
|
727 * fooled by a trailing dot */ |
|
728 if (url->flags & DCC_URL_DEL_DOMAIN) { |
|
729 url->flags &= ~DCC_URL_DEL_DOMAIN; |
|
730 memmove(url->start, url->dot, |
|
731 *pbufp - url->dot); |
|
732 *pbufp -= (url->dot - url->start); |
|
733 url->dot = *pbufp; |
|
734 } |
|
735 |
|
736 if (c == '/') { |
|
737 url->st = DCC_URL_ST_SKIP; |
|
738 RET_C(DCC_CK_URL_HOST_END); |
|
739 } |
|
740 if (c == '"' && (url->flags & DCC_URL_DQUOTED)) { |
|
741 url->st = DCC_URL_ST_IDLE; |
|
742 RET_C(DCC_CK_URL_HOST_END); |
|
743 } |
|
744 if (c == '\'' && (url->flags & DCC_URL_SQUOTED)) { |
|
745 url->st = DCC_URL_ST_IDLE; |
|
746 RET_C(DCC_CK_URL_HOST_END); |
|
747 } |
|
748 if ((c == '<' || c == '>') |
|
749 && (url->flags & DCC_URL_QUOTES) == 0) { |
|
750 url->st = DCC_URL_ST_IDLE; |
|
751 RET_C(DCC_CK_URL_HOST_END); |
|
752 } |
|
753 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { |
|
754 if (!(url->flags & DCC_URL_QUOTED) |
|
755 || url->total > DCC_URL_FAILSAFE) { |
|
756 url->st = DCC_URL_ST_IDLE; |
|
757 RET_C(DCC_CK_URL_HOST_END); |
|
758 } |
|
759 /* whitespace in a URL hostname is at best username */ |
|
760 *pbufp = url->start; |
|
761 url->st = DCC_URL_ST_SLASH3_START; |
|
762 RET_C(DCC_CK_URL_HOST_RESET); |
|
763 } |
|
764 if (c == '@') { |
|
765 /* ignore username and password */ |
|
766 *pbufp = url->start; |
|
767 url->st = DCC_URL_ST_SLASH3_START; |
|
768 RET_C(DCC_CK_URL_HOST_RESET); |
|
769 } |
|
770 |
|
771 if (c == '%') { |
|
772 url->flags &= ~DCC_URL_PERCENT2; |
|
773 url->flags |= DCC_URL_PERCENT1; |
|
774 RET_C(DCC_CK_URL_SKIP); |
|
775 } |
|
776 if (url->flags & DCC_URL_PERCENT1) { |
|
777 if (c >= '0' && c <= '9') { |
|
778 c -= '0'; |
|
779 } else if (c >= 'a' && c <= 'f') { |
|
780 c -= 'a'-10; |
|
781 } else { |
|
782 *pbufp = url->start; |
|
783 url->st = DCC_URL_ST_SLASH3_START; |
|
784 RET_C(DCC_CK_URL_HOST_RESET); |
|
785 } |
|
786 if (url->flags & DCC_URL_PERCENT2) { |
|
787 url->flags &= ~(DCC_URL_PERCENT1 |
|
788 | DCC_URL_PERCENT2); |
|
789 c |= url->percent; |
|
790 if (c >= 'A' && c <= 'Z') |
|
791 c -= 'A' - 'a'; |
|
792 goto again; |
|
793 } |
|
794 url->percent = c << 4; |
|
795 url->flags |= DCC_URL_PERCENT2; |
|
796 RET_C(DCC_CK_URL_SKIP); |
|
797 } |
|
798 |
|
799 if (*pbufp >= url->start+DCC_URL_MAX) { |
|
800 /* long garbage is probably a username */ |
|
801 if (url->total > DCC_URL_FAILSAFE) { |
|
802 url->st = DCC_URL_ST_IDLE; |
|
803 RET_C(DCC_CK_URL_CHAR); |
|
804 } |
|
805 RET_C(DCC_CK_URL_SKIP); |
|
806 } |
|
807 RET_C(DCC_CK_URL_HOST); |
|
808 |
|
809 case DCC_URL_ST_SKIP: |
|
810 /* skip the rest of the URL */ |
|
811 ++url->total; |
|
812 if (c == '"' || c == '\'') { |
|
813 url->st = DCC_URL_ST_IDLE; |
|
814 RET_C(DCC_CK_URL_SKIP); |
|
815 } |
|
816 if ((c == '>' || c == ' ' || c == '\t' |
|
817 || c == '\n' || c == '\r') |
|
818 && (!(url->flags & DCC_URL_QUOTES) |
|
819 || url->total > DCC_URL_FAILSAFE)) { |
|
820 url->total = 0; |
|
821 url->st = DCC_URL_ST_IDLE; |
|
822 RET_C(DCC_CK_URL_CHAR); |
|
823 } |
|
824 RET_C(DCC_CK_URL_SKIP); |
|
825 } |
|
826 RET_C(DCC_CK_URL_CHAR); |
|
827 |
|
828 #undef RET_C |
|
829 } |