Mercurial > notdcc
comparison dcclib/ckmime.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c7f6b056b673 |
---|---|
1 /* Distributed Checksum Clearinghouse | |
2 * | |
3 * decode MIME for checksums | |
4 * | |
5 * Copyright (c) 2008 by Rhyolite Software, LLC | |
6 * | |
7 * This agreement is not applicable to any entity which sells anti-spam | |
8 * solutions to others or provides an anti-spam solution as part of a | |
9 * security solution sold to other entities, or to a private network | |
10 * which employs the DCC or uses data provided by operation of the DCC | |
11 * but does not provide corresponding data to other users. | |
12 * | |
13 * Permission to use, copy, modify, and distribute this software without | |
14 * changes for any purpose with or without fee is hereby granted, provided | |
15 * that the above copyright notice and this permission notice appear in all | |
16 * copies and any distributed versions or copies are either unchanged | |
17 * or not called anything similar to "DCC" or "Distributed Checksum | |
18 * Clearinghouse". | |
19 * | |
20 * Parties not eligible to receive a license under this agreement can | |
21 * obtain a commercial license to use DCC by contacting Rhyolite Software | |
22 * at sales@rhyolite.com. | |
23 * | |
24 * A commercial license would be for Distributed Checksum and Reputation | |
25 * Clearinghouse software. That software includes additional features. This | |
26 * free license for Distributed ChecksumClearinghouse Software does not in any | |
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse | |
28 * software | |
29 * | |
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL | |
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES | |
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC | |
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES | |
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, | |
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS | |
37 * SOFTWARE. | |
38 * | |
39 * Rhyolite Software DCC 1.3.103-1.39 $Revision$ | |
40 */ | |
41 | |
42 #include "dcc_ck.h" | |
43 | |
44 /* Notice MIME headers */ | |
45 void | |
46 dcc_ck_mime_hdr(DCC_GOT_CKS *cks, | |
47 const char *hdr, /* entire header line or name only */ | |
48 const char *str) /* header value if not after name */ | |
49 { | |
50 /* parse at least the header name */ | |
51 cks->mhdr_st = CK_MHDR_ST_CE_CT; | |
52 cks->mhdr_pos = 0; | |
53 parse_mime_hdr(cks, hdr, strlen(hdr), 1); | |
54 | |
55 /* parse the header value if present and we care about the header */ | |
56 if (str | |
57 && cks->mhdr_st != CK_MHDR_ST_IDLE) { | |
58 parse_mime_hdr(cks, ":", 1, 1); | |
59 parse_mime_hdr(cks, str, strlen(str), 1); | |
60 } | |
61 | |
62 /* force the end of the line */ | |
63 if (cks->mhdr_st != CK_MHDR_ST_IDLE) | |
64 parse_mime_hdr(cks, "\n", 1, 1); | |
65 | |
66 if (cks->mime_nest != 0) | |
67 cks->mp_st = CK_MP_ST_PREAMBLE; | |
68 | |
69 cks->flags |= DCC_CKS_MIME_BOL; | |
70 } | |
71 | |
72 | |
73 | |
74 static u_char /* 1=matched */ | |
75 match(DCC_GOT_CKS *cks, | |
76 enum CK_MHDR_ST ok, enum CK_MHDR_ST fail, | |
77 const char *tgt_str, u_int tgt_len, | |
78 const char **bp, u_int *bp_len) | |
79 { | |
80 u_int len; | |
81 | |
82 len = min(tgt_len - cks->mhdr_pos, *bp_len); | |
83 if (strncasecmp(tgt_str + cks->mhdr_pos, *bp, len)) { | |
84 /* switch to failure state if there is enough of the | |
85 * string to know it does not match */ | |
86 cks->mhdr_st = fail; | |
87 return 0; | |
88 } | |
89 | |
90 *bp += len; | |
91 *bp_len -= len; | |
92 if ((u_int)(cks->mhdr_pos += len) >= tgt_len) { | |
93 /* switch to the success state on a match */ | |
94 cks->mhdr_st = ok; | |
95 cks->mhdr_pos = 0; | |
96 return 1; | |
97 } | |
98 | |
99 /* wait for more input */ | |
100 return 0; | |
101 } | |
102 | |
103 | |
104 | |
/* ignore white space
 *	Advance *bp over blanks, tabs, carriage returns and newlines and
 *	reduce *bp_len by the number of bytes skipped.
 *	Returns 1 with *bp at the first byte that is not white space, or
 *	0 when the buffer ran out first.  A buffer that is already empty is
 *	reported as empty without being read, so that *bp is never
 *	dereferenced beyond the buffer and *bp_len cannot wrap below zero. */
static u_char                   /* 0=buffer empty */
span_ws(const char **bp, u_int *bp_len)
{
    char c;

    while (*bp_len != 0) {
        c = **bp;
        if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
            return 1;
        ++*bp;
        --*bp_len;
    }
    return 0;
}
117 | |
118 | |
119 | |
/* skip to white space or after semicolon that precedes the next parameter
 *	Advance *bp and reduce *bp_len until *bp is at a blank, tab, carriage
 *	return or newline, or just past a ';'.
 *	Returns 1 when such a stopping point was found (after a ';' the
 *	remaining length may be 0), or 0 when the buffer ran out first.
 *	A buffer that is already empty is reported as empty without being
 *	read, so that *bp is never dereferenced beyond the buffer and
 *	*bp_len cannot wrap below zero. */
static u_char                   /* 0=buffer empty */
skip_param(const char **bp, u_int *bp_len)
{
    char c;

    while (*bp_len != 0) {
        c = **bp;
        if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
            return 1;       /* stop in front of the white space */
        ++*bp;
        --*bp_len;
        if (c == ';')
            return 1;       /* stop just after the semicolon */
    }
    return 0;
}
136 | |
137 | |
138 | |
139 /* Parse MIME headers | |
140 * Look for (parts of) Content-Type and Content-Transfer-Encoding | |
141 * headers in a buffer. There can be at most one significant (not part of | |
142 * folded whitespace) '\n' in the buffer and only as the last byte */ | |
143 u_char /* 1=blank line */ | |
144 parse_mime_hdr(DCC_GOT_CKS *cks, | |
145 const char *bp, u_int bp_len, | |
146 u_char in_hdrs) /* 1=in RFC 822 headers */ | |
147 { | |
148 #define MMATCH(str,ok,fail) match(cks,CK_MHDR_ST_##ok,CK_MHDR_ST_##fail, \ | |
149 str,sizeof(str)-1, &bp, &bp_len) | |
150 char c; | |
151 DCC_CK_BND *bndp; | |
152 | |
153 if ((cks->flags & DCC_CKS_MIME_BOL) | |
154 && !in_hdrs) { | |
155 c = *bp; | |
156 if (c == '\r') { | |
157 /* ignore CR to ease detecting blank line */ | |
158 if (--bp_len == 0) | |
159 return 0; | |
160 c = *++bp; | |
161 } | |
162 if (c == '\n') | |
163 return 1; /* this line is blank */ | |
164 | |
165 /* reset parser line without folded whitespace */ | |
166 if (c != ' ' && c != '\t') { | |
167 cks->mhdr_st = CK_MHDR_ST_CE_CT; | |
168 cks->mhdr_pos = 0; | |
169 } | |
170 cks->flags &= ~DCC_CKS_MIME_BOL; | |
171 } | |
172 | |
173 do { | |
174 switch (cks->mhdr_st) { | |
175 case CK_MHDR_ST_IDLE: | |
176 return 0; | |
177 | |
178 case CK_MHDR_ST_CE_CT: | |
179 /* This state always preceeds the following states */ | |
180 if (MMATCH("Content-T", CT_WS, IDLE)) { | |
181 switch (*bp) { | |
182 case 'r': | |
183 case 'R': | |
184 cks->mhdr_st = CK_MHDR_ST_CE; | |
185 break; | |
186 case 'y': | |
187 case 'Y': | |
188 cks->mhdr_st = CK_MHDR_ST_CT; | |
189 break; | |
190 default: | |
191 cks->mhdr_st = CK_MHDR_ST_IDLE; | |
192 return 0; | |
193 } | |
194 } | |
195 break; | |
196 | |
197 case CK_MHDR_ST_CE: | |
198 MMATCH("ransfer-Encoding:", CE_WS, IDLE); | |
199 break; | |
200 case CK_MHDR_ST_CE_WS: | |
201 if (!span_ws(&bp, &bp_len)) | |
202 return 0; | |
203 switch (*bp) { | |
204 case 'b': | |
205 case 'B': | |
206 cks->mhdr_st = CK_MHDR_ST_B64; | |
207 break; | |
208 case 'q': | |
209 case 'Q': | |
210 cks->mhdr_st = CK_MHDR_ST_QP; | |
211 break; | |
212 default: | |
213 cks->mhdr_st = CK_MHDR_ST_IDLE; | |
214 return 0; | |
215 } | |
216 break; | |
217 case CK_MHDR_ST_QP: | |
218 if (MMATCH("quoted-printable", IDLE, IDLE)) | |
219 cks->mime_ce = DCC_CK_CE_QP; | |
220 break; | |
221 case CK_MHDR_ST_B64: | |
222 if (MMATCH("base64", IDLE, IDLE)) | |
223 cks->mime_ce = DCC_CK_CE_B64; | |
224 break; | |
225 | |
226 case CK_MHDR_ST_CT: | |
227 MMATCH("ype:", CT_WS, IDLE); | |
228 break; | |
229 case CK_MHDR_ST_CT_WS: | |
230 /* We have matched "Content-type:" */ | |
231 if (!span_ws(&bp, &bp_len)) | |
232 return 0; | |
233 switch (*bp) { | |
234 case 't': | |
235 case 'T': | |
236 cks->mhdr_st = CK_MHDR_ST_TEXT; | |
237 break; | |
238 case 'm': | |
239 case 'M': | |
240 /* do not nest too deeply */ | |
241 if (in_hdrs | |
242 || cks->mime_nest < DIM(cks->mime_bnd)) { | |
243 cks->mhdr_st = CK_MHDR_ST_MULTIPART; | |
244 } else { | |
245 cks->mhdr_st = CK_MHDR_ST_TEXT; | |
246 cks->mhdr_st = CK_MHDR_ST_IDLE; | |
247 } | |
248 break; | |
249 default: | |
250 /* assume it is binary noise if it does | |
251 * not match "Content-type: [tTmM]" */ | |
252 cks->mime_ct = DCC_CK_CT_BINARY; | |
253 cks->mhdr_st = CK_MHDR_ST_IDLE; | |
254 return 0; | |
255 } | |
256 break; | |
257 case CK_MHDR_ST_TEXT: | |
258 /* we are looking for "Text" in "Content-type: Text" */ | |
259 if (MMATCH("text", HTML, IDLE)) | |
260 cks->mime_ct = DCC_CK_CT_TEXT; | |
261 break; | |
262 case CK_MHDR_ST_HTML: | |
263 /* look for "Content-type: Text/html" */ | |
264 if (MMATCH("/html", CSET_SKIP_PARAM, CSET_SKIP_PARAM)) | |
265 cks->mime_ct = DCC_CK_CT_HTML; | |
266 break; | |
267 case CK_MHDR_ST_CSET_SKIP_PARAM: | |
268 /* Look for semicolon or whitespace preceding next | |
269 * parameter after "Content-type: Text/html" */ | |
270 if (skip_param(&bp, &bp_len)) | |
271 cks->mhdr_st = CK_MHDR_ST_CSET_SPAN_WS; | |
272 break; | |
273 case CK_MHDR_ST_CSET_SPAN_WS: | |
274 /* skip optional whitespace before next parameter */ | |
275 if (span_ws(&bp, &bp_len)) | |
276 cks->mhdr_st = CK_MHDR_ST_CSET; | |
277 break; | |
278 case CK_MHDR_ST_CSET: | |
279 /* have matched "Content-Type: text...;" | |
280 * and are looking for a "charset=" parameter */ | |
281 MMATCH("charset=", CSET_ISO_8859, CSET_SKIP_PARAM); | |
282 break; | |
283 case CK_MHDR_ST_CSET_ISO_8859: | |
284 /* We have matched "Content-Type: text...charset=" | |
285 * and are looking for "ISO-8859-*". | |
286 * Ignore leading '"' */ | |
287 if (cks->mhdr_pos == 0 | |
288 && bp_len > 0 && *bp == '"') { | |
289 ++bp; | |
290 --bp_len; | |
291 } | |
292 MMATCH("iso-8859-", CSET_ISO_X, IDLE); | |
293 break; | |
294 case CK_MHDR_ST_CSET_ISO_X: | |
295 for (;;) { | |
296 if (bp_len == 0) | |
297 return 0; | |
298 --bp_len; | |
299 c = *bp++; | |
300 if (c < '0' || c > '9') { | |
301 if ((c == '"' || c == ' ' || c == '\t' | |
302 || c == ';' | |
303 || c == '\r' || c == '\n') | |
304 && cks->mhdr_pos == 2) | |
305 cks->mime_cset = dcc_cset_2; | |
306 else | |
307 cks->mime_cset = dcc_cset_1; | |
308 cks->mhdr_st = CK_MHDR_ST_IDLE; | |
309 return 0; | |
310 } | |
311 cks->mhdr_pos = cks->mhdr_pos*10 + c - '0'; | |
312 if (cks->mhdr_pos > 99) { | |
313 cks->mhdr_st = CK_MHDR_ST_IDLE; | |
314 return 0; | |
315 } | |
316 } | |
317 case CK_MHDR_ST_MULTIPART: | |
318 /* We are looking for "Content-type: Multipart" | |
319 * after having seen "Content-type: M". | |
320 * If it is not "ultipart", assume "essage" and that | |
321 * it is text. */ | |
322 cks->mhdr_st = CK_MHDR_ST_TEXT; | |
323 MMATCH("multipart", BND_SKIP_PARAM, IDLE); | |
324 break; | |
325 case CK_MHDR_ST_BND_SKIP_PARAM: | |
326 /* Look for semicolon or whitespace preceding next | |
327 * parameter after "Content-type: M" */ | |
328 if (skip_param(&bp, &bp_len)) | |
329 cks->mhdr_st = CK_MHDR_ST_BND_SPAN_WS; | |
330 break; | |
331 case CK_MHDR_ST_BND_SPAN_WS: | |
332 /* skip optional whitespace before next parameter */ | |
333 if (span_ws(&bp, &bp_len)) | |
334 cks->mhdr_st = CK_MHDR_ST_BND; | |
335 break; | |
336 case CK_MHDR_ST_BND: | |
337 /* we have matched "Content-type: multipart" | |
338 * and are looking for the "boundary" parameter */ | |
339 if (MMATCH("boundary=", BND_VALUE, BND_SKIP_PARAM)) { | |
340 if (in_hdrs) { | |
341 cks->mime_nest = 0; | |
342 /* allow missing initial blank line */ | |
343 cks->mime_bnd_matches = 1; | |
344 } | |
345 bndp = &cks->mime_bnd[cks->mime_nest]; | |
346 cks->flags &= ~DCC_CKS_MIME_QUOTED; | |
347 bndp->bnd[0] = '-'; | |
348 bndp->bnd[1] = '-'; | |
349 cks->mhdr_pos = 2; | |
350 } | |
351 break; | |
352 case CK_MHDR_ST_BND_VALUE: | |
353 /* collect the bounary string */ | |
354 bndp = &cks->mime_bnd[cks->mime_nest]; | |
355 /* this accepts a lot more than RFC 2046 allows, | |
356 * but spamware written by idiots doesn't comply */ | |
357 for (;;) { | |
358 if (bp_len == 0) | |
359 return 0; | |
360 --bp_len; | |
361 c = *bp++; | |
362 if (c == '\n') | |
363 break; | |
364 if (c == '\r') | |
365 continue; | |
366 if ((c == ' ' || c == '\t' || c == ';') | |
367 && !(cks->flags & DCC_CKS_MIME_QUOTED)) | |
368 break; | |
369 if (c == '"') { | |
370 cks->flags ^= DCC_CKS_MIME_QUOTED; | |
371 continue; | |
372 } | |
373 bndp->bnd[cks->mhdr_pos] = c; | |
374 if (++cks->mhdr_pos >= DCC_CK_BND_MAX) { | |
375 cks->mhdr_st = CK_MHDR_ST_IDLE; | |
376 return 0; | |
377 } | |
378 } | |
379 bndp->bnd_len = cks->mhdr_pos; | |
380 bndp->cmp_len = 0; | |
381 ++cks->mime_nest; | |
382 cks->mhdr_st = CK_MHDR_ST_IDLE; | |
383 break; | |
384 } | |
385 } while (bp_len != 0); | |
386 return 0; | |
387 | |
388 #undef MMATCH | |
389 #undef MKSIP_WS | |
390 } | |
391 | |
392 | |
393 | |
394 /* fetch bytes and convert from quoted-printable */ | |
395 u_int /* output length */ | |
396 dcc_ck_qp_decode(DCC_GOT_CKS *cks, const char **ibufp, u_int *ibuf_lenp, | |
397 char *obuf, u_int obuf_len) | |
398 { | |
399 # define GC(c) do {if (!ibuf_len) return result; \ | |
400 --ibuf_len; (c) = *ibuf; ++ibuf;} while (0) | |
401 u_int ibuf_len, result; | |
402 const char *ibuf; | |
403 u_char c = 0; | |
404 | |
405 if (obuf_len == 0) | |
406 return 0; | |
407 ibuf_len = *ibuf_lenp; | |
408 ibuf = *ibufp; | |
409 result = 0; | |
410 while (ibuf_len != 0) { | |
411 switch (cks->qp.state) { | |
412 case DCC_CK_QP_IDLE: | |
413 GC(c); | |
414 if (c != '=') | |
415 break; | |
416 cks->qp.state = DCC_CK_QP_EQ; | |
417 continue; | |
418 | |
419 case DCC_CK_QP_EQ: | |
420 /* Consider first character after '=' */ | |
421 GC(c); | |
422 cks->qp.x = c; | |
423 if (c == '\r') { | |
424 ; | |
425 } else if (c == '\n') { | |
426 /* delete "=\n" like "=\r\n" | |
427 * so that dccproc and dccm agree */ | |
428 cks->qp.state = DCC_CK_QP_IDLE; | |
429 continue; | |
430 } else if (c >= '0' && c <= '9') { | |
431 cks->qp.n = c-'0'; | |
432 } else if (c >= 'a' && c <= 'f') { | |
433 cks->qp.n = c-('a'-10); | |
434 } else if (c >= 'A' && c <= 'F') { | |
435 cks->qp.n = c-('A'-10); | |
436 } else { | |
437 cks->qp.state = DCC_CK_QP_FAIL1; | |
438 c = '='; | |
439 break; | |
440 } | |
441 cks->qp.state = DCC_CK_QP_1; | |
442 continue; | |
443 | |
444 case DCC_CK_QP_1: | |
445 /* consider second character after '=' */ | |
446 GC(c); | |
447 cks->qp.y = c; | |
448 if (cks->qp.x == '\r') { | |
449 if (c == '\n') { | |
450 /* delete soft line-break */ | |
451 cks->qp.state = DCC_CK_QP_IDLE; | |
452 continue; | |
453 } | |
454 cks->qp.state = DCC_CK_QP_FAIL2; | |
455 c = '='; | |
456 break; | |
457 } else if (c >= '0' && c <= '9') { | |
458 c -= '0'; | |
459 } else if (c >= 'a' && c <= 'f') { | |
460 c -= ('a'-10); | |
461 } else if (c >= 'A' && c <= 'F') { | |
462 c -= ('A'-10); | |
463 } else { | |
464 cks->qp.state = DCC_CK_QP_FAIL2; | |
465 c = '='; | |
466 break; | |
467 } | |
468 cks->qp.state = DCC_CK_QP_IDLE; | |
469 c = (cks->qp.n << 4) | c; | |
470 break; | |
471 | |
472 case DCC_CK_QP_FAIL1: | |
473 /* output character after '=' of a 2-character | |
474 * sequence that was not quoted-printable after all */ | |
475 cks->qp.state = DCC_CK_QP_IDLE; | |
476 c = cks->qp.x; | |
477 break; | |
478 | |
479 case DCC_CK_QP_FAIL2: | |
480 /* output character after '=' of a 3-character | |
481 * sequence that was not quoted-printable after all */ | |
482 cks->qp.state = DCC_CK_QP_FAIL3; | |
483 c = cks->qp.x; | |
484 break; | |
485 | |
486 case DCC_CK_QP_FAIL3: | |
487 /* output third character of a 3-character | |
488 * sequence that was not quoted-printable after all */ | |
489 cks->qp.state = DCC_CK_QP_IDLE; | |
490 c = cks->qp.y; | |
491 break; | |
492 } | |
493 | |
494 *obuf++ = c; | |
495 if (++result >= obuf_len) | |
496 break; | |
497 } | |
498 *ibuf_lenp = ibuf_len; | |
499 *ibufp = ibuf; | |
500 return result; | |
501 #undef GC | |
502 } | |
503 | |
504 | |
505 | |
506 | |
507 #define B64B 0100 /* bad */ | |
508 #define B64EQ 0101 /* '=' */ | |
509 static u_char base64_decode[128] = { | |
510 B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x00 */ | |
511 B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x08 */ | |
512 B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x10 */ | |
513 B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x18 */ | |
514 | |
515 B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x20 ! " # $ % & ' */ | |
516 B64B, B64B, B64B, 62, B64B, B64B, B64B, 63, /* 0x28 ( ) * + , - . / */ | |
517 | |
518 52, 53, 54, 55, 56, 57, 58, 59, /* 0x30 0 1 2 3 4 5 6 7 */ | |
519 60, 61, B64B, B64B, B64B, B64EQ,B64B, B64B, /* 0x38 8 9 : ; < = > ? */ | |
520 | |
521 B64B, 0, 1, 2, 3, 4, 5, 6, /* 0x40 @ A B C D E F G */ | |
522 7, 8, 9, 10, 11, 12, 13, 14, /* 0x48 H I J K L M N O */ | |
523 | |
524 15, 16, 17, 18, 19, 20, 21, 22, /* 0x50 P Q R S T U V W */ | |
525 23, 24, 25, B64B, B64B, B64B, B64B, B64B, /* 0x58 X Y Z [ \ ] ^ _ */ | |
526 | |
527 B64B, 26, 27, 28, 29, 30, 31, 32, /* 0x60 ` a b c d e f g */ | |
528 33, 34, 35, 36, 37, 38, 39, 40, /* 0x68 h i j k l m n o */ | |
529 | |
530 41, 42, 43, 44, 45, 46, 47, 48, /* 0x70 p q r s t u v w */ | |
531 49, 50, 51, B64B, B64B, B64B, B64B, B64B, /* 0x78 x y z { | } ~ del */ | |
532 }; | |
533 | |
534 u_int /* output length */ | |
535 dcc_ck_b64_decode(DCC_GOT_CKS *cks, const char **ibufp, u_int *ibuf_lenp, | |
536 char *obuf, u_int obuf_len) | |
537 { | |
538 u_char c; | |
539 const char *ibuf; | |
540 u_int ibuf_len, result; | |
541 | |
542 if (obuf_len < 3) | |
543 return 0; | |
544 obuf_len -= 3; | |
545 ibuf_len = *ibuf_lenp; | |
546 ibuf = *ibufp; | |
547 result = 0; | |
548 while (ibuf_len != 0) { | |
549 --ibuf_len; | |
550 c = *ibuf++; | |
551 c = base64_decode[c]; | |
552 if (c == B64B) | |
553 continue; | |
554 | |
555 if (c == B64EQ) { | |
556 switch (cks->b64.quantum_cnt) { | |
557 case 2: | |
558 *obuf++ = cks->b64.quantum>>4; | |
559 ++result; | |
560 break; | |
561 case 3: | |
562 *obuf++ = cks->b64.quantum>>10; | |
563 *obuf++ = cks->b64.quantum>>2; | |
564 result += 2; | |
565 break; | |
566 } | |
567 cks->b64.quantum_cnt = 0; | |
568 if (result >= obuf_len) | |
569 break; | |
570 } | |
571 | |
572 cks->b64.quantum = (cks->b64.quantum << 6) | c; | |
573 if (++cks->b64.quantum_cnt >= 4) { | |
574 cks->b64.quantum_cnt = 0; | |
575 *obuf++ = cks->b64.quantum>>16; | |
576 *obuf++ = cks->b64.quantum>>8; | |
577 *obuf++ = cks->b64.quantum; | |
578 result += 3; | |
579 if (result >= obuf_len) | |
580 break; | |
581 } | |
582 } | |
583 *ibuf_lenp = ibuf_len; | |
584 *ibufp = ibuf; | |
585 return result; | |
586 } | |
587 | |
588 | |
589 | |
590 /* skip parts of URLs */ | |
591 int | |
592 dcc_ck_url(DCC_URL_SKIP *url, char c, char **pbufp) | |
593 { | |
594 #define RET_C(s) return ((c<<DCC_CK_URL_SHIFT) | s) | |
595 | |
596 /* Continue skipping a URL to its end. | |
597 * Assume the end is the next blank, comma, '>', or '\n' | |
598 * unless the URL is quoted. Then continue to the quote | |
599 * or until the length has become silly. */ | |
600 | |
601 /* convert ASCII upper to lower case */ | |
602 if (c >= 'A' && c <= 'Z') | |
603 c -= 'A' - 'a'; | |
604 | |
605 switch (url->st) { | |
606 case DCC_URL_ST_IDLE: | |
607 if (c == 'h') { | |
608 /* start looking for 't' after 'h' in "http" */ | |
609 url->flags = 0; | |
610 url->st = DCC_URL_ST_T1; | |
611 } else if (c == '=') { | |
612 /* look for the '=' in "href=" or "img src=" */ | |
613 url->st = DCC_URL_ST_QUOTE; | |
614 } | |
615 RET_C(DCC_CK_URL_CHAR); | |
616 | |
617 case DCC_URL_ST_QUOTE: | |
618 /* look for '"' or 'H' after "href=" or "img src= */ | |
619 if (c == 'h') { | |
620 url->flags &= ~DCC_URL_QUOTES; | |
621 url->st = DCC_URL_ST_T1; | |
622 } else if (c == '"') { | |
623 url->flags |= DCC_URL_DQUOTED; | |
624 url->st = DCC_URL_ST_QH; | |
625 } else if (c == '\'') { | |
626 url->flags |= DCC_URL_SQUOTED; | |
627 url->st = DCC_URL_ST_QH; | |
628 } else { | |
629 url->st = DCC_URL_ST_IDLE; | |
630 } | |
631 RET_C(DCC_CK_URL_CHAR); | |
632 | |
633 case DCC_URL_ST_QH: | |
634 /* seen quote; looking for start of URL */ | |
635 if (c == 'h') { | |
636 url->st = DCC_URL_ST_T1; | |
637 } else { | |
638 url->st = DCC_URL_ST_IDLE; | |
639 } | |
640 RET_C(DCC_CK_URL_CHAR); | |
641 | |
642 case DCC_URL_ST_T1: | |
643 if (c == 't') | |
644 url->st = DCC_URL_ST_T2; | |
645 else | |
646 url->st = DCC_URL_ST_IDLE; | |
647 RET_C(DCC_CK_URL_CHAR); | |
648 | |
649 case DCC_URL_ST_T2: | |
650 if (c == 't') | |
651 url->st = DCC_URL_ST_P; | |
652 else | |
653 url->st = DCC_URL_ST_IDLE; | |
654 RET_C(DCC_CK_URL_CHAR); | |
655 | |
656 case DCC_URL_ST_P: | |
657 if (c == 'p') | |
658 url->st = DCC_URL_ST_S; | |
659 else | |
660 url->st = DCC_URL_ST_IDLE; | |
661 RET_C(DCC_CK_URL_CHAR); | |
662 | |
663 case DCC_URL_ST_S: | |
664 /* we are expecting the ':' or 's' after http */ | |
665 if (c == 's') | |
666 url->st = DCC_URL_ST_COLON; | |
667 else if (c == ':') | |
668 url->st = DCC_URL_ST_SLASH1; | |
669 else | |
670 url->st = DCC_URL_ST_IDLE; | |
671 RET_C(DCC_CK_URL_CHAR); | |
672 | |
673 case DCC_URL_ST_COLON: | |
674 /* we are expecting the ':' after http or https */ | |
675 if (c == ':') | |
676 url->st = DCC_URL_ST_SLASH1; | |
677 else | |
678 url->st = DCC_URL_ST_IDLE; | |
679 RET_C(DCC_CK_URL_CHAR); | |
680 | |
681 case DCC_URL_ST_SLASH1: | |
682 /* we are expecting the first '/' after http: */ | |
683 if (c == '/') | |
684 url->st = DCC_URL_ST_SLASH2; | |
685 else | |
686 url->st = DCC_URL_ST_IDLE; | |
687 RET_C(DCC_CK_URL_CHAR); | |
688 | |
689 case DCC_URL_ST_SLASH2: | |
690 /* we are expecting the second '/' after http:/" */ | |
691 if (c != '/') { | |
692 url->st = DCC_URL_ST_IDLE; | |
693 RET_C(DCC_CK_URL_CHAR); | |
694 } | |
695 url->st = DCC_URL_ST_SLASH3_START; | |
696 RET_C(DCC_CK_URL_CK_LEN); | |
697 | |
698 case DCC_URL_ST_SLASH3_START: | |
699 url->dot = 0; | |
700 url->start = *pbufp; | |
701 url->total = 0; | |
702 url->flags &= ~(DCC_URL_DEL_DOMAIN | |
703 | DCC_URL_PERCENT1 | DCC_URL_PERCENT2); | |
704 url->st = DCC_URL_ST_SLASH3; | |
705 /* fall into DCC_URL_ST_SLASH3 */ | |
706 case DCC_URL_ST_SLASH3: | |
707 /* look for the end of the host name */ | |
708 ++url->total; | |
709 again: | |
710 if (c == '.') { | |
711 /* keep only 1st and 2nd level domain names */ | |
712 url->flags &= ~DCC_URL_DEL_DOMAIN; | |
713 if (!url->dot) { | |
714 /* do nothing at first '.' unless the name | |
715 * was too long */ | |
716 if (*pbufp >= url->start+DCC_URL_MAX) { | |
717 *pbufp = url->start; | |
718 } else { | |
719 url->dot = *pbufp; | |
720 } | |
721 } else { | |
722 url->flags |= DCC_URL_DEL_DOMAIN; | |
723 } | |
724 RET_C(DCC_CK_URL_DOT); | |
725 } | |
726 /* delay deleting third level domains to not be | |
727 * fooled by a trailing dot */ | |
728 if (url->flags & DCC_URL_DEL_DOMAIN) { | |
729 url->flags &= ~DCC_URL_DEL_DOMAIN; | |
730 memmove(url->start, url->dot, | |
731 *pbufp - url->dot); | |
732 *pbufp -= (url->dot - url->start); | |
733 url->dot = *pbufp; | |
734 } | |
735 | |
736 if (c == '/') { | |
737 url->st = DCC_URL_ST_SKIP; | |
738 RET_C(DCC_CK_URL_HOST_END); | |
739 } | |
740 if (c == '"' && (url->flags & DCC_URL_DQUOTED)) { | |
741 url->st = DCC_URL_ST_IDLE; | |
742 RET_C(DCC_CK_URL_HOST_END); | |
743 } | |
744 if (c == '\'' && (url->flags & DCC_URL_SQUOTED)) { | |
745 url->st = DCC_URL_ST_IDLE; | |
746 RET_C(DCC_CK_URL_HOST_END); | |
747 } | |
748 if ((c == '<' || c == '>') | |
749 && (url->flags & DCC_URL_QUOTES) == 0) { | |
750 url->st = DCC_URL_ST_IDLE; | |
751 RET_C(DCC_CK_URL_HOST_END); | |
752 } | |
753 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { | |
754 if (!(url->flags & DCC_URL_QUOTED) | |
755 || url->total > DCC_URL_FAILSAFE) { | |
756 url->st = DCC_URL_ST_IDLE; | |
757 RET_C(DCC_CK_URL_HOST_END); | |
758 } | |
759 /* whitespace in a URL hostname is at best username */ | |
760 *pbufp = url->start; | |
761 url->st = DCC_URL_ST_SLASH3_START; | |
762 RET_C(DCC_CK_URL_HOST_RESET); | |
763 } | |
764 if (c == '@') { | |
765 /* ignore username and password */ | |
766 *pbufp = url->start; | |
767 url->st = DCC_URL_ST_SLASH3_START; | |
768 RET_C(DCC_CK_URL_HOST_RESET); | |
769 } | |
770 | |
771 if (c == '%') { | |
772 url->flags &= ~DCC_URL_PERCENT2; | |
773 url->flags |= DCC_URL_PERCENT1; | |
774 RET_C(DCC_CK_URL_SKIP); | |
775 } | |
776 if (url->flags & DCC_URL_PERCENT1) { | |
777 if (c >= '0' && c <= '9') { | |
778 c -= '0'; | |
779 } else if (c >= 'a' && c <= 'f') { | |
780 c -= 'a'-10; | |
781 } else { | |
782 *pbufp = url->start; | |
783 url->st = DCC_URL_ST_SLASH3_START; | |
784 RET_C(DCC_CK_URL_HOST_RESET); | |
785 } | |
786 if (url->flags & DCC_URL_PERCENT2) { | |
787 url->flags &= ~(DCC_URL_PERCENT1 | |
788 | DCC_URL_PERCENT2); | |
789 c |= url->percent; | |
790 if (c >= 'A' && c <= 'Z') | |
791 c -= 'A' - 'a'; | |
792 goto again; | |
793 } | |
794 url->percent = c << 4; | |
795 url->flags |= DCC_URL_PERCENT2; | |
796 RET_C(DCC_CK_URL_SKIP); | |
797 } | |
798 | |
799 if (*pbufp >= url->start+DCC_URL_MAX) { | |
800 /* long garbage is probably a username */ | |
801 if (url->total > DCC_URL_FAILSAFE) { | |
802 url->st = DCC_URL_ST_IDLE; | |
803 RET_C(DCC_CK_URL_CHAR); | |
804 } | |
805 RET_C(DCC_CK_URL_SKIP); | |
806 } | |
807 RET_C(DCC_CK_URL_HOST); | |
808 | |
809 case DCC_URL_ST_SKIP: | |
810 /* skip the rest of the URL */ | |
811 ++url->total; | |
812 if (c == '"' || c == '\'') { | |
813 url->st = DCC_URL_ST_IDLE; | |
814 RET_C(DCC_CK_URL_SKIP); | |
815 } | |
816 if ((c == '>' || c == ' ' || c == '\t' | |
817 || c == '\n' || c == '\r') | |
818 && (!(url->flags & DCC_URL_QUOTES) | |
819 || url->total > DCC_URL_FAILSAFE)) { | |
820 url->total = 0; | |
821 url->st = DCC_URL_ST_IDLE; | |
822 RET_C(DCC_CK_URL_CHAR); | |
823 } | |
824 RET_C(DCC_CK_URL_SKIP); | |
825 } | |
826 RET_C(DCC_CK_URL_CHAR); | |
827 | |
828 #undef RET_C | |
829 } |