Mercurial > notdcc
view dcclib/ckmime.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
line wrap: on
line source
/* Distributed Checksum Clearinghouse
 *
 * decode MIME for checksums
 *
 * Copyright (c) 2008 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software without
 * changes for any purpose with or without fee is hereby granted, provided
 * that the above copyright notice and this permission notice appear in all
 * copies and any distributed versions or copies are either unchanged
 * or not called anything similar to "DCC" or "Distributed Checksum
 * Clearinghouse".
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC by contacting Rhyolite Software
 * at sales@rhyolite.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software.  That software includes additional features.  This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.103-1.39 $Revision$
 */

#include "dcc_ck.h"


/* Notice MIME headers
 *
 * Feed one RFC 822 header through the MIME header parser.
 *	cks	checksum/parser state that is updated
 *	hdr	entire header line, or only the header name
 *	str	header value when it does not follow the name in hdr;
 *		may be null
 *
 * The parser is restarted for every header.  If the parser is still
 * interested after the name, a ":" and the value are fed to it, followed
 * by a "\n" to terminate the line.  On return the beginning-of-line flag
 * is set, and the multipart state is set to the preamble if at least
 * one boundary is known. */
void
dcc_ck_mime_hdr(DCC_GOT_CKS *cks,
		const char *hdr,	/* entire header line or name only */
		const char *str)	/* header value if not after name */
{
	/* parse at least the header name */
	cks->mhdr_st = CK_MHDR_ST_CE_CT;
	cks->mhdr_pos = 0;
	parse_mime_hdr(cks, hdr, strlen(hdr), 1);

	/* parse the header value if present and we care about the header */
	if (str && cks->mhdr_st != CK_MHDR_ST_IDLE) {
		parse_mime_hdr(cks, ":", 1, 1);
		parse_mime_hdr(cks, str, strlen(str), 1);
	}

	/* force the end of the line */
	if (cks->mhdr_st != CK_MHDR_ST_IDLE)
		parse_mime_hdr(cks, "\n", 1, 1);

	if (cks->mime_nest != 0)
		cks->mp_st = CK_MP_ST_PREAMBLE;

	cks->flags |= DCC_CKS_MIME_BOL;
}



/* Incrementally compare the input with a target string, ignoring case.
 *
 * cks->mhdr_pos is the number of target bytes matched by previous calls,
 * so that a target can be split among several input buffers.
 * The matched bytes are consumed from *bp and *bp_len.
 *
 * On a mismatch, the parser goes to the "fail" state and 0 is returned.
 * On a complete match, the parser goes to the "ok" state, cks->mhdr_pos
 * is cleared, and 1 is returned.
 * On a partial match at the end of the input, the state is unchanged
 * and 0 is returned. */
static u_char				/* 1=matched */
match(DCC_GOT_CKS *cks,
      enum CK_MHDR_ST ok, enum CK_MHDR_ST fail,
      const char *tgt_str, u_int tgt_len,
      const char **bp, u_int *bp_len)
{
	u_int len;

	/* compare no more than what remains of the target or the input */
	len = min(tgt_len - cks->mhdr_pos, *bp_len);
	if (strncasecmp(tgt_str + cks->mhdr_pos, *bp, len)) {
		/* switch to failure state if there is enough of the
		 * string to know it does not match */
		cks->mhdr_st = fail;
		return 0;
	}

	*bp += len;
	*bp_len -= len;
	if ((u_int)(cks->mhdr_pos += len) >= tgt_len) {
		/* switch to the success state on a match */
		cks->mhdr_st = ok;
		cks->mhdr_pos = 0;
		return 1;
	}

	/* wait for more input */
	return 0;
}



/* ignore white space
 *
 * Advance *bp past blanks, tabs, carriage returns, and line feeds.
 * Return 0 if that consumes the whole buffer and 1 if *bp stops at
 * something else.
 * The byte at *bp is examined before *bp_len is checked, and so the
 * caller must not pass an empty buffer unless the byte at *bp is readable
 * and not white space. */
static u_char				/* 0=buffer empty */
span_ws(const char **bp, u_int *bp_len)
{
	char c;

	while ((c = **bp) == ' ' || c == '\t' || c == '\r' || c == '\n') {
		++*bp;
		if (--*bp_len == 0)
			return 0;
	}
	return 1;
}



/* skip to white space or after semicolon that precedes the next parameter
 *
 * Return 1 with *bp at white space or just after a ';'.
 * Return 0 if the buffer is exhausted first.
 * Like span_ws(), this looks at the byte at *bp before checking *bp_len. */
static u_char				/* 0=buffer empty */
skip_param(const char **bp, u_int *bp_len)
{
	char c;

	while ((c = **bp) != ' ' && c != '\t' && c != '\r' && c != '\n') {
		++*bp;
		if (c == ';') {
			--*bp_len;
			return 1;
		}
		if (--*bp_len == 0)
			return 0;
	}
	return 1;
}



/* Parse MIME headers
 *	Look for (parts of) Content-Type and Content-Transfer-Encoding
 *	headers in a buffer.  There can be at most one significant (not part of
 *	folded whitespace) '\n' in the buffer and only as the last byte
 *
 * The parser is a state machine in cks->mhdr_st with its position within
 * the current target in cks->mhdr_pos, so a header can be spread among
 * any number of buffers.  It records what it finds in cks->mime_ce,
 * cks->mime_ct, cks->mime_cset, and cks->mime_bnd[].
 *
 * The result is 1 only when the buffer starts a blank line while not in
 * the RFC 822 headers and the beginning-of-line flag is set.
 * Otherwise the result is 0. */
u_char					/* 1=blank line */
parse_mime_hdr(DCC_GOT_CKS *cks,
	       const char *bp, u_int bp_len,
	       u_char in_hdrs)		/* 1=in RFC 822 headers */
{
#define MMATCH(str,ok,fail) match(cks,CK_MHDR_ST_##ok,CK_MHDR_ST_##fail,    \
				  str,sizeof(str)-1, &bp, &bp_len)

	char c;
	DCC_CK_BND *bndp;

	if ((cks->flags & DCC_CKS_MIME_BOL) && !in_hdrs) {
		c = *bp;
		if (c == '\r') {
			/* ignore CR to ease detecting blank line */
			if (--bp_len == 0)
				return 0;
			c = *++bp;
		}
		if (c == '\n')
			return 1;	/* this line is blank */

		/* reset parser line without folded whitespace */
		if (c != ' ' && c != '\t') {
			cks->mhdr_st = CK_MHDR_ST_CE_CT;
			cks->mhdr_pos = 0;
		}
		cks->flags &= ~DCC_CKS_MIME_BOL;
	}

	do {
		switch (cks->mhdr_st) {
		case CK_MHDR_ST_IDLE:
			return 0;

		case CK_MHDR_ST_CE_CT:
			/* This state always precedes the following states */
			if (MMATCH("Content-T", CT_WS, IDLE)) {
				/* NOTE(review): match() may have consumed
				 * the entire buffer, in which case this
				 * looks at the byte after the buffer.
				 * dcc_ck_mime_hdr() passes null terminated
				 * strings and so reads the terminator;
				 * confirm that other callers provide
				 * a readable byte there. */
				switch (*bp) {
				case 'r':
				case 'R':
					cks->mhdr_st = CK_MHDR_ST_CE;
					break;
				case 'y':
				case 'Y':
					cks->mhdr_st = CK_MHDR_ST_CT;
					break;
				default:
					cks->mhdr_st = CK_MHDR_ST_IDLE;
					return 0;
				}
			}
			break;

		case CK_MHDR_ST_CE:
			MMATCH("ransfer-Encoding:", CE_WS, IDLE);
			break;

		case CK_MHDR_ST_CE_WS:
			/* We have matched "Content-Transfer-Encoding:" */
			if (!span_ws(&bp, &bp_len))
				return 0;
			switch (*bp) {
			case 'b':
			case 'B':
				cks->mhdr_st = CK_MHDR_ST_B64;
				break;
			case 'q':
			case 'Q':
				cks->mhdr_st = CK_MHDR_ST_QP;
				break;
			default:
				cks->mhdr_st = CK_MHDR_ST_IDLE;
				return 0;
			}
			break;

		case CK_MHDR_ST_QP:
			if (MMATCH("quoted-printable", IDLE, IDLE))
				cks->mime_ce = DCC_CK_CE_QP;
			break;

		case CK_MHDR_ST_B64:
			if (MMATCH("base64", IDLE, IDLE))
				cks->mime_ce = DCC_CK_CE_B64;
			break;

		case CK_MHDR_ST_CT:
			MMATCH("ype:", CT_WS, IDLE);
			break;

		case CK_MHDR_ST_CT_WS:
			/* We have matched "Content-type:" */
			if (!span_ws(&bp, &bp_len))
				return 0;
			switch (*bp) {
			case 't':
			case 'T':
				cks->mhdr_st = CK_MHDR_ST_TEXT;
				break;
			case 'm':
			case 'M':
				/* do not nest too deeply */
				if (in_hdrs
				    || cks->mime_nest < DIM(cks->mime_bnd)) {
					cks->mhdr_st = CK_MHDR_ST_MULTIPART;
				} else {
					/* no room for another boundary,
					 * and so ignore the header */
					cks->mhdr_st = CK_MHDR_ST_IDLE;
				}
				break;
			default:
				/* assume it is binary noise if it does
				 * not match "Content-type: [tTmM]" */
				cks->mime_ct = DCC_CK_CT_BINARY;
				cks->mhdr_st = CK_MHDR_ST_IDLE;
				return 0;
			}
			break;

		case CK_MHDR_ST_TEXT:
			/* we are looking for "Text" in "Content-type: Text" */
			if (MMATCH("text", HTML, IDLE))
				cks->mime_ct = DCC_CK_CT_TEXT;
			break;

		case CK_MHDR_ST_HTML:
			/* look for "Content-type: Text/html" */
			if (MMATCH("/html", CSET_SKIP_PARAM, CSET_SKIP_PARAM))
				cks->mime_ct = DCC_CK_CT_HTML;
			break;

		case CK_MHDR_ST_CSET_SKIP_PARAM:
			/* Look for semicolon or whitespace preceding next
			 * parameter after "Content-type: Text/html" */
			if (skip_param(&bp, &bp_len))
				cks->mhdr_st = CK_MHDR_ST_CSET_SPAN_WS;
			break;

		case CK_MHDR_ST_CSET_SPAN_WS:
			/* skip optional whitespace before next parameter */
			if (span_ws(&bp, &bp_len))
				cks->mhdr_st = CK_MHDR_ST_CSET;
			break;

		case CK_MHDR_ST_CSET:
			/* have matched "Content-Type: text...;"
			 * and are looking for a "charset=" parameter */
			MMATCH("charset=", CSET_ISO_8859, CSET_SKIP_PARAM);
			break;

		case CK_MHDR_ST_CSET_ISO_8859:
			/* We have matched "Content-Type: text...charset="
			 * and are looking for "ISO-8859-*".
			 * Ignore leading '"' */
			if (cks->mhdr_pos == 0
			    && bp_len > 0 && *bp == '"') {
				++bp;
				--bp_len;
			}
			MMATCH("iso-8859-", CSET_ISO_X, IDLE);
			break;

		case CK_MHDR_ST_CSET_ISO_X:
			/* Accumulate the decimal part number of "ISO-8859-*"
			 * in cks->mhdr_pos.  Only part 2 gets dcc_cset_2.
			 * Every path through this loop returns. */
			for (;;) {
				if (bp_len == 0)
					return 0;
				--bp_len;
				c = *bp++;
				if (c < '0' || c > '9') {
					if ((c == '"' || c == ' ' || c == '\t'
					     || c == ';'
					     || c == '\r' || c == '\n')
					    && cks->mhdr_pos == 2)
						cks->mime_cset = dcc_cset_2;
					else
						cks->mime_cset = dcc_cset_1;
					cks->mhdr_st = CK_MHDR_ST_IDLE;
					return 0;
				}
				cks->mhdr_pos = cks->mhdr_pos*10 + c - '0';
				if (cks->mhdr_pos > 99) {
					cks->mhdr_st = CK_MHDR_ST_IDLE;
					return 0;
				}
			}

		case CK_MHDR_ST_MULTIPART:
			/* We are looking for "Content-type: Multipart"
			 * after having seen "Content-type: M".
			 * If it is not "ultipart", assume "essage" and that
			 * it is text.
			 * Note that match() replaces the state set here
			 * as soon as it sees a mismatch or a complete match. */
			cks->mhdr_st = CK_MHDR_ST_TEXT;
			MMATCH("multipart", BND_SKIP_PARAM, IDLE);
			break;

		case CK_MHDR_ST_BND_SKIP_PARAM:
			/* Look for semicolon or whitespace preceding next
			 * parameter after "Content-type: M" */
			if (skip_param(&bp, &bp_len))
				cks->mhdr_st = CK_MHDR_ST_BND_SPAN_WS;
			break;

		case CK_MHDR_ST_BND_SPAN_WS:
			/* skip optional whitespace before next parameter */
			if (span_ws(&bp, &bp_len))
				cks->mhdr_st = CK_MHDR_ST_BND;
			break;

		case CK_MHDR_ST_BND:
			/* we have matched "Content-type: multipart"
			 * and are looking for the "boundary" parameter */
			if (MMATCH("boundary=", BND_VALUE, BND_SKIP_PARAM)) {
				if (in_hdrs) {
					cks->mime_nest = 0;
					/* allow missing initial blank line */
					cks->mime_bnd_matches = 1;
				}
				bndp = &cks->mime_bnd[cks->mime_nest];
				cks->flags &= ~DCC_CKS_MIME_QUOTED;
				/* the stored boundary starts with "--" */
				bndp->bnd[0] = '-';
				bndp->bnd[1] = '-';
				cks->mhdr_pos = 2;
			}
			break;

		case CK_MHDR_ST_BND_VALUE:
			/* collect the boundary string */
			bndp = &cks->mime_bnd[cks->mime_nest];
			/* this accepts a lot more than RFC 2046 allows,
			 * but spamware written by idiots doesn't comply */
			for (;;) {
				if (bp_len == 0)
					return 0;
				--bp_len;
				c = *bp++;
				if (c == '\n')
					break;
				if (c == '\r')
					continue;
				if ((c == ' ' || c == '\t' || c == ';')
				    && !(cks->flags & DCC_CKS_MIME_QUOTED))
					break;
				if (c == '"') {
					cks->flags ^= DCC_CKS_MIME_QUOTED;
					continue;
				}
				bndp->bnd[cks->mhdr_pos] = c;
				/* give up on a boundary that is too long */
				if (++cks->mhdr_pos >= DCC_CK_BND_MAX) {
					cks->mhdr_st = CK_MHDR_ST_IDLE;
					return 0;
				}
			}
			bndp->bnd_len = cks->mhdr_pos;
			bndp->cmp_len = 0;
			++cks->mime_nest;
			cks->mhdr_st = CK_MHDR_ST_IDLE;
			break;
		}
	} while (bp_len != 0);
	return 0;

#undef MMATCH
}



/* fetch bytes and convert from quoted-printable
 *
 * Decode from *ibufp into obuf until the input is exhausted or obuf_len
 * bytes have been produced.  *ibufp and *ibuf_lenp are advanced past the
 * consumed input.  The decoder state in cks->qp carries partial "=XX"
 * sequences from one call to the next.
 *
 * "=\r\n" and "=\n" are deleted.  The bytes of a sequence that starts with
 * '=' but is not valid quoted-printable are copied to the output. */
u_int					/* output length */
dcc_ck_qp_decode(DCC_GOT_CKS *cks, const char **ibufp, u_int *ibuf_lenp,
		 char *obuf, u_int obuf_len)
{
	/* Get the next input byte or return from the function.
	 * Notice that this returns without updating *ibufp or *ibuf_lenp,
	 * but it can only be reached with ibuf_len==0 if a preceding
	 * GC() in the same pass consumed input, and every case uses GC()
	 * at most once per pass of the loop that has checked ibuf_len. */
#   define GC(c) do {if (!ibuf_len) return result;	\
	--ibuf_len; (c) = *ibuf; ++ibuf;} while (0)

	u_int ibuf_len, result;
	const char *ibuf;
	u_char c = 0;

	if (obuf_len == 0)
		return 0;
	ibuf_len = *ibuf_lenp;
	ibuf = *ibufp;
	result = 0;
	while (ibuf_len != 0) {
		switch (cks->qp.state) {
		case DCC_CK_QP_IDLE:
			GC(c);
			if (c != '=')
				break;
			cks->qp.state = DCC_CK_QP_EQ;
			continue;

		case DCC_CK_QP_EQ:
			/* Consider first character after '=' */
			GC(c);
			cks->qp.x = c;
			if (c == '\r') {
				/* wait to see if a '\n' follows */
				;
			} else if (c == '\n') {
				/* delete "=\n" like "=\r\n"
				 * so that dccproc and dccm agree */
				cks->qp.state = DCC_CK_QP_IDLE;
				continue;
			} else if (c >= '0' && c <= '9') {
				cks->qp.n = c-'0';
			} else if (c >= 'a' && c <= 'f') {
				cks->qp.n = c-('a'-10);
			} else if (c >= 'A' && c <= 'F') {
				cks->qp.n = c-('A'-10);
			} else {
				/* emit the '=' now and the saved byte next */
				cks->qp.state = DCC_CK_QP_FAIL1;
				c = '=';
				break;
			}
			cks->qp.state = DCC_CK_QP_1;
			continue;

		case DCC_CK_QP_1:
			/* consider second character after '=' */
			GC(c);
			cks->qp.y = c;
			if (cks->qp.x == '\r') {
				if (c == '\n') {
					/* delete soft line-break */
					cks->qp.state = DCC_CK_QP_IDLE;
					continue;
				}
				cks->qp.state = DCC_CK_QP_FAIL2;
				c = '=';
				break;
			} else if (c >= '0' && c <= '9') {
				c -= '0';
			} else if (c >= 'a' && c <= 'f') {
				c -= ('a'-10);
			} else if (c >= 'A' && c <= 'F') {
				c -= ('A'-10);
			} else {
				cks->qp.state = DCC_CK_QP_FAIL2;
				c = '=';
				break;
			}
			cks->qp.state = DCC_CK_QP_IDLE;
			/* combine the two hex digits */
			c = (cks->qp.n << 4) | c;
			break;

		case DCC_CK_QP_FAIL1:
			/* output character after '=' of a 2-character
			 * sequence that was not quoted-printable after all */
			cks->qp.state = DCC_CK_QP_IDLE;
			c = cks->qp.x;
			break;

		case DCC_CK_QP_FAIL2:
			/* output character after '=' of a 3-character
			 * sequence that was not quoted-printable after all */
			cks->qp.state = DCC_CK_QP_FAIL3;
			c = cks->qp.x;
			break;

		case DCC_CK_QP_FAIL3:
			/* output third character of a 3-character
			 * sequence that was not quoted-printable after all */
			cks->qp.state = DCC_CK_QP_IDLE;
			c = cks->qp.y;
			break;
		}

		*obuf++ = c;
		if (++result >= obuf_len)
			break;
	}
	*ibuf_lenp = ibuf_len;
	*ibufp = ibuf;
	return result;
#undef GC
}



/* Values of base64 digits indexed by 7-bit ASCII code.
 * Real digits have values 0-63, which leaves the following two
 * values free as markers. */
#define B64B	0100			/* bad */
#define B64EQ	0101			/* '=' */
static const u_char base64_decode[128] = {
    B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x00 */
    B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x08 */
    B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x10 */
    B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x18 */

    B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x20   ! " # $ % & ' */
    B64B, B64B, B64B, 62,   B64B, B64B, B64B, 63,   /* 0x28 ( ) * + , - . / */

    52,   53,   54,   55,   56,   57,   58,   59,   /* 0x30 0 1 2 3 4 5 6 7 */
    60,   61,   B64B, B64B, B64B, B64EQ,B64B, B64B, /* 0x38 8 9 : ; < = > ? */

    B64B, 0,    1,    2,    3,    4,    5,    6,    /* 0x40 @ A B C D E F G */
    7,    8,    9,    10,   11,   12,   13,   14,   /* 0x48 H I J K L M N O */
    15,   16,   17,   18,   19,   20,   21,   22,   /* 0x50 P Q R S T U V W */
    23,   24,   25,   B64B, B64B, B64B, B64B, B64B, /* 0x58 X Y Z [ \ ] ^ _ */

    B64B, 26,   27,   28,   29,   30,   31,   32,   /* 0x60 ` a b c d e f g */
    33,   34,   35,   36,   37,   38,   39,   40,   /* 0x68 h i j k l m n o */
    41,   42,   43,   44,   45,   46,   47,   48,   /* 0x70 p q r s t u v w */
    49,   50,   51,   B64B, B64B, B64B, B64B, B64B, /* 0x78 x y z { | } ~ del */
};


/* fetch bytes and convert from base64
 *
 * Decode from *ibufp into obuf, advancing *ibufp and *ibuf_lenp past the
 * consumed input.  The partial quantum in cks->b64 is carried from one
 * call to the next.  Bytes that are not base64 digits are skipped.
 *
 * Nothing is decoded unless obuf has room for at least 3 bytes.
 * Decoding stops when fewer than 3 bytes might be left in obuf, because
 * a single input byte can produce as many as 3 output bytes. */
u_int					/* output length */
dcc_ck_b64_decode(DCC_GOT_CKS *cks, const char **ibufp, u_int *ibuf_lenp,
		  char *obuf, u_int obuf_len)
{
	u_char c;
	const char *ibuf;
	u_int ibuf_len, result;

	if (obuf_len < 3)
		return 0;
	/* reserve space for the up to 3 bytes written per pass */
	obuf_len -= 3;
	ibuf_len = *ibuf_lenp;
	ibuf = *ibufp;
	result = 0;
	while (ibuf_len != 0) {
		--ibuf_len;
		c = *ibuf++;
		/* The table covers only 7-bit codes.  A byte with the high
		 * bit set cannot be a base64 digit and must not be used
		 * as an index beyond the end of the table. */
		if (c >= sizeof(base64_decode)/sizeof(base64_decode[0]))
			continue;
		c = base64_decode[c];
		if (c == B64B)
			continue;

		if (c == B64EQ) {
			/* flush the bytes of a partial final quantum */
			switch (cks->b64.quantum_cnt) {
			case 2:
				*obuf++ = cks->b64.quantum>>4;
				++result;
				break;
			case 3:
				*obuf++ = cks->b64.quantum>>10;
				*obuf++ = cks->b64.quantum>>2;
				result += 2;
				break;
			}
			cks->b64.quantum_cnt = 0;
			if (result >= obuf_len)
				break;
			/* NOTE(review): there is no "continue" here, and so
			 * the B64EQ marker is shifted into the quantum and
			 * counted as its first digit.  That looks
			 * unintentional, but changing it would change what
			 * is checksummed for base64 that continues after
			 * '='; confirm before touching it. */
		}

		cks->b64.quantum = (cks->b64.quantum << 6) | c;
		if (++cks->b64.quantum_cnt >= 4) {
			/* 4 digits of 6 bits make 3 bytes */
			cks->b64.quantum_cnt = 0;
			*obuf++ = cks->b64.quantum>>16;
			*obuf++ = cks->b64.quantum>>8;
			*obuf++ = cks->b64.quantum;
			result += 3;
			if (result >= obuf_len)
				break;
		}
	}
	*ibuf_lenp = ibuf_len;
	*ibufp = ibuf;
	return result;
}



/* skip parts of URLs
 *
 * Advance the URL recognizer in *url by one character.
 *	url	recognizer state
 *	c	next character of the text
 *	pbufp	caller's pointer into the buffer where the caller collects
 *		the host name; this function only moves the pointer back
 *		and moves bytes already in the buffer
 *
 * The result is a DCC_CK_URL_* code that tells the caller what to do with
 * the character, combined with the character itself shifted left by
 * DCC_CK_URL_SHIFT.  The character in the result has been converted to
 * lower case and, in a host name, decoded from a %XX escape. */
int
dcc_ck_url(DCC_URL_SKIP *url, char c, char **pbufp)
{
#define RET_C(s) return ((c<<DCC_CK_URL_SHIFT) | s)

	/* Continue skipping a URL to its end.
	 * Assume the end is the next blank, comma, '>', or '\n'
	 * unless the URL is quoted.  Then continue to the quote
	 * or until the length has become silly. */

	/* convert ASCII upper to lower case */
	if (c >= 'A' && c <= 'Z')
		c -= 'A' - 'a';

	switch (url->st) {
	case DCC_URL_ST_IDLE:
		if (c == 'h') {
			/* start looking for 't' after 'h' in "http" */
			url->flags = 0;
			url->st = DCC_URL_ST_T1;
		} else if (c == '=') {
			/* look for the '=' in "href=" or "img src=" */
			url->st = DCC_URL_ST_QUOTE;
		}
		RET_C(DCC_CK_URL_CHAR);

	case DCC_URL_ST_QUOTE:
		/* look for '"' or 'H' after "href=" or "img src=" */
		if (c == 'h') {
			url->flags &= ~DCC_URL_QUOTES;
			url->st = DCC_URL_ST_T1;
		} else if (c == '"') {
			url->flags |= DCC_URL_DQUOTED;
			url->st = DCC_URL_ST_QH;
		} else if (c == '\'') {
			url->flags |= DCC_URL_SQUOTED;
			url->st = DCC_URL_ST_QH;
		} else {
			url->st = DCC_URL_ST_IDLE;
		}
		RET_C(DCC_CK_URL_CHAR);

	case DCC_URL_ST_QH:
		/* seen quote; looking for start of URL */
		if (c == 'h') {
			url->st = DCC_URL_ST_T1;
		} else {
			url->st = DCC_URL_ST_IDLE;
		}
		RET_C(DCC_CK_URL_CHAR);

	case DCC_URL_ST_T1:
		/* we are expecting the first 't' of "http" */
		if (c == 't')
			url->st = DCC_URL_ST_T2;
		else
			url->st = DCC_URL_ST_IDLE;
		RET_C(DCC_CK_URL_CHAR);

	case DCC_URL_ST_T2:
		/* we are expecting the second 't' of "http" */
		if (c == 't')
			url->st = DCC_URL_ST_P;
		else
			url->st = DCC_URL_ST_IDLE;
		RET_C(DCC_CK_URL_CHAR);

	case DCC_URL_ST_P:
		/* we are expecting the 'p' of "http" */
		if (c == 'p')
			url->st = DCC_URL_ST_S;
		else
			url->st = DCC_URL_ST_IDLE;
		RET_C(DCC_CK_URL_CHAR);

	case DCC_URL_ST_S:
		/* we are expecting the ':' or 's' after http */
		if (c == 's')
			url->st = DCC_URL_ST_COLON;
		else if (c == ':')
			url->st = DCC_URL_ST_SLASH1;
		else
			url->st = DCC_URL_ST_IDLE;
		RET_C(DCC_CK_URL_CHAR);

	case DCC_URL_ST_COLON:
		/* we are expecting the ':' after http or https */
		if (c == ':')
			url->st = DCC_URL_ST_SLASH1;
		else
			url->st = DCC_URL_ST_IDLE;
		RET_C(DCC_CK_URL_CHAR);

	case DCC_URL_ST_SLASH1:
		/* we are expecting the first '/' after http: */
		if (c == '/')
			url->st = DCC_URL_ST_SLASH2;
		else
			url->st = DCC_URL_ST_IDLE;
		RET_C(DCC_CK_URL_CHAR);

	case DCC_URL_ST_SLASH2:
		/* we are expecting the second '/' after "http:/" */
		if (c != '/') {
			url->st = DCC_URL_ST_IDLE;
			RET_C(DCC_CK_URL_CHAR);
		}
		url->st = DCC_URL_ST_SLASH3_START;
		RET_C(DCC_CK_URL_CK_LEN);

	case DCC_URL_ST_SLASH3_START:
		/* start (or restart) collecting a host name */
		url->dot = 0;
		url->start = *pbufp;
		url->total = 0;
		url->flags &= ~(DCC_URL_DEL_DOMAIN
				| DCC_URL_PERCENT1
				| DCC_URL_PERCENT2);
		url->st = DCC_URL_ST_SLASH3;
		/* fall into DCC_URL_ST_SLASH3 */
	case DCC_URL_ST_SLASH3:
		/* look for the end of the host name */
		++url->total;
again:
		if (c == '.') {
			/* keep only 1st and 2nd level domain names */
			url->flags &= ~DCC_URL_DEL_DOMAIN;
			if (!url->dot) {
				/* do nothing at first '.' unless the name
				 * was too long */
				if (*pbufp >= url->start+DCC_URL_MAX) {
					*pbufp = url->start;
				} else {
					url->dot = *pbufp;
				}
			} else {
				url->flags |= DCC_URL_DEL_DOMAIN;
			}
			RET_C(DCC_CK_URL_DOT);
		}
		/* delay deleting third level domains to not be
		 * fooled by a trailing dot */
		if (url->flags & DCC_URL_DEL_DOMAIN) {
			url->flags &= ~DCC_URL_DEL_DOMAIN;
			/* slide what follows the saved dot position
			 * down to the start of the name */
			memmove(url->start, url->dot, *pbufp - url->dot);
			*pbufp -= (url->dot - url->start);
			url->dot = *pbufp;
		}
		if (c == '/') {
			url->st = DCC_URL_ST_SKIP;
			RET_C(DCC_CK_URL_HOST_END);
		}
		if (c == '"' && (url->flags & DCC_URL_DQUOTED)) {
			url->st = DCC_URL_ST_IDLE;
			RET_C(DCC_CK_URL_HOST_END);
		}
		if (c == '\'' && (url->flags & DCC_URL_SQUOTED)) {
			url->st = DCC_URL_ST_IDLE;
			RET_C(DCC_CK_URL_HOST_END);
		}
		if ((c == '<' || c == '>')
		    && (url->flags & DCC_URL_QUOTES) == 0) {
			url->st = DCC_URL_ST_IDLE;
			RET_C(DCC_CK_URL_HOST_END);
		}
		if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
			if (!(url->flags & DCC_URL_QUOTED)
			    || url->total > DCC_URL_FAILSAFE) {
				url->st = DCC_URL_ST_IDLE;
				RET_C(DCC_CK_URL_HOST_END);
			}
			/* whitespace in a URL hostname is at best username */
			*pbufp = url->start;
			url->st = DCC_URL_ST_SLASH3_START;
			RET_C(DCC_CK_URL_HOST_RESET);
		}
		if (c == '@') {
			/* ignore username and password */
			*pbufp = url->start;
			url->st = DCC_URL_ST_SLASH3_START;
			RET_C(DCC_CK_URL_HOST_RESET);
		}
		if (c == '%') {
			/* start of a %XX escape */
			url->flags &= ~DCC_URL_PERCENT2;
			url->flags |= DCC_URL_PERCENT1;
			RET_C(DCC_CK_URL_SKIP);
		}
		if (url->flags & DCC_URL_PERCENT1) {
			/* c is lower case by now, and so only "a-f"
			 * needs to be recognized */
			if (c >= '0' && c <= '9') {
				c -= '0';
			} else if (c >= 'a' && c <= 'f') {
				c -= 'a'-10;
			} else {
				/* a bad escape restarts the host name */
				*pbufp = url->start;
				url->st = DCC_URL_ST_SLASH3_START;
				RET_C(DCC_CK_URL_HOST_RESET);
			}
			if (url->flags & DCC_URL_PERCENT2) {
				/* second hex digit; treat the decoded
				 * character like any other */
				url->flags &= ~(DCC_URL_PERCENT1
						| DCC_URL_PERCENT2);
				c |= url->percent;
				if (c >= 'A' && c <= 'Z')
					c -= 'A' - 'a';
				goto again;
			}
			/* first hex digit; save it for the second */
			url->percent = c << 4;
			url->flags |= DCC_URL_PERCENT2;
			RET_C(DCC_CK_URL_SKIP);
		}
		if (*pbufp >= url->start+DCC_URL_MAX) {
			/* long garbage is probably a username */
			if (url->total > DCC_URL_FAILSAFE) {
				url->st = DCC_URL_ST_IDLE;
				RET_C(DCC_CK_URL_CHAR);
			}
			RET_C(DCC_CK_URL_SKIP);
		}
		RET_C(DCC_CK_URL_HOST);

	case DCC_URL_ST_SKIP:
		/* skip the rest of the URL */
		++url->total;
		if (c == '"' || c == '\'') {
			url->st = DCC_URL_ST_IDLE;
			RET_C(DCC_CK_URL_SKIP);
		}
		if ((c == '>' || c == ' ' || c == '\t'
		     || c == '\n' || c == '\r')
		    && (!(url->flags & DCC_URL_QUOTES)
			|| url->total > DCC_URL_FAILSAFE)) {
			url->total = 0;
			url->st = DCC_URL_ST_IDLE;
			RET_C(DCC_CK_URL_CHAR);
		}
		RET_C(DCC_CK_URL_SKIP);
	}
	RET_C(DCC_CK_URL_CHAR);

#undef RET_C
}