Mercurial > notdcc
diff dcclib/ckmime.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dcclib/ckmime.c Tue Mar 10 13:49:58 2009 +0100 @@ -0,0 +1,829 @@ +/* Distributed Checksum Clearinghouse + * + * decode MIME for checksums + * + * Copyright (c) 2008 by Rhyolite Software, LLC + * + * This agreement is not applicable to any entity which sells anti-spam + * solutions to others or provides an anti-spam solution as part of a + * security solution sold to other entities, or to a private network + * which employs the DCC or uses data provided by operation of the DCC + * but does not provide corresponding data to other users. + * + * Permission to use, copy, modify, and distribute this software without + * changes for any purpose with or without fee is hereby granted, provided + * that the above copyright notice and this permission notice appear in all + * copies and any distributed versions or copies are either unchanged + * or not called anything similar to "DCC" or "Distributed Checksum + * Clearinghouse". + * + * Parties not eligible to receive a license under this agreement can + * obtain a commercial license to use DCC by contacting Rhyolite Software + * at sales@rhyolite.com. + * + * A commercial license would be for Distributed Checksum and Reputation + * Clearinghouse software. That software includes additional features. This + * free license for Distributed ChecksumClearinghouse Software does not in any + * way grant permision to use Distributed Checksum and Reputation Clearinghouse + * software + * + * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC + * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES + * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Rhyolite Software DCC 1.3.103-1.39 $Revision$ + */ + +#include "dcc_ck.h" + +/* Notice MIME headers */ +void +dcc_ck_mime_hdr(DCC_GOT_CKS *cks, + const char *hdr, /* entire header line or name only */ + const char *str) /* header value if not after name */ +{ + /* parse at least the header name */ + cks->mhdr_st = CK_MHDR_ST_CE_CT; + cks->mhdr_pos = 0; + parse_mime_hdr(cks, hdr, strlen(hdr), 1); + + /* parse the header value if present and we care about the header */ + if (str + && cks->mhdr_st != CK_MHDR_ST_IDLE) { + parse_mime_hdr(cks, ":", 1, 1); + parse_mime_hdr(cks, str, strlen(str), 1); + } + + /* force the end of the line */ + if (cks->mhdr_st != CK_MHDR_ST_IDLE) + parse_mime_hdr(cks, "\n", 1, 1); + + if (cks->mime_nest != 0) + cks->mp_st = CK_MP_ST_PREAMBLE; + + cks->flags |= DCC_CKS_MIME_BOL; +} + + + +static u_char /* 1=matched */ +match(DCC_GOT_CKS *cks, + enum CK_MHDR_ST ok, enum CK_MHDR_ST fail, + const char *tgt_str, u_int tgt_len, + const char **bp, u_int *bp_len) +{ + u_int len; + + len = min(tgt_len - cks->mhdr_pos, *bp_len); + if (strncasecmp(tgt_str + cks->mhdr_pos, *bp, len)) { + /* switch to failure state if there is enough of the + * string to know it does not match */ + cks->mhdr_st = fail; + return 0; + } + + *bp += len; + *bp_len -= len; + if ((u_int)(cks->mhdr_pos += len) >= tgt_len) { + /* switch to the success state on a match */ + cks->mhdr_st = ok; + cks->mhdr_pos = 0; + return 1; + } + + /* wait for more input */ + return 0; +} + + + +/* ignore white space */ +static u_char /* 0=buffer empty */ +span_ws(const char **bp, u_int *bp_len) +{ + char c; + while ((c = **bp) == ' ' || c == '\t' || c == '\r' || c == '\n') { + ++*bp; + if (--*bp_len == 0) + return 0; + } + return 1; +} + + + +/* skip to white space or after semicolon that precedes the next parameter */ +static u_char /* 0=buffer empty */ +skip_param(const char **bp, u_int *bp_len) +{ + char c; + while ((c = **bp) != ' ' && c != '\t' && c != '\r' && c != '\n') { + ++*bp; + if (c == ';') { + --*bp_len; + return 1; + } + if (--*bp_len == 0) + return 0; + } + return 1; +} + + + +/* Parse MIME headers + * Look for (parts of) Content-Type and Content-Transfer-Encoding + * headers in a buffer. There can be at most one significant (not part of + * folded whitespace) '\n' in the buffer and only as the last byte */ +u_char /* 1=blank line */ +parse_mime_hdr(DCC_GOT_CKS *cks, + const char *bp, u_int bp_len, + u_char in_hdrs) /* 1=in RFC 822 headers */ +{ +#define MMATCH(str,ok,fail) match(cks,CK_MHDR_ST_##ok,CK_MHDR_ST_##fail, \ + str,sizeof(str)-1, &bp, &bp_len) + char c; + DCC_CK_BND *bndp; + + if ((cks->flags & DCC_CKS_MIME_BOL) + && !in_hdrs) { + c = *bp; + if (c == '\r') { + /* ignore CR to ease detecting blank line */ + if (--bp_len == 0) + return 0; + c = *++bp; + } + if (c == '\n') + return 1; /* this line is blank */ + + /* reset parser line without folded whitespace */ + if (c != ' ' && c != '\t') { + cks->mhdr_st = CK_MHDR_ST_CE_CT; + cks->mhdr_pos = 0; + } + cks->flags &= ~DCC_CKS_MIME_BOL; + } + + do { + switch (cks->mhdr_st) { + case CK_MHDR_ST_IDLE: + return 0; + + case CK_MHDR_ST_CE_CT: + /* This state always preceeds the following states */ + if (MMATCH("Content-T", CT_WS, IDLE)) { + switch (*bp) { + case 'r': + case 'R': + cks->mhdr_st = CK_MHDR_ST_CE; + break; + case 'y': + case 'Y': + cks->mhdr_st = CK_MHDR_ST_CT; + break; + default: + cks->mhdr_st = CK_MHDR_ST_IDLE; + return 0; + } + } + break; + + case CK_MHDR_ST_CE: + MMATCH("ransfer-Encoding:", CE_WS, IDLE); + break; + case CK_MHDR_ST_CE_WS: + if (!span_ws(&bp, &bp_len)) + return 0; + switch (*bp) { + case 'b': + case 'B': + cks->mhdr_st = CK_MHDR_ST_B64; + break; + case 'q': + case 'Q': + cks->mhdr_st = CK_MHDR_ST_QP; + break; + default: + cks->mhdr_st = CK_MHDR_ST_IDLE; + return 0; + } + break; + case CK_MHDR_ST_QP: + if (MMATCH("quoted-printable", IDLE, IDLE)) + cks->mime_ce = DCC_CK_CE_QP; + break; + case CK_MHDR_ST_B64: + if (MMATCH("base64", IDLE, IDLE)) + cks->mime_ce = DCC_CK_CE_B64; + break; + + case CK_MHDR_ST_CT: + MMATCH("ype:", CT_WS, IDLE); + break; + case CK_MHDR_ST_CT_WS: + /* We have matched "Content-type:" */ + if (!span_ws(&bp, &bp_len)) + return 0; + switch (*bp) { + case 't': + case 'T': + cks->mhdr_st = CK_MHDR_ST_TEXT; + break; + case 'm': + case 'M': + /* do not nest too deeply */ + if (in_hdrs + || cks->mime_nest < DIM(cks->mime_bnd)) { + cks->mhdr_st = CK_MHDR_ST_MULTIPART; + } else { + cks->mhdr_st = CK_MHDR_ST_TEXT; + cks->mhdr_st = CK_MHDR_ST_IDLE; + } + break; + default: + /* assume it is binary noise if it does + * not match "Content-type: [tTmM]" */ + cks->mime_ct = DCC_CK_CT_BINARY; + cks->mhdr_st = CK_MHDR_ST_IDLE; + return 0; + } + break; + case CK_MHDR_ST_TEXT: + /* we are looking for "Text" in "Content-type: Text" */ + if (MMATCH("text", HTML, IDLE)) + cks->mime_ct = DCC_CK_CT_TEXT; + break; + case CK_MHDR_ST_HTML: + /* look for "Content-type: Text/html" */ + if (MMATCH("/html", CSET_SKIP_PARAM, CSET_SKIP_PARAM)) + cks->mime_ct = DCC_CK_CT_HTML; + break; + case CK_MHDR_ST_CSET_SKIP_PARAM: + /* Look for semicolon or whitespace preceding next + * parameter after "Content-type: Text/html" */ + if (skip_param(&bp, &bp_len)) + cks->mhdr_st = CK_MHDR_ST_CSET_SPAN_WS; + break; + case CK_MHDR_ST_CSET_SPAN_WS: + /* skip optional whitespace before next parameter */ + if (span_ws(&bp, &bp_len)) + cks->mhdr_st = CK_MHDR_ST_CSET; + break; + case CK_MHDR_ST_CSET: + /* have matched "Content-Type: text...;" + * and are looking for a "charset=" parameter */ + MMATCH("charset=", CSET_ISO_8859, CSET_SKIP_PARAM); + break; + case CK_MHDR_ST_CSET_ISO_8859: + /* We have matched "Content-Type: text...charset=" + * and are looking for "ISO-8859-*". + * Ignore leading '"' */ + if (cks->mhdr_pos == 0 + && bp_len > 0 && *bp == '"') { + ++bp; + --bp_len; + } + MMATCH("iso-8859-", CSET_ISO_X, IDLE); + break; + case CK_MHDR_ST_CSET_ISO_X: + for (;;) { + if (bp_len == 0) + return 0; + --bp_len; + c = *bp++; + if (c < '0' || c > '9') { + if ((c == '"' || c == ' ' || c == '\t' + || c == ';' + || c == '\r' || c == '\n') + && cks->mhdr_pos == 2) + cks->mime_cset = dcc_cset_2; + else + cks->mime_cset = dcc_cset_1; + cks->mhdr_st = CK_MHDR_ST_IDLE; + return 0; + } + cks->mhdr_pos = cks->mhdr_pos*10 + c - '0'; + if (cks->mhdr_pos > 99) { + cks->mhdr_st = CK_MHDR_ST_IDLE; + return 0; + } + } + case CK_MHDR_ST_MULTIPART: + /* We are looking for "Content-type: Multipart" + * after having seen "Content-type: M". + * If it is not "ultipart", assume "essage" and that + * it is text. */ + cks->mhdr_st = CK_MHDR_ST_TEXT; + MMATCH("multipart", BND_SKIP_PARAM, IDLE); + break; + case CK_MHDR_ST_BND_SKIP_PARAM: + /* Look for semicolon or whitespace preceding next + * parameter after "Content-type: M" */ + if (skip_param(&bp, &bp_len)) + cks->mhdr_st = CK_MHDR_ST_BND_SPAN_WS; + break; + case CK_MHDR_ST_BND_SPAN_WS: + /* skip optional whitespace before next parameter */ + if (span_ws(&bp, &bp_len)) + cks->mhdr_st = CK_MHDR_ST_BND; + break; + case CK_MHDR_ST_BND: + /* we have matched "Content-type: multipart" + * and are looking for the "boundary" parameter */ + if (MMATCH("boundary=", BND_VALUE, BND_SKIP_PARAM)) { + if (in_hdrs) { + cks->mime_nest = 0; + /* allow missing initial blank line */ + cks->mime_bnd_matches = 1; + } + bndp = &cks->mime_bnd[cks->mime_nest]; + cks->flags &= ~DCC_CKS_MIME_QUOTED; + bndp->bnd[0] = '-'; + bndp->bnd[1] = '-'; + cks->mhdr_pos = 2; + } + break; + case CK_MHDR_ST_BND_VALUE: + /* collect the bounary string */ + bndp = &cks->mime_bnd[cks->mime_nest]; + /* this accepts a lot more than RFC 2046 allows, + * but spamware written by idiots doesn't comply */ + for (;;) { + if (bp_len == 0) + return 0; + --bp_len; + c = *bp++; + if (c == '\n') + break; + if (c == '\r') + continue; + if ((c == ' ' || c == '\t' || c == ';') + && !(cks->flags & DCC_CKS_MIME_QUOTED)) + break; + if (c == '"') { + cks->flags ^= DCC_CKS_MIME_QUOTED; + continue; + } + bndp->bnd[cks->mhdr_pos] = c; + if (++cks->mhdr_pos >= DCC_CK_BND_MAX) { + cks->mhdr_st = CK_MHDR_ST_IDLE; + return 0; + } + } + bndp->bnd_len = cks->mhdr_pos; + bndp->cmp_len = 0; + ++cks->mime_nest; + cks->mhdr_st = CK_MHDR_ST_IDLE; + break; + } + } while (bp_len != 0); + return 0; + +#undef MMATCH +#undef MKSIP_WS +} + + + +/* fetch bytes and convert from quoted-printable */ +u_int /* output length */ +dcc_ck_qp_decode(DCC_GOT_CKS *cks, const char **ibufp, u_int *ibuf_lenp, + char *obuf, u_int obuf_len) +{ +# define GC(c) do {if (!ibuf_len) return result; \ + --ibuf_len; (c) = *ibuf; ++ibuf;} while (0) + u_int ibuf_len, result; + const char *ibuf; + u_char c = 0; + + if (obuf_len == 0) + return 0; + ibuf_len = *ibuf_lenp; + ibuf = *ibufp; + result = 0; + while (ibuf_len != 0) { + switch (cks->qp.state) { + case DCC_CK_QP_IDLE: + GC(c); + if (c != '=') + break; + cks->qp.state = DCC_CK_QP_EQ; + continue; + + case DCC_CK_QP_EQ: + /* Consider first character after '=' */ + GC(c); + cks->qp.x = c; + if (c == '\r') { + ; + } else if (c == '\n') { + /* delete "=\n" like "=\r\n" + * so that dccproc and dccm agree */ + cks->qp.state = DCC_CK_QP_IDLE; + continue; + } else if (c >= '0' && c <= '9') { + cks->qp.n = c-'0'; + } else if (c >= 'a' && c <= 'f') { + cks->qp.n = c-('a'-10); + } else if (c >= 'A' && c <= 'F') { + cks->qp.n = c-('A'-10); + } else { + cks->qp.state = DCC_CK_QP_FAIL1; + c = '='; + break; + } + cks->qp.state = DCC_CK_QP_1; + continue; + + case DCC_CK_QP_1: + /* consider second character after '=' */ + GC(c); + cks->qp.y = c; + if (cks->qp.x == '\r') { + if (c == '\n') { + /* delete soft line-break */ + cks->qp.state = DCC_CK_QP_IDLE; + continue; + } + cks->qp.state = DCC_CK_QP_FAIL2; + c = '='; + break; + } else if (c >= '0' && c <= '9') { + c -= '0'; + } else if (c >= 'a' && c <= 'f') { + c -= ('a'-10); + } else if (c >= 'A' && c <= 'F') { + c -= ('A'-10); + } else { + cks->qp.state = DCC_CK_QP_FAIL2; + c = '='; + break; + } + cks->qp.state = DCC_CK_QP_IDLE; + c = (cks->qp.n << 4) | c; + break; + + case DCC_CK_QP_FAIL1: + /* output character after '=' of a 2-character + * sequence that was not quoted-printable after all */ + cks->qp.state = DCC_CK_QP_IDLE; + c = cks->qp.x; + break; + + case DCC_CK_QP_FAIL2: + /* output character after '=' of a 3-character + * sequence that was not quoted-printable after all */ + cks->qp.state = DCC_CK_QP_FAIL3; + c = cks->qp.x; + break; + + case DCC_CK_QP_FAIL3: + /* output third character of a 3-character + * sequence that was not quoted-printable after all */ + cks->qp.state = DCC_CK_QP_IDLE; + c = cks->qp.y; + break; + } + + *obuf++ = c; + if (++result >= obuf_len) + break; + } + *ibuf_lenp = ibuf_len; + *ibufp = ibuf; + return result; +#undef GC +} + + + + +#define B64B 0100 /* bad */ +#define B64EQ 0101 /* '=' */ +static u_char base64_decode[128] = { + B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x00 */ + B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x08 */ + B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x10 */ + B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x18 */ + + B64B, B64B, B64B, B64B, B64B, B64B, B64B, B64B, /* 0x20 ! " # $ % & ' */ + B64B, B64B, B64B, 62, B64B, B64B, B64B, 63, /* 0x28 ( ) * + , - . / */ + + 52, 53, 54, 55, 56, 57, 58, 59, /* 0x30 0 1 2 3 4 5 6 7 */ + 60, 61, B64B, B64B, B64B, B64EQ,B64B, B64B, /* 0x38 8 9 : ; < = > ? */ + + B64B, 0, 1, 2, 3, 4, 5, 6, /* 0x40 @ A B C D E F G */ + 7, 8, 9, 10, 11, 12, 13, 14, /* 0x48 H I J K L M N O */ + + 15, 16, 17, 18, 19, 20, 21, 22, /* 0x50 P Q R S T U V W */ + 23, 24, 25, B64B, B64B, B64B, B64B, B64B, /* 0x58 X Y Z [ \ ] ^ _ */ + + B64B, 26, 27, 28, 29, 30, 31, 32, /* 0x60 ` a b c d e f g */ + 33, 34, 35, 36, 37, 38, 39, 40, /* 0x68 h i j k l m n o */ + + 41, 42, 43, 44, 45, 46, 47, 48, /* 0x70 p q r s t u v w */ + 49, 50, 51, B64B, B64B, B64B, B64B, B64B, /* 0x78 x y z { | } ~ del */ +}; + +u_int /* output length */ +dcc_ck_b64_decode(DCC_GOT_CKS *cks, const char **ibufp, u_int *ibuf_lenp, + char *obuf, u_int obuf_len) +{ + u_char c; + const char *ibuf; + u_int ibuf_len, result; + + if (obuf_len < 3) + return 0; + obuf_len -= 3; + ibuf_len = *ibuf_lenp; + ibuf = *ibufp; + result = 0; + while (ibuf_len != 0) { + --ibuf_len; + c = *ibuf++; + c = base64_decode[c]; + if (c == B64B) + continue; + + if (c == B64EQ) { + switch (cks->b64.quantum_cnt) { + case 2: + *obuf++ = cks->b64.quantum>>4; + ++result; + break; + case 3: + *obuf++ = cks->b64.quantum>>10; + *obuf++ = cks->b64.quantum>>2; + result += 2; + break; + } + cks->b64.quantum_cnt = 0; + if (result >= obuf_len) + break; + } + + cks->b64.quantum = (cks->b64.quantum << 6) | c; + if (++cks->b64.quantum_cnt >= 4) { + cks->b64.quantum_cnt = 0; + *obuf++ = cks->b64.quantum>>16; + *obuf++ = cks->b64.quantum>>8; + *obuf++ = cks->b64.quantum; + result += 3; + if (result >= obuf_len) + break; + } + } + *ibuf_lenp = ibuf_len; + *ibufp = ibuf; + return result; +} + + + +/* skip parts of URLs */ +int +dcc_ck_url(DCC_URL_SKIP *url, char c, char **pbufp) +{ +#define RET_C(s) return ((c<<DCC_CK_URL_SHIFT) | s) + + /* Continue skipping a URL to its end. + * Assume the end is the next blank, comma, '>', or '\n' + * unless the URL is quoted. Then continue to the quote + * or until the length has become silly. */ + + /* convert ASCII upper to lower case */ + if (c >= 'A' && c <= 'Z') + c -= 'A' - 'a'; + + switch (url->st) { + case DCC_URL_ST_IDLE: + if (c == 'h') { + /* start looking for 't' after 'h' in "http" */ + url->flags = 0; + url->st = DCC_URL_ST_T1; + } else if (c == '=') { + /* look for the '=' in "href=" or "img src=" */ + url->st = DCC_URL_ST_QUOTE; + } + RET_C(DCC_CK_URL_CHAR); + + case DCC_URL_ST_QUOTE: + /* look for '"' or 'H' after "href=" or "img src= */ + if (c == 'h') { + url->flags &= ~DCC_URL_QUOTES; + url->st = DCC_URL_ST_T1; + } else if (c == '"') { + url->flags |= DCC_URL_DQUOTED; + url->st = DCC_URL_ST_QH; + } else if (c == '\'') { + url->flags |= DCC_URL_SQUOTED; + url->st = DCC_URL_ST_QH; + } else { + url->st = DCC_URL_ST_IDLE; + } + RET_C(DCC_CK_URL_CHAR); + + case DCC_URL_ST_QH: + /* seen quote; looking for start of URL */ + if (c == 'h') { + url->st = DCC_URL_ST_T1; + } else { + url->st = DCC_URL_ST_IDLE; + } + RET_C(DCC_CK_URL_CHAR); + + case DCC_URL_ST_T1: + if (c == 't') + url->st = DCC_URL_ST_T2; + else + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_CHAR); + + case DCC_URL_ST_T2: + if (c == 't') + url->st = DCC_URL_ST_P; + else + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_CHAR); + + case DCC_URL_ST_P: + if (c == 'p') + url->st = DCC_URL_ST_S; + else + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_CHAR); + + case DCC_URL_ST_S: + /* we are expecting the ':' or 's' after http */ + if (c == 's') + url->st = DCC_URL_ST_COLON; + else if (c == ':') + url->st = DCC_URL_ST_SLASH1; + else + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_CHAR); + + case DCC_URL_ST_COLON: + /* we are expecting the ':' after http or https */ + if (c == ':') + url->st = DCC_URL_ST_SLASH1; + else + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_CHAR); + + case DCC_URL_ST_SLASH1: + /* we are expecting the first '/' after http: */ + if (c == '/') + url->st = DCC_URL_ST_SLASH2; + else + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_CHAR); + + case DCC_URL_ST_SLASH2: + /* we are expecting the second '/' after http:/" */ + if (c != '/') { + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_CHAR); + } + url->st = DCC_URL_ST_SLASH3_START; + RET_C(DCC_CK_URL_CK_LEN); + + case DCC_URL_ST_SLASH3_START: + url->dot = 0; + url->start = *pbufp; + url->total = 0; + url->flags &= ~(DCC_URL_DEL_DOMAIN + | DCC_URL_PERCENT1 | DCC_URL_PERCENT2); + url->st = DCC_URL_ST_SLASH3; + /* fall into DCC_URL_ST_SLASH3 */ + case DCC_URL_ST_SLASH3: + /* look for the end of the host name */ + ++url->total; +again: + if (c == '.') { + /* keep only 1st and 2nd level domain names */ + url->flags &= ~DCC_URL_DEL_DOMAIN; + if (!url->dot) { + /* do nothing at first '.' unless the name + * was too long */ + if (*pbufp >= url->start+DCC_URL_MAX) { + *pbufp = url->start; + } else { + url->dot = *pbufp; + } + } else { + url->flags |= DCC_URL_DEL_DOMAIN; + } + RET_C(DCC_CK_URL_DOT); + } + /* delay deleting third level domains to not be + * fooled by a trailing dot */ + if (url->flags & DCC_URL_DEL_DOMAIN) { + url->flags &= ~DCC_URL_DEL_DOMAIN; + memmove(url->start, url->dot, + *pbufp - url->dot); + *pbufp -= (url->dot - url->start); + url->dot = *pbufp; + } + + if (c == '/') { + url->st = DCC_URL_ST_SKIP; + RET_C(DCC_CK_URL_HOST_END); + } + if (c == '"' && (url->flags & DCC_URL_DQUOTED)) { + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_HOST_END); + } + if (c == '\'' && (url->flags & DCC_URL_SQUOTED)) { + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_HOST_END); + } + if ((c == '<' || c == '>') + && (url->flags & DCC_URL_QUOTES) == 0) { + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_HOST_END); + } + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { + if (!(url->flags & DCC_URL_QUOTED) + || url->total > DCC_URL_FAILSAFE) { + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_HOST_END); + } + /* whitespace in a URL hostname is at best username */ + *pbufp = url->start; + url->st = DCC_URL_ST_SLASH3_START; + RET_C(DCC_CK_URL_HOST_RESET); + } + if (c == '@') { + /* ignore username and password */ + *pbufp = url->start; + url->st = DCC_URL_ST_SLASH3_START; + RET_C(DCC_CK_URL_HOST_RESET); + } + + if (c == '%') { + url->flags &= ~DCC_URL_PERCENT2; + url->flags |= DCC_URL_PERCENT1; + RET_C(DCC_CK_URL_SKIP); + } + if (url->flags & DCC_URL_PERCENT1) { + if (c >= '0' && c <= '9') { + c -= '0'; + } else if (c >= 'a' && c <= 'f') { + c -= 'a'-10; + } else { + *pbufp = url->start; + url->st = DCC_URL_ST_SLASH3_START; + RET_C(DCC_CK_URL_HOST_RESET); + } + if (url->flags & DCC_URL_PERCENT2) { + url->flags &= ~(DCC_URL_PERCENT1 + | DCC_URL_PERCENT2); + c |= url->percent; + if (c >= 'A' && c <= 'Z') + c -= 'A' - 'a'; + goto again; + } + url->percent = c << 4; + url->flags |= DCC_URL_PERCENT2; + RET_C(DCC_CK_URL_SKIP); + } + + if (*pbufp >= url->start+DCC_URL_MAX) { + /* long garbage is probably a username */ + if (url->total > DCC_URL_FAILSAFE) { + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_CHAR); + } + RET_C(DCC_CK_URL_SKIP); + } + RET_C(DCC_CK_URL_HOST); + + case DCC_URL_ST_SKIP: + /* skip the rest of the URL */ + ++url->total; + if (c == '"' || c == '\'') { + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_SKIP); + } + if ((c == '>' || c == ' ' || c == '\t' + || c == '\n' || c == '\r') + && (!(url->flags & DCC_URL_QUOTES) + || url->total > DCC_URL_FAILSAFE)) { + url->total = 0; + url->st = DCC_URL_ST_IDLE; + RET_C(DCC_CK_URL_CHAR); + } + RET_C(DCC_CK_URL_SKIP); + } + RET_C(DCC_CK_URL_CHAR); + +#undef RET_C +}