Mercurial > notdcc
view dcclib/ck.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
line wrap: on
line source
/* Distributed Checksum Clearinghouse
 *
 * compute simple checksums
 *
 * Copyright (c) 2008 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software without
 * changes for any purpose with or without fee is hereby granted, provided
 * that the above copyright notice and this permission notice appear in all
 * copies and any distributed versions or copies are either unchanged
 * or not called anything similar to "DCC" or "Distributed Checksum
 * Clearinghouse".
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC by contacting Rhyolite Software
 * at sales@rhyolite.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software. That software includes additional features. This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.103-1.90 $Revision$
 */

#include "dcc_ck.h"
#include "dcc_heap_debug.h"
#include "dcc_xhdr.h"
#ifndef DCC_WIN32
#include <arpa/inet.h>
#endif


/* "substitute" or locally configured checksums */
typedef struct {
	u_int		nm_len;
	const char	*nm;		/* name of the checksum */
} DCC_SUB_CK;
static DCC_SUB_CK sub_cks[DCC_MAX_SUB_CKS];
static u_int num_sub_cks;


/* get the checksum of an IPv6 address
 *	The checksum is the MD5 digest of the raw bytes of *addr. */
void
dcc_ck_ipv6(DCC_SUM sum, const struct in6_addr *addr)
{
	MD5_CTX ctx;

	MD5Init(&ctx);
	MD5Update(&ctx, (void *)addr, sizeof(*addr));
	MD5Final(sum, &ctx);
}



/* add an IP address to the set of checksums
 *	Fills the DCC_CK_IP slot of *cks and remembers the address in
 *	cks->ip_addr.  addrp may point at cks->ip_addr itself, in which
 *	case the self-assignment is skipped. */
void
dcc_get_ipv6_ck(DCC_GOT_CKS *cks, const struct in6_addr *addrp)
{
	cks->sums[DCC_CK_IP].type = DCC_CK_IP;
	cks->sums[DCC_CK_IP].rpt2srvr = 1;
	cks->sums[DCC_CK_IP].tgts = DCC_TGTS_INVALID;
	dcc_ck_ipv6(cks->sums[DCC_CK_IP].sum, addrp);

	if (&cks->ip_addr != addrp)
		cks->ip_addr = *addrp;
}



/* forget the IP address and its checksum */
void
dcc_unget_ip_ck(DCC_GOT_CKS *cks)
{
	memset(&cks->ip_addr, 0, sizeof(cks->ip_addr));
	CLR_GOT_SUM(&cks->sums[DCC_CK_IP]);
	/* NOTE(review): the same slot is cleared twice; the second call looks
	 * redundant unless CLR_GOT_SUM() is not idempotent -- confirm against
	 * the macro definition before removing it. */
	CLR_GOT_SUM(&cks->sums[DCC_CK_IP]);
}



/* Make DCC_CK_IP from a string containing an IPv4 or IPv6 address.
 *	Because inet_pton() is picky, the string must be unambiguous and
 *	fussy.
 *	Returns 1 if the string was converted by dcc_str2ip(), 0 if not. */
u_char
dcc_get_str_ip_ck(DCC_GOT_CKS *cks,	/* put checksum here */
		  const char *str)	/* from this IP address string */
{
	DCC_SOCKU su;

	if (!dcc_str2ip(&su, str))
		return 0;

	if (su.sa.sa_family == AF_INET) {
		/* treat IPv4 addresses as IPv6 so that everyone computes
		 * the same checksum */
		dcc_ipv4toipv6(&cks->ip_addr, su.ipv4.sin_addr);
	} else {
		cks->ip_addr = su.ipv6.sin6_addr;
	}
	dcc_get_ipv6_ck(cks, &cks->ip_addr);
	return 1;
}



/* Compute a checksum from a string with matching but optional carets or
 *	quotes, after stripping the quotes or carets.
 *	Ignore case and white space.
 *	At most DCC_HDR_CK_MAX characters survive the filtering; the rest of
 *	the string is ignored.  If hdr is not null, hdr_len bytes of it are
 *	hashed before the filtered string. */
void
dcc_str2ck(DCC_SUM sum,
	   const char *hdr,		/* substitute header type */
	   u_int hdr_len,
	   const char *str)		/* string to checksum */
{
	MD5_CTX ctx;
	u_int len;
	char *p;
	char c, cbuf[DCC_HDR_CK_MAX];

	/* ignore whitespace, [<>'",] and case
	 * do not ignore [.-_] to prevent confusing hostnames */
	p = cbuf;
	while ((c = *str++) != '\0' && p <= LAST(cbuf)) {
		if (DCC_IS_WHITE(c)
		    || c == '<' || c == '>'
		    || c == '\'' || c == '"' || c == ',')
			continue;
		*p++ = DCC_TO_LOWER(c);
	}
	str = cbuf;
	len = p - str;

	/* strip trailing periods, mostly for mail_host */
	while (len >= 1 && *(p-1) == '.') {
		--len;
		--p;
	}

	MD5Init(&ctx);
	if (hdr)
		MD5Update(&ctx, hdr, hdr_len);
	MD5Update(&ctx, str, len);
	MD5Final(sum, &ctx);
}



/* make checksum from a string for headers and envelope
 *	Only the envelope and header string types are accepted; the other
 *	listed types are logged as invalid and rejected. */
u_char					/* 1=ok 0=bad string */
dcc_get_cks(DCC_GOT_CKS *cks,		/* put checksum here */
	    DCC_CK_TYPES type,
	    const char *str,		/* checksum of this string */
	    u_char rpt2srvr)
{
	DCC_GOT_SUM *g;

	g = &cks->sums[type];

	switch (type) {
	case DCC_CK_INVALID:
	case DCC_CK_IP:
	case DCC_CK_SUB:
	case DCC_CK_SRVR_ID:
	case DCC_CK_BODY:
	case DCC_CK_FUZ1:
	case DCC_CK_FUZ2:
	case DCC_CK_G_MSG_R_TOTAL:
	case DCC_CK_G_TRIPLE_R_BULK:
		dcc_logbad(EX_SOFTWARE, "invalid checksum %s",
			   dcc_type2str_err(type, 0, 0, 0));
		return 0;

	case DCC_CK_ENV_FROM:
	case DCC_CK_FROM:
	case DCC_CK_ENV_TO:
	case DCC_CK_RECEIVED:
	case DCC_CK_MESSAGE_ID:
		dcc_str2ck(g->sum, 0, 0, str);
		break;
	}

	g->type = type;
	g->rpt2srvr = rpt2srvr;
	g->tgts = DCC_TGTS_INVALID;
	return 1;
}



/* make checksum for a locally configured header
 *	The header name is looked up, ignoring case, in the list built by
 *	dcc_add_sub_hdr().  The checksum covers the stored (lower case) name
 *	followed by the filtered header value. */
u_char					/* 1=done 0=failed */
dcc_ck_get_sub(DCC_GOT_CKS *cks,
	       const char *hdr,		/* header name, not '\0' terminated */
	       const char *str)		/* header value if not after hdr */
{
	DCC_GOT_SUM *g;
	const DCC_SUB_CK *sck;
	DCC_CK_TYPES type;
	int i;

	/* look for the header name in the list of locally configured headers */
	sck = &sub_cks[0];
	for (i = num_sub_cks; ; ++sck, --i) {
		if (i <= 0)
			return 0;	/* this header is not in the list */
		if (!strncasecmp(hdr, sck->nm, sck->nm_len)
		    && (hdr[sck->nm_len] == '\0' || hdr[sck->nm_len] == ':'))
			break;
	}

	/* Get the header value if the caller did not separate it.
	 * The colon is present if the header field was not separated.
	 * Step over the separator only when there is one, so that a bare
	 * '\0'-terminated name yields an empty value instead of a pointer
	 * past the end of the string. */
	if (!str) {
		str = hdr+sck->nm_len;
		if (*str != '\0')
			++str;
	}

	/* find a free checksum slot
	 * or a slot already assigned to the header */
	type = DCC_CK_SUB;
	g = &cks->sums[type];
	for (;;) {
		if (type >= DIM(cks->sums))
			return 0;	/* none free */
		if (g->type == DCC_CK_INVALID
		    && (type > DCC_CK_TYPE_LAST || type == DCC_CK_SUB))
			break;		/* found a free slot */
		if (g->type == DCC_CK_SUB && g->hdr_nm == sck->nm)
			break;		/* found previously assigned slot */
		++g;
		++type;
	}

	dcc_str2ck(g->sum, sck->nm, sck->nm_len, str);
	g->type = DCC_CK_SUB;
	g->rpt2srvr = 1;
	g->tgts = DCC_TGTS_INVALID;
	g->hdr_nm = sck->nm;
	return 1;
}



/* add to the list of locally configured or substitute headers
 *	hdr is a field name with an optional single trailing ':'.
 *	The name is stored in lower case without the colon.
 *	Returns 1 if the name is now in the list (including when it was
 *	already there) and 0 with a message in emsg if the list is full or
 *	the name is empty or contains an illegal character. */
u_char
dcc_add_sub_hdr(DCC_EMSG emsg, const char *hdr)
{
	const char *p;
	char c, *q;
	u_int n, len;

	if (num_sub_cks >= DIM(sub_cks)) {
		dcc_pemsg(EX_USAGE, emsg,
			  "too many substitute headers with \"%s\"", hdr);
		return 0;
	}

	p = hdr;
	for (;;) {
		if (*p == '\0')
			break;
		/* A single trailing colon ends the name.  Stop on the colon
		 * itself so that p-hdr is exactly the length of the name;
		 * backing up here would drop the last character of the name
		 * and underflow the length for a lone ":". */
		if (*p == ':' && p[1] == '\0')
			break;
		if (*p <= ' ' || *p >= 0x7f || *p == ':') {
			dcc_pemsg(EX_USAGE, emsg,
				  "illegal SMTP field name character in \"%s\"",
				  hdr);
			return 0;
		}
		++p;
	}
	len = p - hdr;
	if (len == 0) {
		dcc_pemsg(EX_USAGE, emsg, "illegal empty field name");
		return 0;
	}

	/* ignore duplicates */
	for (n = 0; n < num_sub_cks; ++n) {
		if (len == sub_cks[n].nm_len
		    && !strncasecmp(hdr, sub_cks[n].nm, len))
			return 1;
	}

	/* save a lower case, '\0' terminated copy of the name */
	sub_cks[num_sub_cks].nm_len = len;
	q = dcc_malloc(len+1);
	sub_cks[num_sub_cks].nm = q;
	do {
		c = *hdr++;
		*q++ = DCC_TO_LOWER(c);
	} while (--len > 0);
	*q = '\0';
	++num_sub_cks;

	return 1;
}



/* Capture a bracketed address from a Received: header.
 *	On success addr_buf holds the leading '[' followed by the address,
 *	without the closing ']', and the return value is the number of bytes
 *	stored including the '['.  Returns 0 if the text between the brackets
 *	is too short, too long, or is not followed by ']'. */
static int
get_received_addr(char addr_buf[INET6_ADDRSTRLEN+2],
		  const char *hdr)	/* *hdr == '[' before the address */
{
	int a_len;

	a_len = 1+strspn(hdr+1, ".:abcdefABCDEF0123456789");
	if (a_len <= 6+1 || a_len >= INET6_ADDRSTRLEN+1)
		return 0;
	if (hdr[a_len] != ']')
		return 0;

	/* capture the address
	 * include leading '[' in case we later need a host name */
	memcpy(addr_buf, hdr, a_len);
	addr_buf[a_len] = '\0';
	return a_len;
}



/* find IP address, client host name, and HELO string in a Received:
 * header of forms:
 *  #1	Received: from helo (hostname [addr] ...
 *	Received: from helo ([addr] ...
 *  #2	Received: from hostname [addr] ...
 *	Received: from [addr] ...
 *  #3	Received: from qmailheloandhostname (addr) ...
 *  #4	Received: from qmailhostname (HELO qmailhelo) (addr) ...
 *   or Received: from qmailhostname (HELO qmailhelo) ([addr]) ...
 *
 * ignore these forms:
 *  #5	Received: from localhost by hostname with LMTP
 *  #6	Received (qmail 4824 invoked by uid 1000); 8 Nov 2005 12:13:33 -0000
 *  #7	Received: (qmail 21530 invoked from network); 29 Aug 2005 16:05:04 -0000
 *  #8	Received: (from user@localhost) by lochost (8.12.10/8.12.10/Submit) ...
 *  #9	Received: by hostname (Postfix) id ...
 *
 * This should be called only with Received: headers that are known to
 * have been added by trustworthy code such as the local system
 * or an MX secondary.
 *
 * Return 0 for unknown header, "" if IP address found, or stupid type string
 *
 * When an address is found, its checksum is put into *cks, its text form
 * into clnt_str, and the client name and HELO value are copied to clnt_name
 * and helo if those buffers are supplied with non-zero lengths. */
const char *
parse_received(const char *hdr,		/* the null terminated header */
	       DCC_GOT_CKS *cks,	/* put address checksum here */
	       char *helo,		/* optionally put HELO value here */
	       int helo_len,
	       char *clnt_str, int clnt_str_len,
	       char *clnt_name, int clnt_name_len)
{
	char addr_buf[INET6_ADDRSTRLEN+2];
	const char *h, *n;
	int h_len, n_len, a_len;
	int i;

	/* make the field name optional
	 * skip the colon along with the name, because otherwise the scan
	 * is left on the ':' and no following form can match */
	if (!CLITCMP(hdr, "Received:"))
		hdr += LITZ("Received:");
	hdr += strspn(hdr, " \t\r\n");

/* #define DCC_DEBUG_PARSE_RECEIVED */
#ifdef DCC_DEBUG_PARSE_RECEIVED
	printf("\n\nReceived: %s\n", hdr);
#endif

	/* true if p starts a run of 7 to INET_ADDRSTRLEN-1 digits and dots;
	 * the length of the run is left in l */
#define SPAN_ADDR(l,p) (*(p) >= '0' && *(p) <= '9'			\
			&& ((l) = strspn((p), "0123456789.")) >= 7	\
			&& (l) < INET_ADDRSTRLEN)

	if (CLITCMP(hdr, "from")) {
		/* It does not match "Received: from" in #1, #2, #3, and #5
		 * Recognize #6 and #7 */
		if (!LITCMP(hdr, "(qmail ")) {
			hdr += LITZ("(qmail ");
			i = strspn(hdr, "0123456789");
			if (i == 0)
				return 0;
			hdr += i;
			if (!LITCMP(hdr, " invoked from network)")
			    || !LITCMP(hdr, " invoked by uid "))
				return "qmail";
			return 0;
		}

		/* recognize #8 */
		if (!LITCMP(hdr, "(from ")) {
			hdr += LITZ("(from ");
			hdr = strpbrk(hdr, DCC_WHITESPACE"@");
			if (!hdr || *hdr != '@')
				return 0;
			hdr = strpbrk(hdr, DCC_WHITESPACE")");
			if (!hdr || *hdr != ')')
				return 0;
			hdr += 1+strspn(hdr+1, DCC_WHITESPACE);
			if (LITCMP(hdr, "by "))
				return 0;
			hdr += LITZ("by ");
			if (strstr(hdr, "/Submit"))
				return "sendmail Submit";
			return 0;
		}

		/* recognize #9 */
		if (!LITCMP(hdr, "by ")) {
			hdr += LITZ("by ");
			hdr = strpbrk(hdr, DCC_WHITESPACE);
			if (!hdr)
				return 0;
			++hdr;
			if (!LITCMP(hdr, "(Postfix)"))
				return "postfix";
			return 0;
		}

		/* unrecognized */
		return 0;
	}
	hdr += LITZ("from");
	i = strspn(hdr, DCC_WHITESPACE);
	if (i == 0)
		return 0;
	hdr += i;

	/* We have "Received: from "
	 * get the host name or HELO value before '(' or '[' in
	 * #1, #2, #3, and #5 */
	h = hdr;
	hdr = strpbrk(hdr, DCC_WHITESPACE"([");
	if (!hdr)
		return 0;		/* unrecognized */
	h_len = hdr - h;
	hdr += strspn(hdr, DCC_WHITESPACE);

	if (*hdr == '(') {
		/* look for client host name of #1
		 * or IPv4 address of #3
		 * or HELO value and IPv4 address of #4 */
		++hdr;
		if (SPAN_ADDR(a_len, hdr) && hdr[a_len] == ')') {
			/* we seem to have the IPv4 address of #3 */
			n = h;
			n_len = h_len;
			addr_buf[0] = '[';
			memcpy(addr_buf+1, hdr, a_len);
			addr_buf[a_len+1] = '\0';
			/* count the '[' as get_received_addr() does */
			++a_len;

		} else if (!LITCMP(hdr, "HELO ")
			   && hdr[LITZ("HELO ")] != '[') {
			/* we have the #4 qmail HELO form when reverse DNS name
			 * and helo value differ or unrecognizable */
			n = h;
			n_len = h_len;
			h = hdr + LITZ("HELO ");
			hdr = strpbrk(h, " \t'\"()[]");
			if (!hdr)
				return 0;
			h_len = hdr - h;
			if (!h_len || LITCMP(hdr, ") ("))
				return 0;
			hdr += LITZ(") (");
			if (SPAN_ADDR(a_len, hdr) && hdr[a_len] == ')') {
				addr_buf[0] = '[';
				memcpy(addr_buf+1, hdr, a_len);
				addr_buf[a_len+1] = '\0';
				/* count the '[' */
				++a_len;
			} else if (hdr[0] == '['
				   && SPAN_ADDR(a_len, hdr+1)
				   && hdr[1+a_len] == ']'
				   && hdr[2+a_len] == ')') {
				memcpy(addr_buf, hdr, a_len+1);
				addr_buf[a_len+1] = '\0';
				/* count the '[' */
				++a_len;
			} else {
				return 0;
			}

		} else {
			/* it is #1 or unrecognizable */
			n = hdr;
			hdr = strpbrk(hdr, DCC_WHITESPACE"[");
			if (!hdr)
				return 0;
			n_len = hdr - n;
			hdr += strspn(hdr, DCC_WHITESPACE);
			if (*hdr != '[')
				return 0;
			a_len = get_received_addr(addr_buf, hdr);
			if (!a_len)
				return 0;
		}

	} else if (*hdr == '[') {
		/* format #2; we have possibly null client name and no HELO */
		n = h;
		n_len = h_len;
		h_len = 0;
		a_len = get_received_addr(addr_buf, hdr);
		if (!a_len)
			return 0;

	} else if (!CLITCMP(hdr, "by ")) {
		/* recognize #5 */
		hdr += LITZ("by ");
		n = strchr(hdr, ' ');
		if (!n || n > hdr+DCC_MAXDOMAINLEN)
			return 0;
		if (!CLITCMP(n, " with LMTP"))
			return "LMTP";	/* stupid type string */
		return 0;

	} else {
		return 0;
	}

	/* it looks ok so send out all the answers
	 * if the IP address makes sense
	 *
	 * Here addr_buf is "[address" and a_len is its length including
	 * the '[' on every path. */
	if (!dcc_get_str_ip_ck(cks, addr_buf+1))
		return 0;
	dcc_ipv6tostr(clnt_str, clnt_str_len, &cks->ip_addr);

	if (clnt_name && clnt_name_len) {
		if (n_len == 0) {
			/* use address as the client host name
			 * addr_buf has room for both the ']' and the '\0' */
			addr_buf[a_len] = ']';
			addr_buf[a_len+1] = '\0';
			n_len = a_len+1;
			n = addr_buf;
		}
		if (clnt_name_len > n_len+1)
			clnt_name_len = n_len+1;
		STRLCPY(clnt_name, n, clnt_name_len);
	}

	if (helo && helo_len) {
		if (helo_len > h_len+1)
			helo_len = h_len+1;
		STRLCPY(helo, h, helo_len);
	}

#ifdef DCC_DEBUG_PARSE_RECEIVED
	printf("helo=%s clnt_str=%s clnt_name=%s\n", helo, clnt_str, clnt_name);
#endif
	return "";

#undef SPAN_ADDR
}



/* Get the envelope sender from a Return-Path: header.
 *	Trailing carriage returns and line feeds and one pair of enclosing
 *	angle brackets are removed.  The value is truncated to fit buf.
 *	An empty value, such as "<>", is reported as not found. */
u_char					/* 1=found env_From value */
parse_return_path(const char *hdr, char *buf, int buf_len)
{
	int i;

	if (CLITCMP(hdr, "Return-Path:"))
		return 0;

	hdr += LITZ("Return-Path:");
	hdr += strspn(hdr, " \t");
	i = strlen(hdr);
	while (i > 0 && (hdr[i-1] == '\r' || hdr[i-1] == '\n'))
		--i;
	if (i >= 2 && *hdr == '<' && hdr[i-1] == '>') {
		++hdr;
		i -= 2;
	}
	if (i >= buf_len-1)
		i = buf_len-1;
	if (i <= 0)
		return 0;
	memcpy(buf, hdr, i);
	buf[i] = '\0';
	return 1;
}



/* Get the envelope sender from a UNIX mailbox "From " line.
 *	The sender is the text between the blanks after "From " and the
 *	next blank; a line without that second blank is rejected.
 *	The value is truncated to fit buf. */
u_char					/* 1=found env_From value */
parse_unix_from(const char *hdr, char *buf, int buf_len)
{
	const char *p;
	int i;

	if (strncmp(hdr, "From ", LITZ("From ")))
		return 0;

	hdr += LITZ("From ");
	hdr += strspn(hdr, " ");
	p = strchr(hdr, ' ');
	if (p == 0)
		return 0;
	i = p-hdr;
	if (i >= buf_len)
		i = buf_len-1;
	if (i <= 0)
		return 0;
	memcpy(buf, hdr, i);
	buf[i] = '\0';
	return 1;
}



/* Get the host name from an envelope sender address.
 *	The host is the text after the first '@' up to the next '>' or the
 *	end of the string.  The value is truncated to fit buf.
 *	Returns 1 if a non-empty host name was copied to buf, 0 otherwise. */
u_char
parse_mail_host(const char *env_from, char *buf, int buf_len)
{
	const char *p, *p2, *p3;
	int i;

	p = strchr(env_from, '@');
	if (!p)
		return 0;
	p2 = strchr(++p, '>');
	if (!p2)
		p2 = p+strlen(p);

	/* do not try to figure out source routes */
	p3 = strpbrk(p, ";@,");
	if (p3 && p3 < p2)
		return 0;

	i = p2-p;
	if (i >= buf_len)
		i = buf_len-1;
	if (i <= 0)
		return 0;
	memcpy(buf, p, i);
	buf[i] = '\0';
	return 1;
}



/* Format the checksums of a message as a table and hand it to out().
 *	There is one line per valid checksum, preceded by a line of column
 *	headings.  The text is accumulated in a local buffer that is passed
 *	to out() when fewer than LINE_LEN bytes remain and again at the end.
 *	The whitelist column replaces the threshold column whenever any
 *	whitelist value is present. */
void
dcc_print_cks(LOG_WRITE_FNC out, void *arg,
	      u_char is_spam, DCC_TGTS local_tgts,
	      const DCC_GOT_CKS *cks, DCC_CKS_WTGTS wtgts,
	      u_char have_wlist)	/* 1=have whiteclnt result */
{
	char tgts_buf[16], type_buf[26], cbuf[DCC_CK2STR_LEN];
# define LINE_LEN 81
	char buf[LINE_LEN*6];
	const DCC_GOT_SUM *g;
	u_char have_server, have_thold, headed;
	DCC_TGTS tgts;
	int cklen, buflen, inx, i;

	/* decide which column headings are needed */
	have_server = 0;
	have_thold = 0;
	for (g = cks->sums, inx = 0; g <= LAST(cks->sums); ++g, ++inx) {
		if (g->type == DCC_CK_INVALID)
			continue;
		if (g->tgts != DCC_TGTS_INVALID)
			have_server = 1;
		if (wtgts[inx] != 0)
			have_wlist = 1;
		if (cks->tholds_rej[g->type] != DCC_THOLD_UNSET
		    && g->type != DCC_CK_REP_TOTAL)
			have_thold = 1;
	}
	if (have_wlist)
		have_thold = 0;

	headed = 0;
	buflen = 0;
	for (g = cks->sums, inx = 0; g <= LAST(cks->sums); ++g, ++inx) {
		if (g->type == DCC_CK_INVALID)
			continue;

		if (!headed) {
			/* emit the column headings before the first line */
			headed = 1;
			dcc_tgts2str(tgts_buf, sizeof(tgts_buf)-LITZ("spam"),
				     local_tgts, 0);
			if (is_spam)
				STRLCAT(tgts_buf, " spam", sizeof(tgts_buf));
			buflen += snprintf(buf+buflen, sizeof(buf)-buflen,
					   " " DCC_XHDR_REPORTED" %-15s checksum",
					   tgts_buf);
			if (have_server || have_wlist || have_thold)
				buflen += snprintf(buf+buflen,
						   sizeof(buf)-buflen,
						   PRINT_CK_PAT_SRVR,
						   have_server ? "server" : "");
			if (have_wlist)
				buflen += snprintf(buf+buflen,
						   sizeof(buf)-buflen,
						   PRINT_CK_PAT_WLIST, "wlist");
			else if (have_thold)
				buflen += snprintf(buf+buflen,
						   sizeof(buf)-buflen,
						   PRINT_CK_PAT_THOLD, "thold");
			if (ISZ(buf)-buflen > 1)
				buf[buflen++] = '\n';

		} else if (buflen >= ISZ(buf)-LINE_LEN) {
			/* flush when another line might not fit */
			out(arg, buf, buflen);
			buflen = 0;
		}

		cklen = snprintf(buf+buflen, sizeof(buf)-buflen,
				 PRINT_CK_PAT_CK,
				 dcc_type2str(type_buf, sizeof(type_buf),
					      g->type, g->hdr_nm, 0, 0),
				 dcc_ck2str(cbuf, sizeof(cbuf),
					    g->type, g->sum, 0));
		buflen += cklen;
		if (g->rpt2srvr != 0 && g->tgts != DCC_TGTS_INVALID) {
			if (buflen < ISZ(buf))
				buflen += snprintf(buf+buflen,
						   sizeof(buf)-buflen,
						   PRINT_CK_PAT_SRVR,
						   dcc_tgts2str(tgts_buf,
							sizeof(tgts_buf),
							g->tgts, 0));

		} else if (wtgts[inx] != 0
			   || (have_thold
			       && cks->tholds_rej[g->type]!=DCC_THOLD_UNSET)) {
			/* steal space from blank server count for
			 * long substitute checksums */
			i = (PRINT_CK_PAT_SRVR_LEN
			     - (cklen - (PRINT_CK_TYPE_LEN
					 +2 +PRINT_CK_SUM_LEN)));
			if (i > PRINT_CK_PAT_SRVR_LEN)
				i = PRINT_CK_PAT_SRVR_LEN;
			if (i > ISZ(buf)-buflen)
				i = ISZ(buf)-buflen;
			if (i < 0)
				i = 0;
			if (i > ISZ(buf))
				i = ISZ(buf);
			while (--i >= 0) {
				buf[buflen++] = ' ';
			}
		}

		if (buflen >= ISZ(buf)) {
			;		/* no room for another column */

		} else if (wtgts[inx] != 0) {
			buflen += snprintf(buf+buflen, sizeof(buf)-buflen,
					   PRINT_CK_PAT_WLIST,
					   wtgts[inx] == 0 ? ""
					   : dcc_tgts2str(tgts_buf,
							  sizeof(tgts_buf),
							  wtgts[inx], 0));

		} else if (have_thold
			   && (tgts = cks->tholds_rej[g->type]
			       ) != DCC_THOLD_UNSET) {
			buflen += snprintf(buf+buflen, sizeof(buf)-buflen,
					   PRINT_CK_PAT_THOLD,
					   dcc_thold2str(tgts_buf,
							 sizeof(tgts_buf),
							 g->type, tgts));
		}

		/* always leave room to end the line and the string */
		if (buflen >= ISZ(buf)-1)
			buflen = sizeof(buf)-2;
		buf[buflen] = '\n';
		buf[++buflen] = '\0';
	}
	if (buflen != 0)
		out(arg, buf, buflen);

#undef LINE_LEN
}