Mercurial > notdcc
diff dcclib/clnt_send.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dcclib/clnt_send.c Tue Mar 10 13:49:58 2009 +0100 @@ -0,0 +1,3486 @@ +/* Distributed Checksum Clearinghouse + * + * send a request from client to server + * + * Copyright (c) 2008 by Rhyolite Software, LLC + * + * This agreement is not applicable to any entity which sells anti-spam + * solutions to others or provides an anti-spam solution as part of a + * security solution sold to other entities, or to a private network + * which employs the DCC or uses data provided by operation of the DCC + * but does not provide corresponding data to other users. + * + * Permission to use, copy, modify, and distribute this software without + * changes for any purpose with or without fee is hereby granted, provided + * that the above copyright notice and this permission notice appear in all + * copies and any distributed versions or copies are either unchanged + * or not called anything similar to "DCC" or "Distributed Checksum + * Clearinghouse". + * + * Parties not eligible to receive a license under this agreement can + * obtain a commercial license to use DCC by contacting Rhyolite Software + * at sales@rhyolite.com. + * + * A commercial license would be for Distributed Checksum and Reputation + * Clearinghouse software. That software includes additional features. This + * free license for Distributed ChecksumClearinghouse Software does not in any + * way grant permision to use Distributed Checksum and Reputation Clearinghouse + * software + * + * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC + * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES + * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Rhyolite Software DCC 1.3.103-1.198 $Revision$ + */ + +#include "dcc_clnt.h" +#ifdef USE_POLL +#include <poll.h> +#endif +#ifdef HAVE_ARPA_NAMESER_H +#include <arpa/nameser.h> +#endif +#ifdef HAVE_RESOLV_H +#include <resolv.h> +#endif + +DCC_CLNT_INFO *dcc_clnt_info; /* memory mapped shared data */ +u_char dcc_all_srvrs = 0; /* try to contact all servers */ + +/* #define CLNT_LOSSES */ +#ifdef CLNT_LOSSES +static u_int clnt_losses; +#endif + +u_char dcc_clnt_debug; +int dcc_debug_ttl; + + +#define AGE_AVG(_v,_n,_a,_b) ((_v) = ((_v)*_a + (_n)*_b + (_a+_b)/2)/(_a+_b)) + +/* send NOPs to measure the RTTs to servers when the current server + * becomes slow this often */ +#define FAST_RTT_SECS (15*60) + + +char dcc_clnt_hostname[MAXHOSTNAMELEN]; /* local hostname */ +static u_int32_t dcc_clnt_hid; /* our DCC host-ID */ + + +/* Each client knows about one or more servers, lest the current server + * crash. To ensure that counts of spam accumulate as quickly as possible, + * all of the processes on a client try to use a single server. The + * closest (or fastest) server is preferred. It is desirable for the + * servers to convert the hostnames of the servers to IP addresses + * frequently enough to track changes in address records, but not so + * often that a lot of time is wasted on the DNS. + * + * All of that implies that independent processes on the client need to + * cooperate in measuring the round trip time to the servers and maintaining + * their IP addresses. On UNIX systems, this is accomplished with mmap() + * and a well known file. + * + * The DCC client uses 3 locks: + * 1. mutex to ensure that only one thread in process sends bursts of NOPs + * to measure RTTs or resolves DNS names + * 2. mutex protecting the shared information in the map file for threads + * within a process + * 3. fcntl() lock on the file to protect the shared information among processes + * + * To avoid ABBA deadlocks, the locks are always sought in that order. + * For most operaitons, only #2/#3 is needed. Sometimes only #2. + * + * Some systems have broken fcntl() locking (e.g. NFS in Solaris). + * They lock the entire file. */ + + +/* the contexts must be locked to read or change these values */ +static int info_fd = -1; +#ifdef DCC_WIN32 +HANDLE info_map = INVALID_HANDLE_VALUE; +#endif +DCC_PATH dcc_info_nm; + +/* info_locked is set when the file system lock on changing the mapped file is + * held. The contexts must be locked while info_locked is set, as well as + * when it is checked or changed. */ +static u_char info_locked; + + + + +/* Get the RTT used to pick servers. + * It includes the local bias and the penalty for version mismatches */ +static int +effective_rtt(const DCC_SRVR_CLASS *class, const DCC_SRVR_ADDR *ap) +{ + NAM_INX nam_inx; + int rtt; + + rtt = ap->rtt; + if (rtt >= DCC_MAX_RTT) + return DCC_RTT_BAD; + + nam_inx = ap->nam_inx; + if (!GOOD_NAM(nam_inx)) + return DCC_RTT_BAD; + + rtt += class->nms[nam_inx].rtt_adj; + /* penalize servers with strange versions */ + if (ap->srvr_pkt_vers < DCC_PKT_VERSION +#ifdef DCC_PKT_VERSION7 + /* protocol version 7 and 8 are the same for the free code */ + && ap->srvr_pkt_vers+1 != DCC_PKT_VERSION +#endif + && ap->srvr_id != DCC_ID_INVALID) + rtt += DCC_RTT_VERS_ADJ; + + if (rtt >= DCC_RTT_BAD) + return DCC_RTT_BAD; + return rtt; +} + + + +static inline u_char +good_rtt(const DCC_SRVR_CLASS *class) +{ + int rtt; + + if (!HAVE_SRVR(class)) + return 0; + + rtt = effective_rtt(class, &class->addrs[class->srvr_inx]); + if (rtt > class->avg_thold_rtt) + return 0; + + return 1; +} + + +#define AP2CLASS(ap) DCC_GREY2CLASS(ap >= dcc_clnt_info->grey.addrs) + + +/* compare addresses while trying to ignore IPv4 vs. IPv6 details */ +static u_char /* 0=the addresses are equal */ +dcc_cmp_ap2su(const DCC_SRVR_ADDR *ap, const DCC_SOCKU *su) +{ + DCC_SRVR_ADDR ap4; + struct in_addr su_addr4; + + if (ap->ip.port != DCC_SU_PORT(su)) + return 1; + + if (ap->ip.family == AF_INET6 + && dcc_ipv6toipv4(&ap4.ip.u.v4, &ap->ip.u.v6)) { + ap4.ip.family = AF_INET; + ap = &ap4; + } + + if (su->sa.sa_family == AF_INET) { + return (ap->ip.family != AF_INET + || ap->ip.u.v4.s_addr != su->ipv4.sin_addr.s_addr); + } + + if (dcc_ipv6toipv4(&su_addr4, &su->ipv6.sin6_addr)) { + return (ap->ip.family != AF_INET + || ap->ip.u.v4.s_addr != su_addr4.s_addr); + } + + if (ap->ip.family != AF_INET6) + return 1; + + /* both are real IPv6 and not ::1 */ + return memcmp(&ap->ip.u.v6, &su->ipv6.sin6_addr, sizeof(ap->ip.u.v6)); +} + + + +int +dcc_ap2str_opt(char *buf, int buf_len, + const DCC_SRVR_CLASS *class, u_char inx, + char port_str) /* '\0' or '-' */ +{ + const DCC_SRVR_ADDR *ap; + char *buf1; + int len; + + ap = &class->addrs[inx]; + dcc_ip2str(buf, buf_len, &ap->ip); + + len = strlen(buf); + buf1 = buf+len; + buf_len -= len; + if (ap->ip.port == DCC_CLASS2PORT(class)) { + if (port_str) { + if (buf_len >= 1) { + *buf1 = ','; + if (buf_len >= 2) + *++buf1 = port_str; + *++buf1 = '\0'; + } + } + } else if (buf_len > 0) { + len = snprintf(buf1, buf_len, ",%d", ntohs(ap->ip.port)); + if (len >= buf_len) + len = buf_len-1; + buf1 += len; + } + + return buf1-buf; +} + + + +static const char * +addr2str(char *buf, u_int buf_len, const DCC_SRVR_CLASS *class, + int addrs_gen, const DCC_SRVR_ADDR *ap, const DCC_SOCKU *sup) +{ + DCC_SOCKU su; + char str[DCC_SU2STR_SIZE]; + const char *host; + + if (class->gen == addrs_gen) { + if (!sup) { + dcc_mk_su(&su, ap->ip.family, &ap->ip.u, ap->ip.port); + sup = &su; + } + dcc_su2str(str, sizeof(str), sup); + if (GOOD_NAM(ap->nam_inx) + && (host = class->nms[ap->nam_inx].hostname, + !strncmp(host, str, strlen(host)))) { + snprintf(buf, buf_len, "%s (%s)", host, str); + } else { + snprintf(buf, buf_len, "%s", str); + } + + } else if (sup) { + dcc_su2str(buf, buf_len, sup); + + } else { + snprintf(buf, buf_len, DCC_IS_GREY_STR(class)); + } + return buf; +} + + + +/* display or log the current error message if debugging */ +static inline void +flush_emsg(DCC_EMSG emsg, u_char debug) +{ + if (!emsg) + return; + if (!debug) + return; + if (*emsg != '\0') { + dcc_trace_msg("%s", emsg); + *emsg = '\0'; + } +} + + + +/* display or prepare a new, less interesting error message if we do not have + * a buffer or if there is no old, presumably more important message. */ +static void PATTRIB(3,4) +extra_pemsg(int ex_code, DCC_EMSG emsg, const char *msg, ...) +{ + va_list args; + + flush_emsg(emsg, dcc_clnt_debug); + if (!emsg || *emsg == '\0') { + va_start(args, msg); + dcc_vpemsg(ex_code, emsg, msg, args); + va_end(args); + } +} + + + +static void +trace_perf(const char *msg, const DCC_SRVR_ADDR *ap) +{ + DCC_SRVR_CLASS *class; + char abuf[60]; + char rbuf[30]; + + class = AP2CLASS(ap); + if (!GOOD_NAM(ap->nam_inx) + || class->nms[ap->nam_inx].rtt_adj == 0) { + rbuf[0] = 0; + } else if (ap->srvr_pkt_vers < DCC_PKT_VERSION + && ap->srvr_id != DCC_ID_INVALID) { + snprintf(rbuf, sizeof(rbuf), "%+d+%d", + class->nms[ap->nam_inx].rtt_adj/1000, + DCC_RTT_VERS_ADJ/1000); + } else { + snprintf(rbuf, sizeof(rbuf), "%+d", + class->nms[ap->nam_inx].rtt_adj/1000); + } + + if (ap->rtt == DCC_RTT_BAD) { + dcc_trace_msg("%s %s server %s with unknown RTT", + msg, DCC_IS_GREY_STR(class), + addr2str(abuf, sizeof(abuf), class, + class->gen, ap, 0)); + } else if (ap->total_xmits == 0) { + dcc_trace_msg("%s %s server %s with %.2f%s ms RTT," + " %d ms queue wait", + msg, DCC_IS_GREY_STR(class), + addr2str(abuf, sizeof(abuf), class, + class->gen, ap, 0), + ap->rtt/1000.0, rbuf, + ap->srvr_wait/1000); + } else { + dcc_trace_msg("%s %s server %s with %.0f%%" + " of %d requests answered," + " %.2f%s ms RTT, %d ms queue wait", + msg, DCC_IS_GREY_STR(class), + addr2str(abuf, sizeof(abuf), class, + class->gen, ap, 0), + (ap->total_resps*100.0)/ap->total_xmits, + ap->total_xmits, + ap->rtt/1000.0, rbuf, + ap->srvr_wait/1000); + } +} + + + +/* If the socket isn't always connected, it can receive + * datagrams from almost everywhere (for example, a DNS + * datagram could leak-in if the local port range is small + * and the local port has been recently doing DNS queries + * in its previous life). + * + * If the socket is connected, it can still receive + * datagrams not belonging to the connection per se. This + * will happen if it has been disconnected recently and there + * was pending data in the socket's queue. + * + * Before complaining, check that this datagram seems to be a response + * to something we sent */ +static void PATTRIB(5,6) +trace_bad_packet(const DCC_XLOG *xlog, const DCC_SOCKU *su, + const DCC_OP_RESP *resp, int resp_len, const char *p, ...) +{ + const DCC_XLOG_ENTRY *xloge; + va_list args; + char msgbuf[80]; + char pktbuf[9*10]; + int i, j, l; + + if (!dcc_clnt_debug && xlog && su) { + for (xloge = xlog->base; ; ++xloge) { + /* forget the error message if not from a DCC server */ + if (xloge >= xlog->next) + return; + + /* Don't check this server entry if we haven't + * transmitted anything to this host. */ + if (xloge->op_nums.t == DCC_OP_NUMS_NULL) + continue; + + /* is the packet from this server? */ + if (!memcmp(su, &xloge->su, sizeof(*su))) + break; + } + } + + va_start(args, p); + vsnprintf(msgbuf, sizeof(msgbuf), p, args); + va_end(args); + for (i = 0, j = 0; (i+1)*ISZ(resp->w[0]) <= resp_len; ++i) { + l = snprintf(&pktbuf[j], sizeof(pktbuf)-j, " %08x", resp->w[i]); + if (l < 9) + break; + j += l; + } + dcc_error_msg("%s;%s", msgbuf, pktbuf); +} + + + +/* Compute the delay before the next retransmission + * It always should be long enough for the DCC server to do some disk + * operations even if the server and network have usually been faster. */ +static int +retrans_time(int rtt, u_int xmit_num) +{ + u_int backoff; + + if (rtt < DCC_MIN_RTT) + rtt = DCC_MIN_RTT; + backoff = rtt << xmit_num; /* exponential backoff */ + backoff += DCC_DCCD_DELAY; /* varying server & network load */ + if (backoff > DCC_MAX_RTT) + backoff = DCC_MAX_RTT; + return backoff; +} + + + +static void +get_start_time(DCC_CLNT_CTXT *ctxt) +{ + gettimeofday(&ctxt->start, 0); + ctxt->now = ctxt->start; + ctxt->now_us = 0; +} + + + +static u_char /* 1=ok, 0=time jumped */ +get_now(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt) +{ + gettimeofday(&ctxt->now, 0); + ctxt->now_us = tv_diff2us(&ctxt->now, &ctxt->start); + if (ctxt->now_us >= 0 && ctxt->now_us < FOREVER_US) + return 1; + + /* ignore tiny reverse time jumps on some systems such as BSD/OS 4.1 */ + if (ctxt->now_us < 0 + && ctxt->now_us > -1000) { + ctxt->now = ctxt->start; + ctxt->now_us = 0; + return 1; + } + + dcc_pemsg(EX_OSERR, emsg, + "clock changed an impossible %.6f seconds", + (ctxt->now.tv_sec - ctxt->start.tv_sec) * 1.0 + + ((ctxt->now.tv_usec - ctxt->start.tv_usec)*1.0)/FOREVER_US); + ctxt->now = ctxt->start; + ctxt->now_us = 0; + return 0; +} + + + +static double +get_age(const DCC_CLNT_CTXT *ctxt) +{ + struct timeval now; + + gettimeofday(&now, 0); + return tv_diff2us(&now, &ctxt->start)/(DCC_US*1.0); +} + + + +#ifdef DCC_DEBUG_CLNT_LOCK +void +assert_info_locked(void) +{ + assert_ctxts_locked(); + if (!info_locked) + dcc_logbad(EX_SOFTWARE, "don't have info locked"); +} + + + +void +assert_info_unlocked(void) +{ + if (info_locked) + dcc_logbad(EX_SOFTWARE, "have info locked"); +} +#endif + + +/* Unlock the shared memory for other processes. + * The contexts must be locked */ +u_char /* 0=failed 1=ok */ +dcc_info_unlock(DCC_EMSG emsg) +{ + assert_ctxts_locked(); +#ifdef DCC_DEBUG_CLNT_LOCK + assert_info_locked(); +#else + if (!info_locked) + return 1; +#endif + + info_locked = 0; + return dcc_unlock_fd(emsg, info_fd, DCC_LOCK_ALL_FILE, + "info ", dcc_info_nm); +} + + + +/* Lock the shared memory so we can read and perhaps change it + * The contexts must be locked */ +u_char /* 0=failed, 1=ok */ +dcc_info_lock(DCC_EMSG emsg) +{ + assert_ctxts_locked(); + + if (info_locked) { +#ifdef DCC_DEBUG_CLNT_LOCK + dcc_logbad(EX_SOFTWARE, "info already locked"); +#endif + return 1; + } + + if (!dcc_exlock_fd(emsg, info_fd, DCC_LOCK_ALL_FILE, 60, + "info ", dcc_info_nm)) + return 0; + + info_locked = 1; + return 1; +} + + + +static u_char +unmap_info(DCC_EMSG emsg) +{ +#ifdef DCC_WIN32 + win32_unmap(&info_map, dcc_clnt_info, dcc_info_nm); +#else + if (0 > munmap((void *)dcc_clnt_info, sizeof(*dcc_clnt_info))) { + dcc_pemsg(EX_OSERR, emsg, "munmap(%s): %s", + dcc_info_nm, ERROR_STR()); + dcc_clnt_info = 0; + return 0; + } +#endif + dcc_clnt_info = 0; + return 1; +} + + + +/* Unmap and close the shared info + * The contexts must be locked but not the info */ +u_char /* 0=something wrong, 1=all over */ +dcc_unmap_close_info(DCC_EMSG emsg) /* cleared of stale messages */ +{ + u_char result = 1; + + assert_ctxts_locked(); + assert_info_unlocked(); + + if (!dcc_clnt_info) + return result; + + if (!unmap_info(emsg)) + result = 0; + + if (0 > close(info_fd)) { + extra_pemsg(EX_IOERR, emsg, "close(%s): %s", + dcc_info_nm, ERROR_STR()); + result = 0; + } + info_fd = -1; + + return result; +} + + + +/* discover our host ID if we do not already know it */ +static u_char +get_clnt_hid(DCC_EMSG emsg) +{ + struct timeval now; + int i; + u_char result; + + if (dcc_clnt_hid != 0) + return 1; + +#ifdef HAVE_GETHOSTID + dcc_clnt_hid = gethostid(); +#endif + /* add the host name even if we have a hostid in case the hostid + * is a commonl used RFC 1918 IP address */ + if (0 > gethostname(dcc_clnt_hostname, sizeof(dcc_clnt_hostname)-1)) { + dcc_pemsg(EX_NOHOST, emsg, "gethostname(): %s", ERROR_STR()); + /* do the best we can without a hostname */ + gettimeofday(&now, 0); + dcc_clnt_hid = now.tv_sec + now.tv_usec; + result = 0; + } else if (dcc_clnt_hostname[0] == '\0') { + dcc_pemsg(EX_NOHOST, emsg, "null hostname from gethostname()"); + /* do the best we can without a hostname */ + gettimeofday(&now, 0); + dcc_clnt_hid = now.tv_sec + now.tv_usec; + result = 0; + } else { + for (i = 0; i < ISZ(dcc_clnt_hostname); ++i) { + if (!dcc_clnt_hostname[i]) + break; + dcc_clnt_hid += dcc_clnt_hostname[i]*i; + } + result = 1; + } + + /* this should almost never happen, but it could */ + if (dcc_clnt_hid == 0) + dcc_clnt_hid = 1; + return result; +} + + + +/* write a new DCC map file */ +u_char +dcc_create_map(DCC_EMSG emsg, + const DCC_PATH map_nm0, + int *pfd, /* leave open & unlocked FD here */ + const DCC_SRVR_NM *dcc_nms, int dcc_nms_len, + const DCC_SRVR_NM *grey_nms, int grey_nms_len, + const DCC_IP *src, + u_char info_flags) /* DCC_INFO_FG_* */ +{ + static int op_nums_r; + DCC_CLNT_INFO info_clear; + int fd; + u_char created; + DCC_PATH map_nm; + int i; + + if (pfd && (fd = *pfd) >= 0) { + created = 0; + } else { + if (!fnm2rel(map_nm, map_nm0, 0)) { + dcc_pemsg(EX_IOERR, emsg, "long map name \"%s\"", + map_nm); + return 0; + } + fd = open(map_nm, O_RDWR|O_CREAT|O_EXCL, 0600); + if (fd < 0) { + dcc_pemsg(EX_IOERR, emsg, "open(%s): %s", + map_nm, ERROR_STR()); + return 0; + } + created = 1; + } + + memset(&info_clear, 0, sizeof(info_clear)); + strcpy(info_clear.version, DCC_MAP_INFO_VERSION); + + if (dcc_nms_len != 0) { + if (dcc_nms_len > DCC_MAX_SRVR_NMS) + dcc_nms_len = DCC_MAX_SRVR_NMS; + memcpy(info_clear.dcc.nms, dcc_nms, + sizeof(info_clear.dcc.nms[0])*dcc_nms_len); + } + info_clear.dcc.srvr_inx = NO_SRVR; + + if (grey_nms_len != 0) { + if (grey_nms_len > DCC_MAX_SRVR_NMS) + grey_nms_len = DCC_MAX_SRVR_NMS; + memcpy(info_clear.grey.nms, grey_nms, + sizeof(info_clear.grey.nms[0])*grey_nms_len); + } + info_clear.grey.srvr_inx = NO_SRVR; + + if (src != 0) + info_clear.src = *src; + + info_clear.flags = info_flags; + if (!get_clnt_hid(emsg)) { + close(fd); + if (pfd) + *pfd = -1; + if (created) + unlink(map_nm); + return 0; + } + + /* ensure that we have a new report ID even if we + * are repeatedly recreating a temporary map file */ + if (dcc_clnt_info) + op_nums_r += dcc_clnt_info->proto_hdr.op_nums.r; + info_clear.proto_hdr.op_nums.r = ++op_nums_r; + + i = write(fd, &info_clear, sizeof(info_clear)); + if (i != ISZ(info_clear)) { + if (i < 0) + dcc_pemsg(EX_SOFTWARE, emsg, "write(%s): %s", + map_nm, ERROR_STR()); + else + dcc_pemsg(EX_IOERR, emsg, + "write(%s)=%d instead of %d", + map_nm, i, ISZ(info_clear)); + close(fd); + if (pfd) + *pfd = -1; + if (created) + unlink(map_nm); + return 0; + } + + if (created) { + if (pfd) + *pfd = fd; + else + close(fd); + } + return 1; +} + + + +#ifdef DCC_MAP_INFO_VERSION_10 +/* lock and read the contents of the old info file */ +static int /* -1=error, 0=wrong version, 1=done */ +map_convert_start(DCC_EMSG emsg, + void *old_info, int old_info_size, + const char *old_magic, int old_magic_size, + DCC_PATH new_info_nm) +{ + int i; + + assert_ctxts_locked(); + assert_info_unlocked(); + + /* only one process or thread can fix the file so wait for + * exclusive access to the old file */ + if (!dcc_info_lock(emsg)) + return -1; + + i = read(info_fd, old_info, old_info_size); + if (i != old_info_size) { + if (i < 0) { + dcc_pemsg(EX_IOERR, emsg, "read(%s): %s", + dcc_info_nm, ERROR_STR()); + } else { + dcc_pemsg(EX_IOERR, emsg, "read(%s)=%d instead of %d", + dcc_info_nm, i, old_info_size); + } + dcc_info_unlock(0); + return -1; + } + + if (-1 == lseek(info_fd, SEEK_SET, 0)) { + dcc_pemsg(EX_IOERR, emsg, "lseek(%s): %s", + dcc_info_nm, ERROR_STR()); + dcc_info_unlock(0); + return -1; + } + + if (strncmp(old_info, old_magic, old_magic_size)) { + if (!dcc_info_unlock(emsg)) + return -1; + return 0; + } + + if (!fnm2rel(new_info_nm, dcc_info_nm, "-new")) { + dcc_pemsg(EX_IOERR, emsg, "long map name \"%s\"", + dcc_info_nm); + dcc_info_unlock(0); + return -1; + } + unlink(new_info_nm); + return 1; +} + + + +/* the old file must be locked */ +static int /* -1=error, 1=done */ +map_convert_fin(DCC_EMSG emsg, + const DCC_PATH new_info_nm, + const struct stat *old_sb, + const DCC_SRVR_NM *dcc_nms, int dcc_nms_len, + const DCC_SRVR_NM *grey_nms, int grey_nms_len, + const DCC_IP *src, + u_char info_flags) /* DCC_INFO_FG_* */ +{ + int new_fd; +#ifdef DCC_WIN32 + DCC_PATH old_info_nm; +#endif + + new_fd = -1; + if (!dcc_create_map(emsg, new_info_nm, &new_fd, + dcc_nms, dcc_nms_len, grey_nms, grey_nms_len, + src, info_flags)) { + dcc_info_unlock(0); + return -1; + } + +#ifdef DCC_WIN32 + /* there are at least two races here, + * but Windows does not allow renaming or unlinking (e.g. by + * rename()) open files */ + if (!fnm2rel(old_info_nm, dcc_info_nm, "-old")) { + dcc_pemsg(EX_IOERR, emsg, "long map name \"%s\"", + dcc_info_nm); + return -1; + } + unlink(old_info_nm); + + if (!dcc_info_unlock(emsg)) { + close(new_fd); + unlink(new_info_nm); + return -1; + } + if (0 > close(info_fd)) { + dcc_pemsg(EX_IOERR, emsg, "close(%s): %s", + dcc_info_nm, ERROR_STR()); + close(new_fd); + unlink(new_info_nm); + return -1; + } + info_fd = -1; + + if (0 > rename(dcc_info_nm, old_info_nm)) { + dcc_pemsg(EX_IOERR, emsg, "rename(%s, %s): %s", + dcc_info_nm, old_info_nm, ERROR_STR()); + close(new_fd); + unlink(new_info_nm); + return -1; + } + + close(new_fd); + if (0 > rename(new_info_nm, dcc_info_nm)) { + dcc_pemsg(EX_IOERR, emsg, "rename(%s, %s): %s", + new_info_nm, dcc_info_nm, ERROR_STR()); + unlink(new_info_nm); + return -1; + } + return 1; +#else /* !DCC_WIN32 */ + /* if we are running as root, + * don't change the owner of the file */ + if (getuid() == 0 + && 0 > fchown(new_fd, old_sb->st_uid, old_sb->st_gid)) { + dcc_pemsg(EX_IOERR, emsg, "chown(%s,%d,%d): %s", + new_info_nm, (int)old_sb->st_uid, (int)old_sb->st_gid, + ERROR_STR()); + unlink(new_info_nm); + close(new_fd); + return -1; + } + + if (0 > rename(new_info_nm, dcc_info_nm)) { + dcc_pemsg(EX_IOERR, emsg, "rename(%s, %s): %s", + new_info_nm, dcc_info_nm, ERROR_STR()); + unlink(new_info_nm); + close(new_fd); + return -1; + } + + if (!dcc_info_unlock(emsg)) { + close(new_fd); + unlink(new_info_nm); + return -1; + } + + close(new_fd); + return 1; +#endif /* DCC_WIN32 */ +} + + +#endif /* DCC_MAP_INFO_VERSION_10 */ +#ifdef DCC_MAP_INFO_VERSION_10 +static void +map_convert_v10_nms(DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS], + const DCC_V10_SRVR_NM old_nms[DCC_MAX_SRVR_NMS]) +{ + int i; + + memset(new_nms, 0, sizeof(DCC_SRVR_NM)*DCC_MAX_SRVR_NMS); + for (i = 0; i < DCC_MAX_SRVR_NMS; ++i) { + new_nms[i].clnt_id = old_nms[i].clnt_id; + new_nms[i].port = old_nms[i].port; + strcpy(new_nms[i].hostname, old_nms[i].hostname); + memcpy(new_nms[i].passwd, old_nms[i].passwd, + sizeof(new_nms[i].passwd)); + new_nms[i].rtt_adj = old_nms[i].rtt_adj; + } +} +#endif /* DCC_MAP_INFO_VERSION_10 */ + + +#ifdef DCC_MAP_INFO_VERSION_5 +static int /* -1=error, 0=wrong version, 1=done */ +map_convert_v5(DCC_EMSG emsg, const struct stat *old_sb) +{ + DCC_PATH new_info_nm; + DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS]; + DCC_V5_CLNT_INFO old_info; + int i; + + if ((int)old_sb->st_size < ISZ(DCC_V5_CLNT_INFO)) + return 0; + + i = map_convert_start(emsg, &old_info, sizeof(DCC_V5_CLNT_INFO), + DCC_MAP_INFO_VERSION_5, sizeof(old_info.version), + new_info_nm); + if (i <= 0) + return i; + + memset(&new_nms, 0, sizeof(new_nms)); + for (i = 0; i < DIM(new_nms); ++i) { + new_nms[i].clnt_id = old_info.nms[i].clnt_id; + new_nms[i].port = old_info.nms[i].port; + strcpy(new_nms[i].hostname, old_info.nms[i].hostname); + memcpy(new_nms[i].passwd, old_info.nms[i].passwd, + sizeof(new_nms[i].passwd)); + new_nms[i].rtt_adj = old_info.nms[i].rtt_adj*10*1000; + } + + return map_convert_fin(emsg, new_info_nm, old_sb, + new_nms, DIM(new_nms), 0, 0, + 0, old_info.flags); +} +#endif /* DCC_MAP_INFO_VERSION_5 */ + + + +#ifdef DCC_MAP_INFO_VERSION_6 +static int /* -1=error, 0=wrong version, 1=done */ +map_convert_v6(DCC_EMSG emsg, const struct stat *old_sb) +{ + DCC_PATH new_info_nm; + DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS]; + DCC_V6_CLNT_INFO old_info; + int i; + + if ((int)old_sb->st_size < ISZ(DCC_V6_CLNT_INFO)) + return 0; + + i = map_convert_start(emsg, &old_info, sizeof(DCC_V6_CLNT_INFO), + DCC_MAP_INFO_VERSION_6, sizeof(old_info.version), + new_info_nm); + if (i <= 0) + return i; + + memset(&new_nms, 0, sizeof(new_nms)); + for (i = 0; i < DIM(new_nms); ++i) { + new_nms[i].clnt_id = old_info.nms[i].clnt_id; + new_nms[i].port = old_info.nms[i].port; + strcpy(new_nms[i].hostname, old_info.nms[i].hostname); + memcpy(new_nms[i].passwd, old_info.nms[i].passwd, + sizeof(new_nms[i].passwd)); + new_nms[i].rtt_adj = old_info.nms[i].rtt_adj; + } + + return map_convert_fin(emsg, new_info_nm, old_sb, + new_nms, DIM(new_nms), 0, 0, + 0, old_info.flags); +} +#endif /* DCC_MAP_INFO_VERSION_6 */ + + + +#ifdef DCC_MAP_INFO_VERSION_7 +static int /* -1=error, 0=wrong version, 1=done */ +map_convert_v7(DCC_EMSG emsg, const struct stat *old_sb) +{ + DCC_PATH new_info_nm; + union { + DCC_V7_IPV6_CLNT_INFO ipv6; + DCC_V7_NOIPV6_CLNT_INFO noipv6; + } old; + DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS]; + DCC_SRVR_NM grey_nms[DCC_MAX_SRVR_NMS]; + int flags, i; + + if (old_sb->st_size == sizeof(old.ipv6)) { + i = map_convert_start(emsg, &old.ipv6, sizeof(old.ipv6), + DCC_MAP_INFO_VERSION_7, + sizeof(old.ipv6.version), + new_info_nm); + if (i <= 0) + return i; + + map_convert_v10_nms(new_nms, old.ipv6.dcc.nms); + map_convert_v10_nms(grey_nms, old.ipv6.grey.nms); + flags = old.ipv6.flags; + + } else if (old_sb->st_size == sizeof(old.noipv6)) { + i = map_convert_start(emsg, &old.noipv6, sizeof(old.noipv6), + DCC_MAP_INFO_VERSION_7, + sizeof(old.noipv6.version), + new_info_nm); + if (i <= 0) + return i; + + map_convert_v10_nms(new_nms, old.noipv6.dcc.nms); + map_convert_v10_nms(grey_nms, old.noipv6.grey.nms); + flags = old.noipv6.flags; + + } else { + return 0; + } + + return map_convert_fin(emsg, new_info_nm, old_sb, + new_nms, DIM(new_nms), grey_nms, DIM(grey_nms), + 0, flags); +} + + + +#endif /* DCC_MAP_INFO_VERSION_7 */ +#ifdef DCC_MAP_INFO_VERSION_8 +/* Convert an old map file. + * The contexts must be locked on entry. + * The old file may be locked on exit */ +static int /* -1=error, 0=wrong version, 1=done */ +map_convert_v8(DCC_EMSG emsg, const struct stat *old_sb) +{ + DCC_PATH new_info_nm; + DCC_V8_CLNT_INFO old; + DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS]; + DCC_SRVR_NM grey_nms[DCC_MAX_SRVR_NMS]; + int i; + + if ((int)old_sb->st_size != ISZ(old)) + return 0; + + i = map_convert_start(emsg, &old, sizeof(old), + DCC_MAP_INFO_VERSION_8, sizeof(old.version), + new_info_nm); + if (i <= 0) + return i; + + map_convert_v10_nms(new_nms, old.dcc.nms); + map_convert_v10_nms(grey_nms, old.grey.nms); + + return map_convert_fin(emsg, new_info_nm, old_sb, + new_nms, DIM(new_nms), grey_nms, DIM(grey_nms), + 0, old.flags); +} + + + +#endif /* DCC_MAP_INFO_VERSION_8 */ +#ifdef DCC_MAP_INFO_VERSION_9 +/* Convert an old map file. + * The contexts must be locked on entry. + * The old file may be locked on exit */ +static int /* -1=error, 0=wrong version, 1=done */ +map_convert_v9(DCC_EMSG emsg, const struct stat *old_sb) +{ + DCC_PATH new_info_nm; + DCC_V9_CLNT_INFO old; + DCC_SRVR_NM nms[DCC_MAX_SRVR_NMS]; + DCC_SRVR_NM grey_nms[DCC_MAX_SRVR_NMS]; + int i; + + if ((int)old_sb->st_size != ISZ(old)) + return 0; + + i = map_convert_start(emsg, &old, sizeof(old), + DCC_MAP_INFO_VERSION_9, sizeof(old.version), + new_info_nm); + if (i <= 0) + return i; + + map_convert_v10_nms(nms, old.dcc.nms); + map_convert_v10_nms(grey_nms, old.grey.nms); + + return map_convert_fin(emsg, new_info_nm, old_sb, + nms, DIM(nms), grey_nms, DIM(grey_nms), + &old.src, old.flags); +} + + + +#endif /* DCC_MAP_INFO_VERSION_9 */ +#ifdef DCC_MAP_INFO_VERSION_10 +/* Convert an old map file. + * The contexts must be locked on entry. + * The old file may be locked on exit */ +static int /* -1=error, 0=wrong version, 1=done */ +map_convert_v10(DCC_EMSG emsg, const struct stat *old_sb) +{ + DCC_PATH new_info_nm; + DCC_V10_CLNT_INFO old; + DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS]; + DCC_SRVR_NM grey_nms[DCC_MAX_SRVR_NMS]; + int i; + + if ((int)old_sb->st_size != ISZ(old)) + return 0; + + i = map_convert_start(emsg, &old, sizeof(old), + DCC_MAP_INFO_VERSION_10, sizeof(old.version), + new_info_nm); + if (i <= 0) + return i; + + map_convert_v10_nms(new_nms, old.dcc.nms); + map_convert_v10_nms(grey_nms, old.grey.nms); + + return map_convert_fin(emsg, new_info_nm, old_sb, + new_nms, DIM(new_nms), grey_nms, DIM(grey_nms), + &old.src, old.flags); +} + + + +#endif /* DCC_MAP_INFO_VERSION_10 */ +/* convert from a previous version + * The contexts must be locked. The old file must be open and unlocked */ +static u_char +map_convert(DCC_EMSG emsg, + const struct stat *old_sb) +{ + int i; + + assert_ctxts_locked(); + assert_info_unlocked(); + +#ifdef DCC_MAP_INFO_VERSION_6 + i = map_convert_v5(emsg, old_sb); + if (i < 0) { + dcc_unmap_close_info(0); + return 0; + } + /* unlock old file and open & lock new file */ + if (i > 0) + return 1; + + i = map_convert_v6(emsg, old_sb); + if (i < 0) { + dcc_unmap_close_info(0); + return 0; + } + /* unlock old file and open & lock new file */ + if (i > 0) + return 1; +#endif /* DCC_MAP_INFO_VERSION_6 */ +#ifdef DCC_MAP_INFO_VERSION_7 + i = map_convert_v7(emsg, old_sb); + if (i < 0) { + dcc_unmap_close_info(0); + return 0; + } + /* unlock old file and open & lock new file */ + if (i > 0) + return 1; +#endif /* DCC_MAP_INFO_VERSION_7 */ +#ifdef DCC_MAP_INFO_VERSION_8 + i = map_convert_v8(emsg, old_sb); + if (i < 0) { + dcc_unmap_close_info(0); + return 0; + } + /* unlock old file and open & lock new file */ + if (i > 0) + return 1; +#endif /* DCC_MAP_INFO_VERSION_8 */ +#ifdef DCC_MAP_INFO_VERSION_9 + i = map_convert_v9(emsg, old_sb); + if (i < 0) { + dcc_unmap_close_info(0); + return 0; + } + /* unlock old file and open & lock new file */ + if (i > 0) + return 1; +#endif /* DCC_MAP_INFO_VERSION_9 */ +#ifdef DCC_MAP_INFO_VERSION_10 + i = map_convert_v10(emsg, old_sb); + if (i < 0) { + dcc_unmap_close_info(0); + return 0; + } + /* unlock old file and open & lock new file */ + if (i > 0) + return 1; +#endif /* DCC_MAP_INFO_VERSION_10 */ + dcc_pemsg(EX_DATAERR, emsg, "%s is not a DCC map file", + dcc_info_nm); + close(info_fd); + info_fd = -1; + return 0; +} + + + +/* Ensure the shared information is available, but do not lock it. + * The contexts must be locked + * SUID privileges are often released */ +u_char +dcc_map_info(DCC_EMSG emsg, /* cleared of stale messages */ + const char *new_info_nm, int new_info_fd) +{ + struct stat sb; +#ifndef DCC_WIN32 + void *p; +#endif + + for (;;) { + assert_ctxts_locked(); + assert_info_unlocked(); + + /* work only if needed, + * but always check for a version changed */ + if (!(new_info_nm && strcmp(new_info_nm, dcc_info_nm)) + && new_info_fd < 0 + && dcc_clnt_info) + return 1; + + if (!dcc_unmap_close_info(emsg)) { + if (new_info_fd >= 0) + close(new_info_fd); + return 0; + } + + if (new_info_nm) { + if (!fnm2rel(dcc_info_nm, new_info_nm, 0)) { + dcc_pemsg(EX_IOERR, emsg, "bad map name \"%s\"", + new_info_nm); + return 0; + } + /* don't change name if we convert the file + * and so come back here */ + new_info_nm = 0; + } + if (dcc_info_nm[0] == '\0') { + dcc_pemsg(EX_USAGE, emsg, "missing map file name"); + return 0; + } + + if (new_info_fd >= 0) { + info_fd = new_info_fd; + new_info_fd = -1; + } else { + info_fd = open(dcc_info_nm, O_RDWR, 0600); +#ifndef DCC_WIN32 + if (info_fd < 0 + && dcc_get_priv_home(dcc_info_nm)) { + info_fd = open(dcc_info_nm, O_RDWR, 0600); + dcc_rel_priv(); + } +#endif + if (info_fd < 0) { + dcc_pemsg(EX_NOINPUT, emsg, "open(%s): %s", + dcc_info_nm, ERROR_STR()); + return 0; + } + } + + /* refuse to use the file if it is not private */ + if (!dcc_ck_private(emsg, &sb, dcc_info_nm, info_fd)) { + dcc_unmap_close_info(0); + return 0; + } + + if ((int)sb.st_size != ISZ(*dcc_clnt_info)) { + if (map_convert(emsg, &sb)) + continue; + return 0; + } + +#ifdef DCC_WIN32 + dcc_clnt_info = win32_map(emsg, &info_map, dcc_info_nm, + info_fd, sizeof(*dcc_clnt_info)); + if (!dcc_clnt_info) { + close(info_fd); + info_fd = -1; + return 0; + } +#else + + /* don't give it to children */ + if (0 > fcntl(info_fd, F_SETFD, FD_CLOEXEC)) { + dcc_pemsg(EX_IOERR, emsg, + "fcntl(F_SETFD FD_CLOEXEC %s): %s", + dcc_info_nm, ERROR_STR()); + close(info_fd); + info_fd = -1; + return 0; + } + + p = mmap(0, sizeof(*dcc_clnt_info), + PROT_READ|PROT_WRITE, MAP_SHARED, info_fd, 0); + if (p == MAP_FAILED) { + dcc_pemsg(EX_IOERR, emsg, "mmap(%s): %s", + dcc_info_nm, ERROR_STR()); + close(info_fd); + info_fd = -1; + return 0; + } + dcc_clnt_info = p; +#endif /* DCC_WIN32 */ + + if (!strncmp(dcc_clnt_info->version, DCC_MAP_INFO_VERSION, + sizeof(dcc_clnt_info->version))) { + /* The file is the right version. Set our ID in case + * it has been copied from another system */ + if (!get_clnt_hid(emsg)) + return 0; + dcc_clnt_info->proto_hdr.op_nums.h = dcc_clnt_hid; + + return 1; + } + + unmap_info(0); + if (!map_convert(emsg, &sb)) + return 0; + } +} + + + +/* SUID privileges are often released */ +u_char /* 0=something wrong, 1=mapped */ +dcc_map_lock_info(DCC_EMSG emsg, /* cleared of stale messages */ + const char *new_info_nm, + int new_info_fd) +{ + if (!dcc_map_info(emsg, new_info_nm, new_info_fd)) + return 0; + + if (!dcc_info_lock(emsg)) + return 0; + + return 1; +} + + + +/* All servers are broken, so make a note to not try for a while. + * The contexts and the mapped information must be locked */ +static void +fail_more(const DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class) +{ + assert_info_locked(); + + /* do not inflate the delay if we are already delaying */ + if (class->fail_exp != 0 + && class->fail_time >= ctxt->now.tv_sec) + return; + + /* reset the backoff after a long quiet time */ + if (ctxt->now.tv_sec >= (class->fail_time + + (DCC_INIT_FAIL_SECS << class->fail_exp))) + class->fail_exp = 0; + + if (++class->fail_exp > DCC_MAX_FAIL_EXP) + class->fail_exp = DCC_MAX_FAIL_EXP; + class->fail_time = (ctxt->now.tv_sec + + (DCC_INIT_FAIL_SECS << class->fail_exp)); +} + + + +static u_char /* 0=failing */ +ck_fail_time(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class) +{ + int dt; + + assert_info_locked(); + + if (class->fail_exp == 0) + return 1; + + dt = class->fail_time - ctxt->now.tv_sec; + if (dt > 0 && dt <= DCC_MAX_FAIL_SECS) { + dcc_pemsg(EX_IOERR, emsg, + "continue not asking %s %d seconds after failure", + DCC_IS_GREY_STR(class), dt); + return 0; + } + + return 1; +} + + + +void +dcc_force_measure_rtt(DCC_SRVR_CLASS *class) +{ + assert_info_locked(); + + class->fail_exp = 0; /* stop giving up */ + + class->resolve = 0; /* force name resolution */ + + class->srvr_inx = NO_SRVR; + + class->measure = 0; /* force RTT measurement */ +} + + + +/* pick the best server + * The client information and the contexts must be exclusively locked. + * Assume there is at least one hostname. */ +static u_char /* 0=have none, 1=found one */ +pick_srvr(DCC_EMSG emsg, DCC_SRVR_CLASS *class) +{ + const DCC_SRVR_ADDR *ap, *min_ap; + int rtt; + int min_rtt; /* smallest RTT */ + int min2_rtt; /* second smallest RTT */ + SRVR_INX old_srvr_inx; + + assert_info_locked(); + + if (class->num_srvrs == 0) { + class->srvr_inx = NO_SRVR; + extra_pemsg(EX_USAGE, emsg, "no valid %s server IP addresses", + DCC_IS_GREY_STR(class)); + return 0; + } + + old_srvr_inx = class->srvr_inx; + min2_rtt = min_rtt = DCC_RTT_BAD; + min_ap = 0; + ap = &class->addrs[class->num_srvrs]; + while (ap > class->addrs) { + --ap; + rtt = effective_rtt(class, ap); + if (rtt == DCC_RTT_BAD) + continue; + + if (min_rtt > rtt) { + if (min2_rtt > min_rtt) + min2_rtt = min_rtt; + min_rtt = rtt; + min_ap = ap; + } else if (min2_rtt > rtt) { + min2_rtt = rtt; + } + } + + /* we found a usable server */ + if (min_ap) { + /* Compute the basic RTT to the server including a variance */ + class->base_rtt = min_rtt + DCC_DCCD_DELAY; + if (class->base_rtt > DCC_MAX_RTT) + class->base_rtt = DCC_MAX_RTT; + /* Decide how bad the server must get before we check for + * an alternative. + * If there is no good second choice, there is no point in a + * threshold for switching to it */ + class->thold_rtt = min2_rtt + DCC_DCCD_DELAY; + if (class->thold_rtt >= DCC_MAX_RTT) + class->thold_rtt = DCC_RTT_BAD; + + class->srvr_inx = (min_ap - class->addrs); + if (class->srvr_inx != old_srvr_inx) { + if (dcc_clnt_debug > 1 && + GOOD_SRVR(class, old_srvr_inx)) { + trace_perf("replacing", + &class->addrs[old_srvr_inx]); + trace_perf("pick", min_ap); + } + } + return 1; + } + + /* we failed to find a working server */ + class->srvr_inx = NO_SRVR; + + flush_emsg(emsg, dcc_clnt_debug); + if (!emsg || *emsg == '\0') { + char astr[(DCC_SU2STR_SIZE+1+5+1)*3]; + const char *s, *h0; + int l; + + l = dcc_ap2str_opt(astr, sizeof(astr), + class, 0, '\0'); + if (!strcmp(class->nms[0].hostname, astr)) { + h0 = ""; + s = ""; + } else { + h0 = class->nms[0].hostname; + s = class->nms[1].hostname[0] ? "s" : " "; + } + + if (class->num_srvrs > 1 && l < ISZ(astr)-2) { + astr[l++] = ' '; + l += dcc_ap2str_opt(&astr[l], sizeof(astr)-l, + class, 1, '\0'); + } + if (class->num_srvrs > 2 && l < ISZ(astr)-2) { + astr[l++] = ' '; + dcc_ap2str_opt(&astr[l], sizeof(astr)-l, + class, 1, '\0'); + } + dcc_pemsg(EX_IOERR, emsg, + "no working %s server%s%s%s%s%s%s%s" + " at %s%s", + DCC_IS_GREY_STR(class), + s, h0, + class->nms[1].hostname[0] ? " " : "", + class->nms[1].hostname, + class->nms[2].hostname[0] ? " " : "", + class->nms[2].hostname, + class->nms[3].hostname[0] ? " ..." : "", + + astr, + class->num_srvrs > 3 ? " ..." : ""); + } + return 0; +} + + + +/* count IP addresses per host name and per second level domain name */ +typedef struct name_addrs { + const char *sld; /* domain name */ + u_char sld_addrs; /* # of addresses for domain name */ + u_char host_addrs; /* # of addresses for a host name */ + u_char sld_addrs_inx; +} NAME_ADDRS[DCC_MAX_SRVR_NMS]; + + +/* delete an address from a growing list of addresses */ +static void +del_new_addr(DCC_SRVR_CLASS *class, + NAME_ADDRS name_addrs, /* addresses per server name */ + int tgt) /* delete this address */ +{ + NAM_INX nam_inx; + int i; + + /* adjust that host's and domain's numbers of addresses and our + * total number of addresses */ + nam_inx = class->addrs[tgt].nam_inx; + --name_addrs[nam_inx].host_addrs; + --name_addrs[name_addrs[nam_inx].sld_addrs_inx].sld_addrs; + --class->num_srvrs; + + /* slide the array of addresses to get rid of the discarded address */ + i = class->num_srvrs - tgt; + if (i > 0) + memmove(&class->addrs[tgt], &class->addrs[tgt+1], + i * sizeof(class->addrs[0])); + memset(&class->addrs[class->num_srvrs], 0, sizeof(class->addrs[0])); +} + + + +/* impose arbitrary local order on IP addresses */ +#define DCC_SRVRS_MOD 16381 + +static inline u_int +su_srvrs_mod(const DCC_SOCKU *sup, + DCC_SOCKU *sup2) +{ + u_int su_res; + + if (dcc_ipv6sutoipv4(sup2, sup)) { + su_res = sup2->ipv4.sin_addr.s_addr % DCC_SRVRS_MOD; + su_res *= dcc_clnt_info->residue; + su_res %= DCC_SRVRS_MOD; + su_res += DCC_SRVRS_MOD; /* distinguish IPv4 from IPv6 */ + } else { + *sup2 = *sup; + su_res = (sup->ipv6.sin6_addr.s6_addr32[0] % DCC_SRVRS_MOD + + sup->ipv6.sin6_addr.s6_addr32[1] % DCC_SRVRS_MOD + + sup->ipv6.sin6_addr.s6_addr32[2] % DCC_SRVRS_MOD + + sup->ipv6.sin6_addr.s6_addr32[3] % DCC_SRVRS_MOD); + su_res *= dcc_clnt_info->residue; + su_res %= DCC_SRVRS_MOD; + } + return su_res; +} + + + +/* partially order a pair of IP addresses with a reasonably unique ordering */ +static int +sucmp(const DCC_SOCKU *sup1, const DCC_SOCKU *sup2) +{ + DCC_SOCKU su1, su2; + u_int su1_res, su2_res; + int i; + + su1_res = su_srvrs_mod(sup1, &su1); + su2_res = su_srvrs_mod(sup2, &su2); + + i = (int)su1_res - (int)su2_res; + if (i) + return i; + return memcmp(&su1, &su2, sizeof(DCC_SOCKU)); +} + + + +/* Deal with a list of IP addresses or aliases for one DCC server hostname. + * the contexts and the mmap()'ed info must be locked */ +static void +copy_addrs(DCC_SRVR_CLASS *class, + const DCC_SRVR_NM *nmp, /* server name being resolved */ + const int nam_inx, + NAME_ADDRS name_addrs) /* addresses per server name */ +{ + DCC_SRVR_ADDR *ap; + const DCC_SRVR_NM *nmp2; + DCC_SOCKU *np, su, nxt, prev; + u_int16_t port; + int i, j, k; + + /* Keep as many IP addresses as we have room, but for as many + * named servers as possible + * Sort the addresses to keep our list stable when we re-check. + * Otherwise, we would start from scratch when nothing changes + * but the order of responses from a DNS server. + * Sort by residue class to pick a random subset when there + * are too many servers to fit in our list. */ + + port = nmp->port; + + nxt.sa.sa_family = AF_UNSPEC; + for (;;) { + /* Pick the next address in the newly resolved list + * to consider. We want the smallest address larger + * than the previous address we considered. + * "Smallest" is defined using the local random ordering + * of addresses. */ + prev = nxt; + nxt.sa.sa_family = AF_UNSPEC; + for (np = dcc_hostaddrs; np < dcc_hostaddrs_end; ++np) { + if (np->sa.sa_family == AF_UNSPEC) + continue; + su = *np; + *DCC_SU_PORTP(&su) = port; + if ((prev.sa.sa_family == AF_UNSPEC + || sucmp(&su, &prev) > 0) + && (nxt.sa.sa_family == AF_UNSPEC + || sucmp(&nxt, &su) > 0)) + nxt = su; + } + /* quit if we've considered them all */ + if (nxt.sa.sa_family == AF_UNSPEC) + break; + + /* ignore duplicate IP addresses even for other hostnames, + * unless the port numbers differ */ + ap = &class->addrs[class->num_srvrs]; + while (--ap >= class->addrs) { + if (!dcc_cmp_ap2su(ap, &nxt)) { + /* they are the same, so keep the one with + * the non-anonymous ID + * or smallest RTT adjustment */ + nmp2 = &class->nms[ap->nam_inx]; + i = (nmp->clnt_id == DCC_ID_ANON); + j = (nmp2->clnt_id == DCC_ID_ANON); + if (i != j) { + /* one is anonymous & other is not */ + if (i) + goto next_addr; + } else { + /* pick smallest RTT adjustment */ + if (nmp->rtt_adj >= nmp2->rtt_adj) + goto next_addr; + } + /* delete the previous instance */ + del_new_addr(class, name_addrs, + ap - class->addrs); + break; + } + } + + /* If we already have as many addresses as we will use, + * then pick one to discard. Discard the last address of + * the host in the second level domain with the most + * addresses but without eliminating all addresses for any + * host name. Look for the domain with the most IP addresses + * and that has at least one host with at least two + * addersses. */ + if (class->num_srvrs == DCC_MAX_SRVR_ADDRS) { + int host_max, sld_max; + NAM_INX nam1_inx, sld1_inx, sld2_inx; + + host_max = -1; + sld_max = -1; + nam1_inx = NO_NAM; + sld1_inx = NO_NAM; + for (i = 0; i <= nam_inx; i++) { + /* ignore hosts with only 1 IP address */ + j = name_addrs[i].host_addrs; + if (j <= 1) + continue; + sld2_inx = name_addrs[i].sld_addrs_inx; + k = name_addrs[sld2_inx].sld_addrs; + if (sld_max <= k) { + if (sld1_inx != sld2_inx) { + sld_max = k; + sld1_inx = sld2_inx; + host_max = j; + nam1_inx = i; + } else if (host_max <= j) { + host_max = j; + nam1_inx = i; + } + } + } + /* no additional IP addresses for the target host if + * it has the most IP addresses */ + if (nam1_inx == nam_inx) + return; + + /* find the last address of the host with the most */ + for (i = 0, j = 0; i < class->num_srvrs; i++) { + if (class->addrs[i].nam_inx == nam1_inx) + j = i; + } + /* and delete it */ + del_new_addr(class, name_addrs, j); + } + + /* install the new address in the growing list */ + ap = &class->addrs[class->num_srvrs]; + ap->rtt = DCC_RTT_BAD; + if (nxt.sa.sa_family == AF_INET && DCC_INFO_IPV6()) + dcc_ipv4sutoipv6(&nxt, &nxt); + else if (nxt.sa.sa_family == AF_INET6 && !DCC_INFO_IPV6()) + dcc_ipv6sutoipv4(&nxt, &nxt); + dcc_su2ip(&ap->ip, &nxt); + + /* If this is a previously known address, + * preserve what we already knew about it + * Check the address family separately because dcc_cmp_ap2su() + * does not and DCC_INFO_IPV6() might have changed. */ + for (i = 0; i < class->num_srvrs; ++i) { + if (class->addrs[i].ip.family == nxt.sa.sa_family + && !dcc_cmp_ap2su(&class->addrs[i], &nxt)) { + *ap = class->addrs[i]; + break; + } + } + ap->nam_inx = nam_inx; + ++class->num_srvrs; + + ++name_addrs[nam_inx].host_addrs; + ++name_addrs[name_addrs[nam_inx].sld_addrs_inx].sld_addrs; +next_addr:; + } +} + + + +/* resolve one server name into a scratch array of addresses */ +static void +resolve_nm(DCC_EMSG emsg, + DCC_SRVR_CLASS *class, + int nm_inx, /* name being resolved */ + NAME_ADDRS name_addrs) /* addresses per server name */ +{ + DCC_SRVR_NM *nmp; + const char *domain, *p1, *p2; + int error; + u_char result; + int i; + + nmp = &class->nms[nm_inx]; + nmp->defined = 0; + if (nmp->hostname[0] == '\0') + return; + + if (nmp->rtt_adj > DCC_RTT_ADJ_MAX) + nmp->rtt_adj = DCC_RTT_ADJ_MAX; + else if (nmp->rtt_adj < -DCC_RTT_ADJ_MAX) + nmp->rtt_adj = -DCC_RTT_ADJ_MAX; + + /* find the total number of addresses for this domain name */ + domain = nmp->hostname; + p1 = strchr(domain, '.'); + if (p1) { + for (;;) { + p2 = strchr(++p1, '.'); + if (!p2) + break; + domain = p1; + p1 = p2; + } + } + name_addrs[nm_inx].sld = domain; + for (i = 0; i < nm_inx; ++i) { + if (name_addrs[i].sld != 0 + && !strcmp(domain, name_addrs[i].sld)) + break; + } + name_addrs[nm_inx].sld_addrs_inx = i; + + dcc_host_lock(); + if (dcc_clnt_info->flags & DCC_INFO_FG_SOCKS) + result = dcc_get_host_SOCKS(nmp->hostname, + DCC_INFO_IPV6() ? 2 : 0, &error); + else + result = dcc_get_host(nmp->hostname, + DCC_INFO_IPV6() ? 2 : 0, &error); + if (!result) { + dcc_pemsg(EX_NOHOST, emsg, "%s: %s", + nmp->hostname, DCC_HSTRERROR(error)); + dcc_host_unlock(); + return; + } + nmp->defined = 1; + copy_addrs(class, nmp, nm_inx, name_addrs); + dcc_host_unlock(); +} + + + +/* resolve server hostnames again + * both locks must be held on entry + * both will be released while working + * on success, both are held + * on failure only the contexts are locked + */ +static u_char /* 0=no good addresses, 1=at least 1 */ +resolve_nms(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *cur) +{ + DCC_SRVR_CLASS new; + int nm_inx, a_inx; + NAME_ADDRS name_addrs; + DCC_SRVR_ADDR *new_ap, *cur_ap; + + assert_info_locked(); + + if (dcc_clnt_debug > 1) + dcc_trace_msg("resolve %s server host names", + DCC_IS_GREY_STR(cur)); + + /* try not to resolve names too often + * and discourage other processes and threads from resolving + * or measuring RTTs until we finish */ + cur->resolve = ctxt->now.tv_sec+DCC_MAP_RESOLVE; + cur->measure = ctxt->now.tv_sec+FAST_RTT_SECS; + + if (cur->nms[0].hostname[0] == '\0') { + if (HAVE_SRVR(cur)) { + ++cur->gen; + cur->avg_thold_rtt = DCC_RTT_BAD; + cur->srvr_inx = NO_SRVR; + } + cur->num_srvrs = 0; + memset(cur->addrs, 0, sizeof(cur->addrs)); + extra_pemsg(EX_USAGE, emsg, "no %s server hostnames", + DCC_IS_GREY_STR(cur)); + dcc_info_unlock(0); + return 0; + } + + new = *cur; + memset(new.addrs, 0, sizeof(new.addrs)); + new.num_srvrs = 0; + memset(&name_addrs, 0, sizeof(name_addrs)); + + if (dcc_clnt_info->residue == 0) { + dcc_clnt_info->residue = dcc_clnt_hid % DCC_SRVRS_MOD; + if (dcc_clnt_info->residue == 0) + dcc_clnt_info->residue = 1; + } + + /* unlock everything while we wait for DNS */ + if (!dcc_info_unlock(emsg)) { + cur->resolve = 0; + return 0; + } + dcc_ctxts_unlock(); + if (emsg) + *emsg = '\0'; + for (nm_inx = 0; nm_inx < DIM(cur->nms); ++nm_inx) + resolve_nm(emsg, &new, nm_inx, name_addrs); + dcc_ctxts_lock(); + if (!dcc_info_lock(emsg)) { + cur->resolve = 0; + return 0; + } + + /* measure all RTTs at least as often as we resolve names */ + cur->measure = 0; + + /* if we fail to resolve even one server host names, + * complain but try to continue using the old IP addresses */ + a_inx = new.num_srvrs; + if (a_inx == 0) { + extra_pemsg(EX_USAGE, emsg, "no valid %s server hostnames", + DCC_IS_GREY_STR(cur)); + dcc_info_unlock(0); + return 0; + } + + /* see if anything changed */ + for (nm_inx = 0; nm_inx < DIM(cur->nms); ++nm_inx) { + if (cur->nms[nm_inx].defined != new.nms[nm_inx].defined) + break; + } + if (nm_inx >= DIM(cur->nms) + && a_inx == cur->num_srvrs) { + /* we have the same number of old and new names and addresses, + * so compare the old and new addresses */ + new_ap = new.addrs; + cur_ap = cur->addrs; + for (;;) { + if (new_ap->nam_inx != cur_ap->nam_inx + || memcmp(&new_ap->ip, &cur_ap->ip, + sizeof(new_ap->ip))) { + break; + } + ++new_ap; + ++cur_ap; + if (!--a_inx) + return 1; /* nothing changed */ + } + } + + /* Something changed, so we must compute RTTs */ + ++cur->gen; + cur->srvr_inx = NO_SRVR; + cur->avg_thold_rtt = -DCC_RTT_BAD; + memcpy(&cur->addrs, &new.addrs, sizeof(cur->addrs)); + cur->num_srvrs = new.num_srvrs; + for (nm_inx = 0; nm_inx < DIM(cur->nms); ++nm_inx) + cur->nms[nm_inx].defined = new.nms[nm_inx].defined; + + return 1; +} + + + +void +dcc_clnt_soc_close(DCC_CLNT_CTXT *ctxt) +{ + if (ctxt->soc == INVALID_SOCKET) + return; + if (SOCKET_ERROR == closesocket(ctxt->soc) + && dcc_clnt_debug) + dcc_trace_msg("closesocket(ctxt): %s", ERROR_STR()); + ctxt->soc = INVALID_SOCKET; + ctxt->conn_su.sa.sa_family = AF_UNSPEC; +} + + + +/* disconnect (or close) and (re)open the client + * The contexts and shared information must be locked on entry + * and both are locked on exit */ +u_char /* 0=failed to open the socket */ +dcc_clnt_soc_reopen(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt) +{ + DCC_SOCKU su; + DCC_SOCKLEN_T soc_len; + int retries; + + assert_info_locked(); + + if (ctxt->soc != INVALID_SOCKET) + dcc_clnt_soc_close(ctxt); + + /* try to bind to the specified local interface address + * if it has changed + * or if it has been some time since we last tried and failed. */ + if (dcc_clnt_info->src.family == AF_UNSPEC) { + ctxt->flags &= ~DCC_CTXT_SRCBAD; + } else if (!(ctxt->flags & DCC_CTXT_SRCBAD) + || DCC_IS_TIME(ctxt->start.tv_sec, ctxt->bind_time, + DCC_CTXT_REBIND_SECS)) { + dcc_mk_su(&su, dcc_clnt_info->src.family, + &dcc_clnt_info->src.u, 0); + *DCC_SU_PORTP(&su) = DCC_SU_PORT(&ctxt->bind_su); + retries = -1; + if (0 >= dcc_udp_bind(emsg, &ctxt->soc, &su, &retries)) { + ctxt->flags |= DCC_CTXT_SRCBAD; + ctxt->bind_time = (ctxt->start.tv_sec + + DCC_CTXT_REBIND_SECS); + return 0; + } + ctxt->flags &= ~DCC_CTXT_SRCBAD; + ctxt->bind_time = 0; + + /* we have a bound socket */ + } + + /* If we do not have a bound socket, + * try to bind a new socket with IPv6 first if allowed */ + if (ctxt->soc == INVALID_SOCKET && DCC_INFO_IPV6()) { + dcc_mk_su(&su, AF_INET6, 0, DCC_SU_PORT(&ctxt->bind_su)); + retries = -1; + if (!dcc_udp_bind(emsg, &ctxt->soc, &su, &retries)) + return 0; + } + + /* if we still do not have a socket, try IPv4 */ + if (ctxt->soc == INVALID_SOCKET) { + dcc_clnt_info->flags &= ~DCC_INFO_FG_IPV6; + dcc_mk_su(&su, AF_INET, 0, DCC_SU_PORT(&ctxt->bind_su)); + retries = -1; + if (!dcc_udp_bind(emsg, &ctxt->soc, &su, &retries)) + return 0; + } + +#if !defined(USE_POLL) && !defined(DCC_WIN32) + if (ctxt->soc >= FD_SETSIZE) { + dcc_info_unlock(0); + dcc_pemsg(EX_IOERR, emsg, "socket FD %d > FD_SETSIZE %d", + ctxt->soc, FD_SETSIZE); + dcc_clnt_soc_close(ctxt); + return 0; + } +#endif + +#if defined(IPPROTO_IP) && defined(IP_TTL) + if (dcc_debug_ttl != 0 + && 0 > setsockopt(ctxt->soc, IPPROTO_IP, IP_TTL, + (void *)&dcc_debug_ttl, sizeof(dcc_debug_ttl))) { + dcc_pemsg(EX_IOERR, emsg, "setsockopt(TTL=%d):%s", + dcc_debug_ttl, ERROR_STR()); + dcc_clnt_soc_close(ctxt); + return 0; + } +#endif + + soc_len = sizeof(ctxt->bind_su); + if (0 > getsockname(ctxt->soc, &ctxt->bind_su.sa, &soc_len)) { + dcc_pemsg(EX_IOERR, emsg, "getsockname(): %s", ERROR_STR()); + dcc_clnt_soc_close(ctxt); + return 0; + } + if (su.sa.sa_family == AF_INET) + ctxt->flags |= DCC_CTXT_USING_IPV4; + else + ctxt->flags &= ~DCC_CTXT_USING_IPV4; + return 1; +} + + + +static int +do_recv(DCC_CLNT_CTXT *ctxt, DCC_OP_RESP *resp, int resp_len, DCC_SOCKU *sup) +{ + DCC_SOCKLEN_T su_len; + + su_len = sizeof(*sup); + memset(sup, 0, sizeof(*sup)); + if (dcc_clnt_info->flags & DCC_INFO_FG_SOCKS) + return Rrecvfrom(ctxt->soc, WIN32_SOC_CAST resp, resp_len, 0, + &sup->sa, &su_len); + else + return recvfrom(ctxt->soc, WIN32_SOC_CAST resp, resp_len, 0, + &sup->sa, &su_len); +} + + + +static void +clear_error(DCC_CLNT_CTXT *ctxt, const char *which) +{ + int err; + DCC_SOCKLEN_T errlen; + + errlen = sizeof(err); + if (0 > getsockopt(ctxt->soc, SOL_SOCKET, SO_ERROR, + WIN32_SOC_CAST &err, &errlen)) { + dcc_trace_msg("getsockopt(SO_ERROR): %s", ERROR_STR()); + } else if (dcc_clnt_debug > 3 && err) { + dcc_trace_msg("%s SO_ERROR: %s", which, ERROR_STR1(err)); + } +} + + + +/* clear the socket buffer */ +static u_char +dcc_clnt_soc_flush(DCC_CLNT_CTXT *ctxt) +{ + DCC_OP_RESP pkt; + DCC_SOCKU su; + char sbuf[DCC_SU2STR_SIZE]; + char rbuf[30]; + char ob[DCC_OPBUF]; + int pkt_len, pkt_num; + + for (pkt_num = 1; pkt_num <= 50; ++pkt_num) { + pkt_len = do_recv(ctxt, &pkt, sizeof(pkt), &su); + if (0 <= pkt_len) { + if (dcc_clnt_debug == 0 && pkt_num < 10) + continue; + dcc_su2str(sbuf, sizeof(sbuf), &su); + if (pkt_num > 1) + snprintf(rbuf, sizeof(rbuf), " #%d", pkt_num); + else + rbuf[0] = '\0'; + if (pkt_len < ISZ(DCC_HDR)+ISZ(DCC_SIGNATURE) + || pkt_len != ntohs(pkt.hdr.len) + || pkt.hdr.pkt_vers < DCC_PKT_VERSION_MIN + || pkt.hdr.pkt_vers > DCC_PKT_VERSION_MAX) { + trace_bad_packet(0, &su, &pkt, pkt_len, + "flush%s %d stray bytes from" + " %s", + rbuf, pkt_len, sbuf); + } else { + dcc_trace_msg("flush%s %s from %s" + " ID=%d h=%#x p=%#x r=%#x t=%#x", + rbuf, + dcc_hdr_op2str(ob, sizeof(ob), + &pkt.hdr), + sbuf, + ntohl(pkt.hdr.sender), + pkt.hdr.op_nums.h, + pkt.hdr.op_nums.p, + pkt.hdr.op_nums.r, + pkt.hdr.op_nums.t); + + } + continue; + } + if (DCC_BLOCK_ERROR()) + return 1; + if (UNREACHABLE_ERRORS()) { + if (dcc_clnt_debug > 1 || pkt_num > 10) + dcc_trace_msg("ignore flushed error: %s", + ERROR_STR()); + continue; + } + dcc_trace_msg("flush recvfrom(%s): %s", + su.sa.sa_family + ? dcc_su2str(sbuf, sizeof(sbuf), &su) : "", + ERROR_STR()); + return 0; + } + + dcc_trace_msg("too many flushed packets or errors"); + return 0; +} + + + +/* connect() to the server + * The contexts and shared information must be locked on entry + * They are locked on exit */ +u_char +dcc_clnt_connect(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, + const DCC_SOCKU *su) /* 0=disconnect */ +{ + u_char was_connected; + + assert_info_locked(); + + /* disconnect if asked + * In theory you can use connect() with a "null address." + * In practice on some systems there is more than one or even + * no notion of an effective "null" address. */ + if (!su) { + if (ctxt->conn_su.sa.sa_family == AF_UNSPEC) { +#ifdef linux + /* some flavors of Linux say "Connection refused" on + * sendto() on a not-connected socket when a previous + * use of sendto() hit a closed port, particularly + * via loopback */ + clear_error(ctxt, "Linux dcc_clnt_connect(0)"); +#endif + return 1; + } + return dcc_clnt_soc_reopen(emsg, ctxt); + } + + /* already properly connected */ + if (!memcmp(&ctxt->conn_su, su, sizeof(ctxt->conn_su))) + return 1; + + was_connected = (ctxt->conn_su.sa.sa_family != AF_UNSPEC); + + /* At least some versions of Linux do not allow connsecutive valid + * calls to connect(). So for Linux, always close and reopen + * the socket. + * At least some versions of FreeBSD unbind a socket while + * reconnecting it. So if the socket was bound to local address + * or if it is time to try to bind it again, close and reopen it. + */ + if ((was_connected +#ifndef linux + && (dcc_clnt_info->src.family != AF_UNSPEC + && (!(ctxt->flags & DCC_CTXT_SRCBAD) + || DCC_IS_TIME(ctxt->start.tv_sec, ctxt->bind_time, + DCC_CTXT_REBIND_SECS))) +#endif /* linux */ + ) || !dcc_clnt_soc_flush(ctxt)) { + if (!dcc_clnt_soc_reopen(emsg, ctxt)) + return 0; + } + + if (SOCKET_ERROR == connect(ctxt->soc, &su->sa, DCC_SU_LEN(su))) { + char sustr[DCC_SU2STR_SIZE]; + + dcc_pemsg(EX_IOERR, emsg, "connect(%s): %s", + dcc_su2str(sustr, sizeof(sustr), su), + ERROR_STR()); + dcc_clnt_soc_close(ctxt); + return 0; + } + ctxt->conn_su = *su; + + /* clear ICMP Unreachable errors from previous connections */ + if (was_connected) + clear_error(ctxt, "dcc_clnt_connect"); + + return 1; +} + + + +/* send a single DCC message + * the contexts and the shared information must be locked on entry + * nothing is unlocked */ +static int /* 0=failed this target, -1=all stop */ +clnt_xmit(DCC_CLNT_CTXT *ctxt, + DCC_SRVR_CLASS *class, const DCC_SRVR_ADDR *ap, + DCC_HDR *msg, int msg_len, u_char connect_ok) +{ + DCC_XLOG_ENTRY *xloge; +# define FSTR " from " + char tgt_abuf[80], src_abuf[LITZ(FSTR)+INET6_ADDRSTRLEN+1]; + char ob[DCC_OPBUF]; + DCC_XLOG_ENTRY *xloge1; + int i, result; + + msg->len = htons(msg_len); + + xloge = ctxt->xlog.next; + if (xloge > ctxt->xlog.last) + dcc_logbad(EX_SOFTWARE, "xloge > ctxt->xlog.last"); + ++msg->op_nums.t; + xloge->op_nums = msg->op_nums; + xloge->addr_inx = ap - class->addrs; + dcc_mk_su(&xloge->su, ap->ip.family, &ap->ip.u, ap->ip.port); + xloge->addrs_gen = class->gen; + xloge->sent_us = ctxt->now_us; + xloge->op = msg->op; + if (!GOOD_NAM(ap->nam_inx)) + dcc_logbad(EX_SOFTWARE, "clnt_xmit: bad nam_inx"); + xloge->id = class->nms[ap->nam_inx].clnt_id; + msg->sender = htonl(xloge->id); + if (xloge->id == DCC_ID_ANON) { + xloge->passwd[0] = '\0'; + memset((char *)msg + (msg_len-sizeof(DCC_SIGNATURE)), 0, + sizeof(DCC_SIGNATURE)); + } else { + if (xloge->id == 0) { + if (dcc_clnt_debug) + dcc_logbad(EX_SOFTWARE, "zero client-ID for %s", + class->nms[ap->nam_inx].hostname); + else + dcc_trace_msg("zero client-ID for %s", + class->nms[ap->nam_inx].hostname); + class->nms[ap->nam_inx].clnt_id = DCC_ID_ANON; + } else if (class->nms[ap->nam_inx].passwd[0] == '\0') { + if (dcc_clnt_debug) + dcc_logbad(EX_SOFTWARE, "null password for %s", + class->nms[ap->nam_inx].hostname); + else + dcc_trace_msg("null password for %s", + class->nms[ap->nam_inx].hostname); + class->nms[ap->nam_inx].clnt_id = DCC_ID_ANON; + } + strncpy(xloge->passwd, class->nms[ap->nam_inx].passwd, + sizeof(xloge->passwd)); + dcc_sign(xloge->passwd, sizeof(xloge->passwd), msg, msg_len); + } + + /* Use connect() when possible to get ICMP Unreachable messages. + * It is impossible when talking to more than one server. */ + for (xloge1 = ctxt->xlog.base; connect_ok && xloge1 < xloge; ++xloge1) { + if (xloge1->op_nums.t == DCC_OP_NUMS_NULL) + continue; + if (xloge->addr_inx != xloge1->addr_inx) { + connect_ok = 0; + break; + } + } + if (!dcc_clnt_connect(0, ctxt, connect_ok ? &xloge->su : 0)) + return -1; + + if (ctxt->conn_su.sa.sa_family != AF_UNSPEC) { + if (dcc_clnt_info->flags & DCC_INFO_FG_SOCKS) + i = Rsend(ctxt->soc, WIN32_SOC_CAST msg, msg_len, 0); + else + i = send(ctxt->soc, WIN32_SOC_CAST msg, msg_len, 0); + + } else { + if (dcc_clnt_info->flags & DCC_INFO_FG_SOCKS) + i = Rsendto(ctxt->soc, WIN32_SOC_CAST msg, msg_len, 0, + &xloge->su.sa, DCC_SU_LEN(&xloge->su)); + else + i = sendto(ctxt->soc, WIN32_SOC_CAST msg, msg_len, 0, + &xloge->su.sa, DCC_SU_LEN(&xloge->su)); + } + ++ctxt->xlog.cur[ap - class->addrs].xmits; + if (i == msg_len) { + if (dcc_clnt_debug > 3) + dcc_trace_msg("%8.6f sent %s t=%#x to %s", + get_age(ctxt), + dcc_hdr_op2str(ob, sizeof(ob), msg), + xloge->op_nums.t, + addr2str(tgt_abuf, sizeof(tgt_abuf), + class, class->gen, ap, 0)); + ++ctxt->xlog.next; + ++ctxt->xlog.outstanding; + return 1; + } + + /* stop output only for this target after ICMP Unreachable + * otherwise stop all output */ + if (i >= 0) { + result = -1; /* bad length is fatal */ + } else { + result = UNREACHABLE_ERRORS() ? 0 : -1; + } + + if (result < 0 || dcc_clnt_debug) { + if (ctxt->bind_su.sa.sa_family == AF_UNSPEC) { + src_abuf[0] = '\0'; + } else { + memcpy(src_abuf, FSTR, LITZ(FSTR)); + dcc_su2str(&src_abuf[LITZ(FSTR)], + sizeof(src_abuf)-LITZ(FSTR), + &ctxt->bind_su); + } + if (i < 0) { + dcc_trace_msg("%s(%s)%s: %s", + connect_ok ? "send" : "sendto", + addr2str(tgt_abuf, sizeof(tgt_abuf), + class, + class->gen, ap, 0), + src_abuf, ERROR_STR()); + } else { + dcc_trace_msg("%s(%s%s)=%d instead of %d", + connect_ok ? "send" : "sendto", + addr2str(tgt_abuf, sizeof(tgt_abuf), + class, + class->gen, ap, 0), + src_abuf, i, msg_len); + } + } + return result; + +#undef FSTR +} + + + +static void +update_rtt(DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class, DCC_XLOG_ENTRY *xloge, + int us) +{ + DCC_SRVR_ADDR *ap; + + /* compute new RTT only if the map data structure is locked, + * the clock did not jump, + * and we're talking about the same hosts */ + if (!info_locked + || xloge->addrs_gen != class->gen) + return; + + ap = &class->addrs[xloge->addr_inx]; + + if (us < 0) + us = 0; + if (us > DCC_RTT_BAD) + us = DCC_RTT_BAD; + + if (ap->rtt == DCC_RTT_BAD) { + /* just set the RTT if this is a newly working server */ + ap->rtt = us; + ap->total_xmits = 0; + ap->total_resps = 0; + ap->resp_mem = 0; + ap->rtt_updated = 0; + + } else if (ctxt->now.tv_sec < ap->rtt_updated + FAST_RTT_SECS) { + /* adjust the RTT quickly if this is the first + * measurement in a long time */ + AGE_AVG(ap->rtt, us, 2, 1); + ap->rtt_updated = ctxt->now.tv_sec; + + } else { + AGE_AVG(ap->rtt, us, 9, 1); + ap->rtt_updated = ctxt->now.tv_sec; + } + + if (ap->rtt > DCC_MAX_RTT) + ap->rtt = DCC_MAX_RTT; +} + + + +/* Update response rate and penalize the RTT of servers that failed to respond. + * the data must be locked */ +static void +resp_rates(DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class, + u_char measuring) +{ + DCC_SRVR_ADDR *ap; + DCC_XLOG_ENTRY *xloge; + const DCC_XLOG_ENTRY *xloge2; + int us, us2; + u_char seen; + int i; + + for (xloge = ctxt->xlog.base; xloge < ctxt->xlog.next; ++xloge) { + /* ignore responses we've already handled */ + if (xloge->op_nums.t == DCC_OP_NUMS_NULL) + continue; + + ap = &class->addrs[xloge->addr_inx]; + + /* Update the RTT of this server as if we would have received + * ia response if we had waited a little longer, unless we + * would be assuming a faster RTT than its current average. + * + * Use the longest of the time spent waiting for this request + * and the delays of any requests that were answered by the + * server. */ + us = ctxt->now_us - xloge->sent_us; + seen = 0; + for (xloge2=ctxt->xlog.base; xloge2<ctxt->xlog.next; ++xloge2) { + if (xloge2->addr_inx != xloge->addr_inx + || xloge2 == xloge) + continue; + if (xloge2->op_nums.t != DCC_OP_NUMS_NULL) { + seen = 1; + continue; + } + us2 = ctxt->now_us - xloge2->sent_us; + if (us < us2) + us = us2; + } + /* update the RTT + * if we waited at least as long as the current RTT + * or we received at least one response */ + if (ctxt->now_us >= ap->rtt && seen) + update_rtt(ctxt, class, xloge, us + DCC_DCCD_DELAY); + + /* having received its answer, forget this transmission */ + xloge->op_nums.t = DCC_OP_NUMS_NULL; + } + + /* maintain the response rate */ + for (i = 0, ap = class->addrs; i < DIM(ctxt->xlog.cur); ++i, ++ap) { + if (ap->rtt == DCC_RTT_BAD + || ctxt->xlog.cur[i].xmits == 0) + continue; + if (measuring) { + if (ctxt->xlog.cur[i].resps != 0) { + ++ctxt->xlog.working_addrs; + } else if (!(ap->resp_mem & ((1<<DCC_MAX_XMITS)-1))) { + /* this server is bad if there were no answers + * at all for this mesurement cycle */ + ap->rtt = DCC_RTT_BAD; + continue; + } + } + ap->total_xmits += ctxt->xlog.cur[i].xmits; + if (ap->total_xmits > DCC_TOTAL_XMITS_MAX) + ap->total_xmits = DCC_TOTAL_XMITS_MAX; + do { + ap->total_resps -= (ap->resp_mem + >> (DCC_TOTAL_XMITS_MAX-1)); + ap->resp_mem <<= 1; + if (ctxt->xlog.cur[i].resps != 0) { + ap->resp_mem |= 1; + ++ap->total_resps; + --ctxt->xlog.cur[i].resps; + } + } while (--ctxt->xlog.cur[i].xmits != 0); + } +} + + + +/* receive a single DCC response + * The contexts must be locked. + * The mapped or common info ought to be locked, but reception + * works if it is not. */ +static int /* -1=fatal error, 0=no data, 1=unreachable, 2=ok */ +clnt_recv(DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class, + DCC_OP_RESP *resp, /* the response */ + int resp_len, + const DCC_HDR *msg, /* the original request */ + DCC_XLOG_ENTRY **xlogep) +{ + DCC_SOCKU su; + DCC_XLOG_ENTRY *xloge, *xloge1; + DCC_SRVR_ADDR *ap; + char str[DCC_SU2STR_SIZE+50]; + char ob[DCC_OPBUF]; + char ob2[DCC_OPBUF]; + int pkt_len; + + *xlogep = 0; + for (;;) { +next_pkt:; + pkt_len = do_recv(ctxt, resp, resp_len, &su); + if (pkt_len < 0) { + /* Stop looking when there are no more packets */ + if (DCC_BLOCK_ERROR()) + return 0; + + /* ignore ICMP Unreachables unless we have connected + * to a server. + * If so, forget all outstanding requests */ + if (ctxt->conn_su.sa.sa_family != AF_UNSPEC + && UNREACHABLE_ERRORS()) { + /* find one relevant request + * and mark all of them finished */ + for (xloge1 = ctxt->xlog.base, xloge = 0; + xloge1 < ctxt->xlog.next; + ++xloge1) { + if (xloge1->op_nums.t==DCC_OP_NUMS_NULL) + continue; + xloge = xloge1; + xloge->op_nums.t = DCC_OP_NUMS_NULL; + } + if (!xloge) { + if (dcc_clnt_debug) + dcc_trace_msg("ignore unmatched:" + " %s", ERROR_STR()); + continue; + } + if (dcc_clnt_debug) + dcc_trace_msg("note recvfrom(%s): %s", + dcc_su2str(str, + sizeof(str), + &ctxt->conn_su), + ERROR_STR()); + ctxt->xlog.outstanding = 0; + ap = &class->addrs[xloge->addr_inx]; + ap->rtt = DCC_RTT_BAD; + ++ctxt->xlog.cur[xloge->addr_inx].resps; + *xlogep = xloge; + return 1; + } + dcc_trace_msg( "clnt_recv recvfrom(%s): %s", + su.sa.sa_family + ? dcc_su2str(str, sizeof(str), &su) : "", + ERROR_STR()); + return -1; + } + + if (pkt_len > resp_len) { + trace_bad_packet(&ctxt->xlog, &su, resp, pkt_len, + "recv(%s)=%d>%d", + dcc_su2str(str, sizeof(str), &su), + pkt_len, resp_len); + continue; + } + if (pkt_len < ISZ(DCC_HDR)+ISZ(DCC_SIGNATURE)) { + trace_bad_packet(&ctxt->xlog, &su, resp, pkt_len, + "recv(%s)=%d<%d", + dcc_su2str(str, sizeof(str), &su), + pkt_len, + ISZ(DCC_HDR)+ISZ(DCC_SIGNATURE)); + continue; + } + if (pkt_len != ntohs(resp->hdr.len)) { + trace_bad_packet(&ctxt->xlog, &su, resp, pkt_len, + "recv(%s)=%d but hdr len=%d", + dcc_su2str(str, sizeof(str), &su), + pkt_len, + ntohs(resp->hdr.len)); + continue; + } + + if (resp->hdr.pkt_vers < DCC_PKT_VERSION_MIN + || resp->hdr.pkt_vers > DCC_PKT_VERSION_MAX) { + trace_bad_packet(&ctxt->xlog, &su, resp, pkt_len, + "unrecognized version #%d from %s", + resp->hdr.pkt_vers, + dcc_su2str(str, sizeof(str), &su)); + continue; + } + + /* We cannot use the server's apparent IP address because it + * might be multi-homed and respond with an address other than + * the address to which we sent. So use our records of + * which OP_NUMS was sent to which server address. */ + if (resp->hdr.op_nums.r != msg->op_nums.r + || resp->hdr.op_nums.p != msg->op_nums.p + || resp->hdr.op_nums.h != msg->op_nums.h) { + if (dcc_clnt_debug) + dcc_trace_msg("unmatched response from %s" + " ID=%d h=%#x/%#x p=%#x/%#x" + " r=%#x/%#x t=%#x", + dcc_su2str(str, sizeof(str), &su), + ntohl(resp->hdr.sender), + resp->hdr.op_nums.h, + msg->op_nums.h, + resp->hdr.op_nums.p, + msg->op_nums.p, + resp->hdr.op_nums.r, + msg->op_nums.r, + resp->hdr.op_nums.t); + continue; + } + + /* everything matches except perhaps the transmission # */ + xloge = ctxt->xlog.base; + for (;;) { + if (xloge >= ctxt->xlog.next) { + if (dcc_clnt_debug) + dcc_trace_msg("stray response from %s" + " ID=%d h=%#x p=%#x" + " r=%#x t=%#x/%#x", + dcc_su2str(str, + sizeof(str), &su), + ntohl(resp->hdr.sender), + resp->hdr.op_nums.h, + resp->hdr.op_nums.p, + resp->hdr.op_nums.r, + msg->op_nums.r, + resp->hdr.op_nums.t); + goto next_pkt; + } + if (resp->hdr.op_nums.t == xloge->op_nums.t) + break; + ++xloge; + } + + ap = &class->addrs[xloge->addr_inx]; + +#ifdef CLNT_LOSSES + if ((++clnt_losses % 5) == 0) { + dcc_trace_msg("dropped answer from %s", + addr2str(str, sizeof(str), class, + xloge->addrs_gen, ap, &su)); + continue; + } +#endif + + if (xloge->passwd[0] != '\0' + && !dcc_ck_signature(xloge->passwd, sizeof(xloge->passwd), + resp, pkt_len)) { + dcc_error_msg("%s ID=%d rejected our password for ID %d" + " and %s with %s" + " h=%#x p=%#x r=%#x t=%#x", + addr2str(str, sizeof(str), + class, xloge->addrs_gen, + ap, &su), + ntohl(resp->hdr.sender), + xloge->id, + dcc_hdr_op2str(ob, sizeof(ob), + msg), + dcc_hdr_op2str(ob2, sizeof(ob2), + &resp->hdr), + resp->hdr.op_nums.h, + resp->hdr.op_nums.p, + resp->hdr.op_nums.r, + resp->hdr.op_nums.t); + continue; + } + + if (dcc_clnt_debug > 3) + dcc_trace_msg("%8.6f received response from %s ID=%d" + " h=%#x p=%#x r=%#x t=%#x", + get_age(ctxt), + dcc_su2str(str, sizeof(str), &su), + ntohl(resp->hdr.sender), + resp->hdr.op_nums.h, + resp->hdr.op_nums.p, + resp->hdr.op_nums.r, + resp->hdr.op_nums.t); + + /* don't find the record of this transmission again */ + xloge->op_nums.t = DCC_OP_NUMS_NULL; + if (ctxt->xlog.outstanding != 0) + --ctxt->xlog.outstanding; + ++ctxt->xlog.cur[xloge->addr_inx].resps; + *xlogep = xloge; + + /* Notice if multi-homing is involved + * That is true if the address from which the client answered + * differs from the address to which we sent */ + if (!(ap->flags & DCC_SRVR_ADDR_MHOME) + && dcc_cmp_ap2su(ap, &su)) { + if (dcc_clnt_debug) + dcc_trace_msg("%s multi-homed at %s", + addr2str(str, sizeof(str), + class, xloge->addrs_gen, + ap, 0), + dcc_su2str(str,sizeof(str), &su)); + ap->flags |= DCC_SRVR_ADDR_MHOME; + } + + return 2; + } +} + + + +/* wait for an answer */ +int /* -1=error, 0=timeout, 1=ready */ +dcc_select_poll(DCC_EMSG emsg, + SOCKET fd, + u_char rd, /* 1=read 0=write */ + int us) /* <0=forever until signal */ +{ +#ifdef USE_POLL + struct pollfd fds; + int nfds; + int delay; + + if (us < 0) + delay = -1; + else + delay = (us+999)/1000; + + for (;;) { + fds.fd = fd; + /* At least some versions of Linux have POLLRDNORM etc. in + * asm/poll.h, but with definitions of POLLIN, POLLPRI, etc. + * that conflict with their definitions in sys/poll.h. + * Perhaps it is not necessary to check for high or + * low priority data, but the poll() documentation on + * some systems says that asking about POLLIN does not + * say anything about other data */ +#ifdef POLLRDNORM + if (rd) + fds.events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI; + else + fds.events = POLLOUT| POLLWRNORM | POLLWRBAND | POLLPRI; +#else + if (rd) + fds.events = POLLIN; + else + fds.events = POLLOUT; +#endif + fds.revents = 0; + nfds = poll(&fds, 1, delay); + if (nfds >= 0) + return nfds; + if (!DCC_SELECT_NERROR()) { + dcc_pemsg(EX_OSERR, emsg, "poll(): %s", ERROR_STR()); + return -1; + } + if (us < 0) /* stop forever on a signal */ + return 0; + } +#else + struct timeval delay, *delayp; + fd_set fds; + int nfds; + + if (us < 0) { + delayp = 0; + } else { + us2tv(&delay, us); + delayp = &delay; + } + + FD_ZERO(&fds); + for (;;) { + FD_SET(fd, &fds); + if (rd) + nfds = select(fd+1, &fds, 0, 0, delayp); + else + nfds = select(fd+1, 0, &fds, 0, delayp); + if (nfds >= 0) + return nfds; + if (!DCC_SELECT_NERROR()) { + dcc_pemsg(EX_OSERR, emsg, "select(): %s", ERROR_STR()); + return -1; + } + if (us < 0) /* stop forever on a signal */ + return 0; + } +#endif +} + + + +/* Make initial estimates of the RTT to all known servers + * The RTT's help the client pick a server that will respond quickly and + * reliably and to know when to retransmit a request that is lost due + * to network congestion or bit rot. + * Both locks must be held on entry. + * Both are released while working. + * Both locks are held on success. + * Only the contexts are locked on failure. */ +static u_char /* 0=failed, 1=at least 1 good server */ +measure_rtt(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, + DCC_SRVR_CLASS *class, + DCC_CLNT_FGS clnt_fgs) /* DCC_CLNT_FG_* */ +{ + DCC_SRVR_ADDR *ap; + DCC_NOP nop; + DCC_OP_RESP resp; + int delay_us, next_xmit; + int nfds, xmit_num; + int addrs_gen; + int tgt_addrs; + DCC_XLOG_ENTRY *xloge; + char ob[DCC_OPBUF], abuf[80]; + u_char vers; + u_char connect_ok; + int tgts, i; + + assert_info_locked(); + + /* Send NOP's to all addresses and wait for responses to + * measure each server's health and RTT. + * Treat all addresses as if they are of independent hosts */ + + if (class->nms[0].hostname[0] == '\0') { + class->srvr_inx = NO_SRVR; + dcc_pemsg(EX_NOHOST, emsg, "no %s server names", + DCC_IS_GREY_STR(class)); + dcc_info_unlock(0); + return 0; + } + + memcpy(&nop.hdr, &dcc_clnt_info->proto_hdr, sizeof(nop.hdr)); + /* servers ignore the version on NOPs except to guess the version + * we will accept */ + nop.hdr.pkt_vers = DCC_PKT_VERSION; + nop.hdr.op_nums.p = getpid(); + nop.hdr.op = DCC_OP_NOP; + /* Do not change the transaction ID so that dbclean can kludge it. + * Dccd does not care about the transaction ID on NOPs. */ + + if (!get_now(emsg, ctxt)) { + dcc_info_unlock(0); + return 0; + } + + /* discourage competition from other processes and threads */ + class->measure = ctxt->now.tv_sec+FAST_RTT_SECS; + + flush_emsg(emsg, 1); + + addrs_gen = class->gen; + + /* stop waiting for responses when we have enough working servers */ + tgt_addrs = class->num_srvrs; + if (!dcc_all_srvrs && tgt_addrs > 4) + tgt_addrs = 4; + + memset(&ctxt->xlog, 0, sizeof(ctxt->xlog)); + ctxt->xlog.base = ctxt->xlog.next = ctxt->xlog_entries; + ctxt->xlog.last = LAST(ctxt->xlog_entries); + delay_us = 0; + next_xmit = 0; + xmit_num = 0; + /* wait for the responses to the NOPs and retransmit as needed */ + for (;;) { + /* wait quietly until time to retransmit */ + if (delay_us <= 0) { + if (xmit_num >= DCC_MAX_XMITS) + break; + if (ctxt->xlog.working_addrs >= tgt_addrs) { + /* do not retransmit if we have heard from + * enough servers + * quit if we have waited at least one RTT */ + if (xmit_num > 0) + break; + delay_us = 0; + next_xmit = ctxt->now_us; + + } else { + /* get delay & time of next transmission */ + delay_us = retrans_time((clnt_fgs + & DCC_CLNT_FG_SLOW) + ? DCC_MAX_RTT + : DCC_MIN_RTT, + xmit_num++); + next_xmit = delay_us + ctxt->now_us; + + connect_ok = 1; + tgts = 0; + for (i = 0, ap = class->addrs; + ap <= LAST(class->addrs); + ++i, ++ap) { + if (ap->ip.family == 0 + || ctxt->xlog.cur[i].resps != 0) + continue; + if (ap->flags & DCC_SRVR_ADDR_MHOME) + connect_ok = 0; + ++tgts; + } + /* Use a connected socket early to get + * ICMP error messages from single server. + * no connection later to detect multi-homing + * that makes a server appear deaf */ + if (tgts > 1 + || xmit_num > DCC_MAX_XMITS/2) + connect_ok = 0; + for (i = 0, ap = class->addrs; + tgts > 0 && ap <= LAST(class->addrs); + ++i, ++ap) { + if (ap->ip.family == 0 + || !GOOD_NAM(ap->nam_inx) + || ctxt->xlog.cur[i].resps != 0) + continue; + --tgts; + if (0 > clnt_xmit(ctxt, class, ap, + &nop.hdr, sizeof(nop), + connect_ok)) + break; + } + } + + /* stop if nothing to wait for */ + if (!ctxt->xlog.outstanding) + break; + } + + if (!dcc_info_unlock(emsg)) + return 0; + dcc_ctxts_unlock(); + nfds = dcc_select_poll(emsg, ctxt->soc, 1, delay_us); + dcc_ctxts_lock(); + if (nfds < 0) + return 0; + if (!dcc_info_lock(emsg)) + return 0; + + i = get_now(emsg, ctxt); + if (!i) { /* give up if the clock jumped */ + class->measure = 0; + dcc_info_unlock(0); + return 0; + } + if (addrs_gen != class->gen) { + extra_pemsg(EX_IOERR, emsg, + "competition stopped RTT measurement"); + /* if we have at least one address, + * hope the other process will finish the job */ + if (HAVE_SRVR(class) + || pick_srvr(emsg, class)) + return 1; + + /* fail, but hope the other process will finish */ + dcc_info_unlock(0); + return 0; + } + + if (nfds > 0) { + for (;;) { + i = clnt_recv(ctxt, class, + &resp, sizeof(resp), + &nop.hdr, &xloge); + if (i <= 0) + break; + + if (i == 1) /* otherwise ignore Unreachable */ + continue; + + /* record the results of a probe, and notice + * if the server is the best so far */ + ap = &class->addrs[xloge->addr_inx]; + + if (resp.hdr.op != DCC_OP_OK) { + if (dcc_clnt_debug) + dcc_trace_msg("RTT NOP answered" + " with %s by %s", + dcc_hdr_op2str(ob, + sizeof(ob), + &resp.hdr), + addr2str(abuf, + sizeof(abuf), + class, + xloge->addrs_gen, + ap, 0)); + ap->rtt = DCC_RTT_BAD; + continue; + } + + vers = resp.ok.max_pkt_vers; + if (vers >= DCC_PKT_VERSION_MAX) + vers = DCC_PKT_VERSION_MAX; + else if (vers < DCC_PKT_VERSION_MIN) + vers = DCC_PKT_VERSION_MIN; + ap->srvr_pkt_vers = vers; + ap->srvr_id = ntohl(resp.hdr.sender); + memcpy(ap->brand, resp.ok.brand, + sizeof(ap->brand)); + ap->srvr_wait = ntohs(resp.ok.qdelay_ms)*1000; + + update_rtt(ctxt, class, xloge, + ctxt->now_us - xloge->sent_us + + ap->srvr_wait); + } + } + + if (ctxt->xlog.outstanding == 0 + || (ctxt->xlog.working_addrs >= tgt_addrs + && xmit_num > 1)) + next_xmit = ctxt->now_us; + delay_us = next_xmit - ctxt->now_us; + } + /* the contexts and the shared information are locked */ + resp_rates(ctxt, class, 1); + + if (!pick_srvr(emsg, class)) { + fail_more(ctxt, class); + dcc_info_unlock(0); + return 0; + } + + /* maintain long term average that is used to switch back to + * a good server that temporarily goes bad */ + if (class->thold_rtt == DCC_RTT_BAD) { + /* There is no point in trying to change servers + * Maybe we have only 1 */ + class->avg_thold_rtt = DCC_RTT_BAD; + } else if (class->avg_thold_rtt == -DCC_RTT_BAD) { + /* We are being forced to consider changing servers. + * The threshold for changing will be based on the RTT + * for the new server */ + class->avg_thold_rtt = class->base_rtt; + } else { + AGE_AVG(class->avg_thold_rtt, class->base_rtt, 9, 1); + } + + class->measure = ctxt->now.tv_sec+FAST_RTT_SECS; + + /* Several systems do not update the mtime of a file modified with + * mmap(). Some like BSD/OS delay changing the mtime until the file + * accessed with read(). Others including filesystems on some + * versions of Linux apparently never change the mtime. + * Do not bother temporary map files that have already been unlinked + * to avoid problems on systems that do not have futimes() */ + if (!(dcc_clnt_info->flags & DCC_INFO_FG_TMP)) + dcc_set_mtime(emsg, dcc_info_nm, info_fd, 0); + flush_emsg(emsg, 1); + + return 1; +} + + + +/* Get and write-lock common info + * The contexts but not the info must be locked. + * The contexts remain locked on failure. The shared information + * is locked only on success. */ +u_char /* 0=failed 1=ok */ +dcc_clnt_rdy(DCC_EMSG emsg, /* cleared of stale messages */ + DCC_CLNT_CTXT *ctxt, + DCC_CLNT_FGS clnt_fgs) /* DCC_CLNT_FG_* */ +{ + DCC_SRVR_CLASS *class; + DCC_IP bind_ip, zero_ip, *src_ip; + + if (!dcc_info_lock(emsg)) + return 0; + + if (!(clnt_fgs & DCC_CLNT_FG_RETRY)) + get_start_time(ctxt); + + class = DCC_GREY2CLASS(clnt_fgs & DCC_CLNT_FG_GREY); + + /* just fail if things were broken and it's too soon to try again */ + if (!(clnt_fgs & DCC_CLNT_FG_NO_FAIL) + && !ck_fail_time(emsg, ctxt, class)) { + dcc_info_unlock(0); + return 0; + } + + /* Re-open the socket if it is closed, + * or we have switched between IPv4 and IPv6, + * or if the local address has changed + * or if the local address was broken a long time ago. + * Do not compare the source port numbers, because even when there + * is no explicit source address, the local port is cached in + * ctxt->bind_su. */ + dcc_su2ip(&bind_ip, &ctxt->bind_su); + bind_ip.port = 0; + if (dcc_clnt_info->src.family == AF_UNSPEC) { + memset(&zero_ip, 0, sizeof(zero_ip)); + zero_ip.family = bind_ip.family; + src_ip = &zero_ip; + } else { + src_ip = &dcc_clnt_info->src; + } + if (ctxt->soc == INVALID_SOCKET + || ((ctxt->flags & DCC_CTXT_USING_IPV4)!=0) != !DCC_INFO_IPV6() + || (memcmp(src_ip, &bind_ip, sizeof(*src_ip)) + && (!(ctxt->flags & DCC_CTXT_SRCBAD) + || DCC_IS_TIME(ctxt->start.tv_sec, ctxt->bind_time, + DCC_CTXT_REBIND_SECS)))) { + if (!dcc_clnt_soc_reopen(emsg, ctxt)) { + dcc_info_unlock(0); + return 0; + } + } + + /* Try to pick a new server if we do not have a server + * or if the current server has become slow or unreliable. */ + if (!good_rtt(class)) + pick_srvr(emsg, class); + + /* Check for new IP addresses occassionally or when we have none + * If we cannot awaken a separate thread, do it ourself */ + if ((!HAVE_SRVR(class) + || DCC_IS_TIME(ctxt->now.tv_sec, class->resolve, DCC_MAP_RESOLVE)) + && (class->num_srvrs == 0 + || !dcc_clnt_wake_resolve())) { + if (!resolve_nms(emsg, ctxt, class)) + return 0; + } + + /* We might have switched to the current server when our + * best server became slow. + * If it has been a while, see if our best server is back + * by sending NOPs to all servers. */ + if ((class->measure == 0 + || (DCC_IS_TIME(ctxt->now.tv_sec, class->measure, FAST_RTT_SECS) + && !good_rtt(class))) + && !(clnt_fgs & DCC_CLNT_FG_NO_PICK_SRVR)) { + if (!measure_rtt(emsg, ctxt, class, clnt_fgs)) + return 0; + } + + if (!HAVE_SRVR(class) && !(clnt_fgs & DCC_CLNT_FG_BAD_SRVR_OK)) { + dcc_info_unlock(0); + return 0; + } + + dcc_clnt_soc_flush(ctxt); + return 1; +} + + + +/* send an operation to the server and get a response + * The operation and response buffers must be distinct, because the + * response buffer is changed before the last use of the operation + * buffer */ +u_char /* 0=failed 1=ok */ +dcc_clnt_op(DCC_EMSG emsg, + DCC_CLNT_CTXT *ctxt, + DCC_CLNT_FGS clnt_fgs, /* DCC_CLNT_FG_* */ + const SRVR_INX *srvr_inxp, /* 0 or ptr to server index */ + DCC_SRVR_ID *srvr_idp, /* ID of server used */ + DCC_SOCKU *resp_su, /* IP address of server used */ + DCC_HDR *msg, int msg_len, DCC_OPS op, + DCC_OP_RESP *resp, int resp_max_len) +{ + DCC_SRVR_CLASS *class; + DCC_SRVR_ADDR *cur_addr; +#ifdef DCC_PKT_VERSION7 + DCC_REPORT old_report; +#endif + char addr_buf[80]; + int addrs_gen; + DCC_OP_NUM op_num_r; + DCC_XLOG_ENTRY *xloge; + SRVR_INX srvr_inx; + int xmit_num; + int next_xmit, us, remaining, nfds; + u_char gotit; + int i; + + if (emsg) + *emsg = '\0'; + dcc_ctxts_lock(); + if (!dcc_clnt_info + && !dcc_map_info(emsg, 0, -1)) { + dcc_ctxts_unlock(); + if (srvr_idp) + *srvr_idp = DCC_ID_INVALID; + return 0; + } + /* Get & lock common info. + * insist on a server to talk to so that class->srvr_inx is sane */ + if (!dcc_clnt_rdy(emsg, ctxt, + clnt_fgs & ~(DCC_CLNT_FG_BAD_SRVR_OK + | DCC_CLNT_FG_NO_PICK_SRVR))) { + dcc_ctxts_unlock(); + if (srvr_idp) + *srvr_idp = DCC_ID_INVALID; + return 0; + } + class = DCC_GREY2CLASS(clnt_fgs & DCC_CLNT_FG_GREY); + + if (resp_max_len > ISZ(*resp)) + resp_max_len = ISZ(*resp); + else if (resp_max_len < ISZ(resp->hdr)) + dcc_logbad(EX_SOFTWARE, "dcc_clnt_op(resp_max_len=%d)", + resp_max_len); + + /* use server that the caller wants, + * if the caller specified the valid index of a server */ + if (!srvr_inxp + || !GOOD_SRVR(class, srvr_inx = *srvr_inxp)) + srvr_inx = class->srvr_inx; + + cur_addr = &class->addrs[srvr_inx]; + if (srvr_idp) + *srvr_idp = cur_addr->srvr_id; + if (resp_su) + dcc_mk_su(resp_su, cur_addr->ip.family, + &cur_addr->ip.u, cur_addr->ip.port); + addrs_gen = class->gen; + + ++dcc_clnt_info->proto_hdr.op_nums.r; + op_num_r = msg->op_nums.r; + memcpy(msg, &dcc_clnt_info->proto_hdr, sizeof(*msg)); + /* old transaction ID for retransmissions */ + if (clnt_fgs & DCC_CLNT_FG_RETRANS) + msg->op_nums.r = op_num_r; + if (cur_addr->srvr_pkt_vers > DCC_PKT_VERSION_MAX + || cur_addr->srvr_pkt_vers < DCC_PKT_VERSION_MIN) { + dcc_pemsg(EX_DATAERR, emsg, "impossible pkt_vers %d for %s", + cur_addr->srvr_pkt_vers, + addr2str(addr_buf, sizeof(addr_buf), class, + addrs_gen, cur_addr, 0)); + dcc_info_unlock(0); + dcc_ctxts_unlock(); + if (srvr_idp) + *srvr_idp = DCC_ID_INVALID; + return 0; + } + +#ifdef DCC_PKT_VERSION7 + /* convert new report to old */ + if (cur_addr->srvr_pkt_vers <= DCC_PKT_VERSION7 + && op == DCC_OP_REPORT) { + DCC_TGTS tgts; + + tgts = ntohl(((DCC_REPORT *)msg)->tgts); +#ifdef DCC_PKT_VERSION4 + if (cur_addr->srvr_pkt_vers == DCC_PKT_VERSION4 + && (tgts & DCC_TGTS_SPAM)) { + memcpy(&old_report, msg, msg_len); + old_report.tgts = htonl(DCC_TGTS_TOO_MANY); + msg = &old_report.hdr; + } +#endif + } +#endif /* DCC_PKT_VERSION7 */ + + msg->pkt_vers = cur_addr->srvr_pkt_vers; + msg->op_nums.p = getpid(); + msg->op = op; + gotit = 0; + + /* The measured RTTs to servers helps the client pick a server + * that will respond quickly and reliably and to know when to + * retransmit a request that is lost due to network congestion or + * bit rot. + * + * It is desirable for a client to concentrate its reports to + * a single server. That makes detecting spam by this and other + * clients quicker. + * + * A client should retransmit when its initial transmission is lost + * due to bit rot or congestion. In case the loss is due to + * congestion, it should retransmit only a limited number of + * times and with increasing delays between retransmissions. + * + * It is more important that some requests from clients reach + * a DCC server than others. Most DCC checksum reports are not about + * spam, and so it is best to not spend too much network bandwidth + * retransmitting checksum reports or to delay the processing of the + * messages. Administrative commands must be tried harder. + * Therefore, let the caller of this routine decide whether to retry. + * This routine merely increases the measured RTT after failures. */ + + memset(&ctxt->xlog, 0, sizeof(ctxt->xlog)); + ctxt->xlog.base = ctxt->xlog.next = ctxt->xlog_entries; + ctxt->xlog.last = LAST(ctxt->xlog_entries); + xmit_num = 0; + next_xmit = ctxt->now_us; + + /* Transmit, wait for a response, and repeat if needed. + * The initial transmission is done as if it were a retransmission. */ + for (;;) { + us = next_xmit - ctxt->now_us; + if (us <= 0) { + /* We have delayed long enough for each outstanding + * transmission. We are done if we have sent enough */ + if (xmit_num >= DCC_MAX_XMITS) + break; + + /* stop if we don't have enough time to wait */ + us = retrans_time(cur_addr->rtt, xmit_num); + remaining = DCC_MAX_DELAY - ctxt->now_us; + if (us > remaining) { + if (remaining < DCC_MIN_RTT) + break; + us = remaining; + } + + /* wait as long as possible on the last try */ + if (++xmit_num == DCC_MAX_XMITS + && us < DCC_MAX_RTT) { + if (remaining > DCC_MAX_RTT) + us = DCC_MAX_RTT; + else + us = remaining; + } + next_xmit = us + ctxt->now_us; + + /* Because of the flooding algorithm among DCC servers, + * it is important that only a single server receive + * reports of the checksums for a mail message. + * That implies that retransmissions of reports must + * go to the original server, even if some other local + * client has re-resolved hostnames or switched + * to a better server. + * Otherwise our retransmissions to different servers + * would not be recognized as retransmissions but + * reports about separate copies of the mail message. + * Sp we should not retransmit if the server + * address table changes. */ + if (addrs_gen != class->gen + && op == DCC_OP_REPORT + && !(clnt_fgs & DCC_CLNT_FG_GREY)) { + if (dcc_clnt_debug) + dcc_trace_msg("server address" + " generation changed"); + break; + } + + if (!GOOD_NAM(cur_addr->nam_inx)) { + if (dcc_clnt_debug) + dcc_trace_msg("server deleted"); + break; + } + + /* use a connected socket early to get port + * unreachable ICMP error messages, but do not + * connect later to detect multi-homing */ + if (0 >= clnt_xmit(ctxt, class, cur_addr, msg, msg_len, + (!(cur_addr->flags + & DCC_SRVR_ADDR_MHOME) + && xmit_num < DCC_MAX_XMITS/2 + && ctxt->now_us<=DCC_MAX_DELAY/2))) + break; + } + + /* release the mapped info while we wait for an answer */ + if (!dcc_info_unlock(emsg)) { + dcc_ctxts_unlock(); + if (srvr_idp) + *srvr_idp = DCC_ID_INVALID; + return 0; + } + dcc_ctxts_unlock(); + nfds = dcc_select_poll(emsg, ctxt->soc, 1, us); + if (nfds < 0) { + /* note error, but we may already have an answer */ + dcc_ctxts_lock(); + class = DCC_GREY2CLASS(clnt_fgs & DCC_CLNT_FG_GREY); + break; + } + if (!get_now(emsg, ctxt)) + return 0; /* simply give up if time jumped */ + + /* recover the lock so that we can record the result of the + * newly arrived answer in the shared and mapped file */ + dcc_ctxts_lock(); + class = DCC_GREY2CLASS(clnt_fgs & DCC_CLNT_FG_GREY); + if (!dcc_info_lock(emsg)) { + dcc_ctxts_unlock(); + if (srvr_idp) + *srvr_idp = DCC_ID_INVALID; + return 0; + } + + if (nfds > 0) { + u_char unreachable = 0; + for (;;) { + DCC_OP_RESP buf; + + i = clnt_recv(ctxt, class, &buf, + min(ISZ(buf), resp_max_len), + msg, &xloge); + if (i <= 0) + break; + if (i == 1) { + /* stop delaying after the first + * ICMP Unreachable message, + * but collect everything that has + * already arrived */ + unreachable = 1; + continue; + } + + update_rtt(ctxt, class, xloge, + ctxt->now_us - xloge->sent_us + + ((op != DCC_OP_REPORT + && op != DCC_OP_QUERY) + ? cur_addr->srvr_wait : 0)); + + /* save the last answer we get */ + memcpy(resp, &buf, ntohs(buf.hdr.len)); + gotit = 1; + } + if (i < 0 || unreachable || gotit) + break; + } + } + /* the contexts and the shared information are locked */ + + /* penalize server for lost packets */ + resp_rates(ctxt, class, 0); + + /* fail if the server did not answer at all */ + if (!gotit) { +#if 0 + system("./abort_dccd"); +#endif + flush_emsg(emsg, dcc_clnt_debug); + dcc_pemsg(EX_TEMPFAIL, emsg, "no %s answer from %s after %d ms", + DCC_IS_GREY_STR(class), + addr2str(addr_buf, sizeof(addr_buf), class, + addrs_gen, cur_addr, 0), + ctxt->now_us/1000); + /* Since we got no answer at all, look for a different server. + * If we can't find any server or a different server + * or if we have already spent too much time, + * then don't try again for a while to not delay the MTA. + * If we find another server, then return the valid server-ID + * of the non-responsive server to let the caller know that it + * can try again immediately. */ + if (srvr_inxp && srvr_inx == *srvr_inxp) { + /* but only if not using a caller specified server */ + if (srvr_idp) + *srvr_idp = DCC_ID_INVALID; + } else if (!pick_srvr(0, class) + || srvr_inx == class->srvr_inx) { + if (srvr_idp) { + if (dcc_clnt_debug) + dcc_trace_msg("no better alternate" + " for retry"); + *srvr_idp = DCC_ID_INVALID; + } + fail_more(ctxt, class); + } else if (srvr_idp + && (i = retrans_time(class->addrs[class ->srvr_inx + ].rtt, 0), + ctxt->now_us + i >= DCC_MAX_DELAY)) { + /* discourage the caller from trying the other server + * if the total delay after trying the other server + * would be excessive */ + if (dcc_clnt_debug) + dcc_trace_msg("alternate too slow for retry" + " with rtt %d ms after %d ms", + i/1000, + ctxt->now_us/1000); + *srvr_idp = DCC_ID_INVALID; + } + dcc_info_unlock(0); + dcc_ctxts_unlock(); + return 0; + } + + /* reset failure backoff */ + class->fail_exp = 0; + + if (!dcc_info_unlock(emsg)) { + dcc_ctxts_unlock(); + if (srvr_idp) + *srvr_idp = DCC_ID_INVALID; + return 0; + } + dcc_ctxts_unlock(); + + flush_emsg(emsg, dcc_clnt_debug); + return 1; +}