view dcclib/clnt_send.c @ 4:d329bb5c36d0

Changes making it compile the new upstream release
author Peter Gervai <grin@grin.hu>
date Tue, 10 Mar 2009 14:57:12 +0100
parents c7f6b056b673
children
line wrap: on
line source

/* Distributed Checksum Clearinghouse
 *
 * send a request from client to server
 *
 * Copyright (c) 2008 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software without
 * changes for any purpose with or without fee is hereby granted, provided
 * that the above copyright notice and this permission notice appear in all
 * copies and any distributed versions or copies are either unchanged
 * or not called anything similar to "DCC" or "Distributed Checksum
 * Clearinghouse".
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC by contacting Rhyolite Software
 * at sales@rhyolite.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software.  That software includes additional features.  This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.103-1.198 $Revision$
 */

#include "dcc_clnt.h"
#ifdef USE_POLL
#include <poll.h>
#endif
#ifdef HAVE_ARPA_NAMESER_H
#include <arpa/nameser.h>
#endif
#ifdef HAVE_RESOLV_H
#include <resolv.h>
#endif

DCC_CLNT_INFO *dcc_clnt_info;		/* memory mapped shared data */
u_char dcc_all_srvrs = 0;		/* try to contact all servers */

/* Uncomment the following definition to compile in the clnt_losses counter.
 *	NOTE(review): presumably used to simulate lost packets while
 *	testing; confirm against the code that uses clnt_losses. */
/* #define CLNT_LOSSES */
#ifdef CLNT_LOSSES
static u_int clnt_losses;
#endif

/* nonzero makes flush_emsg() and trace_bad_packet() report messages
 *	that would otherwise be suppressed */
u_char dcc_clnt_debug;
int dcc_debug_ttl;			/* NOTE(review): not used in this part
					 * of the file; presumably a TTL for
					 * debugging packets -- confirm */


/* Replace _v with the rounded weighted average of the old value _v (weight _a)
 *	and the new sample _n (weight _b).
 *	Every argument is parenthesized so that expressions such as "x+1"
 *	can be passed safely.  _v is evaluated twice and must be an lvalue
 *	without side effects. */
#define AGE_AVG(_v,_n,_a,_b) ((_v) = ((_v)*(_a) + (_n)*(_b) + ((_a)+(_b))/2)/((_a)+(_b)))

/* When the current server becomes slow, send NOPs to measure the RTTs
 * to the servers at most this often (in seconds) */
#define FAST_RTT_SECS    (15*60)


/* filled by gethostname() in get_clnt_hid() */
char dcc_clnt_hostname[MAXHOSTNAMELEN];	/* local hostname */
/* 0 until computed by get_clnt_hid(), which never leaves it 0 */
static u_int32_t dcc_clnt_hid;		/* our DCC host-ID */


/* Each client knows about one or more servers, lest the current server
 * crash.  To ensure that counts of spam accumulate as quickly as possible,
 * all of the processes on a client try to use a single server.  The
 * closest (or fastest) server is preferred.  It is desirable for the
 * servers to convert the hostnames of the servers to IP addresses
 * frequently enough to track changes in address records, but not so
 * often that a lot of time is wasted on the DNS.
 *
 * All of that implies that independent processes on the client need to
 * cooperate in measuring the round trip time to the servers and maintaining
 * their IP addresses.  On UNIX systems, this is accomplished with mmap()
 * and a well known file.
 *
 * The DCC client uses 3 locks:
 * 1. mutex to ensure that only one thread in process sends bursts of NOPs
 *	to measure RTTs or resolves DNS names
 * 2. mutex protecting the shared information in the map file for threads
 *	within a process
 * 3. fcntl() lock on the file to protect the shared information among processes
 *
 * To avoid ABBA deadlocks, the locks are always sought in that order.
 * For most operations, only #2/#3 is needed.  Sometimes only #2.
 *
 * Some systems have broken fcntl() locking (e.g. NFS in Solaris).
 * They lock the entire file. */


/* the contexts must be locked to read or change these values */
static int info_fd = -1;		/* open map file or -1 */
#ifdef DCC_WIN32
/* handle given to win32_map() and win32_unmap() for the mapped file */
HANDLE info_map = INVALID_HANDLE_VALUE;
#endif
DCC_PATH dcc_info_nm;			/* name of the map file */

/* info_locked is set when the file system lock on changing the mapped file is
 *	held.  The contexts must be locked while info_locked is set, as well as
 *	when it is checked or changed. */
static u_char info_locked;




/* Compute the RTT used to rank a server address.
 *	The measured RTT is adjusted by the bias of the server's name and
 *	by a penalty for servers that answered with an older protocol version.
 *	DCC_RTT_BAD is returned for an address that cannot be used. */
static int
effective_rtt(const DCC_SRVR_CLASS *class, const DCC_SRVR_ADDR *ap)
{
	int eff_rtt = ap->rtt;
	NAM_INX nm;

	/* an RTT at or beyond the limit is bad regardless of adjustments */
	if (eff_rtt >= DCC_MAX_RTT)
		return DCC_RTT_BAD;

	/* without a good name there is no bias to apply */
	nm = ap->nam_inx;
	if (!GOOD_NAM(nm))
		return DCC_RTT_BAD;
	eff_rtt += class->nms[nm].rtt_adj;

	/* penalize servers with strange versions */
	if (ap->srvr_pkt_vers < DCC_PKT_VERSION
#ifdef DCC_PKT_VERSION7
	    /* protocol version 7 and 8 are the same for the free code */
	    && ap->srvr_pkt_vers+1 != DCC_PKT_VERSION
#endif
	    && ap->srvr_id != DCC_ID_INVALID) {
		eff_rtt += DCC_RTT_VERS_ADJ;
	}

	/* the adjustments must not make the result look better than "bad" */
	return (eff_rtt >= DCC_RTT_BAD) ? DCC_RTT_BAD : eff_rtt;
}



static inline u_char
good_rtt(const DCC_SRVR_CLASS *class)
{
	int rtt;

	if (!HAVE_SRVR(class))
		return 0;

	rtt = effective_rtt(class, &class->addrs[class->srvr_inx]);
	if (rtt > class->avg_thold_rtt)
		return 0;

	return 1;
}


/* Find the class of a server address entry from its position in the
 *	mapped data: a pointer at or beyond grey.addrs is taken as a greylist
 *	entry.  The argument is parenthesized so that any pointer expression
 *	can be passed.
 *	NOTE(review): relies on the layout of DCC_CLNT_INFO; confirm that
 *	the greylist addresses follow the DCC addresses. */
#define AP2CLASS(ap) DCC_GREY2CLASS((ap) >= dcc_clnt_info->grey.addrs)


/* Compare a server address entry with a socket address while trying to
 *	ignore IPv4 vs. IPv6 details.
 *	An IPv6 address that dcc_ipv6toipv4() can convert is compared as IPv4.
 *	Returns 0 if the ports and addresses are equal and 1 if they differ. */
static u_char				/* 0=the addresses are equal */
dcc_cmp_ap2su(const DCC_SRVR_ADDR *ap, const DCC_SOCKU *su)
{
	DCC_SRVR_ADDR ap4;
	struct in_addr su_addr4;

	if (ap->ip.port != DCC_SU_PORT(su))
		return 1;

	/* Use an IPv4 copy of the entry if possible.  Only the family and
	 * the IPv4 address of the copy are set and used. */
	if (ap->ip.family == AF_INET6
	    && dcc_ipv6toipv4(&ap4.ip.u.v4, &ap->ip.u.v6)) {
		ap4.ip.family = AF_INET;
		ap = &ap4;
	}

	if (su->sa.sa_family == AF_INET) {
		return (ap->ip.family != AF_INET
			|| ap->ip.u.v4.s_addr != su->ipv4.sin_addr.s_addr);
	}

	if (dcc_ipv6toipv4(&su_addr4, &su->ipv6.sin6_addr)) {
		return (ap->ip.family != AF_INET
			|| ap->ip.u.v4.s_addr != su_addr4.s_addr);
	}

	if (ap->ip.family != AF_INET6)
		return 1;

	/* Neither address could be converted to IPv4.
	 * Reduce the result of memcmp() to 0 or 1, because its int value
	 * need not fit in a u_char.  A nonzero multiple of 256 would be
	 * truncated to 0 and so reported as "equal". */
	return memcmp(&ap->ip.u.v6, &su->ipv6.sin6_addr,
		      sizeof(ap->ip.u.v6)) != 0;
}



/* Convert the address of a server to a string of the form "address[,port]".
 *	buf, buf_len	where to put the string
 *	class, inx	select the server address
 *	port_str	'\0' to say nothing about a default port, or a
 *			character such as '-' to append after a comma
 *			in place of the default port of the class
 *	A port other than the default port of the class is always appended
 *	as a number if there is room.
 *	Returns the length of the string in buf. */
int
dcc_ap2str_opt(char *buf, int buf_len,
	       const DCC_SRVR_CLASS *class, u_char inx,
	       char port_str)		/* '\0' or '-' */
{
	const DCC_SRVR_ADDR *ap;
	char *buf1;
	int len;

	ap = &class->addrs[inx];
	dcc_ip2str(buf, buf_len, &ap->ip);

	len = strlen(buf);
	buf1 = buf+len;
	/* what remains includes the byte now holding the trailing '\0' */
	buf_len -= len;
	if (ap->ip.port == DCC_CLASS2PORT(class)) {
		if (port_str) {
			/* Never write past the end of the buffer:
			 * ",-" needs 3 bytes with its '\0'
			 * and "," alone needs 2. */
			if (buf_len >= 3) {
				*buf1++ = ',';
				*buf1++ = port_str;
				*buf1 = '\0';
			} else if (buf_len >= 2) {
				*buf1++ = ',';
				*buf1 = '\0';
			}
		}
	} else if (buf_len > 0) {
		len = snprintf(buf1, buf_len, ",%d", ntohs(ap->ip.port));
		/* snprintf() reports the length it wanted to write */
		if (len >= buf_len)
			len = buf_len-1;
		if (len > 0)
			buf1 += len;
	}

	return buf1-buf;
}



/* Describe a server for a log message.
 *	buf, buf_len	where to put the description
 *	class		of the server
 *	addrs_gen	generation of the addresses when ap was obtained
 *	ap		address entry of the server
 *	sup		socket address of the server or 0 to use ap
 *	If the class is still of the given generation, the result is the
 *	address, preceded by the hostname when the hostname is not simply
 *	the start of the text of the address.  Otherwise ap may be stale,
 *	and the result is the socket address if given or else only the
 *	kind of server.
 *	Returns buf. */
static const char *
addr2str(char *buf, u_int buf_len, const DCC_SRVR_CLASS *class,
	 int addrs_gen, const DCC_SRVR_ADDR *ap, const DCC_SOCKU *sup)
{
	DCC_SOCKU su;
	char str[DCC_SU2STR_SIZE];
	const char *host;

	if (class->gen == addrs_gen) {
		if (!sup) {
			dcc_mk_su(&su, ap->ip.family, &ap->ip.u, ap->ip.port);
			sup = &su;
		}
		dcc_su2str(str, sizeof(str), sup);
		/* Mention the hostname only if it adds something.
		 * "1.2.3.4 (1.2.3.4,6277)" is only noise. */
		if (GOOD_NAM(ap->nam_inx)
		    && (host = class->nms[ap->nam_inx].hostname,
			strncmp(host, str, strlen(host)) != 0)) {
			snprintf(buf, buf_len, "%s (%s)", host, str);
		} else {
			snprintf(buf, buf_len, "%s", str);
		}

	} else if (sup) {
		dcc_su2str(buf, buf_len, sup);

	} else {
		/* never use a computed string as a format */
		snprintf(buf, buf_len, "%s", DCC_IS_GREY_STR(class));
	}
	return buf;
}



/* If debugging, display or log the pending error message and then clear it
 *	so that the buffer can receive a new message. */
static inline void
flush_emsg(DCC_EMSG emsg, u_char debug)
{
	if (debug && emsg && *emsg != '\0') {
		dcc_trace_msg("%s", emsg);
		*emsg = '\0';
	}
}



/* Report a new, less interesting error message, but only if there is no
 *	buffer or if the buffer does not already hold an old, presumably
 *	more important message.
 *	When debugging, the old message is first flushed by flush_emsg(). */
static void PATTRIB(3,4)
extra_pemsg(int ex_code, DCC_EMSG emsg, const char *msg, ...)
{
	va_list args;

	flush_emsg(emsg, dcc_clnt_debug);

	/* keep an old message that is still in the buffer */
	if (emsg && *emsg != '\0')
		return;

	va_start(args, msg);
	dcc_vpemsg(ex_code, emsg, msg, args);
	va_end(args);
}



/* Trace the measured performance of a server.
 *	msg	starts the trace message
 *	ap	address entry of the server in the mapped data
 *	The message includes the RTT with any adjustments, the queue wait,
 *	and, when requests have been sent, the fraction of them answered. */
static void
trace_perf(const char *msg, const DCC_SRVR_ADDR *ap)
{
	DCC_SRVR_CLASS *class = AP2CLASS(ap);
	char abuf[60];
	char rbuf[30];
	const char *srvr;

	/* build the suffix showing the adjustments made to the RTT */
	rbuf[0] = 0;
	if (GOOD_NAM(ap->nam_inx)
	    && class->nms[ap->nam_inx].rtt_adj != 0) {
		if (ap->srvr_pkt_vers < DCC_PKT_VERSION
		    && ap->srvr_id != DCC_ID_INVALID)
			snprintf(rbuf, sizeof(rbuf), "%+d+%d",
				 class->nms[ap->nam_inx].rtt_adj/1000,
				 DCC_RTT_VERS_ADJ/1000);
		else
			snprintf(rbuf, sizeof(rbuf), "%+d",
				 class->nms[ap->nam_inx].rtt_adj/1000);
	}

	srvr = addr2str(abuf, sizeof(abuf), class, class->gen, ap, 0);

	if (ap->rtt == DCC_RTT_BAD) {
		dcc_trace_msg("%s %s server %s with unknown RTT",
			      msg, DCC_IS_GREY_STR(class), srvr);
		return;
	}

	if (ap->total_xmits == 0) {
		dcc_trace_msg("%s %s server %s with %.2f%s ms RTT,"
			      " %d ms queue wait",
			      msg, DCC_IS_GREY_STR(class), srvr,
			      ap->rtt/1000.0, rbuf,
			      ap->srvr_wait/1000);
		return;
	}

	dcc_trace_msg("%s %s server %s with %.0f%%"
		      " of %d requests answered,"
		      " %.2f%s ms RTT, %d ms queue wait",
		      msg, DCC_IS_GREY_STR(class), srvr,
		      (ap->total_resps*100.0)/ap->total_xmits,
		      ap->total_xmits,
		      ap->rtt/1000.0, rbuf,
		      ap->srvr_wait/1000);
}



/* Complain about a strange packet.
 *
 * If the socket isn't always connected, it can receive
 * datagrams from almost everywhere (for example, a DNS
 * datagram could leak-in if the local port range is small
 * and the local port has been recently doing DNS queries
 * in its previous life).
 *
 * If the socket is connected, it can still receive
 * datagrams not belonging to the connection per se. This
 * will happen if it has been disconnected recently and there
 * was pending data in the socket's queue.
 *
 * Before complaining, check that this datagram seems to be a response
 * to something we sent.  That check is skipped when debugging or when
 * xlog or su is null.
 *
 *	xlog		log of transmissions or 0
 *	su		source of the packet or 0
 *	resp, resp_len	the packet
 *	p, ...		printf style description of the problem
 * The message is followed by the leading words of the packet in hex. */
static void PATTRIB(5,6)
trace_bad_packet(const DCC_XLOG *xlog, const DCC_SOCKU *su,
		 const DCC_OP_RESP *resp, int resp_len, const char *p, ...)
{
	const DCC_XLOG_ENTRY *xloge;
	va_list args;
	char msgbuf[80];
	char pktbuf[9*10+1];		/* 10 words of " %08x" and '\0' */
	int i, j, l, room;

	if (!dcc_clnt_debug && xlog && su) {
		for (xloge = xlog->base; ; ++xloge) {
			/* forget the error message if not from a DCC server */
			if (xloge >= xlog->next)
				return;

			/* Don't check this server entry if we haven't
			 * transmitted anything to this host. */
			if (xloge->op_nums.t == DCC_OP_NUMS_NULL)
				continue;

			/* is the packet from this server? */
			if (!memcmp(su, &xloge->su, sizeof(*su)))
				break;
		}
	}

	va_start(args, p);
	vsnprintf(msgbuf, sizeof(msgbuf), p, args);
	va_end(args);

	/* a packet shorter than one word leaves nothing to dump */
	pktbuf[0] = '\0';
	for (i = 0, j = 0; (i+1)*ISZ(resp->w[0]) <= resp_len; ++i) {
		/* Stop when there is no room for another whole word.
		 * snprintf() returns the length it wanted to write, so
		 * blindly adding it to j would run j past the buffer
		 * and make the size of the remaining space wrap. */
		room = ISZ(pktbuf) - j;
		if (room < 9+1)
			break;
		l = snprintf(&pktbuf[j], room, " %08x", resp->w[i]);
		if (l < 9 || l >= room)
			break;
		j += l;
	}
	dcc_error_msg("%s;%s", msgbuf, pktbuf);
}



/* Compute the delay before the next retransmission
 *      It always should be long enough for the DCC server to do some disk
 *	operations even if the server and network have usually been faster.
 *	rtt		recent round trip time
 *	xmit_num	number of previous transmissions
 *	Returns rtt, but at least DCC_MIN_RTT, doubled for each previous
 *	transmission plus DCC_DCCD_DELAY and limited to DCC_MAX_RTT. */
static int
retrans_time(int rtt, u_int xmit_num)
{
	u_int backoff;

	if (rtt < DCC_MIN_RTT)
		rtt = DCC_MIN_RTT;

	/* Do not shift by the width of the type or let the shift overflow,
	 * because both are undefined.  In either case the result would
	 * exceed the limit anyway.
	 * NOTE(review): assumes DCC_MIN_RTT and DCC_DCCD_DELAY are not
	 * negative and that DCC_MAX_RTT is a positive int; confirm. */
	if (xmit_num >= sizeof(rtt)*8-1
	    || rtt > ((DCC_MAX_RTT) >> xmit_num))
		return DCC_MAX_RTT;

	backoff = (u_int)rtt << xmit_num;	/* exponential backoff */
	backoff += DCC_DCCD_DELAY;	/* varying server & network load */
	if (backoff > DCC_MAX_RTT)
		backoff = DCC_MAX_RTT;
	return backoff;
}



/* Note the time at which an operation starts.
 *	The current time of the context starts as the same instant,
 *	which is 0 microseconds after the start. */
static void
get_start_time(DCC_CLNT_CTXT *ctxt)
{
	struct timeval *start = &ctxt->start;

	gettimeofday(start, 0);
	ctxt->now_us = 0;
	ctxt->now = *start;
}



/* Update the current time of the context and the microseconds elapsed
 *	since its start time.
 *	If the clock has jumped backwards or impossibly far forward, complain
 *	in emsg and restart the current time at the start time. */
static u_char				/* 1=ok, 0=time jumped */
get_now(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt)
{
	gettimeofday(&ctxt->now, 0);
	ctxt->now_us = tv_diff2us(&ctxt->now, &ctxt->start);
	if (ctxt->now_us >= 0 && ctxt->now_us < FOREVER_US)
		return 1;

	/* ignore tiny reverse time jumps on some systems such as BSD/OS 4.1 */
	if (ctxt->now_us < 0
	    && ctxt->now_us > -1000) {
		ctxt->now = ctxt->start;
		ctxt->now_us = 0;
		return 1;
	}

	/* Report the jump in seconds.  The difference in microseconds
	 * must be scaled by the microseconds in a second
	 * and not by the limit FOREVER_US. */
	dcc_pemsg(EX_OSERR, emsg,
		  "clock changed an impossible %.6f seconds",
		  (ctxt->now.tv_sec - ctxt->start.tv_sec) * 1.0
		  + ((ctxt->now.tv_usec - ctxt->start.tv_usec)*1.0)/DCC_US);
	ctxt->now = ctxt->start;
	ctxt->now_us = 0;
	return 0;
}



/* Get the seconds that have passed since the start time of the context.
 *	The clock is read afresh; the context is not changed. */
static double
get_age(const DCC_CLNT_CTXT *ctxt)
{
	struct timeval cur;

	gettimeofday(&cur, 0);

	/* convert microseconds to seconds */
	return tv_diff2us(&cur, &ctxt->start)/(DCC_US*1.0);
}



#ifdef DCC_DEBUG_CLNT_LOCK
/* Check that the contexts are locked and report a software error with
 *	dcc_logbad() unless the shared information is also locked. */
void
assert_info_locked(void)
{
	assert_ctxts_locked();
	if (info_locked)
		return;
	dcc_logbad(EX_SOFTWARE, "don't have info locked");
}



/* Report a software error with dcc_logbad() if the shared information
 *	is locked. */
void
assert_info_unlocked(void)
{
	if (!info_locked)
		return;
	dcc_logbad(EX_SOFTWARE, "have info locked");
}
#endif


/* Release the file lock on the shared memory for other processes.
 *      The contexts must be locked.
 *	Unless built with DCC_DEBUG_CLNT_LOCK, it is not an error
 *	if the lock is not held. */
u_char					/* 0=failed 1=ok */
dcc_info_unlock(DCC_EMSG emsg)
{
	u_char unlocked;

	assert_ctxts_locked();
#ifdef DCC_DEBUG_CLNT_LOCK
	assert_info_locked();
#else
	if (!info_locked)
		return 1;
#endif

	/* the lock is forgotten even if releasing it fails */
	info_locked = 0;
	unlocked = dcc_unlock_fd(emsg, info_fd, DCC_LOCK_ALL_FILE,
				 "info ", dcc_info_nm);
	return unlocked;
}



/* Get the file lock on the shared memory so that it can be read
 *	and perhaps changed.
 *      The contexts must be locked.
 *	Nothing is done if the lock is already held, except that it is
 *	reported as a software error when built with DCC_DEBUG_CLNT_LOCK. */
u_char					/* 0=failed, 1=ok */
dcc_info_lock(DCC_EMSG emsg)
{
	assert_ctxts_locked();

	if (!info_locked) {
		if (!dcc_exlock_fd(emsg, info_fd, DCC_LOCK_ALL_FILE, 60,
				   "info ", dcc_info_nm))
			return 0;
		info_locked = 1;
		return 1;
	}

#ifdef DCC_DEBUG_CLNT_LOCK
	dcc_logbad(EX_SOFTWARE, "info already locked");
#endif
	return 1;
}



/* Unmap the shared information without closing its file.
 *	dcc_clnt_info is cleared even if unmapping fails.
 *	Returns 1 if ok and 0 after putting an error message in emsg. */
static u_char
unmap_info(DCC_EMSG emsg)
{
	u_char ok = 1;

#ifdef DCC_WIN32
	win32_unmap(&info_map, dcc_clnt_info, dcc_info_nm);
#else
	if (munmap((void *)dcc_clnt_info, sizeof(*dcc_clnt_info)) < 0) {
		dcc_pemsg(EX_OSERR, emsg, "munmap(%s): %s",
			  dcc_info_nm, ERROR_STR());
		ok = 0;
	}
#endif
	dcc_clnt_info = 0;
	return ok;
}



/* Unmap and close the shared info
 *      The contexts must be locked but not the info
 *	The file is closed even if it is open without being mapped, as it
 *	is while dcc_map_info() is checking or converting it.  Otherwise
 *	the descriptor would be leaked when a new file is opened. */
u_char					/* 0=something wrong, 1=all over */
dcc_unmap_close_info(DCC_EMSG emsg)	/* cleared of stale messages */
{
	u_char result = 1;

	assert_ctxts_locked();
	assert_info_unlocked();

	if (dcc_clnt_info
	    && !unmap_info(emsg))
		result = 0;

	if (info_fd >= 0) {
		if (0 > close(info_fd)) {
			/* do not hide a message about unmapping */
			extra_pemsg(EX_IOERR, emsg, "close(%s): %s",
				    dcc_info_nm, ERROR_STR());
			result = 0;
		}
		info_fd = -1;
	}

	return result;
}



/* Discover our host ID if we do not already know it.
 *	The ID is computed from gethostid(), if available, and the hostname.
 *	Without a hostname, the ID is made from the time of day and
 *	failure is reported.
 *	Returns 1 if ok and 0 after putting an error message in emsg.
 *	In either case the ID is not 0 afterwards. */
static u_char
get_clnt_hid(DCC_EMSG emsg)
{
	struct timeval now;
	u_char have_name;
	int pos;

	if (dcc_clnt_hid != 0)
		return 1;

#ifdef HAVE_GETHOSTID
	dcc_clnt_hid = gethostid();
#endif
	/* add the host name even if we have a hostid in case the hostid
	 * is a commonly used RFC 1918 IP address */
	if (0 > gethostname(dcc_clnt_hostname, sizeof(dcc_clnt_hostname)-1)) {
		dcc_pemsg(EX_NOHOST, emsg, "gethostname(): %s", ERROR_STR());
		have_name = 0;
	} else if (dcc_clnt_hostname[0] == '\0') {
		dcc_pemsg(EX_NOHOST, emsg, "null hostname from gethostname()");
		have_name = 0;
	} else {
		have_name = 1;
	}

	if (have_name) {
		/* mix each character and its position into the ID */
		for (pos = 0;
		     pos < ISZ(dcc_clnt_hostname)
		     && dcc_clnt_hostname[pos] != '\0';
		     ++pos) {
			dcc_clnt_hid += dcc_clnt_hostname[pos]*pos;
		}
	} else {
		/* do the best we can without a hostname */
		gettimeofday(&now, 0);
		dcc_clnt_hid = now.tv_sec + now.tv_usec;
	}

	/* this should almost never happen, but it could */
	if (dcc_clnt_hid == 0)
		dcc_clnt_hid = 1;
	return have_name;
}



/* Write a new DCC map file.
 *	If pfd points to an open file descriptor, the new contents are
 *	written to that file.  Otherwise a file named map_nm0 is created;
 *	it must not already exist.
 *	At most DCC_MAX_SRVR_NMS names are used from each list of servers.
 *	On failure the file is closed, *pfd is set to -1, and a file that
 *	was created is removed.
 *	Returns 1 if ok and 0 after putting an error message in emsg. */
u_char
dcc_create_map(DCC_EMSG emsg,
	       const DCC_PATH map_nm0,
	       int *pfd,		/* leave open & unlocked FD here */
	       const DCC_SRVR_NM *dcc_nms, int dcc_nms_len,
	       const DCC_SRVR_NM *grey_nms, int grey_nms_len,
	       const DCC_IP *src,
	       u_char info_flags)	/* DCC_INFO_FG_* */
{
	static int op_nums_r;
	DCC_CLNT_INFO info_clear;
	int fd;
	u_char created;
	DCC_PATH map_nm;
	int i;

	if (pfd && (fd = *pfd) >= 0) {
		created = 0;
		/* The file is already open, but the name is still needed
		 * for error messages and must not be left undefined. */
		snprintf(map_nm, sizeof(map_nm), "%s",
			 map_nm0 ? map_nm0 : "");
	} else {
		if (!fnm2rel(map_nm, map_nm0, 0)) {
			/* map_nm has not been set, so report the
			 * name that was rejected */
			dcc_pemsg(EX_IOERR, emsg, "long map name \"%s\"",
				  map_nm0);
			return 0;
		}
		fd = open(map_nm, O_RDWR|O_CREAT|O_EXCL, 0600);
		if (fd < 0) {
			dcc_pemsg(EX_IOERR, emsg, "open(%s): %s",
				  map_nm, ERROR_STR());
			return 0;
		}
		created = 1;
	}

	memset(&info_clear, 0, sizeof(info_clear));
	strcpy(info_clear.version, DCC_MAP_INFO_VERSION);

	if (dcc_nms_len != 0) {
		if (dcc_nms_len > DCC_MAX_SRVR_NMS)
			dcc_nms_len = DCC_MAX_SRVR_NMS;
		memcpy(info_clear.dcc.nms, dcc_nms,
		       sizeof(info_clear.dcc.nms[0])*dcc_nms_len);
	}
	info_clear.dcc.srvr_inx = NO_SRVR;

	if (grey_nms_len != 0) {
		if (grey_nms_len > DCC_MAX_SRVR_NMS)
			grey_nms_len = DCC_MAX_SRVR_NMS;
		memcpy(info_clear.grey.nms, grey_nms,
		       sizeof(info_clear.grey.nms[0])*grey_nms_len);
	}
	info_clear.grey.srvr_inx = NO_SRVR;

	if (src != 0)
		info_clear.src = *src;

	info_clear.flags = info_flags;
	if (!get_clnt_hid(emsg)) {
		close(fd);
		if (pfd)
			*pfd = -1;
		if (created)
			unlink(map_nm);
		return 0;
	}

	/* ensure that we have a new report ID even if we
	 * are repeatedly recreating a temporary map file */
	if (dcc_clnt_info)
		op_nums_r += dcc_clnt_info->proto_hdr.op_nums.r;
	info_clear.proto_hdr.op_nums.r = ++op_nums_r;

	i = write(fd, &info_clear, sizeof(info_clear));
	if (i != ISZ(info_clear)) {
		if (i < 0)
			dcc_pemsg(EX_SOFTWARE, emsg, "write(%s): %s",
				  map_nm, ERROR_STR());
		else
			dcc_pemsg(EX_IOERR, emsg,
				  "write(%s)=%d instead of %d",
				  map_nm, i, ISZ(info_clear));
		close(fd);
		if (pfd)
			*pfd = -1;
		if (created)
			unlink(map_nm);
		return 0;
	}

	/* give a new file to the caller or finish with it */
	if (created) {
		if (pfd)
			*pfd = fd;
		else
			close(fd);
	}
	return 1;
}



#ifdef DCC_MAP_INFO_VERSION_10
/* Lock and read the contents of the old info file.
 *	old_info, old_info_size		where to put the old contents
 *	old_magic, old_magic_size	expected start of the old contents
 *	new_info_nm			gets the name for the new file
 *	The contexts must be locked and the info unlocked on entry.
 *	The old file is left locked only when 1 is returned.  Any old
 *	file with the new name has then been removed. */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_start(DCC_EMSG emsg,
		  void *old_info, int old_info_size,
		  const char *old_magic, int old_magic_size,
		  DCC_PATH new_info_nm)
{
	int i;

	assert_ctxts_locked();
	assert_info_unlocked();

	/* only one process or thread can fix the file so wait for
	 * exclusive access to the old file */
	if (!dcc_info_lock(emsg))
		return -1;

	i = read(info_fd, old_info, old_info_size);
	if (i != old_info_size) {
		if (i < 0) {
			dcc_pemsg(EX_IOERR, emsg, "read(%s): %s",
				  dcc_info_nm, ERROR_STR());
		} else {
			dcc_pemsg(EX_IOERR, emsg, "read(%s)=%d instead of %d",
				  dcc_info_nm, i, old_info_size);
		}
		dcc_info_unlock(0);
		return -1;
	}

	/* Rewind the file.  The offset precedes the origin in the arguments
	 * of lseek(); swapping them works only where SEEK_SET is 0. */
	if (-1 == lseek(info_fd, 0, SEEK_SET)) {
		dcc_pemsg(EX_IOERR, emsg, "lseek(%s): %s",
			  dcc_info_nm, ERROR_STR());
		dcc_info_unlock(0);
		return -1;
	}

	/* let another converter try if this is not the expected version */
	if (strncmp(old_info, old_magic, old_magic_size)) {
		if (!dcc_info_unlock(emsg))
			return -1;
		return 0;
	}

	if (!fnm2rel(new_info_nm, dcc_info_nm, "-new")) {
		dcc_pemsg(EX_IOERR, emsg, "long map name \"%s\"",
			  dcc_info_nm);
		dcc_info_unlock(0);
		return -1;
	}
	unlink(new_info_nm);
	return 1;
}



/* Finish converting an old map file by writing a new file with the
 *	converted server names and moving it into the place of the old file.
 *	the old file must be locked
 *	The lock on the old file is released, or at least forgotten by
 *	dcc_info_unlock(), before every return.  A lock that stayed marked
 *	as held after a failure would make the next dcc_info_lock() do
 *	nothing for the file opened next. */
static int				/* -1=error, 1=done */
map_convert_fin(DCC_EMSG emsg,
		const DCC_PATH new_info_nm,
		const struct stat *old_sb,
		const DCC_SRVR_NM *dcc_nms, int dcc_nms_len,
		const DCC_SRVR_NM *grey_nms, int grey_nms_len,
		const DCC_IP *src,
		u_char info_flags)	/* DCC_INFO_FG_* */
{
	int new_fd;
#ifdef DCC_WIN32
	DCC_PATH old_info_nm;
#endif

	new_fd = -1;
	if (!dcc_create_map(emsg, new_info_nm, &new_fd,
			    dcc_nms, dcc_nms_len, grey_nms, grey_nms_len,
			    src, info_flags)) {
		dcc_info_unlock(0);
		return -1;
	}

#ifdef DCC_WIN32
	/* there are at least two races here,
	 * but Windows does not allow renaming or unlinking (e.g. by
	 * rename()) open files */
	if (!fnm2rel(old_info_nm, dcc_info_nm, "-old")) {
		dcc_pemsg(EX_IOERR, emsg, "long map name \"%s\"",
			  dcc_info_nm);
		/* do not leave the lock, the new file,
		 * or its descriptor behind */
		dcc_info_unlock(0);
		close(new_fd);
		unlink(new_info_nm);
		return -1;
	}
	unlink(old_info_nm);

	if (!dcc_info_unlock(emsg)) {
		close(new_fd);
		unlink(new_info_nm);
		return -1;
	}
	if (0 > close(info_fd)) {
		dcc_pemsg(EX_IOERR, emsg, "close(%s): %s",
			  dcc_info_nm, ERROR_STR());
		close(new_fd);
		unlink(new_info_nm);
		return -1;
	}
	info_fd = -1;

	if (0 > rename(dcc_info_nm, old_info_nm)) {
		dcc_pemsg(EX_IOERR, emsg, "rename(%s, %s): %s",
			  dcc_info_nm, old_info_nm, ERROR_STR());
		close(new_fd);
		unlink(new_info_nm);
		return -1;
	}

	close(new_fd);
	if (0 > rename(new_info_nm, dcc_info_nm)) {
		dcc_pemsg(EX_IOERR, emsg, "rename(%s, %s): %s",
			  new_info_nm, dcc_info_nm, ERROR_STR());
		unlink(new_info_nm);
		return -1;
	}
	return 1;
#else /* !DCC_WIN32 */
	/* if we are running as root,
	 * give the new file the owner of the old file */
	if (getuid() == 0
	    && 0 > fchown(new_fd, old_sb->st_uid, old_sb->st_gid)) {
		dcc_pemsg(EX_IOERR, emsg, "chown(%s,%d,%d): %s",
			  new_info_nm, (int)old_sb->st_uid, (int)old_sb->st_gid,
			  ERROR_STR());
		unlink(new_info_nm);
		close(new_fd);
		dcc_info_unlock(0);
		return -1;
	}

	if (0 > rename(new_info_nm, dcc_info_nm)) {
		dcc_pemsg(EX_IOERR, emsg, "rename(%s, %s): %s",
			  new_info_nm, dcc_info_nm, ERROR_STR());
		unlink(new_info_nm);
		close(new_fd);
		dcc_info_unlock(0);
		return -1;
	}

	if (!dcc_info_unlock(emsg)) {
		close(new_fd);
		unlink(new_info_nm);
		return -1;
	}

	close(new_fd);
	return 1;
#endif /* DCC_WIN32 */
}


#endif /* DCC_MAP_INFO_VERSION_10 */
#ifdef DCC_MAP_INFO_VERSION_10
/* Convert an array of DCC_MAX_SRVR_NMS server names from the format
 *	used through version 10 of the map file to the current format.
 *	Fields of the new entries without old counterparts are zeroed. */
static void
map_convert_v10_nms(DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS],
		    const DCC_V10_SRVR_NM old_nms[DCC_MAX_SRVR_NMS])
{
	int i;

	memset(new_nms, 0, sizeof(DCC_SRVR_NM)*DCC_MAX_SRVR_NMS);
	for (i = 0; i < DCC_MAX_SRVR_NMS; ++i) {
		new_nms[i].clnt_id = old_nms[i].clnt_id;
		new_nms[i].port = old_nms[i].port;
		/* The old name came from a file and might not be
		 * terminated, so bound both the read and the write.
		 * NOTE(review): assumes both hostname fields are char
		 * arrays and not pointers; confirm. */
		snprintf(new_nms[i].hostname, sizeof(new_nms[i].hostname),
			 "%.*s", ISZ(old_nms[i].hostname),
			 old_nms[i].hostname);
		memcpy(new_nms[i].passwd, old_nms[i].passwd,
		       sizeof(new_nms[i].passwd));
		new_nms[i].rtt_adj = old_nms[i].rtt_adj;
	}
}
#endif /* DCC_MAP_INFO_VERSION_10 */


#ifdef DCC_MAP_INFO_VERSION_5
/* Convert a version 5 map file.
 *      The contexts must be locked on entry.
 *	The file must be at least as large as the version 5 structure.
 *	The old RTT adjustments are multiplied by 10*1000. */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_v5(DCC_EMSG emsg, const struct stat *old_sb)
{
	DCC_PATH new_info_nm;
	DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS];
	DCC_V5_CLNT_INFO old_info;
	int i;

	if ((int)old_sb->st_size < ISZ(DCC_V5_CLNT_INFO))
		return 0;

	i = map_convert_start(emsg, &old_info, sizeof(DCC_V5_CLNT_INFO),
			      DCC_MAP_INFO_VERSION_5, sizeof(old_info.version),
			      new_info_nm);
	if (i <= 0)
		return i;

	memset(&new_nms, 0, sizeof(new_nms));
	for (i = 0; i < DIM(new_nms); ++i) {
		new_nms[i].clnt_id = old_info.nms[i].clnt_id;
		new_nms[i].port = old_info.nms[i].port;
		/* The old name came from a file and might not be
		 * terminated, so bound both the read and the write.
		 * NOTE(review): assumes both hostname fields are char
		 * arrays and not pointers; confirm. */
		snprintf(new_nms[i].hostname, sizeof(new_nms[i].hostname),
			 "%.*s", ISZ(old_info.nms[i].hostname),
			 old_info.nms[i].hostname);
		memcpy(new_nms[i].passwd, old_info.nms[i].passwd,
		       sizeof(new_nms[i].passwd));
		new_nms[i].rtt_adj = old_info.nms[i].rtt_adj*10*1000;
	}

	/* version 5 had neither greylist servers nor a source address */
	return map_convert_fin(emsg, new_info_nm, old_sb,
			       new_nms, DIM(new_nms), 0, 0,
			       0, old_info.flags);
}
#endif /* DCC_MAP_INFO_VERSION_5 */



#ifdef DCC_MAP_INFO_VERSION_6
/* Convert a version 6 map file.
 *      The contexts must be locked on entry.
 *	The file must be at least as large as the version 6 structure. */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_v6(DCC_EMSG emsg, const struct stat *old_sb)
{
	DCC_PATH new_info_nm;
	DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS];
	DCC_V6_CLNT_INFO old_info;
	int i;

	if ((int)old_sb->st_size < ISZ(DCC_V6_CLNT_INFO))
		return 0;

	i = map_convert_start(emsg, &old_info, sizeof(DCC_V6_CLNT_INFO),
			      DCC_MAP_INFO_VERSION_6, sizeof(old_info.version),
			      new_info_nm);
	if (i <= 0)
		return i;

	memset(&new_nms, 0, sizeof(new_nms));
	for (i = 0; i < DIM(new_nms); ++i) {
		new_nms[i].clnt_id = old_info.nms[i].clnt_id;
		new_nms[i].port = old_info.nms[i].port;
		/* The old name came from a file and might not be
		 * terminated, so bound both the read and the write.
		 * NOTE(review): assumes both hostname fields are char
		 * arrays and not pointers; confirm. */
		snprintf(new_nms[i].hostname, sizeof(new_nms[i].hostname),
			 "%.*s", ISZ(old_info.nms[i].hostname),
			 old_info.nms[i].hostname);
		memcpy(new_nms[i].passwd, old_info.nms[i].passwd,
		       sizeof(new_nms[i].passwd));
		new_nms[i].rtt_adj = old_info.nms[i].rtt_adj;
	}

	/* version 6 had neither greylist servers nor a source address */
	return map_convert_fin(emsg, new_info_nm, old_sb,
			       new_nms, DIM(new_nms), 0, 0,
			       0, old_info.flags);
}
#endif /* DCC_MAP_INFO_VERSION_6 */



#ifdef DCC_MAP_INFO_VERSION_7
/* Convert a version 7 map file.
 *      The contexts must be locked on entry.
 *	Version 7 files come in two sizes, with and without IPv6 addresses.
 *	The size of the file determines which layout is assumed. */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_v7(DCC_EMSG emsg, const struct stat *old_sb)
{
	DCC_PATH new_info_nm;
	union {
	    DCC_V7_IPV6_CLNT_INFO   ipv6;
	    DCC_V7_NOIPV6_CLNT_INFO noipv6;
	} old;
	DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS];
	DCC_SRVR_NM grey_nms[DCC_MAX_SRVR_NMS];
	const DCC_V10_SRVR_NM *old_dcc_nms, *old_grey_nms;
	int flags, rc;

	if (old_sb->st_size == sizeof(old.ipv6)) {
		rc = map_convert_start(emsg, &old.ipv6, sizeof(old.ipv6),
				       DCC_MAP_INFO_VERSION_7,
				       sizeof(old.ipv6.version),
				       new_info_nm);
		if (rc <= 0)
			return rc;

		old_dcc_nms = old.ipv6.dcc.nms;
		old_grey_nms = old.ipv6.grey.nms;
		flags = old.ipv6.flags;

	} else if (old_sb->st_size == sizeof(old.noipv6)) {
		rc = map_convert_start(emsg, &old.noipv6, sizeof(old.noipv6),
				       DCC_MAP_INFO_VERSION_7,
				       sizeof(old.noipv6.version),
				       new_info_nm);
		if (rc <= 0)
			return rc;

		old_dcc_nms = old.noipv6.dcc.nms;
		old_grey_nms = old.noipv6.grey.nms;
		flags = old.noipv6.flags;

	} else {
		/* neither size of a version 7 file */
		return 0;
	}

	map_convert_v10_nms(new_nms, old_dcc_nms);
	map_convert_v10_nms(grey_nms, old_grey_nms);

	/* version 7 had no source address */
	return map_convert_fin(emsg, new_info_nm, old_sb,
			       new_nms, DIM(new_nms), grey_nms, DIM(grey_nms),
			       0, flags);
}



#endif /* DCC_MAP_INFO_VERSION_7 */
#ifdef DCC_MAP_INFO_VERSION_8
/* Convert a version 8 map file.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit.
 *	Only a file of exactly the size of the version 8 structure
 *	is considered. */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_v8(DCC_EMSG emsg, const struct stat *old_sb)
{
	DCC_PATH new_info_nm;
	DCC_V8_CLNT_INFO old;
	DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS];
	DCC_SRVR_NM grey_nms[DCC_MAX_SRVR_NMS];
	int rc;

	if ((int)old_sb->st_size != ISZ(old))
		return 0;

	rc = map_convert_start(emsg, &old, sizeof(old),
			       DCC_MAP_INFO_VERSION_8, sizeof(old.version),
			       new_info_nm);
	if (rc > 0) {
		map_convert_v10_nms(new_nms, old.dcc.nms);
		map_convert_v10_nms(grey_nms, old.grey.nms);

		/* version 8 had no source address */
		rc = map_convert_fin(emsg, new_info_nm, old_sb,
				     new_nms, DIM(new_nms),
				     grey_nms, DIM(grey_nms),
				     0, old.flags);
	}
	return rc;
}



#endif /* DCC_MAP_INFO_VERSION_8 */
#ifdef DCC_MAP_INFO_VERSION_9
/* Convert a version 9 map file.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit.
 *	Only a file of exactly the size of the version 9 structure
 *	is considered.  The source address of the old file is kept. */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_v9(DCC_EMSG emsg, const struct stat *old_sb)
{
	DCC_PATH new_info_nm;
	DCC_V9_CLNT_INFO old;
	DCC_SRVR_NM nms[DCC_MAX_SRVR_NMS];
	DCC_SRVR_NM grey_nms[DCC_MAX_SRVR_NMS];
	int rc;

	if ((int)old_sb->st_size != ISZ(old))
		return 0;

	rc = map_convert_start(emsg, &old, sizeof(old),
			       DCC_MAP_INFO_VERSION_9, sizeof(old.version),
			       new_info_nm);
	if (rc > 0) {
		map_convert_v10_nms(nms, old.dcc.nms);
		map_convert_v10_nms(grey_nms, old.grey.nms);

		rc = map_convert_fin(emsg, new_info_nm, old_sb,
				     nms, DIM(nms), grey_nms, DIM(grey_nms),
				     &old.src, old.flags);
	}
	return rc;
}



#endif /* DCC_MAP_INFO_VERSION_9 */
#ifdef DCC_MAP_INFO_VERSION_10
/* Convert a version 10 map file.
 *      The contexts must be locked on entry.
 *      The old file may be locked on exit.
 *	Only a file of exactly the size of the version 10 structure
 *	is considered.  The source address of the old file is kept. */
static int				/* -1=error, 0=wrong version, 1=done */
map_convert_v10(DCC_EMSG emsg, const struct stat *old_sb)
{
	DCC_PATH new_info_nm;
	DCC_V10_CLNT_INFO old;
	DCC_SRVR_NM new_nms[DCC_MAX_SRVR_NMS];
	DCC_SRVR_NM grey_nms[DCC_MAX_SRVR_NMS];
	int rc;

	if ((int)old_sb->st_size != ISZ(old))
		return 0;

	rc = map_convert_start(emsg, &old, sizeof(old),
			       DCC_MAP_INFO_VERSION_10, sizeof(old.version),
			       new_info_nm);
	if (rc > 0) {
		map_convert_v10_nms(new_nms, old.dcc.nms);
		map_convert_v10_nms(grey_nms, old.grey.nms);

		rc = map_convert_fin(emsg, new_info_nm, old_sb,
				     new_nms, DIM(new_nms),
				     grey_nms, DIM(grey_nms),
				     &old.src, old.flags);
	}
	return rc;
}



#endif /* DCC_MAP_INFO_VERSION_10 */
/* convert from a previous version
 *	The contexts must be locked.  The old file must be open and unlocked
 *	Each converter that is compiled in is tried, from the oldest version
 *	to the newest, until one recognizes the file.  Each is called under
 *	the same symbol that guards its definition.
 *	Returns 1 if the file was converted so that the caller should open
 *	the new file, and 0 after an error or if no converter recognizes
 *	the file. */
static u_char
map_convert(DCC_EMSG emsg,
	    const struct stat *old_sb)
{
	int i;

	assert_ctxts_locked();
	assert_info_unlocked();

#ifdef DCC_MAP_INFO_VERSION_5
	i = map_convert_v5(emsg, old_sb);
	if (i < 0) {
		dcc_unmap_close_info(0);
		return 0;
	}
	/* unlock old file and open & lock new file */
	if (i > 0)
		return 1;
#endif /* DCC_MAP_INFO_VERSION_5 */
#ifdef DCC_MAP_INFO_VERSION_6
	i = map_convert_v6(emsg, old_sb);
	if (i < 0) {
		dcc_unmap_close_info(0);
		return 0;
	}
	/* unlock old file and open & lock new file */
	if (i > 0)
		return 1;
#endif /* DCC_MAP_INFO_VERSION_6 */
#ifdef DCC_MAP_INFO_VERSION_7
	i = map_convert_v7(emsg, old_sb);
	if (i < 0) {
		dcc_unmap_close_info(0);
		return 0;
	}
	/* unlock old file and open & lock new file */
	if (i > 0)
		return 1;
#endif /* DCC_MAP_INFO_VERSION_7 */
#ifdef DCC_MAP_INFO_VERSION_8
	i = map_convert_v8(emsg, old_sb);
	if (i < 0) {
		dcc_unmap_close_info(0);
		return 0;
	}
	/* unlock old file and open & lock new file */
	if (i > 0)
		return 1;
#endif /* DCC_MAP_INFO_VERSION_8 */
#ifdef DCC_MAP_INFO_VERSION_9
	i = map_convert_v9(emsg, old_sb);
	if (i < 0) {
		dcc_unmap_close_info(0);
		return 0;
	}
	/* unlock old file and open & lock new file */
	if (i > 0)
		return 1;
#endif /* DCC_MAP_INFO_VERSION_9 */
#ifdef DCC_MAP_INFO_VERSION_10
	i = map_convert_v10(emsg, old_sb);
	if (i < 0) {
		dcc_unmap_close_info(0);
		return 0;
	}
	/* unlock old file and open & lock new file */
	if (i > 0)
		return 1;
#endif /* DCC_MAP_INFO_VERSION_10 */
	/* no converter recognized the file */
	dcc_pemsg(EX_DATAERR, emsg, "%s is not a DCC map file",
		  dcc_info_nm);
	close(info_fd);
	info_fd = -1;
	return 0;
}



/* Ensure the shared information is available, but do not lock it.
 *      The contexts must be locked
 *      SUID privileges are often released
 *	new_info_nm	name of a map file to use or 0 to keep the current name
 *	new_info_fd	already open map file or -1.  It is owned by this
 *			function and closed on failure.
 *	A file of an old version is converted and then opened again.
 *	Returns 1 if the file is mapped and 0 after putting an error
 *	message in emsg. */
u_char
dcc_map_info(DCC_EMSG emsg,		/* cleared of stale messages */
	     const char *new_info_nm, int new_info_fd)
{
	struct stat sb;
#ifndef DCC_WIN32
	void *p;
#endif

	for (;;) {
		assert_ctxts_locked();
		assert_info_unlocked();

		/* work only if needed,
		 * but always check for a version changed */
		if (!(new_info_nm && strcmp(new_info_nm, dcc_info_nm))
		    && new_info_fd < 0
		    && dcc_clnt_info)
			return 1;

		if (!dcc_unmap_close_info(emsg)) {
			if (new_info_fd >= 0)
				close(new_info_fd);
			return 0;
		}

		if (new_info_nm) {
			if (!fnm2rel(dcc_info_nm, new_info_nm, 0)) {
				dcc_pemsg(EX_IOERR, emsg, "bad map name \"%s\"",
					  new_info_nm);
				/* do not leak the caller's file */
				if (new_info_fd >= 0)
					close(new_info_fd);
				return 0;
			}
			/* don't change name if we convert the file
			 * and so come back here */
			new_info_nm = 0;
		}
		if (dcc_info_nm[0] == '\0') {
			dcc_pemsg(EX_USAGE, emsg, "missing map file name");
			/* do not leak the caller's file */
			if (new_info_fd >= 0)
				close(new_info_fd);
			return 0;
		}

		if (new_info_fd >= 0) {
			info_fd = new_info_fd;
			new_info_fd = -1;
		} else {
			info_fd = open(dcc_info_nm, O_RDWR, 0600);
#ifndef DCC_WIN32
			/* try again with privileges */
			if (info_fd < 0
			    && dcc_get_priv_home(dcc_info_nm)) {
				info_fd = open(dcc_info_nm, O_RDWR, 0600);
				dcc_rel_priv();
			}
#endif
			if (info_fd < 0) {
				dcc_pemsg(EX_NOINPUT, emsg, "open(%s): %s",
					  dcc_info_nm, ERROR_STR());
				return 0;
			}
		}

		/* refuse to use the file if it is not private */
		if (!dcc_ck_private(emsg, &sb, dcc_info_nm, info_fd)) {
			/* The file is open but not mapped,
			 * so close it explicitly. */
			close(info_fd);
			info_fd = -1;
			return 0;
		}

		if ((int)sb.st_size != ISZ(*dcc_clnt_info)) {
			if (map_convert(emsg, &sb)) {
				/* finished with the old file */
				if (info_fd >= 0) {
					close(info_fd);
					info_fd = -1;
				}
				continue;
			}
			return 0;
		}

#ifdef DCC_WIN32
		dcc_clnt_info = win32_map(emsg, &info_map, dcc_info_nm,
					  info_fd, sizeof(*dcc_clnt_info));
		if (!dcc_clnt_info) {
			close(info_fd);
			info_fd = -1;
			return 0;
		}
#else

		/* don't give it to children */
		if (0 > fcntl(info_fd, F_SETFD, FD_CLOEXEC)) {
			dcc_pemsg(EX_IOERR, emsg,
				  "fcntl(F_SETFD FD_CLOEXEC %s): %s",
				  dcc_info_nm, ERROR_STR());
			close(info_fd);
			info_fd = -1;
			return 0;
		}

		p = mmap(0, sizeof(*dcc_clnt_info),
			 PROT_READ|PROT_WRITE, MAP_SHARED, info_fd, 0);
		if (p == MAP_FAILED) {
			dcc_pemsg(EX_IOERR, emsg, "mmap(%s): %s",
				  dcc_info_nm, ERROR_STR());
			close(info_fd);
			info_fd = -1;
			return 0;
		}
		dcc_clnt_info = p;
#endif /* DCC_WIN32 */

		if (!strncmp(dcc_clnt_info->version, DCC_MAP_INFO_VERSION,
			     sizeof(dcc_clnt_info->version))) {
			/* The file is the right version.  Set our ID in case
			 * it has been copied from another system */
			if (!get_clnt_hid(emsg))
				return 0;
			dcc_clnt_info->proto_hdr.op_nums.h = dcc_clnt_hid;

			return 1;
		}

		/* right size but wrong version */
		unmap_info(0);
		if (!map_convert(emsg, &sb))
			return 0;
		/* finished with the old file */
		if (info_fd >= 0) {
			close(info_fd);
			info_fd = -1;
		}
	}
}



/* Map the shared information with dcc_map_info() and then lock it.
 *	SUID privileges are often released */
u_char					/* 0=something wrong, 1=mapped */
dcc_map_lock_info(DCC_EMSG emsg,	/* cleared of stale messages */
		  const char *new_info_nm,
		  int new_info_fd)
{
	/* the lock is attempted only after the mapping succeeded */
	return (dcc_map_info(emsg, new_info_nm, new_info_fd)
		&& dcc_info_lock(emsg)) ? 1 : 0;
}



/* Every server is broken, so record that we should stay quiet for a while.
 *	Each new failure lengthens the delay, up to DCC_MAX_FAIL_EXP doublings.
 *	The contexts and the mapped information must be locked */
static void
fail_more(const DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class)
{
	assert_info_locked();

	if (class->fail_exp != 0) {
		/* a delay that is still running is not made any longer */
		if (class->fail_time >= ctxt->now.tv_sec)
			return;
	}

	/* start the backoff over if things have been quiet long enough */
	if (ctxt->now.tv_sec >= (class->fail_time
				 + (DCC_INIT_FAIL_SECS << class->fail_exp)))
		class->fail_exp = 0;

	/* lengthen the delay, but only up to the limit */
	++class->fail_exp;
	if (class->fail_exp > DCC_MAX_FAIL_EXP)
		class->fail_exp = DCC_MAX_FAIL_EXP;

	class->fail_time = (ctxt->now.tv_sec
			    + (DCC_INIT_FAIL_SECS << class->fail_exp));
}



/* See whether we are still waiting out the delay set by fail_more().
 *	The mapped information must be locked.
 *	When still waiting, emsg says how many seconds remain. */
static u_char				/* 0=failing */
ck_fail_time(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class)
{
	int secs_left;

	assert_info_locked();

	if (class->fail_exp != 0) {
		/* a remaining time outside (0, DCC_MAX_FAIL_SECS]
		 * is not treated as a delay */
		secs_left = class->fail_time - ctxt->now.tv_sec;
		if (secs_left > 0 && secs_left <= DCC_MAX_FAIL_SECS) {
			dcc_pemsg(EX_IOERR, emsg,
				  "continue not asking %s %d seconds after failure",
				  DCC_IS_GREY_STR(class), secs_left);
			return 0;
		}
	}

	return 1;
}



/* Reset a class of servers so that its names are resolved and its
 * round trip times are measured again.
 *	The mapped information must be locked */
void
dcc_force_measure_rtt(DCC_SRVR_CLASS *class)
{
	assert_info_locked();

	/* no longer backing off after failures */
	class->fail_exp = 0;

	/* the name resolution timer has expired */
	class->resolve = 0;

	/* no server is currently chosen */
	class->srvr_inx = NO_SRVR;

	/* the RTT measurement timer has expired */
	class->measure = 0;
}



/* pick the best server
 *	The client information and the contexts must be exclusively locked.
 *	Assume there is at least one hostname.
 *
 *	The address with the smallest effective RTT becomes class->srvr_inx,
 *	and class->base_rtt and class->thold_rtt are computed from the best
 *	and second best RTTs.
 *	If no address is usable, class->srvr_inx is set to NO_SRVR and,
 *	unless emsg already has a message, emsg gets a list of up to
 *	three host names and three addresses. */
static u_char				/* 0=have none, 1=found one */
pick_srvr(DCC_EMSG emsg, DCC_SRVR_CLASS *class)
{
	const DCC_SRVR_ADDR *ap, *min_ap;
	int rtt;
	int min_rtt;			/* smallest RTT	*/
	int min2_rtt;			/* second smallest RTT */
	SRVR_INX old_srvr_inx;

	assert_info_locked();

	if (class->num_srvrs == 0) {
		class->srvr_inx = NO_SRVR;
		extra_pemsg(EX_USAGE, emsg, "no valid %s server IP addresses",
			    DCC_IS_GREY_STR(class));
		return 0;
	}

	/* scan the addresses from last to first for the two smallest RTTs */
	old_srvr_inx = class->srvr_inx;
	min2_rtt = min_rtt = DCC_RTT_BAD;
	min_ap = 0;
	ap = &class->addrs[class->num_srvrs];
	while (ap > class->addrs) {
		--ap;
		rtt = effective_rtt(class, ap);
		if (rtt == DCC_RTT_BAD)
			continue;

		if (min_rtt > rtt) {
			/* the old best becomes the second best */
			if (min2_rtt > min_rtt)
				min2_rtt = min_rtt;
			min_rtt = rtt;
			min_ap = ap;
		} else if (min2_rtt > rtt) {
			min2_rtt = rtt;
		}
	}

	/* we found a usable server */
	if (min_ap) {
		/* Compute the basic RTT to the server including a variance */
		class->base_rtt = min_rtt + DCC_DCCD_DELAY;
		if (class->base_rtt > DCC_MAX_RTT)
			class->base_rtt = DCC_MAX_RTT;
		/* Decide how bad the server must get before we check for
		 *	an alternative.
		 * If there is no good second choice, there is no point in a
		 *	threshold for switching to it */
		class->thold_rtt = min2_rtt + DCC_DCCD_DELAY;
		if (class->thold_rtt >= DCC_MAX_RTT)
			class->thold_rtt = DCC_RTT_BAD;

		class->srvr_inx = (min_ap - class->addrs);
		if (class->srvr_inx != old_srvr_inx) {
			if (dcc_clnt_debug > 1 &&
			    GOOD_SRVR(class, old_srvr_inx)) {
				trace_perf("replacing",
					   &class->addrs[old_srvr_inx]);
				trace_perf("pick", min_ap);
			}
		}
		return 1;
	}

	/* we failed to find a working server */
	class->srvr_inx = NO_SRVR;

	flush_emsg(emsg, dcc_clnt_debug);
	if (!emsg || *emsg == '\0') {
		char astr[(DCC_SU2STR_SIZE+1+5+1)*3];
		const char *s, *h0;
		int l;

		l = dcc_ap2str_opt(astr, sizeof(astr),
				   class, 0, '\0');
		if (!strcmp(class->nms[0].hostname, astr)) {
			/* the first name is only the address,
			 * so do not say it twice */
			h0 = "";
			s = "";
		} else {
			h0 = class->nms[0].hostname;
			/* either form must separate "server" from
			 * the first host name */
			s = class->nms[1].hostname[0] ? "s " : " ";
		}

		/* append the second and third addresses if they exist
		 * and there is room for them */
		if (class->num_srvrs > 1 && l < ISZ(astr)-2) {
			astr[l++] = ' ';
			l += dcc_ap2str_opt(&astr[l], sizeof(astr)-l,
					    class, 1, '\0');
		}
		if (class->num_srvrs > 2 && l < ISZ(astr)-2) {
			astr[l++] = ' ';
			dcc_ap2str_opt(&astr[l], sizeof(astr)-l,
				       class, 2, '\0');
		}
		dcc_pemsg(EX_IOERR, emsg,
			  "no working %s server%s%s%s%s%s%s%s"
			  " at %s%s",
			  DCC_IS_GREY_STR(class),
			  s, h0,
			  class->nms[1].hostname[0] ? " " : "",
			  class->nms[1].hostname,
			  class->nms[2].hostname[0] ? " " : "",
			  class->nms[2].hostname,
			  class->nms[3].hostname[0] ? " ..." : "",

			  astr,
			  class->num_srvrs > 3 ? " ..." : "");
	}
	return 0;
}



/* count IP addresses per host name and per second level domain name
 *	There is one entry per server name, indexed like class->nms[]. */
typedef struct name_addrs {
    const char *sld;			/* domain name */
    u_char     sld_addrs;		/* # of addresses for domain name */
    u_char     host_addrs;		/* # of addresses for a host name */
    u_char     sld_addrs_inx;		/* entry whose sld_addrs counts this
					 * name's domain; the first entry with
					 * the same sld or this entry itself */
} NAME_ADDRS[DCC_MAX_SRVR_NMS];


/* remove one address from the list of addresses that is being built */
static void
del_new_addr(DCC_SRVR_CLASS *class,
	     NAME_ADDRS name_addrs,	/* addresses per server name */
	     int tgt)			/* delete this address */
{
	NAM_INX host_inx;
	int tail_len;

	/* there is now one fewer address for the host,
	 * for the host's domain, and in total */
	host_inx = class->addrs[tgt].nam_inx;
	--name_addrs[host_inx].host_addrs;
	--name_addrs[name_addrs[host_inx].sld_addrs_inx].sld_addrs;
	--class->num_srvrs;

	/* close the gap by moving the following addresses down one slot */
	tail_len = class->num_srvrs - tgt;
	if (tail_len > 0)
		memmove(&class->addrs[tgt], &class->addrs[tgt+1],
			tail_len * sizeof(class->addrs[0]));

	/* clear the slot that is now just past the end of the list */
	memset(&class->addrs[class->num_srvrs], 0, sizeof(class->addrs[0]));
}



/* impose arbitrary local order on IP addresses */
#define DCC_SRVRS_MOD	    16381

/* Compute the residue used to order an address.
 *	sup is the address to classify.
 *	sup2 receives the form of the address that sucmp() compares:
 *	the result of dcc_ipv6sutoipv4() when it succeeds,
 *	and otherwise a copy of *sup.
 *	The client's dcc_clnt_info->residue scales the result. */
static inline u_int
su_srvrs_mod(const DCC_SOCKU *sup,
	     DCC_SOCKU *sup2)
{
	u_int res, family_bias;

	if (dcc_ipv6sutoipv4(sup2, sup)) {
		res = sup2->ipv4.sin_addr.s_addr % DCC_SRVRS_MOD;
		/* distinguish IPv4 from IPv6 */
		family_bias = DCC_SRVRS_MOD;
	} else {
		*sup2 = *sup;
		res = (sup->ipv6.sin6_addr.s6_addr32[0] % DCC_SRVRS_MOD
		       + sup->ipv6.sin6_addr.s6_addr32[1] % DCC_SRVRS_MOD
		       + sup->ipv6.sin6_addr.s6_addr32[2] % DCC_SRVRS_MOD
		       + sup->ipv6.sin6_addr.s6_addr32[3] % DCC_SRVRS_MOD);
		family_bias = 0;
	}

	/* scale by the local residue in both cases */
	res *= dcc_clnt_info->residue;
	res %= DCC_SRVRS_MOD;

	return res + family_bias;
}



/* Compare two IP addresses using the local, reasonably unique ordering.
 *	The residues from su_srvrs_mod() decide.  Ties are broken by
 *	comparing the bytes of the two addresses that su_srvrs_mod() built.
 *	The result is negative, zero, or positive like memcmp(). */
static int
sucmp(const DCC_SOCKU *sup1, const DCC_SOCKU *sup2)
{
	DCC_SOCKU norm1, norm2;
	u_int res1, res2;
	int diff;

	res1 = su_srvrs_mod(sup1, &norm1);
	res2 = su_srvrs_mod(sup2, &norm2);

	diff = (int)res1 - (int)res2;
	return diff != 0 ? diff : memcmp(&norm1, &norm2, sizeof(DCC_SOCKU));
}



/* Deal with a list of IP addresses or aliases for one DCC server hostname.
 *	the contexts and the mmap()'ed info must be locked
 *
 *	The addresses are taken from dcc_hostaddrs..dcc_hostaddrs_end and
 *	merged into class->addrs, with the counts in name_addrs kept in step.
 *	nmp and nam_inx identify the server name that was just resolved. */
static void
copy_addrs(DCC_SRVR_CLASS *class,
	   const DCC_SRVR_NM *nmp,	/* server name being resolved */
	   const int nam_inx,
	   NAME_ADDRS name_addrs)	/* addresses per server name */
{
	DCC_SRVR_ADDR *ap;
	const DCC_SRVR_NM *nmp2;
	DCC_SOCKU *np, su, nxt, prev;
	u_int16_t port;
	int i, j, k;

	/* Keep as many IP addresses as we have room, but for as many
	 * named servers as possible
	 * Sort the addresses to keep our list stable when we re-check.
	 * Otherwise, we would start from scratch when nothing changes
	 * but the order of responses from a DNS server.
	 * Sort by residue class to pick a random subset when there
	 * are too many servers to fit in our list. */

	port = nmp->port;

	nxt.sa.sa_family = AF_UNSPEC;
	for (;;) {
		/* Pick the next address in the newly resolved list
		 * to consider.  We want the smallest address larger
		 * than the previous address we considered.
		 * "Smallest" is defined using the local random ordering
		 * of addresses. */
		prev = nxt;
		nxt.sa.sa_family = AF_UNSPEC;
		for (np = dcc_hostaddrs; np < dcc_hostaddrs_end; ++np) {
			if (np->sa.sa_family == AF_UNSPEC)
				continue;
			su = *np;
			*DCC_SU_PORTP(&su) = port;
			if ((prev.sa.sa_family == AF_UNSPEC
			     || sucmp(&su, &prev) > 0)
			    && (nxt.sa.sa_family == AF_UNSPEC
				|| sucmp(&nxt, &su) > 0))
				nxt = su;
		}
		/* quit if we've considered them all */
		if (nxt.sa.sa_family == AF_UNSPEC)
			break;

		/* ignore duplicate IP addresses even for other hostnames,
		 * unless the port numbers differ
		 * Step back from one past the last address without ever
		 * forming a pointer before the start of the array. */
		ap = &class->addrs[class->num_srvrs];
		while (ap > class->addrs) {
			--ap;
			if (!dcc_cmp_ap2su(ap, &nxt)) {
				/* they are the same, so keep the one with
				 * the non-anonymous ID
				 * or smallest RTT adjustment */
				nmp2 = &class->nms[ap->nam_inx];
				i = (nmp->clnt_id == DCC_ID_ANON);
				j = (nmp2->clnt_id == DCC_ID_ANON);
				if (i != j) {
					/* one is anonymous & other is not */
					if (i)
					    goto next_addr;
				} else {
					/* pick smallest RTT adjustment */
					if (nmp->rtt_adj >= nmp2->rtt_adj)
					    goto next_addr;
				}
				/* delete the previous instance */
				del_new_addr(class, name_addrs,
					     ap - class->addrs);
				break;
			}
		}

		/* If we already have as many addresses as we will use,
		 * then pick one to discard. Discard the last address of
		 * the host in the second level domain with the most
		 * addresses but without eliminating all addresses for any
		 * host name.  Look for the domain with the most IP addresses
		 * and that has at least one host with at least two
		 * addresses. */
		if (class->num_srvrs == DCC_MAX_SRVR_ADDRS) {
			int host_max, sld_max;
			NAM_INX nam1_inx, sld1_inx, sld2_inx;

			host_max = -1;
			sld_max = -1;
			nam1_inx = NO_NAM;
			sld1_inx = NO_NAM;
			for (i = 0; i <= nam_inx; i++) {
				/* ignore hosts with only 1 IP address */
				j = name_addrs[i].host_addrs;
				if (j <= 1)
					continue;
				sld2_inx = name_addrs[i].sld_addrs_inx;
				k = name_addrs[sld2_inx].sld_addrs;
				if (sld_max <= k) {
					if (sld1_inx != sld2_inx) {
					    sld_max = k;
					    sld1_inx = sld2_inx;
					    host_max = j;
					    nam1_inx = i;
					} else if (host_max <= j) {
					    host_max = j;
					    nam1_inx = i;
					}
				}
			}
			/* no additional IP addresses for the target host if
			 * it has the most IP addresses */
			if (nam1_inx == nam_inx)
				return;

			/* find the last address of the host with the most */
			for (i = 0, j = 0; i < class->num_srvrs; i++) {
				if (class->addrs[i].nam_inx == nam1_inx)
					j = i;
			}
			/* and delete it */
			del_new_addr(class, name_addrs, j);
		}

		/* install the new address in the growing list */
		ap = &class->addrs[class->num_srvrs];
		ap->rtt = DCC_RTT_BAD;
		if (nxt.sa.sa_family == AF_INET && DCC_INFO_IPV6())
			dcc_ipv4sutoipv6(&nxt, &nxt);
		else if (nxt.sa.sa_family == AF_INET6 && !DCC_INFO_IPV6())
			dcc_ipv6sutoipv4(&nxt, &nxt);
		dcc_su2ip(&ap->ip, &nxt);

		/* If this is a previously known address,
		 * preserve what we already knew about it
		 * Check the address family separately because dcc_cmp_ap2su()
		 * does not and DCC_INFO_IPV6() might have changed. */
		for (i = 0; i < class->num_srvrs; ++i) {
			if (class->addrs[i].ip.family == nxt.sa.sa_family
			    && !dcc_cmp_ap2su(&class->addrs[i], &nxt)) {
				*ap = class->addrs[i];
				break;
			}
		}
		ap->nam_inx = nam_inx;
		++class->num_srvrs;

		/* count the address for its host name and for its domain */
		++name_addrs[nam_inx].host_addrs;
		++name_addrs[name_addrs[nam_inx].sld_addrs_inx].sld_addrs;
next_addr:;
	}
}



/* Resolve one server name and merge its addresses into class->addrs.
 *	An empty name is only marked undefined.
 *	A name that cannot be resolved is marked undefined and
 *	explained in emsg. */
static void
resolve_nm(DCC_EMSG emsg,
	   DCC_SRVR_CLASS *class,
	   int nm_inx,			/* name being resolved */
	   NAME_ADDRS name_addrs)	/* addresses per server name */
{
	DCC_SRVR_NM *nmp;
	const char *sld, *dot, *nxt_dot;
	int error;
	u_char found;
	int sld_inx;

	nmp = &class->nms[nm_inx];
	nmp->defined = 0;
	if (nmp->hostname[0] == '\0')
		return;

	/* keep the RTT adjustment within its limits */
	if (nmp->rtt_adj > DCC_RTT_ADJ_MAX) {
		nmp->rtt_adj = DCC_RTT_ADJ_MAX;
	} else if (nmp->rtt_adj < -DCC_RTT_ADJ_MAX) {
		nmp->rtt_adj = -DCC_RTT_ADJ_MAX;
	}

	/* The domain starts after the next to last '.' in the host name,
	 * or is the entire host name if it has fewer than two dots. */
	sld = nmp->hostname;
	dot = strchr(sld, '.');
	while (dot != 0) {
		nxt_dot = strchr(dot+1, '.');
		if (nxt_dot == 0)
			break;
		sld = dot+1;
		dot = nxt_dot;
	}
	name_addrs[nm_inx].sld = sld;

	/* share the address count of the first earlier name in the
	 * same domain, or use our own entry if there is none */
	sld_inx = 0;
	while (sld_inx < nm_inx
	       && (name_addrs[sld_inx].sld == 0
		   || strcmp(sld, name_addrs[sld_inx].sld) != 0))
		++sld_inx;
	name_addrs[nm_inx].sld_addrs_inx = sld_inx;

	/* hold the host lock from the lookup until the
	 * results have been copied */
	dcc_host_lock();
	if (dcc_clnt_info->flags & DCC_INFO_FG_SOCKS)
		found = dcc_get_host_SOCKS(nmp->hostname,
					   DCC_INFO_IPV6() ? 2 : 0, &error);
	else
		found = dcc_get_host(nmp->hostname,
				     DCC_INFO_IPV6() ? 2 : 0, &error);
	if (found) {
		nmp->defined = 1;
		copy_addrs(class, nmp, nm_inx, name_addrs);
	} else {
		dcc_pemsg(EX_NOHOST, emsg, "%s: %s",
			  nmp->hostname, DCC_HSTRERROR(error));
	}
	dcc_host_unlock();
}



/* resolve server hostnames again
 *      both locks must be held on entry
 *      both will be released while working
 *      on success, both are held
 *      on failure only the contexts are locked
 *
 *      emsg receives a description of a failure
 *      ctxt supplies the current time for the resolve and measure timers
 *      cur is the class of servers whose addresses are refreshed
 */
static u_char				/* 0=no good addresses, 1=at least 1 */
resolve_nms(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *cur)
{
	DCC_SRVR_CLASS new;		/* scratch copy filled while unlocked */
	int nm_inx, a_inx;
	NAME_ADDRS name_addrs;
	DCC_SRVR_ADDR *new_ap, *cur_ap;

	assert_info_locked();

	if (dcc_clnt_debug > 1)
		dcc_trace_msg("resolve %s server host names",
			      DCC_IS_GREY_STR(cur));

	/* try not to resolve names too often
	 * and discourage other processes and threads from resolving
	 * or measuring RTTs until we finish */
	cur->resolve = ctxt->now.tv_sec+DCC_MAP_RESOLVE;
	cur->measure = ctxt->now.tv_sec+FAST_RTT_SECS;

	/* with no host names at all, forget every address and fail */
	if (cur->nms[0].hostname[0] == '\0') {
		if (HAVE_SRVR(cur)) {
			++cur->gen;
			/* NOTE(review): this path stores DCC_RTT_BAD but the
			 * "something changed" path below stores -DCC_RTT_BAD;
			 * confirm which sign is intended */
			cur->avg_thold_rtt = DCC_RTT_BAD;
			cur->srvr_inx = NO_SRVR;
		}
		cur->num_srvrs = 0;
		memset(cur->addrs, 0, sizeof(cur->addrs));
		extra_pemsg(EX_USAGE, emsg, "no %s server hostnames",
			    DCC_IS_GREY_STR(cur));
		dcc_info_unlock(0);
		return 0;
	}

	/* work on a private copy with an empty address list so that
	 * nothing shared is touched while the locks are released */
	new = *cur;
	memset(new.addrs, 0, sizeof(new.addrs));
	new.num_srvrs = 0;
	memset(&name_addrs, 0, sizeof(name_addrs));

	/* choose the multiplier used by su_srvrs_mod() to order addresses
	 * if it has not yet been chosen; it is never left zero */
	if (dcc_clnt_info->residue == 0) {
		dcc_clnt_info->residue = dcc_clnt_hid % DCC_SRVRS_MOD;
		if (dcc_clnt_info->residue == 0)
			dcc_clnt_info->residue = 1;
	}

	/* unlock everything while we wait for DNS */
	if (!dcc_info_unlock(emsg)) {
		cur->resolve = 0;
		return 0;
	}
	dcc_ctxts_unlock();
	if (emsg)
		*emsg = '\0';
	for (nm_inx = 0; nm_inx < DIM(cur->nms); ++nm_inx)
		resolve_nm(emsg, &new, nm_inx, name_addrs);
	/* take the locks again in the order contexts and then info */
	dcc_ctxts_lock();
	if (!dcc_info_lock(emsg)) {
		cur->resolve = 0;
		return 0;
	}

	/* measure all RTTs at least as often as we resolve names */
	cur->measure = 0;

	/* if we fail to resolve even one server host names,
	 * complain but try to continue using the old IP addresses */
	a_inx = new.num_srvrs;
	if (a_inx == 0) {
		extra_pemsg(EX_USAGE, emsg, "no valid %s server hostnames",
			    DCC_IS_GREY_STR(cur));
		dcc_info_unlock(0);
		return 0;
	}

	/* see if anything changed */
	for (nm_inx = 0; nm_inx < DIM(cur->nms); ++nm_inx) {
		if (cur->nms[nm_inx].defined != new.nms[nm_inx].defined)
			break;
	}
	if (nm_inx >= DIM(cur->nms)
	    && a_inx == cur->num_srvrs) {
		/* we have the same number of old and new names and addresses,
		 * so compare the old and new addresses */
		new_ap = new.addrs;
		cur_ap = cur->addrs;
		for (;;) {
			if (new_ap->nam_inx != cur_ap->nam_inx
			    || memcmp(&new_ap->ip, &cur_ap->ip,
				      sizeof(new_ap->ip))) {
				break;
			}
			++new_ap;
			++cur_ap;
			/* a_inx counts the addresses still to be compared */
			if (!--a_inx)
				return 1;   /* nothing changed */
		}
	}

	/* Something changed, so we must compute RTTs */
	++cur->gen;
	cur->srvr_inx = NO_SRVR;
	cur->avg_thold_rtt = -DCC_RTT_BAD;
	/* install only the addresses, their count, and which names resolved;
	 * the rest of the scratch copy is discarded */
	memcpy(&cur->addrs, &new.addrs, sizeof(cur->addrs));
	cur->num_srvrs = new.num_srvrs;
	for (nm_inx = 0; nm_inx < DIM(cur->nms); ++nm_inx)
		cur->nms[nm_inx].defined = new.nms[nm_inx].defined;

	return 1;
}



/* Close the context's socket, if it has one, and forget the connection.
 *	A failure of closesocket() is only traced, and only when debugging. */
void
dcc_clnt_soc_close(DCC_CLNT_CTXT *ctxt)
{
	if (ctxt->soc != INVALID_SOCKET) {
		if (SOCKET_ERROR == closesocket(ctxt->soc)) {
			if (dcc_clnt_debug)
				dcc_trace_msg("closesocket(ctxt): %s",
					      ERROR_STR());
		}
		ctxt->soc = INVALID_SOCKET;
		/* the socket is no longer connected to any server */
		ctxt->conn_su.sa.sa_family = AF_UNSPEC;
	}
}



/* disconnect (or close) and (re)open the client
 *	The contexts and shared information must be locked on entry
 *	and both are locked on exit, including on every failure.
 *
 *	The socket is bound to the configured source address if there is
 *	one that is usable, and otherwise to a wildcard IPv6 or IPv4
 *	address with the port of ctxt->bind_su.
 *	On success ctxt->bind_su holds the address reported by getsockname()
 *	and DCC_CTXT_USING_IPV4 in ctxt->flags says which family was bound. */
u_char					/* 0=failed to open the socket */
dcc_clnt_soc_reopen(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt)
{
	DCC_SOCKU su;
	DCC_SOCKLEN_T soc_len;
	int retries;

	assert_info_locked();

	if (ctxt->soc != INVALID_SOCKET)
		dcc_clnt_soc_close(ctxt);

	/* try to bind to the specified local interface address
	 * if it has changed
	 * or if it has been some time since we last tried and failed. */
	if (dcc_clnt_info->src.family == AF_UNSPEC) {
		ctxt->flags &= ~DCC_CTXT_SRCBAD;
	} else if (!(ctxt->flags & DCC_CTXT_SRCBAD)
		   || DCC_IS_TIME(ctxt->start.tv_sec, ctxt->bind_time,
				  DCC_CTXT_REBIND_SECS)) {
		dcc_mk_su(&su, dcc_clnt_info->src.family,
			  &dcc_clnt_info->src.u, 0);
		*DCC_SU_PORTP(&su) = DCC_SU_PORT(&ctxt->bind_su);
		retries = -1;
		if (0 >= dcc_udp_bind(emsg, &ctxt->soc, &su, &retries)) {
			/* remember the failure so that the source address
			 * is not tried again for a while */
			ctxt->flags |= DCC_CTXT_SRCBAD;
			ctxt->bind_time = (ctxt->start.tv_sec
					   + DCC_CTXT_REBIND_SECS);
			return 0;
		}
		ctxt->flags &= ~DCC_CTXT_SRCBAD;
		ctxt->bind_time = 0;

		/* we have a bound socket */
	}

	/* If we do not have a bound socket,
	 * try to bind a new socket with IPv6 first if allowed */
	if (ctxt->soc == INVALID_SOCKET && DCC_INFO_IPV6()) {
		dcc_mk_su(&su, AF_INET6, 0, DCC_SU_PORT(&ctxt->bind_su));
		retries = -1;
		if (!dcc_udp_bind(emsg, &ctxt->soc, &su, &retries))
			return 0;
	}

	/* if we still do not have a socket, try IPv4 */
	if (ctxt->soc == INVALID_SOCKET) {
		dcc_clnt_info->flags &= ~DCC_INFO_FG_IPV6;
		dcc_mk_su(&su, AF_INET, 0, DCC_SU_PORT(&ctxt->bind_su));
		retries = -1;
		if (!dcc_udp_bind(emsg, &ctxt->soc, &su, &retries))
			return 0;
	}

#if !defined(USE_POLL) && !defined(DCC_WIN32)
	/* select() cannot handle this descriptor.
	 * Fail like every other error here, with the shared information
	 * still locked as our callers have been promised. */
	if (ctxt->soc >= FD_SETSIZE) {
		dcc_pemsg(EX_IOERR, emsg, "socket FD %d > FD_SETSIZE %d",
			  ctxt->soc, FD_SETSIZE);
		dcc_clnt_soc_close(ctxt);
		return 0;
	}
#endif

#if defined(IPPROTO_IP) && defined(IP_TTL)
	if (dcc_debug_ttl != 0
	    && 0 > setsockopt(ctxt->soc, IPPROTO_IP, IP_TTL,
			      (void *)&dcc_debug_ttl, sizeof(dcc_debug_ttl))) {
		dcc_pemsg(EX_IOERR, emsg, "setsockopt(TTL=%d):%s",
			  dcc_debug_ttl, ERROR_STR());
		dcc_clnt_soc_close(ctxt);
		return 0;
	}
#endif

	/* remember the address and port that were actually bound */
	soc_len = sizeof(ctxt->bind_su);
	if (0 > getsockname(ctxt->soc, &ctxt->bind_su.sa, &soc_len)) {
		dcc_pemsg(EX_IOERR, emsg, "getsockname(): %s", ERROR_STR());
		dcc_clnt_soc_close(ctxt);
		return 0;
	}
	/* su is the address most recently given to dcc_udp_bind() */
	if (su.sa.sa_family == AF_INET)
		ctxt->flags |= DCC_CTXT_USING_IPV4;
	else
		ctxt->flags &= ~DCC_CTXT_USING_IPV4;
	return 1;
}



/* Receive one packet from the context's socket, through the SOCKS
 * library when DCC_INFO_FG_SOCKS is set.
 *	*sup is cleared and then receives the address of the sender.
 *	The result is that of recvfrom(). */
static int
do_recv(DCC_CLNT_CTXT *ctxt, DCC_OP_RESP *resp, int resp_len, DCC_SOCKU *sup)
{
	DCC_SOCKLEN_T from_len = sizeof(*sup);

	memset(sup, 0, sizeof(*sup));

	if (!(dcc_clnt_info->flags & DCC_INFO_FG_SOCKS))
		return recvfrom(ctxt->soc, WIN32_SOC_CAST resp, resp_len, 0,
				&sup->sa, &from_len);

	return Rrecvfrom(ctxt->soc, WIN32_SOC_CAST resp, resp_len, 0,
			 &sup->sa, &from_len);
}



/* Fetch the pending error of the context's socket with SO_ERROR.
 *	The error is traced only at high debugging levels, with "which"
 *	naming the caller.  A failure to fetch it is always traced. */
static void
clear_error(DCC_CLNT_CTXT *ctxt, const char *which)
{
	int pending;
	DCC_SOCKLEN_T pending_len = sizeof(pending);

	if (0 > getsockopt(ctxt->soc, SOL_SOCKET, SO_ERROR,
			   WIN32_SOC_CAST &pending, &pending_len)) {
		dcc_trace_msg("getsockopt(SO_ERROR): %s", ERROR_STR());
		return;
	}

	if (pending && dcc_clnt_debug > 3)
		dcc_trace_msg("%s SO_ERROR: %s", which, ERROR_STR1(pending));
}



/* Discard whatever is waiting in the socket buffer.
 *	The result is 1 as soon as a receive reports that it would block.
 *	The result is 0 after a receive error that is neither that nor an
 *	unreachable error, and after 50 packets or ignored errors. */
static u_char
dcc_clnt_soc_flush(DCC_CLNT_CTXT *ctxt)
{
	DCC_OP_RESP pkt;
	DCC_SOCKU su;
	char sbuf[DCC_SU2STR_SIZE];
	char rbuf[30];
	char ob[DCC_OPBUF];
	int pkt_len, pkt_num;

	for (pkt_num = 1; pkt_num <= 50; ++pkt_num) {
		pkt_len = do_recv(ctxt, &pkt, sizeof(pkt), &su);

		if (pkt_len < 0) {
			/* the buffer is empty */
			if (DCC_BLOCK_ERROR())
				return 1;

			if (!UNREACHABLE_ERRORS()) {
				dcc_trace_msg("flush recvfrom(%s): %s",
					      su.sa.sa_family
					      ? dcc_su2str(sbuf, sizeof(sbuf),
							   &su)
					      : "",
					      ERROR_STR());
				return 0;
			}

			/* unreachable errors are skipped like packets */
			if (dcc_clnt_debug > 1 || pkt_num > 10)
				dcc_trace_msg("ignore flushed error: %s",
					      ERROR_STR());
			continue;
		}

		/* say nothing about the first few stray packets
		 * unless debugging */
		if (dcc_clnt_debug == 0 && pkt_num < 10)
			continue;

		dcc_su2str(sbuf, sizeof(sbuf), &su);
		if (pkt_num <= 1)
			rbuf[0] = '\0';
		else
			snprintf(rbuf, sizeof(rbuf), " #%d", pkt_num);

		if (pkt_len >= ISZ(DCC_HDR)+ISZ(DCC_SIGNATURE)
		    && pkt_len == ntohs(pkt.hdr.len)
		    && pkt.hdr.pkt_vers >= DCC_PKT_VERSION_MIN
		    && pkt.hdr.pkt_vers <= DCC_PKT_VERSION_MAX) {
			/* it looks like a DCC packet, so describe it */
			dcc_trace_msg("flush%s %s from %s"
				      " ID=%d h=%#x p=%#x r=%#x t=%#x",
				      rbuf,
				      dcc_hdr_op2str(ob, sizeof(ob),
						&pkt.hdr),
				      sbuf,
				      ntohl(pkt.hdr.sender),
				      pkt.hdr.op_nums.h,
				      pkt.hdr.op_nums.p,
				      pkt.hdr.op_nums.r,
				      pkt.hdr.op_nums.t);
		} else {
			trace_bad_packet(0, &su, &pkt, pkt_len,
					 "flush%s %d stray bytes from"
					 " %s",
					 rbuf, pkt_len, sbuf);
		}
	}

	dcc_trace_msg("too many flushed packets or errors");
	return 0;
}



/* connect() the context's socket to a server, or disconnect it
 *	The contexts and shared information must be locked on entry
 *	They are locked on exit
 *	The result is 0 after a failure described in emsg. */
u_char
dcc_clnt_connect(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt,
		 const DCC_SOCKU *su)	/* 0=disconnect */
{
	u_char was_connected, reopen;
	char sustr[DCC_SU2STR_SIZE];

	assert_info_locked();

	/* disconnect if asked
	 *	In theory you can use connect() with a "null address."
	 *	In practice on some systems there is more than one or even
	 *	no notion of an effective "null" address.
	 *	So a connected socket is replaced with a new one. */
	if (!su) {
		if (ctxt->conn_su.sa.sa_family != AF_UNSPEC)
			return dcc_clnt_soc_reopen(emsg, ctxt);
#ifdef linux
		/* some flavors of Linux say "Connection refused" on
		 * sendto() on a not-connected socket when a previous
		 * use of sendto() hit a closed port, particularly
		 * via loopback */
		clear_error(ctxt, "Linux dcc_clnt_connect(0)");
#endif
		return 1;
	}

	/* nothing to do if already connected to the target */
	if (0 == memcmp(&ctxt->conn_su, su, sizeof(ctxt->conn_su)))
		return 1;

	was_connected = (ctxt->conn_su.sa.sa_family != AF_UNSPEC);

	/* At least some versions of Linux do not allow consecutive valid
	 *	calls to connect().  So for Linux, always close and reopen
	 *	the socket.
	 * At least some versions of FreeBSD unbind a socket while
	 *	reconnecting it.  So if the socket was bound to local address
	 *	or if it is time to try to bind it again, close and reopen it.
	 */
#ifdef linux
	reopen = was_connected;
#else
	reopen = (was_connected
		  && dcc_clnt_info->src.family != AF_UNSPEC
		  && (!(ctxt->flags & DCC_CTXT_SRCBAD)
		      || DCC_IS_TIME(ctxt->start.tv_sec, ctxt->bind_time,
				     DCC_CTXT_REBIND_SECS)));
#endif /* linux */

	/* A socket that is kept is drained of stale packets,
	 * and replaced after all if that fails. */
	if (!reopen)
		reopen = !dcc_clnt_soc_flush(ctxt);
	if (reopen && !dcc_clnt_soc_reopen(emsg, ctxt))
		return 0;

	if (SOCKET_ERROR == connect(ctxt->soc, &su->sa, DCC_SU_LEN(su))) {
		dcc_pemsg(EX_IOERR, emsg, "connect(%s): %s",
			  dcc_su2str(sustr, sizeof(sustr), su),
			  ERROR_STR());
		dcc_clnt_soc_close(ctxt);
		return 0;
	}
	ctxt->conn_su = *su;

	/* clear ICMP Unreachable errors from previous connections */
	if (was_connected)
		clear_error(ctxt, "dcc_clnt_connect");

	return 1;
}



/* send a single DCC message
 * the contexts and the shared information must be locked on entry
 * nothing is unlocked
 *
 * ctxt: context whose socket and transmission log are used
 * class, ap: the server address to send to, which must be in class->addrs
 * msg, msg_len: the request; its length, transaction number, sender,
 *	and signature are filled in here
 * connect_ok: nonzero to allow connect() to the target
 *
 * The result is 1 if the whole message was sent. */
static int				/* 0=failed this target, -1=all stop */
clnt_xmit(DCC_CLNT_CTXT *ctxt,
	  DCC_SRVR_CLASS *class, const DCC_SRVR_ADDR *ap,
	  DCC_HDR *msg, int msg_len, u_char connect_ok)
{
	DCC_XLOG_ENTRY *xloge;
#	define FSTR " from "
	char tgt_abuf[80], src_abuf[LITZ(FSTR)+INET6_ADDRSTRLEN+1];
	char ob[DCC_OPBUF];
	DCC_XLOG_ENTRY *xloge1;
	int i, result;

	msg->len = htons(msg_len);

	/* describe this transmission in the next free log entry
	 * The entry is kept only if the send succeeds, because
	 * ctxt->xlog.next is advanced only then. */
	xloge = ctxt->xlog.next;
	if (xloge > ctxt->xlog.last)
		dcc_logbad(EX_SOFTWARE, "xloge > ctxt->xlog.last");
	/* every transmission has its own transaction number */
	++msg->op_nums.t;
	xloge->op_nums = msg->op_nums;
	xloge->addr_inx = ap - class->addrs;
	dcc_mk_su(&xloge->su, ap->ip.family, &ap->ip.u, ap->ip.port);
	xloge->addrs_gen = class->gen;
	xloge->sent_us = ctxt->now_us;
	xloge->op = msg->op;
	if (!GOOD_NAM(ap->nam_inx))
		dcc_logbad(EX_SOFTWARE, "clnt_xmit: bad nam_inx");
	/* use the client-ID and password of the server's host name */
	xloge->id = class->nms[ap->nam_inx].clnt_id;
	msg->sender = htonl(xloge->id);
	if (xloge->id == DCC_ID_ANON) {
		/* anonymous requests carry a signature of zeros */
		xloge->passwd[0] = '\0';
		memset((char *)msg + (msg_len-sizeof(DCC_SIGNATURE)), 0,
		       sizeof(DCC_SIGNATURE));
	} else {
		/* NOTE(review): a zero ID or empty password makes the host
		 * name anonymous for later transmissions, but xloge->id and
		 * msg->sender are not changed here and this message is still
		 * signed below; confirm that is intended */
		if (xloge->id == 0) {
			if (dcc_clnt_debug)
				dcc_logbad(EX_SOFTWARE, "zero client-ID for %s",
					   class->nms[ap->nam_inx].hostname);
			else
				dcc_trace_msg("zero client-ID for %s",
					      class->nms[ap->nam_inx].hostname);
			class->nms[ap->nam_inx].clnt_id = DCC_ID_ANON;
		} else if (class->nms[ap->nam_inx].passwd[0] == '\0') {
			if (dcc_clnt_debug)
				dcc_logbad(EX_SOFTWARE, "null password for %s",
					   class->nms[ap->nam_inx].hostname);
			else
				dcc_trace_msg("null password for %s",
					      class->nms[ap->nam_inx].hostname);
			class->nms[ap->nam_inx].clnt_id = DCC_ID_ANON;
		}
		/* the password is handled as a fixed size field */
		strncpy(xloge->passwd, class->nms[ap->nam_inx].passwd,
			sizeof(xloge->passwd));
		dcc_sign(xloge->passwd, sizeof(xloge->passwd), msg, msg_len);
	}

	/* Use connect() when possible to get ICMP Unreachable messages.
	 * It is impossible when talking to more than one server.
	 * So do not connect if any pending entry in the log is
	 * for a different address. */
	for (xloge1 = ctxt->xlog.base; connect_ok && xloge1 < xloge; ++xloge1) {
		if (xloge1->op_nums.t == DCC_OP_NUMS_NULL)
			continue;
		if (xloge->addr_inx != xloge1->addr_inx) {
			connect_ok = 0;
			break;
		}
	}
	/* connect to the target or disconnect; no error message is wanted */
	if (!dcc_clnt_connect(0, ctxt, connect_ok ? &xloge->su : 0))
		return -1;

	/* use send() on a connected socket and sendto() otherwise,
	 * in both cases through the SOCKS library if it is in use */
	if (ctxt->conn_su.sa.sa_family != AF_UNSPEC) {
		if (dcc_clnt_info->flags & DCC_INFO_FG_SOCKS)
			i = Rsend(ctxt->soc, WIN32_SOC_CAST msg, msg_len, 0);
		else
			i = send(ctxt->soc, WIN32_SOC_CAST msg, msg_len, 0);

	} else {
		if (dcc_clnt_info->flags & DCC_INFO_FG_SOCKS)
			i = Rsendto(ctxt->soc, WIN32_SOC_CAST msg, msg_len, 0,
				    &xloge->su.sa, DCC_SU_LEN(&xloge->su));
		else
			i = sendto(ctxt->soc, WIN32_SOC_CAST msg, msg_len, 0,
				   &xloge->su.sa, DCC_SU_LEN(&xloge->su));
	}
	/* count the attempt for this address even if it failed */
	++ctxt->xlog.cur[ap - class->addrs].xmits;
	if (i == msg_len) {
		if (dcc_clnt_debug > 3)
			dcc_trace_msg("%8.6f sent %s t=%#x to %s",
				      get_age(ctxt),
				      dcc_hdr_op2str(ob, sizeof(ob), msg),
				      xloge->op_nums.t,
				      addr2str(tgt_abuf, sizeof(tgt_abuf),
					       class, class->gen, ap, 0));
		/* keep the log entry and expect an answer */
		++ctxt->xlog.next;
		++ctxt->xlog.outstanding;
		return 1;
	}

	/* stop output only for this target after ICMP Unreachable
	 * otherwise stop all output */
	if (i >= 0) {
		result = -1;		/* bad length is fatal */
	} else {
		result = UNREACHABLE_ERRORS() ? 0 : -1;
	}

	/* always report fatal failures,
	 * but report unreachable targets only when debugging */
	if (result < 0 || dcc_clnt_debug) {
		/* mention the local address if the socket has one */
		if (ctxt->bind_su.sa.sa_family == AF_UNSPEC) {
			src_abuf[0] = '\0';
		} else {
			memcpy(src_abuf, FSTR, LITZ(FSTR));
			dcc_su2str(&src_abuf[LITZ(FSTR)],
				   sizeof(src_abuf)-LITZ(FSTR),
				   &ctxt->bind_su);
		}
		if (i < 0) {
			dcc_trace_msg("%s(%s)%s: %s",
				      connect_ok ? "send" : "sendto",
				      addr2str(tgt_abuf, sizeof(tgt_abuf),
					       class,
					       class->gen, ap, 0),
				      src_abuf, ERROR_STR());
		} else {
			dcc_trace_msg("%s(%s%s)=%d instead of %d",
				      connect_ok ? "send" : "sendto",
				      addr2str(tgt_abuf, sizeof(tgt_abuf),
					       class,
					       class->gen, ap, 0),
				      src_abuf, i, msg_len);
		}
	}
	return result;

#undef FSTR
}



/* Fold a newly measured round trip time into the average for the server
 * named by a transmission log entry.
 *	us is the measurement in microseconds.  It is limited to the
 *	range 0 through DCC_RTT_BAD, and the resulting average is limited
 *	to DCC_MAX_RTT.
 *	Nothing is done unless the mapped information is locked and
 *	the log entry is for the current generation of addresses. */
static void
update_rtt(DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class, DCC_XLOG_ENTRY *xloge,
	   int us)
{
	DCC_SRVR_ADDR *ap;

	/* compute new RTT only if the map data structure is locked,
	 * the clock did not jump,
	 * and we're talking about the same hosts */
	if (!info_locked
	    || xloge->addrs_gen != class->gen)
		return;

	ap = &class->addrs[xloge->addr_inx];

	if (us < 0)
		us = 0;
	if (us > DCC_RTT_BAD)
		us = DCC_RTT_BAD;

	if (ap->rtt == DCC_RTT_BAD) {
		/* just set the RTT if this is a newly working server
		 * and clear its history.  The zero time stamp makes the
		 * next measurement count as the first in a long time. */
		ap->rtt = us;
		ap->total_xmits = 0;
		ap->total_resps = 0;
		ap->resp_mem = 0;
		ap->rtt_updated = 0;

	} else if (ctxt->now.tv_sec > ap->rtt_updated + FAST_RTT_SECS) {
		/* adjust the RTT quickly if this is the first
		 * measurement in a long time */
		AGE_AVG(ap->rtt, us, 2, 1);
		ap->rtt_updated = ctxt->now.tv_sec;

	} else {
		/* otherwise let the average move slowly */
		AGE_AVG(ap->rtt, us, 9, 1);
		ap->rtt_updated = ctxt->now.tv_sec;
	}

	if (ap->rtt > DCC_MAX_RTT)
		ap->rtt = DCC_MAX_RTT;
}



/* Update response rate and penalize the RTT of servers that failed to respond.
 *	the data must be locked
 *
 *	ctxt: context whose transmission log and counters are consumed
 *	class: the servers whose addresses are updated
 *	measuring: nonzero to also count working addresses and to mark
 *	    addresses with no recent answers as bad */
static void
resp_rates(DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class,
	   u_char measuring)
{
	DCC_SRVR_ADDR *ap;
	DCC_XLOG_ENTRY *xloge;
	const DCC_XLOG_ENTRY *xloge2;
	int us, us2;
	u_char seen;
	int i;

	for (xloge = ctxt->xlog.base; xloge < ctxt->xlog.next; ++xloge) {
		/* ignore responses we've already handled */
		if (xloge->op_nums.t == DCC_OP_NUMS_NULL)
			continue;

		ap = &class->addrs[xloge->addr_inx];

		/* Update the RTT of this server as if we would have received
		 * a response if we had waited a little longer, unless we
		 * would be assuming a faster RTT than its current average.
		 *
		 * Use the longest of the time spent waiting for this request
		 * and the delays of any requests that were answered by the
		 * server. */
		us = ctxt->now_us - xloge->sent_us;
		seen = 0;
		/* look at the other log entries for the same address
		 * NOTE(review): "seen" is set by entries that are not yet
		 * marked finished, while the elapsed times come from entries
		 * that are marked finished; confirm this is the intended
		 * reading of the description above */
		for (xloge2=ctxt->xlog.base; xloge2<ctxt->xlog.next; ++xloge2) {
			if (xloge2->addr_inx != xloge->addr_inx
			    || xloge2 == xloge)
				continue;
			if (xloge2->op_nums.t != DCC_OP_NUMS_NULL) {
				seen = 1;
				continue;
			}
			us2 = ctxt->now_us - xloge2->sent_us;
			if (us < us2)
				us = us2;
		}
		/* update the RTT
		 * if we waited at least as long as the current RTT
		 * or we received at least one response
		 * NOTE(review): the test requires both conditions and
		 * compares ctxt->now_us instead of the wait computed above;
		 * confirm which of the comment and the code is right */
		if (ctxt->now_us >= ap->rtt && seen)
			update_rtt(ctxt, class, xloge, us + DCC_DCCD_DELAY);

		/* having received its answer, forget this transmission */
		xloge->op_nums.t = DCC_OP_NUMS_NULL;
	}

	/* maintain the response rate */
	for (i = 0, ap = class->addrs; i < DIM(ctxt->xlog.cur); ++i, ++ap) {
		/* skip addresses that are bad or were not used this time */
		if (ap->rtt == DCC_RTT_BAD
		    || ctxt->xlog.cur[i].xmits == 0)
			continue;
		if (measuring) {
			if (ctxt->xlog.cur[i].resps != 0) {
				++ctxt->xlog.working_addrs;
			} else if (!(ap->resp_mem & ((1<<DCC_MAX_XMITS)-1))) {
				/* this server is bad if there were no answers
				 * at all for this measurement cycle */
				ap->rtt = DCC_RTT_BAD;
				continue;
			}
		}
		ap->total_xmits += ctxt->xlog.cur[i].xmits;
		if (ap->total_xmits > DCC_TOTAL_XMITS_MAX)
			ap->total_xmits = DCC_TOTAL_XMITS_MAX;
		/* Shift one bit into the history in resp_mem for each
		 * transmission, 1 for those that were answered.
		 * total_resps is reduced by each bit that is shifted out
		 * of the top and increased by each 1 bit that is shifted in.
		 * This consumes the counters in ctxt->xlog.cur[i]. */
		do {
			ap->total_resps -= (ap->resp_mem
					    >> (DCC_TOTAL_XMITS_MAX-1));
			ap->resp_mem <<= 1;
			if (ctxt->xlog.cur[i].resps != 0) {
				ap->resp_mem |= 1;
				++ap->total_resps;
				--ctxt->xlog.cur[i].resps;
			}
		} while (--ctxt->xlog.cur[i].xmits != 0);
	}
}



/* Receive a single DCC response.
 *	Packets are read with do_recv() until one passes the length, version,
 *	    and operation number checks and matches the record of a
 *	    transmission in ctxt->xlog, or until nothing more is waiting.
 *	Packets that fail a check are traced and skipped.
 *	*xlogep is cleared on entry and points to the matched transmission
 *	    log entry when 1 (unreachable) or 2 (ok) is returned.
 *	The contexts must be locked.
 *	The mapped or common info ought to be locked, but reception
 *	    works if it is not. */
static int      /* -1=fatal error, 0=no data, 1=unreachable, 2=ok */
clnt_recv(DCC_CLNT_CTXT *ctxt, DCC_SRVR_CLASS *class,
	  DCC_OP_RESP *resp,		/* the response */
	  int resp_len,
	  const DCC_HDR *msg,		/* the original request */
	  DCC_XLOG_ENTRY **xlogep)
{
	DCC_SOCKU su;
	DCC_XLOG_ENTRY *xloge, *xloge1;
	DCC_SRVR_ADDR *ap;
	char str[DCC_SU2STR_SIZE+50];
	char su_str[DCC_SU2STR_SIZE];	/* 2nd buffer for 2 names in 1 message */
	char ob[DCC_OPBUF];
	char ob2[DCC_OPBUF];
	int pkt_len;

	*xlogep = 0;
	for (;;) {
next_pkt:;
		pkt_len = do_recv(ctxt, resp, resp_len, &su);
		if (pkt_len < 0) {
			/* Stop looking when there are no more packets */
			if (DCC_BLOCK_ERROR())
				return 0;

			/* ignore ICMP Unreachables unless we have connected
			 * to a server.
			 * If so, forget all outstanding requests */
			if (ctxt->conn_su.sa.sa_family != AF_UNSPEC
			    && UNREACHABLE_ERRORS()) {
				/* mark every outstanding request finished
				 * and remember the last of them */
				for (xloge1 = ctxt->xlog.base, xloge = 0;
				     xloge1 < ctxt->xlog.next;
				     ++xloge1) {
					if (xloge1->op_nums.t==DCC_OP_NUMS_NULL)
					    continue;
					xloge = xloge1;
					xloge->op_nums.t = DCC_OP_NUMS_NULL;
				}
				if (!xloge) {
					/* nothing was outstanding */
					if (dcc_clnt_debug)
					    dcc_trace_msg("ignore unmatched:"
							" %s", ERROR_STR());
					continue;
				}
				if (dcc_clnt_debug)
					dcc_trace_msg("note recvfrom(%s): %s",
						      dcc_su2str(str,
							  sizeof(str),
							  &ctxt->conn_su),
						      ERROR_STR());
				/* the address is bad, but count the error
				 * as a response from it */
				ctxt->xlog.outstanding = 0;
				ap = &class->addrs[xloge->addr_inx];
				ap->rtt = DCC_RTT_BAD;
				++ctxt->xlog.cur[xloge->addr_inx].resps;
				*xlogep = xloge;
				return 1;
			}
			dcc_trace_msg("clnt_recv recvfrom(%s): %s",
				      su.sa.sa_family
				      ? dcc_su2str(str, sizeof(str), &su) : "",
				      ERROR_STR());
			return -1;
		}

		/* discard packets of impossible sizes */
		if (pkt_len > resp_len) {
			trace_bad_packet(&ctxt->xlog, &su, resp, pkt_len,
					 "recv(%s)=%d>%d",
					 dcc_su2str(str, sizeof(str), &su),
					 pkt_len, resp_len);
			continue;
		}
		if (pkt_len < ISZ(DCC_HDR)+ISZ(DCC_SIGNATURE)) {
			trace_bad_packet(&ctxt->xlog, &su, resp, pkt_len,
					 "recv(%s)=%d<%d",
					 dcc_su2str(str, sizeof(str), &su),
					 pkt_len,
					 ISZ(DCC_HDR)+ISZ(DCC_SIGNATURE));
			continue;
		}
		/* the length in the header must agree with what arrived */
		if (pkt_len != ntohs(resp->hdr.len)) {
			trace_bad_packet(&ctxt->xlog, &su, resp, pkt_len,
					 "recv(%s)=%d but hdr len=%d",
					 dcc_su2str(str, sizeof(str), &su),
					 pkt_len,
					 ntohs(resp->hdr.len));
			continue;
		}

		if (resp->hdr.pkt_vers < DCC_PKT_VERSION_MIN
		    || resp->hdr.pkt_vers > DCC_PKT_VERSION_MAX) {
			trace_bad_packet(&ctxt->xlog, &su, resp, pkt_len,
					 "unrecognized version #%d from %s",
					 resp->hdr.pkt_vers,
					 dcc_su2str(str, sizeof(str), &su));
			continue;
		}

		/* We cannot use the server's apparent IP address because it
		 * might be multi-homed and respond with an address other than
		 * the address to which we sent.  So use our records of
		 * which OP_NUMS was sent to which server address. */
		if (resp->hdr.op_nums.r != msg->op_nums.r
		    || resp->hdr.op_nums.p != msg->op_nums.p
		    || resp->hdr.op_nums.h != msg->op_nums.h) {
			if (dcc_clnt_debug)
				dcc_trace_msg("unmatched response from %s"
					      " ID=%d h=%#x/%#x p=%#x/%#x"
					      " r=%#x/%#x t=%#x",
					      dcc_su2str(str, sizeof(str), &su),
					      ntohl(resp->hdr.sender),
					      resp->hdr.op_nums.h,
					      msg->op_nums.h,
					      resp->hdr.op_nums.p,
					      msg->op_nums.p,
					      resp->hdr.op_nums.r,
					      msg->op_nums.r,
					      resp->hdr.op_nums.t);
			continue;
		}

		/* everything matches except perhaps the transmission #,
		 * so look for the transmission with that number */
		xloge = ctxt->xlog.base;
		for (;;) {
			if (xloge >= ctxt->xlog.next) {
				/* The labels follow the arguments:
				 * r=response/request and t=response */
				if (dcc_clnt_debug)
					dcc_trace_msg("stray response from %s"
						      " ID=%d h=%#x p=%#x"
						      " r=%#x/%#x t=%#x",
						      dcc_su2str(str,
							  sizeof(str), &su),
						      ntohl(resp->hdr.sender),
						      resp->hdr.op_nums.h,
						      resp->hdr.op_nums.p,
						      resp->hdr.op_nums.r,
						      msg->op_nums.r,
						      resp->hdr.op_nums.t);
				goto next_pkt;
			}
			if (resp->hdr.op_nums.t == xloge->op_nums.t)
				break;
			++xloge;
		}

		ap = &class->addrs[xloge->addr_inx];

#ifdef CLNT_LOSSES
		/* pretend to lose every fifth answer for testing */
		if ((++clnt_losses % 5) == 0) {
			dcc_trace_msg("dropped answer from %s",
				      addr2str(str, sizeof(str), class,
					       xloge->addrs_gen, ap, &su));
			continue;
		}
#endif

		/* check the signature only if the request had a password */
		if (xloge->passwd[0] != '\0'
		    && !dcc_ck_signature(xloge->passwd, sizeof(xloge->passwd),
					 resp, pkt_len)) {
			dcc_error_msg("%s ID=%d rejected our password for ID %d"
				      " and %s with %s"
				      "    h=%#x p=%#x r=%#x t=%#x",
				      addr2str(str, sizeof(str),
					       class, xloge->addrs_gen,
					       ap, &su),
				      ntohl(resp->hdr.sender),
				      xloge->id,
				      dcc_hdr_op2str(ob, sizeof(ob),
						     msg),
				      dcc_hdr_op2str(ob2, sizeof(ob2),
						     &resp->hdr),
				      resp->hdr.op_nums.h,
				      resp->hdr.op_nums.p,
				      resp->hdr.op_nums.r,
				      resp->hdr.op_nums.t);
			continue;
		}

		if (dcc_clnt_debug > 3)
			dcc_trace_msg("%8.6f received response from %s ID=%d"
				      "    h=%#x p=%#x r=%#x t=%#x",
				      get_age(ctxt),
				      dcc_su2str(str, sizeof(str), &su),
				      ntohl(resp->hdr.sender),
				      resp->hdr.op_nums.h,
				      resp->hdr.op_nums.p,
				      resp->hdr.op_nums.r,
				      resp->hdr.op_nums.t);

		/* don't find the record of this transmission again */
		xloge->op_nums.t = DCC_OP_NUMS_NULL;
		if (ctxt->xlog.outstanding != 0)
			--ctxt->xlog.outstanding;
		++ctxt->xlog.cur[xloge->addr_inx].resps;
		*xlogep = xloge;

		/* Notice if multi-homing is involved
		 * That is true if the address from which the server answered
		 * differs from the address to which we sent.
		 * The two names need separate buffers because both are
		 * arguments of the same call. */
		if (!(ap->flags & DCC_SRVR_ADDR_MHOME)
		    && dcc_cmp_ap2su(ap, &su)) {
			if (dcc_clnt_debug)
				dcc_trace_msg("%s multi-homed at %s",
					      addr2str(str, sizeof(str),
						       class, xloge->addrs_gen,
						       ap, 0),
					      dcc_su2str(su_str,
							 sizeof(su_str), &su));
			ap->flags |= DCC_SRVR_ADDR_MHOME;
		}

		return 2;
	}
}



/* Wait until a socket can be read or written, or until a timeout.
 *	An interrupted wait is restarted unless the caller asked to wait
 *	forever, in which case 0 is returned. */
int					/* -1=error, 0=timeout, 1=ready */
dcc_select_poll(DCC_EMSG emsg,
		SOCKET fd,
		u_char rd,		/* 1=read 0=write */
		int us)			/* <0=forever until signal */
{
#ifdef USE_POLL
	struct pollfd pfd;
	int ready;
	int timeout_ms;

	/* poll() counts milliseconds, so round the microseconds up */
	timeout_ms = (us < 0) ? -1 : (us+999)/1000;

	pfd.fd = fd;
	/* At least some versions of Linux have POLLRDNORM etc. in
	 * asm/poll.h, but with definitions of POLLIN, POLLPRI, etc.
	 * that conflict with their definitions in sys/poll.h.
	 * Perhaps it is not necessary to check for high or
	 * low priority data, but the poll() documentation on
	 * some systems says that asking about POLLIN does not
	 * say anything about other data */
#ifdef POLLRDNORM
	pfd.events = (rd
		      ? (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)
		      : (POLLOUT | POLLWRNORM | POLLWRBAND | POLLPRI));
#else
	pfd.events = rd ? POLLIN : POLLOUT;
#endif

	do {
		pfd.revents = 0;
		ready = poll(&pfd, 1, timeout_ms);
		if (ready >= 0)
			return ready;
		/* anything but an interruption is fatal */
		if (!DCC_SELECT_NERROR()) {
			dcc_pemsg(EX_OSERR, emsg, "poll(): %s", ERROR_STR());
			return -1;
		}
		/* try again, but stop forever on a signal */
	} while (us >= 0);
	return 0;
#else
	struct timeval tv, *tvp;
	fd_set set;
	int ready;

	/* no timeout means wait forever */
	tvp = 0;
	if (us >= 0) {
		us2tv(&tv, us);
		tvp = &tv;
	}

	FD_ZERO(&set);
	do {
		/* ask again about the socket on every try */
		FD_SET(fd, &set);
		ready = select(fd+1,
			       rd ? &set : 0,
			       rd ? 0 : &set,
			       0, tvp);
		if (ready >= 0)
			return ready;
		/* anything but an interruption is fatal */
		if (!DCC_SELECT_NERROR()) {
			dcc_pemsg(EX_OSERR, emsg, "select(): %s", ERROR_STR());
			return -1;
		}
		/* try again, but stop forever on a signal */
	} while (us >= 0);
	return 0;
#endif
}



/* Make initial estimates of the RTT to all known servers
 *      The RTT's help the client pick a server that will respond quickly and
 *      reliably and to know when to retransmit a request that is lost due
 *      to network congestion or bit rot.
 *	NOPs are sent to every usable address of the class and the answers
 *	are collected with clnt_recv() until enough addresses have answered
 *	or DCC_MAX_XMITS rounds have been sent.  Then a server is chosen
 *	with pick_srvr().
 * Both locks must be held on entry.
 * Both are released while working.
 * Both locks are held on success.
 * Only the contexts are locked on failure. */
static u_char				/* 0=failed, 1=at least 1 good server */
measure_rtt(DCC_EMSG emsg, DCC_CLNT_CTXT *ctxt,
	    DCC_SRVR_CLASS *class,
	    DCC_CLNT_FGS clnt_fgs)	/* DCC_CLNT_FG_* */
{
	DCC_SRVR_ADDR *ap;
	DCC_NOP nop;
	DCC_OP_RESP resp;
	int delay_us, next_xmit;
	int nfds, xmit_num;
	int addrs_gen;
	int tgt_addrs;
	DCC_XLOG_ENTRY *xloge;
	char ob[DCC_OPBUF], abuf[80];
	u_char vers;
	u_char connect_ok;
	int tgts, i;

	assert_info_locked();

	/* Send NOP's to all addresses and wait for responses to
	 * measure each server's health and RTT.
	 * Treat all addresses as if they are of independent hosts */

	/* there is nothing to measure without a server name */
	if (class->nms[0].hostname[0] == '\0') {
		class->srvr_inx = NO_SRVR;
		dcc_pemsg(EX_NOHOST, emsg, "no %s server names",
			  DCC_IS_GREY_STR(class));
		dcc_info_unlock(0);
		return 0;
	}

	/* build the NOP from the shared prototype header */
	memcpy(&nop.hdr, &dcc_clnt_info->proto_hdr, sizeof(nop.hdr));
	/* servers ignore the version on NOPs except to guess the version
	 * we will accept */
	nop.hdr.pkt_vers = DCC_PKT_VERSION;
	nop.hdr.op_nums.p = getpid();
	nop.hdr.op = DCC_OP_NOP;
	/* Do not change the transaction ID so that dbclean can kludge it.
	 * Dccd does not care about the transaction ID on NOPs. */

	if (!get_now(emsg, ctxt)) {
		dcc_info_unlock(0);
		return 0;
	}

	/* discourage competition from other processes and threads */
	class->measure = ctxt->now.tv_sec+FAST_RTT_SECS;

	flush_emsg(emsg, 1);

	/* remember the generation of the address table to notice
	 * if it changes while the locks are released */
	addrs_gen = class->gen;

	/* stop waiting for responses when we have enough working servers */
	tgt_addrs = class->num_srvrs;
	if (!dcc_all_srvrs && tgt_addrs > 4)
		tgt_addrs = 4;

	/* start with an empty log of transmissions */
	memset(&ctxt->xlog, 0, sizeof(ctxt->xlog));
	ctxt->xlog.base = ctxt->xlog.next = ctxt->xlog_entries;
	ctxt->xlog.last = LAST(ctxt->xlog_entries);
	delay_us = 0;
	next_xmit = 0;
	xmit_num = 0;
	/* wait for the responses to the NOPs and retransmit as needed */
	for (;;) {
		/* wait quietly until time to retransmit */
		if (delay_us <= 0) {
			if (xmit_num >= DCC_MAX_XMITS)
				break;
			if (ctxt->xlog.working_addrs >= tgt_addrs) {
				/* do not retransmit if we have heard from
				 *	enough servers
				 * quit if we have waited at least one RTT */
				if (xmit_num > 0)
					break;
				delay_us = 0;
				next_xmit = ctxt->now_us;

			} else {
				/* get delay & time of next transmission */
				delay_us = retrans_time((clnt_fgs
							& DCC_CLNT_FG_SLOW)
							? DCC_MAX_RTT
							: DCC_MIN_RTT,
							xmit_num++);
				next_xmit = delay_us + ctxt->now_us;

				/* count the addresses that have not answered
				 * and notice any known to be multi-homed */
				connect_ok = 1;
				tgts = 0;
				for (i = 0, ap = class->addrs;
				     ap <= LAST(class->addrs);
				     ++i, ++ap) {
					if (ap->ip.family == 0
					    || ctxt->xlog.cur[i].resps != 0)
					    continue;
					if (ap->flags & DCC_SRVR_ADDR_MHOME)
					    connect_ok = 0;
					++tgts;
				}
				/* Use a connected socket early to get
				 *	ICMP error messages from single server.
				 * no connection later to detect multi-homing
				 *	that makes a server appear deaf */
				if (tgts > 1
				    || xmit_num > DCC_MAX_XMITS/2)
					connect_ok = 0;
				/* send a NOP to each of those addresses.
				 * The count above did not check GOOD_NAM(),
				 * so tgts need not reach 0 here. */
				for (i = 0, ap = class->addrs;
				     tgts > 0 && ap <= LAST(class->addrs);
				     ++i, ++ap) {
					if (ap->ip.family == 0
					    || !GOOD_NAM(ap->nam_inx)
					    || ctxt->xlog.cur[i].resps != 0)
					    continue;
					--tgts;
					if (0 > clnt_xmit(ctxt, class, ap,
							&nop.hdr, sizeof(nop),
							connect_ok))
					    break;
				}
			}

			/* stop if nothing to wait for */
			if (!ctxt->xlog.outstanding)
				break;
		}

		/* release both locks while waiting for an answer.
		 * Every failure return below leaves only the contexts
		 * locked. */
		if (!dcc_info_unlock(emsg))
			return 0;
		dcc_ctxts_unlock();
		nfds = dcc_select_poll(emsg, ctxt->soc, 1, delay_us);
		dcc_ctxts_lock();
		if (nfds < 0)
			return 0;
		if (!dcc_info_lock(emsg))
			return 0;

		i = get_now(emsg, ctxt);
		if (!i) {		/* give up if the clock jumped */
			class->measure = 0;
			dcc_info_unlock(0);
			return 0;
		}
		/* the address table changed while the locks were released */
		if (addrs_gen != class->gen) {
			extra_pemsg(EX_IOERR, emsg,
				    "competition stopped RTT measurement");
			/* if we have at least one address,
			 * hope the other process will finish the job */
			if (HAVE_SRVR(class)
			    || pick_srvr(emsg, class))
				return 1;

			/* fail, but hope the other process will finish */
			dcc_info_unlock(0);
			return 0;
		}

		if (nfds > 0) {
			/* collect everything that has arrived */
			for (;;) {
				i = clnt_recv(ctxt, class,
					      &resp, sizeof(resp),
					      &nop.hdr, &xloge);
				if (i <= 0)
					break;

				if (i == 1) /* otherwise ignore Unreachable */
					continue;

				/* record the results of a probe, and notice
				 * if the server is the best so far */
				ap = &class->addrs[xloge->addr_inx];

				/* an answer other than OK makes the
				 * address bad */
				if (resp.hdr.op != DCC_OP_OK) {
					if (dcc_clnt_debug)
					    dcc_trace_msg("RTT NOP answered"
							" with %s by %s",
							dcc_hdr_op2str(ob,
							    sizeof(ob),
							    &resp.hdr),
							addr2str(abuf,
							    sizeof(abuf),
							    class,
							    xloge->addrs_gen,
							    ap, 0));
					ap->rtt = DCC_RTT_BAD;
					continue;
				}

				/* limit the server's protocol version to
				 * the range that we understand */
				vers = resp.ok.max_pkt_vers;
				if (vers >= DCC_PKT_VERSION_MAX)
					vers = DCC_PKT_VERSION_MAX;
				else if (vers < DCC_PKT_VERSION_MIN)
					vers = DCC_PKT_VERSION_MIN;
				ap->srvr_pkt_vers = vers;
				ap->srvr_id = ntohl(resp.hdr.sender);
				memcpy(ap->brand, resp.ok.brand,
				       sizeof(ap->brand));
				/* the server's delay arrives in milliseconds
				 * and is kept in microseconds */
				ap->srvr_wait = ntohs(resp.ok.qdelay_ms)*1000;

				/* the RTT is the time since the NOP was sent
				 * plus the server's announced delay */
				update_rtt(ctxt, class, xloge,
					   ctxt->now_us - xloge->sent_us
					   + ap->srvr_wait);
			}
		}

		/* stop waiting if nothing is outstanding or if enough
		 * addresses have answered after more than one transmission */
		if (ctxt->xlog.outstanding == 0
		    || (ctxt->xlog.working_addrs >= tgt_addrs
			&& xmit_num > 1))
			next_xmit = ctxt->now_us;
		delay_us = next_xmit - ctxt->now_us;
	}
	/* the contexts and the shared information are locked */
	resp_rates(ctxt, class, 1);

	if (!pick_srvr(emsg, class)) {
		fail_more(ctxt, class);
		dcc_info_unlock(0);
		return 0;
	}

	/* maintain long term average that is used to switch back to
	 * a good server that temporarily goes bad */
	if (class->thold_rtt == DCC_RTT_BAD) {
		/* There is no point in trying to change servers
		 * Maybe we have only 1 */
		class->avg_thold_rtt = DCC_RTT_BAD;
	} else if (class->avg_thold_rtt == -DCC_RTT_BAD) {
		/* We are being forced to consider changing servers.
		 * The threshold for changing will be based on the RTT
		 * for the new server */
		class->avg_thold_rtt = class->base_rtt;
	} else {
		AGE_AVG(class->avg_thold_rtt, class->base_rtt, 9, 1);
	}

	/* schedule the next measurement */
	class->measure = ctxt->now.tv_sec+FAST_RTT_SECS;

	/* Several systems do not update the mtime of a file modified with
	 * mmap().  Some like BSD/OS delay changing the mtime until the file
	 * accessed with read().  Others including filesystems on some
	 * versions of Linux apparently never change the mtime.
	 * Do not bother temporary map files that have already been unlinked
	 * to avoid problems on systems that do not have futimes() */
	if (!(dcc_clnt_info->flags & DCC_INFO_FG_TMP))
		dcc_set_mtime(emsg, dcc_info_nm, info_fd, 0);
	flush_emsg(emsg, 1);

	return 1;
}



/* Get and write-lock common info, and get ready to talk to a server:
 *	reopen the socket if needed, and pick, resolve, or measure servers
 *	as needed.
 *      The contexts but not the info must be locked.
 *      The contexts remain locked on failure.  The shared information
 *	    is locked only on success. */
u_char					/* 0=failed 1=ok */
dcc_clnt_rdy(DCC_EMSG emsg,		/* cleared of stale messages */
	     DCC_CLNT_CTXT *ctxt,
	     DCC_CLNT_FGS clnt_fgs)	/* DCC_CLNT_FG_* */
{
	DCC_SRVR_CLASS *class;
	DCC_IP cur_ip, any_ip, *want_ip;
	u_char reopen;

	if (!dcc_info_lock(emsg))
		return 0;

	/* note the starting time unless this is a retry */
	if (!(clnt_fgs & DCC_CLNT_FG_RETRY))
		get_start_time(ctxt);

	class = DCC_GREY2CLASS(clnt_fgs & DCC_CLNT_FG_GREY);

	/* just fail if things were broken and it's too soon to try again */
	if (!(clnt_fgs & DCC_CLNT_FG_NO_FAIL)
	    && !ck_fail_time(emsg, ctxt, class))
		goto fail_unlock;

	/* Compare the address to which the socket is bound with the
	 * address to which it should be bound.
	 * Do not compare the source port numbers, because even when there
	 * is no explicit source address, the local port is cached in
	 * ctxt->bind_su. */
	dcc_su2ip(&cur_ip, &ctxt->bind_su);
	cur_ip.port = 0;
	if (dcc_clnt_info->src.family != AF_UNSPEC) {
		want_ip = &dcc_clnt_info->src;
	} else {
		/* no explicit source address, so expect all zeros */
		memset(&any_ip, 0, sizeof(any_ip));
		any_ip.family = cur_ip.family;
		want_ip = &any_ip;
	}

	/* Re-open the socket if it is closed,
	 * or we have switched between IPv4 and IPv6,
	 * or if the local address has changed
	 * or if the local address was broken a long time ago. */
	reopen = 0;
	if (ctxt->soc == INVALID_SOCKET) {
		reopen = 1;
	} else if (((ctxt->flags & DCC_CTXT_USING_IPV4) != 0)
		   != !DCC_INFO_IPV6()) {
		reopen = 1;
	} else if (memcmp(want_ip, &cur_ip, sizeof(*want_ip)) != 0) {
		if (!(ctxt->flags & DCC_CTXT_SRCBAD)
		    || DCC_IS_TIME(ctxt->start.tv_sec, ctxt->bind_time,
				   DCC_CTXT_REBIND_SECS))
			reopen = 1;
	}
	if (reopen
	    && !dcc_clnt_soc_reopen(emsg, ctxt))
		goto fail_unlock;

	/* Try to pick a new server if we do not have a server
	 * or if the current server has become slow or unreliable. */
	if (!good_rtt(class))
		pick_srvr(emsg, class);

	/* Check for new IP addresses occasionally or when we have none
	 * If we cannot awaken a separate thread, do it ourself.
	 * The info is not unlocked here if that fails. */
	if ((!HAVE_SRVR(class)
	     || DCC_IS_TIME(ctxt->now.tv_sec, class->resolve, DCC_MAP_RESOLVE))
	    && (class->num_srvrs == 0
		|| !dcc_clnt_wake_resolve())
	    && !resolve_nms(emsg, ctxt, class))
		return 0;

	/* We might have switched to the current server when our
	 * best server became slow.
	 * If it has been a while, see if our best server is back
	 * by sending NOPs to all servers.
	 * The info is not unlocked here if that fails. */
	if ((class->measure == 0
	     || (DCC_IS_TIME(ctxt->now.tv_sec, class->measure, FAST_RTT_SECS)
		 && !good_rtt(class)))
	    && !(clnt_fgs & DCC_CLNT_FG_NO_PICK_SRVR)
	    && !measure_rtt(emsg, ctxt, class, clnt_fgs))
		return 0;

	/* insist on a server unless the caller can do without */
	if (!HAVE_SRVR(class) && !(clnt_fgs & DCC_CLNT_FG_BAD_SRVR_OK))
		goto fail_unlock;

	dcc_clnt_soc_flush(ctxt);
	return 1;

fail_unlock:
	/* release the shared information but keep the contexts locked */
	dcc_info_unlock(0);
	return 0;
}



/* send an operation to the server and get a response
 *      The operation and response buffers must be distinct, because the
 *	    response buffer is changed before the last use of the operation
 *	    buffer
 *	The header of *msg is overwritten with the shared prototype header.
 *	    The transaction ID that was in msg->op_nums.r is kept only
 *	    with DCC_CLNT_FG_RETRANS.
 *	If srvr_idp is not null, *srvr_idp is set to the ID of the server
 *	    that was used.  On failure it is generally set to
 *	    DCC_ID_INVALID, except when another server was found so that
 *	    the caller can try again immediately.
 *	If resp_su is not null, *resp_su is set to the address of the
 *	    server that was used.
 *	The contexts are locked on entry to this function and are unlocked
 *	    again on every return. */
u_char					/* 0=failed 1=ok */
dcc_clnt_op(DCC_EMSG emsg,
	    DCC_CLNT_CTXT *ctxt,
	    DCC_CLNT_FGS clnt_fgs,	/* DCC_CLNT_FG_* */
	    const SRVR_INX *srvr_inxp,	/* 0 or ptr to server index */
	    DCC_SRVR_ID *srvr_idp,	/* ID of server used */
	    DCC_SOCKU *resp_su,		/* IP address of server used */
	    DCC_HDR *msg, int msg_len, DCC_OPS op,
	    DCC_OP_RESP *resp, int resp_max_len)
{
	DCC_SRVR_CLASS *class;
	DCC_SRVR_ADDR *cur_addr;
#ifdef DCC_PKT_VERSION7
	DCC_REPORT  old_report;
#endif
	char addr_buf[80];
	int addrs_gen;
	DCC_OP_NUM op_num_r;
	DCC_XLOG_ENTRY *xloge;
	SRVR_INX srvr_inx;
	int xmit_num;
	int next_xmit, us, remaining, nfds;
	u_char gotit;
	int i;

	if (emsg)
		*emsg = '\0';
	dcc_ctxts_lock();
	/* map the shared information if that has not been done */
	if (!dcc_clnt_info
	    && !dcc_map_info(emsg, 0, -1)) {
		dcc_ctxts_unlock();
		if (srvr_idp)
			*srvr_idp = DCC_ID_INVALID;
		return 0;
	}
	/* Get & lock common info.
	 * insist on a server to talk to so that class->srvr_inx is sane */
	if (!dcc_clnt_rdy(emsg, ctxt,
			  clnt_fgs & ~(DCC_CLNT_FG_BAD_SRVR_OK
				       | DCC_CLNT_FG_NO_PICK_SRVR))) {
		dcc_ctxts_unlock();
		if (srvr_idp)
			*srvr_idp = DCC_ID_INVALID;
		return 0;
	}
	class = DCC_GREY2CLASS(clnt_fgs & DCC_CLNT_FG_GREY);

	/* the answer cannot be larger than a DCC_OP_RESP
	 * and must have room for at least a header */
	if (resp_max_len > ISZ(*resp))
		resp_max_len = ISZ(*resp);
	else if (resp_max_len < ISZ(resp->hdr))
		dcc_logbad(EX_SOFTWARE, "dcc_clnt_op(resp_max_len=%d)",
			   resp_max_len);

	/* use server that the caller wants,
	 * if the caller specified the valid index of a server */
	if (!srvr_inxp
	    || !GOOD_SRVR(class, srvr_inx = *srvr_inxp))
		srvr_inx = class->srvr_inx;

	/* tell the caller which server is being used */
	cur_addr = &class->addrs[srvr_inx];
	if (srvr_idp)
		*srvr_idp = cur_addr->srvr_id;
	if (resp_su)
		dcc_mk_su(resp_su, cur_addr->ip.family,
			  &cur_addr->ip.u, cur_addr->ip.port);
	/* remember the generation of the address table to notice
	 * if it changes while the locks are released */
	addrs_gen = class->gen;

	/* use a new transaction ID from the shared prototype header,
	 * but first save the caller's ID */
	++dcc_clnt_info->proto_hdr.op_nums.r;
	op_num_r = msg->op_nums.r;
	memcpy(msg, &dcc_clnt_info->proto_hdr, sizeof(*msg));
	/* old transaction ID for retransmissions */
	if (clnt_fgs & DCC_CLNT_FG_RETRANS)
		msg->op_nums.r = op_num_r;
	if (cur_addr->srvr_pkt_vers > DCC_PKT_VERSION_MAX
	    || cur_addr->srvr_pkt_vers < DCC_PKT_VERSION_MIN) {
		dcc_pemsg(EX_DATAERR, emsg, "impossible pkt_vers %d for %s",
			  cur_addr->srvr_pkt_vers,
			  addr2str(addr_buf, sizeof(addr_buf), class,
				   addrs_gen, cur_addr, 0));
		dcc_info_unlock(0);
		dcc_ctxts_unlock();
		if (srvr_idp)
			*srvr_idp = DCC_ID_INVALID;
		return 0;
	}

#ifdef DCC_PKT_VERSION7
	/* convert new report to old */
	if (cur_addr->srvr_pkt_vers <= DCC_PKT_VERSION7
	    && op == DCC_OP_REPORT) {
		DCC_TGTS tgts;

		tgts = ntohl(((DCC_REPORT *)msg)->tgts);
#ifdef DCC_PKT_VERSION4
		/* send a changed copy of a report of spam
		 * to a version 4 server */
		if (cur_addr->srvr_pkt_vers == DCC_PKT_VERSION4
		    && (tgts & DCC_TGTS_SPAM)) {
			memcpy(&old_report, msg, msg_len);
			old_report.tgts = htonl(DCC_TGTS_TOO_MANY);
			msg = &old_report.hdr;
		}
#endif
	}
#endif /* DCC_PKT_VERSION7 */

	msg->pkt_vers = cur_addr->srvr_pkt_vers;
	msg->op_nums.p = getpid();
	msg->op = op;
	gotit = 0;

	/* The measured RTTs to servers helps the client pick a server
	 * that will respond quickly and reliably and to know when to
	 * retransmit a request that is lost due to network congestion or
	 * bit rot.
	 *
	 * It is desirable for a client to concentrate its reports to
	 * a single server.  That makes detecting spam by this and other
	 * clients quicker.
	 *
	 * A client should retransmit when its initial transmission is lost
	 * due to bit rot or congestion.  In case the loss is due to
	 * congestion, it should retransmit only a limited number of
	 * times and with increasing delays between retransmissions.
	 *
	 * It is more important that some requests from clients reach
	 * a DCC server than others.  Most DCC checksum reports are not about
	 * spam, and so it is best to not spend too much network bandwidth
	 * retransmitting checksum reports or to delay the processing of the
	 * messages. Administrative commands must be tried harder.
	 * Therefore, let the caller of this routine decide whether to retry.
	 * This routine merely increases the measured RTT after failures. */

	/* start with an empty log of transmissions */
	memset(&ctxt->xlog, 0, sizeof(ctxt->xlog));
	ctxt->xlog.base = ctxt->xlog.next = ctxt->xlog_entries;
	ctxt->xlog.last = LAST(ctxt->xlog_entries);
	xmit_num = 0;
	next_xmit = ctxt->now_us;

	/* Transmit, wait for a response, and repeat if needed.
	 * The initial transmission is done as if it were a retransmission. */
	for (;;) {
		us = next_xmit - ctxt->now_us;
		if (us <= 0) {
			/* We have delayed long enough for each outstanding
			 * transmission.  We are done if we have sent enough */
			if (xmit_num >= DCC_MAX_XMITS)
				break;

			/* stop if we don't have enough time to wait */
			us = retrans_time(cur_addr->rtt, xmit_num);
			remaining =  DCC_MAX_DELAY - ctxt->now_us;
			if (us > remaining) {
				if (remaining < DCC_MIN_RTT)
					break;
				us = remaining;
			}

			/* wait as long as possible on the last try */
			if (++xmit_num == DCC_MAX_XMITS
			    && us < DCC_MAX_RTT) {
				if (remaining > DCC_MAX_RTT)
					us = DCC_MAX_RTT;
				else
					us = remaining;
			}
			next_xmit = us + ctxt->now_us;

			/* Because of the flooding algorithm among DCC servers,
			 * it is important that only a single server receive
			 * reports of the checksums for a mail message.
			 * That implies that retransmissions of reports must
			 * go to the original server, even if some other local
			 * client has re-resolved hostnames or switched
			 * to a better server.
			 * Otherwise our retransmissions to different servers
			 * would not be recognized as retransmissions but
			 * reports about separate copies of the mail message.
			 * So we should not retransmit if the server
			 * address table changes. */
			if (addrs_gen != class->gen
			    && op == DCC_OP_REPORT
			    && !(clnt_fgs & DCC_CLNT_FG_GREY)) {
				if (dcc_clnt_debug)
					dcc_trace_msg("server address"
						      " generation changed");
				break;
			}

			if (!GOOD_NAM(cur_addr->nam_inx)) {
				if (dcc_clnt_debug)
					dcc_trace_msg("server deleted");
				break;
			}

			/* use a connected socket early to get port
			 * unreachable ICMP error messages, but do not
			 * connect later to detect multi-homing */
			if (0 >= clnt_xmit(ctxt, class, cur_addr, msg, msg_len,
					   (!(cur_addr->flags
					      & DCC_SRVR_ADDR_MHOME)
					    && xmit_num < DCC_MAX_XMITS/2
					    && ctxt->now_us<=DCC_MAX_DELAY/2)))
				break;
		}

		/* release the mapped info while we wait for an answer */
		if (!dcc_info_unlock(emsg)) {
			dcc_ctxts_unlock();
			if (srvr_idp)
				*srvr_idp = DCC_ID_INVALID;
			return 0;
		}
		dcc_ctxts_unlock();
		nfds = dcc_select_poll(emsg, ctxt->soc, 1, us);
		if (nfds < 0) {
			/* note error, but we may already have an answer */
			/* NOTE(review): only the contexts are re-locked on
			 * this path.  The info was unlocked above and is
			 * not locked again before resp_rates() below;
			 * confirm that this is intended. */
			dcc_ctxts_lock();
			class = DCC_GREY2CLASS(clnt_fgs & DCC_CLNT_FG_GREY);
			break;
		}
		/* NOTE(review): neither lock is held here, and *srvr_idp
		 * keeps the ID set above instead of DCC_ID_INVALID as on
		 * the other early failure returns; confirm that this is
		 * intended. */
		if (!get_now(emsg, ctxt))
			return 0;       /* simply give up if time jumped */

		/* recover the lock so that we can record the result of the
		 * newly arrived answer in the shared and mapped file */
		dcc_ctxts_lock();
		class = DCC_GREY2CLASS(clnt_fgs & DCC_CLNT_FG_GREY);
		if (!dcc_info_lock(emsg)) {
			dcc_ctxts_unlock();
			if (srvr_idp)
				*srvr_idp = DCC_ID_INVALID;
			return 0;
		}

		if (nfds > 0) {
			u_char unreachable = 0;
			for (;;) {
				/* receive into a private buffer so that
				 * the caller's buffer changes only for
				 * an accepted answer */
				DCC_OP_RESP buf;

				i = clnt_recv(ctxt, class, &buf,
					      min(ISZ(buf), resp_max_len),
					      msg, &xloge);
				if (i <= 0)
					break;
				if (i == 1) {
					/* stop delaying after the first
					 * ICMP Unreachable message,
					 * but collect everything that has
					 * already arrived */
					unreachable = 1;
					continue;
				}

				/* the server's announced delay is added
				 * to the RTT except for reports and
				 * queries */
				update_rtt(ctxt, class, xloge,
					   ctxt->now_us - xloge->sent_us
					   + ((op != DCC_OP_REPORT
					       && op != DCC_OP_QUERY)
					      ? cur_addr->srvr_wait : 0));

				/* save the last answer we get */
				memcpy(resp, &buf, ntohs(buf.hdr.len));
				gotit = 1;
			}
			/* stop after an error, an Unreachable,
			 * or an answer */
			if (i < 0 || unreachable || gotit)
				break;
		}
	}
	/* the contexts and the shared information are locked */

	/* penalize server for lost packets */
	resp_rates(ctxt, class, 0);

	/* fail if the server did not answer at all */
	if (!gotit) {
#if 0
		system("./abort_dccd");
#endif
		flush_emsg(emsg, dcc_clnt_debug);
		dcc_pemsg(EX_TEMPFAIL, emsg, "no %s answer from %s after %d ms",
			  DCC_IS_GREY_STR(class),
			  addr2str(addr_buf, sizeof(addr_buf), class,
				   addrs_gen, cur_addr, 0),
			  ctxt->now_us/1000);
		/* Since we got no answer at all, look for a different server.
		 * If we can't find any server or a different server
		 * or if we have already spent too much time,
		 * then don't try again for a while to not delay the MTA.
		 * If we find another server, then return the valid server-ID
		 * of the non-responsive server to let the caller know that it
		 * can try again immediately. */
		if (srvr_inxp && srvr_inx == *srvr_inxp) {
			/* but only if not using a caller specified server */
			if (srvr_idp)
				*srvr_idp = DCC_ID_INVALID;
		} else if (!pick_srvr(0, class)
			   || srvr_inx == class->srvr_inx) {
			if (srvr_idp) {
				if (dcc_clnt_debug)
					dcc_trace_msg("no better alternate"
						      " for retry");
				*srvr_idp = DCC_ID_INVALID;
			}
			fail_more(ctxt, class);
		} else if (srvr_idp
			   && (i = retrans_time(class->addrs[class ->srvr_inx
							].rtt, 0),
			       ctxt->now_us + i >= DCC_MAX_DELAY)) {
			/* discourage the caller from trying the other server
			 * if the total delay after trying the other server
			 * would be excessive */
			if (dcc_clnt_debug)
				dcc_trace_msg("alternate too slow for retry"
					      " with rtt %d ms after %d ms",
					      i/1000,
					      ctxt->now_us/1000);
			*srvr_idp = DCC_ID_INVALID;
		}
		dcc_info_unlock(0);
		dcc_ctxts_unlock();
		return 0;
	}

	/* reset failure backoff */
	class->fail_exp = 0;

	if (!dcc_info_unlock(emsg)) {
		dcc_ctxts_unlock();
		if (srvr_idp)
			*srvr_idp = DCC_ID_INVALID;
		return 0;
	}
	dcc_ctxts_unlock();

	flush_emsg(emsg, dcc_clnt_debug);
	return 1;
}