view dccd/dccd.c @ 3:b689077d4918

Ignore old patches
author Peter Gervai <grin@grin.hu>
date Tue, 10 Mar 2009 14:31:24 +0100
parents c7f6b056b673
children
line wrap: on
line source

/* Distributed Checksum Clearinghouse
 *
 * server
 *
 * Copyright (c) 2008 by Rhyolite Software, LLC
 *
 * This agreement is not applicable to any entity which sells anti-spam
 * solutions to others or provides an anti-spam solution as part of a
 * security solution sold to other entities, or to a private network
 * which employs the DCC or uses data provided by operation of the DCC
 * but does not provide corresponding data to other users.
 *
 * Permission to use, copy, modify, and distribute this software without
 * changes for any purpose with or without fee is hereby granted, provided
 * that the above copyright notice and this permission notice appear in all
 * copies and any distributed versions or copies are either unchanged
 * or not called anything similar to "DCC" or "Distributed Checksum
 * Clearinghouse".
 *
 * Parties not eligible to receive a license under this agreement can
 * obtain a commercial license to use DCC by contacting Rhyolite Software
 * at sales@rhyolite.com.
 *
 * A commercial license would be for Distributed Checksum and Reputation
 * Clearinghouse software.  That software includes additional features.  This
 * free license for Distributed ChecksumClearinghouse Software does not in any
 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
 * software
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 *
 * Rhyolite Software DCC 1.3.103-1.297 $Revision$
 */

#include "dccd_defs.h"
#include <signal.h>			/* for Linux and SunOS */
#include <sys/uio.h>
#include <sys/wait.h>
#include "dcc_ifaddrs.h"

/* shared buffer for error messages produced by library calls */
DCC_EMSG dcc_emsg;

/* value of -h; handed to dcc_cdhome() after the options are parsed */
static const char *homedir;
/* -a values are only saved while parsing the options and are resolved
 * afterwards, because resolving them depends on whether -6 was seen */
static char *aargs[10];
static char **aarg = &aargs[0];		/* next free slot in aargs[] */
/* the dbclean program to run; -C replaces the default path */
static DCC_PATH dbclean_def = DCC_LIBEXECDIR"/dbclean";
static char *dbclean = dbclean_def;
static pid_t dbclean_pid = -1;		/* -1 when no dbclean child is pending */
static char *addr_str;			/* an IP address for dbclean */
/* The command line for dbclean is built as the options are parsed.
 * dbclean_argv[0] is the program name, dbclean_argv[1] is a single string
 * of flag letters to which run_dbclean() appends its mode, and the
 * remaining slots are filled by add_dbclean_arg(). */
static char dbclean_str[] = "dbclean";
#define MAX_DBCLEAN_FLAGS 50
static char dbclean_flags[MAX_DBCLEAN_FLAGS+1] = "-Pq";
static int dbclean_flags_len = LITZ("-Pq");
static const char *dbclean_argv[20] = {dbclean_str, dbclean_flags};
static int dbclean_argc = 2;
/* printable copy of the args added by add_dbclean_arg() for log messages */
static char dbclean_args_str[200];
static int dbclean_args_str_len;

/* do not try to run dbclean too often */
time_t dbclean_limit_secs = DBCLEAN_LIMIT_SECS;
static time_t dbclean_failed;

static double wfix_quiet_rate = 1.0;	/* max load that allows window fixing */
static double wfix_busy_rate;		/* measured active rate */
static double wfix_rate_change = 0.4;	/* rate reduction to this allows fix */
static DB_PTR wfix_size;
static u_char wfix_size_set = 0;
DBCLEAN_WFIX_STATE dbclean_wfix_state = WFIX_DELAY;
static double wfix_ops;
static time_t wfix_check_start;
static time_t dbclean_wfix_secs;	/* window fixing timer */
#define WFIX_POST_CLEAN_SECS (6*60*60)	/* do not fix it soon after cleaning */
#define WFIX_PRE_CLEAN_SECS (2*60*60)	/*	or just before cleaning */
#define WFIX_RECHECK_SECS   (15*60)	/* between checks for window overflow */
#define WFIX_QUIET_SECS	    (5*60)	/* waiting for clients to flee */
#define	WFIX_MEASURE_SECS   (5*60)	/* counting clients */
#define WFIX_MAX_SECS	(WFIX_PRE_CLEAN_SECS+WFIX_POST_CLEAN_SECS)

const char *need_del_dbclean;
time_t del_dbclean_next;
time_t dbclean_limit;
static time_t clean_fake_secs;		/* timer for missing cron job */
static time_t clean_last_secs;

u_long dccd_tracemask = DCC_TRACE_ON_DEF_BITS;

u_char background = 1;
int stopint;				/* !=0 if stopping or received signal */

static time_t flods_ck_secs = FLODS_CK_SECS;

const char *brand = "";

static char *my_srvr_id_str;
DCC_SRVR_ID my_srvr_id;

u_char use_ipv6 = 0;
u_int16_t def_port;			/* default port #, network byte order */
/* A singly linked list of the UDP ports given with "-a ,port" without a
 * host name.  add_ifs() makes a server socket for each of these ports on
 * each local interface address. */
typedef struct port_list {
    struct port_list *fwd;
    u_int16_t	port;			/* network byte order */
} PORT_LIST;
static PORT_LIST *ports;
/* list of server sockets, with a pointer to the final link so that
 * add_srvr_soc() can append new entries */
SRVR_SOC *srvr_socs;
static SRVR_SOC **srvr_socs_end = &srvr_socs;
/* requested SO_RCVBUF size; open_srvr_socs() lowers it in 4096 byte steps
 * until the first setsockopt() succeeds */
int srvr_rcvbuf = 1024*1024;

#ifdef USE_DBCLEAN_F
static u_char db_mode = DB_OPEN_MMAP_WRITE;
#else
static u_char db_mode = 0;
#endif

int grey_embargo = DEF_GREY_EMBARGO;	/* seconds to delay new traffic */
int grey_window = DEF_GREY_WINDOW;	/* wait as long as this */
int grey_white = DEF_GREY_WHITE;	/* remember this long */

char our_hostname[MAXHOSTNAMELEN];
DCC_SUM host_id_sum;			/* advertised with our server-ID */
time_t host_id_next, host_id_last;	/* when to advertise */

u_char anon_off;			/* turn off anonymous access */
time_t anon_delay_us = DCC_ANON_DELAY_US_DEF;   /* delay anonymous clients */
u_int anon_delay_inflate = DCC_ANON_INFLATE_OFF;

u_char stop_mode;			/* 0=normal 1=reboot 2=with/DB clean */

static int total_ops;
static QUEUE *queue_free;
static QUEUE *queue_head;

/* assume or hope we can handle 500 requests/second */
int queue_max = 500*DCC_MAX_RTT_SECS;	/* ultimate bound on queue size */
static int queue_max_cur;		/* current upper bound */
static int queue_cur;			/* current queue size */

struct timeval wake_time;		/* when we awoke from select() */
struct timeval req_recv_time;		/* when request arrived */

DCC_TS future;				/* timestamp sanity */


static DB_NOKEEP_CKS set_new_nokeep_cks, reset_new_nokeep_cks;

DCC_TGTS flod_tholds[DCC_DIM_CKS];	/* flood at or after these thresholds */

static const char *parse_rl_rate(RL_RATE *, float, const char *, const char *);
static void add_dbclean_flag(char);
static void add_dbclean_arg(const char *);
static void check_dbclean(int);
static void run_dbclean(const char *, const char *);
static void sigterm(int);
static void sighup(int);
static void stop_children(void);
static u_char get_if_changes(u_char);
static SRVR_SOC *add_srvr_soc(u_char, int, const void *, u_int16_t);
static int open_srvr_socs(int);
static void wfix_later(time_t);
static void recv_job(void) NRATTRIB;
static u_char new_job(SRVR_SOC *);
static void NRATTRIB dccd_quit(int, const char *, ...) PATTRIB(2,3);

/* Complain about the command line, but only once, and let the caller
 * continue.
 *	barg is the unrecognized option or argument,
 *	    or 0 to show only the synopsis.
 * The synopsis text carries its own "usage: " prefix, so the formats below
 * must not add another. */
static void
usage(const char* barg)
{
	static const char str[] = {
	    "usage: [-64dVbfFQ] -i server-ID [-n brand] [-h homedir]\n"
	    "   [-I [host-ID][,user]] [-a [server-addr][,server-port]]"
	    " [-q qsize]\n"
	    "   [-G [on,][weak-body,][weak-IP,][embargo],[window],[white]]\n"
	    "   [-W [rate][,chg][,dbsize]] [-K [no-]type] [-T tracemode]\n"
	    "   [-u anon-delay[,inflate]] [-C dbclean]"
	    " [-L ltype,facility.level]\n"
	    /* these labels match the ones main() gives to parse_rl_rate() */
	    "   [-R [RL_SUB],[RL_ANON],[RL_ALL_ANON],[RL_BUGS]]"
	};
	static u_char complained;	/* set after the first complaint */

	if (complained)
		return;

	if (barg)
		dcc_error_msg("unrecognized \"%s\"\n%s\n..."
			      " continuing",
			      barg, str);
	else
		dcc_error_msg("%s\n... continuing", str);
	complained = 1;
}


/* Start the DCC server.
 *
 * The work is done in this order:
 *	- parse the command line, collecting matching args for dbclean
 *	- resolve the saved -a addresses now that -4 or -6 is known
 *	- change to the home directory and load the IDs
 *	- open the server UDP sockets
 *	- become a daemon unless -b was given, and install signal handlers
 *	- open the database, running dbclean once if that fails
 *	- hand control to recv_job()
 * Bad option values are reported and ignored instead of being fatal,
 * except for a missing or bad server-ID.
 */
int NRATTRIB
main(int argc, char **argv)
{
	char *p;
	const char *rest;
	u_char print_version = 0;
	DCC_CK_TYPES type;
	DCC_SOCKU *sup;
	u_int16_t port;
	int new_embargo, new_window, new_white;
	int error, i;
	const char *cp;
	double d1, d2;
	u_long l;

	dcc_syslog_init(1, argv[0], 0);

	if (DCC_DIM_CKS != DCC_COMP_DIM_CKS)
		dcc_logbad(EX_SOFTWARE,
			   "DCC_DIM_CKS != DCC_COMP_DIM_CKS;"
			   " check uses of both");

	/* get first bytes of our hostname to name our server-ID */
	memset(our_hostname, 0, sizeof(our_hostname));
	if (0 > gethostname(our_hostname, sizeof(our_hostname)-1))
		dcc_logbad(EX_NOHOST, "gethostname(): %s", ERROR_STR());
	our_hostname[sizeof(our_hostname)-1] = '\0';
	if (our_hostname[0] == '\0')
		dcc_logbad(EX_NOHOST, "null hostname from gethostname()");
	/* host_id_sum is a fixed size field and not a string, so it is
	 * acceptable for strncpy() to leave it without a trailing '\0' */
	strncpy((char *)host_id_sum, our_hostname, sizeof(host_id_sum));

	/* default rate limits; -R can replace them below */
	parse_rl_rate(&rl_sub_rate, 0.5, "RL_SUB", "400");
	parse_rl_rate(&rl_anon_rate, RL_AVG_SECS, "RL_ANON", "50");
	parse_rl_rate(&rl_all_anon_rate, 0.5, "RL_ALL_ANON", "600");
	parse_rl_rate(&rl_bugs_rate, RL_AVG_SECS, "RL_BUGS", "0.1");

	/* this must match DCCD_GETOPTS in cron-dccd.in */
	while ((i = getopt(argc, argv,
			   "64dVbfFQi:n:h:a:I:q:G:t:W:K:T:u:C:L:R:")) != -1) {
		switch (i) {
		case '6':
#ifndef NO_IPV6
			/* 2 means "try IPv6"; open_srvr_socs() and add_ifs()
			 * later settle it to 0 or 1 */
			use_ipv6 = 2;
#endif
			break;
		case '4':
			use_ipv6 = 0;
			break;

		case 'd':
			add_dbclean_flag('d');
			++db_debug;
			break;

		case 'V':
			fprintf(stderr, DCC_VERSION"\n");
			print_version = 1;
			break;

		case 'b':
			background = 0;
			break;

		case 'f':
			db_mode &= ~DB_OPEN_MMAP_WRITE;
			add_dbclean_flag('f');
			break;

		case 'F':
			db_mode |= DB_OPEN_MMAP_WRITE;
			add_dbclean_flag('F');
			break;

		case 'Q':
			query_only = 1;
			break;

		case 'i':
			my_srvr_id_str = optarg;
			if (!dcc_get_srvr_id(dcc_emsg, &my_srvr_id,
					     my_srvr_id_str, 0, 0, 0))
				dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
			add_dbclean_arg("-i");
			add_dbclean_arg(my_srvr_id_str);
			break;

		case 'n':
			/* RFC 2822 says "values between 33 and 126" */
			cp = optarg;
			while (*cp >= 33 && *cp <= 126 && *cp != ':')
				++cp;
			if (cp == optarg
			    || (cp - optarg) > ISZ(DCC_BRAND)
			    || *cp != '\0') {
				dcc_error_msg("invalid brand name \"-n %s\"",
					      optarg);
			} else {
				brand = optarg;
			}
			break;

		case 'h':
			homedir = optarg;
			/* tell dbclean "-h ." because we will already
			 * be there and that allows our -h to be relative */
			add_dbclean_arg("-h.");
			break;

		case 'I':
			/* "[host-ID][,user]" */
			p = strchr(optarg, ',');
			if (p) {
				*p++ = '\0';
				dcc_daemon_su(p);
				if (*optarg == '\0')
					break;
			}
			if (*optarg != '\0')
				strncpy((char *)host_id_sum, optarg,
					sizeof(host_id_sum));
			break;

		case 'a':
			/* postpone checking host names until we know -6 */
			if (aarg > LAST(aargs)) {
				dcc_error_msg("too many -a args");
				break;
			}
			optarg += strspn(optarg, DCC_WHITESPACE);
			*aarg++ = optarg;
			break;

		case 'q':
			l = strtoul(optarg, &p, 10);
			if (*p != '\0' || l < 2 || l > 1000) {
				dcc_error_msg("invalid queue length \"%s\"",
					      optarg);
			} else {
				queue_max = l;
			}
			break;

		case 'G':
			grey_on = 1;
			dcc_syslog_init(1, argv[0], " grey");
			add_dbclean_arg("-Gon");
			/* handle leading "on" "weak-body", and "weak-IP" */
			rest = optarg;
			while (*rest) {
				if (dcc_ck_word_comma(&rest, "weak-body")
				    || dcc_ck_word_comma(&rest, "weak_body")
				    || dcc_ck_word_comma(&rest, "weak")) {
					grey_weak_body = 1;
					continue;
				}
				if (dcc_ck_word_comma(&rest, "weak-IP")
				    || dcc_ck_word_comma(&rest, "weak_IP")) {
					grey_weak_ip = 1;
					continue;
				}
				if (!dcc_ck_word_comma(&rest, "on"))
					break;
			}
			/* The three times must not decrease, so each is
			 * parsed with the preceding value as its minimum.
			 * Nothing is changed unless all three are good. */
			new_embargo = dcc_get_secs(rest, &rest,
						   0, MAX_GREY_EMBARGO,
						   grey_embargo);
			if (new_embargo < 0) {
				dcc_error_msg("invalid greylist embargo"
					      " \"-G %s\"", optarg);
				break;
			}
			new_window = dcc_get_secs(rest, &rest,
						  new_embargo, MAX_GREY_WINDOW,
						  max(new_embargo,grey_window));
			if (new_window < 0) {
				dcc_error_msg("invalid greylist wait time"
					      " \"-G %s\"", optarg);
				break;
			}
			new_white = dcc_get_secs(rest, &rest,
						 new_window, MAX_GREY_WHITE,
						 max(new_window, grey_white));
			if (new_white < 0 || *rest != '\0') {
				dcc_error_msg("invalid greylist whitelist time"
					      " \"-G %s\"", optarg);
				break;
			}
			grey_embargo = new_embargo;
			grey_window = new_window;
			grey_white = new_white;
			break;

		case 't':		/* obsolete */
			break;

		case 'W':
			/* "[rate][,chg][,dbsize]"
			 * An omitted field keeps its current value.
			 * Nothing is changed unless all fields are good. */
			p = optarg;
			if (*p == '\0') {
				dcc_error_msg("unrecognized"
					      " \"-W %s\"", optarg);
				break;
			}
			d1 = wfix_quiet_rate;
			if (*p != '\0' && *p != ',') {
				d1 = strtod(p, &p);
				if ((*p != '\0' && *p != ',')
				    || d1 < 0.0 || d1 > 1000*1000.0) {
					dcc_error_msg("bad quiet rate in"
						      " \"-W %s\"", optarg);
					break;
				}
			}
			if (*p == ',')
				++p;
			d2 = wfix_rate_change;
			if (*p != '\0' && *p != ',') {
				/* the second field is the rate change and
				 * must not disturb the quiet rate in d1 */
				d2 = strtod(p, &p);
				if ((*p != '\0' && *p != ',')
				    || d2 < 0.0 || d2 > 1000*1000.0) {
					dcc_error_msg("bad rate change in"
						      " \"-W %s\"", optarg);
					break;
				}
			}
			if (*p == ',')
				++p;
			l = wfix_size/(1024*1024);
			if (*p != '\0') {
				l = strtoul(p, &p, 10);
				/* NOTE(review): l is in MBytes but wfix_size
				 * is in bytes, so the final comparison looks
				 * suspicious; confirm the intent */
				if ((*p != '\0' || l < DB_MIN_MIN_MBYTE
				     || l > MAX_MAX_DB_MBYTE)
				    && l != wfix_size) {
					dcc_error_msg("bad database size in"
						      " \"-W %s\"", optarg);
					break;
				}
			}
			wfix_quiet_rate = d1;
			wfix_rate_change = d2;
			if (wfix_size/(1024*1024) != l) {
				wfix_size = ((DB_PTR)l)*(1024*1024);
				wfix_size_set = 1;
			}
			break;

		case 'K':
			if (!strcasecmp(optarg, "all")) {
				reset_new_nokeep_cks = -1;
				break;
			}
			if (!CLITCMP(optarg, "no-")) {
				optarg += LITZ("no-");
				i = 0;
			} else {
				i = 1;
			}
			type = dcc_str2type_db(optarg, -1);
			if (type == DCC_CK_INVALID) {
				dcc_error_msg("bad checksum type in"
					      " \"-K %s\"", optarg);
				break;
			}
			if (i)
				DB_SET_NOKEEP(reset_new_nokeep_cks, type);
			else
				DB_SET_NOKEEP(set_new_nokeep_cks, type);
			break;

		case 'T':
			if (!strcasecmp(optarg, "ADMN")) {
				dccd_tracemask |= DCC_TRACE_ADMN_BIT;
			} else if (!strcasecmp(optarg, "ANON")) {
				dccd_tracemask |= DCC_TRACE_ANON_BIT;
			} else if (!strcasecmp(optarg, "CLNT")) {
				dccd_tracemask |= DCC_TRACE_CLNT_BIT;
			} else if (!strcasecmp(optarg, "RLIM")) {
				dccd_tracemask |= DCC_TRACE_RLIM_BIT;
			} else if (!strcasecmp(optarg, "QUERY")) {
				dccd_tracemask |= DCC_TRACE_QUERY_BIT;
			} else if (!strcasecmp(optarg, "RIDC")) {
				dccd_tracemask |= DCC_TRACE_RIDC_BIT;
			} else if (!strcasecmp(optarg, "FLOOD")) {
				dccd_tracemask |= DCC_TRACE_FLOD_BIT;
			} else if (!strcasecmp(optarg, "FLOOD2")) {
				dccd_tracemask |= DCC_TRACE_FLOD2_BIT;
			} else if (!strcasecmp(optarg, "IDS")) {
				dccd_tracemask |= DCC_TRACE_IDS_BIT;
			} else if (!strcasecmp(optarg, "BL")) {
				dccd_tracemask |= DCC_TRACE_BL_BIT;
			} else if (!strcasecmp(optarg, "DB")) {
				dccd_tracemask |= DCC_TRACE_DB_BIT;
			} else if (!strcasecmp(optarg, "WLIST")) {
				dccd_tracemask |= DCC_TRACE_WLIST_BIT;
			} else {
				dcc_error_msg("invalid trace mode \"%s\"",
					      optarg);
			}
			break;

		case 'u':
			i = parse_dccd_delay(dcc_emsg, &anon_delay_us,
					     &anon_delay_inflate, optarg,
					     0, 0);
			if (!i) {
				dcc_error_msg("%s", dcc_emsg);
			} else if (i == 2) {
				anon_off = 1;
			} else {
				anon_off = 0;
			}
			break;

		case 'C':
			if (*optarg == '\0') {
				dcc_error_msg("no path to dbclean \"-C %s\"",
					      optarg);
				break;
			}
			/* capture the path to the dbclean program */
			dbclean = optarg;
			/* capture any args following the program */
			for (p = strpbrk(optarg, DCC_WHITESPACE);
			     p != 0;
			     p = strpbrk(p, DCC_WHITESPACE)) {
				*p++ = '\0';
				p += strspn(p, DCC_WHITESPACE);
				if (*p == '\0')
					break;
				add_dbclean_arg(p);
			}
			break;

		case 'L':
			if (dcc_parse_log_opt(optarg)) {
				add_dbclean_arg("-L");
				add_dbclean_arg(optarg);
			}
			break;

		case 'R':
			/* a negative penalty keeps the existing penalty */
			rest = parse_rl_rate(&rl_sub_rate, -1.0,
					     "RL_SUB", optarg);
			rest = parse_rl_rate(&rl_anon_rate, -1.0,
					     "RL_ANON", rest);
			rest = parse_rl_rate(&rl_all_anon_rate, -1.0,
					     "RL_ALL_ANON", rest);
			rest = parse_rl_rate(&rl_bugs_rate, -1.0,
					     "RL_BUGS", rest);
			break;

		default:
			usage(optopt2str(optopt));
		}
	}
	argc -= optind;
	argv += optind;
	if (argc != 0)
		usage(argv[0]);

	if (my_srvr_id == DCC_ID_INVALID) {
		/* "-V" by itself only prints the version */
		if (print_version)
			exit(EX_OK);
		dcc_logbad(EX_USAGE, "server-ID not set with -i");
	}

	if (grey_on) {
		anon_off = 1;
		dccd_tracemask |= DCC_TRACE_IDS_BIT;
	}

	/* parse addresses after we know whether -6 was among the args */
	for (aarg = &aargs[0]; aarg <= LAST(aargs) && *aarg; ++aarg) {
		char hostname[DCC_MAXDOMAINLEN];

		addr_str = *aarg;
		rest = dcc_parse_nm_port(dcc_emsg, *aarg, 0,
					 hostname, sizeof(hostname),
					 &port, 0, 0, 0, 0);
		if (!rest) {
			dcc_error_msg("%s", dcc_emsg);
			continue;
		}
		rest += strspn(rest, DCC_WHITESPACE);
		if (*rest != '\0')
			dcc_error_msg("unrecognized port number in"
				      " \"-a %s\"", *aarg);
		if (hostname[0] == '\0') {
			/* only a port, so listen on it on every interface */
			PORT_LIST *pport = dcc_malloc(sizeof(*pport));
			pport->port = port;
			pport->fwd = ports;
			ports = pport;
			continue;
		}
		if (!strcmp(hostname, "@")) {
			++addr_str;	/* "" but not a const */
			add_srvr_soc(SRVR_SOC_ADDR, AF_UNSPEC, 0, port);
			continue;
		}
		dcc_host_lock();
		if (!dcc_get_host(hostname, use_ipv6 ? 2 : 0, &error)) {
			dcc_host_unlock();
			dcc_error_msg("%s: %s",
				      *aarg, DCC_HSTRERROR(error));
			continue;
		}
		for (sup = dcc_hostaddrs; sup < dcc_hostaddrs_end; ++sup) {
			if (sup->sa.sa_family == AF_INET)
				add_srvr_soc(SRVR_SOC_ADDR, AF_INET,
					     &sup->ipv4.sin_addr, port);
			else
				add_srvr_soc(SRVR_SOC_ADDR, AF_INET6,
					     &sup->ipv6.sin6_addr, port);
		}
		dcc_host_unlock();
	}
	if (addr_str) {
		/* tell dbclean about one "-a addr" */
		add_dbclean_arg("-a");
		add_dbclean_arg(addr_str);
	}

	dcc_clnt_unthread_init();
	if (!dcc_cdhome(dcc_emsg, homedir, 0))
		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);

	i = check_load_ids(1);
	if (i < 0)
		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
	else if (!i)
		dcc_error_msg("%s", dcc_emsg);

	if (!def_port)
		def_port = DCC_GREY2PORT(grey_on);
	/* without any -a, listen on the default port on every interface */
	if (!srvr_socs && !ports) {
		ports = dcc_malloc(sizeof(*ports));
		ports->fwd = 0;
		ports->port = def_port;
	}
	get_if_changes(db_debug != 0);

	/* make initial attempt to open the server UDP sockets
	 * This also sets use_ipv6 to 0 or 1 if it is still 2 */
	if (open_srvr_socs(45) <= 0)
		dcc_logbad(EX_OSERR, "failed to open any server sockets");

	add_dbclean_flag(use_ipv6 == 0 ? '4' : '6');

	if (background) {
		if (0 > daemon(1, 0))
			dcc_logbad(EX_OSERR, "daemon(): %s", ERROR_STR());
	}

	if (!background)
		signal(SIGHUP, sigterm);    /* SIGHUP fatal during debugging */
	else
		signal(SIGHUP, sighup);	/* speed configuration check */
	signal(SIGTERM, sigterm);
	signal(SIGINT, sigterm);
	signal(SIGPIPE, SIG_IGN);
#ifdef SIGXFSZ
	signal(SIGXFSZ, SIG_IGN);
#endif

	atexit(stop_children);

	gettimeofday(&db_time, 0);
	wake_time = db_time;

	flod_mmap_path_set();

	/* open the database, and try once to fix it */
	if (!dccd_db_open(DB_OPEN_LOCK_NOWAIT)) {
		dcc_error_msg("%s", dcc_emsg);
		run_dbclean("SRbad", "database initially broken");
		check_dbclean(0);	/* stall until dbclean finishes */
		if (!dccd_db_open(DB_OPEN_LOCK_NOWAIT)) {
			dcc_error_msg("%s", dcc_emsg);
			dcc_logbad(EX_NOINPUT,
				   "could not start database %s", db_nm);
		}
	}
	/* do not start if dbclean is running */
	if (!lock_dbclean(dcc_emsg, db_nm))
		dcc_logbad(dcc_ex_code, "%s: dbclean already running?",
			   dcc_emsg);
	unlock_dbclean();

	flod_trace_gen = db_time.tv_sec;
	host_id_next = db_time.tv_sec + DCC_SRVR_ID_SECS_ST;
	check_blacklist_file();
	flods_init();
	clients_load();
	stats_clear();
	/* resume the statistics saved in the flod map file, if any */
	if (flod_mmaps != 0
	    && flod_mmaps->dccd_stats.reset.tv_sec != 0) {
		memcpy(&dccd_stats, &flod_mmaps->dccd_stats,
		       sizeof(dccd_stats));
	}
	dcc_trace_msg(DCC_VERSION" listening to port %d  %s  %s",
		      ntohs(srvr_socs->arg_port),
		      dcc_homedir, db_window_size_str);

	recv_job();
}



/* Find the entry in the list of server sockets for an address and port,
 * or append a new one.
 *	flags are used only for a new entry.  An existing entry is returned
 *	    with its flags untouched, which is how add_ifs() can use
 *	    SRVR_SOC_NEW to tell a new entry from an old one.
 *	family is AF_UNSPEC when addrp is 0.
 *	addrp is 0 for "any address", or points to a struct in_addr or
 *	    struct in6_addr.
 *	port is in network byte order.
 * A new entry has no open sockets; open_srvr_socs() opens them later.
 */
static SRVR_SOC *			/* 0 or new entry */
add_srvr_soc(u_char flags,
	     int family,		/* AF_UNSPEC or 0 if addrp==0 */
	     const void *addrp,		/* 0, *in_addr, or *in6_addr */
	     u_int16_t port)
{
	DCC_SOCKU su;
	SRVR_SOC *sp;
	size_t addr_len;

	addr_len = ((family == AF_INET)
		    ? sizeof(sp->arg_addr.in4)
		    : sizeof(sp->arg_addr.in6));

	dcc_mk_su(&su, family, addrp, port);
	for (sp = srvr_socs; sp; sp = sp->fwd) {
		if (sp->arg_family != family
		    || sp->arg_port != port)
			continue;
		/* Entries made without an address have no address to
		 * compare, and memcmp() must not be given a null pointer.
		 * The family and port are enough to identify them. */
		if (!addrp
		    || !memcmp(&sp->arg_addr, addrp, addr_len))
			return sp;
	}

	sp = dcc_malloc(sizeof(*sp));
	memset(sp, 0, sizeof(*sp));
	sp->flags = flags;
	sp->udp = -1;
	sp->listen = -1;
	sp->su = su;
	sp->arg_family = family;
	if (addrp)
		memcpy(&sp->arg_addr, addrp, addr_len);
	sp->arg_port = port;

	/* append to keep the list in the order the entries were made */
	*srvr_socs_end = sp;
	srvr_socs_end = &sp->fwd;

	return sp;
}



/* Open the UDP sockets in the list of server sockets that are not
 * already open and decide whether IPv6 is in use.
 *	retry_secs is passed by reference to dcc_udp_bind() unless it is 0.
 * Returns the number of sockets that are open, or -1 if the server is
 * stopping.
 * On return use_ipv6 is 0 or 1 and no longer 2.
 */
static int				/* # of sockets opened */
open_srvr_socs(int retry_secs)
{
	static u_char srvr_rcvbuf_set = 0;
	int *retry_secsp;
	u_char family;
	u_char bound;
	SRVR_SOC *sp;
	int i;
	int num_socs = 0;

	if (stopint)
		return -1;

	retry_secsp = retry_secs ? &retry_secs : 0;

	for (sp = srvr_socs; sp; sp = sp->fwd) {
		if (sp->udp >= 0) {
			++num_socs;
			continue;
		}

		/* resolve default port number if we finally know it */
		if (!sp->arg_port)
			sp->arg_port = def_port;

		family = sp->arg_family;
		bound = 0;

		/* create the UDP socket
		 * If we are using INADDR_ANY
		 * and do not yet know if IPv6 works, just try it */
		if (family == AF_UNSPEC
		    && use_ipv6 == 2) {
			dcc_mk_su(&sp->su, AF_INET6,
				  &sp->arg_addr, sp->arg_port);
			i = dcc_udp_bind(dcc_emsg, &sp->udp,
					 &sp->su, retry_secsp);
			if (i == 0) {
				dcc_error_msg("%s", dcc_emsg);
				/* still don't know about IPv6 */
				continue;
			}
			if (i > 0) {
				/* we finished an INADDR_ANY socket
				 * and learned that IPv6 works.
				 * The socket is open, so it must get a
				 * receive buffer and be counted below
				 * like any other. */
				use_ipv6 = 1;
				bound = 1;
			} else {
				/* we know or guess that IPv6 does not work */
				use_ipv6 = 0;
			}
		}

		if (!bound) {
			if (family == AF_UNSPEC) {
				/* using INADDR_ANY but now know
				 * whether IPv6 works */
				family = use_ipv6 ? AF_INET6 : AF_INET;
			} else if (family == AF_INET6 && use_ipv6 == 2) {
				/* don't know if IPv6 works
				 * but have an IPv6 address */
				use_ipv6 = 1;
			}

			dcc_mk_su(&sp->su, family,
				  &sp->arg_addr, sp->arg_port);
			if (0 >= dcc_udp_bind(dcc_emsg, &sp->udp,
					      &sp->su, retry_secsp)) {
				dcc_error_msg("%s", dcc_emsg);
				continue;
			}
		}

		/* set socket receive buffer size as large as possible
		 * The size is reduced only while looking for the first
		 * size that works.  After that a failure is reported. */
		for (;;) {
			if (!setsockopt(sp->udp, SOL_SOCKET, SO_RCVBUF,
					&srvr_rcvbuf, sizeof(srvr_rcvbuf)))
				break;
			if (srvr_rcvbuf_set || srvr_rcvbuf <= 4096) {
				dcc_error_msg("setsockopt(%s,SO_RCVBUF=%d): %s",
					      dcc_su2str_err(&sp->su),
					      srvr_rcvbuf,
					      ERROR_STR());
				break;
			}
			srvr_rcvbuf -= 4096;
		}
		srvr_rcvbuf_set = 1;

		++num_socs;
	}

	/* Finally decide the IPv6 issue if we found no sign of IPv6 */
	if (use_ipv6 == 2)
		use_ipv6 = 0;

	return num_socs;
}



/* get ready to bind to all local IP addresses
 *	For every port in the list of ports, find or make an entry in the
 *	list of server sockets for each address of each interface that is up.
 *	Entries that are still wanted have their SRVR_SOC_MARK bit cleared,
 *	so that get_if_changes() can discard the others.
 *	If getifaddrs() is not available or yields no usable address,
 *	fall back to one INADDR_ANY entry for each port.
 *	Nothing is done and 0 is returned if the list of ports is empty. */
static u_char				/* 1=added an interface */
add_ifs(u_char not_quiet)
{
	u_char added;
	SRVR_SOC *sp;
	PORT_LIST *pport;
#ifdef HAVE_GETIFADDRS
	struct ifaddrs *ifap0, *ifap;
	int num_ifs;
#endif

	if (!ports)
		return 0;

	added = 0;

#ifdef HAVE_GETIFADDRS
	if (0 > getifaddrs(&ifap0)) {
		dcc_error_msg("getifaddrs(): %s", ERROR_STR());
		ifap0 = 0;
	}

	num_ifs = 0;
	for (pport = ports; pport; pport = pport->fwd) {
		/* the one entry for this port that has SRVR_SOC_LISTEN */
		const SRVR_SOC *listener = 0;

		for (ifap = ifap0; ifap; ifap = ifap->ifa_next) {
			if (!(ifap->ifa_flags & IFF_UP))
				continue;
			if (!ifap->ifa_addr)
				continue;
			switch (ifap->ifa_addr->sa_family) {
			case AF_INET:
				++num_ifs;
				sp = add_srvr_soc(SRVR_SOC_IF | SRVR_SOC_NEW,
						  ifap->ifa_addr->sa_family,
						  &((struct sockaddr_in *)ifap
						    ->ifa_addr)->sin_addr,
						  pport->port);
				break;
			case AF_INET6:
				/* ignore IPv6 addresses if IPv6 is off,
				 * and otherwise take the first one as a
				 * sign that IPv6 is in use */
				if (use_ipv6 == 0)
					continue;
				if (use_ipv6 == 2)
					use_ipv6 = 1;
				++num_ifs;
				sp = add_srvr_soc(SRVR_SOC_IF | SRVR_SOC_NEW,
						  ifap->ifa_addr->sa_family,
						  &((struct sockaddr_in6*)ifap
						    ->ifa_addr)->sin6_addr,
						  pport->port);
				break;
			default:
				continue;
			}
			/* add_srvr_soc() applies the flags only to an entry
			 * that it creates, so SRVR_SOC_NEW is set only on
			 * an entry that did not exist before */
			if (sp->flags & SRVR_SOC_NEW) {
				added = 1;
				if (not_quiet)
					dcc_trace_msg("start listening on %s",
						      dcc_su2str_err(&sp->su));
			}
			sp->flags &= ~(SRVR_SOC_MARK | SRVR_SOC_NEW);

			/* interfaces can have duplicate addresses */
			if (listener == sp)
				continue;
			/* give SRVR_SOC_LISTEN to the first entry for this
			 * port and take it from the others, reporting a
			 * change whenever a bit actually changes */
			if (!listener) {
				listener = sp;
				if (!(sp->flags & SRVR_SOC_LISTEN)) {
					sp->flags |= SRVR_SOC_LISTEN;
					added = 1;
				}
			} else {
				if (sp->flags & SRVR_SOC_LISTEN) {
					sp->flags &= ~SRVR_SOC_LISTEN;
					added = 1;
				}
			}
		}
	}
#ifdef HAVE_FREEIFADDRS
	/* since this is done only a few times when HAVE_FREEIFADDRS is not
	 * defined, don't worry if we cannot release the list of interfaces */
	freeifaddrs(ifap0);
#endif

	if (num_ifs > 0)
		return added;
#endif /* HAVE_GETIFADDRS */

	/* if we got no joy from getifaddrs(), use INADDR_ANY */
	for (pport = ports; pport; pport = pport->fwd) {
		sp = add_srvr_soc(SRVR_SOC_IF | SRVR_SOC_LISTEN | SRVR_SOC_NEW,
				  AF_UNSPEC, 0, pport->port);
		if (sp->flags & SRVR_SOC_NEW) {
			added = 1;
			if (not_quiet)
				dcc_trace_msg("fallback listen on %s",
					      dcc_su2str_err(&sp->su));
		}
		sp->flags &= ~(SRVR_SOC_MARK | SRVR_SOC_NEW);
	}

	return added;
}



/* Bring the list of server sockets up to date with the network interfaces.
 *	Every entry that came from an interface is marked, add_ifs() clears
 *	the mark on each entry that is still wanted, and whatever remains
 *	marked is closed and removed from the list. */
static u_char				/* 1=something changed */
get_if_changes(u_char not_quiet)
{
	SRVR_SOC **linkp, *cur;
	u_char any_change;

	/* tentatively condemn all of the sockets made for interfaces */
	for (cur = srvr_socs; cur != 0; cur = cur->fwd) {
		if ((cur->flags & SRVR_SOC_IF) != 0)
			cur->flags |= SRVR_SOC_MARK;
	}

	any_change = add_ifs(not_quiet);

	/* linkp always points to the link that points to cur */
	for (linkp = &srvr_socs; (cur = *linkp) != 0; ) {
		if ((cur->flags & SRVR_SOC_MARK) != 0) {
			/* the interface is gone, so forget its socket */
			dcc_trace_msg("stop listening on %s",
				      dcc_su2str_err(&cur->su));
			any_change = 1;

			/* keep the end of the list valid if this is
			 * the last entry */
			if (srvr_socs_end == &cur->fwd)
				srvr_socs_end = linkp;
			*linkp = cur->fwd;

			if (cur->udp >= 0)
				close(cur->udp);
			if (cur->listen >= 0)
				close(cur->listen);
			dcc_free(cur);
		} else {
			/* still wanted, so step past it */
			linkp = &cur->fwd;
		}
	}

	return any_change;
}



/* Parse one comma separated field of a list of rate limits.
 *	rate is the rate limit to set.
 *	penalty_secs replaces the penalty of the rate limit unless negative.
 *	label names the field in error messages.
 *	arg is the rest of the list, starting with the field to parse.
 * Returns the start of the next field.  That is arg itself at the end of
 * the list and "" after a bad value, so that a caller which keeps parsing
 * leaves the remaining rate limits unchanged.
 * An empty field leaves the rates unchanged.
 */
static const char *
parse_rl_rate(RL_RATE *rate, float penalty_secs,
	      const char *label, const char *arg)
{
	char *p;
	double scaled;
	int per_sec = 0, hi = 0;
	u_char bad;

	if (penalty_secs >= 0.0)
		rate->penalty_secs = penalty_secs;

	if (*arg == '\0')
		return arg;

	if (*arg == ',')
		return ++arg;

	/* Check the range before converting to an integer, because the
	 * conversion of a double that is too big for an int or not a number
	 * is undefined.  The comparisons are written so that a NaN fails.
	 * This assumes that RL_MAX_CREDITS fits in an int.
	 * A value is too big only if it would still exceed RL_MAX_CREDITS
	 * after its fraction is discarded. */
	scaled = strtod(arg, &p) * RL_SCALE;
	bad = ((*p != '\0' && *p != ',')
	       || !(scaled >= 0.0)
	       || scaled >= RL_MAX_CREDITS + 1.0);
	if (!bad) {
		per_sec = scaled;
		hi = per_sec*RL_AVG_SECS;
		/* the limit must allow at least one whole event */
		bad = (hi < RL_SCALE);
	}
	if (bad) {
		dcc_error_msg("invalid %s value in \"%s\"",
			      label, arg);
		return "";
	}

	/* maximum events/second * RL_SCALE */
	rate->per_sec = per_sec;

	/* maximum allowed accumulated credits */
	rate->hi = hi;

	/* minimum credit account balance */
	rate->lo = -per_sec * rate->penalty_secs;

	return (*p == ',') ? p+1 : p;
}



/* Add a letter to the string of flags given to dbclean.
 *	run_dbclean() writes its mode after the flags without counting it,
 *	so the string is terminated again here to keep the remains of an
 *	old mode from following the new flag. */
static void
add_dbclean_flag(char flag)
{
	if (dbclean_flags_len >= MAX_DBCLEAN_FLAGS)
		dcc_logbad(EX_SOFTWARE, "too many flags for dbclean");
	dbclean_flags[dbclean_flags_len++] = flag;
	/* there is room because the array has MAX_DBCLEAN_FLAGS+1 bytes */
	dbclean_flags[dbclean_flags_len] = '\0';
}



/* Add an argument to the command line for dbclean.
 *	The pointer itself is saved, so the string must outlive any
 *	later use of the command line.
 *	A printable copy is also collected for log messages.
 *	Running out of room in either place is treated as a fatal error. */
static void
add_dbclean_arg(const char *arg)
{
	char *tail;
	int written;

	/* leave room for the null pointer that ends the list */
	if (dbclean_argc >= DIM(dbclean_argv)-2)
		dcc_logbad(EX_SOFTWARE, "too many args for dbclean");
	dbclean_argv[dbclean_argc] = arg;
	++dbclean_argc;

	tail = &dbclean_args_str[dbclean_args_str_len];
	written = snprintf(tail,
			   sizeof(dbclean_args_str)-dbclean_args_str_len,
			   " %s", arg);
	/* snprintf() reports the length that was wanted, so a string that
	 * was truncated pushes the total past the limit */
	dbclean_args_str_len += written;
	if (dbclean_args_str_len >= ISZ(dbclean_args_str)-2)
		dcc_logbad(EX_SOFTWARE, "too many args for dbclean");
}



/* See whether the dbclean child has finished and note how it went.
 *	options are given to waitpid(), so 0 waits for the child.
 *	Nothing happens if there is no child or if it has not finished.
 *	The delay before dbclean may be started again is doubled after each
 *	failure, up to DEL_DBCLEAN_SECS, and reset after a success. */
static void
check_dbclean(int options)
{
	int wstatus;
	u_char succeeded;

	if (dbclean_pid < 0)
		return;

	if (waitpid(dbclean_pid, &wstatus, options) != dbclean_pid)
		return;

	dbclean_pid = -1;

	/* do not try failing dbclean too often */
#if defined(WIFEXITED) && defined(WEXITSTATUS) && defined(WTERMSIG) && defined(WIFSIGNALED)
	succeeded = 1;
	if (WIFSIGNALED(wstatus)) {
		dcc_error_msg("dbclean exited with signal %d",
			      WTERMSIG(wstatus));
		succeeded = 0;
	} else if (WIFEXITED(wstatus)) {
		int code = WEXITSTATUS(wstatus);

		if (code != EX_OK) {
			/* exit codes 101 through 129 are reported as
			 * a signal number plus 100 */
			if (code > 100 && code < 130)
				dcc_error_msg("dbclean stopped after signal %d",
					      code-100);
			else
				dcc_error_msg("dbclean exited with status %d",
					      code);
			succeeded = 0;
		}
	}
#else
	succeeded = (wstatus == EX_OK);
#endif
	if (!succeeded) {
		dbclean_failed = db_time.tv_sec;
		dbclean_limit_secs *= 2;
		if (dbclean_limit_secs > DEL_DBCLEAN_SECS)
			dbclean_limit_secs = DEL_DBCLEAN_SECS;
	} else {
		dbclean_failed = 0;
		dbclean_limit_secs = DBCLEAN_LIMIT_SECS;
	}

	/* don't restart dbclean until after it has stopped running
	 * and cooled for a while */
	dbclean_limit = db_time.tv_sec + dbclean_limit_secs;
}



/* Start dbclean as a child process to clean or repair the database.
 *	mode is added to the end of the flags for this run only.
 *	reason is logged by the child with the command that it starts.
 *	Any previous dbclean child is waited for first.
 *	The child is not waited for here; see check_dbclean(). */
static void
run_dbclean(const char *mode,		/* combination of '', R, S, and W */
	    const char *reason)
{
	int mode_len;

	check_dbclean(0);		/* wait until previous ends */

	wfix_later(WFIX_RECHECK_SECS);

	/* The mode goes after the permanent flags without being counted,
	 * so that the next run writes over it. */
	mode_len = snprintf(dbclean_flags+dbclean_flags_len,
			    ISZ(dbclean_flags)-dbclean_flags_len, "%s",
			    mode);
	if (mode_len+dbclean_flags_len >= ISZ(dbclean_flags))
		dcc_logbad(EX_SOFTWARE, "too many flags for dbclean");

	dbclean_pid = fork();
	if (dbclean_pid == 0) {
		/* the child becomes dbclean or complains and quits */
		dcc_trace_msg("%s; starting `%s %s%s`",
			      reason, dbclean, dbclean_flags, dbclean_args_str);
		execv(dbclean, (char **)dbclean_argv);
		dcc_error_msg("execv(%s %s%s): %s",
			      dbclean, dbclean_flags, dbclean_args_str,
			      ERROR_STR());
		exit(-1);
	}
	if (dbclean_pid < 0)
		dcc_error_msg("dbclean fork(): %s", ERROR_STR());

	need_del_dbclean = 0;
	dbclean_limit = db_time.tv_sec + dbclean_limit_secs;
}



/* Close the UDP socket of every entry in the list of server sockets
 * and have iflod_listen_close() deal with each entry.
 *	The entries themselves stay in the list. */
static void
close_srvr_socs(void)
{
	SRVR_SOC *soc = srvr_socs;

	while (soc != 0) {
		if (soc->udp >= 0) {
			close(soc->udp);
			soc->udp = -1;	/* mark it as not open */
		}
		iflod_listen_close(soc);
		soc = soc->fwd;
	}
}



/* Clean up in a process that was forked to be a helper.
 *	Forget the recorded process-IDs of the resolver and dbclean,
 *	close the server sockets and the flooding sockets,
 *	and restore the default handling of the signals set by main(). */
void
after_fork(void)
{
	IFLOD_INFO *in_flod;
	OFLOD_INFO *out_flod;

	resolve_hosts_pid = -1;
	dbclean_pid = -1;

	close_srvr_socs();

	in_flod = iflods.infos;
	while (in_flod <= LAST(iflods.infos)) {
		if (in_flod->soc >= 0)
			close(in_flod->soc);
		++in_flod;
	}

	out_flod = oflods.infos;
	while (out_flod <= LAST(oflods.infos)) {
		if (out_flod->soc >= 0)
			close(out_flod->soc);
		++out_flod;
	}

	signal(SIGHUP, SIG_DFL);
	signal(SIGTERM, SIG_DFL);
	signal(SIGINT, SIG_DFL);
	signal(SIGPIPE, SIG_DFL);
#ifdef SIGXFSZ
	signal(SIGXFSZ, SIG_DFL);
#endif
}



/* Put off thinking about cleaning to fix a window overflow.
 *	delay is the number of seconds from now until the next check. */
static void
wfix_later(time_t delay)
{
	time_t next_check = db_time.tv_sec + delay;

	dbclean_wfix_secs = next_check;
	dbclean_wfix_state = WFIX_DELAY;
}



/* Measure the rate of operations since the measuring began.
 *	force starts a new measurement.
 * Returns the operations per second, or -1.0 if a new measurement was
 * started instead.  A new measurement is also started if the old one
 * is unusable because no time has passed, more than twice
 * WFIX_MEASURE_SECS has passed, or the count of operations has gone down. */
static double
wfix_measure(u_char force)
{
	double elapsed = db_time.tv_sec - wfix_check_start;

	if (!force
	    && elapsed > 0.0
	    && elapsed <= WFIX_MEASURE_SECS*2
	    && total_ops >= wfix_ops)
		return (total_ops - wfix_ops) / elapsed;

	/* start counting again and look at the result later */
	wfix_ops = total_ops;
	wfix_check_start = db_time.tv_sec;
	dbclean_wfix_secs = db_time.tv_sec + WFIX_MEASURE_SECS;
	return -1.0;
}



/* Advance the state machine that decides whether a quick dbclean is needed
 * because the database files have grown past their size limit.
 *
 *	reason	    buffer that receives the explanation when 1 is returned
 *	reason_len  size of that buffer in bytes
 *
 * The states are visited in the order WFIX_DELAY -> WFIX_BUSY -> WFIX_QUIET
 * -> WFIX_CHECK.  Only WFIX_CHECK can return 1, and only after the request
 * rate reported by wfix_measure() is at or below wfix_quiet_rate or at or
 * below wfix_busy_rate*wfix_rate_change.  Every other path returns 0. */
static u_char				/* 1=need dbclean now */
wfix(char *reason, u_int reason_len)
{
	double rate;
	struct timeval sn;
	time_t next_clean;

	/* stop everything if dbclean is running */
	/* (also when wfix_quiet_rate is not positive);
	 * either way fall back to WFIX_DELAY and look again later */
	if (db_minimum_map || wfix_quiet_rate <= 0.0) {
		wfix_later(WFIX_RECHECK_SECS);
		return 0;
	}

	switch (dbclean_wfix_state) {
	case WFIX_DELAY:		/* just checking */
		/* NOTE(review): DB_IS_TIME() presumably reports whether the
		 * given timer has expired -- confirm against its definition */
		if (!DB_IS_TIME(dbclean_wfix_secs, WFIX_MAX_SECS))
			return 0;

		/* no quick cleaning soon after the database was created,
		 * cleaned or repaired */
		dcc_ts2timeval(&sn, &db_parms.sn);
		/* ignore a serial number timestamp from the future */
		if (sn.tv_sec > db_time.tv_sec)
			sn.tv_sec = 0;
		/* use the time recorded in dbclean_failed if it is more
		 * recent and not in the future */
		if (sn.tv_sec < dbclean_failed
		    && dbclean_failed <= db_time.tv_sec)
			sn.tv_sec = dbclean_failed;
		if (sn.tv_sec <= db_time.tv_sec) {
			dbclean_wfix_secs = sn.tv_sec + WFIX_POST_CLEAN_SECS;
			if (!DB_IS_TIME(dbclean_wfix_secs, WFIX_MAX_SECS))
				return 0;
		}

		/* check later if dbclean might run soon */
		/* first against the scheduled failsafe cleaning ... */
		next_clean = clean_fake_secs;
		if (db_time.tv_sec >= next_clean - WFIX_PRE_CLEAN_SECS) {
			dbclean_wfix_secs = next_clean + WFIX_POST_CLEAN_SECS;
			if (!DB_IS_TIME(dbclean_wfix_secs, WFIX_MAX_SECS))
				return 0;
		}
		/* ... then against one day after the last cleaning */
		next_clean = clean_last_secs + 24*60*60;
		if (db_time.tv_sec >= next_clean - WFIX_PRE_CLEAN_SECS) {
			dbclean_wfix_secs = next_clean + WFIX_POST_CLEAN_SECS;
			if (!DB_IS_TIME(dbclean_wfix_secs, WFIX_MAX_SECS))
				return 0;
		}

		/* check later if the database is not too large now */
		/* the limit is wfix_size when wfix_size_set,
		 * and db_max_rss otherwise */
		if (db_fsize + db_hash_fsize
		    < (wfix_size_set ? wfix_size : db_max_rss)) {
			wfix_later(WFIX_RECHECK_SECS);
			break;
		}
		/* the database is too big, so try to chase the clients
		 * to the other DCC server (if any) */
		dbclean_wfix_state = WFIX_BUSY;
		wfix_measure(1);	/* start measuring the busy rate */
		break;

	case WFIX_BUSY:
		/* remember the request rate before the clients are chased
		 * away; a negative rate means the measurement restarted */
		rate = wfix_measure(0);
		if (rate >= 0.0) {
			wfix_busy_rate = rate;
			dbclean_wfix_state = WFIX_QUIET;
			dbclean_wfix_secs = db_time.tv_sec + WFIX_QUIET_SECS;
		}
		break;

	case WFIX_QUIET:		/* waiting for clients to flee */
		dbclean_wfix_state = WFIX_CHECK;
		wfix_measure(1);	/* start measuring the quiet rate */
		break;

	case WFIX_CHECK:		/* counting clients */
		rate = wfix_measure(0);
		if (rate < 0.0)
			break;		/* measurement restarted; try again */
		/* clean now if the load is low enough or fell far enough */
		if (rate <= wfix_quiet_rate
		    || rate <= wfix_busy_rate * wfix_rate_change) {
			snprintf(reason, reason_len,
				 "database size "L_DPAT">"L_DPAT
				 "; load changed from %0.1f to %0.1f",
				 db_fsize + db_hash_fsize,
				 wfix_size_set ? wfix_size : db_max_rss,
				 wfix_busy_rate, rate);
			return 1;
		}
		/* Other DCC servers did not take over the load. Maybe later */
		if (db_debug)
			dcc_trace_msg("database size "L_DPAT" > "L_DPAT
				      ", but load changed from %0.1f to %0.1f",
				      db_fsize + db_hash_fsize,
				      wfix_size_set ? wfix_size : db_max_rss,
				      wfix_busy_rate, rate);
		wfix_later(WFIX_RECHECK_SECS);
		break;
	}

	return 0;
}



/* check for changes or other interesting events when the flood timer expires */
/* In order, this function:
 *	- picks a reason to run dbclean if the database is marked broken
 *	    or the hash table is nearly full,
 *	- otherwise returns early if next_flods_ck has not yet arrived,
 *	- performs the rate limited housekeeping (unloading, reloading IDs
 *	    and the blacklist, interface changes, saving clients, and
 *	    refreshing the server-ID claim),
 *	- collects a finished dbclean,
 *	- considers the remaining reasons to run dbclean
 *	    (failsafe for cron, pending deletions, quick cleaning),
 *	- starts dbclean or checks the floods.
 * The result is the time until next_flods_ck in microseconds, never
 * negative. */
static time_t				/* microseconds to wait */
check_changes(void)
{
	static time_t misc_timer, files_timer;
	time_t secs;
	DB_HADDR hash_free;
	const char *mode;
	const char *reason;
	char reason_buf[100];

	/* reason!=0 means dbclean is wanted; mode is passed with it
	 * to run_dbclean() */
	reason = 0;
	mode = 0;

	if (db_failed_line) {
		snprintf(reason_buf, sizeof(reason_buf),
			 "database broken at line %d in %s "DCC_VERSION,
			 db_failed_line, db_failed_file);
		reason = reason_buf;
		mode = "Rbad";

	} else if ((hash_free
		    = db_hash_len - db_hash_used) < MIN_CLEAN_HASH_ENTRIES
		   || hash_free < db_hash_len/20) {
		/* try to expand the hash table when there are only
		 * a few free slots or the load factor rises above .95 */
		if (hash_free < MIN_HASH_ENTRIES)
			snprintf(reason_buf, sizeof(reason_buf),
				 "%d free hash entries",
				 hash_free);
		else
			snprintf(reason_buf, sizeof(reason_buf),
				 "%d free hash entries among %d total",
				 hash_free, db_hash_len);
		reason = reason_buf;
		mode = "Rhash";

	} else {
		/* check nothing else if it is not yet time */
		/* a wait outside (0, flods_ck_secs] is not trusted
		 * and is treated as if the time has come */
		secs = next_flods_ck - db_time.tv_sec;
		if (secs > 0 && secs <= flods_ck_secs)
			return secs * DCC_US;
	}

	next_flods_ck = db_time.tv_sec + flods_ck_secs;

	/* do not make some checks too often even when flood checking is
	 * being rushed */
	if (DB_IS_TIME(misc_timer, MISC_CK_SECS)) {
		misc_timer = db_time.tv_sec + MISC_CK_SECS;

		/* shrink our memory footprint
		 * if we are so idle that we don't have anything to flush */
		if (db_need_flush_secs == 0) {
			/* when the first attempt returns 1, release the
			 * database states and try once more */
			if (db_unload(0, 1) == 1) {
				rel_db_states();
				db_unload(0, 1);
			}
		}

		/* look at files and interfaces at most every 30 seconds */
		if (DB_IS_TIME(files_timer, 30)) {
			files_timer = db_time.tv_sec + 30;

			if (0 >= check_load_ids(0))
				dcc_error_msg("check/reload: %s", dcc_emsg);

			check_blacklist_file();

#ifdef HAVE_GETIFADDRS
			/* check for network interface changes, but only
			 * if we can release the result of getifaddrs() */
			if (get_if_changes(1)) {
				/* 0 sockets is fatal; a positive result
				 * restarts flooding */
				int socs = open_srvr_socs(0);
				if (!socs)
					bad_stop("no server sockets");
				else if (socs > 0)
					flods_restart("new network interfaces",
						      1);
			}
#endif
		}

		if (DB_IS_TIME(need_clients_save, CLIENTS_SAVE_SECS))
			clients_save();

		/* sound a claim to our server-ID if the database is locked */
		if (DB_IS_TIME(host_id_next, DCC_SRVR_ID_SECS)
		    && DB_IS_LOCKED()
		    &&!db_failed_line ) {
			refresh_srvr_rcd(host_id_sum, my_srvr_id,
					 "adding server-ID claim");

			/* advance the timers only if the database was not
			 * marked broken while adding the record */
			if (!db_failed_line ) {
				host_id_last = db_time.tv_sec;
				host_id_next = host_id_last + DCC_SRVR_ID_SECS;
			}
		}
	}

	/* note when hash expansion finishes and collect a zombie */
	check_dbclean(WNOHANG);

	/* the reasons below matter only if none was chosen above */
	if (reason != 0) {
		;

	} else if (DB_IS_TIME(clean_fake_secs, 3*24*60*60)
		   && (!db_minimum_map
		       || DB_IS_TIME(clean_last_secs+2*24*60*60, 4*24*60*60))) {
		reason = "work around broken cron job";
		mode = "Rcron";

	} else if (need_del_dbclean != 0
		   && DB_IS_TIME(del_dbclean_next, DEL_DBCLEAN_SECS)) {
		/* the deletion of a report needs to be cleaned up */
		reason = need_del_dbclean;
		mode = "Rdel";

	} else if (DB_IS_TIME(dbclean_wfix_secs, WFIX_MAX_SECS)
		   && wfix(reason_buf, sizeof(reason_buf))) {
		reason = reason_buf;
		mode = "Rquick";
	}

	if (reason) {
		if (!DB_IS_TIME(dbclean_limit, dbclean_limit_secs)) {
			/* dbclean was started too recently;
			 * come back no later than when the limit ends */
			if (next_flods_ck > dbclean_limit)
				next_flods_ck = dbclean_limit;
		} else if (dbclean_pid < 0) {
			run_dbclean(mode, reason);
		} else {
			/* a dbclean process is still recorded;
			 * check again soon */
			RUSH_NEXT_FLODS_CK();
		}
	} else {
		flods_ck(0);
	}

	/* we probably delayed */
	gettimeofday(&db_time, 0);

	secs = next_flods_ck - db_time.tv_sec;
	return secs >= 0 ? secs*DCC_US : 0;
}



/* The server's main loop.  The outer for(;;) has no exit, so the function
 * leaves only through dccd_quit().
 *
 * Each pass:
 *	- reacts to a pending stop request or a newly noticed database failure,
 *	- builds the select() descriptor sets for client datagrams, flood
 *	    listeners, and flood connections,
 *	- flushes the database if that is due and possibly unlocks it,
 *	- sleeps in select() for at most delay_us microseconds,
 *	- takes in client datagrams and new incoming flood connections,
 *	- services incoming and then outgoing floods,
 *	- runs queued jobs whose answer time has arrived,
 *	- calls check_changes() and notes whether the pass took too long. */
static void NRATTRIB
recv_job(void)
{
	fd_set rfds, *prfds, wfds, *pwfds;
	/* add _fd0 to the set _fds, point p_fds at the set so that it is
	 * given to select(), and keep max_fd current */
#	define PFD_SET(_fd0,_fds) {int _fd = _fd0;	    \
		p##_fds = &_fds; FD_SET(_fd,p##_fds);	    \
		if (max_fd < _fd) max_fd = _fd;}
	int max_fd, nfds;
	IFLOD_INFO *ifp;
	OFLOD_INFO *ofp;
	struct timeval delay;
	time_t delay_us, slept_us, was_too_busy, us;
	struct timeval iflods_read, busy_time, extra_time;
	u_char db_has_failed;
#	define QUANTUM (DCC_US/10)	/* try to use only ~50% of the system */
	SRVR_SOC *sp;
	QUEUE *q;
	int bad_select;
	u_char worked;
	int fd, i;

	bad_select = 3;			/* select() failures tolerated */
	was_too_busy = 0;
	gettimeofday(&db_time, 0);
	iflods_read = db_time;
	busy_time = db_time;
	extra_time = db_time;
	delay_us = flods_ck_secs*DCC_US;
	db_has_failed = 0;
	for (;;) {
		dcc_emsg[0] = '\0';

		if (stopint) {
			flods_ck_secs = SHUTDOWN_DELAY;
			/* the first time through, turn off flooding and
			 * interrupt dbclean */
			if (flods_off < 100000) {
				flods_off = 100000;
				flods_stop("server stopping", 0);
				check_dbclean(WNOHANG);
				if (dbclean_pid > 0) {
					kill(dbclean_pid, SIGINT);
					usleep(100*1000);
				}
				/* get started flushing the database while
				 * we wait for flooding to stop */
				rel_db_states();
				db_minimum_map = 1;
				db_unload(0, 2);
			}
			/* get serious when the floods have stopped */
			if (oflods.total == 0) {
				/* a negative stopint is not a signal number
				 * and gives a zero exit code */
				if (stopint < 0)
					dccd_quit(0, "gracefully stopping%s",
						  stop_mode == 1
						  ? " for reboot"
						  : stop_mode == 2
						  ? " cleanly"
						  : "");
				dccd_quit(stopint | 128,
					  "exiting with signal %d", stopint);
			}
		}

		/* react once to each change in the database failure mark */
		if (db_has_failed != db_failed_line) {
			db_has_failed = db_failed_line;
			if (db_failed_line) {
				++flods_off;
				flods_stop("database corrupt", 1);
				rel_db_states();
				db_unload(0, 2);
				db_unlock();
			}
		}

		/* prfds and pwfds stay null, and so are not given to
		 * select(), unless PFD_SET() adds a descriptor */
		FD_ZERO(&rfds);
		prfds = 0;
		FD_ZERO(&wfds);
		pwfds = 0;
		max_fd = -1;

		/* look for client requests */
		for (sp = srvr_socs; sp; sp = sp->fwd) {
			if (sp->udp >= 0)
				PFD_SET(sp->udp, rfds);
		}

		/* compute what remains of the busy quantum */
		if (was_too_busy > 0)
			was_too_busy = QUANTUM - tv_diff2us(&db_time,
							&busy_time);
		/* NOTE(review): tv_diff2us() appears to yield microseconds
		 * (it is compared with DCC_US fractions elsewhere in this
		 * function) while the bound below looks like a count of
		 * seconds -- confirm the intended units */
		if (was_too_busy > 0
		    && (tv_diff2us(&db_time, &extra_time)
			< min(30, min(KEEPALIVE_IN, KEEPALIVE_OUT)/2))
		    && FLODS_OK()) {
			/* if we have been too busy,
			 * then do nothing extra for a while */
			if (delay_us > was_too_busy)
				delay_us = was_too_busy;
		} else {
			extra_time = db_time;

			/* Accept new incoming flood connections
			 * if flooding is on
			 * and we don't already have too many floods. */
			if (iflods.open < DCCD_MAX_FLOODS) {
				for (sp = srvr_socs; sp; sp = sp->fwd) {
					if (sp->listen >= 0)
					    PFD_SET(sp->listen, rfds);
				}
			}

			/* pump floods out */
			/* i counts the open entries seen so that the scan
			 * stops after oflods.open of them */
			for (ofp = oflods.infos, i = 0;
			     i < oflods.open;
			     ++ofp) {
				if (ofp->soc < 0)
					continue;
				++i;
				if (ofp->flags & OFLOD_FG_CONNECTED) {
					PFD_SET(ofp->soc, rfds);
					/* ask to write only when output is
					 * buffered and OFLOD_FG_EAGAIN is
					 * not set */
					if (!(ofp->flags & OFLOD_FG_EAGAIN)
					    && ofp->obuf_len != 0)
					    PFD_SET(ofp->soc, wfds);
				} else {
					/* not yet connected:
					 * wait for the socket to be writable */
					PFD_SET(ofp->soc, wfds);
				}
			}

			/* pump floods in */
			for (ifp = iflods.infos, i = 0;
			     i < iflods.open;
			     ++ifp) {
				if (ifp->soc < 0)
					continue;
				++i;
				if (ifp->flags & IFLOD_FG_CONNECTED) {
					PFD_SET(ifp->soc, rfds);
				} else {
					PFD_SET(ifp->soc, wfds);
				}
			}
		}

		/* push data to the disk */
		if (db_need_flush_secs != 0) {
			if (DB_IS_TIME(db_need_flush_secs,
				       max(DB_URGENT_NEED_FLUSH_SECS,
					   DB_NEED_FLUSH_SECS))) {
				db_flush_needed();
				gettimeofday(&db_time, 0);
			}
			/* do not sleep past the next flush */
			us = db_need_flush_secs - db_time.tv_sec;
			if (us >= 0) {
				us *= DCC_US;
				if (delay_us > us)
					delay_us = us;
			}
		}

		/* let dbclean run if we have run out of work
		 * or if we have been holding the lock for 0.1 seconds*/
		if (db_minimum_map
		    && DB_IS_LOCKED()
		    && (delay_us != 0
			|| tv_diff2us(&db_time, &db_locked) >= DCC_US/10)) {
			db_unlock();
		}

		/* delay until it is time to answer the oldest anonymous
		 * request or something else that needs doing */
		delay.tv_sec = delay_us/DCC_US;
		delay.tv_usec = delay_us%DCC_US;
		nfds = select(max_fd+1, prfds, pwfds, 0, &delay);
		if (nfds < 0) {
			/* give up after too many consecutive failures
			 * other than EINTR */
			if (errno != EINTR) {
				if (--bad_select < 0)
					bad_stop("give up after select(): %s",
						 ERROR_STR());
				else
					dcc_error_msg("select(): %s",
						      ERROR_STR());
			}
			/* ignore EINTR but recompute timers */
			/* the sets are cleared so that no descriptor
			 * appears ready below */
			FD_ZERO(&rfds);
			FD_ZERO(&wfds);
		} else {
			bad_select = 3;
		}

		gettimeofday(&wake_time, 0);
		slept_us = tv_diff2us(&wake_time, &db_time);
		if (slept_us >= 500) {
			/* If select() paused for at least 0.5 millisecond,
			 * then the waiting request has just now arrived. */
			req_recv_time = wake_time;
		} else {
			/* If select() did not pause, then assume the waiting
			 * requests arrived when we were half finished working
			 * on flooding and other work besides ordinary requests
			 * before calling select(). */
			tv_add_us(&req_recv_time,
				  tv_diff2us(&wake_time, &req_recv_time) / 2);
		}
		db_time = wake_time;

		worked = 0;
		for (sp = srvr_socs; sp; sp = sp->fwd) {
			/* queue a new anonymous request
			 * or answer a new authenticated request */
			fd = sp->udp;
			if (fd >= 0 && FD_ISSET(fd, &rfds)) {
				--nfds;
				worked = 1;
				/* drain the socket;
				 * new_job() returns 0 when it should not
				 * be called again */
				while (new_job(sp))
					continue;
			}

			/* start a new incoming flood */
			fd = sp->listen;
			if (fd >= 0 && FD_ISSET(fd, &rfds)) {
				--nfds;
				worked = 1;
				iflod_start(sp);
			}
		}
		if (worked)
			gettimeofday(&db_time, 0);
		/* reset request receipt clock for next time */
		req_recv_time = db_time;

		/* Accept new flood data or start new SOCKS floods.
		 * Listen to all peers to prevent starvation */
		/* nfds counts the ready descriptors not yet handled,
		 * so the scan can stop early */
		worked = 0;
		for (ifp = iflods.infos, i = 0;
		     nfds > 0 && i < iflods.open;
		     ++ifp) {
			if (ifp->soc < 0)
				continue;
			++i;
			if (FD_ISSET(ifp->soc, &rfds)
			    || FD_ISSET(ifp->soc, &wfds)) {
				--nfds;
				iflod_read(ifp);
				worked = 1;
			}
		}
		if (worked) {
			gettimeofday(&db_time, 0);
			iflods_read = db_time;
		} else if (was_too_busy <= 0) {
			/* if incoming floods have been quiet for
			 * awhile, then assume flooding has caught up
			 * after having been turned off */
			if (tv_diff2us(&wake_time, &iflods_read) > 2*DCC_US)
				iflods_ok_timer = db_time.tv_sec;
		}

		/* pump output flood data and receive confirmations
		 * talk to all peers to prevent starvation */
		worked = 0;
		for (ofp = oflods.infos, i = 0;
		     nfds > 0 && i < oflods.open;
		     ++ofp) {
			if (ofp->soc < 0)
				continue;
			++i;
			if (FD_ISSET(ofp->soc, &rfds)) {
				--nfds;
				oflod_read(ofp);
				/* reading may have closed the flood */
				if (ofp->soc < 0)
					continue;
			}
			if (FD_ISSET(ofp->soc, &wfds)) {
				--nfds;
				oflod_write(ofp);
				worked = 1;
			}
		}
		if (worked)
			gettimeofday(&db_time, 0);

		/* process delayed jobs when their times arrive */
		worked = 0;
		for (;;) {
			q = queue_head;
			if (!q) {
				/* nothing is queued, so sleep until the
				 * next flood check */
				delay_us = flods_ck_secs*DCC_US;
				break;
			}

			/* decide whether this job's time has come
			 * while defending against time jumps */
			/* a delay outside [1000, DCC_MAX_RTT] is treated
			 * as due; everything is due while stopping */
			delay_us = tv_diff2us(&q->answer, &db_time);
			if (delay_us >= 1000
			    && delay_us <= DCC_MAX_RTT
			    && !stopint)
				break;  /* not yet time for next job */

			/* unlink the job from the head of the queue */
			queue_head = q->later;
			if (queue_head)
				queue_head->earlier = 0;
			--queue_cur;
			do_work(q);
			worked = 1;
			free_q(q);
		}
		if (worked)
			gettimeofday(&db_time, 0);

		/* check configuration changes etc. */
		/* and sleep no longer than it asks */
		us = check_changes();
		if (delay_us >= us)
			delay_us = us;

		/* note that this pass took at least a whole quantum so that
		 * the next passes skip the extra work */
		us = tv_diff2us(&db_time, &wake_time);
		if (us >= QUANTUM && !stopint) {
			gettimeofday(&db_time, 0);
			busy_time = db_time;
			was_too_busy = QUANTUM;
		}
	}
}



/* Accept a request that new_job() received: answer it at once when it
 * carries no delay, or insert it into the queue of delayed jobs, which is
 * kept in order of increasing answer time.
 * The entry is released with free_q() whenever it is not queued. */
static void
add_queue(QUEUE *q)
{
	QUEUE **linkp, *cur;

	TMSG1(QUERY, "received %s", op_id_ip(q));
	if (!ck_clnt_id(q)) {
		free_q(q);
		return;
	}

	++total_ops;

	/* a request without a delay is handled immediately */
	if (q->delay_us == 0) {
		do_work(q);
		free_q(q);
		return;
	}

	/* refuse to let the queue of delayed requests grow without bound */
	if (queue_cur >= queue_max) {
		clnt_msg(q, "drop excess queued %s", op_id_ip(q));
		free_q(q);
		return;
	}

	tv_add_us(&q->answer, q->delay_us);

	/* Find the first queued job that is due strictly later than the
	 * new one and link the new job in front of it, or at the end
	 * of the queue if there is no such job. */
	++queue_cur;
	for (linkp = &queue_head; (cur = *linkp) != 0; linkp = &cur->later) {
		if (cur->answer.tv_sec > q->answer.tv_sec
		    || (cur->answer.tv_sec == q->answer.tv_sec
			&& cur->answer.tv_usec > q->answer.tv_usec)) {
			q->later = cur;
			cur->earlier = q;
			break;
		}
		/* the new job will follow this one or a later one */
		q->earlier = cur;
	}
	*linkp = q;
}



/* get a new job in a datagram */
/* Receive one datagram from the UDP socket of `sp` into a queue entry,
 * validate its source, length, and protocol version, and dispatch it by
 * operation.
 * Returns 0 only when recvmsg() fails, and 1 otherwise so that the caller
 * tries again. */
static u_char				/* 1=call again */
new_job(SRVR_SOC *sp)
{
	QUEUE *q;
	/* static, so both are zero filled apart from the fields set here
	 * and below */
	static struct iovec iov = {0, sizeof(q->pkt)};
	static struct msghdr msg;
	int i, j;

	/* Find a free queue entry for the job.
	 * Because we don't check for incoming jobs unless we think the
	 * queue is not full, there must always be a free entry or
	 * permission to make more entries. */
	q = queue_free;
	if (q) {
		queue_free = q->later;
	} else {
		/* allocate entries 16 at a time */
		i = 16;
		q = dcc_malloc(i * sizeof(*q));
		if (!q)
			dcc_logbad(EX_UNAVAILABLE,
				   "malloc(%d queue entries) failed", i);
		queue_max_cur += i;
		/* put all but the last new queue entry on the free list */
		/* the loop leaves q pointing at the last entry,
		 * which is used for this datagram */
		while (--i > 0) {
			q->later = queue_free;
			queue_free = q;
			++q;
		}
	}

	memset(q, 0, sizeof(*q));
	q->sp = sp;
	iov.iov_base = (char *)&q->pkt;
	msg.msg_name = (void *)&q->clnt_su;
	msg.msg_namelen = sizeof(q->clnt_su);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	i = recvmsg(sp->udp, &msg, 0);
	if (i < 0) {
		/* ignore some results of ICMP unreachables for UDP
		 * retransmissions seen on some platforms */
		/* NOTE(review): DCC_BLOCK_ERROR() presumably detects that
		 * no datagram was waiting -- confirm */
		if (DCC_BLOCK_ERROR()) {
			;
		} else if (UNREACHABLE_ERRORS()) {
			TMSG2(QUERY, "recvmsg(%s): %s",
			      dcc_su2str_err(&sp->su), ERROR_STR());
		} else {
			dcc_error_msg("recvmsg(%s): %s",
				      dcc_su2str_err(&sp->su), ERROR_STR());
		}
		free_q(q);
		return 0;
	}
	/* the source address must be of the family of the socket or be
	 * convertible by dcc_ipv4sutoipv6() */
	if (q->clnt_su.sa.sa_family != sp->su.sa.sa_family
	    && !dcc_ipv4sutoipv6(&q->clnt_su, &q->clnt_su)) {
		dcc_error_msg("recvmsg address family %d instead of %d",
			      q->clnt_su.sa.sa_family,
			      sp->su.sa.sa_family);
		free_q(q);
		return 1;
	}

	if (DCC_SU_PORT(&q->clnt_su) == 0) {
		drop_msg(q, "source port 0");
		free_q(q);
		return 1;
	}

	/* the datagram must hold at least a header, and the length in the
	 * header must equal the number of bytes received */
	q->pkt_len = i;
	if (i < ISZ(DCC_HDR)) {
		drop_msg(q, "short request of %d bytes", i);
		free_q(q);
		return 1;
	}
	j = ntohs(q->pkt.hdr.len);
	if (j != i) {
		drop_msg(q, "request with header length %d instead of %d",
			 j, i);
		free_q(q);
		return 1;
	}

	/* Versions outside the *_VALID range are always rejected.
	 * Versions inside it but outside MIN..MAX are accepted only
	 * for DCC_OP_NOP. */
	if (q->pkt.hdr.pkt_vers > DCC_PKT_VERSION_MAX_VALID
	    || q->pkt.hdr.pkt_vers < DCC_PKT_VERSION_MIN_VALID
	    || ((q->pkt.hdr.pkt_vers > DCC_PKT_VERSION_MAX
		 || q->pkt.hdr.pkt_vers < DCC_PKT_VERSION_MIN)
		&& q->pkt.hdr.op != DCC_OP_NOP)) {
		drop_msg(q, "%s in unrecognized protocol version #%d",
			 qop2str(q), q->pkt.hdr.pkt_vers);
		free_q(q);
		return 1;
	}

	/* start with the estimated arrival time; add_queue() adds any delay */
	q->answer = req_recv_time;

	/* Every case that handles the request returns.  Breaking out of
	 * the switch, or an operation without a case, reaches the
	 * "invalid" drop at the bottom. */
	switch ((DCC_OPS)q->pkt.hdr.op) {
	case DCC_OP_NOP:
		do_nop(q);
		free_q(q);
		return 1;

	case DCC_OP_REPORT:
		if (db_parms.flags & DB_PARM_FG_GREY)
			break;		/* not valid for greylist servers */
		add_queue(q);		/* add_queue() disposes of q */
		return 1;

	case DCC_OP_QUERY:
		add_queue(q);
		return 1;

	case DCC_OP_ADMN:
		do_admn(q);
		free_q(q);
		return 1;

	case DCC_OP_DELETE:
		do_delete(q);
		free_q(q);
		return 1;

	case DCC_OP_GREY_REPORT:
	case DCC_OP_GREY_QUERY:
	case DCC_OP_GREY_WHITE:
		if (!(db_parms.flags & DB_PARM_FG_GREY))
			break;		/* valid only for greylist servers */
		do_grey(q);
		free_q(q);
		return 1;

	case DCC_OP_GREY_SPAM:
		if (!(db_parms.flags & DB_PARM_FG_GREY))
			break;		/* valid only for greylist servers */
		do_grey_spam(q);
		free_q(q);
		return 1;

	/* operations that a server does not accept as requests */
	case DCC_OP_INVALID:
	case DCC_OP_ANSWER:
	case DCC_OP_OK:
	case DCC_OP_ERROR:
		break;
	}

	drop_msg(q, "invalid %s", op_id_ip(q));
	free_q(q);
	return 1;
}



/* Give a queue entry back to the free list.
 * If the entry points to an rl record, that record's reference count
 * is decremented first. */
void
free_q(QUEUE *q)
{
	if (q->rl != 0)
		q->rl->ref_cnt -= 1;

	/* push the entry onto the front of the free list */
	q->later = queue_free;
	queue_free = q;
}



/* Open the database and derive the server's settings from it:
 * which checksums are kept, the flooding threshold of each checksum type,
 * the delay before summarizing, and the time of the next failsafe cleaning.
 *
 *	lock_mode   flags that are ORed with db_mode and given to db_open()
 *
 * Returns 0 if db_open() fails, leaving its message in dcc_emsg;
 * otherwise returns the result of db_flush_parms(). */
u_char
dccd_db_open(u_char lock_mode)
{
	DCC_CK_TYPES type;
	time_t clean_secs;
	DCC_TGTS tgts;
	int i;

	if (!db_open(dcc_emsg, -1, 0, 0, lock_mode | db_mode))
		return 0;

	if (grey_on) {
		/* for greylisting, ignore the args and silently impose our
		 * notion of which checksums to keep and flooding thresholds */
		db_parms.nokeep_cks = def_nokeep_cks();
		if (grey_weak_ip)
			DB_RESET_NOKEEP(db_parms.nokeep_cks, DCC_CK_IP);

		for (type = DCC_CK_TYPE_FIRST;
		     type <= DCC_CK_TYPE_LAST;
		     ++type) {
			/* server-ID records keep a threshold of 1 and
			 * their expirations are not touched */
			if (type == DCC_CK_SRVR_ID) {
				flod_tholds[type] = 1;
				continue;
			}

			if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type))
				flod_tholds[type] = DCC_TGTS_INVALID;
			else
				flod_tholds[type] = 1;

			/* expirations follow the greylist window
			 * and whitelist durations */
			if (DCC_CK_IS_GREY_TRIPLE(grey_on,type)
			    || type == DCC_CK_IP) {
				db_parms.ex_secs[type].all = grey_window;
				db_parms.ex_secs[type].spam = grey_white;
			} else if (type == DCC_CK_BODY
				   || DCC_CK_IS_GREY_MSG(grey_on,type)) {
				db_parms.ex_secs[type].all = grey_window;
				db_parms.ex_secs[type].spam = grey_window;
			} else {
				db_parms.ex_secs[type].all = 1;
				db_parms.ex_secs[type].spam = 1;
			}
		}

		summarize_delay_secs = grey_embargo - FLODS_CK_SECS*2;

	} else {
		/* impose our notion of which normal checksums to keep */
		DB_SET_NOKEEP(set_new_nokeep_cks, DCC_CK_FLOD_PATH);
		DB_SET_NOKEEP(set_new_nokeep_cks, DCC_CK_INVALID);
		DB_SET_NOKEEP(set_new_nokeep_cks, DCC_CK_REP_TOTAL);
		DB_SET_NOKEEP(set_new_nokeep_cks, DCC_CK_REP_BULK);
		db_parms.nokeep_cks = ((def_nokeep_cks()
					& ~reset_new_nokeep_cks)
				       | set_new_nokeep_cks);

		for (type = DCC_CK_TYPE_FIRST;
		     type <= DCC_CK_TYPE_LAST;
		     ++type) {
			if (type == DCC_CK_SRVR_ID) {
				flod_tholds[type] = 1;
				continue;
			}

			/* This must be a single if/else-if chain.
			 * As a separate `if`, the DCC_CK_REP_TOTAL choice
			 * was a dead store that the rest of the chain
			 * always overwrote.  The thresholds do not change
			 * because DCC_CK_REP_TOTAL is always put into
			 * nokeep_cks above. */
			if (type == DCC_CK_REP_TOTAL)
				tgts = DCC_TGTS_INVALID;
			else if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type))
				tgts = DCC_TGTS_INVALID;
			else if (DCC_CK_IS_REP_CMN(0, type))
				tgts = DCC_TGTS_INVALID;
			else
				tgts = BULK_THRESHOLD;
			flod_tholds[type] = tgts;
		}

		/* We should not delay reports or summaries so much that
		 * dbclean might expire them before we can summarize them. */
		/* so use the smallest non-zero spam expiration among the
		 * kept checksums if it is less than DCC_OLD_SPAM_SECS */
		summarize_delay_secs = DCC_OLD_SPAM_SECS;
		for (type = DCC_CK_TYPE_FIRST;
		     type <= DCC_CK_TYPE_LAST;
		     ++type) {
			if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type))
				continue;
			i = db_parms.ex_secs[type].spam;
			if (i != 0 && summarize_delay_secs > i)
				summarize_delay_secs = i;
		}
	}
	if (summarize_delay_secs < 1)
		summarize_delay_secs = 1;

	/* adjust the thresholds after possible changes to kept checksums */
	set_db_tholds(db_parms.nokeep_cks);

	/* If we instead of cron asked for the last cleaning, make a note
	 * to clean the database during the graveyard shift.
	 * Otherwise the database will bloat while the cron job is broken.
	 *
	 * Compute 1 day + 45 minutes after cron should have last cleaned the
	 *	database, if it has been cleaned by cron within the last 3 days
	 * or the quarter hour when it was last cleaned by this mechanism
	 *	provided that was between local midnight and 05:00
	 * or 3 minutes past a random quarter hour between midnight and 05:00 */
	if (db_parms.cleaned_cron >= db_time.tv_sec - 3*24*60*60
	    && db_parms.cleaned_cron <= db_time.tv_sec) {
		clean_last_secs = db_parms.cleaned_cron;
		/* failsafe cleaning for the greylist database starts 15 minutes
		 * before the main database */
		clean_secs = clean_last_secs + 45*60 - grey_on*15*60;
	} else {
		struct tm tm;

		/* ignore a time of last cleaning that is not in the past */
		clean_last_secs = db_parms.cleaned;
		if (clean_last_secs >= db_time.tv_sec)
			clean_last_secs = 0;
		clean_secs = clean_last_secs;

		/* if the previous time for this mechanism is not good,
		 * pick a new random time */
		dcc_localtime(clean_secs, &tm);
		if (clean_secs == 0 || tm.tm_hour < 1 || tm.tm_hour >= 5) {
			/* rnum is 0..22, which gives an hour of 1..4
			 * and a minute of 0, 15, 30, or 45 */
			int rnum = (u_int)(db_time.tv_sec + db_time.tv_usec
					   + my_srvr_id - grey_on) % 23;
			tm.tm_hour = ((rnum / 4) % 4) + 1;
			tm.tm_min = (rnum % 4) * 15;
			clean_secs = mktime(&tm);
		}
	}
	/* round down to a quarter hour to prevent creep due to inevitable
	 * delays in cron or this mechanism starting dbclean */
	clean_secs -= clean_secs % (15*60);
	clean_secs %= (24*60*60);	/* failsafe cleaning target minute */

	/* compute the next scheduled failsafe cleaning. */
	/* start with the target time within the current day
	 * and move to the next day if that has already passed */
	clean_fake_secs = db_time.tv_sec - (db_time.tv_sec % (24*60*60));
	clean_fake_secs += clean_secs;
	if (clean_fake_secs <= db_time.tv_sec)
		clean_fake_secs += 24*60*60;

	/* The next failsafe cleaning should happen a day after the
	 * most recent cron or failsafe cleaning, modulo the delay before
	 * dbclean starts */
	while (clean_fake_secs <= clean_last_secs + 24*60*60 - 60*60)
		clean_fake_secs += 24*60*60;

	/* Do not failsafe clean during the first 48 hours after the
	 * database was created to give the cron job a chance.
	 * We do not want failsafe cleaning to ever be running when the
	 * cron job tries to start. */
	while (clean_fake_secs <= db_parms.cleared + 2*24*60*60
	       && db_parms.cleared <= db_time.tv_sec)
		clean_fake_secs += 24*60*60;

	total_ops = 0;

	/* push our thresholds and flags to the file */
	return db_flush_parms(0);
}



/* Shut the server down in an orderly way and exit.
 *
 *	exitcode    process exit status; non-zero sends the messages to the
 *			error log instead of the trace log
 *	p, ...	    printf style pattern and arguments for the final message
 *
 * Does not return. */
static void NRATTRIB
dccd_quit(int exitcode, const char *p, ...)
{
	va_list ap;

	/* stop_mode selects how the database is left */
	switch (stop_mode) {
	case 1:
		db_stop();
		break;
	case 2:
		make_clean(2);
		break;
	default:
		break;
	}
	db_unlock();

	va_start(ap, p);
	if (exitcode != 0)
		dcc_verror_msg(p, ap);
	else
		dcc_vtrace_msg(p, ap);
	va_end(ap);

	/* db_close() can be slow, so deal with the children and the
	 * client data before it */
	stop_children();
	check_dbclean(WNOHANG);
	clients_save();

#ifdef HAVE_COHERENT_MMAP
	/* The server sockets are closed early only when mmap() is coherent
	 * and the database is not open with DB_OPEN_MMAP_WRITE.  In every
	 * other case the UDP sockets stay open, which keeps another server
	 * from starting before our buffers have been flushed. */
	if ((db_mode & DB_OPEN_MMAP_WRITE) == 0)
		close_srvr_socs();
#endif
	db_close(1);

	if (exitcode != 0)
		dcc_error_msg("stopped");
	else
		dcc_trace_msg("stopped");
	exit(exitcode);
}



/* Handler for the fatal signals: record which signal arrived, select
 * stop_mode 1, and make the next flood check due at once.
 * The default action is restored last, so a second copy of the signal
 * is not caught. */
static void
sigterm(int sig)
{
	next_flods_ck = 0;
	stop_mode = 1;
	stopint = sig;

	(void)signal(sig, SIG_DFL);
}



/* SIGHUP hurries checking the configuration files */
/* It only zeroes next_flods_ck, the time that check_changes() compares
 * with db_time before doing its checks.  The signal number is unused. */
static void
sighup(int sig UATTRIB)
{
	next_flods_ck = 0;
}



/* Ask for an emergency shutdown that still closes the database cleanly.
 *
 *	pat, ...    printf style pattern and arguments for the error message
 *
 * Nothing happens if a stop is already pending.  Otherwise the message is
 * logged, stopint is set to -1, and the next flood check is made due
 * at once. */
void
bad_stop(const char *pat, ...)
{
	va_list ap;

	if (!stopint) {
		va_start(ap, pat);
		dcc_verror_msg(pat, ap);
		va_end(ap);

		stopint = -1;
		next_flods_ck = 0;
	}
}



/* Send SIGKILL to whichever of the two recorded child processes exist.
 * A PID that is not positive means there is no such child. */
static void
stop_children(void)
{
	/* the child recorded in resolve_hosts_pid */
	if (resolve_hosts_pid > 0) {
		kill(resolve_hosts_pid, SIGKILL);
	}

	/* the child recorded in dbclean_pid */
	if (dbclean_pid > 0) {
		kill(dbclean_pid, SIGKILL);
	}
}