Mercurial > notdcc

diff dbclean/dbclean.c @ 0:c7f6b056b673
First import of vendor version
author: Peter Gervai <grin@grin.hu>
date: Tue, 10 Mar 2009 13:49:58 +0100
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dbclean/dbclean.c	Tue Mar 10 13:49:58 2009 +0100
@@ -0,0 +1,2789 @@
+/* Distributed Clearinghouse Checksum database cleaner
+ *
+ * Copyright (c) 2008 by Rhyolite Software, LLC
+ *
+ * This agreement is not applicable to any entity which sells anti-spam
+ * solutions to others or provides an anti-spam solution as part of a
+ * security solution sold to other entities, or to a private network
+ * which employs the DCC or uses data provided by operation of the DCC
+ * but does not provide corresponding data to other users.
+ *
+ * Permission to use, copy, modify, and distribute this software without
+ * changes for any purpose with or without fee is hereby granted, provided
+ * that the above copyright notice and this permission notice appear in all
+ * copies and any distributed versions or copies are either unchanged
+ * or not called anything similar to "DCC" or "Distributed Checksum
+ * Clearinghouse".
+ *
+ * Parties not eligible to receive a license under this agreement can
+ * obtain a commercial license to use DCC by contacting Rhyolite Software
+ * at sales@rhyolite.com.
+ *
+ * A commercial license would be for Distributed Checksum and Reputation
+ * Clearinghouse software.  That software includes additional features.  This
+ * free license for Distributed ChecksumClearinghouse Software does not in any
+ * way grant permision to use Distributed Checksum and Reputation Clearinghouse
+ * software
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
+ * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
+ * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Rhyolite Software DCC 1.3.103-1.254 $Revision$
+ */
+
+#include "srvr_defs.h"
+#include "dcc_ck.h"
+#include <signal.h>
+
+static DCC_EMSG dcc_emsg;		/* error text from the dcc_*() helpers */
+
+static DCC_WF dbclean_wf;		/* not referenced in this part of the
+					 * file; presumably whitelist state
+					 * for parse_white() -- TODO confirm */
+static DCC_WHITE_TBL dbclean_white_tbl;
+static DCC_CLNT_CTXT *ctxt;		/* from dcc_tmp_clnt_init() for dccd */
+static DCC_OP_RESP aop_resp;		/* read after persist_aop() to see
+					 * what dccd answered */
+static int flods_off;			/* # of our holds on dccd flooding */
+static int dccd_unlocked;		/* dccd has been told to unlock	*/
+
+static DCC_SRVR_NM srvr = DCC_SRVR_NM_DEF;  /* dccd to contact; see -a, -i */
+static DCC_CLNT_ID srvr_clnt_id = DCC_ID_INVALID;   /* from -i */
+static const ID_TBL *srvr_clnt_tbl;	/* filled by load_ids(); source of
+					 * srvr.passwd */
+static u_char info_flags = 0;		/* DCC_INFO_FG_IPV6 with -6, 0 with -4 */
+#ifdef USE_DBCLEAN_F
+static u_char db_mode = DB_OPEN_MMAP_WRITE;
+#else
+static u_char db_mode = DB_OPEN_MMAP_WRITE_NOSYNC;
+#endif
+
+static u_char cleardb;			/* 1=clear the database */
+static enum {
+    NORMAL_MODE,
+    REPAIR_MODE,			/* database broken */
+    QUICK_MODE,				/* too big for window */
+    HASH_MODE,				/* hash table full */
+    NO_CRON_MODE,			/* work around missing cron job */
+    DEL_MODE				/* after deletion */
+} clean_mode = NORMAL_MODE;		/* chosen with -R; main() and expire
+					 * failures switch to REPAIR_MODE */
+static u_char standalone;		/* 1=don't talk to dccd */
+static u_char keep_white;		/* 1=do not rebuild whitelist */
+
+static int exit_value = -1;
+
+static const char *homedir;		/* from -h; handed to dcc_cdhome() */
+static u_char cur_db_created;		/* 1=standalone run created the
+					 * current file or found it empty */
+static const char *cur_db_nm_str = DB_DCC_NAME;	/* DB_GREY_NAME with -G */
+static DCC_PATH cur_db_nm;		/* current database file */
+static DCC_PATH cur_hash_nm;		/* cur_db_nm with DB_HASH_SUFFIX */
+static int old_db_fd = -1;		/* handle on the current (old) file */
+static DB_HADDR old_db_hash_used;	/* db_hash_used saved after db_open() */
+static DB_PARMS old_db_parms;		/* db_parms saved after db_open() */
+static DB_PARMS new_db_parms;
+static DB_PTR old_db_pos,  new_db_csize;    /* new_db_csize starts at
+					     * DB_PTR_BASE in main() */
+static off_t new_db_fsize;		/* 0 after the ftruncate() in main() */
+static u_int new_db_pagesize;		/* from db_get_pagesize() */
+static FLOD_MMAPS new_flod_mmaps;	/* adjusted flood positions; copied
+					 * over flod_mmaps when installed */
+static u_char adj_delay_pos;		/* 1=adj_mmap() must still move
+					 * new_flod_mmaps.delay_pos */
+static u_char new_db_created;		/* 1="-new" data file exists; cleared
+					 * once it is renamed into place */
+static DCC_PATH new_db_nm;		/* cur_db_nm with "-new" */
+static int new_db_fd = -1;
+static u_char new_hash_created;		/* cleared after the new hash file
+					 * is renamed into place */
+static DCC_PATH new_hash_nm;		/* new_db_nm with DB_HASH_SUFFIX */
+static DCC_PATH old_db_nm;		/* cur_db_nm with "-old" */
+
+static int expire_secs = -1;		/* from -e; <0 means use the default */
+static int def_expire_secs = DB_EXPIRE_SECS_DEF;
+static int expire_spamsecs = -1;	/* from -E; <0 means use the default */
+static int def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF;
+static int have_expire_parms = 0;	/* 1=-e or -E seen, -1=-P or -G seen */
+static double def_exp_ratio = 0.0;	/* 0.0 until adj_def_expire() has run */
+static DB_EX_SECS new_ex_secs;
+static DB_EX_TS new_ex_ts;		/* per-type expiration timestamps */
+
+static DB_HADDR new_hash_len;		/* from -s */
+
+static int expired_rcds, comp_rcds, obs_rcds, expired_cks;
+static int white_cks, kept_cks;
+
+static DCC_TS future_ts;		/* built in main() from clean_start
+					 * and 24*60*60 */
+
+#define RESTART_DELAY	(60*5)
+#define SHORT_DELAY	30		/* last argument of persist_aop() */
+
+static struct timeval clean_start;	/* time main() started */
+
+static struct timeval progress_rpt_last;    /* when previous progress report */
+static struct timeval progress_rpt_checked; /* when last checked */
+static struct timeval progress_rpt_start;   /* start of progress reporting */
+#define REPORT_INTERVAL_SECS	    (5*60)
+#define REPORT_INTERVAL_FAST_SECS   10	/* used when db_debug > 1 */
+#define	UNLOCK_INTERVAL_USECS	    (DCC_US/2)
+static int progress_rpt_cnt;		/* operations until next check */
+static int progress_rpt_base;		/* value progress_rpt_cnt is reset to;
+					 * kept within 100..10000 */
+static u_char progress_rpt_started;	/* 1=have started reporting progress */
+static int progress_rpt_percent;	/* percentage in the last report */
+
+static u_char write_new_flush(u_char);	/* forward declarations; of these
+					 * only expire() begins in the part
+					 * of the file reviewed here */
+static u_char write_new_rcd(const void *, int);
+static void write_new_hdr(u_char);
+static void unlink_whine(const char *, u_char);
+static void rename_bail(const char *, const char *);
+static u_char expire(DB_PTR);
+static u_char copy_db(void);
+static u_char catchup(DCC_EMSG);
+static void parse_white(void);
+static void build_hash(void);
+static u_char persist_aop(DCC_AOPS, u_int32_t, int);
+static void dccd_new_db(const char *);
+static void finish(void);
+static void exit_dbclean(int) NRATTRIB;
+static void sigterm(int);
+
+
+/* Complain about the command line.
+ *	die!=0: hand the synopsis to dcc_logbad() with EX_USAGE, or only
+ *	    "giving up" if the synopsis has already been shown.
+ *	die==0: show the synopsis at most once and let the caller go on. */
+static void
+usage(u_char die)
+{
+	static const char synopsis[] =
+		"usage: [-64dfFNPSVq] [-i id]"
+		" [-a [server-addr][,server-port]] [-h homedir]\n"
+		"   [-G on] [-R mode] [-s hash-size] [-e seconds]"
+		" [-E spamsecs]\n"
+		"   [-L ltype,facility.level]";
+	static u_char shown;		/* 1=synopsis already printed */
+
+	/* It is important to try to run, so a bad option is fatal
+	 * only when the caller insists. */
+	if (die) {
+		if (shown)
+			dcc_logbad(EX_USAGE, "giving up");
+		else
+			dcc_logbad(EX_USAGE, synopsis);
+		return;
+	}
+
+	if (shown)
+		return;
+	dcc_error_msg("%s\ncontinuing", synopsis);
+	shown = 1;
+}
+
+
+int NRATTRIB	/* dbclean entry point.  In order, the code below:
+ *	- parses the command line and moves to the home directory
+ *	- excludes other dbclean instances and creates the locked "-new" file
+ *	- standalone: locks (or creates) the current file;
+ *	  otherwise: contacts dccd and asks it to stop flooding
+ *	- parses the whitelist, then expires the old file into the new one
+ *	  or, in REPAIR_MODE, copies it with copy_db()
+ *	- rebuilds the hash table, catches up on late records, and renames
+ *	  the files into place
+ *	- unless standalone or clearing, tells dccd to switch files and
+ *	  catches up once more
+ * The last statement is exit_dbclean(), which is declared NRATTRIB. */
+main(int argc, char **argv)
+{
+	char hostname[DCC_MAXDOMAINLEN];
+	u_char print_version = 0;
+	struct stat cur_db_sb;
+	u_int tgt_db_pagesize;
+	const char *cp;
+	char *p;
+	u_long l;
+	int i;
+
+	gettimeofday(&db_time, 0);
+	clean_start = db_time;
+
+	dcc_timeval2ts(&future_ts, &clean_start, 24*60*60);
+
+	dcc_syslog_init(1, argv[0], 0);
+
+	/* this must match DBCLEAN_GETOPTS in cron-dccd.in */
+	while ((i = getopt(argc, argv, "64dfFNPSVqi:a:h:G:R:s:e:E:L:")) != -1) {
+		switch (i) {
+		case '6':
+#ifndef NO_IPV6
+			info_flags = DCC_INFO_FG_IPV6;
+#endif
+			break;
+		case '4':
+			info_flags = 0;
+			break;
+
+		case 'd':
+			if (db_debug++)
+				++dcc_clnt_debug;
+			break;
+
+		case 'f':
+			db_mode &= ~DB_OPEN_MMAP_WRITE;
+			break;
+
+
+		case 'F':
+			db_mode |= DB_OPEN_MMAP_WRITE;
+			break;
+
+		case 'N':		/* make a new, clear database */
+			cleardb = 1;
+			standalone = 1;
+			break;
+
+		case 'P':
+			if (have_expire_parms > 0)
+				dcc_logbad(EX_USAGE,
+					   "do not use -P with -e or -E");
+			have_expire_parms = -1;
+			break;
+
+		case 'S':
+			standalone = 1;
+			break;
+
+		case 'V':
+			fprintf(stderr, DCC_VERSION"\n");
+			print_version = 1;
+			break;
+
+		case 'q':
+			trace_quiet = 1;
+			break;
+
+		case 'i':
+			l = strtoul(optarg, &p, 10);
+			if (*p != '\0'
+			    || l < DCC_SRVR_ID_MIN
+			    || l > DCC_SRVR_ID_MAX)
+				dcc_logbad(EX_USAGE, "invalid DCC ID \"-i %s\"",
+					   optarg);
+			srvr_clnt_id = l;
+			break;
+
+		case 'a':
+			cp = dcc_parse_nm_port(dcc_emsg, optarg, srvr.port,
+					       hostname, sizeof(hostname),
+					       &srvr.port, 0, 0, 0, 0);
+			if (!cp) {
+				dcc_error_msg("%s", dcc_emsg);
+				break;
+			}
+			cp += strspn(cp, DCC_WHITESPACE);
+			if (*cp != '\0') {	/* NOTE(review): the message
+						 * below has no space between
+						 * "in" and the quoted option */
+				dcc_error_msg("unrecognized port number in"
+					      "\"-a %s\"", optarg);
+				break;
+			}
+			if (hostname[0] == '\0')
+				strcpy(srvr.hostname, DCC_SRVR_NM_DEF_HOST);
+			else
+				BUFCPY(srvr.hostname, hostname);
+			break;
+
+		case 'h':
+			homedir = optarg;
+			break;
+
+		case 'G':
+			dcc_syslog_init(1, argv[0], " grey");
+			if (have_expire_parms > 0)
+				dcc_logbad(EX_USAGE,
+					   "do not use -G with -e or -E");
+			if (strcasecmp(optarg, "on"))
+				usage(0);   /* be generous and allow -Gasdf */
+			grey_on = 1;
+			have_expire_parms = -1;
+			cur_db_nm_str = DB_GREY_NAME;
+			break;
+
+		case 'R':
+			if (!strcasecmp(optarg, "bad"))
+				clean_mode = REPAIR_MODE;
+			else if (!strcasecmp(optarg, "quick"))
+				clean_mode = QUICK_MODE;
+			else if (!strcasecmp(optarg, "hash"))
+				clean_mode = HASH_MODE;
+			else if (!strcasecmp(optarg, "cron"))
+				clean_mode = NO_CRON_MODE;
+			else if (!strcasecmp(optarg, "del"))
+				clean_mode = DEL_MODE;
+			else
+				dcc_logbad(EX_USAGE,
+					   "unrecognized repair mode -R %s",
+					   optarg);
+			break;
+
+		case 's':		/* hash table size in entries */
+			new_hash_len = strtoul(optarg, &p, 0);
+			if (*p != '\0'
+			    || new_hash_len < MIN_HASH_ENTRIES
+			    || new_hash_len > MAX_HASH_ENTRIES)
+				dcc_logbad(EX_USAGE,
+					   "invalid database size \"%s\"",
+					   optarg);
+			break;
+
+		case 'e':		/* expiration for non-bulk checksums */
+			if (grey_on)
+				dcc_logbad(EX_USAGE,
+					   "do not use -e with -G");
+			if (have_expire_parms < 0)
+				dcc_logbad(EX_USAGE,
+					   "-e cannot be used with -P");
+			have_expire_parms = 1;
+			expire_secs = dcc_get_secs(optarg, 0,
+						   DB_EXPIRE_SECS_MIN,
+						   DB_EXPIRE_SECS_MAX, -1);
+			if (expire_secs < 0)
+				dcc_logbad(EX_USAGE,
+					   "invalid expiration seconds"
+					   " \"-e %s\"",
+					   optarg);
+			break;
+
+		case 'E':		/* expiration for bulk checksums */
+			if (grey_on)
+				dcc_logbad(EX_USAGE,
+					   "do not use -E with -G");
+			if (have_expire_parms < 0)
+				dcc_logbad(EX_USAGE,
+					   "do not use -E with -P");
+			have_expire_parms = 1;
+			expire_spamsecs = dcc_get_secs(optarg, 0,
+						       DB_EXPIRE_SECS_MIN,
+						       DB_EXPIRE_SECS_MAX, -1);
+			if (expire_spamsecs < 0)
+				dcc_logbad(EX_USAGE,
+					   "invalid spam expiration seconds"
+					   " \"-E %s\"",
+					   optarg);
+			break;
+
+		case 'L':
+			dcc_parse_log_opt(optarg);
+			break;
+
+		default:
+			usage(0);
+		}
+	}
+	argc -= optind;
+	argv += optind;
+	if (argc != 0)
+		usage(1);
+
+	if (srvr_clnt_id == DCC_ID_INVALID && !standalone) {
+		if (print_version)
+			exit(EX_OK);
+		usage(1);
+	}
+	srvr.clnt_id = srvr_clnt_id;
+
+	if (srvr.port == 0)
+		srvr.port = DCC_GREY2PORT(grey_on);
+
+	dcc_clnt_unthread_init();
+	/* move to the target directory
+	 * and set homedir for fnm2rel_good() */
+	if (!dcc_cdhome(dcc_emsg, homedir, 0))
+		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+	fnm2rel_good(cur_db_nm, cur_db_nm_str, 0);
+	cp = "";
+	switch (clean_mode) {
+	case NORMAL_MODE: cp = "cleaning"; break;
+	case REPAIR_MODE: cp = "repairing"; break;
+	case QUICK_MODE: cp = "quick cleaning"; break;
+	case HASH_MODE: cp = "expanding hash table in"; break;
+	case NO_CRON_MODE: cp = "work around missing cron job for"; break;
+	case DEL_MODE: cp = "clean up deletion in"; break;
+	}
+	quiet_trace_msg(DCC_VERSION" %s %s", cp, fnm2abs_err(0, cur_db_nm));
+
+	atexit(finish);
+	signal(SIGHUP, sigterm);
+	signal(SIGTERM, sigterm);
+	signal(SIGINT, sigterm);
+#ifdef SIGXFSZ
+	signal(SIGXFSZ, SIG_IGN);
+#endif
+
+	if (!standalone) {
+		i = load_ids(dcc_emsg, srvr_clnt_id, &srvr_clnt_tbl, 1);
+		if (i <= 0)
+			dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+		memcpy(srvr.passwd, srvr_clnt_tbl->cur_passwd,
+		       sizeof(srvr.passwd));
+	}
+
+	fnm2rel_good(cur_hash_nm, cur_db_nm, DB_HASH_SUFFIX);
+	fnm2rel_good(old_db_nm, cur_db_nm, "-old");
+	fnm2rel_good(new_db_nm, cur_db_nm, "-new");
+	fnm2rel_good(new_hash_nm, new_db_nm, DB_HASH_SUFFIX);
+
+	/* exclude other instances of this program */
+	if (!lock_dbclean(dcc_emsg, cur_db_nm))
+		dcc_logbad(dcc_ex_code, "%s: dbclean already running?",
+			   dcc_emsg);
+
+	/* create & lock the new database file */
+	new_db_fd = dcc_lock_open(dcc_emsg, new_db_nm, O_RDWR|O_CREAT,
+				  DCC_LOCK_OPEN_NOWAIT, DCC_LOCK_ALL_FILE, 0);
+	if (new_db_fd == -1)
+		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+	if (0 > ftruncate(new_db_fd, 0))
+		dcc_logbad(EX_IOERR, "truncate(%s,0): %s",
+			   new_db_nm, ERROR_STR());
+	new_db_fsize = 0;
+	new_db_created = 1;
+	new_db_csize = DB_PTR_BASE;
+
+	tgt_db_pagesize = 0;
+	if (0 > stat(cur_db_nm, &cur_db_sb)) {
+		if (errno != ENOENT)
+			dcc_logbad(EX_IOERR, "stat(%s): %s",
+				   cur_db_nm, ERROR_STR());
+		/* empty a missing database */
+		cleardb = 1;
+	} else if (cur_db_sb.st_size == 0) {
+		/* empty an empty database */
+		cleardb = 1;
+	} else if (grey_on && cur_db_sb.st_size < DB_MIN_MIN_MBYTE*1024*1024) {
+		/* Force a relatively large page size for typical tiny
+		 * greylist databases.  Try to use few mmap() pages */
+		tgt_db_pagesize = cur_db_sb.st_size/4;
+		if (tgt_db_pagesize < MIN_HASH_ENTRIES*sizeof(HASH_ENTRY))
+			tgt_db_pagesize = MIN_HASH_ENTRIES*sizeof(HASH_ENTRY);
+	}
+	new_db_pagesize = db_get_pagesize(0, tgt_db_pagesize);
+	write_new_hdr(1);
+
+
+	if (standalone) {
+		u_char busy;
+
+		/* open and lock the current database to ensure
+		 * the daemon is not running */
+		old_db_fd = dcc_lock_open(dcc_emsg, cur_db_nm, O_RDWR,
+					  DCC_LOCK_OPEN_NOWAIT,
+					  DCC_LOCK_ALL_FILE, &busy);
+		if (busy)
+			dcc_logbad(EX_USAGE, "database %s in use: %s",
+				   cur_db_nm, dcc_emsg);
+		if (cleardb
+		    && stat(cur_db_nm, &cur_db_sb) >= 0) {
+			if (cur_db_sb.st_size != 0)
+				dcc_logbad(EX_USAGE, "%s already exists",
+					   cur_db_nm);
+			cur_db_created = 1;
+		}
+
+		/* create and lock the current database if it did not exist
+		 * to ensure that the server daemon is not running */
+		if (old_db_fd < 0) {
+			old_db_fd = dcc_lock_open(dcc_emsg, cur_db_nm,
+						  O_RDWR|O_CREAT,
+						  DCC_LOCK_OPEN_NOWAIT,
+						  DCC_LOCK_ALL_FILE, 0);
+			if (old_db_fd < 0)
+				dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+			cur_db_created = 1;
+		}
+
+	} else {
+		/* Tell the daemon to start turning off the flooding
+		 * so we can adjust its positions in the flood map file
+		 * Try very hard to talk to it because releasing the database
+		 * can cause some UNIX flavors to stall dccd. */
+		DCC_CLNT_FGS clnt_fgs;
+
+		clnt_fgs = DCC_CLNT_FG_SLOW;
+		if (grey_on)
+			clnt_fgs |= DCC_CLNT_FG_GREY;
+		ctxt = dcc_tmp_clnt_init(dcc_emsg, 0, &srvr,
+					 0, clnt_fgs, info_flags);
+		/* try very hard to contact dccd */
+		if (!ctxt)
+			ctxt = dcc_tmp_clnt_init(dcc_emsg, 0, &srvr,
+						 0, clnt_fgs, info_flags);
+		if (!ctxt)
+			dcc_logbad(EX_DCC_RESTART, "initial contact: %s",
+				   dcc_emsg);
+
+		++flods_off;
+		if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_SHUTDOWN,
+				 SHORT_DELAY))
+			dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+	}
+
+	/* resolve whitelisted host names before locking the database */
+	parse_white();
+
+	/* Tell the daemon to unlock the database between operations
+	 * and insist it stop flooding. */
+	if (!standalone) {
+		/* give the daemon a chance to stop pumping the floods */
+		for (;;) {
+			if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_CHECK,
+					 SHORT_DELAY))
+				dcc_logbad(EX_UNAVAILABLE, "%s", dcc_emsg);
+
+			i = flod_running(aop_resp.resp.val.string);
+			if (i < 0)
+				dcc_logbad(EX_PROTOCOL,
+					   "%s: unrecognized \"%s\"",
+					   dcc_aop2str(0, 0,
+						       DCC_AOP_FLOD,
+						       DCC_AOP_FLOD_CHECK),
+					   aop_resp.resp.val.string);
+			if (i == 0)
+				break;
+			if (time(0) > clean_start.tv_sec+45) {
+				if (flods_off < 2) {
+					++flods_off;
+					if (!persist_aop(DCC_AOP_FLOD,
+							DCC_AOP_FLOD_HALT,
+							SHORT_DELAY))
+					    dcc_logbad(dcc_ex_code, "%s",
+						       dcc_emsg);
+					continue;
+				}
+				if (time(0) > clean_start.tv_sec+60)
+					dcc_logbad(EX_UNAVAILABLE,
+						   "failed to stop floods: %s",
+						   aop_resp.resp.val.string);
+			}
+			usleep(100*1000);
+		}
+		dccd_unlocked = 1;
+		if (!persist_aop(DCC_AOP_DB_CLEAN, 0, SHORT_DELAY))
+			dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+		/* The daemon adds its own and removes our hold on flooding
+		 * when we tell it to unlock the database after every
+		 * operation. */
+		--flods_off;
+	}
+
+	if (cleardb) {
+		quiet_trace_msg(DCC_VERSION" %s database %s",
+				cur_db_created ? "creating" : "clearing",
+				cur_db_nm);
+
+	} else if (clean_mode == REPAIR_MODE) {
+		dcc_error_msg("explicit repair of %s", cur_db_nm);
+
+	} else {
+		if (!db_open(0, old_db_fd, cur_db_nm, 0,
+			     DB_OPEN_RDONLY
+			     | (standalone
+				? DB_OPEN_LOCK_NOWAIT : DB_OPEN_LOCK_WAIT))) {
+			/* If the hash table is sick, check timestamps only
+			 * as much as no hash table allows.
+			 * Then rebuild the hash table. */
+			clean_mode = REPAIR_MODE;
+
+		} else {
+			if (db_debug) {
+				quiet_trace_msg("%s  %s",
+						db_window_size_str, new_db_nm);
+				quiet_trace_msg("%d old hash entries total,"
+						" %d or %d%% used",
+						HADDR2LEN(db_hash_len),
+						HADDR2LEN(db_hash_used),
+						(int)((HADDR2LEN(db_hash_used)
+						       * 100.0)
+						      /HADDR2LEN(db_hash_len)));
+			}
+			old_db_parms = db_parms;
+			old_db_hash_used = db_hash_used;
+
+			/* save a handle on the old database to get
+			 * reports that arrive while we expire it */
+			old_db_fd = dup(db_fd);
+			if (old_db_fd < 0)
+				dcc_logbad(EX_OSERR, "dup(%s): %s",
+					   cur_db_nm, ERROR_STR());
+
+			/* read old and create new database file */
+			if (!expire(db_csize)) {
+				old_db_hash_used = 0;
+				clean_mode = REPAIR_MODE;
+			}
+		}
+
+		if (clean_mode == REPAIR_MODE)
+			dcc_error_msg("repairing %s", cur_db_nm);
+	}
+
+	/* if we are repairing the hash table (including now repairing
+	 * after encountering problems while expiring),
+	 * copy the current file with minimal expiring */
+	if (clean_mode == REPAIR_MODE
+	    && !cleardb
+	    && !copy_db())
+		exit_dbclean(EX_UNAVAILABLE);
+	build_hash();
+
+	/* Copy any records from the old file to the new file that were
+	 * added to the old file while we were creating the new file. */
+	if (!cleardb
+	    && !catchup(dcc_emsg))
+		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+
+	/* we have the new database locked
+	 *
+	 * preserve the current data file as "*-old" */
+	rename_bail(cur_db_nm, old_db_nm);
+
+	/* delete the current hash file, and install both new files */
+	rename_bail(new_hash_nm, cur_hash_nm);
+	strcpy(new_hash_nm, cur_hash_nm);
+	new_hash_created = 0;
+	if (db_hash_fd >= 0)
+		strcpy(db_hash_nm, cur_hash_nm);
+
+	rename_bail(new_db_nm, cur_db_nm);
+	strcpy(new_db_nm, cur_db_nm);
+	new_db_created = 0;
+	if (db_fd > 0)	/* NOTE(review): db_hash_fd above is tested with
+			 * >= 0; confirm that excluding fd 0 is intended */
+		strcpy(db_nm, cur_db_nm);
+	cur_db_created = 0;
+
+	if (cleardb) {
+		flod_mmap_path_set();
+		unlink_whine(flod_mmap_path, 1);
+		if (!db_close(1))
+			exit_dbclean(EX_UNAVAILABLE);
+		exit_dbclean(EX_OK);
+	}
+
+	/* if the daemon was not running, we're finished */
+	if (standalone) {
+		/* install the flood positions if things are ok */
+		if (flod_mmaps) {
+			memcpy(flod_mmaps, &new_flod_mmaps,
+			       sizeof(new_flod_mmaps));
+			flod_unmap(0, 0);
+		}
+		if (!db_close(1))
+			exit_dbclean(EX_UNAVAILABLE);
+		exit_dbclean(EX_OK);
+	}
+
+	/* tell the daemon to switch to the new database.  This will leave
+	 * the daemon stuck waiting for us to unlock the new database. */
+	dccd_new_db("copy late arrivals");
+
+	/* install the flood positions if things are ok */
+	if (flod_mmaps) {
+		memcpy(flod_mmaps, &new_flod_mmaps,
+		       sizeof(new_flod_mmaps));
+		flod_unmap(0, 0);
+	}
+
+	/* Copy any records from the old file to the new file in the
+	 * race to tell the daemon to switch to the new file.
+	 * The new file is still locked from build_hash().
+	 * The daemon should be stuck waiting to open it in the
+	 * DCC_AOP_DB_NEW request via the preceding dccd_new_db().
+	 *
+	 * Since the daemon has switched and probably cannot go back,
+	 * ignore any errors */
+	catchup(0);
+	if (!db_close(1))
+		exit_dbclean(EX_UNAVAILABLE);
+
+	/* finish() will be called via exit() to tell the daemon to resume
+	 * flooding if necessary.  However, in the normal case, we removed
+	 * all counts against flooding before calling dccd_new_db() */
+	exit_dbclean(EX_OK);
+}
+
+
+
+/* Move output flood positions from old-file to new-file offsets.
+ *	Marked positions at or before old_db_pos are shifted by the distance
+ *	between the new and old write positions and lose their mark.
+ *	Returns the smallest position still waiting to be adjusted,
+ *	or 0 if there is none. */
+static DB_PTR
+adj_mmap(void)
+{
+	const DB_PTR shift = new_db_csize - old_db_pos;
+	DB_PTR next_pos = 0;
+	FLOD_MMAP *map;
+
+	for (map = new_flod_mmaps.mmaps;
+	     map <= LAST(new_flod_mmaps.mmaps);
+	     ++map) {
+		/* marks are cleared as positions are adjusted,
+		 * so an unmarked entry is already done */
+		if (!(map->flags & FLODMAP_FG_MARK))
+			continue;
+
+		if (map->confirm_pos <= old_db_pos) {
+			/* we have reached this position, so move it now */
+			map->confirm_pos += shift;
+			map->flags &= ~FLODMAP_FG_MARK;
+			continue;
+		}
+
+		/* still ahead of us; remember the nearest one for later */
+		if (next_pos == 0
+		    || map->confirm_pos < next_pos)
+			next_pos = map->confirm_pos;
+	}
+
+	/* the delay position gets the same treatment, once */
+	if (adj_delay_pos) {
+		if (new_flod_mmaps.delay_pos <= old_db_pos) {
+			new_flod_mmaps.delay_pos += shift;
+			adj_delay_pos = 0;
+		} else if (next_pos == 0
+			   || new_flod_mmaps.delay_pos < next_pos) {
+			next_pos = new_flod_mmaps.delay_pos;
+		}
+	}
+
+	return next_pos;
+}
+
+
+
+/* look up a checksum in the hash table
+ *	Leave db_sts.rcd2 pointing at the record.
+ *	Returns 1 if the lookup itself worked, whether or not the checksum
+ *	was present, and 0 if locking or the lookup failed. */
+static u_char
+get_ck(DB_RCD_CK **ckp,			/* point this to the checksum */
+       DCC_CK_TYPES type, const DCC_SUM sum)
+{
+	DB_FOUND found;
+
+	/* The daemon can change the internal hash table links
+	 * unless we hold the lock on the file. */
+	if (!DB_IS_LOCKED()) {
+		if (db_lock() < 0)
+			return 0;
+	}
+
+	dcc_emsg[0] = '\0';
+	found = db_lookup(dcc_emsg, type, sum, 0, MAX_HASH_ENTRIES,
+			  &db_sts.hash, &db_sts.rcd2, ckp);
+	switch (found) {
+	case DB_FOUND_IT:
+	case DB_FOUND_EMPTY:
+	case DB_FOUND_CHAIN:
+	case DB_FOUND_INTRUDER:
+		return 1;
+
+	case DB_FOUND_LATER:
+	case DB_FOUND_SYSERR:
+		dcc_error_msg("hash lookup for %s from "L_HPAT" = %d: %s",
+			      DB_TYPE2STR(type), old_db_pos, found,
+			      dcc_emsg);
+		return 0;
+	}
+
+	return 0;
+}
+
+
+
+/* check the leading report for a checksum that is not recent
+ *	On entry db_sts.rcd points to the record under consideration.
+ *	Leave db_sts.rcd2 pointing at the leading record, as found
+ *	by get_ck().
+ *	type and rcd_ck are the type and the checksum entry within
+ *	that record. */
+static int				/* -1=broken database 0=expire 1=keep */
+get_lead(DCC_CK_TYPES type, const DB_RCD_CK *rcd_ck)
+{
+	DB_RCD_CK *lead_ck;
+	DCC_TGTS lead_tgts;
+
+	if (DCC_CK_IS_REP_CMN(grey_on, type)) {
+		/* do not keep reputations on systems without reputation code */
+		return 0;
+	}
+
+	if (!get_ck(&lead_ck, type, rcd_ck->sum))
+		return -1;
+
+	/* the lookup worked but found nothing, although the checksum
+	 * we are looking for came from the database itself */
+	if (!lead_ck) {
+		dcc_error_msg("no leader for %s %s at "L_HPAT,
+			      DB_TYPE2STR(type),
+			      dcc_ck2str_err(type, rcd_ck->sum, 0),
+			      old_db_pos);
+		return -1;
+	}
+
+	/* We know the target checksum is not recent.  Forget the target if
+	 * both the target and the leader are ancient. The leader might not be
+	 * the newest checksum, but it usually is. Note also that the target
+	 * might be the leader. */
+	if (dcc_ts_older_ts(&db_sts.rcd2.d.r->ts, &new_ex_ts[type].spam)
+	    && dcc_ts_older_ts(&db_sts.rcd.d.r->ts, &new_ex_ts[type].spam))
+		return 0;
+
+	/* Only the leader's total matters here; the target's own total
+	 * used to be fetched as well but was never consulted. */
+	lead_tgts = DB_TGTS_CK(lead_ck);
+
+	/* We know either the leader or the target is not ancient.
+	 * Keep the target if the leader's total is respectable.
+	 * We might eventually compress the target. */
+	return (lead_tgts >= db_tholds[type]);
+}
+
+
+
+/* restart the progress reporting clocks and counters for a new pass */
+static void
+report_progress_init(void)
+{
+	gettimeofday(&db_time, 0);
+
+	/* everything is measured from now */
+	progress_rpt_last = db_time;
+	progress_rpt_checked = db_time;
+	progress_rpt_start.tv_sec = db_time.tv_sec;
+
+	/* nothing has been reported yet; check again after 100 operations */
+	progress_rpt_started = 0;
+	progress_rpt_cnt = progress_rpt_base = 100;
+}
+
+
+
+static time_t				/* us since last check
+ *	Re-tune how many operations pass before the next check, and
+ *	log a progress line when the reporting interval has passed.
+ *	final	nonzero forces a report unless the last one showed 100%
+ *	s1, s2	text placed before and after the counts in the message
+ *	done, total	work finished and total work, used for the percentage
+ *	scale	divisor applied to done and total for display only
+ *	In QUICK_MODE a non-final call more than 30 minutes after
+ *	clean_start calls dcc_logbad(EX_UNAVAILABLE, ...). */
+report_progress(u_char final,
+		const char *s1, const char *s2,
+		DB_PTR done, DB_PTR total, DB_PTR scale)
+{
+	time_t reported_us, checked_us, secs, interval;
+	double percent;
+
+	if (!total)
+		percent = 100.0;
+	else
+		percent = (done*100.0)/total;
+
+	gettimeofday(&db_time, 0);
+	checked_us = tv_diff2us(&db_time, &progress_rpt_checked);
+	progress_rpt_checked = db_time;
+
+	/* Check frequently enough to report or unlock the database.
+	 * Adjust the number of operations until the next check
+	 * based on the time spent on the previous
+	 * NOTE(review): the product below is computed in double and then
+	 * stored in an int before the clamps that follow.  If DCC_US is
+	 * 1000000, as its name suggests, the value can exceed INT_MAX for
+	 * small checked_us -- confirm the extra DCC_US factor is intended. */
+	if (checked_us > 0)
+		progress_rpt_base = ((progress_rpt_base * 0.5 * DCC_US
+				      * min(REPORT_INTERVAL_FAST_SECS*DCC_US,
+					    UNLOCK_INTERVAL_USECS))
+				     / checked_us);
+	else
+		progress_rpt_base = 100;
+	if (progress_rpt_base < 100)
+		progress_rpt_base = 100;
+	if (progress_rpt_base > 10*1000)
+		progress_rpt_base = 10*1000;
+	progress_rpt_cnt = progress_rpt_base;
+
+	interval = ((db_debug > 1)
+		    ? REPORT_INTERVAL_FAST_SECS
+		    : REPORT_INTERVAL_SECS);
+
+	/* try not to start reporting progress at the end */
+	if (!progress_rpt_started
+	    && (total*1.0 - done*1.0) / progress_rpt_base <= interval*1.0)
+		return checked_us;
+
+	reported_us = tv_diff2us(&db_time, &progress_rpt_last);
+	if (reported_us >= interval * DCC_US
+	    || (final && progress_rpt_percent != 100)) {
+		progress_rpt_started = 1;
+		progress_rpt_percent = percent;
+		secs = db_time.tv_sec - progress_rpt_start.tv_sec;
+		secs -= secs % interval;    /* round down to a whole interval
+					     * so reports stay on a fixed
+					     * schedule from the start time */
+		progress_rpt_last.tv_sec = progress_rpt_start.tv_sec + secs;
+		if (db_debug > 1)
+			quiet_trace_msg("%s "L_DPAT" of "L_DPAT" %s or %d%%"
+					"    db_mmaps=%d hash=%d",
+					s1, done/scale, total/scale,
+					s2, progress_rpt_percent,
+					db_stats.db_mmaps, db_stats.hash_mmaps);
+		else
+			quiet_trace_msg("%s "L_DPAT" of "L_DPAT" %s or %d%%",
+					s1, done/scale, total/scale,
+					s2, progress_rpt_percent);
+	}
+
+
+	if (clean_mode == QUICK_MODE
+	    && !final) {
+		if (db_time.tv_sec > clean_start.tv_sec + 30*60)
+			dcc_logbad(EX_UNAVAILABLE, "quick cleaning too slow");
+	}
+
+	return checked_us;
+}
+
+
+
+/* Squeeze checksums out of the new record whose type has expired.
+ *	A checksum goes when the record's timestamp is older than the
+ *	"all" expiration for its type.  Each removal lowers the count in
+ *	new->fgs_num_cks, clears DB_RCD_FG_DELAY, sets DB_RCD_FG_TRIM,
+ *	and pulls *end_ck back by one entry. */
+static void
+fuzzy_obs(DB_RCD *new, DB_RCD_CK **end_ck)
+{
+	DB_RCD_CK *ck;
+	DCC_CK_TYPES ck_type;
+	int tail_len;			/* bytes after the removed entry */
+
+	for (ck = new->cks; ck < *end_ck; ) {
+		ck_type = DB_CK_TYPE(ck);
+		if (dcc_ts_older_ts(&new->ts, &new_ex_ts[ck_type].all)) {
+			++obs_rcds;
+			new->fgs_num_cks = (((new->fgs_num_cks - 1)
+					     & ~DB_RCD_FG_DELAY)
+					    | DB_RCD_FG_TRIM);
+			--*end_ck;
+
+			/* nothing to slide down if it was the last entry */
+			tail_len = (char *)*end_ck - (char *)ck;
+			if (tail_len == 0)
+				return;
+
+			/* close the gap and look at this slot again */
+			memmove(ck, ck+1, tail_len);
+		} else {
+			++ck;
+		}
+	}
+}
+
+
+
+static void	/* Choose def_expire_secs and def_expire_spamsecs.
+ *	Predict tomorrow's database size from db_parms and db_add_rate().
+ *	If the prediction exceeds db_max_byte, and the previous run's spam
+ *	duration was nonzero and no larger than the default, shrink both
+ *	defaults by the same ratio.
+ *	Otherwise set def_exp_ratio to 1.0 and, if the previous spam
+ *	duration was below the default, take the age of the previous
+ *	expiration timestamps as the new defaults.
+ *	Only the first call does anything. */
+adj_def_expire(void)
+{
+	double new_dbsize, new_dbsize1, day_rate, db_ratio;
+	int spam_secs, secs;
+	struct timeval tv;
+	char new_dbsize_buf[20], csize_buf[20], old_csize_buf[20];
+	char day_rate_buf[20];
+
+	/* do this only once */
+	if (def_exp_ratio != 0.0)
+		return;
+
+	/* Compute the ratio of size of the database 24 hours from now
+	 * to the size of the window. Assume:
+	 *  - We will receive about the same number of reports in the next
+	 *	24 hours as the last 24.  This is a good assumption for
+	 *	weekdays, but as much as 30% wrong about weekends.
+	 *  - Dbclean will be run once per day at the current time.
+	 *  - The size of the database is a linear function of expiration
+	 *	duration.  This is tenuous when the spam expiration duration
+	 *	is less than 1 day.
+	 * Use the maximum of two guesses for tomorrow's database size.
+	 *	One guess is the current size, based on assuming that
+	 *	we will use roughly the same expiration durations and
+	 *	so the database will grow to about the size it now has.
+	 *	The other guess uses the previous database size and the
+	 *	average data rate.  It compensates for short term changes
+	 *	in the rate and for running dbclean more than once per day. */
+	new_dbsize = db_parms.db_csize;
+	size2str(csize_buf, sizeof(csize_buf), new_dbsize, 1);
+	new_dbsize1 = db_parms.old_db_csize;
+	size2str(old_csize_buf, sizeof(old_csize_buf), new_dbsize1, 1);
+	day_rate = db_add_rate(&db_parms, 0);
+	if (day_rate >= 0.0)
+		day_rate *= (24*60*60);	/* scale the rate to one day */
+	size2str(day_rate_buf, sizeof(day_rate_buf), day_rate, 1);
+
+	/* without information, be pessimistic and assume 1.4 GByte/day */
+	if (day_rate <= 0.0 && !grey_on)
+		day_rate = 1.4*1024.0*1024.0*1024.0;
+	if (day_rate > 0.0) {
+		new_dbsize1 += day_rate;
+		if (new_dbsize < new_dbsize1)
+			new_dbsize = new_dbsize1;
+	}
+
+	size2str(new_dbsize_buf, sizeof(new_dbsize_buf), new_dbsize, 1);
+	if (db_debug)
+		quiet_trace_msg("predict new_dbsize=%s from db_csize=%s"
+				" old_db_csize=%s rate=%s",
+				new_dbsize_buf,
+				csize_buf, old_csize_buf, day_rate_buf);
+
+	/*  Assume there will be 20% as many bytes used in the hash table
+	 * as in the database */
+	new_dbsize *= 1.2;
+
+	/* we cannot adjust the defaults
+	 *	- 1st time dbclean run
+	 *	- if the previous run used a larger than default value
+	 *	- there is no need to reduce the default because the predicted
+	 *	    maximum size is smaller than the target maximum
+	 */
+	spam_secs = db_parms.ex_secs[DCC_CK_FUZ2].spam;
+	if (spam_secs != 0
+	    && spam_secs <= DB_EXPIRE_SPAMSECS_DEF
+	    && new_dbsize > db_max_byte
+	    && (db_ratio = (db_max_byte / new_dbsize)) < 1.0) {
+		def_exp_ratio = (spam_secs * db_ratio) / DB_EXPIRE_SPAMSECS_DEF;
+
+		/* change the two durations together and so with same errors */
+		def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF * def_exp_ratio;
+		def_expire_secs = DB_EXPIRE_SECS_DEF * def_exp_ratio;
+
+		/* whole hours, but no less than the minimum */
+		def_expire_secs -= def_expire_secs % (60*60);
+		if (def_expire_secs < DB_EXPIRE_SECS_DEF_MIN)
+			def_expire_secs = DB_EXPIRE_SECS_DEF_MIN;
+
+		/* whole days, but no less than the minimum */
+		def_expire_spamsecs -= def_expire_spamsecs % (24*60*60);
+		if (def_expire_spamsecs < DB_EXPIRE_SPAMSECS_DEF_MIN)
+			def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF_MIN;
+
+#if DB_MIN_MBYTE == 0 && !defined(GOT_PHYSMEM)
+		if (def_expire_secs == DB_EXPIRE_SECS_DEF_MIN
+		    || def_expire_spamsecs == DB_EXPIRE_SPAMSECS_DEF_MIN)
+			quiet_trace_msg("cannot determine physical RAM; rebuild"
+					" with ./configure with-db-memory");
+#endif
+		return;
+	}
+
+	def_exp_ratio = 1.0;
+
+	/* if the defaults do not need to be reduced now but they
+	 * were reduced before, then relax them gently */
+	if (spam_secs < DB_EXPIRE_SPAMSECS_DEF) {
+		dcc_ts2timeval(&tv, &db_parms.ex_spam[DCC_CK_FUZ2]);
+		secs = clean_start.tv_sec - tv.tv_sec;
+		if (secs > 0
+		    && secs < DB_EXPIRE_SPAMSECS_DEF)
+			def_expire_spamsecs = secs;
+
+		dcc_ts2timeval(&tv, &db_parms.ex_all[DCC_CK_FUZ2]);
+		secs = clean_start.tv_sec - tv.tv_sec;
+		if (secs > 0
+		    && secs < DB_EXPIRE_SECS_DEF)
+			def_expire_secs = secs;
+	}
+}
+
+
+
+/* copy the existing database, discard junk and old entries
+ *
+ * Settles expire_secs and expire_spamsecs (calling adj_def_expire() when
+ * either is negative), fills new_ex_secs[] and new_ex_ts[] with the
+ * per-type durations and cutoff timestamps, and then walks the old file
+ * from DB_PTR_BASE up to old_db_csize.  Each record that is still needed
+ * is written to the new file with write_new_rcd(), possibly trimmed or
+ * split into records of equal totals.
+ *
+ * EXPIRE_BAIL() cancels the alarm, calls flod_unmap() and db_close(),
+ * and returns 0.  Otherwise the result is 1 only if db_unload() and
+ * db_close() both return nonzero at the end. */
+static u_char				/* 1=done 0=database broken */
+expire(DB_PTR old_db_csize)
+{
+#define EXPIRE_BAIL() {alarm(0); flod_unmap(0, 0); db_close(0); return 0;}
+
+	DCC_TS ts;
+	u_char emptied, reduced_defaults;
+	u_char old_ok[DCC_DIM_CKS];
+	DB_RCD rcd, new;
+	const DB_RCD_CK *rcd_ck, *rcd_ck2;
+	DB_RCD_CK *new_ck;
+	DCC_TGTS tgts_raw, ck_tgts;
+	u_char needed, obs_lvl, timely;
+	int old_num_cks, new_num_cks, nokeep_num_cks;
+	DB_PTR min_confirm_pos, next_adj_pos;
+	FLOD_MMAP *mp;
+	DCC_CK_TYPES prev_type, type, type2;
+	int rcd_len;
+	struct stat sb;
+	time_t need_unlock;
+	int i;
+
+	reduced_defaults = 0;
+	if (expire_secs < 0) {
+		adj_def_expire();
+		if (def_expire_secs > expire_spamsecs
+		    && expire_spamsecs > 0) {
+			expire_secs = expire_spamsecs;
+		} else {
+			if (def_expire_secs != DB_EXPIRE_SECS_DEF
+			    && def_exp_ratio != 1.0)
+				reduced_defaults = 1;
+			expire_secs = def_expire_secs;
+		}
+	}
+	if (expire_spamsecs < 0) {
+		adj_def_expire();
+		if (def_expire_spamsecs < expire_secs) {
+			expire_spamsecs = expire_secs;
+		} else {
+			if (def_expire_spamsecs != DB_EXPIRE_SPAMSECS_DEF
+			    && def_exp_ratio != 1.0)
+				reduced_defaults = 1;
+			expire_spamsecs = def_expire_spamsecs;
+		}
+	}
+
+	if (expire_spamsecs > 0 && expire_spamsecs < expire_secs)
+		dcc_logbad(EX_USAGE,
+			   "spam expiration -E must be longer than -e");
+
+	expired_rcds = 0;
+	expired_cks = 0;
+	kept_cks = white_cks;		/* start from the whitelist count */
+	need_unlock = 0;
+	report_progress_init();
+
+	/* Compute timestamps for records we keep.
+	 * Use the values from the previous use of dbclean as defaults
+	 * unless they are bogus */
+	memset(old_ok, 0, sizeof(old_ok));
+	dcc_secs2ts(&ts, clean_start.tv_sec);
+	for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
+		DB_EX_SEC *th = &db_parms.ex_secs[type];
+
+		if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type))
+			continue;
+		if (DCC_CK_IS_REP_OP(grey_on, type))
+			continue;
+
+		if (th->spam <= 0 || th->spam > DB_EXPIRE_SECS_MAX)
+			continue;
+		if (th->all <= 0 || th->all > th->spam)
+			continue;
+
+		if (dcc_ts_newer_ts(&db_parms.ex_spam[type], &ts))
+			continue;
+		if (dcc_ts_newer_ts(&db_parms.ex_all[type], &ts))
+			continue;
+
+		old_ok[type] = 1;	/* old values for this type are ok */
+	}
+
+	for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
+		DB_EX_SEC *new_th = &new_ex_secs[type];
+		DB_EX_TS_TYPE *new_ts = &new_ex_ts[type];
+		int old_all = db_parms.ex_secs[type].all;
+		int old_spam = db_parms.ex_secs[type].spam;
+
+		if (type == DCC_CK_SRVR_ID) {
+			/* keep server-ID declarations 5 weeks or a week longer
+			 * than reputations so that they will be flooded 1st */
+			new_th->all = DB_EXPIRE_SRVR_ID_SECS;
+			new_th->spam = DB_EXPIRE_SRVR_ID_SECS;
+
+		} else if (grey_on) {
+			if (old_ok[type]) {
+				/* This is the path by which the dccd -G
+				 * parameters are used. */
+				new_th->all = old_all;
+				new_th->spam = old_spam;
+			} else if (DCC_CK_IS_GREY_TRIPLE(1, type)) {
+				new_th->all = DEF_GREY_WINDOW;
+				new_th->spam = DEF_GREY_WHITE;
+			} else if (DCC_CK_IS_GREY_MSG(1, type)
+				   || type == DCC_CK_BODY) {
+				new_th->all = DEF_GREY_WINDOW;
+				new_th->spam = DEF_GREY_WINDOW;
+			} else {
+				new_th->all = 1;
+				new_th->spam = 1;
+			}
+		} else if (have_expire_parms < 0 && old_ok[type]
+			   && (db_parms.flags & DB_PARM_EXP_SET)) {
+			/* use the old durations because they are valid
+			 * and we have no expiration parameters */
+			new_th->all = old_all;
+			new_th->spam = old_spam;
+
+		} else {
+			new_th->all = expire_secs;
+			new_th->spam = (DCC_CK_LONG_TERM(type)
+					? expire_spamsecs
+					: expire_secs);
+			if (reduced_defaults) {
+				quiet_trace_msg("adjust default by"
+						" %4.2f to -e%dhours"
+						" -E%ddays",
+						def_exp_ratio,
+						expire_secs/(60*60),
+						expire_spamsecs
+						/ (24*60*60));
+				reduced_defaults = 0;	/* say it only once */
+			}
+		}
+
+		/* compute oldest timestamp for this type of checksum,
+		 * without going crazy with "-Enever" */
+		dcc_secs2ts(&new_ts->spam,
+			    clean_start.tv_sec - min(clean_start.tv_sec,
+						     new_th->spam));
+		dcc_secs2ts(&new_ts->all,
+			    clean_start.tv_sec - min(clean_start.tv_sec,
+						     new_th->all));
+	}
+
+	/* put the timestamps into the new file */
+	write_new_hdr(1);
+
+	/* if we are running as root,
+	 * don't change the owner of the database */
+	if (getuid() == 0) {
+		if (0 > fstat(old_db_fd, &sb))
+			dcc_logbad(EX_IOERR, "fstat(%s): %s",
+				   old_db_nm, ERROR_STR());
+		if (0 > fchown(new_db_fd, sb.st_uid, sb.st_gid))
+			dcc_logbad(EX_IOERR, "fchown(%s,%d,%d): %s",
+				   new_db_nm, (int)sb.st_uid, (int)sb.st_gid,
+				   ERROR_STR());
+	}
+
+	if (DB_PTR_BASE != lseek(old_db_fd, DB_PTR_BASE, SEEK_SET))
+		dcc_logbad(EX_IOERR, "lseek(%s,%d): %s",
+			   cur_db_nm, DB_PTR_BASE, ERROR_STR());
+	read_rcd_invalidate(0);
+
+	flod_mmap(0, &db_parms.sn, 0, 1, 1);
+	if (flod_mmaps)
+		memcpy(&new_flod_mmaps, flod_mmaps, sizeof(new_flod_mmaps));
+	min_confirm_pos = new_flod_mmaps.delay_pos;
+	next_adj_pos = DB_PTR_BASE;
+	for (mp = new_flod_mmaps.mmaps;
+	     mp <= LAST(new_flod_mmaps.mmaps);
+	     ++mp) {
+		if (mp->rem_hostname[0] == '\0') {
+			mp->flags &= ~FLODMAP_FG_MARK;
+		} else {
+			mp->flags |= FLODMAP_FG_MARK;
+			if (min_confirm_pos > mp->confirm_pos)
+				min_confirm_pos = mp->confirm_pos;
+		}
+	}
+	adj_delay_pos = (new_flod_mmaps.delay_pos != 0) ? 1 : 0;
+
+	emptied = cleardb;
+	dcc_timeval2ts(&new_flod_mmaps.sn, &clean_start, 0);
+
+	/* copy the old file to the new,
+	 * discarding and compressing old data as we go */
+	for (old_db_pos = DB_PTR_BASE;
+	     old_db_pos < old_db_csize;
+	     old_db_pos += rcd_len) {
+		if (--progress_rpt_cnt <= 0)
+			need_unlock += report_progress(0, "  processed",
+						       "MBytes",
+						       old_db_pos, old_db_csize,
+						       1024*1024);
+
+		if (old_db_pos == next_adj_pos)
+			next_adj_pos = adj_mmap();
+
+		if (clean_mode != REPAIR_MODE) {
+			/* read the record by mapping if not repairing */
+			if (!db_map_rcd(0, &db_sts.rcd, old_db_pos, &rcd_len))
+				EXPIRE_BAIL();
+			memcpy(&rcd, db_sts.rcd.d.r, rcd_len);
+		} else {
+			rcd_len = read_rcd(0, &rcd,
+					   old_db_fd, old_db_pos, cur_db_nm);
+			if (rcd_len <= 0) {
+				if (rcd_len == 0)
+					dcc_error_msg("unexpected EOF in %s at "
+						      L_HPAT" instead of "
+						      L_HPAT,
+						      cur_db_nm,
+						      old_db_pos,
+						      old_db_csize);
+				/* give up and ask our neighbors to rewind */
+				emptied = 1;
+				old_db_pos = old_db_csize;
+				break;
+			}
+		}
+
+		/* skip end-of-page padding,
+		 * which is a record header without any checksums */
+		if (rcd_len == sizeof(rcd)-sizeof(rcd.cks))
+			continue;
+
+		if (DB_RCD_ID(&rcd) == DCC_ID_WHITE) {
+			/* skip whitelist entries if whitelist source is ok */
+			if (!keep_white)
+				continue;
+			/* refresh whitelist entries if source is bad */
+			dcc_timeval2ts(&rcd.ts, &clean_start, 0);
+		}
+
+		old_num_cks = DB_NUM_CKS(&rcd);
+
+		/* expire or throw away deleted reports */
+		tgts_raw = DB_TGTS_RCD_RAW(&rcd);
+		if (tgts_raw == 0) {
+			++expired_rcds;
+			expired_cks += old_num_cks;
+			continue;
+		}
+		if (tgts_raw > DCC_TGTS_MAX_DB) {
+			dcc_error_msg("discarding report at "L_HPAT
+				      " with bogus target count %#x",
+				      old_db_pos, tgts_raw);
+			++expired_rcds;
+			expired_cks += old_num_cks;
+			continue;
+		}
+
+		if (dcc_ts_newer_ts(&rcd.ts, &future_ts)) {
+			static int whines = 0;
+			if (whines < 50)	/* NOTE(review): limit is 50 but
+						 * the text says stop at 20 */
+				dcc_error_msg("discarding report at "L_HPAT
+					      " from the future %s%s",
+					      old_db_pos,
+					      ts2str_err(&rcd.ts),
+					      ++whines >= 20
+					      ? "; stop complaining"
+					      : "");
+			++expired_rcds;
+			expired_cks += old_num_cks;
+			continue;
+		}
+
+
+		needed = 0;
+		obs_lvl = 0;
+		timely = 1;		/* cleared below for old checksums */
+		nokeep_num_cks = 0;
+		memcpy(&new, &rcd, sizeof(new)-sizeof(new.cks));
+		new.fgs_num_cks &= (DB_RCD_FG_TRIM | DB_RCD_FG_SUMRY
+				    | DB_RCD_FG_DELAY);
+		new_ck = new.cks;
+		for (prev_type = DCC_CK_INVALID, rcd_ck = rcd.cks;
+		     rcd_ck < &rcd.cks[old_num_cks];
+		     prev_type = type, ++rcd_ck) {
+			type = DB_CK_TYPE(rcd_ck);
+			if (!DCC_CK_OK_DB(grey_on, type)) {
+				static int whines = 0;
+				if (whines < 20)
+					dcc_error_msg("discarding %s"
+						      " checksum at "L_HPAT"%s",
+						      DB_TYPE2STR(type),
+						      old_db_pos,
+						      ++whines >= 20
+						      ? "; stop complaining"
+						      : "");
+				++expired_cks;
+				new.fgs_num_cks |= DB_RCD_FG_TRIM;
+				new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
+				continue;
+			}
+
+			if (type <= prev_type
+			    && prev_type != DCC_CK_FLOD_PATH) {
+				dcc_error_msg("discarding out of order %s"
+					      " checksum at "L_HPAT,
+					      DB_TYPE2STR(type),
+					      old_db_pos);
+				++expired_cks;
+				new.fgs_num_cks |= DB_RCD_FG_TRIM;
+				new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
+				continue;
+			}
+
+			/* Silently discard pure junk from other servers,
+			 * provided it is junk by default */
+			if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type)
+			    && DB_GLOBAL_NOKEEP(grey_on, type)
+			    && type != DCC_CK_FLOD_PATH
+			    && type != DCC_CK_SRVR_ID
+			    && DB_RCD_ID(&rcd) != DCC_ID_WHITE) {
+				++expired_cks;
+				continue;
+			}
+
+			/* Keep paths except on old records or records that
+			 * have been trimmed or compressed.
+			 * Never remove paths from server-ID declarations. */
+			if (type == DCC_CK_FLOD_PATH) {
+				if (DB_RCD_TRIMMED(&new)
+				    || DB_RCD_ID(&new) == DCC_ID_COMP)
+					continue;
+				/* forget line number on old whitelist entry */
+				if (DB_RCD_ID(&rcd) == DCC_ID_WHITE)
+					continue;
+				rcd_ck2 = rcd_ck+1;	/* NOTE(review): read
+							 * below before it is
+							 * compared with the
+							 * end of rcd.cks */
+				for (;;) {
+					type2 = DB_CK_TYPE(rcd_ck2);
+					if (type2 == DCC_CK_SRVR_ID
+					    || !dcc_ts_older_ts(&rcd.ts,
+							&new_ex_ts[type2
+							    ].all)) {
+					    /* keep this path since this report
+					     * is a server-ID declaration
+					     * or not old */
+					    *new_ck = *rcd_ck;
+					    ++new_ck;
+					    ++new.fgs_num_cks;
+					    ++nokeep_num_cks;
+					    break;
+					}
+					if (++rcd_ck2>=&rcd.cks[old_num_cks]) {
+					    /* we are discarding this path */
+					    new.fgs_num_cks |= DB_RCD_FG_TRIM;
+					    new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
+					    break;
+					}
+				}
+				continue;
+			}
+
+			if (!dcc_ts_older_ts(&rcd.ts, &new_ex_ts[type].all)) {
+				/* This report is recent.
+				 * However, obsolete or junk checksums
+				 * don't make the report needed */
+				if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type)
+				    && DB_RCD_ID(&rcd) != DCC_ID_WHITE) {
+					++nokeep_num_cks;
+				} else if (DB_CK_OBS(rcd_ck)) {
+					/* This checksum is obsolete.
+					 * If it has the highest level of
+					 * fuzziness, then it controls whether
+					 * the whole report is needed. */
+					if (obs_lvl < db_ck_fuzziness[type]) {
+					    obs_lvl = db_ck_fuzziness[type];
+					    needed = 0;
+					}
+				} else {
+					/* This checksum is not obsolete.
+					 * If it is at least as fuzzy as any
+					 * other checksum, then it can say
+					 * the report is needed */
+					if (obs_lvl <= db_ck_fuzziness[type]) {
+					    obs_lvl = db_ck_fuzziness[type];
+					    needed = 1;
+					}
+				}
+
+			} else {
+				/* This checksum is at least somewhat old.
+				 * Throw away delete requests
+				 * and other servers' useless checksums */
+				if (tgts_raw == DCC_TGTS_DEL
+				    || DB_TEST_NOKEEP(db_parms.nokeep_cks,
+						      type)) {
+					++expired_cks;
+					new.fgs_num_cks |= DB_RCD_FG_TRIM;
+					new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
+					continue;
+				}
+				/* Throw away old obsolete checksums
+				 * and entire reports if the fuzziest
+				 * checksum is obsolete */
+				if (DB_CK_OBS(rcd_ck)) {
+					if (obs_lvl < db_ck_fuzziness[type]) {
+					    obs_lvl = db_ck_fuzziness[type];
+					    needed = 0;
+					}
+					++expired_cks;
+					new.fgs_num_cks |= DB_RCD_FG_TRIM;
+					new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
+					continue;
+				}
+
+				/* old summaries are unneeded, because
+				 * they have already been flooded.
+				 * They do not contribute to local counts */
+				if (DB_RCD_SUMRY(&rcd))
+					continue;
+
+				/* The checksum is old enough to compress, so
+				 * mark the record as eligible for splitting. */
+				timely = 0;
+
+				/* Discard this checksum if its ultimate total
+				 * is low or ancient
+				 * or if it reaches spam after this report.
+				 * To determine the ultimate total, we must
+				 * have a hash table to find the newest record,
+				 * which contains the final total */
+				if (clean_mode != REPAIR_MODE) {
+					i = get_lead(type, rcd_ck);
+					if (i < 0)
+					    EXPIRE_BAIL();
+					if (!i) {
+					    ++expired_cks;
+					    new.fgs_num_cks |= DB_RCD_FG_TRIM;
+					    new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
+					    continue;
+					}
+				}
+
+				if (obs_lvl <= db_ck_fuzziness[type]) {
+					/* Since we did not delete this
+					 * checksum, we need the record if this
+					 * checksum is fuzzy enough to control
+					 * our need. */
+					needed = 1;
+					/* If this is the fuzziest checksum we
+					 * have seen, then preceding and so
+					 * less fuzzy checksums are obsolete,
+					 * if they are old.
+					 * Assume that checksums are ordered
+					 * in the record by fuzziness. */
+					if (obs_lvl < db_ck_fuzziness[type]) {
+					    obs_lvl = db_ck_fuzziness[type];
+					    if (obs_lvl != DCC_CK_FUZ_LVL_REP
+						&& !grey_on)
+						fuzzy_obs(&new, &new_ck);
+					}
+				}
+			}
+
+			/* Keep this checksum if we decide the whole report
+			 * is needed. */
+			*new_ck = *rcd_ck;
+
+			++new_ck;
+			++new.fgs_num_cks;
+		}
+
+		/* occasionally let the daemon work with the old file */
+		if (need_unlock >= UNLOCK_INTERVAL_USECS) {
+			need_unlock = 0;
+			if (!standalone && !db_unlock())
+				EXPIRE_BAIL();
+		}
+
+		/* if none of its checksums are needed,
+		 * then discard the entire record */
+		if (!needed) {
+			expired_cks += DB_NUM_CKS(&new);
+			++expired_rcds;
+			continue;
+		}
+
+		new_num_cks = DB_NUM_CKS(&new);
+		kept_cks += new_num_cks - nokeep_num_cks;
+
+		/* Put the new record into the new file.
+		 *
+		 * If all of the record is recent, if it contains 1 checksum,
+		 * or if all of its totals are the same, then simply add it.
+		 *
+		 * Otherwise, divide it into records of identical counts
+		 * to allow compression or combining with other records. */
+		if (new_num_cks > 1
+		    && (!timely
+			|| DB_RCD_ID(&new) == DCC_ID_COMP
+			|| DB_RCD_TRIMMED(&new))) {
+			for (;;) {
+				/* skip the checksums that have the same total
+				 * as the first checksum to leave them with the
+				 * original new report */
+				new_ck = new.cks;
+				ck_tgts = DB_TGTS_CK(new_ck);
+				for (i = 1; i < new_num_cks; ++i) {
+					++new_ck;
+					if (DB_TGTS_CK(new_ck) != ck_tgts)
+					    break;
+				}
+				if (new_num_cks <= i)
+					break;
+				new_num_cks -= i;
+
+				/* write the checksums with the common total */
+				new.srvr_id_auth = DCC_ID_COMP;
+				new.fgs_num_cks = i;
+				if (!write_new_rcd(&new,
+						   sizeof(new) - sizeof(new.cks)
+						   + i*sizeof(new.cks[0])))
+					EXPIRE_BAIL();
+
+				/* handle the remaining checksums */
+				new.fgs_num_cks = new_num_cks;
+				memmove(&new.cks[0], &new.cks[i],
+					new_num_cks*sizeof(new.cks[0]));
+			}
+		}
+
+		/* write the rest (or all) of the new record */
+		if (!write_new_rcd(&new,
+				   sizeof(new) - sizeof(new.cks)
+				   + new_num_cks*sizeof(new.cks[0])))
+			EXPIRE_BAIL();
+	}
+	write_new_flush(1);
+	alarm(0);
+
+	/* do final adjustment of the flooding positions */
+	adj_mmap();
+	/* force them to be right if the system crashed with the
+	 * flod.map file on the disk more up to date and so after the
+	 * database file on the disk */
+	for (mp = new_flod_mmaps.mmaps;
+	     mp <= LAST(new_flod_mmaps.mmaps);
+	     ++mp) {
+		if (mp->rem_hostname[0] != '\0'
+		    && mp->confirm_pos > new_db_csize)
+			mp->confirm_pos = new_db_csize;
+	}
+
+	/* We are finished with the old file.
+	 *	Mark all of its pages MADV_DONTNEED */
+	rel_db_states();
+	i = (db_unload(0, 2) != 0);	/* i becomes the return value */
+	if (!db_close(1))
+		i = 0;
+
+	write_new_hdr(emptied);
+	report_progress(1, "  processed", "MBytes",
+			old_db_pos, old_db_csize, 1024*1024);
+	if (grey_on)
+		quiet_trace_msg("expired %d records and %d checksums in %s",
+				expired_rcds, expired_cks, cur_db_nm);
+	else
+		quiet_trace_msg("expired %d records and %d checksums,"
+				" obsoleted %d checksums in %s",
+				expired_rcds, expired_cks, obs_rcds, cur_db_nm);
+	return i;
+}
+
+
+
+/* Copy the old database to the new file while doing minimal expiring.
+ *
+ * Opens the current database read-only if it is not already open and
+ * reads its header.  A header with the version 4 or version 3 magic is
+ * converted field by field into the current layout in old_db_parms;
+ * any other unknown magic is reported with dcc_logbad().
+ * The serial number, cleaning times, expiration settings, nokeep set
+ * and flags are then copied into db_parms before expire() is run.
+ *
+ * Returns the result of expire(). */
+static u_char
+copy_db(void)
+{
+	static DB_VERSION_BUF old_version4 = DB_VERSION4_STR;
+	static DB_VERSION_BUF old_version3 = DB_VERSION3_STR;
+	union {
+	    DB_HDR	hdr;
+	    DB_V4_PARMS	v4;
+	    DB_V3_PARMS v3;
+	} old_db;
+	struct timeval sn;
+
+	/* do not lock the old database because the daemon must continue
+	 * to answer requests */
+	if (old_db_fd < 0) {
+		old_db_fd = open(cur_db_nm, O_RDONLY, 0);
+		if (old_db_fd == -1)
+			dcc_logbad(EX_IOERR, "open(%s): %s",
+				   cur_db_nm, ERROR_STR());
+	}
+
+	if (!read_db_hdr(dcc_emsg, &old_db.hdr, old_db_fd, cur_db_nm))
+		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+	if (memcmp(old_db.hdr.p.version, db_version_buf,
+		   sizeof(old_db.hdr.p.version))) {
+		/* not the current version, so try the known old layouts */
+		if (!memcmp(old_db.v4.version, old_version4,
+			   sizeof(old_db.v4.version))) {
+			memset(&old_db_parms, 0,
+			       sizeof(old_db_parms));
+			memcpy(old_db_parms.version, db_version_buf,
+			       sizeof(old_db_parms.version));
+
+			old_db_parms.db_csize = old_db.v4.db_csize;
+			old_db_parms.pagesize = old_db.v4.pagesize;
+			old_db_parms.sn = old_db.v4.sn;
+			old_db_parms.cleared = old_db.v4.cleared;
+			old_db_parms.cleaned = old_db.v4.cleaned;
+			old_db_parms.cleaned_cron = old_db.v4.cleaned_cron;
+			memcpy(old_db_parms.ex_spam, old_db.v4.ex_spam,
+			       sizeof(old_db_parms.ex_spam));
+			/* ex_all must come from the version 4 ex_all
+			 * timestamps instead of a second copy of ex_spam */
+			memcpy(old_db_parms.ex_all, old_db.v4.ex_all,
+			       sizeof(old_db_parms.ex_all));
+			memcpy(old_db_parms.ex_secs, old_db.v4.ex_secs,
+			       sizeof(old_db_parms.ex_secs));
+			old_db_parms.nokeep_cks = old_db.v4.nokeep_cks;
+			old_db_parms.flags = old_db.v4.flags;
+			old_db_parms.old_db_csize = old_db.v4.old_db_csize;
+			old_db_parms.db_added = old_db.v4.db_added;
+			old_db_parms.hash_used = old_db.v4.hash_used;
+			old_db_parms.old_hash_used = old_db.v4.old_hash_used;
+			old_db_parms.hash_added = old_db.v4.hash_added;
+			old_db_parms.rate_secs = old_db.v4.rate_secs;
+			old_db_parms.last_rate_sec = old_db.v4.last_rate_sec;
+			old_db_parms.old_kept_cks = old_db.v4.old_kept_cks;
+
+		} else if (!memcmp(old_db.v3.version, old_version3,
+				   sizeof(old_db.v3.version))) {
+			memset(&old_db_parms, 0,
+			       sizeof(old_db_parms));
+			memcpy(old_db_parms.version, db_version_buf,
+			       sizeof(old_db_parms.version));
+
+			old_db_parms.db_csize = old_db.v3.db_csize;
+			old_db_parms.pagesize = old_db.v3.pagesize;
+			old_db_parms.sn = old_db.v3.sn;
+			/* ex_all is left zero for version 3 */
+			memcpy(old_db_parms.ex_spam, old_db.v3.ex_spam,
+			       sizeof(old_db_parms.ex_spam));
+			memcpy(old_db_parms.ex_secs, old_db.v3.ex_secs,
+			       sizeof(old_db_parms.ex_secs));
+			old_db_parms.nokeep_cks = old_db.v3.nokeep_cks;
+			/* translate the version 3 flag bits */
+			if (old_db.v3.flags & DB_PARM_V3_FG_GREY)
+				old_db_parms.flags |= DB_PARM_FG_GREY;
+			if (old_db.v3.flags & DB_PARM_V3_FG_CLEARED)
+				old_db_parms.flags |= DB_PARM_FG_CLEARED;
+			old_db_parms.old_db_csize = old_db.v3.old_db_csize;
+			old_db_parms.db_added = old_db.v3.db_added;
+			old_db_parms.hash_used = old_db.v3.hash_used;
+			old_db_parms.old_hash_used = old_db.v3.old_hash_used;
+			old_db_parms.hash_added = old_db.v3.hash_added;
+			old_db_parms.rate_secs = old_db.v3.rate_secs;
+			old_db_parms.last_rate_sec = old_db.v3.last_rate_sec;
+			old_db_parms.old_kept_cks = old_db.v3.old_kept_cks;
+
+			/* derive the cleared and cleaned times from
+			 * the serial number */
+			dcc_ts2timeval(&sn, &old_db_parms.sn);
+			old_db_parms.cleared = sn.tv_sec;
+			old_db_parms.cleaned = sn.tv_sec;
+			if (old_db.v3.flags & DB_PARM_V3_FG_SELF_CLEAN2) {
+				old_db_parms.cleared -= 2*24*60*60;
+				old_db_parms.cleaned -= 24*60*60;
+			}
+		} else {
+			dcc_logbad(EX_IOERR, "%s has the wrong magic \"%.*s\"",
+				   cur_db_nm,
+				   ISZ(DB_VERSION_BUF), old_db.hdr.p.version);
+		}
+	} else {
+		old_db_parms = old_db.hdr.p;
+	}
+
+	/* seed the parameters of the new database from the old ones */
+	db_parms.sn = old_db_parms.sn;
+	db_parms.cleared = old_db_parms.cleared;
+	db_parms.cleaned = old_db_parms.cleaned;
+	db_parms.cleaned_cron = old_db_parms.cleaned_cron;
+	memcpy(db_parms.ex_all, old_db_parms.ex_all,
+	       sizeof(db_parms.ex_all));
+	memcpy(db_parms.ex_spam, old_db_parms.ex_spam,
+	       sizeof(db_parms.ex_spam));
+	memcpy(&db_parms.ex_secs, &old_db_parms.ex_secs,
+	       sizeof(db_parms.ex_secs));
+	db_parms.nokeep_cks = old_db_parms.nokeep_cks;
+	db_parms.flags = old_db_parms.flags;
+
+	set_db_tholds(db_parms.nokeep_cks);
+
+	return expire(old_db_parms.db_csize);
+}
+
+
+
+/* Append to the new file the records that reached the old file while
+ * the new file was being built.
+ * The old header is read again on each pass, and passes repeat until
+ * one of them copies nothing.
+ * Returns 1 if every late record was copied and 0 after any failure. */
+static u_char
+catchup(DCC_EMSG emsg)
+{
+	DB_HDR hdr;
+	DB_RCD late_rcd;
+	int len;
+	int copied = 0;
+	int copied_before;
+	u_char ok = 1;
+
+	/* Because dccd knows dbclean is running, dccd will have been
+	 * keeping its header block more accurate than usual. */
+	for (;;) {
+		copied_before = copied;
+
+		if (!read_db_hdr(dcc_emsg, &hdr, old_db_fd, old_db_nm)) {
+			ok = 0;
+			break;
+		}
+		if (hdr.p.db_csize < old_db_pos) {
+			dcc_error_msg("%s mysteriously truncated", old_db_nm);
+			ok = 0;
+			break;
+		}
+		if ((off_t)old_db_pos != lseek(old_db_fd, old_db_pos,
+					       SEEK_SET)) {
+			dcc_pemsg(EX_IOERR, emsg, "lseek(%s, "L_HPAT"): %s",
+				  old_db_nm, old_db_pos, ERROR_STR());
+			ok = 0;
+			break;
+		}
+		read_rcd_invalidate(0);
+
+		for (; old_db_pos < hdr.p.db_csize; old_db_pos += len) {
+			len = read_rcd(emsg, &late_rcd,
+				       old_db_fd, old_db_pos, old_db_nm);
+			if (len == 0)
+				dcc_pemsg(EX_IOERR, emsg,
+					  "premature EOF in %s"
+					  " at "L_HPAT
+					  " instead of "L_HPAT,
+					  old_db_nm,
+					  old_db_pos,
+					  hdr.p.db_csize);
+			if (len <= 0) {
+				ok = 0;
+				break;
+			}
+			/* If something bad happens, we may not be able to
+			 * go back to the old file.  Carry on to get as much
+			 * data as we can although we know the dccd daemon
+			 * may croak when we release it */
+			if (!db_add_rcd(emsg, &late_rcd)) {
+				ok = 0;
+				break;
+			}
+			++copied;
+		}
+
+		/* stop after a failure or a pass that found nothing new */
+		if (!ok || copied == copied_before)
+			break;
+	}
+
+	if (db_debug >= 1 && copied > 0)
+		quiet_trace_msg("copied %d late reports%s",
+				copied, ok ? "" : " with problems");
+
+	return ok;
+}
+
+
+
+/* Try to fold the old report mapped at db_sts.rcd into the preceding
+ * report that holds the same useful checksums.
+ * When the two are compatible, the count of the older report is added
+ * to the newer one, the older report gets a target count of 0 so that
+ * it is deleted next time, and both records are marked to be flushed. */
+static void
+compress_old(void)
+{
+#define CUR_RCD (db_sts.rcd.d.r)
+#define PRED_RCD (db_sts.rcd2.d.r)
+	DB_PTR pred_pos, link;
+	DB_RCD_CK *cur_ck, *pred_ck;
+	int cur_left, pred_left;
+	DCC_TGTS cur_tgts, pred_tgts;
+	DCC_CK_TYPES cur_type, pred_type;
+
+	/* Mapping a predecessor costs time, so first look for a checksum
+	 * that is worth keeping and that has a predecessor with which
+	 * it might be combined. */
+	pred_pos = DB_PTR_NULL;
+	pred_type = DCC_CK_INVALID;
+	cur_left = DB_NUM_CKS(CUR_RCD);
+	cur_ck = CUR_RCD->cks;
+	while (cur_left != 0) {
+		if (!DB_CK_OBS(cur_ck)
+		    && !DB_TEST_NOKEEP(db_parms.nokeep_cks,
+				       DB_CK_TYPE(cur_ck))) {
+			cur_type = DB_CK_TYPE(cur_ck);
+			/* every useful checksum in the record must be old */
+			if (!dcc_ts_older_ts(&CUR_RCD->ts,
+					     &new_ex_ts[cur_type].all))
+				return;
+			/* compression needs at least two reports */
+			link = DB_PTR_EX(cur_ck->prev);
+			if (link != DB_PTR_NULL) {
+				pred_pos = link;
+				pred_type = cur_type;
+			}
+		}
+		--cur_left;
+		++cur_ck;
+	}
+	if (pred_type == DCC_CK_INVALID)
+		return;
+
+	/* map the record that contains the predecessor
+	 * of the chosen checksum */
+	pred_ck = db_map_rcd_ck(dcc_emsg, &db_sts.rcd2, pred_pos, pred_type);
+	if (!pred_ck)
+		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+
+	/* Both records must be old
+	 * and must carry the same useful checksums. */
+	cur_left = DB_NUM_CKS(CUR_RCD);
+	cur_ck = CUR_RCD->cks;
+	pred_left = DB_NUM_CKS(PRED_RCD);
+	pred_ck = PRED_RCD->cks;
+	while (pred_left != 0 && cur_left != 0) {
+		/* step over paths and other junk in the older record */
+		if (DB_CK_OBS(pred_ck)
+		    || DB_TEST_NOKEEP(db_parms.nokeep_cks,
+				      DB_CK_TYPE(pred_ck))) {
+			--pred_left;
+			++pred_ck;
+			continue;
+		}
+		/* and then in the newer record */
+		if (DB_CK_OBS(cur_ck)
+		    || DB_TEST_NOKEEP(db_parms.nokeep_cks,
+				      DB_CK_TYPE(cur_ck))) {
+			--cur_left;
+			++cur_ck;
+			continue;
+		}
+		pred_type = DB_CK_TYPE(pred_ck);
+		cur_type = DB_CK_TYPE(cur_ck);
+
+		/* the checksums are ordered,
+		 * so the first difference ends the attempt */
+		if (cur_type != pred_type)
+			return;
+		if (memcmp(cur_ck->sum, pred_ck->sum, sizeof(cur_ck->sum)))
+			return;
+
+		/* a recent and valuable checksum also ends the attempt */
+		if (!dcc_ts_older_ts(&PRED_RCD->ts, &new_ex_ts[cur_type].all))
+			return;
+
+		--pred_left;
+		++pred_ck;
+		--cur_left;
+		++cur_ck;
+	}
+	/* the two reports must run out of checksums at the same time */
+	if (pred_left != cur_left)
+		return;
+
+	/* The records are compatible.
+	 * Move the count of the older record into the newer record
+	 * and make the older record useless.
+	 * The individual totals in the newer record are already correct,
+	 * so the deleted record can be dealt with later. */
+	cur_tgts = DB_TGTS_RCD_RAW(CUR_RCD);
+	if (cur_tgts < DCC_TGTS_TOO_MANY) {
+		pred_tgts = DB_TGTS_RCD(PRED_RCD);
+		if (pred_tgts == 0
+		    || pred_tgts > DCC_TGTS_TOO_MANY)
+			return;
+		if (pred_tgts != DCC_TGTS_TOO_MANY) {
+			cur_tgts += pred_tgts;
+			if (cur_tgts > DCC_TGTS_TOO_MANY)
+				cur_tgts = DCC_TGTS_TOO_MANY;
+		} else {
+			cur_tgts = DCC_TGTS_TOO_MANY;
+		}
+		DB_TGTS_RCD_SET(CUR_RCD, cur_tgts);
+	}
+
+	/* The older record will be deleted next time;
+	 * mark it dirty so that the change reaches the file. */
+	DB_TGTS_RCD_SET(PRED_RCD, 0);
+	SET_FLUSH_RCD(&db_sts.rcd2, 1);
+
+	CUR_RCD->srvr_id_auth = DCC_ID_COMP;
+	CUR_RCD->fgs_num_cks &= ~(DB_RCD_FG_TRIM
+				  | DB_RCD_FG_SUMRY
+				  | DB_RCD_FG_DELAY);
+	/* keep the newer of the two timestamps */
+	if (dcc_ts_older_ts(&CUR_RCD->ts, &PRED_RCD->ts))
+		CUR_RCD->ts = PRED_RCD->ts;
+	SET_FLUSH_RCD(&db_sts.rcd, 1);
+
+	++comp_rcds;
+
+#undef CUR_RCD
+#undef PRED_RCD
+}
+
+
+
+/* write a parsed whitelist checksum
+ *
+ * Builds a whitelist record of two checksums and adds it to the new
+ * database with write_new_rcd().  The first checksum is a
+ * DCC_CK_FLOD_PATH entry whose sum holds the line number and file number
+ * from wf; the second is the whitelisted checksum itself.
+ *
+ * Returns 1 after writing the record and counting it in white_cks,
+ * 0 if the entry is rejected with a message through emsg,
+ * and -1 if write_new_rcd() fails. */
+static int
+white_write(DCC_EMSG emsg, DCC_WF *wf,
+	    DCC_CK_TYPES type, DCC_SUM sum, DCC_TGTS tgts)
+{
+	DB_RCD rcd;
+	int rcd_len;
+	char buf[30];
+	DCC_FNM_LNO_BUF fnm_buf;
+
+	/* ignore checksums that clients are never supposed to send
+	 * to the server or for some other reason cannot be whitelisted */
+	switch (type) {
+	case DCC_CK_INVALID:
+	case DCC_CK_ENV_TO:
+	case DCC_CK_G_MSG_R_TOTAL:
+	case DCC_CK_G_TRIPLE_R_BULK:
+	case DCC_CK_SRVR_ID:
+		dcc_pemsg(EX_DATAERR, emsg,
+			  "%s checksum cannot be used%s",
+			  dcc_type2str_err(type, 0, 0, grey_on),
+			  wf_fnm_lno(&fnm_buf, wf));
+		return 0;
+
+	case DCC_CK_IP:
+	case DCC_CK_ENV_FROM:
+	case DCC_CK_FROM:
+	case DCC_CK_MESSAGE_ID:
+	case DCC_CK_RECEIVED:
+	case DCC_CK_SUB:
+	case DCC_CK_BODY:
+	case DCC_CK_FUZ1:
+	case DCC_CK_FUZ2:
+		break;			/* these are ok */
+	}
+
+	/* these target values are not stored in the database */
+	if (tgts == DCC_TGTS_OK_MX
+	    || tgts == DCC_TGTS_OK_MXDCC
+	    || tgts == DCC_TGTS_SUBMIT_CLIENT) {
+		dcc_pemsg(EX_DATAERR, emsg,"\"%s\" ignored%s",
+			  dcc_tgts2str(buf, sizeof(buf), tgts, 0),
+			  wf_fnm_lno(&fnm_buf, wf));
+		return 0;
+	}
+
+	/* Greylist whitelist entries cannot involve blacklisting.
+	 * They use DCC_TGTS_GREY_WHITE to signal whitelisting */
+	if (grey_on) {
+		/* ignore anything except whitelisting */
+		if (tgts != DCC_TGTS_OK) {
+			dcc_pemsg(EX_DATAERR, emsg, "\"%s\" ignored%s",
+				  dcc_tgts2str(buf, sizeof(buf), tgts, 0),
+				  wf_fnm_lno(&fnm_buf, wf));
+			return 0;
+		}
+		tgts = DCC_TGTS_GREY_WHITE;
+	}
+
+	memset(&rcd, 0, sizeof(rcd));
+	dcc_timeval2ts(&rcd.ts, &clean_start, 0);
+	rcd.srvr_id_auth = DCC_ID_WHITE;
+	DB_TGTS_RCD_SET(&rcd, tgts);
+
+	/* remember where the entry came from in a path checksum:
+	 * the line number followed by one byte of file number */
+	rcd.cks[0].type_fgs = DCC_CK_FLOD_PATH;
+	memcpy(rcd.cks[0].sum, &wf->lno, sizeof(wf->lno));
+	rcd.cks[0].sum[sizeof(wf->lno)] = wf->fno;
+
+	/* Copy only the bytes of the sum.  Using the size of the whole
+	 * checksum entry would read past the end of the caller's sum
+	 * and write past the end of the sum field. */
+	rcd.cks[1].type_fgs = type;
+	memcpy(rcd.cks[1].sum, sum, sizeof(rcd.cks[1].sum));
+
+	rcd_len = sizeof(rcd) - sizeof(rcd.cks) + 2*sizeof(rcd.cks[0]);
+	rcd.fgs_num_cks = 2;
+
+	if (!write_new_rcd(&rcd, rcd_len))
+		return -1;
+
+	++white_cks;
+	return 1;
+}
+
+
+
+/* Start the new database with the whitelist of certified non-spam
+ *	and non-spammers.
+ *	If the whitelist file cannot be used, keep_white is set and
+ *	whatever was written to the new file is removed again. */
+static void
+parse_white(void)
+{
+	int fd;
+
+	white_cks = 0;
+
+	if (!keep_white) {
+		memset(&dbclean_white_tbl, 0,sizeof(dbclean_white_tbl));
+		dcc_wf_init(&dbclean_wf, 0);
+		fnm2rel_good(dbclean_wf.ascii_nm, WHITELIST_NM(grey_on), 0);
+		dbclean_wf.wtbl = &dbclean_white_tbl;
+
+		fd = open(dbclean_wf.ascii_nm, O_RDONLY, 0);
+		if (fd >= 0) {
+			if (0 > dcc_parse_whitefile(0, &dbclean_wf, fd,
+						    white_write, 0))
+				keep_white = 1;
+			if (0 > close(fd))
+				dcc_error_msg("close(%s): %s",
+					      dbclean_wf.ascii_nm, ERROR_STR());
+		} else if (errno != ENOENT) {
+			/* a missing file is fine;
+			 * a file that exists but cannot be opened is not */
+			dcc_error_msg("open(%s): %s",
+				      dbclean_wf.ascii_nm, ERROR_STR());
+			keep_white = 1;
+		}
+	}
+
+	if (keep_white) {
+		/* The whitelist was bad, so remove the bad new whitelist
+		 * from the new database.  The existing whitelist
+		 * will be used instead. */
+		write_new_flush(1);
+		new_db_csize = DB_PTR_BASE;
+		if (0 > ftruncate(new_db_fd, DB_PTR_BASE))
+			dcc_logbad(EX_IOERR, "truncate(%s, %d): %s",
+				   new_db_nm, DB_PTR_BASE, ERROR_STR());
+		new_db_fsize = DB_PTR_BASE;
+		white_cks = 0;
+	}
+
+	/* put the current counts into the header of the database file */
+	write_new_hdr(1);
+}
+
+
+
+/* Complain about a whitelist entry, in the record pointed to by
+ *	db_sts.rcd, whose target value differs from that of the preceding
+ *	entry for the same checksum */
+static void
+check_white(void)
+{
+	static int complaints;
+	static int seen_lno1, seen_lno2;
+	static int seen_fno1, seen_fno2;
+	const DB_RCD_CK *cur_ck, *older_ck;
+	int lno1, lno2;
+	int fno1, fno2;
+	DCC_TGTS tgts1, tgts2;
+	char tgts1_buf[30], tgts2_buf[30];
+	const char *fname1, *fname2;
+	DCC_CK_TYPES type;
+	DB_PTR older_pos;
+
+	/* stop checking once enough has been said */
+	if (complaints > 20)
+		return;
+
+	cur_ck = db_sts.rcd.d.r->cks;
+
+	/* Line numbers are needed.  They are lacking only
+	 * if the old whitelist entries were saved because the file is
+	 * broken */
+	if (DB_CK_TYPE(cur_ck) != DCC_CK_FLOD_PATH
+	    || DB_NUM_CKS(db_sts.rcd.d.r) != 2)
+		return;
+
+	/* a single line cannot conflict with anything */
+	++cur_ck;
+	older_pos = DB_PTR_EX(cur_ck->prev);
+	if (older_pos == DB_PTR_NULL)
+		return;
+
+	type = DB_CK_TYPE(cur_ck);
+	older_ck = db_map_rcd_ck(dcc_emsg, &db_sts.rcd2, older_pos, type);
+	if (!older_ck)
+		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+
+	tgts1 = DB_TGTS_RCD(db_sts.rcd2.d.r);
+	tgts2 = DB_TGTS_RCD(db_sts.rcd.d.r);
+	if (tgts1 == tgts2)
+		return;			/* the entries agree */
+
+	/* recover the line and file numbers of both entries */
+	memcpy(&lno1, db_sts.rcd2.d.r->cks[0].sum, sizeof(lno1));
+	fno1 = db_sts.rcd2.d.r->cks[0].sum[sizeof(lno1)];
+	memcpy(&lno2, db_sts.rcd.d.r->cks[0].sum, sizeof(lno2));
+	fno2 = db_sts.rcd.d.r->cks[0].sum[sizeof(lno2)];
+
+	/* do not repeat the most recent complaint */
+	if (lno1 == seen_lno1 && lno2 == seen_lno2
+	    && fno1 == seen_fno1 && fno2 == seen_fno2)
+		return;
+
+	/* name the first file only if it differs from the second */
+	fname1 = wf_fnm(&dbclean_wf, fno1);
+	fname2 = wf_fnm(&dbclean_wf, fno2);
+	fname1 = (fname1 == fname2) ? "" : path2fnm(fname1);
+
+	dcc_error_msg("\"%s\" in line %d%s%s conflicts with \"%s\""
+		      " in line %d of %s",
+		      dcc_tgts2str(tgts1_buf, sizeof(tgts1_buf),
+				   tgts1, grey_on),
+		      lno1,
+		      *fname1 != '\0' ? " of " : "", fname1,
+		      dcc_tgts2str(tgts2_buf, sizeof(tgts2_buf),
+				   tgts2, grey_on),
+		      lno2,
+		      fname2);
+
+	++complaints;
+	seen_lno1 = lno1;
+	seen_fno1 = fno1;
+	seen_lno2 = lno2;
+	seen_fno2 = fno2;
+}
+
+
+
+/* rebuild the hash table and the totals and links within the database file
+ *	finish with the file locked
+ *
+ *	Steps, in order:
+ *	  - choose new_hash_len if the caller left it 0,
+ *	  - remove any stale hash file and open the new database with
+ *	    db_open() using DB_OPEN_LOCK_NOWAIT,
+ *	  - walk every record from DB_PTR_BASE to db_csize, once per
+ *	    hash address window, relinking it with db_link_rcd(),
+ *	    checking whitelist records with check_white() and calling
+ *	    compress_old(),
+ *	  - save the resulting totals in db_parms with db_flush_parms()
+ *	    and push the file out with make_clean(1).
+ *	Every failure is reported with dcc_logbad(). */
+static void
+build_hash(void)
+{
+	DB_PTR rcd_pos;
+	DB_HADDR haddr_window, haddr_lo, haddr_hi;
+	int pass, total_passes;
+	int rcd_len;
+	int rcd_cks, rcd_sums;
+	DB_PTR rcds, sums;		/* passes can inflate these */
+	const DB_RCD_CK *rcd_ck;
+	DB_HADDR guess_hash_len;
+	double db_rate, hash_ratio;
+	struct timeval db_flushed;
+
+	db_buf_init(new_db_pagesize, 0);
+
+	/* only pick a size when none was given */
+	if (new_hash_len == 0) {
+		/* Try to choose a hash table size now so that when it
+		 * is next time to rebuild after 24 hours of incoming
+		 * checksums, the alpha or load factor will still be 0.9.
+		 * We probably ran 24 hours ago, so the old hash size
+		 * is an estimate of the size tomorrow. */
+
+		/* Guess the number of distinct checksums added
+		 * tomorrow based on the current average rate */
+		db_rate = db_add_rate(&new_db_parms, 1);
+		if (db_rate > 0.0) {
+			/* Increase the average rate by 10% to account
+			 * for the 30% decrease often seen on weekends. */
+			guess_hash_len = db_rate * 1.1 * 24*60*60;
+
+			/* predict # of distinct checksums in current data */
+			hash_ratio = old_db_parms.old_kept_cks;
+			if (hash_ratio == 0.0) {
+				hash_ratio = 1.0;
+			} else {
+				/* ratio of hash entries used to checksums
+				 * kept by the previous cleaning; distrust
+				 * values outside 0.3 to 1.0 */
+				hash_ratio = (HADDR2LEN(old_db_parms
+							.old_hash_used)
+					      / hash_ratio);
+				if (hash_ratio > 1.0 || hash_ratio < 0.3)
+					hash_ratio = 1.0;
+			}
+			guess_hash_len += (kept_cks * hash_ratio) + white_cks;
+
+			if (db_debug)
+				quiet_trace_msg("hash size from old=%d"
+						"  %d from db_rate=%.1f"
+						" hash_ratio=%.1f=%d/%d"
+						" kept=%d white=%d",
+						old_db_hash_used,
+						guess_hash_len,
+						db_rate, hash_ratio,
+						HADDR2LEN(old_db_parms
+							.old_hash_used),
+						old_db_parms.old_kept_cks,
+						kept_cks, white_cks);
+
+		} else {
+			/* guess if we do not have a good measure
+			 * of the recent rate */
+			guess_hash_len = kept_cks+white_cks;
+			guess_hash_len += guess_hash_len/5;
+		}
+
+		/* use the larger of the old size and the guess */
+		new_hash_len = old_db_hash_used;
+		if (new_hash_len < guess_hash_len)
+			new_hash_len = guess_hash_len;
+
+		/* go for load factor 0.9 */
+		new_hash_len += new_hash_len/10;
+
+		/* an excessive size is only reported here,
+		 * it is not reduced */
+		if (new_hash_len > db_max_hash_entries)
+			quiet_trace_msg("default hash size %d entries"
+					" > maximum %d",
+					new_hash_len, db_max_hash_entries);
+
+		/* enforce the minimum, which differs for greylisting */
+		if (grey_on) {
+			if (new_hash_len < MIN_HASH_ENTRIES)
+				new_hash_len = MIN_HASH_ENTRIES;
+		} else {
+			if (new_hash_len < DEF_HASH_ENTRIES)
+				new_hash_len = DEF_HASH_ENTRIES;
+		}
+	}
+
+	/* Open and lock the new database */
+	unlink_whine(new_hash_nm, 1);
+	/* set before db_open() so that finish() removes the hash file
+	 * even if the open fails */
+	new_hash_created = 1;
+	if (!db_open(0, -1, new_db_nm, new_hash_len,
+		     DB_OPEN_LOCK_NOWAIT | db_mode)) {
+		dcc_logbad(dcc_ex_code, "could not start database %s",
+			   new_db_nm);
+	}
+	if (db_debug)
+		quiet_trace_msg("%s  %s", db_window_size_str, new_db_nm);
+
+	/* guess which checksums we will keep so that we can count them */
+	if (old_db_parms.nokeep_cks != 0)
+		db_parms.nokeep_cks = old_db_parms.nokeep_cks;
+
+	/* add every record in the database file to the hash table and
+	 * fix its accumulated counts and reverse links */
+	comp_rcds = 0;
+	sums = 0;
+	rcds = 0;
+	report_progress_init();
+	db_flushed = db_time;
+
+	/* if the hash table does not fit in 75% of RAM,
+	 * then make several passes over the data with as much of the
+	 * hash table as fits. */
+	haddr_window = db_hash_page_len*((db_buf_total*3)/4);
+	/* never use more than 16 passes */
+	if (haddr_window < db_hash_len/16)
+		haddr_window = db_hash_len/16;
+	total_passes = (db_hash_len+haddr_window-1)/haddr_window;
+
+	/* each pass handles the hash addresses haddr_lo up to haddr_hi,
+	 * which are handed to db_link_rcd() */
+	for (haddr_lo = 0, pass = 1;
+	     haddr_lo < db_hash_len;
+	     haddr_lo = haddr_hi, ++pass) {
+		/* the final window is open ended */
+		if (haddr_lo > db_hash_len-haddr_window)
+			haddr_hi = MAX_HASH_ENTRIES;
+		else
+			haddr_hi = haddr_lo+haddr_window;
+		/* rcd_len is set by db_map_rcd() or by the page
+		 * boundary test before each increment */
+		for (rcd_pos = DB_PTR_BASE;
+		     rcd_pos < db_csize;
+		     rcd_pos += rcd_len) {
+			/* skip reports crossing page boundaries */
+			if (rcd_pos%db_pagesize > db_page_max) {
+				rcd_len = DB_RCD_HDR_LEN;
+				continue;
+			}
+			if (--progress_rpt_cnt <= 0) {
+				report_progress(0, "  hash rebuilt",
+						"checksums",
+						sums/total_passes, kept_cks, 1);
+				/* flush at most once per second
+				 * of db_time */
+				if (db_time.tv_sec != db_flushed.tv_sec) {
+					db_flushed = db_time;
+					if (!db_flush_db(dcc_emsg))
+					    dcc_logbad(dcc_ex_code,
+						       "flushing after linking"
+						       L_HPAT": %s",
+						       rcd_pos, dcc_emsg);
+				}
+			}
+
+			if (!db_map_rcd(0, &db_sts.rcd, rcd_pos, &rcd_len)) {
+				dcc_logbad(dcc_ex_code,
+					   "hash build failed reading"
+					   " record at "L_HPAT,
+					   rcd_pos);
+			}
+
+			/* skip end of page padding */
+			if (db_sts.rcd.d.r->fgs_num_cks == 0)
+				continue;
+
+			/* counted again on every pass, and so divided by
+			 * total_passes when reported */
+			++rcds;
+
+			/* count the checksums we'll link in this record */
+			rcd_cks = DB_NUM_CKS(db_sts.rcd.d.r);
+			rcd_sums = 0;
+			for (rcd_ck = db_sts.rcd.d.r->cks;
+			     rcd_ck < &db_sts.rcd.d.r->cks[rcd_cks];
+			     ++rcd_ck) {
+				if (!DB_TEST_NOKEEP(db_parms.nokeep_cks,
+						    DB_CK_TYPE(rcd_ck)))
+					++rcd_sums;
+			}
+			sums += rcd_sums;
+
+			/* Mark the record dirty so that any new hash links
+			 * get to the file if we are using -F. */
+			db_set_flush(&db_sts.rcd, 0, rcd_len);
+			if (!db_link_rcd(dcc_emsg, haddr_lo, haddr_hi)) {
+				dcc_logbad(dcc_ex_code,
+					   "relinking record at "L_HPAT": %s",
+					   rcd_pos, dcc_emsg);
+			}
+
+			/* check for conflicts in the whitelist file */
+			if (DB_RCD_ID(db_sts.rcd.d.r) == DCC_ID_WHITE)
+				check_white();
+
+			compress_old();
+		}
+
+		/* announce each finished pass except the last */
+		if (progress_rpt_started && pass < total_passes)
+			quiet_trace_msg("    pass %d", pass);
+	}
+
+	report_progress(1, "  hash rebuilt", "checksums",
+			sums/total_passes, kept_cks, 1);
+
+	/* save the totals in the database parameters */
+	db_parms.old_hash_used = db_hash_used;
+	db_parms.old_kept_cks = kept_cks;
+	db_parms.hash_used = db_hash_used;
+	db_parms.old_db_csize = db_csize;
+	if (!db_flush_parms(dcc_emsg))
+		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+
+	quiet_trace_msg("hashed "L_DPAT" records containing "L_DPAT" checksums,"
+			" compressed %d records",
+			rcds/total_passes, sums/total_passes, comp_rcds);
+
+	/* Try to finish as much disk I/O on the new file as we can to minimize
+	 * stalling by dccd when we close the file and hand it over.  This also
+	 * reduces system stalling hours later when dbclean runs again. */
+	if (!make_clean(1))
+		dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
+
+
+	quiet_trace_msg("%d hash entries total, %d or %d%% used",
+			HADDR2LEN(db_hash_len),
+			HADDR2LEN(db_hash_used),
+			(int)((HADDR2LEN(db_hash_used)*100.0)
+			      / HADDR2LEN(db_hash_len)));
+}
+
+
+
+/* write buflen bytes from buf at offset pos in the new database file
+ *	and remember the largest file size reached in new_db_fsize
+ *	fatal selects dcc_logbad() instead of dcc_error_msg() for complaints */
+static u_char				/* 1=ok, 0=failed */
+write_new_db(const void *buf, int buflen, off_t pos, u_char fatal)
+{
+	int written;
+
+	if (lseek(new_db_fd, pos, SEEK_SET) != pos) {
+		if (!fatal) {
+			dcc_error_msg("lseek(%s, 0): %s",
+				      new_db_nm, ERROR_STR());
+		} else {
+			dcc_logbad(EX_IOERR, "lseek(%s, 0): %s",
+				   new_db_nm, ERROR_STR());
+		}
+		return 0;
+	}
+
+	written = write(new_db_fd, buf, buflen);
+	if (written != buflen) {
+		if (written < 0) {
+			/* the system call failed */
+			if (fatal)
+				dcc_logbad(EX_IOERR, "write(%s): %s",
+					   new_db_nm, ERROR_STR());
+			else
+				dcc_error_msg("write(%s): %s",
+					      new_db_nm, ERROR_STR());
+		} else {
+			/* only part of the buffer was written */
+			if (fatal)
+				dcc_logbad(EX_IOERR,
+					   "write(%s)=%d instead of %d",
+					   new_db_nm, written, buflen);
+			else
+				dcc_error_msg("write(%s)=%d instead of %d",
+					      new_db_nm, written, buflen);
+		}
+		return 0;
+	}
+
+	/* the file grows only when we write past its old end */
+	if (pos+buflen > new_db_fsize)
+		new_db_fsize = pos+buflen;
+	return 1;
+}
+
+
+
+/* use a large buffer to encourage the file system to avoid fragmentation
+ *	The hdr member lets write_new_hdr() build the file header
+ *	at the start of the same buffer. */
+static union {
+    u_char  c[DB_MIN_MIN_MBYTE*(1024*1024)/4];
+    DB_HDR  hdr;
+} write_new_db_buf;
+/* number of bytes waiting in write_new_db_buf */
+static u_int write_new_db_buflen = 0;
+/* offset in the new file where write_new_flush() puts those bytes */
+static DB_PTR write_new_base;
+
+/* write whatever is waiting in write_new_db_buf at write_new_base
+ *	and then restart the buffer at the current end of the new data,
+ *	even if the write failed */
+static u_char				/* 1=ok, 0=failed */
+write_new_flush(u_char fatal)
+{
+	u_char ok;
+
+	/* an empty buffer is trivially flushed */
+	ok = (write_new_db_buflen == 0
+	      || write_new_db(&write_new_db_buf, write_new_db_buflen,
+			      write_new_base, fatal));
+
+	write_new_db_buflen = 0;
+	write_new_base = new_db_csize;
+	return ok;
+}
+
+
+/* append buflen bytes to write_new_db_buf,
+ *	first writing the buffer to the file if they would not fit */
+static u_char				/* 1=ok, 0=failed */
+write_new_buf(const void *buf, int buflen)
+{
+	if (write_new_db_buflen + buflen > ISZ(write_new_db_buf)) {
+		/* complain about a failed write but do not exit */
+		if (!write_new_flush(0))
+			return 0;
+	}
+
+	memcpy(write_new_db_buf.c + write_new_db_buflen, buf, buflen);
+	write_new_db_buflen += buflen;
+	return 1;
+}
+
+
+
+/* add a record to the new file
+ *	A record that would end in a later page than it starts is
+ *	preceded by zeros so that it begins in that later page.
+ *	new_db_csize is advanced past the padding and the record. */
+static u_char				/* 1=ok, 0=failed */
+write_new_rcd(const void *buf, int buflen)
+{
+	static const u_char zeros[DB_RCD_LEN_MAX] = {0};
+	DB_PTR end_page;
+	u_char ok;
+	int gap, chunk;
+
+	/* pad across page boundaries */
+	end_page = DB_PTR2PG_NUM(new_db_csize + buflen, new_db_pagesize);
+	if (end_page != DB_PTR2PG_NUM(new_db_csize, new_db_pagesize)) {
+		/* distance to the start of that page,
+		 * rounded up to whole record headers */
+		gap = end_page*new_db_pagesize - new_db_csize;
+		gap = (((gap + DB_RCD_HDR_LEN-1) / DB_RCD_HDR_LEN)
+		       * DB_RCD_HDR_LEN);
+		do {
+			/* at most one block of zeros at a time */
+			chunk = (gap < (int)sizeof(zeros)
+				 ? gap
+				 : (int)sizeof(zeros));
+			if (!write_new_buf(zeros, chunk))
+				return 0;
+			new_db_csize += chunk;
+			gap -= chunk;
+		} while (gap != 0);
+	}
+
+	/* the size advances even if buffering the record failed */
+	ok = write_new_buf(buf, buflen);
+	new_db_csize += buflen;
+	return ok;
+}
+
+
+
+/* write the magic string at the head of the database file
+ *	Any buffered records are written first.  Then a fresh DB_HDR is
+ *	built in write_new_db_buf, written at offset 0, and its parameters
+ *	copied to new_db_parms.  Finally the file is padded with zeros
+ *	to a whole number of pages.
+ *	emptied!=0 means the new database starts empty, so the cleared
+ *	time is reset and the old traffic counts and nokeep_cks setting
+ *	are not carried over. */
+static void
+write_new_hdr(u_char emptied)
+{
+	DB_HDR *new;
+	struct timeval old_sn;
+	time_t new_rate_secs;
+	DCC_CK_TYPES type;
+	int i;
+
+	/* get buffered records out of the buffer before reusing it */
+	write_new_flush(1);
+
+	memset(&write_new_db_buf, 0, sizeof(write_new_db_buf));
+	write_new_base = 0;
+	/* Write only the header if the file already extends beyond it
+	 * or if the page size is unknown.  Otherwise write as much of
+	 * the first page as fits in the buffer. */
+	if (new_db_fsize > ISZ(DB_HDR)
+	    || new_db_pagesize == 0) {
+		write_new_db_buflen = sizeof(DB_HDR);
+	} else {
+		write_new_db_buflen = new_db_pagesize;
+		if (write_new_db_buflen > ISZ(write_new_db_buf))
+			write_new_db_buflen = ISZ(write_new_db_buf);
+	}
+
+	new = &write_new_db_buf.hdr;
+	memset(new, 0, sizeof(*new));
+	memcpy(new->p.version, db_version_buf, sizeof(new->p.version));
+
+	/* the serial number is the time this cleaning started */
+	dcc_timeval2ts(&new->p.sn, &clean_start, 0);
+	if (emptied) {
+		/* cleaned and cleaned_cron stay 0 from the memset() */
+		new->p.cleared = clean_start.tv_sec;
+	} else {
+		new->p.cleared = old_db_parms.cleared;
+		/* only some modes count as a cleaning */
+		switch (clean_mode) {
+		case NORMAL_MODE:
+			new->p.cleaned = clean_start.tv_sec;
+			new->p.cleaned_cron = clean_start.tv_sec;
+			break;
+		case NO_CRON_MODE:
+			new->p.cleaned = clean_start.tv_sec;
+			new->p.cleaned_cron = old_db_parms.cleaned_cron;
+			break;
+		case REPAIR_MODE:
+		case QUICK_MODE:
+		case HASH_MODE:
+		case DEL_MODE:
+			new->p.cleaned = old_db_parms.cleaned;
+			new->p.cleaned_cron = old_db_parms.cleaned_cron;
+			break;
+		}
+	}
+
+	if (grey_on)
+		new->p.flags |= DB_PARM_FG_GREY;
+	if (emptied || (old_db_parms.flags & DB_PARM_FG_CLEARED))
+		new->p.flags |= DB_PARM_FG_CLEARED;
+	/* have_expire_parms<0 keeps the old setting of DB_PARM_EXP_SET */
+	if (have_expire_parms > 0
+	    || (have_expire_parms < 0
+		&& (old_db_parms.flags & DB_PARM_EXP_SET)))
+		new->p.flags |= DB_PARM_EXP_SET;
+
+	new->p.nokeep_cks = (emptied || old_db_parms.nokeep_cks == 0
+			     ? def_nokeep_cks()
+			     : old_db_parms.nokeep_cks);
+
+	new->p.pagesize = new_db_pagesize;
+	new->p.db_csize = new_db_csize;
+
+	/* update the traffic counts */
+	/* but only if the old counts are present and did not shrink */
+	if (!emptied
+	    && old_db_parms.db_csize != 0
+	    && old_db_parms.db_csize >= old_db_parms.old_db_csize
+	    && old_db_parms.hash_used != 0
+	    && old_db_parms.hash_used >= old_db_parms.old_hash_used) {
+		/* start from the old totals if their period is sane */
+		if (old_db_parms.rate_secs > 0
+		    && old_db_parms.rate_secs <= DB_MAX_RATE_SECS) {
+			new->p.rate_secs = old_db_parms.rate_secs;
+			new->p.db_added = old_db_parms.db_added;
+			new->p.hash_added = old_db_parms.hash_added;
+		}
+		new->p.last_rate_sec = clean_start.tv_sec;
+		/* seconds since the old serial number */
+		dcc_ts2timeval(&old_sn, &old_db_parms.sn);
+		new_rate_secs = clean_start.tv_sec - old_sn.tv_sec;
+		if (new_rate_secs > 0 && new_rate_secs <= DB_MAX_RATE_SECS) {
+			new_rate_secs += new->p.rate_secs;
+			/* scale the totals down so that they cover
+			 * no more than DB_MAX_RATE_SECS */
+			if (new_rate_secs > DB_MAX_RATE_SECS) {
+				double trim, new_val;
+				trim = DB_MAX_RATE_SECS;
+				trim /= new_rate_secs;
+
+				new_val = new->p.db_added;
+				new_val *= trim;
+				new->p.db_added = new_val;
+
+				new_val = new->p.hash_added;
+				new_val *= trim;
+				new->p.hash_added = new_val;
+
+				new_rate_secs = DB_MAX_RATE_SECS;
+			}
+			/* add the growth recorded in the old parameters */
+			new->p.db_added += (old_db_parms.db_csize
+					    - old_db_parms.old_db_csize);
+			new->p.hash_added += (old_db_parms.hash_used
+					      - old_db_parms.old_hash_used);
+			new->p.rate_secs = new_rate_secs;
+		}
+	}
+
+	/* Record the expiration durations and timestamps of each type.
+	 * Types without new values get the default durations and
+	 * keep the zero timestamps from the memset(). */
+	for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
+		if (new_ex_secs[type].all != 0) {
+			new->p.ex_secs[type].all = new_ex_secs[type].all;
+			new->p.ex_secs[type].spam = new_ex_secs[type].spam;
+			new->p.ex_all[type] = new_ex_ts[type].all;
+			new->p.ex_spam[type] = new_ex_ts[type].spam;
+		} else {
+			new->p.ex_secs[type].all = def_expire_secs;
+			new->p.ex_secs[type].spam = (DCC_CK_LONG_TERM(type)
+						     ? def_expire_spamsecs
+						     : def_expire_secs);
+		}
+	}
+
+	/* keep a copy of what is being written */
+	new_db_parms = new->p;
+
+	/* The first trip writes the header.  Later trips write zeros,
+	 * one buffer at most per trip, until the file ends on a page
+	 * boundary. */
+	for (;;) {
+		write_new_flush(1);
+
+		/* ensure that the last page of the file is complete */
+		if (new_db_pagesize == 0)
+			break;
+		i = new_db_fsize % new_db_pagesize;
+		if (i == 0)
+			break;
+		write_new_db_buflen = new_db_pagesize - i;
+		if (write_new_db_buflen > ISZ(write_new_db_buf))
+			write_new_db_buflen = ISZ(write_new_db_buf);
+		memset(&write_new_db_buf, 0, write_new_db_buflen);
+		/* write_new_flush() set the base to new_db_csize,
+		 * but the padding goes at the end of the file */
+		write_new_base = new_db_fsize;
+	}
+}
+
+
+
+/* remove a file and complain if that fails
+ *	enoent_ok!=0 says a file that is already missing is not a problem */
+static void
+unlink_whine(const char *nm, u_char enoent_ok)
+{
+	if (unlink(nm) >= 0)
+		return;
+
+	/* tolerate a missing file when told to */
+	if (enoent_ok && errno == ENOENT)
+		return;
+
+	dcc_error_msg("unlink(%s): %s", nm, ERROR_STR());
+}
+
+
+
+/* rename a file and report a failure with dcc_logbad(EX_IOERR) */
+static void
+rename_bail(const char *from, const char *to)
+{
+	if (rename(from, to) < 0) {
+		dcc_logbad(EX_IOERR, "rename(%s, %s): %s",
+			   from, to, ERROR_STR());
+	}
+}
+
+
+
+/* try for a long time or until the server hears
+ *	This is a wrapper for dcc_aop_persist() that supplies the shared
+ *	context ctxt, DCC_CLNT_FG_GREY when grey_on is set, and db_debug
+ *	reduced to 0 or 1.
+ *	An error message is presumably left in dcc_emsg and the answer
+ *	in aop_resp; that depends on dcc_aop_persist(), which is not
+ *	visible here. */
+static u_char				/* 1=ok, 0=failed */
+persist_aop(DCC_AOPS aop, u_int32_t val1,
+	    int secs)			/* try for this long */
+{
+	return dcc_aop_persist(dcc_emsg, ctxt,
+			       grey_on ? DCC_CLNT_FG_GREY : 0,
+			       db_debug != 0,
+			       aop, val1, secs, &aop_resp);
+}
+
+
+
+/* tell the daemon to switch to the new database
+ *	msg labels any complaints.  Failures are reported
+ *	but never stop dbclean. */
+static void
+dccd_new_db(const char *msg)
+{
+	u_char heard;
+
+	/* Send a round of NOPs and ask about status to ensure the server
+	 * has dealt with requests that arrived while we had the database
+	 * locked and otherwise caught up.  We want to try to ensure that
+	 * the server is listening when we re-open the database so that
+	 * it does not leave flooding off.
+	 * On some systems with lame mmap() support including BSD/OS,
+	 * the daemon can stall for minutes in close().  If that or something
+	 * else makes the daemon stall, this can appear to fail. */
+	heard = persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_LIST, RESTART_DELAY);
+	if (!heard)
+		dcc_error_msg("%s: %s; continuing", msg, dcc_emsg);
+
+	/* clear the flag before asking so that we ask only once */
+	dccd_unlocked = 0;
+	heard = persist_aop(DCC_AOP_DB_NEW, 0, RESTART_DELAY);
+	if (!heard) {
+		/* This cannot be a fatal error,
+		 * lest we leave the database broken */
+		dcc_error_msg("%s: %s; continuing", msg, dcc_emsg);
+	}
+}
+
+
+
+/* Clean up before dbclean stops:
+ *	remove files that were created but are still marked as created,
+ *	close the new and old database files, close the database,
+ *	tell the daemon to switch databases if it is still waiting,
+ *	resume flooding once for each time it was turned off,
+ *	and release the dbclean lock.
+ *	NOTE(review): presumably called as the process exits, since it
+ *	examines exit_value; how it is registered is not visible here. */
+static void
+finish(void)
+{
+	/* becomes -1 if any created file had to be removed,
+	 * and is passed to db_close() */
+	int bailing = 0;
+
+	/* delete the new files */
+#ifndef DCC_DBCLEAN_KEEP_NEW			/* for debugging */
+	if (new_db_created) {
+		unlink_whine(new_db_nm, 0);
+		new_db_created = 0;
+		bailing = -1;
+	}
+	/* we don't really know if the new hash file was created,
+	 * so don't worry about problems */
+	if (new_hash_created) {
+		unlink_whine(new_hash_nm, 1);
+		new_hash_created = 0;
+		bailing = -1;
+	}
+#endif
+	/* likewise remove a current database created by this run */
+	if (cur_db_created) {
+		unlink_whine(cur_db_nm, 0);
+		unlink_whine(cur_hash_nm, 1);
+		cur_db_created = 0;
+		bailing = -1;
+	}
+
+	if (new_db_fd >= 0) {
+		if (0 > close(new_db_fd))
+			dcc_error_msg("close(%s): %s",
+				      new_db_nm, ERROR_STR());
+		new_db_fd = -1;
+	}
+	if (old_db_fd >= 0) {
+		/* In most cases nothing cares about the old database now.
+		 * We often have kept the old database open and locked until
+		 * now.  Delete it unless we are debugging */
+		if (db_debug < 4 && exit_value == EX_OK) {
+			unlink_whine(old_db_nm, 0);
+		} else {
+			/* Push it to the disk so it won't lurk in the buffer
+			 * cache or elsewhere to slow a system reboot */
+			if (exit_value == EX_OK
+			    && 0 > fsync(old_db_fd))
+				dcc_error_msg("fsync(%s): %s",
+					      old_db_nm, ERROR_STR());
+		}
+		if (0 > close(old_db_fd))
+			dcc_error_msg("close(%s): %s",
+				      old_db_nm, ERROR_STR());
+		old_db_fd = -1;
+	}
+	flod_unmap(0, 0);
+
+	/* release the daemon, but if the database is still open, it's bad */
+	db_close(bailing);
+	/* tell the daemon to switch databases */
+	if (dccd_unlocked)
+		dccd_new_db("finish");
+
+	/* undo each outstanding request to stop flooding */
+	while (flods_off > 0) {
+		--flods_off;
+		if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_RESUME,
+				 RESTART_DELAY))
+			dcc_error_msg("%s", dcc_emsg);
+	}
+
+	unlock_dbclean();
+}
+
+
+
+/* save the exit status in exit_value and then stop the program */
+static void NRATTRIB
+exit_dbclean(int v)
+{
+	exit_value = v;
+	exit(exit_value);
+}
+
+
+
+/* terminate with a signal
+ *	The exit status is the signal number plus 100. */
+static void NRATTRIB
+sigterm(int s)
+{
+	const int status = s+100;
+
+	dcc_error_msg("interrupted by signal %d", s);
+	exit_dbclean(status);
+}
author	Peter Gervai <grin@grin.hu>
date	Tue, 10 Mar 2009 13:49:58 +0100
parents
children