Mercurial > notdcc
diff dbclean/dbclean.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dbclean/dbclean.c Tue Mar 10 13:49:58 2009 +0100 @@ -0,0 +1,2789 @@ +/* Distributed Clearinghouse Checksum database cleaner + * + * Copyright (c) 2008 by Rhyolite Software, LLC + * + * This agreement is not applicable to any entity which sells anti-spam + * solutions to others or provides an anti-spam solution as part of a + * security solution sold to other entities, or to a private network + * which employs the DCC or uses data provided by operation of the DCC + * but does not provide corresponding data to other users. + * + * Permission to use, copy, modify, and distribute this software without + * changes for any purpose with or without fee is hereby granted, provided + * that the above copyright notice and this permission notice appear in all + * copies and any distributed versions or copies are either unchanged + * or not called anything similar to "DCC" or "Distributed Checksum + * Clearinghouse". + * + * Parties not eligible to receive a license under this agreement can + * obtain a commercial license to use DCC by contacting Rhyolite Software + * at sales@rhyolite.com. + * + * A commercial license would be for Distributed Checksum and Reputation + * Clearinghouse software. That software includes additional features. This + * free license for Distributed ChecksumClearinghouse Software does not in any + * way grant permision to use Distributed Checksum and Reputation Clearinghouse + * software + * + * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL + * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC + * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES + * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, + * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + * + * Rhyolite Software DCC 1.3.103-1.254 $Revision$ + */ + +#include "srvr_defs.h" +#include "dcc_ck.h" +#include <signal.h> + +static DCC_EMSG dcc_emsg; + +static DCC_WF dbclean_wf; +static DCC_WHITE_TBL dbclean_white_tbl; +static DCC_CLNT_CTXT *ctxt; +static DCC_OP_RESP aop_resp; +static int flods_off; +static int dccd_unlocked; /* dccd has been told to unlock */ + +static DCC_SRVR_NM srvr = DCC_SRVR_NM_DEF; +static DCC_CLNT_ID srvr_clnt_id = DCC_ID_INVALID; +static const ID_TBL *srvr_clnt_tbl; +static u_char info_flags = 0; +#ifdef USE_DBCLEAN_F +static u_char db_mode = DB_OPEN_MMAP_WRITE; +#else +static u_char db_mode = DB_OPEN_MMAP_WRITE_NOSYNC; +#endif + +static u_char cleardb; /* 1=clear the database */ +static enum { + NORMAL_MODE, + REPAIR_MODE, /* database broken */ + QUICK_MODE, /* too big for window */ + HASH_MODE, /* hash table full */ + NO_CRON_MODE, /* work around missing cron job */ + DEL_MODE /* after deletion */ +} clean_mode = NORMAL_MODE; +static u_char standalone; /* 1=don't talk to dccd */ +static u_char keep_white; /* 1=do not rebuild whitelist */ + +static int exit_value = -1; + +static const char *homedir; +static u_char cur_db_created; +static const char *cur_db_nm_str = DB_DCC_NAME; +static DCC_PATH cur_db_nm; +static DCC_PATH cur_hash_nm; +static int old_db_fd = -1; +static DB_HADDR old_db_hash_used; +static DB_PARMS old_db_parms; +static DB_PARMS new_db_parms; +static DB_PTR old_db_pos, new_db_csize; +static off_t new_db_fsize; +static u_int new_db_pagesize; +static FLOD_MMAPS new_flod_mmaps; +static u_char adj_delay_pos; +static u_char new_db_created; +static DCC_PATH new_db_nm; +static 
int new_db_fd = -1; +static u_char new_hash_created; +static DCC_PATH new_hash_nm; +static DCC_PATH old_db_nm; + +static int expire_secs = -1; +static int def_expire_secs = DB_EXPIRE_SECS_DEF; +static int expire_spamsecs = -1; +static int def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF; +static int have_expire_parms = 0; +static double def_exp_ratio = 0.0; +static DB_EX_SECS new_ex_secs; +static DB_EX_TS new_ex_ts; + +static DB_HADDR new_hash_len; + +static int expired_rcds, comp_rcds, obs_rcds, expired_cks; +static int white_cks, kept_cks; + +static DCC_TS future_ts; + +#define RESTART_DELAY (60*5) +#define SHORT_DELAY 30 + +static struct timeval clean_start; + +static struct timeval progress_rpt_last; /* when previous progress report */ +static struct timeval progress_rpt_checked; /* when last checked */ +static struct timeval progress_rpt_start; /* start of progress reporting */ +#define REPORT_INTERVAL_SECS (5*60) +#define REPORT_INTERVAL_FAST_SECS 10 +#define UNLOCK_INTERVAL_USECS (DCC_US/2) +static int progress_rpt_cnt; /* operations until next check */ +static int progress_rpt_base; +static u_char progress_rpt_started; /* 1=have started reporting progress */ +static int progress_rpt_percent; + +static u_char write_new_flush(u_char); +static u_char write_new_rcd(const void *, int); +static void write_new_hdr(u_char); +static void unlink_whine(const char *, u_char); +static void rename_bail(const char *, const char *); +static u_char expire(DB_PTR); +static u_char copy_db(void); +static u_char catchup(DCC_EMSG); +static void parse_white(void); +static void build_hash(void); +static u_char persist_aop(DCC_AOPS, u_int32_t, int); +static void dccd_new_db(const char *); +static void finish(void); +static void exit_dbclean(int) NRATTRIB; +static void sigterm(int); + +/* complain about a bad command line + * die: nonzero hands the usage text to dcc_logbad() with EX_USAGE, or + * only a short giving-up message if usage() has already complained + * (dcc_logbad() presumably does not return); + * zero logs the usage text once through dcc_error_msg(), remembers + * that it complained, and lets the caller carry on */ +static void +usage(u_char die) +{ + const char str[] = { + "usage: [-64dfFNPSVq] [-i id]" + " [-a [server-addr][,server-port]] [-h homedir]\n" + " [-G on] [-R mode] [-s hash-size] [-e seconds]" + " [-E 
spamsecs]\n" + " [-L ltype,facility.level]"}; + static u_char complained; + + /* it's important to try to run, so don't give up unless necessary */ + if (die) { + /* pass the text as an argument so it is never parsed as a format */ + dcc_logbad(EX_USAGE, "%s", complained ? "giving up" : str); + } else if (!complained) { + dcc_error_msg("%s\ncontinuing", str); + complained = 1; + } +} + +/* Clean the database: parse the options, build a cleaned copy of the current + * database in a new file, rebuild the hash table, and rename the new files + * into place, coordinating with dccd unless running standalone. */ +int NRATTRIB +main(int argc, char **argv) +{ + char hostname[DCC_MAXDOMAINLEN]; + u_char print_version = 0; + struct stat cur_db_sb; + u_int tgt_db_pagesize; + const char *cp; + char *p; + u_long l; + int i; + + gettimeofday(&db_time, 0); + clean_start = db_time; + + dcc_timeval2ts(&future_ts, &clean_start, 24*60*60); + + dcc_syslog_init(1, argv[0], 0); + + /* this must match DBCLEAN_GETOPTS in cron-dccd.in */ + while ((i = getopt(argc, argv, "64dfFNPSVqi:a:h:G:R:s:e:E:L:")) != -1) { + switch (i) { + case '6': +#ifndef NO_IPV6 + info_flags = DCC_INFO_FG_IPV6; +#endif + break; + case '4': + info_flags = 0; + break; + + case 'd': + if (db_debug++) + ++dcc_clnt_debug; + break; + + case 'f': + db_mode &= ~DB_OPEN_MMAP_WRITE; + break; + + + case 'F': + db_mode |= DB_OPEN_MMAP_WRITE; + break; + + case 'N': /* make a new, clear database */ + cleardb = 1; + standalone = 1; + break; + + case 'P': + if (have_expire_parms > 0) + dcc_logbad(EX_USAGE, + "do not use -P with -e or -E"); + have_expire_parms = -1; + break; + + case 'S': + standalone = 1; + break; + + case 'V': + fprintf(stderr, DCC_VERSION"\n"); + print_version = 1; + break; + + case 'q': + trace_quiet = 1; + break; + + case 'i': + l = strtoul(optarg, &p, 10); + if (*p != '\0' + || l < DCC_SRVR_ID_MIN + || l > DCC_SRVR_ID_MAX) + dcc_logbad(EX_USAGE, "invalid DCC ID \"-i %s\"", + optarg); + srvr_clnt_id = l; + break; + + case 'a': + cp = dcc_parse_nm_port(dcc_emsg, optarg, srvr.port, + hostname, sizeof(hostname), + &srvr.port, 0, 0, 0, 0); + if (!cp) { + dcc_error_msg("%s", dcc_emsg); + break; + } + cp += strspn(cp, DCC_WHITESPACE); + if (*cp != '\0') { + dcc_error_msg("unrecognized port number in " + "\"-a 
%s\"", optarg); + break; + } + if (hostname[0] == '\0') + strcpy(srvr.hostname, DCC_SRVR_NM_DEF_HOST); + else + BUFCPY(srvr.hostname, hostname); + break; + + case 'h': + homedir = optarg; + break; + + case 'G': + dcc_syslog_init(1, argv[0], " grey"); + if (have_expire_parms > 0) + dcc_logbad(EX_USAGE, + "do not use -G with -e or -E"); + if (strcasecmp(optarg, "on")) + usage(0); /* be generous and allow -Gasdf */ + grey_on = 1; + have_expire_parms = -1; + cur_db_nm_str = DB_GREY_NAME; + break; + + case 'R': + if (!strcasecmp(optarg, "bad")) + clean_mode = REPAIR_MODE; + else if (!strcasecmp(optarg, "quick")) + clean_mode = QUICK_MODE; + else if (!strcasecmp(optarg, "hash")) + clean_mode = HASH_MODE; + else if (!strcasecmp(optarg, "cron")) + clean_mode = NO_CRON_MODE; + else if (!strcasecmp(optarg, "del")) + clean_mode = DEL_MODE; + else + dcc_logbad(EX_USAGE, + "unrecognized repair mode -R %s", + optarg); + break; + + case 's': /* hash table size in entries */ + new_hash_len = strtoul(optarg, &p, 0); + if (*p != '\0' + || new_hash_len < MIN_HASH_ENTRIES + || new_hash_len > MAX_HASH_ENTRIES) + dcc_logbad(EX_USAGE, + "invalid database size \"%s\"", + optarg); + break; + + case 'e': /* expiration for non-bulk checksums */ + if (grey_on) + dcc_logbad(EX_USAGE, + "do not use -e with -G"); + if (have_expire_parms < 0) + dcc_logbad(EX_USAGE, + "-e cannot be used with -P"); + have_expire_parms = 1; + expire_secs = dcc_get_secs(optarg, 0, + DB_EXPIRE_SECS_MIN, + DB_EXPIRE_SECS_MAX, -1); + if (expire_secs < 0) + dcc_logbad(EX_USAGE, + "invalid expiration seconds" + " \"-e %s\"", + optarg); + break; + + case 'E': /* expiration for bulk checksums */ + if (grey_on) + dcc_logbad(EX_USAGE, + "do not use -E with -G"); + if (have_expire_parms < 0) + dcc_logbad(EX_USAGE, + "do not use -E with -P"); + have_expire_parms = 1; + expire_spamsecs = dcc_get_secs(optarg, 0, + DB_EXPIRE_SECS_MIN, + DB_EXPIRE_SECS_MAX, -1); + if (expire_spamsecs < 0) + dcc_logbad(EX_USAGE, + "invalid spam 
expiration seconds" + " \"-E %s\"", + optarg); + break; + + case 'L': + dcc_parse_log_opt(optarg); + break; + + default: + usage(0); + } + } + argc -= optind; + argv += optind; + if (argc != 0) + usage(1); + + if (srvr_clnt_id == DCC_ID_INVALID && !standalone) { + if (print_version) + exit(EX_OK); + usage(1); + } + srvr.clnt_id = srvr_clnt_id; + + if (srvr.port == 0) + srvr.port = DCC_GREY2PORT(grey_on); + + dcc_clnt_unthread_init(); + /* move to the target directory + * and set homedir for fnm2rel_good() */ + if (!dcc_cdhome(dcc_emsg, homedir, 0)) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + fnm2rel_good(cur_db_nm, cur_db_nm_str, 0); + cp = ""; + switch (clean_mode) { + case NORMAL_MODE: cp = "cleaning"; break; + case REPAIR_MODE: cp = "repairing"; break; + case QUICK_MODE: cp = "quick cleaning"; break; + case HASH_MODE: cp = "expanding hash table in"; break; + case NO_CRON_MODE: cp = "work around missing cron job for"; break; + case DEL_MODE: cp = "clean up deletion in"; break; + } + quiet_trace_msg(DCC_VERSION" %s %s", cp, fnm2abs_err(0, cur_db_nm)); + + atexit(finish); + signal(SIGHUP, sigterm); + signal(SIGTERM, sigterm); + signal(SIGINT, sigterm); +#ifdef SIGXFSZ + signal(SIGXFSZ, SIG_IGN); +#endif + + if (!standalone) { + i = load_ids(dcc_emsg, srvr_clnt_id, &srvr_clnt_tbl, 1); + if (i <= 0) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + memcpy(srvr.passwd, srvr_clnt_tbl->cur_passwd, + sizeof(srvr.passwd)); + } + + fnm2rel_good(cur_hash_nm, cur_db_nm, DB_HASH_SUFFIX); + fnm2rel_good(old_db_nm, cur_db_nm, "-old"); + fnm2rel_good(new_db_nm, cur_db_nm, "-new"); + fnm2rel_good(new_hash_nm, new_db_nm, DB_HASH_SUFFIX); + + /* exclude other instances of this program */ + if (!lock_dbclean(dcc_emsg, cur_db_nm)) + dcc_logbad(dcc_ex_code, "%s: dbclean already running?", + dcc_emsg); + + /* create & the lock new database file */ + new_db_fd = dcc_lock_open(dcc_emsg, new_db_nm, O_RDWR|O_CREAT, + DCC_LOCK_OPEN_NOWAIT, DCC_LOCK_ALL_FILE, 0); + if (new_db_fd == -1) + 
dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + if (0 > ftruncate(new_db_fd, 0)) + dcc_logbad(EX_IOERR, "truncate(%s,0): %s", + new_db_nm, ERROR_STR()); + new_db_fsize = 0; + new_db_created = 1; + new_db_csize = DB_PTR_BASE; + + tgt_db_pagesize = 0; + if (0 > stat(cur_db_nm, &cur_db_sb)) { + if (errno != ENOENT) + dcc_logbad(EX_IOERR, "stat(%s): %s", + cur_db_nm, ERROR_STR()); + /* empty a missing database */ + cleardb = 1; + } else if (cur_db_sb.st_size == 0) { + /* empty an empty database */ + cleardb = 1; + } else if (grey_on && cur_db_sb.st_size < DB_MIN_MIN_MBYTE*1024*1024) { + /* Force a relatively large page size for typical tiny + * greylist databases. Try to use few mmap() pages */ + tgt_db_pagesize = cur_db_sb.st_size/4; + if (tgt_db_pagesize < MIN_HASH_ENTRIES*sizeof(HASH_ENTRY)) + tgt_db_pagesize = MIN_HASH_ENTRIES*sizeof(HASH_ENTRY); + } + new_db_pagesize = db_get_pagesize(0, tgt_db_pagesize); + write_new_hdr(1); + + + if (standalone) { + u_char busy; + + /* open and lock the current database to ensure + * the daemon is not running */ + old_db_fd = dcc_lock_open(dcc_emsg, cur_db_nm, O_RDWR, + DCC_LOCK_OPEN_NOWAIT, + DCC_LOCK_ALL_FILE, &busy); + if (busy) + dcc_logbad(EX_USAGE, "database %s in use: %s", + cur_db_nm, dcc_emsg); + if (cleardb + && stat(cur_db_nm, &cur_db_sb) >= 0) { + if (cur_db_sb.st_size != 0) + dcc_logbad(EX_USAGE, "%s already exists", + cur_db_nm); + cur_db_created = 1; + } + + /* create and lock the current database if it did not exist + * to ensure that the server daemon is not running */ + if (old_db_fd < 0) { + old_db_fd = dcc_lock_open(dcc_emsg, cur_db_nm, + O_RDWR|O_CREAT, + DCC_LOCK_OPEN_NOWAIT, + DCC_LOCK_ALL_FILE, 0); + if (old_db_fd < 0) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + cur_db_created = 1; + } + + } else { + /* Tell the daemon to start turning off the flooding + * so we can adjust its positions in the flood map file + * Try very hard to talk to it because releasing the database + * can cause some UNIX flavors to stall 
dccd. */ + DCC_CLNT_FGS clnt_fgs; + + clnt_fgs = DCC_CLNT_FG_SLOW; + if (grey_on) + clnt_fgs |= DCC_CLNT_FG_GREY; + ctxt = dcc_tmp_clnt_init(dcc_emsg, 0, &srvr, + 0, clnt_fgs, info_flags); + /* try very hard to contact dccd */ + if (!ctxt) + ctxt = dcc_tmp_clnt_init(dcc_emsg, 0, &srvr, + 0, clnt_fgs, info_flags); + if (!ctxt) + dcc_logbad(EX_DCC_RESTART, "initial contact: %s", + dcc_emsg); + + ++flods_off; + if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_SHUTDOWN, + SHORT_DELAY)) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + } + + /* resolve whitelisted host names before locking the database */ + parse_white(); + + /* Tell the daemon to unlock the database between operations + * and insist it stop flooding. */ + if (!standalone) { + /* give the daemon a chance to stop pumping the floods */ + for (;;) { + if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_CHECK, + SHORT_DELAY)) + dcc_logbad(EX_UNAVAILABLE, "%s", dcc_emsg); + + i = flod_running(aop_resp.resp.val.string); + if (i < 0) + dcc_logbad(EX_PROTOCOL, + "%s: unrecognized \"%s\"", + dcc_aop2str(0, 0, + DCC_AOP_FLOD, + DCC_AOP_FLOD_CHECK), + aop_resp.resp.val.string); + if (i == 0) + break; + if (time(0) > clean_start.tv_sec+45) { + if (flods_off < 2) { + ++flods_off; + if (!persist_aop(DCC_AOP_FLOD, + DCC_AOP_FLOD_HALT, + SHORT_DELAY)) + dcc_logbad(dcc_ex_code, "%s", + dcc_emsg); + continue; + } + if (time(0) > clean_start.tv_sec+60) + dcc_logbad(EX_UNAVAILABLE, + "failed to stop floods: %s", + aop_resp.resp.val.string); + } + usleep(100*1000); + } + dccd_unlocked = 1; + if (!persist_aop(DCC_AOP_DB_CLEAN, 0, SHORT_DELAY)) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + /* The daemon adds its own and removes our hold on flooding + * when we tell it to unlock the database after every + * operation. */ + --flods_off; + } + + if (cleardb) { + quiet_trace_msg(DCC_VERSION" %s database %s", + cur_db_created ? 
"creating" : "clearing", + cur_db_nm); + + } else if (clean_mode == REPAIR_MODE) { + dcc_error_msg("explicit repair of %s", cur_db_nm); + + } else { + if (!db_open(0, old_db_fd, cur_db_nm, 0, + DB_OPEN_RDONLY + | (standalone + ? DB_OPEN_LOCK_NOWAIT : DB_OPEN_LOCK_WAIT))) { + /* If the hash table is sick, check timestamps only + * as much as no hash table allows. + * Then rebuild the hash table. */ + clean_mode = REPAIR_MODE; + + } else { + if (db_debug) { + quiet_trace_msg("%s %s", + db_window_size_str, new_db_nm); + quiet_trace_msg("%d old hash entries total," + " %d or %d%% used", + HADDR2LEN(db_hash_len), + HADDR2LEN(db_hash_used), + (int)((HADDR2LEN(db_hash_used) + * 100.0) + /HADDR2LEN(db_hash_len))); + } + old_db_parms = db_parms; + old_db_hash_used = db_hash_used; + + /* save a handle on the old database to get + * reports that arrive while we expire it */ + old_db_fd = dup(db_fd); + if (old_db_fd < 0) + dcc_logbad(EX_OSERR, "dup(%s): %s", + cur_db_nm, ERROR_STR()); + + /* read old and create new database file */ + if (!expire(db_csize)) { + old_db_hash_used = 0; + clean_mode = REPAIR_MODE; + } + } + + if (clean_mode == REPAIR_MODE) + dcc_error_msg("repairing %s", cur_db_nm); + } + + /* if we are repairing the hash table (including now repairing + * after encountering problems while expiring), + * copy the current file with minimal expiring */ + if (clean_mode == REPAIR_MODE + && !cleardb + && !copy_db()) + exit_dbclean(EX_UNAVAILABLE); + build_hash(); + + /* Copy any records from the old file to the new file that were + * added to the old file while we were creating the new file. 
*/ + if (!cleardb + && !catchup(dcc_emsg)) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + + /* we have the new database locked + * + * preserve the current data file as "*-old" */ + rename_bail(cur_db_nm, old_db_nm); + + /* delete the current hash file, and install both new files */ + rename_bail(new_hash_nm, cur_hash_nm); + strcpy(new_hash_nm, cur_hash_nm); + new_hash_created = 0; + if (db_hash_fd >= 0) + strcpy(db_hash_nm, cur_hash_nm); + + rename_bail(new_db_nm, cur_db_nm); + strcpy(new_db_nm, cur_db_nm); + new_db_created = 0; + if (db_fd > 0) + strcpy(db_nm, cur_db_nm); + cur_db_created = 0; + + if (cleardb) { + flod_mmap_path_set(); + unlink_whine(flod_mmap_path, 1); + if (!db_close(1)) + exit_dbclean(EX_UNAVAILABLE); + exit_dbclean(EX_OK); + } + + /* if the daemon was not running, we're finished */ + if (standalone) { + /* install the flood positions if things are ok */ + if (flod_mmaps) { + memcpy(flod_mmaps, &new_flod_mmaps, + sizeof(new_flod_mmaps)); + flod_unmap(0, 0); + } + if (!db_close(1)) + exit_dbclean(EX_UNAVAILABLE); + exit_dbclean(EX_OK); + } + + /* tell the daemon to switch to the new database. This will leave + * the daemon stuck waiting for us to unlock the new database. */ + dccd_new_db("copy late arrivals"); + + /* install the flood positions if things are ok */ + if (flod_mmaps) { + memcpy(flod_mmaps, &new_flod_mmaps, + sizeof(new_flod_mmaps)); + flod_unmap(0, 0); + } + + /* Copy any records from the old file to the new file in the + * race to tell the daemon to switch to the new file. + * The new file is still locked from build_hash(). + * The daemon should be stuck waiting to open it in the + * DCC_AOP_DB_NEW request via the preceding dccd_new_db(). + * + * Since the daemon has switched and probably cannot go back, + * ignore any errors */ + catchup(0); + if (!db_close(1)) + exit_dbclean(EX_UNAVAILABLE); + + /* finish() will be called via exit() to tell the daemon to resume + * flooding if necessary. 
However, in the normal case, we removed + * all counts against flooding before calling dccd_new_db() */ + exit_dbclean(EX_OK); +} + + + +/* adjust output flood positions + * Every still-marked confirm_pos in new_flod_mmaps (and delay_pos while + * adj_delay_pos is set) that is not past old_db_pos is shifted by the + * difference between new_db_csize and old_db_pos, and its mark is cleared. + * Returns the smallest position that is still past old_db_pos and so is + * still waiting to be adjusted, or 0 if there is none. */ +static DB_PTR +adj_mmap(void) +{ + FLOD_MMAP *mp; + DB_PTR delta, new_pos; + + delta = new_db_csize - old_db_pos; + new_pos = 0; + for (mp = new_flod_mmaps.mmaps; + mp <= LAST(new_flod_mmaps.mmaps); + ++mp) { + /* do nothing to marks we have already adjusted */ + if (!(mp->flags & FLODMAP_FG_MARK)) + continue; + if (mp->confirm_pos > old_db_pos) { + /* note the next mark that will need adjusting + * but do not adjust it yet */ + if (new_pos == 0 + || new_pos > mp->confirm_pos) + new_pos = mp->confirm_pos; + } else { + /* adjust marks not past the current position */ + mp->confirm_pos += delta; + mp->flags &= ~FLODMAP_FG_MARK; + } + } + if (adj_delay_pos) { /* the delay position gets the same treatment, with adj_delay_pos serving as its mark */ + if (new_flod_mmaps.delay_pos > old_db_pos) { + if (new_pos == 0 + || new_pos > new_flod_mmaps.delay_pos) + new_pos = new_flod_mmaps.delay_pos; + } else { + new_flod_mmaps.delay_pos += delta; + adj_delay_pos = 0; + } + } + + return new_pos; +} + + + +/* find a checksum + * Leave db_sts.rcd2 pointing at the record. + * Returns 1 when db_lookup() answers DB_FOUND_IT, DB_FOUND_EMPTY, + * DB_FOUND_CHAIN, or DB_FOUND_INTRUDER. Returns 0 when the database + * cannot be locked, or after logging a DB_FOUND_LATER or + * DB_FOUND_SYSERR answer. */ +static u_char +get_ck(DB_RCD_CK **ckp, /* point this to the checksum */ + DCC_CK_TYPES type, const DCC_SUM sum) +{ + DB_FOUND db_result; + + /* We must lock the file to keep the daemon from changing the + * internal hash table links. 
*/ + if (!DB_IS_LOCKED() + && 0 > db_lock()) + return 0; + + dcc_emsg[0] = '\0'; + db_result = db_lookup(dcc_emsg, type, sum, 0, MAX_HASH_ENTRIES, + &db_sts.hash, &db_sts.rcd2, ckp); + switch (db_result) { + case DB_FOUND_LATER: + case DB_FOUND_SYSERR: + dcc_error_msg("hash lookup for %s from "L_HPAT" = %d: %s", + DB_TYPE2STR(type), old_db_pos, db_result, + dcc_emsg); + break; + + case DB_FOUND_IT: + case DB_FOUND_EMPTY: + case DB_FOUND_CHAIN: + case DB_FOUND_INTRUDER: + return 1; + } + + return 0; +} + + + +/* check the leading report for not recent checksum + * on entry db_sts.rcd points to the record under consideration + * Leave db_sts.rcd2 pointing at the leading record. + * Types for which DCC_CK_IS_REP_CMN(grey_on, type) is true are always + * expired. A failed get_ck() or a missing leader is reported as a + * broken database. Otherwise the checksum is kept only if the leader + * or the record is not ancient and the leader's target count reaches + * db_tholds[type]. */ +static int /* -1=broken database 0=expire 1=keep */ +get_lead(DCC_CK_TYPES type, const DB_RCD_CK *rcd_ck) +{ + DB_RCD_CK *lead_ck; + DCC_TGTS lead_tgts; + + if (DCC_CK_IS_REP_CMN(grey_on, type)) { + /* do not keep reputations on systems without reputation code */ + return 0; + } + + if (!get_ck(&lead_ck, type, rcd_ck->sum)) + return -1; + + if (!lead_ck) { + dcc_error_msg("no leader for %s %s at "L_HPAT, + DB_TYPE2STR(type), + dcc_ck2str_err(type, rcd_ck->sum, 0), + old_db_pos); + return -1; + } + + /* We know the target checksum is not recent. Forget the target if + * both the target and the leader are ancient. The leader might not be + * the newest checksum, but it usually is. Note also that the target + * might be the leader. */ + if (dcc_ts_older_ts(&db_sts.rcd2.d.r->ts, &new_ex_ts[type].spam) + && dcc_ts_older_ts(&db_sts.rcd.d.r->ts, &new_ex_ts[type].spam)) + return 0; + + lead_tgts = DB_TGTS_CK(lead_ck); + + /* We know either the leader or the target is not ancient. + * Keep the target if the leader's total is respectable. + * We might eventually compress the target. 
*/ + return (lead_tgts >= db_tholds[type]); +} + + +/* Start a new round of progress reporting: refresh db_time, use it as the + * last-checked and last-reported time and its seconds as the start time, + * reset the countdown to the next check to 100 operations, and forget + * that any report has been made. */ +static void +report_progress_init(void) +{ + gettimeofday(&db_time, 0); + progress_rpt_start.tv_sec = db_time.tv_sec; + progress_rpt_checked = db_time; + progress_rpt_last = db_time; + progress_rpt_base = 100; + progress_rpt_cnt = progress_rpt_base; + progress_rpt_started = 0; +} + + +/* Note the passage of time and log a progress line when one is due. + * final: nonzero on the last call; a line is then also logged if the + * last reported percentage was not 100, and the QUICK_MODE + * time limit is not enforced + * s1, s2: text printed before the counts and after them + * done, total: work finished so far and work expected; a total of 0 + * counts as 100% + * scale: divisor applied to done and total before they are printed + * Nothing is logged while reporting has not started and the remaining + * work looks small. Each call also recomputes progress_rpt_base, kept + * between 100 and 10*1000, and reloads progress_rpt_cnt from it. */ +static time_t /* us since last check */ +report_progress(u_char final, + const char *s1, const char *s2, + DB_PTR done, DB_PTR total, DB_PTR scale) +{ + time_t reported_us, checked_us, secs, interval; + double percent; + + if (!total) + percent = 100.0; + else + percent = (done*100.0)/total; + + gettimeofday(&db_time, 0); + checked_us = tv_diff2us(&db_time, &progress_rpt_checked); + progress_rpt_checked = db_time; + + /* Check frequently enough to report or unlock the database. + * Adjust the number of operations until the next check + * based on the time spent on the previous */ + if (checked_us > 0) + progress_rpt_base = ((progress_rpt_base * 0.5 * DCC_US + * min(REPORT_INTERVAL_FAST_SECS*DCC_US, + UNLOCK_INTERVAL_USECS)) + / checked_us); + else + progress_rpt_base = 100; + if (progress_rpt_base < 100) + progress_rpt_base = 100; + if (progress_rpt_base > 10*1000) + progress_rpt_base = 10*1000; + progress_rpt_cnt = progress_rpt_base; + + interval = ((db_debug > 1) + ? 
REPORT_INTERVAL_FAST_SECS + : REPORT_INTERVAL_SECS); + + /* try not to start reporting progress at the end */ + if (!progress_rpt_started + && (total*1.0 - done*1.0) / progress_rpt_base <= interval*1.0) + return checked_us; + + reported_us = tv_diff2us(&db_time, &progress_rpt_last); + if (reported_us >= interval * DCC_US + || (final && progress_rpt_percent != 100)) { + progress_rpt_started = 1; + progress_rpt_percent = percent; + secs = db_time.tv_sec - progress_rpt_start.tv_sec; + secs -= secs % interval; + progress_rpt_last.tv_sec = progress_rpt_start.tv_sec + secs; + if (db_debug > 1) + quiet_trace_msg("%s "L_DPAT" of "L_DPAT" %s or %d%%" + " db_mmaps=%d hash=%d", + s1, done/scale, total/scale, + s2, progress_rpt_percent, + db_stats.db_mmaps, db_stats.hash_mmaps); + else + quiet_trace_msg("%s "L_DPAT" of "L_DPAT" %s or %d%%", + s1, done/scale, total/scale, + s2, progress_rpt_percent); + } + + + if (clean_mode == QUICK_MODE + && !final) { + if (db_time.tv_sec > clean_start.tv_sec + 30*60) + dcc_logbad(EX_UNAVAILABLE, "quick cleaning too slow"); + } + + return checked_us; +} + + + +/* delete old, less fuzzy checksums in the new record + * Walks the checksums from new->cks up to *end_ck. A checksum is removed + * when dcc_ts_older_ts() says the record's timestamp is older than + * new_ex_ts[type].all for that checksum's type; the checksums after it + * are slid down over it and *end_ck is moved back by one. + * Each removal counts in obs_rcds, subtracts one from new->fgs_num_cks + * (presumably the checksum count kept with the flags), clears + * DB_RCD_FG_DELAY and sets DB_RCD_FG_TRIM. */ +static void +fuzzy_obs(DB_RCD *new, DB_RCD_CK **end_ck) +{ + DB_RCD_CK *rcd_ck; + DCC_CK_TYPES type; + int len; + + rcd_ck = new->cks; + while (rcd_ck < *end_ck) { + type = DB_CK_TYPE(rcd_ck); + if (!dcc_ts_older_ts(&new->ts, &new_ex_ts[type].all)) { + ++rcd_ck; + continue; + } + + ++obs_rcds; + new->fgs_num_cks = (((new->fgs_num_cks - 1) + & ~DB_RCD_FG_DELAY) + | DB_RCD_FG_TRIM); + --*end_ck; + len = (char *)*end_ck - (char *)rcd_ck; + if (len == 0) + return; /* it was the last checksum, so nothing follows to slide down */ + memmove(rcd_ck, rcd_ck+1, len); + } +} + + +/* Pick the default expiration durations def_expire_secs and + * def_expire_spamsecs, and set def_exp_ratio, from a prediction of the + * database size. Calls that find def_exp_ratio already nonzero return + * at once. */ +static void +adj_def_expire(void) +{ + double new_dbsize, new_dbsize1, day_rate, db_ratio; + int spam_secs, secs; + struct timeval tv; + char new_dbsize_buf[20], csize_buf[20], old_csize_buf[20]; + char day_rate_buf[20]; + + /* do this only once */ + if (def_exp_ratio != 0.0) + return; + + /* Compute the ratio of size of the 
database 24 hours from now + * to the size of the window. Assume: + * - We will receive about the same number of reports in the next + * 24 hours as the last 24. This is a good assumption for + * weekdays, but as much as 30% wrong about weekends. + * - Dbclean will be run once per day at the current time. + * - The size of the database is a linear function of expiration + * duration. This is tenuous when the spam expiration duration + * is less than 1 day. + * Use the maximum of two guesses for tomorrow's database size. + * One guess is the current size, base on assuming that + * we will use roughly the same expiration durations and + * so the database will grow to about size it now has. + * The other guess uses the previous database size and the + * avarage data rate. It compensates for short term changes + * in the rate and for running dbclean more than once per day. */ + new_dbsize = db_parms.db_csize; + size2str(csize_buf, sizeof(csize_buf), new_dbsize, 1); + new_dbsize1 = db_parms.old_db_csize; + size2str(old_csize_buf, sizeof(old_csize_buf), new_dbsize1, 1); + day_rate = db_add_rate(&db_parms, 0); + if (day_rate >= 0.0) + day_rate *= (24*60*60); + size2str(day_rate_buf, sizeof(day_rate_buf), day_rate, 1); + + /* without information, be pessimistic and assume 1.4 GByte/day */ + if (day_rate <= 0.0 && !grey_on) + day_rate = 1.4*1024.0*1024.0*1024.0; + if (day_rate > 0.0) { + new_dbsize1 += day_rate; + if (new_dbsize < new_dbsize1) + new_dbsize = new_dbsize1; + } + + size2str(new_dbsize_buf, sizeof(new_dbsize_buf), new_dbsize, 1); + if (db_debug) + quiet_trace_msg("predict new_dbsize=%s from db_csize=%s" + " old_db_csize=%s rate=%s", + new_dbsize_buf, + csize_buf, old_csize_buf, day_rate_buf); + + /* Assume there will be 20% as many bytes used in the hash table + * as in the database */ + new_dbsize *= 1.2; + + /* we cannot adjust the defaults + * - 1st time dbclean run + * - if the previous run used a larger than default value + * - there is no need to reduce 
the default because the predicted + * maximum size is smaller than the target maximum + */ + spam_secs = db_parms.ex_secs[DCC_CK_FUZ2].spam; + if (spam_secs != 0 + && spam_secs <= DB_EXPIRE_SPAMSECS_DEF + && new_dbsize > db_max_byte + && (db_ratio = (db_max_byte / new_dbsize)) < 1.0) { + def_exp_ratio = (spam_secs * db_ratio) / DB_EXPIRE_SPAMSECS_DEF; + + /* change the two durations together and so with same errors */ + def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF * def_exp_ratio; + def_expire_secs = DB_EXPIRE_SECS_DEF * def_exp_ratio; + + def_expire_secs -= def_expire_secs % (60*60); + if (def_expire_secs < DB_EXPIRE_SECS_DEF_MIN) + def_expire_secs = DB_EXPIRE_SECS_DEF_MIN; + + def_expire_spamsecs -= def_expire_spamsecs % (24*60*60); + if (def_expire_spamsecs < DB_EXPIRE_SPAMSECS_DEF_MIN) + def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF_MIN; + +#if DB_MIN_MBYTE == 0 && !defined(GOT_PHYSMEM) + if (def_expire_secs == DB_EXPIRE_SECS_DEF_MIN + || def_expire_spamsecs == DB_EXPIRE_SPAMSECS_DEF_MIN) + quiet_trace_msg("cannot determine physical RAM; rebuild" + " with ./configure with-db-memory"); +#endif + return; + } + + def_exp_ratio = 1.0; + + /* if the defaults do not need to be reduced now but they + * were reduced before, then relax them gently */ + if (spam_secs < DB_EXPIRE_SPAMSECS_DEF) { + dcc_ts2timeval(&tv, &db_parms.ex_spam[DCC_CK_FUZ2]); + secs = clean_start.tv_sec - tv.tv_sec; + if (secs > 0 + && secs < DB_EXPIRE_SPAMSECS_DEF) + def_expire_spamsecs = secs; + + dcc_ts2timeval(&tv, &db_parms.ex_all[DCC_CK_FUZ2]); + secs = clean_start.tv_sec - tv.tv_sec; + if (secs > 0 + && secs < DB_EXPIRE_SECS_DEF) + def_expire_secs = secs; + } +} + + + +/* copy the existing database, discard junk and old entries */ +static u_char /* 1=done 0=database broken */ +expire(DB_PTR old_db_csize) +{ +#define EXPIRE_BAIL() {alarm(0); flod_unmap(0, 0); db_close(0); return 0;} + + DCC_TS ts; + u_char emptied, reduced_defaults; + u_char old_ok[DCC_DIM_CKS]; + DB_RCD rcd, new; + const 
DB_RCD_CK *rcd_ck, *rcd_ck2; + DB_RCD_CK *new_ck; + DCC_TGTS tgts_raw, ck_tgts; + u_char needed, obs_lvl, timely; + int old_num_cks, new_num_cks, nokeep_num_cks; + DB_PTR min_confirm_pos, next_adj_pos; + FLOD_MMAP *mp; + DCC_CK_TYPES prev_type, type, type2; + int rcd_len; + struct stat sb; + time_t need_unlock; + int i; + + reduced_defaults = 0; + if (expire_secs < 0) { + adj_def_expire(); + if (def_expire_secs > expire_spamsecs + && expire_spamsecs > 0) { + expire_secs = expire_spamsecs; + } else { + if (def_expire_secs != DB_EXPIRE_SECS_DEF + && def_exp_ratio != 1.0) + reduced_defaults = 1; + expire_secs = def_expire_secs; + } + } + if (expire_spamsecs < 0) { + adj_def_expire(); + if (def_expire_spamsecs < expire_secs) { + expire_spamsecs = expire_secs; + } else { + if (def_expire_spamsecs != DB_EXPIRE_SPAMSECS_DEF + && def_exp_ratio != 1.0) + reduced_defaults = 1; + expire_spamsecs = def_expire_spamsecs; + } + } + + if (expire_spamsecs > 0 && expire_spamsecs < expire_secs) + dcc_logbad(EX_USAGE, + "spam expiration -E must be longer than -e"); + + expired_rcds = 0; + expired_cks = 0; + kept_cks = white_cks; + need_unlock = 0; + report_progress_init(); + + /* Compute timestamps for records we keep. 
+ * Use the values from the previous use of dbclean as defaults + * unless they are bogus */ + memset(old_ok, 0, sizeof(old_ok)); + dcc_secs2ts(&ts, clean_start.tv_sec); + for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) { + DB_EX_SEC *th = &db_parms.ex_secs[type]; + + if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type)) + continue; + if (DCC_CK_IS_REP_OP(grey_on, type)) + continue; + + if (th->spam <= 0 || th->spam > DB_EXPIRE_SECS_MAX) + continue; + if (th->all <= 0 || th->all > th->spam) + continue; + + if (dcc_ts_newer_ts(&db_parms.ex_spam[type], &ts)) + continue; + if (dcc_ts_newer_ts(&db_parms.ex_all[type], &ts)) + continue; + + old_ok[type] = 1; /* old values for this type are ok */ + } + + for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) { + DB_EX_SEC *new_th = &new_ex_secs[type]; + DB_EX_TS_TYPE *new_ts = &new_ex_ts[type]; + int old_all = db_parms.ex_secs[type].all; + int old_spam = db_parms.ex_secs[type].spam; + + if (type == DCC_CK_SRVR_ID) { + /* keep server-ID declarations 5 weeks or a week longer + * than reputations so that they will be flooded 1st */ + new_th->all = DB_EXPIRE_SRVR_ID_SECS; + new_th->spam = DB_EXPIRE_SRVR_ID_SECS; + + } else if (grey_on) { + if (old_ok[type]) { + /* This is the path by which the dccd -G + * parameters are used. */ + new_th->all = old_all; + new_th->spam = old_spam; + } else if (DCC_CK_IS_GREY_TRIPLE(1, type)) { + new_th->all = DEF_GREY_WINDOW; + new_th->spam = DEF_GREY_WHITE; + } else if (DCC_CK_IS_GREY_MSG(1, type) + || type == DCC_CK_BODY) { + new_th->all = DEF_GREY_WINDOW; + new_th->spam = DEF_GREY_WINDOW; + } else { + new_th->all = 1; + new_th->spam = 1; + } + } else if (have_expire_parms < 0 && old_ok[type] + && (db_parms.flags & DB_PARM_EXP_SET)) { + /* use the old durations they are valid + * and we have no expiriation parameters */ + new_th->all = old_all; + new_th->spam = old_spam; + + } else { + new_th->all = expire_secs; + new_th->spam = (DCC_CK_LONG_TERM(type) + ? 
expire_spamsecs + : expire_secs); + if (reduced_defaults) { + quiet_trace_msg("adjust default by" + " %4.2f to -e%dhours" + " -E%ddays", + def_exp_ratio, + expire_secs/(60*60), + expire_spamsecs + / (24*60*60)); + reduced_defaults = 0; + } + } + + /* compute oldest timestamp for this type of checksum, + * without going crazy with "-Enever" */ + dcc_secs2ts(&new_ts->spam, + clean_start.tv_sec - min(clean_start.tv_sec, + new_th->spam)); + dcc_secs2ts(&new_ts->all, + clean_start.tv_sec - min(clean_start.tv_sec, + new_th->all)); + } + + /* put the timestampes into the new file */ + write_new_hdr(1); + + /* if we are running as root, + * don't change the owner of the database */ + if (getuid() == 0) { + if (0 > fstat(old_db_fd, &sb)) + dcc_logbad(EX_IOERR, "fstat(%s): %s", + old_db_nm, ERROR_STR()); + if (0 > fchown(new_db_fd, sb.st_uid, sb.st_gid)) + dcc_logbad(EX_IOERR, "fchown(%s,%d,%d): %s", + new_db_nm, (int)sb.st_uid, (int)sb.st_gid, + ERROR_STR()); + } + + if (DB_PTR_BASE != lseek(old_db_fd, DB_PTR_BASE, SEEK_SET)) + dcc_logbad(EX_IOERR, "lseek(%s,%d): %s", + cur_db_nm, DB_PTR_BASE, ERROR_STR()); + read_rcd_invalidate(0); + + flod_mmap(0, &db_parms.sn, 0, 1, 1); + if (flod_mmaps) + memcpy(&new_flod_mmaps, flod_mmaps, sizeof(new_flod_mmaps)); + min_confirm_pos = new_flod_mmaps.delay_pos; + next_adj_pos = DB_PTR_BASE; + for (mp = new_flod_mmaps.mmaps; + mp <= LAST(new_flod_mmaps.mmaps); + ++mp) { + if (mp->rem_hostname[0] == '\0') { + mp->flags &= ~FLODMAP_FG_MARK; + } else { + mp->flags |= FLODMAP_FG_MARK; + if (min_confirm_pos > mp->confirm_pos) + min_confirm_pos = mp->confirm_pos; + } + } + adj_delay_pos = (new_flod_mmaps.delay_pos != 0) ? 
1 : 0; + + emptied = cleardb; + dcc_timeval2ts(&new_flod_mmaps.sn, &clean_start, 0); + + /* copy the old file to the new, + * discarding and compressing old data as we go */ + for (old_db_pos = DB_PTR_BASE; + old_db_pos < old_db_csize; + old_db_pos += rcd_len) { + if (--progress_rpt_cnt <= 0) + need_unlock += report_progress(0, " processed", + "MBytes", + old_db_pos, old_db_csize, + 1024*1024); + + if (old_db_pos == next_adj_pos) + next_adj_pos = adj_mmap(); + + if (clean_mode != REPAIR_MODE) { + /* read the record by mapping if not repairing */ + if (!db_map_rcd(0, &db_sts.rcd, old_db_pos, &rcd_len)) + EXPIRE_BAIL(); + memcpy(&rcd, db_sts.rcd.d.r, rcd_len); + } else { + rcd_len = read_rcd(0, &rcd, + old_db_fd, old_db_pos, cur_db_nm); + if (rcd_len <= 0) { + if (rcd_len == 0) + dcc_error_msg("unexpected EOF in %s at " + L_HPAT" instead of " + L_HPAT, + cur_db_nm, + old_db_pos, + old_db_csize); + /* give up and ask our neighbors to rewind */ + emptied = 1; + old_db_pos = old_db_csize; + break; + } + } + + /* skip end-of-page padding */ + if (rcd_len == sizeof(rcd)-sizeof(rcd.cks)) + continue; + + if (DB_RCD_ID(&rcd) == DCC_ID_WHITE) { + /* skip whitelist entries if whitelist source is ok */ + if (!keep_white) + continue; + /* refresh whitelist entries if source is bad */ + dcc_timeval2ts(&rcd.ts, &clean_start, 0); + } + + old_num_cks = DB_NUM_CKS(&rcd); + + /* expire or throw away deleted reports */ + tgts_raw = DB_TGTS_RCD_RAW(&rcd); + if (tgts_raw == 0) { + ++expired_rcds; + expired_cks += old_num_cks; + continue; + } + if (tgts_raw > DCC_TGTS_MAX_DB) { + dcc_error_msg("discarding report at "L_HPAT + " with bogus target count %#x", + old_db_pos, tgts_raw); + ++expired_rcds; + expired_cks += old_num_cks; + continue; + } + + if (dcc_ts_newer_ts(&rcd.ts, &future_ts)) { + static int whines = 0; + if (whines < 50) + dcc_error_msg("discarding report at "L_HPAT + " from the future %s%s", + old_db_pos, + ts2str_err(&rcd.ts), + ++whines >= 20 + ? 
"; stop complaining" + : ""); + ++expired_rcds; + expired_cks += old_num_cks; + continue; + } + + + needed = 0; + obs_lvl = 0; + timely = 1; + nokeep_num_cks = 0; + memcpy(&new, &rcd, sizeof(new)-sizeof(new.cks)); + new.fgs_num_cks &= (DB_RCD_FG_TRIM | DB_RCD_FG_SUMRY + | DB_RCD_FG_DELAY); + new_ck = new.cks; + for (prev_type = DCC_CK_INVALID, rcd_ck = rcd.cks; + rcd_ck < &rcd.cks[old_num_cks]; + prev_type = type, ++rcd_ck) { + type = DB_CK_TYPE(rcd_ck); + if (!DCC_CK_OK_DB(grey_on, type)) { + static int whines = 0; + if (whines < 20) + dcc_error_msg("discarding %s" + " checksum at "L_HPAT"%s", + DB_TYPE2STR(type), + old_db_pos, + ++whines >= 20 + ? "; stop complaining" + : ""); + ++expired_cks; + new.fgs_num_cks |= DB_RCD_FG_TRIM; + new.fgs_num_cks &= ~DB_RCD_FG_DELAY; + continue; + } + + if (type <= prev_type + && prev_type != DCC_CK_FLOD_PATH) { + dcc_error_msg("discarding out of order %s" + " checksum at "L_HPAT, + DB_TYPE2STR(type), + old_db_pos); + ++expired_cks; + new.fgs_num_cks |= DB_RCD_FG_TRIM; + new.fgs_num_cks &= ~DB_RCD_FG_DELAY; + continue; + } + + /* Silently discard pure junk from other servers, + * provided it is junk by default */ + if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type) + && DB_GLOBAL_NOKEEP(grey_on, type) + && type != DCC_CK_FLOD_PATH + && type != DCC_CK_SRVR_ID + && DB_RCD_ID(&rcd) != DCC_ID_WHITE) { + ++expired_cks; + continue; + } + + /* Keep paths except on old records or records that + * have been trimmed or compressed. + * Never remove paths from server-ID declarations. 
*/ + if (type == DCC_CK_FLOD_PATH) { + if (DB_RCD_TRIMMED(&new) + || DB_RCD_ID(&new) == DCC_ID_COMP) + continue; + /* forget line number on old whitelist entry */ + if (DB_RCD_ID(&rcd) == DCC_ID_WHITE) + continue; + rcd_ck2 = rcd_ck+1; + for (;;) { + type2 = DB_CK_TYPE(rcd_ck2); + if (type2 == DCC_CK_SRVR_ID + || !dcc_ts_older_ts(&rcd.ts, + &new_ex_ts[type2 + ].all)) { + /* keep this path since this report + * is a server-ID declaration + * or not old */ + *new_ck = *rcd_ck; + ++new_ck; + ++new.fgs_num_cks; + ++nokeep_num_cks; + break; + } + if (++rcd_ck2>=&rcd.cks[old_num_cks]) { + /* we are discarding this path */ + new.fgs_num_cks |= DB_RCD_FG_TRIM; + new.fgs_num_cks &= ~DB_RCD_FG_DELAY; + break; + } + } + continue; + } + + if (!dcc_ts_older_ts(&rcd.ts, &new_ex_ts[type].all)) { + /* This report is recent. + * However, obsolete or junk checksums + * don't make the report needed */ + if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type) + && DB_RCD_ID(&rcd) != DCC_ID_WHITE) { + ++nokeep_num_cks; + } else if (DB_CK_OBS(rcd_ck)) { + /* This checksum is obsolete. + * If it has the highest level of + * fuzziness, then it controls whether + * the whole report is needed,. */ + if (obs_lvl < db_ck_fuzziness[type]) { + obs_lvl = db_ck_fuzziness[type]; + needed = 0; + } + } else { + /* This checksum is not obsolete. + * If it is at least as fuzzy as any + * other checksum, then it can say + * the report is needed */ + if (obs_lvl <= db_ck_fuzziness[type]) { + obs_lvl = db_ck_fuzziness[type]; + needed = 1; + } + } + + } else { + /* This checksum is at least somewhat old. 
+ * Throw away delete requests + * and other servers' useless checksums */ + if (tgts_raw == DCC_TGTS_DEL + || DB_TEST_NOKEEP(db_parms.nokeep_cks, + type)) { + ++expired_cks; + new.fgs_num_cks |= DB_RCD_FG_TRIM; + new.fgs_num_cks &= ~DB_RCD_FG_DELAY; + continue; + } + /* Throw away old obsolete checksums + * and entire reports if the fuzziest + * checksum is obsolete */ + if (DB_CK_OBS(rcd_ck)) { + if (obs_lvl < db_ck_fuzziness[type]) { + obs_lvl = db_ck_fuzziness[type]; + needed = 0; + } + ++expired_cks; + new.fgs_num_cks |= DB_RCD_FG_TRIM; + new.fgs_num_cks &= ~DB_RCD_FG_DELAY; + continue; + } + + /* old summaries are unneeded, because + * they have already been flooded. + * They do not contribute to local counts */ + if (DB_RCD_SUMRY(&rcd)) + continue; + + /* The checksum is old enough to compress, so + * mark the record as eligible for splitting. */ + timely = 0; + + /* Discard this checksum if its ultimate total + * is low or ancient + * or if it reaches spam after this report. + * To determine the ultimate total, we must + * have a hash table to find the newest record, + * which contains the final total */ + if (clean_mode != REPAIR_MODE) { + i = get_lead(type, rcd_ck); + if (i < 0) + EXPIRE_BAIL(); + if (!i) { + ++expired_cks; + new.fgs_num_cks |= DB_RCD_FG_TRIM; + new.fgs_num_cks &= ~DB_RCD_FG_DELAY; + continue; + } + } + + if (obs_lvl <= db_ck_fuzziness[type]) { + /* Since we did not delete this + * checksum, we need the record if this + * checksum is fuzzy enough to control + * our need. */ + needed = 1; + /* If this is the fuzziest checksum we + * have seen, then preceding and so + * less fuzzy checksums are obsolete, + * if they are old. + * Assume that checksums are ordered + * in the record by fuzziness. */ + if (obs_lvl < db_ck_fuzziness[type]) { + obs_lvl = db_ck_fuzziness[type]; + if (obs_lvl != DCC_CK_FUZ_LVL_REP + && !grey_on) + fuzzy_obs(&new, &new_ck); + } + } + } + + /* Keep this checksum if we decide the whole report + * is needed. 
*/ + *new_ck = *rcd_ck; + + ++new_ck; + ++new.fgs_num_cks; + } + + /* occassionally let the daemon work with the old file */ + if (need_unlock >= UNLOCK_INTERVAL_USECS) { + need_unlock = 0; + if (!standalone && !db_unlock()) + EXPIRE_BAIL(); + } + + /* if none of its checksums are needed, + * then discard the entire record */ + if (!needed) { + expired_cks += DB_NUM_CKS(&new); + ++expired_rcds; + continue; + } + + new_num_cks = DB_NUM_CKS(&new); + kept_cks += new_num_cks - nokeep_num_cks; + + /* Put the new record into the new file. + * + * If all of the record is recent, if it contains 1 checksum, + * or if all of its totals are the same, then simply add it. + * + * Otherwise, divide it into records of identical counts + * to allow compression or combining with other records. */ + if (new_num_cks > 1 + && (!timely + || DB_RCD_ID(&new) == DCC_ID_COMP + || DB_RCD_TRIMMED(&new))) { + for (;;) { + /* skip the checksums that have the same total + * as the first checksum to leave them with the + * original new report */ + new_ck = new.cks; + ck_tgts = DB_TGTS_CK(new_ck); + for (i = 1; i < new_num_cks; ++i) { + ++new_ck; + if (DB_TGTS_CK(new_ck) != ck_tgts) + break; + } + if (new_num_cks <= i) + break; + new_num_cks -= i; + + /* write the checksums with the common total */ + new.srvr_id_auth = DCC_ID_COMP; + new.fgs_num_cks = i; + if (!write_new_rcd(&new, + sizeof(new) - sizeof(new.cks) + + i*sizeof(new.cks[0]))) + EXPIRE_BAIL(); + + /* handle the remaining checksums */ + new.fgs_num_cks = new_num_cks; + memmove(&new.cks[0], &new.cks[i], + new_num_cks*sizeof(new.cks[0])); + } + } + + /* write the rest (or all) of the new record */ + if (!write_new_rcd(&new, + sizeof(new) - sizeof(new.cks) + + new_num_cks*sizeof(new.cks[0]))) + EXPIRE_BAIL(); + } + write_new_flush(1); + alarm(0); + + /* do final adjustment of the flooding positions */ + adj_mmap(); + /* force them to be right if the system crashed with the + * flod.map file on the disk more up to date and so after the + * 
database file on the disk */ + for (mp = new_flod_mmaps.mmaps; + mp <= LAST(new_flod_mmaps.mmaps); + ++mp) { + if (mp->rem_hostname[0] != '\0' + && mp->confirm_pos > new_db_csize) + mp->confirm_pos = new_db_csize; + } + + /* We are finished with the old file. + * Mark all of its pages MADV_DONTNEED */ + rel_db_states(); + i = (db_unload(0, 2) != 0); + if (!db_close(1)) + i = 0; + + write_new_hdr(emptied); + report_progress(1, " processed", "MBytes", + old_db_pos, old_db_csize, 1024*1024); + if (grey_on) + quiet_trace_msg("expired %d records and %d checksums in %s", + expired_rcds, expired_cks, cur_db_nm); + else + quiet_trace_msg("expired %d records and %d checksums," + " obsoleted %d checksums in %s", + expired_rcds, expired_cks, obs_rcds, cur_db_nm); + return i; +} + + + +/* copy the database copy while doing minimal expiring */ +static u_char +copy_db(void) +{ + static DB_VERSION_BUF old_version4 = DB_VERSION4_STR; + static DB_VERSION_BUF old_version3 = DB_VERSION3_STR; + union { + DB_HDR hdr; + DB_V4_PARMS v4; + DB_V3_PARMS v3; + } old_db; + struct timeval sn; + + /* do not lock the old database because the daemon must continue + * to answer requests */ + if (old_db_fd < 0) { + old_db_fd = open(cur_db_nm, O_RDONLY, 0); + if (old_db_fd == -1) + dcc_logbad(EX_IOERR, "open(%s): %s", + cur_db_nm, ERROR_STR()); + } + + if (!read_db_hdr(dcc_emsg, &old_db.hdr, old_db_fd, cur_db_nm)) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + if (memcmp(old_db.hdr.p.version, db_version_buf, + sizeof(old_db.hdr.p.version))) { + if (!memcmp(old_db.v4.version, old_version4, + sizeof(old_db.v4.version))) { + memset(&old_db_parms, 0, + sizeof(old_db_parms)); + memcpy(old_db_parms.version, db_version_buf, + sizeof(old_db_parms.version)); + + old_db_parms.db_csize = old_db.v4.db_csize; + old_db_parms.pagesize = old_db.v4.pagesize; + old_db_parms.sn = old_db.v4.sn; + old_db_parms.cleared = old_db.v4.cleared; + old_db_parms.cleaned = old_db.v4.cleaned; + old_db_parms.cleaned_cron = 
old_db.v4.cleaned_cron; + memcpy(old_db_parms.ex_spam, old_db.v4.ex_spam, + sizeof(old_db_parms.ex_spam)); + memcpy(old_db_parms.ex_all, old_db.v4.ex_spam, + sizeof(old_db_parms.ex_all)); + memcpy(old_db_parms.ex_secs, old_db.v4.ex_secs, + sizeof(old_db_parms.ex_secs)); + old_db_parms.nokeep_cks = old_db.v4.nokeep_cks; + old_db_parms.flags = old_db.v4.flags; + old_db_parms.old_db_csize = old_db.v4.old_db_csize; + old_db_parms.db_added = old_db.v4.db_added; + old_db_parms.hash_used = old_db.v4.hash_used; + old_db_parms.old_hash_used = old_db.v4.old_hash_used; + old_db_parms.hash_added = old_db.v4.hash_added; + old_db_parms.rate_secs = old_db.v4.rate_secs; + old_db_parms.last_rate_sec = old_db.v4.last_rate_sec; + old_db_parms.old_kept_cks = old_db.v4.old_kept_cks; + + } else if (!memcmp(old_db.v3.version, old_version3, + sizeof(old_db.v3.version))) { + memset(&old_db_parms, 0, + sizeof(old_db_parms)); + memcpy(old_db_parms.version, db_version_buf, + sizeof(old_db_parms.version)); + + old_db_parms.db_csize = old_db.v3.db_csize; + old_db_parms.pagesize = old_db.v3.pagesize; + old_db_parms.sn = old_db.v3.sn; + memcpy(old_db_parms.ex_spam, old_db.v3.ex_spam, + sizeof(old_db_parms.ex_spam)); + memcpy(old_db_parms.ex_secs, old_db.v3.ex_secs, + sizeof(old_db_parms.ex_secs)); + old_db_parms.nokeep_cks = old_db.v3.nokeep_cks; + if (old_db.v3.flags & DB_PARM_V3_FG_GREY) + old_db_parms.flags |= DB_PARM_FG_GREY; + if (old_db.v3.flags & DB_PARM_V3_FG_CLEARED) + old_db_parms.flags |= DB_PARM_FG_CLEARED; + old_db_parms.old_db_csize = old_db.v3.old_db_csize; + old_db_parms.db_added = old_db.v3.db_added; + old_db_parms.hash_used = old_db.v3.hash_used; + old_db_parms.old_hash_used = old_db.v3.old_hash_used; + old_db_parms.hash_added = old_db.v3.hash_added; + old_db_parms.rate_secs = old_db.v3.rate_secs; + old_db_parms.last_rate_sec = old_db.v3.last_rate_sec; + old_db_parms.old_kept_cks = old_db.v3.old_kept_cks; + + dcc_ts2timeval(&sn, &old_db_parms.sn); + old_db_parms.cleared = 
sn.tv_sec; + old_db_parms.cleaned = sn.tv_sec; + if (old_db.v3.flags & DB_PARM_V3_FG_SELF_CLEAN2) { + old_db_parms.cleared -= 2*24*60*60; + old_db_parms.cleaned -= 24*60*60; + } + } else { + dcc_logbad(EX_IOERR, "%s has the wrong magic \"%.*s\"", + cur_db_nm, + ISZ(DB_VERSION_BUF), old_db.hdr.p.version); + } + } else { + old_db_parms = old_db.hdr.p; + } + + db_parms.sn = old_db_parms.sn; + db_parms.cleared = old_db_parms.cleared; + db_parms.cleaned = old_db_parms.cleaned; + db_parms.cleaned_cron = old_db_parms.cleaned_cron; + memcpy(db_parms.ex_all, old_db_parms.ex_all, + sizeof(db_parms.ex_all)); + memcpy(db_parms.ex_spam, old_db_parms.ex_spam, + sizeof(db_parms.ex_spam)); + memcpy(&db_parms.ex_secs, &old_db_parms.ex_secs, + sizeof(db_parms.ex_secs)); + db_parms.nokeep_cks = old_db_parms.nokeep_cks; + db_parms.flags = old_db_parms.flags; + + set_db_tholds(db_parms.nokeep_cks); + + return expire(old_db_parms.db_csize); +} + + + +/* Copy any records from the old file to the new file that were + * added to the old file while we were creating the new file. */ +static u_char +catchup(DCC_EMSG emsg) +{ + DB_HDR old_db_hdr; + DB_RCD rcd; + int rcd_len; + u_char result; + int count, old_count; + + /* Because dccd knows dbclean is running, dccd will have been + * keeping its header block more accurate than usual. 
*/ + result = 1; + count = 0; + do { + old_count = count; + if (!read_db_hdr(dcc_emsg, &old_db_hdr, + old_db_fd, old_db_nm)) { + emsg = 0; + result = 0; + break; + } + if (old_db_hdr.p.db_csize < old_db_pos) { + dcc_error_msg("%s mysteriously truncated", old_db_nm); + result = 0; + break; + } + if ((off_t)old_db_pos != lseek(old_db_fd, old_db_pos, + SEEK_SET)) { + dcc_pemsg(EX_IOERR, emsg, "lseek(%s, "L_HPAT"): %s", + old_db_nm, old_db_pos, ERROR_STR()); + emsg = 0; + result = 0; + break; + } + read_rcd_invalidate(0); + while (old_db_pos < old_db_hdr.p.db_csize) { + rcd_len = read_rcd(emsg, &rcd, + old_db_fd, old_db_pos, old_db_nm); + if (rcd_len <= 0) { + if (rcd_len == 0) + dcc_pemsg(EX_IOERR, emsg, + "premature EOF in %s" + " at "L_HPAT + " instead of "L_HPAT, + old_db_nm, + old_db_pos, + old_db_hdr.p.db_csize); + emsg = 0; + result = 0; + break; + } + /* If something bad happens, we may not be able to + * go back to the old file. Carry on to get as much + * data as we can although we know the dccd daemon + * may croak when we release it */ + if (!db_add_rcd(emsg, &rcd)) { + emsg = 0; + result = 0; + break; + } + old_db_pos += rcd_len; + ++count; + } + } while (result && old_count != count); + + if (count > 0 && db_debug >= 1) + quiet_trace_msg("copied %d late reports%s", + count, result ? "" : " with problems"); + + return result; +} + + + +/* try to compress old report pointed to by db_sts.rcd with a predecessor */ +static void +compress_old(void) +{ + DB_PTR prev, prev1; + DB_RCD_CK *new_ck, *prev_ck; + int new_ck_num, prev_ck_num; + DCC_TGTS new_tgts, prev_tgts; + DCC_CK_TYPES new_type, prev_type; +#define NEWER (db_sts.rcd.d.r) +#define OLDER (db_sts.rcd2.d.r) + + /* Before spending the time to map a preceding checksum, + * find at least one checksum worth keeping and that might + * be combined or compressed with its predecessor. 
*/ + prev = DB_PTR_NULL; + prev_type = DCC_CK_INVALID; + for (new_ck_num = DB_NUM_CKS(NEWER), + new_ck = NEWER->cks; + new_ck_num != 0; + --new_ck_num, ++new_ck) { + if (DB_CK_OBS(new_ck)) + continue; + new_type = DB_CK_TYPE(new_ck); + if (DB_TEST_NOKEEP(db_parms.nokeep_cks, new_type)) + continue; + /* all of the checksums in this record must be old */ + if (!dcc_ts_older_ts(&NEWER->ts, &new_ex_ts[new_type].all)) + return; + /* you can compress reports only if you have >=2 */ + prev1 = DB_PTR_EX(new_ck->prev); + if (prev1 != DB_PTR_NULL) { + prev = prev1; + prev_type = new_type; + } + } + if (prev_type == DCC_CK_INVALID) + return; + + /* having picked a checksum, + * map the record containing its predecessor */ + prev_ck = db_map_rcd_ck(dcc_emsg, &db_sts.rcd2, prev, prev_type); + if (!prev_ck) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + + /* The current and previous records must be old + * and contain the same useful checksums. */ + new_ck_num = DB_NUM_CKS(NEWER); + new_ck = NEWER->cks; + prev_ck_num = DB_NUM_CKS(OLDER); + prev_ck = OLDER->cks; + for (;;) { + /* we must run out of checksums in the two reports at the + * same time */ + if (prev_ck_num == 0 || new_ck_num == 0) { + if (prev_ck_num == new_ck_num) + break; + return; + } + + /* ignore paths and other junk */ + if (DB_CK_OBS(prev_ck)) { + --prev_ck_num; + ++prev_ck; + continue; + } + prev_type = DB_CK_TYPE(prev_ck); + if (DB_TEST_NOKEEP(db_parms.nokeep_cks, prev_type)) { + --prev_ck_num; + ++prev_ck; + continue; + } + if (DB_CK_OBS(new_ck)) { + --new_ck_num; + ++new_ck; + continue; + } + new_type = DB_CK_TYPE(new_ck); + if (DB_TEST_NOKEEP(db_parms.nokeep_cks, new_type)) { + --new_ck_num; + ++new_ck; + continue; + } + + /* because the checksums are ordered, + * give up at the first difference in checksums */ + if (new_type != prev_type + || memcmp(new_ck->sum, prev_ck->sum, sizeof(new_ck->sum))) + return; + + /* Give up at the first recent and valuable checksum. 
*/ + if (!dcc_ts_older_ts(&OLDER->ts, &new_ex_ts[new_type].all)) + return; + + --prev_ck_num; + ++prev_ck; + --new_ck_num; + ++new_ck; + } + + /* The current and previous records are compatiable. + * Add the count of the previous record to the current record + * and mark the previous record useless. + * The individual totals in the current record are already correct, + * so postpone worrying about the deleted record. */ + new_tgts = DB_TGTS_RCD_RAW(NEWER); + if (new_tgts < DCC_TGTS_TOO_MANY) { + prev_tgts = DB_TGTS_RCD(OLDER); + if (prev_tgts > DCC_TGTS_TOO_MANY + || prev_tgts == 0) + return; + if (prev_tgts == DCC_TGTS_TOO_MANY) { + new_tgts = DCC_TGTS_TOO_MANY; + } else { + new_tgts += prev_tgts; + if (new_tgts > DCC_TGTS_TOO_MANY) + new_tgts = DCC_TGTS_TOO_MANY; + } + DB_TGTS_RCD_SET(NEWER, new_tgts); + } + + /* Mark the previous record to be deleted next time. */ + DB_TGTS_RCD_SET(OLDER, 0); + /* Mark it dirty so that the need to delete it gets to the file. */ + SET_FLUSH_RCD(&db_sts.rcd2, 1); + + NEWER->srvr_id_auth = DCC_ID_COMP; + NEWER->fgs_num_cks &= ~(DB_RCD_FG_TRIM + | DB_RCD_FG_SUMRY + | DB_RCD_FG_DELAY); + /* use the newest timestamp */ + if (dcc_ts_older_ts(&NEWER->ts, &OLDER->ts)) + NEWER->ts = OLDER->ts; + SET_FLUSH_RCD(&db_sts.rcd, 1); + + ++comp_rcds; + +#undef NEWER +#undef OLDER +} + + + +/* write a parsed whitelist checksum */ +static int +white_write(DCC_EMSG emsg, DCC_WF *wf, + DCC_CK_TYPES type, DCC_SUM sum, DCC_TGTS tgts) +{ + DB_RCD rcd; + int rcd_len; + char buf[30]; + DCC_FNM_LNO_BUF fnm_buf; + + /* ignore checksums that clients are never supposed to send + * to the server or for some other reason cannot be whitelisted */ + switch (type) { + case DCC_CK_INVALID: + case DCC_CK_ENV_TO: + case DCC_CK_G_MSG_R_TOTAL: + case DCC_CK_G_TRIPLE_R_BULK: + case DCC_CK_SRVR_ID: + dcc_pemsg(EX_DATAERR, emsg, + "%s checksum cannot be used%s", + dcc_type2str_err(type, 0, 0, grey_on), + wf_fnm_lno(&fnm_buf, wf)); + return 0; + + case DCC_CK_IP: + case 
DCC_CK_ENV_FROM: + case DCC_CK_FROM: + case DCC_CK_MESSAGE_ID: + case DCC_CK_RECEIVED: + case DCC_CK_SUB: + case DCC_CK_BODY: + case DCC_CK_FUZ1: + case DCC_CK_FUZ2: + break; /* these are ok */ + } + + if (tgts == DCC_TGTS_OK_MX + || tgts == DCC_TGTS_OK_MXDCC + || tgts == DCC_TGTS_SUBMIT_CLIENT) { + dcc_pemsg(EX_DATAERR, emsg,"\"%s\" ignored%s", + dcc_tgts2str(buf, sizeof(buf), tgts, 0), + wf_fnm_lno(&fnm_buf, wf)); + return 0; + } + + /* Greylist whitelist entries cannot involve blacklisting. + * They use DCC_TGTS_GREY_WHITE to signal whitelisting */ + if (grey_on) { + /* ignore anything except whitelisting */ + if (tgts != DCC_TGTS_OK) { + dcc_pemsg(EX_DATAERR, emsg, "\"%s\" ignored%s", + dcc_tgts2str(buf, sizeof(buf), tgts, 0), + wf_fnm_lno(&fnm_buf, wf)); + return 0; + } + tgts = DCC_TGTS_GREY_WHITE; + } + + memset(&rcd, 0, sizeof(rcd)); + dcc_timeval2ts(&rcd.ts, &clean_start, 0); + rcd.srvr_id_auth = DCC_ID_WHITE; + DB_TGTS_RCD_SET(&rcd, tgts); + + rcd.cks[0].type_fgs = DCC_CK_FLOD_PATH; + memcpy(rcd.cks[0].sum, &wf->lno, sizeof(wf->lno)); + rcd.cks[0].sum[sizeof(wf->lno)] = wf->fno; + + rcd.cks[1].type_fgs = type; + memcpy(rcd.cks[1].sum, sum, sizeof(rcd.cks[1])); + + rcd_len = sizeof(rcd) - sizeof(rcd.cks) + 2*sizeof(rcd.cks[0]); + rcd.fgs_num_cks = 2; + + if (!write_new_rcd(&rcd, rcd_len)) + return -1; + + ++white_cks; + return 1; +} + + + +/* Add the whitelist of certified non-spam and non-spammers + * and otherwise start the database */ +static void +parse_white(void) +{ + int white_fd; + + white_cks = 0; + + if (!keep_white) { + memset(&dbclean_white_tbl, 0,sizeof(dbclean_white_tbl)); + dcc_wf_init(&dbclean_wf, 0); + fnm2rel_good(dbclean_wf.ascii_nm, WHITELIST_NM(grey_on), 0); + dbclean_wf.wtbl = &dbclean_white_tbl; + white_fd = open(dbclean_wf.ascii_nm, O_RDONLY, 0); + if (white_fd < 0) { + /* worry only if the file exists but can't be used */ + if (errno != ENOENT) { + dcc_error_msg("open(%s): %s", + dbclean_wf.ascii_nm, ERROR_STR()); + keep_white = 1; 
+ } + } else { + if (0 > dcc_parse_whitefile(0, &dbclean_wf, white_fd, + white_write, 0)) + keep_white = 1; + if (0 > close(white_fd)) + dcc_error_msg("close(%s): %s", + dbclean_wf.ascii_nm, ERROR_STR()); + } + } + if (keep_white) { + /* If the whitelist was bad, purge the new database of + * the bad new whitelist. We will use the existing + * whitelist */ + write_new_flush(1); + new_db_csize = DB_PTR_BASE; + if (0 > ftruncate(new_db_fd, DB_PTR_BASE)) + dcc_logbad(EX_IOERR, "truncate(%s, %d): %s", + new_db_nm, DB_PTR_BASE, ERROR_STR()); + new_db_fsize = DB_PTR_BASE; + white_cks = 0; + } + + /* update the counts in the database file */ + write_new_hdr(1); +} + + + +/* check for conflicts in the whitelist file in the record pointed to + * by db_sts.rcd */ +static void +check_white(void) +{ + static int msgs; + static int prev_lno1, prev_lno2; + static int prev_fno1, prev_fno2; + const DB_RCD_CK *rcd_ck, *prev_ck; + int lno1, lno2; + int fno1, fno2; + DCC_TGTS tgts1, tgts2; + char tgts1_buf[30], tgts2_buf[30]; + const char *fname1, *fname2; + DCC_CK_TYPES type; + DB_PTR prev; + + /* don't check if we have already complained enough */ + if (msgs > 20) + return; + + rcd_ck = db_sts.rcd.d.r->cks; + + /* it is pointless without line numbers, which are lacking only + * if we saved the old whitelist entries because the file is + * broken */ + if (DB_NUM_CKS(db_sts.rcd.d.r) != 2 + || DB_CK_TYPE(rcd_ck) != DCC_CK_FLOD_PATH) + return; + + /* conflict is impossible with a single line */ + ++rcd_ck; + prev = DB_PTR_EX(rcd_ck->prev); + if (prev == DB_PTR_NULL) + return; + + type = DB_CK_TYPE(rcd_ck); + prev_ck = db_map_rcd_ck(dcc_emsg, &db_sts.rcd2, prev, type); + if (!prev_ck) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + + tgts1 = DB_TGTS_RCD(db_sts.rcd2.d.r); + tgts2 = DB_TGTS_RCD(db_sts.rcd.d.r); + if (tgts1 == tgts2) + return; /* no conflict */ + + memcpy(&lno1, db_sts.rcd2.d.r->cks[0].sum, sizeof(lno1)); + fno1 = db_sts.rcd2.d.r->cks[0].sum[sizeof(lno1)]; + memcpy(&lno2, 
db_sts.rcd.d.r->cks[0].sum, sizeof(lno2)); + fno2 = db_sts.rcd.d.r->cks[0].sum[sizeof(lno2)]; + + if (lno1 == prev_lno1 && fno1 == prev_fno1 + && lno2 == prev_lno2 && fno2 == prev_fno2) + return; + + fname1 = wf_fnm(&dbclean_wf, fno1); + fname2 = wf_fnm(&dbclean_wf, fno2); + if (fname1 == fname2) { + fname1 = ""; + } else { + fname1 = path2fnm(fname1); + } + dcc_error_msg("\"%s\" in line %d%s%s conflicts with \"%s\"" + " in line %d of %s", + dcc_tgts2str(tgts1_buf, sizeof(tgts1_buf), + tgts1, grey_on), + lno1, + *fname1 != '\0' ? " of " : "", fname1, + dcc_tgts2str(tgts2_buf, sizeof(tgts2_buf), + tgts2, grey_on), + lno2, + fname2); + ++msgs; + prev_lno1 = lno1; + prev_fno1 = fno1; + prev_lno2 = lno2; + prev_fno2 = fno2; +} + + + +/* rebuild the hash table and the totals and links within the database file + * finish with the file locked */ +static void +build_hash(void) +{ + DB_PTR rcd_pos; + DB_HADDR haddr_window, haddr_lo, haddr_hi; + int pass, total_passes; + int rcd_len; + int rcd_cks, rcd_sums; + DB_PTR rcds, sums; /* passes can inflate these */ + const DB_RCD_CK *rcd_ck; + DB_HADDR guess_hash_len; + double db_rate, hash_ratio; + struct timeval db_flushed; + + db_buf_init(new_db_pagesize, 0); + + if (new_hash_len == 0) { + /* Try to choose a hash table size now so that when it + * is next time to rebuild after 24 hours of incoming + * checksums, the alpha or load factor will still be 0.9. + * We probably ran 24 hours ago, so the old hash size + * is an estimate of the size tomorrow. */ + + /* Guess the number of distinct checksums added + * tomorrow based on the current average rate */ + db_rate = db_add_rate(&new_db_parms, 1); + if (db_rate > 0.0) { + /* Increase the average rate by 10% to account + * for the 30% decrease often seen on weekends. 
*/ + guess_hash_len = db_rate * 1.1 * 24*60*60; + + /* predict # of distinct checksums in current data */ + hash_ratio = old_db_parms.old_kept_cks; + if (hash_ratio == 0.0) { + hash_ratio = 1.0; + } else { + hash_ratio = (HADDR2LEN(old_db_parms + .old_hash_used) + / hash_ratio); + if (hash_ratio > 1.0 || hash_ratio < 0.3) + hash_ratio = 1.0; + } + guess_hash_len += (kept_cks * hash_ratio) + white_cks; + + if (db_debug) + quiet_trace_msg("hash size from old=%d" + " %d from db_rate=%.1f" + " hash_ratio=%.1f=%d/%d" + " kept=%d white=%d", + old_db_hash_used, + guess_hash_len, + db_rate, hash_ratio, + HADDR2LEN(old_db_parms + .old_hash_used), + old_db_parms.old_kept_cks, + kept_cks, white_cks); + + } else { + /* guess if we do not have a good measure + * of the recent rate */ + guess_hash_len = kept_cks+white_cks; + guess_hash_len += guess_hash_len/5; + } + + new_hash_len = old_db_hash_used; + if (new_hash_len < guess_hash_len) + new_hash_len = guess_hash_len; + + /* go for load factor 0.9 */ + new_hash_len += new_hash_len/10; + + if (new_hash_len > db_max_hash_entries) + quiet_trace_msg("default hash size %d entries" + " > maximum %d", + new_hash_len, db_max_hash_entries); + + if (grey_on) { + if (new_hash_len < MIN_HASH_ENTRIES) + new_hash_len = MIN_HASH_ENTRIES; + } else { + if (new_hash_len < DEF_HASH_ENTRIES) + new_hash_len = DEF_HASH_ENTRIES; + } + } + + /* Open and lock the new database */ + unlink_whine(new_hash_nm, 1); + new_hash_created = 1; + if (!db_open(0, -1, new_db_nm, new_hash_len, + DB_OPEN_LOCK_NOWAIT | db_mode)) { + dcc_logbad(dcc_ex_code, "could not start database %s", + new_db_nm); + } + if (db_debug) + quiet_trace_msg("%s %s", db_window_size_str, new_db_nm); + + /* guess which checksums we will keep so that we can count them */ + if (old_db_parms.nokeep_cks != 0) + db_parms.nokeep_cks = old_db_parms.nokeep_cks; + + /* add every record in the database file to the hash table and + * fix its accumulated counts and reverse links */ + comp_rcds = 0; + 
sums = 0; + rcds = 0; + report_progress_init(); + db_flushed = db_time; + + /* if the hash table does not fit in 75% of RAM, + * then make several passes over the data with as much of the + * hash table as fits. */ + haddr_window = db_hash_page_len*((db_buf_total*3)/4); + if (haddr_window < db_hash_len/16) + haddr_window = db_hash_len/16; + total_passes = (db_hash_len+haddr_window-1)/haddr_window; + + for (haddr_lo = 0, pass = 1; + haddr_lo < db_hash_len; + haddr_lo = haddr_hi, ++pass) { + if (haddr_lo > db_hash_len-haddr_window) + haddr_hi = MAX_HASH_ENTRIES; + else + haddr_hi = haddr_lo+haddr_window; + for (rcd_pos = DB_PTR_BASE; + rcd_pos < db_csize; + rcd_pos += rcd_len) { + /* skip reports crossing page bounardies */ + if (rcd_pos%db_pagesize > db_page_max) { + rcd_len = DB_RCD_HDR_LEN; + continue; + } + if (--progress_rpt_cnt <= 0) { + report_progress(0, " hash rebuilt", + "checksums", + sums/total_passes, kept_cks, 1); + if (db_time.tv_sec != db_flushed.tv_sec) { + db_flushed = db_time; + if (!db_flush_db(dcc_emsg)) + dcc_logbad(dcc_ex_code, + "flushing after linking" + L_HPAT": %s", + rcd_pos, dcc_emsg); + } + } + + if (!db_map_rcd(0, &db_sts.rcd, rcd_pos, &rcd_len)) { + dcc_logbad(dcc_ex_code, + "hash build failed reading" + " record at "L_HPAT, + rcd_pos); + } + + /* skip end of page padding */ + if (db_sts.rcd.d.r->fgs_num_cks == 0) + continue; + + ++rcds; + + /* count the checksums we'll link in this record */ + rcd_cks = DB_NUM_CKS(db_sts.rcd.d.r); + rcd_sums = 0; + for (rcd_ck = db_sts.rcd.d.r->cks; + rcd_ck < &db_sts.rcd.d.r->cks[rcd_cks]; + ++rcd_ck) { + if (!DB_TEST_NOKEEP(db_parms.nokeep_cks, + DB_CK_TYPE(rcd_ck))) + ++rcd_sums; + } + sums += rcd_sums; + + /* Mark the record dirty so that any new hash links + * get to the file if we are using -F. 
*/ + db_set_flush(&db_sts.rcd, 0, rcd_len); + if (!db_link_rcd(dcc_emsg, haddr_lo, haddr_hi)) { + dcc_logbad(dcc_ex_code, + "relinking record at "L_HPAT": %s", + rcd_pos, dcc_emsg); + } + + /* check for conflicts in the whitelist file */ + if (DB_RCD_ID(db_sts.rcd.d.r) == DCC_ID_WHITE) + check_white(); + + compress_old(); + } + + if (progress_rpt_started && pass < total_passes) + quiet_trace_msg(" pass %d", pass); + } + + report_progress(1, " hash rebuilt", "checksums", + sums/total_passes, kept_cks, 1); + + db_parms.old_hash_used = db_hash_used; + db_parms.old_kept_cks = kept_cks; + db_parms.hash_used = db_hash_used; + db_parms.old_db_csize = db_csize; + if (!db_flush_parms(dcc_emsg)) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + + quiet_trace_msg("hashed "L_DPAT" records containing "L_DPAT" checksums," + " compressed %d records", + rcds/total_passes, sums/total_passes, comp_rcds); + + /* Try to finish as much disk I/O on the new file as we can to minimize + * stalling by dccd when we close the file and hand it over. This also + * reduces system stalling hours later when dbclean runs again. 
*/ + if (!make_clean(1)) + dcc_logbad(dcc_ex_code, "%s", dcc_emsg); + + + quiet_trace_msg("%d hash entries total, %d or %d%% used", + HADDR2LEN(db_hash_len), + HADDR2LEN(db_hash_used), + (int)((HADDR2LEN(db_hash_used)*100.0) + / HADDR2LEN(db_hash_len))); +} + + + +static u_char +write_new_db(const void *buf, int buflen, off_t pos, u_char fatal) +{ + int i; + + if (pos != lseek(new_db_fd, pos, SEEK_SET)) { + if (fatal) { + dcc_logbad(EX_IOERR, "lseek(%s, 0): %s", + new_db_nm, ERROR_STR()); + } else { + dcc_error_msg("lseek(%s, 0): %s", + new_db_nm, ERROR_STR()); + } + return 0; + } + + i = write(new_db_fd, buf, buflen); + if (i == buflen) { + if (new_db_fsize < pos+buflen) + new_db_fsize = pos+buflen; + return 1; + } + + if (fatal) { + if (i < 0) + dcc_logbad(EX_IOERR, "write(%s): %s", + new_db_nm, ERROR_STR()); + else + dcc_logbad(EX_IOERR, "write(%s)=%d instead of %d", + new_db_nm, i, buflen); + } else { + if (i < 0) + dcc_error_msg("write(%s): %s", + new_db_nm, ERROR_STR()); + else + dcc_error_msg("write(%s)=%d instead of %d", + new_db_nm, i, buflen); + } + return 0; +} + + + +/* use a large buffer to encourage the file system to avoid fragmentation */ +static union { + u_char c[DB_MIN_MIN_MBYTE*(1024*1024)/4]; + DB_HDR hdr; +} write_new_db_buf; +static u_int write_new_db_buflen = 0; +static DB_PTR write_new_base; + +static u_char +write_new_flush(u_char fatal) +{ + u_char result = 1; + + if (write_new_db_buflen != 0) { + if (!write_new_db(&write_new_db_buf, write_new_db_buflen, + write_new_base, fatal)) + result = 0; + } + + write_new_base = new_db_csize; + write_new_db_buflen = 0; + return result; +} + + +static u_char +write_new_buf(const void *buf, int buflen) +{ + if (write_new_db_buflen + buflen > ISZ(write_new_db_buf) + && !write_new_flush(0)) + return 0; + + memcpy(&write_new_db_buf.c[write_new_db_buflen], buf, buflen); + write_new_db_buflen += buflen; + return 1; +} + + + +/* add a record to the new file */ +static u_char +write_new_rcd(const void *buf, 
int buflen) +{ + static const u_char zeros[DB_RCD_LEN_MAX] = {0}; + DB_PTR new_page_num; + u_char result; + int pad, i; + + /* pad accross page boundaries */ + new_page_num = DB_PTR2PG_NUM(new_db_csize + buflen, new_db_pagesize); + if (new_page_num != DB_PTR2PG_NUM(new_db_csize, new_db_pagesize)) { + pad = new_page_num*new_db_pagesize - new_db_csize; + pad = (((pad + DB_RCD_HDR_LEN-1) / DB_RCD_HDR_LEN) + * DB_RCD_HDR_LEN); + do { + i = sizeof(zeros); + if (i > pad) + i = pad; + if (!write_new_buf(zeros, i)) + return 0; + pad -= i; + new_db_csize += i; + } while (pad != 0); + } + + result = write_new_buf(buf, buflen); + new_db_csize += buflen; + return result; +} + + + +/* write the magic string at the head of the database file */ +static void +write_new_hdr(u_char emptied) +{ + DB_HDR *new; + struct timeval old_sn; + time_t new_rate_secs; + DCC_CK_TYPES type; + int i; + + write_new_flush(1); + + memset(&write_new_db_buf, 0, sizeof(write_new_db_buf)); + write_new_base = 0; + if (new_db_fsize > ISZ(DB_HDR) + || new_db_pagesize == 0) { + write_new_db_buflen = sizeof(DB_HDR); + } else { + write_new_db_buflen = new_db_pagesize; + if (write_new_db_buflen > ISZ(write_new_db_buf)) + write_new_db_buflen = ISZ(write_new_db_buf); + } + + new = &write_new_db_buf.hdr; + memset(new, 0, sizeof(*new)); + memcpy(new->p.version, db_version_buf, sizeof(new->p.version)); + + dcc_timeval2ts(&new->p.sn, &clean_start, 0); + if (emptied) { + new->p.cleared = clean_start.tv_sec; + } else { + new->p.cleared = old_db_parms.cleared; + switch (clean_mode) { + case NORMAL_MODE: + new->p.cleaned = clean_start.tv_sec; + new->p.cleaned_cron = clean_start.tv_sec; + break; + case NO_CRON_MODE: + new->p.cleaned = clean_start.tv_sec; + new->p.cleaned_cron = old_db_parms.cleaned_cron; + break; + case REPAIR_MODE: + case QUICK_MODE: + case HASH_MODE: + case DEL_MODE: + new->p.cleaned = old_db_parms.cleaned; + new->p.cleaned_cron = old_db_parms.cleaned_cron; + break; + } + } + + if (grey_on) + 
new->p.flags |= DB_PARM_FG_GREY; + if (emptied || (old_db_parms.flags & DB_PARM_FG_CLEARED)) + new->p.flags |= DB_PARM_FG_CLEARED; + if (have_expire_parms > 0 + || (have_expire_parms < 0 + && (old_db_parms.flags & DB_PARM_EXP_SET))) + new->p.flags |= DB_PARM_EXP_SET; + + new->p.nokeep_cks = (emptied || old_db_parms.nokeep_cks == 0 + ? def_nokeep_cks() + : old_db_parms.nokeep_cks); + + new->p.pagesize = new_db_pagesize; + new->p.db_csize = new_db_csize; + + /* update the traffic counts */ + if (!emptied + && old_db_parms.db_csize != 0 + && old_db_parms.db_csize >= old_db_parms.old_db_csize + && old_db_parms.hash_used != 0 + && old_db_parms.hash_used >= old_db_parms.old_hash_used) { + if (old_db_parms.rate_secs > 0 + && old_db_parms.rate_secs <= DB_MAX_RATE_SECS) { + new->p.rate_secs = old_db_parms.rate_secs; + new->p.db_added = old_db_parms.db_added; + new->p.hash_added = old_db_parms.hash_added; + } + new->p.last_rate_sec = clean_start.tv_sec; + dcc_ts2timeval(&old_sn, &old_db_parms.sn); + new_rate_secs = clean_start.tv_sec - old_sn.tv_sec; + if (new_rate_secs > 0 && new_rate_secs <= DB_MAX_RATE_SECS) { + new_rate_secs += new->p.rate_secs; + if (new_rate_secs > DB_MAX_RATE_SECS) { + double trim, new_val; + trim = DB_MAX_RATE_SECS; + trim /= new_rate_secs; + + new_val = new->p.db_added; + new_val *= trim; + new->p.db_added = new_val; + + new_val = new->p.hash_added; + new_val *= trim; + new->p.hash_added = new_val; + + new_rate_secs = DB_MAX_RATE_SECS; + } + new->p.db_added += (old_db_parms.db_csize + - old_db_parms.old_db_csize); + new->p.hash_added += (old_db_parms.hash_used + - old_db_parms.old_hash_used); + new->p.rate_secs = new_rate_secs; + } + } + + for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) { + if (new_ex_secs[type].all != 0) { + new->p.ex_secs[type].all = new_ex_secs[type].all; + new->p.ex_secs[type].spam = new_ex_secs[type].spam; + new->p.ex_all[type] = new_ex_ts[type].all; + new->p.ex_spam[type] = new_ex_ts[type].spam; + } else { + 
new->p.ex_secs[type].all = def_expire_secs; + new->p.ex_secs[type].spam = (DCC_CK_LONG_TERM(type) + ? def_expire_spamsecs + : def_expire_secs); + } + } + + new_db_parms = new->p; + + for (;;) { + write_new_flush(1); + + /* ensure that the last page of the file is complete */ + if (new_db_pagesize == 0) + break; + i = new_db_fsize % new_db_pagesize; + if (i == 0) + break; + write_new_db_buflen = new_db_pagesize - i; + if (write_new_db_buflen > ISZ(write_new_db_buf)) + write_new_db_buflen = ISZ(write_new_db_buf); + memset(&write_new_db_buf, 0, write_new_db_buflen); + write_new_base = new_db_fsize; + } +} + + + +static void +unlink_whine(const char *nm, u_char enoent_ok) +{ + if (0 > unlink(nm) + && (!enoent_ok || errno != ENOENT)) + dcc_error_msg("unlink(%s): %s", nm, ERROR_STR()); +} + + + +static void +rename_bail(const char *from, const char *to) +{ + if (0 > rename(from, to)) + dcc_logbad(EX_IOERR, "rename(%s, %s): %s", + from, to, ERROR_STR()); +} + + + +/* try for a long time or until the server hears */ +static u_char /* 1=ok, 0=failed */ +persist_aop(DCC_AOPS aop, u_int32_t val1, + int secs) /* try for this long */ +{ + return dcc_aop_persist(dcc_emsg, ctxt, + grey_on ? DCC_CLNT_FG_GREY : 0, + db_debug != 0, + aop, val1, secs, &aop_resp); +} + + + +/* tell the daemon to switch to the new database */ +static void +dccd_new_db(const char *msg) +{ + /* Send a round of NOPs and ask about status to ensure the server + * has dealt with requests that arrived while we had the database + * locked and otherwise caught up. We want to try to ensure that + * the server is listening when we re-open the database so that + * it does not leave flooding off. + * On some systems with lame mmap() support including BSD/OS, the + * the daemon can stall for minutes in close(). If that or something + * else makes the daemon stall, this can appear to fail. 
*/ + if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_LIST, RESTART_DELAY)) + dcc_error_msg("%s: %s; continuing", msg, dcc_emsg); + + dccd_unlocked = 0; + if (!persist_aop(DCC_AOP_DB_NEW, 0, RESTART_DELAY)) { + /* This cannot be a fatal error, + * lest we leave the database broken */ + dcc_error_msg("%s: %s; continuing", msg, dcc_emsg); + } +} + + + +static void +finish(void) +{ + int bailing = 0; + + /* delete the new files */ +#ifndef DCC_DBCLEAN_KEEP_NEW /* for debugging */ + if (new_db_created) { + unlink_whine(new_db_nm, 0); + new_db_created = 0; + bailing = -1; + } + /* we don't really know if the new hash file was created, + * so don't worry about problems */ + if (new_hash_created) { + unlink_whine(new_hash_nm, 1); + new_hash_created = 0; + bailing = -1; + } +#endif + if (cur_db_created) { + unlink_whine(cur_db_nm, 0); + unlink_whine(cur_hash_nm, 1); + cur_db_created = 0; + bailing = -1; + } + + if (new_db_fd >= 0) { + if (0 > close(new_db_fd)) + dcc_error_msg("close(%s): %s", + new_db_nm, ERROR_STR()); + new_db_fd = -1; + } + if (old_db_fd >= 0) { + /* In most cases nothing cares about the old database now. + * We often have kept the old database open and locked until + * now. 
Delete it unless we are debugging */ + if (db_debug < 4 && exit_value == EX_OK) { + unlink_whine(old_db_nm, 0); + } else { + /* Push it to the disk so it won't lurk in the buffer + * cache or elsewhere to slow a system reboot */ + if (exit_value == EX_OK + && 0 > fsync(old_db_fd)) + dcc_error_msg("fsync(%s): %s", + old_db_nm, ERROR_STR()); + } + if (0 > close(old_db_fd)) + dcc_error_msg("close(%s): %s", + old_db_nm, ERROR_STR()); + old_db_fd = -1; + } + flod_unmap(0, 0); + + /* release the daemon, but if the database is still open, it's bad */ + db_close(bailing); + /* tell the daemon to switch databases */ + if (dccd_unlocked) + dccd_new_db("finish"); + + while (flods_off > 0) { + --flods_off; + if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_RESUME, + RESTART_DELAY)) + dcc_error_msg("%s", dcc_emsg); + } + + unlock_dbclean(); +} + + + +static void NRATTRIB +exit_dbclean(int v) +{ + exit(exit_value = v); +} + + + +/* terminate with a signal */ +static void NRATTRIB +sigterm(int s) +{ + dcc_error_msg("interrupted by signal %d", s); + exit_dbclean(s+100); +}