comparison dbclean/dbclean.c @ 0:c7f6b056b673

First import of vendor version
author Peter Gervai <grin@grin.hu>
date Tue, 10 Mar 2009 13:49:58 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c7f6b056b673
1 /* Distributed Clearinghouse Checksum database cleaner
2 *
3 * Copyright (c) 2008 by Rhyolite Software, LLC
4 *
5 * This agreement is not applicable to any entity which sells anti-spam
6 * solutions to others or provides an anti-spam solution as part of a
7 * security solution sold to other entities, or to a private network
8 * which employs the DCC or uses data provided by operation of the DCC
9 * but does not provide corresponding data to other users.
10 *
11 * Permission to use, copy, modify, and distribute this software without
12 * changes for any purpose with or without fee is hereby granted, provided
13 * that the above copyright notice and this permission notice appear in all
14 * copies and any distributed versions or copies are either unchanged
15 * or not called anything similar to "DCC" or "Distributed Checksum
16 * Clearinghouse".
17 *
18 * Parties not eligible to receive a license under this agreement can
19 * obtain a commercial license to use DCC by contacting Rhyolite Software
20 * at sales@rhyolite.com.
21 *
22 * A commercial license would be for Distributed Checksum and Reputation
23 * Clearinghouse software. That software includes additional features. This
24 * free license for Distributed ChecksumClearinghouse Software does not in any
25 * way grant permision to use Distributed Checksum and Reputation Clearinghouse
26 * software
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL
29 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
30 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC
31 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES
32 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
33 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
34 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
35 * SOFTWARE.
36 *
37 * Rhyolite Software DCC 1.3.103-1.254 $Revision$
38 */
39
40 #include "srvr_defs.h"
41 #include "dcc_ck.h"
42 #include <signal.h>
43
44 static DCC_EMSG dcc_emsg;
45
46 static DCC_WF dbclean_wf;
47 static DCC_WHITE_TBL dbclean_white_tbl;
48 static DCC_CLNT_CTXT *ctxt;
49 static DCC_OP_RESP aop_resp;
50 static int flods_off;
51 static int dccd_unlocked; /* dccd has been told to unlock */
52
53 static DCC_SRVR_NM srvr = DCC_SRVR_NM_DEF;
54 static DCC_CLNT_ID srvr_clnt_id = DCC_ID_INVALID;
55 static const ID_TBL *srvr_clnt_tbl;
56 static u_char info_flags = 0;
57 #ifdef USE_DBCLEAN_F
58 static u_char db_mode = DB_OPEN_MMAP_WRITE;
59 #else
60 static u_char db_mode = DB_OPEN_MMAP_WRITE_NOSYNC;
61 #endif
62
63 static u_char cleardb; /* 1=clear the database */
64 static enum {
65 NORMAL_MODE,
66 REPAIR_MODE, /* database broken */
67 QUICK_MODE, /* too big for window */
68 HASH_MODE, /* hash table full */
69 NO_CRON_MODE, /* work around missing cron job */
70 DEL_MODE /* after deletion */
71 } clean_mode = NORMAL_MODE;
72 static u_char standalone; /* 1=don't talk to dccd */
73 static u_char keep_white; /* 1=do not rebuild whitelist */
74
75 static int exit_value = -1;
76
77 static const char *homedir;
78 static u_char cur_db_created;
79 static const char *cur_db_nm_str = DB_DCC_NAME;
80 static DCC_PATH cur_db_nm;
81 static DCC_PATH cur_hash_nm;
82 static int old_db_fd = -1;
83 static DB_HADDR old_db_hash_used;
84 static DB_PARMS old_db_parms;
85 static DB_PARMS new_db_parms;
86 static DB_PTR old_db_pos, new_db_csize;
87 static off_t new_db_fsize;
88 static u_int new_db_pagesize;
89 static FLOD_MMAPS new_flod_mmaps;
90 static u_char adj_delay_pos;
91 static u_char new_db_created;
92 static DCC_PATH new_db_nm;
93 static int new_db_fd = -1;
94 static u_char new_hash_created;
95 static DCC_PATH new_hash_nm;
96 static DCC_PATH old_db_nm;
97
98 static int expire_secs = -1;
99 static int def_expire_secs = DB_EXPIRE_SECS_DEF;
100 static int expire_spamsecs = -1;
101 static int def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF;
102 static int have_expire_parms = 0;
103 static double def_exp_ratio = 0.0;
104 static DB_EX_SECS new_ex_secs;
105 static DB_EX_TS new_ex_ts;
106
107 static DB_HADDR new_hash_len;
108
109 static int expired_rcds, comp_rcds, obs_rcds, expired_cks;
110 static int white_cks, kept_cks;
111
112 static DCC_TS future_ts;
113
114 #define RESTART_DELAY (60*5)
115 #define SHORT_DELAY 30
116
117 static struct timeval clean_start;
118
119 static struct timeval progress_rpt_last; /* when previous progress report */
120 static struct timeval progress_rpt_checked; /* when last checked */
121 static struct timeval progress_rpt_start; /* start of progress reporting */
122 #define REPORT_INTERVAL_SECS (5*60)
123 #define REPORT_INTERVAL_FAST_SECS 10
124 #define UNLOCK_INTERVAL_USECS (DCC_US/2)
125 static int progress_rpt_cnt; /* operations until next check */
126 static int progress_rpt_base;
127 static u_char progress_rpt_started; /* 1=have started reporting progress */
128 static int progress_rpt_percent;
129
130 static u_char write_new_flush(u_char);
131 static u_char write_new_rcd(const void *, int);
132 static void write_new_hdr(u_char);
133 static void unlink_whine(const char *, u_char);
134 static void rename_bail(const char *, const char *);
135 static u_char expire(DB_PTR);
136 static u_char copy_db(void);
137 static u_char catchup(DCC_EMSG);
138 static void parse_white(void);
139 static void build_hash(void);
140 static u_char persist_aop(DCC_AOPS, u_int32_t, int);
141 static void dccd_new_db(const char *);
142 static void finish(void);
143 static void exit_dbclean(int) NRATTRIB;
144 static void sigterm(int);
145
146
147 static void
148 usage(u_char die)
149 {
150 const char str[] = {
151 "usage: [-64dfFNPSVq] [-i id]"
152 " [-a [server-addr][,server-port]] [-h homedir]\n"
153 " [-G on] [-R mode] [-s hash-size] [-e seconds]"
154 " [-E spamsecs]\n"
155 " [-L ltype,facility.level]"};
156 static u_char complained;
157
158 /* its important to try to run, so don't give up unless necessary */
159 if (die) {
160 dcc_logbad(EX_USAGE, complained ? "giving up" : str);
161 } else if (!complained) {
162 dcc_error_msg("%s\ncontinuing", str);
163 complained = 1;
164 }
165 }
166
167
168 int NRATTRIB
169 main(int argc, char **argv)
170 {
171 char hostname[DCC_MAXDOMAINLEN];
172 u_char print_version = 0;
173 struct stat cur_db_sb;
174 u_int tgt_db_pagesize;
175 const char *cp;
176 char *p;
177 u_long l;
178 int i;
179
180 gettimeofday(&db_time, 0);
181 clean_start = db_time;
182
183 dcc_timeval2ts(&future_ts, &clean_start, 24*60*60);
184
185 dcc_syslog_init(1, argv[0], 0);
186
187 /* this must match DBCLEAN_GETOPTS in cron-dccd.in */
188 while ((i = getopt(argc, argv, "64dfFNPSVqi:a:h:G:R:s:e:E:L:")) != -1) {
189 switch (i) {
190 case '6':
191 #ifndef NO_IPV6
192 info_flags = DCC_INFO_FG_IPV6;
193 #endif
194 break;
195 case '4':
196 info_flags = 0;
197 break;
198
199 case 'd':
200 if (db_debug++)
201 ++dcc_clnt_debug;
202 break;
203
204 case 'f':
205 db_mode &= ~DB_OPEN_MMAP_WRITE;
206 break;
207
208
209 case 'F':
210 db_mode |= DB_OPEN_MMAP_WRITE;
211 break;
212
213 case 'N': /* make a new, clear database */
214 cleardb = 1;
215 standalone = 1;
216 break;
217
218 case 'P':
219 if (have_expire_parms > 0)
220 dcc_logbad(EX_USAGE,
221 "do not use -P with -e or -E");
222 have_expire_parms = -1;
223 break;
224
225 case 'S':
226 standalone = 1;
227 break;
228
229 case 'V':
230 fprintf(stderr, DCC_VERSION"\n");
231 print_version = 1;
232 break;
233
234 case 'q':
235 trace_quiet = 1;
236 break;
237
238 case 'i':
239 l = strtoul(optarg, &p, 10);
240 if (*p != '\0'
241 || l < DCC_SRVR_ID_MIN
242 || l > DCC_SRVR_ID_MAX)
243 dcc_logbad(EX_USAGE, "invalid DCC ID \"-i %s\"",
244 optarg);
245 srvr_clnt_id = l;
246 break;
247
248 case 'a':
249 cp = dcc_parse_nm_port(dcc_emsg, optarg, srvr.port,
250 hostname, sizeof(hostname),
251 &srvr.port, 0, 0, 0, 0);
252 if (!cp) {
253 dcc_error_msg("%s", dcc_emsg);
254 break;
255 }
256 cp += strspn(cp, DCC_WHITESPACE);
257 if (*cp != '\0') {
258 dcc_error_msg("unrecognized port number in"
259 "\"-a %s\"", optarg);
260 break;
261 }
262 if (hostname[0] == '\0')
263 strcpy(srvr.hostname, DCC_SRVR_NM_DEF_HOST);
264 else
265 BUFCPY(srvr.hostname, hostname);
266 break;
267
268 case 'h':
269 homedir = optarg;
270 break;
271
272 case 'G':
273 dcc_syslog_init(1, argv[0], " grey");
274 if (have_expire_parms > 0)
275 dcc_logbad(EX_USAGE,
276 "do not use -G with -e or -E");
277 if (strcasecmp(optarg, "on"))
278 usage(0); /* be generous and allow -Gasdf */
279 grey_on = 1;
280 have_expire_parms = -1;
281 cur_db_nm_str = DB_GREY_NAME;
282 break;
283
284 case 'R':
285 if (!strcasecmp(optarg, "bad"))
286 clean_mode = REPAIR_MODE;
287 else if (!strcasecmp(optarg, "quick"))
288 clean_mode = QUICK_MODE;
289 else if (!strcasecmp(optarg, "hash"))
290 clean_mode = HASH_MODE;
291 else if (!strcasecmp(optarg, "cron"))
292 clean_mode = NO_CRON_MODE;
293 else if (!strcasecmp(optarg, "del"))
294 clean_mode = DEL_MODE;
295 else
296 dcc_logbad(EX_USAGE,
297 "unrecognized repair mode -R %s",
298 optarg);
299 break;
300
301 case 's': /* hash table size in entries */
302 new_hash_len = strtoul(optarg, &p, 0);
303 if (*p != '\0'
304 || new_hash_len < MIN_HASH_ENTRIES
305 || new_hash_len > MAX_HASH_ENTRIES)
306 dcc_logbad(EX_USAGE,
307 "invalid database size \"%s\"",
308 optarg);
309 break;
310
311 case 'e': /* expiration for non-bulk checksums */
312 if (grey_on)
313 dcc_logbad(EX_USAGE,
314 "do not use -e with -G");
315 if (have_expire_parms < 0)
316 dcc_logbad(EX_USAGE,
317 "-e cannot be used with -P");
318 have_expire_parms = 1;
319 expire_secs = dcc_get_secs(optarg, 0,
320 DB_EXPIRE_SECS_MIN,
321 DB_EXPIRE_SECS_MAX, -1);
322 if (expire_secs < 0)
323 dcc_logbad(EX_USAGE,
324 "invalid expiration seconds"
325 " \"-e %s\"",
326 optarg);
327 break;
328
329 case 'E': /* expiration for bulk checksums */
330 if (grey_on)
331 dcc_logbad(EX_USAGE,
332 "do not use -E with -G");
333 if (have_expire_parms < 0)
334 dcc_logbad(EX_USAGE,
335 "do not use -E with -P");
336 have_expire_parms = 1;
337 expire_spamsecs = dcc_get_secs(optarg, 0,
338 DB_EXPIRE_SECS_MIN,
339 DB_EXPIRE_SECS_MAX, -1);
340 if (expire_spamsecs < 0)
341 dcc_logbad(EX_USAGE,
342 "invalid spam expiration seconds"
343 " \"-E %s\"",
344 optarg);
345 break;
346
347 case 'L':
348 dcc_parse_log_opt(optarg);
349 break;
350
351 default:
352 usage(0);
353 }
354 }
355 argc -= optind;
356 argv += optind;
357 if (argc != 0)
358 usage(1);
359
360 if (srvr_clnt_id == DCC_ID_INVALID && !standalone) {
361 if (print_version)
362 exit(EX_OK);
363 usage(1);
364 }
365 srvr.clnt_id = srvr_clnt_id;
366
367 if (srvr.port == 0)
368 srvr.port = DCC_GREY2PORT(grey_on);
369
370 dcc_clnt_unthread_init();
371 /* move to the target directory
372 * and set homedir for fnm2rel_good() */
373 if (!dcc_cdhome(dcc_emsg, homedir, 0))
374 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
375 fnm2rel_good(cur_db_nm, cur_db_nm_str, 0);
376 cp = "";
377 switch (clean_mode) {
378 case NORMAL_MODE: cp = "cleaning"; break;
379 case REPAIR_MODE: cp = "repairing"; break;
380 case QUICK_MODE: cp = "quick cleaning"; break;
381 case HASH_MODE: cp = "expanding hash table in"; break;
382 case NO_CRON_MODE: cp = "work around missing cron job for"; break;
383 case DEL_MODE: cp = "clean up deletion in"; break;
384 }
385 quiet_trace_msg(DCC_VERSION" %s %s", cp, fnm2abs_err(0, cur_db_nm));
386
387 atexit(finish);
388 signal(SIGHUP, sigterm);
389 signal(SIGTERM, sigterm);
390 signal(SIGINT, sigterm);
391 #ifdef SIGXFSZ
392 signal(SIGXFSZ, SIG_IGN);
393 #endif
394
395 if (!standalone) {
396 i = load_ids(dcc_emsg, srvr_clnt_id, &srvr_clnt_tbl, 1);
397 if (i <= 0)
398 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
399 memcpy(srvr.passwd, srvr_clnt_tbl->cur_passwd,
400 sizeof(srvr.passwd));
401 }
402
403 fnm2rel_good(cur_hash_nm, cur_db_nm, DB_HASH_SUFFIX);
404 fnm2rel_good(old_db_nm, cur_db_nm, "-old");
405 fnm2rel_good(new_db_nm, cur_db_nm, "-new");
406 fnm2rel_good(new_hash_nm, new_db_nm, DB_HASH_SUFFIX);
407
408 /* exclude other instances of this program */
409 if (!lock_dbclean(dcc_emsg, cur_db_nm))
410 dcc_logbad(dcc_ex_code, "%s: dbclean already running?",
411 dcc_emsg);
412
413 /* create & the lock new database file */
414 new_db_fd = dcc_lock_open(dcc_emsg, new_db_nm, O_RDWR|O_CREAT,
415 DCC_LOCK_OPEN_NOWAIT, DCC_LOCK_ALL_FILE, 0);
416 if (new_db_fd == -1)
417 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
418 if (0 > ftruncate(new_db_fd, 0))
419 dcc_logbad(EX_IOERR, "truncate(%s,0): %s",
420 new_db_nm, ERROR_STR());
421 new_db_fsize = 0;
422 new_db_created = 1;
423 new_db_csize = DB_PTR_BASE;
424
425 tgt_db_pagesize = 0;
426 if (0 > stat(cur_db_nm, &cur_db_sb)) {
427 if (errno != ENOENT)
428 dcc_logbad(EX_IOERR, "stat(%s): %s",
429 cur_db_nm, ERROR_STR());
430 /* empty a missing database */
431 cleardb = 1;
432 } else if (cur_db_sb.st_size == 0) {
433 /* empty an empty database */
434 cleardb = 1;
435 } else if (grey_on && cur_db_sb.st_size < DB_MIN_MIN_MBYTE*1024*1024) {
436 /* Force a relatively large page size for typical tiny
437 * greylist databases. Try to use few mmap() pages */
438 tgt_db_pagesize = cur_db_sb.st_size/4;
439 if (tgt_db_pagesize < MIN_HASH_ENTRIES*sizeof(HASH_ENTRY))
440 tgt_db_pagesize = MIN_HASH_ENTRIES*sizeof(HASH_ENTRY);
441 }
442 new_db_pagesize = db_get_pagesize(0, tgt_db_pagesize);
443 write_new_hdr(1);
444
445
446 if (standalone) {
447 u_char busy;
448
449 /* open and lock the current database to ensure
450 * the daemon is not running */
451 old_db_fd = dcc_lock_open(dcc_emsg, cur_db_nm, O_RDWR,
452 DCC_LOCK_OPEN_NOWAIT,
453 DCC_LOCK_ALL_FILE, &busy);
454 if (busy)
455 dcc_logbad(EX_USAGE, "database %s in use: %s",
456 cur_db_nm, dcc_emsg);
457 if (cleardb
458 && stat(cur_db_nm, &cur_db_sb) >= 0) {
459 if (cur_db_sb.st_size != 0)
460 dcc_logbad(EX_USAGE, "%s already exists",
461 cur_db_nm);
462 cur_db_created = 1;
463 }
464
465 /* create and lock the current database if it did not exist
466 * to ensure that the server daemon is not running */
467 if (old_db_fd < 0) {
468 old_db_fd = dcc_lock_open(dcc_emsg, cur_db_nm,
469 O_RDWR|O_CREAT,
470 DCC_LOCK_OPEN_NOWAIT,
471 DCC_LOCK_ALL_FILE, 0);
472 if (old_db_fd < 0)
473 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
474 cur_db_created = 1;
475 }
476
477 } else {
478 /* Tell the daemon to start turning off the flooding
479 * so we can adjust its positions in the flood map file
480 * Try very hard to talk to it because releasing the database
481 * can cause some UNIX flavors to stall dccd. */
482 DCC_CLNT_FGS clnt_fgs;
483
484 clnt_fgs = DCC_CLNT_FG_SLOW;
485 if (grey_on)
486 clnt_fgs |= DCC_CLNT_FG_GREY;
487 ctxt = dcc_tmp_clnt_init(dcc_emsg, 0, &srvr,
488 0, clnt_fgs, info_flags);
489 /* try very hard to contact dccd */
490 if (!ctxt)
491 ctxt = dcc_tmp_clnt_init(dcc_emsg, 0, &srvr,
492 0, clnt_fgs, info_flags);
493 if (!ctxt)
494 dcc_logbad(EX_DCC_RESTART, "initial contact: %s",
495 dcc_emsg);
496
497 ++flods_off;
498 if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_SHUTDOWN,
499 SHORT_DELAY))
500 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
501 }
502
503 /* resolve whitelisted host names before locking the database */
504 parse_white();
505
506 /* Tell the daemon to unlock the database between operations
507 * and insist it stop flooding. */
508 if (!standalone) {
509 /* give the daemon a chance to stop pumping the floods */
510 for (;;) {
511 if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_CHECK,
512 SHORT_DELAY))
513 dcc_logbad(EX_UNAVAILABLE, "%s", dcc_emsg);
514
515 i = flod_running(aop_resp.resp.val.string);
516 if (i < 0)
517 dcc_logbad(EX_PROTOCOL,
518 "%s: unrecognized \"%s\"",
519 dcc_aop2str(0, 0,
520 DCC_AOP_FLOD,
521 DCC_AOP_FLOD_CHECK),
522 aop_resp.resp.val.string);
523 if (i == 0)
524 break;
525 if (time(0) > clean_start.tv_sec+45) {
526 if (flods_off < 2) {
527 ++flods_off;
528 if (!persist_aop(DCC_AOP_FLOD,
529 DCC_AOP_FLOD_HALT,
530 SHORT_DELAY))
531 dcc_logbad(dcc_ex_code, "%s",
532 dcc_emsg);
533 continue;
534 }
535 if (time(0) > clean_start.tv_sec+60)
536 dcc_logbad(EX_UNAVAILABLE,
537 "failed to stop floods: %s",
538 aop_resp.resp.val.string);
539 }
540 usleep(100*1000);
541 }
542 dccd_unlocked = 1;
543 if (!persist_aop(DCC_AOP_DB_CLEAN, 0, SHORT_DELAY))
544 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
545 /* The daemon adds its own and removes our hold on flooding
546 * when we tell it to unlock the database after every
547 * operation. */
548 --flods_off;
549 }
550
551 if (cleardb) {
552 quiet_trace_msg(DCC_VERSION" %s database %s",
553 cur_db_created ? "creating" : "clearing",
554 cur_db_nm);
555
556 } else if (clean_mode == REPAIR_MODE) {
557 dcc_error_msg("explicit repair of %s", cur_db_nm);
558
559 } else {
560 if (!db_open(0, old_db_fd, cur_db_nm, 0,
561 DB_OPEN_RDONLY
562 | (standalone
563 ? DB_OPEN_LOCK_NOWAIT : DB_OPEN_LOCK_WAIT))) {
564 /* If the hash table is sick, check timestamps only
565 * as much as no hash table allows.
566 * Then rebuild the hash table. */
567 clean_mode = REPAIR_MODE;
568
569 } else {
570 if (db_debug) {
571 quiet_trace_msg("%s %s",
572 db_window_size_str, new_db_nm);
573 quiet_trace_msg("%d old hash entries total,"
574 " %d or %d%% used",
575 HADDR2LEN(db_hash_len),
576 HADDR2LEN(db_hash_used),
577 (int)((HADDR2LEN(db_hash_used)
578 * 100.0)
579 /HADDR2LEN(db_hash_len)));
580 }
581 old_db_parms = db_parms;
582 old_db_hash_used = db_hash_used;
583
584 /* save a handle on the old database to get
585 * reports that arrive while we expire it */
586 old_db_fd = dup(db_fd);
587 if (old_db_fd < 0)
588 dcc_logbad(EX_OSERR, "dup(%s): %s",
589 cur_db_nm, ERROR_STR());
590
591 /* read old and create new database file */
592 if (!expire(db_csize)) {
593 old_db_hash_used = 0;
594 clean_mode = REPAIR_MODE;
595 }
596 }
597
598 if (clean_mode == REPAIR_MODE)
599 dcc_error_msg("repairing %s", cur_db_nm);
600 }
601
602 /* if we are repairing the hash table (including now repairing
603 * after encountering problems while expiring),
604 * copy the current file with minimal expiring */
605 if (clean_mode == REPAIR_MODE
606 && !cleardb
607 && !copy_db())
608 exit_dbclean(EX_UNAVAILABLE);
609 build_hash();
610
611 /* Copy any records from the old file to the new file that were
612 * added to the old file while we were creating the new file. */
613 if (!cleardb
614 && !catchup(dcc_emsg))
615 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
616
617 /* we have the new database locked
618 *
619 * preserve the current data file as "*-old" */
620 rename_bail(cur_db_nm, old_db_nm);
621
622 /* delete the current hash file, and install both new files */
623 rename_bail(new_hash_nm, cur_hash_nm);
624 strcpy(new_hash_nm, cur_hash_nm);
625 new_hash_created = 0;
626 if (db_hash_fd >= 0)
627 strcpy(db_hash_nm, cur_hash_nm);
628
629 rename_bail(new_db_nm, cur_db_nm);
630 strcpy(new_db_nm, cur_db_nm);
631 new_db_created = 0;
632 if (db_fd > 0)
633 strcpy(db_nm, cur_db_nm);
634 cur_db_created = 0;
635
636 if (cleardb) {
637 flod_mmap_path_set();
638 unlink_whine(flod_mmap_path, 1);
639 if (!db_close(1))
640 exit_dbclean(EX_UNAVAILABLE);
641 exit_dbclean(EX_OK);
642 }
643
644 /* if the daemon was not running, we're finished */
645 if (standalone) {
646 /* install the flood positions if things are ok */
647 if (flod_mmaps) {
648 memcpy(flod_mmaps, &new_flod_mmaps,
649 sizeof(new_flod_mmaps));
650 flod_unmap(0, 0);
651 }
652 if (!db_close(1))
653 exit_dbclean(EX_UNAVAILABLE);
654 exit_dbclean(EX_OK);
655 }
656
657 /* tell the daemon to switch to the new database. This will leave
658 * the daemon stuck waiting for us to unlock the new database. */
659 dccd_new_db("copy late arrivals");
660
661 /* install the flood positions if things are ok */
662 if (flod_mmaps) {
663 memcpy(flod_mmaps, &new_flod_mmaps,
664 sizeof(new_flod_mmaps));
665 flod_unmap(0, 0);
666 }
667
668 /* Copy any records from the old file to the new file in the
669 * race to tell the daemon to switch to the new file.
670 * The new file is still locked from build_hash().
671 * The daemon should be stuck waiting to open it in the
672 * DCC_AOP_DB_NEW request via the preceding dccd_new_db().
673 *
674 * Since the daemon has switched and probably cannot go back,
675 * ignore any errors */
676 catchup(0);
677 if (!db_close(1))
678 exit_dbclean(EX_UNAVAILABLE);
679
680 /* finish() will be called via exit() to tell the daemon to resume
681 * flooding if necessary. However, in the normal case, we removed
682 * all counts against flooding before calling dccd_new_db() */
683 exit_dbclean(EX_OK);
684 }
685
686
687
688 /* adjust output flood positions */
689 static DB_PTR
690 adj_mmap(void)
691 {
692 FLOD_MMAP *mp;
693 DB_PTR delta, new_pos;
694
695 delta = new_db_csize - old_db_pos;
696 new_pos = 0;
697 for (mp = new_flod_mmaps.mmaps;
698 mp <= LAST(new_flod_mmaps.mmaps);
699 ++mp) {
700 /* do nothing to marks we have already adjusted */
701 if (!(mp->flags & FLODMAP_FG_MARK))
702 continue;
703 if (mp->confirm_pos > old_db_pos) {
704 /* note the next mark that will need adjusting
705 * but do not adjust it yet */
706 if (new_pos == 0
707 || new_pos > mp->confirm_pos)
708 new_pos = mp->confirm_pos;
709 } else {
710 /* adjust marks not past the current position */
711 mp->confirm_pos += delta;
712 mp->flags &= ~FLODMAP_FG_MARK;
713 }
714 }
715 if (adj_delay_pos) {
716 if (new_flod_mmaps.delay_pos > old_db_pos) {
717 if (new_pos == 0
718 || new_pos > new_flod_mmaps.delay_pos)
719 new_pos = new_flod_mmaps.delay_pos;
720 } else {
721 new_flod_mmaps.delay_pos += delta;
722 adj_delay_pos = 0;
723 }
724 }
725
726 return new_pos;
727 }
728
729
730
731 /* find a checksum
732 * Leave db_sts.rcd2 pointing at the record. */
733 static u_char
734 get_ck(DB_RCD_CK **ckp, /* point this to the checksum */
735 DCC_CK_TYPES type, const DCC_SUM sum)
736 {
737 DB_FOUND db_result;
738
739 /* We must lock the file to keep the daemon from changing the
740 * internal hash table links. */
741 if (!DB_IS_LOCKED()
742 && 0 > db_lock())
743 return 0;
744
745 dcc_emsg[0] = '\0';
746 db_result = db_lookup(dcc_emsg, type, sum, 0, MAX_HASH_ENTRIES,
747 &db_sts.hash, &db_sts.rcd2, ckp);
748 switch (db_result) {
749 case DB_FOUND_LATER:
750 case DB_FOUND_SYSERR:
751 dcc_error_msg("hash lookup for %s from "L_HPAT" = %d: %s",
752 DB_TYPE2STR(type), old_db_pos, db_result,
753 dcc_emsg);
754 break;
755
756 case DB_FOUND_IT:
757 case DB_FOUND_EMPTY:
758 case DB_FOUND_CHAIN:
759 case DB_FOUND_INTRUDER:
760 return 1;
761 }
762
763 return 0;
764 }
765
766
767
768 /* check the leading report for not recent checksum
769 * on entry db_sts.rcd points to the record under consideration
770 * Leave db_sts.rcd2 pointing at the leading record. */
771 static int /* -1=broken database 0=expire 1=keep */
772 get_lead(DCC_CK_TYPES type, const DB_RCD_CK *rcd_ck)
773 {
774 DB_RCD_CK *lead_ck;
775 DCC_TGTS rcd_tgts, lead_tgts;
776
777 if (DCC_CK_IS_REP_CMN(grey_on, type)) {
778 /* do not keep reputations on systems without reputation code */
779 return 0;
780 }
781
782 if (!get_ck(&lead_ck, type, rcd_ck->sum))
783 return -1;
784
785 if (!lead_ck) {
786 dcc_error_msg("no leader for %s %s at "L_HPAT,
787 DB_TYPE2STR(type),
788 dcc_ck2str_err(type, rcd_ck->sum, 0),
789 old_db_pos);
790 return -1;
791 }
792
793 /* We know the target checksum is not recent. Forget the target if
794 * both the target and the leader are ancient. The leader might not be
795 * the newest checksum, but it usually is. Note also that the target
796 * might be the leader. */
797 if (dcc_ts_older_ts(&db_sts.rcd2.d.r->ts, &new_ex_ts[type].spam)
798 && dcc_ts_older_ts(&db_sts.rcd.d.r->ts, &new_ex_ts[type].spam))
799 return 0;
800
801 lead_tgts = DB_TGTS_CK(lead_ck);
802 rcd_tgts = DB_TGTS_CK(rcd_ck);
803
804 /* We know either the leader or the target is not ancient.
805 * Keep the target if the leader's total is respectable.
806 * We might eventually compress the target. */
807 return (lead_tgts >= db_tholds[type]);
808 }
809
810
811
812 static void
813 report_progress_init(void)
814 {
815 gettimeofday(&db_time, 0);
816 progress_rpt_start.tv_sec = db_time.tv_sec;
817 progress_rpt_checked = db_time;
818 progress_rpt_last = db_time;
819 progress_rpt_base = 100;
820 progress_rpt_cnt = progress_rpt_base;
821 progress_rpt_started = 0;
822 }
823
824
825
826 static time_t /* us since last check */
827 report_progress(u_char final,
828 const char *s1, const char *s2,
829 DB_PTR done, DB_PTR total, DB_PTR scale)
830 {
831 time_t reported_us, checked_us, secs, interval;
832 double percent;
833
834 if (!total)
835 percent = 100.0;
836 else
837 percent = (done*100.0)/total;
838
839 gettimeofday(&db_time, 0);
840 checked_us = tv_diff2us(&db_time, &progress_rpt_checked);
841 progress_rpt_checked = db_time;
842
843 /* Check frequently enough to report or unlock the database.
844 * Adjust the number of operations until the next check
845 * based on the time spent on the previous */
846 if (checked_us > 0)
847 progress_rpt_base = ((progress_rpt_base * 0.5 * DCC_US
848 * min(REPORT_INTERVAL_FAST_SECS*DCC_US,
849 UNLOCK_INTERVAL_USECS))
850 / checked_us);
851 else
852 progress_rpt_base = 100;
853 if (progress_rpt_base < 100)
854 progress_rpt_base = 100;
855 if (progress_rpt_base > 10*1000)
856 progress_rpt_base = 10*1000;
857 progress_rpt_cnt = progress_rpt_base;
858
859 interval = ((db_debug > 1)
860 ? REPORT_INTERVAL_FAST_SECS
861 : REPORT_INTERVAL_SECS);
862
863 /* try not to start reporting progress at the end */
864 if (!progress_rpt_started
865 && (total*1.0 - done*1.0) / progress_rpt_base <= interval*1.0)
866 return checked_us;
867
868 reported_us = tv_diff2us(&db_time, &progress_rpt_last);
869 if (reported_us >= interval * DCC_US
870 || (final && progress_rpt_percent != 100)) {
871 progress_rpt_started = 1;
872 progress_rpt_percent = percent;
873 secs = db_time.tv_sec - progress_rpt_start.tv_sec;
874 secs -= secs % interval;
875 progress_rpt_last.tv_sec = progress_rpt_start.tv_sec + secs;
876 if (db_debug > 1)
877 quiet_trace_msg("%s "L_DPAT" of "L_DPAT" %s or %d%%"
878 " db_mmaps=%d hash=%d",
879 s1, done/scale, total/scale,
880 s2, progress_rpt_percent,
881 db_stats.db_mmaps, db_stats.hash_mmaps);
882 else
883 quiet_trace_msg("%s "L_DPAT" of "L_DPAT" %s or %d%%",
884 s1, done/scale, total/scale,
885 s2, progress_rpt_percent);
886 }
887
888
889 if (clean_mode == QUICK_MODE
890 && !final) {
891 if (db_time.tv_sec > clean_start.tv_sec + 30*60)
892 dcc_logbad(EX_UNAVAILABLE, "quick cleaning too slow");
893 }
894
895 return checked_us;
896 }
897
898
899
900 /* delete old, less fuzzy checksums in the new record */
901 static void
902 fuzzy_obs(DB_RCD *new, DB_RCD_CK **end_ck)
903 {
904 DB_RCD_CK *rcd_ck;
905 DCC_CK_TYPES type;
906 int len;
907
908 rcd_ck = new->cks;
909 while (rcd_ck < *end_ck) {
910 type = DB_CK_TYPE(rcd_ck);
911 if (!dcc_ts_older_ts(&new->ts, &new_ex_ts[type].all)) {
912 ++rcd_ck;
913 continue;
914 }
915
916 ++obs_rcds;
917 new->fgs_num_cks = (((new->fgs_num_cks - 1)
918 & ~DB_RCD_FG_DELAY)
919 | DB_RCD_FG_TRIM);
920 --*end_ck;
921 len = (char *)*end_ck - (char *)rcd_ck;
922 if (len == 0)
923 return;
924 memmove(rcd_ck, rcd_ck+1, len);
925 }
926 }
927
928
929
930 static void
931 adj_def_expire(void)
932 {
933 double new_dbsize, new_dbsize1, day_rate, db_ratio;
934 int spam_secs, secs;
935 struct timeval tv;
936 char new_dbsize_buf[20], csize_buf[20], old_csize_buf[20];
937 char day_rate_buf[20];
938
939 /* do this only once */
940 if (def_exp_ratio != 0.0)
941 return;
942
943 /* Compute the ratio of size of the database 24 hours from now
944 * to the size of the window. Assume:
945 * - We will receive about the same number of reports in the next
946 * 24 hours as the last 24. This is a good assumption for
947 * weekdays, but as much as 30% wrong about weekends.
948 * - Dbclean will be run once per day at the current time.
949 * - The size of the database is a linear function of expiration
950 * duration. This is tenuous when the spam expiration duration
951 * is less than 1 day.
952 * Use the maximum of two guesses for tomorrow's database size.
953 * One guess is the current size, base on assuming that
954 * we will use roughly the same expiration durations and
955 * so the database will grow to about size it now has.
956 * The other guess uses the previous database size and the
957 * avarage data rate. It compensates for short term changes
958 * in the rate and for running dbclean more than once per day. */
959 new_dbsize = db_parms.db_csize;
960 size2str(csize_buf, sizeof(csize_buf), new_dbsize, 1);
961 new_dbsize1 = db_parms.old_db_csize;
962 size2str(old_csize_buf, sizeof(old_csize_buf), new_dbsize1, 1);
963 day_rate = db_add_rate(&db_parms, 0);
964 if (day_rate >= 0.0)
965 day_rate *= (24*60*60);
966 size2str(day_rate_buf, sizeof(day_rate_buf), day_rate, 1);
967
968 /* without information, be pessimistic and assume 1.4 GByte/day */
969 if (day_rate <= 0.0 && !grey_on)
970 day_rate = 1.4*1024.0*1024.0*1024.0;
971 if (day_rate > 0.0) {
972 new_dbsize1 += day_rate;
973 if (new_dbsize < new_dbsize1)
974 new_dbsize = new_dbsize1;
975 }
976
977 size2str(new_dbsize_buf, sizeof(new_dbsize_buf), new_dbsize, 1);
978 if (db_debug)
979 quiet_trace_msg("predict new_dbsize=%s from db_csize=%s"
980 " old_db_csize=%s rate=%s",
981 new_dbsize_buf,
982 csize_buf, old_csize_buf, day_rate_buf);
983
984 /* Assume there will be 20% as many bytes used in the hash table
985 * as in the database */
986 new_dbsize *= 1.2;
987
988 /* we cannot adjust the defaults
989 * - 1st time dbclean run
990 * - if the previous run used a larger than default value
991 * - there is no need to reduce the default because the predicted
992 * maximum size is smaller than the target maximum
993 */
994 spam_secs = db_parms.ex_secs[DCC_CK_FUZ2].spam;
995 if (spam_secs != 0
996 && spam_secs <= DB_EXPIRE_SPAMSECS_DEF
997 && new_dbsize > db_max_byte
998 && (db_ratio = (db_max_byte / new_dbsize)) < 1.0) {
999 def_exp_ratio = (spam_secs * db_ratio) / DB_EXPIRE_SPAMSECS_DEF;
1000
1001 /* change the two durations together and so with same errors */
1002 def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF * def_exp_ratio;
1003 def_expire_secs = DB_EXPIRE_SECS_DEF * def_exp_ratio;
1004
1005 def_expire_secs -= def_expire_secs % (60*60);
1006 if (def_expire_secs < DB_EXPIRE_SECS_DEF_MIN)
1007 def_expire_secs = DB_EXPIRE_SECS_DEF_MIN;
1008
1009 def_expire_spamsecs -= def_expire_spamsecs % (24*60*60);
1010 if (def_expire_spamsecs < DB_EXPIRE_SPAMSECS_DEF_MIN)
1011 def_expire_spamsecs = DB_EXPIRE_SPAMSECS_DEF_MIN;
1012
1013 #if DB_MIN_MBYTE == 0 && !defined(GOT_PHYSMEM)
1014 if (def_expire_secs == DB_EXPIRE_SECS_DEF_MIN
1015 || def_expire_spamsecs == DB_EXPIRE_SPAMSECS_DEF_MIN)
1016 quiet_trace_msg("cannot determine physical RAM; rebuild"
1017 " with ./configure with-db-memory");
1018 #endif
1019 return;
1020 }
1021
1022 def_exp_ratio = 1.0;
1023
1024 /* if the defaults do not need to be reduced now but they
1025 * were reduced before, then relax them gently */
1026 if (spam_secs < DB_EXPIRE_SPAMSECS_DEF) {
1027 dcc_ts2timeval(&tv, &db_parms.ex_spam[DCC_CK_FUZ2]);
1028 secs = clean_start.tv_sec - tv.tv_sec;
1029 if (secs > 0
1030 && secs < DB_EXPIRE_SPAMSECS_DEF)
1031 def_expire_spamsecs = secs;
1032
1033 dcc_ts2timeval(&tv, &db_parms.ex_all[DCC_CK_FUZ2]);
1034 secs = clean_start.tv_sec - tv.tv_sec;
1035 if (secs > 0
1036 && secs < DB_EXPIRE_SECS_DEF)
1037 def_expire_secs = secs;
1038 }
1039 }
1040
1041
1042
1043 /* copy the existing database, discard junk and old entries */
/* expire() walks the old database from DB_PTR_BASE up to old_db_csize and
 * writes the reports worth keeping to the new file with write_new_rcd().
 * Deleted reports, reports with bogus target counts, reports from the
 * future, and reports none of whose checksums are needed are dropped and
 * counted in expired_rcds and expired_cks.
 * Before copying it settles expire_secs and expire_spamsecs, fills
 * new_ex_secs[] and new_ex_ts[] for every checksum type, and writes the
 * new header; the header is written again after the copy.
 * The result is 0 after EXPIRE_BAIL(), or when db_unload() returns 0 or
 * db_close() fails at the end; otherwise it is 1. */
1044 static u_char /* 1=done 0=database broken */
1045 expire(DB_PTR old_db_csize)
1046 {
/* common failure exit: cancel the alarm, call flod_unmap() and db_close(),
 * and return 0 to the caller */
1047 #define EXPIRE_BAIL() {alarm(0); flod_unmap(0, 0); db_close(0); return 0;}
1048
1049 DCC_TS ts;
1050 u_char emptied, reduced_defaults;
1051 u_char old_ok[DCC_DIM_CKS];
1052 DB_RCD rcd, new;
1053 const DB_RCD_CK *rcd_ck, *rcd_ck2;
1054 DB_RCD_CK *new_ck;
1055 DCC_TGTS tgts_raw, ck_tgts;
1056 u_char needed, obs_lvl, timely;
1057 int old_num_cks, new_num_cks, nokeep_num_cks;
1058 DB_PTR min_confirm_pos, next_adj_pos;
1059 FLOD_MMAP *mp;
1060 DCC_CK_TYPES prev_type, type, type2;
1061 int rcd_len;
1062 struct stat sb;
1063 time_t need_unlock;
1064 int i;
1065
/* Replace negative expire_secs and expire_spamsecs with the defaults
 * computed by adj_def_expire(); presumably a negative value means the
 * option was not given -- confirm against the argument parsing.
 * reduced_defaults records that a size-adjusted default was used so that
 * the adjustment can be logged once below. */
1066 reduced_defaults = 0;
1067 if (expire_secs < 0) {
1068 adj_def_expire();
1069 if (def_expire_secs > expire_spamsecs
1070 && expire_spamsecs > 0) {
1071 expire_secs = expire_spamsecs;
1072 } else {
1073 if (def_expire_secs != DB_EXPIRE_SECS_DEF
1074 && def_exp_ratio != 1.0)
1075 reduced_defaults = 1;
1076 expire_secs = def_expire_secs;
1077 }
1078 }
1079 if (expire_spamsecs < 0) {
1080 adj_def_expire();
1081 if (def_expire_spamsecs < expire_secs) {
1082 expire_spamsecs = expire_secs;
1083 } else {
1084 if (def_expire_spamsecs != DB_EXPIRE_SPAMSECS_DEF
1085 && def_exp_ratio != 1.0)
1086 reduced_defaults = 1;
1087 expire_spamsecs = def_expire_spamsecs;
1088 }
1089 }
1090
1091 if (expire_spamsecs > 0 && expire_spamsecs < expire_secs)
1092 dcc_logbad(EX_USAGE,
1093 "spam expiration -E must be longer than -e");
1094
1095 expired_rcds = 0;
1096 expired_cks = 0;
1097 kept_cks = white_cks;
1098 need_unlock = 0;
1099 report_progress_init();
1100
1101 /* Compute timestamps for records we keep.
1102 * Use the values from the previous use of dbclean as defaults
1103 * unless they are bogus */
1104 memset(old_ok, 0, sizeof(old_ok));
1105 dcc_secs2ts(&ts, clean_start.tv_sec);
1106 for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
1107 DB_EX_SEC *th = &db_parms.ex_secs[type];
1108
1109 if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type))
1110 continue;
1111 if (DCC_CK_IS_REP_OP(grey_on, type))
1112 continue;
1113
1114 if (th->spam <= 0 || th->spam > DB_EXPIRE_SECS_MAX)
1115 continue;
1116 if (th->all <= 0 || th->all > th->spam)
1117 continue;
1118
1119 if (dcc_ts_newer_ts(&db_parms.ex_spam[type], &ts))
1120 continue;
1121 if (dcc_ts_newer_ts(&db_parms.ex_all[type], &ts))
1122 continue;
1123
1124 old_ok[type] = 1; /* old values for this type are ok */
1125 }
1126
/* choose the durations (new_ex_secs) and the matching cutoff timestamps
 * (new_ex_ts) for each type of checksum */
1127 for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
1128 DB_EX_SEC *new_th = &new_ex_secs[type];
1129 DB_EX_TS_TYPE *new_ts = &new_ex_ts[type];
1130 int old_all = db_parms.ex_secs[type].all;
1131 int old_spam = db_parms.ex_secs[type].spam;
1132
1133 if (type == DCC_CK_SRVR_ID) {
1134 /* keep server-ID declarations 5 weeks or a week longer
1135 * than reputations so that they will be flooded 1st */
1136 new_th->all = DB_EXPIRE_SRVR_ID_SECS;
1137 new_th->spam = DB_EXPIRE_SRVR_ID_SECS;
1138
1139 } else if (grey_on) {
1140 if (old_ok[type]) {
1141 /* This is the path by which the dccd -G
1142 * parameters are used. */
1143 new_th->all = old_all;
1144 new_th->spam = old_spam;
1145 } else if (DCC_CK_IS_GREY_TRIPLE(1, type)) {
1146 new_th->all = DEF_GREY_WINDOW;
1147 new_th->spam = DEF_GREY_WHITE;
1148 } else if (DCC_CK_IS_GREY_MSG(1, type)
1149 || type == DCC_CK_BODY) {
1150 new_th->all = DEF_GREY_WINDOW;
1151 new_th->spam = DEF_GREY_WINDOW;
1152 } else {
1153 new_th->all = 1;
1154 new_th->spam = 1;
1155 }
1156 } else if (have_expire_parms < 0 && old_ok[type]
1157 && (db_parms.flags & DB_PARM_EXP_SET)) {
1158 /* use the old durations because they are valid
1159 * and we have no expiration parameters */
1160 new_th->all = old_all;
1161 new_th->spam = old_spam;
1162
1163 } else {
1164 new_th->all = expire_secs;
1165 new_th->spam = (DCC_CK_LONG_TERM(type)
1166 ? expire_spamsecs
1167 : expire_secs);
1168 if (reduced_defaults) {
1169 quiet_trace_msg("adjust default by"
1170 " %4.2f to -e%dhours"
1171 " -E%ddays",
1172 def_exp_ratio,
1173 expire_secs/(60*60),
1174 expire_spamsecs
1175 / (24*60*60));
1176 reduced_defaults = 0;
1177 }
1178 }
1179
1180 /* compute oldest timestamp for this type of checksum,
1181 * without going crazy with "-Enever" */
1182 dcc_secs2ts(&new_ts->spam,
1183 clean_start.tv_sec - min(clean_start.tv_sec,
1184 new_th->spam));
1185 dcc_secs2ts(&new_ts->all,
1186 clean_start.tv_sec - min(clean_start.tv_sec,
1187 new_th->all));
1188 }
1189
1190 /* put the timestamps into the new file */
1191 write_new_hdr(1);
1192
1193 /* if we are running as root,
1194 * don't change the owner of the database */
1195 if (getuid() == 0) {
1196 if (0 > fstat(old_db_fd, &sb))
1197 dcc_logbad(EX_IOERR, "fstat(%s): %s",
1198 old_db_nm, ERROR_STR());
1199 if (0 > fchown(new_db_fd, sb.st_uid, sb.st_gid))
1200 dcc_logbad(EX_IOERR, "fchown(%s,%d,%d): %s",
1201 new_db_nm, (int)sb.st_uid, (int)sb.st_gid,
1202 ERROR_STR());
1203 }
1204
1205 if (DB_PTR_BASE != lseek(old_db_fd, DB_PTR_BASE, SEEK_SET))
1206 dcc_logbad(EX_IOERR, "lseek(%s,%d): %s",
1207 cur_db_nm, DB_PTR_BASE, ERROR_STR());
1208 read_rcd_invalidate(0);
1209
/* Start new_flod_mmaps as a copy of flod_mmaps when flod_mmap() left one,
 * and set FLODMAP_FG_MARK on exactly the entries that have a remote
 * hostname.
 * NOTE(review): min_confirm_pos is computed here but is not read again
 * anywhere in this function. */
1210 flod_mmap(0, &db_parms.sn, 0, 1, 1);
1211 if (flod_mmaps)
1212 memcpy(&new_flod_mmaps, flod_mmaps, sizeof(new_flod_mmaps));
1213 min_confirm_pos = new_flod_mmaps.delay_pos;
1214 next_adj_pos = DB_PTR_BASE;
1215 for (mp = new_flod_mmaps.mmaps;
1216 mp <= LAST(new_flod_mmaps.mmaps);
1217 ++mp) {
1218 if (mp->rem_hostname[0] == '\0') {
1219 mp->flags &= ~FLODMAP_FG_MARK;
1220 } else {
1221 mp->flags |= FLODMAP_FG_MARK;
1222 if (min_confirm_pos > mp->confirm_pos)
1223 min_confirm_pos = mp->confirm_pos;
1224 }
1225 }
1226 adj_delay_pos = (new_flod_mmaps.delay_pos != 0) ? 1 : 0;
1227
1228 emptied = cleardb;
1229 dcc_timeval2ts(&new_flod_mmaps.sn, &clean_start, 0);
1230
1231 /* copy the old file to the new,
1232 * discarding and compressing old data as we go */
1233 for (old_db_pos = DB_PTR_BASE;
1234 old_db_pos < old_db_csize;
1235 old_db_pos += rcd_len) {
1236 if (--progress_rpt_cnt <= 0)
1237 need_unlock += report_progress(0, " processed",
1238 "MBytes",
1239 old_db_pos, old_db_csize,
1240 1024*1024);
1241
1242 if (old_db_pos == next_adj_pos)
1243 next_adj_pos = adj_mmap();
1244
1245 if (clean_mode != REPAIR_MODE) {
1246 /* read the record by mapping if not repairing */
1247 if (!db_map_rcd(0, &db_sts.rcd, old_db_pos, &rcd_len))
1248 EXPIRE_BAIL();
1249 memcpy(&rcd, db_sts.rcd.d.r, rcd_len);
1250 } else {
1251 rcd_len = read_rcd(0, &rcd,
1252 old_db_fd, old_db_pos, cur_db_nm);
1253 if (rcd_len <= 0) {
1254 if (rcd_len == 0)
1255 dcc_error_msg("unexpected EOF in %s at "
1256 L_HPAT" instead of "
1257 L_HPAT,
1258 cur_db_nm,
1259 old_db_pos,
1260 old_db_csize);
1261 /* give up and ask our neighbors to rewind */
1262 emptied = 1;
1263 old_db_pos = old_db_csize;
1264 break;
1265 }
1266 }
1267
1268 /* skip end-of-page padding */
1269 if (rcd_len == sizeof(rcd)-sizeof(rcd.cks))
1270 continue;
1271
1272 if (DB_RCD_ID(&rcd) == DCC_ID_WHITE) {
1273 /* skip whitelist entries if whitelist source is ok */
1274 if (!keep_white)
1275 continue;
1276 /* refresh whitelist entries if source is bad */
1277 dcc_timeval2ts(&rcd.ts, &clean_start, 0);
1278 }
1279
1280 old_num_cks = DB_NUM_CKS(&rcd);
1281
1282 /* expire or throw away deleted reports */
1283 tgts_raw = DB_TGTS_RCD_RAW(&rcd);
1284 if (tgts_raw == 0) {
1285 ++expired_rcds;
1286 expired_cks += old_num_cks;
1287 continue;
1288 }
1289 if (tgts_raw > DCC_TGTS_MAX_DB) {
1290 dcc_error_msg("discarding report at "L_HPAT
1291 " with bogus target count %#x",
1292 old_db_pos, tgts_raw);
1293 ++expired_rcds;
1294 expired_cks += old_num_cks;
1295 continue;
1296 }
1297
/* NOTE(review): messages are printed while whines < 50, but the
 * "; stop complaining" suffix is added from the 20th message on, so it
 * can appear on up to 31 messages.  The checksum loop below uses 20 for
 * both limits -- confirm which limit is intended here. */
1298 if (dcc_ts_newer_ts(&rcd.ts, &future_ts)) {
1299 static int whines = 0;
1300 if (whines < 50)
1301 dcc_error_msg("discarding report at "L_HPAT
1302 " from the future %s%s",
1303 old_db_pos,
1304 ts2str_err(&rcd.ts),
1305 ++whines >= 20
1306 ? "; stop complaining"
1307 : "");
1308 ++expired_rcds;
1309 expired_cks += old_num_cks;
1310 continue;
1311 }
1312
1313
/* Per-record state for the checksum loop:
 * needed is set when a kept checksum justifies keeping the report,
 * obs_lvl is the highest db_ck_fuzziness[] level that has decided needed,
 * timely is cleared once a checksum is old enough to compress,
 * nokeep_num_cks counts copied checksums that are left out of kept_cks.
 * Only the record header is copied into new; its checksums are appended
 * one at a time through new_ck. */
1314 needed = 0;
1315 obs_lvl = 0;
1316 timely = 1;
1317 nokeep_num_cks = 0;
1318 memcpy(&new, &rcd, sizeof(new)-sizeof(new.cks));
1319 new.fgs_num_cks &= (DB_RCD_FG_TRIM | DB_RCD_FG_SUMRY
1320 | DB_RCD_FG_DELAY);
1321 new_ck = new.cks;
1322 for (prev_type = DCC_CK_INVALID, rcd_ck = rcd.cks;
1323 rcd_ck < &rcd.cks[old_num_cks];
1324 prev_type = type, ++rcd_ck) {
1325 type = DB_CK_TYPE(rcd_ck);
1326 if (!DCC_CK_OK_DB(grey_on, type)) {
1327 static int whines = 0;
1328 if (whines < 20)
1329 dcc_error_msg("discarding %s"
1330 " checksum at "L_HPAT"%s",
1331 DB_TYPE2STR(type),
1332 old_db_pos,
1333 ++whines >= 20
1334 ? "; stop complaining"
1335 : "");
1336 ++expired_cks;
1337 new.fgs_num_cks |= DB_RCD_FG_TRIM;
1338 new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
1339 continue;
1340 }
1341
1342 if (type <= prev_type
1343 && prev_type != DCC_CK_FLOD_PATH) {
1344 dcc_error_msg("discarding out of order %s"
1345 " checksum at "L_HPAT,
1346 DB_TYPE2STR(type),
1347 old_db_pos);
1348 ++expired_cks;
1349 new.fgs_num_cks |= DB_RCD_FG_TRIM;
1350 new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
1351 continue;
1352 }
1353
1354 /* Silently discard pure junk from other servers,
1355 * provided it is junk by default */
1356 if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type)
1357 && DB_GLOBAL_NOKEEP(grey_on, type)
1358 && type != DCC_CK_FLOD_PATH
1359 && type != DCC_CK_SRVR_ID
1360 && DB_RCD_ID(&rcd) != DCC_ID_WHITE) {
1361 ++expired_cks;
1362 continue;
1363 }
1364
1365 /* Keep paths except on old records or records that
1366 * have been trimmed or compressed.
1367 * Never remove paths from server-ID declarations. */
1368 if (type == DCC_CK_FLOD_PATH) {
1369 if (DB_RCD_TRIMMED(&new)
1370 || DB_RCD_ID(&new) == DCC_ID_COMP)
1371 continue;
1372 /* forget line number on old whitelist entry */
1373 if (DB_RCD_ID(&rcd) == DCC_ID_WHITE)
1374 continue;
/* NOTE(review): the scan reads DB_CK_TYPE(rcd_ck2) before testing
 * rcd_ck2 against &rcd.cks[old_num_cks], so a path that is the last
 * checksum of a record makes the first read look one entry past the
 * record's checksums -- confirm that a path is never last. */
1375 rcd_ck2 = rcd_ck+1;
1376 for (;;) {
1377 type2 = DB_CK_TYPE(rcd_ck2);
1378 if (type2 == DCC_CK_SRVR_ID
1379 || !dcc_ts_older_ts(&rcd.ts,
1380 &new_ex_ts[type2
1381 ].all)) {
1382 /* keep this path since this report
1383 * is a server-ID declaration
1384 * or not old */
1385 *new_ck = *rcd_ck;
1386 ++new_ck;
1387 ++new.fgs_num_cks;
1388 ++nokeep_num_cks;
1389 break;
1390 }
1391 if (++rcd_ck2>=&rcd.cks[old_num_cks]) {
1392 /* we are discarding this path */
1393 new.fgs_num_cks |= DB_RCD_FG_TRIM;
1394 new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
1395 break;
1396 }
1397 }
1398 continue;
1399 }
1400
1401 if (!dcc_ts_older_ts(&rcd.ts, &new_ex_ts[type].all)) {
1402 /* This report is recent.
1403 * However, obsolete or junk checksums
1404 * don't make the report needed */
1405 if (DB_TEST_NOKEEP(db_parms.nokeep_cks, type)
1406 && DB_RCD_ID(&rcd) != DCC_ID_WHITE) {
1407 ++nokeep_num_cks;
1408 } else if (DB_CK_OBS(rcd_ck)) {
1409 /* This checksum is obsolete.
1410 * If it has the highest level of
1411 * fuzziness, then it controls whether
1412 * the whole report is needed. */
1413 if (obs_lvl < db_ck_fuzziness[type]) {
1414 obs_lvl = db_ck_fuzziness[type];
1415 needed = 0;
1416 }
1417 } else {
1418 /* This checksum is not obsolete.
1419 * If it is at least as fuzzy as any
1420 * other checksum, then it can say
1421 * the report is needed */
1422 if (obs_lvl <= db_ck_fuzziness[type]) {
1423 obs_lvl = db_ck_fuzziness[type];
1424 needed = 1;
1425 }
1426 }
1427
1428 } else {
1429 /* This checksum is at least somewhat old.
1430 * Throw away delete requests
1431 * and other servers' useless checksums */
1432 if (tgts_raw == DCC_TGTS_DEL
1433 || DB_TEST_NOKEEP(db_parms.nokeep_cks,
1434 type)) {
1435 ++expired_cks;
1436 new.fgs_num_cks |= DB_RCD_FG_TRIM;
1437 new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
1438 continue;
1439 }
1440 /* Throw away old obsolete checksums
1441 * and entire reports if the fuzziest
1442 * checksum is obsolete */
1443 if (DB_CK_OBS(rcd_ck)) {
1444 if (obs_lvl < db_ck_fuzziness[type]) {
1445 obs_lvl = db_ck_fuzziness[type];
1446 needed = 0;
1447 }
1448 ++expired_cks;
1449 new.fgs_num_cks |= DB_RCD_FG_TRIM;
1450 new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
1451 continue;
1452 }
1453
1454 /* old summaries are unneeded, because
1455 * they have already been flooded.
1456 * They do not contribute to local counts */
1457 if (DB_RCD_SUMRY(&rcd))
1458 continue;
1459
1460 /* The checksum is old enough to compress, so
1461 * mark the record as eligible for splitting. */
1462 timely = 0;
1463
1464 /* Discard this checksum if its ultimate total
1465 * is low or ancient
1466 * or if it reaches spam after this report.
1467 * To determine the ultimate total, we must
1468 * have a hash table to find the newest record,
1469 * which contains the final total */
1470 if (clean_mode != REPAIR_MODE) {
1471 i = get_lead(type, rcd_ck);
1472 if (i < 0)
1473 EXPIRE_BAIL();
1474 if (!i) {
1475 ++expired_cks;
1476 new.fgs_num_cks |= DB_RCD_FG_TRIM;
1477 new.fgs_num_cks &= ~DB_RCD_FG_DELAY;
1478 continue;
1479 }
1480 }
1481
1482 if (obs_lvl <= db_ck_fuzziness[type]) {
1483 /* Since we did not delete this
1484 * checksum, we need the record if this
1485 * checksum is fuzzy enough to control
1486 * our need. */
1487 needed = 1;
1488 /* If this is the fuzziest checksum we
1489 * have seen, then preceding and so
1490 * less fuzzy checksums are obsolete,
1491 * if they are old.
1492 * Assume that checksums are ordered
1493 * in the record by fuzziness. */
1494 if (obs_lvl < db_ck_fuzziness[type]) {
1495 obs_lvl = db_ck_fuzziness[type];
1496 if (obs_lvl != DCC_CK_FUZ_LVL_REP
1497 && !grey_on)
1498 fuzzy_obs(&new, &new_ck);
1499 }
1500 }
1501 }
1502
1503 /* Keep this checksum if we decide the whole report
1504 * is needed. */
1505 *new_ck = *rcd_ck;
1506
1507 ++new_ck;
1508 ++new.fgs_num_cks;
1509 }
1510
1511 /* occasionally let the daemon work with the old file */
1512 if (need_unlock >= UNLOCK_INTERVAL_USECS) {
1513 need_unlock = 0;
1514 if (!standalone && !db_unlock())
1515 EXPIRE_BAIL();
1516 }
1517
1518 /* if none of its checksums are needed,
1519 * then discard the entire record */
1520 if (!needed) {
1521 expired_cks += DB_NUM_CKS(&new);
1522 ++expired_rcds;
1523 continue;
1524 }
1525
1526 new_num_cks = DB_NUM_CKS(&new);
1527 kept_cks += new_num_cks - nokeep_num_cks;
1528
1529 /* Put the new record into the new file.
1530 *
1531 * If all of the record is recent, if it contains 1 checksum,
1532 * or if all of its totals are the same, then simply add it.
1533 *
1534 * Otherwise, divide it into records of identical counts
1535 * to allow compression or combining with other records. */
1536 if (new_num_cks > 1
1537 && (!timely
1538 || DB_RCD_ID(&new) == DCC_ID_COMP
1539 || DB_RCD_TRIMMED(&new))) {
1540 for (;;) {
1541 /* skip the checksums that have the same total
1542 * as the first checksum to leave them with the
1543 * original new report */
1544 new_ck = new.cks;
1545 ck_tgts = DB_TGTS_CK(new_ck);
1546 for (i = 1; i < new_num_cks; ++i) {
1547 ++new_ck;
1548 if (DB_TGTS_CK(new_ck) != ck_tgts)
1549 break;
1550 }
1551 if (new_num_cks <= i)
1552 break;
1553 new_num_cks -= i;
1554
1555 /* write the checksums with the common total */
1556 new.srvr_id_auth = DCC_ID_COMP;
1557 new.fgs_num_cks = i;
1558 if (!write_new_rcd(&new,
1559 sizeof(new) - sizeof(new.cks)
1560 + i*sizeof(new.cks[0])))
1561 EXPIRE_BAIL();
1562
1563 /* handle the remaining checksums */
1564 new.fgs_num_cks = new_num_cks;
1565 memmove(&new.cks[0], &new.cks[i],
1566 new_num_cks*sizeof(new.cks[0]));
1567 }
1568 }
1569
1570 /* write the rest (or all) of the new record */
1571 if (!write_new_rcd(&new,
1572 sizeof(new) - sizeof(new.cks)
1573 + new_num_cks*sizeof(new.cks[0])))
1574 EXPIRE_BAIL();
1575 }
1576 write_new_flush(1);
1577 alarm(0);
1578
1579 /* do final adjustment of the flooding positions */
1580 adj_mmap();
1581 /* force them to be right if the system crashed with the
1582 * flod.map file on the disk more up to date and so after the
1583 * database file on the disk */
1584 for (mp = new_flod_mmaps.mmaps;
1585 mp <= LAST(new_flod_mmaps.mmaps);
1586 ++mp) {
1587 if (mp->rem_hostname[0] != '\0'
1588 && mp->confirm_pos > new_db_csize)
1589 mp->confirm_pos = new_db_csize;
1590 }
1591
1592 /* We are finished with the old file.
1593 * Mark all of its pages MADV_DONTNEED */
1594 rel_db_states();
/* i becomes the function result: nonzero only if db_unload() returned
 * nonzero and db_close() succeeded */
1595 i = (db_unload(0, 2) != 0);
1596 if (!db_close(1))
1597 i = 0;
1598
1599 write_new_hdr(emptied);
1600 report_progress(1, " processed", "MBytes",
1601 old_db_pos, old_db_csize, 1024*1024);
1602 if (grey_on)
1603 quiet_trace_msg("expired %d records and %d checksums in %s",
1604 expired_rcds, expired_cks, cur_db_nm);
1605 else
1606 quiet_trace_msg("expired %d records and %d checksums,"
1607 " obsoleted %d checksums in %s",
1608 expired_rcds, expired_cks, obs_rcds, cur_db_nm);
1609 return i;
1610 }
1611
1612
1613
1614 /* copy the database copy while doing minimal expiring */
1615 static u_char
1616 copy_db(void)
1617 {
1618 static DB_VERSION_BUF old_version4 = DB_VERSION4_STR;
1619 static DB_VERSION_BUF old_version3 = DB_VERSION3_STR;
1620 union {
1621 DB_HDR hdr;
1622 DB_V4_PARMS v4;
1623 DB_V3_PARMS v3;
1624 } old_db;
1625 struct timeval sn;
1626
1627 /* do not lock the old database because the daemon must continue
1628 * to answer requests */
1629 if (old_db_fd < 0) {
1630 old_db_fd = open(cur_db_nm, O_RDONLY, 0);
1631 if (old_db_fd == -1)
1632 dcc_logbad(EX_IOERR, "open(%s): %s",
1633 cur_db_nm, ERROR_STR());
1634 }
1635
1636 if (!read_db_hdr(dcc_emsg, &old_db.hdr, old_db_fd, cur_db_nm))
1637 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
1638 if (memcmp(old_db.hdr.p.version, db_version_buf,
1639 sizeof(old_db.hdr.p.version))) {
1640 if (!memcmp(old_db.v4.version, old_version4,
1641 sizeof(old_db.v4.version))) {
1642 memset(&old_db_parms, 0,
1643 sizeof(old_db_parms));
1644 memcpy(old_db_parms.version, db_version_buf,
1645 sizeof(old_db_parms.version));
1646
1647 old_db_parms.db_csize = old_db.v4.db_csize;
1648 old_db_parms.pagesize = old_db.v4.pagesize;
1649 old_db_parms.sn = old_db.v4.sn;
1650 old_db_parms.cleared = old_db.v4.cleared;
1651 old_db_parms.cleaned = old_db.v4.cleaned;
1652 old_db_parms.cleaned_cron = old_db.v4.cleaned_cron;
1653 memcpy(old_db_parms.ex_spam, old_db.v4.ex_spam,
1654 sizeof(old_db_parms.ex_spam));
1655 memcpy(old_db_parms.ex_all, old_db.v4.ex_spam,
1656 sizeof(old_db_parms.ex_all));
1657 memcpy(old_db_parms.ex_secs, old_db.v4.ex_secs,
1658 sizeof(old_db_parms.ex_secs));
1659 old_db_parms.nokeep_cks = old_db.v4.nokeep_cks;
1660 old_db_parms.flags = old_db.v4.flags;
1661 old_db_parms.old_db_csize = old_db.v4.old_db_csize;
1662 old_db_parms.db_added = old_db.v4.db_added;
1663 old_db_parms.hash_used = old_db.v4.hash_used;
1664 old_db_parms.old_hash_used = old_db.v4.old_hash_used;
1665 old_db_parms.hash_added = old_db.v4.hash_added;
1666 old_db_parms.rate_secs = old_db.v4.rate_secs;
1667 old_db_parms.last_rate_sec = old_db.v4.last_rate_sec;
1668 old_db_parms.old_kept_cks = old_db.v4.old_kept_cks;
1669
1670 } else if (!memcmp(old_db.v3.version, old_version3,
1671 sizeof(old_db.v3.version))) {
1672 memset(&old_db_parms, 0,
1673 sizeof(old_db_parms));
1674 memcpy(old_db_parms.version, db_version_buf,
1675 sizeof(old_db_parms.version));
1676
1677 old_db_parms.db_csize = old_db.v3.db_csize;
1678 old_db_parms.pagesize = old_db.v3.pagesize;
1679 old_db_parms.sn = old_db.v3.sn;
1680 memcpy(old_db_parms.ex_spam, old_db.v3.ex_spam,
1681 sizeof(old_db_parms.ex_spam));
1682 memcpy(old_db_parms.ex_secs, old_db.v3.ex_secs,
1683 sizeof(old_db_parms.ex_secs));
1684 old_db_parms.nokeep_cks = old_db.v3.nokeep_cks;
1685 if (old_db.v3.flags & DB_PARM_V3_FG_GREY)
1686 old_db_parms.flags |= DB_PARM_FG_GREY;
1687 if (old_db.v3.flags & DB_PARM_V3_FG_CLEARED)
1688 old_db_parms.flags |= DB_PARM_FG_CLEARED;
1689 old_db_parms.old_db_csize = old_db.v3.old_db_csize;
1690 old_db_parms.db_added = old_db.v3.db_added;
1691 old_db_parms.hash_used = old_db.v3.hash_used;
1692 old_db_parms.old_hash_used = old_db.v3.old_hash_used;
1693 old_db_parms.hash_added = old_db.v3.hash_added;
1694 old_db_parms.rate_secs = old_db.v3.rate_secs;
1695 old_db_parms.last_rate_sec = old_db.v3.last_rate_sec;
1696 old_db_parms.old_kept_cks = old_db.v3.old_kept_cks;
1697
1698 dcc_ts2timeval(&sn, &old_db_parms.sn);
1699 old_db_parms.cleared = sn.tv_sec;
1700 old_db_parms.cleaned = sn.tv_sec;
1701 if (old_db.v3.flags & DB_PARM_V3_FG_SELF_CLEAN2) {
1702 old_db_parms.cleared -= 2*24*60*60;
1703 old_db_parms.cleaned -= 24*60*60;
1704 }
1705 } else {
1706 dcc_logbad(EX_IOERR, "%s has the wrong magic \"%.*s\"",
1707 cur_db_nm,
1708 ISZ(DB_VERSION_BUF), old_db.hdr.p.version);
1709 }
1710 } else {
1711 old_db_parms = old_db.hdr.p;
1712 }
1713
1714 db_parms.sn = old_db_parms.sn;
1715 db_parms.cleared = old_db_parms.cleared;
1716 db_parms.cleaned = old_db_parms.cleaned;
1717 db_parms.cleaned_cron = old_db_parms.cleaned_cron;
1718 memcpy(db_parms.ex_all, old_db_parms.ex_all,
1719 sizeof(db_parms.ex_all));
1720 memcpy(db_parms.ex_spam, old_db_parms.ex_spam,
1721 sizeof(db_parms.ex_spam));
1722 memcpy(&db_parms.ex_secs, &old_db_parms.ex_secs,
1723 sizeof(db_parms.ex_secs));
1724 db_parms.nokeep_cks = old_db_parms.nokeep_cks;
1725 db_parms.flags = old_db_parms.flags;
1726
1727 set_db_tholds(db_parms.nokeep_cks);
1728
1729 return expire(old_db_parms.db_csize);
1730 }
1731
1732
1733
1734 /* Copy any records from the old file to the new file that were
1735 * added to the old file while we were creating the new file. */
1736 static u_char
1737 catchup(DCC_EMSG emsg)
1738 {
1739 DB_HDR old_db_hdr;
1740 DB_RCD rcd;
1741 int rcd_len;
1742 u_char result;
1743 int count, old_count;
1744
1745 /* Because dccd knows dbclean is running, dccd will have been
1746 * keeping its header block more accurate than usual. */
1747 result = 1;
1748 count = 0;
1749 do {
1750 old_count = count;
1751 if (!read_db_hdr(dcc_emsg, &old_db_hdr,
1752 old_db_fd, old_db_nm)) {
1753 emsg = 0;
1754 result = 0;
1755 break;
1756 }
1757 if (old_db_hdr.p.db_csize < old_db_pos) {
1758 dcc_error_msg("%s mysteriously truncated", old_db_nm);
1759 result = 0;
1760 break;
1761 }
1762 if ((off_t)old_db_pos != lseek(old_db_fd, old_db_pos,
1763 SEEK_SET)) {
1764 dcc_pemsg(EX_IOERR, emsg, "lseek(%s, "L_HPAT"): %s",
1765 old_db_nm, old_db_pos, ERROR_STR());
1766 emsg = 0;
1767 result = 0;
1768 break;
1769 }
1770 read_rcd_invalidate(0);
1771 while (old_db_pos < old_db_hdr.p.db_csize) {
1772 rcd_len = read_rcd(emsg, &rcd,
1773 old_db_fd, old_db_pos, old_db_nm);
1774 if (rcd_len <= 0) {
1775 if (rcd_len == 0)
1776 dcc_pemsg(EX_IOERR, emsg,
1777 "premature EOF in %s"
1778 " at "L_HPAT
1779 " instead of "L_HPAT,
1780 old_db_nm,
1781 old_db_pos,
1782 old_db_hdr.p.db_csize);
1783 emsg = 0;
1784 result = 0;
1785 break;
1786 }
1787 /* If something bad happens, we may not be able to
1788 * go back to the old file. Carry on to get as much
1789 * data as we can although we know the dccd daemon
1790 * may croak when we release it */
1791 if (!db_add_rcd(emsg, &rcd)) {
1792 emsg = 0;
1793 result = 0;
1794 break;
1795 }
1796 old_db_pos += rcd_len;
1797 ++count;
1798 }
1799 } while (result && old_count != count);
1800
1801 if (count > 0 && db_debug >= 1)
1802 quiet_trace_msg("copied %d late reports%s",
1803 count, result ? "" : " with problems");
1804
1805 return result;
1806 }
1807
1808
1809
1810 /* try to compress old report pointed to by db_sts.rcd with a predecessor */
/* compress_old() tries to merge the record mapped at db_sts.rcd (NEWER)
 * with the record that holds the predecessor of one of its checksums
 * (OLDER, mapped at db_sts.rcd2).
 * On success the record count of OLDER is added to NEWER, OLDER's count
 * is set to 0 so that it is deleted next time, both records are marked
 * for flushing, and comp_rcds is incremented.
 * It returns without changing anything when the records do not qualify.
 * Failure to map the predecessor is reported through dcc_logbad(). */
1811 static void
1812 compress_old(void)
1813 {
1814 DB_PTR prev, prev1;
1815 DB_RCD_CK *new_ck, *prev_ck;
1816 int new_ck_num, prev_ck_num;
1817 DCC_TGTS new_tgts, prev_tgts;
1818 DCC_CK_TYPES new_type, prev_type;
1819 #define NEWER (db_sts.rcd.d.r)
1820 #define OLDER (db_sts.rcd2.d.r)
1821
1822 /* Before spending the time to map a preceding checksum,
1823 * find at least one checksum worth keeping and that might
1824 * be combined or compressed with its predecessor. */
/* prev and prev_type end up describing the last kept checksum in NEWER
 * that has a predecessor link */
1825 prev = DB_PTR_NULL;
1826 prev_type = DCC_CK_INVALID;
1827 for (new_ck_num = DB_NUM_CKS(NEWER),
1828 new_ck = NEWER->cks;
1829 new_ck_num != 0;
1830 --new_ck_num, ++new_ck) {
1831 if (DB_CK_OBS(new_ck))
1832 continue;
1833 new_type = DB_CK_TYPE(new_ck);
1834 if (DB_TEST_NOKEEP(db_parms.nokeep_cks, new_type))
1835 continue;
1836 /* all of the checksums in this record must be old */
1837 if (!dcc_ts_older_ts(&NEWER->ts, &new_ex_ts[new_type].all))
1838 return;
1839 /* you can compress reports only if you have >=2 */
1840 prev1 = DB_PTR_EX(new_ck->prev);
1841 if (prev1 != DB_PTR_NULL) {
1842 prev = prev1;
1843 prev_type = new_type;
1844 }
1845 }
1846 if (prev_type == DCC_CK_INVALID)
1847 return;
1848
1849 /* having picked a checksum,
1850 * map the record containing its predecessor */
/* the returned pointer is used only as a success test here;
 * prev_ck is pointed at OLDER->cks again below */
1851 prev_ck = db_map_rcd_ck(dcc_emsg, &db_sts.rcd2, prev, prev_type);
1852 if (!prev_ck)
1853 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
1854
1855 /* The current and previous records must be old
1856 * and contain the same useful checksums. */
1857 new_ck_num = DB_NUM_CKS(NEWER);
1858 new_ck = NEWER->cks;
1859 prev_ck_num = DB_NUM_CKS(OLDER);
1860 prev_ck = OLDER->cks;
1861 for (;;) {
1862 /* we must run out of checksums in the two reports at the
1863 * same time */
1864 if (prev_ck_num == 0 || new_ck_num == 0) {
1865 if (prev_ck_num == new_ck_num)
1866 break;
1867 return;
1868 }
1869
1870 /* ignore paths and other junk */
1871 if (DB_CK_OBS(prev_ck)) {
1872 --prev_ck_num;
1873 ++prev_ck;
1874 continue;
1875 }
1876 prev_type = DB_CK_TYPE(prev_ck);
1877 if (DB_TEST_NOKEEP(db_parms.nokeep_cks, prev_type)) {
1878 --prev_ck_num;
1879 ++prev_ck;
1880 continue;
1881 }
1882 if (DB_CK_OBS(new_ck)) {
1883 --new_ck_num;
1884 ++new_ck;
1885 continue;
1886 }
1887 new_type = DB_CK_TYPE(new_ck);
1888 if (DB_TEST_NOKEEP(db_parms.nokeep_cks, new_type)) {
1889 --new_ck_num;
1890 ++new_ck;
1891 continue;
1892 }
1893
1894 /* because the checksums are ordered,
1895 * give up at the first difference in checksums */
1896 if (new_type != prev_type
1897 || memcmp(new_ck->sum, prev_ck->sum, sizeof(new_ck->sum)))
1898 return;
1899
1900 /* Give up at the first recent and valuable checksum. */
1901 if (!dcc_ts_older_ts(&OLDER->ts, &new_ex_ts[new_type].all))
1902 return;
1903
1904 --prev_ck_num;
1905 ++prev_ck;
1906 --new_ck_num;
1907 ++new_ck;
1908 }
1909
1910 /* The current and previous records are compatible.
1911 * Add the count of the previous record to the current record
1912 * and mark the previous record useless.
1913 * The individual totals in the current record are already correct,
1914 * so postpone worrying about the deleted record. */
/* The counts are summed only when NEWER's raw count is below
 * DCC_TGTS_TOO_MANY, and the sum is capped at DCC_TGTS_TOO_MANY.
 * When NEWER's raw count is not below it, NEWER's count is left alone
 * but OLDER is still zeroed below. */
1915 new_tgts = DB_TGTS_RCD_RAW(NEWER);
1916 if (new_tgts < DCC_TGTS_TOO_MANY) {
1917 prev_tgts = DB_TGTS_RCD(OLDER);
1918 if (prev_tgts > DCC_TGTS_TOO_MANY
1919 || prev_tgts == 0)
1920 return;
1921 if (prev_tgts == DCC_TGTS_TOO_MANY) {
1922 new_tgts = DCC_TGTS_TOO_MANY;
1923 } else {
1924 new_tgts += prev_tgts;
1925 if (new_tgts > DCC_TGTS_TOO_MANY)
1926 new_tgts = DCC_TGTS_TOO_MANY;
1927 }
1928 DB_TGTS_RCD_SET(NEWER, new_tgts);
1929 }
1930
1931 /* Mark the previous record to be deleted next time. */
1932 DB_TGTS_RCD_SET(OLDER, 0);
1933 /* Mark it dirty so that the need to delete it gets to the file. */
1934 SET_FLUSH_RCD(&db_sts.rcd2, 1);
1935
/* the merged record is labelled as compressed and loses its trim,
 * summary, and delay flags */
1936 NEWER->srvr_id_auth = DCC_ID_COMP;
1937 NEWER->fgs_num_cks &= ~(DB_RCD_FG_TRIM
1938 | DB_RCD_FG_SUMRY
1939 | DB_RCD_FG_DELAY);
1940 /* use the newest timestamp */
1941 if (dcc_ts_older_ts(&NEWER->ts, &OLDER->ts))
1942 NEWER->ts = OLDER->ts;
1943 SET_FLUSH_RCD(&db_sts.rcd, 1);
1944
1945 ++comp_rcds;
1946
1947 #undef NEWER
1948 #undef OLDER
1949 }
1950
1951
1952
1953 /* write a parsed whitelist checksum */
1954 static int
1955 white_write(DCC_EMSG emsg, DCC_WF *wf,
1956 DCC_CK_TYPES type, DCC_SUM sum, DCC_TGTS tgts)
1957 {
1958 DB_RCD rcd;
1959 int rcd_len;
1960 char buf[30];
1961 DCC_FNM_LNO_BUF fnm_buf;
1962
1963 /* ignore checksums that clients are never supposed to send
1964 * to the server or for some other reason cannot be whitelisted */
1965 switch (type) {
1966 case DCC_CK_INVALID:
1967 case DCC_CK_ENV_TO:
1968 case DCC_CK_G_MSG_R_TOTAL:
1969 case DCC_CK_G_TRIPLE_R_BULK:
1970 case DCC_CK_SRVR_ID:
1971 dcc_pemsg(EX_DATAERR, emsg,
1972 "%s checksum cannot be used%s",
1973 dcc_type2str_err(type, 0, 0, grey_on),
1974 wf_fnm_lno(&fnm_buf, wf));
1975 return 0;
1976
1977 case DCC_CK_IP:
1978 case DCC_CK_ENV_FROM:
1979 case DCC_CK_FROM:
1980 case DCC_CK_MESSAGE_ID:
1981 case DCC_CK_RECEIVED:
1982 case DCC_CK_SUB:
1983 case DCC_CK_BODY:
1984 case DCC_CK_FUZ1:
1985 case DCC_CK_FUZ2:
1986 break; /* these are ok */
1987 }
1988
1989 if (tgts == DCC_TGTS_OK_MX
1990 || tgts == DCC_TGTS_OK_MXDCC
1991 || tgts == DCC_TGTS_SUBMIT_CLIENT) {
1992 dcc_pemsg(EX_DATAERR, emsg,"\"%s\" ignored%s",
1993 dcc_tgts2str(buf, sizeof(buf), tgts, 0),
1994 wf_fnm_lno(&fnm_buf, wf));
1995 return 0;
1996 }
1997
1998 /* Greylist whitelist entries cannot involve blacklisting.
1999 * They use DCC_TGTS_GREY_WHITE to signal whitelisting */
2000 if (grey_on) {
2001 /* ignore anything except whitelisting */
2002 if (tgts != DCC_TGTS_OK) {
2003 dcc_pemsg(EX_DATAERR, emsg, "\"%s\" ignored%s",
2004 dcc_tgts2str(buf, sizeof(buf), tgts, 0),
2005 wf_fnm_lno(&fnm_buf, wf));
2006 return 0;
2007 }
2008 tgts = DCC_TGTS_GREY_WHITE;
2009 }
2010
2011 memset(&rcd, 0, sizeof(rcd));
2012 dcc_timeval2ts(&rcd.ts, &clean_start, 0);
2013 rcd.srvr_id_auth = DCC_ID_WHITE;
2014 DB_TGTS_RCD_SET(&rcd, tgts);
2015
2016 rcd.cks[0].type_fgs = DCC_CK_FLOD_PATH;
2017 memcpy(rcd.cks[0].sum, &wf->lno, sizeof(wf->lno));
2018 rcd.cks[0].sum[sizeof(wf->lno)] = wf->fno;
2019
2020 rcd.cks[1].type_fgs = type;
2021 memcpy(rcd.cks[1].sum, sum, sizeof(rcd.cks[1]));
2022
2023 rcd_len = sizeof(rcd) - sizeof(rcd.cks) + 2*sizeof(rcd.cks[0]);
2024 rcd.fgs_num_cks = 2;
2025
2026 if (!write_new_rcd(&rcd, rcd_len))
2027 return -1;
2028
2029 ++white_cks;
2030 return 1;
2031 }
2032
2033
2034
2035 /* Add the whitelist of certified non-spam and non-spammers
2036 * and otherwise start the database */
2037 static void
2038 parse_white(void)
2039 {
2040 int white_fd;
2041
2042 white_cks = 0;
2043
2044 if (!keep_white) {
2045 memset(&dbclean_white_tbl, 0,sizeof(dbclean_white_tbl));
2046 dcc_wf_init(&dbclean_wf, 0);
2047 fnm2rel_good(dbclean_wf.ascii_nm, WHITELIST_NM(grey_on), 0);
2048 dbclean_wf.wtbl = &dbclean_white_tbl;
2049 white_fd = open(dbclean_wf.ascii_nm, O_RDONLY, 0);
2050 if (white_fd < 0) {
2051 /* worry only if the file exists but can't be used */
2052 if (errno != ENOENT) {
2053 dcc_error_msg("open(%s): %s",
2054 dbclean_wf.ascii_nm, ERROR_STR());
2055 keep_white = 1;
2056 }
2057 } else {
2058 if (0 > dcc_parse_whitefile(0, &dbclean_wf, white_fd,
2059 white_write, 0))
2060 keep_white = 1;
2061 if (0 > close(white_fd))
2062 dcc_error_msg("close(%s): %s",
2063 dbclean_wf.ascii_nm, ERROR_STR());
2064 }
2065 }
2066 if (keep_white) {
2067 /* If the whitelist was bad, purge the new database of
2068 * the bad new whitelist. We will use the existing
2069 * whitelist */
2070 write_new_flush(1);
2071 new_db_csize = DB_PTR_BASE;
2072 if (0 > ftruncate(new_db_fd, DB_PTR_BASE))
2073 dcc_logbad(EX_IOERR, "truncate(%s, %d): %s",
2074 new_db_nm, DB_PTR_BASE, ERROR_STR());
2075 new_db_fsize = DB_PTR_BASE;
2076 white_cks = 0;
2077 }
2078
2079 /* update the counts in the database file */
2080 write_new_hdr(1);
2081 }
2082
2083
2084
2085 /* check for conflicts in the whitelist file in the record pointed to
2086 * by db_sts.rcd */
2087 static void
2088 check_white(void)
2089 {
2090 static int msgs;
2091 static int prev_lno1, prev_lno2;
2092 static int prev_fno1, prev_fno2;
2093 const DB_RCD_CK *rcd_ck, *prev_ck;
2094 int lno1, lno2;
2095 int fno1, fno2;
2096 DCC_TGTS tgts1, tgts2;
2097 char tgts1_buf[30], tgts2_buf[30];
2098 const char *fname1, *fname2;
2099 DCC_CK_TYPES type;
2100 DB_PTR prev;
2101
2102 /* don't check if we have already complained enough */
2103 if (msgs > 20)
2104 return;
2105
2106 rcd_ck = db_sts.rcd.d.r->cks;
2107
2108 /* it is pointless without line numbers, which are lacking only
2109 * if we saved the old whitelist entries because the file is
2110 * broken */
2111 if (DB_NUM_CKS(db_sts.rcd.d.r) != 2
2112 || DB_CK_TYPE(rcd_ck) != DCC_CK_FLOD_PATH)
2113 return;
2114
2115 /* conflict is impossible with a single line */
2116 ++rcd_ck;
2117 prev = DB_PTR_EX(rcd_ck->prev);
2118 if (prev == DB_PTR_NULL)
2119 return;
2120
2121 type = DB_CK_TYPE(rcd_ck);
2122 prev_ck = db_map_rcd_ck(dcc_emsg, &db_sts.rcd2, prev, type);
2123 if (!prev_ck)
2124 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
2125
2126 tgts1 = DB_TGTS_RCD(db_sts.rcd2.d.r);
2127 tgts2 = DB_TGTS_RCD(db_sts.rcd.d.r);
2128 if (tgts1 == tgts2)
2129 return; /* no conflict */
2130
2131 memcpy(&lno1, db_sts.rcd2.d.r->cks[0].sum, sizeof(lno1));
2132 fno1 = db_sts.rcd2.d.r->cks[0].sum[sizeof(lno1)];
2133 memcpy(&lno2, db_sts.rcd.d.r->cks[0].sum, sizeof(lno2));
2134 fno2 = db_sts.rcd.d.r->cks[0].sum[sizeof(lno2)];
2135
2136 if (lno1 == prev_lno1 && fno1 == prev_fno1
2137 && lno2 == prev_lno2 && fno2 == prev_fno2)
2138 return;
2139
2140 fname1 = wf_fnm(&dbclean_wf, fno1);
2141 fname2 = wf_fnm(&dbclean_wf, fno2);
2142 if (fname1 == fname2) {
2143 fname1 = "";
2144 } else {
2145 fname1 = path2fnm(fname1);
2146 }
2147 dcc_error_msg("\"%s\" in line %d%s%s conflicts with \"%s\""
2148 " in line %d of %s",
2149 dcc_tgts2str(tgts1_buf, sizeof(tgts1_buf),
2150 tgts1, grey_on),
2151 lno1,
2152 *fname1 != '\0' ? " of " : "", fname1,
2153 dcc_tgts2str(tgts2_buf, sizeof(tgts2_buf),
2154 tgts2, grey_on),
2155 lno2,
2156 fname2);
2157 ++msgs;
2158 prev_lno1 = lno1;
2159 prev_fno1 = fno1;
2160 prev_lno2 = lno2;
2161 prev_fno2 = fno2;
2162 }
2163
2164
2165
2166 /* Rebuild the hash table and the totals and links within the database file
2167 * and finish with the file locked.
 *
 * In order, the code below:
 *  - chooses new_hash_len when the caller left it 0,
 *  - removes any old file named new_hash_nm and opens the new database
 *    with db_open(),
 *  - walks every record from DB_PTR_BASE to db_csize once per pass,
 *    handing each to db_link_rcd() with the current hash address window,
 *    and calling check_white() for whitelist records and compress_old(),
 *  - stores the resulting counts in db_parms, flushes them, and calls
 *    make_clean().
 * Any failure ends the run through dcc_logbad(). */
2168 static void
2169 build_hash(void)
2170 {
2171 DB_PTR rcd_pos;
2172 DB_HADDR haddr_window, haddr_lo, haddr_hi;
2173 int pass, total_passes;
2174 int rcd_len;
2175 int rcd_cks, rcd_sums;
2176 DB_PTR rcds, sums; /* passes can inflate these */
2177 const DB_RCD_CK *rcd_ck;
2178 DB_HADDR guess_hash_len;
2179 double db_rate, hash_ratio;
2180 struct timeval db_flushed;
2181
2182 db_buf_init(new_db_pagesize, 0);
2183
2184 if (new_hash_len == 0) {
2185 /* Try to choose a hash table size now so that when it
2186 * is next time to rebuild after 24 hours of incoming
2187 * checksums, the alpha or load factor will still be 0.9.
2188 * We probably ran 24 hours ago, so the old hash size
2189 * is an estimate of the size tomorrow. */
2190
2191 /* Guess the number of distinct checksums added
2192 * tomorrow based on the current average rate */
2193 db_rate = db_add_rate(&new_db_parms, 1);
2194 if (db_rate > 0.0) {
2195 /* Increase the average rate by 10% to account
2196 * for the 30% decrease often seen on weekends. */
2197 guess_hash_len = db_rate * 1.1 * 24*60*60;
2198
2199 /* predict # of distinct checksums in current data */
2200 hash_ratio = old_db_parms.old_kept_cks;
2201 if (hash_ratio == 0.0) {
2202 hash_ratio = 1.0;
2203 } else {
2204 hash_ratio = (HADDR2LEN(old_db_parms
2205 .old_hash_used)
2206 / hash_ratio);
/* a ratio outside 0.3..1.0 is not believed and is replaced by 1.0 */
2207 if (hash_ratio > 1.0 || hash_ratio < 0.3)
2208 hash_ratio = 1.0;
2209 }
2210 guess_hash_len += (kept_cks * hash_ratio) + white_cks;
2211
2212 if (db_debug)
2213 quiet_trace_msg("hash size from old=%d"
2214 " %d from db_rate=%.1f"
2215 " hash_ratio=%.1f=%d/%d"
2216 " kept=%d white=%d",
2217 old_db_hash_used,
2218 guess_hash_len,
2219 db_rate, hash_ratio,
2220 HADDR2LEN(old_db_parms
2221 .old_hash_used),
2222 old_db_parms.old_kept_cks,
2223 kept_cks, white_cks);
2224
2225 } else {
2226 /* guess if we do not have a good measure
2227 * of the recent rate */
2228 guess_hash_len = kept_cks+white_cks;
2229 guess_hash_len += guess_hash_len/5;
2230 }
2231
/* take the larger of the old table's usage and the estimate */
2232 new_hash_len = old_db_hash_used;
2233 if (new_hash_len < guess_hash_len)
2234 new_hash_len = guess_hash_len;
2235
2236 /* go for load factor 0.9 */
2237 new_hash_len += new_hash_len/10;
2238
/* this only traces the excess; the size is not reduced here */
2239 if (new_hash_len > db_max_hash_entries)
2240 quiet_trace_msg("default hash size %d entries"
2241 " > maximum %d",
2242 new_hash_len, db_max_hash_entries);
2243
/* enforce a floor that depends on whether this is a greylist database */
2244 if (grey_on) {
2245 if (new_hash_len < MIN_HASH_ENTRIES)
2246 new_hash_len = MIN_HASH_ENTRIES;
2247 } else {
2248 if (new_hash_len < DEF_HASH_ENTRIES)
2249 new_hash_len = DEF_HASH_ENTRIES;
2250 }
2251 }
2252
2253 /* Open and lock the new database */
/* a missing old hash file is not reported (enoent_ok=1) */
2254 unlink_whine(new_hash_nm, 1);
2255 new_hash_created = 1;
2256 if (!db_open(0, -1, new_db_nm, new_hash_len,
2257 DB_OPEN_LOCK_NOWAIT | db_mode)) {
2258 dcc_logbad(dcc_ex_code, "could not start database %s",
2259 new_db_nm);
2260 }
2261 if (db_debug)
2262 quiet_trace_msg("%s %s", db_window_size_str, new_db_nm);
2263
2264 /* guess which checksums we will keep so that we can count them */
2265 if (old_db_parms.nokeep_cks != 0)
2266 db_parms.nokeep_cks = old_db_parms.nokeep_cks;
2267
2268 /* add every record in the database file to the hash table and
2269 * fix its accumulated counts and reverse links */
2270 comp_rcds = 0;
2271 sums = 0;
2272 rcds = 0;
2273 report_progress_init();
2274 db_flushed = db_time;
2275
2276 /* if the hash table does not fit in 75% of RAM,
2277 * then make several passes over the data with as much of the
2278 * hash table as fits. */
2279 haddr_window = db_hash_page_len*((db_buf_total*3)/4);
/* never use a window smaller than 1/16 of the table, which bounds
 * the number of passes */
2280 if (haddr_window < db_hash_len/16)
2281 haddr_window = db_hash_len/16;
2282 total_passes = (db_hash_len+haddr_window-1)/haddr_window;
2283
/* Each pass re-reads all records and gives db_link_rcd() the window
 * haddr_lo..haddr_hi; presumably only checksums hashing into that
 * window are linked on the pass -- confirm in db_link_rcd(). */
2284 for (haddr_lo = 0, pass = 1;
2285 haddr_lo < db_hash_len;
2286 haddr_lo = haddr_hi, ++pass) {
/* The last window is given an open upper bound.  That also ends the
 * outer loop, assuming MAX_HASH_ENTRIES >= db_hash_len -- TODO confirm. */
2287 if (haddr_lo > db_hash_len-haddr_window)
2288 haddr_hi = MAX_HASH_ENTRIES;
2289 else
2290 haddr_hi = haddr_lo+haddr_window;
2291 for (rcd_pos = DB_PTR_BASE;
2292 rcd_pos < db_csize;
2293 rcd_pos += rcd_len) {
2294 /* skip reports crossing page boundaries */
/* advance one record header length at a time through the tail of the page */
2295 if (rcd_pos%db_pagesize > db_page_max) {
2296 rcd_len = DB_RCD_HDR_LEN;
2297 continue;
2298 }
2299 if (--progress_rpt_cnt <= 0) {
/* sums grows on every pass, so it is divided by the number of passes */
2300 report_progress(0, " hash rebuilt",
2301 "checksums",
2302 sums/total_passes, kept_cks, 1);
/* flush only when the seconds value of db_time has changed since the last flush */
2303 if (db_time.tv_sec != db_flushed.tv_sec) {
2304 db_flushed = db_time;
2305 if (!db_flush_db(dcc_emsg))
2306 dcc_logbad(dcc_ex_code,
2307 "flushing after linking"
2308 L_HPAT": %s",
2309 rcd_pos, dcc_emsg);
2310 }
2311 }
2312
/* map the record; rcd_len receives the length used to step to the next one */
2313 if (!db_map_rcd(0, &db_sts.rcd, rcd_pos, &rcd_len)) {
2314 dcc_logbad(dcc_ex_code,
2315 "hash build failed reading"
2316 " record at "L_HPAT,
2317 rcd_pos);
2318 }
2319
2320 /* skip end of page padding */
2321 if (db_sts.rcd.d.r->fgs_num_cks == 0)
2322 continue;
2323
2324 ++rcds;
2325
2326 /* count the checksums we'll link in this record */
2327 rcd_cks = DB_NUM_CKS(db_sts.rcd.d.r);
2328 rcd_sums = 0;
2329 for (rcd_ck = db_sts.rcd.d.r->cks;
2330 rcd_ck < &db_sts.rcd.d.r->cks[rcd_cks];
2331 ++rcd_ck) {
2332 if (!DB_TEST_NOKEEP(db_parms.nokeep_cks,
2333 DB_CK_TYPE(rcd_ck)))
2334 ++rcd_sums;
2335 }
2336 sums += rcd_sums;
2337
2338 /* Mark the record dirty so that any new hash links
2339 * get to the file if we are using -F. */
2340 db_set_flush(&db_sts.rcd, 0, rcd_len);
2341 if (!db_link_rcd(dcc_emsg, haddr_lo, haddr_hi)) {
2342 dcc_logbad(dcc_ex_code,
2343 "relinking record at "L_HPAT": %s",
2344 rcd_pos, dcc_emsg);
2345 }
2346
2347 /* check for conflicts in the whitelist file */
2348 if (DB_RCD_ID(db_sts.rcd.d.r) == DCC_ID_WHITE)
2349 check_white();
2350
2351 compress_old();
2352 }
2353
2354 if (progress_rpt_started && pass < total_passes)
2355 quiet_trace_msg(" pass %d", pass);
2356 }
2357
2358 report_progress(1, " hash rebuilt", "checksums",
2359 sums/total_passes, kept_cks, 1);
2360
/* record the state of the finished table in the database parameters */
2361 db_parms.old_hash_used = db_hash_used;
2362 db_parms.old_kept_cks = kept_cks;
2363 db_parms.hash_used = db_hash_used;
2364 db_parms.old_db_csize = db_csize;
2365 if (!db_flush_parms(dcc_emsg))
2366 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
2367
2368 quiet_trace_msg("hashed "L_DPAT" records containing "L_DPAT" checksums,"
2369 " compressed %d records",
2370 rcds/total_passes, sums/total_passes, comp_rcds);
2371
2372 /* Try to finish as much disk I/O on the new file as we can to minimize
2373 * stalling by dccd when we close the file and hand it over. This also
2374 * reduces system stalling hours later when dbclean runs again. */
2375 if (!make_clean(1))
2376 dcc_logbad(dcc_ex_code, "%s", dcc_emsg);
2377
2378
2379 quiet_trace_msg("%d hash entries total, %d or %d%% used",
2380 HADDR2LEN(db_hash_len),
2381 HADDR2LEN(db_hash_used),
2382 (int)((HADDR2LEN(db_hash_used)*100.0)
2383 / HADDR2LEN(db_hash_len)));
2384 }
2385
2386
2387
2388 static u_char
2389 write_new_db(const void *buf, int buflen, off_t pos, u_char fatal)
2390 {
2391 int i;
2392
2393 if (pos != lseek(new_db_fd, pos, SEEK_SET)) {
2394 if (fatal) {
2395 dcc_logbad(EX_IOERR, "lseek(%s, 0): %s",
2396 new_db_nm, ERROR_STR());
2397 } else {
2398 dcc_error_msg("lseek(%s, 0): %s",
2399 new_db_nm, ERROR_STR());
2400 }
2401 return 0;
2402 }
2403
2404 i = write(new_db_fd, buf, buflen);
2405 if (i == buflen) {
2406 if (new_db_fsize < pos+buflen)
2407 new_db_fsize = pos+buflen;
2408 return 1;
2409 }
2410
2411 if (fatal) {
2412 if (i < 0)
2413 dcc_logbad(EX_IOERR, "write(%s): %s",
2414 new_db_nm, ERROR_STR());
2415 else
2416 dcc_logbad(EX_IOERR, "write(%s)=%d instead of %d",
2417 new_db_nm, i, buflen);
2418 } else {
2419 if (i < 0)
2420 dcc_error_msg("write(%s): %s",
2421 new_db_nm, ERROR_STR());
2422 else
2423 dcc_error_msg("write(%s)=%d instead of %d",
2424 new_db_nm, i, buflen);
2425 }
2426 return 0;
2427 }
2428
2429
2430
2431 /* Use a large buffer to encourage the file system to avoid fragmentation.
 * The hdr member lets write_new_hdr() build the DB_HDR in place in the
 * same storage that write_new_buf() fills with record bytes through c[]. */
2432 static union {
2433 u_char c[DB_MIN_MIN_MBYTE*(1024*1024)/4];
2434 DB_HDR hdr;
2435 } write_new_db_buf;
/* number of bytes in write_new_db_buf waiting to be written */
2436 static u_int write_new_db_buflen = 0;
/* file offset at which write_new_flush() writes the waiting bytes */
2437 static DB_PTR write_new_base;
2438
2439 static u_char
2440 write_new_flush(u_char fatal)
2441 {
2442 u_char result = 1;
2443
2444 if (write_new_db_buflen != 0) {
2445 if (!write_new_db(&write_new_db_buf, write_new_db_buflen,
2446 write_new_base, fatal))
2447 result = 0;
2448 }
2449
2450 write_new_base = new_db_csize;
2451 write_new_db_buflen = 0;
2452 return result;
2453 }
2454
2455
2456 static u_char
2457 write_new_buf(const void *buf, int buflen)
2458 {
2459 if (write_new_db_buflen + buflen > ISZ(write_new_db_buf)
2460 && !write_new_flush(0))
2461 return 0;
2462
2463 memcpy(&write_new_db_buf.c[write_new_db_buflen], buf, buflen);
2464 write_new_db_buflen += buflen;
2465 return 1;
2466 }
2467
2468
2469
2470 /* add a record to the new file */
2471 static u_char
2472 write_new_rcd(const void *buf, int buflen)
2473 {
2474 static const u_char zeros[DB_RCD_LEN_MAX] = {0};
2475 DB_PTR new_page_num;
2476 u_char result;
2477 int pad, i;
2478
2479 /* pad accross page boundaries */
2480 new_page_num = DB_PTR2PG_NUM(new_db_csize + buflen, new_db_pagesize);
2481 if (new_page_num != DB_PTR2PG_NUM(new_db_csize, new_db_pagesize)) {
2482 pad = new_page_num*new_db_pagesize - new_db_csize;
2483 pad = (((pad + DB_RCD_HDR_LEN-1) / DB_RCD_HDR_LEN)
2484 * DB_RCD_HDR_LEN);
2485 do {
2486 i = sizeof(zeros);
2487 if (i > pad)
2488 i = pad;
2489 if (!write_new_buf(zeros, i))
2490 return 0;
2491 pad -= i;
2492 new_db_csize += i;
2493 } while (pad != 0);
2494 }
2495
2496 result = write_new_buf(buf, buflen);
2497 new_db_csize += buflen;
2498 return result;
2499 }
2500
2501
2502
2503 /* Write the magic string and the database parameters at the head of the
 * new database file.
 *	emptied is nonzero when the new database starts empty; then the
 *	"cleared" timestamp and flag are set and the old traffic counts
 *	and nokeep_cks are not carried over.
 * The header is built in write_new_db_buf, copied to new_db_parms, and
 * written at file offset 0.  Afterwards the file is extended with zeros
 * to a multiple of new_db_pagesize when the page size is known.
 * Write failures are fatal because write_new_flush(1) is used. */
2504 static void
2505 write_new_hdr(u_char emptied)
2506 {
2507 DB_HDR *new;
2508 struct timeval old_sn;
2509 time_t new_rate_secs;
2510 DCC_CK_TYPES type;
2511 int i;
2512
/* push out buffered records before the buffer is reused for the header */
2513 write_new_flush(1);
2514
2515 memset(&write_new_db_buf, 0, sizeof(write_new_db_buf));
2516 write_new_base = 0;
/* When the file already holds more than a header, or the page size is
 * unknown, rewrite only the header.  Otherwise write a whole zero-filled
 * first page, limited to the size of the buffer. */
2517 if (new_db_fsize > ISZ(DB_HDR)
2518 || new_db_pagesize == 0) {
2519 write_new_db_buflen = sizeof(DB_HDR);
2520 } else {
2521 write_new_db_buflen = new_db_pagesize;
2522 if (write_new_db_buflen > ISZ(write_new_db_buf))
2523 write_new_db_buflen = ISZ(write_new_db_buf);
2524 }
2525
2526 new = &write_new_db_buf.hdr;
2527 memset(new, 0, sizeof(*new));
2528 memcpy(new->p.version, db_version_buf, sizeof(new->p.version));
2529
/* the start time of this run is stored in the sn field */
2530 dcc_timeval2ts(&new->p.sn, &clean_start, 0);
2531 if (emptied) {
2532 new->p.cleared = clean_start.tv_sec;
2533 } else {
2534 new->p.cleared = old_db_parms.cleared;
/* NORMAL_MODE updates both cleaning timestamps, NO_CRON_MODE only
 * "cleaned", and the remaining modes keep both old values */
2535 switch (clean_mode) {
2536 case NORMAL_MODE:
2537 new->p.cleaned = clean_start.tv_sec;
2538 new->p.cleaned_cron = clean_start.tv_sec;
2539 break;
2540 case NO_CRON_MODE:
2541 new->p.cleaned = clean_start.tv_sec;
2542 new->p.cleaned_cron = old_db_parms.cleaned_cron;
2543 break;
2544 case REPAIR_MODE:
2545 case QUICK_MODE:
2546 case HASH_MODE:
2547 case DEL_MODE:
2548 new->p.cleaned = old_db_parms.cleaned;
2549 new->p.cleaned_cron = old_db_parms.cleaned_cron;
2550 break;
2551 }
2552 }
2553
2554 if (grey_on)
2555 new->p.flags |= DB_PARM_FG_GREY;
2556 if (emptied || (old_db_parms.flags & DB_PARM_FG_CLEARED))
2557 new->p.flags |= DB_PARM_FG_CLEARED;
/* have_expire_parms > 0 sets the flag outright; a negative value
 * inherits the flag from the old database */
2558 if (have_expire_parms > 0
2559 || (have_expire_parms < 0
2560 && (old_db_parms.flags & DB_PARM_EXP_SET)))
2561 new->p.flags |= DB_PARM_EXP_SET;
2562
/* fall back to def_nokeep_cks() when emptied or when the old value is 0 */
2563 new->p.nokeep_cks = (emptied || old_db_parms.nokeep_cks == 0
2564 ? def_nokeep_cks()
2565 : old_db_parms.nokeep_cks);
2566
2567 new->p.pagesize = new_db_pagesize;
2568 new->p.db_csize = new_db_csize;
2569
2570 /* update the traffic counts */
/* the old counts are used only if they are nonzero and did not shrink */
2571 if (!emptied
2572 && old_db_parms.db_csize != 0
2573 && old_db_parms.db_csize >= old_db_parms.old_db_csize
2574 && old_db_parms.hash_used != 0
2575 && old_db_parms.hash_used >= old_db_parms.old_hash_used) {
/* carry the accumulated totals forward when their time span is in range */
2576 if (old_db_parms.rate_secs > 0
2577 && old_db_parms.rate_secs <= DB_MAX_RATE_SECS) {
2578 new->p.rate_secs = old_db_parms.rate_secs;
2579 new->p.db_added = old_db_parms.db_added;
2580 new->p.hash_added = old_db_parms.hash_added;
2581 }
2582 new->p.last_rate_sec = clean_start.tv_sec;
/* seconds between the old header's sn timestamp and the start of this run */
2583 dcc_ts2timeval(&old_sn, &old_db_parms.sn);
2584 new_rate_secs = clean_start.tv_sec - old_sn.tv_sec;
2585 if (new_rate_secs > 0 && new_rate_secs <= DB_MAX_RATE_SECS) {
2586 new_rate_secs += new->p.rate_secs;
/* scale the carried totals down so the span stays at DB_MAX_RATE_SECS */
2587 if (new_rate_secs > DB_MAX_RATE_SECS) {
2588 double trim, new_val;
2589 trim = DB_MAX_RATE_SECS;
2590 trim /= new_rate_secs;
2591
2592 new_val = new->p.db_added;
2593 new_val *= trim;
2594 new->p.db_added = new_val;
2595
2596 new_val = new->p.hash_added;
2597 new_val *= trim;
2598 new->p.hash_added = new_val;
2599
2600 new_rate_secs = DB_MAX_RATE_SECS;
2601 }
/* add the growth recorded in the old header */
2602 new->p.db_added += (old_db_parms.db_csize
2603 - old_db_parms.old_db_csize);
2604 new->p.hash_added += (old_db_parms.hash_used
2605 - old_db_parms.old_hash_used);
2606 new->p.rate_secs = new_rate_secs;
2607 }
2608 }
2609
/* Types without a new expiration get the default durations; their
 * ex_all and ex_spam entries stay zero from the memset above. */
2610 for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) {
2611 if (new_ex_secs[type].all != 0) {
2612 new->p.ex_secs[type].all = new_ex_secs[type].all;
2613 new->p.ex_secs[type].spam = new_ex_secs[type].spam;
2614 new->p.ex_all[type] = new_ex_ts[type].all;
2615 new->p.ex_spam[type] = new_ex_ts[type].spam;
2616 } else {
2617 new->p.ex_secs[type].all = def_expire_secs;
2618 new->p.ex_secs[type].spam = (DCC_CK_LONG_TERM(type)
2619 ? def_expire_spamsecs
2620 : def_expire_secs);
2621 }
2622 }
2623
/* keep a copy of the parameters just built */
2624 new_db_parms = new->p;
2625
/* The first trip through this loop writes the header.  Later trips
 * write zeros at the end of the file, at most one buffer at a time,
 * until the file size is a multiple of the page size. */
2626 for (;;) {
2627 write_new_flush(1);
2628
2629 /* ensure that the last page of the file is complete */
2630 if (new_db_pagesize == 0)
2631 break;
2632 i = new_db_fsize % new_db_pagesize;
2633 if (i == 0)
2634 break;
2635 write_new_db_buflen = new_db_pagesize - i;
2636 if (write_new_db_buflen > ISZ(write_new_db_buf))
2637 write_new_db_buflen = ISZ(write_new_db_buf);
2638 memset(&write_new_db_buf, 0, write_new_db_buflen);
2639 write_new_base = new_db_fsize;
2640 }
2641 }
2642
2643
2644
2645 static void
2646 unlink_whine(const char *nm, u_char enoent_ok)
2647 {
2648 if (0 > unlink(nm)
2649 && (!enoent_ok || errno != ENOENT))
2650 dcc_error_msg("unlink(%s): %s", nm, ERROR_STR());
2651 }
2652
2653
2654
2655 static void
2656 rename_bail(const char *from, const char *to)
2657 {
2658 if (0 > rename(from, to))
2659 dcc_logbad(EX_IOERR, "rename(%s, %s): %s",
2660 from, to, ERROR_STR());
2661 }
2662
2663
2664
2665 /* try for a long time or until the server hears */
2666 static u_char /* 1=ok, 0=failed */
2667 persist_aop(DCC_AOPS aop, u_int32_t val1,
2668 int secs) /* try for this long */
2669 {
2670 return dcc_aop_persist(dcc_emsg, ctxt,
2671 grey_on ? DCC_CLNT_FG_GREY : 0,
2672 db_debug != 0,
2673 aop, val1, secs, &aop_resp);
2674 }
2675
2676
2677
2678 /* tell the daemon to switch to the new database */
2679 static void
2680 dccd_new_db(const char *msg)
2681 {
2682 /* Send a round of NOPs and ask about status to ensure the server
2683 * has dealt with requests that arrived while we had the database
2684 * locked and otherwise caught up. We want to try to ensure that
2685 * the server is listening when we re-open the database so that
2686 * it does not leave flooding off.
2687 * On some systems with lame mmap() support including BSD/OS, the
2688 * the daemon can stall for minutes in close(). If that or something
2689 * else makes the daemon stall, this can appear to fail. */
2690 if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_LIST, RESTART_DELAY))
2691 dcc_error_msg("%s: %s; continuing", msg, dcc_emsg);
2692
2693 dccd_unlocked = 0;
2694 if (!persist_aop(DCC_AOP_DB_NEW, 0, RESTART_DELAY)) {
2695 /* This cannot be a fatal error,
2696 * lest we leave the database broken */
2697 dcc_error_msg("%s: %s; continuing", msg, dcc_emsg);
2698 }
2699 }
2700
2701
2702
2703 static void
2704 finish(void)
2705 {
2706 int bailing = 0;
2707
2708 /* delete the new files */
2709 #ifndef DCC_DBCLEAN_KEEP_NEW /* for debugging */
2710 if (new_db_created) {
2711 unlink_whine(new_db_nm, 0);
2712 new_db_created = 0;
2713 bailing = -1;
2714 }
2715 /* we don't really know if the new hash file was created,
2716 * so don't worry about problems */
2717 if (new_hash_created) {
2718 unlink_whine(new_hash_nm, 1);
2719 new_hash_created = 0;
2720 bailing = -1;
2721 }
2722 #endif
2723 if (cur_db_created) {
2724 unlink_whine(cur_db_nm, 0);
2725 unlink_whine(cur_hash_nm, 1);
2726 cur_db_created = 0;
2727 bailing = -1;
2728 }
2729
2730 if (new_db_fd >= 0) {
2731 if (0 > close(new_db_fd))
2732 dcc_error_msg("close(%s): %s",
2733 new_db_nm, ERROR_STR());
2734 new_db_fd = -1;
2735 }
2736 if (old_db_fd >= 0) {
2737 /* In most cases nothing cares about the old database now.
2738 * We often have kept the old database open and locked until
2739 * now. Delete it unless we are debugging */
2740 if (db_debug < 4 && exit_value == EX_OK) {
2741 unlink_whine(old_db_nm, 0);
2742 } else {
2743 /* Push it to the disk so it won't lurk in the buffer
2744 * cache or elsewhere to slow a system reboot */
2745 if (exit_value == EX_OK
2746 && 0 > fsync(old_db_fd))
2747 dcc_error_msg("fsync(%s): %s",
2748 old_db_nm, ERROR_STR());
2749 }
2750 if (0 > close(old_db_fd))
2751 dcc_error_msg("close(%s): %s",
2752 old_db_nm, ERROR_STR());
2753 old_db_fd = -1;
2754 }
2755 flod_unmap(0, 0);
2756
2757 /* release the daemon, but if the database is still open, it's bad */
2758 db_close(bailing);
2759 /* tell the daemon to switch databases */
2760 if (dccd_unlocked)
2761 dccd_new_db("finish");
2762
2763 while (flods_off > 0) {
2764 --flods_off;
2765 if (!persist_aop(DCC_AOP_FLOD, DCC_AOP_FLOD_RESUME,
2766 RESTART_DELAY))
2767 dcc_error_msg("%s", dcc_emsg);
2768 }
2769
2770 unlock_dbclean();
2771 }
2772
2773
2774
2775 static void NRATTRIB
2776 exit_dbclean(int v)
2777 {
2778 exit(exit_value = v);
2779 }
2780
2781
2782
2783 /* terminate with a signal */
2784 static void NRATTRIB
2785 sigterm(int s)
2786 {
2787 dcc_error_msg("interrupted by signal %d", s);
2788 exit_dbclean(s+100);
2789 }