Mercurial > notdcc
comparison srvrlib/db.c @ 0:c7f6b056b673
First import of vendor version
author | Peter Gervai <grin@grin.hu> |
---|---|
date | Tue, 10 Mar 2009 13:49:58 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c7f6b056b673 |
---|---|
1 /* Distributed Checksum Clearinghouse | |
2 * | |
3 * server database functions | |
4 * | |
5 * Copyright (c) 2008 by Rhyolite Software, LLC | |
6 * | |
7 * This agreement is not applicable to any entity which sells anti-spam | |
8 * solutions to others or provides an anti-spam solution as part of a | |
9 * security solution sold to other entities, or to a private network | |
10 * which employs the DCC or uses data provided by operation of the DCC | |
11 * but does not provide corresponding data to other users. | |
12 * | |
13 * Permission to use, copy, modify, and distribute this software without | |
14 * changes for any purpose with or without fee is hereby granted, provided | |
15 * that the above copyright notice and this permission notice appear in all | |
16 * copies and any distributed versions or copies are either unchanged | |
17 * or not called anything similar to "DCC" or "Distributed Checksum | |
18 * Clearinghouse". | |
19 * | |
20 * Parties not eligible to receive a license under this agreement can | |
21 * obtain a commercial license to use DCC by contacting Rhyolite Software | |
22 * at sales@rhyolite.com. | |
23 * | |
24 * A commercial license would be for Distributed Checksum and Reputation | |
25 * Clearinghouse software. That software includes additional features. This | |
26 * free license for Distributed ChecksumClearinghouse Software does not in any | |
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse | |
28 * software | |
29 * | |
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL | |
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES | |
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC | |
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES | |
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | |
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, | |
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS | |
37 * SOFTWARE. | |
38 * | |
39 * Rhyolite Software DCC 1.3.103-1.214 $Revision$ | |
40 */ | |
41 | |
42 #include "srvr_defs.h" | |
43 #include <syslog.h> | |
44 #include <sys/resource.h> | |
45 #if defined(HAVE_HW_PHYSMEM) || defined(HAVE_BOOTTIME) | |
46 #include <sys/sysctl.h> | |
47 #endif | |
48 #ifdef HAVE_PSTAT_GETSTATIC /* HP-UX */ | |
49 #include <sys/pstat.h> | |
50 #endif | |
51 | |
52 DB_STATS db_stats; | |
53 | |
54 DB_STATES db_sts; | |
55 | |
56 DCC_PATH db_path_buf; | |
57 | |
58 int db_fd = -1; | |
59 DCC_PATH db_nm; | |
60 int db_hash_fd = -1; | |
61 DCC_PATH db_hash_nm; | |
62 struct timeval db_locked; /* 1=database not locked */ | |
63 | |
64 struct timeval db_time; | |
65 | |
66 int db_debug; | |
67 | |
68 u_char grey_on; | |
69 static u_char db_use_write; /* 0=no 1=if RAM big enough 2=always */ | |
70 static u_char db_dirty; | |
71 static u_char db_rdonly; | |
72 int db_failed_line; /* bad happened at this line # */ | |
73 const char *db_failed_file; /* in this file */ | |
74 static u_char db_invalidate; /* do not write to the files */ | |
75 | |
76 /* Without mmap(MAP_NOSYNC) as on Solaris or a good msync() as on BSD/OS, | |
77 * we must rely on the kernel's update/syncer/bufdaemon/etc. So in this | |
78 * case just fondle the mmap()'ed pages and hope things work out. | |
79 * | |
80 * With a msync() and with mmap(MAP_NOSYNC), use MAP_NOSYNC if we can because | |
81 * some systems flush too quickly while others such as FreeBSD 6.1 stall | |
82 * for seconds while thinking about flushing the database. | |
83 * But with mmap(MAP_NOSYNC) we leave large amounts of data in RAM that take | |
84 * too long time to be pushed to the disk when the system is shutting down. | |
85 * So | |
86 * - hit only those chunks of memory with real data or changes to data | |
87 * with msync(). Trust dbclean to rebuild everything else at need. | |
88 * | |
89 * - when it seems the system is being shut down, delete the hash table | |
90 * and let it be rebuilt when the system is rebooted. When the | |
91 * hash table is rebuilt, "obsolete" markings in the data file that | |
92 * might have been lost will be remade. | |
93 * | |
94 * A third case involves dccd -F. It requires that all changes be pushed to | |
95 * the disk whenever dccd unlocks the database so that dbclean can see changes | |
96 * dccd makes. It also requires that dbclean write all of its changes so | |
97 * that dccd will find them when it reopens the database. | |
98 */ | |
99 | |
100 #if !defined(MAP_NOSYNC) || defined(HAVE_OLD_MSYNC) || !defined(HAVE_BOOTTIME) | |
101 #undef USE_MAP_NOSYNC | |
102 #else | |
103 #define USE_MAP_NOSYNC | |
104 #endif | |
105 | |
106 static u_char db_not_synced; /* database unsynchronized with disk */ | |
107 | |
108 | |
/* Wrappers for advice about one db_pagesize buffer starting at p.
 * Each expands to the posix_madvise() form when the POSIX_MADV_* constant
 * is defined, otherwise to the madvise() form when the MADV_* constant is
 * defined, and otherwise to the constant 0. */

#if defined(POSIX_MADV_WILLNEED)
#define DCC_MADV_WILLNEED(p) posix_madvise(p, db_pagesize, POSIX_MADV_WILLNEED)
#elif defined(MADV_WILLNEED)
#define DCC_MADV_WILLNEED(p) madvise(p, db_pagesize, MADV_WILLNEED)
#else
#define DCC_MADV_WILLNEED(p) 0
#endif

#if defined(POSIX_MADV_RANDOM)
#define DCC_MADV_RANDOM(p) posix_madvise(p, db_pagesize, POSIX_MADV_RANDOM)
#elif defined(MADV_RANDOM)
#define DCC_MADV_RANDOM(p) madvise(p, db_pagesize, MADV_RANDOM)
#else
#define DCC_MADV_RANDOM(p) 0
#endif

/* DCC_MADV_DONTNEED() is always 0 on Linux.
 * The Linux notion of MADV_DONTNEED implies discarding changes to data,
 * and some versions of Linux/GNU libc define POSIX_MADV_DONTNEED as that
 * same data-corrupting MADV_DONTNEED instead of distinguishing between
 * the functions of MADV_FREE and MADV_DONTNEED as other systems do. */
#if !defined(linux) && defined(POSIX_MADV_DONTNEED)
#define DCC_MADV_DONTNEED(p) posix_madvise(p, db_pagesize, POSIX_MADV_DONTNEED)
#elif !defined(linux) && defined(MADV_DONTNEED)
#define DCC_MADV_DONTNEED(p) madvise(p, db_pagesize, MADV_DONTNEED)
#else
#define DCC_MADV_DONTNEED(p) 0
#endif

#if defined(POSIX_MADV_FREE)
#define DCC_MADV_FREE(p) posix_madvise(p, db_pagesize, POSIX_MADV_FREE)
#elif defined(MADV_FREE)
#define DCC_MADV_FREE(p) madvise(p, db_pagesize, MADV_FREE)
#else
#define DCC_MADV_FREE(p) 0
#endif
156 | |
157 | |
158 u_char db_minimum_map; /* this is dccd & dbclean is running */ | |
159 | |
160 int db_buf_total; /* total # of db buffers */ | |
161 DB_PTR db_max_rss; /* maximum db resident set size */ | |
162 DB_PTR db_max_byte; /* maximum db bytes in both files */ | |
163 | |
164 static u_int system_pagesize; /* kernel page size */ | |
165 | |
166 static DB_BUF db_bufs[DB_BUF_MAX]; /* control mmap()'ed blocks */ | |
167 static DB_BUF *buf_oldest, *buf_newest; | |
168 | |
169 #define DB_HASH_TOTAL DB_BUF_MAX | |
170 static DB_BUF *db_buf_hash[DB_HASH_TOTAL]; | |
171 /* fancy 16-bit multiplicative hash assumes multiplication needs 1 cycle | |
172 * and so the hash is faster than dealing with a collision */ | |
173 #define DB_BUF_HASH(pnum,t) (&db_buf_hash[((((pnum)*(t)*0x9ccf) & 0xffff) \ | |
174 * DB_BUF_MAX) >> 16]) | |
175 | |
176 time_t db_need_flush_secs; | |
177 static time_t db_urgent_need_flush_secs; | |
178 | |
179 const DB_VERSION_BUF db_version_buf = DB_VERSION_STR; | |
180 DB_PARMS db_parms; | |
181 static DB_PARMS db_parms_stored; | |
182 | |
183 DCC_TGTS db_tholds[DCC_DIM_CKS]; | |
184 | |
185 u_int db_pagesize; /* size of 1 mmap()'ed buffer */ | |
186 static u_int db_pagesize_part; | |
187 | |
188 DB_HOFF db_hash_fsize; /* size of hash table file */ | |
189 static u_int hash_clear_pg_num; | |
190 DB_HADDR db_hash_len; /* # of hash table entries */ | |
191 DB_HADDR db_hash_divisor; /* modulus */ | |
192 DB_HADDR db_hash_used; /* # of hash table entries in use */ | |
193 u_int db_hash_page_len; /* # of HASH_ENTRY's per buffer */ | |
194 DB_HADDR db_max_hash_entries = 0; /* after db_buf_init()*/ | |
195 DB_PTR db_fsize; /* size of database file */ | |
196 DB_PTR db_csize; /* size of database contents in bytes */ | |
197 static DB_PTR db_csize_stored_hash; /* DB size stored in hash file */ | |
198 static DB_HADDR db_hash_used_stored_hash; | |
199 u_int db_page_max; /* only padding after this in DB buf */ | |
200 static DB_PTR db_window_size; /* size of mmap() window */ | |
201 char db_window_size_str[128]; | |
202 static char db_physmem_str[80]; | |
203 | |
204 static const u_char dcc_ck_fuzziness[DCC_DIM_CKS] = { | |
205 0, /* DCC_CK_INVALID */ | |
206 DCC_CK_FUZ_LVL_NO, /* DCC_CK_IP */ | |
207 DCC_CK_FUZ_LVL_NO, /* DCC_CK_ENV_FROM */ | |
208 DCC_CK_FUZ_LVL_NO, /* DCC_CK_FROM */ | |
209 DCC_CK_FUZ_LVL_NO, /* DCC_CK_SUB */ | |
210 DCC_CK_FUZ_LVL_NO, /* DCC_CK_MESSAGE_ID */ | |
211 DCC_CK_FUZ_LVL_NO, /* DCC_CK_RECEIVED */ | |
212 DCC_CK_FUZ_LVL_NO, /* DCC_CK_BODY */ | |
213 DCC_CK_FUZ_LVL1, /* DCC_CK_FUZ1 */ | |
214 DCC_CK_FUZ_LVL2, /* DCC_CK_FUZ2 */ | |
215 DCC_CK_FUZ_LVL_REP, /* DCC_CK_REP_TOTAL */ | |
216 DCC_CK_FUZ_LVL_REP, /* DCC_CK_REP_BULK */ | |
217 DCC_CK_FUZ_LVL2, /* DCC_CK_SRVR_ID */ | |
218 DCC_CK_FUZ_LVL2 /* DCC_CK_ENV_TO */ | |
219 }; | |
220 static const u_char grey_ck_fuzziness[DCC_DIM_CKS] = { | |
221 0, /* DCC_CK_INVALID */ | |
222 DCC_CK_FUZ_LVL2, /* DCC_CK_IP */ | |
223 DCC_CK_FUZ_LVL_NO, /* DCC_CK_ENV_FROM */ | |
224 DCC_CK_FUZ_LVL_NO, /* DCC_CK_FROM */ | |
225 DCC_CK_FUZ_LVL_NO, /* DCC_CK_SUB */ | |
226 DCC_CK_FUZ_LVL_NO, /* DCC_CK_MESSAGE_ID */ | |
227 DCC_CK_FUZ_LVL_NO, /* DCC_CK_RECEIVED */ | |
228 DCC_CK_FUZ_LVL_NO, /* DCC_CK_BODY */ | |
229 DCC_CK_FUZ_LVL_NO, /* DCC_CK_FUZ1 */ | |
230 DCC_CK_FUZ_LVL_NO, /* DCC_CK_FUZ2 */ | |
231 DCC_CK_FUZ_LVL_NO, /* DCC_CK_GREY_MSG */ | |
232 DCC_CK_FUZ_LVL1, /* DCC_CK_GREY_TRIPLE */ | |
233 DCC_CK_FUZ_LVL1, /* DCC_CK_SRVR_ID */ | |
234 DCC_CK_FUZ_LVL1 /* DCC_CK_ENV_TO */ | |
235 }; | |
236 const u_char *db_ck_fuzziness = dcc_ck_fuzziness; | |
237 | |
238 | |
239 static u_char buf_flush(DCC_EMSG, DB_BUF *, u_char); | |
240 static u_char buf_munmap(DCC_EMSG, DB_BUF *); | |
241 static DB_BUF *find_buf(DCC_EMSG, DB_BUF_TYPE, DB_PG_NUM); | |
242 static u_char map_hash(DCC_EMSG, DB_HADDR, DB_STATE *, u_char); | |
243 static u_char map_hash_ctl(DCC_EMSG, u_char); | |
244 static u_char map_db(DCC_EMSG, DB_PTR, u_int, DB_STATE *, u_char); | |
245 static u_char db_set_sizes(DCC_EMSG); | |
246 | |
247 | |
248 /* compute the least common multiple of two numbers */ | |
249 static u_int | |
250 lcm(u_int n, u_int m) | |
251 { | |
252 u_int r, x, gcd; | |
253 | |
254 /* first get the gcd of the two numbers */ | |
255 if (n >= m) { | |
256 x = n; | |
257 gcd = m; | |
258 } else { | |
259 x = m; | |
260 gcd = n; | |
261 } | |
262 for (;;) { | |
263 r = x % gcd; | |
264 if (r == 0) | |
265 return n * (m / gcd); | |
266 x = gcd; | |
267 gcd = r; | |
268 } | |
269 } | |
270 | |
271 | |
272 | |
273 const char * | |
274 db_ptr2str(DB_PTR val) | |
275 { | |
276 static int bufno; | |
277 static struct { | |
278 char str[16]; | |
279 } bufs[4]; | |
280 char *s; | |
281 const char *units; | |
282 | |
283 if (val == 0) | |
284 return "0"; | |
285 | |
286 s = bufs[bufno].str; | |
287 bufno = (bufno+1) % DIM(bufs); | |
288 | |
289 if (val % (1024*1024*1024) == 0) { | |
290 val /= (1024*1024*1024); | |
291 units = "GB"; | |
292 } else if (val % (1024*1024) == 0) { | |
293 val /= (1024*1024); | |
294 units = "MB"; | |
295 } else if (val % 1024 == 0) { | |
296 val /= 1024; | |
297 units = "KB"; | |
298 } else { | |
299 units = ""; | |
300 } | |
301 if (val > 1000*1000*1000) | |
302 snprintf(s, sizeof(bufs[0].str), "%d,%03d,%03d,%03d%s", | |
303 (int)(val / (1000*1000*1000)), | |
304 (int)(val / (1000*1000)) % 1000, | |
305 (int)(val / 1000) % 1000, | |
306 (int)(val % 1000), | |
307 units); | |
308 else if (val > 1000*1000) | |
309 snprintf(s, sizeof(bufs[0].str), "%d,%03d,%03d%s", | |
310 (int)(val / (1000*1000)), | |
311 (int)(val / 1000) % 1000, | |
312 (int)(val % 1000), | |
313 units); | |
314 else if (val > 1000*10) | |
315 snprintf(s, sizeof(bufs[0].str), "%d,%03d%s", | |
316 (int)(val / 1000), | |
317 (int)(val % 1000), | |
318 units); | |
319 else | |
320 snprintf(s, sizeof(bufs[0].str), "%d%s", | |
321 (int)val, | |
322 units); | |
323 return s; | |
324 } | |
325 | |
326 | |
327 | |
328 const char * | |
329 size2str(char *buf, u_int buf_len, | |
330 double num, u_char bytes_or_entries) /* 0=number 1=bytes */ | |
331 { | |
332 const char *units; | |
333 double k; | |
334 | |
335 k = bytes_or_entries ? 1024.0 : 1000.0; | |
336 | |
337 if (num < k) { | |
338 units = ""; | |
339 } else if (num < k*k) { | |
340 num /= k; | |
341 units = "K"; | |
342 } else if (num < k*k*k) { | |
343 num /= k*k; | |
344 units = "M"; | |
345 } else { | |
346 num /= k*k*k; | |
347 units = "G"; | |
348 } | |
349 | |
350 if ((int)num >= 100) | |
351 snprintf(buf, buf_len, "%.0f%s", num, units); | |
352 else | |
353 snprintf(buf, buf_len, "%.2g%s", num, units); | |
354 return buf; | |
355 } | |
356 | |
357 | |
358 | |
359 void PATTRIB(5,6) | |
360 db_failure(int linenum, const char *file, int ex_code, DCC_EMSG emsg, | |
361 const char *p, ...) | |
362 { | |
363 va_list args; | |
364 | |
365 if (!db_failed_line) { | |
366 db_failed_line = linenum; | |
367 db_failed_file = file; | |
368 } | |
369 va_start(args, p); | |
370 dcc_vpemsg(ex_code, emsg, p, args); | |
371 va_end(args); | |
372 } | |
373 | |
374 | |
375 | |
376 void PATTRIB(3,4) | |
377 db_error_msg(int linenum, const char *file, const char *p, ...) | |
378 { | |
379 va_list args; | |
380 | |
381 if (!db_failed_line) { | |
382 db_failed_line = linenum; | |
383 db_failed_file = file; | |
384 } | |
385 va_start(args, p); | |
386 dcc_verror_msg(p, args); | |
387 va_end(args); | |
388 } | |
389 | |
390 | |
391 | |
392 double /* hashes or bytes/second */ | |
393 db_add_rate(const DB_PARMS *parms, | |
394 u_char hash_or_db) /* 1=hash */ | |
395 { | |
396 struct timeval sn; | |
397 time_t new_rate_secs; | |
398 time_t total_secs; | |
399 double added, cur, prev; | |
400 | |
401 total_secs = parms->rate_secs; | |
402 if (hash_or_db) { | |
403 added = parms->hash_added; | |
404 cur = parms->hash_used; | |
405 prev = parms->old_hash_used; | |
406 } else { | |
407 added = parms->db_added; | |
408 cur = parms->db_csize; | |
409 prev = parms->old_db_csize; | |
410 } | |
411 | |
412 if (total_secs <= 0 || total_secs > DB_MAX_RATE_SECS | |
413 || added <= 0.0) { | |
414 added = 0.0; | |
415 total_secs = 0; | |
416 } | |
417 | |
418 dcc_ts2timeval(&sn, &parms->sn); | |
419 new_rate_secs = parms->last_rate_sec - sn.tv_sec; | |
420 if (new_rate_secs > 0 && new_rate_secs <= DB_MAX_RATE_SECS | |
421 && cur > prev) { | |
422 total_secs += new_rate_secs; | |
423 added += cur - prev; | |
424 } | |
425 | |
426 if (total_secs <= DB_MIN_RATE_SECS) | |
427 return -1.0; | |
428 return added / total_secs; | |
429 } | |
430 | |
431 | |
432 | |
433 DB_NOKEEP_CKS | |
434 def_nokeep_cks(void) | |
435 { | |
436 DCC_CK_TYPES type; | |
437 DB_NOKEEP_CKS nokeep = 0; | |
438 | |
439 for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) { | |
440 if (DB_GLOBAL_NOKEEP(grey_on, type)) | |
441 DB_SET_NOKEEP(nokeep, type); | |
442 } | |
443 DB_SET_NOKEEP(nokeep, DCC_CK_INVALID); | |
444 DB_SET_NOKEEP(nokeep, DCC_CK_FLOD_PATH); | |
445 | |
446 return nokeep; | |
447 } | |
448 | |
449 | |
450 | |
451 void | |
452 set_db_tholds(DB_NOKEEP_CKS nokeep) | |
453 { | |
454 DCC_CK_TYPES type; | |
455 | |
456 for (type = 0; type < DIM(db_tholds); ++type) { | |
457 db_tholds[type] = (DB_TEST_NOKEEP(nokeep, type) | |
458 ? DCC_TGTS_INVALID | |
459 : DCC_CK_IS_REP_CMN(grey_on, type) | |
460 ? DCC_TGTS_INVALID | |
461 : grey_on ? 1 | |
462 : type == DCC_CK_SRVR_ID ? 1 | |
463 : BULK_THRESHOLD); | |
464 } | |
465 } | |
466 | |
467 | |
468 | |
469 static const char * | |
470 buf2path(const DB_BUF *b) | |
471 { | |
472 switch (b->buf_type) { | |
473 case DB_BUF_TYPE_HASH: | |
474 return db_hash_nm; | |
475 case DB_BUF_TYPE_DB: | |
476 return db_nm; | |
477 case DB_BUF_TYPE_FREE: | |
478 default: | |
479 dcc_logbad(EX_SOFTWARE, "impossible buffer type for a path"); | |
480 } | |
481 } | |
482 | |
483 | |
484 | |
485 static int | |
486 buf2fd(const DB_BUF *b) | |
487 { | |
488 switch (b->buf_type) { | |
489 case DB_BUF_TYPE_HASH: | |
490 return db_hash_fd; | |
491 case DB_BUF_TYPE_DB: | |
492 return db_fd; | |
493 case DB_BUF_TYPE_FREE: | |
494 default: | |
495 dcc_logbad(EX_SOFTWARE, "impossible buffer type for fd"); | |
496 } | |
497 } | |
498 | |
499 | |
500 | |
501 static void | |
502 rel_db_state(DB_STATE *st) | |
503 { | |
504 DB_BUF *b; | |
505 | |
506 b = st->b; | |
507 if (!b) | |
508 return; | |
509 st->b = 0; | |
510 st->d.v = 0; | |
511 st->s.rptr = DB_PTR_BAD; | |
512 if (--b->lock_cnt < 0) | |
513 dcc_logbad(EX_SOFTWARE,"negative database buffer lock"); | |
514 } | |
515 | |
516 | |
517 | |
518 void | |
519 rel_db_states(void) | |
520 { | |
521 DB_STATE *st; | |
522 | |
523 for (st = &db_sts.rcd; st <= &db_sts.hash_ctl; ++st) { | |
524 rel_db_state(st); | |
525 } | |
526 } | |
527 | |
528 | |
529 | |
530 /* release one or all unneeded buffers */ | |
531 u_char /* 0=problem 1=did nothing 2=did>=1 */ | |
532 db_unload(DCC_EMSG emsg, | |
533 u_char some) /* 0=all, 1=only one, 2=finished */ | |
534 { | |
535 DB_BUF *b; | |
536 u_char result; | |
537 | |
538 result = 1; | |
539 for (b = buf_oldest; b != 0; b = b->newer) { | |
540 if (b->buf_type == DB_BUF_TYPE_FREE | |
541 || b->lock_cnt != 0) | |
542 continue; | |
543 if (some == 2 | |
544 && !(b->flags & DB_BUF_FG_USE_WRITE) | |
545 && 0 > DCC_MADV_DONTNEED(b->buf.v)) | |
546 dcc_error_msg("madvise(DONTNEED %s,%#x): %s", | |
547 buf2path(b), db_pagesize, ERROR_STR()); | |
548 if (!buf_munmap(emsg, b)) { | |
549 emsg = 0; | |
550 result = 0; | |
551 } else if (result) { | |
552 result = 2; | |
553 } | |
554 if (some == 1) | |
555 return result; | |
556 } | |
557 | |
558 return result; | |
559 } | |
560 | |
561 | |
562 | |
563 static u_char | |
564 buf_write_part(DCC_EMSG emsg, DB_BUF *b, off_t offset, void *buf, int len) | |
565 { | |
566 int i; | |
567 | |
568 offset += (off_t)b->pg_num * (off_t)db_pagesize; | |
569 | |
570 if (offset != lseek(buf2fd(b), offset, SEEK_SET)) { | |
571 db_failure(__LINE__,__FILE__, EX_IOERR, emsg, | |
572 "buf_write_part lseek(%s,"OFF_HPAT"): %s", | |
573 buf2path(b), offset, ERROR_STR()); | |
574 return 0; | |
575 } | |
576 i = write(buf2fd(b), buf, len); | |
577 if (i != len) { | |
578 db_failure(__LINE__,__FILE__, EX_IOERR, emsg, | |
579 "buf_write_part(%s,%u)=%d: %s", | |
580 buf2path(b), len, i, ERROR_STR()); | |
581 return 0; | |
582 } | |
583 | |
584 return 1; | |
585 } | |
586 | |
587 | |
588 | |
589 /* push part of a buffer toward the disk | |
590 * this can be needed even when the file has been opened and mapped | |
591 * read-only by dbclean */ | |
592 static u_char | |
593 buf_flush_part(DCC_EMSG emsg, DB_BUF *b, | |
594 u_int part, /* DB_BUF_NUM_PARTS=buffer */ | |
595 u_char async UATTRIB) | |
596 { | |
597 u_int flush_len; | |
598 char *flush_base; | |
599 DB_BUF_FM bit; | |
600 | |
601 bit = PART2BIT(part) & (b->flush | b->flush_urgent); | |
602 if (!bit) | |
603 return 1; | |
604 | |
605 /* Send a new buffer to disk at once. */ | |
606 if (b->flags & DB_BUF_FG_EXTENSION) { | |
607 DB_BUF *b1, *b0; | |
608 u_char result; | |
609 | |
610 /* To give the file system a chance to make the hash table | |
611 * contiguous, first write all preceding new buffers. | |
612 * In almost all cases, there will be none. */ | |
613 result = 1; | |
614 do { | |
615 b0 = b; | |
616 for (b1 = buf_oldest; b1 != 0; b1 = b1->newer) { | |
617 if (!(b1->flags & DB_BUF_FG_EXTENSION) | |
618 || b1->buf_type != b0->buf_type | |
619 || b1->pg_num >= b0->pg_num) | |
620 continue; | |
621 b0 = b1; | |
622 } | |
623 b0->flags &= ~DB_BUF_FG_EXTENSION; | |
624 b0->flush = 0; | |
625 b0->flush_urgent = 0; | |
626 if (!db_invalidate | |
627 && !buf_write_part(emsg, b0, | |
628 0, b0->buf.c, db_pagesize)) | |
629 result = 0; | |
630 } while (b0 != b); | |
631 return result; | |
632 } | |
633 | |
634 flush_base = b->ranges[part].lo; | |
635 flush_len = b->ranges[part].hi - flush_base; | |
636 b->flush &= ~bit; | |
637 b->flush_urgent &= ~bit; | |
638 | |
639 if (db_invalidate) | |
640 return 1; | |
641 | |
642 if (b->flags & DB_BUF_FG_USE_WRITE) { | |
643 static char *wbuf; | |
644 static u_int wbuf_len; | |
645 | |
646 /* In at least FreeBSD you cannot write() to the file | |
647 * that underlies a mmap() region from that region */ | |
648 if (wbuf_len < db_pagesize_part) { | |
649 /* the page size for the current file | |
650 * might be different from the old file */ | |
651 if (wbuf) | |
652 free(wbuf); | |
653 wbuf_len = db_pagesize_part; | |
654 wbuf = malloc(wbuf_len); | |
655 } | |
656 | |
657 memcpy(wbuf, flush_base, flush_len); | |
658 return buf_write_part(emsg, b, flush_base - b->buf.c, | |
659 wbuf, flush_len); | |
660 | |
661 #ifndef HAVE_OLD_MSYNC | |
662 } else if (async) { | |
663 if (0 > MSYNC(flush_base, flush_len, MS_ASYNC)) { | |
664 db_failure(__LINE__,__FILE__, EX_IOERR, emsg, | |
665 "msync(db buffer %s,%#lx,%#x,MS_ASYNC): %s", | |
666 buf2path(b), (long)flush_base, flush_len, | |
667 ERROR_STR()); | |
668 return 0; | |
669 } | |
670 #endif | |
671 } else { | |
672 if (0 > MSYNC(flush_base, flush_len, MS_SYNC)) { | |
673 db_failure(__LINE__,__FILE__, EX_IOERR, emsg, | |
674 "msync(db buffer %s,%#lx,%#x,MS_SYNC): %s", | |
675 buf2path(b), (long)flush_base, flush_len, | |
676 ERROR_STR()); | |
677 return 0; | |
678 } | |
679 } | |
680 | |
681 return 1; | |
682 } | |
683 | |
684 | |
685 | |
686 static u_char | |
687 buf_flush(DCC_EMSG emsg, DB_BUF *b, u_char async) | |
688 { | |
689 u_int part; | |
690 DB_BUF_FM bits; | |
691 u_char result = 1; | |
692 | |
693 bits = b->flush_urgent | b->flush; | |
694 for (part = 0; bits != 0 && part < DB_BUF_NUM_PARTS; ++part) { | |
695 if (bits & PART2BIT(part)) { | |
696 if (!buf_flush_part(emsg, b, part, async)) { | |
697 emsg = 0; | |
698 result = 0; | |
699 } | |
700 bits = b->flush_urgent | b->flush; | |
701 } | |
702 } | |
703 return result; | |
704 } | |
705 | |
706 | |
707 | |
708 /* Try to keep the data clean so that the fsync() required by Solaris | |
709 * when the file is unloaded is not too expensive. | |
710 * Try to flush frequently so that we don't stall as long in msync(). | |
711 */ | |
712 void | |
713 db_flush_needed(void) | |
714 { | |
715 static DB_BUF *next_b = db_bufs; | |
716 static u_int next_part; | |
717 DB_BUF *b; | |
718 u_int part, all_parts; | |
719 int buf_num; | |
720 u_char worked; | |
721 | |
722 /* send to the disk changes that cannot be recreated by dbclean */ | |
723 if (db_urgent_need_flush_secs != 0 | |
724 && DB_IS_TIME(db_urgent_need_flush_secs, | |
725 DB_URGENT_NEED_FLUSH_SECS)) { | |
726 worked = 0; | |
727 for (b = buf_newest; b; b = b->older) { | |
728 if (b->buf_type == DB_BUF_TYPE_FREE) | |
729 continue; | |
730 | |
731 for (part = 0; | |
732 b->flush_urgent != 0 && part < DB_BUF_NUM_PARTS; | |
733 ++part) { | |
734 if ((b->flush_urgent & PART2BIT(part))) { | |
735 buf_flush_part(0, b, part, 1); | |
736 worked = 1; | |
737 } | |
738 } | |
739 | |
740 /* Switch new data pages to mmap() | |
741 * when this is not dbclean, since only dccd calls here | |
742 * they are not using mmap() | |
743 * they are either hash table pages or | |
744 * not the last page in the file */ | |
745 if ((b->flags & DB_BUF_FG_USE_WRITE) | |
746 && !db_use_write | |
747 && (b->buf_type != DB_BUF_TYPE_DB | |
748 || (DB_PTR2PG_NUM(db_csize-1, db_pagesize) | |
749 != b->pg_num))) { | |
750 if (b->lock_cnt != 0) | |
751 rel_db_states(); | |
752 buf_munmap(0, b); | |
753 } | |
754 } | |
755 | |
756 /* Keep the clock running if we did any work. This tends to | |
757 * avoid stalls caused by colliding with the FreeBSD syncer */ | |
758 if (worked) { | |
759 gettimeofday(&db_time, 0); | |
760 db_urgent_need_flush_secs = (db_time.tv_sec | |
761 + DB_URGENT_NEED_FLUSH_SECS); | |
762 } else { | |
763 db_urgent_need_flush_secs = 0; | |
764 } | |
765 } | |
766 | |
767 /* assume there will be nothing more to do */ | |
768 db_need_flush_secs = db_urgent_need_flush_secs; | |
769 | |
770 #ifdef USE_MAP_NOSYNC | |
771 /* if we are using mmap(MAP_NOSYNC), then there are no bits | |
772 * set in any b->flush words except that of the recent | |
773 * DB_BUF_FG_USE_WRITE extensions of the file. It is best to let | |
774 * those blocks stay in RAM until the whole buffer is flushed and | |
775 * switched to mmap above */ | |
776 if (!db_use_write) | |
777 return; | |
778 #endif | |
779 | |
780 b = next_b; | |
781 part = next_part; | |
782 all_parts = DB_PARTS_PER_FLUSH; | |
783 for (buf_num = DIM(db_bufs); buf_num >= 0; --buf_num) { | |
784 if (b > LAST(db_bufs)) { | |
785 part = 0; | |
786 b = db_bufs; | |
787 } | |
788 if (!b->flush | |
789 || part >= DB_BUF_NUM_PARTS | |
790 || b->buf_type == DB_BUF_TYPE_FREE) { | |
791 part = 0; | |
792 ++b; | |
793 continue; | |
794 } | |
795 | |
796 while (part < DB_BUF_NUM_PARTS) { | |
797 if (b->flush & PART2BIT(part)) { | |
798 buf_flush_part(0, b, part, 1); | |
799 if (--all_parts == 0) { | |
800 next_part = part+1; | |
801 next_b = b; | |
802 db_need_flush_secs = (db_time.tv_sec | |
803 + DB_NEED_FLUSH_SECS); | |
804 return; | |
805 } | |
806 if (!b->flush) | |
807 part = DB_BUF_NUM_PARTS; | |
808 } | |
809 ++part; | |
810 } | |
811 } | |
812 } | |
813 | |
814 | |
815 | |
816 /* occassionally flush an unlocked data buffer for dbclean | |
817 * dbclean mostly changes only the current record, so get started | |
818 * writing the data to avoid stalling the system at the end. */ | |
819 u_char | |
820 db_flush_db(DCC_EMSG emsg UATTRIB) | |
821 { | |
822 #ifdef USE_MAP_NOSYNC | |
823 DB_BUF *b; | |
824 int limit; | |
825 int pg_num; | |
826 | |
827 /* Gently push the new hash table to disk. | |
828 * The disk image will never be accurate. This only allocates space. | |
829 * Do not do this for systems that lack mmap(NOSYNC) such as Linux | |
830 * that thrash themselves as the hash table is being built. A | |
831 * long pause when the database is closed is not as bad as spending | |
832 * hours building the hash table. */ | |
833 while (hash_clear_pg_num < db_hash_fsize/db_hash_page_len) { | |
834 pg_num = hash_clear_pg_num++; | |
835 for (b = buf_oldest; b != 0; b = b->newer) { | |
836 if (b->pg_num != pg_num | |
837 || b->buf_type != DB_BUF_TYPE_HASH) | |
838 continue; | |
839 if (!(b->flags & DB_BUF_FG_EXTENSION)) | |
840 break; | |
841 if (b->lock_cnt != 0) | |
842 rel_db_states(); | |
843 return buf_munmap(emsg, b); | |
844 } | |
845 | |
846 /* look for the next page if this one has already | |
847 * been flushed */ | |
848 } | |
849 | |
850 /* flush some ordinary buffers */ | |
851 limit = 2; | |
852 for (b = buf_oldest; b != 0; b = b->newer) { | |
853 if (b->flush_urgent == 0 | |
854 || b->buf_type == DB_BUF_TYPE_FREE | |
855 || b->lock_cnt != 0) | |
856 continue; | |
857 if (!buf_flush(emsg, b, 1)) | |
858 return 0; | |
859 if (--limit <= 0) | |
860 return 1; | |
861 } | |
862 #endif | |
863 return 1; | |
864 } | |
865 | |
866 | |
867 | |
868 /* mark part of a buffer dirty | |
869 * "Urgent" changes are flushed by a timer. Ordinary changes | |
870 * are often ignored and expected to be rebuilt if the system crashes. | |
871 * That the hash table is deleted as the system is shut down while the | |
872 * database must be flushed from the system's buffer cache is a reason | |
873 * to keep the disk image of the database good. */ | |
874 void | |
875 db_set_flush(DB_STATE *st, u_char urgent, u_int len) | |
876 { | |
877 DB_BUF *b; | |
878 DB_BUF_FM bit, new_bits, old_bits; | |
879 char *buf_base, *part_end, *start, *end; | |
880 u_int part, i; | |
881 | |
882 /* nothing to do if the kernel is handling it | |
883 * or if we are letting this change be reconstructed by dbclean */ | |
884 b = st->b; | |
885 if (!(b->flags & DB_BUF_FG_USE_WRITE)) { | |
886 #ifdef USE_MAP_NOSYNC | |
887 if (!urgent) | |
888 #endif | |
889 return; | |
890 } | |
891 | |
892 start = st->d.c; | |
893 buf_base = b->buf.c; | |
894 | |
895 /* Increase to even pages in the hope that the file system might | |
896 * be able to page-flip. This might at least avoid reading into the | |
897 * buffer cache to honor a write(). Besides, Solaris' msync() handles | |
898 * only even pages. */ | |
899 i = (start - buf_base) % system_pagesize; | |
900 start -= i; | |
901 len += i; | |
902 len = ((len + system_pagesize-1) / system_pagesize) * system_pagesize; | |
903 | |
904 end = start + len; | |
905 if (end > buf_base+db_pagesize) | |
906 dcc_logbad(EX_SOFTWARE, "inflated dirty buffer size"); | |
907 | |
908 part = (start - buf_base) / db_pagesize_part; | |
909 part_end = buf_base + part * db_pagesize_part; | |
910 bit = PART2BIT(part); | |
911 new_bits = 0; | |
912 old_bits = b->flush | b->flush_urgent; | |
913 do { | |
914 part_end += db_pagesize_part; | |
915 if (part_end > end) | |
916 part_end = end; | |
917 | |
918 if (!(old_bits & bit)) { | |
919 b->ranges[part].lo = start; | |
920 b->ranges[part].hi = part_end; | |
921 } else { | |
922 if (b->ranges[part].lo > start) | |
923 b->ranges[part].lo = start; | |
924 if (b->ranges[part].hi < part_end) | |
925 b->ranges[part].hi = part_end; | |
926 } | |
927 new_bits |= bit; | |
928 | |
929 start = part_end; | |
930 bit <<= 1; | |
931 ++part; | |
932 } while (part_end < end); | |
933 | |
934 if (urgent) { | |
935 b->flush_urgent |= new_bits; | |
936 if (!db_urgent_need_flush_secs) { | |
937 db_urgent_need_flush_secs = (db_time.tv_sec | |
938 + DB_URGENT_NEED_FLUSH_SECS); | |
939 if (db_need_flush_secs == 0) | |
940 db_need_flush_secs = db_urgent_need_flush_secs; | |
941 } | |
942 } else { | |
943 b->flush |= new_bits; | |
944 if (db_need_flush_secs == 0 | |
945 || db_need_flush_secs > db_time.tv_sec+DB_NEED_FLUSH_SECS) | |
946 db_need_flush_secs = db_time.tv_sec+DB_NEED_FLUSH_SECS; | |
947 } | |
948 } | |
949 | |
950 | |
951 | |
952 /* Shut down the database, including flushing and releasing all | |
953 * mmap()'ed buffers | |
954 * Do nothing to the files for mode=-1 because the file is new and garbage | |
955 * or the caller is a fork of the server shedding memory. */ | |
956 u_char | |
957 db_close(int mode) /* -1=invalidate, 0=dirty, 1=clean */ | |
958 { | |
959 u_char result; | |
960 | |
961 if (mode >= 0) { | |
962 /* flush the data and then release and flush the dirty flags */ | |
963 result = make_clean(mode == 0 ? 0 : 1); | |
964 if (!db_unload(0, 0)) | |
965 result = 0; | |
966 } else { | |
967 db_invalidate = 1; | |
968 rel_db_states(); | |
969 result = (db_unload(0, 0) > 0); | |
970 } | |
971 | |
972 /* Close the hash table first because the server is often | |
973 * waiting for the lock on the main file held by dbclean. | |
974 * Destroy the hash table if it is bad */ | |
975 if (db_hash_fd >= 0) { | |
976 if (0 > close(db_hash_fd)) { | |
977 dcc_pemsg(EX_IOERR, 0, "close(%s): %s", | |
978 db_hash_nm, ERROR_STR()); | |
979 result = 0; | |
980 } | |
981 db_hash_fd = -1; | |
982 } | |
983 if (db_fd >= 0) { | |
984 if (0 > close(db_fd)) { | |
985 dcc_pemsg(EX_IOERR, 0, "close(%s): %s", | |
986 db_nm, ERROR_STR()); | |
987 result = 0; | |
988 } | |
989 db_fd = -1; | |
990 } | |
991 | |
992 db_locked.tv_sec = 0; | |
993 return result; | |
994 } | |
995 | |
996 | |
997 | |
998 /* Delete the hash table if the system is being rebooted and we | |
999 * don't trust the file system to get all of the hash table. This might | |
1000 * make system shut down faster */ | |
1001 void | |
1002 db_stop(void) | |
1003 { | |
1004 if (db_hash_fd < 0 | |
1005 || !DB_IS_LOCKED() | |
1006 || !db_not_synced | |
1007 || db_hash_nm[0] == '\0') | |
1008 return; | |
1009 | |
1010 if (0 > unlink(db_hash_nm) | |
1011 && errno != ENOENT) | |
1012 dcc_error_msg("unlink(%s): %s", db_hash_nm, ERROR_STR()); | |
1013 } | |
1014 | |
1015 | |
1016 | |
1017 /* see if (another) instance of dbclean is already running */ | |
1018 static int dbclean_lock_fd = -1; | |
1019 static DCC_PATH dbclean_lock_nm; | |
1020 | |
1021 u_char /* 1=no (other) dbclean */ | |
1022 lock_dbclean(DCC_EMSG emsg, const char *cur_db_nm) | |
1023 { | |
1024 char pid[32]; | |
1025 int i; | |
1026 | |
1027 fnm2rel_good(dbclean_lock_nm, cur_db_nm, DB_LOCK_SUFFIX); | |
1028 dbclean_lock_fd = dcc_lock_open(emsg, dbclean_lock_nm, | |
1029 O_RDWR|O_CREAT, | |
1030 DCC_LOCK_OPEN_NOWAIT, | |
1031 DCC_LOCK_ALL_FILE, 0); | |
1032 if (dbclean_lock_fd < 0) | |
1033 return 0; | |
1034 | |
1035 i = 1+snprintf(pid, sizeof(pid), "%ld\n", (long)getpid()); | |
1036 if (i != write(dbclean_lock_fd, pid, i)) | |
1037 dcc_logbad(EX_IOERR, "write(%s, pid): %s", | |
1038 dbclean_lock_nm, ERROR_STR()); | |
1039 | |
1040 /* Let anyone write in it in case we are running as root | |
1041 * and get interrupted by a crash or gdb. A stray, stale | |
1042 * private lock file cannot be locked */ | |
1043 chmod(dbclean_lock_nm, 0666); | |
1044 | |
1045 return 1; | |
1046 } | |
1047 | |
1048 | |
1049 | |
1050 void | |
1051 unlock_dbclean(void) | |
1052 { | |
1053 if (dbclean_lock_fd >= 0) { | |
1054 if (0 > unlink(dbclean_lock_nm)) | |
1055 dcc_error_msg("unlink(%s): %s", | |
1056 dbclean_lock_nm, ERROR_STR()); | |
1057 close(dbclean_lock_fd); | |
1058 dbclean_lock_fd = -1; | |
1059 } | |
1060 } | |
1061 | |
1062 | |
1063 | |
1064 /* This locking does only multiple-readers/single-writer */ | |
1065 int /* -1=failed, 0=was not locked, 1=was */ | |
1066 db_lock(void) | |
1067 { | |
1068 struct stat sb; | |
1069 | |
1070 if (DB_IS_LOCKED()) | |
1071 return 1; | |
1072 | |
1073 if (!dcc_exlock_fd(0, db_fd, DCC_LOCK_ALL_FILE, 15*60, "", db_nm)) | |
1074 return -1; | |
1075 if (0 > fstat(db_fd, &sb)) { | |
1076 db_failure(__LINE__,__FILE__, EX_IOERR, 0, | |
1077 "stat(%s): %s", db_nm, ERROR_STR()); | |
1078 return -1; | |
1079 } | |
1080 if (db_fsize != (DB_HOFF)sb.st_size) { | |
1081 if (db_fsize > (DB_HOFF)sb.st_size || !db_rdonly) { | |
1082 db_failure(__LINE__,__FILE__, EX_IOERR, 0, | |
1083 "%s size changed from "OFF_HPAT | |
1084 " to "OFF_HPAT, | |
1085 db_nm, db_fsize, sb.st_size); | |
1086 return -1; | |
1087 } | |
1088 db_fsize = sb.st_size; | |
1089 } | |
1090 | |
1091 db_locked = db_time; | |
1092 return 0; | |
1093 } | |
1094 | |
1095 | |
1096 | |
1097 /* flush buffers to make the disk reasonably correct but not perfect | |
1098 * This does not compensate for a lack of coherent mmap() in the system. | |
1099 * | |
1100 * It leaves the disk only as accurate as implied by db_not_synced. | |
1101 * This flushes buffers marked either urgent and ordinarily dirty. | |
1102 * If db_not_synced is set, then non-urgent dirty bits are not set. */ | |
1103 static u_char | |
1104 make_clean_flush(void) | |
1105 { | |
1106 DB_BUF *b; | |
1107 u_char result; | |
1108 | |
1109 result = 1; | |
1110 for (b = buf_oldest; b != 0; b = b->newer) { | |
1111 if (b->buf_type == DB_BUF_TYPE_FREE) | |
1112 continue; | |
1113 if (!buf_flush(0, b, 0)) | |
1114 result = 0; | |
1115 } | |
1116 | |
1117 return result; | |
1118 } | |
1119 | |
1120 | |
1121 | |
1122 /* push all of our database changes to the disk and try to clear the dirty bit | |
1123 * do not necessarily unmap anything */ | |
1124 u_char | |
1125 make_clean(u_char clean) /* 0=leave hash marked dirty, */ | |
1126 { /* 1=marked clean, 2=fsync */ | |
1127 u_char need_db_fsync, result; | |
1128 struct stat sb; | |
1129 | |
1130 rel_db_states(); | |
1131 | |
1132 result = 1; | |
1133 | |
1134 /* quit if we are giving up */ | |
1135 if (db_invalidate) | |
1136 return result; | |
1137 | |
1138 if (db_failed_line) | |
1139 clean = 0; | |
1140 | |
1141 if (!make_clean_flush()) { | |
1142 clean = 0; | |
1143 result = 0; | |
1144 } | |
1145 | |
1146 /* simply unlock all of the buffers if they are clean | |
1147 * and do not need to (or cannot) be synchronized with fsync() */ | |
1148 if (!db_dirty | |
1149 && (clean < 2 /* not asked to synchronize */ | |
1150 || db_rdonly /* cannot be synchronized */ | |
1151 || !db_not_synced)) /* does not need to be synchronized */ | |
1152 return result; | |
1153 | |
1154 need_db_fsync = (clean == 2); | |
1155 | |
1156 /* Send the meta-data to disk so that other processes | |
1157 * such as dbclean can find the new length of the file | |
1158 * on Solaris. Otherwise the file looks broken because | |
1159 * its contained data length can be larger than its | |
1160 * inode size on Solaris. */ | |
1161 if (!need_db_fsync && clean) { | |
1162 if (0 > fstat(db_fd, &sb)) { | |
1163 dcc_error_msg("make_clean fstat(%s): %s", | |
1164 db_nm, ERROR_STR()); | |
1165 need_db_fsync = 1; | |
1166 } else if (db_fsize != (DB_HOFF)sb.st_size) { | |
1167 if (db_debug) | |
1168 quiet_trace_msg("need fsync() because db_fsize=" | |
1169 OFF_HPAT" but stat="OFF_HPAT, | |
1170 db_fsize, sb.st_size); | |
1171 need_db_fsync = 1; | |
1172 } | |
1173 } | |
1174 | |
1175 if (need_db_fsync | |
1176 && 0 > fsync(db_fd)) { | |
1177 dcc_error_msg("make_clean fsync(%s): %s", | |
1178 db_nm, ERROR_STR()); | |
1179 clean = 0; | |
1180 result = 0; | |
1181 } | |
1182 | |
1183 if (clean && !map_hash_ctl(0, 0)) { | |
1184 clean = 0; | |
1185 result = 0; | |
1186 } | |
1187 if (clean == 2) { | |
1188 if (0 > fsync(db_hash_fd)) { | |
1189 dcc_error_msg("make_clean fsync(%s): %s", | |
1190 db_hash_nm, ERROR_STR()); | |
1191 clean = 0; | |
1192 result = 0; | |
1193 } else { | |
1194 db_not_synced = 0; | |
1195 db_sts.hash_ctl.d.vals->s.flags &= ~HASH_CTL_FG_NOSYNC; | |
1196 SET_FLUSH_HCTL(1); | |
1197 if (!make_clean_flush()) { | |
1198 clean = 0; | |
1199 result = 0; | |
1200 } | |
1201 } | |
1202 } | |
1203 | |
1204 /* Clean the dirty flag in the hash table. | |
1205 * With luck, this will reach the disk after everything else. */ | |
1206 if (clean | |
1207 && !(db_sts.hash_ctl.d.vals->s.flags & HASH_CTL_FG_CLEAN)) { | |
1208 db_sts.hash_ctl.d.vals->s.flags |= HASH_CTL_FG_CLEAN; | |
1209 SET_FLUSH_HCTL(0); | |
1210 } | |
1211 | |
1212 /* finally flush the flag in the hash table */ | |
1213 rel_db_states(); | |
1214 if (!make_clean_flush()) | |
1215 result = 0; | |
1216 | |
1217 if (clean) | |
1218 db_dirty = 0; | |
1219 return result; | |
1220 } | |
1221 | |
1222 | |
1223 | |
1224 /* mark the hash file and so the database dirty */ | |
1225 static u_char | |
1226 db_make_dirty(DCC_EMSG emsg) | |
1227 { | |
1228 if (db_dirty) | |
1229 return 1; | |
1230 | |
1231 if (!DB_IS_LOCKED()) { | |
1232 dcc_logbad(EX_SOFTWARE, "dirtying unlocked database"); | |
1233 return 0; | |
1234 } | |
1235 | |
1236 if (db_rdonly) | |
1237 dcc_logbad(EX_SOFTWARE, "dirtying read-only database"); | |
1238 | |
1239 if (!map_hash_ctl(emsg, 0)) | |
1240 return 0; | |
1241 db_sts.hash_ctl.d.vals->s.flags &= ~HASH_CTL_FG_CLEAN; | |
1242 #ifdef USE_MAP_NOSYNC | |
1243 if (!(db_sts.hash_ctl.d.vals->s.flags & HASH_CTL_FG_NOSYNC)) { | |
1244 db_sts.hash_ctl.d.vals->s.synced = time(0); | |
1245 db_sts.hash_ctl.d.vals->s.flags |= HASH_CTL_FG_NOSYNC; | |
1246 } | |
1247 db_not_synced = 1; | |
1248 #endif | |
1249 | |
1250 SET_FLUSH_HCTL(1); | |
1251 if (!buf_flush_part(emsg, db_sts.hash_ctl.b, 0, 0)) | |
1252 return 0; | |
1253 | |
1254 db_dirty = 1; | |
1255 return 1; | |
1256 } | |
1257 | |
1258 | |
1259 | |
1260 /* (start to) unlock the database */ | |
1261 u_char /* 0=failed, 1=at least started */ | |
1262 db_unlock(void) | |
1263 { | |
1264 DB_BUF *b; | |
1265 int result; | |
1266 | |
1267 if (!DB_IS_LOCKED()) | |
1268 return 1; | |
1269 | |
1270 /* Clear the dirty bit in the database because we may not | |
1271 * be able to lock the database later to clear the dirty bit. | |
1272 * Dbclean needs to see the dirty bit clear. */ | |
1273 result = make_clean(1); | |
1274 | |
1275 /* Release DB_BUF_FG_USE_WRITE buffers because they are not consistent | |
1276 * among processes | |
1277 * Release everything if dccd wants stay out of RAM in favor | |
1278 * of dbclean */ | |
1279 for (b = buf_oldest; b != 0; b = b->newer) { | |
1280 if (b->buf_type == DB_BUF_TYPE_FREE) | |
1281 continue; | |
1282 if (db_minimum_map | |
1283 || (b->flags & DB_BUF_FG_USE_WRITE)) | |
1284 buf_munmap(0, b); | |
1285 } | |
1286 | |
1287 if (!dcc_unlock_fd(0, db_fd, DCC_LOCK_ALL_FILE, "", db_nm)) | |
1288 result = 0; | |
1289 db_locked.tv_sec = 0; | |
1290 return result; | |
1291 } | |
1292 | |
1293 | |
1294 | |
1295 static const char * | |
1296 mbyte2str(DB_PTR val) | |
1297 { | |
1298 return db_ptr2str(val*1024*1024); | |
1299 } | |
1300 | |
1301 | |
1302 | |
1303 #if defined(RLIMIT_AS) || defined(RLIMIT_RSS) || defined(RLIMIT_FSIZE) | |
1304 static DB_PTR | |
1305 use_rlimit(int resource, const char *rlimit_nm, | |
1306 DB_PTR cur_val, DB_PTR min_val, const char *val_nm) | |
1307 { | |
1308 struct rlimit limit_old, limit_new; | |
1309 DB_PTR new_val; | |
1310 | |
1311 if (0 > getrlimit(resource, &limit_old)) { | |
1312 dcc_error_msg("getrlimit(%s): %s", rlimit_nm, ERROR_STR()); | |
1313 return cur_val; | |
1314 } | |
1315 | |
1316 if ((DB_PTR)limit_old.rlim_cur >= cur_val+DB_PAD_MBYTE*1024) | |
1317 return cur_val; | |
1318 | |
1319 /* assume we are root and try to increase the hard limit */ | |
1320 if ((DB_PTR)limit_new.rlim_max < cur_val+DB_PAD_BYTE) { | |
1321 limit_new = limit_old; | |
1322 limit_new.rlim_max = cur_val+DB_PAD_BYTE; | |
1323 if (0 > setrlimit(resource, &limit_new)) { | |
1324 if (db_debug) | |
1325 quiet_trace_msg("setrlimit(%s, " | |
1326 L_DPAT","L_DPAT"): %s", | |
1327 rlimit_nm, | |
1328 (DB_PTR)limit_new.rlim_cur, | |
1329 (DB_PTR)limit_new.rlim_max, | |
1330 ERROR_STR()); | |
1331 } else { | |
1332 if (0 > getrlimit(resource, &limit_old)) { | |
1333 dcc_error_msg("getrlimit(%s): %s", | |
1334 rlimit_nm, ERROR_STR()); | |
1335 return cur_val; | |
1336 } | |
1337 } | |
1338 } | |
1339 | |
1340 limit_new = limit_old; | |
1341 if ((DB_PTR)limit_new.rlim_max < min_val+DB_PAD_BYTE) | |
1342 limit_new.rlim_max = min_val + DB_PAD_BYTE; | |
1343 limit_new.rlim_cur = limit_new.rlim_max; | |
1344 if ((DB_PTR)limit_new.rlim_cur > cur_val+DB_PAD_BYTE) | |
1345 limit_new.rlim_cur = cur_val+DB_PAD_BYTE; | |
1346 if (0 > setrlimit(resource, &limit_new)) { | |
1347 dcc_error_msg("setrlimit(%s, "L_DPAT","L_DPAT"): %s", | |
1348 rlimit_nm, | |
1349 (DB_PTR)limit_new.rlim_cur, | |
1350 (DB_PTR)limit_new.rlim_max, | |
1351 ERROR_STR()); | |
1352 new_val = limit_old.rlim_cur - DB_PAD_BYTE; | |
1353 if (new_val < min_val) | |
1354 new_val = min_val; | |
1355 } else { | |
1356 if (limit_old.rlim_cur < limit_new.rlim_cur | |
1357 && db_debug) | |
1358 quiet_trace_msg("increased %s from %s to %s", | |
1359 rlimit_nm, | |
1360 db_ptr2str(limit_old.rlim_cur), | |
1361 #ifdef RLIM_INFINITY | |
1362 (limit_new.rlim_cur == RLIM_INFINITY) | |
1363 ? "infinity" : | |
1364 #endif | |
1365 db_ptr2str(limit_new.rlim_cur)); | |
1366 new_val = limit_new.rlim_cur - DB_PAD_BYTE; | |
1367 } | |
1368 | |
1369 if (cur_val > new_val) { | |
1370 quiet_trace_msg("%s reduced %s from %s to %s", | |
1371 rlimit_nm, val_nm, | |
1372 db_ptr2str(cur_val), | |
1373 db_ptr2str(new_val)); | |
1374 return new_val; | |
1375 } | |
1376 | |
1377 return cur_val; | |
1378 } | |
1379 #endif | |
1380 | |
1381 | |
1382 | |
1383 static void | |
1384 get_db_max_rss(void) | |
1385 { | |
1386 DB_PTR old_val, new_val, db_min_mbyte, db_min_byte, db_max_mbyte; | |
1387 int physmem_str_len; | |
1388 DB_PTR physmem; | |
1389 | |
1390 /* use default maximum if maximum is bogus or unset by ./configure */ | |
1391 db_max_mbyte = MAX_MAX_DB_MBYTE; | |
1392 #if DB_MAX_MBYTE != 0 | |
1393 db_max_mbyte = DB_MAX_MBYTE; | |
1394 if (db_max_mbyte < DB_MIN_MIN_MBYTE | |
1395 || db_max_mbyte > MAX_MAX_DB_MBYTE) { | |
1396 quiet_trace_msg("ignore bad ./configure --with-max-db-mem=%d", | |
1397 DB_MAX_MBYTE); | |
1398 db_max_mbyte = MAX_MAX_DB_MBYTE; | |
1399 } else if (db_debug) { | |
1400 quiet_trace_msg("DB max=%s" | |
1401 " from ./configure --with-max-db-mem=%d", | |
1402 mbyte2str(db_max_mbyte), DB_MAX_MBYTE); | |
1403 } | |
1404 #endif | |
1405 #ifndef HAVE_BIG_FILES | |
1406 /* we need big off_t for files larger than 2 GBytes */ | |
1407 if (db_max_mbyte > DB_MAX_2G_MBYTE) { | |
1408 old_val = db_max_mbyte; | |
1409 db_max_mbyte= DB_MAX_2G_MBYTE; | |
1410 if (db_debug) | |
1411 quiet_trace_msg("32-bit off_t reduced DB max from %s" | |
1412 " to %s", | |
1413 mbyte2str(old_val), | |
1414 mbyte2str(db_max_mbyte)); | |
1415 } | |
1416 #endif | |
1417 | |
1418 /* use default if ./configure --with-db-memory=MB is bogus or unset */ | |
1419 #if DB_MIN_MBYTE == 0 | |
1420 db_min_mbyte = 64; | |
1421 #else | |
1422 db_min_mbyte = DB_MIN_MBYTE; | |
1423 if (db_min_mbyte < DB_MIN_MIN_MBYTE) { | |
1424 quiet_trace_msg("ignore bad ./configure --with-db-memory=%d", | |
1425 DB_MIN_MBYTE); | |
1426 db_min_mbyte = DB_DEF_MIN_MBYTE; | |
1427 } else if (db_min_mbyte > db_max_mbyte) { | |
1428 quiet_trace_msg("ignore ./configure --with-db-memory=%d" | |
1429 " > DB max=%s", | |
1430 mbyte2str(db_max_mbyte)); | |
1431 db_min_mbyte = DB_DEF_MIN_MBYTE; | |
1432 } else if (db_debug) { | |
1433 quiet_trace_msg("use ./configure --with-db-memory=%d", | |
1434 DB_MIN_MBYTE); | |
1435 } | |
1436 #endif | |
1437 | |
1438 db_min_byte = db_min_mbyte * (1024*1024); | |
1439 db_max_byte = db_max_mbyte * (1024*1024); | |
1440 | |
1441 #ifdef RLIMIT_FSIZE | |
1442 db_max_mbyte = (use_rlimit(RLIMIT_FSIZE, "RLIMIT_FSIZE", | |
1443 db_max_byte, db_min_byte, "DB max") | |
1444 / (1024*1024)); | |
1445 db_max_byte = db_max_mbyte * (1024*1024); | |
1446 #endif /* RLIMIT_FSIZE */ | |
1447 | |
1448 physmem = 0; | |
1449 #ifdef HAVE_PHYSMEM_TOTAL | |
1450 /* maybe someday physmem_total() will be widely available */ | |
1451 physmem = physmem_total(); | |
1452 if (db_debug) | |
1453 quiet_trace_msg("real=%s from physmem_total()", | |
1454 db_ptr2str(physmem)); | |
1455 #endif | |
1456 #ifdef HAVE__SC_PHYS_PAGES | |
1457 if (physmem == 0) { | |
1458 long pages, sizepage; | |
1459 | |
1460 if ((pages = sysconf(_SC_PHYS_PAGES)) == -1) { | |
1461 dcc_error_msg("sysconf(_SC_PHYS_PAGES): %s", | |
1462 ERROR_STR()); | |
1463 } else if ((sizepage = sysconf(_SC_PAGESIZE)) == -1) { | |
1464 dcc_error_msg("sysconf(_SC_PAGESIZE): %s", | |
1465 ERROR_STR()); | |
1466 } else { | |
1467 physmem = (DB_PTR)pages * (DB_PTR)sizepage; | |
1468 if (db_debug) | |
1469 quiet_trace_msg("real=%s" | |
1470 " from sysconf(_SC_PHYS_PAGES)" | |
1471 " and sysconf(_SC_PAGESIZE)", | |
1472 db_ptr2str(physmem)); | |
1473 } | |
1474 } | |
1475 #endif | |
1476 #ifdef HAVE_HW_PHYSMEM | |
1477 if (physmem == 0) { | |
1478 int mib[2] = {CTL_HW, HW_PHYSMEM}; | |
1479 unsigned long int hw_physmem; | |
1480 size_t hw_physmem_len; | |
1481 | |
1482 hw_physmem_len = sizeof(hw_physmem); | |
1483 if (0 > sysctl(mib, 2, &hw_physmem, &hw_physmem_len, 0,0)) { | |
1484 dcc_error_msg("sysctl(HW_PHYSMEM): %s", ERROR_STR()); | |
1485 } else { | |
1486 physmem = hw_physmem; | |
1487 if (db_debug) | |
1488 quiet_trace_msg("real=%s from sysctl(mib)", | |
1489 db_ptr2str(physmem)); | |
1490 } | |
1491 } | |
1492 #endif | |
1493 #ifdef HAVE_PSTAT_GETSTATIC | |
1494 if (physmem == 0) { | |
1495 struct pst_static pss; | |
1496 | |
1497 if (0 > pstat_getstatic(&pss, sizeof pss, 1, 0)) { | |
1498 dcc_error_msg("pstat_getstatic(): %s", ERROR_STR()); | |
1499 } else if (pss.physical_memory <= 0 | |
1500 || pss.page_size < 0) { | |
1501 dcc_error_msg("pstat_getstatic() says" | |
1502 " physical_memory=%d page_size=%d", | |
1503 pss.physical_memory, pss.page_size); | |
1504 } else { | |
1505 physmem = ((DB_PTR)pss.physical_memory | |
1506 * (DB_PTR)pss.page_size); | |
1507 if (db_debug) | |
1508 quiet_trace_msg("real=%s" | |
1509 " from pstat_getstatic()", | |
1510 db_ptr2str(physmem)); | |
1511 } | |
1512 } | |
1513 #endif | |
1514 | |
1515 physmem_str_len = 0; | |
1516 db_physmem_str[0] = '\0'; | |
1517 if (physmem == 0) { | |
1518 quiet_trace_msg("failed to get real memory size"); | |
1519 } else { | |
1520 physmem_str_len = snprintf(db_physmem_str, | |
1521 sizeof(db_physmem_str), | |
1522 " real=%s", | |
1523 db_ptr2str(physmem)); | |
1524 | |
1525 /* Try to use half of physical memory | |
1526 * if there is less than 2 GByte | |
1527 * all except 512 MByte between 2 GByte and 4 GByte, | |
1528 * and all but 1 GByte if there is more than 4 GByte */ | |
1529 if (physmem/(1024*1024) < 2*1024) | |
1530 new_val = physmem/2; | |
1531 else if (physmem/(1024*1024) <= 4*1024) | |
1532 new_val = physmem - 512*(1024*1024); | |
1533 else | |
1534 new_val = physmem - 1024*(1024*1024); | |
1535 if (new_val < db_min_byte) { | |
1536 if (db_debug) | |
1537 quiet_trace_msg("real=%s would give DB max=%s" | |
1538 " smaller than minimum %s", | |
1539 db_ptr2str(physmem), | |
1540 db_ptr2str(new_val), | |
1541 mbyte2str(db_min_mbyte)); | |
1542 new_val = db_min_byte; | |
1543 } | |
1544 if (db_max_byte > new_val) { | |
1545 old_val = db_max_byte; | |
1546 db_max_mbyte = new_val / (1024*1024); | |
1547 db_max_byte = db_max_mbyte * (1024*1024); | |
1548 if (db_debug) | |
1549 quiet_trace_msg("real=%s reduced DB max" | |
1550 " from %s to %s", | |
1551 db_ptr2str(physmem), | |
1552 db_ptr2str(old_val), | |
1553 db_ptr2str(db_max_byte)); | |
1554 } | |
1555 } | |
1556 | |
1557 /* window need not be larger than the limit on the database size */ | |
1558 db_max_rss = db_max_byte; | |
1559 | |
1560 #ifdef RLIMIT_AS | |
1561 /* try not to break process virtual memory limit, | |
1562 * but only if it is not ridiculously tiny */ | |
1563 db_max_rss = use_rlimit(RLIMIT_AS, "RLIMIT_AS", | |
1564 db_max_rss, db_min_byte, "max RSS"); | |
1565 #endif /* RLIMIT_AS */ | |
1566 #ifdef RLIMIT_RSS | |
1567 /* try not to break process resident memory limit | |
1568 * but only if it is not ridiculously tiny */ | |
1569 db_max_rss = use_rlimit(RLIMIT_RSS, "RLIMIT_RSS", | |
1570 db_max_rss, db_min_byte, "max RSS"); | |
1571 #endif /* RLIMIT_RSS */ | |
1572 | |
1573 /* limit the database to the window size */ | |
1574 if (db_max_byte > db_max_rss) { | |
1575 old_val = db_max_mbyte; | |
1576 db_max_mbyte = db_max_rss / (1024*1024); | |
1577 db_max_byte = db_max_mbyte * (1024*1024); | |
1578 if (db_debug) | |
1579 quiet_trace_msg("max RSS reduced DB max from %s to %s", | |
1580 mbyte2str(old_val), | |
1581 mbyte2str(db_max_mbyte)); | |
1582 } | |
1583 | |
1584 #ifndef HAVE_64BIT_PTR | |
1585 /* We cannot use a window larger than 2 GBytes on most systems without | |
1586 * big pointers. Among the things that break is trying to mmap() more | |
1587 * than 2 GBytes. So limit the window on 32-bit systems to a little | |
1588 * less than 2 GBytes and the database to not much more */ | |
1589 if (db_max_rss > DB_MAX_2G_MBYTE*(1024*1024)) { | |
1590 if (db_debug) | |
1591 quiet_trace_msg("32-bit pointers reduced max RSS" | |
1592 " from %s to %s", | |
1593 db_ptr2str(db_max_rss), | |
1594 mbyte2str(DB_MAX_2G_MBYTE)); | |
1595 db_max_rss = DB_MAX_2G_MBYTE*(1024*1024); | |
1596 new_val = db_max_rss+db_max_rss/4; | |
1597 if (db_max_byte > new_val) { | |
1598 old_val = db_max_mbyte; | |
1599 db_max_mbyte = new_val / (1024*1024); | |
1600 db_max_byte = db_max_mbyte * (1024*1024); | |
1601 if (db_debug) | |
1602 quiet_trace_msg("32-bit pointers reduced DB max" | |
1603 " from %s to %s", | |
1604 mbyte2str(old_val), | |
1605 mbyte2str(db_max_mbyte)); | |
1606 } | |
1607 } | |
1608 #endif | |
1609 | |
1610 snprintf(&db_physmem_str[physmem_str_len], | |
1611 sizeof(db_physmem_str) - physmem_str_len, | |
1612 " max RSS=%s DB max=%s", | |
1613 db_ptr2str(db_max_rss), mbyte2str(db_max_mbyte)); | |
1614 } | |
1615 | |
1616 | |
1617 | |
1618 /* Pick a buffer size that will hold an integral number of DB hash | |
1619 * table entries and is a multiple of system's page size. | |
1620 * The entire hash table should reside in memory | |
1621 * if the system has enough memory. */ | |
1622 u_int | |
1623 db_get_pagesize(u_int old_pagesize, /* 0 or required page size */ | |
1624 u_int tgt_pagesize) /* 0 or target page size */ | |
1625 { | |
1626 u_int min_pagesize, max_pagesize; | |
1627 | |
1628 /* Ask the operating system only once so we don't get differing | |
1629 * answers and so compute a varying page size. | |
1630 * Some systems can't keep their stories straight. */ | |
1631 if (db_max_rss == 0) | |
1632 get_db_max_rss(); | |
1633 | |
1634 /* Compute the least common multiple of the system page and | |
1635 * the DB hash table entry size. | |
1636 * This will give us the smallest page size that we can use. */ | |
1637 system_pagesize = getpagesize(); | |
1638 min_pagesize = lcm(system_pagesize, sizeof(HASH_ENTRY)); | |
1639 | |
1640 /* The kludge to speed conversion of database addresses to page numbers | |
1641 * and offsets on 32-bit systems depends on the page size being | |
1642 * a multiple of 256 */ | |
1643 if ((min_pagesize % (1<<DB_PTR_SHIFT)) != 0) | |
1644 dcc_logbad(EX_SOFTWARE, "page size not a multiple of 256"); | |
1645 | |
1646 /* The DB buffer or page size must also be a multiple of the | |
1647 * the end-of-page padding used in the main database file. */ | |
1648 if (sizeof(DB_RCD) % DB_RCD_HDR_LEN != 0) | |
1649 dcc_logbad(EX_SOFTWARE, | |
1650 "DB padding size %d" | |
1651 " is not a divisor of DB entry size %d", | |
1652 DB_RCD_HDR_LEN, ISZ(DB_RCD)); | |
1653 if (DB_RCD_LEN_MAX % DB_RCD_HDR_LEN != 0) | |
1654 dcc_logbad(EX_SOFTWARE, | |
1655 "DB record not a multiple of header size"); | |
1656 min_pagesize = lcm(min_pagesize, DB_RCD_HDR_LEN); | |
1657 | |
1658 /* Use the old buffer size if available so we are not confused | |
1659 * by padding at the ends of the old pages. | |
1660 * Fail if it is impossible. This should cause dbclean to | |
1661 * rebuild the database. */ | |
1662 if (old_pagesize != 0) { | |
1663 if ((old_pagesize % min_pagesize) != 0) | |
1664 return 0; | |
1665 /* adjust the number of buffers to fit our window size */ | |
1666 db_buf_total = db_max_rss / old_pagesize; | |
1667 if (db_buf_total < (int)DB_BUF_MIN) | |
1668 return 0; | |
1669 if (db_buf_total > DB_BUF_MAX) | |
1670 db_buf_total = DB_BUF_MAX; | |
1671 return old_pagesize; | |
1672 } | |
1673 | |
1674 db_buf_total = DB_BUF_MAX; | |
1675 max_pagesize = db_max_rss / db_buf_total; | |
1676 max_pagesize -= max_pagesize % min_pagesize; | |
1677 | |
1678 /* If we have a target page size, try to use it instead of the | |
1679 * maximum page size allowed by the resident set size. | |
1680 * Normal DCC databases grow large and want pages as large as possible | |
1681 * but greylist databases are often small. | |
1682 * We also want a tiny page when first reading the parameters while | |
1683 * opening. */ | |
1684 if (tgt_pagesize != 0 && tgt_pagesize < max_pagesize) { | |
1685 tgt_pagesize -= tgt_pagesize % min_pagesize; | |
1686 if (tgt_pagesize < min_pagesize) | |
1687 tgt_pagesize = min_pagesize; | |
1688 return tgt_pagesize; | |
1689 } else if (max_pagesize > min_pagesize) { | |
1690 return max_pagesize; | |
1691 } else { | |
1692 return min_pagesize; | |
1693 } | |
1694 } | |
1695 | |
1696 | |
1697 | |
1698 /* (re)create the buffer pool | |
1699 * The buffers are small blocks that point to the real mmap()'ed memory. | |
1700 */ | |
1701 u_char | |
1702 db_buf_init(u_int old_pagesize, /* 0 or required page size */ | |
1703 u_int tgt_pagesize) /* 0 or target page size */ | |
1704 { | |
1705 DB_BUF *b, *bprev, *bnext; | |
1706 int i; | |
1707 | |
1708 | |
1709 db_pagesize = db_get_pagesize(old_pagesize, tgt_pagesize); | |
1710 if (db_pagesize == 0) | |
1711 return 0; | |
1712 | |
1713 /* The fragments of pages must be multiples of system pages | |
1714 * so that msync() on Solaris can be given multiples of system | |
1715 * pages. It's also a generally good idea. */ | |
1716 db_pagesize_part = db_pagesize/DB_BUF_NUM_PARTS; | |
1717 db_pagesize_part = ((db_pagesize_part + system_pagesize-1) | |
1718 / system_pagesize) * system_pagesize; | |
1719 | |
1720 db_page_max = db_pagesize - DB_RCD_HDR_LEN; | |
1721 db_hash_page_len = db_pagesize/sizeof(HASH_ENTRY); | |
1722 | |
1723 db_max_hash_entries = (MAX_HASH_ENTRIES | |
1724 - MAX_HASH_ENTRIES % db_hash_page_len); | |
1725 | |
1726 memset(db_bufs, 0, sizeof(db_bufs)); | |
1727 b = db_bufs; | |
1728 buf_oldest = b; | |
1729 bprev = 0; | |
1730 for (i = db_buf_total; --i != 0; b = bnext) { | |
1731 bnext = b+1; | |
1732 b->older = bprev; | |
1733 b->newer = bnext; | |
1734 bprev = b; | |
1735 } | |
1736 b->older = bprev; | |
1737 buf_newest = b; | |
1738 | |
1739 memset(db_buf_hash, 0, sizeof(db_buf_hash)); | |
1740 | |
1741 return 1; | |
1742 } | |
1743 | |
1744 | |
1745 | |
1746 static u_char | |
1747 make_new_hash(DCC_EMSG emsg, DB_HADDR new_hash_len) | |
1748 { | |
1749 struct stat sb; | |
1750 HASH_ENTRY *hash; | |
1751 DB_HADDR next_haddr, cur_haddr, prev_haddr; | |
1752 u_int pagenum; | |
1753 | |
1754 if (getuid() == 0) { | |
1755 /* if we are running as root, | |
1756 * don't change the owner of the database */ | |
1757 if (0 > fstat(db_fd, &sb)) { | |
1758 dcc_pemsg(EX_IOERR, emsg, "fstat(%s): %s", | |
1759 db_nm, ERROR_STR()); | |
1760 return 0; | |
1761 } | |
1762 if (0 > fchown(db_hash_fd, sb.st_uid, sb.st_gid)) { | |
1763 dcc_pemsg(EX_IOERR, emsg, "fchown(%s,%d,%d): %s", | |
1764 db_hash_nm, (int)sb.st_uid, (int)sb.st_gid, | |
1765 ERROR_STR()); | |
1766 return 0; | |
1767 } | |
1768 } | |
1769 | |
1770 if (new_hash_len < MIN_HASH_ENTRIES) | |
1771 new_hash_len = MIN_HASH_ENTRIES; | |
1772 | |
1773 /* Increase the requested hash table size to a multiple of the database | |
1774 * page size. The page size is chosen to be a multiple of the size of | |
1775 * a single hash table entry. */ | |
1776 db_hash_fsize = (((DB_HOFF)new_hash_len)*sizeof(HASH_ENTRY) | |
1777 + db_pagesize-1); | |
1778 db_hash_fsize -= db_hash_fsize % db_pagesize; | |
1779 new_hash_len = db_hash_fsize / sizeof(HASH_ENTRY); | |
1780 | |
1781 if (new_hash_len > db_max_hash_entries) | |
1782 new_hash_len = db_max_hash_entries; | |
1783 | |
1784 /* create the empty hash table file */ | |
1785 rel_db_states(); | |
1786 if (!db_unload(emsg, 0)) | |
1787 return 0; | |
1788 if (0 > ftruncate(db_hash_fd, 0)) { | |
1789 dcc_pemsg(EX_IOERR, emsg, "truncate(%s,"L_HPAT"): %s", | |
1790 db_hash_nm, db_csize, ERROR_STR()); | |
1791 return 0; | |
1792 } | |
1793 | |
1794 db_hash_len = new_hash_len; | |
1795 db_hash_used_stored_hash = db_hash_used = DB_HADDR_BASE; | |
1796 db_hash_divisor = get_db_hash_divisor(db_hash_len); | |
1797 | |
1798 /* Clear new hash file by linking its entries into the free list */ | |
1799 /* map and clear the first page */ | |
1800 if (!map_hash_ctl(emsg, 1)) | |
1801 return 0; | |
1802 | |
1803 /* create the header */ | |
1804 strcpy(db_sts.hash_ctl.d.vals->s.magic, HASH_MAGIC_STR); | |
1805 db_sts.hash_ctl.d.vals->s.free_fwd = DB_HADDR_BASE; | |
1806 db_sts.hash_ctl.d.vals->s.free_bak = db_hash_len-1; | |
1807 db_sts.hash_ctl.d.vals->s.len = db_hash_len; | |
1808 db_sts.hash_ctl.d.vals->s.divisor = db_hash_divisor; | |
1809 db_sts.hash_ctl.d.vals->s.used = DB_HADDR_BASE; | |
1810 db_sts.hash_ctl.d.vals->s.synced = time(0); | |
1811 db_dirty = 1; | |
1812 #ifdef USE_MAP_NOSYNC | |
1813 db_sts.hash_ctl.d.vals->s.synced = time(0); | |
1814 db_sts.hash_ctl.d.vals->s.flags |= HASH_CTL_FG_NOSYNC; | |
1815 db_not_synced = 1; | |
1816 #endif | |
1817 | |
1818 /* Link the hash table entries in the first and following pages. | |
1819 * The page size is chosen to be a multiple of the size of a | |
1820 * single hash table entry. */ | |
1821 prev_haddr = FREE_HADDR_END; | |
1822 cur_haddr = DB_HADDR_BASE; | |
1823 next_haddr = cur_haddr+1; | |
1824 hash = &db_sts.hash_ctl.d.vals->h[DB_HADDR_BASE]; | |
1825 pagenum = 0; | |
1826 for (;;) { | |
1827 do { | |
1828 DB_HADDR_CP(hash->bak, prev_haddr); | |
1829 if (next_haddr == db_hash_len) | |
1830 DB_HADDR_CP(hash->fwd, FREE_HADDR_END); | |
1831 else | |
1832 DB_HADDR_CP(hash->fwd, next_haddr); | |
1833 ++hash; | |
1834 prev_haddr = cur_haddr; | |
1835 cur_haddr = next_haddr++; | |
1836 } while (cur_haddr % db_hash_page_len != 0); | |
1837 | |
1838 if (++pagenum >= db_hash_fsize/db_pagesize) | |
1839 break; | |
1840 | |
1841 if (!map_hash(emsg, cur_haddr, &db_sts.free, 1)) | |
1842 return 0; | |
1843 db_sts.free.b->flush_urgent = (DB_BUF_FM)-1; | |
1844 hash = db_sts.free.d.h; | |
1845 } | |
1846 | |
1847 hash_clear_pg_num = 0; | |
1848 | |
1849 return 1; | |
1850 } | |
1851 | |
1852 | |
1853 | |
1854 static u_char | |
1855 check_old_hash(DCC_EMSG emsg) | |
1856 { | |
1857 static const u_char magic[sizeof(((HASH_CTL*)0)->s.magic) | |
1858 ] = HASH_MAGIC_STR; | |
1859 const HASH_CTL *vals; | |
1860 struct stat sb; | |
1861 u_char old_db; | |
1862 | |
1863 /* check the size of the existing hash file */ | |
1864 if (0 > fstat(db_hash_fd, &sb)) { | |
1865 dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s", | |
1866 db_hash_nm, ERROR_STR()); | |
1867 return 0; | |
1868 } | |
1869 db_hash_fsize = sb.st_size; | |
1870 if ((db_hash_fsize % sizeof(HASH_ENTRY)) != 0) { | |
1871 dcc_pemsg(EX_DATAERR, emsg, "%s has size "OFF_DPAT"," | |
1872 " not a multiple of %d", | |
1873 db_hash_nm, db_hash_fsize, | |
1874 ISZ(HASH_ENTRY)); | |
1875 return 0; | |
1876 } | |
1877 | |
1878 db_hash_len = db_hash_fsize/sizeof(HASH_ENTRY); | |
1879 if (db_hash_len < MIN_HASH_ENTRIES) { | |
1880 dcc_pemsg(EX_DATAERR, emsg, | |
1881 "%s has too few records, "OFF_DPAT" bytes", | |
1882 db_hash_nm, db_hash_fsize); | |
1883 return 0; | |
1884 } | |
1885 | |
1886 /* check the magic number */ | |
1887 if (!map_hash_ctl(emsg, 0)) | |
1888 return 0; | |
1889 vals = db_sts.hash_ctl.d.vals; | |
1890 if (memcmp(vals->s.magic, &magic, sizeof(magic))) { | |
1891 dcc_pemsg(EX_DATAERR, emsg, | |
1892 "%s has the wrong magic \"%.*s\"", | |
1893 db_hash_nm, ISZ(HASH_ENTRY), vals->s.magic); | |
1894 return 0; | |
1895 } | |
1896 | |
1897 if (!(vals->s.flags & HASH_CTL_FG_CLEAN)) { | |
1898 dcc_pemsg(EX_DATAERR, emsg, "%s was not closed cleanly", | |
1899 db_hash_nm); | |
1900 return 0; | |
1901 } | |
1902 if (vals->s.flags & HASH_CTL_FG_NOSYNC) { | |
1903 #ifdef HAVE_BOOTTIME | |
1904 int mib[2] = {CTL_KERN, KERN_BOOTTIME}; | |
1905 size_t boottime_len; | |
1906 #endif | |
1907 struct timeval boottime; | |
1908 | |
1909 boottime.tv_sec = 0x7fffffff; | |
1910 #ifdef HAVE_BOOTTIME | |
1911 boottime_len = sizeof(boottime); | |
1912 if (0 > sysctl(mib, 2, &boottime, &boottime_len, 0, 0)) { | |
1913 dcc_error_msg("sysctl(KERN_BOOTTIME): %s", ERROR_STR()); | |
1914 } | |
1915 #endif | |
1916 if (vals->s.synced <= boottime.tv_sec) { | |
1917 dcc_pemsg(EX_DATAERR, emsg, "%s was not synchronized;" | |
1918 " synced=%d boottime=%d", | |
1919 db_hash_nm, | |
1920 (int)vals->s.synced, (int)boottime.tv_sec); | |
1921 return 0; | |
1922 } | |
1923 db_not_synced = 1; | |
1924 } | |
1925 | |
1926 if (DB_HADDR_INVALID(vals->s.free_fwd) | |
1927 && (vals->s.free_fwd != FREE_HADDR_END | |
1928 || vals->s.free_fwd != vals->s.free_bak)) { | |
1929 dcc_pemsg(EX_DATAERR, emsg, | |
1930 "%s has a broken free list head of %#x", | |
1931 db_hash_nm, vals->s.free_fwd); | |
1932 return 0; | |
1933 } | |
1934 if (DB_HADDR_INVALID(vals->s.free_bak) | |
1935 && (vals->s.free_bak != FREE_HADDR_END | |
1936 || vals->s.free_fwd != vals->s.free_bak)) { | |
1937 dcc_pemsg(EX_DATAERR, emsg, | |
1938 "%s has a broken free list tail of %#x", | |
1939 db_hash_nm, vals->s.free_bak); | |
1940 return 0; | |
1941 } | |
1942 | |
1943 if (db_hash_len != vals->s.len) { | |
1944 dcc_pemsg(EX_DATAERR, emsg, | |
1945 "%s has %d entries but claims %d", | |
1946 db_hash_nm, db_hash_len, | |
1947 vals->s.len); | |
1948 return 0; | |
1949 } | |
1950 | |
1951 db_hash_divisor = vals->s.divisor; | |
1952 if (db_hash_divisor < MIN_HASH_DIVISOR | |
1953 || db_hash_divisor >= db_hash_len) { | |
1954 dcc_pemsg(EX_DATAERR, emsg, "%s has hash divisor %d", | |
1955 db_hash_nm, db_hash_len); | |
1956 return 0; | |
1957 } | |
1958 | |
1959 db_hash_used_stored_hash = db_hash_used = vals->s.used; | |
1960 if (db_hash_used < DB_HADDR_BASE) { | |
1961 dcc_pemsg(EX_DATAERR, emsg, | |
1962 "%s contains impossible %u entries", | |
1963 db_hash_nm, HADDR2LEN(db_hash_used)); | |
1964 return 0; | |
1965 } | |
1966 if (db_hash_used >= db_hash_len) { | |
1967 if (db_hash_used > db_hash_len) | |
1968 dcc_pemsg(EX_DATAERR, emsg, | |
1969 "%s contains only %u entries but %u used", | |
1970 db_hash_nm, | |
1971 HADDR2LEN(db_hash_len), | |
1972 HADDR2LEN(db_hash_used)); | |
1973 else | |
1974 dcc_pemsg(EX_DATAERR, emsg, | |
1975 "%s is filled with %u entries", | |
1976 db_hash_nm, | |
1977 HADDR2LEN(db_hash_len)); | |
1978 return 0; | |
1979 } | |
1980 | |
1981 /* old databases lack the growth values */ | |
1982 old_db = 0; | |
1983 if (!db_rdonly | |
1984 && db_parms.old_db_csize == 0 | |
1985 && db_parms.db_added == 0 | |
1986 && db_parms.hash_used == 0 | |
1987 && db_parms.old_hash_used == 0 | |
1988 && db_parms.hash_added == 0 | |
1989 && db_parms.rate_secs == 0 | |
1990 && db_parms.last_rate_sec == 0) { | |
1991 quiet_trace_msg("repair database growth measurements"); | |
1992 db_parms.old_db_csize = db_parms.db_csize; | |
1993 old_db = 1; | |
1994 } | |
1995 | |
1996 if (db_hash_used != db_parms.hash_used | |
1997 && db_hash_fsize != 0) { | |
1998 if (old_db) { | |
1999 quiet_trace_msg("repair db_parms.old hash_used" | |
2000 " and old_hash_used"); | |
2001 db_parms.old_hash_used = db_hash_used; | |
2002 db_parms.hash_used = db_hash_used; | |
2003 } else { | |
2004 dcc_pemsg(EX_DATAERR, emsg, | |
2005 "%s contains %d" | |
2006 " entries instead of the %d that %s claims", | |
2007 db_hash_nm, db_hash_used, | |
2008 db_parms.hash_used, db_nm); | |
2009 return 0; | |
2010 } | |
2011 } | |
2012 | |
2013 db_csize_stored_hash = vals->s.db_csize; | |
2014 if (db_csize_stored_hash != db_csize | |
2015 && db_hash_fsize != 0) { | |
2016 dcc_pemsg(EX_DATAERR, emsg, | |
2017 "%s contains "L_DPAT | |
2018 " bytes instead of the "L_DPAT" that %s claims", | |
2019 db_nm, db_csize, | |
2020 db_csize_stored_hash, db_hash_nm); | |
2021 return 0; | |
2022 } | |
2023 | |
2024 return 1; | |
2025 } | |
2026 | |
2027 | |
2028 | |
2029 /* open the files and generally get ready to work */ | |
2030 u_char /* 0=failed, 1=ok */ | |
2031 db_open(DCC_EMSG emsg, | |
2032 int new_db_fd, /* -1 or already open db_fd */ | |
2033 const char *new_db_nm, | |
2034 DB_HADDR new_hash_len, /* 0 or # of entries */ | |
2035 DB_OPEN_MODES mode) /* DB_OPEN_* */ | |
2036 { | |
2037 u_int cur_pagesize; | |
2038 int hash_flags, db_open_flags; | |
2039 struct stat db_sb; | |
2040 # define OPEN_BAIL() {if (new_db_fd >= 0) db_fd = -1; \ | |
2041 db_close(-1); return 0;} | |
2042 | |
2043 db_close(1); | |
2044 db_failed_line = __LINE__; | |
2045 db_failed_file = __FILE__; | |
2046 db_not_synced = 0; | |
2047 db_minimum_map = 0; | |
2048 db_invalidate = 0; | |
2049 db_dirty = 0; | |
2050 db_locked.tv_sec = 0; | |
2051 | |
2052 db_rdonly = (mode & DB_OPEN_RDONLY) != 0; | |
2053 db_use_write = (mode & DB_OPEN_MMAP_WRITE) != 0; | |
2054 | |
2055 memset(&db_stats, 0, sizeof(db_stats)); | |
2056 | |
2057 if (!new_db_nm && db_nm[0] == '\0') | |
2058 new_db_nm = grey_on ? DB_GREY_NAME : DB_DCC_NAME; | |
2059 if (new_db_nm) { | |
2060 if (!fnm2rel(db_nm, new_db_nm, 0) | |
2061 || !fnm2rel(db_hash_nm, db_nm, DB_HASH_SUFFIX)) { | |
2062 dcc_pemsg(EX_DATAERR, emsg, | |
2063 "invalid DB nm \"%s\"", new_db_nm); | |
2064 return 0; | |
2065 } | |
2066 } | |
2067 | |
2068 if (new_db_fd >= 0) { | |
2069 if (new_hash_len != 0) { | |
2070 dcc_logbad(EX_SOFTWARE, | |
2071 "extending db_open(%s) without locking", | |
2072 db_nm); | |
2073 return 0; | |
2074 } | |
2075 if (!db_rdonly) { | |
2076 dcc_logbad(EX_SOFTWARE, | |
2077 "db_open(%s) read/write without locking", | |
2078 db_nm); | |
2079 return 0; | |
2080 } | |
2081 db_open_flags = O_RDONLY; | |
2082 hash_flags = O_RDONLY; | |
2083 | |
2084 db_fd = new_db_fd; | |
2085 | |
2086 } else { | |
2087 db_open_flags = O_RDWR; | |
2088 if (new_hash_len != 0) { | |
2089 if (db_rdonly) { | |
2090 dcc_logbad(EX_SOFTWARE, | |
2091 "db_open(%s) creating read-only", | |
2092 db_nm); | |
2093 return 0; | |
2094 } | |
2095 hash_flags = O_RDWR | O_CREAT; | |
2096 } else { | |
2097 /* must open the file read/write to lock it */ | |
2098 hash_flags = O_RDWR; | |
2099 } | |
2100 | |
2101 db_fd = dcc_lock_open(emsg, db_nm, db_open_flags, | |
2102 (mode & DB_OPEN_LOCK_NOWAIT) | |
2103 ? DCC_LOCK_OPEN_NOWAIT | |
2104 : 0, | |
2105 DCC_LOCK_ALL_FILE, 0); | |
2106 if (db_fd == -1) { | |
2107 db_close(-1); | |
2108 return 0; | |
2109 } | |
2110 } | |
2111 gettimeofday(&db_time, 0); | |
2112 db_locked = db_time; | |
2113 if (0 > fstat(db_fd, &db_sb)) { | |
2114 dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s", db_nm, ERROR_STR()); | |
2115 OPEN_BAIL(); | |
2116 return 0; | |
2117 } | |
2118 db_csize = db_fsize = db_sb.st_size; | |
2119 if (db_fsize < ISZ(DB_HDR)) { | |
2120 dcc_pemsg(EX_IOERR, emsg, | |
2121 "%s with %d bytes is too small to be a DCC database", | |
2122 db_nm, (int)db_fsize); | |
2123 OPEN_BAIL(); | |
2124 } | |
2125 | |
2126 /* check the header of the database file by temporarily mapping it */ | |
2127 db_buf_init(0, sizeof(DB_HDR)); | |
2128 if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms, 0)) | |
2129 OPEN_BAIL(); | |
2130 | |
2131 db_parms_stored = *db_sts.db_parms.d.parms; | |
2132 db_parms = *db_sts.db_parms.d.parms; | |
2133 | |
2134 if (memcmp(db_parms.version, db_version_buf, sizeof(db_version_buf))) { | |
2135 dcc_pemsg(EX_DATAERR, emsg, | |
2136 "%s contains the wrong magic string \"%.*s\"", | |
2137 db_nm, ISZ(db_parms.version), db_parms.version); | |
2138 OPEN_BAIL(); | |
2139 } | |
2140 if (!(db_parms.flags & DB_PARM_FG_GREY) != !grey_on) { | |
2141 dcc_pemsg(EX_DATAERR, emsg, | |
2142 "%s is%s a greylist database but must%s be", | |
2143 db_nm, | |
2144 (db_parms.flags & DB_PARM_FG_GREY) ? "" : " not", | |
2145 grey_on ? "" : " not"); | |
2146 OPEN_BAIL(); | |
2147 } | |
2148 | |
2149 cur_pagesize = db_parms.pagesize; | |
2150 | |
2151 DB_SET_NOKEEP(db_parms.nokeep_cks, DCC_CK_INVALID); | |
2152 DB_SET_NOKEEP(db_parms.nokeep_cks, DCC_CK_FLOD_PATH); | |
2153 set_db_tholds(db_parms.nokeep_cks); | |
2154 | |
2155 db_ck_fuzziness = grey_on ? grey_ck_fuzziness : dcc_ck_fuzziness; | |
2156 | |
2157 db_csize = db_parms.db_csize; | |
2158 if (db_csize < sizeof(DB_HDR)) { | |
2159 dcc_pemsg(EX_DATAERR, emsg, | |
2160 "%s says it contains "L_DPAT" bytes" | |
2161 " or fewer than the minimum of %d", | |
2162 db_nm, db_csize, DB_PTR_BASE); | |
2163 /* that is a fatal error if we are not rebuilding */ | |
2164 if (new_hash_len != 0) | |
2165 OPEN_BAIL(); | |
2166 } | |
2167 if (db_csize > db_fsize) { | |
2168 dcc_pemsg(EX_DATAERR, emsg, | |
2169 "%s says it contains "L_DPAT" bytes" | |
2170 " or more than the actual size of "OFF_DPAT, | |
2171 db_nm, db_csize, db_fsize); | |
2172 /* that is a fatal error if we are not rebuilding */ | |
2173 if (new_hash_len != 0) | |
2174 OPEN_BAIL(); | |
2175 } | |
2176 | |
2177 /* The buffer or page size we use must be the page size used to | |
2178 * write the files. Try to change our size to match the file */ | |
2179 if (cur_pagesize != db_pagesize) { | |
2180 db_invalidate = 1; | |
2181 rel_db_states(); | |
2182 if (!db_unload(emsg, 0)) | |
2183 OPEN_BAIL(); | |
2184 db_invalidate = 0; | |
2185 if (!db_buf_init(cur_pagesize, 0)) { | |
2186 dcc_error_msg("%s has page size %d" | |
2187 " incompatible with %d in %s", | |
2188 db_nm, | |
2189 cur_pagesize, db_get_pagesize(0, 0), | |
2190 path2fnm(db_hash_nm)); | |
2191 OPEN_BAIL(); | |
2192 } | |
2193 } | |
2194 | |
2195 db_csize_stored_hash = 0; | |
2196 db_hash_len = 0; | |
2197 db_hash_fd = open(db_hash_nm, hash_flags, 0666); | |
2198 if (db_hash_fd < 0) { | |
2199 dcc_pemsg(EX_IOERR, emsg, "open(%s): %s", | |
2200 db_hash_nm, ERROR_STR()); | |
2201 OPEN_BAIL(); | |
2202 } | |
2203 if (0 > fcntl(db_hash_fd, F_SETFD, FD_CLOEXEC)) { | |
2204 dcc_pemsg(EX_IOERR, emsg, "fcntl(%s, FD_CLOEXEC): %s", | |
2205 db_hash_nm, ERROR_STR()); | |
2206 OPEN_BAIL(); | |
2207 } | |
2208 | |
2209 if (new_hash_len != 0) { | |
2210 if (!make_new_hash(emsg, new_hash_len)) | |
2211 OPEN_BAIL(); | |
2212 } else { | |
2213 if (!check_old_hash(emsg)) | |
2214 OPEN_BAIL(); | |
2215 } | |
2216 | |
2217 if (db_fsize % db_pagesize != 0) { | |
2218 dcc_pemsg(EX_DATAERR, emsg, | |
2219 "%s has size "OFF_HPAT"," | |
2220 " not a multiple of its page size of %#x", | |
2221 db_nm, db_fsize, db_pagesize); | |
2222 OPEN_BAIL(); | |
2223 } | |
2224 if (db_fsize > db_csize + db_pagesize || db_csize > db_fsize) { | |
2225 dcc_pemsg(EX_DATAERR, emsg, | |
2226 "%s has size "OFF_HPAT" but claims "L_HPAT, | |
2227 db_nm, db_fsize, db_csize); | |
2228 OPEN_BAIL(); | |
2229 } | |
2230 | |
2231 #ifndef USE_MAP_NOSYNC | |
2232 /* Use `dbclean -F` on systems without mmap(NOSYNC) but with lots of | |
2233 * RAM. Some Linux systems otherwise take too long to run dbclean. */ | |
2234 if (mode & DB_OPEN_MMAP_WRITE_NOSYNC) { | |
2235 if (db_max_rss > db_fsize + db_hash_fsize) | |
2236 db_use_write = 1; | |
2237 if (db_debug) | |
2238 quiet_trace_msg("db_max_rss="OFF_HPAT | |
2239 " db_fsize+db_hash_fsize="OFF_HPAT | |
2240 " so%s use -F", | |
2241 db_max_rss, db_fsize+db_hash_fsize, | |
2242 db_use_write ? "" : " do not"); | |
2243 } | |
2244 #endif | |
2245 | |
2246 db_window_size = (DB_PTR)db_pagesize * db_buf_total; | |
2247 snprintf(db_window_size_str, sizeof(db_window_size_str), | |
2248 "window=%s%s", | |
2249 db_ptr2str(db_window_size), db_physmem_str); | |
2250 rel_db_states(); | |
2251 db_failed_line = 0; | |
2252 | |
2253 return 1; | |
2254 #undef OPEN_BAIL | |
2255 } | |
2256 | |
2257 | |
2258 | |
2259 static u_char | |
2260 buf_munmap(DCC_EMSG emsg, DB_BUF *b) | |
2261 { | |
2262 u_char result; | |
2263 | |
2264 if (b->lock_cnt != 0) | |
2265 dcc_logbad(EX_SOFTWARE, "unmapping locked DB buffer"); | |
2266 | |
2267 result = buf_flush(emsg, b, 1); | |
2268 | |
2269 if (db_invalidate) { | |
2270 if (0 > DCC_MADV_FREE(b->buf.v)) | |
2271 dcc_error_msg("madvise(FREE %s,%#x): %s", | |
2272 buf2path(b), db_pagesize, ERROR_STR()); | |
2273 } | |
2274 | |
2275 if (0 > munmap(b->buf.v, db_pagesize)) { | |
2276 db_failure(__LINE__,__FILE__, EX_IOERR, emsg, | |
2277 "munmap(%s,%d): %s", | |
2278 buf2path(b), db_pagesize, ERROR_STR()); | |
2279 result = 0; | |
2280 } | |
2281 b->buf.v = 0; | |
2282 b->pg_num = -1; | |
2283 b->buf_type = DB_BUF_TYPE_FREE; | |
2284 | |
2285 return result; | |
2286 } | |
2287 | |
2288 | |
2289 | |
2290 static u_char | |
2291 buf_mmap(DCC_EMSG emsg, DB_BUF *b, DB_PG_NUM pg_num, u_char extend) | |
2292 { | |
2293 int prot, flags; | |
2294 off_t offset; | |
2295 int fd; | |
2296 void *p; | |
2297 int retry; | |
2298 u_char unloaded; | |
2299 | |
2300 | |
2301 offset = (off_t)pg_num * (off_t)db_pagesize; | |
2302 fd = buf2fd(b); | |
2303 | |
2304 if (extend) { | |
2305 offset = 0; | |
2306 #if defined(MAP_ANON)|| defined(MAP_ANONYMOUS) | |
2307 fd = -1; | |
2308 b->flags |= DB_BUF_FG_USE_WRITE | DB_BUF_FG_EXTENSION; | |
2309 #ifdef MAP_ANONYMOUS | |
2310 /* Linux redefines things and requires either MAP_ANON | |
2311 * or MAP_PRIVATE; */ | |
2312 flags = MAP_ANONYMOUS| MAP_PRIVATE; | |
2313 #else | |
2314 flags = MAP_ANON | MAP_PRIVATE; | |
2315 #endif /* MAP_ANONYMOUS */ | |
2316 #else /* have neither MAP_ANON nor MAP_ANONYMOUS */ | |
2317 b->flags |= DB_BUF_FG_USE_WRITE; | |
2318 flags = MAP_PRIVATE; | |
2319 #endif | |
2320 } else if (db_rdonly) { | |
2321 flags = MAP_SHARED; | |
2322 } else if (db_use_write && !db_minimum_map) { | |
2323 /* write() buffers instead of letting the Solaris virtual | |
2324 * memory system do it. Solaris will bog the system down doing | |
2325 * nothing but flushing dirty mmap() pages | |
2326 * We cannot use this hack in two processes simultaneously, | |
2327 * so do not use it in dccd while dbclean is running */ | |
2328 b->flags |= DB_BUF_FG_USE_WRITE; | |
2329 flags = MAP_PRIVATE; | |
2330 } else { | |
2331 #ifdef USE_MAP_NOSYNC | |
2332 flags = (MAP_SHARED | MAP_NOSYNC); | |
2333 #else | |
2334 flags = MAP_SHARED; | |
2335 #endif | |
2336 } | |
2337 | |
2338 prot = db_rdonly ? PROT_READ : (PROT_READ | PROT_WRITE); | |
2339 for (retry = 1, unloaded = 2; unloaded > 1; ++retry) { | |
2340 p = mmap(0, db_pagesize, prot, flags, fd, offset); | |
2341 | |
2342 if (p == MAP_FAILED) { | |
2343 if (errno == EACCES | |
2344 || errno == EBADF | |
2345 || errno == EINVAL | |
2346 || errno == ENODEV | |
2347 || retry > 20) { | |
2348 dcc_pemsg(EX_IOERR, emsg, | |
2349 "try #%d"" mmap(%s" | |
2350 " %#x,%#x,%#x,%d,"OFF_HPAT"): %s", | |
2351 retry, | |
2352 buf2path(b), | |
2353 db_pagesize, prot, flags, fd, offset, | |
2354 ERROR_STR()); | |
2355 return 0; | |
2356 } | |
2357 dcc_error_msg("try #%d mmap(%s" | |
2358 " %#x,%#x,%#x,%d,"OFF_HPAT"): %s", | |
2359 retry, | |
2360 buf2path(b), | |
2361 db_pagesize, prot, flags, fd, offset, | |
2362 ERROR_STR()); | |
2363 /* #define MMAP_FAIL_DEBUG 3 */ | |
2364 #ifdef MMAP_FAIL_DEBUG | |
2365 } else if (((uint)random() % MMAP_FAIL_DEBUG) == 0) { | |
2366 /* pretend mmap() failed randomly */ | |
2367 dcc_error_msg(" test fail #%d mmap(%s,%#x,"OFF_HPAT")", | |
2368 retry, | |
2369 buf2path(b), db_pagesize, offset); | |
2370 if (0 > munmap(p, db_pagesize)) | |
2371 dcc_error_msg( "test munmap(): %s", | |
2372 ERROR_STR()); | |
2373 #endif | |
2374 } else { | |
2375 /* It worked. | |
2376 * Say so if it was not the first attempt. */ | |
2377 if (retry != 1) | |
2378 dcc_error_msg("try #%d" | |
2379 " mmap(%s,%#x,"OFF_HPAT") ok", | |
2380 retry, | |
2381 buf2path(b), db_pagesize, offset); | |
2382 break; | |
2383 } | |
2384 | |
2385 /* mmap() fails occassionally on some systems, | |
2386 * so try to release something and try again */ | |
2387 unloaded = db_unload(0, 1); | |
2388 } | |
2389 | |
2390 | |
2391 b->buf.v = p; | |
2392 b->flush = 0; | |
2393 b->flush_urgent = 0; | |
2394 | |
2395 if (extend) | |
2396 return 1; | |
2397 | |
2398 /* madvise() on some systems including FreeBSD uses a lot of CPU cycles, | |
2399 * so it should not be done unless it is likely to do significant good. | |
2400 * Get all of our buffers if there is plenty of memory | |
2401 * and we are not trying to stay out of the way of dbclean. */ | |
2402 if (!db_minimum_map && db_fsize <= db_max_rss) { | |
2403 /* The flat file would fit. If the hash table would also | |
2404 * fit, tell the kernel to be aggressive */ | |
2405 if (db_fsize + db_hash_fsize <= db_max_rss | |
2406 && 0 > DCC_MADV_WILLNEED(p)) | |
2407 dcc_error_msg("madvise(WILLNEED %s,%#x): %s", | |
2408 buf2path(b), db_pagesize, ERROR_STR()); | |
2409 } else { | |
2410 if (0 > DCC_MADV_RANDOM(p)) | |
2411 dcc_error_msg("madvise(RANDOM %s,%#x): %s", | |
2412 buf2path(b), db_pagesize, ERROR_STR()); | |
2413 } | |
2414 | |
2415 return 1; | |
2416 } | |
2417 | |
2418 | |
2419 | |
2420 /* get a free buffer for a chunk of either the hash table or database files */ | |
2421 static DB_BUF * | |
2422 get_free_buf(DCC_EMSG emsg, DB_BUF **bh) | |
2423 { | |
2424 DB_BUF *b; | |
2425 | |
2426 /* Look for an unlocked buffer. | |
2427 * We know there is one because we have more buffers than | |
2428 * can be locked simultaneously. */ | |
2429 b = buf_oldest; | |
2430 for (;;) { | |
2431 if (!b) | |
2432 dcc_logbad(EX_SOFTWARE, "broken DB buffer MRU chain"); | |
2433 if (!b->lock_cnt) | |
2434 break; | |
2435 b = b->newer; | |
2436 } | |
2437 | |
2438 /* Found an unlocked buffer. | |
2439 * Unlink it from its hash chain. */ | |
2440 if (b->fwd) | |
2441 b->fwd->bak = b->bak; | |
2442 if (b->bak) | |
2443 b->bak->fwd = b->fwd; | |
2444 else if (b->hash) | |
2445 *b->hash = b->fwd; | |
2446 if (b->buf_type != DB_BUF_TYPE_FREE) { | |
2447 if (!buf_munmap(emsg, b)) | |
2448 return 0; | |
2449 } | |
2450 | |
2451 b->flags = 0; | |
2452 | |
2453 /* put it on the new hash chain */ | |
2454 b->bak = 0; | |
2455 b->hash = bh; | |
2456 b->fwd = *bh; | |
2457 *bh = b; | |
2458 if (b->fwd) | |
2459 b->fwd->bak = b; | |
2460 | |
2461 return b; | |
2462 } | |
2463 | |
2464 | |
2465 | |
2466 static DB_BUF * | |
2467 find_buf(DCC_EMSG emsg, DB_BUF_TYPE buf_type, DB_PG_NUM pg_num) | |
2468 { | |
2469 DB_BUF *b, **bh; | |
2470 | |
2471 bh = DB_BUF_HASH(pg_num, buf_type); | |
2472 b = *bh; | |
2473 for (;;) { | |
2474 if (!b) { | |
2475 /* we ran off the end of the buffer hash chain, | |
2476 * so get a free buffer */ | |
2477 b = get_free_buf(emsg, bh); | |
2478 if (!b) | |
2479 return 0; | |
2480 b->buf_type = buf_type; | |
2481 b->pg_num = pg_num; | |
2482 break; | |
2483 } | |
2484 if (b->buf_type == buf_type | |
2485 && b->pg_num == pg_num) | |
2486 break; /* found the buffer we need */ | |
2487 | |
2488 b = b->fwd; | |
2489 } | |
2490 | |
2491 /* make the buffer newest */ | |
2492 if (buf_newest != b) { | |
2493 /* unlink it */ | |
2494 b->newer->older = b->older; | |
2495 if (b->older) | |
2496 b->older->newer = b->newer; | |
2497 else | |
2498 buf_oldest = b->newer; | |
2499 /* insert it at the head of the MRU list */ | |
2500 b->newer = 0; | |
2501 b->older = buf_newest; | |
2502 buf_newest->newer = b; | |
2503 buf_newest = b; | |
2504 } | |
2505 | |
2506 return b; | |
2507 } | |
2508 | |
2509 | |
2510 | |
2511 static DB_BUF * | |
2512 find_st_buf(DCC_EMSG emsg, DB_BUF_TYPE buf_type, DB_STATE *st, | |
2513 DB_PG_NUM pg_num, u_char extend) | |
2514 { | |
2515 DB_BUF *b; | |
2516 | |
2517 /* release previous buffer unless it is the right one */ | |
2518 b = st->b; | |
2519 if (b) { | |
2520 if (b->pg_num == pg_num | |
2521 && b->buf_type == buf_type) | |
2522 return b; /* already have the target buffer */ | |
2523 | |
2524 st->b = 0; | |
2525 st->d.v = 0; | |
2526 if (--b->lock_cnt < 0) | |
2527 dcc_logbad(EX_SOFTWARE, "bad database buffer lock"); | |
2528 } | |
2529 | |
2530 /* look for the buffer */ | |
2531 b = find_buf(emsg, buf_type, pg_num); | |
2532 if (!b) | |
2533 return 0; | |
2534 | |
2535 ++b->lock_cnt; | |
2536 if (b->buf.v) { | |
2537 if (extend && !(b->flags & DB_BUF_FG_USE_WRITE)) | |
2538 dcc_logbad(EX_SOFTWARE, "extending ordinary buffer"); | |
2539 | |
2540 } else { | |
2541 /* map it if it was not already known */ | |
2542 if (!buf_mmap(emsg, b, pg_num, extend)) { | |
2543 b->buf_type = DB_BUF_TYPE_FREE; | |
2544 b->pg_num = -1; | |
2545 if (--b->lock_cnt != 0) | |
2546 dcc_logbad(EX_SOFTWARE, | |
2547 "stolen database buffer lock %d", | |
2548 b->lock_cnt); | |
2549 return 0; | |
2550 } | |
2551 if (buf_type == DB_BUF_TYPE_DB) | |
2552 ++db_stats.db_mmaps; | |
2553 else if (buf_type == DB_BUF_TYPE_HASH) | |
2554 ++db_stats.hash_mmaps; | |
2555 } | |
2556 | |
2557 st->b = b; | |
2558 st->d.v = 0; | |
2559 return b; | |
2560 } | |
2561 | |
2562 | |
2563 | |
2564 static u_char | |
2565 map_hash_ctl(DCC_EMSG emsg, u_char new) | |
2566 { | |
2567 DB_BUF *b; | |
2568 | |
2569 b = find_st_buf(emsg, DB_BUF_TYPE_HASH, &db_sts.hash_ctl, 0, new); | |
2570 if (!b) | |
2571 return 0; | |
2572 db_sts.hash_ctl.s.haddr = 0; | |
2573 db_sts.hash_ctl.d.v = b->buf.v; | |
2574 return 1; | |
2575 } | |
2576 | |
2577 | |
2578 | |
2579 /* mmap() a hash table entry */ | |
2580 static u_char | |
2581 map_hash(DCC_EMSG emsg, | |
2582 DB_HADDR haddr, /* this entry */ | |
2583 DB_STATE *st, /* point this to the entry */ | |
2584 u_char new) | |
2585 { | |
2586 DB_PG_NUM pg_num; | |
2587 DB_PG_OFF pg_off; | |
2588 DB_BUF *b; | |
2589 | |
2590 if (haddr >= db_hash_len || haddr < DB_HADDR_BASE) { | |
2591 dcc_pemsg(EX_DATAERR, emsg, "invalid hash address %#x", | |
2592 haddr); | |
2593 return 0; | |
2594 } | |
2595 | |
2596 pg_num = haddr / db_hash_page_len; | |
2597 pg_off = haddr % db_hash_page_len; | |
2598 | |
2599 b = find_st_buf(emsg, DB_BUF_TYPE_HASH, st, pg_num, new); | |
2600 if (!b) | |
2601 return 0; | |
2602 st->s.haddr = haddr; | |
2603 st->d.h = &b->buf.h[pg_off]; | |
2604 return 1; | |
2605 } | |
2606 | |
2607 | |
2608 | |
2609 /* unlink a hash table entry from the free list | |
2610 * uses db_sts.tmp */ | |
2611 static u_char | |
2612 unlink_free_hash(DCC_EMSG emsg, | |
2613 DB_STATE *hash_st) /* remove this from the free list */ | |
2614 { | |
2615 DB_HADDR fwd, bak; | |
2616 | |
2617 if (!db_make_dirty(emsg)) | |
2618 return 0; | |
2619 | |
2620 fwd = DB_HADDR_EX(hash_st->d.h->fwd); | |
2621 bak = DB_HADDR_EX(hash_st->d.h->bak); | |
2622 if (!HE_IS_FREE(hash_st->d.h) | |
2623 || (DB_HADDR_INVALID(fwd) && fwd != FREE_HADDR_END) | |
2624 || (DB_HADDR_INVALID(bak) && bak != FREE_HADDR_END) | |
2625 || DB_HPTR_EX(hash_st->d.h->rcd) != DB_PTR_NULL) { | |
2626 dcc_pemsg(EX_DATAERR, emsg, | |
2627 "bad hash free list entry at %#x", hash_st->s.haddr); | |
2628 return 0; | |
2629 } | |
2630 | |
2631 if (fwd != FREE_HADDR_END) { | |
2632 if (!map_hash(emsg, fwd, &db_sts.tmp, 0)) | |
2633 return 0; | |
2634 if (DB_HADDR_EX(db_sts.tmp.d.h->bak) != hash_st->s.haddr) { | |
2635 dcc_pemsg(EX_DATAERR, emsg, "free %#x --> bad-free %#x", | |
2636 hash_st->s.haddr, fwd); | |
2637 return 0; | |
2638 } | |
2639 DB_HADDR_CP(db_sts.tmp.d.h->bak, bak); | |
2640 SET_FLUSH_HE(&db_sts.tmp); | |
2641 } else { | |
2642 if (!map_hash_ctl(emsg, 0)) | |
2643 return 0; | |
2644 if (db_sts.hash_ctl.d.vals->s.free_bak != hash_st->s.haddr) { | |
2645 dcc_pemsg(EX_DATAERR, emsg, "free %#x --> bad-free %#x", | |
2646 hash_st->s.haddr, fwd); | |
2647 return 0; | |
2648 } | |
2649 db_sts.hash_ctl.d.vals->s.free_bak = bak; | |
2650 SET_FLUSH_HCTL(0); | |
2651 } | |
2652 | |
2653 if (bak != FREE_HADDR_END) { | |
2654 if (!map_hash(emsg, bak, &db_sts.tmp, 0)) | |
2655 return 0; | |
2656 if (DB_HADDR_EX(db_sts.tmp.d.h->fwd) != hash_st->s.haddr) { | |
2657 dcc_pemsg(EX_DATAERR, emsg, "bad free %#x <-- free %#x", | |
2658 bak, hash_st->s.haddr); | |
2659 return 0; | |
2660 } | |
2661 DB_HADDR_CP(db_sts.tmp.d.h->fwd, fwd); | |
2662 SET_FLUSH_HE(&db_sts.tmp); | |
2663 } else { | |
2664 if (!map_hash_ctl(emsg, 0)) | |
2665 return 0; | |
2666 if (db_sts.hash_ctl.d.vals->s.free_fwd != hash_st->s.haddr) { | |
2667 dcc_pemsg(EX_DATAERR, emsg, "free %#x --> bad-free %#x", | |
2668 hash_st->s.haddr, bak); | |
2669 return 0; | |
2670 } | |
2671 db_sts.hash_ctl.d.vals->s.free_fwd = fwd; | |
2672 SET_FLUSH_HCTL(0); | |
2673 } | |
2674 | |
2675 memset(hash_st->d.h, 0, sizeof(HASH_ENTRY)); | |
2676 SET_FLUSH_HE(hash_st); | |
2677 | |
2678 ++db_hash_used; | |
2679 return 1; | |
2680 } | |
2681 | |
2682 | |
2683 | |
2684 /* get a free hash table entry and leave db_sts.free pointing to it */ | |
2685 static u_char /* 0=failed, 1=got it */ | |
2686 get_free_hash(DCC_EMSG emsg, | |
2687 DB_HADDR result) /* try near here */ | |
2688 { | |
2689 DB_HADDR pg_start, pg_lim, bak; | |
2690 int i; | |
2691 | |
2692 if (db_hash_len <= db_hash_used) { | |
2693 dcc_pemsg(EX_OSFILE, emsg, "no free hash table entry;" | |
2694 " %d of %d used", db_hash_used, db_hash_len); | |
2695 return 0; | |
2696 } | |
2697 | |
2698 /* Look first near the target */ | |
2699 if (result < DB_HADDR_BASE) | |
2700 result = DB_HADDR_BASE; | |
2701 pg_start = result - (result % db_hash_page_len); | |
2702 pg_lim = pg_start + db_hash_page_len-1; | |
2703 if (pg_lim >= db_hash_len) | |
2704 pg_lim = db_hash_len-1; | |
2705 for (i = 0; i < 3 && ++result < pg_lim; ++i) { | |
2706 if (!map_hash(emsg, result, &db_sts.free, 0)) | |
2707 return 0; | |
2708 if (HE_IS_FREE(db_sts.free.d.h)) | |
2709 return unlink_free_hash(emsg, &db_sts.free); | |
2710 } | |
2711 | |
2712 /* check the local ad hoc free list at the end of the page */ | |
2713 if (!map_hash(emsg, pg_lim, &db_sts.free, 0)) | |
2714 return 0; | |
2715 if (HE_IS_FREE(db_sts.free.d.h)) { | |
2716 /* the ad hoc free list is not empty, | |
2717 * so try to use the previous entry */ | |
2718 bak = DB_HADDR_EX(db_sts.free.d.h->bak); | |
2719 if (bak != FREE_HADDR_END) { | |
2720 if (!map_hash(emsg, bak, &db_sts.free, 0)) | |
2721 return 0; | |
2722 } | |
2723 return unlink_free_hash(emsg, &db_sts.free); | |
2724 } | |
2725 | |
2726 | |
2727 /* Give up and search from the start of the free list. This happens | |
2728 * only when the current and all preceding pages are full. */ | |
2729 if (!map_hash_ctl(emsg, 0)) | |
2730 return 0; | |
2731 result = db_sts.hash_ctl.d.vals->s.free_fwd; | |
2732 if (DB_HADDR_INVALID(result)) { | |
2733 dcc_pemsg(EX_DATAERR, emsg, | |
2734 "broken hash free list head of %#x", result); | |
2735 return 0; | |
2736 } | |
2737 if (!map_hash(emsg, result, &db_sts.free, 0)) | |
2738 return 0; | |
2739 return unlink_free_hash(emsg, &db_sts.free); | |
2740 } | |
2741 | |
2742 | |
2743 | |
2744 /* mmap() a database entry | |
2745 * We assume that no database entry spans buffers, | |
2746 * and that there are enough buffers to accomodate all possible | |
2747 * concurrent requests. */ | |
2748 static u_char | |
2749 map_db(DCC_EMSG emsg, | |
2750 DB_PTR rptr, /* address of the record */ | |
2751 u_int tgt_len, /* its length */ | |
2752 DB_STATE *st, /* point this to the record */ | |
2753 u_char extend) | |
2754 { | |
2755 DB_PG_NUM pg_num; | |
2756 DB_PG_OFF pg_off; | |
2757 DB_BUF *b; | |
2758 | |
2759 if (rptr+tgt_len > db_fsize) { | |
2760 db_failure(__LINE__,__FILE__, EX_DATAERR, emsg, | |
2761 "invalid database address "L_HPAT" or length %d" | |
2762 " past db_fsize "OFF_HPAT" in %s", | |
2763 rptr, tgt_len, db_fsize, db_nm); | |
2764 return 0; | |
2765 } | |
2766 | |
2767 /* Try to optimize this to avoid udivdi3() and umoddi3(), | |
2768 * because they are a major time sink here on 32-bit systems */ | |
2769 pg_num = DB_PTR2PG_NUM(rptr, db_pagesize); | |
2770 #ifdef HAVE_64BIT_LONG | |
2771 pg_off = rptr % db_pagesize; | |
2772 #else | |
2773 pg_off = rptr - pg_num*(DB_PTR)db_pagesize; | |
2774 #endif | |
2775 | |
2776 /* do not go past the end of a buffer */ | |
2777 if (tgt_len+pg_off > db_pagesize) { | |
2778 db_failure(__LINE__,__FILE__, EX_DATAERR, emsg, | |
2779 "invalid database address "L_HPAT | |
2780 " or length %#x in %s", | |
2781 rptr, tgt_len, db_nm); | |
2782 return 0; | |
2783 } | |
2784 | |
2785 b = find_st_buf(emsg, DB_BUF_TYPE_DB, st, pg_num, extend); | |
2786 if (!b) | |
2787 return 0; | |
2788 st->s.rptr = rptr; | |
2789 st->d.r = (DB_RCD *)&b->buf.c[pg_off]; | |
2790 return 1; | |
2791 } | |
2792 | |
2793 | |
2794 | |
2795 u_char /* 0=failed, 1=got it */ | |
2796 db_map_rcd(DCC_EMSG emsg, | |
2797 DB_STATE *rcd_st, /* point this to the record */ | |
2798 DB_PTR rptr, /* that is here */ | |
2799 int *rcd_lenp) /* put its length here */ | |
2800 { | |
2801 u_int rcd_len; | |
2802 | |
2803 if (DB_PTR_IS_BAD(rptr)) { | |
2804 dcc_pemsg(EX_DATAERR, emsg, | |
2805 "getting bogus record at "L_HPAT", in %s", | |
2806 rptr, db_nm); | |
2807 return 0; | |
2808 } | |
2809 | |
2810 if (!map_db(emsg, rptr, DB_RCD_HDR_LEN, rcd_st, 0)) | |
2811 return 0; | |
2812 rcd_len = DB_RCD_LEN(rcd_st->d.r); | |
2813 | |
2814 if (&rcd_st->d.c[rcd_len] > &rcd_st->b->buf.c[db_pagesize]) { | |
2815 dcc_pemsg(EX_DATAERR, emsg, | |
2816 "invalid checksum count %d at "L_HPAT" in %s", | |
2817 DB_NUM_CKS(rcd_st->d.r), rptr, db_nm); | |
2818 return 0; | |
2819 } | |
2820 | |
2821 if (rcd_lenp) | |
2822 *rcd_lenp = rcd_len; | |
2823 return 1; | |
2824 } | |
2825 | |
2826 | |
2827 | |
2828 /* write the new sizes of the files into the files */ | |
2829 static u_char | |
2830 db_set_sizes(DCC_EMSG emsg) | |
2831 { | |
2832 u_char result = 1; | |
2833 | |
2834 if (db_hash_fd != -1 | |
2835 && (db_csize_stored_hash != db_csize | |
2836 || db_hash_used_stored_hash != db_hash_used)) { | |
2837 if (!map_hash_ctl(emsg, 0)) { | |
2838 result = 0; | |
2839 } else { | |
2840 db_sts.hash_ctl.d.vals->s.db_csize = db_csize; | |
2841 db_csize_stored_hash = db_csize; | |
2842 | |
2843 db_sts.hash_ctl.d.vals->s.used = db_hash_used; | |
2844 db_hash_used_stored_hash = db_hash_used; | |
2845 | |
2846 SET_FLUSH_HCTL(0); | |
2847 } | |
2848 } | |
2849 | |
2850 if (db_fd != -1 | |
2851 && (db_parms_stored.db_csize != db_csize | |
2852 || db_parms_stored.hash_used != db_hash_used)) { | |
2853 if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms, 0)) { | |
2854 result = 0; | |
2855 } else { | |
2856 db_sts.db_parms.d.parms->db_csize = db_csize; | |
2857 db_parms_stored.db_csize = db_csize; | |
2858 db_parms.db_csize = db_csize; | |
2859 | |
2860 db_sts.db_parms.d.parms->hash_used = db_hash_used; | |
2861 db_parms_stored.hash_used = db_hash_used; | |
2862 db_parms.hash_used = db_hash_used; | |
2863 | |
2864 db_sts.db_parms.d.parms->last_rate_sec = db_time.tv_sec; | |
2865 db_parms_stored.last_rate_sec = db_time.tv_sec; | |
2866 db_parms.last_rate_sec = db_time.tv_sec; | |
2867 | |
2868 db_set_flush(&db_sts.db_parms, 1, sizeof(DB_PARMS)); | |
2869 } | |
2870 } | |
2871 | |
2872 return result; | |
2873 } | |
2874 | |
2875 | |
2876 | |
2877 /* write the database parameters into the magic number headers of the files */ | |
2878 u_char | |
2879 db_flush_parms(DCC_EMSG emsg) | |
2880 { | |
2881 if (!db_set_sizes(emsg)) | |
2882 return 0; | |
2883 | |
2884 if (db_fd == -1) | |
2885 return 1; | |
2886 | |
2887 if (memcmp(&db_parms, &db_parms_stored, sizeof(db_parms))) { | |
2888 if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms, 0)) | |
2889 return 0; | |
2890 | |
2891 db_parms.pagesize = db_pagesize; | |
2892 | |
2893 *db_sts.db_parms.d.parms = db_parms; | |
2894 db_parms_stored = db_parms; | |
2895 | |
2896 db_set_flush(&db_sts.db_parms, 1, sizeof(DB_PARMS)); | |
2897 } | |
2898 | |
2899 return 1; | |
2900 } | |
2901 | |
2902 | |
2903 | |
2904 /* find a checksum in an already mapped record */ | |
2905 DB_RCD_CK * /* 0=not found, 1=broken database */ | |
2906 db_find_ck(DCC_EMSG emsg, | |
2907 DB_RCD *rcd, | |
2908 DB_PTR rptr, | |
2909 DCC_CK_TYPES type) /* find this type of checksum */ | |
2910 { | |
2911 DB_RCD_CK *rcd_ck; | |
2912 int i; | |
2913 | |
2914 rcd_ck = rcd->cks; | |
2915 i = DB_NUM_CKS(rcd); | |
2916 if (i >= DCC_NUM_CKS) { | |
2917 dcc_pemsg(EX_DATAERR, emsg, | |
2918 "impossible %d checksums in "L_HPAT" in %s", | |
2919 i, rptr, db_nm); | |
2920 return (DB_RCD_CK *)1; | |
2921 } | |
2922 | |
2923 for (; i != 0; --i, ++rcd_ck) { | |
2924 if (DB_CK_TYPE(rcd_ck) == type) | |
2925 return rcd_ck; | |
2926 } | |
2927 | |
2928 return 0; | |
2929 } | |
2930 | |
2931 | |
2932 | |
2933 /* find a checksum type known to be in a record */ | |
2934 DB_RCD_CK * /* 0=it's not there */ | |
2935 db_map_rcd_ck(DCC_EMSG emsg, | |
2936 DB_STATE *rcd_st, /* point this to the record */ | |
2937 DB_PTR rptr, /* that is here */ | |
2938 DCC_CK_TYPES type) /* find this type of checksum */ | |
2939 { | |
2940 DB_RCD_CK *rcd_ck; | |
2941 | |
2942 if (!db_map_rcd(emsg, rcd_st, rptr, 0)) | |
2943 return 0; | |
2944 | |
2945 rcd_ck = db_find_ck(emsg, rcd_st->d.r, rptr, type); | |
2946 if (rcd_ck == (DB_RCD_CK *)1) | |
2947 return 0; | |
2948 if (rcd_ck == 0) { | |
2949 dcc_pemsg(EX_DATAERR, emsg, | |
2950 "missing \"%s\" checksum in "L_HPAT" in %s", | |
2951 DB_TYPE2STR(type), rptr, db_nm); | |
2952 return 0; | |
2953 } | |
2954 return rcd_ck; | |
2955 } | |
2956 | |
2957 | |
2958 | |
2959 static inline u_char /* 1=has a small prime factor */ | |
2960 modulus_has_divisor(DB_HADDR len) | |
2961 { | |
2962 static int primes[] = { | |
2963 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, | |
2964 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, | |
2965 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, | |
2966 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, | |
2967 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, | |
2968 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, | |
2969 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499}; | |
2970 int *p; | |
2971 | |
2972 for (p = &primes[0]; p <= LAST(primes); ++p) { | |
2973 if ((len % *p) == 0) | |
2974 return 1; | |
2975 } | |
2976 return 0; | |
2977 } | |
2978 | |
2979 | |
2980 | |
2981 /* Get a modulus for the hash function that is tolerably likely to be | |
2982 * relatively prime to most inputs. The worst that happens when the modulus | |
2983 * is composite is that large multiples of its factors will suffer more | |
2984 * collisions. */ | |
2985 DB_HADDR | |
2986 get_db_hash_divisor(DB_HADDR len) | |
2987 { | |
2988 DB_HADDR divisor; | |
2989 | |
2990 divisor = len - DB_HADDR_BASE; | |
2991 if (!(divisor & 1)) | |
2992 --divisor; | |
2993 while (divisor >= MIN_HASH_ENTRIES) { | |
2994 if (modulus_has_divisor(divisor)) | |
2995 divisor -= 2; | |
2996 else | |
2997 break; | |
2998 } | |
2999 return divisor; | |
3000 } | |
3001 | |
3002 | |
3003 | |
3004 DB_HADDR | |
3005 db_hash(DCC_CK_TYPES type, const DCC_SUM sum) | |
3006 { | |
3007 u_int64_t accum, wrap; | |
3008 const u_int32_t *wp; | |
3009 union { | |
3010 DCC_SUM sum; | |
3011 u_int32_t words[4]; | |
3012 } buf; | |
3013 int align; | |
3014 DB_HADDR haddr; | |
3015 | |
3016 #ifdef HAVE_64BIT_PTR | |
3017 align = (u_int64_t)sum & 3; | |
3018 #else | |
3019 align = (u_int)sum & 3; | |
3020 #endif | |
3021 if (align == 0) { | |
3022 /* We almost always take this branch because database | |
3023 * records contain 12+N*24 bytes. That also implies that | |
3024 * we should not hope for better than 4 byte alignment. */ | |
3025 wp = (u_int32_t *)sum; | |
3026 } else { | |
3027 memcpy(buf.sum, sum, sizeof(buf.sum)); | |
3028 wp = buf.words; | |
3029 } | |
3030 | |
3031 /* MD5 checksums are uniformly distributed, and so DCC_SUMs are | |
3032 * directly useful for hashing except when they are server-IDs */ | |
3033 accum = *wp++; | |
3034 accum += *wp++; | |
3035 wrap = accum >>32; | |
3036 accum <<= 32; | |
3037 accum += wrap + type; | |
3038 accum += *wp++; | |
3039 accum += *wp; | |
3040 | |
3041 haddr = accum % db_hash_divisor; | |
3042 haddr += DB_HADDR_BASE; | |
3043 | |
3044 /* do not hash into the last slot of a page, because it is used to | |
3045 * find local free slots */ | |
3046 if (haddr % db_hash_page_len == db_hash_page_len-1) { | |
3047 ++haddr; | |
3048 if (haddr >= db_hash_len) | |
3049 haddr = DB_HADDR_BASE; | |
3050 } | |
3051 return haddr; | |
3052 } | |
3053 | |
3054 | |
3055 | |
3056 /* look for a checksum in the hash table | |
3057 * return with an excuse, the home slot, or the last entry on | |
3058 * the collision chain */ | |
3059 DB_FOUND | |
3060 db_lookup(DCC_EMSG emsg, DCC_CK_TYPES type, const DCC_SUM sum, | |
3061 DB_HADDR lo, /* postpone if out of this window */ | |
3062 DB_HADDR hi, | |
3063 DB_STATE *hash_st, /* hash block for record or related */ | |
3064 DB_STATE *rcd_st, /* put the record or garbage here */ | |
3065 DB_RCD_CK **prcd_ck) /* point to cksum if found */ | |
3066 { | |
3067 DB_HADDR haddr, haddr_fwd, haddr_bak; | |
3068 DB_PTR db_ptr; | |
3069 DB_RCD_CK *found_ck; | |
3070 DB_HADDR failsafe; | |
3071 | |
3072 haddr = db_hash(type, sum); | |
3073 if (haddr < lo || haddr > hi) { | |
3074 if (lo == 0 && hi == MAX_HASH_ENTRIES) { | |
3075 dcc_pemsg(EX_DATAERR, emsg, | |
3076 "out of range hash address"); | |
3077 return DB_FOUND_SYSERR; | |
3078 } | |
3079 return DB_FOUND_LATER; | |
3080 } | |
3081 | |
3082 if (prcd_ck) | |
3083 *prcd_ck = 0; | |
3084 | |
3085 if (!map_hash(emsg, haddr, hash_st, 0)) | |
3086 return DB_FOUND_SYSERR; | |
3087 | |
3088 if (HE_IS_FREE(hash_st->d.h)) | |
3089 return DB_FOUND_EMPTY; | |
3090 | |
3091 if (!DB_HADDR_C_NULL(hash_st->d.h->bak)) | |
3092 return DB_FOUND_INTRUDER; | |
3093 | |
3094 /* We know that the current hash table entry is in its home slot. | |
3095 * It might be for the key or checksum we are looking for | |
3096 * or it might be for some other checksum with the same hash value. */ | |
3097 for (failsafe = 0; failsafe <= db_hash_len; ++failsafe) { | |
3098 if (HE_CMP(hash_st->d.h, type, sum)) { | |
3099 /* This hash table entry could be for our target | |
3100 * checksum. Read the corresponding record so we | |
3101 * decide whether we have a hash collision or we | |
3102 * have found a record containing our target checksum. | |
3103 * | |
3104 * find right type of checksum in the record */ | |
3105 db_ptr = DB_HPTR_EX(hash_st->d.h->rcd); | |
3106 found_ck = db_map_rcd_ck(emsg, rcd_st, db_ptr, type); | |
3107 if (!found_ck) | |
3108 return DB_FOUND_SYSERR; | |
3109 if (!memcmp(sum, found_ck->sum, | |
3110 sizeof(DCC_SUM))) { | |
3111 if (prcd_ck) | |
3112 *prcd_ck = found_ck; | |
3113 return DB_FOUND_IT; | |
3114 } | |
3115 } | |
3116 | |
3117 /* This DB record was a hash collision, or for a checksum | |
3118 * other than our target. | |
3119 * Fail if this is the end of the hash chain */ | |
3120 haddr_fwd = DB_HADDR_EX(hash_st->d.h->fwd); | |
3121 if (haddr_fwd == DB_HADDR_NULL) | |
3122 return DB_FOUND_CHAIN; | |
3123 | |
3124 if (DB_HADDR_INVALID(haddr_fwd)) { | |
3125 dcc_pemsg(EX_DATAERR, emsg, | |
3126 "broken hash chain fwd-link" | |
3127 " #%d %#x at %#x in %s", | |
3128 failsafe, haddr_fwd, haddr, db_hash_nm); | |
3129 return DB_FOUND_SYSERR; | |
3130 } | |
3131 | |
3132 if (!map_hash(emsg, haddr_fwd, hash_st, 0)) | |
3133 return DB_FOUND_SYSERR; | |
3134 | |
3135 haddr_bak = DB_HADDR_EX(hash_st->d.h->bak); | |
3136 if (haddr_bak != haddr) { | |
3137 dcc_pemsg(EX_DATAERR, emsg, | |
3138 "broken hash chain links #%d," | |
3139 " %#x-->%#x but %#x<--%#x in %s", | |
3140 failsafe, | |
3141 haddr, haddr_fwd, | |
3142 haddr_bak, haddr_fwd, | |
3143 db_hash_nm); | |
3144 return DB_FOUND_SYSERR; | |
3145 } | |
3146 haddr = haddr_fwd; | |
3147 } | |
3148 dcc_pemsg(EX_DATAERR, emsg, "infinite hash chain at %#x in %s", | |
3149 haddr, db_hash_nm); | |
3150 return DB_FOUND_SYSERR; | |
3151 } | |
3152 | |
3153 | |
3154 | |
3155 /* combine checksums */ | |
3156 DCC_TGTS | |
3157 db_sum_ck(DCC_TGTS prev, /* previous sum */ | |
3158 DCC_TGTS rcd_tgts, /* from the record */ | |
3159 DCC_CK_TYPES type UATTRIB) | |
3160 { | |
3161 DCC_TGTS res; | |
3162 | |
3163 /* This arithmetic must be commutative (after handling deleted | |
3164 * values), because inter-server flooding causes records to appear in | |
3165 * the database out of temporal order. | |
3166 * | |
3167 * DCC_TGTS_TOO_MANY can be thought of as a count of plus infinity. | |
3168 * DCC_TGTS_OK is like minus infinity. | |
3169 * DCC_TGTS_OK2 like half of minus infinity | |
3170 * DCC_TGTS_TOO_MANY (plus infinity) added to DCC_TGTS_OK (minus | |
3171 * infinity) or DCC_TGTS_OK2 yields DCC_TGTS_OK or DCC_TGTS_OK2. | |
3172 * | |
3173 * Reputations never reach infinity. | |
3174 * | |
3175 * Claims of not-spam from all clients are discarded as they arrive | |
3176 * and before here. They can only come from the local white list | |
3177 */ | |
3178 #define SUM_OK_DEL(p,r) { \ | |
3179 if (rcd_tgts == DCC_TGTS_OK || prev == DCC_TGTS_OK) \ | |
3180 return DCC_TGTS_OK; \ | |
3181 if (rcd_tgts == DCC_TGTS_OK2 || prev == DCC_TGTS_OK2) \ | |
3182 return DCC_TGTS_OK2; \ | |
3183 if (rcd_tgts == DCC_TGTS_DEL) \ | |
3184 return prev; \ | |
3185 } | |
3186 | |
3187 res = prev+rcd_tgts; | |
3188 if (res <= DCC_TGTS_TOO_MANY) | |
3189 return res; | |
3190 | |
3191 SUM_OK_DEL(prev, rcd_tgts); | |
3192 return DCC_TGTS_TOO_MANY; | |
3193 #undef SUM_OK_DEL | |
3194 } | |
3195 | |
3196 | |
3197 | |
3198 /* delete all reports that contain the given checksum */ | |
3199 static u_char /* 1=done, 0=broken database */ | |
3200 del_ck(DCC_EMSG emsg, | |
3201 DCC_TGTS *res, /* residual targets after deletion */ | |
3202 const DB_RCD *new, /* delete reports older than this one */ | |
3203 DCC_CK_TYPES type, /* delete this type of checksum */ | |
3204 DB_RCD_CK *prev_ck, /* starting with this one */ | |
3205 DB_STATE *prev_st) /* use this scratch state block */ | |
3206 { | |
3207 DB_PTR prev; | |
3208 | |
3209 *res = 0; | |
3210 for (;;) { | |
3211 /* delete reports that are older than the delete request */ | |
3212 if (dcc_ts_newer_ts(&new->ts, &prev_st->d.r->ts) | |
3213 && DB_RCD_ID(prev_st->d.r) != DCC_ID_WHITE) { | |
3214 DB_TGTS_RCD_SET(prev_st->d.r, 0); | |
3215 DB_TGTS_CK_SET(prev_ck, 0); | |
3216 SET_FLUSH_RCD(prev_st, 1); | |
3217 | |
3218 } else { | |
3219 /* sum reports that are not deleted */ | |
3220 *res = db_sum_ck(*res, DB_TGTS_RCD(prev_st->d.r), type); | |
3221 } | |
3222 | |
3223 prev = DB_PTR_EX(prev_ck->prev); | |
3224 if (prev == DB_PTR_NULL) | |
3225 return 1; | |
3226 prev_ck = db_map_rcd_ck(emsg, prev_st, prev, type); | |
3227 if (!prev_ck) | |
3228 return 0; | |
3229 } | |
3230 } | |
3231 | |
3232 | |
3233 | |
3234 /* see if the new and preceding records are from the same era */ | |
3235 static inline u_char /* 1=different eras */ | |
3236 ck_old_spam(const DB_RCD *new, const DCC_TS* prev, DCC_CK_TYPES type) | |
3237 { | |
3238 struct timeval tv; | |
3239 time_t secs; | |
3240 DCC_TS past; | |
3241 | |
3242 secs = db_parms.ex_secs[type].spam; | |
3243 if (secs > DCC_OLD_SPAM_SECS) | |
3244 secs = DCC_OLD_SPAM_SECS; | |
3245 dcc_ts2timeval(&tv, &new->ts); | |
3246 dcc_timeval2ts(&past, &tv, -secs); | |
3247 | |
3248 return dcc_ts_older_ts(prev, &past); | |
3249 } | |
3250 | |
3251 | |
3252 | |
3253 /* Mark reports made obsolete by a spam report | |
3254 * A new report of spam makes sufficiently old reports obsolete. | |
3255 * | |
3256 * Sufficiently recent non-obsolete reports make a new report obsolete, | |
3257 * or at least not worth spending bandwidth to flood. | |
3258 * "Sufficiently recent" should be defined so that this server and | |
3259 * its downstream flooding peers always have reports of the checksums | |
3260 * in the report. So we want to keep (not make obsolete) at least one | |
3261 * report per expiration duration. We cannot know the expiration durations | |
3262 * of our peers, but we known DB_EXPIRE_SPAMSECS_DEF_MIN which influences | |
3263 * DCC_OLD_SPAM_SECS. | |
3264 * | |
3265 * However, if another checksum in the new report was kept, then | |
3266 * prefer marking old checksums obsolete. | |
3267 * | |
3268 * db_sts.rcd points to the new record | |
3269 * db_sts.rcd2 points the the previous record and is changed | |
3270 */ | |
3271 static u_char /* 1=done, 0=broken database */ | |
3272 ck_obs_spam(DCC_EMSG emsg, | |
3273 const DB_RCD *new, | |
3274 DCC_TGTS new_tgts, | |
3275 DB_RCD_CK *new_ck, | |
3276 DCC_CK_TYPES type, /* check this type of checksum */ | |
3277 DB_RCD_CK *prev_ck, /* starting with this one */ | |
3278 DCC_TGTS prev_ck_tgts, | |
3279 u_char *keeping_new) /* 1=already keeping the new record */ | |
3280 { | |
3281 int limit; | |
3282 DB_PTR prev; | |
3283 | |
3284 limit = 100; | |
3285 for (;;) { | |
3286 /* preceding white listed entries make new entries obsolete */ | |
3287 if (DB_RCD_ID(db_sts.rcd2.d.r) == DCC_ID_WHITE) { | |
3288 new_ck->type_fgs |= DB_CK_FG_OBS; | |
3289 SET_FLUSH_RCD(&db_sts.rcd, 1); | |
3290 return 1; | |
3291 } | |
3292 | |
3293 if (DB_CK_OBS(prev_ck) | |
3294 || DB_TGTS_RCD(db_sts.rcd2.d.r) == 0) { | |
3295 /* notice duplicates and | |
3296 * don't look forever for recent non-obsolete report */ | |
3297 if (!memcmp(&new->ts, &db_sts.rcd2.d.r->ts, | |
3298 sizeof(new->ts)) | |
3299 || --limit == 0) { | |
3300 *keeping_new = 1; | |
3301 return 1; | |
3302 } | |
3303 | |
3304 } else if (prev_ck_tgts != DCC_TGTS_TOO_MANY) { | |
3305 /* Mark this predecessor obsolete because it | |
3306 * was before the checksum became spam. */ | |
3307 prev_ck->type_fgs |= DB_CK_FG_OBS; | |
3308 SET_FLUSH_RCD(&db_sts.rcd2, 0); | |
3309 | |
3310 /* continue backwards to mark more non-spam | |
3311 * predecessors obsolete */ | |
3312 | |
3313 } else if (!*keeping_new | |
3314 && ck_old_spam(new, &db_sts.rcd2.d.r->ts, type)) { | |
3315 /* We do not yet have a reason to keep the new report | |
3316 * and this predecessor is at or after a spam report. | |
3317 * We need the new report because it and the | |
3318 * predecessor are from different eras. | |
3319 * If the new report is not of spam, it will be | |
3320 * compressed with a preceding spam report. */ | |
3321 *keeping_new = 1; | |
3322 /* The predecessor is not needed if the new record | |
3323 * is for spam */ | |
3324 if (new_tgts == DCC_TGTS_TOO_MANY) { | |
3325 prev_ck->type_fgs |= DB_CK_FG_OBS; | |
3326 SET_FLUSH_RCD(&db_sts.rcd2, 0); | |
3327 } | |
3328 /* We're finished, because all older preceding reports | |
3329 * were marked obsolete when this older predecessor | |
3330 * was linked. */ | |
3331 return 1; | |
3332 | |
3333 } else { | |
3334 /* this predecessor is about as recent as the new | |
3335 * record, so the new record is unneeded noise that | |
3336 * would bloat other servers' databases. */ | |
3337 new_ck->type_fgs |= DB_CK_FG_OBS; | |
3338 return 1; | |
3339 } | |
3340 | |
3341 prev = DB_PTR_EX(prev_ck->prev); | |
3342 if (prev == DB_PTR_NULL) { | |
3343 /* the new record is a new report of spam */ | |
3344 *keeping_new = 1; | |
3345 return 1; | |
3346 } | |
3347 | |
3348 prev_ck = db_map_rcd_ck(emsg, &db_sts.rcd2, prev, type); | |
3349 if (!prev_ck) | |
3350 return 0; | |
3351 prev_ck_tgts = DB_TGTS_CK(prev_ck); | |
3352 } | |
3353 } | |
3354 | |
3355 | |
3356 | |
3357 /* mark extra server-ID declarations obsolete | |
3358 * | |
3359 * db_sts.rcd points to the new record | |
3360 * db_sts.rcd2 points the the previous record and is changed */ | |
3361 static u_char /* 1=done, 0=broken database */ | |
3362 srvr_id_ck(DCC_EMSG emsg, | |
3363 const DB_RCD *new, | |
3364 DB_RCD_CK *new_ck, | |
3365 DB_RCD_CK *prev_ck) /* starting with this one */ | |
3366 { | |
3367 DB_PTR prev; | |
3368 DCC_SRVR_ID new_id, prev_id; | |
3369 struct timeval tv; | |
3370 DCC_TS week_ts; | |
3371 | |
3372 dcc_ts2timeval(&tv, &new->ts); | |
3373 tv.tv_usec = 0; | |
3374 tv.tv_sec -= tv.tv_sec % (7*24*60*60); | |
3375 dcc_timeval2ts(&week_ts, &tv, 0); | |
3376 | |
3377 new_id = DB_RCD_ID(new); | |
3378 for (;;) { | |
3379 /* mark duplicate older declarations and deletions obsolete */ | |
3380 prev_id = DB_RCD_ID(db_sts.rcd2.d.r); | |
3381 if (!DCC_ID_SRVR_TYPE(prev_id) | |
3382 || DB_TGTS_RCD(db_sts.rcd2.d.r) == 0) { | |
3383 if (dcc_ts_newer_ts(&db_sts.rcd2.d.r->ts, &new->ts)) { | |
3384 new_ck->type_fgs |= DB_CK_FG_OBS; | |
3385 SET_FLUSH_RCD(&db_sts.rcd, 1); | |
3386 } else { | |
3387 prev_ck->type_fgs |= DB_CK_FG_OBS; | |
3388 SET_FLUSH_RCD(&db_sts.rcd2, 1); | |
3389 } | |
3390 return 1; | |
3391 } | |
3392 | |
3393 /* Keep many identical type declarations as a kludge to ensure | |
3394 * that rewound flooding sends type declarations early. | |
3395 * Keep only one delcaration per week. */ | |
3396 if (DCC_ID_SRVR_TYPE(new_id)) { | |
3397 /* Zap the new declaration and stop if the | |
3398 * new declaration is older than the predecessor. */ | |
3399 if (dcc_ts_newer_ts(&db_sts.rcd2.d.r->ts, &new->ts)) { | |
3400 new_ck->type_fgs |= DB_CK_FG_OBS; | |
3401 SET_FLUSH_RCD(&db_sts.rcd, 1); | |
3402 return 1; | |
3403 } | |
3404 | |
3405 /* Stop when we find a duplicate type declaration | |
3406 * of a different week */ | |
3407 if (prev_id == new_id | |
3408 && dcc_ts_older_ts(&db_sts.rcd2.d.r->ts, | |
3409 &week_ts)) { | |
3410 return 1; | |
3411 } | |
3412 | |
3413 /* continue zapping preceding declarations */ | |
3414 prev_ck->type_fgs |= DB_CK_FG_OBS; | |
3415 SET_FLUSH_RCD(&db_sts.rcd2, 1); | |
3416 } | |
3417 | |
3418 prev = DB_PTR_EX(prev_ck->prev); | |
3419 if (prev == DB_PTR_NULL) | |
3420 return 1; | |
3421 | |
3422 prev_ck = db_map_rcd_ck(emsg, &db_sts.rcd2, | |
3423 prev, DCC_CK_SRVR_ID); | |
3424 if (!prev_ck) | |
3425 return 0; | |
3426 } | |
3427 } | |
3428 | |
3429 | |
3430 | |
3431 /* Install pointers in the hash table for a record and fix the accumulated | |
3432 * counts in the record pointed to by db_sts.rcd | |
3433 * Use db_sts.rcd, db_sts.hash, db_sts.rcd2, db_sts.free, db_sts.tmp | |
3434 * The caller must deal with db_make_dirty() */ | |
3435 u_char /* 0=failed, 1=done */ | |
3436 db_link_rcd(DCC_EMSG emsg, DB_HADDR lo, DB_HADDR hi) | |
3437 { | |
3438 DCC_TGTS res; | |
3439 DB_RCD *rcd; | |
3440 DB_RCD_CK *prev_ck; | |
3441 DB_RCD_CK *rcd_ck; | |
3442 DCC_CK_TYPES rcd_type; | |
3443 DCC_TGTS rcd_tgts, prev_ck_tgts; | |
3444 int ck_num; | |
3445 DB_HADDR haddr; | |
3446 u_char keeping_new; | |
3447 | |
3448 keeping_new = 0; | |
3449 rcd = db_sts.rcd.d.r; | |
3450 rcd_tgts = DB_TGTS_RCD_RAW(rcd); | |
3451 rcd_ck = rcd->cks; | |
3452 ck_num = DB_NUM_CKS(rcd); | |
3453 if (ck_num > DIM(rcd->cks)) { | |
3454 dcc_pemsg(EX_OSFILE, emsg, | |
3455 "bogus checksum count %#x at "L_HPAT" in %s", | |
3456 rcd->fgs_num_cks, db_sts.rcd.s.rptr, db_nm); | |
3457 return 0; | |
3458 } | |
3459 for (; ck_num > 0; --ck_num, ++rcd_ck) { | |
3460 rcd_type = DB_CK_TYPE(rcd_ck); | |
3461 if (!DCC_CK_OK_DB(grey_on, rcd_type)) { | |
3462 dcc_pemsg(EX_OSFILE, emsg, | |
3463 "invalid checksum type %s at "L_HPAT" in %s", | |
3464 DB_TYPE2STR(rcd_type), | |
3465 db_sts.rcd.s.rptr, db_nm); | |
3466 return 0; | |
3467 } | |
3468 | |
3469 rcd_ck->prev = DB_PTR_CP(DB_PTR_NULL); | |
3470 | |
3471 /* Do not link paths or whitelist file and line numbers */ | |
3472 if (rcd_type == DCC_CK_FLOD_PATH) { | |
3473 DB_TGTS_CK_SET(rcd_ck, 0); | |
3474 continue; | |
3475 } | |
3476 | |
3477 /* Do not link or total some checksums unless they are | |
3478 * whitelist entries. If they are whitelist entries, they | |
3479 * will eventually get set to DCC_TGTS_OK or DCC_TGTS_OK2. | |
3480 * Blacklist entries are noticed later by server-ID | |
3481 * or do not matter DCC_TGTS_TOO_MANY. */ | |
3482 if (DB_TEST_NOKEEP(db_parms.nokeep_cks, rcd_type) | |
3483 && DB_RCD_ID(rcd) != DCC_ID_WHITE) { | |
3484 DB_TGTS_CK_SET(rcd_ck, 1); | |
3485 continue; | |
3486 } | |
3487 | |
3488 res = (rcd_tgts == DCC_TGTS_DEL) ? 0 : rcd_tgts; | |
3489 | |
3490 switch (db_lookup(emsg, rcd_type, rcd_ck->sum, lo, hi, | |
3491 &db_sts.hash, &db_sts.rcd2, &prev_ck)) { | |
3492 case DB_FOUND_SYSERR: | |
3493 return 0; | |
3494 | |
3495 case DB_FOUND_LATER: | |
3496 continue; | |
3497 | |
3498 case DB_FOUND_IT: | |
3499 /* We found the checksum | |
3500 * Update the hash table to point to the new record */ | |
3501 DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr); | |
3502 SET_FLUSH_HE(&db_sts.hash); | |
3503 /* link new record to existing record */ | |
3504 rcd_ck->prev = DB_PTR_CP(db_sts.rcd2.s.rptr); | |
3505 | |
3506 /* delete predecessors to a delete request | |
3507 * and compute the remaining sum */ | |
3508 if (rcd_tgts == DCC_TGTS_DEL) { | |
3509 if (!del_ck(emsg, &res, rcd, rcd_type, | |
3510 prev_ck, &db_sts.rcd2)) | |
3511 return 0; | |
3512 /* delete requests are obsolete if the | |
3513 * checksum is whitelisted */ | |
3514 if (res == DCC_TGTS_OK | |
3515 || res == DCC_TGTS_OK2) | |
3516 rcd_ck->type_fgs |= DB_CK_FG_OBS; | |
3517 break; | |
3518 } | |
3519 | |
3520 /* Simple checksum with a predecessor | |
3521 * This does not do the substantial extra work | |
3522 * to notice all delete requests that arrived early. | |
3523 * That problem is handled by the incoming flood | |
3524 * duplicate report detection mechanism. | |
3525 * We must detect precessors that were deleted because | |
3526 * they are partial duplicates of the new record. */ | |
3527 prev_ck_tgts = DB_TGTS_CK(prev_ck); | |
3528 if (DB_RCD_SUMRY(rcd)) | |
3529 res = prev_ck_tgts; | |
3530 else | |
3531 res = db_sum_ck(prev_ck_tgts, res, rcd_type); | |
3532 if ((res == DCC_TGTS_OK || res == DCC_TGTS_OK2 | |
3533 || (DB_RCD_ID(db_sts.rcd2.d.r) == DCC_ID_WHITE)) | |
3534 && DB_RCD_ID(rcd) != DCC_ID_WHITE){ | |
3535 /* obsolete whitelisted checksums */ | |
3536 rcd_ck->type_fgs |= DB_CK_FG_OBS; | |
3537 break; | |
3538 } | |
3539 if (res == DCC_TGTS_TOO_MANY) { | |
3540 /* mark obsolete unneeded reports of spam */ | |
3541 if (!DB_CK_OBS(rcd_ck) | |
3542 && !ck_obs_spam(emsg, rcd, rcd_tgts, | |
3543 rcd_ck, rcd_type, | |
3544 prev_ck, prev_ck_tgts, | |
3545 &keeping_new)) | |
3546 return 0; /* (broken database) */ | |
3547 } else if (rcd_type == DCC_CK_SRVR_ID) { | |
3548 /* mark obsolete server-ID assertions */ | |
3549 if (!DB_CK_OBS(rcd_ck) | |
3550 && !srvr_id_ck(emsg, rcd, rcd_ck, prev_ck)) | |
3551 return 0; /* (broken database) */ | |
3552 } | |
3553 break; | |
3554 | |
3555 case DB_FOUND_EMPTY: | |
3556 /* We found an empty hash table slot. | |
3557 * Update the slot to point to our new record | |
3558 * after removing it from the free list, | |
3559 * which marks it dirty. */ | |
3560 if (!unlink_free_hash(emsg, &db_sts.hash)) | |
3561 return 0; | |
3562 DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr); | |
3563 HE_MERGE(db_sts.hash.d.h,rcd_type, rcd_ck->sum); | |
3564 if (res >= BULK_THRESHOLD) | |
3565 keeping_new = 1; | |
3566 break; | |
3567 | |
3568 case DB_FOUND_CHAIN: | |
3569 /* We found a hash collision, a chain of 1 or more | |
3570 * records with the same hash value. | |
3571 * Get a free slot, link it to the end of the | |
3572 * existing chain, and point it to the new record. | |
3573 * The buffer containing the free slot is marked | |
3574 * dirty when it is removed from the free list. */ | |
3575 if (!get_free_hash(emsg, db_sts.hash.s.haddr)) | |
3576 return 0; | |
3577 DB_HADDR_CP(db_sts.free.d.h->bak, db_sts.hash.s.haddr); | |
3578 DB_HADDR_CP(db_sts.hash.d.h->fwd, db_sts.free.s.haddr); | |
3579 DB_HPTR_CP(db_sts.free.d.h->rcd, db_sts.rcd.s.rptr); | |
3580 HE_MERGE(db_sts.free.d.h,rcd_type, rcd_ck->sum); | |
3581 SET_FLUSH_HE(&db_sts.hash); | |
3582 if (res >= BULK_THRESHOLD) | |
3583 keeping_new = 1; | |
3584 break; | |
3585 | |
3586 case DB_FOUND_INTRUDER: | |
3587 /* The home hash slot for our key contains an | |
3588 * intruder. Move it to a new free slot */ | |
3589 if (!get_free_hash(emsg, db_sts.hash.s.haddr)) | |
3590 return 0; | |
3591 *db_sts.free.d.h = *db_sts.hash.d.h; | |
3592 /* re-link the neighbors of the intruder */ | |
3593 haddr = DB_HADDR_EX(db_sts.free.d.h->bak); | |
3594 if (haddr == DB_HADDR_NULL) { | |
3595 dcc_pemsg(EX_DATAERR, emsg, | |
3596 "bad hash chain reverse link at %#x" | |
3597 " in %s", | |
3598 haddr, db_hash_nm); | |
3599 return 0; | |
3600 } | |
3601 if (!map_hash(emsg, haddr, &db_sts.tmp, 0)) | |
3602 return 0; | |
3603 DB_HADDR_CP(db_sts.tmp.d.h->fwd, db_sts.free.s.haddr); | |
3604 SET_FLUSH_HE(&db_sts.tmp); | |
3605 haddr = DB_HADDR_EX(db_sts.hash.d.h->fwd); | |
3606 if (haddr != DB_HADDR_NULL) { | |
3607 if (!map_hash(emsg, haddr, &db_sts.tmp, 0)) | |
3608 return 0; | |
3609 DB_HADDR_CP(db_sts.tmp.d.h->bak, | |
3610 db_sts.free.s.haddr); | |
3611 SET_FLUSH_HE(&db_sts.tmp); | |
3612 } | |
3613 /* install the new entry in its home slot */ | |
3614 DB_HADDR_CP(db_sts.hash.d.h->fwd, DB_HADDR_NULL); | |
3615 DB_HADDR_CP(db_sts.hash.d.h->bak, DB_HADDR_NULL); | |
3616 DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr); | |
3617 HE_MERGE(db_sts.hash.d.h,rcd_type, rcd_ck->sum); | |
3618 SET_FLUSH_HE(&db_sts.hash); | |
3619 if (res >= BULK_THRESHOLD) | |
3620 keeping_new = 1; | |
3621 break; | |
3622 } | |
3623 | |
3624 /* Fix the checksum's total in the record */ | |
3625 DB_TGTS_CK_SET(rcd_ck, res); | |
3626 SET_FLUSH_RCD(&db_sts.rcd, 0); | |
3627 } | |
3628 | |
3629 return db_set_sizes(emsg); | |
3630 } | |
3631 | |
3632 | |
3633 | |
3634 /* Add a record to the database and the hash table | |
3635 * The record must be known to be valid | |
3636 * Use db_sts.rcd, db_sts.hash, db_sts.rcd2, db_sts.free, db_sts.tmp | |
3637 * On exit db_sts.rcd points to the new record in the database */ | |
3638 DB_PTR /* 0=failed */ | |
3639 db_add_rcd(DCC_EMSG emsg, const DB_RCD *new_rcd) | |
3640 { | |
3641 u_int new_rcd_len, pad_len; | |
3642 DB_PTR new_db_csize, rcd_pos, new_page_num; | |
3643 DB_BUF *b; | |
3644 | |
3645 if (!db_make_dirty(emsg)) | |
3646 return 0; | |
3647 | |
3648 new_rcd_len = (sizeof(*new_rcd) | |
3649 - sizeof(new_rcd->cks) | |
3650 + (DB_NUM_CKS(new_rcd) * sizeof(new_rcd->cks[0]))); | |
3651 | |
3652 rcd_pos = db_csize; | |
3653 new_db_csize = rcd_pos+new_rcd_len; | |
3654 | |
3655 new_page_num = DB_PTR2PG_NUM(new_db_csize, db_pagesize); | |
3656 if (new_page_num == DB_PTR2PG_NUM(db_csize, db_pagesize)) { | |
3657 if (!map_db(emsg, rcd_pos, new_rcd_len, &db_sts.rcd, 0)) | |
3658 return 0; | |
3659 | |
3660 } else { | |
3661 /* fill with zeros to get past a page boundary. */ | |
3662 pad_len = new_page_num*db_pagesize - db_csize; | |
3663 pad_len = (((pad_len + DB_RCD_HDR_LEN-1) / DB_RCD_HDR_LEN) | |
3664 * DB_RCD_HDR_LEN); | |
3665 if (pad_len != 0) { | |
3666 if (!map_db(emsg, db_csize, pad_len, &db_sts.rcd, 0)) | |
3667 return 0; | |
3668 memset(db_sts.rcd.d.r, 0, pad_len); | |
3669 db_set_flush(&db_sts.rcd, 1, pad_len); | |
3670 db_csize += pad_len; | |
3671 | |
3672 rcd_pos = db_csize; | |
3673 new_db_csize = rcd_pos+new_rcd_len; | |
3674 } | |
3675 | |
3676 /* extend the file by writing a full page to it with write(), | |
3677 * because extending by mmap() often does not work */ | |
3678 db_fsize = db_csize+db_pagesize; | |
3679 if (!map_db(emsg, rcd_pos, db_pagesize, &db_sts.rcd, 1)) | |
3680 return 0; | |
3681 b = db_sts.rcd.b; | |
3682 b->flush = (DB_BUF_FM)-1; | |
3683 | |
3684 /* push new page to disk if dblist or dbclean is running */ | |
3685 if (db_minimum_map) { | |
3686 rel_db_state(&db_sts.rcd); | |
3687 if (!buf_munmap(emsg, b)) | |
3688 return 0; | |
3689 if (!map_db(emsg, rcd_pos, new_rcd_len, &db_sts.rcd, 0)) | |
3690 return 0; | |
3691 } | |
3692 } | |
3693 | |
3694 /* install the record */ | |
3695 memcpy(db_sts.rcd.d.r, new_rcd, new_rcd_len); | |
3696 /* Mark its buffer to be sent to the disk to keep the database | |
3697 * as good as possible even if we crash. We don't need to worry | |
3698 * about later changes to the hash links because dbclean will | |
3699 * rebuild them if we crash */ | |
3700 db_set_flush(&db_sts.rcd, 1, new_rcd_len); | |
3701 db_csize = new_db_csize; | |
3702 | |
3703 /* install pointers in the hash table | |
3704 * and update the total counts in the record */ | |
3705 if (!db_link_rcd(emsg, 0, MAX_HASH_ENTRIES)) | |
3706 return 0; | |
3707 | |
3708 ++db_stats.adds; | |
3709 return rcd_pos; | |
3710 } |