0
|
1 /* Distributed Checksum Clearinghouse |
|
2 * |
|
3 * server database functions |
|
4 * |
|
5 * Copyright (c) 2008 by Rhyolite Software, LLC |
|
6 * |
|
7 * This agreement is not applicable to any entity which sells anti-spam |
|
8 * solutions to others or provides an anti-spam solution as part of a |
|
9 * security solution sold to other entities, or to a private network |
|
10 * which employs the DCC or uses data provided by operation of the DCC |
|
11 * but does not provide corresponding data to other users. |
|
12 * |
|
13 * Permission to use, copy, modify, and distribute this software without |
|
14 * changes for any purpose with or without fee is hereby granted, provided |
|
15 * that the above copyright notice and this permission notice appear in all |
|
16 * copies and any distributed versions or copies are either unchanged |
|
17 * or not called anything similar to "DCC" or "Distributed Checksum |
|
18 * Clearinghouse". |
|
19 * |
|
20 * Parties not eligible to receive a license under this agreement can |
|
21 * obtain a commercial license to use DCC by contacting Rhyolite Software |
|
22 * at sales@rhyolite.com. |
|
23 * |
|
24 * A commercial license would be for Distributed Checksum and Reputation |
|
25 * Clearinghouse software. That software includes additional features. This |
|
26 * free license for Distributed ChecksumClearinghouse Software does not in any |
|
27 * way grant permision to use Distributed Checksum and Reputation Clearinghouse |
|
28 * software |
|
29 * |
|
30 * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL |
|
31 * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES |
|
32 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC |
|
33 * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES |
|
34 * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, |
|
35 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
|
36 * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS |
|
37 * SOFTWARE. |
|
38 * |
|
39 * Rhyolite Software DCC 1.3.103-1.214 $Revision$ |
|
40 */ |
|
41 |
|
42 #include "srvr_defs.h" |
|
43 #include <syslog.h> |
|
44 #include <sys/resource.h> |
|
45 #if defined(HAVE_HW_PHYSMEM) || defined(HAVE_BOOTTIME) |
|
46 #include <sys/sysctl.h> |
|
47 #endif |
|
48 #ifdef HAVE_PSTAT_GETSTATIC /* HP-UX */ |
|
49 #include <sys/pstat.h> |
|
50 #endif |
|
51 |
|
52 DB_STATS db_stats; |
|
53 |
|
54 DB_STATES db_sts; |
|
55 |
|
56 DCC_PATH db_path_buf; |
|
57 |
|
58 int db_fd = -1; |
|
59 DCC_PATH db_nm; |
|
60 int db_hash_fd = -1; |
|
61 DCC_PATH db_hash_nm; |
|
62 struct timeval db_locked; /* 1=database not locked */ |
|
63 |
|
64 struct timeval db_time; |
|
65 |
|
66 int db_debug; |
|
67 |
|
68 u_char grey_on; |
|
69 static u_char db_use_write; /* 0=no 1=if RAM big enough 2=always */ |
|
70 static u_char db_dirty; |
|
71 static u_char db_rdonly; |
|
72 int db_failed_line; /* bad happened at this line # */ |
|
73 const char *db_failed_file; /* in this file */ |
|
74 static u_char db_invalidate; /* do not write to the files */ |
|
75 |
|
76 /* Without mmap(MAP_NOSYNC) as on Solaris or a good msync() as on BSD/OS, |
|
77 * we must rely on the kernel's update/syncer/bufdaemon/etc. So in this |
|
78 * case just fondle the mmap()'ed pages and hope things work out. |
|
79 * |
|
80 * With a msync() and with mmap(MAP_NOSYNC), use MAP_NOSYNC if we can because |
|
81 * some systems flush too quickly while others such as FreeBSD 6.1 stall |
|
82 * for seconds while thinking about flushing the database. |
|
83 * But with mmap(MAP_NOSYNC) we leave large amounts of data in RAM that take |
|
84 * too long to be pushed to the disk when the system is shutting down. |
|
85 * So |
|
86 * - hit only those chunks of memory with real data or changes to data |
|
87 * with msync(). Trust dbclean to rebuild everything else at need. |
|
88 * |
|
89 * - when it seems the system is being shut down, delete the hash table |
|
90 * and let it be rebuilt when the system is rebooted. When the |
|
91 * hash table is rebuilt, "obsolete" markings in the data file that |
|
92 * might have been lost will be remade. |
|
93 * |
|
94 * A third case involves dccd -F. It requires that all changes be pushed to |
|
95 * the disk whenever dccd unlocks the database so that dbclean can see changes |
|
96 * dccd makes. It also requires that dbclean write all of its changes so |
|
97 * that dccd will find them when it reopens the database. |
|
98 */ |
|
99 |
|
100 #if !defined(MAP_NOSYNC) || defined(HAVE_OLD_MSYNC) || !defined(HAVE_BOOTTIME) |
|
101 #undef USE_MAP_NOSYNC |
|
102 #else |
|
103 #define USE_MAP_NOSYNC |
|
104 #endif |
|
105 |
|
106 static u_char db_not_synced; /* database unsynchronized with disk */ |
|
107 |
|
108 |
|
/* Advice to the kernel about one db_pagesize buffer starting at p.
 *	Each macro prefers posix_madvise() when its POSIX_MADV_* constant
 *	exists, falls back to madvise() when only the MADV_* constant exists,
 *	and otherwise evaluates to 0 ("success") without doing anything. */

#if defined(POSIX_MADV_WILLNEED)
#define DCC_MADV_WILLNEED(p) posix_madvise(p, db_pagesize, POSIX_MADV_WILLNEED)
#elif defined(MADV_WILLNEED)
#define DCC_MADV_WILLNEED(p) madvise(p, db_pagesize, MADV_WILLNEED)
#else
#define DCC_MADV_WILLNEED(p) 0
#endif

#if defined(POSIX_MADV_RANDOM)
#define DCC_MADV_RANDOM(p) posix_madvise(p, db_pagesize, POSIX_MADV_RANDOM)
#elif defined(MADV_RANDOM)
#define DCC_MADV_RANDOM(p) madvise(p, db_pagesize, MADV_RANDOM)
#else
#define DCC_MADV_RANDOM(p) 0
#endif

/* The Linux notion of MADV_DONTNEED implies discarding changes to data,
 * and some versions of Linux/GNU libc define POSIX_MADV_DONTNEED as that
 * same data-corrupting MADV_DONTNEED.  So never use either on Linux.
 * Test __linux__ and __linux as well as linux, because compilers do not
 * predefine the unprefixed name in strict ISO C modes. */
#if defined(linux) || defined(__linux__) || defined(__linux)
#define DCC_MADV_DONTNEED(p) 0
#elif defined(POSIX_MADV_DONTNEED)
#define DCC_MADV_DONTNEED(p) posix_madvise(p, db_pagesize, POSIX_MADV_DONTNEED)
#elif defined(MADV_DONTNEED)
#define DCC_MADV_DONTNEED(p) madvise(p, db_pagesize, MADV_DONTNEED)
#else
#define DCC_MADV_DONTNEED(p) 0
#endif

#if defined(POSIX_MADV_FREE)
#define DCC_MADV_FREE(p) posix_madvise(p, db_pagesize, POSIX_MADV_FREE)
#elif defined(MADV_FREE)
#define DCC_MADV_FREE(p) madvise(p, db_pagesize, MADV_FREE)
#else
#define DCC_MADV_FREE(p) 0
#endif
|
156 |
|
157 |
|
158 u_char db_minimum_map; /* this is dccd & dbclean is running */ |
|
159 |
|
160 int db_buf_total; /* total # of db buffers */ |
|
161 DB_PTR db_max_rss; /* maximum db resident set size */ |
|
162 DB_PTR db_max_byte; /* maximum db bytes in both files */ |
|
163 |
|
164 static u_int system_pagesize; /* kernel page size */ |
|
165 |
|
166 static DB_BUF db_bufs[DB_BUF_MAX]; /* control mmap()'ed blocks */ |
|
167 static DB_BUF *buf_oldest, *buf_newest; |
|
168 |
|
169 #define DB_HASH_TOTAL DB_BUF_MAX |
|
170 static DB_BUF *db_buf_hash[DB_HASH_TOTAL]; |
|
171 /* fancy 16-bit multiplicative hash assumes multiplication needs 1 cycle |
|
172 * and so the hash is faster than dealing with a collision */ |
|
173 #define DB_BUF_HASH(pnum,t) (&db_buf_hash[((((pnum)*(t)*0x9ccf) & 0xffff) \ |
|
174 * DB_BUF_MAX) >> 16]) |
|
175 |
|
176 time_t db_need_flush_secs; |
|
177 static time_t db_urgent_need_flush_secs; |
|
178 |
|
179 const DB_VERSION_BUF db_version_buf = DB_VERSION_STR; |
|
180 DB_PARMS db_parms; |
|
181 static DB_PARMS db_parms_stored; |
|
182 |
|
183 DCC_TGTS db_tholds[DCC_DIM_CKS]; |
|
184 |
|
185 u_int db_pagesize; /* size of 1 mmap()'ed buffer */ |
|
186 static u_int db_pagesize_part; |
|
187 |
|
188 DB_HOFF db_hash_fsize; /* size of hash table file */ |
|
189 static u_int hash_clear_pg_num; |
|
190 DB_HADDR db_hash_len; /* # of hash table entries */ |
|
191 DB_HADDR db_hash_divisor; /* modulus */ |
|
192 DB_HADDR db_hash_used; /* # of hash table entries in use */ |
|
193 u_int db_hash_page_len; /* # of HASH_ENTRY's per buffer */ |
|
194 DB_HADDR db_max_hash_entries = 0; /* after db_buf_init()*/ |
|
195 DB_PTR db_fsize; /* size of database file */ |
|
196 DB_PTR db_csize; /* size of database contents in bytes */ |
|
197 static DB_PTR db_csize_stored_hash; /* DB size stored in hash file */ |
|
198 static DB_HADDR db_hash_used_stored_hash; |
|
199 u_int db_page_max; /* only padding after this in DB buf */ |
|
200 static DB_PTR db_window_size; /* size of mmap() window */ |
|
201 char db_window_size_str[128]; |
|
202 static char db_physmem_str[80]; |
|
203 |
|
204 static const u_char dcc_ck_fuzziness[DCC_DIM_CKS] = { |
|
205 0, /* DCC_CK_INVALID */ |
|
206 DCC_CK_FUZ_LVL_NO, /* DCC_CK_IP */ |
|
207 DCC_CK_FUZ_LVL_NO, /* DCC_CK_ENV_FROM */ |
|
208 DCC_CK_FUZ_LVL_NO, /* DCC_CK_FROM */ |
|
209 DCC_CK_FUZ_LVL_NO, /* DCC_CK_SUB */ |
|
210 DCC_CK_FUZ_LVL_NO, /* DCC_CK_MESSAGE_ID */ |
|
211 DCC_CK_FUZ_LVL_NO, /* DCC_CK_RECEIVED */ |
|
212 DCC_CK_FUZ_LVL_NO, /* DCC_CK_BODY */ |
|
213 DCC_CK_FUZ_LVL1, /* DCC_CK_FUZ1 */ |
|
214 DCC_CK_FUZ_LVL2, /* DCC_CK_FUZ2 */ |
|
215 DCC_CK_FUZ_LVL_REP, /* DCC_CK_REP_TOTAL */ |
|
216 DCC_CK_FUZ_LVL_REP, /* DCC_CK_REP_BULK */ |
|
217 DCC_CK_FUZ_LVL2, /* DCC_CK_SRVR_ID */ |
|
218 DCC_CK_FUZ_LVL2 /* DCC_CK_ENV_TO */ |
|
219 }; |
|
220 static const u_char grey_ck_fuzziness[DCC_DIM_CKS] = { |
|
221 0, /* DCC_CK_INVALID */ |
|
222 DCC_CK_FUZ_LVL2, /* DCC_CK_IP */ |
|
223 DCC_CK_FUZ_LVL_NO, /* DCC_CK_ENV_FROM */ |
|
224 DCC_CK_FUZ_LVL_NO, /* DCC_CK_FROM */ |
|
225 DCC_CK_FUZ_LVL_NO, /* DCC_CK_SUB */ |
|
226 DCC_CK_FUZ_LVL_NO, /* DCC_CK_MESSAGE_ID */ |
|
227 DCC_CK_FUZ_LVL_NO, /* DCC_CK_RECEIVED */ |
|
228 DCC_CK_FUZ_LVL_NO, /* DCC_CK_BODY */ |
|
229 DCC_CK_FUZ_LVL_NO, /* DCC_CK_FUZ1 */ |
|
230 DCC_CK_FUZ_LVL_NO, /* DCC_CK_FUZ2 */ |
|
231 DCC_CK_FUZ_LVL_NO, /* DCC_CK_GREY_MSG */ |
|
232 DCC_CK_FUZ_LVL1, /* DCC_CK_GREY_TRIPLE */ |
|
233 DCC_CK_FUZ_LVL1, /* DCC_CK_SRVR_ID */ |
|
234 DCC_CK_FUZ_LVL1 /* DCC_CK_ENV_TO */ |
|
235 }; |
|
236 const u_char *db_ck_fuzziness = dcc_ck_fuzziness; |
|
237 |
|
238 |
|
239 static u_char buf_flush(DCC_EMSG, DB_BUF *, u_char); |
|
240 static u_char buf_munmap(DCC_EMSG, DB_BUF *); |
|
241 static DB_BUF *find_buf(DCC_EMSG, DB_BUF_TYPE, DB_PG_NUM); |
|
242 static u_char map_hash(DCC_EMSG, DB_HADDR, DB_STATE *, u_char); |
|
243 static u_char map_hash_ctl(DCC_EMSG, u_char); |
|
244 static u_char map_db(DCC_EMSG, DB_PTR, u_int, DB_STATE *, u_char); |
|
245 static u_char db_set_sizes(DCC_EMSG); |
|
246 |
|
247 |
|
/* Compute the least common multiple of two numbers.
 *	Returns 0 if either number is 0 instead of dividing by zero while
 *	looking for the greatest common divisor.
 *	The result is computed as n*(m/gcd), which postpones but does not
 *	prevent overflow of u_int for large arguments. */
static u_int
lcm(u_int n, u_int m)
{
	u_int big, gcd, rem;

	if (n == 0 || m == 0)
		return 0;

	/* first get the gcd of the two numbers with Euclid's algorithm */
	if (n >= m) {
		big = n;
		gcd = m;
	} else {
		big = m;
		gcd = n;
	}
	while ((rem = big % gcd) != 0) {
		big = gcd;
		gcd = rem;
	}

	return n * (m / gcd);
}
|
270 |
|
271 |
|
272 |
|
273 const char * |
|
274 db_ptr2str(DB_PTR val) |
|
275 { |
|
276 static int bufno; |
|
277 static struct { |
|
278 char str[16]; |
|
279 } bufs[4]; |
|
280 char *s; |
|
281 const char *units; |
|
282 |
|
283 if (val == 0) |
|
284 return "0"; |
|
285 |
|
286 s = bufs[bufno].str; |
|
287 bufno = (bufno+1) % DIM(bufs); |
|
288 |
|
289 if (val % (1024*1024*1024) == 0) { |
|
290 val /= (1024*1024*1024); |
|
291 units = "GB"; |
|
292 } else if (val % (1024*1024) == 0) { |
|
293 val /= (1024*1024); |
|
294 units = "MB"; |
|
295 } else if (val % 1024 == 0) { |
|
296 val /= 1024; |
|
297 units = "KB"; |
|
298 } else { |
|
299 units = ""; |
|
300 } |
|
301 if (val > 1000*1000*1000) |
|
302 snprintf(s, sizeof(bufs[0].str), "%d,%03d,%03d,%03d%s", |
|
303 (int)(val / (1000*1000*1000)), |
|
304 (int)(val / (1000*1000)) % 1000, |
|
305 (int)(val / 1000) % 1000, |
|
306 (int)(val % 1000), |
|
307 units); |
|
308 else if (val > 1000*1000) |
|
309 snprintf(s, sizeof(bufs[0].str), "%d,%03d,%03d%s", |
|
310 (int)(val / (1000*1000)), |
|
311 (int)(val / 1000) % 1000, |
|
312 (int)(val % 1000), |
|
313 units); |
|
314 else if (val > 1000*10) |
|
315 snprintf(s, sizeof(bufs[0].str), "%d,%03d%s", |
|
316 (int)(val / 1000), |
|
317 (int)(val % 1000), |
|
318 units); |
|
319 else |
|
320 snprintf(s, sizeof(bufs[0].str), "%d%s", |
|
321 (int)val, |
|
322 units); |
|
323 return s; |
|
324 } |
|
325 |
|
326 |
|
327 |
|
/* Format a count or a size in bytes with at most about 3 digits and
 * a "K", "M", or "G" suffix.
 *	buf		where to put the string
 *	buf_len		size of buf
 *	num		the value
 *	bytes_or_entries 0=plain number scaled by 1000, 1=bytes scaled by 1024
 *	Returns buf.
 *
 *	Scaled values that round to 100 or more are written as integers.
 *	Smaller values get 2 significant digits.  The test is made on the
 *	double itself at 99.5 instead of on (int)num >= 100 so that a value
 *	such as 99.6 is written as "100" and not as the "1e+02" that "%.2g"
 *	produces, and so that no huge value is converted to int. */
const char *
size2str(char *buf, u_int buf_len,
	 double num, u_char bytes_or_entries)	/* 0=number 1=bytes */
{
	const char *units;
	double k;

	k = bytes_or_entries ? 1024.0 : 1000.0;

	if (num < k) {
		units = "";
	} else if (num < k*k) {
		num /= k;
		units = "K";
	} else if (num < k*k*k) {
		num /= k*k;
		units = "M";
	} else {
		num /= k*k*k;
		units = "G";
	}

	if (num >= 99.5)
		snprintf(buf, buf_len, "%.0f%s", num, units);
	else
		snprintf(buf, buf_len, "%.2g%s", num, units);
	return buf;
}
|
356 |
|
357 |
|
358 |
|
359 void PATTRIB(5,6) |
|
360 db_failure(int linenum, const char *file, int ex_code, DCC_EMSG emsg, |
|
361 const char *p, ...) |
|
362 { |
|
363 va_list args; |
|
364 |
|
365 if (!db_failed_line) { |
|
366 db_failed_line = linenum; |
|
367 db_failed_file = file; |
|
368 } |
|
369 va_start(args, p); |
|
370 dcc_vpemsg(ex_code, emsg, p, args); |
|
371 va_end(args); |
|
372 } |
|
373 |
|
374 |
|
375 |
|
376 void PATTRIB(3,4) |
|
377 db_error_msg(int linenum, const char *file, const char *p, ...) |
|
378 { |
|
379 va_list args; |
|
380 |
|
381 if (!db_failed_line) { |
|
382 db_failed_line = linenum; |
|
383 db_failed_file = file; |
|
384 } |
|
385 va_start(args, p); |
|
386 dcc_verror_msg(p, args); |
|
387 va_end(args); |
|
388 } |
|
389 |
|
390 |
|
391 |
|
392 double /* hashes or bytes/second */ |
|
393 db_add_rate(const DB_PARMS *parms, |
|
394 u_char hash_or_db) /* 1=hash */ |
|
395 { |
|
396 struct timeval sn; |
|
397 time_t new_rate_secs; |
|
398 time_t total_secs; |
|
399 double added, cur, prev; |
|
400 |
|
401 total_secs = parms->rate_secs; |
|
402 if (hash_or_db) { |
|
403 added = parms->hash_added; |
|
404 cur = parms->hash_used; |
|
405 prev = parms->old_hash_used; |
|
406 } else { |
|
407 added = parms->db_added; |
|
408 cur = parms->db_csize; |
|
409 prev = parms->old_db_csize; |
|
410 } |
|
411 |
|
412 if (total_secs <= 0 || total_secs > DB_MAX_RATE_SECS |
|
413 || added <= 0.0) { |
|
414 added = 0.0; |
|
415 total_secs = 0; |
|
416 } |
|
417 |
|
418 dcc_ts2timeval(&sn, &parms->sn); |
|
419 new_rate_secs = parms->last_rate_sec - sn.tv_sec; |
|
420 if (new_rate_secs > 0 && new_rate_secs <= DB_MAX_RATE_SECS |
|
421 && cur > prev) { |
|
422 total_secs += new_rate_secs; |
|
423 added += cur - prev; |
|
424 } |
|
425 |
|
426 if (total_secs <= DB_MIN_RATE_SECS) |
|
427 return -1.0; |
|
428 return added / total_secs; |
|
429 } |
|
430 |
|
431 |
|
432 |
|
433 DB_NOKEEP_CKS |
|
434 def_nokeep_cks(void) |
|
435 { |
|
436 DCC_CK_TYPES type; |
|
437 DB_NOKEEP_CKS nokeep = 0; |
|
438 |
|
439 for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) { |
|
440 if (DB_GLOBAL_NOKEEP(grey_on, type)) |
|
441 DB_SET_NOKEEP(nokeep, type); |
|
442 } |
|
443 DB_SET_NOKEEP(nokeep, DCC_CK_INVALID); |
|
444 DB_SET_NOKEEP(nokeep, DCC_CK_FLOD_PATH); |
|
445 |
|
446 return nokeep; |
|
447 } |
|
448 |
|
449 |
|
450 |
|
451 void |
|
452 set_db_tholds(DB_NOKEEP_CKS nokeep) |
|
453 { |
|
454 DCC_CK_TYPES type; |
|
455 |
|
456 for (type = 0; type < DIM(db_tholds); ++type) { |
|
457 db_tholds[type] = (DB_TEST_NOKEEP(nokeep, type) |
|
458 ? DCC_TGTS_INVALID |
|
459 : DCC_CK_IS_REP_CMN(grey_on, type) |
|
460 ? DCC_TGTS_INVALID |
|
461 : grey_on ? 1 |
|
462 : type == DCC_CK_SRVR_ID ? 1 |
|
463 : BULK_THRESHOLD); |
|
464 } |
|
465 } |
|
466 |
|
467 |
|
468 |
|
469 static const char * |
|
470 buf2path(const DB_BUF *b) |
|
471 { |
|
472 switch (b->buf_type) { |
|
473 case DB_BUF_TYPE_HASH: |
|
474 return db_hash_nm; |
|
475 case DB_BUF_TYPE_DB: |
|
476 return db_nm; |
|
477 case DB_BUF_TYPE_FREE: |
|
478 default: |
|
479 dcc_logbad(EX_SOFTWARE, "impossible buffer type for a path"); |
|
480 } |
|
481 } |
|
482 |
|
483 |
|
484 |
|
485 static int |
|
486 buf2fd(const DB_BUF *b) |
|
487 { |
|
488 switch (b->buf_type) { |
|
489 case DB_BUF_TYPE_HASH: |
|
490 return db_hash_fd; |
|
491 case DB_BUF_TYPE_DB: |
|
492 return db_fd; |
|
493 case DB_BUF_TYPE_FREE: |
|
494 default: |
|
495 dcc_logbad(EX_SOFTWARE, "impossible buffer type for fd"); |
|
496 } |
|
497 } |
|
498 |
|
499 |
|
500 |
|
501 static void |
|
502 rel_db_state(DB_STATE *st) |
|
503 { |
|
504 DB_BUF *b; |
|
505 |
|
506 b = st->b; |
|
507 if (!b) |
|
508 return; |
|
509 st->b = 0; |
|
510 st->d.v = 0; |
|
511 st->s.rptr = DB_PTR_BAD; |
|
512 if (--b->lock_cnt < 0) |
|
513 dcc_logbad(EX_SOFTWARE,"negative database buffer lock"); |
|
514 } |
|
515 |
|
516 |
|
517 |
|
518 void |
|
519 rel_db_states(void) |
|
520 { |
|
521 DB_STATE *st; |
|
522 |
|
523 for (st = &db_sts.rcd; st <= &db_sts.hash_ctl; ++st) { |
|
524 rel_db_state(st); |
|
525 } |
|
526 } |
|
527 |
|
528 |
|
529 |
|
530 /* release one or all unneeded buffers */ |
|
531 u_char /* 0=problem 1=did nothing 2=did>=1 */ |
|
532 db_unload(DCC_EMSG emsg, |
|
533 u_char some) /* 0=all, 1=only one, 2=finished */ |
|
534 { |
|
535 DB_BUF *b; |
|
536 u_char result; |
|
537 |
|
538 result = 1; |
|
539 for (b = buf_oldest; b != 0; b = b->newer) { |
|
540 if (b->buf_type == DB_BUF_TYPE_FREE |
|
541 || b->lock_cnt != 0) |
|
542 continue; |
|
543 if (some == 2 |
|
544 && !(b->flags & DB_BUF_FG_USE_WRITE) |
|
545 && 0 > DCC_MADV_DONTNEED(b->buf.v)) |
|
546 dcc_error_msg("madvise(DONTNEED %s,%#x): %s", |
|
547 buf2path(b), db_pagesize, ERROR_STR()); |
|
548 if (!buf_munmap(emsg, b)) { |
|
549 emsg = 0; |
|
550 result = 0; |
|
551 } else if (result) { |
|
552 result = 2; |
|
553 } |
|
554 if (some == 1) |
|
555 return result; |
|
556 } |
|
557 |
|
558 return result; |
|
559 } |
|
560 |
|
561 |
|
562 |
|
563 static u_char |
|
564 buf_write_part(DCC_EMSG emsg, DB_BUF *b, off_t offset, void *buf, int len) |
|
565 { |
|
566 int i; |
|
567 |
|
568 offset += (off_t)b->pg_num * (off_t)db_pagesize; |
|
569 |
|
570 if (offset != lseek(buf2fd(b), offset, SEEK_SET)) { |
|
571 db_failure(__LINE__,__FILE__, EX_IOERR, emsg, |
|
572 "buf_write_part lseek(%s,"OFF_HPAT"): %s", |
|
573 buf2path(b), offset, ERROR_STR()); |
|
574 return 0; |
|
575 } |
|
576 i = write(buf2fd(b), buf, len); |
|
577 if (i != len) { |
|
578 db_failure(__LINE__,__FILE__, EX_IOERR, emsg, |
|
579 "buf_write_part(%s,%u)=%d: %s", |
|
580 buf2path(b), len, i, ERROR_STR()); |
|
581 return 0; |
|
582 } |
|
583 |
|
584 return 1; |
|
585 } |
|
586 |
|
587 |
|
588 |
|
589 /* push part of a buffer toward the disk |
|
590 * this can be needed even when the file has been opened and mapped |
|
591 * read-only by dbclean */ |
|
592 static u_char |
|
593 buf_flush_part(DCC_EMSG emsg, DB_BUF *b, |
|
594 u_int part, /* DB_BUF_NUM_PARTS=buffer */ |
|
595 u_char async UATTRIB) |
|
596 { |
|
597 u_int flush_len; |
|
598 char *flush_base; |
|
599 DB_BUF_FM bit; |
|
600 |
|
601 bit = PART2BIT(part) & (b->flush | b->flush_urgent); |
|
602 if (!bit) |
|
603 return 1; |
|
604 |
|
605 /* Send a new buffer to disk at once. */ |
|
606 if (b->flags & DB_BUF_FG_EXTENSION) { |
|
607 DB_BUF *b1, *b0; |
|
608 u_char result; |
|
609 |
|
610 /* To give the file system a chance to make the hash table |
|
611 * contiguous, first write all preceding new buffers. |
|
612 * In almost all cases, there will be none. */ |
|
613 result = 1; |
|
614 do { |
|
615 b0 = b; |
|
616 for (b1 = buf_oldest; b1 != 0; b1 = b1->newer) { |
|
617 if (!(b1->flags & DB_BUF_FG_EXTENSION) |
|
618 || b1->buf_type != b0->buf_type |
|
619 || b1->pg_num >= b0->pg_num) |
|
620 continue; |
|
621 b0 = b1; |
|
622 } |
|
623 b0->flags &= ~DB_BUF_FG_EXTENSION; |
|
624 b0->flush = 0; |
|
625 b0->flush_urgent = 0; |
|
626 if (!db_invalidate |
|
627 && !buf_write_part(emsg, b0, |
|
628 0, b0->buf.c, db_pagesize)) |
|
629 result = 0; |
|
630 } while (b0 != b); |
|
631 return result; |
|
632 } |
|
633 |
|
634 flush_base = b->ranges[part].lo; |
|
635 flush_len = b->ranges[part].hi - flush_base; |
|
636 b->flush &= ~bit; |
|
637 b->flush_urgent &= ~bit; |
|
638 |
|
639 if (db_invalidate) |
|
640 return 1; |
|
641 |
|
642 if (b->flags & DB_BUF_FG_USE_WRITE) { |
|
643 static char *wbuf; |
|
644 static u_int wbuf_len; |
|
645 |
|
646 /* In at least FreeBSD you cannot write() to the file |
|
647 * that underlies a mmap() region from that region */ |
|
648 if (wbuf_len < db_pagesize_part) { |
|
649 /* the page size for the current file |
|
650 * might be different from the old file */ |
|
651 if (wbuf) |
|
652 free(wbuf); |
|
653 wbuf_len = db_pagesize_part; |
|
654 wbuf = malloc(wbuf_len); |
|
655 } |
|
656 |
|
657 memcpy(wbuf, flush_base, flush_len); |
|
658 return buf_write_part(emsg, b, flush_base - b->buf.c, |
|
659 wbuf, flush_len); |
|
660 |
|
661 #ifndef HAVE_OLD_MSYNC |
|
662 } else if (async) { |
|
663 if (0 > MSYNC(flush_base, flush_len, MS_ASYNC)) { |
|
664 db_failure(__LINE__,__FILE__, EX_IOERR, emsg, |
|
665 "msync(db buffer %s,%#lx,%#x,MS_ASYNC): %s", |
|
666 buf2path(b), (long)flush_base, flush_len, |
|
667 ERROR_STR()); |
|
668 return 0; |
|
669 } |
|
670 #endif |
|
671 } else { |
|
672 if (0 > MSYNC(flush_base, flush_len, MS_SYNC)) { |
|
673 db_failure(__LINE__,__FILE__, EX_IOERR, emsg, |
|
674 "msync(db buffer %s,%#lx,%#x,MS_SYNC): %s", |
|
675 buf2path(b), (long)flush_base, flush_len, |
|
676 ERROR_STR()); |
|
677 return 0; |
|
678 } |
|
679 } |
|
680 |
|
681 return 1; |
|
682 } |
|
683 |
|
684 |
|
685 |
|
686 static u_char |
|
687 buf_flush(DCC_EMSG emsg, DB_BUF *b, u_char async) |
|
688 { |
|
689 u_int part; |
|
690 DB_BUF_FM bits; |
|
691 u_char result = 1; |
|
692 |
|
693 bits = b->flush_urgent | b->flush; |
|
694 for (part = 0; bits != 0 && part < DB_BUF_NUM_PARTS; ++part) { |
|
695 if (bits & PART2BIT(part)) { |
|
696 if (!buf_flush_part(emsg, b, part, async)) { |
|
697 emsg = 0; |
|
698 result = 0; |
|
699 } |
|
700 bits = b->flush_urgent | b->flush; |
|
701 } |
|
702 } |
|
703 return result; |
|
704 } |
|
705 |
|
706 |
|
707 |
|
708 /* Try to keep the data clean so that the fsync() required by Solaris |
|
709 * when the file is unloaded is not too expensive. |
|
710 * Try to flush frequently so that we don't stall as long in msync(). |
|
711 */ |
|
712 void |
|
713 db_flush_needed(void) |
|
714 { |
|
715 static DB_BUF *next_b = db_bufs; |
|
716 static u_int next_part; |
|
717 DB_BUF *b; |
|
718 u_int part, all_parts; |
|
719 int buf_num; |
|
720 u_char worked; |
|
721 |
|
722 /* send to the disk changes that cannot be recreated by dbclean */ |
|
723 if (db_urgent_need_flush_secs != 0 |
|
724 && DB_IS_TIME(db_urgent_need_flush_secs, |
|
725 DB_URGENT_NEED_FLUSH_SECS)) { |
|
726 worked = 0; |
|
727 for (b = buf_newest; b; b = b->older) { |
|
728 if (b->buf_type == DB_BUF_TYPE_FREE) |
|
729 continue; |
|
730 |
|
731 for (part = 0; |
|
732 b->flush_urgent != 0 && part < DB_BUF_NUM_PARTS; |
|
733 ++part) { |
|
734 if ((b->flush_urgent & PART2BIT(part))) { |
|
735 buf_flush_part(0, b, part, 1); |
|
736 worked = 1; |
|
737 } |
|
738 } |
|
739 |
|
740 /* Switch new data pages to mmap() |
|
741 * when this is not dbclean, since only dccd calls here |
|
742 * they are not using mmap() |
|
743 * they are either hash table pages or |
|
744 * not the last page in the file */ |
|
745 if ((b->flags & DB_BUF_FG_USE_WRITE) |
|
746 && !db_use_write |
|
747 && (b->buf_type != DB_BUF_TYPE_DB |
|
748 || (DB_PTR2PG_NUM(db_csize-1, db_pagesize) |
|
749 != b->pg_num))) { |
|
750 if (b->lock_cnt != 0) |
|
751 rel_db_states(); |
|
752 buf_munmap(0, b); |
|
753 } |
|
754 } |
|
755 |
|
756 /* Keep the clock running if we did any work. This tends to |
|
757 * avoid stalls caused by colliding with the FreeBSD syncer */ |
|
758 if (worked) { |
|
759 gettimeofday(&db_time, 0); |
|
760 db_urgent_need_flush_secs = (db_time.tv_sec |
|
761 + DB_URGENT_NEED_FLUSH_SECS); |
|
762 } else { |
|
763 db_urgent_need_flush_secs = 0; |
|
764 } |
|
765 } |
|
766 |
|
767 /* assume there will be nothing more to do */ |
|
768 db_need_flush_secs = db_urgent_need_flush_secs; |
|
769 |
|
770 #ifdef USE_MAP_NOSYNC |
|
771 /* if we are using mmap(MAP_NOSYNC), then there are no bits |
|
772 * set in any b->flush words except that of the recent |
|
773 * DB_BUF_FG_USE_WRITE extensions of the file. It is best to let |
|
774 * those blocks stay in RAM until the whole buffer is flushed and |
|
775 * switched to mmap above */ |
|
776 if (!db_use_write) |
|
777 return; |
|
778 #endif |
|
779 |
|
780 b = next_b; |
|
781 part = next_part; |
|
782 all_parts = DB_PARTS_PER_FLUSH; |
|
783 for (buf_num = DIM(db_bufs); buf_num >= 0; --buf_num) { |
|
784 if (b > LAST(db_bufs)) { |
|
785 part = 0; |
|
786 b = db_bufs; |
|
787 } |
|
788 if (!b->flush |
|
789 || part >= DB_BUF_NUM_PARTS |
|
790 || b->buf_type == DB_BUF_TYPE_FREE) { |
|
791 part = 0; |
|
792 ++b; |
|
793 continue; |
|
794 } |
|
795 |
|
796 while (part < DB_BUF_NUM_PARTS) { |
|
797 if (b->flush & PART2BIT(part)) { |
|
798 buf_flush_part(0, b, part, 1); |
|
799 if (--all_parts == 0) { |
|
800 next_part = part+1; |
|
801 next_b = b; |
|
802 db_need_flush_secs = (db_time.tv_sec |
|
803 + DB_NEED_FLUSH_SECS); |
|
804 return; |
|
805 } |
|
806 if (!b->flush) |
|
807 part = DB_BUF_NUM_PARTS; |
|
808 } |
|
809 ++part; |
|
810 } |
|
811 } |
|
812 } |
|
813 |
|
814 |
|
815 |
|
816 /* occassionally flush an unlocked data buffer for dbclean |
|
817 * dbclean mostly changes only the current record, so get started |
|
818 * writing the data to avoid stalling the system at the end. */ |
|
819 u_char |
|
820 db_flush_db(DCC_EMSG emsg UATTRIB) |
|
821 { |
|
822 #ifdef USE_MAP_NOSYNC |
|
823 DB_BUF *b; |
|
824 int limit; |
|
825 int pg_num; |
|
826 |
|
827 /* Gently push the new hash table to disk. |
|
828 * The disk image will never be accurate. This only allocates space. |
|
829 * Do not do this for systems that lack mmap(NOSYNC) such as Linux |
|
830 * that thrash themselves as the hash table is being built. A |
|
831 * long pause when the database is closed is not as bad as spending |
|
832 * hours building the hash table. */ |
|
833 while (hash_clear_pg_num < db_hash_fsize/db_hash_page_len) { |
|
834 pg_num = hash_clear_pg_num++; |
|
835 for (b = buf_oldest; b != 0; b = b->newer) { |
|
836 if (b->pg_num != pg_num |
|
837 || b->buf_type != DB_BUF_TYPE_HASH) |
|
838 continue; |
|
839 if (!(b->flags & DB_BUF_FG_EXTENSION)) |
|
840 break; |
|
841 if (b->lock_cnt != 0) |
|
842 rel_db_states(); |
|
843 return buf_munmap(emsg, b); |
|
844 } |
|
845 |
|
846 /* look for the next page if this one has already |
|
847 * been flushed */ |
|
848 } |
|
849 |
|
850 /* flush some ordinary buffers */ |
|
851 limit = 2; |
|
852 for (b = buf_oldest; b != 0; b = b->newer) { |
|
853 if (b->flush_urgent == 0 |
|
854 || b->buf_type == DB_BUF_TYPE_FREE |
|
855 || b->lock_cnt != 0) |
|
856 continue; |
|
857 if (!buf_flush(emsg, b, 1)) |
|
858 return 0; |
|
859 if (--limit <= 0) |
|
860 return 1; |
|
861 } |
|
862 #endif |
|
863 return 1; |
|
864 } |
|
865 |
|
866 |
|
867 |
|
868 /* mark part of a buffer dirty |
|
869 * "Urgent" changes are flushed by a timer. Ordinary changes |
|
870 * are often ignored and expected to be rebuilt if the system crashes. |
|
871 * That the hash table is deleted as the system is shut down while the |
|
872 * database must be flushed from the system's buffer cache is a reason |
|
873 * to keep the disk image of the database good. */ |
|
874 void |
|
875 db_set_flush(DB_STATE *st, u_char urgent, u_int len) |
|
876 { |
|
877 DB_BUF *b; |
|
878 DB_BUF_FM bit, new_bits, old_bits; |
|
879 char *buf_base, *part_end, *start, *end; |
|
880 u_int part, i; |
|
881 |
|
882 /* nothing to do if the kernel is handling it |
|
883 * or if we are letting this change be reconstructed by dbclean */ |
|
884 b = st->b; |
|
885 if (!(b->flags & DB_BUF_FG_USE_WRITE)) { |
|
886 #ifdef USE_MAP_NOSYNC |
|
887 if (!urgent) |
|
888 #endif |
|
889 return; |
|
890 } |
|
891 |
|
892 start = st->d.c; |
|
893 buf_base = b->buf.c; |
|
894 |
|
895 /* Increase to even pages in the hope that the file system might |
|
896 * be able to page-flip. This might at least avoid reading into the |
|
897 * buffer cache to honor a write(). Besides, Solaris' msync() handles |
|
898 * only even pages. */ |
|
899 i = (start - buf_base) % system_pagesize; |
|
900 start -= i; |
|
901 len += i; |
|
902 len = ((len + system_pagesize-1) / system_pagesize) * system_pagesize; |
|
903 |
|
904 end = start + len; |
|
905 if (end > buf_base+db_pagesize) |
|
906 dcc_logbad(EX_SOFTWARE, "inflated dirty buffer size"); |
|
907 |
|
908 part = (start - buf_base) / db_pagesize_part; |
|
909 part_end = buf_base + part * db_pagesize_part; |
|
910 bit = PART2BIT(part); |
|
911 new_bits = 0; |
|
912 old_bits = b->flush | b->flush_urgent; |
|
913 do { |
|
914 part_end += db_pagesize_part; |
|
915 if (part_end > end) |
|
916 part_end = end; |
|
917 |
|
918 if (!(old_bits & bit)) { |
|
919 b->ranges[part].lo = start; |
|
920 b->ranges[part].hi = part_end; |
|
921 } else { |
|
922 if (b->ranges[part].lo > start) |
|
923 b->ranges[part].lo = start; |
|
924 if (b->ranges[part].hi < part_end) |
|
925 b->ranges[part].hi = part_end; |
|
926 } |
|
927 new_bits |= bit; |
|
928 |
|
929 start = part_end; |
|
930 bit <<= 1; |
|
931 ++part; |
|
932 } while (part_end < end); |
|
933 |
|
934 if (urgent) { |
|
935 b->flush_urgent |= new_bits; |
|
936 if (!db_urgent_need_flush_secs) { |
|
937 db_urgent_need_flush_secs = (db_time.tv_sec |
|
938 + DB_URGENT_NEED_FLUSH_SECS); |
|
939 if (db_need_flush_secs == 0) |
|
940 db_need_flush_secs = db_urgent_need_flush_secs; |
|
941 } |
|
942 } else { |
|
943 b->flush |= new_bits; |
|
944 if (db_need_flush_secs == 0 |
|
945 || db_need_flush_secs > db_time.tv_sec+DB_NEED_FLUSH_SECS) |
|
946 db_need_flush_secs = db_time.tv_sec+DB_NEED_FLUSH_SECS; |
|
947 } |
|
948 } |
|
949 |
|
950 |
|
951 |
|
952 /* Shut down the database, including flushing and releasing all |
|
953 * mmap()'ed buffers |
|
954 * Do nothing to the files for mode=-1 because the file is new and garbage |
|
955 * or the caller is a fork of the server shedding memory. */ |
|
956 u_char |
|
957 db_close(int mode) /* -1=invalidate, 0=dirty, 1=clean */ |
|
958 { |
|
959 u_char result; |
|
960 |
|
961 if (mode >= 0) { |
|
962 /* flush the data and then release and flush the dirty flags */ |
|
963 result = make_clean(mode == 0 ? 0 : 1); |
|
964 if (!db_unload(0, 0)) |
|
965 result = 0; |
|
966 } else { |
|
967 db_invalidate = 1; |
|
968 rel_db_states(); |
|
969 result = (db_unload(0, 0) > 0); |
|
970 } |
|
971 |
|
972 /* Close the hash table first because the server is often |
|
973 * waiting for the lock on the main file held by dbclean. |
|
974 * Destroy the hash table if it is bad */ |
|
975 if (db_hash_fd >= 0) { |
|
976 if (0 > close(db_hash_fd)) { |
|
977 dcc_pemsg(EX_IOERR, 0, "close(%s): %s", |
|
978 db_hash_nm, ERROR_STR()); |
|
979 result = 0; |
|
980 } |
|
981 db_hash_fd = -1; |
|
982 } |
|
983 if (db_fd >= 0) { |
|
984 if (0 > close(db_fd)) { |
|
985 dcc_pemsg(EX_IOERR, 0, "close(%s): %s", |
|
986 db_nm, ERROR_STR()); |
|
987 result = 0; |
|
988 } |
|
989 db_fd = -1; |
|
990 } |
|
991 |
|
992 db_locked.tv_sec = 0; |
|
993 return result; |
|
994 } |
|
995 |
|
996 |
|
997 |
|
998 /* Delete the hash table if the system is being rebooted and we |
|
999 * don't trust the file system to get all of the hash table. This might |
|
1000 * make system shut down faster */ |
|
1001 void |
|
1002 db_stop(void) |
|
1003 { |
|
1004 if (db_hash_fd < 0 |
|
1005 || !DB_IS_LOCKED() |
|
1006 || !db_not_synced |
|
1007 || db_hash_nm[0] == '\0') |
|
1008 return; |
|
1009 |
|
1010 if (0 > unlink(db_hash_nm) |
|
1011 && errno != ENOENT) |
|
1012 dcc_error_msg("unlink(%s): %s", db_hash_nm, ERROR_STR()); |
|
1013 } |
|
1014 |
|
1015 |
|
1016 |
|
1017 /* see if (another) instance of dbclean is already running */ |
|
1018 static int dbclean_lock_fd = -1; |
|
1019 static DCC_PATH dbclean_lock_nm; |
|
1020 |
|
1021 u_char /* 1=no (other) dbclean */ |
|
1022 lock_dbclean(DCC_EMSG emsg, const char *cur_db_nm) |
|
1023 { |
|
1024 char pid[32]; |
|
1025 int i; |
|
1026 |
|
1027 fnm2rel_good(dbclean_lock_nm, cur_db_nm, DB_LOCK_SUFFIX); |
|
1028 dbclean_lock_fd = dcc_lock_open(emsg, dbclean_lock_nm, |
|
1029 O_RDWR|O_CREAT, |
|
1030 DCC_LOCK_OPEN_NOWAIT, |
|
1031 DCC_LOCK_ALL_FILE, 0); |
|
1032 if (dbclean_lock_fd < 0) |
|
1033 return 0; |
|
1034 |
|
1035 i = 1+snprintf(pid, sizeof(pid), "%ld\n", (long)getpid()); |
|
1036 if (i != write(dbclean_lock_fd, pid, i)) |
|
1037 dcc_logbad(EX_IOERR, "write(%s, pid): %s", |
|
1038 dbclean_lock_nm, ERROR_STR()); |
|
1039 |
|
1040 /* Let anyone write in it in case we are running as root |
|
1041 * and get interrupted by a crash or gdb. A stray, stale |
|
1042 * private lock file cannot be locked */ |
|
1043 chmod(dbclean_lock_nm, 0666); |
|
1044 |
|
1045 return 1; |
|
1046 } |
|
1047 |
|
1048 |
|
1049 |
|
1050 void |
|
1051 unlock_dbclean(void) |
|
1052 { |
|
1053 if (dbclean_lock_fd >= 0) { |
|
1054 if (0 > unlink(dbclean_lock_nm)) |
|
1055 dcc_error_msg("unlink(%s): %s", |
|
1056 dbclean_lock_nm, ERROR_STR()); |
|
1057 close(dbclean_lock_fd); |
|
1058 dbclean_lock_fd = -1; |
|
1059 } |
|
1060 } |
|
1061 |
|
1062 |
|
1063 |
|
1064 /* This locking does only multiple-readers/single-writer */ |
|
1065 int /* -1=failed, 0=was not locked, 1=was */ |
|
1066 db_lock(void) |
|
1067 { |
|
1068 struct stat sb; |
|
1069 |
|
1070 if (DB_IS_LOCKED()) |
|
1071 return 1; |
|
1072 |
|
1073 if (!dcc_exlock_fd(0, db_fd, DCC_LOCK_ALL_FILE, 15*60, "", db_nm)) |
|
1074 return -1; |
|
1075 if (0 > fstat(db_fd, &sb)) { |
|
1076 db_failure(__LINE__,__FILE__, EX_IOERR, 0, |
|
1077 "stat(%s): %s", db_nm, ERROR_STR()); |
|
1078 return -1; |
|
1079 } |
|
1080 if (db_fsize != (DB_HOFF)sb.st_size) { |
|
1081 if (db_fsize > (DB_HOFF)sb.st_size || !db_rdonly) { |
|
1082 db_failure(__LINE__,__FILE__, EX_IOERR, 0, |
|
1083 "%s size changed from "OFF_HPAT |
|
1084 " to "OFF_HPAT, |
|
1085 db_nm, db_fsize, sb.st_size); |
|
1086 return -1; |
|
1087 } |
|
1088 db_fsize = sb.st_size; |
|
1089 } |
|
1090 |
|
1091 db_locked = db_time; |
|
1092 return 0; |
|
1093 } |
|
1094 |
|
1095 |
|
1096 |
|
1097 /* flush buffers to make the disk reasonably correct but not perfect |
|
1098 * This does not compensate for a lack of coherent mmap() in the system. |
|
1099 * |
|
1100 * It leaves the disk only as accurate as implied by db_not_synced. |
|
1101 * This flushes buffers marked either urgent and ordinarily dirty. |
|
1102 * If db_not_synced is set, then non-urgent dirty bits are not set. */ |
|
1103 static u_char |
|
1104 make_clean_flush(void) |
|
1105 { |
|
1106 DB_BUF *b; |
|
1107 u_char result; |
|
1108 |
|
1109 result = 1; |
|
1110 for (b = buf_oldest; b != 0; b = b->newer) { |
|
1111 if (b->buf_type == DB_BUF_TYPE_FREE) |
|
1112 continue; |
|
1113 if (!buf_flush(0, b, 0)) |
|
1114 result = 0; |
|
1115 } |
|
1116 |
|
1117 return result; |
|
1118 } |
|
1119 |
|
1120 |
|
1121 |
|
1122 /* push all of our database changes to the disk and try to clear the dirty bit |
|
1123 * do not necessarily unmap anything */ |
|
1124 u_char |
|
1125 make_clean(u_char clean) /* 0=leave hash marked dirty, */ |
|
1126 { /* 1=marked clean, 2=fsync */ |
|
1127 u_char need_db_fsync, result; |
|
1128 struct stat sb; |
|
1129 |
|
1130 rel_db_states(); |
|
1131 |
|
1132 result = 1; |
|
1133 |
|
1134 /* quit if we are giving up */ |
|
1135 if (db_invalidate) |
|
1136 return result; |
|
1137 |
|
1138 if (db_failed_line) |
|
1139 clean = 0; |
|
1140 |
|
1141 if (!make_clean_flush()) { |
|
1142 clean = 0; |
|
1143 result = 0; |
|
1144 } |
|
1145 |
|
1146 /* simply unlock all of the buffers if they are clean |
|
1147 * and do not need to (or cannot) be synchronized with fsync() */ |
|
1148 if (!db_dirty |
|
1149 && (clean < 2 /* not asked to synchronize */ |
|
1150 || db_rdonly /* cannot be synchronized */ |
|
1151 || !db_not_synced)) /* does not need to be synchronized */ |
|
1152 return result; |
|
1153 |
|
1154 need_db_fsync = (clean == 2); |
|
1155 |
|
1156 /* Send the meta-data to disk so that other processes |
|
1157 * such as dbclean can find the new length of the file |
|
1158 * on Solaris. Otherwise the file looks broken because |
|
1159 * its contained data length can be larger than its |
|
1160 * inode size on Solaris. */ |
|
1161 if (!need_db_fsync && clean) { |
|
1162 if (0 > fstat(db_fd, &sb)) { |
|
1163 dcc_error_msg("make_clean fstat(%s): %s", |
|
1164 db_nm, ERROR_STR()); |
|
1165 need_db_fsync = 1; |
|
1166 } else if (db_fsize != (DB_HOFF)sb.st_size) { |
|
1167 if (db_debug) |
|
1168 quiet_trace_msg("need fsync() because db_fsize=" |
|
1169 OFF_HPAT" but stat="OFF_HPAT, |
|
1170 db_fsize, sb.st_size); |
|
1171 need_db_fsync = 1; |
|
1172 } |
|
1173 } |
|
1174 |
|
1175 if (need_db_fsync |
|
1176 && 0 > fsync(db_fd)) { |
|
1177 dcc_error_msg("make_clean fsync(%s): %s", |
|
1178 db_nm, ERROR_STR()); |
|
1179 clean = 0; |
|
1180 result = 0; |
|
1181 } |
|
1182 |
|
1183 if (clean && !map_hash_ctl(0, 0)) { |
|
1184 clean = 0; |
|
1185 result = 0; |
|
1186 } |
|
1187 if (clean == 2) { |
|
1188 if (0 > fsync(db_hash_fd)) { |
|
1189 dcc_error_msg("make_clean fsync(%s): %s", |
|
1190 db_hash_nm, ERROR_STR()); |
|
1191 clean = 0; |
|
1192 result = 0; |
|
1193 } else { |
|
1194 db_not_synced = 0; |
|
1195 db_sts.hash_ctl.d.vals->s.flags &= ~HASH_CTL_FG_NOSYNC; |
|
1196 SET_FLUSH_HCTL(1); |
|
1197 if (!make_clean_flush()) { |
|
1198 clean = 0; |
|
1199 result = 0; |
|
1200 } |
|
1201 } |
|
1202 } |
|
1203 |
|
1204 /* Clean the dirty flag in the hash table. |
|
1205 * With luck, this will reach the disk after everything else. */ |
|
1206 if (clean |
|
1207 && !(db_sts.hash_ctl.d.vals->s.flags & HASH_CTL_FG_CLEAN)) { |
|
1208 db_sts.hash_ctl.d.vals->s.flags |= HASH_CTL_FG_CLEAN; |
|
1209 SET_FLUSH_HCTL(0); |
|
1210 } |
|
1211 |
|
1212 /* finally flush the flag in the hash table */ |
|
1213 rel_db_states(); |
|
1214 if (!make_clean_flush()) |
|
1215 result = 0; |
|
1216 |
|
1217 if (clean) |
|
1218 db_dirty = 0; |
|
1219 return result; |
|
1220 } |
|
1221 |
|
1222 |
|
1223 |
|
1224 /* mark the hash file and so the database dirty */ |
|
1225 static u_char |
|
1226 db_make_dirty(DCC_EMSG emsg) |
|
1227 { |
|
1228 if (db_dirty) |
|
1229 return 1; |
|
1230 |
|
1231 if (!DB_IS_LOCKED()) { |
|
1232 dcc_logbad(EX_SOFTWARE, "dirtying unlocked database"); |
|
1233 return 0; |
|
1234 } |
|
1235 |
|
1236 if (db_rdonly) |
|
1237 dcc_logbad(EX_SOFTWARE, "dirtying read-only database"); |
|
1238 |
|
1239 if (!map_hash_ctl(emsg, 0)) |
|
1240 return 0; |
|
1241 db_sts.hash_ctl.d.vals->s.flags &= ~HASH_CTL_FG_CLEAN; |
|
1242 #ifdef USE_MAP_NOSYNC |
|
1243 if (!(db_sts.hash_ctl.d.vals->s.flags & HASH_CTL_FG_NOSYNC)) { |
|
1244 db_sts.hash_ctl.d.vals->s.synced = time(0); |
|
1245 db_sts.hash_ctl.d.vals->s.flags |= HASH_CTL_FG_NOSYNC; |
|
1246 } |
|
1247 db_not_synced = 1; |
|
1248 #endif |
|
1249 |
|
1250 SET_FLUSH_HCTL(1); |
|
1251 if (!buf_flush_part(emsg, db_sts.hash_ctl.b, 0, 0)) |
|
1252 return 0; |
|
1253 |
|
1254 db_dirty = 1; |
|
1255 return 1; |
|
1256 } |
|
1257 |
|
1258 |
|
1259 |
|
1260 /* (start to) unlock the database */ |
|
1261 u_char /* 0=failed, 1=at least started */ |
|
1262 db_unlock(void) |
|
1263 { |
|
1264 DB_BUF *b; |
|
1265 int result; |
|
1266 |
|
1267 if (!DB_IS_LOCKED()) |
|
1268 return 1; |
|
1269 |
|
1270 /* Clear the dirty bit in the database because we may not |
|
1271 * be able to lock the database later to clear the dirty bit. |
|
1272 * Dbclean needs to see the dirty bit clear. */ |
|
1273 result = make_clean(1); |
|
1274 |
|
1275 /* Release DB_BUF_FG_USE_WRITE buffers because they are not consistent |
|
1276 * among processes |
|
1277 * Release everything if dccd wants stay out of RAM in favor |
|
1278 * of dbclean */ |
|
1279 for (b = buf_oldest; b != 0; b = b->newer) { |
|
1280 if (b->buf_type == DB_BUF_TYPE_FREE) |
|
1281 continue; |
|
1282 if (db_minimum_map |
|
1283 || (b->flags & DB_BUF_FG_USE_WRITE)) |
|
1284 buf_munmap(0, b); |
|
1285 } |
|
1286 |
|
1287 if (!dcc_unlock_fd(0, db_fd, DCC_LOCK_ALL_FILE, "", db_nm)) |
|
1288 result = 0; |
|
1289 db_locked.tv_sec = 0; |
|
1290 return result; |
|
1291 } |
|
1292 |
|
1293 |
|
1294 |
|
1295 static const char * |
|
1296 mbyte2str(DB_PTR val) |
|
1297 { |
|
1298 return db_ptr2str(val*1024*1024); |
|
1299 } |
|
1300 |
|
1301 |
|
1302 |
|
1303 #if defined(RLIMIT_AS) || defined(RLIMIT_RSS) || defined(RLIMIT_FSIZE) |
|
1304 static DB_PTR |
|
1305 use_rlimit(int resource, const char *rlimit_nm, |
|
1306 DB_PTR cur_val, DB_PTR min_val, const char *val_nm) |
|
1307 { |
|
1308 struct rlimit limit_old, limit_new; |
|
1309 DB_PTR new_val; |
|
1310 |
|
1311 if (0 > getrlimit(resource, &limit_old)) { |
|
1312 dcc_error_msg("getrlimit(%s): %s", rlimit_nm, ERROR_STR()); |
|
1313 return cur_val; |
|
1314 } |
|
1315 |
|
1316 if ((DB_PTR)limit_old.rlim_cur >= cur_val+DB_PAD_MBYTE*1024) |
|
1317 return cur_val; |
|
1318 |
|
1319 /* assume we are root and try to increase the hard limit */ |
|
1320 if ((DB_PTR)limit_new.rlim_max < cur_val+DB_PAD_BYTE) { |
|
1321 limit_new = limit_old; |
|
1322 limit_new.rlim_max = cur_val+DB_PAD_BYTE; |
|
1323 if (0 > setrlimit(resource, &limit_new)) { |
|
1324 if (db_debug) |
|
1325 quiet_trace_msg("setrlimit(%s, " |
|
1326 L_DPAT","L_DPAT"): %s", |
|
1327 rlimit_nm, |
|
1328 (DB_PTR)limit_new.rlim_cur, |
|
1329 (DB_PTR)limit_new.rlim_max, |
|
1330 ERROR_STR()); |
|
1331 } else { |
|
1332 if (0 > getrlimit(resource, &limit_old)) { |
|
1333 dcc_error_msg("getrlimit(%s): %s", |
|
1334 rlimit_nm, ERROR_STR()); |
|
1335 return cur_val; |
|
1336 } |
|
1337 } |
|
1338 } |
|
1339 |
|
1340 limit_new = limit_old; |
|
1341 if ((DB_PTR)limit_new.rlim_max < min_val+DB_PAD_BYTE) |
|
1342 limit_new.rlim_max = min_val + DB_PAD_BYTE; |
|
1343 limit_new.rlim_cur = limit_new.rlim_max; |
|
1344 if ((DB_PTR)limit_new.rlim_cur > cur_val+DB_PAD_BYTE) |
|
1345 limit_new.rlim_cur = cur_val+DB_PAD_BYTE; |
|
1346 if (0 > setrlimit(resource, &limit_new)) { |
|
1347 dcc_error_msg("setrlimit(%s, "L_DPAT","L_DPAT"): %s", |
|
1348 rlimit_nm, |
|
1349 (DB_PTR)limit_new.rlim_cur, |
|
1350 (DB_PTR)limit_new.rlim_max, |
|
1351 ERROR_STR()); |
|
1352 new_val = limit_old.rlim_cur - DB_PAD_BYTE; |
|
1353 if (new_val < min_val) |
|
1354 new_val = min_val; |
|
1355 } else { |
|
1356 if (limit_old.rlim_cur < limit_new.rlim_cur |
|
1357 && db_debug) |
|
1358 quiet_trace_msg("increased %s from %s to %s", |
|
1359 rlimit_nm, |
|
1360 db_ptr2str(limit_old.rlim_cur), |
|
1361 #ifdef RLIM_INFINITY |
|
1362 (limit_new.rlim_cur == RLIM_INFINITY) |
|
1363 ? "infinity" : |
|
1364 #endif |
|
1365 db_ptr2str(limit_new.rlim_cur)); |
|
1366 new_val = limit_new.rlim_cur - DB_PAD_BYTE; |
|
1367 } |
|
1368 |
|
1369 if (cur_val > new_val) { |
|
1370 quiet_trace_msg("%s reduced %s from %s to %s", |
|
1371 rlimit_nm, val_nm, |
|
1372 db_ptr2str(cur_val), |
|
1373 db_ptr2str(new_val)); |
|
1374 return new_val; |
|
1375 } |
|
1376 |
|
1377 return cur_val; |
|
1378 } |
|
1379 #endif |
|
1380 |
|
1381 |
|
1382 |
|
1383 static void |
|
1384 get_db_max_rss(void) |
|
1385 { |
|
1386 DB_PTR old_val, new_val, db_min_mbyte, db_min_byte, db_max_mbyte; |
|
1387 int physmem_str_len; |
|
1388 DB_PTR physmem; |
|
1389 |
|
1390 /* use default maximum if maximum is bogus or unset by ./configure */ |
|
1391 db_max_mbyte = MAX_MAX_DB_MBYTE; |
|
1392 #if DB_MAX_MBYTE != 0 |
|
1393 db_max_mbyte = DB_MAX_MBYTE; |
|
1394 if (db_max_mbyte < DB_MIN_MIN_MBYTE |
|
1395 || db_max_mbyte > MAX_MAX_DB_MBYTE) { |
|
1396 quiet_trace_msg("ignore bad ./configure --with-max-db-mem=%d", |
|
1397 DB_MAX_MBYTE); |
|
1398 db_max_mbyte = MAX_MAX_DB_MBYTE; |
|
1399 } else if (db_debug) { |
|
1400 quiet_trace_msg("DB max=%s" |
|
1401 " from ./configure --with-max-db-mem=%d", |
|
1402 mbyte2str(db_max_mbyte), DB_MAX_MBYTE); |
|
1403 } |
|
1404 #endif |
|
1405 #ifndef HAVE_BIG_FILES |
|
1406 /* we need big off_t for files larger than 2 GBytes */ |
|
1407 if (db_max_mbyte > DB_MAX_2G_MBYTE) { |
|
1408 old_val = db_max_mbyte; |
|
1409 db_max_mbyte= DB_MAX_2G_MBYTE; |
|
1410 if (db_debug) |
|
1411 quiet_trace_msg("32-bit off_t reduced DB max from %s" |
|
1412 " to %s", |
|
1413 mbyte2str(old_val), |
|
1414 mbyte2str(db_max_mbyte)); |
|
1415 } |
|
1416 #endif |
|
1417 |
|
1418 /* use default if ./configure --with-db-memory=MB is bogus or unset */ |
|
1419 #if DB_MIN_MBYTE == 0 |
|
1420 db_min_mbyte = 64; |
|
1421 #else |
|
1422 db_min_mbyte = DB_MIN_MBYTE; |
|
1423 if (db_min_mbyte < DB_MIN_MIN_MBYTE) { |
|
1424 quiet_trace_msg("ignore bad ./configure --with-db-memory=%d", |
|
1425 DB_MIN_MBYTE); |
|
1426 db_min_mbyte = DB_DEF_MIN_MBYTE; |
|
1427 } else if (db_min_mbyte > db_max_mbyte) { |
|
1428 quiet_trace_msg("ignore ./configure --with-db-memory=%d" |
|
1429 " > DB max=%s", |
|
1430 mbyte2str(db_max_mbyte)); |
|
1431 db_min_mbyte = DB_DEF_MIN_MBYTE; |
|
1432 } else if (db_debug) { |
|
1433 quiet_trace_msg("use ./configure --with-db-memory=%d", |
|
1434 DB_MIN_MBYTE); |
|
1435 } |
|
1436 #endif |
|
1437 |
|
1438 db_min_byte = db_min_mbyte * (1024*1024); |
|
1439 db_max_byte = db_max_mbyte * (1024*1024); |
|
1440 |
|
1441 #ifdef RLIMIT_FSIZE |
|
1442 db_max_mbyte = (use_rlimit(RLIMIT_FSIZE, "RLIMIT_FSIZE", |
|
1443 db_max_byte, db_min_byte, "DB max") |
|
1444 / (1024*1024)); |
|
1445 db_max_byte = db_max_mbyte * (1024*1024); |
|
1446 #endif /* RLIMIT_FSIZE */ |
|
1447 |
|
1448 physmem = 0; |
|
1449 #ifdef HAVE_PHYSMEM_TOTAL |
|
1450 /* maybe someday physmem_total() will be widely available */ |
|
1451 physmem = physmem_total(); |
|
1452 if (db_debug) |
|
1453 quiet_trace_msg("real=%s from physmem_total()", |
|
1454 db_ptr2str(physmem)); |
|
1455 #endif |
|
1456 #ifdef HAVE__SC_PHYS_PAGES |
|
1457 if (physmem == 0) { |
|
1458 long pages, sizepage; |
|
1459 |
|
1460 if ((pages = sysconf(_SC_PHYS_PAGES)) == -1) { |
|
1461 dcc_error_msg("sysconf(_SC_PHYS_PAGES): %s", |
|
1462 ERROR_STR()); |
|
1463 } else if ((sizepage = sysconf(_SC_PAGESIZE)) == -1) { |
|
1464 dcc_error_msg("sysconf(_SC_PAGESIZE): %s", |
|
1465 ERROR_STR()); |
|
1466 } else { |
|
1467 physmem = (DB_PTR)pages * (DB_PTR)sizepage; |
|
1468 if (db_debug) |
|
1469 quiet_trace_msg("real=%s" |
|
1470 " from sysconf(_SC_PHYS_PAGES)" |
|
1471 " and sysconf(_SC_PAGESIZE)", |
|
1472 db_ptr2str(physmem)); |
|
1473 } |
|
1474 } |
|
1475 #endif |
|
1476 #ifdef HAVE_HW_PHYSMEM |
|
1477 if (physmem == 0) { |
|
1478 int mib[2] = {CTL_HW, HW_PHYSMEM}; |
|
1479 unsigned long int hw_physmem; |
|
1480 size_t hw_physmem_len; |
|
1481 |
|
1482 hw_physmem_len = sizeof(hw_physmem); |
|
1483 if (0 > sysctl(mib, 2, &hw_physmem, &hw_physmem_len, 0,0)) { |
|
1484 dcc_error_msg("sysctl(HW_PHYSMEM): %s", ERROR_STR()); |
|
1485 } else { |
|
1486 physmem = hw_physmem; |
|
1487 if (db_debug) |
|
1488 quiet_trace_msg("real=%s from sysctl(mib)", |
|
1489 db_ptr2str(physmem)); |
|
1490 } |
|
1491 } |
|
1492 #endif |
|
1493 #ifdef HAVE_PSTAT_GETSTATIC |
|
1494 if (physmem == 0) { |
|
1495 struct pst_static pss; |
|
1496 |
|
1497 if (0 > pstat_getstatic(&pss, sizeof pss, 1, 0)) { |
|
1498 dcc_error_msg("pstat_getstatic(): %s", ERROR_STR()); |
|
1499 } else if (pss.physical_memory <= 0 |
|
1500 || pss.page_size < 0) { |
|
1501 dcc_error_msg("pstat_getstatic() says" |
|
1502 " physical_memory=%d page_size=%d", |
|
1503 pss.physical_memory, pss.page_size); |
|
1504 } else { |
|
1505 physmem = ((DB_PTR)pss.physical_memory |
|
1506 * (DB_PTR)pss.page_size); |
|
1507 if (db_debug) |
|
1508 quiet_trace_msg("real=%s" |
|
1509 " from pstat_getstatic()", |
|
1510 db_ptr2str(physmem)); |
|
1511 } |
|
1512 } |
|
1513 #endif |
|
1514 |
|
1515 physmem_str_len = 0; |
|
1516 db_physmem_str[0] = '\0'; |
|
1517 if (physmem == 0) { |
|
1518 quiet_trace_msg("failed to get real memory size"); |
|
1519 } else { |
|
1520 physmem_str_len = snprintf(db_physmem_str, |
|
1521 sizeof(db_physmem_str), |
|
1522 " real=%s", |
|
1523 db_ptr2str(physmem)); |
|
1524 |
|
1525 /* Try to use half of physical memory |
|
1526 * if there is less than 2 GByte |
|
1527 * all except 512 MByte between 2 GByte and 4 GByte, |
|
1528 * and all but 1 GByte if there is more than 4 GByte */ |
|
1529 if (physmem/(1024*1024) < 2*1024) |
|
1530 new_val = physmem/2; |
|
1531 else if (physmem/(1024*1024) <= 4*1024) |
|
1532 new_val = physmem - 512*(1024*1024); |
|
1533 else |
|
1534 new_val = physmem - 1024*(1024*1024); |
|
1535 if (new_val < db_min_byte) { |
|
1536 if (db_debug) |
|
1537 quiet_trace_msg("real=%s would give DB max=%s" |
|
1538 " smaller than minimum %s", |
|
1539 db_ptr2str(physmem), |
|
1540 db_ptr2str(new_val), |
|
1541 mbyte2str(db_min_mbyte)); |
|
1542 new_val = db_min_byte; |
|
1543 } |
|
1544 if (db_max_byte > new_val) { |
|
1545 old_val = db_max_byte; |
|
1546 db_max_mbyte = new_val / (1024*1024); |
|
1547 db_max_byte = db_max_mbyte * (1024*1024); |
|
1548 if (db_debug) |
|
1549 quiet_trace_msg("real=%s reduced DB max" |
|
1550 " from %s to %s", |
|
1551 db_ptr2str(physmem), |
|
1552 db_ptr2str(old_val), |
|
1553 db_ptr2str(db_max_byte)); |
|
1554 } |
|
1555 } |
|
1556 |
|
1557 /* window need not be larger than the limit on the database size */ |
|
1558 db_max_rss = db_max_byte; |
|
1559 |
|
1560 #ifdef RLIMIT_AS |
|
1561 /* try not to break process virtual memory limit, |
|
1562 * but only if it is not ridiculously tiny */ |
|
1563 db_max_rss = use_rlimit(RLIMIT_AS, "RLIMIT_AS", |
|
1564 db_max_rss, db_min_byte, "max RSS"); |
|
1565 #endif /* RLIMIT_AS */ |
|
1566 #ifdef RLIMIT_RSS |
|
1567 /* try not to break process resident memory limit |
|
1568 * but only if it is not ridiculously tiny */ |
|
1569 db_max_rss = use_rlimit(RLIMIT_RSS, "RLIMIT_RSS", |
|
1570 db_max_rss, db_min_byte, "max RSS"); |
|
1571 #endif /* RLIMIT_RSS */ |
|
1572 |
|
1573 /* limit the database to the window size */ |
|
1574 if (db_max_byte > db_max_rss) { |
|
1575 old_val = db_max_mbyte; |
|
1576 db_max_mbyte = db_max_rss / (1024*1024); |
|
1577 db_max_byte = db_max_mbyte * (1024*1024); |
|
1578 if (db_debug) |
|
1579 quiet_trace_msg("max RSS reduced DB max from %s to %s", |
|
1580 mbyte2str(old_val), |
|
1581 mbyte2str(db_max_mbyte)); |
|
1582 } |
|
1583 |
|
1584 #ifndef HAVE_64BIT_PTR |
|
1585 /* We cannot use a window larger than 2 GBytes on most systems without |
|
1586 * big pointers. Among the things that break is trying to mmap() more |
|
1587 * than 2 GBytes. So limit the window on 32-bit systems to a little |
|
1588 * less than 2 GBytes and the database to not much more */ |
|
1589 if (db_max_rss > DB_MAX_2G_MBYTE*(1024*1024)) { |
|
1590 if (db_debug) |
|
1591 quiet_trace_msg("32-bit pointers reduced max RSS" |
|
1592 " from %s to %s", |
|
1593 db_ptr2str(db_max_rss), |
|
1594 mbyte2str(DB_MAX_2G_MBYTE)); |
|
1595 db_max_rss = DB_MAX_2G_MBYTE*(1024*1024); |
|
1596 new_val = db_max_rss+db_max_rss/4; |
|
1597 if (db_max_byte > new_val) { |
|
1598 old_val = db_max_mbyte; |
|
1599 db_max_mbyte = new_val / (1024*1024); |
|
1600 db_max_byte = db_max_mbyte * (1024*1024); |
|
1601 if (db_debug) |
|
1602 quiet_trace_msg("32-bit pointers reduced DB max" |
|
1603 " from %s to %s", |
|
1604 mbyte2str(old_val), |
|
1605 mbyte2str(db_max_mbyte)); |
|
1606 } |
|
1607 } |
|
1608 #endif |
|
1609 |
|
1610 snprintf(&db_physmem_str[physmem_str_len], |
|
1611 sizeof(db_physmem_str) - physmem_str_len, |
|
1612 " max RSS=%s DB max=%s", |
|
1613 db_ptr2str(db_max_rss), mbyte2str(db_max_mbyte)); |
|
1614 } |
|
1615 |
|
1616 |
|
1617 |
|
1618 /* Pick a buffer size that will hold an integral number of DB hash |
|
1619 * table entries and is a multiple of system's page size. |
|
1620 * The entire hash table should reside in memory |
|
1621 * if the system has enough memory. */ |
|
1622 u_int |
|
1623 db_get_pagesize(u_int old_pagesize, /* 0 or required page size */ |
|
1624 u_int tgt_pagesize) /* 0 or target page size */ |
|
1625 { |
|
1626 u_int min_pagesize, max_pagesize; |
|
1627 |
|
1628 /* Ask the operating system only once so we don't get differing |
|
1629 * answers and so compute a varying page size. |
|
1630 * Some systems can't keep their stories straight. */ |
|
1631 if (db_max_rss == 0) |
|
1632 get_db_max_rss(); |
|
1633 |
|
1634 /* Compute the least common multiple of the system page and |
|
1635 * the DB hash table entry size. |
|
1636 * This will give us the smallest page size that we can use. */ |
|
1637 system_pagesize = getpagesize(); |
|
1638 min_pagesize = lcm(system_pagesize, sizeof(HASH_ENTRY)); |
|
1639 |
|
1640 /* The kludge to speed conversion of database addresses to page numbers |
|
1641 * and offsets on 32-bit systems depends on the page size being |
|
1642 * a multiple of 256 */ |
|
1643 if ((min_pagesize % (1<<DB_PTR_SHIFT)) != 0) |
|
1644 dcc_logbad(EX_SOFTWARE, "page size not a multiple of 256"); |
|
1645 |
|
1646 /* The DB buffer or page size must also be a multiple of the |
|
1647 * the end-of-page padding used in the main database file. */ |
|
1648 if (sizeof(DB_RCD) % DB_RCD_HDR_LEN != 0) |
|
1649 dcc_logbad(EX_SOFTWARE, |
|
1650 "DB padding size %d" |
|
1651 " is not a divisor of DB entry size %d", |
|
1652 DB_RCD_HDR_LEN, ISZ(DB_RCD)); |
|
1653 if (DB_RCD_LEN_MAX % DB_RCD_HDR_LEN != 0) |
|
1654 dcc_logbad(EX_SOFTWARE, |
|
1655 "DB record not a multiple of header size"); |
|
1656 min_pagesize = lcm(min_pagesize, DB_RCD_HDR_LEN); |
|
1657 |
|
1658 /* Use the old buffer size if available so we are not confused |
|
1659 * by padding at the ends of the old pages. |
|
1660 * Fail if it is impossible. This should cause dbclean to |
|
1661 * rebuild the database. */ |
|
1662 if (old_pagesize != 0) { |
|
1663 if ((old_pagesize % min_pagesize) != 0) |
|
1664 return 0; |
|
1665 /* adjust the number of buffers to fit our window size */ |
|
1666 db_buf_total = db_max_rss / old_pagesize; |
|
1667 if (db_buf_total < (int)DB_BUF_MIN) |
|
1668 return 0; |
|
1669 if (db_buf_total > DB_BUF_MAX) |
|
1670 db_buf_total = DB_BUF_MAX; |
|
1671 return old_pagesize; |
|
1672 } |
|
1673 |
|
1674 db_buf_total = DB_BUF_MAX; |
|
1675 max_pagesize = db_max_rss / db_buf_total; |
|
1676 max_pagesize -= max_pagesize % min_pagesize; |
|
1677 |
|
1678 /* If we have a target page size, try to use it instead of the |
|
1679 * maximum page size allowed by the resident set size. |
|
1680 * Normal DCC databases grow large and want pages as large as possible |
|
1681 * but greylist databases are often small. |
|
1682 * We also want a tiny page when first reading the parameters while |
|
1683 * opening. */ |
|
1684 if (tgt_pagesize != 0 && tgt_pagesize < max_pagesize) { |
|
1685 tgt_pagesize -= tgt_pagesize % min_pagesize; |
|
1686 if (tgt_pagesize < min_pagesize) |
|
1687 tgt_pagesize = min_pagesize; |
|
1688 return tgt_pagesize; |
|
1689 } else if (max_pagesize > min_pagesize) { |
|
1690 return max_pagesize; |
|
1691 } else { |
|
1692 return min_pagesize; |
|
1693 } |
|
1694 } |
|
1695 |
|
1696 |
|
1697 |
|
1698 /* (re)create the buffer pool |
|
1699 * The buffers are small blocks that point to the real mmap()'ed memory. |
|
1700 */ |
|
1701 u_char |
|
1702 db_buf_init(u_int old_pagesize, /* 0 or required page size */ |
|
1703 u_int tgt_pagesize) /* 0 or target page size */ |
|
1704 { |
|
1705 DB_BUF *b, *bprev, *bnext; |
|
1706 int i; |
|
1707 |
|
1708 |
|
1709 db_pagesize = db_get_pagesize(old_pagesize, tgt_pagesize); |
|
1710 if (db_pagesize == 0) |
|
1711 return 0; |
|
1712 |
|
1713 /* The fragments of pages must be multiples of system pages |
|
1714 * so that msync() on Solaris can be given multiples of system |
|
1715 * pages. It's also a generally good idea. */ |
|
1716 db_pagesize_part = db_pagesize/DB_BUF_NUM_PARTS; |
|
1717 db_pagesize_part = ((db_pagesize_part + system_pagesize-1) |
|
1718 / system_pagesize) * system_pagesize; |
|
1719 |
|
1720 db_page_max = db_pagesize - DB_RCD_HDR_LEN; |
|
1721 db_hash_page_len = db_pagesize/sizeof(HASH_ENTRY); |
|
1722 |
|
1723 db_max_hash_entries = (MAX_HASH_ENTRIES |
|
1724 - MAX_HASH_ENTRIES % db_hash_page_len); |
|
1725 |
|
1726 memset(db_bufs, 0, sizeof(db_bufs)); |
|
1727 b = db_bufs; |
|
1728 buf_oldest = b; |
|
1729 bprev = 0; |
|
1730 for (i = db_buf_total; --i != 0; b = bnext) { |
|
1731 bnext = b+1; |
|
1732 b->older = bprev; |
|
1733 b->newer = bnext; |
|
1734 bprev = b; |
|
1735 } |
|
1736 b->older = bprev; |
|
1737 buf_newest = b; |
|
1738 |
|
1739 memset(db_buf_hash, 0, sizeof(db_buf_hash)); |
|
1740 |
|
1741 return 1; |
|
1742 } |
|
1743 |
|
1744 |
|
1745 |
|
1746 static u_char |
|
1747 make_new_hash(DCC_EMSG emsg, DB_HADDR new_hash_len) |
|
1748 { |
|
1749 struct stat sb; |
|
1750 HASH_ENTRY *hash; |
|
1751 DB_HADDR next_haddr, cur_haddr, prev_haddr; |
|
1752 u_int pagenum; |
|
1753 |
|
1754 if (getuid() == 0) { |
|
1755 /* if we are running as root, |
|
1756 * don't change the owner of the database */ |
|
1757 if (0 > fstat(db_fd, &sb)) { |
|
1758 dcc_pemsg(EX_IOERR, emsg, "fstat(%s): %s", |
|
1759 db_nm, ERROR_STR()); |
|
1760 return 0; |
|
1761 } |
|
1762 if (0 > fchown(db_hash_fd, sb.st_uid, sb.st_gid)) { |
|
1763 dcc_pemsg(EX_IOERR, emsg, "fchown(%s,%d,%d): %s", |
|
1764 db_hash_nm, (int)sb.st_uid, (int)sb.st_gid, |
|
1765 ERROR_STR()); |
|
1766 return 0; |
|
1767 } |
|
1768 } |
|
1769 |
|
1770 if (new_hash_len < MIN_HASH_ENTRIES) |
|
1771 new_hash_len = MIN_HASH_ENTRIES; |
|
1772 |
|
1773 /* Increase the requested hash table size to a multiple of the database |
|
1774 * page size. The page size is chosen to be a multiple of the size of |
|
1775 * a single hash table entry. */ |
|
1776 db_hash_fsize = (((DB_HOFF)new_hash_len)*sizeof(HASH_ENTRY) |
|
1777 + db_pagesize-1); |
|
1778 db_hash_fsize -= db_hash_fsize % db_pagesize; |
|
1779 new_hash_len = db_hash_fsize / sizeof(HASH_ENTRY); |
|
1780 |
|
1781 if (new_hash_len > db_max_hash_entries) |
|
1782 new_hash_len = db_max_hash_entries; |
|
1783 |
|
1784 /* create the empty hash table file */ |
|
1785 rel_db_states(); |
|
1786 if (!db_unload(emsg, 0)) |
|
1787 return 0; |
|
1788 if (0 > ftruncate(db_hash_fd, 0)) { |
|
1789 dcc_pemsg(EX_IOERR, emsg, "truncate(%s,"L_HPAT"): %s", |
|
1790 db_hash_nm, db_csize, ERROR_STR()); |
|
1791 return 0; |
|
1792 } |
|
1793 |
|
1794 db_hash_len = new_hash_len; |
|
1795 db_hash_used_stored_hash = db_hash_used = DB_HADDR_BASE; |
|
1796 db_hash_divisor = get_db_hash_divisor(db_hash_len); |
|
1797 |
|
1798 /* Clear new hash file by linking its entries into the free list */ |
|
1799 /* map and clear the first page */ |
|
1800 if (!map_hash_ctl(emsg, 1)) |
|
1801 return 0; |
|
1802 |
|
1803 /* create the header */ |
|
1804 strcpy(db_sts.hash_ctl.d.vals->s.magic, HASH_MAGIC_STR); |
|
1805 db_sts.hash_ctl.d.vals->s.free_fwd = DB_HADDR_BASE; |
|
1806 db_sts.hash_ctl.d.vals->s.free_bak = db_hash_len-1; |
|
1807 db_sts.hash_ctl.d.vals->s.len = db_hash_len; |
|
1808 db_sts.hash_ctl.d.vals->s.divisor = db_hash_divisor; |
|
1809 db_sts.hash_ctl.d.vals->s.used = DB_HADDR_BASE; |
|
1810 db_sts.hash_ctl.d.vals->s.synced = time(0); |
|
1811 db_dirty = 1; |
|
1812 #ifdef USE_MAP_NOSYNC |
|
1813 db_sts.hash_ctl.d.vals->s.synced = time(0); |
|
1814 db_sts.hash_ctl.d.vals->s.flags |= HASH_CTL_FG_NOSYNC; |
|
1815 db_not_synced = 1; |
|
1816 #endif |
|
1817 |
|
1818 /* Link the hash table entries in the first and following pages. |
|
1819 * The page size is chosen to be a multiple of the size of a |
|
1820 * single hash table entry. */ |
|
1821 prev_haddr = FREE_HADDR_END; |
|
1822 cur_haddr = DB_HADDR_BASE; |
|
1823 next_haddr = cur_haddr+1; |
|
1824 hash = &db_sts.hash_ctl.d.vals->h[DB_HADDR_BASE]; |
|
1825 pagenum = 0; |
|
1826 for (;;) { |
|
1827 do { |
|
1828 DB_HADDR_CP(hash->bak, prev_haddr); |
|
1829 if (next_haddr == db_hash_len) |
|
1830 DB_HADDR_CP(hash->fwd, FREE_HADDR_END); |
|
1831 else |
|
1832 DB_HADDR_CP(hash->fwd, next_haddr); |
|
1833 ++hash; |
|
1834 prev_haddr = cur_haddr; |
|
1835 cur_haddr = next_haddr++; |
|
1836 } while (cur_haddr % db_hash_page_len != 0); |
|
1837 |
|
1838 if (++pagenum >= db_hash_fsize/db_pagesize) |
|
1839 break; |
|
1840 |
|
1841 if (!map_hash(emsg, cur_haddr, &db_sts.free, 1)) |
|
1842 return 0; |
|
1843 db_sts.free.b->flush_urgent = (DB_BUF_FM)-1; |
|
1844 hash = db_sts.free.d.h; |
|
1845 } |
|
1846 |
|
1847 hash_clear_pg_num = 0; |
|
1848 |
|
1849 return 1; |
|
1850 } |
|
1851 |
|
1852 |
|
1853 |
|
1854 static u_char |
|
1855 check_old_hash(DCC_EMSG emsg) |
|
1856 { |
|
1857 static const u_char magic[sizeof(((HASH_CTL*)0)->s.magic) |
|
1858 ] = HASH_MAGIC_STR; |
|
1859 const HASH_CTL *vals; |
|
1860 struct stat sb; |
|
1861 u_char old_db; |
|
1862 |
|
1863 /* check the size of the existing hash file */ |
|
1864 if (0 > fstat(db_hash_fd, &sb)) { |
|
1865 dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s", |
|
1866 db_hash_nm, ERROR_STR()); |
|
1867 return 0; |
|
1868 } |
|
1869 db_hash_fsize = sb.st_size; |
|
1870 if ((db_hash_fsize % sizeof(HASH_ENTRY)) != 0) { |
|
1871 dcc_pemsg(EX_DATAERR, emsg, "%s has size "OFF_DPAT"," |
|
1872 " not a multiple of %d", |
|
1873 db_hash_nm, db_hash_fsize, |
|
1874 ISZ(HASH_ENTRY)); |
|
1875 return 0; |
|
1876 } |
|
1877 |
|
1878 db_hash_len = db_hash_fsize/sizeof(HASH_ENTRY); |
|
1879 if (db_hash_len < MIN_HASH_ENTRIES) { |
|
1880 dcc_pemsg(EX_DATAERR, emsg, |
|
1881 "%s has too few records, "OFF_DPAT" bytes", |
|
1882 db_hash_nm, db_hash_fsize); |
|
1883 return 0; |
|
1884 } |
|
1885 |
|
1886 /* check the magic number */ |
|
1887 if (!map_hash_ctl(emsg, 0)) |
|
1888 return 0; |
|
1889 vals = db_sts.hash_ctl.d.vals; |
|
1890 if (memcmp(vals->s.magic, &magic, sizeof(magic))) { |
|
1891 dcc_pemsg(EX_DATAERR, emsg, |
|
1892 "%s has the wrong magic \"%.*s\"", |
|
1893 db_hash_nm, ISZ(HASH_ENTRY), vals->s.magic); |
|
1894 return 0; |
|
1895 } |
|
1896 |
|
1897 if (!(vals->s.flags & HASH_CTL_FG_CLEAN)) { |
|
1898 dcc_pemsg(EX_DATAERR, emsg, "%s was not closed cleanly", |
|
1899 db_hash_nm); |
|
1900 return 0; |
|
1901 } |
|
1902 if (vals->s.flags & HASH_CTL_FG_NOSYNC) { |
|
1903 #ifdef HAVE_BOOTTIME |
|
1904 int mib[2] = {CTL_KERN, KERN_BOOTTIME}; |
|
1905 size_t boottime_len; |
|
1906 #endif |
|
1907 struct timeval boottime; |
|
1908 |
|
1909 boottime.tv_sec = 0x7fffffff; |
|
1910 #ifdef HAVE_BOOTTIME |
|
1911 boottime_len = sizeof(boottime); |
|
1912 if (0 > sysctl(mib, 2, &boottime, &boottime_len, 0, 0)) { |
|
1913 dcc_error_msg("sysctl(KERN_BOOTTIME): %s", ERROR_STR()); |
|
1914 } |
|
1915 #endif |
|
1916 if (vals->s.synced <= boottime.tv_sec) { |
|
1917 dcc_pemsg(EX_DATAERR, emsg, "%s was not synchronized;" |
|
1918 " synced=%d boottime=%d", |
|
1919 db_hash_nm, |
|
1920 (int)vals->s.synced, (int)boottime.tv_sec); |
|
1921 return 0; |
|
1922 } |
|
1923 db_not_synced = 1; |
|
1924 } |
|
1925 |
|
1926 if (DB_HADDR_INVALID(vals->s.free_fwd) |
|
1927 && (vals->s.free_fwd != FREE_HADDR_END |
|
1928 || vals->s.free_fwd != vals->s.free_bak)) { |
|
1929 dcc_pemsg(EX_DATAERR, emsg, |
|
1930 "%s has a broken free list head of %#x", |
|
1931 db_hash_nm, vals->s.free_fwd); |
|
1932 return 0; |
|
1933 } |
|
1934 if (DB_HADDR_INVALID(vals->s.free_bak) |
|
1935 && (vals->s.free_bak != FREE_HADDR_END |
|
1936 || vals->s.free_fwd != vals->s.free_bak)) { |
|
1937 dcc_pemsg(EX_DATAERR, emsg, |
|
1938 "%s has a broken free list tail of %#x", |
|
1939 db_hash_nm, vals->s.free_bak); |
|
1940 return 0; |
|
1941 } |
|
1942 |
|
1943 if (db_hash_len != vals->s.len) { |
|
1944 dcc_pemsg(EX_DATAERR, emsg, |
|
1945 "%s has %d entries but claims %d", |
|
1946 db_hash_nm, db_hash_len, |
|
1947 vals->s.len); |
|
1948 return 0; |
|
1949 } |
|
1950 |
|
1951 db_hash_divisor = vals->s.divisor; |
|
1952 if (db_hash_divisor < MIN_HASH_DIVISOR |
|
1953 || db_hash_divisor >= db_hash_len) { |
|
1954 dcc_pemsg(EX_DATAERR, emsg, "%s has hash divisor %d", |
|
1955 db_hash_nm, db_hash_len); |
|
1956 return 0; |
|
1957 } |
|
1958 |
|
1959 db_hash_used_stored_hash = db_hash_used = vals->s.used; |
|
1960 if (db_hash_used < DB_HADDR_BASE) { |
|
1961 dcc_pemsg(EX_DATAERR, emsg, |
|
1962 "%s contains impossible %u entries", |
|
1963 db_hash_nm, HADDR2LEN(db_hash_used)); |
|
1964 return 0; |
|
1965 } |
|
1966 if (db_hash_used >= db_hash_len) { |
|
1967 if (db_hash_used > db_hash_len) |
|
1968 dcc_pemsg(EX_DATAERR, emsg, |
|
1969 "%s contains only %u entries but %u used", |
|
1970 db_hash_nm, |
|
1971 HADDR2LEN(db_hash_len), |
|
1972 HADDR2LEN(db_hash_used)); |
|
1973 else |
|
1974 dcc_pemsg(EX_DATAERR, emsg, |
|
1975 "%s is filled with %u entries", |
|
1976 db_hash_nm, |
|
1977 HADDR2LEN(db_hash_len)); |
|
1978 return 0; |
|
1979 } |
|
1980 |
|
1981 /* old databases lack the growth values */ |
|
1982 old_db = 0; |
|
1983 if (!db_rdonly |
|
1984 && db_parms.old_db_csize == 0 |
|
1985 && db_parms.db_added == 0 |
|
1986 && db_parms.hash_used == 0 |
|
1987 && db_parms.old_hash_used == 0 |
|
1988 && db_parms.hash_added == 0 |
|
1989 && db_parms.rate_secs == 0 |
|
1990 && db_parms.last_rate_sec == 0) { |
|
1991 quiet_trace_msg("repair database growth measurements"); |
|
1992 db_parms.old_db_csize = db_parms.db_csize; |
|
1993 old_db = 1; |
|
1994 } |
|
1995 |
|
1996 if (db_hash_used != db_parms.hash_used |
|
1997 && db_hash_fsize != 0) { |
|
1998 if (old_db) { |
|
1999 quiet_trace_msg("repair db_parms.old hash_used" |
|
2000 " and old_hash_used"); |
|
2001 db_parms.old_hash_used = db_hash_used; |
|
2002 db_parms.hash_used = db_hash_used; |
|
2003 } else { |
|
2004 dcc_pemsg(EX_DATAERR, emsg, |
|
2005 "%s contains %d" |
|
2006 " entries instead of the %d that %s claims", |
|
2007 db_hash_nm, db_hash_used, |
|
2008 db_parms.hash_used, db_nm); |
|
2009 return 0; |
|
2010 } |
|
2011 } |
|
2012 |
|
2013 db_csize_stored_hash = vals->s.db_csize; |
|
2014 if (db_csize_stored_hash != db_csize |
|
2015 && db_hash_fsize != 0) { |
|
2016 dcc_pemsg(EX_DATAERR, emsg, |
|
2017 "%s contains "L_DPAT |
|
2018 " bytes instead of the "L_DPAT" that %s claims", |
|
2019 db_nm, db_csize, |
|
2020 db_csize_stored_hash, db_hash_nm); |
|
2021 return 0; |
|
2022 } |
|
2023 |
|
2024 return 1; |
|
2025 } |
|
2026 |
|
2027 |
|
2028 |
|
2029 /* open the files and generally get ready to work */ |
|
2030 u_char /* 0=failed, 1=ok */ |
|
2031 db_open(DCC_EMSG emsg, |
|
2032 int new_db_fd, /* -1 or already open db_fd */ |
|
2033 const char *new_db_nm, |
|
2034 DB_HADDR new_hash_len, /* 0 or # of entries */ |
|
2035 DB_OPEN_MODES mode) /* DB_OPEN_* */ |
|
2036 { |
|
2037 u_int cur_pagesize; |
|
2038 int hash_flags, db_open_flags; |
|
2039 struct stat db_sb; |
|
2040 # define OPEN_BAIL() {if (new_db_fd >= 0) db_fd = -1; \ |
|
2041 db_close(-1); return 0;} |
|
2042 |
|
2043 db_close(1); |
|
2044 db_failed_line = __LINE__; |
|
2045 db_failed_file = __FILE__; |
|
2046 db_not_synced = 0; |
|
2047 db_minimum_map = 0; |
|
2048 db_invalidate = 0; |
|
2049 db_dirty = 0; |
|
2050 db_locked.tv_sec = 0; |
|
2051 |
|
2052 db_rdonly = (mode & DB_OPEN_RDONLY) != 0; |
|
2053 db_use_write = (mode & DB_OPEN_MMAP_WRITE) != 0; |
|
2054 |
|
2055 memset(&db_stats, 0, sizeof(db_stats)); |
|
2056 |
|
2057 if (!new_db_nm && db_nm[0] == '\0') |
|
2058 new_db_nm = grey_on ? DB_GREY_NAME : DB_DCC_NAME; |
|
2059 if (new_db_nm) { |
|
2060 if (!fnm2rel(db_nm, new_db_nm, 0) |
|
2061 || !fnm2rel(db_hash_nm, db_nm, DB_HASH_SUFFIX)) { |
|
2062 dcc_pemsg(EX_DATAERR, emsg, |
|
2063 "invalid DB nm \"%s\"", new_db_nm); |
|
2064 return 0; |
|
2065 } |
|
2066 } |
|
2067 |
|
2068 if (new_db_fd >= 0) { |
|
2069 if (new_hash_len != 0) { |
|
2070 dcc_logbad(EX_SOFTWARE, |
|
2071 "extending db_open(%s) without locking", |
|
2072 db_nm); |
|
2073 return 0; |
|
2074 } |
|
2075 if (!db_rdonly) { |
|
2076 dcc_logbad(EX_SOFTWARE, |
|
2077 "db_open(%s) read/write without locking", |
|
2078 db_nm); |
|
2079 return 0; |
|
2080 } |
|
2081 db_open_flags = O_RDONLY; |
|
2082 hash_flags = O_RDONLY; |
|
2083 |
|
2084 db_fd = new_db_fd; |
|
2085 |
|
2086 } else { |
|
2087 db_open_flags = O_RDWR; |
|
2088 if (new_hash_len != 0) { |
|
2089 if (db_rdonly) { |
|
2090 dcc_logbad(EX_SOFTWARE, |
|
2091 "db_open(%s) creating read-only", |
|
2092 db_nm); |
|
2093 return 0; |
|
2094 } |
|
2095 hash_flags = O_RDWR | O_CREAT; |
|
2096 } else { |
|
2097 /* must open the file read/write to lock it */ |
|
2098 hash_flags = O_RDWR; |
|
2099 } |
|
2100 |
|
2101 db_fd = dcc_lock_open(emsg, db_nm, db_open_flags, |
|
2102 (mode & DB_OPEN_LOCK_NOWAIT) |
|
2103 ? DCC_LOCK_OPEN_NOWAIT |
|
2104 : 0, |
|
2105 DCC_LOCK_ALL_FILE, 0); |
|
2106 if (db_fd == -1) { |
|
2107 db_close(-1); |
|
2108 return 0; |
|
2109 } |
|
2110 } |
|
2111 gettimeofday(&db_time, 0); |
|
2112 db_locked = db_time; |
|
2113 if (0 > fstat(db_fd, &db_sb)) { |
|
2114 dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s", db_nm, ERROR_STR()); |
|
2115 OPEN_BAIL(); |
|
2116 return 0; |
|
2117 } |
|
2118 db_csize = db_fsize = db_sb.st_size; |
|
2119 if (db_fsize < ISZ(DB_HDR)) { |
|
2120 dcc_pemsg(EX_IOERR, emsg, |
|
2121 "%s with %d bytes is too small to be a DCC database", |
|
2122 db_nm, (int)db_fsize); |
|
2123 OPEN_BAIL(); |
|
2124 } |
|
2125 |
|
2126 /* check the header of the database file by temporarily mapping it */ |
|
2127 db_buf_init(0, sizeof(DB_HDR)); |
|
2128 if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms, 0)) |
|
2129 OPEN_BAIL(); |
|
2130 |
|
2131 db_parms_stored = *db_sts.db_parms.d.parms; |
|
2132 db_parms = *db_sts.db_parms.d.parms; |
|
2133 |
|
2134 if (memcmp(db_parms.version, db_version_buf, sizeof(db_version_buf))) { |
|
2135 dcc_pemsg(EX_DATAERR, emsg, |
|
2136 "%s contains the wrong magic string \"%.*s\"", |
|
2137 db_nm, ISZ(db_parms.version), db_parms.version); |
|
2138 OPEN_BAIL(); |
|
2139 } |
|
2140 if (!(db_parms.flags & DB_PARM_FG_GREY) != !grey_on) { |
|
2141 dcc_pemsg(EX_DATAERR, emsg, |
|
2142 "%s is%s a greylist database but must%s be", |
|
2143 db_nm, |
|
2144 (db_parms.flags & DB_PARM_FG_GREY) ? "" : " not", |
|
2145 grey_on ? "" : " not"); |
|
2146 OPEN_BAIL(); |
|
2147 } |
|
2148 |
|
2149 cur_pagesize = db_parms.pagesize; |
|
2150 |
|
2151 DB_SET_NOKEEP(db_parms.nokeep_cks, DCC_CK_INVALID); |
|
2152 DB_SET_NOKEEP(db_parms.nokeep_cks, DCC_CK_FLOD_PATH); |
|
2153 set_db_tholds(db_parms.nokeep_cks); |
|
2154 |
|
2155 db_ck_fuzziness = grey_on ? grey_ck_fuzziness : dcc_ck_fuzziness; |
|
2156 |
|
2157 db_csize = db_parms.db_csize; |
|
2158 if (db_csize < sizeof(DB_HDR)) { |
|
2159 dcc_pemsg(EX_DATAERR, emsg, |
|
2160 "%s says it contains "L_DPAT" bytes" |
|
2161 " or fewer than the minimum of %d", |
|
2162 db_nm, db_csize, DB_PTR_BASE); |
|
2163 /* that is a fatal error if we are not rebuilding */ |
|
2164 if (new_hash_len != 0) |
|
2165 OPEN_BAIL(); |
|
2166 } |
|
2167 if (db_csize > db_fsize) { |
|
2168 dcc_pemsg(EX_DATAERR, emsg, |
|
2169 "%s says it contains "L_DPAT" bytes" |
|
2170 " or more than the actual size of "OFF_DPAT, |
|
2171 db_nm, db_csize, db_fsize); |
|
2172 /* that is a fatal error if we are not rebuilding */ |
|
2173 if (new_hash_len != 0) |
|
2174 OPEN_BAIL(); |
|
2175 } |
|
2176 |
|
2177 /* The buffer or page size we use must be the page size used to |
|
2178 * write the files. Try to change our size to match the file */ |
|
2179 if (cur_pagesize != db_pagesize) { |
|
2180 db_invalidate = 1; |
|
2181 rel_db_states(); |
|
2182 if (!db_unload(emsg, 0)) |
|
2183 OPEN_BAIL(); |
|
2184 db_invalidate = 0; |
|
2185 if (!db_buf_init(cur_pagesize, 0)) { |
|
2186 dcc_error_msg("%s has page size %d" |
|
2187 " incompatible with %d in %s", |
|
2188 db_nm, |
|
2189 cur_pagesize, db_get_pagesize(0, 0), |
|
2190 path2fnm(db_hash_nm)); |
|
2191 OPEN_BAIL(); |
|
2192 } |
|
2193 } |
|
2194 |
|
2195 db_csize_stored_hash = 0; |
|
2196 db_hash_len = 0; |
|
2197 db_hash_fd = open(db_hash_nm, hash_flags, 0666); |
|
2198 if (db_hash_fd < 0) { |
|
2199 dcc_pemsg(EX_IOERR, emsg, "open(%s): %s", |
|
2200 db_hash_nm, ERROR_STR()); |
|
2201 OPEN_BAIL(); |
|
2202 } |
|
2203 if (0 > fcntl(db_hash_fd, F_SETFD, FD_CLOEXEC)) { |
|
2204 dcc_pemsg(EX_IOERR, emsg, "fcntl(%s, FD_CLOEXEC): %s", |
|
2205 db_hash_nm, ERROR_STR()); |
|
2206 OPEN_BAIL(); |
|
2207 } |
|
2208 |
|
2209 if (new_hash_len != 0) { |
|
2210 if (!make_new_hash(emsg, new_hash_len)) |
|
2211 OPEN_BAIL(); |
|
2212 } else { |
|
2213 if (!check_old_hash(emsg)) |
|
2214 OPEN_BAIL(); |
|
2215 } |
|
2216 |
|
2217 if (db_fsize % db_pagesize != 0) { |
|
2218 dcc_pemsg(EX_DATAERR, emsg, |
|
2219 "%s has size "OFF_HPAT"," |
|
2220 " not a multiple of its page size of %#x", |
|
2221 db_nm, db_fsize, db_pagesize); |
|
2222 OPEN_BAIL(); |
|
2223 } |
|
2224 if (db_fsize > db_csize + db_pagesize || db_csize > db_fsize) { |
|
2225 dcc_pemsg(EX_DATAERR, emsg, |
|
2226 "%s has size "OFF_HPAT" but claims "L_HPAT, |
|
2227 db_nm, db_fsize, db_csize); |
|
2228 OPEN_BAIL(); |
|
2229 } |
|
2230 |
|
2231 #ifndef USE_MAP_NOSYNC |
|
2232 /* Use `dbclean -F` on systems without mmap(NOSYNC) but with lots of |
|
2233 * RAM. Some Linux systems otherwise take too long to run dbclean. */ |
|
2234 if (mode & DB_OPEN_MMAP_WRITE_NOSYNC) { |
|
2235 if (db_max_rss > db_fsize + db_hash_fsize) |
|
2236 db_use_write = 1; |
|
2237 if (db_debug) |
|
2238 quiet_trace_msg("db_max_rss="OFF_HPAT |
|
2239 " db_fsize+db_hash_fsize="OFF_HPAT |
|
2240 " so%s use -F", |
|
2241 db_max_rss, db_fsize+db_hash_fsize, |
|
2242 db_use_write ? "" : " do not"); |
|
2243 } |
|
2244 #endif |
|
2245 |
|
2246 db_window_size = (DB_PTR)db_pagesize * db_buf_total; |
|
2247 snprintf(db_window_size_str, sizeof(db_window_size_str), |
|
2248 "window=%s%s", |
|
2249 db_ptr2str(db_window_size), db_physmem_str); |
|
2250 rel_db_states(); |
|
2251 db_failed_line = 0; |
|
2252 |
|
2253 return 1; |
|
2254 #undef OPEN_BAIL |
|
2255 } |
|
2256 |
|
2257 |
|
2258 |
|
2259 static u_char |
|
2260 buf_munmap(DCC_EMSG emsg, DB_BUF *b) |
|
2261 { |
|
2262 u_char result; |
|
2263 |
|
2264 if (b->lock_cnt != 0) |
|
2265 dcc_logbad(EX_SOFTWARE, "unmapping locked DB buffer"); |
|
2266 |
|
2267 result = buf_flush(emsg, b, 1); |
|
2268 |
|
2269 if (db_invalidate) { |
|
2270 if (0 > DCC_MADV_FREE(b->buf.v)) |
|
2271 dcc_error_msg("madvise(FREE %s,%#x): %s", |
|
2272 buf2path(b), db_pagesize, ERROR_STR()); |
|
2273 } |
|
2274 |
|
2275 if (0 > munmap(b->buf.v, db_pagesize)) { |
|
2276 db_failure(__LINE__,__FILE__, EX_IOERR, emsg, |
|
2277 "munmap(%s,%d): %s", |
|
2278 buf2path(b), db_pagesize, ERROR_STR()); |
|
2279 result = 0; |
|
2280 } |
|
2281 b->buf.v = 0; |
|
2282 b->pg_num = -1; |
|
2283 b->buf_type = DB_BUF_TYPE_FREE; |
|
2284 |
|
2285 return result; |
|
2286 } |
|
2287 |
|
2288 |
|
2289 |
|
2290 static u_char |
|
2291 buf_mmap(DCC_EMSG emsg, DB_BUF *b, DB_PG_NUM pg_num, u_char extend) |
|
2292 { |
|
2293 int prot, flags; |
|
2294 off_t offset; |
|
2295 int fd; |
|
2296 void *p; |
|
2297 int retry; |
|
2298 u_char unloaded; |
|
2299 |
|
2300 |
|
2301 offset = (off_t)pg_num * (off_t)db_pagesize; |
|
2302 fd = buf2fd(b); |
|
2303 |
|
2304 if (extend) { |
|
2305 offset = 0; |
|
2306 #if defined(MAP_ANON)|| defined(MAP_ANONYMOUS) |
|
2307 fd = -1; |
|
2308 b->flags |= DB_BUF_FG_USE_WRITE | DB_BUF_FG_EXTENSION; |
|
2309 #ifdef MAP_ANONYMOUS |
|
2310 /* Linux redefines things and requires either MAP_ANON |
|
2311 * or MAP_PRIVATE; */ |
|
2312 flags = MAP_ANONYMOUS| MAP_PRIVATE; |
|
2313 #else |
|
2314 flags = MAP_ANON | MAP_PRIVATE; |
|
2315 #endif /* MAP_ANONYMOUS */ |
|
2316 #else /* have neither MAP_ANON nor MAP_ANONYMOUS */ |
|
2317 b->flags |= DB_BUF_FG_USE_WRITE; |
|
2318 flags = MAP_PRIVATE; |
|
2319 #endif |
|
2320 } else if (db_rdonly) { |
|
2321 flags = MAP_SHARED; |
|
2322 } else if (db_use_write && !db_minimum_map) { |
|
2323 /* write() buffers instead of letting the Solaris virtual |
|
2324 * memory system do it. Solaris will bog the system down doing |
|
2325 * nothing but flushing dirty mmap() pages |
|
2326 * We cannot use this hack in two processes simultaneously, |
|
2327 * so do not use it in dccd while dbclean is running */ |
|
2328 b->flags |= DB_BUF_FG_USE_WRITE; |
|
2329 flags = MAP_PRIVATE; |
|
2330 } else { |
|
2331 #ifdef USE_MAP_NOSYNC |
|
2332 flags = (MAP_SHARED | MAP_NOSYNC); |
|
2333 #else |
|
2334 flags = MAP_SHARED; |
|
2335 #endif |
|
2336 } |
|
2337 |
|
2338 prot = db_rdonly ? PROT_READ : (PROT_READ | PROT_WRITE); |
|
2339 for (retry = 1, unloaded = 2; unloaded > 1; ++retry) { |
|
2340 p = mmap(0, db_pagesize, prot, flags, fd, offset); |
|
2341 |
|
2342 if (p == MAP_FAILED) { |
|
2343 if (errno == EACCES |
|
2344 || errno == EBADF |
|
2345 || errno == EINVAL |
|
2346 || errno == ENODEV |
|
2347 || retry > 20) { |
|
2348 dcc_pemsg(EX_IOERR, emsg, |
|
2349 "try #%d"" mmap(%s" |
|
2350 " %#x,%#x,%#x,%d,"OFF_HPAT"): %s", |
|
2351 retry, |
|
2352 buf2path(b), |
|
2353 db_pagesize, prot, flags, fd, offset, |
|
2354 ERROR_STR()); |
|
2355 return 0; |
|
2356 } |
|
2357 dcc_error_msg("try #%d mmap(%s" |
|
2358 " %#x,%#x,%#x,%d,"OFF_HPAT"): %s", |
|
2359 retry, |
|
2360 buf2path(b), |
|
2361 db_pagesize, prot, flags, fd, offset, |
|
2362 ERROR_STR()); |
|
2363 /* #define MMAP_FAIL_DEBUG 3 */ |
|
2364 #ifdef MMAP_FAIL_DEBUG |
|
2365 } else if (((uint)random() % MMAP_FAIL_DEBUG) == 0) { |
|
2366 /* pretend mmap() failed randomly */ |
|
2367 dcc_error_msg(" test fail #%d mmap(%s,%#x,"OFF_HPAT")", |
|
2368 retry, |
|
2369 buf2path(b), db_pagesize, offset); |
|
2370 if (0 > munmap(p, db_pagesize)) |
|
2371 dcc_error_msg( "test munmap(): %s", |
|
2372 ERROR_STR()); |
|
2373 #endif |
|
2374 } else { |
|
2375 /* It worked. |
|
2376 * Say so if it was not the first attempt. */ |
|
2377 if (retry != 1) |
|
2378 dcc_error_msg("try #%d" |
|
2379 " mmap(%s,%#x,"OFF_HPAT") ok", |
|
2380 retry, |
|
2381 buf2path(b), db_pagesize, offset); |
|
2382 break; |
|
2383 } |
|
2384 |
|
2385 /* mmap() fails occassionally on some systems, |
|
2386 * so try to release something and try again */ |
|
2387 unloaded = db_unload(0, 1); |
|
2388 } |
|
2389 |
|
2390 |
|
2391 b->buf.v = p; |
|
2392 b->flush = 0; |
|
2393 b->flush_urgent = 0; |
|
2394 |
|
2395 if (extend) |
|
2396 return 1; |
|
2397 |
|
2398 /* madvise() on some systems including FreeBSD uses a lot of CPU cycles, |
|
2399 * so it should not be done unless it is likely to do significant good. |
|
2400 * Get all of our buffers if there is plenty of memory |
|
2401 * and we are not trying to stay out of the way of dbclean. */ |
|
2402 if (!db_minimum_map && db_fsize <= db_max_rss) { |
|
2403 /* The flat file would fit. If the hash table would also |
|
2404 * fit, tell the kernel to be aggressive */ |
|
2405 if (db_fsize + db_hash_fsize <= db_max_rss |
|
2406 && 0 > DCC_MADV_WILLNEED(p)) |
|
2407 dcc_error_msg("madvise(WILLNEED %s,%#x): %s", |
|
2408 buf2path(b), db_pagesize, ERROR_STR()); |
|
2409 } else { |
|
2410 if (0 > DCC_MADV_RANDOM(p)) |
|
2411 dcc_error_msg("madvise(RANDOM %s,%#x): %s", |
|
2412 buf2path(b), db_pagesize, ERROR_STR()); |
|
2413 } |
|
2414 |
|
2415 return 1; |
|
2416 } |
|
2417 |
|
2418 |
|
2419 |
|
2420 /* get a free buffer for a chunk of either the hash table or database files */ |
|
2421 static DB_BUF * |
|
2422 get_free_buf(DCC_EMSG emsg, DB_BUF **bh) |
|
2423 { |
|
2424 DB_BUF *b; |
|
2425 |
|
2426 /* Look for an unlocked buffer. |
|
2427 * We know there is one because we have more buffers than |
|
2428 * can be locked simultaneously. */ |
|
2429 b = buf_oldest; |
|
2430 for (;;) { |
|
2431 if (!b) |
|
2432 dcc_logbad(EX_SOFTWARE, "broken DB buffer MRU chain"); |
|
2433 if (!b->lock_cnt) |
|
2434 break; |
|
2435 b = b->newer; |
|
2436 } |
|
2437 |
|
2438 /* Found an unlocked buffer. |
|
2439 * Unlink it from its hash chain. */ |
|
2440 if (b->fwd) |
|
2441 b->fwd->bak = b->bak; |
|
2442 if (b->bak) |
|
2443 b->bak->fwd = b->fwd; |
|
2444 else if (b->hash) |
|
2445 *b->hash = b->fwd; |
|
2446 if (b->buf_type != DB_BUF_TYPE_FREE) { |
|
2447 if (!buf_munmap(emsg, b)) |
|
2448 return 0; |
|
2449 } |
|
2450 |
|
2451 b->flags = 0; |
|
2452 |
|
2453 /* put it on the new hash chain */ |
|
2454 b->bak = 0; |
|
2455 b->hash = bh; |
|
2456 b->fwd = *bh; |
|
2457 *bh = b; |
|
2458 if (b->fwd) |
|
2459 b->fwd->bak = b; |
|
2460 |
|
2461 return b; |
|
2462 } |
|
2463 |
|
2464 |
|
2465 |
|
2466 static DB_BUF * |
|
2467 find_buf(DCC_EMSG emsg, DB_BUF_TYPE buf_type, DB_PG_NUM pg_num) |
|
2468 { |
|
2469 DB_BUF *b, **bh; |
|
2470 |
|
2471 bh = DB_BUF_HASH(pg_num, buf_type); |
|
2472 b = *bh; |
|
2473 for (;;) { |
|
2474 if (!b) { |
|
2475 /* we ran off the end of the buffer hash chain, |
|
2476 * so get a free buffer */ |
|
2477 b = get_free_buf(emsg, bh); |
|
2478 if (!b) |
|
2479 return 0; |
|
2480 b->buf_type = buf_type; |
|
2481 b->pg_num = pg_num; |
|
2482 break; |
|
2483 } |
|
2484 if (b->buf_type == buf_type |
|
2485 && b->pg_num == pg_num) |
|
2486 break; /* found the buffer we need */ |
|
2487 |
|
2488 b = b->fwd; |
|
2489 } |
|
2490 |
|
2491 /* make the buffer newest */ |
|
2492 if (buf_newest != b) { |
|
2493 /* unlink it */ |
|
2494 b->newer->older = b->older; |
|
2495 if (b->older) |
|
2496 b->older->newer = b->newer; |
|
2497 else |
|
2498 buf_oldest = b->newer; |
|
2499 /* insert it at the head of the MRU list */ |
|
2500 b->newer = 0; |
|
2501 b->older = buf_newest; |
|
2502 buf_newest->newer = b; |
|
2503 buf_newest = b; |
|
2504 } |
|
2505 |
|
2506 return b; |
|
2507 } |
|
2508 |
|
2509 |
|
2510 |
|
2511 static DB_BUF * |
|
2512 find_st_buf(DCC_EMSG emsg, DB_BUF_TYPE buf_type, DB_STATE *st, |
|
2513 DB_PG_NUM pg_num, u_char extend) |
|
2514 { |
|
2515 DB_BUF *b; |
|
2516 |
|
2517 /* release previous buffer unless it is the right one */ |
|
2518 b = st->b; |
|
2519 if (b) { |
|
2520 if (b->pg_num == pg_num |
|
2521 && b->buf_type == buf_type) |
|
2522 return b; /* already have the target buffer */ |
|
2523 |
|
2524 st->b = 0; |
|
2525 st->d.v = 0; |
|
2526 if (--b->lock_cnt < 0) |
|
2527 dcc_logbad(EX_SOFTWARE, "bad database buffer lock"); |
|
2528 } |
|
2529 |
|
2530 /* look for the buffer */ |
|
2531 b = find_buf(emsg, buf_type, pg_num); |
|
2532 if (!b) |
|
2533 return 0; |
|
2534 |
|
2535 ++b->lock_cnt; |
|
2536 if (b->buf.v) { |
|
2537 if (extend && !(b->flags & DB_BUF_FG_USE_WRITE)) |
|
2538 dcc_logbad(EX_SOFTWARE, "extending ordinary buffer"); |
|
2539 |
|
2540 } else { |
|
2541 /* map it if it was not already known */ |
|
2542 if (!buf_mmap(emsg, b, pg_num, extend)) { |
|
2543 b->buf_type = DB_BUF_TYPE_FREE; |
|
2544 b->pg_num = -1; |
|
2545 if (--b->lock_cnt != 0) |
|
2546 dcc_logbad(EX_SOFTWARE, |
|
2547 "stolen database buffer lock %d", |
|
2548 b->lock_cnt); |
|
2549 return 0; |
|
2550 } |
|
2551 if (buf_type == DB_BUF_TYPE_DB) |
|
2552 ++db_stats.db_mmaps; |
|
2553 else if (buf_type == DB_BUF_TYPE_HASH) |
|
2554 ++db_stats.hash_mmaps; |
|
2555 } |
|
2556 |
|
2557 st->b = b; |
|
2558 st->d.v = 0; |
|
2559 return b; |
|
2560 } |
|
2561 |
|
2562 |
|
2563 |
|
2564 static u_char |
|
2565 map_hash_ctl(DCC_EMSG emsg, u_char new) |
|
2566 { |
|
2567 DB_BUF *b; |
|
2568 |
|
2569 b = find_st_buf(emsg, DB_BUF_TYPE_HASH, &db_sts.hash_ctl, 0, new); |
|
2570 if (!b) |
|
2571 return 0; |
|
2572 db_sts.hash_ctl.s.haddr = 0; |
|
2573 db_sts.hash_ctl.d.v = b->buf.v; |
|
2574 return 1; |
|
2575 } |
|
2576 |
|
2577 |
|
2578 |
|
2579 /* mmap() a hash table entry */ |
|
2580 static u_char |
|
2581 map_hash(DCC_EMSG emsg, |
|
2582 DB_HADDR haddr, /* this entry */ |
|
2583 DB_STATE *st, /* point this to the entry */ |
|
2584 u_char new) |
|
2585 { |
|
2586 DB_PG_NUM pg_num; |
|
2587 DB_PG_OFF pg_off; |
|
2588 DB_BUF *b; |
|
2589 |
|
2590 if (haddr >= db_hash_len || haddr < DB_HADDR_BASE) { |
|
2591 dcc_pemsg(EX_DATAERR, emsg, "invalid hash address %#x", |
|
2592 haddr); |
|
2593 return 0; |
|
2594 } |
|
2595 |
|
2596 pg_num = haddr / db_hash_page_len; |
|
2597 pg_off = haddr % db_hash_page_len; |
|
2598 |
|
2599 b = find_st_buf(emsg, DB_BUF_TYPE_HASH, st, pg_num, new); |
|
2600 if (!b) |
|
2601 return 0; |
|
2602 st->s.haddr = haddr; |
|
2603 st->d.h = &b->buf.h[pg_off]; |
|
2604 return 1; |
|
2605 } |
|
2606 |
|
2607 |
|
2608 |
|
2609 /* unlink a hash table entry from the free list |
|
2610 * uses db_sts.tmp */ |
|
2611 static u_char |
|
2612 unlink_free_hash(DCC_EMSG emsg, |
|
2613 DB_STATE *hash_st) /* remove this from the free list */ |
|
2614 { |
|
2615 DB_HADDR fwd, bak; |
|
2616 |
|
2617 if (!db_make_dirty(emsg)) |
|
2618 return 0; |
|
2619 |
|
2620 fwd = DB_HADDR_EX(hash_st->d.h->fwd); |
|
2621 bak = DB_HADDR_EX(hash_st->d.h->bak); |
|
2622 if (!HE_IS_FREE(hash_st->d.h) |
|
2623 || (DB_HADDR_INVALID(fwd) && fwd != FREE_HADDR_END) |
|
2624 || (DB_HADDR_INVALID(bak) && bak != FREE_HADDR_END) |
|
2625 || DB_HPTR_EX(hash_st->d.h->rcd) != DB_PTR_NULL) { |
|
2626 dcc_pemsg(EX_DATAERR, emsg, |
|
2627 "bad hash free list entry at %#x", hash_st->s.haddr); |
|
2628 return 0; |
|
2629 } |
|
2630 |
|
2631 if (fwd != FREE_HADDR_END) { |
|
2632 if (!map_hash(emsg, fwd, &db_sts.tmp, 0)) |
|
2633 return 0; |
|
2634 if (DB_HADDR_EX(db_sts.tmp.d.h->bak) != hash_st->s.haddr) { |
|
2635 dcc_pemsg(EX_DATAERR, emsg, "free %#x --> bad-free %#x", |
|
2636 hash_st->s.haddr, fwd); |
|
2637 return 0; |
|
2638 } |
|
2639 DB_HADDR_CP(db_sts.tmp.d.h->bak, bak); |
|
2640 SET_FLUSH_HE(&db_sts.tmp); |
|
2641 } else { |
|
2642 if (!map_hash_ctl(emsg, 0)) |
|
2643 return 0; |
|
2644 if (db_sts.hash_ctl.d.vals->s.free_bak != hash_st->s.haddr) { |
|
2645 dcc_pemsg(EX_DATAERR, emsg, "free %#x --> bad-free %#x", |
|
2646 hash_st->s.haddr, fwd); |
|
2647 return 0; |
|
2648 } |
|
2649 db_sts.hash_ctl.d.vals->s.free_bak = bak; |
|
2650 SET_FLUSH_HCTL(0); |
|
2651 } |
|
2652 |
|
2653 if (bak != FREE_HADDR_END) { |
|
2654 if (!map_hash(emsg, bak, &db_sts.tmp, 0)) |
|
2655 return 0; |
|
2656 if (DB_HADDR_EX(db_sts.tmp.d.h->fwd) != hash_st->s.haddr) { |
|
2657 dcc_pemsg(EX_DATAERR, emsg, "bad free %#x <-- free %#x", |
|
2658 bak, hash_st->s.haddr); |
|
2659 return 0; |
|
2660 } |
|
2661 DB_HADDR_CP(db_sts.tmp.d.h->fwd, fwd); |
|
2662 SET_FLUSH_HE(&db_sts.tmp); |
|
2663 } else { |
|
2664 if (!map_hash_ctl(emsg, 0)) |
|
2665 return 0; |
|
2666 if (db_sts.hash_ctl.d.vals->s.free_fwd != hash_st->s.haddr) { |
|
2667 dcc_pemsg(EX_DATAERR, emsg, "free %#x --> bad-free %#x", |
|
2668 hash_st->s.haddr, bak); |
|
2669 return 0; |
|
2670 } |
|
2671 db_sts.hash_ctl.d.vals->s.free_fwd = fwd; |
|
2672 SET_FLUSH_HCTL(0); |
|
2673 } |
|
2674 |
|
2675 memset(hash_st->d.h, 0, sizeof(HASH_ENTRY)); |
|
2676 SET_FLUSH_HE(hash_st); |
|
2677 |
|
2678 ++db_hash_used; |
|
2679 return 1; |
|
2680 } |
|
2681 |
|
2682 |
|
2683 |
|
2684 /* get a free hash table entry and leave db_sts.free pointing to it */ |
|
2685 static u_char /* 0=failed, 1=got it */ |
|
2686 get_free_hash(DCC_EMSG emsg, |
|
2687 DB_HADDR result) /* try near here */ |
|
2688 { |
|
2689 DB_HADDR pg_start, pg_lim, bak; |
|
2690 int i; |
|
2691 |
|
2692 if (db_hash_len <= db_hash_used) { |
|
2693 dcc_pemsg(EX_OSFILE, emsg, "no free hash table entry;" |
|
2694 " %d of %d used", db_hash_used, db_hash_len); |
|
2695 return 0; |
|
2696 } |
|
2697 |
|
2698 /* Look first near the target */ |
|
2699 if (result < DB_HADDR_BASE) |
|
2700 result = DB_HADDR_BASE; |
|
2701 pg_start = result - (result % db_hash_page_len); |
|
2702 pg_lim = pg_start + db_hash_page_len-1; |
|
2703 if (pg_lim >= db_hash_len) |
|
2704 pg_lim = db_hash_len-1; |
|
2705 for (i = 0; i < 3 && ++result < pg_lim; ++i) { |
|
2706 if (!map_hash(emsg, result, &db_sts.free, 0)) |
|
2707 return 0; |
|
2708 if (HE_IS_FREE(db_sts.free.d.h)) |
|
2709 return unlink_free_hash(emsg, &db_sts.free); |
|
2710 } |
|
2711 |
|
2712 /* check the local ad hoc free list at the end of the page */ |
|
2713 if (!map_hash(emsg, pg_lim, &db_sts.free, 0)) |
|
2714 return 0; |
|
2715 if (HE_IS_FREE(db_sts.free.d.h)) { |
|
2716 /* the ad hoc free list is not empty, |
|
2717 * so try to use the previous entry */ |
|
2718 bak = DB_HADDR_EX(db_sts.free.d.h->bak); |
|
2719 if (bak != FREE_HADDR_END) { |
|
2720 if (!map_hash(emsg, bak, &db_sts.free, 0)) |
|
2721 return 0; |
|
2722 } |
|
2723 return unlink_free_hash(emsg, &db_sts.free); |
|
2724 } |
|
2725 |
|
2726 |
|
2727 /* Give up and search from the start of the free list. This happens |
|
2728 * only when the current and all preceding pages are full. */ |
|
2729 if (!map_hash_ctl(emsg, 0)) |
|
2730 return 0; |
|
2731 result = db_sts.hash_ctl.d.vals->s.free_fwd; |
|
2732 if (DB_HADDR_INVALID(result)) { |
|
2733 dcc_pemsg(EX_DATAERR, emsg, |
|
2734 "broken hash free list head of %#x", result); |
|
2735 return 0; |
|
2736 } |
|
2737 if (!map_hash(emsg, result, &db_sts.free, 0)) |
|
2738 return 0; |
|
2739 return unlink_free_hash(emsg, &db_sts.free); |
|
2740 } |
|
2741 |
|
2742 |
|
2743 |
|
2744 /* mmap() a database entry |
|
2745 * We assume that no database entry spans buffers, |
|
2746 * and that there are enough buffers to accomodate all possible |
|
2747 * concurrent requests. */ |
|
2748 static u_char |
|
2749 map_db(DCC_EMSG emsg, |
|
2750 DB_PTR rptr, /* address of the record */ |
|
2751 u_int tgt_len, /* its length */ |
|
2752 DB_STATE *st, /* point this to the record */ |
|
2753 u_char extend) |
|
2754 { |
|
2755 DB_PG_NUM pg_num; |
|
2756 DB_PG_OFF pg_off; |
|
2757 DB_BUF *b; |
|
2758 |
|
2759 if (rptr+tgt_len > db_fsize) { |
|
2760 db_failure(__LINE__,__FILE__, EX_DATAERR, emsg, |
|
2761 "invalid database address "L_HPAT" or length %d" |
|
2762 " past db_fsize "OFF_HPAT" in %s", |
|
2763 rptr, tgt_len, db_fsize, db_nm); |
|
2764 return 0; |
|
2765 } |
|
2766 |
|
2767 /* Try to optimize this to avoid udivdi3() and umoddi3(), |
|
2768 * because they are a major time sink here on 32-bit systems */ |
|
2769 pg_num = DB_PTR2PG_NUM(rptr, db_pagesize); |
|
2770 #ifdef HAVE_64BIT_LONG |
|
2771 pg_off = rptr % db_pagesize; |
|
2772 #else |
|
2773 pg_off = rptr - pg_num*(DB_PTR)db_pagesize; |
|
2774 #endif |
|
2775 |
|
2776 /* do not go past the end of a buffer */ |
|
2777 if (tgt_len+pg_off > db_pagesize) { |
|
2778 db_failure(__LINE__,__FILE__, EX_DATAERR, emsg, |
|
2779 "invalid database address "L_HPAT |
|
2780 " or length %#x in %s", |
|
2781 rptr, tgt_len, db_nm); |
|
2782 return 0; |
|
2783 } |
|
2784 |
|
2785 b = find_st_buf(emsg, DB_BUF_TYPE_DB, st, pg_num, extend); |
|
2786 if (!b) |
|
2787 return 0; |
|
2788 st->s.rptr = rptr; |
|
2789 st->d.r = (DB_RCD *)&b->buf.c[pg_off]; |
|
2790 return 1; |
|
2791 } |
|
2792 |
|
2793 |
|
2794 |
|
2795 u_char /* 0=failed, 1=got it */ |
|
2796 db_map_rcd(DCC_EMSG emsg, |
|
2797 DB_STATE *rcd_st, /* point this to the record */ |
|
2798 DB_PTR rptr, /* that is here */ |
|
2799 int *rcd_lenp) /* put its length here */ |
|
2800 { |
|
2801 u_int rcd_len; |
|
2802 |
|
2803 if (DB_PTR_IS_BAD(rptr)) { |
|
2804 dcc_pemsg(EX_DATAERR, emsg, |
|
2805 "getting bogus record at "L_HPAT", in %s", |
|
2806 rptr, db_nm); |
|
2807 return 0; |
|
2808 } |
|
2809 |
|
2810 if (!map_db(emsg, rptr, DB_RCD_HDR_LEN, rcd_st, 0)) |
|
2811 return 0; |
|
2812 rcd_len = DB_RCD_LEN(rcd_st->d.r); |
|
2813 |
|
2814 if (&rcd_st->d.c[rcd_len] > &rcd_st->b->buf.c[db_pagesize]) { |
|
2815 dcc_pemsg(EX_DATAERR, emsg, |
|
2816 "invalid checksum count %d at "L_HPAT" in %s", |
|
2817 DB_NUM_CKS(rcd_st->d.r), rptr, db_nm); |
|
2818 return 0; |
|
2819 } |
|
2820 |
|
2821 if (rcd_lenp) |
|
2822 *rcd_lenp = rcd_len; |
|
2823 return 1; |
|
2824 } |
|
2825 |
|
2826 |
|
2827 |
|
2828 /* write the new sizes of the files into the files */ |
|
2829 static u_char |
|
2830 db_set_sizes(DCC_EMSG emsg) |
|
2831 { |
|
2832 u_char result = 1; |
|
2833 |
|
2834 if (db_hash_fd != -1 |
|
2835 && (db_csize_stored_hash != db_csize |
|
2836 || db_hash_used_stored_hash != db_hash_used)) { |
|
2837 if (!map_hash_ctl(emsg, 0)) { |
|
2838 result = 0; |
|
2839 } else { |
|
2840 db_sts.hash_ctl.d.vals->s.db_csize = db_csize; |
|
2841 db_csize_stored_hash = db_csize; |
|
2842 |
|
2843 db_sts.hash_ctl.d.vals->s.used = db_hash_used; |
|
2844 db_hash_used_stored_hash = db_hash_used; |
|
2845 |
|
2846 SET_FLUSH_HCTL(0); |
|
2847 } |
|
2848 } |
|
2849 |
|
2850 if (db_fd != -1 |
|
2851 && (db_parms_stored.db_csize != db_csize |
|
2852 || db_parms_stored.hash_used != db_hash_used)) { |
|
2853 if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms, 0)) { |
|
2854 result = 0; |
|
2855 } else { |
|
2856 db_sts.db_parms.d.parms->db_csize = db_csize; |
|
2857 db_parms_stored.db_csize = db_csize; |
|
2858 db_parms.db_csize = db_csize; |
|
2859 |
|
2860 db_sts.db_parms.d.parms->hash_used = db_hash_used; |
|
2861 db_parms_stored.hash_used = db_hash_used; |
|
2862 db_parms.hash_used = db_hash_used; |
|
2863 |
|
2864 db_sts.db_parms.d.parms->last_rate_sec = db_time.tv_sec; |
|
2865 db_parms_stored.last_rate_sec = db_time.tv_sec; |
|
2866 db_parms.last_rate_sec = db_time.tv_sec; |
|
2867 |
|
2868 db_set_flush(&db_sts.db_parms, 1, sizeof(DB_PARMS)); |
|
2869 } |
|
2870 } |
|
2871 |
|
2872 return result; |
|
2873 } |
|
2874 |
|
2875 |
|
2876 |
|
2877 /* write the database parameters into the magic number headers of the files */ |
|
2878 u_char |
|
2879 db_flush_parms(DCC_EMSG emsg) |
|
2880 { |
|
2881 if (!db_set_sizes(emsg)) |
|
2882 return 0; |
|
2883 |
|
2884 if (db_fd == -1) |
|
2885 return 1; |
|
2886 |
|
2887 if (memcmp(&db_parms, &db_parms_stored, sizeof(db_parms))) { |
|
2888 if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms, 0)) |
|
2889 return 0; |
|
2890 |
|
2891 db_parms.pagesize = db_pagesize; |
|
2892 |
|
2893 *db_sts.db_parms.d.parms = db_parms; |
|
2894 db_parms_stored = db_parms; |
|
2895 |
|
2896 db_set_flush(&db_sts.db_parms, 1, sizeof(DB_PARMS)); |
|
2897 } |
|
2898 |
|
2899 return 1; |
|
2900 } |
|
2901 |
|
2902 |
|
2903 |
|
2904 /* find a checksum in an already mapped record */ |
|
2905 DB_RCD_CK * /* 0=not found, 1=broken database */ |
|
2906 db_find_ck(DCC_EMSG emsg, |
|
2907 DB_RCD *rcd, |
|
2908 DB_PTR rptr, |
|
2909 DCC_CK_TYPES type) /* find this type of checksum */ |
|
2910 { |
|
2911 DB_RCD_CK *rcd_ck; |
|
2912 int i; |
|
2913 |
|
2914 rcd_ck = rcd->cks; |
|
2915 i = DB_NUM_CKS(rcd); |
|
2916 if (i >= DCC_NUM_CKS) { |
|
2917 dcc_pemsg(EX_DATAERR, emsg, |
|
2918 "impossible %d checksums in "L_HPAT" in %s", |
|
2919 i, rptr, db_nm); |
|
2920 return (DB_RCD_CK *)1; |
|
2921 } |
|
2922 |
|
2923 for (; i != 0; --i, ++rcd_ck) { |
|
2924 if (DB_CK_TYPE(rcd_ck) == type) |
|
2925 return rcd_ck; |
|
2926 } |
|
2927 |
|
2928 return 0; |
|
2929 } |
|
2930 |
|
2931 |
|
2932 |
|
2933 /* find a checksum type known to be in a record */ |
|
2934 DB_RCD_CK * /* 0=it's not there */ |
|
2935 db_map_rcd_ck(DCC_EMSG emsg, |
|
2936 DB_STATE *rcd_st, /* point this to the record */ |
|
2937 DB_PTR rptr, /* that is here */ |
|
2938 DCC_CK_TYPES type) /* find this type of checksum */ |
|
2939 { |
|
2940 DB_RCD_CK *rcd_ck; |
|
2941 |
|
2942 if (!db_map_rcd(emsg, rcd_st, rptr, 0)) |
|
2943 return 0; |
|
2944 |
|
2945 rcd_ck = db_find_ck(emsg, rcd_st->d.r, rptr, type); |
|
2946 if (rcd_ck == (DB_RCD_CK *)1) |
|
2947 return 0; |
|
2948 if (rcd_ck == 0) { |
|
2949 dcc_pemsg(EX_DATAERR, emsg, |
|
2950 "missing \"%s\" checksum in "L_HPAT" in %s", |
|
2951 DB_TYPE2STR(type), rptr, db_nm); |
|
2952 return 0; |
|
2953 } |
|
2954 return rcd_ck; |
|
2955 } |
|
2956 |
|
2957 |
|
2958 |
|
2959 static inline u_char /* 1=has a small prime factor */ |
|
2960 modulus_has_divisor(DB_HADDR len) |
|
2961 { |
|
2962 static int primes[] = { |
|
2963 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, |
|
2964 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, |
|
2965 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, |
|
2966 197, 199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, |
|
2967 271, 277, 281, 283, 293, 307, 311, 313, 317, 331, 337, 347, 349, |
|
2968 353, 359, 367, 373, 379, 383, 389, 397, 401, 409, 419, 421, 431, |
|
2969 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499}; |
|
2970 int *p; |
|
2971 |
|
2972 for (p = &primes[0]; p <= LAST(primes); ++p) { |
|
2973 if ((len % *p) == 0) |
|
2974 return 1; |
|
2975 } |
|
2976 return 0; |
|
2977 } |
|
2978 |
|
2979 |
|
2980 |
|
2981 /* Get a modulus for the hash function that is tolerably likely to be |
|
2982 * relatively prime to most inputs. The worst that happens when the modulus |
|
2983 * is composite is that large multiples of its factors will suffer more |
|
2984 * collisions. */ |
|
2985 DB_HADDR |
|
2986 get_db_hash_divisor(DB_HADDR len) |
|
2987 { |
|
2988 DB_HADDR divisor; |
|
2989 |
|
2990 divisor = len - DB_HADDR_BASE; |
|
2991 if (!(divisor & 1)) |
|
2992 --divisor; |
|
2993 while (divisor >= MIN_HASH_ENTRIES) { |
|
2994 if (modulus_has_divisor(divisor)) |
|
2995 divisor -= 2; |
|
2996 else |
|
2997 break; |
|
2998 } |
|
2999 return divisor; |
|
3000 } |
|
3001 |
|
3002 |
|
3003 |
|
3004 DB_HADDR |
|
3005 db_hash(DCC_CK_TYPES type, const DCC_SUM sum) |
|
3006 { |
|
3007 u_int64_t accum, wrap; |
|
3008 const u_int32_t *wp; |
|
3009 union { |
|
3010 DCC_SUM sum; |
|
3011 u_int32_t words[4]; |
|
3012 } buf; |
|
3013 int align; |
|
3014 DB_HADDR haddr; |
|
3015 |
|
3016 #ifdef HAVE_64BIT_PTR |
|
3017 align = (u_int64_t)sum & 3; |
|
3018 #else |
|
3019 align = (u_int)sum & 3; |
|
3020 #endif |
|
3021 if (align == 0) { |
|
3022 /* We almost always take this branch because database |
|
3023 * records contain 12+N*24 bytes. That also implies that |
|
3024 * we should not hope for better than 4 byte alignment. */ |
|
3025 wp = (u_int32_t *)sum; |
|
3026 } else { |
|
3027 memcpy(buf.sum, sum, sizeof(buf.sum)); |
|
3028 wp = buf.words; |
|
3029 } |
|
3030 |
|
3031 /* MD5 checksums are uniformly distributed, and so DCC_SUMs are |
|
3032 * directly useful for hashing except when they are server-IDs */ |
|
3033 accum = *wp++; |
|
3034 accum += *wp++; |
|
3035 wrap = accum >>32; |
|
3036 accum <<= 32; |
|
3037 accum += wrap + type; |
|
3038 accum += *wp++; |
|
3039 accum += *wp; |
|
3040 |
|
3041 haddr = accum % db_hash_divisor; |
|
3042 haddr += DB_HADDR_BASE; |
|
3043 |
|
3044 /* do not hash into the last slot of a page, because it is used to |
|
3045 * find local free slots */ |
|
3046 if (haddr % db_hash_page_len == db_hash_page_len-1) { |
|
3047 ++haddr; |
|
3048 if (haddr >= db_hash_len) |
|
3049 haddr = DB_HADDR_BASE; |
|
3050 } |
|
3051 return haddr; |
|
3052 } |
|
3053 |
|
3054 |
|
3055 |
|
3056 /* look for a checksum in the hash table |
|
3057 * return with an excuse, the home slot, or the last entry on |
|
3058 * the collision chain */ |
|
3059 DB_FOUND |
|
3060 db_lookup(DCC_EMSG emsg, DCC_CK_TYPES type, const DCC_SUM sum, |
|
3061 DB_HADDR lo, /* postpone if out of this window */ |
|
3062 DB_HADDR hi, |
|
3063 DB_STATE *hash_st, /* hash block for record or related */ |
|
3064 DB_STATE *rcd_st, /* put the record or garbage here */ |
|
3065 DB_RCD_CK **prcd_ck) /* point to cksum if found */ |
|
3066 { |
|
3067 DB_HADDR haddr, haddr_fwd, haddr_bak; |
|
3068 DB_PTR db_ptr; |
|
3069 DB_RCD_CK *found_ck; |
|
3070 DB_HADDR failsafe; |
|
3071 |
|
3072 haddr = db_hash(type, sum); |
|
3073 if (haddr < lo || haddr > hi) { |
|
3074 if (lo == 0 && hi == MAX_HASH_ENTRIES) { |
|
3075 dcc_pemsg(EX_DATAERR, emsg, |
|
3076 "out of range hash address"); |
|
3077 return DB_FOUND_SYSERR; |
|
3078 } |
|
3079 return DB_FOUND_LATER; |
|
3080 } |
|
3081 |
|
3082 if (prcd_ck) |
|
3083 *prcd_ck = 0; |
|
3084 |
|
3085 if (!map_hash(emsg, haddr, hash_st, 0)) |
|
3086 return DB_FOUND_SYSERR; |
|
3087 |
|
3088 if (HE_IS_FREE(hash_st->d.h)) |
|
3089 return DB_FOUND_EMPTY; |
|
3090 |
|
3091 if (!DB_HADDR_C_NULL(hash_st->d.h->bak)) |
|
3092 return DB_FOUND_INTRUDER; |
|
3093 |
|
3094 /* We know that the current hash table entry is in its home slot. |
|
3095 * It might be for the key or checksum we are looking for |
|
3096 * or it might be for some other checksum with the same hash value. */ |
|
3097 for (failsafe = 0; failsafe <= db_hash_len; ++failsafe) { |
|
3098 if (HE_CMP(hash_st->d.h, type, sum)) { |
|
3099 /* This hash table entry could be for our target |
|
3100 * checksum. Read the corresponding record so we |
|
3101 * decide whether we have a hash collision or we |
|
3102 * have found a record containing our target checksum. |
|
3103 * |
|
3104 * find right type of checksum in the record */ |
|
3105 db_ptr = DB_HPTR_EX(hash_st->d.h->rcd); |
|
3106 found_ck = db_map_rcd_ck(emsg, rcd_st, db_ptr, type); |
|
3107 if (!found_ck) |
|
3108 return DB_FOUND_SYSERR; |
|
3109 if (!memcmp(sum, found_ck->sum, |
|
3110 sizeof(DCC_SUM))) { |
|
3111 if (prcd_ck) |
|
3112 *prcd_ck = found_ck; |
|
3113 return DB_FOUND_IT; |
|
3114 } |
|
3115 } |
|
3116 |
|
3117 /* This DB record was a hash collision, or for a checksum |
|
3118 * other than our target. |
|
3119 * Fail if this is the end of the hash chain */ |
|
3120 haddr_fwd = DB_HADDR_EX(hash_st->d.h->fwd); |
|
3121 if (haddr_fwd == DB_HADDR_NULL) |
|
3122 return DB_FOUND_CHAIN; |
|
3123 |
|
3124 if (DB_HADDR_INVALID(haddr_fwd)) { |
|
3125 dcc_pemsg(EX_DATAERR, emsg, |
|
3126 "broken hash chain fwd-link" |
|
3127 " #%d %#x at %#x in %s", |
|
3128 failsafe, haddr_fwd, haddr, db_hash_nm); |
|
3129 return DB_FOUND_SYSERR; |
|
3130 } |
|
3131 |
|
3132 if (!map_hash(emsg, haddr_fwd, hash_st, 0)) |
|
3133 return DB_FOUND_SYSERR; |
|
3134 |
|
3135 haddr_bak = DB_HADDR_EX(hash_st->d.h->bak); |
|
3136 if (haddr_bak != haddr) { |
|
3137 dcc_pemsg(EX_DATAERR, emsg, |
|
3138 "broken hash chain links #%d," |
|
3139 " %#x-->%#x but %#x<--%#x in %s", |
|
3140 failsafe, |
|
3141 haddr, haddr_fwd, |
|
3142 haddr_bak, haddr_fwd, |
|
3143 db_hash_nm); |
|
3144 return DB_FOUND_SYSERR; |
|
3145 } |
|
3146 haddr = haddr_fwd; |
|
3147 } |
|
3148 dcc_pemsg(EX_DATAERR, emsg, "infinite hash chain at %#x in %s", |
|
3149 haddr, db_hash_nm); |
|
3150 return DB_FOUND_SYSERR; |
|
3151 } |
|
3152 |
|
3153 |
|
3154 |
|
3155 /* combine checksums */ |
|
3156 DCC_TGTS |
|
3157 db_sum_ck(DCC_TGTS prev, /* previous sum */ |
|
3158 DCC_TGTS rcd_tgts, /* from the record */ |
|
3159 DCC_CK_TYPES type UATTRIB) |
|
3160 { |
|
3161 DCC_TGTS res; |
|
3162 |
|
3163 /* This arithmetic must be commutative (after handling deleted |
|
3164 * values), because inter-server flooding causes records to appear in |
|
3165 * the database out of temporal order. |
|
3166 * |
|
3167 * DCC_TGTS_TOO_MANY can be thought of as a count of plus infinity. |
|
3168 * DCC_TGTS_OK is like minus infinity. |
|
3169 * DCC_TGTS_OK2 like half of minus infinity |
|
3170 * DCC_TGTS_TOO_MANY (plus infinity) added to DCC_TGTS_OK (minus |
|
3171 * infinity) or DCC_TGTS_OK2 yields DCC_TGTS_OK or DCC_TGTS_OK2. |
|
3172 * |
|
3173 * Reputations never reach infinity. |
|
3174 * |
|
3175 * Claims of not-spam from all clients are discarded as they arrive |
|
3176 * and before here. They can only come from the local white list |
|
3177 */ |
|
3178 #define SUM_OK_DEL(p,r) { \ |
|
3179 if (rcd_tgts == DCC_TGTS_OK || prev == DCC_TGTS_OK) \ |
|
3180 return DCC_TGTS_OK; \ |
|
3181 if (rcd_tgts == DCC_TGTS_OK2 || prev == DCC_TGTS_OK2) \ |
|
3182 return DCC_TGTS_OK2; \ |
|
3183 if (rcd_tgts == DCC_TGTS_DEL) \ |
|
3184 return prev; \ |
|
3185 } |
|
3186 |
|
3187 res = prev+rcd_tgts; |
|
3188 if (res <= DCC_TGTS_TOO_MANY) |
|
3189 return res; |
|
3190 |
|
3191 SUM_OK_DEL(prev, rcd_tgts); |
|
3192 return DCC_TGTS_TOO_MANY; |
|
3193 #undef SUM_OK_DEL |
|
3194 } |
|
3195 |
|
3196 |
|
3197 |
|
3198 /* delete all reports that contain the given checksum */ |
|
3199 static u_char /* 1=done, 0=broken database */ |
|
3200 del_ck(DCC_EMSG emsg, |
|
3201 DCC_TGTS *res, /* residual targets after deletion */ |
|
3202 const DB_RCD *new, /* delete reports older than this one */ |
|
3203 DCC_CK_TYPES type, /* delete this type of checksum */ |
|
3204 DB_RCD_CK *prev_ck, /* starting with this one */ |
|
3205 DB_STATE *prev_st) /* use this scratch state block */ |
|
3206 { |
|
3207 DB_PTR prev; |
|
3208 |
|
3209 *res = 0; |
|
3210 for (;;) { |
|
3211 /* delete reports that are older than the delete request */ |
|
3212 if (dcc_ts_newer_ts(&new->ts, &prev_st->d.r->ts) |
|
3213 && DB_RCD_ID(prev_st->d.r) != DCC_ID_WHITE) { |
|
3214 DB_TGTS_RCD_SET(prev_st->d.r, 0); |
|
3215 DB_TGTS_CK_SET(prev_ck, 0); |
|
3216 SET_FLUSH_RCD(prev_st, 1); |
|
3217 |
|
3218 } else { |
|
3219 /* sum reports that are not deleted */ |
|
3220 *res = db_sum_ck(*res, DB_TGTS_RCD(prev_st->d.r), type); |
|
3221 } |
|
3222 |
|
3223 prev = DB_PTR_EX(prev_ck->prev); |
|
3224 if (prev == DB_PTR_NULL) |
|
3225 return 1; |
|
3226 prev_ck = db_map_rcd_ck(emsg, prev_st, prev, type); |
|
3227 if (!prev_ck) |
|
3228 return 0; |
|
3229 } |
|
3230 } |
|
3231 |
|
3232 |
|
3233 |
|
3234 /* see if the new and preceding records are from the same era */ |
|
3235 static inline u_char /* 1=different eras */ |
|
3236 ck_old_spam(const DB_RCD *new, const DCC_TS* prev, DCC_CK_TYPES type) |
|
3237 { |
|
3238 struct timeval tv; |
|
3239 time_t secs; |
|
3240 DCC_TS past; |
|
3241 |
|
3242 secs = db_parms.ex_secs[type].spam; |
|
3243 if (secs > DCC_OLD_SPAM_SECS) |
|
3244 secs = DCC_OLD_SPAM_SECS; |
|
3245 dcc_ts2timeval(&tv, &new->ts); |
|
3246 dcc_timeval2ts(&past, &tv, -secs); |
|
3247 |
|
3248 return dcc_ts_older_ts(prev, &past); |
|
3249 } |
|
3250 |
|
3251 |
|
3252 |
|
3253 /* Mark reports made obsolete by a spam report |
|
3254 * A new report of spam makes sufficiently old reports obsolete. |
|
3255 * |
|
3256 * Sufficiently recent non-obsolete reports make a new report obsolete, |
|
3257 * or at least not worth spending bandwidth to flood. |
|
3258 * "Sufficiently recent" should be defined so that this server and |
|
3259 * its downstream flooding peers always have reports of the checksums |
|
3260 * in the report. So we want to keep (not make obsolete) at least one |
|
3261 * report per expiration duration. We cannot know the expiration durations |
|
3262 * of our peers, but we known DB_EXPIRE_SPAMSECS_DEF_MIN which influences |
|
3263 * DCC_OLD_SPAM_SECS. |
|
3264 * |
|
3265 * However, if another checksum in the new report was kept, then |
|
3266 * prefer marking old checksums obsolete. |
|
3267 * |
|
3268 * db_sts.rcd points to the new record |
|
3269 * db_sts.rcd2 points the the previous record and is changed |
|
3270 */ |
|
3271 static u_char /* 1=done, 0=broken database */ |
|
3272 ck_obs_spam(DCC_EMSG emsg, |
|
3273 const DB_RCD *new, |
|
3274 DCC_TGTS new_tgts, |
|
3275 DB_RCD_CK *new_ck, |
|
3276 DCC_CK_TYPES type, /* check this type of checksum */ |
|
3277 DB_RCD_CK *prev_ck, /* starting with this one */ |
|
3278 DCC_TGTS prev_ck_tgts, |
|
3279 u_char *keeping_new) /* 1=already keeping the new record */ |
|
3280 { |
|
3281 int limit; |
|
3282 DB_PTR prev; |
|
3283 |
|
3284 limit = 100; |
|
3285 for (;;) { |
|
3286 /* preceding white listed entries make new entries obsolete */ |
|
3287 if (DB_RCD_ID(db_sts.rcd2.d.r) == DCC_ID_WHITE) { |
|
3288 new_ck->type_fgs |= DB_CK_FG_OBS; |
|
3289 SET_FLUSH_RCD(&db_sts.rcd, 1); |
|
3290 return 1; |
|
3291 } |
|
3292 |
|
3293 if (DB_CK_OBS(prev_ck) |
|
3294 || DB_TGTS_RCD(db_sts.rcd2.d.r) == 0) { |
|
3295 /* notice duplicates and |
|
3296 * don't look forever for recent non-obsolete report */ |
|
3297 if (!memcmp(&new->ts, &db_sts.rcd2.d.r->ts, |
|
3298 sizeof(new->ts)) |
|
3299 || --limit == 0) { |
|
3300 *keeping_new = 1; |
|
3301 return 1; |
|
3302 } |
|
3303 |
|
3304 } else if (prev_ck_tgts != DCC_TGTS_TOO_MANY) { |
|
3305 /* Mark this predecessor obsolete because it |
|
3306 * was before the checksum became spam. */ |
|
3307 prev_ck->type_fgs |= DB_CK_FG_OBS; |
|
3308 SET_FLUSH_RCD(&db_sts.rcd2, 0); |
|
3309 |
|
3310 /* continue backwards to mark more non-spam |
|
3311 * predecessors obsolete */ |
|
3312 |
|
3313 } else if (!*keeping_new |
|
3314 && ck_old_spam(new, &db_sts.rcd2.d.r->ts, type)) { |
|
3315 /* We do not yet have a reason to keep the new report |
|
3316 * and this predecessor is at or after a spam report. |
|
3317 * We need the new report because it and the |
|
3318 * predecessor are from different eras. |
|
3319 * If the new report is not of spam, it will be |
|
3320 * compressed with a preceding spam report. */ |
|
3321 *keeping_new = 1; |
|
3322 /* The predecessor is not needed if the new record |
|
3323 * is for spam */ |
|
3324 if (new_tgts == DCC_TGTS_TOO_MANY) { |
|
3325 prev_ck->type_fgs |= DB_CK_FG_OBS; |
|
3326 SET_FLUSH_RCD(&db_sts.rcd2, 0); |
|
3327 } |
|
3328 /* We're finished, because all older preceding reports |
|
3329 * were marked obsolete when this older predecessor |
|
3330 * was linked. */ |
|
3331 return 1; |
|
3332 |
|
3333 } else { |
|
3334 /* this predecessor is about as recent as the new |
|
3335 * record, so the new record is unneeded noise that |
|
3336 * would bloat other servers' databases. */ |
|
3337 new_ck->type_fgs |= DB_CK_FG_OBS; |
|
3338 return 1; |
|
3339 } |
|
3340 |
|
3341 prev = DB_PTR_EX(prev_ck->prev); |
|
3342 if (prev == DB_PTR_NULL) { |
|
3343 /* the new record is a new report of spam */ |
|
3344 *keeping_new = 1; |
|
3345 return 1; |
|
3346 } |
|
3347 |
|
3348 prev_ck = db_map_rcd_ck(emsg, &db_sts.rcd2, prev, type); |
|
3349 if (!prev_ck) |
|
3350 return 0; |
|
3351 prev_ck_tgts = DB_TGTS_CK(prev_ck); |
|
3352 } |
|
3353 } |
|
3354 |
|
3355 |
|
3356 |
|
3357 /* mark extra server-ID declarations obsolete |
|
3358 * |
|
3359 * db_sts.rcd points to the new record |
|
3360 * db_sts.rcd2 points the the previous record and is changed */ |
|
3361 static u_char /* 1=done, 0=broken database */ |
|
3362 srvr_id_ck(DCC_EMSG emsg, |
|
3363 const DB_RCD *new, |
|
3364 DB_RCD_CK *new_ck, |
|
3365 DB_RCD_CK *prev_ck) /* starting with this one */ |
|
3366 { |
|
3367 DB_PTR prev; |
|
3368 DCC_SRVR_ID new_id, prev_id; |
|
3369 struct timeval tv; |
|
3370 DCC_TS week_ts; |
|
3371 |
|
3372 dcc_ts2timeval(&tv, &new->ts); |
|
3373 tv.tv_usec = 0; |
|
3374 tv.tv_sec -= tv.tv_sec % (7*24*60*60); |
|
3375 dcc_timeval2ts(&week_ts, &tv, 0); |
|
3376 |
|
3377 new_id = DB_RCD_ID(new); |
|
3378 for (;;) { |
|
3379 /* mark duplicate older declarations and deletions obsolete */ |
|
3380 prev_id = DB_RCD_ID(db_sts.rcd2.d.r); |
|
3381 if (!DCC_ID_SRVR_TYPE(prev_id) |
|
3382 || DB_TGTS_RCD(db_sts.rcd2.d.r) == 0) { |
|
3383 if (dcc_ts_newer_ts(&db_sts.rcd2.d.r->ts, &new->ts)) { |
|
3384 new_ck->type_fgs |= DB_CK_FG_OBS; |
|
3385 SET_FLUSH_RCD(&db_sts.rcd, 1); |
|
3386 } else { |
|
3387 prev_ck->type_fgs |= DB_CK_FG_OBS; |
|
3388 SET_FLUSH_RCD(&db_sts.rcd2, 1); |
|
3389 } |
|
3390 return 1; |
|
3391 } |
|
3392 |
|
3393 /* Keep many identical type declarations as a kludge to ensure |
|
3394 * that rewound flooding sends type declarations early. |
|
3395 * Keep only one delcaration per week. */ |
|
3396 if (DCC_ID_SRVR_TYPE(new_id)) { |
|
3397 /* Zap the new declaration and stop if the |
|
3398 * new declaration is older than the predecessor. */ |
|
3399 if (dcc_ts_newer_ts(&db_sts.rcd2.d.r->ts, &new->ts)) { |
|
3400 new_ck->type_fgs |= DB_CK_FG_OBS; |
|
3401 SET_FLUSH_RCD(&db_sts.rcd, 1); |
|
3402 return 1; |
|
3403 } |
|
3404 |
|
3405 /* Stop when we find a duplicate type declaration |
|
3406 * of a different week */ |
|
3407 if (prev_id == new_id |
|
3408 && dcc_ts_older_ts(&db_sts.rcd2.d.r->ts, |
|
3409 &week_ts)) { |
|
3410 return 1; |
|
3411 } |
|
3412 |
|
3413 /* continue zapping preceding declarations */ |
|
3414 prev_ck->type_fgs |= DB_CK_FG_OBS; |
|
3415 SET_FLUSH_RCD(&db_sts.rcd2, 1); |
|
3416 } |
|
3417 |
|
3418 prev = DB_PTR_EX(prev_ck->prev); |
|
3419 if (prev == DB_PTR_NULL) |
|
3420 return 1; |
|
3421 |
|
3422 prev_ck = db_map_rcd_ck(emsg, &db_sts.rcd2, |
|
3423 prev, DCC_CK_SRVR_ID); |
|
3424 if (!prev_ck) |
|
3425 return 0; |
|
3426 } |
|
3427 } |
|
3428 |
|
3429 |
|
3430 |
|
3431 /* Install pointers in the hash table for a record and fix the accumulated |
|
3432 * counts in the record pointed to by db_sts.rcd |
|
3433 * Use db_sts.rcd, db_sts.hash, db_sts.rcd2, db_sts.free, db_sts.tmp |
|
3434 * The caller must deal with db_make_dirty() */ |
|
/* lo and hi are handed to db_lookup().  A checksum for which db_lookup()
 * answers DB_FOUND_LATER is skipped on this call; its back pointer has
 * already been set to null and its total is left untouched.
 * After every checksum of the record has been visited, the result is
 * whatever db_set_sizes() returns. */
3435 u_char /* 0=failed, 1=done */ |
|
3436 db_link_rcd(DCC_EMSG emsg, DB_HADDR lo, DB_HADDR hi) |
|
3437 { |
|
3438 DCC_TGTS res; |
|
3439 DB_RCD *rcd; |
|
3440 DB_RCD_CK *prev_ck; |
|
3441 DB_RCD_CK *rcd_ck; |
|
3442 DCC_CK_TYPES rcd_type; |
|
3443 DCC_TGTS rcd_tgts, prev_ck_tgts; |
|
3444 int ck_num; |
|
3445 DB_HADDR haddr; |
|
3446 u_char keeping_new; |
|
3447 |
|
3448 keeping_new = 0; |
|
3449 rcd = db_sts.rcd.d.r; |
|
3450 rcd_tgts = DB_TGTS_RCD_RAW(rcd); |
|
3451 rcd_ck = rcd->cks; |
|
3452 ck_num = DB_NUM_CKS(rcd); |
|
/* refuse a record that claims more checksums than it has room for */
3453 if (ck_num > DIM(rcd->cks)) { |
|
3454 dcc_pemsg(EX_OSFILE, emsg, |
|
3455 "bogus checksum count %#x at "L_HPAT" in %s", |
|
3456 rcd->fgs_num_cks, db_sts.rcd.s.rptr, db_nm); |
|
3457 return 0; |
|
3458 } |
|
/* handle each checksum in the record in turn */
3459 for (; ck_num > 0; --ck_num, ++rcd_ck) { |
|
3460 rcd_type = DB_CK_TYPE(rcd_ck); |
|
3461 if (!DCC_CK_OK_DB(grey_on, rcd_type)) { |
|
3462 dcc_pemsg(EX_OSFILE, emsg, |
|
3463 "invalid checksum type %s at "L_HPAT" in %s", |
|
3464 DB_TYPE2STR(rcd_type), |
|
3465 db_sts.rcd.s.rptr, db_nm); |
|
3466 return 0; |
|
3467 } |
|
3468 |
|
/* start with no predecessor; the DB_FOUND_IT case below links one */
3469 rcd_ck->prev = DB_PTR_CP(DB_PTR_NULL); |
|
3470 |
|
3471 /* Do not link paths or whitelist file and line numbers */ |
|
3472 if (rcd_type == DCC_CK_FLOD_PATH) { |
|
3473 DB_TGTS_CK_SET(rcd_ck, 0); |
|
3474 continue; |
|
3475 } |
|
3476 |
|
3477 /* Do not link or total some checksums unless they are |
|
3478 * whitelist entries. If they are whitelist entries, they |
|
3479 * will eventually get set to DCC_TGTS_OK or DCC_TGTS_OK2. |
|
3480 * Blacklist entries are noticed later by server-ID |
|
3481 * or do not matter DCC_TGTS_TOO_MANY. */ |
|
3482 if (DB_TEST_NOKEEP(db_parms.nokeep_cks, rcd_type) |
|
3483 && DB_RCD_ID(rcd) != DCC_ID_WHITE) { |
|
3484 DB_TGTS_CK_SET(rcd_ck, 1); |
|
3485 continue; |
|
3486 } |
|
3487 |
|
/* a delete request adds nothing of its own to the total */
3488 res = (rcd_tgts == DCC_TGTS_DEL) ? 0 : rcd_tgts; |
|
3489 |
|
/* On return db_sts.hash maps the hash entry that was found.  When the
 * checksum itself was found, db_sts.rcd2 maps its record and prev_ck
 * points to the checksum within that record. */
3490 switch (db_lookup(emsg, rcd_type, rcd_ck->sum, lo, hi, |
|
3491 &db_sts.hash, &db_sts.rcd2, &prev_ck)) { |
|
3492 case DB_FOUND_SYSERR: |
|
3493 return 0; |
|
3494 |
|
3495 case DB_FOUND_LATER: |
|
3496 continue; |
|
3497 |
|
3498 case DB_FOUND_IT: |
|
3499 /* We found the checksum |
|
3500 * Update the hash table to point to the new record */ |
|
3501 DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr); |
|
3502 SET_FLUSH_HE(&db_sts.hash); |
|
3503 /* link new record to existing record */ |
|
3504 rcd_ck->prev = DB_PTR_CP(db_sts.rcd2.s.rptr); |
|
3505 |
|
3506 /* delete predecessors to a delete request |
|
3507 * and compute the remaining sum */ |
|
3508 if (rcd_tgts == DCC_TGTS_DEL) { |
|
3509 if (!del_ck(emsg, &res, rcd, rcd_type, |
|
3510 prev_ck, &db_sts.rcd2)) |
|
3511 return 0; |
|
3512 /* delete requests are obsolete if the |
|
3513 * checksum is whitelisted */ |
|
3514 if (res == DCC_TGTS_OK |
|
3515 || res == DCC_TGTS_OK2) |
|
3516 rcd_ck->type_fgs |= DB_CK_FG_OBS; |
|
3517 break; |
|
3518 } |
|
3519 |
|
3520 /* Simple checksum with a predecessor |
|
3521 * This does not do the substantial extra work |
|
3522 * to notice all delete requests that arrived early. |
|
3523 * That problem is handled by the incoming flood |
|
3524 * duplicate report detection mechanism. |
|
3525 * We must detect predecessors that were deleted because |
|
3526 * they are partial duplicates of the new record. */ |
|
3527 prev_ck_tgts = DB_TGTS_CK(prev_ck); |
|
/* when DB_RCD_SUMRY() is true for the new record, the predecessor's
 * total is taken as is; otherwise the new targets are added to it */
3528 if (DB_RCD_SUMRY(rcd)) |
|
3529 res = prev_ck_tgts; |
|
3530 else |
|
3531 res = db_sum_ck(prev_ck_tgts, res, rcd_type); |
|
3532 if ((res == DCC_TGTS_OK || res == DCC_TGTS_OK2 |
|
3533 || (DB_RCD_ID(db_sts.rcd2.d.r) == DCC_ID_WHITE)) |
|
3534 && DB_RCD_ID(rcd) != DCC_ID_WHITE){ |
|
3535 /* obsolete whitelisted checksums */ |
|
3536 rcd_ck->type_fgs |= DB_CK_FG_OBS; |
|
3537 break; |
|
3538 } |
|
3539 if (res == DCC_TGTS_TOO_MANY) { |
|
3540 /* mark obsolete unneeded reports of spam */ |
|
3541 if (!DB_CK_OBS(rcd_ck) |
|
3542 && !ck_obs_spam(emsg, rcd, rcd_tgts, |
|
3543 rcd_ck, rcd_type, |
|
3544 prev_ck, prev_ck_tgts, |
|
3545 &keeping_new)) |
|
3546 return 0; /* (broken database) */ |
|
3547 } else if (rcd_type == DCC_CK_SRVR_ID) { |
|
3548 /* mark obsolete server-ID assertions */ |
|
3549 if (!DB_CK_OBS(rcd_ck) |
|
3550 && !srvr_id_ck(emsg, rcd, rcd_ck, prev_ck)) |
|
3551 return 0; /* (broken database) */ |
|
3552 } |
|
3553 break; |
|
3554 |
|
3555 case DB_FOUND_EMPTY: |
|
3556 /* We found an empty hash table slot. |
|
3557 * Update the slot to point to our new record |
|
3558 * after removing it from the free list, |
|
3559 * which marks it dirty. */ |
|
3560 if (!unlink_free_hash(emsg, &db_sts.hash)) |
|
3561 return 0; |
|
3562 DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr); |
|
3563 HE_MERGE(db_sts.hash.d.h,rcd_type, rcd_ck->sum); |
|
3564 if (res >= BULK_THRESHOLD) |
|
3565 keeping_new = 1; |
|
3566 break; |
|
3567 |
|
3568 case DB_FOUND_CHAIN: |
|
3569 /* We found a hash collision, a chain of 1 or more |
|
3570 * records with the same hash value. |
|
3571 * Get a free slot, link it to the end of the |
|
3572 * existing chain, and point it to the new record. |
|
3573 * The buffer containing the free slot is marked |
|
3574 * dirty when it is removed from the free list. */ |
|
3575 if (!get_free_hash(emsg, db_sts.hash.s.haddr)) |
|
3576 return 0; |
|
3577 DB_HADDR_CP(db_sts.free.d.h->bak, db_sts.hash.s.haddr); |
|
3578 DB_HADDR_CP(db_sts.hash.d.h->fwd, db_sts.free.s.haddr); |
|
3579 DB_HPTR_CP(db_sts.free.d.h->rcd, db_sts.rcd.s.rptr); |
|
3580 HE_MERGE(db_sts.free.d.h,rcd_type, rcd_ck->sum); |
|
3581 SET_FLUSH_HE(&db_sts.hash); |
|
3582 if (res >= BULK_THRESHOLD) |
|
3583 keeping_new = 1; |
|
3584 break; |
|
3585 |
|
3586 case DB_FOUND_INTRUDER: |
|
3587 /* The home hash slot for our key contains an |
|
3588 * intruder. Move it to a new free slot */ |
|
3589 if (!get_free_hash(emsg, db_sts.hash.s.haddr)) |
|
3590 return 0; |
|
/* copy the whole intruding entry, links included, into the free slot */
3591 *db_sts.free.d.h = *db_sts.hash.d.h; |
|
3592 /* re-link the neighbors of the intruder */ |
|
3593 haddr = DB_HADDR_EX(db_sts.free.d.h->bak); |
|
/* NOTE(review): db_lookup() answers DB_FOUND_INTRUDER only for an entry
 * whose back link is not null, so a null link here presumably means the
 * hash table is damaged -- confirm */
3594 if (haddr == DB_HADDR_NULL) { |
|
3595 dcc_pemsg(EX_DATAERR, emsg, |
|
3596 "bad hash chain reverse link at %#x" |
|
3597 " in %s", |
|
3598 haddr, db_hash_nm); |
|
3599 return 0; |
|
3600 } |
|
/* point the predecessor of the intruder forward to its new slot */
3601 if (!map_hash(emsg, haddr, &db_sts.tmp, 0)) |
|
3602 return 0; |
|
3603 DB_HADDR_CP(db_sts.tmp.d.h->fwd, db_sts.free.s.haddr); |
|
3604 SET_FLUSH_HE(&db_sts.tmp); |
|
/* and point its successor, if there is one, back to the new slot */
3605 haddr = DB_HADDR_EX(db_sts.hash.d.h->fwd); |
|
3606 if (haddr != DB_HADDR_NULL) { |
|
3607 if (!map_hash(emsg, haddr, &db_sts.tmp, 0)) |
|
3608 return 0; |
|
3609 DB_HADDR_CP(db_sts.tmp.d.h->bak, |
|
3610 db_sts.free.s.haddr); |
|
3611 SET_FLUSH_HE(&db_sts.tmp); |
|
3612 } |
|
3613 /* install the new entry in its home slot */ |
|
3614 DB_HADDR_CP(db_sts.hash.d.h->fwd, DB_HADDR_NULL); |
|
3615 DB_HADDR_CP(db_sts.hash.d.h->bak, DB_HADDR_NULL); |
|
3616 DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr); |
|
3617 HE_MERGE(db_sts.hash.d.h,rcd_type, rcd_ck->sum); |
|
3618 SET_FLUSH_HE(&db_sts.hash); |
|
3619 if (res >= BULK_THRESHOLD) |
|
3620 keeping_new = 1; |
|
3621 break; |
|
3622 } |
|
3623 |
|
3624 /* Fix the checksum's total in the record */ |
|
3625 DB_TGTS_CK_SET(rcd_ck, res); |
|
3626 SET_FLUSH_RCD(&db_sts.rcd, 0); |
|
3627 } |
|
3628 |
|
3629 return db_set_sizes(emsg); |
|
3630 } |
|
3631 |
|
3632 |
|
3633 |
|
3634 /* Add a record to the database and the hash table |
|
3635 * The record must be known to be valid |
|
3636 * Use db_sts.rcd, db_sts.hash, db_sts.rcd2, db_sts.free, db_sts.tmp |
|
3637 * On exit db_sts.rcd points to the new record in the database */ |
|
3638 DB_PTR /* 0=failed */ |
|
3639 db_add_rcd(DCC_EMSG emsg, const DB_RCD *new_rcd) |
|
3640 { |
|
3641 u_int new_rcd_len, pad_len; |
|
3642 DB_PTR new_db_csize, rcd_pos, new_page_num; |
|
3643 DB_BUF *b; |
|
3644 |
|
3645 if (!db_make_dirty(emsg)) |
|
3646 return 0; |
|
3647 |
|
3648 new_rcd_len = (sizeof(*new_rcd) |
|
3649 - sizeof(new_rcd->cks) |
|
3650 + (DB_NUM_CKS(new_rcd) * sizeof(new_rcd->cks[0]))); |
|
3651 |
|
3652 rcd_pos = db_csize; |
|
3653 new_db_csize = rcd_pos+new_rcd_len; |
|
3654 |
|
3655 new_page_num = DB_PTR2PG_NUM(new_db_csize, db_pagesize); |
|
3656 if (new_page_num == DB_PTR2PG_NUM(db_csize, db_pagesize)) { |
|
3657 if (!map_db(emsg, rcd_pos, new_rcd_len, &db_sts.rcd, 0)) |
|
3658 return 0; |
|
3659 |
|
3660 } else { |
|
3661 /* fill with zeros to get past a page boundary. */ |
|
3662 pad_len = new_page_num*db_pagesize - db_csize; |
|
3663 pad_len = (((pad_len + DB_RCD_HDR_LEN-1) / DB_RCD_HDR_LEN) |
|
3664 * DB_RCD_HDR_LEN); |
|
3665 if (pad_len != 0) { |
|
3666 if (!map_db(emsg, db_csize, pad_len, &db_sts.rcd, 0)) |
|
3667 return 0; |
|
3668 memset(db_sts.rcd.d.r, 0, pad_len); |
|
3669 db_set_flush(&db_sts.rcd, 1, pad_len); |
|
3670 db_csize += pad_len; |
|
3671 |
|
3672 rcd_pos = db_csize; |
|
3673 new_db_csize = rcd_pos+new_rcd_len; |
|
3674 } |
|
3675 |
|
3676 /* extend the file by writing a full page to it with write(), |
|
3677 * because extending by mmap() often does not work */ |
|
3678 db_fsize = db_csize+db_pagesize; |
|
3679 if (!map_db(emsg, rcd_pos, db_pagesize, &db_sts.rcd, 1)) |
|
3680 return 0; |
|
3681 b = db_sts.rcd.b; |
|
3682 b->flush = (DB_BUF_FM)-1; |
|
3683 |
|
3684 /* push new page to disk if dblist or dbclean is running */ |
|
3685 if (db_minimum_map) { |
|
3686 rel_db_state(&db_sts.rcd); |
|
3687 if (!buf_munmap(emsg, b)) |
|
3688 return 0; |
|
3689 if (!map_db(emsg, rcd_pos, new_rcd_len, &db_sts.rcd, 0)) |
|
3690 return 0; |
|
3691 } |
|
3692 } |
|
3693 |
|
3694 /* install the record */ |
|
3695 memcpy(db_sts.rcd.d.r, new_rcd, new_rcd_len); |
|
3696 /* Mark its buffer to be sent to the disk to keep the database |
|
3697 * as good as possible even if we crash. We don't need to worry |
|
3698 * about later changes to the hash links because dbclean will |
|
3699 * rebuild them if we crash */ |
|
3700 db_set_flush(&db_sts.rcd, 1, new_rcd_len); |
|
3701 db_csize = new_db_csize; |
|
3702 |
|
3703 /* install pointers in the hash table |
|
3704 * and update the total counts in the record */ |
|
3705 if (!db_link_rcd(emsg, 0, MAX_HASH_ENTRIES)) |
|
3706 return 0; |
|
3707 |
|
3708 ++db_stats.adds; |
|
3709 return rcd_pos; |
|
3710 } |