/* Cache memory handling.
   Copyright (C) 2004-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <obstack.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"


static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

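  /* Both entries point into the same mapped data area, so comparing
     the pointer values orders them by their offset in the mapping.  */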
  if (h1 < h2)
    return -1;
  if (h1 > h2)
    return 1;
  return 0;
}


static int
sort_he_data (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1->packet < h2->packet)
    return -1;
  if (h1->packet > h2->packet)
    return 1;
  return 0;
}


/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))


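/* With the default BITMAP_T of uint8_t: BITS == 8, ALLBITS == 0xff and
   HIGHBIT == 0x80, so each bitmap word covers eight BLOCK_ALIGN-sized
   blocks of the data area.

   markrange marks LEN bytes starting at the block-aligned offset START
   as in use, one bit per BLOCK_ALIGN block.  For example,
   markrange (mark, 2 * BLOCK_ALIGN, 3 * BLOCK_ALIGN) sets bits 2 to 4
   of mark[0], i.e. mark[0] |= 0x1c with the uint8_t bitmap.  */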
static void
markrange (BITMAP_T *mark, ref_t start, size_t len)
{
  /* Adjust parameters for block alignment.  */
  assert ((start & BLOCK_ALIGN_M1) == 0);
  start /= BLOCK_ALIGN;
  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;

  size_t elem = start / BITS;

  if (start % BITS != 0)
    {
      if (start % BITS + len <= BITS)
        {
          /* All fits in the partial byte.  */
          mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
          return;
        }

      mark[elem++] |= ALLBITS << (start % BITS);
      len -= BITS - (start % BITS);
    }

  while (len >= BITS)
    {
      mark[elem++] = ALLBITS;
      len -= BITS;
    }

  if (len > 0)
    mark[elem] |= ALLBITS >> (BITS - len);
}


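/* Mark and sweep the data area of DB: mark every block that is still
   reachable from the hash table, then compact the live blocks towards
   the start of the area and fix up all references to them.  The write
   lock and the memory lock are held for the duration.  */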
void
gc (struct database_dyn *db)
{
  /* We need write access.  */
  pthread_rwlock_wrlock (&db->lock);

  /* And the memory handling lock.  */
  pthread_mutex_lock (&db->memlock);

  /* We need an array representing the data area.  All memory
     allocation is BLOCK_ALIGN aligned so this is the level at which
     we have to look at the memory.  We use a mark and sweep algorithm
     where the marks are placed in this array.  */
  assert (db->head->first_free % BLOCK_ALIGN == 0);

  BITMAP_T *mark;
  bool mark_use_malloc;
  /* In prune_cache we are also using a dynamically allocated array.
     If the array in the caller is too large we have malloc'ed it.  */
  size_t stack_used = sizeof (bool) * db->head->module;
  if (__glibc_unlikely (stack_used > MAX_STACK_USE))
    stack_used = 0;
  size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
  size_t memory_needed = nmark * sizeof (BITMAP_T);
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
      mark_use_malloc = false;
      memset (mark, '\0', memory_needed);
    }
  else
    {
      mark = (BITMAP_T *) xcalloc (1, memory_needed);
      mark_use_malloc = true;
    }

  /* Create an array which can hold pointers to all the entries in the
     hash table.  */
  memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
  struct hashentry **he;
  struct hashentry **he_data;
  bool he_use_malloc;
  if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
    {
      he = alloca_account (memory_needed, stack_used);
      he_use_malloc = false;
    }
  else
    {
      he = xmalloc (memory_needed);
      he_use_malloc = true;
    }
  he_data = &he[db->head->nentries];
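  /* The single allocation is split in two halves: HE will be sorted by
     the address of the hash entry itself, HE_DATA by the address of
     the referenced data packet (see the two qsort calls below).  */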

  size_t cnt = 0;
  for (size_t idx = 0; idx < db->head->module; ++idx)
    {
      ref_t *prevp = &db->head->array[idx];
      ref_t run = *prevp;

      while (run != ENDREF)
        {
          assert (cnt < db->head->nentries);
          he[cnt] = (struct hashentry *) (db->data + run);

          he[cnt]->prevp = prevp;
          prevp = &he[cnt]->next;

          /* This is the hash entry itself.  */
          markrange (mark, run, sizeof (struct hashentry));

          /* Add the information for the data itself.  We do this
             only for the one special entry marked with FIRST.  */
          if (he[cnt]->first)
            {
              struct datahead *dh
                = (struct datahead *) (db->data + he[cnt]->packet);
              markrange (mark, he[cnt]->packet, dh->allocsize);
            }

          run = he[cnt]->next;

          ++cnt;
        }
    }
  assert (cnt == db->head->nentries);
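  /* At this point every hash entry and, via the FIRST entries, every
     data packet is marked live in MARK; any block left unmarked is
     garbage that the compaction below can reclaim.  */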

  /* Sort the entries by the addresses of the referenced data.  All
     the entries pointing to the same DATAHEAD object will have the
     same key.  Stability of the sorting is unimportant.  */
  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);

  /* Sort the entries by their address.  */
  qsort (he, cnt, sizeof (struct hashentry *), sort_he);

#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
  struct obstack ob;
  obstack_init (&ob);

  /* Determine the highest used address.  */
  size_t high = nmark;
  while (high > 0 && mark[high - 1] == 0)
    --high;

  /* No memory used.  */
  if (high == 0)
    {
      db->head->first_free = 0;
      goto out;
    }

  /* Determine the highest offset.  */
  BITMAP_T mask = HIGHBIT;
  while ((mark[high - 1] & mask) == 0)
    mask >>= 1;

  /* Now we can iterate over the MARK array and find bits which are not
     set.  These represent memory which can be recovered.  */
  size_t byte = 0;
  /* Find the first gap.  */
  while (byte < high && mark[byte] == ALLBITS)
    ++byte;

  if (byte == high
      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
    /* No gap.  */
    goto out;

  mask = 1;
  cnt = 0;
  while ((mark[byte] & mask) != 0)
    {
      ++cnt;
      mask <<= 1;
    }
  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
  assert (off_free <= db->head->first_free);
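  /* OFF_FREE is now the offset of the first unused block; everything
     that is still marked live above it will be compacted down towards
     this point.  */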

  struct hashentry **next_hash = he;
  struct hashentry **next_data = he_data;

  /* Skip over the hash entries in the first block which does not get
     moved.  */
  while (next_hash < &he[db->head->nentries]
         && *next_hash < (struct hashentry *) (db->data + off_free))
    ++next_hash;

  while (next_data < &he_data[db->head->nentries]
         && (*next_data)->packet < off_free)
    ++next_data;


  /* Now we start modifying the data.  Make sure all readers of the
     data are aware of this and temporarily don't use the data.  */
  atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
  assert ((db->head->gc_cycle & 1) == 1);
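  /* Client processes that mmap the database read-only check GC_CYCLE:
     an odd value, or a value that changed while they were reading,
     tells them the data may be inconsistent, so they retry or fall
     back to asking the daemon over the socket.  */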


  /* We do not perform the move operations right away since the
     he_data array is not sorted by the address of the data.  */
  struct moveinfo
  {
    void *from;
    void *to;
    size_t size;
    struct moveinfo *next;
  } *moves = NULL;
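  /* MOVES always points to the most recently appended element; since
     the list is circular, MOVES->next is the oldest one.  This way new
     elements can be appended in O(1) and the final copy loop can start
     at the head without a separate pointer.  */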

  while (byte < high)
    {
      /* Search for the next filled block.  BYTE is the index of the
         entry in MARK, MASK is the bit, and CNT is the bit number.
         OFF_ALLOC is the corresponding offset.  */
      if ((mark[byte] & ~(mask - 1)) == 0)
        {
          /* No other bit set in the same element of MARK.  Search in the
             following memory.  */
          do
            ++byte;
          while (byte < high && mark[byte] == 0);

          if (byte == high)
            /* That was it.  */
            break;

          mask = 1;
          cnt = 0;
        }
      /* Find the exact bit.  */
      while ((mark[byte] & mask) == 0)
        {
          ++cnt;
          mask <<= 1;
        }

      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_alloc <= db->head->first_free);

      /* Find the end of the used area.  */
      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
        {
          /* All other bits set.  Search the next bytes in MARK.  */
          do
            ++byte;
          while (byte < high && mark[byte] == ALLBITS);

          mask = 1;
          cnt = 0;
        }
      if (byte < high)
        {
          /* Find the exact bit.  */
          while ((mark[byte] & mask) != 0)
            {
              ++cnt;
              mask <<= 1;
            }
        }

      ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_allocend <= db->head->first_free);
      /* Now we know that we can copy the area from OFF_ALLOC to
         OFF_ALLOCEND (not included) to the memory starting at
         OFF_FREE.  First fix up all the entries for the
         displacement.  */
      ref_t disp = off_alloc - off_free;
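      /* Every reference into [OFF_ALLOC, OFF_ALLOCEND) shrinks by DISP
         once the block has been moved down to OFF_FREE.  */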

      struct moveinfo *new_move;
      if (__builtin_expect (stack_used + sizeof (*new_move) <= MAX_STACK_USE,
                            1))
        new_move = alloca_account (sizeof (*new_move), stack_used);
      else
        new_move = obstack_alloc (&ob, sizeof (*new_move));
      new_move->from = db->data + off_alloc;
      new_move->to = db->data + off_free;
      new_move->size = off_allocend - off_alloc;
      /* Create a circular list to be always able to append at the end.  */
      if (moves == NULL)
        moves = new_move->next = new_move;
      else
        {
          new_move->next = moves->next;
          moves = moves->next = new_move;
        }

      /* The following loop will prepare to move this much data.  */
      off_free += off_allocend - off_alloc;

      while (off_alloc < off_allocend)
        {
          /* Determine whether the next entry is for a hash entry or
             the data.  */
          if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
            {
              /* Just correct the forward reference.  */
              *(*next_hash++)->prevp -= disp;

              off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
                            & ~BLOCK_ALIGN_M1);
            }
          else
            {
              assert (next_data < &he_data[db->head->nentries]);
              assert ((*next_data)->packet == off_alloc);

              struct datahead *dh = (struct datahead *) (db->data + off_alloc);
              do
                {
                  assert ((*next_data)->key >= (*next_data)->packet);
                  assert ((*next_data)->key + (*next_data)->len
                          <= (*next_data)->packet + dh->allocsize);

                  (*next_data)->packet -= disp;
                  (*next_data)->key -= disp;
                  ++next_data;
                }
              while (next_data < &he_data[db->head->nentries]
                     && (*next_data)->packet == off_alloc);

              off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
            }
        }
      assert (off_alloc == off_allocend);

      assert (off_alloc <= db->head->first_free);
      if (off_alloc == db->head->first_free)
        /* We are done, that was the last block.  */
        break;
    }
  assert (next_hash == &he[db->head->nentries]);
  assert (next_data == &he_data[db->head->nentries]);

  /* Now perform the actual moves.  */
  if (moves != NULL)
    {
      struct moveinfo *runp = moves->next;
      do
        {
          assert ((char *) runp->to >= db->data);
          assert ((char *) runp->to + runp->size
                  <= db->data + db->head->first_free);
          assert ((char *) runp->from >= db->data);
          assert ((char *) runp->from + runp->size
                  <= db->data + db->head->first_free);

          /* The regions may overlap.  */
          memmove (runp->to, runp->from, runp->size);
          runp = runp->next;
        }
      while (runp != moves->next);

      if (__glibc_unlikely (debug_level >= 3))
        dbg_log (_("freed %zu bytes in %s cache"),
                 (size_t) (db->head->first_free
                           - ((char *) moves->to + moves->size - db->data)),
                 dbnames[db - dbs]);

      /* The byte past the end of the last copied block is the next
         available byte.  */
      db->head->first_free = (char *) moves->to + moves->size - db->data;

      /* Consistency check.  */
      if (__glibc_unlikely (debug_level >= 3))
        {
          for (size_t idx = 0; idx < db->head->module; ++idx)
            {
              ref_t run = db->head->array[idx];
              size_t cnt = 0;

              while (run != ENDREF)
                {
                  if (run + sizeof (struct hashentry) > db->head->first_free)
                    {
                      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
                               "%" PRIu32 "+%zu > %zu\n",
                               cnt, idx, run, sizeof (struct hashentry),
                               (size_t) db->head->first_free);
                      break;
                    }

                  struct hashentry *he = (struct hashentry *) (db->data + run);

                  if (he->key + he->len > db->head->first_free)
                    dbg_log ("key of entry %zu in hash bucket %zu out of "
                             "bounds: %" PRIu32 "+%zu > %zu\n",
                             cnt, idx, he->key, (size_t) he->len,
                             (size_t) db->head->first_free);

                  if (he->packet + sizeof (struct datahead)
                      > db->head->first_free)
                    dbg_log ("packet of entry %zu in hash bucket %zu out of "
                             "bounds: %" PRIu32 "+%zu > %zu\n",
                             cnt, idx, he->packet, sizeof (struct datahead),
                             (size_t) db->head->first_free);
                  else
                    {
                      struct datahead *dh = (struct datahead *) (db->data
                                                                 + he->packet);
                      if (he->packet + dh->allocsize
                          > db->head->first_free)
                        dbg_log ("full key of entry %zu in hash bucket %zu "
                                 "out of bounds: %" PRIu32 "+%zu > %zu",
                                 cnt, idx, he->packet, (size_t) dh->allocsize,
                                 (size_t) db->head->first_free);
                    }

                  run = he->next;
                  ++cnt;
                }
            }
        }
    }

  /* Make sure the data on disk is updated.  */
  if (db->persistent)
    msync (db->head, db->data + db->head->first_free - (char *) db->head,
           MS_ASYNC);


  /* Now we are done modifying the data.  */
  atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
  assert ((db->head->gc_cycle & 1) == 0);

  /* We are done.  */
 out:
  pthread_mutex_unlock (&db->memlock);
  pthread_rwlock_unlock (&db->lock);

  if (he_use_malloc)
    free (he);
  if (mark_use_malloc)
    free (mark);

  obstack_free (&ob, NULL);
}


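/* Allocate LEN bytes (rounded up to a multiple of BLOCK_ALIGN) from the
   data area of DB and return a pointer to it, or NULL if the request
   cannot be satisfied even after growing the mapping.  If DATA_ALLOC is
   nonzero the database read lock is taken; it is released again on
   failure, while on success the caller is responsible for dropping it.  */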
void *
mempool_alloc (struct database_dyn *db, size_t len, int data_alloc)
{
  /* Make sure LEN is a multiple of our maximum alignment so we can
     keep track of used memory in multiples of this alignment value.  */
  if ((len & BLOCK_ALIGN_M1) != 0)
    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
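  /* For example, if BLOCK_ALIGN is 8, a request for 21 bytes is rounded
     up to 24 bytes here.  */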

  if (data_alloc)
    pthread_rwlock_rdlock (&db->lock);

  pthread_mutex_lock (&db->memlock);

  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);

  bool tried_resize = false;
  void *res;
 retry:
  res = db->data + db->head->first_free;

  if (__glibc_unlikely (db->head->first_free + len > db->head->data_size))
    {
      if (! tried_resize)
        {
          /* Try to resize the database.  Grow its size by 1/8th.  */
          size_t oldtotal = (sizeof (struct database_pers_head)
                             + roundup (db->head->module * sizeof (ref_t),
                                        ALIGN)
                             + db->head->data_size);
          size_t new_data_size = (db->head->data_size
                                  + MAX (2 * len, db->head->data_size / 8));
          size_t newtotal = (sizeof (struct database_pers_head)
                             + roundup (db->head->module * sizeof (ref_t),
                                        ALIGN)
                             + new_data_size);
          if (newtotal > db->max_db_size)
            {
              new_data_size -= newtotal - db->max_db_size;
              newtotal = db->max_db_size;
            }
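          /* For instance, a 1 MiB data area that receives an 8 KiB
             request grows by MAX (16 KiB, 128 KiB) = 128 KiB, unless
             that would push the whole mapping past max_db_size, in
             which case the growth has just been clipped above.  */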

          if (db->mmap_used && newtotal > oldtotal
              /* We only have to adjust the file size.  The new pages
                 become magically available.  */
              && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
                                                          newtotal
                                                          - oldtotal)) == 0)
            {
              db->head->data_size = new_data_size;
              tried_resize = true;
              goto retry;
            }
        }

      if (data_alloc)
        pthread_rwlock_unlock (&db->lock);

      if (! db->last_alloc_failed)
        {
          dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);

          db->last_alloc_failed = true;
        }

      ++db->head->addfailed;

      /* No luck.  */
      res = NULL;
    }
  else
    {
      db->head->first_free += len;

      db->last_alloc_failed = false;
    }

  pthread_mutex_unlock (&db->memlock);

  return res;
}