1/* Cache memory handling.
2 Copyright (C) 2004-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <https://www.gnu.org/licenses/>. */
17
18#include <assert.h>
19#include <errno.h>
20#include <error.h>
21#include <fcntl.h>
22#include <inttypes.h>
23#include <libintl.h>
24#include <limits.h>
25#include <obstack.h>
26#include <stdlib.h>
27#include <string.h>
28#include <unistd.h>
29#include <sys/mman.h>
30#include <sys/param.h>
31
32#include "dbg_log.h"
33#include "nscd.h"
34
35
36static int
37sort_he (const void *p1, const void *p2)
38{
39 struct hashentry *h1 = *(struct hashentry **) p1;
40 struct hashentry *h2 = *(struct hashentry **) p2;
41
42 if (h1 < h2)
43 return -1;
44 if (h1 > h2)
45 return 1;
46 return 0;
47}
48
49
50static int
51sort_he_data (const void *p1, const void *p2)
52{
53 struct hashentry *h1 = *(struct hashentry **) p1;
54 struct hashentry *h2 = *(struct hashentry **) p2;
55
56 if (h1->packet < h2->packet)
57 return -1;
58 if (h1->packet > h2->packet)
59 return 1;
60 return 0;
61}
62
63
64/* Basic definitions for the bitmap implementation. Only BITMAP_T
65 needs to be changed to choose a different word size. */
66#define BITMAP_T uint8_t
67#define BITS (CHAR_BIT * sizeof (BITMAP_T))
68#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
69#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
70
71
72static void
73markrange (BITMAP_T *mark, ref_t start, size_t len)
74{
75 /* Adjust parameters for block alignment. */
76 assert ((start & BLOCK_ALIGN_M1) == 0);
77 start /= BLOCK_ALIGN;
78 len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
79
80 size_t elem = start / BITS;
81
82 if (start % BITS != 0)
83 {
84 if (start % BITS + len <= BITS)
85 {
86 /* All fits in the partial byte. */
87 mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
88 return;
89 }
90
91 mark[elem++] |= ALLBITS << (start % BITS);
92 len -= BITS - (start % BITS);
93 }
94
95 while (len >= BITS)
96 {
97 mark[elem++] = ALLBITS;
98 len -= BITS;
99 }
100
101 if (len > 0)
102 mark[elem] |= ALLBITS >> (BITS - len);
103}
104
105
106void
107gc (struct database_dyn *db)
108{
109 /* We need write access. */
110 pthread_rwlock_wrlock (rwlock: &db->lock);
111
112 /* And the memory handling lock. */
113 pthread_mutex_lock (mutex: &db->memlock);
114
115 /* We need an array representing the data area. All memory
116 allocation is BLOCK_ALIGN aligned so this is the level at which
117 we have to look at the memory. We use a mark and sweep algorithm
118 where the marks are placed in this array. */
119 assert (db->head->first_free % BLOCK_ALIGN == 0);
120
121 BITMAP_T *mark;
122 bool mark_use_malloc;
123 /* In prune_cache we are also using a dynamically allocated array.
124 If the array in the caller is too large we have malloc'ed it. */
125 size_t stack_used = sizeof (bool) * db->head->module;
126 if (__glibc_unlikely (stack_used > MAX_STACK_USE))
127 stack_used = 0;
128 size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
129 size_t memory_needed = nmark * sizeof (BITMAP_T);
130 if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
131 {
132 mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
133 mark_use_malloc = false;
134 memset (s: mark, c: '\0', n: memory_needed);
135 }
136 else
137 {
138 mark = (BITMAP_T *) xcalloc (n: 1, s: memory_needed);
139 mark_use_malloc = true;
140 }
141
142 /* Create an array which can hold pointer to all the entries in hash
143 entries. */
144 memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
145 struct hashentry **he;
146 struct hashentry **he_data;
147 bool he_use_malloc;
148 if (__glibc_likely (stack_used + memory_needed <= MAX_STACK_USE))
149 {
150 he = alloca_account (memory_needed, stack_used);
151 he_use_malloc = false;
152 }
153 else
154 {
155 he = xmalloc (n: memory_needed);
156 he_use_malloc = true;
157 }
158 he_data = &he[db->head->nentries];
159
160 size_t cnt = 0;
161 for (size_t idx = 0; idx < db->head->module; ++idx)
162 {
163 ref_t *prevp = &db->head->array[idx];
164 ref_t run = *prevp;
165
166 while (run != ENDREF)
167 {
168 assert (cnt < db->head->nentries);
169 he[cnt] = (struct hashentry *) (db->data + run);
170
171 he[cnt]->prevp = prevp;
172 prevp = &he[cnt]->next;
173
174 /* This is the hash entry itself. */
175 markrange (mark, start: run, len: sizeof (struct hashentry));
176
177 /* Add the information for the data itself. We do this
178 only for the one special entry marked with FIRST. */
179 if (he[cnt]->first)
180 {
181 struct datahead *dh
182 = (struct datahead *) (db->data + he[cnt]->packet);
183 markrange (mark, start: he[cnt]->packet, len: dh->allocsize);
184 }
185
186 run = he[cnt]->next;
187
188 ++cnt;
189 }
190 }
191 assert (cnt == db->head->nentries);
192
193 /* Sort the entries by the addresses of the referenced data. All
194 the entries pointing to the same DATAHEAD object will have the
195 same key. Stability of the sorting is unimportant. */
196 memcpy (dest: he_data, src: he, n: cnt * sizeof (struct hashentry *));
197 qsort (base: he_data, nmemb: cnt, size: sizeof (struct hashentry *), compar: sort_he_data);
198
199 /* Sort the entries by their address. */
200 qsort (base: he, nmemb: cnt, size: sizeof (struct hashentry *), compar: sort_he);
201
202#define obstack_chunk_alloc xmalloc
203#define obstack_chunk_free free
204 struct obstack ob;
205 obstack_init (&ob);
206
207 /* Determine the highest used address. */
208 size_t high = nmark;
209 while (high > 0 && mark[high - 1] == 0)
210 --high;
211
212 /* No memory used. */
213 if (high == 0)
214 {
215 db->head->first_free = 0;
216 goto out;
217 }
218
219 /* Determine the highest offset. */
220 BITMAP_T mask = HIGHBIT;
221 while ((mark[high - 1] & mask) == 0)
222 mask >>= 1;
223
224 /* Now we can iterate over the MARK array and find bits which are not
225 set. These represent memory which can be recovered. */
226 size_t byte = 0;
227 /* Find the first gap. */
228 while (byte < high && mark[byte] == ALLBITS)
229 ++byte;
230
231 if (byte == high
232 || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
233 /* No gap. */
234 goto out;
235
236 mask = 1;
237 cnt = 0;
238 while ((mark[byte] & mask) != 0)
239 {
240 ++cnt;
241 mask <<= 1;
242 }
243 ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
244 assert (off_free <= db->head->first_free);
245
246 struct hashentry **next_hash = he;
247 struct hashentry **next_data = he_data;
248
249 /* Skip over the hash entries in the first block which does not get
250 moved. */
251 while (next_hash < &he[db->head->nentries]
252 && *next_hash < (struct hashentry *) (db->data + off_free))
253 ++next_hash;
254
255 while (next_data < &he_data[db->head->nentries]
256 && (*next_data)->packet < off_free)
257 ++next_data;
258
259
260 /* Now we start modifying the data. Make sure all readers of the
261 data are aware of this and temporarily don't use the data. */
262 atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
263 assert ((db->head->gc_cycle & 1) == 1);
264
265
266 /* We do not perform the move operations right away since the
267 he_data array is not sorted by the address of the data. */
268 struct moveinfo
269 {
270 void *from;
271 void *to;
272 size_t size;
273 struct moveinfo *next;
274 } *moves = NULL;
275
276 while (byte < high)
277 {
278 /* Search for the next filled block. BYTE is the index of the
279 entry in MARK, MASK is the bit, and CNT is the bit number.
280 OFF_FILLED is the corresponding offset. */
281 if ((mark[byte] & ~(mask - 1)) == 0)
282 {
283 /* No other bit set in the same element of MARK. Search in the
284 following memory. */
285 do
286 ++byte;
287 while (byte < high && mark[byte] == 0);
288
289 if (byte == high)
290 /* That was it. */
291 break;
292
293 mask = 1;
294 cnt = 0;
295 }
296 /* Find the exact bit. */
297 while ((mark[byte] & mask) == 0)
298 {
299 ++cnt;
300 mask <<= 1;
301 }
302
303 ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
304 assert (off_alloc <= db->head->first_free);
305
306 /* Find the end of the used area. */
307 if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
308 {
309 /* All other bits set. Search the next bytes in MARK. */
310 do
311 ++byte;
312 while (byte < high && mark[byte] == ALLBITS);
313
314 mask = 1;
315 cnt = 0;
316 }
317 if (byte < high)
318 {
319 /* Find the exact bit. */
320 while ((mark[byte] & mask) != 0)
321 {
322 ++cnt;
323 mask <<= 1;
324 }
325 }
326
327 ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
328 assert (off_allocend <= db->head->first_free);
329 /* Now we know that we can copy the area from OFF_ALLOC to
330 OFF_ALLOCEND (not included) to the memory starting at
331 OFF_FREE. First fix up all the entries for the
332 displacement. */
333 ref_t disp = off_alloc - off_free;
334
335 struct moveinfo *new_move;
336 if (__builtin_expect (stack_used + sizeof (*new_move) <= MAX_STACK_USE,
337 1))
338 new_move = alloca_account (sizeof (*new_move), stack_used);
339 else
340 new_move = obstack_alloc (&ob, sizeof (*new_move));
341 new_move->from = db->data + off_alloc;
342 new_move->to = db->data + off_free;
343 new_move->size = off_allocend - off_alloc;
344 /* Create a circular list to be always able to append at the end. */
345 if (moves == NULL)
346 moves = new_move->next = new_move;
347 else
348 {
349 new_move->next = moves->next;
350 moves = moves->next = new_move;
351 }
352
353 /* The following loop will prepare to move this much data. */
354 off_free += off_allocend - off_alloc;
355
356 while (off_alloc < off_allocend)
357 {
358 /* Determine whether the next entry is for a hash entry or
359 the data. */
360 if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
361 {
362 /* Just correct the forward reference. */
363 *(*next_hash++)->prevp -= disp;
364
365 off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
366 & ~BLOCK_ALIGN_M1);
367 }
368 else
369 {
370 assert (next_data < &he_data[db->head->nentries]);
371 assert ((*next_data)->packet == off_alloc);
372
373 struct datahead *dh = (struct datahead *) (db->data + off_alloc);
374 do
375 {
376 assert ((*next_data)->key >= (*next_data)->packet);
377 assert ((*next_data)->key + (*next_data)->len
378 <= (*next_data)->packet + dh->allocsize);
379
380 (*next_data)->packet -= disp;
381 (*next_data)->key -= disp;
382 ++next_data;
383 }
384 while (next_data < &he_data[db->head->nentries]
385 && (*next_data)->packet == off_alloc);
386
387 off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
388 }
389 }
390 assert (off_alloc == off_allocend);
391
392 assert (off_alloc <= db->head->first_free);
393 if (off_alloc == db->head->first_free)
394 /* We are done, that was the last block. */
395 break;
396 }
397 assert (next_hash == &he[db->head->nentries]);
398 assert (next_data == &he_data[db->head->nentries]);
399
400 /* Now perform the actual moves. */
401 if (moves != NULL)
402 {
403 struct moveinfo *runp = moves->next;
404 do
405 {
406 assert ((char *) runp->to >= db->data);
407 assert ((char *) runp->to + runp->size
408 <= db->data + db->head->first_free);
409 assert ((char *) runp->from >= db->data);
410 assert ((char *) runp->from + runp->size
411 <= db->data + db->head->first_free);
412
413 /* The regions may overlap. */
414 memmove (dest: runp->to, src: runp->from, n: runp->size);
415 runp = runp->next;
416 }
417 while (runp != moves->next);
418
419 if (__glibc_unlikely (debug_level >= 3))
420 dbg_log (_("freed %zu bytes in %s cache"),
421 (size_t) (db->head->first_free
422 - ((char *) moves->to + moves->size - db->data)),
423 dbnames[db - dbs]);
424
425 /* The byte past the end of the last copied block is the next
426 available byte. */
427 db->head->first_free = (char *) moves->to + moves->size - db->data;
428
429 /* Consistency check. */
430 if (__glibc_unlikely (debug_level >= 3))
431 {
432 for (size_t idx = 0; idx < db->head->module; ++idx)
433 {
434 ref_t run = db->head->array[idx];
435 size_t cnt = 0;
436
437 while (run != ENDREF)
438 {
439 if (run + sizeof (struct hashentry) > db->head->first_free)
440 {
441 dbg_log (str: "entry %zu in hash bucket %zu out of bounds: "
442 "%" PRIu32 "+%zu > %zu\n",
443 cnt, idx, run, sizeof (struct hashentry),
444 (size_t) db->head->first_free);
445 break;
446 }
447
448 struct hashentry *he = (struct hashentry *) (db->data + run);
449
450 if (he->key + he->len > db->head->first_free)
451 dbg_log (str: "key of entry %zu in hash bucket %zu out of "
452 "bounds: %" PRIu32 "+%zu > %zu\n",
453 cnt, idx, he->key, (size_t) he->len,
454 (size_t) db->head->first_free);
455
456 if (he->packet + sizeof (struct datahead)
457 > db->head->first_free)
458 dbg_log (str: "packet of entry %zu in hash bucket %zu out of "
459 "bounds: %" PRIu32 "+%zu > %zu\n",
460 cnt, idx, he->packet, sizeof (struct datahead),
461 (size_t) db->head->first_free);
462 else
463 {
464 struct datahead *dh = (struct datahead *) (db->data
465 + he->packet);
466 if (he->packet + dh->allocsize
467 > db->head->first_free)
468 dbg_log (str: "full key of entry %zu in hash bucket %zu "
469 "out of bounds: %" PRIu32 "+%zu > %zu",
470 cnt, idx, he->packet, (size_t) dh->allocsize,
471 (size_t) db->head->first_free);
472 }
473
474 run = he->next;
475 ++cnt;
476 }
477 }
478 }
479 }
480
481 /* Make sure the data on disk is updated. */
482 if (db->persistent)
483 msync (addr: db->head, len: db->data + db->head->first_free - (char *) db->head,
484 MS_ASYNC);
485
486
487 /* Now we are done modifying the data. */
488 atomic_fetch_add_relaxed (&db->head->gc_cycle, 1);
489 assert ((db->head->gc_cycle & 1) == 0);
490
491 /* We are done. */
492 out:
493 pthread_mutex_unlock (mutex: &db->memlock);
494 pthread_rwlock_unlock (rwlock: &db->lock);
495
496 if (he_use_malloc)
497 free (ptr: he);
498 if (mark_use_malloc)
499 free (ptr: mark);
500
501 obstack_free (&ob, NULL);
502}
503
504
505void *
506mempool_alloc (struct database_dyn *db, size_t len, int data_alloc)
507{
508 /* Make sure LEN is a multiple of our maximum alignment so we can
509 keep track of used memory is multiples of this alignment value. */
510 if ((len & BLOCK_ALIGN_M1) != 0)
511 len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
512
513 if (data_alloc)
514 pthread_rwlock_rdlock (rwlock: &db->lock);
515
516 pthread_mutex_lock (mutex: &db->memlock);
517
518 assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);
519
520 bool tried_resize = false;
521 void *res;
522 retry:
523 res = db->data + db->head->first_free;
524
525 if (__glibc_unlikely (db->head->first_free + len > db->head->data_size))
526 {
527 if (! tried_resize)
528 {
529 /* Try to resize the database. Grow size of 1/8th. */
530 size_t oldtotal = (sizeof (struct database_pers_head)
531 + roundup (db->head->module * sizeof (ref_t),
532 ALIGN)
533 + db->head->data_size);
534 size_t new_data_size = (db->head->data_size
535 + MAX (2 * len, db->head->data_size / 8));
536 size_t newtotal = (sizeof (struct database_pers_head)
537 + roundup (db->head->module * sizeof (ref_t), ALIGN)
538 + new_data_size);
539 if (newtotal > db->max_db_size)
540 {
541 new_data_size -= newtotal - db->max_db_size;
542 newtotal = db->max_db_size;
543 }
544
545 if (db->mmap_used && newtotal > oldtotal
546 /* We only have to adjust the file size. The new pages
547 become magically available. */
548 && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
549 newtotal
550 - oldtotal)) == 0)
551 {
552 db->head->data_size = new_data_size;
553 tried_resize = true;
554 goto retry;
555 }
556 }
557
558 if (data_alloc)
559 pthread_rwlock_unlock (rwlock: &db->lock);
560
561 if (! db->last_alloc_failed)
562 {
563 dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
564
565 db->last_alloc_failed = true;
566 }
567
568 ++db->head->addfailed;
569
570 /* No luck. */
571 res = NULL;
572 }
573 else
574 {
575 db->head->first_free += len;
576
577 db->last_alloc_failed = false;
578
579 }
580
581 pthread_mutex_unlock (mutex: &db->memlock);
582
583 return res;
584}
585

source code of glibc/nscd/mem.c