1/*
2 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp.h"
14#include "kmp_io.h"
15#include "kmp_wrapper_malloc.h"
16
17#if KMP_USE_HWLOC
18#if HWLOC_API_VERSION > 0x00020300
19#define KMP_HWLOC_LOCATION_TYPE_CPUSET HWLOC_LOCATION_TYPE_CPUSET
20#elif HWLOC_API_VERSION == 0x00020300
21#define KMP_HWLOC_LOCATION_TYPE_CPUSET \
22 hwloc_location::HWLOC_LOCATION_TYPE_CPUSET
23#else
24enum hwloc_memattr_id_e {
25 HWLOC_MEMATTR_ID_BANDWIDTH,
26 HWLOC_MEMATTR_ID_CAPACITY
27};
28#endif
29#endif // KMP_USE_HWLOC
30
31// Disable bget when it is not used
32#if KMP_USE_BGET
33
34/* Thread private buffer management code */
35
36typedef int (*bget_compact_t)(size_t, int);
37typedef void *(*bget_acquire_t)(size_t);
38typedef void (*bget_release_t)(void *);
39
40/* NOTE: bufsize must be a signed datatype */
41
42#if KMP_OS_WINDOWS
43#if KMP_ARCH_X86 || KMP_ARCH_ARM
44typedef kmp_int32 bufsize;
45#else
46typedef kmp_int64 bufsize;
47#endif
48#else
49typedef ssize_t bufsize;
50#endif // KMP_OS_WINDOWS
51
52/* The three modes of operation are fifo search, lifo search, and best-fit */
53
54typedef enum bget_mode {
55 bget_mode_fifo = 0,
56 bget_mode_lifo = 1,
57 bget_mode_best = 2
58} bget_mode_t;
59
60static void bpool(kmp_info_t *th, void *buffer, bufsize len);
61static void *bget(kmp_info_t *th, bufsize size);
62static void *bgetz(kmp_info_t *th, bufsize size);
63static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
64static void brel(kmp_info_t *th, void *buf);
65static void bectl(kmp_info_t *th, bget_compact_t compact,
66 bget_acquire_t acquire, bget_release_t release,
67 bufsize pool_incr);
68
69/* BGET CONFIGURATION */
70/* Buffer allocation size quantum: all buffers allocated are a
71 multiple of this size. This MUST be a power of two. */
72
73/* On IA-32 architecture with Linux* OS, malloc() does not
74 ensure 16 byte alignment */
75
76#if KMP_ARCH_X86 || !KMP_HAVE_QUAD
77
78#define SizeQuant 8
79#define AlignType double
80
81#else
82
83#define SizeQuant 16
84#define AlignType _Quad
85
86#endif
87
88// Define this symbol to enable the bstats() function which calculates the
89// total free space in the buffer pool, the largest available buffer, and the
90// total space currently allocated.
91#define BufStats 1
92
93#ifdef KMP_DEBUG
94
95// Define this symbol to enable the bpoold() function which dumps the buffers
96// in a buffer pool.
97#define BufDump 1
98
99// Define this symbol to enable the bpoolv() function for validating a buffer
100// pool.
101#define BufValid 1
102
103// Define this symbol to enable the bufdump() function which allows dumping the
104// contents of an allocated or free buffer.
105#define DumpData 1
106
107#ifdef NOT_USED_NOW
108
109// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
110// who attempt to use pointers into released buffers.
111#define FreeWipe 1
112
113// Use a best fit algorithm when searching for space for an allocation request.
114// This uses memory more efficiently, but allocation will be much slower.
115#define BestFit 1
116
117#endif /* NOT_USED_NOW */
118#endif /* KMP_DEBUG */
119
120static bufsize bget_bin_size[] = {
121 0,
122 // 1 << 6, /* .5 Cache line */
123 1 << 7, /* 1 Cache line, new */
124 1 << 8, /* 2 Cache lines */
125 1 << 9, /* 4 Cache lines, new */
126 1 << 10, /* 8 Cache lines */
127 1 << 11, /* 16 Cache lines, new */
128 1 << 12, 1 << 13, /* new */
129 1 << 14, 1 << 15, /* new */
130 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
131 1 << 21, /* 2MB */
132 1 << 22, /* 4MB */
133 1 << 23, /* 8MB */
134 1 << 24, /* 16MB */
135 1 << 25, /* 32MB */
136};
137
138#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
139
140struct bfhead;
141
142// Declare the interface, including the requested buffer size type, bufsize.
143
144/* Queue links */
145typedef struct qlinks {
146 struct bfhead *flink; /* Forward link */
147 struct bfhead *blink; /* Backward link */
148} qlinks_t;
149
150/* Header in allocated and free buffers */
151typedef struct bhead2 {
152 kmp_info_t *bthr; /* The thread which owns the buffer pool */
153 bufsize prevfree; /* Relative link back to previous free buffer in memory or
154 0 if previous buffer is allocated. */
155 bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
156} bhead2_t;
157
158/* Make sure the bhead structure is a multiple of SizeQuant in size. */
159typedef union bhead {
160 KMP_ALIGN(SizeQuant)
161 AlignType b_align;
162 char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
163 bhead2_t bb;
164} bhead_t;
165#define BH(p) ((bhead_t *)(p))
166
167/* Header in directly allocated buffers (by acqfcn) */
168typedef struct bdhead {
169 bufsize tsize; /* Total size, including overhead */
170 bhead_t bh; /* Common header */
171} bdhead_t;
172#define BDH(p) ((bdhead_t *)(p))
173
174/* Header in free buffers */
175typedef struct bfhead {
176 bhead_t bh; /* Common allocated/free header */
177 qlinks_t ql; /* Links on free list */
178} bfhead_t;
179#define BFH(p) ((bfhead_t *)(p))
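/* Layout note (summary of the declarations above): an allocated buffer is
   [ bhead_t | user data ] with bb.bsize negated, while a free buffer is
   [ bhead_t | qlinks_t | unused space ] with bb.bsize positive and the
   qlinks_t threading it onto one of the per-bin free lists. bb.prevfree in
   the following header is nonzero only when the buffer immediately before it
   in memory is free, which is what enables coalescing in brel(). */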
180
181typedef struct thr_data {
182 bfhead_t freelist[MAX_BGET_BINS];
183#if BufStats
184 size_t totalloc; /* Total space currently allocated */
185 long numget, numrel; /* Number of bget() and brel() calls */
186 long numpblk; /* Number of pool blocks */
187 long numpget, numprel; /* Number of block gets and rels */
188 long numdget, numdrel; /* Number of direct gets and rels */
189#endif /* BufStats */
190
191 /* Automatic expansion block management functions */
192 bget_compact_t compfcn;
193 bget_acquire_t acqfcn;
194 bget_release_t relfcn;
195
196 bget_mode_t mode; /* what allocation mode to use? */
197
198 bufsize exp_incr; /* Expansion block size */
199 bufsize pool_len; /* 0: no bpool calls have been made
200 -1: not all pool blocks are the same size
201 >0: (common) block size for all bpool calls made so far
202 */
203 bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
204} thr_data_t;
205
206/* Minimum allocation quantum: */
207#define QLSize (sizeof(qlinks_t))
208#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
209#define MaxSize \
210 (bufsize)( \
211 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
212// Maximum for the requested size.
213
214/* End sentinel: value placed in bsize field of dummy block delimiting
215 end of pool block. The most negative number which will fit in a
216 bufsize, defined in a way that the compiler will accept. */
217
218#define ESent \
219 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
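/* Worked example: with a 64-bit bufsize, ESent evaluates to
   -(((1 << 62) - 1) * 2) - 2 = -(2^63 - 2) - 2 = -2^63, the most negative
   representable value, written so that no intermediate constant overflows.
   (This assumes two's complement, as the assertion in bpool() also does.) */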
220
221/* Thread Data management routines */
222static int bget_get_bin(bufsize size) {
223 // binary chop bins
224 int lo = 0, hi = MAX_BGET_BINS - 1;
225
226 KMP_DEBUG_ASSERT(size > 0);
227
228 while ((hi - lo) > 1) {
229 int mid = (lo + hi) >> 1;
230 if (size < bget_bin_size[mid])
231 hi = mid - 1;
232 else
233 lo = mid;
234 }
235
236 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
237
238 return lo;
239}
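// Example: for size == 1000 the binary chop above returns bin 2 (whose
// threshold is 1 << 8 = 256); bget() then scans the free lists from that bin
// upward through the larger bins until a sufficiently large buffer is found.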
240
241static void set_thr_data(kmp_info_t *th) {
242 int i;
243 thr_data_t *data;
244
245 data = (thr_data_t *)((!th->th.th_local.bget_data)
246 ? __kmp_allocate(sizeof(*data))
247 : th->th.th_local.bget_data);
248
249 memset(data, '\0', sizeof(*data));
250
251 for (i = 0; i < MAX_BGET_BINS; ++i) {
252 data->freelist[i].ql.flink = &data->freelist[i];
253 data->freelist[i].ql.blink = &data->freelist[i];
254 }
255
256 th->th.th_local.bget_data = data;
257 th->th.th_local.bget_list = 0;
258#if !USE_CMP_XCHG_FOR_BGET
259#ifdef USE_QUEUING_LOCK_FOR_BGET
260 __kmp_init_lock(&th->th.th_local.bget_lock);
261#else
262 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
263#endif /* USE_QUEUING_LOCK_FOR_BGET */
264#endif /* ! USE_CMP_XCHG_FOR_BGET */
265}
266
267static thr_data_t *get_thr_data(kmp_info_t *th) {
268 thr_data_t *data;
269
270 data = (thr_data_t *)th->th.th_local.bget_data;
271
272 KMP_DEBUG_ASSERT(data != 0);
273
274 return data;
275}
276
277/* Walk the free list and release the enqueued buffers */
278static void __kmp_bget_dequeue(kmp_info_t *th) {
279 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
280
281 if (p != 0) {
282#if USE_CMP_XCHG_FOR_BGET
283 {
284 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
285 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
286 CCAST(void *, old_value), nullptr)) {
287 KMP_CPU_PAUSE();
288 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
289 }
290 p = CCAST(void *, old_value);
291 }
292#else /* ! USE_CMP_XCHG_FOR_BGET */
293#ifdef USE_QUEUING_LOCK_FOR_BGET
294 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
295#else
296 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
297#endif /* USE_QUEUING_LOCK_FOR_BGET */
298
299 p = (void *)th->th.th_local.bget_list;
300 th->th.th_local.bget_list = 0;
301
302#ifdef USE_QUEUING_LOCK_FOR_BGET
303 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
304#else
305 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
306#endif
307#endif /* USE_CMP_XCHG_FOR_BGET */
308
309 /* Check again to make sure the list is not empty */
310 while (p != 0) {
311 void *buf = p;
312 bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
313
314 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
315 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
316 (kmp_uintptr_t)th); // clear possible mark
317 KMP_DEBUG_ASSERT(b->ql.blink == 0);
318
319 p = (void *)b->ql.flink;
320
321 brel(th, buf);
322 }
323 }
324}
325
326/* Chain together the free buffers by using the thread owner field */
327static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
328#ifdef USE_QUEUING_LOCK_FOR_BGET
329 ,
330 kmp_int32 rel_gtid
331#endif
332) {
333 bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
334
335 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
336 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
337 (kmp_uintptr_t)th); // clear possible mark
338
339 b->ql.blink = 0;
340
341 KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
342 __kmp_gtid_from_thread(th)));
343
344#if USE_CMP_XCHG_FOR_BGET
345 {
346 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
347 /* the next pointer must be set before setting bget_list to buf to avoid
348 exposing a broken list to other threads, even for an instant. */
349 b->ql.flink = BFH(CCAST(void *, old_value));
350
351 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
352 CCAST(void *, old_value), buf)) {
353 KMP_CPU_PAUSE();
354 old_value = TCR_PTR(th->th.th_local.bget_list);
355 /* the next pointer must be set before setting bget_list to buf to avoid
356 exposing a broken list to other threads, even for an instant. */
357 b->ql.flink = BFH(CCAST(void *, old_value));
358 }
359 }
360#else /* ! USE_CMP_XCHG_FOR_BGET */
361#ifdef USE_QUEUING_LOCK_FOR_BGET
362 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
363#else
364 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
365#endif
366
367 b->ql.flink = BFH(th->th.th_local.bget_list);
368 th->th.th_local.bget_list = (void *)buf;
369
370#ifdef USE_QUEUING_LOCK_FOR_BGET
371 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
372#else
373 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
374#endif
375#endif /* USE_CMP_XCHG_FOR_BGET */
376}
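/* Cross-thread release protocol (see also brel() below): when a thread frees
   a buffer owned by another thread, brel() calls __kmp_bget_enqueue() to push
   the buffer onto the owner's th_local.bget_list (lock-free via
   compare-and-swap when USE_CMP_XCHG_FOR_BGET is set, otherwise under a lock).
   The owner drains that list with __kmp_bget_dequeue() the next time it enters
   bget(), bpool(), or one of the kmpc_* entry points, releasing each queued
   buffer into its own pool with brel(). */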
377
378/* insert buffer back onto a new freelist */
379static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
380 int bin;
381
382 KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
383 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
384
385 bin = bget_get_bin(b->bh.bb.bsize);
386
387 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
388 &thr->freelist[bin]);
389 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
390 &thr->freelist[bin]);
391
392 b->ql.flink = &thr->freelist[bin];
393 b->ql.blink = thr->freelist[bin].ql.blink;
394
395 thr->freelist[bin].ql.blink = b;
396 b->ql.blink->ql.flink = b;
397}
398
399/* unlink the buffer from the old freelist */
400static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
401 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
402 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
403
404 b->ql.blink->ql.flink = b->ql.flink;
405 b->ql.flink->ql.blink = b->ql.blink;
406}
407
408/* GET STATS -- check info on free list */
409static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
410 thr_data_t *thr = get_thr_data(th);
411 int bin;
412
413 *total_free = *max_free = 0;
414
415 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
416 bfhead_t *b, *best;
417
418 best = &thr->freelist[bin];
419 b = best->ql.flink;
420
421 while (b != &thr->freelist[bin]) {
422 *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
423 if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
424 best = b;
425
426 /* Link to next buffer */
427 b = b->ql.flink;
428 }
429
430 if (*max_free < best->bh.bb.bsize)
431 *max_free = best->bh.bb.bsize;
432 }
433
434 if (*max_free > (bufsize)sizeof(bhead_t))
435 *max_free -= sizeof(bhead_t);
436}
437
438/* BGET -- Allocate a buffer. */
439static void *bget(kmp_info_t *th, bufsize requested_size) {
440 thr_data_t *thr = get_thr_data(th);
441 bufsize size = requested_size;
442 bfhead_t *b;
443 void *buf;
444 int compactseq = 0;
445 int use_blink = 0;
446 /* For BestFit */
447 bfhead_t *best;
448
449 if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
450 return NULL;
451 }
452
453 __kmp_bget_dequeue(th); /* Release any queued buffers */
454
455 if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
456 size = SizeQ;
457 }
458#if defined(SizeQuant) && (SizeQuant > 1)
459 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
460#endif
461
462 size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
463 KMP_DEBUG_ASSERT(size >= 0);
464 KMP_DEBUG_ASSERT(size % SizeQuant == 0);
465
466 use_blink = (thr->mode == bget_mode_lifo);
467
468 /* If a compact function was provided in the call to bectl(), wrap
469 a loop around the allocation process to allow compaction to
470 intervene in case we don't find a suitable buffer in the chain. */
471
472 for (;;) {
473 int bin;
474
475 for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
476 /* Link to next buffer */
477 b = (use_blink ? thr->freelist[bin].ql.blink
478 : thr->freelist[bin].ql.flink);
479
480 if (thr->mode == bget_mode_best) {
481 best = &thr->freelist[bin];
482
483 /* Scan the free list searching for the first buffer big enough
484 to hold the requested size buffer. */
485 while (b != &thr->freelist[bin]) {
486 if (b->bh.bb.bsize >= (bufsize)size) {
487 if ((best == &thr->freelist[bin]) ||
488 (b->bh.bb.bsize < best->bh.bb.bsize)) {
489 best = b;
490 }
491 }
492
493 /* Link to next buffer */
494 b = (use_blink ? b->ql.blink : b->ql.flink);
495 }
496 b = best;
497 }
498
499 while (b != &thr->freelist[bin]) {
500 if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
501
502 // Buffer is big enough to satisfy the request. Allocate it to the
503 // caller. We must decide whether the buffer is large enough to split
504 // into the part given to the caller and a free buffer that remains
505 // on the free list, or whether the entire buffer should be removed
506 // from the free list and given to the caller in its entirety. We
507 // only split the buffer if enough room remains for a header plus the
508 // minimum quantum of allocation.
509 if ((b->bh.bb.bsize - (bufsize)size) >
510 (bufsize)(SizeQ + (sizeof(bhead_t)))) {
511 bhead_t *ba, *bn;
512
513 ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
514 bn = BH(((char *)ba) + size);
515
516 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
517
518 /* Subtract size from length of free block. */
519 b->bh.bb.bsize -= (bufsize)size;
520
521 /* Link allocated buffer to the previous free buffer. */
522 ba->bb.prevfree = b->bh.bb.bsize;
523
524 /* Plug negative size into user buffer. */
525 ba->bb.bsize = -size;
526
527 /* Mark this buffer as owned by this thread. */
528 TCW_PTR(ba->bb.bthr,
529 th); // not an allocated address (do not mark it)
530 /* Mark buffer after this one not preceded by free block. */
531 bn->bb.prevfree = 0;
532
533 // unlink buffer from old freelist, and reinsert into new freelist
534 __kmp_bget_remove_from_freelist(b);
535 __kmp_bget_insert_into_freelist(thr, b);
536#if BufStats
537 thr->totalloc += (size_t)size;
538 thr->numget++; /* Increment number of bget() calls */
539#endif
540 buf = (void *)((((char *)ba) + sizeof(bhead_t)));
541 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
542 return buf;
543 } else {
544 bhead_t *ba;
545
546 ba = BH(((char *)b) + b->bh.bb.bsize);
547
548 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
549
550 /* The buffer isn't big enough to split. Give the whole
551 shebang to the caller and remove it from the free list. */
552
553 __kmp_bget_remove_from_freelist(b);
554#if BufStats
555 thr->totalloc += (size_t)b->bh.bb.bsize;
556 thr->numget++; /* Increment number of bget() calls */
557#endif
558 /* Negate size to mark buffer allocated. */
559 b->bh.bb.bsize = -(b->bh.bb.bsize);
560
561 /* Mark this buffer as owned by this thread. */
562 TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
563 /* Zero the back pointer in the next buffer in memory
564 to indicate that this buffer is allocated. */
565 ba->bb.prevfree = 0;
566
567 /* Give user buffer starting at queue links. */
568 buf = (void *)&(b->ql);
569 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
570 return buf;
571 }
572 }
573
574 /* Link to next buffer */
575 b = (use_blink ? b->ql.blink : b->ql.flink);
576 }
577 }
578
579 /* We failed to find a buffer. If there's a compact function defined,
580 notify it of the size requested. If it returns TRUE, try the allocation
581 again. */
582
583 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
584 break;
585 }
586 }
587
588 /* No buffer available with requested size free. */
589
590 /* Don't give up yet -- look in the reserve supply. */
591 if (thr->acqfcn != 0) {
592 if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
593 /* Request is too large to fit in a single expansion block.
594 Try to satisfy it by a direct buffer acquisition. */
595 bdhead_t *bdh;
596
597 size += sizeof(bdhead_t) - sizeof(bhead_t);
598
599 KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
600
601 /* richryan */
602 bdh = BDH((*thr->acqfcn)((bufsize)size));
603 if (bdh != NULL) {
604
605 // Mark the buffer special by setting size field of its header to zero.
606 bdh->bh.bb.bsize = 0;
607
608 /* Mark this buffer as owned by this thread. */
609 TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
610 // because direct buffer never goes to free list
611 bdh->bh.bb.prevfree = 0;
612 bdh->tsize = size;
613#if BufStats
614 thr->totalloc += (size_t)size;
615 thr->numget++; /* Increment number of bget() calls */
616 thr->numdget++; /* Direct bget() call count */
617#endif
618 buf = (void *)(bdh + 1);
619 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
620 return buf;
621 }
622
623 } else {
624
625 /* Try to obtain a new expansion block */
626 void *newpool;
627
628 KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
629
630 /* richryan */
631 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
632 KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
633 if (newpool != NULL) {
634 bpool(th, newpool, thr->exp_incr);
635 buf = bget(
636 th, requested_size); /* This can't, I say, can't get into a loop. */
637 return buf;
638 }
639 }
640 }
641
642 /* Still no buffer available */
643
644 return NULL;
645}
646
647/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
648 the entire contents of the buffer to zero, not just the
649 region requested by the caller. */
650
651static void *bgetz(kmp_info_t *th, bufsize size) {
652 char *buf = (char *)bget(th, size);
653
654 if (buf != NULL) {
655 bhead_t *b;
656 bufsize rsize;
657
658 b = BH(buf - sizeof(bhead_t));
659 rsize = -(b->bb.bsize);
660 if (rsize == 0) {
661 bdhead_t *bd;
662
663 bd = BDH(buf - sizeof(bdhead_t));
664 rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
665 } else {
666 rsize -= sizeof(bhead_t);
667 }
668
669 KMP_DEBUG_ASSERT(rsize >= size);
670
671 (void)memset(buf, 0, (bufsize)rsize);
672 }
673 return ((void *)buf);
674}
675
676/* BGETR -- Reallocate a buffer. This is a minimal implementation,
677 simply in terms of brel() and bget(). It could be
678 enhanced to allow the buffer to grow into adjacent free
679 blocks and to avoid moving data unnecessarily. */
680
681static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
682 void *nbuf;
683 bufsize osize; /* Old size of buffer */
684 bhead_t *b;
685
686 nbuf = bget(th, size);
687 if (nbuf == NULL) { /* Acquire new buffer */
688 return NULL;
689 }
690 if (buf == NULL) {
691 return nbuf;
692 }
693 b = BH(((char *)buf) - sizeof(bhead_t));
694 osize = -b->bb.bsize;
695 if (osize == 0) {
696 /* Buffer acquired directly through acqfcn. */
697 bdhead_t *bd;
698
699 bd = BDH(((char *)buf) - sizeof(bdhead_t));
700 osize = bd->tsize - (bufsize)sizeof(bdhead_t);
701 } else {
702 osize -= sizeof(bhead_t);
703 }
704
705 KMP_DEBUG_ASSERT(osize > 0);
706
707 (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
708 (size_t)((size < osize) ? size : osize));
709 brel(th, buf);
710
711 return nbuf;
712}
713
714/* BREL -- Release a buffer. */
715static void brel(kmp_info_t *th, void *buf) {
716 thr_data_t *thr = get_thr_data(th);
717 bfhead_t *b, *bn;
718 kmp_info_t *bth;
719
720 KMP_DEBUG_ASSERT(buf != NULL);
721 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
722
723 b = BFH(((char *)buf) - sizeof(bhead_t));
724
725 if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
726 bdhead_t *bdh;
727
728 bdh = BDH(((char *)buf) - sizeof(bdhead_t));
729 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
730#if BufStats
731 thr->totalloc -= (size_t)bdh->tsize;
732 thr->numdrel++; /* Number of direct releases */
733 thr->numrel++; /* Increment number of brel() calls */
734#endif /* BufStats */
735#ifdef FreeWipe
736 (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
737#endif /* FreeWipe */
738
739 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
740
741 KMP_DEBUG_ASSERT(thr->relfcn != 0);
742 (*thr->relfcn)((void *)bdh); /* Release it directly. */
743 return;
744 }
745
746 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
747 ~1); // clear possible mark before comparison
748 if (bth != th) {
749 /* Add this buffer to be released by the owning thread later */
750 __kmp_bget_enqueue(bth, buf
751#ifdef USE_QUEUING_LOCK_FOR_BGET
752 ,
753 __kmp_gtid_from_thread(th)
754#endif
755 );
756 return;
757 }
758
759 /* Buffer size must be negative, indicating that the buffer is allocated. */
760 if (b->bh.bb.bsize >= 0) {
761 bn = NULL;
762 }
763 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
764
765 /* Back pointer in next buffer must be zero, indicating the same thing: */
766
767 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
768
769#if BufStats
770 thr->numrel++; /* Increment number of brel() calls */
771 thr->totalloc += (size_t)b->bh.bb.bsize;
772#endif
773
774 /* If the back link is nonzero, the previous buffer is free. */
775
776 if (b->bh.bb.prevfree != 0) {
777 /* The previous buffer is free. Consolidate this buffer with it by adding
778 the length of this buffer to the previous free buffer. Note that we
779 subtract the size in the buffer being released, since it's negative to
780 indicate that the buffer is allocated. */
781 bufsize size = b->bh.bb.bsize;
782
783 /* Make the previous buffer the one we're working on. */
784 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
785 b->bh.bb.prevfree);
786 b = BFH(((char *)b) - b->bh.bb.prevfree);
787 b->bh.bb.bsize -= size;
788
789 /* unlink the buffer from the old freelist */
790 __kmp_bget_remove_from_freelist(b);
791 } else {
792 /* The previous buffer in memory is not free (prevfree is zero). Mark this
793 buffer's size as positive (i.e. free) and fall through to place the buffer
794 on the free list as an isolated free block. */
795 b->bh.bb.bsize = -b->bh.bb.bsize;
796 }
797
798 /* insert buffer back onto a new freelist */
799 __kmp_bget_insert_into_freelist(thr, b);
800
801 /* Now we look at the next buffer in memory, located by advancing from
802 the start of this buffer by its size, to see if that buffer is
803 free. If it is, we combine this buffer with the next one in
804 memory, dechaining the second buffer from the free list. */
805 bn = BFH(((char *)b) + b->bh.bb.bsize);
806 if (bn->bh.bb.bsize > 0) {
807
808 /* The buffer is free. Remove it from the free list and add
809 its size to that of our buffer. */
810 KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
811 bn->bh.bb.bsize);
812
813 __kmp_bget_remove_from_freelist(bn);
814
815 b->bh.bb.bsize += bn->bh.bb.bsize;
816
817 /* unlink the buffer from the old freelist, and reinsert it into the new
818 * freelist */
819 __kmp_bget_remove_from_freelist(b);
820 __kmp_bget_insert_into_freelist(thr, b);
821
822 /* Finally, advance to the buffer that follows the newly
823 consolidated free block. We must set its backpointer to the
824 head of the consolidated free block. We know the next block
825 must be an allocated block because the process of recombination
826 guarantees that two free blocks will never be contiguous in
827 memory. */
828 bn = BFH(((char *)b) + b->bh.bb.bsize);
829 }
830#ifdef FreeWipe
831 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
832 (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
833#endif
834 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
835
836 /* The next buffer is allocated. Set the backpointer in it to point
837 to this buffer; the previous free buffer in memory. */
838
839 bn->bh.bb.prevfree = b->bh.bb.bsize;
840
841 /* If a block-release function is defined, and this free buffer
842 constitutes the entire block, release it. Note that pool_len
843 is defined in such a way that the test will fail unless all
844 pool blocks are the same size. */
845 if (thr->relfcn != 0 &&
846 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
847#if BufStats
848 if (thr->numpblk !=
849 1) { /* Do not release the last buffer until finalization time */
850#endif
851
852 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
853 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
854 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
855 b->bh.bb.bsize);
856
857 /* Unlink the buffer from the free list */
858 __kmp_bget_remove_from_freelist(b);
859
860 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
861
862 (*thr->relfcn)(b);
863#if BufStats
864 thr->numprel++; /* Nr of expansion block releases */
865 thr->numpblk--; /* Total number of blocks */
866 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
867
868 // avoid leaving stale last_pool pointer around if it is being dealloced
869 if (thr->last_pool == b)
870 thr->last_pool = 0;
871 } else {
872 thr->last_pool = b;
873 }
874#endif /* BufStats */
875 }
876}
877
878/* BECTL -- Establish automatic pool expansion control */
879static void bectl(kmp_info_t *th, bget_compact_t compact,
880 bget_acquire_t acquire, bget_release_t release,
881 bufsize pool_incr) {
882 thr_data_t *thr = get_thr_data(th);
883
884 thr->compfcn = compact;
885 thr->acqfcn = acquire;
886 thr->relfcn = release;
887 thr->exp_incr = pool_incr;
888}
889
890/* BPOOL -- Add a region of memory to the buffer pool. */
891static void bpool(kmp_info_t *th, void *buf, bufsize len) {
892 /* int bin = 0; */
893 thr_data_t *thr = get_thr_data(th);
894 bfhead_t *b = BFH(buf);
895 bhead_t *bn;
896
897 __kmp_bget_dequeue(th); /* Release any queued buffers */
898
899#ifdef SizeQuant
900 len &= ~((bufsize)(SizeQuant - 1));
901#endif
902 if (thr->pool_len == 0) {
903 thr->pool_len = len;
904 } else if (len != thr->pool_len) {
905 thr->pool_len = -1;
906 }
907#if BufStats
908 thr->numpget++; /* Number of block acquisitions */
909 thr->numpblk++; /* Number of blocks total */
910 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
911#endif /* BufStats */
912
913 /* Since the block is initially occupied by a single free buffer,
914 it had better not be (much) larger than the largest buffer
915 whose size we can store in bhead.bb.bsize. */
916 KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
917
918 /* Clear the backpointer at the start of the block to indicate that
919 there is no free block prior to this one. That blocks
920 recombination when the first block in memory is released. */
921 b->bh.bb.prevfree = 0;
922
923 /* Create a dummy allocated buffer at the end of the pool. This dummy
924 buffer is seen when a buffer at the end of the pool is released and
925 blocks recombination of the last buffer with the dummy buffer at
926 the end. The length in the dummy buffer is set to the largest
927 negative number to denote the end of the pool for diagnostic
928 routines (this specific value is not counted on by the actual
929 allocation and release functions). */
930 len -= sizeof(bhead_t);
931 b->bh.bb.bsize = (bufsize)len;
932 /* Set the owner of this buffer */
933 TCW_PTR(b->bh.bb.bthr,
934 (kmp_info_t *)((kmp_uintptr_t)th |
935 1)); // mark the buffer as allocated address
936
937 /* Chain the new block to the free list. */
938 __kmp_bget_insert_into_freelist(thr, b);
939
940#ifdef FreeWipe
941 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
942 (size_t)(len - sizeof(bfhead_t)));
943#endif
944 bn = BH(((char *)b) + len);
945 bn->bb.prevfree = (bufsize)len;
946 /* Definition of ESent assumes two's complement! */
947 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
948
949 bn->bb.bsize = ESent;
950}
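/* Resulting pool layout (sketch):
     [ bhead_t | free space ................ ][ bhead_t: bsize == ESent ]
   The trailing dummy header is the end sentinel: its negative bsize stops
   brel() from coalescing past the end of the pool, and its prevfree field
   records the size of the single free buffer in front of it. */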
951
952/* BFREED -- Dump the free lists for this thread. */
953static void bfreed(kmp_info_t *th) {
954 int bin = 0, count = 0;
955 int gtid = __kmp_gtid_from_thread(th);
956 thr_data_t *thr = get_thr_data(th);
957
958#if BufStats
959 __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
960 " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
961 " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
962 " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
963 " drel=%" KMP_INT64_SPEC "\n",
964 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
965 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
966 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
967 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
968#endif
969
970 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
971 bfhead_t *b;
972
973 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
974 b = b->ql.flink) {
975 bufsize bs = b->bh.bb.bsize;
976
977 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
978 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
979 KMP_DEBUG_ASSERT(bs > 0);
980
981 count += 1;
982
983 __kmp_printf_no_lock(
984 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
985 (long)bs);
986#ifdef FreeWipe
987 {
988 char *lerr = ((char *)b) + sizeof(bfhead_t);
989 if ((bs > sizeof(bfhead_t)) &&
990 ((*lerr != 0x55) ||
991 (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
992 0))) {
993 __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
994 "free block have been overstored.)\n",
995 gtid);
996 }
997 }
998#endif
999 }
1000 }
1001
1002 if (count == 0)
1003 __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
1004}
1005
1006void __kmp_initialize_bget(kmp_info_t *th) {
1007 KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
1008
1009 set_thr_data(th);
1010
1011 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
1012 (bufsize)__kmp_malloc_pool_incr);
1013}
1014
1015void __kmp_finalize_bget(kmp_info_t *th) {
1016 thr_data_t *thr;
1017 bfhead_t *b;
1018
1019 KMP_DEBUG_ASSERT(th != 0);
1020
1021#if BufStats
1022 thr = (thr_data_t *)th->th.th_local.bget_data;
1023 KMP_DEBUG_ASSERT(thr != NULL);
1024 b = thr->last_pool;
1025
1026 /* If a block-release function is defined, and this free buffer constitutes
1027 the entire block, release it. Note that pool_len is defined in such a way
1028 that the test will fail unless all pool blocks are the same size. */
1029
1030 // Deallocate the last pool if one exists because we no longer do it in brel()
1031 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1032 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
1033 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1034 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1035 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
1036 b->bh.bb.bsize);
1037
1038 /* Unlink the buffer from the free list */
1039 __kmp_bget_remove_from_freelist(b);
1040
1041 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
1042
1043 (*thr->relfcn)(b);
1044 thr->numprel++; /* Nr of expansion block releases */
1045 thr->numpblk--; /* Total number of blocks */
1046 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1047 }
1048#endif /* BufStats */
1049
1050 /* Deallocate bget_data */
1051 if (th->th.th_local.bget_data != NULL) {
1052 __kmp_free(th->th.th_local.bget_data);
1053 th->th.th_local.bget_data = NULL;
1054 }
1055}
1056
1057void kmpc_set_poolsize(size_t size) {
1058 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1059 (bget_release_t)free, (bufsize)size);
1060}
1061
1062size_t kmpc_get_poolsize(void) {
1063 thr_data_t *p;
1064
1065 p = get_thr_data(__kmp_get_thread());
1066
1067 return p->exp_incr;
1068}
1069
1070void kmpc_set_poolmode(int mode) {
1071 thr_data_t *p;
1072
1073 if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1074 mode == bget_mode_best) {
1075 p = get_thr_data(__kmp_get_thread());
1076 p->mode = (bget_mode_t)mode;
1077 }
1078}
1079
1080int kmpc_get_poolmode(void) {
1081 thr_data_t *p;
1082
1083 p = get_thr_data(__kmp_get_thread());
1084
1085 return p->mode;
1086}
1087
1088void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
1089 kmp_info_t *th = __kmp_get_thread();
1090 bufsize a, b;
1091
1092 __kmp_bget_dequeue(th); /* Release any queued buffers */
1093
1094 bcheck(th, &a, &b);
1095
1096 *maxmem = a;
1097 *allmem = b;
1098}
1099
1100void kmpc_poolprint(void) {
1101 kmp_info_t *th = __kmp_get_thread();
1102
1103 __kmp_bget_dequeue(th); /* Release any queued buffers */
1104
1105 bfreed(th);
1106}
1107
1108#endif // #if KMP_USE_BGET
1109
1110void *kmpc_malloc(size_t size) {
1111 void *ptr;
1112 ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1113 if (ptr != NULL) {
1114 // save allocated pointer just before one returned to user
1115 *(void **)ptr = ptr;
1116 ptr = (void **)ptr + 1;
1117 }
1118 return ptr;
1119}
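// kmpc_malloc()/kmpc_calloc() over-allocate by one pointer and stash the
// original bget() pointer immediately in front of the address handed back:
//     [ void *orig | user data ... ]
//                  ^ value returned to the caller
// kmpc_free() and kmpc_realloc() then read *((void **)ptr - 1) to recover the
// real allocation before passing it to brel()/bgetr().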
1120
1121#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
1122
1123void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1124 void *ptr;
1125 void *ptr_allocated;
1126 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1127 if (!IS_POWER_OF_TWO(alignment)) {
1128 // AC: do we need to issue a warning here?
1129 errno = EINVAL;
1130 return NULL;
1131 }
1132 size = size + sizeof(void *) + alignment;
1133 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1134 if (ptr_allocated != NULL) {
1135 // save allocated pointer just before one returned to user
1136 ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1137 ~(alignment - 1));
1138 *((void **)ptr - 1) = ptr_allocated;
1139 } else {
1140 ptr = NULL;
1141 }
1142 return ptr;
1143}
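// Worked example of the rounding above: if bget() returns ptr_allocated ==
// 0x1008 and alignment == 64, then (0x1008 + 8 + 0x40) & ~0x3F == 0x1040,
// which is 64-byte aligned and leaves at least sizeof(void *) bytes of slack,
// so the original pointer can be stored at ((void **)0x1040 - 1) for
// kmpc_free() to find.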
1144
1145void *kmpc_calloc(size_t nelem, size_t elsize) {
1146 void *ptr;
1147 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1148 if (ptr != NULL) {
1149 // save allocated pointer just before one returned to user
1150 *(void **)ptr = ptr;
1151 ptr = (void **)ptr + 1;
1152 }
1153 return ptr;
1154}
1155
1156void *kmpc_realloc(void *ptr, size_t size) {
1157 void *result = NULL;
1158 if (ptr == NULL) {
1159 // If pointer is NULL, realloc behaves like malloc.
1160 result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1161 // save allocated pointer just before one returned to user
1162 if (result != NULL) {
1163 *(void **)result = result;
1164 result = (void **)result + 1;
1165 }
1166 } else if (size == 0) {
1167 // If size is 0, realloc behaves like free.
1168 // The thread must be registered by the call to kmpc_malloc() or
1169 // kmpc_calloc() before.
1170 // So it should be safe to call __kmp_get_thread(), not
1171 // __kmp_entry_thread().
1172 KMP_ASSERT(*((void **)ptr - 1));
1173 brel(__kmp_get_thread(), *((void **)ptr - 1));
1174 } else {
1175 result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
1176 (bufsize)(size + sizeof(ptr)));
1177 if (result != NULL) {
1178 *(void **)result = result;
1179 result = (void **)result + 1;
1180 }
1181 }
1182 return result;
1183}
1184
1185// NOTE: the library must have already been initialized by a previous allocate
1186void kmpc_free(void *ptr) {
1187 if (!__kmp_init_serial) {
1188 return;
1189 }
1190 if (ptr != NULL) {
1191 kmp_info_t *th = __kmp_get_thread();
1192 __kmp_bget_dequeue(th); /* Release any queued buffers */
1193 // extract allocated pointer and free it
1194 KMP_ASSERT(*((void **)ptr - 1));
1195 brel(th, *((void **)ptr - 1));
1196 }
1197}
1198
1199void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
1200 void *ptr;
1201 KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1202 (int)size KMP_SRC_LOC_PARM));
1203 ptr = bget(th, (bufsize)size);
1204 KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
1205 return ptr;
1206}
1207
1208void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1209 size_t elsize KMP_SRC_LOC_DECL) {
1210 void *ptr;
1211 KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1212 (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1213 ptr = bgetz(th, (bufsize)(nelem * elsize));
1214 KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
1215 return ptr;
1216}
1217
1218void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
1219 size_t size KMP_SRC_LOC_DECL) {
1220 KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1221 ptr, (int)size KMP_SRC_LOC_PARM));
1222 ptr = bgetr(th, ptr, (bufsize)size);
1223 KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
1224 return ptr;
1225}
1226
1227void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
1228 KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1229 ptr KMP_SRC_LOC_PARM));
1230 if (ptr != NULL) {
1231 __kmp_bget_dequeue(th); /* Release any queued buffers */
1232 brel(th, ptr);
1233 }
1234 KE_TRACE(30, ("<- __kmp_thread_free()\n"));
1235}
1236
1237/* OMP 5.0 Memory Management support */
1238static const char *kmp_mk_lib_name;
1239static void *h_memkind;
1240/* memkind experimental API: */
1241// memkind_alloc
1242static void *(*kmp_mk_alloc)(void *k, size_t sz);
1243// memkind_free
1244static void (*kmp_mk_free)(void *kind, void *ptr);
1245// memkind_check_available
1246static int (*kmp_mk_check)(void *kind);
1247// kinds we are going to use
1248static void **mk_default;
1249static void **mk_interleave;
1250static void **mk_hbw;
1251static void **mk_hbw_interleave;
1252static void **mk_hbw_preferred;
1253static void **mk_hugetlb;
1254static void **mk_hbw_hugetlb;
1255static void **mk_hbw_preferred_hugetlb;
1256static void **mk_dax_kmem;
1257static void **mk_dax_kmem_all;
1258static void **mk_dax_kmem_preferred;
1259static void *(*kmp_target_alloc_host)(size_t size, int device);
1260static void *(*kmp_target_alloc_shared)(size_t size, int device);
1261static void *(*kmp_target_alloc_device)(size_t size, int device);
1262static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
1263static void *(*kmp_target_unlock_mem)(void *ptr, int device);
1264static void *(*kmp_target_free_host)(void *ptr, int device);
1265static void *(*kmp_target_free_shared)(void *ptr, int device);
1266static void *(*kmp_target_free_device)(void *ptr, int device);
1267static bool __kmp_target_mem_available;
1268
1269#define KMP_IS_TARGET_MEM_SPACE(MS) \
1270 (MS == llvm_omp_target_host_mem_space || \
1271 MS == llvm_omp_target_shared_mem_space || \
1272 MS == llvm_omp_target_device_mem_space)
1273
1274#define KMP_IS_TARGET_MEM_ALLOC(MA) \
1275 (MA == llvm_omp_target_host_mem_alloc || \
1276 MA == llvm_omp_target_shared_mem_alloc || \
1277 MA == llvm_omp_target_device_mem_alloc)
1278
1279#define KMP_IS_PREDEF_MEM_SPACE(MS) \
1280 (MS == omp_null_mem_space || MS == omp_default_mem_space || \
1281 MS == omp_large_cap_mem_space || MS == omp_const_mem_space || \
1282 MS == omp_high_bw_mem_space || MS == omp_low_lat_mem_space || \
1283 KMP_IS_TARGET_MEM_SPACE(MS))
1284
1285/// Support OMP 6.0 target memory management
1286/// Expected offload runtime entries.
1287///
1288/// Returns number of resources and list of unique resource IDs in "resources".
1289/// Runtime needs to invoke this twice to get the number of resources, allocate
1290/// space for the resource IDs, and finally let offload runtime write resource
1291/// IDs in "resources".
1292/// int __tgt_get_mem_resources(int num_devices, const int *devices,
1293/// int host_access, omp_memspace_handle_t memspace,
1294/// int *resources);
1295///
1296/// Redirects omp_alloc call to offload runtime.
1297/// void *__tgt_omp_alloc(size_t size, omp_allocator_handle_t allocator);
1298///
1299/// Redirects omp_free call to offload runtime.
1300/// void __tgt_omp_free(void *ptr, omp_allocator_handle_t);
1301class kmp_tgt_allocator_t {
1302 bool supported = false;
1303 using get_mem_resources_t = int (*)(int, const int *, int,
1304 omp_memspace_handle_t, int *);
1305 using omp_alloc_t = void *(*)(size_t, omp_allocator_handle_t);
1306 using omp_free_t = void (*)(void *, omp_allocator_handle_t);
1307 get_mem_resources_t tgt_get_mem_resources = nullptr;
1308 omp_alloc_t tgt_omp_alloc = nullptr;
1309 omp_free_t tgt_omp_free = nullptr;
1310
1311public:
1312 /// Initialize interface with offload runtime
1313 void init() {
1314 tgt_get_mem_resources =
1315 (get_mem_resources_t)KMP_DLSYM("__tgt_get_mem_resources");
1316 tgt_omp_alloc = (omp_alloc_t)KMP_DLSYM("__tgt_omp_alloc");
1317 tgt_omp_free = (omp_free_t)KMP_DLSYM("__tgt_omp_free");
1318 supported = tgt_get_mem_resources && tgt_omp_alloc && tgt_omp_free;
1319 }
1320 /// Obtain resource information from offload runtime. We assume offload
1321 /// runtime backends maintain a list of unique resource IDs.
1322 int get_mem_resources(int ndevs, const int *devs, int host,
1323 omp_memspace_handle_t memspace, int *resources) {
1324 if (supported)
1325 return tgt_get_mem_resources(ndevs, devs, host, memspace, resources);
1326 return 0;
1327 }
1328 /// Invoke offload runtime's memory allocation routine
1329 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
1330 if (supported)
1331 return tgt_omp_alloc(size, allocator);
1332 return nullptr;
1333 }
1334 /// Invoke offload runtime's memory deallocation routine
1335 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
1336 if (supported)
1337 tgt_omp_free(ptr, allocator);
1338 }
1339} __kmp_tgt_allocator;
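// Note: the three hooks above are resolved with KMP_DLSYM() when
// __kmp_init_target_mem() runs. If the offload runtime does not provide them,
// "supported" stays false and the wrappers degrade gracefully:
// get_mem_resources() reports zero resources and omp_alloc() returns nullptr.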
1340
1341extern "C" int omp_get_num_devices(void);
1342
1343/// Maintain a list of target memory spaces that are identified with the
1344/// requested information. There will be only one unique memory space object
1345/// that matches the input.
1346class kmp_tgt_memspace_list_t {
1347 kmp_memspace_t *memspace_list = nullptr;
1348 KMP_LOCK_INIT(mtx);
1349 /// Find memory space that matches the provided input
1350 kmp_memspace_t *find(int num_resources, const int *resources,
1351 omp_memspace_handle_t memspace) {
1352 kmp_memspace_t *ms = memspace_list;
1353 while (ms) {
1354 if (ms->num_resources == num_resources && ms->memspace == memspace &&
1355 !memcmp(ms->resources, resources, sizeof(int) * num_resources))
1356 break;
1357 ms = ms->next;
1358 }
1359 return ms;
1360 }
1361 /// Return memory space for the provided input. It tries to find an existing
1362 /// memory space that exactly matches the provided input, or creates one if
1363 /// none is found.
1364 omp_memspace_handle_t get(int num_resources, const int *resources,
1365 omp_memspace_handle_t memspace) {
1366 int gtid = __kmp_entry_gtid();
1367 __kmp_acquire_lock(&mtx, gtid);
1368 // Sort absolute IDs in the resource list
1369 int *sorted_resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
1370 KMP_MEMCPY(sorted_resources, resources, num_resources * sizeof(int));
1371 qsort(sorted_resources, (size_t)num_resources, sizeof(int),
1372 [](const void *a, const void *b) {
1373 const int val_a = *(const int *)a;
1374 const int val_b = *(const int *)b;
1375 return (val_a > val_b) ? 1 : ((val_a < val_b) ? -1 : 0);
1376 });
1377 kmp_memspace_t *ms = find(num_resources, sorted_resources, memspace);
1378 if (ms) {
1379 __kmp_free(sorted_resources);
1380 __kmp_release_lock(&mtx, gtid);
1381 return ms;
1382 }
1383 ms = (kmp_memspace_t *)__kmp_allocate(sizeof(kmp_memspace_t));
1384 ms->memspace = memspace;
1385 ms->num_resources = num_resources;
1386 ms->resources = sorted_resources;
1387 ms->next = memspace_list;
1388 memspace_list = ms;
1389 __kmp_release_lock(&mtx, gtid);
1390 return ms;
1391 }
1392
1393public:
1394 /// Initialize memory space list
1395 void init() { __kmp_init_lock(&mtx); }
1396 /// Release resources for the memory space list
1397 void fini() {
1398 kmp_memspace_t *ms = memspace_list;
1399 while (ms) {
1400 if (ms->resources)
1401 __kmp_free(ms->resources);
1402 kmp_memspace_t *tmp = ms;
1403 ms = ms->next;
1404 __kmp_free(tmp);
1405 }
1406 __kmp_destroy_lock(&mtx);
1407 }
1408 /// Return memory space for the provided input
1409 omp_memspace_handle_t get_memspace(int num_devices, const int *devices,
1410 int host_access,
1411 omp_memspace_handle_t memspace) {
1412 int actual_num_devices = num_devices;
1413 int *actual_devices = const_cast<int *>(devices);
1414 if (actual_num_devices == 0) {
1415 actual_num_devices = omp_get_num_devices();
1416 if (actual_num_devices <= 0)
1417 return omp_null_mem_space;
1418 }
1419 if (actual_devices == NULL) {
1420 // Prepare list of all devices in this case.
1421 actual_devices = (int *)__kmp_allocate(sizeof(int) * actual_num_devices);
1422 for (int i = 0; i < actual_num_devices; i++)
1423 actual_devices[i] = i;
1424 }
1425 // Get the number of available resources first
1426 int num_resources = __kmp_tgt_allocator.get_mem_resources(
1427 actual_num_devices, actual_devices, host_access, memspace, NULL);
1428 if (num_resources <= 0)
1429 return omp_null_mem_space; // No available resources
1430
1431 omp_memspace_handle_t ms = omp_null_mem_space;
1432 if (num_resources > 0) {
1433 int *resources = (int *)__kmp_allocate(sizeof(int) * num_resources);
1434 // Let offload runtime write the resource IDs
1435 num_resources = __kmp_tgt_allocator.get_mem_resources(
1436 actual_num_devices, actual_devices, host_access, memspace, resources);
1437 ms = get(num_resources, resources, memspace);
1438 __kmp_free(resources);
1439 }
1440 if (!devices && actual_devices)
1441 __kmp_free(actual_devices);
1442 return ms;
1443 }
1444 /// Return sub memory space from the parent memory space
1445 omp_memspace_handle_t get_memspace(int num_resources, const int *resources,
1446 omp_memspace_handle_t parent) {
1447 kmp_memspace_t *ms = (kmp_memspace_t *)parent;
1448 return get(num_resources, resources, ms->memspace);
1449 }
1450} __kmp_tgt_memspace_list;
1451
1452#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1453static inline void chk_kind(void ***pkind) {
1454 KMP_DEBUG_ASSERT(pkind);
1455 if (*pkind) // symbol found
1456 if (kmp_mk_check(**pkind)) // kind not available or error
1457 *pkind = NULL;
1458}
1459#endif
1460
1461void __kmp_init_memkind() {
1462// as of 2018-07-31 memkind does not support Windows*, exclude it for now
1463#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
1464 // use of statically linked memkind is problematic, as it depends on libnuma
1465 kmp_mk_lib_name = "libmemkind.so";
1466 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
1467 if (h_memkind) {
1468 kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
1469 kmp_mk_alloc =
1470 (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
1471 kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
1472 mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
1473 if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
1474 !kmp_mk_check(*mk_default)) {
1475 __kmp_memkind_available = 1;
1476 mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
1477 chk_kind(&mk_interleave);
1478 mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
1479 chk_kind(&mk_hbw);
1480 mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
1481 chk_kind(&mk_hbw_interleave);
1482 mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
1483 chk_kind(&mk_hbw_preferred);
1484 mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
1485 chk_kind(&mk_hugetlb);
1486 mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
1487 chk_kind(&mk_hbw_hugetlb);
1488 mk_hbw_preferred_hugetlb =
1489 (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
1490 chk_kind(&mk_hbw_preferred_hugetlb);
1491 mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
1492 chk_kind(&mk_dax_kmem);
1493 mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
1494 chk_kind(&mk_dax_kmem_all);
1495 mk_dax_kmem_preferred =
1496 (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
1497 chk_kind(&mk_dax_kmem_preferred);
1498 KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
1499 return; // success
1500 }
1501 dlclose(h_memkind); // failure
1502 }
1503#else // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB)
1504 kmp_mk_lib_name = "";
1505#endif // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB)
1506 h_memkind = NULL;
1507 kmp_mk_check = NULL;
1508 kmp_mk_alloc = NULL;
1509 kmp_mk_free = NULL;
1510 mk_default = NULL;
1511 mk_interleave = NULL;
1512 mk_hbw = NULL;
1513 mk_hbw_interleave = NULL;
1514 mk_hbw_preferred = NULL;
1515 mk_hugetlb = NULL;
1516 mk_hbw_hugetlb = NULL;
1517 mk_hbw_preferred_hugetlb = NULL;
1518 mk_dax_kmem = NULL;
1519 mk_dax_kmem_all = NULL;
1520 mk_dax_kmem_preferred = NULL;
1521}
1522
1523void __kmp_fini_memkind() {
1524#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1525 if (__kmp_memkind_available)
1526 KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
1527 if (h_memkind) {
1528 dlclose(h_memkind);
1529 h_memkind = NULL;
1530 }
1531 kmp_mk_check = NULL;
1532 kmp_mk_alloc = NULL;
1533 kmp_mk_free = NULL;
1534 mk_default = NULL;
1535 mk_interleave = NULL;
1536 mk_hbw = NULL;
1537 mk_hbw_interleave = NULL;
1538 mk_hbw_preferred = NULL;
1539 mk_hugetlb = NULL;
1540 mk_hbw_hugetlb = NULL;
1541 mk_hbw_preferred_hugetlb = NULL;
1542 mk_dax_kmem = NULL;
1543 mk_dax_kmem_all = NULL;
1544 mk_dax_kmem_preferred = NULL;
1545#endif
1546}
1547
1548#if KMP_USE_HWLOC
1549static bool __kmp_is_hwloc_membind_supported(hwloc_membind_policy_t policy) {
1550#if HWLOC_API_VERSION >= 0x00020300
1551 const hwloc_topology_support *support;
1552 support = hwloc_topology_get_support(__kmp_hwloc_topology);
1553 if (support) {
1554 if (policy == HWLOC_MEMBIND_BIND)
1555 return (support->membind->alloc_membind &&
1556 support->membind->bind_membind);
1557 if (policy == HWLOC_MEMBIND_INTERLEAVE)
1558 return (support->membind->alloc_membind &&
1559 support->membind->interleave_membind);
1560 }
1561 return false;
1562#else
1563 return false;
1564#endif
1565}
1566
1567void *__kmp_hwloc_alloc_membind(hwloc_memattr_id_e attr, size_t size,
1568 hwloc_membind_policy_t policy) {
1569#if HWLOC_API_VERSION >= 0x00020300
1570 void *ptr = NULL;
1571 hwloc_obj_t node;
1572 struct hwloc_location initiator;
1573 int ret;
1574 // TODO: We should make this more efficient by getting rid of the OS syscall
1575 // 'hwloc_bitmap_alloc' and 'hwloc_get_cpubind' to get affinity and instead
1576 // use th_affin_mask field when it's capable of getting the underlying
1577 // mask implementation.
1578 hwloc_cpuset_t mask = hwloc_bitmap_alloc();
1579 ret = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
1580 if (ret < 0) {
1581 hwloc_bitmap_free(mask);
1582 return ptr;
1583 }
1584 initiator.type = KMP_HWLOC_LOCATION_TYPE_CPUSET;
1585 initiator.location.cpuset = mask;
1586 ret = hwloc_memattr_get_best_target(__kmp_hwloc_topology, attr, &initiator, 0,
1587 &node, NULL);
1588 if (ret < 0) {
1589 return ptr;
1590 }
1591 return hwloc_alloc_membind(__kmp_hwloc_topology, size, node->nodeset, policy,
1592 HWLOC_MEMBIND_BYNODESET);
1593#else
1594 return NULL;
1595#endif
1596}
1597
1598void *__kmp_hwloc_membind_policy(omp_memspace_handle_t ms, size_t size,
1599 hwloc_membind_policy_t policy) {
1600#if HWLOC_API_VERSION >= 0x00020300
1601 void *ptr = NULL;
1602 if (ms == omp_high_bw_mem_space) {
1603 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH, size, policy);
1604 } else if (ms == omp_large_cap_mem_space) {
1605 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY, size, policy);
1606 } else {
1607 ptr = hwloc_alloc(__kmp_hwloc_topology, size);
1608 }
1609 return ptr;
1610#else
1611 return NULL;
1612#endif
1613}
1614#endif // KMP_USE_HWLOC
1615
1616void __kmp_init_target_mem() {
1617 *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
1618 *(void **)(&kmp_target_alloc_shared) =
1619 KMP_DLSYM("llvm_omp_target_alloc_shared");
1620 *(void **)(&kmp_target_alloc_device) =
1621 KMP_DLSYM("llvm_omp_target_alloc_device");
1622 *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
1623 *(void **)(&kmp_target_free_shared) =
1624 KMP_DLSYM("llvm_omp_target_free_shared");
1625 *(void **)(&kmp_target_free_device) =
1626 KMP_DLSYM("llvm_omp_target_free_device");
1627 __kmp_target_mem_available =
1628 kmp_target_alloc_host && kmp_target_alloc_shared &&
1629 kmp_target_alloc_device && kmp_target_free_host &&
1630 kmp_target_free_shared && kmp_target_free_device;
1631 // lock/pin and unlock/unpin target calls
1632 *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
1633 *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
1634 __kmp_tgt_allocator.init();
1635 __kmp_tgt_memspace_list.init();
1636}
1637
1638/// Finalize target memory support
1639void __kmp_fini_target_mem() { __kmp_tgt_memspace_list.fini(); }
1640
1641omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
1642 int ntraits,
1643 omp_alloctrait_t traits[]) {
1644 kmp_allocator_t *al;
1645 int i;
1646 al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
1647 al->memspace = ms; // not used currently
1648
1649 // Assign default values if applicable
1650 al->alignment = 1;
1651 al->pinned = false;
1652 al->partition = omp_atv_environment;
1653 al->pin_device = -1;
1654 al->preferred_device = -1;
1655 al->target_access = omp_atv_single;
1656 al->atomic_scope = omp_atv_device;
1657
1658 for (i = 0; i < ntraits; ++i) {
1659 switch (traits[i].key) {
1660 case omp_atk_sync_hint:
1661 case omp_atk_access:
1662 break;
1663 case omp_atk_pinned:
1664 al->pinned = true;
1665 break;
1666 case omp_atk_alignment:
1667 __kmp_type_convert(traits[i].value, &(al->alignment));
1668 KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
1669 break;
1670 case omp_atk_pool_size:
1671 al->pool_size = traits[i].value;
1672 break;
1673 case omp_atk_fallback:
1674 al->fb = (omp_alloctrait_value_t)traits[i].value;
1675 KMP_DEBUG_ASSERT(
1676 al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
1677 al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
1678 break;
1679 case omp_atk_fb_data:
1680 al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
1681 break;
1682 case omp_atk_partition:
1683#if KMP_USE_HWLOC
1684 al->membind = (omp_alloctrait_value_t)traits[i].value;
1685 KMP_DEBUG_ASSERT(al->membind == omp_atv_environment ||
1686 al->membind == omp_atv_nearest ||
1687 al->membind == omp_atv_blocked ||
1688 al->membind == omp_atv_interleaved);
1689#endif
1690 al->memkind = RCAST(void **, traits[i].value);
1691 break;
1692 case omp_atk_pin_device:
1693      __kmp_type_convert(traits[i].value, &(al->pin_device));
1694 break;
1695 case omp_atk_preferred_device:
1696      __kmp_type_convert(traits[i].value, &(al->preferred_device));
1697 break;
1698 case omp_atk_target_access:
1699 al->target_access = (omp_alloctrait_value_t)traits[i].value;
1700 break;
1701 case omp_atk_atomic_scope:
1702 al->atomic_scope = (omp_alloctrait_value_t)traits[i].value;
1703 break;
1704 case omp_atk_part_size:
1705      __kmp_type_convert(traits[i].value, &(al->part_size));
1706 break;
1707 default:
1708 KMP_ASSERT2(0, "Unexpected allocator trait");
1709 }
1710 }
1711
1712 if (al->memspace > kmp_max_mem_space) {
1713 // Memory space has been allocated for targets.
1714 return (omp_allocator_handle_t)al;
1715 }
1716
1717 KMP_DEBUG_ASSERT(KMP_IS_PREDEF_MEM_SPACE(al->memspace));
1718
1719 if (al->fb == 0) {
1720 // set default allocator
1721 al->fb = omp_atv_default_mem_fb;
1722 al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
1723 } else if (al->fb == omp_atv_allocator_fb) {
1724 KMP_ASSERT(al->fb_data != NULL);
1725 } else if (al->fb == omp_atv_default_mem_fb) {
1726 al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
1727 }
1728 if (__kmp_memkind_available) {
1729 // Let's use memkind library if available
1730 if (ms == omp_high_bw_mem_space) {
1731 if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
1732 al->memkind = mk_hbw_interleave;
1733 } else if (mk_hbw_preferred) {
1734 // AC: do not try to use MEMKIND_HBW for now, because memkind library
1735 // cannot reliably detect exhaustion of HBW memory.
1736 // It could be possible using hbw_verify_memory_region() but memkind
1737 // manual says: "Using this function in production code may result in
1738 // serious performance penalty".
1739 al->memkind = mk_hbw_preferred;
1740 } else {
1741 // HBW is requested but not available --> return NULL allocator
1742 __kmp_free(al);
1743 return omp_null_allocator;
1744 }
1745 } else if (ms == omp_large_cap_mem_space) {
1746 if (mk_dax_kmem_all) {
1747 // All pmem nodes are visited
1748 al->memkind = mk_dax_kmem_all;
1749 } else if (mk_dax_kmem) {
1750 // Only closest pmem node is visited
1751 al->memkind = mk_dax_kmem;
1752 } else {
1753 __kmp_free(al);
1754 return omp_null_allocator;
1755 }
1756 } else {
1757 if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
1758 al->memkind = mk_interleave;
1759 } else {
1760 al->memkind = mk_default;
1761 }
1762 }
1763 } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
1764 __kmp_free(al);
1765 return omp_null_allocator;
1766 } else {
1767 if (!__kmp_hwloc_available &&
1768 (ms == omp_high_bw_mem_space || ms == omp_large_cap_mem_space)) {
1769 // cannot detect HBW memory presence without memkind library
1770 __kmp_free(al);
1771 return omp_null_allocator;
1772 }
1773 }
1774 return (omp_allocator_handle_t)al;
1775}
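
// Illustrative only: the trait array consumed above is what arrives from the
// standard omp_init_allocator() entry point. A user-level sketch (assumes
// <omp.h>; not compiled here):
#if 0
#include <omp.h>
static void traits_example(void) {
  omp_alloctrait_t traits[] = {
      {omp_atk_alignment, 64},             // must be a power of two (asserted)
      {omp_atk_pool_size, 4 * 1024 * 1024},
      {omp_atk_fallback, omp_atv_null_fb}, // return NULL instead of aborting
  };
  omp_allocator_handle_t a =
      omp_init_allocator(omp_default_mem_space, 3, traits);
  void *p = omp_alloc(256, a);
  omp_free(p, a);
  omp_destroy_allocator(a);
}
#endif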
1776
1777void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
1778 if (allocator > kmp_max_mem_alloc)
1779 __kmp_free(allocator);
1780}
1781
1782void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
1783 if (allocator == omp_null_allocator)
1784 allocator = omp_default_mem_alloc;
1785 __kmp_threads[gtid]->th.th_def_allocator = allocator;
1786}
1787
1788omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
1789 return __kmp_threads[gtid]->th.th_def_allocator;
1790}
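
// The default allocator is a per-thread ICV stored in th_def_allocator; the
// two entry points above back omp_set_default_allocator() and
// omp_get_default_allocator(). Illustrative user-level use (assumes <omp.h>):
#if 0
#include <omp.h>
static void default_allocator_example(void) {
  omp_set_default_allocator(omp_large_cap_mem_alloc);
  // Passing omp_null_allocator makes omp_alloc use the thread's default.
  void *p = omp_alloc(128, omp_null_allocator);
  omp_free(p, omp_null_allocator);
}
#endif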
1791
1792omp_memspace_handle_t __kmp_get_devices_memspace(int ndevs, const int *devs,
1793 omp_memspace_handle_t memspace,
1794 int host) {
1795 if (!__kmp_init_serial)
1796 __kmp_serial_initialize();
1797 // Only accept valid device description and predefined memory space
1798 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1799 return omp_null_mem_space;
1800
1801  return __kmp_tgt_memspace_list.get_memspace(ndevs, devs, host, memspace);
1802}
1803
1804omp_allocator_handle_t
1805__kmp_get_devices_allocator(int ndevs, const int *devs,
1806 omp_memspace_handle_t memspace, int host) {
1807 if (!__kmp_init_serial)
1808 __kmp_serial_initialize();
1809 // Only accept valid device description and predefined memory space
1810 if (ndevs < 0 || (ndevs > 0 && !devs) || memspace > kmp_max_mem_space)
1811 return omp_null_allocator;
1812
1813 omp_memspace_handle_t mspace =
1814 __kmp_get_devices_memspace(ndevs, devs, memspace, host);
1815 if (mspace == omp_null_mem_space)
1816 return omp_null_allocator;
1817
1818  return __kmpc_init_allocator(__kmp_entry_gtid(), mspace, 0, NULL);
1819}
1820
1821int __kmp_get_memspace_num_resources(omp_memspace_handle_t memspace) {
1822 if (!__kmp_init_serial)
1823 __kmp_serial_initialize();
1824 if (memspace == omp_null_mem_space)
1825 return 0;
1826 if (memspace < kmp_max_mem_space)
1827 return 1; // return 1 for predefined memory space
1828 kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
1829 return ms->num_resources;
1830}
1831
1832omp_memspace_handle_t __kmp_get_submemspace(omp_memspace_handle_t memspace,
1833 int num_resources, int *resources) {
1834 if (!__kmp_init_serial)
1835 __kmp_serial_initialize();
1836 if (memspace == omp_null_mem_space || memspace < kmp_max_mem_space)
1837 return memspace; // return input memory space for predefined memory space
1838 kmp_memspace_t *ms = (kmp_memspace_t *)memspace;
1839 if (num_resources == 0 || ms->num_resources < num_resources || !resources)
1840 return omp_null_mem_space; // input memory space cannot satisfy the request
1841
1842  // The stored resource IDs are absolute IDs known only to the offload
1843  // backend, and the returned memory space keeps that property.
1844 int *resources_abs = (int *)__kmp_allocate(sizeof(int) * num_resources);
1845
1846 // Collect absolute resource ID from the relative ID
1847 for (int i = 0; i < num_resources; i++)
1848 resources_abs[i] = ms->resources[resources[i]];
1849
1850 omp_memspace_handle_t submemspace = __kmp_tgt_memspace_list.get_memspace(
1851      num_resources, resources_abs, memspace);
1852 __kmp_free(resources_abs);
1853
1854 return submemspace;
1855}
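
// Example of the relative->absolute remapping above: if the parent memspace
// stores absolute resource IDs {7, 9, 12} and the caller asks for relative
// resources {0, 2}, the submemspace is built from {7, 12}. A hypothetical
// standalone version of the loop:
#if 0
static void remap_resources(const int *abs_ids, const int *rel, int n,
                            int *out) {
  for (int i = 0; i < n; ++i)
    out[i] = abs_ids[rel[i]]; // relative index selects an absolute resource ID
}
#endif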
1856
1857typedef struct kmp_mem_desc { // Memory block descriptor
1858 void *ptr_alloc; // Pointer returned by allocator
1859 size_t size_a; // Size of allocated memory block (initial+descriptor+align)
1860 size_t size_orig; // Original size requested
1861 void *ptr_align; // Pointer to aligned memory, returned
1862 kmp_allocator_t *allocator; // allocator
1863} kmp_mem_desc_t;
1864static int alignment = sizeof(void *); // align to pointer size by default
1865
1866// external interfaces are wrappers over internal implementation
1867void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
1868 KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1869  void *ptr = __kmp_alloc(gtid, 0, size, allocator);
1870 KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
1871 return ptr;
1872}
1873
1874void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
1875 omp_allocator_handle_t allocator) {
1876 KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
1877 (int)size, allocator));
1878  void *ptr = __kmp_alloc(gtid, algn, size, allocator);
1879 KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
1880 return ptr;
1881}
1882
1883void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
1884 omp_allocator_handle_t allocator) {
1885 KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
1886 (int)size, allocator));
1887  void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
1888 KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
1889 return ptr;
1890}
1891
1892void *__kmpc_realloc(int gtid, void *ptr, size_t size,
1893 omp_allocator_handle_t allocator,
1894 omp_allocator_handle_t free_allocator) {
1895 KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
1896 allocator, free_allocator));
1897  void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
1898 KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
1899 return nptr;
1900}
1901
1902void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
1903 KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1904  ___kmpc_free(gtid, ptr, allocator);
1905 KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
1906 return;
1907}
1908
1909// internal implementation, called from inside the library
1910void *__kmp_alloc(int gtid, size_t algn, size_t size,
1911 omp_allocator_handle_t allocator) {
1912 void *ptr = NULL;
1913 kmp_allocator_t *al;
1914 KMP_DEBUG_ASSERT(__kmp_init_serial);
1915 if (size == 0)
1916 return NULL;
1917 if (allocator == omp_null_allocator)
1918 allocator = __kmp_threads[gtid]->th.th_def_allocator;
1919 kmp_int32 default_device =
1920 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1921
1922 al = RCAST(kmp_allocator_t *, allocator);
1923
1924 int sz_desc = sizeof(kmp_mem_desc_t);
1925 kmp_mem_desc_t desc;
1926 kmp_uintptr_t addr; // address returned by allocator
1927 kmp_uintptr_t addr_align; // address to return to caller
1928 kmp_uintptr_t addr_descr; // address of memory block descriptor
1929 size_t align = alignment; // default alignment
1930 if (allocator > kmp_max_mem_alloc && al->alignment > align)
1931 align = al->alignment; // alignment required by allocator trait
1932 if (align < algn)
1933 align = algn; // max of allocator trait, parameter and sizeof(void*)
1934 desc.size_orig = size;
1935 desc.size_a = size + sz_desc + align;
1936 bool is_pinned = false;
1937 if (allocator > kmp_max_mem_alloc)
1938 is_pinned = al->pinned;
1939
1940 // Use default allocator if hwloc and libmemkind are not available
1941 int use_default_allocator =
1942 (!__kmp_hwloc_available && !__kmp_memkind_available);
1943
1944 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
1945 // Memspace has been allocated for targets.
1946 return __kmp_tgt_allocator.omp_alloc(size, allocator);
1947 }
1948
1949 if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
1950 // Use size input directly as the memory may not be accessible on host.
1951 // Use default device for now.
1952 if (__kmp_target_mem_available) {
1953 kmp_int32 device =
1954 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1955 if (allocator == llvm_omp_target_host_mem_alloc)
1956 ptr = kmp_target_alloc_host(size, device);
1957 else if (allocator == llvm_omp_target_shared_mem_alloc)
1958 ptr = kmp_target_alloc_shared(size, device);
1959 else // allocator == llvm_omp_target_device_mem_alloc
1960 ptr = kmp_target_alloc_device(size, device);
1961 return ptr;
1962 } else {
1963 KMP_INFORM(TargetMemNotAvailable);
1964 }
1965 }
1966
1967 if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
1968 if (__kmp_target_mem_available) {
1969 kmp_int32 device =
1970 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
1971 if (al->memspace == llvm_omp_target_host_mem_space)
1972 ptr = kmp_target_alloc_host(size, device);
1973 else if (al->memspace == llvm_omp_target_shared_mem_space)
1974 ptr = kmp_target_alloc_shared(size, device);
1975 else // al->memspace == llvm_omp_target_device_mem_space
1976 ptr = kmp_target_alloc_device(size, device);
1977 return ptr;
1978 } else {
1979 KMP_INFORM(TargetMemNotAvailable);
1980 }
1981 }
1982
1983#if KMP_USE_HWLOC
1984 if (__kmp_hwloc_available) {
1985 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_BIND)) {
1986 if (allocator < kmp_max_mem_alloc) {
1987 // pre-defined allocator
1988 if (allocator == omp_high_bw_mem_alloc) {
1989 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_BANDWIDTH,
1990 desc.size_a, HWLOC_MEMBIND_BIND);
1991 if (ptr == NULL)
1992 use_default_allocator = true;
1993 } else if (allocator == omp_large_cap_mem_alloc) {
1994 ptr = __kmp_hwloc_alloc_membind(HWLOC_MEMATTR_ID_CAPACITY,
1995 desc.size_a, HWLOC_MEMBIND_BIND);
1996 if (ptr == NULL)
1997 use_default_allocator = true;
1998 } else {
1999 use_default_allocator = true;
2000 }
2001 if (use_default_allocator) {
2002 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2003 }
2004 } else if (al->pool_size > 0) {
2005 // custom allocator with pool size requested
2006 kmp_uint64 used =
2007 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2008 if (used + desc.size_a > al->pool_size) {
2009 // not enough space, need to go fallback path
2010 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2011 if (al->fb == omp_atv_default_mem_fb) {
2012 al = (kmp_allocator_t *)omp_default_mem_alloc;
2013 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2014 } else if (al->fb == omp_atv_abort_fb) {
2015 KMP_ASSERT(0); // abort fallback requested
2016 } else if (al->fb == omp_atv_allocator_fb) {
2017 KMP_ASSERT(al != al->fb_data);
2018 al = al->fb_data;
2019 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2020 } // else ptr == NULL;
2021 } else {
2022 // pool has enough space
2023 if (al->membind == omp_atv_interleaved) {
2024 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2025 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2026 HWLOC_MEMBIND_INTERLEAVE);
2027 }
2028 } else if (al->membind == omp_atv_environment) {
2029 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2030 HWLOC_MEMBIND_DEFAULT);
2031 } else {
2032 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2033 }
2034 if (ptr == NULL) {
2035 if (al->fb == omp_atv_default_mem_fb) {
2036 al = (kmp_allocator_t *)omp_default_mem_alloc;
2037 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2038 } else if (al->fb == omp_atv_abort_fb) {
2039 KMP_ASSERT(0); // abort fallback requested
2040 } else if (al->fb == omp_atv_allocator_fb) {
2041 KMP_ASSERT(al != al->fb_data);
2042 al = al->fb_data;
2043 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2044 }
2045 }
2046 }
2047 } else {
2048 // custom allocator, pool size not requested
2049 if (al->membind == omp_atv_interleaved) {
2050 if (__kmp_is_hwloc_membind_supported(HWLOC_MEMBIND_INTERLEAVE)) {
2051 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2052 HWLOC_MEMBIND_INTERLEAVE);
2053 }
2054 } else if (al->membind == omp_atv_environment) {
2055 ptr = __kmp_hwloc_membind_policy(al->memspace, desc.size_a,
2056 HWLOC_MEMBIND_DEFAULT);
2057 } else {
2058 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2059 }
2060 if (ptr == NULL) {
2061 if (al->fb == omp_atv_default_mem_fb) {
2062 al = (kmp_allocator_t *)omp_default_mem_alloc;
2063 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2064 } else if (al->fb == omp_atv_abort_fb) {
2065 KMP_ASSERT(0); // abort fallback requested
2066 } else if (al->fb == omp_atv_allocator_fb) {
2067 KMP_ASSERT(al != al->fb_data);
2068 al = al->fb_data;
2069 return __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2070 }
2071 }
2072 }
2073 } else { // alloc membind not supported, use hwloc_alloc
2074 ptr = hwloc_alloc(__kmp_hwloc_topology, desc.size_a);
2075 }
2076 } else {
2077#endif
2078 if (__kmp_memkind_available) {
2079 if (allocator < kmp_max_mem_alloc) {
2080 // pre-defined allocator
2081 if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2082 ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
2083 } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2084 ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
2085 } else {
2086 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2087 }
2088 } else if (al->pool_size > 0) {
2089 // custom allocator with pool size requested
2090 kmp_uint64 used =
2091 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2092 if (used + desc.size_a > al->pool_size) {
2093 // not enough space, need to go fallback path
2094 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2095 if (al->fb == omp_atv_default_mem_fb) {
2096 al = (kmp_allocator_t *)omp_default_mem_alloc;
2097 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2098 } else if (al->fb == omp_atv_abort_fb) {
2099 KMP_ASSERT(0); // abort fallback requested
2100 } else if (al->fb == omp_atv_allocator_fb) {
2101 KMP_ASSERT(al != al->fb_data);
2102 al = al->fb_data;
2103            ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2104 if (is_pinned && kmp_target_lock_mem)
2105 kmp_target_lock_mem(ptr, size, default_device);
2106 return ptr;
2107 } // else ptr == NULL;
2108 } else {
2109 // pool has enough space
2110 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2111 if (ptr == NULL) {
2112 if (al->fb == omp_atv_default_mem_fb) {
2113 al = (kmp_allocator_t *)omp_default_mem_alloc;
2114 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2115 } else if (al->fb == omp_atv_abort_fb) {
2116 KMP_ASSERT(0); // abort fallback requested
2117 } else if (al->fb == omp_atv_allocator_fb) {
2118 KMP_ASSERT(al != al->fb_data);
2119 al = al->fb_data;
2120              ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2121 if (is_pinned && kmp_target_lock_mem)
2122 kmp_target_lock_mem(ptr, size, default_device);
2123 return ptr;
2124 }
2125 }
2126 }
2127 } else {
2128 // custom allocator, pool size not requested
2129 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
2130 if (ptr == NULL) {
2131 if (al->fb == omp_atv_default_mem_fb) {
2132 al = (kmp_allocator_t *)omp_default_mem_alloc;
2133 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
2134 } else if (al->fb == omp_atv_abort_fb) {
2135 KMP_ASSERT(0); // abort fallback requested
2136 } else if (al->fb == omp_atv_allocator_fb) {
2137 KMP_ASSERT(al != al->fb_data);
2138 al = al->fb_data;
2139            ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2140 if (is_pinned && kmp_target_lock_mem)
2141 kmp_target_lock_mem(ptr, size, default_device);
2142 return ptr;
2143 }
2144 }
2145 }
2146 } else if (allocator < kmp_max_mem_alloc) {
2147 // pre-defined allocator
2148 if (allocator == omp_high_bw_mem_alloc) {
2149 KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
2150 } else if (allocator == omp_large_cap_mem_alloc) {
2151 KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
2152 } else if (allocator == omp_const_mem_alloc) {
2153 KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
2154 } else if (allocator == omp_low_lat_mem_alloc) {
2155 KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
2156 } else if (allocator == omp_cgroup_mem_alloc) {
2157 KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
2158 } else if (allocator == omp_pteam_mem_alloc) {
2159 KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
2160 } else if (allocator == omp_thread_mem_alloc) {
2161 KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
2162 } else { // default allocator requested
2163 use_default_allocator = true;
2164 }
2165 if (use_default_allocator) {
2166 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2167 use_default_allocator = false;
2168 }
2169 } else if (al->pool_size > 0) {
2170 // custom allocator with pool size requested
2171 kmp_uint64 used =
2172 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
2173 if (used + desc.size_a > al->pool_size) {
2174 // not enough space, need to go fallback path
2175 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2176 if (al->fb == omp_atv_default_mem_fb) {
2177 al = (kmp_allocator_t *)omp_default_mem_alloc;
2178 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2179 } else if (al->fb == omp_atv_abort_fb) {
2180 KMP_ASSERT(0); // abort fallback requested
2181 } else if (al->fb == omp_atv_allocator_fb) {
2182 KMP_ASSERT(al != al->fb_data);
2183 al = al->fb_data;
2184          ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
2185 if (is_pinned && kmp_target_lock_mem)
2186 kmp_target_lock_mem(ptr, size, default_device);
2187 return ptr;
2188 } // else ptr == NULL
2189 } else {
2190 // pool has enough space
2191 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2192 if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2193 KMP_ASSERT(0); // abort fallback requested
2194 } // no sense to look for another fallback because of same internal
2195 // alloc
2196 }
2197 } else {
2198 // custom allocator, pool size not requested
2199 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
2200 if (ptr == NULL && al->fb == omp_atv_abort_fb) {
2201 KMP_ASSERT(0); // abort fallback requested
2202 } // no sense to look for another fallback because of same internal alloc
2203 }
2204#if KMP_USE_HWLOC
2205 }
2206#endif
2207 KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
2208 if (ptr == NULL)
2209 return NULL;
2210
2211 if (is_pinned && kmp_target_lock_mem)
2212 kmp_target_lock_mem(ptr, desc.size_a, default_device);
2213
2214 addr = (kmp_uintptr_t)ptr;
2215 addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
2216 addr_descr = addr_align - sz_desc;
2217
2218 desc.ptr_alloc = ptr;
2219 desc.ptr_align = (void *)addr_align;
2220 desc.allocator = al;
2221 *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
2222 KMP_MB();
2223
2224 return desc.ptr_align;
2225}
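
// Worked example of the placement arithmetic above (illustrative values):
// with sz_desc == 40, align == 64 and the underlying allocation returning
// addr == 0x1010,
//   addr_align = (0x1010 + 40 + 64 - 1) & ~(64 - 1) = 0x1040
//   addr_descr = 0x1040 - 40 = 0x1018
// so the descriptor sits immediately below the aligned pointer handed back to
// the caller, which is where ___kmpc_free() and __kmp_realloc() reread it.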
2226
2227void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
2228 omp_allocator_handle_t allocator) {
2229 void *ptr = NULL;
2230 kmp_allocator_t *al;
2231 KMP_DEBUG_ASSERT(__kmp_init_serial);
2232
2233 if (allocator == omp_null_allocator)
2234 allocator = __kmp_threads[gtid]->th.th_def_allocator;
2235
2236 al = RCAST(kmp_allocator_t *, allocator);
2237
2238 if (nmemb == 0 || size == 0)
2239 return ptr;
2240
2241 if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
2242 if (al->fb == omp_atv_abort_fb) {
2243 KMP_ASSERT(0);
2244 }
2245 return ptr;
2246 }
2247
2248  ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
2249
2250 if (ptr) {
2251    memset(ptr, 0x00, nmemb * size);
2252 }
2253 return ptr;
2254}
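
// The overflow guard above rejects requests where nmemb * size plus the
// descriptor would wrap around SIZE_MAX. Illustrative 64-bit case, assuming a
// 40-byte kmp_mem_desc_t: for size == (1ULL << 61) and nmemb == 9,
// (SIZE_MAX - 40) / size == 7 < 9, so the call returns NULL (or aborts under
// omp_atv_abort_fb) instead of multiplying and silently overflowing.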
2255
2256void *__kmp_realloc(int gtid, void *ptr, size_t size,
2257 omp_allocator_handle_t allocator,
2258 omp_allocator_handle_t free_allocator) {
2259 void *nptr = NULL;
2260 KMP_DEBUG_ASSERT(__kmp_init_serial);
2261
2262 if (size == 0) {
2263 if (ptr != NULL)
2264      ___kmpc_free(gtid, ptr, free_allocator);
2265 return nptr;
2266 }
2267
2268  nptr = __kmp_alloc(gtid, 0, size, allocator);
2269
2270 if (nptr != NULL && ptr != NULL) {
2271 kmp_mem_desc_t desc;
2272 kmp_uintptr_t addr_align; // address to return to caller
2273 kmp_uintptr_t addr_descr; // address of memory block descriptor
2274
2275 addr_align = (kmp_uintptr_t)ptr;
2276 addr_descr = addr_align - sizeof(kmp_mem_desc_t);
2277 desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
2278
2279 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2280 KMP_DEBUG_ASSERT(desc.size_orig > 0);
2281 KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
2282    KMP_MEMCPY((char *)nptr, (char *)ptr,
2283               (size_t)((size < desc.size_orig) ? size : desc.size_orig));
2284 }
2285
2286 if (nptr != NULL) {
2287    ___kmpc_free(gtid, ptr, free_allocator);
2288 }
2289
2290 return nptr;
2291}
2292
2293void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
2294 if (ptr == NULL)
2295 return;
2296
2297 kmp_allocator_t *al;
2298 omp_allocator_handle_t oal;
2299 al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
2300 kmp_mem_desc_t desc;
2301 kmp_uintptr_t addr_align; // address to return to caller
2302 kmp_uintptr_t addr_descr; // address of memory block descriptor
2303
2304 if (al > kmp_max_mem_alloc && al->memspace > kmp_max_mem_space) {
2305 __kmp_tgt_allocator.omp_free(ptr, allocator);
2306 return;
2307 }
2308
2309 if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
2310 (allocator > kmp_max_mem_alloc &&
2311 KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
2312 kmp_int32 device =
2313 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2314 if (allocator == llvm_omp_target_host_mem_alloc) {
2315 kmp_target_free_host(ptr, device);
2316 } else if (allocator == llvm_omp_target_shared_mem_alloc) {
2317 kmp_target_free_shared(ptr, device);
2318 } else if (allocator == llvm_omp_target_device_mem_alloc) {
2319 kmp_target_free_device(ptr, device);
2320 }
2321 return;
2322 }
2323
2324 addr_align = (kmp_uintptr_t)ptr;
2325 addr_descr = addr_align - sizeof(kmp_mem_desc_t);
2326 desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
2327
2328 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
2329 if (allocator) {
2330 KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
2331 }
2332 al = desc.allocator;
2333 oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
2334 KMP_DEBUG_ASSERT(al);
2335
2336 if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
2337 kmp_int32 device =
2338 __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
2339 kmp_target_unlock_mem(desc.ptr_alloc, device);
2340 }
2341
2342#if KMP_USE_HWLOC
2343 if (__kmp_hwloc_available) {
2344 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2345 kmp_uint64 used =
2346 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2347 (void)used; // to suppress compiler warning
2348 KMP_DEBUG_ASSERT(used >= desc.size_a);
2349 }
2350 hwloc_free(__kmp_hwloc_topology, desc.ptr_alloc, desc.size_a);
2351 } else {
2352#endif
2353 if (__kmp_memkind_available) {
2354 if (oal < kmp_max_mem_alloc) {
2355 // pre-defined allocator
2356 if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
2357 kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
2358 } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
2359 kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
2360 } else {
2361 kmp_mk_free(*mk_default, desc.ptr_alloc);
2362 }
2363 } else {
2364 if (al->pool_size > 0) { // custom allocator with pool size requested
2365 kmp_uint64 used =
2366 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2367 (void)used; // to suppress compiler warning
2368 KMP_DEBUG_ASSERT(used >= desc.size_a);
2369 }
2370 kmp_mk_free(*al->memkind, desc.ptr_alloc);
2371 }
2372 } else {
2373 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
2374 kmp_uint64 used =
2375 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
2376 (void)used; // to suppress compiler warning
2377 KMP_DEBUG_ASSERT(used >= desc.size_a);
2378 }
2379 __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
2380 }
2381#if KMP_USE_HWLOC
2382 }
2383#endif
2384}
2385
2386/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
2387   memory leaks, but it may be useful for debugging memory corruption,
2388   use of freed pointers, etc. */
2389/* #define LEAK_MEMORY */
2390struct kmp_mem_descr { // Memory block descriptor.
2391 void *ptr_allocated; // Pointer returned by malloc(), subject for free().
2392 size_t size_allocated; // Size of allocated memory block.
2393 void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
2394 size_t size_aligned; // Size of aligned memory block.
2395};
2396typedef struct kmp_mem_descr kmp_mem_descr_t;
2397
2398/* Allocate memory on requested boundary, fill allocated memory with 0x00.
2399 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2400 error. Must use __kmp_free when freeing memory allocated by this routine! */
2401static void *___kmp_allocate_align(size_t size,
2402 size_t alignment KMP_SRC_LOC_DECL) {
2403 /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
2404 requested to return properly aligned pointer. Original pointer returned
2405 by malloc() and size of allocated block is saved in descriptor just
2406 before the aligned pointer. This information used by __kmp_free() -- it
2407 has to pass to free() original pointer, not aligned one.
2408
2409 +---------+------------+-----------------------------------+---------+
2410 | padding | descriptor | aligned block | padding |
2411 +---------+------------+-----------------------------------+---------+
2412 ^ ^
2413 | |
2414 | +- Aligned pointer returned to caller
2415 +- Pointer returned by malloc()
2416
2417 Aligned block is filled with zeros, paddings are filled with 0xEF. */
2418
2419 kmp_mem_descr_t descr;
2420 kmp_uintptr_t addr_allocated; // Address returned by malloc().
2421 kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
2422 kmp_uintptr_t addr_descr; // Address of memory block descriptor.
2423
2424 KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
2425 (int)size, (int)alignment KMP_SRC_LOC_PARM));
2426
2427  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big.
2428 KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
2429 // Make sure kmp_uintptr_t is enough to store addresses.
2430
2431 descr.size_aligned = size;
2432 descr.size_allocated =
2433 descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
2434
2435#if KMP_DEBUG
2436 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
2437#else
2438 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
2439#endif
2440 KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
2441 descr.ptr_allocated));
2442 if (descr.ptr_allocated == NULL) {
2443 KMP_FATAL(OutOfHeapMemory);
2444 }
2445
2446 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2447 addr_aligned =
2448 (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
2449 addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
2450
2451 descr.ptr_aligned = (void *)addr_aligned;
2452
2453 KE_TRACE(26, (" ___kmp_allocate_align: "
2454 "ptr_allocated=%p, size_allocated=%d, "
2455 "ptr_aligned=%p, size_aligned=%d\n",
2456 descr.ptr_allocated, (int)descr.size_allocated,
2457 descr.ptr_aligned, (int)descr.size_aligned));
2458
2459 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
2460 KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
2461 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2462 addr_allocated + descr.size_allocated);
2463 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
2464#ifdef KMP_DEBUG
2465  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2466// Fill allocated memory block with 0xEF.
2467#endif
2468  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
2469  // Fill the aligned memory block (which is intended for use by the
2470  // caller) with 0x00. Do not put this filling under a KMP_DEBUG
2471  // condition! Many callers expect zeroed memory.
2472  // (Padding bytes remain filled with 0xEF in the debugging
2473  // library.)
2474 *((kmp_mem_descr_t *)addr_descr) = descr;
2475
2476 KMP_MB();
2477
2478 KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
2479 return descr.ptr_aligned;
2480} // func ___kmp_allocate_align
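
// Usage note: callers reach this routine only through the __kmp_allocate() /
// __kmp_page_allocate() macros and release with __kmp_free(). In a debug
// build a block therefore looks like
//   EF..EF | descriptor | 00..00 (size bytes) | EF..EF
// which makes overruns into the padding easy to spot in a memory dump.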
2481
2482/* Allocate memory on cache line boundary, fill allocated memory with 0x00.
2483 Do not call this func directly! Use __kmp_allocate macro instead.
2484 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2485 error. Must use __kmp_free when freeing memory allocated by this routine! */
2486void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
2487 void *ptr;
2488 KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
2489 (int)size KMP_SRC_LOC_PARM));
2490  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
2491 KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
2492 return ptr;
2493} // func ___kmp_allocate
2494
2495/* Allocate memory on page boundary, fill allocated memory with 0x00.
2496   Do not call this func directly! Use __kmp_page_allocate macro instead.
2497 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2498 error. Must use __kmp_free when freeing memory allocated by this routine! */
2499void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
2500 int page_size = 8 * 1024;
2501 void *ptr;
2502
2503 KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
2504 (int)size KMP_SRC_LOC_PARM));
2505  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
2506 KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
2507 return ptr;
2508} // ___kmp_page_allocate
2509
2510/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
2511 In debug mode, fill the memory block with 0xEF before call to free(). */
2512void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
2513 kmp_mem_descr_t descr;
2514#if KMP_DEBUG
2515 kmp_uintptr_t addr_allocated; // Address returned by malloc().
2516 kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
2517#endif
2518 KE_TRACE(25,
2519 ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
2520 KMP_ASSERT(ptr != NULL);
2521
2522 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
2523
2524 KE_TRACE(26, (" __kmp_free: "
2525 "ptr_allocated=%p, size_allocated=%d, "
2526 "ptr_aligned=%p, size_aligned=%d\n",
2527 descr.ptr_allocated, (int)descr.size_allocated,
2528 descr.ptr_aligned, (int)descr.size_aligned));
2529#if KMP_DEBUG
2530 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
2531 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
2532 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
2533 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
2534 KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
2535 KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
2536 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
2537 addr_allocated + descr.size_allocated);
2538  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
2539// Fill memory block with 0xEF, it helps catch using freed memory.
2540#endif
2541
2542#ifndef LEAK_MEMORY
2543 KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
2544#ifdef KMP_DEBUG
2545 _free_src_loc(descr.ptr_allocated, _file_, _line_);
2546#else
2547 free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
2548#endif
2549#endif
2550 KMP_MB();
2551 KE_TRACE(25, ("<- __kmp_free() returns\n"));
2552} // func ___kmp_free
2553
2554#if USE_FAST_MEMORY == 3
2555// Allocate fast memory by first scanning the thread's free lists
2556// If a chunk of the right size exists, grab it off the free list.
2557// Otherwise allocate normally using kmp_thread_malloc.
2558
2559// AC: How to choose the limit? Just get 16 for now...
2560#define KMP_FREE_LIST_LIMIT 16
2561
2562// Always use 128 bytes for determining buckets for caching memory blocks
2563#define DCACHE_LINE 128
2564
2565void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
2566 void *ptr;
2567 size_t num_lines, idx;
2568 int index;
2569 void *alloc_ptr;
2570 size_t alloc_size;
2571 kmp_mem_descr_t *descr;
2572
2573 KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
2574 __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
2575
2576 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
2577 idx = num_lines - 1;
2578 KMP_DEBUG_ASSERT(idx >= 0);
2579 if (idx < 2) {
2580 index = 0; // idx is [ 0, 1 ], use first free list
2581 num_lines = 2; // 1, 2 cache lines or less than cache line
2582 } else if ((idx >>= 2) == 0) {
2583 index = 1; // idx is [ 2, 3 ], use second free list
2584 num_lines = 4; // 3, 4 cache lines
2585 } else if ((idx >>= 2) == 0) {
2586 index = 2; // idx is [ 4, 15 ], use third free list
2587 num_lines = 16; // 5, 6, ..., 16 cache lines
2588 } else if ((idx >>= 2) == 0) {
2589 index = 3; // idx is [ 16, 63 ], use fourth free list
2590 num_lines = 64; // 17, 18, ..., 64 cache lines
2591 } else {
2592 goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
2593 }
2594
2595 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
2596 if (ptr != NULL) {
2597 // pop the head of no-sync free list
2598 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2599 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2600 sizeof(kmp_mem_descr_t)))
2601 ->ptr_aligned);
2602 goto end;
2603 }
2604 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2605 if (ptr != NULL) {
2606 // no-sync free list is empty, use sync free list (filled in by other
2607 // threads only)
2608 // pop the head of the sync free list, push NULL instead
2609 while (!KMP_COMPARE_AND_STORE_PTR(
2610 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
2611 KMP_CPU_PAUSE();
2612 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2613 }
2614 // push the rest of chain into no-sync free list (can be NULL if there was
2615 // the only block)
2616 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2617 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2618 sizeof(kmp_mem_descr_t)))
2619 ->ptr_aligned);
2620 goto end;
2621 }
2622
2623alloc_call:
2624 // haven't found block in the free lists, thus allocate it
2625 size = num_lines * DCACHE_LINE;
2626
2627 alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
2628 KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
2629 "alloc_size %d\n",
2630 __kmp_gtid_from_thread(this_thr), alloc_size));
2631  alloc_ptr = bget(this_thr, (bufsize)alloc_size);
2632
2633 // align ptr to DCACHE_LINE
2634 ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
2635 DCACHE_LINE) &
2636 ~(DCACHE_LINE - 1));
2637 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2638
2639 descr->ptr_allocated = alloc_ptr; // remember allocated pointer
2640 // we don't need size_allocated
2641 descr->ptr_aligned = (void *)this_thr; // remember allocating thread
2642 // (it is already saved in bget buffer,
2643 // but we may want to use another allocator in future)
2644 descr->size_aligned = size;
2645
2646end:
2647 KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
2648 __kmp_gtid_from_thread(this_thr), ptr));
2649 return ptr;
2650} // func __kmp_fast_allocate
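
// Bucket selection above in concrete terms (DCACHE_LINE == 128):
//   size <=  256 bytes -> index 0, block rounded to  2 lines ( 256 B)
//   size <=  512 bytes -> index 1, block rounded to  4 lines ( 512 B)
//   size <= 2048 bytes -> index 2, block rounded to 16 lines (  2 KB)
//   size <= 8192 bytes -> index 3, block rounded to 64 lines (  8 KB)
// Larger requests skip the free lists and go straight to bget().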
2651
2652// Free fast memory and place it on the thread's free list if it is of
2653// the correct size.
2654void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
2655 kmp_mem_descr_t *descr;
2656 kmp_info_t *alloc_thr;
2657 size_t size;
2658 size_t idx;
2659 int index;
2660
2661 KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
2662 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
2663 KMP_ASSERT(ptr != NULL);
2664
2665 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2666
2667 KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
2668 (int)descr->size_aligned));
2669
2670 size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
2671
2672 idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
2673 if (idx == size) {
2674 index = 0; // 2 cache lines
2675 } else if ((idx <<= 1) == size) {
2676 index = 1; // 4 cache lines
2677 } else if ((idx <<= 2) == size) {
2678 index = 2; // 16 cache lines
2679 } else if ((idx <<= 2) == size) {
2680 index = 3; // 64 cache lines
2681 } else {
2682 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
2683 goto free_call; // 65 or more cache lines ( > 8KB )
2684 }
2685
2686 alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
2687 if (alloc_thr == this_thr) {
2688 // push block to self no-sync free list, linking previous head (LIFO)
2689 *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
2690 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
2691 } else {
2692 void *head = this_thr->th.th_free_lists[index].th_free_list_other;
2693 if (head == NULL) {
2694 // Create new free list
2695 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2696 *((void **)ptr) = NULL; // mark the tail of the list
2697 descr->size_allocated = (size_t)1; // head of the list keeps its length
2698 } else {
2699      // need to check the existing "other" list's owner thread and queue size
2700 kmp_mem_descr_t *dsc =
2701 (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
2702 // allocating thread, same for all queue nodes
2703 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
2704 size_t q_sz =
2705 dsc->size_allocated + 1; // new size in case we add current task
2706 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
2707 // we can add current task to "other" list, no sync needed
2708 *((void **)ptr) = head;
2709 descr->size_allocated = q_sz;
2710 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2711 } else {
2712 // either queue blocks owner is changing or size limit exceeded
2713 // return old queue to allocating thread (q_th) synchronously,
2714 // and start new list for alloc_thr's tasks
2715 void *old_ptr;
2716 void *tail = head;
2717 void *next = *((void **)head);
2718 while (next != NULL) {
2719 KMP_DEBUG_ASSERT(
2720 // queue size should decrease by 1 each step through the list
2721 ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
2722 ->size_allocated +
2723 1 ==
2724 ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
2725 ->size_allocated);
2726 tail = next; // remember tail node
2727 next = *((void **)next);
2728 }
2729 KMP_DEBUG_ASSERT(q_th != NULL);
2730 // push block to owner's sync free list
2731 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2732 /* the next pointer must be set before setting free_list to ptr to avoid
2733 exposing a broken list to other threads, even for an instant. */
2734 *((void **)tail) = old_ptr;
2735
2736 while (!KMP_COMPARE_AND_STORE_PTR(
2737 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
2738 KMP_CPU_PAUSE();
2739 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2740 *((void **)tail) = old_ptr;
2741 }
2742
2743        // start new list of not-self blocks
2744 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2745 *((void **)ptr) = NULL;
2746 descr->size_allocated = (size_t)1; // head of queue keeps its length
2747 }
2748 }
2749 }
2750 goto end;
2751
2752free_call:
2753 KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2754 __kmp_gtid_from_thread(this_thr), size));
2755  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
2756  brel(this_thr, descr->ptr_allocated);
2757
2758end:
2759 KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
2760
2761} // func __kmp_fast_free
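
// The cross-thread return path above is a lock-free LIFO push: the whole
// "other" chain is linked in front of the owning thread's th_free_list_sync
// head with a compare-and-swap retry loop. A generic sketch of the same
// pattern in C11 atomics (names are illustrative, not runtime API):
#if 0
#include <stdatomic.h>
typedef struct node { struct node *next; } node_t;
static void lifo_push_chain(_Atomic(node_t *) *head, node_t *first,
                            node_t *last) {
  node_t *old = atomic_load(head);
  do {
    last->next = old; // publish the tail link before swinging the head
  } while (!atomic_compare_exchange_weak(head, &old, first));
}
#endif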
2762
2763// Initialize the thread free lists related to fast memory
2764// Only do this when a thread is initially created.
2765void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
2766 KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
2767
2768  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
2769}
2770
2771// Free the memory in the thread free lists related to fast memory
2772// Only do this when a thread is being reaped (destroyed).
2773void __kmp_free_fast_memory(kmp_info_t *th) {
2774  // Assuming BGET is the underlying allocator, walk through its structures...
2775 int bin;
2776 thr_data_t *thr = get_thr_data(th);
2777 void **lst = NULL;
2778
2779 KE_TRACE(
2780 5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
2781
2782 __kmp_bget_dequeue(th); // Release any queued buffers
2783
2784 // Dig through free lists and extract all allocated blocks
2785 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
2786 bfhead_t *b = thr->freelist[bin].ql.flink;
2787 while (b != &thr->freelist[bin]) {
2788      if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // this is an allocated block
2789        *((void **)b) =
2790            lst; // link the list (overwrite bthr, but keep flink for now)
2791 lst = (void **)b; // push b into lst
2792 }
2793 b = b->ql.flink; // get next buffer
2794 }
2795 }
2796 while (lst != NULL) {
2797 void *next = *lst;
2798 KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2799 lst, next, th, __kmp_gtid_from_thread(th)));
2800 (*thr->relfcn)(lst);
2801#if BufStats
2802 // count blocks to prevent problems in __kmp_finalize_bget()
2803 thr->numprel++; /* Nr of expansion block releases */
2804 thr->numpblk--; /* Total number of blocks */
2805#endif
2806 lst = (void **)next;
2807 }
2808
2809 KE_TRACE(
2810 5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
2811}
2812
2813#endif // USE_FAST_MEMORY
2814
