kmp_threadprivate.cpp source code [openmp/runtime/src/kmp_threadprivate.cpp]

1	/*
2	* kmp_threadprivate.cpp -- OpenMP threadprivate support library
3	*/
4
5	//===----------------------------------------------------------------------===//
6	//
7	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8	// See https://llvm.org/LICENSE.txt for license information.
9	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "kmp.h"
14	#include "kmp_i18n.h"
15	#include "kmp_itt.h"
16
17	#define USE_CHECKS_COMMON
18
19	#define KMP_INLINE_SUBR 1
20
21	void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
22	void *data_addr, size_t pc_size);
23	struct private_common kmp_threadprivate_insert(int* gtid, void *pc_addr,
24	void *data_addr,
25	size_t pc_size);
26
27	struct shared_table __kmp_threadprivate_d_table;
28
29	static
30	#ifdef KMP_INLINE_SUBR
31	__forceinline
32	#endif
33	struct private_common *
34	__kmp_threadprivate_find_task_common(struct common_table tbl, int* gtid,
35	void *pc_addr)
36
37	{
38	struct private_common *tn;
39
40	#ifdef KMP_TASK_COMMON_DEBUG
41	KC_TRACE(`10`, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
42	"address %p\n",
43	gtid, pc_addr));
44	dump_list();
45	#endif
46
47	for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
48	if (tn->gbl_addr == pc_addr) {
49	#ifdef KMP_TASK_COMMON_DEBUG
50	KC_TRACE(`10`, ("__kmp_threadprivate_find_task_common: thread#%d, found "
51	"node %p on list\n",
52	gtid, pc_addr));
53	#endif
54	return tn;
55	}
56	}
57	return `0`;
58	}
59
60	static
61	#ifdef KMP_INLINE_SUBR
62	__forceinline
63	#endif
64	struct shared_common *
65	__kmp_find_shared_task_common(struct shared_table tbl, int* gtid,
66	void *pc_addr) {
67	struct shared_common *tn;
68
69	for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
70	if (tn->gbl_addr == pc_addr) {
71	#ifdef KMP_TASK_COMMON_DEBUG
72	KC_TRACE(
73	`10`,
74	("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
75	gtid, pc_addr));
76	#endif
77	return tn;
78	}
79	}
80	return `0`;
81	}
82
83	// Create a template for the data initialized storage. Either the template is
84	// NULL indicating zero fill, or the template is a copy of the original data.
85	static struct private_data __kmp_init_common_data(void* *pc_addr,
86	size_t pc_size) {
87	struct private_data *d;
88	size_t i;
89	char *p;
90
91	d = (struct private_data )__kmp_allocate(sizeof(struct* private_data));
92	/*
93	d->data = 0; // AC: commented out because __kmp_allocate zeroes the
94	memory
95	d->next = 0;
96	*/
97	d->size = pc_size;
98	d->more = `1`;
99
100	p = (char *)pc_addr;
101
102	for (i = pc_size; i > `0`; --i) {
103	if (*p++ != `'\0'`) {
104	d->data = __kmp_allocate(pc_size);
105	KMP_MEMCPY(dest: d->data, src: pc_addr, n: pc_size);
106	break;
107	}
108	}
109
110	return d;
111	}
112
113	// Initialize the data area from the template.
114	static void __kmp_copy_common_data(void pc_addr, struct* private_data *d) {
115	char addr = (char* *)pc_addr;
116
117	for (size_t offset = `0`; d != `0`; d = d->next) {
118	for (int i = d->more; i > `0`; --i) {
119	if (d->data == `0`)
120	memset(s: &addr[offset], c: `'\0'`, n: d->size);
121	else
122	KMP_MEMCPY(dest: &addr[offset], src: d->data, n: d->size);
123	offset += d->size;
124	}
125	}
126	}
127
128	/ we are called from __kmp_serial_initialize() with __kmp_initz_lock held. /
129	void __kmp_common_initialize(void) {
130	if (!TCR_4(__kmp_init_common)) {
131	int q;
132	#ifdef KMP_DEBUG
133	int gtid;
134	#endif
135
136	__kmp_threadpriv_cache_list = NULL;
137
138	#ifdef KMP_DEBUG
139	/ verify the uber masters were initialized /
140	for (gtid = `0`; gtid < __kmp_threads_capacity; gtid++)
141	if (__kmp_root[gtid]) {
142	KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
143	for (q = `0`; q < KMP_HASH_TABLE_SIZE; ++q)
144	KMP_DEBUG_ASSERT(
145	!__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
146	/ __kmp_root[ gitd ]-> r.r_uber_thread ->*
147	* th.th_pri_common -> data[ q ] = 0;*/
148	}
149	#endif /* KMP_DEBUG */
150
151	for (q = `0`; q < KMP_HASH_TABLE_SIZE; ++q)
152	__kmp_threadprivate_d_table.data[q] = `0`;
153
154	TCW_4(__kmp_init_common, TRUE);
155	}
156	}
157
158	/ Call all destructors for threadprivate data belonging to all threads.*
159	Currently unused! /*
160	void __kmp_common_destroy(void) {
161	if (TCR_4(__kmp_init_common)) {
162	int q;
163
164	TCW_4(__kmp_init_common, FALSE);
165
166	for (q = `0`; q < KMP_HASH_TABLE_SIZE; ++q) {
167	int gtid;
168	struct private_common *tn;
169	struct shared_common *d_tn;
170
171	/ C++ destructors need to be called once per thread before exiting.*
172	Don't call destructors for primary thread though unless we used copy
173	constructor /*
174
175	for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
176	d_tn = d_tn->next) {
177	if (d_tn->is_vec) {
178	if (d_tn->dt.dtorv != `0`) {
179	for (gtid = `0`; gtid < __kmp_all_nth; ++gtid) {
180	if (__kmp_threads[gtid]) {
181	if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
182	: (!KMP_UBER_GTID(gtid))) {
183	tn = __kmp_threadprivate_find_task_common(
184	tbl: __kmp_threads[gtid]->th.th_pri_common, gtid,
185	pc_addr: d_tn->gbl_addr);
186	if (tn) {
187	(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
188	}
189	}
190	}
191	}
192	if (d_tn->obj_init != `0`) {
193	(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
194	}
195	}
196	} else {
197	if (d_tn->dt.dtor != `0`) {
198	for (gtid = `0`; gtid < __kmp_all_nth; ++gtid) {
199	if (__kmp_threads[gtid]) {
200	if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
201	: (!KMP_UBER_GTID(gtid))) {
202	tn = __kmp_threadprivate_find_task_common(
203	tbl: __kmp_threads[gtid]->th.th_pri_common, gtid,
204	pc_addr: d_tn->gbl_addr);
205	if (tn) {
206	(*d_tn->dt.dtor)(tn->par_addr);
207	}
208	}
209	}
210	}
211	if (d_tn->obj_init != `0`) {
212	(*d_tn->dt.dtor)(d_tn->obj_init);
213	}
214	}
215	}
216	}
217	__kmp_threadprivate_d_table.data[q] = `0`;
218	}
219	}
220	}
221
222	/ Call all destructors for threadprivate data belonging to this thread /
223	void __kmp_common_destroy_gtid(int gtid) {
224	struct private_common *tn;
225	struct shared_common *d_tn;
226
227	if (!TCR_4(__kmp_init_gtid)) {
228	// This is possible when one of multiple roots initiates early library
229	// termination in a sequential region while other teams are active, and its
230	// child threads are about to end.
231	return;
232	}
233
234	KC_TRACE(`10`, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
235	if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {
236
237	if (TCR_4(__kmp_init_common)) {
238
239	/ Cannot do this here since not all threads have destroyed their data /
240	/ TCW_4(__kmp_init_common, FALSE); /
241
242	for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {
243
244	d_tn = __kmp_find_shared_task_common(tbl: &__kmp_threadprivate_d_table, gtid,
245	pc_addr: tn->gbl_addr);
246	if (d_tn == NULL)
247	continue;
248	if (d_tn->is_vec) {
249	if (d_tn->dt.dtorv != `0`) {
250	(void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
251	if (d_tn->obj_init != `0`) {
252	(void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
253	}
254	}
255	} else {
256	if (d_tn->dt.dtor != `0`) {
257	(void)(*d_tn->dt.dtor)(tn->par_addr);
258	if (d_tn->obj_init != `0`) {
259	(void)(*d_tn->dt.dtor)(d_tn->obj_init);
260	}
261	}
262	}
263	}
264	KC_TRACE(`30`, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
265	"complete\n",
266	gtid));
267	}
268	}
269	}
270
#ifdef KMP_TASK_COMMON_DEBUG
// Debug helper: traces, for every existing thread and every non-empty bucket
// of its th_pri_common table, the (gbl_addr -> par_addr) pair of each node.
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */
295
296	// NOTE: this routine is to be called only from the serial part of the program.
297	void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
298	void *data_addr, size_t pc_size) {
299	struct shared_common *lnk_tn, d_tn;
300	KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
301	__kmp_threads[gtid]->th.th_root->r.r_active == `0`);
302
303	d_tn = __kmp_find_shared_task_common(tbl: &__kmp_threadprivate_d_table, gtid,
304	pc_addr);
305
306	if (d_tn == `0`) {
307	d_tn = (struct shared_common )__kmp_allocate(sizeof(struct* shared_common));
308
309	d_tn->gbl_addr = pc_addr;
310	d_tn->pod_init = __kmp_init_common_data(pc_addr: data_addr, pc_size);
311	/*
312	d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
313	zeroes the memory
314	d_tn->ct.ctor = 0;
315	d_tn->cct.cctor = 0;;
316	d_tn->dt.dtor = 0;
317	d_tn->is_vec = FALSE;
318	d_tn->vec_len = 0L;
319	*/
320	d_tn->cmn_size = pc_size;
321
322	__kmp_acquire_lock(lck: &__kmp_global_lock, gtid);
323
324	lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
325
326	d_tn->next = *lnk_tn;
327	*lnk_tn = d_tn;
328
329	__kmp_release_lock(lck: &__kmp_global_lock, gtid);
330	}
331	}
332
333	struct private_common kmp_threadprivate_insert(int* gtid, void *pc_addr,
334	void *data_addr,
335	size_t pc_size) {
336	struct private_common tn, *tt;
337	struct shared_common *d_tn;
338
339	/ +++++++++ START OF CRITICAL SECTION +++++++++ /
340	__kmp_acquire_lock(lck: &__kmp_global_lock, gtid);
341
342	tn = (struct private_common )__kmp_allocate(sizeof(struct* private_common));
343
344	tn->gbl_addr = pc_addr;
345
346	d_tn = __kmp_find_shared_task_common(
347	tbl: &__kmp_threadprivate_d_table, gtid,
348	pc_addr); / Only the MASTER data table exists. /
349
350	if (d_tn != `0`) {
351	/ This threadprivate variable has already been seen. /
352
353	if (d_tn->pod_init == `0` && d_tn->obj_init == `0`) {
354	d_tn->cmn_size = pc_size;
355
356	if (d_tn->is_vec) {
357	if (d_tn->ct.ctorv != `0`) {
358	/ Construct from scratch so no prototype exists /
359	d_tn->obj_init = `0`;
360	} else if (d_tn->cct.cctorv != `0`) {
361	/ Now data initialize the prototype since it was previously*
362	* registered */
363	d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
364	(void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
365	} else {
366	d_tn->pod_init = __kmp_init_common_data(pc_addr: data_addr, pc_size: d_tn->cmn_size);
367	}
368	} else {
369	if (d_tn->ct.ctor != `0`) {
370	/ Construct from scratch so no prototype exists /
371	d_tn->obj_init = `0`;
372	} else if (d_tn->cct.cctor != `0`) {
373	/ Now data initialize the prototype since it was previously*
374	registered /*
375	d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
376	(void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
377	} else {
378	d_tn->pod_init = __kmp_init_common_data(pc_addr: data_addr, pc_size: d_tn->cmn_size);
379	}
380	}
381	}
382	} else {
383	struct shared_common **lnk_tn;
384
385	d_tn = (struct shared_common )__kmp_allocate(sizeof(struct* shared_common));
386	d_tn->gbl_addr = pc_addr;
387	d_tn->cmn_size = pc_size;
388	d_tn->pod_init = __kmp_init_common_data(pc_addr: data_addr, pc_size);
389	/*
390	d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
391	zeroes the memory
392	d_tn->ct.ctor = 0;
393	d_tn->cct.cctor = 0;
394	d_tn->dt.dtor = 0;
395	d_tn->is_vec = FALSE;
396	d_tn->vec_len = 0L;
397	*/
398	lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
399
400	d_tn->next = *lnk_tn;
401	*lnk_tn = d_tn;
402	}
403
404	tn->cmn_size = d_tn->cmn_size;
405
406	if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
407	tn->par_addr = (void *)pc_addr;
408	} else {
409	tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
410	}
411
412	__kmp_release_lock(lck: &__kmp_global_lock, gtid);
413	/ +++++++++ END OF CRITICAL SECTION +++++++++ /
414
415	#ifdef USE_CHECKS_COMMON
416	if (pc_size > d_tn->cmn_size) {
417	KC_TRACE(
418	`10`, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
419	" ,%" KMP_UINTPTR_SPEC ")\n",
420	pc_addr, pc_size, d_tn->cmn_size));
421	KMP_FATAL(TPCommonBlocksInconsist);
422	}
423	#endif /* USE_CHECKS_COMMON */
424
425	tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);
426
427	#ifdef KMP_TASK_COMMON_DEBUG
428	if (*tt != `0`) {
429	KC_TRACE(
430	`10`,
431	("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
432	gtid, pc_addr));
433	}
434	#endif
435	tn->next = *tt;
436	*tt = tn;
437
438	#ifdef KMP_TASK_COMMON_DEBUG
439	KC_TRACE(`10`,
440	("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
441	gtid, pc_addr));
442	dump_list();
443	#endif
444
445	/ Link the node into a simple list /
446
447	tn->link = __kmp_threads[gtid]->th.th_pri_head;
448	__kmp_threads[gtid]->th.th_pri_head = tn;
449
450	if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
451	return tn;
452
453	/ if C++ object with copy constructor, use it;*
454	* else if C++ object with constructor, use it for the non-primary thread
455	copies only;
456	* else use pod_init and memcpy
457	*
458	* C++ constructors need to be called once for each non-primary thread on
459	* allocate
460	* C++ copy constructors need to be called once for each thread on allocate */
461
462	/ C++ object with constructors/destructors; don't call constructors for*
463	primary thread though /*
464	if (d_tn->is_vec) {
465	if (d_tn->ct.ctorv != `0`) {
466	(void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
467	} else if (d_tn->cct.cctorv != `0`) {
468	(void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
469	} else if (tn->par_addr != tn->gbl_addr) {
470	__kmp_copy_common_data(pc_addr: tn->par_addr, d: d_tn->pod_init);
471	}
472	} else {
473	if (d_tn->ct.ctor != `0`) {
474	(void)(*d_tn->ct.ctor)(tn->par_addr);
475	} else if (d_tn->cct.cctor != `0`) {
476	(void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
477	} else if (tn->par_addr != tn->gbl_addr) {
478	__kmp_copy_common_data(pc_addr: tn->par_addr, d: d_tn->pod_init);
479	}
480	}
481	/ !BUILD_OPENMP_C*
482	if (tn->par_addr != tn->gbl_addr)
483	__kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); /*
484
485	return tn;
486	}
487
/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id.                    */
/* ------------------------------------------------------------------------ */
491
492	/!*
493	@ingroup THREADPRIVATE
494
495	@param loc source location information
496	@param data pointer to data being privatized
497	@param ctor pointer to constructor function for data
498	@param cctor pointer to copy constructor function for data
499	@param dtor pointer to destructor function for data
500
501	Register constructors and destructors for thread private data.
502	This function is called when executing in parallel, when we know the thread id.
503	*/
504	void __kmpc_threadprivate_register(ident_t loc, void* *data, kmpc_ctor ctor,
505	kmpc_cctor cctor, kmpc_dtor dtor) {
506	struct shared_common d_tn, *lnk_tn;
507
508	KC_TRACE(`10`, ("__kmpc_threadprivate_register: called\n"));
509
510	#ifdef USE_CHECKS_COMMON
511	/ copy constructor must be zero for current code gen (Nov 2002 - jph) /
512	KMP_ASSERT(cctor == `0`);
513	#endif /* USE_CHECKS_COMMON */
514
515	/ Only the global data table exists. /
516	d_tn = __kmp_find_shared_task_common(tbl: &__kmp_threadprivate_d_table, gtid: -`1`, pc_addr: data);
517
518	if (d_tn == `0`) {
519	d_tn = (struct shared_common )__kmp_allocate(sizeof(struct* shared_common));
520	d_tn->gbl_addr = data;
521
522	d_tn->ct.ctor = ctor;
523	d_tn->cct.cctor = cctor;
524	d_tn->dt.dtor = dtor;
525	/*
526	d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate
527	zeroes the memory
528	d_tn->vec_len = 0L;
529	d_tn->obj_init = 0;
530	d_tn->pod_init = 0;
531	*/
532	lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
533
534	d_tn->next = *lnk_tn;
535	*lnk_tn = d_tn;
536	}
537	}
538
539	void __kmpc_threadprivate(ident_t loc, kmp_int32 global_tid, void *data,
540	size_t size) {
541	void *ret;
542	struct private_common *tn;
543
544	KC_TRACE(`10`, ("__kmpc_threadprivate: T#%d called\n", global_tid));
545
546	#ifdef USE_CHECKS_COMMON
547	if (!__kmp_init_serial)
548	KMP_FATAL(RTLNotInitialized);
549	#endif /* USE_CHECKS_COMMON */
550
551	if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
552	/ The parallel address will NEVER overlap with the data_address /
553	/ dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the*
554	* data_address; use data_address = data */
555
556	KC_TRACE(`20`, ("__kmpc_threadprivate: T#%d inserting private data\n",
557	global_tid));
558	kmp_threadprivate_insert_private_data(gtid: global_tid, pc_addr: data, data_addr: data, pc_size: size);
559
560	ret = data;
561	} else {
562	KC_TRACE(
563	`50`,
564	("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
565	global_tid, data));
566	tn = __kmp_threadprivate_find_task_common(
567	tbl: __kmp_threads[global_tid]->th.th_pri_common, gtid: global_tid, pc_addr: data);
568
569	if (tn) {
570	KC_TRACE(`20`, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
571	#ifdef USE_CHECKS_COMMON
572	if ((size_t)size > tn->cmn_size) {
573	KC_TRACE(`10`, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
574	" ,%" KMP_UINTPTR_SPEC ")\n",
575	data, size, tn->cmn_size));
576	KMP_FATAL(TPCommonBlocksInconsist);
577	}
578	#endif /* USE_CHECKS_COMMON */
579	} else {
580	/ The parallel address will NEVER overlap with the data_address /
581	/ dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use*
582	* data_address = data */
583	KC_TRACE(`20`, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
584	tn = kmp_threadprivate_insert(gtid: global_tid, pc_addr: data, data_addr: data, pc_size: size);
585	}
586
587	ret = tn->par_addr;
588	}
589	KC_TRACE(`10`, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
590	global_tid, ret));
591
592	return ret;
593	}
594
595	static kmp_cached_addr_t __kmp_find_cache(void* *data) {
596	kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
597	while (ptr && ptr->data != data)
598	ptr = ptr->next;
599	return ptr;
600	}
601
602	/!*
603	@ingroup THREADPRIVATE
604	@param loc source location information
605	@param global_tid global thread number
606	@param data pointer to data to privatize
607	@param size size of data to privatize
608	@param cache pointer to cache
609	@return pointer to private storage
610
611	Allocate private storage for threadprivate data.
612	*/
613	void *
614	__kmpc_threadprivate_cached(ident_t *loc,
615	kmp_int32 global_tid, // gtid.
616	void data, // Pointer to original global variable.*
617	size_t size, // Size of original global variable.
618	void ***cache) {
619	KC_TRACE(`10`, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
620	"address: %p, size: %" KMP_SIZE_T_SPEC "\n",
621	global_tid, *cache, data, size));
622
623	if (TCR_PTR(*cache) == `0`) {
624	__kmp_acquire_lock(lck: &__kmp_global_lock, gtid: global_tid);
625
626	if (TCR_PTR(*cache) == `0`) {
627	__kmp_acquire_bootstrap_lock(lck: &__kmp_tp_cached_lock);
628	// Compiler often passes in NULL cache, even if it's already been created
629	void **my_cache;
630	kmp_cached_addr_t *tp_cache_addr;
631	// Look for an existing cache
632	tp_cache_addr = __kmp_find_cache(data);
633	if (!tp_cache_addr) { // Cache was never created; do it now
634	__kmp_tp_cached = `1`;
635	KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
636	sizeof(void ) __kmp_tp_capacity +
637	sizeof(kmp_cached_addr_t)););
638	// No need to zero the allocated memory; __kmp_allocate does that.
639	KC_TRACE(`50`, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
640	"address %p\n",
641	global_tid, my_cache));
642	/ TODO: free all this memory in __kmp_common_destroy using*
643	* __kmp_threadpriv_cache_list */
644	/ Add address of mycache to linked list for cleanup later /
645	tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
646	tp_cache_addr->addr = my_cache;
647	tp_cache_addr->data = data;
648	tp_cache_addr->compiler_cache = cache;
649	tp_cache_addr->next = __kmp_threadpriv_cache_list;
650	__kmp_threadpriv_cache_list = tp_cache_addr;
651	} else { // A cache was already created; use it
652	my_cache = tp_cache_addr->addr;
653	tp_cache_addr->compiler_cache = cache;
654	}
655	KMP_MB();
656
657	TCW_PTR(*cache, my_cache);
658	__kmp_release_bootstrap_lock(lck: &__kmp_tp_cached_lock);
659
660	KMP_MB();
661	}
662	__kmp_release_lock(lck: &__kmp_global_lock, gtid: global_tid);
663	}
664
665	void *ret;
666	if ((ret = TCR_PTR((*cache)[global_tid])) == `0`) {
667	ret = __kmpc_threadprivate(loc, global_tid, data, size: (size_t)size);
668
669	TCW_PTR((*cache)[global_tid], ret);
670	}
671	KC_TRACE(`10`,
672	("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
673	global_tid, ret));
674	return ret;
675	}
676
677	// This function should only be called when both __kmp_tp_cached_lock and
678	// kmp_forkjoin_lock are held.
679	void __kmp_threadprivate_resize_cache(int newCapacity) {
680	KC_TRACE(`10`, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
681	newCapacity));
682
683	kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
684
685	while (ptr) {
686	if (ptr->data) { // this location has an active cache; resize it
687	void **my_cache;
688	KMP_ITT_IGNORE(my_cache =
689	(void )__kmp_allocate(sizeof*(void* ) newCapacity +
690	sizeof(kmp_cached_addr_t)););
691	// No need to zero the allocated memory; __kmp_allocate does that.
692	KC_TRACE(`50`, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
693	my_cache));
694	// Now copy old cache into new cache
695	void **old_cache = ptr->addr;
696	for (int i = `0`; i < __kmp_tp_capacity; ++i) {
697	my_cache[i] = old_cache[i];
698	}
699
700	// Add address of new my_cache to linked list for cleanup later
701	kmp_cached_addr_t *tp_cache_addr;
702	tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
703	tp_cache_addr->addr = my_cache;
704	tp_cache_addr->data = ptr->data;
705	tp_cache_addr->compiler_cache = ptr->compiler_cache;
706	tp_cache_addr->next = __kmp_threadpriv_cache_list;
707	__kmp_threadpriv_cache_list = tp_cache_addr;
708
709	// Copy new cache to compiler's location: We can copy directly
710	// to (compiler_cache) if compiler guarantees it will keep*
711	// using the same location for the cache. This is not yet true
712	// for some compilers, in which case we have to check if
713	// compiler_cache is still pointing at old cache, and if so, we
714	// can point it at the new cache with an atomic compare&swap
715	// operation. (Old method will always work, but we should shift
716	// to new method (commented line below) when Intel and Clang
717	// compilers use new method.)
718	(void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
719	my_cache);
720	// TCW_PTR((tp_cache_addr->compiler_cache), my_cache);*
721
722	// If the store doesn't happen here, the compiler's old behavior will
723	// inevitably call __kmpc_threadprivate_cache with a new location for the
724	// cache, and that function will store the resized cache there at that
725	// point.
726
727	// Nullify old cache's data pointer so we skip it next time
728	ptr->data = NULL;
729	}
730	ptr = ptr->next;
731	}
732	// After all caches are resized, update __kmp_tp_capacity to the new size
733	(volatile* int *)&__kmp_tp_capacity = newCapacity;
734	}
735
736	/!*
737	@ingroup THREADPRIVATE
738	@param loc source location information
739	@param data pointer to data being privatized
740	@param ctor pointer to constructor function for data
741	@param cctor pointer to copy constructor function for data
742	@param dtor pointer to destructor function for data
743	@param vector_length length of the vector (bytes or elements?)
744	Register vector constructors and destructors for thread private data.
745	*/
746	void __kmpc_threadprivate_register_vec(ident_t loc, void* *data,
747	kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
748	kmpc_dtor_vec dtor,
749	size_t vector_length) {
750	struct shared_common d_tn, *lnk_tn;
751
752	KC_TRACE(`10`, ("__kmpc_threadprivate_register_vec: called\n"));
753
754	#ifdef USE_CHECKS_COMMON
755	/ copy constructor must be zero for current code gen (Nov 2002 - jph) /
756	KMP_ASSERT(cctor == `0`);
757	#endif /* USE_CHECKS_COMMON */
758
759	d_tn = __kmp_find_shared_task_common(
760	tbl: &__kmp_threadprivate_d_table, gtid: -`1`,
761	pc_addr: data); / Only the global data table exists. /
762
763	if (d_tn == `0`) {
764	d_tn = (struct shared_common )__kmp_allocate(sizeof(struct* shared_common));
765	d_tn->gbl_addr = data;
766
767	d_tn->ct.ctorv = ctor;
768	d_tn->cct.cctorv = cctor;
769	d_tn->dt.dtorv = dtor;
770	d_tn->is_vec = TRUE;
771	d_tn->vec_len = (size_t)vector_length;
772	// d_tn->obj_init = 0; // AC: __kmp_allocate zeroes the memory
773	// d_tn->pod_init = 0;
774	lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
775
776	d_tn->next = *lnk_tn;
777	*lnk_tn = d_tn;
778	}
779	}
780
781	void __kmp_cleanup_threadprivate_caches() {
782	kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
783
784	while (ptr) {
785	void **cache = ptr->addr;
786	__kmp_threadpriv_cache_list = ptr->next;
787	if (*ptr->compiler_cache)
788	*ptr->compiler_cache = NULL;
789	ptr->compiler_cache = NULL;
790	ptr->data = NULL;
791	ptr->addr = NULL;
792	ptr->next = NULL;
793	// Threadprivate data pointed at by cache entries are destroyed at end of
794	// __kmp_launch_thread with __kmp_common_destroy_gtid.
795	__kmp_free(cache); // implicitly frees ptr too
796	ptr = __kmp_threadpriv_cache_list;
797	}
798	}
799

source code of openmp/runtime/src/kmp_threadprivate.cpp