/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);

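// Global registry shared by all threads: the table below maps a threadprivate
// variable's original address to its shared_common record (constructors,
// destructors, initialization template, and common block size).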
struct shared_table __kmp_threadprivate_d_table;

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr)

{
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}
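// Both lookup helpers above do a simple chained-hash search keyed by
// KMP_HASH(pc_addr); they differ only in the table they walk: a thread's
// per-thread common_table versus the global shared_table of registrations.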

// Create a template for the data initialized storage. Either the template is
// NULL indicating zero fill, or the template is a copy of the original data.
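// If every byte of the original is zero, the template's data pointer is left
// NULL so later copies can simply be zero-filled instead of memcpy'd.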
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
      d->data = 0;  // AC: commented out because __kmp_allocate zeroes the
     memory
      d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}

// Initialize the data area from the template.
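// A NULL d->data means zero fill; d->more acts as a repeat count for the
// block (it is always 1 as produced by __kmp_init_common_data).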
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;

  for (size_t offset = 0; d != 0; d = d->next) {
    for (int i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}

/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /* __kmp_root[ gtid ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}

/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for primary thread though unless we used copy
         constructor */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}

/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);
        if (d_tn == NULL)
          continue;
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
            if (d_tn->obj_init != 0) {
              (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
            if (d_tn->obj_init != 0) {
              (void)(*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}

#ifdef KMP_TASK_COMMON_DEBUG
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */

// NOTE: this routine is to be called only from the serial part of the program.
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
        d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
       zeroes the memory
        d_tn->ct.ctor = 0;
        d_tn->cct.cctor = 0;;
        d_tn->dt.dtor = 0;
        d_tn->is_vec = FALSE;
        d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}

struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
           * registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
        d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
       zeroes the memory
        d_tn->ct.ctor = 0;
        d_tn->cct.cctor = 0;
        d_tn->dt.dtor = 0;
        d_tn->is_vec = FALSE;
        d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }
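  // Root (uber) threads -- or, when foreign threadprivate support is enabled,
  // only the initial thread -- reuse the original global storage as their
  // "private" copy; every other thread gets freshly allocated storage of the
  // registered size.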

  __kmp_release_lock(&__kmp_global_lock, gtid);
  /* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;
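  // __kmp_common_destroy_gtid walks this th_pri_head list when the thread
  // shuts down, running the registered destructors for each node.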

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-primary thread
     copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-primary thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     primary thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
      if (tn->par_addr != tn->gbl_addr)
          __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}

/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id.                    */
/* ------------------------------------------------------------------------ */

/*!
 @ingroup THREADPRIVATE

 @param loc source location information
 @param data pointer to data being privatized
 @param ctor pointer to constructor function for data
 @param cctor pointer to copy constructor function for data
 @param dtor pointer to destructor function for data

 Register constructors and destructors for thread private data.
 This function is called when executing in parallel, when we know the thread id.
*/
void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor) {
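  // Illustrative only (not actual compiler output): for a threadprivate C++
  // object `x` with a default constructor and destructor, a compiler could
  // emit a registration call along the lines of
  //   __kmpc_threadprivate_register(&loc, &x, &x_ctor, NULL, &x_dtor);
  // where x_ctor and x_dtor are hypothetical compiler-generated thunks
  // matching kmpc_ctor/kmpc_dtor; the copy-constructor slot must currently be
  // NULL (see the assertion below).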
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  /* Only the global data table exists. */
  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctor = ctor;
    d_tn->cct.cctor = cctor;
    d_tn->dt.dtor = dtor;
    /*
        d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate
       zeroes the memory
        d_tn->vec_len = 0L;
        d_tn->obj_init = 0;
        d_tn->pod_init = 0;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}

static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid global thread number
 @param data pointer to data to privatize
 @param size size of data to privatize
 @param cache pointer to cache
 @return pointer to private storage

 Allocate private storage for threadprivate data.
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
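  // Illustrative only: for a threadprivate variable `x`, a compiler could emit
  // roughly the following call sequence, where x_cache is a hypothetical
  // per-variable cache pointer the compiler maintains:
  //   static void **x_cache;
  //   int gtid = __kmpc_global_thread_num(&loc);
  //   int *x_priv = (int *)__kmpc_threadprivate_cached(&loc, gtid, &x,
  //                                                    sizeof(x), &x_cache);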
619 | KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, " |
620 | "address: %p, size: %" KMP_SIZE_T_SPEC "\n" , |
621 | global_tid, *cache, data, size)); |
622 | |
623 | if (TCR_PTR(*cache) == 0) { |
624 | __kmp_acquire_lock(lck: &__kmp_global_lock, gtid: global_tid); |
625 | |
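    // Double-checked locking: re-test the cache pointer now that the lock is
    // held, since another thread may have installed the cache already.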
    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add address of mycache to linked list for cleanup later */
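        // The kmp_cached_addr_t record is carved out of the same allocation,
        // in the extra space just past the __kmp_tp_capacity cache slots, so
        // freeing the cache later frees this record as well.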
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}

// This function should only be called when both __kmp_tp_cached_lock and
// __kmp_forkjoin_lock are held.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }
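      // Slots at or beyond the old capacity were zero-filled by __kmp_allocate
      // and remain NULL until threads populate them on first use.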

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to compiler's location: We can copy directly
      // to (*compiler_cache) if compiler guarantees it will keep
      // using the same location for the cache. This is not yet true
      // for some compilers, in which case we have to check if
      // compiler_cache is still pointing at old cache, and if so, we
      // can point it at the new cache with an atomic compare&swap
      // operation. (Old method will always work, but we should shift
      // to new method (commented line below) when Intel and Clang
      // compilers use new method.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cache with a new location for the
      // cache, and that function will store the resized cache there at that
      // point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data pointer to data being privatized
 @param ctor pointer to constructor function for data
 @param cctor pointer to copy constructor function for data
 @param dtor pointer to destructor function for data
 @param vector_length length of the vector (bytes or elements?)
 Register vector constructors and destructors for thread private data.
*/
void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0; // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}