1 | /* |
2 | * kmp_os.h -- KPTS runtime header file. |
3 | */ |
4 | |
5 | //===----------------------------------------------------------------------===// |
6 | // |
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | // See https://llvm.org/LICENSE.txt for license information. |
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef KMP_OS_H |
14 | #define KMP_OS_H |
15 | |
16 | #include "kmp_config.h" |
17 | #include <atomic> |
18 | #include <stdarg.h> |
19 | #include <stdlib.h> |
20 | #include <string.h> |
21 | |
22 | #define KMP_FTN_PLAIN 1 |
23 | #define KMP_FTN_APPEND 2 |
24 | #define KMP_FTN_UPPER 3 |
25 | /* |
26 | #define KMP_FTN_PREPEND 4 |
27 | #define KMP_FTN_UAPPEND 5 |
28 | */ |
29 | |
30 | #define KMP_PTR_SKIP (sizeof(void *)) |
31 | |
32 | /* -------------------------- Compiler variations ------------------------ */ |
33 | |
34 | #define KMP_OFF 0 |
35 | #define KMP_ON 1 |
36 | |
37 | #define KMP_MEM_CONS_VOLATILE 0 |
38 | #define KMP_MEM_CONS_FENCE 1 |
39 | |
40 | #ifndef KMP_MEM_CONS_MODEL |
41 | #define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE |
42 | #endif |
43 | |
44 | #ifndef __has_cpp_attribute |
45 | #define __has_cpp_attribute(x) 0 |
46 | #endif |
47 | |
48 | #ifndef __has_attribute |
49 | #define __has_attribute(x) 0 |
50 | #endif |
51 | |
52 | /* ------------------------- Compiler recognition ---------------------- */ |
53 | #define KMP_COMPILER_ICC 0 |
54 | #define KMP_COMPILER_GCC 0 |
55 | #define KMP_COMPILER_CLANG 0 |
56 | #define KMP_COMPILER_MSVC 0 |
57 | #define KMP_COMPILER_ICX 0 |
58 | |
59 | #if __INTEL_CLANG_COMPILER |
60 | #undef KMP_COMPILER_ICX |
61 | #define KMP_COMPILER_ICX 1 |
62 | #elif defined(__INTEL_COMPILER) |
63 | #undef KMP_COMPILER_ICC |
64 | #define KMP_COMPILER_ICC 1 |
65 | #elif defined(__clang__) |
66 | #undef KMP_COMPILER_CLANG |
67 | #define KMP_COMPILER_CLANG 1 |
68 | #elif defined(__GNUC__) |
69 | #undef KMP_COMPILER_GCC |
70 | #define KMP_COMPILER_GCC 1 |
71 | #elif defined(_MSC_VER) |
72 | #undef KMP_COMPILER_MSVC |
73 | #define KMP_COMPILER_MSVC 1 |
74 | #else |
75 | #error Unknown compiler |
76 | #endif |
77 | |
78 | #if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ |
79 | KMP_OS_DRAGONFLY || KMP_OS_AIX) && \ |
80 | !KMP_OS_WASI |
81 | #define KMP_AFFINITY_SUPPORTED 1 |
82 | #if KMP_OS_WINDOWS && KMP_ARCH_X86_64 |
83 | #define KMP_GROUP_AFFINITY 1 |
84 | #else |
85 | #define KMP_GROUP_AFFINITY 0 |
86 | #endif |
87 | #else |
88 | #define KMP_AFFINITY_SUPPORTED 0 |
89 | #define KMP_GROUP_AFFINITY 0 |
90 | #endif |
91 | |
92 | #if (KMP_OS_LINUX || (KMP_OS_FREEBSD && __FreeBSD_version >= 1301000)) |
93 | #define KMP_HAVE_SCHED_GETCPU 1 |
94 | #else |
95 | #define KMP_HAVE_SCHED_GETCPU 0 |
96 | #endif |
97 | |
98 | /* Check for quad-precision extension. */ |
99 | #define KMP_HAVE_QUAD 0 |
100 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
101 | #if KMP_COMPILER_ICC || KMP_COMPILER_ICX |
102 | /* _Quad is already defined for icc */ |
103 | #undef KMP_HAVE_QUAD |
104 | #define KMP_HAVE_QUAD 1 |
105 | #elif KMP_COMPILER_CLANG |
106 | /* Clang doesn't support a software-implemented |
107 | 128-bit extended precision type yet */ |
108 | typedef long double _Quad; |
109 | #elif KMP_COMPILER_GCC |
110 | /* GCC on NetBSD lacks __multc3/__divtc3 builtins needed for quad until |
111 | NetBSD 10.0 which ships with GCC 10.5 */ |
112 | #if (!KMP_OS_NETBSD || __GNUC__ >= 10) |
113 | typedef __float128 _Quad; |
114 | #undef KMP_HAVE_QUAD |
115 | #define KMP_HAVE_QUAD 1 |
116 | #endif |
117 | #elif KMP_COMPILER_MSVC |
118 | typedef long double _Quad; |
119 | #endif |
120 | #else |
121 | #if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC |
122 | typedef long double _Quad; |
123 | #undef KMP_HAVE_QUAD |
124 | #define KMP_HAVE_QUAD 1 |
125 | #endif |
126 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
127 | |
128 | #define KMP_USE_X87CONTROL 0 |
129 | #if KMP_OS_WINDOWS |
130 | #define KMP_END_OF_LINE "\r\n" |
131 | typedef char kmp_int8; |
132 | typedef unsigned char kmp_uint8; |
133 | typedef short kmp_int16; |
134 | typedef unsigned short kmp_uint16; |
135 | typedef int kmp_int32; |
136 | typedef unsigned int kmp_uint32; |
137 | #define KMP_INT32_SPEC "d" |
138 | #define KMP_UINT32_SPEC "u" |
139 | #ifndef KMP_STRUCT64 |
140 | typedef __int64 kmp_int64; |
141 | typedef unsigned __int64 kmp_uint64; |
142 | #define KMP_INT64_SPEC "I64d" |
143 | #define KMP_UINT64_SPEC "I64u" |
144 | #else |
145 | struct kmp_struct64 { |
146 | kmp_int32 a, b; |
147 | }; |
148 | typedef struct kmp_struct64 kmp_int64; |
149 | typedef struct kmp_struct64 kmp_uint64; |
150 | /* Not sure what to use for KMP_[U]INT64_SPEC here */ |
151 | #endif |
152 | #if KMP_ARCH_X86 && KMP_MSVC_COMPAT |
153 | #undef KMP_USE_X87CONTROL |
154 | #define KMP_USE_X87CONTROL 1 |
155 | #endif |
156 | #if KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 |
157 | #define KMP_INTPTR 1 |
158 | typedef __int64 kmp_intptr_t; |
159 | typedef unsigned __int64 kmp_uintptr_t; |
160 | #define KMP_INTPTR_SPEC "I64d" |
161 | #define KMP_UINTPTR_SPEC "I64u" |
162 | #endif |
163 | #endif /* KMP_OS_WINDOWS */ |
164 | |
165 | #if KMP_OS_UNIX |
166 | #define KMP_END_OF_LINE "\n" |
167 | typedef char kmp_int8; |
168 | typedef unsigned char kmp_uint8; |
169 | typedef short kmp_int16; |
170 | typedef unsigned short kmp_uint16; |
171 | typedef int kmp_int32; |
172 | typedef unsigned int kmp_uint32; |
173 | typedef long long kmp_int64; |
174 | typedef unsigned long long kmp_uint64; |
175 | #define KMP_INT32_SPEC "d" |
176 | #define KMP_UINT32_SPEC "u" |
177 | #define KMP_INT64_SPEC "lld" |
178 | #define KMP_UINT64_SPEC "llu" |
179 | #endif /* KMP_OS_UNIX */ |
180 | |
181 | #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \ |
182 | KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 |
183 | #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC |
184 | #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ |
185 | KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ |
186 | KMP_ARCH_VE || KMP_ARCH_S390X |
187 | #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC |
188 | #else |
189 | #error "Can't determine size_t printf format specifier." |
190 | #endif |
191 | |
192 | #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_WASM || KMP_ARCH_PPC |
193 | #define KMP_SIZE_T_MAX (0xFFFFFFFF) |
194 | #else |
195 | #define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF) |
196 | #endif |
197 | |
198 | typedef size_t kmp_size_t; |
199 | typedef float kmp_real32; |
200 | typedef double kmp_real64; |
201 | |
202 | #ifndef KMP_INTPTR |
203 | #define KMP_INTPTR 1 |
204 | typedef long kmp_intptr_t; |
205 | typedef unsigned long kmp_uintptr_t; |
206 | #define KMP_INTPTR_SPEC "ld" |
207 | #define KMP_UINTPTR_SPEC "lu" |
208 | #endif |
209 | |
210 | #ifdef BUILD_I8 |
211 | typedef kmp_int64 kmp_int; |
212 | typedef kmp_uint64 kmp_uint; |
213 | #else |
214 | typedef kmp_int32 kmp_int; |
215 | typedef kmp_uint32 kmp_uint; |
216 | #endif /* BUILD_I8 */ |
217 | #define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF) |
218 | #define KMP_INT_MIN ((kmp_int32)0x80000000) |
219 | |
220 | // stdarg handling |
221 | #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_WASM) && \ |
222 | (KMP_OS_FREEBSD || KMP_OS_LINUX || KMP_OS_WASI) |
223 | typedef va_list *kmp_va_list; |
224 | #define kmp_va_deref(ap) (*(ap)) |
225 | #define kmp_va_addr_of(ap) (&(ap)) |
226 | #else |
227 | typedef va_list kmp_va_list; |
228 | #define kmp_va_deref(ap) (ap) |
229 | #define kmp_va_addr_of(ap) (ap) |
230 | #endif |
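
// Illustrative sketch (hypothetical helpers, not compiled): a callee receiving
// a kmp_va_list reads arguments through kmp_va_deref, while the caller that
// owns the va_list hands it over with kmp_va_addr_of, so the same code works
// whether kmp_va_list is va_list or va_list *.
//   void consume(kmp_va_list ap) { void *p = va_arg(kmp_va_deref(ap), void *); }
//   void produce(int n, ...) {
//     va_list ap;
//     va_start(ap, n);
//     consume(kmp_va_addr_of(ap));
//     va_end(ap);
//   }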
231 | |
232 | #ifdef __cplusplus |
233 | // macros to cast out qualifiers and to re-interpret types |
234 | #define CCAST(type, var) const_cast<type>(var) |
235 | #define RCAST(type, var) reinterpret_cast<type>(var) |
236 | //------------------------------------------------------------------------- |
237 | // template for debug prints specification ( d, u, lld, llu ), and to obtain |
238 | // signed/unsigned flavors of a type |
239 | template <typename T> struct traits_t {}; |
240 | // int |
241 | template <> struct traits_t<signed int> { |
242 | typedef signed int signed_t; |
243 | typedef unsigned int unsigned_t; |
244 | typedef double floating_t; |
245 | static char const *spec; |
246 | static const signed_t max_value = 0x7fffffff; |
247 | static const signed_t min_value = 0x80000000; |
248 | static const int type_size = sizeof(signed_t); |
249 | }; |
250 | // unsigned int |
251 | template <> struct traits_t<unsigned int> { |
252 | typedef signed int signed_t; |
253 | typedef unsigned int unsigned_t; |
254 | typedef double floating_t; |
255 | static char const *spec; |
256 | static const unsigned_t max_value = 0xffffffff; |
257 | static const unsigned_t min_value = 0x00000000; |
258 | static const int type_size = sizeof(unsigned_t); |
259 | }; |
260 | // long |
261 | template <> struct traits_t<signed long> { |
262 | typedef signed long signed_t; |
263 | typedef unsigned long unsigned_t; |
264 | typedef long double floating_t; |
265 | static char const *spec; |
266 | static const int type_size = sizeof(signed_t); |
267 | }; |
268 | // long long |
269 | template <> struct traits_t<signed long long> { |
270 | typedef signed long long signed_t; |
271 | typedef unsigned long long unsigned_t; |
272 | typedef long double floating_t; |
273 | static char const *spec; |
274 | static const signed_t max_value = 0x7fffffffffffffffLL; |
275 | static const signed_t min_value = 0x8000000000000000LL; |
276 | static const int type_size = sizeof(signed_t); |
277 | }; |
278 | // unsigned long long |
279 | template <> struct traits_t<unsigned long long> { |
280 | typedef signed long long signed_t; |
281 | typedef unsigned long long unsigned_t; |
282 | typedef long double floating_t; |
283 | static char const *spec; |
284 | static const unsigned_t max_value = 0xffffffffffffffffLL; |
285 | static const unsigned_t min_value = 0x0000000000000000LL; |
286 | static const int type_size = sizeof(unsigned_t); |
287 | }; |
288 | //------------------------------------------------------------------------- |
289 | #else |
290 | #define CCAST(type, var) (type)(var) |
291 | #define RCAST(type, var) (type)(var) |
292 | #endif // __cplusplus |
293 | |
294 | #define KMP_EXPORT extern /* export declaration in guide libraries */ |
295 | |
296 | #if __GNUC__ >= 4 && !defined(__MINGW32__) |
297 | #define __forceinline __inline |
298 | #endif |
299 | |
300 | /* Check if the OS/arch can support user-level mwait */ |
301 | // All mwait code tests for UMWAIT first, so it should only fall back to ring3 |
302 | // MWAIT for KNL. |
303 | #define KMP_HAVE_MWAIT \ |
304 | ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) && \ |
305 | !KMP_MIC2) |
306 | #define KMP_HAVE_UMWAIT \ |
307 | ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) && \ |
308 | !KMP_MIC) |
309 | |
310 | #if KMP_OS_WINDOWS |
// Don't include everything related to NT status codes; we'll do that explicitly
312 | #define WIN32_NO_STATUS |
313 | #include <windows.h> |
314 | |
315 | static inline int KMP_GET_PAGE_SIZE(void) { |
316 | SYSTEM_INFO si; |
317 | GetSystemInfo(&si); |
318 | return si.dwPageSize; |
319 | } |
320 | #else |
321 | #define KMP_GET_PAGE_SIZE() getpagesize() |
322 | #endif |
323 | |
324 | #define PAGE_ALIGNED(_addr) \ |
325 | (!((size_t)_addr & (size_t)(KMP_GET_PAGE_SIZE() - 1))) |
326 | #define ALIGN_TO_PAGE(x) \ |
327 | (void *)(((size_t)(x)) & ~((size_t)(KMP_GET_PAGE_SIZE() - 1))) |
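
// Illustrative example (assuming a 4096-byte page size, not compiled): for an
// address 0x12345678, ALIGN_TO_PAGE yields 0x12345000 and PAGE_ALIGNED is
// false; for 0x12345000, PAGE_ALIGNED is true. Both macros rely on the page
// size being a power of two.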
328 | |
329 | /* ---------- Support for cache alignment, padding, etc. ----------------*/ |
330 | |
331 | #ifdef __cplusplus |
332 | extern "C" { |
333 | #endif // __cplusplus |
334 | |
335 | #define INTERNODE_CACHE_LINE 4096 /* for multi-node systems */ |
336 | |
337 | /* Define the default size of the cache line */ |
338 | #ifndef CACHE_LINE |
339 | #define CACHE_LINE 128 /* cache line size in bytes */ |
340 | #else |
341 | #if (CACHE_LINE < 64) && !defined(KMP_OS_DARWIN) |
342 | // 2006-02-13: This produces too many warnings on OS X*. Disable for now |
343 | #warning CACHE_LINE is too small. |
344 | #endif |
345 | #endif /* CACHE_LINE */ |
346 | |
347 | #define KMP_CACHE_PREFETCH(ADDR) /* nothing */ |
348 | |
349 | // Define attribute that indicates that the fall through from the previous |
350 | // case label is intentional and should not be diagnosed by a compiler |
351 | // Code from libcxx/include/__config |
// Use a function-like macro to imply that it must be followed by a semicolon
353 | #if __cplusplus > 201402L && __has_cpp_attribute(fallthrough) |
354 | #define KMP_FALLTHROUGH() [[fallthrough]] |
355 | // icc cannot properly tell this attribute is absent so force off |
356 | #elif KMP_COMPILER_ICC |
357 | #define KMP_FALLTHROUGH() ((void)0) |
358 | #elif __has_cpp_attribute(clang::fallthrough) |
359 | #define KMP_FALLTHROUGH() [[clang::fallthrough]] |
360 | #elif __has_attribute(fallthrough) || __GNUC__ >= 7 |
361 | #define KMP_FALLTHROUGH() __attribute__((__fallthrough__)) |
362 | #else |
363 | #define KMP_FALLTHROUGH() ((void)0) |
364 | #endif |
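
// Illustrative usage (hypothetical switch, not compiled):
//   switch (kind) {
//   case 0:
//     handle_zero();
//     KMP_FALLTHROUGH(); // deliberate fall through, no compiler diagnostic
//   case 1:
//     handle_one();
//     break;
//   }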
365 | |
366 | #if KMP_HAVE_ATTRIBUTE_WAITPKG |
367 | #define KMP_ATTRIBUTE_TARGET_WAITPKG __attribute__((target("waitpkg"))) |
368 | #else |
369 | #define KMP_ATTRIBUTE_TARGET_WAITPKG /* Nothing */ |
370 | #endif |
371 | |
372 | #if KMP_HAVE_ATTRIBUTE_RTM |
373 | #define KMP_ATTRIBUTE_TARGET_RTM __attribute__((target("rtm"))) |
374 | #else |
375 | #define KMP_ATTRIBUTE_TARGET_RTM /* Nothing */ |
376 | #endif |
377 | |
378 | // Define attribute that indicates a function does not return |
379 | #if __cplusplus >= 201103L |
380 | #define KMP_NORETURN [[noreturn]] |
381 | #elif KMP_OS_WINDOWS |
382 | #define KMP_NORETURN __declspec(noreturn) |
383 | #else |
384 | #define KMP_NORETURN __attribute__((noreturn)) |
385 | #endif |
386 | |
387 | #if KMP_OS_WINDOWS && KMP_MSVC_COMPAT |
388 | #define KMP_ALIGN(bytes) __declspec(align(bytes)) |
389 | #define KMP_THREAD_LOCAL __declspec(thread) |
390 | #define KMP_ALIAS /* Nothing */ |
391 | #else |
392 | #define KMP_ALIGN(bytes) __attribute__((aligned(bytes))) |
393 | #define KMP_THREAD_LOCAL __thread |
394 | #define KMP_ALIAS(alias_of) __attribute__((alias(alias_of))) |
395 | #endif |
396 | |
397 | #if KMP_HAVE_WEAK_ATTRIBUTE && !KMP_DYNAMIC_LIB |
398 | #define KMP_WEAK_ATTRIBUTE_EXTERNAL __attribute__((weak)) |
399 | #else |
400 | #define KMP_WEAK_ATTRIBUTE_EXTERNAL /* Nothing */ |
401 | #endif |
402 | |
403 | #if KMP_HAVE_WEAK_ATTRIBUTE |
404 | #define KMP_WEAK_ATTRIBUTE_INTERNAL __attribute__((weak)) |
405 | #else |
406 | #define KMP_WEAK_ATTRIBUTE_INTERNAL /* Nothing */ |
407 | #endif |
408 | |
409 | // Define KMP_VERSION_SYMBOL and KMP_EXPAND_NAME |
410 | #ifndef KMP_STR |
411 | #define KMP_STR(x) _KMP_STR(x) |
412 | #define _KMP_STR(x) #x |
413 | #endif |
414 | |
415 | #ifdef KMP_USE_VERSION_SYMBOLS |
416 | // If using versioned symbols, KMP_EXPAND_NAME prepends |
417 | // __kmp_api_ to the real API name |
418 | #define KMP_EXPAND_NAME(api_name) _KMP_EXPAND_NAME(api_name) |
419 | #define _KMP_EXPAND_NAME(api_name) __kmp_api_##api_name |
420 | #define KMP_VERSION_SYMBOL(api_name, ver_num, ver_str) \ |
421 | _KMP_VERSION_SYMBOL(api_name, ver_num, ver_str, "VERSION") |
422 | #define _KMP_VERSION_SYMBOL(api_name, ver_num, ver_str, default_ver) \ |
423 | __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver_num##_alias \ |
424 | __attribute__((alias(KMP_STR(__kmp_api_##api_name)))); \ |
425 | __asm__( \ |
426 | ".symver " KMP_STR(__kmp_api_##api_name##_##ver_num##_alias) "," KMP_STR( \ |
427 | api_name) "@" ver_str "\n\t"); \ |
428 | __asm__(".symver " KMP_STR(__kmp_api_##api_name) "," KMP_STR( \ |
429 | api_name) "@@" default_ver "\n\t") |
430 | |
431 | #define KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str) \ |
432 | _KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str, "VERSION") |
433 | #define _KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str, \ |
434 | default_ver) \ |
435 | __typeof__(__kmp_api_##apic_name) __kmp_api_##apic_name##_##ver_num##_alias \ |
436 | __attribute__((alias(KMP_STR(__kmp_api_##apic_name)))); \ |
437 | __asm__(".symver " KMP_STR(__kmp_api_##apic_name) "," KMP_STR( \ |
438 | apic_name) "@@" default_ver "\n\t"); \ |
439 | __asm__( \ |
440 | ".symver " KMP_STR(__kmp_api_##apic_name##_##ver_num##_alias) "," KMP_STR( \ |
441 | api_name) "@" ver_str "\n\t") |
442 | |
443 | #else // KMP_USE_VERSION_SYMBOLS |
444 | #define KMP_EXPAND_NAME(api_name) api_name |
445 | #define KMP_VERSION_SYMBOL(api_name, ver_num, ver_str) /* Nothing */ |
446 | #define KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, \ |
447 | ver_str) /* Nothing */ |
448 | #endif // KMP_USE_VERSION_SYMBOLS |
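
// Illustrative sketch (hypothetical API "omp_foo" and version "OMP_1.0", not
// compiled):
//   int KMP_EXPAND_NAME(omp_foo)(void) { return 0; } // defines __kmp_api_omp_foo
//   KMP_VERSION_SYMBOL(omp_foo, 10, "OMP_1.0");
// With version symbols enabled, this aliases __kmp_api_omp_foo to
// omp_foo@OMP_1.0 and makes omp_foo@@VERSION the default; otherwise the two
// macros expand to the plain name and to nothing, respectively.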
449 | |
450 | /* Temporary note: if performance testing of this passes, we can remove |
451 | all references to KMP_DO_ALIGN and replace with KMP_ALIGN. */ |
452 | #define KMP_DO_ALIGN(bytes) KMP_ALIGN(bytes) |
453 | #define KMP_ALIGN_CACHE KMP_ALIGN(CACHE_LINE) |
454 | #define KMP_ALIGN_CACHE_INTERNODE KMP_ALIGN(INTERNODE_CACHE_LINE) |
455 | |
456 | /* General purpose fence types for memory operations */ |
457 | enum kmp_mem_fence_type { |
458 | kmp_no_fence, /* No memory fence */ |
459 | kmp_acquire_fence, /* Acquire (read) memory fence */ |
460 | kmp_release_fence, /* Release (write) memory fence */ |
461 | kmp_full_fence /* Full (read+write) memory fence */ |
462 | }; |
463 | |
464 | // Synchronization primitives |
465 | |
#if KMP_ASM_INTRINS && KMP_OS_WINDOWS &&                                       \
    !((KMP_ARCH_AARCH64 || KMP_ARCH_ARM) &&                                    \
      (KMP_COMPILER_CLANG || KMP_COMPILER_GCC))
467 | |
468 | #if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG |
469 | #pragma intrinsic(InterlockedExchangeAdd) |
470 | #pragma intrinsic(InterlockedCompareExchange) |
471 | #pragma intrinsic(InterlockedExchange) |
472 | #if !KMP_32_BIT_ARCH |
473 | #pragma intrinsic(InterlockedExchange64) |
474 | #endif |
475 | #endif |
476 | |
477 | // Using InterlockedIncrement / InterlockedDecrement causes a library loading |
478 | // ordering problem, so we use InterlockedExchangeAdd instead. |
479 | #define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd((volatile long *)(p), 1) |
480 | #define KMP_TEST_THEN_INC_ACQ32(p) \ |
481 | InterlockedExchangeAdd((volatile long *)(p), 1) |
482 | #define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd((volatile long *)(p), 4) |
483 | #define KMP_TEST_THEN_ADD4_ACQ32(p) \ |
484 | InterlockedExchangeAdd((volatile long *)(p), 4) |
485 | #define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd((volatile long *)(p), -1) |
486 | #define KMP_TEST_THEN_DEC_ACQ32(p) \ |
487 | InterlockedExchangeAdd((volatile long *)(p), -1) |
488 | #define KMP_TEST_THEN_ADD32(p, v) \ |
489 | InterlockedExchangeAdd((volatile long *)(p), (v)) |
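
// All KMP_TEST_THEN_* forms are fetch-and-op style: they return the value the
// location held before the update. Illustrative sketch (hypothetical counter,
// not compiled):
//   kmp_int32 ctr = 5;
//   kmp_int32 old = KMP_TEST_THEN_INC32(&ctr); // old == 5, ctr == 6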
490 | |
491 | #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ |
492 | InterlockedCompareExchange((volatile long *)(p), (long)(sv), (long)(cv)) |
493 | |
494 | #define KMP_XCHG_FIXED32(p, v) \ |
495 | InterlockedExchange((volatile long *)(p), (long)(v)) |
496 | #define KMP_XCHG_FIXED64(p, v) \ |
497 | InterlockedExchange64((volatile kmp_int64 *)(p), (kmp_int64)(v)) |
498 | |
499 | inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) { |
500 | kmp_int32 tmp = InterlockedExchange((volatile long *)p, *(long *)&v); |
501 | return *(kmp_real32 *)&tmp; |
502 | } |
503 | |
504 | #define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8((p), (v)) |
505 | #define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8((p), (v)) |
506 | #define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32((p), (v)) |
507 | #define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32((p), (v)) |
508 | #define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64((p), (v)) |
509 | #define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64((p), (v)) |
510 | |
511 | extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); |
512 | extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); |
513 | extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v); |
514 | extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v); |
515 | extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v); |
516 | extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v); |
517 | extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v); |
518 | extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v); |
519 | |
520 | #if KMP_ARCH_AARCH64 && KMP_COMPILER_MSVC && !KMP_COMPILER_CLANG |
521 | #define KMP_TEST_THEN_INC64(p) _InterlockedExchangeAdd64((p), 1LL) |
522 | #define KMP_TEST_THEN_INC_ACQ64(p) _InterlockedExchangeAdd64_acq((p), 1LL) |
523 | #define KMP_TEST_THEN_ADD4_64(p) _InterlockedExchangeAdd64((p), 4LL) |
524 | // #define KMP_TEST_THEN_ADD4_ACQ64(p) _InterlockedExchangeAdd64_acq((p), 4LL) |
525 | // #define KMP_TEST_THEN_DEC64(p) _InterlockedExchangeAdd64((p), -1LL) |
526 | // #define KMP_TEST_THEN_DEC_ACQ64(p) _InterlockedExchangeAdd64_acq((p), -1LL) |
527 | // #define KMP_TEST_THEN_ADD8(p, v) _InterlockedExchangeAdd8((p), (v)) |
528 | #define KMP_TEST_THEN_ADD64(p, v) _InterlockedExchangeAdd64((p), (v)) |
529 | |
530 | #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ |
531 | __kmp_compare_and_store_acq8((p), (cv), (sv)) |
532 | #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ |
533 | __kmp_compare_and_store_rel8((p), (cv), (sv)) |
534 | #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ |
535 | __kmp_compare_and_store_acq16((p), (cv), (sv)) |
536 | /* |
537 | #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ |
538 | __kmp_compare_and_store_rel16((p), (cv), (sv)) |
539 | */ |
540 | #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ |
541 | __kmp_compare_and_store_acq32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
542 | (kmp_int32)(sv)) |
543 | #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ |
544 | __kmp_compare_and_store_rel32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
545 | (kmp_int32)(sv)) |
546 | #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ |
547 | __kmp_compare_and_store_acq64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
548 | (kmp_int64)(sv)) |
549 | #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ |
550 | __kmp_compare_and_store_rel64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
551 | (kmp_int64)(sv)) |
552 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
553 | __kmp_compare_and_store_ptr((void *volatile *)(p), (void *)(cv), (void *)(sv)) |
554 | |
555 | // KMP_COMPARE_AND_STORE expects this order: pointer, compare, exchange |
556 | // _InterlockedCompareExchange expects this order: pointer, exchange, compare |
557 | // KMP_COMPARE_AND_STORE also returns a bool indicating a successful write. A |
558 | // write is successful if the return value of _InterlockedCompareExchange is the |
559 | // same as the compare value. |
560 | inline kmp_int8 __kmp_compare_and_store_acq8(volatile kmp_int8 *p, kmp_int8 cv, |
561 | kmp_int8 sv) { |
562 | return _InterlockedCompareExchange8_acq(p, sv, cv) == cv; |
563 | } |
564 | |
565 | inline kmp_int8 __kmp_compare_and_store_rel8(volatile kmp_int8 *p, kmp_int8 cv, |
566 | kmp_int8 sv) { |
567 | return _InterlockedCompareExchange8_rel(p, sv, cv) == cv; |
568 | } |
569 | |
570 | inline kmp_int16 __kmp_compare_and_store_acq16(volatile kmp_int16 *p, |
571 | kmp_int16 cv, kmp_int16 sv) { |
572 | return _InterlockedCompareExchange16_acq(p, sv, cv) == cv; |
573 | } |
574 | |
575 | inline kmp_int16 __kmp_compare_and_store_rel16(volatile kmp_int16 *p, |
576 | kmp_int16 cv, kmp_int16 sv) { |
577 | return _InterlockedCompareExchange16_rel(p, sv, cv) == cv; |
578 | } |
579 | |
580 | inline kmp_int32 __kmp_compare_and_store_acq32(volatile kmp_int32 *p, |
581 | kmp_int32 cv, kmp_int32 sv) { |
582 | return _InterlockedCompareExchange_acq((volatile long *)p, sv, cv) == cv; |
583 | } |
584 | |
585 | inline kmp_int32 __kmp_compare_and_store_rel32(volatile kmp_int32 *p, |
586 | kmp_int32 cv, kmp_int32 sv) { |
587 | return _InterlockedCompareExchange_rel((volatile long *)p, sv, cv) == cv; |
588 | } |
589 | |
590 | inline kmp_int32 __kmp_compare_and_store_acq64(volatile kmp_int64 *p, |
591 | kmp_int64 cv, kmp_int64 sv) { |
592 | return _InterlockedCompareExchange64_acq(p, sv, cv) == cv; |
593 | } |
594 | |
595 | inline kmp_int32 __kmp_compare_and_store_rel64(volatile kmp_int64 *p, |
596 | kmp_int64 cv, kmp_int64 sv) { |
597 | return _InterlockedCompareExchange64_rel(p, sv, cv) == cv; |
598 | } |
599 | |
600 | inline kmp_int32 __kmp_compare_and_store_ptr(void *volatile *p, void *cv, |
601 | void *sv) { |
602 | return _InterlockedCompareExchangePointer(p, sv, cv) == cv; |
603 | } |
604 | |
605 | // The _RET versions return the value instead of a bool |
606 | |
607 | #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ |
608 | _InterlockedCompareExchange8((p), (sv), (cv)) |
609 | #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ |
610 | _InterlockedCompareExchange16((p), (sv), (cv)) |
611 | |
612 | #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ |
613 | _InterlockedCompareExchange64((volatile kmp_int64 *)(p), (kmp_int64)(sv), \ |
614 | (kmp_int64)(cv)) |
615 | |
617 | #define KMP_XCHG_FIXED8(p, v) \ |
618 | _InterlockedExchange8((volatile kmp_int8 *)(p), (kmp_int8)(v)); |
619 | #define KMP_XCHG_FIXED16(p, v) _InterlockedExchange16((p), (v)); |
620 | #define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)); |
621 | |
622 | inline kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v) { |
  kmp_int64 tmp =
      _InterlockedExchange64((volatile kmp_int64 *)p, *(kmp_int64 *)&v);
  return *(kmp_real64 *)&tmp;
625 | } |
626 | |
627 | #else // !KMP_ARCH_AARCH64 |
628 | |
629 | // Routines that we still need to implement in assembly. |
630 | extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); |
631 | |
632 | extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv, |
633 | kmp_int8 sv); |
634 | extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, |
635 | kmp_int16 sv); |
636 | extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, |
637 | kmp_int32 sv); |
638 | extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, |
639 | kmp_int64 sv); |
640 | extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, |
641 | kmp_int8 sv); |
642 | extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p, |
643 | kmp_int16 cv, kmp_int16 sv); |
644 | extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p, |
645 | kmp_int32 cv, kmp_int32 sv); |
646 | extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p, |
647 | kmp_int64 cv, kmp_int64 sv); |
648 | |
649 | extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v); |
650 | extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v); |
651 | extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v); |
652 | extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v); |
653 | extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v); |
654 | extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); |
655 | |
656 | //#define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32((p), 1) |
657 | //#define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32((p), 1) |
658 | #define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64((p), 1LL) |
659 | #define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64((p), 1LL) |
660 | //#define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32((p), 4) |
661 | //#define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32((p), 4) |
662 | #define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64((p), 4LL) |
663 | #define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64((p), 4LL) |
664 | //#define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32((p), -1) |
665 | //#define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32((p), -1) |
666 | #define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64((p), -1LL) |
667 | #define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64((p), -1LL) |
668 | //#define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32((p), (v)) |
669 | #define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8((p), (v)) |
670 | #define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64((p), (v)) |
671 | |
673 | #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ |
674 | __kmp_compare_and_store8((p), (cv), (sv)) |
675 | #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ |
676 | __kmp_compare_and_store8((p), (cv), (sv)) |
677 | #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ |
678 | __kmp_compare_and_store16((p), (cv), (sv)) |
679 | #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ |
680 | __kmp_compare_and_store16((p), (cv), (sv)) |
681 | #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ |
682 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
683 | (kmp_int32)(sv)) |
684 | #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ |
685 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
686 | (kmp_int32)(sv)) |
687 | #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ |
688 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
689 | (kmp_int64)(sv)) |
690 | #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ |
691 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
692 | (kmp_int64)(sv)) |
693 | |
694 | #if KMP_ARCH_X86 |
695 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
696 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
697 | (kmp_int32)(sv)) |
698 | #else /* 64 bit pointers */ |
699 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
700 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
701 | (kmp_int64)(sv)) |
702 | #endif /* KMP_ARCH_X86 */ |
703 | |
704 | #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ |
705 | __kmp_compare_and_store_ret8((p), (cv), (sv)) |
706 | #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ |
707 | __kmp_compare_and_store_ret16((p), (cv), (sv)) |
708 | #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ |
709 | __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
710 | (kmp_int64)(sv)) |
711 | |
712 | #define KMP_XCHG_FIXED8(p, v) \ |
713 | __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v)); |
714 | #define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v)); |
715 | //#define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v)); |
716 | //#define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v)); |
717 | //#define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v)); |
718 | #define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)); |
719 | #endif |
720 | |
721 | #elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64) |
722 | |
/* cast p to the correct type so that the proper intrinsic will be used */
724 | #define KMP_TEST_THEN_INC32(p) \ |
725 | __sync_fetch_and_add((volatile kmp_int32 *)(p), 1) |
726 | #define KMP_TEST_THEN_INC_ACQ32(p) \ |
727 | __sync_fetch_and_add((volatile kmp_int32 *)(p), 1) |
728 | #if KMP_ARCH_MIPS |
729 | #define KMP_TEST_THEN_INC64(p) \ |
730 | __atomic_fetch_add((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST) |
731 | #define KMP_TEST_THEN_INC_ACQ64(p) \ |
732 | __atomic_fetch_add((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST) |
733 | #else |
734 | #define KMP_TEST_THEN_INC64(p) \ |
735 | __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL) |
736 | #define KMP_TEST_THEN_INC_ACQ64(p) \ |
737 | __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL) |
738 | #endif |
739 | #define KMP_TEST_THEN_ADD4_32(p) \ |
740 | __sync_fetch_and_add((volatile kmp_int32 *)(p), 4) |
741 | #define KMP_TEST_THEN_ADD4_ACQ32(p) \ |
742 | __sync_fetch_and_add((volatile kmp_int32 *)(p), 4) |
743 | #if KMP_ARCH_MIPS |
744 | #define KMP_TEST_THEN_ADD4_64(p) \ |
745 | __atomic_fetch_add((volatile kmp_int64 *)(p), 4LL, __ATOMIC_SEQ_CST) |
746 | #define KMP_TEST_THEN_ADD4_ACQ64(p) \ |
747 | __atomic_fetch_add((volatile kmp_int64 *)(p), 4LL, __ATOMIC_SEQ_CST) |
748 | #define KMP_TEST_THEN_DEC64(p) \ |
749 | __atomic_fetch_sub((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST) |
750 | #define KMP_TEST_THEN_DEC_ACQ64(p) \ |
751 | __atomic_fetch_sub((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST) |
752 | #else |
753 | #define KMP_TEST_THEN_ADD4_64(p) \ |
754 | __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL) |
755 | #define KMP_TEST_THEN_ADD4_ACQ64(p) \ |
756 | __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL) |
757 | #define KMP_TEST_THEN_DEC64(p) \ |
758 | __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL) |
759 | #define KMP_TEST_THEN_DEC_ACQ64(p) \ |
760 | __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL) |
761 | #endif |
762 | #define KMP_TEST_THEN_DEC32(p) \ |
763 | __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1) |
764 | #define KMP_TEST_THEN_DEC_ACQ32(p) \ |
765 | __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1) |
766 | #define KMP_TEST_THEN_ADD8(p, v) \ |
767 | __sync_fetch_and_add((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
768 | #define KMP_TEST_THEN_ADD32(p, v) \ |
769 | __sync_fetch_and_add((volatile kmp_int32 *)(p), (kmp_int32)(v)) |
770 | #if KMP_ARCH_MIPS |
771 | #define KMP_TEST_THEN_ADD64(p, v) \ |
772 | __atomic_fetch_add((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \ |
773 | __ATOMIC_SEQ_CST) |
774 | #else |
775 | #define KMP_TEST_THEN_ADD64(p, v) \ |
776 | __sync_fetch_and_add((volatile kmp_int64 *)(p), (kmp_int64)(v)) |
777 | #endif |
778 | |
779 | #define KMP_TEST_THEN_OR8(p, v) \ |
780 | __sync_fetch_and_or((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
781 | #define KMP_TEST_THEN_AND8(p, v) \ |
782 | __sync_fetch_and_and((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
783 | #define KMP_TEST_THEN_OR32(p, v) \ |
784 | __sync_fetch_and_or((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) |
785 | #define KMP_TEST_THEN_AND32(p, v) \ |
786 | __sync_fetch_and_and((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) |
787 | #if KMP_ARCH_MIPS |
788 | #define KMP_TEST_THEN_OR64(p, v) \ |
789 | __atomic_fetch_or((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \ |
790 | __ATOMIC_SEQ_CST) |
791 | #define KMP_TEST_THEN_AND64(p, v) \ |
792 | __atomic_fetch_and((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \ |
793 | __ATOMIC_SEQ_CST) |
794 | #else |
795 | #define KMP_TEST_THEN_OR64(p, v) \ |
796 | __sync_fetch_and_or((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) |
797 | #define KMP_TEST_THEN_AND64(p, v) \ |
798 | __sync_fetch_and_and((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) |
799 | #endif |
800 | |
801 | #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ |
802 | __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ |
803 | (kmp_uint8)(sv)) |
804 | #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ |
805 | __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ |
806 | (kmp_uint8)(sv)) |
807 | #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ |
808 | __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ |
809 | (kmp_uint16)(sv)) |
810 | #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ |
811 | __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ |
812 | (kmp_uint16)(sv)) |
813 | #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ |
814 | __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ |
815 | (kmp_uint32)(sv)) |
816 | #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ |
817 | __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ |
818 | (kmp_uint32)(sv)) |
819 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
820 | __sync_bool_compare_and_swap((void *volatile *)(p), (void *)(cv), \ |
821 | (void *)(sv)) |
822 | |
823 | #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ |
824 | __sync_val_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ |
825 | (kmp_uint8)(sv)) |
826 | #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ |
827 | __sync_val_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ |
828 | (kmp_uint16)(sv)) |
829 | #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ |
830 | __sync_val_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ |
831 | (kmp_uint32)(sv)) |
832 | #if KMP_ARCH_MIPS |
833 | static inline bool mips_sync_bool_compare_and_swap(volatile kmp_uint64 *p, |
834 | kmp_uint64 cv, |
835 | kmp_uint64 sv) { |
836 | return __atomic_compare_exchange(p, &cv, &sv, false, __ATOMIC_SEQ_CST, |
837 | __ATOMIC_SEQ_CST); |
838 | } |
839 | static inline bool mips_sync_val_compare_and_swap(volatile kmp_uint64 *p, |
840 | kmp_uint64 cv, |
841 | kmp_uint64 sv) { |
842 | __atomic_compare_exchange(p, &cv, &sv, false, __ATOMIC_SEQ_CST, |
843 | __ATOMIC_SEQ_CST); |
844 | return cv; |
845 | } |
846 | #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ |
847 | mips_sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), \ |
848 | (kmp_uint64)(cv), (kmp_uint64)(sv)) |
849 | #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ |
850 | mips_sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), \ |
851 | (kmp_uint64)(cv), (kmp_uint64)(sv)) |
852 | #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ |
853 | mips_sync_val_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ |
854 | (kmp_uint64)(sv)) |
855 | #else |
856 | #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ |
857 | __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ |
858 | (kmp_uint64)(sv)) |
859 | #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ |
860 | __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ |
861 | (kmp_uint64)(sv)) |
862 | #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ |
863 | __sync_val_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ |
864 | (kmp_uint64)(sv)) |
865 | #endif |
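
// Illustrative sketch (hypothetical word, not compiled): the _RET forms return
// the previous contents rather than a success flag, e.g.
//   kmp_uint64 word = 0;
//   kmp_uint64 seen = KMP_COMPARE_AND_STORE_RET64(&word, 0ULL, 1ULL);
//   // the swap happened iff seen == 0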
866 | |
867 | #if KMP_OS_DARWIN && defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1800 |
868 | #define KMP_XCHG_FIXED8(p, v) \ |
869 | __atomic_exchange_1((volatile kmp_uint8 *)(p), (kmp_uint8)(v), \ |
870 | __ATOMIC_SEQ_CST) |
871 | #else |
872 | #define KMP_XCHG_FIXED8(p, v) \ |
873 | __sync_lock_test_and_set((volatile kmp_uint8 *)(p), (kmp_uint8)(v)) |
874 | #endif |
875 | #define KMP_XCHG_FIXED16(p, v) \ |
876 | __sync_lock_test_and_set((volatile kmp_uint16 *)(p), (kmp_uint16)(v)) |
877 | #define KMP_XCHG_FIXED32(p, v) \ |
878 | __sync_lock_test_and_set((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) |
879 | #define KMP_XCHG_FIXED64(p, v) \ |
880 | __sync_lock_test_and_set((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) |
881 | |
882 | inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) { |
883 | volatile kmp_uint32 *up; |
884 | kmp_uint32 uv; |
  memcpy(&up, &p, sizeof(up));
  memcpy(&uv, &v, sizeof(uv));
887 | kmp_int32 tmp = __sync_lock_test_and_set(up, uv); |
888 | kmp_real32 ftmp; |
  memcpy(&ftmp, &tmp, sizeof(tmp));
890 | return ftmp; |
891 | } |
892 | |
893 | inline kmp_real64 KMP_XCHG_REAL64(volatile kmp_real64 *p, kmp_real64 v) { |
894 | volatile kmp_uint64 *up; |
895 | kmp_uint64 uv; |
  memcpy(&up, &p, sizeof(up));
  memcpy(&uv, &v, sizeof(uv));
898 | kmp_int64 tmp = __sync_lock_test_and_set(up, uv); |
899 | kmp_real64 dtmp; |
  memcpy(&dtmp, &tmp, sizeof(tmp));
901 | return dtmp; |
902 | } |
903 | |
904 | #else |
905 | |
906 | extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); |
907 | extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); |
908 | extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); |
909 | extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v); |
910 | extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v); |
911 | extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v); |
912 | extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v); |
913 | extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v); |
914 | extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v); |
915 | |
916 | extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv, |
917 | kmp_int8 sv); |
918 | extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, |
919 | kmp_int16 sv); |
920 | extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, |
921 | kmp_int32 sv); |
922 | extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, |
923 | kmp_int64 sv); |
924 | extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, |
925 | kmp_int8 sv); |
926 | extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p, |
927 | kmp_int16 cv, kmp_int16 sv); |
928 | extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p, |
929 | kmp_int32 cv, kmp_int32 sv); |
930 | extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p, |
931 | kmp_int64 cv, kmp_int64 sv); |
932 | |
933 | extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v); |
934 | extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v); |
935 | extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v); |
936 | extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v); |
937 | extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v); |
938 | extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); |
939 | |
940 | #define KMP_TEST_THEN_INC32(p) \ |
941 | __kmp_test_then_add32((volatile kmp_int32 *)(p), 1) |
942 | #define KMP_TEST_THEN_INC_ACQ32(p) \ |
943 | __kmp_test_then_add32((volatile kmp_int32 *)(p), 1) |
944 | #define KMP_TEST_THEN_INC64(p) \ |
945 | __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL) |
946 | #define KMP_TEST_THEN_INC_ACQ64(p) \ |
947 | __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL) |
948 | #define KMP_TEST_THEN_ADD4_32(p) \ |
949 | __kmp_test_then_add32((volatile kmp_int32 *)(p), 4) |
950 | #define KMP_TEST_THEN_ADD4_ACQ32(p) \ |
951 | __kmp_test_then_add32((volatile kmp_int32 *)(p), 4) |
952 | #define KMP_TEST_THEN_ADD4_64(p) \ |
953 | __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL) |
954 | #define KMP_TEST_THEN_ADD4_ACQ64(p) \ |
955 | __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL) |
956 | #define KMP_TEST_THEN_DEC32(p) \ |
957 | __kmp_test_then_add32((volatile kmp_int32 *)(p), -1) |
958 | #define KMP_TEST_THEN_DEC_ACQ32(p) \ |
959 | __kmp_test_then_add32((volatile kmp_int32 *)(p), -1) |
960 | #define KMP_TEST_THEN_DEC64(p) \ |
961 | __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL) |
962 | #define KMP_TEST_THEN_DEC_ACQ64(p) \ |
963 | __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL) |
964 | #define KMP_TEST_THEN_ADD8(p, v) \ |
965 | __kmp_test_then_add8((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
966 | #define KMP_TEST_THEN_ADD32(p, v) \ |
967 | __kmp_test_then_add32((volatile kmp_int32 *)(p), (kmp_int32)(v)) |
968 | #define KMP_TEST_THEN_ADD64(p, v) \ |
969 | __kmp_test_then_add64((volatile kmp_int64 *)(p), (kmp_int64)(v)) |
970 | |
971 | #define KMP_TEST_THEN_OR8(p, v) \ |
972 | __kmp_test_then_or8((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
973 | #define KMP_TEST_THEN_AND8(p, v) \ |
974 | __kmp_test_then_and8((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
975 | #define KMP_TEST_THEN_OR32(p, v) \ |
976 | __kmp_test_then_or32((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) |
977 | #define KMP_TEST_THEN_AND32(p, v) \ |
978 | __kmp_test_then_and32((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) |
979 | #define KMP_TEST_THEN_OR64(p, v) \ |
980 | __kmp_test_then_or64((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) |
981 | #define KMP_TEST_THEN_AND64(p, v) \ |
982 | __kmp_test_then_and64((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) |
983 | |
984 | #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ |
985 | __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \ |
986 | (kmp_int8)(sv)) |
987 | #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ |
988 | __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \ |
989 | (kmp_int8)(sv)) |
990 | #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ |
991 | __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \ |
992 | (kmp_int16)(sv)) |
993 | #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ |
994 | __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \ |
995 | (kmp_int16)(sv)) |
996 | #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ |
997 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
998 | (kmp_int32)(sv)) |
999 | #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ |
1000 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
1001 | (kmp_int32)(sv)) |
1002 | #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ |
1003 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
1004 | (kmp_int64)(sv)) |
1005 | #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ |
1006 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
1007 | (kmp_int64)(sv)) |
1008 | |
1009 | #if KMP_ARCH_X86 |
1010 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
1011 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
1012 | (kmp_int32)(sv)) |
1013 | #else /* 64 bit pointers */ |
1014 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
1015 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
1016 | (kmp_int64)(sv)) |
1017 | #endif /* KMP_ARCH_X86 */ |
1018 | |
1019 | #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ |
1020 | __kmp_compare_and_store_ret8((p), (cv), (sv)) |
1021 | #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ |
1022 | __kmp_compare_and_store_ret16((p), (cv), (sv)) |
1023 | #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ |
1024 | __kmp_compare_and_store_ret32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
1025 | (kmp_int32)(sv)) |
1026 | #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ |
1027 | __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
1028 | (kmp_int64)(sv)) |
1029 | |
1030 | #define KMP_XCHG_FIXED8(p, v) \ |
1031 | __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v)); |
1032 | #define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v)); |
1033 | #define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v)); |
1034 | #define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v)); |
1035 | #define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v)); |
1036 | #define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)); |
1037 | |
1038 | #endif /* KMP_ASM_INTRINS */ |
1039 | |
1040 | /* ------------- relaxed consistency memory model stuff ------------------ */ |
1041 | |
1042 | #if KMP_OS_WINDOWS |
1043 | #ifdef __ABSOFT_WIN |
1044 | #define KMP_MB() asm("nop") |
1045 | #define KMP_IMB() asm("nop") |
1046 | #else |
1047 | #define KMP_MB() /* _asm{ nop } */ |
1048 | #define KMP_IMB() /* _asm{ nop } */ |
1049 | #endif |
1050 | #endif /* KMP_OS_WINDOWS */ |
1051 | |
1052 | #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ |
1053 | KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ |
1054 | KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 |
1055 | #if KMP_OS_WINDOWS |
1056 | #undef KMP_MB |
1057 | #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) |
1058 | #else /* !KMP_OS_WINDOWS */ |
1059 | #define KMP_MB() __sync_synchronize() |
1060 | #endif |
1061 | #endif |
1062 | |
1063 | #ifndef KMP_MB |
1064 | #define KMP_MB() /* nothing to do */ |
1065 | #endif |
1066 | |
1067 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1068 | #if KMP_MIC |
1069 | // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. |
1070 | // We shouldn't need it, though, since the ABI rules require that |
1071 | // * If the compiler generates NGO stores it also generates the fence |
1072 | // * If users hand-code NGO stores they should insert the fence |
1073 | // therefore no incomplete unordered stores should be visible. |
1074 | #define KMP_MFENCE() /* Nothing */ |
1075 | #define KMP_SFENCE() /* Nothing */ |
1076 | #else |
1077 | #if KMP_COMPILER_ICC || KMP_COMPILER_ICX |
1078 | #define KMP_MFENCE_() _mm_mfence() |
1079 | #define KMP_SFENCE_() _mm_sfence() |
1080 | #elif KMP_COMPILER_MSVC |
1081 | #define KMP_MFENCE_() MemoryBarrier() |
1082 | #define KMP_SFENCE_() MemoryBarrier() |
1083 | #else |
1084 | #define KMP_MFENCE_() __sync_synchronize() |
1085 | #define KMP_SFENCE_() __sync_synchronize() |
1086 | #endif |
1087 | #define KMP_MFENCE() \ |
1088 | if (UNLIKELY(!__kmp_cpuinfo.initialized)) { \ |
1089 | __kmp_query_cpuid(&__kmp_cpuinfo); \ |
1090 | } \ |
1091 | if (__kmp_cpuinfo.flags.sse2) { \ |
1092 | KMP_MFENCE_(); \ |
1093 | } |
1094 | #define KMP_SFENCE() KMP_SFENCE_() |
1095 | #endif |
1096 | #else |
1097 | #define KMP_MFENCE() KMP_MB() |
1098 | #define KMP_SFENCE() KMP_MB() |
1099 | #endif |
1100 | |
1101 | #ifndef KMP_IMB |
1102 | #define KMP_IMB() /* nothing to do */ |
1103 | #endif |
1104 | |
1105 | #ifndef KMP_ST_REL32 |
1106 | #define KMP_ST_REL32(A, D) (*(A) = (D)) |
1107 | #endif |
1108 | |
1109 | #ifndef KMP_ST_REL64 |
1110 | #define KMP_ST_REL64(A, D) (*(A) = (D)) |
1111 | #endif |
1112 | |
1113 | #ifndef KMP_LD_ACQ32 |
1114 | #define KMP_LD_ACQ32(A) (*(A)) |
1115 | #endif |
1116 | |
1117 | #ifndef KMP_LD_ACQ64 |
1118 | #define KMP_LD_ACQ64(A) (*(A)) |
1119 | #endif |
1120 | |
1121 | /* ------------------------------------------------------------------------ */ |
// FIXME - maybe this should be
1123 | // |
1124 | // #define TCR_4(a) (*(volatile kmp_int32 *)(&a)) |
1125 | // #define TCW_4(a,b) (a) = (*(volatile kmp_int32 *)&(b)) |
1126 | // |
1127 | // #define TCR_8(a) (*(volatile kmp_int64 *)(a)) |
1128 | // #define TCW_8(a,b) (a) = (*(volatile kmp_int64 *)(&b)) |
1129 | // |
1130 | // I'm fairly certain this is the correct thing to do, but I'm afraid |
1131 | // of performance regressions. |
1132 | |
1133 | #define TCR_1(a) (a) |
1134 | #define TCW_1(a, b) (a) = (b) |
1135 | #define TCR_4(a) (a) |
1136 | #define TCW_4(a, b) (a) = (b) |
1137 | #define TCI_4(a) (++(a)) |
1138 | #define TCD_4(a) (--(a)) |
1139 | #define TCR_8(a) (a) |
1140 | #define TCW_8(a, b) (a) = (b) |
1141 | #define TCI_8(a) (++(a)) |
1142 | #define TCD_8(a) (--(a)) |
1143 | #define TCR_SYNC_4(a) (a) |
1144 | #define TCW_SYNC_4(a, b) (a) = (b) |
1145 | #define TCX_SYNC_4(a, b, c) \ |
1146 | KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a), \ |
1147 | (kmp_int32)(b), (kmp_int32)(c)) |
1148 | #define TCR_SYNC_8(a) (a) |
1149 | #define TCW_SYNC_8(a, b) (a) = (b) |
1150 | #define TCX_SYNC_8(a, b, c) \ |
1151 | KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), \ |
1152 | (kmp_int64)(b), (kmp_int64)(c)) |
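
// Illustrative usage (hypothetical shared field, not compiled):
//   kmp_int32 shared_flag = 0;            // shared between threads
//   TCW_4(shared_flag, 1);                // thread-visible 4-byte write
//   kmp_int32 seen = TCR_4(shared_flag);  // thread-visible 4-byte read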
1153 | |
1154 | #if KMP_ARCH_X86 || KMP_ARCH_MIPS || KMP_ARCH_WASM || KMP_ARCH_PPC |
1155 | // What about ARM? |
1156 | #define TCR_PTR(a) ((void *)TCR_4(a)) |
1157 | #define TCW_PTR(a, b) TCW_4((a), (b)) |
1158 | #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a)) |
1159 | #define TCW_SYNC_PTR(a, b) TCW_SYNC_4((a), (b)) |
1160 | #define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_4((a), (b), (c))) |
1161 | |
1162 | #else /* 64 bit pointers */ |
1163 | |
1164 | #define TCR_PTR(a) ((void *)TCR_8(a)) |
1165 | #define TCW_PTR(a, b) TCW_8((a), (b)) |
1166 | #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a)) |
1167 | #define TCW_SYNC_PTR(a, b) TCW_SYNC_8((a), (b)) |
1168 | #define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_8((a), (b), (c))) |
1169 | |
1170 | #endif /* KMP_ARCH_X86 */ |
1171 | |
/* If these FTN_{TRUE,FALSE} values change, we may need to change several
   places where they are used to check that the language is Fortran, not C. */
1174 | |
1175 | #ifndef FTN_TRUE |
1176 | #define FTN_TRUE TRUE |
1177 | #endif |
1178 | |
1179 | #ifndef FTN_FALSE |
1180 | #define FTN_FALSE FALSE |
1181 | #endif |
1182 | |
1183 | typedef void (*microtask_t)(int *gtid, int *npr, ...); |
1184 | |
1185 | #ifdef USE_VOLATILE_CAST |
1186 | #define VOLATILE_CAST(x) (volatile x) |
1187 | #else |
1188 | #define VOLATILE_CAST(x) (x) |
1189 | #endif |
1190 | |
1191 | #define KMP_WAIT __kmp_wait_4 |
1192 | #define KMP_WAIT_PTR __kmp_wait_4_ptr |
1193 | #define KMP_EQ __kmp_eq_4 |
1194 | #define KMP_NEQ __kmp_neq_4 |
1195 | #define KMP_LT __kmp_lt_4 |
1196 | #define KMP_GE __kmp_ge_4 |
1197 | #define KMP_LE __kmp_le_4 |
1198 | |
1199 | /* Workaround for Intel(R) 64 code gen bug when taking address of static array |
1200 | * (Intel(R) 64 Tracker #138) */ |
1201 | #if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX |
1202 | #define STATIC_EFI2_WORKAROUND |
1203 | #else |
1204 | #define STATIC_EFI2_WORKAROUND static |
1205 | #endif |
1206 | |
1207 | // Support of BGET usage |
1208 | #ifndef KMP_USE_BGET |
1209 | #define KMP_USE_BGET 1 |
1210 | #endif |
1211 | |
1212 | // Switches for OSS builds |
1213 | #ifndef USE_CMPXCHG_FIX |
1214 | #define USE_CMPXCHG_FIX 1 |
1215 | #endif |
1216 | |
1217 | // Enable dynamic user lock |
1218 | #define KMP_USE_DYNAMIC_LOCK 1 |
1219 | |
1220 | // Enable Intel(R) Transactional Synchronization Extensions (Intel(R) TSX) if |
1221 | // dynamic user lock is turned on |
1222 | #if KMP_USE_DYNAMIC_LOCK |
1223 | // Visual studio can't handle the asm sections in this code |
#define KMP_USE_TSX ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC)
1225 | #ifdef KMP_USE_ADAPTIVE_LOCKS |
1226 | #undef KMP_USE_ADAPTIVE_LOCKS |
1227 | #endif |
1228 | #define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX |
1229 | #endif |
1230 | |
1231 | // Enable tick time conversion of ticks to seconds |
1232 | #if KMP_STATS_ENABLED |
1233 | #define KMP_HAVE_TICK_TIME \ |
1234 | (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64)) |
1235 | #endif |
1236 | |
1237 | // Warning levels |
1238 | enum kmp_warnings_level { |
1239 | kmp_warnings_off = 0, /* No warnings */ |
1240 | kmp_warnings_low, /* Minimal warnings (default) */ |
1241 | kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */ |
1242 | kmp_warnings_verbose /* reserved */ |
1243 | }; |
1244 | |
1245 | #ifdef __cplusplus |
1246 | } // extern "C" |
1247 | #endif // __cplusplus |
1248 | |
1249 | // Safe C API |
1250 | #include "kmp_safe_c_api.h" |
1251 | |
1252 | // Macros for C++11 atomic functions |
1253 | #define KMP_ATOMIC_LD(p, order) (p)->load(std::memory_order_##order) |
1254 | #define KMP_ATOMIC_OP(op, p, v, order) (p)->op(v, std::memory_order_##order) |
1255 | |
1256 | // For non-default load/store |
1257 | #define KMP_ATOMIC_LD_ACQ(p) KMP_ATOMIC_LD(p, acquire) |
1258 | #define KMP_ATOMIC_LD_RLX(p) KMP_ATOMIC_LD(p, relaxed) |
1259 | #define KMP_ATOMIC_ST_REL(p, v) KMP_ATOMIC_OP(store, p, v, release) |
1260 | #define KMP_ATOMIC_ST_RLX(p, v) KMP_ATOMIC_OP(store, p, v, relaxed) |
1261 | |
1262 | // For non-default fetch_<op> |
1263 | #define KMP_ATOMIC_ADD(p, v) KMP_ATOMIC_OP(fetch_add, p, v, acq_rel) |
1264 | #define KMP_ATOMIC_SUB(p, v) KMP_ATOMIC_OP(fetch_sub, p, v, acq_rel) |
1265 | #define KMP_ATOMIC_AND(p, v) KMP_ATOMIC_OP(fetch_and, p, v, acq_rel) |
1266 | #define KMP_ATOMIC_OR(p, v) KMP_ATOMIC_OP(fetch_or, p, v, acq_rel) |
1267 | #define KMP_ATOMIC_INC(p) KMP_ATOMIC_OP(fetch_add, p, 1, acq_rel) |
1268 | #define KMP_ATOMIC_DEC(p) KMP_ATOMIC_OP(fetch_sub, p, 1, acq_rel) |
1269 | #define KMP_ATOMIC_ADD_RLX(p, v) KMP_ATOMIC_OP(fetch_add, p, v, relaxed) |
1270 | #define KMP_ATOMIC_INC_RLX(p) KMP_ATOMIC_OP(fetch_add, p, 1, relaxed) |
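
// Illustrative usage (hypothetical counter, not compiled):
//   std::atomic<kmp_int32> nwaiting;
//   KMP_ATOMIC_ST_RLX(&nwaiting, 0);            // relaxed store
//   kmp_int32 prev = KMP_ATOMIC_INC(&nwaiting); // acq_rel fetch_add, old value
//   if (KMP_ATOMIC_LD_ACQ(&nwaiting) > 0) { /* at least one waiter */ }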
1271 | |
1272 | // Callers of the following functions cannot see the side effect on "expected". |
1273 | template <typename T> |
1274 | bool __kmp_atomic_compare_store(std::atomic<T> *p, T expected, T desired) { |
1275 | return p->compare_exchange_strong( |
1276 | expected, desired, std::memory_order_acq_rel, std::memory_order_relaxed); |
1277 | } |
1278 | |
1279 | template <typename T> |
1280 | bool __kmp_atomic_compare_store_acq(std::atomic<T> *p, T expected, T desired) { |
1281 | return p->compare_exchange_strong( |
1282 | expected, desired, std::memory_order_acquire, std::memory_order_relaxed); |
1283 | } |
1284 | |
1285 | template <typename T> |
1286 | bool __kmp_atomic_compare_store_rel(std::atomic<T> *p, T expected, T desired) { |
1287 | return p->compare_exchange_strong( |
1288 | expected, desired, std::memory_order_release, std::memory_order_relaxed); |
1289 | } |
1290 | |
1291 | // Symbol lookup on Linux/Windows |
1292 | #if KMP_OS_WINDOWS |
1293 | extern void *__kmp_lookup_symbol(const char *name, bool next = false); |
1294 | #define KMP_DLSYM(name) __kmp_lookup_symbol(name) |
1295 | #define KMP_DLSYM_NEXT(name) __kmp_lookup_symbol(name, true) |
1296 | #elif KMP_OS_WASI |
1297 | #define KMP_DLSYM(name) nullptr |
1298 | #define KMP_DLSYM_NEXT(name) nullptr |
1299 | #else |
1300 | #define KMP_DLSYM(name) dlsym(RTLD_DEFAULT, name) |
1301 | #define KMP_DLSYM_NEXT(name) dlsym(RTLD_NEXT, name) |
1302 | #endif |
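
// Illustrative usage (hypothetical symbol "foo", not compiled):
//   typedef int (*foo_fn_t)(void);
//   foo_fn_t next_foo = (foo_fn_t)KMP_DLSYM_NEXT("foo"); // next definition
//   foo_fn_t any_foo = (foo_fn_t)KMP_DLSYM("foo");       // default lookup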
1303 | |
1304 | // MSVC doesn't have this, but clang/clang-cl does. |
1305 | #ifndef __has_builtin |
1306 | #define __has_builtin(x) 0 |
1307 | #endif |
1308 | |
1309 | // Same as LLVM_BUILTIN_UNREACHABLE. States that it is UB to reach this point. |
1310 | #if __has_builtin(__builtin_unreachable) || defined(__GNUC__) |
1311 | #define KMP_BUILTIN_UNREACHABLE __builtin_unreachable() |
1312 | #elif defined(_MSC_VER) |
1313 | #define KMP_BUILTIN_UNREACHABLE __assume(false) |
1314 | #else |
1315 | #define KMP_BUILTIN_UNREACHABLE |
1316 | #endif |
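
// Illustrative usage (not compiled): placed on a path that cannot be reached,
// e.g. the default of a fully-covered switch, so the optimizer can drop it:
//   default:
//     KMP_BUILTIN_UNREACHABLE;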
1317 | |
1318 | #endif /* KMP_OS_H */ |
1319 | |