1 | /* |
2 | * kmp_os.h -- KPTS runtime header file. |
3 | */ |
4 | |
5 | //===----------------------------------------------------------------------===// |
6 | // |
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | // See https://llvm.org/LICENSE.txt for license information. |
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef KMP_OS_H |
14 | #define KMP_OS_H |
15 | |
16 | #include "kmp_config.h" |
17 | #include <atomic> |
18 | #include <stdarg.h> |
19 | #include <stdlib.h> |
20 | #include <string.h> |
21 | |
22 | #define KMP_FTN_PLAIN 1 |
23 | #define KMP_FTN_APPEND 2 |
24 | #define KMP_FTN_UPPER 3 |
25 | /* |
26 | #define KMP_FTN_PREPEND 4 |
27 | #define KMP_FTN_UAPPEND 5 |
28 | */ |
29 | |
30 | #define KMP_PTR_SKIP (sizeof(void *)) |
31 | |
32 | /* -------------------------- Compiler variations ------------------------ */ |
33 | |
34 | #define KMP_OFF 0 |
35 | #define KMP_ON 1 |
36 | |
37 | #define KMP_MEM_CONS_VOLATILE 0 |
38 | #define KMP_MEM_CONS_FENCE 1 |
39 | |
40 | #ifndef KMP_MEM_CONS_MODEL |
41 | #define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE |
42 | #endif |
43 | |
44 | #ifndef __has_cpp_attribute |
45 | #define __has_cpp_attribute(x) 0 |
46 | #endif |
47 | |
48 | #ifndef __has_attribute |
49 | #define __has_attribute(x) 0 |
50 | #endif |
51 | |
52 | /* ------------------------- Compiler recognition ---------------------- */ |
53 | #define KMP_COMPILER_ICC 0 |
54 | #define KMP_COMPILER_GCC 0 |
55 | #define KMP_COMPILER_CLANG 0 |
56 | #define KMP_COMPILER_MSVC 0 |
57 | #define KMP_COMPILER_ICX 0 |
58 | |
59 | #if __INTEL_CLANG_COMPILER |
60 | #undef KMP_COMPILER_ICX |
61 | #define KMP_COMPILER_ICX 1 |
62 | #elif defined(__INTEL_COMPILER) |
63 | #undef KMP_COMPILER_ICC |
64 | #define KMP_COMPILER_ICC 1 |
65 | #elif defined(__clang__) |
66 | #undef KMP_COMPILER_CLANG |
67 | #define KMP_COMPILER_CLANG 1 |
68 | #elif defined(__GNUC__) |
69 | #undef KMP_COMPILER_GCC |
70 | #define KMP_COMPILER_GCC 1 |
71 | #elif defined(_MSC_VER) |
72 | #undef KMP_COMPILER_MSVC |
73 | #define KMP_COMPILER_MSVC 1 |
74 | #else |
75 | #error Unknown compiler |
76 | #endif |
77 | |
78 | #if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ |
79 | KMP_OS_DRAGONFLY || KMP_OS_AIX) && \ |
80 | !KMP_OS_WASI && !KMP_OS_EMSCRIPTEN |
81 | #define KMP_AFFINITY_SUPPORTED 1 |
82 | #if KMP_OS_WINDOWS && KMP_ARCH_X86_64 |
83 | #define KMP_GROUP_AFFINITY 1 |
84 | #else |
85 | #define KMP_GROUP_AFFINITY 0 |
86 | #endif |
87 | #else |
88 | #define KMP_AFFINITY_SUPPORTED 0 |
89 | #define KMP_GROUP_AFFINITY 0 |
90 | #endif |
91 | |
92 | #if (KMP_OS_LINUX || (KMP_OS_FREEBSD && __FreeBSD_version >= 1301000)) |
93 | #define KMP_HAVE_SCHED_GETCPU 1 |
94 | #else |
95 | #define KMP_HAVE_SCHED_GETCPU 0 |
96 | #endif |
97 | |
98 | /* Check for quad-precision extension. */ |
99 | #define KMP_HAVE_QUAD 0 |
100 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
101 | #if KMP_COMPILER_ICC || KMP_COMPILER_ICX |
102 | /* _Quad is already defined for icc */ |
103 | #undef KMP_HAVE_QUAD |
104 | #define KMP_HAVE_QUAD 1 |
105 | #elif KMP_COMPILER_CLANG |
106 | /* Clang doesn't support a software-implemented |
107 | 128-bit extended precision type yet */ |
108 | typedef long double _Quad; |
109 | #elif KMP_COMPILER_GCC |
/* GCC on NetBSD lacks the __multc3/__divtc3 builtins needed for quad
   precision until NetBSD 10.0, which ships with GCC 10.5 */
112 | #if (!KMP_OS_NETBSD || __GNUC__ >= 10) |
113 | typedef __float128 _Quad; |
114 | #undef KMP_HAVE_QUAD |
115 | #define KMP_HAVE_QUAD 1 |
116 | #endif |
117 | #elif KMP_COMPILER_MSVC |
118 | typedef long double _Quad; |
119 | #endif |
120 | #else |
121 | #if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC |
122 | typedef long double _Quad; |
123 | #undef KMP_HAVE_QUAD |
124 | #define KMP_HAVE_QUAD 1 |
125 | #endif |
126 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
127 | |
128 | #define KMP_USE_X87CONTROL 0 |
129 | #if KMP_OS_WINDOWS |
130 | #define KMP_END_OF_LINE "\r\n" |
131 | typedef char kmp_int8; |
132 | typedef unsigned char kmp_uint8; |
133 | typedef short kmp_int16; |
134 | typedef unsigned short kmp_uint16; |
135 | typedef int kmp_int32; |
136 | typedef unsigned int kmp_uint32; |
137 | #define KMP_INT32_SPEC "d" |
138 | #define KMP_UINT32_SPEC "u" |
139 | #ifndef KMP_STRUCT64 |
140 | typedef __int64 kmp_int64; |
141 | typedef unsigned __int64 kmp_uint64; |
142 | #define KMP_INT64_SPEC "I64d" |
143 | #define KMP_UINT64_SPEC "I64u" |
144 | #else |
145 | struct kmp_struct64 { |
146 | kmp_int32 a, b; |
147 | }; |
148 | typedef struct kmp_struct64 kmp_int64; |
149 | typedef struct kmp_struct64 kmp_uint64; |
150 | /* Not sure what to use for KMP_[U]INT64_SPEC here */ |
151 | #endif |
152 | #if KMP_ARCH_X86 && KMP_MSVC_COMPAT |
153 | #undef KMP_USE_X87CONTROL |
154 | #define KMP_USE_X87CONTROL 1 |
155 | #endif |
156 | #if KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 |
157 | #define KMP_INTPTR 1 |
158 | typedef __int64 kmp_intptr_t; |
159 | typedef unsigned __int64 kmp_uintptr_t; |
160 | #define KMP_INTPTR_SPEC "I64d" |
161 | #define KMP_UINTPTR_SPEC "I64u" |
162 | #endif |
163 | #endif /* KMP_OS_WINDOWS */ |
164 | |
165 | #if KMP_OS_UNIX |
166 | #define KMP_END_OF_LINE "\n" |
167 | typedef char kmp_int8; |
168 | typedef unsigned char kmp_uint8; |
169 | typedef short kmp_int16; |
170 | typedef unsigned short kmp_uint16; |
171 | typedef int kmp_int32; |
172 | typedef unsigned int kmp_uint32; |
173 | typedef long long kmp_int64; |
174 | typedef unsigned long long kmp_uint64; |
175 | #define KMP_INT32_SPEC "d" |
176 | #define KMP_UINT32_SPEC "u" |
177 | #define KMP_INT64_SPEC "lld" |
178 | #define KMP_UINT64_SPEC "llu" |
179 | #endif /* KMP_OS_UNIX */ |
180 | |
181 | #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \ |
182 | KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || KMP_ARCH_SPARC32 |
183 | #define KMP_SIZE_T_SPEC KMP_UINT32_SPEC |
184 | #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \ |
185 | KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ |
186 | KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_SPARC64 |
187 | #define KMP_SIZE_T_SPEC KMP_UINT64_SPEC |
188 | #else |
189 | #error "Can't determine size_t printf format specifier." |
190 | #endif |
191 | |
192 | #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_WASM || KMP_ARCH_PPC |
193 | #define KMP_SIZE_T_MAX (0xFFFFFFFF) |
194 | #else |
195 | #define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF) |
196 | #endif |
197 | |
198 | typedef size_t kmp_size_t; |
199 | typedef float kmp_real32; |
200 | typedef double kmp_real64; |
201 | |
202 | #ifndef KMP_INTPTR |
203 | #define KMP_INTPTR 1 |
204 | typedef long kmp_intptr_t; |
205 | typedef unsigned long kmp_uintptr_t; |
206 | #define KMP_INTPTR_SPEC "ld" |
207 | #define KMP_UINTPTR_SPEC "lu" |
208 | #endif |
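
// Illustrative usage of the *_SPEC macros (a sketch, not code required by this
// header): the specifiers carry no '%', so callers splice them into a format
// string, e.g.
//   kmp_int64 n = 42;
//   printf("n = %" KMP_INT64_SPEC ", addr = %" KMP_UINTPTR_SPEC "\n", n,
//          (kmp_uintptr_t)&n);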
209 | |
210 | #ifdef BUILD_I8 |
211 | typedef kmp_int64 kmp_int; |
212 | typedef kmp_uint64 kmp_uint; |
213 | #else |
214 | typedef kmp_int32 kmp_int; |
215 | typedef kmp_uint32 kmp_uint; |
216 | #endif /* BUILD_I8 */ |
217 | #define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF) |
218 | #define KMP_INT_MIN ((kmp_int32)0x80000000) |
219 | |
220 | // stdarg handling |
221 | #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_WASM) && \ |
222 | (KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_OPENBSD || KMP_OS_DRAGONFLY || \ |
223 | KMP_OS_LINUX || KMP_OS_WASI) |
224 | typedef va_list *kmp_va_list; |
225 | #define kmp_va_deref(ap) (*(ap)) |
226 | #define kmp_va_addr_of(ap) (&(ap)) |
227 | #else |
228 | typedef va_list kmp_va_list; |
229 | #define kmp_va_deref(ap) (ap) |
230 | #define kmp_va_addr_of(ap) (ap) |
231 | #endif |
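
// Illustrative sketch (assumption: typical internal usage): forked microtasks
// receive a kmp_va_list, so argument access goes through kmp_va_deref, e.g.
//   void *arg = va_arg(kmp_va_deref(ap), void *);
// and a caller passes kmp_va_addr_of(ap) wherever a kmp_va_list is expected.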
232 | |
233 | #ifdef __cplusplus |
234 | // macros to cast out qualifiers and to re-interpret types |
235 | #define CCAST(type, var) const_cast<type>(var) |
236 | #define RCAST(type, var) reinterpret_cast<type>(var) |
237 | //------------------------------------------------------------------------- |
// traits template providing the debug print specifiers (d, u, lld, llu) and
// the signed/unsigned flavors of a type
240 | template <typename T> struct traits_t {}; |
241 | // int |
242 | template <> struct traits_t<signed int> { |
243 | typedef signed int signed_t; |
244 | typedef unsigned int unsigned_t; |
245 | typedef double floating_t; |
246 | static char const *spec; |
247 | static const signed_t max_value = 0x7fffffff; |
248 | static const signed_t min_value = 0x80000000; |
249 | static const int type_size = sizeof(signed_t); |
250 | }; |
251 | // unsigned int |
252 | template <> struct traits_t<unsigned int> { |
253 | typedef signed int signed_t; |
254 | typedef unsigned int unsigned_t; |
255 | typedef double floating_t; |
256 | static char const *spec; |
257 | static const unsigned_t max_value = 0xffffffff; |
258 | static const unsigned_t min_value = 0x00000000; |
259 | static const int type_size = sizeof(unsigned_t); |
260 | }; |
261 | // long |
262 | template <> struct traits_t<signed long> { |
263 | typedef signed long signed_t; |
264 | typedef unsigned long unsigned_t; |
265 | typedef long double floating_t; |
266 | static char const *spec; |
267 | static const int type_size = sizeof(signed_t); |
268 | }; |
269 | // long long |
270 | template <> struct traits_t<signed long long> { |
271 | typedef signed long long signed_t; |
272 | typedef unsigned long long unsigned_t; |
273 | typedef long double floating_t; |
274 | static char const *spec; |
275 | static const signed_t max_value = 0x7fffffffffffffffLL; |
276 | static const signed_t min_value = 0x8000000000000000LL; |
277 | static const int type_size = sizeof(signed_t); |
278 | }; |
279 | // unsigned long long |
280 | template <> struct traits_t<unsigned long long> { |
281 | typedef signed long long signed_t; |
282 | typedef unsigned long long unsigned_t; |
283 | typedef long double floating_t; |
284 | static char const *spec; |
285 | static const unsigned_t max_value = 0xffffffffffffffffLL; |
286 | static const unsigned_t min_value = 0x0000000000000000LL; |
287 | static const int type_size = sizeof(unsigned_t); |
288 | }; |
289 | //------------------------------------------------------------------------- |
290 | #else |
291 | #define CCAST(type, var) (type)(var) |
292 | #define RCAST(type, var) (type)(var) |
293 | #endif // __cplusplus |
294 | |
295 | #define KMP_EXPORT extern /* export declaration in guide libraries */ |
296 | |
297 | #if __GNUC__ >= 4 && !defined(__MINGW32__) |
298 | #define __forceinline __inline |
299 | #endif |
300 | |
301 | /* Check if the OS/arch can support user-level mwait */ |
302 | // All mwait code tests for UMWAIT first, so it should only fall back to ring3 |
303 | // MWAIT for KNL. |
304 | #define KMP_HAVE_MWAIT \ |
305 | ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) && \ |
306 | !KMP_MIC2) |
307 | #define KMP_HAVE_UMWAIT \ |
308 | ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) && \ |
309 | !KMP_MIC) |
310 | |
311 | #if KMP_OS_WINDOWS |
// Don't include everything related to NT status codes; we'll do that explicitly
313 | #define WIN32_NO_STATUS |
314 | #include <windows.h> |
315 | |
316 | static inline int KMP_GET_PAGE_SIZE(void) { |
317 | SYSTEM_INFO si; |
318 | GetSystemInfo(&si); |
319 | return si.dwPageSize; |
320 | } |
321 | #else |
322 | #define KMP_GET_PAGE_SIZE() getpagesize() |
323 | #endif |
324 | |
325 | #define PAGE_ALIGNED(_addr) \ |
326 | (!((size_t)_addr & (size_t)(KMP_GET_PAGE_SIZE() - 1))) |
327 | #define ALIGN_TO_PAGE(x) \ |
328 | (void *)(((size_t)(x)) & ~((size_t)(KMP_GET_PAGE_SIZE() - 1))) |
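
// Illustrative sketch (hypothetical addresses, assuming a 4096-byte page):
//   PAGE_ALIGNED((void *)0x2000)  -> true  (0x2000 & 0xFFF == 0)
//   PAGE_ALIGNED((void *)0x2010)  -> false
//   ALIGN_TO_PAGE((void *)0x2010) -> (void *)0x2000 (rounds down to page start)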
329 | |
330 | /* ---------- Support for cache alignment, padding, etc. ----------------*/ |
331 | |
332 | #ifdef __cplusplus |
333 | extern "C" { |
334 | #endif // __cplusplus |
335 | |
336 | #define INTERNODE_CACHE_LINE 4096 /* for multi-node systems */ |
337 | |
338 | /* Define the default size of the cache line */ |
339 | #ifndef CACHE_LINE |
340 | #define CACHE_LINE 128 /* cache line size in bytes */ |
341 | #else |
342 | #if (CACHE_LINE < 64) && !defined(KMP_OS_DARWIN) |
343 | // 2006-02-13: This produces too many warnings on OS X*. Disable for now |
344 | #warning CACHE_LINE is too small. |
345 | #endif |
346 | #endif /* CACHE_LINE */ |
347 | |
348 | #define KMP_CACHE_PREFETCH(ADDR) /* nothing */ |
349 | |
350 | // Define attribute that indicates that the fall through from the previous |
351 | // case label is intentional and should not be diagnosed by a compiler |
352 | // Code from libcxx/include/__config |
353 | // Use a function like macro to imply that it must be followed by a semicolon |
354 | #if __cplusplus > 201402L && __has_cpp_attribute(fallthrough) |
355 | #define KMP_FALLTHROUGH() [[fallthrough]] |
// icc cannot properly tell this attribute is absent, so force it off
357 | #elif KMP_COMPILER_ICC |
358 | #define KMP_FALLTHROUGH() ((void)0) |
359 | #elif __has_cpp_attribute(clang::fallthrough) |
360 | #define KMP_FALLTHROUGH() [[clang::fallthrough]] |
361 | #elif __has_attribute(fallthrough) || __GNUC__ >= 7 |
362 | #define KMP_FALLTHROUGH() __attribute__((__fallthrough__)) |
363 | #else |
364 | #define KMP_FALLTHROUGH() ((void)0) |
365 | #endif |
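
// Illustrative usage (sketch; do_one()/do_two() are hypothetical helpers): the
// macro goes where a case deliberately falls into the next one, and must be
// followed by a semicolon, e.g.
//   switch (kind) {
//   case 1:
//     do_one();
//     KMP_FALLTHROUGH();
//   case 2:
//     do_two();
//     break;
//   }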
366 | |
367 | #if KMP_HAVE_ATTRIBUTE_WAITPKG |
368 | #define KMP_ATTRIBUTE_TARGET_WAITPKG __attribute__((target("waitpkg"))) |
369 | #else |
370 | #define KMP_ATTRIBUTE_TARGET_WAITPKG /* Nothing */ |
371 | #endif |
372 | |
373 | #if KMP_HAVE_ATTRIBUTE_RTM |
374 | #define KMP_ATTRIBUTE_TARGET_RTM __attribute__((target("rtm"))) |
375 | #else |
376 | #define KMP_ATTRIBUTE_TARGET_RTM /* Nothing */ |
377 | #endif |
378 | |
379 | // Define attribute that indicates a function does not return |
380 | #if __cplusplus >= 201103L |
381 | #define KMP_NORETURN [[noreturn]] |
382 | #elif KMP_OS_WINDOWS |
383 | #define KMP_NORETURN __declspec(noreturn) |
384 | #else |
385 | #define KMP_NORETURN __attribute__((noreturn)) |
386 | #endif |
387 | |
388 | #if KMP_OS_WINDOWS && KMP_MSVC_COMPAT |
389 | #define KMP_ALIGN(bytes) __declspec(align(bytes)) |
390 | #define KMP_THREAD_LOCAL __declspec(thread) |
391 | #define KMP_ALIAS /* Nothing */ |
392 | #else |
393 | #define KMP_ALIGN(bytes) __attribute__((aligned(bytes))) |
394 | #define KMP_THREAD_LOCAL __thread |
395 | #define KMP_ALIAS(alias_of) __attribute__((alias(alias_of))) |
396 | #endif |
397 | |
398 | #if KMP_HAVE_WEAK_ATTRIBUTE && !KMP_DYNAMIC_LIB |
399 | #define KMP_WEAK_ATTRIBUTE_EXTERNAL __attribute__((weak)) |
400 | #else |
401 | #define KMP_WEAK_ATTRIBUTE_EXTERNAL /* Nothing */ |
402 | #endif |
403 | |
404 | #if KMP_HAVE_WEAK_ATTRIBUTE |
405 | #define KMP_WEAK_ATTRIBUTE_INTERNAL __attribute__((weak)) |
406 | #else |
407 | #define KMP_WEAK_ATTRIBUTE_INTERNAL /* Nothing */ |
408 | #endif |
409 | |
410 | // Define KMP_VERSION_SYMBOL and KMP_EXPAND_NAME |
411 | #ifndef KMP_STR |
412 | #define KMP_STR(x) _KMP_STR(x) |
413 | #define _KMP_STR(x) #x |
414 | #endif |
415 | |
416 | #ifdef KMP_USE_VERSION_SYMBOLS |
417 | // If using versioned symbols, KMP_EXPAND_NAME prepends |
418 | // __kmp_api_ to the real API name |
419 | #define KMP_EXPAND_NAME(api_name) _KMP_EXPAND_NAME(api_name) |
420 | #define _KMP_EXPAND_NAME(api_name) __kmp_api_##api_name |
421 | #define KMP_VERSION_SYMBOL(api_name, ver_num, ver_str) \ |
422 | _KMP_VERSION_SYMBOL(api_name, ver_num, ver_str, "VERSION") |
423 | #define _KMP_VERSION_SYMBOL(api_name, ver_num, ver_str, default_ver) \ |
424 | __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver_num##_alias \ |
425 | __attribute__((alias(KMP_STR(__kmp_api_##api_name)))); \ |
426 | __asm__( \ |
427 | ".symver " KMP_STR(__kmp_api_##api_name##_##ver_num##_alias) "," KMP_STR( \ |
428 | api_name) "@" ver_str "\n\t"); \ |
429 | __asm__(".symver " KMP_STR(__kmp_api_##api_name) "," KMP_STR( \ |
430 | api_name) "@@" default_ver "\n\t") |
431 | |
432 | #define KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str) \ |
433 | _KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str, "VERSION") |
434 | #define _KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str, \ |
435 | default_ver) \ |
436 | __typeof__(__kmp_api_##apic_name) __kmp_api_##apic_name##_##ver_num##_alias \ |
437 | __attribute__((alias(KMP_STR(__kmp_api_##apic_name)))); \ |
438 | __asm__(".symver " KMP_STR(__kmp_api_##apic_name) "," KMP_STR( \ |
439 | apic_name) "@@" default_ver "\n\t"); \ |
440 | __asm__( \ |
441 | ".symver " KMP_STR(__kmp_api_##apic_name##_##ver_num##_alias) "," KMP_STR( \ |
442 | api_name) "@" ver_str "\n\t") |
443 | |
444 | #else // KMP_USE_VERSION_SYMBOLS |
445 | #define KMP_EXPAND_NAME(api_name) api_name |
446 | #define KMP_VERSION_SYMBOL(api_name, ver_num, ver_str) /* Nothing */ |
447 | #define KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, \ |
448 | ver_str) /* Nothing */ |
449 | #endif // KMP_USE_VERSION_SYMBOLS |
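
// Illustrative expansion (a sketch, assuming KMP_USE_VERSION_SYMBOLS is
// defined): for an entry point such as omp_get_num_threads,
//   KMP_EXPAND_NAME(omp_get_num_threads) -> __kmp_api_omp_get_num_threads
// and KMP_VERSION_SYMBOL(omp_get_num_threads, 10, "OMP_1.0") emits an alias of
// __kmp_api_omp_get_num_threads plus .symver directives binding the alias to
// omp_get_num_threads@OMP_1.0 and the implementation to
// omp_get_num_threads@@VERSION (the default version).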
450 | |
451 | /* Temporary note: if performance testing of this passes, we can remove |
452 | all references to KMP_DO_ALIGN and replace with KMP_ALIGN. */ |
453 | #define KMP_DO_ALIGN(bytes) KMP_ALIGN(bytes) |
454 | #define KMP_ALIGN_CACHE KMP_ALIGN(CACHE_LINE) |
455 | #define KMP_ALIGN_CACHE_INTERNODE KMP_ALIGN(INTERNODE_CACHE_LINE) |
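
// Illustrative sketch (hypothetical struct): KMP_ALIGN_CACHE is typically put
// on objects or fields that must start on their own cache line to avoid false
// sharing, e.g.
//   typedef struct KMP_ALIGN_CACHE __example_counter {
//     volatile kmp_int32 value; // heavily contended; isolated on a cache line
//   } __example_counter_t;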
456 | |
457 | /* General purpose fence types for memory operations */ |
458 | enum kmp_mem_fence_type { |
459 | kmp_no_fence, /* No memory fence */ |
460 | kmp_acquire_fence, /* Acquire (read) memory fence */ |
461 | kmp_release_fence, /* Release (write) memory fence */ |
462 | kmp_full_fence /* Full (read+write) memory fence */ |
463 | }; |
464 | |
465 | // Synchronization primitives |
466 | |
#if KMP_ASM_INTRINS && KMP_OS_WINDOWS && \
    !((KMP_ARCH_AARCH64 || KMP_ARCH_ARM) && \
      (KMP_COMPILER_CLANG || KMP_COMPILER_GCC))
468 | |
469 | #if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG |
470 | #pragma intrinsic(InterlockedExchangeAdd) |
471 | #pragma intrinsic(InterlockedCompareExchange) |
472 | #pragma intrinsic(InterlockedExchange) |
473 | #if !KMP_32_BIT_ARCH |
474 | #pragma intrinsic(InterlockedExchange64) |
475 | #endif |
476 | #endif |
477 | |
478 | // Using InterlockedIncrement / InterlockedDecrement causes a library loading |
479 | // ordering problem, so we use InterlockedExchangeAdd instead. |
480 | #define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd((volatile long *)(p), 1) |
481 | #define KMP_TEST_THEN_INC_ACQ32(p) \ |
482 | InterlockedExchangeAdd((volatile long *)(p), 1) |
483 | #define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd((volatile long *)(p), 4) |
484 | #define KMP_TEST_THEN_ADD4_ACQ32(p) \ |
485 | InterlockedExchangeAdd((volatile long *)(p), 4) |
486 | #define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd((volatile long *)(p), -1) |
487 | #define KMP_TEST_THEN_DEC_ACQ32(p) \ |
488 | InterlockedExchangeAdd((volatile long *)(p), -1) |
489 | #define KMP_TEST_THEN_ADD32(p, v) \ |
490 | InterlockedExchangeAdd((volatile long *)(p), (v)) |
491 | |
492 | #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ |
493 | InterlockedCompareExchange((volatile long *)(p), (long)(sv), (long)(cv)) |
494 | |
495 | #define KMP_XCHG_FIXED32(p, v) \ |
496 | InterlockedExchange((volatile long *)(p), (long)(v)) |
497 | #define KMP_XCHG_FIXED64(p, v) \ |
498 | InterlockedExchange64((volatile kmp_int64 *)(p), (kmp_int64)(v)) |
499 | |
500 | inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) { |
501 | kmp_int32 tmp = InterlockedExchange((volatile long *)p, *(long *)&v); |
502 | return *(kmp_real32 *)&tmp; |
503 | } |
504 | |
505 | #define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8((p), (v)) |
506 | #define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8((p), (v)) |
507 | #define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32((p), (v)) |
508 | #define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32((p), (v)) |
509 | #define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64((p), (v)) |
510 | #define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64((p), (v)) |
511 | |
512 | extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); |
513 | extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); |
514 | extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v); |
515 | extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v); |
516 | extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v); |
517 | extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v); |
518 | extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v); |
519 | extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v); |
520 | |
521 | #if KMP_ARCH_AARCH64 && KMP_COMPILER_MSVC && !KMP_COMPILER_CLANG |
522 | #define KMP_TEST_THEN_INC64(p) _InterlockedExchangeAdd64((p), 1LL) |
523 | #define KMP_TEST_THEN_INC_ACQ64(p) _InterlockedExchangeAdd64_acq((p), 1LL) |
524 | #define KMP_TEST_THEN_ADD4_64(p) _InterlockedExchangeAdd64((p), 4LL) |
525 | // #define KMP_TEST_THEN_ADD4_ACQ64(p) _InterlockedExchangeAdd64_acq((p), 4LL) |
526 | // #define KMP_TEST_THEN_DEC64(p) _InterlockedExchangeAdd64((p), -1LL) |
527 | // #define KMP_TEST_THEN_DEC_ACQ64(p) _InterlockedExchangeAdd64_acq((p), -1LL) |
528 | // #define KMP_TEST_THEN_ADD8(p, v) _InterlockedExchangeAdd8((p), (v)) |
529 | #define KMP_TEST_THEN_ADD64(p, v) _InterlockedExchangeAdd64((p), (v)) |
530 | |
531 | #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ |
532 | __kmp_compare_and_store_acq8((p), (cv), (sv)) |
533 | #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ |
534 | __kmp_compare_and_store_rel8((p), (cv), (sv)) |
535 | #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ |
536 | __kmp_compare_and_store_acq16((p), (cv), (sv)) |
537 | /* |
538 | #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ |
539 | __kmp_compare_and_store_rel16((p), (cv), (sv)) |
540 | */ |
541 | #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ |
542 | __kmp_compare_and_store_acq32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
543 | (kmp_int32)(sv)) |
544 | #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ |
545 | __kmp_compare_and_store_rel32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
546 | (kmp_int32)(sv)) |
547 | #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ |
548 | __kmp_compare_and_store_acq64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
549 | (kmp_int64)(sv)) |
550 | #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ |
551 | __kmp_compare_and_store_rel64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
552 | (kmp_int64)(sv)) |
553 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
554 | __kmp_compare_and_store_ptr((void *volatile *)(p), (void *)(cv), (void *)(sv)) |
555 | |
556 | // KMP_COMPARE_AND_STORE expects this order: pointer, compare, exchange |
557 | // _InterlockedCompareExchange expects this order: pointer, exchange, compare |
558 | // KMP_COMPARE_AND_STORE also returns a bool indicating a successful write. A |
559 | // write is successful if the return value of _InterlockedCompareExchange is the |
560 | // same as the compare value. |
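// For example (illustrative), KMP_COMPARE_AND_STORE_ACQ32(&x, old_v, new_v)
// writes new_v into x and yields true only when x still held old_v.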
561 | inline kmp_int8 __kmp_compare_and_store_acq8(volatile kmp_int8 *p, kmp_int8 cv, |
562 | kmp_int8 sv) { |
563 | return _InterlockedCompareExchange8_acq(p, sv, cv) == cv; |
564 | } |
565 | |
566 | inline kmp_int8 __kmp_compare_and_store_rel8(volatile kmp_int8 *p, kmp_int8 cv, |
567 | kmp_int8 sv) { |
568 | return _InterlockedCompareExchange8_rel(p, sv, cv) == cv; |
569 | } |
570 | |
571 | inline kmp_int16 __kmp_compare_and_store_acq16(volatile kmp_int16 *p, |
572 | kmp_int16 cv, kmp_int16 sv) { |
573 | return _InterlockedCompareExchange16_acq(p, sv, cv) == cv; |
574 | } |
575 | |
576 | inline kmp_int16 __kmp_compare_and_store_rel16(volatile kmp_int16 *p, |
577 | kmp_int16 cv, kmp_int16 sv) { |
578 | return _InterlockedCompareExchange16_rel(p, sv, cv) == cv; |
579 | } |
580 | |
581 | inline kmp_int32 __kmp_compare_and_store_acq32(volatile kmp_int32 *p, |
582 | kmp_int32 cv, kmp_int32 sv) { |
583 | return _InterlockedCompareExchange_acq((volatile long *)p, sv, cv) == cv; |
584 | } |
585 | |
586 | inline kmp_int32 __kmp_compare_and_store_rel32(volatile kmp_int32 *p, |
587 | kmp_int32 cv, kmp_int32 sv) { |
588 | return _InterlockedCompareExchange_rel((volatile long *)p, sv, cv) == cv; |
589 | } |
590 | |
591 | inline kmp_int32 __kmp_compare_and_store_acq64(volatile kmp_int64 *p, |
592 | kmp_int64 cv, kmp_int64 sv) { |
593 | return _InterlockedCompareExchange64_acq(p, sv, cv) == cv; |
594 | } |
595 | |
596 | inline kmp_int32 __kmp_compare_and_store_rel64(volatile kmp_int64 *p, |
597 | kmp_int64 cv, kmp_int64 sv) { |
598 | return _InterlockedCompareExchange64_rel(p, sv, cv) == cv; |
599 | } |
600 | |
601 | inline kmp_int32 __kmp_compare_and_store_ptr(void *volatile *p, void *cv, |
602 | void *sv) { |
603 | return _InterlockedCompareExchangePointer(p, sv, cv) == cv; |
604 | } |
605 | |
606 | // The _RET versions return the value instead of a bool |
607 | |
608 | #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ |
609 | _InterlockedCompareExchange8((p), (sv), (cv)) |
610 | #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ |
611 | _InterlockedCompareExchange16((p), (sv), (cv)) |
612 | |
613 | #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ |
614 | _InterlockedCompareExchange64((volatile kmp_int64 *)(p), (kmp_int64)(sv), \ |
615 | (kmp_int64)(cv)) |
616 | |
617 | |
618 | #define KMP_XCHG_FIXED8(p, v) \ |
619 | _InterlockedExchange8((volatile kmp_int8 *)(p), (kmp_int8)(v)); |
620 | #define KMP_XCHG_FIXED16(p, v) _InterlockedExchange16((p), (v)); |
621 | #define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)); |
622 | |
inline kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v) {
  kmp_int64 tmp =
      _InterlockedExchange64((volatile kmp_int64 *)p, *(kmp_int64 *)&v);
  return *(kmp_real64 *)&tmp;
}
627 | |
628 | #else // !KMP_ARCH_AARCH64 |
629 | |
630 | // Routines that we still need to implement in assembly. |
631 | extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); |
632 | |
633 | extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv, |
634 | kmp_int8 sv); |
635 | extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, |
636 | kmp_int16 sv); |
637 | extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, |
638 | kmp_int32 sv); |
639 | extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, |
640 | kmp_int64 sv); |
641 | extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, |
642 | kmp_int8 sv); |
643 | extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p, |
644 | kmp_int16 cv, kmp_int16 sv); |
645 | extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p, |
646 | kmp_int32 cv, kmp_int32 sv); |
647 | extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p, |
648 | kmp_int64 cv, kmp_int64 sv); |
649 | |
650 | extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v); |
651 | extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v); |
652 | extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v); |
653 | extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v); |
654 | extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v); |
655 | extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); |
656 | |
657 | //#define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32((p), 1) |
658 | //#define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32((p), 1) |
659 | #define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64((p), 1LL) |
660 | #define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64((p), 1LL) |
661 | //#define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32((p), 4) |
662 | //#define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32((p), 4) |
663 | #define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64((p), 4LL) |
664 | #define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64((p), 4LL) |
665 | //#define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32((p), -1) |
666 | //#define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32((p), -1) |
667 | #define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64((p), -1LL) |
668 | #define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64((p), -1LL) |
669 | //#define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32((p), (v)) |
670 | #define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8((p), (v)) |
671 | #define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64((p), (v)) |
672 | |
673 | |
674 | #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ |
675 | __kmp_compare_and_store8((p), (cv), (sv)) |
676 | #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ |
677 | __kmp_compare_and_store8((p), (cv), (sv)) |
678 | #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ |
679 | __kmp_compare_and_store16((p), (cv), (sv)) |
680 | #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ |
681 | __kmp_compare_and_store16((p), (cv), (sv)) |
682 | #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ |
683 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
684 | (kmp_int32)(sv)) |
685 | #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ |
686 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
687 | (kmp_int32)(sv)) |
688 | #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ |
689 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
690 | (kmp_int64)(sv)) |
691 | #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ |
692 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
693 | (kmp_int64)(sv)) |
694 | |
695 | #if KMP_ARCH_X86 |
696 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
697 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
698 | (kmp_int32)(sv)) |
699 | #else /* 64 bit pointers */ |
700 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
701 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
702 | (kmp_int64)(sv)) |
703 | #endif /* KMP_ARCH_X86 */ |
704 | |
705 | #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ |
706 | __kmp_compare_and_store_ret8((p), (cv), (sv)) |
707 | #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ |
708 | __kmp_compare_and_store_ret16((p), (cv), (sv)) |
709 | #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ |
710 | __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
711 | (kmp_int64)(sv)) |
712 | |
713 | #define KMP_XCHG_FIXED8(p, v) \ |
714 | __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v)); |
715 | #define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v)); |
716 | //#define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v)); |
717 | //#define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v)); |
718 | //#define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v)); |
719 | #define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)); |
720 | #endif |
721 | |
722 | #elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64) |
723 | |
/* cast p to the correct type so that the proper intrinsic will be used */
725 | #define KMP_TEST_THEN_INC32(p) \ |
726 | __sync_fetch_and_add((volatile kmp_int32 *)(p), 1) |
727 | #define KMP_TEST_THEN_INC_ACQ32(p) \ |
728 | __sync_fetch_and_add((volatile kmp_int32 *)(p), 1) |
729 | #if KMP_ARCH_MIPS |
730 | #define KMP_TEST_THEN_INC64(p) \ |
731 | __atomic_fetch_add((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST) |
732 | #define KMP_TEST_THEN_INC_ACQ64(p) \ |
733 | __atomic_fetch_add((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST) |
734 | #else |
735 | #define KMP_TEST_THEN_INC64(p) \ |
736 | __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL) |
737 | #define KMP_TEST_THEN_INC_ACQ64(p) \ |
738 | __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL) |
739 | #endif |
740 | #define KMP_TEST_THEN_ADD4_32(p) \ |
741 | __sync_fetch_and_add((volatile kmp_int32 *)(p), 4) |
742 | #define KMP_TEST_THEN_ADD4_ACQ32(p) \ |
743 | __sync_fetch_and_add((volatile kmp_int32 *)(p), 4) |
744 | #if KMP_ARCH_MIPS |
745 | #define KMP_TEST_THEN_ADD4_64(p) \ |
746 | __atomic_fetch_add((volatile kmp_int64 *)(p), 4LL, __ATOMIC_SEQ_CST) |
747 | #define KMP_TEST_THEN_ADD4_ACQ64(p) \ |
748 | __atomic_fetch_add((volatile kmp_int64 *)(p), 4LL, __ATOMIC_SEQ_CST) |
749 | #define KMP_TEST_THEN_DEC64(p) \ |
750 | __atomic_fetch_sub((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST) |
751 | #define KMP_TEST_THEN_DEC_ACQ64(p) \ |
752 | __atomic_fetch_sub((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST) |
753 | #else |
754 | #define KMP_TEST_THEN_ADD4_64(p) \ |
755 | __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL) |
756 | #define KMP_TEST_THEN_ADD4_ACQ64(p) \ |
757 | __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL) |
758 | #define KMP_TEST_THEN_DEC64(p) \ |
759 | __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL) |
760 | #define KMP_TEST_THEN_DEC_ACQ64(p) \ |
761 | __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL) |
762 | #endif |
763 | #define KMP_TEST_THEN_DEC32(p) \ |
764 | __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1) |
765 | #define KMP_TEST_THEN_DEC_ACQ32(p) \ |
766 | __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1) |
767 | #define KMP_TEST_THEN_ADD8(p, v) \ |
768 | __sync_fetch_and_add((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
769 | #define KMP_TEST_THEN_ADD32(p, v) \ |
770 | __sync_fetch_and_add((volatile kmp_int32 *)(p), (kmp_int32)(v)) |
771 | #if KMP_ARCH_MIPS |
772 | #define KMP_TEST_THEN_ADD64(p, v) \ |
773 | __atomic_fetch_add((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \ |
774 | __ATOMIC_SEQ_CST) |
775 | #else |
776 | #define KMP_TEST_THEN_ADD64(p, v) \ |
777 | __sync_fetch_and_add((volatile kmp_int64 *)(p), (kmp_int64)(v)) |
778 | #endif |
779 | |
780 | #define KMP_TEST_THEN_OR8(p, v) \ |
781 | __sync_fetch_and_or((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
782 | #define KMP_TEST_THEN_AND8(p, v) \ |
783 | __sync_fetch_and_and((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
784 | #define KMP_TEST_THEN_OR32(p, v) \ |
785 | __sync_fetch_and_or((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) |
786 | #define KMP_TEST_THEN_AND32(p, v) \ |
787 | __sync_fetch_and_and((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) |
788 | #if KMP_ARCH_MIPS |
789 | #define KMP_TEST_THEN_OR64(p, v) \ |
790 | __atomic_fetch_or((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \ |
791 | __ATOMIC_SEQ_CST) |
792 | #define KMP_TEST_THEN_AND64(p, v) \ |
793 | __atomic_fetch_and((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \ |
794 | __ATOMIC_SEQ_CST) |
795 | #else |
796 | #define KMP_TEST_THEN_OR64(p, v) \ |
797 | __sync_fetch_and_or((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) |
798 | #define KMP_TEST_THEN_AND64(p, v) \ |
799 | __sync_fetch_and_and((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) |
800 | #endif |
801 | |
802 | #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ |
803 | __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ |
804 | (kmp_uint8)(sv)) |
805 | #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ |
806 | __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ |
807 | (kmp_uint8)(sv)) |
808 | #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ |
809 | __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ |
810 | (kmp_uint16)(sv)) |
811 | #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ |
812 | __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ |
813 | (kmp_uint16)(sv)) |
814 | #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ |
815 | __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ |
816 | (kmp_uint32)(sv)) |
817 | #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ |
818 | __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ |
819 | (kmp_uint32)(sv)) |
820 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
821 | __sync_bool_compare_and_swap((void *volatile *)(p), (void *)(cv), \ |
822 | (void *)(sv)) |
823 | |
824 | #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ |
825 | __sync_val_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ |
826 | (kmp_uint8)(sv)) |
827 | #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ |
828 | __sync_val_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ |
829 | (kmp_uint16)(sv)) |
830 | #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ |
831 | __sync_val_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ |
832 | (kmp_uint32)(sv)) |
833 | #if KMP_ARCH_MIPS |
834 | static inline bool mips_sync_bool_compare_and_swap(volatile kmp_uint64 *p, |
835 | kmp_uint64 cv, |
836 | kmp_uint64 sv) { |
837 | return __atomic_compare_exchange(p, &cv, &sv, false, __ATOMIC_SEQ_CST, |
838 | __ATOMIC_SEQ_CST); |
839 | } |
840 | static inline bool mips_sync_val_compare_and_swap(volatile kmp_uint64 *p, |
841 | kmp_uint64 cv, |
842 | kmp_uint64 sv) { |
843 | __atomic_compare_exchange(p, &cv, &sv, false, __ATOMIC_SEQ_CST, |
844 | __ATOMIC_SEQ_CST); |
845 | return cv; |
846 | } |
847 | #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ |
848 | mips_sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), \ |
849 | (kmp_uint64)(cv), (kmp_uint64)(sv)) |
850 | #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ |
851 | mips_sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), \ |
852 | (kmp_uint64)(cv), (kmp_uint64)(sv)) |
853 | #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ |
854 | mips_sync_val_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ |
855 | (kmp_uint64)(sv)) |
856 | #else |
857 | #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ |
858 | __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ |
859 | (kmp_uint64)(sv)) |
860 | #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ |
861 | __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ |
862 | (kmp_uint64)(sv)) |
863 | #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ |
864 | __sync_val_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ |
865 | (kmp_uint64)(sv)) |
866 | #endif |
867 | |
868 | #if KMP_OS_DARWIN && defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1800 |
869 | #define KMP_XCHG_FIXED8(p, v) \ |
870 | __atomic_exchange_1((volatile kmp_uint8 *)(p), (kmp_uint8)(v), \ |
871 | __ATOMIC_SEQ_CST) |
872 | #else |
873 | #define KMP_XCHG_FIXED8(p, v) \ |
874 | __sync_lock_test_and_set((volatile kmp_uint8 *)(p), (kmp_uint8)(v)) |
875 | #endif |
876 | #define KMP_XCHG_FIXED16(p, v) \ |
877 | __sync_lock_test_and_set((volatile kmp_uint16 *)(p), (kmp_uint16)(v)) |
878 | #define KMP_XCHG_FIXED32(p, v) \ |
879 | __sync_lock_test_and_set((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) |
880 | #define KMP_XCHG_FIXED64(p, v) \ |
881 | __sync_lock_test_and_set((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) |
882 | |
883 | inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) { |
884 | volatile kmp_uint32 *up; |
885 | kmp_uint32 uv; |
  memcpy(&up, &p, sizeof(up));
  memcpy(&uv, &v, sizeof(uv));
888 | kmp_int32 tmp = __sync_lock_test_and_set(up, uv); |
889 | kmp_real32 ftmp; |
  memcpy(&ftmp, &tmp, sizeof(tmp));
891 | return ftmp; |
892 | } |
893 | |
894 | inline kmp_real64 KMP_XCHG_REAL64(volatile kmp_real64 *p, kmp_real64 v) { |
895 | volatile kmp_uint64 *up; |
896 | kmp_uint64 uv; |
  memcpy(&up, &p, sizeof(up));
  memcpy(&uv, &v, sizeof(uv));
899 | kmp_int64 tmp = __sync_lock_test_and_set(up, uv); |
900 | kmp_real64 dtmp; |
  memcpy(&dtmp, &tmp, sizeof(tmp));
902 | return dtmp; |
903 | } |
904 | |
905 | #else |
906 | |
907 | extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); |
908 | extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); |
909 | extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); |
910 | extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v); |
911 | extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v); |
912 | extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v); |
913 | extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v); |
914 | extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v); |
915 | extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v); |
916 | |
917 | extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv, |
918 | kmp_int8 sv); |
919 | extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, |
920 | kmp_int16 sv); |
921 | extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, |
922 | kmp_int32 sv); |
923 | extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, |
924 | kmp_int64 sv); |
925 | extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, |
926 | kmp_int8 sv); |
927 | extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p, |
928 | kmp_int16 cv, kmp_int16 sv); |
929 | extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p, |
930 | kmp_int32 cv, kmp_int32 sv); |
931 | extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p, |
932 | kmp_int64 cv, kmp_int64 sv); |
933 | |
934 | extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v); |
935 | extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v); |
936 | extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v); |
937 | extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v); |
938 | extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v); |
939 | extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); |
940 | |
941 | #define KMP_TEST_THEN_INC32(p) \ |
942 | __kmp_test_then_add32((volatile kmp_int32 *)(p), 1) |
943 | #define KMP_TEST_THEN_INC_ACQ32(p) \ |
944 | __kmp_test_then_add32((volatile kmp_int32 *)(p), 1) |
945 | #define KMP_TEST_THEN_INC64(p) \ |
946 | __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL) |
947 | #define KMP_TEST_THEN_INC_ACQ64(p) \ |
948 | __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL) |
949 | #define KMP_TEST_THEN_ADD4_32(p) \ |
950 | __kmp_test_then_add32((volatile kmp_int32 *)(p), 4) |
951 | #define KMP_TEST_THEN_ADD4_ACQ32(p) \ |
952 | __kmp_test_then_add32((volatile kmp_int32 *)(p), 4) |
953 | #define KMP_TEST_THEN_ADD4_64(p) \ |
954 | __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL) |
955 | #define KMP_TEST_THEN_ADD4_ACQ64(p) \ |
956 | __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL) |
957 | #define KMP_TEST_THEN_DEC32(p) \ |
958 | __kmp_test_then_add32((volatile kmp_int32 *)(p), -1) |
959 | #define KMP_TEST_THEN_DEC_ACQ32(p) \ |
960 | __kmp_test_then_add32((volatile kmp_int32 *)(p), -1) |
961 | #define KMP_TEST_THEN_DEC64(p) \ |
962 | __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL) |
963 | #define KMP_TEST_THEN_DEC_ACQ64(p) \ |
964 | __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL) |
965 | #define KMP_TEST_THEN_ADD8(p, v) \ |
966 | __kmp_test_then_add8((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
967 | #define KMP_TEST_THEN_ADD32(p, v) \ |
968 | __kmp_test_then_add32((volatile kmp_int32 *)(p), (kmp_int32)(v)) |
969 | #define KMP_TEST_THEN_ADD64(p, v) \ |
970 | __kmp_test_then_add64((volatile kmp_int64 *)(p), (kmp_int64)(v)) |
971 | |
972 | #define KMP_TEST_THEN_OR8(p, v) \ |
973 | __kmp_test_then_or8((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
974 | #define KMP_TEST_THEN_AND8(p, v) \ |
975 | __kmp_test_then_and8((volatile kmp_int8 *)(p), (kmp_int8)(v)) |
976 | #define KMP_TEST_THEN_OR32(p, v) \ |
977 | __kmp_test_then_or32((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) |
978 | #define KMP_TEST_THEN_AND32(p, v) \ |
979 | __kmp_test_then_and32((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) |
980 | #define KMP_TEST_THEN_OR64(p, v) \ |
981 | __kmp_test_then_or64((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) |
982 | #define KMP_TEST_THEN_AND64(p, v) \ |
983 | __kmp_test_then_and64((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) |
984 | |
985 | #define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ |
986 | __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \ |
987 | (kmp_int8)(sv)) |
988 | #define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ |
989 | __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \ |
990 | (kmp_int8)(sv)) |
991 | #define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ |
992 | __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \ |
993 | (kmp_int16)(sv)) |
994 | #define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ |
995 | __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \ |
996 | (kmp_int16)(sv)) |
997 | #define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ |
998 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
999 | (kmp_int32)(sv)) |
1000 | #define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ |
1001 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
1002 | (kmp_int32)(sv)) |
1003 | #define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ |
1004 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
1005 | (kmp_int64)(sv)) |
1006 | #define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ |
1007 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
1008 | (kmp_int64)(sv)) |
1009 | |
1010 | #if KMP_ARCH_X86 |
1011 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
1012 | __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
1013 | (kmp_int32)(sv)) |
1014 | #else /* 64 bit pointers */ |
1015 | #define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ |
1016 | __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
1017 | (kmp_int64)(sv)) |
1018 | #endif /* KMP_ARCH_X86 */ |
1019 | |
1020 | #define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ |
1021 | __kmp_compare_and_store_ret8((p), (cv), (sv)) |
1022 | #define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ |
1023 | __kmp_compare_and_store_ret16((p), (cv), (sv)) |
1024 | #define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ |
1025 | __kmp_compare_and_store_ret32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ |
1026 | (kmp_int32)(sv)) |
1027 | #define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ |
1028 | __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ |
1029 | (kmp_int64)(sv)) |
1030 | |
1031 | #define KMP_XCHG_FIXED8(p, v) \ |
1032 | __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v)); |
1033 | #define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v)); |
1034 | #define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v)); |
1035 | #define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v)); |
1036 | #define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v)); |
1037 | #define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)); |
1038 | |
1039 | #endif /* KMP_ASM_INTRINS */ |
1040 | |
1041 | /* ------------- relaxed consistency memory model stuff ------------------ */ |
1042 | |
1043 | #if KMP_OS_WINDOWS |
1044 | #ifdef __ABSOFT_WIN |
1045 | #define KMP_MB() asm("nop") |
1046 | #define KMP_IMB() asm("nop") |
1047 | #else |
1048 | #define KMP_MB() /* _asm{ nop } */ |
1049 | #define KMP_IMB() /* _asm{ nop } */ |
1050 | #endif |
1051 | #endif /* KMP_OS_WINDOWS */ |
1052 | |
1053 | #if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ |
1054 | KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \ |
1055 | KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32 || \ |
1056 | KMP_ARCH_SPARC |
1057 | #if KMP_OS_WINDOWS |
1058 | #undef KMP_MB |
1059 | #define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst) |
1060 | #else /* !KMP_OS_WINDOWS */ |
1061 | #define KMP_MB() __sync_synchronize() |
1062 | #endif |
1063 | #endif |
1064 | |
1065 | #ifndef KMP_MB |
1066 | #define KMP_MB() /* nothing to do */ |
1067 | #endif |
1068 | |
1069 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1070 | #if KMP_MIC |
1071 | // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. |
1072 | // We shouldn't need it, though, since the ABI rules require that |
1073 | // * If the compiler generates NGO stores it also generates the fence |
1074 | // * If users hand-code NGO stores they should insert the fence |
1075 | // therefore no incomplete unordered stores should be visible. |
1076 | #define KMP_MFENCE() /* Nothing */ |
1077 | #define KMP_SFENCE() /* Nothing */ |
1078 | #else |
1079 | #if KMP_COMPILER_ICC || KMP_COMPILER_ICX |
1080 | #define KMP_MFENCE_() _mm_mfence() |
1081 | #define KMP_SFENCE_() _mm_sfence() |
1082 | #elif KMP_COMPILER_MSVC |
1083 | #define KMP_MFENCE_() MemoryBarrier() |
1084 | #define KMP_SFENCE_() MemoryBarrier() |
1085 | #else |
1086 | #define KMP_MFENCE_() __sync_synchronize() |
1087 | #define KMP_SFENCE_() __sync_synchronize() |
1088 | #endif |
1089 | #define KMP_MFENCE() \ |
1090 | if (UNLIKELY(!__kmp_cpuinfo.initialized)) { \ |
1091 | __kmp_query_cpuid(&__kmp_cpuinfo); \ |
1092 | } \ |
1093 | if (__kmp_cpuinfo.flags.sse2) { \ |
1094 | KMP_MFENCE_(); \ |
1095 | } |
1096 | #define KMP_SFENCE() KMP_SFENCE_() |
1097 | #endif |
1098 | #else |
1099 | #define KMP_MFENCE() KMP_MB() |
1100 | #define KMP_SFENCE() KMP_MB() |
1101 | #endif |
1102 | |
1103 | #ifndef KMP_IMB |
1104 | #define KMP_IMB() /* nothing to do */ |
1105 | #endif |
1106 | |
1107 | #ifndef KMP_ST_REL32 |
1108 | #define KMP_ST_REL32(A, D) (*(A) = (D)) |
1109 | #endif |
1110 | |
1111 | #ifndef KMP_ST_REL64 |
1112 | #define KMP_ST_REL64(A, D) (*(A) = (D)) |
1113 | #endif |
1114 | |
1115 | #ifndef KMP_LD_ACQ32 |
1116 | #define KMP_LD_ACQ32(A) (*(A)) |
1117 | #endif |
1118 | |
1119 | #ifndef KMP_LD_ACQ64 |
1120 | #define KMP_LD_ACQ64(A) (*(A)) |
1121 | #endif |
1122 | |
1123 | /* ------------------------------------------------------------------------ */ |
// FIXME - maybe this should be
1125 | // |
1126 | // #define TCR_4(a) (*(volatile kmp_int32 *)(&a)) |
1127 | // #define TCW_4(a,b) (a) = (*(volatile kmp_int32 *)&(b)) |
1128 | // |
1129 | // #define TCR_8(a) (*(volatile kmp_int64 *)(a)) |
1130 | // #define TCW_8(a,b) (a) = (*(volatile kmp_int64 *)(&b)) |
1131 | // |
1132 | // I'm fairly certain this is the correct thing to do, but I'm afraid |
1133 | // of performance regressions. |
1134 | |
1135 | #define TCR_1(a) (a) |
1136 | #define TCW_1(a, b) (a) = (b) |
1137 | #define TCR_4(a) (a) |
1138 | #define TCW_4(a, b) (a) = (b) |
1139 | #define TCI_4(a) (++(a)) |
1140 | #define TCD_4(a) (--(a)) |
1141 | #define TCR_8(a) (a) |
1142 | #define TCW_8(a, b) (a) = (b) |
1143 | #define TCI_8(a) (++(a)) |
1144 | #define TCD_8(a) (--(a)) |
1145 | #define TCR_SYNC_4(a) (a) |
1146 | #define TCW_SYNC_4(a, b) (a) = (b) |
1147 | #define TCX_SYNC_4(a, b, c) \ |
1148 | KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a), \ |
1149 | (kmp_int32)(b), (kmp_int32)(c)) |
1150 | #define TCR_SYNC_8(a) (a) |
1151 | #define TCW_SYNC_8(a, b) (a) = (b) |
1152 | #define TCX_SYNC_8(a, b, c) \ |
1153 | KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), \ |
1154 | (kmp_int64)(b), (kmp_int64)(c)) |
1155 | |
1156 | #if KMP_ARCH_X86 || KMP_ARCH_MIPS || KMP_ARCH_WASM || KMP_ARCH_PPC |
1157 | // What about ARM? |
1158 | #define TCR_PTR(a) ((void *)TCR_4(a)) |
1159 | #define TCW_PTR(a, b) TCW_4((a), (b)) |
1160 | #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a)) |
1161 | #define TCW_SYNC_PTR(a, b) TCW_SYNC_4((a), (b)) |
1162 | #define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_4((a), (b), (c))) |
1163 | |
1164 | #else /* 64 bit pointers */ |
1165 | |
1166 | #define TCR_PTR(a) ((void *)TCR_8(a)) |
1167 | #define TCW_PTR(a, b) TCW_8((a), (b)) |
1168 | #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a)) |
1169 | #define TCW_SYNC_PTR(a, b) TCW_SYNC_8((a), (b)) |
1170 | #define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_8((a), (b), (c))) |
1171 | |
1172 | #endif /* KMP_ARCH_X86 */ |
1173 | |
/* If these FTN_{TRUE,FALSE} values change, several places where they are used
   to check that the language is Fortran, not C, may also need to change. */
1176 | |
1177 | #ifndef FTN_TRUE |
1178 | #define FTN_TRUE TRUE |
1179 | #endif |
1180 | |
1181 | #ifndef FTN_FALSE |
1182 | #define FTN_FALSE FALSE |
1183 | #endif |
1184 | |
1185 | typedef void (*microtask_t)(int *gtid, int *npr, ...); |
1186 | |
1187 | #ifdef USE_VOLATILE_CAST |
1188 | #define VOLATILE_CAST(x) (volatile x) |
1189 | #else |
1190 | #define VOLATILE_CAST(x) (x) |
1191 | #endif |
1192 | |
1193 | #define KMP_WAIT __kmp_wait_4 |
1194 | #define KMP_WAIT_PTR __kmp_wait_4_ptr |
1195 | #define KMP_EQ __kmp_eq_4 |
1196 | #define KMP_NEQ __kmp_neq_4 |
1197 | #define KMP_LT __kmp_lt_4 |
1198 | #define KMP_GE __kmp_ge_4 |
1199 | #define KMP_LE __kmp_le_4 |
1200 | |
1201 | /* Workaround for Intel(R) 64 code gen bug when taking address of static array |
1202 | * (Intel(R) 64 Tracker #138) */ |
1203 | #if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX |
1204 | #define STATIC_EFI2_WORKAROUND |
1205 | #else |
1206 | #define STATIC_EFI2_WORKAROUND static |
1207 | #endif |
1208 | |
1209 | // Support of BGET usage |
1210 | #ifndef KMP_USE_BGET |
1211 | #define KMP_USE_BGET 1 |
1212 | #endif |
1213 | |
1214 | // Switches for OSS builds |
1215 | #ifndef USE_CMPXCHG_FIX |
1216 | #define USE_CMPXCHG_FIX 1 |
1217 | #endif |
1218 | |
1219 | // Enable dynamic user lock |
1220 | #define KMP_USE_DYNAMIC_LOCK 1 |
1221 | |
1222 | // Enable Intel(R) Transactional Synchronization Extensions (Intel(R) TSX) if |
1223 | // dynamic user lock is turned on |
1224 | #if KMP_USE_DYNAMIC_LOCK |
// Visual Studio can't handle the asm sections in this code
#define KMP_USE_TSX ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC)
1227 | #ifdef KMP_USE_ADAPTIVE_LOCKS |
1228 | #undef KMP_USE_ADAPTIVE_LOCKS |
1229 | #endif |
1230 | #define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX |
1231 | #endif |
1232 | |
1233 | // Enable tick time conversion of ticks to seconds |
1234 | #if KMP_STATS_ENABLED |
1235 | #define KMP_HAVE_TICK_TIME \ |
1236 | (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64)) |
1237 | #endif |
1238 | |
1239 | // Warning levels |
1240 | enum kmp_warnings_level { |
1241 | kmp_warnings_off = 0, /* No warnings */ |
1242 | kmp_warnings_low, /* Minimal warnings (default) */ |
1243 | kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */ |
1244 | kmp_warnings_verbose /* reserved */ |
1245 | }; |
1246 | |
1247 | #ifdef __cplusplus |
1248 | } // extern "C" |
1249 | #endif // __cplusplus |
1250 | |
1251 | // Safe C API |
1252 | #include "kmp_safe_c_api.h" |
1253 | |
1254 | // Macros for C++11 atomic functions |
1255 | #define KMP_ATOMIC_LD(p, order) (p)->load(std::memory_order_##order) |
1256 | #define KMP_ATOMIC_OP(op, p, v, order) (p)->op(v, std::memory_order_##order) |
1257 | |
1258 | // For non-default load/store |
1259 | #define KMP_ATOMIC_LD_ACQ(p) KMP_ATOMIC_LD(p, acquire) |
1260 | #define KMP_ATOMIC_LD_RLX(p) KMP_ATOMIC_LD(p, relaxed) |
1261 | #define KMP_ATOMIC_ST_REL(p, v) KMP_ATOMIC_OP(store, p, v, release) |
1262 | #define KMP_ATOMIC_ST_RLX(p, v) KMP_ATOMIC_OP(store, p, v, relaxed) |
1263 | |
1264 | // For non-default fetch_<op> |
1265 | #define KMP_ATOMIC_ADD(p, v) KMP_ATOMIC_OP(fetch_add, p, v, acq_rel) |
1266 | #define KMP_ATOMIC_SUB(p, v) KMP_ATOMIC_OP(fetch_sub, p, v, acq_rel) |
1267 | #define KMP_ATOMIC_AND(p, v) KMP_ATOMIC_OP(fetch_and, p, v, acq_rel) |
1268 | #define KMP_ATOMIC_OR(p, v) KMP_ATOMIC_OP(fetch_or, p, v, acq_rel) |
1269 | #define KMP_ATOMIC_INC(p) KMP_ATOMIC_OP(fetch_add, p, 1, acq_rel) |
1270 | #define KMP_ATOMIC_DEC(p) KMP_ATOMIC_OP(fetch_sub, p, 1, acq_rel) |
1271 | #define KMP_ATOMIC_ADD_RLX(p, v) KMP_ATOMIC_OP(fetch_add, p, v, relaxed) |
1272 | #define KMP_ATOMIC_INC_RLX(p) KMP_ATOMIC_OP(fetch_add, p, 1, relaxed) |
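
// Illustrative usage (sketch, hypothetical variable): given
//   std::atomic<kmp_int32> flag;
// the macros read as
//   kmp_int32 v = KMP_ATOMIC_LD_ACQ(&flag); // flag.load(acquire)
//   KMP_ATOMIC_ST_REL(&flag, 1);            // flag.store(1, release)
//   kmp_int32 old = KMP_ATOMIC_INC(&flag);  // flag.fetch_add(1, acq_rel)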
1273 | |
1274 | // Callers of the following functions cannot see the side effect on "expected". |
1275 | template <typename T> |
1276 | bool __kmp_atomic_compare_store(std::atomic<T> *p, T expected, T desired) { |
1277 | return p->compare_exchange_strong( |
1278 | expected, desired, std::memory_order_acq_rel, std::memory_order_relaxed); |
1279 | } |
1280 | |
1281 | template <typename T> |
1282 | bool __kmp_atomic_compare_store_acq(std::atomic<T> *p, T expected, T desired) { |
1283 | return p->compare_exchange_strong( |
1284 | expected, desired, std::memory_order_acquire, std::memory_order_relaxed); |
1285 | } |
1286 | |
1287 | template <typename T> |
1288 | bool __kmp_atomic_compare_store_rel(std::atomic<T> *p, T expected, T desired) { |
1289 | return p->compare_exchange_strong( |
1290 | expected, desired, std::memory_order_release, std::memory_order_relaxed); |
1291 | } |
1292 | |
1293 | // Symbol lookup on Linux/Windows |
1294 | #if KMP_OS_WINDOWS |
1295 | extern void *__kmp_lookup_symbol(const char *name, bool next = false); |
1296 | #define KMP_DLSYM(name) __kmp_lookup_symbol(name) |
1297 | #define KMP_DLSYM_NEXT(name) __kmp_lookup_symbol(name, true) |
1298 | #elif KMP_OS_WASI || KMP_OS_EMSCRIPTEN |
1299 | #define KMP_DLSYM(name) nullptr |
1300 | #define KMP_DLSYM_NEXT(name) nullptr |
1301 | #else |
1302 | #define KMP_DLSYM(name) dlsym(RTLD_DEFAULT, name) |
1303 | #define KMP_DLSYM_NEXT(name) dlsym(RTLD_NEXT, name) |
1304 | #endif |
1305 | |
1306 | // MSVC doesn't have this, but clang/clang-cl does. |
1307 | #ifndef __has_builtin |
1308 | #define __has_builtin(x) 0 |
1309 | #endif |
1310 | |
1311 | // Same as LLVM_BUILTIN_UNREACHABLE. States that it is UB to reach this point. |
1312 | #if __has_builtin(__builtin_unreachable) || defined(__GNUC__) |
1313 | #define KMP_BUILTIN_UNREACHABLE __builtin_unreachable() |
1314 | #elif defined(_MSC_VER) |
1315 | #define KMP_BUILTIN_UNREACHABLE __assume(false) |
1316 | #else |
1317 | #define KMP_BUILTIN_UNREACHABLE |
1318 | #endif |
1319 | |
1320 | #endif /* KMP_OS_H */ |
1321 | |