1/*
2 * kmp_atomic.cpp -- ATOMIC implementation routines
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include "kmp_atomic.h"
14#include "kmp.h" // TRUE, asm routines prototypes
15
16typedef unsigned char uchar;
17typedef unsigned short ushort;
18
19/*!
20@defgroup ATOMIC_OPS Atomic Operations
21These functions are used for implementing the many different varieties of atomic
22operations.
23
24The compiler is at liberty to inline atomic operations that are naturally
25supported by the target architecture. For instance on IA-32 architecture an
26atomic like this can be inlined
27@code
28static int s = 0;
29#pragma omp atomic
30 s++;
31@endcode
32using the single instruction: `lock; incl s`
33
34However the runtime does provide entrypoints for these operations to support
35compilers that choose not to inline them. (For instance,
36`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
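For illustration, the increment above, when not inlined, might be lowered to a
call such as the following (a sketch only; the source-location and thread-id
arguments are supplied by the compiler, and `loc` and `gtid` are placeholder
names here):
@code
// #pragma omp atomic   s++;   lowered to a runtime call
__kmpc_atomic_fixed4_add(&loc, gtid, &s, 1);
@endcode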
37
38The names of the functions are encoded by using the data type name and the
39operation name, as in these tables.
40
Data Type | Data type encoding
-----------|---------------
int8_t | `fixed1`
uint8_t | `fixed1u`
int16_t | `fixed2`
uint16_t | `fixed2u`
int32_t | `fixed4`
uint32_t | `fixed4u`
int64_t | `fixed8`
uint64_t | `fixed8u`
float | `float4`
double | `float8`
long double (80 bit 8087 extended float) | `float10`
complex<float> | `cmplx4`
complex<double> | `cmplx8`
complex<float10> | `cmplx10`
57<br>
58
59Operation | Operation encoding
60----------|-------------------
61+ | add
62- | sub
63\* | mul
64/ | div
65& | andb
66<< | shl
67\>\> | shr
68\| | orb
69^ | xor
70&& | andl
71\|\| | orl
72maximum | max
73minimum | min
74.eqv. | eqv
75.neqv. | neqv
76
77<br>
78For non-commutative operations, `_rev` can also be added for the reversed
79operation. For the functions that capture the result, the suffix `_cpt` is
80added.
81
82Update Functions
83================
The general form of an atomic function that just performs an update (without a
`capture`) is
86@code
87void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
88lhs, TYPE rhs );
89@endcode
90@param ident_t a pointer to source location
91@param gtid the global thread id
92@param lhs a pointer to the left operand
93@param rhs the right operand
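
As a concrete (illustrative) example, a compiler might translate `x *= y`,
where `x` and `y` have type `double`, into
@code
__kmpc_atomic_float8_mul(&loc, gtid, &x, y); // x = x * y, performed atomically
@endcode
where `loc` and `gtid` are again placeholders for the compiler-supplied source
location and global thread id.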
94
95`capture` functions
96===================
The capture functions perform an atomic update and return a result, which is
either the value before the update or the value after it. They take an
additional argument to determine which result is returned.
100Their general form is therefore
101@code
102TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
103lhs, TYPE rhs, int flag );
104@endcode
105@param ident_t a pointer to source location
106@param gtid the global thread id
107@param lhs a pointer to the left operand
108@param rhs the right operand
109@param flag one if the result is to be captured *after* the operation, zero if
110captured *before*.
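
Ignoring atomicity, the behaviour of a capture function can be sketched as
follows (a sketch of the semantics only, not of the actual implementation):
@code
// semantic sketch only
TYPE old = *lhs;
*lhs = old <op> rhs;      // the atomic update
return flag ? *lhs : old; // new value if flag is one, old value if zero
@endcode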
111
The one exception is the `complex<float>` type, where the value is not
returned; instead, an extra output-pointer argument is passed.
114
115They look like
116@code
117void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
118lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
119@endcode
120
121Read and Write Operations
122=========================
123The OpenMP<sup>*</sup> standard now supports atomic operations that simply
124ensure that the value is read or written atomically, with no modification
125performed. In many cases on IA-32 architecture these operations can be inlined
126since the architecture guarantees that no tearing occurs on aligned objects
127accessed with a single memory operation of up to 64 bits in size.
128
129The general form of the read operations is
130@code
131TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
132@endcode
133
134For the write operations the form is
135@code
136void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
137);
138@endcode
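
For example (illustrative only, with `loc` and `gtid` again standing in for the
compiler-supplied arguments), an atomic read and write of a `double` could be
lowered as
@code
double v = __kmpc_atomic_float8_rd(&loc, gtid, &x); // #pragma omp atomic read
__kmpc_atomic_float8_wr(&loc, gtid, &x, 1.0);       // #pragma omp atomic write
@endcode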
139
140Full list of functions
141======================
142This leads to the generation of 376 atomic functions, as follows.
143
144Functions for integers
145---------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
148@code
149 __kmpc_atomic_fixed1_add
150 __kmpc_atomic_fixed1_add_cpt
151 __kmpc_atomic_fixed1_add_fp
152 __kmpc_atomic_fixed1_andb
153 __kmpc_atomic_fixed1_andb_cpt
154 __kmpc_atomic_fixed1_andl
155 __kmpc_atomic_fixed1_andl_cpt
156 __kmpc_atomic_fixed1_div
157 __kmpc_atomic_fixed1_div_cpt
158 __kmpc_atomic_fixed1_div_cpt_rev
159 __kmpc_atomic_fixed1_div_float8
160 __kmpc_atomic_fixed1_div_fp
161 __kmpc_atomic_fixed1_div_rev
162 __kmpc_atomic_fixed1_eqv
163 __kmpc_atomic_fixed1_eqv_cpt
164 __kmpc_atomic_fixed1_max
165 __kmpc_atomic_fixed1_max_cpt
166 __kmpc_atomic_fixed1_min
167 __kmpc_atomic_fixed1_min_cpt
168 __kmpc_atomic_fixed1_mul
169 __kmpc_atomic_fixed1_mul_cpt
170 __kmpc_atomic_fixed1_mul_float8
171 __kmpc_atomic_fixed1_mul_fp
172 __kmpc_atomic_fixed1_neqv
173 __kmpc_atomic_fixed1_neqv_cpt
174 __kmpc_atomic_fixed1_orb
175 __kmpc_atomic_fixed1_orb_cpt
176 __kmpc_atomic_fixed1_orl
177 __kmpc_atomic_fixed1_orl_cpt
178 __kmpc_atomic_fixed1_rd
179 __kmpc_atomic_fixed1_shl
180 __kmpc_atomic_fixed1_shl_cpt
181 __kmpc_atomic_fixed1_shl_cpt_rev
182 __kmpc_atomic_fixed1_shl_rev
183 __kmpc_atomic_fixed1_shr
184 __kmpc_atomic_fixed1_shr_cpt
185 __kmpc_atomic_fixed1_shr_cpt_rev
186 __kmpc_atomic_fixed1_shr_rev
187 __kmpc_atomic_fixed1_sub
188 __kmpc_atomic_fixed1_sub_cpt
189 __kmpc_atomic_fixed1_sub_cpt_rev
190 __kmpc_atomic_fixed1_sub_fp
191 __kmpc_atomic_fixed1_sub_rev
192 __kmpc_atomic_fixed1_swp
193 __kmpc_atomic_fixed1_wr
194 __kmpc_atomic_fixed1_xor
195 __kmpc_atomic_fixed1_xor_cpt
196 __kmpc_atomic_fixed1u_add_fp
197 __kmpc_atomic_fixed1u_sub_fp
198 __kmpc_atomic_fixed1u_mul_fp
199 __kmpc_atomic_fixed1u_div
200 __kmpc_atomic_fixed1u_div_cpt
201 __kmpc_atomic_fixed1u_div_cpt_rev
202 __kmpc_atomic_fixed1u_div_fp
203 __kmpc_atomic_fixed1u_div_rev
204 __kmpc_atomic_fixed1u_shr
205 __kmpc_atomic_fixed1u_shr_cpt
206 __kmpc_atomic_fixed1u_shr_cpt_rev
207 __kmpc_atomic_fixed1u_shr_rev
208 __kmpc_atomic_fixed2_add
209 __kmpc_atomic_fixed2_add_cpt
210 __kmpc_atomic_fixed2_add_fp
211 __kmpc_atomic_fixed2_andb
212 __kmpc_atomic_fixed2_andb_cpt
213 __kmpc_atomic_fixed2_andl
214 __kmpc_atomic_fixed2_andl_cpt
215 __kmpc_atomic_fixed2_div
216 __kmpc_atomic_fixed2_div_cpt
217 __kmpc_atomic_fixed2_div_cpt_rev
218 __kmpc_atomic_fixed2_div_float8
219 __kmpc_atomic_fixed2_div_fp
220 __kmpc_atomic_fixed2_div_rev
221 __kmpc_atomic_fixed2_eqv
222 __kmpc_atomic_fixed2_eqv_cpt
223 __kmpc_atomic_fixed2_max
224 __kmpc_atomic_fixed2_max_cpt
225 __kmpc_atomic_fixed2_min
226 __kmpc_atomic_fixed2_min_cpt
227 __kmpc_atomic_fixed2_mul
228 __kmpc_atomic_fixed2_mul_cpt
229 __kmpc_atomic_fixed2_mul_float8
230 __kmpc_atomic_fixed2_mul_fp
231 __kmpc_atomic_fixed2_neqv
232 __kmpc_atomic_fixed2_neqv_cpt
233 __kmpc_atomic_fixed2_orb
234 __kmpc_atomic_fixed2_orb_cpt
235 __kmpc_atomic_fixed2_orl
236 __kmpc_atomic_fixed2_orl_cpt
237 __kmpc_atomic_fixed2_rd
238 __kmpc_atomic_fixed2_shl
239 __kmpc_atomic_fixed2_shl_cpt
240 __kmpc_atomic_fixed2_shl_cpt_rev
241 __kmpc_atomic_fixed2_shl_rev
242 __kmpc_atomic_fixed2_shr
243 __kmpc_atomic_fixed2_shr_cpt
244 __kmpc_atomic_fixed2_shr_cpt_rev
245 __kmpc_atomic_fixed2_shr_rev
246 __kmpc_atomic_fixed2_sub
247 __kmpc_atomic_fixed2_sub_cpt
248 __kmpc_atomic_fixed2_sub_cpt_rev
249 __kmpc_atomic_fixed2_sub_fp
250 __kmpc_atomic_fixed2_sub_rev
251 __kmpc_atomic_fixed2_swp
252 __kmpc_atomic_fixed2_wr
253 __kmpc_atomic_fixed2_xor
254 __kmpc_atomic_fixed2_xor_cpt
255 __kmpc_atomic_fixed2u_add_fp
256 __kmpc_atomic_fixed2u_sub_fp
257 __kmpc_atomic_fixed2u_mul_fp
258 __kmpc_atomic_fixed2u_div
259 __kmpc_atomic_fixed2u_div_cpt
260 __kmpc_atomic_fixed2u_div_cpt_rev
261 __kmpc_atomic_fixed2u_div_fp
262 __kmpc_atomic_fixed2u_div_rev
263 __kmpc_atomic_fixed2u_shr
264 __kmpc_atomic_fixed2u_shr_cpt
265 __kmpc_atomic_fixed2u_shr_cpt_rev
266 __kmpc_atomic_fixed2u_shr_rev
267 __kmpc_atomic_fixed4_add
268 __kmpc_atomic_fixed4_add_cpt
269 __kmpc_atomic_fixed4_add_fp
270 __kmpc_atomic_fixed4_andb
271 __kmpc_atomic_fixed4_andb_cpt
272 __kmpc_atomic_fixed4_andl
273 __kmpc_atomic_fixed4_andl_cpt
274 __kmpc_atomic_fixed4_div
275 __kmpc_atomic_fixed4_div_cpt
276 __kmpc_atomic_fixed4_div_cpt_rev
277 __kmpc_atomic_fixed4_div_float8
278 __kmpc_atomic_fixed4_div_fp
279 __kmpc_atomic_fixed4_div_rev
280 __kmpc_atomic_fixed4_eqv
281 __kmpc_atomic_fixed4_eqv_cpt
282 __kmpc_atomic_fixed4_max
283 __kmpc_atomic_fixed4_max_cpt
284 __kmpc_atomic_fixed4_min
285 __kmpc_atomic_fixed4_min_cpt
286 __kmpc_atomic_fixed4_mul
287 __kmpc_atomic_fixed4_mul_cpt
288 __kmpc_atomic_fixed4_mul_float8
289 __kmpc_atomic_fixed4_mul_fp
290 __kmpc_atomic_fixed4_neqv
291 __kmpc_atomic_fixed4_neqv_cpt
292 __kmpc_atomic_fixed4_orb
293 __kmpc_atomic_fixed4_orb_cpt
294 __kmpc_atomic_fixed4_orl
295 __kmpc_atomic_fixed4_orl_cpt
296 __kmpc_atomic_fixed4_rd
297 __kmpc_atomic_fixed4_shl
298 __kmpc_atomic_fixed4_shl_cpt
299 __kmpc_atomic_fixed4_shl_cpt_rev
300 __kmpc_atomic_fixed4_shl_rev
301 __kmpc_atomic_fixed4_shr
302 __kmpc_atomic_fixed4_shr_cpt
303 __kmpc_atomic_fixed4_shr_cpt_rev
304 __kmpc_atomic_fixed4_shr_rev
305 __kmpc_atomic_fixed4_sub
306 __kmpc_atomic_fixed4_sub_cpt
307 __kmpc_atomic_fixed4_sub_cpt_rev
308 __kmpc_atomic_fixed4_sub_fp
309 __kmpc_atomic_fixed4_sub_rev
310 __kmpc_atomic_fixed4_swp
311 __kmpc_atomic_fixed4_wr
312 __kmpc_atomic_fixed4_xor
313 __kmpc_atomic_fixed4_xor_cpt
314 __kmpc_atomic_fixed4u_add_fp
315 __kmpc_atomic_fixed4u_sub_fp
316 __kmpc_atomic_fixed4u_mul_fp
317 __kmpc_atomic_fixed4u_div
318 __kmpc_atomic_fixed4u_div_cpt
319 __kmpc_atomic_fixed4u_div_cpt_rev
320 __kmpc_atomic_fixed4u_div_fp
321 __kmpc_atomic_fixed4u_div_rev
322 __kmpc_atomic_fixed4u_shr
323 __kmpc_atomic_fixed4u_shr_cpt
324 __kmpc_atomic_fixed4u_shr_cpt_rev
325 __kmpc_atomic_fixed4u_shr_rev
326 __kmpc_atomic_fixed8_add
327 __kmpc_atomic_fixed8_add_cpt
328 __kmpc_atomic_fixed8_add_fp
329 __kmpc_atomic_fixed8_andb
330 __kmpc_atomic_fixed8_andb_cpt
331 __kmpc_atomic_fixed8_andl
332 __kmpc_atomic_fixed8_andl_cpt
333 __kmpc_atomic_fixed8_div
334 __kmpc_atomic_fixed8_div_cpt
335 __kmpc_atomic_fixed8_div_cpt_rev
336 __kmpc_atomic_fixed8_div_float8
337 __kmpc_atomic_fixed8_div_fp
338 __kmpc_atomic_fixed8_div_rev
339 __kmpc_atomic_fixed8_eqv
340 __kmpc_atomic_fixed8_eqv_cpt
341 __kmpc_atomic_fixed8_max
342 __kmpc_atomic_fixed8_max_cpt
343 __kmpc_atomic_fixed8_min
344 __kmpc_atomic_fixed8_min_cpt
345 __kmpc_atomic_fixed8_mul
346 __kmpc_atomic_fixed8_mul_cpt
347 __kmpc_atomic_fixed8_mul_float8
348 __kmpc_atomic_fixed8_mul_fp
349 __kmpc_atomic_fixed8_neqv
350 __kmpc_atomic_fixed8_neqv_cpt
351 __kmpc_atomic_fixed8_orb
352 __kmpc_atomic_fixed8_orb_cpt
353 __kmpc_atomic_fixed8_orl
354 __kmpc_atomic_fixed8_orl_cpt
355 __kmpc_atomic_fixed8_rd
356 __kmpc_atomic_fixed8_shl
357 __kmpc_atomic_fixed8_shl_cpt
358 __kmpc_atomic_fixed8_shl_cpt_rev
359 __kmpc_atomic_fixed8_shl_rev
360 __kmpc_atomic_fixed8_shr
361 __kmpc_atomic_fixed8_shr_cpt
362 __kmpc_atomic_fixed8_shr_cpt_rev
363 __kmpc_atomic_fixed8_shr_rev
364 __kmpc_atomic_fixed8_sub
365 __kmpc_atomic_fixed8_sub_cpt
366 __kmpc_atomic_fixed8_sub_cpt_rev
367 __kmpc_atomic_fixed8_sub_fp
368 __kmpc_atomic_fixed8_sub_rev
369 __kmpc_atomic_fixed8_swp
370 __kmpc_atomic_fixed8_wr
371 __kmpc_atomic_fixed8_xor
372 __kmpc_atomic_fixed8_xor_cpt
373 __kmpc_atomic_fixed8u_add_fp
374 __kmpc_atomic_fixed8u_sub_fp
375 __kmpc_atomic_fixed8u_mul_fp
376 __kmpc_atomic_fixed8u_div
377 __kmpc_atomic_fixed8u_div_cpt
378 __kmpc_atomic_fixed8u_div_cpt_rev
379 __kmpc_atomic_fixed8u_div_fp
380 __kmpc_atomic_fixed8u_div_rev
381 __kmpc_atomic_fixed8u_shr
382 __kmpc_atomic_fixed8u_shr_cpt
383 __kmpc_atomic_fixed8u_shr_cpt_rev
384 __kmpc_atomic_fixed8u_shr_rev
385@endcode
386
387Functions for floating point
388----------------------------
389There are versions here for floating point numbers of size 4, 8, 10 and 16
390bytes. (Ten byte floats are used by X87, but are now rare).
391@code
392 __kmpc_atomic_float4_add
393 __kmpc_atomic_float4_add_cpt
394 __kmpc_atomic_float4_add_float8
395 __kmpc_atomic_float4_add_fp
396 __kmpc_atomic_float4_div
397 __kmpc_atomic_float4_div_cpt
398 __kmpc_atomic_float4_div_cpt_rev
399 __kmpc_atomic_float4_div_float8
400 __kmpc_atomic_float4_div_fp
401 __kmpc_atomic_float4_div_rev
402 __kmpc_atomic_float4_max
403 __kmpc_atomic_float4_max_cpt
404 __kmpc_atomic_float4_min
405 __kmpc_atomic_float4_min_cpt
406 __kmpc_atomic_float4_mul
407 __kmpc_atomic_float4_mul_cpt
408 __kmpc_atomic_float4_mul_float8
409 __kmpc_atomic_float4_mul_fp
410 __kmpc_atomic_float4_rd
411 __kmpc_atomic_float4_sub
412 __kmpc_atomic_float4_sub_cpt
413 __kmpc_atomic_float4_sub_cpt_rev
414 __kmpc_atomic_float4_sub_float8
415 __kmpc_atomic_float4_sub_fp
416 __kmpc_atomic_float4_sub_rev
417 __kmpc_atomic_float4_swp
418 __kmpc_atomic_float4_wr
419 __kmpc_atomic_float8_add
420 __kmpc_atomic_float8_add_cpt
421 __kmpc_atomic_float8_add_fp
422 __kmpc_atomic_float8_div
423 __kmpc_atomic_float8_div_cpt
424 __kmpc_atomic_float8_div_cpt_rev
425 __kmpc_atomic_float8_div_fp
426 __kmpc_atomic_float8_div_rev
427 __kmpc_atomic_float8_max
428 __kmpc_atomic_float8_max_cpt
429 __kmpc_atomic_float8_min
430 __kmpc_atomic_float8_min_cpt
431 __kmpc_atomic_float8_mul
432 __kmpc_atomic_float8_mul_cpt
433 __kmpc_atomic_float8_mul_fp
434 __kmpc_atomic_float8_rd
435 __kmpc_atomic_float8_sub
436 __kmpc_atomic_float8_sub_cpt
437 __kmpc_atomic_float8_sub_cpt_rev
438 __kmpc_atomic_float8_sub_fp
439 __kmpc_atomic_float8_sub_rev
440 __kmpc_atomic_float8_swp
441 __kmpc_atomic_float8_wr
442 __kmpc_atomic_float10_add
443 __kmpc_atomic_float10_add_cpt
444 __kmpc_atomic_float10_add_fp
445 __kmpc_atomic_float10_div
446 __kmpc_atomic_float10_div_cpt
447 __kmpc_atomic_float10_div_cpt_rev
448 __kmpc_atomic_float10_div_fp
449 __kmpc_atomic_float10_div_rev
450 __kmpc_atomic_float10_mul
451 __kmpc_atomic_float10_mul_cpt
452 __kmpc_atomic_float10_mul_fp
453 __kmpc_atomic_float10_rd
454 __kmpc_atomic_float10_sub
455 __kmpc_atomic_float10_sub_cpt
456 __kmpc_atomic_float10_sub_cpt_rev
457 __kmpc_atomic_float10_sub_fp
458 __kmpc_atomic_float10_sub_rev
459 __kmpc_atomic_float10_swp
460 __kmpc_atomic_float10_wr
461 __kmpc_atomic_float16_add
462 __kmpc_atomic_float16_add_cpt
463 __kmpc_atomic_float16_div
464 __kmpc_atomic_float16_div_cpt
465 __kmpc_atomic_float16_div_cpt_rev
466 __kmpc_atomic_float16_div_rev
467 __kmpc_atomic_float16_max
468 __kmpc_atomic_float16_max_cpt
469 __kmpc_atomic_float16_min
470 __kmpc_atomic_float16_min_cpt
471 __kmpc_atomic_float16_mul
472 __kmpc_atomic_float16_mul_cpt
473 __kmpc_atomic_float16_rd
474 __kmpc_atomic_float16_sub
475 __kmpc_atomic_float16_sub_cpt
476 __kmpc_atomic_float16_sub_cpt_rev
477 __kmpc_atomic_float16_sub_rev
478 __kmpc_atomic_float16_swp
479 __kmpc_atomic_float16_wr
480@endcode
481
482Functions for Complex types
483---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* a `complex<float>`.
488
489@code
490 __kmpc_atomic_cmplx4_add
491 __kmpc_atomic_cmplx4_add_cmplx8
492 __kmpc_atomic_cmplx4_add_cpt
493 __kmpc_atomic_cmplx4_div
494 __kmpc_atomic_cmplx4_div_cmplx8
495 __kmpc_atomic_cmplx4_div_cpt
496 __kmpc_atomic_cmplx4_div_cpt_rev
497 __kmpc_atomic_cmplx4_div_rev
498 __kmpc_atomic_cmplx4_mul
499 __kmpc_atomic_cmplx4_mul_cmplx8
500 __kmpc_atomic_cmplx4_mul_cpt
501 __kmpc_atomic_cmplx4_rd
502 __kmpc_atomic_cmplx4_sub
503 __kmpc_atomic_cmplx4_sub_cmplx8
504 __kmpc_atomic_cmplx4_sub_cpt
505 __kmpc_atomic_cmplx4_sub_cpt_rev
506 __kmpc_atomic_cmplx4_sub_rev
507 __kmpc_atomic_cmplx4_swp
508 __kmpc_atomic_cmplx4_wr
509 __kmpc_atomic_cmplx8_add
510 __kmpc_atomic_cmplx8_add_cpt
511 __kmpc_atomic_cmplx8_div
512 __kmpc_atomic_cmplx8_div_cpt
513 __kmpc_atomic_cmplx8_div_cpt_rev
514 __kmpc_atomic_cmplx8_div_rev
515 __kmpc_atomic_cmplx8_mul
516 __kmpc_atomic_cmplx8_mul_cpt
517 __kmpc_atomic_cmplx8_rd
518 __kmpc_atomic_cmplx8_sub
519 __kmpc_atomic_cmplx8_sub_cpt
520 __kmpc_atomic_cmplx8_sub_cpt_rev
521 __kmpc_atomic_cmplx8_sub_rev
522 __kmpc_atomic_cmplx8_swp
523 __kmpc_atomic_cmplx8_wr
524 __kmpc_atomic_cmplx10_add
525 __kmpc_atomic_cmplx10_add_cpt
526 __kmpc_atomic_cmplx10_div
527 __kmpc_atomic_cmplx10_div_cpt
528 __kmpc_atomic_cmplx10_div_cpt_rev
529 __kmpc_atomic_cmplx10_div_rev
530 __kmpc_atomic_cmplx10_mul
531 __kmpc_atomic_cmplx10_mul_cpt
532 __kmpc_atomic_cmplx10_rd
533 __kmpc_atomic_cmplx10_sub
534 __kmpc_atomic_cmplx10_sub_cpt
535 __kmpc_atomic_cmplx10_sub_cpt_rev
536 __kmpc_atomic_cmplx10_sub_rev
537 __kmpc_atomic_cmplx10_swp
538 __kmpc_atomic_cmplx10_wr
539 __kmpc_atomic_cmplx16_add
540 __kmpc_atomic_cmplx16_add_cpt
541 __kmpc_atomic_cmplx16_div
542 __kmpc_atomic_cmplx16_div_cpt
543 __kmpc_atomic_cmplx16_div_cpt_rev
544 __kmpc_atomic_cmplx16_div_rev
545 __kmpc_atomic_cmplx16_mul
546 __kmpc_atomic_cmplx16_mul_cpt
547 __kmpc_atomic_cmplx16_rd
548 __kmpc_atomic_cmplx16_sub
549 __kmpc_atomic_cmplx16_sub_cpt
550 __kmpc_atomic_cmplx16_sub_cpt_rev
551 __kmpc_atomic_cmplx16_swp
552 __kmpc_atomic_cmplx16_wr
553@endcode
554*/
555
556/*!
557@ingroup ATOMIC_OPS
558@{
559*/
560
561/*
562 * Global vars
563 */
564
565#ifndef KMP_GOMP_COMPAT
566int __kmp_atomic_mode = 1; // Intel perf
567#else
568int __kmp_atomic_mode = 2; // GOMP compatibility
569#endif /* KMP_GOMP_COMPAT */
570
571KMP_ALIGN(128)
572
573// Control access to all user coded atomics in Gnu compat mode
574kmp_atomic_lock_t __kmp_atomic_lock;
575// Control access to all user coded atomics for 1-byte fixed data types
576kmp_atomic_lock_t __kmp_atomic_lock_1i;
577// Control access to all user coded atomics for 2-byte fixed data types
578kmp_atomic_lock_t __kmp_atomic_lock_2i;
579// Control access to all user coded atomics for 4-byte fixed data types
580kmp_atomic_lock_t __kmp_atomic_lock_4i;
581// Control access to all user coded atomics for kmp_real32 data type
582kmp_atomic_lock_t __kmp_atomic_lock_4r;
583// Control access to all user coded atomics for 8-byte fixed data types
584kmp_atomic_lock_t __kmp_atomic_lock_8i;
585// Control access to all user coded atomics for kmp_real64 data type
586kmp_atomic_lock_t __kmp_atomic_lock_8r;
587// Control access to all user coded atomics for complex byte data type
588kmp_atomic_lock_t __kmp_atomic_lock_8c;
589// Control access to all user coded atomics for long double data type
590kmp_atomic_lock_t __kmp_atomic_lock_10r;
591// Control access to all user coded atomics for _Quad data type
592kmp_atomic_lock_t __kmp_atomic_lock_16r;
593// Control access to all user coded atomics for double complex data type
594kmp_atomic_lock_t __kmp_atomic_lock_16c;
595// Control access to all user coded atomics for long double complex type
596kmp_atomic_lock_t __kmp_atomic_lock_20c;
597// Control access to all user coded atomics for _Quad complex data type
598kmp_atomic_lock_t __kmp_atomic_lock_32c;
599
600/* 2007-03-02:
601 Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602 on *_32 and *_32e. This is just a temporary workaround for the problem. It
603 seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604 in assembler language. */
605#define KMP_ATOMIC_VOLATILE volatile
606
607#if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608
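// These helper operators let the generic ATOMIC_* macros below apply the
// ordinary C++ operators to the aligned _Quad and __complex128 wrapper types;
// each overload simply operates on the wrapped .q members.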
609static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610 return lhs.q + rhs.q;
611}
612static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613 return lhs.q - rhs.q;
614}
615static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616 return lhs.q * rhs.q;
617}
618static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619 return lhs.q / rhs.q;
620}
621static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622 return lhs.q < rhs.q;
623}
624static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625 return lhs.q > rhs.q;
626}
627
628static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629 return lhs.q + rhs.q;
630}
631static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632 return lhs.q - rhs.q;
633}
634static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635 return lhs.q * rhs.q;
636}
637static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638 return lhs.q / rhs.q;
639}
640static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641 return lhs.q < rhs.q;
642}
643static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644 return lhs.q > rhs.q;
645}
646
647static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648 kmp_cmplx128_a4_t &rhs) {
649 return lhs.q + rhs.q;
650}
651static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652 kmp_cmplx128_a4_t &rhs) {
653 return lhs.q - rhs.q;
654}
655static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656 kmp_cmplx128_a4_t &rhs) {
657 return lhs.q * rhs.q;
658}
659static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660 kmp_cmplx128_a4_t &rhs) {
661 return lhs.q / rhs.q;
662}
663
664static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665 kmp_cmplx128_a16_t &rhs) {
666 return lhs.q + rhs.q;
667}
668static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669 kmp_cmplx128_a16_t &rhs) {
670 return lhs.q - rhs.q;
671}
672static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673 kmp_cmplx128_a16_t &rhs) {
674 return lhs.q * rhs.q;
675}
676static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677 kmp_cmplx128_a16_t &rhs) {
678 return lhs.q / rhs.q;
679}
680
681#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682
// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687
688#define KMP_CHECK_GTID \
689 if (gtid == KMP_GTID_UNKNOWN) { \
690 gtid = __kmp_entry_gtid(); \
691 } // check and get gtid when needed
692
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
698#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
699 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
700 TYPE *lhs, TYPE rhs) { \
701 KMP_DEBUG_ASSERT(__kmp_init_serial); \
702 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703
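// For illustration, ATOMIC_BEGIN(fixed4, add, kmp_int32, void) expands to
// roughly the following function header (the body is supplied by the macros
// that build on it):
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
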
704// ------------------------------------------------------------------------
705// Lock variables used for critical sections for various size operands
706#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719
720// ------------------------------------------------------------------------
721// Operation on *lhs, rhs bound by critical section
722// OP - operator (it's supposed to contain an assignment)
723// LCK_ID - lock identifier
724// Note: don't check gtid as it should always be valid
725// 1, 2-byte - expect valid parameter, other - check before this macro
726#define OP_CRITICAL(OP, LCK_ID) \
727 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
728 \
729 (*lhs) OP(rhs); \
730 \
731 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732
733#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
734 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
735 (*lhs) = (TYPE)((*lhs)OP rhs); \
736 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
737
// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.
759
760#ifdef KMP_GOMP_COMPAT
761#define OP_GOMP_CRITICAL(OP, FLAG) \
762 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
763 KMP_CHECK_GTID; \
764 OP_CRITICAL(OP, 0); \
765 return; \
766 }
767
768#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
769 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
770 KMP_CHECK_GTID; \
771 OP_UPDATE_CRITICAL(TYPE, OP, 0); \
772 return; \
773 }
774#else
775#define OP_GOMP_CRITICAL(OP, FLAG)
776#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777#endif /* KMP_GOMP_COMPAT */
778
779#if KMP_MIC
780#define KMP_DO_PAUSE _mm_delay_32(1)
781#else
782#define KMP_DO_PAUSE
783#endif /* KMP_MIC */
784
785// ------------------------------------------------------------------------
786// Operation on *lhs, rhs using "compare_and_store" routine
787// TYPE - operands' type
788// BITS - size in bits, used to distinguish low level calls
789// OP - operator
790#define OP_CMPXCHG(TYPE, BITS, OP) \
791 { \
792 TYPE old_value, new_value; \
793 old_value = *(TYPE volatile *)lhs; \
794 new_value = (TYPE)(old_value OP rhs); \
795 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
796 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
797 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
798 KMP_DO_PAUSE; \
799 \
800 old_value = *(TYPE volatile *)lhs; \
801 new_value = (TYPE)(old_value OP rhs); \
802 } \
803 }
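
// As a concrete illustration, OP_CMPXCHG(kmp_real32, 32, +) (used further down
// for __kmpc_atomic_float4_add) performs an update loop roughly equivalent to:
//
//   kmp_real32 old_value, new_value;
//   do {
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = old_value + rhs;
//   } while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                         *(kmp_int32 *)&old_value,
//                                         *(kmp_int32 *)&new_value));
//
// i.e. the bit-wise compare-and-swap is retried until no other thread has
// modified *lhs between the read and the store.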
804
805#if USE_CMPXCHG_FIX
806// 2007-06-25:
807// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808// and win_32e are affected (I verified the asm). Compiler ignores the volatile
809// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
810// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
811// the workaround.
812#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
813 { \
814 struct _sss { \
815 TYPE cmp; \
816 kmp_int##BITS *vvv; \
817 }; \
818 struct _sss old_value, new_value; \
819 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
820 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
821 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
822 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
823 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
824 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
825 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
826 KMP_DO_PAUSE; \
827 \
828 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
829 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
830 } \
831 }
832// end of the first part of the workaround for C78287
833#endif // USE_CMPXCHG_FIX
834
835#if KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
836// Undo explicit type casts to get MSVC ARM64 to build. Uses
837// OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG
838#undef OP_CMPXCHG
839#define OP_CMPXCHG(TYPE, BITS, OP) \
840 { \
841 struct _sss { \
842 TYPE cmp; \
843 kmp_int##BITS *vvv; \
844 }; \
845 struct _sss old_value, new_value; \
846 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
847 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
848 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
849 new_value.cmp = old_value.cmp OP rhs; \
850 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
851 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
852 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
853 KMP_DO_PAUSE; \
854 \
855 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
856 new_value.cmp = old_value.cmp OP rhs; \
857 } \
858 }
859
860#undef OP_UPDATE_CRITICAL
861#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
862 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
863 (*lhs) = (*lhs)OP rhs; \
864 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
865
866#endif // KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM)
867
868#if KMP_ARCH_X86 || KMP_ARCH_X86_64
869
870// ------------------------------------------------------------------------
871// X86 or X86_64: no alignment problems ====================================
872#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
873 GOMP_FLAG) \
874 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
875 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
876 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
877 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
878 }
879// -------------------------------------------------------------------------
880#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
881 GOMP_FLAG) \
882 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
883 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
884 OP_CMPXCHG(TYPE, BITS, OP) \
885 }
886#if USE_CMPXCHG_FIX
887// -------------------------------------------------------------------------
888// workaround for C78287 (complex(kind=4) data type)
889#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
890 MASK, GOMP_FLAG) \
891 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
892 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
893 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
894 }
895// end of the second part of the workaround for C78287
896#endif // USE_CMPXCHG_FIX
897
898#else
899// -------------------------------------------------------------------------
900// Code for other architectures that don't handle unaligned accesses.
901#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
902 GOMP_FLAG) \
903 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
904 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
905 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
906 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
907 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
908 } else { \
909 KMP_CHECK_GTID; \
910 OP_UPDATE_CRITICAL(TYPE, OP, \
911 LCK_ID) /* unaligned address - use critical */ \
912 } \
913 }
914// -------------------------------------------------------------------------
915#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
916 GOMP_FLAG) \
917 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
918 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
919 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
920 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
921 } else { \
922 KMP_CHECK_GTID; \
923 OP_UPDATE_CRITICAL(TYPE, OP, \
924 LCK_ID) /* unaligned address - use critical */ \
925 } \
926 }
927#if USE_CMPXCHG_FIX
928// -------------------------------------------------------------------------
929// workaround for C78287 (complex(kind=4) data type)
930#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
931 MASK, GOMP_FLAG) \
932 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
933 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
934 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
935 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
936 } else { \
937 KMP_CHECK_GTID; \
938 OP_UPDATE_CRITICAL(TYPE, OP, \
939 LCK_ID) /* unaligned address - use critical */ \
940 } \
941 }
942// end of the second part of the workaround for C78287
943#endif // USE_CMPXCHG_FIX
944#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
945
946// Routines for ATOMIC 4-byte operands addition and subtraction
947ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
948 0) // __kmpc_atomic_fixed4_add
949ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
950 0) // __kmpc_atomic_fixed4_sub
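
// As an illustration, on IA-32/Intel(R) 64 the instantiation
// ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0) above generates
// roughly:
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     // (debug assert, trace, and the GOMP-compat branch, which is dead code
//     // here because the GOMP flag argument is 0)
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // typically a single locked add on x86
//   }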
951
952ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
953 KMP_ARCH_X86) // __kmpc_atomic_float4_add
954ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
955 KMP_ARCH_X86) // __kmpc_atomic_float4_sub
956
957// Routines for ATOMIC 8-byte operands addition and subtraction
958ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
959 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
960ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
961 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
962
963ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
964 KMP_ARCH_X86) // __kmpc_atomic_float8_add
965ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
966 KMP_ARCH_X86) // __kmpc_atomic_float8_sub
967
968// ------------------------------------------------------------------------
969// Entries definition for integer operands
970// TYPE_ID - operands type and size (fixed4, float4)
971// OP_ID - operation identifier (add, sub, mul, ...)
972// TYPE - operand type
973// BITS - size in bits, used to distinguish low level calls
974// OP - operator (used in critical section)
975// LCK_ID - lock identifier, used to possibly distinguish lock variable
976// MASK - used for alignment check
977
978// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
979// ------------------------------------------------------------------------
980// Routines for ATOMIC integer operands, other operators
981// ------------------------------------------------------------------------
982// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
983ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
984 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
985ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
986 0) // __kmpc_atomic_fixed1_andb
987ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
988 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
989ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
990 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
991ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
992 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
993ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
994 0) // __kmpc_atomic_fixed1_orb
995ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
996 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
997ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
998 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
999ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
1000 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
1001ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
1002 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
1003ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
1004 0) // __kmpc_atomic_fixed1_xor
1005ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
1006 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
1007ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
1008 0) // __kmpc_atomic_fixed2_andb
1009ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
1010 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
1011ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
1012 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
1013ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
1014 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
1015ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
1016 0) // __kmpc_atomic_fixed2_orb
1017ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
1018 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
1019ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
1020 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
1021ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
1022 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
1023ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
1024 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
1025ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
1026 0) // __kmpc_atomic_fixed2_xor
1027ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
1028 0) // __kmpc_atomic_fixed4_andb
1029ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
1030 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
1031ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
1032 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1033ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1034 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1035ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1036 0) // __kmpc_atomic_fixed4_orb
1037ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1038 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1039ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1040 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1041ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1042 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1043ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1044 0) // __kmpc_atomic_fixed4_xor
1045ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1046 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1047ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1048 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1049ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1050 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1051ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1052 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1053ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1054 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1055ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1056 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1057ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1058 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1059ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1060 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1061ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1062 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1063ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1064 KMP_ARCH_X86) // __kmpc_atomic_float4_div
1065ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1066 KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1067ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1068 KMP_ARCH_X86) // __kmpc_atomic_float8_div
1069ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1070 KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1071// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1072
1073/* ------------------------------------------------------------------------ */
1074/* Routines for C/C++ Reduction operators && and || */
1075
// ------------------------------------------------------------------------
// Need separate macros for && and || because there is no corresponding
// compound-assignment operator.
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1079#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1080 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1081 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1082 OP_CRITICAL(= *lhs OP, LCK_ID) \
1083 }
1084
1085#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1086
1087// ------------------------------------------------------------------------
1088// X86 or X86_64: no alignment problems ===================================
1089#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1090 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1091 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1092 OP_CMPXCHG(TYPE, BITS, OP) \
1093 }
1094
1095#else
1096// ------------------------------------------------------------------------
1097// Code for other architectures that don't handle unaligned accesses.
1098#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1099 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1100 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1101 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1102 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1103 } else { \
1104 KMP_CHECK_GTID; \
1105 OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1106 } \
1107 }
1108#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1109
1110ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1111 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1112ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1113 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1114ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1115 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1116ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1117 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1118ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1119 0) // __kmpc_atomic_fixed4_andl
1120ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1121 0) // __kmpc_atomic_fixed4_orl
1122ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1123 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1124ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1125 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1126
/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no direct C counterpart: */
/* MAX, MIN, .EQV., .NEQV. */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1132
// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator used to check whether any action is still needed
1136#define MIN_MAX_CRITSECT(OP, LCK_ID) \
1137 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1138 \
1139 if (*lhs OP rhs) { /* still need actions? */ \
1140 *lhs = rhs; \
1141 } \
1142 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1143
1144// -------------------------------------------------------------------------
1145#ifdef KMP_GOMP_COMPAT
1146#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1147 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1148 KMP_CHECK_GTID; \
1149 MIN_MAX_CRITSECT(OP, 0); \
1150 return; \
1151 }
1152#else
1153#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1154#endif /* KMP_GOMP_COMPAT */
1155
1156// -------------------------------------------------------------------------
1157#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1158 { \
1159 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1160 TYPE old_value; \
1161 temp_val = *lhs; \
1162 old_value = temp_val; \
1163 while (old_value OP rhs && /* still need actions? */ \
1164 !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1165 (kmp_int##BITS *)lhs, \
1166 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1167 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1168 temp_val = *lhs; \
1169 old_value = temp_val; \
1170 } \
1171 }
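
// As an illustration, MIN_MAX_CMPXCHG(kmp_int32, 32, <), used below for
// __kmpc_atomic_fixed4_max, behaves roughly like:
//
//   kmp_int32 old_value = *lhs;
//   while (old_value < rhs &&                 // is an update still needed?
//          !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                       *(kmp_int32 *)&old_value,
//                                       *(kmp_int32 *)&rhs)) {
//     old_value = *lhs; // lost the race - re-read and decide again
//   }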
1172
1173// -------------------------------------------------------------------------
1174// 1-byte, 2-byte operands - use critical section
1175#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1176 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1177 if (*lhs OP rhs) { /* need actions? */ \
1178 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1179 MIN_MAX_CRITSECT(OP, LCK_ID) \
1180 } \
1181 }
1182
1183#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1184
1185// -------------------------------------------------------------------------
1186// X86 or X86_64: no alignment problems ====================================
1187#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1188 GOMP_FLAG) \
1189 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1190 if (*lhs OP rhs) { \
1191 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1192 MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1193 } \
1194 }
1195
1196#else
1197// -------------------------------------------------------------------------
1198// Code for other architectures that don't handle unaligned accesses.
1199#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1200 GOMP_FLAG) \
1201 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1202 if (*lhs OP rhs) { \
1203 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1204 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1205 MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1206 } else { \
1207 KMP_CHECK_GTID; \
1208 MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1209 } \
1210 } \
1211 }
1212#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1213
1214MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1215 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1216MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1217 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1218MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1219 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1220MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1221 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1222MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1223 0) // __kmpc_atomic_fixed4_max
1224MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1225 0) // __kmpc_atomic_fixed4_min
1226MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1227 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1228MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1229 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1230MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1231 KMP_ARCH_X86) // __kmpc_atomic_float4_max
1232MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1233 KMP_ARCH_X86) // __kmpc_atomic_float4_min
1234MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1235 KMP_ARCH_X86) // __kmpc_atomic_float8_max
1236MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1237 KMP_ARCH_X86) // __kmpc_atomic_float8_min
1238#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1239MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
1240 1) // __kmpc_atomic_float10_max
1241MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
1242 1) // __kmpc_atomic_float10_min
1243#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1244#if KMP_HAVE_QUAD
1245MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1246 1) // __kmpc_atomic_float16_max
1247MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1248 1) // __kmpc_atomic_float16_min
1249#if (KMP_ARCH_X86)
1250MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1251 1) // __kmpc_atomic_float16_max_a16
1252MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1253 1) // __kmpc_atomic_float16_min_a16
1254#endif // (KMP_ARCH_X86)
1255#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because the complement (~) is needed
// OP ignored for critical sections, ^=~ used instead
1259#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1260 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1261 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1262 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
1263 }
1264
1265// ------------------------------------------------------------------------
1266#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1267// ------------------------------------------------------------------------
1268// X86 or X86_64: no alignment problems ===================================
1269#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1270 GOMP_FLAG) \
1271 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1272 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1273 OP_CMPXCHG(TYPE, BITS, OP) \
1274 }
1275// ------------------------------------------------------------------------
1276#else
1277// ------------------------------------------------------------------------
1278// Code for other architectures that don't handle unaligned accesses.
1279#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1280 GOMP_FLAG) \
1281 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1282 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1283 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1284 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1285 } else { \
1286 KMP_CHECK_GTID; \
1287 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1288 } \
1289 }
1290#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1291
1292ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1293 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1294ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1295 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1296ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1297 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1298ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1299 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1300ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1301 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1302ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1303 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1304ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1305 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1306ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1307 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
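
// Note: .NEQV. maps directly onto bitwise xor, while .EQV. is implemented as
// "xor with the complement", so e.g. __kmpc_atomic_fixed4_eqv effectively
// performs *lhs = *lhs ^ ~rhs.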
1308
1309// ------------------------------------------------------------------------
1310// Routines for Extended types: long double, _Quad, complex flavours (use
1311// critical section)
1312// TYPE_ID, OP_ID, TYPE - detailed above
1313// OP - operator
1314// LCK_ID - lock identifier, used to possibly distinguish lock variable
1315#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1316 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1317 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1318 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1319 }
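
// For illustration, ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1)
// below generates roughly:
//
//   void __kmpc_atomic_cmplx8_add(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs,
//                                 kmp_cmplx64 rhs) {
//     // (debug assert, trace, GOMP-compat check)
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//     (*lhs) = (kmp_cmplx64)((*lhs) + rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//   }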
1320
1321/* ------------------------------------------------------------------------- */
1322#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1323// routines for long double type
1324ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1325 1) // __kmpc_atomic_float10_add
1326ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1327 1) // __kmpc_atomic_float10_sub
1328ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1329 1) // __kmpc_atomic_float10_mul
1330ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1331 1) // __kmpc_atomic_float10_div
1332#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1333#if KMP_HAVE_QUAD
1334// routines for _Quad type
1335ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1336 1) // __kmpc_atomic_float16_add
1337ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1338 1) // __kmpc_atomic_float16_sub
1339ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1340 1) // __kmpc_atomic_float16_mul
1341ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1342 1) // __kmpc_atomic_float16_div
1343#if (KMP_ARCH_X86)
1344ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1345 1) // __kmpc_atomic_float16_add_a16
1346ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1347 1) // __kmpc_atomic_float16_sub_a16
1348ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1349 1) // __kmpc_atomic_float16_mul_a16
1350ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1351 1) // __kmpc_atomic_float16_div_a16
1352#endif // (KMP_ARCH_X86)
1353#endif // KMP_HAVE_QUAD
1354// routines for complex types
1355
1356#if USE_CMPXCHG_FIX
1357// workaround for C78287 (complex(kind=4) data type)
1358ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1359 1) // __kmpc_atomic_cmplx4_add
1360ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1361 1) // __kmpc_atomic_cmplx4_sub
1362ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1363 1) // __kmpc_atomic_cmplx4_mul
1364ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1365 1) // __kmpc_atomic_cmplx4_div
1366// end of the workaround for C78287
1367#else
1368ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1369ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1370ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1371ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1372#endif // USE_CMPXCHG_FIX
1373
1374ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1375ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1376ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1377ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1378#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1379ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1380 1) // __kmpc_atomic_cmplx10_add
1381ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1382 1) // __kmpc_atomic_cmplx10_sub
1383ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1384 1) // __kmpc_atomic_cmplx10_mul
1385ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1386 1) // __kmpc_atomic_cmplx10_div
1387#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1388#if KMP_HAVE_QUAD
1389ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1390 1) // __kmpc_atomic_cmplx16_add
1391ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1392 1) // __kmpc_atomic_cmplx16_sub
1393ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1394 1) // __kmpc_atomic_cmplx16_mul
1395ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1396 1) // __kmpc_atomic_cmplx16_div
1397#if (KMP_ARCH_X86)
1398ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1399 1) // __kmpc_atomic_cmplx16_add_a16
1400ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1401 1) // __kmpc_atomic_cmplx16_sub_a16
1402ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1403 1) // __kmpc_atomic_cmplx16_mul_a16
1404ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1405 1) // __kmpc_atomic_cmplx16_div_a16
1406#endif // (KMP_ARCH_X86)
1407#endif // KMP_HAVE_QUAD
1408
1409// OpenMP 4.0: x = expr binop x for non-commutative operations.
1410// Supported only on IA-32 architecture and Intel(R) 64
1411#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1412
1413// ------------------------------------------------------------------------
1414// Operation on *lhs, rhs bound by critical section
1415// OP - operator (it's supposed to contain an assignment)
1416// LCK_ID - lock identifier
1417// Note: don't check gtid as it should always be valid
1418// 1, 2-byte - expect valid parameter, other - check before this macro
1419#define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1420 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1421 \
1422 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
1423 \
1424 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1425
1426#ifdef KMP_GOMP_COMPAT
1427#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
1428 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1429 KMP_CHECK_GTID; \
1430 OP_CRITICAL_REV(TYPE, OP, 0); \
1431 return; \
1432 }
1433
1434#else
1435#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1436#endif /* KMP_GOMP_COMPAT */
1437
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
1443#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1444 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1445 TYPE *lhs, TYPE rhs) { \
1446 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1447 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1448
1449// ------------------------------------------------------------------------
1450// Operation on *lhs, rhs using "compare_and_store" routine
1451// TYPE - operands' type
1452// BITS - size in bits, used to distinguish low level calls
1453// OP - operator
1454// Note: temp_val introduced in order to force the compiler to read
1455// *lhs only once (w/o it the compiler reads *lhs twice)
1456#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1457 { \
1458 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1459 TYPE old_value, new_value; \
1460 temp_val = *lhs; \
1461 old_value = temp_val; \
1462 new_value = (TYPE)(rhs OP old_value); \
1463 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1464 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1465 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1466 KMP_DO_PAUSE; \
1467 \
1468 temp_val = *lhs; \
1469 old_value = temp_val; \
1470 new_value = (TYPE)(rhs OP old_value); \
1471 } \
1472 }
1473
1474// -------------------------------------------------------------------------
1475#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1476 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1477 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1478 OP_CMPXCHG_REV(TYPE, BITS, OP) \
1479 }
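// Taken together, the macros above generate one complete entry point per
// instantiation. A sketch of what ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32,
// 32, -, 4i, KMP_ARCH_X86) is assumed to produce (illustrative only, the
// GOMP-compat critical path elided):
//   void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid,
//                                     kmp_int32 *lhs, kmp_int32 rhs) {
//     kmp_int32 old_value = *lhs;
//     kmp_int32 new_value = rhs - old_value; // reversed: x = expr - x
//     while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                         *(kmp_int32 *)&old_value,
//                                         *(kmp_int32 *)&new_value)) {
//       KMP_DO_PAUSE;
//       old_value = *lhs;
//       new_value = rhs - old_value;
//     }
//   }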
1480
1481// ------------------------------------------------------------------------
1482// Entries definition for integer operands
1483// TYPE_ID - operands type and size (fixed4, float4)
1484// OP_ID - operation identifier (add, sub, mul, ...)
1485// TYPE - operand type
1486// BITS - size in bits, used to distinguish low level calls
1487// OP - operator (used in critical section)
1488// LCK_ID - lock identifier, used to possibly distinguish lock variable
1489
1490// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1491// ------------------------------------------------------------------------
1492// Routines for ATOMIC integer operands, other operators
1493// ------------------------------------------------------------------------
1494// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1495ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1496 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1497ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1498 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1499ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1500 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1501ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1502 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1503ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1504 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1505ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1506 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1507
1508ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1509 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1510ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1511 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1512ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1513 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1514ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1515 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1516ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1517 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1518ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1519 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1520
1521ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1522 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1523ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1524 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1525ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1526 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1527ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1528 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1529ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1530 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1531ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1532 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1533
1534ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1535 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1536ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1537 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1538ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1539 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1540ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1541 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1542ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1543 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1544ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1545 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1546
1547ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1548 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1549ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1550 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1551
1552ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1553 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1554ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1555 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1556// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1557
1558// ------------------------------------------------------------------------
1559// Routines for Extended types: long double, _Quad, complex flavours (use
1560// critical section)
1561// TYPE_ID, OP_ID, TYPE - detailed above
1562// OP - operator
1563// LCK_ID - lock identifier, used to possibly distinguish lock variable
1564#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1565 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1566 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1567 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1568 }
1569
1570/* ------------------------------------------------------------------------- */
1571// routines for long double type
1572ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1573 1) // __kmpc_atomic_float10_sub_rev
1574ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1575 1) // __kmpc_atomic_float10_div_rev
1576#if KMP_HAVE_QUAD
1577// routines for _Quad type
1578ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1579 1) // __kmpc_atomic_float16_sub_rev
1580ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1581 1) // __kmpc_atomic_float16_div_rev
1582#if (KMP_ARCH_X86)
1583ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1584 1) // __kmpc_atomic_float16_sub_a16_rev
1585ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1586 1) // __kmpc_atomic_float16_div_a16_rev
1587#endif // KMP_ARCH_X86
1588#endif // KMP_HAVE_QUAD
1589
1590// routines for complex types
1591ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1592 1) // __kmpc_atomic_cmplx4_sub_rev
1593ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1594 1) // __kmpc_atomic_cmplx4_div_rev
1595ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1596 1) // __kmpc_atomic_cmplx8_sub_rev
1597ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1598 1) // __kmpc_atomic_cmplx8_div_rev
1599ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1600 1) // __kmpc_atomic_cmplx10_sub_rev
1601ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1602 1) // __kmpc_atomic_cmplx10_div_rev
1603#if KMP_HAVE_QUAD
1604ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1605 1) // __kmpc_atomic_cmplx16_sub_rev
1606ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1607 1) // __kmpc_atomic_cmplx16_div_rev
1608#if (KMP_ARCH_X86)
1609ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1610 1) // __kmpc_atomic_cmplx16_sub_a16_rev
1611ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1612 1) // __kmpc_atomic_cmplx16_div_a16_rev
1613#endif // KMP_ARCH_X86
1614#endif // KMP_HAVE_QUAD
1615
1616#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1617// End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1618
1619/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger" */
/* Note: in order to reduce the total number of type combinations */
/* it is assumed that the compiler converts RHS to the longest floating */
/* type, that is _Quad, before calling any of these routines */
/* The conversion to _Quad is done by the compiler during the calculation, */
/* and the conversion back to TYPE happens before the assignment, like: */
/*     *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
/* A performance penalty is expected because of software emulation */
1628/* ------------------------------------------------------------------------ */
1629
1630#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1631 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1632 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1633 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1634 KA_TRACE(100, \
1635 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1636 gtid));
1637
1638// -------------------------------------------------------------------------
1639#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1640 GOMP_FLAG) \
1641 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1642 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1643 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1644 }
1645
1646// -------------------------------------------------------------------------
1647#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1648// -------------------------------------------------------------------------
1649// X86 or X86_64: no alignment problems ====================================
1650#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1651 LCK_ID, MASK, GOMP_FLAG) \
1652 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1653 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1654 OP_CMPXCHG(TYPE, BITS, OP) \
1655 }
1656// -------------------------------------------------------------------------
1657#else
1658// ------------------------------------------------------------------------
1659// Code for other architectures that don't handle unaligned accesses.
1660#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1661 LCK_ID, MASK, GOMP_FLAG) \
1662 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1663 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1664 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1665 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1666 } else { \
1667 KMP_CHECK_GTID; \
1668 OP_UPDATE_CRITICAL(TYPE, OP, \
1669 LCK_ID) /* unaligned address - use critical */ \
1670 } \
1671 }
1672#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
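// On the non-x86 path above, MASK encodes the natural alignment of TYPE:
// token pasting turns MASK=3 into the literal 0x3, so a 4-byte operand takes
// the lock-free path only when its address is 4-byte aligned. Schematically,
// for ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0):
//   if (!((kmp_uintptr_t)lhs & 0x3)) {
//     // aligned: CAS loop computing *lhs = (kmp_int32)(*lhs * rhs)
//   } else {
//     // unaligned: fall back to the 4i critical section
//   }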
1673
1674// -------------------------------------------------------------------------
1675#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1676// -------------------------------------------------------------------------
1677#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1678 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1679 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1680 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1681 OP_CMPXCHG_REV(TYPE, BITS, OP) \
1682 }
1683#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1684 LCK_ID, GOMP_FLAG) \
1685 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1686 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1687 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1688 }
1689#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1690
1691// RHS=float8
1692ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1693 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1694ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1695 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1696ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1697 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1698ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1699 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1700ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1701 0) // __kmpc_atomic_fixed4_mul_float8
1702ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1703 0) // __kmpc_atomic_fixed4_div_float8
1704ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1705 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1706ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1707 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1708ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1709 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1710ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1711 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1712ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1713 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1714ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1715 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1716
// RHS=float16 (deprecated, to be removed once we are sure the compiler does
// not generate calls to these routines)
1719#if KMP_HAVE_QUAD
1720ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1721 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1722ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1723 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1724ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1725 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1726ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1727 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1728ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1729 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1730ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1731 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1732ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1733 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1734ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1735 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1736
1737ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1738 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1739ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1740 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1741ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1742 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1743ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1744 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1745ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1746 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1747ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1748 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1749ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1750 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1751ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1752 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1753
1754ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1755 0) // __kmpc_atomic_fixed4_add_fp
1756ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1757 0) // __kmpc_atomic_fixed4u_add_fp
1758ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1759 0) // __kmpc_atomic_fixed4_sub_fp
1760ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1761 0) // __kmpc_atomic_fixed4u_sub_fp
1762ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1763 0) // __kmpc_atomic_fixed4_mul_fp
1764ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1765 0) // __kmpc_atomic_fixed4u_mul_fp
1766ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1767 0) // __kmpc_atomic_fixed4_div_fp
1768ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1769 0) // __kmpc_atomic_fixed4u_div_fp
1770
1771ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1772 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1773ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1774 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1775ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1776 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1777ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1778 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1779ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1780 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1781ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1782 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1783ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1784 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1785ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1786 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1787
1788ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1789 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1790ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1791 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1792ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1793 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1794ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1795 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1796
1797ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1798 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1799ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1800 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1801ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1802 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1803ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1804 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1805
1806#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1807ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1808 1) // __kmpc_atomic_float10_add_fp
1809ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1810 1) // __kmpc_atomic_float10_sub_fp
1811ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1812 1) // __kmpc_atomic_float10_mul_fp
1813ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1814 1) // __kmpc_atomic_float10_div_fp
1815
1816// Reverse operations
1817ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1818 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1819ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1820 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1821ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1822 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1823ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1824 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1825
1826ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1827 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1828ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1829 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1830ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1831 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1832ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1833 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1834
1835ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1836 0) // __kmpc_atomic_fixed4_sub_rev_fp
1837ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1838 0) // __kmpc_atomic_fixed4u_sub_rev_fp
1839ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1840 0) // __kmpc_atomic_fixed4_div_rev_fp
1841ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1842 0) // __kmpc_atomic_fixed4u_div_rev_fp
1843
1844ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1845 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1846ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1847 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1848ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1849 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1850ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1851 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1852
1853ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1854 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1855ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1856 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1857
1858ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1859 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1860ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1861 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1862
1863ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1864 1) // __kmpc_atomic_float10_sub_rev_fp
1865ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1866 1) // __kmpc_atomic_float10_div_rev_fp
1867#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1868
1869#endif // KMP_HAVE_QUAD
1870
1871#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1872// ------------------------------------------------------------------------
1873// X86 or X86_64: no alignment problems ====================================
1874#if USE_CMPXCHG_FIX
1875// workaround for C78287 (complex(kind=4) data type)
1876#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1877 LCK_ID, MASK, GOMP_FLAG) \
1878 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1879 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1880 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1881 }
1882// end of the second part of the workaround for C78287
1883#else
1884#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1885 LCK_ID, MASK, GOMP_FLAG) \
1886 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1887 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1888 OP_CMPXCHG(TYPE, BITS, OP) \
1889 }
1890#endif // USE_CMPXCHG_FIX
1891#else
1892// ------------------------------------------------------------------------
1893// Code for other architectures that don't handle unaligned accesses.
1894#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1895 LCK_ID, MASK, GOMP_FLAG) \
1896 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1897 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1898 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1899 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1900 } else { \
1901 KMP_CHECK_GTID; \
1902 OP_UPDATE_CRITICAL(TYPE, OP, \
1903 LCK_ID) /* unaligned address - use critical */ \
1904 } \
1905 }
1906#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1907
1908ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1909 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1910ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1911 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1912ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1913 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1914ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1915 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
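// These entries update a complex<float> LHS with a complex<double> RHS; e.g.
// __kmpc_atomic_cmplx4_add_cmplx8 conceptually performs
//   *lhs = (kmp_cmplx32)(*lhs + rhs);
// Since kmp_cmplx32 occupies 8 bytes, the lock-free path treats it as one
// 64-bit quantity for the compare-and-store, hence BITS = 64 above.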
1916
1917// READ, WRITE, CAPTURE
1918
1919// ------------------------------------------------------------------------
1920// Atomic READ routines
1921
1922// ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
1924// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1925// fixed)
1926// OP_ID - operation identifier (add, sub, mul, ...)
1927// TYPE - operands' type
1928#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1929 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1930 TYPE *loc) { \
1931 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1932 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1933
1934// ------------------------------------------------------------------------
// Atomic read of *loc using "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *loc only once (w/o it the compiler reads *loc twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of the "compare & swap" operation
1943#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1944 { \
1945 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1946 union f_i_union { \
1947 TYPE f_val; \
1948 kmp_int##BITS i_val; \
1949 }; \
1950 union f_i_union old_value; \
1951 temp_val = *loc; \
1952 old_value.f_val = temp_val; \
1953 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1954 (kmp_int##BITS *)loc, \
1955 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1956 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1957 new_value = old_value.f_val; \
1958 return new_value; \
1959 }
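// In effect the read is a compare-and-swap of *loc with its own current
// value: KMP_COMPARE_AND_STORE_RET##BITS returns whatever it found at *loc,
// which is the atomically read result. A sketch for a 64-bit floating read
// (illustrative only):
//   union { kmp_real64 f_val; kmp_int64 i_val; } old_value;
//   old_value.f_val = *loc;
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
//       (kmp_int64 *)loc, old_value.i_val, old_value.i_val);
//   return old_value.f_val; // the union reinterprets the bits, not the value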
1960
1961// -------------------------------------------------------------------------
// Read of *loc bound by critical section
// OP - operator (not used by this macro)
1964// LCK_ID - lock identifier
1965// Note: don't check gtid as it should always be valid
1966// 1, 2-byte - expect valid parameter, other - check before this macro
1967#define OP_CRITICAL_READ(OP, LCK_ID) \
1968 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1969 \
1970 new_value = (*loc); \
1971 \
1972 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1973
1974// -------------------------------------------------------------------------
1975#ifdef KMP_GOMP_COMPAT
1976#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1977 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1978 KMP_CHECK_GTID; \
1979 OP_CRITICAL_READ(OP, 0); \
1980 return new_value; \
1981 }
1982#else
1983#define OP_GOMP_CRITICAL_READ(OP, FLAG)
1984#endif /* KMP_GOMP_COMPAT */
1985
1986// -------------------------------------------------------------------------
1987#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1988 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1989 TYPE new_value; \
1990 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1991 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1992 return new_value; \
1993 }
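// For naturally atomic integer sizes the read is a fetch-and-add of zero:
// KMP_TEST_THEN_ADD##BITS returns the previous contents of *loc, and adding
// "OP 0" (i.e. "+ 0") leaves the value unchanged. E.g. __kmpc_atomic_fixed8_rd
// reduces, schematically, to:
//   new_value = KMP_TEST_THEN_ADD64(loc, +0);
//   return new_value;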
1994// -------------------------------------------------------------------------
1995#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1996 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1997 TYPE new_value; \
1998 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1999 OP_CMPXCHG_READ(TYPE, BITS, OP) \
2000 }
2001// ------------------------------------------------------------------------
2002// Routines for Extended types: long double, _Quad, complex flavours (use
2003// critical section)
2004// TYPE_ID, OP_ID, TYPE - detailed above
2005// OP - operator
2006// LCK_ID - lock identifier, used to possibly distinguish lock variable
2007#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2008 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
2009 TYPE new_value; \
2010 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
2011 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
2012 return new_value; \
2013 }
2014
2015// ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. The regular routine with a
// return value doesn't work there, so the read value is returned through an
// additional output parameter instead.
2019#if (KMP_OS_WINDOWS)
2020
2021#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
2022 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2023 \
2024 (*out) = (*loc); \
2025 \
2026 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2027// ------------------------------------------------------------------------
2028#ifdef KMP_GOMP_COMPAT
2029#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
2030 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2031 KMP_CHECK_GTID; \
2032 OP_CRITICAL_READ_WRK(OP, 0); \
2033 }
2034#else
2035#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
2036#endif /* KMP_GOMP_COMPAT */
2037// ------------------------------------------------------------------------
2038#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2039 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
2040 TYPE *loc) { \
2041 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2042 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2043
2044// ------------------------------------------------------------------------
2045#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2046 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2047 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2048 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2049 }
2050
2051#endif // KMP_OS_WINDOWS
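// As a result, on Windows* OS the cmplx4 read entry has an out-parameter
// signature (schematic):
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc);
// while on other platforms it keeps the usual value-returning form generated
// by ATOMIC_CRITICAL_READ below.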
2052
2053// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2055ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2056ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2057 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2058ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2059 KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2060ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2061 KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2062
2063// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2064ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2065 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2066ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2067 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2068
2069ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2070 1) // __kmpc_atomic_float10_rd
2071#if KMP_HAVE_QUAD
2072ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2073 1) // __kmpc_atomic_float16_rd
2074#endif // KMP_HAVE_QUAD
2075
2076// Fix for CQ220361 on Windows* OS
2077#if (KMP_OS_WINDOWS)
2078ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2079 1) // __kmpc_atomic_cmplx4_rd
2080#else
2081ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2082 1) // __kmpc_atomic_cmplx4_rd
2083#endif // (KMP_OS_WINDOWS)
2084ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2085 1) // __kmpc_atomic_cmplx8_rd
2086ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2087 1) // __kmpc_atomic_cmplx10_rd
2088#if KMP_HAVE_QUAD
2089ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2090 1) // __kmpc_atomic_cmplx16_rd
2091#if (KMP_ARCH_X86)
2092ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2093 1) // __kmpc_atomic_float16_a16_rd
2094ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2095 1) // __kmpc_atomic_cmplx16_a16_rd
2096#endif // (KMP_ARCH_X86)
2097#endif // KMP_HAVE_QUAD
2098
2099// ------------------------------------------------------------------------
2100// Atomic WRITE routines
2101
2102#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2103 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2104 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2105 KMP_XCHG_FIXED##BITS(lhs, rhs); \
2106 }
2107// ------------------------------------------------------------------------
2108#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2109 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2110 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2111 KMP_XCHG_REAL##BITS(lhs, rhs); \
2112 }
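// An atomic write is an unconditional exchange; the previous value is simply
// discarded. Schematically, ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
// KMP_ARCH_X86) is assumed to produce (GOMP-compat path elided):
//   void __kmpc_atomic_fixed4_wr(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                kmp_int32 rhs) {
//     KMP_XCHG_FIXED32(lhs, rhs);
//   }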
2113
2114// ------------------------------------------------------------------------
2115// Operation on *lhs, rhs using "compare_and_store" routine
2116// TYPE - operands' type
2117// BITS - size in bits, used to distinguish low level calls
2118// OP - operator
2119// Note: temp_val introduced in order to force the compiler to read
2120// *lhs only once (w/o it the compiler reads *lhs twice)
2121#define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2122 { \
2123 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2124 TYPE old_value, new_value; \
2125 temp_val = *lhs; \
2126 old_value = temp_val; \
2127 new_value = rhs; \
2128 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2129 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2130 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2131 temp_val = *lhs; \
2132 old_value = temp_val; \
2133 new_value = rhs; \
2134 } \
2135 }
2136
2137// -------------------------------------------------------------------------
2138#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2139 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2140 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2141 OP_CMPXCHG_WR(TYPE, BITS, OP) \
2142 }
2143
2144// ------------------------------------------------------------------------
2145// Routines for Extended types: long double, _Quad, complex flavours (use
2146// critical section)
2147// TYPE_ID, OP_ID, TYPE - detailed above
2148// OP - operator
2149// LCK_ID - lock identifier, used to possibly distinguish lock variable
2150#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2151 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2152 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2153 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2154 }
2155// -------------------------------------------------------------------------
2156
2157ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2158 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2159ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2160 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2161ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2162 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2163#if (KMP_ARCH_X86)
2164ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2165 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2166#else
2167ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2168 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2169#endif // (KMP_ARCH_X86)
2170
2171ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2172 KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2173#if (KMP_ARCH_X86)
2174ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2175 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2176#else
2177ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2178 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2179#endif // (KMP_ARCH_X86)
2180
2181ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2182 1) // __kmpc_atomic_float10_wr
2183#if KMP_HAVE_QUAD
2184ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2185 1) // __kmpc_atomic_float16_wr
2186#endif // KMP_HAVE_QUAD
2187ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2188ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2189 1) // __kmpc_atomic_cmplx8_wr
2190ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2191 1) // __kmpc_atomic_cmplx10_wr
2192#if KMP_HAVE_QUAD
2193ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2194 1) // __kmpc_atomic_cmplx16_wr
2195#if (KMP_ARCH_X86)
2196ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2197 1) // __kmpc_atomic_float16_a16_wr
2198ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2199 1) // __kmpc_atomic_cmplx16_a16_wr
2200#endif // (KMP_ARCH_X86)
2201#endif // KMP_HAVE_QUAD
2202
2203// ------------------------------------------------------------------------
2204// Atomic CAPTURE routines
2205
// Beginning of a definition (provides name, parameters, debug trace)
2207// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2208// fixed)
2209// OP_ID - operation identifier (add, sub, mul, ...)
2210// TYPE - operands' type
2211#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2212 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2213 TYPE *lhs, TYPE rhs, int flag) { \
2214 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2215 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2216
2217// -------------------------------------------------------------------------
2218// Operation on *lhs, rhs bound by critical section
2219// OP - operator (it's supposed to contain an assignment)
2220// LCK_ID - lock identifier
2221// Note: don't check gtid as it should always be valid
2222// 1, 2-byte - expect valid parameter, other - check before this macro
2223#define OP_CRITICAL_CPT(OP, LCK_ID) \
2224 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2225 \
2226 if (flag) { \
2227 (*lhs) OP rhs; \
2228 new_value = (*lhs); \
2229 } else { \
2230 new_value = (*lhs); \
2231 (*lhs) OP rhs; \
2232 } \
2233 \
2234 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2235 return new_value;
2236
2237#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2238 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2239 \
2240 if (flag) { \
2241 (*lhs) = (TYPE)((*lhs)OP rhs); \
2242 new_value = (*lhs); \
2243 } else { \
2244 new_value = (*lhs); \
2245 (*lhs) = (TYPE)((*lhs)OP rhs); \
2246 } \
2247 \
2248 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2249 return new_value;
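// The 'flag' argument selects which value the caller gets back: non-zero means
// update first and return the *new* value (capture after), zero means save and
// return the *old* value (capture before). For example, with *lhs == 10,
// rhs == 5 and OP '+':
//   flag != 0  ->  *lhs becomes 15, the routine returns 15
//   flag == 0  ->  *lhs becomes 15, the routine returns 10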
2250
2251// ------------------------------------------------------------------------
2252#ifdef KMP_GOMP_COMPAT
2253#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2254 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2255 KMP_CHECK_GTID; \
2256 OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2257 }
2258#else
2259#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2260#endif /* KMP_GOMP_COMPAT */
2261
2262// ------------------------------------------------------------------------
2263// Operation on *lhs, rhs using "compare_and_store" routine
2264// TYPE - operands' type
2265// BITS - size in bits, used to distinguish low level calls
2266// OP - operator
2267// Note: temp_val introduced in order to force the compiler to read
2268// *lhs only once (w/o it the compiler reads *lhs twice)
2269#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2270 { \
2271 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2272 TYPE old_value, new_value; \
2273 temp_val = *lhs; \
2274 old_value = temp_val; \
2275 new_value = (TYPE)(old_value OP rhs); \
2276 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2277 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2278 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2279 temp_val = *lhs; \
2280 old_value = temp_val; \
2281 new_value = (TYPE)(old_value OP rhs); \
2282 } \
2283 if (flag) { \
2284 return new_value; \
2285 } else \
2286 return old_value; \
2287 }
2288
2289// -------------------------------------------------------------------------
2290#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2291 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2292 TYPE new_value; \
2293 (void)new_value; \
2294 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2295 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2296 }
2297
2298// -------------------------------------------------------------------------
2299#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2300 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2301 TYPE old_value, new_value; \
2302 (void)new_value; \
2303 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2304 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2305 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2306 if (flag) { \
2307 return old_value OP rhs; \
2308 } else \
2309 return old_value; \
2310 }
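// For add/sub on natively supported sizes the capture needs only one
// fetch-and-add: the primitive returns the old value and, when flag is set,
// the new value is reconstructed arithmetically. Sketch for
// ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 0):
//   old_value = KMP_TEST_THEN_ADD32(lhs, -rhs); // lhs - rhs == lhs + (-rhs)
//   return flag ? old_value - rhs : old_value;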
2311// -------------------------------------------------------------------------
2312
2313ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2314 0) // __kmpc_atomic_fixed4_add_cpt
2315ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2316 0) // __kmpc_atomic_fixed4_sub_cpt
2317ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2318 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2319ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2320 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2321
2322ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2323 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2324ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2325 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2326ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2327 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2328ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2329 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2330
2331// ------------------------------------------------------------------------
2332// Entries definition for integer operands
2333// TYPE_ID - operands type and size (fixed4, float4)
2334// OP_ID - operation identifier (add, sub, mul, ...)
2335// TYPE - operand type
2336// BITS - size in bits, used to distinguish low level calls
2337// OP - operator (used in critical section)
2338// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2339// ------------------------------------------------------------------------
2340// Routines for ATOMIC integer operands, other operators
2341// ------------------------------------------------------------------------
2342// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2343ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2344 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2345ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2346 0) // __kmpc_atomic_fixed1_andb_cpt
2347ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2348 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2349ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2350 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2351ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2352 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2353ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2354 0) // __kmpc_atomic_fixed1_orb_cpt
2355ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2356 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2357ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2358 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2359ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2360 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2361ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2362 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2363ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2364 0) // __kmpc_atomic_fixed1_xor_cpt
2365ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2366 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2367ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2368 0) // __kmpc_atomic_fixed2_andb_cpt
2369ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2370 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2371ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2372 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2373ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2374 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2375ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2376 0) // __kmpc_atomic_fixed2_orb_cpt
2377ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2378 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2379ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2380 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2381ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2382 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2383ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2384 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2385ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2386 0) // __kmpc_atomic_fixed2_xor_cpt
2387ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2388 0) // __kmpc_atomic_fixed4_andb_cpt
2389ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2390 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2391ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2392 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2393ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2394 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2395ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2396 0) // __kmpc_atomic_fixed4_orb_cpt
2397ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2398 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2399ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2400 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2401ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2402 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2403ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2404 0) // __kmpc_atomic_fixed4_xor_cpt
2405ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2406 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2407ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2408 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2409ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2410 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2411ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2412 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2413ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2414 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2415ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2416 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2417ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2418 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2419ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2420 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2421ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2422 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2423ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2424 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2425ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2426 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2427ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2428 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2429ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2430 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2431// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2432
2433// CAPTURE routines for mixed types RHS=float16
2434#if KMP_HAVE_QUAD
2435
// Beginning of a definition (provides name, parameters, debug trace)
2437// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2438// fixed)
2439// OP_ID - operation identifier (add, sub, mul, ...)
2440// TYPE - operands' type
2441#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2442 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2443 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2444 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2445 KA_TRACE(100, \
2446 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2447 gtid));
2448
2449// -------------------------------------------------------------------------
2450#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2451 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2452 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2453 TYPE new_value; \
2454 (void)new_value; \
2455 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2456 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2457 }
2458
2459// -------------------------------------------------------------------------
2460#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2461 LCK_ID, GOMP_FLAG) \
2462 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2463 TYPE new_value; \
2464 (void)new_value; \
2465 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2466 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2467 }
2468
2469ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2470 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2471ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2472 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2473ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2474 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2475ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2476 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2477ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2478 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2479ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2480 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2481ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2482 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2483ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2484 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2485
2486ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2487 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2488ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2489 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2490ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2491 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2492ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2493 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2494ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2495 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2496ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2497 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2498ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2499 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2500ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2501 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2502
2503ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2504 0) // __kmpc_atomic_fixed4_add_cpt_fp
2505ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2506 0) // __kmpc_atomic_fixed4u_add_cpt_fp
2507ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2508 0) // __kmpc_atomic_fixed4_sub_cpt_fp
2509ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2510 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2511ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2512 0) // __kmpc_atomic_fixed4_mul_cpt_fp
2513ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2514 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2515ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2516 0) // __kmpc_atomic_fixed4_div_cpt_fp
2517ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2518 0) // __kmpc_atomic_fixed4u_div_cpt_fp
2519
2520ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2521 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2522ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2523 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2524ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2525 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2526ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2527 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2528ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2529 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2530ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2531 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2532ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2533 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2534ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2535 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2536
2537ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2538 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2539ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2540 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2541ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2542 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2543ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2544 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2545
2546ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2547 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2548ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2549 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2550ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2551 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2552ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2553 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2554
2555ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2556 1) // __kmpc_atomic_float10_add_cpt_fp
2557ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2558 1) // __kmpc_atomic_float10_sub_cpt_fp
2559ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2560 1) // __kmpc_atomic_float10_mul_cpt_fp
2561ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2562 1) // __kmpc_atomic_float10_div_cpt_fp
2563
2564#endif // KMP_HAVE_QUAD
2565
2566// ------------------------------------------------------------------------
2567// Routines for C/C++ Reduction operators && and ||
2568
2569// -------------------------------------------------------------------------
2570// Operation on *lhs, rhs bound by critical section
2571// OP - operator (it's supposed to contain an assignment)
2572// LCK_ID - lock identifier
2573// Note: don't check gtid as it should always be valid
2574// 1, 2-byte - expect valid parameter, other - check before this macro
2575#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2576 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2577 \
2578 if (flag) { \
2579 new_value OP rhs; \
2580 (*lhs) = new_value; \
2581 } else { \
2582 new_value = (*lhs); \
2583 (*lhs) OP rhs; \
2584 } \
2585 \
2586 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2587
2588// ------------------------------------------------------------------------
2589#ifdef KMP_GOMP_COMPAT
2590#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2591 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2592 KMP_CHECK_GTID; \
2593 OP_CRITICAL_L_CPT(OP, 0); \
2594 return new_value; \
2595 }
2596#else
2597#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2598#endif /* KMP_GOMP_COMPAT */
2599
2600// ------------------------------------------------------------------------
2601// Need separate macros for &&, || because there is no combined assignment
2602#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2603 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2604 TYPE new_value; \
2605 (void)new_value; \
2606 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2607 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2608 }
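// Because C has no "&&=" or "||=", the GOMP-compat path is handed the operator
// fragment "= *lhs OP", so that "new_value OP rhs" inside OP_CRITICAL_L_CPT
// reads as a plain assignment. Schematically, for '&&':
//   new_value = *lhs && rhs;
//   (*lhs) = new_value;
// The lock-free path reuses OP_CMPXCHG_CPT with the logical operator applied
// directly: new_value = (TYPE)(old_value && rhs).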
2609
2610ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2611 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2612ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2613 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2614ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2615 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2616ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2617 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2618ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2619 0) // __kmpc_atomic_fixed4_andl_cpt
2620ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2621 0) // __kmpc_atomic_fixed4_orl_cpt
2622ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2623 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2624ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2625 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2626
2627// -------------------------------------------------------------------------
// Routines for Fortran operators that have no C counterpart:
2629// MAX, MIN, .EQV., .NEQV.
2630// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2631// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2632
2633// -------------------------------------------------------------------------
2634// MIN and MAX need separate macros
// OP - operator used to check whether any action is needed
2636#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2637 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2638 \
2639 if (*lhs OP rhs) { /* still need actions? */ \
2640 old_value = *lhs; \
2641 *lhs = rhs; \
2642 if (flag) \
2643 new_value = rhs; \
2644 else \
2645 new_value = old_value; \
2646 } else { \
2647 new_value = *lhs; \
2648 } \
2649 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2650 return new_value;
2651
2652// -------------------------------------------------------------------------
2653#ifdef KMP_GOMP_COMPAT
2654#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2655 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2656 KMP_CHECK_GTID; \
2657 MIN_MAX_CRITSECT_CPT(OP, 0); \
2658 }
2659#else
2660#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2661#endif /* KMP_GOMP_COMPAT */
2662
2663// -------------------------------------------------------------------------
2664#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2665 { \
2666 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2667 /*TYPE old_value; */ \
2668 temp_val = *lhs; \
2669 old_value = temp_val; \
2670 while (old_value OP rhs && /* still need actions? */ \
2671 !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2672 (kmp_int##BITS *)lhs, \
2673 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2674 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2675 temp_val = *lhs; \
2676 old_value = temp_val; \
2677 } \
2678 if (flag) \
2679 return rhs; \
2680 else \
2681 return old_value; \
2682 }
2683
2684// -------------------------------------------------------------------------
2685// 1-byte, 2-byte operands - use critical section
2686#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2687 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2688 TYPE new_value, old_value; \
2689 if (*lhs OP rhs) { /* need actions? */ \
2690 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2691 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2692 } \
2693 return *lhs; \
2694 }
2695
2696#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2697 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2698 TYPE new_value, old_value; \
2699 (void)new_value; \
2700 if (*lhs OP rhs) { \
2701 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2702 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2703 } \
2704 return *lhs; \
2705 }
2706
2707MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2708 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2709MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2710 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2711MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2712 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2713MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2714 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2715MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2716 0) // __kmpc_atomic_fixed4_max_cpt
2717MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2718 0) // __kmpc_atomic_fixed4_min_cpt
2719MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2720 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2721MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2722 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2723MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2724 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2725MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2726 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2727MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2728 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2729MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2730 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2731MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r,
2732 1) // __kmpc_atomic_float10_max_cpt
2733MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r,
2734 1) // __kmpc_atomic_float10_min_cpt
2735#if KMP_HAVE_QUAD
2736MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2737 1) // __kmpc_atomic_float16_max_cpt
2738MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2739 1) // __kmpc_atomic_float16_min_cpt
2740#if (KMP_ARCH_X86)
2741MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2742 1) // __kmpc_atomic_float16_max_a16_cpt
2743MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
2745#endif // (KMP_ARCH_X86)
2746#endif // KMP_HAVE_QUAD
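// A usage sketch (illustrative only): an atomic x = max(x, expr) that also
// captures the new value could be lowered to
//   kmp_int32 v = __kmpc_atomic_fixed4_max_cpt(loc, gtid, &x, expr, 1);
// The routine only writes to x when expr compares greater than the current
// value (the "need actions?" test above) and returns the old or the new
// value depending on flag.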
2747
2748// ------------------------------------------------------------------------
2749#ifdef KMP_GOMP_COMPAT
2750#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2751 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2752 KMP_CHECK_GTID; \
2753 OP_CRITICAL_CPT(OP, 0); \
2754 }
2755#else
2756#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2757#endif /* KMP_GOMP_COMPAT */
2758// ------------------------------------------------------------------------
2759#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2760 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2761 TYPE new_value; \
2762 (void)new_value; \
2763 OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2764 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2765 }
2766
2767// ------------------------------------------------------------------------
2768
2769ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2770 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2771ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2772 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2773ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2774 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2775ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2776 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2777ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2778 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2779ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2780 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2781ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2782 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2783ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2784 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
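// Note on the eqv routines (illustrative only): .EQV. is the complement of
// exclusive or, and bitwise a ^ ~b == ~(a ^ b), so sending "^= (TYPE)~" into
// the compare-and-store template implements the equivalence update. A capture
// call would look like
//   kmp_int8 v = __kmpc_atomic_fixed1_eqv_cpt(loc, gtid, &x, rhs, 0);
// with flag==0 returning the value of x before the update.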
2785
2786// ------------------------------------------------------------------------
2787// Routines for Extended types: long double, _Quad, complex flavours (use
2788// critical section)
2789// TYPE_ID, OP_ID, TYPE - detailed above
2790// OP - operator
2791// LCK_ID - lock identifier, used to possibly distinguish lock variable
2792#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2793 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2794 TYPE new_value; \
2795 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2796 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2797 }
2798
2799// ------------------------------------------------------------------------
2800// Workaround for cmplx4. Regular routines with return value don't work
2801// on Win_32e. Let's return captured values through the additional parameter.
2802#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2803 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2804 \
2805 if (flag) { \
2806 (*lhs) OP rhs; \
2807 (*out) = (*lhs); \
2808 } else { \
2809 (*out) = (*lhs); \
2810 (*lhs) OP rhs; \
2811 } \
2812 \
2813 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2814 return;
2815// ------------------------------------------------------------------------
2816
2817#ifdef KMP_GOMP_COMPAT
2818#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2819 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2820 KMP_CHECK_GTID; \
2821 OP_CRITICAL_CPT_WRK(OP## =, 0); \
2822 }
2823#else
2824#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2825#endif /* KMP_GOMP_COMPAT */
2826// ------------------------------------------------------------------------
2827
2828#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2829 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2830 TYPE rhs, TYPE *out, int flag) { \
2831 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2832 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2833// ------------------------------------------------------------------------
2834
2835#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2836 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2837 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2838 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2839 }
2840// The end of workaround for cmplx4
2841
2842/* ------------------------------------------------------------------------- */
2843// routines for long double type
2844ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2845 1) // __kmpc_atomic_float10_add_cpt
2846ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2847 1) // __kmpc_atomic_float10_sub_cpt
2848ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2849 1) // __kmpc_atomic_float10_mul_cpt
2850ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2851 1) // __kmpc_atomic_float10_div_cpt
2852#if KMP_HAVE_QUAD
2853// routines for _Quad type
2854ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2855 1) // __kmpc_atomic_float16_add_cpt
2856ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2857 1) // __kmpc_atomic_float16_sub_cpt
2858ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2859 1) // __kmpc_atomic_float16_mul_cpt
2860ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2861 1) // __kmpc_atomic_float16_div_cpt
2862#if (KMP_ARCH_X86)
2863ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2864 1) // __kmpc_atomic_float16_add_a16_cpt
2865ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2866 1) // __kmpc_atomic_float16_sub_a16_cpt
2867ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2868 1) // __kmpc_atomic_float16_mul_a16_cpt
2869ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2870 1) // __kmpc_atomic_float16_div_a16_cpt
2871#endif // (KMP_ARCH_X86)
2872#endif // KMP_HAVE_QUAD
2873
2874// routines for complex types
2875
2876// cmplx4 routines to return void
2877ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2878 1) // __kmpc_atomic_cmplx4_add_cpt
2879ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2880 1) // __kmpc_atomic_cmplx4_sub_cpt
2881ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2882 1) // __kmpc_atomic_cmplx4_mul_cpt
2883ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2884 1) // __kmpc_atomic_cmplx4_div_cpt
2885
2886ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2887 1) // __kmpc_atomic_cmplx8_add_cpt
2888ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2889 1) // __kmpc_atomic_cmplx8_sub_cpt
2890ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2891 1) // __kmpc_atomic_cmplx8_mul_cpt
2892ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2893 1) // __kmpc_atomic_cmplx8_div_cpt
2894ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2895 1) // __kmpc_atomic_cmplx10_add_cpt
2896ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2897 1) // __kmpc_atomic_cmplx10_sub_cpt
2898ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2899 1) // __kmpc_atomic_cmplx10_mul_cpt
2900ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2901 1) // __kmpc_atomic_cmplx10_div_cpt
2902#if KMP_HAVE_QUAD
2903ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2904 1) // __kmpc_atomic_cmplx16_add_cpt
2905ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2906 1) // __kmpc_atomic_cmplx16_sub_cpt
2907ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2908 1) // __kmpc_atomic_cmplx16_mul_cpt
2909ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2910 1) // __kmpc_atomic_cmplx16_div_cpt
2911#if (KMP_ARCH_X86)
2912ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2913 1) // __kmpc_atomic_cmplx16_add_a16_cpt
2914ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2915 1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2916ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2917 1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2918ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2919 1) // __kmpc_atomic_cmplx16_div_a16_cpt
2920#endif // (KMP_ARCH_X86)
2921#endif // KMP_HAVE_QUAD
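// A usage sketch (illustrative only): for double-precision complex, an atomic
// update with capture such as { x = x + expr; v = x; } could be lowered to
//   kmp_cmplx64 v = __kmpc_atomic_cmplx8_add_cpt(loc, gtid, &x, expr, 1);
// which performs the read-modify-write under the 16c atomic lock rather than
// with a compare-and-store loop.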
2922
// OpenMP 4.0 capture-reverse forms for non-commutative operations:
//   v = x = expr binop x;
//   { v = x; x = expr binop x; }
//   { x = expr binop x; v = x; }
// Supported only on IA-32 architecture and Intel(R) 64
2926
2927#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2928// -------------------------------------------------------------------------
2929// Operation on *lhs, rhs bound by critical section
2930// OP - operator (it's supposed to contain an assignment)
2931// LCK_ID - lock identifier
2932// Note: don't check gtid as it should always be valid
2933// 1, 2-byte - expect valid parameter, other - check before this macro
2934#define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
2935 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2936 \
2937 if (flag) { \
2938 /*temp_val = (*lhs);*/ \
2939 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2940 new_value = (*lhs); \
2941 } else { \
2942 new_value = (*lhs); \
2943 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2944 } \
2945 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2946 return new_value;
2947
2948// ------------------------------------------------------------------------
2949#ifdef KMP_GOMP_COMPAT
2950#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
2951 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2952 KMP_CHECK_GTID; \
2953 OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
2954 }
2955#else
2956#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2957#endif /* KMP_GOMP_COMPAT */
2958
2959// ------------------------------------------------------------------------
2960// Operation on *lhs, rhs using "compare_and_store" routine
2961// TYPE - operands' type
2962// BITS - size in bits, used to distinguish low level calls
2963// OP - operator
2964// Note: temp_val introduced in order to force the compiler to read
2965// *lhs only once (w/o it the compiler reads *lhs twice)
2966#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2967 { \
2968 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2969 TYPE old_value, new_value; \
2970 temp_val = *lhs; \
2971 old_value = temp_val; \
2972 new_value = (TYPE)(rhs OP old_value); \
2973 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2974 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2975 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2976 temp_val = *lhs; \
2977 old_value = temp_val; \
2978 new_value = (TYPE)(rhs OP old_value); \
2979 } \
2980 if (flag) { \
2981 return new_value; \
2982 } else \
2983 return old_value; \
2984 }
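// For intuition (a sketch only; the runtime itself uses the
// KMP_COMPARE_AND_STORE_ACQ macros above): the reversed capture for a 32-bit
// subtraction, v = x = rhs - x, follows the familiar CAS retry pattern, e.g.
// with the GCC/Clang __atomic builtins:
//   kmp_int32 old_value = *lhs, new_value;
//   do {
//     new_value = rhs - old_value;
//   } while (!__atomic_compare_exchange_n(lhs, &old_value, new_value, 0,
//                                         __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
//   return flag ? new_value : old_value;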
2985
2986// -------------------------------------------------------------------------
2987#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2988 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2989 TYPE new_value; \
2990 (void)new_value; \
2991 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
2992 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2993 }
2994
2995ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2996 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2997ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2998 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2999ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
3000 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
3001ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
3002 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
3003ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
3004 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
3005ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
3006 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
3007ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
3008 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
3009ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
3010 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
3011ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
3012 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
3013ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
3014 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
3015ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
3016 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
3017ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
3018 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
3019ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
3020 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
3021ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
3022 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
3023ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
3024 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
3025ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
3026 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
3027ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
3028 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
3029ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
3030 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
3031ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
3032 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
3033ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
3034 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
3035ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
3036 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
3037ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
3038 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
3039ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
3040 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
3041ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
3042 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
3043ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
3044 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
3045ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
3046 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
3047ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
3048 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
3049ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3050 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
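// A usage sketch (illustrative only): the reversed forms swap the operand
// order, so { v = x; x = expr / x; } on a double could be lowered to
//   double v = __kmpc_atomic_float8_div_cpt_rev(loc, gtid, &x, expr, 0);
// flag==0 captures the value of x before the update, flag==1 the value after.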
3052
3053// ------------------------------------------------------------------------
3054// Routines for Extended types: long double, _Quad, complex flavours (use
3055// critical section)
3056// TYPE_ID, OP_ID, TYPE - detailed above
3057// OP - operator
3058// LCK_ID - lock identifier, used to possibly distinguish lock variable
3059#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
3060 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
3061 TYPE new_value; \
3062 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
3063 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3064 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
3065 }
3066
3067/* ------------------------------------------------------------------------- */
3068// routines for long double type
3069ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3070 1) // __kmpc_atomic_float10_sub_cpt_rev
3071ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3072 1) // __kmpc_atomic_float10_div_cpt_rev
3073#if KMP_HAVE_QUAD
3074// routines for _Quad type
3075ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3076 1) // __kmpc_atomic_float16_sub_cpt_rev
3077ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3078 1) // __kmpc_atomic_float16_div_cpt_rev
3079#if (KMP_ARCH_X86)
3080ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3081 1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3082ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3083 1) // __kmpc_atomic_float16_div_a16_cpt_rev
3084#endif // (KMP_ARCH_X86)
3085#endif // KMP_HAVE_QUAD
3086
3087// routines for complex types
3088
3089// ------------------------------------------------------------------------
3090// Workaround for cmplx4. Regular routines with return value don't work
3091// on Win_32e. Let's return captured values through the additional parameter.
3092#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3093 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3094 \
3095 if (flag) { \
3096 (*lhs) = (rhs)OP(*lhs); \
3097 (*out) = (*lhs); \
3098 } else { \
3099 (*out) = (*lhs); \
3100 (*lhs) = (rhs)OP(*lhs); \
3101 } \
3102 \
3103 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3104 return;
3105// ------------------------------------------------------------------------
3106
3107#ifdef KMP_GOMP_COMPAT
3108#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3109 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3110 KMP_CHECK_GTID; \
3111 OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3112 }
3113#else
3114#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3115#endif /* KMP_GOMP_COMPAT */
3116// ------------------------------------------------------------------------
3117
3118#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3119 GOMP_FLAG) \
3120 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3121 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3122 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3123 }
3124// The end of workaround for cmplx4
3125
3126// !!! TODO: check if we need to return void for cmplx4 routines
3127// cmplx4 routines to return void
3128ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3129 1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3130ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3131 1) // __kmpc_atomic_cmplx4_div_cpt_rev
3132
3133ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3134 1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3135ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3136 1) // __kmpc_atomic_cmplx8_div_cpt_rev
3137ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3138 1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3139ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3140 1) // __kmpc_atomic_cmplx10_div_cpt_rev
3141#if KMP_HAVE_QUAD
3142ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3143 1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3144ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3145 1) // __kmpc_atomic_cmplx16_div_cpt_rev
3146#if (KMP_ARCH_X86)
3147ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3148 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3149ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3150 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3151#endif // (KMP_ARCH_X86)
3152#endif // KMP_HAVE_QUAD
3153
3154// Capture reverse for mixed type: RHS=float16
3155#if KMP_HAVE_QUAD
3156
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
//           fixed types)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
3162// -------------------------------------------------------------------------
3163#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3164 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3165 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3166 TYPE new_value; \
3167 (void)new_value; \
3168 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3169 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3170 }
3171
3172// -------------------------------------------------------------------------
3173#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3174 LCK_ID, GOMP_FLAG) \
3175 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3176 TYPE new_value; \
3177 (void)new_value; \
3178 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3179 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3180 }
3181
3182ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3183 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3184ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3185 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3186ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3187 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3188ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3189 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3190
3191ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3192 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3193ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3194 1,
3195 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3196ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3197 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3198ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3199 1,
3200 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3201
3202ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3203 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3204ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3205 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3206ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3207 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3208ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3209 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3210
3211ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3212 7,
3213 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3214ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3215 8i, 7,
3216 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3217ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3218 7,
3219 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3220ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3221 8i, 7,
3222 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3223
3224ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3225 4r, 3,
3226 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3227ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3228 4r, 3,
3229 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3230
3231ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3232 8r, 7,
3233 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3234ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3235 8r, 7,
3236 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3237
3238ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3239 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3240ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3241 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3242
3243#endif // KMP_HAVE_QUAD
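// A usage sketch (illustrative only, assuming the ATOMIC_BEGIN_CPT_MIX
// prologue declares the usual parameter list with a _Quad right-hand side):
// given q of type _Quad, { v = x; x = (double)(q - x); } could be lowered to
//   double v = __kmpc_atomic_float8_sub_cpt_rev_fp(loc, gtid, &x, q, 0);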
3244
3245// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3246
3247#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3248 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3249 TYPE rhs) { \
3250 KMP_DEBUG_ASSERT(__kmp_init_serial); \
3251 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3252
3253#define CRITICAL_SWP(LCK_ID) \
3254 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3255 \
3256 old_value = (*lhs); \
3257 (*lhs) = rhs; \
3258 \
3259 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3260 return old_value;
3261
3262// ------------------------------------------------------------------------
3263#ifdef KMP_GOMP_COMPAT
3264#define GOMP_CRITICAL_SWP(FLAG) \
3265 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3266 KMP_CHECK_GTID; \
3267 CRITICAL_SWP(0); \
3268 }
3269#else
3270#define GOMP_CRITICAL_SWP(FLAG)
3271#endif /* KMP_GOMP_COMPAT */
3272
3273#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3274 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3275 TYPE old_value; \
3276 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3277 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3278 return old_value; \
3279 }
3280// ------------------------------------------------------------------------
3281#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3282 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3283 TYPE old_value; \
3284 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3285 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3286 return old_value; \
3287 }
3288
3289// ------------------------------------------------------------------------
3290#define CMPXCHG_SWP(TYPE, BITS) \
3291 { \
3292 TYPE KMP_ATOMIC_VOLATILE temp_val; \
3293 TYPE old_value, new_value; \
3294 temp_val = *lhs; \
3295 old_value = temp_val; \
3296 new_value = rhs; \
3297 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3298 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3299 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3300 temp_val = *lhs; \
3301 old_value = temp_val; \
3302 new_value = rhs; \
3303 } \
3304 return old_value; \
3305 }
3306
3307// -------------------------------------------------------------------------
3308#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3309 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3310 TYPE old_value; \
3311 (void)old_value; \
3312 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3313 CMPXCHG_SWP(TYPE, BITS) \
3314 }
3315
3316ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3317ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3318ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3319
3320ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3321 KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3322
3323#if (KMP_ARCH_X86)
3324ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3325 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3326ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3327 KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3328#else
3329ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3330ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3331 KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3332#endif // (KMP_ARCH_X86)
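// A usage sketch (illustrative only): the capture-write form { v = x; x = expr; }
// on a 32-bit integer could be lowered to
//   kmp_int32 v = __kmpc_atomic_fixed4_swp(loc, gtid, &x, expr);
// which uses a plain atomic exchange where one is available and the
// compare-and-store loop otherwise.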
3333
3334// ------------------------------------------------------------------------
3335// Routines for Extended types: long double, _Quad, complex flavours (use
3336// critical section)
3337#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3338 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3339 TYPE old_value; \
3340 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3341 CRITICAL_SWP(LCK_ID) \
3342 }
3343
3344// ------------------------------------------------------------------------
3345// !!! TODO: check if we need to return void for cmplx4 routines
3346// Workaround for cmplx4. Regular routines with return value don't work
3347// on Win_32e. Let's return captured values through the additional parameter.
3348
3349#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3350 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3351 TYPE rhs, TYPE *out) { \
3352 KMP_DEBUG_ASSERT(__kmp_init_serial); \
3353 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3354
3355#define CRITICAL_SWP_WRK(LCK_ID) \
3356 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3357 \
3358 tmp = (*lhs); \
3359 (*lhs) = (rhs); \
3360 (*out) = tmp; \
3361 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3362 return;
3363// ------------------------------------------------------------------------
3364
3365#ifdef KMP_GOMP_COMPAT
3366#define GOMP_CRITICAL_SWP_WRK(FLAG) \
3367 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3368 KMP_CHECK_GTID; \
3369 CRITICAL_SWP_WRK(0); \
3370 }
3371#else
3372#define GOMP_CRITICAL_SWP_WRK(FLAG)
3373#endif /* KMP_GOMP_COMPAT */
3374// ------------------------------------------------------------------------
3375
3376#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3377 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3378 TYPE tmp; \
3379 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3380 CRITICAL_SWP_WRK(LCK_ID) \
3381 }
3382// The end of workaround for cmplx4
3383
3384ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3385#if KMP_HAVE_QUAD
3386ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3387#endif // KMP_HAVE_QUAD
3388// cmplx4 routine to return void
3389ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
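// A usage sketch (illustrative only): because the cmplx4 swap returns its
// result through the extra out parameter, a caller would write
//   kmp_cmplx32 v;
//   __kmpc_atomic_cmplx4_swp(loc, gtid, &x, rhs, &v);
// rather than assigning from a return value.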
3390
3391// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3392// __kmpc_atomic_cmplx4_swp
3393
3394ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3395ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3396#if KMP_HAVE_QUAD
3397ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3398#if (KMP_ARCH_X86)
3399ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3400 1) // __kmpc_atomic_float16_a16_swp
3401ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3402 1) // __kmpc_atomic_cmplx16_a16_swp
3403#endif // (KMP_ARCH_X86)
3404#endif // KMP_HAVE_QUAD
3405
3406// End of OpenMP 4.0 Capture
3407
3408#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3409
3410#undef OP_CRITICAL
3411
3412/* ------------------------------------------------------------------------ */
3413/* Generic atomic routines */
3414
3415void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3416 void (*f)(void *, void *, void *)) {
3417 KMP_DEBUG_ASSERT(__kmp_init_serial);
3418
3419 if (
3420#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3421 FALSE /* must use lock */
3422#else
3423 TRUE
3424#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3425 ) {
3426 kmp_int8 old_value, new_value;
3427
3428 old_value = *(kmp_int8 *)lhs;
3429 (*f)(&new_value, &old_value, rhs);
3430
3431 /* TODO: Should this be acquire or release? */
3432 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3433 *(kmp_int8 *)&new_value)) {
3434 KMP_CPU_PAUSE();
3435
3436 old_value = *(kmp_int8 *)lhs;
3437 (*f)(&new_value, &old_value, rhs);
3438 }
3439
3440 return;
3441 } else {
3442 // All 1-byte data is of integer data type.
3443
3444#ifdef KMP_GOMP_COMPAT
3445 if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3450
3451 (*f)(lhs, lhs, rhs);
3452
3453#ifdef KMP_GOMP_COMPAT
3454 if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3459 }
3460}
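// A usage sketch (illustrative only; the callback name and operands are made
// up for the example): the generic entry points take an outlined routine
// f(result, op1, op2) that computes *result = *op1 <op> *op2, so a 1-byte
// atomic addition could be performed as
//   static void add_char(void *out, void *a, void *b) {
//     *(char *)out = *(char *)a + *(char *)b;
//   }
//   ...
//   __kmpc_atomic_1(loc, gtid, &x, &expr, add_char);
// On the lock-free path the callback is re-run inside the compare-and-store
// retry loop; otherwise the update happens under __kmp_atomic_lock_1i.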
3461
3462void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3463 void (*f)(void *, void *, void *)) {
3464 if (
3465#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3466 FALSE /* must use lock */
3467#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3468 TRUE /* no alignment problems */
3469#else
3470 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3471#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3472 ) {
3473 kmp_int16 old_value, new_value;
3474
3475 old_value = *(kmp_int16 *)lhs;
3476 (*f)(&new_value, &old_value, rhs);
3477
3478 /* TODO: Should this be acquire or release? */
3479 while (!KMP_COMPARE_AND_STORE_ACQ16(
3480 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3481 KMP_CPU_PAUSE();
3482
3483 old_value = *(kmp_int16 *)lhs;
3484 (*f)(&new_value, &old_value, rhs);
3485 }
3486
3487 return;
3488 } else {
3489 // All 2-byte data is of integer data type.
3490
3491#ifdef KMP_GOMP_COMPAT
3492 if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3497
3498 (*f)(lhs, lhs, rhs);
3499
3500#ifdef KMP_GOMP_COMPAT
3501 if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3506 }
3507}
3508
3509void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3510 void (*f)(void *, void *, void *)) {
3511 KMP_DEBUG_ASSERT(__kmp_init_serial);
3512
3513 if (
3514// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3515// Gomp compatibility is broken if this routine is called for floats.
3516#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3517 TRUE /* no alignment problems */
3518#else
3519 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3520#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3521 ) {
3522 kmp_int32 old_value, new_value;
3523
3524 old_value = *(kmp_int32 *)lhs;
3525 (*f)(&new_value, &old_value, rhs);
3526
3527 /* TODO: Should this be acquire or release? */
3528 while (!KMP_COMPARE_AND_STORE_ACQ32(
3529 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3530 KMP_CPU_PAUSE();
3531
3532 old_value = *(kmp_int32 *)lhs;
3533 (*f)(&new_value, &old_value, rhs);
3534 }
3535
3536 return;
3537 } else {
3538 // Use __kmp_atomic_lock_4i for all 4-byte data,
3539 // even if it isn't of integer data type.
3540
3541#ifdef KMP_GOMP_COMPAT
3542 if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3547
3548 (*f)(lhs, lhs, rhs);
3549
3550#ifdef KMP_GOMP_COMPAT
3551 if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3556 }
3557}
3558
3559void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3560 void (*f)(void *, void *, void *)) {
3561 KMP_DEBUG_ASSERT(__kmp_init_serial);
3562 if (
3563
3564#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3565 FALSE /* must use lock */
3566#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3567 TRUE /* no alignment problems */
3568#else
3569 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3570#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3571 ) {
3572 kmp_int64 old_value, new_value;
3573
3574 old_value = *(kmp_int64 *)lhs;
3575 (*f)(&new_value, &old_value, rhs);
3576 /* TODO: Should this be acquire or release? */
3577 while (!KMP_COMPARE_AND_STORE_ACQ64(
3578 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3579 KMP_CPU_PAUSE();
3580
3581 old_value = *(kmp_int64 *)lhs;
3582 (*f)(&new_value, &old_value, rhs);
3583 }
3584
3585 return;
3586 } else {
3587 // Use __kmp_atomic_lock_8i for all 8-byte data,
3588 // even if it isn't of integer data type.
3589
3590#ifdef KMP_GOMP_COMPAT
3591 if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3596
3597 (*f)(lhs, lhs, rhs);
3598
3599#ifdef KMP_GOMP_COMPAT
3600 if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3605 }
3606}
3607#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3608void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3609 void (*f)(void *, void *, void *)) {
3610 KMP_DEBUG_ASSERT(__kmp_init_serial);
3611
3612#ifdef KMP_GOMP_COMPAT
3613 if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3618
3619 (*f)(lhs, lhs, rhs);
3620
3621#ifdef KMP_GOMP_COMPAT
3622 if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3627}
3628#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3629
3630void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3631 void (*f)(void *, void *, void *)) {
3632 KMP_DEBUG_ASSERT(__kmp_init_serial);
3633
3634#ifdef KMP_GOMP_COMPAT
3635 if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3640
3641 (*f)(lhs, lhs, rhs);
3642
3643#ifdef KMP_GOMP_COMPAT
3644 if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3649}
3650#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3651void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3652 void (*f)(void *, void *, void *)) {
3653 KMP_DEBUG_ASSERT(__kmp_init_serial);
3654
3655#ifdef KMP_GOMP_COMPAT
3656 if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3661
3662 (*f)(lhs, lhs, rhs);
3663
3664#ifdef KMP_GOMP_COMPAT
3665 if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3670}
3671#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3672void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3673 void (*f)(void *, void *, void *)) {
3674 KMP_DEBUG_ASSERT(__kmp_init_serial);
3675
3676#ifdef KMP_GOMP_COMPAT
3677 if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3682
3683 (*f)(lhs, lhs, rhs);
3684
3685#ifdef KMP_GOMP_COMPAT
3686 if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3691}
3692
// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to not use third-party names in pure Intel code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3696void __kmpc_atomic_start(void) {
3697 int gtid = __kmp_entry_gtid();
3698 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3700}
3701
3702void __kmpc_atomic_end(void) {
3703 int gtid = __kmp_get_gtid();
3704 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3706}
3707
3708#if KMP_ARCH_X86 || KMP_ARCH_X86_64
3709
3710// OpenMP 5.1 compare and swap
3711
3712/*!
3713@param loc Source code location
3714@param gtid Global thread id
3715@param x Memory location to operate on
3716@param e Expected value
3717@param d Desired value
3718@return Result of comparison
3719
3720Implements Compare And Swap atomic operation.
3721
3722Sample code:
3723#pragma omp atomic compare update capture
3724 { r = x == e; if(r) { x = d; } }
3725*/
3726bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3727 return KMP_COMPARE_AND_STORE_ACQ8(x, e, d);
3728}
3729bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e,
3730 short d) {
3731 return KMP_COMPARE_AND_STORE_ACQ16(x, e, d);
3732}
3733bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e,
3734 kmp_int32 d) {
3735 return KMP_COMPARE_AND_STORE_ACQ32(x, e, d);
3736}
3737bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e,
3738 kmp_int64 d) {
3739 return KMP_COMPARE_AND_STORE_ACQ64(x, e, d);
3740}
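// A usage sketch (illustrative only): the boolean forms report whether the
// store happened, so the sample above could be lowered to
//   bool r = __kmpc_atomic_bool_4_cas(loc, gtid, &x, e, d);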
3741
3742/*!
3743@param loc Source code location
3744@param gtid Global thread id
3745@param x Memory location to operate on
3746@param e Expected value
3747@param d Desired value
3748@return Old value of x
3749
3750Implements Compare And Swap atomic operation.
3751
3752Sample code:
3753#pragma omp atomic compare update capture
3754 { v = x; if (x == e) { x = d; } }
3755*/
3756char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) {
3757 return KMP_COMPARE_AND_STORE_RET8(x, e, d);
3758}
3759short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e,
3760 short d) {
3761 return KMP_COMPARE_AND_STORE_RET16(x, e, d);
3762}
3763kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x,
3764 kmp_int32 e, kmp_int32 d) {
3765 return KMP_COMPARE_AND_STORE_RET32(x, e, d);
3766}
3767kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x,
3768 kmp_int64 e, kmp_int64 d) {
3769 return KMP_COMPARE_AND_STORE_RET64(x, e, d);
3770}
3771
3772/*!
3773@param loc Source code location
3774@param gtid Global thread id
3775@param x Memory location to operate on
3776@param e Expected value
3777@param d Desired value
3778@param pv Captured value location
3779@return Result of comparison
3780
3781Implements Compare And Swap + Capture atomic operation.
3782
v gets the old value of x if the comparison failed, and is left untouched otherwise.
3784Sample code:
3785#pragma omp atomic compare update capture
3786 { r = x == e; if(r) { x = d; } else { v = x; } }
3787*/
3788bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3789 char d, char *pv) {
3790 char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3791 if (old == e)
3792 return true;
3793 KMP_ASSERT(pv != NULL);
3794 *pv = old;
3795 return false;
3796}
3797bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3798 short d, short *pv) {
3799 short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3800 if (old == e)
3801 return true;
3802 KMP_ASSERT(pv != NULL);
3803 *pv = old;
3804 return false;
3805}
3806bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3807 kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3808 kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3809 if (old == e)
3810 return true;
3811 KMP_ASSERT(pv != NULL);
3812 *pv = old;
3813 return false;
3814}
3815bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3816 kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3817 kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3818 if (old == e)
3819 return true;
3820 KMP_ASSERT(pv != NULL);
3821 *pv = old;
3822 return false;
3823}
3824
3825/*!
3826@param loc Source code location
3827@param gtid Global thread id
3828@param x Memory location to operate on
3829@param e Expected value
3830@param d Desired value
3831@param pv Captured value location
3832@return Old value of x
3833
3834Implements Compare And Swap + Capture atomic operation.
3835
v gets the new value of x.
3837Sample code:
3838#pragma omp atomic compare update capture
3839 { if (x == e) { x = d; }; v = x; }
3840*/
3841char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e,
3842 char d, char *pv) {
3843 char old = KMP_COMPARE_AND_STORE_RET8(x, e, d);
3844 KMP_ASSERT(pv != NULL);
3845 *pv = old == e ? d : old;
3846 return old;
3847}
3848short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e,
3849 short d, short *pv) {
3850 short old = KMP_COMPARE_AND_STORE_RET16(x, e, d);
3851 KMP_ASSERT(pv != NULL);
3852 *pv = old == e ? d : old;
3853 return old;
3854}
3855kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x,
3856 kmp_int32 e, kmp_int32 d, kmp_int32 *pv) {
3857 kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d);
3858 KMP_ASSERT(pv != NULL);
3859 *pv = old == e ? d : old;
3860 return old;
3861}
3862kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x,
3863 kmp_int64 e, kmp_int64 d, kmp_int64 *pv) {
3864 kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d);
3865 KMP_ASSERT(pv != NULL);
3866 *pv = old == e ? d : old;
3867 return old;
3868}
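// A usage sketch (illustrative only): { if (x == e) { x = d; } v = x; } could
// be lowered to
//   kmp_int32 old = __kmpc_atomic_val_4_cas_cpt(loc, gtid, &x, e, d, &v);
// where v receives the final value of x (d on success, the unchanged value
// otherwise) and old is the value x held when the CAS was attempted.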
3869
3870// End OpenMP 5.1 compare + capture
3871#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3872
3873/*!
3874@}
3875*/
3876
3877// end of file
3878
