1 | /* |
2 | * kmp_atomic.cpp -- ATOMIC implementation routines |
3 | */ |
4 | |
5 | //===----------------------------------------------------------------------===// |
6 | // |
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | // See https://llvm.org/LICENSE.txt for license information. |
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "kmp_atomic.h" |
14 | #include "kmp.h" // TRUE, asm routines prototypes |
15 | |
16 | typedef unsigned char uchar; |
17 | typedef unsigned short ushort; |
18 | |
19 | /*! |
20 | @defgroup ATOMIC_OPS Atomic Operations |
21 | These functions are used for implementing the many different varieties of atomic |
22 | operations. |
23 | |
24 | The compiler is at liberty to inline atomic operations that are naturally |
25 | supported by the target architecture. For instance on IA-32 architecture an |
26 | atomic like this can be inlined |
27 | @code |
28 | static int s = 0; |
29 | #pragma omp atomic |
30 | s++; |
31 | @endcode |
32 | using the single instruction: `lock; incl s` |
33 | |
34 | However the runtime does provide entrypoints for these operations to support |
35 | compilers that choose not to inline them. (For instance, |
36 | `__kmpc_atomic_fixed4_add` could be used to perform the increment above.) |
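
As an illustrative sketch (not verbatim compiler output; `loc` stands for the
compiler-generated `ident_t` source location and `gtid` for the global thread
id, e.g. obtained via `__kmpc_global_thread_num`), the increment above could be
lowered to:
@code
// hypothetical runtime call replacing the inlined 'lock; incl s':
__kmpc_atomic_fixed4_add(&loc, gtid, &s, 1);
@endcode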
37 | |
38 | The names of the functions are encoded by using the data type name and the |
39 | operation name, as in these tables. |
40 | |
41 | Data Type | Data type encoding |
42 | -----------|--------------- |
43 | int8_t | `fixed1` |
44 | uint8_t | `fixed1u` |
45 | int16_t | `fixed2` |
46 | uint16_t | `fixed2u` |
47 | int32_t | `fixed4` |
48 | uint32_t | `fixed4u` |
int64_t | `fixed8`
uint64_t | `fixed8u`
51 | float | `float4` |
52 | double | `float8` |
80-bit float (x87 extended precision) | `float10`
54 | complex<float> | `cmplx4` |
55 | complex<double> | `cmplx8` |
56 | complex<float10> | `cmplx10` |
57 | <br> |
58 | |
59 | Operation | Operation encoding |
60 | ----------|------------------- |
61 | + | add |
62 | - | sub |
63 | \* | mul |
64 | / | div |
65 | & | andb |
66 | << | shl |
67 | \>\> | shr |
68 | \| | orb |
69 | ^ | xor |
70 | && | andl |
71 | \|\| | orl |
72 | maximum | max |
73 | minimum | min |
74 | .eqv. | eqv |
75 | .neqv. | neqv |
76 | |
77 | <br> |
78 | For non-commutative operations, `_rev` can also be added for the reversed |
79 | operation. For the functions that capture the result, the suffix `_cpt` is |
80 | added. |
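For example, `__kmpc_atomic_fixed4_sub_rev` performs the reversed update
`*lhs = rhs - *lhs`, while `__kmpc_atomic_fixed4_sub_cpt` performs `*lhs -= rhs`
and also returns the captured value.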
81 | |
82 | Update Functions |
83 | ================ |
The general form of an atomic function that just performs an update (without a
`capture`) is
86 | @code |
87 | void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * |
88 | lhs, TYPE rhs ); |
89 | @endcode |
@param id_ref a pointer to the source location
91 | @param gtid the global thread id |
92 | @param lhs a pointer to the left operand |
93 | @param rhs the right operand |
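
As a sketch (again with `loc` and `gtid` standing for the compiler-supplied
location descriptor and global thread id), an atomic update of a `double`
could look like:
@code
double x = 1.0;
// x *= 2.0, performed atomically:
__kmpc_atomic_float8_mul(&loc, gtid, &x, 2.0);
@endcode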
94 | |
95 | `capture` functions |
96 | =================== |
The capture functions perform an atomic update and return a result, which is
either the value of the target before the update or the value after it. They
take an additional argument that selects which of the two is returned.
100 | Their general form is therefore |
101 | @code |
102 | TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * |
103 | lhs, TYPE rhs, int flag ); |
104 | @endcode |
@param id_ref a pointer to the source location
106 | @param gtid the global thread id |
107 | @param lhs a pointer to the left operand |
108 | @param rhs the right operand |
109 | @param flag one if the result is to be captured *after* the operation, zero if |
110 | captured *before*. |
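
As a sketch using the 32-bit integer add-and-capture entrypoint (with `loc`
and `gtid` as above):
@code
kmp_int32 s = 10;
kmp_int32 after = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 5, 1);
// s == 15, after == 15 (captured after the operation)
kmp_int32 before = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 5, 0);
// s == 20, before == 15 (captured before the operation)
@endcode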
111 | |
The one exception to this is the `complex<float>` type, for which the value is
not returned; instead, an extra output pointer argument is passed.
114 | |
115 | They look like |
116 | @code |
117 | void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * |
118 | lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); |
119 | @endcode |
120 | |
121 | Read and Write Operations |
122 | ========================= |
123 | The OpenMP<sup>*</sup> standard now supports atomic operations that simply |
124 | ensure that the value is read or written atomically, with no modification |
125 | performed. In many cases on IA-32 architecture these operations can be inlined |
126 | since the architecture guarantees that no tearing occurs on aligned objects |
127 | accessed with a single memory operation of up to 64 bits in size. |
128 | |
129 | The general form of the read operations is |
130 | @code |
131 | TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc ); |
132 | @endcode |
133 | |
134 | For the write operations the form is |
135 | @code |
136 | void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs |
137 | ); |
138 | @endcode |
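
As a sketch, an atomic read followed by an atomic write of a `double` (with
`loc` and `gtid` as above):
@code
double x = 0.0;
double snapshot = __kmpc_atomic_float8_rd(&loc, gtid, &x); // atomic read of x
__kmpc_atomic_float8_wr(&loc, gtid, &x, 42.0); // atomic write: x = 42.0
@endcode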
139 | |
140 | Full list of functions |
141 | ====================== |
142 | This leads to the generation of 376 atomic functions, as follows. |
143 | |
144 | Functions for integers |
145 | --------------------- |
There are versions here for integers of sizes 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
148 | @code |
149 | __kmpc_atomic_fixed1_add |
150 | __kmpc_atomic_fixed1_add_cpt |
151 | __kmpc_atomic_fixed1_add_fp |
152 | __kmpc_atomic_fixed1_andb |
153 | __kmpc_atomic_fixed1_andb_cpt |
154 | __kmpc_atomic_fixed1_andl |
155 | __kmpc_atomic_fixed1_andl_cpt |
156 | __kmpc_atomic_fixed1_div |
157 | __kmpc_atomic_fixed1_div_cpt |
158 | __kmpc_atomic_fixed1_div_cpt_rev |
159 | __kmpc_atomic_fixed1_div_float8 |
160 | __kmpc_atomic_fixed1_div_fp |
161 | __kmpc_atomic_fixed1_div_rev |
162 | __kmpc_atomic_fixed1_eqv |
163 | __kmpc_atomic_fixed1_eqv_cpt |
164 | __kmpc_atomic_fixed1_max |
165 | __kmpc_atomic_fixed1_max_cpt |
166 | __kmpc_atomic_fixed1_min |
167 | __kmpc_atomic_fixed1_min_cpt |
168 | __kmpc_atomic_fixed1_mul |
169 | __kmpc_atomic_fixed1_mul_cpt |
170 | __kmpc_atomic_fixed1_mul_float8 |
171 | __kmpc_atomic_fixed1_mul_fp |
172 | __kmpc_atomic_fixed1_neqv |
173 | __kmpc_atomic_fixed1_neqv_cpt |
174 | __kmpc_atomic_fixed1_orb |
175 | __kmpc_atomic_fixed1_orb_cpt |
176 | __kmpc_atomic_fixed1_orl |
177 | __kmpc_atomic_fixed1_orl_cpt |
178 | __kmpc_atomic_fixed1_rd |
179 | __kmpc_atomic_fixed1_shl |
180 | __kmpc_atomic_fixed1_shl_cpt |
181 | __kmpc_atomic_fixed1_shl_cpt_rev |
182 | __kmpc_atomic_fixed1_shl_rev |
183 | __kmpc_atomic_fixed1_shr |
184 | __kmpc_atomic_fixed1_shr_cpt |
185 | __kmpc_atomic_fixed1_shr_cpt_rev |
186 | __kmpc_atomic_fixed1_shr_rev |
187 | __kmpc_atomic_fixed1_sub |
188 | __kmpc_atomic_fixed1_sub_cpt |
189 | __kmpc_atomic_fixed1_sub_cpt_rev |
190 | __kmpc_atomic_fixed1_sub_fp |
191 | __kmpc_atomic_fixed1_sub_rev |
192 | __kmpc_atomic_fixed1_swp |
193 | __kmpc_atomic_fixed1_wr |
194 | __kmpc_atomic_fixed1_xor |
195 | __kmpc_atomic_fixed1_xor_cpt |
196 | __kmpc_atomic_fixed1u_add_fp |
197 | __kmpc_atomic_fixed1u_sub_fp |
198 | __kmpc_atomic_fixed1u_mul_fp |
199 | __kmpc_atomic_fixed1u_div |
200 | __kmpc_atomic_fixed1u_div_cpt |
201 | __kmpc_atomic_fixed1u_div_cpt_rev |
202 | __kmpc_atomic_fixed1u_div_fp |
203 | __kmpc_atomic_fixed1u_div_rev |
204 | __kmpc_atomic_fixed1u_shr |
205 | __kmpc_atomic_fixed1u_shr_cpt |
206 | __kmpc_atomic_fixed1u_shr_cpt_rev |
207 | __kmpc_atomic_fixed1u_shr_rev |
208 | __kmpc_atomic_fixed2_add |
209 | __kmpc_atomic_fixed2_add_cpt |
210 | __kmpc_atomic_fixed2_add_fp |
211 | __kmpc_atomic_fixed2_andb |
212 | __kmpc_atomic_fixed2_andb_cpt |
213 | __kmpc_atomic_fixed2_andl |
214 | __kmpc_atomic_fixed2_andl_cpt |
215 | __kmpc_atomic_fixed2_div |
216 | __kmpc_atomic_fixed2_div_cpt |
217 | __kmpc_atomic_fixed2_div_cpt_rev |
218 | __kmpc_atomic_fixed2_div_float8 |
219 | __kmpc_atomic_fixed2_div_fp |
220 | __kmpc_atomic_fixed2_div_rev |
221 | __kmpc_atomic_fixed2_eqv |
222 | __kmpc_atomic_fixed2_eqv_cpt |
223 | __kmpc_atomic_fixed2_max |
224 | __kmpc_atomic_fixed2_max_cpt |
225 | __kmpc_atomic_fixed2_min |
226 | __kmpc_atomic_fixed2_min_cpt |
227 | __kmpc_atomic_fixed2_mul |
228 | __kmpc_atomic_fixed2_mul_cpt |
229 | __kmpc_atomic_fixed2_mul_float8 |
230 | __kmpc_atomic_fixed2_mul_fp |
231 | __kmpc_atomic_fixed2_neqv |
232 | __kmpc_atomic_fixed2_neqv_cpt |
233 | __kmpc_atomic_fixed2_orb |
234 | __kmpc_atomic_fixed2_orb_cpt |
235 | __kmpc_atomic_fixed2_orl |
236 | __kmpc_atomic_fixed2_orl_cpt |
237 | __kmpc_atomic_fixed2_rd |
238 | __kmpc_atomic_fixed2_shl |
239 | __kmpc_atomic_fixed2_shl_cpt |
240 | __kmpc_atomic_fixed2_shl_cpt_rev |
241 | __kmpc_atomic_fixed2_shl_rev |
242 | __kmpc_atomic_fixed2_shr |
243 | __kmpc_atomic_fixed2_shr_cpt |
244 | __kmpc_atomic_fixed2_shr_cpt_rev |
245 | __kmpc_atomic_fixed2_shr_rev |
246 | __kmpc_atomic_fixed2_sub |
247 | __kmpc_atomic_fixed2_sub_cpt |
248 | __kmpc_atomic_fixed2_sub_cpt_rev |
249 | __kmpc_atomic_fixed2_sub_fp |
250 | __kmpc_atomic_fixed2_sub_rev |
251 | __kmpc_atomic_fixed2_swp |
252 | __kmpc_atomic_fixed2_wr |
253 | __kmpc_atomic_fixed2_xor |
254 | __kmpc_atomic_fixed2_xor_cpt |
255 | __kmpc_atomic_fixed2u_add_fp |
256 | __kmpc_atomic_fixed2u_sub_fp |
257 | __kmpc_atomic_fixed2u_mul_fp |
258 | __kmpc_atomic_fixed2u_div |
259 | __kmpc_atomic_fixed2u_div_cpt |
260 | __kmpc_atomic_fixed2u_div_cpt_rev |
261 | __kmpc_atomic_fixed2u_div_fp |
262 | __kmpc_atomic_fixed2u_div_rev |
263 | __kmpc_atomic_fixed2u_shr |
264 | __kmpc_atomic_fixed2u_shr_cpt |
265 | __kmpc_atomic_fixed2u_shr_cpt_rev |
266 | __kmpc_atomic_fixed2u_shr_rev |
267 | __kmpc_atomic_fixed4_add |
268 | __kmpc_atomic_fixed4_add_cpt |
269 | __kmpc_atomic_fixed4_add_fp |
270 | __kmpc_atomic_fixed4_andb |
271 | __kmpc_atomic_fixed4_andb_cpt |
272 | __kmpc_atomic_fixed4_andl |
273 | __kmpc_atomic_fixed4_andl_cpt |
274 | __kmpc_atomic_fixed4_div |
275 | __kmpc_atomic_fixed4_div_cpt |
276 | __kmpc_atomic_fixed4_div_cpt_rev |
277 | __kmpc_atomic_fixed4_div_float8 |
278 | __kmpc_atomic_fixed4_div_fp |
279 | __kmpc_atomic_fixed4_div_rev |
280 | __kmpc_atomic_fixed4_eqv |
281 | __kmpc_atomic_fixed4_eqv_cpt |
282 | __kmpc_atomic_fixed4_max |
283 | __kmpc_atomic_fixed4_max_cpt |
284 | __kmpc_atomic_fixed4_min |
285 | __kmpc_atomic_fixed4_min_cpt |
286 | __kmpc_atomic_fixed4_mul |
287 | __kmpc_atomic_fixed4_mul_cpt |
288 | __kmpc_atomic_fixed4_mul_float8 |
289 | __kmpc_atomic_fixed4_mul_fp |
290 | __kmpc_atomic_fixed4_neqv |
291 | __kmpc_atomic_fixed4_neqv_cpt |
292 | __kmpc_atomic_fixed4_orb |
293 | __kmpc_atomic_fixed4_orb_cpt |
294 | __kmpc_atomic_fixed4_orl |
295 | __kmpc_atomic_fixed4_orl_cpt |
296 | __kmpc_atomic_fixed4_rd |
297 | __kmpc_atomic_fixed4_shl |
298 | __kmpc_atomic_fixed4_shl_cpt |
299 | __kmpc_atomic_fixed4_shl_cpt_rev |
300 | __kmpc_atomic_fixed4_shl_rev |
301 | __kmpc_atomic_fixed4_shr |
302 | __kmpc_atomic_fixed4_shr_cpt |
303 | __kmpc_atomic_fixed4_shr_cpt_rev |
304 | __kmpc_atomic_fixed4_shr_rev |
305 | __kmpc_atomic_fixed4_sub |
306 | __kmpc_atomic_fixed4_sub_cpt |
307 | __kmpc_atomic_fixed4_sub_cpt_rev |
308 | __kmpc_atomic_fixed4_sub_fp |
309 | __kmpc_atomic_fixed4_sub_rev |
310 | __kmpc_atomic_fixed4_swp |
311 | __kmpc_atomic_fixed4_wr |
312 | __kmpc_atomic_fixed4_xor |
313 | __kmpc_atomic_fixed4_xor_cpt |
314 | __kmpc_atomic_fixed4u_add_fp |
315 | __kmpc_atomic_fixed4u_sub_fp |
316 | __kmpc_atomic_fixed4u_mul_fp |
317 | __kmpc_atomic_fixed4u_div |
318 | __kmpc_atomic_fixed4u_div_cpt |
319 | __kmpc_atomic_fixed4u_div_cpt_rev |
320 | __kmpc_atomic_fixed4u_div_fp |
321 | __kmpc_atomic_fixed4u_div_rev |
322 | __kmpc_atomic_fixed4u_shr |
323 | __kmpc_atomic_fixed4u_shr_cpt |
324 | __kmpc_atomic_fixed4u_shr_cpt_rev |
325 | __kmpc_atomic_fixed4u_shr_rev |
326 | __kmpc_atomic_fixed8_add |
327 | __kmpc_atomic_fixed8_add_cpt |
328 | __kmpc_atomic_fixed8_add_fp |
329 | __kmpc_atomic_fixed8_andb |
330 | __kmpc_atomic_fixed8_andb_cpt |
331 | __kmpc_atomic_fixed8_andl |
332 | __kmpc_atomic_fixed8_andl_cpt |
333 | __kmpc_atomic_fixed8_div |
334 | __kmpc_atomic_fixed8_div_cpt |
335 | __kmpc_atomic_fixed8_div_cpt_rev |
336 | __kmpc_atomic_fixed8_div_float8 |
337 | __kmpc_atomic_fixed8_div_fp |
338 | __kmpc_atomic_fixed8_div_rev |
339 | __kmpc_atomic_fixed8_eqv |
340 | __kmpc_atomic_fixed8_eqv_cpt |
341 | __kmpc_atomic_fixed8_max |
342 | __kmpc_atomic_fixed8_max_cpt |
343 | __kmpc_atomic_fixed8_min |
344 | __kmpc_atomic_fixed8_min_cpt |
345 | __kmpc_atomic_fixed8_mul |
346 | __kmpc_atomic_fixed8_mul_cpt |
347 | __kmpc_atomic_fixed8_mul_float8 |
348 | __kmpc_atomic_fixed8_mul_fp |
349 | __kmpc_atomic_fixed8_neqv |
350 | __kmpc_atomic_fixed8_neqv_cpt |
351 | __kmpc_atomic_fixed8_orb |
352 | __kmpc_atomic_fixed8_orb_cpt |
353 | __kmpc_atomic_fixed8_orl |
354 | __kmpc_atomic_fixed8_orl_cpt |
355 | __kmpc_atomic_fixed8_rd |
356 | __kmpc_atomic_fixed8_shl |
357 | __kmpc_atomic_fixed8_shl_cpt |
358 | __kmpc_atomic_fixed8_shl_cpt_rev |
359 | __kmpc_atomic_fixed8_shl_rev |
360 | __kmpc_atomic_fixed8_shr |
361 | __kmpc_atomic_fixed8_shr_cpt |
362 | __kmpc_atomic_fixed8_shr_cpt_rev |
363 | __kmpc_atomic_fixed8_shr_rev |
364 | __kmpc_atomic_fixed8_sub |
365 | __kmpc_atomic_fixed8_sub_cpt |
366 | __kmpc_atomic_fixed8_sub_cpt_rev |
367 | __kmpc_atomic_fixed8_sub_fp |
368 | __kmpc_atomic_fixed8_sub_rev |
369 | __kmpc_atomic_fixed8_swp |
370 | __kmpc_atomic_fixed8_wr |
371 | __kmpc_atomic_fixed8_xor |
372 | __kmpc_atomic_fixed8_xor_cpt |
373 | __kmpc_atomic_fixed8u_add_fp |
374 | __kmpc_atomic_fixed8u_sub_fp |
375 | __kmpc_atomic_fixed8u_mul_fp |
376 | __kmpc_atomic_fixed8u_div |
377 | __kmpc_atomic_fixed8u_div_cpt |
378 | __kmpc_atomic_fixed8u_div_cpt_rev |
379 | __kmpc_atomic_fixed8u_div_fp |
380 | __kmpc_atomic_fixed8u_div_rev |
381 | __kmpc_atomic_fixed8u_shr |
382 | __kmpc_atomic_fixed8u_shr_cpt |
383 | __kmpc_atomic_fixed8u_shr_cpt_rev |
384 | __kmpc_atomic_fixed8u_shr_rev |
385 | @endcode |
386 | |
387 | Functions for floating point |
388 | ---------------------------- |
There are versions here for floating point numbers of sizes 4, 8, 10 and 16
bytes. (Ten-byte floats are used by the x87 FPU, but are now rare.)
391 | @code |
392 | __kmpc_atomic_float4_add |
393 | __kmpc_atomic_float4_add_cpt |
394 | __kmpc_atomic_float4_add_float8 |
395 | __kmpc_atomic_float4_add_fp |
396 | __kmpc_atomic_float4_div |
397 | __kmpc_atomic_float4_div_cpt |
398 | __kmpc_atomic_float4_div_cpt_rev |
399 | __kmpc_atomic_float4_div_float8 |
400 | __kmpc_atomic_float4_div_fp |
401 | __kmpc_atomic_float4_div_rev |
402 | __kmpc_atomic_float4_max |
403 | __kmpc_atomic_float4_max_cpt |
404 | __kmpc_atomic_float4_min |
405 | __kmpc_atomic_float4_min_cpt |
406 | __kmpc_atomic_float4_mul |
407 | __kmpc_atomic_float4_mul_cpt |
408 | __kmpc_atomic_float4_mul_float8 |
409 | __kmpc_atomic_float4_mul_fp |
410 | __kmpc_atomic_float4_rd |
411 | __kmpc_atomic_float4_sub |
412 | __kmpc_atomic_float4_sub_cpt |
413 | __kmpc_atomic_float4_sub_cpt_rev |
414 | __kmpc_atomic_float4_sub_float8 |
415 | __kmpc_atomic_float4_sub_fp |
416 | __kmpc_atomic_float4_sub_rev |
417 | __kmpc_atomic_float4_swp |
418 | __kmpc_atomic_float4_wr |
419 | __kmpc_atomic_float8_add |
420 | __kmpc_atomic_float8_add_cpt |
421 | __kmpc_atomic_float8_add_fp |
422 | __kmpc_atomic_float8_div |
423 | __kmpc_atomic_float8_div_cpt |
424 | __kmpc_atomic_float8_div_cpt_rev |
425 | __kmpc_atomic_float8_div_fp |
426 | __kmpc_atomic_float8_div_rev |
427 | __kmpc_atomic_float8_max |
428 | __kmpc_atomic_float8_max_cpt |
429 | __kmpc_atomic_float8_min |
430 | __kmpc_atomic_float8_min_cpt |
431 | __kmpc_atomic_float8_mul |
432 | __kmpc_atomic_float8_mul_cpt |
433 | __kmpc_atomic_float8_mul_fp |
434 | __kmpc_atomic_float8_rd |
435 | __kmpc_atomic_float8_sub |
436 | __kmpc_atomic_float8_sub_cpt |
437 | __kmpc_atomic_float8_sub_cpt_rev |
438 | __kmpc_atomic_float8_sub_fp |
439 | __kmpc_atomic_float8_sub_rev |
440 | __kmpc_atomic_float8_swp |
441 | __kmpc_atomic_float8_wr |
442 | __kmpc_atomic_float10_add |
443 | __kmpc_atomic_float10_add_cpt |
444 | __kmpc_atomic_float10_add_fp |
445 | __kmpc_atomic_float10_div |
446 | __kmpc_atomic_float10_div_cpt |
447 | __kmpc_atomic_float10_div_cpt_rev |
448 | __kmpc_atomic_float10_div_fp |
449 | __kmpc_atomic_float10_div_rev |
450 | __kmpc_atomic_float10_mul |
451 | __kmpc_atomic_float10_mul_cpt |
452 | __kmpc_atomic_float10_mul_fp |
453 | __kmpc_atomic_float10_rd |
454 | __kmpc_atomic_float10_sub |
455 | __kmpc_atomic_float10_sub_cpt |
456 | __kmpc_atomic_float10_sub_cpt_rev |
457 | __kmpc_atomic_float10_sub_fp |
458 | __kmpc_atomic_float10_sub_rev |
459 | __kmpc_atomic_float10_swp |
460 | __kmpc_atomic_float10_wr |
461 | __kmpc_atomic_float16_add |
462 | __kmpc_atomic_float16_add_cpt |
463 | __kmpc_atomic_float16_div |
464 | __kmpc_atomic_float16_div_cpt |
465 | __kmpc_atomic_float16_div_cpt_rev |
466 | __kmpc_atomic_float16_div_rev |
467 | __kmpc_atomic_float16_max |
468 | __kmpc_atomic_float16_max_cpt |
469 | __kmpc_atomic_float16_min |
470 | __kmpc_atomic_float16_min_cpt |
471 | __kmpc_atomic_float16_mul |
472 | __kmpc_atomic_float16_mul_cpt |
473 | __kmpc_atomic_float16_rd |
474 | __kmpc_atomic_float16_sub |
475 | __kmpc_atomic_float16_sub_cpt |
476 | __kmpc_atomic_float16_sub_cpt_rev |
477 | __kmpc_atomic_float16_sub_rev |
478 | __kmpc_atomic_float16_swp |
479 | __kmpc_atomic_float16_wr |
480 | @endcode |
481 | |
482 | Functions for Complex types |
483 | --------------------------- |
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the component float,
486 | *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an |
487 | operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`. |
488 | |
489 | @code |
490 | __kmpc_atomic_cmplx4_add |
491 | __kmpc_atomic_cmplx4_add_cmplx8 |
492 | __kmpc_atomic_cmplx4_add_cpt |
493 | __kmpc_atomic_cmplx4_div |
494 | __kmpc_atomic_cmplx4_div_cmplx8 |
495 | __kmpc_atomic_cmplx4_div_cpt |
496 | __kmpc_atomic_cmplx4_div_cpt_rev |
497 | __kmpc_atomic_cmplx4_div_rev |
498 | __kmpc_atomic_cmplx4_mul |
499 | __kmpc_atomic_cmplx4_mul_cmplx8 |
500 | __kmpc_atomic_cmplx4_mul_cpt |
501 | __kmpc_atomic_cmplx4_rd |
502 | __kmpc_atomic_cmplx4_sub |
503 | __kmpc_atomic_cmplx4_sub_cmplx8 |
504 | __kmpc_atomic_cmplx4_sub_cpt |
505 | __kmpc_atomic_cmplx4_sub_cpt_rev |
506 | __kmpc_atomic_cmplx4_sub_rev |
507 | __kmpc_atomic_cmplx4_swp |
508 | __kmpc_atomic_cmplx4_wr |
509 | __kmpc_atomic_cmplx8_add |
510 | __kmpc_atomic_cmplx8_add_cpt |
511 | __kmpc_atomic_cmplx8_div |
512 | __kmpc_atomic_cmplx8_div_cpt |
513 | __kmpc_atomic_cmplx8_div_cpt_rev |
514 | __kmpc_atomic_cmplx8_div_rev |
515 | __kmpc_atomic_cmplx8_mul |
516 | __kmpc_atomic_cmplx8_mul_cpt |
517 | __kmpc_atomic_cmplx8_rd |
518 | __kmpc_atomic_cmplx8_sub |
519 | __kmpc_atomic_cmplx8_sub_cpt |
520 | __kmpc_atomic_cmplx8_sub_cpt_rev |
521 | __kmpc_atomic_cmplx8_sub_rev |
522 | __kmpc_atomic_cmplx8_swp |
523 | __kmpc_atomic_cmplx8_wr |
524 | __kmpc_atomic_cmplx10_add |
525 | __kmpc_atomic_cmplx10_add_cpt |
526 | __kmpc_atomic_cmplx10_div |
527 | __kmpc_atomic_cmplx10_div_cpt |
528 | __kmpc_atomic_cmplx10_div_cpt_rev |
529 | __kmpc_atomic_cmplx10_div_rev |
530 | __kmpc_atomic_cmplx10_mul |
531 | __kmpc_atomic_cmplx10_mul_cpt |
532 | __kmpc_atomic_cmplx10_rd |
533 | __kmpc_atomic_cmplx10_sub |
534 | __kmpc_atomic_cmplx10_sub_cpt |
535 | __kmpc_atomic_cmplx10_sub_cpt_rev |
536 | __kmpc_atomic_cmplx10_sub_rev |
537 | __kmpc_atomic_cmplx10_swp |
538 | __kmpc_atomic_cmplx10_wr |
539 | __kmpc_atomic_cmplx16_add |
540 | __kmpc_atomic_cmplx16_add_cpt |
541 | __kmpc_atomic_cmplx16_div |
542 | __kmpc_atomic_cmplx16_div_cpt |
543 | __kmpc_atomic_cmplx16_div_cpt_rev |
544 | __kmpc_atomic_cmplx16_div_rev |
545 | __kmpc_atomic_cmplx16_mul |
546 | __kmpc_atomic_cmplx16_mul_cpt |
547 | __kmpc_atomic_cmplx16_rd |
548 | __kmpc_atomic_cmplx16_sub |
549 | __kmpc_atomic_cmplx16_sub_cpt |
550 | __kmpc_atomic_cmplx16_sub_cpt_rev |
551 | __kmpc_atomic_cmplx16_swp |
552 | __kmpc_atomic_cmplx16_wr |
553 | @endcode |
554 | */ |
555 | |
556 | /*! |
557 | @ingroup ATOMIC_OPS |
558 | @{ |
559 | */ |
560 | |
561 | /* |
562 | * Global vars |
563 | */ |
564 | |
565 | #ifndef KMP_GOMP_COMPAT |
566 | int __kmp_atomic_mode = 1; // Intel perf |
567 | #else |
568 | int __kmp_atomic_mode = 2; // GOMP compatibility |
569 | #endif /* KMP_GOMP_COMPAT */ |
570 | |
571 | KMP_ALIGN(128) |
572 | |
573 | // Control access to all user coded atomics in Gnu compat mode |
574 | kmp_atomic_lock_t __kmp_atomic_lock; |
575 | // Control access to all user coded atomics for 1-byte fixed data types |
576 | kmp_atomic_lock_t __kmp_atomic_lock_1i; |
577 | // Control access to all user coded atomics for 2-byte fixed data types |
578 | kmp_atomic_lock_t __kmp_atomic_lock_2i; |
579 | // Control access to all user coded atomics for 4-byte fixed data types |
580 | kmp_atomic_lock_t __kmp_atomic_lock_4i; |
581 | // Control access to all user coded atomics for kmp_real32 data type |
582 | kmp_atomic_lock_t __kmp_atomic_lock_4r; |
583 | // Control access to all user coded atomics for 8-byte fixed data types |
584 | kmp_atomic_lock_t __kmp_atomic_lock_8i; |
585 | // Control access to all user coded atomics for kmp_real64 data type |
586 | kmp_atomic_lock_t __kmp_atomic_lock_8r; |
587 | // Control access to all user coded atomics for complex byte data type |
588 | kmp_atomic_lock_t __kmp_atomic_lock_8c; |
589 | // Control access to all user coded atomics for long double data type |
590 | kmp_atomic_lock_t __kmp_atomic_lock_10r; |
591 | // Control access to all user coded atomics for _Quad data type |
592 | kmp_atomic_lock_t __kmp_atomic_lock_16r; |
593 | // Control access to all user coded atomics for double complex data type |
594 | kmp_atomic_lock_t __kmp_atomic_lock_16c; |
595 | // Control access to all user coded atomics for long double complex type |
596 | kmp_atomic_lock_t __kmp_atomic_lock_20c; |
597 | // Control access to all user coded atomics for _Quad complex data type |
598 | kmp_atomic_lock_t __kmp_atomic_lock_32c; |
599 | |
600 | /* 2007-03-02: |
601 | Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug |
602 | on *_32 and *_32e. This is just a temporary workaround for the problem. It |
603 | seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines |
604 | in assembler language. */ |
605 | #define KMP_ATOMIC_VOLATILE volatile |
606 | |
607 | #if (KMP_ARCH_X86) && KMP_HAVE_QUAD |
608 | |
609 | static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) { |
610 | return lhs.q + rhs.q; |
611 | } |
612 | static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) { |
613 | return lhs.q - rhs.q; |
614 | } |
615 | static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) { |
616 | return lhs.q * rhs.q; |
617 | } |
618 | static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) { |
619 | return lhs.q / rhs.q; |
620 | } |
621 | static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) { |
622 | return lhs.q < rhs.q; |
623 | } |
624 | static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) { |
625 | return lhs.q > rhs.q; |
626 | } |
627 | |
628 | static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) { |
629 | return lhs.q + rhs.q; |
630 | } |
631 | static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) { |
632 | return lhs.q - rhs.q; |
633 | } |
634 | static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) { |
635 | return lhs.q * rhs.q; |
636 | } |
637 | static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) { |
638 | return lhs.q / rhs.q; |
639 | } |
640 | static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) { |
641 | return lhs.q < rhs.q; |
642 | } |
643 | static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) { |
644 | return lhs.q > rhs.q; |
645 | } |
646 | |
647 | static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs, |
648 | kmp_cmplx128_a4_t &rhs) { |
649 | return lhs.q + rhs.q; |
650 | } |
651 | static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs, |
652 | kmp_cmplx128_a4_t &rhs) { |
653 | return lhs.q - rhs.q; |
654 | } |
655 | static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs, |
656 | kmp_cmplx128_a4_t &rhs) { |
657 | return lhs.q * rhs.q; |
658 | } |
659 | static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs, |
660 | kmp_cmplx128_a4_t &rhs) { |
661 | return lhs.q / rhs.q; |
662 | } |
663 | |
664 | static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs, |
665 | kmp_cmplx128_a16_t &rhs) { |
666 | return lhs.q + rhs.q; |
667 | } |
668 | static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs, |
669 | kmp_cmplx128_a16_t &rhs) { |
670 | return lhs.q - rhs.q; |
671 | } |
672 | static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs, |
673 | kmp_cmplx128_a16_t &rhs) { |
674 | return lhs.q * rhs.q; |
675 | } |
676 | static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs, |
677 | kmp_cmplx128_a16_t &rhs) { |
678 | return lhs.q / rhs.q; |
679 | } |
680 | |
681 | #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD |
682 | |
683 | // ATOMIC implementation routines ----------------------------------------- |
684 | // One routine for each operation and operand type. |
// All routine declarations look like
686 | // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs ); |
687 | |
688 | #define KMP_CHECK_GTID \ |
689 | if (gtid == KMP_GTID_UNKNOWN) { \ |
690 | gtid = __kmp_entry_gtid(); \ |
691 | } // check and get gtid when needed |
692 | |
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned
//           integers)
696 | // OP_ID - operation identifier (add, sub, mul, ...) |
697 | // TYPE - operands' type |
698 | #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ |
699 | RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ |
700 | TYPE *lhs, TYPE rhs) { \ |
701 | KMP_DEBUG_ASSERT(__kmp_init_serial); \ |
702 | KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); |
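
// For illustration only: ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens a
// definition roughly equivalent to
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
// The function body and closing brace are supplied by the macro that uses it.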
703 | |
704 | // ------------------------------------------------------------------------ |
705 | // Lock variables used for critical sections for various size operands |
706 | #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat |
707 | #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char |
708 | #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short |
709 | #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int |
710 | #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float |
711 | #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int |
712 | #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double |
713 | #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex |
714 | #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double |
715 | #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad |
716 | #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex |
717 | #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex |
718 | #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex |
719 | |
720 | // ------------------------------------------------------------------------ |
721 | // Operation on *lhs, rhs bound by critical section |
722 | // OP - operator (it's supposed to contain an assignment) |
723 | // LCK_ID - lock identifier |
// Note: don't check gtid as it should always be valid
// 1- and 2-byte operands expect a valid gtid; for other sizes, check it before
// invoking this macro
726 | #define OP_CRITICAL(OP, LCK_ID) \ |
727 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
728 | \ |
729 | (*lhs) OP(rhs); \ |
730 | \ |
731 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); |
732 | |
733 | #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \ |
734 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
735 | (*lhs) = (TYPE)((*lhs)OP rhs); \ |
736 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); |
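
// For illustration only: OP_UPDATE_CRITICAL(kmp_cmplx64, +, 16c) expands to
// roughly
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
//   (*lhs) = (kmp_cmplx64)((*lhs) + rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);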
737 | |
738 | // ------------------------------------------------------------------------ |
739 | // For GNU compatibility, we may need to use a critical section, |
740 | // even though it is not required by the ISA. |
741 | // |
742 | // On IA-32 architecture, all atomic operations except for fixed 4 byte add, |
743 | // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common |
744 | // critical section. On Intel(R) 64, all atomic operations are done with fetch |
745 | // and add or compare and exchange. Therefore, the FLAG parameter to this |
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions that
// require a critical section, where we predict that they will be implemented
748 | // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()). |
749 | // |
750 | // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct, |
751 | // the FLAG parameter should always be 1. If we know that we will be using |
752 | // a critical section, then we want to make certain that we use the generic |
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 | // locks that are specialized based upon the size or type of the data. |
755 | // |
756 | // If FLAG is 0, then we are relying on dead code elimination by the build |
757 | // compiler to get rid of the useless block of code, and save a needless |
758 | // branch at runtime. |
759 | |
760 | #ifdef KMP_GOMP_COMPAT |
761 | #define OP_GOMP_CRITICAL(OP, FLAG) \ |
762 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
763 | KMP_CHECK_GTID; \ |
764 | OP_CRITICAL(OP, 0); \ |
765 | return; \ |
766 | } |
767 | |
768 | #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \ |
769 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
770 | KMP_CHECK_GTID; \ |
771 | OP_UPDATE_CRITICAL(TYPE, OP, 0); \ |
772 | return; \ |
773 | } |
774 | #else |
775 | #define OP_GOMP_CRITICAL(OP, FLAG) |
776 | #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) |
777 | #endif /* KMP_GOMP_COMPAT */ |
778 | |
779 | #if KMP_MIC |
780 | #define KMP_DO_PAUSE _mm_delay_32(1) |
781 | #else |
782 | #define KMP_DO_PAUSE |
783 | #endif /* KMP_MIC */ |
784 | |
785 | // ------------------------------------------------------------------------ |
786 | // Operation on *lhs, rhs using "compare_and_store" routine |
787 | // TYPE - operands' type |
788 | // BITS - size in bits, used to distinguish low level calls |
789 | // OP - operator |
790 | #define OP_CMPXCHG(TYPE, BITS, OP) \ |
791 | { \ |
792 | TYPE old_value, new_value; \ |
793 | old_value = *(TYPE volatile *)lhs; \ |
794 | new_value = (TYPE)(old_value OP rhs); \ |
795 | while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ |
796 | (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ |
797 | *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ |
798 | KMP_DO_PAUSE; \ |
799 | \ |
800 | old_value = *(TYPE volatile *)lhs; \ |
801 | new_value = (TYPE)(old_value OP rhs); \ |
802 | } \ |
803 | } |
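
// For illustration only: OP_CMPXCHG(kmp_real32, 32, +) keeps re-reading *lhs,
// computing old_value + rhs, and retrying KMP_COMPARE_AND_STORE_ACQ32 until
// the store succeeds, i.e. until no other thread modified *lhs between the
// read and the compare-and-store.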
804 | |
805 | #if USE_CMPXCHG_FIX |
806 | // 2007-06-25: |
807 | // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32 |
808 | // and win_32e are affected (I verified the asm). Compiler ignores the volatile |
809 | // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the |
810 | // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of |
811 | // the workaround. |
812 | #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ |
813 | { \ |
814 | struct _sss { \ |
815 | TYPE cmp; \ |
816 | kmp_int##BITS *vvv; \ |
817 | }; \ |
818 | struct _sss old_value, new_value; \ |
819 | old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ |
820 | new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ |
821 | *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ |
822 | new_value.cmp = (TYPE)(old_value.cmp OP rhs); \ |
823 | while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ |
824 | (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ |
825 | *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ |
826 | KMP_DO_PAUSE; \ |
827 | \ |
828 | *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ |
829 | new_value.cmp = (TYPE)(old_value.cmp OP rhs); \ |
830 | } \ |
831 | } |
832 | // end of the first part of the workaround for C78287 |
833 | #endif // USE_CMPXCHG_FIX |
834 | |
835 | #if KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM) |
836 | // Undo explicit type casts to get MSVC ARM64 to build. Uses |
837 | // OP_CMPXCHG_WORKAROUND definition for OP_CMPXCHG |
838 | #undef OP_CMPXCHG |
839 | #define OP_CMPXCHG(TYPE, BITS, OP) \ |
840 | { \ |
841 | struct _sss { \ |
842 | TYPE cmp; \ |
843 | kmp_int##BITS *vvv; \ |
844 | }; \ |
845 | struct _sss old_value, new_value; \ |
846 | old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ |
847 | new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ |
848 | *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ |
849 | new_value.cmp = old_value.cmp OP rhs; \ |
850 | while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ |
851 | (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ |
852 | *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ |
853 | KMP_DO_PAUSE; \ |
854 | \ |
855 | *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ |
856 | new_value.cmp = old_value.cmp OP rhs; \ |
857 | } \ |
858 | } |
859 | |
860 | #undef OP_UPDATE_CRITICAL |
861 | #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \ |
862 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
863 | (*lhs) = (*lhs)OP rhs; \ |
864 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); |
865 | |
866 | #endif // KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM) |
867 | |
868 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
869 | |
870 | // ------------------------------------------------------------------------ |
871 | // X86 or X86_64: no alignment problems ==================================== |
872 | #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ |
873 | GOMP_FLAG) \ |
874 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
875 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
876 | /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ |
877 | KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ |
878 | } |
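
// For illustration only: ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, 0)
// defines __kmpc_atomic_fixed4_sub as an atomic fetch-and-add of the negated
// rhs, i.e. KMP_TEST_THEN_ADD32(lhs, -rhs).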
879 | // ------------------------------------------------------------------------- |
880 | #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ |
881 | GOMP_FLAG) \ |
882 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
883 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
884 | OP_CMPXCHG(TYPE, BITS, OP) \ |
885 | } |
886 | #if USE_CMPXCHG_FIX |
887 | // ------------------------------------------------------------------------- |
888 | // workaround for C78287 (complex(kind=4) data type) |
889 | #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ |
890 | MASK, GOMP_FLAG) \ |
891 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
892 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
893 | OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ |
894 | } |
895 | // end of the second part of the workaround for C78287 |
896 | #endif // USE_CMPXCHG_FIX |
897 | |
898 | #else |
899 | // ------------------------------------------------------------------------- |
900 | // Code for other architectures that don't handle unaligned accesses. |
901 | #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ |
902 | GOMP_FLAG) \ |
903 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
904 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
905 | if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ |
906 | /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ |
907 | KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ |
908 | } else { \ |
909 | KMP_CHECK_GTID; \ |
910 | OP_UPDATE_CRITICAL(TYPE, OP, \ |
911 | LCK_ID) /* unaligned address - use critical */ \ |
912 | } \ |
913 | } |
914 | // ------------------------------------------------------------------------- |
915 | #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ |
916 | GOMP_FLAG) \ |
917 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
918 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
919 | if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ |
920 | OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ |
921 | } else { \ |
922 | KMP_CHECK_GTID; \ |
923 | OP_UPDATE_CRITICAL(TYPE, OP, \ |
924 | LCK_ID) /* unaligned address - use critical */ \ |
925 | } \ |
926 | } |
927 | #if USE_CMPXCHG_FIX |
928 | // ------------------------------------------------------------------------- |
929 | // workaround for C78287 (complex(kind=4) data type) |
930 | #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ |
931 | MASK, GOMP_FLAG) \ |
932 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
933 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
934 | if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ |
935 | OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ |
936 | } else { \ |
937 | KMP_CHECK_GTID; \ |
938 | OP_UPDATE_CRITICAL(TYPE, OP, \ |
939 | LCK_ID) /* unaligned address - use critical */ \ |
940 | } \ |
941 | } |
942 | // end of the second part of the workaround for C78287 |
943 | #endif // USE_CMPXCHG_FIX |
944 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
945 | |
946 | // Routines for ATOMIC 4-byte operands addition and subtraction |
947 | ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, |
948 | 0) // __kmpc_atomic_fixed4_add |
949 | ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, |
950 | 0) // __kmpc_atomic_fixed4_sub |
951 | |
952 | ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, |
953 | KMP_ARCH_X86) // __kmpc_atomic_float4_add |
954 | ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3, |
955 | KMP_ARCH_X86) // __kmpc_atomic_float4_sub |
956 | |
957 | // Routines for ATOMIC 8-byte operands addition and subtraction |
958 | ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7, |
959 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_add |
960 | ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7, |
961 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub |
962 | |
963 | ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7, |
964 | KMP_ARCH_X86) // __kmpc_atomic_float8_add |
965 | ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7, |
966 | KMP_ARCH_X86) // __kmpc_atomic_float8_sub |
967 | |
968 | // ------------------------------------------------------------------------ |
969 | // Entries definition for integer operands |
970 | // TYPE_ID - operands type and size (fixed4, float4) |
971 | // OP_ID - operation identifier (add, sub, mul, ...) |
972 | // TYPE - operand type |
973 | // BITS - size in bits, used to distinguish low level calls |
974 | // OP - operator (used in critical section) |
975 | // LCK_ID - lock identifier, used to possibly distinguish lock variable |
976 | // MASK - used for alignment check |
977 | |
978 | // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG |
979 | // ------------------------------------------------------------------------ |
980 | // Routines for ATOMIC integer operands, other operators |
981 | // ------------------------------------------------------------------------ |
982 | // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG |
983 | ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0, |
984 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_add |
985 | ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0, |
986 | 0) // __kmpc_atomic_fixed1_andb |
987 | ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0, |
988 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_div |
989 | ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0, |
990 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div |
991 | ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0, |
992 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul |
993 | ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0, |
994 | 0) // __kmpc_atomic_fixed1_orb |
995 | ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0, |
996 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl |
997 | ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0, |
998 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr |
999 | ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, |
1000 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr |
1001 | ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0, |
1002 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub |
1003 | ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0, |
1004 | 0) // __kmpc_atomic_fixed1_xor |
1005 | ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1, |
1006 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_add |
1007 | ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1, |
1008 | 0) // __kmpc_atomic_fixed2_andb |
1009 | ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1, |
1010 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_div |
1011 | ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1, |
1012 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div |
1013 | ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1, |
1014 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul |
1015 | ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1, |
1016 | 0) // __kmpc_atomic_fixed2_orb |
1017 | ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1, |
1018 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl |
1019 | ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1, |
1020 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr |
1021 | ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, |
1022 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr |
1023 | ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1, |
1024 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub |
1025 | ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1, |
1026 | 0) // __kmpc_atomic_fixed2_xor |
1027 | ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3, |
1028 | 0) // __kmpc_atomic_fixed4_andb |
1029 | ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3, |
1030 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_div |
1031 | ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3, |
1032 | KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div |
1033 | ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3, |
1034 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul |
1035 | ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3, |
1036 | 0) // __kmpc_atomic_fixed4_orb |
1037 | ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3, |
1038 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl |
1039 | ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3, |
1040 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr |
1041 | ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, |
1042 | KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr |
1043 | ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3, |
1044 | 0) // __kmpc_atomic_fixed4_xor |
1045 | ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7, |
1046 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb |
1047 | ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7, |
1048 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_div |
1049 | ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7, |
1050 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div |
1051 | ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7, |
1052 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul |
1053 | ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7, |
1054 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb |
1055 | ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7, |
1056 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl |
1057 | ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7, |
1058 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr |
1059 | ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, |
1060 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr |
1061 | ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7, |
1062 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor |
1063 | ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3, |
1064 | KMP_ARCH_X86) // __kmpc_atomic_float4_div |
1065 | ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3, |
1066 | KMP_ARCH_X86) // __kmpc_atomic_float4_mul |
1067 | ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7, |
1068 | KMP_ARCH_X86) // __kmpc_atomic_float8_div |
1069 | ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7, |
1070 | KMP_ARCH_X86) // __kmpc_atomic_float8_mul |
1071 | // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG |
1072 | |
1073 | /* ------------------------------------------------------------------------ */ |
1074 | /* Routines for C/C++ Reduction operators && and || */ |
1075 | |
1076 | // ------------------------------------------------------------------------ |
1077 | // Need separate macros for &&, || because there is no combined assignment |
1078 | // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used |
1079 | #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
1080 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
1081 | OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ |
1082 | OP_CRITICAL(= *lhs OP, LCK_ID) \ |
1083 | } |
1084 | |
1085 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1086 | |
1087 | // ------------------------------------------------------------------------ |
1088 | // X86 or X86_64: no alignment problems =================================== |
1089 | #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ |
1090 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
1091 | OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ |
1092 | OP_CMPXCHG(TYPE, BITS, OP) \ |
1093 | } |
1094 | |
1095 | #else |
1096 | // ------------------------------------------------------------------------ |
1097 | // Code for other architectures that don't handle unaligned accesses. |
1098 | #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ |
1099 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
1100 | OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ |
1101 | if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ |
1102 | OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ |
1103 | } else { \ |
1104 | KMP_CHECK_GTID; \ |
1105 | OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \ |
1106 | } \ |
1107 | } |
1108 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
1109 | |
1110 | ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0, |
1111 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl |
1112 | ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0, |
1113 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl |
1114 | ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1, |
1115 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl |
1116 | ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1, |
1117 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl |
1118 | ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3, |
1119 | 0) // __kmpc_atomic_fixed4_andl |
1120 | ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3, |
1121 | 0) // __kmpc_atomic_fixed4_orl |
1122 | ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7, |
1123 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl |
1124 | ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7, |
1125 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl |
1126 | |
1127 | /* ------------------------------------------------------------------------- */ |
/* Routines for Fortran operators that have no C counterpart:               */
1129 | /* MAX, MIN, .EQV., .NEQV. */ |
1130 | /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ |
1131 | /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ |
1132 | |
1133 | // ------------------------------------------------------------------------- |
1134 | // MIN and MAX need separate macros |
// OP - comparison operator used to check whether any update is needed
1136 | #define MIN_MAX_CRITSECT(OP, LCK_ID) \ |
1137 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
1138 | \ |
1139 | if (*lhs OP rhs) { /* still need actions? */ \ |
1140 | *lhs = rhs; \ |
1141 | } \ |
1142 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); |
1143 | |
1144 | // ------------------------------------------------------------------------- |
1145 | #ifdef KMP_GOMP_COMPAT |
1146 | #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \ |
1147 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
1148 | KMP_CHECK_GTID; \ |
1149 | MIN_MAX_CRITSECT(OP, 0); \ |
1150 | return; \ |
1151 | } |
1152 | #else |
1153 | #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) |
1154 | #endif /* KMP_GOMP_COMPAT */ |
1155 | |
1156 | // ------------------------------------------------------------------------- |
1157 | #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ |
1158 | { \ |
1159 | TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
1160 | TYPE old_value; \ |
1161 | temp_val = *lhs; \ |
1162 | old_value = temp_val; \ |
1163 | while (old_value OP rhs && /* still need actions? */ \ |
1164 | !KMP_COMPARE_AND_STORE_ACQ##BITS( \ |
1165 | (kmp_int##BITS *)lhs, \ |
1166 | *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ |
1167 | *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ |
1168 | temp_val = *lhs; \ |
1169 | old_value = temp_val; \ |
1170 | } \ |
1171 | } |
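
// For illustration only: for max (OP is <), MIN_MAX_CMPXCHG keeps retrying the
// compare-and-store of rhs into *lhs as long as the freshly read old_value is
// still smaller than rhs; the loop exits once the store succeeds or *lhs is
// already >= rhs.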
1172 | |
1173 | // ------------------------------------------------------------------------- |
1174 | // 1-byte, 2-byte operands - use critical section |
1175 | #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
1176 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
1177 | if (*lhs OP rhs) { /* need actions? */ \ |
1178 | GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ |
1179 | MIN_MAX_CRITSECT(OP, LCK_ID) \ |
1180 | } \ |
1181 | } |
1182 | |
1183 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1184 | |
1185 | // ------------------------------------------------------------------------- |
1186 | // X86 or X86_64: no alignment problems ==================================== |
1187 | #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ |
1188 | GOMP_FLAG) \ |
1189 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
1190 | if (*lhs OP rhs) { \ |
1191 | GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ |
1192 | MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ |
1193 | } \ |
1194 | } |
1195 | |
1196 | #else |
1197 | // ------------------------------------------------------------------------- |
1198 | // Code for other architectures that don't handle unaligned accesses. |
1199 | #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ |
1200 | GOMP_FLAG) \ |
1201 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
1202 | if (*lhs OP rhs) { \ |
1203 | GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ |
1204 | if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ |
1205 | MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ |
1206 | } else { \ |
1207 | KMP_CHECK_GTID; \ |
1208 | MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \ |
1209 | } \ |
1210 | } \ |
1211 | } |
1212 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
1213 | |
1214 | MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0, |
1215 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_max |
1216 | MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0, |
1217 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_min |
1218 | MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1, |
1219 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_max |
1220 | MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1, |
1221 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_min |
1222 | MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3, |
1223 | 0) // __kmpc_atomic_fixed4_max |
1224 | MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3, |
1225 | 0) // __kmpc_atomic_fixed4_min |
1226 | MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7, |
1227 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_max |
1228 | MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7, |
1229 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_min |
1230 | MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3, |
1231 | KMP_ARCH_X86) // __kmpc_atomic_float4_max |
1232 | MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3, |
1233 | KMP_ARCH_X86) // __kmpc_atomic_float4_min |
1234 | MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7, |
1235 | KMP_ARCH_X86) // __kmpc_atomic_float8_max |
1236 | MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7, |
1237 | KMP_ARCH_X86) // __kmpc_atomic_float8_min |
1238 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1239 | MIN_MAX_CRITICAL(float10, max, long double, <, 10r, |
1240 | 1) // __kmpc_atomic_float10_max |
1241 | MIN_MAX_CRITICAL(float10, min, long double, >, 10r, |
1242 | 1) // __kmpc_atomic_float10_min |
1243 | #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1244 | #if KMP_HAVE_QUAD |
1245 | MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r, |
1246 | 1) // __kmpc_atomic_float16_max |
1247 | MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r, |
1248 | 1) // __kmpc_atomic_float16_min |
1249 | #if (KMP_ARCH_X86) |
1250 | MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r, |
1251 | 1) // __kmpc_atomic_float16_max_a16 |
1252 | MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r, |
1253 | 1) // __kmpc_atomic_float16_min_a16 |
1254 | #endif // (KMP_ARCH_X86) |
1255 | #endif // KMP_HAVE_QUAD |
1256 | // ------------------------------------------------------------------------ |
// Need separate macros for .EQV. because it requires the complement (~)
// OP is ignored for critical sections; ^= ~ is used instead
1259 | #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
1260 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
1261 | OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ |
1262 | OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \ |
1263 | } |
1264 | |
1265 | // ------------------------------------------------------------------------ |
1266 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1267 | // ------------------------------------------------------------------------ |
1268 | // X86 or X86_64: no alignment problems =================================== |
1269 | #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ |
1270 | GOMP_FLAG) \ |
1271 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
1272 | OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ |
1273 | OP_CMPXCHG(TYPE, BITS, OP) \ |
1274 | } |
1275 | // ------------------------------------------------------------------------ |
1276 | #else |
1277 | // ------------------------------------------------------------------------ |
1278 | // Code for other architectures that don't handle unaligned accesses. |
1279 | #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ |
1280 | GOMP_FLAG) \ |
1281 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
1282 | OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \ |
1283 | if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ |
1284 | OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ |
1285 | } else { \ |
1286 | KMP_CHECK_GTID; \ |
1287 | OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \ |
1288 | } \ |
1289 | } |
1290 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
1291 | |
1292 | ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0, |
1293 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv |
1294 | ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1, |
1295 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv |
1296 | ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3, |
1297 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv |
1298 | ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7, |
1299 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv |
1300 | ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, |
1301 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv |
1302 | ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, |
1303 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv |
1304 | ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, |
1305 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv |
1306 | ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, |
1307 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv |
1308 | |
1309 | // ------------------------------------------------------------------------ |
1310 | // Routines for Extended types: long double, _Quad, complex flavours (use |
1311 | // critical section) |
1312 | // TYPE_ID, OP_ID, TYPE - detailed above |
1313 | // OP - operator |
1314 | // LCK_ID - lock identifier, used to possibly distinguish lock variable |
1315 | #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
1316 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
1317 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \ |
1318 | OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \ |
1319 | } |
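
// For illustration only: ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1)
// defines __kmpc_atomic_cmplx8_add; in GOMP-compatibility mode
// (__kmp_atomic_mode == 2) it updates *lhs under the generic __kmp_atomic_lock
// and returns, otherwise it updates *lhs under __kmp_atomic_lock_16c.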
1320 | |
1321 | /* ------------------------------------------------------------------------- */ |
1322 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1323 | // routines for long double type |
1324 | ATOMIC_CRITICAL(float10, add, long double, +, 10r, |
1325 | 1) // __kmpc_atomic_float10_add |
1326 | ATOMIC_CRITICAL(float10, sub, long double, -, 10r, |
1327 | 1) // __kmpc_atomic_float10_sub |
1328 | ATOMIC_CRITICAL(float10, mul, long double, *, 10r, |
1329 | 1) // __kmpc_atomic_float10_mul |
1330 | ATOMIC_CRITICAL(float10, div, long double, /, 10r, |
1331 | 1) // __kmpc_atomic_float10_div |
1332 | #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1333 | #if KMP_HAVE_QUAD |
1334 | // routines for _Quad type |
1335 | ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r, |
1336 | 1) // __kmpc_atomic_float16_add |
1337 | ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r, |
1338 | 1) // __kmpc_atomic_float16_sub |
1339 | ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r, |
1340 | 1) // __kmpc_atomic_float16_mul |
1341 | ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r, |
1342 | 1) // __kmpc_atomic_float16_div |
1343 | #if (KMP_ARCH_X86) |
1344 | ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r, |
1345 | 1) // __kmpc_atomic_float16_add_a16 |
1346 | ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r, |
1347 | 1) // __kmpc_atomic_float16_sub_a16 |
1348 | ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r, |
1349 | 1) // __kmpc_atomic_float16_mul_a16 |
1350 | ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r, |
1351 | 1) // __kmpc_atomic_float16_div_a16 |
1352 | #endif // (KMP_ARCH_X86) |
1353 | #endif // KMP_HAVE_QUAD |
1354 | // routines for complex types |
1355 | |
1356 | #if USE_CMPXCHG_FIX |
1357 | // workaround for C78287 (complex(kind=4) data type) |
1358 | ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, |
1359 | 1) // __kmpc_atomic_cmplx4_add |
1360 | ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, |
1361 | 1) // __kmpc_atomic_cmplx4_sub |
1362 | ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, |
1363 | 1) // __kmpc_atomic_cmplx4_mul |
1364 | ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, |
1365 | 1) // __kmpc_atomic_cmplx4_div |
1366 | // end of the workaround for C78287 |
1367 | #else |
1368 | ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add |
1369 | ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub |
1370 | ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul |
1371 | ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div |
1372 | #endif // USE_CMPXCHG_FIX |
1373 | |
1374 | ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add |
1375 | ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub |
1376 | ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul |
1377 | ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div |
1378 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1379 | ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c, |
1380 | 1) // __kmpc_atomic_cmplx10_add |
1381 | ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c, |
1382 | 1) // __kmpc_atomic_cmplx10_sub |
1383 | ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c, |
1384 | 1) // __kmpc_atomic_cmplx10_mul |
1385 | ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c, |
1386 | 1) // __kmpc_atomic_cmplx10_div |
1387 | #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1388 | #if KMP_HAVE_QUAD |
1389 | ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c, |
1390 | 1) // __kmpc_atomic_cmplx16_add |
1391 | ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c, |
1392 | 1) // __kmpc_atomic_cmplx16_sub |
1393 | ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c, |
1394 | 1) // __kmpc_atomic_cmplx16_mul |
1395 | ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c, |
1396 | 1) // __kmpc_atomic_cmplx16_div |
1397 | #if (KMP_ARCH_X86) |
1398 | ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, |
1399 | 1) // __kmpc_atomic_cmplx16_add_a16 |
1400 | ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, |
1401 | 1) // __kmpc_atomic_cmplx16_sub_a16 |
1402 | ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, |
1403 | 1) // __kmpc_atomic_cmplx16_mul_a16 |
1404 | ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, |
1405 | 1) // __kmpc_atomic_cmplx16_div_a16 |
1406 | #endif // (KMP_ARCH_X86) |
1407 | #endif // KMP_HAVE_QUAD |
1408 | |
1409 | // OpenMP 4.0: x = expr binop x for non-commutative operations. |
1410 | // Supported only on IA-32 architecture and Intel(R) 64 |
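// Illustrative example (not generated code): a compiler that does not inline
// the reversed form could lower
//   #pragma omp atomic
//   x = expr / x;   // kmp_int32 x;
// into a call such as
//   __kmpc_atomic_fixed4_div_rev(&loc, gtid, &x, expr);
// where loc and gtid stand for the usual source-location descriptor and
// global thread id passed to every __kmpc_atomic_* entry point.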
1411 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1412 | |
1413 | // ------------------------------------------------------------------------ |
1414 | // Operation on *lhs, rhs bound by critical section |
1415 | // OP - operator (it's supposed to contain an assignment) |
1416 | // LCK_ID - lock identifier |
// Note: don't check gtid as it should always be valid
// 1-, 2-byte operands: a valid parameter is expected; other sizes: checked
// before this macro
1419 | #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ |
1420 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
1421 | \ |
1422 | (*lhs) = (TYPE)((rhs)OP(*lhs)); \ |
1423 | \ |
1424 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); |
1425 | |
1426 | #ifdef KMP_GOMP_COMPAT |
1427 | #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \ |
1428 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
1429 | KMP_CHECK_GTID; \ |
1430 | OP_CRITICAL_REV(TYPE, OP, 0); \ |
1431 | return; \ |
1432 | } |
1433 | |
1434 | #else |
1435 | #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) |
1436 | #endif /* KMP_GOMP_COMPAT */ |
1437 | |
// Beginning of a definition (provides name, parameters, debug trace)
1439 | // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned |
1440 | // fixed) |
1441 | // OP_ID - operation identifier (add, sub, mul, ...) |
1442 | // TYPE - operands' type |
1443 | #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ |
1444 | RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \ |
1445 | TYPE *lhs, TYPE rhs) { \ |
1446 | KMP_DEBUG_ASSERT(__kmp_init_serial); \ |
1447 | KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid)); |
1448 | |
1449 | // ------------------------------------------------------------------------ |
1450 | // Operation on *lhs, rhs using "compare_and_store" routine |
1451 | // TYPE - operands' type |
1452 | // BITS - size in bits, used to distinguish low level calls |
1453 | // OP - operator |
1454 | // Note: temp_val introduced in order to force the compiler to read |
1455 | // *lhs only once (w/o it the compiler reads *lhs twice) |
1456 | #define OP_CMPXCHG_REV(TYPE, BITS, OP) \ |
1457 | { \ |
1458 | TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
1459 | TYPE old_value, new_value; \ |
1460 | temp_val = *lhs; \ |
1461 | old_value = temp_val; \ |
1462 | new_value = (TYPE)(rhs OP old_value); \ |
1463 | while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ |
1464 | (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ |
1465 | *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ |
1466 | KMP_DO_PAUSE; \ |
1467 | \ |
1468 | temp_val = *lhs; \ |
1469 | old_value = temp_val; \ |
1470 | new_value = (TYPE)(rhs OP old_value); \ |
1471 | } \ |
1472 | } |
1473 | |
1474 | // ------------------------------------------------------------------------- |
1475 | #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \ |
1476 | ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ |
1477 | OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ |
1478 | OP_CMPXCHG_REV(TYPE, BITS, OP) \ |
1479 | } |
1480 | |
1481 | // ------------------------------------------------------------------------ |
1482 | // Entries definition for integer operands |
1483 | // TYPE_ID - operands type and size (fixed4, float4) |
1484 | // OP_ID - operation identifier (add, sub, mul, ...) |
1485 | // TYPE - operand type |
1486 | // BITS - size in bits, used to distinguish low level calls |
1487 | // OP - operator (used in critical section) |
1488 | // LCK_ID - lock identifier, used to possibly distinguish lock variable |
1489 | |
1490 | // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG |
1491 | // ------------------------------------------------------------------------ |
1492 | // Routines for ATOMIC integer operands, other operators |
1493 | // ------------------------------------------------------------------------ |
1494 | // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG |
1495 | ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i, |
1496 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev |
1497 | ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i, |
1498 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev |
1499 | ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i, |
1500 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev |
1501 | ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i, |
1502 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev |
1503 | ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i, |
1504 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev |
1505 | ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i, |
1506 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev |
1507 | |
1508 | ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i, |
1509 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev |
1510 | ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i, |
1511 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev |
1512 | ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i, |
1513 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev |
1514 | ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i, |
1515 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev |
1516 | ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i, |
1517 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev |
1518 | ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i, |
1519 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev |
1520 | |
1521 | ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i, |
1522 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev |
1523 | ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i, |
1524 | KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev |
1525 | ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i, |
1526 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev |
1527 | ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i, |
1528 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev |
1529 | ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i, |
1530 | KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev |
1531 | ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, |
1532 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev |
1533 | |
1534 | ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i, |
1535 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev |
1536 | ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i, |
1537 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev |
1538 | ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i, |
1539 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev |
1540 | ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i, |
1541 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev |
1542 | ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i, |
1543 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev |
1544 | ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i, |
1545 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev |
1546 | |
1547 | ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r, |
1548 | KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev |
1549 | ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r, |
1550 | KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev |
1551 | |
1552 | ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r, |
1553 | KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev |
1554 | ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, |
1555 | KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev |
1556 | // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG |
1557 | |
1558 | // ------------------------------------------------------------------------ |
1559 | // Routines for Extended types: long double, _Quad, complex flavours (use |
1560 | // critical section) |
1561 | // TYPE_ID, OP_ID, TYPE - detailed above |
1562 | // OP - operator |
1563 | // LCK_ID - lock identifier, used to possibly distinguish lock variable |
1564 | #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
1565 | ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ |
1566 | OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ |
1567 | OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ |
1568 | } |
1569 | |
1570 | /* ------------------------------------------------------------------------- */ |
1571 | // routines for long double type |
1572 | ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, |
1573 | 1) // __kmpc_atomic_float10_sub_rev |
1574 | ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, |
1575 | 1) // __kmpc_atomic_float10_div_rev |
1576 | #if KMP_HAVE_QUAD |
1577 | // routines for _Quad type |
1578 | ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, |
1579 | 1) // __kmpc_atomic_float16_sub_rev |
1580 | ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, |
1581 | 1) // __kmpc_atomic_float16_div_rev |
1582 | #if (KMP_ARCH_X86) |
1583 | ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, |
1584 | 1) // __kmpc_atomic_float16_sub_a16_rev |
1585 | ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, |
1586 | 1) // __kmpc_atomic_float16_div_a16_rev |
1587 | #endif // KMP_ARCH_X86 |
1588 | #endif // KMP_HAVE_QUAD |
1589 | |
1590 | // routines for complex types |
1591 | ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, |
1592 | 1) // __kmpc_atomic_cmplx4_sub_rev |
1593 | ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, |
1594 | 1) // __kmpc_atomic_cmplx4_div_rev |
1595 | ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, |
1596 | 1) // __kmpc_atomic_cmplx8_sub_rev |
1597 | ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, |
1598 | 1) // __kmpc_atomic_cmplx8_div_rev |
1599 | ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, |
1600 | 1) // __kmpc_atomic_cmplx10_sub_rev |
1601 | ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, |
1602 | 1) // __kmpc_atomic_cmplx10_div_rev |
1603 | #if KMP_HAVE_QUAD |
1604 | ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, |
1605 | 1) // __kmpc_atomic_cmplx16_sub_rev |
1606 | ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, |
1607 | 1) // __kmpc_atomic_cmplx16_div_rev |
1608 | #if (KMP_ARCH_X86) |
1609 | ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, |
1610 | 1) // __kmpc_atomic_cmplx16_sub_a16_rev |
1611 | ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, |
1612 | 1) // __kmpc_atomic_cmplx16_div_a16_rev |
1613 | #endif // KMP_ARCH_X86 |
1614 | #endif // KMP_HAVE_QUAD |
1615 | |
1616 | #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1617 | // End of OpenMP 4.0: x = expr binop x for non-commutative operations. |
1618 | |
1619 | /* ------------------------------------------------------------------------ */ |
1620 | /* Routines for mixed types of LHS and RHS, when RHS is "larger" */ |
/* Note: in order to reduce the total number of type combinations, it is */
/* assumed that the compiler converts the RHS to the longest floating type, */
/* that is _Quad, before calling any of these routines. */
/* The conversion to _Quad is done by the compiler during the calculation, */
/* and the conversion back to TYPE is done before the assignment, like: */
/*     *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
/* A performance penalty is expected because of software emulation. */
1628 | /* ------------------------------------------------------------------------ */ |
1629 | |
1630 | #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ |
1631 | void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ |
1632 | ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \ |
1633 | KMP_DEBUG_ASSERT(__kmp_init_serial); \ |
1634 | KA_TRACE(100, \ |
1635 | ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ |
1636 | gtid)); |
1637 | |
1638 | // ------------------------------------------------------------------------- |
1639 | #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \ |
1640 | GOMP_FLAG) \ |
1641 | ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ |
1642 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \ |
1643 | OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \ |
1644 | } |
1645 | |
1646 | // ------------------------------------------------------------------------- |
1647 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1648 | // ------------------------------------------------------------------------- |
1649 | // X86 or X86_64: no alignment problems ==================================== |
1650 | #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ |
1651 | LCK_ID, MASK, GOMP_FLAG) \ |
1652 | ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ |
1653 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
1654 | OP_CMPXCHG(TYPE, BITS, OP) \ |
1655 | } |
1656 | // ------------------------------------------------------------------------- |
1657 | #else |
1658 | // ------------------------------------------------------------------------ |
1659 | // Code for other architectures that don't handle unaligned accesses. |
1660 | #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ |
1661 | LCK_ID, MASK, GOMP_FLAG) \ |
1662 | ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ |
1663 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
1664 | if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ |
1665 | OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ |
1666 | } else { \ |
1667 | KMP_CHECK_GTID; \ |
1668 | OP_UPDATE_CRITICAL(TYPE, OP, \ |
1669 | LCK_ID) /* unaligned address - use critical */ \ |
1670 | } \ |
1671 | } |
1672 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
1673 | |
1674 | // ------------------------------------------------------------------------- |
1675 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1676 | // ------------------------------------------------------------------------- |
1677 | #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ |
1678 | RTYPE, LCK_ID, MASK, GOMP_FLAG) \ |
1679 | ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ |
1680 | OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ |
1681 | OP_CMPXCHG_REV(TYPE, BITS, OP) \ |
1682 | } |
1683 | #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ |
1684 | LCK_ID, GOMP_FLAG) \ |
1685 | ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ |
1686 | OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ |
1687 | OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ |
1688 | } |
1689 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
1690 | |
1691 | // RHS=float8 |
1692 | ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, |
1693 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8 |
1694 | ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, |
1695 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8 |
1696 | ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, |
1697 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8 |
1698 | ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, |
1699 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8 |
1700 | ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, |
1701 | 0) // __kmpc_atomic_fixed4_mul_float8 |
1702 | ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, |
1703 | 0) // __kmpc_atomic_fixed4_div_float8 |
1704 | ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, |
1705 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8 |
1706 | ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, |
1707 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8 |
1708 | ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, |
1709 | KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8 |
1710 | ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, |
1711 | KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8 |
1712 | ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, |
1713 | KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8 |
1714 | ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, |
1715 | KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8 |
1716 | |
1717 | // RHS=float16 (deprecated, to be removed when we are sure the compiler does not |
1718 | // use them) |
1719 | #if KMP_HAVE_QUAD |
1720 | ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, |
1721 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp |
1722 | ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, |
1723 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp |
1724 | ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, |
1725 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp |
1726 | ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, |
1727 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp |
1728 | ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, |
1729 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp |
1730 | ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, |
1731 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp |
1732 | ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, |
1733 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp |
1734 | ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, |
1735 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp |
1736 | |
1737 | ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, |
1738 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp |
1739 | ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, |
1740 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp |
1741 | ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, |
1742 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp |
1743 | ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, |
1744 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp |
1745 | ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, |
1746 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp |
1747 | ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, |
1748 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp |
1749 | ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, |
1750 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp |
1751 | ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, |
1752 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp |
1753 | |
1754 | ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, |
1755 | 0) // __kmpc_atomic_fixed4_add_fp |
1756 | ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, |
1757 | 0) // __kmpc_atomic_fixed4u_add_fp |
1758 | ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, |
1759 | 0) // __kmpc_atomic_fixed4_sub_fp |
1760 | ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, |
1761 | 0) // __kmpc_atomic_fixed4u_sub_fp |
1762 | ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, |
1763 | 0) // __kmpc_atomic_fixed4_mul_fp |
1764 | ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, |
1765 | 0) // __kmpc_atomic_fixed4u_mul_fp |
1766 | ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, |
1767 | 0) // __kmpc_atomic_fixed4_div_fp |
1768 | ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, |
1769 | 0) // __kmpc_atomic_fixed4u_div_fp |
1770 | |
1771 | ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, |
1772 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp |
1773 | ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, |
1774 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp |
1775 | ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, |
1776 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp |
1777 | ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, |
1778 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp |
1779 | ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, |
1780 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp |
1781 | ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, |
1782 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp |
1783 | ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, |
1784 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp |
1785 | ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, |
1786 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp |
1787 | |
1788 | ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, |
1789 | KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp |
1790 | ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, |
1791 | KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp |
1792 | ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, |
1793 | KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp |
1794 | ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, |
1795 | KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp |
1796 | |
1797 | ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, |
1798 | KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp |
1799 | ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, |
1800 | KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp |
1801 | ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, |
1802 | KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp |
1803 | ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, |
1804 | KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp |
1805 | |
1806 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1807 | ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, |
1808 | 1) // __kmpc_atomic_float10_add_fp |
1809 | ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, |
1810 | 1) // __kmpc_atomic_float10_sub_fp |
1811 | ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, |
1812 | 1) // __kmpc_atomic_float10_mul_fp |
1813 | ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, |
1814 | 1) // __kmpc_atomic_float10_div_fp |
1815 | |
1816 | // Reverse operations |
1817 | ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, |
1818 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp |
1819 | ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, |
1820 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp |
1821 | ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, |
1822 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp |
1823 | ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, |
1824 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp |
1825 | |
1826 | ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, |
1827 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp |
1828 | ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, |
1829 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp |
1830 | ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, |
1831 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp |
1832 | ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, |
1833 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp |
1834 | |
1835 | ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, |
1836 | 0) // __kmpc_atomic_fixed4_sub_rev_fp |
1837 | ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, |
1838 | 0) // __kmpc_atomic_fixed4u_sub_rev_fp |
1839 | ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, |
1840 | 0) // __kmpc_atomic_fixed4_div_rev_fp |
1841 | ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, |
1842 | 0) // __kmpc_atomic_fixed4u_div_rev_fp |
1843 | |
1844 | ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, |
1845 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp |
1846 | ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, |
1847 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp |
1848 | ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, |
1849 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp |
1850 | ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, |
1851 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp |
1852 | |
1853 | ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, |
1854 | KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp |
1855 | ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, |
1856 | KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp |
1857 | |
1858 | ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, |
1859 | KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp |
1860 | ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, |
1861 | KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp |
1862 | |
1863 | ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r, |
1864 | 1) // __kmpc_atomic_float10_sub_rev_fp |
1865 | ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r, |
1866 | 1) // __kmpc_atomic_float10_div_rev_fp |
1867 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
1868 | |
1869 | #endif // KMP_HAVE_QUAD |
1870 | |
1871 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
1872 | // ------------------------------------------------------------------------ |
1873 | // X86 or X86_64: no alignment problems ==================================== |
1874 | #if USE_CMPXCHG_FIX |
1875 | // workaround for C78287 (complex(kind=4) data type) |
1876 | #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ |
1877 | LCK_ID, MASK, GOMP_FLAG) \ |
1878 | ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ |
1879 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
1880 | OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ |
1881 | } |
1882 | // end of the second part of the workaround for C78287 |
1883 | #else |
1884 | #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ |
1885 | LCK_ID, MASK, GOMP_FLAG) \ |
1886 | ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ |
1887 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
1888 | OP_CMPXCHG(TYPE, BITS, OP) \ |
1889 | } |
1890 | #endif // USE_CMPXCHG_FIX |
1891 | #else |
1892 | // ------------------------------------------------------------------------ |
1893 | // Code for other architectures that don't handle unaligned accesses. |
1894 | #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ |
1895 | LCK_ID, MASK, GOMP_FLAG) \ |
1896 | ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ |
1897 | OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ |
1898 | if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ |
1899 | OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ |
1900 | } else { \ |
1901 | KMP_CHECK_GTID; \ |
1902 | OP_UPDATE_CRITICAL(TYPE, OP, \ |
1903 | LCK_ID) /* unaligned address - use critical */ \ |
1904 | } \ |
1905 | } |
1906 | #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ |
1907 | |
1908 | ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, |
1909 | 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8 |
1910 | ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, |
1911 | 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8 |
1912 | ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, |
1913 | 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8 |
1914 | ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, |
1915 | 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8 |
1916 | |
1917 | // READ, WRITE, CAPTURE |
1918 | |
1919 | // ------------------------------------------------------------------------ |
1920 | // Atomic READ routines |
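// Illustrative example (not generated code): a compiler could lower
//   #pragma omp atomic read
//   v = x;   // kmp_int64 x, v;
// into
//   v = __kmpc_atomic_fixed8_rd(&loc, gtid, &x);
// i.e. the read routines take only the address to read from and return the
// value read.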
1921 | |
1922 | // ------------------------------------------------------------------------ |
// Beginning of a definition (provides name, parameters, debug trace)
1924 | // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned |
1925 | // fixed) |
1926 | // OP_ID - operation identifier (add, sub, mul, ...) |
1927 | // TYPE - operands' type |
1928 | #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ |
1929 | RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ |
1930 | TYPE *loc) { \ |
1931 | KMP_DEBUG_ASSERT(__kmp_init_serial); \ |
1932 | KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); |
1933 | |
1934 | // ------------------------------------------------------------------------ |
// Atomic read of *loc using the "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *loc only once (w/o it the compiler reads *loc twice)
// TODO: check if it is still necessary
// Return the old value regardless of the result of the "compare & swap"
// operation
1943 | #define OP_CMPXCHG_READ(TYPE, BITS, OP) \ |
1944 | { \ |
1945 | TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
1946 | union f_i_union { \ |
1947 | TYPE f_val; \ |
1948 | kmp_int##BITS i_val; \ |
1949 | }; \ |
1950 | union f_i_union old_value; \ |
1951 | temp_val = *loc; \ |
1952 | old_value.f_val = temp_val; \ |
1953 | old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \ |
1954 | (kmp_int##BITS *)loc, \ |
1955 | *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \ |
1956 | *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \ |
1957 | new_value = old_value.f_val; \ |
1958 | return new_value; \ |
1959 | } |
1960 | |
1961 | // ------------------------------------------------------------------------- |
1962 | // Operation on *lhs, rhs bound by critical section |
1963 | // OP - operator (it's supposed to contain an assignment) |
1964 | // LCK_ID - lock identifier |
// Note: don't check gtid as it should always be valid
// 1-, 2-byte operands: a valid parameter is expected; other sizes: checked
// before this macro
1967 | #define OP_CRITICAL_READ(OP, LCK_ID) \ |
1968 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
1969 | \ |
1970 | new_value = (*loc); \ |
1971 | \ |
1972 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); |
1973 | |
1974 | // ------------------------------------------------------------------------- |
1975 | #ifdef KMP_GOMP_COMPAT |
1976 | #define OP_GOMP_CRITICAL_READ(OP, FLAG) \ |
1977 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
1978 | KMP_CHECK_GTID; \ |
1979 | OP_CRITICAL_READ(OP, 0); \ |
1980 | return new_value; \ |
1981 | } |
1982 | #else |
1983 | #define OP_GOMP_CRITICAL_READ(OP, FLAG) |
1984 | #endif /* KMP_GOMP_COMPAT */ |
1985 | |
1986 | // ------------------------------------------------------------------------- |
1987 | #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
1988 | ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ |
1989 | TYPE new_value; \ |
1990 | OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \ |
1991 | new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \ |
1992 | return new_value; \ |
1993 | } |
1994 | // ------------------------------------------------------------------------- |
1995 | #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
1996 | ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ |
1997 | TYPE new_value; \ |
1998 | OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \ |
1999 | OP_CMPXCHG_READ(TYPE, BITS, OP) \ |
2000 | } |
2001 | // ------------------------------------------------------------------------ |
2002 | // Routines for Extended types: long double, _Quad, complex flavours (use |
2003 | // critical section) |
2004 | // TYPE_ID, OP_ID, TYPE - detailed above |
2005 | // OP - operator |
2006 | // LCK_ID - lock identifier, used to possibly distinguish lock variable |
2007 | #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
2008 | ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ |
2009 | TYPE new_value; \ |
2010 | OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \ |
2011 | OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \ |
2012 | return new_value; \ |
2013 | } |
2014 | |
2015 | // ------------------------------------------------------------------------ |
// Fix for cmplx4 read (CQ220361) on Windows* OS. The regular routine that
// returns the value directly doesn't work there, so the read value is
// returned through an additional output parameter instead.
2019 | #if (KMP_OS_WINDOWS) |
2020 | |
2021 | #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \ |
2022 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2023 | \ |
2024 | (*out) = (*loc); \ |
2025 | \ |
2026 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); |
2027 | // ------------------------------------------------------------------------ |
2028 | #ifdef KMP_GOMP_COMPAT |
2029 | #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \ |
2030 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
2031 | KMP_CHECK_GTID; \ |
2032 | OP_CRITICAL_READ_WRK(OP, 0); \ |
2033 | } |
2034 | #else |
2035 | #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) |
2036 | #endif /* KMP_GOMP_COMPAT */ |
2037 | // ------------------------------------------------------------------------ |
2038 | #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ |
2039 | void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \ |
2040 | TYPE *loc) { \ |
2041 | KMP_DEBUG_ASSERT(__kmp_init_serial); \ |
2042 | KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); |
2043 | |
2044 | // ------------------------------------------------------------------------ |
2045 | #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
2046 | ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ |
2047 | OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \ |
2048 | OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \ |
2049 | } |
2050 | |
2051 | #endif // KMP_OS_WINDOWS |
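// Illustrative example (not generated code): because of the workaround above,
// the cmplx4 read has a different calling convention per platform; a caller
// would do
//   kmp_cmplx32 v;
//   __kmpc_atomic_cmplx4_rd(&v, &loc, gtid, &x);    // Windows* OS
//   v = __kmpc_atomic_cmplx4_rd(&loc, gtid, &x);    // other platforms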
2052 | |
2053 | // ------------------------------------------------------------------------ |
2054 | // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG |
2055 | ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd |
2056 | ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, |
2057 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd |
2058 | ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, |
2059 | KMP_ARCH_X86) // __kmpc_atomic_float4_rd |
2060 | ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, |
2061 | KMP_ARCH_X86) // __kmpc_atomic_float8_rd |
2062 | |
2063 | // !!! TODO: Remove lock operations for "char" since it can't be non-atomic |
2064 | ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, |
2065 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd |
2066 | ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, |
2067 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd |
2068 | |
2069 | ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, |
2070 | 1) // __kmpc_atomic_float10_rd |
2071 | #if KMP_HAVE_QUAD |
2072 | ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, |
2073 | 1) // __kmpc_atomic_float16_rd |
2074 | #endif // KMP_HAVE_QUAD |
2075 | |
2076 | // Fix for CQ220361 on Windows* OS |
2077 | #if (KMP_OS_WINDOWS) |
2078 | ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, |
2079 | 1) // __kmpc_atomic_cmplx4_rd |
2080 | #else |
2081 | ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, |
2082 | 1) // __kmpc_atomic_cmplx4_rd |
2083 | #endif // (KMP_OS_WINDOWS) |
2084 | ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, |
2085 | 1) // __kmpc_atomic_cmplx8_rd |
2086 | ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, |
2087 | 1) // __kmpc_atomic_cmplx10_rd |
2088 | #if KMP_HAVE_QUAD |
2089 | ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, |
2090 | 1) // __kmpc_atomic_cmplx16_rd |
2091 | #if (KMP_ARCH_X86) |
2092 | ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, |
2093 | 1) // __kmpc_atomic_float16_a16_rd |
2094 | ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, |
2095 | 1) // __kmpc_atomic_cmplx16_a16_rd |
2096 | #endif // (KMP_ARCH_X86) |
2097 | #endif // KMP_HAVE_QUAD |
2098 | |
2099 | // ------------------------------------------------------------------------ |
2100 | // Atomic WRITE routines |
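// Illustrative example (not generated code): a compiler could lower
//   #pragma omp atomic write
//   x = expr;   // kmp_int32 x;
// into
//   __kmpc_atomic_fixed4_wr(&loc, gtid, &x, expr);
// which stores the new value with an atomic exchange (or a cmpxchg loop /
// critical section for the wider and extended types below).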
2101 | |
2102 | #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
2103 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
2104 | OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ |
2105 | KMP_XCHG_FIXED##BITS(lhs, rhs); \ |
2106 | } |
2107 | // ------------------------------------------------------------------------ |
2108 | #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
2109 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
2110 | OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ |
2111 | KMP_XCHG_REAL##BITS(lhs, rhs); \ |
2112 | } |
2113 | |
2114 | // ------------------------------------------------------------------------ |
2115 | // Operation on *lhs, rhs using "compare_and_store" routine |
2116 | // TYPE - operands' type |
2117 | // BITS - size in bits, used to distinguish low level calls |
2118 | // OP - operator |
2119 | // Note: temp_val introduced in order to force the compiler to read |
2120 | // *lhs only once (w/o it the compiler reads *lhs twice) |
2121 | #define OP_CMPXCHG_WR(TYPE, BITS, OP) \ |
2122 | { \ |
2123 | TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
2124 | TYPE old_value, new_value; \ |
2125 | temp_val = *lhs; \ |
2126 | old_value = temp_val; \ |
2127 | new_value = rhs; \ |
2128 | while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ |
2129 | (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ |
2130 | *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ |
2131 | temp_val = *lhs; \ |
2132 | old_value = temp_val; \ |
2133 | new_value = rhs; \ |
2134 | } \ |
2135 | } |
2136 | |
2137 | // ------------------------------------------------------------------------- |
2138 | #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
2139 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
2140 | OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ |
2141 | OP_CMPXCHG_WR(TYPE, BITS, OP) \ |
2142 | } |
2143 | |
2144 | // ------------------------------------------------------------------------ |
2145 | // Routines for Extended types: long double, _Quad, complex flavours (use |
2146 | // critical section) |
2147 | // TYPE_ID, OP_ID, TYPE - detailed above |
2148 | // OP - operator |
2149 | // LCK_ID - lock identifier, used to possibly distinguish lock variable |
2150 | #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
2151 | ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ |
2152 | OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \ |
2153 | OP_CRITICAL(OP, LCK_ID) /* send assignment */ \ |
2154 | } |
2155 | // ------------------------------------------------------------------------- |
2156 | |
2157 | ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =, |
2158 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr |
2159 | ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =, |
2160 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr |
2161 | ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, |
2162 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr |
2163 | #if (KMP_ARCH_X86) |
2164 | ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =, |
2165 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr |
2166 | #else |
2167 | ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =, |
2168 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr |
2169 | #endif // (KMP_ARCH_X86) |
2170 | |
2171 | ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =, |
2172 | KMP_ARCH_X86) // __kmpc_atomic_float4_wr |
2173 | #if (KMP_ARCH_X86) |
2174 | ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =, |
2175 | KMP_ARCH_X86) // __kmpc_atomic_float8_wr |
2176 | #else |
2177 | ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =, |
2178 | KMP_ARCH_X86) // __kmpc_atomic_float8_wr |
2179 | #endif // (KMP_ARCH_X86) |
2180 | |
2181 | ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r, |
2182 | 1) // __kmpc_atomic_float10_wr |
2183 | #if KMP_HAVE_QUAD |
2184 | ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r, |
2185 | 1) // __kmpc_atomic_float16_wr |
2186 | #endif // KMP_HAVE_QUAD |
2187 | ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr |
2188 | ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c, |
2189 | 1) // __kmpc_atomic_cmplx8_wr |
2190 | ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c, |
2191 | 1) // __kmpc_atomic_cmplx10_wr |
2192 | #if KMP_HAVE_QUAD |
2193 | ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c, |
2194 | 1) // __kmpc_atomic_cmplx16_wr |
2195 | #if (KMP_ARCH_X86) |
2196 | ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r, |
2197 | 1) // __kmpc_atomic_float16_a16_wr |
2198 | ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, |
2199 | 1) // __kmpc_atomic_cmplx16_a16_wr |
2200 | #endif // (KMP_ARCH_X86) |
2201 | #endif // KMP_HAVE_QUAD |
2202 | |
2203 | // ------------------------------------------------------------------------ |
2204 | // Atomic CAPTURE routines |
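// Illustrative example (not generated code): the capture routines return
// either the old or the new value depending on the trailing "flag" argument,
// so a compiler could lower
//   #pragma omp atomic capture
//   { v = x; x += expr; }   // kmp_int32 x, v;
// into
//   v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, expr, 0);  // old value
// and the post-update form { x += expr; v = x; } into the same call with the
// flag set to 1 (new value).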
2205 | |
// Beginning of a definition (provides name, parameters, debug trace)
2207 | // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned |
2208 | // fixed) |
2209 | // OP_ID - operation identifier (add, sub, mul, ...) |
2210 | // TYPE - operands' type |
2211 | #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ |
2212 | RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ |
2213 | TYPE *lhs, TYPE rhs, int flag) { \ |
2214 | KMP_DEBUG_ASSERT(__kmp_init_serial); \ |
2215 | KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); |
2216 | |
2217 | // ------------------------------------------------------------------------- |
2218 | // Operation on *lhs, rhs bound by critical section |
2219 | // OP - operator (it's supposed to contain an assignment) |
2220 | // LCK_ID - lock identifier |
// Note: don't check gtid as it should always be valid
// 1-, 2-byte operands: a valid parameter is expected; other sizes: checked
// before this macro
2223 | #define OP_CRITICAL_CPT(OP, LCK_ID) \ |
2224 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2225 | \ |
2226 | if (flag) { \ |
2227 | (*lhs) OP rhs; \ |
2228 | new_value = (*lhs); \ |
2229 | } else { \ |
2230 | new_value = (*lhs); \ |
2231 | (*lhs) OP rhs; \ |
2232 | } \ |
2233 | \ |
2234 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2235 | return new_value; |
2236 | |
2237 | #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \ |
2238 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2239 | \ |
2240 | if (flag) { \ |
2241 | (*lhs) = (TYPE)((*lhs)OP rhs); \ |
2242 | new_value = (*lhs); \ |
2243 | } else { \ |
2244 | new_value = (*lhs); \ |
2245 | (*lhs) = (TYPE)((*lhs)OP rhs); \ |
2246 | } \ |
2247 | \ |
2248 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2249 | return new_value; |
2250 | |
2251 | // ------------------------------------------------------------------------ |
2252 | #ifdef KMP_GOMP_COMPAT |
2253 | #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \ |
2254 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
2255 | KMP_CHECK_GTID; \ |
2256 | OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \ |
2257 | } |
2258 | #else |
2259 | #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) |
2260 | #endif /* KMP_GOMP_COMPAT */ |
2261 | |
2262 | // ------------------------------------------------------------------------ |
2263 | // Operation on *lhs, rhs using "compare_and_store" routine |
2264 | // TYPE - operands' type |
2265 | // BITS - size in bits, used to distinguish low level calls |
2266 | // OP - operator |
2267 | // Note: temp_val introduced in order to force the compiler to read |
2268 | // *lhs only once (w/o it the compiler reads *lhs twice) |
2269 | #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \ |
2270 | { \ |
2271 | TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
2272 | TYPE old_value, new_value; \ |
2273 | temp_val = *lhs; \ |
2274 | old_value = temp_val; \ |
2275 | new_value = (TYPE)(old_value OP rhs); \ |
2276 | while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ |
2277 | (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ |
2278 | *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ |
2279 | temp_val = *lhs; \ |
2280 | old_value = temp_val; \ |
2281 | new_value = (TYPE)(old_value OP rhs); \ |
2282 | } \ |
2283 | if (flag) { \ |
2284 | return new_value; \ |
2285 | } else \ |
2286 | return old_value; \ |
2287 | } |
2288 | |
2289 | // ------------------------------------------------------------------------- |
2290 | #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
2291 | ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ |
2292 | TYPE new_value; \ |
2293 | (void)new_value; \ |
2294 | OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \ |
2295 | OP_CMPXCHG_CPT(TYPE, BITS, OP) \ |
2296 | } |
2297 | |
2298 | // ------------------------------------------------------------------------- |
2299 | #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
2300 | ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ |
2301 | TYPE old_value, new_value; \ |
2302 | (void)new_value; \ |
2303 | OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \ |
2304 | /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ |
2305 | old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ |
2306 | if (flag) { \ |
2307 | return old_value OP rhs; \ |
2308 | } else \ |
2309 | return old_value; \ |
2310 | } |
2311 | // ------------------------------------------------------------------------- |
2312 | |
2313 | ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, |
2314 | 0) // __kmpc_atomic_fixed4_add_cpt |
2315 | ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, |
2316 | 0) // __kmpc_atomic_fixed4_sub_cpt |
2317 | ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, |
2318 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt |
2319 | ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, |
2320 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt |
2321 | |
2322 | ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, |
2323 | KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt |
2324 | ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, |
2325 | KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt |
2326 | ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, |
2327 | KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt |
2328 | ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, |
2329 | KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt |
2330 | |
2331 | // ------------------------------------------------------------------------ |
2332 | // Entries definition for integer operands |
2333 | // TYPE_ID - operands type and size (fixed4, float4) |
2334 | // OP_ID - operation identifier (add, sub, mul, ...) |
2335 | // TYPE - operand type |
2336 | // BITS - size in bits, used to distinguish low level calls |
2337 | // OP - operator (used in critical section) |
2338 | // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG |
2339 | // ------------------------------------------------------------------------ |
2340 | // Routines for ATOMIC integer operands, other operators |
2341 | // ------------------------------------------------------------------------ |
2342 | // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG |
2343 | ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, |
2344 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt |
2345 | ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, |
2346 | 0) // __kmpc_atomic_fixed1_andb_cpt |
2347 | ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, |
2348 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt |
2349 | ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, |
2350 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt |
2351 | ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, |
2352 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt |
2353 | ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, |
2354 | 0) // __kmpc_atomic_fixed1_orb_cpt |
2355 | ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, |
2356 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt |
2357 | ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, |
2358 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt |
2359 | ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, |
2360 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt |
2361 | ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, |
2362 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt |
2363 | ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, |
2364 | 0) // __kmpc_atomic_fixed1_xor_cpt |
2365 | ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, |
2366 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt |
2367 | ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, |
2368 | 0) // __kmpc_atomic_fixed2_andb_cpt |
2369 | ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, |
2370 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt |
2371 | ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, |
2372 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt |
2373 | ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, |
2374 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt |
2375 | ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, |
2376 | 0) // __kmpc_atomic_fixed2_orb_cpt |
2377 | ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, |
2378 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt |
2379 | ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, |
2380 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt |
2381 | ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, |
2382 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt |
2383 | ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -, |
2384 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt |
2385 | ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^, |
2386 | 0) // __kmpc_atomic_fixed2_xor_cpt |
2387 | ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &, |
2388 | 0) // __kmpc_atomic_fixed4_andb_cpt |
2389 | ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /, |
2390 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt |
2391 | ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /, |
2392 | KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt |
2393 | ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *, |
2394 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt |
2395 | ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |, |
2396 | 0) // __kmpc_atomic_fixed4_orb_cpt |
2397 | ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<, |
2398 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt |
2399 | ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>, |
2400 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt |
2401 | ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>, |
2402 | KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt |
2403 | ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^, |
2404 | 0) // __kmpc_atomic_fixed4_xor_cpt |
2405 | ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &, |
2406 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt |
2407 | ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /, |
2408 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt |
2409 | ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /, |
2410 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt |
2411 | ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *, |
2412 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt |
2413 | ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |, |
2414 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt |
2415 | ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<, |
2416 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt |
2417 | ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>, |
2418 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt |
2419 | ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>, |
2420 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt |
2421 | ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^, |
2422 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt |
2423 | ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /, |
2424 | KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt |
2425 | ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *, |
2426 | KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt |
2427 | ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /, |
2428 | KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt |
2429 | ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *, |
2430 | KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt |
2431 | // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG |
2432 | |
2433 | // CAPTURE routines for mixed types RHS=float16 |
2434 | #if KMP_HAVE_QUAD |
2435 | |
// Beginning of a definition (provides name, parameters, debug trace)
2437 | // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned |
2438 | // fixed) |
2439 | // OP_ID - operation identifier (add, sub, mul, ...) |
2440 | // TYPE - operands' type |
2441 | #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ |
2442 | TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ |
2443 | ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \ |
2444 | KMP_DEBUG_ASSERT(__kmp_init_serial); \ |
2445 | KA_TRACE(100, \ |
2446 | ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ |
2447 | gtid)); |
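
// Illustrative expansion (for readability only; not part of the generated
// code): ATOMIC_BEGIN_CPT_MIX(fixed4, add_cpt, kmp_int32, fp, _Quad) opens
// the definition of
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt_fp(ident_t *id_ref, int gtid,
//                                             kmp_int32 *lhs, _Quad rhs,
//                                             int flag)
// i.e. the LHS is an integer while the RHS is a _Quad (the "fp" suffix).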
2448 | |
2449 | // ------------------------------------------------------------------------- |
2450 | #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ |
2451 | RTYPE, LCK_ID, MASK, GOMP_FLAG) \ |
2452 | ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ |
2453 | TYPE new_value; \ |
2454 | (void)new_value; \ |
2455 | OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \ |
2456 | OP_CMPXCHG_CPT(TYPE, BITS, OP) \ |
2457 | } |
2458 | |
2459 | // ------------------------------------------------------------------------- |
2460 | #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ |
2461 | LCK_ID, GOMP_FLAG) \ |
2462 | ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ |
2463 | TYPE new_value; \ |
2464 | (void)new_value; \ |
2465 | OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \ |
2466 | OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \ |
2467 | } |
2468 | |
2469 | ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, |
2470 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp |
2471 | ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, |
2472 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp |
2473 | ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, |
2474 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp |
2475 | ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, |
2476 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp |
2477 | ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, |
2478 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp |
2479 | ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, |
2480 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp |
2481 | ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, |
2482 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp |
2483 | ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, |
2484 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp |
2485 | |
2486 | ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, |
2487 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp |
2488 | ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, |
2489 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp |
2490 | ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, |
2491 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp |
2492 | ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, |
2493 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp |
2494 | ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, |
2495 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp |
2496 | ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, |
2497 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp |
2498 | ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, |
2499 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp |
2500 | ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, |
2501 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp |
2502 | |
2503 | ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, |
2504 | 0) // __kmpc_atomic_fixed4_add_cpt_fp |
2505 | ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, |
2506 | 0) // __kmpc_atomic_fixed4u_add_cpt_fp |
2507 | ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, |
2508 | 0) // __kmpc_atomic_fixed4_sub_cpt_fp |
2509 | ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, |
2510 | 0) // __kmpc_atomic_fixed4u_sub_cpt_fp |
2511 | ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, |
2512 | 0) // __kmpc_atomic_fixed4_mul_cpt_fp |
2513 | ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, |
2514 | 0) // __kmpc_atomic_fixed4u_mul_cpt_fp |
2515 | ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, |
2516 | 0) // __kmpc_atomic_fixed4_div_cpt_fp |
2517 | ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, |
2518 | 0) // __kmpc_atomic_fixed4u_div_cpt_fp |
2519 | |
2520 | ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, |
2521 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp |
2522 | ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, |
2523 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp |
2524 | ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, |
2525 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp |
2526 | ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, |
2527 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp |
2528 | ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, |
2529 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp |
2530 | ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, |
2531 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp |
2532 | ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, |
2533 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp |
2534 | ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, |
2535 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp |
2536 | |
2537 | ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, |
2538 | KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp |
2539 | ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, |
2540 | KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp |
2541 | ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, |
2542 | KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp |
2543 | ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, |
2544 | KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp |
2545 | |
2546 | ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, |
2547 | KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp |
2548 | ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, |
2549 | KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp |
2550 | ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, |
2551 | KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp |
2552 | ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, |
2553 | KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp |
2554 | |
2555 | ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, |
2556 | 1) // __kmpc_atomic_float10_add_cpt_fp |
2557 | ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, |
2558 | 1) // __kmpc_atomic_float10_sub_cpt_fp |
2559 | ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, |
2560 | 1) // __kmpc_atomic_float10_mul_cpt_fp |
2561 | ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, |
2562 | 1) // __kmpc_atomic_float10_div_cpt_fp |
2563 | |
2564 | #endif // KMP_HAVE_QUAD |
2565 | |
2566 | // ------------------------------------------------------------------------ |
2567 | // Routines for C/C++ Reduction operators && and || |
2568 | |
2569 | // ------------------------------------------------------------------------- |
2570 | // Operation on *lhs, rhs bound by critical section |
2571 | // OP - operator (it's supposed to contain an assignment) |
2572 | // LCK_ID - lock identifier |
// Note: don't check gtid as it should always be valid
// 1-, 2-byte operands: a valid parameter is expected; other sizes: check it
// before this macro
2575 | #define OP_CRITICAL_L_CPT(OP, LCK_ID) \ |
2576 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2577 | \ |
2578 | if (flag) { \ |
2579 | new_value OP rhs; \ |
2580 | (*lhs) = new_value; \ |
2581 | } else { \ |
2582 | new_value = (*lhs); \ |
2583 | (*lhs) OP rhs; \ |
2584 | } \ |
2585 | \ |
2586 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); |
2587 | |
2588 | // ------------------------------------------------------------------------ |
2589 | #ifdef KMP_GOMP_COMPAT |
2590 | #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \ |
2591 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
2592 | KMP_CHECK_GTID; \ |
2593 | OP_CRITICAL_L_CPT(OP, 0); \ |
2594 | return new_value; \ |
2595 | } |
2596 | #else |
2597 | #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) |
2598 | #endif /* KMP_GOMP_COMPAT */ |
2599 | |
2600 | // ------------------------------------------------------------------------ |
2601 | // Need separate macros for &&, || because there is no combined assignment |
2602 | #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
2603 | ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ |
2604 | TYPE new_value; \ |
2605 | (void)new_value; \ |
2606 | OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \ |
2607 | OP_CMPXCHG_CPT(TYPE, BITS, OP) \ |
2608 | } |
2609 | |
2610 | ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&, |
2611 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt |
2612 | ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||, |
2613 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt |
2614 | ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&, |
2615 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt |
2616 | ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||, |
2617 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt |
2618 | ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, |
2619 | 0) // __kmpc_atomic_fixed4_andl_cpt |
2620 | ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||, |
2621 | 0) // __kmpc_atomic_fixed4_orl_cpt |
2622 | ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&, |
2623 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt |
2624 | ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||, |
2625 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt |
2626 | |
2627 | // ------------------------------------------------------------------------- |
// Routines for Fortran operators that have no C counterpart:
2629 | // MAX, MIN, .EQV., .NEQV. |
2630 | // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt |
2631 | // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt |
2632 | |
2633 | // ------------------------------------------------------------------------- |
2634 | // MIN and MAX need separate macros |
// OP - comparison operator used to check whether any action is needed
2636 | #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ |
2637 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2638 | \ |
2639 | if (*lhs OP rhs) { /* still need actions? */ \ |
2640 | old_value = *lhs; \ |
2641 | *lhs = rhs; \ |
2642 | if (flag) \ |
2643 | new_value = rhs; \ |
2644 | else \ |
2645 | new_value = old_value; \ |
2646 | } else { \ |
2647 | new_value = *lhs; \ |
2648 | } \ |
2649 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2650 | return new_value; |
2651 | |
2652 | // ------------------------------------------------------------------------- |
2653 | #ifdef KMP_GOMP_COMPAT |
2654 | #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \ |
2655 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
2656 | KMP_CHECK_GTID; \ |
2657 | MIN_MAX_CRITSECT_CPT(OP, 0); \ |
2658 | } |
2659 | #else |
2660 | #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) |
2661 | #endif /* KMP_GOMP_COMPAT */ |
2662 | |
2663 | // ------------------------------------------------------------------------- |
2664 | #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ |
2665 | { \ |
2666 | TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
2667 | /*TYPE old_value; */ \ |
2668 | temp_val = *lhs; \ |
2669 | old_value = temp_val; \ |
2670 | while (old_value OP rhs && /* still need actions? */ \ |
2671 | !KMP_COMPARE_AND_STORE_ACQ##BITS( \ |
2672 | (kmp_int##BITS *)lhs, \ |
2673 | *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ |
2674 | *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ |
2675 | temp_val = *lhs; \ |
2676 | old_value = temp_val; \ |
2677 | } \ |
2678 | if (flag) \ |
2679 | return rhs; \ |
2680 | else \ |
2681 | return old_value; \ |
2682 | } |
2683 | |
2684 | // ------------------------------------------------------------------------- |
2685 | // 1-byte, 2-byte operands - use critical section |
2686 | #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
2687 | ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ |
2688 | TYPE new_value, old_value; \ |
2689 | if (*lhs OP rhs) { /* need actions? */ \ |
2690 | GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ |
2691 | MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ |
2692 | } \ |
2693 | return *lhs; \ |
2694 | } |
2695 | |
2696 | #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
2697 | ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ |
2698 | TYPE new_value, old_value; \ |
2699 | (void)new_value; \ |
2700 | if (*lhs OP rhs) { \ |
2701 | GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ |
2702 | MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ |
2703 | } \ |
2704 | return *lhs; \ |
2705 | } |
2706 | |
2707 | MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <, |
2708 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt |
2709 | MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >, |
2710 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt |
2711 | MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <, |
2712 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt |
2713 | MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >, |
2714 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt |
2715 | MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, |
2716 | 0) // __kmpc_atomic_fixed4_max_cpt |
2717 | MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >, |
2718 | 0) // __kmpc_atomic_fixed4_min_cpt |
2719 | MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <, |
2720 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt |
2721 | MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >, |
2722 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt |
2723 | MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <, |
2724 | KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt |
2725 | MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >, |
2726 | KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt |
2727 | MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <, |
2728 | KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt |
2729 | MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >, |
2730 | KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt |
2731 | MIN_MAX_CRITICAL_CPT(float10, max_cpt, long double, <, 10r, |
2732 | 1) // __kmpc_atomic_float10_max_cpt |
2733 | MIN_MAX_CRITICAL_CPT(float10, min_cpt, long double, >, 10r, |
2734 | 1) // __kmpc_atomic_float10_min_cpt |
2735 | #if KMP_HAVE_QUAD |
2736 | MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r, |
2737 | 1) // __kmpc_atomic_float16_max_cpt |
2738 | MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r, |
2739 | 1) // __kmpc_atomic_float16_min_cpt |
2740 | #if (KMP_ARCH_X86) |
2741 | MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r, |
2742 | 1) // __kmpc_atomic_float16_max_a16_cpt |
2743 | MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r, |
                     1) // __kmpc_atomic_float16_min_a16_cpt
2745 | #endif // (KMP_ARCH_X86) |
2746 | #endif // KMP_HAVE_QUAD |
2747 | |
2748 | // ------------------------------------------------------------------------ |
2749 | #ifdef KMP_GOMP_COMPAT |
2750 | #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \ |
2751 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
2752 | KMP_CHECK_GTID; \ |
2753 | OP_CRITICAL_CPT(OP, 0); \ |
2754 | } |
2755 | #else |
2756 | #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) |
2757 | #endif /* KMP_GOMP_COMPAT */ |
2758 | // ------------------------------------------------------------------------ |
2759 | #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
2760 | ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ |
2761 | TYPE new_value; \ |
2762 | (void)new_value; \ |
2763 | OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ |
2764 | OP_CMPXCHG_CPT(TYPE, BITS, OP) \ |
2765 | } |
2766 | |
2767 | // ------------------------------------------------------------------------ |
2768 | |
2769 | ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^, |
2770 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt |
2771 | ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^, |
2772 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt |
2773 | ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^, |
2774 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt |
2775 | ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^, |
2776 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt |
2777 | ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~, |
2778 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt |
2779 | ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~, |
2780 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt |
2781 | ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~, |
2782 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt |
2783 | ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~, |
2784 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt |
2785 | |
2786 | // ------------------------------------------------------------------------ |
2787 | // Routines for Extended types: long double, _Quad, complex flavours (use |
2788 | // critical section) |
2789 | // TYPE_ID, OP_ID, TYPE - detailed above |
2790 | // OP - operator |
2791 | // LCK_ID - lock identifier, used to possibly distinguish lock variable |
2792 | #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
2793 | ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ |
2794 | TYPE new_value; \ |
2795 | OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \ |
2796 | OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \ |
2797 | } |
2798 | |
2799 | // ------------------------------------------------------------------------ |
2800 | // Workaround for cmplx4. Regular routines with return value don't work |
2801 | // on Win_32e. Let's return captured values through the additional parameter. |
2802 | #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \ |
2803 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2804 | \ |
2805 | if (flag) { \ |
2806 | (*lhs) OP rhs; \ |
2807 | (*out) = (*lhs); \ |
2808 | } else { \ |
2809 | (*out) = (*lhs); \ |
2810 | (*lhs) OP rhs; \ |
2811 | } \ |
2812 | \ |
2813 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2814 | return; |
2815 | // ------------------------------------------------------------------------ |
2816 | |
2817 | #ifdef KMP_GOMP_COMPAT |
2818 | #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \ |
2819 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
2820 | KMP_CHECK_GTID; \ |
2821 | OP_CRITICAL_CPT_WRK(OP## =, 0); \ |
2822 | } |
2823 | #else |
2824 | #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) |
2825 | #endif /* KMP_GOMP_COMPAT */ |
2826 | // ------------------------------------------------------------------------ |
2827 | |
2828 | #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ |
2829 | void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \ |
2830 | TYPE rhs, TYPE *out, int flag) { \ |
2831 | KMP_DEBUG_ASSERT(__kmp_init_serial); \ |
2832 | KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); |
2833 | // ------------------------------------------------------------------------ |
2834 | |
2835 | #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
2836 | ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ |
2837 | OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \ |
2838 | OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \ |
2839 | } |
2840 | // The end of workaround for cmplx4 |
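
// Illustrative signature (derived from ATOMIC_BEGIN_WRK above): the cmplx4
// instantiations below therefore have the form
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag)
// with the captured value written to *out instead of being returned.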
2841 | |
2842 | /* ------------------------------------------------------------------------- */ |
2843 | // routines for long double type |
2844 | ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, |
2845 | 1) // __kmpc_atomic_float10_add_cpt |
2846 | ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, |
2847 | 1) // __kmpc_atomic_float10_sub_cpt |
2848 | ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, |
2849 | 1) // __kmpc_atomic_float10_mul_cpt |
2850 | ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, |
2851 | 1) // __kmpc_atomic_float10_div_cpt |
2852 | #if KMP_HAVE_QUAD |
2853 | // routines for _Quad type |
2854 | ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, |
2855 | 1) // __kmpc_atomic_float16_add_cpt |
2856 | ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, |
2857 | 1) // __kmpc_atomic_float16_sub_cpt |
2858 | ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, |
2859 | 1) // __kmpc_atomic_float16_mul_cpt |
2860 | ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, |
2861 | 1) // __kmpc_atomic_float16_div_cpt |
2862 | #if (KMP_ARCH_X86) |
2863 | ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, |
2864 | 1) // __kmpc_atomic_float16_add_a16_cpt |
2865 | ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, |
2866 | 1) // __kmpc_atomic_float16_sub_a16_cpt |
2867 | ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, |
2868 | 1) // __kmpc_atomic_float16_mul_a16_cpt |
2869 | ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, |
2870 | 1) // __kmpc_atomic_float16_div_a16_cpt |
2871 | #endif // (KMP_ARCH_X86) |
2872 | #endif // KMP_HAVE_QUAD |
2873 | |
2874 | // routines for complex types |
2875 | |
2876 | // cmplx4 routines to return void |
2877 | ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, |
2878 | 1) // __kmpc_atomic_cmplx4_add_cpt |
2879 | ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, |
2880 | 1) // __kmpc_atomic_cmplx4_sub_cpt |
2881 | ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, |
2882 | 1) // __kmpc_atomic_cmplx4_mul_cpt |
2883 | ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, |
2884 | 1) // __kmpc_atomic_cmplx4_div_cpt |
2885 | |
2886 | ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c, |
2887 | 1) // __kmpc_atomic_cmplx8_add_cpt |
2888 | ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c, |
2889 | 1) // __kmpc_atomic_cmplx8_sub_cpt |
2890 | ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c, |
2891 | 1) // __kmpc_atomic_cmplx8_mul_cpt |
2892 | ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c, |
2893 | 1) // __kmpc_atomic_cmplx8_div_cpt |
2894 | ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c, |
2895 | 1) // __kmpc_atomic_cmplx10_add_cpt |
2896 | ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c, |
2897 | 1) // __kmpc_atomic_cmplx10_sub_cpt |
2898 | ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c, |
2899 | 1) // __kmpc_atomic_cmplx10_mul_cpt |
2900 | ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c, |
2901 | 1) // __kmpc_atomic_cmplx10_div_cpt |
2902 | #if KMP_HAVE_QUAD |
2903 | ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c, |
2904 | 1) // __kmpc_atomic_cmplx16_add_cpt |
2905 | ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c, |
2906 | 1) // __kmpc_atomic_cmplx16_sub_cpt |
2907 | ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c, |
2908 | 1) // __kmpc_atomic_cmplx16_mul_cpt |
2909 | ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c, |
2910 | 1) // __kmpc_atomic_cmplx16_div_cpt |
2911 | #if (KMP_ARCH_X86) |
2912 | ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, |
2913 | 1) // __kmpc_atomic_cmplx16_add_a16_cpt |
2914 | ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, |
2915 | 1) // __kmpc_atomic_cmplx16_sub_a16_cpt |
2916 | ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, |
2917 | 1) // __kmpc_atomic_cmplx16_mul_a16_cpt |
2918 | ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, |
2919 | 1) // __kmpc_atomic_cmplx16_div_a16_cpt |
2920 | #endif // (KMP_ARCH_X86) |
2921 | #endif // KMP_HAVE_QUAD |
2922 | |
// OpenMP 4.0 capture forms for non-commutative operations:
//   v = x = expr binop x;
//   { v = x; x = expr binop x; }
//   { x = expr binop x; v = x; }
2925 | // Supported only on IA-32 architecture and Intel(R) 64 |
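
// Illustrative example (a sketch, not part of this file; loc, x, expr and v
// are placeholder names): for the reversed subtraction capture
//   #pragma omp atomic capture
//   { v = x; x = expr - x; }
// the compiler may emit a call such as
//   v = __kmpc_atomic_float8_sub_cpt_rev(&loc, gtid, &x, expr, 0);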
2926 | |
2927 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
2928 | // ------------------------------------------------------------------------- |
2929 | // Operation on *lhs, rhs bound by critical section |
// OP - binary operator (the assignment to *lhs is performed inside the macro)
2931 | // LCK_ID - lock identifier |
// Note: don't check gtid as it should always be valid
// 1-, 2-byte operands: a valid parameter is expected; other sizes: check it
// before this macro
2934 | #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \ |
2935 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2936 | \ |
2937 | if (flag) { \ |
2938 | /*temp_val = (*lhs);*/ \ |
2939 | (*lhs) = (TYPE)((rhs)OP(*lhs)); \ |
2940 | new_value = (*lhs); \ |
2941 | } else { \ |
2942 | new_value = (*lhs); \ |
2943 | (*lhs) = (TYPE)((rhs)OP(*lhs)); \ |
2944 | } \ |
2945 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
2946 | return new_value; |
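
// Note: as in the forward capture macros, 'flag' selects the captured value:
// non-zero returns the value after the reversed update, zero returns the
// value observed before it.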
2947 | |
2948 | // ------------------------------------------------------------------------ |
2949 | #ifdef KMP_GOMP_COMPAT |
2950 | #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \ |
2951 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
2952 | KMP_CHECK_GTID; \ |
2953 | OP_CRITICAL_CPT_REV(TYPE, OP, 0); \ |
2954 | } |
2955 | #else |
2956 | #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) |
2957 | #endif /* KMP_GOMP_COMPAT */ |
2958 | |
2959 | // ------------------------------------------------------------------------ |
2960 | // Operation on *lhs, rhs using "compare_and_store" routine |
2961 | // TYPE - operands' type |
2962 | // BITS - size in bits, used to distinguish low level calls |
2963 | // OP - operator |
2964 | // Note: temp_val introduced in order to force the compiler to read |
2965 | // *lhs only once (w/o it the compiler reads *lhs twice) |
2966 | #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ |
2967 | { \ |
2968 | TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
2969 | TYPE old_value, new_value; \ |
2970 | temp_val = *lhs; \ |
2971 | old_value = temp_val; \ |
2972 | new_value = (TYPE)(rhs OP old_value); \ |
2973 | while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ |
2974 | (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ |
2975 | *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ |
2976 | temp_val = *lhs; \ |
2977 | old_value = temp_val; \ |
2978 | new_value = (TYPE)(rhs OP old_value); \ |
2979 | } \ |
2980 | if (flag) { \ |
2981 | return new_value; \ |
2982 | } else \ |
2983 | return old_value; \ |
2984 | } |
2985 | |
2986 | // ------------------------------------------------------------------------- |
2987 | #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ |
2988 | ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ |
2989 | TYPE new_value; \ |
2990 | (void)new_value; \ |
2991 | OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ |
2992 | OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ |
2993 | } |
2994 | |
2995 | ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, |
2996 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev |
2997 | ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, |
2998 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev |
2999 | ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, |
3000 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev |
3001 | ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, |
3002 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev |
3003 | ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, |
3004 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev |
3005 | ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, |
3006 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev |
3007 | ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, |
3008 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev |
3009 | ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /, |
3010 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev |
3011 | ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, |
3012 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev |
3013 | ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, |
3014 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev |
3015 | ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, |
3016 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev |
3017 | ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, |
3018 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev |
3019 | ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, |
3020 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev |
3021 | ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, |
3022 | KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev |
3023 | ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, |
3024 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev |
3025 | ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, |
3026 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev |
3027 | ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, |
3028 | KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev |
3029 | ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, |
3030 | KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev |
3031 | ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, |
3032 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev |
3033 | ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, |
3034 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev |
3035 | ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, |
3036 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev |
3037 | ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, |
3038 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev |
3039 | ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, |
3040 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev |
3041 | ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, |
3042 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev |
3043 | ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, |
3044 | KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev |
3045 | ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, |
3046 | KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev |
3047 | ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, |
3048 | KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev |
3049 | ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, |
3050 | KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev |
3051 | // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG |
3052 | |
3053 | // ------------------------------------------------------------------------ |
3054 | // Routines for Extended types: long double, _Quad, complex flavours (use |
3055 | // critical section) |
3056 | // TYPE_ID, OP_ID, TYPE - detailed above |
3057 | // OP - operator |
3058 | // LCK_ID - lock identifier, used to possibly distinguish lock variable |
3059 | #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ |
3060 | ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ |
3061 | TYPE new_value; \ |
3062 | /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \ |
3063 | OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ |
3064 | OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \ |
3065 | } |
3066 | |
3067 | /* ------------------------------------------------------------------------- */ |
3068 | // routines for long double type |
3069 | ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, |
3070 | 1) // __kmpc_atomic_float10_sub_cpt_rev |
3071 | ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r, |
3072 | 1) // __kmpc_atomic_float10_div_cpt_rev |
3073 | #if KMP_HAVE_QUAD |
3074 | // routines for _Quad type |
3075 | ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, |
3076 | 1) // __kmpc_atomic_float16_sub_cpt_rev |
3077 | ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, |
3078 | 1) // __kmpc_atomic_float16_div_cpt_rev |
3079 | #if (KMP_ARCH_X86) |
3080 | ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, |
3081 | 1) // __kmpc_atomic_float16_sub_a16_cpt_rev |
3082 | ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, |
3083 | 1) // __kmpc_atomic_float16_div_a16_cpt_rev |
3084 | #endif // (KMP_ARCH_X86) |
3085 | #endif // KMP_HAVE_QUAD |
3086 | |
3087 | // routines for complex types |
3088 | |
3089 | // ------------------------------------------------------------------------ |
3090 | // Workaround for cmplx4. Regular routines with return value don't work |
3091 | // on Win_32e. Let's return captured values through the additional parameter. |
3092 | #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ |
3093 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
3094 | \ |
3095 | if (flag) { \ |
3096 | (*lhs) = (rhs)OP(*lhs); \ |
3097 | (*out) = (*lhs); \ |
3098 | } else { \ |
3099 | (*out) = (*lhs); \ |
3100 | (*lhs) = (rhs)OP(*lhs); \ |
3101 | } \ |
3102 | \ |
3103 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
3104 | return; |
3105 | // ------------------------------------------------------------------------ |
3106 | |
3107 | #ifdef KMP_GOMP_COMPAT |
3108 | #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \ |
3109 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
3110 | KMP_CHECK_GTID; \ |
3111 | OP_CRITICAL_CPT_REV_WRK(OP, 0); \ |
3112 | } |
3113 | #else |
3114 | #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) |
3115 | #endif /* KMP_GOMP_COMPAT */ |
3116 | // ------------------------------------------------------------------------ |
3117 | |
3118 | #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \ |
3119 | GOMP_FLAG) \ |
3120 | ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ |
3121 | OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \ |
3122 | OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ |
3123 | } |
3124 | // The end of workaround for cmplx4 |
3125 | |
3126 | // !!! TODO: check if we need to return void for cmplx4 routines |
3127 | // cmplx4 routines to return void |
3128 | ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, |
3129 | 1) // __kmpc_atomic_cmplx4_sub_cpt_rev |
3130 | ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, |
3131 | 1) // __kmpc_atomic_cmplx4_div_cpt_rev |
3132 | |
3133 | ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, |
3134 | 1) // __kmpc_atomic_cmplx8_sub_cpt_rev |
3135 | ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, |
3136 | 1) // __kmpc_atomic_cmplx8_div_cpt_rev |
3137 | ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, |
3138 | 1) // __kmpc_atomic_cmplx10_sub_cpt_rev |
3139 | ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, |
3140 | 1) // __kmpc_atomic_cmplx10_div_cpt_rev |
3141 | #if KMP_HAVE_QUAD |
3142 | ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, |
3143 | 1) // __kmpc_atomic_cmplx16_sub_cpt_rev |
3144 | ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, |
3145 | 1) // __kmpc_atomic_cmplx16_div_cpt_rev |
3146 | #if (KMP_ARCH_X86) |
3147 | ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, |
3148 | 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev |
3149 | ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, |
3150 | 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev |
3151 | #endif // (KMP_ARCH_X86) |
3152 | #endif // KMP_HAVE_QUAD |
3153 | |
3154 | // Capture reverse for mixed type: RHS=float16 |
3155 | #if KMP_HAVE_QUAD |
3156 | |
// Beginning of a definition (provides name, parameters, debug trace)
3158 | // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned |
3159 | // fixed) |
3160 | // OP_ID - operation identifier (add, sub, mul, ...) |
3161 | // TYPE - operands' type |
3162 | // ------------------------------------------------------------------------- |
3163 | #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ |
3164 | RTYPE, LCK_ID, MASK, GOMP_FLAG) \ |
3165 | ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ |
3166 | TYPE new_value; \ |
3167 | (void)new_value; \ |
3168 | OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ |
3169 | OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ |
3170 | } |
3171 | |
3172 | // ------------------------------------------------------------------------- |
3173 | #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ |
3174 | LCK_ID, GOMP_FLAG) \ |
3175 | ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ |
3176 | TYPE new_value; \ |
3177 | (void)new_value; \ |
3178 | OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \ |
3179 | OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \ |
3180 | } |
3181 | |
3182 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, |
3183 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp |
3184 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, |
3185 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp |
3186 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, |
3187 | KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp |
3188 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, |
3189 | KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp |
3190 | |
3191 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, |
3192 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp |
3193 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i, |
3194 | 1, |
3195 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp |
3196 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, |
3197 | KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp |
3198 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, |
3199 | 1, |
3200 | KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp |
3201 | |
3202 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, |
3203 | 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp |
3204 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, |
3205 | 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp |
3206 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, |
3207 | 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp |
3208 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, |
3209 | 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp |
3210 | |
3211 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, |
3212 | 7, |
3213 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp |
3214 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, |
3215 | 8i, 7, |
3216 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp |
3217 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, |
3218 | 7, |
3219 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp |
3220 | ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, |
3221 | 8i, 7, |
3222 | KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp |
3223 | |
3224 | ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, |
3225 | 4r, 3, |
3226 | KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp |
3227 | ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, |
3228 | 4r, 3, |
3229 | KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp |
3230 | |
3231 | ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, |
3232 | 8r, 7, |
3233 | KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp |
3234 | ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, |
3235 | 8r, 7, |
3236 | KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp |
3237 | |
3238 | ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, |
3239 | 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp |
3240 | ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, |
3241 | 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp |
3242 | |
3243 | #endif // KMP_HAVE_QUAD |
3244 | |
3245 | // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} |
3246 | |
3247 | #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ |
3248 | TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ |
3249 | TYPE rhs) { \ |
3250 | KMP_DEBUG_ASSERT(__kmp_init_serial); \ |
3251 | KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); |
3252 | |
3253 | #define CRITICAL_SWP(LCK_ID) \ |
3254 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
3255 | \ |
3256 | old_value = (*lhs); \ |
3257 | (*lhs) = rhs; \ |
3258 | \ |
3259 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
3260 | return old_value; |
3261 | |
3262 | // ------------------------------------------------------------------------ |
3263 | #ifdef KMP_GOMP_COMPAT |
3264 | #define GOMP_CRITICAL_SWP(FLAG) \ |
3265 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
3266 | KMP_CHECK_GTID; \ |
3267 | CRITICAL_SWP(0); \ |
3268 | } |
3269 | #else |
3270 | #define GOMP_CRITICAL_SWP(FLAG) |
3271 | #endif /* KMP_GOMP_COMPAT */ |
3272 | |
3273 | #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ |
3274 | ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ |
3275 | TYPE old_value; \ |
3276 | GOMP_CRITICAL_SWP(GOMP_FLAG) \ |
3277 | old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \ |
3278 | return old_value; \ |
3279 | } |
3280 | // ------------------------------------------------------------------------ |
3281 | #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ |
3282 | ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ |
3283 | TYPE old_value; \ |
3284 | GOMP_CRITICAL_SWP(GOMP_FLAG) \ |
3285 | old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \ |
3286 | return old_value; \ |
3287 | } |
3288 | |
3289 | // ------------------------------------------------------------------------ |
3290 | #define CMPXCHG_SWP(TYPE, BITS) \ |
3291 | { \ |
3292 | TYPE KMP_ATOMIC_VOLATILE temp_val; \ |
3293 | TYPE old_value, new_value; \ |
3294 | temp_val = *lhs; \ |
3295 | old_value = temp_val; \ |
3296 | new_value = rhs; \ |
3297 | while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ |
3298 | (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ |
3299 | *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ |
3300 | temp_val = *lhs; \ |
3301 | old_value = temp_val; \ |
3302 | new_value = rhs; \ |
3303 | } \ |
3304 | return old_value; \ |
3305 | } |
3306 | |
3307 | // ------------------------------------------------------------------------- |
3308 | #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ |
3309 | ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ |
3310 | TYPE old_value; \ |
3311 | (void)old_value; \ |
3312 | GOMP_CRITICAL_SWP(GOMP_FLAG) \ |
3313 | CMPXCHG_SWP(TYPE, BITS) \ |
3314 | } |
3315 | |
3316 | ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp |
3317 | ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp |
3318 | ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp |
3319 | |
3320 | ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, |
3321 | KMP_ARCH_X86) // __kmpc_atomic_float4_swp |
3322 | |
3323 | #if (KMP_ARCH_X86) |
3324 | ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, |
3325 | KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp |
3326 | ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, |
3327 | KMP_ARCH_X86) // __kmpc_atomic_float8_swp |
3328 | #else |
3329 | ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp |
3330 | ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, |
3331 | KMP_ARCH_X86) // __kmpc_atomic_float8_swp |
3332 | #endif // (KMP_ARCH_X86) |
3333 | |
3334 | // ------------------------------------------------------------------------ |
3335 | // Routines for Extended types: long double, _Quad, complex flavours (use |
3336 | // critical section) |
3337 | #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ |
3338 | ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ |
3339 | TYPE old_value; \ |
3340 | GOMP_CRITICAL_SWP(GOMP_FLAG) \ |
3341 | CRITICAL_SWP(LCK_ID) \ |
3342 | } |
3343 | |
3344 | // ------------------------------------------------------------------------ |
3345 | // !!! TODO: check if we need to return void for cmplx4 routines |
3346 | // Workaround for cmplx4. Regular routines with return value don't work |
3347 | // on Win_32e. Let's return captured values through the additional parameter. |
3348 | |
3349 | #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ |
3350 | void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ |
3351 | TYPE rhs, TYPE *out) { \ |
3352 | KMP_DEBUG_ASSERT(__kmp_init_serial); \ |
3353 | KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); |
3354 | |
3355 | #define CRITICAL_SWP_WRK(LCK_ID) \ |
3356 | __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
3357 | \ |
3358 | tmp = (*lhs); \ |
3359 | (*lhs) = (rhs); \ |
3360 | (*out) = tmp; \ |
3361 | __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ |
3362 | return; |
3363 | // ------------------------------------------------------------------------ |
3364 | |
3365 | #ifdef KMP_GOMP_COMPAT |
3366 | #define GOMP_CRITICAL_SWP_WRK(FLAG) \ |
3367 | if ((FLAG) && (__kmp_atomic_mode == 2)) { \ |
3368 | KMP_CHECK_GTID; \ |
3369 | CRITICAL_SWP_WRK(0); \ |
3370 | } |
3371 | #else |
3372 | #define GOMP_CRITICAL_SWP_WRK(FLAG) |
3373 | #endif /* KMP_GOMP_COMPAT */ |
3374 | // ------------------------------------------------------------------------ |
3375 | |
3376 | #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ |
3377 | ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ |
3378 | TYPE tmp; \ |
3379 | GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ |
3380 | CRITICAL_SWP_WRK(LCK_ID) \ |
3381 | } |
3382 | // The end of workaround for cmplx4 |
3383 | |
3384 | ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp |
3385 | #if KMP_HAVE_QUAD |
3386 | ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp |
3387 | #endif // KMP_HAVE_QUAD |
3388 | // cmplx4 routine to return void |
3389 | ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp |
3390 | |
3391 | // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // |
3392 | // __kmpc_atomic_cmplx4_swp |
3393 | |
3394 | ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp |
3395 | ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp |
3396 | #if KMP_HAVE_QUAD |
3397 | ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp |
3398 | #if (KMP_ARCH_X86) |
3399 | ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, |
3400 | 1) // __kmpc_atomic_float16_a16_swp |
3401 | ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, |
3402 | 1) // __kmpc_atomic_cmplx16_a16_swp |
3403 | #endif // (KMP_ARCH_X86) |
3404 | #endif // KMP_HAVE_QUAD |
3405 | |
3406 | // End of OpenMP 4.0 Capture |
3407 | |
3408 | #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3409 | |
3410 | #undef OP_CRITICAL |
3411 | |
3412 | /* ------------------------------------------------------------------------ */ |
3413 | /* Generic atomic routines */ |
3414 | |
3415 | void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, |
3416 | void (*f)(void *, void *, void *)) { |
3417 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
3418 | |
3419 | if ( |
3420 | #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) |
3421 | FALSE /* must use lock */ |
3422 | #else |
3423 | TRUE |
3424 | #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) |
3425 | ) { |
3426 | kmp_int8 old_value, new_value; |
3427 | |
3428 | old_value = *(kmp_int8 *)lhs; |
3429 | (*f)(&new_value, &old_value, rhs); |
3430 | |
3431 | /* TODO: Should this be acquire or release? */ |
3432 | while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value, |
3433 | *(kmp_int8 *)&new_value)) { |
3434 | KMP_CPU_PAUSE(); |
3435 | |
3436 | old_value = *(kmp_int8 *)lhs; |
3437 | (*f)(&new_value, &old_value, rhs); |
3438 | } |
3439 | |
3440 | return; |
3441 | } else { |
3442 | // All 1-byte data is of integer data type. |
3443 | |
3444 | #ifdef KMP_GOMP_COMPAT |
3445 | if (__kmp_atomic_mode == 2) { |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3447 | } else |
3448 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3450 | |
3451 | (*f)(lhs, lhs, rhs); |
3452 | |
3453 | #ifdef KMP_GOMP_COMPAT |
3454 | if (__kmp_atomic_mode == 2) { |
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3456 | } else |
3457 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3459 | } |
3460 | } |
3461 | |
3462 | void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, |
3463 | void (*f)(void *, void *, void *)) { |
3464 | if ( |
3465 | #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) |
3466 | FALSE /* must use lock */ |
3467 | #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3468 | TRUE /* no alignment problems */ |
3469 | #else |
3470 | !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ |
3471 | #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) |
3472 | ) { |
3473 | kmp_int16 old_value, new_value; |
3474 | |
3475 | old_value = *(kmp_int16 *)lhs; |
3476 | (*f)(&new_value, &old_value, rhs); |
3477 | |
3478 | /* TODO: Should this be acquire or release? */ |
3479 | while (!KMP_COMPARE_AND_STORE_ACQ16( |
3480 | (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) { |
3481 | KMP_CPU_PAUSE(); |
3482 | |
3483 | old_value = *(kmp_int16 *)lhs; |
3484 | (*f)(&new_value, &old_value, rhs); |
3485 | } |
3486 | |
3487 | return; |
3488 | } else { |
3489 | // All 2-byte data is of integer data type. |
3490 | |
3491 | #ifdef KMP_GOMP_COMPAT |
3492 | if (__kmp_atomic_mode == 2) { |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3494 | } else |
3495 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3497 | |
3498 | (*f)(lhs, lhs, rhs); |
3499 | |
3500 | #ifdef KMP_GOMP_COMPAT |
3501 | if (__kmp_atomic_mode == 2) { |
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3503 | } else |
3504 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3506 | } |
3507 | } |
3508 | |
3509 | void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, |
3510 | void (*f)(void *, void *, void *)) { |
3511 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
3512 | |
3513 | if ( |
3514 | // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. |
3515 | // Gomp compatibility is broken if this routine is called for floats. |
3516 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3517 | TRUE /* no alignment problems */ |
3518 | #else |
3519 | !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ |
3520 | #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3521 | ) { |
3522 | kmp_int32 old_value, new_value; |
3523 | |
3524 | old_value = *(kmp_int32 *)lhs; |
3525 | (*f)(&new_value, &old_value, rhs); |
3526 | |
3527 | /* TODO: Should this be acquire or release? */ |
3528 | while (!KMP_COMPARE_AND_STORE_ACQ32( |
3529 | (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) { |
3530 | KMP_CPU_PAUSE(); |
3531 | |
3532 | old_value = *(kmp_int32 *)lhs; |
3533 | (*f)(&new_value, &old_value, rhs); |
3534 | } |
3535 | |
3536 | return; |
3537 | } else { |
3538 | // Use __kmp_atomic_lock_4i for all 4-byte data, |
3539 | // even if it isn't of integer data type. |
3540 | |
3541 | #ifdef KMP_GOMP_COMPAT |
3542 | if (__kmp_atomic_mode == 2) { |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3544 | } else |
3545 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3547 | |
3548 | (*f)(lhs, lhs, rhs); |
3549 | |
3550 | #ifdef KMP_GOMP_COMPAT |
3551 | if (__kmp_atomic_mode == 2) { |
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3553 | } else |
3554 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3556 | } |
3557 | } |
3558 | |
3559 | void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, |
3560 | void (*f)(void *, void *, void *)) { |
3561 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
3562 | if ( |
3563 | |
3564 | #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) |
3565 | FALSE /* must use lock */ |
3566 | #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3567 | TRUE /* no alignment problems */ |
3568 | #else |
3569 | !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ |
3570 | #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) |
3571 | ) { |
3572 | kmp_int64 old_value, new_value; |
3573 | |
3574 | old_value = *(kmp_int64 *)lhs; |
3575 | (*f)(&new_value, &old_value, rhs); |
3576 | /* TODO: Should this be acquire or release? */ |
3577 | while (!KMP_COMPARE_AND_STORE_ACQ64( |
3578 | (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) { |
3579 | KMP_CPU_PAUSE(); |
3580 | |
3581 | old_value = *(kmp_int64 *)lhs; |
3582 | (*f)(&new_value, &old_value, rhs); |
3583 | } |
3584 | |
3585 | return; |
3586 | } else { |
3587 | // Use __kmp_atomic_lock_8i for all 8-byte data, |
3588 | // even if it isn't of integer data type. |
3589 | |
3590 | #ifdef KMP_GOMP_COMPAT |
3591 | if (__kmp_atomic_mode == 2) { |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3593 | } else |
3594 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3596 | |
3597 | (*f)(lhs, lhs, rhs); |
3598 | |
3599 | #ifdef KMP_GOMP_COMPAT |
3600 | if (__kmp_atomic_mode == 2) { |
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3602 | } else |
3603 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3605 | } |
3606 | } |
3607 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3608 | void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, |
3609 | void (*f)(void *, void *, void *)) { |
3610 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
3611 | |
3612 | #ifdef KMP_GOMP_COMPAT |
3613 | if (__kmp_atomic_mode == 2) { |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3615 | } else |
3616 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3618 | |
3619 | (*f)(lhs, lhs, rhs); |
3620 | |
3621 | #ifdef KMP_GOMP_COMPAT |
3622 | if (__kmp_atomic_mode == 2) { |
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3624 | } else |
3625 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3627 | } |
3628 | #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3629 | |
3630 | void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, |
3631 | void (*f)(void *, void *, void *)) { |
3632 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
3633 | |
3634 | #ifdef KMP_GOMP_COMPAT |
3635 | if (__kmp_atomic_mode == 2) { |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3637 | } else |
3638 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3640 | |
3641 | (*f)(lhs, lhs, rhs); |
3642 | |
3643 | #ifdef KMP_GOMP_COMPAT |
3644 | if (__kmp_atomic_mode == 2) { |
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3646 | } else |
3647 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3649 | } |
3650 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3651 | void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, |
3652 | void (*f)(void *, void *, void *)) { |
3653 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
3654 | |
3655 | #ifdef KMP_GOMP_COMPAT |
3656 | if (__kmp_atomic_mode == 2) { |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3658 | } else |
3659 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3661 | |
3662 | (*f)(lhs, lhs, rhs); |
3663 | |
3664 | #ifdef KMP_GOMP_COMPAT |
3665 | if (__kmp_atomic_mode == 2) { |
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3667 | } else |
3668 | #endif /* KMP_GOMP_COMPAT */ |
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3670 | } |
3671 | #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3672 | void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, |
3673 | void (*f)(void *, void *, void *)) { |
3674 | KMP_DEBUG_ASSERT(__kmp_init_serial); |
3675 | |
3676 | #ifdef KMP_GOMP_COMPAT |
3677 | if (__kmp_atomic_mode == 2) { |
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3682 | |
3683 | (*f)(lhs, lhs, rhs); |
3684 | |
3685 | #ifdef KMP_GOMP_COMPAT |
3686 | if (__kmp_atomic_mode == 2) { |
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3691 | } |
3692 | |
3693 | // AC: same two routines as GOMP_atomic_start/end, but will be called by our |
// compiler; duplicated so as not to use third-party names in pure Intel code
3695 | // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. |
3696 | void __kmpc_atomic_start(void) { |
3697 | int gtid = __kmp_entry_gtid(); |
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3700 | } |
3701 | |
3702 | void __kmpc_atomic_end(void) { |
3703 | int gtid = __kmp_get_gtid(); |
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3706 | } |
3707 | |
3708 | #if KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3709 | |
3710 | // OpenMP 5.1 compare and swap |
3711 | |
3712 | /*! |
3713 | @param loc Source code location |
3714 | @param gtid Global thread id |
3715 | @param x Memory location to operate on |
3716 | @param e Expected value |
3717 | @param d Desired value |
3718 | @return Result of comparison |
3719 | |
Implements the Compare And Swap atomic operation.
3721 | |
3722 | Sample code: |
3723 | #pragma omp atomic compare update capture |
3724 | { r = x == e; if(r) { x = d; } } |
3725 | */ |
3726 | bool __kmpc_atomic_bool_1_cas(ident_t *loc, int gtid, char *x, char e, char d) { |
3727 | return KMP_COMPARE_AND_STORE_ACQ8(x, e, d); |
3728 | } |
3729 | bool __kmpc_atomic_bool_2_cas(ident_t *loc, int gtid, short *x, short e, |
3730 | short d) { |
3731 | return KMP_COMPARE_AND_STORE_ACQ16(x, e, d); |
3732 | } |
3733 | bool __kmpc_atomic_bool_4_cas(ident_t *loc, int gtid, kmp_int32 *x, kmp_int32 e, |
3734 | kmp_int32 d) { |
3735 | return KMP_COMPARE_AND_STORE_ACQ32(x, e, d); |
3736 | } |
3737 | bool __kmpc_atomic_bool_8_cas(ident_t *loc, int gtid, kmp_int64 *x, kmp_int64 e, |
3738 | kmp_int64 d) { |
3739 | return KMP_COMPARE_AND_STORE_ACQ64(x, e, d); |
3740 | } |
3741 | |
3742 | /*! |
3743 | @param loc Source code location |
3744 | @param gtid Global thread id |
3745 | @param x Memory location to operate on |
3746 | @param e Expected value |
3747 | @param d Desired value |
3748 | @return Old value of x |
3749 | |
Implements the Compare And Swap atomic operation.
3751 | |
3752 | Sample code: |
3753 | #pragma omp atomic compare update capture |
3754 | { v = x; if (x == e) { x = d; } } |
3755 | */ |
3756 | char __kmpc_atomic_val_1_cas(ident_t *loc, int gtid, char *x, char e, char d) { |
3757 | return KMP_COMPARE_AND_STORE_RET8(x, e, d); |
3758 | } |
3759 | short __kmpc_atomic_val_2_cas(ident_t *loc, int gtid, short *x, short e, |
3760 | short d) { |
3761 | return KMP_COMPARE_AND_STORE_RET16(x, e, d); |
3762 | } |
3763 | kmp_int32 __kmpc_atomic_val_4_cas(ident_t *loc, int gtid, kmp_int32 *x, |
3764 | kmp_int32 e, kmp_int32 d) { |
3765 | return KMP_COMPARE_AND_STORE_RET32(x, e, d); |
3766 | } |
3767 | kmp_int64 __kmpc_atomic_val_8_cas(ident_t *loc, int gtid, kmp_int64 *x, |
3768 | kmp_int64 e, kmp_int64 d) { |
3769 | return KMP_COMPARE_AND_STORE_RET64(x, e, d); |
3770 | } |
3771 | |
3772 | /*! |
3773 | @param loc Source code location |
3774 | @param gtid Global thread id |
3775 | @param x Memory location to operate on |
3776 | @param e Expected value |
3777 | @param d Desired value |
3778 | @param pv Captured value location |
3779 | @return Result of comparison |
3780 | |
Implements the Compare And Swap + Capture atomic operation.
3782 | |
v gets the old value of x if the comparison failed, and is left untouched
otherwise.
3784 | Sample code: |
3785 | #pragma omp atomic compare update capture |
3786 | { r = x == e; if(r) { x = d; } else { v = x; } } |
3787 | */ |
3788 | bool __kmpc_atomic_bool_1_cas_cpt(ident_t *loc, int gtid, char *x, char e, |
3789 | char d, char *pv) { |
3790 | char old = KMP_COMPARE_AND_STORE_RET8(x, e, d); |
3791 | if (old == e) |
3792 | return true; |
3793 | KMP_ASSERT(pv != NULL); |
3794 | *pv = old; |
3795 | return false; |
3796 | } |
3797 | bool __kmpc_atomic_bool_2_cas_cpt(ident_t *loc, int gtid, short *x, short e, |
3798 | short d, short *pv) { |
3799 | short old = KMP_COMPARE_AND_STORE_RET16(x, e, d); |
3800 | if (old == e) |
3801 | return true; |
3802 | KMP_ASSERT(pv != NULL); |
3803 | *pv = old; |
3804 | return false; |
3805 | } |
3806 | bool __kmpc_atomic_bool_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x, |
3807 | kmp_int32 e, kmp_int32 d, kmp_int32 *pv) { |
3808 | kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d); |
3809 | if (old == e) |
3810 | return true; |
3811 | KMP_ASSERT(pv != NULL); |
3812 | *pv = old; |
3813 | return false; |
3814 | } |
3815 | bool __kmpc_atomic_bool_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x, |
3816 | kmp_int64 e, kmp_int64 d, kmp_int64 *pv) { |
3817 | kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d); |
3818 | if (old == e) |
3819 | return true; |
3820 | KMP_ASSERT(pv != NULL); |
3821 | *pv = old; |
3822 | return false; |
3823 | } |
3824 | |
3825 | /*! |
3826 | @param loc Source code location |
3827 | @param gtid Global thread id |
3828 | @param x Memory location to operate on |
3829 | @param e Expected value |
3830 | @param d Desired value |
3831 | @param pv Captured value location |
3832 | @return Old value of x |
3833 | |
Implements the Compare And Swap + Capture atomic operation.
3835 | |
v gets the new value of x.
3837 | Sample code: |
3838 | #pragma omp atomic compare update capture |
3839 | { if (x == e) { x = d; }; v = x; } |
3840 | */ |
3841 | char __kmpc_atomic_val_1_cas_cpt(ident_t *loc, int gtid, char *x, char e, |
3842 | char d, char *pv) { |
3843 | char old = KMP_COMPARE_AND_STORE_RET8(x, e, d); |
3844 | KMP_ASSERT(pv != NULL); |
3845 | *pv = old == e ? d : old; |
3846 | return old; |
3847 | } |
3848 | short __kmpc_atomic_val_2_cas_cpt(ident_t *loc, int gtid, short *x, short e, |
3849 | short d, short *pv) { |
3850 | short old = KMP_COMPARE_AND_STORE_RET16(x, e, d); |
3851 | KMP_ASSERT(pv != NULL); |
3852 | *pv = old == e ? d : old; |
3853 | return old; |
3854 | } |
3855 | kmp_int32 __kmpc_atomic_val_4_cas_cpt(ident_t *loc, int gtid, kmp_int32 *x, |
3856 | kmp_int32 e, kmp_int32 d, kmp_int32 *pv) { |
3857 | kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(x, e, d); |
3858 | KMP_ASSERT(pv != NULL); |
3859 | *pv = old == e ? d : old; |
3860 | return old; |
3861 | } |
3862 | kmp_int64 __kmpc_atomic_val_8_cas_cpt(ident_t *loc, int gtid, kmp_int64 *x, |
3863 | kmp_int64 e, kmp_int64 d, kmp_int64 *pv) { |
3864 | kmp_int64 old = KMP_COMPARE_AND_STORE_RET64(x, e, d); |
3865 | KMP_ASSERT(pv != NULL); |
3866 | *pv = old == e ? d : old; |
3867 | return old; |
3868 | } |
3869 | |
3870 | // End OpenMP 5.1 compare + capture |
3871 | #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 |
3872 | |
3873 | /*! |
3874 | @} |
3875 | */ |
3876 | |
3877 | // end of file |
3878 | |