Warning: This file is not a C or C++ file. It does not have highlighting.

1//===- Synchronization.h - OpenMP synchronization utilities ------- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//
10//===----------------------------------------------------------------------===//
11
12#ifndef OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
13#define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
14
15#include "DeviceTypes.h"
16#include "DeviceUtils.h"
17
18namespace ompx {
19namespace atomic {
20
/// Memory orderings accepted by the atomic, synchronization and fence helpers
/// in this header. Each enumerator takes the value of the compiler-provided
/// __ATOMIC_* macro of the same name, so it can be passed straight through to
/// the atomic builtins.
enum OrderingTy {
  relaxed = __ATOMIC_RELAXED,
  acquire = __ATOMIC_ACQUIRE,
  release = __ATOMIC_RELEASE,
  acq_rel = __ATOMIC_ACQ_REL,
  seq_cst = __ATOMIC_SEQ_CST,
};
28
/// Memory scopes accepted by the atomic helpers in this header. Each
/// enumerator takes the value of the corresponding compiler-provided
/// __MEMORY_SCOPE_* macro, so it can be passed straight through to the
/// __scoped_atomic_* builtins. The helpers below default to `device`.
enum MemScopeTy {
  system = __MEMORY_SCOPE_SYSTEM,
  device = __MEMORY_SCOPE_DEVICE,
  workgroup = __MEMORY_SCOPE_WRKGRP,
  wavefront = __MEMORY_SCOPE_WVFRNT,
  single = __MEMORY_SCOPE_SINGLE,
};
36
/// Atomically increment \p *Addr and wrap at \p V with \p Ordering semantics.
/// The operation is performed in \p MemScope, which defaults to
/// MemScopeTy::device. Only declared here; the definition lives elsewhere.
uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering,
             MemScopeTy MemScope = MemScopeTy::device);
40
/// Atomically perform <op> on \p Val and \p *Address with \p Ordering
/// semantics. The result is stored in \p *Address.
///{
44
45template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
46bool cas(Ty *Address, V ExpectedV, V DesiredV, atomic::OrderingTy OrderingSucc,
47 atomic::OrderingTy OrderingFail,
48 MemScopeTy MemScope = MemScopeTy::device) {
49 return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false,
50 OrderingSucc, OrderingFail, MemScope);
51}
52
53template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
54V add(Ty *Address, V Val, atomic::OrderingTy Ordering,
55 MemScopeTy MemScope = MemScopeTy::device) {
56 return __scoped_atomic_fetch_add(Address, Val, Ordering, MemScope);
57}
58
59template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
60V load(Ty *Address, atomic::OrderingTy Ordering,
61 MemScopeTy MemScope = MemScopeTy::device) {
62#ifdef __NVPTX__
63 return __scoped_atomic_fetch_add(Address, V(0), Ordering, MemScope);
64#else
65 return __scoped_atomic_load_n(Address, Ordering, MemScope);
66#endif
67}
68
/// Atomically write \p Val to \p *Address with \p Ordering semantics in
/// \p MemScope. Nothing is returned.
template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
void store(Ty *Address, V Val, atomic::OrderingTy Ordering,
           MemScopeTy MemScope = MemScopeTy::device) {
  __scoped_atomic_store_n(Address, Val, Ordering, MemScope);
}
74
75template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
76V mul(Ty *Address, V Val, atomic::OrderingTy Ordering,
77 MemScopeTy MemScope = MemScopeTy::device) {
78 Ty TypedCurrentVal, TypedResultVal, TypedNewVal;
79 bool Success;
80 do {
81 TypedCurrentVal = atomic::load(Address, Ordering);
82 TypedNewVal = TypedCurrentVal * Val;
83 Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering,
84 atomic::relaxed, MemScope);
85 } while (!Success);
86 return TypedResultVal;
87}
88
89template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
90utils::enable_if_t<!utils::is_floating_point_v<V>, V>
91max(Ty *Address, V Val, atomic::OrderingTy Ordering,
92 MemScopeTy MemScope = MemScopeTy::device) {
93 return __scoped_atomic_fetch_max(Address, Val, Ordering, MemScope);
94}
95
96template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
97utils::enable_if_t<utils::is_same_v<V, float>, V>
98max(Ty *Address, V Val, atomic::OrderingTy Ordering,
99 MemScopeTy MemScope = MemScopeTy::device) {
100 if (Val >= 0)
101 return utils::bitCast<float>(max(
102 (int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering, MemScope));
103 return utils::bitCast<float>(min(
104 (uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering, MemScope));
105}
106
107template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
108utils::enable_if_t<utils::is_same_v<V, double>, V>
109max(Ty *Address, V Val, atomic::OrderingTy Ordering,
110 MemScopeTy MemScope = MemScopeTy::device) {
111 if (Val >= 0)
112 return utils::bitCast<double>(max(
113 (int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering, MemScope));
114 return utils::bitCast<double>(min(
115 (uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering, MemScope));
116}
117
118template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
119utils::enable_if_t<!utils::is_floating_point_v<V>, V>
120min(Ty *Address, V Val, atomic::OrderingTy Ordering,
121 MemScopeTy MemScope = MemScopeTy::device) {
122 return __scoped_atomic_fetch_min(Address, Val, Ordering, MemScope);
123}
124
125// TODO: Implement this with __atomic_fetch_max and remove the duplication.
126template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
127utils::enable_if_t<utils::is_same_v<V, float>, V>
128min(Ty *Address, V Val, atomic::OrderingTy Ordering,
129 MemScopeTy MemScope = MemScopeTy::device) {
130 if (Val >= 0)
131 return utils::bitCast<float>(min(
132 (int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering, MemScope));
133 return utils::bitCast<float>(max(
134 (uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering, MemScope));
135}
136
137// TODO: Implement this with __atomic_fetch_max and remove the duplication.
138template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
139utils::enable_if_t<utils::is_same_v<V, double>, V>
140min(Ty *Address, utils::remove_addrspace_t<Ty> Val, atomic::OrderingTy Ordering,
141 MemScopeTy MemScope = MemScopeTy::device) {
142 if (Val >= 0)
143 return utils::bitCast<double>(min(
144 (int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering, MemScope));
145 return utils::bitCast<double>(max(
146 (uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering, MemScope));
147}
148
149template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
150V bit_or(Ty *Address, V Val, atomic::OrderingTy Ordering,
151 MemScopeTy MemScope = MemScopeTy::device) {
152 return __scoped_atomic_fetch_or(Address, Val, Ordering, MemScope);
153}
154
155template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
156V bit_and(Ty *Address, V Val, atomic::OrderingTy Ordering,
157 MemScopeTy MemScope = MemScopeTy::device) {
158 return __scoped_atomic_fetch_and(Address, Val, Ordering, MemScope);
159}
160
161template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
162V bit_xor(Ty *Address, V Val, atomic::OrderingTy Ordering,
163 MemScopeTy MemScope = MemScopeTy::device) {
164 return __scoped_atomic_fetch_xor(Address, Val, Ordering, MemScope);
165}
166
167static inline uint32_t
168atomicExchange(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering,
169 MemScopeTy MemScope = MemScopeTy::device) {
170 uint32_t R;
171 __scoped_atomic_exchange(Address, &Val, &R, Ordering, MemScope);
172 return R;
173}
174
175///}
176
177} // namespace atomic
178
/// Thread synchronization entry points. Everything in this namespace is only
/// declared here; the definitions live elsewhere.
namespace synchronize {

/// Initialize the synchronization machinery. Must be called by all threads.
/// \p IsSPMD is forwarded to the implementation.
void init(bool IsSPMD);

/// Synchronize all threads in a warp identified by \p Mask.
void warp(LaneMaskTy Mask);

/// Synchronize all threads in a block and perform a fence before and after the
/// barrier according to \p Ordering. Note that the fence might be part of the
/// barrier.
void threads(atomic::OrderingTy Ordering);

/// Synchronizing threads is allowed even if they all hit different instances of
/// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more
/// restrictive in that it requires all threads to hit the same instance. The
/// noinline is removed by the openmp-opt pass and helps to preserve the
/// information till then.
///{

/// Synchronize all threads in a block; all of them must reach the same
/// instruction (hence all threads in the block are "aligned"). Also perform a
/// fence before and after the barrier according to \p Ordering. Note that the
/// fence might be part of the barrier if the target offers this.
[[gnu::noinline, omp::assume("ompx_aligned_barrier")]] void
threadsAligned(atomic::OrderingTy Ordering);

///}

} // namespace synchronize
209
/// Memory fences at increasing scope. Everything in this namespace is only
/// declared here; the definitions live elsewhere.
namespace fence {

/// Memory fence with \p Ordering semantics for the team.
void team(atomic::OrderingTy Ordering);

/// Memory fence with \p Ordering semantics for the contention group.
void kernel(atomic::OrderingTy Ordering);

/// Memory fence with \p Ordering semantics for the system.
void system(atomic::OrderingTy Ordering);

} // namespace fence
222
223} // namespace ompx
224
225#endif
226

Warning: This file is not a C or C++ file. It does not have highlighting.

Provided by KDAB

Privacy Policy
Improve your Profiling and Debugging skills
Find out more

source code of offload/DeviceRTL/include/Synchronization.h