Synchronization.h source code [offload/DeviceRTL/include/Synchronization.h]

Warning: This file is not a C or C++ file. It does not have highlighting.

1	//===- Synchronization.h - OpenMP synchronization utilities ------- C++ -*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	//
10	//===----------------------------------------------------------------------===//
11
12	#ifndef OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
13	#define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
14
15	#include "DeviceTypes.h"
16	#include "DeviceUtils.h"
17
18	namespace ompx {
19	namespace atomic {
20
/// Memory orderings taken by the atomic, synchronization, and fence entry
/// points declared in this header. Each enumerator has the value of the
/// compiler-provided __ATOMIC_* macro of the same name, so an OrderingTy can be
/// passed directly as the memory-order argument of the atomic builtins.
enum OrderingTy {
  relaxed = __ATOMIC_RELAXED,
  acquire = __ATOMIC_ACQUIRE,
  release = __ATOMIC_RELEASE,
  acq_rel = __ATOMIC_ACQ_REL,
  seq_cst = __ATOMIC_SEQ_CST,
};
28
/// Memory scopes taken by the atomic operations in this header. Each
/// enumerator has the value of the compiler-provided __MEMORY_SCOPE_* macro it
/// is initialized from, so a MemScopeTy can be passed directly as the scope
/// argument of the __scoped_atomic_* builtins.
enum MemScopeTy {
  system = __MEMORY_SCOPE_SYSTEM,
  device = __MEMORY_SCOPE_DEVICE,
  workgroup = __MEMORY_SCOPE_WRKGRP,
  wavefront = __MEMORY_SCOPE_WVFRNT,
  single = __MEMORY_SCOPE_SINGLE,
};
36
/// Atomically increment \p *Addr and wrap at \p V with \p Ordering semantics.
///
/// Only the declaration is visible in this header; the definition lives
/// elsewhere.
///
/// \param Addr     Location of the counter to increment.
/// \param V        Value at which the counter wraps.
/// \param Ordering Memory ordering of the operation.
/// \param MemScope Memory scope of the operation; defaults to device scope.
/// \returns NOTE(review): presumably the value held before the increment, as
///          with the fetch-style operations in this header -- confirm against
///          the definition.
uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering,
             MemScopeTy MemScope = MemScopeTy::device);
40
/// Atomically perform <op> on \p Val and \p *Address with \p Ordering semantics
/// in memory scope \p MemScope. The result is stored in \p *Address.
///{
44
45	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
46	bool cas(Ty *Address, V ExpectedV, V DesiredV, atomic::OrderingTy OrderingSucc,
47	atomic::OrderingTy OrderingFail,
48	MemScopeTy MemScope = MemScopeTy::device) {
49	return __scoped_atomic_compare_exchange(Address, &ExpectedV, &DesiredV, false,
50	OrderingSucc, OrderingFail, MemScope);
51	}
52
53	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
54	V add(Ty *Address, V Val, atomic::OrderingTy Ordering,
55	MemScopeTy MemScope = MemScopeTy::device) {
56	return __scoped_atomic_fetch_add(Address, Val, Ordering, MemScope);
57	}
58
59	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
60	V load(Ty *Address, atomic::OrderingTy Ordering,
61	MemScopeTy MemScope = MemScopeTy::device) {
62	#ifdef __NVPTX__
63	return __scoped_atomic_fetch_add(Address, V(0), Ordering, MemScope);
64	#else
65	return __scoped_atomic_load_n(Address, Ordering, MemScope);
66	#endif
67	}
68
69	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
70	void store(Ty *Address, V Val, atomic::OrderingTy Ordering,
71	MemScopeTy MemScope = MemScopeTy::device) {
72	__scoped_atomic_store_n(Address, Val, Ordering, MemScope);
73	}
74
75	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
76	V mul(Ty *Address, V Val, atomic::OrderingTy Ordering,
77	MemScopeTy MemScope = MemScopeTy::device) {
78	Ty TypedCurrentVal, TypedResultVal, TypedNewVal;
79	bool Success;
80	do {
81	TypedCurrentVal = atomic::load(Address, Ordering);
82	TypedNewVal = TypedCurrentVal * Val;
83	Success = atomic::cas(Address, TypedCurrentVal, TypedNewVal, Ordering,
84	atomic::relaxed, MemScope);
85	} while (!Success);
86	return TypedResultVal;
87	}
88
89	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
90	utils::enable_if_t<!utils::is_floating_point_v<V>, V>
91	max(Ty *Address, V Val, atomic::OrderingTy Ordering,
92	MemScopeTy MemScope = MemScopeTy::device) {
93	return __scoped_atomic_fetch_max(Address, Val, Ordering, MemScope);
94	}
95
96	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
97	utils::enable_if_t<utils::is_same_v<V, float>, V>
98	max(Ty *Address, V Val, atomic::OrderingTy Ordering,
99	MemScopeTy MemScope = MemScopeTy::device) {
100	if (Val >= 0)
101	return utils::bitCast<float>(max(
102	(int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering, MemScope));
103	return utils::bitCast<float>(min(
104	(uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering, MemScope));
105	}
106
107	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
108	utils::enable_if_t<utils::is_same_v<V, double>, V>
109	max(Ty *Address, V Val, atomic::OrderingTy Ordering,
110	MemScopeTy MemScope = MemScopeTy::device) {
111	if (Val >= 0)
112	return utils::bitCast<double>(max(
113	(int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering, MemScope));
114	return utils::bitCast<double>(min(
115	(uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering, MemScope));
116	}
117
118	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
119	utils::enable_if_t<!utils::is_floating_point_v<V>, V>
120	min(Ty *Address, V Val, atomic::OrderingTy Ordering,
121	MemScopeTy MemScope = MemScopeTy::device) {
122	return __scoped_atomic_fetch_min(Address, Val, Ordering, MemScope);
123	}
124
125	// TODO: Implement this with __atomic_fetch_max and remove the duplication.
126	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
127	utils::enable_if_t<utils::is_same_v<V, float>, V>
128	min(Ty *Address, V Val, atomic::OrderingTy Ordering,
129	MemScopeTy MemScope = MemScopeTy::device) {
130	if (Val >= 0)
131	return utils::bitCast<float>(min(
132	(int32_t *)Address, utils::bitCast<int32_t>(Val), Ordering, MemScope));
133	return utils::bitCast<float>(max(
134	(uint32_t *)Address, utils::bitCast<uint32_t>(Val), Ordering, MemScope));
135	}
136
137	// TODO: Implement this with __atomic_fetch_max and remove the duplication.
138	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
139	utils::enable_if_t<utils::is_same_v<V, double>, V>
140	min(Ty *Address, utils::remove_addrspace_t<Ty> Val, atomic::OrderingTy Ordering,
141	MemScopeTy MemScope = MemScopeTy::device) {
142	if (Val >= 0)
143	return utils::bitCast<double>(min(
144	(int64_t *)Address, utils::bitCast<int64_t>(Val), Ordering, MemScope));
145	return utils::bitCast<double>(max(
146	(uint64_t *)Address, utils::bitCast<uint64_t>(Val), Ordering, MemScope));
147	}
148
149	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
150	V bit_or(Ty *Address, V Val, atomic::OrderingTy Ordering,
151	MemScopeTy MemScope = MemScopeTy::device) {
152	return __scoped_atomic_fetch_or(Address, Val, Ordering, MemScope);
153	}
154
155	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
156	V bit_and(Ty *Address, V Val, atomic::OrderingTy Ordering,
157	MemScopeTy MemScope = MemScopeTy::device) {
158	return __scoped_atomic_fetch_and(Address, Val, Ordering, MemScope);
159	}
160
161	template <typename Ty, typename V = utils::remove_addrspace_t<Ty>>
162	V bit_xor(Ty *Address, V Val, atomic::OrderingTy Ordering,
163	MemScopeTy MemScope = MemScopeTy::device) {
164	return __scoped_atomic_fetch_xor(Address, Val, Ordering, MemScope);
165	}
166
167	static inline uint32_t
168	atomicExchange(uint32_t *Address, uint32_t Val, atomic::OrderingTy Ordering,
169	MemScopeTy MemScope = MemScopeTy::device) {
170	uint32_t R;
171	__scoped_atomic_exchange(Address, &Val, &R, Ordering, MemScope);
172	return R;
173	}
174
175	///}
176
177	} // namespace atomic
178
/// Thread synchronization entry points. Only declarations appear in this
/// header; the definitions live elsewhere.
namespace synchronize {

/// Initialize the synchronization machinery. Must be called by all threads.
/// \param IsSPMD NOTE(review): presumably tells the runtime whether the kernel
///               runs in SPMD mode -- confirm against the definition.
void init(bool IsSPMD);

/// Synchronize all threads in a warp identified by \p Mask.
void warp(LaneMaskTy Mask);

/// Synchronize all threads in a block and perform a fence before and after the
/// barrier according to \p Ordering. Note that the fence might be part of the
/// barrier.
void threads(atomic::OrderingTy Ordering);

/// Synchronizing threads is allowed even if they all hit different instances of
/// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more
/// restrictive in that it requires all threads to hit the same instance. The
/// noinline is removed by the openmp-opt pass and helps to preserve the
/// information till then.
///{

/// Synchronize all threads in a block; all of them must reach the same
/// instruction (hence all threads in the block are "aligned"). Also perform a
/// fence before and after the barrier according to \p Ordering. Note that the
/// fence might be part of the barrier if the target offers this.
[[gnu::noinline, omp::assume("ompx_aligned_barrier")]] void
threadsAligned(atomic::OrderingTy Ordering);

///}

} // namespace synchronize
209
/// Memory fences at different scopes. Only declarations appear in this header;
/// the definitions live elsewhere. Each function takes the memory ordering the
/// fence should provide.
namespace fence {

/// Memory fence with \p Ordering semantics for the team.
void team(atomic::OrderingTy Ordering);

/// Memory fence with \p Ordering semantics for the contention group.
void kernel(atomic::OrderingTy Ordering);

/// Memory fence with \p Ordering semantics for the system.
void system(atomic::OrderingTy Ordering);

} // namespace fence
222
223	} // namespace ompx
224
225	#endif
226

Warning: This file is not a C or C++ file. It does not have highlighting.

source code of offload/DeviceRTL/include/Synchronization.h