kmp_barrier.h source code [openmp/runtime/src/kmp_barrier.h]

1	/*
2	* kmp_barrier.h
3	*/
4
5	//===----------------------------------------------------------------------===//
6	//
7	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8	// See https://llvm.org/LICENSE.txt for license information.
9	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10	//
11	//===----------------------------------------------------------------------===//
12
13	#ifndef KMP_BARRIER_H
14	#define KMP_BARRIER_H
15
16	#include "kmp.h"
17	#include "kmp_i18n.h"
18
// Portable aligned allocation. The first allocator the build reports as
// available is exposed as
//   KMP_ALIGNED_ALLOCATE(size, alignment) -> pointer, or null on failure
//   KMP_ALIGNED_FREE(ptr)
// Memory obtained from KMP_ALIGNED_ALLOCATE must be released with the
// KMP_ALIGNED_FREE selected by the same branch.
#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
#include <xmmintrin.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
#elif KMP_HAVE_ALIGNED_ALLOC
// Round val up to the next multiple of alignment. The macro keeps its
// historical spelling so that any existing user continues to compile.
#define KMP_ALGIN_UP(val, alignment)                                           \
  (((val) + (alignment)-1) / (alignment) * (alignment))
// aligned_alloc expects the size to be a multiple of the alignment, so the
// requested size is rounded up first.
#define KMP_ALIGNED_ALLOCATE(size, alignment)                                  \
  aligned_alloc(alignment, KMP_ALGIN_UP(size, alignment))
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE_POSIX_MEMALIGN
// Allocate size bytes aligned to alignment via posix_memalign.
// Returns the allocation, or a null pointer when posix_memalign reports an
// error.
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  // Start from a null pointer: when posix_memalign fails nothing has been
  // allocated and the output argument cannot be relied upon, so it must never
  // be inspected or freed on the error path.
  void *ptr = NULL;
  if (posix_memalign(&ptr, alignment, size) != 0)
    return NULL;
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE__ALIGNED_MALLOC
#include <malloc.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
// No aligned allocator is available: fall back to the runtime's internal
// allocator. Note that the alignment argument is ignored in this branch.
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
#endif
49
// Use four cache lines: MLC tends to prefetch the next or previous cache line
// creating a possible fake conflict between cores, so this is the only way to
// guarantee that no such prefetch can happen.
// The definition is guarded, so a value defined before this point wins.
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

// Compile-time switch, disabled (0) here.
// NOTE(review): presumably enables a reduction-specific barrier path in the
// implementation file -- confirm against its users.
#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
58
59	class distributedBarrier {
60	struct flags_s {
61	kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
62	};
63
64	struct go_s {
65	std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
66	};
67
68	struct iter_s {
69	kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
70	};
71
72	struct sleep_s {
73	std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
74	};
75
76	void init(size_t nthr);
77	void resize(size_t nthr);
78	void computeGo(size_t n);
79	void computeVarsForN(size_t n);
80
81	public:
82	enum {
83	MAX_ITERS = `3`,
84	MAX_GOS = `8`,
85	IDEAL_GOS = `4`,
86	IDEAL_CONTENTION = `16`,
87	};
88
89	flags_s *flags[MAX_ITERS];
90	go_s *go;
91	iter_s *iter;
92	sleep_s *sleep;
93
94	size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
95	size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
96	// number of go signals each requiring one write per iteration
97	size_t KMP_ALIGN_CACHE num_gos;
98	// number of groups of gos
99	size_t KMP_ALIGN_CACHE num_groups;
100	// threads per go signal
101	size_t KMP_ALIGN_CACHE threads_per_go;
102	bool KMP_ALIGN_CACHE fix_threads_per_go;
103	// threads per group
104	size_t KMP_ALIGN_CACHE threads_per_group;
105	// number of go signals in a group
106	size_t KMP_ALIGN_CACHE gos_per_group;
107	void *team_icvs;
108
109	distributedBarrier() = delete;
110	~distributedBarrier() = delete;
111
112	// Used instead of constructor to create aligned data
113	static distributedBarrier allocate(int* nThreads) {
114	distributedBarrier d = (distributedBarrier )KMP_ALIGNED_ALLOCATE(
115	sizeof(distributedBarrier), `4` * CACHE_LINE);
116	if (!d) {
117	KMP_FATAL(MemoryAllocFailed);
118	}
119	d->num_threads = `0`;
120	d->max_threads = `0`;
121	for (int i = `0`; i < MAX_ITERS; ++i)
122	d->flags[i] = NULL;
123	d->go = NULL;
124	d->iter = NULL;
125	d->sleep = NULL;
126	d->team_icvs = NULL;
127	d->fix_threads_per_go = false;
128	// calculate gos and groups ONCE on base size
129	d->computeGo(n: nThreads);
130	d->init(nthr: nThreads);
131	return d;
132	}
133
134	static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }
135
136	void update_num_threads(size_t nthr) { init(nthr); }
137
138	bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
139	size_t get_num_threads() { return num_threads; }
140	kmp_uint64 go_release();
141	void go_reset();
142	};
143
144	#endif // KMP_BARRIER_H
145

source code of openmp/runtime/src/kmp_barrier.h