kmp_collapse.h source code [openmp/runtime/src/kmp_collapse.h]

1	/*
2	* kmp_collapse.h -- header for loop collapse feature
3	*/
4
5	//===----------------------------------------------------------------------===//
6	//
7	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8	// See https://llvm.org/LICENSE.txt for license information.
9	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10	//
11	//===----------------------------------------------------------------------===//
12
13	#ifndef KMP_COLLAPSE_H
14	#define KMP_COLLAPSE_H
15
16	#include <type_traits>
17
18	// Type of the index into the loop nest structures
19	// (with values from 0 to less than n from collapse(n))
20	typedef kmp_int32 kmp_index_t;
21
22	// Type for combined loop nest space IV:
23	typedef kmp_uint64 kmp_loop_nest_iv_t;
24
25	// Loop has <, <=, etc. as a comparison:
26	enum comparison_t : kmp_int32 {
27	comp_less_or_eq = `0`,
28	comp_greater_or_eq = `1`,
29	comp_not_eq = `2`,
30	comp_less = `3`,
31	comp_greater = `4`
32	};
33
34	// Type of loop IV.
35	// Type of bounds and step, after usual promotions
36	// are a subset of these types (32 & 64 only):
37	enum loop_type_t : kmp_int32 {
38	loop_type_uint8 = `0`,
39	loop_type_int8 = `1`,
40	loop_type_uint16 = `2`,
41	loop_type_int16 = `3`,
42	loop_type_uint32 = `4`,
43	loop_type_int32 = `5`,
44	loop_type_uint64 = `6`,
45	loop_type_int64 = `7`
46	};
47
48	// Defining loop types to handle special cases
49	enum nested_loop_type_t : kmp_int32 {
50	nested_loop_type_unkown = `0`,
51	nested_loop_type_lower_triangular_matrix = `1`,
52	nested_loop_type_upper_triangular_matrix = `2`
53	};
54
55	/!*
56	@ingroup WORK_SHARING
57	* Describes the structure for rectangular nested loops.
58	*/
59	template <typename T> struct bounds_infoXX_template {
60
61	// typedef typename traits_t<T>::unsigned_t UT;
62	typedef typename traits_t<T>::signed_t ST;
63
64	loop_type_t loop_type; // The differentiator
65	loop_type_t loop_iv_type;
66	comparison_t comparison;
67	// outer_iv should be 0 (or any other less then number of dimentions)
68	// if loop doesn't depend on it (lb1 and ub1 will be 0).
69	// This way we can do multiplication without a check.
70	kmp_index_t outer_iv;
71
72	// unions to keep the size constant:
73	union {
74	T lb0;
75	kmp_uint64 lb0_u64; // real type can be signed
76	};
77
78	union {
79	T lb1;
80	kmp_uint64 lb1_u64; // real type can be signed
81	};
82
83	union {
84	T ub0;
85	kmp_uint64 ub0_u64; // real type can be signed
86	};
87
88	union {
89	T ub1;
90	kmp_uint64 ub1_u64; // real type can be signed
91	};
92
93	union {
94	ST step; // signed even if bounds type is unsigned
95	kmp_int64 step_64; // signed
96	};
97
98	kmp_loop_nest_iv_t trip_count;
99	};
100
101	/!*
102	@ingroup WORK_SHARING
103	* Interface struct for rectangular nested loops.
104	* Same size as bounds_infoXX_template.
105	*/
106	struct bounds_info_t {
107
108	loop_type_t loop_type; // The differentiator
109	loop_type_t loop_iv_type;
110	comparison_t comparison;
111	// outer_iv should be 0 (or any other less then number of dimentions)
112	// if loop doesn't depend on it (lb1 and ub1 will be 0).
113	// This way we can do multiplication without a check.
114	kmp_index_t outer_iv;
115
116	kmp_uint64 lb0_u64; // real type can be signed
117	kmp_uint64 lb1_u64; // real type can be signed
118	kmp_uint64 ub0_u64; // real type can be signed
119	kmp_uint64 ub1_u64; // real type can be signed
120	kmp_int64 step_64; // signed
121
122	// This is internal, but it's the only internal thing we need
123	// in rectangular case, so let's expose it here:
124	kmp_loop_nest_iv_t trip_count;
125	};
126
127	//-------------------------------------------------------------------------
128	// Additional types for internal representation:
129
130	// Array for a point in the loop space, in the original space.
131	// It's represented in kmp_uint64, but each dimention is calculated in
132	// that loop IV type. Also dimentions have to be converted to those types
133	// when used in generated code.
134	typedef kmp_uint64 *kmp_point_t;
135
136	// Array: Number of loop iterations on each nesting level to achieve some point,
137	// in expanded space or in original space.
138	// OMPTODO: move from using iterations to using offsets (iterations multiplied
139	// by steps). For those we need to be careful with the types, as step can be
140	// negative, but it'll remove multiplications and divisions in several places.
141	typedef kmp_loop_nest_iv_t *kmp_iterations_t;
142
143	// Internal struct with additional info:
144	template <typename T> struct bounds_info_internalXX_template {
145
146	// OMPTODO: should span have type T or should it better be
147	// kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than
148	// updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to
149	// use big_span_t, if it can be resolved at compile time.
150	typedef
151	typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
152	big_span_t;
153
154	// typedef typename big_span_t span_t;
155	typedef T span_t;
156
157	bounds_infoXX_template<T> b; // possibly adjusted bounds
158
159	// Leaving this as a union in case we'll switch to span_t with different sizes
160	// (depending on T)
161	union {
162	// Smallest possible value of iv (may be smaller than actually possible)
163	span_t span_smallest;
164	kmp_uint64 span_smallest_u64;
165	};
166
167	// Leaving this as a union in case we'll switch to span_t with different sizes
168	// (depending on T)
169	union {
170	// Biggest possible value of iv (may be bigger than actually possible)
171	span_t span_biggest;
172	kmp_uint64 span_biggest_u64;
173	};
174
175	// Did we adjust loop bounds (not counting canonicalization)?
176	bool loop_bounds_adjusted;
177	};
178
179	// Internal struct with additional info:
180	struct bounds_info_internal_t {
181
182	bounds_info_t b; // possibly adjusted bounds
183
184	// Smallest possible value of iv (may be smaller than actually possible)
185	kmp_uint64 span_smallest_u64;
186
187	// Biggest possible value of iv (may be bigger than actually possible)
188	kmp_uint64 span_biggest_u64;
189
190	// Did we adjust loop bounds (not counting canonicalization)?
191	bool loop_bounds_adjusted;
192	};
193
194	//----------APIs for rectangular loop nests--------------------------------
195
196	// Canonicalize loop nest and calculate overall trip count.
197	// "bounds_nest" has to be allocated per thread.
198	// API will modify original bounds_nest array to bring it to a canonical form
199	// (only <= and >=, no !=, <, >). If the original loop nest was already in a
200	// canonical form there will be no changes to bounds in bounds_nest array
201	// (only trip counts will be calculated).
202	// Returns trip count of overall space.
203	extern "C" kmp_loop_nest_iv_t
204	__kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
205	/in/out/ bounds_info_t *original_bounds_nest,
206	kmp_index_t n);
207
208	// Calculate old induction variables corresponding to overall new_iv.
209	// Note: original IV will be returned as if it had kmp_uint64 type,
210	// will have to be converted to original type in user code.
211	// Note: trip counts should be already calculated by
212	// __kmpc_process_loop_nest_rectang.
213	// OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline
214	// that into user code.
215	extern "C" void
216	__kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
217	const bounds_info_t *original_bounds_nest,
218	/out/ kmp_uint64 *original_ivs,
219	kmp_index_t n);
220
221	//----------Init API for non-rectangular loops--------------------------------
222
223	// Init API for collapsed loops (static, no chunks defined).
224	// "bounds_nest" has to be allocated per thread.
225	// API will modify original bounds_nest array to bring it to a canonical form
226	// (only <= and >=, no !=, <, >). If the original loop nest was already in a
227	// canonical form there will be no changes to bounds in bounds_nest array
228	// (only trip counts will be calculated). Internally API will expand the space
229	// to parallelogram/parallelepiped, calculate total, calculate bounds for the
230	// chunks in terms of the new IV, re-calc them in terms of old IVs (especially
231	// important on the left side, to hit the lower bounds and not step over), and
232	// pick the correct chunk for this thread (so it will calculate chunks up to the
233	// needed one). It could be optimized to calculate just this chunk, potentially
234	// a bit less well distributed among threads. It is designed to make sure that
235	// threads will receive predictable chunks, deterministically (so that next nest
236	// of loops with similar characteristics will get exactly same chunks on same
237	// threads).
238	// Current contract: chunk_bounds_nest has only lb0 and ub0,
239	// lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
240	extern "C" kmp_int32
241	__kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
242	/in/out/ bounds_info_t *original_bounds_nest,
243	/out/ bounds_info_t *chunk_bounds_nest,
244	kmp_index_t n,
245	/out/ kmp_int32 *plastiter);
246
247	#endif // KMP_COLLAPSE_H
248

source code of openmp/runtime/src/kmp_collapse.h