1 | /* |
2 | * kmp_collapse.h -- header for loop collapse feature |
3 | */ |
4 | |
5 | //===----------------------------------------------------------------------===// |
6 | // |
7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
8 | // See https://llvm.org/LICENSE.txt for license information. |
9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef KMP_COLLAPSE_H |
14 | #define KMP_COLLAPSE_H |
15 | |
16 | #include <type_traits> |
17 | |
18 | // Type of the index into the loop nest structures |
19 | // (with values from 0 to less than n from collapse(n)) |
20 | typedef kmp_int32 kmp_index_t; |
21 | |
22 | // Type for combined loop nest space IV: |
23 | typedef kmp_uint64 kmp_loop_nest_iv_t; |
24 | |
25 | // Loop has <, <=, etc. as a comparison: |
26 | enum comparison_t : kmp_int32 { |
27 | comp_less_or_eq = 0, |
28 | comp_greater_or_eq = 1, |
29 | comp_not_eq = 2, |
30 | comp_less = 3, |
31 | comp_greater = 4 |
32 | }; |
33 | |
34 | // Type of loop IV. |
35 | // Type of bounds and step, after usual promotions |
36 | // are a subset of these types (32 & 64 only): |
37 | enum loop_type_t : kmp_int32 { |
38 | loop_type_uint8 = 0, |
39 | loop_type_int8 = 1, |
40 | loop_type_uint16 = 2, |
41 | loop_type_int16 = 3, |
42 | loop_type_uint32 = 4, |
43 | loop_type_int32 = 5, |
44 | loop_type_uint64 = 6, |
45 | loop_type_int64 = 7 |
46 | }; |
47 | |
48 | // Defining loop types to handle special cases |
49 | enum nested_loop_type_t : kmp_int32 { |
50 | nested_loop_type_unkown = 0, |
51 | nested_loop_type_lower_triangular_matrix = 1, |
52 | nested_loop_type_upper_triangular_matrix = 2 |
53 | }; |
54 | |
55 | /*! |
56 | @ingroup WORK_SHARING |
57 | * Describes the structure for rectangular nested loops. |
58 | */ |
59 | template <typename T> struct bounds_infoXX_template { |
60 | |
61 | // typedef typename traits_t<T>::unsigned_t UT; |
62 | typedef typename traits_t<T>::signed_t ST; |
63 | |
64 | loop_type_t loop_type; // The differentiator |
65 | loop_type_t loop_iv_type; |
66 | comparison_t comparison; |
67 | // outer_iv should be 0 (or any other less then number of dimentions) |
68 | // if loop doesn't depend on it (lb1 and ub1 will be 0). |
69 | // This way we can do multiplication without a check. |
70 | kmp_index_t outer_iv; |
71 | |
72 | // unions to keep the size constant: |
73 | union { |
74 | T lb0; |
75 | kmp_uint64 lb0_u64; // real type can be signed |
76 | }; |
77 | |
78 | union { |
79 | T lb1; |
80 | kmp_uint64 lb1_u64; // real type can be signed |
81 | }; |
82 | |
83 | union { |
84 | T ub0; |
85 | kmp_uint64 ub0_u64; // real type can be signed |
86 | }; |
87 | |
88 | union { |
89 | T ub1; |
90 | kmp_uint64 ub1_u64; // real type can be signed |
91 | }; |
92 | |
93 | union { |
94 | ST step; // signed even if bounds type is unsigned |
95 | kmp_int64 step_64; // signed |
96 | }; |
97 | |
98 | kmp_loop_nest_iv_t trip_count; |
99 | }; |
100 | |
101 | /*! |
102 | @ingroup WORK_SHARING |
103 | * Interface struct for rectangular nested loops. |
104 | * Same size as bounds_infoXX_template. |
105 | */ |
106 | struct bounds_info_t { |
107 | |
108 | loop_type_t loop_type; // The differentiator |
109 | loop_type_t loop_iv_type; |
110 | comparison_t comparison; |
111 | // outer_iv should be 0 (or any other less then number of dimentions) |
112 | // if loop doesn't depend on it (lb1 and ub1 will be 0). |
113 | // This way we can do multiplication without a check. |
114 | kmp_index_t outer_iv; |
115 | |
116 | kmp_uint64 lb0_u64; // real type can be signed |
117 | kmp_uint64 lb1_u64; // real type can be signed |
118 | kmp_uint64 ub0_u64; // real type can be signed |
119 | kmp_uint64 ub1_u64; // real type can be signed |
120 | kmp_int64 step_64; // signed |
121 | |
122 | // This is internal, but it's the only internal thing we need |
123 | // in rectangular case, so let's expose it here: |
124 | kmp_loop_nest_iv_t trip_count; |
125 | }; |
126 | |
127 | //------------------------------------------------------------------------- |
128 | // Additional types for internal representation: |
129 | |
130 | // Array for a point in the loop space, in the original space. |
131 | // It's represented in kmp_uint64, but each dimention is calculated in |
132 | // that loop IV type. Also dimentions have to be converted to those types |
133 | // when used in generated code. |
134 | typedef kmp_uint64 *kmp_point_t; |
135 | |
136 | // Array: Number of loop iterations on each nesting level to achieve some point, |
137 | // in expanded space or in original space. |
138 | // OMPTODO: move from using iterations to using offsets (iterations multiplied |
139 | // by steps). For those we need to be careful with the types, as step can be |
140 | // negative, but it'll remove multiplications and divisions in several places. |
141 | typedef kmp_loop_nest_iv_t *kmp_iterations_t; |
142 | |
143 | // Internal struct with additional info: |
144 | template <typename T> struct bounds_info_internalXX_template { |
145 | |
146 | // OMPTODO: should span have type T or should it better be |
147 | // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than |
148 | // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to |
149 | // use big_span_t, if it can be resolved at compile time. |
150 | typedef |
151 | typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64> |
152 | big_span_t; |
153 | |
154 | // typedef typename big_span_t span_t; |
155 | typedef T span_t; |
156 | |
157 | bounds_infoXX_template<T> b; // possibly adjusted bounds |
158 | |
159 | // Leaving this as a union in case we'll switch to span_t with different sizes |
160 | // (depending on T) |
161 | union { |
162 | // Smallest possible value of iv (may be smaller than actually possible) |
163 | span_t span_smallest; |
164 | kmp_uint64 span_smallest_u64; |
165 | }; |
166 | |
167 | // Leaving this as a union in case we'll switch to span_t with different sizes |
168 | // (depending on T) |
169 | union { |
170 | // Biggest possible value of iv (may be bigger than actually possible) |
171 | span_t span_biggest; |
172 | kmp_uint64 span_biggest_u64; |
173 | }; |
174 | |
175 | // Did we adjust loop bounds (not counting canonicalization)? |
176 | bool loop_bounds_adjusted; |
177 | }; |
178 | |
179 | // Internal struct with additional info: |
180 | struct bounds_info_internal_t { |
181 | |
182 | bounds_info_t b; // possibly adjusted bounds |
183 | |
184 | // Smallest possible value of iv (may be smaller than actually possible) |
185 | kmp_uint64 span_smallest_u64; |
186 | |
187 | // Biggest possible value of iv (may be bigger than actually possible) |
188 | kmp_uint64 span_biggest_u64; |
189 | |
190 | // Did we adjust loop bounds (not counting canonicalization)? |
191 | bool loop_bounds_adjusted; |
192 | }; |
193 | |
194 | //----------APIs for rectangular loop nests-------------------------------- |
195 | |
196 | // Canonicalize loop nest and calculate overall trip count. |
197 | // "bounds_nest" has to be allocated per thread. |
198 | // API will modify original bounds_nest array to bring it to a canonical form |
199 | // (only <= and >=, no !=, <, >). If the original loop nest was already in a |
200 | // canonical form there will be no changes to bounds in bounds_nest array |
201 | // (only trip counts will be calculated). |
202 | // Returns trip count of overall space. |
203 | extern "C" kmp_loop_nest_iv_t |
204 | __kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid, |
205 | /*in/out*/ bounds_info_t *original_bounds_nest, |
206 | kmp_index_t n); |
207 | |
208 | // Calculate old induction variables corresponding to overall new_iv. |
209 | // Note: original IV will be returned as if it had kmp_uint64 type, |
210 | // will have to be converted to original type in user code. |
211 | // Note: trip counts should be already calculated by |
212 | // __kmpc_process_loop_nest_rectang. |
213 | // OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline |
214 | // that into user code. |
215 | extern "C" void |
216 | __kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv, |
217 | const bounds_info_t *original_bounds_nest, |
218 | /*out*/ kmp_uint64 *original_ivs, |
219 | kmp_index_t n); |
220 | |
221 | //----------Init API for non-rectangular loops-------------------------------- |
222 | |
223 | // Init API for collapsed loops (static, no chunks defined). |
224 | // "bounds_nest" has to be allocated per thread. |
225 | // API will modify original bounds_nest array to bring it to a canonical form |
226 | // (only <= and >=, no !=, <, >). If the original loop nest was already in a |
227 | // canonical form there will be no changes to bounds in bounds_nest array |
228 | // (only trip counts will be calculated). Internally API will expand the space |
229 | // to parallelogram/parallelepiped, calculate total, calculate bounds for the |
230 | // chunks in terms of the new IV, re-calc them in terms of old IVs (especially |
231 | // important on the left side, to hit the lower bounds and not step over), and |
232 | // pick the correct chunk for this thread (so it will calculate chunks up to the |
233 | // needed one). It could be optimized to calculate just this chunk, potentially |
234 | // a bit less well distributed among threads. It is designed to make sure that |
235 | // threads will receive predictable chunks, deterministically (so that next nest |
236 | // of loops with similar characteristics will get exactly same chunks on same |
237 | // threads). |
238 | // Current contract: chunk_bounds_nest has only lb0 and ub0, |
239 | // lb1 and ub1 are set to 0 and can be ignored. (This may change in the future). |
240 | extern "C" kmp_int32 |
241 | __kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid, |
242 | /*in/out*/ bounds_info_t *original_bounds_nest, |
243 | /*out*/ bounds_info_t *chunk_bounds_nest, |
244 | kmp_index_t n, |
245 | /*out*/ kmp_int32 *plastiter); |
246 | |
247 | #endif // KMP_COLLAPSE_H |
248 | |