1 | //===-- lib/runtime/reduction.cpp -------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // Implements ALL, ANY, COUNT, IALL, IANY, IPARITY, & PARITY for all required |
10 | // operand types and shapes. |
11 | // |
12 | // DOT_PRODUCT, FINDLOC, MATMUL, SUM, and PRODUCT are in their own eponymous |
13 | // source files. |
14 | // NORM2, MAXLOC, MINLOC, MAXVAL, and MINVAL are in extrema.cpp. |
15 | |
16 | #include "flang/Runtime/reduction.h" |
17 | #include "flang-rt/runtime/descriptor.h" |
18 | #include "flang-rt/runtime/reduction-templates.h" |
19 | #include <cinttypes> |
20 | |
21 | namespace Fortran::runtime { |
22 | |
23 | // IALL, IANY, IPARITY |
24 | |
25 | template <typename INTERMEDIATE> class IntegerAndAccumulator { |
26 | public: |
27 | explicit RT_API_ATTRS IntegerAndAccumulator(const Descriptor &array) |
28 | : array_{array} {} |
29 | RT_API_ATTRS void Reinitialize() { and_ = ~INTERMEDIATE{0}; } |
30 | template <typename A> |
31 | RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { |
32 | *p = static_cast<A>(and_); |
33 | } |
34 | template <typename A> |
35 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
36 | and_ &= *array_.Element<A>(at); |
37 | return true; |
38 | } |
39 | |
40 | private: |
41 | const Descriptor &array_; |
42 | INTERMEDIATE and_{~INTERMEDIATE{0}}; |
43 | }; |
44 | |
45 | template <typename INTERMEDIATE> class IntegerOrAccumulator { |
46 | public: |
47 | explicit RT_API_ATTRS IntegerOrAccumulator(const Descriptor &array) |
48 | : array_{array} {} |
49 | RT_API_ATTRS void Reinitialize() { or_ = 0; } |
50 | template <typename A> |
51 | RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { |
52 | *p = static_cast<A>(or_); |
53 | } |
54 | template <typename A> |
55 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
56 | or_ |= *array_.Element<A>(at); |
57 | return true; |
58 | } |
59 | |
60 | private: |
61 | const Descriptor &array_; |
62 | INTERMEDIATE or_{0}; |
63 | }; |
64 | |
65 | template <typename INTERMEDIATE> class IntegerXorAccumulator { |
66 | public: |
67 | explicit RT_API_ATTRS IntegerXorAccumulator(const Descriptor &array) |
68 | : array_{array} {} |
69 | RT_API_ATTRS void Reinitialize() { xor_ = 0; } |
70 | template <typename A> |
71 | RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { |
72 | *p = static_cast<A>(xor_); |
73 | } |
74 | template <typename A> |
75 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
76 | xor_ ^= *array_.Element<A>(at); |
77 | return true; |
78 | } |
79 | |
80 | private: |
81 | const Descriptor &array_; |
82 | INTERMEDIATE xor_{0}; |
83 | }; |
84 | |
85 | extern "C" { |
86 | CppTypeFor<TypeCategory::Integer, 1> RTDEF(IAll1)(const Descriptor &x, |
87 | const char *source, int line, int dim, const Descriptor *mask) { |
88 | return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask, |
89 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL" , |
90 | /*allowUnsignedForInteger=*/true); |
91 | } |
92 | CppTypeFor<TypeCategory::Integer, 2> RTDEF(IAll2)(const Descriptor &x, |
93 | const char *source, int line, int dim, const Descriptor *mask) { |
94 | return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask, |
95 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL" , |
96 | /*allowUnsignedForInteger=*/true); |
97 | } |
98 | CppTypeFor<TypeCategory::Integer, 4> RTDEF(IAll4)(const Descriptor &x, |
99 | const char *source, int line, int dim, const Descriptor *mask) { |
100 | return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask, |
101 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL" , |
102 | /*allowUnsignedForInteger=*/true); |
103 | } |
104 | CppTypeFor<TypeCategory::Integer, 8> RTDEF(IAll8)(const Descriptor &x, |
105 | const char *source, int line, int dim, const Descriptor *mask) { |
106 | return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask, |
107 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IALL" , |
108 | /*allowUnsignedForInteger=*/true); |
109 | } |
110 | #ifdef __SIZEOF_INT128__ |
111 | CppTypeFor<TypeCategory::Integer, 16> RTDEF(IAll16)(const Descriptor &x, |
112 | const char *source, int line, int dim, const Descriptor *mask) { |
113 | return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim, |
114 | mask, IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x}, |
115 | "IALL" , /*allowUnsignedForInteger=*/true); |
116 | } |
117 | #endif |
118 | void RTDEF(IAllDim)(Descriptor &result, const Descriptor &x, int dim, |
119 | const char *source, int line, const Descriptor *mask) { |
120 | Terminator terminator{source, line}; |
121 | auto catKind{x.type().GetCategoryAndKind()}; |
122 | RUNTIME_CHECK(terminator, |
123 | catKind.has_value() && |
124 | (catKind->first == TypeCategory::Integer || |
125 | catKind->first == TypeCategory::Unsigned)); |
126 | PartialIntegerReduction<IntegerAndAccumulator>( |
127 | result, x, dim, catKind->second, mask, "IALL" , terminator); |
128 | } |
129 | |
130 | CppTypeFor<TypeCategory::Integer, 1> RTDEF(IAny1)(const Descriptor &x, |
131 | const char *source, int line, int dim, const Descriptor *mask) { |
132 | return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask, |
133 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY" , |
134 | /*allowUnsignedForInteger=*/true); |
135 | } |
136 | CppTypeFor<TypeCategory::Integer, 2> RTDEF(IAny2)(const Descriptor &x, |
137 | const char *source, int line, int dim, const Descriptor *mask) { |
138 | return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask, |
139 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY" , |
140 | /*allowUnsignedForInteger=*/true); |
141 | } |
142 | CppTypeFor<TypeCategory::Integer, 4> RTDEF(IAny4)(const Descriptor &x, |
143 | const char *source, int line, int dim, const Descriptor *mask) { |
144 | return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask, |
145 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY" , |
146 | /*allowUnsignedForInteger=*/true); |
147 | } |
148 | CppTypeFor<TypeCategory::Integer, 8> RTDEF(IAny8)(const Descriptor &x, |
149 | const char *source, int line, int dim, const Descriptor *mask) { |
150 | return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask, |
151 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IANY" , |
152 | /*allowUnsignedForInteger=*/true); |
153 | } |
154 | #ifdef __SIZEOF_INT128__ |
155 | CppTypeFor<TypeCategory::Integer, 16> RTDEF(IAny16)(const Descriptor &x, |
156 | const char *source, int line, int dim, const Descriptor *mask) { |
157 | return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim, |
158 | mask, IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x}, |
159 | "IANY" , /*allowUnsignedForInteger=*/true); |
160 | } |
161 | #endif |
162 | void RTDEF(IAnyDim)(Descriptor &result, const Descriptor &x, int dim, |
163 | const char *source, int line, const Descriptor *mask) { |
164 | Terminator terminator{source, line}; |
165 | auto catKind{x.type().GetCategoryAndKind()}; |
166 | RUNTIME_CHECK(terminator, |
167 | catKind.has_value() && |
168 | (catKind->first == TypeCategory::Integer || |
169 | catKind->first == TypeCategory::Unsigned)); |
170 | PartialIntegerReduction<IntegerOrAccumulator>( |
171 | result, x, dim, catKind->second, mask, "IANY" , terminator); |
172 | } |
173 | |
174 | CppTypeFor<TypeCategory::Integer, 1> RTDEF(IParity1)(const Descriptor &x, |
175 | const char *source, int line, int dim, const Descriptor *mask) { |
176 | return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask, |
177 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IPARITY" , |
178 | /*allowUnsignedForInteger=*/true); |
179 | } |
180 | CppTypeFor<TypeCategory::Integer, 2> RTDEF(IParity2)(const Descriptor &x, |
181 | const char *source, int line, int dim, const Descriptor *mask) { |
182 | return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask, |
183 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IPARITY" , |
184 | /*allowUnsignedForInteger=*/true); |
185 | } |
186 | CppTypeFor<TypeCategory::Integer, 4> RTDEF(IParity4)(const Descriptor &x, |
187 | const char *source, int line, int dim, const Descriptor *mask) { |
188 | return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask, |
189 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IPARITY" , |
190 | /*allowUnsignedForInteger=*/true); |
191 | } |
192 | CppTypeFor<TypeCategory::Integer, 8> RTDEF(IParity8)(const Descriptor &x, |
193 | const char *source, int line, int dim, const Descriptor *mask) { |
194 | return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask, |
195 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IPARITY" , |
196 | /*allowUnsignedForInteger=*/true); |
197 | } |
198 | #ifdef __SIZEOF_INT128__ |
199 | CppTypeFor<TypeCategory::Integer, 16> RTDEF(IParity16)(const Descriptor &x, |
200 | const char *source, int line, int dim, const Descriptor *mask) { |
201 | return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim, |
202 | mask, IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x}, |
203 | "IPARITY" , /*allowUnsignedForInteger=*/true); |
204 | } |
205 | #endif |
206 | void RTDEF(IParityDim)(Descriptor &result, const Descriptor &x, int dim, |
207 | const char *source, int line, const Descriptor *mask) { |
208 | Terminator terminator{source, line}; |
209 | auto catKind{x.type().GetCategoryAndKind()}; |
210 | RUNTIME_CHECK(terminator, |
211 | catKind.has_value() && |
212 | (catKind->first == TypeCategory::Integer || |
213 | catKind->first == TypeCategory::Unsigned)); |
214 | PartialIntegerReduction<IntegerXorAccumulator>( |
215 | result, x, dim, catKind->second, mask, "IPARITY" , terminator); |
216 | } |
217 | } |
218 | |
219 | // ALL, ANY, COUNT, & PARITY |
220 | |
// Identifies one of the logical reductions implemented below (ALL, ANY, or
// PARITY); used as a template argument to pick the accumulator's behavior.
enum class LogicalReduction { All, Any, Parity };
222 | |
223 | template <LogicalReduction REDUCTION> class LogicalAccumulator { |
224 | public: |
225 | using Type = bool; |
226 | RT_API_ATTRS explicit LogicalAccumulator(const Descriptor &array) |
227 | : array_{array} {} |
228 | RT_API_ATTRS void Reinitialize() { |
229 | result_ = REDUCTION == LogicalReduction::All; |
230 | } |
231 | RT_API_ATTRS bool Result() const { return result_; } |
232 | RT_API_ATTRS bool Accumulate(bool x) { |
233 | if constexpr (REDUCTION == LogicalReduction::Parity) { |
234 | result_ = result_ != x; |
235 | } else if (x != (REDUCTION == LogicalReduction::All)) { |
236 | result_ = x; |
237 | return false; |
238 | } |
239 | return true; |
240 | } |
241 | template <typename IGNORED = void> |
242 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
243 | return Accumulate(IsLogicalElementTrue(array_, at)); |
244 | } |
245 | |
246 | private: |
247 | const Descriptor &array_; |
248 | bool result_{REDUCTION == LogicalReduction::All}; |
249 | }; |
250 | |
251 | template <typename ACCUMULATOR> |
252 | RT_API_ATTRS inline auto GetTotalLogicalReduction(const Descriptor &x, |
253 | const char *source, int line, int dim, ACCUMULATOR &&accumulator, |
254 | const char *intrinsic) -> typename ACCUMULATOR::Type { |
255 | Terminator terminator{source, line}; |
256 | if (dim < 0 || dim > 1) { |
257 | terminator.Crash("%s: bad DIM=%d for ARRAY with rank=1" , intrinsic, dim); |
258 | } |
259 | SubscriptValue xAt[maxRank]; |
260 | x.GetLowerBounds(xAt); |
261 | for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) { |
262 | if (!accumulator.AccumulateAt(xAt)) { |
263 | break; // cut short, result is known |
264 | } |
265 | } |
266 | return accumulator.Result(); |
267 | } |
268 | |
269 | template <typename ACCUMULATOR> |
270 | RT_API_ATTRS inline auto ReduceLogicalDimToScalar( |
271 | const Descriptor &x, int zeroBasedDim, SubscriptValue subscripts[]) -> |
272 | typename ACCUMULATOR::Type { |
273 | ACCUMULATOR accumulator{x}; |
274 | SubscriptValue xAt[maxRank]; |
275 | GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts); |
276 | const auto &dim{x.GetDimension(zeroBasedDim)}; |
277 | SubscriptValue at{dim.LowerBound()}; |
278 | for (auto n{dim.Extent()}; n-- > 0; ++at) { |
279 | xAt[zeroBasedDim] = at; |
280 | if (!accumulator.AccumulateAt(xAt)) { |
281 | break; |
282 | } |
283 | } |
284 | return accumulator.Result(); |
285 | } |
286 | |
287 | template <LogicalReduction REDUCTION> struct LogicalReduceHelper { |
288 | template <int KIND> struct Functor { |
289 | RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, |
290 | int dim, Terminator &terminator, const char *intrinsic) const { |
291 | // Standard requires result to have same LOGICAL kind as argument. |
292 | CreatePartialReductionResult( |
293 | result, x, x.ElementBytes(), dim, terminator, intrinsic, x.type()); |
294 | SubscriptValue at[maxRank]; |
295 | result.GetLowerBounds(at); |
296 | INTERNAL_CHECK(result.rank() == 0 || at[0] == 1); |
297 | using CppType = CppTypeFor<TypeCategory::Logical, KIND>; |
298 | for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { |
299 | *result.Element<CppType>(at) = |
300 | ReduceLogicalDimToScalar<LogicalAccumulator<REDUCTION>>( |
301 | x, dim - 1, at); |
302 | } |
303 | } |
304 | }; |
305 | }; |
306 | |
307 | template <LogicalReduction REDUCTION> |
308 | RT_API_ATTRS inline void DoReduceLogicalDimension(Descriptor &result, |
309 | const Descriptor &x, int dim, Terminator &terminator, |
310 | const char *intrinsic) { |
311 | auto catKind{x.type().GetCategoryAndKind()}; |
312 | RUNTIME_CHECK(terminator, catKind && catKind->first == TypeCategory::Logical); |
313 | ApplyLogicalKind<LogicalReduceHelper<REDUCTION>::template Functor, void>( |
314 | catKind->second, terminator, result, x, dim, terminator, intrinsic); |
315 | } |
316 | |
317 | // COUNT |
318 | |
319 | class CountAccumulator { |
320 | public: |
321 | using Type = std::int64_t; |
322 | RT_API_ATTRS explicit CountAccumulator(const Descriptor &array) |
323 | : array_{array} {} |
324 | RT_API_ATTRS void Reinitialize() { result_ = 0; } |
325 | RT_API_ATTRS Type Result() const { return result_; } |
326 | template <typename IGNORED = void> |
327 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
328 | if (IsLogicalElementTrue(array_, at)) { |
329 | ++result_; |
330 | } |
331 | return true; |
332 | } |
333 | |
334 | private: |
335 | const Descriptor &array_; |
336 | Type result_{0}; |
337 | }; |
338 | |
339 | template <int KIND> struct CountDimension { |
340 | RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim, |
341 | Terminator &terminator) const { |
342 | // Element size of the descriptor descriptor is the size |
343 | // of {TypeCategory::Integer, KIND}. |
344 | CreatePartialReductionResult(result, x, |
345 | Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, terminator, |
346 | "COUNT" , TypeCode{TypeCategory::Integer, KIND}); |
347 | SubscriptValue at[maxRank]; |
348 | result.GetLowerBounds(at); |
349 | INTERNAL_CHECK(result.rank() == 0 || at[0] == 1); |
350 | using CppType = CppTypeFor<TypeCategory::Integer, KIND>; |
351 | for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { |
352 | *result.Element<CppType>(at) = |
353 | ReduceLogicalDimToScalar<CountAccumulator>(x, dim - 1, at); |
354 | } |
355 | } |
356 | }; |
357 | |
358 | extern "C" { |
359 | RT_EXT_API_GROUP_BEGIN |
360 | |
361 | bool RTDEF(All)(const Descriptor &x, const char *source, int line, int dim) { |
362 | return GetTotalLogicalReduction(x, source, line, dim, |
363 | LogicalAccumulator<LogicalReduction::All>{x}, "ALL" ); |
364 | } |
365 | void RTDEF(AllDim)(Descriptor &result, const Descriptor &x, int dim, |
366 | const char *source, int line) { |
367 | Terminator terminator{source, line}; |
368 | DoReduceLogicalDimension<LogicalReduction::All>( |
369 | result, x, dim, terminator, "ALL" ); |
370 | } |
371 | |
372 | bool RTDEF(Any)(const Descriptor &x, const char *source, int line, int dim) { |
373 | return GetTotalLogicalReduction(x, source, line, dim, |
374 | LogicalAccumulator<LogicalReduction::Any>{x}, "ANY" ); |
375 | } |
376 | void RTDEF(AnyDim)(Descriptor &result, const Descriptor &x, int dim, |
377 | const char *source, int line) { |
378 | Terminator terminator{source, line}; |
379 | DoReduceLogicalDimension<LogicalReduction::Any>( |
380 | result, x, dim, terminator, "ANY" ); |
381 | } |
382 | |
383 | std::int64_t RTDEF(Count)( |
384 | const Descriptor &x, const char *source, int line, int dim) { |
385 | return GetTotalLogicalReduction( |
386 | x, source, line, dim, CountAccumulator{x}, "COUNT" ); |
387 | } |
388 | |
// COUNT along dimension `dim`, with an INTEGER result of the requested
// `kind` delivered through `result`.  The work is done by CountDimension,
// selected for `kind` through ApplyIntegerKind.
void RTDEF(CountDim)(Descriptor &result, const Descriptor &x, int dim, int kind,
    const char *source, int line) {
  Terminator terminator{source, line};
  // The terminator appears twice: once for ApplyIntegerKind itself and once
  // as the last argument forwarded to CountDimension's call operator.
  ApplyIntegerKind<CountDimension, void>(
      kind, terminator, result, x, dim, terminator);
}
395 | |
396 | bool RTDEF(Parity)(const Descriptor &x, const char *source, int line, int dim) { |
397 | return GetTotalLogicalReduction(x, source, line, dim, |
398 | LogicalAccumulator<LogicalReduction::Parity>{x}, "PARITY" ); |
399 | } |
400 | void RTDEF(ParityDim)(Descriptor &result, const Descriptor &x, int dim, |
401 | const char *source, int line) { |
402 | Terminator terminator{source, line}; |
403 | DoReduceLogicalDimension<LogicalReduction::Parity>( |
404 | result, x, dim, terminator, "PARITY" ); |
405 | } |
406 | |
407 | RT_EXT_API_GROUP_END |
408 | } // extern "C" |
409 | } // namespace Fortran::runtime |
410 | |