1 | //===-- runtime/reduction.cpp ---------------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // Implements ALL, ANY, COUNT, IALL, IANY, IPARITY, & PARITY for all required |
10 | // operand types and shapes. |
11 | // |
12 | // DOT_PRODUCT, FINDLOC, MATMUL, SUM, and PRODUCT are in their own eponymous |
13 | // source files. |
14 | // NORM2, MAXLOC, MINLOC, MAXVAL, and MINVAL are in extrema.cpp. |
15 | |
16 | #include "flang/Runtime/reduction.h" |
17 | #include "reduction-templates.h" |
18 | #include "flang/Runtime/descriptor.h" |
19 | #include <cinttypes> |
20 | |
21 | namespace Fortran::runtime { |
22 | |
23 | // IALL, IANY, IPARITY |
24 | |
25 | template <typename INTERMEDIATE> class IntegerAndAccumulator { |
26 | public: |
27 | explicit RT_API_ATTRS IntegerAndAccumulator(const Descriptor &array) |
28 | : array_{array} {} |
29 | RT_API_ATTRS void Reinitialize() { and_ = ~INTERMEDIATE{0}; } |
30 | template <typename A> |
31 | RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { |
32 | *p = static_cast<A>(and_); |
33 | } |
34 | template <typename A> |
35 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
36 | and_ &= *array_.Element<A>(at); |
37 | return true; |
38 | } |
39 | |
40 | private: |
41 | const Descriptor &array_; |
42 | INTERMEDIATE and_{~INTERMEDIATE{0}}; |
43 | }; |
44 | |
45 | template <typename INTERMEDIATE> class IntegerOrAccumulator { |
46 | public: |
47 | explicit RT_API_ATTRS IntegerOrAccumulator(const Descriptor &array) |
48 | : array_{array} {} |
49 | RT_API_ATTRS void Reinitialize() { or_ = 0; } |
50 | template <typename A> |
51 | RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { |
52 | *p = static_cast<A>(or_); |
53 | } |
54 | template <typename A> |
55 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
56 | or_ |= *array_.Element<A>(at); |
57 | return true; |
58 | } |
59 | |
60 | private: |
61 | const Descriptor &array_; |
62 | INTERMEDIATE or_{0}; |
63 | }; |
64 | |
65 | template <typename INTERMEDIATE> class IntegerXorAccumulator { |
66 | public: |
67 | explicit RT_API_ATTRS IntegerXorAccumulator(const Descriptor &array) |
68 | : array_{array} {} |
69 | RT_API_ATTRS void Reinitialize() { xor_ = 0; } |
70 | template <typename A> |
71 | RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { |
72 | *p = static_cast<A>(xor_); |
73 | } |
74 | template <typename A> |
75 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
76 | xor_ ^= *array_.Element<A>(at); |
77 | return true; |
78 | } |
79 | |
80 | private: |
81 | const Descriptor &array_; |
82 | INTERMEDIATE xor_{0}; |
83 | }; |
84 | |
85 | extern "C" { |
86 | CppTypeFor<TypeCategory::Integer, 1> RTDEF(IAll1)(const Descriptor &x, |
87 | const char *source, int line, int dim, const Descriptor *mask) { |
88 | return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask, |
89 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL" ); |
90 | } |
91 | CppTypeFor<TypeCategory::Integer, 2> RTDEF(IAll2)(const Descriptor &x, |
92 | const char *source, int line, int dim, const Descriptor *mask) { |
93 | return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask, |
94 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL" ); |
95 | } |
96 | CppTypeFor<TypeCategory::Integer, 4> RTDEF(IAll4)(const Descriptor &x, |
97 | const char *source, int line, int dim, const Descriptor *mask) { |
98 | return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask, |
99 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL" ); |
100 | } |
101 | CppTypeFor<TypeCategory::Integer, 8> RTDEF(IAll8)(const Descriptor &x, |
102 | const char *source, int line, int dim, const Descriptor *mask) { |
103 | return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask, |
104 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IALL" ); |
105 | } |
106 | #ifdef __SIZEOF_INT128__ |
107 | CppTypeFor<TypeCategory::Integer, 16> RTDEF(IAll16)(const Descriptor &x, |
108 | const char *source, int line, int dim, const Descriptor *mask) { |
109 | return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim, |
110 | mask, IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x}, |
111 | "IALL" ); |
112 | } |
113 | #endif |
114 | void RTDEF(IAllDim)(Descriptor &result, const Descriptor &x, int dim, |
115 | const char *source, int line, const Descriptor *mask) { |
116 | Terminator terminator{source, line}; |
117 | auto catKind{x.type().GetCategoryAndKind()}; |
118 | RUNTIME_CHECK(terminator, |
119 | catKind.has_value() && catKind->first == TypeCategory::Integer); |
120 | PartialIntegerReduction<IntegerAndAccumulator>( |
121 | result, x, dim, catKind->second, mask, "IALL" , terminator); |
122 | } |
123 | |
124 | CppTypeFor<TypeCategory::Integer, 1> RTDEF(IAny1)(const Descriptor &x, |
125 | const char *source, int line, int dim, const Descriptor *mask) { |
126 | return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask, |
127 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY" ); |
128 | } |
129 | CppTypeFor<TypeCategory::Integer, 2> RTDEF(IAny2)(const Descriptor &x, |
130 | const char *source, int line, int dim, const Descriptor *mask) { |
131 | return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask, |
132 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY" ); |
133 | } |
134 | CppTypeFor<TypeCategory::Integer, 4> RTDEF(IAny4)(const Descriptor &x, |
135 | const char *source, int line, int dim, const Descriptor *mask) { |
136 | return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask, |
137 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY" ); |
138 | } |
139 | CppTypeFor<TypeCategory::Integer, 8> RTDEF(IAny8)(const Descriptor &x, |
140 | const char *source, int line, int dim, const Descriptor *mask) { |
141 | return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask, |
142 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IANY" ); |
143 | } |
144 | #ifdef __SIZEOF_INT128__ |
145 | CppTypeFor<TypeCategory::Integer, 16> RTDEF(IAny16)(const Descriptor &x, |
146 | const char *source, int line, int dim, const Descriptor *mask) { |
147 | return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim, |
148 | mask, IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x}, |
149 | "IANY" ); |
150 | } |
151 | #endif |
152 | void RTDEF(IAnyDim)(Descriptor &result, const Descriptor &x, int dim, |
153 | const char *source, int line, const Descriptor *mask) { |
154 | Terminator terminator{source, line}; |
155 | auto catKind{x.type().GetCategoryAndKind()}; |
156 | RUNTIME_CHECK(terminator, |
157 | catKind.has_value() && catKind->first == TypeCategory::Integer); |
158 | PartialIntegerReduction<IntegerOrAccumulator>( |
159 | result, x, dim, catKind->second, mask, "IANY" , terminator); |
160 | } |
161 | |
162 | CppTypeFor<TypeCategory::Integer, 1> RTDEF(IParity1)(const Descriptor &x, |
163 | const char *source, int line, int dim, const Descriptor *mask) { |
164 | return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask, |
165 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, |
166 | "IPARITY" ); |
167 | } |
168 | CppTypeFor<TypeCategory::Integer, 2> RTDEF(IParity2)(const Descriptor &x, |
169 | const char *source, int line, int dim, const Descriptor *mask) { |
170 | return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask, |
171 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, |
172 | "IPARITY" ); |
173 | } |
174 | CppTypeFor<TypeCategory::Integer, 4> RTDEF(IParity4)(const Descriptor &x, |
175 | const char *source, int line, int dim, const Descriptor *mask) { |
176 | return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask, |
177 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, |
178 | "IPARITY" ); |
179 | } |
180 | CppTypeFor<TypeCategory::Integer, 8> RTDEF(IParity8)(const Descriptor &x, |
181 | const char *source, int line, int dim, const Descriptor *mask) { |
182 | return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask, |
183 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, |
184 | "IPARITY" ); |
185 | } |
186 | #ifdef __SIZEOF_INT128__ |
187 | CppTypeFor<TypeCategory::Integer, 16> RTDEF(IParity16)(const Descriptor &x, |
188 | const char *source, int line, int dim, const Descriptor *mask) { |
189 | return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim, |
190 | mask, IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x}, |
191 | "IPARITY" ); |
192 | } |
193 | #endif |
194 | void RTDEF(IParityDim)(Descriptor &result, const Descriptor &x, int dim, |
195 | const char *source, int line, const Descriptor *mask) { |
196 | Terminator terminator{source, line}; |
197 | auto catKind{x.type().GetCategoryAndKind()}; |
198 | RUNTIME_CHECK(terminator, |
199 | catKind.has_value() && catKind->first == TypeCategory::Integer); |
200 | PartialIntegerReduction<IntegerXorAccumulator>( |
201 | result, x, dim, catKind->second, mask, "IPARITY" , terminator); |
202 | } |
203 | } |
204 | |
205 | // ALL, ANY, COUNT, & PARITY |
206 | |
// Selects which logical reduction LogicalAccumulator performs: All starts
// from true and short-circuits on a false element, Any starts from false and
// short-circuits on a true element, and Parity toggles on each true element.
enum class LogicalReduction { All, Any, Parity };
208 | |
209 | template <LogicalReduction REDUCTION> class LogicalAccumulator { |
210 | public: |
211 | using Type = bool; |
212 | explicit LogicalAccumulator(const Descriptor &array) : array_{array} {} |
213 | void Reinitialize() { result_ = REDUCTION == LogicalReduction::All; } |
214 | bool Result() const { return result_; } |
215 | bool Accumulate(bool x) { |
216 | if constexpr (REDUCTION == LogicalReduction::Parity) { |
217 | result_ = result_ != x; |
218 | } else if (x != (REDUCTION == LogicalReduction::All)) { |
219 | result_ = x; |
220 | return false; |
221 | } |
222 | return true; |
223 | } |
224 | template <typename IGNORED = void> |
225 | bool AccumulateAt(const SubscriptValue at[]) { |
226 | return Accumulate(IsLogicalElementTrue(array_, at)); |
227 | } |
228 | |
229 | private: |
230 | const Descriptor &array_; |
231 | bool result_{REDUCTION == LogicalReduction::All}; |
232 | }; |
233 | |
234 | template <typename ACCUMULATOR> |
235 | inline auto GetTotalLogicalReduction(const Descriptor &x, const char *source, |
236 | int line, int dim, ACCUMULATOR &&accumulator, const char *intrinsic) -> |
237 | typename ACCUMULATOR::Type { |
238 | Terminator terminator{source, line}; |
239 | if (dim < 0 || dim > 1) { |
240 | terminator.Crash("%s: bad DIM=%d for ARRAY with rank=1" , intrinsic, dim); |
241 | } |
242 | SubscriptValue xAt[maxRank]; |
243 | x.GetLowerBounds(xAt); |
244 | for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) { |
245 | if (!accumulator.AccumulateAt(xAt)) { |
246 | break; // cut short, result is known |
247 | } |
248 | } |
249 | return accumulator.Result(); |
250 | } |
251 | |
252 | template <typename ACCUMULATOR> |
253 | inline auto ReduceLogicalDimToScalar(const Descriptor &x, int zeroBasedDim, |
254 | SubscriptValue subscripts[]) -> typename ACCUMULATOR::Type { |
255 | ACCUMULATOR accumulator{x}; |
256 | SubscriptValue xAt[maxRank]; |
257 | GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts); |
258 | const auto &dim{x.GetDimension(zeroBasedDim)}; |
259 | SubscriptValue at{dim.LowerBound()}; |
260 | for (auto n{dim.Extent()}; n-- > 0; ++at) { |
261 | xAt[zeroBasedDim] = at; |
262 | if (!accumulator.AccumulateAt(xAt)) { |
263 | break; |
264 | } |
265 | } |
266 | return accumulator.Result(); |
267 | } |
268 | |
269 | template <LogicalReduction REDUCTION> struct LogicalReduceHelper { |
270 | template <int KIND> struct Functor { |
271 | void operator()(Descriptor &result, const Descriptor &x, int dim, |
272 | Terminator &terminator, const char *intrinsic) const { |
273 | // Standard requires result to have same LOGICAL kind as argument. |
274 | CreatePartialReductionResult( |
275 | result, x, x.ElementBytes(), dim, terminator, intrinsic, x.type()); |
276 | SubscriptValue at[maxRank]; |
277 | result.GetLowerBounds(at); |
278 | INTERNAL_CHECK(result.rank() == 0 || at[0] == 1); |
279 | using CppType = CppTypeFor<TypeCategory::Logical, KIND>; |
280 | for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { |
281 | *result.Element<CppType>(at) = |
282 | ReduceLogicalDimToScalar<LogicalAccumulator<REDUCTION>>( |
283 | x, dim - 1, at); |
284 | } |
285 | } |
286 | }; |
287 | }; |
288 | |
289 | template <LogicalReduction REDUCTION> |
290 | inline void DoReduceLogicalDimension(Descriptor &result, const Descriptor &x, |
291 | int dim, Terminator &terminator, const char *intrinsic) { |
292 | auto catKind{x.type().GetCategoryAndKind()}; |
293 | RUNTIME_CHECK(terminator, catKind && catKind->first == TypeCategory::Logical); |
294 | ApplyLogicalKind<LogicalReduceHelper<REDUCTION>::template Functor, void>( |
295 | catKind->second, terminator, result, x, dim, terminator, intrinsic); |
296 | } |
297 | |
298 | // COUNT |
299 | |
300 | class CountAccumulator { |
301 | public: |
302 | using Type = std::int64_t; |
303 | explicit CountAccumulator(const Descriptor &array) : array_{array} {} |
304 | void Reinitialize() { result_ = 0; } |
305 | Type Result() const { return result_; } |
306 | template <typename IGNORED = void> |
307 | bool AccumulateAt(const SubscriptValue at[]) { |
308 | if (IsLogicalElementTrue(array_, at)) { |
309 | ++result_; |
310 | } |
311 | return true; |
312 | } |
313 | |
314 | private: |
315 | const Descriptor &array_; |
316 | Type result_{0}; |
317 | }; |
318 | |
319 | template <int KIND> struct CountDimension { |
320 | void operator()(Descriptor &result, const Descriptor &x, int dim, |
321 | Terminator &terminator) const { |
322 | // Element size of the descriptor descriptor is the size |
323 | // of {TypeCategory::Integer, KIND}. |
324 | CreatePartialReductionResult(result, x, |
325 | Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, terminator, |
326 | "COUNT" , TypeCode{TypeCategory::Integer, KIND}); |
327 | SubscriptValue at[maxRank]; |
328 | result.GetLowerBounds(at); |
329 | INTERNAL_CHECK(result.rank() == 0 || at[0] == 1); |
330 | using CppType = CppTypeFor<TypeCategory::Integer, KIND>; |
331 | for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { |
332 | *result.Element<CppType>(at) = |
333 | ReduceLogicalDimToScalar<CountAccumulator>(x, dim - 1, at); |
334 | } |
335 | } |
336 | }; |
337 | |
338 | extern "C" { |
339 | RT_EXT_API_GROUP_BEGIN |
340 | |
341 | bool RTDEF(All)(const Descriptor &x, const char *source, int line, int dim) { |
342 | return GetTotalLogicalReduction(x, source, line, dim, |
343 | LogicalAccumulator<LogicalReduction::All>{x}, "ALL" ); |
344 | } |
345 | void RTDEF(AllDim)(Descriptor &result, const Descriptor &x, int dim, |
346 | const char *source, int line) { |
347 | Terminator terminator{source, line}; |
348 | DoReduceLogicalDimension<LogicalReduction::All>( |
349 | result, x, dim, terminator, "ALL" ); |
350 | } |
351 | |
352 | bool RTDEF(Any)(const Descriptor &x, const char *source, int line, int dim) { |
353 | return GetTotalLogicalReduction(x, source, line, dim, |
354 | LogicalAccumulator<LogicalReduction::Any>{x}, "ANY" ); |
355 | } |
356 | void RTDEF(AnyDim)(Descriptor &result, const Descriptor &x, int dim, |
357 | const char *source, int line) { |
358 | Terminator terminator{source, line}; |
359 | DoReduceLogicalDimension<LogicalReduction::Any>( |
360 | result, x, dim, terminator, "ANY" ); |
361 | } |
362 | |
363 | std::int64_t RTDEF(Count)( |
364 | const Descriptor &x, const char *source, int line, int dim) { |
365 | return GetTotalLogicalReduction( |
366 | x, source, line, dim, CountAccumulator{x}, "COUNT" ); |
367 | } |
368 | |
// COUNT partial reduction along "dim", delivered through "result" as an
// integer array of the requested "kind".  The work is dispatched on "kind"
// to CountDimension<KIND> via ApplyIntegerKind; "terminator" is passed both
// to the dispatcher and on to the functor.
void RTDEF(CountDim)(Descriptor &result, const Descriptor &x, int dim, int kind,
    const char *source, int line) {
  Terminator terminator{source, line};
  ApplyIntegerKind<CountDimension, void>(
      kind, terminator, result, x, dim, terminator);
}
375 | |
376 | bool RTDEF(Parity)(const Descriptor &x, const char *source, int line, int dim) { |
377 | return GetTotalLogicalReduction(x, source, line, dim, |
378 | LogicalAccumulator<LogicalReduction::Parity>{x}, "PARITY" ); |
379 | } |
380 | void RTDEF(ParityDim)(Descriptor &result, const Descriptor &x, int dim, |
381 | const char *source, int line) { |
382 | Terminator terminator{source, line}; |
383 | DoReduceLogicalDimension<LogicalReduction::Parity>( |
384 | result, x, dim, terminator, "PARITY" ); |
385 | } |
386 | |
387 | RT_EXT_API_GROUP_END |
388 | } // extern "C" |
389 | } // namespace Fortran::runtime |
390 | |