1 | //===-- runtime/reduction-templates.h -------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | // Generic function templates used by various reduction transformation |
10 | // intrinsic functions (SUM, PRODUCT, &c.) |
11 | // |
12 | // * Partial reductions (i.e., those with DIM= arguments that are not |
13 | // required to be 1 by the rank of the argument) return arrays that |
14 | // are dynamically allocated in a caller-supplied descriptor. |
15 | // * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC |
16 | // return integer vectors of some kind, not scalars; a caller-supplied |
17 | // descriptor is used |
18 | // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary |
19 | // length results, dynamically allocated in a caller-supplied descriptor |
20 | |
21 | #ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_ |
22 | #define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_ |
23 | |
24 | #include "terminator.h" |
25 | #include "tools.h" |
26 | #include "flang/Runtime/cpp-type.h" |
27 | #include "flang/Runtime/descriptor.h" |
28 | |
29 | namespace Fortran::runtime { |
30 | |
31 | // Reductions are implemented with *accumulators*, which are instances of |
32 | // classes that incrementally build up the result (or an element thereof) during |
33 | // a traversal of the unmasked elements of an array. Each accumulator class |
34 | // supports a constructor (which captures a reference to the array), an |
35 | // AccumulateAt() member function that applies supplied subscripts to the |
36 | // array and does something with a scalar element, and a GetResult() |
37 | // member function that copies a final result into its destination. |
38 | |
39 | // Total reduction of the array argument to a scalar (or to a vector in the |
40 | // cases of FINDLOC, MAXLOC, & MINLOC). These are the cases without DIM= or |
41 | // cases where the argument has rank 1 and DIM=, if present, must be 1. |
42 | template <typename TYPE, typename ACCUMULATOR> |
43 | inline RT_API_ATTRS void DoTotalReduction(const Descriptor &x, int dim, |
44 | const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic, |
45 | Terminator &terminator) { |
46 | if (dim < 0 || dim > 1) { |
47 | terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d" , |
48 | intrinsic, dim, x.rank()); |
49 | } |
50 | SubscriptValue xAt[maxRank]; |
51 | x.GetLowerBounds(xAt); |
52 | if (mask) { |
53 | CheckConformability(to: x, x: *mask, terminator, funcName: intrinsic, toName: "ARRAY" , fromName: "MASK" ); |
54 | SubscriptValue maskAt[maxRank]; |
55 | mask->GetLowerBounds(maskAt); |
56 | if (mask->rank() > 0) { |
57 | for (auto elements{x.Elements()}; elements--; |
58 | x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) { |
59 | if (IsLogicalElementTrue(*mask, maskAt)) { |
60 | if (!accumulator.template AccumulateAt<TYPE>(xAt)) { |
61 | break; |
62 | } |
63 | } |
64 | } |
65 | return; |
66 | } else if (!IsLogicalElementTrue(*mask, maskAt)) { |
67 | // scalar MASK=.FALSE.: return identity value |
68 | return; |
69 | } |
70 | } |
71 | // No MASK=, or scalar MASK=.TRUE. |
72 | for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) { |
73 | if (!accumulator.template AccumulateAt<TYPE>(xAt)) { |
74 | break; // cut short, result is known |
75 | } |
76 | } |
77 | } |
78 | |
79 | template <TypeCategory CAT, int KIND, typename ACCUMULATOR> |
80 | inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x, |
81 | const char *source, int line, int dim, const Descriptor *mask, |
82 | ACCUMULATOR &&accumulator, const char *intrinsic) { |
83 | Terminator terminator{source, line}; |
84 | RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type()); |
85 | using CppType = CppTypeFor<CAT, KIND>; |
86 | DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator); |
87 | CppType result; |
88 | #ifdef _MSC_VER // work around MSVC spurious error |
89 | accumulator.GetResult(&result); |
90 | #else |
91 | accumulator.template GetResult(&result); |
92 | #endif |
93 | return result; |
94 | } |
95 | |
96 | // For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape |
97 | // of the array is [2,3,5], the shape of the result is [2,5] and |
98 | // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has |
99 | // lower bounds other than one. This utility subroutine creates an |
100 | // array of subscripts [j,_,k] for result subscripts [j,k] so that the |
101 | // elements of array(j,:,k) can be reduced. |
102 | inline RT_API_ATTRS void GetExpandedSubscripts(SubscriptValue at[], |
103 | const Descriptor &descriptor, int zeroBasedDim, |
104 | const SubscriptValue from[]) { |
105 | descriptor.GetLowerBounds(at); |
106 | int rank{descriptor.rank()}; |
107 | int j{0}; |
108 | for (; j < zeroBasedDim; ++j) { |
109 | at[j] += from[j] - 1 /*lower bound*/; |
110 | } |
111 | for (++j; j < rank; ++j) { |
112 | at[j] += from[j - 1] - 1; |
113 | } |
114 | } |
115 | |
116 | template <typename TYPE, typename ACCUMULATOR> |
117 | inline RT_API_ATTRS void ReduceDimToScalar(const Descriptor &x, |
118 | int zeroBasedDim, SubscriptValue subscripts[], TYPE *result, |
119 | ACCUMULATOR &accumulator) { |
120 | SubscriptValue xAt[maxRank]; |
121 | GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts); |
122 | const auto &dim{x.GetDimension(zeroBasedDim)}; |
123 | SubscriptValue at{dim.LowerBound()}; |
124 | for (auto n{dim.Extent()}; n-- > 0; ++at) { |
125 | xAt[zeroBasedDim] = at; |
126 | if (!accumulator.template AccumulateAt<TYPE>(xAt)) { |
127 | break; |
128 | } |
129 | } |
130 | #ifdef _MSC_VER // work around MSVC spurious error |
131 | accumulator.GetResult(result, zeroBasedDim); |
132 | #else |
133 | accumulator.template GetResult(result, zeroBasedDim); |
134 | #endif |
135 | } |
136 | |
137 | template <typename TYPE, typename ACCUMULATOR> |
138 | inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x, |
139 | int zeroBasedDim, SubscriptValue subscripts[], const Descriptor &mask, |
140 | TYPE *result, ACCUMULATOR &accumulator) { |
141 | SubscriptValue xAt[maxRank], maskAt[maxRank]; |
142 | GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts); |
143 | GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts); |
144 | const auto &xDim{x.GetDimension(zeroBasedDim)}; |
145 | SubscriptValue xPos{xDim.LowerBound()}; |
146 | const auto &maskDim{mask.GetDimension(zeroBasedDim)}; |
147 | SubscriptValue maskPos{maskDim.LowerBound()}; |
148 | for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0; |
149 | ++xPos, ++maskPos) { |
150 | maskAt[zeroBasedDim] = maskPos; |
151 | if (IsLogicalElementTrue(mask, maskAt)) { |
152 | xAt[zeroBasedDim] = xPos; |
153 | if (!accumulator.template AccumulateAt<TYPE>(xAt)) { |
154 | break; |
155 | } |
156 | } |
157 | } |
158 | #ifdef _MSC_VER // work around MSVC spurious error |
159 | accumulator.GetResult(result, zeroBasedDim); |
160 | #else |
161 | accumulator.template GetResult(result, zeroBasedDim); |
162 | #endif |
163 | } |
164 | |
165 | // Utility: establishes & allocates the result array for a partial |
166 | // reduction (i.e., one with DIM=). |
167 | static RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result, |
168 | const Descriptor &x, std::size_t resultElementSize, int dim, |
169 | Terminator &terminator, const char *intrinsic, TypeCode typeCode) { |
170 | int xRank{x.rank()}; |
171 | if (dim < 1 || dim > xRank) { |
172 | terminator.Crash( |
173 | "%s: bad DIM=%d for ARRAY with rank %d" , intrinsic, dim, xRank); |
174 | } |
175 | int zeroBasedDim{dim - 1}; |
176 | SubscriptValue resultExtent[maxRank]; |
177 | for (int j{0}; j < zeroBasedDim; ++j) { |
178 | resultExtent[j] = x.GetDimension(j).Extent(); |
179 | } |
180 | for (int j{zeroBasedDim + 1}; j < xRank; ++j) { |
181 | resultExtent[j - 1] = x.GetDimension(j).Extent(); |
182 | } |
183 | result.Establish(typeCode, resultElementSize, nullptr, xRank - 1, |
184 | resultExtent, CFI_attribute_allocatable); |
185 | for (int j{0}; j + 1 < xRank; ++j) { |
186 | result.GetDimension(j).SetBounds(1, resultExtent[j]); |
187 | } |
188 | if (int stat{result.Allocate()}) { |
189 | terminator.Crash( |
190 | "%s: could not allocate memory for result; STAT=%d" , intrinsic, stat); |
191 | } |
192 | } |
193 | |
194 | // Partial reductions with DIM= |
195 | |
196 | template <typename ACCUMULATOR, TypeCategory CAT, int KIND> |
197 | inline RT_API_ATTRS void PartialReduction(Descriptor &result, |
198 | const Descriptor &x, std::size_t resultElementSize, int dim, |
199 | const Descriptor *mask, Terminator &terminator, const char *intrinsic, |
200 | ACCUMULATOR &accumulator) { |
201 | CreatePartialReductionResult(result, x, resultElementSize, dim, terminator, |
202 | intrinsic, TypeCode{CAT, KIND}); |
203 | SubscriptValue at[maxRank]; |
204 | result.GetLowerBounds(at); |
205 | INTERNAL_CHECK(result.rank() == 0 || at[0] == 1); |
206 | using CppType = CppTypeFor<CAT, KIND>; |
207 | if (mask) { |
208 | CheckConformability(to: x, x: *mask, terminator, funcName: intrinsic, toName: "ARRAY" , fromName: "MASK" ); |
209 | SubscriptValue maskAt[maxRank]; // contents unused |
210 | if (mask->rank() > 0) { |
211 | for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { |
212 | accumulator.Reinitialize(); |
213 | ReduceDimMaskToScalar<CppType, ACCUMULATOR>( |
214 | x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator); |
215 | } |
216 | return; |
217 | } else if (!IsLogicalElementTrue(*mask, maskAt)) { |
218 | // scalar MASK=.FALSE. |
219 | accumulator.Reinitialize(); |
220 | for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { |
221 | accumulator.GetResult(result.Element<CppType>(at)); |
222 | } |
223 | return; |
224 | } |
225 | } |
226 | // No MASK= or scalar MASK=.TRUE. |
227 | for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { |
228 | accumulator.Reinitialize(); |
229 | ReduceDimToScalar<CppType, ACCUMULATOR>( |
230 | x, dim - 1, at, result.Element<CppType>(at), accumulator); |
231 | } |
232 | } |
233 | |
234 | template <template <typename> class ACCUM> |
235 | struct PartialIntegerReductionHelper { |
236 | template <int KIND> struct Functor { |
237 | static constexpr int Intermediate{ |
238 | std::max(a: KIND, b: 4)}; // use at least "int" for intermediate results |
239 | RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, |
240 | int dim, const Descriptor *mask, Terminator &terminator, |
241 | const char *intrinsic) const { |
242 | using Accumulator = |
243 | ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>; |
244 | Accumulator accumulator{x}; |
245 | // Element size of the destination descriptor is the same |
246 | // as the element size of the source. |
247 | PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x, |
248 | x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator); |
249 | } |
250 | }; |
251 | }; |
252 | |
253 | template <template <typename> class INTEGER_ACCUM> |
254 | inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result, |
255 | const Descriptor &x, int dim, int kind, const Descriptor *mask, |
256 | const char *intrinsic, Terminator &terminator) { |
257 | ApplyIntegerKind< |
258 | PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>( |
259 | kind, terminator, result, x, dim, mask, terminator, intrinsic); |
260 | } |
261 | |
262 | template <TypeCategory CAT, template <typename> class ACCUM> |
263 | struct PartialFloatingReductionHelper { |
264 | template <int KIND> struct Functor { |
265 | static constexpr int Intermediate{ |
266 | std::max(a: KIND, b: 8)}; // use at least "double" for intermediate results |
267 | RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, |
268 | int dim, const Descriptor *mask, Terminator &terminator, |
269 | const char *intrinsic) const { |
270 | using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>; |
271 | Accumulator accumulator{x}; |
272 | // Element size of the destination descriptor is the same |
273 | // as the element size of the source. |
274 | PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim, |
275 | mask, terminator, intrinsic, accumulator); |
276 | } |
277 | }; |
278 | }; |
279 | |
280 | template <template <typename> class INTEGER_ACCUM, |
281 | template <typename> class REAL_ACCUM, |
282 | template <typename> class COMPLEX_ACCUM> |
283 | inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result, |
284 | const Descriptor &x, int dim, const char *source, int line, |
285 | const Descriptor *mask, const char *intrinsic) { |
286 | Terminator terminator{source, line}; |
287 | auto catKind{x.type().GetCategoryAndKind()}; |
288 | RUNTIME_CHECK(terminator, catKind.has_value()); |
289 | switch (catKind->first) { |
290 | case TypeCategory::Integer: |
291 | PartialIntegerReduction<INTEGER_ACCUM>( |
292 | result, x, dim, catKind->second, mask, intrinsic, terminator); |
293 | break; |
294 | case TypeCategory::Real: |
295 | ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real, |
296 | REAL_ACCUM>::template Functor, |
297 | void>(catKind->second, terminator, result, x, dim, mask, terminator, |
298 | intrinsic); |
299 | break; |
300 | case TypeCategory::Complex: |
301 | ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex, |
302 | COMPLEX_ACCUM>::template Functor, |
303 | void>(catKind->second, terminator, result, x, dim, mask, terminator, |
304 | intrinsic); |
305 | break; |
306 | default: |
307 | terminator.Crash("%s: bad type code %d" , intrinsic, x.type().raw()); |
308 | } |
309 | } |
310 | |
311 | template <typename ACCUMULATOR> struct LocationResultHelper { |
312 | template <int KIND> struct Functor { |
313 | RT_API_ATTRS void operator()( |
314 | ACCUMULATOR &accumulator, const Descriptor &result) const { |
315 | accumulator.GetResult( |
316 | result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>()); |
317 | } |
318 | }; |
319 | }; |
320 | |
321 | template <typename ACCUMULATOR> struct PartialLocationHelper { |
322 | template <int KIND> struct Functor { |
323 | RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, |
324 | int dim, const Descriptor *mask, Terminator &terminator, |
325 | const char *intrinsic, ACCUMULATOR &accumulator) const { |
326 | // Element size of the destination descriptor is the size |
327 | // of {TypeCategory::Integer, KIND}. |
328 | PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x, |
329 | Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask, |
330 | terminator, intrinsic, accumulator); |
331 | } |
332 | }; |
333 | }; |
334 | |
335 | } // namespace Fortran::runtime |
336 | #endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_ |
337 | |