1//===-- runtime/reduction-templates.h -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// Generic function templates used by various reduction transformation
10// intrinsic functions (SUM, PRODUCT, &c.)
11//
12// * Partial reductions (i.e., those with DIM= arguments that are not
13// required to be 1 by the rank of the argument) return arrays that
14// are dynamically allocated in a caller-supplied descriptor.
15// * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
16// return integer vectors of some kind, not scalars; a caller-supplied
17// descriptor is used
18// * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
19// length results, dynamically allocated in a caller-supplied descriptor
20
21#ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
22#define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
23
24#include "terminator.h"
25#include "tools.h"
26#include "flang/Runtime/cpp-type.h"
27#include "flang/Runtime/descriptor.h"
28
29namespace Fortran::runtime {
30
31// Reductions are implemented with *accumulators*, which are instances of
32// classes that incrementally build up the result (or an element thereof) during
33// a traversal of the unmasked elements of an array. Each accumulator class
34// supports a constructor (which captures a reference to the array), an
35// AccumulateAt() member function that applies supplied subscripts to the
36// array and does something with a scalar element, and a GetResult()
37// member function that copies a final result into its destination.
38
39// Total reduction of the array argument to a scalar (or to a vector in the
40// cases of FINDLOC, MAXLOC, & MINLOC). These are the cases without DIM= or
41// cases where the argument has rank 1 and DIM=, if present, must be 1.
42template <typename TYPE, typename ACCUMULATOR>
43inline RT_API_ATTRS void DoTotalReduction(const Descriptor &x, int dim,
44 const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
45 Terminator &terminator) {
46 if (dim < 0 || dim > 1) {
47 terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
48 intrinsic, dim, x.rank());
49 }
50 SubscriptValue xAt[maxRank];
51 x.GetLowerBounds(xAt);
52 if (mask) {
53 CheckConformability(to: x, x: *mask, terminator, funcName: intrinsic, toName: "ARRAY", fromName: "MASK");
54 SubscriptValue maskAt[maxRank];
55 mask->GetLowerBounds(maskAt);
56 if (mask->rank() > 0) {
57 for (auto elements{x.Elements()}; elements--;
58 x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
59 if (IsLogicalElementTrue(*mask, maskAt)) {
60 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
61 break;
62 }
63 }
64 }
65 return;
66 } else if (!IsLogicalElementTrue(*mask, maskAt)) {
67 // scalar MASK=.FALSE.: return identity value
68 return;
69 }
70 }
71 // No MASK=, or scalar MASK=.TRUE.
72 for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
73 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
74 break; // cut short, result is known
75 }
76 }
77}
78
79template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
80inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
81 const char *source, int line, int dim, const Descriptor *mask,
82 ACCUMULATOR &&accumulator, const char *intrinsic) {
83 Terminator terminator{source, line};
84 RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
85 using CppType = CppTypeFor<CAT, KIND>;
86 DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
87 CppType result;
88#ifdef _MSC_VER // work around MSVC spurious error
89 accumulator.GetResult(&result);
90#else
91 accumulator.template GetResult(&result);
92#endif
93 return result;
94}
95
96// For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
97// of the array is [2,3,5], the shape of the result is [2,5] and
98// result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
99// lower bounds other than one. This utility subroutine creates an
100// array of subscripts [j,_,k] for result subscripts [j,k] so that the
101// elements of array(j,:,k) can be reduced.
102inline RT_API_ATTRS void GetExpandedSubscripts(SubscriptValue at[],
103 const Descriptor &descriptor, int zeroBasedDim,
104 const SubscriptValue from[]) {
105 descriptor.GetLowerBounds(at);
106 int rank{descriptor.rank()};
107 int j{0};
108 for (; j < zeroBasedDim; ++j) {
109 at[j] += from[j] - 1 /*lower bound*/;
110 }
111 for (++j; j < rank; ++j) {
112 at[j] += from[j - 1] - 1;
113 }
114}
115
116template <typename TYPE, typename ACCUMULATOR>
117inline RT_API_ATTRS void ReduceDimToScalar(const Descriptor &x,
118 int zeroBasedDim, SubscriptValue subscripts[], TYPE *result,
119 ACCUMULATOR &accumulator) {
120 SubscriptValue xAt[maxRank];
121 GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
122 const auto &dim{x.GetDimension(zeroBasedDim)};
123 SubscriptValue at{dim.LowerBound()};
124 for (auto n{dim.Extent()}; n-- > 0; ++at) {
125 xAt[zeroBasedDim] = at;
126 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
127 break;
128 }
129 }
130#ifdef _MSC_VER // work around MSVC spurious error
131 accumulator.GetResult(result, zeroBasedDim);
132#else
133 accumulator.template GetResult(result, zeroBasedDim);
134#endif
135}
136
137template <typename TYPE, typename ACCUMULATOR>
138inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x,
139 int zeroBasedDim, SubscriptValue subscripts[], const Descriptor &mask,
140 TYPE *result, ACCUMULATOR &accumulator) {
141 SubscriptValue xAt[maxRank], maskAt[maxRank];
142 GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
143 GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
144 const auto &xDim{x.GetDimension(zeroBasedDim)};
145 SubscriptValue xPos{xDim.LowerBound()};
146 const auto &maskDim{mask.GetDimension(zeroBasedDim)};
147 SubscriptValue maskPos{maskDim.LowerBound()};
148 for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
149 ++xPos, ++maskPos) {
150 maskAt[zeroBasedDim] = maskPos;
151 if (IsLogicalElementTrue(mask, maskAt)) {
152 xAt[zeroBasedDim] = xPos;
153 if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
154 break;
155 }
156 }
157 }
158#ifdef _MSC_VER // work around MSVC spurious error
159 accumulator.GetResult(result, zeroBasedDim);
160#else
161 accumulator.template GetResult(result, zeroBasedDim);
162#endif
163}
164
165// Utility: establishes & allocates the result array for a partial
166// reduction (i.e., one with DIM=).
167static RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result,
168 const Descriptor &x, std::size_t resultElementSize, int dim,
169 Terminator &terminator, const char *intrinsic, TypeCode typeCode) {
170 int xRank{x.rank()};
171 if (dim < 1 || dim > xRank) {
172 terminator.Crash(
173 "%s: bad DIM=%d for ARRAY with rank %d", intrinsic, dim, xRank);
174 }
175 int zeroBasedDim{dim - 1};
176 SubscriptValue resultExtent[maxRank];
177 for (int j{0}; j < zeroBasedDim; ++j) {
178 resultExtent[j] = x.GetDimension(j).Extent();
179 }
180 for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
181 resultExtent[j - 1] = x.GetDimension(j).Extent();
182 }
183 result.Establish(typeCode, resultElementSize, nullptr, xRank - 1,
184 resultExtent, CFI_attribute_allocatable);
185 for (int j{0}; j + 1 < xRank; ++j) {
186 result.GetDimension(j).SetBounds(1, resultExtent[j]);
187 }
188 if (int stat{result.Allocate()}) {
189 terminator.Crash(
190 "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
191 }
192}
193
194// Partial reductions with DIM=
195
196template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
197inline RT_API_ATTRS void PartialReduction(Descriptor &result,
198 const Descriptor &x, std::size_t resultElementSize, int dim,
199 const Descriptor *mask, Terminator &terminator, const char *intrinsic,
200 ACCUMULATOR &accumulator) {
201 CreatePartialReductionResult(result, x, resultElementSize, dim, terminator,
202 intrinsic, TypeCode{CAT, KIND});
203 SubscriptValue at[maxRank];
204 result.GetLowerBounds(at);
205 INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
206 using CppType = CppTypeFor<CAT, KIND>;
207 if (mask) {
208 CheckConformability(to: x, x: *mask, terminator, funcName: intrinsic, toName: "ARRAY", fromName: "MASK");
209 SubscriptValue maskAt[maxRank]; // contents unused
210 if (mask->rank() > 0) {
211 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
212 accumulator.Reinitialize();
213 ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
214 x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
215 }
216 return;
217 } else if (!IsLogicalElementTrue(*mask, maskAt)) {
218 // scalar MASK=.FALSE.
219 accumulator.Reinitialize();
220 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
221 accumulator.GetResult(result.Element<CppType>(at));
222 }
223 return;
224 }
225 }
226 // No MASK= or scalar MASK=.TRUE.
227 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
228 accumulator.Reinitialize();
229 ReduceDimToScalar<CppType, ACCUMULATOR>(
230 x, dim - 1, at, result.Element<CppType>(at), accumulator);
231 }
232}
233
234template <template <typename> class ACCUM>
235struct PartialIntegerReductionHelper {
236 template <int KIND> struct Functor {
237 static constexpr int Intermediate{
238 std::max(a: KIND, b: 4)}; // use at least "int" for intermediate results
239 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
240 int dim, const Descriptor *mask, Terminator &terminator,
241 const char *intrinsic) const {
242 using Accumulator =
243 ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
244 Accumulator accumulator{x};
245 // Element size of the destination descriptor is the same
246 // as the element size of the source.
247 PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x,
248 x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator);
249 }
250 };
251};
252
253template <template <typename> class INTEGER_ACCUM>
254inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result,
255 const Descriptor &x, int dim, int kind, const Descriptor *mask,
256 const char *intrinsic, Terminator &terminator) {
257 ApplyIntegerKind<
258 PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
259 kind, terminator, result, x, dim, mask, terminator, intrinsic);
260}
261
262template <TypeCategory CAT, template <typename> class ACCUM>
263struct PartialFloatingReductionHelper {
264 template <int KIND> struct Functor {
265 static constexpr int Intermediate{
266 std::max(a: KIND, b: 8)}; // use at least "double" for intermediate results
267 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
268 int dim, const Descriptor *mask, Terminator &terminator,
269 const char *intrinsic) const {
270 using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
271 Accumulator accumulator{x};
272 // Element size of the destination descriptor is the same
273 // as the element size of the source.
274 PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim,
275 mask, terminator, intrinsic, accumulator);
276 }
277 };
278};
279
280template <template <typename> class INTEGER_ACCUM,
281 template <typename> class REAL_ACCUM,
282 template <typename> class COMPLEX_ACCUM>
283inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result,
284 const Descriptor &x, int dim, const char *source, int line,
285 const Descriptor *mask, const char *intrinsic) {
286 Terminator terminator{source, line};
287 auto catKind{x.type().GetCategoryAndKind()};
288 RUNTIME_CHECK(terminator, catKind.has_value());
289 switch (catKind->first) {
290 case TypeCategory::Integer:
291 PartialIntegerReduction<INTEGER_ACCUM>(
292 result, x, dim, catKind->second, mask, intrinsic, terminator);
293 break;
294 case TypeCategory::Real:
295 ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
296 REAL_ACCUM>::template Functor,
297 void>(catKind->second, terminator, result, x, dim, mask, terminator,
298 intrinsic);
299 break;
300 case TypeCategory::Complex:
301 ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
302 COMPLEX_ACCUM>::template Functor,
303 void>(catKind->second, terminator, result, x, dim, mask, terminator,
304 intrinsic);
305 break;
306 default:
307 terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
308 }
309}
310
311template <typename ACCUMULATOR> struct LocationResultHelper {
312 template <int KIND> struct Functor {
313 RT_API_ATTRS void operator()(
314 ACCUMULATOR &accumulator, const Descriptor &result) const {
315 accumulator.GetResult(
316 result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
317 }
318 };
319};
320
321template <typename ACCUMULATOR> struct PartialLocationHelper {
322 template <int KIND> struct Functor {
323 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
324 int dim, const Descriptor *mask, Terminator &terminator,
325 const char *intrinsic, ACCUMULATOR &accumulator) const {
326 // Element size of the destination descriptor is the size
327 // of {TypeCategory::Integer, KIND}.
328 PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x,
329 Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask,
330 terminator, intrinsic, accumulator);
331 }
332 };
333};
334
335} // namespace Fortran::runtime
336#endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
337

source code of flang/runtime/reduction-templates.h