reduction-templates.h source code [flang/runtime/reduction-templates.h]

1	//===-- runtime/reduction-templates.h ---------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	// Generic function templates used by various reduction transformation
10	// intrinsic functions (SUM, PRODUCT, &c.)
11	//
12	// Partial reductions (i.e., those with DIM= arguments that are not*
13	// required to be 1 by the rank of the argument) return arrays that
14	// are dynamically allocated in a caller-supplied descriptor.
15	// Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC*
16	// return integer vectors of some kind, not scalars; a caller-supplied
17	// descriptor is used
18	// Character-valued reductions (MAXVAL & MINVAL) return arbitrary*
19	// length results, dynamically allocated in a caller-supplied descriptor
20
21	#ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
22	#define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
23
24	#include "numeric-templates.h"
25	#include "terminator.h"
26	#include "tools.h"
27	#include "flang/Runtime/cpp-type.h"
28	#include "flang/Runtime/descriptor.h"
29	#include <algorithm>
30
31	namespace Fortran::runtime {
32
33	// Reductions are implemented with accumulators, which are instances of
34	// classes that incrementally build up the result (or an element thereof) during
35	// a traversal of the unmasked elements of an array. Each accumulator class
36	// supports a constructor (which captures a reference to the array), an
37	// AccumulateAt() member function that applies supplied subscripts to the
38	// array and does something with a scalar element, and a GetResult()
39	// member function that copies a final result into its destination.
40
41	// Total reduction of the array argument to a scalar (or to a vector in the
42	// cases of FINDLOC, MAXLOC, & MINLOC). These are the cases without DIM= or
43	// cases where the argument has rank 1 and DIM=, if present, must be 1.
44	template <typename TYPE, typename ACCUMULATOR>
45	inline RT_API_ATTRS void DoTotalReduction(const Descriptor &x, int dim,
46	const Descriptor mask, ACCUMULATOR &accumulator, const* char *intrinsic,
47	Terminator &terminator) {
48	if (dim < `0` \|\| dim > `1`) {
49	terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
50	intrinsic, dim, x.rank());
51	}
52	SubscriptValue xAt[maxRank];
53	x.GetLowerBounds(xAt);
54	if (mask) {
55	CheckConformability(to: x, x: *mask, terminator, funcName: intrinsic, toName: "ARRAY", fromName: "MASK");
56	if (mask->rank() > `0`) {
57	SubscriptValue maskAt[maxRank];
58	mask->GetLowerBounds(maskAt);
59	for (auto elements{x.Elements()}; elements--;
60	x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
61	if (IsLogicalElementTrue(*mask, maskAt)) {
62	if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
63	break;
64	}
65	}
66	}
67	return;
68	} else if (!IsLogicalScalarTrue(*mask)) {
69	// scalar MASK=.FALSE.: return identity value
70	return;
71	}
72	}
73	// No MASK=, or scalar MASK=.TRUE.
74	for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
75	if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
76	break; // cut short, result is known
77	}
78	}
79	}
80
81	template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
82	inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
83	const char source, int* line, int dim, const Descriptor *mask,
84	ACCUMULATOR &&accumulator, const char *intrinsic) {
85	Terminator terminator{source, line};
86	RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
87	using CppType = CppTypeFor<CAT, KIND>;
88	DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
89	if constexpr (std::is_void_v<CppType>) {
90	// Result is returned from accumulator, as in REDUCE() for derived type
91	#ifdef _MSC_VER // work around MSVC spurious error
92	accumulator.GetResult();
93	#else
94	accumulator.template GetResult();
95	#endif
96	} else {
97	CppType result;
98	#ifdef _MSC_VER // work around MSVC spurious error
99	accumulator.GetResult(&result);
100	#else
101	accumulator.template GetResult(&result);
102	#endif
103	return result;
104	}
105	}
106
107	// For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
108	// of the array is [2,3,5], the shape of the result is [2,5] and
109	// result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
110	// lower bounds other than one. This utility subroutine creates an
111	// array of subscripts [j,_,k] for result subscripts [j,k] so that the
112	// elements of array(j,:,k) can be reduced.
113	inline RT_API_ATTRS void GetExpandedSubscripts(SubscriptValue at[],
114	const Descriptor &descriptor, int zeroBasedDim,
115	const SubscriptValue from[]) {
116	descriptor.GetLowerBounds(at);
117	int rank{descriptor.rank()};
118	int j{`0`};
119	for (; j < zeroBasedDim; ++j) {
120	at[j] += from[j] - `1` /lower bound/;
121	}
122	for (++j; j < rank; ++j) {
123	at[j] += from[j - `1`] - `1`;
124	}
125	}
126
127	template <typename TYPE, typename ACCUMULATOR>
128	inline RT_API_ATTRS void ReduceDimToScalar(const Descriptor &x,
129	int zeroBasedDim, SubscriptValue subscripts[], TYPE *result,
130	ACCUMULATOR &accumulator) {
131	SubscriptValue xAt[maxRank];
132	GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
133	const auto &dim{x.GetDimension(zeroBasedDim)};
134	SubscriptValue at{dim.LowerBound()};
135	for (auto n{dim.Extent()}; n-- > `0`; ++at) {
136	xAt[zeroBasedDim] = at;
137	if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
138	break;
139	}
140	}
141	#ifdef _MSC_VER // work around MSVC spurious error
142	accumulator.GetResult(result, zeroBasedDim);
143	#else
144	accumulator.template GetResult(result, zeroBasedDim);
145	#endif
146	}
147
148	template <typename TYPE, typename ACCUMULATOR>
149	inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x,
150	int zeroBasedDim, SubscriptValue subscripts[], const Descriptor &mask,
151	TYPE *result, ACCUMULATOR &accumulator) {
152	SubscriptValue xAt[maxRank], maskAt[maxRank];
153	GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
154	GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
155	const auto &xDim{x.GetDimension(zeroBasedDim)};
156	SubscriptValue xPos{xDim.LowerBound()};
157	const auto &maskDim{mask.GetDimension(zeroBasedDim)};
158	SubscriptValue maskPos{maskDim.LowerBound()};
159	for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > `0`;
160	++xPos, ++maskPos) {
161	maskAt[zeroBasedDim] = maskPos;
162	if (IsLogicalElementTrue(mask, maskAt)) {
163	xAt[zeroBasedDim] = xPos;
164	if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
165	break;
166	}
167	}
168	}
169	#ifdef _MSC_VER // work around MSVC spurious error
170	accumulator.GetResult(result, zeroBasedDim);
171	#else
172	accumulator.template GetResult(result, zeroBasedDim);
173	#endif
174	}
175
176	// Partial reductions with DIM=
177
178	template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
179	inline RT_API_ATTRS void PartialReduction(Descriptor &result,
180	const Descriptor &x, std::size_t resultElementSize, int dim,
181	const Descriptor mask, Terminator &terminator, const* char *intrinsic,
182	ACCUMULATOR &accumulator) {
183	CreatePartialReductionResult(result, x, resultElementSize, dim, terminator,
184	intrinsic, TypeCode{CAT, KIND});
185	SubscriptValue at[maxRank];
186	result.GetLowerBounds(at);
187	INTERNAL_CHECK(result.rank() == `0` \|\| at[`0`] == `1`);
188	using CppType = CppTypeFor<CAT, KIND>;
189	if (mask) {
190	CheckConformability(to: x, x: *mask, terminator, funcName: intrinsic, toName: "ARRAY", fromName: "MASK");
191	if (mask->rank() > `0`) {
192	for (auto n{result.Elements()}; n-- > `0`; result.IncrementSubscripts(at)) {
193	accumulator.Reinitialize();
194	ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
195	x, dim - `1`, at, *mask, result.Element<CppType>(at), accumulator);
196	}
197	return;
198	} else if (!IsLogicalScalarTrue(*mask)) {
199	// scalar MASK=.FALSE.
200	accumulator.Reinitialize();
201	for (auto n{result.Elements()}; n-- > `0`; result.IncrementSubscripts(at)) {
202	accumulator.GetResult(result.Element<CppType>(at));
203	}
204	return;
205	}
206	}
207	// No MASK= or scalar MASK=.TRUE.
208	for (auto n{result.Elements()}; n-- > `0`; result.IncrementSubscripts(at)) {
209	accumulator.Reinitialize();
210	ReduceDimToScalar<CppType, ACCUMULATOR>(
211	x, dim - `1`, at, result.Element<CppType>(at), accumulator);
212	}
213	}
214
215	template <template <typename> class ACCUM>
216	struct PartialIntegerReductionHelper {
217	template <int KIND> struct Functor {
218	static constexpr int Intermediate{
219	std::max(a: KIND, b: `4`)}; // use at least "int" for intermediate results
220	RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
221	int dim, const Descriptor *mask, Terminator &terminator,
222	const char intrinsic) const* {
223	using Accumulator =
224	ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
225	Accumulator accumulator{x};
226	// Element size of the destination descriptor is the same
227	// as the element size of the source.
228	PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x,
229	x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator);
230	}
231	};
232	};
233
234	template <template <typename> class INTEGER_ACCUM>
235	inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result,
236	const Descriptor &x, int dim, int kind, const Descriptor *mask,
237	const char *intrinsic, Terminator &terminator) {
238	ApplyIntegerKind<
239	PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
240	kind, terminator, result, x, dim, mask, terminator, intrinsic);
241	}
242
243	template <TypeCategory CAT, template <typename> class ACCUM>
244	struct PartialFloatingReductionHelper {
245	template <int KIND> struct Functor {
246	static constexpr int Intermediate{
247	std::max(a: KIND, b: `8`)}; // use at least "double" for intermediate results
248	RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
249	int dim, const Descriptor *mask, Terminator &terminator,
250	const char intrinsic) const* {
251	using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
252	Accumulator accumulator{x};
253	// Element size of the destination descriptor is the same
254	// as the element size of the source.
255	PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim,
256	mask, terminator, intrinsic, accumulator);
257	}
258	};
259	};
260
261	template <template <typename> class INTEGER_ACCUM,
262	template <typename> class REAL_ACCUM,
263	template <typename> class COMPLEX_ACCUM>
264	inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result,
265	const Descriptor &x, int dim, const char source, int* line,
266	const Descriptor mask, const* char *intrinsic) {
267	Terminator terminator{source, line};
268	auto catKind{x.type().GetCategoryAndKind()};
269	RUNTIME_CHECK(terminator, catKind.has_value());
270	switch (catKind->first) {
271	case TypeCategory::Integer:
272	PartialIntegerReduction<INTEGER_ACCUM>(
273	result, x, dim, catKind->second, mask, intrinsic, terminator);
274	break;
275	case TypeCategory::Real:
276	ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
277	REAL_ACCUM>::template Functor,
278	void>(catKind->second, terminator, result, x, dim, mask, terminator,
279	intrinsic);
280	break;
281	case TypeCategory::Complex:
282	ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
283	COMPLEX_ACCUM>::template Functor,
284	void>(catKind->second, terminator, result, x, dim, mask, terminator,
285	intrinsic);
286	break;
287	default:
288	terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
289	}
290	}
291
292	template <typename ACCUMULATOR> struct LocationResultHelper {
293	template <int KIND> struct Functor {
294	RT_API_ATTRS void operator()(
295	ACCUMULATOR &accumulator, const Descriptor &result) const {
296	accumulator.GetResult(
297	result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
298	}
299	};
300	};
301
302	template <typename ACCUMULATOR> struct PartialLocationHelper {
303	template <int KIND> struct Functor {
304	RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
305	int dim, const Descriptor *mask, Terminator &terminator,
306	const char intrinsic, ACCUMULATOR &accumulator) const* {
307	// Element size of the destination descriptor is the size
308	// of {TypeCategory::Integer, KIND}.
309	PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x,
310	Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask,
311	terminator, intrinsic, accumulator);
312	}
313	};
314	};
315
316	// NORM2 templates
317
318	RT_VAR_GROUP_BEGIN
319
320	// Use at least double precision for accumulators.
321	// Don't use __float128, it doesn't work with abs() or sqrt() yet.
322	static constexpr RT_CONST_VAR_ATTRS int Norm2LargestLDKind {
323	#if LDBL_MANT_DIG == 113 \|\| HAS_FLOAT128
324	`16`
325	#elif LDBL_MANT_DIG == 64
326	`10`
327	#else
328	`8`
329	#endif
330	};
331
332	RT_VAR_GROUP_END
333
334	template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
335	inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
336	int dim, const Descriptor mask, const* char *intrinsic,
337	Terminator &terminator) {
338	using Type = CppTypeFor<CAT, KIND>;
339	ACCUMULATOR accumulator{x};
340	if (dim == `0` \|\| x.rank() == `1`) {
341	// Total reduction
342
343	// Element size of the destination descriptor is the same
344	// as the element size of the source.
345	result.Establish(x.type(), x.ElementBytes(), nullptr, `0`, nullptr,
346	CFI_attribute_allocatable);
347	if (int stat{result.Allocate()}) {
348	terminator.Crash(
349	"%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
350	}
351	DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator);
352	accumulator.GetResult(result.OffsetElement<Type>());
353	} else {
354	// Partial reduction
355
356	// Element size of the destination descriptor is the same
357	// as the element size of the source.
358	PartialReduction<ACCUMULATOR, CAT, KIND>(result, x, x.ElementBytes(), dim,
359	mask, terminator, intrinsic, accumulator);
360	}
361	}
362
363	// The data type used by Norm2Accumulator.
364	template <int KIND>
365	using Norm2AccumType =
366	CppTypeFor<TypeCategory::Real, std::clamp(KIND, `8`, Norm2LargestLDKind)>;
367
368	template <int KIND> class Norm2Accumulator {
369	public:
370	using Type = CppTypeFor<TypeCategory::Real, KIND>;
371	using AccumType = Norm2AccumType<KIND>;
372	explicit RT_API_ATTRS Norm2Accumulator(const Descriptor &array)
373	: array_{array} {}
374	RT_API_ATTRS void Reinitialize() { max_ = sum_ = `0`; }
375	template <typename A>
376	RT_API_ATTRS void GetResult(A p, int* /zeroBasedDim/ = -`1`) const {
377	// m sqrt(1 + sum((others(:)/m)*2))
378	p = static_cast<Type>(max_ SQRTTy<AccumType>::compute(`1` + sum_));
379	}
380	RT_API_ATTRS bool Accumulate(Type x) {
381	auto absX{ABSTy<AccumType>::compute(static_cast<AccumType>(x))};
382	if (!max_) {
383	max_ = absX;
384	} else if (absX > max_) {
385	auto t{max_ / absX}; // < 1.0
386	auto tsq{t * t};
387	sum_ = tsq; // scale sum to reflect change to the max*
388	sum_ += tsq; // include a term for the previous max
389	max_ = absX;
390	} else { // absX <= max_
391	auto t{absX / max_};
392	sum_ += t * t;
393	}
394	return true;
395	}
396	template <typename A>
397	RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
398	return Accumulate(*array_.Element<A>(at));
399	}
400
401	private:
402	const Descriptor &array_;
403	AccumType max_{`0`}; // value (m) with largest magnitude
404	AccumType sum_{`0`}; // sum((others(:)/m)2)
405	};
406
407	template <int KIND> struct Norm2Helper {
408	RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim,
409	const Descriptor mask, Terminator &terminator) const* {
410	DoMaxMinNorm2<TypeCategory::Real, KIND, Norm2Accumulator<KIND>>(
411	result, x, dim, mask, "NORM2", terminator);
412	}
413	};
414
415	} // namespace Fortran::runtime
416	#endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
417

source code of flang/runtime/reduction-templates.h