| 1 | //===-- lib/runtime/reduction.cpp -------------------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | // Implements ALL, ANY, COUNT, IALL, IANY, IPARITY, & PARITY for all required |
| 10 | // operand types and shapes. |
| 11 | // |
| 12 | // DOT_PRODUCT, FINDLOC, MATMUL, SUM, and PRODUCT are in their own eponymous |
| 13 | // source files. |
| 14 | // NORM2, MAXLOC, MINLOC, MAXVAL, and MINVAL are in extrema.cpp. |
| 15 | |
| 16 | #include "flang/Runtime/reduction.h" |
| 17 | #include "flang-rt/runtime/descriptor.h" |
| 18 | #include "flang-rt/runtime/reduction-templates.h" |
| 19 | #include <cinttypes> |
| 20 | |
| 21 | namespace Fortran::runtime { |
| 22 | |
| 23 | // IALL, IANY, IPARITY |
| 24 | |
| 25 | template <typename INTERMEDIATE> class IntegerAndAccumulator { |
| 26 | public: |
| 27 | explicit RT_API_ATTRS IntegerAndAccumulator(const Descriptor &array) |
| 28 | : array_{array} {} |
| 29 | RT_API_ATTRS void Reinitialize() { and_ = ~INTERMEDIATE{0}; } |
| 30 | template <typename A> |
| 31 | RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { |
| 32 | *p = static_cast<A>(and_); |
| 33 | } |
| 34 | template <typename A> |
| 35 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
| 36 | and_ &= *array_.Element<A>(at); |
| 37 | return true; |
| 38 | } |
| 39 | |
| 40 | private: |
| 41 | const Descriptor &array_; |
| 42 | INTERMEDIATE and_{~INTERMEDIATE{0}}; |
| 43 | }; |
| 44 | |
| 45 | template <typename INTERMEDIATE> class IntegerOrAccumulator { |
| 46 | public: |
| 47 | explicit RT_API_ATTRS IntegerOrAccumulator(const Descriptor &array) |
| 48 | : array_{array} {} |
| 49 | RT_API_ATTRS void Reinitialize() { or_ = 0; } |
| 50 | template <typename A> |
| 51 | RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { |
| 52 | *p = static_cast<A>(or_); |
| 53 | } |
| 54 | template <typename A> |
| 55 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
| 56 | or_ |= *array_.Element<A>(at); |
| 57 | return true; |
| 58 | } |
| 59 | |
| 60 | private: |
| 61 | const Descriptor &array_; |
| 62 | INTERMEDIATE or_{0}; |
| 63 | }; |
| 64 | |
| 65 | template <typename INTERMEDIATE> class IntegerXorAccumulator { |
| 66 | public: |
| 67 | explicit RT_API_ATTRS IntegerXorAccumulator(const Descriptor &array) |
| 68 | : array_{array} {} |
| 69 | RT_API_ATTRS void Reinitialize() { xor_ = 0; } |
| 70 | template <typename A> |
| 71 | RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { |
| 72 | *p = static_cast<A>(xor_); |
| 73 | } |
| 74 | template <typename A> |
| 75 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
| 76 | xor_ ^= *array_.Element<A>(at); |
| 77 | return true; |
| 78 | } |
| 79 | |
| 80 | private: |
| 81 | const Descriptor &array_; |
| 82 | INTERMEDIATE xor_{0}; |
| 83 | }; |
| 84 | |
| 85 | extern "C" { |
| 86 | CppTypeFor<TypeCategory::Integer, 1> RTDEF(IAll1)(const Descriptor &x, |
| 87 | const char *source, int line, int dim, const Descriptor *mask) { |
| 88 | return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask, |
| 89 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL" , |
| 90 | /*allowUnsignedForInteger=*/true); |
| 91 | } |
| 92 | CppTypeFor<TypeCategory::Integer, 2> RTDEF(IAll2)(const Descriptor &x, |
| 93 | const char *source, int line, int dim, const Descriptor *mask) { |
| 94 | return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask, |
| 95 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL" , |
| 96 | /*allowUnsignedForInteger=*/true); |
| 97 | } |
| 98 | CppTypeFor<TypeCategory::Integer, 4> RTDEF(IAll4)(const Descriptor &x, |
| 99 | const char *source, int line, int dim, const Descriptor *mask) { |
| 100 | return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask, |
| 101 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL" , |
| 102 | /*allowUnsignedForInteger=*/true); |
| 103 | } |
| 104 | CppTypeFor<TypeCategory::Integer, 8> RTDEF(IAll8)(const Descriptor &x, |
| 105 | const char *source, int line, int dim, const Descriptor *mask) { |
| 106 | return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask, |
| 107 | IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IALL" , |
| 108 | /*allowUnsignedForInteger=*/true); |
| 109 | } |
| 110 | #ifdef __SIZEOF_INT128__ |
| 111 | CppTypeFor<TypeCategory::Integer, 16> RTDEF(IAll16)(const Descriptor &x, |
| 112 | const char *source, int line, int dim, const Descriptor *mask) { |
| 113 | return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim, |
| 114 | mask, IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x}, |
| 115 | "IALL" , /*allowUnsignedForInteger=*/true); |
| 116 | } |
| 117 | #endif |
| 118 | void RTDEF(IAllDim)(Descriptor &result, const Descriptor &x, int dim, |
| 119 | const char *source, int line, const Descriptor *mask) { |
| 120 | Terminator terminator{source, line}; |
| 121 | auto catKind{x.type().GetCategoryAndKind()}; |
| 122 | RUNTIME_CHECK(terminator, |
| 123 | catKind.has_value() && |
| 124 | (catKind->first == TypeCategory::Integer || |
| 125 | catKind->first == TypeCategory::Unsigned)); |
| 126 | PartialIntegerReduction<IntegerAndAccumulator>( |
| 127 | result, x, dim, catKind->second, mask, "IALL" , terminator); |
| 128 | } |
| 129 | |
| 130 | CppTypeFor<TypeCategory::Integer, 1> RTDEF(IAny1)(const Descriptor &x, |
| 131 | const char *source, int line, int dim, const Descriptor *mask) { |
| 132 | return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask, |
| 133 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY" , |
| 134 | /*allowUnsignedForInteger=*/true); |
| 135 | } |
| 136 | CppTypeFor<TypeCategory::Integer, 2> RTDEF(IAny2)(const Descriptor &x, |
| 137 | const char *source, int line, int dim, const Descriptor *mask) { |
| 138 | return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask, |
| 139 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY" , |
| 140 | /*allowUnsignedForInteger=*/true); |
| 141 | } |
| 142 | CppTypeFor<TypeCategory::Integer, 4> RTDEF(IAny4)(const Descriptor &x, |
| 143 | const char *source, int line, int dim, const Descriptor *mask) { |
| 144 | return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask, |
| 145 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY" , |
| 146 | /*allowUnsignedForInteger=*/true); |
| 147 | } |
| 148 | CppTypeFor<TypeCategory::Integer, 8> RTDEF(IAny8)(const Descriptor &x, |
| 149 | const char *source, int line, int dim, const Descriptor *mask) { |
| 150 | return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask, |
| 151 | IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IANY" , |
| 152 | /*allowUnsignedForInteger=*/true); |
| 153 | } |
| 154 | #ifdef __SIZEOF_INT128__ |
| 155 | CppTypeFor<TypeCategory::Integer, 16> RTDEF(IAny16)(const Descriptor &x, |
| 156 | const char *source, int line, int dim, const Descriptor *mask) { |
| 157 | return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim, |
| 158 | mask, IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x}, |
| 159 | "IANY" , /*allowUnsignedForInteger=*/true); |
| 160 | } |
| 161 | #endif |
| 162 | void RTDEF(IAnyDim)(Descriptor &result, const Descriptor &x, int dim, |
| 163 | const char *source, int line, const Descriptor *mask) { |
| 164 | Terminator terminator{source, line}; |
| 165 | auto catKind{x.type().GetCategoryAndKind()}; |
| 166 | RUNTIME_CHECK(terminator, |
| 167 | catKind.has_value() && |
| 168 | (catKind->first == TypeCategory::Integer || |
| 169 | catKind->first == TypeCategory::Unsigned)); |
| 170 | PartialIntegerReduction<IntegerOrAccumulator>( |
| 171 | result, x, dim, catKind->second, mask, "IANY" , terminator); |
| 172 | } |
| 173 | |
| 174 | CppTypeFor<TypeCategory::Integer, 1> RTDEF(IParity1)(const Descriptor &x, |
| 175 | const char *source, int line, int dim, const Descriptor *mask) { |
| 176 | return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask, |
| 177 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IPARITY" , |
| 178 | /*allowUnsignedForInteger=*/true); |
| 179 | } |
| 180 | CppTypeFor<TypeCategory::Integer, 2> RTDEF(IParity2)(const Descriptor &x, |
| 181 | const char *source, int line, int dim, const Descriptor *mask) { |
| 182 | return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask, |
| 183 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IPARITY" , |
| 184 | /*allowUnsignedForInteger=*/true); |
| 185 | } |
| 186 | CppTypeFor<TypeCategory::Integer, 4> RTDEF(IParity4)(const Descriptor &x, |
| 187 | const char *source, int line, int dim, const Descriptor *mask) { |
| 188 | return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask, |
| 189 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IPARITY" , |
| 190 | /*allowUnsignedForInteger=*/true); |
| 191 | } |
| 192 | CppTypeFor<TypeCategory::Integer, 8> RTDEF(IParity8)(const Descriptor &x, |
| 193 | const char *source, int line, int dim, const Descriptor *mask) { |
| 194 | return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask, |
| 195 | IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IPARITY" , |
| 196 | /*allowUnsignedForInteger=*/true); |
| 197 | } |
| 198 | #ifdef __SIZEOF_INT128__ |
| 199 | CppTypeFor<TypeCategory::Integer, 16> RTDEF(IParity16)(const Descriptor &x, |
| 200 | const char *source, int line, int dim, const Descriptor *mask) { |
| 201 | return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim, |
| 202 | mask, IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x}, |
| 203 | "IPARITY" , /*allowUnsignedForInteger=*/true); |
| 204 | } |
| 205 | #endif |
| 206 | void RTDEF(IParityDim)(Descriptor &result, const Descriptor &x, int dim, |
| 207 | const char *source, int line, const Descriptor *mask) { |
| 208 | Terminator terminator{source, line}; |
| 209 | auto catKind{x.type().GetCategoryAndKind()}; |
| 210 | RUNTIME_CHECK(terminator, |
| 211 | catKind.has_value() && |
| 212 | (catKind->first == TypeCategory::Integer || |
| 213 | catKind->first == TypeCategory::Unsigned)); |
| 214 | PartialIntegerReduction<IntegerXorAccumulator>( |
| 215 | result, x, dim, catKind->second, mask, "IPARITY" , terminator); |
| 216 | } |
| 217 | } |
| 218 | |
| 219 | // ALL, ANY, COUNT, & PARITY |
| 220 | |
| 221 | enum class LogicalReduction { All, Any, Parity }; |
| 222 | |
| 223 | template <LogicalReduction REDUCTION> class LogicalAccumulator { |
| 224 | public: |
| 225 | using Type = bool; |
| 226 | RT_API_ATTRS explicit LogicalAccumulator(const Descriptor &array) |
| 227 | : array_{array} {} |
| 228 | RT_API_ATTRS void Reinitialize() { |
| 229 | result_ = REDUCTION == LogicalReduction::All; |
| 230 | } |
| 231 | RT_API_ATTRS bool Result() const { return result_; } |
| 232 | RT_API_ATTRS bool Accumulate(bool x) { |
| 233 | if constexpr (REDUCTION == LogicalReduction::Parity) { |
| 234 | result_ = result_ != x; |
| 235 | } else if (x != (REDUCTION == LogicalReduction::All)) { |
| 236 | result_ = x; |
| 237 | return false; |
| 238 | } |
| 239 | return true; |
| 240 | } |
| 241 | template <typename IGNORED = void> |
| 242 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
| 243 | return Accumulate(IsLogicalElementTrue(array_, at)); |
| 244 | } |
| 245 | |
| 246 | private: |
| 247 | const Descriptor &array_; |
| 248 | bool result_{REDUCTION == LogicalReduction::All}; |
| 249 | }; |
| 250 | |
| 251 | template <typename ACCUMULATOR> |
| 252 | RT_API_ATTRS inline auto GetTotalLogicalReduction(const Descriptor &x, |
| 253 | const char *source, int line, int dim, ACCUMULATOR &&accumulator, |
| 254 | const char *intrinsic) -> typename ACCUMULATOR::Type { |
| 255 | Terminator terminator{source, line}; |
| 256 | if (dim < 0 || dim > 1) { |
| 257 | terminator.Crash("%s: bad DIM=%d for ARRAY with rank=1" , intrinsic, dim); |
| 258 | } |
| 259 | SubscriptValue xAt[maxRank]; |
| 260 | x.GetLowerBounds(xAt); |
| 261 | for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) { |
| 262 | if (!accumulator.AccumulateAt(xAt)) { |
| 263 | break; // cut short, result is known |
| 264 | } |
| 265 | } |
| 266 | return accumulator.Result(); |
| 267 | } |
| 268 | |
| 269 | template <typename ACCUMULATOR> |
| 270 | RT_API_ATTRS inline auto ReduceLogicalDimToScalar( |
| 271 | const Descriptor &x, int zeroBasedDim, SubscriptValue subscripts[]) -> |
| 272 | typename ACCUMULATOR::Type { |
| 273 | ACCUMULATOR accumulator{x}; |
| 274 | SubscriptValue xAt[maxRank]; |
| 275 | GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts); |
| 276 | const auto &dim{x.GetDimension(zeroBasedDim)}; |
| 277 | SubscriptValue at{dim.LowerBound()}; |
| 278 | for (auto n{dim.Extent()}; n-- > 0; ++at) { |
| 279 | xAt[zeroBasedDim] = at; |
| 280 | if (!accumulator.AccumulateAt(xAt)) { |
| 281 | break; |
| 282 | } |
| 283 | } |
| 284 | return accumulator.Result(); |
| 285 | } |
| 286 | |
| 287 | template <LogicalReduction REDUCTION> struct LogicalReduceHelper { |
| 288 | template <int KIND> struct Functor { |
| 289 | RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, |
| 290 | int dim, Terminator &terminator, const char *intrinsic) const { |
| 291 | // Standard requires result to have same LOGICAL kind as argument. |
| 292 | CreatePartialReductionResult( |
| 293 | result, x, x.ElementBytes(), dim, terminator, intrinsic, x.type()); |
| 294 | SubscriptValue at[maxRank]; |
| 295 | result.GetLowerBounds(at); |
| 296 | INTERNAL_CHECK(result.rank() == 0 || at[0] == 1); |
| 297 | using CppType = CppTypeFor<TypeCategory::Logical, KIND>; |
| 298 | for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { |
| 299 | *result.Element<CppType>(at) = |
| 300 | ReduceLogicalDimToScalar<LogicalAccumulator<REDUCTION>>( |
| 301 | x, dim - 1, at); |
| 302 | } |
| 303 | } |
| 304 | }; |
| 305 | }; |
| 306 | |
| 307 | template <LogicalReduction REDUCTION> |
| 308 | RT_API_ATTRS inline void DoReduceLogicalDimension(Descriptor &result, |
| 309 | const Descriptor &x, int dim, Terminator &terminator, |
| 310 | const char *intrinsic) { |
| 311 | auto catKind{x.type().GetCategoryAndKind()}; |
| 312 | RUNTIME_CHECK(terminator, catKind && catKind->first == TypeCategory::Logical); |
| 313 | ApplyLogicalKind<LogicalReduceHelper<REDUCTION>::template Functor, void>( |
| 314 | catKind->second, terminator, result, x, dim, terminator, intrinsic); |
| 315 | } |
| 316 | |
| 317 | // COUNT |
| 318 | |
| 319 | class CountAccumulator { |
| 320 | public: |
| 321 | using Type = std::int64_t; |
| 322 | RT_API_ATTRS explicit CountAccumulator(const Descriptor &array) |
| 323 | : array_{array} {} |
| 324 | RT_API_ATTRS void Reinitialize() { result_ = 0; } |
| 325 | RT_API_ATTRS Type Result() const { return result_; } |
| 326 | template <typename IGNORED = void> |
| 327 | RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { |
| 328 | if (IsLogicalElementTrue(array_, at)) { |
| 329 | ++result_; |
| 330 | } |
| 331 | return true; |
| 332 | } |
| 333 | |
| 334 | private: |
| 335 | const Descriptor &array_; |
| 336 | Type result_{0}; |
| 337 | }; |
| 338 | |
| 339 | template <int KIND> struct CountDimension { |
| 340 | RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim, |
| 341 | Terminator &terminator) const { |
| 342 | // Element size of the descriptor descriptor is the size |
| 343 | // of {TypeCategory::Integer, KIND}. |
| 344 | CreatePartialReductionResult(result, x, |
| 345 | Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, terminator, |
| 346 | "COUNT" , TypeCode{TypeCategory::Integer, KIND}); |
| 347 | SubscriptValue at[maxRank]; |
| 348 | result.GetLowerBounds(at); |
| 349 | INTERNAL_CHECK(result.rank() == 0 || at[0] == 1); |
| 350 | using CppType = CppTypeFor<TypeCategory::Integer, KIND>; |
| 351 | for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { |
| 352 | *result.Element<CppType>(at) = |
| 353 | ReduceLogicalDimToScalar<CountAccumulator>(x, dim - 1, at); |
| 354 | } |
| 355 | } |
| 356 | }; |
| 357 | |
| 358 | extern "C" { |
| 359 | RT_EXT_API_GROUP_BEGIN |
| 360 | |
| 361 | bool RTDEF(All)(const Descriptor &x, const char *source, int line, int dim) { |
| 362 | return GetTotalLogicalReduction(x, source, line, dim, |
| 363 | LogicalAccumulator<LogicalReduction::All>{x}, "ALL" ); |
| 364 | } |
| 365 | void RTDEF(AllDim)(Descriptor &result, const Descriptor &x, int dim, |
| 366 | const char *source, int line) { |
| 367 | Terminator terminator{source, line}; |
| 368 | DoReduceLogicalDimension<LogicalReduction::All>( |
| 369 | result, x, dim, terminator, "ALL" ); |
| 370 | } |
| 371 | |
| 372 | bool RTDEF(Any)(const Descriptor &x, const char *source, int line, int dim) { |
| 373 | return GetTotalLogicalReduction(x, source, line, dim, |
| 374 | LogicalAccumulator<LogicalReduction::Any>{x}, "ANY" ); |
| 375 | } |
| 376 | void RTDEF(AnyDim)(Descriptor &result, const Descriptor &x, int dim, |
| 377 | const char *source, int line) { |
| 378 | Terminator terminator{source, line}; |
| 379 | DoReduceLogicalDimension<LogicalReduction::Any>( |
| 380 | result, x, dim, terminator, "ANY" ); |
| 381 | } |
| 382 | |
| 383 | std::int64_t RTDEF(Count)( |
| 384 | const Descriptor &x, const char *source, int line, int dim) { |
| 385 | return GetTotalLogicalReduction( |
| 386 | x, source, line, dim, CountAccumulator{x}, "COUNT" ); |
| 387 | } |
| 388 | |
| 389 | void RTDEF(CountDim)(Descriptor &result, const Descriptor &x, int dim, int kind, |
| 390 | const char *source, int line) { |
| 391 | Terminator terminator{source, line}; |
| 392 | ApplyIntegerKind<CountDimension, void>( |
| 393 | kind, terminator, result, x, dim, terminator); |
| 394 | } |
| 395 | |
| 396 | bool RTDEF(Parity)(const Descriptor &x, const char *source, int line, int dim) { |
| 397 | return GetTotalLogicalReduction(x, source, line, dim, |
| 398 | LogicalAccumulator<LogicalReduction::Parity>{x}, "PARITY" ); |
| 399 | } |
| 400 | void RTDEF(ParityDim)(Descriptor &result, const Descriptor &x, int dim, |
| 401 | const char *source, int line) { |
| 402 | Terminator terminator{source, line}; |
| 403 | DoReduceLogicalDimension<LogicalReduction::Parity>( |
| 404 | result, x, dim, terminator, "PARITY" ); |
| 405 | } |
| 406 | |
| 407 | RT_EXT_API_GROUP_END |
| 408 | } // extern "C" |
| 409 | } // namespace Fortran::runtime |
| 410 | |