//===-- lib/runtime/tools.cpp -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang-rt/runtime/tools.h"
#include "flang-rt/runtime/terminator.h"
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <cstring>

namespace Fortran::runtime {

RT_OFFLOAD_API_GROUP_BEGIN

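// Returns the length of 's' with trailing blanks excluded; e.g. (an
// illustrative case) TrimTrailingSpaces("abc  ", 5) yields 3.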
RT_API_ATTRS std::size_t TrimTrailingSpaces(const char *s, std::size_t n) {
  while (n > 0 && s[n - 1] == ' ') {
    --n;
  }
  return n;
}

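// Copies a default CHARACTER value into NUL-terminated owned storage so that
// it can outlive the caller's buffer; returns a null OwningPtr when s is null.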
RT_API_ATTRS OwningPtr<char> SaveDefaultCharacter(
    const char *s, std::size_t length, const Terminator &terminator) {
  if (s) {
    auto *p{static_cast<char *>(AllocateMemoryOrCrash(terminator, length + 1))};
    std::memcpy(p, s, length);
    p[length] = '\0';
    return OwningPtr<char>{p};
  } else {
    return OwningPtr<char>{};
  }
}

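// Illustrative examples: "append " matches "APPEND" (trailing blanks in the
// value are ignored), while "appendix" does not.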
static RT_API_ATTRS bool CaseInsensitiveMatch(
    const char *value, std::size_t length, const char *possibility) {
  for (; length-- > 0; ++possibility) {
    char ch{*value++};
    if (ch >= 'a' && ch <= 'z') {
      ch += 'A' - 'a';
    }
    if (*possibility != ch) {
      if (*possibility != '\0' || ch != ' ') {
        return false;
      }
      // Ignore trailing blanks (12.5.6.2 p1)
      while (length-- > 0) {
        if (*value++ != ' ') {
          return false;
        }
      }
      return true;
    }
  }
  return *possibility == '\0';
}

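// Returns the index of the first entry in the null-terminated 'possibilities'
// array that matches 'value' case-insensitively, or -1 when there is none;
// e.g., with {"NO", "YES", nullptr}, IdentifyValue("yes ", 4, ...) yields 1.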
RT_API_ATTRS int IdentifyValue(
    const char *value, std::size_t length, const char *possibilities[]) {
  if (value) {
    for (int j{0}; possibilities[j]; ++j) {
      if (CaseInsensitiveMatch(value, length, possibilities[j])) {
        return j;
      }
    }
  }
  return -1;
}

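// Assigns a NUL-terminated C string to a fixed-length Fortran character
// buffer, blank-padding or truncating as needed; e.g. (illustrative) copying
// "abc" into a CHARACTER(8) buffer stores "abc" followed by five blanks.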
RT_API_ATTRS void ToFortranDefaultCharacter(
    char *to, std::size_t toLength, const char *from) {
  std::size_t len{Fortran::runtime::strlen(from)};
  if (len < toLength) {
    std::memcpy(to, from, len);
    std::memset(to + len, ' ', toLength - len);
  } else {
    std::memcpy(to, from, toLength);
  }
}

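// For example, passing a 3x4 'x' where 'to' is 4x3 crashes with a report that
// dimension 1 of 'to' has extent 4 but 'x' has extent 3.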
RT_API_ATTRS void CheckConformability(const Descriptor &to, const Descriptor &x,
    Terminator &terminator, const char *funcName, const char *toName,
    const char *xName) {
  if (x.rank() == 0) {
    return; // scalar conforms with anything
  }
  int rank{to.rank()};
  if (x.rank() != rank) {
    terminator.Crash(
        "Incompatible array arguments to %s: %s has rank %d but %s has rank %d",
        funcName, toName, rank, xName, x.rank());
  } else {
    for (int j{0}; j < rank; ++j) {
      auto toExtent{static_cast<std::int64_t>(to.GetDimension(j).Extent())};
      auto xExtent{static_cast<std::int64_t>(x.GetDimension(j).Extent())};
      if (xExtent != toExtent) {
        terminator.Crash("Incompatible array arguments to %s: dimension %d of "
                         "%s has extent %" PRId64 " but %s has extent %" PRId64,
            funcName, j + 1, toName, toExtent, xName, xExtent);
      }
    }
  }
}

RT_API_ATTRS void CheckIntegerKind(
    Terminator &terminator, int kind, const char *intrinsic) {
  if (kind < 1 || kind > 16 || (kind & (kind - 1)) != 0) {
    terminator.Crash("not yet implemented: INTEGER(KIND=%d) in %s intrinsic",
        kind, intrinsic);
  }
}

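// Copy loops for each combination of (dis)contiguous source and destination.
// P is the assumed element type (char for the generic byte-wise case) and
// RANK is the compile-time rank (-1 when it is only known at run time).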
template <typename P, int RANK>
RT_API_ATTRS void ShallowCopyDiscontiguousToDiscontiguous(
    const Descriptor &to, const Descriptor &from) {
  DescriptorIterator<RANK> toIt{to};
  DescriptorIterator<RANK> fromIt{from};
  // Knowing the size at compile time can enable memcpy inlining optimisations
  constexpr std::size_t typeElementBytes{sizeof(P)};
  // We might still need to check the actual size as a fallback
  std::size_t elementBytes{to.ElementBytes()};
  for (std::size_t n{to.Elements()}; n-- > 0;
       toIt.Advance(), fromIt.Advance()) {
    // typeElementBytes == 1 when P is a char - the non-specialised case
    if constexpr (typeElementBytes != 1) {
      std::memcpy(
          toIt.template Get<P>(), fromIt.template Get<P>(), typeElementBytes);
    } else {
      std::memcpy(
          toIt.template Get<P>(), fromIt.template Get<P>(), elementBytes);
    }
  }
}

// Explicitly instantiate the default case to conform to the C++ standard
template RT_API_ATTRS void ShallowCopyDiscontiguousToDiscontiguous<char, -1>(
    const Descriptor &to, const Descriptor &from);

template <typename P, int RANK>
RT_API_ATTRS void ShallowCopyDiscontiguousToContiguous(
    const Descriptor &to, const Descriptor &from) {
  char *toAt{to.OffsetElement()};
  constexpr std::size_t typeElementBytes{sizeof(P)};
  std::size_t elementBytes{to.ElementBytes()};
  DescriptorIterator<RANK> fromIt{from};
  for (std::size_t n{to.Elements()}; n-- > 0;
       toAt += elementBytes, fromIt.Advance()) {
    if constexpr (typeElementBytes != 1) {
      std::memcpy(toAt, fromIt.template Get<P>(), typeElementBytes);
    } else {
      std::memcpy(toAt, fromIt.template Get<P>(), elementBytes);
    }
  }
}

template RT_API_ATTRS void ShallowCopyDiscontiguousToContiguous<char, -1>(
    const Descriptor &to, const Descriptor &from);

template <typename P, int RANK>
RT_API_ATTRS void ShallowCopyContiguousToDiscontiguous(
    const Descriptor &to, const Descriptor &from) {
  char *fromAt{from.OffsetElement()};
  DescriptorIterator<RANK> toIt{to};
  constexpr std::size_t typeElementBytes{sizeof(P)};
  std::size_t elementBytes{to.ElementBytes()};
  for (std::size_t n{to.Elements()}; n-- > 0;
       toIt.Advance(), fromAt += elementBytes) {
    if constexpr (typeElementBytes != 1) {
      std::memcpy(toIt.template Get<P>(), fromAt, typeElementBytes);
    } else {
      std::memcpy(toIt.template Get<P>(), fromAt, elementBytes);
    }
  }
}

template RT_API_ATTRS void ShallowCopyContiguousToDiscontiguous<char, -1>(
    const Descriptor &to, const Descriptor &from);

// ShallowCopy helper for calling the correct specialised variant based on
// scenario
template <typename P, int RANK = -1>
RT_API_ATTRS void ShallowCopyInner(const Descriptor &to, const Descriptor &from,
    bool toIsContiguous, bool fromIsContiguous) {
  if (toIsContiguous) {
    if (fromIsContiguous) {
      std::memcpy(to.OffsetElement(), from.OffsetElement(),
          to.Elements() * to.ElementBytes());
    } else {
      ShallowCopyDiscontiguousToContiguous<P, RANK>(to, from);
    }
  } else {
    if (fromIsContiguous) {
      ShallowCopyContiguousToDiscontiguous<P, RANK>(to, from);
    } else {
      ShallowCopyDiscontiguousToDiscontiguous<P, RANK>(to, from);
    }
  }
}

// Most arrays are much closer to rank-1 than to maxRank.
// Doing the recursion upwards instead of downwards puts the more common
// cases earlier in the if-chain and has a tangible impact on performance.
template <typename P, int RANK> struct ShallowCopyRankSpecialize {
  static RT_API_ATTRS bool execute(const Descriptor &to, const Descriptor &from,
      bool toIsContiguous, bool fromIsContiguous) {
    if (to.rank() == RANK && from.rank() == RANK) {
      ShallowCopyInner<P, RANK>(to, from, toIsContiguous, fromIsContiguous);
      return true;
    }
    return ShallowCopyRankSpecialize<P, RANK + 1>::execute(
        to, from, toIsContiguous, fromIsContiguous);
  }
};

template <typename P> struct ShallowCopyRankSpecialize<P, maxRank + 1> {
  static RT_API_ATTRS bool execute(const Descriptor &to, const Descriptor &from,
      bool toIsContiguous, bool fromIsContiguous) {
    return false;
  }
};

// ShallowCopy helper for specialising the variants based on array rank
template <typename P>
RT_API_ATTRS void ShallowCopyRank(const Descriptor &to, const Descriptor &from,
    bool toIsContiguous, bool fromIsContiguous) {
  // Try to call a specialised ShallowCopy variant from rank-1 up to maxRank
  bool specialized{ShallowCopyRankSpecialize<P, 1>::execute(
      to, from, toIsContiguous, fromIsContiguous)};
  if (!specialized) {
    ShallowCopyInner<P>(to, from, toIsContiguous, fromIsContiguous);
  }
}

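// Illustrative dispatch: copying a discontiguous REAL(4) array section into a
// contiguous rank-1 temporary ends up in
// ShallowCopyDiscontiguousToContiguous<float, 1>.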
RT_API_ATTRS void ShallowCopy(const Descriptor &to, const Descriptor &from,
    bool toIsContiguous, bool fromIsContiguous) {
  std::size_t elementBytes{to.ElementBytes()};
  // Checking the type at runtime and making sure the pointer passed to memcpy
  // has a type that matches the element type makes it possible for the
  // compiler to optimise out the memcpy calls altogether and can substantially
  // improve performance for some applications.
  if (to.type().IsInteger()) {
    if (elementBytes == sizeof(int64_t)) {
      ShallowCopyRank<int64_t>(to, from, toIsContiguous, fromIsContiguous);
    } else if (elementBytes == sizeof(int32_t)) {
      ShallowCopyRank<int32_t>(to, from, toIsContiguous, fromIsContiguous);
    } else if (elementBytes == sizeof(int16_t)) {
      ShallowCopyRank<int16_t>(to, from, toIsContiguous, fromIsContiguous);
#if defined USING_NATIVE_INT128_T
    } else if (elementBytes == sizeof(__int128_t)) {
      ShallowCopyRank<__int128_t>(to, from, toIsContiguous, fromIsContiguous);
#endif
    } else {
      ShallowCopyRank<char>(to, from, toIsContiguous, fromIsContiguous);
    }
  } else if (to.type().IsReal()) {
    if (elementBytes == sizeof(double)) {
      ShallowCopyRank<double>(to, from, toIsContiguous, fromIsContiguous);
    } else if (elementBytes == sizeof(float)) {
      ShallowCopyRank<float>(to, from, toIsContiguous, fromIsContiguous);
    } else {
      ShallowCopyRank<char>(to, from, toIsContiguous, fromIsContiguous);
    }
  } else {
    ShallowCopyRank<char>(to, from, toIsContiguous, fromIsContiguous);
  }
}

RT_API_ATTRS void ShallowCopy(const Descriptor &to, const Descriptor &from) {
  ShallowCopy(to, from, to.IsContiguous(), from.IsContiguous());
}

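// Returns 'str' itself when a NUL already appears within 'length' bytes;
// otherwise returns a freshly allocated NUL-terminated copy that the caller
// is responsible for freeing.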
RT_API_ATTRS char *EnsureNullTerminated(
    char *str, std::size_t length, Terminator &terminator) {
  if (runtime::memchr(str, '\0', length) == nullptr) {
    char *newCmd{(char *)AllocateMemoryOrCrash(terminator, length + 1)};
    std::memcpy(newCmd, str, length);
    newCmd[length] = '\0';
    return newCmd;
  } else {
    return str;
  }
}

RT_API_ATTRS bool IsValidCharDescriptor(const Descriptor *value) {
  return value && value->IsAllocated() &&
      value->type() == TypeCode(TypeCategory::Character, 1) &&
      value->rank() == 0;
}

RT_API_ATTRS bool IsValidIntDescriptor(const Descriptor *intVal) {
  // Check that our descriptor is allocated and is a scalar integer with
  // kind != 1 (i.e. with a large enough decimal range).
  return intVal && intVal->IsAllocated() && intVal->rank() == 0 &&
      intVal->type().IsInteger() && intVal->type().GetCategoryAndKind() &&
      intVal->type().GetCategoryAndKind()->second != 1;
}

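// Copies as many characters as fit, reporting truncation; e.g. (illustrative)
// copying the 5-character "hello" into a CHARACTER(4) value stores "hell" and
// returns StatValueTooShort.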
RT_API_ATTRS std::int32_t CopyCharsToDescriptor(const Descriptor &value,
    const char *rawValue, std::size_t rawValueLength, const Descriptor *errmsg,
    std::size_t offset) {
  const std::int64_t toCopy{std::min(static_cast<std::int64_t>(rawValueLength),
      static_cast<std::int64_t>(value.ElementBytes() - offset))};
  if (toCopy < 0) {
    return ToErrmsg(errmsg, StatValueTooShort);
  }

  std::memcpy(value.OffsetElement(offset), rawValue, toCopy);

  if (static_cast<std::int64_t>(rawValueLength) > toCopy) {
    return ToErrmsg(errmsg, StatValueTooShort);
  }

  return StatOk;
}

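// Stores 'value' into the scalar integer descriptor '*length', dispatching on
// that descriptor's actual integer kind.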
RT_API_ATTRS void StoreIntToDescriptor(
    const Descriptor *length, std::int64_t value, Terminator &terminator) {
  auto typeCode{length->type().GetCategoryAndKind()};
  int kind{typeCode->second};
  ApplyIntegerKind<StoreIntegerAt, void>(
      kind, terminator, *length, /* atIndex = */ 0, value);
}

template <int KIND> struct FitsInIntegerKind {
  RT_API_ATTRS bool operator()([[maybe_unused]] std::int64_t value) {
    if constexpr (KIND >= 8) {
      return true;
    } else {
      return value <=
          std::numeric_limits<
              CppTypeFor<Fortran::common::TypeCategory::Integer, KIND>>::max();
    }
  }
};

// Utility: establishes & allocates the result array for a partial
// reduction (i.e., one with DIM=).
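// For example, a reduction with DIM=2 over a rank-3 array of shape [2,3,4]
// establishes a rank-2 result of shape [2,4].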
RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result,
    const Descriptor &x, std::size_t resultElementSize, int dim,
    Terminator &terminator, const char *intrinsic, TypeCode typeCode) {
  int xRank{x.rank()};
  if (dim < 1 || dim > xRank) {
    terminator.Crash(
        "%s: bad DIM=%d for ARRAY with rank %d", intrinsic, dim, xRank);
  }
  int zeroBasedDim{dim - 1};
  SubscriptValue resultExtent[maxRank];
  for (int j{0}; j < zeroBasedDim; ++j) {
    resultExtent[j] = x.GetDimension(j).Extent();
  }
  for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
    resultExtent[j - 1] = x.GetDimension(j).Extent();
  }
  result.Establish(typeCode, resultElementSize, nullptr, xRank - 1,
      resultExtent, CFI_attribute_allocatable);
  for (int j{0}; j + 1 < xRank; ++j) {
    result.GetDimension(j).SetBounds(1, resultExtent[j]);
  }
  if (int stat{result.Allocate(kNoAsyncObject)}) {
    terminator.Crash(
        "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
  }
}

RT_OFFLOAD_API_GROUP_END
} // namespace Fortran::runtime