| 1 | //===- CRunnerUtils.cpp - Utils for MLIR execution ------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements basic functions to manipulate structured MLIR types at |
| 10 | // runtime. Entities in this file are meant to be retargetable, including on |
| 11 | // targets without a C++ runtime, and must be kept C compatible. |
| 12 | // |
| 13 | //===----------------------------------------------------------------------===// |
| 14 | |
| 15 | #include "mlir/ExecutionEngine/CRunnerUtils.h" |
| 16 | #include "mlir/ExecutionEngine/Msan.h" |
| 17 | |
| 18 | #ifndef _WIN32 |
| 19 | #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ |
| 20 | defined(__DragonFly__) |
| 21 | #include <cstdlib> |
| 22 | #else |
| 23 | #include <alloca.h> |
| 24 | #endif |
| 25 | #include <sys/time.h> |
| 26 | #else |
| 27 | #include "malloc.h" |
| 28 | #endif // _WIN32 |
| 29 | |
| 30 | #include <algorithm> |
| 31 | #include <cinttypes> |
| 32 | #include <cstdio> |
| 33 | #include <cstdlib> |
| 34 | #include <numeric> |
| 35 | #include <random> |
| 36 | #include <string.h> |
| 37 | |
| 38 | #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS |
| 39 | |
namespace {
/// Sorts the `n` consecutive elements that start at `p` into ascending order
/// (as defined by `operator<` on `V`) by delegating to std::sort.
template <typename V>
void stdSort(uint64_t n, V *p) {
  V *const last = p + n;
  std::sort(p, last);
}

} // namespace
| 47 | |
| 48 | // Small runtime support "lib" for vector.print lowering. |
| 49 | // By providing elementary printing methods only, this |
| 50 | // library can remain fully unaware of low-level implementation |
| 51 | // details of our vectors. Also useful for direct LLVM IR output. |
/// Writes the signed 64-bit integer `i` to stdout in decimal, with no
/// separator or trailing newline.
extern "C" void printI64(int64_t i) {
  fprintf(stdout, "%" PRId64, i);
}
/// Writes the unsigned 64-bit integer `u` to stdout in decimal, with no
/// separator or trailing newline.
extern "C" void printU64(uint64_t u) {
  fprintf(stdout, "%" PRIu64, u);
}
/// Writes `f` to stdout using the "%g" format. A NaN whose sign bit is set is
/// special-cased and always written as the literal "-nan" (presumably so the
/// text does not depend on how the C library formats negative NaNs).
extern "C" void printF32(float f) {
  const bool isNegativeNan = std::isnan(f) && std::signbit(f);
  if (isNegativeNan) {
    fprintf(stdout, "-nan");
    return;
  }
  fprintf(stdout, "%g", f);
}
/// Writes `d` to stdout using the "%lg" format. A NaN whose sign bit is set is
/// special-cased and always written as the literal "-nan" (presumably so the
/// text does not depend on how the C library formats negative NaNs).
extern "C" void printF64(double d) {
  const bool isNegativeNan = std::isnan(d) && std::signbit(d);
  if (isNegativeNan) {
    fprintf(stdout, "-nan");
    return;
  }
  fprintf(stdout, "%lg", d);
}
/// Writes the NUL-terminated string `s` to stdout verbatim (no newline is
/// appended).
extern "C" void printString(char const *s) {
  fputs(s, stdout);
}
/// Writes the opening delimiter "( " to stdout.
extern "C" void printOpen() {
  fputs("( ", stdout);
}
/// Writes the closing delimiter " )" to stdout.
extern "C" void printClose() {
  fputs(" )", stdout);
}
/// Writes the element separator ", " to stdout.
extern "C" void printComma() {
  fputs(", ", stdout);
}
/// Writes a single newline character to stdout.
extern "C" void printNewline() {
  fputc('\n', stdout);
}
| 73 | |
| 74 | extern "C" void memrefCopy(int64_t elemSize, UnrankedMemRefType<char> *srcArg, |
| 75 | UnrankedMemRefType<char> *dstArg) { |
| 76 | DynamicMemRefType<char> src(*srcArg); |
| 77 | DynamicMemRefType<char> dst(*dstArg); |
| 78 | |
| 79 | int64_t rank = src.rank; |
| 80 | MLIR_MSAN_MEMORY_IS_INITIALIZED(src.sizes, rank * sizeof(int64_t)); |
| 81 | |
| 82 | // Handle empty shapes -> nothing to copy. |
| 83 | for (int rankp = 0; rankp < rank; ++rankp) |
| 84 | if (src.sizes[rankp] == 0) |
| 85 | return; |
| 86 | |
| 87 | char *srcPtr = src.data + src.offset * elemSize; |
| 88 | char *dstPtr = dst.data + dst.offset * elemSize; |
| 89 | |
| 90 | if (rank == 0) { |
| 91 | memcpy(dest: dstPtr, src: srcPtr, n: elemSize); |
| 92 | return; |
| 93 | } |
| 94 | |
| 95 | int64_t *indices = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank)); |
| 96 | int64_t *srcStrides = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank)); |
| 97 | int64_t *dstStrides = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank)); |
| 98 | |
| 99 | // Initialize index and scale strides. |
| 100 | for (int rankp = 0; rankp < rank; ++rankp) { |
| 101 | indices[rankp] = 0; |
| 102 | srcStrides[rankp] = src.strides[rankp] * elemSize; |
| 103 | dstStrides[rankp] = dst.strides[rankp] * elemSize; |
| 104 | } |
| 105 | |
| 106 | int64_t readIndex = 0, writeIndex = 0; |
| 107 | for (;;) { |
| 108 | // Copy over the element, byte by byte. |
| 109 | memcpy(dest: dstPtr + writeIndex, src: srcPtr + readIndex, n: elemSize); |
| 110 | // Advance index and read position. |
| 111 | for (int64_t axis = rank - 1; axis >= 0; --axis) { |
| 112 | // Advance at current axis. |
| 113 | auto newIndex = ++indices[axis]; |
| 114 | readIndex += srcStrides[axis]; |
| 115 | writeIndex += dstStrides[axis]; |
| 116 | // If this is a valid index, we have our next index, so continue copying. |
| 117 | if (src.sizes[axis] != newIndex) |
| 118 | break; |
| 119 | // We reached the end of this axis. If this is axis 0, we are done. |
| 120 | if (axis == 0) |
| 121 | return; |
| 122 | // Else, reset to 0 and undo the advancement of the linear index that |
| 123 | // this axis had. Then continue with the axis one outer. |
| 124 | indices[axis] = 0; |
| 125 | readIndex -= src.sizes[axis] * srcStrides[axis]; |
| 126 | writeIndex -= dst.sizes[axis] * dstStrides[axis]; |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | |
/// Prints a GFLOPS rating to stderr: `flops` is divided by 1e9 and written
/// with the "%lf GFLOPS" format followed by a newline.
extern "C" void printFlops(double flops) {
  const double gigaFlops = flops / 1.0E9;
  fprintf(stderr, "%lf GFLOPS\n", gigaFlops);
}
| 135 | |
/// Returns the number of seconds since Epoch 1970-01-01 00:00:00 +0000 (UTC),
/// with microsecond resolution, as reported by gettimeofday().
///
/// If gettimeofday() fails, an error is printed to stderr and 0.0 is returned.
/// On Windows the utility is not implemented: a message is printed to stderr
/// and 0.0 is returned.
extern "C" double rtclock() {
#ifndef _WIN32
  // Zero-initialize so that a failed call can never lead to reading
  // indeterminate values.
  struct timeval tp = {};
  int stat = gettimeofday(&tp, nullptr);
  if (stat != 0) {
    fprintf(stderr, "Error returning time from gettimeofday: %d\n", stat);
    return 0.0;
  }
  return (tp.tv_sec + tp.tv_usec * 1.0e-6);
#else
  fprintf(stderr, "Timing utility not implemented on Windows\n");
  return 0.0;
#endif // _WIN32
}
| 149 | |
/// Allocates `size` bytes with malloc and returns the resulting pointer
/// (whatever malloc returns, including null on failure).
extern "C" void *mlirAlloc(uint64_t size) {
  void *buffer = malloc(size);
  return buffer;
}
| 151 | |
/// Allocates at least `size` bytes whose address is a multiple of `alignment`
/// and returns the pointer, or null if the allocation fails. The memory must
/// be released with mlirAlignedFree.
///
/// The platform primitive differs: _aligned_malloc on Windows, posix_memalign
/// on Apple platforms, and aligned_alloc elsewhere.
extern "C" void *mlirAlignedAlloc(uint64_t alignment, uint64_t size) {
#ifdef _WIN32
  return _aligned_malloc(size, alignment);
#elif defined(__APPLE__)
  // aligned_alloc was added in MacOS 10.15. Fall back to posix_memalign to also
  // support older versions.
  void *result = nullptr;
  if (::posix_memalign(&result, alignment, size) != 0)
    return nullptr;
  return result;
#else
  // C11 requires the size passed to aligned_alloc to be an integral multiple
  // of the alignment, and some C libraries reject other sizes. Round the
  // request up so any size is accepted; an alignment of 0 is passed through
  // unchanged for aligned_alloc to reject.
  const uint64_t remainder = alignment != 0 ? size % alignment : 0;
  if (remainder != 0) {
    const uint64_t padding = alignment - remainder;
    // Refuse requests whose rounded size would wrap around.
    if (size > UINT64_MAX - padding)
      return nullptr;
    size += padding;
  }
  return aligned_alloc(alignment, size);
#endif
}
| 165 | |
/// Releases the memory pointed to by `ptr` by passing it to free.
extern "C" void mlirFree(void *ptr) {
  free(ptr);
}
| 167 | |
/// Releases the memory pointed to by `ptr`: with free everywhere except
/// Windows, where _aligned_free is used instead.
extern "C" void mlirAlignedFree(void *ptr) {
#ifndef _WIN32
  free(ptr);
#else
  _aligned_free(ptr);
#endif
}
| 175 | |
/// Creates a heap-allocated std::mt19937 engine seeded with `s` and returns it
/// as an opaque pointer. The engine is allocated with `new`; rtdrand deletes
/// it.
extern "C" void *rtsrand(uint64_t s) {
  auto *engine = new std::mt19937(s);
  return engine;
}
| 180 | |
/// Draws one value from a std::uniform_int_distribution<uint64_t> over the
/// closed range [0, m], using the std::mt19937 engine that `g` points to.
extern "C" uint64_t rtrand(void *g, uint64_t m) {
  auto &engine = *static_cast<std::mt19937 *>(g);
  return std::uniform_int_distribution<uint64_t>(0, m)(engine);
}
| 186 | |
/// Destroys the std::mt19937 engine that `g` points to by casting it back to
/// its concrete type and deleting it.
extern "C" void rtdrand(void *g) {
  delete static_cast<std::mt19937 *>(g);
}
| 191 | |
| 192 | extern "C" void _mlir_ciface_shuffle(StridedMemRefType<uint64_t, 1> *mref, |
| 193 | void *g) { |
| 194 | assert(mref); |
| 195 | assert(mref->strides[0] == 1); // consecutive |
| 196 | std::mt19937 *generator = static_cast<std::mt19937 *>(g); |
| 197 | uint64_t s = mref->sizes[0]; |
| 198 | uint64_t *data = mref->data + mref->offset; |
| 199 | std::iota(first: data, last: data + s, value: 0); |
| 200 | std::shuffle(first: data, last: data + s, g&: *generator); |
| 201 | } |
| 202 | |
| 203 | #define IMPL_STDSORT(VNAME, V) \ |
| 204 | extern "C" void _mlir_ciface_stdSort##VNAME(uint64_t n, \ |
| 205 | StridedMemRefType<V, 1> *vref) { \ |
| 206 | assert(vref); \ |
| 207 | assert(vref->strides[0] == 1); \ |
| 208 | V *values = vref->data + vref->offset; \ |
| 209 | stdSort(n, values); \ |
| 210 | } |
| 211 | IMPL_STDSORT(I64, int64_t) |
| 212 | IMPL_STDSORT(F64, double) |
| 213 | IMPL_STDSORT(F32, float) |
| 214 | #undef IMPL_STDSORT |
| 215 | |
| 216 | #endif // MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS |
| 217 | |