1 | //===- CRunnerUtils.cpp - Utils for MLIR execution ------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements basic functions to manipulate structured MLIR types at |
10 | // runtime. Entities in this file are meant to be retargetable, including on |
11 | // targets without a C++ runtime, and must be kept C compatible. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "mlir/ExecutionEngine/CRunnerUtils.h" |
16 | #include "mlir/ExecutionEngine/Msan.h" |
17 | |
18 | #ifndef _WIN32 |
19 | #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ |
20 | defined(__DragonFly__) |
21 | #include <cstdlib> |
22 | #else |
23 | #include <alloca.h> |
24 | #endif |
25 | #include <sys/time.h> |
26 | #else |
27 | #include "malloc.h" |
28 | #endif // _WIN32 |
29 | |
30 | #include <algorithm> |
31 | #include <cinttypes> |
32 | #include <cstdio> |
33 | #include <cstdlib> |
34 | #include <numeric> |
35 | #include <random> |
36 | #include <string.h> |
37 | |
38 | #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS |
39 | |
namespace {
/// Sorts the `n` values stored contiguously at `p` in place, in ascending
/// order as defined by `operator<` on `V`.
template <typename V>
void stdSort(uint64_t n, V *p) {
  V *const first = p;
  V *const last = first + n;
  std::sort(first, last);
}

} // namespace
47 | |
48 | // Small runtime support "lib" for vector.print lowering. |
49 | // By providing elementary printing methods only, this |
50 | // library can remain fully unaware of low-level implementation |
51 | // details of our vectors. Also useful for direct LLVM IR output. |
/// Writes the signed 64-bit integer `i` to stdout in decimal, with no
/// separator or newline appended.
extern "C" void printI64(int64_t i) {
  printf("%" PRId64, i);
}
/// Writes the unsigned 64-bit integer `u` to stdout in decimal, with no
/// separator or newline appended.
extern "C" void printU64(uint64_t u) {
  printf("%" PRIu64, u);
}
/// Writes the float `f` to stdout using the "%g" format. A NaN whose sign bit
/// is set is always written as "-nan" rather than being left to the C
/// library's formatting.
extern "C" void printF32(float f) {
  const bool isNegativeNan = std::isnan(f) && std::signbit(f);
  if (isNegativeNan) {
    fputs("-nan", stdout);
    return;
  }
  fprintf(stdout, "%g", f);
}
/// Writes the double `d` to stdout using the "%lg" format. A NaN whose sign
/// bit is set is always written as "-nan" rather than being left to the C
/// library's formatting.
extern "C" void printF64(double d) {
  const bool isNegativeNan = std::isnan(d) && std::signbit(d);
  if (isNegativeNan) {
    fputs("-nan", stdout);
    return;
  }
  fprintf(stdout, "%lg", d);
}
/// Writes the NUL-terminated string `s` to stdout verbatim; no newline is
/// appended.
extern "C" void printString(char const *s) {
  std::fputs(s, stdout);
}
/// Writes the opening delimiter "( " to stdout.
extern "C" void printOpen() {
  std::fputs("( ", stdout);
}
/// Writes the closing delimiter " )" to stdout.
extern "C" void printClose() {
  std::fputs(" )", stdout);
}
/// Writes the element separator ", " to stdout.
extern "C" void printComma() {
  std::fputs(", ", stdout);
}
/// Writes a single newline character to stdout.
extern "C" void printNewline() {
  putchar('\n');
}
73 | |
74 | extern "C" void memrefCopy(int64_t elemSize, UnrankedMemRefType<char> *srcArg, |
75 | UnrankedMemRefType<char> *dstArg) { |
76 | DynamicMemRefType<char> src(*srcArg); |
77 | DynamicMemRefType<char> dst(*dstArg); |
78 | |
79 | int64_t rank = src.rank; |
80 | MLIR_MSAN_MEMORY_IS_INITIALIZED(src.sizes, rank * sizeof(int64_t)); |
81 | |
82 | // Handle empty shapes -> nothing to copy. |
83 | for (int rankp = 0; rankp < rank; ++rankp) |
84 | if (src.sizes[rankp] == 0) |
85 | return; |
86 | |
87 | char *srcPtr = src.data + src.offset * elemSize; |
88 | char *dstPtr = dst.data + dst.offset * elemSize; |
89 | |
90 | if (rank == 0) { |
91 | memcpy(dest: dstPtr, src: srcPtr, n: elemSize); |
92 | return; |
93 | } |
94 | |
95 | int64_t *indices = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank)); |
96 | int64_t *srcStrides = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank)); |
97 | int64_t *dstStrides = static_cast<int64_t *>(alloca(sizeof(int64_t) * rank)); |
98 | |
99 | // Initialize index and scale strides. |
100 | for (int rankp = 0; rankp < rank; ++rankp) { |
101 | indices[rankp] = 0; |
102 | srcStrides[rankp] = src.strides[rankp] * elemSize; |
103 | dstStrides[rankp] = dst.strides[rankp] * elemSize; |
104 | } |
105 | |
106 | int64_t readIndex = 0, writeIndex = 0; |
107 | for (;;) { |
108 | // Copy over the element, byte by byte. |
109 | memcpy(dest: dstPtr + writeIndex, src: srcPtr + readIndex, n: elemSize); |
110 | // Advance index and read position. |
111 | for (int64_t axis = rank - 1; axis >= 0; --axis) { |
112 | // Advance at current axis. |
113 | auto newIndex = ++indices[axis]; |
114 | readIndex += srcStrides[axis]; |
115 | writeIndex += dstStrides[axis]; |
116 | // If this is a valid index, we have our next index, so continue copying. |
117 | if (src.sizes[axis] != newIndex) |
118 | break; |
119 | // We reached the end of this axis. If this is axis 0, we are done. |
120 | if (axis == 0) |
121 | return; |
122 | // Else, reset to 0 and undo the advancement of the linear index that |
123 | // this axis had. Then continue with the axis one outer. |
124 | indices[axis] = 0; |
125 | readIndex -= src.sizes[axis] * srcStrides[axis]; |
126 | writeIndex -= dst.sizes[axis] * dstStrides[axis]; |
127 | } |
128 | } |
129 | } |
130 | |
/// Prints a GFLOPS rating to stderr: `flops` divided by 1e9, followed by the
/// text " GFLOPS" and a newline.
extern "C" void printFlops(double flops) {
  const double gflops = flops / 1.0E9;
  fprintf(stderr, "%lf GFLOPS\n", gflops);
}
135 | |
/// Returns the number of seconds since Epoch 1970-01-01 00:00:00 +0000 (UTC),
/// with microsecond resolution, as reported by `gettimeofday`.
///
/// If `gettimeofday` fails, a diagnostic is written to stderr and 0.0 is
/// returned. On Windows the function is not implemented: it writes a
/// diagnostic to stderr and returns 0.0.
extern "C" double rtclock() {
#ifndef _WIN32
  struct timeval tp;
  const int stat = gettimeofday(&tp, nullptr);
  if (stat != 0) {
    fprintf(stderr, "Error returning time from gettimeofday: %d\n", stat);
    // `tp` was never filled in on this path; reading it would yield an
    // indeterminate value, so report the same fallback as the Windows branch.
    return 0.0;
  }
  return (tp.tv_sec + tp.tv_usec * 1.0e-6);
#else
  fprintf(stderr, "Timing utility not implemented on Windows\n");
  return 0.0;
#endif // _WIN32
}
149 | |
/// Allocates `size` bytes with `malloc` and returns the resulting pointer
/// (whatever `malloc` returns, including null on failure).
extern "C" void *mlirAlloc(uint64_t size) {
  void *memory = std::malloc(size);
  return memory;
}
151 | |
/// Allocates at least `size` bytes aligned to `alignment` bytes and returns
/// the pointer, or null if the allocation fails.
///
/// The underlying allocator depends on the platform: `_aligned_malloc` on
/// Windows, `posix_memalign` on Apple platforms, and `aligned_alloc`
/// elsewhere.
extern "C" void *mlirAlignedAlloc(uint64_t alignment, uint64_t size) {
#ifdef _WIN32
  return _aligned_malloc(size, alignment);
#elif defined(__APPLE__)
  // aligned_alloc was added in MacOS 10.15. Fall back to posix_memalign to also
  // support older versions.
  void *result = nullptr;
  // Do not rely on `result` being left untouched when the call fails.
  if (::posix_memalign(&result, alignment, size) != 0)
    return nullptr;
  return result;
#else
  // aligned_alloc requires `size` to be an integral multiple of `alignment`;
  // implementations may fail otherwise. Pad the request up to the next
  // multiple so arbitrary sizes are accepted.
  if (alignment != 0) {
    const uint64_t remainder = size % alignment;
    if (remainder != 0) {
      const uint64_t padded = size + (alignment - remainder);
      // Unsigned wrap-around means the padded size is not representable.
      if (padded < size)
        return nullptr;
      size = padded;
    }
  }
  return aligned_alloc(alignment, size);
#endif
}
165 | |
/// Releases memory by passing `ptr` to `free`.
extern "C" void mlirFree(void *ptr) {
  std::free(ptr);
}
167 | |
/// Releases aligned memory: `ptr` is passed to `_aligned_free` on Windows and
/// to `free` on every other platform.
extern "C" void mlirAlignedFree(void *ptr) {
#ifndef _WIN32
  std::free(ptr);
#else
  _aligned_free(ptr);
#endif
}
175 | |
/// Creates a heap-allocated `std::mt19937` engine seeded with `s` and returns
/// it as an opaque pointer. The engine is allocated with `new`; nothing in
/// this function releases it.
extern "C" void *rtsrand(uint64_t s) {
  std::mt19937 *engine = new std::mt19937(s);
  return engine;
}
180 | |
/// Draws one value from a `std::uniform_int_distribution<uint64_t>` over the
/// closed range [0, m], using the `std::mt19937` engine that `g` points to.
extern "C" uint64_t rtrand(void *g, uint64_t m) {
  std::mt19937 &engine = *static_cast<std::mt19937 *>(g);
  return std::uniform_int_distribution<uint64_t>(0, m)(engine);
}
186 | |
/// Destroys the `std::mt19937` engine that `g` points to by applying `delete`
/// to it.
extern "C" void rtdrand(void *g) {
  delete static_cast<std::mt19937 *>(g);
}
191 | |
192 | extern "C" void _mlir_ciface_shuffle(StridedMemRefType<uint64_t, 1> *mref, |
193 | void *g) { |
194 | assert(mref); |
195 | assert(mref->strides[0] == 1); // consecutive |
196 | std::mt19937 *generator = static_cast<std::mt19937 *>(g); |
197 | uint64_t s = mref->sizes[0]; |
198 | uint64_t *data = mref->data + mref->offset; |
199 | std::iota(first: data, last: data + s, value: 0); |
200 | std::shuffle(first: data, last: data + s, g&: *generator); |
201 | } |
202 | |
203 | #define IMPL_STDSORT(VNAME, V) \ |
204 | extern "C" void _mlir_ciface_stdSort##VNAME(uint64_t n, \ |
205 | StridedMemRefType<V, 1> *vref) { \ |
206 | assert(vref); \ |
207 | assert(vref->strides[0] == 1); \ |
208 | V *values = vref->data + vref->offset; \ |
209 | stdSort(n, values); \ |
210 | } |
211 | IMPL_STDSORT(I64, int64_t) |
212 | IMPL_STDSORT(F64, double) |
213 | IMPL_STDSORT(F32, float) |
214 | #undef IMPL_STDSORT |
215 | |
216 | #endif // MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS |
217 | |