1 | //===- CRunnerUtils.h - Utils for debugging MLIR execution ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file declares basic classes and functions to manipulate structured MLIR |
10 | // types at runtime. Entities in this file must be compliant with C++11 and be |
11 | // retargetable, including on targets without a C++ runtime. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef MLIR_EXECUTIONENGINE_CRUNNERUTILS_H |
16 | #define MLIR_EXECUTIONENGINE_CRUNNERUTILS_H |
17 | |
18 | #ifdef _WIN32 |
19 | #ifndef MLIR_CRUNNERUTILS_EXPORT |
20 | #ifdef mlir_c_runner_utils_EXPORTS |
21 | // We are building this library |
22 | #define MLIR_CRUNNERUTILS_EXPORT __declspec(dllexport) |
23 | #define MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS |
24 | #else |
25 | // We are using this library |
26 | #define MLIR_CRUNNERUTILS_EXPORT __declspec(dllimport) |
27 | #endif // mlir_c_runner_utils_EXPORTS |
28 | #endif // MLIR_CRUNNERUTILS_EXPORT |
29 | #else // _WIN32 |
30 | // Non-windows: use visibility attributes. |
31 | #define MLIR_CRUNNERUTILS_EXPORT __attribute__((visibility("default"))) |
32 | #define MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS |
33 | #endif // _WIN32 |
34 | |
35 | #include <array> |
36 | #include <cassert> |
37 | #include <cstdint> |
38 | #include <initializer_list> |
39 | #include <vector> |
40 | |
41 | //===----------------------------------------------------------------------===// |
42 | // Codegen-compatible structures for Vector type. |
43 | //===----------------------------------------------------------------------===// |
44 | namespace mlir { |
45 | namespace detail { |
46 | |
47 | constexpr bool isPowerOf2(int n) { return (!(n & (n - 1))); } |
48 | |
49 | constexpr unsigned nextPowerOf2(int n) { |
50 | return (n <= 1) ? 1 : (isPowerOf2(n) ? n : (2 * nextPowerOf2(n: (n + 1) / 2))); |
51 | } |
52 | |
53 | template <typename T, int Dim, bool IsPowerOf2> |
54 | struct Vector1D; |
55 | |
56 | template <typename T, int Dim> |
57 | struct Vector1D<T, Dim, /*IsPowerOf2=*/true> { |
58 | Vector1D() { |
59 | static_assert(detail::nextPowerOf2(n: sizeof(T[Dim])) == sizeof(T[Dim]), |
60 | "size error" ); |
61 | } |
62 | inline T &operator[](unsigned i) { return vector[i]; } |
63 | inline const T &operator[](unsigned i) const { return vector[i]; } |
64 | |
65 | private: |
66 | T vector[Dim]; |
67 | }; |
68 | |
69 | // 1-D vector, padded to the next power of 2 allocation. |
70 | // Specialization occurs to avoid zero size arrays (which fail in -Werror). |
71 | template <typename T, int Dim> |
72 | struct Vector1D<T, Dim, /*IsPowerOf2=*/false> { |
73 | Vector1D() { |
74 | static_assert(nextPowerOf2(n: sizeof(T[Dim])) > sizeof(T[Dim]), "size error" ); |
75 | static_assert(nextPowerOf2(n: sizeof(T[Dim])) < 2 * sizeof(T[Dim]), |
76 | "size error" ); |
77 | } |
78 | inline T &operator[](unsigned i) { return vector[i]; } |
79 | inline const T &operator[](unsigned i) const { return vector[i]; } |
80 | |
81 | private: |
82 | T vector[Dim]; |
83 | char padding[nextPowerOf2(n: sizeof(T[Dim])) - sizeof(T[Dim])]; |
84 | }; |
85 | } // namespace detail |
86 | } // namespace mlir |
87 | |
88 | // N-D vectors recurse down to 1-D. |
89 | template <typename T, int Dim, int... Dims> |
90 | struct Vector { |
91 | inline Vector<T, Dims...> &operator[](unsigned i) { return vector[i]; } |
92 | inline const Vector<T, Dims...> &operator[](unsigned i) const { |
93 | return vector[i]; |
94 | } |
95 | |
96 | private: |
97 | Vector<T, Dims...> vector[Dim]; |
98 | }; |
99 | |
100 | // 1-D vectors in LLVM are automatically padded to the next power of 2. |
101 | // We insert explicit padding in to account for this. |
102 | template <typename T, int Dim> |
103 | struct Vector<T, Dim> |
104 | : public mlir::detail::Vector1D<T, Dim, |
105 | mlir::detail::isPowerOf2(n: sizeof(T[Dim]))> { |
106 | }; |
107 | |
108 | template <int D1, typename T> |
109 | using Vector1D = Vector<T, D1>; |
110 | template <int D1, int D2, typename T> |
111 | using Vector2D = Vector<T, D1, D2>; |
112 | template <int D1, int D2, int D3, typename T> |
113 | using Vector3D = Vector<T, D1, D2, D3>; |
114 | template <int D1, int D2, int D3, int D4, typename T> |
115 | using Vector4D = Vector<T, D1, D2, D3, D4>; |
116 | |
117 | template <int N> |
118 | void dropFront(int64_t arr[N], int64_t *res) { |
119 | for (unsigned i = 1; i < N; ++i) |
120 | *(res + i - 1) = arr[i]; |
121 | } |
122 | |
123 | //===----------------------------------------------------------------------===// |
124 | // Codegen-compatible structures for StridedMemRef type. |
125 | //===----------------------------------------------------------------------===// |
126 | template <typename T, int Rank> |
127 | class StridedMemrefIterator; |
128 | |
129 | /// StridedMemRef descriptor type with static rank. |
130 | template <typename T, int N> |
131 | struct StridedMemRefType { |
132 | T *basePtr; |
133 | T *data; |
134 | int64_t offset; |
135 | int64_t sizes[N]; |
136 | int64_t strides[N]; |
137 | |
138 | template <typename Range, |
139 | typename sfinae = decltype(std::declval<Range>().begin())> |
140 | T &operator[](Range &&indices) { |
141 | assert(indices.size() == N && |
142 | "indices should match rank in memref subscript" ); |
143 | int64_t curOffset = offset; |
144 | for (int dim = N - 1; dim >= 0; --dim) { |
145 | int64_t currentIndex = *(indices.begin() + dim); |
146 | assert(currentIndex < sizes[dim] && "Index overflow" ); |
147 | curOffset += currentIndex * strides[dim]; |
148 | } |
149 | return data[curOffset]; |
150 | } |
151 | |
152 | StridedMemrefIterator<T, N> begin() { return {*this, offset}; } |
153 | StridedMemrefIterator<T, N> end() { return {*this, -1}; } |
154 | |
155 | // This operator[] is extremely slow and only for sugaring purposes. |
156 | StridedMemRefType<T, N - 1> operator[](int64_t idx) { |
157 | StridedMemRefType<T, N - 1> res; |
158 | res.basePtr = basePtr; |
159 | res.data = data; |
160 | res.offset = offset + idx * strides[0]; |
161 | dropFront<N>(sizes, res.sizes); |
162 | dropFront<N>(strides, res.strides); |
163 | return res; |
164 | } |
165 | }; |
166 | |
167 | /// StridedMemRef descriptor type specialized for rank 1. |
168 | template <typename T> |
169 | struct StridedMemRefType<T, 1> { |
170 | T *basePtr; |
171 | T *data; |
172 | int64_t offset; |
173 | int64_t sizes[1]; |
174 | int64_t strides[1]; |
175 | |
176 | template <typename Range, |
177 | typename sfinae = decltype(std::declval<Range>().begin())> |
178 | T &operator[](Range indices) { |
179 | assert(indices.size() == 1 && |
180 | "indices should match rank in memref subscript" ); |
181 | return (*this)[*indices.begin()]; |
182 | } |
183 | |
184 | StridedMemrefIterator<T, 1> begin() { return {*this, offset}; } |
185 | StridedMemrefIterator<T, 1> end() { return {*this, -1}; } |
186 | |
187 | T &operator[](int64_t idx) { return *(data + offset + idx * strides[0]); } |
188 | }; |
189 | |
190 | /// StridedMemRef descriptor type specialized for rank 0. |
191 | template <typename T> |
192 | struct StridedMemRefType<T, 0> { |
193 | T *basePtr; |
194 | T *data; |
195 | int64_t offset; |
196 | |
197 | template <typename Range, |
198 | typename sfinae = decltype(std::declval<Range>().begin())> |
199 | T &operator[](Range indices) { |
200 | assert((indices.size() == 0) && |
201 | "Expect empty indices for 0-rank memref subscript" ); |
202 | return data[offset]; |
203 | } |
204 | |
205 | StridedMemrefIterator<T, 0> begin() { return {*this, offset}; } |
206 | StridedMemrefIterator<T, 0> end() { return {*this, offset + 1}; } |
207 | }; |
208 | |
209 | /// Iterate over all elements in a strided memref. |
210 | template <typename T, int Rank> |
211 | class StridedMemrefIterator { |
212 | public: |
213 | using iterator_category = std::forward_iterator_tag; |
214 | using value_type = T; |
215 | using difference_type = std::ptrdiff_t; |
216 | using pointer = T *; |
217 | using reference = T &; |
218 | |
219 | StridedMemrefIterator(StridedMemRefType<T, Rank> &descriptor, |
220 | int64_t offset = 0) |
221 | : offset(offset), descriptor(&descriptor) {} |
222 | StridedMemrefIterator<T, Rank> &operator++() { |
223 | int dim = Rank - 1; |
224 | while (dim >= 0 && indices[dim] == (descriptor->sizes[dim] - 1)) { |
225 | offset -= indices[dim] * descriptor->strides[dim]; |
226 | indices[dim] = 0; |
227 | --dim; |
228 | } |
229 | if (dim < 0) { |
230 | offset = -1; |
231 | return *this; |
232 | } |
233 | ++indices[dim]; |
234 | offset += descriptor->strides[dim]; |
235 | return *this; |
236 | } |
237 | |
238 | reference operator*() { return descriptor->data[offset]; } |
239 | pointer operator->() { return &descriptor->data[offset]; } |
240 | |
241 | const std::array<int64_t, Rank> &getIndices() { return indices; } |
242 | |
243 | bool operator==(const StridedMemrefIterator &other) const { |
244 | return other.offset == offset && other.descriptor == descriptor; |
245 | } |
246 | |
247 | bool operator!=(const StridedMemrefIterator &other) const { |
248 | return !(*this == other); |
249 | } |
250 | |
251 | private: |
252 | /// Offset in the buffer. This can be derived from the indices and the |
253 | /// descriptor. |
254 | int64_t offset = 0; |
255 | |
256 | /// Array of indices in the multi-dimensional memref. |
257 | std::array<int64_t, Rank> indices = {}; |
258 | |
259 | /// Descriptor for the strided memref. |
260 | StridedMemRefType<T, Rank> *descriptor; |
261 | }; |
262 | |
263 | /// Iterate over all elements in a 0-ranked strided memref. |
264 | template <typename T> |
265 | class StridedMemrefIterator<T, 0> { |
266 | public: |
267 | using iterator_category = std::forward_iterator_tag; |
268 | using value_type = T; |
269 | using difference_type = std::ptrdiff_t; |
270 | using pointer = T *; |
271 | using reference = T &; |
272 | |
273 | StridedMemrefIterator(StridedMemRefType<T, 0> &descriptor, int64_t offset = 0) |
274 | : elt(descriptor.data + offset) {} |
275 | |
276 | StridedMemrefIterator<T, 0> &operator++() { |
277 | ++elt; |
278 | return *this; |
279 | } |
280 | |
281 | reference operator*() { return *elt; } |
282 | pointer operator->() { return elt; } |
283 | |
284 | // There are no indices for a 0-ranked memref, but this API is provided for |
285 | // consistency with the general case. |
286 | const std::array<int64_t, 0> &getIndices() { |
287 | // Since this is a 0-array of indices we can keep a single global const |
288 | // copy. |
289 | static const std::array<int64_t, 0> indices = {}; |
290 | return indices; |
291 | } |
292 | |
293 | bool operator==(const StridedMemrefIterator &other) const { |
294 | return other.elt == elt; |
295 | } |
296 | |
297 | bool operator!=(const StridedMemrefIterator &other) const { |
298 | return !(*this == other); |
299 | } |
300 | |
301 | private: |
302 | /// Pointer to the single element in the zero-ranked memref. |
303 | T *elt; |
304 | }; |
305 | |
306 | //===----------------------------------------------------------------------===// |
307 | // Codegen-compatible structure for UnrankedMemRef type. |
308 | //===----------------------------------------------------------------------===// |
309 | // Unranked MemRef |
310 | template <typename T> |
311 | struct UnrankedMemRefType { |
312 | int64_t rank; |
313 | void *descriptor; |
314 | }; |
315 | |
316 | //===----------------------------------------------------------------------===// |
317 | // DynamicMemRefType type. |
318 | //===----------------------------------------------------------------------===// |
319 | template <typename T> |
320 | class DynamicMemRefIterator; |
321 | |
322 | // A reference to one of the StridedMemRef types. |
323 | template <typename T> |
324 | class DynamicMemRefType { |
325 | public: |
326 | int64_t rank; |
327 | T *basePtr; |
328 | T *data; |
329 | int64_t offset; |
330 | const int64_t *sizes; |
331 | const int64_t *strides; |
332 | |
333 | explicit DynamicMemRefType(const StridedMemRefType<T, 0> &memRef) |
334 | : rank(0), basePtr(memRef.basePtr), data(memRef.data), |
335 | offset(memRef.offset), sizes(nullptr), strides(nullptr) {} |
336 | template <int N> |
337 | explicit DynamicMemRefType(const StridedMemRefType<T, N> &memRef) |
338 | : rank(N), basePtr(memRef.basePtr), data(memRef.data), |
339 | offset(memRef.offset), sizes(memRef.sizes), strides(memRef.strides) {} |
340 | explicit DynamicMemRefType(const ::UnrankedMemRefType<T> &memRef) |
341 | : rank(memRef.rank) { |
342 | auto *desc = static_cast<StridedMemRefType<T, 1> *>(memRef.descriptor); |
343 | basePtr = desc->basePtr; |
344 | data = desc->data; |
345 | offset = desc->offset; |
346 | sizes = rank == 0 ? nullptr : desc->sizes; |
347 | strides = sizes + rank; |
348 | } |
349 | |
350 | template <typename Range, |
351 | typename sfinae = decltype(std::declval<Range>().begin())> |
352 | T &operator[](Range &&indices) { |
353 | assert(indices.size() == rank && |
354 | "indices should match rank in memref subscript" ); |
355 | if (rank == 0) |
356 | return data[offset]; |
357 | |
358 | int64_t curOffset = offset; |
359 | for (int dim = rank - 1; dim >= 0; --dim) { |
360 | int64_t currentIndex = *(indices.begin() + dim); |
361 | assert(currentIndex < sizes[dim] && "Index overflow" ); |
362 | curOffset += currentIndex * strides[dim]; |
363 | } |
364 | return data[curOffset]; |
365 | } |
366 | |
367 | DynamicMemRefIterator<T> begin() { return {*this, offset}; } |
368 | DynamicMemRefIterator<T> end() { return {*this, -1}; } |
369 | |
370 | // This operator[] is extremely slow and only for sugaring purposes. |
371 | DynamicMemRefType<T> operator[](int64_t idx) { |
372 | assert(rank > 0 && "can't make a subscript of a zero ranked array" ); |
373 | |
374 | DynamicMemRefType<T> res(*this); |
375 | --res.rank; |
376 | res.offset += idx * res.strides[0]; |
377 | ++res.sizes; |
378 | ++res.strides; |
379 | return res; |
380 | } |
381 | |
382 | // This operator* can be used in conjunction with the previous operator[] in |
383 | // order to access the underlying value in case of zero-ranked memref. |
384 | T &operator*() { |
385 | assert(rank == 0 && "not a zero-ranked memRef" ); |
386 | return data[offset]; |
387 | } |
388 | }; |
389 | |
390 | /// Iterate over all elements in a dynamic memref. |
391 | template <typename T> |
392 | class DynamicMemRefIterator { |
393 | public: |
394 | using iterator_category = std::forward_iterator_tag; |
395 | using value_type = T; |
396 | using difference_type = std::ptrdiff_t; |
397 | using pointer = T *; |
398 | using reference = T &; |
399 | |
400 | DynamicMemRefIterator(DynamicMemRefType<T> &descriptor, int64_t offset = 0) |
401 | : offset(offset), descriptor(&descriptor) { |
402 | indices.resize(descriptor.rank, 0); |
403 | } |
404 | |
405 | DynamicMemRefIterator<T> &operator++() { |
406 | if (descriptor->rank == 0) { |
407 | offset = -1; |
408 | return *this; |
409 | } |
410 | |
411 | int dim = descriptor->rank - 1; |
412 | |
413 | while (dim >= 0 && indices[dim] == (descriptor->sizes[dim] - 1)) { |
414 | offset -= indices[dim] * descriptor->strides[dim]; |
415 | indices[dim] = 0; |
416 | --dim; |
417 | } |
418 | |
419 | if (dim < 0) { |
420 | offset = -1; |
421 | return *this; |
422 | } |
423 | |
424 | ++indices[dim]; |
425 | offset += descriptor->strides[dim]; |
426 | return *this; |
427 | } |
428 | |
429 | reference operator*() { return descriptor->data[offset]; } |
430 | pointer operator->() { return &descriptor->data[offset]; } |
431 | |
432 | const std::vector<int64_t> &getIndices() { return indices; } |
433 | |
434 | bool operator==(const DynamicMemRefIterator &other) const { |
435 | return other.offset == offset && other.descriptor == descriptor; |
436 | } |
437 | |
438 | bool operator!=(const DynamicMemRefIterator &other) const { |
439 | return !(*this == other); |
440 | } |
441 | |
442 | private: |
443 | /// Offset in the buffer. This can be derived from the indices and the |
444 | /// descriptor. |
445 | int64_t offset = 0; |
446 | |
447 | /// Array of indices in the multi-dimensional memref. |
448 | std::vector<int64_t> indices = {}; |
449 | |
450 | /// Descriptor for the dynamic memref. |
451 | DynamicMemRefType<T> *descriptor; |
452 | }; |
453 | |
454 | //===----------------------------------------------------------------------===// |
455 | // Small runtime support library for memref.copy lowering during codegen. |
456 | //===----------------------------------------------------------------------===// |
457 | extern "C" MLIR_CRUNNERUTILS_EXPORT void |
458 | memrefCopy(int64_t elemSize, ::UnrankedMemRefType<char> *src, |
459 | ::UnrankedMemRefType<char> *dst); |
460 | |
461 | //===----------------------------------------------------------------------===// |
462 | // Small runtime support library for vector.print lowering during codegen. |
463 | //===----------------------------------------------------------------------===// |
464 | extern "C" MLIR_CRUNNERUTILS_EXPORT void printI64(int64_t i); |
465 | extern "C" MLIR_CRUNNERUTILS_EXPORT void printU64(uint64_t u); |
466 | extern "C" MLIR_CRUNNERUTILS_EXPORT void printF32(float f); |
467 | extern "C" MLIR_CRUNNERUTILS_EXPORT void printF64(double d); |
468 | extern "C" MLIR_CRUNNERUTILS_EXPORT void printString(char const *s); |
469 | extern "C" MLIR_CRUNNERUTILS_EXPORT void printOpen(); |
470 | extern "C" MLIR_CRUNNERUTILS_EXPORT void printClose(); |
471 | extern "C" MLIR_CRUNNERUTILS_EXPORT void printComma(); |
472 | extern "C" MLIR_CRUNNERUTILS_EXPORT void printNewline(); |
473 | |
474 | //===----------------------------------------------------------------------===// |
475 | // Small runtime support library for timing execution and printing GFLOPS |
476 | //===----------------------------------------------------------------------===// |
477 | extern "C" MLIR_CRUNNERUTILS_EXPORT void printFlops(double flops); |
478 | extern "C" MLIR_CRUNNERUTILS_EXPORT double rtclock(); |
479 | |
480 | //===----------------------------------------------------------------------===// |
481 | // Runtime support library for random number generation. |
482 | //===----------------------------------------------------------------------===// |
483 | // Uses a seed to initialize a random generator and returns the generator. |
484 | extern "C" MLIR_CRUNNERUTILS_EXPORT void *rtsrand(uint64_t s); |
485 | // Uses a random number generator g and returns a random number |
486 | // in the range of [0, m). |
487 | extern "C" MLIR_CRUNNERUTILS_EXPORT uint64_t rtrand(void *g, uint64_t m); |
488 | // Deletes the random number generator. |
489 | extern "C" MLIR_CRUNNERUTILS_EXPORT void rtdrand(void *g); |
490 | // Uses a random number generator g and std::shuffle to modify mref |
491 | // in place. Memref mref will be a permutation of all numbers |
492 | // in the range of [0, size of mref). |
493 | extern "C" MLIR_CRUNNERUTILS_EXPORT void |
494 | _mlir_ciface_shuffle(StridedMemRefType<uint64_t, 1> *mref, void *g); |
495 | |
496 | //===----------------------------------------------------------------------===// |
497 | // Runtime support library to allow the use of std::sort in MLIR program. |
498 | //===----------------------------------------------------------------------===// |
499 | extern "C" MLIR_CRUNNERUTILS_EXPORT void |
500 | _mlir_ciface_stdSortI64(uint64_t n, StridedMemRefType<int64_t, 1> *vref); |
501 | extern "C" MLIR_CRUNNERUTILS_EXPORT void |
502 | _mlir_ciface_stdSortF64(uint64_t n, StridedMemRefType<double, 1> *vref); |
503 | extern "C" MLIR_CRUNNERUTILS_EXPORT void |
504 | _mlir_ciface_stdSortF32(uint64_t n, StridedMemRefType<float, 1> *vref); |
505 | #endif // MLIR_EXECUTIONENGINE_CRUNNERUTILS_H |
506 | |