//===- Storage.h - TACO-flavored sparse tensor representation ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains definitions for the following classes:
//
// * `SparseTensorStorageBase`
// * `SparseTensorStorage<P, C, V>`
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H
#define MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H

#include "mlir/Dialect/SparseTensor/IR/Enums.h"
#include "mlir/ExecutionEngine/Float16bits.h"
#include "mlir/ExecutionEngine/SparseTensor/ArithmeticUtils.h"
#include "mlir/ExecutionEngine/SparseTensor/COO.h"
#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

namespace mlir {
namespace sparse_tensor {

//===----------------------------------------------------------------------===//
//
// SparseTensorStorage Classes
//
//===----------------------------------------------------------------------===//

/// Abstract base class for `SparseTensorStorage<P,C,V>`. This class
/// takes responsibility for all the `<P,C,V>`-independent aspects
/// of the tensor (e.g., sizes, sparsity, mapping). In addition,
/// we use function overloading to implement "partial" method
/// specialization, which the C-API relies on to catch type errors
/// arising from our use of opaque pointers.
///
/// Because this class forms a bridge between the denotational semantics
/// of "tensors" and the operational semantics of how we store and
/// compute with them, it also distinguishes between two different
/// coordinate spaces (and their associated rank, sizes, etc).
/// Denotationally, we have the *dimensions* of the tensor represented
/// by this object. Operationally, we have the *levels* of the storage
/// representation itself.
///
/// The *size* of an axis is the cardinality of possible coordinate
/// values along that axis (regardless of which coordinates have stored
/// element values). As such, each size must be non-zero, since if any
/// axis had size zero then the whole tensor would have trivial storage
/// (there being no possible coordinates). Thus we use the plural term
/// *sizes* for a collection of non-zero cardinalities, and use this term
/// whenever referring to run-time cardinalities; whereas we use the term
/// *shape* for a collection of compile-time cardinalities, where zero
/// indicates a cardinality that is dynamic (i.e., unknown/unspecified at
/// compile-time). At run-time, these dynamic cardinalities are inferred
/// from or checked against otherwise specified sizes. Thus, dynamic
/// cardinalities always have an "immutable but unknown" value; the term
/// "dynamic" should not be taken to indicate run-time mutability.
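///
/// For example, a tensor of MLIR type `tensor<?x8xf64>` has the shape
/// `(0, 8)` in the sense used here (zero marking the dynamic dimension),
/// whereas a particular run-time instance of it might have sizes `(100, 8)`.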
class SparseTensorStorageBase {
protected:
  SparseTensorStorageBase(const SparseTensorStorageBase &) = default;
  SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete;

public:
  /// Constructs a new sparse-tensor storage object with the given encoding.
  SparseTensorStorageBase(uint64_t dimRank, const uint64_t *dimSizes,
                          uint64_t lvlRank, const uint64_t *lvlSizes,
                          const LevelType *lvlTypes, const uint64_t *dim2lvl,
                          const uint64_t *lvl2dim);
  virtual ~SparseTensorStorageBase() = default;

  /// Gets the number of tensor-dimensions.
  uint64_t getDimRank() const { return dimSizes.size(); }

  /// Gets the number of storage-levels.
  uint64_t getLvlRank() const { return lvlSizes.size(); }

  /// Gets the tensor-dimension sizes array.
  const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }

  /// Safely looks up the size of the given tensor-dimension.
  uint64_t getDimSize(uint64_t d) const {
    assert(d < getDimRank());
    return dimSizes[d];
  }

  /// Gets the storage-level sizes array.
  const std::vector<uint64_t> &getLvlSizes() const { return lvlSizes; }

  /// Safely looks up the size of the given storage-level.
  uint64_t getLvlSize(uint64_t l) const {
    assert(l < getLvlRank());
    return lvlSizes[l];
  }

  /// Gets the level-types array.
  const std::vector<LevelType> &getLvlTypes() const { return lvlTypes; }

  /// Safely looks up the type of the given level.
  LevelType getLvlType(uint64_t l) const {
    assert(l < getLvlRank());
    return lvlTypes[l];
  }

  /// Safely checks if the level uses dense storage.
  bool isDenseLvl(uint64_t l) const { return isDenseLT(getLvlType(l)); }

  /// Safely checks if the level uses compressed storage.
  bool isCompressedLvl(uint64_t l) const {
    return isCompressedLT(getLvlType(l));
  }

  /// Safely checks if the level uses loose compressed storage.
  bool isLooseCompressedLvl(uint64_t l) const {
    return isLooseCompressedLT(getLvlType(l));
  }

  /// Safely checks if the level uses singleton storage.
  bool isSingletonLvl(uint64_t l) const { return isSingletonLT(getLvlType(l)); }

  /// Safely checks if the level uses n out of m storage.
  bool isNOutOfMLvl(uint64_t l) const { return isNOutOfMLT(getLvlType(l)); }

  /// Safely checks if the level is ordered.
  bool isOrderedLvl(uint64_t l) const { return isOrderedLT(getLvlType(l)); }

  /// Safely checks if the level is unique.
  bool isUniqueLvl(uint64_t l) const { return isUniqueLT(getLvlType(l)); }

  /// Gets positions-overhead storage for the given level.
#define DECL_GETPOSITIONS(PNAME, P) \
  virtual void getPositions(std::vector<P> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETPOSITIONS)
#undef DECL_GETPOSITIONS

  /// Gets coordinates-overhead storage for the given level.
#define DECL_GETCOORDINATES(INAME, C) \
  virtual void getCoordinates(std::vector<C> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATES)
#undef DECL_GETCOORDINATES

  /// Gets coordinates-overhead storage buffer for the given level.
#define DECL_GETCOORDINATESBUFFER(INAME, C) \
  virtual void getCoordinatesBuffer(std::vector<C> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATESBUFFER)
#undef DECL_GETCOORDINATESBUFFER

  /// Gets primary storage.
#define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector<V> **);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES)
#undef DECL_GETVALUES

  /// Element-wise insertion in lexicographic coordinate order. The first
  /// argument is the level-coordinates for the value being inserted.
#define DECL_LEXINSERT(VNAME, V) virtual void lexInsert(const uint64_t *, V);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_LEXINSERT)
#undef DECL_LEXINSERT

  /// Expanded insertion. Note that this method resets the
  /// values/filled-switch array back to all-zero/false while only
  /// iterating over the nonzero elements.
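  ///
  /// The arguments are, in order: the level-coordinates of the enclosing
  /// insertion path (whose last entry is overwritten for each added
  /// coordinate), the expanded values array, the "filled" switch array,
  /// the array of added last-level coordinates, the number of added
  /// coordinates, and the size of the expanded arrays.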
#define DECL_EXPINSERT(VNAME, V) \
  virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t, \
                         uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_EXPINSERT)
#undef DECL_EXPINSERT

  /// Finalizes lexicographic insertions.
  virtual void endLexInsert() = 0;

private:
  const std::vector<uint64_t> dimSizes;
  const std::vector<uint64_t> lvlSizes;
  const std::vector<LevelType> lvlTypes;
  const std::vector<uint64_t> dim2lvlVec;
  const std::vector<uint64_t> lvl2dimVec;

protected:
  const MapRef map; // non-owning pointers into dim2lvl/lvl2dim vectors
  const bool allDense;
};

/// A memory-resident sparse tensor using a storage scheme based on
/// per-level sparse/dense annotations. This data structure provides
/// a bufferized form of a sparse tensor type. In contrast to generating
/// setup methods for each differently annotated sparse tensor, this
/// class provides a convenient "one-size-fits-all" solution that simply
/// takes an input tensor and annotations to implement all required setup
/// in a general manner.
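///
/// For illustration only (a sketch, not a normative usage recipe): a 4x4
/// CSR-like matrix with 64-bit overhead types, double values, and the
/// identity dimension-to-level mapping could be set up and populated
/// roughly as follows, with construction of the two `LevelType` values
/// (a dense outer level, a compressed inner level) elided:
///
///   uint64_t dimSizes[] = {4, 4}, lvlSizes[] = {4, 4};
///   uint64_t dim2lvl[] = {0, 1}, lvl2dim[] = {0, 1};
///   LevelType lvlTypes[] = {/*dense*/..., /*compressed*/...};
///   auto *tensor = SparseTensorStorage<uint64_t, uint64_t, double>::newEmpty(
///       2, dimSizes, 2, lvlSizes, lvlTypes, dim2lvl, lvl2dim);
///   uint64_t coords[] = {1, 2}; // insert in lexicographic coordinate order
///   tensor->lexInsert(coords, 3.0);
///   tensor->endLexInsert();
///   delete tensor;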
template <typename P, typename C, typename V>
class SparseTensorStorage final : public SparseTensorStorageBase {
  /// Private constructor to share code between the other constructors.
  /// Beware that the object is not necessarily guaranteed to be in a
  /// valid state after this constructor alone; e.g., `isCompressedLvl(l)`
  /// doesn't entail `!(positions[l].empty())`.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim)
      : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                                dim2lvl, lvl2dim),
        positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank) {}

public:
  /// Constructs a sparse tensor with the given encoding, and allocates
  /// overhead storage according to some simple heuristics. When lvlCOO
  /// is set, the sparse tensor is initialized with the contents of that
  /// data structure. Otherwise, an empty sparse tensor results.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim, SparseTensorCOO<V> *lvlCOO);

  /// Constructs a sparse tensor with the given encoding, and initializes
  /// the contents from the level buffers. The constructor assumes that the
  /// data provided by `lvlBufs` can be directly used to interpret the
  /// resulting sparse tensor and performs no integrity test on the input data.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim, const intptr_t *lvlBufs);

  /// Allocates a new empty sparse tensor.
  static SparseTensorStorage<P, C, V> *
  newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
           const uint64_t *lvlSizes, const LevelType *lvlTypes,
           const uint64_t *dim2lvl, const uint64_t *lvl2dim);

  /// Allocates a new sparse tensor and initializes it from the given COO.
  static SparseTensorStorage<P, C, V> *
  newFromCOO(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
             const uint64_t *lvlSizes, const LevelType *lvlTypes,
             const uint64_t *dim2lvl, const uint64_t *lvl2dim,
             SparseTensorCOO<V> *lvlCOO);

  /// Allocates a new sparse tensor and initializes it from the given buffers.
  static SparseTensorStorage<P, C, V> *
  newFromBuffers(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
                 const uint64_t *lvlSizes, const LevelType *lvlTypes,
                 const uint64_t *dim2lvl, const uint64_t *lvl2dim,
                 uint64_t srcRank, const intptr_t *buffers);

  ~SparseTensorStorage() final = default;

  /// Partially specialize these getter methods based on template types.
  void getPositions(std::vector<P> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    *out = &positions[lvl];
  }
  void getCoordinates(std::vector<C> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    *out = &coordinates[lvl];
  }
  void getCoordinatesBuffer(std::vector<C> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    // Note that the sparse tensor support library always stores COO in SoA
    // format, even when AoS is requested. This is never an issue, since all
    // actual code/library generation requests "views" into the coordinate
    // storage for the individual levels, which is trivially provided for
    // both AoS and SoA (as well as all the other storage formats). The only
    // exception is when the buffer version of coordinate storage is requested
    // (currently only for printing). In that case, we do the following
    // potentially expensive transformation to provide that view. If this
    // operation becomes more common beyond debugging, we should consider
    // implementing proper AoS in the support library as well.
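    //
    // For instance, a rank-2 COO tensor with three stored entries whose SoA
    // coordinates are [0, 1, 2] at level 0 and [3, 4, 5] at level 1 yields,
    // for `lvl == 0`, the interleaved AoS view [0, 3, 1, 4, 2, 5].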
    uint64_t lvlRank = getLvlRank();
    uint64_t nnz = values.size();
    crdBuffer.clear();
    crdBuffer.reserve(nnz * (lvlRank - lvl));
    for (uint64_t i = 0; i < nnz; i++) {
      for (uint64_t l = lvl; l < lvlRank; l++) {
        assert(i < coordinates[l].size());
        crdBuffer.push_back(coordinates[l][i]);
      }
    }
    *out = &crdBuffer;
  }
  void getValues(std::vector<V> **out) final {
    assert(out && "Received nullptr for out parameter");
    *out = &values;
  }

  /// Partially specialize lexicographical insertions based on template types.
  void lexInsert(const uint64_t *lvlCoords, V val) final {
    assert(lvlCoords);
    if (allDense) {
      uint64_t lvlRank = getLvlRank();
      uint64_t valIdx = 0;
      // Linearize the address.
      for (uint64_t l = 0; l < lvlRank; l++)
        valIdx = valIdx * getLvlSize(l) + lvlCoords[l];
      values[valIdx] = val;
      return;
    }
    // First, wrap up pending insertion path.
    uint64_t diffLvl = 0;
    uint64_t full = 0;
    if (!values.empty()) {
      diffLvl = lexDiff(lvlCoords);
      endPath(diffLvl + 1);
      full = lvlCursor[diffLvl] + 1;
    }
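    // For example, with an ordered, unique, 2-level tensor whose previously
    // inserted coordinates were (0, 3), inserting (1, 1) gives diffLvl == 0:
    // level 1 of the pending path is finalized, and the new path is appended
    // starting at level 0 with full == lvlCursor[0] + 1 == 1.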
    // Then continue with insertion path.
    insPath(lvlCoords, diffLvl, full, val);
  }

  /// Partially specialize expanded insertions based on template types.
  void expInsert(uint64_t *lvlCoords, V *values, bool *filled, uint64_t *added,
                 uint64_t count, uint64_t expsz) final {
    assert((lvlCoords && values && filled && added) && "Received nullptr");
    if (count == 0)
      return;
    // Sort.
    std::sort(added, added + count);
    // Restore insertion path for first insert.
    const uint64_t lastLvl = getLvlRank() - 1;
    uint64_t c = added[0];
    assert(c <= expsz);
    assert(filled[c] && "added coordinate is not filled");
    lvlCoords[lastLvl] = c;
    lexInsert(lvlCoords, values[c]);
    values[c] = 0;
    filled[c] = false;
    // Subsequent insertions are quick.
    for (uint64_t i = 1; i < count; i++) {
      assert(c < added[i] && "non-lexicographic insertion");
      c = added[i];
      assert(c <= expsz);
      assert(filled[c] && "added coordinate is not filled");
      lvlCoords[lastLvl] = c;
      insPath(lvlCoords, lastLvl, added[i - 1] + 1, values[c]);
      values[c] = 0;
      filled[c] = false;
    }
  }

  /// Finalizes lexicographic insertions.
  void endLexInsert() final {
    if (!allDense) {
      if (values.empty())
        finalizeSegment(0);
      else
        endPath(0);
    }
  }

  /// Sorts the unordered tensor in place; this method assumes that it is
  /// an unordered COO tensor.
  void sortInPlace() {
    uint64_t nnz = values.size();
#ifndef NDEBUG
    for (uint64_t l = 0; l < getLvlRank(); l++)
      assert(nnz == coordinates[l].size());
#endif

    // In-place permutation.
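    // The lambda below applies the permutation cycle by cycle: `perm[i]` is
    // the index of the element that must end up at position `i`. For each
    // unvisited cycle it saves the element at the cycle's start, repeatedly
    // moves the element at `perm[current]` into position `current`, and
    // finally writes the saved element into the last vacated position,
    // marking completed positions via `perm[current] = current`.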
    auto applyPerm = [this](std::vector<uint64_t> &perm) {
      uint64_t length = perm.size();
      uint64_t lvlRank = getLvlRank();
      // Cache for the current level coordinates.
      std::vector<P> lvlCrds(lvlRank);
      for (uint64_t i = 0; i < length; i++) {
        uint64_t current = i;
        if (i != perm[current]) {
          for (uint64_t l = 0; l < lvlRank; l++)
            lvlCrds[l] = coordinates[l][i];
          V val = values[i];
          // Deals with a permutation cycle.
          while (i != perm[current]) {
            uint64_t next = perm[current];
            // Swaps the level coordinates and value.
            for (uint64_t l = 0; l < lvlRank; l++)
              coordinates[l][current] = coordinates[l][next];
            values[current] = values[next];
            perm[current] = current;
            current = next;
          }
          for (uint64_t l = 0; l < lvlRank; l++)
            coordinates[l][current] = lvlCrds[l];
          values[current] = val;
          perm[current] = current;
        }
      }
    };

    std::vector<uint64_t> sortedIdx(nnz, 0);
    for (uint64_t i = 0; i < nnz; i++)
      sortedIdx[i] = i;

    std::sort(sortedIdx.begin(), sortedIdx.end(),
              [this](uint64_t lhs, uint64_t rhs) {
                for (uint64_t l = 0; l < getLvlRank(); l++) {
                  if (coordinates[l][lhs] == coordinates[l][rhs])
                    continue;
                  return coordinates[l][lhs] < coordinates[l][rhs];
                }
                assert(lhs == rhs && "duplicate coordinates");
                return false;
              });

    applyPerm(sortedIdx);
  }

private:
  /// Appends coordinate `crd` to level `lvl`, in the semantically
  /// general sense. For non-dense levels, that means appending to the
  /// `coordinates[lvl]` array, checking that `crd` is representable in
  /// the `C` type; however, we do not verify other semantic requirements
  /// (e.g., that `crd` is in bounds for `lvlSizes[lvl]`, and not previously
  /// occurring in the same segment). For dense levels, this method instead
  /// appends the appropriate number of zeros to the `values` array, where
  /// `full` is the number of "entries" already written to `values` for this
  /// segment (aka one after the highest coordinate previously appended).
  void appendCrd(uint64_t lvl, uint64_t full, uint64_t crd) {
    if (!isDenseLvl(lvl)) {
      assert(isCompressedLvl(lvl) || isLooseCompressedLvl(lvl) ||
             isSingletonLvl(lvl) || isNOutOfMLvl(lvl));
      coordinates[lvl].push_back(detail::checkOverflowCast<C>(crd));
    } else { // Dense level.
      assert(crd >= full && "Coordinate was already filled");
      if (crd == full)
        return; // Short-circuit, since it'll be a nop.
      if (lvl + 1 == getLvlRank())
        values.insert(values.end(), crd - full, 0);
      else
        finalizeSegment(lvl + 1, 0, crd - full);
    }
  }

  /// Computes the assembled-size associated with the `l`-th level,
  /// given the assembled-size associated with the `(l-1)`-th level.
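  /// For example, a compressed level whose positions array is [0, 2, 5]
  /// under parentSz == 2 has assembled size positions[l][2] == 5, i.e.,
  /// the total number of entries stored at this level.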
  uint64_t assembledSize(uint64_t parentSz, uint64_t l) const {
    if (isCompressedLvl(l))
      return positions[l][parentSz];
    if (isLooseCompressedLvl(l))
      return positions[l][2 * parentSz - 1];
    if (isSingletonLvl(l) || isNOutOfMLvl(l))
      return parentSz; // new size same as the parent
    assert(isDenseLvl(l));
    return parentSz * getLvlSize(l);
  }

  /// Initializes sparse tensor storage scheme from a memory-resident sparse
  /// tensor in coordinate scheme. This method prepares the positions and
  /// coordinates arrays under the given per-level dense/sparse annotations.
  void fromCOO(const std::vector<Element<V>> &lvlElements, uint64_t lo,
               uint64_t hi, uint64_t l) {
    const uint64_t lvlRank = getLvlRank();
    assert(l <= lvlRank && hi <= lvlElements.size());
    // Once levels are exhausted, insert the numerical values.
    if (l == lvlRank) {
      assert(lo < hi);
      values.push_back(lvlElements[lo].value);
      return;
    }
    // Visit all elements in this interval.
    uint64_t full = 0;
    while (lo < hi) { // If `hi` is unchanged, then `lo < lvlElements.size()`.
      // Find segment in interval with same coordinate at this level.
      const uint64_t c = lvlElements[lo].coords[l];
      uint64_t seg = lo + 1;
      if (isUniqueLvl(l))
        while (seg < hi && lvlElements[seg].coords[l] == c)
          seg++;
      // Handle segment in interval for sparse or dense level.
      appendCrd(l, full, c);
      full = c + 1;
      fromCOO(lvlElements, lo, seg, l + 1);
      // And move on to next segment in interval.
      lo = seg;
    }
    // Finalize the sparse position structure at this level.
    finalizeSegment(l, full);
  }

  /// Finalizes the sparse position structure at this level.
  void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) {
    if (count == 0)
      return; // Short-circuit, since it'll be a nop.
    if (isCompressedLvl(l)) {
      uint64_t pos = coordinates[l].size();
      positions[l].insert(positions[l].end(), count,
                          detail::checkOverflowCast<P>(pos));
    } else if (isLooseCompressedLvl(l)) {
      // Finish this level, push pairs for the empty ones, and add one more
      // for the next level. Note that this always leaves one extra unused
      // element at the end.
      uint64_t pos = coordinates[l].size();
      positions[l].insert(positions[l].end(), 2 * count,
                          detail::checkOverflowCast<P>(pos));
    } else if (isSingletonLvl(l) || isNOutOfMLvl(l)) {
      return; // Nothing to finalize.
    } else { // Dense level.
      assert(isDenseLvl(l));
      const uint64_t sz = getLvlSizes()[l];
      assert(sz >= full && "Segment is overfull");
      count = detail::checkedMul(count, sz - full);
      // For dense storage we must enumerate all the remaining coordinates
      // in this level (i.e., coordinates after the last non-zero
      // element), and either fill in their zero values or else recurse
      // to finalize some deeper level.
      if (l + 1 == getLvlRank())
        values.insert(values.end(), count, 0);
      else
        finalizeSegment(l + 1, 0, count);
    }
  }

  /// Wraps up a single insertion path, inner to outer.
  void endPath(uint64_t diffLvl) {
    const uint64_t lvlRank = getLvlRank();
    const uint64_t lastLvl = lvlRank - 1;
    assert(diffLvl <= lvlRank);
    const uint64_t stop = lvlRank - diffLvl;
    for (uint64_t i = 0; i < stop; i++) {
      const uint64_t l = lastLvl - i;
      finalizeSegment(l, lvlCursor[l] + 1);
    }
  }

  /// Continues a single insertion path, outer to inner. The first
  /// argument is the level-coordinates for the value being inserted.
  void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full,
               V val) {
    const uint64_t lvlRank = getLvlRank();
    assert(diffLvl <= lvlRank);
    for (uint64_t l = diffLvl; l < lvlRank; l++) {
      const uint64_t c = lvlCoords[l];
      appendCrd(l, full, c);
      full = 0;
      lvlCursor[l] = c;
    }
    values.push_back(val);
  }

  /// Finds the lexicographically first level where the level-coordinates
  /// in the argument differ from those in the current cursor.
  uint64_t lexDiff(const uint64_t *lvlCoords) const {
    const uint64_t lvlRank = getLvlRank();
    for (uint64_t l = 0; l < lvlRank; l++) {
      const auto crd = lvlCoords[l];
      const auto cur = lvlCursor[l];
      if (crd > cur || (crd == cur && !isUniqueLvl(l)) ||
          (crd < cur && !isOrderedLvl(l))) {
        return l;
      }
      if (crd < cur) {
        assert(false && "non-lexicographic insertion");
        return -1u;
      }
    }
    assert(false && "duplicate insertion");
    return -1u;
  }

  // Sparse tensor storage components.
  std::vector<std::vector<P>> positions;
  std::vector<std::vector<C>> coordinates;
  std::vector<V> values;

  // Auxiliary data structures.
  std::vector<uint64_t> lvlCursor;
  std::vector<C> crdBuffer; // just for AoS view
};

//===----------------------------------------------------------------------===//
//
// SparseTensorStorage Factories
//
//===----------------------------------------------------------------------===//

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newEmpty(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim) {
  SparseTensorCOO<V> *noLvlCOO = nullptr;
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, noLvlCOO);
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromCOO(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim,
    SparseTensorCOO<V> *lvlCOO) {
  assert(lvlCOO);
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, lvlCOO);
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromBuffers(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim, uint64_t srcRank,
    const intptr_t *buffers) {
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, buffers);
}

//===----------------------------------------------------------------------===//
//
// SparseTensorStorage Constructors
//
//===----------------------------------------------------------------------===//

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V>::SparseTensorStorage(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim,
    SparseTensorCOO<V> *lvlCOO)
    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                          dim2lvl, lvl2dim) {
  // Provide hints on capacity of positions and coordinates.
  // TODO: needs much fine-tuning based on actual sparsity; currently
  // we reserve position/coordinate space based on all previous dense
  // levels, which works well up to first sparse level; but we should
  // really use nnz and dense/sparse distribution.
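  // For instance, with lvlTypes = {dense, compressed} and lvlSizes =
  // {100, 200}, the loop below reserves 101 position entries and 100
  // coordinate entries for the compressed level.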
  uint64_t sz = 1;
  for (uint64_t l = 0; l < lvlRank; l++) {
    if (isCompressedLvl(l)) {
      positions[l].reserve(sz + 1);
      positions[l].push_back(0);
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isLooseCompressedLvl(l)) {
      positions[l].reserve(2 * sz + 1); // last one unused
      positions[l].push_back(0);
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isSingletonLvl(l)) {
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isNOutOfMLvl(l)) {
      assert(l == lvlRank - 1 && "unexpected n:m usage");
      sz = detail::checkedMul(sz, lvlSizes[l]) / 2;
      coordinates[l].reserve(sz);
      values.reserve(sz);
    } else { // Dense level.
      assert(isDenseLvl(l));
      sz = detail::checkedMul(sz, lvlSizes[l]);
    }
  }
  if (lvlCOO) {
    // New from COO: ensure it is sorted.
    assert(lvlCOO->getRank() == lvlRank);
    lvlCOO->sort();
    // Now actually insert the `elements`.
    const auto &elements = lvlCOO->getElements();
    const uint64_t nse = elements.size();
    assert(values.size() == 0);
    values.reserve(nse);
    fromCOO(elements, 0, nse, 0);
  } else if (allDense) {
    // New empty (all dense).
    values.resize(sz, 0);
  }
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V>::SparseTensorStorage(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim, const intptr_t *lvlBufs)
    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                          dim2lvl, lvl2dim) {
  // Note that none of the buffers can be reused because ownership
  // of the memory passed from clients is not necessarily transferred.
  // Therefore, all data is copied over into a new SparseTensorStorage.
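  //
  // As the loop below implies, the expected order of `lvlBufs` is: for each
  // (loose) compressed level a positions buffer followed by a coordinates
  // buffer (with a single AoS coordinates buffer for a trailing COO region),
  // nothing for dense levels, and a final values buffer. For a CSR-like
  // (dense, compressed) matrix this amounts to {positions, coordinates,
  // values}.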
  uint64_t trailCOOLen = 0, parentSz = 1, bufIdx = 0;
  for (uint64_t l = 0; l < lvlRank; l++) {
    if (!isUniqueLvl(l) && (isCompressedLvl(l) || isLooseCompressedLvl(l))) {
      // A `(loose)compressed_nu` level marks the start of a trailing COO
      // region. Since the coordinate buffer used for trailing COO is passed
      // in as an AoS scheme and SparseTensorStorage uses a SoA scheme, we
      // cannot simply copy the values from the provided buffers.
      trailCOOLen = lvlRank - l;
      break;
    }
    if (isCompressedLvl(l) || isLooseCompressedLvl(l)) {
      P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
      C *crdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
      if (isLooseCompressedLvl(l)) {
        positions[l].assign(posPtr, posPtr + 2 * parentSz);
        coordinates[l].assign(crdPtr, crdPtr + positions[l][2 * parentSz - 1]);
      } else {
        positions[l].assign(posPtr, posPtr + parentSz + 1);
        coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]);
      }
    } else if (isSingletonLvl(l)) {
      assert(0 && "general singleton not supported yet");
    } else if (isNOutOfMLvl(l)) {
      assert(0 && "n out of m not supported yet");
    } else {
      assert(isDenseLvl(l));
    }
    parentSz = assembledSize(parentSz, l);
  }

  // Handle AoS vs. SoA mismatch for COO.
  if (trailCOOLen != 0) {
    uint64_t cooStartLvl = lvlRank - trailCOOLen;
    assert(!isUniqueLvl(cooStartLvl) &&
           (isCompressedLvl(cooStartLvl) || isLooseCompressedLvl(cooStartLvl)));
    P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
    C *aosCrdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
    P crdLen;
    if (isLooseCompressedLvl(cooStartLvl)) {
      positions[cooStartLvl].assign(posPtr, posPtr + 2 * parentSz);
      crdLen = positions[cooStartLvl][2 * parentSz - 1];
    } else {
      positions[cooStartLvl].assign(posPtr, posPtr + parentSz + 1);
      crdLen = positions[cooStartLvl][parentSz];
    }
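    // De-interleave the AoS coordinate buffer into one SoA coordinate vector
    // per trailing COO level.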
    for (uint64_t l = cooStartLvl; l < lvlRank; l++) {
      coordinates[l].resize(crdLen);
      for (uint64_t n = 0; n < crdLen; n++) {
        coordinates[l][n] = *(aosCrdPtr + (l - cooStartLvl) + n * trailCOOLen);
      }
    }
    parentSz = assembledSize(parentSz, cooStartLvl);
  }

  // Copy the values buffer.
  V *valPtr = reinterpret_cast<V *>(lvlBufs[bufIdx]);
  values.assign(valPtr, valPtr + parentSz);
}

} // namespace sparse_tensor
} // namespace mlir

#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H