//===- Storage.h - TACO-flavored sparse tensor representation ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains definitions for the following classes:
//
// * `SparseTensorStorageBase`
// * `SparseTensorStorage<P, C, V>`
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H
#define MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H

#include "mlir/Dialect/SparseTensor/IR/Enums.h"
#include "mlir/ExecutionEngine/Float16bits.h"
#include "mlir/ExecutionEngine/SparseTensor/ArithmeticUtils.h"
#include "mlir/ExecutionEngine/SparseTensor/COO.h"
#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

namespace mlir {
namespace sparse_tensor {

//===----------------------------------------------------------------------===//
//
// SparseTensorStorage Classes
//
//===----------------------------------------------------------------------===//

/// Abstract base class for `SparseTensorStorage<P,C,V>`. This class
/// takes responsibility for all the `<P,C,V>`-independent aspects
/// of the tensor (e.g., sizes, sparsity, mapping). In addition,
/// we use function overloading to implement "partial" method
/// specialization, which the C-API relies on to catch type errors
/// arising from our use of opaque pointers.
///
/// Because this class forms a bridge between the denotational semantics
/// of "tensors" and the operational semantics of how we store and
/// compute with them, it also distinguishes between two different
/// coordinate spaces (and their associated rank, sizes, etc.).
/// Denotationally, we have the *dimensions* of the tensor represented
/// by this object. Operationally, we have the *levels* of the storage
/// representation itself.
///
/// The *size* of an axis is the cardinality of possible coordinate
/// values along that axis (regardless of which coordinates have stored
/// element values). As such, each size must be non-zero, since if any
/// axis had size zero then the whole tensor would have trivial storage
/// (there being no possible coordinates). Thus we use the plural term
/// *sizes* for a collection of non-zero cardinalities, and use this
/// term whenever referring to run-time cardinalities. By contrast, we
/// use the term *shape* for a collection of compile-time cardinalities,
/// where zero indicates a cardinality that is dynamic (i.e., unknown or
/// unspecified at compile-time). At run-time, these dynamic
/// cardinalities will be inferred from or checked against the sizes
/// otherwise specified. Thus, dynamic cardinalities always have an
/// "immutable but unknown" value, so the term "dynamic" should not be
/// taken to indicate run-time mutability.
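///
/// For example (an illustrative sketch only; the actual maps and level
/// types come from the tensor's encoding): a 100x200 CSR matrix has
/// dimension sizes {100, 200} and, under identity dim2lvl/lvl2dim maps,
/// level sizes {100, 200} with level types (dense, compressed); storing
/// the same matrix with 50x50 blocks instead yields four levels with
/// level sizes {2, 4, 50, 50}.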
class SparseTensorStorageBase {
protected:
  SparseTensorStorageBase(const SparseTensorStorageBase &) = default;
  SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete;

public:
  /// Constructs a new sparse-tensor storage object with the given encoding.
  SparseTensorStorageBase(uint64_t dimRank, const uint64_t *dimSizes,
                          uint64_t lvlRank, const uint64_t *lvlSizes,
                          const LevelType *lvlTypes, const uint64_t *dim2lvl,
                          const uint64_t *lvl2dim);
  virtual ~SparseTensorStorageBase() = default;

  /// Gets the number of tensor-dimensions.
  uint64_t getDimRank() const { return dimSizes.size(); }

  /// Gets the number of storage-levels.
  uint64_t getLvlRank() const { return lvlSizes.size(); }

  /// Gets the tensor-dimension sizes array.
  const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }

  /// Safely looks up the size of the given tensor-dimension.
  uint64_t getDimSize(uint64_t d) const {
    assert(d < getDimRank());
    return dimSizes[d];
  }

  /// Gets the storage-level sizes array.
  const std::vector<uint64_t> &getLvlSizes() const { return lvlSizes; }

  /// Safely looks up the size of the given storage-level.
  uint64_t getLvlSize(uint64_t l) const {
    assert(l < getLvlRank());
    return lvlSizes[l];
  }

  /// Gets the level-types array.
  const std::vector<LevelType> &getLvlTypes() const { return lvlTypes; }

  /// Safely looks up the type of the given level.
  LevelType getLvlType(uint64_t l) const {
    assert(l < getLvlRank());
    return lvlTypes[l];
  }
  /// Safely checks if the level uses dense storage.
  bool isDenseLvl(uint64_t l) const { return isDenseLT(getLvlType(l)); }

  /// Safely checks if the level uses compressed storage.
  bool isCompressedLvl(uint64_t l) const {
    return isCompressedLT(getLvlType(l));
  }

  /// Safely checks if the level uses loose compressed storage.
  bool isLooseCompressedLvl(uint64_t l) const {
    return isLooseCompressedLT(getLvlType(l));
  }

  /// Safely checks if the level uses singleton storage.
  bool isSingletonLvl(uint64_t l) const {
    return isSingletonLT(getLvlType(l));
  }

  /// Safely checks if the level uses n out of m storage.
  bool isNOutOfMLvl(uint64_t l) const { return isNOutOfMLT(getLvlType(l)); }

  /// Safely checks if the level is ordered.
  bool isOrderedLvl(uint64_t l) const { return isOrderedLT(getLvlType(l)); }

  /// Safely checks if the level is unique.
  bool isUniqueLvl(uint64_t l) const { return isUniqueLT(getLvlType(l)); }

  /// Gets positions-overhead storage for the given level.
#define DECL_GETPOSITIONS(PNAME, P) \
  virtual void getPositions(std::vector<P> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETPOSITIONS)
#undef DECL_GETPOSITIONS

  /// Gets coordinates-overhead storage for the given level.
#define DECL_GETCOORDINATES(INAME, C) \
  virtual void getCoordinates(std::vector<C> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATES)
#undef DECL_GETCOORDINATES

  /// Gets coordinates-overhead storage buffer for the given level.
#define DECL_GETCOORDINATESBUFFER(INAME, C) \
  virtual void getCoordinatesBuffer(std::vector<C> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATESBUFFER)
#undef DECL_GETCOORDINATESBUFFER

  /// Gets primary storage.
#define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector<V> **);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES)
#undef DECL_GETVALUES

  /// Element-wise insertion in lexicographic coordinate order. The first
  /// argument is the level-coordinates for the value being inserted.
#define DECL_LEXINSERT(VNAME, V) virtual void lexInsert(const uint64_t *, V);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_LEXINSERT)
#undef DECL_LEXINSERT
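
  // A minimal usage sketch (hypothetical tensor and values; coordinates
  // must arrive in lexicographic order):
  //
  //   uint64_t coords[2] = {0, 3};
  //   tensor->lexInsert(coords, 1.0); // insert 1.0 at (0, 3)
  //   coords[1] = 7;
  //   tensor->lexInsert(coords, 2.0); // insert 2.0 at (0, 7)
  //   tensor->endLexInsert();         // finalize all insertions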

  /// Expanded insertion. Note that this method resets the
  /// values/filled-switch array back to all-zero/false while only
  /// iterating over the nonzero elements.
#define DECL_EXPINSERT(VNAME, V) \
  virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t, \
                         uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_EXPINSERT)
#undef DECL_EXPINSERT

  /// Finalizes lexicographic insertions.
  virtual void endLexInsert() = 0;

private:
  const std::vector<uint64_t> dimSizes;
  const std::vector<uint64_t> lvlSizes;
  const std::vector<LevelType> lvlTypes;
  const std::vector<uint64_t> dim2lvlVec;
  const std::vector<uint64_t> lvl2dimVec;

protected:
  const MapRef map; // non-owning pointers into dim2lvl/lvl2dim vectors
  const bool allDense;
};

/// A memory-resident sparse tensor using a storage scheme based on
/// per-level sparse/dense annotations. This data structure provides
/// a bufferized form of a sparse tensor type. In contrast to generating
/// setup methods for each differently annotated sparse tensor, this
/// class provides a convenient "one-size-fits-all" solution that simply
/// takes an input tensor and annotations to implement all required setup
/// in a general manner.
template <typename P, typename C, typename V>
class SparseTensorStorage final : public SparseTensorStorageBase {
  /// Private constructor to share code between the other constructors.
  /// Beware that the object is not necessarily guaranteed to be in a
  /// valid state after this constructor alone; e.g., `isCompressedLvl(l)`
  /// doesn't entail `!(positions[l].empty())`.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim)
      : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                                dim2lvl, lvl2dim),
        positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank) {}

public:
  /// Constructs a sparse tensor with the given encoding, and allocates
  /// overhead storage according to some simple heuristics. When lvlCOO
  /// is set, the sparse tensor is initialized with the contents of that
  /// data structure. Otherwise, an empty sparse tensor results.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim, SparseTensorCOO<V> *lvlCOO);

  /// Constructs a sparse tensor with the given encoding, and initializes
  /// the contents from the level buffers. The constructor assumes that the
  /// data provided by `lvlBufs` can be directly used to interpret the
  /// resulting sparse tensor, and performs no integrity test on the input
  /// data.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim, const intptr_t *lvlBufs);

  /// Allocates a new empty sparse tensor.
  static SparseTensorStorage<P, C, V> *
  newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
           const uint64_t *lvlSizes, const LevelType *lvlTypes,
           const uint64_t *dim2lvl, const uint64_t *lvl2dim);

  /// Allocates a new sparse tensor and initializes it from the given COO.
  static SparseTensorStorage<P, C, V> *
  newFromCOO(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
             const uint64_t *lvlSizes, const LevelType *lvlTypes,
             const uint64_t *dim2lvl, const uint64_t *lvl2dim,
             SparseTensorCOO<V> *lvlCOO);

  /// Allocates a new sparse tensor and initializes it from the given buffers.
  static SparseTensorStorage<P, C, V> *
  newFromBuffers(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
                 const uint64_t *lvlSizes, const LevelType *lvlTypes,
                 const uint64_t *dim2lvl, const uint64_t *lvl2dim,
                 uint64_t srcRank, const intptr_t *buffers);

  ~SparseTensorStorage() final = default;

  /// Partially specialize these getter methods based on template types.
  void getPositions(std::vector<P> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    *out = &positions[lvl];
  }
  void getCoordinates(std::vector<C> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    *out = &coordinates[lvl];
  }
  void getCoordinatesBuffer(std::vector<C> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    // Note that the sparse tensor support library always stores COO in SoA
    // format, even when AoS is requested. This is never an issue, since all
    // actual code/library generation requests "views" into the coordinate
    // storage for the individual levels, which is trivially provided for
    // both AoS and SoA (as well as all the other storage formats). The only
    // exception is when the buffer version of coordinate storage is requested
    // (currently only for printing). In that case, we do the following
    // potentially expensive transformation to provide that view. If this
    // operation becomes more common beyond debugging, we should consider
    // implementing proper AoS in the support library as well.
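    //
    // For example (illustrative), nonzeros at (0, 3) and (1, 2) are
    // stored SoA as coordinates[0] = {0, 1} and coordinates[1] = {3, 2};
    // for lvl == 0, the AoS view built below is crdBuffer = {0, 3, 1, 2}.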
    uint64_t lvlRank = getLvlRank();
    uint64_t nnz = values.size();
    crdBuffer.clear();
    crdBuffer.reserve(nnz * (lvlRank - lvl));
    for (uint64_t i = 0; i < nnz; i++) {
      for (uint64_t l = lvl; l < lvlRank; l++) {
        assert(i < coordinates[l].size());
        crdBuffer.push_back(coordinates[l][i]);
      }
    }
    *out = &crdBuffer;
  }
  void getValues(std::vector<V> **out) final {
    assert(out && "Received nullptr for out parameter");
    *out = &values;
  }

  /// Partially specialize lexicographical insertions based on template types.
  void lexInsert(const uint64_t *lvlCoords, V val) final {
    assert(lvlCoords);
    if (allDense) {
      uint64_t lvlRank = getLvlRank();
      uint64_t valIdx = 0;
      // Linearize the address.
      for (uint64_t l = 0; l < lvlRank; l++)
        valIdx = valIdx * getLvlSize(l) + lvlCoords[l];
      values[valIdx] = val;
      return;
    }
    // First, wrap up pending insertion path.
    uint64_t diffLvl = 0;
    uint64_t full = 0;
    if (!values.empty()) {
      diffLvl = lexDiff(lvlCoords);
      endPath(diffLvl + 1);
      full = lvlCursor[diffLvl] + 1;
    }
    // Then continue with insertion path.
    insPath(lvlCoords, diffLvl, full, val);
  }

  /// Partially specialize expanded insertions based on template types.
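  ///
  /// A usage sketch (hypothetical buffers; in practice they come from the
  /// compiler-generated expanded-access pattern): after writing
  /// `values[c] = v; filled[c] = true; added[n++] = c;` for each nonzero
  /// coordinate `c` in the innermost level, a single call
  /// `expInsert(lvlCoords, values, filled, added, n, expsz)` flushes the
  /// entries into storage and resets `values`/`filled` for reuse.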
  void expInsert(uint64_t *lvlCoords, V *values, bool *filled, uint64_t *added,
                 uint64_t count, uint64_t expsz) final {
    assert((lvlCoords && values && filled && added) && "Received nullptr");
    if (count == 0)
      return;
    // Sort.
    std::sort(added, added + count);
    // Restore insertion path for first insert.
    const uint64_t lastLvl = getLvlRank() - 1;
    uint64_t c = added[0];
    assert(c <= expsz);
    assert(filled[c] && "added coordinate is not filled");
    lvlCoords[lastLvl] = c;
    lexInsert(lvlCoords, values[c]);
    values[c] = 0;
    filled[c] = false;
    // Subsequent insertions are quick.
    for (uint64_t i = 1; i < count; i++) {
      assert(c < added[i] && "non-lexicographic insertion");
      c = added[i];
      assert(c <= expsz);
      assert(filled[c] && "added coordinate is not filled");
      lvlCoords[lastLvl] = c;
      insPath(lvlCoords, lastLvl, added[i - 1] + 1, values[c]);
      values[c] = 0;
      filled[c] = false;
    }
  }

  /// Finalizes lexicographic insertions.
  void endLexInsert() final {
    if (!allDense) {
      if (values.empty())
        finalizeSegment(0);
      else
        endPath(0);
    }
  }

  /// Sorts the tensor in place; this method assumes that the tensor is
  /// an unordered COO tensor.
  void sortInPlace() {
    uint64_t nnz = values.size();
#ifndef NDEBUG
    for (uint64_t l = 0; l < getLvlRank(); l++)
      assert(nnz == coordinates[l].size());
#endif

    // In-place permutation.
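    // Applied cycle by cycle: entry i receives the entry formerly at
    // perm[i], and visited slots are marked by setting perm[current] =
    // current. E.g. (illustrative), perm = {2, 0, 1} rewrites values
    // {v0, v1, v2} to {v2, v0, v1}.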
    auto applyPerm = [this](std::vector<uint64_t> &perm) {
      uint64_t length = perm.size();
      uint64_t lvlRank = getLvlRank();
      // Cache for the current level coordinates.
      std::vector<C> lvlCrds(lvlRank);
      for (uint64_t i = 0; i < length; i++) {
        uint64_t current = i;
        if (i != perm[current]) {
          for (uint64_t l = 0; l < lvlRank; l++)
            lvlCrds[l] = coordinates[l][i];
          V val = values[i];
          // Deals with a permutation cycle.
          while (i != perm[current]) {
            uint64_t next = perm[current];
            // Swaps the level coordinates and value.
            for (uint64_t l = 0; l < lvlRank; l++)
              coordinates[l][current] = coordinates[l][next];
            values[current] = values[next];
            perm[current] = current;
            current = next;
          }
          for (uint64_t l = 0; l < lvlRank; l++)
            coordinates[l][current] = lvlCrds[l];
          values[current] = val;
          perm[current] = current;
        }
      }
    };

    std::vector<uint64_t> sortedIdx(nnz, 0);
    for (uint64_t i = 0; i < nnz; i++)
      sortedIdx[i] = i;

    std::sort(sortedIdx.begin(), sortedIdx.end(),
              [this](uint64_t lhs, uint64_t rhs) {
                for (uint64_t l = 0; l < getLvlRank(); l++) {
                  if (coordinates[l][lhs] == coordinates[l][rhs])
                    continue;
                  return coordinates[l][lhs] < coordinates[l][rhs];
                }
                assert(lhs == rhs && "duplicate coordinates");
                return false;
              });

    applyPerm(sortedIdx);
  }

private:
  /// Appends coordinate `crd` to level `lvl`, in the semantically
  /// general sense. For non-dense levels, that means appending to the
  /// `coordinates[lvl]` array, checking that `crd` is representable in
  /// the `C` type; however, we do not verify other semantic requirements
  /// (e.g., that `crd` is in bounds for `lvlSizes[lvl]`, and not previously
  /// occurring in the same segment). For dense levels, this method instead
  /// appends the appropriate number of zeros to the `values` array, where
  /// `full` is the number of "entries" already written to `values` for this
  /// segment (aka one after the highest coordinate previously appended).
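  ///
  /// For example (illustrative), with `full == 3` on an innermost dense
  /// level, appending `crd == 5` zero-fills entries 3 and 4 of `values`,
  /// after which the caller writes the value for coordinate 5.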
  void appendCrd(uint64_t lvl, uint64_t full, uint64_t crd) {
    if (!isDenseLvl(lvl)) {
      assert(isCompressedLvl(lvl) || isLooseCompressedLvl(lvl) ||
             isSingletonLvl(lvl) || isNOutOfMLvl(lvl));
      coordinates[lvl].push_back(detail::checkOverflowCast<C>(crd));
    } else { // Dense level.
      assert(crd >= full && "Coordinate was already filled");
      if (crd == full)
        return; // Short-circuit, since it'll be a nop.
      if (lvl + 1 == getLvlRank())
        values.insert(values.end(), crd - full, 0);
      else
        finalizeSegment(lvl + 1, 0, crd - full);
    }
  }

  /// Computes the assembled-size associated with the `l`-th level,
  /// given the assembled-size associated with the `(l-1)`-th level.
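  ///
  /// For example (illustrative), for a (dense, compressed) CSR matrix the
  /// assembled size after the dense row level is the number of rows, and
  /// after the compressed column level it is positions[1][nrows], i.e.,
  /// the number of stored entries.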
  uint64_t assembledSize(uint64_t parentSz, uint64_t l) const {
    if (isCompressedLvl(l))
      return positions[l][parentSz];
    if (isLooseCompressedLvl(l))
      return positions[l][2 * parentSz - 1];
    if (isSingletonLvl(l) || isNOutOfMLvl(l))
      return parentSz; // new size same as the parent
    assert(isDenseLvl(l));
    return parentSz * getLvlSize(l);
  }

  /// Initializes sparse tensor storage scheme from a memory-resident sparse
  /// tensor in coordinate scheme. This method prepares the positions and
  /// coordinates arrays under the given per-level dense/sparse annotations.
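  ///
  /// For example (an illustrative trace), the sorted 2-D elements
  /// {(0,2):v0, (0,5):v1, (1,3):v2} under (dense, compressed) level types
  /// yield positions[1] = {0, 2, 3} (including the initial zero pushed by
  /// the constructor), coordinates[1] = {2, 5, 3}, and values = {v0, v1, v2}.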
  void fromCOO(const std::vector<Element<V>> &lvlElements, uint64_t lo,
               uint64_t hi, uint64_t l) {
    const uint64_t lvlRank = getLvlRank();
    assert(l <= lvlRank && hi <= lvlElements.size());
    // Once levels are exhausted, insert the numerical values.
    if (l == lvlRank) {
      assert(lo < hi);
      values.push_back(lvlElements[lo].value);
      return;
    }
    // Visit all elements in this interval.
    uint64_t full = 0;
    while (lo < hi) { // If `hi` is unchanged, then `lo < lvlElements.size()`.
      // Find segment in interval with same coordinate at this level.
      const uint64_t c = lvlElements[lo].coords[l];
      uint64_t seg = lo + 1;
      if (isUniqueLvl(l))
        while (seg < hi && lvlElements[seg].coords[l] == c)
          seg++;
      // Handle segment in interval for sparse or dense level.
      appendCrd(l, full, c);
      full = c + 1;
      fromCOO(lvlElements, lo, seg, l + 1);
      // And move on to next segment in interval.
      lo = seg;
    }
    // Finalize the sparse position structure at this level.
    finalizeSegment(l, full);
  }

  /// Finalizes the sparse position structure at this level.
  void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) {
    if (count == 0)
      return; // Short-circuit, since it'll be a nop.
    if (isCompressedLvl(l)) {
      uint64_t pos = coordinates[l].size();
      positions[l].insert(positions[l].end(), count,
                          detail::checkOverflowCast<P>(pos));
    } else if (isLooseCompressedLvl(l)) {
      // Finish this level, and push pairs for the empty ones, and one
      // more for next level. Note that this always leaves one extra
      // unused element at the end.
      uint64_t pos = coordinates[l].size();
      positions[l].insert(positions[l].end(), 2 * count,
                          detail::checkOverflowCast<P>(pos));
    } else if (isSingletonLvl(l) || isNOutOfMLvl(l)) {
      return; // Nothing to finalize.
    } else { // Dense level.
      assert(isDenseLvl(l));
      const uint64_t sz = getLvlSizes()[l];
      assert(sz >= full && "Segment is overfull");
      count = detail::checkedMul(count, sz - full);
      // For dense storage we must enumerate all the remaining coordinates
      // in this level (i.e., coordinates after the last non-zero
      // element), and either fill in their zero values or else recurse
      // to finalize some deeper level.
      if (l + 1 == getLvlRank())
        values.insert(values.end(), count, 0);
      else
        finalizeSegment(l + 1, 0, count);
    }
  }

  /// Wraps up a single insertion path, inner to outer.
  void endPath(uint64_t diffLvl) {
    const uint64_t lvlRank = getLvlRank();
    const uint64_t lastLvl = lvlRank - 1;
    assert(diffLvl <= lvlRank);
    const uint64_t stop = lvlRank - diffLvl;
    for (uint64_t i = 0; i < stop; i++) {
      const uint64_t l = lastLvl - i;
      finalizeSegment(l, lvlCursor[l] + 1);
    }
  }

  /// Continues a single insertion path, outer to inner. The first
  /// argument is the level-coordinates for the value being inserted.
  void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full,
               V val) {
    const uint64_t lvlRank = getLvlRank();
    assert(diffLvl <= lvlRank);
    for (uint64_t l = diffLvl; l < lvlRank; l++) {
      const uint64_t c = lvlCoords[l];
      appendCrd(l, full, c);
      full = 0;
      lvlCursor[l] = c;
    }
    values.push_back(val);
  }

  /// Finds the lexicographically first level where the level-coordinates
  /// in the argument differ from those in the current cursor.
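  ///
  /// For example (illustrative), with cursor {1, 3} and new coordinates
  /// {1, 5} on ordered and unique levels, the first difference is at
  /// level 1, so only the innermost segment must be wrapped up before
  /// the new insertion.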
  uint64_t lexDiff(const uint64_t *lvlCoords) const {
    const uint64_t lvlRank = getLvlRank();
    for (uint64_t l = 0; l < lvlRank; l++) {
      const auto crd = lvlCoords[l];
      const auto cur = lvlCursor[l];
      if (crd > cur || (crd == cur && !isUniqueLvl(l)) ||
          (crd < cur && !isOrderedLvl(l))) {
        return l;
      }
      if (crd < cur) {
        assert(false && "non-lexicographic insertion");
        return -1u;
      }
    }
    assert(false && "duplicate insertion");
    return -1u;
  }

  // Sparse tensor storage components.
  std::vector<std::vector<P>> positions;
  std::vector<std::vector<C>> coordinates;
  std::vector<V> values;

  // Auxiliary data structures.
  std::vector<uint64_t> lvlCursor;
  std::vector<C> crdBuffer; // just for AoS view
};

//===----------------------------------------------------------------------===//
//
// SparseTensorStorage Factories
//
//===----------------------------------------------------------------------===//

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newEmpty(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim) {
  SparseTensorCOO<V> *noLvlCOO = nullptr;
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, noLvlCOO);
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromCOO(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim,
    SparseTensorCOO<V> *lvlCOO) {
  assert(lvlCOO);
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, lvlCOO);
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromBuffers(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim, uint64_t srcRank,
    const intptr_t *buffers) {
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, buffers);
}
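
// A minimal end-to-end sketch of the factory API (hypothetical encoding;
// in practice the ranks, sizes, level types, and maps are taken from the
// tensor type's sparse encoding rather than written by hand):
//
//   uint64_t sizes[2] = {4, 4};
//   uint64_t map[2] = {0, 1};      // identity dim2lvl == lvl2dim
//   LevelType lvlTypes[2] = {...}; // e.g., (dense, compressed)
//   auto *t = SparseTensorStorage<uint64_t, uint64_t, double>::newEmpty(
//       /*dimRank=*/2, sizes, /*lvlRank=*/2, sizes, lvlTypes, map, map);
//   ... insert via lexInsert/endLexInsert, then query via the getters ...
//   delete t;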

//===----------------------------------------------------------------------===//
//
// SparseTensorStorage Constructors
//
//===----------------------------------------------------------------------===//

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V>::SparseTensorStorage(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim,
    SparseTensorCOO<V> *lvlCOO)
    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                          dim2lvl, lvl2dim) {
  // Provide hints on capacity of positions and coordinates.
  // TODO: needs much fine-tuning based on actual sparsity; currently
  // we reserve position/coordinate space based on all previous dense
  // levels, which works well up to first sparse level; but we should
  // really use nnz and dense/sparse distribution.
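  //
  // For example (illustrative), for a (dense, compressed) CSR matrix with
  // m rows, this reserves m+1 positions and m coordinates at level 1,
  // i.e., it guesses one stored entry per row.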
  uint64_t sz = 1;
  for (uint64_t l = 0; l < lvlRank; l++) {
    if (isCompressedLvl(l)) {
      positions[l].reserve(sz + 1);
      positions[l].push_back(0);
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isLooseCompressedLvl(l)) {
      positions[l].reserve(2 * sz + 1); // last one unused
      positions[l].push_back(0);
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isSingletonLvl(l)) {
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isNOutOfMLvl(l)) {
      assert(l == lvlRank - 1 && "unexpected n:m usage");
      sz = detail::checkedMul(sz, lvlSizes[l]) / 2;
      coordinates[l].reserve(sz);
      values.reserve(sz);
    } else { // Dense level.
      assert(isDenseLvl(l));
      sz = detail::checkedMul(sz, lvlSizes[l]);
    }
  }
  if (lvlCOO) {
    // New from COO: ensure it is sorted.
    assert(lvlCOO->getRank() == lvlRank);
    lvlCOO->sort();
    // Now actually insert the `elements`.
    const auto &elements = lvlCOO->getElements();
    const uint64_t nse = elements.size();
    assert(values.size() == 0);
    values.reserve(nse);
    fromCOO(elements, 0, nse, 0);
  } else if (allDense) {
    // New empty (all dense).
    values.resize(sz, 0);
  }
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V>::SparseTensorStorage(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim, const intptr_t *lvlBufs)
    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                          dim2lvl, lvl2dim) {
  // Note that none of the buffers can be reused because ownership
  // of the memory passed from clients is not necessarily transferred.
  // Therefore, all data is copied over into a new SparseTensorStorage.
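  //
  // For example (illustrative), a CSR matrix arrives as three buffers:
  // positions and coordinates of the compressed level, followed by the
  // values buffer; the dense row level contributes no buffer of its own.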
  uint64_t trailCOOLen = 0, parentSz = 1, bufIdx = 0;
  for (uint64_t l = 0; l < lvlRank; l++) {
    if (!isUniqueLvl(l) && (isCompressedLvl(l) || isLooseCompressedLvl(l))) {
      // A `(loose)compressed_nu` level marks the start of the trailing
      // COO levels. Since the coordinate buffer used for trailing COO
      // is passed in as an AoS scheme while SparseTensorStorage uses a
      // SoA scheme, we cannot simply copy the values from the provided
      // buffers; the AoS buffer is deinterleaved below.
      trailCOOLen = lvlRank - l;
      break;
    }
    if (isCompressedLvl(l) || isLooseCompressedLvl(l)) {
      P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
      C *crdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
      if (isLooseCompressedLvl(l)) {
        positions[l].assign(posPtr, posPtr + 2 * parentSz);
        coordinates[l].assign(crdPtr, crdPtr + positions[l][2 * parentSz - 1]);
      } else {
        positions[l].assign(posPtr, posPtr + parentSz + 1);
        coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]);
      }
    } else if (isSingletonLvl(l)) {
      assert(0 && "general singleton not supported yet");
    } else if (isNOutOfMLvl(l)) {
      assert(0 && "n out of m not supported yet");
    } else {
      assert(isDenseLvl(l));
    }
    parentSz = assembledSize(parentSz, l);
  }

  // Handle AoS vs. SoA mismatch for COO.
  if (trailCOOLen != 0) {
    uint64_t cooStartLvl = lvlRank - trailCOOLen;
    assert(!isUniqueLvl(cooStartLvl) &&
           (isCompressedLvl(cooStartLvl) || isLooseCompressedLvl(cooStartLvl)));
    P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
    C *aosCrdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
    P crdLen;
    if (isLooseCompressedLvl(cooStartLvl)) {
      positions[cooStartLvl].assign(posPtr, posPtr + 2 * parentSz);
      crdLen = positions[cooStartLvl][2 * parentSz - 1];
    } else {
      positions[cooStartLvl].assign(posPtr, posPtr + parentSz + 1);
      crdLen = positions[cooStartLvl][parentSz];
    }
    for (uint64_t l = cooStartLvl; l < lvlRank; l++) {
      coordinates[l].resize(crdLen);
      for (uint64_t n = 0; n < crdLen; n++) {
        coordinates[l][n] = *(aosCrdPtr + (l - cooStartLvl) + n * trailCOOLen);
      }
    }
    parentSz = assembledSize(parentSz, cooStartLvl);
  }

  // Copy the values buffer.
  V *valPtr = reinterpret_cast<V *>(lvlBufs[bufIdx]);
  values.assign(valPtr, valPtr + parentSz);
}
} // namespace sparse_tensor
} // namespace mlir

#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H