1//===- BytecodeWriter.cpp - MLIR Bytecode Writer --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "mlir/Bytecode/BytecodeWriter.h"
10#include "IRNumbering.h"
11#include "mlir/Bytecode/BytecodeImplementation.h"
12#include "mlir/Bytecode/BytecodeOpInterface.h"
13#include "mlir/Bytecode/Encoding.h"
14#include "mlir/IR/Attributes.h"
15#include "mlir/IR/Diagnostics.h"
16#include "mlir/IR/OpImplementation.h"
17#include "mlir/Support/LogicalResult.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/CachedHashString.h"
20#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/Support/Endian.h"
23#include "llvm/Support/raw_ostream.h"
24#include <optional>
25
26#define DEBUG_TYPE "mlir-bytecode-writer"
27
28using namespace mlir;
29using namespace mlir::bytecode::detail;
30
31//===----------------------------------------------------------------------===//
32// BytecodeWriterConfig
33//===----------------------------------------------------------------------===//
34
35struct BytecodeWriterConfig::Impl {
36 Impl(StringRef producer) : producer(producer) {}
37
38 /// Version to use when writing.
39 /// Note: This only differs from kVersion if a specific version is set.
40 int64_t bytecodeVersion = bytecode::kVersion;
41
42 /// A flag specifying whether to elide emission of resources into the bytecode
43 /// file.
44 bool shouldElideResourceData = false;
45
46 /// A map containing dialect version information for each dialect to emit.
47 llvm::StringMap<std::unique_ptr<DialectVersion>> dialectVersionMap;
48
49 /// The producer of the bytecode.
50 StringRef producer;
51
52 /// Printer callbacks used to emit custom type and attribute encodings.
53 llvm::SmallVector<std::unique_ptr<AttrTypeBytecodeWriter<Attribute>>>
54 attributeWriterCallbacks;
55 llvm::SmallVector<std::unique_ptr<AttrTypeBytecodeWriter<Type>>>
56 typeWriterCallbacks;
57
58 /// A collection of non-dialect resource printers.
59 SmallVector<std::unique_ptr<AsmResourcePrinter>> externalResourcePrinters;
60};
61
62BytecodeWriterConfig::BytecodeWriterConfig(StringRef producer)
63 : impl(std::make_unique<Impl>(args&: producer)) {}
64BytecodeWriterConfig::BytecodeWriterConfig(FallbackAsmResourceMap &map,
65 StringRef producer)
66 : BytecodeWriterConfig(producer) {
67 attachFallbackResourcePrinter(map);
68}
69BytecodeWriterConfig::~BytecodeWriterConfig() = default;
70
71ArrayRef<std::unique_ptr<AttrTypeBytecodeWriter<Attribute>>>
72BytecodeWriterConfig::getAttributeWriterCallbacks() const {
73 return impl->attributeWriterCallbacks;
74}
75
76ArrayRef<std::unique_ptr<AttrTypeBytecodeWriter<Type>>>
77BytecodeWriterConfig::getTypeWriterCallbacks() const {
78 return impl->typeWriterCallbacks;
79}
80
81void BytecodeWriterConfig::attachAttributeCallback(
82 std::unique_ptr<AttrTypeBytecodeWriter<Attribute>> callback) {
83 impl->attributeWriterCallbacks.emplace_back(Args: std::move(callback));
84}
85
86void BytecodeWriterConfig::attachTypeCallback(
87 std::unique_ptr<AttrTypeBytecodeWriter<Type>> callback) {
88 impl->typeWriterCallbacks.emplace_back(Args: std::move(callback));
89}
90
91void BytecodeWriterConfig::attachResourcePrinter(
92 std::unique_ptr<AsmResourcePrinter> printer) {
93 impl->externalResourcePrinters.emplace_back(Args: std::move(printer));
94}
95
96void BytecodeWriterConfig::setElideResourceDataFlag(
97 bool shouldElideResourceData) {
98 impl->shouldElideResourceData = shouldElideResourceData;
99}
100
101void BytecodeWriterConfig::setDesiredBytecodeVersion(int64_t bytecodeVersion) {
102 impl->bytecodeVersion = bytecodeVersion;
103}
104
105int64_t BytecodeWriterConfig::getDesiredBytecodeVersion() const {
106 return impl->bytecodeVersion;
107}
108
109llvm::StringMap<std::unique_ptr<DialectVersion>> &
110BytecodeWriterConfig::getDialectVersionMap() const {
111 return impl->dialectVersionMap;
112}
113
114void BytecodeWriterConfig::setDialectVersion(
115 llvm::StringRef dialectName,
116 std::unique_ptr<DialectVersion> dialectVersion) const {
117 assert(!impl->dialectVersionMap.contains(dialectName) &&
118 "cannot override a previously set dialect version");
119 impl->dialectVersionMap.insert(KV: {dialectName, std::move(dialectVersion)});
120}
121
122//===----------------------------------------------------------------------===//
123// EncodingEmitter
124//===----------------------------------------------------------------------===//
125
126namespace {
127/// This class functions as the underlying encoding emitter for the bytecode
128/// writer. This class is a bit different compared to other types of encoders;
129/// it does not use a single buffer, but instead may contain several buffers
130/// (some owned by the writer, and some not) that get concatted during the final
131/// emission.
132class EncodingEmitter {
133public:
134 EncodingEmitter() = default;
135 EncodingEmitter(const EncodingEmitter &) = delete;
136 EncodingEmitter &operator=(const EncodingEmitter &) = delete;
137
138 /// Write the current contents to the provided stream.
139 void writeTo(raw_ostream &os) const;
140
141 /// Return the current size of the encoded buffer.
142 size_t size() const { return prevResultSize + currentResult.size(); }
143
144 //===--------------------------------------------------------------------===//
145 // Emission
146 //===--------------------------------------------------------------------===//
147
148 /// Backpatch a byte in the result buffer at the given offset.
149 void patchByte(uint64_t offset, uint8_t value) {
150 assert(offset < size() && offset >= prevResultSize &&
151 "cannot patch previously emitted data");
152 currentResult[offset - prevResultSize] = value;
153 }
154
155 /// Emit the provided blob of data, which is owned by the caller and is
156 /// guaranteed to not die before the end of the bytecode process.
157 void emitOwnedBlob(ArrayRef<uint8_t> data) {
158 // Push the current buffer before adding the provided data.
159 appendResult(result: std::move(currentResult));
160 appendOwnedResult(result: data);
161 }
162
163 /// Emit the provided blob of data that has the given alignment, which is
164 /// owned by the caller and is guaranteed to not die before the end of the
165 /// bytecode process. The alignment value is also encoded, making it available
166 /// on load.
167 void emitOwnedBlobAndAlignment(ArrayRef<uint8_t> data, uint32_t alignment) {
168 emitVarInt(value: alignment);
169 emitVarInt(value: data.size());
170
171 alignTo(alignment);
172 emitOwnedBlob(data);
173 }
174 void emitOwnedBlobAndAlignment(ArrayRef<char> data, uint32_t alignment) {
175 ArrayRef<uint8_t> castedData(reinterpret_cast<const uint8_t *>(data.data()),
176 data.size());
177 emitOwnedBlobAndAlignment(data: castedData, alignment);
178 }
179
180 /// Align the emitter to the given alignment.
181 void alignTo(unsigned alignment) {
182 if (alignment < 2)
183 return;
184 assert(llvm::isPowerOf2_32(alignment) && "expected valid alignment");
185
186 // Check to see if we need to emit any padding bytes to meet the desired
187 // alignment.
188 size_t curOffset = size();
189 size_t paddingSize = llvm::alignTo(Value: curOffset, Align: alignment) - curOffset;
190 while (paddingSize--)
191 emitByte(byte: bytecode::kAlignmentByte);
192
193 // Keep track of the maximum required alignment.
194 requiredAlignment = std::max(a: requiredAlignment, b: alignment);
195 }
196
197 //===--------------------------------------------------------------------===//
198 // Integer Emission
199
200 /// Emit a single byte.
201 template <typename T>
202 void emitByte(T byte) {
203 currentResult.push_back(x: static_cast<uint8_t>(byte));
204 }
205
206 /// Emit a range of bytes.
207 void emitBytes(ArrayRef<uint8_t> bytes) {
208 llvm::append_range(C&: currentResult, R&: bytes);
209 }
210
211 /// Emit a variable length integer. The first encoded byte contains a prefix
212 /// in the low bits indicating the encoded length of the value. This length
213 /// prefix is a bit sequence of '0's followed by a '1'. The number of '0' bits
214 /// indicate the number of _additional_ bytes (not including the prefix byte).
215 /// All remaining bits in the first byte, along with all of the bits in
216 /// additional bytes, provide the value of the integer encoded in
217 /// little-endian order.
218 void emitVarInt(uint64_t value) {
219 // In the most common case, the value can be represented in a single byte.
220 // Given how hot this case is, explicitly handle that here.
221 if ((value >> 7) == 0)
222 return emitByte(byte: (value << 1) | 0x1);
223 emitMultiByteVarInt(value);
224 }
225
226 /// Emit a signed variable length integer. Signed varints are encoded using
227 /// a varint with zigzag encoding, meaning that we use the low bit of the
228 /// value to indicate the sign of the value. This allows for more efficient
229 /// encoding of negative values by limiting the number of active bits
230 void emitSignedVarInt(uint64_t value) {
231 emitVarInt(value: (value << 1) ^ (uint64_t)((int64_t)value >> 63));
232 }
233
234 /// Emit a variable length integer whose low bit is used to encode the
235 /// provided flag, i.e. encoded as: (value << 1) | (flag ? 1 : 0).
236 void emitVarIntWithFlag(uint64_t value, bool flag) {
237 emitVarInt(value: (value << 1) | (flag ? 1 : 0));
238 }
239
240 //===--------------------------------------------------------------------===//
241 // String Emission
242
243 /// Emit the given string as a nul terminated string.
244 void emitNulTerminatedString(StringRef str) {
245 emitString(str);
246 emitByte(byte: 0);
247 }
248
249 /// Emit the given string without a nul terminator.
250 void emitString(StringRef str) {
251 emitBytes(bytes: {reinterpret_cast<const uint8_t *>(str.data()), str.size()});
252 }
253
254 //===--------------------------------------------------------------------===//
255 // Section Emission
256
257 /// Emit a nested section of the given code, whose contents are encoded in the
258 /// provided emitter.
259 void emitSection(bytecode::Section::ID code, EncodingEmitter &&emitter) {
260 // Emit the section code and length. The high bit of the code is used to
261 // indicate whether the section alignment is present, so save an offset to
262 // it.
263 uint64_t codeOffset = currentResult.size();
264 emitByte(byte: code);
265 emitVarInt(value: emitter.size());
266
267 // Integrate the alignment of the section into this emitter if necessary.
268 unsigned emitterAlign = emitter.requiredAlignment;
269 if (emitterAlign > 1) {
270 if (size() & (emitterAlign - 1)) {
271 emitVarInt(value: emitterAlign);
272 alignTo(alignment: emitterAlign);
273
274 // Indicate that we needed to align the section, the high bit of the
275 // code field is used for this.
276 currentResult[codeOffset] |= 0b10000000;
277 } else {
278 // Otherwise, if we happen to be at a compatible offset, we just
279 // remember that we need this alignment.
280 requiredAlignment = std::max(a: requiredAlignment, b: emitterAlign);
281 }
282 }
283
284 // Push our current buffer and then merge the provided section body into
285 // ours.
286 appendResult(result: std::move(currentResult));
287 for (std::vector<uint8_t> &result : emitter.prevResultStorage)
288 prevResultStorage.push_back(x: std::move(result));
289 llvm::append_range(C&: prevResultList, R&: emitter.prevResultList);
290 prevResultSize += emitter.prevResultSize;
291 appendResult(result: std::move(emitter.currentResult));
292 }
293
294private:
295 /// Emit the given value using a variable width encoding. This method is a
296 /// fallback when the number of bytes needed to encode the value is greater
297 /// than 1. We mark it noinline here so that the single byte hot path isn't
298 /// pessimized.
299 LLVM_ATTRIBUTE_NOINLINE void emitMultiByteVarInt(uint64_t value);
300
301 /// Append a new result buffer to the current contents.
302 void appendResult(std::vector<uint8_t> &&result) {
303 if (result.empty())
304 return;
305 prevResultStorage.emplace_back(args: std::move(result));
306 appendOwnedResult(result: prevResultStorage.back());
307 }
308 void appendOwnedResult(ArrayRef<uint8_t> result) {
309 if (result.empty())
310 return;
311 prevResultSize += result.size();
312 prevResultList.emplace_back(args&: result);
313 }
314
315 /// The result of the emitter currently being built. We refrain from building
316 /// a single buffer to simplify emitting sections, large data, and more. The
317 /// result is thus represented using multiple distinct buffers, some of which
318 /// we own (via prevResultStorage), and some of which are just pointers into
319 /// externally owned buffers.
320 std::vector<uint8_t> currentResult;
321 std::vector<ArrayRef<uint8_t>> prevResultList;
322 std::vector<std::vector<uint8_t>> prevResultStorage;
323
324 /// An up-to-date total size of all of the buffers within `prevResultList`.
325 /// This enables O(1) size checks of the current encoding.
326 size_t prevResultSize = 0;
327
328 /// The highest required alignment for the start of this section.
329 unsigned requiredAlignment = 1;
330};
331
332//===----------------------------------------------------------------------===//
333// StringSectionBuilder
334//===----------------------------------------------------------------------===//
335
336namespace {
337/// This class is used to simplify the process of emitting the string section.
338class StringSectionBuilder {
339public:
340 /// Add the given string to the string section, and return the index of the
341 /// string within the section.
342 size_t insert(StringRef str) {
343 auto it = strings.insert(KV: {llvm::CachedHashStringRef(str), strings.size()});
344 return it.first->second;
345 }
346
347 /// Write the current set of strings to the given emitter.
348 void write(EncodingEmitter &emitter) {
349 emitter.emitVarInt(value: strings.size());
350
351 // Emit the sizes in reverse order, so that we don't need to backpatch an
352 // offset to the string data or have a separate section.
353 for (const auto &it : llvm::reverse(C&: strings))
354 emitter.emitVarInt(value: it.first.size() + 1);
355 // Emit the string data itself.
356 for (const auto &it : strings)
357 emitter.emitNulTerminatedString(str: it.first.val());
358 }
359
360private:
361 /// A set of strings referenced within the bytecode. The value of the map is
362 /// unused.
363 llvm::MapVector<llvm::CachedHashStringRef, size_t> strings;
364};
365} // namespace
366
367class DialectWriter : public DialectBytecodeWriter {
368 using DialectVersionMapT = llvm::StringMap<std::unique_ptr<DialectVersion>>;
369
370public:
371 DialectWriter(int64_t bytecodeVersion, EncodingEmitter &emitter,
372 IRNumberingState &numberingState,
373 StringSectionBuilder &stringSection,
374 const DialectVersionMapT &dialectVersionMap)
375 : bytecodeVersion(bytecodeVersion), emitter(emitter),
376 numberingState(numberingState), stringSection(stringSection),
377 dialectVersionMap(dialectVersionMap) {}
378
379 //===--------------------------------------------------------------------===//
380 // IR
381 //===--------------------------------------------------------------------===//
382
383 void writeAttribute(Attribute attr) override {
384 emitter.emitVarInt(value: numberingState.getNumber(attr));
385 }
386 void writeOptionalAttribute(Attribute attr) override {
387 if (!attr) {
388 emitter.emitVarInt(value: 0);
389 return;
390 }
391 emitter.emitVarIntWithFlag(value: numberingState.getNumber(attr), flag: true);
392 }
393
394 void writeType(Type type) override {
395 emitter.emitVarInt(value: numberingState.getNumber(type));
396 }
397
398 void writeResourceHandle(const AsmDialectResourceHandle &resource) override {
399 emitter.emitVarInt(value: numberingState.getNumber(resource));
400 }
401
402 //===--------------------------------------------------------------------===//
403 // Primitives
404 //===--------------------------------------------------------------------===//
405
406 void writeVarInt(uint64_t value) override { emitter.emitVarInt(value); }
407
408 void writeSignedVarInt(int64_t value) override {
409 emitter.emitSignedVarInt(value);
410 }
411
412 void writeAPIntWithKnownWidth(const APInt &value) override {
413 size_t bitWidth = value.getBitWidth();
414
415 // If the value is a single byte, just emit it directly without going
416 // through a varint.
417 if (bitWidth <= 8)
418 return emitter.emitByte(byte: value.getLimitedValue());
419
420 // If the value fits within a single varint, emit it directly.
421 if (bitWidth <= 64)
422 return emitter.emitSignedVarInt(value: value.getLimitedValue());
423
424 // Otherwise, we need to encode a variable number of active words. We use
425 // active words instead of the number of total words under the observation
426 // that smaller values will be more common.
427 unsigned numActiveWords = value.getActiveWords();
428 emitter.emitVarInt(value: numActiveWords);
429
430 const uint64_t *rawValueData = value.getRawData();
431 for (unsigned i = 0; i < numActiveWords; ++i)
432 emitter.emitSignedVarInt(value: rawValueData[i]);
433 }
434
435 void writeAPFloatWithKnownSemantics(const APFloat &value) override {
436 writeAPIntWithKnownWidth(value: value.bitcastToAPInt());
437 }
438
439 void writeOwnedString(StringRef str) override {
440 emitter.emitVarInt(value: stringSection.insert(str));
441 }
442
443 void writeOwnedBlob(ArrayRef<char> blob) override {
444 emitter.emitVarInt(value: blob.size());
445 emitter.emitOwnedBlob(data: ArrayRef<uint8_t>(
446 reinterpret_cast<const uint8_t *>(blob.data()), blob.size()));
447 }
448
449 void writeOwnedBool(bool value) override { emitter.emitByte(byte: value); }
450
451 int64_t getBytecodeVersion() const override { return bytecodeVersion; }
452
453 FailureOr<const DialectVersion *>
454 getDialectVersion(StringRef dialectName) const override {
455 auto dialectEntry = dialectVersionMap.find(Key: dialectName);
456 if (dialectEntry == dialectVersionMap.end())
457 return failure();
458 return dialectEntry->getValue().get();
459 }
460
461private:
462 int64_t bytecodeVersion;
463 EncodingEmitter &emitter;
464 IRNumberingState &numberingState;
465 StringSectionBuilder &stringSection;
466 const DialectVersionMapT &dialectVersionMap;
467};
468
469namespace {
470class PropertiesSectionBuilder {
471public:
472 PropertiesSectionBuilder(IRNumberingState &numberingState,
473 StringSectionBuilder &stringSection,
474 const BytecodeWriterConfig::Impl &config)
475 : numberingState(numberingState), stringSection(stringSection),
476 config(config) {}
477
478 /// Emit the op properties in the properties section and return the index of
479 /// the properties within the section. Return -1 if no properties was emitted.
480 std::optional<ssize_t> emit(Operation *op) {
481 EncodingEmitter propertiesEmitter;
482 if (!op->getPropertiesStorageSize())
483 return std::nullopt;
484 if (!op->isRegistered()) {
485 // Unregistered op are storing properties as an optional attribute.
486 Attribute prop = *op->getPropertiesStorage().as<Attribute *>();
487 if (!prop)
488 return std::nullopt;
489 EncodingEmitter sizeEmitter;
490 sizeEmitter.emitVarInt(value: numberingState.getNumber(attr: prop));
491 scratch.clear();
492 llvm::raw_svector_ostream os(scratch);
493 sizeEmitter.writeTo(os);
494 return emit(rawProperties: scratch);
495 }
496
497 EncodingEmitter emitter;
498 DialectWriter propertiesWriter(config.bytecodeVersion, emitter,
499 numberingState, stringSection,
500 config.dialectVersionMap);
501 auto iface = cast<BytecodeOpInterface>(op);
502 iface.writeProperties(propertiesWriter);
503 scratch.clear();
504 llvm::raw_svector_ostream os(scratch);
505 emitter.writeTo(os);
506 return emit(rawProperties: scratch);
507 }
508
509 /// Write the current set of properties to the given emitter.
510 void write(EncodingEmitter &emitter) {
511 emitter.emitVarInt(value: propertiesStorage.size());
512 if (propertiesStorage.empty())
513 return;
514 for (const auto &storage : propertiesStorage) {
515 if (storage.empty()) {
516 emitter.emitBytes(bytes: ArrayRef<uint8_t>());
517 continue;
518 }
519 emitter.emitBytes(bytes: ArrayRef(reinterpret_cast<const uint8_t *>(&storage[0]),
520 storage.size()));
521 }
522 }
523
524 /// Returns true if the section is empty.
525 bool empty() { return propertiesStorage.empty(); }
526
527private:
528 /// Emit raw data and returns the offset in the internal buffer.
529 /// Data are deduplicated and will be copied in the internal buffer only if
530 /// they don't exist there already.
531 ssize_t emit(ArrayRef<char> rawProperties) {
532 // Populate a scratch buffer with the properties size.
533 SmallVector<char> sizeScratch;
534 {
535 EncodingEmitter sizeEmitter;
536 sizeEmitter.emitVarInt(value: rawProperties.size());
537 llvm::raw_svector_ostream os(sizeScratch);
538 sizeEmitter.writeTo(os);
539 }
540 // Append a new storage to the table now.
541 size_t index = propertiesStorage.size();
542 propertiesStorage.emplace_back();
543 std::vector<char> &newStorage = propertiesStorage.back();
544 size_t propertiesSize = sizeScratch.size() + rawProperties.size();
545 newStorage.reserve(n: propertiesSize);
546 newStorage.insert(position: newStorage.end(), first: sizeScratch.begin(), last: sizeScratch.end());
547 newStorage.insert(position: newStorage.end(), first: rawProperties.begin(),
548 last: rawProperties.end());
549
550 // Try to de-duplicate the new serialized properties.
551 // If the properties is a duplicate, pop it back from the storage.
552 auto inserted = propertiesUniquing.insert(
553 KV: std::make_pair(x: ArrayRef<char>(newStorage), y&: index));
554 if (!inserted.second)
555 propertiesStorage.pop_back();
556 return inserted.first->getSecond();
557 }
558
559 /// Storage for properties.
560 std::vector<std::vector<char>> propertiesStorage;
561 SmallVector<char> scratch;
562 DenseMap<ArrayRef<char>, int64_t> propertiesUniquing;
563 IRNumberingState &numberingState;
564 StringSectionBuilder &stringSection;
565 const BytecodeWriterConfig::Impl &config;
566};
567} // namespace
568
569/// A simple raw_ostream wrapper around a EncodingEmitter. This removes the need
570/// to go through an intermediate buffer when interacting with code that wants a
571/// raw_ostream.
572class RawEmitterOstream : public raw_ostream {
573public:
574 explicit RawEmitterOstream(EncodingEmitter &emitter) : emitter(emitter) {
575 SetUnbuffered();
576 }
577
578private:
579 void write_impl(const char *ptr, size_t size) override {
580 emitter.emitBytes(bytes: {reinterpret_cast<const uint8_t *>(ptr), size});
581 }
582 uint64_t current_pos() const override { return emitter.size(); }
583
584 /// The section being emitted to.
585 EncodingEmitter &emitter;
586};
587} // namespace
588
589void EncodingEmitter::writeTo(raw_ostream &os) const {
590 for (auto &prevResult : prevResultList)
591 os.write(Ptr: (const char *)prevResult.data(), Size: prevResult.size());
592 os.write(Ptr: (const char *)currentResult.data(), Size: currentResult.size());
593}
594
595void EncodingEmitter::emitMultiByteVarInt(uint64_t value) {
596 // Compute the number of bytes needed to encode the value. Each byte can hold
597 // up to 7-bits of data. We only check up to the number of bits we can encode
598 // in the first byte (8).
599 uint64_t it = value >> 7;
600 for (size_t numBytes = 2; numBytes < 9; ++numBytes) {
601 if (LLVM_LIKELY(it >>= 7) == 0) {
602 uint64_t encodedValue = (value << 1) | 0x1;
603 encodedValue <<= (numBytes - 1);
604 llvm::support::ulittle64_t encodedValueLE(encodedValue);
605 emitBytes(bytes: {reinterpret_cast<uint8_t *>(&encodedValueLE), numBytes});
606 return;
607 }
608 }
609
610 // If the value is too large to encode in a single byte, emit a special all
611 // zero marker byte and splat the value directly.
612 emitByte(byte: 0);
613 llvm::support::ulittle64_t valueLE(value);
614 emitBytes(bytes: {reinterpret_cast<uint8_t *>(&valueLE), sizeof(valueLE)});
615}
616
617//===----------------------------------------------------------------------===//
618// Bytecode Writer
619//===----------------------------------------------------------------------===//
620
621namespace {
622class BytecodeWriter {
623public:
624 BytecodeWriter(Operation *op, const BytecodeWriterConfig &config)
625 : numberingState(op, config), config(config.getImpl()),
626 propertiesSection(numberingState, stringSection, config.getImpl()) {}
627
628 /// Write the bytecode for the given root operation.
629 LogicalResult write(Operation *rootOp, raw_ostream &os);
630
631private:
632 //===--------------------------------------------------------------------===//
633 // Dialects
634
635 void writeDialectSection(EncodingEmitter &emitter);
636
637 //===--------------------------------------------------------------------===//
638 // Attributes and Types
639
640 void writeAttrTypeSection(EncodingEmitter &emitter);
641
642 //===--------------------------------------------------------------------===//
643 // Operations
644
645 LogicalResult writeBlock(EncodingEmitter &emitter, Block *block);
646 LogicalResult writeOp(EncodingEmitter &emitter, Operation *op);
647 LogicalResult writeRegion(EncodingEmitter &emitter, Region *region);
648 LogicalResult writeIRSection(EncodingEmitter &emitter, Operation *op);
649
650 LogicalResult writeRegions(EncodingEmitter &emitter,
651 MutableArrayRef<Region> regions) {
652 return success(isSuccess: llvm::all_of(Range&: regions, P: [&](Region &region) {
653 return succeeded(result: writeRegion(emitter, region: &region));
654 }));
655 }
656
657 //===--------------------------------------------------------------------===//
658 // Resources
659
660 void writeResourceSection(Operation *op, EncodingEmitter &emitter);
661
662 //===--------------------------------------------------------------------===//
663 // Strings
664
665 void writeStringSection(EncodingEmitter &emitter);
666
667 //===--------------------------------------------------------------------===//
668 // Properties
669
670 void writePropertiesSection(EncodingEmitter &emitter);
671
672 //===--------------------------------------------------------------------===//
673 // Helpers
674
675 void writeUseListOrders(EncodingEmitter &emitter, uint8_t &opEncodingMask,
676 ValueRange range);
677
678 //===--------------------------------------------------------------------===//
679 // Fields
680
681 /// The builder used for the string section.
682 StringSectionBuilder stringSection;
683
684 /// The IR numbering state generated for the root operation.
685 IRNumberingState numberingState;
686
687 /// Configuration dictating bytecode emission.
688 const BytecodeWriterConfig::Impl &config;
689
690 /// Storage for the properties section
691 PropertiesSectionBuilder propertiesSection;
692};
693} // namespace
694
695LogicalResult BytecodeWriter::write(Operation *rootOp, raw_ostream &os) {
696 EncodingEmitter emitter;
697
698 // Emit the bytecode file header. This is how we identify the output as a
699 // bytecode file.
700 emitter.emitString(str: "ML\xefR");
701
702 // Emit the bytecode version.
703 if (config.bytecodeVersion < bytecode::kMinSupportedVersion ||
704 config.bytecodeVersion > bytecode::kVersion)
705 return rootOp->emitError()
706 << "unsupported version requested " << config.bytecodeVersion
707 << ", must be in range ["
708 << static_cast<int64_t>(bytecode::kMinSupportedVersion) << ", "
709 << static_cast<int64_t>(bytecode::kVersion) << ']';
710 emitter.emitVarInt(value: config.bytecodeVersion);
711
712 // Emit the producer.
713 emitter.emitNulTerminatedString(str: config.producer);
714
715 // Emit the dialect section.
716 writeDialectSection(emitter);
717
718 // Emit the attributes and types section.
719 writeAttrTypeSection(emitter);
720
721 // Emit the IR section.
722 if (failed(result: writeIRSection(emitter, op: rootOp)))
723 return failure();
724
725 // Emit the resources section.
726 writeResourceSection(op: rootOp, emitter);
727
728 // Emit the string section.
729 writeStringSection(emitter);
730
731 // Emit the properties section.
732 if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding)
733 writePropertiesSection(emitter);
734 else if (!propertiesSection.empty())
735 return rootOp->emitError(
736 message: "unexpected properties emitted incompatible with bytecode <5");
737
738 // Write the generated bytecode to the provided output stream.
739 emitter.writeTo(os);
740
741 return success();
742}
743
744//===----------------------------------------------------------------------===//
745// Dialects
746
747/// Write the given entries in contiguous groups with the same parent dialect.
748/// Each dialect sub-group is encoded with the parent dialect and number of
749/// elements, followed by the encoding for the entries. The given callback is
750/// invoked to encode each individual entry.
751template <typename EntriesT, typename EntryCallbackT>
752static void writeDialectGrouping(EncodingEmitter &emitter, EntriesT &&entries,
753 EntryCallbackT &&callback) {
754 for (auto it = entries.begin(), e = entries.end(); it != e;) {
755 auto groupStart = it++;
756
757 // Find the end of the group that shares the same parent dialect.
758 DialectNumbering *currentDialect = groupStart->dialect;
759 it = std::find_if(it, e, [&](const auto &entry) {
760 return entry.dialect != currentDialect;
761 });
762
763 // Emit the dialect and number of elements.
764 emitter.emitVarInt(value: currentDialect->number);
765 emitter.emitVarInt(value: std::distance(groupStart, it));
766
767 // Emit the entries within the group.
768 for (auto &entry : llvm::make_range(groupStart, it))
769 callback(entry);
770 }
771}
772
773void BytecodeWriter::writeDialectSection(EncodingEmitter &emitter) {
774 EncodingEmitter dialectEmitter;
775
776 // Emit the referenced dialects.
777 auto dialects = numberingState.getDialects();
778 dialectEmitter.emitVarInt(value: llvm::size(Range&: dialects));
779 for (DialectNumbering &dialect : dialects) {
780 // Write the string section and get the ID.
781 size_t nameID = stringSection.insert(str: dialect.name);
782
783 if (config.bytecodeVersion < bytecode::kDialectVersioning) {
784 dialectEmitter.emitVarInt(value: nameID);
785 continue;
786 }
787
788 // Try writing the version to the versionEmitter.
789 EncodingEmitter versionEmitter;
790 if (dialect.interface) {
791 // The writer used when emitting using a custom bytecode encoding.
792 DialectWriter versionWriter(config.bytecodeVersion, versionEmitter,
793 numberingState, stringSection,
794 config.dialectVersionMap);
795 dialect.interface->writeVersion(writer&: versionWriter);
796 }
797
798 // If the version emitter is empty, version is not available. We can encode
799 // this in the dialect ID, so if there is no version, we don't write the
800 // section.
801 size_t versionAvailable = versionEmitter.size() > 0;
802 dialectEmitter.emitVarIntWithFlag(value: nameID, flag: versionAvailable);
803 if (versionAvailable)
804 dialectEmitter.emitSection(code: bytecode::Section::kDialectVersions,
805 emitter: std::move(versionEmitter));
806 }
807
808 if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation)
809 dialectEmitter.emitVarInt(value: size(Range: numberingState.getOpNames()));
810
811 // Emit the referenced operation names grouped by dialect.
812 auto emitOpName = [&](OpNameNumbering &name) {
813 size_t stringId = stringSection.insert(str: name.name.stripDialect());
814 if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding)
815 dialectEmitter.emitVarInt(value: stringId);
816 else
817 dialectEmitter.emitVarIntWithFlag(value: stringId, flag: name.name.isRegistered());
818 };
819 writeDialectGrouping(emitter&: dialectEmitter, entries: numberingState.getOpNames(), callback&: emitOpName);
820
821 emitter.emitSection(code: bytecode::Section::kDialect, emitter: std::move(dialectEmitter));
822}
823
824//===----------------------------------------------------------------------===//
825// Attributes and Types
826
827void BytecodeWriter::writeAttrTypeSection(EncodingEmitter &emitter) {
828 EncodingEmitter attrTypeEmitter;
829 EncodingEmitter offsetEmitter;
830 offsetEmitter.emitVarInt(value: llvm::size(Range: numberingState.getAttributes()));
831 offsetEmitter.emitVarInt(value: llvm::size(Range: numberingState.getTypes()));
832
833 // A functor used to emit an attribute or type entry.
834 uint64_t prevOffset = 0;
835 auto emitAttrOrType = [&](auto &entry) {
836 auto entryValue = entry.getValue();
837
838 auto emitAttrOrTypeRawImpl = [&]() -> void {
839 RawEmitterOstream(attrTypeEmitter) << entryValue;
840 attrTypeEmitter.emitByte(byte: 0);
841 };
842 auto emitAttrOrTypeImpl = [&]() -> bool {
843 // TODO: We don't currently support custom encoded mutable types and
844 // attributes.
845 if (entryValue.template hasTrait<TypeTrait::IsMutable>() ||
846 entryValue.template hasTrait<AttributeTrait::IsMutable>()) {
847 emitAttrOrTypeRawImpl();
848 return false;
849 }
850
851 DialectWriter dialectWriter(config.bytecodeVersion, attrTypeEmitter,
852 numberingState, stringSection,
853 config.dialectVersionMap);
854 if constexpr (std::is_same_v<std::decay_t<decltype(entryValue)>, Type>) {
855 for (const auto &callback : config.typeWriterCallbacks) {
856 if (succeeded(callback->write(entryValue, dialectWriter)))
857 return true;
858 }
859 if (const BytecodeDialectInterface *interface =
860 entry.dialect->interface) {
861 if (succeeded(interface->writeType(type: entryValue, writer&: dialectWriter)))
862 return true;
863 }
864 } else {
865 for (const auto &callback : config.attributeWriterCallbacks) {
866 if (succeeded(callback->write(entryValue, dialectWriter)))
867 return true;
868 }
869 if (const BytecodeDialectInterface *interface =
870 entry.dialect->interface) {
871 if (succeeded(interface->writeAttribute(attr: entryValue, writer&: dialectWriter)))
872 return true;
873 }
874 }
875
876 // If the entry was not emitted using a callback or a dialect interface,
877 // emit it using the textual format.
878 emitAttrOrTypeRawImpl();
879 return false;
880 };
881
882 bool hasCustomEncoding = emitAttrOrTypeImpl();
883
884 // Record the offset of this entry.
885 uint64_t curOffset = attrTypeEmitter.size();
886 offsetEmitter.emitVarIntWithFlag(value: curOffset - prevOffset, flag: hasCustomEncoding);
887 prevOffset = curOffset;
888 };
889
890 // Emit the attribute and type entries for each dialect.
891 writeDialectGrouping(emitter&: offsetEmitter, entries: numberingState.getAttributes(),
892 callback&: emitAttrOrType);
893 writeDialectGrouping(emitter&: offsetEmitter, entries: numberingState.getTypes(),
894 callback&: emitAttrOrType);
895
896 // Emit the sections to the stream.
897 emitter.emitSection(code: bytecode::Section::kAttrTypeOffset,
898 emitter: std::move(offsetEmitter));
899 emitter.emitSection(code: bytecode::Section::kAttrType, emitter: std::move(attrTypeEmitter));
900}
901
902//===----------------------------------------------------------------------===//
903// Operations
904
905LogicalResult BytecodeWriter::writeBlock(EncodingEmitter &emitter,
906 Block *block) {
907 ArrayRef<BlockArgument> args = block->getArguments();
908 bool hasArgs = !args.empty();
909
910 // Emit the number of operations in this block, and if it has arguments. We
911 // use the low bit of the operation count to indicate if the block has
912 // arguments.
913 unsigned numOps = numberingState.getOperationCount(block);
914 emitter.emitVarIntWithFlag(value: numOps, flag: hasArgs);
915
916 // Emit the arguments of the block.
917 if (hasArgs) {
918 emitter.emitVarInt(value: args.size());
919 for (BlockArgument arg : args) {
920 Location argLoc = arg.getLoc();
921 if (config.bytecodeVersion >= bytecode::kElideUnknownBlockArgLocation) {
922 emitter.emitVarIntWithFlag(numberingState.getNumber(arg.getType()),
923 !isa<UnknownLoc>(argLoc));
924 if (!isa<UnknownLoc>(argLoc))
925 emitter.emitVarInt(value: numberingState.getNumber(attr: argLoc));
926 } else {
927 emitter.emitVarInt(value: numberingState.getNumber(type: arg.getType()));
928 emitter.emitVarInt(value: numberingState.getNumber(attr: argLoc));
929 }
930 }
931 if (config.bytecodeVersion >= bytecode::kUseListOrdering) {
932 uint64_t maskOffset = emitter.size();
933 uint8_t encodingMask = 0;
934 emitter.emitByte(byte: 0);
935 writeUseListOrders(emitter, opEncodingMask&: encodingMask, range: args);
936 if (encodingMask)
937 emitter.patchByte(offset: maskOffset, value: encodingMask);
938 }
939 }
940
941 // Emit the operations within the block.
942 for (Operation &op : *block)
943 if (failed(result: writeOp(emitter, op: &op)))
944 return failure();
945 return success();
946}
947
948LogicalResult BytecodeWriter::writeOp(EncodingEmitter &emitter, Operation *op) {
949 emitter.emitVarInt(value: numberingState.getNumber(opName: op->getName()));
950
951 // Emit a mask for the operation components. We need to fill this in later
952 // (when we actually know what needs to be emitted), so emit a placeholder for
953 // now.
954 uint64_t maskOffset = emitter.size();
955 uint8_t opEncodingMask = 0;
956 emitter.emitByte(byte: 0);
957
958 // Emit the location for this operation.
959 emitter.emitVarInt(value: numberingState.getNumber(attr: op->getLoc()));
960
961 // Emit the attributes of this operation.
962 DictionaryAttr attrs = op->getDiscardableAttrDictionary();
963 // Allow deployment to version <kNativePropertiesEncoding by merging inherent
964 // attribute with the discardable ones. We should fail if there are any
965 // conflicts. When properties are not used by the op, also store everything as
966 // attributes.
967 if (config.bytecodeVersion < bytecode::kNativePropertiesEncoding ||
968 !op->getPropertiesStorage()) {
969 attrs = op->getAttrDictionary();
970 }
971 if (!attrs.empty()) {
972 opEncodingMask |= bytecode::OpEncodingMask::kHasAttrs;
973 emitter.emitVarInt(value: numberingState.getNumber(attrs));
974 }
975
976 // Emit the properties of this operation, for now we still support deployment
977 // to version <kNativePropertiesEncoding.
978 if (config.bytecodeVersion >= bytecode::kNativePropertiesEncoding) {
979 std::optional<ssize_t> propertiesId = propertiesSection.emit(op);
980 if (propertiesId.has_value()) {
981 opEncodingMask |= bytecode::OpEncodingMask::kHasProperties;
982 emitter.emitVarInt(value: *propertiesId);
983 }
984 }
985
986 // Emit the result types of the operation.
987 if (unsigned numResults = op->getNumResults()) {
988 opEncodingMask |= bytecode::OpEncodingMask::kHasResults;
989 emitter.emitVarInt(value: numResults);
990 for (Type type : op->getResultTypes())
991 emitter.emitVarInt(value: numberingState.getNumber(type));
992 }
993
994 // Emit the operands of the operation.
995 if (unsigned numOperands = op->getNumOperands()) {
996 opEncodingMask |= bytecode::OpEncodingMask::kHasOperands;
997 emitter.emitVarInt(value: numOperands);
998 for (Value operand : op->getOperands())
999 emitter.emitVarInt(value: numberingState.getNumber(value: operand));
1000 }
1001
1002 // Emit the successors of the operation.
1003 if (unsigned numSuccessors = op->getNumSuccessors()) {
1004 opEncodingMask |= bytecode::OpEncodingMask::kHasSuccessors;
1005 emitter.emitVarInt(value: numSuccessors);
1006 for (Block *successor : op->getSuccessors())
1007 emitter.emitVarInt(value: numberingState.getNumber(block: successor));
1008 }
1009
1010 // Emit the use-list orders to bytecode, so we can reconstruct the same order
1011 // at parsing.
1012 if (config.bytecodeVersion >= bytecode::kUseListOrdering)
1013 writeUseListOrders(emitter, opEncodingMask, range: ValueRange(op->getResults()));
1014
1015 // Check for regions.
1016 unsigned numRegions = op->getNumRegions();
1017 if (numRegions)
1018 opEncodingMask |= bytecode::OpEncodingMask::kHasInlineRegions;
1019
1020 // Update the mask for the operation.
1021 emitter.patchByte(offset: maskOffset, value: opEncodingMask);
1022
1023 // With the mask emitted, we can now emit the regions of the operation. We do
1024 // this after mask emission to avoid offset complications that may arise by
1025 // emitting the regions first (e.g. if the regions are huge, backpatching the
1026 // op encoding mask is more annoying).
1027 if (numRegions) {
1028 bool isIsolatedFromAbove = numberingState.isIsolatedFromAbove(op);
1029 emitter.emitVarIntWithFlag(value: numRegions, flag: isIsolatedFromAbove);
1030
1031 // If the region is not isolated from above, or we are emitting bytecode
1032 // targeting version <kLazyLoading, we don't use a section.
1033 if (isIsolatedFromAbove &&
1034 config.bytecodeVersion >= bytecode::kLazyLoading) {
1035 EncodingEmitter regionEmitter;
1036 if (failed(result: writeRegions(emitter&: regionEmitter, regions: op->getRegions())))
1037 return failure();
1038 emitter.emitSection(code: bytecode::Section::kIR, emitter: std::move(regionEmitter));
1039
1040 } else if (failed(result: writeRegions(emitter, regions: op->getRegions()))) {
1041 return failure();
1042 }
1043 }
1044 return success();
1045}
1046
1047void BytecodeWriter::writeUseListOrders(EncodingEmitter &emitter,
1048 uint8_t &opEncodingMask,
1049 ValueRange range) {
1050 // Loop over the results and store the use-list order per result index.
1051 DenseMap<unsigned, llvm::SmallVector<unsigned>> map;
1052 for (auto item : llvm::enumerate(First&: range)) {
1053 auto value = item.value();
1054 // No need to store a custom use-list order if the result does not have
1055 // multiple uses.
1056 if (value.use_empty() || value.hasOneUse())
1057 continue;
1058
1059 // For each result, assemble the list of pairs (use-list-index,
1060 // global-value-index). While doing so, detect if the global-value-index is
1061 // already ordered with respect to the use-list-index.
1062 bool alreadyOrdered = true;
1063 auto &firstUse = *value.use_begin();
1064 uint64_t prevID = bytecode::getUseID(
1065 val&: firstUse, ownerID: numberingState.getNumber(op: firstUse.getOwner()));
1066 llvm::SmallVector<std::pair<unsigned, uint64_t>> useListPairs(
1067 {{0, prevID}});
1068
1069 for (auto use : llvm::drop_begin(RangeOrContainer: llvm::enumerate(First: value.getUses()))) {
1070 uint64_t currentID = bytecode::getUseID(
1071 val&: use.value(), ownerID: numberingState.getNumber(op: use.value().getOwner()));
1072 // The use-list order achieved when building the IR at parsing always
1073 // pushes new uses on front. Hence, if the order by unique ID is
1074 // monotonically decreasing, a roundtrip to bytecode preserves such order.
1075 alreadyOrdered &= (prevID > currentID);
1076 useListPairs.push_back(Elt: {use.index(), currentID});
1077 prevID = currentID;
1078 }
1079
1080 // Do not emit if the order is already sorted.
1081 if (alreadyOrdered)
1082 continue;
1083
1084 // Sort the use indices by the unique ID indices in descending order.
1085 std::sort(
1086 first: useListPairs.begin(), last: useListPairs.end(),
1087 comp: [](auto elem1, auto elem2) { return elem1.second > elem2.second; });
1088
1089 map.try_emplace(Key: item.index(), Args: llvm::map_range(C&: useListPairs, F: [](auto elem) {
1090 return elem.first;
1091 }));
1092 }
1093
1094 if (map.empty())
1095 return;
1096
1097 opEncodingMask |= bytecode::OpEncodingMask::kHasUseListOrders;
1098 // Emit the number of results that have a custom use-list order if the number
1099 // of results is greater than one.
1100 if (range.size() != 1)
1101 emitter.emitVarInt(value: map.size());
1102
1103 for (const auto &item : map) {
1104 auto resultIdx = item.getFirst();
1105 auto useListOrder = item.getSecond();
1106
1107 // Compute the number of uses that are actually shuffled. If those are less
1108 // than half of the total uses, encoding the index pair `(src, dst)` is more
1109 // space efficient.
1110 size_t shuffledElements =
1111 llvm::count_if(Range: llvm::enumerate(First&: useListOrder),
1112 P: [](auto item) { return item.index() != item.value(); });
1113 bool indexPairEncoding = shuffledElements < (useListOrder.size() / 2);
1114
1115 // For single result, we don't need to store the result index.
1116 if (range.size() != 1)
1117 emitter.emitVarInt(value: resultIdx);
1118
1119 if (indexPairEncoding) {
1120 emitter.emitVarIntWithFlag(value: shuffledElements * 2, flag: indexPairEncoding);
1121 for (auto pair : llvm::enumerate(First&: useListOrder)) {
1122 if (pair.index() != pair.value()) {
1123 emitter.emitVarInt(value: pair.value());
1124 emitter.emitVarInt(value: pair.index());
1125 }
1126 }
1127 } else {
1128 emitter.emitVarIntWithFlag(value: useListOrder.size(), flag: indexPairEncoding);
1129 for (const auto &index : useListOrder)
1130 emitter.emitVarInt(value: index);
1131 }
1132 }
1133}
1134
1135LogicalResult BytecodeWriter::writeRegion(EncodingEmitter &emitter,
1136 Region *region) {
1137 // If the region is empty, we only need to emit the number of blocks (which is
1138 // zero).
1139 if (region->empty()) {
1140 emitter.emitVarInt(/*numBlocks*/ value: 0);
1141 return success();
1142 }
1143
1144 // Emit the number of blocks and values within the region.
1145 unsigned numBlocks, numValues;
1146 std::tie(args&: numBlocks, args&: numValues) = numberingState.getBlockValueCount(region);
1147 emitter.emitVarInt(value: numBlocks);
1148 emitter.emitVarInt(value: numValues);
1149
1150 // Emit the blocks within the region.
1151 for (Block &block : *region)
1152 if (failed(result: writeBlock(emitter, block: &block)))
1153 return failure();
1154 return success();
1155}
1156
1157LogicalResult BytecodeWriter::writeIRSection(EncodingEmitter &emitter,
1158 Operation *op) {
1159 EncodingEmitter irEmitter;
1160
1161 // Write the IR section the same way as a block with no arguments. Note that
1162 // the low-bit of the operation count for a block is used to indicate if the
1163 // block has arguments, which in this case is always false.
1164 irEmitter.emitVarIntWithFlag(/*numOps*/ value: 1, /*hasArgs*/ flag: false);
1165
1166 // Emit the operations.
1167 if (failed(result: writeOp(emitter&: irEmitter, op)))
1168 return failure();
1169
1170 emitter.emitSection(code: bytecode::Section::kIR, emitter: std::move(irEmitter));
1171 return success();
1172}
1173
1174//===----------------------------------------------------------------------===//
1175// Resources
1176
1177namespace {
1178/// This class represents a resource builder implementation for the MLIR
1179/// bytecode format.
1180class ResourceBuilder : public AsmResourceBuilder {
1181public:
1182 using PostProcessFn = function_ref<void(StringRef, AsmResourceEntryKind)>;
1183
1184 ResourceBuilder(EncodingEmitter &emitter, StringSectionBuilder &stringSection,
1185 PostProcessFn postProcessFn, bool shouldElideData)
1186 : emitter(emitter), stringSection(stringSection),
1187 postProcessFn(postProcessFn), shouldElideData(shouldElideData) {}
1188 ~ResourceBuilder() override = default;
1189
1190 void buildBlob(StringRef key, ArrayRef<char> data,
1191 uint32_t dataAlignment) final {
1192 if (!shouldElideData)
1193 emitter.emitOwnedBlobAndAlignment(data, alignment: dataAlignment);
1194 postProcessFn(key, AsmResourceEntryKind::Blob);
1195 }
1196 void buildBool(StringRef key, bool data) final {
1197 if (!shouldElideData)
1198 emitter.emitByte(byte: data);
1199 postProcessFn(key, AsmResourceEntryKind::Bool);
1200 }
1201 void buildString(StringRef key, StringRef data) final {
1202 if (!shouldElideData)
1203 emitter.emitVarInt(value: stringSection.insert(str: data));
1204 postProcessFn(key, AsmResourceEntryKind::String);
1205 }
1206
1207private:
1208 EncodingEmitter &emitter;
1209 StringSectionBuilder &stringSection;
1210 PostProcessFn postProcessFn;
1211 bool shouldElideData = false;
1212};
1213} // namespace
1214
1215void BytecodeWriter::writeResourceSection(Operation *op,
1216 EncodingEmitter &emitter) {
1217 EncodingEmitter resourceEmitter;
1218 EncodingEmitter resourceOffsetEmitter;
1219 uint64_t prevOffset = 0;
1220 SmallVector<std::tuple<StringRef, AsmResourceEntryKind, uint64_t>>
1221 curResourceEntries;
1222
1223 // Functor used to process the offset for a resource of `kind` defined by
1224 // 'key'.
1225 auto appendResourceOffset = [&](StringRef key, AsmResourceEntryKind kind) {
1226 uint64_t curOffset = resourceEmitter.size();
1227 curResourceEntries.emplace_back(Args&: key, Args&: kind, Args: curOffset - prevOffset);
1228 prevOffset = curOffset;
1229 };
1230
1231 // Functor used to emit a resource group defined by 'key'.
1232 auto emitResourceGroup = [&](uint64_t key) {
1233 resourceOffsetEmitter.emitVarInt(value: key);
1234 resourceOffsetEmitter.emitVarInt(value: curResourceEntries.size());
1235 for (auto [key, kind, size] : curResourceEntries) {
1236 resourceOffsetEmitter.emitVarInt(value: stringSection.insert(str: key));
1237 resourceOffsetEmitter.emitVarInt(value: size);
1238 resourceOffsetEmitter.emitByte(byte: kind);
1239 }
1240 };
1241
1242 // Builder used to emit resources.
1243 ResourceBuilder entryBuilder(resourceEmitter, stringSection,
1244 appendResourceOffset,
1245 config.shouldElideResourceData);
1246
1247 // Emit the external resource entries.
1248 resourceOffsetEmitter.emitVarInt(value: config.externalResourcePrinters.size());
1249 for (const auto &printer : config.externalResourcePrinters) {
1250 curResourceEntries.clear();
1251 printer->buildResources(op, builder&: entryBuilder);
1252 emitResourceGroup(stringSection.insert(str: printer->getName()));
1253 }
1254
1255 // Emit the dialect resource entries.
1256 for (DialectNumbering &dialect : numberingState.getDialects()) {
1257 if (!dialect.asmInterface)
1258 continue;
1259 curResourceEntries.clear();
1260 dialect.asmInterface->buildResources(op, referencedResources: dialect.resources, builder&: entryBuilder);
1261
1262 // Emit the declaration resources for this dialect, these didn't get emitted
1263 // by the interface. These resources don't have data attached, so just use a
1264 // "blob" kind as a placeholder.
1265 for (const auto &resource : dialect.resourceMap)
1266 if (resource.second->isDeclaration)
1267 appendResourceOffset(resource.first, AsmResourceEntryKind::Blob);
1268
1269 // Emit the resource group for this dialect.
1270 if (!curResourceEntries.empty())
1271 emitResourceGroup(dialect.number);
1272 }
1273
1274 // If we didn't emit any resource groups, elide the resource sections.
1275 if (resourceOffsetEmitter.size() == 0)
1276 return;
1277
1278 emitter.emitSection(code: bytecode::Section::kResourceOffset,
1279 emitter: std::move(resourceOffsetEmitter));
1280 emitter.emitSection(code: bytecode::Section::kResource, emitter: std::move(resourceEmitter));
1281}
1282
1283//===----------------------------------------------------------------------===//
1284// Strings
1285
1286void BytecodeWriter::writeStringSection(EncodingEmitter &emitter) {
1287 EncodingEmitter stringEmitter;
1288 stringSection.write(emitter&: stringEmitter);
1289 emitter.emitSection(code: bytecode::Section::kString, emitter: std::move(stringEmitter));
1290}
1291
1292//===----------------------------------------------------------------------===//
1293// Properties
1294
1295void BytecodeWriter::writePropertiesSection(EncodingEmitter &emitter) {
1296 EncodingEmitter propertiesEmitter;
1297 propertiesSection.write(emitter&: propertiesEmitter);
1298 emitter.emitSection(code: bytecode::Section::kProperties,
1299 emitter: std::move(propertiesEmitter));
1300}
1301
1302//===----------------------------------------------------------------------===//
1303// Entry Points
1304//===----------------------------------------------------------------------===//
1305
1306LogicalResult mlir::writeBytecodeToFile(Operation *op, raw_ostream &os,
1307 const BytecodeWriterConfig &config) {
1308 BytecodeWriter writer(op, config);
1309 return writer.write(rootOp: op, os);
1310}
1311

// Source: mlir/lib/Bytecode/Writer/BytecodeWriter.cpp