DecoderEmitter.cpp source code [llvm/utils/TableGen/DecoderEmitter.cpp]

1	//===---------------- DecoderEmitter.cpp - Decoder Generator --------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// It contains the tablegen backend that emits the decoder functions for
10	// targets with fixed/variable length instruction set.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "Common/CodeGenHwModes.h"
15	#include "Common/CodeGenInstruction.h"
16	#include "Common/CodeGenTarget.h"
17	#include "Common/InfoByHwMode.h"
18	#include "Common/VarLenCodeEmitterGen.h"
19	#include "TableGenBackends.h"
20	#include "llvm/ADT/APInt.h"
21	#include "llvm/ADT/ArrayRef.h"
22	#include "llvm/ADT/CachedHashString.h"
23	#include "llvm/ADT/STLExtras.h"
24	#include "llvm/ADT/SetVector.h"
25	#include "llvm/ADT/SmallBitVector.h"
26	#include "llvm/ADT/SmallString.h"
27	#include "llvm/ADT/Statistic.h"
28	#include "llvm/ADT/StringExtras.h"
29	#include "llvm/ADT/StringRef.h"
30	#include "llvm/MC/MCDecoderOps.h"
31	#include "llvm/Support/Casting.h"
32	#include "llvm/Support/CommandLine.h"
33	#include "llvm/Support/Debug.h"
34	#include "llvm/Support/ErrorHandling.h"
35	#include "llvm/Support/FormattedStream.h"
36	#include "llvm/Support/LEB128.h"
37	#include "llvm/Support/raw_ostream.h"
38	#include "llvm/TableGen/Error.h"
39	#include "llvm/TableGen/Record.h"
40	#include <algorithm>
41	#include <cassert>
42	#include <cstddef>
43	#include <cstdint>
44	#include <map>
45	#include <memory>
46	#include <set>
47	#include <string>
48	#include <utility>
49	#include <vector>
50
51	using namespace llvm;
52
53	#define DEBUG_TYPE "decoder-emitter"
54
55	extern cl::OptionCategory DisassemblerEmitterCat;
56
57	enum SuppressLevel {
58	SUPPRESSION_DISABLE,
59	SUPPRESSION_LEVEL1,
60	SUPPRESSION_LEVEL2
61	};
62
63	cl::opt<SuppressLevel> DecoderEmitterSuppressDuplicates(
64	"suppress-per-hwmode-duplicates",
65	cl::desc ("Suppress duplication of instrs into per-HwMode decoder tables"),
66	cl::values(
67	clEnumValN(
68	SUPPRESSION_DISABLE, "O0",
69	"Do not prevent DecoderTable duplications caused by HwModes"),
70	clEnumValN(
71	SUPPRESSION_LEVEL1, "O1",
72	"Remove duplicate DecoderTable entries generated due to HwModes"),
73	clEnumValN(
74	SUPPRESSION_LEVEL2, "O2",
75	"Extract HwModes-specific instructions into new DecoderTables, "
76	"significantly reducing Table Duplications")),
77	cl::init(Val: SUPPRESSION_DISABLE), cl::cat (DisassemblerEmitterCat));
78
79	namespace {
80
81	STATISTIC(NumEncodings, "Number of encodings considered");
82	STATISTIC(NumEncodingsLackingDisasm,
83	"Number of encodings without disassembler info");
84	STATISTIC(NumInstructions, "Number of instructions considered");
85	STATISTIC(NumEncodingsSupported, "Number of encodings supported");
86	STATISTIC(NumEncodingsOmitted, "Number of encodings omitted");
87
88	struct EncodingField {
89	unsigned Base, Width, Offset;
90	EncodingField(unsigned B, unsigned W, unsigned O)
91	: Base(B), Width(W), Offset(O) {}
92	};
93
94	struct OperandInfo {
95	std::vector<EncodingField> Fields;
96	std::string Decoder;
97	bool HasCompleteDecoder;
98	uint64_t InitValue;
99
100	OperandInfo(std::string D, bool HCD)
101	: Decoder (std::move(D)), HasCompleteDecoder(HCD), InitValue(`0`) {}
102
103	void addField(unsigned Base, unsigned Width, unsigned Offset) {
104	Fields.push_back(x: EncodingField (Base, Width, Offset));
105	}
106
107	unsigned numFields() const { return Fields.size(); }
108
109	typedef std::vector<EncodingField>::const_iterator const_iterator;
110
111	const_iterator begin() const { return Fields.begin(); }
112	const_iterator end() const { return Fields.end(); }
113	};
114
115	typedef std::vector<uint8_t> DecoderTable;
116	typedef uint32_t DecoderFixup;
117	typedef std::vector<DecoderFixup> FixupList;
118	typedef std::vector<FixupList> FixupScopeList;
119	typedef SmallSetVector<CachedHashString, `16`> PredicateSet;
120	typedef SmallSetVector<CachedHashString, `16`> DecoderSet;
121	struct DecoderTableInfo {
122	DecoderTable Table;
123	FixupScopeList FixupStack;
124	PredicateSet Predicates;
125	DecoderSet Decoders;
126	};
127
128	struct EncodingAndInst {
129	const Record *EncodingDef;
130	const CodeGenInstruction *Inst;
131	StringRef HwModeName;
132
133	EncodingAndInst(const Record EncodingDef, const* CodeGenInstruction *Inst,
134	StringRef HwModeName = "")
135	: EncodingDef(EncodingDef), Inst(Inst), HwModeName (HwModeName) {}
136	};
137
138	struct EncodingIDAndOpcode {
139	unsigned EncodingID;
140	unsigned Opcode;
141
142	EncodingIDAndOpcode() : EncodingID(`0`), Opcode(`0`) {}
143	EncodingIDAndOpcode(unsigned EncodingID, unsigned Opcode)
144	: EncodingID(EncodingID), Opcode(Opcode) {}
145	};
146
147	using EncodingIDsVec = std::vector<EncodingIDAndOpcode>;
148	using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>;
149
150	raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) {
151	if (Value.EncodingDef != Value.Inst->TheDef)
152	OS << Value.EncodingDef->getName() << ":";
153	OS << Value.Inst->TheDef->getName();
154	return OS;
155	}
156
157	class DecoderEmitter {
158	RecordKeeper &RK;
159	std::vector<EncodingAndInst> NumberedEncodings;
160
161	public:
162	DecoderEmitter(RecordKeeper &R, std::string PredicateNamespace)
163	: RK(R), Target (R), PredicateNamespace (std::move(PredicateNamespace)) {}
164
165	// Emit the decoder state machine table.
166	void emitTable(formatted_raw_ostream &o, DecoderTable &Table,
167	unsigned Indentation, unsigned BitWidth, StringRef Namespace,
168	const EncodingIDsVec &EncodingIDs) const;
169	void emitInstrLenTable(formatted_raw_ostream &OS,
170	std::vector<unsigned> &InstrLen) const;
171	void emitPredicateFunction(formatted_raw_ostream &OS,
172	PredicateSet &Predicates,
173	unsigned Indentation) const;
174	void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
175	unsigned Indentation) const;
176
177	// run - Output the code emitter
178	void run(raw_ostream &o);
179
180	private:
181	CodeGenTarget Target;
182
183	public:
184	std::string PredicateNamespace;
185	};
186
187	} // end anonymous namespace
188
189	// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
190	// for a bit value.
191	//
192	// BIT_UNFILTERED is used as the init value for a filter position. It is used
193	// only for filter processings.
194	typedef enum {
195	BIT_TRUE, // '1'
196	BIT_FALSE, // '0'
197	BIT_UNSET, // '?'
198	BIT_UNFILTERED // unfiltered
199	} bit_value_t;
200
201	static bool ValueSet(bit_value_t V) {
202	return (V == BIT_TRUE \|\| V == BIT_FALSE);
203	}
204
205	static bool ValueNotSet(bit_value_t V) { return (V == BIT_UNSET); }
206
207	static int Value(bit_value_t V) {
208	return ValueNotSet(V) ? -`1` : (V == BIT_FALSE ? `0` : `1`);
209	}
210
211	static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
212	if (BitInit *bit = dyn_cast<BitInit>(Val: bits.getBit(Bit: index)))
213	return bit->getValue() ? BIT_TRUE : BIT_FALSE;
214
215	// The bit is uninitialized.
216	return BIT_UNSET;
217	}
218
219	// Prints the bit value for each position.
220	static void dumpBits(raw_ostream &o, const BitsInit &bits) {
221	for (unsigned index = bits.getNumBits(); index > `0`; --index) {
222	switch (bitFromBits(bits, index: index - `1`)) {
223	case BIT_TRUE:
224	o << "1";
225	break;
226	case BIT_FALSE:
227	o << "0";
228	break;
229	case BIT_UNSET:
230	o << "_";
231	break;
232	default:
233	llvm_unreachable("unexpected return value from bitFromBits");
234	}
235	}
236	}
237
238	static BitsInit &getBitsField(const Record &def, StringRef str) {
239	const RecordVal *RV = def.getValue(Name: str);
240	if (BitsInit *Bits = dyn_cast<BitsInit>(Val: RV->getValue()))
241	return *Bits;
242
243	// variable length instruction
244	VarLenInst VLI = VarLenInst (cast<DagInit>(Val: RV->getValue()), RV);
245	SmallVector<Init *, `16`> Bits;
246
247	for (const auto &SI : VLI) {
248	if (const BitsInit *BI = dyn_cast<BitsInit>(Val: SI.Value)) {
249	for (unsigned Idx = `0U`; Idx < BI->getNumBits(); ++Idx) {
250	Bits.push_back(Elt: BI->getBit(Bit: Idx));
251	}
252	} else if (const BitInit *BI = dyn_cast<BitInit>(Val: SI.Value)) {
253	Bits.push_back(Elt: const_cast<BitInit *>(BI));
254	} else {
255	for (unsigned Idx = `0U`; Idx < SI.BitWidth; ++Idx)
256	Bits.push_back(Elt: UnsetInit::get(RK&: def.getRecords()));
257	}
258	}
259
260	return *BitsInit::get(RK&: def.getRecords(), Range: Bits);
261	}
262
263	// Representation of the instruction to work on.
264	typedef std::vector<bit_value_t> insn_t;
265
266	namespace {
267
268	static const uint64_t NO_FIXED_SEGMENTS_SENTINEL = -`1ULL`;
269
270	class FilterChooser;
271
272	/// Filter - Filter works with FilterChooser to produce the decoding tree for
273	/// the ISA.
274	///
275	/// It is useful to think of a Filter as governing the switch stmts of the
276	/// decoding tree in a certain level. Each case stmt delegates to an inferior
277	/// FilterChooser to decide what further decoding logic to employ, or in another
278	/// words, what other remaining bits to look at. The FilterChooser eventually
279	/// chooses a best Filter to do its job.
280	///
281	/// This recursive scheme ends when the number of Opcodes assigned to the
282	/// FilterChooser becomes 1 or if there is a conflict. A conflict happens when
283	/// the Filter/FilterChooser combo does not know how to distinguish among the
284	/// Opcodes assigned.
285	///
286	/// An example of a conflict is
287	///
288	/// Conflict:
289	/// 111101000.00........00010000....
290	/// 111101000.00........0001........
291	/// 1111010...00........0001........
292	/// 1111010...00....................
293	/// 1111010.........................
294	/// 1111............................
295	/// ................................
296	/// VST4q8a 111101000_00________00010000____
297	/// VST4q8b 111101000_00________00010000____
298	///
299	/// The Debug output shows the path that the decoding tree follows to reach the
300	/// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced
301	/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
302	///
303	/// The encoding info in the .td files does not specify this meta information,
304	/// which could have been used by the decoder to resolve the conflict. The
305	/// decoder could try to decode the even/odd register numbering and assign to
306	/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
307	/// version and return the Opcode since the two have the same Asm format string.
308	class Filter {
309	protected:
310	const FilterChooser
311	Owner; // points to the FilterChooser who owns this filter*
312	unsigned StartBit; // the starting bit position
313	unsigned NumBits; // number of bits to filter
314	bool Mixed; // a mixed region contains both set and unset bits
315
316	// Map of well-known segment value to the set of uid's with that value.
317	std::map<uint64_t, std::vector<EncodingIDAndOpcode>> FilteredInstructions;
318
319	// Set of uid's with non-constant segment values.
320	std::vector<EncodingIDAndOpcode> VariableInstructions;
321
322	// Map of well-known segment value to its delegate.
323	std::map<uint64_t, std::unique_ptr<const FilterChooser>> FilterChooserMap;
324
325	// Number of instructions which fall under FilteredInstructions category.
326	unsigned NumFiltered;
327
328	// Keeps track of the last opcode in the filtered bucket.
329	EncodingIDAndOpcode LastOpcFiltered;
330
331	public:
332	Filter(Filter &&f);
333	Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed);
334
335	~Filter() = default;
336
337	unsigned getNumFiltered() const { return NumFiltered; }
338
339	EncodingIDAndOpcode getSingletonOpc() const {
340	assert(NumFiltered == `1`);
341	return LastOpcFiltered;
342	}
343
344	// Return the filter chooser for the group of instructions without constant
345	// segment values.
346	const FilterChooser &getVariableFC() const {
347	assert(NumFiltered == `1`);
348	assert(FilterChooserMap.size() == `1`);
349	return *(FilterChooserMap.find(x: NO_FIXED_SEGMENTS_SENTINEL)->second);
350	}
351
352	// Divides the decoding task into sub tasks and delegates them to the
353	// inferior FilterChooser's.
354	//
355	// A special case arises when there's only one entry in the filtered
356	// instructions. In order to unambiguously decode the singleton, we need to
357	// match the remaining undecoded encoding bits against the singleton.
358	void recurse();
359
360	// Emit table entries to decode instructions given a segment or segments of
361	// bits.
362	void emitTableEntry(DecoderTableInfo &TableInfo) const;
363
364	// Returns the number of fanout produced by the filter. More fanout implies
365	// the filter distinguishes more categories of instructions.
366	unsigned usefulness() const;
367	}; // end class Filter
368
369	} // end anonymous namespace
370
371	// These are states of our finite state machines used in FilterChooser's
372	// filterProcessor() which produces the filter candidates to use.
373	typedef enum {
374	ATTR_NONE,
375	ATTR_FILTERED,
376	ATTR_ALL_SET,
377	ATTR_ALL_UNSET,
378	ATTR_MIXED
379	} bitAttr_t;
380
381	/// FilterChooser - FilterChooser chooses the best filter among a set of Filters
382	/// in order to perform the decoding of instructions at the current level.
383	///
384	/// Decoding proceeds from the top down. Based on the well-known encoding bits
385	/// of instructions available, FilterChooser builds up the possible Filters that
386	/// can further the task of decoding by distinguishing among the remaining
387	/// candidate instructions.
388	///
389	/// Once a filter has been chosen, it is called upon to divide the decoding task
390	/// into sub-tasks and delegates them to its inferior FilterChoosers for further
391	/// processings.
392	///
393	/// It is useful to think of a Filter as governing the switch stmts of the
394	/// decoding tree. And each case is delegated to an inferior FilterChooser to
395	/// decide what further remaining bits to look at.
396	namespace {
397
398	class FilterChooser {
399	protected:
400	friend class Filter;
401
402	// Vector of codegen instructions to choose our filter.
403	ArrayRef<EncodingAndInst> AllInstructions;
404
405	// Vector of uid's for this filter chooser to work on.
406	// The first member of the pair is the opcode id being decoded, the second is
407	// the opcode id that should be emitted.
408	const std::vector<EncodingIDAndOpcode> &Opcodes;
409
410	// Lookup table for the operand decoding of instructions.
411	const std::map<unsigned, std::vector<OperandInfo>> &Operands;
412
413	// Vector of candidate filters.
414	std::vector<Filter> Filters;
415
416	// Array of bit values passed down from our parent.
417	// Set to all BIT_UNFILTERED's for Parent == NULL.
418	std::vector<bit_value_t> FilterBitValues;
419
420	// Links to the FilterChooser above us in the decoding tree.
421	const FilterChooser *Parent;
422
423	// Index of the best filter from Filters.
424	int BestIndex;
425
426	// Width of instructions
427	unsigned BitWidth;
428
429	// Parent emitter
430	const DecoderEmitter *Emitter;
431
432	public:
433	FilterChooser(ArrayRef<EncodingAndInst> Insts,
434	const std::vector<EncodingIDAndOpcode> &IDs,
435	const std::map<unsigned, std::vector<OperandInfo>> &Ops,
436	unsigned BW, const DecoderEmitter *E)
437	: AllInstructions (Insts), Opcodes(IDs), Operands(Ops),
438	FilterBitValues (BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-`1`),
439	BitWidth(BW), Emitter(E) {
440	doFilter();
441	}
442
443	FilterChooser(ArrayRef<EncodingAndInst> Insts,
444	const std::vector<EncodingIDAndOpcode> &IDs,
445	const std::map<unsigned, std::vector<OperandInfo>> &Ops,
446	const std::vector<bit_value_t> &ParentFilterBitValues,
447	const FilterChooser &parent)
448	: AllInstructions (Insts), Opcodes(IDs), Operands(Ops),
449	FilterBitValues (ParentFilterBitValues), Parent(&parent), BestIndex(-`1`),
450	BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
451	doFilter();
452	}
453
454	FilterChooser(const FilterChooser &) = delete;
455	void operator=(const FilterChooser &) = delete;
456
457	unsigned getBitWidth() const { return BitWidth; }
458
459	protected:
460	// Populates the insn given the uid.
461	void insnWithID(insn_t &Insn, unsigned Opcode) const {
462	const Record *EncodingDef = AllInstructions [Opcode].EncodingDef;
463	BitsInit &Bits = getBitsField(def: *EncodingDef, str: "Inst");
464	Insn.resize(new_size: std::max(a: BitWidth, b: Bits.getNumBits()), x: BIT_UNSET);
465	// We may have a SoftFail bitmask, which specifies a mask where an encoding
466	// may differ from the value in "Inst" and yet still be valid, but the
467	// disassembler should return SoftFail instead of Success.
468	//
469	// This is used for marking UNPREDICTABLE instructions in the ARM world.
470	const RecordVal *RV = EncodingDef->getValue(Name: "SoftFail");
471	const BitsInit SFBits = RV ? dyn_cast<BitsInit>(Val: RV->getValue()) : nullptr*;
472	for (unsigned i = `0`; i < Bits.getNumBits(); ++i) {
473	if (SFBits && bitFromBits(bits: *SFBits, index: i) == BIT_TRUE)
474	Insn [i] = BIT_UNSET;
475	else
476	Insn [i] = bitFromBits(bits: Bits, index: i);
477	}
478	}
479
480	// Emit the name of the encoding/instruction pair.
481	void emitNameWithID(raw_ostream &OS, unsigned Opcode) const {
482	const Record *EncodingDef = AllInstructions [Opcode].EncodingDef;
483	const Record *InstDef = AllInstructions [Opcode].Inst->TheDef;
484	if (EncodingDef != InstDef)
485	OS << EncodingDef->getName() << ":";
486	OS << InstDef->getName();
487	}
488
489	// Populates the field of the insn given the start position and the number of
490	// consecutive bits to scan for.
491	//
492	// Returns a pair of values (indicator, field), where the indicator is false
493	// if there exists any uninitialized bit value in the range and true if all
494	// bits are well-known. The second value is the potentially populated field.
495	std::pair<bool, uint64_t> fieldFromInsn(const insn_t &Insn, unsigned StartBit,
496	unsigned NumBits) const;
497
498	/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
499	/// filter array as a series of chars.
500	void dumpFilterArray(raw_ostream &o,
501	const std::vector<bit_value_t> &filter) const;
502
503	/// dumpStack - dumpStack traverses the filter chooser chain and calls
504	/// dumpFilterArray on each filter chooser up to the top level one.
505	void dumpStack(raw_ostream &o, const char prefix) const*;
506
507	Filter &bestFilter() {
508	assert(BestIndex != -`1` && "BestIndex not set");
509	return Filters [BestIndex];
510	}
511
512	bool PositionFiltered(unsigned i) const {
513	return ValueSet(V: FilterBitValues [i]);
514	}
515
516	// Calculates the island(s) needed to decode the instruction.
517	// This returns a lit of undecoded bits of an instructions, for example,
518	// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
519	// decoded bits in order to verify that the instruction matches the Opcode.
520	unsigned getIslands(std::vector<unsigned> &StartBits,
521	std::vector<unsigned> &EndBits,
522	std::vector<uint64_t> &FieldVals,
523	const insn_t &Insn) const;
524
525	// Emits code to check the Predicates member of an instruction are true.
526	// Returns true if predicate matches were emitted, false otherwise.
527	bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
528	unsigned Opc) const;
529	bool emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
530	raw_ostream &OS) const;
531
532	bool doesOpcodeNeedPredicate(unsigned Opc) const;
533	unsigned getPredicateIndex(DecoderTableInfo &TableInfo, StringRef P) const;
534	void emitPredicateTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
535
536	void emitSoftFailTableEntry(DecoderTableInfo &TableInfo, unsigned Opc) const;
537
538	// Emits table entries to decode the singleton.
539	void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
540	EncodingIDAndOpcode Opc) const;
541
542	// Emits code to decode the singleton, and then to decode the rest.
543	void emitSingletonTableEntry(DecoderTableInfo &TableInfo,
544	const Filter &Best) const;
545
546	void emitBinaryParser(raw_ostream &o, unsigned &Indentation,
547	const OperandInfo &OpInfo,
548	bool &OpHasCompleteDecoder) const;
549
550	void emitDecoder(raw_ostream &OS, unsigned Indentation, unsigned Opc,
551	bool &HasCompleteDecoder) const;
552	unsigned getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
553	bool &HasCompleteDecoder) const;
554
555	// Assign a single filter and run with it.
556	void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
557
558	// reportRegion is a helper function for filterProcessor to mark a region as
559	// eligible for use as a filter region.
560	void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
561	bool AllowMixed);
562
563	// FilterProcessor scans the well-known encoding bits of the instructions and
564	// builds up a list of candidate filters. It chooses the best filter and
565	// recursively descends down the decoding tree.
566	bool filterProcessor(bool AllowMixed, bool Greedy = true);
567
568	// Decides on the best configuration of filter(s) to use in order to decode
569	// the instructions. A conflict of instructions may occur, in which case we
570	// dump the conflict set to the standard error.
571	void doFilter();
572
573	public:
574	// emitTableEntries - Emit state machine entries to decode our share of
575	// instructions.
576	void emitTableEntries(DecoderTableInfo &TableInfo) const;
577	};
578
579	} // end anonymous namespace
580
581	///////////////////////////
582	// //
583	// Filter Implementation //
584	// //
585	///////////////////////////
586
587	Filter::Filter(Filter &&f)
588	: Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
589	FilteredInstructions (std::move(f.FilteredInstructions)),
590	VariableInstructions (std::move(f.VariableInstructions)),
591	FilterChooserMap (std::move(f.FilterChooserMap)),
592	NumFiltered(f.NumFiltered), LastOpcFiltered (f.LastOpcFiltered) {}
593
594	Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
595	bool mixed)
596	: Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
597	assert(StartBit + NumBits - `1` < Owner->BitWidth);
598
599	NumFiltered = `0`;
600	LastOpcFiltered = {`0`, `0`};
601
602	for (const auto &OpcPair : Owner->Opcodes) {
603	insn_t Insn;
604
605	// Populates the insn given the uid.
606	Owner->insnWithID(Insn, Opcode: OpcPair.EncodingID);
607
608	// Scans the segment for possibly well-specified encoding bits.
609	auto [Ok, Field] = Owner->fieldFromInsn(Insn, StartBit, NumBits);
610
611	if (Ok) {
612	// The encoding bits are well-known. Lets add the uid of the
613	// instruction into the bucket keyed off the constant field value.
614	LastOpcFiltered = OpcPair;
615	FilteredInstructions [Field].push_back(x: LastOpcFiltered);
616	++NumFiltered;
617	} else {
618	// Some of the encoding bit(s) are unspecified. This contributes to
619	// one additional member of "Variable" instructions.
620	VariableInstructions.push_back(x: OpcPair);
621	}
622	}
623
624	assert((FilteredInstructions.size() + VariableInstructions.size() > `0`) &&
625	"Filter returns no instruction categories");
626	}
627
628	// Divides the decoding task into sub tasks and delegates them to the
629	// inferior FilterChooser's.
630	//
631	// A special case arises when there's only one entry in the filtered
632	// instructions. In order to unambiguously decode the singleton, we need to
633	// match the remaining undecoded encoding bits against the singleton.
634	void Filter::recurse() {
635	// Starts by inheriting our parent filter chooser's filter bit values.
636	std::vector<bit_value_t> BitValueArray(Owner->FilterBitValues);
637
638	if (!VariableInstructions.empty()) {
639	// Conservatively marks each segment position as BIT_UNSET.
640	for (unsigned bitIndex = `0`; bitIndex < NumBits; ++bitIndex)
641	BitValueArray [StartBit + bitIndex] = BIT_UNSET;
642
643	// Delegates to an inferior filter chooser for further processing on this
644	// group of instructions whose segment values are variable.
645	FilterChooserMap.insert(x: std::pair(
646	NO_FIXED_SEGMENTS_SENTINEL,
647	std::make_unique<FilterChooser>(args: Owner->AllInstructions,
648	args&: VariableInstructions, args: Owner->Operands,
649	args&: BitValueArray, args: *Owner)));
650	}
651
652	// No need to recurse for a singleton filtered instruction.
653	// See also Filter::emit().*
654	if (getNumFiltered() == `1`) {
655	assert(FilterChooserMap.size() == `1`);
656	return;
657	}
658
659	// Otherwise, create sub choosers.
660	for (const auto &Inst : FilteredInstructions) {
661
662	// Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
663	for (unsigned bitIndex = `0`; bitIndex < NumBits; ++bitIndex) {
664	if (Inst.first & (`1ULL` << bitIndex))
665	BitValueArray [StartBit + bitIndex] = BIT_TRUE;
666	else
667	BitValueArray [StartBit + bitIndex] = BIT_FALSE;
668	}
669
670	// Delegates to an inferior filter chooser for further processing on this
671	// category of instructions.
672	FilterChooserMap.insert(
673	x: std::pair(Inst.first, std::make_unique<FilterChooser>(
674	args: Owner->AllInstructions, args: Inst.second,
675	args: Owner->Operands, args&: BitValueArray, args: *Owner)));
676	}
677	}
678
679	static void resolveTableFixups(DecoderTable &Table, const FixupList &Fixups,
680	uint32_t DestIdx) {
681	// Any NumToSkip fixups in the current scope can resolve to the
682	// current location.
683	for (FixupList::const_reverse_iterator I = Fixups.rbegin(), E = Fixups.rend();
684	I != E; ++I) {
685	// Calculate the distance from the byte following the fixup entry byte
686	// to the destination. The Target is calculated from after the 16-bit
687	// NumToSkip entry itself, so subtract two from the displacement here
688	// to account for that.
689	uint32_t FixupIdx = *I;
690	uint32_t Delta = DestIdx - FixupIdx - `3`;
691	// Our NumToSkip entries are 24-bits. Make sure our table isn't too
692	// big.
693	assert(Delta < (`1u` << `24`));
694	Table [FixupIdx] = (uint8_t)Delta;
695	Table [FixupIdx + `1`] = (uint8_t)(Delta >> `8`);
696	Table [FixupIdx + `2`] = (uint8_t)(Delta >> `16`);
697	}
698	}
699
700	// Emit table entries to decode instructions given a segment or segments
701	// of bits.
702	void Filter::emitTableEntry(DecoderTableInfo &TableInfo) const {
703	assert((NumBits < (`1u` << `8`)) && "NumBits overflowed uint8 table entry!");
704	TableInfo.Table.push_back(x: MCD::OPC_ExtractField);
705
706	SmallString<`16`> SBytes;
707	raw_svector_ostream S(SBytes);
708	encodeULEB128(Value: StartBit, OS&: S);
709	TableInfo.Table.insert(position: TableInfo.Table.end(), first: SBytes.begin(), last: SBytes.end());
710	TableInfo.Table.push_back(x: NumBits);
711
712	// A new filter entry begins a new scope for fixup resolution.
713	TableInfo.FixupStack.emplace_back();
714
715	DecoderTable &Table = TableInfo.Table;
716
717	size_t PrevFilter = `0`;
718	bool HasFallthrough = false;
719	for (const auto &Filter : FilterChooserMap) {
720	// Field value -1 implies a non-empty set of variable instructions.
721	// See also recurse().
722	if (Filter.first == NO_FIXED_SEGMENTS_SENTINEL) {
723	HasFallthrough = true;
724
725	// Each scope should always have at least one filter value to check
726	// for.
727	assert(PrevFilter != `0` && "empty filter set!");
728	FixupList &CurScope = TableInfo.FixupStack.back();
729	// Resolve any NumToSkip fixups in the current scope.
730	resolveTableFixups(Table, Fixups: CurScope, DestIdx: Table.size());
731	CurScope.clear();
732	PrevFilter = `0`; // Don't re-process the filter's fallthrough.
733	} else {
734	Table.push_back(x: MCD::OPC_FilterValue);
735	// Encode and emit the value to filter against.
736	uint8_t Buffer[`16`];
737	unsigned Len = encodeULEB128(Value: Filter.first, p: Buffer);
738	Table.insert(position: Table.end(), first: Buffer, last: Buffer + Len);
739	// Reserve space for the NumToSkip entry. We'll backpatch the value
740	// later.
741	PrevFilter = Table.size();
742	Table.push_back(x: `0`);
743	Table.push_back(x: `0`);
744	Table.push_back(x: `0`);
745	}
746
747	// We arrive at a category of instructions with the same segment value.
748	// Now delegate to the sub filter chooser for further decodings.
749	// The case may fallthrough, which happens if the remaining well-known
750	// encoding bits do not match exactly.
751	Filter.second ->emitTableEntries(TableInfo);
752
753	// Now that we've emitted the body of the handler, update the NumToSkip
754	// of the filter itself to be able to skip forward when false. Subtract
755	// two as to account for the width of the NumToSkip field itself.
756	if (PrevFilter) {
757	uint32_t NumToSkip = Table.size() - PrevFilter - `3`;
758	assert(NumToSkip < (`1u` << `24`) &&
759	"disassembler decoding table too large!");
760	Table [PrevFilter] = (uint8_t)NumToSkip;
761	Table [PrevFilter + `1`] = (uint8_t)(NumToSkip >> `8`);
762	Table [PrevFilter + `2`] = (uint8_t)(NumToSkip >> `16`);
763	}
764	}
765
766	// Any remaining unresolved fixups bubble up to the parent fixup scope.
767	assert(TableInfo.FixupStack.size() > `1` && "fixup stack underflow!");
768	FixupScopeList::iterator Source = TableInfo.FixupStack.end() - `1`;
769	FixupScopeList::iterator Dest = Source - `1`;
770	llvm::append_range(C&: Dest, R&: Source);
771	TableInfo.FixupStack.pop_back();
772
773	// If there is no fallthrough, then the final filter should get fixed
774	// up according to the enclosing scope rather than the current position.
775	if (!HasFallthrough)
776	TableInfo.FixupStack.back().push_back(x: PrevFilter);
777	}
778
779	// Returns the number of fanout produced by the filter. More fanout implies
780	// the filter distinguishes more categories of instructions.
781	unsigned Filter::usefulness() const {
782	if (!VariableInstructions.empty())
783	return FilteredInstructions.size();
784	else
785	return FilteredInstructions.size() + `1`;
786	}
787
788	//////////////////////////////////
789	// //
790	// Filterchooser Implementation //
791	// //
792	//////////////////////////////////
793
794	// Emit the decoder state machine table.
795	void DecoderEmitter::emitTable(formatted_raw_ostream &OS, DecoderTable &Table,
796	unsigned Indentation, unsigned BitWidth,
797	StringRef Namespace,
798	const EncodingIDsVec &EncodingIDs) const {
799	// We'll need to be able to map from a decoded opcode into the corresponding
800	// EncodingID for this specific combination of BitWidth and Namespace. This
801	// is used below to index into NumberedEncodings.
802	DenseMap<unsigned, unsigned> OpcodeToEncodingID;
803	OpcodeToEncodingID.reserve(NumEntries: EncodingIDs.size());
804	for (const auto &EI : EncodingIDs)
805	OpcodeToEncodingID [EI.Opcode] = EI.EncodingID;
806
807	OS.indent(NumSpaces: Indentation) << "static const uint8_t DecoderTable" << Namespace
808	<< BitWidth << "[] = {\n";
809
810	Indentation += `2`;
811
812	// Emit ULEB128 encoded value to OS, returning the number of bytes emitted.
813	auto emitULEB128 = [](DecoderTable::const_iterator I,
814	formatted_raw_ostream &OS) {
815	unsigned Len = `0`;
816	while (*I >= `128`) {
817	OS << (unsigned)*I ++ << ", ";
818	Len++;
819	}
820	OS << (unsigned)*I ++ << ", ";
821	return Len + `1`;
822	};
823
824	// Emit 24-bit numtoskip value to OS, returning the NumToSkip value.
825	auto emitNumToSkip = [](DecoderTable::const_iterator I,
826	formatted_raw_ostream &OS) {
827	uint8_t Byte = *I ++;
828	uint32_t NumToSkip = Byte;
829	OS << (unsigned)Byte << ", ";
830	Byte = *I ++;
831	OS << (unsigned)Byte << ", ";
832	NumToSkip \|= Byte << `8`;
833	Byte = *I ++;
834	OS << utostr(X: Byte) << ", ";
835	NumToSkip \|= Byte << `16`;
836	return NumToSkip;
837	};
838
839	// FIXME: We may be able to use the NumToSkip values to recover
840	// appropriate indentation levels.
841	DecoderTable::const_iterator I = Table.begin();
842	DecoderTable::const_iterator E = Table.end();
843	while (I != E) {
844	assert(I < E && "incomplete decode table entry!");
845
846	uint64_t Pos = I - Table.begin();
847	OS << "/* " << Pos << " */";
848	OS.PadToColumn(NewCol: `12`);
849
850	switch (*I) {
851	default:
852	PrintFatalError(Msg: "invalid decode table opcode");
853	case MCD::OPC_ExtractField: {
854	++I;
855	OS.indent(NumSpaces: Indentation) << "MCD::OPC_ExtractField, ";
856
857	// ULEB128 encoded start value.
858	const char ErrMsg = nullptr*;
859	unsigned Start = decodeULEB128(p: Table.data() + Pos + `1`, n: nullptr,
860	end: Table.data() + Table.size(), error: &ErrMsg);
861	assert(ErrMsg == nullptr && "ULEB128 value too large!");
862	I += emitULEB128 (I, OS);
863
864	unsigned Len = *I ++;
865	OS << Len << ", // Inst{";
866	if (Len > `1`)
867	OS << (Start + Len - `1`) << "-";
868	OS << Start << "} ...\n";
869	break;
870	}
871	case MCD::OPC_FilterValue: {
872	++I;
873	OS.indent(NumSpaces: Indentation) << "MCD::OPC_FilterValue, ";
874	// The filter value is ULEB128 encoded.
875	I += emitULEB128 (I, OS);
876
877	// 24-bit numtoskip value.
878	uint32_t NumToSkip = emitNumToSkip (I, OS);
879	I += `3`;
880	OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
881	break;
882	}
883	case MCD::OPC_CheckField: {
884	++I;
885	OS.indent(NumSpaces: Indentation) << "MCD::OPC_CheckField, ";
886	// ULEB128 encoded start value.
887	I += emitULEB128 (I, OS);
888	// 8-bit length.
889	unsigned Len = *I ++;
890	OS << Len << ", ";
891	// ULEB128 encoded field value.
892	I += emitULEB128 (I, OS);
893
894	// 24-bit numtoskip value.
895	uint32_t NumToSkip = emitNumToSkip (I, OS);
896	I += `3`;
897	OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
898	break;
899	}
900	case MCD::OPC_CheckPredicate: {
901	++I;
902	OS.indent(NumSpaces: Indentation) << "MCD::OPC_CheckPredicate, ";
903	I += emitULEB128 (I, OS);
904
905	// 24-bit numtoskip value.
906	uint32_t NumToSkip = emitNumToSkip (I, OS);
907	I += `3`;
908	OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
909	break;
910	}
911	case MCD::OPC_Decode:
912	case MCD::OPC_TryDecode: {
913	bool IsTry = *I == MCD::OPC_TryDecode;
914	++I;
915	// Decode the Opcode value.
916	const char ErrMsg = nullptr*;
917	unsigned Opc = decodeULEB128(p: Table.data() + Pos + `1`, n: nullptr,
918	end: Table.data() + Table.size(), error: &ErrMsg);
919	assert(ErrMsg == nullptr && "ULEB128 value too large!");
920
921	OS.indent(NumSpaces: Indentation)
922	<< "MCD::OPC_" << (IsTry ? "Try" : "") << "Decode, ";
923	I += emitULEB128 (I, OS);
924
925	// Decoder index.
926	I += emitULEB128 (I, OS);
927
928	auto EncI = OpcodeToEncodingID.find(Val: Opc);
929	assert(EncI != OpcodeToEncodingID.end() && "no encoding entry");
930	auto EncodingID = EncI ->second;
931
932	if (!IsTry) {
933	OS << "// Opcode: " << NumberedEncodings [EncodingID] << "\n";
934	break;
935	}
936
937	// Fallthrough for OPC_TryDecode.
938
939	// 24-bit numtoskip value.
940	uint32_t NumToSkip = emitNumToSkip (I, OS);
941	I += `3`;
942
943	OS << "// Opcode: " << NumberedEncodings [EncodingID]
944	<< ", skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
945	break;
946	}
947	case MCD::OPC_SoftFail: {
948	++I;
949	OS.indent(NumSpaces: Indentation) << "MCD::OPC_SoftFail";
950	// Positive mask
951	uint64_t Value = `0`;
952	unsigned Shift = `0`;
953	do {
954	OS << ", " << (unsigned)*I;
955	Value += ((uint64_t)(*I & `0x7f`)) << Shift;
956	Shift += `7`;
957	} while (*I ++ >= `128`);
958	if (Value > `127`) {
959	OS << " /* 0x";
960	OS.write_hex(N: Value);
961	OS << " */";
962	}
963	// Negative mask
964	Value = `0`;
965	Shift = `0`;
966	do {
967	OS << ", " << (unsigned)*I;
968	Value += ((uint64_t)(*I & `0x7f`)) << Shift;
969	Shift += `7`;
970	} while (*I ++ >= `128`);
971	if (Value > `127`) {
972	OS << " /* 0x";
973	OS.write_hex(N: Value);
974	OS << " */";
975	}
976	OS << ",\n";
977	break;
978	}
979	case MCD::OPC_Fail: {
980	++I;
981	OS.indent(NumSpaces: Indentation) << "MCD::OPC_Fail,\n";
982	break;
983	}
984	}
985	}
986	OS.indent(NumSpaces: Indentation) << "0\n";
987
988	Indentation -= `2`;
989
990	OS.indent(NumSpaces: Indentation) << "};\n\n";
991	}
992
993	void DecoderEmitter::emitInstrLenTable(formatted_raw_ostream &OS,
994	std::vector<unsigned> &InstrLen) const {
995	OS << "static const uint8_t InstrLenTable[] = {\n";
996	for (unsigned &Len : InstrLen) {
997	OS << Len << ",\n";
998	}
999	OS << "};\n\n";
1000	}
1001
1002	void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
1003	PredicateSet &Predicates,
1004	unsigned Indentation) const {
1005	// The predicate function is just a big switch statement based on the
1006	// input predicate index.
1007	OS.indent(NumSpaces: Indentation) << "static bool checkDecoderPredicate(unsigned Idx, "
1008	<< "const FeatureBitset &Bits) {\n";
1009	Indentation += `2`;
1010	if (!Predicates.empty()) {
1011	OS.indent(NumSpaces: Indentation) << "switch (Idx) {\n";
1012	OS.indent(NumSpaces: Indentation)
1013	<< "default: llvm_unreachable(\"Invalid index!\");\n";
1014	unsigned Index = `0`;
1015	for (const auto &Predicate : Predicates) {
1016	OS.indent(NumSpaces: Indentation) << "case " << Index++ << ":\n";
1017	OS.indent(NumSpaces: Indentation + `2`) << "return (" << Predicate << ");\n";
1018	}
1019	OS.indent(NumSpaces: Indentation) << "}\n";
1020	} else {
1021	// No case statement to emit
1022	OS.indent(NumSpaces: Indentation) << "llvm_unreachable(\"Invalid index!\");\n";
1023	}
1024	Indentation -= `2`;
1025	OS.indent(NumSpaces: Indentation) << "}\n\n";
1026	}
1027
1028	void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
1029	DecoderSet &Decoders,
1030	unsigned Indentation) const {
1031	// The decoder function is just a big switch statement based on the
1032	// input decoder index.
1033	OS.indent(NumSpaces: Indentation) << "template <typename InsnType>\n";
1034	OS.indent(NumSpaces: Indentation) << "static DecodeStatus decodeToMCInst(DecodeStatus S,"
1035	<< " unsigned Idx, InsnType insn, MCInst &MI,\n";
1036	OS.indent(NumSpaces: Indentation)
1037	<< " uint64_t "
1038	<< "Address, const MCDisassembler *Decoder, bool &DecodeComplete) {\n";
1039	Indentation += `2`;
1040	OS.indent(NumSpaces: Indentation) << "DecodeComplete = true;\n";
1041	// TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
1042	// It would be better for emitBinaryParser to use a 64-bit tmp whenever
1043	// possible but fall back to an InsnType-sized tmp for truly large fields.
1044	OS.indent(NumSpaces: Indentation) << "using TmpType = "
1045	"std::conditional_t<std::is_integral<InsnType>::"
1046	"value, InsnType, uint64_t>;\n";
1047	OS.indent(NumSpaces: Indentation) << "TmpType tmp;\n";
1048	OS.indent(NumSpaces: Indentation) << "switch (Idx) {\n";
1049	OS.indent(NumSpaces: Indentation) << "default: llvm_unreachable(\"Invalid index!\");\n";
1050	unsigned Index = `0`;
1051	for (const auto &Decoder : Decoders) {
1052	OS.indent(NumSpaces: Indentation) << "case " << Index++ << ":\n";
1053	OS << Decoder;
1054	OS.indent(NumSpaces: Indentation + `2`) << "return S;\n";
1055	}
1056	OS.indent(NumSpaces: Indentation) << "}\n";
1057	Indentation -= `2`;
1058	OS.indent(NumSpaces: Indentation) << "}\n";
1059	}
1060
1061	// Populates the field of the insn given the start position and the number of
1062	// consecutive bits to scan for.
1063	//
1064	// Returns a pair of values (indicator, field), where the indicator is false
1065	// if there exists any uninitialized bit value in the range and true if all
1066	// bits are well-known. The second value is the potentially populated field.
1067	std::pair<bool, uint64_t> FilterChooser::fieldFromInsn(const insn_t &Insn,
1068	unsigned StartBit,
1069	unsigned NumBits) const {
1070	uint64_t Field = `0`;
1071
1072	for (unsigned i = `0`; i < NumBits; ++i) {
1073	if (Insn [StartBit + i] == BIT_UNSET)
1074	return {false, Field};
1075
1076	if (Insn [StartBit + i] == BIT_TRUE)
1077	Field = Field \| (`1ULL` << i);
1078	}
1079
1080	return {true, Field};
1081	}
1082
1083	/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
1084	/// filter array as a series of chars.
1085	void FilterChooser::dumpFilterArray(
1086	raw_ostream &o, const std::vector<bit_value_t> &filter) const {
1087	for (unsigned bitIndex = BitWidth; bitIndex > `0`; bitIndex--) {
1088	switch (filter [bitIndex - `1`]) {
1089	case BIT_UNFILTERED:
1090	o << ".";
1091	break;
1092	case BIT_UNSET:
1093	o << "_";
1094	break;
1095	case BIT_TRUE:
1096	o << "1";
1097	break;
1098	case BIT_FALSE:
1099	o << "0";
1100	break;
1101	}
1102	}
1103	}
1104
1105	/// dumpStack - dumpStack traverses the filter chooser chain and calls
1106	/// dumpFilterArray on each filter chooser up to the top level one.
1107	void FilterChooser::dumpStack(raw_ostream &o, const char prefix) const* {
1108	const FilterChooser current = this*;
1109
1110	while (current) {
1111	o << prefix;
1112	dumpFilterArray(o, filter: current->FilterBitValues);
1113	o << `'\n'`;
1114	current = current->Parent;
1115	}
1116	}
1117
1118	// Calculates the island(s) needed to decode the instruction.
1119	// This returns a list of undecoded bits of an instructions, for example,
1120	// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
1121	// decoded bits in order to verify that the instruction matches the Opcode.
1122	unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
1123	std::vector<unsigned> &EndBits,
1124	std::vector<uint64_t> &FieldVals,
1125	const insn_t &Insn) const {
1126	unsigned Num, BitNo;
1127	Num = BitNo = `0`;
1128
1129	uint64_t FieldVal = `0`;
1130
1131	// 0: Init
1132	// 1: Water (the bit value does not affect decoding)
1133	// 2: Island (well-known bit value needed for decoding)
1134	int State = `0`;
1135
1136	for (unsigned i = `0`; i < BitWidth; ++i) {
1137	int64_t Val = Value(V: Insn [i]);
1138	bool Filtered = PositionFiltered(i);
1139	switch (State) {
1140	default:
1141	llvm_unreachable("Unreachable code!");
1142	case `0`:
1143	case `1`:
1144	if (Filtered \|\| Val == -`1`)
1145	State = `1`; // Still in Water
1146	else {
1147	State = `2`; // Into the Island
1148	BitNo = `0`;
1149	StartBits.push_back(x: i);
1150	FieldVal = Val;
1151	}
1152	break;
1153	case `2`:
1154	if (Filtered \|\| Val == -`1`) {
1155	State = `1`; // Into the Water
1156	EndBits.push_back(x: i - `1`);
1157	FieldVals.push_back(x: FieldVal);
1158	++Num;
1159	} else {
1160	State = `2`; // Still in Island
1161	++BitNo;
1162	FieldVal = FieldVal \| Val << BitNo;
1163	}
1164	break;
1165	}
1166	}
1167	// If we are still in Island after the loop, do some housekeeping.
1168	if (State == `2`) {
1169	EndBits.push_back(x: BitWidth - `1`);
1170	FieldVals.push_back(x: FieldVal);
1171	++Num;
1172	}
1173
1174	assert(StartBits.size() == Num && EndBits.size() == Num &&
1175	FieldVals.size() == Num);
1176	return Num;
1177	}
1178
1179	void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
1180	const OperandInfo &OpInfo,
1181	bool &OpHasCompleteDecoder) const {
1182	const std::string &Decoder = OpInfo.Decoder;
1183
1184	bool UseInsertBits = OpInfo.numFields() != `1` \|\| OpInfo.InitValue != `0`;
1185
1186	if (UseInsertBits) {
1187	o.indent(NumSpaces: Indentation) << "tmp = 0x";
1188	o.write_hex(N: OpInfo.InitValue);
1189	o << ";\n";
1190	}
1191
1192	for (const EncodingField &EF : OpInfo) {
1193	o.indent(NumSpaces: Indentation);
1194	if (UseInsertBits)
1195	o << "insertBits(tmp, ";
1196	else
1197	o << "tmp = ";
1198	o << "fieldFromInstruction(insn, " << EF.Base << ", " << EF.Width << `')'`;
1199	if (UseInsertBits)
1200	o << ", " << EF.Offset << ", " << EF.Width << `')'`;
1201	else if (EF.Offset != `0`)
1202	o << " << " << EF.Offset;
1203	o << ";\n";
1204	}
1205
1206	if (Decoder != "") {
1207	OpHasCompleteDecoder = OpInfo.HasCompleteDecoder;
1208	o.indent(NumSpaces: Indentation) << "if (!Check(S, " << Decoder
1209	<< "(MI, tmp, Address, Decoder))) { "
1210	<< (OpHasCompleteDecoder ? ""
1211	: "DecodeComplete = false; ")
1212	<< "return MCDisassembler::Fail; }\n";
1213	} else {
1214	OpHasCompleteDecoder = true;
1215	o.indent(NumSpaces: Indentation) << "MI.addOperand(MCOperand::createImm(tmp));\n";
1216	}
1217	}
1218
1219	void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation,
1220	unsigned Opc, bool &HasCompleteDecoder) const {
1221	HasCompleteDecoder = true;
1222
1223	for (const auto &Op : Operands.find(x: Opc)->second) {
1224	// If a custom instruction decoder was specified, use that.
1225	if (Op.numFields() == `0` && !Op.Decoder.empty()) {
1226	HasCompleteDecoder = Op.HasCompleteDecoder;
1227	OS.indent(NumSpaces: Indentation)
1228	<< "if (!Check(S, " << Op.Decoder
1229	<< "(MI, insn, Address, Decoder))) { "
1230	<< (HasCompleteDecoder ? "" : "DecodeComplete = false; ")
1231	<< "return MCDisassembler::Fail; }\n";
1232	break;
1233	}
1234
1235	bool OpHasCompleteDecoder;
1236	emitBinaryParser(o&: OS, Indentation, OpInfo: Op, OpHasCompleteDecoder);
1237	if (!OpHasCompleteDecoder)
1238	HasCompleteDecoder = false;
1239	}
1240	}
1241
1242	unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders, unsigned Opc,
1243	bool &HasCompleteDecoder) const {
1244	// Build up the predicate string.
1245	SmallString<`256`> Decoder;
1246	// FIXME: emitDecoder() function can take a buffer directly rather than
1247	// a stream.
1248	raw_svector_ostream S(Decoder);
1249	unsigned I = `4`;
1250	emitDecoder(OS&: S, Indentation: I, Opc, HasCompleteDecoder);
1251
1252	// Using the full decoder string as the key value here is a bit
1253	// heavyweight, but is effective. If the string comparisons become a
1254	// performance concern, we can implement a mangling of the predicate
1255	// data easily enough with a map back to the actual string. That's
1256	// overkill for now, though.
1257
1258	// Make sure the predicate is in the table.
1259	Decoders.insert(X: CachedHashString (Decoder));
1260	// Now figure out the index for when we write out the table.
1261	DecoderSet::const_iterator P = find(Range&: Decoders, Val: Decoder.str());
1262	return (unsigned)(P - Decoders.begin());
1263	}
1264
1265	// If ParenIfBinOp is true, print a surrounding () if Val uses && or \|\|.
1266	bool FilterChooser::emitPredicateMatchAux(const Init &Val, bool ParenIfBinOp,
1267	raw_ostream &OS) const {
1268	if (const auto *D = dyn_cast<DefInit>(Val: &Val)) {
1269	if (!D->getDef()->isSubClassOf(Name: "SubtargetFeature"))
1270	return true;
1271	OS << "Bits[" << Emitter->PredicateNamespace << "::" << D->getAsString()
1272	<< "]";
1273	return false;
1274	}
1275	if (const auto *D = dyn_cast<DagInit>(Val: &Val)) {
1276	std::string Op = D->getOperator()->getAsString();
1277	if (Op == "not" && D->getNumArgs() == `1`) {
1278	OS << `'!'`;
1279	return emitPredicateMatchAux(Val: D->getArg(Num: `0`), ParenIfBinOp: true*, OS);
1280	}
1281	if ((Op == "any_of" \|\| Op == "all_of") && D->getNumArgs() > `0`) {
1282	bool Paren = D->getNumArgs() > `1` && std::exchange(obj&: ParenIfBinOp, new_val: true);
1283	if (Paren)
1284	OS << `'('`;
1285	ListSeparator LS(Op == "any_of" ? " \|\| " : " && ");
1286	for (auto *Arg : D->getArgs()) {
1287	OS << LS;
1288	if (emitPredicateMatchAux(Val: *Arg, ParenIfBinOp, OS))
1289	return true;
1290	}
1291	if (Paren)
1292	OS << `')'`;
1293	return false;
1294	}
1295	}
1296	return true;
1297	}
1298
1299	bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
1300	unsigned Opc) const {
1301	ListInit *Predicates =
1302	AllInstructions [Opc].EncodingDef->getValueAsListInit(FieldName: "Predicates");
1303	bool IsFirstEmission = true;
1304	for (unsigned i = `0`; i < Predicates->size(); ++i) {
1305	Record *Pred = Predicates->getElementAsRecord(i);
1306	if (!Pred->getValue(Name: "AssemblerMatcherPredicate"))
1307	continue;
1308
1309	if (!isa<DagInit>(Val: Pred->getValue(Name: "AssemblerCondDag")->getValue()))
1310	continue;
1311
1312	if (!IsFirstEmission)
1313	o << " && ";
1314	if (emitPredicateMatchAux(Val: *Pred->getValueAsDag(FieldName: "AssemblerCondDag"),
1315	ParenIfBinOp: Predicates->size() > `1`, OS&: o))
1316	PrintFatalError(ErrorLoc: Pred->getLoc(), Msg: "Invalid AssemblerCondDag!");
1317	IsFirstEmission = false;
1318	}
1319	return !Predicates->empty();
1320	}
1321
1322	bool FilterChooser::doesOpcodeNeedPredicate(unsigned Opc) const {
1323	ListInit *Predicates =
1324	AllInstructions [Opc].EncodingDef->getValueAsListInit(FieldName: "Predicates");
1325	for (unsigned i = `0`; i < Predicates->size(); ++i) {
1326	Record *Pred = Predicates->getElementAsRecord(i);
1327	if (!Pred->getValue(Name: "AssemblerMatcherPredicate"))
1328	continue;
1329
1330	if (isa<DagInit>(Val: Pred->getValue(Name: "AssemblerCondDag")->getValue()))
1331	return true;
1332	}
1333	return false;
1334	}
1335
1336	unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
1337	StringRef Predicate) const {
1338	// Using the full predicate string as the key value here is a bit
1339	// heavyweight, but is effective. If the string comparisons become a
1340	// performance concern, we can implement a mangling of the predicate
1341	// data easily enough with a map back to the actual string. That's
1342	// overkill for now, though.
1343
1344	// Make sure the predicate is in the table.
1345	TableInfo.Predicates.insert(X: CachedHashString (Predicate));
1346	// Now figure out the index for when we write out the table.
1347	PredicateSet::const_iterator P = find(Range&: TableInfo.Predicates, Val: Predicate);
1348	return (unsigned)(P - TableInfo.Predicates.begin());
1349	}
1350
1351	void FilterChooser::emitPredicateTableEntry(DecoderTableInfo &TableInfo,
1352	unsigned Opc) const {
1353	if (!doesOpcodeNeedPredicate(Opc))
1354	return;
1355
1356	// Build up the predicate string.
1357	SmallString<`256`> Predicate;
1358	// FIXME: emitPredicateMatch() functions can take a buffer directly rather
1359	// than a stream.
1360	raw_svector_ostream PS(Predicate);
1361	unsigned I = `0`;
1362	emitPredicateMatch(o&: PS, Indentation&: I, Opc);
1363
1364	// Figure out the index into the predicate table for the predicate just
1365	// computed.
1366	unsigned PIdx = getPredicateIndex(TableInfo, Predicate: PS.str());
1367	SmallString<`16`> PBytes;
1368	raw_svector_ostream S(PBytes);
1369	encodeULEB128(Value: PIdx, OS&: S);
1370
1371	TableInfo.Table.push_back(x: MCD::OPC_CheckPredicate);
1372	// Predicate index.
1373	for (const auto PB : PBytes)
1374	TableInfo.Table.push_back(x: PB);
1375	// Push location for NumToSkip backpatching.
1376	TableInfo.FixupStack.back().push_back(x: TableInfo.Table.size());
1377	TableInfo.Table.push_back(x: `0`);
1378	TableInfo.Table.push_back(x: `0`);
1379	TableInfo.Table.push_back(x: `0`);
1380	}
1381
1382	void FilterChooser::emitSoftFailTableEntry(DecoderTableInfo &TableInfo,
1383	unsigned Opc) const {
1384	const Record *EncodingDef = AllInstructions [Opc].EncodingDef;
1385	const RecordVal *RV = EncodingDef->getValue(Name: "SoftFail");
1386	BitsInit SFBits = RV ? dyn_cast<BitsInit>(Val: RV->getValue()) : nullptr*;
1387
1388	if (!SFBits)
1389	return;
1390	BitsInit *InstBits = EncodingDef->getValueAsBitsInit(FieldName: "Inst");
1391
1392	APInt PositiveMask(BitWidth, `0ULL`);
1393	APInt NegativeMask(BitWidth, `0ULL`);
1394	for (unsigned i = `0`; i < BitWidth; ++i) {
1395	bit_value_t B = bitFromBits(bits: *SFBits, index: i);
1396	bit_value_t IB = bitFromBits(bits: *InstBits, index: i);
1397
1398	if (B != BIT_TRUE)
1399	continue;
1400
1401	switch (IB) {
1402	case BIT_FALSE:
1403	// The bit is meant to be false, so emit a check to see if it is true.
1404	PositiveMask.setBit(i);
1405	break;
1406	case BIT_TRUE:
1407	// The bit is meant to be true, so emit a check to see if it is false.
1408	NegativeMask.setBit(i);
1409	break;
1410	default:
1411	// The bit is not set; this must be an error!
1412	errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
1413	<< AllInstructions [Opc] << " is set but Inst{" << i
1414	<< "} is unset!\n"
1415	<< " - You can only mark a bit as SoftFail if it is fully defined"
1416	<< " (1/0 - not '?') in Inst\n";
1417	return;
1418	}
1419	}
1420
1421	bool NeedPositiveMask = PositiveMask.getBoolValue();
1422	bool NeedNegativeMask = NegativeMask.getBoolValue();
1423
1424	if (!NeedPositiveMask && !NeedNegativeMask)
1425	return;
1426
1427	TableInfo.Table.push_back(x: MCD::OPC_SoftFail);
1428
1429	SmallString<`16`> MaskBytes;
1430	raw_svector_ostream S(MaskBytes);
1431	if (NeedPositiveMask) {
1432	encodeULEB128(Value: PositiveMask.getZExtValue(), OS&: S);
1433	for (unsigned i = `0`, e = MaskBytes.size(); i != e; ++i)
1434	TableInfo.Table.push_back(x: MaskBytes [i]);
1435	} else
1436	TableInfo.Table.push_back(x: `0`);
1437	if (NeedNegativeMask) {
1438	MaskBytes.clear();
1439	encodeULEB128(Value: NegativeMask.getZExtValue(), OS&: S);
1440	for (unsigned i = `0`, e = MaskBytes.size(); i != e; ++i)
1441	TableInfo.Table.push_back(x: MaskBytes [i]);
1442	} else
1443	TableInfo.Table.push_back(x: `0`);
1444	}
1445
1446	// Emits table entries to decode the singleton.
1447	void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1448	EncodingIDAndOpcode Opc) const {
1449	std::vector<unsigned> StartBits;
1450	std::vector<unsigned> EndBits;
1451	std::vector<uint64_t> FieldVals;
1452	insn_t Insn;
1453	insnWithID(Insn, Opcode: Opc.EncodingID);
1454
1455	// Look for islands of undecoded bits of the singleton.
1456	getIslands(StartBits, EndBits, FieldVals, Insn);
1457
1458	unsigned Size = StartBits.size();
1459
1460	// Emit the predicate table entry if one is needed.
1461	emitPredicateTableEntry(TableInfo, Opc: Opc.EncodingID);
1462
1463	// Check any additional encoding fields needed.
1464	for (unsigned I = Size; I != `0`; --I) {
1465	unsigned NumBits = EndBits [I - `1`] - StartBits [I - `1`] + `1`;
1466	assert((NumBits < (`1u` << `8`)) && "NumBits overflowed uint8 table entry!");
1467	TableInfo.Table.push_back(x: MCD::OPC_CheckField);
1468	uint8_t Buffer[`16`], *P;
1469	encodeULEB128(Value: StartBits [I - `1`], p: Buffer);
1470	for (P = Buffer; *P >= `128`; ++P)
1471	TableInfo.Table.push_back(x: *P);
1472	TableInfo.Table.push_back(x: *P);
1473	TableInfo.Table.push_back(x: NumBits);
1474	encodeULEB128(Value: FieldVals [I - `1`], p: Buffer);
1475	for (P = Buffer; *P >= `128`; ++P)
1476	TableInfo.Table.push_back(x: *P);
1477	TableInfo.Table.push_back(x: *P);
1478	// Push location for NumToSkip backpatching.
1479	TableInfo.FixupStack.back().push_back(x: TableInfo.Table.size());
1480	// The fixup is always 24-bits, so go ahead and allocate the space
1481	// in the table so all our relative position calculations work OK even
1482	// before we fully resolve the real value here.
1483	TableInfo.Table.push_back(x: `0`);
1484	TableInfo.Table.push_back(x: `0`);
1485	TableInfo.Table.push_back(x: `0`);
1486	}
1487
1488	// Check for soft failure of the match.
1489	emitSoftFailTableEntry(TableInfo, Opc: Opc.EncodingID);
1490
1491	bool HasCompleteDecoder;
1492	unsigned DIdx =
1493	getDecoderIndex(Decoders&: TableInfo.Decoders, Opc: Opc.EncodingID, HasCompleteDecoder);
1494
1495	// Produce OPC_Decode or OPC_TryDecode opcode based on the information
1496	// whether the instruction decoder is complete or not. If it is complete
1497	// then it handles all possible values of remaining variable/unfiltered bits
1498	// and for any value can determine if the bitpattern is a valid instruction
1499	// or not. This means OPC_Decode will be the final step in the decoding
1500	// process. If it is not complete, then the Fail return code from the
1501	// decoder method indicates that additional processing should be done to see
1502	// if there is any other instruction that also matches the bitpattern and
1503	// can decode it.
1504	TableInfo.Table.push_back(x: HasCompleteDecoder ? MCD::OPC_Decode
1505	: MCD::OPC_TryDecode);
1506	NumEncodingsSupported ++;
1507	uint8_t Buffer[`16`], *p;
1508	encodeULEB128(Value: Opc.Opcode, p: Buffer);
1509	for (p = Buffer; *p >= `128`; ++p)
1510	TableInfo.Table.push_back(x: *p);
1511	TableInfo.Table.push_back(x: *p);
1512
1513	SmallString<`16`> Bytes;
1514	raw_svector_ostream S(Bytes);
1515	encodeULEB128(Value: DIdx, OS&: S);
1516
1517	// Decoder index.
1518	for (const auto B : Bytes)
1519	TableInfo.Table.push_back(x: B);
1520
1521	if (!HasCompleteDecoder) {
1522	// Push location for NumToSkip backpatching.
1523	TableInfo.FixupStack.back().push_back(x: TableInfo.Table.size());
1524	// Allocate the space for the fixup.
1525	TableInfo.Table.push_back(x: `0`);
1526	TableInfo.Table.push_back(x: `0`);
1527	TableInfo.Table.push_back(x: `0`);
1528	}
1529	}
1530
1531	// Emits table entries to decode the singleton, and then to decode the rest.
1532	void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
1533	const Filter &Best) const {
1534	EncodingIDAndOpcode Opc = Best.getSingletonOpc();
1535
1536	// complex singletons need predicate checks from the first singleton
1537	// to refer forward to the variable filterchooser that follows.
1538	TableInfo.FixupStack.emplace_back();
1539
1540	emitSingletonTableEntry(TableInfo, Opc);
1541
1542	resolveTableFixups(Table&: TableInfo.Table, Fixups: TableInfo.FixupStack.back(),
1543	DestIdx: TableInfo.Table.size());
1544	TableInfo.FixupStack.pop_back();
1545
1546	Best.getVariableFC().emitTableEntries(TableInfo);
1547	}
1548
1549	// Assign a single filter and run with it. Top level API client can initialize
1550	// with a single filter to start the filtering process.
1551	void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
1552	bool mixed) {
1553	Filters.clear();
1554	Filters.emplace_back(args&: *this, args&: startBit, args&: numBit, args: true);
1555	BestIndex = `0`; // Sole Filter instance to choose from.
1556	bestFilter().recurse();
1557	}
1558
1559	// reportRegion is a helper function for filterProcessor to mark a region as
1560	// eligible for use as a filter region.
1561	void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
1562	unsigned BitIndex, bool AllowMixed) {
1563	if (RA == ATTR_MIXED && AllowMixed)
1564	Filters.emplace_back(args&: *this, args&: StartBit, args: BitIndex - StartBit, args: true);
1565	else if (RA == ATTR_ALL_SET && !AllowMixed)
1566	Filters.emplace_back(args&: *this, args&: StartBit, args: BitIndex - StartBit, args: false);
1567	}
1568
1569	// FilterProcessor scans the well-known encoding bits of the instructions and
1570	// builds up a list of candidate filters. It chooses the best filter and
1571	// recursively descends down the decoding tree.
1572	bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
1573	Filters.clear();
1574	BestIndex = -`1`;
1575	unsigned numInstructions = Opcodes.size();
1576
1577	assert(numInstructions && "Filter created with no instructions");
1578
1579	// No further filtering is necessary.
1580	if (numInstructions == `1`)
1581	return true;
1582
1583	// Heuristics. See also doFilter()'s "Heuristics" comment when num of
1584	// instructions is 3.
1585	if (AllowMixed && !Greedy) {
1586	assert(numInstructions == `3`);
1587
1588	for (const auto &Opcode : Opcodes) {
1589	std::vector<unsigned> StartBits;
1590	std::vector<unsigned> EndBits;
1591	std::vector<uint64_t> FieldVals;
1592	insn_t Insn;
1593
1594	insnWithID(Insn, Opcode: Opcode.EncodingID);
1595
1596	// Look for islands of undecoded bits of any instruction.
1597	if (getIslands(StartBits, EndBits, FieldVals, Insn) > `0`) {
1598	// Found an instruction with island(s). Now just assign a filter.
1599	runSingleFilter(startBit: StartBits [`0`], numBit: EndBits [`0`] - StartBits [`0`] + `1`, mixed: true);
1600	return true;
1601	}
1602	}
1603	}
1604
1605	unsigned BitIndex;
1606
1607	// We maintain BIT_WIDTH copies of the bitAttrs automaton.
1608	// The automaton consumes the corresponding bit from each
1609	// instruction.
1610	//
1611	// Input symbols: 0, 1, and _ (unset).
1612	// States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
1613	// Initial state: NONE.
1614	//
1615	// (NONE) ------- [01] -> (ALL_SET)
1616	// (NONE) ------- _ ----> (ALL_UNSET)
1617	// (ALL_SET) ---- [01] -> (ALL_SET)
1618	// (ALL_SET) ---- _ ----> (MIXED)
1619	// (ALL_UNSET) -- [01] -> (MIXED)
1620	// (ALL_UNSET) -- _ ----> (ALL_UNSET)
1621	// (MIXED) ------ . ----> (MIXED)
1622	// (FILTERED)---- . ----> (FILTERED)
1623
1624	std::vector<bitAttr_t> bitAttrs;
1625
1626	// FILTERED bit positions provide no entropy and are not worthy of pursuing.
1627	// Filter::recurse() set either BIT_TRUE or BIT_FALSE for each position.
1628	for (BitIndex = `0`; BitIndex < BitWidth; ++BitIndex)
1629	if (FilterBitValues [BitIndex] == BIT_TRUE \|\|
1630	FilterBitValues [BitIndex] == BIT_FALSE)
1631	bitAttrs.push_back(x: ATTR_FILTERED);
1632	else
1633	bitAttrs.push_back(x: ATTR_NONE);
1634
1635	for (const auto &OpcPair : Opcodes) {
1636	insn_t insn;
1637
1638	insnWithID(Insn&: insn, Opcode: OpcPair.EncodingID);
1639
1640	for (BitIndex = `0`; BitIndex < BitWidth; ++BitIndex) {
1641	switch (bitAttrs [BitIndex]) {
1642	case ATTR_NONE:
1643	if (insn [BitIndex] == BIT_UNSET)
1644	bitAttrs [BitIndex] = ATTR_ALL_UNSET;
1645	else
1646	bitAttrs [BitIndex] = ATTR_ALL_SET;
1647	break;
1648	case ATTR_ALL_SET:
1649	if (insn [BitIndex] == BIT_UNSET)
1650	bitAttrs [BitIndex] = ATTR_MIXED;
1651	break;
1652	case ATTR_ALL_UNSET:
1653	if (insn [BitIndex] != BIT_UNSET)
1654	bitAttrs [BitIndex] = ATTR_MIXED;
1655	break;
1656	case ATTR_MIXED:
1657	case ATTR_FILTERED:
1658	break;
1659	}
1660	}
1661	}
1662
1663	// The regionAttr automaton consumes the bitAttrs automatons' state,
1664	// lowest-to-highest.
1665	//
1666	// Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
1667	// States: NONE, ALL_SET, MIXED
1668	// Initial state: NONE
1669	//
1670	// (NONE) ----- F --> (NONE)
1671	// (NONE) ----- S --> (ALL_SET) ; and set region start
1672	// (NONE) ----- U --> (NONE)
1673	// (NONE) ----- M --> (MIXED) ; and set region start
1674	// (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region
1675	// (ALL_SET) -- S --> (ALL_SET)
1676	// (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region
1677	// (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region
1678	// (MIXED) ---- F --> (NONE) ; and report a MIXED region
1679	// (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region
1680	// (MIXED) ---- U --> (NONE) ; and report a MIXED region
1681	// (MIXED) ---- M --> (MIXED)
1682
1683	bitAttr_t RA = ATTR_NONE;
1684	unsigned StartBit = `0`;
1685
1686	for (BitIndex = `0`; BitIndex < BitWidth; ++BitIndex) {
1687	bitAttr_t bitAttr = bitAttrs [BitIndex];
1688
1689	assert(bitAttr != ATTR_NONE && "Bit without attributes");
1690
1691	switch (RA) {
1692	case ATTR_NONE:
1693	switch (bitAttr) {
1694	case ATTR_FILTERED:
1695	break;
1696	case ATTR_ALL_SET:
1697	StartBit = BitIndex;
1698	RA = ATTR_ALL_SET;
1699	break;
1700	case ATTR_ALL_UNSET:
1701	break;
1702	case ATTR_MIXED:
1703	StartBit = BitIndex;
1704	RA = ATTR_MIXED;
1705	break;
1706	default:
1707	llvm_unreachable("Unexpected bitAttr!");
1708	}
1709	break;
1710	case ATTR_ALL_SET:
1711	switch (bitAttr) {
1712	case ATTR_FILTERED:
1713	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1714	RA = ATTR_NONE;
1715	break;
1716	case ATTR_ALL_SET:
1717	break;
1718	case ATTR_ALL_UNSET:
1719	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1720	RA = ATTR_NONE;
1721	break;
1722	case ATTR_MIXED:
1723	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1724	StartBit = BitIndex;
1725	RA = ATTR_MIXED;
1726	break;
1727	default:
1728	llvm_unreachable("Unexpected bitAttr!");
1729	}
1730	break;
1731	case ATTR_MIXED:
1732	switch (bitAttr) {
1733	case ATTR_FILTERED:
1734	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1735	StartBit = BitIndex;
1736	RA = ATTR_NONE;
1737	break;
1738	case ATTR_ALL_SET:
1739	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1740	StartBit = BitIndex;
1741	RA = ATTR_ALL_SET;
1742	break;
1743	case ATTR_ALL_UNSET:
1744	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1745	RA = ATTR_NONE;
1746	break;
1747	case ATTR_MIXED:
1748	break;
1749	default:
1750	llvm_unreachable("Unexpected bitAttr!");
1751	}
1752	break;
1753	case ATTR_ALL_UNSET:
1754	llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
1755	case ATTR_FILTERED:
1756	llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
1757	}
1758	}
1759
1760	// At the end, if we're still in ALL_SET or MIXED states, report a region
1761	switch (RA) {
1762	case ATTR_NONE:
1763	break;
1764	case ATTR_FILTERED:
1765	break;
1766	case ATTR_ALL_SET:
1767	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1768	break;
1769	case ATTR_ALL_UNSET:
1770	break;
1771	case ATTR_MIXED:
1772	reportRegion(RA, StartBit, BitIndex, AllowMixed);
1773	break;
1774	}
1775
1776	// We have finished with the filter processings. Now it's time to choose
1777	// the best performing filter.
1778	BestIndex = `0`;
1779	bool AllUseless = true;
1780	unsigned BestScore = `0`;
1781
1782	for (const auto &[Idx, Filter] : enumerate(First&: Filters)) {
1783	unsigned Usefulness = Filter.usefulness();
1784
1785	if (Usefulness)
1786	AllUseless = false;
1787
1788	if (Usefulness > BestScore) {
1789	BestIndex = Idx;
1790	BestScore = Usefulness;
1791	}
1792	}
1793
1794	if (!AllUseless)
1795	bestFilter().recurse();
1796
1797	return !AllUseless;
1798	} // end of FilterChooser::filterProcessor(bool)
1799
1800	// Decides on the best configuration of filter(s) to use in order to decode
1801	// the instructions. A conflict of instructions may occur, in which case we
1802	// dump the conflict set to the standard error.
1803	void FilterChooser::doFilter() {
1804	unsigned Num = Opcodes.size();
1805	assert(Num && "FilterChooser created with no instructions");
1806
1807	// Try regions of consecutive known bit values first.
1808	if (filterProcessor(AllowMixed: false))
1809	return;
1810
1811	// Then regions of mixed bits (both known and unitialized bit values allowed).
1812	if (filterProcessor(AllowMixed: true))
1813	return;
1814
1815	// Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
1816	// no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
1817	// well-known encoding pattern. In such case, we backtrack and scan for the
1818	// the very first consecutive ATTR_ALL_SET region and assign a filter to it.
1819	if (Num == `3` && filterProcessor(AllowMixed: true, Greedy: false))
1820	return;
1821
1822	// If we come to here, the instruction decoding has failed.
1823	// Set the BestIndex to -1 to indicate so.
1824	BestIndex = -`1`;
1825	}
1826
1827	// emitTableEntries - Emit state machine entries to decode our share of
1828	// instructions.
1829	void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
1830	if (Opcodes.size() == `1`) {
1831	// There is only one instruction in the set, which is great!
1832	// Call emitSingletonDecoder() to see whether there are any remaining
1833	// encodings bits.
1834	emitSingletonTableEntry(TableInfo, Opc: Opcodes [`0`]);
1835	return;
1836	}
1837
1838	// Choose the best filter to do the decodings!
1839	if (BestIndex != -`1`) {
1840	const Filter &Best = Filters [BestIndex];
1841	if (Best.getNumFiltered() == `1`)
1842	emitSingletonTableEntry(TableInfo, Best);
1843	else
1844	Best.emitTableEntry(TableInfo);
1845	return;
1846	}
1847
1848	// We don't know how to decode these instructions! Dump the
1849	// conflict set and bail.
1850
1851	// Print out useful conflict information for postmortem analysis.
1852	errs() << "Decoding Conflict:\n";
1853
1854	dumpStack(o&: errs(), prefix: "\t\t");
1855
1856	for (auto Opcode : Opcodes) {
1857	errs() << `'\t'`;
1858	emitNameWithID(OS&: errs(), Opcode: Opcode.EncodingID);
1859	errs() << " ";
1860	dumpBits(
1861	o&: errs(),
1862	bits: getBitsField(def: *AllInstructions [Opcode.EncodingID].EncodingDef, str: "Inst"));
1863	errs() << `'\n'`;
1864	}
1865	}
1866
1867	static std::string findOperandDecoderMethod(Record *Record) {
1868	std::string Decoder;
1869
1870	RecordVal *DecoderString = Record->getValue(Name: "DecoderMethod");
1871	StringInit *String =
1872	DecoderString ? dyn_cast<StringInit>(Val: DecoderString->getValue()) : nullptr;
1873	if (String) {
1874	Decoder = std::string (String->getValue());
1875	if (!Decoder.empty())
1876	return Decoder;
1877	}
1878
1879	if (Record->isSubClassOf(Name: "RegisterOperand"))
1880	Record = Record->getValueAsDef(FieldName: "RegClass");
1881
1882	if (Record->isSubClassOf(Name: "RegisterClass")) {
1883	Decoder = "Decode" + Record->getName().str() + "RegisterClass";
1884	} else if (Record->isSubClassOf(Name: "PointerLikeRegClass")) {
1885	Decoder = "DecodePointerLikeRegClass" +
1886	utostr(X: Record->getValueAsInt(FieldName: "RegClassKind"));
1887	}
1888
1889	return Decoder;
1890	}
1891
1892	OperandInfo getOpInfo(Record *TypeRecord) {
1893	std::string Decoder = findOperandDecoderMethod(Record: TypeRecord);
1894
1895	RecordVal *HasCompleteDecoderVal = TypeRecord->getValue(Name: "hasCompleteDecoder");
1896	BitInit *HasCompleteDecoderBit =
1897	HasCompleteDecoderVal
1898	? dyn_cast<BitInit>(Val: HasCompleteDecoderVal->getValue())
1899	: nullptr;
1900	bool HasCompleteDecoder =
1901	HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
1902
1903	return OperandInfo (Decoder, HasCompleteDecoder);
1904	}
1905
1906	void parseVarLenInstOperand(const Record &Def,
1907	std::vector<OperandInfo> &Operands,
1908	const CodeGenInstruction &CGI) {
1909
1910	const RecordVal *RV = Def.getValue(Name: "Inst");
1911	VarLenInst VLI(cast<DagInit>(Val: RV->getValue()), RV);
1912	SmallVector<int> TiedTo;
1913
1914	for (const auto &[Idx, Op] : enumerate(First: CGI.Operands)) {
1915	if (Op.MIOperandInfo && Op.MIOperandInfo->getNumArgs() > `0`)
1916	for (auto *Arg : Op.MIOperandInfo->getArgs())
1917	Operands.push_back(x: getOpInfo(TypeRecord: cast<DefInit>(Val: Arg)->getDef()));
1918	else
1919	Operands.push_back(x: getOpInfo(TypeRecord: Op.Rec));
1920
1921	int TiedReg = Op.getTiedRegister();
1922	TiedTo.push_back(Elt: -`1`);
1923	if (TiedReg != -`1`) {
1924	TiedTo [Idx] = TiedReg;
1925	TiedTo [TiedReg] = Idx;
1926	}
1927	}
1928
1929	unsigned CurrBitPos = `0`;
1930	for (const auto &EncodingSegment : VLI) {
1931	unsigned Offset = `0`;
1932	StringRef OpName;
1933
1934	if (const StringInit *SI = dyn_cast<StringInit>(Val: EncodingSegment.Value)) {
1935	OpName = SI->getValue();
1936	} else if (const DagInit *DI = dyn_cast<DagInit>(Val: EncodingSegment.Value)) {
1937	OpName = cast<StringInit>(Val: DI->getArg(Num: `0`))->getValue();
1938	Offset = cast<IntInit>(Val: DI->getArg(Num: `2`))->getValue();
1939	}
1940
1941	if (!OpName.empty()) {
1942	auto OpSubOpPair =
1943	const_cast<CodeGenInstruction &>(CGI).Operands.ParseOperandName(
1944	Op: OpName);
1945	unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(Op: OpSubOpPair);
1946	Operands [OpIdx].addField(Base: CurrBitPos, Width: EncodingSegment.BitWidth, Offset);
1947	if (!EncodingSegment.CustomDecoder.empty())
1948	Operands [OpIdx].Decoder = EncodingSegment.CustomDecoder.str();
1949
1950	int TiedReg = TiedTo [OpSubOpPair.first];
1951	if (TiedReg != -`1`) {
1952	unsigned OpIdx = CGI.Operands.getFlattenedOperandNumber(
1953	Op: std::pair(TiedReg, OpSubOpPair.second));
1954	Operands [OpIdx].addField(Base: CurrBitPos, Width: EncodingSegment.BitWidth, Offset);
1955	}
1956	}
1957
1958	CurrBitPos += EncodingSegment.BitWidth;
1959	}
1960	}
1961
1962	static void debugDumpRecord(const Record &Rec) {
1963	// Dump the record, so we can see what's going on...
1964	std::string E;
1965	raw_string_ostream S(E);
1966	S << "Dumping record for previous error:\n";
1967	S << Rec;
1968	PrintNote(Msg: E);
1969	}
1970
1971	/// For an operand field named OpName: populate OpInfo.InitValue with the
1972	/// constant-valued bit values, and OpInfo.Fields with the ranges of bits to
1973	/// insert from the decoded instruction.
1974	static void addOneOperandFields(const Record &EncodingDef, const BitsInit &Bits,
1975	std::map<std::string, std::string> &TiedNames,
1976	StringRef OpName, OperandInfo &OpInfo) {
1977	// Some bits of the operand may be required to be 1 depending on the
1978	// instruction's encoding. Collect those bits.
1979	if (const RecordVal *EncodedValue = EncodingDef.getValue(Name: OpName))
1980	if (const BitsInit *OpBits = dyn_cast<BitsInit>(Val: EncodedValue->getValue()))
1981	for (unsigned I = `0`; I < OpBits->getNumBits(); ++I)
1982	if (const BitInit *OpBit = dyn_cast<BitInit>(Val: OpBits->getBit(Bit: I)))
1983	if (OpBit->getValue())
1984	OpInfo.InitValue \|= `1ULL` << I;
1985
1986	for (unsigned I = `0`, J = `0`; I != Bits.getNumBits(); I = J) {
1987	VarInit *Var;
1988	unsigned Offset = `0`;
1989	for (; J != Bits.getNumBits(); ++J) {
1990	VarBitInit *BJ = dyn_cast<VarBitInit>(Val: Bits.getBit(Bit: J));
1991	if (BJ) {
1992	Var = dyn_cast<VarInit>(Val: BJ->getBitVar());
1993	if (I == J)
1994	Offset = BJ->getBitNum();
1995	else if (BJ->getBitNum() != Offset + J - I)
1996	break;
1997	} else {
1998	Var = dyn_cast<VarInit>(Val: Bits.getBit(Bit: J));
1999	}
2000	if (!Var \|\| (Var->getName() != OpName &&
2001	Var->getName() != TiedNames [std::string (OpName)]))
2002	break;
2003	}
2004	if (I == J)
2005	++J;
2006	else
2007	OpInfo.addField(Base: I, Width: J - I, Offset);
2008	}
2009	}
2010
2011	static unsigned
2012	populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
2013	const CodeGenInstruction &CGI, unsigned Opc,
2014	std::map<unsigned, std::vector<OperandInfo>> &Operands,
2015	bool IsVarLenInst) {
2016	const Record &Def = *CGI.TheDef;
2017	// If all the bit positions are not specified; do not decode this instruction.
2018	// We are bound to fail! For proper disassembly, the well-known encoding bits
2019	// of the instruction must be fully specified.
2020
2021	BitsInit &Bits = getBitsField(def: EncodingDef, str: "Inst");
2022	if (Bits.allInComplete())
2023	return `0`;
2024
2025	std::vector<OperandInfo> InsnOperands;
2026
2027	// If the instruction has specified a custom decoding hook, use that instead
2028	// of trying to auto-generate the decoder.
2029	StringRef InstDecoder = EncodingDef.getValueAsString(FieldName: "DecoderMethod");
2030	if (InstDecoder != "") {
2031	bool HasCompleteInstDecoder =
2032	EncodingDef.getValueAsBit(FieldName: "hasCompleteDecoder");
2033	InsnOperands.push_back(
2034	x: OperandInfo (std::string (InstDecoder), HasCompleteInstDecoder));
2035	Operands [Opc] = InsnOperands;
2036	return Bits.getNumBits();
2037	}
2038
2039	// Generate a description of the operand of the instruction that we know
2040	// how to decode automatically.
2041	// FIXME: We'll need to have a way to manually override this as needed.
2042
2043	// Gather the outputs/inputs of the instruction, so we can find their
2044	// positions in the encoding. This assumes for now that they appear in the
2045	// MCInst in the order that they're listed.
2046	std::vector<std::pair<Init *, StringRef>> InOutOperands;
2047	DagInit *Out = Def.getValueAsDag(FieldName: "OutOperandList");
2048	DagInit *In = Def.getValueAsDag(FieldName: "InOperandList");
2049	for (const auto &[Idx, Arg] : enumerate(First: Out->getArgs()))
2050	InOutOperands.push_back(x: std::pair(Arg, Out->getArgNameStr(Num: Idx)));
2051	for (const auto &[Idx, Arg] : enumerate(First: In->getArgs()))
2052	InOutOperands.push_back(x: std::pair(Arg, In->getArgNameStr(Num: Idx)));
2053
2054	// Search for tied operands, so that we can correctly instantiate
2055	// operands that are not explicitly represented in the encoding.
2056	std::map<std::string, std::string> TiedNames;
2057	for (const auto &[I, Op] : enumerate(First: CGI.Operands)) {
2058	for (const auto &[J, CI] : enumerate(First: Op.Constraints)) {
2059	if (CI.isTied()) {
2060	std::pair<unsigned, unsigned> SO =
2061	CGI.Operands.getSubOperandNumber(Op: CI.getTiedOperand());
2062	std::string TiedName = CGI.Operands [SO.first].SubOpNames [SO.second];
2063	if (TiedName.empty())
2064	TiedName = CGI.Operands [SO.first].Name;
2065	std::string MyName = Op.SubOpNames [J];
2066	if (MyName.empty())
2067	MyName = Op.Name;
2068
2069	TiedNames [MyName] = TiedName;
2070	TiedNames [TiedName] = MyName;
2071	}
2072	}
2073	}
2074
2075	if (IsVarLenInst) {
2076	parseVarLenInstOperand(Def: EncodingDef, Operands&: InsnOperands, CGI);
2077	} else {
2078	// For each operand, see if we can figure out where it is encoded.
2079	for (const auto &Op : InOutOperands) {
2080	Init *OpInit = Op.first;
2081	StringRef OpName = Op.second;
2082
2083	// We're ready to find the instruction encoding locations for this
2084	// operand.
2085
2086	// First, find the operand type ("OpInit"), and sub-op names
2087	// ("SubArgDag") if present.
2088	DagInit *SubArgDag = dyn_cast<DagInit>(Val: OpInit);
2089	if (SubArgDag)
2090	OpInit = SubArgDag->getOperator();
2091	Record *OpTypeRec = cast<DefInit>(Val: OpInit)->getDef();
2092	// Lookup the sub-operands from the operand type record (note that only
2093	// Operand subclasses have MIOperandInfo, see CodeGenInstruction.cpp).
2094	DagInit *SubOps = OpTypeRec->isSubClassOf(Name: "Operand")
2095	? OpTypeRec->getValueAsDag(FieldName: "MIOperandInfo")
2096	: nullptr;
2097
2098	// Lookup the decoder method and construct a new OperandInfo to hold our
2099	// result.
2100	OperandInfo OpInfo = getOpInfo(TypeRecord: OpTypeRec);
2101
2102	// If we have named sub-operands...
2103	if (SubArgDag) {
2104	// Then there should not be a custom decoder specified on the top-level
2105	// type.
2106	if (!OpInfo.Decoder.empty()) {
2107	PrintError(ErrorLoc: EncodingDef.getLoc(),
2108	Msg: "DecoderEmitter: operand \"" + OpName + "\" has type \"" +
2109	OpInit->getAsString() +
2110	"\" with a custom DecoderMethod, but also named "
2111	"sub-operands.");
2112	continue;
2113	}
2114
2115	// Decode each of the sub-ops separately.
2116	assert(SubOps && SubArgDag->getNumArgs() == SubOps->getNumArgs());
2117	for (const auto &[I, Arg] : enumerate(First: SubOps->getArgs())) {
2118	StringRef SubOpName = SubArgDag->getArgNameStr(Num: I);
2119	OperandInfo SubOpInfo = getOpInfo(TypeRecord: cast<DefInit>(Val: Arg)->getDef());
2120
2121	addOneOperandFields(EncodingDef, Bits, TiedNames, OpName: SubOpName,
2122	OpInfo&: SubOpInfo);
2123	InsnOperands.push_back(x: SubOpInfo);
2124	}
2125	continue;
2126	}
2127
2128	// Otherwise, if we have an operand with sub-operands, but they aren't
2129	// named...
2130	if (SubOps && OpInfo.Decoder.empty()) {
2131	// If it's a single sub-operand, and no custom decoder, use the decoder
2132	// from the one sub-operand.
2133	if (SubOps->getNumArgs() == `1`)
2134	OpInfo = getOpInfo(TypeRecord: cast<DefInit>(Val: SubOps->getArg(Num: `0`))->getDef());
2135
2136	// If we have multiple sub-ops, there'd better have a custom
2137	// decoder. (Otherwise we don't know how to populate them properly...)
2138	if (SubOps->getNumArgs() > `1`) {
2139	PrintError(ErrorLoc: EncodingDef.getLoc(),
2140	Msg: "DecoderEmitter: operand \"" + OpName +
2141	"\" uses MIOperandInfo with multiple ops, but doesn't "
2142	"have a custom decoder!");
2143	debugDumpRecord(Rec: EncodingDef);
2144	continue;
2145	}
2146	}
2147
2148	addOneOperandFields(EncodingDef, Bits, TiedNames, OpName, OpInfo);
2149	// FIXME: it should be an error not to find a definition for a given
2150	// operand, rather than just failing to add it to the resulting
2151	// instruction! (This is a longstanding bug, which will be addressed in an
2152	// upcoming change.)
2153	if (OpInfo.numFields() > `0`)
2154	InsnOperands.push_back(x: OpInfo);
2155	}
2156	}
2157	Operands [Opc] = InsnOperands;
2158
2159	#if 0
2160	LLVM_DEBUG({
2161	// Dumps the instruction encoding bits.
2162	dumpBits(errs(), Bits);
2163
2164	errs() << `'\n'`;
2165
2166	// Dumps the list of operand info.
2167	for (unsigned i = `0`, e = CGI.Operands.size(); i != e; ++i) {
2168	const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
2169	const std::string &OperandName = Info.Name;
2170	const Record &OperandDef = *Info.Rec;
2171
2172	errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
2173	}
2174	});
2175	#endif
2176
2177	return Bits.getNumBits();
2178	}
2179
2180	// emitFieldFromInstruction - Emit the templated helper function
2181	// fieldFromInstruction().
2182	// On Windows we make sure that this function is not inlined when
2183	// using the VS compiler. It has a bug which causes the function
2184	// to be optimized out in some circumstances. See llvm.org/pr38292
2185	static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
2186	OS << R"(
2187	// Helper functions for extracting fields from encoded instructions.
2188	// InsnType must either be integral or an APInt-like object that must:
2189	// * be default-constructible and copy-constructible
2190	// * be constructible from an APInt (this can be private)
2191	// * Support insertBits(bits, startBit, numBits)
2192	// * Support extractBitsAsZExtValue(numBits, startBit)
2193	// * Support the ~, &, ==, and != operators with other objects of the same type
2194	// * Support the != and bitwise & with uint64_t
2195	// * Support put (<<) to raw_ostream&
2196	template <typename InsnType>
2197	#if defined(_MSC_VER) && !defined(__clang__)
2198	__declspec(noinline)
2199	#endif
2200	static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>
2201	fieldFromInstruction(const InsnType &insn, unsigned startBit,
2202	unsigned numBits) {
2203	assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
2204	assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
2205	"Instruction field out of bounds!");
2206	InsnType fieldMask;
2207	if (numBits == sizeof(InsnType) * 8)
2208	fieldMask = (InsnType)(-1LL);
2209	else
2210	fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
2211	return (insn & fieldMask) >> startBit;
2212	}
2213
2214	template <typename InsnType>
2215	static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t>
2216	fieldFromInstruction(const InsnType &insn, unsigned startBit,
2217	unsigned numBits) {
2218	return insn.extractBitsAsZExtValue(numBits, startBit);
2219	}
2220	)";
2221	}
2222
2223	// emitInsertBits - Emit the templated helper function insertBits().
2224	static void emitInsertBits(formatted_raw_ostream &OS) {
2225	OS << R"(
2226	// Helper function for inserting bits extracted from an encoded instruction into
2227	// a field.
2228	template <typename InsnType>
2229	static std::enable_if_t<std::is_integral<InsnType>::value>
2230	insertBits(InsnType &field, InsnType bits, unsigned startBit, unsigned numBits) {
2231	assert(startBit + numBits <= sizeof field * 8);
2232	field \|= (InsnType)bits << startBit;
2233	}
2234
2235	template <typename InsnType>
2236	static std::enable_if_t<!std::is_integral<InsnType>::value>
2237	insertBits(InsnType &field, uint64_t bits, unsigned startBit, unsigned numBits) {
2238	field.insertBits(bits, startBit, numBits);
2239	}
2240	)";
2241	}
2242
2243	// emitDecodeInstruction - Emit the templated helper function
2244	// decodeInstruction().
2245	static void emitDecodeInstruction(formatted_raw_ostream &OS,
2246	bool IsVarLenInst) {
2247	OS << R"(
2248	template <typename InsnType>
2249	static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
2250	InsnType insn, uint64_t Address,
2251	const MCDisassembler *DisAsm,
2252	const MCSubtargetInfo &STI)";
2253	if (IsVarLenInst) {
2254	OS << ",\n "
2255	"llvm::function_ref<void(APInt &, uint64_t)> makeUp";
2256	}
2257	OS << R"() {
2258	const FeatureBitset &Bits = STI.getFeatureBits();
2259
2260	const uint8_t *Ptr = DecodeTable;
2261	uint64_t CurFieldValue = 0;
2262	DecodeStatus S = MCDisassembler::Success;
2263	while (true) {
2264	ptrdiff_t Loc = Ptr - DecodeTable;
2265	switch (*Ptr) {
2266	default:
2267	errs() << Loc << ": Unexpected decode table opcode!\n";
2268	return MCDisassembler::Fail;
2269	case MCD::OPC_ExtractField: {
2270	// Decode the start value.
2271	unsigned DecodedLen;
2272	unsigned Start = decodeULEB128(++Ptr, &DecodedLen);
2273	Ptr += DecodedLen;
2274	unsigned Len = *Ptr++;)";
2275	if (IsVarLenInst)
2276	OS << "\n makeUp(insn, Start + Len);";
2277	OS << R"(
2278	CurFieldValue = fieldFromInstruction(insn, Start, Len);
2279	LLVM_DEBUG(dbgs() << Loc << ": OPC_ExtractField(" << Start << ", "
2280	<< Len << "): " << CurFieldValue << "\n");
2281	break;
2282	}
2283	case MCD::OPC_FilterValue: {
2284	// Decode the field value.
2285	unsigned Len;
2286	uint64_t Val = decodeULEB128(++Ptr, &Len);
2287	Ptr += Len;
2288	// NumToSkip is a plain 24-bit integer.
2289	unsigned NumToSkip = *Ptr++;
2290	NumToSkip \|= (*Ptr++) << 8;
2291	NumToSkip \|= (*Ptr++) << 16;
2292
2293	// Perform the filter operation.
2294	if (Val != CurFieldValue)
2295	Ptr += NumToSkip;
2296	LLVM_DEBUG(dbgs() << Loc << ": OPC_FilterValue(" << Val << ", " << NumToSkip
2297	<< "): " << ((Val != CurFieldValue) ? "FAIL:" : "PASS:")
2298	<< " continuing at " << (Ptr - DecodeTable) << "\n");
2299
2300	break;
2301	}
2302	case MCD::OPC_CheckField: {
2303	// Decode the start value.
2304	unsigned Start = decodeULEB128AndInc(++Ptr);
2305	unsigned Len = *Ptr;)";
2306	if (IsVarLenInst)
2307	OS << "\n makeUp(insn, Start + Len);";
2308	OS << R"(
2309	uint64_t FieldValue = fieldFromInstruction(insn, Start, Len);
2310	// Decode the field value.
2311	unsigned PtrLen = 0;
2312	uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
2313	Ptr += PtrLen;
2314	// NumToSkip is a plain 24-bit integer.
2315	unsigned NumToSkip = *Ptr++;
2316	NumToSkip \|= (*Ptr++) << 8;
2317	NumToSkip \|= (*Ptr++) << 16;
2318
2319	// If the actual and expected values don't match, skip.
2320	if (ExpectedValue != FieldValue)
2321	Ptr += NumToSkip;
2322	LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckField(" << Start << ", "
2323	<< Len << ", " << ExpectedValue << ", " << NumToSkip
2324	<< "): FieldValue = " << FieldValue << ", ExpectedValue = "
2325	<< ExpectedValue << ": "
2326	<< ((ExpectedValue == FieldValue) ? "PASS\n" : "FAIL\n"));
2327	break;
2328	}
2329	case MCD::OPC_CheckPredicate: {
2330	// Decode the Predicate Index value.
2331	unsigned PIdx = decodeULEB128AndInc(++Ptr);
2332	// NumToSkip is a plain 24-bit integer.
2333	unsigned NumToSkip = *Ptr++;
2334	NumToSkip \|= (*Ptr++) << 8;
2335	NumToSkip \|= (*Ptr++) << 16;
2336	// Check the predicate.
2337	bool Pred;
2338	if (!(Pred = checkDecoderPredicate(PIdx, Bits)))
2339	Ptr += NumToSkip;
2340	(void)Pred;
2341	LLVM_DEBUG(dbgs() << Loc << ": OPC_CheckPredicate(" << PIdx << "): "
2342	<< (Pred ? "PASS\n" : "FAIL\n"));
2343
2344	break;
2345	}
2346	case MCD::OPC_Decode: {
2347	// Decode the Opcode value.
2348	unsigned Opc = decodeULEB128AndInc(++Ptr);
2349	unsigned DecodeIdx = decodeULEB128AndInc(Ptr);
2350
2351	MI.clear();
2352	MI.setOpcode(Opc);
2353	bool DecodeComplete;)";
2354	if (IsVarLenInst) {
2355	OS << "\n unsigned Len = InstrLenTable[Opc];\n"
2356	<< " makeUp(insn, Len);";
2357	}
2358	OS << R"(
2359	S = decodeToMCInst(S, DecodeIdx, insn, MI, Address, DisAsm, DecodeComplete);
2360	assert(DecodeComplete);
2361
2362	LLVM_DEBUG(dbgs() << Loc << ": OPC_Decode: opcode " << Opc
2363	<< ", using decoder " << DecodeIdx << ": "
2364	<< (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n");
2365	return S;
2366	}
2367	case MCD::OPC_TryDecode: {
2368	// Decode the Opcode value.
2369	unsigned Opc = decodeULEB128AndInc(++Ptr);
2370	unsigned DecodeIdx = decodeULEB128AndInc(Ptr);
2371	// NumToSkip is a plain 24-bit integer.
2372	unsigned NumToSkip = *Ptr++;
2373	NumToSkip \|= (*Ptr++) << 8;
2374	NumToSkip \|= (*Ptr++) << 16;
2375
2376	// Perform the decode operation.
2377	MCInst TmpMI;
2378	TmpMI.setOpcode(Opc);
2379	bool DecodeComplete;
2380	S = decodeToMCInst(S, DecodeIdx, insn, TmpMI, Address, DisAsm, DecodeComplete);
2381	LLVM_DEBUG(dbgs() << Loc << ": OPC_TryDecode: opcode " << Opc
2382	<< ", using decoder " << DecodeIdx << ": ");
2383
2384	if (DecodeComplete) {
2385	// Decoding complete.
2386	LLVM_DEBUG(dbgs() << (S != MCDisassembler::Fail ? "PASS" : "FAIL") << "\n");
2387	MI = TmpMI;
2388	return S;
2389	} else {
2390	assert(S == MCDisassembler::Fail);
2391	// If the decoding was incomplete, skip.
2392	Ptr += NumToSkip;
2393	LLVM_DEBUG(dbgs() << "FAIL: continuing at " << (Ptr - DecodeTable) << "\n");
2394	// Reset decode status. This also drops a SoftFail status that could be
2395	// set before the decode attempt.
2396	S = MCDisassembler::Success;
2397	}
2398	break;
2399	}
2400	case MCD::OPC_SoftFail: {
2401	// Decode the mask values.
2402	uint64_t PositiveMask = decodeULEB128AndInc(++Ptr);
2403	uint64_t NegativeMask = decodeULEB128AndInc(Ptr);
2404	bool Fail = (insn & PositiveMask) != 0 \|\| (~insn & NegativeMask) != 0;
2405	if (Fail)
2406	S = MCDisassembler::SoftFail;
2407	LLVM_DEBUG(dbgs() << Loc << ": OPC_SoftFail: " << (Fail ? "FAIL\n" : "PASS\n"));
2408	break;
2409	}
2410	case MCD::OPC_Fail: {
2411	LLVM_DEBUG(dbgs() << Loc << ": OPC_Fail\n");
2412	return MCDisassembler::Fail;
2413	}
2414	}
2415	}
2416	llvm_unreachable("bogosity detected in disassembler state machine!");
2417	}
2418
2419	)";
2420	}
2421
2422	// Helper to propagate SoftFail status. Returns false if the status is Fail;
2423	// callers are expected to early-exit in that condition. (Note, the '&' operator
2424	// is correct to propagate the values of this enum; see comment on 'enum
2425	// DecodeStatus'.)
2426	static void emitCheck(formatted_raw_ostream &OS) {
2427	OS << R"(
2428	static bool Check(DecodeStatus &Out, DecodeStatus In) {
2429	Out = static_cast<DecodeStatus>(Out & In);
2430	return Out != MCDisassembler::Fail;
2431	}
2432
2433	)";
2434	}
2435
2436	// Collect all HwModes referenced by the target for encoding purposes,
2437	// returning a vector of corresponding names.
2438	static void collectHwModesReferencedForEncodings(
2439	const CodeGenHwModes &HWM, std::vector<StringRef> &Names,
2440	NamespacesHwModesMap &NamespacesWithHwModes) {
2441	SmallBitVector BV(HWM.getNumModeIds());
2442	for (const auto &MS : HWM.getHwModeSelects()) {
2443	for (const HwModeSelect::PairType &P : MS.second.Items) {
2444	if (P.second->isSubClassOf(Name: "InstructionEncoding")) {
2445	std::string DecoderNamespace =
2446	std::string (P.second->getValueAsString(FieldName: "DecoderNamespace"));
2447	if (P.first == DefaultMode) {
2448	NamespacesWithHwModes [DecoderNamespace].insert(x: "");
2449	} else {
2450	NamespacesWithHwModes [DecoderNamespace].insert(
2451	x: HWM.getMode(Id: P.first).Name);
2452	}
2453	BV.set(P.first);
2454	}
2455	}
2456	}
2457	transform(Range: BV.set_bits(), d_first: std::back_inserter(x&: Names), F: [&HWM](const int &M) {
2458	if (M == DefaultMode)
2459	return StringRef ("");
2460	return HWM.getModeName(Id: M, /IncludeDefault=/true);
2461	});
2462	}
2463
2464	static void
2465	handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
2466	const std::vector<StringRef> &HwModeNames,
2467	NamespacesHwModesMap &NamespacesWithHwModes,
2468	std::vector<EncodingAndInst> &GlobalEncodings) {
2469	const Record *InstDef = Instr->TheDef;
2470
2471	switch (DecoderEmitterSuppressDuplicates) {
2472	case SUPPRESSION_DISABLE: {
2473	for (StringRef HwModeName : HwModeNames)
2474	GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args&: HwModeName);
2475	break;
2476	}
2477	case SUPPRESSION_LEVEL1: {
2478	std::string DecoderNamespace =
2479	std::string (InstDef->getValueAsString(FieldName: "DecoderNamespace"));
2480	auto It = NamespacesWithHwModes.find(x: DecoderNamespace);
2481	if (It != NamespacesWithHwModes.end()) {
2482	for (StringRef HwModeName : It ->second)
2483	GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args&: HwModeName);
2484	} else {
2485	// Only emit the encoding once, as it's DecoderNamespace doesn't
2486	// contain any HwModes.
2487	GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args: "");
2488	}
2489	break;
2490	}
2491	case SUPPRESSION_LEVEL2:
2492	GlobalEncodings.emplace_back(args&: InstDef, args&: Instr, args: "");
2493	break;
2494	}
2495	}
2496
2497	// Emits disassembler code for instruction decoding.
2498	void DecoderEmitter::run(raw_ostream &o) {
2499	formatted_raw_ostream OS(o);
2500	OS << R"(
2501	#include "llvm/MC/MCInst.h"
2502	#include "llvm/MC/MCSubtargetInfo.h"
2503	#include "llvm/Support/DataTypes.h"
2504	#include "llvm/Support/Debug.h"
2505	#include "llvm/Support/LEB128.h"
2506	#include "llvm/Support/raw_ostream.h"
2507	#include "llvm/TargetParser/SubtargetFeature.h"
2508	#include <assert.h>
2509
2510	namespace llvm {
2511	)";
2512
2513	emitFieldFromInstruction(OS);
2514	emitInsertBits(OS);
2515	emitCheck(OS);
2516
2517	Target.reverseBitsForLittleEndianEncoding();
2518
2519	// Parameterize the decoders based on namespace and instruction width.
2520
2521	// First, collect all encoding-related HwModes referenced by the target.
2522	// And establish a mapping table between DecoderNamespace and HwMode.
2523	// If HwModeNames is empty, add the empty string so we always have one HwMode.
2524	const CodeGenHwModes &HWM = Target.getHwModes();
2525	std::vector<StringRef> HwModeNames;
2526	NamespacesHwModesMap NamespacesWithHwModes;
2527	collectHwModesReferencedForEncodings(HWM, Names&: HwModeNames, NamespacesWithHwModes);
2528	if (HwModeNames.empty())
2529	HwModeNames.push_back(x: "");
2530
2531	const auto &NumberedInstructions = Target.getInstructionsByEnumValue();
2532	NumberedEncodings.reserve(n: NumberedInstructions.size());
2533	for (const auto &NumberedInstruction : NumberedInstructions) {
2534	const Record *InstDef = NumberedInstruction->TheDef;
2535	if (const RecordVal *RV = InstDef->getValue(Name: "EncodingInfos")) {
2536	if (DefInit *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) {
2537	EncodingInfoByHwMode EBM(DI->getDef(), HWM);
2538	for (auto &[ModeId, Encoding] : EBM) {
2539	// DecoderTables with DefaultMode should not have any suffix.
2540	if (ModeId == DefaultMode) {
2541	NumberedEncodings.emplace_back(args&: Encoding, args: NumberedInstruction, args: "");
2542	} else {
2543	NumberedEncodings.emplace_back(args&: Encoding, args: NumberedInstruction,
2544	args: HWM.getMode(Id: ModeId).Name);
2545	}
2546	}
2547	continue;
2548	}
2549	}
2550	// This instruction is encoded the same on all HwModes.
2551	// According to user needs, provide varying degrees of suppression.
2552	handleHwModesUnrelatedEncodings(Instr: NumberedInstruction, HwModeNames,
2553	NamespacesWithHwModes, GlobalEncodings&: NumberedEncodings);
2554	}
2555	for (const auto &NumberedAlias :
2556	RK.getAllDerivedDefinitions(ClassName: "AdditionalEncoding"))
2557	NumberedEncodings.emplace_back(
2558	args: NumberedAlias,
2559	args: &Target.getInstruction(InstRec: NumberedAlias->getValueAsDef(FieldName: "AliasOf")));
2560
2561	std::map<std::pair<std::string, unsigned>, std::vector<EncodingIDAndOpcode>>
2562	OpcMap;
2563	std::map<unsigned, std::vector<OperandInfo>> Operands;
2564	std::vector<unsigned> InstrLen;
2565	bool IsVarLenInst = Target.hasVariableLengthEncodings();
2566	unsigned MaxInstLen = `0`;
2567
2568	for (const auto &[NEI, NumberedEncoding] : enumerate(First&: NumberedEncodings)) {
2569	const Record *EncodingDef = NumberedEncoding.EncodingDef;
2570	const CodeGenInstruction *Inst = NumberedEncoding.Inst;
2571	const Record *Def = Inst->TheDef;
2572	unsigned Size = EncodingDef->getValueAsInt(FieldName: "Size");
2573	if (Def->getValueAsString(FieldName: "Namespace") == "TargetOpcode" \|\|
2574	Def->getValueAsBit(FieldName: "isPseudo") \|\|
2575	Def->getValueAsBit(FieldName: "isAsmParserOnly") \|\|
2576	Def->getValueAsBit(FieldName: "isCodeGenOnly")) {
2577	NumEncodingsLackingDisasm ++;
2578	continue;
2579	}
2580
2581	if (NEI < NumberedInstructions.size())
2582	NumInstructions ++;
2583	NumEncodings ++;
2584
2585	if (!Size && !IsVarLenInst)
2586	continue;
2587
2588	if (IsVarLenInst)
2589	InstrLen.resize(new_size: NumberedInstructions.size(), x: `0`);
2590
2591	if (unsigned Len = populateInstruction(Target, EncodingDef: EncodingDef, CGI: Inst, Opc: NEI,
2592	Operands, IsVarLenInst)) {
2593	if (IsVarLenInst) {
2594	MaxInstLen = std::max(a: MaxInstLen, b: Len);
2595	InstrLen [NEI] = Len;
2596	}
2597	std::string DecoderNamespace =
2598	std::string (EncodingDef->getValueAsString(FieldName: "DecoderNamespace"));
2599	if (!NumberedEncoding.HwModeName.empty())
2600	DecoderNamespace +=
2601	std::string ("_") + NumberedEncoding.HwModeName.str();
2602	OpcMap [std::pair (DecoderNamespace, Size)].emplace_back(
2603	args&: NEI, args: Target.getInstrIntValue(R: Def));
2604	} else {
2605	NumEncodingsOmitted ++;
2606	}
2607	}
2608
2609	DecoderTableInfo TableInfo;
2610	for (const auto &Opc : OpcMap) {
2611	// Emit the decoder for this namespace+width combination.
2612	ArrayRef<EncodingAndInst> NumberedEncodingsRef(NumberedEncodings.data(),
2613	NumberedEncodings.size());
2614	FilterChooser FC(NumberedEncodingsRef, Opc.second, Operands,
2615	IsVarLenInst ? MaxInstLen : `8` * Opc.first.second, this);
2616
2617	// The decode table is cleared for each top level decoder function. The
2618	// predicates and decoders themselves, however, are shared across all
2619	// decoders to give more opportunities for uniqueing.
2620	TableInfo.Table.clear();
2621	TableInfo.FixupStack.clear();
2622	TableInfo.Table.reserve(n: `16384`);
2623	TableInfo.FixupStack.emplace_back();
2624	FC.emitTableEntries(TableInfo);
2625	// Any NumToSkip fixups in the top level scope can resolve to the
2626	// OPC_Fail at the end of the table.
2627	assert(TableInfo.FixupStack.size() == `1` && "fixup stack phasing error!");
2628	// Resolve any NumToSkip fixups in the current scope.
2629	resolveTableFixups(Table&: TableInfo.Table, Fixups: TableInfo.FixupStack.back(),
2630	DestIdx: TableInfo.Table.size());
2631	TableInfo.FixupStack.clear();
2632
2633	TableInfo.Table.push_back(x: MCD::OPC_Fail);
2634
2635	// Print the table to the output stream.
2636	emitTable(OS, Table&: TableInfo.Table, Indentation: `0`, BitWidth: FC.getBitWidth(), Namespace: Opc.first.first,
2637	EncodingIDs: Opc.second);
2638	}
2639
2640	// For variable instruction, we emit a instruction length table
2641	// to let the decoder know how long the instructions are.
2642	// You can see example usage in M68k's disassembler.
2643	if (IsVarLenInst)
2644	emitInstrLenTable(OS, InstrLen);
2645	// Emit the predicate function.
2646	emitPredicateFunction(OS, Predicates&: TableInfo.Predicates, Indentation: `0`);
2647
2648	// Emit the decoder function.
2649	emitDecoderFunction(OS, Decoders&: TableInfo.Decoders, Indentation: `0`);
2650
2651	// Emit the main entry point for the decoder, decodeInstruction().
2652	emitDecodeInstruction(OS, IsVarLenInst);
2653
2654	OS << "\n} // end namespace llvm\n";
2655	}
2656
2657	namespace llvm {
2658
2659	void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
2660	const std::string &PredicateNamespace) {
2661	DecoderEmitter (RK, PredicateNamespace).run(o&: OS);
2662	}
2663
2664	} // end namespace llvm
2665

source code of llvm/utils/TableGen/DecoderEmitter.cpp