CodeEmitterGen.cpp source code [llvm/utils/TableGen/CodeEmitterGen.cpp]

1	//===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// CodeEmitterGen uses the descriptions of instructions and their fields to
10	// construct an automated code emitter: a function called
11	// getBinaryCodeForInstr() that, given a MCInst, returns the value of the
12	// instruction - either as an uint64_t or as an APInt, depending on the
13	// maximum bit width of all Inst definitions.
14	//
15	// In addition, it generates another function called getOperandBitOffset()
16	// that, given a MCInst and an operand index, returns the minimum of indices of
17	// all bits that carry some portion of the respective operand. When the target's
18	// encodeInstruction() stores the instruction in a little-endian byte order, the
19	// returned value is the offset of the start of the operand in the encoded
20	// instruction. Other targets might need to adjust the returned value according
21	// to their encodeInstruction() implementation.
22	//
23	//===----------------------------------------------------------------------===//
24
25	#include "Common/CodeGenHwModes.h"
26	#include "Common/CodeGenInstruction.h"
27	#include "Common/CodeGenTarget.h"
28	#include "Common/InfoByHwMode.h"
29	#include "Common/VarLenCodeEmitterGen.h"
30	#include "llvm/ADT/APInt.h"
31	#include "llvm/ADT/ArrayRef.h"
32	#include "llvm/ADT/StringExtras.h"
33	#include "llvm/Support/Casting.h"
34	#include "llvm/Support/raw_ostream.h"
35	#include "llvm/TableGen/Error.h"
36	#include "llvm/TableGen/Record.h"
37	#include "llvm/TableGen/TableGenBackend.h"
38	#include <cstdint>
39	#include <map>
40	#include <set>
41	#include <string>
42	#include <utility>
43	#include <vector>
44
45	using namespace llvm;
46
47	namespace {
48
49	class CodeEmitterGen {
50	RecordKeeper &Records;
51
52	public:
53	CodeEmitterGen(RecordKeeper &R) : Records(R) {}
54
55	void run(raw_ostream &o);
56
57	private:
58	int getVariableBit(const std::string &VarName, BitsInit BI, int* bit);
59	std::pair<std::string, std::string>
60	getInstructionCases(Record *R, CodeGenTarget &Target);
61	void addInstructionCasesForEncoding(Record R, Record EncodingDef,
62	CodeGenTarget &Target, std::string &Case,
63	std::string &BitOffsetCase);
64	bool addCodeToMergeInOperand(Record R, BitsInit BI,
65	const std::string &VarName, std::string &Case,
66	std::string &BitOffsetCase,
67	CodeGenTarget &Target);
68
69	void emitInstructionBaseValues(
70	raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
71	CodeGenTarget &Target, int HwMode = -`1`);
72	void
73	emitCaseMap(raw_ostream &o,
74	const std::map<std::string, std::vector<std::string>> &CaseMap);
75	unsigned BitWidth = `0u`;
76	bool UseAPInt = false;
77	};
78
79	// If the VarBitInit at position 'bit' matches the specified variable then
80	// return the variable bit position. Otherwise return -1.
81	int CodeEmitterGen::getVariableBit(const std::string &VarName, BitsInit *BI,
82	int bit) {
83	if (VarBitInit *VBI = dyn_cast<VarBitInit>(Val: BI->getBit(Bit: bit))) {
84	if (VarInit *VI = dyn_cast<VarInit>(Val: VBI->getBitVar()))
85	if (VI->getName() == VarName)
86	return VBI->getBitNum();
87	} else if (VarInit *VI = dyn_cast<VarInit>(Val: BI->getBit(Bit: bit))) {
88	if (VI->getName() == VarName)
89	return `0`;
90	}
91
92	return -`1`;
93	}
94
95	// Returns true if it succeeds, false if an error.
96	bool CodeEmitterGen::addCodeToMergeInOperand(Record R, BitsInit BI,
97	const std::string &VarName,
98	std::string &Case,
99	std::string &BitOffsetCase,
100	CodeGenTarget &Target) {
101	CodeGenInstruction &CGI = Target.getInstruction(InstRec: R);
102
103	// Determine if VarName actually contributes to the Inst encoding.
104	int bit = BI->getNumBits() - `1`;
105
106	// Scan for a bit that this contributed to.
107	for (; bit >= `0`;) {
108	if (getVariableBit(VarName, BI, bit) != -`1`)
109	break;
110
111	--bit;
112	}
113
114	// If we found no bits, ignore this value, otherwise emit the call to get the
115	// operand encoding.
116	if (bit < `0`)
117	return true;
118
119	// If the operand matches by name, reference according to that
120	// operand number. Non-matching operands are assumed to be in
121	// order.
122	unsigned OpIdx;
123	std::pair<unsigned, unsigned> SubOp;
124	if (CGI.Operands.hasSubOperandAlias(Name: VarName, SubOp)) {
125	OpIdx = CGI.Operands [SubOp.first].MIOperandNo + SubOp.second;
126	} else if (CGI.Operands.hasOperandNamed(Name: VarName, OpIdx)) {
127	// Get the machine operand number for the indicated operand.
128	OpIdx = CGI.Operands [OpIdx].MIOperandNo;
129	} else {
130	PrintError(Rec: R, Msg: Twine ("No operand named ") + VarName + " in record " +
131	R->getName());
132	return false;
133	}
134
135	if (CGI.Operands.isFlatOperandNotEmitted(FlatOpNo: OpIdx)) {
136	PrintError(Rec: R,
137	Msg: "Operand " + VarName + " used but also marked as not emitted!");
138	return false;
139	}
140
141	std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(Op: OpIdx);
142	std::string &EncoderMethodName =
143	CGI.Operands [SO.first].EncoderMethodNames [SO.second];
144
145	if (UseAPInt)
146	Case += " op.clearAllBits();\n";
147
148	Case += " // op: " + VarName + "\n";
149
150	// If the source operand has a custom encoder, use it.
151	if (!EncoderMethodName.empty()) {
152	if (UseAPInt) {
153	Case += " " + EncoderMethodName + "(MI, " + utostr(X: OpIdx);
154	Case += ", op";
155	} else {
156	Case += " op = " + EncoderMethodName + "(MI, " + utostr(X: OpIdx);
157	}
158	Case += ", Fixups, STI);\n";
159	} else {
160	if (UseAPInt) {
161	Case +=
162	" getMachineOpValue(MI, MI.getOperand(" + utostr(X: OpIdx) + ")";
163	Case += ", op, Fixups, STI";
164	} else {
165	Case += " op = getMachineOpValue(MI, MI.getOperand(" +
166	utostr(X: OpIdx) + ")";
167	Case += ", Fixups, STI";
168	}
169	Case += ");\n";
170	}
171
172	// Precalculate the number of lits this variable contributes to in the
173	// operand. If there is a single lit (consecutive range of bits) we can use a
174	// destructive sequence on APInt that reduces memory allocations.
175	int numOperandLits = `0`;
176	for (int tmpBit = bit; tmpBit >= `0`;) {
177	int varBit = getVariableBit(VarName, BI, bit: tmpBit);
178
179	// If this bit isn't from a variable, skip it.
180	if (varBit == -`1`) {
181	--tmpBit;
182	continue;
183	}
184
185	// Figure out the consecutive range of bits covered by this operand, in
186	// order to generate better encoding code.
187	int beginVarBit = varBit;
188	int N = `1`;
189	for (--tmpBit; tmpBit >= `0`;) {
190	varBit = getVariableBit(VarName, BI, bit: tmpBit);
191	if (varBit == -`1` \|\| varBit != (beginVarBit - N))
192	break;
193	++N;
194	--tmpBit;
195	}
196	++numOperandLits;
197	}
198
199	unsigned BitOffset = -`1`;
200	for (; bit >= `0`;) {
201	int varBit = getVariableBit(VarName, BI, bit);
202
203	// If this bit isn't from a variable, skip it.
204	if (varBit == -`1`) {
205	--bit;
206	continue;
207	}
208
209	// Figure out the consecutive range of bits covered by this operand, in
210	// order to generate better encoding code.
211	int beginInstBit = bit;
212	int beginVarBit = varBit;
213	int N = `1`;
214	for (--bit; bit >= `0`;) {
215	varBit = getVariableBit(VarName, BI, bit);
216	if (varBit == -`1` \|\| varBit != (beginVarBit - N))
217	break;
218	++N;
219	--bit;
220	}
221
222	std::string maskStr;
223	int opShift;
224
225	unsigned loBit = beginVarBit - N + `1`;
226	unsigned hiBit = loBit + N;
227	unsigned loInstBit = beginInstBit - N + `1`;
228	BitOffset = loInstBit;
229	if (UseAPInt) {
230	std::string extractStr;
231	if (N >= `64`) {
232	extractStr = "op.extractBits(" + itostr(X: hiBit - loBit) + ", " +
233	itostr(X: loBit) + ")";
234	Case += " Value.insertBits(" + extractStr + ", " +
235	itostr(X: loInstBit) + ");\n";
236	} else {
237	extractStr = "op.extractBitsAsZExtValue(" + itostr(X: hiBit - loBit) +
238	", " + itostr(X: loBit) + ")";
239	Case += " Value.insertBits(" + extractStr + ", " +
240	itostr(X: loInstBit) + ", " + itostr(X: hiBit - loBit) + ");\n";
241	}
242	} else {
243	uint64_t opMask = ~(uint64_t)`0` >> (`64` - N);
244	opShift = beginVarBit - N + `1`;
245	opMask <<= opShift;
246	maskStr = "UINT64_C(" + utostr(X: opMask) + ")";
247	opShift = beginInstBit - beginVarBit;
248
249	if (numOperandLits == `1`) {
250	Case += " op &= " + maskStr + ";\n";
251	if (opShift > `0`) {
252	Case += " op <<= " + itostr(X: opShift) + ";\n";
253	} else if (opShift < `0`) {
254	Case += " op >>= " + itostr(X: -opShift) + ";\n";
255	}
256	Case += " Value \|= op;\n";
257	} else {
258	if (opShift > `0`) {
259	Case += " Value \|= (op & " + maskStr + ") << " +
260	itostr(X: opShift) + ";\n";
261	} else if (opShift < `0`) {
262	Case += " Value \|= (op & " + maskStr + ") >> " +
263	itostr(X: -opShift) + ";\n";
264	} else {
265	Case += " Value \|= (op & " + maskStr + ");\n";
266	}
267	}
268	}
269	}
270
271	if (BitOffset != (unsigned)-`1`) {
272	BitOffsetCase += " case " + utostr(X: OpIdx) + ":\n";
273	BitOffsetCase += " // op: " + VarName + "\n";
274	BitOffsetCase += " return " + utostr(X: BitOffset) + ";\n";
275	}
276
277	return true;
278	}
279
280	std::pair<std::string, std::string>
281	CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) {
282	std::string Case, BitOffsetCase;
283
284	auto append = [&](const char *S) {
285	Case += S;
286	BitOffsetCase += S;
287	};
288
289	if (const RecordVal *RV = R->getValue(Name: "EncodingInfos")) {
290	if (auto *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) {
291	const CodeGenHwModes &HWM = Target.getHwModes();
292	EncodingInfoByHwMode EBM(DI->getDef(), HWM);
293	append (" switch (HwMode) {\n");
294	append (" default: llvm_unreachable(\"Unhandled HwMode\");\n");
295	for (auto &KV : EBM) {
296	append ((" case " + itostr(X: KV.first) + ": {\n").c_str());
297	addInstructionCasesForEncoding(R, EncodingDef: KV.second, Target, Case,
298	BitOffsetCase);
299	append (" break;\n");
300	append (" }\n");
301	}
302	append (" }\n");
303	return std::pair(std::move(Case), std::move(BitOffsetCase));
304	}
305	}
306	addInstructionCasesForEncoding(R, EncodingDef: R, Target, Case, BitOffsetCase);
307	return std::pair(std::move(Case), std::move(BitOffsetCase));
308	}
309
310	void CodeEmitterGen::addInstructionCasesForEncoding(
311	Record R, Record EncodingDef, CodeGenTarget &Target, std::string &Case,
312	std::string &BitOffsetCase) {
313	BitsInit *BI = EncodingDef->getValueAsBitsInit(FieldName: "Inst");
314
315	// Loop over all of the fields in the instruction, determining which are the
316	// operands to the instruction.
317	bool Success = true;
318	size_t OrigBitOffsetCaseSize = BitOffsetCase.size();
319	BitOffsetCase += " switch (OpNum) {\n";
320	size_t BitOffsetCaseSizeBeforeLoop = BitOffsetCase.size();
321	for (const RecordVal &RV : EncodingDef->getValues()) {
322	// Ignore fixed fields in the record, we're looking for values like:
323	// bits<5> RST = { ?, ?, ?, ?, ? };
324	if (RV.isNonconcreteOK() \|\| RV.getValue()->isComplete())
325	continue;
326
327	Success &= addCodeToMergeInOperand(R, BI, VarName: std::string (RV.getName()), Case,
328	BitOffsetCase, Target);
329	}
330	// Avoid empty switches.
331	if (BitOffsetCase.size() == BitOffsetCaseSizeBeforeLoop)
332	BitOffsetCase.resize(n: OrigBitOffsetCaseSize);
333	else
334	BitOffsetCase += " }\n";
335
336	if (!Success) {
337	// Dump the record, so we can see what's going on...
338	std::string E;
339	raw_string_ostream S(E);
340	S << "Dumping record for previous error:\n";
341	S << *R;
342	PrintNote(Msg: E);
343	}
344
345	StringRef PostEmitter = R->getValueAsString(FieldName: "PostEncoderMethod");
346	if (!PostEmitter.empty()) {
347	Case += " Value = ";
348	Case += PostEmitter;
349	Case += "(MI, Value";
350	Case += ", STI";
351	Case += ");\n";
352	}
353	}
354
355	static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
356	for (unsigned I = `0`; I < Bits.getNumWords(); ++I)
357	OS << ((I > `0`) ? ", " : "") << "UINT64_C(" << utostr(X: Bits.getRawData()[I])
358	<< ")";
359	}
360
361	void CodeEmitterGen::emitInstructionBaseValues(
362	raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
363	CodeGenTarget &Target, int HwMode) {
364	const CodeGenHwModes &HWM = Target.getHwModes();
365	if (HwMode == -`1`)
366	o << " static const uint64_t InstBits[] = {\n";
367	else
368	o << " static const uint64_t InstBits_"
369	<< HWM.getModeName(Id: HwMode, /IncludeDefault=/true) << "[] = {\n";
370
371	for (const CodeGenInstruction *CGI : NumberedInstructions) {
372	Record *R = CGI->TheDef;
373
374	if (R->getValueAsString(FieldName: "Namespace") == "TargetOpcode" \|\|
375	R->getValueAsBit(FieldName: "isPseudo")) {
376	o << " ";
377	emitInstBits(OS&: o, Bits: APInt (BitWidth, `0`));
378	o << ",\n";
379	continue;
380	}
381
382	Record *EncodingDef = R;
383	if (const RecordVal *RV = R->getValue(Name: "EncodingInfos")) {
384	if (auto *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) {
385	EncodingInfoByHwMode EBM(DI->getDef(), HWM);
386	if (EBM.hasMode(M: HwMode))
387	EncodingDef = EBM.get(Mode: HwMode);
388	}
389	}
390	BitsInit *BI = EncodingDef->getValueAsBitsInit(FieldName: "Inst");
391
392	// Start by filling in fixed values.
393	APInt Value(BitWidth, `0`);
394	for (unsigned i = `0`, e = BI->getNumBits(); i != e; ++i) {
395	if (auto *B = dyn_cast<BitInit>(Val: BI->getBit(Bit: i)); B && B->getValue())
396	Value.setBit(i);
397	}
398	o << " ";
399	emitInstBits(OS&: o, Bits: Value);
400	o << "," << `'\t'` << "// " << R->getName() << "\n";
401	}
402	o << " UINT64_C(0)\n };\n";
403	}
404
405	void CodeEmitterGen::emitCaseMap(
406	raw_ostream &o,
407	const std::map<std::string, std::vector<std::string>> &CaseMap) {
408	std::map<std::string, std::vector<std::string>>::const_iterator IE, EE;
409	for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
410	const std::string &Case = IE ->first;
411	const std::vector<std::string> &InstList = IE ->second;
412
413	for (int i = `0`, N = InstList.size(); i < N; i++) {
414	if (i)
415	o << "\n";
416	o << " case " << InstList [i] << ":";
417	}
418	o << " {\n";
419	o << Case;
420	o << " break;\n"
421	<< " }\n";
422	}
423	}
424
425	void CodeEmitterGen::run(raw_ostream &o) {
426	emitSourceFileHeader(Desc: "Machine Code Emitter", OS&: o);
427
428	CodeGenTarget Target(Records);
429	std::vector<Record *> Insts = Records.getAllDerivedDefinitions(ClassName: "Instruction");
430
431	// For little-endian instruction bit encodings, reverse the bit order
432	Target.reverseBitsForLittleEndianEncoding();
433
434	ArrayRef<const CodeGenInstruction *> NumberedInstructions =
435	Target.getInstructionsByEnumValue();
436
437	if (Target.hasVariableLengthEncodings()) {
438	emitVarLenCodeEmitter(R&: Records, OS&: o);
439	} else {
440	const CodeGenHwModes &HWM = Target.getHwModes();
441	// The set of HwModes used by instruction encodings.
442	std::set<unsigned> HwModes;
443	BitWidth = `0`;
444	for (const CodeGenInstruction *CGI : NumberedInstructions) {
445	Record *R = CGI->TheDef;
446	if (R->getValueAsString(FieldName: "Namespace") == "TargetOpcode" \|\|
447	R->getValueAsBit(FieldName: "isPseudo"))
448	continue;
449
450	if (const RecordVal *RV = R->getValue(Name: "EncodingInfos")) {
451	if (DefInit *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) {
452	EncodingInfoByHwMode EBM(DI->getDef(), HWM);
453	for (auto &KV : EBM) {
454	BitsInit *BI = KV.second->getValueAsBitsInit(FieldName: "Inst");
455	BitWidth = std::max(a: BitWidth, b: BI->getNumBits());
456	HwModes.insert(x: KV.first);
457	}
458	continue;
459	}
460	}
461	BitsInit *BI = R->getValueAsBitsInit(FieldName: "Inst");
462	BitWidth = std::max(a: BitWidth, b: BI->getNumBits());
463	}
464	UseAPInt = BitWidth > `64`;
465
466	// Emit function declaration
467	if (UseAPInt) {
468	o << "void " << Target.getName()
469	<< "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
470	<< " SmallVectorImpl<MCFixup> &Fixups,\n"
471	<< " APInt &Inst,\n"
472	<< " APInt &Scratch,\n"
473	<< " const MCSubtargetInfo &STI) const {\n";
474	} else {
475	o << "uint64_t " << Target.getName();
476	o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
477	<< " SmallVectorImpl<MCFixup> &Fixups,\n"
478	<< " const MCSubtargetInfo &STI) const {\n";
479	}
480
481	// Emit instruction base values
482	if (HwModes.empty()) {
483	emitInstructionBaseValues(o, NumberedInstructions, Target, HwMode: -`1`);
484	} else {
485	for (unsigned HwMode : HwModes)
486	emitInstructionBaseValues(o, NumberedInstructions, Target, HwMode: (int)HwMode);
487	}
488
489	if (!HwModes.empty()) {
490	o << " const uint64_t *InstBits;\n";
491	o << " unsigned HwMode = STI.getHwMode();\n";
492	o << " switch (HwMode) {\n";
493	o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
494	for (unsigned I : HwModes) {
495	o << " case " << I << ": InstBits = InstBits_"
496	<< HWM.getModeName(Id: I, /IncludeDefault=/true) << "; break;\n";
497	}
498	o << " };\n";
499	}
500
501	// Map to accumulate all the cases.
502	std::map<std::string, std::vector<std::string>> CaseMap;
503	std::map<std::string, std::vector<std::string>> BitOffsetCaseMap;
504
505	// Construct all cases statement for each opcode
506	for (Record *R : Insts) {
507	if (R->getValueAsString(FieldName: "Namespace") == "TargetOpcode" \|\|
508	R->getValueAsBit(FieldName: "isPseudo"))
509	continue;
510	std::string InstName =
511	(R->getValueAsString(FieldName: "Namespace") + "::" + R->getName()).str();
512	std::string Case, BitOffsetCase;
513	std::tie(args&: Case, args&: BitOffsetCase) = getInstructionCases(R, Target);
514
515	CaseMap [Case].push_back(x: InstName);
516	BitOffsetCaseMap [BitOffsetCase].push_back(x: std::move(InstName));
517	}
518
519	// Emit initial function code
520	if (UseAPInt) {
521	int NumWords = APInt::getNumWords(BitWidth);
522	o << " const unsigned opcode = MI.getOpcode();\n"
523	<< " if (Scratch.getBitWidth() != " << BitWidth << ")\n"
524	<< " Scratch = Scratch.zext(" << BitWidth << ");\n"
525	<< " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * "
526	<< NumWords << ", " << NumWords << "));\n"
527	<< " APInt &Value = Inst;\n"
528	<< " APInt &op = Scratch;\n"
529	<< " switch (opcode) {\n";
530	} else {
531	o << " const unsigned opcode = MI.getOpcode();\n"
532	<< " uint64_t Value = InstBits[opcode];\n"
533	<< " uint64_t op = 0;\n"
534	<< " (void)op; // suppress warning\n"
535	<< " switch (opcode) {\n";
536	}
537
538	// Emit each case statement
539	emitCaseMap(o, CaseMap);
540
541	// Default case: unhandled opcode
542	o << " default:\n"
543	<< " std::string msg;\n"
544	<< " raw_string_ostream Msg(msg);\n"
545	<< " Msg << \"Not supported instr: \" << MI;\n"
546	<< " report_fatal_error(Msg.str().c_str());\n"
547	<< " }\n";
548	if (UseAPInt)
549	o << " Inst = Value;\n";
550	else
551	o << " return Value;\n";
552	o << "}\n\n";
553
554	o << "#ifdef GET_OPERAND_BIT_OFFSET\n"
555	<< "#undef GET_OPERAND_BIT_OFFSET\n\n"
556	<< "uint32_t " << Target.getName()
557	<< "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n"
558	<< " unsigned OpNum,\n"
559	<< " const MCSubtargetInfo &STI) const {\n"
560	<< " switch (MI.getOpcode()) {\n";
561	emitCaseMap(o, CaseMap: BitOffsetCaseMap);
562	o << " }\n"
563	<< " std::string msg;\n"
564	<< " raw_string_ostream Msg(msg);\n"
565	<< " Msg << \"Not supported instr[opcode]: \" << MI << \"[\" << OpNum "
566	"<< \"]\";\n"
567	<< " report_fatal_error(Msg.str().c_str());\n"
568	<< "}\n\n"
569	<< "#endif // GET_OPERAND_BIT_OFFSET\n\n";
570	}
571	}
572
573	} // end anonymous namespace
574
575	static TableGen::Emitter::OptClass<CodeEmitterGen>
576	X("gen-emitter", "Generate machine code emitter");
577

source code of llvm/utils/TableGen/CodeEmitterGen.cpp