//===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// CodeEmitterGen uses the descriptions of instructions and their fields to
// construct an automated code emitter: a function called
// getBinaryCodeForInstr() that, given a MCInst, returns the value of the
// instruction - either as a uint64_t or as an APInt, depending on the
// maximum bit width of all Inst definitions.
//
// In addition, it generates another function called getOperandBitOffset()
// that, given a MCInst and an operand index, returns the minimum of indices of
// all bits that carry some portion of the respective operand. When the target's
// encodeInstruction() stores the instruction in a little-endian byte order, the
// returned value is the offset of the start of the operand in the encoded
// instruction. Other targets might need to adjust the returned value according
// to their encodeInstruction() implementation.
//
//===----------------------------------------------------------------------===//
24
25#include "Common/CodeGenHwModes.h"
26#include "Common/CodeGenInstruction.h"
27#include "Common/CodeGenTarget.h"
28#include "Common/InfoByHwMode.h"
29#include "Common/VarLenCodeEmitterGen.h"
30#include "llvm/ADT/APInt.h"
31#include "llvm/ADT/ArrayRef.h"
32#include "llvm/ADT/StringExtras.h"
33#include "llvm/Support/Casting.h"
34#include "llvm/Support/raw_ostream.h"
35#include "llvm/TableGen/Error.h"
36#include "llvm/TableGen/Record.h"
37#include "llvm/TableGen/TableGenBackend.h"
38#include <cstdint>
39#include <map>
40#include <set>
41#include <string>
42#include <utility>
43#include <vector>
44
45using namespace llvm;
46
47namespace {
48
49class CodeEmitterGen {
50 RecordKeeper &Records;
51
52public:
53 CodeEmitterGen(RecordKeeper &R) : Records(R) {}
54
55 void run(raw_ostream &o);
56
57private:
58 int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
59 std::pair<std::string, std::string>
60 getInstructionCases(Record *R, CodeGenTarget &Target);
61 void addInstructionCasesForEncoding(Record *R, Record *EncodingDef,
62 CodeGenTarget &Target, std::string &Case,
63 std::string &BitOffsetCase);
64 bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
65 const std::string &VarName, std::string &Case,
66 std::string &BitOffsetCase,
67 CodeGenTarget &Target);
68
69 void emitInstructionBaseValues(
70 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
71 CodeGenTarget &Target, int HwMode = -1);
72 void
73 emitCaseMap(raw_ostream &o,
74 const std::map<std::string, std::vector<std::string>> &CaseMap);
75 unsigned BitWidth = 0u;
76 bool UseAPInt = false;
77};
78
79// If the VarBitInit at position 'bit' matches the specified variable then
80// return the variable bit position. Otherwise return -1.
81int CodeEmitterGen::getVariableBit(const std::string &VarName, BitsInit *BI,
82 int bit) {
83 if (VarBitInit *VBI = dyn_cast<VarBitInit>(Val: BI->getBit(Bit: bit))) {
84 if (VarInit *VI = dyn_cast<VarInit>(Val: VBI->getBitVar()))
85 if (VI->getName() == VarName)
86 return VBI->getBitNum();
87 } else if (VarInit *VI = dyn_cast<VarInit>(Val: BI->getBit(Bit: bit))) {
88 if (VI->getName() == VarName)
89 return 0;
90 }
91
92 return -1;
93}
94
95// Returns true if it succeeds, false if an error.
96bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
97 const std::string &VarName,
98 std::string &Case,
99 std::string &BitOffsetCase,
100 CodeGenTarget &Target) {
101 CodeGenInstruction &CGI = Target.getInstruction(InstRec: R);
102
103 // Determine if VarName actually contributes to the Inst encoding.
104 int bit = BI->getNumBits() - 1;
105
106 // Scan for a bit that this contributed to.
107 for (; bit >= 0;) {
108 if (getVariableBit(VarName, BI, bit) != -1)
109 break;
110
111 --bit;
112 }
113
114 // If we found no bits, ignore this value, otherwise emit the call to get the
115 // operand encoding.
116 if (bit < 0)
117 return true;
118
119 // If the operand matches by name, reference according to that
120 // operand number. Non-matching operands are assumed to be in
121 // order.
122 unsigned OpIdx;
123 std::pair<unsigned, unsigned> SubOp;
124 if (CGI.Operands.hasSubOperandAlias(Name: VarName, SubOp)) {
125 OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second;
126 } else if (CGI.Operands.hasOperandNamed(Name: VarName, OpIdx)) {
127 // Get the machine operand number for the indicated operand.
128 OpIdx = CGI.Operands[OpIdx].MIOperandNo;
129 } else {
130 PrintError(Rec: R, Msg: Twine("No operand named ") + VarName + " in record " +
131 R->getName());
132 return false;
133 }
134
135 if (CGI.Operands.isFlatOperandNotEmitted(FlatOpNo: OpIdx)) {
136 PrintError(Rec: R,
137 Msg: "Operand " + VarName + " used but also marked as not emitted!");
138 return false;
139 }
140
141 std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(Op: OpIdx);
142 std::string &EncoderMethodName =
143 CGI.Operands[SO.first].EncoderMethodNames[SO.second];
144
145 if (UseAPInt)
146 Case += " op.clearAllBits();\n";
147
148 Case += " // op: " + VarName + "\n";
149
150 // If the source operand has a custom encoder, use it.
151 if (!EncoderMethodName.empty()) {
152 if (UseAPInt) {
153 Case += " " + EncoderMethodName + "(MI, " + utostr(X: OpIdx);
154 Case += ", op";
155 } else {
156 Case += " op = " + EncoderMethodName + "(MI, " + utostr(X: OpIdx);
157 }
158 Case += ", Fixups, STI);\n";
159 } else {
160 if (UseAPInt) {
161 Case +=
162 " getMachineOpValue(MI, MI.getOperand(" + utostr(X: OpIdx) + ")";
163 Case += ", op, Fixups, STI";
164 } else {
165 Case += " op = getMachineOpValue(MI, MI.getOperand(" +
166 utostr(X: OpIdx) + ")";
167 Case += ", Fixups, STI";
168 }
169 Case += ");\n";
170 }
171
172 // Precalculate the number of lits this variable contributes to in the
173 // operand. If there is a single lit (consecutive range of bits) we can use a
174 // destructive sequence on APInt that reduces memory allocations.
175 int numOperandLits = 0;
176 for (int tmpBit = bit; tmpBit >= 0;) {
177 int varBit = getVariableBit(VarName, BI, bit: tmpBit);
178
179 // If this bit isn't from a variable, skip it.
180 if (varBit == -1) {
181 --tmpBit;
182 continue;
183 }
184
185 // Figure out the consecutive range of bits covered by this operand, in
186 // order to generate better encoding code.
187 int beginVarBit = varBit;
188 int N = 1;
189 for (--tmpBit; tmpBit >= 0;) {
190 varBit = getVariableBit(VarName, BI, bit: tmpBit);
191 if (varBit == -1 || varBit != (beginVarBit - N))
192 break;
193 ++N;
194 --tmpBit;
195 }
196 ++numOperandLits;
197 }
198
199 unsigned BitOffset = -1;
200 for (; bit >= 0;) {
201 int varBit = getVariableBit(VarName, BI, bit);
202
203 // If this bit isn't from a variable, skip it.
204 if (varBit == -1) {
205 --bit;
206 continue;
207 }
208
209 // Figure out the consecutive range of bits covered by this operand, in
210 // order to generate better encoding code.
211 int beginInstBit = bit;
212 int beginVarBit = varBit;
213 int N = 1;
214 for (--bit; bit >= 0;) {
215 varBit = getVariableBit(VarName, BI, bit);
216 if (varBit == -1 || varBit != (beginVarBit - N))
217 break;
218 ++N;
219 --bit;
220 }
221
222 std::string maskStr;
223 int opShift;
224
225 unsigned loBit = beginVarBit - N + 1;
226 unsigned hiBit = loBit + N;
227 unsigned loInstBit = beginInstBit - N + 1;
228 BitOffset = loInstBit;
229 if (UseAPInt) {
230 std::string extractStr;
231 if (N >= 64) {
232 extractStr = "op.extractBits(" + itostr(X: hiBit - loBit) + ", " +
233 itostr(X: loBit) + ")";
234 Case += " Value.insertBits(" + extractStr + ", " +
235 itostr(X: loInstBit) + ");\n";
236 } else {
237 extractStr = "op.extractBitsAsZExtValue(" + itostr(X: hiBit - loBit) +
238 ", " + itostr(X: loBit) + ")";
239 Case += " Value.insertBits(" + extractStr + ", " +
240 itostr(X: loInstBit) + ", " + itostr(X: hiBit - loBit) + ");\n";
241 }
242 } else {
243 uint64_t opMask = ~(uint64_t)0 >> (64 - N);
244 opShift = beginVarBit - N + 1;
245 opMask <<= opShift;
246 maskStr = "UINT64_C(" + utostr(X: opMask) + ")";
247 opShift = beginInstBit - beginVarBit;
248
249 if (numOperandLits == 1) {
250 Case += " op &= " + maskStr + ";\n";
251 if (opShift > 0) {
252 Case += " op <<= " + itostr(X: opShift) + ";\n";
253 } else if (opShift < 0) {
254 Case += " op >>= " + itostr(X: -opShift) + ";\n";
255 }
256 Case += " Value |= op;\n";
257 } else {
258 if (opShift > 0) {
259 Case += " Value |= (op & " + maskStr + ") << " +
260 itostr(X: opShift) + ";\n";
261 } else if (opShift < 0) {
262 Case += " Value |= (op & " + maskStr + ") >> " +
263 itostr(X: -opShift) + ";\n";
264 } else {
265 Case += " Value |= (op & " + maskStr + ");\n";
266 }
267 }
268 }
269 }
270
271 if (BitOffset != (unsigned)-1) {
272 BitOffsetCase += " case " + utostr(X: OpIdx) + ":\n";
273 BitOffsetCase += " // op: " + VarName + "\n";
274 BitOffsetCase += " return " + utostr(X: BitOffset) + ";\n";
275 }
276
277 return true;
278}
279
280std::pair<std::string, std::string>
281CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) {
282 std::string Case, BitOffsetCase;
283
284 auto append = [&](const char *S) {
285 Case += S;
286 BitOffsetCase += S;
287 };
288
289 if (const RecordVal *RV = R->getValue(Name: "EncodingInfos")) {
290 if (auto *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) {
291 const CodeGenHwModes &HWM = Target.getHwModes();
292 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
293 append(" switch (HwMode) {\n");
294 append(" default: llvm_unreachable(\"Unhandled HwMode\");\n");
295 for (auto &KV : EBM) {
296 append((" case " + itostr(X: KV.first) + ": {\n").c_str());
297 addInstructionCasesForEncoding(R, EncodingDef: KV.second, Target, Case,
298 BitOffsetCase);
299 append(" break;\n");
300 append(" }\n");
301 }
302 append(" }\n");
303 return std::pair(std::move(Case), std::move(BitOffsetCase));
304 }
305 }
306 addInstructionCasesForEncoding(R, EncodingDef: R, Target, Case, BitOffsetCase);
307 return std::pair(std::move(Case), std::move(BitOffsetCase));
308}
309
310void CodeEmitterGen::addInstructionCasesForEncoding(
311 Record *R, Record *EncodingDef, CodeGenTarget &Target, std::string &Case,
312 std::string &BitOffsetCase) {
313 BitsInit *BI = EncodingDef->getValueAsBitsInit(FieldName: "Inst");
314
315 // Loop over all of the fields in the instruction, determining which are the
316 // operands to the instruction.
317 bool Success = true;
318 size_t OrigBitOffsetCaseSize = BitOffsetCase.size();
319 BitOffsetCase += " switch (OpNum) {\n";
320 size_t BitOffsetCaseSizeBeforeLoop = BitOffsetCase.size();
321 for (const RecordVal &RV : EncodingDef->getValues()) {
322 // Ignore fixed fields in the record, we're looking for values like:
323 // bits<5> RST = { ?, ?, ?, ?, ? };
324 if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
325 continue;
326
327 Success &= addCodeToMergeInOperand(R, BI, VarName: std::string(RV.getName()), Case,
328 BitOffsetCase, Target);
329 }
330 // Avoid empty switches.
331 if (BitOffsetCase.size() == BitOffsetCaseSizeBeforeLoop)
332 BitOffsetCase.resize(n: OrigBitOffsetCaseSize);
333 else
334 BitOffsetCase += " }\n";
335
336 if (!Success) {
337 // Dump the record, so we can see what's going on...
338 std::string E;
339 raw_string_ostream S(E);
340 S << "Dumping record for previous error:\n";
341 S << *R;
342 PrintNote(Msg: E);
343 }
344
345 StringRef PostEmitter = R->getValueAsString(FieldName: "PostEncoderMethod");
346 if (!PostEmitter.empty()) {
347 Case += " Value = ";
348 Case += PostEmitter;
349 Case += "(MI, Value";
350 Case += ", STI";
351 Case += ");\n";
352 }
353}
354
355static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
356 for (unsigned I = 0; I < Bits.getNumWords(); ++I)
357 OS << ((I > 0) ? ", " : "") << "UINT64_C(" << utostr(X: Bits.getRawData()[I])
358 << ")";
359}
360
361void CodeEmitterGen::emitInstructionBaseValues(
362 raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
363 CodeGenTarget &Target, int HwMode) {
364 const CodeGenHwModes &HWM = Target.getHwModes();
365 if (HwMode == -1)
366 o << " static const uint64_t InstBits[] = {\n";
367 else
368 o << " static const uint64_t InstBits_"
369 << HWM.getModeName(Id: HwMode, /*IncludeDefault=*/true) << "[] = {\n";
370
371 for (const CodeGenInstruction *CGI : NumberedInstructions) {
372 Record *R = CGI->TheDef;
373
374 if (R->getValueAsString(FieldName: "Namespace") == "TargetOpcode" ||
375 R->getValueAsBit(FieldName: "isPseudo")) {
376 o << " ";
377 emitInstBits(OS&: o, Bits: APInt(BitWidth, 0));
378 o << ",\n";
379 continue;
380 }
381
382 Record *EncodingDef = R;
383 if (const RecordVal *RV = R->getValue(Name: "EncodingInfos")) {
384 if (auto *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) {
385 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
386 if (EBM.hasMode(M: HwMode))
387 EncodingDef = EBM.get(Mode: HwMode);
388 }
389 }
390 BitsInit *BI = EncodingDef->getValueAsBitsInit(FieldName: "Inst");
391
392 // Start by filling in fixed values.
393 APInt Value(BitWidth, 0);
394 for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
395 if (auto *B = dyn_cast<BitInit>(Val: BI->getBit(Bit: i)); B && B->getValue())
396 Value.setBit(i);
397 }
398 o << " ";
399 emitInstBits(OS&: o, Bits: Value);
400 o << "," << '\t' << "// " << R->getName() << "\n";
401 }
402 o << " UINT64_C(0)\n };\n";
403}
404
405void CodeEmitterGen::emitCaseMap(
406 raw_ostream &o,
407 const std::map<std::string, std::vector<std::string>> &CaseMap) {
408 std::map<std::string, std::vector<std::string>>::const_iterator IE, EE;
409 for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
410 const std::string &Case = IE->first;
411 const std::vector<std::string> &InstList = IE->second;
412
413 for (int i = 0, N = InstList.size(); i < N; i++) {
414 if (i)
415 o << "\n";
416 o << " case " << InstList[i] << ":";
417 }
418 o << " {\n";
419 o << Case;
420 o << " break;\n"
421 << " }\n";
422 }
423}
424
425void CodeEmitterGen::run(raw_ostream &o) {
426 emitSourceFileHeader(Desc: "Machine Code Emitter", OS&: o);
427
428 CodeGenTarget Target(Records);
429 std::vector<Record *> Insts = Records.getAllDerivedDefinitions(ClassName: "Instruction");
430
431 // For little-endian instruction bit encodings, reverse the bit order
432 Target.reverseBitsForLittleEndianEncoding();
433
434 ArrayRef<const CodeGenInstruction *> NumberedInstructions =
435 Target.getInstructionsByEnumValue();
436
437 if (Target.hasVariableLengthEncodings()) {
438 emitVarLenCodeEmitter(R&: Records, OS&: o);
439 } else {
440 const CodeGenHwModes &HWM = Target.getHwModes();
441 // The set of HwModes used by instruction encodings.
442 std::set<unsigned> HwModes;
443 BitWidth = 0;
444 for (const CodeGenInstruction *CGI : NumberedInstructions) {
445 Record *R = CGI->TheDef;
446 if (R->getValueAsString(FieldName: "Namespace") == "TargetOpcode" ||
447 R->getValueAsBit(FieldName: "isPseudo"))
448 continue;
449
450 if (const RecordVal *RV = R->getValue(Name: "EncodingInfos")) {
451 if (DefInit *DI = dyn_cast_or_null<DefInit>(Val: RV->getValue())) {
452 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
453 for (auto &KV : EBM) {
454 BitsInit *BI = KV.second->getValueAsBitsInit(FieldName: "Inst");
455 BitWidth = std::max(a: BitWidth, b: BI->getNumBits());
456 HwModes.insert(x: KV.first);
457 }
458 continue;
459 }
460 }
461 BitsInit *BI = R->getValueAsBitsInit(FieldName: "Inst");
462 BitWidth = std::max(a: BitWidth, b: BI->getNumBits());
463 }
464 UseAPInt = BitWidth > 64;
465
466 // Emit function declaration
467 if (UseAPInt) {
468 o << "void " << Target.getName()
469 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
470 << " SmallVectorImpl<MCFixup> &Fixups,\n"
471 << " APInt &Inst,\n"
472 << " APInt &Scratch,\n"
473 << " const MCSubtargetInfo &STI) const {\n";
474 } else {
475 o << "uint64_t " << Target.getName();
476 o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
477 << " SmallVectorImpl<MCFixup> &Fixups,\n"
478 << " const MCSubtargetInfo &STI) const {\n";
479 }
480
481 // Emit instruction base values
482 if (HwModes.empty()) {
483 emitInstructionBaseValues(o, NumberedInstructions, Target, HwMode: -1);
484 } else {
485 for (unsigned HwMode : HwModes)
486 emitInstructionBaseValues(o, NumberedInstructions, Target, HwMode: (int)HwMode);
487 }
488
489 if (!HwModes.empty()) {
490 o << " const uint64_t *InstBits;\n";
491 o << " unsigned HwMode = STI.getHwMode();\n";
492 o << " switch (HwMode) {\n";
493 o << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
494 for (unsigned I : HwModes) {
495 o << " case " << I << ": InstBits = InstBits_"
496 << HWM.getModeName(Id: I, /*IncludeDefault=*/true) << "; break;\n";
497 }
498 o << " };\n";
499 }
500
501 // Map to accumulate all the cases.
502 std::map<std::string, std::vector<std::string>> CaseMap;
503 std::map<std::string, std::vector<std::string>> BitOffsetCaseMap;
504
505 // Construct all cases statement for each opcode
506 for (Record *R : Insts) {
507 if (R->getValueAsString(FieldName: "Namespace") == "TargetOpcode" ||
508 R->getValueAsBit(FieldName: "isPseudo"))
509 continue;
510 std::string InstName =
511 (R->getValueAsString(FieldName: "Namespace") + "::" + R->getName()).str();
512 std::string Case, BitOffsetCase;
513 std::tie(args&: Case, args&: BitOffsetCase) = getInstructionCases(R, Target);
514
515 CaseMap[Case].push_back(x: InstName);
516 BitOffsetCaseMap[BitOffsetCase].push_back(x: std::move(InstName));
517 }
518
519 // Emit initial function code
520 if (UseAPInt) {
521 int NumWords = APInt::getNumWords(BitWidth);
522 o << " const unsigned opcode = MI.getOpcode();\n"
523 << " if (Scratch.getBitWidth() != " << BitWidth << ")\n"
524 << " Scratch = Scratch.zext(" << BitWidth << ");\n"
525 << " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * "
526 << NumWords << ", " << NumWords << "));\n"
527 << " APInt &Value = Inst;\n"
528 << " APInt &op = Scratch;\n"
529 << " switch (opcode) {\n";
530 } else {
531 o << " const unsigned opcode = MI.getOpcode();\n"
532 << " uint64_t Value = InstBits[opcode];\n"
533 << " uint64_t op = 0;\n"
534 << " (void)op; // suppress warning\n"
535 << " switch (opcode) {\n";
536 }
537
538 // Emit each case statement
539 emitCaseMap(o, CaseMap);
540
541 // Default case: unhandled opcode
542 o << " default:\n"
543 << " std::string msg;\n"
544 << " raw_string_ostream Msg(msg);\n"
545 << " Msg << \"Not supported instr: \" << MI;\n"
546 << " report_fatal_error(Msg.str().c_str());\n"
547 << " }\n";
548 if (UseAPInt)
549 o << " Inst = Value;\n";
550 else
551 o << " return Value;\n";
552 o << "}\n\n";
553
554 o << "#ifdef GET_OPERAND_BIT_OFFSET\n"
555 << "#undef GET_OPERAND_BIT_OFFSET\n\n"
556 << "uint32_t " << Target.getName()
557 << "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n"
558 << " unsigned OpNum,\n"
559 << " const MCSubtargetInfo &STI) const {\n"
560 << " switch (MI.getOpcode()) {\n";
561 emitCaseMap(o, CaseMap: BitOffsetCaseMap);
562 o << " }\n"
563 << " std::string msg;\n"
564 << " raw_string_ostream Msg(msg);\n"
565 << " Msg << \"Not supported instr[opcode]: \" << MI << \"[\" << OpNum "
566 "<< \"]\";\n"
567 << " report_fatal_error(Msg.str().c_str());\n"
568 << "}\n\n"
569 << "#endif // GET_OPERAND_BIT_OFFSET\n\n";
570 }
571}
572
573} // end anonymous namespace
574
575static TableGen::Emitter::OptClass<CodeEmitterGen>
576 X("gen-emitter", "Generate machine code emitter");
577

// Source: llvm/utils/TableGen/CodeEmitterGen.cpp