1//===-- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10
11#include "llvm-c/Target.h"
12#include "llvm/MC/MCAsmBackend.h"
13#include "llvm/MC/MCAsmInfo.h"
14#include "llvm/MC/MCCodeEmitter.h"
15#include "llvm/MC/MCContext.h"
16#include "llvm/MC/MCInstPrinter.h"
17#include "llvm/MC/MCInstrInfo.h"
18#include "llvm/MC/MCObjectFileInfo.h"
19#include "llvm/MC/MCObjectWriter.h"
20#include "llvm/MC/MCParser/AsmLexer.h"
21#include "llvm/MC/MCParser/MCTargetAsmParser.h"
22#include "llvm/MC/MCRegisterInfo.h"
23#include "llvm/MC/MCSectionMachO.h"
24#include "llvm/MC/MCStreamer.h"
25#include "llvm/MC/MCSubtargetInfo.h"
26#include "llvm/MC/MCTargetOptionsCommandFlags.h"
27#include "llvm/MC/TargetRegistry.h"
28#include "llvm/Support/CommandLine.h"
29#include "llvm/Support/FileUtilities.h"
30#include "llvm/Support/MemoryBuffer.h"
31#include "llvm/Support/SourceMgr.h"
32#include "llvm/Support/TargetSelect.h"
33#include "llvm/Support/ToolOutputFile.h"
34#include "llvm/Support/raw_ostream.h"
35#include "llvm/TargetParser/Host.h"
36#include "llvm/TargetParser/SubtargetFeature.h"
37
38using namespace llvm;
39
40static mc::RegisterMCTargetOptionsFlags MOF;
41
42static cl::opt<std::string>
43 TripleName("triple", cl::desc("Target triple to assemble for, "
44 "see -version for available targets"));
45
46static cl::opt<std::string>
47 MCPU("mcpu",
48 cl::desc("Target a specific cpu type (-mcpu=help for details)"),
49 cl::value_desc("cpu-name"), cl::init(Val: ""));
50
51// This is useful for variable-length instruction sets.
52static cl::opt<unsigned> InsnLimit(
53 "insn-limit",
54 cl::desc("Limit the number of instructions to process (0 for no limit)"),
55 cl::value_desc("count"), cl::init(Val: 0));
56
57static cl::list<std::string>
58 MAttrs("mattr", cl::CommaSeparated,
59 cl::desc("Target specific attributes (-mattr=help for details)"),
60 cl::value_desc("a1,+a2,-a3,..."));
61// The feature string derived from -mattr's values.
62std::string FeaturesStr;
63
64static cl::list<std::string>
65 FuzzerArgs("fuzzer-args", cl::Positional,
66 cl::desc("Options to pass to the fuzzer"),
67 cl::PositionalEatsArgs);
68static std::vector<char *> ModifiedArgv;
69
70enum OutputFileType {
71 OFT_Null,
72 OFT_AssemblyFile,
73 OFT_ObjectFile
74};
75static cl::opt<OutputFileType>
76FileType("filetype", cl::init(Val: OFT_AssemblyFile),
77 cl::desc("Choose an output file type:"),
78 cl::values(
79 clEnumValN(OFT_AssemblyFile, "asm",
80 "Emit an assembly ('.s') file"),
81 clEnumValN(OFT_Null, "null",
82 "Don't emit anything (for timing purposes)"),
83 clEnumValN(OFT_ObjectFile, "obj",
84 "Emit a native object ('.o') file")));
85
86
87class LLVMFuzzerInputBuffer : public MemoryBuffer
88{
89 public:
90 LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_)
91 : Data(reinterpret_cast<const char *>(data_)),
92 Size(size_) {
93 init(BufStart: Data, BufEnd: Data+Size, RequiresNullTerminator: false);
94 }
95
96
97 virtual BufferKind getBufferKind() const {
98 return MemoryBuffer_Malloc; // it's not disk-backed so I think that's
99 // the intent ... though AFAIK it
100 // probably came from an mmap or sbrk
101 }
102
103 private:
104 const char *Data;
105 size_t Size;
106};
107
108static int AssembleInput(const char *ProgName, const Target *TheTarget,
109 SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
110 MCAsmInfo &MAI, MCSubtargetInfo &STI,
111 MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
112 static const bool NoInitialTextSection = false;
113
114 std::unique_ptr<MCAsmParser> Parser(
115 createMCAsmParser(SrcMgr, Ctx, Str, MAI));
116
117 std::unique_ptr<MCTargetAsmParser> TAP(
118 TheTarget->createMCAsmParser(STI, Parser&: *Parser, MII: MCII, Options: MCOptions));
119
120 if (!TAP) {
121 errs() << ProgName
122 << ": error: this target '" << TripleName
123 << "', does not support assembly parsing.\n";
124 abort();
125 }
126
127 Parser->setTargetParser(*TAP);
128
129 return Parser->Run(NoInitialTextSection);
130}
131
132
133int AssembleOneInput(const uint8_t *Data, size_t Size) {
134 const bool ShowInst = false;
135 const bool AsmVerbose = false;
136 const bool UseDwarfDirectory = true;
137
138 Triple TheTriple(Triple::normalize(Str: TripleName));
139
140 SourceMgr SrcMgr;
141
142 std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size));
143
144 // Tell SrcMgr about this buffer, which is what the parser will pick up.
145 SrcMgr.AddNewSourceBuffer(F: std::move(BufferPtr), IncludeLoc: SMLoc());
146
147 static const std::vector<std::string> NoIncludeDirs;
148 SrcMgr.setIncludeDirs(NoIncludeDirs);
149
150 static std::string ArchName;
151 std::string Error;
152 const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
153 Error);
154 if (!TheTarget) {
155 errs() << "error: this target '" << TheTriple.normalize()
156 << "/" << ArchName << "', was not found: '" << Error << "'\n";
157
158 abort();
159 }
160
161 std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT: TripleName));
162 if (!MRI) {
163 errs() << "Unable to create target register info!";
164 abort();
165 }
166
167 MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
168 std::unique_ptr<MCAsmInfo> MAI(
169 TheTarget->createMCAsmInfo(MRI: *MRI, TheTriple: TripleName, Options: MCOptions));
170 if (!MAI) {
171 errs() << "Unable to create target asm info!";
172 abort();
173 }
174
175 std::unique_ptr<MCSubtargetInfo> STI(
176 TheTarget->createMCSubtargetInfo(TheTriple: TripleName, CPU: MCPU, Features: FeaturesStr));
177
178 MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
179 std::unique_ptr<MCObjectFileInfo> MOFI(
180 TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
181 Ctx.setObjectFileInfo(MOFI.get());
182
183 const unsigned OutputAsmVariant = 0;
184 std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
185 MCInstPrinter *IP = TheTarget->createMCInstPrinter(T: Triple(TripleName), SyntaxVariant: OutputAsmVariant,
186 MAI: *MAI, MII: *MCII, MRI: *MRI);
187 if (!IP) {
188 errs()
189 << "error: unable to create instruction printer for target triple '"
190 << TheTriple.normalize() << "' with assembly variant "
191 << OutputAsmVariant << ".\n";
192
193 abort();
194 }
195
196 const char *ProgName = "llvm-mc-fuzzer";
197 std::unique_ptr<MCCodeEmitter> CE = nullptr;
198 std::unique_ptr<MCAsmBackend> MAB = nullptr;
199
200 std::string OutputString;
201 raw_string_ostream Out(OutputString);
202 auto FOut = std::make_unique<formatted_raw_ostream>(args&: Out);
203
204 std::unique_ptr<MCStreamer> Str;
205
206 if (FileType == OFT_AssemblyFile) {
207 Str.reset(p: TheTarget->createAsmStreamer(Ctx, OS: std::move(FOut), IsVerboseAsm: AsmVerbose,
208 UseDwarfDirectory, InstPrint: IP, CE: std::move(CE),
209 TAB: std::move(MAB), ShowInst));
210 } else {
211 assert(FileType == OFT_ObjectFile && "Invalid file type!");
212
213 std::error_code EC;
214 const std::string OutputFilename = "-";
215 auto Out =
216 std::make_unique<ToolOutputFile>(args: OutputFilename, args&: EC, args: sys::fs::OF_None);
217 if (EC) {
218 errs() << EC.message() << '\n';
219 abort();
220 }
221
222 // Don't waste memory on names of temp labels.
223 Ctx.setUseNamesOnTempLabels(false);
224
225 std::unique_ptr<buffer_ostream> BOS;
226 raw_pwrite_stream *OS = &Out->os();
227 if (!Out->os().supportsSeeking()) {
228 BOS = std::make_unique<buffer_ostream>(args&: Out->os());
229 OS = BOS.get();
230 }
231
232 MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(II: *MCII, Ctx);
233 MCAsmBackend *MAB = TheTarget->createMCAsmBackend(STI: *STI, MRI: *MRI, Options: MCOptions);
234 Str.reset(p: TheTarget->createMCObjectStreamer(
235 T: TheTriple, Ctx, TAB: std::unique_ptr<MCAsmBackend>(MAB),
236 OW: MAB->createObjectWriter(OS&: *OS), Emitter: std::unique_ptr<MCCodeEmitter>(CE), STI: *STI,
237 RelaxAll: MCOptions.MCRelaxAll, IncrementalLinkerCompatible: MCOptions.MCIncrementalLinkerCompatible,
238 /*DWARFMustBeAtTheEnd*/ false));
239 }
240 const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, Str&: *Str, MAI&: *MAI, STI&: *STI,
241 MCII&: *MCII, MCOptions);
242
243 (void) Res;
244
245 return 0;
246}
247
248extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
249 return AssembleOneInput(Data, Size);
250}
251
252extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
253 char ***argv) {
254 // The command line is unusual compared to other fuzzers due to the need to
255 // specify the target. Options like -triple, -mcpu, and -mattr work like
256 // their counterparts in llvm-mc, while -fuzzer-args collects options for the
257 // fuzzer itself.
258 //
259 // Examples:
260 //
261 // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
262 // 4-bytes each and use the contents of ./corpus as the test corpus:
263 // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
264 // -fuzzer-args -max_len=4 -runs=100000 ./corpus
265 //
266 // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
267 // feature enabled using up to 64-byte inputs:
268 // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
269 // -disassemble -fuzzer-args ./corpus
270 //
271 // If your aim is to find instructions that are not tested, then it is
272 // advisable to constrain the maximum input size to a single instruction
273 // using -max_len as in the first example. This results in a test corpus of
274 // individual instructions that test unique paths. Without this constraint,
275 // there will be considerable redundancy in the corpus.
276
277 char **OriginalArgv = *argv;
278
279 LLVMInitializeAllTargetInfos();
280 LLVMInitializeAllTargetMCs();
281 LLVMInitializeAllAsmParsers();
282
283 cl::ParseCommandLineOptions(argc: *argc, argv: OriginalArgv);
284
285 // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
286 // the driver can parse its arguments.
287 //
288 // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
289 // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
290 // non-const buffer to avoid the need to clean up when the fuzzer terminates.
291 ModifiedArgv.push_back(x: OriginalArgv[0]);
292 for (const auto &FuzzerArg : FuzzerArgs) {
293 for (int i = 1; i < *argc; ++i) {
294 if (FuzzerArg == OriginalArgv[i])
295 ModifiedArgv.push_back(x: OriginalArgv[i]);
296 }
297 }
298 *argc = ModifiedArgv.size();
299 *argv = ModifiedArgv.data();
300
301 // Package up features to be passed to target/subtarget
302 // We have to pass it via a global since the callback doesn't
303 // permit any user data.
304 if (MAttrs.size()) {
305 SubtargetFeatures Features;
306 for (unsigned i = 0; i != MAttrs.size(); ++i)
307 Features.AddFeature(String: MAttrs[i]);
308 FeaturesStr = Features.getString();
309 }
310
311 if (TripleName.empty())
312 TripleName = sys::getDefaultTargetTriple();
313
314 return 0;
315}
316

// Source: llvm/tools/llvm-mc-assemble-fuzzer/llvm-mc-assemble-fuzzer.cpp