1 | //===-- IRMutator.h - Mutation engine for fuzzing IR ------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Provides the IRMutator class, which drives mutations on IR based on a |
10 | // configurable set of strategies. Some common strategies are also included |
11 | // here. |
12 | // |
13 | // Fuzzer-friendly (de)serialization functions are also provided, as these |
14 | // are usually needed when mutating IR. |
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #ifndef LLVM_FUZZMUTATE_IRMUTATOR_H |
19 | #define LLVM_FUZZMUTATE_IRMUTATOR_H |
20 | |
21 | #include "llvm/FuzzMutate/OpDescriptor.h" |
22 | #include "llvm/Support/ErrorHandling.h" |
23 | #include <optional> |
24 | |
25 | namespace llvm { |
26 | class BasicBlock; |
27 | class Function; |
28 | class Instruction; |
29 | class Module; |
30 | |
31 | struct RandomIRBuilder; |
32 | |
33 | /// Base class for describing how to mutate a module. mutation functions for |
34 | /// each IR unit forward to the contained unit. |
35 | class IRMutationStrategy { |
36 | public: |
37 | virtual ~IRMutationStrategy() = default; |
38 | |
39 | /// Provide a weight to bias towards choosing this strategy for a mutation. |
40 | /// |
41 | /// The value of the weight is arbitrary, but a good default is "the number of |
42 | /// distinct ways in which this strategy can mutate a unit". This can also be |
43 | /// used to prefer strategies that shrink the overall size of the result when |
44 | /// we start getting close to \c MaxSize. |
45 | virtual uint64_t getWeight(size_t CurrentSize, size_t MaxSize, |
46 | uint64_t CurrentWeight) = 0; |
47 | |
48 | /// @{ |
49 | /// Mutators for each IR unit. By default these forward to a contained |
50 | /// instance of the next smaller unit. |
51 | virtual void mutate(Module &M, RandomIRBuilder &IB); |
52 | virtual void mutate(Function &F, RandomIRBuilder &IB); |
53 | virtual void mutate(BasicBlock &BB, RandomIRBuilder &IB); |
54 | virtual void mutate(Instruction &I, RandomIRBuilder &IB) { |
55 | llvm_unreachable("Strategy does not implement any mutators" ); |
56 | } |
57 | /// @} |
58 | }; |
59 | |
/// Callable that, given an LLVMContext, returns a pointer to a Type. IRMutator
/// stores a list of these as its allowed types.
using TypeGetter = std::function<Type *(LLVMContext &)>;
61 | |
62 | /// Entry point for configuring and running IR mutations. |
63 | class IRMutator { |
64 | std::vector<TypeGetter> AllowedTypes; |
65 | std::vector<std::unique_ptr<IRMutationStrategy>> Strategies; |
66 | |
67 | public: |
68 | IRMutator(std::vector<TypeGetter> &&AllowedTypes, |
69 | std::vector<std::unique_ptr<IRMutationStrategy>> &&Strategies) |
70 | : AllowedTypes(std::move(AllowedTypes)), |
71 | Strategies(std::move(Strategies)) {} |
72 | |
73 | /// Calculate the size of module as the number of objects in it, i.e. |
74 | /// instructions, basic blocks, functions, and aliases. |
75 | /// |
76 | /// \param M module |
77 | /// \return number of objects in module |
78 | static size_t getModuleSize(const Module &M); |
79 | |
80 | /// Mutate given module. No change will be made if no strategy is selected. |
81 | /// |
82 | /// \param M module to mutate |
83 | /// \param Seed seed for random mutation |
84 | /// \param MaxSize max module size (see getModuleSize) |
85 | void mutateModule(Module &M, int Seed, size_t MaxSize); |
86 | }; |
87 | |
88 | /// Strategy that injects operations into the function. |
89 | class InjectorIRStrategy : public IRMutationStrategy { |
90 | std::vector<fuzzerop::OpDescriptor> Operations; |
91 | |
92 | std::optional<fuzzerop::OpDescriptor> chooseOperation(Value *Src, |
93 | RandomIRBuilder &IB); |
94 | |
95 | public: |
96 | InjectorIRStrategy() : Operations(getDefaultOps()) {} |
97 | InjectorIRStrategy(std::vector<fuzzerop::OpDescriptor> &&Operations) |
98 | : Operations(std::move(Operations)) {} |
99 | static std::vector<fuzzerop::OpDescriptor> getDefaultOps(); |
100 | |
101 | uint64_t getWeight(size_t CurrentSize, size_t MaxSize, |
102 | uint64_t CurrentWeight) override { |
103 | return Operations.size(); |
104 | } |
105 | |
106 | using IRMutationStrategy::mutate; |
107 | void mutate(Function &F, RandomIRBuilder &IB) override; |
108 | void mutate(BasicBlock &BB, RandomIRBuilder &IB) override; |
109 | }; |
110 | |
/// Strategy that deletes instructions when the Module is too large.
class InstDeleterIRStrategy : public IRMutationStrategy {
public:
  /// Weight of this strategy; defined out of line. Given the class purpose it
  /// presumably depends on how close \c CurrentSize is to \c MaxSize (confirm
  /// against the implementation).
  uint64_t getWeight(size_t CurrentSize, size_t MaxSize,
                     uint64_t CurrentWeight) override;

  // Keep the base-class overloads visible next to the overrides below.
  using IRMutationStrategy::mutate;
  void mutate(Function &F, RandomIRBuilder &IB) override;
  void mutate(Instruction &Inst, RandomIRBuilder &IB) override;
};
121 | |
/// Strategy that modifies instruction attributes and operands.
class InstModificationIRStrategy : public IRMutationStrategy {
public:
  /// Fixed weight of 4; the size and weight arguments are not consulted.
  uint64_t getWeight(size_t CurrentSize, size_t MaxSize,
                     uint64_t CurrentWeight) override {
    return 4;
  }

  // Keep the base-class overloads visible next to the override below.
  using IRMutationStrategy::mutate;
  void mutate(Instruction &Inst, RandomIRBuilder &IB) override;
};
133 | |
/// Strategy that generates new function calls and inserts function signatures
/// into the module. If any signatures are already present in the module,
/// those will be called.
class InsertFunctionStrategy : public IRMutationStrategy {
public:
  /// Fixed weight of 10; the size and weight arguments are not consulted.
  uint64_t getWeight(size_t CurrentSize, size_t MaxSize,
                     uint64_t CurrentWeight) override {
    return 10;
  }

  // Keep the base-class overloads visible next to the override below.
  using IRMutationStrategy::mutate;
  void mutate(BasicBlock &BB, RandomIRBuilder &IB) override;
};
147 | |
148 | /// Strategy to split a random block and insert a random CFG in between. |
149 | class InsertCFGStrategy : public IRMutationStrategy { |
150 | private: |
151 | uint64_t MaxNumCases; |
152 | enum CFGToSink { Return, DirectSink, SinkOrSelfLoop, EndOfCFGToLink }; |
153 | |
154 | public: |
155 | InsertCFGStrategy(uint64_t MNC = 8) : MaxNumCases(MNC){}; |
156 | uint64_t getWeight(size_t CurrentSize, size_t MaxSize, |
157 | uint64_t CurrentWeight) override { |
158 | return 5; |
159 | } |
160 | |
161 | void mutate(BasicBlock &BB, RandomIRBuilder &IB) override; |
162 | |
163 | private: |
164 | void connectBlocksToSink(ArrayRef<BasicBlock *> Blocks, BasicBlock *Sink, |
165 | RandomIRBuilder &IB); |
166 | }; |
167 | |
168 | /// Strategy to insert PHI Nodes at the head of each basic block. |
169 | class InsertPHIStrategy : public IRMutationStrategy { |
170 | public: |
171 | uint64_t getWeight(size_t CurrentSize, size_t MaxSize, |
172 | uint64_t CurrentWeight) override { |
173 | return 2; |
174 | } |
175 | |
176 | void mutate(BasicBlock &BB, RandomIRBuilder &IB) override; |
177 | }; |
178 | |
179 | /// Strategy to select a random instruction and add a new sink (user) to it to |
180 | /// increate data dependency. |
181 | class SinkInstructionStrategy : public IRMutationStrategy { |
182 | public: |
183 | uint64_t getWeight(size_t CurrentSize, size_t MaxSize, |
184 | uint64_t CurrentWeight) override { |
185 | return 2; |
186 | } |
187 | |
188 | void mutate(Function &F, RandomIRBuilder &IB) override; |
189 | void mutate(BasicBlock &BB, RandomIRBuilder &IB) override; |
190 | }; |
191 | |
192 | /// Strategy to randomly select a block and shuffle the operations without |
193 | /// affecting data dependency. |
194 | class ShuffleBlockStrategy : public IRMutationStrategy { |
195 | public: |
196 | uint64_t getWeight(size_t CurrentSize, size_t MaxSize, |
197 | uint64_t CurrentWeight) override { |
198 | return 2; |
199 | } |
200 | |
201 | void mutate(BasicBlock &BB, RandomIRBuilder &IB) override; |
202 | }; |
203 | |
/// Fuzzer friendly interface for the llvm bitcode parser.
///
/// \param Data Bitcode we are going to parse
/// \param Size Size of the 'Data' in bytes
/// \param Context LLVMContext handed to the parser; presumably the returned
///        module is created in it (confirm against the implementation)
/// \return New module or nullptr in case of error
std::unique_ptr<Module> parseModule(const uint8_t *Data, size_t Size,
                                    LLVMContext &Context);
211 | |
/// Fuzzer friendly interface for the llvm bitcode printer.
///
/// \param M Module to print
/// \param Dest Location to store serialized module
/// \param MaxSize Size of the destination buffer
/// \return Number of bytes that were written. When the module size exceeds
///         MaxSize, returns 0 and leaves Dest unchanged.
size_t writeModule(const Module &M, uint8_t *Dest, size_t MaxSize);
220 | |
/// Try to parse a module and verify it. May output verification errors to
/// errs().
///
/// \param Data Buffer holding the serialized module to parse
/// \param Size Size of the buffer; presumably in bytes, as for parseModule
/// \param Context LLVMContext handed to the parser
/// \return New module or nullptr in case of error.
std::unique_ptr<Module> parseAndVerify(const uint8_t *Data, size_t Size,
                                       LLVMContext &Context);
226 | |
227 | } // namespace llvm |
228 | |
229 | #endif // LLVM_FUZZMUTATE_IRMUTATOR_H |
230 | |