1 | //===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // fuzzer::InputCorpus |
9 | //===----------------------------------------------------------------------===// |
10 | |
11 | #ifndef LLVM_FUZZER_CORPUS |
12 | #define LLVM_FUZZER_CORPUS |
13 | |
14 | #include "FuzzerDataFlowTrace.h" |
15 | #include "FuzzerDefs.h" |
16 | #include "FuzzerIO.h" |
17 | #include "FuzzerRandom.h" |
18 | #include "FuzzerSHA1.h" |
19 | #include "FuzzerTracePC.h" |
20 | #include <algorithm> |
21 | #include <bitset> |
22 | #include <chrono> |
23 | #include <numeric> |
24 | #include <random> |
25 | #include <unordered_set> |
26 | |
27 | namespace fuzzer { |
28 | |
29 | struct InputInfo { |
30 | Unit U; // The actual input data. |
31 | std::chrono::microseconds TimeOfUnit; |
32 | uint8_t Sha1[kSHA1NumBytes]; // Checksum. |
33 | // Number of features that this input has and no smaller input has. |
34 | size_t NumFeatures = 0; |
35 | size_t Tmp = 0; // Used by ValidateFeatureSet. |
36 | // Stats. |
37 | size_t NumExecutedMutations = 0; |
38 | size_t NumSuccessfullMutations = 0; |
39 | bool NeverReduce = false; |
40 | bool MayDeleteFile = false; |
41 | bool Reduced = false; |
42 | bool HasFocusFunction = false; |
43 | std::vector<uint32_t> UniqFeatureSet; |
44 | std::vector<uint8_t> DataFlowTraceForFocusFunction; |
45 | // Power schedule. |
46 | bool NeedsEnergyUpdate = false; |
47 | double Energy = 0.0; |
48 | double SumIncidence = 0.0; |
49 | std::vector<std::pair<uint32_t, uint16_t>> FeatureFreqs; |
50 | |
51 | // Delete feature Idx and its frequency from FeatureFreqs. |
52 | bool DeleteFeatureFreq(uint32_t Idx) { |
53 | if (FeatureFreqs.empty()) |
54 | return false; |
55 | |
56 | // Binary search over local feature frequencies sorted by index. |
57 | auto Lower = std::lower_bound(first: FeatureFreqs.begin(), last: FeatureFreqs.end(), |
58 | val: std::pair<uint32_t, uint16_t>(Idx, 0)); |
59 | |
60 | if (Lower != FeatureFreqs.end() && Lower->first == Idx) { |
61 | FeatureFreqs.erase(position: Lower); |
62 | return true; |
63 | } |
64 | return false; |
65 | } |
66 | |
67 | // Assign more energy to a high-entropy seed, i.e., that reveals more |
68 | // information about the globally rare features in the neighborhood of the |
69 | // seed. Since we do not know the entropy of a seed that has never been |
70 | // executed we assign fresh seeds maximum entropy and let II->Energy approach |
71 | // the true entropy from above. If ScalePerExecTime is true, the computed |
72 | // entropy is scaled based on how fast this input executes compared to the |
73 | // average execution time of inputs. The faster an input executes, the more |
74 | // energy gets assigned to the input. |
75 | void UpdateEnergy(size_t GlobalNumberOfFeatures, bool ScalePerExecTime, |
76 | std::chrono::microseconds AverageUnitExecutionTime) { |
77 | Energy = 0.0; |
78 | SumIncidence = 0.0; |
79 | |
80 | // Apply add-one smoothing to locally discovered features. |
81 | for (const auto &F : FeatureFreqs) { |
82 | double LocalIncidence = F.second + 1; |
83 | Energy -= LocalIncidence * log(x: LocalIncidence); |
84 | SumIncidence += LocalIncidence; |
85 | } |
86 | |
87 | // Apply add-one smoothing to locally undiscovered features. |
88 | // PreciseEnergy -= 0; // since log(1.0) == 0) |
89 | SumIncidence += |
90 | static_cast<double>(GlobalNumberOfFeatures - FeatureFreqs.size()); |
91 | |
92 | // Add a single locally abundant feature apply add-one smoothing. |
93 | double AbdIncidence = static_cast<double>(NumExecutedMutations + 1); |
94 | Energy -= AbdIncidence * log(x: AbdIncidence); |
95 | SumIncidence += AbdIncidence; |
96 | |
97 | // Normalize. |
98 | if (SumIncidence != 0) |
99 | Energy = Energy / SumIncidence + log(x: SumIncidence); |
100 | |
101 | if (ScalePerExecTime) { |
102 | // Scaling to favor inputs with lower execution time. |
103 | uint32_t PerfScore = 100; |
104 | if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 10) |
105 | PerfScore = 10; |
106 | else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 4) |
107 | PerfScore = 25; |
108 | else if (TimeOfUnit.count() > AverageUnitExecutionTime.count() * 2) |
109 | PerfScore = 50; |
110 | else if (TimeOfUnit.count() * 3 > AverageUnitExecutionTime.count() * 4) |
111 | PerfScore = 75; |
112 | else if (TimeOfUnit.count() * 4 < AverageUnitExecutionTime.count()) |
113 | PerfScore = 300; |
114 | else if (TimeOfUnit.count() * 3 < AverageUnitExecutionTime.count()) |
115 | PerfScore = 200; |
116 | else if (TimeOfUnit.count() * 2 < AverageUnitExecutionTime.count()) |
117 | PerfScore = 150; |
118 | |
119 | Energy *= PerfScore; |
120 | } |
121 | } |
122 | |
123 | // Increment the frequency of the feature Idx. |
124 | void UpdateFeatureFrequency(uint32_t Idx) { |
125 | NeedsEnergyUpdate = true; |
126 | |
127 | // The local feature frequencies is an ordered vector of pairs. |
128 | // If there are no local feature frequencies, push_back preserves order. |
129 | // Set the feature frequency for feature Idx32 to 1. |
130 | if (FeatureFreqs.empty()) { |
131 | FeatureFreqs.push_back(x: std::pair<uint32_t, uint16_t>(Idx, 1)); |
132 | return; |
133 | } |
134 | |
135 | // Binary search over local feature frequencies sorted by index. |
136 | auto Lower = std::lower_bound(first: FeatureFreqs.begin(), last: FeatureFreqs.end(), |
137 | val: std::pair<uint32_t, uint16_t>(Idx, 0)); |
138 | |
139 | // If feature Idx32 already exists, increment its frequency. |
140 | // Otherwise, insert a new pair right after the next lower index. |
141 | if (Lower != FeatureFreqs.end() && Lower->first == Idx) { |
142 | Lower->second++; |
143 | } else { |
144 | FeatureFreqs.insert(position: Lower, x: std::pair<uint32_t, uint16_t>(Idx, 1)); |
145 | } |
146 | } |
147 | }; |
148 | |
149 | struct EntropicOptions { |
150 | bool Enabled; |
151 | size_t NumberOfRarestFeatures; |
152 | size_t FeatureFrequencyThreshold; |
153 | bool ScalePerExecTime; |
154 | }; |
155 | |
156 | class InputCorpus { |
157 | static const uint32_t kFeatureSetSize = 1 << 21; |
158 | static const uint8_t kMaxMutationFactor = 20; |
159 | static const size_t kSparseEnergyUpdates = 100; |
160 | |
161 | size_t NumExecutedMutations = 0; |
162 | |
163 | EntropicOptions Entropic; |
164 | |
165 | public: |
166 | InputCorpus(const std::string &OutputCorpus, EntropicOptions Entropic) |
167 | : Entropic(Entropic), OutputCorpus(OutputCorpus) { |
168 | memset(s: InputSizesPerFeature, c: 0, n: sizeof(InputSizesPerFeature)); |
169 | memset(s: SmallestElementPerFeature, c: 0, n: sizeof(SmallestElementPerFeature)); |
170 | } |
171 | ~InputCorpus() { |
172 | for (auto II : Inputs) |
173 | delete II; |
174 | } |
175 | size_t size() const { return Inputs.size(); } |
176 | size_t SizeInBytes() const { |
177 | size_t Res = 0; |
178 | for (auto II : Inputs) |
179 | Res += II->U.size(); |
180 | return Res; |
181 | } |
182 | size_t NumActiveUnits() const { |
183 | size_t Res = 0; |
184 | for (auto II : Inputs) |
185 | Res += !II->U.empty(); |
186 | return Res; |
187 | } |
188 | size_t MaxInputSize() const { |
189 | size_t Res = 0; |
190 | for (auto II : Inputs) |
191 | Res = std::max(a: Res, b: II->U.size()); |
192 | return Res; |
193 | } |
194 | void IncrementNumExecutedMutations() { NumExecutedMutations++; } |
195 | |
196 | size_t NumInputsThatTouchFocusFunction() { |
197 | return std::count_if(first: Inputs.begin(), last: Inputs.end(), pred: [](const InputInfo *II) { |
198 | return II->HasFocusFunction; |
199 | }); |
200 | } |
201 | |
202 | size_t NumInputsWithDataFlowTrace() { |
203 | return std::count_if(first: Inputs.begin(), last: Inputs.end(), pred: [](const InputInfo *II) { |
204 | return !II->DataFlowTraceForFocusFunction.empty(); |
205 | }); |
206 | } |
207 | |
208 | bool empty() const { return Inputs.empty(); } |
209 | const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } |
210 | InputInfo *AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, |
211 | bool HasFocusFunction, bool NeverReduce, |
212 | std::chrono::microseconds TimeOfUnit, |
213 | const std::vector<uint32_t> &FeatureSet, |
214 | const DataFlowTrace &DFT, const InputInfo *BaseII) { |
215 | assert(!U.empty()); |
216 | if (FeatureDebug) |
217 | Printf(Fmt: "ADD_TO_CORPUS %zd NF %zd\n" , Inputs.size(), NumFeatures); |
218 | // Inputs.size() is cast to uint32_t below. |
219 | assert(Inputs.size() < std::numeric_limits<uint32_t>::max()); |
220 | Inputs.push_back(x: new InputInfo()); |
221 | InputInfo &II = *Inputs.back(); |
222 | II.U = U; |
223 | II.NumFeatures = NumFeatures; |
224 | II.NeverReduce = NeverReduce; |
225 | II.TimeOfUnit = TimeOfUnit; |
226 | II.MayDeleteFile = MayDeleteFile; |
227 | II.UniqFeatureSet = FeatureSet; |
228 | II.HasFocusFunction = HasFocusFunction; |
229 | // Assign maximal energy to the new seed. |
230 | II.Energy = RareFeatures.empty() ? 1.0 : log(x: RareFeatures.size()); |
231 | II.SumIncidence = static_cast<double>(RareFeatures.size()); |
232 | II.NeedsEnergyUpdate = false; |
233 | std::sort(first: II.UniqFeatureSet.begin(), last: II.UniqFeatureSet.end()); |
234 | ComputeSHA1(Data: U.data(), Len: U.size(), Out: II.Sha1); |
235 | auto Sha1Str = Sha1ToString(Sha1: II.Sha1); |
236 | Hashes.insert(x: Sha1Str); |
237 | if (HasFocusFunction) |
238 | if (auto V = DFT.Get(InputSha1: Sha1Str)) |
239 | II.DataFlowTraceForFocusFunction = *V; |
240 | // This is a gross heuristic. |
241 | // Ideally, when we add an element to a corpus we need to know its DFT. |
242 | // But if we don't, we'll use the DFT of its base input. |
243 | if (II.DataFlowTraceForFocusFunction.empty() && BaseII) |
244 | II.DataFlowTraceForFocusFunction = BaseII->DataFlowTraceForFocusFunction; |
245 | DistributionNeedsUpdate = true; |
246 | PrintCorpus(); |
247 | // ValidateFeatureSet(); |
248 | return &II; |
249 | } |
250 | |
251 | // Debug-only |
252 | void PrintUnit(const Unit &U) { |
253 | if (!FeatureDebug) return; |
254 | for (uint8_t C : U) { |
255 | if (C != 'F' && C != 'U' && C != 'Z') |
256 | C = '.'; |
257 | Printf(Fmt: "%c" , C); |
258 | } |
259 | } |
260 | |
261 | // Debug-only |
262 | void PrintFeatureSet(const std::vector<uint32_t> &FeatureSet) { |
263 | if (!FeatureDebug) return; |
264 | Printf(Fmt: "{" ); |
265 | for (uint32_t Feature: FeatureSet) |
266 | Printf(Fmt: "%u," , Feature); |
267 | Printf(Fmt: "}" ); |
268 | } |
269 | |
270 | // Debug-only |
271 | void PrintCorpus() { |
272 | if (!FeatureDebug) return; |
273 | Printf(Fmt: "======= CORPUS:\n" ); |
274 | int i = 0; |
275 | for (auto II : Inputs) { |
276 | if (std::find(first: II->U.begin(), last: II->U.end(), val: 'F') != II->U.end()) { |
277 | Printf(Fmt: "[%2d] " , i); |
278 | Printf(Fmt: "%s sz=%zd " , Sha1ToString(Sha1: II->Sha1).c_str(), II->U.size()); |
279 | PrintUnit(U: II->U); |
280 | Printf(Fmt: " " ); |
281 | PrintFeatureSet(FeatureSet: II->UniqFeatureSet); |
282 | Printf(Fmt: "\n" ); |
283 | } |
284 | i++; |
285 | } |
286 | } |
287 | |
288 | void Replace(InputInfo *II, const Unit &U, |
289 | std::chrono::microseconds TimeOfUnit) { |
290 | assert(II->U.size() > U.size()); |
291 | Hashes.erase(x: Sha1ToString(Sha1: II->Sha1)); |
292 | DeleteFile(II: *II); |
293 | ComputeSHA1(Data: U.data(), Len: U.size(), Out: II->Sha1); |
294 | Hashes.insert(x: Sha1ToString(Sha1: II->Sha1)); |
295 | II->U = U; |
296 | II->Reduced = true; |
297 | II->TimeOfUnit = TimeOfUnit; |
298 | DistributionNeedsUpdate = true; |
299 | } |
300 | |
301 | bool HasUnit(const Unit &U) { return Hashes.count(x: Hash(U)); } |
302 | bool HasUnit(const std::string &H) { return Hashes.count(x: H); } |
303 | InputInfo &ChooseUnitToMutate(Random &Rand) { |
304 | InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)]; |
305 | assert(!II.U.empty()); |
306 | return II; |
307 | } |
308 | |
309 | InputInfo &ChooseUnitToCrossOverWith(Random &Rand, bool UniformDist) { |
310 | if (!UniformDist) { |
311 | return ChooseUnitToMutate(Rand); |
312 | } |
313 | InputInfo &II = *Inputs[Rand(Inputs.size())]; |
314 | assert(!II.U.empty()); |
315 | return II; |
316 | } |
317 | |
318 | // Returns an index of random unit from the corpus to mutate. |
319 | size_t ChooseUnitIdxToMutate(Random &Rand) { |
320 | UpdateCorpusDistribution(Rand); |
321 | size_t Idx = static_cast<size_t>(CorpusDistribution(Rand)); |
322 | assert(Idx < Inputs.size()); |
323 | return Idx; |
324 | } |
325 | |
326 | void PrintStats() { |
327 | for (size_t i = 0; i < Inputs.size(); i++) { |
328 | const auto &II = *Inputs[i]; |
329 | Printf(Fmt: " [% 3zd %s] sz: % 5zd runs: % 5zd succ: % 5zd focus: %d\n" , i, |
330 | Sha1ToString(Sha1: II.Sha1).c_str(), II.U.size(), |
331 | II.NumExecutedMutations, II.NumSuccessfullMutations, |
332 | II.HasFocusFunction); |
333 | } |
334 | } |
335 | |
336 | void PrintFeatureSet() { |
337 | for (size_t i = 0; i < kFeatureSetSize; i++) { |
338 | if(size_t Sz = GetFeature(Idx: i)) |
339 | Printf(Fmt: "[%zd: id %zd sz%zd] " , i, SmallestElementPerFeature[i], Sz); |
340 | } |
341 | Printf(Fmt: "\n\t" ); |
342 | for (size_t i = 0; i < Inputs.size(); i++) |
343 | if (size_t N = Inputs[i]->NumFeatures) |
344 | Printf(Fmt: " %zd=>%zd " , i, N); |
345 | Printf(Fmt: "\n" ); |
346 | } |
347 | |
348 | void DeleteFile(const InputInfo &II) { |
349 | if (!OutputCorpus.empty() && II.MayDeleteFile) |
350 | RemoveFile(Path: DirPlusFile(DirPath: OutputCorpus, FileName: Sha1ToString(Sha1: II.Sha1))); |
351 | } |
352 | |
353 | void DeleteInput(size_t Idx) { |
354 | InputInfo &II = *Inputs[Idx]; |
355 | DeleteFile(II); |
356 | Unit().swap(x&: II.U); |
357 | II.Energy = 0.0; |
358 | II.NeedsEnergyUpdate = false; |
359 | DistributionNeedsUpdate = true; |
360 | if (FeatureDebug) |
361 | Printf(Fmt: "EVICTED %zd\n" , Idx); |
362 | } |
363 | |
364 | void AddRareFeature(uint32_t Idx) { |
365 | // Maintain *at least* TopXRarestFeatures many rare features |
366 | // and all features with a frequency below ConsideredRare. |
367 | // Remove all other features. |
368 | while (RareFeatures.size() > Entropic.NumberOfRarestFeatures && |
369 | FreqOfMostAbundantRareFeature > Entropic.FeatureFrequencyThreshold) { |
370 | |
371 | // Find most and second most abbundant feature. |
372 | uint32_t MostAbundantRareFeatureIndices[2] = {RareFeatures[0], |
373 | RareFeatures[0]}; |
374 | size_t Delete = 0; |
375 | for (size_t i = 0; i < RareFeatures.size(); i++) { |
376 | uint32_t Idx2 = RareFeatures[i]; |
377 | if (GlobalFeatureFreqs[Idx2] >= |
378 | GlobalFeatureFreqs[MostAbundantRareFeatureIndices[0]]) { |
379 | MostAbundantRareFeatureIndices[1] = MostAbundantRareFeatureIndices[0]; |
380 | MostAbundantRareFeatureIndices[0] = Idx2; |
381 | Delete = i; |
382 | } |
383 | } |
384 | |
385 | // Remove most abundant rare feature. |
386 | IsRareFeature[Delete] = false; |
387 | RareFeatures[Delete] = RareFeatures.back(); |
388 | RareFeatures.pop_back(); |
389 | |
390 | for (auto II : Inputs) { |
391 | if (II->DeleteFeatureFreq(Idx: MostAbundantRareFeatureIndices[0])) |
392 | II->NeedsEnergyUpdate = true; |
393 | } |
394 | |
395 | // Set 2nd most abundant as the new most abundant feature count. |
396 | FreqOfMostAbundantRareFeature = |
397 | GlobalFeatureFreqs[MostAbundantRareFeatureIndices[1]]; |
398 | } |
399 | |
400 | // Add rare feature, handle collisions, and update energy. |
401 | RareFeatures.push_back(x: Idx); |
402 | IsRareFeature[Idx] = true; |
403 | GlobalFeatureFreqs[Idx] = 0; |
404 | for (auto II : Inputs) { |
405 | II->DeleteFeatureFreq(Idx); |
406 | |
407 | // Apply add-one smoothing to this locally undiscovered feature. |
408 | // Zero energy seeds will never be fuzzed and remain zero energy. |
409 | if (II->Energy > 0.0) { |
410 | II->SumIncidence += 1; |
411 | II->Energy += log(x: II->SumIncidence) / II->SumIncidence; |
412 | } |
413 | } |
414 | |
415 | DistributionNeedsUpdate = true; |
416 | } |
417 | |
418 | bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { |
419 | assert(NewSize); |
420 | Idx = Idx % kFeatureSetSize; |
421 | uint32_t OldSize = GetFeature(Idx); |
422 | if (OldSize == 0 || (Shrink && OldSize > NewSize)) { |
423 | if (OldSize > 0) { |
424 | size_t OldIdx = SmallestElementPerFeature[Idx]; |
425 | InputInfo &II = *Inputs[OldIdx]; |
426 | assert(II.NumFeatures > 0); |
427 | II.NumFeatures--; |
428 | if (II.NumFeatures == 0) |
429 | DeleteInput(Idx: OldIdx); |
430 | } else { |
431 | NumAddedFeatures++; |
432 | if (Entropic.Enabled) |
433 | AddRareFeature(Idx: (uint32_t)Idx); |
434 | } |
435 | NumUpdatedFeatures++; |
436 | if (FeatureDebug) |
437 | Printf(Fmt: "ADD FEATURE %zd sz %d\n" , Idx, NewSize); |
438 | // Inputs.size() is guaranteed to be less than UINT32_MAX by AddToCorpus. |
439 | SmallestElementPerFeature[Idx] = static_cast<uint32_t>(Inputs.size()); |
440 | InputSizesPerFeature[Idx] = NewSize; |
441 | return true; |
442 | } |
443 | return false; |
444 | } |
445 | |
446 | // Increment frequency of feature Idx globally and locally. |
447 | void UpdateFeatureFrequency(InputInfo *II, size_t Idx) { |
448 | uint32_t Idx32 = Idx % kFeatureSetSize; |
449 | |
450 | // Saturated increment. |
451 | if (GlobalFeatureFreqs[Idx32] == 0xFFFF) |
452 | return; |
453 | uint16_t Freq = GlobalFeatureFreqs[Idx32]++; |
454 | |
455 | // Skip if abundant. |
456 | if (Freq > FreqOfMostAbundantRareFeature || !IsRareFeature[Idx32]) |
457 | return; |
458 | |
459 | // Update global frequencies. |
460 | if (Freq == FreqOfMostAbundantRareFeature) |
461 | FreqOfMostAbundantRareFeature++; |
462 | |
463 | // Update local frequencies. |
464 | if (II) |
465 | II->UpdateFeatureFrequency(Idx: Idx32); |
466 | } |
467 | |
468 | size_t NumFeatures() const { return NumAddedFeatures; } |
469 | size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } |
470 | |
471 | private: |
472 | |
473 | static const bool FeatureDebug = false; |
474 | |
475 | uint32_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; } |
476 | |
477 | void ValidateFeatureSet() { |
478 | if (FeatureDebug) |
479 | PrintFeatureSet(); |
480 | for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) |
481 | if (GetFeature(Idx)) |
482 | Inputs[SmallestElementPerFeature[Idx]]->Tmp++; |
483 | for (auto II: Inputs) { |
484 | if (II->Tmp != II->NumFeatures) |
485 | Printf(Fmt: "ZZZ %zd %zd\n" , II->Tmp, II->NumFeatures); |
486 | assert(II->Tmp == II->NumFeatures); |
487 | II->Tmp = 0; |
488 | } |
489 | } |
490 | |
491 | // Updates the probability distribution for the units in the corpus. |
492 | // Must be called whenever the corpus or unit weights are changed. |
493 | // |
494 | // Hypothesis: inputs that maximize information about globally rare features |
495 | // are interesting. |
496 | void UpdateCorpusDistribution(Random &Rand) { |
497 | // Skip update if no seeds or rare features were added/deleted. |
498 | // Sparse updates for local change of feature frequencies, |
499 | // i.e., randomly do not skip. |
500 | if (!DistributionNeedsUpdate && |
501 | (!Entropic.Enabled || Rand(kSparseEnergyUpdates))) |
502 | return; |
503 | |
504 | DistributionNeedsUpdate = false; |
505 | |
506 | size_t N = Inputs.size(); |
507 | assert(N); |
508 | Intervals.resize(new_size: N + 1); |
509 | Weights.resize(new_size: N); |
510 | std::iota(first: Intervals.begin(), last: Intervals.end(), value: 0); |
511 | |
512 | std::chrono::microseconds AverageUnitExecutionTime(0); |
513 | for (auto II : Inputs) { |
514 | AverageUnitExecutionTime += II->TimeOfUnit; |
515 | } |
516 | AverageUnitExecutionTime /= N; |
517 | |
518 | bool VanillaSchedule = true; |
519 | if (Entropic.Enabled) { |
520 | for (auto II : Inputs) { |
521 | if (II->NeedsEnergyUpdate && II->Energy != 0.0) { |
522 | II->NeedsEnergyUpdate = false; |
523 | II->UpdateEnergy(GlobalNumberOfFeatures: RareFeatures.size(), ScalePerExecTime: Entropic.ScalePerExecTime, |
524 | AverageUnitExecutionTime); |
525 | } |
526 | } |
527 | |
528 | for (size_t i = 0; i < N; i++) { |
529 | |
530 | if (Inputs[i]->NumFeatures == 0) { |
531 | // If the seed doesn't represent any features, assign zero energy. |
532 | Weights[i] = 0.; |
533 | } else if (Inputs[i]->NumExecutedMutations / kMaxMutationFactor > |
534 | NumExecutedMutations / Inputs.size()) { |
535 | // If the seed was fuzzed a lot more than average, assign zero energy. |
536 | Weights[i] = 0.; |
537 | } else { |
538 | // Otherwise, simply assign the computed energy. |
539 | Weights[i] = Inputs[i]->Energy; |
540 | } |
541 | |
542 | // If energy for all seeds is zero, fall back to vanilla schedule. |
543 | if (Weights[i] > 0.0) |
544 | VanillaSchedule = false; |
545 | } |
546 | } |
547 | |
548 | if (VanillaSchedule) { |
549 | for (size_t i = 0; i < N; i++) |
550 | Weights[i] = |
551 | Inputs[i]->NumFeatures |
552 | ? static_cast<double>((i + 1) * |
553 | (Inputs[i]->HasFocusFunction ? 1000 : 1)) |
554 | : 0.; |
555 | } |
556 | |
557 | if (FeatureDebug) { |
558 | for (size_t i = 0; i < N; i++) |
559 | Printf(Fmt: "%zd " , Inputs[i]->NumFeatures); |
560 | Printf(Fmt: "SCORE\n" ); |
561 | for (size_t i = 0; i < N; i++) |
562 | Printf(Fmt: "%f " , Weights[i]); |
563 | Printf(Fmt: "Weights\n" ); |
564 | } |
565 | CorpusDistribution = std::piecewise_constant_distribution<double>( |
566 | Intervals.begin(), Intervals.end(), Weights.begin()); |
567 | } |
568 | std::piecewise_constant_distribution<double> CorpusDistribution; |
569 | |
570 | std::vector<double> Intervals; |
571 | std::vector<double> Weights; |
572 | |
573 | std::unordered_set<std::string> Hashes; |
574 | std::vector<InputInfo *> Inputs; |
575 | |
576 | size_t NumAddedFeatures = 0; |
577 | size_t NumUpdatedFeatures = 0; |
578 | uint32_t InputSizesPerFeature[kFeatureSetSize]; |
579 | uint32_t SmallestElementPerFeature[kFeatureSetSize]; |
580 | |
581 | bool DistributionNeedsUpdate = true; |
582 | uint16_t FreqOfMostAbundantRareFeature = 0; |
583 | uint16_t GlobalFeatureFreqs[kFeatureSetSize] = {}; |
584 | std::vector<uint32_t> RareFeatures; |
585 | std::bitset<kFeatureSetSize> IsRareFeature; |
586 | |
587 | std::string OutputCorpus; |
588 | }; |
589 | |
590 | } // namespace fuzzer |
591 | |
592 | #endif // LLVM_FUZZER_CORPUS |
593 | |