FuzzerDataFlowTrace.cpp source code [compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp]

1	//===- FuzzerDataFlowTrace.cpp - DataFlowTrace ---- C++ -* ===//*
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	// fuzzer::DataFlowTrace
9	//===----------------------------------------------------------------------===//
10
11	#include "FuzzerDataFlowTrace.h"
12
13	#include "FuzzerCommand.h"
14	#include "FuzzerIO.h"
15	#include "FuzzerRandom.h"
16	#include "FuzzerSHA1.h"
17	#include "FuzzerUtil.h"
18
19	#include <cstdlib>
20	#include <fstream>
21	#include <numeric>
22	#include <queue>
23	#include <sstream>
24	#include <string>
25	#include <unordered_map>
26	#include <unordered_set>
27	#include <vector>
28
29	namespace fuzzer {
30	static const char *kFunctionsTxt = "functions.txt";
31
32	bool BlockCoverage::AppendCoverage(const std::string &S) {
33	std::stringstream SS(S);
34	return AppendCoverage(IN&: SS);
35	}
36
37	// Coverage lines have this form:
38	// CN X Y Z T
39	// where N is the number of the function, T is the total number of instrumented
40	// BBs, and X,Y,Z, if present, are the indices of covered BB.
41	// BB #0, which is the entry block, is not explicitly listed.
42	bool BlockCoverage::AppendCoverage(std::istream &IN) {
43	std::string L;
44	while (std::getline(in&: IN, str&: L, delim: `'\n'`)) {
45	if (L.empty())
46	continue;
47	std::stringstream SS(L.c_str() + `1`);
48	size_t FunctionId = `0`;
49	SS >> FunctionId;
50	if (L [`0`] == `'F'`) {
51	FunctionsWithDFT.insert(x: FunctionId);
52	continue;
53	}
54	if (L [`0`] != `'C'`) continue;
55	std::vector<uint32_t> CoveredBlocks;
56	while (true) {
57	uint32_t BB = `0`;
58	SS >> BB;
59	if (!SS) break;
60	CoveredBlocks.push_back(x: BB);
61	}
62	if (CoveredBlocks.empty()) return false;
63	// Ensures no CoverageVector is longer than UINT32_MAX.
64	uint32_t NumBlocks = CoveredBlocks.back();
65	CoveredBlocks.pop_back();
66	for (auto BB : CoveredBlocks)
67	if (BB >= NumBlocks) return false;
68	auto It = Functions.find(x: FunctionId);
69	auto &Counters =
70	It == Functions.end()
71	? Functions.insert(x: {FunctionId, std::vector<uint32_t>(NumBlocks)})
72	.first ->second
73	: It ->second;
74
75	if (Counters.size() != NumBlocks) return false; // wrong number of blocks.
76
77	Counters [`0`]++;
78	for (auto BB : CoveredBlocks)
79	Counters [BB]++;
80	}
81	return true;
82	}
83
84	// Assign weights to each function.
85	// General principles:
86	// any uncovered function gets weight 0.*
87	// a function with lots of uncovered blocks gets bigger weight.*
88	// a function with a less frequently executed code gets bigger weight.*
89	std::vector<double> BlockCoverage::FunctionWeights(size_t NumFunctions) const {
90	std::vector<double> Res(NumFunctions);
91	for (const auto &It : Functions) {
92	auto FunctionID = It.first;
93	auto Counters = It.second;
94	assert(FunctionID < NumFunctions);
95	auto &Weight = Res [FunctionID];
96	// Give higher weight if the function has a DFT.
97	Weight = FunctionsWithDFT.count(x: FunctionID) ? `1000.` : `1`;
98	// Give higher weight to functions with less frequently seen basic blocks.
99	Weight /= SmallestNonZeroCounter(Counters);
100	// Give higher weight to functions with the most uncovered basic blocks.
101	Weight *= NumberOfUncoveredBlocks(Counters) + `1`;
102	}
103	return Res;
104	}
105
106	void DataFlowTrace::ReadCoverage(const std::string &DirPath) {
107	std::vector<SizedFile> Files;
108	GetSizedFilesFromDir(Dir: DirPath, V: &Files);
109	for (auto &SF : Files) {
110	auto Name = Basename(Path: SF.File);
111	if (Name == kFunctionsTxt) continue;
112	if (!CorporaHashes.count(x: Name)) continue;
113	std::ifstream IF(SF.File);
114	Coverage.AppendCoverage(IN&: IF);
115	}
116	}
117
118	static void DFTStringAppendToVector(std::vector<uint8_t> *DFT,
119	const std::string &DFTString) {
120	assert(DFT->size() == DFTString.size());
121	for (size_t I = `0`, Len = DFT->size(); I < Len; I++)
122	(*DFT)[I] = DFTString [I] == `'1'`;
123	}
124
125	// converts a string of '0' and '1' into a std::vector<uint8_t>
126	static std::vector<uint8_t> DFTStringToVector(const std::string &DFTString) {
127	std::vector<uint8_t> DFT(DFTString.size());
128	DFTStringAppendToVector(DFT: &DFT, DFTString);
129	return DFT;
130	}
131
132	static bool ParseError(const char Err, const* std::string &Line) {
133	Printf(Fmt: "DataFlowTrace: parse error: %s: Line: %s\n", Err, Line.c_str());
134	return false;
135	}
136
137	// TODO(metzman): replace std::string with std::string_view for
138	// better performance. Need to figure our how to use string_view on Windows.
139	static bool ParseDFTLine(const std::string &Line, size_t *FunctionNum,
140	std::string *DFTString) {
141	if (!Line.empty() && Line [`0`] != `'F'`)
142	return false; // Ignore coverage.
143	size_t SpacePos = Line.find(c: `' '`);
144	if (SpacePos == std::string::npos)
145	return ParseError(Err: "no space in the trace line", Line);
146	if (Line.empty() \|\| Line [`0`] != `'F'`)
147	return ParseError(Err: "the trace line doesn't start with 'F'", Line);
148	*FunctionNum = std::atol(nptr: Line.c_str() + `1`);
149	const char *Beg = Line.c_str() + SpacePos + `1`;
150	const char *End = Line.c_str() + Line.size();
151	assert(Beg < End);
152	size_t Len = End - Beg;
153	for (size_t I = `0`; I < Len; I++) {
154	if (Beg[I] != `'0'` && Beg[I] != `'1'`)
155	return ParseError(Err: "the trace should contain only 0 or 1", Line);
156	}
157	*DFTString = Beg;
158	return true;
159	}
160
161	bool DataFlowTrace::Init(const std::string &DirPath, std::string *FocusFunction,
162	std::vector<SizedFile> &CorporaFiles, Random &Rand) {
163	if (DirPath.empty()) return false;
164	Printf(Fmt: "INFO: DataFlowTrace: reading from '%s'\n", DirPath.c_str());
165	std::vector<SizedFile> Files;
166	GetSizedFilesFromDir(Dir: DirPath, V: &Files);
167	std::string L;
168	size_t FocusFuncIdx = SIZE_MAX;
169	std::vector<std::string> FunctionNames;
170
171	// Collect the hashes of the corpus files.
172	for (auto &SF : CorporaFiles)
173	CorporaHashes.insert(x: Hash(U: FileToVector(Path: SF.File)));
174
175	// Read functions.txt
176	std::ifstream IF(DirPlusFile(DirPath, FileName: kFunctionsTxt));
177	size_t NumFunctions = `0`;
178	while (std::getline(in&: IF, str&: L, delim: `'\n'`)) {
179	FunctionNames.push_back(x: L);
180	NumFunctions++;
181	if (*FocusFunction == L)
182	FocusFuncIdx = NumFunctions - `1`;
183	}
184	if (!NumFunctions)
185	return false;
186
187	if (*FocusFunction == "auto") {
188	// AUTOFOCUS works like this:
189	// reads the coverage data from the DFT files.*
190	// assigns weights to functions based on coverage.*
191	// chooses a random function according to the weights.*
192	ReadCoverage(DirPath);
193	auto Weights = Coverage.FunctionWeights(NumFunctions);
194	std::vector<double> Intervals(NumFunctions + `1`);
195	std::iota(first: Intervals.begin(), last: Intervals.end(), value: `0`);
196	auto Distribution = std::piecewise_constant_distribution<double>(
197	Intervals.begin(), Intervals.end(), Weights.begin());
198	FocusFuncIdx = static_cast<size_t>(Distribution (Rand));
199	*FocusFunction = FunctionNames [FocusFuncIdx];
200	assert(FocusFuncIdx < NumFunctions);
201	Printf(Fmt: "INFO: AUTOFOCUS: %zd %s\n", FocusFuncIdx,
202	FunctionNames [FocusFuncIdx].c_str());
203	for (size_t i = `0`; i < NumFunctions; i++) {
204	if (Weights [i] == `0.0`)
205	continue;
206	Printf(Fmt: " [%zd] W %g\tBB-tot %u\tBB-cov %u\tEntryFreq %u:\t%s\n", i,
207	Weights [i], Coverage.GetNumberOfBlocks(FunctionId: i),
208	Coverage.GetNumberOfCoveredBlocks(FunctionId: i), Coverage.GetCounter(FunctionId: i, BasicBlockId: `0`),
209	FunctionNames [i].c_str());
210	}
211	}
212
213	if (!NumFunctions \|\| FocusFuncIdx == SIZE_MAX \|\| Files.size() <= `1`)
214	return false;
215
216	// Read traces.
217	size_t NumTraceFiles = `0`;
218	size_t NumTracesWithFocusFunction = `0`;
219	for (auto &SF : Files) {
220	auto Name = Basename(Path: SF.File);
221	if (Name == kFunctionsTxt) continue;
222	if (!CorporaHashes.count(x: Name)) continue; // not in the corpus.
223	NumTraceFiles++;
224	// Printf("=== %s\n", Name.c_str());
225	std::ifstream IF(SF.File);
226	while (std::getline(in&: IF, str&: L, delim: `'\n'`)) {
227	size_t FunctionNum = `0`;
228	std::string DFTString;
229	if (ParseDFTLine(Line: L, FunctionNum: &FunctionNum, DFTString: &DFTString) &&
230	FunctionNum == FocusFuncIdx) {
231	NumTracesWithFocusFunction++;
232
233	if (FunctionNum >= NumFunctions)
234	return ParseError(Err: "N is greater than the number of functions", Line: L);
235	Traces [Name] = DFTStringToVector(DFTString);
236	// Print just a few small traces.
237	if (NumTracesWithFocusFunction <= `3` && DFTString.size() <= `16`)
238	Printf(Fmt: "%s => \|%s\|\n", Name.c_str(), std::string (DFTString).c_str());
239	break; // No need to parse the following lines.
240	}
241	}
242	}
243	Printf(Fmt: "INFO: DataFlowTrace: %zd trace files, %zd functions, "
244	"%zd traces with focus function\n",
245	NumTraceFiles, NumFunctions, NumTracesWithFocusFunction);
246	return NumTraceFiles > `0`;
247	}
248
249	int CollectDataFlow(const std::string &DFTBinary, const std::string &DirPath,
250	const std::vector<SizedFile> &CorporaFiles) {
251	Printf(Fmt: "INFO: collecting data flow: bin: %s dir: %s files: %zd\n",
252	DFTBinary.c_str(), DirPath.c_str(), CorporaFiles.size());
253	if (CorporaFiles.empty()) {
254	Printf(Fmt: "ERROR: can't collect data flow without corpus provided.");
255	return `1`;
256	}
257
258	static char DFSanEnv[] = "DFSAN_OPTIONS=warn_unimplemented=0";
259	putenv(string: DFSanEnv);
260	MkDir(Path: DirPath);
261	for (auto &F : CorporaFiles) {
262	// For every input F we need to collect the data flow and the coverage.
263	// Data flow collection may fail if we request too many DFSan tags at once.
264	// So, we start from requesting all tags in range [0,Size) and if that fails
265	// we then request tags in [0,Size/2) and [Size/2, Size), and so on.
266	// Function number => DFT.
267	auto OutPath = DirPlusFile(DirPath, FileName: Hash(U: FileToVector(Path: F.File)));
268	std::unordered_map<size_t, std::vector<uint8_t>> DFTMap;
269	std::unordered_set<std::string> Cov;
270	Command Cmd;
271	Cmd.addArgument(Arg: DFTBinary);
272	Cmd.addArgument(Arg: F.File);
273	Cmd.addArgument(Arg: OutPath);
274	Printf(Fmt: "CMD: %s\n", Cmd.toString().c_str());
275	ExecuteCommand(Cmd);
276	}
277	// Write functions.txt if it's currently empty or doesn't exist.
278	auto FunctionsTxtPath = DirPlusFile(DirPath, FileName: kFunctionsTxt);
279	if (FileToString(Path: FunctionsTxtPath).empty()) {
280	Command Cmd;
281	Cmd.addArgument(Arg: DFTBinary);
282	Cmd.setOutputFile(FunctionsTxtPath);
283	ExecuteCommand(Cmd);
284	}
285	return `0`;
286	}
287
288	} // namespace fuzzer
289

source code of compiler-rt/lib/fuzzer/FuzzerDataFlowTrace.cpp