1 | /*===- DataFlow.cpp - a standalone DataFlow tracer -------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // An experimental data-flow tracer for fuzz targets. |
9 | // It is based on DFSan and SanitizerCoverage. |
10 | // https://clang.llvm.org/docs/DataFlowSanitizer.html |
11 | // https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow |
12 | // |
13 | // It executes the fuzz target on the given input while monitoring the |
14 | // data flow for every instrumented comparison instruction. |
15 | // |
16 | // The output shows which functions depend on which bytes of the input, |
17 | // and also provides basic-block coverage for every input. |
18 | // |
19 | // Build: |
20 | // 1. Compile this file (DataFlow.cpp) with -fsanitize=dataflow and -O2. |
21 | // 2. Compile DataFlowCallbacks.cpp with -O2 -fPIC. |
22 | // 3. Build the fuzz target with -g -fsanitize=dataflow |
23 | // -fsanitize-coverage=trace-pc-guard,pc-table,bb,trace-cmp |
24 | // 4. Link those together with -fsanitize=dataflow |
25 | // |
26 | // -fsanitize-coverage=trace-cmp inserts callbacks around every comparison |
27 | // instruction, DFSan modifies the calls to pass the data flow labels. |
28 | // The callbacks update the data flow label for the current function. |
29 | // See e.g. __dfsw___sanitizer_cov_trace_cmp1 below. |
30 | // |
31 | // -fsanitize-coverage=trace-pc-guard,pc-table,bb instruments function |
32 | // entries so that the comparison callback knows that current function. |
33 | // -fsanitize-coverage=...,bb also allows to collect basic block coverage. |
34 | // |
35 | // |
36 | // Run: |
37 | // # Collect data flow and coverage for INPUT_FILE |
38 | // # write to OUTPUT_FILE (default: stdout) |
39 | // export DFSAN_OPTIONS=warn_unimplemented=0 |
40 | // ./a.out INPUT_FILE [OUTPUT_FILE] |
41 | // |
42 | // # Print all instrumented functions. llvm-symbolizer must be present in PATH |
43 | // ./a.out |
44 | // |
45 | // Example output: |
46 | // =============== |
47 | // F0 11111111111111 |
48 | // F1 10000000000000 |
49 | // C0 1 2 3 4 5 |
50 | // C1 8 |
51 | // =============== |
52 | // "FN xxxxxxxxxx": tells what bytes of the input does the function N depend on. |
53 | // "CN X Y Z T": tells that a function N has basic blocks X, Y, and Z covered |
54 | // in addition to the function's entry block, out of T total instrumented |
55 | // blocks. |
56 | // |
57 | //===----------------------------------------------------------------------===*/ |
58 | |
59 | #include <assert.h> |
60 | #include <stdio.h> |
61 | #include <stdlib.h> |
62 | #include <stdint.h> |
63 | #include <string.h> |
64 | |
65 | #include <execinfo.h> // backtrace_symbols_fd |
66 | |
67 | #include "DataFlow.h" |
68 | |
69 | extern "C" { |
70 | extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size); |
71 | __attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv); |
72 | } // extern "C" |
73 | |
74 | CallbackData __dft; |
75 | static size_t InputLen; |
76 | static size_t NumIterations; |
77 | static dfsan_label **FuncLabelsPerIter; // NumIterations x NumFuncs; |
78 | |
79 | static inline bool BlockIsEntry(size_t BlockIdx) { |
80 | return __dft.PCsBeg[BlockIdx * 2 + 1] & PCFLAG_FUNC_ENTRY; |
81 | } |
82 | |
83 | const int kNumLabels = 8; |
84 | |
85 | // Prints all instrumented functions. |
86 | static int PrintFunctions() { |
87 | // We don't have the symbolizer integrated with dfsan yet. |
88 | // So use backtrace_symbols_fd and pipe it through llvm-symbolizer. |
89 | // TODO(kcc): this is pretty ugly and may break in lots of ways. |
90 | // We'll need to make a proper in-process symbolizer work with DFSan. |
91 | FILE *Pipe = popen(command: "sed 's/(+/ /g; s/).*//g' " |
92 | "| llvm-symbolizer " |
93 | "| grep '\\.dfsan' " |
94 | "| sed 's/\\.dfsan//g' " |
95 | "| c++filt" , |
96 | modes: "w" ); |
97 | for (size_t I = 0; I < __dft.NumGuards; I++) { |
98 | uintptr_t PC = __dft.PCsBeg[I * 2]; |
99 | if (!BlockIsEntry(BlockIdx: I)) continue; |
100 | void *const Buf[1] = {(void*)PC}; |
101 | backtrace_symbols_fd(array: Buf, size: 1, fd: fileno(stream: Pipe)); |
102 | } |
103 | pclose(stream: Pipe); |
104 | return 0; |
105 | } |
106 | |
107 | static void PrintBinary(FILE *Out, dfsan_label L, size_t Len) { |
108 | char buf[kNumLabels + 1]; |
109 | assert(Len <= kNumLabels); |
110 | for (int i = 0; i < kNumLabels; i++) |
111 | buf[i] = (L & (1 << i)) ? '1' : '0'; |
112 | buf[Len] = 0; |
113 | fprintf(stream: Out, format: "%s" , buf); |
114 | } |
115 | |
116 | static void PrintDataFlow(FILE *Out) { |
117 | for (size_t Func = 0; Func < __dft.NumFuncs; Func++) { |
118 | bool HasAny = false; |
119 | for (size_t Iter = 0; Iter < NumIterations; Iter++) |
120 | if (FuncLabelsPerIter[Iter][Func]) |
121 | HasAny = true; |
122 | if (!HasAny) |
123 | continue; |
124 | fprintf(stream: Out, format: "F%zd " , Func); |
125 | size_t LenOfLastIteration = kNumLabels; |
126 | if (auto Tail = InputLen % kNumLabels) |
127 | LenOfLastIteration = Tail; |
128 | for (size_t Iter = 0; Iter < NumIterations; Iter++) |
129 | PrintBinary(Out, L: FuncLabelsPerIter[Iter][Func], |
130 | Len: Iter == NumIterations - 1 ? LenOfLastIteration : kNumLabels); |
131 | fprintf(stream: Out, format: "\n" ); |
132 | } |
133 | } |
134 | |
135 | static void PrintCoverage(FILE *Out) { |
136 | ssize_t CurrentFuncGuard = -1; |
137 | ssize_t CurrentFuncNum = -1; |
138 | ssize_t NumBlocksInCurrentFunc = -1; |
139 | for (size_t FuncBeg = 0; FuncBeg < __dft.NumGuards;) { |
140 | CurrentFuncNum++; |
141 | assert(BlockIsEntry(FuncBeg)); |
142 | size_t FuncEnd = FuncBeg + 1; |
143 | for (; FuncEnd < __dft.NumGuards && !BlockIsEntry(BlockIdx: FuncEnd); FuncEnd++) |
144 | ; |
145 | if (__dft.BBExecuted[FuncBeg]) { |
146 | fprintf(stream: Out, format: "C%zd" , CurrentFuncNum); |
147 | for (size_t I = FuncBeg + 1; I < FuncEnd; I++) |
148 | if (__dft.BBExecuted[I]) |
149 | fprintf(stream: Out, format: " %zd" , I - FuncBeg); |
150 | fprintf(stream: Out, format: " %zd\n" , FuncEnd - FuncBeg); |
151 | } |
152 | FuncBeg = FuncEnd; |
153 | } |
154 | } |
155 | |
156 | int main(int argc, char **argv) { |
157 | if (LLVMFuzzerInitialize) |
158 | LLVMFuzzerInitialize(argc: &argc, argv: &argv); |
159 | if (argc == 1) |
160 | return PrintFunctions(); |
161 | assert(argc == 2 || argc == 3); |
162 | |
163 | const char *Input = argv[1]; |
164 | fprintf(stderr, format: "INFO: reading '%s'\n" , Input); |
165 | FILE *In = fopen(filename: Input, modes: "r" ); |
166 | assert(In); |
167 | fseek(stream: In, off: 0, SEEK_END); |
168 | InputLen = ftell(stream: In); |
169 | fseek(stream: In, off: 0, SEEK_SET); |
170 | unsigned char *Buf = (unsigned char*)malloc(size: InputLen); |
171 | size_t NumBytesRead = fread(ptr: Buf, size: 1, n: InputLen, stream: In); |
172 | assert(NumBytesRead == InputLen); |
173 | fclose(stream: In); |
174 | |
175 | NumIterations = (NumBytesRead + kNumLabels - 1) / kNumLabels; |
176 | FuncLabelsPerIter = |
177 | (dfsan_label **)calloc(nmemb: NumIterations, size: sizeof(dfsan_label *)); |
178 | for (size_t Iter = 0; Iter < NumIterations; Iter++) |
179 | FuncLabelsPerIter[Iter] = |
180 | (dfsan_label *)calloc(nmemb: __dft.NumFuncs, size: sizeof(dfsan_label)); |
181 | |
182 | for (size_t Iter = 0; Iter < NumIterations; Iter++) { |
183 | fprintf(stderr, format: "INFO: running '%s' %zd/%zd\n" , Input, Iter, NumIterations); |
184 | dfsan_flush(); |
185 | dfsan_set_label(label: 0, addr: Buf, size: InputLen); |
186 | __dft.FuncLabels = FuncLabelsPerIter[Iter]; |
187 | |
188 | size_t BaseIdx = Iter * kNumLabels; |
189 | size_t LastIdx = BaseIdx + kNumLabels < NumBytesRead ? BaseIdx + kNumLabels |
190 | : NumBytesRead; |
191 | assert(BaseIdx < LastIdx); |
192 | for (size_t Idx = BaseIdx; Idx < LastIdx; Idx++) |
193 | dfsan_set_label(label: 1 << (Idx - BaseIdx), addr: Buf + Idx, size: 1); |
194 | LLVMFuzzerTestOneInput(Data: Buf, Size: InputLen); |
195 | } |
196 | free(ptr: Buf); |
197 | |
198 | bool OutIsStdout = argc == 2; |
199 | fprintf(stderr, format: "INFO: writing dataflow to %s\n" , |
200 | OutIsStdout ? "<stdout>" : argv[2]); |
201 | FILE *Out = OutIsStdout ? stdout : fopen(filename: argv[2], modes: "w" ); |
202 | PrintDataFlow(Out); |
203 | PrintCoverage(Out); |
204 | if (!OutIsStdout) fclose(stream: Out); |
205 | } |
206 | |