1 | //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a DataflowAnalysisContext class that owns objects that |
10 | // encompass the state of a program and stores context that is used during |
11 | // dataflow analysis. |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" |
16 | #include "clang/Analysis/FlowSensitive/ASTOps.h" |
17 | #include "clang/Analysis/FlowSensitive/Formula.h" |
18 | #include "clang/Analysis/FlowSensitive/Logger.h" |
19 | #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" |
20 | #include "clang/Analysis/FlowSensitive/Value.h" |
21 | #include "llvm/ADT/SetOperations.h" |
22 | #include "llvm/ADT/SetVector.h" |
23 | #include "llvm/Support/CommandLine.h" |
24 | #include "llvm/Support/Debug.h" |
25 | #include "llvm/Support/FileSystem.h" |
26 | #include "llvm/Support/Path.h" |
27 | #include "llvm/Support/raw_ostream.h" |
28 | #include <cassert> |
29 | #include <memory> |
30 | #include <string> |
31 | #include <utility> |
32 | #include <vector> |
33 | |
34 | static llvm::cl::opt<std::string> DataflowLog( |
35 | "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, |
36 | llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " |
37 | "log to stderr. With an arg, writes HTML logs under the " |
38 | "specified directory (one per analyzed function).")); |
39 | |
40 | namespace clang { |
41 | namespace dataflow { |
42 | |
43 | FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { |
44 | // During context-sensitive analysis, a struct may be allocated in one |
45 | // function, but its field accessed in a function lower in the stack than |
46 | // the allocation. Since we only collect fields used in the function where |
47 | // the allocation occurs, we can't apply that filter when performing |
48 | // context-sensitive analysis. But, this only applies to storage locations, |
49 | // since field access it not allowed to fail. In contrast, field *values* |
50 | // don't need this allowance, since the API allows for uninitialized fields. |
51 | if (Opts.ContextSensitiveOpts) |
52 | return getObjectFields(Type); |
53 | |
54 | return llvm::set_intersection(S1: getObjectFields(Type), S2: ModeledFields); |
55 | } |
56 | |
57 | void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { |
58 | ModeledFields.set_union(Fields); |
59 | } |
60 | |
61 | StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { |
62 | if (!Type.isNull() && Type->isRecordType()) { |
63 | llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; |
64 | for (const FieldDecl *Field : getModeledFields(Type)) |
65 | if (Field->getType()->isReferenceType()) |
66 | FieldLocs.insert({Field, nullptr}); |
67 | else |
68 | FieldLocs.insert({Field, &createStorageLocation( |
69 | Type: Field->getType().getNonReferenceType())}); |
70 | |
71 | RecordStorageLocation::SyntheticFieldMap SyntheticFields; |
72 | for (const auto &Entry : getSyntheticFields(Type)) |
73 | SyntheticFields.insert( |
74 | {Entry.getKey(), |
75 | &createStorageLocation(Type: Entry.getValue().getNonReferenceType())}); |
76 | |
77 | return createRecordStorageLocation(Type, FieldLocs: std::move(FieldLocs), |
78 | SyntheticFields: std::move(SyntheticFields)); |
79 | } |
80 | return arena().create<ScalarStorageLocation>(args&: Type); |
81 | } |
82 | |
83 | // Returns the keys for a given `StringMap`. |
84 | // Can't use `StringSet` as the return type as it doesn't support `operator==`. |
85 | template <typename T> |
86 | static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) { |
87 | return llvm::DenseSet<llvm::StringRef>(llvm::from_range, Map.keys()); |
88 | } |
89 | |
90 | RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation( |
91 | QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, |
92 | RecordStorageLocation::SyntheticFieldMap SyntheticFields) { |
93 | assert(Type->isRecordType()); |
94 | assert(containsSameFields(getModeledFields(Type), FieldLocs)); |
95 | assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields)); |
96 | |
97 | RecordStorageLocationCreated = true; |
98 | return arena().create<RecordStorageLocation>(args&: Type, args: std::move(FieldLocs), |
99 | args: std::move(SyntheticFields)); |
100 | } |
101 | |
102 | StorageLocation & |
103 | DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { |
104 | if (auto *Loc = DeclToLoc.lookup(Val: &D)) |
105 | return *Loc; |
106 | auto &Loc = createStorageLocation(Type: D.getType().getNonReferenceType()); |
107 | DeclToLoc[&D] = &Loc; |
108 | return Loc; |
109 | } |
110 | |
111 | StorageLocation & |
112 | DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { |
113 | const Expr &CanonE = ignoreCFGOmittedNodes(E); |
114 | |
115 | if (auto *Loc = ExprToLoc.lookup(Val: &CanonE)) |
116 | return *Loc; |
117 | auto &Loc = createStorageLocation(Type: CanonE.getType()); |
118 | ExprToLoc[&CanonE] = &Loc; |
119 | return Loc; |
120 | } |
121 | |
122 | PointerValue & |
123 | DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { |
124 | auto CanonicalPointeeType = |
125 | PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); |
126 | auto Res = NullPointerVals.try_emplace(Key: CanonicalPointeeType, Args: nullptr); |
127 | if (Res.second) { |
128 | auto &PointeeLoc = createStorageLocation(Type: CanonicalPointeeType); |
129 | Res.first->second = &arena().create<PointerValue>(args&: PointeeLoc); |
130 | } |
131 | return *Res.first->second; |
132 | } |
133 | |
134 | void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { |
135 | if (Invariant == nullptr) |
136 | Invariant = &Constraint; |
137 | else |
138 | Invariant = &arena().makeAnd(LHS: *Invariant, RHS: Constraint); |
139 | } |
140 | |
141 | void DataflowAnalysisContext::addFlowConditionConstraint( |
142 | Atom Token, const Formula &Constraint) { |
143 | auto Res = FlowConditionConstraints.try_emplace(Key: Token, Args: &Constraint); |
144 | if (!Res.second) { |
145 | Res.first->second = |
146 | &arena().makeAnd(LHS: *Res.first->second, RHS: Constraint); |
147 | } |
148 | } |
149 | |
150 | Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { |
151 | Atom ForkToken = arena().makeFlowConditionToken(); |
152 | FlowConditionDeps[ForkToken].insert(V: Token); |
153 | addFlowConditionConstraint(Token: ForkToken, Constraint: arena().makeAtomRef(A: Token)); |
154 | return ForkToken; |
155 | } |
156 | |
157 | Atom |
158 | DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, |
159 | Atom SecondToken) { |
160 | Atom Token = arena().makeFlowConditionToken(); |
161 | auto &TokenDeps = FlowConditionDeps[Token]; |
162 | TokenDeps.insert(V: FirstToken); |
163 | TokenDeps.insert(V: SecondToken); |
164 | addFlowConditionConstraint(Token, |
165 | Constraint: arena().makeOr(LHS: arena().makeAtomRef(A: FirstToken), |
166 | RHS: arena().makeAtomRef(A: SecondToken))); |
167 | return Token; |
168 | } |
169 | |
170 | Solver::Result DataflowAnalysisContext::querySolver( |
171 | llvm::SetVector<const Formula *> Constraints) { |
172 | return S.solve(Vals: Constraints.getArrayRef()); |
173 | } |
174 | |
175 | bool DataflowAnalysisContext::flowConditionImplies(Atom Token, |
176 | const Formula &F) { |
177 | if (F.isLiteral(b: true)) |
178 | return true; |
179 | |
180 | // Returns true if and only if truth assignment of the flow condition implies |
181 | // that `F` is also true. We prove whether or not this property holds by |
182 | // reducing the problem to satisfiability checking. In other words, we attempt |
183 | // to show that assuming `F` is false makes the constraints induced by the |
184 | // flow condition unsatisfiable. |
185 | llvm::SetVector<const Formula *> Constraints; |
186 | Constraints.insert(X: &arena().makeAtomRef(A: Token)); |
187 | Constraints.insert(X: &arena().makeNot(Val: F)); |
188 | addTransitiveFlowConditionConstraints(Token, Out&: Constraints); |
189 | return isUnsatisfiable(Constraints: std::move(Constraints)); |
190 | } |
191 | |
192 | bool DataflowAnalysisContext::flowConditionAllows(Atom Token, |
193 | const Formula &F) { |
194 | if (F.isLiteral(b: false)) |
195 | return false; |
196 | |
197 | llvm::SetVector<const Formula *> Constraints; |
198 | Constraints.insert(X: &arena().makeAtomRef(A: Token)); |
199 | Constraints.insert(X: &F); |
200 | addTransitiveFlowConditionConstraints(Token, Out&: Constraints); |
201 | return isSatisfiable(Constraints: std::move(Constraints)); |
202 | } |
203 | |
204 | bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, |
205 | const Formula &Val2) { |
206 | llvm::SetVector<const Formula *> Constraints; |
207 | Constraints.insert(X: &arena().makeNot(Val: arena().makeEquals(LHS: Val1, RHS: Val2))); |
208 | return isUnsatisfiable(Constraints: std::move(Constraints)); |
209 | } |
210 | |
211 | void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( |
212 | Atom Token, llvm::SetVector<const Formula *> &Constraints) { |
213 | llvm::DenseSet<Atom> AddedTokens; |
214 | std::vector<Atom> Remaining = {Token}; |
215 | |
216 | if (Invariant) |
217 | Constraints.insert(X: Invariant); |
218 | // Define all the flow conditions that might be referenced in constraints. |
219 | while (!Remaining.empty()) { |
220 | auto Token = Remaining.back(); |
221 | Remaining.pop_back(); |
222 | if (!AddedTokens.insert(V: Token).second) |
223 | continue; |
224 | |
225 | auto ConstraintsIt = FlowConditionConstraints.find(Val: Token); |
226 | if (ConstraintsIt == FlowConditionConstraints.end()) { |
227 | Constraints.insert(X: &arena().makeAtomRef(A: Token)); |
228 | } else { |
229 | // Bind flow condition token via `iff` to its set of constraints: |
230 | // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints |
231 | Constraints.insert(X: &arena().makeEquals(LHS: arena().makeAtomRef(A: Token), |
232 | RHS: *ConstraintsIt->second)); |
233 | } |
234 | |
235 | if (auto DepsIt = FlowConditionDeps.find(Val: Token); |
236 | DepsIt != FlowConditionDeps.end()) |
237 | for (Atom A : DepsIt->second) |
238 | Remaining.push_back(x: A); |
239 | } |
240 | } |
241 | |
242 | static void printAtomList(const llvm::SmallVector<Atom> &Atoms, |
243 | llvm::raw_ostream &OS) { |
244 | OS << "("; |
245 | for (size_t i = 0; i < Atoms.size(); ++i) { |
246 | OS << Atoms[i]; |
247 | if (i + 1 < Atoms.size()) |
248 | OS << ", "; |
249 | } |
250 | OS << ")\n"; |
251 | } |
252 | |
253 | void DataflowAnalysisContext::dumpFlowCondition(Atom Token, |
254 | llvm::raw_ostream &OS) { |
255 | llvm::SetVector<const Formula *> Constraints; |
256 | Constraints.insert(X: &arena().makeAtomRef(A: Token)); |
257 | addTransitiveFlowConditionConstraints(Token, Constraints); |
258 | |
259 | OS << "Flow condition token: "<< Token << "\n"; |
260 | SimplifyConstraintsInfo Info; |
261 | llvm::SetVector<const Formula *> OriginalConstraints = Constraints; |
262 | simplifyConstraints(Constraints, arena&: arena(), Info: &Info); |
263 | if (!Constraints.empty()) { |
264 | OS << "Constraints:\n"; |
265 | for (const auto *Constraint : Constraints) { |
266 | Constraint->print(OS); |
267 | OS << "\n"; |
268 | } |
269 | } |
270 | if (!Info.TrueAtoms.empty()) { |
271 | OS << "True atoms: "; |
272 | printAtomList(Atoms: Info.TrueAtoms, OS); |
273 | } |
274 | if (!Info.FalseAtoms.empty()) { |
275 | OS << "False atoms: "; |
276 | printAtomList(Atoms: Info.FalseAtoms, OS); |
277 | } |
278 | if (!Info.EquivalentAtoms.empty()) { |
279 | OS << "Equivalent atoms:\n"; |
280 | for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms) |
281 | printAtomList(Atoms: Class, OS); |
282 | } |
283 | |
284 | OS << "\nFlow condition constraints before simplification:\n"; |
285 | for (const auto *Constraint : OriginalConstraints) { |
286 | Constraint->print(OS); |
287 | OS << "\n"; |
288 | } |
289 | } |
290 | |
291 | const AdornedCFG * |
292 | DataflowAnalysisContext::getAdornedCFG(const FunctionDecl *F) { |
293 | // Canonicalize the key: |
294 | F = F->getDefinition(); |
295 | if (F == nullptr) |
296 | return nullptr; |
297 | auto It = FunctionContexts.find(Val: F); |
298 | if (It != FunctionContexts.end()) |
299 | return &It->second; |
300 | |
301 | if (F->doesThisDeclarationHaveABody()) { |
302 | auto ACFG = AdornedCFG::build(Func: *F); |
303 | // FIXME: Handle errors. |
304 | assert(ACFG); |
305 | auto Result = FunctionContexts.insert(KV: {F, std::move(*ACFG)}); |
306 | return &Result.first->second; |
307 | } |
308 | |
309 | return nullptr; |
310 | } |
311 | |
312 | static std::unique_ptr<Logger> makeLoggerFromCommandLine() { |
313 | if (DataflowLog.empty()) |
314 | return Logger::textual(llvm::errs()); |
315 | |
316 | llvm::StringRef Dir = DataflowLog; |
317 | if (auto EC = llvm::sys::fs::create_directories(path: Dir)) |
318 | llvm::errs() << "Failed to create log dir: "<< EC.message() << "\n"; |
319 | // All analysis runs within a process will log to the same directory. |
320 | // Share a counter so they don't all overwrite each other's 0.html. |
321 | // (Don't share a logger, it's not threadsafe). |
322 | static std::atomic<unsigned> Counter = {0}; |
323 | auto StreamFactory = |
324 | [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { |
325 | llvm::SmallString<256> File(Dir); |
326 | llvm::sys::path::append(path&: File, |
327 | a: std::to_string(val: Counter.fetch_add(i: 1)) + ".html"); |
328 | std::error_code EC; |
329 | auto OS = std::make_unique<llvm::raw_fd_ostream>(args&: File, args&: EC); |
330 | if (EC) { |
331 | llvm::errs() << "Failed to create log "<< File << ": "<< EC.message() |
332 | << "\n"; |
333 | return std::make_unique<llvm::raw_null_ostream>(); |
334 | } |
335 | return OS; |
336 | }; |
337 | return Logger::html(std::move(StreamFactory)); |
338 | } |
339 | |
340 | DataflowAnalysisContext::DataflowAnalysisContext( |
341 | Solver &S, std::unique_ptr<Solver> &&OwnedSolver, Options Opts) |
342 | : S(S), OwnedSolver(std::move(OwnedSolver)), A(std::make_unique<Arena>()), |
343 | Opts(Opts) { |
344 | // If the -dataflow-log command-line flag was set, synthesize a logger. |
345 | // This is ugly but provides a uniform method for ad-hoc debugging dataflow- |
346 | // based tools. |
347 | if (Opts.Log == nullptr) { |
348 | if (DataflowLog.getNumOccurrences() > 0) { |
349 | LogOwner = makeLoggerFromCommandLine(); |
350 | this->Opts.Log = LogOwner.get(); |
351 | // FIXME: if the flag is given a value, write an HTML log to a file. |
352 | } else { |
353 | this->Opts.Log = &Logger::null(); |
354 | } |
355 | } |
356 | } |
357 | |
358 | DataflowAnalysisContext::~DataflowAnalysisContext() = default; |
359 | |
360 | } // namespace dataflow |
361 | } // namespace clang |
362 |
Definitions
- DataflowLog
- getModeledFields
- addModeledFields
- createStorageLocation
- getKeys
- createRecordStorageLocation
- getStableStorageLocation
- getStableStorageLocation
- getOrCreateNullPointerValue
- addInvariant
- addFlowConditionConstraint
- forkFlowCondition
- joinFlowConditions
- querySolver
- flowConditionImplies
- flowConditionAllows
- equivalentFormulas
- addTransitiveFlowConditionConstraints
- printAtomList
- dumpFlowCondition
- getAdornedCFG
- makeLoggerFromCommandLine
- DataflowAnalysisContext
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more