1 | //=== Taint.cpp - Taint tracking and basic propagation rules. ------*- C++ -*-// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Defines basic, non-domain-specific mechanisms for tracking tainted values. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/StaticAnalyzer/Checkers/Taint.h" |
14 | #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h" |
15 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
16 | #include <optional> |
17 | |
18 | using namespace clang; |
19 | using namespace ento; |
20 | using namespace taint; |
21 | |
// Fully tainted symbols: a program-state map from a symbol to the taint tag
// recorded for it.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintMap, SymbolRef, TaintTagType)

// Partially tainted symbols: for a parent symbol, the set of its sub-regions
// that carry taint, each associated with its taint tag. TaintedSubRegions is
// the per-symbol (sub-region -> tag) map; DerivedSymTaint keys those maps by
// the parent symbol.
REGISTER_MAP_FACTORY_WITH_PROGRAMSTATE(TaintedSubRegions, const SubRegion *,
                                       TaintTagType)
REGISTER_MAP_WITH_PROGRAMSTATE(DerivedSymTaint, SymbolRef, TaintedSubRegions)
29 | |
30 | void taint::printTaint(ProgramStateRef State, raw_ostream &Out, const char *NL, |
31 | const char *Sep) { |
32 | TaintMapTy TM = State->get<TaintMap>(); |
33 | |
34 | if (!TM.isEmpty()) |
35 | Out << "Tainted symbols:" << NL; |
36 | |
37 | for (const auto &I : TM) |
38 | Out << I.first << " : " << I.second << NL; |
39 | } |
40 | |
41 | void taint::dumpTaint(ProgramStateRef State) { |
42 | printTaint(State, Out&: llvm::errs()); |
43 | } |
44 | |
45 | ProgramStateRef taint::addTaint(ProgramStateRef State, const Stmt *S, |
46 | const LocationContext *LCtx, |
47 | TaintTagType Kind) { |
48 | return addTaint(State, V: State->getSVal(Ex: S, LCtx), Kind); |
49 | } |
50 | |
51 | ProgramStateRef taint::addTaint(ProgramStateRef State, SVal V, |
52 | TaintTagType Kind) { |
53 | SymbolRef Sym = V.getAsSymbol(); |
54 | if (Sym) |
55 | return addTaint(State, Sym, Kind); |
56 | |
57 | // If the SVal represents a structure, try to mass-taint all values within the |
58 | // structure. For now it only works efficiently on lazy compound values that |
59 | // were conjured during a conservative evaluation of a function - either as |
60 | // return values of functions that return structures or arrays by value, or as |
61 | // values of structures or arrays passed into the function by reference, |
62 | // directly or through pointer aliasing. Such lazy compound values are |
63 | // characterized by having exactly one binding in their captured store within |
64 | // their parent region, which is a conjured symbol default-bound to the base |
65 | // region of the parent region. |
66 | if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) { |
67 | if (std::optional<SVal> binding = |
68 | State->getStateManager().getStoreManager().getDefaultBinding( |
69 | lcv: *LCV)) { |
70 | if (SymbolRef Sym = binding->getAsSymbol()) |
71 | return addPartialTaint(State, ParentSym: Sym, SubRegion: LCV->getRegion(), Kind); |
72 | } |
73 | } |
74 | |
75 | const MemRegion *R = V.getAsRegion(); |
76 | return addTaint(State, R, Kind); |
77 | } |
78 | |
79 | ProgramStateRef taint::addTaint(ProgramStateRef State, const MemRegion *R, |
80 | TaintTagType Kind) { |
81 | if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(Val: R)) |
82 | return addTaint(State, Sym: SR->getSymbol(), Kind); |
83 | return State; |
84 | } |
85 | |
86 | ProgramStateRef taint::addTaint(ProgramStateRef State, SymbolRef Sym, |
87 | TaintTagType Kind) { |
88 | // If this is a symbol cast, remove the cast before adding the taint. Taint |
89 | // is cast agnostic. |
90 | while (const SymbolCast *SC = dyn_cast<SymbolCast>(Val: Sym)) |
91 | Sym = SC->getOperand(); |
92 | |
93 | ProgramStateRef NewState = State->set<TaintMap>(K: Sym, E: Kind); |
94 | assert(NewState); |
95 | return NewState; |
96 | } |
97 | |
98 | ProgramStateRef taint::removeTaint(ProgramStateRef State, SVal V) { |
99 | SymbolRef Sym = V.getAsSymbol(); |
100 | if (Sym) |
101 | return removeTaint(State, Sym); |
102 | |
103 | const MemRegion *R = V.getAsRegion(); |
104 | return removeTaint(State, R); |
105 | } |
106 | |
107 | ProgramStateRef taint::removeTaint(ProgramStateRef State, const MemRegion *R) { |
108 | if (const SymbolicRegion *SR = dyn_cast_or_null<SymbolicRegion>(Val: R)) |
109 | return removeTaint(State, Sym: SR->getSymbol()); |
110 | return State; |
111 | } |
112 | |
113 | ProgramStateRef taint::removeTaint(ProgramStateRef State, SymbolRef Sym) { |
114 | // If this is a symbol cast, remove the cast before adding the taint. Taint |
115 | // is cast agnostic. |
116 | while (const SymbolCast *SC = dyn_cast<SymbolCast>(Val: Sym)) |
117 | Sym = SC->getOperand(); |
118 | |
119 | ProgramStateRef NewState = State->remove<TaintMap>(K: Sym); |
120 | assert(NewState); |
121 | return NewState; |
122 | } |
123 | |
124 | ProgramStateRef taint::addPartialTaint(ProgramStateRef State, |
125 | SymbolRef ParentSym, |
126 | const SubRegion *SubRegion, |
127 | TaintTagType Kind) { |
128 | // Ignore partial taint if the entire parent symbol is already tainted. |
129 | if (const TaintTagType *T = State->get<TaintMap>(key: ParentSym)) |
130 | if (*T == Kind) |
131 | return State; |
132 | |
133 | // Partial taint applies if only a portion of the symbol is tainted. |
134 | if (SubRegion == SubRegion->getBaseRegion()) |
135 | return addTaint(State, Sym: ParentSym, Kind); |
136 | |
137 | const TaintedSubRegions *SavedRegs = State->get<DerivedSymTaint>(key: ParentSym); |
138 | TaintedSubRegions::Factory &F = State->get_context<TaintedSubRegions>(); |
139 | TaintedSubRegions Regs = SavedRegs ? *SavedRegs : F.getEmptyMap(); |
140 | |
141 | Regs = F.add(Old: Regs, K: SubRegion, D: Kind); |
142 | ProgramStateRef NewState = State->set<DerivedSymTaint>(K: ParentSym, E: Regs); |
143 | assert(NewState); |
144 | return NewState; |
145 | } |
146 | |
147 | bool taint::isTainted(ProgramStateRef State, const Stmt *S, |
148 | const LocationContext *LCtx, TaintTagType Kind) { |
149 | return !getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/returnFirstOnly: true) |
150 | .empty(); |
151 | } |
152 | |
153 | bool taint::isTainted(ProgramStateRef State, SVal V, TaintTagType Kind) { |
154 | return !getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/returnFirstOnly: true) |
155 | .empty(); |
156 | } |
157 | |
158 | bool taint::isTainted(ProgramStateRef State, const MemRegion *Reg, |
159 | TaintTagType K) { |
160 | return !getTaintedSymbolsImpl(State, Reg, Kind: K, /*ReturnFirstOnly=*/returnFirstOnly: true) |
161 | .empty(); |
162 | } |
163 | |
164 | bool taint::isTainted(ProgramStateRef State, SymbolRef Sym, TaintTagType Kind) { |
165 | return !getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/returnFirstOnly: true) |
166 | .empty(); |
167 | } |
168 | |
169 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
170 | const Stmt *S, |
171 | const LocationContext *LCtx, |
172 | TaintTagType Kind) { |
173 | return getTaintedSymbolsImpl(State, S, LCtx, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
174 | } |
175 | |
176 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, SVal V, |
177 | TaintTagType Kind) { |
178 | return getTaintedSymbolsImpl(State, V, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
179 | } |
180 | |
181 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
182 | SymbolRef Sym, |
183 | TaintTagType Kind) { |
184 | return getTaintedSymbolsImpl(State, Sym, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
185 | } |
186 | |
187 | std::vector<SymbolRef> taint::getTaintedSymbols(ProgramStateRef State, |
188 | const MemRegion *Reg, |
189 | TaintTagType Kind) { |
190 | return getTaintedSymbolsImpl(State, Reg, Kind, /*ReturnFirstOnly=*/returnFirstOnly: false); |
191 | } |
192 | |
193 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
194 | const Stmt *S, |
195 | const LocationContext *LCtx, |
196 | TaintTagType Kind, |
197 | bool returnFirstOnly) { |
198 | SVal val = State->getSVal(Ex: S, LCtx); |
199 | return getTaintedSymbolsImpl(State, V: val, Kind, returnFirstOnly); |
200 | } |
201 | |
202 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
203 | SVal V, TaintTagType Kind, |
204 | bool returnFirstOnly) { |
205 | if (SymbolRef Sym = V.getAsSymbol()) |
206 | return getTaintedSymbolsImpl(State, Sym, Kind, returnFirstOnly); |
207 | if (const MemRegion *Reg = V.getAsRegion()) |
208 | return getTaintedSymbolsImpl(State, Reg, Kind, returnFirstOnly); |
209 | return {}; |
210 | } |
211 | |
212 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
213 | const MemRegion *Reg, |
214 | TaintTagType K, |
215 | bool returnFirstOnly) { |
216 | std::vector<SymbolRef> TaintedSymbols; |
217 | if (!Reg) |
218 | return TaintedSymbols; |
219 | |
220 | // Element region (array element) is tainted if the offset is tainted. |
221 | if (const ElementRegion *ER = dyn_cast<ElementRegion>(Val: Reg)) { |
222 | std::vector<SymbolRef> TaintedIndex = |
223 | getTaintedSymbolsImpl(State, V: ER->getIndex(), Kind: K, returnFirstOnly); |
224 | llvm::append_range(C&: TaintedSymbols, R&: TaintedIndex); |
225 | if (returnFirstOnly && !TaintedSymbols.empty()) |
226 | return TaintedSymbols; // return early if needed |
227 | } |
228 | |
229 | // Symbolic region is tainted if the corresponding symbol is tainted. |
230 | if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Val: Reg)) { |
231 | std::vector<SymbolRef> TaintedRegions = |
232 | getTaintedSymbolsImpl(State, Sym: SR->getSymbol(), Kind: K, returnFirstOnly); |
233 | llvm::append_range(C&: TaintedSymbols, R&: TaintedRegions); |
234 | if (returnFirstOnly && !TaintedSymbols.empty()) |
235 | return TaintedSymbols; // return early if needed |
236 | } |
237 | |
238 | // Any subregion (including Element and Symbolic regions) is tainted if its |
239 | // super-region is tainted. |
240 | if (const SubRegion *ER = dyn_cast<SubRegion>(Val: Reg)) { |
241 | std::vector<SymbolRef> TaintedSubRegions = |
242 | getTaintedSymbolsImpl(State, Reg: ER->getSuperRegion(), K, returnFirstOnly); |
243 | llvm::append_range(C&: TaintedSymbols, R&: TaintedSubRegions); |
244 | if (returnFirstOnly && !TaintedSymbols.empty()) |
245 | return TaintedSymbols; // return early if needed |
246 | } |
247 | |
248 | return TaintedSymbols; |
249 | } |
250 | |
251 | std::vector<SymbolRef> taint::getTaintedSymbolsImpl(ProgramStateRef State, |
252 | SymbolRef Sym, |
253 | TaintTagType Kind, |
254 | bool returnFirstOnly) { |
255 | std::vector<SymbolRef> TaintedSymbols; |
256 | if (!Sym) |
257 | return TaintedSymbols; |
258 | |
259 | // Traverse all the symbols this symbol depends on to see if any are tainted. |
260 | for (SymbolRef SubSym : Sym->symbols()) { |
261 | if (!isa<SymbolData>(Val: SubSym)) |
262 | continue; |
263 | |
264 | if (const TaintTagType *Tag = State->get<TaintMap>(key: SubSym)) { |
265 | if (*Tag == Kind) { |
266 | TaintedSymbols.push_back(x: SubSym); |
267 | if (returnFirstOnly) |
268 | return TaintedSymbols; // return early if needed |
269 | } |
270 | } |
271 | |
272 | if (const auto *SD = dyn_cast<SymbolDerived>(Val: SubSym)) { |
273 | // If this is a SymbolDerived with a tainted parent, it's also tainted. |
274 | std::vector<SymbolRef> TaintedParents = getTaintedSymbolsImpl( |
275 | State, Sym: SD->getParentSymbol(), Kind, returnFirstOnly); |
276 | llvm::append_range(C&: TaintedSymbols, R&: TaintedParents); |
277 | if (returnFirstOnly && !TaintedSymbols.empty()) |
278 | return TaintedSymbols; // return early if needed |
279 | |
280 | // If this is a SymbolDerived with the same parent symbol as another |
281 | // tainted SymbolDerived and a region that's a sub-region of that |
282 | // tainted symbol, it's also tainted. |
283 | if (const TaintedSubRegions *Regs = |
284 | State->get<DerivedSymTaint>(key: SD->getParentSymbol())) { |
285 | const TypedValueRegion *R = SD->getRegion(); |
286 | for (auto I : *Regs) { |
287 | // FIXME: The logic to identify tainted regions could be more |
288 | // complete. For example, this would not currently identify |
289 | // overlapping fields in a union as tainted. To identify this we can |
290 | // check for overlapping/nested byte offsets. |
291 | if (Kind == I.second && R->isSubRegionOf(R: I.first)) { |
292 | TaintedSymbols.push_back(x: SD->getParentSymbol()); |
293 | if (returnFirstOnly && !TaintedSymbols.empty()) |
294 | return TaintedSymbols; // return early if needed |
295 | } |
296 | } |
297 | } |
298 | } |
299 | |
300 | // If memory region is tainted, data is also tainted. |
301 | if (const auto *SRV = dyn_cast<SymbolRegionValue>(Val: SubSym)) { |
302 | std::vector<SymbolRef> TaintedRegions = |
303 | getTaintedSymbolsImpl(State, Reg: SRV->getRegion(), K: Kind, returnFirstOnly); |
304 | llvm::append_range(C&: TaintedSymbols, R&: TaintedRegions); |
305 | if (returnFirstOnly && !TaintedSymbols.empty()) |
306 | return TaintedSymbols; // return early if needed |
307 | } |
308 | |
309 | // If this is a SymbolCast from a tainted value, it's also tainted. |
310 | if (const auto *SC = dyn_cast<SymbolCast>(Val: SubSym)) { |
311 | std::vector<SymbolRef> TaintedCasts = |
312 | getTaintedSymbolsImpl(State, Sym: SC->getOperand(), Kind, returnFirstOnly); |
313 | llvm::append_range(C&: TaintedSymbols, R&: TaintedCasts); |
314 | if (returnFirstOnly && !TaintedSymbols.empty()) |
315 | return TaintedSymbols; // return early if needed |
316 | } |
317 | } |
318 | return TaintedSymbols; |
319 | } |
320 | |