1 | //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This checker defines the attack surface for generic taint propagation. |
10 | // |
11 | // The taint information produced by it might be useful to other checkers. For |
12 | // example, checkers should report errors which involve tainted data more |
13 | // aggressively, even if the involved symbols are under constrained. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "Yaml.h" |
18 | #include "clang/AST/Attr.h" |
19 | #include "clang/Basic/Builtins.h" |
20 | #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" |
21 | #include "clang/StaticAnalyzer/Checkers/Taint.h" |
22 | #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" |
23 | #include "clang/StaticAnalyzer/Core/Checker.h" |
24 | #include "clang/StaticAnalyzer/Core/CheckerManager.h" |
25 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" |
26 | #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" |
27 | #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" |
28 | #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" |
29 | #include "llvm/ADT/StringExtras.h" |
30 | #include "llvm/Support/YAMLTraits.h" |
31 | |
32 | #include <limits> |
33 | #include <memory> |
34 | #include <optional> |
35 | #include <utility> |
36 | #include <vector> |
37 | |
38 | #define DEBUG_TYPE "taint-checker" |
39 | |
40 | using namespace clang; |
41 | using namespace ento; |
42 | using namespace taint; |
43 | |
44 | using llvm::ImmutableSet; |
45 | |
46 | namespace { |
47 | |
48 | class GenericTaintChecker; |
49 | |
/// Diagnostic text for CWE-134: Uncontrolled Format String.
constexpr llvm::StringLiteral MsgUncontrolledFormatString =
    "Untrusted data is used as a format string "
    "(CWE-134: Uncontrolled Format String)";

/// Diagnostic text for:
/// CERT/STR02-C. "Sanitize data passed to complex subsystems"
/// CWE-78, "Failure to Sanitize Data into an OS Command"
constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
    "Untrusted data is passed to a system call "
    "(CERT/STR02-C. Sanitize data passed to complex subsystems)";

/// Diagnostic text for tainted data being used as a buffer size in strn..
/// functions, and allocators.
constexpr llvm::StringLiteral MsgTaintedBufferSize =
    "Untrusted data is used to specify the buffer size "
    "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
    "for character data and the null terminator)";

/// Diagnostic text for tainted data being used as a custom sink's parameter.
constexpr llvm::StringLiteral MsgCustomSink =
    "Untrusted data is passed to a user-defined sink";
72 | |
/// Index of a call argument. The type is signed so that the sentinel
/// ReturnValueIndex (-1) lives in the same domain as real argument indexes.
using ArgIdxTy = int;
/// A short list of argument indexes (two elements are stored inline).
using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;

/// Denotes the return value.
constexpr ArgIdxTy ReturnValueIndex{-1};
78 | |
79 | static ArgIdxTy fromArgumentCount(unsigned Count) { |
80 | assert(Count <= |
81 | static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) && |
82 | "ArgIdxTy is not large enough to represent the number of arguments." ); |
83 | return Count; |
84 | } |
85 | |
86 | /// Check if the region the expression evaluates to is the standard input, |
87 | /// and thus, is tainted. |
88 | /// FIXME: Move this to Taint.cpp. |
89 | bool isStdin(SVal Val, const ASTContext &ACtx) { |
90 | // FIXME: What if Val is NonParamVarRegion? |
91 | |
92 | // The region should be symbolic, we do not know it's value. |
93 | const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val: Val.getAsRegion()); |
94 | if (!SymReg) |
95 | return false; |
96 | |
97 | // Get it's symbol and find the declaration region it's pointing to. |
98 | const auto *DeclReg = |
99 | dyn_cast_or_null<DeclRegion>(Val: SymReg->getSymbol()->getOriginRegion()); |
100 | if (!DeclReg) |
101 | return false; |
102 | |
103 | // This region corresponds to a declaration, find out if it's a global/extern |
104 | // variable named stdin with the proper type. |
105 | if (const auto *D = dyn_cast_or_null<VarDecl>(Val: DeclReg->getDecl())) { |
106 | D = D->getCanonicalDecl(); |
107 | if (D->getName() == "stdin" && D->hasExternalStorage() && D->isExternC()) { |
108 | const QualType FILETy = ACtx.getFILEType().getCanonicalType(); |
109 | const QualType Ty = D->getType().getCanonicalType(); |
110 | |
111 | if (Ty->isPointerType()) |
112 | return Ty->getPointeeType() == FILETy; |
113 | } |
114 | } |
115 | return false; |
116 | } |
117 | |
118 | SVal getPointeeOf(ProgramStateRef State, Loc LValue) { |
119 | const QualType ArgTy = LValue.getType(State->getStateManager().getContext()); |
120 | if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType()) |
121 | return State->getSVal(LV: LValue); |
122 | |
123 | // Do not dereference void pointers. Treat them as byte pointers instead. |
124 | // FIXME: we might want to consider more than just the first byte. |
125 | return State->getSVal(LValue, State->getStateManager().getContext().CharTy); |
126 | } |
127 | |
128 | /// Given a pointer/reference argument, return the value it refers to. |
129 | std::optional<SVal> getPointeeOf(ProgramStateRef State, SVal Arg) { |
130 | if (auto LValue = Arg.getAs<Loc>()) |
131 | return getPointeeOf(State, LValue: *LValue); |
132 | return std::nullopt; |
133 | } |
134 | |
135 | /// Given a pointer, return the SVal of its pointee or if it is tainted, |
136 | /// otherwise return the pointer's SVal if tainted. |
137 | /// Also considers stdin as a taint source. |
138 | std::optional<SVal> getTaintedPointeeOrPointer(ProgramStateRef State, |
139 | SVal Arg) { |
140 | if (auto Pointee = getPointeeOf(State, Arg)) |
141 | if (isTainted(State, V: *Pointee)) // FIXME: isTainted(...) ? Pointee : None; |
142 | return Pointee; |
143 | |
144 | if (isTainted(State, V: Arg)) |
145 | return Arg; |
146 | return std::nullopt; |
147 | } |
148 | |
149 | bool isTaintedOrPointsToTainted(ProgramStateRef State, SVal ExprSVal) { |
150 | return getTaintedPointeeOrPointer(State, Arg: ExprSVal).has_value(); |
151 | } |
152 | |
153 | /// Helps in printing taint diagnostics. |
154 | /// Marks the incoming parameters of a function interesting (to be printed) |
155 | /// when the return value, or the outgoing parameters are tainted. |
156 | const NoteTag *taintOriginTrackerTag(CheckerContext &C, |
157 | std::vector<SymbolRef> TaintedSymbols, |
158 | std::vector<ArgIdxTy> TaintedArgs, |
159 | const LocationContext *CallLocation) { |
160 | return C.getNoteTag(Cb: [TaintedSymbols = std::move(TaintedSymbols), |
161 | TaintedArgs = std::move(TaintedArgs), CallLocation]( |
162 | PathSensitiveBugReport &BR) -> std::string { |
163 | SmallString<256> Msg; |
164 | // We give diagnostics only for taint related reports |
165 | if (!BR.isInteresting(LC: CallLocation) || |
166 | BR.getBugType().getCategory() != categories::TaintedData) { |
167 | return "" ; |
168 | } |
169 | if (TaintedSymbols.empty()) |
170 | return "Taint originated here" ; |
171 | |
172 | for (auto Sym : TaintedSymbols) { |
173 | BR.markInteresting(sym: Sym); |
174 | } |
175 | LLVM_DEBUG(for (auto Arg |
176 | : TaintedArgs) { |
177 | llvm::dbgs() << "Taint Propagated from argument " << Arg + 1 << "\n" ; |
178 | }); |
179 | return "" ; |
180 | }); |
181 | } |
182 | |
183 | /// Helps in printing taint diagnostics. |
184 | /// Marks the function interesting (to be printed) |
185 | /// when the return value, or the outgoing parameters are tainted. |
186 | const NoteTag *taintPropagationExplainerTag( |
187 | CheckerContext &C, std::vector<SymbolRef> TaintedSymbols, |
188 | std::vector<ArgIdxTy> TaintedArgs, const LocationContext *CallLocation) { |
189 | assert(TaintedSymbols.size() == TaintedArgs.size()); |
190 | return C.getNoteTag(Cb: [TaintedSymbols = std::move(TaintedSymbols), |
191 | TaintedArgs = std::move(TaintedArgs), CallLocation]( |
192 | PathSensitiveBugReport &BR) -> std::string { |
193 | SmallString<256> Msg; |
194 | llvm::raw_svector_ostream Out(Msg); |
195 | // We give diagnostics only for taint related reports |
196 | if (TaintedSymbols.empty() || |
197 | BR.getBugType().getCategory() != categories::TaintedData) { |
198 | return "" ; |
199 | } |
200 | int nofTaintedArgs = 0; |
201 | for (auto [Idx, Sym] : llvm::enumerate(First: TaintedSymbols)) { |
202 | if (BR.isInteresting(sym: Sym)) { |
203 | BR.markInteresting(LC: CallLocation); |
204 | if (TaintedArgs[Idx] != ReturnValueIndex) { |
205 | LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument " |
206 | << TaintedArgs[Idx] + 1 << "\n" ); |
207 | if (nofTaintedArgs == 0) |
208 | Out << "Taint propagated to the " ; |
209 | else |
210 | Out << ", " ; |
211 | Out << TaintedArgs[Idx] + 1 |
212 | << llvm::getOrdinalSuffix(Val: TaintedArgs[Idx] + 1) << " argument" ; |
213 | nofTaintedArgs++; |
214 | } else { |
215 | LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n" ); |
216 | Out << "Taint propagated to the return value" ; |
217 | } |
218 | } |
219 | } |
220 | return std::string(Out.str()); |
221 | }); |
222 | } |
223 | |
224 | /// ArgSet is used to describe arguments relevant for taint detection or |
225 | /// taint application. A discrete set of argument indexes and a variadic |
226 | /// argument list signified by a starting index are supported. |
227 | class ArgSet { |
228 | public: |
229 | ArgSet() = default; |
230 | ArgSet(ArgVecTy &&DiscreteArgs, |
231 | std::optional<ArgIdxTy> VariadicIndex = std::nullopt) |
232 | : DiscreteArgs(std::move(DiscreteArgs)), |
233 | VariadicIndex(std::move(VariadicIndex)) {} |
234 | |
235 | bool contains(ArgIdxTy ArgIdx) const { |
236 | if (llvm::is_contained(Range: DiscreteArgs, Element: ArgIdx)) |
237 | return true; |
238 | |
239 | return VariadicIndex && ArgIdx >= *VariadicIndex; |
240 | } |
241 | |
242 | bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; } |
243 | |
244 | private: |
245 | ArgVecTy DiscreteArgs; |
246 | std::optional<ArgIdxTy> VariadicIndex; |
247 | }; |
248 | |
249 | /// A struct used to specify taint propagation rules for a function. |
250 | /// |
251 | /// If any of the possible taint source arguments is tainted, all of the |
252 | /// destination arguments should also be tainted. If ReturnValueIndex is added |
253 | /// to the dst list, the return value will be tainted. |
254 | class GenericTaintRule { |
255 | /// Arguments which are taints sinks and should be checked, and a report |
256 | /// should be emitted if taint reaches these. |
257 | ArgSet SinkArgs; |
258 | /// Arguments which should be sanitized on function return. |
259 | ArgSet FilterArgs; |
260 | /// Arguments which can participate in taint propagation. If any of the |
261 | /// arguments in PropSrcArgs is tainted, all arguments in PropDstArgs should |
262 | /// be tainted. |
263 | ArgSet PropSrcArgs; |
264 | ArgSet PropDstArgs; |
265 | |
266 | /// A message that explains why the call is sensitive to taint. |
267 | std::optional<StringRef> SinkMsg; |
268 | |
269 | GenericTaintRule() = default; |
270 | |
271 | GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst, |
272 | std::optional<StringRef> SinkMsg = std::nullopt) |
273 | : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)), |
274 | PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)), |
275 | SinkMsg(SinkMsg) {} |
276 | |
277 | public: |
278 | /// Make a rule that reports a warning if taint reaches any of \p FilterArgs |
279 | /// arguments. |
280 | static GenericTaintRule Sink(ArgSet &&SinkArgs, |
281 | std::optional<StringRef> Msg = std::nullopt) { |
282 | return {std::move(SinkArgs), {}, {}, {}, Msg}; |
283 | } |
284 | |
285 | /// Make a rule that sanitizes all FilterArgs arguments. |
286 | static GenericTaintRule Filter(ArgSet &&FilterArgs) { |
287 | return {{}, std::move(FilterArgs), {}, {}}; |
288 | } |
289 | |
290 | /// Make a rule that unconditionally taints all Args. |
291 | /// If Func is provided, it must also return true for taint to propagate. |
292 | static GenericTaintRule Source(ArgSet &&SourceArgs) { |
293 | return {{}, {}, {}, std::move(SourceArgs)}; |
294 | } |
295 | |
296 | /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. |
297 | static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) { |
298 | return {{}, {}, std::move(SrcArgs), std::move(DstArgs)}; |
299 | } |
300 | |
301 | /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted. |
302 | static GenericTaintRule |
303 | SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs, |
304 | std::optional<StringRef> Msg = std::nullopt) { |
305 | return { |
306 | std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg}; |
307 | } |
308 | |
309 | /// Process a function which could either be a taint source, a taint sink, a |
310 | /// taint filter or a taint propagator. |
311 | void process(const GenericTaintChecker &Checker, const CallEvent &Call, |
312 | CheckerContext &C) const; |
313 | |
314 | /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s. |
315 | static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) { |
316 | return ArgIdx == ReturnValueIndex ? Call.getOriginExpr() |
317 | : Call.getArgExpr(Index: ArgIdx); |
318 | }; |
319 | |
320 | /// Functions for custom taintedness propagation. |
321 | static bool UntrustedEnv(CheckerContext &C); |
322 | }; |
323 | |
/// Lookup table that associates call descriptions with their taint rule.
using RuleLookupTy = CallDescriptionMap<GenericTaintRule>;
325 | |
326 | /// Used to parse the configuration file. |
327 | struct TaintConfiguration { |
328 | using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>; |
329 | enum class VariadicType { None, Src, Dst }; |
330 | |
331 | struct Common { |
332 | std::string Name; |
333 | std::string Scope; |
334 | }; |
335 | |
336 | struct Sink : Common { |
337 | ArgVecTy SinkArgs; |
338 | }; |
339 | |
340 | struct Filter : Common { |
341 | ArgVecTy FilterArgs; |
342 | }; |
343 | |
344 | struct Propagation : Common { |
345 | ArgVecTy SrcArgs; |
346 | ArgVecTy DstArgs; |
347 | VariadicType VarType; |
348 | ArgIdxTy VarIndex; |
349 | }; |
350 | |
351 | std::vector<Propagation> Propagations; |
352 | std::vector<Filter> Filters; |
353 | std::vector<Sink> Sinks; |
354 | |
355 | TaintConfiguration() = default; |
356 | TaintConfiguration(const TaintConfiguration &) = default; |
357 | TaintConfiguration(TaintConfiguration &&) = default; |
358 | TaintConfiguration &operator=(const TaintConfiguration &) = default; |
359 | TaintConfiguration &operator=(TaintConfiguration &&) = default; |
360 | }; |
361 | |
/// Turns a parsed TaintConfiguration into (CallDescription, GenericTaintRule)
/// pairs. Invalid argument indexes are reported through the CheckerManager
/// passed to the constructor.
struct GenericTaintRuleParser {
  GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {}
  /// Container type used to gather call identification objects grouped into
  /// pairs with their corresponding taint rules. It is temporary as it is used
  /// to finally initialize RuleLookupTy, which is considered to be immutable.
  using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
  /// Converts all filters, sinks and propagations of \p Config into rules.
  /// \p Option is the option name used when reporting an invalid value.
  RulesContTy parseConfiguration(const std::string &Option,
                                 TaintConfiguration &&Config) const;

private:
  /// The parts of a qualified name: the scope components followed by the
  /// function name.
  using NamePartsTy = llvm::SmallVector<StringRef, 2>;

  /// Validate part of the configuration, which contains a list of argument
  /// indexes.
  void validateArgVector(const std::string &Option, const ArgVecTy &Args) const;

  /// Splits the Scope of \p C at "::" and appends its Name.
  template <typename Config> static NamePartsTy parseNameParts(const Config &C);

  // Takes the config and creates a CallDescription for it and associates a Rule
  // with that.
  template <typename Config>
  static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule,
                                     RulesContTy &Rules);

  // One overload per kind of configuration entry; each validates the entry's
  // argument indexes and appends the resulting rule to Rules.
  void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P,
                   RulesContTy &Rules) const;
  void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P,
                   RulesContTy &Rules) const;
  void parseConfig(const std::string &Option,
                   TaintConfiguration::Propagation &&P,
                   RulesContTy &Rules) const;

  /// Used to look up the checker and to report invalid option values.
  CheckerManager &Mgr;
};
396 | |
/// The checker itself. It subscribes to the pre-call and post-call events and
/// owns the bug type used for its reports.
class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
public:
  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;

  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
                  const char *Sep) const override;

  /// Generate a report if the expression is tainted or points to tainted data.
  bool generateReportIfTainted(const Expr *E, StringRef Msg,
                               CheckerContext &C) const;

private:
  /// Bug type of every report of this checker; it belongs to the TaintedData
  /// category.
  const BugType BT{this, "Use of Untrusted Data", categories::TaintedData};

  bool checkUncontrolledFormatString(const CallEvent &Call,
                                     CheckerContext &C) const;

  void taintUnsafeSocketProtocol(const CallEvent &Call,
                                 CheckerContext &C) const;

  /// Default taint rules are initialized with the help of a CheckerContext to
  /// access the names of built-in functions like memcpy.
  void initTaintRules(CheckerContext &C) const;

  /// CallDescription currently cannot restrict matches to the global namespace
  /// only, which is why multiple CallDescriptionMaps are used, as we want to
  /// disambiguate global C functions from functions inside user-defined
  /// namespaces.
  // TODO: Remove separation to simplify matching logic once CallDescriptions
  // are more expressive.

  // Both maps are mutable because they are filled in lazily from the const
  // member initTaintRules(), which does nothing once either of them is set.
  mutable std::optional<RuleLookupTy> StaticTaintRules;
  mutable std::optional<RuleLookupTy> DynamicTaintRules;
};
432 | } // end of anonymous namespace |
433 | |
/// YAML serialization mapping.
/// Declares std::vector of each configuration entry type as a YAML sequence.
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
438 | |
439 | namespace llvm { |
440 | namespace yaml { |
441 | template <> struct MappingTraits<TaintConfiguration> { |
442 | static void mapping(IO &IO, TaintConfiguration &Config) { |
443 | IO.mapOptional(Key: "Propagations" , Val&: Config.Propagations); |
444 | IO.mapOptional(Key: "Filters" , Val&: Config.Filters); |
445 | IO.mapOptional(Key: "Sinks" , Val&: Config.Sinks); |
446 | } |
447 | }; |
448 | |
449 | template <> struct MappingTraits<TaintConfiguration::Sink> { |
450 | static void mapping(IO &IO, TaintConfiguration::Sink &Sink) { |
451 | IO.mapRequired(Key: "Name" , Val&: Sink.Name); |
452 | IO.mapOptional(Key: "Scope" , Val&: Sink.Scope); |
453 | IO.mapRequired(Key: "Args" , Val&: Sink.SinkArgs); |
454 | } |
455 | }; |
456 | |
457 | template <> struct MappingTraits<TaintConfiguration::Filter> { |
458 | static void mapping(IO &IO, TaintConfiguration::Filter &Filter) { |
459 | IO.mapRequired(Key: "Name" , Val&: Filter.Name); |
460 | IO.mapOptional(Key: "Scope" , Val&: Filter.Scope); |
461 | IO.mapRequired(Key: "Args" , Val&: Filter.FilterArgs); |
462 | } |
463 | }; |
464 | |
465 | template <> struct MappingTraits<TaintConfiguration::Propagation> { |
466 | static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) { |
467 | IO.mapRequired(Key: "Name" , Val&: Propagation.Name); |
468 | IO.mapOptional(Key: "Scope" , Val&: Propagation.Scope); |
469 | IO.mapOptional(Key: "SrcArgs" , Val&: Propagation.SrcArgs); |
470 | IO.mapOptional(Key: "DstArgs" , Val&: Propagation.DstArgs); |
471 | IO.mapOptional(Key: "VariadicType" , Val&: Propagation.VarType); |
472 | IO.mapOptional(Key: "VariadicIndex" , Val&: Propagation.VarIndex); |
473 | } |
474 | }; |
475 | |
476 | template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> { |
477 | static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) { |
478 | IO.enumCase(Val&: Value, Str: "None" , ConstVal: TaintConfiguration::VariadicType::None); |
479 | IO.enumCase(Val&: Value, Str: "Src" , ConstVal: TaintConfiguration::VariadicType::Src); |
480 | IO.enumCase(Val&: Value, Str: "Dst" , ConstVal: TaintConfiguration::VariadicType::Dst); |
481 | } |
482 | }; |
483 | } // namespace yaml |
484 | } // namespace llvm |
485 | |
/// A map which is used to pass information from call pre-visit instruction
/// to the call post-visit. It is keyed by the location context; each value is
/// a set of signed integers, which are either ReturnValueIndex, or indexes of
/// the pointer/reference argument, which points to data, which should be
/// tainted on return.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *,
                               ImmutableSet<ArgIdxTy>)
/// Factory for the ImmutableSet<ArgIdxTy> values stored in the map above.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy)
493 | |
494 | void GenericTaintRuleParser::validateArgVector(const std::string &Option, |
495 | const ArgVecTy &Args) const { |
496 | for (ArgIdxTy Arg : Args) { |
497 | if (Arg < ReturnValueIndex) { |
498 | Mgr.reportInvalidCheckerOptionValue( |
499 | C: Mgr.getChecker<GenericTaintChecker>(), OptionName: Option, |
500 | ExpectedValueDesc: "an argument number for propagation rules greater or equal to -1" ); |
501 | } |
502 | } |
503 | } |
504 | |
505 | template <typename Config> |
506 | GenericTaintRuleParser::NamePartsTy |
507 | GenericTaintRuleParser::parseNameParts(const Config &C) { |
508 | NamePartsTy NameParts; |
509 | if (!C.Scope.empty()) { |
510 | // If the Scope argument contains multiple "::" parts, those are considered |
511 | // namespace identifiers. |
512 | StringRef{C.Scope}.split(A&: NameParts, Separator: "::" , /*MaxSplit*/ -1, |
513 | /*KeepEmpty*/ false); |
514 | } |
515 | NameParts.emplace_back(C.Name); |
516 | return NameParts; |
517 | } |
518 | |
519 | template <typename Config> |
520 | void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C, |
521 | GenericTaintRule &&Rule, |
522 | RulesContTy &Rules) { |
523 | NamePartsTy NameParts = parseNameParts(C); |
524 | Rules.emplace_back(args: CallDescription(NameParts), args: std::move(Rule)); |
525 | } |
526 | |
527 | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
528 | TaintConfiguration::Sink &&S, |
529 | RulesContTy &Rules) const { |
530 | validateArgVector(Option, Args: S.SinkArgs); |
531 | consumeRulesFromConfig(C: S, Rule: GenericTaintRule::Sink(SinkArgs: std::move(S.SinkArgs)), |
532 | Rules); |
533 | } |
534 | |
535 | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
536 | TaintConfiguration::Filter &&S, |
537 | RulesContTy &Rules) const { |
538 | validateArgVector(Option, Args: S.FilterArgs); |
539 | consumeRulesFromConfig(C: S, Rule: GenericTaintRule::Filter(FilterArgs: std::move(S.FilterArgs)), |
540 | Rules); |
541 | } |
542 | |
543 | void GenericTaintRuleParser::parseConfig(const std::string &Option, |
544 | TaintConfiguration::Propagation &&P, |
545 | RulesContTy &Rules) const { |
546 | validateArgVector(Option, Args: P.SrcArgs); |
547 | validateArgVector(Option, Args: P.DstArgs); |
548 | bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src; |
549 | bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst; |
550 | std::optional<ArgIdxTy> JustVarIndex = P.VarIndex; |
551 | |
552 | ArgSet SrcDesc(std::move(P.SrcArgs), |
553 | IsSrcVariadic ? JustVarIndex : std::nullopt); |
554 | ArgSet DstDesc(std::move(P.DstArgs), |
555 | IsDstVariadic ? JustVarIndex : std::nullopt); |
556 | |
557 | consumeRulesFromConfig( |
558 | C: P, Rule: GenericTaintRule::Prop(SrcArgs: std::move(SrcDesc), DstArgs: std::move(DstDesc)), Rules); |
559 | } |
560 | |
561 | GenericTaintRuleParser::RulesContTy |
562 | GenericTaintRuleParser::parseConfiguration(const std::string &Option, |
563 | TaintConfiguration &&Config) const { |
564 | |
565 | RulesContTy Rules; |
566 | |
567 | for (auto &F : Config.Filters) |
568 | parseConfig(Option, S: std::move(F), Rules); |
569 | |
570 | for (auto &S : Config.Sinks) |
571 | parseConfig(Option, S: std::move(S), Rules); |
572 | |
573 | for (auto &P : Config.Propagations) |
574 | parseConfig(Option, P: std::move(P), Rules); |
575 | |
576 | return Rules; |
577 | } |
578 | |
579 | void GenericTaintChecker::initTaintRules(CheckerContext &C) const { |
580 | // Check for exact name match for functions without builtin substitutes. |
581 | // Use qualified name, because these are C functions without namespace. |
582 | |
583 | if (StaticTaintRules || DynamicTaintRules) |
584 | return; |
585 | |
586 | using RulesConstructionTy = |
587 | std::vector<std::pair<CallDescription, GenericTaintRule>>; |
588 | using TR = GenericTaintRule; |
589 | |
590 | const Builtin::Context &BI = C.getASTContext().BuiltinInfo; |
591 | |
592 | RulesConstructionTy GlobalCRules{ |
593 | // Sources |
594 | {{{"fdopen" }}, TR::Source({{ReturnValueIndex}})}, |
595 | {{{"fopen" }}, TR::Source({{ReturnValueIndex}})}, |
596 | {{{"freopen" }}, TR::Source({{ReturnValueIndex}})}, |
597 | {{{"getch" }}, TR::Source({{ReturnValueIndex}})}, |
598 | {{{"getchar" }}, TR::Source({{ReturnValueIndex}})}, |
599 | {{{"getchar_unlocked" }}, TR::Source({{ReturnValueIndex}})}, |
600 | {{{"gets" }}, TR::Source({{0}, ReturnValueIndex})}, |
601 | {{{"gets_s" }}, TR::Source({{0}, ReturnValueIndex})}, |
602 | {{{"scanf" }}, TR::Source({{}, 1})}, |
603 | {{{"scanf_s" }}, TR::Source({{}, {1}})}, |
604 | {{{"wgetch" }}, TR::Source({{}, ReturnValueIndex})}, |
605 | // Sometimes the line between taint sources and propagators is blurry. |
606 | // _IO_getc is choosen to be a source, but could also be a propagator. |
607 | // This way it is simpler, as modeling it as a propagator would require |
608 | // to model the possible sources of _IO_FILE * values, which the _IO_getc |
609 | // function takes as parameters. |
610 | {{{"_IO_getc" }}, TR::Source({{ReturnValueIndex}})}, |
611 | {{{"getcwd" }}, TR::Source({{0, ReturnValueIndex}})}, |
612 | {{{"getwd" }}, TR::Source({{0, ReturnValueIndex}})}, |
613 | {{{"readlink" }}, TR::Source({{1, ReturnValueIndex}})}, |
614 | {{{"readlinkat" }}, TR::Source({{2, ReturnValueIndex}})}, |
615 | {{{"get_current_dir_name" }}, TR::Source({{ReturnValueIndex}})}, |
616 | {{{"gethostname" }}, TR::Source({{0}})}, |
617 | {{{"getnameinfo" }}, TR::Source({{2, 4}})}, |
618 | {{{"getseuserbyname" }}, TR::Source({{1, 2}})}, |
619 | {{{"getgroups" }}, TR::Source({{1, ReturnValueIndex}})}, |
620 | {{{"getlogin" }}, TR::Source({{ReturnValueIndex}})}, |
621 | {{{"getlogin_r" }}, TR::Source({{0}})}, |
622 | |
623 | // Props |
624 | {{{"accept" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
625 | {{{"atoi" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
626 | {{{"atol" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
627 | {{{"atoll" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
628 | {{{"fgetc" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
629 | {{{"fgetln" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
630 | {{{"fgets" }}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, |
631 | {{{"fgetws" }}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, |
632 | {{{"fscanf" }}, TR::Prop({{0}}, {{}, 2})}, |
633 | {{{"fscanf_s" }}, TR::Prop({{0}}, {{}, {2}})}, |
634 | {{{"sscanf" }}, TR::Prop({{0}}, {{}, 2})}, |
635 | |
636 | {{{"getc" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
637 | {{{"getc_unlocked" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
638 | {{{"getdelim" }}, TR::Prop({{3}}, {{0}})}, |
639 | {{{"getline" }}, TR::Prop({{2}}, {{0}})}, |
640 | {{{"getw" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
641 | {{{"pread" }}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})}, |
642 | {{{"read" }}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})}, |
643 | {{{"strchr" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
644 | {{{"strrchr" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
645 | {{{"tolower" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
646 | {{{"toupper" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
647 | {{{"fread" }}, TR::Prop({{3}}, {{0, ReturnValueIndex}})}, |
648 | {{{"recv" }}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
649 | {{{"recvfrom" }}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
650 | |
651 | {{{"ttyname" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
652 | {{{"ttyname_r" }}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
653 | |
654 | {{{"basename" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
655 | {{{"dirname" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
656 | {{{"fnmatch" }}, TR::Prop({{1}}, {{ReturnValueIndex}})}, |
657 | {{{"memchr" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
658 | {{{"memrchr" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
659 | {{{"rawmemchr" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
660 | |
661 | {{{"mbtowc" }}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
662 | {{{"wctomb" }}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
663 | {{{"wcwidth" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
664 | |
665 | {{{"memcmp" }}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, |
666 | {{{"memcpy" }}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
667 | {{{"memmove" }}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
668 | // If memmem was called with a tainted needle and the search was |
669 | // successful, that would mean that the value pointed by the return value |
670 | // has the same content as the needle. If we choose to go by the policy of |
671 | // content equivalence implies taintedness equivalence, that would mean |
672 | // haystack should be considered a propagation source argument. |
673 | {{{"memmem" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
674 | |
675 | // The comment for memmem above also applies to strstr. |
676 | {{{"strstr" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
677 | {{{"strcasestr" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
678 | |
679 | {{{"strchrnul" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
680 | |
681 | {{{"index" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
682 | {{{"rindex" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
683 | |
684 | // FIXME: In case of arrays, only the first element of the array gets |
685 | // tainted. |
686 | {{{"qsort" }}, TR::Prop({{0}}, {{0}})}, |
687 | {{{"qsort_r" }}, TR::Prop({{0}}, {{0}})}, |
688 | |
689 | {{{"strcmp" }}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, |
690 | {{{"strcasecmp" }}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, |
691 | {{{"strncmp" }}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, |
692 | {{{"strncasecmp" }}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, |
693 | {{{"strspn" }}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, |
694 | {{{"strcspn" }}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, |
695 | {{{"strpbrk" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
696 | {{{"strndup" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
697 | {{{"strndupa" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
698 | |
699 | // strlen, wcslen, strnlen and alike intentionally don't propagate taint. |
700 | // See the details here: https://github.com/llvm/llvm-project/pull/66086 |
701 | |
702 | {{{"strtol" }}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
703 | {{{"strtoll" }}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
704 | {{{"strtoul" }}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
705 | {{{"strtoull" }}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, |
706 | |
707 | {{{"isalnum" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
708 | {{{"isalpha" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
709 | {{{"isascii" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
710 | {{{"isblank" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
711 | {{{"iscntrl" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
712 | {{{"isdigit" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
713 | {{{"isgraph" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
714 | {{{"islower" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
715 | {{{"isprint" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
716 | {{{"ispunct" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
717 | {{{"isspace" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
718 | {{{"isupper" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
719 | {{{"isxdigit" }}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
720 | |
721 | {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrncat)}}, |
722 | TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, |
723 | {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrlcpy)}}, |
724 | TR::Prop({{1, 2}}, {{0}})}, |
725 | {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrlcat)}}, |
726 | TR::Prop({{1, 2}}, {{0}})}, |
727 | {{CDM::CLibraryMaybeHardened, {{"snprintf" }}}, |
728 | TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})}, |
729 | {{CDM::CLibraryMaybeHardened, {{"sprintf" }}}, |
730 | TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})}, |
731 | {{CDM::CLibraryMaybeHardened, {{"strcpy" }}}, |
732 | TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
733 | {{CDM::CLibraryMaybeHardened, {{"stpcpy" }}}, |
734 | TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
735 | {{CDM::CLibraryMaybeHardened, {{"strcat" }}}, |
736 | TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
737 | {{CDM::CLibraryMaybeHardened, {{"wcsncat" }}}, |
738 | TR::Prop({{1}}, {{0, ReturnValueIndex}})}, |
739 | {{CDM::CLibrary, {{"strdup" }}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
740 | {{CDM::CLibrary, {{"strdupa" }}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
741 | {{CDM::CLibrary, {{"wcsdup" }}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, |
742 | |
743 | // Sinks |
744 | {{{"system" }}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
745 | {{{"popen" }}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
746 | {{{"execl" }}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, |
747 | {{{"execle" }}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, |
748 | {{{"execlp" }}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, |
749 | {{{"execv" }}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, |
750 | {{{"execve" }}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, |
751 | {{{"fexecve" }}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, |
752 | {{{"execvp" }}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, |
753 | {{{"execvpe" }}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, |
754 | {{{"dlopen" }}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, |
755 | {{CDM::CLibrary, {{"malloc" }}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, |
756 | {{CDM::CLibrary, {{"calloc" }}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, |
757 | {{CDM::CLibrary, {{"alloca" }}}, TR::Sink({{0}}, MsgTaintedBufferSize)}, |
758 | {{CDM::CLibrary, {{"memccpy" }}}, TR::Sink({{3}}, MsgTaintedBufferSize)}, |
759 | {{CDM::CLibrary, {{"realloc" }}}, TR::Sink({{1}}, MsgTaintedBufferSize)}, |
760 | {{{{"setproctitle" }}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, |
761 | {{{{"setproctitle_fast" }}}, |
762 | TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, |
763 | |
764 | // SinkProps |
765 | {{CDM::CLibraryMaybeHardened, BI.getName(Builtin::BImemcpy)}, |
766 | TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, |
767 | MsgTaintedBufferSize)}, |
768 | {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BImemmove)}}, |
769 | TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, |
770 | MsgTaintedBufferSize)}, |
771 | {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrncpy)}}, |
772 | TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}}, |
773 | MsgTaintedBufferSize)}, |
774 | {{CDM::CLibrary, {BI.getName(Builtin::BIstrndup)}}, |
775 | TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}}, |
776 | MsgTaintedBufferSize)}, |
777 | {{CDM::CLibrary, {{"bcopy" }}}, |
778 | TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}}; |
779 | |
780 | // `getenv` returns taint only in untrusted environments. |
781 | if (TR::UntrustedEnv(C)) { |
782 | // void setproctitle_init(int argc, char *argv[], char *envp[]) |
783 | GlobalCRules.push_back( |
784 | x: {{{"setproctitle_init" }}, TR::Sink(SinkArgs: {{1, 2}}, Msg: MsgCustomSink)}); |
785 | GlobalCRules.push_back(x: {{{"getenv" }}, TR::Source(SourceArgs: {{ReturnValueIndex}})}); |
786 | } |
787 | |
788 | StaticTaintRules.emplace(args: std::make_move_iterator(i: GlobalCRules.begin()), |
789 | args: std::make_move_iterator(i: GlobalCRules.end())); |
790 | |
791 | // User-provided taint configuration. |
792 | CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager(); |
793 | assert(Mgr); |
794 | GenericTaintRuleParser ConfigParser{*Mgr}; |
795 | std::string Option{"Config" }; |
796 | StringRef ConfigFile = |
797 | Mgr->getAnalyzerOptions().getCheckerStringOption(C: this, OptionName: Option); |
798 | std::optional<TaintConfiguration> Config = |
799 | getConfiguration<TaintConfiguration>(Mgr&: *Mgr, Chk: this, Option, ConfigFile); |
800 | if (!Config) { |
801 | // We don't have external taint config, no parsing required. |
802 | DynamicTaintRules = RuleLookupTy{}; |
803 | return; |
804 | } |
805 | |
806 | GenericTaintRuleParser::RulesContTy Rules{ |
807 | ConfigParser.parseConfiguration(Option, Config: std::move(*Config))}; |
808 | |
809 | DynamicTaintRules.emplace(args: std::make_move_iterator(i: Rules.begin()), |
810 | args: std::make_move_iterator(i: Rules.end())); |
811 | } |
812 | |
813 | void GenericTaintChecker::checkPreCall(const CallEvent &Call, |
814 | CheckerContext &C) const { |
815 | initTaintRules(C); |
816 | |
817 | // FIXME: this should be much simpler. |
818 | if (const auto *Rule = |
819 | Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr) |
820 | Rule->process(Checker: *this, Call, C); |
821 | else if (const auto *Rule = DynamicTaintRules->lookup(Call)) |
822 | Rule->process(Checker: *this, Call, C); |
823 | |
824 | // FIXME: These edge cases are to be eliminated from here eventually. |
825 | // |
826 | // Additional check that is not supported by CallDescription. |
827 | // TODO: Make CallDescription be able to match attributes such as printf-like |
828 | // arguments. |
829 | checkUncontrolledFormatString(Call, C); |
830 | |
831 | // TODO: Modeling sockets should be done in a specific checker. |
832 | // Socket is a source, which taints the return value. |
833 | taintUnsafeSocketProtocol(Call, C); |
834 | } |
835 | |
836 | void GenericTaintChecker::checkPostCall(const CallEvent &Call, |
837 | CheckerContext &C) const { |
838 | // Set the marked values as tainted. The return value only accessible from |
839 | // checkPostStmt. |
840 | ProgramStateRef State = C.getState(); |
841 | const StackFrameContext *CurrentFrame = C.getStackFrame(); |
842 | |
843 | // Depending on what was tainted at pre-visit, we determined a set of |
844 | // arguments which should be tainted after the function returns. These are |
845 | // stored in the state as TaintArgsOnPostVisit set. |
846 | TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>(); |
847 | |
848 | const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(K: CurrentFrame); |
849 | if (!TaintArgs) |
850 | return; |
851 | assert(!TaintArgs->isEmpty()); |
852 | |
853 | LLVM_DEBUG(for (ArgIdxTy I |
854 | : *TaintArgs) { |
855 | llvm::dbgs() << "PostCall<" ; |
856 | Call.dump(llvm::dbgs()); |
857 | llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n'; |
858 | }); |
859 | |
860 | const NoteTag *InjectionTag = nullptr; |
861 | std::vector<SymbolRef> TaintedSymbols; |
862 | std::vector<ArgIdxTy> TaintedIndexes; |
863 | for (ArgIdxTy ArgNum : *TaintArgs) { |
864 | // Special handling for the tainted return value. |
865 | if (ArgNum == ReturnValueIndex) { |
866 | State = addTaint(State, V: Call.getReturnValue()); |
867 | std::vector<SymbolRef> TaintedSyms = |
868 | getTaintedSymbols(State, V: Call.getReturnValue()); |
869 | if (!TaintedSyms.empty()) { |
870 | TaintedSymbols.push_back(x: TaintedSyms[0]); |
871 | TaintedIndexes.push_back(x: ArgNum); |
872 | } |
873 | continue; |
874 | } |
875 | // The arguments are pointer arguments. The data they are pointing at is |
876 | // tainted after the call. |
877 | if (auto V = getPointeeOf(State, Arg: Call.getArgSVal(Index: ArgNum))) { |
878 | State = addTaint(State, V: *V); |
879 | std::vector<SymbolRef> TaintedSyms = getTaintedSymbols(State, V: *V); |
880 | if (!TaintedSyms.empty()) { |
881 | TaintedSymbols.push_back(x: TaintedSyms[0]); |
882 | TaintedIndexes.push_back(x: ArgNum); |
883 | } |
884 | } |
885 | } |
886 | // Create a NoteTag callback, which prints to the user where the taintedness |
887 | // was propagated to. |
888 | InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedArgs: TaintedIndexes, |
889 | CallLocation: Call.getCalleeStackFrame(BlockCount: 0)); |
890 | // Clear up the taint info from the state. |
891 | State = State->remove<TaintArgsOnPostVisit>(K: CurrentFrame); |
892 | C.addTransition(State, Tag: InjectionTag); |
893 | } |
894 | |
895 | void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State, |
896 | const char *NL, const char *Sep) const { |
897 | printTaint(State, Out, nl: NL, sep: Sep); |
898 | } |
899 | |
900 | void GenericTaintRule::process(const GenericTaintChecker &Checker, |
901 | const CallEvent &Call, CheckerContext &C) const { |
902 | ProgramStateRef State = C.getState(); |
903 | const ArgIdxTy CallNumArgs = fromArgumentCount(Count: Call.getNumArgs()); |
904 | |
905 | /// Iterate every call argument, and get their corresponding Expr and SVal. |
906 | const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) { |
907 | for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) { |
908 | const Expr *E = GetArgExpr(ArgIdx: I, Call); |
909 | Fun(I, E, C.getSVal(E)); |
910 | } |
911 | }; |
912 | |
913 | /// Check for taint sinks. |
914 | ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) { |
915 | // Add taintedness to stdin parameters |
916 | if (isStdin(Val: C.getSVal(E), ACtx: C.getASTContext())) { |
917 | State = addTaint(State, V: C.getSVal(E)); |
918 | } |
919 | if (SinkArgs.contains(ArgIdx: I) && isTaintedOrPointsToTainted(State, ExprSVal: C.getSVal(E))) |
920 | Checker.generateReportIfTainted(E, Msg: SinkMsg.value_or(u: MsgCustomSink), C); |
921 | }); |
922 | |
923 | /// Check for taint filters. |
924 | ForEachCallArg([this, &State](ArgIdxTy I, const Expr *E, SVal S) { |
925 | if (FilterArgs.contains(ArgIdx: I)) { |
926 | State = removeTaint(State, V: S); |
927 | if (auto P = getPointeeOf(State, Arg: S)) |
928 | State = removeTaint(State, V: *P); |
929 | } |
930 | }); |
931 | |
932 | /// Check for taint propagation sources. |
933 | /// A rule will make the destination variables tainted if PropSrcArgs |
934 | /// is empty (taints the destination |
935 | /// arguments unconditionally), or if any of its signified |
936 | /// args are tainted in context of the current CallEvent. |
937 | bool IsMatching = PropSrcArgs.isEmpty(); |
938 | std::vector<SymbolRef> TaintedSymbols; |
939 | std::vector<ArgIdxTy> TaintedIndexes; |
940 | ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols, |
941 | &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) { |
942 | std::optional<SVal> TaintedSVal = |
943 | getTaintedPointeeOrPointer(State, Arg: C.getSVal(E)); |
944 | IsMatching = |
945 | IsMatching || (PropSrcArgs.contains(ArgIdx: I) && TaintedSVal.has_value()); |
946 | |
947 | // We track back tainted arguments except for stdin |
948 | if (TaintedSVal && !isStdin(Val: *TaintedSVal, ACtx: C.getASTContext())) { |
949 | std::vector<SymbolRef> TaintedArgSyms = |
950 | getTaintedSymbols(State, V: *TaintedSVal); |
951 | if (!TaintedArgSyms.empty()) { |
952 | llvm::append_range(C&: TaintedSymbols, R&: TaintedArgSyms); |
953 | TaintedIndexes.push_back(x: I); |
954 | } |
955 | } |
956 | }); |
957 | |
958 | // Early return for propagation rules which dont match. |
959 | // Matching propagations, Sinks and Filters will pass this point. |
960 | if (!IsMatching) |
961 | return; |
962 | |
963 | const auto WouldEscape = [](SVal V, QualType Ty) -> bool { |
964 | if (!isa<Loc>(Val: V)) |
965 | return false; |
966 | |
967 | const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified(); |
968 | const bool IsNonConstPtr = |
969 | Ty->isPointerType() && !Ty->getPointeeType().isConstQualified(); |
970 | |
971 | return IsNonConstRef || IsNonConstPtr; |
972 | }; |
973 | |
974 | /// Propagate taint where it is necessary. |
975 | auto &F = State->getStateManager().get_context<ArgIdxFactory>(); |
976 | ImmutableSet<ArgIdxTy> Result = F.getEmptySet(); |
977 | ForEachCallArg( |
978 | [&](ArgIdxTy I, const Expr *E, SVal V) { |
979 | if (PropDstArgs.contains(ArgIdx: I)) { |
980 | LLVM_DEBUG(llvm::dbgs() << "PreCall<" ; Call.dump(llvm::dbgs()); |
981 | llvm::dbgs() |
982 | << "> prepares tainting arg index: " << I << '\n';); |
983 | Result = F.add(Old: Result, V: I); |
984 | } |
985 | |
986 | // Taint property gets lost if the variable is passed as a |
987 | // non-const pointer or reference to a function which is |
988 | // not inlined. For matching rules we want to preserve the taintedness. |
989 | // TODO: We should traverse all reachable memory regions via the |
990 | // escaping parameter. Instead of doing that we simply mark only the |
991 | // referred memory region as tainted. |
992 | if (WouldEscape(V, E->getType()) && getTaintedPointeeOrPointer(State, Arg: V)) { |
993 | LLVM_DEBUG(if (!Result.contains(I)) { |
994 | llvm::dbgs() << "PreCall<" ; |
995 | Call.dump(llvm::dbgs()); |
996 | llvm::dbgs() << "> prepares tainting arg index: " << I << '\n'; |
997 | }); |
998 | Result = F.add(Old: Result, V: I); |
999 | } |
1000 | }); |
1001 | |
1002 | if (!Result.isEmpty()) |
1003 | State = State->set<TaintArgsOnPostVisit>(K: C.getStackFrame(), E: Result); |
1004 | const NoteTag *InjectionTag = taintOriginTrackerTag( |
1005 | C, TaintedSymbols: std::move(TaintedSymbols), TaintedArgs: std::move(TaintedIndexes), |
1006 | CallLocation: Call.getCalleeStackFrame(BlockCount: 0)); |
1007 | C.addTransition(State, Tag: InjectionTag); |
1008 | } |
1009 | |
1010 | bool GenericTaintRule::UntrustedEnv(CheckerContext &C) { |
1011 | return !C.getAnalysisManager() |
1012 | .getAnalyzerOptions() |
1013 | .ShouldAssumeControlledEnvironment; |
1014 | } |
1015 | |
1016 | bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg, |
1017 | CheckerContext &C) const { |
1018 | assert(E); |
1019 | std::optional<SVal> TaintedSVal = |
1020 | getTaintedPointeeOrPointer(State: C.getState(), Arg: C.getSVal(E)); |
1021 | |
1022 | if (!TaintedSVal) |
1023 | return false; |
1024 | |
1025 | // Generate diagnostic. |
1026 | if (ExplodedNode *N = C.generateNonFatalErrorNode()) { |
1027 | auto report = std::make_unique<PathSensitiveBugReport>(args: BT, args&: Msg, args&: N); |
1028 | report->addRange(R: E->getSourceRange()); |
1029 | for (auto TaintedSym : getTaintedSymbols(C.getState(), *TaintedSVal)) { |
1030 | report->markInteresting(TaintedSym); |
1031 | } |
1032 | |
1033 | C.emitReport(R: std::move(report)); |
1034 | return true; |
1035 | } |
1036 | return false; |
1037 | } |
1038 | |
1039 | /// TODO: remove checking for printf format attributes and socket whitelisting |
1040 | /// from GenericTaintChecker, and that means the following functions: |
1041 | /// getPrintfFormatArgumentNum, |
1042 | /// GenericTaintChecker::checkUncontrolledFormatString, |
1043 | /// GenericTaintChecker::taintUnsafeSocketProtocol |
1044 | |
1045 | static bool getPrintfFormatArgumentNum(const CallEvent &Call, |
1046 | const CheckerContext &C, |
1047 | ArgIdxTy &ArgNum) { |
1048 | // Find if the function contains a format string argument. |
1049 | // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, |
1050 | // vsnprintf, syslog, custom annotated functions. |
1051 | const Decl *CallDecl = Call.getDecl(); |
1052 | if (!CallDecl) |
1053 | return false; |
1054 | const FunctionDecl *FDecl = CallDecl->getAsFunction(); |
1055 | if (!FDecl) |
1056 | return false; |
1057 | |
1058 | const ArgIdxTy CallNumArgs = fromArgumentCount(Count: Call.getNumArgs()); |
1059 | |
1060 | for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { |
1061 | ArgNum = Format->getFormatIdx() - 1; |
1062 | if ((Format->getType()->getName() == "printf" ) && CallNumArgs > ArgNum) |
1063 | return true; |
1064 | } |
1065 | |
1066 | return false; |
1067 | } |
1068 | |
1069 | bool GenericTaintChecker::checkUncontrolledFormatString( |
1070 | const CallEvent &Call, CheckerContext &C) const { |
1071 | // Check if the function contains a format string argument. |
1072 | ArgIdxTy ArgNum = 0; |
1073 | if (!getPrintfFormatArgumentNum(Call, C, ArgNum)) |
1074 | return false; |
1075 | |
1076 | // If either the format string content or the pointer itself are tainted, |
1077 | // warn. |
1078 | return generateReportIfTainted(E: Call.getArgExpr(Index: ArgNum), |
1079 | Msg: MsgUncontrolledFormatString, C); |
1080 | } |
1081 | |
1082 | void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call, |
1083 | CheckerContext &C) const { |
1084 | if (Call.getNumArgs() < 1) |
1085 | return; |
1086 | const IdentifierInfo *ID = Call.getCalleeIdentifier(); |
1087 | if (!ID) |
1088 | return; |
1089 | if (!ID->getName().equals(RHS: "socket" )) |
1090 | return; |
1091 | |
1092 | SourceLocation DomLoc = Call.getArgExpr(Index: 0)->getExprLoc(); |
1093 | StringRef DomName = C.getMacroNameOrSpelling(Loc&: DomLoc); |
1094 | // Allow internal communication protocols. |
1095 | bool SafeProtocol = DomName.equals(RHS: "AF_SYSTEM" ) || |
1096 | DomName.equals(RHS: "AF_LOCAL" ) || DomName.equals(RHS: "AF_UNIX" ) || |
1097 | DomName.equals(RHS: "AF_RESERVED_36" ); |
1098 | if (SafeProtocol) |
1099 | return; |
1100 | |
1101 | ProgramStateRef State = C.getState(); |
1102 | auto &F = State->getStateManager().get_context<ArgIdxFactory>(); |
1103 | ImmutableSet<ArgIdxTy> Result = F.add(Old: F.getEmptySet(), V: ReturnValueIndex); |
1104 | State = State->set<TaintArgsOnPostVisit>(K: C.getStackFrame(), E: Result); |
1105 | C.addTransition(State); |
1106 | } |
1107 | |
1108 | /// Checker registration |
1109 | void ento::registerGenericTaintChecker(CheckerManager &Mgr) { |
1110 | Mgr.registerChecker<GenericTaintChecker>(); |
1111 | } |
1112 | |
1113 | bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) { |
1114 | return true; |
1115 | } |
1116 | |