1//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10#include "clang/AST/APValue.h"
11#include "clang/AST/ASTContext.h"
12#include "clang/AST/ASTTypeTraits.h"
13#include "clang/AST/Attr.h"
14#include "clang/AST/Decl.h"
15#include "clang/AST/DeclCXX.h"
16#include "clang/AST/DynamicRecursiveASTVisitor.h"
17#include "clang/AST/Expr.h"
18#include "clang/AST/FormatString.h"
19#include "clang/AST/ParentMapContext.h"
20#include "clang/AST/Stmt.h"
21#include "clang/AST/StmtVisitor.h"
22#include "clang/AST/Type.h"
23#include "clang/ASTMatchers/LowLevelHelpers.h"
24#include "clang/Analysis/Support/FixitUtil.h"
25#include "clang/Basic/SourceLocation.h"
26#include "clang/Lex/Lexer.h"
27#include "clang/Lex/Preprocessor.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/STLFunctionalExtras.h"
30#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/StringRef.h"
33#include <cstddef>
34#include <optional>
35#include <queue>
36#include <set>
37#include <sstream>
38
39using namespace clang;
40
41#ifndef NDEBUG
42namespace {
43class StmtDebugPrinter
44 : public ConstStmtVisitor<StmtDebugPrinter, std::string> {
45public:
46 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); }
47
48 std::string VisitBinaryOperator(const BinaryOperator *BO) {
49 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")";
50 }
51
52 std::string VisitUnaryOperator(const UnaryOperator *UO) {
53 return "UnaryOperator(" + UO->getOpcodeStr(UO->getOpcode()).str() + ")";
54 }
55
56 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
57 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")";
58 }
59};
60
61// Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
62// "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
63static std::string getDREAncestorString(const DeclRefExpr *DRE,
64 ASTContext &Ctx) {
65 std::stringstream SS;
66 const Stmt *St = DRE;
67 StmtDebugPrinter StmtPriner;
68
69 do {
70 SS << StmtPriner.Visit(St);
71
72 DynTypedNodeList StParents = Ctx.getParents(*St);
73
74 if (StParents.size() > 1)
75 return "unavailable due to multiple parents";
76 if (StParents.empty())
77 break;
78 St = StParents.begin()->get<Stmt>();
79 if (St)
80 SS << " ==> ";
81 } while (St);
82 return SS.str();
83}
84
85} // namespace
86#endif /* NDEBUG */
87
88namespace {
89// Using a custom `FastMatcher` instead of ASTMatchers to achieve better
90// performance. FastMatcher uses simple function `matches` to find if a node
91// is a match, avoiding the dependency on the ASTMatchers framework which
92// provide a nice abstraction, but incur big performance costs.
93class FastMatcher {
94public:
95 virtual bool matches(const DynTypedNode &DynNode, ASTContext &Ctx,
96 const UnsafeBufferUsageHandler &Handler) = 0;
97 virtual ~FastMatcher() = default;
98};
99
100class MatchResult {
101
102public:
103 template <typename T> const T *getNodeAs(StringRef ID) const {
104 auto It = Nodes.find(Key: ID);
105 if (It == Nodes.end()) {
106 return nullptr;
107 }
108 return It->second.get<T>();
109 }
110
111 void addNode(StringRef ID, const DynTypedNode &Node) { Nodes[ID] = Node; }
112
113private:
114 llvm::StringMap<DynTypedNode> Nodes;
115};
116} // namespace
117
// Class names treated as sized containers or views by the checks in this
// file. Expands to a comma-separated list of string literals WITH a trailing
// comma, meant to be wrapped in braces: `{SIZED_CONTAINER_OR_VIEW_LIST}`.
#define SIZED_CONTAINER_OR_VIEW_LIST                                           \
  "span",                                                                      \
  "array",                                                                     \
  "vector",                                                                    \
  "basic_string_view",                                                         \
  "basic_string",                                                              \
  "initializer_list",
121
122// A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
123// except for those belonging to a different callable of "n".
124class MatchDescendantVisitor : public DynamicRecursiveASTVisitor {
125public:
126 // Creates an AST visitor that matches `Matcher` on all
127 // descendants of a given node "n" except for the ones
128 // belonging to a different callable of "n".
129 MatchDescendantVisitor(ASTContext &Context, FastMatcher &Matcher,
130 bool FindAll, bool ignoreUnevaluatedContext,
131 const UnsafeBufferUsageHandler &NewHandler)
132 : Matcher(&Matcher), FindAll(FindAll), Matches(false),
133 ignoreUnevaluatedContext(ignoreUnevaluatedContext),
134 ActiveASTContext(&Context), Handler(&NewHandler) {
135 ShouldVisitTemplateInstantiations = true;
136 ShouldVisitImplicitCode = false; // TODO: let's ignore implicit code for now
137 }
138
139 // Returns true if a match is found in a subtree of `DynNode`, which belongs
140 // to the same callable of `DynNode`.
141 bool findMatch(const DynTypedNode &DynNode) {
142 Matches = false;
143 if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
144 TraverseStmt(Node: const_cast<Stmt *>(StmtNode));
145 return Matches;
146 }
147 return false;
148 }
149
150 // The following are overriding methods from the base visitor class.
151 // They are public only to allow CRTP to work. They are *not *part
152 // of the public API of this class.
153
154 // For the matchers so far used in safe buffers, we only need to match
155 // `Stmt`s. To override more as needed.
156
157 bool TraverseDecl(Decl *Node) override {
158 if (!Node)
159 return true;
160 if (!match(Node: *Node))
161 return false;
162 // To skip callables:
163 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Val: Node))
164 return true;
165 // Traverse descendants
166 return DynamicRecursiveASTVisitor::TraverseDecl(D: Node);
167 }
168
169 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) override {
170 // These are unevaluated, except the result expression.
171 if (ignoreUnevaluatedContext)
172 return TraverseStmt(Node: Node->getResultExpr());
173 return DynamicRecursiveASTVisitor::TraverseGenericSelectionExpr(S: Node);
174 }
175
176 bool
177 TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) override {
178 // Unevaluated context.
179 if (ignoreUnevaluatedContext)
180 return true;
181 return DynamicRecursiveASTVisitor::TraverseUnaryExprOrTypeTraitExpr(S: Node);
182 }
183
184 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) override {
185 // Unevaluated context.
186 if (ignoreUnevaluatedContext)
187 return true;
188 return DynamicRecursiveASTVisitor::TraverseTypeOfExprTypeLoc(TL: Node);
189 }
190
191 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) override {
192 // Unevaluated context.
193 if (ignoreUnevaluatedContext)
194 return true;
195 return DynamicRecursiveASTVisitor::TraverseDecltypeTypeLoc(TL: Node);
196 }
197
198 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) override {
199 // Unevaluated context.
200 if (ignoreUnevaluatedContext)
201 return true;
202 return DynamicRecursiveASTVisitor::TraverseCXXNoexceptExpr(S: Node);
203 }
204
205 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) override {
206 // Unevaluated context.
207 if (ignoreUnevaluatedContext)
208 return true;
209 return DynamicRecursiveASTVisitor::TraverseCXXTypeidExpr(S: Node);
210 }
211
212 bool TraverseCXXDefaultInitExpr(CXXDefaultInitExpr *Node) override {
213 if (!TraverseStmt(Node: Node->getExpr()))
214 return false;
215 return DynamicRecursiveASTVisitor::TraverseCXXDefaultInitExpr(S: Node);
216 }
217
218 bool TraverseStmt(Stmt *Node) override {
219 if (!Node)
220 return true;
221 if (!match(Node: *Node))
222 return false;
223 return DynamicRecursiveASTVisitor::TraverseStmt(S: Node);
224 }
225
226private:
227 // Sets 'Matched' to true if 'Matcher' matches 'Node'
228 //
229 // Returns 'true' if traversal should continue after this function
230 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
231 template <typename T> bool match(const T &Node) {
232 if (Matcher->matches(DynNode: DynTypedNode::create(Node), Ctx&: *ActiveASTContext,
233 Handler: *Handler)) {
234 Matches = true;
235 if (!FindAll)
236 return false; // Abort as soon as a match is found.
237 }
238 return true;
239 }
240
241 FastMatcher *const Matcher;
242 // When true, finds all matches. When false, finds the first match and stops.
243 const bool FindAll;
244 bool Matches;
245 bool ignoreUnevaluatedContext;
246 ASTContext *ActiveASTContext;
247 const UnsafeBufferUsageHandler *Handler;
248};
249
250// Because we're dealing with raw pointers, let's define what we mean by that.
251static bool hasPointerType(const Expr &E) {
252 return isa<PointerType>(Val: E.getType().getCanonicalType());
253}
254
255static bool hasArrayType(const Expr &E) {
256 return isa<ArrayType>(Val: E.getType().getCanonicalType());
257}
258
259static void
260forEachDescendantEvaluatedStmt(const Stmt *S, ASTContext &Ctx,
261 const UnsafeBufferUsageHandler &Handler,
262 FastMatcher &Matcher) {
263 MatchDescendantVisitor Visitor(Ctx, Matcher, /*FindAll=*/true,
264 /*ignoreUnevaluatedContext=*/true, Handler);
265 Visitor.findMatch(DynNode: DynTypedNode::create(Node: *S));
266}
267
268static void forEachDescendantStmt(const Stmt *S, ASTContext &Ctx,
269 const UnsafeBufferUsageHandler &Handler,
270 FastMatcher &Matcher) {
271 MatchDescendantVisitor Visitor(Ctx, Matcher, /*FindAll=*/true,
272 /*ignoreUnevaluatedContext=*/false, Handler);
273 Visitor.findMatch(DynNode: DynTypedNode::create(Node: *S));
274}
275
276// Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
277static bool notInSafeBufferOptOut(const Stmt &Node,
278 const UnsafeBufferUsageHandler *Handler) {
279 return !Handler->isSafeBufferOptOut(Loc: Node.getBeginLoc());
280}
281
282static bool
283ignoreUnsafeBufferInContainer(const Stmt &Node,
284 const UnsafeBufferUsageHandler *Handler) {
285 return Handler->ignoreUnsafeBufferInContainer(Loc: Node.getBeginLoc());
286}
287
288static bool ignoreUnsafeLibcCall(const ASTContext &Ctx, const Stmt &Node,
289 const UnsafeBufferUsageHandler *Handler) {
290 if (Ctx.getLangOpts().CPlusPlus)
291 return Handler->ignoreUnsafeBufferInLibcCall(Loc: Node.getBeginLoc());
292 return true; /* Only warn about libc calls for C++ */
293}
294
295// Finds any expression 'e' such that `OnResult`
296// matches 'e' and 'e' is in an Unspecified Lvalue Context.
297static void findStmtsInUnspecifiedLvalueContext(
298 const Stmt *S, const llvm::function_ref<void(const Expr *)> OnResult) {
299 if (const auto *CE = dyn_cast<ImplicitCastExpr>(Val: S);
300 CE && CE->getCastKind() == CastKind::CK_LValueToRValue)
301 OnResult(CE->getSubExpr());
302 if (const auto *BO = dyn_cast<BinaryOperator>(Val: S);
303 BO && BO->getOpcode() == BO_Assign)
304 OnResult(BO->getLHS());
305}
306
307// Finds any expression `e` such that `InnerMatcher` matches `e` and
308// `e` is in an Unspecified Pointer Context (UPC).
309static void findStmtsInUnspecifiedPointerContext(
310 const Stmt *S, llvm::function_ref<void(const Stmt *)> InnerMatcher) {
311 // A UPC can be
312 // 1. an argument of a function call (except the callee has [[unsafe_...]]
313 // attribute), or
314 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
315 // 3. the operand of a comparator operation; or
316 // 4. the operand of a pointer subtraction operation
317 // (i.e., computing the distance between two pointers); or ...
318
319 if (auto *CE = dyn_cast<CallExpr>(Val: S)) {
320 if (const auto *FnDecl = CE->getDirectCallee();
321 FnDecl && FnDecl->hasAttr<UnsafeBufferUsageAttr>())
322 return;
323 ast_matchers::matchEachArgumentWithParamType(
324 Node: *CE, OnParamAndArg: [&InnerMatcher](QualType Type, const Expr *Arg) {
325 if (Type->isAnyPointerType())
326 InnerMatcher(Arg);
327 });
328 }
329
330 if (auto *CE = dyn_cast<CastExpr>(Val: S)) {
331 if (CE->getCastKind() != CastKind::CK_PointerToIntegral &&
332 CE->getCastKind() != CastKind::CK_PointerToBoolean)
333 return;
334 if (!hasPointerType(E: *CE->getSubExpr()))
335 return;
336 InnerMatcher(CE->getSubExpr());
337 }
338
339 // Pointer comparison operator.
340 if (const auto *BO = dyn_cast<BinaryOperator>(Val: S);
341 BO && (BO->getOpcode() == BO_EQ || BO->getOpcode() == BO_NE ||
342 BO->getOpcode() == BO_LT || BO->getOpcode() == BO_LE ||
343 BO->getOpcode() == BO_GT || BO->getOpcode() == BO_GE)) {
344 auto *LHS = BO->getLHS();
345 if (hasPointerType(E: *LHS))
346 InnerMatcher(LHS);
347
348 auto *RHS = BO->getRHS();
349 if (hasPointerType(E: *RHS))
350 InnerMatcher(RHS);
351 }
352
353 // Pointer subtractions.
354 if (const auto *BO = dyn_cast<BinaryOperator>(Val: S);
355 BO && BO->getOpcode() == BO_Sub && hasPointerType(E: *BO->getLHS()) &&
356 hasPointerType(E: *BO->getRHS())) {
357 // Note that here we need both LHS and RHS to be
358 // pointer. Then the inner matcher can match any of
359 // them:
360 InnerMatcher(BO->getLHS());
361 InnerMatcher(BO->getRHS());
362 }
363 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now
364 // we don't have to check that.)
365}
366
367// Finds statements in unspecified untyped context i.e. any expression 'e' such
368// that `InnerMatcher` matches 'e' and 'e' is in an unspecified untyped context
369// (i.e the expression 'e' isn't evaluated to an RValue). For example, consider
370// the following code:
371// int *p = new int[4];
372// int *q = new int[4];
373// if ((p = q)) {}
374// p = q;
375// The expression `p = q` in the conditional of the `if` statement
376// `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
377// in the assignment statement is in an untyped context.
378static void findStmtsInUnspecifiedUntypedContext(
379 const Stmt *S, llvm::function_ref<void(const Stmt *)> InnerMatcher) {
380 // An unspecified context can be
381 // 1. A compound statement,
382 // 2. The body of an if statement
383 // 3. Body of a loop
384 if (auto *CS = dyn_cast<CompoundStmt>(Val: S)) {
385 for (auto *Child : CS->body())
386 InnerMatcher(Child);
387 }
388 if (auto *IfS = dyn_cast<IfStmt>(Val: S)) {
389 if (IfS->getThen())
390 InnerMatcher(IfS->getThen());
391 if (IfS->getElse())
392 InnerMatcher(IfS->getElse());
393 }
394 // FIXME: Handle loop bodies.
395}
396
397// Returns true iff integer E1 is equivalent to integer E2.
398//
399// For now we only support such expressions:
400// expr := DRE | const-value | expr BO expr
401// BO := '*' | '+'
402//
403// FIXME: We can reuse the expression comparator of the interop analysis after
404// it has been upstreamed.
405static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx);
406static bool areEqualIntegralBinaryOperators(const BinaryOperator *E1,
407 const Expr *E2_LHS,
408 BinaryOperatorKind BOP,
409 const Expr *E2_RHS,
410 ASTContext &Ctx) {
411 if (E1->getOpcode() == BOP) {
412 switch (BOP) {
413 // Commutative operators:
414 case BO_Mul:
415 case BO_Add:
416 return (areEqualIntegers(E1: E1->getLHS(), E2: E2_LHS, Ctx) &&
417 areEqualIntegers(E1: E1->getRHS(), E2: E2_RHS, Ctx)) ||
418 (areEqualIntegers(E1: E1->getLHS(), E2: E2_RHS, Ctx) &&
419 areEqualIntegers(E1: E1->getRHS(), E2: E2_LHS, Ctx));
420 default:
421 return false;
422 }
423 }
424 return false;
425}
426
427static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx) {
428 E1 = E1->IgnoreParenImpCasts();
429 E2 = E2->IgnoreParenImpCasts();
430 if (!E1->getType()->isIntegerType() || E1->getType() != E2->getType())
431 return false;
432
433 Expr::EvalResult ER1, ER2;
434
435 // If both are constants:
436 if (E1->EvaluateAsInt(Result&: ER1, Ctx) && E2->EvaluateAsInt(Result&: ER2, Ctx))
437 return ER1.Val.getInt() == ER2.Val.getInt();
438
439 // Otherwise, they should have identical stmt kind:
440 if (E1->getStmtClass() != E2->getStmtClass())
441 return false;
442 switch (E1->getStmtClass()) {
443 case Stmt::DeclRefExprClass:
444 return cast<DeclRefExpr>(Val: E1)->getDecl() == cast<DeclRefExpr>(Val: E2)->getDecl();
445 case Stmt::BinaryOperatorClass: {
446 auto BO2 = cast<BinaryOperator>(Val: E2);
447 return areEqualIntegralBinaryOperators(E1: cast<BinaryOperator>(Val: E1),
448 E2_LHS: BO2->getLHS(), BOP: BO2->getOpcode(),
449 E2_RHS: BO2->getRHS(), Ctx);
450 }
451 default:
452 return false;
453 }
454}
455
456// Providing that `Ptr` is a pointer and `Size` is an unsigned-integral
457// expression, returns true iff they follow one of the following safe
458// patterns:
459// 1. Ptr is `DRE.data()` and Size is `DRE.size()`, where DRE is a hardened
460// container or view;
461//
462// 2. Ptr is `a` and Size is `n`, where `a` is of an array-of-T with constant
463// size `n`;
464//
465// 3. Ptr is `&var` and Size is `1`; or
466// Ptr is `std::addressof(...)` and Size is `1`;
467//
468// 4. Size is `0`;
469static bool isPtrBufferSafe(const Expr *Ptr, const Expr *Size,
470 ASTContext &Ctx) {
471 // Pattern 1:
472 if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Val: Ptr->IgnoreParenImpCasts()))
473 if (auto *MCESize =
474 dyn_cast<CXXMemberCallExpr>(Val: Size->IgnoreParenImpCasts())) {
475 auto *DREOfPtr = dyn_cast<DeclRefExpr>(
476 Val: MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
477 auto *DREOfSize = dyn_cast<DeclRefExpr>(
478 Val: MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts());
479
480 if (!DREOfPtr || !DREOfSize)
481 return false; // not in safe pattern
482 // We need to make sure 'a' is identical to 'b' for 'a.data()' and
483 // 'b.size()' otherwise we do not know they match:
484 if (DREOfPtr->getDecl() != DREOfSize->getDecl())
485 return false;
486 if (MCEPtr->getMethodDecl()->getName() != "data")
487 return false;
488 // `MCEPtr->getRecordDecl()` must be non-null as `DREOfPtr` is non-null:
489 if (!MCEPtr->getRecordDecl()->isInStdNamespace())
490 return false;
491
492 auto *ObjII = MCEPtr->getRecordDecl()->getIdentifier();
493
494 if (!ObjII)
495 return false;
496
497 bool AcceptSizeBytes = Ptr->getType()->getPointeeType()->isCharType();
498
499 if (!((AcceptSizeBytes &&
500 MCESize->getMethodDecl()->getName() == "size_bytes") ||
501 // Note here the pointer must be a pointer-to-char type unless there
502 // is explicit casting. If there is explicit casting, this branch
503 // is unreachable. Thus, at this branch "size" and "size_bytes" are
504 // equivalent as the pointer is a char pointer:
505 MCESize->getMethodDecl()->getName() == "size"))
506 return false;
507
508 return llvm::is_contained(Set: {SIZED_CONTAINER_OR_VIEW_LIST},
509 Element: ObjII->getName());
510 }
511
512 Expr::EvalResult ER;
513
514 // Pattern 2-4:
515 if (Size->EvaluateAsInt(Result&: ER, Ctx)) {
516 // Pattern 2:
517 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: Ptr->IgnoreParenImpCasts())) {
518 if (auto *CAT = Ctx.getAsConstantArrayType(T: DRE->getType())) {
519 llvm::APSInt SizeInt = ER.Val.getInt();
520
521 return llvm::APSInt::compareValues(
522 I1: SizeInt, I2: llvm::APSInt(CAT->getSize(), true)) == 0;
523 }
524 return false;
525 }
526
527 // Pattern 3:
528 if (ER.Val.getInt().isOne()) {
529 if (auto *UO = dyn_cast<UnaryOperator>(Val: Ptr->IgnoreParenImpCasts()))
530 return UO && UO->getOpcode() == UnaryOperator::Opcode::UO_AddrOf;
531 if (auto *CE = dyn_cast<CallExpr>(Val: Ptr->IgnoreParenImpCasts())) {
532 auto *FnDecl = CE->getDirectCallee();
533
534 return FnDecl && FnDecl->getNameAsString() == "addressof" &&
535 FnDecl->isInStdNamespace();
536 }
537 return false;
538 }
539 // Pattern 4:
540 if (ER.Val.getInt().isZero())
541 return true;
542 }
543 return false;
544}
545
546// Given a two-param std::span construct call, matches iff the call has the
547// following forms:
548// 1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE
549// 2. `std::span<T>{new T, 1}`
550// 3. `std::span<T>{ (char *)f(args), args[N] * arg*[M]}`, where
551// `f` is a function with attribute `alloc_size(N, M)`;
552// `args` represents the list of arguments;
553// `N, M` are parameter indexes to the allocating element number and size.
554// Sometimes, there is only one parameter index representing the total
555// size.
556// 4. `std::span<T>{x.begin(), x.end()}` where `x` is an object in the
557// SIZED_CONTAINER_OR_VIEW_LIST.
558// 5. `isPtrBufferSafe` returns true for the two arguments of the span
559// constructor
560static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node,
561 ASTContext &Ctx) {
562 assert(Node.getNumArgs() == 2 &&
563 "expecting a two-parameter std::span constructor");
564 const Expr *Arg0 = Node.getArg(Arg: 0)->IgnoreParenImpCasts();
565 const Expr *Arg1 = Node.getArg(Arg: 1)->IgnoreParenImpCasts();
566 auto HaveEqualConstantValues = [&Ctx](const Expr *E0, const Expr *E1) {
567 if (auto E0CV = E0->getIntegerConstantExpr(Ctx))
568 if (auto E1CV = E1->getIntegerConstantExpr(Ctx)) {
569 return llvm::APSInt::compareValues(I1: *E0CV, I2: *E1CV) == 0;
570 }
571 return false;
572 };
573 auto AreSameDRE = [](const Expr *E0, const Expr *E1) {
574 if (auto *DRE0 = dyn_cast<DeclRefExpr>(Val: E0))
575 if (auto *DRE1 = dyn_cast<DeclRefExpr>(Val: E1)) {
576 return DRE0->getDecl() == DRE1->getDecl();
577 }
578 return false;
579 };
580 std::optional<llvm::APSInt> Arg1CV = Arg1->getIntegerConstantExpr(Ctx);
581
582 if (Arg1CV && Arg1CV->isZero())
583 // Check form 5:
584 return true;
585
586 // Check forms 1-2:
587 switch (Arg0->getStmtClass()) {
588 case Stmt::CXXNewExprClass:
589 if (auto Size = cast<CXXNewExpr>(Val: Arg0)->getArraySize()) {
590 // Check form 1:
591 return AreSameDRE((*Size)->IgnoreImplicit(), Arg1) ||
592 HaveEqualConstantValues(*Size, Arg1);
593 }
594 // TODO: what's placeholder type? avoid it for now.
595 if (!cast<CXXNewExpr>(Val: Arg0)->hasPlaceholderType()) {
596 // Check form 2:
597 return Arg1CV && Arg1CV->isOne();
598 }
599 break;
600 default:
601 break;
602 }
603
604 // Check form 3:
605 if (auto CCast = dyn_cast<CStyleCastExpr>(Val: Arg0)) {
606 if (!CCast->getType()->isPointerType())
607 return false;
608
609 QualType PteTy = CCast->getType()->getPointeeType();
610
611 if (!(PteTy->isConstantSizeType() && Ctx.getTypeSizeInChars(T: PteTy).isOne()))
612 return false;
613
614 if (const auto *Call = dyn_cast<CallExpr>(Val: CCast->getSubExpr())) {
615 if (const FunctionDecl *FD = Call->getDirectCallee())
616 if (auto *AllocAttr = FD->getAttr<AllocSizeAttr>()) {
617 const Expr *EleSizeExpr =
618 Call->getArg(Arg: AllocAttr->getElemSizeParam().getASTIndex());
619 // NumElemIdx is invalid if AllocSizeAttr has 1 argument:
620 ParamIdx NumElemIdx = AllocAttr->getNumElemsParam();
621
622 if (!NumElemIdx.isValid())
623 return areEqualIntegers(E1: Arg1, E2: EleSizeExpr, Ctx);
624
625 const Expr *NumElesExpr = Call->getArg(Arg: NumElemIdx.getASTIndex());
626
627 if (auto BO = dyn_cast<BinaryOperator>(Val: Arg1))
628 return areEqualIntegralBinaryOperators(E1: BO, E2_LHS: NumElesExpr, BOP: BO_Mul,
629 E2_RHS: EleSizeExpr, Ctx);
630 }
631 }
632 }
633 // Check form 4:
634 auto IsMethodCallToSizedObject = [](const Stmt *Node, StringRef MethodName) {
635 if (const auto *MC = dyn_cast<CXXMemberCallExpr>(Val: Node)) {
636 const auto *MD = MC->getMethodDecl();
637 const auto *RD = MC->getRecordDecl();
638
639 if (RD && MD)
640 if (auto *II = RD->getDeclName().getAsIdentifierInfo();
641 II && RD->isInStdNamespace())
642 return llvm::is_contained(Set: {SIZED_CONTAINER_OR_VIEW_LIST},
643 Element: II->getName()) &&
644 MD->getName() == MethodName;
645 }
646 return false;
647 };
648
649 if (IsMethodCallToSizedObject(Arg0, "begin") &&
650 IsMethodCallToSizedObject(Arg1, "end"))
651 return AreSameDRE(
652 // We know Arg0 and Arg1 are `CXXMemberCallExpr`s:
653 cast<CXXMemberCallExpr>(Val: Arg0)
654 ->getImplicitObjectArgument()
655 ->IgnoreParenImpCasts(),
656 cast<CXXMemberCallExpr>(Val: Arg1)
657 ->getImplicitObjectArgument()
658 ->IgnoreParenImpCasts());
659
660 // Check 5:
661 return isPtrBufferSafe(Ptr: Arg0, Size: Arg1, Ctx);
662}
663
664static bool isSafeArraySubscript(const ArraySubscriptExpr &Node,
665 const ASTContext &Ctx) {
666 // FIXME: Proper solution:
667 // - refactor Sema::CheckArrayAccess
668 // - split safe/OOB/unknown decision logic from diagnostics emitting code
669 // - e. g. "Try harder to find a NamedDecl to point at in the note."
670 // already duplicated
671 // - call both from Sema and from here
672
673 uint64_t limit;
674 if (const auto *CATy =
675 dyn_cast<ConstantArrayType>(Val: Node.getBase()
676 ->IgnoreParenImpCasts()
677 ->getType()
678 ->getUnqualifiedDesugaredType())) {
679 limit = CATy->getLimitedSize();
680 } else if (const auto *SLiteral = dyn_cast<clang::StringLiteral>(
681 Val: Node.getBase()->IgnoreParenImpCasts())) {
682 limit = SLiteral->getLength() + 1;
683 } else {
684 return false;
685 }
686
687 Expr::EvalResult EVResult;
688 const Expr *IndexExpr = Node.getIdx();
689 if (!IndexExpr->isValueDependent() &&
690 IndexExpr->EvaluateAsInt(Result&: EVResult, Ctx)) {
691 llvm::APSInt ArrIdx = EVResult.Val.getInt();
692 // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a
693 // bug
694 if (ArrIdx.isNonNegative() && ArrIdx.getLimitedValue() < limit)
695 return true;
696 } else if (const auto *BE = dyn_cast<BinaryOperator>(Val: IndexExpr)) {
697 // For an integer expression `e` and an integer constant `n`, `e & n` and
698 // `n & e` are bounded by `n`:
699 if (BE->getOpcode() != BO_And && BE->getOpcode() != BO_Rem)
700 return false;
701
702 const Expr *LHS = BE->getLHS();
703 const Expr *RHS = BE->getRHS();
704
705 if (BE->getOpcode() == BO_Rem) {
706 // If n is a negative number, then n % const can be greater than const
707 if (!LHS->getType()->isUnsignedIntegerType()) {
708 return false;
709 }
710
711 if (!RHS->isValueDependent() && RHS->EvaluateAsInt(Result&: EVResult, Ctx)) {
712 llvm::APSInt result = EVResult.Val.getInt();
713 if (result.isNonNegative() && result.getLimitedValue() <= limit)
714 return true;
715 }
716
717 return false;
718 }
719
720 if ((!LHS->isValueDependent() &&
721 LHS->EvaluateAsInt(Result&: EVResult, Ctx)) || // case: `n & e`
722 (!RHS->isValueDependent() &&
723 RHS->EvaluateAsInt(Result&: EVResult, Ctx))) { // `e & n`
724 llvm::APSInt result = EVResult.Val.getInt();
725 if (result.isNonNegative() && result.getLimitedValue() < limit)
726 return true;
727 }
728 return false;
729 }
730 return false;
731}
732
733namespace libc_func_matchers {
734// Under `libc_func_matchers`, define a set of matchers that match unsafe
735// functions in libc and unsafe calls to them.
736
737// A tiny parser to strip off common prefix and suffix of libc function names
738// in real code.
739//
740// Given a function name, `matchName` returns `CoreName` according to the
741// following grammar:
742//
743// LibcName := CoreName | CoreName + "_s"
744// MatchingName := "__builtin_" + LibcName |
745// "__builtin___" + LibcName + "_chk" |
746// "__asan_" + LibcName
747//
748struct LibcFunNamePrefixSuffixParser {
749 StringRef matchName(StringRef FunName, bool isBuiltin) {
750 // Try to match __builtin_:
751 if (isBuiltin && FunName.starts_with(Prefix: "__builtin_"))
752 // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
753 // no match:
754 return matchLibcNameOrBuiltinChk(
755 Name: FunName.drop_front(N: 10 /* truncate "__builtin_" */));
756 // Try to match __asan_:
757 if (FunName.starts_with(Prefix: "__asan_"))
758 return matchLibcName(Name: FunName.drop_front(N: 7 /* truncate of "__asan_" */));
759 return matchLibcName(Name: FunName);
760 }
761
762 // Parameter `Name` is the substring after stripping off the prefix
763 // "__builtin_".
764 StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
765 if (Name.starts_with(Prefix: "__") && Name.ends_with(Suffix: "_chk"))
766 return matchLibcName(
767 Name: Name.drop_front(N: 2).drop_back(N: 4) /* truncate "__" and "_chk" */);
768 return matchLibcName(Name);
769 }
770
771 StringRef matchLibcName(StringRef Name) {
772 if (Name.ends_with(Suffix: "_s"))
773 return Name.drop_back(N: 2 /* truncate "_s" */);
774 return Name;
775 }
776};
777
778// A pointer type expression is known to be null-terminated, if it has the
779// form: E.c_str(), for any expression E of `std::string` type.
780static bool isNullTermPointer(const Expr *Ptr) {
781 if (isa<clang::StringLiteral>(Val: Ptr->IgnoreParenImpCasts()))
782 return true;
783 if (isa<PredefinedExpr>(Val: Ptr->IgnoreParenImpCasts()))
784 return true;
785 if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Val: Ptr->IgnoreParenImpCasts())) {
786 const CXXMethodDecl *MD = MCE->getMethodDecl();
787 const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
788
789 if (MD && RD && RD->isInStdNamespace() && MD->getIdentifier())
790 if (MD->getName() == "c_str" && RD->getName() == "basic_string")
791 return true;
792 }
793 return false;
794}
795
796// Return true iff at least one of following cases holds:
797// 1. Format string is a literal and there is an unsafe pointer argument
798// corresponding to an `s` specifier;
799// 2. Format string is not a literal and there is least an unsafe pointer
800// argument (including the formatter argument).
801//
802// `UnsafeArg` is the output argument that will be set only if this function
803// returns true.
804static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
805 const unsigned FmtArgIdx, ASTContext &Ctx,
806 bool isKprintf = false) {
807 class StringFormatStringHandler
808 : public analyze_format_string::FormatStringHandler {
809 const CallExpr *Call;
810 unsigned FmtArgIdx;
811 const Expr *&UnsafeArg;
812
813 public:
814 StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx,
815 const Expr *&UnsafeArg)
816 : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {}
817
818 bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
819 const char *startSpecifier,
820 unsigned specifierLen,
821 const TargetInfo &Target) override {
822 if (FS.getConversionSpecifier().getKind() ==
823 analyze_printf::PrintfConversionSpecifier::sArg) {
824 unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
825
826 if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
827 if (!isNullTermPointer(Ptr: Call->getArg(Arg: ArgIdx))) {
828 UnsafeArg = Call->getArg(Arg: ArgIdx); // output
829 // returning false stops parsing immediately
830 return false;
831 }
832 }
833 return true; // continue parsing
834 }
835 };
836
837 const Expr *Fmt = Call->getArg(Arg: FmtArgIdx);
838
839 if (auto *SL = dyn_cast<clang::StringLiteral>(Val: Fmt->IgnoreParenImpCasts())) {
840 StringRef FmtStr;
841
842 if (SL->getCharByteWidth() == 1)
843 FmtStr = SL->getString();
844 else if (auto EvaledFmtStr = SL->tryEvaluateString(Ctx))
845 FmtStr = *EvaledFmtStr;
846 else
847 goto CHECK_UNSAFE_PTR;
848
849 StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg);
850
851 return analyze_format_string::ParsePrintfString(
852 H&: Handler, beg: FmtStr.begin(), end: FmtStr.end(), LO: Ctx.getLangOpts(),
853 Target: Ctx.getTargetInfo(), isFreeBSDKPrintf: isKprintf);
854 }
855CHECK_UNSAFE_PTR:
856 // If format is not a string literal, we cannot analyze the format string.
857 // In this case, this call is considered unsafe if at least one argument
858 // (including the format argument) is unsafe pointer.
859 return llvm::any_of(
860 Range: llvm::make_range(x: Call->arg_begin() + FmtArgIdx, y: Call->arg_end()),
861 P: [&UnsafeArg](const Expr *Arg) -> bool {
862 if (Arg->getType()->isPointerType() && !isNullTermPointer(Ptr: Arg)) {
863 UnsafeArg = Arg;
864 return true;
865 }
866 return false;
867 });
868}
869
870// Matches a FunctionDecl node such that
871// 1. It's name, after stripping off predefined prefix and suffix, is
872// `CoreName`; and
873// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
874// is a set of libc function names.
875//
876// Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`.
877// The notation `CoreName[str/wcs]` means a new name obtained from replace
878// string "wcs" with "str" in `CoreName`.
879static bool isPredefinedUnsafeLibcFunc(const FunctionDecl &Node) {
880 static std::unique_ptr<std::set<StringRef>> PredefinedNames = nullptr;
881 if (!PredefinedNames)
882 PredefinedNames =
883 std::make_unique<std::set<StringRef>, std::set<StringRef>>(args: {
884 // numeric conversion:
885 "atof",
886 "atoi",
887 "atol",
888 "atoll",
889 "strtol",
890 "strtoll",
891 "strtoul",
892 "strtoull",
893 "strtof",
894 "strtod",
895 "strtold",
896 "strtoimax",
897 "strtoumax",
898 // "strfromf", "strfromd", "strfroml", // C23?
899 // string manipulation:
900 "strcpy",
901 "strncpy",
902 "strlcpy",
903 "strcat",
904 "strncat",
905 "strlcat",
906 "strxfrm",
907 "strdup",
908 "strndup",
909 // string examination:
910 "strlen",
911 "strnlen",
912 "strcmp",
913 "strncmp",
914 "stricmp",
915 "strcasecmp",
916 "strcoll",
917 "strchr",
918 "strrchr",
919 "strspn",
920 "strcspn",
921 "strpbrk",
922 "strstr",
923 "strtok",
924 // "mem-" functions
925 "memchr",
926 "wmemchr",
927 "memcmp",
928 "wmemcmp",
929 "memcpy",
930 "memccpy",
931 "mempcpy",
932 "wmemcpy",
933 "memmove",
934 "wmemmove",
935 "memset",
936 "wmemset",
937 // IO:
938 "fread",
939 "fwrite",
940 "fgets",
941 "fgetws",
942 "gets",
943 "fputs",
944 "fputws",
945 "puts",
946 // others
947 "strerror_s",
948 "strerror_r",
949 "bcopy",
950 "bzero",
951 "bsearch",
952 "qsort",
953 });
954
955 auto *II = Node.getIdentifier();
956
957 if (!II)
958 return false;
959
960 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
961 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
962
963 // Match predefined names:
964 if (PredefinedNames->find(x: Name) != PredefinedNames->end())
965 return true;
966
967 std::string NameWCS = Name.str();
968 size_t WcsPos = NameWCS.find(s: "wcs");
969
970 while (WcsPos != std::string::npos) {
971 NameWCS[WcsPos++] = 's';
972 NameWCS[WcsPos++] = 't';
973 NameWCS[WcsPos++] = 'r';
974 WcsPos = NameWCS.find(s: "wcs", pos: WcsPos);
975 }
976 if (PredefinedNames->find(x: NameWCS) != PredefinedNames->end())
977 return true;
978 // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They
979 // all should end with "scanf"):
980 return Name.ends_with(Suffix: "scanf");
981}
982
983// Match a call to one of the `v*printf` functions taking `va_list`. We cannot
984// check safety for these functions so they should be changed to their
985// non-va_list versions.
986static bool isUnsafeVaListPrintfFunc(const FunctionDecl &Node) {
987 auto *II = Node.getIdentifier();
988
989 if (!II)
990 return false;
991
992 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
993 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
994
995 if (!Name.ends_with(Suffix: "printf"))
996 return false; // neither printf nor scanf
997 return Name.starts_with(Prefix: "v");
998}
999
1000// Matches a call to one of the `sprintf` functions as they are always unsafe
1001// and should be changed to `snprintf`.
1002static bool isUnsafeSprintfFunc(const FunctionDecl &Node) {
1003 auto *II = Node.getIdentifier();
1004
1005 if (!II)
1006 return false;
1007
1008 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
1009 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
1010
1011 if (!Name.ends_with(Suffix: "printf") ||
1012 // Let `isUnsafeVaListPrintfFunc` check for cases with va-list:
1013 Name.starts_with(Prefix: "v"))
1014 return false;
1015
1016 StringRef Prefix = Name.drop_back(N: 6);
1017
1018 if (Prefix.ends_with(Suffix: "w"))
1019 Prefix = Prefix.drop_back(N: 1);
1020 return Prefix == "s";
1021}
1022
1023// Match function declarations of `printf`, `fprintf`, `snprintf` and their wide
1024// character versions. Calls to these functions can be safe if their arguments
1025// are carefully made safe.
1026static bool isNormalPrintfFunc(const FunctionDecl &Node) {
1027 auto *II = Node.getIdentifier();
1028
1029 if (!II)
1030 return false;
1031
1032 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
1033 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
1034
1035 if (!Name.ends_with(Suffix: "printf") || Name.starts_with(Prefix: "v"))
1036 return false;
1037
1038 StringRef Prefix = Name.drop_back(N: 6);
1039
1040 if (Prefix.ends_with(Suffix: "w"))
1041 Prefix = Prefix.drop_back(N: 1);
1042
1043 return Prefix.empty() || Prefix == "k" || Prefix == "f" || Prefix == "sn";
1044}
1045
1046// This matcher requires that it is known that the callee `isNormalPrintf`.
1047// Then if the format string is a string literal, this matcher matches when at
1048// least one string argument is unsafe. If the format is not a string literal,
1049// this matcher matches when at least one pointer type argument is unsafe.
1050static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
1051 MatchResult &Result, llvm::StringRef Tag) {
1052 // Determine what printf it is by examining formal parameters:
1053 const FunctionDecl *FD = Node.getDirectCallee();
1054
1055 assert(FD && "It should have been checked that FD is non-null.");
1056
1057 unsigned NumParms = FD->getNumParams();
1058
1059 if (NumParms < 1)
1060 return false; // possibly some user-defined printf function
1061
1062 QualType FirstParmTy = FD->getParamDecl(i: 0)->getType();
1063
1064 if (!FirstParmTy->isPointerType())
1065 return false; // possibly some user-defined printf function
1066
1067 QualType FirstPteTy = FirstParmTy->castAs<PointerType>()->getPointeeType();
1068
1069 if (!Ctx.getFILEType()
1070 .isNull() && //`FILE *` must be in the context if it is fprintf
1071 FirstPteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) {
1072 // It is a fprintf:
1073 const Expr *UnsafeArg;
1074
1075 if (hasUnsafeFormatOrSArg(Call: &Node, UnsafeArg, FmtArgIdx: 1, Ctx, isKprintf: false)) {
1076 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *UnsafeArg));
1077 return true;
1078 }
1079 return false;
1080 }
1081
1082 if (FirstPteTy.isConstQualified()) {
1083 // If the first parameter is a `const char *`, it is a printf/kprintf:
1084 bool isKprintf = false;
1085 const Expr *UnsafeArg;
1086
1087 if (auto *II = FD->getIdentifier())
1088 isKprintf = II->getName() == "kprintf";
1089 if (hasUnsafeFormatOrSArg(Call: &Node, UnsafeArg, FmtArgIdx: 0, Ctx, isKprintf)) {
1090 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *UnsafeArg));
1091 return true;
1092 }
1093 return false;
1094 }
1095
1096 if (NumParms > 2) {
1097 QualType SecondParmTy = FD->getParamDecl(i: 1)->getType();
1098
1099 if (!FirstPteTy.isConstQualified() && SecondParmTy->isIntegerType()) {
1100 // If the first parameter type is non-const qualified `char *` and the
1101 // second is an integer, it is a snprintf:
1102 const Expr *UnsafeArg;
1103
1104 if (hasUnsafeFormatOrSArg(Call: &Node, UnsafeArg, FmtArgIdx: 2, Ctx, isKprintf: false)) {
1105 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *UnsafeArg));
1106 return true;
1107 }
1108 return false;
1109 }
1110 }
1111 // We don't really recognize this "normal" printf, the only thing we
1112 // can do is to require all pointers to be null-terminated:
1113 for (const auto *Arg : Node.arguments())
1114 if (Arg->getType()->isPointerType() && !isNullTermPointer(Ptr: Arg)) {
1115 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *Arg));
1116 return true;
1117 }
1118 return false;
1119}
1120
1121// This function requires that it is known that the callee `isNormalPrintf`.
1122// It returns true iff the first two arguments of the call is a pointer
1123// `Ptr` and an unsigned integer `Size` and they are NOT safe, i.e.,
1124// `!isPtrBufferSafe(Ptr, Size)`.
1125static bool hasUnsafeSnprintfBuffer(const CallExpr &Node, ASTContext &Ctx) {
1126 const FunctionDecl *FD = Node.getDirectCallee();
1127
1128 assert(FD && "It should have been checked that FD is non-null.");
1129
1130 if (FD->getNumParams() < 3)
1131 return false; // Not an snprint
1132
1133 QualType FirstParmTy = FD->getParamDecl(i: 0)->getType();
1134
1135 if (!FirstParmTy->isPointerType())
1136 return false; // Not an snprint
1137
1138 QualType FirstPteTy = FirstParmTy->castAs<PointerType>()->getPointeeType();
1139 const Expr *Buf = Node.getArg(Arg: 0), *Size = Node.getArg(Arg: 1);
1140
1141 if (FirstPteTy.isConstQualified() || !FirstPteTy->isAnyCharacterType() ||
1142 !Buf->getType()->isPointerType() ||
1143 !Size->getType()->isUnsignedIntegerType())
1144 return false; // not an snprintf call
1145
1146 return !isPtrBufferSafe(Ptr: Buf, Size, Ctx);
1147}
1148} // namespace libc_func_matchers
1149
1150namespace {
1151// Because the analysis revolves around variables and their types, we'll need to
1152// track uses of variables (aka DeclRefExprs).
1153using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
1154
1155// Convenience typedef.
1156using FixItList = SmallVector<FixItHint, 4>;
1157} // namespace
1158
1159namespace {
1160/// Gadget is an individual operation in the code that may be of interest to
1161/// this analysis. Each (non-abstract) subclass corresponds to a specific
1162/// rigid AST structure that constitutes an operation on a pointer-type object.
1163/// Discovery of a gadget in the code corresponds to claiming that we understand
1164/// what this part of code is doing well enough to potentially improve it.
1165/// Gadgets can be warning (immediately deserving a warning) or fixable (not
1166/// always deserving a warning per se, but requires our attention to identify
1167/// it warrants a fixit).
1168class Gadget {
1169public:
1170 enum class Kind {
1171#define GADGET(x) x,
1172#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1173 };
1174
1175 Gadget(Kind K) : K(K) {}
1176
1177 Kind getKind() const { return K; }
1178
1179#ifndef NDEBUG
1180 StringRef getDebugName() const {
1181 switch (K) {
1182#define GADGET(x) \
1183 case Kind::x: \
1184 return #x;
1185#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1186 }
1187 llvm_unreachable("Unhandled Gadget::Kind enum");
1188 }
1189#endif
1190
1191 virtual bool isWarningGadget() const = 0;
1192 // TODO remove this method from WarningGadget interface. It's only used for
1193 // debug prints in FixableGadget.
1194 virtual SourceLocation getSourceLoc() const = 0;
1195
1196 /// Returns the list of pointer-type variables on which this gadget performs
1197 /// its operation. Typically, there's only one variable. This isn't a list
1198 /// of all DeclRefExprs in the gadget's AST!
1199 virtual DeclUseList getClaimedVarUseSites() const = 0;
1200
1201 virtual ~Gadget() = default;
1202
1203private:
1204 Kind K;
1205};
1206
1207/// Warning gadgets correspond to unsafe code patterns that warrants
1208/// an immediate warning.
1209class WarningGadget : public Gadget {
1210public:
1211 WarningGadget(Kind K) : Gadget(K) {}
1212
1213 static bool classof(const Gadget *G) { return G->isWarningGadget(); }
1214 bool isWarningGadget() const final { return true; }
1215
1216 virtual void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1217 bool IsRelatedToDecl,
1218 ASTContext &Ctx) const = 0;
1219
1220 virtual SmallVector<const Expr *, 1> getUnsafePtrs() const = 0;
1221};
1222
1223/// Fixable gadgets correspond to code patterns that aren't always unsafe but
1224/// need to be properly recognized in order to emit fixes. For example, if a raw
1225/// pointer-type variable is replaced by a safe C++ container, every use of such
1226/// variable must be carefully considered and possibly updated.
1227class FixableGadget : public Gadget {
1228public:
1229 FixableGadget(Kind K) : Gadget(K) {}
1230
1231 static bool classof(const Gadget *G) { return !G->isWarningGadget(); }
1232 bool isWarningGadget() const final { return false; }
1233
1234 /// Returns a fixit that would fix the current gadget according to
1235 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
1236 /// returns an empty list if no fixes are necessary.
1237 virtual std::optional<FixItList> getFixits(const FixitStrategy &) const {
1238 return std::nullopt;
1239 }
1240
1241 /// Returns a list of two elements where the first element is the LHS of a
1242 /// pointer assignment statement and the second element is the RHS. This
1243 /// two-element list represents the fact that the LHS buffer gets its bounds
1244 /// information from the RHS buffer. This information will be used later to
1245 /// group all those variables whose types must be modified together to prevent
1246 /// type mismatches.
1247 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1248 getStrategyImplications() const {
1249 return std::nullopt;
1250 }
1251};
1252
1253static bool isSupportedVariable(const DeclRefExpr &Node) {
1254 const Decl *D = Node.getDecl();
1255 return D != nullptr && isa<VarDecl>(Val: D);
1256}
1257
1258using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>;
1259using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>;
1260
1261/// An increment of a pointer-type value is unsafe as it may run the pointer
1262/// out of bounds.
1263class IncrementGadget : public WarningGadget {
1264 static constexpr const char *const OpTag = "op";
1265 const UnaryOperator *Op;
1266
1267public:
1268 IncrementGadget(const MatchResult &Result)
1269 : WarningGadget(Kind::Increment),
1270 Op(Result.getNodeAs<UnaryOperator>(ID: OpTag)) {}
1271
1272 static bool classof(const Gadget *G) {
1273 return G->getKind() == Kind::Increment;
1274 }
1275
1276 static bool matches(const Stmt *S, const ASTContext &Ctx,
1277 MatchResult &Result) {
1278 const auto *UO = dyn_cast<UnaryOperator>(Val: S);
1279 if (!UO || !UO->isIncrementOp())
1280 return false;
1281 if (!hasPointerType(E: *UO->getSubExpr()->IgnoreParenImpCasts()))
1282 return false;
1283 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *UO));
1284 return true;
1285 }
1286
1287 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1288 bool IsRelatedToDecl,
1289 ASTContext &Ctx) const override {
1290 Handler.handleUnsafeOperation(Operation: Op, IsRelatedToDecl, Ctx);
1291 }
1292 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1293
1294 DeclUseList getClaimedVarUseSites() const override {
1295 SmallVector<const DeclRefExpr *, 2> Uses;
1296 if (const auto *DRE =
1297 dyn_cast<DeclRefExpr>(Val: Op->getSubExpr()->IgnoreParenImpCasts())) {
1298 Uses.push_back(Elt: DRE);
1299 }
1300
1301 return std::move(Uses);
1302 }
1303
1304 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1305 return {Op->getSubExpr()->IgnoreParenImpCasts()};
1306 }
1307};
1308
1309/// A decrement of a pointer-type value is unsafe as it may run the pointer
1310/// out of bounds.
1311class DecrementGadget : public WarningGadget {
1312 static constexpr const char *const OpTag = "op";
1313 const UnaryOperator *Op;
1314
1315public:
1316 DecrementGadget(const MatchResult &Result)
1317 : WarningGadget(Kind::Decrement),
1318 Op(Result.getNodeAs<UnaryOperator>(ID: OpTag)) {}
1319
1320 static bool classof(const Gadget *G) {
1321 return G->getKind() == Kind::Decrement;
1322 }
1323
1324 static bool matches(const Stmt *S, const ASTContext &Ctx,
1325 MatchResult &Result) {
1326 const auto *UO = dyn_cast<UnaryOperator>(Val: S);
1327 if (!UO || !UO->isDecrementOp())
1328 return false;
1329 if (!hasPointerType(E: *UO->getSubExpr()->IgnoreParenImpCasts()))
1330 return false;
1331 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *UO));
1332 return true;
1333 }
1334
1335 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1336 bool IsRelatedToDecl,
1337 ASTContext &Ctx) const override {
1338 Handler.handleUnsafeOperation(Operation: Op, IsRelatedToDecl, Ctx);
1339 }
1340 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1341
1342 DeclUseList getClaimedVarUseSites() const override {
1343 if (const auto *DRE =
1344 dyn_cast<DeclRefExpr>(Val: Op->getSubExpr()->IgnoreParenImpCasts())) {
1345 return {DRE};
1346 }
1347
1348 return {};
1349 }
1350
1351 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1352 return {Op->getSubExpr()->IgnoreParenImpCasts()};
1353 }
1354};
1355
1356/// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
1357/// it doesn't have any bounds checks for the array.
1358class ArraySubscriptGadget : public WarningGadget {
1359 static constexpr const char *const ArraySubscrTag = "ArraySubscript";
1360 const ArraySubscriptExpr *ASE;
1361
1362public:
1363 ArraySubscriptGadget(const MatchResult &Result)
1364 : WarningGadget(Kind::ArraySubscript),
1365 ASE(Result.getNodeAs<ArraySubscriptExpr>(ID: ArraySubscrTag)) {}
1366
1367 static bool classof(const Gadget *G) {
1368 return G->getKind() == Kind::ArraySubscript;
1369 }
1370
1371 static bool matches(const Stmt *S, const ASTContext &Ctx,
1372 MatchResult &Result) {
1373 const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: S);
1374 if (!ASE)
1375 return false;
1376 const auto *const Base = ASE->getBase()->IgnoreParenImpCasts();
1377 if (!hasPointerType(E: *Base) && !hasArrayType(E: *Base))
1378 return false;
1379 const auto *Idx = dyn_cast<IntegerLiteral>(Val: ASE->getIdx());
1380 bool IsSafeIndex = (Idx && Idx->getValue().isZero()) ||
1381 isa<ArrayInitIndexExpr>(Val: ASE->getIdx());
1382 if (IsSafeIndex || isSafeArraySubscript(Node: *ASE, Ctx))
1383 return false;
1384 Result.addNode(ID: ArraySubscrTag, Node: DynTypedNode::create(Node: *ASE));
1385 return true;
1386 }
1387
1388 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1389 bool IsRelatedToDecl,
1390 ASTContext &Ctx) const override {
1391 Handler.handleUnsafeOperation(Operation: ASE, IsRelatedToDecl, Ctx);
1392 }
1393 SourceLocation getSourceLoc() const override { return ASE->getBeginLoc(); }
1394
1395 DeclUseList getClaimedVarUseSites() const override {
1396 if (const auto *DRE =
1397 dyn_cast<DeclRefExpr>(Val: ASE->getBase()->IgnoreParenImpCasts())) {
1398 return {DRE};
1399 }
1400
1401 return {};
1402 }
1403
1404 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1405 return {ASE->getBase()->IgnoreParenImpCasts()};
1406 }
1407};
1408
1409/// A pointer arithmetic expression of one of the forms:
1410/// \code
1411/// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
1412/// \endcode
1413class PointerArithmeticGadget : public WarningGadget {
1414 static constexpr const char *const PointerArithmeticTag = "ptrAdd";
1415 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr";
1416 const BinaryOperator *PA; // pointer arithmetic expression
1417 const Expr *Ptr; // the pointer expression in `PA`
1418
1419public:
1420 PointerArithmeticGadget(const MatchResult &Result)
1421 : WarningGadget(Kind::PointerArithmetic),
1422 PA(Result.getNodeAs<BinaryOperator>(ID: PointerArithmeticTag)),
1423 Ptr(Result.getNodeAs<Expr>(ID: PointerArithmeticPointerTag)) {}
1424
1425 static bool classof(const Gadget *G) {
1426 return G->getKind() == Kind::PointerArithmetic;
1427 }
1428
1429 static bool matches(const Stmt *S, const ASTContext &Ctx,
1430 MatchResult &Result) {
1431 const auto *BO = dyn_cast<BinaryOperator>(Val: S);
1432 if (!BO)
1433 return false;
1434 const auto *LHS = BO->getLHS();
1435 const auto *RHS = BO->getRHS();
1436 // ptr at left
1437 if (BO->getOpcode() == BO_Add || BO->getOpcode() == BO_Sub ||
1438 BO->getOpcode() == BO_AddAssign || BO->getOpcode() == BO_SubAssign) {
1439 if (hasPointerType(E: *LHS) && (RHS->getType()->isIntegerType() ||
1440 RHS->getType()->isEnumeralType())) {
1441 Result.addNode(ID: PointerArithmeticPointerTag, Node: DynTypedNode::create(Node: *LHS));
1442 Result.addNode(ID: PointerArithmeticTag, Node: DynTypedNode::create(Node: *BO));
1443 return true;
1444 }
1445 }
1446 // ptr at right
1447 if (BO->getOpcode() == BO_Add && hasPointerType(E: *RHS) &&
1448 (LHS->getType()->isIntegerType() || LHS->getType()->isEnumeralType())) {
1449 Result.addNode(ID: PointerArithmeticPointerTag, Node: DynTypedNode::create(Node: *RHS));
1450 Result.addNode(ID: PointerArithmeticTag, Node: DynTypedNode::create(Node: *BO));
1451 return true;
1452 }
1453 return false;
1454 }
1455
1456 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1457 bool IsRelatedToDecl,
1458 ASTContext &Ctx) const override {
1459 Handler.handleUnsafeOperation(Operation: PA, IsRelatedToDecl, Ctx);
1460 }
1461 SourceLocation getSourceLoc() const override { return PA->getBeginLoc(); }
1462
1463 DeclUseList getClaimedVarUseSites() const override {
1464 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ptr->IgnoreParenImpCasts())) {
1465 return {DRE};
1466 }
1467
1468 return {};
1469 }
1470
1471 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1472 return {Ptr->IgnoreParenImpCasts()};
1473 }
1474
1475 // FIXME: pointer adding zero should be fine
1476 // FIXME: this gadge will need a fix-it
1477};
1478
1479class SpanTwoParamConstructorGadget : public WarningGadget {
1480 static constexpr const char *const SpanTwoParamConstructorTag =
1481 "spanTwoParamConstructor";
1482 const CXXConstructExpr *Ctor; // the span constructor expression
1483
1484public:
1485 SpanTwoParamConstructorGadget(const MatchResult &Result)
1486 : WarningGadget(Kind::SpanTwoParamConstructor),
1487 Ctor(Result.getNodeAs<CXXConstructExpr>(ID: SpanTwoParamConstructorTag)) {}
1488
1489 static bool classof(const Gadget *G) {
1490 return G->getKind() == Kind::SpanTwoParamConstructor;
1491 }
1492
1493 static bool matches(const Stmt *S, ASTContext &Ctx, MatchResult &Result) {
1494 const auto *CE = dyn_cast<CXXConstructExpr>(Val: S);
1495 if (!CE)
1496 return false;
1497 const auto *CDecl = CE->getConstructor();
1498 const auto *CRecordDecl = CDecl->getParent();
1499 auto HasTwoParamSpanCtorDecl =
1500 CRecordDecl->isInStdNamespace() &&
1501 CDecl->getDeclName().getAsString() == "span" && CE->getNumArgs() == 2;
1502 if (!HasTwoParamSpanCtorDecl || isSafeSpanTwoParamConstruct(Node: *CE, Ctx))
1503 return false;
1504 Result.addNode(ID: SpanTwoParamConstructorTag, Node: DynTypedNode::create(Node: *CE));
1505 return true;
1506 }
1507
1508 static bool matches(const Stmt *S, ASTContext &Ctx,
1509 const UnsafeBufferUsageHandler *Handler,
1510 MatchResult &Result) {
1511 if (ignoreUnsafeBufferInContainer(Node: *S, Handler))
1512 return false;
1513 return matches(S, Ctx, Result);
1514 }
1515
1516 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1517 bool IsRelatedToDecl,
1518 ASTContext &Ctx) const override {
1519 Handler.handleUnsafeOperationInContainer(Operation: Ctor, IsRelatedToDecl, Ctx);
1520 }
1521 SourceLocation getSourceLoc() const override { return Ctor->getBeginLoc(); }
1522
1523 DeclUseList getClaimedVarUseSites() const override {
1524 // If the constructor call is of the form `std::span{var, n}`, `var` is
1525 // considered an unsafe variable.
1526 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: Ctor->getArg(Arg: 0))) {
1527 if (isa<VarDecl>(Val: DRE->getDecl()))
1528 return {DRE};
1529 }
1530 return {};
1531 }
1532
1533 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1534};
1535
1536/// A pointer initialization expression of the form:
1537/// \code
1538/// int *p = q;
1539/// \endcode
1540class PointerInitGadget : public FixableGadget {
1541private:
1542 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS";
1543 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS";
1544 const VarDecl *PtrInitLHS; // the LHS pointer expression in `PI`
1545 const DeclRefExpr *PtrInitRHS; // the RHS pointer expression in `PI`
1546
1547public:
1548 PointerInitGadget(const MatchResult &Result)
1549 : FixableGadget(Kind::PointerInit),
1550 PtrInitLHS(Result.getNodeAs<VarDecl>(ID: PointerInitLHSTag)),
1551 PtrInitRHS(Result.getNodeAs<DeclRefExpr>(ID: PointerInitRHSTag)) {}
1552
1553 static bool classof(const Gadget *G) {
1554 return G->getKind() == Kind::PointerInit;
1555 }
1556
1557 static bool matches(const Stmt *S,
1558 llvm::SmallVectorImpl<MatchResult> &Results) {
1559 const DeclStmt *DS = dyn_cast<DeclStmt>(Val: S);
1560 if (!DS || !DS->isSingleDecl())
1561 return false;
1562 const VarDecl *VD = dyn_cast<VarDecl>(Val: DS->getSingleDecl());
1563 if (!VD)
1564 return false;
1565 const Expr *Init = VD->getAnyInitializer();
1566 if (!Init)
1567 return false;
1568 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Init->IgnoreImpCasts());
1569 if (!DRE || !hasPointerType(E: *DRE) || !isSupportedVariable(Node: *DRE)) {
1570 return false;
1571 }
1572 MatchResult R;
1573 R.addNode(ID: PointerInitLHSTag, Node: DynTypedNode::create(Node: *VD));
1574 R.addNode(ID: PointerInitRHSTag, Node: DynTypedNode::create(Node: *DRE));
1575 Results.emplace_back(Args: std::move(R));
1576 return true;
1577 }
1578
1579 virtual std::optional<FixItList>
1580 getFixits(const FixitStrategy &S) const override;
1581 SourceLocation getSourceLoc() const override {
1582 return PtrInitRHS->getBeginLoc();
1583 }
1584
1585 virtual DeclUseList getClaimedVarUseSites() const override {
1586 return DeclUseList{PtrInitRHS};
1587 }
1588
1589 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1590 getStrategyImplications() const override {
1591 return std::make_pair(x: PtrInitLHS, y: cast<VarDecl>(Val: PtrInitRHS->getDecl()));
1592 }
1593};
1594
1595/// A pointer assignment expression of the form:
1596/// \code
1597/// p = q;
1598/// \endcode
1599/// where both `p` and `q` are pointers.
1600class PtrToPtrAssignmentGadget : public FixableGadget {
1601private:
1602 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
1603 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
1604 const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA`
1605 const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA`
1606
1607public:
1608 PtrToPtrAssignmentGadget(const MatchResult &Result)
1609 : FixableGadget(Kind::PtrToPtrAssignment),
1610 PtrLHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignLHSTag)),
1611 PtrRHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignRHSTag)) {}
1612
1613 static bool classof(const Gadget *G) {
1614 return G->getKind() == Kind::PtrToPtrAssignment;
1615 }
1616
1617 static bool matches(const Stmt *S,
1618 llvm::SmallVectorImpl<MatchResult> &Results) {
1619 size_t SizeBefore = Results.size();
1620 findStmtsInUnspecifiedUntypedContext(S, InnerMatcher: [&Results](const Stmt *S) {
1621 const auto *BO = dyn_cast<BinaryOperator>(Val: S);
1622 if (!BO || BO->getOpcode() != BO_Assign)
1623 return;
1624 const auto *RHS = BO->getRHS()->IgnoreParenImpCasts();
1625 if (const auto *RHSRef = dyn_cast<DeclRefExpr>(Val: RHS);
1626 !RHSRef || !hasPointerType(E: *RHSRef) ||
1627 !isSupportedVariable(Node: *RHSRef)) {
1628 return;
1629 }
1630 const auto *LHS = BO->getLHS();
1631 if (const auto *LHSRef = dyn_cast<DeclRefExpr>(Val: LHS);
1632 !LHSRef || !hasPointerType(E: *LHSRef) ||
1633 !isSupportedVariable(Node: *LHSRef)) {
1634 return;
1635 }
1636 MatchResult R;
1637 R.addNode(ID: PointerAssignLHSTag, Node: DynTypedNode::create(Node: *LHS));
1638 R.addNode(ID: PointerAssignRHSTag, Node: DynTypedNode::create(Node: *RHS));
1639 Results.emplace_back(Args: std::move(R));
1640 });
1641 return SizeBefore != Results.size();
1642 }
1643
1644 virtual std::optional<FixItList>
1645 getFixits(const FixitStrategy &S) const override;
1646 SourceLocation getSourceLoc() const override { return PtrLHS->getBeginLoc(); }
1647
1648 virtual DeclUseList getClaimedVarUseSites() const override {
1649 return DeclUseList{PtrLHS, PtrRHS};
1650 }
1651
1652 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1653 getStrategyImplications() const override {
1654 return std::make_pair(x: cast<VarDecl>(Val: PtrLHS->getDecl()),
1655 y: cast<VarDecl>(Val: PtrRHS->getDecl()));
1656 }
1657};
1658
1659/// An assignment expression of the form:
1660/// \code
1661/// ptr = array;
1662/// \endcode
1663/// where `p` is a pointer and `array` is a constant size array.
1664class CArrayToPtrAssignmentGadget : public FixableGadget {
1665private:
1666 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
1667 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
1668 const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA`
1669 const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA`
1670
1671public:
1672 CArrayToPtrAssignmentGadget(const MatchResult &Result)
1673 : FixableGadget(Kind::CArrayToPtrAssignment),
1674 PtrLHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignLHSTag)),
1675 PtrRHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignRHSTag)) {}
1676
1677 static bool classof(const Gadget *G) {
1678 return G->getKind() == Kind::CArrayToPtrAssignment;
1679 }
1680
1681 static bool matches(const Stmt *S,
1682 llvm::SmallVectorImpl<MatchResult> &Results) {
1683 size_t SizeBefore = Results.size();
1684 findStmtsInUnspecifiedUntypedContext(S, InnerMatcher: [&Results](const Stmt *S) {
1685 const auto *BO = dyn_cast<BinaryOperator>(Val: S);
1686 if (!BO || BO->getOpcode() != BO_Assign)
1687 return;
1688 const auto *RHS = BO->getRHS()->IgnoreParenImpCasts();
1689 if (const auto *RHSRef = dyn_cast<DeclRefExpr>(Val: RHS);
1690 !RHSRef ||
1691 !isa<ConstantArrayType>(Val: RHSRef->getType().getCanonicalType()) ||
1692 !isSupportedVariable(Node: *RHSRef)) {
1693 return;
1694 }
1695 const auto *LHS = BO->getLHS();
1696 if (const auto *LHSRef = dyn_cast<DeclRefExpr>(Val: LHS);
1697 !LHSRef || !hasPointerType(E: *LHSRef) ||
1698 !isSupportedVariable(Node: *LHSRef)) {
1699 return;
1700 }
1701 MatchResult R;
1702 R.addNode(ID: PointerAssignLHSTag, Node: DynTypedNode::create(Node: *LHS));
1703 R.addNode(ID: PointerAssignRHSTag, Node: DynTypedNode::create(Node: *RHS));
1704 Results.emplace_back(Args: std::move(R));
1705 });
1706 return SizeBefore != Results.size();
1707 }
1708
1709 virtual std::optional<FixItList>
1710 getFixits(const FixitStrategy &S) const override;
1711 SourceLocation getSourceLoc() const override { return PtrLHS->getBeginLoc(); }
1712
1713 virtual DeclUseList getClaimedVarUseSites() const override {
1714 return DeclUseList{PtrLHS, PtrRHS};
1715 }
1716
1717 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1718 getStrategyImplications() const override {
1719 return {};
1720 }
1721};
1722
1723/// A call of a function or method that performs unchecked buffer operations
1724/// over one of its pointer parameters.
1725class UnsafeBufferUsageAttrGadget : public WarningGadget {
1726 constexpr static const char *const OpTag = "attr_expr";
1727 const Expr *Op;
1728
1729public:
1730 UnsafeBufferUsageAttrGadget(const MatchResult &Result)
1731 : WarningGadget(Kind::UnsafeBufferUsageAttr),
1732 Op(Result.getNodeAs<Expr>(ID: OpTag)) {}
1733
1734 static bool classof(const Gadget *G) {
1735 return G->getKind() == Kind::UnsafeBufferUsageAttr;
1736 }
1737
1738 static bool matches(const Stmt *S, const ASTContext &Ctx,
1739 MatchResult &Result) {
1740 if (auto *CE = dyn_cast<CallExpr>(Val: S)) {
1741 if (CE->getDirectCallee() &&
1742 CE->getDirectCallee()->hasAttr<UnsafeBufferUsageAttr>()) {
1743 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *CE));
1744 return true;
1745 }
1746 }
1747 if (auto *ME = dyn_cast<MemberExpr>(Val: S)) {
1748 if (!isa<FieldDecl>(Val: ME->getMemberDecl()))
1749 return false;
1750 if (ME->getMemberDecl()->hasAttr<UnsafeBufferUsageAttr>()) {
1751 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *ME));
1752 return true;
1753 }
1754 }
1755 return false;
1756 }
1757
1758 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1759 bool IsRelatedToDecl,
1760 ASTContext &Ctx) const override {
1761 Handler.handleUnsafeOperation(Operation: Op, IsRelatedToDecl, Ctx);
1762 }
1763 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1764
1765 DeclUseList getClaimedVarUseSites() const override { return {}; }
1766
1767 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1768};
1769
1770/// A call of a constructor that performs unchecked buffer operations
1771/// over one of its pointer parameters, or constructs a class object that will
1772/// perform buffer operations that depend on the correctness of the parameters.
1773class UnsafeBufferUsageCtorAttrGadget : public WarningGadget {
1774 constexpr static const char *const OpTag = "cxx_construct_expr";
1775 const CXXConstructExpr *Op;
1776
1777public:
1778 UnsafeBufferUsageCtorAttrGadget(const MatchResult &Result)
1779 : WarningGadget(Kind::UnsafeBufferUsageCtorAttr),
1780 Op(Result.getNodeAs<CXXConstructExpr>(ID: OpTag)) {}
1781
1782 static bool classof(const Gadget *G) {
1783 return G->getKind() == Kind::UnsafeBufferUsageCtorAttr;
1784 }
1785
1786 static bool matches(const Stmt *S, ASTContext &Ctx, MatchResult &Result) {
1787 const auto *CE = dyn_cast<CXXConstructExpr>(Val: S);
1788 if (!CE || !CE->getConstructor()->hasAttr<UnsafeBufferUsageAttr>())
1789 return false;
1790 // std::span(ptr, size) ctor is handled by SpanTwoParamConstructorGadget.
1791 MatchResult Tmp;
1792 if (SpanTwoParamConstructorGadget::matches(S: CE, Ctx, Result&: Tmp))
1793 return false;
1794 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *CE));
1795 return true;
1796 }
1797
1798 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1799 bool IsRelatedToDecl,
1800 ASTContext &Ctx) const override {
1801 Handler.handleUnsafeOperation(Operation: Op, IsRelatedToDecl, Ctx);
1802 }
1803 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1804
1805 DeclUseList getClaimedVarUseSites() const override { return {}; }
1806
1807 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1808};
1809
1810// Warning gadget for unsafe invocation of span::data method.
1811// Triggers when the pointer returned by the invocation is immediately
1812// cast to a larger type.
1813
1814class DataInvocationGadget : public WarningGadget {
1815 constexpr static const char *const OpTag = "data_invocation_expr";
1816 const ExplicitCastExpr *Op;
1817
1818public:
1819 DataInvocationGadget(const MatchResult &Result)
1820 : WarningGadget(Kind::DataInvocation),
1821 Op(Result.getNodeAs<ExplicitCastExpr>(ID: OpTag)) {}
1822
1823 static bool classof(const Gadget *G) {
1824 return G->getKind() == Kind::DataInvocation;
1825 }
1826
1827 static bool matches(const Stmt *S, const ASTContext &Ctx,
1828 MatchResult &Result) {
1829 auto *CE = dyn_cast<ExplicitCastExpr>(Val: S);
1830 if (!CE)
1831 return false;
1832 for (auto *Child : CE->children()) {
1833 if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Val: Child);
1834 MCE && isDataFunction(call: MCE)) {
1835 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *CE));
1836 return true;
1837 }
1838 if (auto *Paren = dyn_cast<ParenExpr>(Val: Child)) {
1839 if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Val: Paren->getSubExpr());
1840 MCE && isDataFunction(call: MCE)) {
1841 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *CE));
1842 return true;
1843 }
1844 }
1845 }
1846 return false;
1847 }
1848
1849 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1850 bool IsRelatedToDecl,
1851 ASTContext &Ctx) const override {
1852 Handler.handleUnsafeOperation(Operation: Op, IsRelatedToDecl, Ctx);
1853 }
1854 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1855
1856 DeclUseList getClaimedVarUseSites() const override { return {}; }
1857
1858private:
1859 static bool isDataFunction(const CXXMemberCallExpr *call) {
1860 if (!call)
1861 return false;
1862 auto *callee = call->getDirectCallee();
1863 if (!callee || !isa<CXXMethodDecl>(Val: callee))
1864 return false;
1865 auto *method = cast<CXXMethodDecl>(Val: callee);
1866 if (method->getNameAsString() == "data" &&
1867 method->getParent()->isInStdNamespace() &&
1868 llvm::is_contained(Set: {SIZED_CONTAINER_OR_VIEW_LIST},
1869 Element: method->getParent()->getName()))
1870 return true;
1871 return false;
1872 }
1873
1874 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1875};
1876
1877class UnsafeLibcFunctionCallGadget : public WarningGadget {
1878 const CallExpr *const Call;
1879 const Expr *UnsafeArg = nullptr;
1880 constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
1881 // Extra tags for additional information:
1882 constexpr static const char *const UnsafeSprintfTag =
1883 "UnsafeLibcFunctionCall_sprintf";
1884 constexpr static const char *const UnsafeSizedByTag =
1885 "UnsafeLibcFunctionCall_sized_by";
1886 constexpr static const char *const UnsafeStringTag =
1887 "UnsafeLibcFunctionCall_string";
1888 constexpr static const char *const UnsafeVaListTag =
1889 "UnsafeLibcFunctionCall_va_list";
1890
1891 enum UnsafeKind {
1892 OTHERS = 0, // no specific information, the callee function is unsafe
1893 SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead.
1894 SIZED_BY =
1895 2, // the first two arguments of `snprintf` function have
1896 // "__sized_by" relation but they do not conform to safe patterns
1897 STRING = 3, // an argument is a pointer-to-char-as-string but does not
1898 // guarantee null-termination
1899 VA_LIST = 4, // one of the `-printf`s function that take va_list, which is
1900 // considered unsafe as it is not compile-time check
1901 } WarnedFunKind = OTHERS;
1902
1903public:
1904 UnsafeLibcFunctionCallGadget(const MatchResult &Result)
1905 : WarningGadget(Kind::UnsafeLibcFunctionCall),
1906 Call(Result.getNodeAs<CallExpr>(ID: Tag)) {
1907 if (Result.getNodeAs<Decl>(ID: UnsafeSprintfTag))
1908 WarnedFunKind = SPRINTF;
1909 else if (auto *E = Result.getNodeAs<Expr>(ID: UnsafeStringTag)) {
1910 WarnedFunKind = STRING;
1911 UnsafeArg = E;
1912 } else if (Result.getNodeAs<CallExpr>(ID: UnsafeSizedByTag)) {
1913 WarnedFunKind = SIZED_BY;
1914 UnsafeArg = Call->getArg(Arg: 0);
1915 } else if (Result.getNodeAs<Decl>(ID: UnsafeVaListTag))
1916 WarnedFunKind = VA_LIST;
1917 }
1918
1919 static bool matches(const Stmt *S, ASTContext &Ctx,
1920 const UnsafeBufferUsageHandler *Handler,
1921 MatchResult &Result) {
1922 if (ignoreUnsafeLibcCall(Ctx, Node: *S, Handler))
1923 return false;
1924 auto *CE = dyn_cast<CallExpr>(Val: S);
1925 if (!CE || !CE->getDirectCallee())
1926 return false;
1927 const auto *FD = dyn_cast<FunctionDecl>(Val: CE->getDirectCallee());
1928 if (!FD)
1929 return false;
1930 auto isSingleStringLiteralArg = false;
1931 if (CE->getNumArgs() == 1) {
1932 isSingleStringLiteralArg =
1933 isa<clang::StringLiteral>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts());
1934 }
1935 if (!isSingleStringLiteralArg) {
1936 // (unless the call has a sole string literal argument):
1937 if (libc_func_matchers::isPredefinedUnsafeLibcFunc(Node: *FD)) {
1938 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1939 return true;
1940 }
1941 if (libc_func_matchers::isUnsafeVaListPrintfFunc(Node: *FD)) {
1942 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1943 Result.addNode(ID: UnsafeVaListTag, Node: DynTypedNode::create(Node: *FD));
1944 return true;
1945 }
1946 if (libc_func_matchers::isUnsafeSprintfFunc(Node: *FD)) {
1947 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1948 Result.addNode(ID: UnsafeSprintfTag, Node: DynTypedNode::create(Node: *FD));
1949 return true;
1950 }
1951 }
1952 if (libc_func_matchers::isNormalPrintfFunc(Node: *FD)) {
1953 if (libc_func_matchers::hasUnsafeSnprintfBuffer(Node: *CE, Ctx)) {
1954 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1955 Result.addNode(ID: UnsafeSizedByTag, Node: DynTypedNode::create(Node: *CE));
1956 return true;
1957 }
1958 if (libc_func_matchers::hasUnsafePrintfStringArg(Node: *CE, Ctx, Result,
1959 Tag: UnsafeStringTag)) {
1960 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1961 return true;
1962 }
1963 }
1964 return false;
1965 }
1966
1967 const Stmt *getBaseStmt() const { return Call; }
1968
1969 SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
1970
1971 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1972 bool IsRelatedToDecl,
1973 ASTContext &Ctx) const override {
1974 Handler.handleUnsafeLibcCall(Call, PrintfInfo: WarnedFunKind, Ctx, UnsafeArg);
1975 }
1976
1977 DeclUseList getClaimedVarUseSites() const override { return {}; }
1978
1979 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1980};
1981
1982// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
1983// Context (see `findStmtsInUnspecifiedLvalueContext`).
1984// Note here `[]` is the built-in subscript operator.
1985class ULCArraySubscriptGadget : public FixableGadget {
1986private:
1987 static constexpr const char *const ULCArraySubscriptTag =
1988 "ArraySubscriptUnderULC";
1989 const ArraySubscriptExpr *Node;
1990
1991public:
1992 ULCArraySubscriptGadget(const MatchResult &Result)
1993 : FixableGadget(Kind::ULCArraySubscript),
1994 Node(Result.getNodeAs<ArraySubscriptExpr>(ID: ULCArraySubscriptTag)) {
1995 assert(Node != nullptr && "Expecting a non-null matching result");
1996 }
1997
1998 static bool classof(const Gadget *G) {
1999 return G->getKind() == Kind::ULCArraySubscript;
2000 }
2001
2002 static bool matches(const Stmt *S,
2003 llvm::SmallVectorImpl<MatchResult> &Results) {
2004 size_t SizeBefore = Results.size();
2005 findStmtsInUnspecifiedLvalueContext(S, OnResult: [&Results](const Expr *E) {
2006 const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: E);
2007 if (!ASE)
2008 return;
2009 const auto *DRE =
2010 dyn_cast<DeclRefExpr>(Val: ASE->getBase()->IgnoreParenImpCasts());
2011 if (!DRE || !(hasPointerType(E: *DRE) || hasArrayType(E: *DRE)) ||
2012 !isSupportedVariable(Node: *DRE))
2013 return;
2014 MatchResult R;
2015 R.addNode(ID: ULCArraySubscriptTag, Node: DynTypedNode::create(Node: *ASE));
2016 Results.emplace_back(Args: std::move(R));
2017 });
2018 return SizeBefore != Results.size();
2019 }
2020
2021 virtual std::optional<FixItList>
2022 getFixits(const FixitStrategy &S) const override;
2023 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2024
2025 virtual DeclUseList getClaimedVarUseSites() const override {
2026 if (const auto *DRE =
2027 dyn_cast<DeclRefExpr>(Val: Node->getBase()->IgnoreImpCasts())) {
2028 return {DRE};
2029 }
2030 return {};
2031 }
2032};
2033
2034// Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
2035// unspecified pointer context (findStmtsInUnspecifiedPointerContext). The
2036// gadget emits fixit of the form `UPC(DRE.data())`.
2037class UPCStandalonePointerGadget : public FixableGadget {
2038private:
2039 static constexpr const char *const DeclRefExprTag = "StandalonePointer";
2040 const DeclRefExpr *Node;
2041
2042public:
2043 UPCStandalonePointerGadget(const MatchResult &Result)
2044 : FixableGadget(Kind::UPCStandalonePointer),
2045 Node(Result.getNodeAs<DeclRefExpr>(ID: DeclRefExprTag)) {
2046 assert(Node != nullptr && "Expecting a non-null matching result");
2047 }
2048
2049 static bool classof(const Gadget *G) {
2050 return G->getKind() == Kind::UPCStandalonePointer;
2051 }
2052
2053 static bool matches(const Stmt *S,
2054 llvm::SmallVectorImpl<MatchResult> &Results) {
2055 size_t SizeBefore = Results.size();
2056 findStmtsInUnspecifiedPointerContext(S, InnerMatcher: [&Results](const Stmt *S) {
2057 auto *E = dyn_cast<Expr>(Val: S);
2058 if (!E)
2059 return;
2060 const auto *DRE = dyn_cast<DeclRefExpr>(Val: E->IgnoreParenImpCasts());
2061 if (!DRE || (!hasPointerType(E: *DRE) && !hasArrayType(E: *DRE)) ||
2062 !isSupportedVariable(Node: *DRE))
2063 return;
2064 MatchResult R;
2065 R.addNode(ID: DeclRefExprTag, Node: DynTypedNode::create(Node: *DRE));
2066 Results.emplace_back(Args: std::move(R));
2067 });
2068 return SizeBefore != Results.size();
2069 }
2070
2071 virtual std::optional<FixItList>
2072 getFixits(const FixitStrategy &S) const override;
2073 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2074
2075 virtual DeclUseList getClaimedVarUseSites() const override { return {Node}; }
2076};
2077
2078class PointerDereferenceGadget : public FixableGadget {
2079 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
2080 static constexpr const char *const OperatorTag = "op";
2081
2082 const DeclRefExpr *BaseDeclRefExpr = nullptr;
2083 const UnaryOperator *Op = nullptr;
2084
2085public:
2086 PointerDereferenceGadget(const MatchResult &Result)
2087 : FixableGadget(Kind::PointerDereference),
2088 BaseDeclRefExpr(Result.getNodeAs<DeclRefExpr>(ID: BaseDeclRefExprTag)),
2089 Op(Result.getNodeAs<UnaryOperator>(ID: OperatorTag)) {}
2090
2091 static bool classof(const Gadget *G) {
2092 return G->getKind() == Kind::PointerDereference;
2093 }
2094
2095 static bool matches(const Stmt *S,
2096 llvm::SmallVectorImpl<MatchResult> &Results) {
2097 size_t SizeBefore = Results.size();
2098 findStmtsInUnspecifiedLvalueContext(S, OnResult: [&Results](const Stmt *S) {
2099 const auto *UO = dyn_cast<UnaryOperator>(Val: S);
2100 if (!UO || UO->getOpcode() != UO_Deref)
2101 return;
2102 const auto *CE = dyn_cast<Expr>(Val: UO->getSubExpr());
2103 if (!CE)
2104 return;
2105 CE = CE->IgnoreParenImpCasts();
2106 const auto *DRE = dyn_cast<DeclRefExpr>(Val: CE);
2107 if (!DRE || !isSupportedVariable(Node: *DRE))
2108 return;
2109 MatchResult R;
2110 R.addNode(ID: BaseDeclRefExprTag, Node: DynTypedNode::create(Node: *DRE));
2111 R.addNode(ID: OperatorTag, Node: DynTypedNode::create(Node: *UO));
2112 Results.emplace_back(Args: std::move(R));
2113 });
2114 return SizeBefore != Results.size();
2115 }
2116
2117 DeclUseList getClaimedVarUseSites() const override {
2118 return {BaseDeclRefExpr};
2119 }
2120
2121 virtual std::optional<FixItList>
2122 getFixits(const FixitStrategy &S) const override;
2123 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
2124};
2125
2126// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
2127// Context (see `findStmtsInUnspecifiedPointerContext`).
2128// Note here `[]` is the built-in subscript operator.
2129class UPCAddressofArraySubscriptGadget : public FixableGadget {
2130private:
2131 static constexpr const char *const UPCAddressofArraySubscriptTag =
2132 "AddressofArraySubscriptUnderUPC";
2133 const UnaryOperator *Node; // the `&DRE[any]` node
2134
2135public:
2136 UPCAddressofArraySubscriptGadget(const MatchResult &Result)
2137 : FixableGadget(Kind::ULCArraySubscript),
2138 Node(Result.getNodeAs<UnaryOperator>(ID: UPCAddressofArraySubscriptTag)) {
2139 assert(Node != nullptr && "Expecting a non-null matching result");
2140 }
2141
2142 static bool classof(const Gadget *G) {
2143 return G->getKind() == Kind::UPCAddressofArraySubscript;
2144 }
2145
2146 static bool matches(const Stmt *S,
2147 llvm::SmallVectorImpl<MatchResult> &Results) {
2148 size_t SizeBefore = Results.size();
2149 findStmtsInUnspecifiedPointerContext(S, InnerMatcher: [&Results](const Stmt *S) {
2150 auto *E = dyn_cast<Expr>(Val: S);
2151 if (!E)
2152 return;
2153 const auto *UO = dyn_cast<UnaryOperator>(Val: E->IgnoreImpCasts());
2154 if (!UO || UO->getOpcode() != UO_AddrOf)
2155 return;
2156 const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: UO->getSubExpr());
2157 if (!ASE)
2158 return;
2159 const auto *DRE =
2160 dyn_cast<DeclRefExpr>(Val: ASE->getBase()->IgnoreParenImpCasts());
2161 if (!DRE || !isSupportedVariable(Node: *DRE))
2162 return;
2163 MatchResult R;
2164 R.addNode(ID: UPCAddressofArraySubscriptTag, Node: DynTypedNode::create(Node: *UO));
2165 Results.emplace_back(Args: std::move(R));
2166 });
2167 return SizeBefore != Results.size();
2168 }
2169
2170 virtual std::optional<FixItList>
2171 getFixits(const FixitStrategy &) const override;
2172 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2173
2174 virtual DeclUseList getClaimedVarUseSites() const override {
2175 const auto *ArraySubst = cast<ArraySubscriptExpr>(Val: Node->getSubExpr());
2176 const auto *DRE =
2177 cast<DeclRefExpr>(Val: ArraySubst->getBase()->IgnoreParenImpCasts());
2178 return {DRE};
2179 }
2180};
2181} // namespace
2182
2183namespace {
2184// An auxiliary tracking facility for the fixit analysis. It helps connect
2185// declarations to its uses and make sure we've covered all uses with our
2186// analysis before we try to fix the declaration.
2187class DeclUseTracker {
2188 using UseSetTy = llvm::SmallSet<const DeclRefExpr *, 16>;
2189 using DefMapTy = llvm::DenseMap<const VarDecl *, const DeclStmt *>;
2190
2191 // Allocate on the heap for easier move.
2192 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
2193 DefMapTy Defs{};
2194
2195public:
2196 DeclUseTracker() = default;
2197 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
2198 DeclUseTracker &operator=(const DeclUseTracker &) = delete;
2199 DeclUseTracker(DeclUseTracker &&) = default;
2200 DeclUseTracker &operator=(DeclUseTracker &&) = default;
2201
2202 // Start tracking a freshly discovered DRE.
2203 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(Ptr: DRE); }
2204
2205 // Stop tracking the DRE as it's been fully figured out.
2206 void claimUse(const DeclRefExpr *DRE) {
2207 assert(Uses->count(DRE) &&
2208 "DRE not found or claimed by multiple matchers!");
2209 Uses->erase(Ptr: DRE);
2210 }
2211
2212 // A variable is unclaimed if at least one use is unclaimed.
2213 bool hasUnclaimedUses(const VarDecl *VD) const {
2214 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
2215 return any_of(Range&: *Uses, P: [VD](const DeclRefExpr *DRE) {
2216 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
2217 });
2218 }
2219
2220 UseSetTy getUnclaimedUses(const VarDecl *VD) const {
2221 UseSetTy ReturnSet;
2222 for (auto use : *Uses) {
2223 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) {
2224 ReturnSet.insert(Ptr: use);
2225 }
2226 }
2227 return ReturnSet;
2228 }
2229
2230 void discoverDecl(const DeclStmt *DS) {
2231 for (const Decl *D : DS->decls()) {
2232 if (const auto *VD = dyn_cast<VarDecl>(Val: D)) {
2233 // FIXME: Assertion temporarily disabled due to a bug in
2234 // ASTMatcher internal behavior in presence of GNU
2235 // statement-expressions. We need to properly investigate this
2236 // because it can screw up our algorithm in other ways.
2237 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
2238 Defs[VD] = DS;
2239 }
2240 }
2241 }
2242
2243 const DeclStmt *lookupDecl(const VarDecl *VD) const {
2244 return Defs.lookup(Val: VD);
2245 }
2246};
2247} // namespace
2248
2249// Representing a pointer type expression of the form `++Ptr` in an Unspecified
2250// Pointer Context (UPC):
2251class UPCPreIncrementGadget : public FixableGadget {
2252private:
2253 static constexpr const char *const UPCPreIncrementTag =
2254 "PointerPreIncrementUnderUPC";
2255 const UnaryOperator *Node; // the `++Ptr` node
2256
2257public:
2258 UPCPreIncrementGadget(const MatchResult &Result)
2259 : FixableGadget(Kind::UPCPreIncrement),
2260 Node(Result.getNodeAs<UnaryOperator>(ID: UPCPreIncrementTag)) {
2261 assert(Node != nullptr && "Expecting a non-null matching result");
2262 }
2263
2264 static bool classof(const Gadget *G) {
2265 return G->getKind() == Kind::UPCPreIncrement;
2266 }
2267
2268 static bool matches(const Stmt *S,
2269 llvm::SmallVectorImpl<MatchResult> &Results) {
2270 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
2271 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
2272 // can have the matcher be general, so long as `getClaimedVarUseSites` does
2273 // things right.
2274 size_t SizeBefore = Results.size();
2275 findStmtsInUnspecifiedPointerContext(S, InnerMatcher: [&Results](const Stmt *S) {
2276 auto *E = dyn_cast<Expr>(Val: S);
2277 if (!E)
2278 return;
2279 const auto *UO = dyn_cast<UnaryOperator>(Val: E->IgnoreImpCasts());
2280 if (!UO || UO->getOpcode() != UO_PreInc)
2281 return;
2282 const auto *DRE = dyn_cast<DeclRefExpr>(Val: UO->getSubExpr());
2283 if (!DRE || !isSupportedVariable(Node: *DRE))
2284 return;
2285 MatchResult R;
2286 R.addNode(ID: UPCPreIncrementTag, Node: DynTypedNode::create(Node: *UO));
2287 Results.emplace_back(Args: std::move(R));
2288 });
2289 return SizeBefore != Results.size();
2290 }
2291
2292 virtual std::optional<FixItList>
2293 getFixits(const FixitStrategy &S) const override;
2294 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2295
2296 virtual DeclUseList getClaimedVarUseSites() const override {
2297 return {dyn_cast<DeclRefExpr>(Val: Node->getSubExpr())};
2298 }
2299};
2300
2301// Representing a pointer type expression of the form `Ptr += n` in an
2302// Unspecified Untyped Context (UUC):
2303class UUCAddAssignGadget : public FixableGadget {
2304private:
2305 static constexpr const char *const UUCAddAssignTag =
2306 "PointerAddAssignUnderUUC";
2307 static constexpr const char *const OffsetTag = "Offset";
2308
2309 const BinaryOperator *Node; // the `Ptr += n` node
2310 const Expr *Offset = nullptr;
2311
2312public:
2313 UUCAddAssignGadget(const MatchResult &Result)
2314 : FixableGadget(Kind::UUCAddAssign),
2315 Node(Result.getNodeAs<BinaryOperator>(ID: UUCAddAssignTag)),
2316 Offset(Result.getNodeAs<Expr>(ID: OffsetTag)) {
2317 assert(Node != nullptr && "Expecting a non-null matching result");
2318 }
2319
2320 static bool classof(const Gadget *G) {
2321 return G->getKind() == Kind::UUCAddAssign;
2322 }
2323
2324 static bool matches(const Stmt *S,
2325 llvm::SmallVectorImpl<MatchResult> &Results) {
2326 size_t SizeBefore = Results.size();
2327 findStmtsInUnspecifiedUntypedContext(S, InnerMatcher: [&Results](const Stmt *S) {
2328 const auto *E = dyn_cast<Expr>(Val: S);
2329 if (!E)
2330 return;
2331 const auto *BO = dyn_cast<BinaryOperator>(Val: E->IgnoreImpCasts());
2332 if (!BO || BO->getOpcode() != BO_AddAssign)
2333 return;
2334 const auto *DRE = dyn_cast<DeclRefExpr>(Val: BO->getLHS());
2335 if (!DRE || !hasPointerType(E: *DRE) || !isSupportedVariable(Node: *DRE))
2336 return;
2337 MatchResult R;
2338 R.addNode(ID: UUCAddAssignTag, Node: DynTypedNode::create(Node: *BO));
2339 R.addNode(ID: OffsetTag, Node: DynTypedNode::create(Node: *BO->getRHS()));
2340 Results.emplace_back(Args: std::move(R));
2341 });
2342 return SizeBefore != Results.size();
2343 }
2344
2345 virtual std::optional<FixItList>
2346 getFixits(const FixitStrategy &S) const override;
2347 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2348
2349 virtual DeclUseList getClaimedVarUseSites() const override {
2350 return {dyn_cast<DeclRefExpr>(Val: Node->getLHS())};
2351 }
2352};
2353
2354// Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
2355// ptr)`:
2356class DerefSimplePtrArithFixableGadget : public FixableGadget {
2357 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
2358 static constexpr const char *const DerefOpTag = "DerefOp";
2359 static constexpr const char *const AddOpTag = "AddOp";
2360 static constexpr const char *const OffsetTag = "Offset";
2361
2362 const DeclRefExpr *BaseDeclRefExpr = nullptr;
2363 const UnaryOperator *DerefOp = nullptr;
2364 const BinaryOperator *AddOp = nullptr;
2365 const IntegerLiteral *Offset = nullptr;
2366
2367public:
2368 DerefSimplePtrArithFixableGadget(const MatchResult &Result)
2369 : FixableGadget(Kind::DerefSimplePtrArithFixable),
2370 BaseDeclRefExpr(Result.getNodeAs<DeclRefExpr>(ID: BaseDeclRefExprTag)),
2371 DerefOp(Result.getNodeAs<UnaryOperator>(ID: DerefOpTag)),
2372 AddOp(Result.getNodeAs<BinaryOperator>(ID: AddOpTag)),
2373 Offset(Result.getNodeAs<IntegerLiteral>(ID: OffsetTag)) {}
2374
2375 static bool matches(const Stmt *S,
2376 llvm::SmallVectorImpl<MatchResult> &Results) {
2377 auto IsPtr = [](const Expr *E, MatchResult &R) {
2378 if (!E || !hasPointerType(E: *E))
2379 return false;
2380 const auto *DRE = dyn_cast<DeclRefExpr>(Val: E->IgnoreImpCasts());
2381 if (!DRE || !isSupportedVariable(Node: *DRE))
2382 return false;
2383 R.addNode(ID: BaseDeclRefExprTag, Node: DynTypedNode::create(Node: *DRE));
2384 return true;
2385 };
2386 const auto IsPlusOverPtrAndInteger = [&IsPtr](const Expr *E,
2387 MatchResult &R) {
2388 const auto *BO = dyn_cast<BinaryOperator>(Val: E);
2389 if (!BO || BO->getOpcode() != BO_Add)
2390 return false;
2391
2392 const auto *LHS = BO->getLHS();
2393 const auto *RHS = BO->getRHS();
2394 if (isa<IntegerLiteral>(Val: RHS) && IsPtr(LHS, R)) {
2395 R.addNode(ID: OffsetTag, Node: DynTypedNode::create(Node: *RHS));
2396 R.addNode(ID: AddOpTag, Node: DynTypedNode::create(Node: *BO));
2397 return true;
2398 }
2399 if (isa<IntegerLiteral>(Val: LHS) && IsPtr(RHS, R)) {
2400 R.addNode(ID: OffsetTag, Node: DynTypedNode::create(Node: *LHS));
2401 R.addNode(ID: AddOpTag, Node: DynTypedNode::create(Node: *BO));
2402 return true;
2403 }
2404 return false;
2405 };
2406 size_t SizeBefore = Results.size();
2407 const auto InnerMatcher = [&IsPlusOverPtrAndInteger,
2408 &Results](const Expr *E) {
2409 const auto *UO = dyn_cast<UnaryOperator>(Val: E);
2410 if (!UO || UO->getOpcode() != UO_Deref)
2411 return;
2412
2413 const auto *Operand = UO->getSubExpr()->IgnoreParens();
2414 MatchResult R;
2415 if (IsPlusOverPtrAndInteger(Operand, R)) {
2416 R.addNode(ID: DerefOpTag, Node: DynTypedNode::create(Node: *UO));
2417 Results.emplace_back(Args: std::move(R));
2418 }
2419 };
2420 findStmtsInUnspecifiedLvalueContext(S, OnResult: InnerMatcher);
2421 return SizeBefore != Results.size();
2422 }
2423
2424 virtual std::optional<FixItList>
2425 getFixits(const FixitStrategy &s) const final;
2426 SourceLocation getSourceLoc() const override {
2427 return DerefOp->getBeginLoc();
2428 }
2429
2430 virtual DeclUseList getClaimedVarUseSites() const final {
2431 return {BaseDeclRefExpr};
2432 }
2433};
2434
2435class WarningGadgetMatcher : public FastMatcher {
2436
2437public:
2438 WarningGadgetMatcher(WarningGadgetList &WarningGadgets)
2439 : WarningGadgets(WarningGadgets) {}
2440
2441 bool matches(const DynTypedNode &DynNode, ASTContext &Ctx,
2442 const UnsafeBufferUsageHandler &Handler) override {
2443 const Stmt *S = DynNode.get<Stmt>();
2444 if (!S)
2445 return false;
2446
2447 MatchResult Result;
2448#define WARNING_GADGET(name) \
2449 if (name##Gadget::matches(S, Ctx, Result) && \
2450 notInSafeBufferOptOut(*S, &Handler)) { \
2451 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
2452 return true; \
2453 }
2454#define WARNING_OPTIONAL_GADGET(name) \
2455 if (name##Gadget::matches(S, Ctx, &Handler, Result) && \
2456 notInSafeBufferOptOut(*S, &Handler)) { \
2457 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
2458 return true; \
2459 }
2460#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
2461 return false;
2462 }
2463
2464private:
2465 WarningGadgetList &WarningGadgets;
2466};
2467
2468class FixableGadgetMatcher : public FastMatcher {
2469
2470public:
2471 FixableGadgetMatcher(FixableGadgetList &FixableGadgets,
2472 DeclUseTracker &Tracker)
2473 : FixableGadgets(FixableGadgets), Tracker(Tracker) {}
2474
2475 bool matches(const DynTypedNode &DynNode, ASTContext &Ctx,
2476 const UnsafeBufferUsageHandler &Handler) override {
2477 bool matchFound = false;
2478 const Stmt *S = DynNode.get<Stmt>();
2479 if (!S) {
2480 return matchFound;
2481 }
2482
2483 llvm::SmallVector<MatchResult> Results;
2484#define FIXABLE_GADGET(name) \
2485 if (name##Gadget::matches(S, Results)) { \
2486 for (const auto &R : Results) { \
2487 FixableGadgets.push_back(std::make_unique<name##Gadget>(R)); \
2488 matchFound = true; \
2489 } \
2490 Results = {}; \
2491 }
2492#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
2493 // In parallel, match all DeclRefExprs so that to find out
2494 // whether there are any uncovered by gadgets.
2495 if (auto *DRE = findDeclRefExpr(S); DRE) {
2496 Tracker.discoverUse(DRE);
2497 matchFound = true;
2498 }
2499 // Also match DeclStmts because we'll need them when fixing
2500 // their underlying VarDecls that otherwise don't have
2501 // any backreferences to DeclStmts.
2502 if (auto *DS = findDeclStmt(S); DS) {
2503 Tracker.discoverDecl(DS);
2504 matchFound = true;
2505 }
2506 return matchFound;
2507 }
2508
2509private:
2510 const DeclRefExpr *findDeclRefExpr(const Stmt *S) {
2511 const auto *DRE = dyn_cast<DeclRefExpr>(Val: S);
2512 if (!DRE || (!hasPointerType(E: *DRE) && !hasArrayType(E: *DRE)))
2513 return nullptr;
2514 const Decl *D = DRE->getDecl();
2515 if (!D || (!isa<VarDecl>(Val: D) && !isa<BindingDecl>(Val: D)))
2516 return nullptr;
2517 return DRE;
2518 }
2519 const DeclStmt *findDeclStmt(const Stmt *S) {
2520 const auto *DS = dyn_cast<DeclStmt>(Val: S);
2521 if (!DS)
2522 return nullptr;
2523 return DS;
2524 }
2525 FixableGadgetList &FixableGadgets;
2526 DeclUseTracker &Tracker;
2527};
2528
2529// Scan the function and return a list of gadgets found with provided kits.
2530static void findGadgets(const Stmt *S, ASTContext &Ctx,
2531 const UnsafeBufferUsageHandler &Handler,
2532 bool EmitSuggestions, FixableGadgetList &FixableGadgets,
2533 WarningGadgetList &WarningGadgets,
2534 DeclUseTracker &Tracker) {
2535 WarningGadgetMatcher WMatcher{WarningGadgets};
2536 forEachDescendantEvaluatedStmt(S, Ctx, Handler, Matcher&: WMatcher);
2537 if (EmitSuggestions) {
2538 FixableGadgetMatcher FMatcher{FixableGadgets, Tracker};
2539 forEachDescendantStmt(S, Ctx, Handler, Matcher&: FMatcher);
2540 }
2541}
2542
// Strict ordering of AST nodes by the raw encoding of their begin location.
template <typename NodeTy> struct CompareNode {
  bool operator()(const NodeTy *N1, const NodeTy *N2) const {
    const auto FirstKey = N1->getBeginLoc().getRawEncoding();
    const auto SecondKey = N2->getBeginLoc().getRawEncoding();
    return FirstKey < SecondKey;
  }
};
2550
2551std::set<const Expr *> clang::findUnsafePointers(const FunctionDecl *FD) {
2552 class MockReporter : public UnsafeBufferUsageHandler {
2553 public:
2554 MockReporter() {}
2555 void handleUnsafeOperation(const Stmt *, bool, ASTContext &) override {}
2556 void handleUnsafeLibcCall(const CallExpr *, unsigned, ASTContext &,
2557 const Expr *UnsafeArg = nullptr) override {}
2558 void handleUnsafeOperationInContainer(const Stmt *, bool,
2559 ASTContext &) override {}
2560 void handleUnsafeVariableGroup(const VarDecl *,
2561 const VariableGroupsManager &, FixItList &&,
2562 const Decl *,
2563 const FixitStrategy &) override {}
2564 bool isSafeBufferOptOut(const SourceLocation &) const override {
2565 return false;
2566 }
2567 bool ignoreUnsafeBufferInContainer(const SourceLocation &) const override {
2568 return false;
2569 }
2570 bool ignoreUnsafeBufferInLibcCall(const SourceLocation &) const override {
2571 return false;
2572 }
2573 std::string getUnsafeBufferUsageAttributeTextAt(
2574 SourceLocation, StringRef WSSuffix = "") const override {
2575 return "";
2576 }
2577 };
2578
2579 FixableGadgetList FixableGadgets;
2580 WarningGadgetList WarningGadgets;
2581 DeclUseTracker Tracker;
2582 MockReporter IgnoreHandler;
2583
2584 findGadgets(S: FD->getBody(), Ctx&: FD->getASTContext(), Handler: IgnoreHandler, EmitSuggestions: false,
2585 FixableGadgets, WarningGadgets, Tracker);
2586
2587 std::set<const Expr *> Result;
2588 for (auto &G : WarningGadgets) {
2589 for (const Expr *E : G->getUnsafePtrs()) {
2590 Result.insert(x: E);
2591 }
2592 }
2593
2594 return Result;
2595}
2596
2597struct WarningGadgetSets {
2598 std::map<const VarDecl *, std::set<const WarningGadget *>,
2599 // To keep keys sorted by their locations in the map so that the
2600 // order is deterministic:
2601 CompareNode<VarDecl>>
2602 byVar;
2603 // These Gadgets are not related to pointer variables (e. g. temporaries).
2604 llvm::SmallVector<const WarningGadget *, 16> noVar;
2605};
2606
2607static WarningGadgetSets
2608groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) {
2609 WarningGadgetSets result;
2610 // If some gadgets cover more than one
2611 // variable, they'll appear more than once in the map.
2612 for (auto &G : AllUnsafeOperations) {
2613 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
2614
2615 bool AssociatedWithVarDecl = false;
2616 for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
2617 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
2618 result.byVar[VD].insert(x: G.get());
2619 AssociatedWithVarDecl = true;
2620 }
2621 }
2622
2623 if (!AssociatedWithVarDecl) {
2624 result.noVar.push_back(Elt: G.get());
2625 continue;
2626 }
2627 }
2628 return result;
2629}
2630
2631struct FixableGadgetSets {
2632 std::map<const VarDecl *, std::set<const FixableGadget *>,
2633 // To keep keys sorted by their locations in the map so that the
2634 // order is deterministic:
2635 CompareNode<VarDecl>>
2636 byVar;
2637};
2638
2639static FixableGadgetSets
2640groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {
2641 FixableGadgetSets FixablesForUnsafeVars;
2642 for (auto &F : AllFixableOperations) {
2643 DeclUseList DREs = F->getClaimedVarUseSites();
2644
2645 for (const DeclRefExpr *DRE : DREs) {
2646 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
2647 FixablesForUnsafeVars.byVar[VD].insert(x: F.get());
2648 }
2649 }
2650 }
2651 return FixablesForUnsafeVars;
2652}
2653
2654bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts,
2655 const SourceManager &SM) {
2656 // A simple interval overlap detection algorithm. Sorts all ranges by their
2657 // begin location then finds the first overlap in one pass.
2658 std::vector<const FixItHint *> All; // a copy of `FixIts`
2659
2660 for (const FixItHint &H : FixIts)
2661 All.push_back(x: &H);
2662 std::sort(first: All.begin(), last: All.end(),
2663 comp: [&SM](const FixItHint *H1, const FixItHint *H2) {
2664 return SM.isBeforeInTranslationUnit(LHS: H1->RemoveRange.getBegin(),
2665 RHS: H2->RemoveRange.getBegin());
2666 });
2667
2668 const FixItHint *CurrHint = nullptr;
2669
2670 for (const FixItHint *Hint : All) {
2671 if (!CurrHint ||
2672 SM.isBeforeInTranslationUnit(LHS: CurrHint->RemoveRange.getEnd(),
2673 RHS: Hint->RemoveRange.getBegin())) {
2674 // Either to initialize `CurrHint` or `CurrHint` does not
2675 // overlap with `Hint`:
2676 CurrHint = Hint;
2677 } else
2678 // In case `Hint` overlaps the `CurrHint`, we found at least one
2679 // conflict:
2680 return true;
2681 }
2682 return false;
2683}
2684
2685std::optional<FixItList>
2686PtrToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {
2687 const auto *LeftVD = cast<VarDecl>(Val: PtrLHS->getDecl());
2688 const auto *RightVD = cast<VarDecl>(Val: PtrRHS->getDecl());
2689 switch (S.lookup(VD: LeftVD)) {
2690 case FixitStrategy::Kind::Span:
2691 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Span)
2692 return FixItList{};
2693 return std::nullopt;
2694 case FixitStrategy::Kind::Wontfix:
2695 return std::nullopt;
2696 case FixitStrategy::Kind::Iterator:
2697 case FixitStrategy::Kind::Array:
2698 return std::nullopt;
2699 case FixitStrategy::Kind::Vector:
2700 llvm_unreachable("unsupported strategies for FixableGadgets");
2701 }
2702 return std::nullopt;
2703}
2704
2705/// \returns fixit that adds .data() call after \DRE.
2706static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx,
2707 const DeclRefExpr *DRE);
2708
2709std::optional<FixItList>
2710CArrayToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {
2711 const auto *LeftVD = cast<VarDecl>(Val: PtrLHS->getDecl());
2712 const auto *RightVD = cast<VarDecl>(Val: PtrRHS->getDecl());
2713 // TLDR: Implementing fixits for non-Wontfix strategy on both LHS and RHS is
2714 // non-trivial.
2715 //
2716 // CArrayToPtrAssignmentGadget doesn't have strategy implications because
2717 // constant size array propagates its bounds. Because of that LHS and RHS are
2718 // addressed by two different fixits.
2719 //
2720 // At the same time FixitStrategy S doesn't reflect what group a fixit belongs
2721 // to and can't be generally relied on in multi-variable Fixables!
2722 //
2723 // E. g. If an instance of this gadget is fixing variable on LHS then the
2724 // variable on RHS is fixed by a different fixit and its strategy for LHS
2725 // fixit is as if Wontfix.
2726 //
2727 // The only exception is Wontfix strategy for a given variable as that is
2728 // valid for any fixit produced for the given input source code.
2729 if (S.lookup(VD: LeftVD) == FixitStrategy::Kind::Span) {
2730 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Wontfix) {
2731 return FixItList{};
2732 }
2733 } else if (S.lookup(VD: LeftVD) == FixitStrategy::Kind::Wontfix) {
2734 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Array) {
2735 return createDataFixit(Ctx: RightVD->getASTContext(), DRE: PtrRHS);
2736 }
2737 }
2738 return std::nullopt;
2739}
2740
2741std::optional<FixItList>
2742PointerInitGadget::getFixits(const FixitStrategy &S) const {
2743 const auto *LeftVD = PtrInitLHS;
2744 const auto *RightVD = cast<VarDecl>(Val: PtrInitRHS->getDecl());
2745 switch (S.lookup(VD: LeftVD)) {
2746 case FixitStrategy::Kind::Span:
2747 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Span)
2748 return FixItList{};
2749 return std::nullopt;
2750 case FixitStrategy::Kind::Wontfix:
2751 return std::nullopt;
2752 case FixitStrategy::Kind::Iterator:
2753 case FixitStrategy::Kind::Array:
2754 return std::nullopt;
2755 case FixitStrategy::Kind::Vector:
2756 llvm_unreachable("unsupported strategies for FixableGadgets");
2757 }
2758 return std::nullopt;
2759}
2760
2761static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD,
2762 const ASTContext &Ctx) {
2763 if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) {
2764 if (ConstVal->isNegative())
2765 return false;
2766 } else if (!Expr->getType()->isUnsignedIntegerType())
2767 return false;
2768 return true;
2769}
2770
2771std::optional<FixItList>
2772ULCArraySubscriptGadget::getFixits(const FixitStrategy &S) const {
2773 if (const auto *DRE =
2774 dyn_cast<DeclRefExpr>(Val: Node->getBase()->IgnoreImpCasts()))
2775 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
2776 switch (S.lookup(VD)) {
2777 case FixitStrategy::Kind::Span: {
2778
2779 // If the index has a negative constant value, we give up as no valid
2780 // fix-it can be generated:
2781 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in!
2782 VD->getASTContext();
2783 if (!isNonNegativeIntegerExpr(Expr: Node->getIdx(), VD, Ctx))
2784 return std::nullopt;
2785 // no-op is a good fix-it, otherwise
2786 return FixItList{};
2787 }
2788 case FixitStrategy::Kind::Array:
2789 return FixItList{};
2790 case FixitStrategy::Kind::Wontfix:
2791 case FixitStrategy::Kind::Iterator:
2792 case FixitStrategy::Kind::Vector:
2793 llvm_unreachable("unsupported strategies for FixableGadgets");
2794 }
2795 }
2796 return std::nullopt;
2797}
2798
2799static std::optional<FixItList> // forward declaration
2800fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
2801
2802std::optional<FixItList>
2803UPCAddressofArraySubscriptGadget::getFixits(const FixitStrategy &S) const {
2804 auto DREs = getClaimedVarUseSites();
2805 const auto *VD = cast<VarDecl>(Val: DREs.front()->getDecl());
2806
2807 switch (S.lookup(VD)) {
2808 case FixitStrategy::Kind::Span:
2809 return fixUPCAddressofArraySubscriptWithSpan(Node);
2810 case FixitStrategy::Kind::Wontfix:
2811 case FixitStrategy::Kind::Iterator:
2812 case FixitStrategy::Kind::Array:
2813 return std::nullopt;
2814 case FixitStrategy::Kind::Vector:
2815 llvm_unreachable("unsupported strategies for FixableGadgets");
2816 }
2817 return std::nullopt; // something went wrong, no fix-it
2818}
2819
2820// FIXME: this function should be customizable through format
2821static StringRef getEndOfLine() {
2822 static const char *const EOL = "\n";
2823 return EOL;
2824}
2825
2826// Returns the text indicating that the user needs to provide input there:
2827static std::string
2828getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {
2829 std::string s = std::string("<# ");
2830 s += HintTextToUser;
2831 s += " #>";
2832 return s;
2833}
2834
2835// Return the source location of the last character of the AST `Node`.
2836template <typename NodeTy>
2837static std::optional<SourceLocation>
2838getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
2839 const LangOptions &LangOpts) {
2840 if (unsigned TkLen =
2841 Lexer::MeasureTokenLength(Loc: Node->getEndLoc(), SM, LangOpts)) {
2842 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
2843
2844 if (Loc.isValid())
2845 return Loc;
2846 }
2847 return std::nullopt;
2848}
2849
2850// We cannot fix a variable declaration if it has some other specifiers than the
2851// type specifier. Because the source ranges of those specifiers could overlap
2852// with the source range that is being replaced using fix-its. Especially when
2853// we often cannot obtain accurate source ranges of cv-qualified type
2854// specifiers.
2855// FIXME: also deal with type attributes
2856static bool hasUnsupportedSpecifiers(const VarDecl *VD,
2857 const SourceManager &SM) {
2858 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the
2859 // source range of `VD`:
2860 bool AttrRangeOverlapping = llvm::any_of(Range: VD->attrs(), P: [&](Attr *At) -> bool {
2861 return !(SM.isBeforeInTranslationUnit(LHS: At->getRange().getEnd(),
2862 RHS: VD->getBeginLoc())) &&
2863 !(SM.isBeforeInTranslationUnit(LHS: VD->getEndLoc(),
2864 RHS: At->getRange().getBegin()));
2865 });
2866 return VD->isInlineSpecified() || VD->isConstexpr() ||
2867 VD->hasConstantInitialization() || !VD->hasLocalStorage() ||
2868 AttrRangeOverlapping;
2869}
2870
2871// Returns the `SourceRange` of `D`. The reason why this function exists is
2872// that `D->getSourceRange()` may return a range where the end location is the
2873// starting location of the last token. The end location of the source range
2874// returned by this function is the last location of the last token.
2875static SourceRange getSourceRangeToTokenEnd(const Decl *D,
2876 const SourceManager &SM,
2877 const LangOptions &LangOpts) {
2878 SourceLocation Begin = D->getBeginLoc();
2879 SourceLocation
2880 End = // `D->getEndLoc` should always return the starting location of the
2881 // last token, so we should get the end of the token
2882 Lexer::getLocForEndOfToken(Loc: D->getEndLoc(), Offset: 0, SM, LangOpts);
2883
2884 return SourceRange(Begin, End);
2885}
2886
2887// Returns the text of the name (with qualifiers) of a `FunctionDecl`.
2888static std::optional<StringRef> getFunNameText(const FunctionDecl *FD,
2889 const SourceManager &SM,
2890 const LangOptions &LangOpts) {
2891 SourceLocation BeginLoc = FD->getQualifier()
2892 ? FD->getQualifierLoc().getBeginLoc()
2893 : FD->getNameInfo().getBeginLoc();
2894 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the
2895 // last token:
2896 SourceLocation EndLoc = Lexer::getLocForEndOfToken(
2897 Loc: FD->getNameInfo().getEndLoc(), Offset: 0, SM, LangOpts);
2898 SourceRange NameRange{BeginLoc, EndLoc};
2899
2900 return getRangeText(SR: NameRange, SM, LangOpts);
2901}
2902
2903// Returns the text representing a `std::span` type where the element type is
2904// represented by `EltTyText`.
2905//
2906// Note the optional parameter `Qualifiers`: one needs to pass qualifiers
2907// explicitly if the element type needs to be qualified.
2908static std::string
2909getSpanTypeText(StringRef EltTyText,
2910 std::optional<Qualifiers> Quals = std::nullopt) {
2911 const char *const SpanOpen = "std::span<";
2912
2913 if (Quals)
2914 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>';
2915 return SpanOpen + EltTyText.str() + '>';
2916}
2917
2918std::optional<FixItList>
2919DerefSimplePtrArithFixableGadget::getFixits(const FixitStrategy &s) const {
2920 const VarDecl *VD = dyn_cast<VarDecl>(Val: BaseDeclRefExpr->getDecl());
2921
2922 if (VD && s.lookup(VD) == FixitStrategy::Kind::Span) {
2923 ASTContext &Ctx = VD->getASTContext();
2924 // std::span can't represent elements before its begin()
2925 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx))
2926 if (ConstVal->isNegative())
2927 return std::nullopt;
2928
2929 // note that the expr may (oddly) has multiple layers of parens
2930 // example:
2931 // *((..(pointer + 123)..))
2932 // goal:
2933 // pointer[123]
2934 // Fix-It:
2935 // remove '*('
2936 // replace ' + ' with '['
2937 // replace ')' with ']'
2938
2939 // example:
2940 // *((..(123 + pointer)..))
2941 // goal:
2942 // 123[pointer]
2943 // Fix-It:
2944 // remove '*('
2945 // replace ' + ' with '['
2946 // replace ')' with ']'
2947
2948 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS();
2949 const SourceManager &SM = Ctx.getSourceManager();
2950 const LangOptions &LangOpts = Ctx.getLangOpts();
2951 CharSourceRange StarWithTrailWhitespace =
2952 clang::CharSourceRange::getCharRange(B: DerefOp->getOperatorLoc(),
2953 E: LHS->getBeginLoc());
2954
2955 std::optional<SourceLocation> LHSLocation = getPastLoc(Node: LHS, SM, LangOpts);
2956 if (!LHSLocation)
2957 return std::nullopt;
2958
2959 CharSourceRange PlusWithSurroundingWhitespace =
2960 clang::CharSourceRange::getCharRange(B: *LHSLocation, E: RHS->getBeginLoc());
2961
2962 std::optional<SourceLocation> AddOpLocation =
2963 getPastLoc(Node: AddOp, SM, LangOpts);
2964 std::optional<SourceLocation> DerefOpLocation =
2965 getPastLoc(Node: DerefOp, SM, LangOpts);
2966
2967 if (!AddOpLocation || !DerefOpLocation)
2968 return std::nullopt;
2969
2970 CharSourceRange ClosingParenWithPrecWhitespace =
2971 clang::CharSourceRange::getCharRange(B: *AddOpLocation, E: *DerefOpLocation);
2972
2973 return FixItList{
2974 {FixItHint::CreateRemoval(RemoveRange: StarWithTrailWhitespace),
2975 FixItHint::CreateReplacement(RemoveRange: PlusWithSurroundingWhitespace, Code: "["),
2976 FixItHint::CreateReplacement(RemoveRange: ClosingParenWithPrecWhitespace, Code: "]")}};
2977 }
2978 return std::nullopt; // something wrong or unsupported, give up
2979}
2980
2981std::optional<FixItList>
2982PointerDereferenceGadget::getFixits(const FixitStrategy &S) const {
2983 const VarDecl *VD = cast<VarDecl>(Val: BaseDeclRefExpr->getDecl());
2984 switch (S.lookup(VD)) {
2985 case FixitStrategy::Kind::Span: {
2986 ASTContext &Ctx = VD->getASTContext();
2987 SourceManager &SM = Ctx.getSourceManager();
2988 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
2989 // Deletes the *operand
2990 CharSourceRange derefRange = clang::CharSourceRange::getCharRange(
2991 B: Op->getBeginLoc(), E: Op->getBeginLoc().getLocWithOffset(Offset: 1));
2992 // Inserts the [0]
2993 if (auto LocPastOperand =
2994 getPastLoc(Node: BaseDeclRefExpr, SM, LangOpts: Ctx.getLangOpts())) {
2995 return FixItList{{FixItHint::CreateRemoval(RemoveRange: derefRange),
2996 FixItHint::CreateInsertion(InsertionLoc: *LocPastOperand, Code: "[0]")}};
2997 }
2998 break;
2999 }
3000 case FixitStrategy::Kind::Iterator:
3001 case FixitStrategy::Kind::Array:
3002 return std::nullopt;
3003 case FixitStrategy::Kind::Vector:
3004 llvm_unreachable("FixitStrategy not implemented yet!");
3005 case FixitStrategy::Kind::Wontfix:
3006 llvm_unreachable("Invalid strategy!");
3007 }
3008
3009 return std::nullopt;
3010}
3011
3012static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx,
3013 const DeclRefExpr *DRE) {
3014 const SourceManager &SM = Ctx.getSourceManager();
3015 // Inserts the .data() after the DRE
3016 std::optional<SourceLocation> EndOfOperand =
3017 getPastLoc(Node: DRE, SM, LangOpts: Ctx.getLangOpts());
3018
3019 if (EndOfOperand)
3020 return FixItList{{FixItHint::CreateInsertion(InsertionLoc: *EndOfOperand, Code: ".data()")}};
3021
3022 return std::nullopt;
3023}
3024
3025// Generates fix-its replacing an expression of the form UPC(DRE) with
3026// `DRE.data()`
3027std::optional<FixItList>
3028UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const {
3029 const auto VD = cast<VarDecl>(Val: Node->getDecl());
3030 switch (S.lookup(VD)) {
3031 case FixitStrategy::Kind::Array:
3032 case FixitStrategy::Kind::Span: {
3033 return createDataFixit(Ctx: VD->getASTContext(), DRE: Node);
3034 // FIXME: Points inside a macro expansion.
3035 break;
3036 }
3037 case FixitStrategy::Kind::Wontfix:
3038 case FixitStrategy::Kind::Iterator:
3039 return std::nullopt;
3040 case FixitStrategy::Kind::Vector:
3041 llvm_unreachable("unsupported strategies for FixableGadgets");
3042 }
3043
3044 return std::nullopt;
3045}
3046
3047// Generates fix-its replacing an expression of the form `&DRE[e]` with
3048// `&DRE.data()[e]`:
3049static std::optional<FixItList>
3050fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
3051 const auto *ArraySub = cast<ArraySubscriptExpr>(Val: Node->getSubExpr());
3052 const auto *DRE = cast<DeclRefExpr>(Val: ArraySub->getBase()->IgnoreImpCasts());
3053 // FIXME: this `getASTContext` call is costly, we should pass the
3054 // ASTContext in:
3055 const ASTContext &Ctx = DRE->getDecl()->getASTContext();
3056 const Expr *Idx = ArraySub->getIdx();
3057 const SourceManager &SM = Ctx.getSourceManager();
3058 const LangOptions &LangOpts = Ctx.getLangOpts();
3059 std::stringstream SS;
3060 bool IdxIsLitZero = false;
3061
3062 if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
3063 if ((*ICE).isZero())
3064 IdxIsLitZero = true;
3065 std::optional<StringRef> DreString = getExprText(E: DRE, SM, LangOpts);
3066 if (!DreString)
3067 return std::nullopt;
3068
3069 if (IdxIsLitZero) {
3070 // If the index is literal zero, we produce the most concise fix-it:
3071 SS << (*DreString).str() << ".data()";
3072 } else {
3073 std::optional<StringRef> IndexString = getExprText(E: Idx, SM, LangOpts);
3074 if (!IndexString)
3075 return std::nullopt;
3076
3077 SS << "&" << (*DreString).str() << ".data()"
3078 << "[" << (*IndexString).str() << "]";
3079 }
3080 return FixItList{
3081 FixItHint::CreateReplacement(RemoveRange: Node->getSourceRange(), Code: SS.str())};
3082}
3083
3084std::optional<FixItList>
3085UUCAddAssignGadget::getFixits(const FixitStrategy &S) const {
3086 DeclUseList DREs = getClaimedVarUseSites();
3087
3088 if (DREs.size() != 1)
3089 return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we
3090 // give up
3091 if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DREs.front()->getDecl())) {
3092 if (S.lookup(VD) == FixitStrategy::Kind::Span) {
3093 FixItList Fixes;
3094
3095 const Stmt *AddAssignNode = Node;
3096 StringRef varName = VD->getName();
3097 const ASTContext &Ctx = VD->getASTContext();
3098
3099 if (!isNonNegativeIntegerExpr(Expr: Offset, VD, Ctx))
3100 return std::nullopt;
3101
3102 // To transform UUC(p += n) to UUC(p = p.subspan(..)):
3103 bool NotParenExpr =
3104 (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc());
3105 std::string SS = varName.str() + " = " + varName.str() + ".subspan";
3106 if (NotParenExpr)
3107 SS += "(";
3108
3109 std::optional<SourceLocation> AddAssignLocation = getEndCharLoc(
3110 Node: AddAssignNode, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3111 if (!AddAssignLocation)
3112 return std::nullopt;
3113
3114 Fixes.push_back(Elt: FixItHint::CreateReplacement(
3115 RemoveRange: SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()),
3116 Code: SS));
3117 if (NotParenExpr)
3118 Fixes.push_back(Elt: FixItHint::CreateInsertion(
3119 InsertionLoc: Offset->getEndLoc().getLocWithOffset(Offset: 1), Code: ")"));
3120 return Fixes;
3121 }
3122 }
3123 return std::nullopt; // Not in the cases that we can handle for now, give up.
3124}
3125
3126std::optional<FixItList>
3127UPCPreIncrementGadget::getFixits(const FixitStrategy &S) const {
3128 DeclUseList DREs = getClaimedVarUseSites();
3129
3130 if (DREs.size() != 1)
3131 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we
3132 // give up
3133 if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DREs.front()->getDecl())) {
3134 if (S.lookup(VD) == FixitStrategy::Kind::Span) {
3135 FixItList Fixes;
3136 std::stringstream SS;
3137 StringRef varName = VD->getName();
3138 const ASTContext &Ctx = VD->getASTContext();
3139
3140 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
3141 SS << "(" << varName.data() << " = " << varName.data()
3142 << ".subspan(1)).data()";
3143 std::optional<SourceLocation> PreIncLocation =
3144 getEndCharLoc(Node, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3145 if (!PreIncLocation)
3146 return std::nullopt;
3147
3148 Fixes.push_back(Elt: FixItHint::CreateReplacement(
3149 RemoveRange: SourceRange(Node->getBeginLoc(), *PreIncLocation), Code: SS.str()));
3150 return Fixes;
3151 }
3152 }
3153 return std::nullopt; // Not in the cases that we can handle for now, give up.
3154}
3155
3156// For a non-null initializer `Init` of `T *` type, this function returns
3157// `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
3158// to output stream.
3159// In many cases, this function cannot figure out the actual extent `S`. It
3160// then will use a place holder to replace `S` to ask users to fill `S` in. The
3161// initializer shall be used to initialize a variable of type `std::span<T>`.
3162// In some cases (e. g. constant size array) the initializer should remain
3163// unchanged and the function returns empty list. In case the function can't
3164// provide the right fixit it will return nullopt.
3165//
3166// FIXME: Support multi-level pointers
3167//
3168// Parameters:
3169// `Init` a pointer to the initializer expression
3170// `Ctx` a reference to the ASTContext
3171static std::optional<FixItList>
3172FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
3173 const StringRef UserFillPlaceHolder) {
3174 const SourceManager &SM = Ctx.getSourceManager();
3175 const LangOptions &LangOpts = Ctx.getLangOpts();
3176
3177 // If `Init` has a constant value that is (or equivalent to) a
3178 // NULL pointer, we use the default constructor to initialize the span
3179 // object, i.e., a `std:span` variable declaration with no initializer.
3180 // So the fix-it is just to remove the initializer.
3181 if (Init->isNullPointerConstant(
3182 Ctx,
3183 // FIXME: Why does this function not ask for `const ASTContext
3184 // &`? It should. Maybe worth an NFC patch later.
3185 NPC: Expr::NullPointerConstantValueDependence::
3186 NPC_ValueDependentIsNotNull)) {
3187 std::optional<SourceLocation> InitLocation =
3188 getEndCharLoc(Node: Init, SM, LangOpts);
3189 if (!InitLocation)
3190 return std::nullopt;
3191
3192 SourceRange SR(Init->getBeginLoc(), *InitLocation);
3193
3194 return FixItList{FixItHint::CreateRemoval(RemoveRange: SR)};
3195 }
3196
3197 FixItList FixIts{};
3198 std::string ExtentText = UserFillPlaceHolder.data();
3199 StringRef One = "1";
3200
3201 // Insert `{` before `Init`:
3202 FixIts.push_back(Elt: FixItHint::CreateInsertion(InsertionLoc: Init->getBeginLoc(), Code: "{"));
3203 // Try to get the data extent. Break into different cases:
3204 if (auto CxxNew = dyn_cast<CXXNewExpr>(Val: Init->IgnoreImpCasts())) {
3205 // In cases `Init` is `new T[n]` and there is no explicit cast over
3206 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
3207 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
3208 // simpler for the case where `Init` is `new T`.
3209 if (const Expr *Ext = CxxNew->getArraySize().value_or(u: nullptr)) {
3210 if (!Ext->HasSideEffects(Ctx)) {
3211 std::optional<StringRef> ExtentString = getExprText(E: Ext, SM, LangOpts);
3212 if (!ExtentString)
3213 return std::nullopt;
3214 ExtentText = *ExtentString;
3215 }
3216 } else if (!CxxNew->isArray())
3217 // Although the initializer is not allocating a buffer, the pointer
3218 // variable could still be used in buffer access operations.
3219 ExtentText = One;
3220 } else if (Ctx.getAsConstantArrayType(T: Init->IgnoreImpCasts()->getType())) {
3221 // std::span has a single parameter constructor for initialization with
3222 // constant size array. The size is auto-deduced as the constructor is a
3223 // function template. The correct fixit is empty - no changes should happen.
3224 return FixItList{};
3225 } else {
3226 // In cases `Init` is of the form `&Var` after stripping of implicit
3227 // casts, where `&` is the built-in operator, the extent is 1.
3228 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Val: Init->IgnoreImpCasts()))
3229 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
3230 isa_and_present<DeclRefExpr>(Val: AddrOfExpr->getSubExpr()))
3231 ExtentText = One;
3232 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
3233 // and explicit casting, etc. etc.
3234 }
3235
3236 SmallString<32> StrBuffer{};
3237 std::optional<SourceLocation> LocPassInit = getPastLoc(Node: Init, SM, LangOpts);
3238
3239 if (!LocPassInit)
3240 return std::nullopt;
3241
3242 StrBuffer.append(RHS: ", ");
3243 StrBuffer.append(RHS: ExtentText);
3244 StrBuffer.append(RHS: "}");
3245 FixIts.push_back(Elt: FixItHint::CreateInsertion(InsertionLoc: *LocPassInit, Code: StrBuffer.str()));
3246 return FixIts;
3247}
3248
// Attaches a debug note to the declaration `D` explaining why no fix-it was
// produced for it. The macro uses a variable named `Handler` from the
// enclosing scope, and expands to nothing when `NDEBUG` is defined.
#ifdef NDEBUG
#define DEBUG_NOTE_DECL_FAIL(D, Msg)
#else
#define DEBUG_NOTE_DECL_FAIL(D, Msg)                                           \
  Handler.addDebugNoteForVar((D), (D)->getBeginLoc(),                          \
                             "failed to produce fixit for declaration '" +     \
                                 (D)->getNameAsString() + "'" + (Msg))
#endif
3257
3258// For the given variable declaration with a pointer-to-T type, returns the text
3259// `std::span<T>`. If it is unable to generate the text, returns
3260// `std::nullopt`.
3261static std::optional<std::string>
3262createSpanTypeForVarDecl(const VarDecl *VD, const ASTContext &Ctx) {
3263 assert(VD->getType()->isPointerType());
3264
3265 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
3266 std::optional<std::string> PteTyText = getPointeeTypeText(
3267 VD, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts(), QualifiersToAppend: &PteTyQualifiers);
3268
3269 if (!PteTyText)
3270 return std::nullopt;
3271
3272 std::string SpanTyText = "std::span<";
3273
3274 SpanTyText.append(str: *PteTyText);
3275 // Append qualifiers to span element type if any:
3276 if (PteTyQualifiers) {
3277 SpanTyText.append(s: " ");
3278 SpanTyText.append(str: PteTyQualifiers->getAsString());
3279 }
3280 SpanTyText.append(s: ">");
3281 return SpanTyText;
3282}
3283
3284// For a `VarDecl` of the form `T * var (= Init)?`, this
3285// function generates fix-its that
3286// 1) replace `T * var` with `std::span<T> var`; and
3287// 2) change `Init` accordingly to a span constructor, if it exists.
3288//
3289// FIXME: support Multi-level pointers
3290//
3291// Parameters:
3292// `D` a pointer the variable declaration node
3293// `Ctx` a reference to the ASTContext
3294// `UserFillPlaceHolder` the user-input placeholder text
3295// Returns:
3296// the non-empty fix-it list, if fix-its are successfuly generated; empty
3297// list otherwise.
3298static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx,
3299 const StringRef UserFillPlaceHolder,
3300 UnsafeBufferUsageHandler &Handler) {
3301 if (hasUnsupportedSpecifiers(VD: D, SM: Ctx.getSourceManager()))
3302 return {};
3303
3304 FixItList FixIts{};
3305 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(VD: D, Ctx);
3306
3307 if (!SpanTyText) {
3308 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type");
3309 return {};
3310 }
3311
3312 // Will hold the text for `std::span<T> Ident`:
3313 std::stringstream SS;
3314
3315 SS << *SpanTyText;
3316 // Fix the initializer if it exists:
3317 if (const Expr *Init = D->getInit()) {
3318 std::optional<FixItList> InitFixIts =
3319 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder);
3320 if (!InitFixIts)
3321 return {};
3322 FixIts.insert(I: FixIts.end(), From: std::make_move_iterator(i: InitFixIts->begin()),
3323 To: std::make_move_iterator(i: InitFixIts->end()));
3324 }
3325 // For declaration of the form `T * ident = init;`, we want to replace
3326 // `T * ` with `std::span<T>`.
3327 // We ignore CV-qualifiers so for `T * const ident;` we also want to replace
3328 // just `T *` with `std::span<T>`.
3329 const SourceLocation EndLocForReplacement = D->getTypeSpecEndLoc();
3330 if (!EndLocForReplacement.isValid()) {
3331 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration");
3332 return {};
3333 }
3334 // The only exception is that for `T *ident` we'll add a single space between
3335 // "std::span<T>" and "ident".
3336 // FIXME: The condition is false for identifiers expended from macros.
3337 if (EndLocForReplacement.getLocWithOffset(Offset: 1) == getVarDeclIdentifierLoc(VD: D))
3338 SS << " ";
3339
3340 FixIts.push_back(Elt: FixItHint::CreateReplacement(
3341 RemoveRange: SourceRange(D->getBeginLoc(), EndLocForReplacement), Code: SS.str()));
3342 return FixIts;
3343}
3344
3345static bool hasConflictingOverload(const FunctionDecl *FD) {
3346 return !FD->getDeclContext()->lookup(Name: FD->getDeclName()).isSingleResult();
3347}
3348
3349// For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
3350// types, this function produces fix-its to make the change self-contained. Let
3351// 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
3352// entity defined by the `FunctionDecl` after the change to the parameters.
3353// Fix-its produced by this function are
3354// 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
3355// of 'F';
3356// 2. Create a declaration of "NewF" next to each declaration of `F`;
//   3. Create a definition of "F" (as its original definition now belongs
//      to "NewF") next to its original definition.  The body of the created
//      definition calls "NewF".
3360//
3361// Example:
3362//
3363// void f(int *p); // original declaration
3364// void f(int *p) { // original definition
3365// p[5];
3366// }
3367//
3368// To change the parameter `p` to be of `std::span<int>` type, we
3369// also add overloads:
3370//
3371// [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
3372// void f(std::span<int> p); // added overload decl
3373// void f(std::span<int> p) { // original def where param is changed
3374// p[5];
3375// }
3376// [[clang::unsafe_buffer_usage]] void f(int *p) { // added def
3377// return f(std::span(p, <# size #>));
3378// }
3379//
3380static std::optional<FixItList>
3381createOverloadsForFixedParams(const FixitStrategy &S, const FunctionDecl *FD,
3382 const ASTContext &Ctx,
3383 UnsafeBufferUsageHandler &Handler) {
3384 // FIXME: need to make this conflict checking better:
3385 if (hasConflictingOverload(FD))
3386 return std::nullopt;
3387
3388 const SourceManager &SM = Ctx.getSourceManager();
3389 const LangOptions &LangOpts = Ctx.getLangOpts();
3390 const unsigned NumParms = FD->getNumParams();
3391 std::vector<std::string> NewTysTexts(NumParms);
3392 std::vector<bool> ParmsMask(NumParms, false);
3393 bool AtLeastOneParmToFix = false;
3394
3395 for (unsigned i = 0; i < NumParms; i++) {
3396 const ParmVarDecl *PVD = FD->getParamDecl(i);
3397
3398 if (S.lookup(VD: PVD) == FixitStrategy::Kind::Wontfix)
3399 continue;
3400 if (S.lookup(VD: PVD) != FixitStrategy::Kind::Span)
3401 // Not supported, not suppose to happen:
3402 return std::nullopt;
3403
3404 std::optional<Qualifiers> PteTyQuals = std::nullopt;
3405 std::optional<std::string> PteTyText =
3406 getPointeeTypeText(VD: PVD, SM, LangOpts, QualifiersToAppend: &PteTyQuals);
3407
3408 if (!PteTyText)
3409 // something wrong in obtaining the text of the pointee type, give up
3410 return std::nullopt;
3411 // FIXME: whether we should create std::span type depends on the
3412 // FixitStrategy.
3413 NewTysTexts[i] = getSpanTypeText(EltTyText: *PteTyText, Quals: PteTyQuals);
3414 ParmsMask[i] = true;
3415 AtLeastOneParmToFix = true;
3416 }
3417 if (!AtLeastOneParmToFix)
3418 // No need to create function overloads:
3419 return {};
3420 // FIXME Respect indentation of the original code.
3421
3422 // A lambda that creates the text representation of a function declaration
3423 // with the new type signatures:
3424 const auto NewOverloadSignatureCreator =
3425 [&SM, &LangOpts, &NewTysTexts,
3426 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
3427 std::stringstream SS;
3428
3429 SS << ";";
3430 SS << getEndOfLine().str();
3431 // Append: ret-type func-name "("
3432 if (auto Prefix = getRangeText(
3433 SR: SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()),
3434 SM, LangOpts))
3435 SS << Prefix->str();
3436 else
3437 return std::nullopt; // give up
3438 // Append: parameter-type-list
3439 const unsigned NumParms = FD->getNumParams();
3440
3441 for (unsigned i = 0; i < NumParms; i++) {
3442 const ParmVarDecl *Parm = FD->getParamDecl(i);
3443
3444 if (Parm->isImplicit())
3445 continue;
3446 if (ParmsMask[i]) {
3447 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its
3448 // new type:
3449 SS << NewTysTexts[i];
3450 // print parameter name if provided:
3451 if (IdentifierInfo *II = Parm->getIdentifier())
3452 SS << ' ' << II->getName().str();
3453 } else if (auto ParmTypeText =
3454 getRangeText(SR: getSourceRangeToTokenEnd(D: Parm, SM, LangOpts),
3455 SM, LangOpts)) {
3456 // print the whole `Parm` without modification:
3457 SS << ParmTypeText->str();
3458 } else
3459 return std::nullopt; // something wrong, give up
3460 if (i != NumParms - 1)
3461 SS << ", ";
3462 }
3463 SS << ")";
3464 return SS.str();
3465 };
3466
3467 // A lambda that creates the text representation of a function definition with
3468 // the original signature:
3469 const auto OldOverloadDefCreator =
3470 [&Handler, &SM, &LangOpts, &NewTysTexts,
3471 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
3472 std::stringstream SS;
3473
3474 SS << getEndOfLine().str();
3475 // Append: attr-name ret-type func-name "(" param-list ")" "{"
3476 if (auto FDPrefix = getRangeText(
3477 SR: SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM,
3478 LangOpts))
3479 SS << Handler.getUnsafeBufferUsageAttributeTextAt(Loc: FD->getBeginLoc(), WSSuffix: " ")
3480 << FDPrefix->str() << "{";
3481 else
3482 return std::nullopt;
3483 // Append: "return" func-name "("
3484 if (auto FunQualName = getFunNameText(FD, SM, LangOpts))
3485 SS << "return " << FunQualName->str() << "(";
3486 else
3487 return std::nullopt;
3488
3489 // Append: arg-list
3490 const unsigned NumParms = FD->getNumParams();
3491 for (unsigned i = 0; i < NumParms; i++) {
3492 const ParmVarDecl *Parm = FD->getParamDecl(i);
3493
3494 if (Parm->isImplicit())
3495 continue;
3496 // FIXME: If a parameter has no name, it is unused in the
3497 // definition. So we could just leave it as it is.
3498 if (!Parm->getIdentifier())
3499 // If a parameter of a function definition has no name:
3500 return std::nullopt;
3501 if (ParmsMask[i])
3502 // This is our spanified paramter!
3503 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str()
3504 << ", " << getUserFillPlaceHolder(HintTextToUser: "size") << ")";
3505 else
3506 SS << Parm->getIdentifier()->getName().str();
3507 if (i != NumParms - 1)
3508 SS << ", ";
3509 }
3510 // finish call and the body
3511 SS << ");}" << getEndOfLine().str();
3512 // FIXME: 80-char line formatting?
3513 return SS.str();
3514 };
3515
3516 FixItList FixIts{};
3517 for (FunctionDecl *FReDecl : FD->redecls()) {
3518 std::optional<SourceLocation> Loc = getPastLoc(Node: FReDecl, SM, LangOpts);
3519
3520 if (!Loc)
3521 return {};
3522 if (FReDecl->isThisDeclarationADefinition()) {
3523 assert(FReDecl == FD && "inconsistent function definition");
3524 // Inserts a definition with the old signature to the end of
3525 // `FReDecl`:
3526 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl))
3527 FixIts.emplace_back(Args: FixItHint::CreateInsertion(InsertionLoc: *Loc, Code: *OldOverloadDef));
3528 else
3529 return {}; // give up
3530 } else {
3531 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`:
3532 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) {
3533 FixIts.emplace_back(Args: FixItHint::CreateInsertion(
3534 InsertionLoc: FReDecl->getBeginLoc(), Code: Handler.getUnsafeBufferUsageAttributeTextAt(
3535 Loc: FReDecl->getBeginLoc(), WSSuffix: " ")));
3536 }
3537 // Inserts a declaration with the new signature to the end of `FReDecl`:
3538 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl))
3539 FixIts.emplace_back(Args: FixItHint::CreateInsertion(InsertionLoc: *Loc, Code: *NewOverloadDecl));
3540 else
3541 return {};
3542 }
3543 }
3544 return FixIts;
3545}
3546
3547// To fix a `ParmVarDecl` to be of `std::span` type.
3548static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx,
3549 UnsafeBufferUsageHandler &Handler) {
3550 if (hasUnsupportedSpecifiers(VD: PVD, SM: Ctx.getSourceManager())) {
3551 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)");
3552 return {};
3553 }
3554 if (PVD->hasDefaultArg()) {
3555 // FIXME: generate fix-its for default values:
3556 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg");
3557 return {};
3558 }
3559
3560 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
3561 std::optional<std::string> PteTyText = getPointeeTypeText(
3562 VD: PVD, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts(), QualifiersToAppend: &PteTyQualifiers);
3563
3564 if (!PteTyText) {
3565 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type");
3566 return {};
3567 }
3568
3569 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName();
3570
3571 if (!PVDNameText) {
3572 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name");
3573 return {};
3574 }
3575
3576 std::stringstream SS;
3577 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(VD: PVD, Ctx);
3578
3579 if (PteTyQualifiers)
3580 // Append qualifiers if they exist:
3581 SS << getSpanTypeText(EltTyText: *PteTyText, Quals: PteTyQualifiers);
3582 else
3583 SS << getSpanTypeText(EltTyText: *PteTyText);
3584 // Append qualifiers to the type of the parameter:
3585 if (PVD->getType().hasQualifiers())
3586 SS << ' ' << PVD->getType().getQualifiers().getAsString();
3587 // Append parameter's name:
3588 SS << ' ' << PVDNameText->str();
3589 // Add replacement fix-it:
3590 return {FixItHint::CreateReplacement(RemoveRange: PVD->getSourceRange(), Code: SS.str())};
3591}
3592
3593static FixItList fixVariableWithSpan(const VarDecl *VD,
3594 const DeclUseTracker &Tracker,
3595 ASTContext &Ctx,
3596 UnsafeBufferUsageHandler &Handler) {
3597 const DeclStmt *DS = Tracker.lookupDecl(VD);
3598 if (!DS) {
3599 DEBUG_NOTE_DECL_FAIL(VD,
3600 " : variables declared this way not implemented yet");
3601 return {};
3602 }
3603 if (!DS->isSingleDecl()) {
3604 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
3605 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls");
3606 return {};
3607 }
3608 // Currently DS is an unused variable but we'll need it when
3609 // non-single decls are implemented, where the pointee type name
3610 // and the '*' are spread around the place.
3611 (void)DS;
3612
3613 // FIXME: handle cases where DS has multiple declarations
3614 return fixLocalVarDeclWithSpan(D: VD, Ctx, UserFillPlaceHolder: getUserFillPlaceHolder(), Handler);
3615}
3616
3617static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
3618 UnsafeBufferUsageHandler &Handler) {
3619 FixItList FixIts{};
3620
3621 // Note: the code below expects the declaration to not use any type sugar like
3622 // typedef.
3623 if (auto CAT = Ctx.getAsConstantArrayType(T: D->getType())) {
3624 const QualType &ArrayEltT = CAT->getElementType();
3625 assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!");
3626 // FIXME: support multi-dimensional arrays
3627 if (isa<clang::ArrayType>(Val: ArrayEltT.getCanonicalType()))
3628 return {};
3629
3630 const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(VD: D);
3631
3632 // Get the spelling of the element type as written in the source file
3633 // (including macros, etc.).
3634 auto MaybeElemTypeTxt =
3635 getRangeText(SR: {D->getBeginLoc(), IdentifierLoc}, SM: Ctx.getSourceManager(),
3636 LangOpts: Ctx.getLangOpts());
3637 if (!MaybeElemTypeTxt)
3638 return {};
3639 const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim();
3640
3641 // Find the '[' token.
3642 std::optional<Token> NextTok = Lexer::findNextToken(
3643 Loc: IdentifierLoc, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3644 while (NextTok && !NextTok->is(K: tok::l_square) &&
3645 NextTok->getLocation() <= D->getSourceRange().getEnd())
3646 NextTok = Lexer::findNextToken(Loc: NextTok->getLocation(),
3647 SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3648 if (!NextTok)
3649 return {};
3650 const SourceLocation LSqBracketLoc = NextTok->getLocation();
3651
3652 // Get the spelling of the array size as written in the source file
3653 // (including macros, etc.).
3654 auto MaybeArraySizeTxt = getRangeText(
3655 SR: {LSqBracketLoc.getLocWithOffset(Offset: 1), D->getTypeSpecEndLoc()},
3656 SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3657 if (!MaybeArraySizeTxt)
3658 return {};
3659 const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim();
3660 if (ArraySizeTxt.empty()) {
3661 // FIXME: Support array size getting determined from the initializer.
3662 // Examples:
3663 // int arr1[] = {0, 1, 2};
3664 // int arr2{3, 4, 5};
3665 // We might be able to preserve the non-specified size with `auto` and
3666 // `std::to_array`:
3667 // auto arr1 = std::to_array<int>({0, 1, 2});
3668 return {};
3669 }
3670
3671 std::optional<StringRef> IdentText =
3672 getVarDeclIdentifierText(VD: D, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3673
3674 if (!IdentText) {
3675 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier");
3676 return {};
3677 }
3678
3679 SmallString<32> Replacement;
3680 llvm::raw_svector_ostream OS(Replacement);
3681 OS << "std::array<" << ElemTypeTxt << ", " << ArraySizeTxt << "> "
3682 << IdentText->str();
3683
3684 FixIts.push_back(Elt: FixItHint::CreateReplacement(
3685 RemoveRange: SourceRange{D->getBeginLoc(), D->getTypeSpecEndLoc()}, Code: OS.str()));
3686 }
3687
3688 return FixIts;
3689}
3690
3691static FixItList fixVariableWithArray(const VarDecl *VD,
3692 const DeclUseTracker &Tracker,
3693 const ASTContext &Ctx,
3694 UnsafeBufferUsageHandler &Handler) {
3695 const DeclStmt *DS = Tracker.lookupDecl(VD);
3696 assert(DS && "Fixing non-local variables not implemented yet!");
3697 if (!DS->isSingleDecl()) {
3698 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
3699 return {};
3700 }
3701 // Currently DS is an unused variable but we'll need it when
3702 // non-single decls are implemented, where the pointee type name
3703 // and the '*' are spread around the place.
3704 (void)DS;
3705
3706 // FIXME: handle cases where DS has multiple declarations
3707 return fixVarDeclWithArray(D: VD, Ctx, Handler);
3708}
3709
3710// TODO: we should be consistent to use `std::nullopt` to represent no-fix due
3711// to any unexpected problem.
3712static FixItList
3713fixVariable(const VarDecl *VD, FixitStrategy::Kind K,
3714 /* The function decl under analysis */ const Decl *D,
3715 const DeclUseTracker &Tracker, ASTContext &Ctx,
3716 UnsafeBufferUsageHandler &Handler) {
3717 if (const auto *PVD = dyn_cast<ParmVarDecl>(Val: VD)) {
3718 auto *FD = dyn_cast<clang::FunctionDecl>(Val: PVD->getDeclContext());
3719 if (!FD || FD != D) {
3720 // `FD != D` means that `PVD` belongs to a function that is not being
3721 // analyzed currently. Thus `FD` may not be complete.
3722 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed");
3723 return {};
3724 }
3725
3726 // TODO If function has a try block we can't change params unless we check
3727 // also its catch block for their use.
3728 // FIXME We might support static class methods, some select methods,
3729 // operators and possibly lamdas.
3730 if (FD->isMain() || FD->isConstexpr() ||
3731 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate ||
3732 FD->isVariadic() ||
3733 // also covers call-operator of lamdas
3734 isa<CXXMethodDecl>(Val: FD) ||
3735 // skip when the function body is a try-block
3736 (FD->hasBody() && isa<CXXTryStmt>(Val: FD->getBody())) ||
3737 FD->isOverloadedOperator()) {
3738 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl");
3739 return {}; // TODO test all these cases
3740 }
3741 }
3742
3743 switch (K) {
3744 case FixitStrategy::Kind::Span: {
3745 if (VD->getType()->isPointerType()) {
3746 if (const auto *PVD = dyn_cast<ParmVarDecl>(Val: VD))
3747 return fixParamWithSpan(PVD, Ctx, Handler);
3748
3749 if (VD->isLocalVarDecl())
3750 return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
3751 }
3752 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer");
3753 return {};
3754 }
3755 case FixitStrategy::Kind::Array: {
3756 if (VD->isLocalVarDecl() && Ctx.getAsConstantArrayType(T: VD->getType()))
3757 return fixVariableWithArray(VD, Tracker, Ctx, Handler);
3758
3759 DEBUG_NOTE_DECL_FAIL(VD, " : not a local const-size array");
3760 return {};
3761 }
3762 case FixitStrategy::Kind::Iterator:
3763 case FixitStrategy::Kind::Vector:
3764 llvm_unreachable("FixitStrategy not implemented yet!");
3765 case FixitStrategy::Kind::Wontfix:
3766 llvm_unreachable("Invalid strategy!");
3767 }
3768 llvm_unreachable("Unknown strategy!");
3769}
3770
3771// Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
3772// `RemoveRange` of 'h' overlaps with a macro use.
3773static bool overlapWithMacro(const FixItList &FixIts) {
3774 // FIXME: For now we only check if the range (or the first token) is (part of)
3775 // a macro expansion. Ideally, we want to check for all tokens in the range.
3776 return llvm::any_of(Range: FixIts, P: [](const FixItHint &Hint) {
3777 auto Range = Hint.RemoveRange;
3778 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
3779 // If the range (or the first token) is (part of) a macro expansion:
3780 return true;
3781 return false;
3782 });
3783}
3784
3785// Returns true iff `VD` is a parameter of the declaration `D`:
3786static bool isParameterOf(const VarDecl *VD, const Decl *D) {
3787 return isa<ParmVarDecl>(Val: VD) &&
3788 VD->getDeclContext() == dyn_cast<DeclContext>(Val: D);
3789}
3790
3791// Erases variables in `FixItsForVariable`, if such a variable has an unfixable
3792// group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
3793// contain `v`.
3794static void eraseVarsForUnfixableGroupMates(
3795 std::map<const VarDecl *, FixItList> &FixItsForVariable,
3796 const VariableGroupsManager &VarGrpMgr) {
3797 // Variables will be removed from `FixItsForVariable`:
3798 SmallVector<const VarDecl *, 8> ToErase;
3799
3800 for (const auto &[VD, Ignore] : FixItsForVariable) {
3801 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(Var: VD);
3802 if (llvm::any_of(Range&: Grp,
3803 P: [&FixItsForVariable](const VarDecl *GrpMember) -> bool {
3804 return !FixItsForVariable.count(x: GrpMember);
3805 })) {
3806 // At least one group member cannot be fixed, so we have to erase the
3807 // whole group:
3808 for (const VarDecl *Member : Grp)
3809 ToErase.push_back(Elt: Member);
3810 }
3811 }
3812 for (auto *VarToErase : ToErase)
3813 FixItsForVariable.erase(x: VarToErase);
3814}
3815
3816// Returns the fix-its that create bounds-safe function overloads for the
3817// function `D`, if `D`'s parameters will be changed to safe-types through
3818// fix-its in `FixItsForVariable`.
3819//
3820// NOTE: In case `D`'s parameters will be changed but bounds-safe function
3821// overloads cannot created, the whole group that contains the parameters will
3822// be erased from `FixItsForVariable`.
3823static FixItList createFunctionOverloadsForParms(
3824 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */,
3825 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD,
3826 const FixitStrategy &S, ASTContext &Ctx,
3827 UnsafeBufferUsageHandler &Handler) {
3828 FixItList FixItsSharedByParms{};
3829
3830 std::optional<FixItList> OverloadFixes =
3831 createOverloadsForFixedParams(S, FD, Ctx, Handler);
3832
3833 if (OverloadFixes) {
3834 FixItsSharedByParms.append(RHS: *OverloadFixes);
3835 } else {
3836 // Something wrong in generating `OverloadFixes`, need to remove the
3837 // whole group, where parameters are in, from `FixItsForVariable` (Note
3838 // that all parameters should be in the same group):
3839 for (auto *Member : VarGrpMgr.getGroupOfParms())
3840 FixItsForVariable.erase(x: Member);
3841 }
3842 return FixItsSharedByParms;
3843}
3844
3845// Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
3846static std::map<const VarDecl *, FixItList>
3847getFixIts(FixableGadgetSets &FixablesForAllVars, const FixitStrategy &S,
3848 ASTContext &Ctx,
3849 /* The function decl under analysis */ const Decl *D,
3850 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler,
3851 const VariableGroupsManager &VarGrpMgr) {
3852 // `FixItsForVariable` will map each variable to a set of fix-its directly
3853 // associated to the variable itself. Fix-its of distinct variables in
3854 // `FixItsForVariable` are disjoint.
3855 std::map<const VarDecl *, FixItList> FixItsForVariable;
3856
3857 // Populate `FixItsForVariable` with fix-its directly associated with each
3858 // variable. Fix-its directly associated to a variable 'v' are the ones
3859 // produced by the `FixableGadget`s whose claimed variable is 'v'.
3860 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) {
3861 FixItsForVariable[VD] =
3862 fixVariable(VD, K: S.lookup(VD), D, Tracker, Ctx, Handler);
3863 // If we fail to produce Fix-It for the declaration we have to skip the
3864 // variable entirely.
3865 if (FixItsForVariable[VD].empty()) {
3866 FixItsForVariable.erase(x: VD);
3867 continue;
3868 }
3869 for (const auto &F : Fixables) {
3870 std::optional<FixItList> Fixits = F->getFixits(S);
3871
3872 if (Fixits) {
3873 FixItsForVariable[VD].insert(I: FixItsForVariable[VD].end(),
3874 From: Fixits->begin(), To: Fixits->end());
3875 continue;
3876 }
3877#ifndef NDEBUG
3878 Handler.addDebugNoteForVar(
3879 VD, F->getSourceLoc(),
3880 ("gadget '" + F->getDebugName() + "' refused to produce a fix")
3881 .str());
3882#endif
3883 FixItsForVariable.erase(x: VD);
3884 break;
3885 }
3886 }
3887
3888 // `FixItsForVariable` now contains only variables that can be
3889 // fixed. A variable can be fixed if its' declaration and all Fixables
3890 // associated to it can all be fixed.
3891
3892 // To further remove from `FixItsForVariable` variables whose group mates
3893 // cannot be fixed...
3894 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr);
3895 // Now `FixItsForVariable` gets further reduced: a variable is in
3896 // `FixItsForVariable` iff it can be fixed and all its group mates can be
3897 // fixed.
3898
3899 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
3900 // That is, when fixing multiple parameters in one step, these fix-its will
3901 // be applied only once (instead of being applied per parameter).
3902 FixItList FixItsSharedByParms{};
3903
3904 if (auto *FD = dyn_cast<FunctionDecl>(Val: D))
3905 FixItsSharedByParms = createFunctionOverloadsForParms(
3906 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler);
3907
3908 // The map that maps each variable `v` to fix-its for the whole group where
3909 // `v` is in:
3910 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{
3911 FixItsForVariable};
3912
3913 for (auto &[Var, Ignore] : FixItsForVariable) {
3914 bool AnyParm = false;
3915 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, HasParm: &AnyParm);
3916
3917 for (const VarDecl *GrpMate : VarGroupForVD) {
3918 if (Var == GrpMate)
3919 continue;
3920 if (FixItsForVariable.count(x: GrpMate))
3921 FinalFixItsForVariable[Var].append(RHS: FixItsForVariable[GrpMate]);
3922 }
3923 if (AnyParm) {
3924 // This assertion should never fail. Otherwise we have a bug.
3925 assert(!FixItsSharedByParms.empty() &&
3926 "Should not try to fix a parameter that does not belong to a "
3927 "FunctionDecl");
3928 FinalFixItsForVariable[Var].append(RHS: FixItsSharedByParms);
3929 }
3930 }
3931 // Fix-its that will be applied in one step shall NOT:
3932 // 1. overlap with macros or/and templates; or
3933 // 2. conflict with each other.
3934 // Otherwise, the fix-its will be dropped.
3935 for (auto Iter = FinalFixItsForVariable.begin();
3936 Iter != FinalFixItsForVariable.end();)
3937 if (overlapWithMacro(FixIts: Iter->second) ||
3938 clang::internal::anyConflict(FixIts: Iter->second, SM: Ctx.getSourceManager())) {
3939 Iter = FinalFixItsForVariable.erase(position: Iter);
3940 } else
3941 Iter++;
3942 return FinalFixItsForVariable;
3943}
3944
3945template <typename VarDeclIterTy>
3946static FixitStrategy
3947getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) {
3948 FixitStrategy S;
3949 for (const VarDecl *VD : UnsafeVars) {
3950 if (isa<ConstantArrayType>(Val: VD->getType().getCanonicalType()))
3951 S.set(VD, K: FixitStrategy::Kind::Array);
3952 else
3953 S.set(VD, K: FixitStrategy::Kind::Span);
3954 }
3955 return S;
3956}
3957
3958// Manages variable groups:
3959class VariableGroupsManagerImpl : public VariableGroupsManager {
3960 const std::vector<VarGrpTy> Groups;
3961 const std::map<const VarDecl *, unsigned> &VarGrpMap;
3962 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms;
3963
3964public:
3965 VariableGroupsManagerImpl(
3966 const std::vector<VarGrpTy> &Groups,
3967 const std::map<const VarDecl *, unsigned> &VarGrpMap,
3968 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms)
3969 : Groups(Groups), VarGrpMap(VarGrpMap),
3970 GrpsUnionForParms(GrpsUnionForParms) {}
3971
3972 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override {
3973 if (GrpsUnionForParms.contains(key: Var)) {
3974 if (HasParm)
3975 *HasParm = true;
3976 return GrpsUnionForParms.getArrayRef();
3977 }
3978 if (HasParm)
3979 *HasParm = false;
3980
3981 auto It = VarGrpMap.find(x: Var);
3982
3983 if (It == VarGrpMap.end())
3984 return {};
3985 return Groups[It->second];
3986 }
3987
3988 VarGrpRef getGroupOfParms() const override {
3989 return GrpsUnionForParms.getArrayRef();
3990 }
3991};
3992
3993static void applyGadgets(const Decl *D, FixableGadgetList FixableGadgets,
3994 WarningGadgetList WarningGadgets,
3995 DeclUseTracker Tracker,
3996 UnsafeBufferUsageHandler &Handler,
3997 bool EmitSuggestions) {
3998 if (!EmitSuggestions) {
3999 // Our job is very easy without suggestions. Just warn about
4000 // every problematic operation and consider it done. No need to deal
4001 // with fixable gadgets, no need to group operations by variable.
4002 for (const auto &G : WarningGadgets) {
4003 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/false,
4004 Ctx&: D->getASTContext());
4005 }
4006
4007 // This return guarantees that most of the machine doesn't run when
4008 // suggestions aren't requested.
4009 assert(FixableGadgets.empty() &&
4010 "Fixable gadgets found but suggestions not requested!");
4011 return;
4012 }
4013
4014 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
4015 // function under the analysis. No need to fix any Fixables.
4016 if (!WarningGadgets.empty()) {
4017 // Gadgets "claim" variables they're responsible for. Once this loop
4018 // finishes, the tracker will only track DREs that weren't claimed by any
4019 // gadgets, i.e. not understood by the analysis.
4020 for (const auto &G : FixableGadgets) {
4021 for (const auto *DRE : G->getClaimedVarUseSites()) {
4022 Tracker.claimUse(DRE);
4023 }
4024 }
4025 }
4026
4027 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
4028 // function under the analysis. Thus, it early returns here as there is
4029 // nothing needs to be fixed.
4030 //
4031 // Note this claim is based on the assumption that there is no unsafe
4032 // variable whose declaration is invisible from the analyzing function.
4033 // Otherwise, we need to consider if the uses of those unsafe varuables needs
4034 // fix.
4035 // So far, we are not fixing any global variables or class members. And,
4036 // lambdas will be analyzed along with the enclosing function. So this early
4037 // return is correct for now.
4038 if (WarningGadgets.empty())
4039 return;
4040
4041 WarningGadgetSets UnsafeOps =
4042 groupWarningGadgetsByVar(AllUnsafeOperations: std::move(WarningGadgets));
4043 FixableGadgetSets FixablesForAllVars =
4044 groupFixablesByVar(AllFixableOperations: std::move(FixableGadgets));
4045
4046 std::map<const VarDecl *, FixItList> FixItsForVariableGroup;
4047
4048 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
4049 for (auto it = FixablesForAllVars.byVar.cbegin();
4050 it != FixablesForAllVars.byVar.cend();) {
4051 // FIXME: need to deal with global variables later
4052 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(Val: it->first))) {
4053#ifndef NDEBUG
4054 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
4055 ("failed to produce fixit for '" +
4056 it->first->getNameAsString() +
4057 "' : neither local nor a parameter"));
4058#endif
4059 it = FixablesForAllVars.byVar.erase(position: it);
4060 } else if (it->first->getType().getCanonicalType()->isReferenceType()) {
4061#ifndef NDEBUG
4062 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
4063 ("failed to produce fixit for '" +
4064 it->first->getNameAsString() +
4065 "' : has a reference type"));
4066#endif
4067 it = FixablesForAllVars.byVar.erase(position: it);
4068 } else if (Tracker.hasUnclaimedUses(VD: it->first)) {
4069 it = FixablesForAllVars.byVar.erase(position: it);
4070 } else if (it->first->isInitCapture()) {
4071#ifndef NDEBUG
4072 Handler.addDebugNoteForVar(it->first, it->first->getBeginLoc(),
4073 ("failed to produce fixit for '" +
4074 it->first->getNameAsString() +
4075 "' : init capture"));
4076#endif
4077 it = FixablesForAllVars.byVar.erase(position: it);
4078 } else {
4079 ++it;
4080 }
4081 }
4082
4083#ifndef NDEBUG
4084 for (const auto &it : UnsafeOps.byVar) {
4085 const VarDecl *const UnsafeVD = it.first;
4086 auto UnclaimedDREs = Tracker.getUnclaimedUses(UnsafeVD);
4087 if (UnclaimedDREs.empty())
4088 continue;
4089 const auto UnfixedVDName = UnsafeVD->getNameAsString();
4090 for (const clang::DeclRefExpr *UnclaimedDRE : UnclaimedDREs) {
4091 std::string UnclaimedUseTrace =
4092 getDREAncestorString(UnclaimedDRE, D->getASTContext());
4093
4094 Handler.addDebugNoteForVar(
4095 UnsafeVD, UnclaimedDRE->getBeginLoc(),
4096 ("failed to produce fixit for '" + UnfixedVDName +
4097 "' : has an unclaimed use\nThe unclaimed DRE trace: " +
4098 UnclaimedUseTrace));
4099 }
4100 }
4101#endif
4102
4103 // Fixpoint iteration for pointer assignments
4104 using DepMapTy =
4105 llvm::DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>;
4106 DepMapTy DependenciesMap{};
4107 DepMapTy PtrAssignmentGraph{};
4108
4109 for (const auto &it : FixablesForAllVars.byVar) {
4110 for (const FixableGadget *fixable : it.second) {
4111 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair =
4112 fixable->getStrategyImplications();
4113 if (ImplPair) {
4114 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair);
4115 PtrAssignmentGraph[Impl.first].insert(X: Impl.second);
4116 }
4117 }
4118 }
4119
4120 /*
4121 The following code does a BFS traversal of the `PtrAssignmentGraph`
4122 considering all unsafe vars as starting nodes and constructs an undirected
4123 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
4124 elimiates all variables that are unreachable from any unsafe var. In other
4125 words, this removes all dependencies that don't include any unsafe variable
4126 and consequently don't need any fixit generation.
4127 Note: A careful reader would observe that the code traverses
4128 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
4129 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
4130 achieve the same result but the one used here dramatically cuts the
4131 amount of hoops the second part of the algorithm needs to jump, given that
4132 a lot of these connections become "direct". The reader is advised not to
4133 imagine how the graph is transformed because of using `Var` instead of
4134 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
4135 and think about why it's equivalent later.
4136 */
4137 std::set<const VarDecl *> VisitedVarsDirected{};
4138 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
4139 if (VisitedVarsDirected.find(x: Var) == VisitedVarsDirected.end()) {
4140
4141 std::queue<const VarDecl *> QueueDirected{};
4142 QueueDirected.push(x: Var);
4143 while (!QueueDirected.empty()) {
4144 const VarDecl *CurrentVar = QueueDirected.front();
4145 QueueDirected.pop();
4146 VisitedVarsDirected.insert(x: CurrentVar);
4147 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
4148 for (const VarDecl *Adj : AdjacentNodes) {
4149 if (VisitedVarsDirected.find(x: Adj) == VisitedVarsDirected.end()) {
4150 QueueDirected.push(x: Adj);
4151 }
4152 DependenciesMap[Var].insert(X: Adj);
4153 DependenciesMap[Adj].insert(X: Var);
4154 }
4155 }
4156 }
4157 }
4158
4159 // `Groups` stores the set of Connected Components in the graph.
4160 std::vector<VarGrpTy> Groups;
4161 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the
4162 // variables belong to. Group indexes refer to the elements in `Groups`.
4163 // `VarGrpMap` is complete in that every variable that needs fix is in it.
4164 std::map<const VarDecl *, unsigned> VarGrpMap;
4165 // The union group over the ones in "Groups" that contain parameters of `D`:
4166 llvm::SetVector<const VarDecl *>
4167 GrpsUnionForParms; // these variables need to be fixed in one step
4168
4169 // Group Connected Components for Unsafe Vars
4170 // (Dependencies based on pointer assignments)
4171 std::set<const VarDecl *> VisitedVars{};
4172 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
4173 if (VisitedVars.find(x: Var) == VisitedVars.end()) {
4174 VarGrpTy &VarGroup = Groups.emplace_back();
4175 std::queue<const VarDecl *> Queue{};
4176
4177 Queue.push(x: Var);
4178 while (!Queue.empty()) {
4179 const VarDecl *CurrentVar = Queue.front();
4180 Queue.pop();
4181 VisitedVars.insert(x: CurrentVar);
4182 VarGroup.push_back(x: CurrentVar);
4183 auto AdjacentNodes = DependenciesMap[CurrentVar];
4184 for (const VarDecl *Adj : AdjacentNodes) {
4185 if (VisitedVars.find(x: Adj) == VisitedVars.end()) {
4186 Queue.push(x: Adj);
4187 }
4188 }
4189 }
4190
4191 bool HasParm = false;
4192 unsigned GrpIdx = Groups.size() - 1;
4193
4194 for (const VarDecl *V : VarGroup) {
4195 VarGrpMap[V] = GrpIdx;
4196 if (!HasParm && isParameterOf(VD: V, D))
4197 HasParm = true;
4198 }
4199 if (HasParm)
4200 GrpsUnionForParms.insert_range(R&: VarGroup);
4201 }
4202 }
4203
4204 // Remove a `FixableGadget` if the associated variable is not in the graph
4205 // computed above. We do not want to generate fix-its for such variables,
4206 // since they are neither warned nor reachable from a warned one.
4207 //
4208 // Note a variable is not warned if it is not directly used in any unsafe
4209 // operation. A variable `v` is NOT reachable from an unsafe variable, if it
4210 // does not exist another variable `u` such that `u` is warned and fixing `u`
4211 // (transitively) implicates fixing `v`.
4212 //
4213 // For example,
4214 // ```
4215 // void f(int * p) {
4216 // int * a = p; *p = 0;
4217 // }
4218 // ```
4219 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither
4220 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of
4221 // the function above, `p` becomes reachable from a warned variable.
4222 for (auto I = FixablesForAllVars.byVar.begin();
4223 I != FixablesForAllVars.byVar.end();) {
4224 // Note `VisitedVars` contain all the variables in the graph:
4225 if (!VisitedVars.count(x: (*I).first)) {
4226 // no such var in graph:
4227 I = FixablesForAllVars.byVar.erase(position: I);
4228 } else
4229 ++I;
4230 }
4231
4232 // We assign strategies to variables that are 1) in the graph and 2) can be
4233 // fixed. Other variables have the default "Won't fix" strategy.
4234 FixitStrategy NaiveStrategy = getNaiveStrategy(UnsafeVars: llvm::make_filter_range(
4235 Range&: VisitedVars, Pred: [&FixablesForAllVars](const VarDecl *V) {
4236 // If a warned variable has no "Fixable", it is considered unfixable:
4237 return FixablesForAllVars.byVar.count(x: V);
4238 }));
4239 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms);
4240
4241 if (isa<NamedDecl>(Val: D))
4242 // The only case where `D` is not a `NamedDecl` is when `D` is a
4243 // `BlockDecl`. Let's not fix variables in blocks for now
4244 FixItsForVariableGroup =
4245 getFixIts(FixablesForAllVars, S: NaiveStrategy, Ctx&: D->getASTContext(), D,
4246 Tracker, Handler, VarGrpMgr);
4247
4248 for (const auto &G : UnsafeOps.noVar) {
4249 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/false,
4250 Ctx&: D->getASTContext());
4251 }
4252
4253 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
4254 auto FixItsIt = FixItsForVariableGroup.find(x: VD);
4255 Handler.handleUnsafeVariableGroup(Variable: VD, VarGrpMgr,
4256 Fixes: FixItsIt != FixItsForVariableGroup.end()
4257 ? std::move(FixItsIt->second)
4258 : FixItList{},
4259 D, VarTargetTypes: NaiveStrategy);
4260 for (const auto &G : WarningGadgets) {
4261 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/true,
4262 Ctx&: D->getASTContext());
4263 }
4264 }
4265}
4266
4267void clang::checkUnsafeBufferUsage(const Decl *D,
4268 UnsafeBufferUsageHandler &Handler,
4269 bool EmitSuggestions) {
4270#ifndef NDEBUG
4271 Handler.clearDebugNotes();
4272#endif
4273
4274 assert(D);
4275
4276 SmallVector<Stmt *> Stmts;
4277
4278 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D)) {
4279 // We do not want to visit a Lambda expression defined inside a method
4280 // independently. Instead, it should be visited along with the outer method.
4281 // FIXME: do we want to do the same thing for `BlockDecl`s?
4282 if (const auto *MD = dyn_cast<CXXMethodDecl>(Val: D)) {
4283 if (MD->getParent()->isLambda() && MD->getParent()->isLocalClass())
4284 return;
4285 }
4286
4287 for (FunctionDecl *FReDecl : FD->redecls()) {
4288 if (FReDecl->isExternC()) {
4289 // Do not emit fixit suggestions for functions declared in an
4290 // extern "C" block.
4291 EmitSuggestions = false;
4292 break;
4293 }
4294 }
4295
4296 Stmts.push_back(Elt: FD->getBody());
4297
4298 if (const auto *ID = dyn_cast<CXXConstructorDecl>(Val: D)) {
4299 for (const CXXCtorInitializer *CI : ID->inits()) {
4300 Stmts.push_back(Elt: CI->getInit());
4301 }
4302 }
4303 } else if (isa<BlockDecl>(Val: D) || isa<ObjCMethodDecl>(Val: D)) {
4304 Stmts.push_back(Elt: D->getBody());
4305 }
4306
4307 assert(!Stmts.empty());
4308
4309 FixableGadgetList FixableGadgets;
4310 WarningGadgetList WarningGadgets;
4311 DeclUseTracker Tracker;
4312 for (Stmt *S : Stmts) {
4313 findGadgets(S, Ctx&: D->getASTContext(), Handler, EmitSuggestions, FixableGadgets,
4314 WarningGadgets, Tracker);
4315 }
4316 applyGadgets(D, FixableGadgets: std::move(FixableGadgets), WarningGadgets: std::move(WarningGadgets),
4317 Tracker: std::move(Tracker), Handler, EmitSuggestions);
4318}
4319

source code of clang/lib/Analysis/UnsafeBufferUsage.cpp