1//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10#include "clang/AST/APValue.h"
11#include "clang/AST/ASTContext.h"
12#include "clang/AST/ASTTypeTraits.h"
13#include "clang/AST/Attr.h"
14#include "clang/AST/Decl.h"
15#include "clang/AST/DeclCXX.h"
16#include "clang/AST/DynamicRecursiveASTVisitor.h"
17#include "clang/AST/Expr.h"
18#include "clang/AST/FormatString.h"
19#include "clang/AST/ParentMapContext.h"
20#include "clang/AST/Stmt.h"
21#include "clang/AST/StmtVisitor.h"
22#include "clang/AST/Type.h"
23#include "clang/ASTMatchers/LowLevelHelpers.h"
24#include "clang/Analysis/Support/FixitUtil.h"
25#include "clang/Basic/SourceLocation.h"
26#include "clang/Lex/Lexer.h"
27#include "clang/Lex/Preprocessor.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/STLFunctionalExtras.h"
30#include "llvm/ADT/SmallSet.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/StringRef.h"
33#include "llvm/Support/Casting.h"
34#include <cstddef>
35#include <optional>
36#include <queue>
37#include <set>
38#include <sstream>
39
40using namespace clang;
41
42#ifndef NDEBUG
43namespace {
44class StmtDebugPrinter
45 : public ConstStmtVisitor<StmtDebugPrinter, std::string> {
46public:
47 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); }
48
49 std::string VisitBinaryOperator(const BinaryOperator *BO) {
50 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")";
51 }
52
53 std::string VisitUnaryOperator(const UnaryOperator *UO) {
54 return "UnaryOperator(" + UO->getOpcodeStr(Op: UO->getOpcode()).str() + ")";
55 }
56
57 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
58 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")";
59 }
60};
61
62// Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
63// "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
64static std::string getDREAncestorString(const DeclRefExpr *DRE,
65 ASTContext &Ctx) {
66 std::stringstream SS;
67 const Stmt *St = DRE;
68 StmtDebugPrinter StmtPriner;
69
70 do {
71 SS << StmtPriner.Visit(St);
72
73 DynTypedNodeList StParents = Ctx.getParents(Node: *St);
74
75 if (StParents.size() > 1)
76 return "unavailable due to multiple parents";
77 if (StParents.empty())
78 break;
79 St = StParents.begin()->get<Stmt>();
80 if (St)
81 SS << " ==> ";
82 } while (St);
83 return SS.str();
84}
85
86} // namespace
87#endif /* NDEBUG */
88
89namespace {
90// Using a custom `FastMatcher` instead of ASTMatchers to achieve better
91// performance. FastMatcher uses simple function `matches` to find if a node
92// is a match, avoiding the dependency on the ASTMatchers framework which
93// provide a nice abstraction, but incur big performance costs.
94class FastMatcher {
95public:
96 virtual bool matches(const DynTypedNode &DynNode, ASTContext &Ctx,
97 const UnsafeBufferUsageHandler &Handler) = 0;
98 virtual ~FastMatcher() = default;
99};
100
101class MatchResult {
102
103public:
104 template <typename T> const T *getNodeAs(StringRef ID) const {
105 auto It = Nodes.find(Key: ID);
106 if (It == Nodes.end()) {
107 return nullptr;
108 }
109 return It->second.get<T>();
110 }
111
112 void addNode(StringRef ID, const DynTypedNode &Node) { Nodes[ID] = Node; }
113
114private:
115 llvm::StringMap<DynTypedNode> Nodes;
116};
117} // namespace
118
119// A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
120// except for those belonging to a different callable of "n".
121class MatchDescendantVisitor : public DynamicRecursiveASTVisitor {
122public:
123 // Creates an AST visitor that matches `Matcher` on all
124 // descendants of a given node "n" except for the ones
125 // belonging to a different callable of "n".
126 MatchDescendantVisitor(ASTContext &Context, FastMatcher &Matcher,
127 bool FindAll, bool ignoreUnevaluatedContext,
128 const UnsafeBufferUsageHandler &NewHandler)
129 : Matcher(&Matcher), FindAll(FindAll), Matches(false),
130 ignoreUnevaluatedContext(ignoreUnevaluatedContext),
131 ActiveASTContext(&Context), Handler(&NewHandler) {
132 ShouldVisitTemplateInstantiations = true;
133 ShouldVisitImplicitCode = false; // TODO: let's ignore implicit code for now
134 }
135
136 // Returns true if a match is found in a subtree of `DynNode`, which belongs
137 // to the same callable of `DynNode`.
138 bool findMatch(const DynTypedNode &DynNode) {
139 Matches = false;
140 if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
141 TraverseStmt(Node: const_cast<Stmt *>(StmtNode));
142 return Matches;
143 }
144 return false;
145 }
146
147 // The following are overriding methods from the base visitor class.
148 // They are public only to allow CRTP to work. They are *not *part
149 // of the public API of this class.
150
151 // For the matchers so far used in safe buffers, we only need to match
152 // `Stmt`s. To override more as needed.
153
154 bool TraverseDecl(Decl *Node) override {
155 if (!Node)
156 return true;
157 if (!match(Node: *Node))
158 return false;
159 // To skip callables:
160 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Val: Node))
161 return true;
162 // Traverse descendants
163 return DynamicRecursiveASTVisitor::TraverseDecl(Node);
164 }
165
166 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) override {
167 // These are unevaluated, except the result expression.
168 if (ignoreUnevaluatedContext)
169 return TraverseStmt(Node->getResultExpr());
170 return DynamicRecursiveASTVisitor::TraverseGenericSelectionExpr(Node);
171 }
172
173 bool
174 TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) override {
175 // Unevaluated context.
176 if (ignoreUnevaluatedContext)
177 return true;
178 return DynamicRecursiveASTVisitor::TraverseUnaryExprOrTypeTraitExpr(Node);
179 }
180
181 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) override {
182 // Unevaluated context.
183 if (ignoreUnevaluatedContext)
184 return true;
185 return DynamicRecursiveASTVisitor::TraverseTypeOfExprTypeLoc(Node);
186 }
187
188 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) override {
189 // Unevaluated context.
190 if (ignoreUnevaluatedContext)
191 return true;
192 return DynamicRecursiveASTVisitor::TraverseDecltypeTypeLoc(Node);
193 }
194
195 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) override {
196 // Unevaluated context.
197 if (ignoreUnevaluatedContext)
198 return true;
199 return DynamicRecursiveASTVisitor::TraverseCXXNoexceptExpr(Node);
200 }
201
202 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) override {
203 // Unevaluated context.
204 if (ignoreUnevaluatedContext)
205 return true;
206 return DynamicRecursiveASTVisitor::TraverseCXXTypeidExpr(Node);
207 }
208
209 bool TraverseCXXDefaultInitExpr(CXXDefaultInitExpr *Node) override {
210 if (!TraverseStmt(Node->getExpr()))
211 return false;
212 return DynamicRecursiveASTVisitor::TraverseCXXDefaultInitExpr(Node);
213 }
214
215 bool TraverseStmt(Stmt *Node) override {
216 if (!Node)
217 return true;
218 if (!match(Node: *Node))
219 return false;
220 return DynamicRecursiveASTVisitor::TraverseStmt(Node);
221 }
222
223private:
224 // Sets 'Matched' to true if 'Matcher' matches 'Node'
225 //
226 // Returns 'true' if traversal should continue after this function
227 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
228 template <typename T> bool match(const T &Node) {
229 if (Matcher->matches(DynNode: DynTypedNode::create(Node), Ctx&: *ActiveASTContext,
230 Handler: *Handler)) {
231 Matches = true;
232 if (!FindAll)
233 return false; // Abort as soon as a match is found.
234 }
235 return true;
236 }
237
238 FastMatcher *const Matcher;
239 // When true, finds all matches. When false, finds the first match and stops.
240 const bool FindAll;
241 bool Matches;
242 bool ignoreUnevaluatedContext;
243 ASTContext *ActiveASTContext;
244 const UnsafeBufferUsageHandler *Handler;
245};
246
247// Because we're dealing with raw pointers, let's define what we mean by that.
248static bool hasPointerType(const Expr &E) {
249 return isa<PointerType>(Val: E.getType().getCanonicalType());
250}
251
252static bool hasArrayType(const Expr &E) {
253 return isa<ArrayType>(Val: E.getType().getCanonicalType());
254}
255
256static void
257forEachDescendantEvaluatedStmt(const Stmt *S, ASTContext &Ctx,
258 const UnsafeBufferUsageHandler &Handler,
259 FastMatcher &Matcher) {
260 MatchDescendantVisitor Visitor(Ctx, Matcher, /*FindAll=*/true,
261 /*ignoreUnevaluatedContext=*/true, Handler);
262 Visitor.findMatch(DynNode: DynTypedNode::create(Node: *S));
263}
264
265static void forEachDescendantStmt(const Stmt *S, ASTContext &Ctx,
266 const UnsafeBufferUsageHandler &Handler,
267 FastMatcher &Matcher) {
268 MatchDescendantVisitor Visitor(Ctx, Matcher, /*FindAll=*/true,
269 /*ignoreUnevaluatedContext=*/false, Handler);
270 Visitor.findMatch(DynNode: DynTypedNode::create(Node: *S));
271}
272
273// Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
274static bool notInSafeBufferOptOut(const Stmt &Node,
275 const UnsafeBufferUsageHandler *Handler) {
276 return !Handler->isSafeBufferOptOut(Loc: Node.getBeginLoc());
277}
278
279static bool
280ignoreUnsafeBufferInContainer(const Stmt &Node,
281 const UnsafeBufferUsageHandler *Handler) {
282 return Handler->ignoreUnsafeBufferInContainer(Loc: Node.getBeginLoc());
283}
284
285static bool ignoreUnsafeLibcCall(const ASTContext &Ctx, const Stmt &Node,
286 const UnsafeBufferUsageHandler *Handler) {
287 if (Ctx.getLangOpts().CPlusPlus)
288 return Handler->ignoreUnsafeBufferInLibcCall(Loc: Node.getBeginLoc());
289 return true; /* Only warn about libc calls for C++ */
290}
291
292// Finds any expression 'e' such that `OnResult`
293// matches 'e' and 'e' is in an Unspecified Lvalue Context.
294static void findStmtsInUnspecifiedLvalueContext(
295 const Stmt *S, const llvm::function_ref<void(const Expr *)> OnResult) {
296 if (const auto *CE = dyn_cast<ImplicitCastExpr>(Val: S);
297 CE && CE->getCastKind() == CastKind::CK_LValueToRValue)
298 OnResult(CE->getSubExpr());
299 if (const auto *BO = dyn_cast<BinaryOperator>(Val: S);
300 BO && BO->getOpcode() == BO_Assign)
301 OnResult(BO->getLHS());
302}
303
304// Finds any expression `e` such that `InnerMatcher` matches `e` and
305// `e` is in an Unspecified Pointer Context (UPC).
306static void findStmtsInUnspecifiedPointerContext(
307 const Stmt *S, llvm::function_ref<void(const Stmt *)> InnerMatcher) {
308 // A UPC can be
309 // 1. an argument of a function call (except the callee has [[unsafe_...]]
310 // attribute), or
311 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
312 // 3. the operand of a comparator operation; or
313 // 4. the operand of a pointer subtraction operation
314 // (i.e., computing the distance between two pointers); or ...
315
316 if (auto *CE = dyn_cast<CallExpr>(Val: S)) {
317 if (const auto *FnDecl = CE->getDirectCallee();
318 FnDecl && FnDecl->hasAttr<UnsafeBufferUsageAttr>())
319 return;
320 ast_matchers::matchEachArgumentWithParamType(
321 Node: *CE, OnParamAndArg: [&InnerMatcher](QualType Type, const Expr *Arg) {
322 if (Type->isAnyPointerType())
323 InnerMatcher(Arg);
324 });
325 }
326
327 if (auto *CE = dyn_cast<CastExpr>(Val: S)) {
328 if (CE->getCastKind() != CastKind::CK_PointerToIntegral &&
329 CE->getCastKind() != CastKind::CK_PointerToBoolean)
330 return;
331 if (!hasPointerType(E: *CE->getSubExpr()))
332 return;
333 InnerMatcher(CE->getSubExpr());
334 }
335
336 // Pointer comparison operator.
337 if (const auto *BO = dyn_cast<BinaryOperator>(Val: S);
338 BO && (BO->getOpcode() == BO_EQ || BO->getOpcode() == BO_NE ||
339 BO->getOpcode() == BO_LT || BO->getOpcode() == BO_LE ||
340 BO->getOpcode() == BO_GT || BO->getOpcode() == BO_GE)) {
341 auto *LHS = BO->getLHS();
342 if (hasPointerType(E: *LHS))
343 InnerMatcher(LHS);
344
345 auto *RHS = BO->getRHS();
346 if (hasPointerType(E: *RHS))
347 InnerMatcher(RHS);
348 }
349
350 // Pointer subtractions.
351 if (const auto *BO = dyn_cast<BinaryOperator>(Val: S);
352 BO && BO->getOpcode() == BO_Sub && hasPointerType(E: *BO->getLHS()) &&
353 hasPointerType(E: *BO->getRHS())) {
354 // Note that here we need both LHS and RHS to be
355 // pointer. Then the inner matcher can match any of
356 // them:
357 InnerMatcher(BO->getLHS());
358 InnerMatcher(BO->getRHS());
359 }
360 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now
361 // we don't have to check that.)
362}
363
364// Finds statements in unspecified untyped context i.e. any expression 'e' such
365// that `InnerMatcher` matches 'e' and 'e' is in an unspecified untyped context
366// (i.e the expression 'e' isn't evaluated to an RValue). For example, consider
367// the following code:
368// int *p = new int[4];
369// int *q = new int[4];
370// if ((p = q)) {}
371// p = q;
372// The expression `p = q` in the conditional of the `if` statement
373// `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
374// in the assignment statement is in an untyped context.
375static void findStmtsInUnspecifiedUntypedContext(
376 const Stmt *S, llvm::function_ref<void(const Stmt *)> InnerMatcher) {
377 // An unspecified context can be
378 // 1. A compound statement,
379 // 2. The body of an if statement
380 // 3. Body of a loop
381 if (auto *CS = dyn_cast<CompoundStmt>(Val: S)) {
382 for (auto *Child : CS->body())
383 InnerMatcher(Child);
384 }
385 if (auto *IfS = dyn_cast<IfStmt>(Val: S)) {
386 if (IfS->getThen())
387 InnerMatcher(IfS->getThen());
388 if (IfS->getElse())
389 InnerMatcher(IfS->getElse());
390 }
391 // FIXME: Handle loop bodies.
392}
393
394// Returns true iff integer E1 is equivalent to integer E2.
395//
396// For now we only support such expressions:
397// expr := DRE | const-value | expr BO expr
398// BO := '*' | '+'
399//
400// FIXME: We can reuse the expression comparator of the interop analysis after
401// it has been upstreamed.
402static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx);
403static bool areEqualIntegralBinaryOperators(const BinaryOperator *E1,
404 const Expr *E2_LHS,
405 BinaryOperatorKind BOP,
406 const Expr *E2_RHS,
407 ASTContext &Ctx) {
408 if (E1->getOpcode() == BOP) {
409 switch (BOP) {
410 // Commutative operators:
411 case BO_Mul:
412 case BO_Add:
413 return (areEqualIntegers(E1: E1->getLHS(), E2: E2_LHS, Ctx) &&
414 areEqualIntegers(E1: E1->getRHS(), E2: E2_RHS, Ctx)) ||
415 (areEqualIntegers(E1: E1->getLHS(), E2: E2_RHS, Ctx) &&
416 areEqualIntegers(E1: E1->getRHS(), E2: E2_LHS, Ctx));
417 default:
418 return false;
419 }
420 }
421 return false;
422}
423
424static bool areEqualIntegers(const Expr *E1, const Expr *E2, ASTContext &Ctx) {
425 E1 = E1->IgnoreParenImpCasts();
426 E2 = E2->IgnoreParenImpCasts();
427 if (!E1->getType()->isIntegerType() || E1->getType() != E2->getType())
428 return false;
429
430 Expr::EvalResult ER1, ER2;
431
432 // If both are constants:
433 if (E1->EvaluateAsInt(Result&: ER1, Ctx) && E2->EvaluateAsInt(Result&: ER2, Ctx))
434 return ER1.Val.getInt() == ER2.Val.getInt();
435
436 // Otherwise, they should have identical stmt kind:
437 if (E1->getStmtClass() != E2->getStmtClass())
438 return false;
439 switch (E1->getStmtClass()) {
440 case Stmt::DeclRefExprClass:
441 return cast<DeclRefExpr>(Val: E1)->getDecl() == cast<DeclRefExpr>(Val: E2)->getDecl();
442 case Stmt::BinaryOperatorClass: {
443 auto BO2 = cast<BinaryOperator>(Val: E2);
444 return areEqualIntegralBinaryOperators(E1: cast<BinaryOperator>(Val: E1),
445 E2_LHS: BO2->getLHS(), BOP: BO2->getOpcode(),
446 E2_RHS: BO2->getRHS(), Ctx);
447 }
448 default:
449 return false;
450 }
451}
452
453// Given a two-param std::span construct call, matches iff the call has the
454// following forms:
455// 1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE
456// 2. `std::span<T>{new T, 1}`
457// 3. `std::span<T>{&var, 1}` or `std::span<T>{std::addressof(...), 1}`
458// 4. `std::span<T>{a, n}`, where `a` is of an array-of-T with constant size
459// `n`
460// 5. `std::span<T>{any, 0}`
461// 6. `std::span<T>{ (char *)f(args), args[N] * arg*[M]}`, where
462// `f` is a function with attribute `alloc_size(N, M)`;
463// `args` represents the list of arguments;
464// `N, M` are parameter indexes to the allocating element number and size.
465// Sometimes, there is only one parameter index representing the total
466// size.
467static bool isSafeSpanTwoParamConstruct(const CXXConstructExpr &Node,
468 ASTContext &Ctx) {
469 assert(Node.getNumArgs() == 2 &&
470 "expecting a two-parameter std::span constructor");
471 const Expr *Arg0 = Node.getArg(Arg: 0)->IgnoreParenImpCasts();
472 const Expr *Arg1 = Node.getArg(Arg: 1)->IgnoreParenImpCasts();
473 auto HaveEqualConstantValues = [&Ctx](const Expr *E0, const Expr *E1) {
474 if (auto E0CV = E0->getIntegerConstantExpr(Ctx))
475 if (auto E1CV = E1->getIntegerConstantExpr(Ctx)) {
476 return llvm::APSInt::compareValues(I1: *E0CV, I2: *E1CV) == 0;
477 }
478 return false;
479 };
480 auto AreSameDRE = [](const Expr *E0, const Expr *E1) {
481 if (auto *DRE0 = dyn_cast<DeclRefExpr>(Val: E0))
482 if (auto *DRE1 = dyn_cast<DeclRefExpr>(Val: E1)) {
483 return DRE0->getDecl() == DRE1->getDecl();
484 }
485 return false;
486 };
487 std::optional<llvm::APSInt> Arg1CV = Arg1->getIntegerConstantExpr(Ctx);
488
489 if (Arg1CV && Arg1CV->isZero())
490 // Check form 5:
491 return true;
492
493 // Check forms 1-3:
494 switch (Arg0->getStmtClass()) {
495 case Stmt::CXXNewExprClass:
496 if (auto Size = cast<CXXNewExpr>(Val: Arg0)->getArraySize()) {
497 // Check form 1:
498 return AreSameDRE((*Size)->IgnoreImplicit(), Arg1) ||
499 HaveEqualConstantValues(*Size, Arg1);
500 }
501 // TODO: what's placeholder type? avoid it for now.
502 if (!cast<CXXNewExpr>(Val: Arg0)->hasPlaceholderType()) {
503 // Check form 2:
504 return Arg1CV && Arg1CV->isOne();
505 }
506 break;
507 case Stmt::UnaryOperatorClass:
508 if (cast<UnaryOperator>(Val: Arg0)->getOpcode() ==
509 UnaryOperator::Opcode::UO_AddrOf)
510 // Check form 3:
511 return Arg1CV && Arg1CV->isOne();
512 break;
513 case Stmt::CallExprClass:
514 // Check form 3:
515 if (const auto *CE = dyn_cast<CallExpr>(Val: Arg0)) {
516 const auto FnDecl = CE->getDirectCallee();
517 if (FnDecl && FnDecl->getNameAsString() == "addressof" &&
518 FnDecl->isInStdNamespace()) {
519 return Arg1CV && Arg1CV->isOne();
520 }
521 }
522 break;
523 default:
524 break;
525 }
526
527 QualType Arg0Ty = Arg0->IgnoreImplicit()->getType();
528
529 if (auto *ConstArrTy = Ctx.getAsConstantArrayType(T: Arg0Ty)) {
530 const llvm::APSInt ConstArrSize = llvm::APSInt(ConstArrTy->getSize());
531
532 // Check form 4:
533 return Arg1CV && llvm::APSInt::compareValues(I1: ConstArrSize, I2: *Arg1CV) == 0;
534 }
535 // Check form 6:
536 if (auto CCast = dyn_cast<CStyleCastExpr>(Val: Arg0)) {
537 if (!CCast->getType()->isPointerType())
538 return false;
539
540 QualType PteTy = CCast->getType()->getPointeeType();
541
542 if (!(PteTy->isConstantSizeType() && Ctx.getTypeSizeInChars(T: PteTy).isOne()))
543 return false;
544
545 if (const auto *Call = dyn_cast<CallExpr>(CCast->getSubExpr())) {
546 if (const FunctionDecl *FD = Call->getDirectCallee())
547 if (auto *AllocAttr = FD->getAttr<AllocSizeAttr>()) {
548 const Expr *EleSizeExpr =
549 Call->getArg(AllocAttr->getElemSizeParam().getASTIndex());
550 // NumElemIdx is invalid if AllocSizeAttr has 1 argument:
551 ParamIdx NumElemIdx = AllocAttr->getNumElemsParam();
552
553 if (!NumElemIdx.isValid())
554 return areEqualIntegers(E1: Arg1, E2: EleSizeExpr, Ctx);
555
556 const Expr *NumElesExpr = Call->getArg(NumElemIdx.getASTIndex());
557
558 if (auto BO = dyn_cast<BinaryOperator>(Val: Arg1))
559 return areEqualIntegralBinaryOperators(E1: BO, E2_LHS: NumElesExpr, BOP: BO_Mul,
560 E2_RHS: EleSizeExpr, Ctx);
561 }
562 }
563 }
564 return false;
565}
566
567static bool isSafeArraySubscript(const ArraySubscriptExpr &Node,
568 const ASTContext &Ctx) {
569 // FIXME: Proper solution:
570 // - refactor Sema::CheckArrayAccess
571 // - split safe/OOB/unknown decision logic from diagnostics emitting code
572 // - e. g. "Try harder to find a NamedDecl to point at in the note."
573 // already duplicated
574 // - call both from Sema and from here
575
576 uint64_t limit;
577 if (const auto *CATy =
578 dyn_cast<ConstantArrayType>(Val: Node.getBase()
579 ->IgnoreParenImpCasts()
580 ->getType()
581 ->getUnqualifiedDesugaredType())) {
582 limit = CATy->getLimitedSize();
583 } else if (const auto *SLiteral = dyn_cast<clang::StringLiteral>(
584 Val: Node.getBase()->IgnoreParenImpCasts())) {
585 limit = SLiteral->getLength() + 1;
586 } else {
587 return false;
588 }
589
590 Expr::EvalResult EVResult;
591 const Expr *IndexExpr = Node.getIdx();
592 if (!IndexExpr->isValueDependent() &&
593 IndexExpr->EvaluateAsInt(Result&: EVResult, Ctx)) {
594 llvm::APSInt ArrIdx = EVResult.Val.getInt();
595 // FIXME: ArrIdx.isNegative() we could immediately emit an error as that's a
596 // bug
597 if (ArrIdx.isNonNegative() && ArrIdx.getLimitedValue() < limit)
598 return true;
599 } else if (const auto *BE = dyn_cast<BinaryOperator>(Val: IndexExpr)) {
600 // For an integer expression `e` and an integer constant `n`, `e & n` and
601 // `n & e` are bounded by `n`:
602 if (BE->getOpcode() != BO_And && BE->getOpcode() != BO_Rem)
603 return false;
604
605 const Expr *LHS = BE->getLHS();
606 const Expr *RHS = BE->getRHS();
607
608 if (BE->getOpcode() == BO_Rem) {
609 // If n is a negative number, then n % const can be greater than const
610 if (!LHS->getType()->isUnsignedIntegerType()) {
611 return false;
612 }
613
614 if (!RHS->isValueDependent() && RHS->EvaluateAsInt(Result&: EVResult, Ctx)) {
615 llvm::APSInt result = EVResult.Val.getInt();
616 if (result.isNonNegative() && result.getLimitedValue() <= limit)
617 return true;
618 }
619
620 return false;
621 }
622
623 if ((!LHS->isValueDependent() &&
624 LHS->EvaluateAsInt(Result&: EVResult, Ctx)) || // case: `n & e`
625 (!RHS->isValueDependent() &&
626 RHS->EvaluateAsInt(Result&: EVResult, Ctx))) { // `e & n`
627 llvm::APSInt result = EVResult.Val.getInt();
628 if (result.isNonNegative() && result.getLimitedValue() < limit)
629 return true;
630 }
631 return false;
632 }
633 return false;
634}
635
636namespace libc_func_matchers {
637// Under `libc_func_matchers`, define a set of matchers that match unsafe
638// functions in libc and unsafe calls to them.
639
640// A tiny parser to strip off common prefix and suffix of libc function names
641// in real code.
642//
643// Given a function name, `matchName` returns `CoreName` according to the
644// following grammar:
645//
646// LibcName := CoreName | CoreName + "_s"
647// MatchingName := "__builtin_" + LibcName |
648// "__builtin___" + LibcName + "_chk" |
649// "__asan_" + LibcName
650//
651struct LibcFunNamePrefixSuffixParser {
652 StringRef matchName(StringRef FunName, bool isBuiltin) {
653 // Try to match __builtin_:
654 if (isBuiltin && FunName.starts_with(Prefix: "__builtin_"))
655 // Then either it is __builtin_LibcName or __builtin___LibcName_chk or
656 // no match:
657 return matchLibcNameOrBuiltinChk(
658 Name: FunName.drop_front(N: 10 /* truncate "__builtin_" */));
659 // Try to match __asan_:
660 if (FunName.starts_with(Prefix: "__asan_"))
661 return matchLibcName(Name: FunName.drop_front(N: 7 /* truncate of "__asan_" */));
662 return matchLibcName(Name: FunName);
663 }
664
665 // Parameter `Name` is the substring after stripping off the prefix
666 // "__builtin_".
667 StringRef matchLibcNameOrBuiltinChk(StringRef Name) {
668 if (Name.starts_with(Prefix: "__") && Name.ends_with(Suffix: "_chk"))
669 return matchLibcName(
670 Name: Name.drop_front(N: 2).drop_back(N: 4) /* truncate "__" and "_chk" */);
671 return matchLibcName(Name);
672 }
673
674 StringRef matchLibcName(StringRef Name) {
675 if (Name.ends_with(Suffix: "_s"))
676 return Name.drop_back(N: 2 /* truncate "_s" */);
677 return Name;
678 }
679};
680
681// A pointer type expression is known to be null-terminated, if it has the
682// form: E.c_str(), for any expression E of `std::string` type.
683static bool isNullTermPointer(const Expr *Ptr) {
684 if (isa<clang::StringLiteral>(Val: Ptr->IgnoreParenImpCasts()))
685 return true;
686 if (isa<PredefinedExpr>(Val: Ptr->IgnoreParenImpCasts()))
687 return true;
688 if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Val: Ptr->IgnoreParenImpCasts())) {
689 const CXXMethodDecl *MD = MCE->getMethodDecl();
690 const CXXRecordDecl *RD = MCE->getRecordDecl()->getCanonicalDecl();
691
692 if (MD && RD && RD->isInStdNamespace() && MD->getIdentifier())
693 if (MD->getName() == "c_str" && RD->getName() == "basic_string")
694 return true;
695 }
696 return false;
697}
698
699// Return true iff at least one of following cases holds:
700// 1. Format string is a literal and there is an unsafe pointer argument
701// corresponding to an `s` specifier;
702// 2. Format string is not a literal and there is least an unsafe pointer
703// argument (including the formatter argument).
704//
705// `UnsafeArg` is the output argument that will be set only if this function
706// returns true.
707static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
708 const unsigned FmtArgIdx, ASTContext &Ctx,
709 bool isKprintf = false) {
710 class StringFormatStringHandler
711 : public analyze_format_string::FormatStringHandler {
712 const CallExpr *Call;
713 unsigned FmtArgIdx;
714 const Expr *&UnsafeArg;
715
716 public:
717 StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx,
718 const Expr *&UnsafeArg)
719 : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {}
720
721 bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
722 const char *startSpecifier,
723 unsigned specifierLen,
724 const TargetInfo &Target) override {
725 if (FS.getConversionSpecifier().getKind() ==
726 analyze_printf::PrintfConversionSpecifier::sArg) {
727 unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
728
729 if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
730 if (!isNullTermPointer(Ptr: Call->getArg(Arg: ArgIdx))) {
731 UnsafeArg = Call->getArg(Arg: ArgIdx); // output
732 // returning false stops parsing immediately
733 return false;
734 }
735 }
736 return true; // continue parsing
737 }
738 };
739
740 const Expr *Fmt = Call->getArg(Arg: FmtArgIdx);
741
742 if (auto *SL = dyn_cast<clang::StringLiteral>(Val: Fmt->IgnoreParenImpCasts())) {
743 StringRef FmtStr;
744
745 if (SL->getCharByteWidth() == 1)
746 FmtStr = SL->getString();
747 else if (auto EvaledFmtStr = SL->tryEvaluateString(Ctx))
748 FmtStr = *EvaledFmtStr;
749 else
750 goto CHECK_UNSAFE_PTR;
751
752 StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg);
753
754 return analyze_format_string::ParsePrintfString(
755 H&: Handler, beg: FmtStr.begin(), end: FmtStr.end(), LO: Ctx.getLangOpts(),
756 Target: Ctx.getTargetInfo(), isFreeBSDKPrintf: isKprintf);
757 }
758CHECK_UNSAFE_PTR:
759 // If format is not a string literal, we cannot analyze the format string.
760 // In this case, this call is considered unsafe if at least one argument
761 // (including the format argument) is unsafe pointer.
762 return llvm::any_of(
763 llvm::make_range(Call->arg_begin() + FmtArgIdx, Call->arg_end()),
764 [&UnsafeArg](const Expr *Arg) -> bool {
765 if (Arg->getType()->isPointerType() && !isNullTermPointer(Ptr: Arg)) {
766 UnsafeArg = Arg;
767 return true;
768 }
769 return false;
770 });
771}
772
773// Matches a FunctionDecl node such that
774// 1. It's name, after stripping off predefined prefix and suffix, is
775// `CoreName`; and
776// 2. `CoreName` or `CoreName[str/wcs]` is one of the `PredefinedNames`, which
777// is a set of libc function names.
778//
779// Note: For predefined prefix and suffix, see `LibcFunNamePrefixSuffixParser`.
780// The notation `CoreName[str/wcs]` means a new name obtained from replace
781// string "wcs" with "str" in `CoreName`.
782static bool isPredefinedUnsafeLibcFunc(const FunctionDecl &Node) {
783 static std::unique_ptr<std::set<StringRef>> PredefinedNames = nullptr;
784 if (!PredefinedNames)
785 PredefinedNames =
786 std::make_unique<std::set<StringRef>, std::set<StringRef>>(args: {
787 // numeric conversion:
788 "atof",
789 "atoi",
790 "atol",
791 "atoll",
792 "strtol",
793 "strtoll",
794 "strtoul",
795 "strtoull",
796 "strtof",
797 "strtod",
798 "strtold",
799 "strtoimax",
800 "strtoumax",
801 // "strfromf", "strfromd", "strfroml", // C23?
802 // string manipulation:
803 "strcpy",
804 "strncpy",
805 "strlcpy",
806 "strcat",
807 "strncat",
808 "strlcat",
809 "strxfrm",
810 "strdup",
811 "strndup",
812 // string examination:
813 "strlen",
814 "strnlen",
815 "strcmp",
816 "strncmp",
817 "stricmp",
818 "strcasecmp",
819 "strcoll",
820 "strchr",
821 "strrchr",
822 "strspn",
823 "strcspn",
824 "strpbrk",
825 "strstr",
826 "strtok",
827 // "mem-" functions
828 "memchr",
829 "wmemchr",
830 "memcmp",
831 "wmemcmp",
832 "memcpy",
833 "memccpy",
834 "mempcpy",
835 "wmemcpy",
836 "memmove",
837 "wmemmove",
838 "memset",
839 "wmemset",
840 // IO:
841 "fread",
842 "fwrite",
843 "fgets",
844 "fgetws",
845 "gets",
846 "fputs",
847 "fputws",
848 "puts",
849 // others
850 "strerror_s",
851 "strerror_r",
852 "bcopy",
853 "bzero",
854 "bsearch",
855 "qsort",
856 });
857
858 auto *II = Node.getIdentifier();
859
860 if (!II)
861 return false;
862
863 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
864 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
865
866 // Match predefined names:
867 if (PredefinedNames->find(x: Name) != PredefinedNames->end())
868 return true;
869
870 std::string NameWCS = Name.str();
871 size_t WcsPos = NameWCS.find(s: "wcs");
872
873 while (WcsPos != std::string::npos) {
874 NameWCS[WcsPos++] = 's';
875 NameWCS[WcsPos++] = 't';
876 NameWCS[WcsPos++] = 'r';
877 WcsPos = NameWCS.find(s: "wcs", pos: WcsPos);
878 }
879 if (PredefinedNames->find(x: NameWCS) != PredefinedNames->end())
880 return true;
881 // All `scanf` functions are unsafe (including `sscanf`, `vsscanf`, etc.. They
882 // all should end with "scanf"):
883 return Name.ends_with(Suffix: "scanf");
884}
885
886// Match a call to one of the `v*printf` functions taking `va_list`. We cannot
887// check safety for these functions so they should be changed to their
888// non-va_list versions.
889static bool isUnsafeVaListPrintfFunc(const FunctionDecl &Node) {
890 auto *II = Node.getIdentifier();
891
892 if (!II)
893 return false;
894
895 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
896 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
897
898 if (!Name.ends_with(Suffix: "printf"))
899 return false; // neither printf nor scanf
900 return Name.starts_with(Prefix: "v");
901}
902
903// Matches a call to one of the `sprintf` functions as they are always unsafe
904// and should be changed to `snprintf`.
905static bool isUnsafeSprintfFunc(const FunctionDecl &Node) {
906 auto *II = Node.getIdentifier();
907
908 if (!II)
909 return false;
910
911 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
912 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
913
914 if (!Name.ends_with(Suffix: "printf") ||
915 // Let `isUnsafeVaListPrintfFunc` check for cases with va-list:
916 Name.starts_with(Prefix: "v"))
917 return false;
918
919 StringRef Prefix = Name.drop_back(N: 6);
920
921 if (Prefix.ends_with(Suffix: "w"))
922 Prefix = Prefix.drop_back(N: 1);
923 return Prefix == "s";
924}
925
926// Match function declarations of `printf`, `fprintf`, `snprintf` and their wide
927// character versions. Calls to these functions can be safe if their arguments
928// are carefully made safe.
929static bool isNormalPrintfFunc(const FunctionDecl &Node) {
930 auto *II = Node.getIdentifier();
931
932 if (!II)
933 return false;
934
935 StringRef Name = LibcFunNamePrefixSuffixParser().matchName(
936 FunName: II->getName(), isBuiltin: Node.getBuiltinID());
937
938 if (!Name.ends_with(Suffix: "printf") || Name.starts_with(Prefix: "v"))
939 return false;
940
941 StringRef Prefix = Name.drop_back(N: 6);
942
943 if (Prefix.ends_with(Suffix: "w"))
944 Prefix = Prefix.drop_back(N: 1);
945
946 return Prefix.empty() || Prefix == "k" || Prefix == "f" || Prefix == "sn";
947}
948
949// This matcher requires that it is known that the callee `isNormalPrintf`.
950// Then if the format string is a string literal, this matcher matches when at
951// least one string argument is unsafe. If the format is not a string literal,
952// this matcher matches when at least one pointer type argument is unsafe.
953static bool hasUnsafePrintfStringArg(const CallExpr &Node, ASTContext &Ctx,
954 MatchResult &Result, llvm::StringRef Tag) {
955 // Determine what printf it is by examining formal parameters:
956 const FunctionDecl *FD = Node.getDirectCallee();
957
958 assert(FD && "It should have been checked that FD is non-null.");
959
960 unsigned NumParms = FD->getNumParams();
961
962 if (NumParms < 1)
963 return false; // possibly some user-defined printf function
964
965 QualType FirstParmTy = FD->getParamDecl(i: 0)->getType();
966
967 if (!FirstParmTy->isPointerType())
968 return false; // possibly some user-defined printf function
969
970 QualType FirstPteTy = FirstParmTy->castAs<PointerType>()->getPointeeType();
971
972 if (!Ctx.getFILEType()
973 .isNull() && //`FILE *` must be in the context if it is fprintf
974 FirstPteTy.getCanonicalType() == Ctx.getFILEType().getCanonicalType()) {
975 // It is a fprintf:
976 const Expr *UnsafeArg;
977
978 if (hasUnsafeFormatOrSArg(Call: &Node, UnsafeArg, FmtArgIdx: 1, Ctx, isKprintf: false)) {
979 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *UnsafeArg));
980 return true;
981 }
982 return false;
983 }
984
985 if (FirstPteTy.isConstQualified()) {
986 // If the first parameter is a `const char *`, it is a printf/kprintf:
987 bool isKprintf = false;
988 const Expr *UnsafeArg;
989
990 if (auto *II = FD->getIdentifier())
991 isKprintf = II->getName() == "kprintf";
992 if (hasUnsafeFormatOrSArg(Call: &Node, UnsafeArg, FmtArgIdx: 0, Ctx, isKprintf)) {
993 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *UnsafeArg));
994 return true;
995 }
996 return false;
997 }
998
999 if (NumParms > 2) {
1000 QualType SecondParmTy = FD->getParamDecl(i: 1)->getType();
1001
1002 if (!FirstPteTy.isConstQualified() && SecondParmTy->isIntegerType()) {
1003 // If the first parameter type is non-const qualified `char *` and the
1004 // second is an integer, it is a snprintf:
1005 const Expr *UnsafeArg;
1006
1007 if (hasUnsafeFormatOrSArg(Call: &Node, UnsafeArg, FmtArgIdx: 2, Ctx, isKprintf: false)) {
1008 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *UnsafeArg));
1009 return true;
1010 }
1011 return false;
1012 }
1013 }
1014 // We don't really recognize this "normal" printf, the only thing we
1015 // can do is to require all pointers to be null-terminated:
1016 for (const auto *Arg : Node.arguments())
1017 if (Arg->getType()->isPointerType() && !isNullTermPointer(Arg)) {
1018 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *Arg));
1019 return true;
1020 }
1021 return false;
1022}
1023
1024// This matcher requires that it is known that the callee `isNormalPrintf`.
1025// Then it matches if the first two arguments of the call is a pointer and an
1026// integer and they are not in a safe pattern.
1027//
1028// For the first two arguments: `ptr` and `size`, they are safe if in the
1029// following patterns:
1030//
1031// Pattern 1:
1032// ptr := DRE.data();
1033// size:= DRE.size()/DRE.size_bytes()
1034// And DRE is a hardened container or view.
1035//
1036// Pattern 2:
1037// ptr := Constant-Array-DRE;
1038// size:= any expression that has compile-time constant value equivalent to
1039// sizeof (Constant-Array-DRE)
1040static bool hasUnsafeSnprintfBuffer(const CallExpr &Node,
1041 const ASTContext &Ctx) {
1042 const FunctionDecl *FD = Node.getDirectCallee();
1043
1044 assert(FD && "It should have been checked that FD is non-null.");
1045
1046 if (FD->getNumParams() < 3)
1047 return false; // Not an snprint
1048
1049 QualType FirstParmTy = FD->getParamDecl(i: 0)->getType();
1050
1051 if (!FirstParmTy->isPointerType())
1052 return false; // Not an snprint
1053
1054 QualType FirstPteTy = FirstParmTy->castAs<PointerType>()->getPointeeType();
1055 const Expr *Buf = Node.getArg(Arg: 0), *Size = Node.getArg(Arg: 1);
1056
1057 if (FirstPteTy.isConstQualified() || !Buf->getType()->isPointerType() ||
1058 !Size->getType()->isIntegerType())
1059 return false; // not an snprintf call
1060
1061 // Pattern 1:
1062 static StringRef SizedObjs[] = {"span", "array", "vector",
1063 "basic_string_view", "basic_string"};
1064 Buf = Buf->IgnoreParenImpCasts();
1065 Size = Size->IgnoreParenImpCasts();
1066 if (auto *MCEPtr = dyn_cast<CXXMemberCallExpr>(Val: Buf))
1067 if (auto *MCESize = dyn_cast<CXXMemberCallExpr>(Val: Size)) {
1068 auto *DREOfPtr = dyn_cast<DeclRefExpr>(
1069 Val: MCEPtr->getImplicitObjectArgument()->IgnoreParenImpCasts());
1070 auto *DREOfSize = dyn_cast<DeclRefExpr>(
1071 Val: MCESize->getImplicitObjectArgument()->IgnoreParenImpCasts());
1072
1073 if (!DREOfPtr || !DREOfSize)
1074 return true; // not in safe pattern
1075 if (DREOfPtr->getDecl() != DREOfSize->getDecl())
1076 return true; // not in safe pattern
1077 if (MCEPtr->getMethodDecl()->getName() != "data")
1078 return true; // not in safe pattern
1079
1080 if (MCESize->getMethodDecl()->getName() == "size_bytes" ||
1081 // Note here the pointer must be a pointer-to-char type unless there
1082 // is explicit casting. If there is explicit casting, this branch
1083 // is unreachable. Thus, at this branch "size" and "size_bytes" are
1084 // equivalent as the pointer is a char pointer:
1085 MCESize->getMethodDecl()->getName() == "size")
1086 for (StringRef SizedObj : SizedObjs)
1087 if (MCEPtr->getRecordDecl()->isInStdNamespace() &&
1088 MCEPtr->getRecordDecl()->getCanonicalDecl()->getName() ==
1089 SizedObj)
1090 return false; // It is in fact safe
1091 }
1092
1093 // Pattern 2:
1094 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: Buf->IgnoreParenImpCasts())) {
1095 if (auto *CAT = Ctx.getAsConstantArrayType(DRE->getType())) {
1096 Expr::EvalResult ER;
1097 // The array element type must be compatible with `char` otherwise an
1098 // explicit cast will be needed, which will make this check unreachable.
1099 // Therefore, the array extent is same as its' bytewise size.
1100 if (Size->EvaluateAsInt(Result&: ER, Ctx)) {
1101 llvm::APSInt EVal = ER.Val.getInt(); // Size must have integer type
1102
1103 return llvm::APSInt::compareValues(
1104 I1: EVal, I2: llvm::APSInt(CAT->getSize(), true)) != 0;
1105 }
1106 }
1107 }
1108 return true; // ptr and size are not in safe pattern
1109}
1110} // namespace libc_func_matchers
1111
1112namespace {
1113// Because the analysis revolves around variables and their types, we'll need to
1114// track uses of variables (aka DeclRefExprs).
1115using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
1116
1117// Convenience typedef.
1118using FixItList = SmallVector<FixItHint, 4>;
1119} // namespace
1120
1121namespace {
1122/// Gadget is an individual operation in the code that may be of interest to
1123/// this analysis. Each (non-abstract) subclass corresponds to a specific
1124/// rigid AST structure that constitutes an operation on a pointer-type object.
1125/// Discovery of a gadget in the code corresponds to claiming that we understand
1126/// what this part of code is doing well enough to potentially improve it.
1127/// Gadgets can be warning (immediately deserving a warning) or fixable (not
1128/// always deserving a warning per se, but requires our attention to identify
1129/// it warrants a fixit).
1130class Gadget {
1131public:
1132 enum class Kind {
1133#define GADGET(x) x,
1134#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1135 };
1136
1137 Gadget(Kind K) : K(K) {}
1138
1139 Kind getKind() const { return K; }
1140
1141#ifndef NDEBUG
1142 StringRef getDebugName() const {
1143 switch (K) {
1144#define GADGET(x) \
1145 case Kind::x: \
1146 return #x;
1147#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1148 }
1149 llvm_unreachable("Unhandled Gadget::Kind enum");
1150 }
1151#endif
1152
1153 virtual bool isWarningGadget() const = 0;
1154 // TODO remove this method from WarningGadget interface. It's only used for
1155 // debug prints in FixableGadget.
1156 virtual SourceLocation getSourceLoc() const = 0;
1157
1158 /// Returns the list of pointer-type variables on which this gadget performs
1159 /// its operation. Typically, there's only one variable. This isn't a list
1160 /// of all DeclRefExprs in the gadget's AST!
1161 virtual DeclUseList getClaimedVarUseSites() const = 0;
1162
1163 virtual ~Gadget() = default;
1164
1165private:
1166 Kind K;
1167};
1168
1169/// Warning gadgets correspond to unsafe code patterns that warrants
1170/// an immediate warning.
1171class WarningGadget : public Gadget {
1172public:
1173 WarningGadget(Kind K) : Gadget(K) {}
1174
1175 static bool classof(const Gadget *G) { return G->isWarningGadget(); }
1176 bool isWarningGadget() const final { return true; }
1177
1178 virtual void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1179 bool IsRelatedToDecl,
1180 ASTContext &Ctx) const = 0;
1181
1182 virtual SmallVector<const Expr *, 1> getUnsafePtrs() const = 0;
1183};
1184
1185/// Fixable gadgets correspond to code patterns that aren't always unsafe but
1186/// need to be properly recognized in order to emit fixes. For example, if a raw
1187/// pointer-type variable is replaced by a safe C++ container, every use of such
1188/// variable must be carefully considered and possibly updated.
1189class FixableGadget : public Gadget {
1190public:
1191 FixableGadget(Kind K) : Gadget(K) {}
1192
1193 static bool classof(const Gadget *G) { return !G->isWarningGadget(); }
1194 bool isWarningGadget() const final { return false; }
1195
1196 /// Returns a fixit that would fix the current gadget according to
1197 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
1198 /// returns an empty list if no fixes are necessary.
1199 virtual std::optional<FixItList> getFixits(const FixitStrategy &) const {
1200 return std::nullopt;
1201 }
1202
1203 /// Returns a list of two elements where the first element is the LHS of a
1204 /// pointer assignment statement and the second element is the RHS. This
1205 /// two-element list represents the fact that the LHS buffer gets its bounds
1206 /// information from the RHS buffer. This information will be used later to
1207 /// group all those variables whose types must be modified together to prevent
1208 /// type mismatches.
1209 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1210 getStrategyImplications() const {
1211 return std::nullopt;
1212 }
1213};
1214
1215static bool isSupportedVariable(const DeclRefExpr &Node) {
1216 const Decl *D = Node.getDecl();
1217 return D != nullptr && isa<VarDecl>(Val: D);
1218}
1219
1220using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>;
1221using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>;
1222
1223/// An increment of a pointer-type value is unsafe as it may run the pointer
1224/// out of bounds.
1225class IncrementGadget : public WarningGadget {
1226 static constexpr const char *const OpTag = "op";
1227 const UnaryOperator *Op;
1228
1229public:
1230 IncrementGadget(const MatchResult &Result)
1231 : WarningGadget(Kind::Increment),
1232 Op(Result.getNodeAs<UnaryOperator>(ID: OpTag)) {}
1233
1234 static bool classof(const Gadget *G) {
1235 return G->getKind() == Kind::Increment;
1236 }
1237
1238 static bool matches(const Stmt *S, const ASTContext &Ctx,
1239 MatchResult &Result) {
1240 const auto *UO = dyn_cast<UnaryOperator>(Val: S);
1241 if (!UO || !UO->isIncrementOp())
1242 return false;
1243 if (!hasPointerType(E: *UO->getSubExpr()->IgnoreParenImpCasts()))
1244 return false;
1245 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *UO));
1246 return true;
1247 }
1248
1249 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1250 bool IsRelatedToDecl,
1251 ASTContext &Ctx) const override {
1252 Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx);
1253 }
1254 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1255
1256 DeclUseList getClaimedVarUseSites() const override {
1257 SmallVector<const DeclRefExpr *, 2> Uses;
1258 if (const auto *DRE =
1259 dyn_cast<DeclRefExpr>(Val: Op->getSubExpr()->IgnoreParenImpCasts())) {
1260 Uses.push_back(Elt: DRE);
1261 }
1262
1263 return std::move(Uses);
1264 }
1265
1266 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1267 return {Op->getSubExpr()->IgnoreParenImpCasts()};
1268 }
1269};
1270
1271/// A decrement of a pointer-type value is unsafe as it may run the pointer
1272/// out of bounds.
1273class DecrementGadget : public WarningGadget {
1274 static constexpr const char *const OpTag = "op";
1275 const UnaryOperator *Op;
1276
1277public:
1278 DecrementGadget(const MatchResult &Result)
1279 : WarningGadget(Kind::Decrement),
1280 Op(Result.getNodeAs<UnaryOperator>(ID: OpTag)) {}
1281
1282 static bool classof(const Gadget *G) {
1283 return G->getKind() == Kind::Decrement;
1284 }
1285
1286 static bool matches(const Stmt *S, const ASTContext &Ctx,
1287 MatchResult &Result) {
1288 const auto *UO = dyn_cast<UnaryOperator>(Val: S);
1289 if (!UO || !UO->isDecrementOp())
1290 return false;
1291 if (!hasPointerType(E: *UO->getSubExpr()->IgnoreParenImpCasts()))
1292 return false;
1293 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *UO));
1294 return true;
1295 }
1296
1297 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1298 bool IsRelatedToDecl,
1299 ASTContext &Ctx) const override {
1300 Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx);
1301 }
1302 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1303
1304 DeclUseList getClaimedVarUseSites() const override {
1305 if (const auto *DRE =
1306 dyn_cast<DeclRefExpr>(Val: Op->getSubExpr()->IgnoreParenImpCasts())) {
1307 return {DRE};
1308 }
1309
1310 return {};
1311 }
1312
1313 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1314 return {Op->getSubExpr()->IgnoreParenImpCasts()};
1315 }
1316};
1317
1318/// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
1319/// it doesn't have any bounds checks for the array.
1320class ArraySubscriptGadget : public WarningGadget {
1321 static constexpr const char *const ArraySubscrTag = "ArraySubscript";
1322 const ArraySubscriptExpr *ASE;
1323
1324public:
1325 ArraySubscriptGadget(const MatchResult &Result)
1326 : WarningGadget(Kind::ArraySubscript),
1327 ASE(Result.getNodeAs<ArraySubscriptExpr>(ID: ArraySubscrTag)) {}
1328
1329 static bool classof(const Gadget *G) {
1330 return G->getKind() == Kind::ArraySubscript;
1331 }
1332
1333 static bool matches(const Stmt *S, const ASTContext &Ctx,
1334 MatchResult &Result) {
1335 const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: S);
1336 if (!ASE)
1337 return false;
1338 const auto *const Base = ASE->getBase()->IgnoreParenImpCasts();
1339 if (!hasPointerType(E: *Base) && !hasArrayType(E: *Base))
1340 return false;
1341 const auto *Idx = dyn_cast<IntegerLiteral>(Val: ASE->getIdx());
1342 bool IsSafeIndex = (Idx && Idx->getValue().isZero()) ||
1343 isa<ArrayInitIndexExpr>(Val: ASE->getIdx());
1344 if (IsSafeIndex || isSafeArraySubscript(Node: *ASE, Ctx))
1345 return false;
1346 Result.addNode(ID: ArraySubscrTag, Node: DynTypedNode::create(Node: *ASE));
1347 return true;
1348 }
1349
1350 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1351 bool IsRelatedToDecl,
1352 ASTContext &Ctx) const override {
1353 Handler.handleUnsafeOperation(ASE, IsRelatedToDecl, Ctx);
1354 }
1355 SourceLocation getSourceLoc() const override { return ASE->getBeginLoc(); }
1356
1357 DeclUseList getClaimedVarUseSites() const override {
1358 if (const auto *DRE =
1359 dyn_cast<DeclRefExpr>(Val: ASE->getBase()->IgnoreParenImpCasts())) {
1360 return {DRE};
1361 }
1362
1363 return {};
1364 }
1365
1366 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1367 return {ASE->getBase()->IgnoreParenImpCasts()};
1368 }
1369};
1370
1371/// A pointer arithmetic expression of one of the forms:
1372/// \code
1373/// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
1374/// \endcode
1375class PointerArithmeticGadget : public WarningGadget {
1376 static constexpr const char *const PointerArithmeticTag = "ptrAdd";
1377 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr";
1378 const BinaryOperator *PA; // pointer arithmetic expression
1379 const Expr *Ptr; // the pointer expression in `PA`
1380
1381public:
1382 PointerArithmeticGadget(const MatchResult &Result)
1383 : WarningGadget(Kind::PointerArithmetic),
1384 PA(Result.getNodeAs<BinaryOperator>(ID: PointerArithmeticTag)),
1385 Ptr(Result.getNodeAs<Expr>(ID: PointerArithmeticPointerTag)) {}
1386
1387 static bool classof(const Gadget *G) {
1388 return G->getKind() == Kind::PointerArithmetic;
1389 }
1390
1391 static bool matches(const Stmt *S, const ASTContext &Ctx,
1392 MatchResult &Result) {
1393 const auto *BO = dyn_cast<BinaryOperator>(Val: S);
1394 if (!BO)
1395 return false;
1396 const auto *LHS = BO->getLHS();
1397 const auto *RHS = BO->getRHS();
1398 // ptr at left
1399 if (BO->getOpcode() == BO_Add || BO->getOpcode() == BO_Sub ||
1400 BO->getOpcode() == BO_AddAssign || BO->getOpcode() == BO_SubAssign) {
1401 if (hasPointerType(E: *LHS) && (RHS->getType()->isIntegerType() ||
1402 RHS->getType()->isEnumeralType())) {
1403 Result.addNode(ID: PointerArithmeticPointerTag, Node: DynTypedNode::create(Node: *LHS));
1404 Result.addNode(ID: PointerArithmeticTag, Node: DynTypedNode::create(Node: *BO));
1405 return true;
1406 }
1407 }
1408 // ptr at right
1409 if (BO->getOpcode() == BO_Add && hasPointerType(E: *RHS) &&
1410 (LHS->getType()->isIntegerType() || LHS->getType()->isEnumeralType())) {
1411 Result.addNode(ID: PointerArithmeticPointerTag, Node: DynTypedNode::create(Node: *RHS));
1412 Result.addNode(ID: PointerArithmeticTag, Node: DynTypedNode::create(Node: *BO));
1413 return true;
1414 }
1415 return false;
1416 }
1417
1418 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1419 bool IsRelatedToDecl,
1420 ASTContext &Ctx) const override {
1421 Handler.handleUnsafeOperation(PA, IsRelatedToDecl, Ctx);
1422 }
1423 SourceLocation getSourceLoc() const override { return PA->getBeginLoc(); }
1424
1425 DeclUseList getClaimedVarUseSites() const override {
1426 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ptr->IgnoreParenImpCasts())) {
1427 return {DRE};
1428 }
1429
1430 return {};
1431 }
1432
1433 SmallVector<const Expr *, 1> getUnsafePtrs() const override {
1434 return {Ptr->IgnoreParenImpCasts()};
1435 }
1436
1437 // FIXME: pointer adding zero should be fine
1438 // FIXME: this gadge will need a fix-it
1439};
1440
1441class SpanTwoParamConstructorGadget : public WarningGadget {
1442 static constexpr const char *const SpanTwoParamConstructorTag =
1443 "spanTwoParamConstructor";
1444 const CXXConstructExpr *Ctor; // the span constructor expression
1445
1446public:
1447 SpanTwoParamConstructorGadget(const MatchResult &Result)
1448 : WarningGadget(Kind::SpanTwoParamConstructor),
1449 Ctor(Result.getNodeAs<CXXConstructExpr>(ID: SpanTwoParamConstructorTag)) {}
1450
1451 static bool classof(const Gadget *G) {
1452 return G->getKind() == Kind::SpanTwoParamConstructor;
1453 }
1454
1455 static bool matches(const Stmt *S, ASTContext &Ctx, MatchResult &Result) {
1456 const auto *CE = dyn_cast<CXXConstructExpr>(Val: S);
1457 if (!CE)
1458 return false;
1459 const auto *CDecl = CE->getConstructor();
1460 const auto *CRecordDecl = CDecl->getParent();
1461 auto HasTwoParamSpanCtorDecl =
1462 CRecordDecl->isInStdNamespace() &&
1463 CDecl->getDeclName().getAsString() == "span" && CE->getNumArgs() == 2;
1464 if (!HasTwoParamSpanCtorDecl || isSafeSpanTwoParamConstruct(Node: *CE, Ctx))
1465 return false;
1466 Result.addNode(ID: SpanTwoParamConstructorTag, Node: DynTypedNode::create(Node: *CE));
1467 return true;
1468 }
1469
1470 static bool matches(const Stmt *S, ASTContext &Ctx,
1471 const UnsafeBufferUsageHandler *Handler,
1472 MatchResult &Result) {
1473 if (ignoreUnsafeBufferInContainer(Node: *S, Handler))
1474 return false;
1475 return matches(S, Ctx, Result);
1476 }
1477
1478 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1479 bool IsRelatedToDecl,
1480 ASTContext &Ctx) const override {
1481 Handler.handleUnsafeOperationInContainer(Ctor, IsRelatedToDecl, Ctx);
1482 }
1483 SourceLocation getSourceLoc() const override { return Ctor->getBeginLoc(); }
1484
1485 DeclUseList getClaimedVarUseSites() const override {
1486 // If the constructor call is of the form `std::span{var, n}`, `var` is
1487 // considered an unsafe variable.
1488 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: Ctor->getArg(Arg: 0))) {
1489 if (isa<VarDecl>(Val: DRE->getDecl()))
1490 return {DRE};
1491 }
1492 return {};
1493 }
1494
1495 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1496};
1497
1498/// A pointer initialization expression of the form:
1499/// \code
1500/// int *p = q;
1501/// \endcode
1502class PointerInitGadget : public FixableGadget {
1503private:
1504 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS";
1505 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS";
1506 const VarDecl *PtrInitLHS; // the LHS pointer expression in `PI`
1507 const DeclRefExpr *PtrInitRHS; // the RHS pointer expression in `PI`
1508
1509public:
1510 PointerInitGadget(const MatchResult &Result)
1511 : FixableGadget(Kind::PointerInit),
1512 PtrInitLHS(Result.getNodeAs<VarDecl>(ID: PointerInitLHSTag)),
1513 PtrInitRHS(Result.getNodeAs<DeclRefExpr>(ID: PointerInitRHSTag)) {}
1514
1515 static bool classof(const Gadget *G) {
1516 return G->getKind() == Kind::PointerInit;
1517 }
1518
1519 static bool matches(const Stmt *S,
1520 llvm::SmallVectorImpl<MatchResult> &Results) {
1521 const DeclStmt *DS = dyn_cast<DeclStmt>(Val: S);
1522 if (!DS || !DS->isSingleDecl())
1523 return false;
1524 const VarDecl *VD = dyn_cast<VarDecl>(Val: DS->getSingleDecl());
1525 if (!VD)
1526 return false;
1527 const Expr *Init = VD->getAnyInitializer();
1528 if (!Init)
1529 return false;
1530 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Init->IgnoreImpCasts());
1531 if (!DRE || !hasPointerType(*DRE) || !isSupportedVariable(Node: *DRE)) {
1532 return false;
1533 }
1534 MatchResult R;
1535 R.addNode(ID: PointerInitLHSTag, Node: DynTypedNode::create(Node: *VD));
1536 R.addNode(ID: PointerInitRHSTag, Node: DynTypedNode::create(Node: *DRE));
1537 Results.emplace_back(Args: std::move(R));
1538 return true;
1539 }
1540
1541 virtual std::optional<FixItList>
1542 getFixits(const FixitStrategy &S) const override;
1543 SourceLocation getSourceLoc() const override {
1544 return PtrInitRHS->getBeginLoc();
1545 }
1546
1547 virtual DeclUseList getClaimedVarUseSites() const override {
1548 return DeclUseList{PtrInitRHS};
1549 }
1550
1551 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1552 getStrategyImplications() const override {
1553 return std::make_pair(x: PtrInitLHS, y: cast<VarDecl>(Val: PtrInitRHS->getDecl()));
1554 }
1555};
1556
1557/// A pointer assignment expression of the form:
1558/// \code
1559/// p = q;
1560/// \endcode
1561/// where both `p` and `q` are pointers.
1562class PtrToPtrAssignmentGadget : public FixableGadget {
1563private:
1564 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
1565 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
1566 const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA`
1567 const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA`
1568
1569public:
1570 PtrToPtrAssignmentGadget(const MatchResult &Result)
1571 : FixableGadget(Kind::PtrToPtrAssignment),
1572 PtrLHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignLHSTag)),
1573 PtrRHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignRHSTag)) {}
1574
1575 static bool classof(const Gadget *G) {
1576 return G->getKind() == Kind::PtrToPtrAssignment;
1577 }
1578
1579 static bool matches(const Stmt *S,
1580 llvm::SmallVectorImpl<MatchResult> &Results) {
1581 size_t SizeBefore = Results.size();
1582 findStmtsInUnspecifiedUntypedContext(S, InnerMatcher: [&Results](const Stmt *S) {
1583 const auto *BO = dyn_cast<BinaryOperator>(Val: S);
1584 if (!BO || BO->getOpcode() != BO_Assign)
1585 return;
1586 const auto *RHS = BO->getRHS()->IgnoreParenImpCasts();
1587 if (const auto *RHSRef = dyn_cast<DeclRefExpr>(Val: RHS);
1588 !RHSRef || !hasPointerType(*RHSRef) ||
1589 !isSupportedVariable(Node: *RHSRef)) {
1590 return;
1591 }
1592 const auto *LHS = BO->getLHS();
1593 if (const auto *LHSRef = dyn_cast<DeclRefExpr>(Val: LHS);
1594 !LHSRef || !hasPointerType(*LHSRef) ||
1595 !isSupportedVariable(Node: *LHSRef)) {
1596 return;
1597 }
1598 MatchResult R;
1599 R.addNode(ID: PointerAssignLHSTag, Node: DynTypedNode::create(Node: *LHS));
1600 R.addNode(ID: PointerAssignRHSTag, Node: DynTypedNode::create(Node: *RHS));
1601 Results.emplace_back(Args: std::move(R));
1602 });
1603 return SizeBefore != Results.size();
1604 }
1605
1606 virtual std::optional<FixItList>
1607 getFixits(const FixitStrategy &S) const override;
1608 SourceLocation getSourceLoc() const override { return PtrLHS->getBeginLoc(); }
1609
1610 virtual DeclUseList getClaimedVarUseSites() const override {
1611 return DeclUseList{PtrLHS, PtrRHS};
1612 }
1613
1614 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1615 getStrategyImplications() const override {
1616 return std::make_pair(x: cast<VarDecl>(Val: PtrLHS->getDecl()),
1617 y: cast<VarDecl>(Val: PtrRHS->getDecl()));
1618 }
1619};
1620
1621/// An assignment expression of the form:
1622/// \code
1623/// ptr = array;
1624/// \endcode
1625/// where `p` is a pointer and `array` is a constant size array.
1626class CArrayToPtrAssignmentGadget : public FixableGadget {
1627private:
1628 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
1629 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
1630 const DeclRefExpr *PtrLHS; // the LHS pointer expression in `PA`
1631 const DeclRefExpr *PtrRHS; // the RHS pointer expression in `PA`
1632
1633public:
1634 CArrayToPtrAssignmentGadget(const MatchResult &Result)
1635 : FixableGadget(Kind::CArrayToPtrAssignment),
1636 PtrLHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignLHSTag)),
1637 PtrRHS(Result.getNodeAs<DeclRefExpr>(ID: PointerAssignRHSTag)) {}
1638
1639 static bool classof(const Gadget *G) {
1640 return G->getKind() == Kind::CArrayToPtrAssignment;
1641 }
1642
1643 static bool matches(const Stmt *S,
1644 llvm::SmallVectorImpl<MatchResult> &Results) {
1645 size_t SizeBefore = Results.size();
1646 findStmtsInUnspecifiedUntypedContext(S, InnerMatcher: [&Results](const Stmt *S) {
1647 const auto *BO = dyn_cast<BinaryOperator>(Val: S);
1648 if (!BO || BO->getOpcode() != BO_Assign)
1649 return;
1650 const auto *RHS = BO->getRHS()->IgnoreParenImpCasts();
1651 if (const auto *RHSRef = dyn_cast<DeclRefExpr>(Val: RHS);
1652 !RHSRef ||
1653 !isa<ConstantArrayType>(RHSRef->getType().getCanonicalType()) ||
1654 !isSupportedVariable(Node: *RHSRef)) {
1655 return;
1656 }
1657 const auto *LHS = BO->getLHS();
1658 if (const auto *LHSRef = dyn_cast<DeclRefExpr>(Val: LHS);
1659 !LHSRef || !hasPointerType(*LHSRef) ||
1660 !isSupportedVariable(Node: *LHSRef)) {
1661 return;
1662 }
1663 MatchResult R;
1664 R.addNode(ID: PointerAssignLHSTag, Node: DynTypedNode::create(Node: *LHS));
1665 R.addNode(ID: PointerAssignRHSTag, Node: DynTypedNode::create(Node: *RHS));
1666 Results.emplace_back(Args: std::move(R));
1667 });
1668 return SizeBefore != Results.size();
1669 }
1670
1671 virtual std::optional<FixItList>
1672 getFixits(const FixitStrategy &S) const override;
1673 SourceLocation getSourceLoc() const override { return PtrLHS->getBeginLoc(); }
1674
1675 virtual DeclUseList getClaimedVarUseSites() const override {
1676 return DeclUseList{PtrLHS, PtrRHS};
1677 }
1678
1679 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
1680 getStrategyImplications() const override {
1681 return {};
1682 }
1683};
1684
1685/// A call of a function or method that performs unchecked buffer operations
1686/// over one of its pointer parameters.
1687class UnsafeBufferUsageAttrGadget : public WarningGadget {
1688 constexpr static const char *const OpTag = "attr_expr";
1689 const Expr *Op;
1690
1691public:
1692 UnsafeBufferUsageAttrGadget(const MatchResult &Result)
1693 : WarningGadget(Kind::UnsafeBufferUsageAttr),
1694 Op(Result.getNodeAs<Expr>(ID: OpTag)) {}
1695
1696 static bool classof(const Gadget *G) {
1697 return G->getKind() == Kind::UnsafeBufferUsageAttr;
1698 }
1699
1700 static bool matches(const Stmt *S, const ASTContext &Ctx,
1701 MatchResult &Result) {
1702 if (auto *CE = dyn_cast<CallExpr>(Val: S)) {
1703 if (CE->getDirectCallee() &&
1704 CE->getDirectCallee()->hasAttr<UnsafeBufferUsageAttr>()) {
1705 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *CE));
1706 return true;
1707 }
1708 }
1709 if (auto *ME = dyn_cast<MemberExpr>(Val: S)) {
1710 if (!isa<FieldDecl>(Val: ME->getMemberDecl()))
1711 return false;
1712 if (ME->getMemberDecl()->hasAttr<UnsafeBufferUsageAttr>()) {
1713 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *ME));
1714 return true;
1715 }
1716 }
1717 return false;
1718 }
1719
1720 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1721 bool IsRelatedToDecl,
1722 ASTContext &Ctx) const override {
1723 Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx);
1724 }
1725 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1726
1727 DeclUseList getClaimedVarUseSites() const override { return {}; }
1728
1729 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1730};
1731
1732/// A call of a constructor that performs unchecked buffer operations
1733/// over one of its pointer parameters, or constructs a class object that will
1734/// perform buffer operations that depend on the correctness of the parameters.
1735class UnsafeBufferUsageCtorAttrGadget : public WarningGadget {
1736 constexpr static const char *const OpTag = "cxx_construct_expr";
1737 const CXXConstructExpr *Op;
1738
1739public:
1740 UnsafeBufferUsageCtorAttrGadget(const MatchResult &Result)
1741 : WarningGadget(Kind::UnsafeBufferUsageCtorAttr),
1742 Op(Result.getNodeAs<CXXConstructExpr>(ID: OpTag)) {}
1743
1744 static bool classof(const Gadget *G) {
1745 return G->getKind() == Kind::UnsafeBufferUsageCtorAttr;
1746 }
1747
1748 static bool matches(const Stmt *S, ASTContext &Ctx, MatchResult &Result) {
1749 const auto *CE = dyn_cast<CXXConstructExpr>(Val: S);
1750 if (!CE || !CE->getConstructor()->hasAttr<UnsafeBufferUsageAttr>())
1751 return false;
1752 // std::span(ptr, size) ctor is handled by SpanTwoParamConstructorGadget.
1753 MatchResult Tmp;
1754 if (SpanTwoParamConstructorGadget::matches(CE, Ctx, Tmp))
1755 return false;
1756 Result.addNode(ID: OpTag, Node: DynTypedNode::create(Node: *CE));
1757 return true;
1758 }
1759
1760 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1761 bool IsRelatedToDecl,
1762 ASTContext &Ctx) const override {
1763 Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx);
1764 }
1765 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1766
1767 DeclUseList getClaimedVarUseSites() const override { return {}; }
1768
1769 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1770};
1771
1772// Warning gadget for unsafe invocation of span::data method.
1773// Triggers when the pointer returned by the invocation is immediately
1774// cast to a larger type.
1775
1776class DataInvocationGadget : public WarningGadget {
1777 constexpr static const char *const OpTag = "data_invocation_expr";
1778 const ExplicitCastExpr *Op;
1779
1780public:
1781 DataInvocationGadget(const MatchResult &Result)
1782 : WarningGadget(Kind::DataInvocation),
1783 Op(Result.getNodeAs<ExplicitCastExpr>(ID: OpTag)) {}
1784
1785 static bool classof(const Gadget *G) {
1786 return G->getKind() == Kind::DataInvocation;
1787 }
1788
1789 static bool matches(const Stmt *S, const ASTContext &Ctx,
1790 MatchResult &Result) {
1791 auto *CE = dyn_cast<ExplicitCastExpr>(Val: S);
1792 if (!CE)
1793 return false;
1794 for (auto *Child : CE->children()) {
1795 if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Child);
1796 MCE && isDataFunction(MCE)) {
1797 Result.addNode(OpTag, DynTypedNode::create(*CE));
1798 return true;
1799 }
1800 if (auto *Paren = dyn_cast<ParenExpr>(Child)) {
1801 if (auto *MCE = dyn_cast<CXXMemberCallExpr>(Paren->getSubExpr());
1802 MCE && isDataFunction(MCE)) {
1803 Result.addNode(OpTag, DynTypedNode::create(*CE));
1804 return true;
1805 }
1806 }
1807 }
1808 return false;
1809 }
1810
1811 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1812 bool IsRelatedToDecl,
1813 ASTContext &Ctx) const override {
1814 Handler.handleUnsafeOperation(Op, IsRelatedToDecl, Ctx);
1815 }
1816 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
1817
1818 DeclUseList getClaimedVarUseSites() const override { return {}; }
1819
1820private:
1821 static bool isDataFunction(const CXXMemberCallExpr *call) {
1822 if (!call)
1823 return false;
1824 auto *callee = call->getDirectCallee();
1825 if (!callee || !isa<CXXMethodDecl>(callee))
1826 return false;
1827 auto *method = cast<CXXMethodDecl>(callee);
1828 if (method->getNameAsString() == "data" &&
1829 method->getParent()->isInStdNamespace() &&
1830 (method->getParent()->getName() == "span" ||
1831 method->getParent()->getName() == "array" ||
1832 method->getParent()->getName() == "vector"))
1833 return true;
1834 return false;
1835 }
1836
1837 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1838};
1839
1840class UnsafeLibcFunctionCallGadget : public WarningGadget {
1841 const CallExpr *const Call;
1842 const Expr *UnsafeArg = nullptr;
1843 constexpr static const char *const Tag = "UnsafeLibcFunctionCall";
1844 // Extra tags for additional information:
1845 constexpr static const char *const UnsafeSprintfTag =
1846 "UnsafeLibcFunctionCall_sprintf";
1847 constexpr static const char *const UnsafeSizedByTag =
1848 "UnsafeLibcFunctionCall_sized_by";
1849 constexpr static const char *const UnsafeStringTag =
1850 "UnsafeLibcFunctionCall_string";
1851 constexpr static const char *const UnsafeVaListTag =
1852 "UnsafeLibcFunctionCall_va_list";
1853
1854 enum UnsafeKind {
1855 OTHERS = 0, // no specific information, the callee function is unsafe
1856 SPRINTF = 1, // never call `-sprintf`s, call `-snprintf`s instead.
1857 SIZED_BY =
1858 2, // the first two arguments of `snprintf` function have
1859 // "__sized_by" relation but they do not conform to safe patterns
1860 STRING = 3, // an argument is a pointer-to-char-as-string but does not
1861 // guarantee null-termination
1862 VA_LIST = 4, // one of the `-printf`s function that take va_list, which is
1863 // considered unsafe as it is not compile-time check
1864 } WarnedFunKind = OTHERS;
1865
1866public:
1867 UnsafeLibcFunctionCallGadget(const MatchResult &Result)
1868 : WarningGadget(Kind::UnsafeLibcFunctionCall),
1869 Call(Result.getNodeAs<CallExpr>(ID: Tag)) {
1870 if (Result.getNodeAs<Decl>(ID: UnsafeSprintfTag))
1871 WarnedFunKind = SPRINTF;
1872 else if (auto *E = Result.getNodeAs<Expr>(ID: UnsafeStringTag)) {
1873 WarnedFunKind = STRING;
1874 UnsafeArg = E;
1875 } else if (Result.getNodeAs<CallExpr>(ID: UnsafeSizedByTag)) {
1876 WarnedFunKind = SIZED_BY;
1877 UnsafeArg = Call->getArg(Arg: 0);
1878 } else if (Result.getNodeAs<Decl>(ID: UnsafeVaListTag))
1879 WarnedFunKind = VA_LIST;
1880 }
1881
1882 static bool matches(const Stmt *S, ASTContext &Ctx,
1883 const UnsafeBufferUsageHandler *Handler,
1884 MatchResult &Result) {
1885 if (ignoreUnsafeLibcCall(Ctx, Node: *S, Handler))
1886 return false;
1887 auto *CE = dyn_cast<CallExpr>(Val: S);
1888 if (!CE || !CE->getDirectCallee())
1889 return false;
1890 const auto *FD = dyn_cast<FunctionDecl>(Val: CE->getDirectCallee());
1891 if (!FD)
1892 return false;
1893 auto isSingleStringLiteralArg = false;
1894 if (CE->getNumArgs() == 1) {
1895 isSingleStringLiteralArg =
1896 isa<clang::StringLiteral>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts());
1897 }
1898 if (!isSingleStringLiteralArg) {
1899 // (unless the call has a sole string literal argument):
1900 if (libc_func_matchers::isPredefinedUnsafeLibcFunc(Node: *FD)) {
1901 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1902 return true;
1903 }
1904 if (libc_func_matchers::isUnsafeVaListPrintfFunc(Node: *FD)) {
1905 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1906 Result.addNode(ID: UnsafeVaListTag, Node: DynTypedNode::create(Node: *FD));
1907 return true;
1908 }
1909 if (libc_func_matchers::isUnsafeSprintfFunc(Node: *FD)) {
1910 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1911 Result.addNode(ID: UnsafeSprintfTag, Node: DynTypedNode::create(Node: *FD));
1912 return true;
1913 }
1914 }
1915 if (libc_func_matchers::isNormalPrintfFunc(Node: *FD)) {
1916 if (libc_func_matchers::hasUnsafeSnprintfBuffer(Node: *CE, Ctx)) {
1917 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1918 Result.addNode(ID: UnsafeSizedByTag, Node: DynTypedNode::create(Node: *CE));
1919 return true;
1920 }
1921 if (libc_func_matchers::hasUnsafePrintfStringArg(Node: *CE, Ctx, Result,
1922 Tag: UnsafeStringTag)) {
1923 Result.addNode(ID: Tag, Node: DynTypedNode::create(Node: *CE));
1924 return true;
1925 }
1926 }
1927 return false;
1928 }
1929
1930 const Stmt *getBaseStmt() const { return Call; }
1931
1932 SourceLocation getSourceLoc() const override { return Call->getBeginLoc(); }
1933
1934 void handleUnsafeOperation(UnsafeBufferUsageHandler &Handler,
1935 bool IsRelatedToDecl,
1936 ASTContext &Ctx) const override {
1937 Handler.handleUnsafeLibcCall(Call, PrintfInfo: WarnedFunKind, Ctx, UnsafeArg);
1938 }
1939
1940 DeclUseList getClaimedVarUseSites() const override { return {}; }
1941
1942 SmallVector<const Expr *, 1> getUnsafePtrs() const override { return {}; }
1943};
1944
1945// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
1946// Context (see `findStmtsInUnspecifiedLvalueContext`).
1947// Note here `[]` is the built-in subscript operator.
1948class ULCArraySubscriptGadget : public FixableGadget {
1949private:
1950 static constexpr const char *const ULCArraySubscriptTag =
1951 "ArraySubscriptUnderULC";
1952 const ArraySubscriptExpr *Node;
1953
1954public:
1955 ULCArraySubscriptGadget(const MatchResult &Result)
1956 : FixableGadget(Kind::ULCArraySubscript),
1957 Node(Result.getNodeAs<ArraySubscriptExpr>(ID: ULCArraySubscriptTag)) {
1958 assert(Node != nullptr && "Expecting a non-null matching result");
1959 }
1960
1961 static bool classof(const Gadget *G) {
1962 return G->getKind() == Kind::ULCArraySubscript;
1963 }
1964
1965 static bool matches(const Stmt *S,
1966 llvm::SmallVectorImpl<MatchResult> &Results) {
1967 size_t SizeBefore = Results.size();
1968 findStmtsInUnspecifiedLvalueContext(S, OnResult: [&Results](const Expr *E) {
1969 const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: E);
1970 if (!ASE)
1971 return;
1972 const auto *DRE =
1973 dyn_cast<DeclRefExpr>(Val: ASE->getBase()->IgnoreParenImpCasts());
1974 if (!DRE || !(hasPointerType(*DRE) || hasArrayType(*DRE)) ||
1975 !isSupportedVariable(Node: *DRE))
1976 return;
1977 MatchResult R;
1978 R.addNode(ID: ULCArraySubscriptTag, Node: DynTypedNode::create(Node: *ASE));
1979 Results.emplace_back(Args: std::move(R));
1980 });
1981 return SizeBefore != Results.size();
1982 }
1983
1984 virtual std::optional<FixItList>
1985 getFixits(const FixitStrategy &S) const override;
1986 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
1987
1988 virtual DeclUseList getClaimedVarUseSites() const override {
1989 if (const auto *DRE =
1990 dyn_cast<DeclRefExpr>(Val: Node->getBase()->IgnoreImpCasts())) {
1991 return {DRE};
1992 }
1993 return {};
1994 }
1995};
1996
1997// Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
1998// unspecified pointer context (findStmtsInUnspecifiedPointerContext). The
1999// gadget emits fixit of the form `UPC(DRE.data())`.
2000class UPCStandalonePointerGadget : public FixableGadget {
2001private:
2002 static constexpr const char *const DeclRefExprTag = "StandalonePointer";
2003 const DeclRefExpr *Node;
2004
2005public:
2006 UPCStandalonePointerGadget(const MatchResult &Result)
2007 : FixableGadget(Kind::UPCStandalonePointer),
2008 Node(Result.getNodeAs<DeclRefExpr>(ID: DeclRefExprTag)) {
2009 assert(Node != nullptr && "Expecting a non-null matching result");
2010 }
2011
2012 static bool classof(const Gadget *G) {
2013 return G->getKind() == Kind::UPCStandalonePointer;
2014 }
2015
2016 static bool matches(const Stmt *S,
2017 llvm::SmallVectorImpl<MatchResult> &Results) {
2018 size_t SizeBefore = Results.size();
2019 findStmtsInUnspecifiedPointerContext(S, InnerMatcher: [&Results](const Stmt *S) {
2020 auto *E = dyn_cast<Expr>(Val: S);
2021 if (!E)
2022 return;
2023 const auto *DRE = dyn_cast<DeclRefExpr>(Val: E->IgnoreParenImpCasts());
2024 if (!DRE || (!hasPointerType(*DRE) && !hasArrayType(*DRE)) ||
2025 !isSupportedVariable(Node: *DRE))
2026 return;
2027 MatchResult R;
2028 R.addNode(ID: DeclRefExprTag, Node: DynTypedNode::create(Node: *DRE));
2029 Results.emplace_back(Args: std::move(R));
2030 });
2031 return SizeBefore != Results.size();
2032 }
2033
2034 virtual std::optional<FixItList>
2035 getFixits(const FixitStrategy &S) const override;
2036 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2037
2038 virtual DeclUseList getClaimedVarUseSites() const override { return {Node}; }
2039};
2040
2041class PointerDereferenceGadget : public FixableGadget {
2042 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
2043 static constexpr const char *const OperatorTag = "op";
2044
2045 const DeclRefExpr *BaseDeclRefExpr = nullptr;
2046 const UnaryOperator *Op = nullptr;
2047
2048public:
2049 PointerDereferenceGadget(const MatchResult &Result)
2050 : FixableGadget(Kind::PointerDereference),
2051 BaseDeclRefExpr(Result.getNodeAs<DeclRefExpr>(ID: BaseDeclRefExprTag)),
2052 Op(Result.getNodeAs<UnaryOperator>(ID: OperatorTag)) {}
2053
2054 static bool classof(const Gadget *G) {
2055 return G->getKind() == Kind::PointerDereference;
2056 }
2057
2058 static bool matches(const Stmt *S,
2059 llvm::SmallVectorImpl<MatchResult> &Results) {
2060 size_t SizeBefore = Results.size();
2061 findStmtsInUnspecifiedLvalueContext(S, [&Results](const Stmt *S) {
2062 const auto *UO = dyn_cast<UnaryOperator>(Val: S);
2063 if (!UO || UO->getOpcode() != UO_Deref)
2064 return;
2065 const auto *CE = dyn_cast<Expr>(Val: UO->getSubExpr());
2066 if (!CE)
2067 return;
2068 CE = CE->IgnoreParenImpCasts();
2069 const auto *DRE = dyn_cast<DeclRefExpr>(Val: CE);
2070 if (!DRE || !isSupportedVariable(Node: *DRE))
2071 return;
2072 MatchResult R;
2073 R.addNode(ID: BaseDeclRefExprTag, Node: DynTypedNode::create(Node: *DRE));
2074 R.addNode(ID: OperatorTag, Node: DynTypedNode::create(Node: *UO));
2075 Results.emplace_back(Args: std::move(R));
2076 });
2077 return SizeBefore != Results.size();
2078 }
2079
2080 DeclUseList getClaimedVarUseSites() const override {
2081 return {BaseDeclRefExpr};
2082 }
2083
2084 virtual std::optional<FixItList>
2085 getFixits(const FixitStrategy &S) const override;
2086 SourceLocation getSourceLoc() const override { return Op->getBeginLoc(); }
2087};
2088
2089// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
2090// Context (see `findStmtsInUnspecifiedPointerContext`).
2091// Note here `[]` is the built-in subscript operator.
2092class UPCAddressofArraySubscriptGadget : public FixableGadget {
2093private:
2094 static constexpr const char *const UPCAddressofArraySubscriptTag =
2095 "AddressofArraySubscriptUnderUPC";
2096 const UnaryOperator *Node; // the `&DRE[any]` node
2097
2098public:
2099 UPCAddressofArraySubscriptGadget(const MatchResult &Result)
2100 : FixableGadget(Kind::ULCArraySubscript),
2101 Node(Result.getNodeAs<UnaryOperator>(ID: UPCAddressofArraySubscriptTag)) {
2102 assert(Node != nullptr && "Expecting a non-null matching result");
2103 }
2104
2105 static bool classof(const Gadget *G) {
2106 return G->getKind() == Kind::UPCAddressofArraySubscript;
2107 }
2108
2109 static bool matches(const Stmt *S,
2110 llvm::SmallVectorImpl<MatchResult> &Results) {
2111 size_t SizeBefore = Results.size();
2112 findStmtsInUnspecifiedPointerContext(S, InnerMatcher: [&Results](const Stmt *S) {
2113 auto *E = dyn_cast<Expr>(Val: S);
2114 if (!E)
2115 return;
2116 const auto *UO = dyn_cast<UnaryOperator>(Val: E->IgnoreImpCasts());
2117 if (!UO || UO->getOpcode() != UO_AddrOf)
2118 return;
2119 const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: UO->getSubExpr());
2120 if (!ASE)
2121 return;
2122 const auto *DRE =
2123 dyn_cast<DeclRefExpr>(Val: ASE->getBase()->IgnoreParenImpCasts());
2124 if (!DRE || !isSupportedVariable(Node: *DRE))
2125 return;
2126 MatchResult R;
2127 R.addNode(ID: UPCAddressofArraySubscriptTag, Node: DynTypedNode::create(Node: *UO));
2128 Results.emplace_back(Args: std::move(R));
2129 });
2130 return SizeBefore != Results.size();
2131 }
2132
2133 virtual std::optional<FixItList>
2134 getFixits(const FixitStrategy &) const override;
2135 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2136
2137 virtual DeclUseList getClaimedVarUseSites() const override {
2138 const auto *ArraySubst = cast<ArraySubscriptExpr>(Val: Node->getSubExpr());
2139 const auto *DRE =
2140 cast<DeclRefExpr>(Val: ArraySubst->getBase()->IgnoreParenImpCasts());
2141 return {DRE};
2142 }
2143};
2144} // namespace
2145
2146namespace {
2147// An auxiliary tracking facility for the fixit analysis. It helps connect
2148// declarations to its uses and make sure we've covered all uses with our
2149// analysis before we try to fix the declaration.
2150class DeclUseTracker {
2151 using UseSetTy = llvm::SmallSet<const DeclRefExpr *, 16>;
2152 using DefMapTy = llvm::DenseMap<const VarDecl *, const DeclStmt *>;
2153
2154 // Allocate on the heap for easier move.
2155 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
2156 DefMapTy Defs{};
2157
2158public:
2159 DeclUseTracker() = default;
2160 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
2161 DeclUseTracker &operator=(const DeclUseTracker &) = delete;
2162 DeclUseTracker(DeclUseTracker &&) = default;
2163 DeclUseTracker &operator=(DeclUseTracker &&) = default;
2164
2165 // Start tracking a freshly discovered DRE.
2166 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(Ptr: DRE); }
2167
2168 // Stop tracking the DRE as it's been fully figured out.
2169 void claimUse(const DeclRefExpr *DRE) {
2170 assert(Uses->count(DRE) &&
2171 "DRE not found or claimed by multiple matchers!");
2172 Uses->erase(Ptr: DRE);
2173 }
2174
2175 // A variable is unclaimed if at least one use is unclaimed.
2176 bool hasUnclaimedUses(const VarDecl *VD) const {
2177 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
2178 return any_of(Range&: *Uses, P: [VD](const DeclRefExpr *DRE) {
2179 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
2180 });
2181 }
2182
2183 UseSetTy getUnclaimedUses(const VarDecl *VD) const {
2184 UseSetTy ReturnSet;
2185 for (auto use : *Uses) {
2186 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) {
2187 ReturnSet.insert(Ptr: use);
2188 }
2189 }
2190 return ReturnSet;
2191 }
2192
2193 void discoverDecl(const DeclStmt *DS) {
2194 for (const Decl *D : DS->decls()) {
2195 if (const auto *VD = dyn_cast<VarDecl>(Val: D)) {
2196 // FIXME: Assertion temporarily disabled due to a bug in
2197 // ASTMatcher internal behavior in presence of GNU
2198 // statement-expressions. We need to properly investigate this
2199 // because it can screw up our algorithm in other ways.
2200 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
2201 Defs[VD] = DS;
2202 }
2203 }
2204 }
2205
2206 const DeclStmt *lookupDecl(const VarDecl *VD) const {
2207 return Defs.lookup(Val: VD);
2208 }
2209};
2210} // namespace
2211
2212// Representing a pointer type expression of the form `++Ptr` in an Unspecified
2213// Pointer Context (UPC):
2214class UPCPreIncrementGadget : public FixableGadget {
2215private:
2216 static constexpr const char *const UPCPreIncrementTag =
2217 "PointerPreIncrementUnderUPC";
2218 const UnaryOperator *Node; // the `++Ptr` node
2219
2220public:
2221 UPCPreIncrementGadget(const MatchResult &Result)
2222 : FixableGadget(Kind::UPCPreIncrement),
2223 Node(Result.getNodeAs<UnaryOperator>(ID: UPCPreIncrementTag)) {
2224 assert(Node != nullptr && "Expecting a non-null matching result");
2225 }
2226
2227 static bool classof(const Gadget *G) {
2228 return G->getKind() == Kind::UPCPreIncrement;
2229 }
2230
2231 static bool matches(const Stmt *S,
2232 llvm::SmallVectorImpl<MatchResult> &Results) {
2233 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
2234 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
2235 // can have the matcher be general, so long as `getClaimedVarUseSites` does
2236 // things right.
2237 size_t SizeBefore = Results.size();
2238 findStmtsInUnspecifiedPointerContext(S, InnerMatcher: [&Results](const Stmt *S) {
2239 auto *E = dyn_cast<Expr>(Val: S);
2240 if (!E)
2241 return;
2242 const auto *UO = dyn_cast<UnaryOperator>(Val: E->IgnoreImpCasts());
2243 if (!UO || UO->getOpcode() != UO_PreInc)
2244 return;
2245 const auto *DRE = dyn_cast<DeclRefExpr>(Val: UO->getSubExpr());
2246 if (!DRE || !isSupportedVariable(Node: *DRE))
2247 return;
2248 MatchResult R;
2249 R.addNode(ID: UPCPreIncrementTag, Node: DynTypedNode::create(Node: *UO));
2250 Results.emplace_back(Args: std::move(R));
2251 });
2252 return SizeBefore != Results.size();
2253 }
2254
2255 virtual std::optional<FixItList>
2256 getFixits(const FixitStrategy &S) const override;
2257 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2258
2259 virtual DeclUseList getClaimedVarUseSites() const override {
2260 return {dyn_cast<DeclRefExpr>(Val: Node->getSubExpr())};
2261 }
2262};
2263
2264// Representing a pointer type expression of the form `Ptr += n` in an
2265// Unspecified Untyped Context (UUC):
2266class UUCAddAssignGadget : public FixableGadget {
2267private:
2268 static constexpr const char *const UUCAddAssignTag =
2269 "PointerAddAssignUnderUUC";
2270 static constexpr const char *const OffsetTag = "Offset";
2271
2272 const BinaryOperator *Node; // the `Ptr += n` node
2273 const Expr *Offset = nullptr;
2274
2275public:
2276 UUCAddAssignGadget(const MatchResult &Result)
2277 : FixableGadget(Kind::UUCAddAssign),
2278 Node(Result.getNodeAs<BinaryOperator>(ID: UUCAddAssignTag)),
2279 Offset(Result.getNodeAs<Expr>(ID: OffsetTag)) {
2280 assert(Node != nullptr && "Expecting a non-null matching result");
2281 }
2282
2283 static bool classof(const Gadget *G) {
2284 return G->getKind() == Kind::UUCAddAssign;
2285 }
2286
2287 static bool matches(const Stmt *S,
2288 llvm::SmallVectorImpl<MatchResult> &Results) {
2289 size_t SizeBefore = Results.size();
2290 findStmtsInUnspecifiedUntypedContext(S, InnerMatcher: [&Results](const Stmt *S) {
2291 const auto *E = dyn_cast<Expr>(Val: S);
2292 if (!E)
2293 return;
2294 const auto *BO = dyn_cast<BinaryOperator>(Val: E->IgnoreImpCasts());
2295 if (!BO || BO->getOpcode() != BO_AddAssign)
2296 return;
2297 const auto *DRE = dyn_cast<DeclRefExpr>(Val: BO->getLHS());
2298 if (!DRE || !hasPointerType(*DRE) || !isSupportedVariable(Node: *DRE))
2299 return;
2300 MatchResult R;
2301 R.addNode(ID: UUCAddAssignTag, Node: DynTypedNode::create(Node: *BO));
2302 R.addNode(ID: OffsetTag, Node: DynTypedNode::create(Node: *BO->getRHS()));
2303 Results.emplace_back(Args: std::move(R));
2304 });
2305 return SizeBefore != Results.size();
2306 }
2307
2308 virtual std::optional<FixItList>
2309 getFixits(const FixitStrategy &S) const override;
2310 SourceLocation getSourceLoc() const override { return Node->getBeginLoc(); }
2311
2312 virtual DeclUseList getClaimedVarUseSites() const override {
2313 return {dyn_cast<DeclRefExpr>(Val: Node->getLHS())};
2314 }
2315};
2316
2317// Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
2318// ptr)`:
2319class DerefSimplePtrArithFixableGadget : public FixableGadget {
2320 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
2321 static constexpr const char *const DerefOpTag = "DerefOp";
2322 static constexpr const char *const AddOpTag = "AddOp";
2323 static constexpr const char *const OffsetTag = "Offset";
2324
2325 const DeclRefExpr *BaseDeclRefExpr = nullptr;
2326 const UnaryOperator *DerefOp = nullptr;
2327 const BinaryOperator *AddOp = nullptr;
2328 const IntegerLiteral *Offset = nullptr;
2329
2330public:
2331 DerefSimplePtrArithFixableGadget(const MatchResult &Result)
2332 : FixableGadget(Kind::DerefSimplePtrArithFixable),
2333 BaseDeclRefExpr(Result.getNodeAs<DeclRefExpr>(ID: BaseDeclRefExprTag)),
2334 DerefOp(Result.getNodeAs<UnaryOperator>(ID: DerefOpTag)),
2335 AddOp(Result.getNodeAs<BinaryOperator>(ID: AddOpTag)),
2336 Offset(Result.getNodeAs<IntegerLiteral>(ID: OffsetTag)) {}
2337
2338 static bool matches(const Stmt *S,
2339 llvm::SmallVectorImpl<MatchResult> &Results) {
2340 auto IsPtr = [](const Expr *E, MatchResult &R) {
2341 if (!E || !hasPointerType(E: *E))
2342 return false;
2343 const auto *DRE = dyn_cast<DeclRefExpr>(Val: E->IgnoreImpCasts());
2344 if (!DRE || !isSupportedVariable(Node: *DRE))
2345 return false;
2346 R.addNode(ID: BaseDeclRefExprTag, Node: DynTypedNode::create(Node: *DRE));
2347 return true;
2348 };
2349 const auto IsPlusOverPtrAndInteger = [&IsPtr](const Expr *E,
2350 MatchResult &R) {
2351 const auto *BO = dyn_cast<BinaryOperator>(Val: E);
2352 if (!BO || BO->getOpcode() != BO_Add)
2353 return false;
2354
2355 const auto *LHS = BO->getLHS();
2356 const auto *RHS = BO->getRHS();
2357 if (isa<IntegerLiteral>(Val: RHS) && IsPtr(LHS, R)) {
2358 R.addNode(ID: OffsetTag, Node: DynTypedNode::create(Node: *RHS));
2359 R.addNode(ID: AddOpTag, Node: DynTypedNode::create(Node: *BO));
2360 return true;
2361 }
2362 if (isa<IntegerLiteral>(Val: LHS) && IsPtr(RHS, R)) {
2363 R.addNode(ID: OffsetTag, Node: DynTypedNode::create(Node: *LHS));
2364 R.addNode(ID: AddOpTag, Node: DynTypedNode::create(Node: *BO));
2365 return true;
2366 }
2367 return false;
2368 };
2369 size_t SizeBefore = Results.size();
2370 const auto InnerMatcher = [&IsPlusOverPtrAndInteger,
2371 &Results](const Expr *E) {
2372 const auto *UO = dyn_cast<UnaryOperator>(Val: E);
2373 if (!UO || UO->getOpcode() != UO_Deref)
2374 return;
2375
2376 const auto *Operand = UO->getSubExpr()->IgnoreParens();
2377 MatchResult R;
2378 if (IsPlusOverPtrAndInteger(Operand, R)) {
2379 R.addNode(ID: DerefOpTag, Node: DynTypedNode::create(Node: *UO));
2380 Results.emplace_back(Args: std::move(R));
2381 }
2382 };
2383 findStmtsInUnspecifiedLvalueContext(S, OnResult: InnerMatcher);
2384 return SizeBefore != Results.size();
2385 }
2386
2387 virtual std::optional<FixItList>
2388 getFixits(const FixitStrategy &s) const final;
2389 SourceLocation getSourceLoc() const override {
2390 return DerefOp->getBeginLoc();
2391 }
2392
2393 virtual DeclUseList getClaimedVarUseSites() const final {
2394 return {BaseDeclRefExpr};
2395 }
2396};
2397
2398class WarningGadgetMatcher : public FastMatcher {
2399
2400public:
2401 WarningGadgetMatcher(WarningGadgetList &WarningGadgets)
2402 : WarningGadgets(WarningGadgets) {}
2403
2404 bool matches(const DynTypedNode &DynNode, ASTContext &Ctx,
2405 const UnsafeBufferUsageHandler &Handler) override {
2406 const Stmt *S = DynNode.get<Stmt>();
2407 if (!S)
2408 return false;
2409
2410 MatchResult Result;
2411#define WARNING_GADGET(name) \
2412 if (name##Gadget::matches(S, Ctx, Result) && \
2413 notInSafeBufferOptOut(*S, &Handler)) { \
2414 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
2415 return true; \
2416 }
2417#define WARNING_OPTIONAL_GADGET(name) \
2418 if (name##Gadget::matches(S, Ctx, &Handler, Result) && \
2419 notInSafeBufferOptOut(*S, &Handler)) { \
2420 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
2421 return true; \
2422 }
2423#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
2424 return false;
2425 }
2426
2427private:
2428 WarningGadgetList &WarningGadgets;
2429};
2430
2431class FixableGadgetMatcher : public FastMatcher {
2432
2433public:
2434 FixableGadgetMatcher(FixableGadgetList &FixableGadgets,
2435 DeclUseTracker &Tracker)
2436 : FixableGadgets(FixableGadgets), Tracker(Tracker) {}
2437
2438 bool matches(const DynTypedNode &DynNode, ASTContext &Ctx,
2439 const UnsafeBufferUsageHandler &Handler) override {
2440 bool matchFound = false;
2441 const Stmt *S = DynNode.get<Stmt>();
2442 if (!S) {
2443 return matchFound;
2444 }
2445
2446 llvm::SmallVector<MatchResult> Results;
2447#define FIXABLE_GADGET(name) \
2448 if (name##Gadget::matches(S, Results)) { \
2449 for (const auto &R : Results) { \
2450 FixableGadgets.push_back(std::make_unique<name##Gadget>(R)); \
2451 matchFound = true; \
2452 } \
2453 Results = {}; \
2454 }
2455#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
2456 // In parallel, match all DeclRefExprs so that to find out
2457 // whether there are any uncovered by gadgets.
2458 if (auto *DRE = findDeclRefExpr(S); DRE) {
2459 Tracker.discoverUse(DRE);
2460 matchFound = true;
2461 }
2462 // Also match DeclStmts because we'll need them when fixing
2463 // their underlying VarDecls that otherwise don't have
2464 // any backreferences to DeclStmts.
2465 if (auto *DS = findDeclStmt(S); DS) {
2466 Tracker.discoverDecl(DS);
2467 matchFound = true;
2468 }
2469 return matchFound;
2470 }
2471
2472private:
2473 const DeclRefExpr *findDeclRefExpr(const Stmt *S) {
2474 const auto *DRE = dyn_cast<DeclRefExpr>(Val: S);
2475 if (!DRE || (!hasPointerType(*DRE) && !hasArrayType(*DRE)))
2476 return nullptr;
2477 const Decl *D = DRE->getDecl();
2478 if (!D || (!isa<VarDecl>(Val: D) && !isa<BindingDecl>(Val: D)))
2479 return nullptr;
2480 return DRE;
2481 }
2482 const DeclStmt *findDeclStmt(const Stmt *S) {
2483 const auto *DS = dyn_cast<DeclStmt>(Val: S);
2484 if (!DS)
2485 return nullptr;
2486 return DS;
2487 }
2488 FixableGadgetList &FixableGadgets;
2489 DeclUseTracker &Tracker;
2490};
2491
2492// Scan the function and return a list of gadgets found with provided kits.
2493static void findGadgets(const Stmt *S, ASTContext &Ctx,
2494 const UnsafeBufferUsageHandler &Handler,
2495 bool EmitSuggestions, FixableGadgetList &FixableGadgets,
2496 WarningGadgetList &WarningGadgets,
2497 DeclUseTracker &Tracker) {
2498 WarningGadgetMatcher WMatcher{WarningGadgets};
2499 forEachDescendantEvaluatedStmt(S, Ctx, Handler, Matcher&: WMatcher);
2500 if (EmitSuggestions) {
2501 FixableGadgetMatcher FMatcher{FixableGadgets, Tracker};
2502 forEachDescendantStmt(S, Ctx, Handler, Matcher&: FMatcher);
2503 }
2504}
2505
// Strict ordering of AST nodes by the raw encoding of their begin location.
template <typename NodeTy> struct CompareNode {
  bool operator()(const NodeTy *N1, const NodeTy *N2) const {
    const auto FirstKey = N1->getBeginLoc().getRawEncoding();
    const auto SecondKey = N2->getBeginLoc().getRawEncoding();
    return FirstKey < SecondKey;
  }
};
2513
2514std::set<const Expr *> clang::findUnsafePointers(const FunctionDecl *FD) {
2515 class MockReporter : public UnsafeBufferUsageHandler {
2516 public:
2517 MockReporter() {}
2518 void handleUnsafeOperation(const Stmt *, bool, ASTContext &) override {}
2519 void handleUnsafeLibcCall(const CallExpr *, unsigned, ASTContext &,
2520 const Expr *UnsafeArg = nullptr) override {}
2521 void handleUnsafeOperationInContainer(const Stmt *, bool,
2522 ASTContext &) override {}
2523 void handleUnsafeVariableGroup(const VarDecl *,
2524 const VariableGroupsManager &, FixItList &&,
2525 const Decl *,
2526 const FixitStrategy &) override {}
2527 bool isSafeBufferOptOut(const SourceLocation &) const override {
2528 return false;
2529 }
2530 bool ignoreUnsafeBufferInContainer(const SourceLocation &) const override {
2531 return false;
2532 }
2533 bool ignoreUnsafeBufferInLibcCall(const SourceLocation &) const override {
2534 return false;
2535 }
2536 std::string getUnsafeBufferUsageAttributeTextAt(
2537 SourceLocation, StringRef WSSuffix = "") const override {
2538 return "";
2539 }
2540 };
2541
2542 FixableGadgetList FixableGadgets;
2543 WarningGadgetList WarningGadgets;
2544 DeclUseTracker Tracker;
2545 MockReporter IgnoreHandler;
2546
2547 findGadgets(FD->getBody(), FD->getASTContext(), IgnoreHandler, false,
2548 FixableGadgets, WarningGadgets, Tracker);
2549
2550 std::set<const Expr *> Result;
2551 for (auto &G : WarningGadgets) {
2552 for (const Expr *E : G->getUnsafePtrs()) {
2553 Result.insert(x: E);
2554 }
2555 }
2556
2557 return Result;
2558}
2559
2560struct WarningGadgetSets {
2561 std::map<const VarDecl *, std::set<const WarningGadget *>,
2562 // To keep keys sorted by their locations in the map so that the
2563 // order is deterministic:
2564 CompareNode<VarDecl>>
2565 byVar;
2566 // These Gadgets are not related to pointer variables (e. g. temporaries).
2567 llvm::SmallVector<const WarningGadget *, 16> noVar;
2568};
2569
2570static WarningGadgetSets
2571groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) {
2572 WarningGadgetSets result;
2573 // If some gadgets cover more than one
2574 // variable, they'll appear more than once in the map.
2575 for (auto &G : AllUnsafeOperations) {
2576 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
2577
2578 bool AssociatedWithVarDecl = false;
2579 for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
2580 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
2581 result.byVar[VD].insert(x: G.get());
2582 AssociatedWithVarDecl = true;
2583 }
2584 }
2585
2586 if (!AssociatedWithVarDecl) {
2587 result.noVar.push_back(Elt: G.get());
2588 continue;
2589 }
2590 }
2591 return result;
2592}
2593
2594struct FixableGadgetSets {
2595 std::map<const VarDecl *, std::set<const FixableGadget *>,
2596 // To keep keys sorted by their locations in the map so that the
2597 // order is deterministic:
2598 CompareNode<VarDecl>>
2599 byVar;
2600};
2601
2602static FixableGadgetSets
2603groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {
2604 FixableGadgetSets FixablesForUnsafeVars;
2605 for (auto &F : AllFixableOperations) {
2606 DeclUseList DREs = F->getClaimedVarUseSites();
2607
2608 for (const DeclRefExpr *DRE : DREs) {
2609 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
2610 FixablesForUnsafeVars.byVar[VD].insert(x: F.get());
2611 }
2612 }
2613 }
2614 return FixablesForUnsafeVars;
2615}
2616
2617bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts,
2618 const SourceManager &SM) {
2619 // A simple interval overlap detection algorithm. Sorts all ranges by their
2620 // begin location then finds the first overlap in one pass.
2621 std::vector<const FixItHint *> All; // a copy of `FixIts`
2622
2623 for (const FixItHint &H : FixIts)
2624 All.push_back(x: &H);
2625 std::sort(first: All.begin(), last: All.end(),
2626 comp: [&SM](const FixItHint *H1, const FixItHint *H2) {
2627 return SM.isBeforeInTranslationUnit(LHS: H1->RemoveRange.getBegin(),
2628 RHS: H2->RemoveRange.getBegin());
2629 });
2630
2631 const FixItHint *CurrHint = nullptr;
2632
2633 for (const FixItHint *Hint : All) {
2634 if (!CurrHint ||
2635 SM.isBeforeInTranslationUnit(LHS: CurrHint->RemoveRange.getEnd(),
2636 RHS: Hint->RemoveRange.getBegin())) {
2637 // Either to initialize `CurrHint` or `CurrHint` does not
2638 // overlap with `Hint`:
2639 CurrHint = Hint;
2640 } else
2641 // In case `Hint` overlaps the `CurrHint`, we found at least one
2642 // conflict:
2643 return true;
2644 }
2645 return false;
2646}
2647
2648std::optional<FixItList>
2649PtrToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {
2650 const auto *LeftVD = cast<VarDecl>(Val: PtrLHS->getDecl());
2651 const auto *RightVD = cast<VarDecl>(Val: PtrRHS->getDecl());
2652 switch (S.lookup(VD: LeftVD)) {
2653 case FixitStrategy::Kind::Span:
2654 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Span)
2655 return FixItList{};
2656 return std::nullopt;
2657 case FixitStrategy::Kind::Wontfix:
2658 return std::nullopt;
2659 case FixitStrategy::Kind::Iterator:
2660 case FixitStrategy::Kind::Array:
2661 return std::nullopt;
2662 case FixitStrategy::Kind::Vector:
2663 llvm_unreachable("unsupported strategies for FixableGadgets");
2664 }
2665 return std::nullopt;
2666}
2667
/// \returns a fix-it that appends a `.data()` call after \p DRE.
2669static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx,
2670 const DeclRefExpr *DRE);
2671
2672std::optional<FixItList>
2673CArrayToPtrAssignmentGadget::getFixits(const FixitStrategy &S) const {
2674 const auto *LeftVD = cast<VarDecl>(Val: PtrLHS->getDecl());
2675 const auto *RightVD = cast<VarDecl>(Val: PtrRHS->getDecl());
2676 // TLDR: Implementing fixits for non-Wontfix strategy on both LHS and RHS is
2677 // non-trivial.
2678 //
2679 // CArrayToPtrAssignmentGadget doesn't have strategy implications because
2680 // constant size array propagates its bounds. Because of that LHS and RHS are
2681 // addressed by two different fixits.
2682 //
2683 // At the same time FixitStrategy S doesn't reflect what group a fixit belongs
2684 // to and can't be generally relied on in multi-variable Fixables!
2685 //
2686 // E. g. If an instance of this gadget is fixing variable on LHS then the
2687 // variable on RHS is fixed by a different fixit and its strategy for LHS
2688 // fixit is as if Wontfix.
2689 //
2690 // The only exception is Wontfix strategy for a given variable as that is
2691 // valid for any fixit produced for the given input source code.
2692 if (S.lookup(VD: LeftVD) == FixitStrategy::Kind::Span) {
2693 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Wontfix) {
2694 return FixItList{};
2695 }
2696 } else if (S.lookup(VD: LeftVD) == FixitStrategy::Kind::Wontfix) {
2697 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Array) {
2698 return createDataFixit(RightVD->getASTContext(), PtrRHS);
2699 }
2700 }
2701 return std::nullopt;
2702}
2703
2704std::optional<FixItList>
2705PointerInitGadget::getFixits(const FixitStrategy &S) const {
2706 const auto *LeftVD = PtrInitLHS;
2707 const auto *RightVD = cast<VarDecl>(Val: PtrInitRHS->getDecl());
2708 switch (S.lookup(VD: LeftVD)) {
2709 case FixitStrategy::Kind::Span:
2710 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Span)
2711 return FixItList{};
2712 return std::nullopt;
2713 case FixitStrategy::Kind::Wontfix:
2714 return std::nullopt;
2715 case FixitStrategy::Kind::Iterator:
2716 case FixitStrategy::Kind::Array:
2717 return std::nullopt;
2718 case FixitStrategy::Kind::Vector:
2719 llvm_unreachable("unsupported strategies for FixableGadgets");
2720 }
2721 return std::nullopt;
2722}
2723
2724static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD,
2725 const ASTContext &Ctx) {
2726 if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) {
2727 if (ConstVal->isNegative())
2728 return false;
2729 } else if (!Expr->getType()->isUnsignedIntegerType())
2730 return false;
2731 return true;
2732}
2733
2734std::optional<FixItList>
2735ULCArraySubscriptGadget::getFixits(const FixitStrategy &S) const {
2736 if (const auto *DRE =
2737 dyn_cast<DeclRefExpr>(Val: Node->getBase()->IgnoreImpCasts()))
2738 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
2739 switch (S.lookup(VD)) {
2740 case FixitStrategy::Kind::Span: {
2741
2742 // If the index has a negative constant value, we give up as no valid
2743 // fix-it can be generated:
2744 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in!
2745 VD->getASTContext();
2746 if (!isNonNegativeIntegerExpr(Expr: Node->getIdx(), VD, Ctx))
2747 return std::nullopt;
2748 // no-op is a good fix-it, otherwise
2749 return FixItList{};
2750 }
2751 case FixitStrategy::Kind::Array:
2752 return FixItList{};
2753 case FixitStrategy::Kind::Wontfix:
2754 case FixitStrategy::Kind::Iterator:
2755 case FixitStrategy::Kind::Vector:
2756 llvm_unreachable("unsupported strategies for FixableGadgets");
2757 }
2758 }
2759 return std::nullopt;
2760}
2761
2762static std::optional<FixItList> // forward declaration
2763fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
2764
2765std::optional<FixItList>
2766UPCAddressofArraySubscriptGadget::getFixits(const FixitStrategy &S) const {
2767 auto DREs = getClaimedVarUseSites();
2768 const auto *VD = cast<VarDecl>(Val: DREs.front()->getDecl());
2769
2770 switch (S.lookup(VD)) {
2771 case FixitStrategy::Kind::Span:
2772 return fixUPCAddressofArraySubscriptWithSpan(Node);
2773 case FixitStrategy::Kind::Wontfix:
2774 case FixitStrategy::Kind::Iterator:
2775 case FixitStrategy::Kind::Array:
2776 return std::nullopt;
2777 case FixitStrategy::Kind::Vector:
2778 llvm_unreachable("unsupported strategies for FixableGadgets");
2779 }
2780 return std::nullopt; // something went wrong, no fix-it
2781}
2782
2783// FIXME: this function should be customizable through format
2784static StringRef getEndOfLine() {
2785 static const char *const EOL = "\n";
2786 return EOL;
2787}
2788
2789// Returns the text indicating that the user needs to provide input there:
2790static std::string
2791getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {
2792 std::string s = std::string("<# ");
2793 s += HintTextToUser;
2794 s += " #>";
2795 return s;
2796}
2797
2798// Return the source location of the last character of the AST `Node`.
2799template <typename NodeTy>
2800static std::optional<SourceLocation>
2801getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
2802 const LangOptions &LangOpts) {
2803 if (unsigned TkLen =
2804 Lexer::MeasureTokenLength(Loc: Node->getEndLoc(), SM, LangOpts)) {
2805 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
2806
2807 if (Loc.isValid())
2808 return Loc;
2809 }
2810 return std::nullopt;
2811}
2812
2813// We cannot fix a variable declaration if it has some other specifiers than the
2814// type specifier. Because the source ranges of those specifiers could overlap
2815// with the source range that is being replaced using fix-its. Especially when
2816// we often cannot obtain accurate source ranges of cv-qualified type
2817// specifiers.
2818// FIXME: also deal with type attributes
2819static bool hasUnsupportedSpecifiers(const VarDecl *VD,
2820 const SourceManager &SM) {
2821 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the
2822 // source range of `VD`:
2823 bool AttrRangeOverlapping = llvm::any_of(VD->attrs(), [&](Attr *At) -> bool {
2824 return !(SM.isBeforeInTranslationUnit(LHS: At->getRange().getEnd(),
2825 RHS: VD->getBeginLoc())) &&
2826 !(SM.isBeforeInTranslationUnit(LHS: VD->getEndLoc(),
2827 RHS: At->getRange().getBegin()));
2828 });
2829 return VD->isInlineSpecified() || VD->isConstexpr() ||
2830 VD->hasConstantInitialization() || !VD->hasLocalStorage() ||
2831 AttrRangeOverlapping;
2832}
2833
2834// Returns the `SourceRange` of `D`. The reason why this function exists is
2835// that `D->getSourceRange()` may return a range where the end location is the
2836// starting location of the last token. The end location of the source range
2837// returned by this function is the last location of the last token.
2838static SourceRange getSourceRangeToTokenEnd(const Decl *D,
2839 const SourceManager &SM,
2840 const LangOptions &LangOpts) {
2841 SourceLocation Begin = D->getBeginLoc();
2842 SourceLocation
2843 End = // `D->getEndLoc` should always return the starting location of the
2844 // last token, so we should get the end of the token
2845 Lexer::getLocForEndOfToken(Loc: D->getEndLoc(), Offset: 0, SM, LangOpts);
2846
2847 return SourceRange(Begin, End);
2848}
2849
2850// Returns the text of the name (with qualifiers) of a `FunctionDecl`.
2851static std::optional<StringRef> getFunNameText(const FunctionDecl *FD,
2852 const SourceManager &SM,
2853 const LangOptions &LangOpts) {
2854 SourceLocation BeginLoc = FD->getQualifier()
2855 ? FD->getQualifierLoc().getBeginLoc()
2856 : FD->getNameInfo().getBeginLoc();
2857 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the
2858 // last token:
2859 SourceLocation EndLoc = Lexer::getLocForEndOfToken(
2860 Loc: FD->getNameInfo().getEndLoc(), Offset: 0, SM, LangOpts);
2861 SourceRange NameRange{BeginLoc, EndLoc};
2862
2863 return getRangeText(SR: NameRange, SM, LangOpts);
2864}
2865
2866// Returns the text representing a `std::span` type where the element type is
2867// represented by `EltTyText`.
2868//
2869// Note the optional parameter `Qualifiers`: one needs to pass qualifiers
2870// explicitly if the element type needs to be qualified.
2871static std::string
2872getSpanTypeText(StringRef EltTyText,
2873 std::optional<Qualifiers> Quals = std::nullopt) {
2874 const char *const SpanOpen = "std::span<";
2875
2876 if (Quals)
2877 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>';
2878 return SpanOpen + EltTyText.str() + '>';
2879}
2880
2881std::optional<FixItList>
2882DerefSimplePtrArithFixableGadget::getFixits(const FixitStrategy &s) const {
2883 const VarDecl *VD = dyn_cast<VarDecl>(Val: BaseDeclRefExpr->getDecl());
2884
2885 if (VD && s.lookup(VD) == FixitStrategy::Kind::Span) {
2886 ASTContext &Ctx = VD->getASTContext();
2887 // std::span can't represent elements before its begin()
2888 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx))
2889 if (ConstVal->isNegative())
2890 return std::nullopt;
2891
2892 // note that the expr may (oddly) has multiple layers of parens
2893 // example:
2894 // *((..(pointer + 123)..))
2895 // goal:
2896 // pointer[123]
2897 // Fix-It:
2898 // remove '*('
2899 // replace ' + ' with '['
2900 // replace ')' with ']'
2901
2902 // example:
2903 // *((..(123 + pointer)..))
2904 // goal:
2905 // 123[pointer]
2906 // Fix-It:
2907 // remove '*('
2908 // replace ' + ' with '['
2909 // replace ')' with ']'
2910
2911 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS();
2912 const SourceManager &SM = Ctx.getSourceManager();
2913 const LangOptions &LangOpts = Ctx.getLangOpts();
2914 CharSourceRange StarWithTrailWhitespace =
2915 clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(),
2916 LHS->getBeginLoc());
2917
2918 std::optional<SourceLocation> LHSLocation = getPastLoc(Node: LHS, SM, LangOpts);
2919 if (!LHSLocation)
2920 return std::nullopt;
2921
2922 CharSourceRange PlusWithSurroundingWhitespace =
2923 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc());
2924
2925 std::optional<SourceLocation> AddOpLocation =
2926 getPastLoc(Node: AddOp, SM, LangOpts);
2927 std::optional<SourceLocation> DerefOpLocation =
2928 getPastLoc(Node: DerefOp, SM, LangOpts);
2929
2930 if (!AddOpLocation || !DerefOpLocation)
2931 return std::nullopt;
2932
2933 CharSourceRange ClosingParenWithPrecWhitespace =
2934 clang::CharSourceRange::getCharRange(B: *AddOpLocation, E: *DerefOpLocation);
2935
2936 return FixItList{
2937 {FixItHint::CreateRemoval(RemoveRange: StarWithTrailWhitespace),
2938 FixItHint::CreateReplacement(RemoveRange: PlusWithSurroundingWhitespace, Code: "["),
2939 FixItHint::CreateReplacement(RemoveRange: ClosingParenWithPrecWhitespace, Code: "]")}};
2940 }
2941 return std::nullopt; // something wrong or unsupported, give up
2942}
2943
2944std::optional<FixItList>
2945PointerDereferenceGadget::getFixits(const FixitStrategy &S) const {
2946 const VarDecl *VD = cast<VarDecl>(Val: BaseDeclRefExpr->getDecl());
2947 switch (S.lookup(VD)) {
2948 case FixitStrategy::Kind::Span: {
2949 ASTContext &Ctx = VD->getASTContext();
2950 SourceManager &SM = Ctx.getSourceManager();
2951 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
2952 // Deletes the *operand
2953 CharSourceRange derefRange = clang::CharSourceRange::getCharRange(
2954 B: Op->getBeginLoc(), E: Op->getBeginLoc().getLocWithOffset(Offset: 1));
2955 // Inserts the [0]
2956 if (auto LocPastOperand =
2957 getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) {
2958 return FixItList{{FixItHint::CreateRemoval(RemoveRange: derefRange),
2959 FixItHint::CreateInsertion(InsertionLoc: *LocPastOperand, Code: "[0]")}};
2960 }
2961 break;
2962 }
2963 case FixitStrategy::Kind::Iterator:
2964 case FixitStrategy::Kind::Array:
2965 return std::nullopt;
2966 case FixitStrategy::Kind::Vector:
2967 llvm_unreachable("FixitStrategy not implemented yet!");
2968 case FixitStrategy::Kind::Wontfix:
2969 llvm_unreachable("Invalid strategy!");
2970 }
2971
2972 return std::nullopt;
2973}
2974
2975static inline std::optional<FixItList> createDataFixit(const ASTContext &Ctx,
2976 const DeclRefExpr *DRE) {
2977 const SourceManager &SM = Ctx.getSourceManager();
2978 // Inserts the .data() after the DRE
2979 std::optional<SourceLocation> EndOfOperand =
2980 getPastLoc(Node: DRE, SM, LangOpts: Ctx.getLangOpts());
2981
2982 if (EndOfOperand)
2983 return FixItList{{FixItHint::CreateInsertion(InsertionLoc: *EndOfOperand, Code: ".data()")}};
2984
2985 return std::nullopt;
2986}
2987
2988// Generates fix-its replacing an expression of the form UPC(DRE) with
2989// `DRE.data()`
2990std::optional<FixItList>
2991UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const {
2992 const auto VD = cast<VarDecl>(Val: Node->getDecl());
2993 switch (S.lookup(VD)) {
2994 case FixitStrategy::Kind::Array:
2995 case FixitStrategy::Kind::Span: {
2996 return createDataFixit(VD->getASTContext(), Node);
2997 // FIXME: Points inside a macro expansion.
2998 break;
2999 }
3000 case FixitStrategy::Kind::Wontfix:
3001 case FixitStrategy::Kind::Iterator:
3002 return std::nullopt;
3003 case FixitStrategy::Kind::Vector:
3004 llvm_unreachable("unsupported strategies for FixableGadgets");
3005 }
3006
3007 return std::nullopt;
3008}
3009
3010// Generates fix-its replacing an expression of the form `&DRE[e]` with
3011// `&DRE.data()[e]`:
3012static std::optional<FixItList>
3013fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
3014 const auto *ArraySub = cast<ArraySubscriptExpr>(Val: Node->getSubExpr());
3015 const auto *DRE = cast<DeclRefExpr>(Val: ArraySub->getBase()->IgnoreImpCasts());
3016 // FIXME: this `getASTContext` call is costly, we should pass the
3017 // ASTContext in:
3018 const ASTContext &Ctx = DRE->getDecl()->getASTContext();
3019 const Expr *Idx = ArraySub->getIdx();
3020 const SourceManager &SM = Ctx.getSourceManager();
3021 const LangOptions &LangOpts = Ctx.getLangOpts();
3022 std::stringstream SS;
3023 bool IdxIsLitZero = false;
3024
3025 if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
3026 if ((*ICE).isZero())
3027 IdxIsLitZero = true;
3028 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts);
3029 if (!DreString)
3030 return std::nullopt;
3031
3032 if (IdxIsLitZero) {
3033 // If the index is literal zero, we produce the most concise fix-it:
3034 SS << (*DreString).str() << ".data()";
3035 } else {
3036 std::optional<StringRef> IndexString = getExprText(E: Idx, SM, LangOpts);
3037 if (!IndexString)
3038 return std::nullopt;
3039
3040 SS << "&" << (*DreString).str() << ".data()"
3041 << "[" << (*IndexString).str() << "]";
3042 }
3043 return FixItList{
3044 FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())};
3045}
3046
3047std::optional<FixItList>
3048UUCAddAssignGadget::getFixits(const FixitStrategy &S) const {
3049 DeclUseList DREs = getClaimedVarUseSites();
3050
3051 if (DREs.size() != 1)
3052 return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we
3053 // give up
3054 if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DREs.front()->getDecl())) {
3055 if (S.lookup(VD) == FixitStrategy::Kind::Span) {
3056 FixItList Fixes;
3057
3058 const Stmt *AddAssignNode = Node;
3059 StringRef varName = VD->getName();
3060 const ASTContext &Ctx = VD->getASTContext();
3061
3062 if (!isNonNegativeIntegerExpr(Expr: Offset, VD, Ctx))
3063 return std::nullopt;
3064
3065 // To transform UUC(p += n) to UUC(p = p.subspan(..)):
3066 bool NotParenExpr =
3067 (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc());
3068 std::string SS = varName.str() + " = " + varName.str() + ".subspan";
3069 if (NotParenExpr)
3070 SS += "(";
3071
3072 std::optional<SourceLocation> AddAssignLocation = getEndCharLoc(
3073 Node: AddAssignNode, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3074 if (!AddAssignLocation)
3075 return std::nullopt;
3076
3077 Fixes.push_back(Elt: FixItHint::CreateReplacement(
3078 RemoveRange: SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()),
3079 Code: SS));
3080 if (NotParenExpr)
3081 Fixes.push_back(FixItHint::CreateInsertion(
3082 InsertionLoc: Offset->getEndLoc().getLocWithOffset(1), Code: ")"));
3083 return Fixes;
3084 }
3085 }
3086 return std::nullopt; // Not in the cases that we can handle for now, give up.
3087}
3088
3089std::optional<FixItList>
3090UPCPreIncrementGadget::getFixits(const FixitStrategy &S) const {
3091 DeclUseList DREs = getClaimedVarUseSites();
3092
3093 if (DREs.size() != 1)
3094 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we
3095 // give up
3096 if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DREs.front()->getDecl())) {
3097 if (S.lookup(VD) == FixitStrategy::Kind::Span) {
3098 FixItList Fixes;
3099 std::stringstream SS;
3100 StringRef varName = VD->getName();
3101 const ASTContext &Ctx = VD->getASTContext();
3102
3103 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
3104 SS << "(" << varName.data() << " = " << varName.data()
3105 << ".subspan(1)).data()";
3106 std::optional<SourceLocation> PreIncLocation =
3107 getEndCharLoc(Node, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3108 if (!PreIncLocation)
3109 return std::nullopt;
3110
3111 Fixes.push_back(Elt: FixItHint::CreateReplacement(
3112 RemoveRange: SourceRange(Node->getBeginLoc(), *PreIncLocation), Code: SS.str()));
3113 return Fixes;
3114 }
3115 }
3116 return std::nullopt; // Not in the cases that we can handle for now, give up.
3117}
3118
3119// For a non-null initializer `Init` of `T *` type, this function returns
3120// `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
3121// to output stream.
3122// In many cases, this function cannot figure out the actual extent `S`. It
3123// then will use a place holder to replace `S` to ask users to fill `S` in. The
3124// initializer shall be used to initialize a variable of type `std::span<T>`.
3125// In some cases (e. g. constant size array) the initializer should remain
3126// unchanged and the function returns empty list. In case the function can't
3127// provide the right fixit it will return nullopt.
3128//
3129// FIXME: Support multi-level pointers
3130//
3131// Parameters:
3132// `Init` a pointer to the initializer expression
3133// `Ctx` a reference to the ASTContext
3134static std::optional<FixItList>
3135FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
3136 const StringRef UserFillPlaceHolder) {
3137 const SourceManager &SM = Ctx.getSourceManager();
3138 const LangOptions &LangOpts = Ctx.getLangOpts();
3139
3140 // If `Init` has a constant value that is (or equivalent to) a
3141 // NULL pointer, we use the default constructor to initialize the span
3142 // object, i.e., a `std:span` variable declaration with no initializer.
3143 // So the fix-it is just to remove the initializer.
3144 if (Init->isNullPointerConstant(
3145 Ctx,
3146 // FIXME: Why does this function not ask for `const ASTContext
3147 // &`? It should. Maybe worth an NFC patch later.
3148 NPC: Expr::NullPointerConstantValueDependence::
3149 NPC_ValueDependentIsNotNull)) {
3150 std::optional<SourceLocation> InitLocation =
3151 getEndCharLoc(Node: Init, SM, LangOpts);
3152 if (!InitLocation)
3153 return std::nullopt;
3154
3155 SourceRange SR(Init->getBeginLoc(), *InitLocation);
3156
3157 return FixItList{FixItHint::CreateRemoval(RemoveRange: SR)};
3158 }
3159
3160 FixItList FixIts{};
3161 std::string ExtentText = UserFillPlaceHolder.data();
3162 StringRef One = "1";
3163
3164 // Insert `{` before `Init`:
3165 FixIts.push_back(FixItHint::CreateInsertion(InsertionLoc: Init->getBeginLoc(), Code: "{"));
3166 // Try to get the data extent. Break into different cases:
3167 if (auto CxxNew = dyn_cast<CXXNewExpr>(Val: Init->IgnoreImpCasts())) {
3168 // In cases `Init` is `new T[n]` and there is no explicit cast over
3169 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
3170 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
3171 // simpler for the case where `Init` is `new T`.
3172 if (const Expr *Ext = CxxNew->getArraySize().value_or(u: nullptr)) {
3173 if (!Ext->HasSideEffects(Ctx)) {
3174 std::optional<StringRef> ExtentString = getExprText(E: Ext, SM, LangOpts);
3175 if (!ExtentString)
3176 return std::nullopt;
3177 ExtentText = *ExtentString;
3178 }
3179 } else if (!CxxNew->isArray())
3180 // Although the initializer is not allocating a buffer, the pointer
3181 // variable could still be used in buffer access operations.
3182 ExtentText = One;
3183 } else if (Ctx.getAsConstantArrayType(T: Init->IgnoreImpCasts()->getType())) {
3184 // std::span has a single parameter constructor for initialization with
3185 // constant size array. The size is auto-deduced as the constructor is a
3186 // function template. The correct fixit is empty - no changes should happen.
3187 return FixItList{};
3188 } else {
3189 // In cases `Init` is of the form `&Var` after stripping of implicit
3190 // casts, where `&` is the built-in operator, the extent is 1.
3191 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Val: Init->IgnoreImpCasts()))
3192 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
3193 isa_and_present<DeclRefExpr>(Val: AddrOfExpr->getSubExpr()))
3194 ExtentText = One;
3195 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
3196 // and explicit casting, etc. etc.
3197 }
3198
3199 SmallString<32> StrBuffer{};
3200 std::optional<SourceLocation> LocPassInit = getPastLoc(Node: Init, SM, LangOpts);
3201
3202 if (!LocPassInit)
3203 return std::nullopt;
3204
3205 StrBuffer.append(RHS: ", ");
3206 StrBuffer.append(RHS: ExtentText);
3207 StrBuffer.append(RHS: "}");
3208 FixIts.push_back(Elt: FixItHint::CreateInsertion(InsertionLoc: *LocPassInit, Code: StrBuffer.str()));
3209 return FixIts;
3210}
3211
// DEBUG_NOTE_DECL_FAIL(D, Msg): when NDEBUG is not defined, attaches a debug
// note to the declaration `D`, at its begin location, with the text
// "failed to produce fixit for declaration '<name>'" followed by `Msg`. The
// expansion refers to a variable named `Handler`, which must be in scope at
// the point of use. When NDEBUG is defined the macro expands to nothing.
#ifdef NDEBUG
#define DEBUG_NOTE_DECL_FAIL(D, Msg)
#else
#define DEBUG_NOTE_DECL_FAIL(D, Msg)                                           \
  Handler.addDebugNoteForVar((D), (D)->getBeginLoc(),                          \
                             "failed to produce fixit for declaration '" +     \
                                 (D)->getNameAsString() + "'" + (Msg))
#endif
3220
3221// For the given variable declaration with a pointer-to-T type, returns the text
3222// `std::span<T>`. If it is unable to generate the text, returns
3223// `std::nullopt`.
3224static std::optional<std::string>
3225createSpanTypeForVarDecl(const VarDecl *VD, const ASTContext &Ctx) {
3226 assert(VD->getType()->isPointerType());
3227
3228 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
3229 std::optional<std::string> PteTyText = getPointeeTypeText(
3230 VD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers);
3231
3232 if (!PteTyText)
3233 return std::nullopt;
3234
3235 std::string SpanTyText = "std::span<";
3236
3237 SpanTyText.append(str: *PteTyText);
3238 // Append qualifiers to span element type if any:
3239 if (PteTyQualifiers) {
3240 SpanTyText.append(s: " ");
3241 SpanTyText.append(str: PteTyQualifiers->getAsString());
3242 }
3243 SpanTyText.append(s: ">");
3244 return SpanTyText;
3245}
3246
3247// For a `VarDecl` of the form `T * var (= Init)?`, this
3248// function generates fix-its that
3249// 1) replace `T * var` with `std::span<T> var`; and
3250// 2) change `Init` accordingly to a span constructor, if it exists.
3251//
3252// FIXME: support Multi-level pointers
3253//
3254// Parameters:
3255// `D` a pointer the variable declaration node
3256// `Ctx` a reference to the ASTContext
3257// `UserFillPlaceHolder` the user-input placeholder text
3258// Returns:
3259// the non-empty fix-it list, if fix-its are successfuly generated; empty
3260// list otherwise.
3261static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx,
3262 const StringRef UserFillPlaceHolder,
3263 UnsafeBufferUsageHandler &Handler) {
3264 if (hasUnsupportedSpecifiers(VD: D, SM: Ctx.getSourceManager()))
3265 return {};
3266
3267 FixItList FixIts{};
3268 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(VD: D, Ctx);
3269
3270 if (!SpanTyText) {
3271 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type");
3272 return {};
3273 }
3274
3275 // Will hold the text for `std::span<T> Ident`:
3276 std::stringstream SS;
3277
3278 SS << *SpanTyText;
3279 // Fix the initializer if it exists:
3280 if (const Expr *Init = D->getInit()) {
3281 std::optional<FixItList> InitFixIts =
3282 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder);
3283 if (!InitFixIts)
3284 return {};
3285 FixIts.insert(I: FixIts.end(), From: std::make_move_iterator(i: InitFixIts->begin()),
3286 To: std::make_move_iterator(i: InitFixIts->end()));
3287 }
3288 // For declaration of the form `T * ident = init;`, we want to replace
3289 // `T * ` with `std::span<T>`.
3290 // We ignore CV-qualifiers so for `T * const ident;` we also want to replace
3291 // just `T *` with `std::span<T>`.
3292 const SourceLocation EndLocForReplacement = D->getTypeSpecEndLoc();
3293 if (!EndLocForReplacement.isValid()) {
3294 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration");
3295 return {};
3296 }
3297 // The only exception is that for `T *ident` we'll add a single space between
3298 // "std::span<T>" and "ident".
3299 // FIXME: The condition is false for identifiers expended from macros.
3300 if (EndLocForReplacement.getLocWithOffset(Offset: 1) == getVarDeclIdentifierLoc(D))
3301 SS << " ";
3302
3303 FixIts.push_back(Elt: FixItHint::CreateReplacement(
3304 RemoveRange: SourceRange(D->getBeginLoc(), EndLocForReplacement), Code: SS.str()));
3305 return FixIts;
3306}
3307
3308static bool hasConflictingOverload(const FunctionDecl *FD) {
3309 return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult();
3310}
3311
3312// For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
3313// types, this function produces fix-its to make the change self-contained. Let
3314// 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
3315// entity defined by the `FunctionDecl` after the change to the parameters.
3316// Fix-its produced by this function are
3317// 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
3318// of 'F';
3319// 2. Create a declaration of "NewF" next to each declaration of `F`;
//   3. Create a definition of "F" (as its original definition now belongs
//      to "NewF") next to its original definition.  The body of the created
//      definition calls "NewF".
3323//
3324// Example:
3325//
3326// void f(int *p); // original declaration
3327// void f(int *p) { // original definition
3328// p[5];
3329// }
3330//
3331// To change the parameter `p` to be of `std::span<int>` type, we
3332// also add overloads:
3333//
3334// [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
3335// void f(std::span<int> p); // added overload decl
3336// void f(std::span<int> p) { // original def where param is changed
3337// p[5];
3338// }
3339// [[clang::unsafe_buffer_usage]] void f(int *p) { // added def
3340// return f(std::span(p, <# size #>));
3341// }
3342//
3343static std::optional<FixItList>
3344createOverloadsForFixedParams(const FixitStrategy &S, const FunctionDecl *FD,
3345 const ASTContext &Ctx,
3346 UnsafeBufferUsageHandler &Handler) {
3347 // FIXME: need to make this conflict checking better:
3348 if (hasConflictingOverload(FD))
3349 return std::nullopt;
3350
3351 const SourceManager &SM = Ctx.getSourceManager();
3352 const LangOptions &LangOpts = Ctx.getLangOpts();
3353 const unsigned NumParms = FD->getNumParams();
3354 std::vector<std::string> NewTysTexts(NumParms);
3355 std::vector<bool> ParmsMask(NumParms, false);
3356 bool AtLeastOneParmToFix = false;
3357
3358 for (unsigned i = 0; i < NumParms; i++) {
3359 const ParmVarDecl *PVD = FD->getParamDecl(i);
3360
3361 if (S.lookup(PVD) == FixitStrategy::Kind::Wontfix)
3362 continue;
3363 if (S.lookup(PVD) != FixitStrategy::Kind::Span)
3364 // Not supported, not suppose to happen:
3365 return std::nullopt;
3366
3367 std::optional<Qualifiers> PteTyQuals = std::nullopt;
3368 std::optional<std::string> PteTyText =
3369 getPointeeTypeText(PVD, SM, LangOpts, &PteTyQuals);
3370
3371 if (!PteTyText)
3372 // something wrong in obtaining the text of the pointee type, give up
3373 return std::nullopt;
3374 // FIXME: whether we should create std::span type depends on the
3375 // FixitStrategy.
3376 NewTysTexts[i] = getSpanTypeText(EltTyText: *PteTyText, Quals: PteTyQuals);
3377 ParmsMask[i] = true;
3378 AtLeastOneParmToFix = true;
3379 }
3380 if (!AtLeastOneParmToFix)
3381 // No need to create function overloads:
3382 return {};
3383 // FIXME Respect indentation of the original code.
3384
3385 // A lambda that creates the text representation of a function declaration
3386 // with the new type signatures:
3387 const auto NewOverloadSignatureCreator =
3388 [&SM, &LangOpts, &NewTysTexts,
3389 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
3390 std::stringstream SS;
3391
3392 SS << ";";
3393 SS << getEndOfLine().str();
3394 // Append: ret-type func-name "("
3395 if (auto Prefix = getRangeText(
3396 SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()),
3397 SM, LangOpts))
3398 SS << Prefix->str();
3399 else
3400 return std::nullopt; // give up
3401 // Append: parameter-type-list
3402 const unsigned NumParms = FD->getNumParams();
3403
3404 for (unsigned i = 0; i < NumParms; i++) {
3405 const ParmVarDecl *Parm = FD->getParamDecl(i);
3406
3407 if (Parm->isImplicit())
3408 continue;
3409 if (ParmsMask[i]) {
3410 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its
3411 // new type:
3412 SS << NewTysTexts[i];
3413 // print parameter name if provided:
3414 if (IdentifierInfo *II = Parm->getIdentifier())
3415 SS << ' ' << II->getName().str();
3416 } else if (auto ParmTypeText =
3417 getRangeText(getSourceRangeToTokenEnd(Parm, SM, LangOpts),
3418 SM, LangOpts)) {
3419 // print the whole `Parm` without modification:
3420 SS << ParmTypeText->str();
3421 } else
3422 return std::nullopt; // something wrong, give up
3423 if (i != NumParms - 1)
3424 SS << ", ";
3425 }
3426 SS << ")";
3427 return SS.str();
3428 };
3429
3430 // A lambda that creates the text representation of a function definition with
3431 // the original signature:
3432 const auto OldOverloadDefCreator =
3433 [&Handler, &SM, &LangOpts, &NewTysTexts,
3434 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
3435 std::stringstream SS;
3436
3437 SS << getEndOfLine().str();
3438 // Append: attr-name ret-type func-name "(" param-list ")" "{"
3439 if (auto FDPrefix = getRangeText(
3440 SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM,
3441 LangOpts))
3442 SS << Handler.getUnsafeBufferUsageAttributeTextAt(Loc: FD->getBeginLoc(), WSSuffix: " ")
3443 << FDPrefix->str() << "{";
3444 else
3445 return std::nullopt;
3446 // Append: "return" func-name "("
3447 if (auto FunQualName = getFunNameText(FD, SM, LangOpts))
3448 SS << "return " << FunQualName->str() << "(";
3449 else
3450 return std::nullopt;
3451
3452 // Append: arg-list
3453 const unsigned NumParms = FD->getNumParams();
3454 for (unsigned i = 0; i < NumParms; i++) {
3455 const ParmVarDecl *Parm = FD->getParamDecl(i);
3456
3457 if (Parm->isImplicit())
3458 continue;
3459 // FIXME: If a parameter has no name, it is unused in the
3460 // definition. So we could just leave it as it is.
3461 if (!Parm->getIdentifier())
3462 // If a parameter of a function definition has no name:
3463 return std::nullopt;
3464 if (ParmsMask[i])
3465 // This is our spanified paramter!
3466 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str()
3467 << ", " << getUserFillPlaceHolder(HintTextToUser: "size") << ")";
3468 else
3469 SS << Parm->getIdentifier()->getName().str();
3470 if (i != NumParms - 1)
3471 SS << ", ";
3472 }
3473 // finish call and the body
3474 SS << ");}" << getEndOfLine().str();
3475 // FIXME: 80-char line formatting?
3476 return SS.str();
3477 };
3478
3479 FixItList FixIts{};
3480 for (FunctionDecl *FReDecl : FD->redecls()) {
3481 std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts);
3482
3483 if (!Loc)
3484 return {};
3485 if (FReDecl->isThisDeclarationADefinition()) {
3486 assert(FReDecl == FD && "inconsistent function definition");
3487 // Inserts a definition with the old signature to the end of
3488 // `FReDecl`:
3489 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl))
3490 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef));
3491 else
3492 return {}; // give up
3493 } else {
3494 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`:
3495 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) {
3496 FixIts.emplace_back(FixItHint::CreateInsertion(
3497 FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt(
3498 FReDecl->getBeginLoc(), " ")));
3499 }
3500 // Inserts a declaration with the new signature to the end of `FReDecl`:
3501 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl))
3502 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl));
3503 else
3504 return {};
3505 }
3506 }
3507 return FixIts;
3508}
3509
3510// To fix a `ParmVarDecl` to be of `std::span` type.
3511static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx,
3512 UnsafeBufferUsageHandler &Handler) {
3513 if (hasUnsupportedSpecifiers(PVD, Ctx.getSourceManager())) {
3514 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)");
3515 return {};
3516 }
3517 if (PVD->hasDefaultArg()) {
3518 // FIXME: generate fix-its for default values:
3519 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg");
3520 return {};
3521 }
3522
3523 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
3524 std::optional<std::string> PteTyText = getPointeeTypeText(
3525 PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers);
3526
3527 if (!PteTyText) {
3528 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type");
3529 return {};
3530 }
3531
3532 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName();
3533
3534 if (!PVDNameText) {
3535 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name");
3536 return {};
3537 }
3538
3539 std::stringstream SS;
3540 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(PVD, Ctx);
3541
3542 if (PteTyQualifiers)
3543 // Append qualifiers if they exist:
3544 SS << getSpanTypeText(EltTyText: *PteTyText, Quals: PteTyQualifiers);
3545 else
3546 SS << getSpanTypeText(EltTyText: *PteTyText);
3547 // Append qualifiers to the type of the parameter:
3548 if (PVD->getType().hasQualifiers())
3549 SS << ' ' << PVD->getType().getQualifiers().getAsString();
3550 // Append parameter's name:
3551 SS << ' ' << PVDNameText->str();
3552 // Add replacement fix-it:
3553 return {FixItHint::CreateReplacement(RemoveRange: PVD->getSourceRange(), Code: SS.str())};
3554}
3555
3556static FixItList fixVariableWithSpan(const VarDecl *VD,
3557 const DeclUseTracker &Tracker,
3558 ASTContext &Ctx,
3559 UnsafeBufferUsageHandler &Handler) {
3560 const DeclStmt *DS = Tracker.lookupDecl(VD);
3561 if (!DS) {
3562 DEBUG_NOTE_DECL_FAIL(VD,
3563 " : variables declared this way not implemented yet");
3564 return {};
3565 }
3566 if (!DS->isSingleDecl()) {
3567 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
3568 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls");
3569 return {};
3570 }
3571 // Currently DS is an unused variable but we'll need it when
3572 // non-single decls are implemented, where the pointee type name
3573 // and the '*' are spread around the place.
3574 (void)DS;
3575
3576 // FIXME: handle cases where DS has multiple declarations
3577 return fixLocalVarDeclWithSpan(D: VD, Ctx, UserFillPlaceHolder: getUserFillPlaceHolder(), Handler);
3578}
3579
3580static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
3581 UnsafeBufferUsageHandler &Handler) {
3582 FixItList FixIts{};
3583
3584 // Note: the code below expects the declaration to not use any type sugar like
3585 // typedef.
3586 if (auto CAT = Ctx.getAsConstantArrayType(D->getType())) {
3587 const QualType &ArrayEltT = CAT->getElementType();
3588 assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!");
3589 // FIXME: support multi-dimensional arrays
3590 if (isa<clang::ArrayType>(Val: ArrayEltT.getCanonicalType()))
3591 return {};
3592
3593 const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(D);
3594
3595 // Get the spelling of the element type as written in the source file
3596 // (including macros, etc.).
3597 auto MaybeElemTypeTxt =
3598 getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(),
3599 Ctx.getLangOpts());
3600 if (!MaybeElemTypeTxt)
3601 return {};
3602 const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim();
3603
3604 // Find the '[' token.
3605 std::optional<Token> NextTok = Lexer::findNextToken(
3606 Loc: IdentifierLoc, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3607 while (NextTok && !NextTok->is(K: tok::l_square) &&
3608 NextTok->getLocation() <= D->getSourceRange().getEnd())
3609 NextTok = Lexer::findNextToken(Loc: NextTok->getLocation(),
3610 SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
3611 if (!NextTok)
3612 return {};
3613 const SourceLocation LSqBracketLoc = NextTok->getLocation();
3614
3615 // Get the spelling of the array size as written in the source file
3616 // (including macros, etc.).
3617 auto MaybeArraySizeTxt = getRangeText(
3618 {LSqBracketLoc.getLocWithOffset(Offset: 1), D->getTypeSpecEndLoc()},
3619 Ctx.getSourceManager(), Ctx.getLangOpts());
3620 if (!MaybeArraySizeTxt)
3621 return {};
3622 const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim();
3623 if (ArraySizeTxt.empty()) {
3624 // FIXME: Support array size getting determined from the initializer.
3625 // Examples:
3626 // int arr1[] = {0, 1, 2};
3627 // int arr2{3, 4, 5};
3628 // We might be able to preserve the non-specified size with `auto` and
3629 // `std::to_array`:
3630 // auto arr1 = std::to_array<int>({0, 1, 2});
3631 return {};
3632 }
3633
3634 std::optional<StringRef> IdentText =
3635 getVarDeclIdentifierText(D, Ctx.getSourceManager(), Ctx.getLangOpts());
3636
3637 if (!IdentText) {
3638 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier");
3639 return {};
3640 }
3641
3642 SmallString<32> Replacement;
3643 llvm::raw_svector_ostream OS(Replacement);
3644 OS << "std::array<" << ElemTypeTxt << ", " << ArraySizeTxt << "> "
3645 << IdentText->str();
3646
3647 FixIts.push_back(Elt: FixItHint::CreateReplacement(
3648 RemoveRange: SourceRange{D->getBeginLoc(), D->getTypeSpecEndLoc()}, Code: OS.str()));
3649 }
3650
3651 return FixIts;
3652}
3653
3654static FixItList fixVariableWithArray(const VarDecl *VD,
3655 const DeclUseTracker &Tracker,
3656 const ASTContext &Ctx,
3657 UnsafeBufferUsageHandler &Handler) {
3658 const DeclStmt *DS = Tracker.lookupDecl(VD);
3659 assert(DS && "Fixing non-local variables not implemented yet!");
3660 if (!DS->isSingleDecl()) {
3661 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
3662 return {};
3663 }
3664 // Currently DS is an unused variable but we'll need it when
3665 // non-single decls are implemented, where the pointee type name
3666 // and the '*' are spread around the place.
3667 (void)DS;
3668
3669 // FIXME: handle cases where DS has multiple declarations
3670 return fixVarDeclWithArray(D: VD, Ctx, Handler);
3671}
3672
3673// TODO: we should be consistent to use `std::nullopt` to represent no-fix due
3674// to any unexpected problem.
3675static FixItList
3676fixVariable(const VarDecl *VD, FixitStrategy::Kind K,
3677 /* The function decl under analysis */ const Decl *D,
3678 const DeclUseTracker &Tracker, ASTContext &Ctx,
3679 UnsafeBufferUsageHandler &Handler) {
3680 if (const auto *PVD = dyn_cast<ParmVarDecl>(Val: VD)) {
3681 auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext());
3682 if (!FD || FD != D) {
3683 // `FD != D` means that `PVD` belongs to a function that is not being
3684 // analyzed currently. Thus `FD` may not be complete.
3685 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed");
3686 return {};
3687 }
3688
3689 // TODO If function has a try block we can't change params unless we check
3690 // also its catch block for their use.
3691 // FIXME We might support static class methods, some select methods,
3692 // operators and possibly lamdas.
3693 if (FD->isMain() || FD->isConstexpr() ||
3694 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate ||
3695 FD->isVariadic() ||
3696 // also covers call-operator of lamdas
3697 isa<CXXMethodDecl>(FD) ||
3698 // skip when the function body is a try-block
3699 (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) ||
3700 FD->isOverloadedOperator()) {
3701 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl");
3702 return {}; // TODO test all these cases
3703 }
3704 }
3705
3706 switch (K) {
3707 case FixitStrategy::Kind::Span: {
3708 if (VD->getType()->isPointerType()) {
3709 if (const auto *PVD = dyn_cast<ParmVarDecl>(Val: VD))
3710 return fixParamWithSpan(PVD, Ctx, Handler);
3711
3712 if (VD->isLocalVarDecl())
3713 return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
3714 }
3715 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer");
3716 return {};
3717 }
3718 case FixitStrategy::Kind::Array: {
3719 if (VD->isLocalVarDecl() && Ctx.getAsConstantArrayType(T: VD->getType()))
3720 return fixVariableWithArray(VD, Tracker, Ctx, Handler);
3721
3722 DEBUG_NOTE_DECL_FAIL(VD, " : not a local const-size array");
3723 return {};
3724 }
3725 case FixitStrategy::Kind::Iterator:
3726 case FixitStrategy::Kind::Vector:
3727 llvm_unreachable("FixitStrategy not implemented yet!");
3728 case FixitStrategy::Kind::Wontfix:
3729 llvm_unreachable("Invalid strategy!");
3730 }
3731 llvm_unreachable("Unknown strategy!");
3732}
3733
3734// Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
3735// `RemoveRange` of 'h' overlaps with a macro use.
3736static bool overlapWithMacro(const FixItList &FixIts) {
3737 // FIXME: For now we only check if the range (or the first token) is (part of)
3738 // a macro expansion. Ideally, we want to check for all tokens in the range.
3739 return llvm::any_of(Range: FixIts, P: [](const FixItHint &Hint) {
3740 auto Range = Hint.RemoveRange;
3741 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
3742 // If the range (or the first token) is (part of) a macro expansion:
3743 return true;
3744 return false;
3745 });
3746}
3747
3748// Returns true iff `VD` is a parameter of the declaration `D`:
3749static bool isParameterOf(const VarDecl *VD, const Decl *D) {
3750 return isa<ParmVarDecl>(Val: VD) &&
3751 VD->getDeclContext() == dyn_cast<DeclContext>(Val: D);
3752}
3753
3754// Erases variables in `FixItsForVariable`, if such a variable has an unfixable
3755// group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
3756// contain `v`.
3757static void eraseVarsForUnfixableGroupMates(
3758 std::map<const VarDecl *, FixItList> &FixItsForVariable,
3759 const VariableGroupsManager &VarGrpMgr) {
3760 // Variables will be removed from `FixItsForVariable`:
3761 SmallVector<const VarDecl *, 8> ToErase;
3762
3763 for (const auto &[VD, Ignore] : FixItsForVariable) {
3764 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(Var: VD);
3765 if (llvm::any_of(Range&: Grp,
3766 P: [&FixItsForVariable](const VarDecl *GrpMember) -> bool {
3767 return !FixItsForVariable.count(x: GrpMember);
3768 })) {
3769 // At least one group member cannot be fixed, so we have to erase the
3770 // whole group:
3771 for (const VarDecl *Member : Grp)
3772 ToErase.push_back(Elt: Member);
3773 }
3774 }
3775 for (auto *VarToErase : ToErase)
3776 FixItsForVariable.erase(x: VarToErase);
3777}
3778
3779// Returns the fix-its that create bounds-safe function overloads for the
3780// function `D`, if `D`'s parameters will be changed to safe-types through
3781// fix-its in `FixItsForVariable`.
3782//
3783// NOTE: In case `D`'s parameters will be changed but bounds-safe function
3784// overloads cannot created, the whole group that contains the parameters will
3785// be erased from `FixItsForVariable`.
3786static FixItList createFunctionOverloadsForParms(
3787 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */,
3788 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD,
3789 const FixitStrategy &S, ASTContext &Ctx,
3790 UnsafeBufferUsageHandler &Handler) {
3791 FixItList FixItsSharedByParms{};
3792
3793 std::optional<FixItList> OverloadFixes =
3794 createOverloadsForFixedParams(S, FD, Ctx, Handler);
3795
3796 if (OverloadFixes) {
3797 FixItsSharedByParms.append(RHS: *OverloadFixes);
3798 } else {
3799 // Something wrong in generating `OverloadFixes`, need to remove the
3800 // whole group, where parameters are in, from `FixItsForVariable` (Note
3801 // that all parameters should be in the same group):
3802 for (auto *Member : VarGrpMgr.getGroupOfParms())
3803 FixItsForVariable.erase(x: Member);
3804 }
3805 return FixItsSharedByParms;
3806}
3807
3808// Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
3809static std::map<const VarDecl *, FixItList>
3810getFixIts(FixableGadgetSets &FixablesForAllVars, const FixitStrategy &S,
3811 ASTContext &Ctx,
3812 /* The function decl under analysis */ const Decl *D,
3813 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler,
3814 const VariableGroupsManager &VarGrpMgr) {
3815 // `FixItsForVariable` will map each variable to a set of fix-its directly
3816 // associated to the variable itself. Fix-its of distinct variables in
3817 // `FixItsForVariable` are disjoint.
3818 std::map<const VarDecl *, FixItList> FixItsForVariable;
3819
3820 // Populate `FixItsForVariable` with fix-its directly associated with each
3821 // variable. Fix-its directly associated to a variable 'v' are the ones
3822 // produced by the `FixableGadget`s whose claimed variable is 'v'.
3823 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) {
3824 FixItsForVariable[VD] =
3825 fixVariable(VD, K: S.lookup(VD), D, Tracker, Ctx, Handler);
3826 // If we fail to produce Fix-It for the declaration we have to skip the
3827 // variable entirely.
3828 if (FixItsForVariable[VD].empty()) {
3829 FixItsForVariable.erase(x: VD);
3830 continue;
3831 }
3832 for (const auto &F : Fixables) {
3833 std::optional<FixItList> Fixits = F->getFixits(S);
3834
3835 if (Fixits) {
3836 FixItsForVariable[VD].insert(I: FixItsForVariable[VD].end(),
3837 From: Fixits->begin(), To: Fixits->end());
3838 continue;
3839 }
3840#ifndef NDEBUG
3841 Handler.addDebugNoteForVar(
3842 VD, Loc: F->getSourceLoc(),
3843 Text: ("gadget '" + F->getDebugName() + "' refused to produce a fix")
3844 .str());
3845#endif
3846 FixItsForVariable.erase(x: VD);
3847 break;
3848 }
3849 }
3850
3851 // `FixItsForVariable` now contains only variables that can be
3852 // fixed. A variable can be fixed if its' declaration and all Fixables
3853 // associated to it can all be fixed.
3854
3855 // To further remove from `FixItsForVariable` variables whose group mates
3856 // cannot be fixed...
3857 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr);
3858 // Now `FixItsForVariable` gets further reduced: a variable is in
3859 // `FixItsForVariable` iff it can be fixed and all its group mates can be
3860 // fixed.
3861
3862 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
3863 // That is, when fixing multiple parameters in one step, these fix-its will
3864 // be applied only once (instead of being applied per parameter).
3865 FixItList FixItsSharedByParms{};
3866
3867 if (auto *FD = dyn_cast<FunctionDecl>(Val: D))
3868 FixItsSharedByParms = createFunctionOverloadsForParms(
3869 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler);
3870
3871 // The map that maps each variable `v` to fix-its for the whole group where
3872 // `v` is in:
3873 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{
3874 FixItsForVariable};
3875
3876 for (auto &[Var, Ignore] : FixItsForVariable) {
3877 bool AnyParm = false;
3878 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, HasParm: &AnyParm);
3879
3880 for (const VarDecl *GrpMate : VarGroupForVD) {
3881 if (Var == GrpMate)
3882 continue;
3883 if (FixItsForVariable.count(x: GrpMate))
3884 FinalFixItsForVariable[Var].append(RHS: FixItsForVariable[GrpMate]);
3885 }
3886 if (AnyParm) {
3887 // This assertion should never fail. Otherwise we have a bug.
3888 assert(!FixItsSharedByParms.empty() &&
3889 "Should not try to fix a parameter that does not belong to a "
3890 "FunctionDecl");
3891 FinalFixItsForVariable[Var].append(RHS: FixItsSharedByParms);
3892 }
3893 }
3894 // Fix-its that will be applied in one step shall NOT:
3895 // 1. overlap with macros or/and templates; or
3896 // 2. conflict with each other.
3897 // Otherwise, the fix-its will be dropped.
3898 for (auto Iter = FinalFixItsForVariable.begin();
3899 Iter != FinalFixItsForVariable.end();)
3900 if (overlapWithMacro(FixIts: Iter->second) ||
3901 clang::internal::anyConflict(FixIts: Iter->second, SM: Ctx.getSourceManager())) {
3902 Iter = FinalFixItsForVariable.erase(position: Iter);
3903 } else
3904 Iter++;
3905 return FinalFixItsForVariable;
3906}
3907
3908template <typename VarDeclIterTy>
3909static FixitStrategy
3910getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) {
3911 FixitStrategy S;
3912 for (const VarDecl *VD : UnsafeVars) {
3913 if (isa<ConstantArrayType>(VD->getType().getCanonicalType()))
3914 S.set(VD, K: FixitStrategy::Kind::Array);
3915 else
3916 S.set(VD, K: FixitStrategy::Kind::Span);
3917 }
3918 return S;
3919}
3920
3921// Manages variable groups:
3922class VariableGroupsManagerImpl : public VariableGroupsManager {
3923 const std::vector<VarGrpTy> Groups;
3924 const std::map<const VarDecl *, unsigned> &VarGrpMap;
3925 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms;
3926
3927public:
3928 VariableGroupsManagerImpl(
3929 const std::vector<VarGrpTy> &Groups,
3930 const std::map<const VarDecl *, unsigned> &VarGrpMap,
3931 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms)
3932 : Groups(Groups), VarGrpMap(VarGrpMap),
3933 GrpsUnionForParms(GrpsUnionForParms) {}
3934
3935 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override {
3936 if (GrpsUnionForParms.contains(key: Var)) {
3937 if (HasParm)
3938 *HasParm = true;
3939 return GrpsUnionForParms.getArrayRef();
3940 }
3941 if (HasParm)
3942 *HasParm = false;
3943
3944 auto It = VarGrpMap.find(x: Var);
3945
3946 if (It == VarGrpMap.end())
3947 return {};
3948 return Groups[It->second];
3949 }
3950
3951 VarGrpRef getGroupOfParms() const override {
3952 return GrpsUnionForParms.getArrayRef();
3953 }
3954};
3955
3956static void applyGadgets(const Decl *D, FixableGadgetList FixableGadgets,
3957 WarningGadgetList WarningGadgets,
3958 DeclUseTracker Tracker,
3959 UnsafeBufferUsageHandler &Handler,
3960 bool EmitSuggestions) {
3961 if (!EmitSuggestions) {
3962 // Our job is very easy without suggestions. Just warn about
3963 // every problematic operation and consider it done. No need to deal
3964 // with fixable gadgets, no need to group operations by variable.
3965 for (const auto &G : WarningGadgets) {
3966 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/false,
3967 Ctx&: D->getASTContext());
3968 }
3969
3970 // This return guarantees that most of the machine doesn't run when
3971 // suggestions aren't requested.
3972 assert(FixableGadgets.empty() &&
3973 "Fixable gadgets found but suggestions not requested!");
3974 return;
3975 }
3976
3977 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
3978 // function under the analysis. No need to fix any Fixables.
3979 if (!WarningGadgets.empty()) {
3980 // Gadgets "claim" variables they're responsible for. Once this loop
3981 // finishes, the tracker will only track DREs that weren't claimed by any
3982 // gadgets, i.e. not understood by the analysis.
3983 for (const auto &G : FixableGadgets) {
3984 for (const auto *DRE : G->getClaimedVarUseSites()) {
3985 Tracker.claimUse(DRE);
3986 }
3987 }
3988 }
3989
3990 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
3991 // function under the analysis. Thus, it early returns here as there is
3992 // nothing needs to be fixed.
3993 //
3994 // Note this claim is based on the assumption that there is no unsafe
3995 // variable whose declaration is invisible from the analyzing function.
3996 // Otherwise, we need to consider if the uses of those unsafe varuables needs
3997 // fix.
3998 // So far, we are not fixing any global variables or class members. And,
3999 // lambdas will be analyzed along with the enclosing function. So this early
4000 // return is correct for now.
4001 if (WarningGadgets.empty())
4002 return;
4003
4004 WarningGadgetSets UnsafeOps =
4005 groupWarningGadgetsByVar(AllUnsafeOperations: std::move(WarningGadgets));
4006 FixableGadgetSets FixablesForAllVars =
4007 groupFixablesByVar(AllFixableOperations: std::move(FixableGadgets));
4008
4009 std::map<const VarDecl *, FixItList> FixItsForVariableGroup;
4010
4011 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
4012 for (auto it = FixablesForAllVars.byVar.cbegin();
4013 it != FixablesForAllVars.byVar.cend();) {
4014 // FIXME: need to deal with global variables later
4015 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(Val: it->first))) {
4016#ifndef NDEBUG
4017 Handler.addDebugNoteForVar(VD: it->first, Loc: it->first->getBeginLoc(),
4018 Text: ("failed to produce fixit for '" +
4019 it->first->getNameAsString() +
4020 "' : neither local nor a parameter"));
4021#endif
4022 it = FixablesForAllVars.byVar.erase(position: it);
4023 } else if (it->first->getType().getCanonicalType()->isReferenceType()) {
4024#ifndef NDEBUG
4025 Handler.addDebugNoteForVar(VD: it->first, Loc: it->first->getBeginLoc(),
4026 Text: ("failed to produce fixit for '" +
4027 it->first->getNameAsString() +
4028 "' : has a reference type"));
4029#endif
4030 it = FixablesForAllVars.byVar.erase(position: it);
4031 } else if (Tracker.hasUnclaimedUses(VD: it->first)) {
4032 it = FixablesForAllVars.byVar.erase(position: it);
4033 } else if (it->first->isInitCapture()) {
4034#ifndef NDEBUG
4035 Handler.addDebugNoteForVar(VD: it->first, Loc: it->first->getBeginLoc(),
4036 Text: ("failed to produce fixit for '" +
4037 it->first->getNameAsString() +
4038 "' : init capture"));
4039#endif
4040 it = FixablesForAllVars.byVar.erase(position: it);
4041 } else {
4042 ++it;
4043 }
4044 }
4045
4046#ifndef NDEBUG
4047 for (const auto &it : UnsafeOps.byVar) {
4048 const VarDecl *const UnsafeVD = it.first;
4049 auto UnclaimedDREs = Tracker.getUnclaimedUses(VD: UnsafeVD);
4050 if (UnclaimedDREs.empty())
4051 continue;
4052 const auto UnfixedVDName = UnsafeVD->getNameAsString();
4053 for (const clang::DeclRefExpr *UnclaimedDRE : UnclaimedDREs) {
4054 std::string UnclaimedUseTrace =
4055 getDREAncestorString(DRE: UnclaimedDRE, Ctx&: D->getASTContext());
4056
4057 Handler.addDebugNoteForVar(
4058 VD: UnsafeVD, Loc: UnclaimedDRE->getBeginLoc(),
4059 Text: ("failed to produce fixit for '" + UnfixedVDName +
4060 "' : has an unclaimed use\nThe unclaimed DRE trace: " +
4061 UnclaimedUseTrace));
4062 }
4063 }
4064#endif
4065
4066 // Fixpoint iteration for pointer assignments
4067 using DepMapTy =
4068 llvm::DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>;
4069 DepMapTy DependenciesMap{};
4070 DepMapTy PtrAssignmentGraph{};
4071
4072 for (const auto &it : FixablesForAllVars.byVar) {
4073 for (const FixableGadget *fixable : it.second) {
4074 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair =
4075 fixable->getStrategyImplications();
4076 if (ImplPair) {
4077 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair);
4078 PtrAssignmentGraph[Impl.first].insert(X: Impl.second);
4079 }
4080 }
4081 }
4082
4083 /*
4084 The following code does a BFS traversal of the `PtrAssignmentGraph`
4085 considering all unsafe vars as starting nodes and constructs an undirected
4086 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
4087 elimiates all variables that are unreachable from any unsafe var. In other
4088 words, this removes all dependencies that don't include any unsafe variable
4089 and consequently don't need any fixit generation.
4090 Note: A careful reader would observe that the code traverses
4091 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
4092 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
4093 achieve the same result but the one used here dramatically cuts the
4094 amount of hoops the second part of the algorithm needs to jump, given that
4095 a lot of these connections become "direct". The reader is advised not to
4096 imagine how the graph is transformed because of using `Var` instead of
4097 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
4098 and think about why it's equivalent later.
4099 */
4100 std::set<const VarDecl *> VisitedVarsDirected{};
4101 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
4102 if (VisitedVarsDirected.find(x: Var) == VisitedVarsDirected.end()) {
4103
4104 std::queue<const VarDecl *> QueueDirected{};
4105 QueueDirected.push(x: Var);
4106 while (!QueueDirected.empty()) {
4107 const VarDecl *CurrentVar = QueueDirected.front();
4108 QueueDirected.pop();
4109 VisitedVarsDirected.insert(x: CurrentVar);
4110 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
4111 for (const VarDecl *Adj : AdjacentNodes) {
4112 if (VisitedVarsDirected.find(x: Adj) == VisitedVarsDirected.end()) {
4113 QueueDirected.push(x: Adj);
4114 }
4115 DependenciesMap[Var].insert(X: Adj);
4116 DependenciesMap[Adj].insert(X: Var);
4117 }
4118 }
4119 }
4120 }
4121
4122 // `Groups` stores the set of Connected Components in the graph.
4123 std::vector<VarGrpTy> Groups;
4124 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the
4125 // variables belong to. Group indexes refer to the elements in `Groups`.
4126 // `VarGrpMap` is complete in that every variable that needs fix is in it.
4127 std::map<const VarDecl *, unsigned> VarGrpMap;
4128 // The union group over the ones in "Groups" that contain parameters of `D`:
4129 llvm::SetVector<const VarDecl *>
4130 GrpsUnionForParms; // these variables need to be fixed in one step
4131
4132 // Group Connected Components for Unsafe Vars
4133 // (Dependencies based on pointer assignments)
4134 std::set<const VarDecl *> VisitedVars{};
4135 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
4136 if (VisitedVars.find(x: Var) == VisitedVars.end()) {
4137 VarGrpTy &VarGroup = Groups.emplace_back();
4138 std::queue<const VarDecl *> Queue{};
4139
4140 Queue.push(x: Var);
4141 while (!Queue.empty()) {
4142 const VarDecl *CurrentVar = Queue.front();
4143 Queue.pop();
4144 VisitedVars.insert(x: CurrentVar);
4145 VarGroup.push_back(x: CurrentVar);
4146 auto AdjacentNodes = DependenciesMap[CurrentVar];
4147 for (const VarDecl *Adj : AdjacentNodes) {
4148 if (VisitedVars.find(x: Adj) == VisitedVars.end()) {
4149 Queue.push(x: Adj);
4150 }
4151 }
4152 }
4153
4154 bool HasParm = false;
4155 unsigned GrpIdx = Groups.size() - 1;
4156
4157 for (const VarDecl *V : VarGroup) {
4158 VarGrpMap[V] = GrpIdx;
4159 if (!HasParm && isParameterOf(VD: V, D))
4160 HasParm = true;
4161 }
4162 if (HasParm)
4163 GrpsUnionForParms.insert_range(R&: VarGroup);
4164 }
4165 }
4166
4167 // Remove a `FixableGadget` if the associated variable is not in the graph
4168 // computed above. We do not want to generate fix-its for such variables,
4169 // since they are neither warned nor reachable from a warned one.
4170 //
4171 // Note a variable is not warned if it is not directly used in any unsafe
4172 // operation. A variable `v` is NOT reachable from an unsafe variable, if it
4173 // does not exist another variable `u` such that `u` is warned and fixing `u`
4174 // (transitively) implicates fixing `v`.
4175 //
4176 // For example,
4177 // ```
4178 // void f(int * p) {
4179 // int * a = p; *p = 0;
4180 // }
4181 // ```
4182 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither
4183 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of
4184 // the function above, `p` becomes reachable from a warned variable.
4185 for (auto I = FixablesForAllVars.byVar.begin();
4186 I != FixablesForAllVars.byVar.end();) {
4187 // Note `VisitedVars` contain all the variables in the graph:
4188 if (!VisitedVars.count(x: (*I).first)) {
4189 // no such var in graph:
4190 I = FixablesForAllVars.byVar.erase(position: I);
4191 } else
4192 ++I;
4193 }
4194
4195 // We assign strategies to variables that are 1) in the graph and 2) can be
4196 // fixed. Other variables have the default "Won't fix" strategy.
4197 FixitStrategy NaiveStrategy = getNaiveStrategy(UnsafeVars: llvm::make_filter_range(
4198 Range&: VisitedVars, Pred: [&FixablesForAllVars](const VarDecl *V) {
4199 // If a warned variable has no "Fixable", it is considered unfixable:
4200 return FixablesForAllVars.byVar.count(x: V);
4201 }));
4202 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms);
4203
4204 if (isa<NamedDecl>(Val: D))
4205 // The only case where `D` is not a `NamedDecl` is when `D` is a
4206 // `BlockDecl`. Let's not fix variables in blocks for now
4207 FixItsForVariableGroup =
4208 getFixIts(FixablesForAllVars, S: NaiveStrategy, Ctx&: D->getASTContext(), D,
4209 Tracker, Handler, VarGrpMgr);
4210
4211 for (const auto &G : UnsafeOps.noVar) {
4212 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/false,
4213 Ctx&: D->getASTContext());
4214 }
4215
4216 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
4217 auto FixItsIt = FixItsForVariableGroup.find(x: VD);
4218 Handler.handleUnsafeVariableGroup(Variable: VD, VarGrpMgr,
4219 Fixes: FixItsIt != FixItsForVariableGroup.end()
4220 ? std::move(FixItsIt->second)
4221 : FixItList{},
4222 D, VarTargetTypes: NaiveStrategy);
4223 for (const auto &G : WarningGadgets) {
4224 G->handleUnsafeOperation(Handler, /*IsRelatedToDecl=*/true,
4225 Ctx&: D->getASTContext());
4226 }
4227 }
4228}
4229
4230void clang::checkUnsafeBufferUsage(const Decl *D,
4231 UnsafeBufferUsageHandler &Handler,
4232 bool EmitSuggestions) {
4233#ifndef NDEBUG
4234 Handler.clearDebugNotes();
4235#endif
4236
4237 assert(D);
4238
4239 SmallVector<Stmt *> Stmts;
4240
4241 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D)) {
4242 // We do not want to visit a Lambda expression defined inside a method
4243 // independently. Instead, it should be visited along with the outer method.
4244 // FIXME: do we want to do the same thing for `BlockDecl`s?
4245 if (const auto *MD = dyn_cast<CXXMethodDecl>(Val: D)) {
4246 if (MD->getParent()->isLambda() && MD->getParent()->isLocalClass())
4247 return;
4248 }
4249
4250 for (FunctionDecl *FReDecl : FD->redecls()) {
4251 if (FReDecl->isExternC()) {
4252 // Do not emit fixit suggestions for functions declared in an
4253 // extern "C" block.
4254 EmitSuggestions = false;
4255 break;
4256 }
4257 }
4258
4259 Stmts.push_back(Elt: FD->getBody());
4260
4261 if (const auto *ID = dyn_cast<CXXConstructorDecl>(Val: D)) {
4262 for (const CXXCtorInitializer *CI : ID->inits()) {
4263 Stmts.push_back(CI->getInit());
4264 }
4265 }
4266 } else if (isa<BlockDecl>(Val: D) || isa<ObjCMethodDecl>(Val: D)) {
4267 Stmts.push_back(Elt: D->getBody());
4268 }
4269
4270 assert(!Stmts.empty());
4271
4272 FixableGadgetList FixableGadgets;
4273 WarningGadgetList WarningGadgets;
4274 DeclUseTracker Tracker;
4275 for (Stmt *S : Stmts) {
4276 findGadgets(S, Ctx&: D->getASTContext(), Handler, EmitSuggestions, FixableGadgets,
4277 WarningGadgets, Tracker);
4278 }
4279 applyGadgets(D, FixableGadgets: std::move(FixableGadgets), WarningGadgets: std::move(WarningGadgets),
4280 Tracker: std::move(Tracker), Handler, EmitSuggestions);
4281}
4282

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of clang/lib/Analysis/UnsafeBufferUsage.cpp