1//===- UnsafeBufferUsage.cpp - Replace pointers with modern C++ -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
10#include "clang/AST/Decl.h"
11#include "clang/AST/Expr.h"
12#include "clang/AST/RecursiveASTVisitor.h"
13#include "clang/AST/StmtVisitor.h"
14#include "clang/ASTMatchers/ASTMatchFinder.h"
15#include "clang/Basic/CharInfo.h"
16#include "clang/Basic/SourceLocation.h"
17#include "clang/Lex/Lexer.h"
18#include "clang/Lex/Preprocessor.h"
19#include "llvm/ADT/APSInt.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/StringRef.h"
22#include "llvm/Support/Casting.h"
23#include <memory>
24#include <optional>
25#include <queue>
26#include <sstream>
27
28using namespace llvm;
29using namespace clang;
30using namespace ast_matchers;
31
32#ifndef NDEBUG
33namespace {
34class StmtDebugPrinter
35 : public ConstStmtVisitor<StmtDebugPrinter, std::string> {
36public:
37 std::string VisitStmt(const Stmt *S) { return S->getStmtClassName(); }
38
39 std::string VisitBinaryOperator(const BinaryOperator *BO) {
40 return "BinaryOperator(" + BO->getOpcodeStr().str() + ")";
41 }
42
43 std::string VisitUnaryOperator(const UnaryOperator *UO) {
44 return "UnaryOperator(" + UO->getOpcodeStr(Op: UO->getOpcode()).str() + ")";
45 }
46
47 std::string VisitImplicitCastExpr(const ImplicitCastExpr *ICE) {
48 return "ImplicitCastExpr(" + std::string(ICE->getCastKindName()) + ")";
49 }
50};
51
52// Returns a string of ancestor `Stmt`s of the given `DRE` in such a form:
53// "DRE ==> parent-of-DRE ==> grandparent-of-DRE ==> ...".
54static std::string getDREAncestorString(const DeclRefExpr *DRE,
55 ASTContext &Ctx) {
56 std::stringstream SS;
57 const Stmt *St = DRE;
58 StmtDebugPrinter StmtPriner;
59
60 do {
61 SS << StmtPriner.Visit(St);
62
63 DynTypedNodeList StParents = Ctx.getParents(Node: *St);
64
65 if (StParents.size() > 1)
66 return "unavailable due to multiple parents";
67 if (StParents.size() == 0)
68 break;
69 St = StParents.begin()->get<Stmt>();
70 if (St)
71 SS << " ==> ";
72 } while (St);
73 return SS.str();
74}
75} // namespace
76#endif /* NDEBUG */
77
78namespace clang::ast_matchers {
79// A `RecursiveASTVisitor` that traverses all descendants of a given node "n"
80// except for those belonging to a different callable of "n".
81class MatchDescendantVisitor
82 : public RecursiveASTVisitor<MatchDescendantVisitor> {
83public:
84 typedef RecursiveASTVisitor<MatchDescendantVisitor> VisitorBase;
85
86 // Creates an AST visitor that matches `Matcher` on all
87 // descendants of a given node "n" except for the ones
88 // belonging to a different callable of "n".
89 MatchDescendantVisitor(const internal::DynTypedMatcher *Matcher,
90 internal::ASTMatchFinder *Finder,
91 internal::BoundNodesTreeBuilder *Builder,
92 internal::ASTMatchFinder::BindKind Bind,
93 const bool ignoreUnevaluatedContext)
94 : Matcher(Matcher), Finder(Finder), Builder(Builder), Bind(Bind),
95 Matches(false), ignoreUnevaluatedContext(ignoreUnevaluatedContext) {}
96
97 // Returns true if a match is found in a subtree of `DynNode`, which belongs
98 // to the same callable of `DynNode`.
99 bool findMatch(const DynTypedNode &DynNode) {
100 Matches = false;
101 if (const Stmt *StmtNode = DynNode.get<Stmt>()) {
102 TraverseStmt(Node: const_cast<Stmt *>(StmtNode));
103 *Builder = ResultBindings;
104 return Matches;
105 }
106 return false;
107 }
108
109 // The following are overriding methods from the base visitor class.
110 // They are public only to allow CRTP to work. They are *not *part
111 // of the public API of this class.
112
113 // For the matchers so far used in safe buffers, we only need to match
114 // `Stmt`s. To override more as needed.
115
116 bool TraverseDecl(Decl *Node) {
117 if (!Node)
118 return true;
119 if (!match(Node: *Node))
120 return false;
121 // To skip callables:
122 if (isa<FunctionDecl, BlockDecl, ObjCMethodDecl>(Val: Node))
123 return true;
124 // Traverse descendants
125 return VisitorBase::TraverseDecl(D: Node);
126 }
127
128 bool TraverseGenericSelectionExpr(GenericSelectionExpr *Node) {
129 // These are unevaluated, except the result expression.
130 if(ignoreUnevaluatedContext)
131 return TraverseStmt(Node->getResultExpr());
132 return VisitorBase::TraverseGenericSelectionExpr(Node);
133 }
134
135 bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *Node) {
136 // Unevaluated context.
137 if(ignoreUnevaluatedContext)
138 return true;
139 return VisitorBase::TraverseUnaryExprOrTypeTraitExpr(Node);
140 }
141
142 bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc Node) {
143 // Unevaluated context.
144 if(ignoreUnevaluatedContext)
145 return true;
146 return VisitorBase::TraverseTypeOfExprTypeLoc(Node);
147 }
148
149 bool TraverseDecltypeTypeLoc(DecltypeTypeLoc Node) {
150 // Unevaluated context.
151 if(ignoreUnevaluatedContext)
152 return true;
153 return VisitorBase::TraverseDecltypeTypeLoc(Node);
154 }
155
156 bool TraverseCXXNoexceptExpr(CXXNoexceptExpr *Node) {
157 // Unevaluated context.
158 if(ignoreUnevaluatedContext)
159 return true;
160 return VisitorBase::TraverseCXXNoexceptExpr(Node);
161 }
162
163 bool TraverseCXXTypeidExpr(CXXTypeidExpr *Node) {
164 // Unevaluated context.
165 if(ignoreUnevaluatedContext)
166 return true;
167 return VisitorBase::TraverseCXXTypeidExpr(Node);
168 }
169
170 bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) {
171 if (!Node)
172 return true;
173 if (!match(Node: *Node))
174 return false;
175 return VisitorBase::TraverseStmt(S: Node);
176 }
177
178 bool shouldVisitTemplateInstantiations() const { return true; }
179 bool shouldVisitImplicitCode() const {
180 // TODO: let's ignore implicit code for now
181 return false;
182 }
183
184private:
185 // Sets 'Matched' to true if 'Matcher' matches 'Node'
186 //
187 // Returns 'true' if traversal should continue after this function
188 // returns, i.e. if no match is found or 'Bind' is 'BK_All'.
189 template <typename T> bool match(const T &Node) {
190 internal::BoundNodesTreeBuilder RecursiveBuilder(*Builder);
191
192 if (Matcher->matches(DynNode: DynTypedNode::create(Node), Finder,
193 Builder: &RecursiveBuilder)) {
194 ResultBindings.addMatch(Bindings: RecursiveBuilder);
195 Matches = true;
196 if (Bind != internal::ASTMatchFinder::BK_All)
197 return false; // Abort as soon as a match is found.
198 }
199 return true;
200 }
201
202 const internal::DynTypedMatcher *const Matcher;
203 internal::ASTMatchFinder *const Finder;
204 internal::BoundNodesTreeBuilder *const Builder;
205 internal::BoundNodesTreeBuilder ResultBindings;
206 const internal::ASTMatchFinder::BindKind Bind;
207 bool Matches;
208 bool ignoreUnevaluatedContext;
209};
210
211// Because we're dealing with raw pointers, let's define what we mean by that.
212static auto hasPointerType() {
213 return hasType(InnerMatcher: hasCanonicalType(InnerMatcher: pointerType()));
214}
215
216static auto hasArrayType() {
217 return hasType(InnerMatcher: hasCanonicalType(InnerMatcher: arrayType()));
218}
219
220AST_MATCHER_P(Stmt, forEachDescendantEvaluatedStmt, internal::Matcher<Stmt>, innerMatcher) {
221 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
222
223 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, true);
224 return Visitor.findMatch(DynNode: DynTypedNode::create(Node));
225}
226
227AST_MATCHER_P(Stmt, forEachDescendantStmt, internal::Matcher<Stmt>, innerMatcher) {
228 const DynTypedMatcher &DTM = static_cast<DynTypedMatcher>(innerMatcher);
229
230 MatchDescendantVisitor Visitor(&DTM, Finder, Builder, ASTMatchFinder::BK_All, false);
231 return Visitor.findMatch(DynNode: DynTypedNode::create(Node));
232}
233
234// Matches a `Stmt` node iff the node is in a safe-buffer opt-out region
235AST_MATCHER_P(Stmt, notInSafeBufferOptOut, const UnsafeBufferUsageHandler *,
236 Handler) {
237 return !Handler->isSafeBufferOptOut(Loc: Node.getBeginLoc());
238}
239
240AST_MATCHER_P(Stmt, ignoreUnsafeBufferInContainer,
241 const UnsafeBufferUsageHandler *, Handler) {
242 return Handler->ignoreUnsafeBufferInContainer(Loc: Node.getBeginLoc());
243}
244
245AST_MATCHER_P(CastExpr, castSubExpr, internal::Matcher<Expr>, innerMatcher) {
246 return innerMatcher.matches(Node: *Node.getSubExpr(), Finder, Builder);
247}
248
249// Matches a `UnaryOperator` whose operator is pre-increment:
250AST_MATCHER(UnaryOperator, isPreInc) {
251 return Node.getOpcode() == UnaryOperator::Opcode::UO_PreInc;
252}
253
254// Returns a matcher that matches any expression 'e' such that `innerMatcher`
255// matches 'e' and 'e' is in an Unspecified Lvalue Context.
256static auto isInUnspecifiedLvalueContext(internal::Matcher<Expr> innerMatcher) {
257 // clang-format off
258 return
259 expr(anyOf(
260 implicitCastExpr(
261 hasCastKind(Kind: CastKind::CK_LValueToRValue),
262 castSubExpr(innerMatcher)),
263 binaryOperator(
264 hasAnyOperatorName("="),
265 hasLHS(InnerMatcher: innerMatcher)
266 )
267 ));
268// clang-format on
269}
270
271
272// Returns a matcher that matches any expression `e` such that `InnerMatcher`
273// matches `e` and `e` is in an Unspecified Pointer Context (UPC).
274static internal::Matcher<Stmt>
275isInUnspecifiedPointerContext(internal::Matcher<Stmt> InnerMatcher) {
276 // A UPC can be
277 // 1. an argument of a function call (except the callee has [[unsafe_...]]
278 // attribute), or
279 // 2. the operand of a pointer-to-(integer or bool) cast operation; or
280 // 3. the operand of a comparator operation; or
281 // 4. the operand of a pointer subtraction operation
282 // (i.e., computing the distance between two pointers); or ...
283
284 auto CallArgMatcher =
285 callExpr(forEachArgumentWithParam(InnerMatcher,
286 hasPointerType() /* array also decays to pointer type*/),
287 unless(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage)))));
288
289 auto CastOperandMatcher =
290 castExpr(anyOf(hasCastKind(Kind: CastKind::CK_PointerToIntegral),
291 hasCastKind(Kind: CastKind::CK_PointerToBoolean)),
292 castSubExpr(innerMatcher: allOf(hasPointerType(), InnerMatcher)));
293
294 auto CompOperandMatcher =
295 binaryOperator(hasAnyOperatorName("!=", "==", "<", "<=", ">", ">="),
296 eachOf(hasLHS(InnerMatcher: allOf(hasPointerType(), InnerMatcher)),
297 hasRHS(InnerMatcher: allOf(hasPointerType(), InnerMatcher))));
298
299 // A matcher that matches pointer subtractions:
300 auto PtrSubtractionMatcher =
301 binaryOperator(hasOperatorName(Name: "-"),
302 // Note that here we need both LHS and RHS to be
303 // pointer. Then the inner matcher can match any of
304 // them:
305 allOf(hasLHS(InnerMatcher: hasPointerType()),
306 hasRHS(InnerMatcher: hasPointerType())),
307 eachOf(hasLHS(InnerMatcher),
308 hasRHS(InnerMatcher)));
309
310 return stmt(anyOf(CallArgMatcher, CastOperandMatcher, CompOperandMatcher,
311 PtrSubtractionMatcher));
312 // FIXME: any more cases? (UPC excludes the RHS of an assignment. For now we
313 // don't have to check that.)
314}
315
316// Returns a matcher that matches any expression 'e' such that `innerMatcher`
317// matches 'e' and 'e' is in an unspecified untyped context (i.e the expression
318// 'e' isn't evaluated to an RValue). For example, consider the following code:
319// int *p = new int[4];
320// int *q = new int[4];
321// if ((p = q)) {}
322// p = q;
323// The expression `p = q` in the conditional of the `if` statement
324// `if ((p = q))` is evaluated as an RValue, whereas the expression `p = q;`
325// in the assignment statement is in an untyped context.
326static internal::Matcher<Stmt>
327isInUnspecifiedUntypedContext(internal::Matcher<Stmt> InnerMatcher) {
328 // An unspecified context can be
329 // 1. A compound statement,
330 // 2. The body of an if statement
331 // 3. Body of a loop
332 auto CompStmt = compoundStmt(forEach(InnerMatcher));
333 auto IfStmtThen = ifStmt(hasThen(InnerMatcher));
334 auto IfStmtElse = ifStmt(hasElse(InnerMatcher));
335 // FIXME: Handle loop bodies.
336 return stmt(anyOf(CompStmt, IfStmtThen, IfStmtElse));
337}
338
339// Given a two-param std::span construct call, matches iff the call has the
340// following forms:
341// 1. `std::span<T>{new T[n], n}`, where `n` is a literal or a DRE
342// 2. `std::span<T>{new T, 1}`
343// 3. `std::span<T>{&var, 1}`
344// 4. `std::span<T>{a, n}`, where `a` is of an array-of-T with constant size
345// `n`
346// 5. `std::span<T>{any, 0}`
347AST_MATCHER(CXXConstructExpr, isSafeSpanTwoParamConstruct) {
348 assert(Node.getNumArgs() == 2 &&
349 "expecting a two-parameter std::span constructor");
350 const Expr *Arg0 = Node.getArg(Arg: 0)->IgnoreImplicit();
351 const Expr *Arg1 = Node.getArg(Arg: 1)->IgnoreImplicit();
352 auto HaveEqualConstantValues = [&Finder](const Expr *E0, const Expr *E1) {
353 if (auto E0CV = E0->getIntegerConstantExpr(Ctx: Finder->getASTContext()))
354 if (auto E1CV = E1->getIntegerConstantExpr(Ctx: Finder->getASTContext())) {
355 return APSInt::compareValues(I1: *E0CV, I2: *E1CV) == 0;
356 }
357 return false;
358 };
359 auto AreSameDRE = [](const Expr *E0, const Expr *E1) {
360 if (auto *DRE0 = dyn_cast<DeclRefExpr>(Val: E0))
361 if (auto *DRE1 = dyn_cast<DeclRefExpr>(Val: E1)) {
362 return DRE0->getDecl() == DRE1->getDecl();
363 }
364 return false;
365 };
366 std::optional<APSInt> Arg1CV =
367 Arg1->getIntegerConstantExpr(Ctx: Finder->getASTContext());
368
369 if (Arg1CV && Arg1CV->isZero())
370 // Check form 5:
371 return true;
372 switch (Arg0->IgnoreImplicit()->getStmtClass()) {
373 case Stmt::CXXNewExprClass:
374 if (auto Size = cast<CXXNewExpr>(Val: Arg0)->getArraySize()) {
375 // Check form 1:
376 return AreSameDRE((*Size)->IgnoreImplicit(), Arg1) ||
377 HaveEqualConstantValues(*Size, Arg1);
378 }
379 // TODO: what's placeholder type? avoid it for now.
380 if (!cast<CXXNewExpr>(Val: Arg0)->hasPlaceholderType()) {
381 // Check form 2:
382 return Arg1CV && Arg1CV->isOne();
383 }
384 break;
385 case Stmt::UnaryOperatorClass:
386 if (cast<UnaryOperator>(Val: Arg0)->getOpcode() ==
387 UnaryOperator::Opcode::UO_AddrOf)
388 // Check form 3:
389 return Arg1CV && Arg1CV->isOne();
390 break;
391 default:
392 break;
393 }
394
395 QualType Arg0Ty = Arg0->IgnoreImplicit()->getType();
396
397 if (Arg0Ty->isConstantArrayType()) {
398 const APInt &ConstArrSize = cast<ConstantArrayType>(Val&: Arg0Ty)->getSize();
399
400 // Check form 4:
401 return Arg1CV && APSInt::compareValues(I1: APSInt(ConstArrSize), I2: *Arg1CV) == 0;
402 }
403 return false;
404}
405} // namespace clang::ast_matchers
406
407namespace {
408// Because the analysis revolves around variables and their types, we'll need to
409// track uses of variables (aka DeclRefExprs).
410using DeclUseList = SmallVector<const DeclRefExpr *, 1>;
411
412// Convenience typedef.
413using FixItList = SmallVector<FixItHint, 4>;
414} // namespace
415
416namespace {
417/// Gadget is an individual operation in the code that may be of interest to
418/// this analysis. Each (non-abstract) subclass corresponds to a specific
419/// rigid AST structure that constitutes an operation on a pointer-type object.
420/// Discovery of a gadget in the code corresponds to claiming that we understand
421/// what this part of code is doing well enough to potentially improve it.
422/// Gadgets can be warning (immediately deserving a warning) or fixable (not
423/// always deserving a warning per se, but requires our attention to identify
424/// it warrants a fixit).
425class Gadget {
426public:
427 enum class Kind {
428#define GADGET(x) x,
429#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
430 };
431
432 /// Common type of ASTMatchers used for discovering gadgets.
433 /// Useful for implementing the static matcher() methods
434 /// that are expected from all non-abstract subclasses.
435 using Matcher = decltype(stmt());
436
437 Gadget(Kind K) : K(K) {}
438
439 Kind getKind() const { return K; }
440
441#ifndef NDEBUG
442 StringRef getDebugName() const {
443 switch (K) {
444#define GADGET(x) case Kind::x: return #x;
445#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
446 }
447 llvm_unreachable("Unhandled Gadget::Kind enum");
448 }
449#endif
450
451 virtual bool isWarningGadget() const = 0;
452 virtual const Stmt *getBaseStmt() const = 0;
453
454 /// Returns the list of pointer-type variables on which this gadget performs
455 /// its operation. Typically, there's only one variable. This isn't a list
456 /// of all DeclRefExprs in the gadget's AST!
457 virtual DeclUseList getClaimedVarUseSites() const = 0;
458
459 virtual ~Gadget() = default;
460
461private:
462 Kind K;
463};
464
465
466/// Warning gadgets correspond to unsafe code patterns that warrants
467/// an immediate warning.
468class WarningGadget : public Gadget {
469public:
470 WarningGadget(Kind K) : Gadget(K) {}
471
472 static bool classof(const Gadget *G) { return G->isWarningGadget(); }
473 bool isWarningGadget() const final { return true; }
474};
475
476/// Fixable gadgets correspond to code patterns that aren't always unsafe but need to be
477/// properly recognized in order to emit fixes. For example, if a raw pointer-type
478/// variable is replaced by a safe C++ container, every use of such variable must be
479/// carefully considered and possibly updated.
480class FixableGadget : public Gadget {
481public:
482 FixableGadget(Kind K) : Gadget(K) {}
483
484 static bool classof(const Gadget *G) { return !G->isWarningGadget(); }
485 bool isWarningGadget() const final { return false; }
486
487 /// Returns a fixit that would fix the current gadget according to
488 /// the current strategy. Returns std::nullopt if the fix cannot be produced;
489 /// returns an empty list if no fixes are necessary.
490 virtual std::optional<FixItList> getFixits(const FixitStrategy &) const {
491 return std::nullopt;
492 }
493
494 /// Returns a list of two elements where the first element is the LHS of a pointer assignment
495 /// statement and the second element is the RHS. This two-element list represents the fact that
496 /// the LHS buffer gets its bounds information from the RHS buffer. This information will be used
497 /// later to group all those variables whose types must be modified together to prevent type
498 /// mismatches.
499 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
500 getStrategyImplications() const {
501 return std::nullopt;
502 }
503};
504
505static auto toSupportedVariable() {
506 return to(InnerMatcher: varDecl());
507}
508
509using FixableGadgetList = std::vector<std::unique_ptr<FixableGadget>>;
510using WarningGadgetList = std::vector<std::unique_ptr<WarningGadget>>;
511
512/// An increment of a pointer-type value is unsafe as it may run the pointer
513/// out of bounds.
514class IncrementGadget : public WarningGadget {
515 static constexpr const char *const OpTag = "op";
516 const UnaryOperator *Op;
517
518public:
519 IncrementGadget(const MatchFinder::MatchResult &Result)
520 : WarningGadget(Kind::Increment),
521 Op(Result.Nodes.getNodeAs<UnaryOperator>(ID: OpTag)) {}
522
523 static bool classof(const Gadget *G) {
524 return G->getKind() == Kind::Increment;
525 }
526
527 static Matcher matcher() {
528 return stmt(unaryOperator(
529 hasOperatorName(Name: "++"),
530 hasUnaryOperand(InnerMatcher: ignoringParenImpCasts(InnerMatcher: hasPointerType()))
531 ).bind(ID: OpTag));
532 }
533
534 const UnaryOperator *getBaseStmt() const override { return Op; }
535
536 DeclUseList getClaimedVarUseSites() const override {
537 SmallVector<const DeclRefExpr *, 2> Uses;
538 if (const auto *DRE =
539 dyn_cast<DeclRefExpr>(Val: Op->getSubExpr()->IgnoreParenImpCasts())) {
540 Uses.push_back(Elt: DRE);
541 }
542
543 return std::move(Uses);
544 }
545};
546
547/// A decrement of a pointer-type value is unsafe as it may run the pointer
548/// out of bounds.
549class DecrementGadget : public WarningGadget {
550 static constexpr const char *const OpTag = "op";
551 const UnaryOperator *Op;
552
553public:
554 DecrementGadget(const MatchFinder::MatchResult &Result)
555 : WarningGadget(Kind::Decrement),
556 Op(Result.Nodes.getNodeAs<UnaryOperator>(ID: OpTag)) {}
557
558 static bool classof(const Gadget *G) {
559 return G->getKind() == Kind::Decrement;
560 }
561
562 static Matcher matcher() {
563 return stmt(unaryOperator(
564 hasOperatorName(Name: "--"),
565 hasUnaryOperand(InnerMatcher: ignoringParenImpCasts(InnerMatcher: hasPointerType()))
566 ).bind(ID: OpTag));
567 }
568
569 const UnaryOperator *getBaseStmt() const override { return Op; }
570
571 DeclUseList getClaimedVarUseSites() const override {
572 if (const auto *DRE =
573 dyn_cast<DeclRefExpr>(Val: Op->getSubExpr()->IgnoreParenImpCasts())) {
574 return {DRE};
575 }
576
577 return {};
578 }
579};
580
581/// Array subscript expressions on raw pointers as if they're arrays. Unsafe as
582/// it doesn't have any bounds checks for the array.
583class ArraySubscriptGadget : public WarningGadget {
584 static constexpr const char *const ArraySubscrTag = "ArraySubscript";
585 const ArraySubscriptExpr *ASE;
586
587public:
588 ArraySubscriptGadget(const MatchFinder::MatchResult &Result)
589 : WarningGadget(Kind::ArraySubscript),
590 ASE(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ID: ArraySubscrTag)) {}
591
592 static bool classof(const Gadget *G) {
593 return G->getKind() == Kind::ArraySubscript;
594 }
595
596 static Matcher matcher() {
597 // FIXME: What if the index is integer literal 0? Should this be
598 // a safe gadget in this case?
599 // clang-format off
600 return stmt(arraySubscriptExpr(
601 hasBase(InnerMatcher: ignoringParenImpCasts(
602 InnerMatcher: anyOf(hasPointerType(), hasArrayType()))),
603 unless(hasIndex(
604 InnerMatcher: anyOf(integerLiteral(equals(Value: 0)), arrayInitIndexExpr())
605 )))
606 .bind(ID: ArraySubscrTag));
607 // clang-format on
608 }
609
610 const ArraySubscriptExpr *getBaseStmt() const override { return ASE; }
611
612 DeclUseList getClaimedVarUseSites() const override {
613 if (const auto *DRE =
614 dyn_cast<DeclRefExpr>(Val: ASE->getBase()->IgnoreParenImpCasts())) {
615 return {DRE};
616 }
617
618 return {};
619 }
620};
621
622/// A pointer arithmetic expression of one of the forms:
623/// \code
624/// ptr + n | n + ptr | ptr - n | ptr += n | ptr -= n
625/// \endcode
626class PointerArithmeticGadget : public WarningGadget {
627 static constexpr const char *const PointerArithmeticTag = "ptrAdd";
628 static constexpr const char *const PointerArithmeticPointerTag = "ptrAddPtr";
629 const BinaryOperator *PA; // pointer arithmetic expression
630 const Expr *Ptr; // the pointer expression in `PA`
631
632public:
633 PointerArithmeticGadget(const MatchFinder::MatchResult &Result)
634 : WarningGadget(Kind::PointerArithmetic),
635 PA(Result.Nodes.getNodeAs<BinaryOperator>(ID: PointerArithmeticTag)),
636 Ptr(Result.Nodes.getNodeAs<Expr>(ID: PointerArithmeticPointerTag)) {}
637
638 static bool classof(const Gadget *G) {
639 return G->getKind() == Kind::PointerArithmetic;
640 }
641
642 static Matcher matcher() {
643 auto HasIntegerType = anyOf(hasType(InnerMatcher: isInteger()), hasType(InnerMatcher: enumType()));
644 auto PtrAtRight =
645 allOf(hasOperatorName(Name: "+"),
646 hasRHS(InnerMatcher: expr(hasPointerType()).bind(ID: PointerArithmeticPointerTag)),
647 hasLHS(InnerMatcher: HasIntegerType));
648 auto PtrAtLeft =
649 allOf(anyOf(hasOperatorName(Name: "+"), hasOperatorName(Name: "-"),
650 hasOperatorName(Name: "+="), hasOperatorName(Name: "-=")),
651 hasLHS(InnerMatcher: expr(hasPointerType()).bind(ID: PointerArithmeticPointerTag)),
652 hasRHS(InnerMatcher: HasIntegerType));
653
654 return stmt(binaryOperator(anyOf(PtrAtLeft, PtrAtRight))
655 .bind(ID: PointerArithmeticTag));
656 }
657
658 const Stmt *getBaseStmt() const override { return PA; }
659
660 DeclUseList getClaimedVarUseSites() const override {
661 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ptr->IgnoreParenImpCasts())) {
662 return {DRE};
663 }
664
665 return {};
666 }
667 // FIXME: pointer adding zero should be fine
668 // FIXME: this gadge will need a fix-it
669};
670
671class SpanTwoParamConstructorGadget : public WarningGadget {
672 static constexpr const char *const SpanTwoParamConstructorTag =
673 "spanTwoParamConstructor";
674 const CXXConstructExpr *Ctor; // the span constructor expression
675
676public:
677 SpanTwoParamConstructorGadget(const MatchFinder::MatchResult &Result)
678 : WarningGadget(Kind::SpanTwoParamConstructor),
679 Ctor(Result.Nodes.getNodeAs<CXXConstructExpr>(
680 ID: SpanTwoParamConstructorTag)) {}
681
682 static bool classof(const Gadget *G) {
683 return G->getKind() == Kind::SpanTwoParamConstructor;
684 }
685
686 static Matcher matcher() {
687 auto HasTwoParamSpanCtorDecl = hasDeclaration(
688 InnerMatcher: cxxConstructorDecl(hasDeclContext(InnerMatcher: isInStdNamespace()), hasName(Name: "span"),
689 parameterCountIs(N: 2)));
690
691 return stmt(cxxConstructExpr(HasTwoParamSpanCtorDecl,
692 unless(isSafeSpanTwoParamConstruct()))
693 .bind(ID: SpanTwoParamConstructorTag));
694 }
695
696 const Stmt *getBaseStmt() const override { return Ctor; }
697
698 DeclUseList getClaimedVarUseSites() const override {
699 // If the constructor call is of the form `std::span{var, n}`, `var` is
700 // considered an unsafe variable.
701 if (auto *DRE = dyn_cast<DeclRefExpr>(Val: Ctor->getArg(Arg: 0))) {
702 if (isa<VarDecl>(Val: DRE->getDecl()))
703 return {DRE};
704 }
705 return {};
706 }
707};
708
709/// A pointer initialization expression of the form:
710/// \code
711/// int *p = q;
712/// \endcode
713class PointerInitGadget : public FixableGadget {
714private:
715 static constexpr const char *const PointerInitLHSTag = "ptrInitLHS";
716 static constexpr const char *const PointerInitRHSTag = "ptrInitRHS";
717 const VarDecl * PtrInitLHS; // the LHS pointer expression in `PI`
718 const DeclRefExpr * PtrInitRHS; // the RHS pointer expression in `PI`
719
720public:
721 PointerInitGadget(const MatchFinder::MatchResult &Result)
722 : FixableGadget(Kind::PointerInit),
723 PtrInitLHS(Result.Nodes.getNodeAs<VarDecl>(ID: PointerInitLHSTag)),
724 PtrInitRHS(Result.Nodes.getNodeAs<DeclRefExpr>(ID: PointerInitRHSTag)) {}
725
726 static bool classof(const Gadget *G) {
727 return G->getKind() == Kind::PointerInit;
728 }
729
730 static Matcher matcher() {
731 auto PtrInitStmt = declStmt(hasSingleDecl(InnerMatcher: varDecl(
732 hasInitializer(InnerMatcher: ignoringImpCasts(InnerMatcher: declRefExpr(
733 hasPointerType(),
734 toSupportedVariable()).
735 bind(ID: PointerInitRHSTag)))).
736 bind(ID: PointerInitLHSTag)));
737
738 return stmt(PtrInitStmt);
739 }
740
741 virtual std::optional<FixItList>
742 getFixits(const FixitStrategy &S) const override;
743
744 virtual const Stmt *getBaseStmt() const override {
745 // FIXME: This needs to be the entire DeclStmt, assuming that this method
746 // makes sense at all on a FixableGadget.
747 return PtrInitRHS;
748 }
749
750 virtual DeclUseList getClaimedVarUseSites() const override {
751 return DeclUseList{PtrInitRHS};
752 }
753
754 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
755 getStrategyImplications() const override {
756 return std::make_pair(x: PtrInitLHS,
757 y: cast<VarDecl>(Val: PtrInitRHS->getDecl()));
758 }
759};
760
761/// A pointer assignment expression of the form:
762/// \code
763/// p = q;
764/// \endcode
765class PointerAssignmentGadget : public FixableGadget {
766private:
767 static constexpr const char *const PointerAssignLHSTag = "ptrLHS";
768 static constexpr const char *const PointerAssignRHSTag = "ptrRHS";
769 const DeclRefExpr * PtrLHS; // the LHS pointer expression in `PA`
770 const DeclRefExpr * PtrRHS; // the RHS pointer expression in `PA`
771
772public:
773 PointerAssignmentGadget(const MatchFinder::MatchResult &Result)
774 : FixableGadget(Kind::PointerAssignment),
775 PtrLHS(Result.Nodes.getNodeAs<DeclRefExpr>(ID: PointerAssignLHSTag)),
776 PtrRHS(Result.Nodes.getNodeAs<DeclRefExpr>(ID: PointerAssignRHSTag)) {}
777
778 static bool classof(const Gadget *G) {
779 return G->getKind() == Kind::PointerAssignment;
780 }
781
782 static Matcher matcher() {
783 auto PtrAssignExpr = binaryOperator(allOf(hasOperatorName(Name: "="),
784 hasRHS(InnerMatcher: ignoringParenImpCasts(InnerMatcher: declRefExpr(hasPointerType(),
785 toSupportedVariable()).
786 bind(ID: PointerAssignRHSTag))),
787 hasLHS(InnerMatcher: declRefExpr(hasPointerType(),
788 toSupportedVariable()).
789 bind(ID: PointerAssignLHSTag))));
790
791 return stmt(isInUnspecifiedUntypedContext(InnerMatcher: PtrAssignExpr));
792 }
793
794 virtual std::optional<FixItList>
795 getFixits(const FixitStrategy &S) const override;
796
797 virtual const Stmt *getBaseStmt() const override {
798 // FIXME: This should be the binary operator, assuming that this method
799 // makes sense at all on a FixableGadget.
800 return PtrLHS;
801 }
802
803 virtual DeclUseList getClaimedVarUseSites() const override {
804 return DeclUseList{PtrLHS, PtrRHS};
805 }
806
807 virtual std::optional<std::pair<const VarDecl *, const VarDecl *>>
808 getStrategyImplications() const override {
809 return std::make_pair(x: cast<VarDecl>(Val: PtrLHS->getDecl()),
810 y: cast<VarDecl>(Val: PtrRHS->getDecl()));
811 }
812};
813
814/// A call of a function or method that performs unchecked buffer operations
815/// over one of its pointer parameters.
816class UnsafeBufferUsageAttrGadget : public WarningGadget {
817 constexpr static const char *const OpTag = "call_expr";
818 const CallExpr *Op;
819
820public:
821 UnsafeBufferUsageAttrGadget(const MatchFinder::MatchResult &Result)
822 : WarningGadget(Kind::UnsafeBufferUsageAttr),
823 Op(Result.Nodes.getNodeAs<CallExpr>(ID: OpTag)) {}
824
825 static bool classof(const Gadget *G) {
826 return G->getKind() == Kind::UnsafeBufferUsageAttr;
827 }
828
829 static Matcher matcher() {
830 return stmt(callExpr(callee(functionDecl(hasAttr(attr::UnsafeBufferUsage))))
831 .bind(OpTag));
832 }
833 const Stmt *getBaseStmt() const override { return Op; }
834
835 DeclUseList getClaimedVarUseSites() const override { return {}; }
836};
837
838// Warning gadget for unsafe invocation of span::data method.
839// Triggers when the pointer returned by the invocation is immediately
840// cast to a larger type.
841
842class DataInvocationGadget : public WarningGadget {
843 constexpr static const char *const OpTag = "data_invocation_expr";
844 const ExplicitCastExpr *Op;
845
846public:
847 DataInvocationGadget(const MatchFinder::MatchResult &Result)
848 : WarningGadget(Kind::DataInvocation),
849 Op(Result.Nodes.getNodeAs<ExplicitCastExpr>(ID: OpTag)) {}
850
851 static bool classof(const Gadget *G) {
852 return G->getKind() == Kind::DataInvocation;
853 }
854
855 static Matcher matcher() {
856 Matcher callExpr = cxxMemberCallExpr(
857 callee(InnerMatcher: cxxMethodDecl(hasName(Name: "data"), ofClass(InnerMatcher: hasName(Name: "std::span")))));
858 return stmt(
859 explicitCastExpr(anyOf(has(callExpr), has(parenExpr(has(callExpr)))))
860 .bind(ID: OpTag));
861 }
862 const Stmt *getBaseStmt() const override { return Op; }
863
864 DeclUseList getClaimedVarUseSites() const override { return {}; }
865};
866
867// Represents expressions of the form `DRE[*]` in the Unspecified Lvalue
868// Context (see `isInUnspecifiedLvalueContext`).
869// Note here `[]` is the built-in subscript operator.
870class ULCArraySubscriptGadget : public FixableGadget {
871private:
872 static constexpr const char *const ULCArraySubscriptTag =
873 "ArraySubscriptUnderULC";
874 const ArraySubscriptExpr *Node;
875
876public:
877 ULCArraySubscriptGadget(const MatchFinder::MatchResult &Result)
878 : FixableGadget(Kind::ULCArraySubscript),
879 Node(Result.Nodes.getNodeAs<ArraySubscriptExpr>(ID: ULCArraySubscriptTag)) {
880 assert(Node != nullptr && "Expecting a non-null matching result");
881 }
882
883 static bool classof(const Gadget *G) {
884 return G->getKind() == Kind::ULCArraySubscript;
885 }
886
887 static Matcher matcher() {
888 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
889 auto BaseIsArrayOrPtrDRE =
890 hasBase(InnerMatcher: ignoringParenImpCasts(InnerMatcher: declRefExpr(ArrayOrPtr,
891 toSupportedVariable())));
892 auto Target =
893 arraySubscriptExpr(BaseIsArrayOrPtrDRE).bind(ID: ULCArraySubscriptTag);
894
895 return expr(isInUnspecifiedLvalueContext(innerMatcher: Target));
896 }
897
898 virtual std::optional<FixItList>
899 getFixits(const FixitStrategy &S) const override;
900
901 virtual const Stmt *getBaseStmt() const override { return Node; }
902
903 virtual DeclUseList getClaimedVarUseSites() const override {
904 if (const auto *DRE =
905 dyn_cast<DeclRefExpr>(Val: Node->getBase()->IgnoreImpCasts())) {
906 return {DRE};
907 }
908 return {};
909 }
910};
911
912// Fixable gadget to handle stand alone pointers of the form `UPC(DRE)` in the
913// unspecified pointer context (isInUnspecifiedPointerContext). The gadget emits
914// fixit of the form `UPC(DRE.data())`.
915class UPCStandalonePointerGadget : public FixableGadget {
916private:
917 static constexpr const char *const DeclRefExprTag = "StandalonePointer";
918 const DeclRefExpr *Node;
919
920public:
921 UPCStandalonePointerGadget(const MatchFinder::MatchResult &Result)
922 : FixableGadget(Kind::UPCStandalonePointer),
923 Node(Result.Nodes.getNodeAs<DeclRefExpr>(ID: DeclRefExprTag)) {
924 assert(Node != nullptr && "Expecting a non-null matching result");
925 }
926
927 static bool classof(const Gadget *G) {
928 return G->getKind() == Kind::UPCStandalonePointer;
929 }
930
931 static Matcher matcher() {
932 auto ArrayOrPtr = anyOf(hasPointerType(), hasArrayType());
933 auto target = expr(
934 ignoringParenImpCasts(InnerMatcher: declRefExpr(allOf(ArrayOrPtr,
935 toSupportedVariable())).bind(ID: DeclRefExprTag)));
936 return stmt(isInUnspecifiedPointerContext(InnerMatcher: target));
937 }
938
939 virtual std::optional<FixItList>
940 getFixits(const FixitStrategy &S) const override;
941
942 virtual const Stmt *getBaseStmt() const override { return Node; }
943
944 virtual DeclUseList getClaimedVarUseSites() const override {
945 return {Node};
946 }
947};
948
949class PointerDereferenceGadget : public FixableGadget {
950 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
951 static constexpr const char *const OperatorTag = "op";
952
953 const DeclRefExpr *BaseDeclRefExpr = nullptr;
954 const UnaryOperator *Op = nullptr;
955
956public:
957 PointerDereferenceGadget(const MatchFinder::MatchResult &Result)
958 : FixableGadget(Kind::PointerDereference),
959 BaseDeclRefExpr(
960 Result.Nodes.getNodeAs<DeclRefExpr>(ID: BaseDeclRefExprTag)),
961 Op(Result.Nodes.getNodeAs<UnaryOperator>(ID: OperatorTag)) {}
962
963 static bool classof(const Gadget *G) {
964 return G->getKind() == Kind::PointerDereference;
965 }
966
967 static Matcher matcher() {
968 auto Target =
969 unaryOperator(
970 hasOperatorName(Name: "*"),
971 has(expr(ignoringParenImpCasts(
972 InnerMatcher: declRefExpr(toSupportedVariable()).bind(ID: BaseDeclRefExprTag)))))
973 .bind(ID: OperatorTag);
974
975 return expr(isInUnspecifiedLvalueContext(innerMatcher: Target));
976 }
977
978 DeclUseList getClaimedVarUseSites() const override {
979 return {BaseDeclRefExpr};
980 }
981
982 virtual const Stmt *getBaseStmt() const final { return Op; }
983
984 virtual std::optional<FixItList>
985 getFixits(const FixitStrategy &S) const override;
986};
987
988// Represents expressions of the form `&DRE[any]` in the Unspecified Pointer
989// Context (see `isInUnspecifiedPointerContext`).
990// Note here `[]` is the built-in subscript operator.
991class UPCAddressofArraySubscriptGadget : public FixableGadget {
992private:
993 static constexpr const char *const UPCAddressofArraySubscriptTag =
994 "AddressofArraySubscriptUnderUPC";
995 const UnaryOperator *Node; // the `&DRE[any]` node
996
997public:
998 UPCAddressofArraySubscriptGadget(const MatchFinder::MatchResult &Result)
999 : FixableGadget(Kind::ULCArraySubscript),
1000 Node(Result.Nodes.getNodeAs<UnaryOperator>(
1001 ID: UPCAddressofArraySubscriptTag)) {
1002 assert(Node != nullptr && "Expecting a non-null matching result");
1003 }
1004
1005 static bool classof(const Gadget *G) {
1006 return G->getKind() == Kind::UPCAddressofArraySubscript;
1007 }
1008
1009 static Matcher matcher() {
1010 return expr(isInUnspecifiedPointerContext(InnerMatcher: expr(ignoringImpCasts(
1011 InnerMatcher: unaryOperator(hasOperatorName(Name: "&"),
1012 hasUnaryOperand(InnerMatcher: arraySubscriptExpr(
1013 hasBase(InnerMatcher: ignoringParenImpCasts(InnerMatcher: declRefExpr(
1014 toSupportedVariable()))))))
1015 .bind(ID: UPCAddressofArraySubscriptTag)))));
1016 }
1017
1018 virtual std::optional<FixItList>
1019 getFixits(const FixitStrategy &) const override;
1020
1021 virtual const Stmt *getBaseStmt() const override { return Node; }
1022
1023 virtual DeclUseList getClaimedVarUseSites() const override {
1024 const auto *ArraySubst = cast<ArraySubscriptExpr>(Val: Node->getSubExpr());
1025 const auto *DRE =
1026 cast<DeclRefExpr>(Val: ArraySubst->getBase()->IgnoreImpCasts());
1027 return {DRE};
1028 }
1029};
1030} // namespace
1031
1032namespace {
1033// An auxiliary tracking facility for the fixit analysis. It helps connect
1034// declarations to its uses and make sure we've covered all uses with our
1035// analysis before we try to fix the declaration.
1036class DeclUseTracker {
1037 using UseSetTy = SmallSet<const DeclRefExpr *, 16>;
1038 using DefMapTy = DenseMap<const VarDecl *, const DeclStmt *>;
1039
1040 // Allocate on the heap for easier move.
1041 std::unique_ptr<UseSetTy> Uses{std::make_unique<UseSetTy>()};
1042 DefMapTy Defs{};
1043
1044public:
1045 DeclUseTracker() = default;
1046 DeclUseTracker(const DeclUseTracker &) = delete; // Let's avoid copies.
1047 DeclUseTracker &operator=(const DeclUseTracker &) = delete;
1048 DeclUseTracker(DeclUseTracker &&) = default;
1049 DeclUseTracker &operator=(DeclUseTracker &&) = default;
1050
1051 // Start tracking a freshly discovered DRE.
1052 void discoverUse(const DeclRefExpr *DRE) { Uses->insert(Ptr: DRE); }
1053
1054 // Stop tracking the DRE as it's been fully figured out.
1055 void claimUse(const DeclRefExpr *DRE) {
1056 assert(Uses->count(DRE) &&
1057 "DRE not found or claimed by multiple matchers!");
1058 Uses->erase(Ptr: DRE);
1059 }
1060
1061 // A variable is unclaimed if at least one use is unclaimed.
1062 bool hasUnclaimedUses(const VarDecl *VD) const {
1063 // FIXME: Can this be less linear? Maybe maintain a map from VDs to DREs?
1064 return any_of(Range&: *Uses, P: [VD](const DeclRefExpr *DRE) {
1065 return DRE->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl();
1066 });
1067 }
1068
1069 UseSetTy getUnclaimedUses(const VarDecl *VD) const {
1070 UseSetTy ReturnSet;
1071 for (auto use : *Uses) {
1072 if (use->getDecl()->getCanonicalDecl() == VD->getCanonicalDecl()) {
1073 ReturnSet.insert(Ptr: use);
1074 }
1075 }
1076 return ReturnSet;
1077 }
1078
1079 void discoverDecl(const DeclStmt *DS) {
1080 for (const Decl *D : DS->decls()) {
1081 if (const auto *VD = dyn_cast<VarDecl>(Val: D)) {
1082 // FIXME: Assertion temporarily disabled due to a bug in
1083 // ASTMatcher internal behavior in presence of GNU
1084 // statement-expressions. We need to properly investigate this
1085 // because it can screw up our algorithm in other ways.
1086 // assert(Defs.count(VD) == 0 && "Definition already discovered!");
1087 Defs[VD] = DS;
1088 }
1089 }
1090 }
1091
1092 const DeclStmt *lookupDecl(const VarDecl *VD) const {
1093 return Defs.lookup(Val: VD);
1094 }
1095};
1096} // namespace
1097
1098// Representing a pointer type expression of the form `++Ptr` in an Unspecified
1099// Pointer Context (UPC):
1100class UPCPreIncrementGadget : public FixableGadget {
1101private:
1102 static constexpr const char *const UPCPreIncrementTag =
1103 "PointerPreIncrementUnderUPC";
1104 const UnaryOperator *Node; // the `++Ptr` node
1105
1106public:
1107 UPCPreIncrementGadget(const MatchFinder::MatchResult &Result)
1108 : FixableGadget(Kind::UPCPreIncrement),
1109 Node(Result.Nodes.getNodeAs<UnaryOperator>(ID: UPCPreIncrementTag)) {
1110 assert(Node != nullptr && "Expecting a non-null matching result");
1111 }
1112
1113 static bool classof(const Gadget *G) {
1114 return G->getKind() == Kind::UPCPreIncrement;
1115 }
1116
1117 static Matcher matcher() {
1118 // Note here we match `++Ptr` for any expression `Ptr` of pointer type.
1119 // Although currently we can only provide fix-its when `Ptr` is a DRE, we
1120 // can have the matcher be general, so long as `getClaimedVarUseSites` does
1121 // things right.
1122 return stmt(isInUnspecifiedPointerContext(InnerMatcher: expr(ignoringImpCasts(
1123 InnerMatcher: unaryOperator(isPreInc(),
1124 hasUnaryOperand(InnerMatcher: declRefExpr(
1125 toSupportedVariable()))
1126 ).bind(ID: UPCPreIncrementTag)))));
1127 }
1128
1129 virtual std::optional<FixItList>
1130 getFixits(const FixitStrategy &S) const override;
1131
1132 virtual const Stmt *getBaseStmt() const override { return Node; }
1133
1134 virtual DeclUseList getClaimedVarUseSites() const override {
1135 return {dyn_cast<DeclRefExpr>(Val: Node->getSubExpr())};
1136 }
1137};
1138
1139// Representing a pointer type expression of the form `Ptr += n` in an
1140// Unspecified Untyped Context (UUC):
1141class UUCAddAssignGadget : public FixableGadget {
1142private:
1143 static constexpr const char *const UUCAddAssignTag =
1144 "PointerAddAssignUnderUUC";
1145 static constexpr const char *const OffsetTag = "Offset";
1146
1147 const BinaryOperator *Node; // the `Ptr += n` node
1148 const Expr *Offset = nullptr;
1149
1150public:
1151 UUCAddAssignGadget(const MatchFinder::MatchResult &Result)
1152 : FixableGadget(Kind::UUCAddAssign),
1153 Node(Result.Nodes.getNodeAs<BinaryOperator>(ID: UUCAddAssignTag)),
1154 Offset(Result.Nodes.getNodeAs<Expr>(ID: OffsetTag)) {
1155 assert(Node != nullptr && "Expecting a non-null matching result");
1156 }
1157
1158 static bool classof(const Gadget *G) {
1159 return G->getKind() == Kind::UUCAddAssign;
1160 }
1161
1162 static Matcher matcher() {
1163 // clang-format off
1164 return stmt(isInUnspecifiedUntypedContext(InnerMatcher: expr(ignoringImpCasts(
1165 InnerMatcher: binaryOperator(hasOperatorName(Name: "+="),
1166 hasLHS(
1167 InnerMatcher: declRefExpr(
1168 hasPointerType(),
1169 toSupportedVariable())),
1170 hasRHS(InnerMatcher: expr().bind(ID: OffsetTag)))
1171 .bind(ID: UUCAddAssignTag)))));
1172 // clang-format on
1173 }
1174
1175 virtual std::optional<FixItList>
1176 getFixits(const FixitStrategy &S) const override;
1177
1178 virtual const Stmt *getBaseStmt() const override { return Node; }
1179
1180 virtual DeclUseList getClaimedVarUseSites() const override {
1181 return {dyn_cast<DeclRefExpr>(Val: Node->getLHS())};
1182 }
1183};
1184
1185// Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
1186// ptr)`:
1187class DerefSimplePtrArithFixableGadget : public FixableGadget {
1188 static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
1189 static constexpr const char *const DerefOpTag = "DerefOp";
1190 static constexpr const char *const AddOpTag = "AddOp";
1191 static constexpr const char *const OffsetTag = "Offset";
1192
1193 const DeclRefExpr *BaseDeclRefExpr = nullptr;
1194 const UnaryOperator *DerefOp = nullptr;
1195 const BinaryOperator *AddOp = nullptr;
1196 const IntegerLiteral *Offset = nullptr;
1197
1198public:
1199 DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result)
1200 : FixableGadget(Kind::DerefSimplePtrArithFixable),
1201 BaseDeclRefExpr(
1202 Result.Nodes.getNodeAs<DeclRefExpr>(ID: BaseDeclRefExprTag)),
1203 DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(ID: DerefOpTag)),
1204 AddOp(Result.Nodes.getNodeAs<BinaryOperator>(ID: AddOpTag)),
1205 Offset(Result.Nodes.getNodeAs<IntegerLiteral>(ID: OffsetTag)) {}
1206
1207 static Matcher matcher() {
1208 // clang-format off
1209 auto ThePtr = expr(hasPointerType(),
1210 ignoringImpCasts(InnerMatcher: declRefExpr(toSupportedVariable()).
1211 bind(ID: BaseDeclRefExprTag)));
1212 auto PlusOverPtrAndInteger = expr(anyOf(
1213 binaryOperator(hasOperatorName(Name: "+"), hasLHS(InnerMatcher: ThePtr),
1214 hasRHS(InnerMatcher: integerLiteral().bind(ID: OffsetTag)))
1215 .bind(ID: AddOpTag),
1216 binaryOperator(hasOperatorName(Name: "+"), hasRHS(InnerMatcher: ThePtr),
1217 hasLHS(InnerMatcher: integerLiteral().bind(ID: OffsetTag)))
1218 .bind(ID: AddOpTag)));
1219 return isInUnspecifiedLvalueContext(innerMatcher: unaryOperator(
1220 hasOperatorName(Name: "*"),
1221 hasUnaryOperand(InnerMatcher: ignoringParens(InnerMatcher: PlusOverPtrAndInteger)))
1222 .bind(ID: DerefOpTag));
1223 // clang-format on
1224 }
1225
1226 virtual std::optional<FixItList>
1227 getFixits(const FixitStrategy &s) const final;
1228
1229 // TODO remove this method from FixableGadget interface
1230 virtual const Stmt *getBaseStmt() const final { return nullptr; }
1231
1232 virtual DeclUseList getClaimedVarUseSites() const final {
1233 return {BaseDeclRefExpr};
1234 }
1235};
1236
1237/// Scan the function and return a list of gadgets found with provided kits.
1238static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
1239findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler,
1240 bool EmitSuggestions) {
1241
1242 struct GadgetFinderCallback : MatchFinder::MatchCallback {
1243 FixableGadgetList FixableGadgets;
1244 WarningGadgetList WarningGadgets;
1245 DeclUseTracker Tracker;
1246
1247 void run(const MatchFinder::MatchResult &Result) override {
1248 // In debug mode, assert that we've found exactly one gadget.
1249 // This helps us avoid conflicts in .bind() tags.
1250#if NDEBUG
1251#define NEXT return
1252#else
1253 [[maybe_unused]] int numFound = 0;
1254#define NEXT ++numFound
1255#endif
1256
1257 if (const auto *DRE = Result.Nodes.getNodeAs<DeclRefExpr>(ID: "any_dre")) {
1258 Tracker.discoverUse(DRE);
1259 NEXT;
1260 }
1261
1262 if (const auto *DS = Result.Nodes.getNodeAs<DeclStmt>(ID: "any_ds")) {
1263 Tracker.discoverDecl(DS);
1264 NEXT;
1265 }
1266
1267 // Figure out which matcher we've found, and call the appropriate
1268 // subclass constructor.
1269 // FIXME: Can we do this more logarithmically?
1270#define FIXABLE_GADGET(name) \
1271 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
1272 FixableGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
1273 NEXT; \
1274 }
1275#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1276#define WARNING_GADGET(name) \
1277 if (Result.Nodes.getNodeAs<Stmt>(#name)) { \
1278 WarningGadgets.push_back(std::make_unique<name##Gadget>(Result)); \
1279 NEXT; \
1280 }
1281#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1282
1283 assert(numFound >= 1 && "Gadgets not found in match result!");
1284 assert(numFound <= 1 && "Conflicting bind tags in gadgets!");
1285 }
1286 };
1287
1288 MatchFinder M;
1289 GadgetFinderCallback CB;
1290
1291 // clang-format off
1292 M.addMatcher(
1293 NodeMatch: stmt(
1294 forEachDescendantEvaluatedStmt(innerMatcher: stmt(anyOf(
1295 // Add Gadget::matcher() for every gadget in the registry.
1296#define WARNING_GADGET(x) \
1297 allOf(x ## Gadget::matcher().bind(#x), \
1298 notInSafeBufferOptOut(&Handler)),
1299#define WARNING_CONTAINER_GADGET(x) \
1300 allOf(x ## Gadget::matcher().bind(#x), \
1301 notInSafeBufferOptOut(&Handler), \
1302 unless(ignoreUnsafeBufferInContainer(&Handler))),
1303#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1304 // Avoid a hanging comma.
1305 unless(stmt())
1306 )))
1307 ),
1308 Action: &CB
1309 );
1310 // clang-format on
1311
1312 if (EmitSuggestions) {
1313 // clang-format off
1314 M.addMatcher(
1315 NodeMatch: stmt(
1316 forEachDescendantStmt(innerMatcher: stmt(eachOf(
1317#define FIXABLE_GADGET(x) \
1318 x ## Gadget::matcher().bind(#x),
1319#include "clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def"
1320 // In parallel, match all DeclRefExprs so that to find out
1321 // whether there are any uncovered by gadgets.
1322 declRefExpr(anyOf(hasPointerType(), hasArrayType()),
1323 to(InnerMatcher: anyOf(varDecl(), bindingDecl()))).bind(ID: "any_dre"),
1324 // Also match DeclStmts because we'll need them when fixing
1325 // their underlying VarDecls that otherwise don't have
1326 // any backreferences to DeclStmts.
1327 declStmt().bind(ID: "any_ds")
1328 )))
1329 ),
1330 Action: &CB
1331 );
1332 // clang-format on
1333 }
1334
1335 M.match(Node: *D->getBody(), Context&: D->getASTContext());
1336 return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets),
1337 std::move(CB.Tracker)};
1338}
1339
// Strict ordering of AST nodes by the raw encoding of their begin source
// location.
template <typename NodeTy> struct CompareNode {
  bool operator()(const NodeTy *N1, const NodeTy *N2) const {
    const auto FirstBegin = N1->getBeginLoc().getRawEncoding();
    const auto SecondBegin = N2->getBeginLoc().getRawEncoding();
    return FirstBegin < SecondBegin;
  }
};
1347
1348struct WarningGadgetSets {
1349 std::map<const VarDecl *, std::set<const WarningGadget *>,
1350 // To keep keys sorted by their locations in the map so that the
1351 // order is deterministic:
1352 CompareNode<VarDecl>>
1353 byVar;
1354 // These Gadgets are not related to pointer variables (e. g. temporaries).
1355 llvm::SmallVector<const WarningGadget *, 16> noVar;
1356};
1357
1358static WarningGadgetSets
1359groupWarningGadgetsByVar(const WarningGadgetList &AllUnsafeOperations) {
1360 WarningGadgetSets result;
1361 // If some gadgets cover more than one
1362 // variable, they'll appear more than once in the map.
1363 for (auto &G : AllUnsafeOperations) {
1364 DeclUseList ClaimedVarUseSites = G->getClaimedVarUseSites();
1365
1366 bool AssociatedWithVarDecl = false;
1367 for (const DeclRefExpr *DRE : ClaimedVarUseSites) {
1368 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
1369 result.byVar[VD].insert(x: G.get());
1370 AssociatedWithVarDecl = true;
1371 }
1372 }
1373
1374 if (!AssociatedWithVarDecl) {
1375 result.noVar.push_back(Elt: G.get());
1376 continue;
1377 }
1378 }
1379 return result;
1380}
1381
1382struct FixableGadgetSets {
1383 std::map<const VarDecl *, std::set<const FixableGadget *>,
1384 // To keep keys sorted by their locations in the map so that the
1385 // order is deterministic:
1386 CompareNode<VarDecl>>
1387 byVar;
1388};
1389
1390static FixableGadgetSets
1391groupFixablesByVar(FixableGadgetList &&AllFixableOperations) {
1392 FixableGadgetSets FixablesForUnsafeVars;
1393 for (auto &F : AllFixableOperations) {
1394 DeclUseList DREs = F->getClaimedVarUseSites();
1395
1396 for (const DeclRefExpr *DRE : DREs) {
1397 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
1398 FixablesForUnsafeVars.byVar[VD].insert(x: F.get());
1399 }
1400 }
1401 }
1402 return FixablesForUnsafeVars;
1403}
1404
1405bool clang::internal::anyConflict(const SmallVectorImpl<FixItHint> &FixIts,
1406 const SourceManager &SM) {
1407 // A simple interval overlap detection algorithm. Sorts all ranges by their
1408 // begin location then finds the first overlap in one pass.
1409 std::vector<const FixItHint *> All; // a copy of `FixIts`
1410
1411 for (const FixItHint &H : FixIts)
1412 All.push_back(x: &H);
1413 std::sort(first: All.begin(), last: All.end(),
1414 comp: [&SM](const FixItHint *H1, const FixItHint *H2) {
1415 return SM.isBeforeInTranslationUnit(LHS: H1->RemoveRange.getBegin(),
1416 RHS: H2->RemoveRange.getBegin());
1417 });
1418
1419 const FixItHint *CurrHint = nullptr;
1420
1421 for (const FixItHint *Hint : All) {
1422 if (!CurrHint ||
1423 SM.isBeforeInTranslationUnit(LHS: CurrHint->RemoveRange.getEnd(),
1424 RHS: Hint->RemoveRange.getBegin())) {
1425 // Either to initialize `CurrHint` or `CurrHint` does not
1426 // overlap with `Hint`:
1427 CurrHint = Hint;
1428 } else
1429 // In case `Hint` overlaps the `CurrHint`, we found at least one
1430 // conflict:
1431 return true;
1432 }
1433 return false;
1434}
1435
1436std::optional<FixItList>
1437PointerAssignmentGadget::getFixits(const FixitStrategy &S) const {
1438 const auto *LeftVD = cast<VarDecl>(Val: PtrLHS->getDecl());
1439 const auto *RightVD = cast<VarDecl>(Val: PtrRHS->getDecl());
1440 switch (S.lookup(VD: LeftVD)) {
1441 case FixitStrategy::Kind::Span:
1442 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Span)
1443 return FixItList{};
1444 return std::nullopt;
1445 case FixitStrategy::Kind::Wontfix:
1446 return std::nullopt;
1447 case FixitStrategy::Kind::Iterator:
1448 case FixitStrategy::Kind::Array:
1449 return std::nullopt;
1450 case FixitStrategy::Kind::Vector:
1451 llvm_unreachable("unsupported strategies for FixableGadgets");
1452 }
1453 return std::nullopt;
1454}
1455
1456std::optional<FixItList>
1457PointerInitGadget::getFixits(const FixitStrategy &S) const {
1458 const auto *LeftVD = PtrInitLHS;
1459 const auto *RightVD = cast<VarDecl>(Val: PtrInitRHS->getDecl());
1460 switch (S.lookup(VD: LeftVD)) {
1461 case FixitStrategy::Kind::Span:
1462 if (S.lookup(VD: RightVD) == FixitStrategy::Kind::Span)
1463 return FixItList{};
1464 return std::nullopt;
1465 case FixitStrategy::Kind::Wontfix:
1466 return std::nullopt;
1467 case FixitStrategy::Kind::Iterator:
1468 case FixitStrategy::Kind::Array:
1469 return std::nullopt;
1470 case FixitStrategy::Kind::Vector:
1471 llvm_unreachable("unsupported strategies for FixableGadgets");
1472 }
1473 return std::nullopt;
1474}
1475
1476static bool isNonNegativeIntegerExpr(const Expr *Expr, const VarDecl *VD,
1477 const ASTContext &Ctx) {
1478 if (auto ConstVal = Expr->getIntegerConstantExpr(Ctx)) {
1479 if (ConstVal->isNegative())
1480 return false;
1481 } else if (!Expr->getType()->isUnsignedIntegerType())
1482 return false;
1483 return true;
1484}
1485
1486std::optional<FixItList>
1487ULCArraySubscriptGadget::getFixits(const FixitStrategy &S) const {
1488 if (const auto *DRE =
1489 dyn_cast<DeclRefExpr>(Val: Node->getBase()->IgnoreImpCasts()))
1490 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
1491 switch (S.lookup(VD)) {
1492 case FixitStrategy::Kind::Span: {
1493
1494 // If the index has a negative constant value, we give up as no valid
1495 // fix-it can be generated:
1496 const ASTContext &Ctx = // FIXME: we need ASTContext to be passed in!
1497 VD->getASTContext();
1498 if (!isNonNegativeIntegerExpr(Expr: Node->getIdx(), VD, Ctx))
1499 return std::nullopt;
1500 // no-op is a good fix-it, otherwise
1501 return FixItList{};
1502 }
1503 case FixitStrategy::Kind::Array:
1504 return FixItList{};
1505 case FixitStrategy::Kind::Wontfix:
1506 case FixitStrategy::Kind::Iterator:
1507 case FixitStrategy::Kind::Vector:
1508 llvm_unreachable("unsupported strategies for FixableGadgets");
1509 }
1510 }
1511 return std::nullopt;
1512}
1513
1514static std::optional<FixItList> // forward declaration
1515fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node);
1516
1517std::optional<FixItList>
1518UPCAddressofArraySubscriptGadget::getFixits(const FixitStrategy &S) const {
1519 auto DREs = getClaimedVarUseSites();
1520 const auto *VD = cast<VarDecl>(Val: DREs.front()->getDecl());
1521
1522 switch (S.lookup(VD)) {
1523 case FixitStrategy::Kind::Span:
1524 return fixUPCAddressofArraySubscriptWithSpan(Node);
1525 case FixitStrategy::Kind::Wontfix:
1526 case FixitStrategy::Kind::Iterator:
1527 case FixitStrategy::Kind::Array:
1528 return std::nullopt;
1529 case FixitStrategy::Kind::Vector:
1530 llvm_unreachable("unsupported strategies for FixableGadgets");
1531 }
1532 return std::nullopt; // something went wrong, no fix-it
1533}
1534
1535// FIXME: this function should be customizable through format
1536static StringRef getEndOfLine() {
1537 static const char *const EOL = "\n";
1538 return EOL;
1539}
1540
1541// Returns the text indicating that the user needs to provide input there:
1542std::string getUserFillPlaceHolder(StringRef HintTextToUser = "placeholder") {
1543 std::string s = std::string("<# ");
1544 s += HintTextToUser;
1545 s += " #>";
1546 return s;
1547}
1548
1549// Return the text representation of the given `APInt Val`:
1550static std::string getAPIntText(APInt Val) {
1551 SmallVector<char> Txt;
1552 Val.toString(Str&: Txt, Radix: 10, Signed: true);
1553 // APInt::toString does not add '\0' to the end of the string for us:
1554 Txt.push_back(Elt: '\0');
1555 return Txt.data();
1556}
1557
1558// Return the source location of the last character of the AST `Node`.
1559template <typename NodeTy>
1560static std::optional<SourceLocation>
1561getEndCharLoc(const NodeTy *Node, const SourceManager &SM,
1562 const LangOptions &LangOpts) {
1563 unsigned TkLen = Lexer::MeasureTokenLength(Loc: Node->getEndLoc(), SM, LangOpts);
1564 SourceLocation Loc = Node->getEndLoc().getLocWithOffset(TkLen - 1);
1565
1566 if (Loc.isValid())
1567 return Loc;
1568
1569 return std::nullopt;
1570}
1571
1572// Return the source location just past the last character of the AST `Node`.
1573template <typename NodeTy>
1574static std::optional<SourceLocation> getPastLoc(const NodeTy *Node,
1575 const SourceManager &SM,
1576 const LangOptions &LangOpts) {
1577 SourceLocation Loc =
1578 Lexer::getLocForEndOfToken(Loc: Node->getEndLoc(), Offset: 0, SM, LangOpts);
1579 if (Loc.isValid())
1580 return Loc;
1581 return std::nullopt;
1582}
1583
1584// Return text representation of an `Expr`.
1585static std::optional<StringRef> getExprText(const Expr *E,
1586 const SourceManager &SM,
1587 const LangOptions &LangOpts) {
1588 std::optional<SourceLocation> LastCharLoc = getPastLoc(Node: E, SM, LangOpts);
1589
1590 if (LastCharLoc)
1591 return Lexer::getSourceText(
1592 Range: CharSourceRange::getCharRange(E->getBeginLoc(), *LastCharLoc), SM,
1593 LangOpts);
1594
1595 return std::nullopt;
1596}
1597
1598// Returns the literal text in `SourceRange SR`, if `SR` is a valid range.
1599static std::optional<StringRef> getRangeText(SourceRange SR,
1600 const SourceManager &SM,
1601 const LangOptions &LangOpts) {
1602 bool Invalid = false;
1603 CharSourceRange CSR = CharSourceRange::getCharRange(R: SR);
1604 StringRef Text = Lexer::getSourceText(Range: CSR, SM, LangOpts, Invalid: &Invalid);
1605
1606 if (!Invalid)
1607 return Text;
1608 return std::nullopt;
1609}
1610
1611// Returns the begin location of the identifier of the given variable
1612// declaration.
1613static SourceLocation getVarDeclIdentifierLoc(const VarDecl *VD) {
1614 // According to the implementation of `VarDecl`, `VD->getLocation()` actually
1615 // returns the begin location of the identifier of the declaration:
1616 return VD->getLocation();
1617}
1618
1619// Returns the literal text of the identifier of the given variable declaration.
1620static std::optional<StringRef>
1621getVarDeclIdentifierText(const VarDecl *VD, const SourceManager &SM,
1622 const LangOptions &LangOpts) {
1623 SourceLocation ParmIdentBeginLoc = getVarDeclIdentifierLoc(VD);
1624 SourceLocation ParmIdentEndLoc =
1625 Lexer::getLocForEndOfToken(Loc: ParmIdentBeginLoc, Offset: 0, SM, LangOpts);
1626
1627 if (ParmIdentEndLoc.isMacroID() &&
1628 !Lexer::isAtEndOfMacroExpansion(loc: ParmIdentEndLoc, SM, LangOpts))
1629 return std::nullopt;
1630 return getRangeText(SR: {ParmIdentBeginLoc, ParmIdentEndLoc}, SM, LangOpts);
1631}
1632
1633// We cannot fix a variable declaration if it has some other specifiers than the
1634// type specifier. Because the source ranges of those specifiers could overlap
1635// with the source range that is being replaced using fix-its. Especially when
1636// we often cannot obtain accurate source ranges of cv-qualified type
1637// specifiers.
1638// FIXME: also deal with type attributes
1639static bool hasUnsupportedSpecifiers(const VarDecl *VD,
1640 const SourceManager &SM) {
1641 // AttrRangeOverlapping: true if at least one attribute of `VD` overlaps the
1642 // source range of `VD`:
1643 bool AttrRangeOverlapping = llvm::any_of(VD->attrs(), [&](Attr *At) -> bool {
1644 return !(SM.isBeforeInTranslationUnit(LHS: At->getRange().getEnd(),
1645 RHS: VD->getBeginLoc())) &&
1646 !(SM.isBeforeInTranslationUnit(LHS: VD->getEndLoc(),
1647 RHS: At->getRange().getBegin()));
1648 });
1649 return VD->isInlineSpecified() || VD->isConstexpr() ||
1650 VD->hasConstantInitialization() || !VD->hasLocalStorage() ||
1651 AttrRangeOverlapping;
1652}
1653
1654// Returns the `SourceRange` of `D`. The reason why this function exists is
1655// that `D->getSourceRange()` may return a range where the end location is the
1656// starting location of the last token. The end location of the source range
1657// returned by this function is the last location of the last token.
1658static SourceRange getSourceRangeToTokenEnd(const Decl *D,
1659 const SourceManager &SM,
1660 const LangOptions &LangOpts) {
1661 SourceLocation Begin = D->getBeginLoc();
1662 SourceLocation
1663 End = // `D->getEndLoc` should always return the starting location of the
1664 // last token, so we should get the end of the token
1665 Lexer::getLocForEndOfToken(Loc: D->getEndLoc(), Offset: 0, SM, LangOpts);
1666
1667 return SourceRange(Begin, End);
1668}
1669
1670// Returns the text of the pointee type of `T` from a `VarDecl` of a pointer
1671// type. The text is obtained through from `TypeLoc`s. Since `TypeLoc` does not
1672// have source ranges of qualifiers ( The `QualifiedTypeLoc` looks hacky too me
1673// :( ), `Qualifiers` of the pointee type is returned separately through the
1674// output parameter `QualifiersToAppend`.
1675static std::optional<std::string>
1676getPointeeTypeText(const VarDecl *VD, const SourceManager &SM,
1677 const LangOptions &LangOpts,
1678 std::optional<Qualifiers> *QualifiersToAppend) {
1679 QualType Ty = VD->getType();
1680 QualType PteTy;
1681
1682 assert(Ty->isPointerType() && !Ty->isFunctionPointerType() &&
1683 "Expecting a VarDecl of type of pointer to object type");
1684 PteTy = Ty->getPointeeType();
1685
1686 TypeLoc TyLoc = VD->getTypeSourceInfo()->getTypeLoc().getUnqualifiedLoc();
1687 TypeLoc PteTyLoc;
1688
1689 // We only deal with the cases that we know `TypeLoc::getNextTypeLoc` returns
1690 // the `TypeLoc` of the pointee type:
1691 switch (TyLoc.getTypeLocClass()) {
1692 case TypeLoc::ConstantArray:
1693 case TypeLoc::IncompleteArray:
1694 case TypeLoc::VariableArray:
1695 case TypeLoc::DependentSizedArray:
1696 case TypeLoc::Decayed:
1697 assert(isa<ParmVarDecl>(VD) && "An array type shall not be treated as a "
1698 "pointer type unless it decays.");
1699 PteTyLoc = TyLoc.getNextTypeLoc();
1700 break;
1701 case TypeLoc::Pointer:
1702 PteTyLoc = TyLoc.castAs<PointerTypeLoc>().getPointeeLoc();
1703 break;
1704 default:
1705 return std::nullopt;
1706 }
1707 if (PteTyLoc.isNull())
1708 // Sometimes we cannot get a useful `TypeLoc` for the pointee type, e.g.,
1709 // when the pointer type is `auto`.
1710 return std::nullopt;
1711
1712 SourceLocation IdentLoc = getVarDeclIdentifierLoc(VD);
1713
1714 if (!(IdentLoc.isValid() && PteTyLoc.getSourceRange().isValid())) {
1715 // We are expecting these locations to be valid. But in some cases, they are
1716 // not all valid. It is a Clang bug to me and we are not responsible for
1717 // fixing it. So we will just give up for now when it happens.
1718 return std::nullopt;
1719 }
1720
1721 // Note that TypeLoc.getEndLoc() returns the begin location of the last token:
1722 SourceLocation PteEndOfTokenLoc =
1723 Lexer::getLocForEndOfToken(Loc: PteTyLoc.getEndLoc(), Offset: 0, SM, LangOpts);
1724
1725 if (!PteEndOfTokenLoc.isValid())
1726 // Sometimes we cannot get the end location of the pointee type, e.g., when
1727 // there are macros involved.
1728 return std::nullopt;
1729 if (!SM.isBeforeInTranslationUnit(LHS: PteEndOfTokenLoc, RHS: IdentLoc)) {
1730 // We only deal with the cases where the source text of the pointee type
1731 // appears on the left-hand side of the variable identifier completely,
1732 // including the following forms:
1733 // `T ident`,
1734 // `T ident[]`, where `T` is any type.
1735 // Examples of excluded cases are `T (*ident)[]` or `T ident[][n]`.
1736 return std::nullopt;
1737 }
1738 if (PteTy.hasQualifiers()) {
1739 // TypeLoc does not provide source ranges for qualifiers (it says it's
1740 // intentional but seems fishy to me), so we cannot get the full text
1741 // `PteTy` via source ranges.
1742 *QualifiersToAppend = PteTy.getQualifiers();
1743 }
1744 return getRangeText(SR: {PteTyLoc.getBeginLoc(), PteEndOfTokenLoc}, SM, LangOpts)
1745 ->str();
1746}
1747
1748// Returns the text of the name (with qualifiers) of a `FunctionDecl`.
1749static std::optional<StringRef> getFunNameText(const FunctionDecl *FD,
1750 const SourceManager &SM,
1751 const LangOptions &LangOpts) {
1752 SourceLocation BeginLoc = FD->getQualifier()
1753 ? FD->getQualifierLoc().getBeginLoc()
1754 : FD->getNameInfo().getBeginLoc();
1755 // Note that `FD->getNameInfo().getEndLoc()` returns the begin location of the
1756 // last token:
1757 SourceLocation EndLoc = Lexer::getLocForEndOfToken(
1758 Loc: FD->getNameInfo().getEndLoc(), Offset: 0, SM, LangOpts);
1759 SourceRange NameRange{BeginLoc, EndLoc};
1760
1761 return getRangeText(SR: NameRange, SM, LangOpts);
1762}
1763
1764// Returns the text representing a `std::span` type where the element type is
1765// represented by `EltTyText`.
1766//
1767// Note the optional parameter `Qualifiers`: one needs to pass qualifiers
1768// explicitly if the element type needs to be qualified.
1769static std::string
1770getSpanTypeText(StringRef EltTyText,
1771 std::optional<Qualifiers> Quals = std::nullopt) {
1772 const char *const SpanOpen = "std::span<";
1773
1774 if (Quals)
1775 return SpanOpen + EltTyText.str() + ' ' + Quals->getAsString() + '>';
1776 return SpanOpen + EltTyText.str() + '>';
1777}
1778
1779std::optional<FixItList>
1780DerefSimplePtrArithFixableGadget::getFixits(const FixitStrategy &s) const {
1781 const VarDecl *VD = dyn_cast<VarDecl>(Val: BaseDeclRefExpr->getDecl());
1782
1783 if (VD && s.lookup(VD) == FixitStrategy::Kind::Span) {
1784 ASTContext &Ctx = VD->getASTContext();
1785 // std::span can't represent elements before its begin()
1786 if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx))
1787 if (ConstVal->isNegative())
1788 return std::nullopt;
1789
1790 // note that the expr may (oddly) has multiple layers of parens
1791 // example:
1792 // *((..(pointer + 123)..))
1793 // goal:
1794 // pointer[123]
1795 // Fix-It:
1796 // remove '*('
1797 // replace ' + ' with '['
1798 // replace ')' with ']'
1799
1800 // example:
1801 // *((..(123 + pointer)..))
1802 // goal:
1803 // 123[pointer]
1804 // Fix-It:
1805 // remove '*('
1806 // replace ' + ' with '['
1807 // replace ')' with ']'
1808
1809 const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS();
1810 const SourceManager &SM = Ctx.getSourceManager();
1811 const LangOptions &LangOpts = Ctx.getLangOpts();
1812 CharSourceRange StarWithTrailWhitespace =
1813 clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(),
1814 LHS->getBeginLoc());
1815
1816 std::optional<SourceLocation> LHSLocation = getPastLoc(Node: LHS, SM, LangOpts);
1817 if (!LHSLocation)
1818 return std::nullopt;
1819
1820 CharSourceRange PlusWithSurroundingWhitespace =
1821 clang::CharSourceRange::getCharRange(*LHSLocation, RHS->getBeginLoc());
1822
1823 std::optional<SourceLocation> AddOpLocation =
1824 getPastLoc(Node: AddOp, SM, LangOpts);
1825 std::optional<SourceLocation> DerefOpLocation =
1826 getPastLoc(Node: DerefOp, SM, LangOpts);
1827
1828 if (!AddOpLocation || !DerefOpLocation)
1829 return std::nullopt;
1830
1831 CharSourceRange ClosingParenWithPrecWhitespace =
1832 clang::CharSourceRange::getCharRange(B: *AddOpLocation, E: *DerefOpLocation);
1833
1834 return FixItList{
1835 {FixItHint::CreateRemoval(RemoveRange: StarWithTrailWhitespace),
1836 FixItHint::CreateReplacement(RemoveRange: PlusWithSurroundingWhitespace, Code: "["),
1837 FixItHint::CreateReplacement(RemoveRange: ClosingParenWithPrecWhitespace, Code: "]")}};
1838 }
1839 return std::nullopt; // something wrong or unsupported, give up
1840}
1841
1842std::optional<FixItList>
1843PointerDereferenceGadget::getFixits(const FixitStrategy &S) const {
1844 const VarDecl *VD = cast<VarDecl>(Val: BaseDeclRefExpr->getDecl());
1845 switch (S.lookup(VD)) {
1846 case FixitStrategy::Kind::Span: {
1847 ASTContext &Ctx = VD->getASTContext();
1848 SourceManager &SM = Ctx.getSourceManager();
1849 // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0]
1850 // Deletes the *operand
1851 CharSourceRange derefRange = clang::CharSourceRange::getCharRange(
1852 B: Op->getBeginLoc(), E: Op->getBeginLoc().getLocWithOffset(Offset: 1));
1853 // Inserts the [0]
1854 if (auto LocPastOperand =
1855 getPastLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts())) {
1856 return FixItList{{FixItHint::CreateRemoval(RemoveRange: derefRange),
1857 FixItHint::CreateInsertion(InsertionLoc: *LocPastOperand, Code: "[0]")}};
1858 }
1859 break;
1860 }
1861 case FixitStrategy::Kind::Iterator:
1862 case FixitStrategy::Kind::Array:
1863 return std::nullopt;
1864 case FixitStrategy::Kind::Vector:
1865 llvm_unreachable("FixitStrategy not implemented yet!");
1866 case FixitStrategy::Kind::Wontfix:
1867 llvm_unreachable("Invalid strategy!");
1868 }
1869
1870 return std::nullopt;
1871}
1872
1873// Generates fix-its replacing an expression of the form UPC(DRE) with
1874// `DRE.data()`
1875std::optional<FixItList>
1876UPCStandalonePointerGadget::getFixits(const FixitStrategy &S) const {
1877 const auto VD = cast<VarDecl>(Val: Node->getDecl());
1878 switch (S.lookup(VD)) {
1879 case FixitStrategy::Kind::Array:
1880 case FixitStrategy::Kind::Span: {
1881 ASTContext &Ctx = VD->getASTContext();
1882 SourceManager &SM = Ctx.getSourceManager();
1883 // Inserts the .data() after the DRE
1884 std::optional<SourceLocation> EndOfOperand =
1885 getPastLoc(Node, SM, LangOpts: Ctx.getLangOpts());
1886
1887 if (EndOfOperand)
1888 return FixItList{{FixItHint::CreateInsertion(InsertionLoc: *EndOfOperand, Code: ".data()")}};
1889 // FIXME: Points inside a macro expansion.
1890 break;
1891 }
1892 case FixitStrategy::Kind::Wontfix:
1893 case FixitStrategy::Kind::Iterator:
1894 return std::nullopt;
1895 case FixitStrategy::Kind::Vector:
1896 llvm_unreachable("unsupported strategies for FixableGadgets");
1897 }
1898
1899 return std::nullopt;
1900}
1901
1902// Generates fix-its replacing an expression of the form `&DRE[e]` with
1903// `&DRE.data()[e]`:
1904static std::optional<FixItList>
1905fixUPCAddressofArraySubscriptWithSpan(const UnaryOperator *Node) {
1906 const auto *ArraySub = cast<ArraySubscriptExpr>(Val: Node->getSubExpr());
1907 const auto *DRE = cast<DeclRefExpr>(Val: ArraySub->getBase()->IgnoreImpCasts());
1908 // FIXME: this `getASTContext` call is costly, we should pass the
1909 // ASTContext in:
1910 const ASTContext &Ctx = DRE->getDecl()->getASTContext();
1911 const Expr *Idx = ArraySub->getIdx();
1912 const SourceManager &SM = Ctx.getSourceManager();
1913 const LangOptions &LangOpts = Ctx.getLangOpts();
1914 std::stringstream SS;
1915 bool IdxIsLitZero = false;
1916
1917 if (auto ICE = Idx->getIntegerConstantExpr(Ctx))
1918 if ((*ICE).isZero())
1919 IdxIsLitZero = true;
1920 std::optional<StringRef> DreString = getExprText(DRE, SM, LangOpts);
1921 if (!DreString)
1922 return std::nullopt;
1923
1924 if (IdxIsLitZero) {
1925 // If the index is literal zero, we produce the most concise fix-it:
1926 SS << (*DreString).str() << ".data()";
1927 } else {
1928 std::optional<StringRef> IndexString = getExprText(E: Idx, SM, LangOpts);
1929 if (!IndexString)
1930 return std::nullopt;
1931
1932 SS << "&" << (*DreString).str() << ".data()"
1933 << "[" << (*IndexString).str() << "]";
1934 }
1935 return FixItList{
1936 FixItHint::CreateReplacement(Node->getSourceRange(), SS.str())};
1937}
1938
1939std::optional<FixItList>
1940UUCAddAssignGadget::getFixits(const FixitStrategy &S) const {
1941 DeclUseList DREs = getClaimedVarUseSites();
1942
1943 if (DREs.size() != 1)
1944 return std::nullopt; // In cases of `Ptr += n` where `Ptr` is not a DRE, we
1945 // give up
1946 if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DREs.front()->getDecl())) {
1947 if (S.lookup(VD) == FixitStrategy::Kind::Span) {
1948 FixItList Fixes;
1949
1950 const Stmt *AddAssignNode = getBaseStmt();
1951 StringRef varName = VD->getName();
1952 const ASTContext &Ctx = VD->getASTContext();
1953
1954 if (!isNonNegativeIntegerExpr(Expr: Offset, VD, Ctx))
1955 return std::nullopt;
1956
1957 // To transform UUC(p += n) to UUC(p = p.subspan(..)):
1958 bool NotParenExpr =
1959 (Offset->IgnoreParens()->getBeginLoc() == Offset->getBeginLoc());
1960 std::string SS = varName.str() + " = " + varName.str() + ".subspan";
1961 if (NotParenExpr)
1962 SS += "(";
1963
1964 std::optional<SourceLocation> AddAssignLocation = getEndCharLoc(
1965 Node: AddAssignNode, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
1966 if (!AddAssignLocation)
1967 return std::nullopt;
1968
1969 Fixes.push_back(Elt: FixItHint::CreateReplacement(
1970 RemoveRange: SourceRange(AddAssignNode->getBeginLoc(), Node->getOperatorLoc()),
1971 Code: SS));
1972 if (NotParenExpr)
1973 Fixes.push_back(FixItHint::CreateInsertion(
1974 InsertionLoc: Offset->getEndLoc().getLocWithOffset(1), Code: ")"));
1975 return Fixes;
1976 }
1977 }
1978 return std::nullopt; // Not in the cases that we can handle for now, give up.
1979}
1980
1981std::optional<FixItList>
1982UPCPreIncrementGadget::getFixits(const FixitStrategy &S) const {
1983 DeclUseList DREs = getClaimedVarUseSites();
1984
1985 if (DREs.size() != 1)
1986 return std::nullopt; // In cases of `++Ptr` where `Ptr` is not a DRE, we
1987 // give up
1988 if (const VarDecl *VD = dyn_cast<VarDecl>(Val: DREs.front()->getDecl())) {
1989 if (S.lookup(VD) == FixitStrategy::Kind::Span) {
1990 FixItList Fixes;
1991 std::stringstream SS;
1992 const Stmt *PreIncNode = getBaseStmt();
1993 StringRef varName = VD->getName();
1994 const ASTContext &Ctx = VD->getASTContext();
1995
1996 // To transform UPC(++p) to UPC((p = p.subspan(1)).data()):
1997 SS << "(" << varName.data() << " = " << varName.data()
1998 << ".subspan(1)).data()";
1999 std::optional<SourceLocation> PreIncLocation =
2000 getEndCharLoc(Node: PreIncNode, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
2001 if (!PreIncLocation)
2002 return std::nullopt;
2003
2004 Fixes.push_back(Elt: FixItHint::CreateReplacement(
2005 RemoveRange: SourceRange(PreIncNode->getBeginLoc(), *PreIncLocation), Code: SS.str()));
2006 return Fixes;
2007 }
2008 }
2009 return std::nullopt; // Not in the cases that we can handle for now, give up.
2010}
2011
2012// For a non-null initializer `Init` of `T *` type, this function returns
2013// `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
2014// to output stream.
2015// In many cases, this function cannot figure out the actual extent `S`. It
2016// then will use a place holder to replace `S` to ask users to fill `S` in. The
2017// initializer shall be used to initialize a variable of type `std::span<T>`.
2018//
2019// FIXME: Support multi-level pointers
2020//
2021// Parameters:
2022// `Init` a pointer to the initializer expression
2023// `Ctx` a reference to the ASTContext
2024static FixItList
2025FixVarInitializerWithSpan(const Expr *Init, ASTContext &Ctx,
2026 const StringRef UserFillPlaceHolder) {
2027 const SourceManager &SM = Ctx.getSourceManager();
2028 const LangOptions &LangOpts = Ctx.getLangOpts();
2029
2030 // If `Init` has a constant value that is (or equivalent to) a
2031 // NULL pointer, we use the default constructor to initialize the span
2032 // object, i.e., a `std:span` variable declaration with no initializer.
2033 // So the fix-it is just to remove the initializer.
2034 if (Init->isNullPointerConstant(Ctx,
2035 // FIXME: Why does this function not ask for `const ASTContext
2036 // &`? It should. Maybe worth an NFC patch later.
2037 NPC: Expr::NullPointerConstantValueDependence::
2038 NPC_ValueDependentIsNotNull)) {
2039 std::optional<SourceLocation> InitLocation =
2040 getEndCharLoc(Node: Init, SM, LangOpts);
2041 if (!InitLocation)
2042 return {};
2043
2044 SourceRange SR(Init->getBeginLoc(), *InitLocation);
2045
2046 return {FixItHint::CreateRemoval(RemoveRange: SR)};
2047 }
2048
2049 FixItList FixIts{};
2050 std::string ExtentText = UserFillPlaceHolder.data();
2051 StringRef One = "1";
2052
2053 // Insert `{` before `Init`:
2054 FixIts.push_back(FixItHint::CreateInsertion(InsertionLoc: Init->getBeginLoc(), Code: "{"));
2055 // Try to get the data extent. Break into different cases:
2056 if (auto CxxNew = dyn_cast<CXXNewExpr>(Val: Init->IgnoreImpCasts())) {
2057 // In cases `Init` is `new T[n]` and there is no explicit cast over
2058 // `Init`, we know that `Init` must evaluates to a pointer to `n` objects
2059 // of `T`. So the extent is `n` unless `n` has side effects. Similar but
2060 // simpler for the case where `Init` is `new T`.
2061 if (const Expr *Ext = CxxNew->getArraySize().value_or(u: nullptr)) {
2062 if (!Ext->HasSideEffects(Ctx)) {
2063 std::optional<StringRef> ExtentString = getExprText(E: Ext, SM, LangOpts);
2064 if (!ExtentString)
2065 return {};
2066 ExtentText = *ExtentString;
2067 }
2068 } else if (!CxxNew->isArray())
2069 // Although the initializer is not allocating a buffer, the pointer
2070 // variable could still be used in buffer access operations.
2071 ExtentText = One;
2072 } else if (const auto *CArrTy = Ctx.getAsConstantArrayType(
2073 T: Init->IgnoreImpCasts()->getType())) {
2074 // In cases `Init` is of an array type after stripping off implicit casts,
2075 // the extent is the array size. Note that if the array size is not a
2076 // constant, we cannot use it as the extent.
2077 ExtentText = getAPIntText(Val: CArrTy->getSize());
2078 } else {
2079 // In cases `Init` is of the form `&Var` after stripping of implicit
2080 // casts, where `&` is the built-in operator, the extent is 1.
2081 if (auto AddrOfExpr = dyn_cast<UnaryOperator>(Val: Init->IgnoreImpCasts()))
2082 if (AddrOfExpr->getOpcode() == UnaryOperatorKind::UO_AddrOf &&
2083 isa_and_present<DeclRefExpr>(Val: AddrOfExpr->getSubExpr()))
2084 ExtentText = One;
2085 // TODO: we can handle more cases, e.g., `&a[0]`, `&a`, `std::addressof`,
2086 // and explicit casting, etc. etc.
2087 }
2088
2089 SmallString<32> StrBuffer{};
2090 std::optional<SourceLocation> LocPassInit = getPastLoc(Node: Init, SM, LangOpts);
2091
2092 if (!LocPassInit)
2093 return {};
2094
2095 StrBuffer.append(RHS: ", ");
2096 StrBuffer.append(RHS: ExtentText);
2097 StrBuffer.append(RHS: "}");
2098 FixIts.push_back(Elt: FixItHint::CreateInsertion(InsertionLoc: *LocPassInit, Code: StrBuffer.str()));
2099 return FixIts;
2100}
2101
// Reports, through a `Handler` that must be in scope at the point of use, that
// no fix-it could be produced for declaration `D`; `Msg` is appended to the
// note. Expands to nothing when `NDEBUG` is defined.
#ifndef NDEBUG
#define DEBUG_NOTE_DECL_FAIL(D, Msg)                                           \
  Handler.addDebugNoteForVar((D), (D)->getBeginLoc(),                          \
                             "failed to produce fixit for declaration '" +     \
                                 (D)->getNameAsString() + "'" + (Msg))
#else
#define DEBUG_NOTE_DECL_FAIL(D, Msg)
#endif
2108
2109// For the given variable declaration with a pointer-to-T type, returns the text
2110// `std::span<T>`. If it is unable to generate the text, returns
2111// `std::nullopt`.
2112static std::optional<std::string> createSpanTypeForVarDecl(const VarDecl *VD,
2113 const ASTContext &Ctx) {
2114 assert(VD->getType()->isPointerType());
2115
2116 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
2117 std::optional<std::string> PteTyText = getPointeeTypeText(
2118 VD, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts(), QualifiersToAppend: &PteTyQualifiers);
2119
2120 if (!PteTyText)
2121 return std::nullopt;
2122
2123 std::string SpanTyText = "std::span<";
2124
2125 SpanTyText.append(str: *PteTyText);
2126 // Append qualifiers to span element type if any:
2127 if (PteTyQualifiers) {
2128 SpanTyText.append(s: " ");
2129 SpanTyText.append(str: PteTyQualifiers->getAsString());
2130 }
2131 SpanTyText.append(s: ">");
2132 return SpanTyText;
2133}
2134
2135// For a `VarDecl` of the form `T * var (= Init)?`, this
2136// function generates fix-its that
2137// 1) replace `T * var` with `std::span<T> var`; and
2138// 2) change `Init` accordingly to a span constructor, if it exists.
2139//
2140// FIXME: support Multi-level pointers
2141//
2142// Parameters:
2143// `D` a pointer the variable declaration node
2144// `Ctx` a reference to the ASTContext
2145// `UserFillPlaceHolder` the user-input placeholder text
2146// Returns:
2147// the non-empty fix-it list, if fix-its are successfuly generated; empty
2148// list otherwise.
2149static FixItList fixLocalVarDeclWithSpan(const VarDecl *D, ASTContext &Ctx,
2150 const StringRef UserFillPlaceHolder,
2151 UnsafeBufferUsageHandler &Handler) {
2152 if (hasUnsupportedSpecifiers(VD: D, SM: Ctx.getSourceManager()))
2153 return {};
2154
2155 FixItList FixIts{};
2156 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(VD: D, Ctx);
2157
2158 if (!SpanTyText) {
2159 DEBUG_NOTE_DECL_FAIL(D, " : failed to generate 'std::span' type");
2160 return {};
2161 }
2162
2163 // Will hold the text for `std::span<T> Ident`:
2164 std::stringstream SS;
2165
2166 SS << *SpanTyText;
2167 // Append qualifiers to the type of `D`, if any:
2168 if (D->getType().hasQualifiers())
2169 SS << " " << D->getType().getQualifiers().getAsString();
2170
2171 // The end of the range of the original source that will be replaced
2172 // by `std::span<T> ident`:
2173 SourceLocation EndLocForReplacement = D->getEndLoc();
2174 std::optional<StringRef> IdentText =
2175 getVarDeclIdentifierText(VD: D, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
2176
2177 if (!IdentText) {
2178 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier");
2179 return {};
2180 }
2181 // Fix the initializer if it exists:
2182 if (const Expr *Init = D->getInit()) {
2183 FixItList InitFixIts =
2184 FixVarInitializerWithSpan(Init, Ctx, UserFillPlaceHolder);
2185 if (InitFixIts.empty())
2186 return {};
2187 FixIts.insert(I: FixIts.end(), From: std::make_move_iterator(i: InitFixIts.begin()),
2188 To: std::make_move_iterator(i: InitFixIts.end()));
2189 // If the declaration has the form `T *ident = init`, we want to replace
2190 // `T *ident = ` with `std::span<T> ident`:
2191 EndLocForReplacement = Init->getBeginLoc().getLocWithOffset(-1);
2192 }
2193 SS << " " << IdentText->str();
2194 if (!EndLocForReplacement.isValid()) {
2195 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the end of the declaration");
2196 return {};
2197 }
2198 FixIts.push_back(Elt: FixItHint::CreateReplacement(
2199 RemoveRange: SourceRange(D->getBeginLoc(), EndLocForReplacement), Code: SS.str()));
2200 return FixIts;
2201}
2202
2203static bool hasConflictingOverload(const FunctionDecl *FD) {
2204 return !FD->getDeclContext()->lookup(FD->getDeclName()).isSingleResult();
2205}
2206
2207// For a `FunctionDecl`, whose `ParmVarDecl`s are being changed to have new
2208// types, this function produces fix-its to make the change self-contained. Let
2209// 'F' be the entity defined by the original `FunctionDecl` and "NewF" be the
2210// entity defined by the `FunctionDecl` after the change to the parameters.
2211// Fix-its produced by this function are
2212// 1. Add the `[[clang::unsafe_buffer_usage]]` attribute to each declaration
2213// of 'F';
2214// 2. Create a declaration of "NewF" next to each declaration of `F`;
2215// 3. Create a definition of "F" (as its' original definition is now belongs
2216// to "NewF") next to its original definition. The body of the creating
2217// definition calls to "NewF".
2218//
2219// Example:
2220//
2221// void f(int *p); // original declaration
2222// void f(int *p) { // original definition
2223// p[5];
2224// }
2225//
2226// To change the parameter `p` to be of `std::span<int>` type, we
2227// also add overloads:
2228//
2229// [[clang::unsafe_buffer_usage]] void f(int *p); // original decl
2230// void f(std::span<int> p); // added overload decl
2231// void f(std::span<int> p) { // original def where param is changed
2232// p[5];
2233// }
2234// [[clang::unsafe_buffer_usage]] void f(int *p) { // added def
2235// return f(std::span(p, <# size #>));
2236// }
2237//
2238static std::optional<FixItList>
2239createOverloadsForFixedParams(const FixitStrategy &S, const FunctionDecl *FD,
2240 const ASTContext &Ctx,
2241 UnsafeBufferUsageHandler &Handler) {
2242 // FIXME: need to make this conflict checking better:
2243 if (hasConflictingOverload(FD))
2244 return std::nullopt;
2245
2246 const SourceManager &SM = Ctx.getSourceManager();
2247 const LangOptions &LangOpts = Ctx.getLangOpts();
2248 const unsigned NumParms = FD->getNumParams();
2249 std::vector<std::string> NewTysTexts(NumParms);
2250 std::vector<bool> ParmsMask(NumParms, false);
2251 bool AtLeastOneParmToFix = false;
2252
2253 for (unsigned i = 0; i < NumParms; i++) {
2254 const ParmVarDecl *PVD = FD->getParamDecl(i);
2255
2256 if (S.lookup(PVD) == FixitStrategy::Kind::Wontfix)
2257 continue;
2258 if (S.lookup(PVD) != FixitStrategy::Kind::Span)
2259 // Not supported, not suppose to happen:
2260 return std::nullopt;
2261
2262 std::optional<Qualifiers> PteTyQuals = std::nullopt;
2263 std::optional<std::string> PteTyText =
2264 getPointeeTypeText(PVD, SM, LangOpts, &PteTyQuals);
2265
2266 if (!PteTyText)
2267 // something wrong in obtaining the text of the pointee type, give up
2268 return std::nullopt;
2269 // FIXME: whether we should create std::span type depends on the
2270 // FixitStrategy.
2271 NewTysTexts[i] = getSpanTypeText(EltTyText: *PteTyText, Quals: PteTyQuals);
2272 ParmsMask[i] = true;
2273 AtLeastOneParmToFix = true;
2274 }
2275 if (!AtLeastOneParmToFix)
2276 // No need to create function overloads:
2277 return {};
2278 // FIXME Respect indentation of the original code.
2279
2280 // A lambda that creates the text representation of a function declaration
2281 // with the new type signatures:
2282 const auto NewOverloadSignatureCreator =
2283 [&SM, &LangOpts, &NewTysTexts,
2284 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
2285 std::stringstream SS;
2286
2287 SS << ";";
2288 SS << getEndOfLine().str();
2289 // Append: ret-type func-name "("
2290 if (auto Prefix = getRangeText(
2291 SourceRange(FD->getBeginLoc(), (*FD->param_begin())->getBeginLoc()),
2292 SM, LangOpts))
2293 SS << Prefix->str();
2294 else
2295 return std::nullopt; // give up
2296 // Append: parameter-type-list
2297 const unsigned NumParms = FD->getNumParams();
2298
2299 for (unsigned i = 0; i < NumParms; i++) {
2300 const ParmVarDecl *Parm = FD->getParamDecl(i);
2301
2302 if (Parm->isImplicit())
2303 continue;
2304 if (ParmsMask[i]) {
2305 // This `i`-th parameter will be fixed with `NewTysTexts[i]` being its
2306 // new type:
2307 SS << NewTysTexts[i];
2308 // print parameter name if provided:
2309 if (IdentifierInfo *II = Parm->getIdentifier())
2310 SS << ' ' << II->getName().str();
2311 } else if (auto ParmTypeText = getRangeText(
2312 getSourceRangeToTokenEnd(Parm, SM, LangOpts),
2313 SM, LangOpts)) {
2314 // print the whole `Parm` without modification:
2315 SS << ParmTypeText->str();
2316 } else
2317 return std::nullopt; // something wrong, give up
2318 if (i != NumParms - 1)
2319 SS << ", ";
2320 }
2321 SS << ")";
2322 return SS.str();
2323 };
2324
2325 // A lambda that creates the text representation of a function definition with
2326 // the original signature:
2327 const auto OldOverloadDefCreator =
2328 [&Handler, &SM, &LangOpts, &NewTysTexts,
2329 &ParmsMask](const FunctionDecl *FD) -> std::optional<std::string> {
2330 std::stringstream SS;
2331
2332 SS << getEndOfLine().str();
2333 // Append: attr-name ret-type func-name "(" param-list ")" "{"
2334 if (auto FDPrefix = getRangeText(
2335 SourceRange(FD->getBeginLoc(), FD->getBody()->getBeginLoc()), SM,
2336 LangOpts))
2337 SS << Handler.getUnsafeBufferUsageAttributeTextAt(Loc: FD->getBeginLoc(), WSSuffix: " ")
2338 << FDPrefix->str() << "{";
2339 else
2340 return std::nullopt;
2341 // Append: "return" func-name "("
2342 if (auto FunQualName = getFunNameText(FD, SM, LangOpts))
2343 SS << "return " << FunQualName->str() << "(";
2344 else
2345 return std::nullopt;
2346
2347 // Append: arg-list
2348 const unsigned NumParms = FD->getNumParams();
2349 for (unsigned i = 0; i < NumParms; i++) {
2350 const ParmVarDecl *Parm = FD->getParamDecl(i);
2351
2352 if (Parm->isImplicit())
2353 continue;
2354 // FIXME: If a parameter has no name, it is unused in the
2355 // definition. So we could just leave it as it is.
2356 if (!Parm->getIdentifier())
2357 // If a parameter of a function definition has no name:
2358 return std::nullopt;
2359 if (ParmsMask[i])
2360 // This is our spanified paramter!
2361 SS << NewTysTexts[i] << "(" << Parm->getIdentifier()->getName().str()
2362 << ", " << getUserFillPlaceHolder(HintTextToUser: "size") << ")";
2363 else
2364 SS << Parm->getIdentifier()->getName().str();
2365 if (i != NumParms - 1)
2366 SS << ", ";
2367 }
2368 // finish call and the body
2369 SS << ");}" << getEndOfLine().str();
2370 // FIXME: 80-char line formatting?
2371 return SS.str();
2372 };
2373
2374 FixItList FixIts{};
2375 for (FunctionDecl *FReDecl : FD->redecls()) {
2376 std::optional<SourceLocation> Loc = getPastLoc(FReDecl, SM, LangOpts);
2377
2378 if (!Loc)
2379 return {};
2380 if (FReDecl->isThisDeclarationADefinition()) {
2381 assert(FReDecl == FD && "inconsistent function definition");
2382 // Inserts a definition with the old signature to the end of
2383 // `FReDecl`:
2384 if (auto OldOverloadDef = OldOverloadDefCreator(FReDecl))
2385 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *OldOverloadDef));
2386 else
2387 return {}; // give up
2388 } else {
2389 // Adds the unsafe-buffer attribute (if not already there) to `FReDecl`:
2390 if (!FReDecl->hasAttr<UnsafeBufferUsageAttr>()) {
2391 FixIts.emplace_back(FixItHint::CreateInsertion(
2392 FReDecl->getBeginLoc(), Handler.getUnsafeBufferUsageAttributeTextAt(
2393 FReDecl->getBeginLoc(), " ")));
2394 }
2395 // Inserts a declaration with the new signature to the end of `FReDecl`:
2396 if (auto NewOverloadDecl = NewOverloadSignatureCreator(FReDecl))
2397 FixIts.emplace_back(FixItHint::CreateInsertion(*Loc, *NewOverloadDecl));
2398 else
2399 return {};
2400 }
2401 }
2402 return FixIts;
2403}
2404
2405// To fix a `ParmVarDecl` to be of `std::span` type.
2406static FixItList fixParamWithSpan(const ParmVarDecl *PVD, const ASTContext &Ctx,
2407 UnsafeBufferUsageHandler &Handler) {
2408 if (hasUnsupportedSpecifiers(PVD, Ctx.getSourceManager())) {
2409 DEBUG_NOTE_DECL_FAIL(PVD, " : has unsupport specifier(s)");
2410 return {};
2411 }
2412 if (PVD->hasDefaultArg()) {
2413 // FIXME: generate fix-its for default values:
2414 DEBUG_NOTE_DECL_FAIL(PVD, " : has default arg");
2415 return {};
2416 }
2417
2418 std::optional<Qualifiers> PteTyQualifiers = std::nullopt;
2419 std::optional<std::string> PteTyText = getPointeeTypeText(
2420 PVD, Ctx.getSourceManager(), Ctx.getLangOpts(), &PteTyQualifiers);
2421
2422 if (!PteTyText) {
2423 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid pointee type");
2424 return {};
2425 }
2426
2427 std::optional<StringRef> PVDNameText = PVD->getIdentifier()->getName();
2428
2429 if (!PVDNameText) {
2430 DEBUG_NOTE_DECL_FAIL(PVD, " : invalid identifier name");
2431 return {};
2432 }
2433
2434 std::stringstream SS;
2435 std::optional<std::string> SpanTyText = createSpanTypeForVarDecl(PVD, Ctx);
2436
2437 if (PteTyQualifiers)
2438 // Append qualifiers if they exist:
2439 SS << getSpanTypeText(EltTyText: *PteTyText, Quals: PteTyQualifiers);
2440 else
2441 SS << getSpanTypeText(EltTyText: *PteTyText);
2442 // Append qualifiers to the type of the parameter:
2443 if (PVD->getType().hasQualifiers())
2444 SS << ' ' << PVD->getType().getQualifiers().getAsString();
2445 // Append parameter's name:
2446 SS << ' ' << PVDNameText->str();
2447 // Add replacement fix-it:
2448 return {FixItHint::CreateReplacement(RemoveRange: PVD->getSourceRange(), Code: SS.str())};
2449}
2450
2451static FixItList fixVariableWithSpan(const VarDecl *VD,
2452 const DeclUseTracker &Tracker,
2453 ASTContext &Ctx,
2454 UnsafeBufferUsageHandler &Handler) {
2455 const DeclStmt *DS = Tracker.lookupDecl(VD);
2456 if (!DS) {
2457 DEBUG_NOTE_DECL_FAIL(VD, " : variables declared this way not implemented yet");
2458 return {};
2459 }
2460 if (!DS->isSingleDecl()) {
2461 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
2462 DEBUG_NOTE_DECL_FAIL(VD, " : multiple VarDecls");
2463 return {};
2464 }
2465 // Currently DS is an unused variable but we'll need it when
2466 // non-single decls are implemented, where the pointee type name
2467 // and the '*' are spread around the place.
2468 (void)DS;
2469
2470 // FIXME: handle cases where DS has multiple declarations
2471 return fixLocalVarDeclWithSpan(D: VD, Ctx, UserFillPlaceHolder: getUserFillPlaceHolder(), Handler);
2472}
2473
2474static FixItList fixVarDeclWithArray(const VarDecl *D, const ASTContext &Ctx,
2475 UnsafeBufferUsageHandler &Handler) {
2476 FixItList FixIts{};
2477
2478 // Note: the code below expects the declaration to not use any type sugar like
2479 // typedef.
2480 if (auto CAT = dyn_cast<clang::ConstantArrayType>(D->getType())) {
2481 const QualType &ArrayEltT = CAT->getElementType();
2482 assert(!ArrayEltT.isNull() && "Trying to fix a non-array type variable!");
2483 // FIXME: support multi-dimensional arrays
2484 if (isa<clang::ArrayType>(Val: ArrayEltT.getCanonicalType()))
2485 return {};
2486
2487 const SourceLocation IdentifierLoc = getVarDeclIdentifierLoc(VD: D);
2488
2489 // Get the spelling of the element type as written in the source file
2490 // (including macros, etc.).
2491 auto MaybeElemTypeTxt =
2492 getRangeText({D->getBeginLoc(), IdentifierLoc}, Ctx.getSourceManager(),
2493 Ctx.getLangOpts());
2494 if (!MaybeElemTypeTxt)
2495 return {};
2496 const llvm::StringRef ElemTypeTxt = MaybeElemTypeTxt->trim();
2497
2498 // Find the '[' token.
2499 std::optional<Token> NextTok = Lexer::findNextToken(
2500 Loc: IdentifierLoc, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
2501 while (NextTok && !NextTok->is(K: tok::l_square) &&
2502 NextTok->getLocation() <= D->getSourceRange().getEnd())
2503 NextTok = Lexer::findNextToken(Loc: NextTok->getLocation(),
2504 SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
2505 if (!NextTok)
2506 return {};
2507 const SourceLocation LSqBracketLoc = NextTok->getLocation();
2508
2509 // Get the spelling of the array size as written in the source file
2510 // (including macros, etc.).
2511 auto MaybeArraySizeTxt = getRangeText(
2512 {LSqBracketLoc.getLocWithOffset(Offset: 1), D->getTypeSpecEndLoc()},
2513 Ctx.getSourceManager(), Ctx.getLangOpts());
2514 if (!MaybeArraySizeTxt)
2515 return {};
2516 const llvm::StringRef ArraySizeTxt = MaybeArraySizeTxt->trim();
2517 if (ArraySizeTxt.empty()) {
2518 // FIXME: Support array size getting determined from the initializer.
2519 // Examples:
2520 // int arr1[] = {0, 1, 2};
2521 // int arr2{3, 4, 5};
2522 // We might be able to preserve the non-specified size with `auto` and
2523 // `std::to_array`:
2524 // auto arr1 = std::to_array<int>({0, 1, 2});
2525 return {};
2526 }
2527
2528 std::optional<StringRef> IdentText =
2529 getVarDeclIdentifierText(VD: D, SM: Ctx.getSourceManager(), LangOpts: Ctx.getLangOpts());
2530
2531 if (!IdentText) {
2532 DEBUG_NOTE_DECL_FAIL(D, " : failed to locate the identifier");
2533 return {};
2534 }
2535
2536 SmallString<32> Replacement;
2537 raw_svector_ostream OS(Replacement);
2538 OS << "std::array<" << ElemTypeTxt << ", " << ArraySizeTxt << "> "
2539 << IdentText->str();
2540
2541 FixIts.push_back(Elt: FixItHint::CreateReplacement(
2542 RemoveRange: SourceRange{D->getBeginLoc(), D->getTypeSpecEndLoc()}, Code: OS.str()));
2543 }
2544
2545 return FixIts;
2546}
2547
2548static FixItList fixVariableWithArray(const VarDecl *VD,
2549 const DeclUseTracker &Tracker,
2550 const ASTContext &Ctx,
2551 UnsafeBufferUsageHandler &Handler) {
2552 const DeclStmt *DS = Tracker.lookupDecl(VD);
2553 assert(DS && "Fixing non-local variables not implemented yet!");
2554 if (!DS->isSingleDecl()) {
2555 // FIXME: to support handling multiple `VarDecl`s in a single `DeclStmt`
2556 return {};
2557 }
2558 // Currently DS is an unused variable but we'll need it when
2559 // non-single decls are implemented, where the pointee type name
2560 // and the '*' are spread around the place.
2561 (void)DS;
2562
2563 // FIXME: handle cases where DS has multiple declarations
2564 return fixVarDeclWithArray(D: VD, Ctx, Handler);
2565}
2566
2567// TODO: we should be consistent to use `std::nullopt` to represent no-fix due
2568// to any unexpected problem.
2569static FixItList
2570fixVariable(const VarDecl *VD, FixitStrategy::Kind K,
2571 /* The function decl under analysis */ const Decl *D,
2572 const DeclUseTracker &Tracker, ASTContext &Ctx,
2573 UnsafeBufferUsageHandler &Handler) {
2574 if (const auto *PVD = dyn_cast<ParmVarDecl>(Val: VD)) {
2575 auto *FD = dyn_cast<clang::FunctionDecl>(PVD->getDeclContext());
2576 if (!FD || FD != D) {
2577 // `FD != D` means that `PVD` belongs to a function that is not being
2578 // analyzed currently. Thus `FD` may not be complete.
2579 DEBUG_NOTE_DECL_FAIL(VD, " : function not currently analyzed");
2580 return {};
2581 }
2582
2583 // TODO If function has a try block we can't change params unless we check
2584 // also its catch block for their use.
2585 // FIXME We might support static class methods, some select methods,
2586 // operators and possibly lamdas.
2587 if (FD->isMain() || FD->isConstexpr() ||
2588 FD->getTemplatedKind() != FunctionDecl::TemplatedKind::TK_NonTemplate ||
2589 FD->isVariadic() ||
2590 // also covers call-operator of lamdas
2591 isa<CXXMethodDecl>(FD) ||
2592 // skip when the function body is a try-block
2593 (FD->hasBody() && isa<CXXTryStmt>(FD->getBody())) ||
2594 FD->isOverloadedOperator()) {
2595 DEBUG_NOTE_DECL_FAIL(VD, " : unsupported function decl");
2596 return {}; // TODO test all these cases
2597 }
2598 }
2599
2600 switch (K) {
2601 case FixitStrategy::Kind::Span: {
2602 if (VD->getType()->isPointerType()) {
2603 if (const auto *PVD = dyn_cast<ParmVarDecl>(Val: VD))
2604 return fixParamWithSpan(PVD, Ctx, Handler);
2605
2606 if (VD->isLocalVarDecl())
2607 return fixVariableWithSpan(VD, Tracker, Ctx, Handler);
2608 }
2609 DEBUG_NOTE_DECL_FAIL(VD, " : not a pointer");
2610 return {};
2611 }
2612 case FixitStrategy::Kind::Array: {
2613 if (VD->isLocalVarDecl() &&
2614 isa<clang::ConstantArrayType>(VD->getType().getCanonicalType()))
2615 return fixVariableWithArray(VD, Tracker, Ctx, Handler);
2616
2617 DEBUG_NOTE_DECL_FAIL(VD, " : not a local const-size array");
2618 return {};
2619 }
2620 case FixitStrategy::Kind::Iterator:
2621 case FixitStrategy::Kind::Vector:
2622 llvm_unreachable("FixitStrategy not implemented yet!");
2623 case FixitStrategy::Kind::Wontfix:
2624 llvm_unreachable("Invalid strategy!");
2625 }
2626 llvm_unreachable("Unknown strategy!");
2627}
2628
2629// Returns true iff there exists a `FixItHint` 'h' in `FixIts` such that the
2630// `RemoveRange` of 'h' overlaps with a macro use.
2631static bool overlapWithMacro(const FixItList &FixIts) {
2632 // FIXME: For now we only check if the range (or the first token) is (part of)
2633 // a macro expansion. Ideally, we want to check for all tokens in the range.
2634 return llvm::any_of(Range: FixIts, P: [](const FixItHint &Hint) {
2635 auto Range = Hint.RemoveRange;
2636 if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
2637 // If the range (or the first token) is (part of) a macro expansion:
2638 return true;
2639 return false;
2640 });
2641}
2642
2643// Returns true iff `VD` is a parameter of the declaration `D`:
2644static bool isParameterOf(const VarDecl *VD, const Decl *D) {
2645 return isa<ParmVarDecl>(Val: VD) &&
2646 VD->getDeclContext() == dyn_cast<DeclContext>(Val: D);
2647}
2648
2649// Erases variables in `FixItsForVariable`, if such a variable has an unfixable
2650// group mate. A variable `v` is unfixable iff `FixItsForVariable` does not
2651// contain `v`.
2652static void eraseVarsForUnfixableGroupMates(
2653 std::map<const VarDecl *, FixItList> &FixItsForVariable,
2654 const VariableGroupsManager &VarGrpMgr) {
2655 // Variables will be removed from `FixItsForVariable`:
2656 SmallVector<const VarDecl *, 8> ToErase;
2657
2658 for (const auto &[VD, Ignore] : FixItsForVariable) {
2659 VarGrpRef Grp = VarGrpMgr.getGroupOfVar(Var: VD);
2660 if (llvm::any_of(Range&: Grp,
2661 P: [&FixItsForVariable](const VarDecl *GrpMember) -> bool {
2662 return !FixItsForVariable.count(x: GrpMember);
2663 })) {
2664 // At least one group member cannot be fixed, so we have to erase the
2665 // whole group:
2666 for (const VarDecl *Member : Grp)
2667 ToErase.push_back(Elt: Member);
2668 }
2669 }
2670 for (auto *VarToErase : ToErase)
2671 FixItsForVariable.erase(x: VarToErase);
2672}
2673
2674// Returns the fix-its that create bounds-safe function overloads for the
2675// function `D`, if `D`'s parameters will be changed to safe-types through
2676// fix-its in `FixItsForVariable`.
2677//
2678// NOTE: In case `D`'s parameters will be changed but bounds-safe function
2679// overloads cannot created, the whole group that contains the parameters will
2680// be erased from `FixItsForVariable`.
2681static FixItList createFunctionOverloadsForParms(
2682 std::map<const VarDecl *, FixItList> &FixItsForVariable /* mutable */,
2683 const VariableGroupsManager &VarGrpMgr, const FunctionDecl *FD,
2684 const FixitStrategy &S, ASTContext &Ctx,
2685 UnsafeBufferUsageHandler &Handler) {
2686 FixItList FixItsSharedByParms{};
2687
2688 std::optional<FixItList> OverloadFixes =
2689 createOverloadsForFixedParams(S, FD, Ctx, Handler);
2690
2691 if (OverloadFixes) {
2692 FixItsSharedByParms.append(RHS: *OverloadFixes);
2693 } else {
2694 // Something wrong in generating `OverloadFixes`, need to remove the
2695 // whole group, where parameters are in, from `FixItsForVariable` (Note
2696 // that all parameters should be in the same group):
2697 for (auto *Member : VarGrpMgr.getGroupOfParms())
2698 FixItsForVariable.erase(x: Member);
2699 }
2700 return FixItsSharedByParms;
2701}
2702
2703// Constructs self-contained fix-its for each variable in `FixablesForAllVars`.
2704static std::map<const VarDecl *, FixItList>
2705getFixIts(FixableGadgetSets &FixablesForAllVars, const FixitStrategy &S,
2706 ASTContext &Ctx,
2707 /* The function decl under analysis */ const Decl *D,
2708 const DeclUseTracker &Tracker, UnsafeBufferUsageHandler &Handler,
2709 const VariableGroupsManager &VarGrpMgr) {
2710 // `FixItsForVariable` will map each variable to a set of fix-its directly
2711 // associated to the variable itself. Fix-its of distinct variables in
2712 // `FixItsForVariable` are disjoint.
2713 std::map<const VarDecl *, FixItList> FixItsForVariable;
2714
2715 // Populate `FixItsForVariable` with fix-its directly associated with each
2716 // variable. Fix-its directly associated to a variable 'v' are the ones
2717 // produced by the `FixableGadget`s whose claimed variable is 'v'.
2718 for (const auto &[VD, Fixables] : FixablesForAllVars.byVar) {
2719 FixItsForVariable[VD] =
2720 fixVariable(VD, K: S.lookup(VD), D, Tracker, Ctx, Handler);
2721 // If we fail to produce Fix-It for the declaration we have to skip the
2722 // variable entirely.
2723 if (FixItsForVariable[VD].empty()) {
2724 FixItsForVariable.erase(x: VD);
2725 continue;
2726 }
2727 for (const auto &F : Fixables) {
2728 std::optional<FixItList> Fixits = F->getFixits(S);
2729
2730 if (Fixits) {
2731 FixItsForVariable[VD].insert(I: FixItsForVariable[VD].end(),
2732 From: Fixits->begin(), To: Fixits->end());
2733 continue;
2734 }
2735#ifndef NDEBUG
2736 Handler.addDebugNoteForVar(
2737 VD, Loc: F->getBaseStmt()->getBeginLoc(),
2738 Text: ("gadget '" + F->getDebugName() + "' refused to produce a fix")
2739 .str());
2740#endif
2741 FixItsForVariable.erase(x: VD);
2742 break;
2743 }
2744 }
2745
2746 // `FixItsForVariable` now contains only variables that can be
2747 // fixed. A variable can be fixed if its' declaration and all Fixables
2748 // associated to it can all be fixed.
2749
2750 // To further remove from `FixItsForVariable` variables whose group mates
2751 // cannot be fixed...
2752 eraseVarsForUnfixableGroupMates(FixItsForVariable, VarGrpMgr);
2753 // Now `FixItsForVariable` gets further reduced: a variable is in
2754 // `FixItsForVariable` iff it can be fixed and all its group mates can be
2755 // fixed.
2756
2757 // Fix-its of bounds-safe overloads of `D` are shared by parameters of `D`.
2758 // That is, when fixing multiple parameters in one step, these fix-its will
2759 // be applied only once (instead of being applied per parameter).
2760 FixItList FixItsSharedByParms{};
2761
2762 if (auto *FD = dyn_cast<FunctionDecl>(Val: D))
2763 FixItsSharedByParms = createFunctionOverloadsForParms(
2764 FixItsForVariable, VarGrpMgr, FD, S, Ctx, Handler);
2765
2766 // The map that maps each variable `v` to fix-its for the whole group where
2767 // `v` is in:
2768 std::map<const VarDecl *, FixItList> FinalFixItsForVariable{
2769 FixItsForVariable};
2770
2771 for (auto &[Var, Ignore] : FixItsForVariable) {
2772 bool AnyParm = false;
2773 const auto VarGroupForVD = VarGrpMgr.getGroupOfVar(Var, HasParm: &AnyParm);
2774
2775 for (const VarDecl *GrpMate : VarGroupForVD) {
2776 if (Var == GrpMate)
2777 continue;
2778 if (FixItsForVariable.count(x: GrpMate))
2779 FinalFixItsForVariable[Var].append(RHS: FixItsForVariable[GrpMate]);
2780 }
2781 if (AnyParm) {
2782 // This assertion should never fail. Otherwise we have a bug.
2783 assert(!FixItsSharedByParms.empty() &&
2784 "Should not try to fix a parameter that does not belong to a "
2785 "FunctionDecl");
2786 FinalFixItsForVariable[Var].append(RHS: FixItsSharedByParms);
2787 }
2788 }
2789 // Fix-its that will be applied in one step shall NOT:
2790 // 1. overlap with macros or/and templates; or
2791 // 2. conflict with each other.
2792 // Otherwise, the fix-its will be dropped.
2793 for (auto Iter = FinalFixItsForVariable.begin();
2794 Iter != FinalFixItsForVariable.end();)
2795 if (overlapWithMacro(FixIts: Iter->second) ||
2796 clang::internal::anyConflict(FixIts: Iter->second, SM: Ctx.getSourceManager())) {
2797 Iter = FinalFixItsForVariable.erase(position: Iter);
2798 } else
2799 Iter++;
2800 return FinalFixItsForVariable;
2801}
2802
2803template <typename VarDeclIterTy>
2804static FixitStrategy
2805getNaiveStrategy(llvm::iterator_range<VarDeclIterTy> UnsafeVars) {
2806 FixitStrategy S;
2807 for (const VarDecl *VD : UnsafeVars) {
2808 if (isa<ConstantArrayType>(VD->getType().getCanonicalType()))
2809 S.set(VD, K: FixitStrategy::Kind::Array);
2810 else
2811 S.set(VD, K: FixitStrategy::Kind::Span);
2812 }
2813 return S;
2814}
2815
2816// Manages variable groups:
2817class VariableGroupsManagerImpl : public VariableGroupsManager {
2818 const std::vector<VarGrpTy> Groups;
2819 const std::map<const VarDecl *, unsigned> &VarGrpMap;
2820 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms;
2821
2822public:
2823 VariableGroupsManagerImpl(
2824 const std::vector<VarGrpTy> &Groups,
2825 const std::map<const VarDecl *, unsigned> &VarGrpMap,
2826 const llvm::SetVector<const VarDecl *> &GrpsUnionForParms)
2827 : Groups(Groups), VarGrpMap(VarGrpMap),
2828 GrpsUnionForParms(GrpsUnionForParms) {}
2829
2830 VarGrpRef getGroupOfVar(const VarDecl *Var, bool *HasParm) const override {
2831 if (GrpsUnionForParms.contains(key: Var)) {
2832 if (HasParm)
2833 *HasParm = true;
2834 return GrpsUnionForParms.getArrayRef();
2835 }
2836 if (HasParm)
2837 *HasParm = false;
2838
2839 auto It = VarGrpMap.find(x: Var);
2840
2841 if (It == VarGrpMap.end())
2842 return std::nullopt;
2843 return Groups[It->second];
2844 }
2845
2846 VarGrpRef getGroupOfParms() const override {
2847 return GrpsUnionForParms.getArrayRef();
2848 }
2849};
2850
2851void clang::checkUnsafeBufferUsage(const Decl *D,
2852 UnsafeBufferUsageHandler &Handler,
2853 bool EmitSuggestions) {
2854#ifndef NDEBUG
2855 Handler.clearDebugNotes();
2856#endif
2857
2858 assert(D && D->getBody());
2859 // We do not want to visit a Lambda expression defined inside a method independently.
2860 // Instead, it should be visited along with the outer method.
2861 // FIXME: do we want to do the same thing for `BlockDecl`s?
2862 if (const auto *fd = dyn_cast<CXXMethodDecl>(Val: D)) {
2863 if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass())
2864 return;
2865 }
2866
2867 // Do not emit fixit suggestions for functions declared in an
2868 // extern "C" block.
2869 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D)) {
2870 for (FunctionDecl *FReDecl : FD->redecls()) {
2871 if (FReDecl->isExternC()) {
2872 EmitSuggestions = false;
2873 break;
2874 }
2875 }
2876 }
2877
2878 WarningGadgetSets UnsafeOps;
2879 FixableGadgetSets FixablesForAllVars;
2880
2881 auto [FixableGadgets, WarningGadgets, Tracker] =
2882 findGadgets(D, Handler, EmitSuggestions);
2883
2884 if (!EmitSuggestions) {
2885 // Our job is very easy without suggestions. Just warn about
2886 // every problematic operation and consider it done. No need to deal
2887 // with fixable gadgets, no need to group operations by variable.
2888 for (const auto &G : WarningGadgets) {
2889 Handler.handleUnsafeOperation(Operation: G->getBaseStmt(), /*IsRelatedToDecl=*/false,
2890 Ctx&: D->getASTContext());
2891 }
2892
2893 // This return guarantees that most of the machine doesn't run when
2894 // suggestions aren't requested.
2895 assert(FixableGadgets.size() == 0 &&
2896 "Fixable gadgets found but suggestions not requested!");
2897 return;
2898 }
2899
2900 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2901 // function under the analysis. No need to fix any Fixables.
2902 if (!WarningGadgets.empty()) {
2903 // Gadgets "claim" variables they're responsible for. Once this loop
2904 // finishes, the tracker will only track DREs that weren't claimed by any
2905 // gadgets, i.e. not understood by the analysis.
2906 for (const auto &G : FixableGadgets) {
2907 for (const auto *DRE : G->getClaimedVarUseSites()) {
2908 Tracker.claimUse(DRE);
2909 }
2910 }
2911 }
2912
2913 // If no `WarningGadget`s ever matched, there is no unsafe operations in the
2914 // function under the analysis. Thus, it early returns here as there is
2915 // nothing needs to be fixed.
2916 //
2917 // Note this claim is based on the assumption that there is no unsafe
2918 // variable whose declaration is invisible from the analyzing function.
2919 // Otherwise, we need to consider if the uses of those unsafe varuables needs
2920 // fix.
2921 // So far, we are not fixing any global variables or class members. And,
2922 // lambdas will be analyzed along with the enclosing function. So this early
2923 // return is correct for now.
2924 if (WarningGadgets.empty())
2925 return;
2926
2927 UnsafeOps = groupWarningGadgetsByVar(AllUnsafeOperations: std::move(WarningGadgets));
2928 FixablesForAllVars = groupFixablesByVar(AllFixableOperations: std::move(FixableGadgets));
2929
2930 std::map<const VarDecl *, FixItList> FixItsForVariableGroup;
2931
2932 // Filter out non-local vars and vars with unclaimed DeclRefExpr-s.
2933 for (auto it = FixablesForAllVars.byVar.cbegin();
2934 it != FixablesForAllVars.byVar.cend();) {
2935 // FIXME: need to deal with global variables later
2936 if ((!it->first->isLocalVarDecl() && !isa<ParmVarDecl>(Val: it->first))) {
2937#ifndef NDEBUG
2938 Handler.addDebugNoteForVar(
2939 VD: it->first, Loc: it->first->getBeginLoc(),
2940 Text: ("failed to produce fixit for '" + it->first->getNameAsString() +
2941 "' : neither local nor a parameter"));
2942#endif
2943 it = FixablesForAllVars.byVar.erase(position: it);
2944 } else if (it->first->getType().getCanonicalType()->isReferenceType()) {
2945#ifndef NDEBUG
2946 Handler.addDebugNoteForVar(VD: it->first, Loc: it->first->getBeginLoc(),
2947 Text: ("failed to produce fixit for '" +
2948 it->first->getNameAsString() +
2949 "' : has a reference type"));
2950#endif
2951 it = FixablesForAllVars.byVar.erase(position: it);
2952 } else if (Tracker.hasUnclaimedUses(VD: it->first)) {
2953 it = FixablesForAllVars.byVar.erase(position: it);
2954 } else if (it->first->isInitCapture()) {
2955#ifndef NDEBUG
2956 Handler.addDebugNoteForVar(
2957 VD: it->first, Loc: it->first->getBeginLoc(),
2958 Text: ("failed to produce fixit for '" + it->first->getNameAsString() +
2959 "' : init capture"));
2960#endif
2961 it = FixablesForAllVars.byVar.erase(position: it);
2962 } else {
2963 ++it;
2964 }
2965 }
2966
2967#ifndef NDEBUG
2968 for (const auto &it : UnsafeOps.byVar) {
2969 const VarDecl *const UnsafeVD = it.first;
2970 auto UnclaimedDREs = Tracker.getUnclaimedUses(VD: UnsafeVD);
2971 if (UnclaimedDREs.empty())
2972 continue;
2973 const auto UnfixedVDName = UnsafeVD->getNameAsString();
2974 for (const clang::DeclRefExpr *UnclaimedDRE : UnclaimedDREs) {
2975 std::string UnclaimedUseTrace =
2976 getDREAncestorString(DRE: UnclaimedDRE, Ctx&: D->getASTContext());
2977
2978 Handler.addDebugNoteForVar(
2979 VD: UnsafeVD, Loc: UnclaimedDRE->getBeginLoc(),
2980 Text: ("failed to produce fixit for '" + UnfixedVDName +
2981 "' : has an unclaimed use\nThe unclaimed DRE trace: " +
2982 UnclaimedUseTrace));
2983 }
2984 }
2985#endif
2986
2987 // Fixpoint iteration for pointer assignments
2988 using DepMapTy = DenseMap<const VarDecl *, llvm::SetVector<const VarDecl *>>;
2989 DepMapTy DependenciesMap{};
2990 DepMapTy PtrAssignmentGraph{};
2991
2992 for (auto it : FixablesForAllVars.byVar) {
2993 for (const FixableGadget *fixable : it.second) {
2994 std::optional<std::pair<const VarDecl *, const VarDecl *>> ImplPair =
2995 fixable->getStrategyImplications();
2996 if (ImplPair) {
2997 std::pair<const VarDecl *, const VarDecl *> Impl = std::move(*ImplPair);
2998 PtrAssignmentGraph[Impl.first].insert(X: Impl.second);
2999 }
3000 }
3001 }
3002
3003 /*
3004 The following code does a BFS traversal of the `PtrAssignmentGraph`
3005 considering all unsafe vars as starting nodes and constructs an undirected
3006 graph `DependenciesMap`. Constructing the `DependenciesMap` in this manner
3007 elimiates all variables that are unreachable from any unsafe var. In other
3008 words, this removes all dependencies that don't include any unsafe variable
3009 and consequently don't need any fixit generation.
3010 Note: A careful reader would observe that the code traverses
3011 `PtrAssignmentGraph` using `CurrentVar` but adds edges between `Var` and
3012 `Adj` and not between `CurrentVar` and `Adj`. Both approaches would
3013 achieve the same result but the one used here dramatically cuts the
3014 amount of hoops the second part of the algorithm needs to jump, given that
3015 a lot of these connections become "direct". The reader is advised not to
3016 imagine how the graph is transformed because of using `Var` instead of
3017 `CurrentVar`. The reader can continue reading as if `CurrentVar` was used,
3018 and think about why it's equivalent later.
3019 */
3020 std::set<const VarDecl *> VisitedVarsDirected{};
3021 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
3022 if (VisitedVarsDirected.find(x: Var) == VisitedVarsDirected.end()) {
3023
3024 std::queue<const VarDecl*> QueueDirected{};
3025 QueueDirected.push(x: Var);
3026 while(!QueueDirected.empty()) {
3027 const VarDecl* CurrentVar = QueueDirected.front();
3028 QueueDirected.pop();
3029 VisitedVarsDirected.insert(x: CurrentVar);
3030 auto AdjacentNodes = PtrAssignmentGraph[CurrentVar];
3031 for (const VarDecl *Adj : AdjacentNodes) {
3032 if (VisitedVarsDirected.find(x: Adj) == VisitedVarsDirected.end()) {
3033 QueueDirected.push(x: Adj);
3034 }
3035 DependenciesMap[Var].insert(X: Adj);
3036 DependenciesMap[Adj].insert(X: Var);
3037 }
3038 }
3039 }
3040 }
3041
3042 // `Groups` stores the set of Connected Components in the graph.
3043 std::vector<VarGrpTy> Groups;
3044 // `VarGrpMap` maps variables that need fix to the groups (indexes) that the
3045 // variables belong to. Group indexes refer to the elements in `Groups`.
3046 // `VarGrpMap` is complete in that every variable that needs fix is in it.
3047 std::map<const VarDecl *, unsigned> VarGrpMap;
3048 // The union group over the ones in "Groups" that contain parameters of `D`:
3049 llvm::SetVector<const VarDecl *>
3050 GrpsUnionForParms; // these variables need to be fixed in one step
3051
3052 // Group Connected Components for Unsafe Vars
3053 // (Dependencies based on pointer assignments)
3054 std::set<const VarDecl *> VisitedVars{};
3055 for (const auto &[Var, ignore] : UnsafeOps.byVar) {
3056 if (VisitedVars.find(x: Var) == VisitedVars.end()) {
3057 VarGrpTy &VarGroup = Groups.emplace_back();
3058 std::queue<const VarDecl*> Queue{};
3059
3060 Queue.push(x: Var);
3061 while(!Queue.empty()) {
3062 const VarDecl* CurrentVar = Queue.front();
3063 Queue.pop();
3064 VisitedVars.insert(x: CurrentVar);
3065 VarGroup.push_back(x: CurrentVar);
3066 auto AdjacentNodes = DependenciesMap[CurrentVar];
3067 for (const VarDecl *Adj : AdjacentNodes) {
3068 if (VisitedVars.find(x: Adj) == VisitedVars.end()) {
3069 Queue.push(x: Adj);
3070 }
3071 }
3072 }
3073
3074 bool HasParm = false;
3075 unsigned GrpIdx = Groups.size() - 1;
3076
3077 for (const VarDecl *V : VarGroup) {
3078 VarGrpMap[V] = GrpIdx;
3079 if (!HasParm && isParameterOf(VD: V, D))
3080 HasParm = true;
3081 }
3082 if (HasParm)
3083 GrpsUnionForParms.insert(Start: VarGroup.begin(), End: VarGroup.end());
3084 }
3085 }
3086
3087 // Remove a `FixableGadget` if the associated variable is not in the graph
3088 // computed above. We do not want to generate fix-its for such variables,
3089 // since they are neither warned nor reachable from a warned one.
3090 //
3091 // Note a variable is not warned if it is not directly used in any unsafe
3092 // operation. A variable `v` is NOT reachable from an unsafe variable, if it
3093 // does not exist another variable `u` such that `u` is warned and fixing `u`
3094 // (transitively) implicates fixing `v`.
3095 //
3096 // For example,
3097 // ```
3098 // void f(int * p) {
3099 // int * a = p; *p = 0;
3100 // }
3101 // ```
3102 // `*p = 0` is a fixable gadget associated with a variable `p` that is neither
3103 // warned nor reachable from a warned one. If we add `a[5] = 0` to the end of
3104 // the function above, `p` becomes reachable from a warned variable.
3105 for (auto I = FixablesForAllVars.byVar.begin();
3106 I != FixablesForAllVars.byVar.end();) {
3107 // Note `VisitedVars` contain all the variables in the graph:
3108 if (!VisitedVars.count(x: (*I).first)) {
3109 // no such var in graph:
3110 I = FixablesForAllVars.byVar.erase(position: I);
3111 } else
3112 ++I;
3113 }
3114
3115 // We assign strategies to variables that are 1) in the graph and 2) can be
3116 // fixed. Other variables have the default "Won't fix" strategy.
3117 FixitStrategy NaiveStrategy = getNaiveStrategy(UnsafeVars: llvm::make_filter_range(
3118 Range&: VisitedVars, Pred: [&FixablesForAllVars](const VarDecl *V) {
3119 // If a warned variable has no "Fixable", it is considered unfixable:
3120 return FixablesForAllVars.byVar.count(x: V);
3121 }));
3122 VariableGroupsManagerImpl VarGrpMgr(Groups, VarGrpMap, GrpsUnionForParms);
3123
3124 if (isa<NamedDecl>(Val: D))
3125 // The only case where `D` is not a `NamedDecl` is when `D` is a
3126 // `BlockDecl`. Let's not fix variables in blocks for now
3127 FixItsForVariableGroup =
3128 getFixIts(FixablesForAllVars, S: NaiveStrategy, Ctx&: D->getASTContext(), D,
3129 Tracker, Handler, VarGrpMgr);
3130
3131 for (const auto &G : UnsafeOps.noVar) {
3132 Handler.handleUnsafeOperation(Operation: G->getBaseStmt(), /*IsRelatedToDecl=*/false,
3133 Ctx&: D->getASTContext());
3134 }
3135
3136 for (const auto &[VD, WarningGadgets] : UnsafeOps.byVar) {
3137 auto FixItsIt = FixItsForVariableGroup.find(x: VD);
3138 Handler.handleUnsafeVariableGroup(Variable: VD, VarGrpMgr,
3139 Fixes: FixItsIt != FixItsForVariableGroup.end()
3140 ? std::move(FixItsIt->second)
3141 : FixItList{},
3142 D, VarTargetTypes: NaiveStrategy);
3143 for (const auto &G : WarningGadgets) {
3144 Handler.handleUnsafeOperation(Operation: G->getBaseStmt(), /*IsRelatedToDecl=*/true,
3145 Ctx&: D->getASTContext());
3146 }
3147 }
3148}
3149

source code of clang/lib/Analysis/UnsafeBufferUsage.cpp