| 1 | //===--- IsolateDeclarationCheck.cpp - clang-tidy -------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "IsolateDeclarationCheck.h" |
| 10 | #include "../utils/LexerUtils.h" |
| 11 | #include "clang/ASTMatchers/ASTMatchFinder.h" |
| 12 | #include <optional> |
| 13 | |
| 14 | using namespace clang::ast_matchers; |
| 15 | using namespace clang::tidy::utils::lexer; |
| 16 | |
| 17 | namespace clang::tidy::readability { |
| 18 | |
| 19 | namespace { |
| 20 | AST_MATCHER(DeclStmt, isSingleDecl) { return Node.isSingleDecl(); } |
| 21 | AST_MATCHER(DeclStmt, onlyDeclaresVariables) { |
| 22 | return llvm::all_of(Range: Node.decls(), P: [](Decl *D) { return isa<VarDecl>(Val: D); }); |
| 23 | } |
| 24 | } // namespace |
| 25 | |
| 26 | void IsolateDeclarationCheck::registerMatchers(MatchFinder *Finder) { |
| 27 | Finder->addMatcher(NodeMatch: declStmt(onlyDeclaresVariables(), unless(isSingleDecl()), |
| 28 | hasParent(compoundStmt())) |
| 29 | .bind(ID: "decl_stmt" ), |
| 30 | Action: this); |
| 31 | } |
| 32 | |
| 33 | static SourceLocation findStartOfIndirection(SourceLocation Start, |
| 34 | int Indirections, |
| 35 | const SourceManager &SM, |
| 36 | const LangOptions &LangOpts) { |
| 37 | assert(Indirections >= 0 && "Indirections must be non-negative" ); |
| 38 | if (Indirections == 0) |
| 39 | return Start; |
| 40 | |
| 41 | // Note that the post-fix decrement is necessary to perform the correct |
| 42 | // number of transformations. |
| 43 | while (Indirections-- != 0) { |
| 44 | Start = findPreviousAnyTokenKind(Start, SM, LangOpts, TK: tok::star, TKs: tok::amp); |
| 45 | if (Start.isInvalid() || Start.isMacroID()) |
| 46 | return {}; |
| 47 | } |
| 48 | return Start; |
| 49 | } |
| 50 | |
| 51 | static bool isMacroID(SourceRange R) { |
| 52 | return R.getBegin().isMacroID() || R.getEnd().isMacroID(); |
| 53 | } |
| 54 | |
| 55 | /// This function counts the number of written indirections for the given |
| 56 | /// Type \p T. It does \b NOT resolve typedefs as it's a helper for lexing |
| 57 | /// the source code. |
| 58 | /// \see declRanges |
| 59 | static int countIndirections(const Type *T, int Indirections = 0) { |
| 60 | if (T->isFunctionPointerType()) { |
| 61 | const auto *Pointee = T->getPointeeType()->castAs<FunctionType>(); |
| 62 | return countIndirections( |
| 63 | T: Pointee->getReturnType().IgnoreParens().getTypePtr(), Indirections: ++Indirections); |
| 64 | } |
| 65 | |
| 66 | // Note: Do not increment the 'Indirections' because it is not yet clear |
| 67 | // if there is an indirection added in the source code of the array |
| 68 | // declaration. |
| 69 | if (const auto *AT = dyn_cast<ArrayType>(Val: T)) |
| 70 | return countIndirections(T: AT->getElementType().IgnoreParens().getTypePtr(), |
| 71 | Indirections); |
| 72 | |
| 73 | if (isa<PointerType>(Val: T) || isa<ReferenceType>(Val: T)) |
| 74 | return countIndirections(T: T->getPointeeType().IgnoreParens().getTypePtr(), |
| 75 | Indirections: ++Indirections); |
| 76 | |
| 77 | return Indirections; |
| 78 | } |
| 79 | |
| 80 | static bool typeIsMemberPointer(const Type *T) { |
| 81 | if (isa<ArrayType>(Val: T)) |
| 82 | return typeIsMemberPointer(T: T->getArrayElementTypeNoTypeQual()); |
| 83 | |
| 84 | if ((isa<PointerType>(Val: T) || isa<ReferenceType>(Val: T)) && |
| 85 | isa<PointerType>(Val: T->getPointeeType())) |
| 86 | return typeIsMemberPointer(T: T->getPointeeType().getTypePtr()); |
| 87 | |
| 88 | return isa<MemberPointerType>(Val: T); |
| 89 | } |
| 90 | |
| 91 | /// This function tries to extract the SourceRanges that make up all |
| 92 | /// declarations in this \c DeclStmt. |
| 93 | /// |
| 94 | /// The resulting vector has the structure {UnderlyingType, Decl1, Decl2, ...}. |
| 95 | /// Each \c SourceRange is of the form [Begin, End). |
| 96 | /// If any of the create ranges is invalid or in a macro the result will be |
| 97 | /// \c None. |
| 98 | /// If the \c DeclStmt contains only one declaration, the result is \c None. |
| 99 | /// If the \c DeclStmt contains declarations other than \c VarDecl the result |
| 100 | /// is \c None. |
| 101 | /// |
| 102 | /// \code |
| 103 | /// int * ptr1 = nullptr, value = 42; |
| 104 | /// // [ ][ ] [ ] - The ranges here are inclusive |
| 105 | /// \endcode |
| 106 | /// \todo Generalize this function to take other declarations than \c VarDecl. |
| 107 | static std::optional<std::vector<SourceRange>> |
| 108 | declRanges(const DeclStmt *DS, const SourceManager &SM, |
| 109 | const LangOptions &LangOpts) { |
| 110 | std::size_t DeclCount = std::distance(first: DS->decl_begin(), last: DS->decl_end()); |
| 111 | if (DeclCount < 2) |
| 112 | return std::nullopt; |
| 113 | |
| 114 | if (rangeContainsExpansionsOrDirectives(Range: DS->getSourceRange(), SM, LangOpts)) |
| 115 | return std::nullopt; |
| 116 | |
| 117 | // The initial type of the declaration and each declaration has it's own |
| 118 | // slice. This is necessary, because pointers and references bind only |
| 119 | // to the local variable and not to all variables in the declaration. |
| 120 | // Example: 'int *pointer, value = 42;' |
| 121 | std::vector<SourceRange> Slices; |
| 122 | Slices.reserve(n: DeclCount + 1); |
| 123 | |
| 124 | // Calculate the first slice, for now only variables are handled but in the |
| 125 | // future this should be relaxed and support various kinds of declarations. |
| 126 | const auto *FirstDecl = dyn_cast<VarDecl>(Val: *DS->decl_begin()); |
| 127 | |
| 128 | if (FirstDecl == nullptr) |
| 129 | return std::nullopt; |
| 130 | |
| 131 | // FIXME: Member pointers are not transformed correctly right now, that's |
| 132 | // why they are treated as problematic here. |
| 133 | if (typeIsMemberPointer(FirstDecl->getType().IgnoreParens().getTypePtr())) |
| 134 | return std::nullopt; |
| 135 | |
| 136 | // Consider the following case: 'int * pointer, value = 42;' |
| 137 | // Created slices (inclusive) [ ][ ] [ ] |
| 138 | // Because 'getBeginLoc' points to the start of the variable *name*, the |
| 139 | // location of the pointer must be determined separately. |
| 140 | SourceLocation Start = findStartOfIndirection( |
| 141 | FirstDecl->getLocation(), |
| 142 | countIndirections(FirstDecl->getType().IgnoreParens().getTypePtr()), SM, |
| 143 | LangOpts); |
| 144 | |
| 145 | // Fix function-pointer declarations that have a '(' in front of the |
| 146 | // pointer. |
| 147 | // Example: 'void (*f2)(int), (*g2)(int, float) = gg;' |
| 148 | // Slices: [ ][ ] [ ] |
| 149 | if (FirstDecl->getType()->isFunctionPointerType()) |
| 150 | Start = findPreviousTokenKind(Start, SM, LangOpts, TK: tok::l_paren); |
| 151 | |
| 152 | // It is possible that a declarator is wrapped with parens. |
| 153 | // Example: 'float (((*f_ptr2)))[42], *f_ptr3, ((f_value2)) = 42.f;' |
| 154 | // The slice for the type-part must not contain these parens. Consequently |
| 155 | // 'Start' is moved to the most left paren if there are parens. |
| 156 | while (true) { |
| 157 | if (Start.isInvalid() || Start.isMacroID()) |
| 158 | break; |
| 159 | |
| 160 | Token T = getPreviousToken(Location: Start, SM, LangOpts); |
| 161 | if (T.is(K: tok::l_paren)) { |
| 162 | Start = findPreviousTokenStart(Start, SM, LangOpts); |
| 163 | continue; |
| 164 | } |
| 165 | break; |
| 166 | } |
| 167 | |
| 168 | SourceRange DeclRange(DS->getBeginLoc(), Start); |
| 169 | if (DeclRange.isInvalid() || isMacroID(R: DeclRange)) |
| 170 | return std::nullopt; |
| 171 | |
| 172 | // The first slice, that is prepended to every isolated declaration, is |
| 173 | // created. |
| 174 | Slices.emplace_back(args&: DeclRange); |
| 175 | |
| 176 | // Create all following slices that each declare a variable. |
| 177 | SourceLocation DeclBegin = Start; |
| 178 | for (const auto &Decl : DS->decls()) { |
| 179 | const auto *CurrentDecl = cast<VarDecl>(Val: Decl); |
| 180 | |
| 181 | // FIXME: Member pointers are not transformed correctly right now, that's |
| 182 | // why they are treated as problematic here. |
| 183 | if (typeIsMemberPointer(CurrentDecl->getType().IgnoreParens().getTypePtr())) |
| 184 | return std::nullopt; |
| 185 | |
| 186 | SourceLocation DeclEnd = |
| 187 | CurrentDecl->hasInit() |
| 188 | ? findNextTerminator(CurrentDecl->getInit()->getEndLoc(), SM, |
| 189 | LangOpts) |
| 190 | : findNextTerminator(CurrentDecl->getEndLoc(), SM, LangOpts); |
| 191 | |
| 192 | SourceRange VarNameRange(DeclBegin, DeclEnd); |
| 193 | if (VarNameRange.isInvalid() || isMacroID(R: VarNameRange)) |
| 194 | return std::nullopt; |
| 195 | |
| 196 | Slices.emplace_back(args&: VarNameRange); |
| 197 | DeclBegin = DeclEnd.getLocWithOffset(Offset: 1); |
| 198 | } |
| 199 | return Slices; |
| 200 | } |
| 201 | |
| 202 | static std::optional<std::vector<StringRef>> |
| 203 | collectSourceRanges(llvm::ArrayRef<SourceRange> Ranges, const SourceManager &SM, |
| 204 | const LangOptions &LangOpts) { |
| 205 | std::vector<StringRef> Snippets; |
| 206 | Snippets.reserve(n: Ranges.size()); |
| 207 | |
| 208 | for (const auto &Range : Ranges) { |
| 209 | CharSourceRange CharRange = Lexer::getAsCharRange( |
| 210 | Range: CharSourceRange::getCharRange(B: Range.getBegin(), E: Range.getEnd()), SM, |
| 211 | LangOpts); |
| 212 | |
| 213 | if (CharRange.isInvalid()) |
| 214 | return std::nullopt; |
| 215 | |
| 216 | bool InvalidText = false; |
| 217 | StringRef Snippet = |
| 218 | Lexer::getSourceText(Range: CharRange, SM, LangOpts, Invalid: &InvalidText); |
| 219 | |
| 220 | if (InvalidText) |
| 221 | return std::nullopt; |
| 222 | |
| 223 | Snippets.emplace_back(args&: Snippet); |
| 224 | } |
| 225 | |
| 226 | return Snippets; |
| 227 | } |
| 228 | |
| 229 | /// Expects a vector {TypeSnippet, Firstdecl, SecondDecl, ...}. |
| 230 | static std::vector<std::string> |
| 231 | createIsolatedDecls(llvm::ArrayRef<StringRef> Snippets) { |
| 232 | // The first section is the type snippet, which does not make a decl itself. |
| 233 | assert(Snippets.size() > 2 && "Not enough snippets to create isolated decls" ); |
| 234 | std::vector<std::string> Decls(Snippets.size() - 1); |
| 235 | |
| 236 | for (std::size_t I = 1; I < Snippets.size(); ++I) |
| 237 | Decls[I - 1] = Twine(Snippets[0]) |
| 238 | .concat(Suffix: Snippets[0].ends_with(Suffix: " " ) ? "" : " " ) |
| 239 | .concat(Suffix: Snippets[I].ltrim()) |
| 240 | .concat(Suffix: ";" ) |
| 241 | .str(); |
| 242 | |
| 243 | return Decls; |
| 244 | } |
| 245 | |
| 246 | void IsolateDeclarationCheck::check(const MatchFinder::MatchResult &Result) { |
| 247 | const auto *WholeDecl = Result.Nodes.getNodeAs<DeclStmt>(ID: "decl_stmt" ); |
| 248 | |
| 249 | auto Diag = |
| 250 | diag(Loc: WholeDecl->getBeginLoc(), |
| 251 | Description: "multiple declarations in a single statement reduces readability" ); |
| 252 | |
| 253 | std::optional<std::vector<SourceRange>> PotentialRanges = |
| 254 | declRanges(DS: WholeDecl, SM: *Result.SourceManager, LangOpts: getLangOpts()); |
| 255 | if (!PotentialRanges) |
| 256 | return; |
| 257 | |
| 258 | std::optional<std::vector<StringRef>> PotentialSnippets = collectSourceRanges( |
| 259 | Ranges: *PotentialRanges, SM: *Result.SourceManager, LangOpts: getLangOpts()); |
| 260 | |
| 261 | if (!PotentialSnippets) |
| 262 | return; |
| 263 | |
| 264 | std::vector<std::string> NewDecls = createIsolatedDecls(Snippets: *PotentialSnippets); |
| 265 | std::string Replacement = llvm::join( |
| 266 | R&: NewDecls, |
| 267 | Separator: (Twine("\n" ) + Lexer::getIndentationForLine(Loc: WholeDecl->getBeginLoc(), |
| 268 | SM: *Result.SourceManager)) |
| 269 | .str()); |
| 270 | |
| 271 | Diag << FixItHint::CreateReplacement(RemoveRange: WholeDecl->getSourceRange(), |
| 272 | Code: Replacement); |
| 273 | } |
| 274 | } // namespace clang::tidy::readability |
| 275 | |