//===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#include "MisleadingBidirectional.h"
10
11#include "clang/Frontend/CompilerInstance.h"
12#include "clang/Lex/Preprocessor.h"
13#include "llvm/Support/ConvertUTF.h"
14
15using namespace clang;
16using namespace clang::tidy::misc;
17
18static bool containsMisleadingBidi(StringRef Buffer,
19 bool HonorLineBreaks = true) {
20 const char *CurPtr = Buffer.begin();
21
22 enum BidiChar {
23 PS = 0x2029,
24 RLO = 0x202E,
25 RLE = 0x202B,
26 LRO = 0x202D,
27 LRE = 0x202A,
28 PDF = 0x202C,
29 RLI = 0x2067,
30 LRI = 0x2066,
31 FSI = 0x2068,
32 PDI = 0x2069
33 };
34
35 SmallVector<BidiChar> BidiContexts;
36
37 // Scan each character while maintaining a stack of opened bidi context.
38 // RLO/RLE/LRO/LRE all are closed by PDF while RLI LRI and FSI are closed by
39 // PDI. New lines reset the context count. Extra PDF / PDI are ignored.
40 //
41 // Warn if we end up with an unclosed context.
42 while (CurPtr < Buffer.end()) {
43 unsigned char C = *CurPtr;
44 if (isASCII(c: C)) {
45 ++CurPtr;
46 bool IsParagrapSep =
47 (C == 0xA || C == 0xD || (0x1C <= C && C <= 0x1E) || C == 0x85);
48 bool IsSegmentSep = (C == 0x9 || C == 0xB || C == 0x1F);
49 if (IsParagrapSep || IsSegmentSep)
50 BidiContexts.clear();
51 continue;
52 }
53 llvm::UTF32 CodePoint = 0;
54 llvm::ConversionResult Result = llvm::convertUTF8Sequence(
55 source: (const llvm::UTF8 **)&CurPtr, sourceEnd: (const llvm::UTF8 *)Buffer.end(),
56 target: &CodePoint, flags: llvm::strictConversion);
57
58 // If conversion fails, utf-8 is designed so that we can just try next char.
59 if (Result != llvm::conversionOK) {
60 ++CurPtr;
61 continue;
62 }
63
64 // Open a PDF context.
65 if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
66 CodePoint == LRE)
67 BidiContexts.push_back(Elt: PDF);
68 // Close PDF Context.
69 else if (CodePoint == PDF) {
70 if (!BidiContexts.empty() && BidiContexts.back() == PDF)
71 BidiContexts.pop_back();
72 }
73 // Open a PDI Context.
74 else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
75 BidiContexts.push_back(Elt: PDI);
76 // Close a PDI Context.
77 else if (CodePoint == PDI) {
78 auto R = llvm::find(Range: llvm::reverse(C&: BidiContexts), Val: PDI);
79 if (R != BidiContexts.rend())
80 BidiContexts.resize(N: BidiContexts.rend() - R - 1);
81 }
82 // Line break or equivalent
83 else if (CodePoint == PS)
84 BidiContexts.clear();
85 }
86 return !BidiContexts.empty();
87}
88
89class MisleadingBidirectionalCheck::MisleadingBidirectionalHandler
90 : public CommentHandler {
91public:
92 MisleadingBidirectionalHandler(MisleadingBidirectionalCheck &Check)
93 : Check(Check) {}
94
95 bool HandleComment(Preprocessor &PP, SourceRange Range) override {
96 // FIXME: check that we are in a /* */ comment
97 StringRef Text =
98 Lexer::getSourceText(Range: CharSourceRange::getCharRange(R: Range),
99 SM: PP.getSourceManager(), LangOpts: PP.getLangOpts());
100
101 if (containsMisleadingBidi(Buffer: Text, HonorLineBreaks: true))
102 Check.diag(
103 Loc: Range.getBegin(),
104 Description: "comment contains misleading bidirectional Unicode characters");
105 return false;
106 }
107
108private:
109 MisleadingBidirectionalCheck &Check;
110};
111
112MisleadingBidirectionalCheck::MisleadingBidirectionalCheck(
113 StringRef Name, ClangTidyContext *Context)
114 : ClangTidyCheck(Name, Context),
115 Handler(std::make_unique<MisleadingBidirectionalHandler>(args&: *this)) {}
116
117MisleadingBidirectionalCheck::~MisleadingBidirectionalCheck() = default;
118
119void MisleadingBidirectionalCheck::registerPPCallbacks(
120 const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
121 PP->addCommentHandler(Handler: Handler.get());
122}
123
124void MisleadingBidirectionalCheck::check(
125 const ast_matchers::MatchFinder::MatchResult &Result) {
126 if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>(ID: "strlit")) {
127 StringRef Literal = SL->getBytes();
128 if (containsMisleadingBidi(Buffer: Literal, HonorLineBreaks: false))
129 diag(Loc: SL->getBeginLoc(), Description: "string literal contains misleading "
130 "bidirectional Unicode characters");
131 }
132}
133
134void MisleadingBidirectionalCheck::registerMatchers(
135 ast_matchers::MatchFinder *Finder) {
136 Finder->addMatcher(NodeMatch: ast_matchers::stringLiteral().bind(ID: "strlit"), Action: this);
137}
138

// Source code of clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp
// (provided by KDAB).