1//===--- DirectiveTree.cpp - Find and strip preprocessor directives -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang-pseudo/DirectiveTree.h"
10#include "clang/Basic/IdentifierTable.h"
11#include "clang/Basic/TokenKinds.h"
12#include "llvm/Support/FormatVariadic.h"
13#include <optional>
14#include <variant>
15
16namespace clang {
17namespace pseudo {
18namespace {
19
20class DirectiveParser {
21public:
22 explicit DirectiveParser(const TokenStream &Code)
23 : Code(Code), Tok(&Code.front()) {}
24 void parse(DirectiveTree *Result) { parse(Tree: Result, /*TopLevel=*/true); }
25
26private:
27 // Roles that a directive might take within a conditional block.
28 enum class Cond { None, If, Else, End };
29 static Cond classifyDirective(tok::PPKeywordKind K) {
30 switch (K) {
31 case clang::tok::pp_if:
32 case clang::tok::pp_ifdef:
33 case clang::tok::pp_ifndef:
34 return Cond::If;
35 case clang::tok::pp_elif:
36 case clang::tok::pp_elifdef:
37 case clang::tok::pp_elifndef:
38 case clang::tok::pp_else:
39 return Cond::Else;
40 case clang::tok::pp_endif:
41 return Cond::End;
42 default:
43 return Cond::None;
44 }
45 }
46
47 // Parses tokens starting at Tok into Tree.
48 // If we reach an End or Else directive that ends Tree, returns it.
49 // If TopLevel is true, then we do not expect End and always return
50 // std::nullopt.
51 std::optional<DirectiveTree::Directive> parse(DirectiveTree *Tree,
52 bool TopLevel) {
53 auto StartsDirective =
54 [&, AllowDirectiveAt((const Token *)nullptr)]() mutable {
55 if (Tok->flag(Mask: LexFlags::StartsPPLine)) {
56 // If we considered a comment at the start of a PP-line, it doesn't
57 // start a directive but the directive can still start after it.
58 if (Tok->Kind == tok::comment)
59 AllowDirectiveAt = Tok + 1;
60 return Tok->Kind == tok::hash;
61 }
62 return Tok->Kind == tok::hash && AllowDirectiveAt == Tok;
63 };
64 // Each iteration adds one chunk (or returns, if we see #endif).
65 while (Tok->Kind != tok::eof) {
66 // If there's no directive here, we have a code chunk.
67 if (!StartsDirective()) {
68 const Token *Start = Tok;
69 do
70 ++Tok;
71 while (Tok->Kind != tok::eof && !StartsDirective());
72 Tree->Chunks.push_back(x: DirectiveTree::Code{
73 .Tokens: Token::Range{.Begin: Code.index(T: *Start), .End: Code.index(T: *Tok)}});
74 continue;
75 }
76
77 // We have some kind of directive.
78 DirectiveTree::Directive Directive;
79 parseDirective(D: &Directive);
80 Cond Kind = classifyDirective(K: Directive.Kind);
81 if (Kind == Cond::If) {
82 // #if or similar, starting a nested conditional block.
83 DirectiveTree::Conditional Conditional;
84 Conditional.Branches.emplace_back();
85 Conditional.Branches.back().first = std::move(Directive);
86 parseConditional(C: &Conditional);
87 Tree->Chunks.push_back(x: std::move(Conditional));
88 } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) {
89 // #endif or similar, ending this PStructure scope.
90 // (#endif is unexpected at the top level, treat as simple directive).
91 return std::move(Directive);
92 } else {
93 // #define or similar, a simple directive at the current scope.
94 Tree->Chunks.push_back(x: std::move(Directive));
95 }
96 }
97 return std::nullopt;
98 }
99
100 // Parse the rest of a conditional section, after seeing the If directive.
101 // Returns after consuming the End directive.
102 void parseConditional(DirectiveTree::Conditional *C) {
103 assert(C->Branches.size() == 1 &&
104 C->Branches.front().second.Chunks.empty() &&
105 "Should be ready to parse first branch body");
106 while (Tok->Kind != tok::eof) {
107 auto Terminator = parse(Tree: &C->Branches.back().second, /*TopLevel=*/false);
108 if (!Terminator) {
109 assert(Tok->Kind == tok::eof && "gave up parsing before eof?");
110 C->End.Tokens = Token::Range::emptyAt(Index: Code.index(T: *Tok));
111 return;
112 }
113 if (classifyDirective(K: Terminator->Kind) == Cond::End) {
114 C->End = std::move(*Terminator);
115 return;
116 }
117 assert(classifyDirective(Terminator->Kind) == Cond::Else &&
118 "ended branch unexpectedly");
119 C->Branches.emplace_back();
120 C->Branches.back().first = std::move(*Terminator);
121 }
122 }
123
124 // Parse a directive. Tok is the hash.
125 void parseDirective(DirectiveTree::Directive *D) {
126 assert(Tok->Kind == tok::hash);
127
128 // Directive spans from the hash until the end of line or file.
129 const Token *Begin = Tok++;
130 while (Tok->Kind != tok::eof && !Tok->flag(Mask: LexFlags::StartsPPLine))
131 ++Tok;
132 ArrayRef<Token> Tokens{Begin, Tok};
133 D->Tokens = {.Begin: Code.index(T: *Tokens.begin()), .End: Code.index(T: *Tokens.end())};
134
135 // Directive name is the first non-comment token after the hash.
136 Tokens = Tokens.drop_front().drop_while(
137 Pred: [](const Token &T) { return T.Kind == tok::comment; });
138 if (!Tokens.empty())
139 D->Kind = PPKeywords.get(Tokens.front().text()).getPPKeywordID();
140 }
141
142 const TokenStream &Code;
143 const Token *Tok;
144 clang::IdentifierTable PPKeywords;
145};
146
147struct Dumper {
148 llvm::raw_ostream &OS;
149 unsigned Indent = 0;
150
151 Dumper(llvm::raw_ostream& OS) : OS(OS) {}
152 void operator()(const DirectiveTree& Tree) {
153 for (const auto& Chunk : Tree.Chunks)
154 std::visit(visitor&: *this, variants: Chunk);
155 }
156 void operator()(const DirectiveTree::Conditional &Conditional) {
157 for (unsigned I = 0; I < Conditional.Branches.size(); ++I) {
158 const auto &Branch = Conditional.Branches[I];
159 (*this)(Branch.first, Conditional.Taken == I);
160 Indent += 2;
161 (*this)(Branch.second);
162 Indent -= 2;
163 }
164 (*this)(Conditional.End);
165 }
166 void operator()(const DirectiveTree::Directive &Directive,
167 bool Taken = false) {
168 OS.indent(NumSpaces: Indent) << llvm::formatv(
169 Fmt: "#{0} ({1} tokens){2}\n", Vals: tok::getPPKeywordSpelling(Kind: Directive.Kind),
170 Vals: Directive.Tokens.size(), Vals: Taken ? " TAKEN" : "");
171 }
172 void operator()(const DirectiveTree::Code &Code) {
173 OS.indent(NumSpaces: Indent) << llvm::formatv(Fmt: "code ({0} tokens)\n",
174 Vals: Code.Tokens.size());
175 }
176};
177} // namespace
178
179DirectiveTree DirectiveTree::parse(const TokenStream &Code) {
180 DirectiveTree Result;
181 DirectiveParser(Code).parse(Result: &Result);
182 return Result;
183}
184
185// Define operator<< in terms of dump() functions above.
186#define OSTREAM_DUMP(Type) \
187 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Type &T) { \
188 Dumper{OS}(T); \
189 return OS; \
190 }
191OSTREAM_DUMP(DirectiveTree)
192OSTREAM_DUMP(DirectiveTree::Directive)
193OSTREAM_DUMP(DirectiveTree::Conditional)
194OSTREAM_DUMP(DirectiveTree::Code)
195#undef OSTREAM_DUMP
196
197namespace {
198// Makes choices about conditional branches.
199//
200// Generally it tries to maximize the amount of useful code we see.
201//
202// Caveat: each conditional is evaluated independently. Consider this code:
203// #ifdef WINDOWS
204// bool isWindows = true;
205// #endif
206// #ifndef WINDOWS
207// bool isWindows = false;
208// #endif
209// We take both branches and define isWindows twice. We could track more state
210// in order to produce a consistent view, but this is complex.
211class BranchChooser {
212public:
213 BranchChooser(const TokenStream &Code) : Code(Code) {}
214
215 // Describes code seen by making particular branch choices. Higher is better.
216 struct Score {
217 int Tokens = 0; // excluding comments and directives
218 int Directives = 0;
219 int Errors = 0; // #error directives
220
221 bool operator>(const Score &Other) const {
222 // Seeing errors is bad, other things are good.
223 return std::make_tuple(args: -Errors, args: Tokens, args: Directives) >
224 std::make_tuple(args: -Other.Errors, args: Other.Tokens, args: Other.Directives);
225 }
226
227 Score &operator+=(const Score &Other) {
228 Tokens += Other.Tokens;
229 Directives += Other.Directives;
230 Errors += Other.Errors;
231 return *this;
232 }
233 };
234
235 Score operator()(DirectiveTree::Code &C) {
236 Score S;
237 for (const Token &T : Code.tokens(R: C.Tokens))
238 if (T.Kind != tok::comment)
239 ++S.Tokens;
240 return S;
241 }
242
243 Score operator()(DirectiveTree::Directive &D) {
244 Score S;
245 S.Directives = 1;
246 S.Errors = D.Kind == tok::pp_error;
247 return S;
248 }
249
250 Score operator()(DirectiveTree::Conditional &C) {
251 Score Best;
252 bool MayTakeTrivial = true;
253 bool TookTrivial = false;
254
255 for (unsigned I = 0; I < C.Branches.size(); ++I) {
256 // Walk the branch to make its nested choices in any case.
257 Score BranchScore = walk(M&: C.Branches[I].second);
258 // If we already took an #if 1, don't consider any other branches.
259 if (TookTrivial)
260 continue;
261 // Is this a trivial #if 0 or #if 1?
262 if (auto TriviallyTaken = isTakenWhenReached(Dir: C.Branches[I].first)) {
263 if (!*TriviallyTaken)
264 continue; // Don't consider #if 0 even if it scores well.
265 if (MayTakeTrivial)
266 TookTrivial = true;
267 } else {
268 // After a nontrivial condition, #elif 1 isn't guaranteed taken.
269 MayTakeTrivial = false;
270 }
271 // Is this the best branch so far? (Including if it's #if 1).
272 if (TookTrivial || !C.Taken || BranchScore > Best) {
273 Best = BranchScore;
274 C.Taken = I;
275 }
276 }
277 return Best;
278 }
279 Score walk(DirectiveTree &M) {
280 Score S;
281 for (auto &C : M.Chunks)
282 S += std::visit(visitor&: *this, variants&: C);
283 return S;
284 }
285
286private:
287 // Return true if the directive starts an always-taken conditional branch,
288 // false if the branch is never taken, and std::nullopt otherwise.
289 std::optional<bool> isTakenWhenReached(const DirectiveTree::Directive &Dir) {
290 switch (Dir.Kind) {
291 case clang::tok::pp_if:
292 case clang::tok::pp_elif:
293 break; // handled below
294 case clang::tok::pp_else:
295 return true;
296 default: // #ifdef etc
297 return std::nullopt;
298 }
299
300 const auto &Tokens = Code.tokens(R: Dir.Tokens);
301 assert(!Tokens.empty() && Tokens.front().Kind == tok::hash);
302 const Token &Name = Tokens.front().nextNC();
303 const Token &Value = Name.nextNC();
304 // Does the condition consist of exactly one token?
305 if (&Value >= Tokens.end() || &Value.nextNC() < Tokens.end())
306 return std::nullopt;
307 return llvm::StringSwitch<std::optional<bool>>(Value.text())
308 .Cases(S0: "true", S1: "1", Value: true)
309 .Cases(S0: "false", S1: "0", Value: false)
310 .Default(Value: std::nullopt);
311 }
312
313 const TokenStream &Code;
314};
315
316} // namespace
317
318void chooseConditionalBranches(DirectiveTree &Tree, const TokenStream &Code) {
319 BranchChooser{Code}.walk(M&: Tree);
320}
321
322namespace {
323class Preprocessor {
324 const TokenStream &In;
325 TokenStream &Out;
326
327public:
328 Preprocessor(const TokenStream &In, TokenStream &Out) : In(In), Out(Out) {}
329 ~Preprocessor() { Out.finalize(); }
330
331 void walk(const DirectiveTree &T) {
332 for (const auto &C : T.Chunks)
333 std::visit(visitor&: *this, variants: C);
334 }
335
336 void operator()(const DirectiveTree::Code &C) {
337 for (const auto &Tok : In.tokens(R: C.Tokens))
338 Out.push(T: Tok);
339 }
340
341 void operator()(const DirectiveTree::Directive &) {}
342
343 void operator()(const DirectiveTree::Conditional &C) {
344 if (C.Taken)
345 walk(T: C.Branches[*C.Taken].second);
346 }
347};
348} // namespace
349
350TokenStream DirectiveTree::stripDirectives(const TokenStream &In) const {
351 TokenStream Out;
352 Preprocessor(In, Out).walk(T: *this);
353 return Out;
354}
355
356} // namespace pseudo
357} // namespace clang
358

source code of clang-tools-extra/pseudo/lib/DirectiveTree.cpp