1 | //===--- DirectiveTree.h - Find and strip preprocessor directives *- C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The pseudoparser tries to match a token stream to the C++ grammar. |
10 | // Preprocessor #defines and other directives are not part of this grammar, and |
11 | // should be removed before the file can be parsed. |
12 | // |
13 | // Conditional blocks like #if...#else...#endif are particularly tricky, as |
14 | // simply stripping the directives may not produce a grammatical result: |
15 | // |
16 | // return |
17 | // #ifndef DEBUG |
18 | // 1 |
19 | // #else |
20 | // 0 |
21 | // #endif |
22 | // ; |
23 | // |
24 | // This header supports analyzing and removing the directives in a source file. |
25 | // |
26 | //===----------------------------------------------------------------------===// |
27 | |
28 | #ifndef CLANG_PSEUDO_DIRECTIVETREE_H |
29 | #define CLANG_PSEUDO_DIRECTIVETREE_H |
30 | |
31 | #include "clang-pseudo/Token.h" |
32 | #include "clang/Basic/TokenKinds.h" |
33 | #include <optional> |
34 | #include <variant> |
35 | #include <vector> |
36 | |
37 | namespace clang { |
38 | namespace pseudo { |
39 | |
40 | /// Describes the structure of a source file, as seen by the preprocessor. |
41 | /// |
42 | /// The structure is a tree, whose leaves are plain source code and directives, |
43 | /// and whose internal nodes are #if...#endif sections. |
44 | /// |
45 | /// (root) |
46 | /// |-+ Directive #include <stdio.h> |
47 | /// |-+ Code int main() { |
48 | /// | ` printf("hello, "); |
49 | /// |-+ Conditional -+ Directive #ifndef NDEBUG |
50 | /// | |-+ Code printf("debug\n"); |
51 | /// | |-+ Directive #else |
52 | /// | |-+ Code printf("production\n"); |
53 | /// | `-+ Directive #endif |
54 | /// |-+ Code return 0; |
55 | /// ` } |
56 | /// |
57 | /// Unlike the clang preprocessor, we model the full tree explicitly. |
58 | /// This class does not recognize macro usage, only directives. |
59 | struct DirectiveTree { |
60 | /// A range of code (and possibly comments) containing no directives. |
61 | struct Code { |
62 | Token::Range Tokens; |
63 | }; |
64 | /// A preprocessor directive. |
65 | struct Directive { |
66 | /// Raw tokens making up the directive, starting with `#`. |
67 | Token::Range Tokens; |
68 | clang::tok::PPKeywordKind Kind = clang::tok::pp_not_keyword; |
69 | }; |
70 | /// A preprocessor conditional section. |
71 | /// |
72 | /// This starts with an #if, #ifdef, #ifndef etc directive. |
73 | /// It covers all #else branches, and spans until the matching #endif. |
74 | struct Conditional { |
75 | /// The sequence of directives that introduce top-level alternative parses. |
76 | /// |
77 | /// The first branch will have an #if type directive. |
78 | /// Subsequent branches will have #else type directives. |
79 | std::vector<std::pair<Directive, DirectiveTree>> Branches; |
80 | /// The directive terminating the conditional, should be #endif. |
81 | Directive End; |
82 | /// The index of the conditional branch we chose as active. |
83 | /// std::nullopt indicates no branch was taken (e.g. #if 0 ... #endif). |
84 | /// The initial tree from `parse()` has no branches marked as taken. |
85 | /// See `chooseConditionalBranches()`. |
86 | std::optional<unsigned> Taken; |
87 | }; |
88 | |
89 | /// Some piece of the file. {One of Code, Directive, Conditional}. |
90 | using Chunk = std::variant<Code, Directive, Conditional>; |
91 | std::vector<Chunk> Chunks; |
92 | |
93 | /// Extract preprocessor structure by examining the raw tokens. |
94 | static DirectiveTree parse(const TokenStream &); |
95 | |
96 | /// Produce a parseable token stream by stripping all directive tokens. |
97 | /// |
98 | /// Conditional sections are replaced by the taken branch, if any. |
99 | /// This tree must describe the provided token stream. |
100 | TokenStream stripDirectives(const TokenStream &) const; |
101 | }; |
102 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &); |
103 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Code &); |
104 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
105 | const DirectiveTree::Directive &); |
106 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
107 | const DirectiveTree::Conditional &); |
108 | |
109 | /// Selects a "taken" branch for each conditional directive in the file. |
110 | /// |
111 | /// The choice is somewhat arbitrary, but aims to produce a useful parse: |
112 | /// - idioms like `#if 0` are respected |
113 | /// - we avoid paths that reach `#error` |
114 | /// - we try to maximize the amount of code seen |
115 | /// The choice may also be "no branch taken". |
116 | /// |
117 | /// Choices are also made for conditionals themselves inside not-taken branches: |
118 | /// #if 1 // taken! |
119 | /// #else // not taken |
120 | /// #if 1 // taken! |
121 | /// #endif |
122 | /// #endif |
123 | /// |
124 | /// The choices are stored in Conditional::Taken nodes. |
125 | void chooseConditionalBranches(DirectiveTree &, const TokenStream &Code); |
126 | |
127 | } // namespace pseudo |
128 | } // namespace clang |
129 | |
130 | #endif // CLANG_PSEUDO_DIRECTIVETREE_H |
131 | |