| 1 | //===--- DirectiveTree.h - Find and strip preprocessor directives *- C++-*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // The pseudoparser tries to match a token stream to the C++ grammar. |
| 10 | // Preprocessor #defines and other directives are not part of this grammar, and |
| 11 | // should be removed before the file can be parsed. |
| 12 | // |
| 13 | // Conditional blocks like #if...#else...#endif are particularly tricky, as |
| 14 | // simply stripping the directives may not produce a grammatical result: |
| 15 | // |
| 16 | // return |
| 17 | // #ifndef DEBUG |
| 18 | // 1 |
| 19 | // #else |
| 20 | // 0 |
| 21 | // #endif |
| 22 | // ; |
| 23 | // |
| 24 | // This header supports analyzing and removing the directives in a source file. |
| 25 | // |
| 26 | //===----------------------------------------------------------------------===// |
| 27 | |
| 28 | #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H |
| 29 | #define |
| 30 | |
| 31 | #include "Token.h" |
| 32 | #include "clang/Basic/TokenKinds.h" |
| 33 | #include <optional> |
| 34 | #include <variant> |
| 35 | #include <vector> |
| 36 | |
| 37 | namespace clang { |
| 38 | namespace clangd { |
| 39 | |
| 40 | /// Describes the structure of a source file, as seen by the preprocessor. |
| 41 | /// |
| 42 | /// The structure is a tree, whose leaves are plain source code and directives, |
| 43 | /// and whose internal nodes are #if...#endif sections. |
| 44 | /// |
| 45 | /// (root) |
| 46 | /// |-+ Directive #include <stdio.h> |
| 47 | /// |-+ Code int main() { |
| 48 | /// | ` printf("hello, "); |
| 49 | /// |-+ Conditional -+ Directive #ifndef NDEBUG |
| 50 | /// | |-+ Code printf("debug\n"); |
| 51 | /// | |-+ Directive #else |
| 52 | /// | |-+ Code printf("production\n"); |
| 53 | /// | `-+ Directive #endif |
| 54 | /// |-+ Code return 0; |
| 55 | /// ` } |
| 56 | /// |
| 57 | /// Unlike the clang preprocessor, we model the full tree explicitly. |
| 58 | /// This class does not recognize macro usage, only directives. |
| 59 | struct DirectiveTree { |
| 60 | /// A range of code (and possibly comments) containing no directives. |
| 61 | struct Code { |
| 62 | Token::Range Tokens; |
| 63 | }; |
| 64 | /// A preprocessor directive. |
| 65 | struct Directive { |
| 66 | /// Raw tokens making up the directive, starting with `#`. |
| 67 | Token::Range Tokens; |
| 68 | clang::tok::PPKeywordKind Kind = clang::tok::pp_not_keyword; |
| 69 | }; |
| 70 | /// A preprocessor conditional section. |
| 71 | /// |
| 72 | /// This starts with an #if, #ifdef, #ifndef etc directive. |
| 73 | /// It covers all #else branches, and spans until the matching #endif. |
| 74 | struct Conditional { |
| 75 | /// The sequence of directives that introduce top-level alternative parses. |
| 76 | /// |
| 77 | /// The first branch will have an #if type directive. |
| 78 | /// Subsequent branches will have #else type directives. |
| 79 | std::vector<std::pair<Directive, DirectiveTree>> Branches; |
| 80 | /// The directive terminating the conditional, should be #endif. |
| 81 | Directive End; |
| 82 | /// The index of the conditional branch we chose as active. |
| 83 | /// std::nullopt indicates no branch was taken (e.g. #if 0 ... #endif). |
| 84 | /// The initial tree from `parse()` has no branches marked as taken. |
| 85 | /// See `chooseConditionalBranches()`. |
| 86 | std::optional<unsigned> Taken; |
| 87 | }; |
| 88 | |
| 89 | /// Some piece of the file. {One of Code, Directive, Conditional}. |
| 90 | using Chunk = std::variant<Code, Directive, Conditional>; |
| 91 | std::vector<Chunk> Chunks; |
| 92 | |
| 93 | /// Extract preprocessor structure by examining the raw tokens. |
| 94 | static DirectiveTree parse(const TokenStream &); |
| 95 | |
| 96 | /// Produce a parseable token stream by stripping all directive tokens. |
| 97 | /// |
| 98 | /// Conditional sections are replaced by the taken branch, if any. |
| 99 | /// This tree must describe the provided token stream. |
| 100 | TokenStream stripDirectives(const TokenStream &) const; |
| 101 | }; |
| 102 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree &); |
| 103 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveTree::Code &); |
| 104 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 105 | const DirectiveTree::Directive &); |
| 106 | llvm::raw_ostream &operator<<(llvm::raw_ostream &, |
| 107 | const DirectiveTree::Conditional &); |
| 108 | |
| 109 | /// Selects a "taken" branch for each conditional directive in the file. |
| 110 | /// |
| 111 | /// The choice is somewhat arbitrary, but aims to produce a useful parse: |
| 112 | /// - idioms like `#if 0` are respected |
| 113 | /// - we avoid paths that reach `#error` |
| 114 | /// - we try to maximize the amount of code seen |
| 115 | /// The choice may also be "no branch taken". |
| 116 | /// |
| 117 | /// Choices are also made for conditionals themselves inside not-taken branches: |
| 118 | /// #if 1 // taken! |
| 119 | /// #else // not taken |
| 120 | /// #if 1 // taken! |
| 121 | /// #endif |
| 122 | /// #endif |
| 123 | /// |
| 124 | /// The choices are stored in Conditional::Taken nodes. |
| 125 | void chooseConditionalBranches(DirectiveTree &, const TokenStream &Code); |
| 126 | |
| 127 | } // namespace clangd |
| 128 | } // namespace clang |
| 129 | |
| 130 | #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_DIRECTIVETREE_H |
| 131 | |