1 | //===--- Bracket.cpp - Analyze bracket structure --------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The basic phases of our bracket matching are: |
10 | // |
11 | // 1) A simple "greedy" match looks for well-nested subsequences. |
12 | // |
13 | // We can't fully trust the results of this, consider: |
14 | // while (1) { // A |
15 | // if (true) { // B |
16 | // break; |
17 | // } // C |
18 | // Greedy matching will match B=C, when we should at least consider A=C. |
19 | // However for the correct parts of the file, the greedy match gives the |
20 | // right answer. It produces useful candidates for phase 2. |
21 | // |
22 | // simplePairBrackets handles this step. |
23 | // |
24 | // 2) Try to identify places where formatting indicates that the greedy match |
25 | // was correct. This is similar to how a human would scan a large file. |
26 | // |
27 | // For example: |
28 | // int foo() { // X |
29 | // // indented |
30 | // while (1) { |
31 | // // valid code |
32 | // } |
33 | // return bar(42); |
34 | // } // Y |
35 | // We can "verify" that X..Y looks like a braced block, and the greedy match |
36 | // tells us that substring is perfectly nested. |
37 | // We trust the pairings of those brackets and don't examine them further. |
38 | // However in the first example above, we do not trust B=C because the brace |
39 | // indentation is suspect. |
40 | // |
41 | // FIXME: implement this step. |
42 | // |
43 | // 3) Run full best-match optimization on remaining brackets. |
44 | // |
45 | // Conceptually, this considers all possible matchings and optimizes cost: |
46 | // - there is a cost for failing to match a bracket |
47 | // - there is a variable cost for matching two brackets. |
48 | // (For example if brace indentation doesn't match). |
49 | // |
50 | // In the first example we have three alternatives, and they are ranked: |
51 | // 1) A=C, skip B |
52 | // 2) B=C, skip A |
53 | // 3) skip A, skip B, skip C |
54 | // The cost for skipping a bracket is high, so option 3 is worst. |
55 | // B=C costs more than A=C, because the indentation doesn't match. |
56 | // |
57 | // It would be correct to run this step alone, but it would be too slow. |
58 | // The implementation is dynamic programming in N^3 space and N^2 time. |
59 | // Having earlier steps filter out most brackets is key to performance. |
60 | // |
61 | // FIXME: implement this step. |
62 | // |
63 | //===----------------------------------------------------------------------===// |
64 | |
65 | #include "clang-pseudo/Bracket.h" |
66 | |
67 | namespace clang { |
68 | namespace pseudo { |
69 | namespace { |
70 | |
71 | struct Bracket { |
72 | using Index = unsigned; |
73 | constexpr static Index None = -1; |
74 | |
75 | enum BracketKind : char { Paren, Brace, Square } Kind; |
76 | enum Direction : bool { Open, Close } Dir; |
77 | unsigned Line; |
78 | unsigned Indent; |
79 | Token::Index Tok; |
80 | Bracket::Index Pair = None; |
81 | }; |
82 | |
83 | // Find brackets in the stream and convert to Bracket struct. |
84 | std::vector<Bracket> findBrackets(const TokenStream &Stream) { |
85 | std::vector<Bracket> Brackets; |
86 | auto Add = [&](const pseudo::Token &Tok, Bracket::BracketKind K, |
87 | Bracket::Direction D) { |
88 | Brackets.push_back( |
89 | x: {.Kind: K, .Dir: D, .Line: Tok.Line, .Indent: Tok.Indent, .Tok: Stream.index(T: Tok), .Pair: Bracket::None}); |
90 | }; |
91 | for (const auto &Tok : Stream.tokens()) { |
92 | switch (Tok.Kind) { |
93 | case clang::tok::l_paren: |
94 | Add(Tok, Bracket::Paren, Bracket::Open); |
95 | break; |
96 | case clang::tok::r_paren: |
97 | Add(Tok, Bracket::Paren, Bracket::Close); |
98 | break; |
99 | case clang::tok::l_brace: |
100 | Add(Tok, Bracket::Brace, Bracket::Open); |
101 | break; |
102 | case clang::tok::r_brace: |
103 | Add(Tok, Bracket::Brace, Bracket::Close); |
104 | break; |
105 | case clang::tok::l_square: |
106 | Add(Tok, Bracket::Square, Bracket::Open); |
107 | break; |
108 | case clang::tok::r_square: |
109 | Add(Tok, Bracket::Square, Bracket::Close); |
110 | break; |
111 | default: |
112 | break; |
113 | } |
114 | } |
115 | return Brackets; |
116 | } |
117 | |
118 | // Write the bracket pairings from Brackets back to Tokens. |
119 | void applyPairings(ArrayRef<Bracket> Brackets, TokenStream &Tokens) { |
120 | for (const auto &B : Brackets) |
121 | Tokens.tokens()[B.Tok].Pair = |
122 | (B.Pair == Bracket::None) ? 0 : (int32_t)Brackets[B.Pair].Tok - B.Tok; |
123 | } |
124 | |
125 | // Find perfect pairings (ignoring whitespace) via greedy algorithm. |
126 | // This means two brackets are paired if they match and the brackets between |
127 | // them nest perfectly, with no skipped or crossed brackets. |
128 | void simplePairBrackets(MutableArrayRef<Bracket> Brackets) { |
129 | std::vector<unsigned> Stack; |
130 | for (unsigned I = 0; I < Brackets.size(); ++I) { |
131 | if (Brackets[I].Dir == Bracket::Open) { |
132 | Stack.push_back(x: I); |
133 | } else if (!Stack.empty() && |
134 | Brackets[Stack.back()].Kind == Brackets[I].Kind) { |
135 | Brackets[Stack.back()].Pair = I; |
136 | Brackets[I].Pair = Stack.back(); |
137 | Stack.pop_back(); |
138 | } else { |
139 | // Unpaired closer, no brackets on stack are part of a perfect sequence. |
140 | Stack.clear(); |
141 | } |
142 | } |
143 | // Any remaining brackets on the stack stay unpaired. |
144 | } |
145 | |
146 | } // namespace |
147 | |
148 | void pairBrackets(TokenStream &Stream) { |
149 | auto Brackets = findBrackets(Stream); |
150 | simplePairBrackets(Brackets); |
151 | applyPairings(Brackets, Tokens&: Stream); |
152 | } |
153 | |
154 | } // namespace pseudo |
155 | } // namespace clang |
156 | |