1//===--- Bracket.cpp - Analyze bracket structure --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The basic phases of our bracket matching are:
10//
11// 1) A simple "greedy" match looks for well-nested subsequences.
12//
13// We can't fully trust the results of this, consider:
14// while (1) { // A
15// if (true) { // B
16// break;
17// } // C
18// Greedy matching will match B=C, when we should at least consider A=C.
19// However for the correct parts of the file, the greedy match gives the
20// right answer. It produces useful candidates for phase 2.
21//
22// simplePairBrackets handles this step.
23//
24// 2) Try to identify places where formatting indicates that the greedy match
25// was correct. This is similar to how a human would scan a large file.
26//
27// For example:
28// int foo() { // X
29// // indented
30// while (1) {
31// // valid code
32// }
33// return bar(42);
34// } // Y
35// We can "verify" that X..Y looks like a braced block, and the greedy match
36// tells us that substring is perfectly nested.
37// We trust the pairings of those brackets and don't examine them further.
38// However in the first example above, we do not trust B=C because the brace
39// indentation is suspect.
40//
41// FIXME: implement this step.
42//
43// 3) Run full best-match optimization on remaining brackets.
44//
45// Conceptually, this considers all possible matchings and optimizes cost:
46// - there is a cost for failing to match a bracket
47// - there is a variable cost for matching two brackets.
48// (For example if brace indentation doesn't match).
49//
50// In the first example we have three alternatives, and they are ranked:
51// 1) A=C, skip B
52// 2) B=C, skip A
53// 3) skip A, skip B, skip C
54// The cost for skipping a bracket is high, so option 3 is worst.
55// B=C costs more than A=C, because the indentation doesn't match.
56//
57// It would be correct to run this step alone, but it would be too slow.
58// The implementation is dynamic programming in N^3 space and N^2 time.
59// Having earlier steps filter out most brackets is key to performance.
60//
61// FIXME: implement this step.
62//
63//===----------------------------------------------------------------------===//
64
65#include "clang-pseudo/Bracket.h"
66
67namespace clang {
68namespace pseudo {
69namespace {
70
71struct Bracket {
72 using Index = unsigned;
73 constexpr static Index None = -1;
74
75 enum BracketKind : char { Paren, Brace, Square } Kind;
76 enum Direction : bool { Open, Close } Dir;
77 unsigned Line;
78 unsigned Indent;
79 Token::Index Tok;
80 Bracket::Index Pair = None;
81};
82
83// Find brackets in the stream and convert to Bracket struct.
84std::vector<Bracket> findBrackets(const TokenStream &Stream) {
85 std::vector<Bracket> Brackets;
86 auto Add = [&](const pseudo::Token &Tok, Bracket::BracketKind K,
87 Bracket::Direction D) {
88 Brackets.push_back(
89 x: {.Kind: K, .Dir: D, .Line: Tok.Line, .Indent: Tok.Indent, .Tok: Stream.index(T: Tok), .Pair: Bracket::None});
90 };
91 for (const auto &Tok : Stream.tokens()) {
92 switch (Tok.Kind) {
93 case clang::tok::l_paren:
94 Add(Tok, Bracket::Paren, Bracket::Open);
95 break;
96 case clang::tok::r_paren:
97 Add(Tok, Bracket::Paren, Bracket::Close);
98 break;
99 case clang::tok::l_brace:
100 Add(Tok, Bracket::Brace, Bracket::Open);
101 break;
102 case clang::tok::r_brace:
103 Add(Tok, Bracket::Brace, Bracket::Close);
104 break;
105 case clang::tok::l_square:
106 Add(Tok, Bracket::Square, Bracket::Open);
107 break;
108 case clang::tok::r_square:
109 Add(Tok, Bracket::Square, Bracket::Close);
110 break;
111 default:
112 break;
113 }
114 }
115 return Brackets;
116}
117
118// Write the bracket pairings from Brackets back to Tokens.
119void applyPairings(ArrayRef<Bracket> Brackets, TokenStream &Tokens) {
120 for (const auto &B : Brackets)
121 Tokens.tokens()[B.Tok].Pair =
122 (B.Pair == Bracket::None) ? 0 : (int32_t)Brackets[B.Pair].Tok - B.Tok;
123}
124
125// Find perfect pairings (ignoring whitespace) via greedy algorithm.
126// This means two brackets are paired if they match and the brackets between
127// them nest perfectly, with no skipped or crossed brackets.
128void simplePairBrackets(MutableArrayRef<Bracket> Brackets) {
129 std::vector<unsigned> Stack;
130 for (unsigned I = 0; I < Brackets.size(); ++I) {
131 if (Brackets[I].Dir == Bracket::Open) {
132 Stack.push_back(x: I);
133 } else if (!Stack.empty() &&
134 Brackets[Stack.back()].Kind == Brackets[I].Kind) {
135 Brackets[Stack.back()].Pair = I;
136 Brackets[I].Pair = Stack.back();
137 Stack.pop_back();
138 } else {
139 // Unpaired closer, no brackets on stack are part of a perfect sequence.
140 Stack.clear();
141 }
142 }
143 // Any remaining brackets on the stack stay unpaired.
144}
145
146} // namespace
147
148void pairBrackets(TokenStream &Stream) {
149 auto Brackets = findBrackets(Stream);
150 simplePairBrackets(Brackets);
151 applyPairings(Brackets, Tokens&: Stream);
152}
153
154} // namespace pseudo
155} // namespace clang
156

source code of clang-tools-extra/pseudo/lib/Bracket.cpp