1 | //===--- ForestTest.cpp - Test Forest dump ----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang-pseudo/Forest.h" |
10 | #include "clang-pseudo/Token.h" |
11 | #include "clang/Basic/LangOptions.h" |
12 | #include "llvm/ADT/StringRef.h" |
13 | #include "gmock/gmock.h" |
14 | #include "gtest/gtest.h" |
15 | #include <vector> |
16 | |
17 | namespace clang { |
18 | namespace pseudo { |
19 | namespace { |
20 | |
21 | // FIXME: extract to a TestGrammar class to allow code sharing among tests. |
22 | class ForestTest : public ::testing::Test { |
23 | public: |
24 | void build(llvm::StringRef BNF) { |
25 | Diags.clear(); |
26 | G = Grammar::parseBNF(BNF, Diags); |
27 | } |
28 | |
29 | SymbolID symbol(llvm::StringRef Name) const { |
30 | for (unsigned I = 0; I < NumTerminals; ++I) |
31 | if (G.table().Terminals[I] == Name) |
32 | return tokenSymbol(TK: static_cast<tok::TokenKind>(I)); |
33 | for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID) |
34 | if (G.table().Nonterminals[ID].Name == Name) |
35 | return ID; |
36 | ADD_FAILURE() << "No such symbol found: " << Name; |
37 | return 0; |
38 | } |
39 | |
40 | RuleID ruleFor(llvm::StringRef NonterminalName) const { |
41 | auto RuleRange = G.table().Nonterminals[symbol(Name: NonterminalName)].RuleRange; |
42 | if (RuleRange.End - RuleRange.Start == 1) |
43 | return G.table().Nonterminals[symbol(Name: NonterminalName)].RuleRange.Start; |
44 | ADD_FAILURE() << "Expected a single rule for " << NonterminalName |
45 | << ", but it has " << RuleRange.End - RuleRange.Start |
46 | << " rule!\n" ; |
47 | return 0; |
48 | } |
49 | |
50 | protected: |
51 | Grammar G; |
52 | std::vector<std::string> Diags; |
53 | }; |
54 | |
55 | TEST_F(ForestTest, DumpBasic) { |
56 | build(BNF: R"cpp( |
57 | _ := add-expression EOF |
58 | add-expression := id-expression + id-expression |
59 | id-expression := IDENTIFIER |
60 | )cpp" ); |
61 | ASSERT_TRUE(Diags.empty()); |
62 | ForestArena Arena; |
63 | const auto &TS = |
64 | cook(lex("a + b" , clang::LangOptions()), clang::LangOptions()); |
65 | |
66 | auto T = Arena.createTerminals(Code: TS); |
67 | ASSERT_EQ(T.size(), 4u); |
68 | const auto *Left = &Arena.createSequence( |
69 | SID: symbol(Name: "id-expression" ), RID: ruleFor(NonterminalName: "id-expression" ), Elements: {&T.front()}); |
70 | const auto *Right = &Arena.createSequence(SID: symbol(Name: "id-expression" ), |
71 | RID: ruleFor(NonterminalName: "id-expression" ), Elements: {&T[2]}); |
72 | |
73 | const auto *Add = |
74 | &Arena.createSequence(SID: symbol(Name: "add-expression" ), RID: ruleFor(NonterminalName: "add-expression" ), |
75 | Elements: {Left, &T[1], Right}); |
76 | EXPECT_EQ(Add->dumpRecursive(G, true), |
77 | "[ 0, end) add-expression := id-expression + id-expression\n" |
78 | "[ 0, 1) ├─id-expression~IDENTIFIER := tok[0]\n" |
79 | "[ 1, 2) ├─+ := tok[1]\n" |
80 | "[ 2, end) └─id-expression~IDENTIFIER := tok[2]\n" ); |
81 | EXPECT_EQ(Add->dumpRecursive(G, false), |
82 | "[ 0, end) add-expression := id-expression + id-expression\n" |
83 | "[ 0, 1) ├─id-expression := IDENTIFIER\n" |
84 | "[ 0, 1) │ └─IDENTIFIER := tok[0]\n" |
85 | "[ 1, 2) ├─+ := tok[1]\n" |
86 | "[ 2, end) └─id-expression := IDENTIFIER\n" |
87 | "[ 2, end) └─IDENTIFIER := tok[2]\n" ); |
88 | } |
89 | |
90 | TEST_F(ForestTest, DumpAmbiguousAndRefs) { |
91 | build(BNF: R"cpp( |
92 | _ := type EOF |
93 | type := class-type # rule 4 |
94 | type := enum-type # rule 5 |
95 | class-type := shared-type |
96 | enum-type := shared-type |
97 | shared-type := IDENTIFIER)cpp" ); |
98 | ASSERT_TRUE(Diags.empty()); |
99 | ForestArena Arena; |
100 | const auto &TS = cook(lex("abc" , clang::LangOptions()), clang::LangOptions()); |
101 | |
102 | auto Terminals = Arena.createTerminals(Code: TS); |
103 | ASSERT_EQ(Terminals.size(), 2u); |
104 | |
105 | const auto *SharedType = &Arena.createSequence( |
106 | SID: symbol(Name: "shared-type" ), RID: ruleFor(NonterminalName: "shared-type" ), Elements: {Terminals.begin()}); |
107 | const auto *ClassType = &Arena.createSequence( |
108 | SID: symbol(Name: "class-type" ), RID: ruleFor(NonterminalName: "class-type" ), Elements: {SharedType}); |
109 | const auto *EnumType = &Arena.createSequence( |
110 | SID: symbol(Name: "enum-type" ), RID: ruleFor(NonterminalName: "enum-type" ), Elements: {SharedType}); |
111 | const auto *Alternative1 = |
112 | &Arena.createSequence(SID: symbol(Name: "type" ), /*RuleID=*/RID: 4, Elements: {ClassType}); |
113 | const auto *Alternative2 = |
114 | &Arena.createSequence(SID: symbol(Name: "type" ), /*RuleID=*/RID: 5, Elements: {EnumType}); |
115 | const auto *Type = |
116 | &Arena.createAmbiguous(SID: symbol(Name: "type" ), Alternatives: {Alternative1, Alternative2}); |
117 | EXPECT_EQ(Type->dumpRecursive(G), |
118 | "[ 0, end) type := <ambiguous>\n" |
119 | "[ 0, end) ├─type := class-type\n" |
120 | "[ 0, end) │ └─class-type := shared-type\n" |
121 | "[ 0, end) │ └─shared-type := IDENTIFIER #1\n" |
122 | "[ 0, end) │ └─IDENTIFIER := tok[0]\n" |
123 | "[ 0, end) └─type := enum-type\n" |
124 | "[ 0, end) └─enum-type := shared-type\n" |
125 | "[ 0, end) └─shared-type =#1\n" ); |
126 | } |
127 | |
128 | TEST_F(ForestTest, DumpAbbreviatedShared) { |
129 | build(BNF: R"cpp( |
130 | _ := A |
131 | A := B |
132 | B := * |
133 | )cpp" ); |
134 | |
135 | ForestArena Arena; |
136 | const auto *Star = &Arena.createTerminal(TK: tok::star, Start: 0); |
137 | |
138 | const auto *B = &Arena.createSequence(SID: symbol(Name: "B" ), RID: ruleFor(NonterminalName: "B" ), Elements: {Star}); |
139 | // We have two identical (but distinct) A nodes. |
140 | // The GLR parser would never produce this, but it makes the example simpler. |
141 | const auto *A1 = &Arena.createSequence(SID: symbol(Name: "A" ), RID: ruleFor(NonterminalName: "A" ), Elements: {B}); |
142 | const auto *A2 = &Arena.createSequence(SID: symbol(Name: "A" ), RID: ruleFor(NonterminalName: "A" ), Elements: {B}); |
143 | const auto *A = &Arena.createAmbiguous(SID: symbol(Name: "A" ), Alternatives: {A1, A2}); |
144 | |
145 | // We must not abbreviate away shared nodes: if we show A~* there's no way to |
146 | // show that the intermediate B node is shared between A1 and A2. |
147 | EXPECT_EQ(A->dumpRecursive(G, /*Abbreviate=*/true), |
148 | "[ 0, end) A := <ambiguous>\n" |
149 | "[ 0, end) ├─A~B := * #1\n" |
150 | "[ 0, end) │ └─* := tok[0]\n" |
151 | "[ 0, end) └─A~B =#1\n" ); |
152 | } |
153 | |
154 | TEST_F(ForestTest, Iteration) { |
155 | // Z |
156 | // / \ |
157 | // X Y |
158 | // |\| |
159 | // A B |
160 | ForestArena Arena; |
161 | const auto *A = &Arena.createTerminal(TK: tok::identifier, Start: 0); |
162 | const auto *B = &Arena.createOpaque(SID: 1, Start: 0); |
163 | const auto *X = &Arena.createSequence(SID: 2, RID: 1, Elements: {A, B}); |
164 | const auto *Y = &Arena.createSequence(SID: 2, RID: 2, Elements: {B}); |
165 | const auto *Z = &Arena.createAmbiguous(SID: 2, Alternatives: {X, Y}); |
166 | |
167 | std::vector<const ForestNode *> Nodes; |
168 | for (const ForestNode &N : Z->descendants()) |
169 | Nodes.push_back(x: &N); |
170 | EXPECT_THAT(Nodes, testing::UnorderedElementsAre(A, B, X, Y, Z)); |
171 | |
172 | Nodes.clear(); |
173 | for (const ForestNode &N : X->descendants()) |
174 | Nodes.push_back(x: &N); |
175 | EXPECT_THAT(Nodes, testing::UnorderedElementsAre(X, A, B)); |
176 | } |
177 | |
178 | } // namespace |
179 | } // namespace pseudo |
180 | } // namespace clang |
181 | |