1//===--- ForestTest.cpp - Test Forest dump ----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang-pseudo/Forest.h"
10#include "clang-pseudo/Token.h"
11#include "clang/Basic/LangOptions.h"
12#include "llvm/ADT/StringRef.h"
13#include "gmock/gmock.h"
14#include "gtest/gtest.h"
15#include <vector>
16
17namespace clang {
18namespace pseudo {
19namespace {
20
21// FIXME: extract to a TestGrammar class to allow code sharing among tests.
22class ForestTest : public ::testing::Test {
23public:
24 void build(llvm::StringRef BNF) {
25 Diags.clear();
26 G = Grammar::parseBNF(BNF, Diags);
27 }
28
29 SymbolID symbol(llvm::StringRef Name) const {
30 for (unsigned I = 0; I < NumTerminals; ++I)
31 if (G.table().Terminals[I] == Name)
32 return tokenSymbol(TK: static_cast<tok::TokenKind>(I));
33 for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID)
34 if (G.table().Nonterminals[ID].Name == Name)
35 return ID;
36 ADD_FAILURE() << "No such symbol found: " << Name;
37 return 0;
38 }
39
40 RuleID ruleFor(llvm::StringRef NonterminalName) const {
41 auto RuleRange = G.table().Nonterminals[symbol(Name: NonterminalName)].RuleRange;
42 if (RuleRange.End - RuleRange.Start == 1)
43 return G.table().Nonterminals[symbol(Name: NonterminalName)].RuleRange.Start;
44 ADD_FAILURE() << "Expected a single rule for " << NonterminalName
45 << ", but it has " << RuleRange.End - RuleRange.Start
46 << " rule!\n";
47 return 0;
48 }
49
50protected:
51 Grammar G;
52 std::vector<std::string> Diags;
53};
54
55TEST_F(ForestTest, DumpBasic) {
56 build(BNF: R"cpp(
57 _ := add-expression EOF
58 add-expression := id-expression + id-expression
59 id-expression := IDENTIFIER
60 )cpp");
61 ASSERT_TRUE(Diags.empty());
62 ForestArena Arena;
63 const auto &TS =
64 cook(lex("a + b", clang::LangOptions()), clang::LangOptions());
65
66 auto T = Arena.createTerminals(Code: TS);
67 ASSERT_EQ(T.size(), 4u);
68 const auto *Left = &Arena.createSequence(
69 SID: symbol(Name: "id-expression"), RID: ruleFor(NonterminalName: "id-expression"), Elements: {&T.front()});
70 const auto *Right = &Arena.createSequence(SID: symbol(Name: "id-expression"),
71 RID: ruleFor(NonterminalName: "id-expression"), Elements: {&T[2]});
72
73 const auto *Add =
74 &Arena.createSequence(SID: symbol(Name: "add-expression"), RID: ruleFor(NonterminalName: "add-expression"),
75 Elements: {Left, &T[1], Right});
76 EXPECT_EQ(Add->dumpRecursive(G, true),
77 "[ 0, end) add-expression := id-expression + id-expression\n"
78 "[ 0, 1) ├─id-expression~IDENTIFIER := tok[0]\n"
79 "[ 1, 2) ├─+ := tok[1]\n"
80 "[ 2, end) └─id-expression~IDENTIFIER := tok[2]\n");
81 EXPECT_EQ(Add->dumpRecursive(G, false),
82 "[ 0, end) add-expression := id-expression + id-expression\n"
83 "[ 0, 1) ├─id-expression := IDENTIFIER\n"
84 "[ 0, 1) │ └─IDENTIFIER := tok[0]\n"
85 "[ 1, 2) ├─+ := tok[1]\n"
86 "[ 2, end) └─id-expression := IDENTIFIER\n"
87 "[ 2, end) └─IDENTIFIER := tok[2]\n");
88}
89
90TEST_F(ForestTest, DumpAmbiguousAndRefs) {
91 build(BNF: R"cpp(
92 _ := type EOF
93 type := class-type # rule 4
94 type := enum-type # rule 5
95 class-type := shared-type
96 enum-type := shared-type
97 shared-type := IDENTIFIER)cpp");
98 ASSERT_TRUE(Diags.empty());
99 ForestArena Arena;
100 const auto &TS = cook(lex("abc", clang::LangOptions()), clang::LangOptions());
101
102 auto Terminals = Arena.createTerminals(Code: TS);
103 ASSERT_EQ(Terminals.size(), 2u);
104
105 const auto *SharedType = &Arena.createSequence(
106 SID: symbol(Name: "shared-type"), RID: ruleFor(NonterminalName: "shared-type"), Elements: {Terminals.begin()});
107 const auto *ClassType = &Arena.createSequence(
108 SID: symbol(Name: "class-type"), RID: ruleFor(NonterminalName: "class-type"), Elements: {SharedType});
109 const auto *EnumType = &Arena.createSequence(
110 SID: symbol(Name: "enum-type"), RID: ruleFor(NonterminalName: "enum-type"), Elements: {SharedType});
111 const auto *Alternative1 =
112 &Arena.createSequence(SID: symbol(Name: "type"), /*RuleID=*/RID: 4, Elements: {ClassType});
113 const auto *Alternative2 =
114 &Arena.createSequence(SID: symbol(Name: "type"), /*RuleID=*/RID: 5, Elements: {EnumType});
115 const auto *Type =
116 &Arena.createAmbiguous(SID: symbol(Name: "type"), Alternatives: {Alternative1, Alternative2});
117 EXPECT_EQ(Type->dumpRecursive(G),
118 "[ 0, end) type := <ambiguous>\n"
119 "[ 0, end) ├─type := class-type\n"
120 "[ 0, end) │ └─class-type := shared-type\n"
121 "[ 0, end) │ └─shared-type := IDENTIFIER #1\n"
122 "[ 0, end) │ └─IDENTIFIER := tok[0]\n"
123 "[ 0, end) └─type := enum-type\n"
124 "[ 0, end) └─enum-type := shared-type\n"
125 "[ 0, end) └─shared-type =#1\n");
126}
127
128TEST_F(ForestTest, DumpAbbreviatedShared) {
129 build(BNF: R"cpp(
130 _ := A
131 A := B
132 B := *
133 )cpp");
134
135 ForestArena Arena;
136 const auto *Star = &Arena.createTerminal(TK: tok::star, Start: 0);
137
138 const auto *B = &Arena.createSequence(SID: symbol(Name: "B"), RID: ruleFor(NonterminalName: "B"), Elements: {Star});
139 // We have two identical (but distinct) A nodes.
140 // The GLR parser would never produce this, but it makes the example simpler.
141 const auto *A1 = &Arena.createSequence(SID: symbol(Name: "A"), RID: ruleFor(NonterminalName: "A"), Elements: {B});
142 const auto *A2 = &Arena.createSequence(SID: symbol(Name: "A"), RID: ruleFor(NonterminalName: "A"), Elements: {B});
143 const auto *A = &Arena.createAmbiguous(SID: symbol(Name: "A"), Alternatives: {A1, A2});
144
145 // We must not abbreviate away shared nodes: if we show A~* there's no way to
146 // show that the intermediate B node is shared between A1 and A2.
147 EXPECT_EQ(A->dumpRecursive(G, /*Abbreviate=*/true),
148 "[ 0, end) A := <ambiguous>\n"
149 "[ 0, end) ├─A~B := * #1\n"
150 "[ 0, end) │ └─* := tok[0]\n"
151 "[ 0, end) └─A~B =#1\n");
152}
153
154TEST_F(ForestTest, Iteration) {
155 // Z
156 // / \
157 // X Y
158 // |\|
159 // A B
160 ForestArena Arena;
161 const auto *A = &Arena.createTerminal(TK: tok::identifier, Start: 0);
162 const auto *B = &Arena.createOpaque(SID: 1, Start: 0);
163 const auto *X = &Arena.createSequence(SID: 2, RID: 1, Elements: {A, B});
164 const auto *Y = &Arena.createSequence(SID: 2, RID: 2, Elements: {B});
165 const auto *Z = &Arena.createAmbiguous(SID: 2, Alternatives: {X, Y});
166
167 std::vector<const ForestNode *> Nodes;
168 for (const ForestNode &N : Z->descendants())
169 Nodes.push_back(x: &N);
170 EXPECT_THAT(Nodes, testing::UnorderedElementsAre(A, B, X, Y, Z));
171
172 Nodes.clear();
173 for (const ForestNode &N : X->descendants())
174 Nodes.push_back(x: &N);
175 EXPECT_THAT(Nodes, testing::UnorderedElementsAre(X, A, B));
176}
177
178} // namespace
179} // namespace pseudo
180} // namespace clang
181

source code of clang-tools-extra/pseudo/unittests/ForestTest.cpp