1 | //===--- GrammarTest.cpp - grammar tests -----------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang-pseudo/grammar/Grammar.h" |
10 | #include "gmock/gmock.h" |
11 | #include "gtest/gtest.h" |
12 | #include <memory> |
13 | |
14 | namespace clang { |
15 | namespace pseudo { |
16 | namespace { |
17 | |
18 | using testing::AllOf; |
19 | using testing::ElementsAre; |
20 | using testing::IsEmpty; |
21 | using testing::Pair; |
22 | using testing::UnorderedElementsAre; |
23 | |
24 | MATCHER_P(TargetID, SID, "" ) { return arg.Target == SID; } |
25 | template <typename... T> testing::Matcher<const Rule &> Sequence(T... IDs) { |
26 | return testing::Property(&Rule::seq, ElementsAre(IDs...)); |
27 | } |
28 | |
29 | class GrammarTest : public ::testing::Test { |
30 | public: |
31 | void build(llvm::StringRef BNF) { |
32 | Diags.clear(); |
33 | G = Grammar::parseBNF(BNF, Diags); |
34 | } |
35 | |
36 | SymbolID id(llvm::StringRef Name) const { |
37 | for (unsigned I = 0; I < NumTerminals; ++I) |
38 | if (G.table().Terminals[I] == Name) |
39 | return tokenSymbol(TK: static_cast<tok::TokenKind>(I)); |
40 | for (SymbolID ID = 0; ID < G.table().Nonterminals.size(); ++ID) |
41 | if (G.table().Nonterminals[ID].Name == Name) |
42 | return ID; |
43 | ADD_FAILURE() << "No such symbol found: " << Name; |
44 | return 0; |
45 | } |
46 | |
47 | RuleID ruleFor(llvm::StringRef NonterminalName) const { |
48 | auto RuleRange = G.table().Nonterminals[id(Name: NonterminalName)].RuleRange; |
49 | if (RuleRange.End - RuleRange.Start == 1) |
50 | return G.table().Nonterminals[id(Name: NonterminalName)].RuleRange.Start; |
51 | ADD_FAILURE() << "Expected a single rule for " << NonterminalName |
52 | << ", but it has " << RuleRange.End - RuleRange.Start |
53 | << " rule!\n" ; |
54 | return 0; |
55 | } |
56 | |
57 | protected: |
58 | Grammar G; |
59 | std::vector<std::string> Diags; |
60 | }; |
61 | |
62 | TEST_F(GrammarTest, Basic) { |
63 | build(BNF: "_ := IDENTIFIER + _ # comment" ); |
64 | EXPECT_THAT(Diags, IsEmpty()); |
65 | |
66 | auto ExpectedRule = |
67 | AllOf(matchers: TargetID(gmock_p0: id(Name: "_" )), matchers: Sequence(IDs: id(Name: "IDENTIFIER" ), IDs: id(Name: "+" ), IDs: id(Name: "_" ))); |
68 | EXPECT_EQ(G.symbolName(id("_" )), "_" ); |
69 | EXPECT_THAT(G.rulesFor(id("_" )), UnorderedElementsAre(ExpectedRule)); |
70 | const auto &Rule = G.lookupRule(/*RID=*/0); |
71 | EXPECT_THAT(Rule, ExpectedRule); |
72 | EXPECT_THAT(G.symbolName(Rule.seq()[0]), "IDENTIFIER" ); |
73 | EXPECT_THAT(G.symbolName(Rule.seq()[1]), "+" ); |
74 | EXPECT_THAT(G.symbolName(Rule.seq()[2]), "_" ); |
75 | } |
76 | |
77 | TEST_F(GrammarTest, EliminatedOptional) { |
78 | build(BNF: "_ := CONST_opt INT ;_opt" ); |
79 | EXPECT_THAT(Diags, IsEmpty()); |
80 | EXPECT_THAT(G.table().Rules, |
81 | UnorderedElementsAre(Sequence(id("INT" )), |
82 | Sequence(id("CONST" ), id("INT" )), |
83 | Sequence(id("CONST" ), id("INT" ), id(";" )), |
84 | Sequence(id("INT" ), id(";" )))); |
85 | } |
86 | |
87 | TEST_F(GrammarTest, RuleIDSorted) { |
88 | build(BNF: R"bnf( |
89 | _ := x |
90 | |
91 | x := y |
92 | y := z |
93 | z := IDENTIFIER |
94 | )bnf" ); |
95 | ASSERT_TRUE(Diags.empty()); |
96 | |
97 | EXPECT_LT(ruleFor("z" ), ruleFor("y" )); |
98 | EXPECT_LT(ruleFor("y" ), ruleFor("x" )); |
99 | EXPECT_LT(ruleFor("x" ), ruleFor("_" )); |
100 | } |
101 | |
102 | TEST_F(GrammarTest, Annotation) { |
103 | build(BNF: R"bnf( |
104 | _ := x |
105 | x := IDENTIFIER [guard] |
106 | )bnf" ); |
107 | ASSERT_THAT(Diags, IsEmpty()); |
108 | EXPECT_FALSE(G.lookupRule(ruleFor("_" )).Guarded); |
109 | EXPECT_TRUE(G.lookupRule(ruleFor("x" )).Guarded); |
110 | } |
111 | |
112 | TEST_F(GrammarTest, Diagnostics) { |
113 | build(BNF: R"cpp( |
114 | _ := ,_opt |
115 | _ := undefined-sym |
116 | null := |
117 | _ := IDENFIFIE # a typo of the terminal IDENFITIER |
118 | |
119 | invalid |
120 | # cycle |
121 | a := b |
122 | b := a |
123 | |
124 | _ := IDENTIFIER [unknown=value] |
125 | )cpp" ); |
126 | |
127 | EXPECT_EQ(G.underscore(), id("_" )); |
128 | EXPECT_THAT(Diags, UnorderedElementsAre( |
129 | "Rule '_ := ,_opt' has a nullable RHS" , |
130 | "Rule 'null := ' has a nullable RHS" , |
131 | "No rules for nonterminal: undefined-sym" , |
132 | "Failed to parse 'invalid': no separator :=" , |
133 | "Token-like name IDENFIFIE is used as a nonterminal" , |
134 | "No rules for nonterminal: IDENFIFIE" , |
135 | "The grammar contains a cycle involving symbol a" , |
136 | "Unknown attribute 'unknown'" )); |
137 | } |
138 | |
139 | TEST_F(GrammarTest, DuplicatedDiagnostics) { |
140 | build(BNF: R"cpp( |
141 | _ := test |
142 | |
143 | test := INT |
144 | test := DOUBLE |
145 | test := INT |
146 | )cpp" ); |
147 | |
148 | EXPECT_THAT(Diags, UnorderedElementsAre("Duplicate rule: `test := INT`" )); |
149 | } |
150 | |
151 | TEST_F(GrammarTest, FirstAndFollowSets) { |
152 | build( |
153 | BNF: R"bnf( |
154 | _ := expr |
155 | expr := expr - term |
156 | expr := term |
157 | term := IDENTIFIER |
158 | term := ( expr ) |
159 | )bnf" ); |
160 | ASSERT_TRUE(Diags.empty()); |
161 | auto ToPairs = [](std::vector<llvm::DenseSet<SymbolID>> Input) { |
162 | std::vector<std::pair<SymbolID, llvm::DenseSet<SymbolID>>> Sets; |
163 | for (SymbolID ID = 0; ID < Input.size(); ++ID) |
164 | Sets.emplace_back(args&: ID, args: std::move(Input[ID])); |
165 | return Sets; |
166 | }; |
167 | |
168 | EXPECT_THAT( |
169 | ToPairs(firstSets(G)), |
170 | UnorderedElementsAre( |
171 | Pair(id("_" ), UnorderedElementsAre(id("IDENTIFIER" ), id("(" ))), |
172 | Pair(id("expr" ), UnorderedElementsAre(id("IDENTIFIER" ), id("(" ))), |
173 | Pair(id("term" ), UnorderedElementsAre(id("IDENTIFIER" ), id("(" ))))); |
174 | EXPECT_THAT( |
175 | ToPairs(followSets(G)), |
176 | UnorderedElementsAre( |
177 | Pair(id("_" ), UnorderedElementsAre(id("EOF" ))), |
178 | Pair(id("expr" ), UnorderedElementsAre(id("-" ), id("EOF" ), id(")" ))), |
179 | Pair(id("term" ), UnorderedElementsAre(id("-" ), id("EOF" ), id(")" ))))); |
180 | |
181 | build(BNF: R"bnf( |
182 | # A simplfied C++ decl-specifier-seq. |
183 | _ := decl-specifier-seq |
184 | decl-specifier-seq := decl-specifier decl-specifier-seq |
185 | decl-specifier-seq := decl-specifier |
186 | decl-specifier := simple-type-specifier |
187 | decl-specifier := INLINE |
188 | simple-type-specifier := INT |
189 | )bnf" ); |
190 | ASSERT_TRUE(Diags.empty()); |
191 | EXPECT_THAT( |
192 | ToPairs(firstSets(G)), |
193 | UnorderedElementsAre( |
194 | Pair(id("_" ), UnorderedElementsAre(id("INLINE" ), id("INT" ))), |
195 | Pair(id("decl-specifier-seq" ), |
196 | UnorderedElementsAre(id("INLINE" ), id("INT" ))), |
197 | Pair(id("simple-type-specifier" ), UnorderedElementsAre(id("INT" ))), |
198 | Pair(id("decl-specifier" ), |
199 | UnorderedElementsAre(id("INLINE" ), id("INT" ))))); |
200 | EXPECT_THAT( |
201 | ToPairs(followSets(G)), |
202 | UnorderedElementsAre( |
203 | Pair(id("_" ), UnorderedElementsAre(id("EOF" ))), |
204 | Pair(id("decl-specifier-seq" ), UnorderedElementsAre(id("EOF" ))), |
205 | Pair(id("decl-specifier" ), |
206 | UnorderedElementsAre(id("INLINE" ), id("INT" ), id("EOF" ))), |
207 | Pair(id("simple-type-specifier" ), |
208 | UnorderedElementsAre(id("INLINE" ), id("INT" ), id("EOF" ))))); |
209 | } |
210 | |
211 | } // namespace |
212 | } // namespace pseudo |
213 | } // namespace clang |
214 | |