1 | //===--- LRTableBuild.cpp - Build a LRTable from LRGraph ---------*- C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "clang-pseudo/grammar/Grammar.h" |
10 | #include "clang-pseudo/grammar/LRGraph.h" |
11 | #include "clang-pseudo/grammar/LRTable.h" |
12 | #include "clang/Basic/TokenKinds.h" |
13 | #include "llvm/ADT/STLExtras.h" |
14 | #include "llvm/ADT/SmallSet.h" |
15 | #include <cstdint> |
16 | |
17 | namespace clang { |
18 | namespace pseudo { |
19 | |
20 | LRTable LRTable::Builder::build() && { |
21 | assert(NumNonterminals != 0 && "Set NumNonterminals or init with grammar" ); |
22 | LRTable Table; |
23 | |
24 | // Count number of states: every state has to be reachable somehow. |
25 | StateID MaxState = 0; |
26 | for (const auto &Entry : StartStates) |
27 | MaxState = std::max(a: MaxState, b: Entry.second); |
28 | for (const auto &Entry : Transition) |
29 | MaxState = std::max(a: MaxState, b: Entry.second); |
30 | unsigned NumStates = MaxState + 1; |
31 | |
32 | Table.StartStates = std::move(StartStates); |
33 | |
34 | // Compile the goto and shift actions into transition tables. |
35 | llvm::DenseMap<unsigned, SymbolID> Gotos; |
36 | llvm::DenseMap<unsigned, SymbolID> Shifts; |
37 | for (const auto &E : Transition) { |
38 | if (isToken(ID: E.first.second)) |
39 | Shifts.try_emplace(Key: shiftIndex(State: E.first.first, Terminal: E.first.second, NumStates), |
40 | Args: E.second); |
41 | else |
42 | Gotos.try_emplace(Key: gotoIndex(State: E.first.first, Nonterminal: E.first.second, NumStates), |
43 | Args: E.second); |
44 | } |
45 | Table.Shifts = TransitionTable(Shifts, NumStates * NumTerminals); |
46 | Table.Gotos = TransitionTable(Gotos, NumStates * NumNonterminals); |
47 | |
48 | // Compile the follow sets into a bitmap. |
49 | Table.FollowSets.resize(N: tok::NUM_TOKENS * FollowSets.size()); |
50 | for (SymbolID NT = 0; NT < FollowSets.size(); ++NT) |
51 | for (SymbolID Follow : FollowSets[NT]) |
52 | Table.FollowSets.set(NT * tok::NUM_TOKENS + symbolToToken(SID: Follow)); |
53 | |
54 | // Store the reduce actions in a vector partitioned by state. |
55 | Table.ReduceOffset.reserve(n: NumStates + 1); |
56 | std::vector<RuleID> StateRules; |
57 | for (StateID S = 0; S < NumStates; ++S) { |
58 | Table.ReduceOffset.push_back(x: Table.Reduces.size()); |
59 | auto It = Reduce.find(Val: S); |
60 | if (It == Reduce.end()) |
61 | continue; |
62 | Table.Reduces.insert(position: Table.Reduces.end(), first: It->second.begin(), |
63 | last: It->second.end()); |
64 | llvm::sort(Start: Table.Reduces.begin() + Table.ReduceOffset.back(), |
65 | End: Table.Reduces.end()); |
66 | } |
67 | Table.ReduceOffset.push_back(x: Table.Reduces.size()); |
68 | |
69 | // Error recovery entries: sort (no dups already), and build offset lookup. |
70 | llvm::sort(C&: Recoveries, Comp: [&](const auto &L, const auto &R) { |
71 | return std::tie(L.first, L.second.Result, L.second.Strategy) < |
72 | std::tie(R.first, R.second.Result, R.second.Strategy); |
73 | }); |
74 | Table.Recoveries.reserve(n: Recoveries.size()); |
75 | for (const auto &R : Recoveries) |
76 | Table.Recoveries.push_back(x: {.Strategy: R.second.Strategy, .Result: R.second.Result}); |
77 | Table.RecoveryOffset = std::vector<uint32_t>(NumStates + 1, 0); |
78 | unsigned SortedIndex = 0; |
79 | for (StateID State = 0; State < NumStates; ++State) { |
80 | Table.RecoveryOffset[State] = SortedIndex; |
81 | while (SortedIndex < Recoveries.size() && |
82 | Recoveries[SortedIndex].first == State) |
83 | SortedIndex++; |
84 | } |
85 | Table.RecoveryOffset[NumStates] = SortedIndex; |
86 | assert(SortedIndex == Recoveries.size()); |
87 | |
88 | return Table; |
89 | } |
90 | |
91 | LRTable LRTable::buildSLR(const Grammar &G) { |
92 | auto Graph = LRGraph::buildLR0(G); |
93 | Builder Build(G); |
94 | Build.StartStates = Graph.startStates(); |
95 | for (const auto &T : Graph.edges()) |
96 | Build.Transition.try_emplace(Key: {T.Src, T.Label}, Args: T.Dst); |
97 | for (const auto &Entry : Graph.recoveries()) |
98 | Build.Recoveries.push_back( |
99 | x: {Entry.Src, Recovery{.Strategy: Entry.Strategy, .Result: Entry.Result}}); |
100 | Build.FollowSets = followSets(G); |
101 | assert(Graph.states().size() <= (1 << StateBits) && |
102 | "Graph states execceds the maximum limit!" ); |
103 | // Add reduce actions. |
104 | for (StateID SID = 0; SID < Graph.states().size(); ++SID) { |
105 | for (const Item &I : Graph.states()[SID].Items) { |
106 | // If we've just parsed the start symbol, this means we successfully parse |
107 | // the input. We don't add the reduce action of `_ := start_symbol` in the |
108 | // LRTable (the GLR parser handles it specifically). |
109 | if (G.lookupRule(RID: I.rule()).Target == G.underscore() && !I.hasNext()) |
110 | continue; |
111 | if (!I.hasNext()) |
112 | // If we've reached the end of a rule A := ..., then we can reduce if |
113 | // the next token is in the follow set of A. |
114 | Build.Reduce[SID].insert(V: I.rule()); |
115 | } |
116 | } |
117 | return std::move(Build).build(); |
118 | } |
119 | |
120 | } // namespace pseudo |
121 | } // namespace clang |
122 | |