1//===--- LRGraph.cpp - -------------------------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "clang-pseudo/grammar/LRGraph.h"
10#include "clang-pseudo/grammar/Grammar.h"
11#include "llvm/ADT/DenseSet.h"
12#include "llvm/ADT/Hashing.h"
13#include "llvm/ADT/STLExtras.h"
14#include "llvm/ADT/StringExtras.h"
15#include "llvm/Support/FormatVariadic.h"
16#include "llvm/Support/raw_ostream.h"
17
18using ItemSet = std::vector<clang::pseudo::Item>;
19
20namespace llvm {
21// Support clang::pseudo::Item as DenseMap keys.
22template <> struct DenseMapInfo<ItemSet> {
23 static inline ItemSet getEmptyKey() {
24 return {DenseMapInfo<clang::pseudo::Item>::getEmptyKey()};
25 }
26 static inline ItemSet getTombstoneKey() {
27 return {DenseMapInfo<clang::pseudo::Item>::getTombstoneKey()};
28 }
29 static unsigned getHashValue(const ItemSet &I) {
30 return llvm::hash_combine_range(first: I.begin(), last: I.end());
31 }
32 static bool isEqual(const ItemSet &LHS, const ItemSet &RHS) {
33 return LHS == RHS;
34 }
35};
36} // namespace llvm
37
38namespace clang {
39namespace pseudo {
40namespace {
41
42struct SortByNextSymbol {
43 SortByNextSymbol(const Grammar &G) : G(G) {}
44 bool operator()(const Item &L, const Item &R) {
45 if (L.hasNext() && R.hasNext() && L.next(G) != R.next(G))
46 return L.next(G) < R.next(G);
47 if (L.hasNext() != R.hasNext())
48 return L.hasNext() < R.hasNext(); // a trailing dot is minimal.
49 return L < R;
50 }
51 const Grammar &G;
52};
53
54// Computes a closure of the given item set S:
55// - extends the given S to contain all options for parsing next token;
56// - nonterminals after a dot are recursively expanded into the begin-state
57// of all production rules that produce that nonterminal;
58//
59// Given
60// Grammar rules = [ _ := E, E := E - T, E := T, T := n, T := ( E ) ]
61// Input = [ E := . T ]
62// returns [ E := . T, T := . n, T := . ( E ) ]
63State closure(ItemSet Queue, const Grammar &G) {
64 llvm::DenseSet<Item> InQueue = {Queue.begin(), Queue.end()};
65 // We reuse the passed-by-value Queue as the final result, as it's already
66 // initialized to the right elements.
67 size_t ItIndex = 0;
68 while (ItIndex < Queue.size()) {
69 const Item &ExpandingItem = Queue[ItIndex];
70 ++ItIndex;
71 if (!ExpandingItem.hasNext())
72 continue;
73
74 SymbolID NextSym = ExpandingItem.next(G);
75 if (pseudo::isToken(ID: NextSym))
76 continue;
77 auto RRange = G.table().Nonterminals[NextSym].RuleRange;
78 for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) {
79 Item NewItem = Item::start(ID: RID, G);
80 if (InQueue.insert(V: NewItem).second) // new
81 Queue.push_back(x: std::move(NewItem));
82 }
83 }
84 Queue.shrink_to_fit();
85 llvm::sort(C&: Queue, Comp: SortByNextSymbol(G));
86 return {.Items: std::move(Queue)};
87}
88
89// Returns all next (with a dot advanced) kernel item sets, partitioned by the
90// advanced symbol.
91//
92// Given
93// S = [ E := . a b, E := E . - T ]
94// returns [
95// {id(a), [ E := a . b ]},
96// {id(-), [ E := E - . T ]}
97// ]
98std::vector<std::pair<SymbolID, ItemSet>>
99nextAvailableKernelItems(const State &S, const Grammar &G) {
100 std::vector<std::pair<SymbolID, ItemSet>> Results;
101 llvm::ArrayRef<Item> AllItems = S.Items;
102 AllItems = AllItems.drop_while(Pred: [](const Item &I) { return !I.hasNext(); });
103 while (!AllItems.empty()) {
104 SymbolID AdvancedSymbol = AllItems.front().next(G);
105 auto Batch = AllItems.take_while(Pred: [AdvancedSymbol, &G](const Item &I) {
106 assert(I.hasNext());
107 return I.next(G) == AdvancedSymbol;
108 });
109 assert(!Batch.empty());
110 AllItems = AllItems.drop_front(N: Batch.size());
111
112 // Advance a dot over the Symbol.
113 ItemSet Next;
114 for (const Item &I : Batch)
115 Next.push_back(x: I.advance());
116 // sort the set to keep order determinism for hash computation.
117 llvm::sort(C&: Next);
118 Results.push_back(x: {AdvancedSymbol, std::move(Next)});
119 }
120 return Results;
121}
122
123std::vector<std::pair<ExtensionID, SymbolID>>
124availableRecovery(const State &S, const Grammar &G) {
125 std::vector<std::pair<ExtensionID, SymbolID>> Result;
126 for (const Item &I : S.Items) {
127 const auto &Rule = G.lookupRule(RID: I.rule());
128 if (I.dot() != Rule.RecoveryIndex)
129 continue;
130 Result.push_back(x: {Rule.Recovery, Rule.seq()[Rule.RecoveryIndex]});
131 }
132 llvm::sort(C&: Result);
133 Result.erase(first: std::unique(first: Result.begin(), last: Result.end()), last: Result.end());
134 return Result;
135}
136
137} // namespace
138
139std::string Item::dump(const Grammar &G) const {
140 const auto &Rule = G.lookupRule(RID);
141 auto ToNames = [&](llvm::ArrayRef<SymbolID> Syms) {
142 std::vector<llvm::StringRef> Results;
143 for (auto SID : Syms)
144 Results.push_back(x: G.symbolName(SID));
145 return Results;
146 };
147 return llvm::formatv(Fmt: "{0} := {1} • {2}{3}", Vals: G.symbolName(Rule.Target),
148 Vals: llvm::join(R: ToNames(Rule.seq().take_front(N: DotPos)), Separator: " "),
149 Vals: llvm::join(R: ToNames(Rule.seq().drop_front(N: DotPos)), Separator: " "),
150 Vals: Rule.RecoveryIndex == DotPos ? " [recovery]" : "")
151 .str();
152}
153
154std::string State::dump(const Grammar &G, unsigned Indent) const {
155 std::string Result;
156 llvm::raw_string_ostream OS(Result);
157 for (const auto &Item : Items)
158 OS.indent(NumSpaces: Indent) << llvm::formatv(Fmt: "{0}\n", Vals: Item.dump(G));
159 return OS.str();
160}
161
162std::string LRGraph::dumpForTests(const Grammar &G) const {
163 std::string Result;
164 llvm::raw_string_ostream OS(Result);
165 OS << "States:\n";
166 for (StateID ID = 0; ID < States.size(); ++ID) {
167 OS << llvm::formatv(Fmt: "State {0}\n", Vals&: ID);
168 OS << States[ID].dump(G, /*Indent*/ 4);
169 }
170 for (const auto &E : Edges) {
171 OS << llvm::formatv(Fmt: "{0} ->[{1}] {2}\n", Vals: E.Src, Vals: G.symbolName(E.Label),
172 Vals: E.Dst);
173 }
174 return OS.str();
175}
176
177LRGraph LRGraph::buildLR0(const Grammar &G) {
178 class Builder {
179 public:
180 Builder(const Grammar &G) : G(G) {}
181
182 // Adds a given state if not existed.
183 std::pair<StateID, /*inserted*/ bool> insert(ItemSet KernelItems) {
184 assert(llvm::is_sorted(KernelItems) &&
185 "Item must be sorted before inserting to a hash map!");
186 auto It = StatesIndex.find(Val: KernelItems);
187 if (It != StatesIndex.end())
188 return {It->second, false};
189 States.push_back(x: closure(Queue: KernelItems, G));
190 StateID NextStateID = States.size() - 1;
191 StatesIndex.insert(KV: {std::move(KernelItems), NextStateID});
192 return {NextStateID, true};
193 }
194
195 void insertEdge(StateID Src, StateID Dst, SymbolID Label) {
196 Edges.push_back(x: {.Src: Src, .Dst: Dst, .Label: Label});
197 }
198
199 void insertRecovery(StateID Src, ExtensionID Strategy, SymbolID Result) {
200 Recoveries.push_back(x: {.Src: Src, .Strategy: Strategy, .Result: Result});
201 }
202
203 // Returns a state with the given id.
204 const State &find(StateID ID) const {
205 assert(ID < States.size());
206 return States[ID];
207 }
208
209 void addStartState(SymbolID Sym, StateID State) {
210 StartStates.push_back(x: {Sym, State});
211 }
212
213 LRGraph build() && {
214 States.shrink_to_fit();
215 Edges.shrink_to_fit();
216 Recoveries.shrink_to_fit();
217 llvm::sort(C&: StartStates);
218 StartStates.shrink_to_fit();
219 return LRGraph(std::move(States), std::move(Edges), std::move(Recoveries),
220 std::move(StartStates));
221 }
222
223 private:
224 // Key is the **kernel** item sets.
225 llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex;
226 std::vector<State> States;
227 std::vector<Edge> Edges;
228 std::vector<Recovery> Recoveries;
229 const Grammar &G;
230 std::vector<std::pair<SymbolID, StateID>> StartStates;
231 } Builder(G);
232
233 std::vector<StateID> PendingStates;
234 // Initialize states with the start symbol.
235 auto RRange = G.table().Nonterminals[G.underscore()].RuleRange;
236 for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) {
237 auto StartState = std::vector<Item>{Item::start(ID: RID, G)};
238 auto Result = Builder.insert(KernelItems: std::move(StartState));
239 assert(Result.second && "State must be new");
240 PendingStates.push_back(x: Result.first);
241
242 const Rule &StartRule = G.lookupRule(RID);
243 assert(StartRule.Size == 2 &&
244 StartRule.seq().back() == tokenSymbol(tok::eof) &&
245 "Start rule must be of the form `_ := start-symbol EOF`!");
246 Builder.addStartState(Sym: StartRule.seq().front(), State: Result.first);
247 }
248
249 while (!PendingStates.empty()) {
250 auto StateID = PendingStates.back();
251 PendingStates.pop_back();
252 for (auto Next : nextAvailableKernelItems(S: Builder.find(ID: StateID), G)) {
253 auto Insert = Builder.insert(KernelItems: Next.second);
254 if (Insert.second) // new state, insert to the pending queue.
255 PendingStates.push_back(x: Insert.first);
256 Builder.insertEdge(Src: StateID, Dst: Insert.first, Label: Next.first);
257 }
258 for (auto Recovery : availableRecovery(S: Builder.find(ID: StateID), G))
259 Builder.insertRecovery(Src: StateID, Strategy: Recovery.first, Result: Recovery.second);
260 }
261 return std::move(Builder).build();
262}
263
264} // namespace pseudo
265} // namespace clang
266

source code of clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp