LRGraph.cpp source code [clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp]

1	//===--- LRGraph.cpp - -------------------------------------------- C++--===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "clang-pseudo/grammar/LRGraph.h"
10	#include "clang-pseudo/grammar/Grammar.h"
11	#include "llvm/ADT/DenseSet.h"
12	#include "llvm/ADT/Hashing.h"
13	#include "llvm/ADT/STLExtras.h"
14	#include "llvm/ADT/StringExtras.h"
15	#include "llvm/Support/FormatVariadic.h"
16	#include "llvm/Support/raw_ostream.h"
17
18	using ItemSet = std::vector<clang::pseudo::Item>;
19
20	namespace llvm {
21	// Support clang::pseudo::Item as DenseMap keys.
22	template <> struct DenseMapInfo<ItemSet> {
23	static inline ItemSet getEmptyKey() {
24	return {DenseMapInfo<clang::pseudo::Item>::getEmptyKey()};
25	}
26	static inline ItemSet getTombstoneKey() {
27	return {DenseMapInfo<clang::pseudo::Item>::getTombstoneKey()};
28	}
29	static unsigned getHashValue(const ItemSet &I) {
30	return llvm::hash_combine_range(first: I.begin(), last: I.end());
31	}
32	static bool isEqual(const ItemSet &LHS, const ItemSet &RHS) {
33	return LHS == RHS;
34	}
35	};
36	} // namespace llvm
37
38	namespace clang {
39	namespace pseudo {
40	namespace {
41
42	struct SortByNextSymbol {
43	SortByNextSymbol(const Grammar &G) : G(G) {}
44	bool operator()(const Item &L, const Item &R) {
45	if (L.hasNext() && R.hasNext() && L.next(G) != R.next(G))
46	return L.next(G) < R.next(G);
47	if (L.hasNext() != R.hasNext())
48	return L.hasNext() < R.hasNext(); // a trailing dot is minimal.
49	return L < R;
50	}
51	const Grammar &G;
52	};
53
54	// Computes a closure of the given item set S:
55	// - extends the given S to contain all options for parsing next token;
56	// - nonterminals after a dot are recursively expanded into the begin-state
57	// of all production rules that produce that nonterminal;
58	//
59	// Given
60	// Grammar rules = [ _ := E, E := E - T, E := T, T := n, T := ( E ) ]
61	// Input = [ E := . T ]
62	// returns [ E := . T, T := . n, T := . ( E ) ]
63	State closure(ItemSet Queue, const Grammar &G) {
64	llvm::DenseSet<Item> InQueue = {Queue.begin(), Queue.end()};
65	// We reuse the passed-by-value Queue as the final result, as it's already
66	// initialized to the right elements.
67	size_t ItIndex = `0`;
68	while (ItIndex < Queue.size()) {
69	const Item &ExpandingItem = Queue [ItIndex];
70	++ItIndex;
71	if (!ExpandingItem.hasNext())
72	continue;
73
74	SymbolID NextSym = ExpandingItem.next(G);
75	if (pseudo::isToken(ID: NextSym))
76	continue;
77	auto RRange = G.table().Nonterminals [NextSym].RuleRange;
78	for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) {
79	Item NewItem = Item::start(ID: RID, G);
80	if (InQueue.insert(V: NewItem).second) // new
81	Queue.push_back(x: std::move(NewItem));
82	}
83	}
84	Queue.shrink_to_fit();
85	llvm::sort(C&: Queue, Comp: SortByNextSymbol (G));
86	return {.Items: std::move(Queue)};
87	}
88
89	// Returns all next (with a dot advanced) kernel item sets, partitioned by the
90	// advanced symbol.
91	//
92	// Given
93	// S = [ E := . a b, E := E . - T ]
94	// returns [
95	// {id(a), [ E := a . b ]},
96	// {id(-), [ E := E - . T ]}
97	// ]
98	std::vector<std::pair<SymbolID, ItemSet>>
99	nextAvailableKernelItems(const State &S, const Grammar &G) {
100	std::vector<std::pair<SymbolID, ItemSet>> Results;
101	llvm::ArrayRef<Item> AllItems = S.Items;
102	AllItems = AllItems.drop_while(Pred: [](const Item &I) { return !I.hasNext(); });
103	while (!AllItems.empty()) {
104	SymbolID AdvancedSymbol = AllItems.front().next(G);
105	auto Batch = AllItems.take_while(Pred: [AdvancedSymbol, &G](const Item &I) {
106	assert(I.hasNext());
107	return I.next(G) == AdvancedSymbol;
108	});
109	assert(!Batch.empty());
110	AllItems = AllItems.drop_front(N: Batch.size());
111
112	// Advance a dot over the Symbol.
113	ItemSet Next;
114	for (const Item &I : Batch)
115	Next.push_back(x: I.advance());
116	// sort the set to keep order determinism for hash computation.
117	llvm::sort(C&: Next);
118	Results.push_back(x: {AdvancedSymbol, std::move(Next)});
119	}
120	return Results;
121	}
122
123	std::vector<std::pair<ExtensionID, SymbolID>>
124	availableRecovery(const State &S, const Grammar &G) {
125	std::vector<std::pair<ExtensionID, SymbolID>> Result;
126	for (const Item &I : S.Items) {
127	const auto &Rule = G.lookupRule(RID: I.rule());
128	if (I.dot() != Rule.RecoveryIndex)
129	continue;
130	Result.push_back(x: {Rule.Recovery, Rule.seq()[Rule.RecoveryIndex]});
131	}
132	llvm::sort(C&: Result);
133	Result.erase(first: std::unique(first: Result.begin(), last: Result.end()), last: Result.end());
134	return Result;
135	}
136
137	} // namespace
138
139	std::string Item::dump(const Grammar &G) const {
140	const auto &Rule = G.lookupRule(RID);
141	auto ToNames = [&](llvm::ArrayRef<SymbolID> Syms) {
142	std::vector<llvm::StringRef> Results;
143	for (auto SID : Syms)
144	Results.push_back(x: G.symbolName(SID));
145	return Results;
146	};
147	return llvm::formatv(Fmt: "{0} := {1} • {2}{3}", Vals: G.symbolName(Rule.Target),
148	Vals: llvm::join(R: ToNames (Rule.seq().take_front(N: DotPos)), Separator: " "),
149	Vals: llvm::join(R: ToNames (Rule.seq().drop_front(N: DotPos)), Separator: " "),
150	Vals: Rule.RecoveryIndex == DotPos ? " [recovery]" : "")
151	.str();
152	}
153
154	std::string State::dump(const Grammar &G, unsigned Indent) const {
155	std::string Result;
156	llvm::raw_string_ostream OS(Result);
157	for (const auto &Item : Items)
158	OS.indent(NumSpaces: Indent) << llvm::formatv(Fmt: "{0}\n", Vals: Item.dump(G));
159	return OS.str();
160	}
161
162	std::string LRGraph::dumpForTests(const Grammar &G) const {
163	std::string Result;
164	llvm::raw_string_ostream OS(Result);
165	OS << "States:\n";
166	for (StateID ID = `0`; ID < States.size(); ++ID) {
167	OS << llvm::formatv(Fmt: "State {0}\n", Vals&: ID);
168	OS << States [ID].dump(G, /Indent/ `4`);
169	}
170	for (const auto &E : Edges) {
171	OS << llvm::formatv(Fmt: "{0} ->[{1}] {2}\n", Vals: E.Src, Vals: G.symbolName(E.Label),
172	Vals: E.Dst);
173	}
174	return OS.str();
175	}
176
177	LRGraph LRGraph::buildLR0(const Grammar &G) {
178	class Builder {
179	public:
180	Builder(const Grammar &G) : G(G) {}
181
182	// Adds a given state if not existed.
183	std::pair<StateID, /inserted/ bool> insert(ItemSet KernelItems) {
184	assert(llvm::is_sorted(KernelItems) &&
185	"Item must be sorted before inserting to a hash map!");
186	auto It = StatesIndex.find(Val: KernelItems);
187	if (It != StatesIndex.end())
188	return {It ->second, false};
189	States.push_back(x: closure(Queue: KernelItems, G));
190	StateID NextStateID = States.size() - `1`;
191	StatesIndex.insert(KV: {std::move(KernelItems), NextStateID});
192	return {NextStateID, true};
193	}
194
195	void insertEdge(StateID Src, StateID Dst, SymbolID Label) {
196	Edges.push_back(x: {.Src: Src, .Dst: Dst, .Label: Label});
197	}
198
199	void insertRecovery(StateID Src, ExtensionID Strategy, SymbolID Result) {
200	Recoveries.push_back(x: {.Src: Src, .Strategy: Strategy, .Result: Result});
201	}
202
203	// Returns a state with the given id.
204	const State &find(StateID ID) const {
205	assert(ID < States.size());
206	return States [ID];
207	}
208
209	void addStartState(SymbolID Sym, StateID State) {
210	StartStates.push_back(x: {Sym, State});
211	}
212
213	LRGraph build() && {
214	States.shrink_to_fit();
215	Edges.shrink_to_fit();
216	Recoveries.shrink_to_fit();
217	llvm::sort(C&: StartStates);
218	StartStates.shrink_to_fit();
219	return LRGraph (std::move(States), std::move(Edges), std::move(Recoveries),
220	std::move(StartStates));
221	}
222
223	private:
224	// Key is the kernel* item sets.*
225	llvm::DenseMap<ItemSet, /index of States/ size_t> StatesIndex;
226	std::vector<State> States;
227	std::vector<Edge> Edges;
228	std::vector<Recovery> Recoveries;
229	const Grammar &G;
230	std::vector<std::pair<SymbolID, StateID>> StartStates;
231	} Builder(G);
232
233	std::vector<StateID> PendingStates;
234	// Initialize states with the start symbol.
235	auto RRange = G.table().Nonterminals [G.underscore()].RuleRange;
236	for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) {
237	auto StartState = std::vector<Item>{Item::start(ID: RID, G)};
238	auto Result = Builder.insert(KernelItems: std::move(StartState));
239	assert(Result.second && "State must be new");
240	PendingStates.push_back(x: Result.first);
241
242	const Rule &StartRule = G.lookupRule(RID);
243	assert(StartRule.Size == `2` &&
244	StartRule.seq().back() == tokenSymbol(tok::eof) &&
245	"Start rule must be of the form `_ := start-symbol EOF`!");
246	Builder.addStartState(Sym: StartRule.seq().front(), State: Result.first);
247	}
248
249	while (!PendingStates.empty()) {
250	auto StateID = PendingStates.back();
251	PendingStates.pop_back();
252	for (auto Next : nextAvailableKernelItems(S: Builder.find(ID: StateID), G)) {
253	auto Insert = Builder.insert(KernelItems: Next.second);
254	if (Insert.second) // new state, insert to the pending queue.
255	PendingStates.push_back(x: Insert.first);
256	Builder.insertEdge(Src: StateID, Dst: Insert.first, Label: Next.first);
257	}
258	for (auto Recovery : availableRecovery(S: Builder.find(ID: StateID), G))
259	Builder.insertRecovery(Src: StateID, Strategy: Recovery.first, Result: Recovery.second);
260	}
261	return std::move(Builder).build();
262	}
263
264	} // namespace pseudo
265	} // namespace clang
266

source code of clang-tools-extra/pseudo/lib/grammar/LRGraph.cpp