//===- ScriptLexer.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a lexer for the linker script.
//
// The linker script's grammar is not complex but ambiguous due to the
// lack of the formal specification of the language. What we are trying to
// do in this and other files in LLD is to make a "reasonable" linker
// script processor.
//
// Among simplicity, compatibility and efficiency, we put the most
// emphasis on simplicity when we wrote this lexer. Compatibility with the
// GNU linkers is important, but we did not try to clone every tiny corner
// case of their lexers, as even ld.bfd and ld.gold are subtly different
// in various corner cases. We do not care much about efficiency because
// the time spent in parsing linker scripts is usually negligible.
//
// Our grammar of the linker script is LL(2), meaning that it needs at
// most two-token lookahead to parse. The only place we need two-token
// lookahead is labels in version scripts, where we need to parse "local :"
// as if it were "local:".
//
// Overall, this lexer works fine for most linker scripts. There might
// be room for improving compatibility, but that's probably not at the
// top of our todo list.
//
//===----------------------------------------------------------------------===//
33 | |
34 | #include "ScriptLexer.h" |
35 | #include "lld/Common/ErrorHandler.h" |
36 | #include "llvm/ADT/Twine.h" |
37 | #include "llvm/Support/ErrorHandling.h" |
38 | #include <algorithm> |
39 | |
40 | using namespace llvm; |
41 | using namespace lld; |
42 | using namespace lld::elf; |
43 | |
44 | // Returns a whole line containing the current token. |
45 | StringRef ScriptLexer::getLine() { |
46 | StringRef s = getCurrentMB().getBuffer(); |
47 | StringRef tok = tokens[pos - 1]; |
48 | |
49 | size_t pos = s.rfind(C: '\n', From: tok.data() - s.data()); |
50 | if (pos != StringRef::npos) |
51 | s = s.substr(Start: pos + 1); |
52 | return s.substr(Start: 0, N: s.find_first_of(Chars: "\r\n" )); |
53 | } |
54 | |
55 | // Returns 1-based line number of the current token. |
56 | size_t ScriptLexer::getLineNumber() { |
57 | if (pos == 0) |
58 | return 1; |
59 | StringRef s = getCurrentMB().getBuffer(); |
60 | StringRef tok = tokens[pos - 1]; |
61 | const size_t tokOffset = tok.data() - s.data(); |
62 | |
63 | // For the first token, or when going backwards, start from the beginning of |
64 | // the buffer. If this token is after the previous token, start from the |
65 | // previous token. |
66 | size_t line = 1; |
67 | size_t start = 0; |
68 | if (lastLineNumberOffset > 0 && tokOffset >= lastLineNumberOffset) { |
69 | start = lastLineNumberOffset; |
70 | line = lastLineNumber; |
71 | } |
72 | |
73 | line += s.substr(Start: start, N: tokOffset - start).count(C: '\n'); |
74 | |
75 | // Store the line number of this token for reuse. |
76 | lastLineNumberOffset = tokOffset; |
77 | lastLineNumber = line; |
78 | |
79 | return line; |
80 | } |
81 | |
82 | // Returns 0-based column number of the current token. |
83 | size_t ScriptLexer::getColumnNumber() { |
84 | StringRef tok = tokens[pos - 1]; |
85 | return tok.data() - getLine().data(); |
86 | } |
87 | |
88 | std::string ScriptLexer::getCurrentLocation() { |
89 | std::string filename = std::string(getCurrentMB().getBufferIdentifier()); |
90 | return (filename + ":" + Twine(getLineNumber())).str(); |
91 | } |
92 | |
93 | ScriptLexer::ScriptLexer(MemoryBufferRef mb) { tokenize(mb); } |
94 | |
95 | // We don't want to record cascading errors. Keep only the first one. |
96 | void ScriptLexer::setError(const Twine &msg) { |
97 | if (errorCount()) |
98 | return; |
99 | |
100 | std::string s = (getCurrentLocation() + ": " + msg).str(); |
101 | if (pos) |
102 | s += "\n>>> " + getLine().str() + "\n>>> " + |
103 | std::string(getColumnNumber(), ' ') + "^" ; |
104 | error(msg: s); |
105 | } |
106 | |
107 | // Split S into linker script tokens. |
108 | void ScriptLexer::tokenize(MemoryBufferRef mb) { |
109 | std::vector<StringRef> vec; |
110 | mbs.push_back(x: mb); |
111 | StringRef s = mb.getBuffer(); |
112 | StringRef begin = s; |
113 | |
114 | for (;;) { |
115 | s = skipSpace(s); |
116 | if (s.empty()) |
117 | break; |
118 | |
119 | // Quoted token. Note that double-quote characters are parts of a token |
120 | // because, in a glob match context, only unquoted tokens are interpreted |
121 | // as glob patterns. Double-quoted tokens are literal patterns in that |
122 | // context. |
123 | if (s.starts_with(Prefix: "\"" )) { |
124 | size_t e = s.find(Str: "\"" , From: 1); |
125 | if (e == StringRef::npos) { |
126 | StringRef filename = mb.getBufferIdentifier(); |
127 | size_t lineno = begin.substr(Start: 0, N: s.data() - begin.data()).count(C: '\n'); |
128 | error(msg: filename + ":" + Twine(lineno + 1) + ": unclosed quote" ); |
129 | return; |
130 | } |
131 | |
132 | vec.push_back(x: s.take_front(N: e + 1)); |
133 | s = s.substr(Start: e + 1); |
134 | continue; |
135 | } |
136 | |
137 | // Some operators form separate tokens. |
138 | if (s.starts_with(Prefix: "<<=" ) || s.starts_with(Prefix: ">>=" )) { |
139 | vec.push_back(x: s.substr(Start: 0, N: 3)); |
140 | s = s.substr(Start: 3); |
141 | continue; |
142 | } |
143 | if (s.size() > 1 && ((s[1] == '=' && strchr(s: "*/+-<>&^|" , c: s[0])) || |
144 | (s[0] == s[1] && strchr(s: "<>&|" , c: s[0])))) { |
145 | vec.push_back(x: s.substr(Start: 0, N: 2)); |
146 | s = s.substr(Start: 2); |
147 | continue; |
148 | } |
149 | |
150 | // Unquoted token. This is more relaxed than tokens in C-like language, |
151 | // so that you can write "file-name.cpp" as one bare token, for example. |
152 | size_t pos = s.find_first_not_of( |
153 | Chars: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" |
154 | "0123456789_.$/\\~=+[]*?-!^:" ); |
155 | |
156 | // A character that cannot start a word (which is usually a |
157 | // punctuation) forms a single character token. |
158 | if (pos == 0) |
159 | pos = 1; |
160 | vec.push_back(x: s.substr(Start: 0, N: pos)); |
161 | s = s.substr(Start: pos); |
162 | } |
163 | |
164 | tokens.insert(position: tokens.begin() + pos, first: vec.begin(), last: vec.end()); |
165 | } |
166 | |
167 | // Skip leading whitespace characters or comments. |
168 | StringRef ScriptLexer::skipSpace(StringRef s) { |
169 | for (;;) { |
170 | if (s.starts_with(Prefix: "/*" )) { |
171 | size_t e = s.find(Str: "*/" , From: 2); |
172 | if (e == StringRef::npos) { |
173 | setError("unclosed comment in a linker script" ); |
174 | return "" ; |
175 | } |
176 | s = s.substr(Start: e + 2); |
177 | continue; |
178 | } |
179 | if (s.starts_with(Prefix: "#" )) { |
180 | size_t e = s.find(C: '\n', From: 1); |
181 | if (e == StringRef::npos) |
182 | e = s.size() - 1; |
183 | s = s.substr(Start: e + 1); |
184 | continue; |
185 | } |
186 | size_t size = s.size(); |
187 | s = s.ltrim(); |
188 | if (s.size() == size) |
189 | return s; |
190 | } |
191 | } |
192 | |
193 | // An erroneous token is handled as if it were the last token before EOF. |
194 | bool ScriptLexer::atEOF() { return errorCount() || tokens.size() == pos; } |
195 | |
196 | // Split a given string as an expression. |
197 | // This function returns "3", "*" and "5" for "3*5" for example. |
198 | static std::vector<StringRef> tokenizeExpr(StringRef s) { |
199 | StringRef ops = "!~*/+-<>?^:=" ; // List of operators |
200 | |
201 | // Quoted strings are literal strings, so we don't want to split it. |
202 | if (s.starts_with(Prefix: "\"" )) |
203 | return {s}; |
204 | |
205 | // Split S with operators as separators. |
206 | std::vector<StringRef> ret; |
207 | while (!s.empty()) { |
208 | size_t e = s.find_first_of(Chars: ops); |
209 | |
210 | // No need to split if there is no operator. |
211 | if (e == StringRef::npos) { |
212 | ret.push_back(x: s); |
213 | break; |
214 | } |
215 | |
216 | // Get a token before the operator. |
217 | if (e != 0) |
218 | ret.push_back(x: s.substr(Start: 0, N: e)); |
219 | |
220 | // Get the operator as a token. |
221 | // Keep !=, ==, >=, <=, << and >> operators as a single tokens. |
222 | if (s.substr(Start: e).starts_with(Prefix: "!=" ) || s.substr(Start: e).starts_with(Prefix: "==" ) || |
223 | s.substr(Start: e).starts_with(Prefix: ">=" ) || s.substr(Start: e).starts_with(Prefix: "<=" ) || |
224 | s.substr(Start: e).starts_with(Prefix: "<<" ) || s.substr(Start: e).starts_with(Prefix: ">>" )) { |
225 | ret.push_back(x: s.substr(Start: e, N: 2)); |
226 | s = s.substr(Start: e + 2); |
227 | } else { |
228 | ret.push_back(x: s.substr(Start: e, N: 1)); |
229 | s = s.substr(Start: e + 1); |
230 | } |
231 | } |
232 | return ret; |
233 | } |
234 | |
235 | // In contexts where expressions are expected, the lexer should apply |
236 | // different tokenization rules than the default one. By default, |
237 | // arithmetic operator characters are regular characters, but in the |
238 | // expression context, they should be independent tokens. |
239 | // |
240 | // For example, "foo*3" should be tokenized to "foo", "*" and "3" only |
241 | // in the expression context. |
242 | // |
243 | // This function may split the current token into multiple tokens. |
244 | void ScriptLexer::maybeSplitExpr() { |
245 | if (!inExpr || errorCount() || atEOF()) |
246 | return; |
247 | |
248 | std::vector<StringRef> v = tokenizeExpr(s: tokens[pos]); |
249 | if (v.size() == 1) |
250 | return; |
251 | tokens.erase(position: tokens.begin() + pos); |
252 | tokens.insert(position: tokens.begin() + pos, first: v.begin(), last: v.end()); |
253 | } |
254 | |
255 | StringRef ScriptLexer::next() { |
256 | maybeSplitExpr(); |
257 | |
258 | if (errorCount()) |
259 | return "" ; |
260 | if (atEOF()) { |
261 | setError("unexpected EOF" ); |
262 | return "" ; |
263 | } |
264 | return tokens[pos++]; |
265 | } |
266 | |
267 | StringRef ScriptLexer::peek() { |
268 | StringRef tok = next(); |
269 | if (errorCount()) |
270 | return "" ; |
271 | pos = pos - 1; |
272 | return tok; |
273 | } |
274 | |
275 | StringRef ScriptLexer::peek2() { |
276 | skip(); |
277 | StringRef tok = next(); |
278 | if (errorCount()) |
279 | return "" ; |
280 | pos = pos - 2; |
281 | return tok; |
282 | } |
283 | |
284 | bool ScriptLexer::consume(StringRef tok) { |
285 | if (peek() == tok) { |
286 | skip(); |
287 | return true; |
288 | } |
289 | return false; |
290 | } |
291 | |
292 | // Consumes Tok followed by ":". Space is allowed between Tok and ":". |
293 | bool ScriptLexer::consumeLabel(StringRef tok) { |
294 | if (consume(tok: (tok + ":" ).str())) |
295 | return true; |
296 | if (tokens.size() >= pos + 2 && tokens[pos] == tok && |
297 | tokens[pos + 1] == ":" ) { |
298 | pos += 2; |
299 | return true; |
300 | } |
301 | return false; |
302 | } |
303 | |
304 | void ScriptLexer::skip() { (void)next(); } |
305 | |
306 | void ScriptLexer::expect(StringRef expect) { |
307 | if (errorCount()) |
308 | return; |
309 | StringRef tok = next(); |
310 | if (tok != expect) |
311 | setError(expect + " expected, but got " + tok); |
312 | } |
313 | |
314 | // Returns true if S encloses T. |
315 | static bool encloses(StringRef s, StringRef t) { |
316 | return s.bytes_begin() <= t.bytes_begin() && t.bytes_end() <= s.bytes_end(); |
317 | } |
318 | |
319 | MemoryBufferRef ScriptLexer::getCurrentMB() { |
320 | // Find input buffer containing the current token. |
321 | assert(!mbs.empty()); |
322 | if (pos == 0) |
323 | return mbs.back(); |
324 | for (MemoryBufferRef mb : mbs) |
325 | if (encloses(s: mb.getBuffer(), t: tokens[pos - 1])) |
326 | return mb; |
327 | llvm_unreachable("getCurrentMB: failed to find a token" ); |
328 | } |
329 | |