strnom.rs source code [crates/cpp_build/src/strnom.rs]

//! Fork of the equivalent file from the proc-macro2 crate.
//! Modified to support line number counting in `Cursor`.
//! Also contains some functions from `stable.rs` of proc-macro2.
4
5	#![allow(dead_code)] // Why is this needed ?
6
7	use std::str::{Bytes, CharIndices, Chars};
8
9	use unicode_xid::UnicodeXID;
10
/// Error produced by the lexing functions in this module.
#[derive(Debug)]
pub struct LexError {
    /// Line recorded by the function that reported the failure
    /// (taken from the `line` field of the cursor it was working on).
    pub line: u32,
}
15
/// A position inside a source string, together with the text that is still
/// left to read.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Cursor<'a> {
    /// Text that has not been consumed yet.
    pub rest: &'a str,
    /// Running byte offset; `advance` adds the consumed byte count to it.
    pub off: u32,
    /// Current line; `advance` adds one for every `'\n'` it steps over.
    pub line: u32,
    /// Number of bytes consumed since the last `'\n'` (or, if no newline has
    /// been crossed yet, the initial column plus the bytes consumed).
    pub column: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor moved forward by `amt` bytes, with `off`, `line` and
    /// `column` updated for the text that was stepped over.
    ///
    /// # Panics
    ///
    /// Panics if `amt` is larger than the remaining text or does not fall on
    /// a UTF-8 character boundary.
    pub fn advance(&self, amt: usize) -> Cursor<'a> {
        let (consumed, rest) = self.rest.split_at(amt);
        let newlines = consumed.matches('\n').count() as u32;
        let column = match consumed.rfind('\n') {
            // Still on the same line: keep counting from the current column.
            None => self.column + (amt as u32),
            // A line break was crossed: the column is the number of bytes
            // that follow the last newline in the consumed text.
            Some(last_newline) => (amt - last_newline) as u32 - 1,
        };
        Cursor {
            rest,
            off: self.off + (amt as u32),
            line: self.line + newlines,
            column,
        }
    }

    /// Byte index of the first occurrence of `p` in the remaining text.
    pub fn find(&self, p: char) -> Option<usize> {
        self.rest.find(p)
    }

    /// Whether the remaining text begins with `s`.
    pub fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether there is no text left.
    pub fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Length of the remaining text in bytes.
    pub fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining text as raw bytes.
    pub fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the bytes of the remaining text.
    pub fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the characters of the remaining text.
    pub fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over `(byte index, character)` pairs of the remaining text.
    pub fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }
}
78
79	pub type PResult<'a, O> = Result<(Cursor<'a>, O), LexError>;
80
81	pub fn whitespace(input: Cursor) -> PResult<()> {
82	if input.is_empty() {
83	return Err(LexError { line: input.line });
84	}
85
86	let bytes = input.as_bytes();
87	let mut i = `0`;
88	while i < bytes.len() {
89	let s = input.advance(i);
90	if bytes[i] == b'/' {
91	if s.starts_with("//")
92	// && (!s.starts_with("///") \|\| s.starts_with("////"))
93	// && !s.starts_with("//!")
94	{
95	if let Some(len) = s.find('`\n`') {
96	i += len + `1`;
97	continue;
98	}
99	break;
100	} else if s.starts_with("/**/") {
101	i += `4`;
102	continue;
103	} else if s.starts_with("/*")
104	// && (!s.starts_with("/") \|\| s.starts_with("/*"))
105	// && !s.starts_with("/!")*
106	{
107	let (_, com) = block_comment(s)?;
108	i += com.len();
109	continue;
110	}
111	}
112	match bytes[i] {
113	b' ' \| `0x09`..=`0x0d` => {
114	i += `1`;
115	continue;
116	}
117	b if b <= `0x7f` => {}
118	_ => {
119	let ch = s.chars().next().unwrap();
120	if is_whitespace(ch) {
121	i += ch.len_utf8();
122	continue;
123	}
124	}
125	}
126	return if i > `0` { Ok((s, ())) } else { Err(LexError { line: s.line }) };
127	}
128	Ok((input.advance(input.len()), ()))
129	}
130
131	pub fn block_comment(input: Cursor) -> PResult<&str> {
132	if !input.starts_with("/*") {
133	return Err(LexError { line: input.line });
134	}
135
136	let mut depth: i32 = `0`;
137	let bytes: &[u8] = input.as_bytes();
138	let mut i: usize = `0`;
139	let upper: usize = bytes.len() - `1`;
140	while i < upper {
141	if bytes[i] == b'/' && bytes[i + `1`] == b'*' {
142	depth += `1`;
143	i += `1`; // eat ''*
144	} else if bytes[i] == b'*' && bytes[i + `1`] == b'/' {
145	depth -= `1`;
146	if depth == `0` {
147	return Ok((input.advance(amt:i + `2`), &input.rest[..i + `2`]));
148	}
149	i += `1`; // eat '/'
150	}
151	i += `1`;
152	}
153	Err(LexError { line: input.line })
154	}
155
156	pub fn skip_whitespace(input: Cursor) -> Cursor {
157	match whitespace(input) {
158	Ok((rest: Cursor<'_>, _)) => rest,
159	Err(_) => input,
160	}
161	}
162
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    match ch {
        // Rust treats left-to-right mark and right-to-left mark as whitespace
        '\u{200e}' | '\u{200f}' => true,
        _ => ch.is_whitespace(),
    }
}
167
168	// --- functions from stable.rs
169
170	#[inline]
171	fn is_ident_start(c: char) -> bool {
172	c.is_ascii_alphabetic() \|\| c == '_' \|\| (c > '`\x7f`' && UnicodeXID::is_xid_start(self:c))
173	}
174
175	#[inline]
176	fn is_ident_continue(c: char) -> bool {
177	c.is_ascii_alphanumeric() \|\| c == '_' \|\| (c > '`\x7f`' && UnicodeXID::is_xid_continue(self:c))
178	}
179
180	pub fn symbol(input: Cursor) -> PResult<&str> {
181	let mut chars = input.char_indices();
182
183	let raw = input.starts_with("r#");
184	if raw {
185	chars.next();
186	chars.next();
187	}
188
189	match chars.next() {
190	Some((_, ch)) if is_ident_start(ch) => {}
191	_ => return Err(LexError { line: input.line }),
192	}
193
194	let mut end = input.len();
195	for (i, ch) in chars {
196	if !is_ident_continue(ch) {
197	end = i;
198	break;
199	}
200	}
201
202	let a = &input.rest[..end];
203	if a == "r#_" {
204	Err(LexError { line: input.line })
205	} else {
206	let ident = if raw { &a[`2`..] } else { a };
207	Ok((input.advance(end), ident))
208	}
209	}
210
211	pub fn cooked_string(input: Cursor) -> PResult<()> {
212	let mut chars = input.char_indices().peekable();
213	while let Some((byte_offset, ch)) = chars.next() {
214	match ch {
215	'"' => {
216	return Ok((input.advance(byte_offset), ()));
217	}
218	'`\r`' => {
219	if let Some((_, '`\n`')) = chars.next() {
220	// ...
221	} else {
222	break;
223	}
224	}
225	'`\\`' => match chars.next() {
226	Some((_, 'x')) => {
227	if !backslash_x_char(&mut chars) {
228	break;
229	}
230	}
231	Some((_, 'n')) \| Some((_, 'r')) \| Some((_, 't')) \| Some((_, '`\\`'))
232	\| Some((_, '`\'`')) \| Some((_, '"')) \| Some((_, '0')) => {}
233	Some((_, 'u')) => {
234	if !backslash_u(&mut chars) {
235	break;
236	}
237	}
238	Some((_, '`\n`')) \| Some((_, '`\r`')) => {
239	while let Some(&(_, ch)) = chars.peek() {
240	if ch.is_whitespace() {
241	chars.next();
242	} else {
243	break;
244	}
245	}
246	}
247	_ => break,
248	},
249	_ch => {}
250	}
251	}
252	Err(LexError { line: input.line })
253	}
254
255	pub fn cooked_byte_string(mut input: Cursor) -> PResult<()> {
256	let mut bytes = input.bytes().enumerate();
257	'outer: while let Some((offset, b)) = bytes.next() {
258	match b {
259	b'"' => {
260	return Ok((input.advance(offset), ()));
261	}
262	b'`\r`' => {
263	if let Some((_, b'`\n`')) = bytes.next() {
264	// ...
265	} else {
266	break;
267	}
268	}
269	b'`\\`' => match bytes.next() {
270	Some((_, b'x')) => {
271	if !backslash_x_byte(&mut bytes) {
272	break;
273	}
274	}
275	Some((_, b'n')) \| Some((_, b'r')) \| Some((_, b't')) \| Some((_, b'`\\`'))
276	\| Some((_, b'0')) \| Some((_, b'`\'`')) \| Some((_, b'"')) => {}
277	Some((newline, b'`\n`')) \| Some((newline, b'`\r`')) => {
278	let rest = input.advance(newline + `1`);
279	for (offset, ch) in rest.char_indices() {
280	if !ch.is_whitespace() {
281	input = rest.advance(offset);
282	bytes = input.bytes().enumerate();
283	continue 'outer;
284	}
285	}
286	break;
287	}
288	_ => break,
289	},
290	b if b < `0x80` => {}
291	_ => break,
292	}
293	}
294	Err(LexError { line: input.line })
295	}
296
297	pub fn raw_string(input: Cursor) -> PResult<()> {
298	let mut chars = input.char_indices();
299	let mut n = `0`;
300	#[allow(clippy::while_let_on_iterator)] //chars is used in the next loop
301	while let Some((byte_offset, ch)) = chars.next() {
302	match ch {
303	'"' => {
304	n = byte_offset;
305	break;
306	}
307	'#' => {}
308	_ => return Err(LexError { line: input.line }),
309	}
310	}
311	for (byte_offset, ch) in chars {
312	match ch {
313	'"' if input.advance(byte_offset + `1`).starts_with(&input.rest[..n]) => {
314	let rest = input.advance(byte_offset + `1` + n);
315	return Ok((rest, ()));
316	}
317	'`\r`' => {}
318	_ => {}
319	}
320	}
321	Err(LexError { line: input.line })
322	}
323
324	pub fn cooked_byte(input: Cursor) -> PResult<()> {
325	let mut bytes = input.bytes().enumerate();
326	let ok = match bytes.next().map(\|(_, b)\| b) {
327	Some(b'`\\`') => match bytes.next().map(\|(_, b)\| b) {
328	Some(b'x') => backslash_x_byte(&mut bytes),
329	Some(b'n') \| Some(b'r') \| Some(b't') \| Some(b'`\\`') \| Some(b'0') \| Some(b'`\'`')
330	\| Some(b'"') => `true`,
331	_ => `false`,
332	},
333	b => b.is_some(),
334	};
335	if ok {
336	match bytes.next() {
337	Some((offset, _)) => {
338	if input.chars().as_str().is_char_boundary(offset) {
339	Ok((input.advance(offset), ()))
340	} else {
341	Err(LexError { line: input.line })
342	}
343	}
344	None => Ok((input.advance(input.len()), ())),
345	}
346	} else {
347	Err(LexError { line: input.line })
348	}
349	}
350
351	pub fn cooked_char(input: Cursor) -> PResult<()> {
352	let mut chars: CharIndices<'_> = input.char_indices();
353	let ok: bool = match chars.next().map(\|(_, ch: char)\| ch) {
354	Some('`\\`') => match chars.next().map(\|(_, ch: char)\| ch) {
355	Some('x') => backslash_x_char(&mut chars),
356	Some('u') => backslash_u(&mut chars),
357	Some('n') \| Some('r') \| Some('t') \| Some('`\\`') \| Some('0') \| Some('`\'`') \| Some('"') => {
358	`true`
359	}
360	_ => `false`,
361	},
362	ch: Option => ch.is_some(),
363	};
364	if ok {
365	match chars.next() {
366	Some((idx: usize, _)) => Ok((input.advance(amt:idx), ())),
367	None => Ok((input.advance(amt:input.len()), ())),
368	}
369	} else {
370	Err(LexError { line: input.line })
371	}
372	}
373
374	macro_rules! next_ch {
375	($chars:ident @ $pat:pat $(\| $rest:pat)*) => {
376	match $chars.next() {
377	Some((_, ch)) => match ch {
378	$pat $(\| $rest)* => ch,
379	_ => return `false`,
380	},
381	None => return `false`
382	}
383	};
384	}
385
/// Validates the two digits of a `\x` escape inside a char or string
/// literal: an octal digit (`0`-`7`) followed by a hex digit.
///
/// Consumes one item per digit checked and stops at the first mismatch.
fn backslash_x_char<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, char)>,
{
    let high_ok = chars
        .next()
        .map_or(false, |(_, c)| ('0'..='7').contains(&c));
    if !high_ok {
        return false;
    }
    chars.next().map_or(false, |(_, c)| c.is_ascii_hexdigit())
}
394
/// Validates the two hex digits of a `\x` escape inside a byte or
/// byte-string literal.
///
/// Consumes one item per digit checked and stops at the first mismatch.
fn backslash_x_byte<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, u8)>,
{
    let mut next_is_hex = || chars.next().map_or(false, |(_, b)| b.is_ascii_hexdigit());
    // `&&` short-circuits, so the second item is only read after the first
    // one was accepted.
    next_is_hex() && next_is_hex()
}
403
404	fn backslash_u<I>(chars: &mut I) -> bool
405	where
406	I: Iterator<Item = (usize, char)>,
407	{
408	next_ch!(chars @ '{');
409	next_ch!(chars @ '0'..='9' \| 'a'..='f' \| 'A'..='F');
410	loop {
411	let c: char = next_ch!(chars @ '0'..='9' \| 'a'..='f' \| 'A'..='F' \| '_' \| '}');
412	if c == '}' {
413	return `true`;
414	}
415	}
416	}
417