lex.rs source code [crates/wit_parser/src/ast/lex.rs]

1	use anyhow::{bail, Result};
2	use std::char;
3	use std::fmt;
4	use std::str;
5	use unicode_xid::UnicodeXID;
6
7	use self::Token::*;
8
9	#[derive(Clone)]
10	pub struct Tokenizer<'a> {
11	input: &'a str,
12	span_offset: u32,
13	chars: CrlfFold<'a>,
14	require_f32_f64: bool,
15	}
16
/// Wrapper around a `CharIndices` cursor; its `Iterator` implementation
/// reports every `\r\n` pair as a single `\n`.
#[derive(Clone)]
struct CrlfFold<'a> {
    /// The underlying `(byte offset, char)` cursor.
    chars: std::str::CharIndices<'a>,
}
21
/// A half-open byte range `[start, end)` locating a token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Offset of the first byte of the range.
    pub start: u32,
    /// Offset one past the last byte of the range (exclusive).
    pub end: u32,
}
30
/// The kinds of token produced by the tokenizer.
///
/// A `Token` carries no text; the matching `Span` identifies the source bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Token {
    // Trivia, skipped by `Tokenizer::next` but returned by `next_raw`.
    Whitespace,
    Comment,

    // Punctuation and operators.
    Equals,
    Comma,
    Colon,
    Period,
    Semicolon,
    LeftParen,
    RightParen,
    LeftBrace,
    RightBrace,
    LessThan,
    GreaterThan,
    RArrow,
    Star,
    At,
    Slash,
    Plus,
    Minus,

    // Keywords.
    Use,
    Type,
    Func,
    U8,
    U16,
    U32,
    U64,
    S8,
    S16,
    S32,
    S64,
    F32,
    F64,
    Char,
    Record,
    Resource,
    Own,
    Borrow,
    Flags,
    Variant,
    Enum,
    Bool,
    String_,
    Option_,
    Result_,
    Future,
    Stream,
    ErrorContext,
    List,
    Underscore,
    As,
    From_,
    Static,
    Interface,
    Tuple,
    Import,
    Export,
    World,
    Package,
    Constructor,

    // Identifiers: plain, and `%`-prefixed.
    Id,
    ExplicitId,

    // A run of ASCII digits.
    Integer,

    // More keywords.
    Include,
    With,
}
103
/// Lexing failures. The leading `u32` (or `at`) of each variant is the
/// position the error is reported at.
#[derive(Debug, PartialEq, Eq)]
#[allow(dead_code)]
pub enum Error {
    /// An identifier contains a character that is not permitted there.
    InvalidCharInId(u32, char),
    /// An identifier is empty or has an empty part between `-` separators.
    IdPartEmpty(u32),
    /// An invalid escape was found in a string.
    InvalidEscape(u32, char),
    /// A character that cannot begin any token.
    Unexpected(u32, char),
    /// A `/* ... */` comment was still open at end of input.
    UnterminatedComment(u32),
    /// A specific token was required but something else was found.
    Wanted {
        /// Where the unexpected token (or end of input) is.
        at: u32,
        /// Description of the token that was required.
        expected: &'static str,
        /// Description of what was found instead.
        found: &'static str,
    },
}
118
// Fallback for `require_f32_f64` when `Tokenizer::new` is given `None` and the
// `WIT_REQUIRE_F32_F64` environment variable cannot be read.
//
// NB: keep in sync with `crates/wit-component/src/printing.rs`.
const REQUIRE_F32_F64_BY_DEFAULT: bool = true;
121
122	impl<'a> Tokenizer<'a> {
123	pub fn new(
124	input: &'a str,
125	span_offset: u32,
126	require_f32_f64: Option<bool>,
127	) -> Result<Tokenizer<'a>> {
128	detect_invalid_input(input)?;
129
130	let mut t = Tokenizer {
131	input,
132	span_offset,
133	chars: CrlfFold {
134	chars: input.char_indices(),
135	},
136	require_f32_f64: require_f32_f64.unwrap_or_else(\|\| {
137	match std::env::var("WIT_REQUIRE_F32_F64") {
138	Ok(s) => s == "1",
139	Err(_) => REQUIRE_F32_F64_BY_DEFAULT,
140	}
141	}),
142	};
143	// Eat utf-8 BOM
144	t.eatc('`\u{feff}`');
145	Ok(t)
146	}
147
148	pub fn expect_semicolon(&mut self) -> Result<()> {
149	self.expect(Token::Semicolon)?;
150	Ok(())
151	}
152
153	pub fn get_span(&self, span: Span) -> &'a str {
154	let start = usize::try_from(span.start - self.span_offset).unwrap();
155	let end = usize::try_from(span.end - self.span_offset).unwrap();
156	&self.input[start..end]
157	}
158
159	pub fn parse_id(&self, span: Span) -> Result<&'a str> {
160	let ret = self.get_span(span);
161	validate_id(span.start, &ret)?;
162	Ok(ret)
163	}
164
165	pub fn parse_explicit_id(&self, span: Span) -> Result<&'a str> {
166	let token = self.get_span(span);
167	let id_part = token.strip_prefix('%').unwrap();
168	validate_id(span.start, id_part)?;
169	Ok(id_part)
170	}
171
172	pub fn next(&mut self) -> Result<Option<(Span, Token)>, Error> {
173	loop {
174	match self.next_raw()? {
175	Some((_, Token::Whitespace)) \| Some((_, Token::Comment)) => {}
176	other => break Ok(other),
177	}
178	}
179	}
180
181	/// Three possibilities when calling this method: an `Err(...)` indicates that lexing failed, an
182	/// `Ok(Some(...))` produces the next token, and `Ok(None)` indicates that there are no more
183	/// tokens available.
184	pub fn next_raw(&mut self) -> Result<Option<(Span, Token)>, Error> {
185	let (str_start, ch) = match self.chars.next() {
186	Some(pair) => pair,
187	None => return Ok(None),
188	};
189	let start = self.span_offset + u32::try_from(str_start).unwrap();
190	let token = match ch {
191	'`\n`' \| '`\t`' \| ' ' => {
192	// Eat all contiguous whitespace tokens
193	while self.eatc(' ') \|\| self.eatc('`\t`') \|\| self.eatc('`\n`') {}
194	Whitespace
195	}
196	'/' => {
197	// Eat a line comment if it's `//...`
198	if self.eatc('/') {
199	for (_, ch) in &mut self.chars {
200	if ch == '`\n`' {
201	break;
202	}
203	}
204	Comment
205	// eat a block comment if it's `/...`*
206	} else if self.eatc('*') {
207	let mut depth = `1`;
208	while depth > `0` {
209	let (_, ch) = match self.chars.next() {
210	Some(pair) => pair,
211	None => return Err(Error::UnterminatedComment(start)),
212	};
213	match ch {
214	'/' if self.eatc('*') => depth += `1`,
215	'*' if self.eatc('/') => depth -= `1`,
216	_ => {}
217	}
218	}
219	Comment
220	} else {
221	Slash
222	}
223	}
224	'=' => Equals,
225	',' => Comma,
226	':' => Colon,
227	'.' => Period,
228	';' => Semicolon,
229	'(' => LeftParen,
230	')' => RightParen,
231	'{' => LeftBrace,
232	'}' => RightBrace,
233	'<' => LessThan,
234	'>' => GreaterThan,
235	'*' => Star,
236	'@' => At,
237	'-' => {
238	if self.eatc('>') {
239	RArrow
240	} else {
241	Minus
242	}
243	}
244	'+' => Plus,
245	'%' => {
246	let mut iter = self.chars.clone();
247	if let Some((_, ch)) = iter.next() {
248	if is_keylike_start(ch) {
249	self.chars = iter.clone();
250	while let Some((_, ch)) = iter.next() {
251	if !is_keylike_continue(ch) {
252	break;
253	}
254	self.chars = iter.clone();
255	}
256	}
257	}
258	ExplicitId
259	}
260	ch if is_keylike_start(ch) => {
261	let remaining = self.chars.chars.as_str().len();
262	let mut iter = self.chars.clone();
263	while let Some((_, ch)) = iter.next() {
264	if !is_keylike_continue(ch) {
265	break;
266	}
267	self.chars = iter.clone();
268	}
269	let str_end =
270	str_start + ch.len_utf8() + (remaining - self.chars.chars.as_str().len());
271	match &self.input[str_start..str_end] {
272	"use" => Use,
273	"type" => Type,
274	"func" => Func,
275	"u8" => U8,
276	"u16" => U16,
277	"u32" => U32,
278	"u64" => U64,
279	"s8" => S8,
280	"s16" => S16,
281	"s32" => S32,
282	"s64" => S64,
283	"f32" => F32,
284	"f64" => F64,
285	"float32" if !self.require_f32_f64 => F32,
286	"float64" if !self.require_f32_f64 => F64,
287	"char" => Char,
288	"resource" => Resource,
289	"own" => Own,
290	"borrow" => Borrow,
291	"record" => Record,
292	"flags" => Flags,
293	"variant" => Variant,
294	"enum" => Enum,
295	"bool" => Bool,
296	"string" => String_,
297	"option" => Option_,
298	"result" => Result_,
299	"future" => Future,
300	"stream" => Stream,
301	"error-context" => ErrorContext,
302	"list" => List,
303	"_" => Underscore,
304	"as" => As,
305	"from" => From_,
306	"static" => Static,
307	"interface" => Interface,
308	"tuple" => Tuple,
309	"world" => World,
310	"import" => Import,
311	"export" => Export,
312	"package" => Package,
313	"constructor" => Constructor,
314	"include" => Include,
315	"with" => With,
316	_ => Id,
317	}
318	}
319
320	ch if ch.is_ascii_digit() => {
321	let mut iter = self.chars.clone();
322	while let Some((_, ch)) = iter.next() {
323	if !ch.is_ascii_digit() {
324	break;
325	}
326	self.chars = iter.clone();
327	}
328
329	Integer
330	}
331
332	ch => return Err(Error::Unexpected(start, ch)),
333	};
334	let end = match self.chars.clone().next() {
335	Some((i, _)) => i,
336	None => self.input.len(),
337	};
338
339	let end = self.span_offset + u32::try_from(end).unwrap();
340	Ok(Some((Span { start, end }, token)))
341	}
342
343	pub fn eat(&mut self, expected: Token) -> Result<bool, Error> {
344	let mut other = self.clone();
345	match other.next()? {
346	Some((_span, found)) if expected == found => {
347	*self = other;
348	Ok(`true`)
349	}
350	Some(_) => Ok(`false`),
351	None => Ok(`false`),
352	}
353	}
354
355	pub fn expect(&mut self, expected: Token) -> Result<Span, Error> {
356	match self.next()? {
357	Some((span, found)) => {
358	if expected == found {
359	Ok(span)
360	} else {
361	Err(Error::Wanted {
362	at: span.start,
363	expected: expected.describe(),
364	found: found.describe(),
365	})
366	}
367	}
368	None => Err(Error::Wanted {
369	at: self.span_offset + u32::try_from(self.input.len()).unwrap(),
370	expected: expected.describe(),
371	found: "eof",
372	}),
373	}
374	}
375
376	fn eatc(&mut self, ch: char) -> bool {
377	let mut iter = self.chars.clone();
378	match iter.next() {
379	Some((_, ch2)) if ch == ch2 => {
380	self.chars = iter;
381	`true`
382	}
383	_ => `false`,
384	}
385	}
386
387	pub fn eof_span(&self) -> Span {
388	let end = self.span_offset + u32::try_from(self.input.len()).unwrap();
389	Span { start: end, end }
390	}
391	}
392
393	impl<'a> Iterator for CrlfFold<'a> {
394	type Item = (usize, char);
395
396	fn next(&mut self) -> Option<(usize, char)> {
397	self.chars.next().map(\|(i: usize, c: char)\| {
398	if c == '`\r`' {
399	let mut attempt: CharIndices<'a> = self.chars.clone();
400	if let Some((_, '`\n`')) = attempt.next() {
401	self.chars = attempt;
402	return (i, '`\n`');
403	}
404	}
405	(i, c)
406	})
407	}
408	}
409
410	fn detect_invalid_input(input: &str) -> Result<()> {
411	// Disallow specific codepoints.
412	let mut line = `1`;
413	for ch in input.chars() {
414	match ch {
415	'`\n`' => line += `1`,
416	'`\r`' \| '`\t`' => {}
417
418	// Bidirectional override codepoints can be used to craft source code that
419	// appears to have a different meaning than its actual meaning. See
420	// [CVE-2021-42574] for background and motivation.
421	//
422	// [CVE-2021-42574]: https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-42574
423	'`\u{202a}`' \| '`\u{202b}`' \| '`\u{202c}`' \| '`\u{202d}`' \| '`\u{202e}`' \| '`\u{2066}`'
424	\| '`\u{2067}`' \| '`\u{2068}`' \| '`\u{2069}`' => {
425	bail!(
426	"Input contains bidirectional override codepoint {:?} at line {}",
427	ch.escape_unicode(),
428	line
429	);
430	}
431
432	// Disallow several characters which are deprecated or discouraged in Unicode.
433	//
434	// U+149 deprecated; see Unicode 13.0.0, sec. 7.1 Latin, Compatibility Digraphs.
435	// U+673 deprecated; see Unicode 13.0.0, sec. 9.2 Arabic, Additional Vowel Marks.
436	// U+F77 and U+F79 deprecated; see Unicode 13.0.0, sec. 13.4 Tibetan, Vowels.
437	// U+17A3 and U+17A4 deprecated, and U+17B4 and U+17B5 discouraged; see
438	// Unicode 13.0.0, sec. 16.4 Khmer, Characters Whose Use Is Discouraged.
439	'`\u{149}`' \| '`\u{673}`' \| '`\u{f77}`' \| '`\u{f79}`' \| '`\u{17a3}`' \| '`\u{17a4}`'
440	\| '`\u{17b4}`' \| '`\u{17b5}`' => {
441	bail!(
442	"Codepoint {:?} at line {} is discouraged by Unicode",
443	ch.escape_unicode(),
444	line
445	);
446	}
447
448	// Disallow control codes other than the ones explicitly recognized above,
449	// so that viewing a wit file on a terminal doesn't have surprising side
450	// effects or appear to have a different meaning than its actual meaning.
451	ch if ch.is_control() => {
452	bail!("Control code '{}' at line {}", ch.escape_unicode(), line);
453	}
454
455	_ => {}
456	}
457	}
458
459	Ok(())
460	}
461
462	fn is_keylike_start(ch: char) -> bool {
463	// Lex any XID start, `_`, or '-'. These aren't all valid identifier chars,
464	// but we'll diagnose that after we've lexed the full string.
465	UnicodeXID::is_xid_start(self:ch) \|\| ch == '_' \|\| ch == '-'
466	}
467
468	fn is_keylike_continue(ch: char) -> bool {
469	// Lex any XID continue (which includes `_`) or '-'.
470	UnicodeXID::is_xid_continue(self:ch) \|\| ch == '-'
471	}
472
473	pub fn validate_id(start: u32, id: &str) -> Result<(), Error> {
474	// IDs must have at least one part.
475	if id.is_empty() {
476	return Err(Error::IdPartEmpty(start));
477	}
478
479	// Ids consist of parts separated by '-'s.
480	for part in id.split('-') {
481	// Parts must be non-empty and contain either all ASCII lowercase or
482	// all ASCII uppercase.
483	let upper = match part.chars().next() {
484	None => return Err(Error::IdPartEmpty(start)),
485	Some(first) => {
486	if first.is_ascii_lowercase() {
487	`false`
488	} else if first.is_ascii_uppercase() {
489	`true`
490	} else {
491	return Err(Error::InvalidCharInId(start, first));
492	}
493	}
494	};
495
496	for ch in part.chars() {
497	if ch.is_ascii_digit() {
498	// Digits are accepted in both uppercase and lowercase segments.
499	} else if upper {
500	if !ch.is_ascii_uppercase() {
501	return Err(Error::InvalidCharInId(start, ch));
502	}
503	} else if !ch.is_ascii_lowercase() {
504	return Err(Error::InvalidCharInId(start, ch));
505	}
506	}
507	}
508
509	Ok(())
510	}
511
512	impl Token {
513	pub fn describe(&self) -> &'static str {
514	match self {
515	Whitespace => "whitespace",
516	Comment => "a comment",
517	Equals => "'='",
518	Comma => "','",
519	Colon => "':'",
520	Period => "'.'",
521	Semicolon => "';'",
522	LeftParen => "'('",
523	RightParen => "')'",
524	LeftBrace => "'{'",
525	RightBrace => "'}'",
526	LessThan => "'<'",
527	GreaterThan => "'>'",
528	Use => "keyword `use`",
529	Type => "keyword `type`",
530	Func => "keyword `func`",
531	U8 => "keyword `u8`",
532	U16 => "keyword `u16`",
533	U32 => "keyword `u32`",
534	U64 => "keyword `u64`",
535	S8 => "keyword `s8`",
536	S16 => "keyword `s16`",
537	S32 => "keyword `s32`",
538	S64 => "keyword `s64`",
539	F32 => "keyword `f32`",
540	F64 => "keyword `f64`",
541	Char => "keyword `char`",
542	Own => "keyword `own`",
543	Borrow => "keyword `borrow`",
544	Resource => "keyword `resource`",
545	Record => "keyword `record`",
546	Flags => "keyword `flags`",
547	Variant => "keyword `variant`",
548	Enum => "keyword `enum`",
549	Bool => "keyword `bool`",
550	String_ => "keyword `string`",
551	Option_ => "keyword `option`",
552	Result_ => "keyword `result`",
553	Future => "keyword `future`",
554	Stream => "keyword `stream`",
555	ErrorContext => "keyword `error-context`",
556	List => "keyword `list`",
557	Underscore => "keyword `_`",
558	Id => "an identifier",
559	ExplicitId => "an '%' identifier",
560	RArrow => "`->`",
561	Star => "`*`",
562	At => "`@`",
563	Slash => "`/`",
564	Plus => "`+`",
565	Minus => "`-`",
566	As => "keyword `as`",
567	From_ => "keyword `from`",
568	Static => "keyword `static`",
569	Interface => "keyword `interface`",
570	Tuple => "keyword `tuple`",
571	Import => "keyword `import`",
572	Export => "keyword `export`",
573	World => "keyword `world`",
574	Package => "keyword `package`",
575	Constructor => "keyword `constructor`",
576	Integer => "an integer",
577	Include => "keyword `include`",
578	With => "keyword `with`",
579	}
580	}
581	}
582
583	impl std::error::Error for Error {}
584
585	impl fmt::Display for Error {
586	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
587	match self {
588	Error::Unexpected(_, ch: &char) => write!(f, "unexpected character {:?}", ch),
589	Error::UnterminatedComment(_) => write!(f, "unterminated block comment"),
590	Error::Wanted {
591	expected: &&'static str, found: &&'static str, ..
592	} => write!(f, "expected {}, found {}", expected, found),
593	Error::InvalidCharInId(_, ch: &char) => write!(f, "invalid character in identifier {:?}", ch),
594	Error::IdPartEmpty(_) => write!(f, "identifiers must have characters between '-'s"),
595	Error::InvalidEscape(_, ch: &char) => write!(f, "invalid escape in string {:?}", ch),
596	}
597	}
598	}
599
/// Exercises `validate_id` with identifiers that must be accepted and
/// identifiers that must be rejected.
#[test]
fn test_validate_id() {
    let accepted = [
        "apple",
        "apple-pear",
        "apple-pear-grape",
        "a0",
        "a",
        "a-a",
        "bool",
        "APPLE",
        "APPLE-PEAR",
        "APPLE-PEAR-GRAPE",
        "apple-PEAR-grape",
        "APPLE-pear-GRAPE",
        "ENOENT",
        "is-XML",
    ];
    for id in accepted.iter() {
        validate_id(0, id).unwrap();
    }

    let rejected = [
        "",
        "0",
        "%",
        "$",
        "0a",
        ".",
        "·",
        "a a",
        "_",
        "-",
        "a-",
        "-a",
        "Apple",
        "applE",
        "-apple-pear",
        "apple-pear-",
        "apple_pear",
        "apple.pear",
        "apple pear",
        "apple/pear",
        "apple|pear",
        "apple-Pear",
        "apple-0",
        "()()",
        "",
        "*",
        "apple\u{5f3}pear",
        "apple\u{200c}pear",
        "apple\u{200d}pear",
        "apple--pear",
        "_apple",
        "apple_",
        "_Znwj",
        "__i386",
        "__i386__",
        "Москва",
        "garçon-hühnervögel-Москва-東京",
    ];
    for id in rejected.iter() {
        assert!(validate_id(0, id).is_err());
    }

    assert!(validate_id(0, "😼").is_err(), "non-identifier");
    assert!(validate_id(0, "\u{212b}").is_err(), "non-ascii");
}
657
/// Checks the token sequences produced for a range of inputs, and that
/// inputs containing forbidden codepoints are rejected.
#[test]
fn test_tokenizer() {
    /// Lexes `s` completely, returning only the token kinds.
    fn collect(s: &str) -> Result<Vec<Token>> {
        let mut lexer = Tokenizer::new(s, 0, None)?;
        let mut kinds = Vec::new();
        while let Some((_, kind)) = lexer.next()? {
            kinds.push(kind);
        }
        Ok(kinds)
    }

    // Inputs paired with the exact token kinds they must produce.
    let cases: &[(&str, &[Token])] = &[
        ("", &[]),
        ("_", &[Token::Underscore]),
        ("apple", &[Token::Id]),
        ("apple-pear", &[Token::Id]),
        ("apple--pear", &[Token::Id]),
        ("apple-Pear", &[Token::Id]),
        ("apple-pear-grape", &[Token::Id]),
        ("apple pear", &[Token::Id, Token::Id]),
        ("_a_p_p_l_e_", &[Token::Id]),
        ("garçon", &[Token::Id]),
        ("hühnervögel", &[Token::Id]),
        ("москва", &[Token::Id]),
        ("東京", &[Token::Id]),
        ("garçon-hühnervögel-москва-東京", &[Token::Id]),
        ("a0", &[Token::Id]),
        ("a", &[Token::Id]),
        ("%a", &[Token::ExplicitId]),
        ("%a-a", &[Token::ExplicitId]),
        ("%bool", &[Token::ExplicitId]),
        ("%", &[Token::ExplicitId]),
        ("APPLE", &[Token::Id]),
        ("APPLE-PEAR", &[Token::Id]),
        ("APPLE-PEAR-GRAPE", &[Token::Id]),
        ("apple-PEAR-grape", &[Token::Id]),
        ("APPLE-pear-GRAPE", &[Token::Id]),
        ("ENOENT", &[Token::Id]),
        ("is-XML", &[Token::Id]),
        ("func", &[Token::Func]),
        (
            "a: func()",
            &[
                Token::Id,
                Token::Colon,
                Token::Func,
                Token::LeftParen,
                Token::RightParen,
            ],
        ),
        ("resource", &[Token::Resource]),
        ("own", &[Token::Own]),
        (
            "own<some-id>",
            &[Token::Own, Token::LessThan, Token::Id, Token::GreaterThan],
        ),
        ("borrow", &[Token::Borrow]),
        (
            "borrow<some-id>",
            &[
                Token::Borrow,
                Token::LessThan,
                Token::Id,
                Token::GreaterThan,
            ],
        ),
    ];
    for (src, want) in cases.iter() {
        assert_eq!(collect(src).unwrap(), want.to_vec());
    }

    // Inputs that must be refused, with the reason used as the failure message.
    let refused: &[(&str, &str)] = &[
        ("\u{149}", "strongly discouraged"),
        ("\u{673}", "strongly discouraged"),
        ("\u{17a3}", "strongly discouraged"),
        ("\u{17a4}", "strongly discouraged"),
        ("\u{202a}", "bidirectional override"),
        ("\u{2068}", "bidirectional override"),
        ("\u{0}", "control code"),
        ("\u{b}", "control code"),
        ("\u{c}", "control code"),
        ("\u{85}", "control code"),
    ];
    for (src, why) in refused.iter() {
        assert!(collect(src).is_err(), "{}", why);
    }
}
742