scanner.rs source code [crates/yaml-rust/src/scanner.rs]

1	use std::collections::VecDeque;
2	use std::error::Error;
3	use std::{char, fmt};
4
/// Character encoding reported in the `StreamStart` token.
///
/// UTF-8 is the only encoding this scanner represents.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TEncoding {
    /// UTF-8 encoded input.
    Utf8,
}
9
/// Presentation style of a scalar token.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub enum TScalarStyle {
    /// No particular style.
    Any,
    /// Unquoted scalar.
    Plain,
    /// Scalar delimited by `'`.
    SingleQuoted,
    /// Scalar delimited by `"`.
    DoubleQuoted,

    /// Block scalar introduced by `|`.
    Literal,
    /// Block scalar introduced by `>`.
    ///
    /// NOTE(review): the name looks like a misspelling of "Folded"; it is
    /// kept as-is because the variant is part of the public API.
    Foled,
}
20
/// A position in the scanned input.
///
/// `index` counts characters consumed so far, `line` is the line number and
/// `col` is the zero-based column within that line.
#[derive(Clone, Copy, PartialEq, Debug, Eq)]
pub struct Marker {
    index: usize,
    line: usize,
    col: usize,
}

impl Marker {
    /// Builds a marker from its raw components.
    fn new(index: usize, line: usize, col: usize) -> Marker {
        Marker { index, line, col }
    }

    /// Number of characters consumed before this position.
    pub fn index(&self) -> usize {
        self.index
    }

    /// Line number of this position.
    pub fn line(&self) -> usize {
        self.line
    }

    /// Zero-based column of this position.
    pub fn col(&self) -> usize {
        self.col
    }
}
45
46	#[derive(Clone, PartialEq, Debug, Eq)]
47	pub struct ScanError {
48	mark: Marker,
49	info: String,
50	}
51
52	impl ScanError {
53	pub fn new(loc: Marker, info: &str) -> ScanError {
54	ScanError {
55	mark: loc,
56	info: info.to_owned(),
57	}
58	}
59
60	pub fn marker(&self) -> &Marker {
61	&self.mark
62	}
63	}
64
65	impl Error for ScanError {
66	fn description(&self) -> &str {
67	self.info.as_ref()
68	}
69
70	fn cause(&self) -> Option<&dyn Error> {
71	None
72	}
73	}
74
75	impl fmt::Display for ScanError {
76	// col starts from 0
77	fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
78	write!(
79	formatter,
80	"{} at line {} column {}",
81	self.info,
82	self.mark.line,
83	self.mark.col + `1`
84	)
85	}
86	}
87
88	#[derive(Clone, PartialEq, Debug, Eq)]
89	pub enum TokenType {
90	NoToken,
91	StreamStart(TEncoding),
92	StreamEnd,
93	/// major, minor
94	VersionDirective(u32, u32),
95	/// handle, prefix
96	TagDirective(String, String),
97	DocumentStart,
98	DocumentEnd,
99	BlockSequenceStart,
100	BlockMappingStart,
101	BlockEnd,
102	FlowSequenceStart,
103	FlowSequenceEnd,
104	FlowMappingStart,
105	FlowMappingEnd,
106	BlockEntry,
107	FlowEntry,
108	Key,
109	Value,
110	Alias(String),
111	Anchor(String),
112	/// handle, suffix
113	Tag(String, String),
114	Scalar(TScalarStyle, String),
115	}
116
117	#[derive(Clone, PartialEq, Debug, Eq)]
118	pub struct Token(pub Marker, pub TokenType);
119
120	#[derive(Clone, PartialEq, Debug, Eq)]
121	struct SimpleKey {
122	possible: bool,
123	required: bool,
124	token_number: usize,
125	mark: Marker,
126	}
127
128	impl SimpleKey {
129	fn new(mark: Marker) -> SimpleKey {
130	SimpleKey {
131	possible: `false`,
132	required: `false`,
133	token_number: `0`,
134	mark,
135	}
136	}
137	}
138
139	#[derive(Debug)]
140	pub struct Scanner<T> {
141	rdr: T,
142	mark: Marker,
143	tokens: VecDeque<Token>,
144	buffer: VecDeque<char>,
145	error: Option<ScanError>,
146
147	stream_start_produced: bool,
148	stream_end_produced: bool,
149	adjacent_value_allowed_at: usize,
150	simple_key_allowed: bool,
151	simple_keys: Vec<SimpleKey>,
152	indent: isize,
153	indents: Vec<isize>,
154	flow_level: u8,
155	tokens_parsed: usize,
156	token_available: bool,
157	}
158
159	impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
160	type Item = Token;
161	fn next(&mut self) -> Option<Token> {
162	if self.error.is_some() {
163	return None;
164	}
165	match self.next_token() {
166	Ok(tok: Option) => tok,
167	Err(e: ScanError) => {
168	self.error = Some(e);
169	None
170	}
171	}
172	}
173	}
174
/// Returns `true` for the NUL character, which the scanner uses as its
/// end-of-input sentinel.
#[inline]
fn is_z(c: char) -> bool {
    c == '\0'
}
/// Returns `true` for a line-break character (`\n` or `\r`).
#[inline]
fn is_break(c: char) -> bool {
    c == '\n' || c == '\r'
}
183	#[inline]
184	fn is_breakz(c: char) -> bool {
185	is_break(c) \|\| is_z(c)
186	}
/// Returns `true` for a space or a tab.
#[inline]
fn is_blank(c: char) -> bool {
    c == ' ' || c == '\t'
}
191	#[inline]
192	fn is_blankz(c: char) -> bool {
193	is_blank(c) \|\| is_breakz(c)
194	}
/// Returns `true` for an ASCII decimal digit (`0`-`9`).
#[inline]
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// Returns `true` for the characters allowed in names (directive names,
/// anchors, tag handles): ASCII letters and digits, `_` and `-`.
#[inline]
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_' || c == '-'
}
/// Returns `true` for an ASCII hexadecimal digit (`0`-`9`, `a`-`f`, `A`-`F`).
#[inline]
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Converts a hexadecimal digit to its numeric value (0-15).
///
/// # Panics
///
/// Panics if `c` is not an ASCII hexadecimal digit; callers are expected to
/// check with `is_hex` first.
#[inline]
fn as_hex(c: char) -> u32 {
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
/// Returns `true` for a flow-collection indicator: `,`, `[`, `]`, `{` or `}`.
#[inline]
fn is_flow(c: char) -> bool {
    match c {
        ',' | '[' | ']' | '{' | '}' => true,
        _ => false,
    }
}
227
228	pub type ScanResult = Result<(), ScanError>;
229
230	impl<T: Iterator<Item = char>> Scanner<T> {
231	/// Creates the YAML tokenizer.
232	pub fn new(rdr: T) -> Scanner<T> {
233	Scanner {
234	rdr,
235	buffer: VecDeque::new(),
236	mark: Marker::new(`0`, `1`, `0`),
237	tokens: VecDeque::new(),
238	error: None,
239
240	stream_start_produced: `false`,
241	stream_end_produced: `false`,
242	adjacent_value_allowed_at: `0`,
243	simple_key_allowed: `true`,
244	simple_keys: Vec::new(),
245	indent: `-1`,
246	indents: Vec::new(),
247	flow_level: `0`,
248	tokens_parsed: `0`,
249	token_available: `false`,
250	}
251	}
252	#[inline]
253	pub fn get_error(&self) -> Option<ScanError> {
254	match self.error {
255	None => None,
256	Some(ref e) => Some(e.clone()),
257	}
258	}
259
260	#[inline]
261	fn lookahead(&mut self, count: usize) {
262	if self.buffer.len() >= count {
263	return;
264	}
265	for _ in `0`..(count - self.buffer.len()) {
266	self.buffer.push_back(self.rdr.next().unwrap_or('`\0`'));
267	}
268	}
269	#[inline]
270	fn skip(&mut self) {
271	let c = self.buffer.pop_front().unwrap();
272
273	self.mark.index += `1`;
274	if c == '`\n`' {
275	self.mark.line += `1`;
276	self.mark.col = `0`;
277	} else {
278	self.mark.col += `1`;
279	}
280	}
281	#[inline]
282	fn skip_line(&mut self) {
283	if self.buffer[`0`] == '`\r`' && self.buffer[`1`] == '`\n`' {
284	self.skip();
285	self.skip();
286	} else if is_break(self.buffer[`0`]) {
287	self.skip();
288	}
289	}
290	#[inline]
291	fn ch(&self) -> char {
292	self.buffer[`0`]
293	}
294	#[inline]
295	fn ch_is(&self, c: char) -> bool {
296	self.buffer[`0`] == c
297	}
298	#[allow(dead_code)]
299	#[inline]
300	fn eof(&self) -> bool {
301	self.ch_is('`\0`')
302	}
303	#[inline]
304	pub fn stream_started(&self) -> bool {
305	self.stream_start_produced
306	}
307	#[inline]
308	pub fn stream_ended(&self) -> bool {
309	self.stream_end_produced
310	}
311	#[inline]
312	pub fn mark(&self) -> Marker {
313	self.mark
314	}
315	#[inline]
316	fn read_break(&mut self, s: &mut String) {
317	if self.buffer[`0`] == '`\r`' && self.buffer[`1`] == '`\n`' {
318	s.push('`\n`');
319	self.skip();
320	self.skip();
321	} else if self.buffer[`0`] == '`\r`' \|\| self.buffer[`0`] == '`\n`' {
322	s.push('`\n`');
323	self.skip();
324	} else {
325	unreachable!();
326	}
327	}
328	fn insert_token(&mut self, pos: usize, tok: Token) {
329	let old_len = self.tokens.len();
330	assert!(pos <= old_len);
331	self.tokens.push_back(tok);
332	for i in `0`..old_len - pos {
333	self.tokens.swap(old_len - i, old_len - i - `1`);
334	}
335	}
336	fn allow_simple_key(&mut self) {
337	self.simple_key_allowed = `true`;
338	}
339	fn disallow_simple_key(&mut self) {
340	self.simple_key_allowed = `false`;
341	}
342
343	pub fn fetch_next_token(&mut self) -> ScanResult {
344	self.lookahead(`1`);
345	// println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
346
347	if !self.stream_start_produced {
348	self.fetch_stream_start();
349	return Ok(());
350	}
351	self.skip_to_next_token();
352
353	self.stale_simple_keys()?;
354
355	let mark = self.mark;
356	self.unroll_indent(mark.col as isize);
357
358	self.lookahead(`4`);
359
360	if is_z(self.ch()) {
361	self.fetch_stream_end()?;
362	return Ok(());
363	}
364
365	// Is it a directive?
366	if self.mark.col == `0` && self.ch_is('%') {
367	return self.fetch_directive();
368	}
369
370	if self.mark.col == `0`
371	&& self.buffer[`0`] == '-'
372	&& self.buffer[`1`] == '-'
373	&& self.buffer[`2`] == '-'
374	&& is_blankz(self.buffer[`3`])
375	{
376	self.fetch_document_indicator(TokenType::DocumentStart)?;
377	return Ok(());
378	}
379
380	if self.mark.col == `0`
381	&& self.buffer[`0`] == '.'
382	&& self.buffer[`1`] == '.'
383	&& self.buffer[`2`] == '.'
384	&& is_blankz(self.buffer[`3`])
385	{
386	self.fetch_document_indicator(TokenType::DocumentEnd)?;
387	return Ok(());
388	}
389
390	let c = self.buffer[`0`];
391	let nc = self.buffer[`1`];
392	match c {
393	'[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
394	'{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
395	']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
396	'}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
397	',' => self.fetch_flow_entry(),
398	'-' if is_blankz(nc) => self.fetch_block_entry(),
399	'?' if is_blankz(nc) => self.fetch_key(),
400	':' if is_blankz(nc)
401	\|\| (self.flow_level > `0`
402	&& (is_flow(nc) \|\| self.mark.index == self.adjacent_value_allowed_at)) =>
403	{
404	self.fetch_value()
405	}
406	// Is it an alias?
407	'*' => self.fetch_anchor(`true`),
408	// Is it an anchor?
409	'&' => self.fetch_anchor(`false`),
410	'!' => self.fetch_tag(),
411	// Is it a literal scalar?
412	'\|' if self.flow_level == `0` => self.fetch_block_scalar(`true`),
413	// Is it a folded scalar?
414	'>' if self.flow_level == `0` => self.fetch_block_scalar(`false`),
415	'`\'`' => self.fetch_flow_scalar(`true`),
416	'"' => self.fetch_flow_scalar(`false`),
417	// plain scalar
418	'-' if !is_blankz(nc) => self.fetch_plain_scalar(),
419	':' \| '?' if !is_blankz(nc) && self.flow_level == `0` => self.fetch_plain_scalar(),
420	'%' \| '@' \| '`' => Err(ScanError::new(
421	self.mark,
422	&format!("unexpected character: `{}'", c),
423	)),
424	_ => self.fetch_plain_scalar(),
425	}
426	}
427
428	pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
429	if self.stream_end_produced {
430	return Ok(None);
431	}
432
433	if !self.token_available {
434	self.fetch_more_tokens()?;
435	}
436	let t = self.tokens.pop_front().unwrap();
437	self.token_available = `false`;
438	self.tokens_parsed += `1`;
439
440	if let TokenType::StreamEnd = t.1 {
441	self.stream_end_produced = `true`;
442	}
443	Ok(Some(t))
444	}
445
446	pub fn fetch_more_tokens(&mut self) -> ScanResult {
447	let mut need_more;
448	loop {
449	need_more = `false`;
450	if self.tokens.is_empty() {
451	need_more = `true`;
452	} else {
453	self.stale_simple_keys()?;
454	for sk in &self.simple_keys {
455	if sk.possible && sk.token_number == self.tokens_parsed {
456	need_more = `true`;
457	break;
458	}
459	}
460	}
461
462	if !need_more {
463	break;
464	}
465	self.fetch_next_token()?;
466	}
467	self.token_available = `true`;
468
469	Ok(())
470	}
471
472	fn stale_simple_keys(&mut self) -> ScanResult {
473	for sk in &mut self.simple_keys {
474	if sk.possible
475	&& (sk.mark.line < self.mark.line \|\| sk.mark.index + `1024` < self.mark.index)
476	{
477	if sk.required {
478	return Err(ScanError::new(self.mark, "simple key expect ':'"));
479	}
480	sk.possible = `false`;
481	}
482	}
483	Ok(())
484	}
485
486	fn skip_to_next_token(&mut self) {
487	loop {
488	self.lookahead(`1`);
489	// TODO(chenyh) BOM
490	match self.ch() {
491	' ' => self.skip(),
492	'`\t`' if self.flow_level > `0` \|\| !self.simple_key_allowed => self.skip(),
493	'`\n`' \| '`\r`' => {
494	self.lookahead(`2`);
495	self.skip_line();
496	if self.flow_level == `0` {
497	self.allow_simple_key();
498	}
499	}
500	'#' => {
501	while !is_breakz(self.ch()) {
502	self.skip();
503	self.lookahead(`1`);
504	}
505	}
506	_ => break,
507	}
508	}
509	}
510
511	fn fetch_stream_start(&mut self) {
512	let mark = self.mark;
513	self.indent = `-1`;
514	self.stream_start_produced = `true`;
515	self.allow_simple_key();
516	self.tokens
517	.push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
518	self.simple_keys.push(SimpleKey::new(Marker::new(`0`, `0`, `0`)));
519	}
520
521	fn fetch_stream_end(&mut self) -> ScanResult {
522	// force new line
523	if self.mark.col != `0` {
524	self.mark.col = `0`;
525	self.mark.line += `1`;
526	}
527
528	self.unroll_indent(`-1`);
529	self.remove_simple_key()?;
530	self.disallow_simple_key();
531
532	self.tokens
533	.push_back(Token(self.mark, TokenType::StreamEnd));
534	Ok(())
535	}
536
537	fn fetch_directive(&mut self) -> ScanResult {
538	self.unroll_indent(`-1`);
539	self.remove_simple_key()?;
540
541	self.disallow_simple_key();
542
543	let tok = self.scan_directive()?;
544
545	self.tokens.push_back(tok);
546
547	Ok(())
548	}
549
550	fn scan_directive(&mut self) -> Result<Token, ScanError> {
551	let start_mark = self.mark;
552	self.skip();
553
554	let name = self.scan_directive_name()?;
555	let tok = match name.as_ref() {
556	"YAML" => self.scan_version_directive_value(&start_mark)?,
557	"TAG" => self.scan_tag_directive_value(&start_mark)?,
558	// XXX This should be a warning instead of an error
559	_ => {
560	// skip current line
561	self.lookahead(`1`);
562	while !is_breakz(self.ch()) {
563	self.skip();
564	self.lookahead(`1`);
565	}
566	// XXX return an empty TagDirective token
567	Token(
568	start_mark,
569	TokenType::TagDirective(String::new(), String::new()),
570	)
571	// return Err(ScanError::new(start_mark,
572	// "while scanning a directive, found unknown directive name"))
573	}
574	};
575	self.lookahead(`1`);
576
577	while is_blank(self.ch()) {
578	self.skip();
579	self.lookahead(`1`);
580	}
581
582	if self.ch() == '#' {
583	while !is_breakz(self.ch()) {
584	self.skip();
585	self.lookahead(`1`);
586	}
587	}
588
589	if !is_breakz(self.ch()) {
590	return Err(ScanError::new(
591	start_mark,
592	"while scanning a directive, did not find expected comment or line break",
593	));
594	}
595
596	// Eat a line break
597	if is_break(self.ch()) {
598	self.lookahead(`2`);
599	self.skip_line();
600	}
601
602	Ok(tok)
603	}
604
605	fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
606	self.lookahead(`1`);
607
608	while is_blank(self.ch()) {
609	self.skip();
610	self.lookahead(`1`);
611	}
612
613	let major = self.scan_version_directive_number(mark)?;
614
615	if self.ch() != '.' {
616	return Err(ScanError::new(
617	*mark,
618	"while scanning a YAML directive, did not find expected digit or '.' character",
619	));
620	}
621
622	self.skip();
623
624	let minor = self.scan_version_directive_number(mark)?;
625
626	Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
627	}
628
629	fn scan_directive_name(&mut self) -> Result<String, ScanError> {
630	let start_mark = self.mark;
631	let mut string = String::new();
632	self.lookahead(`1`);
633	while is_alpha(self.ch()) {
634	string.push(self.ch());
635	self.skip();
636	self.lookahead(`1`);
637	}
638
639	if string.is_empty() {
640	return Err(ScanError::new(
641	start_mark,
642	"while scanning a directive, could not find expected directive name",
643	));
644	}
645
646	if !is_blankz(self.ch()) {
647	return Err(ScanError::new(
648	start_mark,
649	"while scanning a directive, found unexpected non-alphabetical character",
650	));
651	}
652
653	Ok(string)
654	}
655
656	fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
657	let mut val = `0u32`;
658	let mut length = `0usize`;
659	self.lookahead(`1`);
660	while is_digit(self.ch()) {
661	if length + `1` > `9` {
662	return Err(ScanError::new(
663	*mark,
664	"while scanning a YAML directive, found extremely long version number",
665	));
666	}
667	length += `1`;
668	val = val * `10` + ((self.ch() as u32) - ('0' as u32));
669	self.skip();
670	self.lookahead(`1`);
671	}
672
673	if length == `0` {
674	return Err(ScanError::new(
675	*mark,
676	"while scanning a YAML directive, did not find expected version number",
677	));
678	}
679
680	Ok(val)
681	}
682
683	fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
684	self.lookahead(`1`);
685	/ Eat whitespaces. /
686	while is_blank(self.ch()) {
687	self.skip();
688	self.lookahead(`1`);
689	}
690	let handle = self.scan_tag_handle(`true`, mark)?;
691
692	self.lookahead(`1`);
693	/ Eat whitespaces. /
694	while is_blank(self.ch()) {
695	self.skip();
696	self.lookahead(`1`);
697	}
698
699	let is_secondary = handle == "!!";
700	let prefix = self.scan_tag_uri(`true`, is_secondary, &String::new(), mark)?;
701
702	self.lookahead(`1`);
703
704	if is_blankz(self.ch()) {
705	Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
706	} else {
707	Err(ScanError::new(
708	*mark,
709	"while scanning TAG, did not find expected whitespace or line break",
710	))
711	}
712	}
713
714	fn fetch_tag(&mut self) -> ScanResult {
715	self.save_simple_key()?;
716	self.disallow_simple_key();
717
718	let tok = self.scan_tag()?;
719	self.tokens.push_back(tok);
720	Ok(())
721	}
722
723	fn scan_tag(&mut self) -> Result<Token, ScanError> {
724	let start_mark = self.mark;
725	let mut handle = String::new();
726	let mut suffix;
727	let mut secondary = `false`;
728
729	// Check if the tag is in the canonical form (verbatim).
730	self.lookahead(`2`);
731
732	if self.buffer[`1`] == '<' {
733	// Eat '!<'
734	self.skip();
735	self.skip();
736	suffix = self.scan_tag_uri(`false`, `false`, &String::new(), &start_mark)?;
737
738	if self.ch() != '>' {
739	return Err(ScanError::new(
740	start_mark,
741	"while scanning a tag, did not find the expected '>'",
742	));
743	}
744
745	self.skip();
746	} else {
747	// The tag has either the '!suffix' or the '!handle!suffix'
748	handle = self.scan_tag_handle(`false`, &start_mark)?;
749	// Check if it is, indeed, handle.
750	if handle.len() >= `2` && handle.starts_with('!') && handle.ends_with('!') {
751	if handle == "!!" {
752	secondary = `true`;
753	}
754	suffix = self.scan_tag_uri(`false`, secondary, &String::new(), &start_mark)?;
755	} else {
756	suffix = self.scan_tag_uri(`false`, `false`, &handle, &start_mark)?;
757	handle = "!".to_owned();
758	// A special case: the '!' tag. Set the handle to '' and the
759	// suffix to '!'.
760	if suffix.is_empty() {
761	handle.clear();
762	suffix = "!".to_owned();
763	}
764	}
765	}
766
767	self.lookahead(`1`);
768	if is_blankz(self.ch()) {
769	// XXX: ex 7.2, an empty scalar can follow a secondary tag
770	Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
771	} else {
772	Err(ScanError::new(
773	start_mark,
774	"while scanning a tag, did not find expected whitespace or line break",
775	))
776	}
777	}
778
779	fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
780	let mut string = String::new();
781	self.lookahead(`1`);
782	if self.ch() != '!' {
783	return Err(ScanError::new(
784	*mark,
785	"while scanning a tag, did not find expected '!'",
786	));
787	}
788
789	string.push(self.ch());
790	self.skip();
791
792	self.lookahead(`1`);
793	while is_alpha(self.ch()) {
794	string.push(self.ch());
795	self.skip();
796	self.lookahead(`1`);
797	}
798
799	// Check if the trailing character is '!' and copy it.
800	if self.ch() == '!' {
801	string.push(self.ch());
802	self.skip();
803	} else if directive && string != "!" {
804	// It's either the '!' tag or not really a tag handle. If it's a %TAG
805	// directive, it's an error. If it's a tag token, it must be a part of
806	// URI.
807	return Err(ScanError::new(
808	*mark,
809	"while parsing a tag directive, did not find expected '!'",
810	));
811	}
812	Ok(string)
813	}
814
815	fn scan_tag_uri(
816	&mut self,
817	directive: bool,
818	_is_secondary: bool,
819	head: &str,
820	mark: &Marker,
821	) -> Result<String, ScanError> {
822	let mut length = head.len();
823	let mut string = String::new();
824
825	// Copy the head if needed.
826	// Note that we don't copy the leading '!' character.
827	if length > `1` {
828	string.extend(head.chars().skip(`1`));
829	}
830
831	self.lookahead(`1`);
832	/*
833	* The set of characters that may appear in URI is as follows:
834	*
835	* '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
836	* '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
837	* '%'.
838	*/
839	while match self.ch() {
840	';' \| '/' \| '?' \| ':' \| '@' \| '&' => `true`,
841	'=' \| '+' \| '$' \| ',' \| '.' \| '!' \| '~' \| '*' \| '`\'`' \| '(' \| ')' \| '[' \| ']' => `true`,
842	'%' => `true`,
843	c if is_alpha(c) => `true`,
844	_ => `false`,
845	} {
846	// Check if it is a URI-escape sequence.
847	if self.ch() == '%' {
848	string.push(self.scan_uri_escapes(directive, mark)?);
849	} else {
850	string.push(self.ch());
851	self.skip();
852	}
853
854	length += `1`;
855	self.lookahead(`1`);
856	}
857
858	if length == `0` {
859	return Err(ScanError::new(
860	*mark,
861	"while parsing a tag, did not find expected tag URI",
862	));
863	}
864
865	Ok(string)
866	}
867
868	fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> {
869	let mut width = `0usize`;
870	let mut code = `0u32`;
871	loop {
872	self.lookahead(`3`);
873
874	if !(self.ch() == '%' && is_hex(self.buffer[`1`]) && is_hex(self.buffer[`2`])) {
875	return Err(ScanError::new(
876	*mark,
877	"while parsing a tag, did not find URI escaped octet",
878	));
879	}
880
881	let octet = (as_hex(self.buffer[`1`]) << `4`) + as_hex(self.buffer[`2`]);
882	if width == `0` {
883	width = match octet {
884	_ if octet & `0x80` == `0x00` => `1`,
885	_ if octet & `0xE0` == `0xC0` => `2`,
886	_ if octet & `0xF0` == `0xE0` => `3`,
887	_ if octet & `0xF8` == `0xF0` => `4`,
888	_ => {
889	return Err(ScanError::new(
890	*mark,
891	"while parsing a tag, found an incorrect leading UTF-8 octet",
892	));
893	}
894	};
895	code = octet;
896	} else {
897	if octet & `0xc0` != `0x80` {
898	return Err(ScanError::new(
899	*mark,
900	"while parsing a tag, found an incorrect trailing UTF-8 octet",
901	));
902	}
903	code = (code << `8`) + octet;
904	}
905
906	self.skip();
907	self.skip();
908	self.skip();
909
910	width -= `1`;
911	if width == `0` {
912	break;
913	}
914	}
915
916	match char::from_u32(code) {
917	Some(ch) => Ok(ch),
918	None => Err(ScanError::new(
919	*mark,
920	"while parsing a tag, found an invalid UTF-8 codepoint",
921	)),
922	}
923	}
924
925	fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
926	self.save_simple_key()?;
927	self.disallow_simple_key();
928
929	let tok = self.scan_anchor(alias)?;
930
931	self.tokens.push_back(tok);
932
933	Ok(())
934	}
935
936	fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
937	let mut string = String::new();
938	let start_mark = self.mark;
939
940	self.skip();
941	self.lookahead(`1`);
942	while is_alpha(self.ch()) {
943	string.push(self.ch());
944	self.skip();
945	self.lookahead(`1`);
946	}
947
948	if string.is_empty()
949	\|\| match self.ch() {
950	c if is_blankz(c) => `false`,
951	'?' \| ':' \| ',' \| ']' \| '}' \| '%' \| '@' \| '`' => `false`,
952	_ => `true`,
953	}
954	{
955	return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
956	}
957
958	if alias {
959	Ok(Token(start_mark, TokenType::Alias(string)))
960	} else {
961	Ok(Token(start_mark, TokenType::Anchor(string)))
962	}
963	}
964
965	fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
966	// The indicators '[' and '{' may start a simple key.
967	self.save_simple_key()?;
968
969	self.increase_flow_level()?;
970
971	self.allow_simple_key();
972
973	let start_mark = self.mark;
974	self.skip();
975
976	self.tokens.push_back(Token(start_mark, tok));
977	Ok(())
978	}
979
980	fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
981	self.remove_simple_key()?;
982	self.decrease_flow_level();
983
984	self.disallow_simple_key();
985
986	let start_mark = self.mark;
987	self.skip();
988
989	self.tokens.push_back(Token(start_mark, tok));
990	Ok(())
991	}
992
993	fn fetch_flow_entry(&mut self) -> ScanResult {
994	self.remove_simple_key()?;
995	self.allow_simple_key();
996
997	let start_mark = self.mark;
998	self.skip();
999
1000	self.tokens
1001	.push_back(Token(start_mark, TokenType::FlowEntry));
1002	Ok(())
1003	}
1004
1005	fn increase_flow_level(&mut self) -> ScanResult {
1006	self.simple_keys.push(SimpleKey::new(Marker::new(`0`, `0`, `0`)));
1007	self.flow_level = self
1008	.flow_level
1009	.checked_add(`1`)
1010	.ok_or_else(\|\| ScanError::new(self.mark, "recursion limit exceeded"))?;
1011	Ok(())
1012	}
1013	fn decrease_flow_level(&mut self) {
1014	if self.flow_level > `0` {
1015	self.flow_level -= `1`;
1016	self.simple_keys.pop().unwrap();
1017	}
1018	}
1019
1020	fn fetch_block_entry(&mut self) -> ScanResult {
1021	if self.flow_level == `0` {
1022	// Check if we are allowed to start a new entry.
1023	if !self.simple_key_allowed {
1024	return Err(ScanError::new(
1025	self.mark,
1026	"block sequence entries are not allowed in this context",
1027	));
1028	}
1029
1030	let mark = self.mark;
1031	// generate BLOCK-SEQUENCE-START if indented
1032	self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
1033	} else {
1034	// - only allowed in block*
1035	return Err(ScanError::new(
1036	self.mark,
1037	r#""-" is only valid inside a block"#,
1038	));
1039	}
1040	self.remove_simple_key()?;
1041	self.allow_simple_key();
1042
1043	let start_mark = self.mark;
1044	self.skip();
1045
1046	self.tokens
1047	.push_back(Token(start_mark, TokenType::BlockEntry));
1048	Ok(())
1049	}
1050
1051	fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
1052	self.unroll_indent(`-1`);
1053	self.remove_simple_key()?;
1054	self.disallow_simple_key();
1055
1056	let mark = self.mark;
1057
1058	self.skip();
1059	self.skip();
1060	self.skip();
1061
1062	self.tokens.push_back(Token(mark, t));
1063	Ok(())
1064	}
1065
1066	fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1067	self.save_simple_key()?;
1068	self.allow_simple_key();
1069	let tok = self.scan_block_scalar(literal)?;
1070
1071	self.tokens.push_back(tok);
1072	Ok(())
1073	}
1074
1075	fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
1076	let start_mark = self.mark;
1077	let mut chomping: i32 = `0`;
1078	let mut increment: usize = `0`;
1079	let mut indent: usize = `0`;
1080	let mut trailing_blank: bool;
1081	let mut leading_blank: bool = `false`;
1082
1083	let mut string = String::new();
1084	let mut leading_break = String::new();
1085	let mut trailing_breaks = String::new();
1086
1087	// skip '\|' or '>'
1088	self.skip();
1089	self.lookahead(`1`);
1090
1091	if self.ch() == '+' \|\| self.ch() == '-' {
1092	if self.ch() == '+' {
1093	chomping = `1`;
1094	} else {
1095	chomping = `-1`;
1096	}
1097	self.skip();
1098	self.lookahead(`1`);
1099	if is_digit(self.ch()) {
1100	if self.ch() == '0' {
1101	return Err(ScanError::new(
1102	start_mark,
1103	"while scanning a block scalar, found an indentation indicator equal to 0",
1104	));
1105	}
1106	increment = (self.ch() as usize) - ('0' as usize);
1107	self.skip();
1108	}
1109	} else if is_digit(self.ch()) {
1110	if self.ch() == '0' {
1111	return Err(ScanError::new(
1112	start_mark,
1113	"while scanning a block scalar, found an indentation indicator equal to 0",
1114	));
1115	}
1116
1117	increment = (self.ch() as usize) - ('0' as usize);
1118	self.skip();
1119	self.lookahead(`1`);
1120	if self.ch() == '+' \|\| self.ch() == '-' {
1121	if self.ch() == '+' {
1122	chomping = `1`;
1123	} else {
1124	chomping = `-1`;
1125	}
1126	self.skip();
1127	}
1128	}
1129
1130	// Eat whitespaces and comments to the end of the line.
1131	self.lookahead(`1`);
1132
1133	while is_blank(self.ch()) {
1134	self.skip();
1135	self.lookahead(`1`);
1136	}
1137
1138	if self.ch() == '#' {
1139	while !is_breakz(self.ch()) {
1140	self.skip();
1141	self.lookahead(`1`);
1142	}
1143	}
1144
1145	// Check if we are at the end of the line.
1146	if !is_breakz(self.ch()) {
1147	return Err(ScanError::new(
1148	start_mark,
1149	"while scanning a block scalar, did not find expected comment or line break",
1150	));
1151	}
1152
1153	if is_break(self.ch()) {
1154	self.lookahead(`2`);
1155	self.skip_line();
1156	}
1157
1158	if increment > `0` {
1159	indent = if self.indent >= `0` {
1160	(self.indent + increment as isize) as usize
1161	} else {
1162	increment
1163	}
1164	}
1165	// Scan the leading line breaks and determine the indentation level if needed.
1166	self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
1167
1168	self.lookahead(`1`);
1169
1170	let start_mark = self.mark;
1171
1172	while self.mark.col == indent && !is_z(self.ch()) {
1173	// We are at the beginning of a non-empty line.
1174	trailing_blank = is_blank(self.ch());
1175	if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
1176	if trailing_breaks.is_empty() {
1177	string.push(' ');
1178	}
1179	leading_break.clear();
1180	} else {
1181	string.push_str(&leading_break);
1182	leading_break.clear();
1183	}
1184
1185	string.push_str(&trailing_breaks);
1186	trailing_breaks.clear();
1187
1188	leading_blank = is_blank(self.ch());
1189
1190	while !is_breakz(self.ch()) {
1191	string.push(self.ch());
1192	self.skip();
1193	self.lookahead(`1`);
1194	}
1195	// break on EOF
1196	if is_z(self.ch()) {
1197	break;
1198	}
1199
1200	self.lookahead(`2`);
1201	self.read_break(&mut leading_break);
1202
1203	// Eat the following indentation spaces and line breaks.
1204	self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
1205	}
1206
1207	// Chomp the tail.
1208	if chomping != `-1` {
1209	string.push_str(&leading_break);
1210	}
1211
1212	if chomping == `1` {
1213	string.push_str(&trailing_breaks);
1214	}
1215
1216	if literal {
1217	Ok(Token(
1218	start_mark,
1219	TokenType::Scalar(TScalarStyle::Literal, string),
1220	))
1221	} else {
1222	Ok(Token(
1223	start_mark,
1224	TokenType::Scalar(TScalarStyle::Foled, string),
1225	))
1226	}
1227	}
1228
1229	fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult {
1230	let mut max_indent = `0`;
1231	loop {
1232	self.lookahead(`1`);
1233	while (indent == `0` \|\| self.mark.col < indent) && self.buffer[`0`] == ' ' {
1234	self.skip();
1235	self.lookahead(`1`);
1236	}
1237
1238	if self.mark.col > max_indent {
1239	max_indent = self.mark.col;
1240	}
1241
1242	// Check for a tab character messing the indentation.
1243	if (indent == `0` \|\| self.mark.col < indent) && self.buffer[`0`] == '`\t`' {
1244	return Err(ScanError::new(self.mark,
1245	"while scanning a block scalar, found a tab character where an indentation space is expected"));
1246	}
1247
1248	if !is_break(self.ch()) {
1249	break;
1250	}
1251
1252	self.lookahead(`2`);
1253	// Consume the line break.
1254	self.read_break(breaks);
1255	}
1256
1257	if *indent == `0` {
1258	*indent = max_indent;
1259	if indent < (self.indent + `1`) as usize* {
1260	indent = (self.indent + `1`) as usize*;
1261	}
1262	if *indent < `1` {
1263	*indent = `1`;
1264	}
1265	}
1266	Ok(())
1267	}
1268
1269	fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1270	self.save_simple_key()?;
1271	self.disallow_simple_key();
1272
1273	let tok = self.scan_flow_scalar(single)?;
1274
1275	// From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like,
1276	// YAML allows the following value to be specified adjacent to the “:”.
1277	self.adjacent_value_allowed_at = self.mark.index;
1278
1279	self.tokens.push_back(tok);
1280	Ok(())
1281	}
1282
1283	fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
1284	let start_mark = self.mark;
1285
1286	let mut string = String::new();
1287	let mut leading_break = String::new();
1288	let mut trailing_breaks = String::new();
1289	let mut whitespaces = String::new();
1290	let mut leading_blanks;
1291
1292	/ Eat the left quote. /
1293	self.skip();
1294
1295	loop {
1296	/ Check for a document indicator. /
1297	self.lookahead(`4`);
1298
1299	if self.mark.col == `0`
1300	&& (((self.buffer[`0`] == '-') && (self.buffer[`1`] == '-') && (self.buffer[`2`] == '-'))
1301	\|\| ((self.buffer[`0`] == '.')
1302	&& (self.buffer[`1`] == '.')
1303	&& (self.buffer[`2`] == '.')))
1304	&& is_blankz(self.buffer[`3`])
1305	{
1306	return Err(ScanError::new(
1307	start_mark,
1308	"while scanning a quoted scalar, found unexpected document indicator",
1309	));
1310	}
1311
1312	if is_z(self.ch()) {
1313	return Err(ScanError::new(
1314	start_mark,
1315	"while scanning a quoted scalar, found unexpected end of stream",
1316	));
1317	}
1318
1319	self.lookahead(`2`);
1320
1321	leading_blanks = `false`;
1322	// Consume non-blank characters.
1323
1324	while !is_blankz(self.ch()) {
1325	match self.ch() {
1326	// Check for an escaped single quote.
1327	'`\'`' if self.buffer[`1`] == '`\'`' && single => {
1328	string.push('`\'`');
1329	self.skip();
1330	self.skip();
1331	}
1332	// Check for the right quote.
1333	'`\'`' if single => break,
1334	'"' if !single => break,
1335	// Check for an escaped line break.
1336	'`\\`' if !single && is_break(self.buffer[`1`]) => {
1337	self.lookahead(`3`);
1338	self.skip();
1339	self.skip_line();
1340	leading_blanks = `true`;
1341	break;
1342	}
1343	// Check for an escape sequence.
1344	'`\\`' if !single => {
1345	let mut code_length = `0usize`;
1346	match self.buffer[`1`] {
1347	'0' => string.push('`\0`'),
1348	'a' => string.push('`\x07`'),
1349	'b' => string.push('`\x08`'),
1350	't' \| '`\t`' => string.push('`\t`'),
1351	'n' => string.push('`\n`'),
1352	'v' => string.push('`\x0b`'),
1353	'f' => string.push('`\x0c`'),
1354	'r' => string.push('`\x0d`'),
1355	'e' => string.push('`\x1b`'),
1356	' ' => string.push('`\x20`'),
1357	'"' => string.push('"'),
1358	'`\'`' => string.push('`\'`'),
1359	'`\\`' => string.push('`\\`'),
1360	// NEL (#x85)
1361	'N' => string.push(char::from_u32(`0x85`).unwrap()),
1362	// #xA0
1363	'_' => string.push(char::from_u32(`0xA0`).unwrap()),
1364	// LS (#x2028)
1365	'L' => string.push(char::from_u32(`0x2028`).unwrap()),
1366	// PS (#x2029)
1367	'P' => string.push(char::from_u32(`0x2029`).unwrap()),
1368	'x' => code_length = `2`,
1369	'u' => code_length = `4`,
1370	'U' => code_length = `8`,
1371	_ => {
1372	return Err(ScanError::new(
1373	start_mark,
1374	"while parsing a quoted scalar, found unknown escape character",
1375	))
1376	}
1377	}
1378	self.skip();
1379	self.skip();
1380	// Consume an arbitrary escape code.
1381	if code_length > `0` {
1382	self.lookahead(code_length);
1383	let mut value = `0u32`;
1384	for i in `0`..code_length {
1385	if !is_hex(self.buffer[i]) {
1386	return Err(ScanError::new(start_mark,
1387	"while parsing a quoted scalar, did not find expected hexadecimal number"));
1388	}
1389	value = (value << `4`) + as_hex(self.buffer[i]);
1390	}
1391
1392	let ch = match char::from_u32(value) {
1393	Some(v) => v,
1394	None => {
1395	return Err(ScanError::new(start_mark,
1396	"while parsing a quoted scalar, found invalid Unicode character escape code"));
1397	}
1398	};
1399	string.push(ch);
1400
1401	for _ in `0`..code_length {
1402	self.skip();
1403	}
1404	}
1405	}
1406	c => {
1407	string.push(c);
1408	self.skip();
1409	}
1410	}
1411	self.lookahead(`2`);
1412	}
1413	self.lookahead(`1`);
1414	match self.ch() {
1415	'`\'`' if single => break,
1416	'"' if !single => break,
1417	_ => {}
1418	}
1419
1420	// Consume blank characters.
1421	while is_blank(self.ch()) \|\| is_break(self.ch()) {
1422	if is_blank(self.ch()) {
1423	// Consume a space or a tab character.
1424	if leading_blanks {
1425	self.skip();
1426	} else {
1427	whitespaces.push(self.ch());
1428	self.skip();
1429	}
1430	} else {
1431	self.lookahead(`2`);
1432	// Check if it is a first line break.
1433	if leading_blanks {
1434	self.read_break(&mut trailing_breaks);
1435	} else {
1436	whitespaces.clear();
1437	self.read_break(&mut leading_break);
1438	leading_blanks = `true`;
1439	}
1440	}
1441	self.lookahead(`1`);
1442	}
1443	// Join the whitespaces or fold line breaks.
1444	if leading_blanks {
1445	if leading_break.is_empty() {
1446	string.push_str(&leading_break);
1447	string.push_str(&trailing_breaks);
1448	trailing_breaks.clear();
1449	leading_break.clear();
1450	} else {
1451	if trailing_breaks.is_empty() {
1452	string.push(' ');
1453	} else {
1454	string.push_str(&trailing_breaks);
1455	trailing_breaks.clear();
1456	}
1457	leading_break.clear();
1458	}
1459	} else {
1460	string.push_str(&whitespaces);
1461	whitespaces.clear();
1462	}
1463	} // loop
1464
1465	// Eat the right quote.
1466	self.skip();
1467
1468	if single {
1469	Ok(Token(
1470	start_mark,
1471	TokenType::Scalar(TScalarStyle::SingleQuoted, string),
1472	))
1473	} else {
1474	Ok(Token(
1475	start_mark,
1476	TokenType::Scalar(TScalarStyle::DoubleQuoted, string),
1477	))
1478	}
1479	}
1480
1481	fn fetch_plain_scalar(&mut self) -> ScanResult {
1482	self.save_simple_key()?;
1483	self.disallow_simple_key();
1484
1485	let tok = self.scan_plain_scalar()?;
1486
1487	self.tokens.push_back(tok);
1488	Ok(())
1489	}
1490
1491	fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
1492	let indent = self.indent + `1`;
1493	let start_mark = self.mark;
1494
1495	let mut string = String::new();
1496	let mut leading_break = String::new();
1497	let mut trailing_breaks = String::new();
1498	let mut whitespaces = String::new();
1499	let mut leading_blanks = `false`;
1500
1501	loop {
1502	/ Check for a document indicator. /
1503	self.lookahead(`4`);
1504
1505	if self.mark.col == `0`
1506	&& (((self.buffer[`0`] == '-') && (self.buffer[`1`] == '-') && (self.buffer[`2`] == '-'))
1507	\|\| ((self.buffer[`0`] == '.')
1508	&& (self.buffer[`1`] == '.')
1509	&& (self.buffer[`2`] == '.')))
1510	&& is_blankz(self.buffer[`3`])
1511	{
1512	break;
1513	}
1514
1515	if self.ch() == '#' {
1516	break;
1517	}
1518	while !is_blankz(self.ch()) {
1519	// indicators can end a plain scalar, see 7.3.3. Plain Style
1520	match self.ch() {
1521	':' if is_blankz(self.buffer[`1`])
1522	\|\| (self.flow_level > `0` && is_flow(self.buffer[`1`])) =>
1523	{
1524	break;
1525	}
1526	',' \| '[' \| ']' \| '{' \| '}' if self.flow_level > `0` => break,
1527	_ => {}
1528	}
1529
1530	if leading_blanks \|\| !whitespaces.is_empty() {
1531	if leading_blanks {
1532	if leading_break.is_empty() {
1533	string.push_str(&leading_break);
1534	string.push_str(&trailing_breaks);
1535	trailing_breaks.clear();
1536	leading_break.clear();
1537	} else {
1538	if trailing_breaks.is_empty() {
1539	string.push(' ');
1540	} else {
1541	string.push_str(&trailing_breaks);
1542	trailing_breaks.clear();
1543	}
1544	leading_break.clear();
1545	}
1546	leading_blanks = `false`;
1547	} else {
1548	string.push_str(&whitespaces);
1549	whitespaces.clear();
1550	}
1551	}
1552
1553	string.push(self.ch());
1554	self.skip();
1555	self.lookahead(`2`);
1556	}
1557	// is the end?
1558	if !(is_blank(self.ch()) \|\| is_break(self.ch())) {
1559	break;
1560	}
1561	self.lookahead(`1`);
1562
1563	while is_blank(self.ch()) \|\| is_break(self.ch()) {
1564	if is_blank(self.ch()) {
1565	if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '`\t`' {
1566	return Err(ScanError::new(
1567	start_mark,
1568	"while scanning a plain scalar, found a tab",
1569	));
1570	}
1571
1572	if leading_blanks {
1573	self.skip();
1574	} else {
1575	whitespaces.push(self.ch());
1576	self.skip();
1577	}
1578	} else {
1579	self.lookahead(`2`);
1580	// Check if it is a first line break
1581	if leading_blanks {
1582	self.read_break(&mut trailing_breaks);
1583	} else {
1584	whitespaces.clear();
1585	self.read_break(&mut leading_break);
1586	leading_blanks = `true`;
1587	}
1588	}
1589	self.lookahead(`1`);
1590	}
1591
1592	// check indentation level
1593	if self.flow_level == `0` && (self.mark.col as isize) < indent {
1594	break;
1595	}
1596	}
1597
1598	if leading_blanks {
1599	self.allow_simple_key();
1600	}
1601
1602	Ok(Token(
1603	start_mark,
1604	TokenType::Scalar(TScalarStyle::Plain, string),
1605	))
1606	}
1607
1608	fn fetch_key(&mut self) -> ScanResult {
1609	let start_mark = self.mark;
1610	if self.flow_level == `0` {
1611	// Check if we are allowed to start a new key (not necessarily simple).
1612	if !self.simple_key_allowed {
1613	return Err(ScanError::new(
1614	self.mark,
1615	"mapping keys are not allowed in this context",
1616	));
1617	}
1618	self.roll_indent(
1619	start_mark.col,
1620	None,
1621	TokenType::BlockMappingStart,
1622	start_mark,
1623	);
1624	}
1625
1626	self.remove_simple_key()?;
1627
1628	if self.flow_level == `0` {
1629	self.allow_simple_key();
1630	} else {
1631	self.disallow_simple_key();
1632	}
1633
1634	self.skip();
1635	self.tokens.push_back(Token(start_mark, TokenType::Key));
1636	Ok(())
1637	}
1638
1639	fn fetch_value(&mut self) -> ScanResult {
1640	let sk = self.simple_keys.last().unwrap().clone();
1641	let start_mark = self.mark;
1642	if sk.possible {
1643	// insert simple key
1644	let tok = Token(sk.mark, TokenType::Key);
1645	let tokens_parsed = self.tokens_parsed;
1646	self.insert_token(sk.token_number - tokens_parsed, tok);
1647
1648	// Add the BLOCK-MAPPING-START token if needed.
1649	self.roll_indent(
1650	sk.mark.col,
1651	Some(sk.token_number),
1652	TokenType::BlockMappingStart,
1653	start_mark,
1654	);
1655
1656	self.simple_keys.last_mut().unwrap().possible = `false`;
1657	self.disallow_simple_key();
1658	} else {
1659	// The ':' indicator follows a complex key.
1660	if self.flow_level == `0` {
1661	if !self.simple_key_allowed {
1662	return Err(ScanError::new(
1663	start_mark,
1664	"mapping values are not allowed in this context",
1665	));
1666	}
1667
1668	self.roll_indent(
1669	start_mark.col,
1670	None,
1671	TokenType::BlockMappingStart,
1672	start_mark,
1673	);
1674	}
1675
1676	if self.flow_level == `0` {
1677	self.allow_simple_key();
1678	} else {
1679	self.disallow_simple_key();
1680	}
1681	}
1682	self.skip();
1683	self.tokens.push_back(Token(start_mark, TokenType::Value));
1684
1685	Ok(())
1686	}
1687
1688	fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
1689	if self.flow_level > `0` {
1690	return;
1691	}
1692
1693	if self.indent < col as isize {
1694	self.indents.push(self.indent);
1695	self.indent = col as isize;
1696	let tokens_parsed = self.tokens_parsed;
1697	match number {
1698	Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
1699	None => self.tokens.push_back(Token(mark, tok)),
1700	}
1701	}
1702	}
1703
1704	fn unroll_indent(&mut self, col: isize) {
1705	if self.flow_level > `0` {
1706	return;
1707	}
1708	while self.indent > col {
1709	self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
1710	self.indent = self.indents.pop().unwrap();
1711	}
1712	}
1713
1714	fn save_simple_key(&mut self) -> Result<(), ScanError> {
1715	let required = self.flow_level > `0` && self.indent == (self.mark.col as isize);
1716	if self.simple_key_allowed {
1717	let mut sk = SimpleKey::new(self.mark);
1718	sk.possible = `true`;
1719	sk.required = required;
1720	sk.token_number = self.tokens_parsed + self.tokens.len();
1721
1722	self.remove_simple_key()?;
1723
1724	self.simple_keys.pop();
1725	self.simple_keys.push(sk);
1726	}
1727	Ok(())
1728	}
1729
1730	fn remove_simple_key(&mut self) -> ScanResult {
1731	let last = self.simple_keys.last_mut().unwrap();
1732	if last.possible && last.required {
1733	return Err(ScanError::new(self.mark, "simple key expected"));
1734	}
1735
1736	last.possible = `false`;
1737	Ok(())
1738	}
1739	}
1740
#[cfg(test)]
mod test {
    use super::TokenType::*;
    use super::*;

    /// Pulls the next token from scanner `$p` and panics unless its token type
    /// (field `.1` of the token) matches the pattern `$tk`.
    macro_rules! next {
        ($p:ident, $tk:pat) => {{
            let tok = $p.next().unwrap();
            match tok.1 {
                $tk => {}
                _ => panic!("unexpected token: {:?}", tok),
            }
        }};
    }

    /// Pulls the next token from scanner `$p` and asserts that it is a `Scalar`
    /// with style `$tk` and text `$v`.
    macro_rules! next_scalar {
        ($p:ident, $tk:expr, $v:expr) => {{
            let tok = $p.next().unwrap();
            match tok.1 {
                Scalar(style, ref v) => {
                    assert_eq!(style, $tk);
                    assert_eq!(v, $v);
                }
                _ => panic!("unexpected token: {:?}", tok),
            }
        }};
    }

    /// Asserts that scanner `$p` yields no further tokens.
    macro_rules! end {
        ($p:ident) => {{
            assert_eq!($p.next(), None);
        }};
    }

    // The following test cases come from libyaml's scanner.c.

    /// An empty input yields only the stream start and end tokens.
    #[test]
    fn test_empty() {
        let s = "";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, StreamEnd);
        end!(p);
    }

    /// A bare line of text is a single plain scalar.
    #[test]
    fn test_scalar() {
        let s = "a scalar";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, StreamEnd);
        end!(p);
    }

    /// A single-quoted scalar framed by explicit `---` and `...` markers.
    #[test]
    fn test_explicit_scalar() {
        let s = "---
'a scalar'
...
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, DocumentStart);
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, DocumentEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    /// Three documents separated by `---`.
    #[test]
    fn test_multiple_documents() {
        let s = "
'a scalar'
---
'a scalar'
---
'a scalar'
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, DocumentStart);
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, DocumentStart);
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, StreamEnd);
        end!(p);
    }

    /// A flow sequence of plain scalars that contain spaces.
    #[test]
    fn test_a_flow_sequence() {
        let s = "[item 1, item 2, item 3]";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowSequenceStart);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, FlowEntry);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowEntry);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowSequenceEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    /// A flow mapping with one simple key and one explicit (`?`) key.
    #[test]
    fn test_a_flow_mapping() {
        let s = "
{
a simple key: a value, # Note that the KEY token is produced.
? a complex key: another value,
}
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, Value);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowEntry);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a complex key");
        next!(p, Value);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowEntry);
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    /// Block sequences holding a nested sequence and a nested mapping. The nested
    /// entries are indented by two spaces; the expected tokens depend on that.
    #[test]
    fn test_block_sequences() {
        let s = "
- item 1
- item 2
-
  - item 3.1
  - item 3.2
-
  key 1: value 1
  key 2: value 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEntry);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 3.1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 3.2");
        next!(p, BlockEnd);
        next!(p, BlockEntry);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 1");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 1");
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 2");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 2");
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    /// Block mappings with simple and complex keys, a nested mapping and a nested,
    /// indented sequence. The nested collections are indented by two spaces; the
    /// expected tokens depend on that.
    #[test]
    fn test_block_mappings() {
        let s = "
a simple key: a value # The KEY token is produced here.
? a complex key
: another value
a mapping:
  key 1: value 1
  key 2: value 2
a sequence:
  - item 1
  - item 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockMappingStart);
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value); // libyaml comment seems to be wrong
        next!(p, BlockMappingStart);
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, BlockEnd);
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next!(p, Scalar(_, _));
        next!(p, BlockEntry);
        next!(p, Scalar(_, _));
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    /// A sequence at the same column as its mapping key produces no
    /// `BlockSequenceStart` token.
    #[test]
    fn test_no_block_sequence_start() {
        let s = "
key:
- item 1
- item 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key");
        next!(p, Value);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    /// Collections opened on the same line as their sequence entry. Continuation
    /// lines are indented by two spaces so they stay inside the nested collection.
    #[test]
    fn test_collections_in_sequence() {
        let s = "
- - item 1
  - item 2
- key 1: value 1
  key 2: value 2
- ? complex key
  : complex value
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEnd);
        next!(p, BlockEntry);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 1");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 1");
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 2");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 2");
        next!(p, BlockEnd);
        next!(p, BlockEntry);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "complex key");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "complex value");
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    /// Collections opened on the same line as their mapping value indicator.
    /// Continuation lines are indented by two spaces so they stay inside the nested
    /// collection.
    #[test]
    fn test_collections_in_mapping() {
        let s = "
? a sequence
: - item 1
  - item 2
? a mapping
: key 1: value 1
  key 2: value 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a sequence");
        next!(p, Value);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEnd);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a mapping");
        next!(p, Value);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 1");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 1");
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 2");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 2");
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    /// Flow mapping entries with an omitted value and an omitted key.
    #[test]
    fn test_spec_ex7_3() {
        let s = "
{
? foo :,
: bar,
}
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "foo");
        next!(p, Value);
        next!(p, FlowEntry);
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "bar");
        next!(p, FlowEntry);
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_plain_scalar_starting_with_indicators_in_flow() {
        // "Plain scalars must not begin with most indicators, as this would cause ambiguity with
        // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first
        // character if followed by a non-space “safe” character, as this causes no ambiguity."

        let s = "{a: :b}";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, ":b");
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);

        let s = "{a: ?b}";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "?b");
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    /// In block context `:` and `?` may start a plain scalar when a non-space
    /// character follows.
    #[test]
    fn test_plain_scalar_starting_with_indicators_in_block() {
        let s = ":a";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, ":a");
        next!(p, StreamEnd);
        end!(p);

        let s = "?a";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, "?a");
        next!(p, StreamEnd);
        end!(p);
    }

    /// In block context `:` followed by `,` does not end a plain scalar.
    #[test]
    fn test_plain_scalar_containing_indicators_in_block() {
        let s = "a:,b";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, "a:,b");
        next!(p, StreamEnd);
        end!(p);

        let s = ":,b";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, ":,b");
        next!(p, StreamEnd);
        end!(p);
    }

    /// CRLF line endings are scanned as line breaks.
    #[test]
    fn test_scanner_cr() {
        let s = "---\r\n- tok1\r\n- tok2";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, DocumentStart);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "tok1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "tok2");
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    /// Placeholder: no assertions yet.
    #[test]
    fn test_uri() {
        // TODO
    }

    /// Placeholder: no assertions yet.
    #[test]
    fn test_uri_escapes() {
        // TODO
    }
}
2183