1use std::collections::VecDeque;
2use std::error::Error;
3use std::{char, fmt};
4
/// Character encoding carried by the stream-start token.
///
/// UTF-8 is the only encoding this scanner reports.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TEncoding {
    /// The stream is UTF-8 text.
    Utf8,
}
9
/// Presentation style of a scalar token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TScalarStyle {
    Any,
    Plain,
    SingleQuoted,
    DoubleQuoted,

    /// Block scalar introduced by `|`.
    Literal,
    /// Block scalar introduced by `>` (i.e. "folded").
    ///
    /// The variant name is misspelt, but it is part of the public API and is
    /// therefore left unchanged.
    Foled,
}
20
/// A position in the scanned input.
///
/// Holds the running character index together with the line and the column
/// (the column is zero-based; the scanner starts at line 1).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Marker {
    index: usize,
    line: usize,
    col: usize,
}

impl Marker {
    /// Builds a marker from its three components.
    fn new(index: usize, line: usize, col: usize) -> Marker {
        Marker {
            index: index,
            line: line,
            col: col,
        }
    }

    /// Number of characters consumed before this position.
    pub fn index(&self) -> usize {
        let Marker { index, .. } = *self;
        index
    }

    /// Line of this position.
    pub fn line(&self) -> usize {
        let Marker { line, .. } = *self;
        line
    }

    /// Zero-based column of this position.
    pub fn col(&self) -> usize {
        let Marker { col, .. } = *self;
        col
    }
}
45
46#[derive(Clone, PartialEq, Debug, Eq)]
47pub struct ScanError {
48 mark: Marker,
49 info: String,
50}
51
52impl ScanError {
53 pub fn new(loc: Marker, info: &str) -> ScanError {
54 ScanError {
55 mark: loc,
56 info: info.to_owned(),
57 }
58 }
59
60 pub fn marker(&self) -> &Marker {
61 &self.mark
62 }
63}
64
65impl Error for ScanError {
66 fn description(&self) -> &str {
67 self.info.as_ref()
68 }
69
70 fn cause(&self) -> Option<&dyn Error> {
71 None
72 }
73}
74
75impl fmt::Display for ScanError {
76 // col starts from 0
77 fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
78 write!(
79 formatter,
80 "{} at line {} column {}",
81 self.info,
82 self.mark.line,
83 self.mark.col + 1
84 )
85 }
86}
87
88#[derive(Clone, PartialEq, Debug, Eq)]
89pub enum TokenType {
90 NoToken,
91 StreamStart(TEncoding),
92 StreamEnd,
93 /// major, minor
94 VersionDirective(u32, u32),
95 /// handle, prefix
96 TagDirective(String, String),
97 DocumentStart,
98 DocumentEnd,
99 BlockSequenceStart,
100 BlockMappingStart,
101 BlockEnd,
102 FlowSequenceStart,
103 FlowSequenceEnd,
104 FlowMappingStart,
105 FlowMappingEnd,
106 BlockEntry,
107 FlowEntry,
108 Key,
109 Value,
110 Alias(String),
111 Anchor(String),
112 /// handle, suffix
113 Tag(String, String),
114 Scalar(TScalarStyle, String),
115}
116
117#[derive(Clone, PartialEq, Debug, Eq)]
118pub struct Token(pub Marker, pub TokenType);
119
120#[derive(Clone, PartialEq, Debug, Eq)]
121struct SimpleKey {
122 possible: bool,
123 required: bool,
124 token_number: usize,
125 mark: Marker,
126}
127
128impl SimpleKey {
129 fn new(mark: Marker) -> SimpleKey {
130 SimpleKey {
131 possible: false,
132 required: false,
133 token_number: 0,
134 mark,
135 }
136 }
137}
138
139#[derive(Debug)]
140pub struct Scanner<T> {
141 rdr: T,
142 mark: Marker,
143 tokens: VecDeque<Token>,
144 buffer: VecDeque<char>,
145 error: Option<ScanError>,
146
147 stream_start_produced: bool,
148 stream_end_produced: bool,
149 adjacent_value_allowed_at: usize,
150 simple_key_allowed: bool,
151 simple_keys: Vec<SimpleKey>,
152 indent: isize,
153 indents: Vec<isize>,
154 flow_level: u8,
155 tokens_parsed: usize,
156 token_available: bool,
157}
158
159impl<T: Iterator<Item = char>> Iterator for Scanner<T> {
160 type Item = Token;
161 fn next(&mut self) -> Option<Token> {
162 if self.error.is_some() {
163 return None;
164 }
165 match self.next_token() {
166 Ok(tok: Option) => tok,
167 Err(e: ScanError) => {
168 self.error = Some(e);
169 None
170 }
171 }
172 }
173}
174
/// True for the NUL character, which the scanner uses as its end-of-input
/// sentinel.
#[inline]
fn is_z(c: char) -> bool {
    match c {
        '\0' => true,
        _ => false,
    }
}
/// True for a line-break character (`\n` or `\r`).
#[inline]
fn is_break(c: char) -> bool {
    match c {
        '\n' | '\r' => true,
        _ => false,
    }
}
183#[inline]
184fn is_breakz(c: char) -> bool {
185 is_break(c) || is_z(c)
186}
/// True for a space or a tab.
#[inline]
fn is_blank(c: char) -> bool {
    match c {
        ' ' | '\t' => true,
        _ => false,
    }
}
191#[inline]
192fn is_blankz(c: char) -> bool {
193 is_blank(c) || is_breakz(c)
194}
/// True for an ASCII decimal digit (`0`-`9`).
#[inline]
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// True for the characters allowed in names (directive names, anchors, tag
/// handles): ASCII letters and digits, `_` and `-`.
#[inline]
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_' || c == '-'
}
/// True for an ASCII hexadecimal digit (`0`-`9`, `a`-`f`, `A`-`F`).
#[inline]
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Numeric value (0-15) of a hexadecimal digit.
///
/// # Panics
///
/// Panics if `c` is not a hexadecimal digit; callers are expected to check
/// with `is_hex` first.
#[inline]
fn as_hex(c: char) -> u32 {
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
/// True for a flow-collection indicator: `,`, `[`, `]`, `{` or `}`.
#[inline]
fn is_flow(c: char) -> bool {
    const FLOW_INDICATORS: [char; 5] = [',', '[', ']', '{', '}'];
    FLOW_INDICATORS.contains(&c)
}
227
228pub type ScanResult = Result<(), ScanError>;
229
230impl<T: Iterator<Item = char>> Scanner<T> {
231 /// Creates the YAML tokenizer.
232 pub fn new(rdr: T) -> Scanner<T> {
233 Scanner {
234 rdr,
235 buffer: VecDeque::new(),
236 mark: Marker::new(0, 1, 0),
237 tokens: VecDeque::new(),
238 error: None,
239
240 stream_start_produced: false,
241 stream_end_produced: false,
242 adjacent_value_allowed_at: 0,
243 simple_key_allowed: true,
244 simple_keys: Vec::new(),
245 indent: -1,
246 indents: Vec::new(),
247 flow_level: 0,
248 tokens_parsed: 0,
249 token_available: false,
250 }
251 }
252 #[inline]
253 pub fn get_error(&self) -> Option<ScanError> {
254 match self.error {
255 None => None,
256 Some(ref e) => Some(e.clone()),
257 }
258 }
259
260 #[inline]
261 fn lookahead(&mut self, count: usize) {
262 if self.buffer.len() >= count {
263 return;
264 }
265 for _ in 0..(count - self.buffer.len()) {
266 self.buffer.push_back(self.rdr.next().unwrap_or('\0'));
267 }
268 }
269 #[inline]
270 fn skip(&mut self) {
271 let c = self.buffer.pop_front().unwrap();
272
273 self.mark.index += 1;
274 if c == '\n' {
275 self.mark.line += 1;
276 self.mark.col = 0;
277 } else {
278 self.mark.col += 1;
279 }
280 }
281 #[inline]
282 fn skip_line(&mut self) {
283 if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
284 self.skip();
285 self.skip();
286 } else if is_break(self.buffer[0]) {
287 self.skip();
288 }
289 }
290 #[inline]
291 fn ch(&self) -> char {
292 self.buffer[0]
293 }
294 #[inline]
295 fn ch_is(&self, c: char) -> bool {
296 self.buffer[0] == c
297 }
298 #[allow(dead_code)]
299 #[inline]
300 fn eof(&self) -> bool {
301 self.ch_is('\0')
302 }
303 #[inline]
304 pub fn stream_started(&self) -> bool {
305 self.stream_start_produced
306 }
307 #[inline]
308 pub fn stream_ended(&self) -> bool {
309 self.stream_end_produced
310 }
311 #[inline]
312 pub fn mark(&self) -> Marker {
313 self.mark
314 }
315 #[inline]
316 fn read_break(&mut self, s: &mut String) {
317 if self.buffer[0] == '\r' && self.buffer[1] == '\n' {
318 s.push('\n');
319 self.skip();
320 self.skip();
321 } else if self.buffer[0] == '\r' || self.buffer[0] == '\n' {
322 s.push('\n');
323 self.skip();
324 } else {
325 unreachable!();
326 }
327 }
328 fn insert_token(&mut self, pos: usize, tok: Token) {
329 let old_len = self.tokens.len();
330 assert!(pos <= old_len);
331 self.tokens.push_back(tok);
332 for i in 0..old_len - pos {
333 self.tokens.swap(old_len - i, old_len - i - 1);
334 }
335 }
336 fn allow_simple_key(&mut self) {
337 self.simple_key_allowed = true;
338 }
339 fn disallow_simple_key(&mut self) {
340 self.simple_key_allowed = false;
341 }
342
343 pub fn fetch_next_token(&mut self) -> ScanResult {
344 self.lookahead(1);
345 // println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch());
346
347 if !self.stream_start_produced {
348 self.fetch_stream_start();
349 return Ok(());
350 }
351 self.skip_to_next_token();
352
353 self.stale_simple_keys()?;
354
355 let mark = self.mark;
356 self.unroll_indent(mark.col as isize);
357
358 self.lookahead(4);
359
360 if is_z(self.ch()) {
361 self.fetch_stream_end()?;
362 return Ok(());
363 }
364
365 // Is it a directive?
366 if self.mark.col == 0 && self.ch_is('%') {
367 return self.fetch_directive();
368 }
369
370 if self.mark.col == 0
371 && self.buffer[0] == '-'
372 && self.buffer[1] == '-'
373 && self.buffer[2] == '-'
374 && is_blankz(self.buffer[3])
375 {
376 self.fetch_document_indicator(TokenType::DocumentStart)?;
377 return Ok(());
378 }
379
380 if self.mark.col == 0
381 && self.buffer[0] == '.'
382 && self.buffer[1] == '.'
383 && self.buffer[2] == '.'
384 && is_blankz(self.buffer[3])
385 {
386 self.fetch_document_indicator(TokenType::DocumentEnd)?;
387 return Ok(());
388 }
389
390 let c = self.buffer[0];
391 let nc = self.buffer[1];
392 match c {
393 '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart),
394 '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart),
395 ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd),
396 '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd),
397 ',' => self.fetch_flow_entry(),
398 '-' if is_blankz(nc) => self.fetch_block_entry(),
399 '?' if is_blankz(nc) => self.fetch_key(),
400 ':' if is_blankz(nc)
401 || (self.flow_level > 0
402 && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) =>
403 {
404 self.fetch_value()
405 }
406 // Is it an alias?
407 '*' => self.fetch_anchor(true),
408 // Is it an anchor?
409 '&' => self.fetch_anchor(false),
410 '!' => self.fetch_tag(),
411 // Is it a literal scalar?
412 '|' if self.flow_level == 0 => self.fetch_block_scalar(true),
413 // Is it a folded scalar?
414 '>' if self.flow_level == 0 => self.fetch_block_scalar(false),
415 '\'' => self.fetch_flow_scalar(true),
416 '"' => self.fetch_flow_scalar(false),
417 // plain scalar
418 '-' if !is_blankz(nc) => self.fetch_plain_scalar(),
419 ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(),
420 '%' | '@' | '`' => Err(ScanError::new(
421 self.mark,
422 &format!("unexpected character: `{}'", c),
423 )),
424 _ => self.fetch_plain_scalar(),
425 }
426 }
427
428 pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> {
429 if self.stream_end_produced {
430 return Ok(None);
431 }
432
433 if !self.token_available {
434 self.fetch_more_tokens()?;
435 }
436 let t = self.tokens.pop_front().unwrap();
437 self.token_available = false;
438 self.tokens_parsed += 1;
439
440 if let TokenType::StreamEnd = t.1 {
441 self.stream_end_produced = true;
442 }
443 Ok(Some(t))
444 }
445
446 pub fn fetch_more_tokens(&mut self) -> ScanResult {
447 let mut need_more;
448 loop {
449 need_more = false;
450 if self.tokens.is_empty() {
451 need_more = true;
452 } else {
453 self.stale_simple_keys()?;
454 for sk in &self.simple_keys {
455 if sk.possible && sk.token_number == self.tokens_parsed {
456 need_more = true;
457 break;
458 }
459 }
460 }
461
462 if !need_more {
463 break;
464 }
465 self.fetch_next_token()?;
466 }
467 self.token_available = true;
468
469 Ok(())
470 }
471
472 fn stale_simple_keys(&mut self) -> ScanResult {
473 for sk in &mut self.simple_keys {
474 if sk.possible
475 && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index)
476 {
477 if sk.required {
478 return Err(ScanError::new(self.mark, "simple key expect ':'"));
479 }
480 sk.possible = false;
481 }
482 }
483 Ok(())
484 }
485
486 fn skip_to_next_token(&mut self) {
487 loop {
488 self.lookahead(1);
489 // TODO(chenyh) BOM
490 match self.ch() {
491 ' ' => self.skip(),
492 '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(),
493 '\n' | '\r' => {
494 self.lookahead(2);
495 self.skip_line();
496 if self.flow_level == 0 {
497 self.allow_simple_key();
498 }
499 }
500 '#' => {
501 while !is_breakz(self.ch()) {
502 self.skip();
503 self.lookahead(1);
504 }
505 }
506 _ => break,
507 }
508 }
509 }
510
511 fn fetch_stream_start(&mut self) {
512 let mark = self.mark;
513 self.indent = -1;
514 self.stream_start_produced = true;
515 self.allow_simple_key();
516 self.tokens
517 .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8)));
518 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
519 }
520
521 fn fetch_stream_end(&mut self) -> ScanResult {
522 // force new line
523 if self.mark.col != 0 {
524 self.mark.col = 0;
525 self.mark.line += 1;
526 }
527
528 self.unroll_indent(-1);
529 self.remove_simple_key()?;
530 self.disallow_simple_key();
531
532 self.tokens
533 .push_back(Token(self.mark, TokenType::StreamEnd));
534 Ok(())
535 }
536
537 fn fetch_directive(&mut self) -> ScanResult {
538 self.unroll_indent(-1);
539 self.remove_simple_key()?;
540
541 self.disallow_simple_key();
542
543 let tok = self.scan_directive()?;
544
545 self.tokens.push_back(tok);
546
547 Ok(())
548 }
549
550 fn scan_directive(&mut self) -> Result<Token, ScanError> {
551 let start_mark = self.mark;
552 self.skip();
553
554 let name = self.scan_directive_name()?;
555 let tok = match name.as_ref() {
556 "YAML" => self.scan_version_directive_value(&start_mark)?,
557 "TAG" => self.scan_tag_directive_value(&start_mark)?,
558 // XXX This should be a warning instead of an error
559 _ => {
560 // skip current line
561 self.lookahead(1);
562 while !is_breakz(self.ch()) {
563 self.skip();
564 self.lookahead(1);
565 }
566 // XXX return an empty TagDirective token
567 Token(
568 start_mark,
569 TokenType::TagDirective(String::new(), String::new()),
570 )
571 // return Err(ScanError::new(start_mark,
572 // "while scanning a directive, found unknown directive name"))
573 }
574 };
575 self.lookahead(1);
576
577 while is_blank(self.ch()) {
578 self.skip();
579 self.lookahead(1);
580 }
581
582 if self.ch() == '#' {
583 while !is_breakz(self.ch()) {
584 self.skip();
585 self.lookahead(1);
586 }
587 }
588
589 if !is_breakz(self.ch()) {
590 return Err(ScanError::new(
591 start_mark,
592 "while scanning a directive, did not find expected comment or line break",
593 ));
594 }
595
596 // Eat a line break
597 if is_break(self.ch()) {
598 self.lookahead(2);
599 self.skip_line();
600 }
601
602 Ok(tok)
603 }
604
605 fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
606 self.lookahead(1);
607
608 while is_blank(self.ch()) {
609 self.skip();
610 self.lookahead(1);
611 }
612
613 let major = self.scan_version_directive_number(mark)?;
614
615 if self.ch() != '.' {
616 return Err(ScanError::new(
617 *mark,
618 "while scanning a YAML directive, did not find expected digit or '.' character",
619 ));
620 }
621
622 self.skip();
623
624 let minor = self.scan_version_directive_number(mark)?;
625
626 Ok(Token(*mark, TokenType::VersionDirective(major, minor)))
627 }
628
629 fn scan_directive_name(&mut self) -> Result<String, ScanError> {
630 let start_mark = self.mark;
631 let mut string = String::new();
632 self.lookahead(1);
633 while is_alpha(self.ch()) {
634 string.push(self.ch());
635 self.skip();
636 self.lookahead(1);
637 }
638
639 if string.is_empty() {
640 return Err(ScanError::new(
641 start_mark,
642 "while scanning a directive, could not find expected directive name",
643 ));
644 }
645
646 if !is_blankz(self.ch()) {
647 return Err(ScanError::new(
648 start_mark,
649 "while scanning a directive, found unexpected non-alphabetical character",
650 ));
651 }
652
653 Ok(string)
654 }
655
656 fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> {
657 let mut val = 0u32;
658 let mut length = 0usize;
659 self.lookahead(1);
660 while is_digit(self.ch()) {
661 if length + 1 > 9 {
662 return Err(ScanError::new(
663 *mark,
664 "while scanning a YAML directive, found extremely long version number",
665 ));
666 }
667 length += 1;
668 val = val * 10 + ((self.ch() as u32) - ('0' as u32));
669 self.skip();
670 self.lookahead(1);
671 }
672
673 if length == 0 {
674 return Err(ScanError::new(
675 *mark,
676 "while scanning a YAML directive, did not find expected version number",
677 ));
678 }
679
680 Ok(val)
681 }
682
683 fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> {
684 self.lookahead(1);
685 /* Eat whitespaces. */
686 while is_blank(self.ch()) {
687 self.skip();
688 self.lookahead(1);
689 }
690 let handle = self.scan_tag_handle(true, mark)?;
691
692 self.lookahead(1);
693 /* Eat whitespaces. */
694 while is_blank(self.ch()) {
695 self.skip();
696 self.lookahead(1);
697 }
698
699 let is_secondary = handle == "!!";
700 let prefix = self.scan_tag_uri(true, is_secondary, &String::new(), mark)?;
701
702 self.lookahead(1);
703
704 if is_blankz(self.ch()) {
705 Ok(Token(*mark, TokenType::TagDirective(handle, prefix)))
706 } else {
707 Err(ScanError::new(
708 *mark,
709 "while scanning TAG, did not find expected whitespace or line break",
710 ))
711 }
712 }
713
714 fn fetch_tag(&mut self) -> ScanResult {
715 self.save_simple_key()?;
716 self.disallow_simple_key();
717
718 let tok = self.scan_tag()?;
719 self.tokens.push_back(tok);
720 Ok(())
721 }
722
723 fn scan_tag(&mut self) -> Result<Token, ScanError> {
724 let start_mark = self.mark;
725 let mut handle = String::new();
726 let mut suffix;
727 let mut secondary = false;
728
729 // Check if the tag is in the canonical form (verbatim).
730 self.lookahead(2);
731
732 if self.buffer[1] == '<' {
733 // Eat '!<'
734 self.skip();
735 self.skip();
736 suffix = self.scan_tag_uri(false, false, &String::new(), &start_mark)?;
737
738 if self.ch() != '>' {
739 return Err(ScanError::new(
740 start_mark,
741 "while scanning a tag, did not find the expected '>'",
742 ));
743 }
744
745 self.skip();
746 } else {
747 // The tag has either the '!suffix' or the '!handle!suffix'
748 handle = self.scan_tag_handle(false, &start_mark)?;
749 // Check if it is, indeed, handle.
750 if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') {
751 if handle == "!!" {
752 secondary = true;
753 }
754 suffix = self.scan_tag_uri(false, secondary, &String::new(), &start_mark)?;
755 } else {
756 suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?;
757 handle = "!".to_owned();
758 // A special case: the '!' tag. Set the handle to '' and the
759 // suffix to '!'.
760 if suffix.is_empty() {
761 handle.clear();
762 suffix = "!".to_owned();
763 }
764 }
765 }
766
767 self.lookahead(1);
768 if is_blankz(self.ch()) {
769 // XXX: ex 7.2, an empty scalar can follow a secondary tag
770 Ok(Token(start_mark, TokenType::Tag(handle, suffix)))
771 } else {
772 Err(ScanError::new(
773 start_mark,
774 "while scanning a tag, did not find expected whitespace or line break",
775 ))
776 }
777 }
778
779 fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> {
780 let mut string = String::new();
781 self.lookahead(1);
782 if self.ch() != '!' {
783 return Err(ScanError::new(
784 *mark,
785 "while scanning a tag, did not find expected '!'",
786 ));
787 }
788
789 string.push(self.ch());
790 self.skip();
791
792 self.lookahead(1);
793 while is_alpha(self.ch()) {
794 string.push(self.ch());
795 self.skip();
796 self.lookahead(1);
797 }
798
799 // Check if the trailing character is '!' and copy it.
800 if self.ch() == '!' {
801 string.push(self.ch());
802 self.skip();
803 } else if directive && string != "!" {
804 // It's either the '!' tag or not really a tag handle. If it's a %TAG
805 // directive, it's an error. If it's a tag token, it must be a part of
806 // URI.
807 return Err(ScanError::new(
808 *mark,
809 "while parsing a tag directive, did not find expected '!'",
810 ));
811 }
812 Ok(string)
813 }
814
815 fn scan_tag_uri(
816 &mut self,
817 directive: bool,
818 _is_secondary: bool,
819 head: &str,
820 mark: &Marker,
821 ) -> Result<String, ScanError> {
822 let mut length = head.len();
823 let mut string = String::new();
824
825 // Copy the head if needed.
826 // Note that we don't copy the leading '!' character.
827 if length > 1 {
828 string.extend(head.chars().skip(1));
829 }
830
831 self.lookahead(1);
832 /*
833 * The set of characters that may appear in URI is as follows:
834 *
835 * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
836 * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
837 * '%'.
838 */
839 while match self.ch() {
840 ';' | '/' | '?' | ':' | '@' | '&' => true,
841 '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true,
842 '%' => true,
843 c if is_alpha(c) => true,
844 _ => false,
845 } {
846 // Check if it is a URI-escape sequence.
847 if self.ch() == '%' {
848 string.push(self.scan_uri_escapes(directive, mark)?);
849 } else {
850 string.push(self.ch());
851 self.skip();
852 }
853
854 length += 1;
855 self.lookahead(1);
856 }
857
858 if length == 0 {
859 return Err(ScanError::new(
860 *mark,
861 "while parsing a tag, did not find expected tag URI",
862 ));
863 }
864
865 Ok(string)
866 }
867
868 fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> {
869 let mut width = 0usize;
870 let mut code = 0u32;
871 loop {
872 self.lookahead(3);
873
874 if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) {
875 return Err(ScanError::new(
876 *mark,
877 "while parsing a tag, did not find URI escaped octet",
878 ));
879 }
880
881 let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
882 if width == 0 {
883 width = match octet {
884 _ if octet & 0x80 == 0x00 => 1,
885 _ if octet & 0xE0 == 0xC0 => 2,
886 _ if octet & 0xF0 == 0xE0 => 3,
887 _ if octet & 0xF8 == 0xF0 => 4,
888 _ => {
889 return Err(ScanError::new(
890 *mark,
891 "while parsing a tag, found an incorrect leading UTF-8 octet",
892 ));
893 }
894 };
895 code = octet;
896 } else {
897 if octet & 0xc0 != 0x80 {
898 return Err(ScanError::new(
899 *mark,
900 "while parsing a tag, found an incorrect trailing UTF-8 octet",
901 ));
902 }
903 code = (code << 8) + octet;
904 }
905
906 self.skip();
907 self.skip();
908 self.skip();
909
910 width -= 1;
911 if width == 0 {
912 break;
913 }
914 }
915
916 match char::from_u32(code) {
917 Some(ch) => Ok(ch),
918 None => Err(ScanError::new(
919 *mark,
920 "while parsing a tag, found an invalid UTF-8 codepoint",
921 )),
922 }
923 }
924
925 fn fetch_anchor(&mut self, alias: bool) -> ScanResult {
926 self.save_simple_key()?;
927 self.disallow_simple_key();
928
929 let tok = self.scan_anchor(alias)?;
930
931 self.tokens.push_back(tok);
932
933 Ok(())
934 }
935
936 fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> {
937 let mut string = String::new();
938 let start_mark = self.mark;
939
940 self.skip();
941 self.lookahead(1);
942 while is_alpha(self.ch()) {
943 string.push(self.ch());
944 self.skip();
945 self.lookahead(1);
946 }
947
948 if string.is_empty()
949 || match self.ch() {
950 c if is_blankz(c) => false,
951 '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false,
952 _ => true,
953 }
954 {
955 return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character"));
956 }
957
958 if alias {
959 Ok(Token(start_mark, TokenType::Alias(string)))
960 } else {
961 Ok(Token(start_mark, TokenType::Anchor(string)))
962 }
963 }
964
965 fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult {
966 // The indicators '[' and '{' may start a simple key.
967 self.save_simple_key()?;
968
969 self.increase_flow_level()?;
970
971 self.allow_simple_key();
972
973 let start_mark = self.mark;
974 self.skip();
975
976 self.tokens.push_back(Token(start_mark, tok));
977 Ok(())
978 }
979
980 fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult {
981 self.remove_simple_key()?;
982 self.decrease_flow_level();
983
984 self.disallow_simple_key();
985
986 let start_mark = self.mark;
987 self.skip();
988
989 self.tokens.push_back(Token(start_mark, tok));
990 Ok(())
991 }
992
993 fn fetch_flow_entry(&mut self) -> ScanResult {
994 self.remove_simple_key()?;
995 self.allow_simple_key();
996
997 let start_mark = self.mark;
998 self.skip();
999
1000 self.tokens
1001 .push_back(Token(start_mark, TokenType::FlowEntry));
1002 Ok(())
1003 }
1004
1005 fn increase_flow_level(&mut self) -> ScanResult {
1006 self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0)));
1007 self.flow_level = self
1008 .flow_level
1009 .checked_add(1)
1010 .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?;
1011 Ok(())
1012 }
1013 fn decrease_flow_level(&mut self) {
1014 if self.flow_level > 0 {
1015 self.flow_level -= 1;
1016 self.simple_keys.pop().unwrap();
1017 }
1018 }
1019
1020 fn fetch_block_entry(&mut self) -> ScanResult {
1021 if self.flow_level == 0 {
1022 // Check if we are allowed to start a new entry.
1023 if !self.simple_key_allowed {
1024 return Err(ScanError::new(
1025 self.mark,
1026 "block sequence entries are not allowed in this context",
1027 ));
1028 }
1029
1030 let mark = self.mark;
1031 // generate BLOCK-SEQUENCE-START if indented
1032 self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark);
1033 } else {
1034 // - * only allowed in block
1035 return Err(ScanError::new(
1036 self.mark,
1037 r#""-" is only valid inside a block"#,
1038 ));
1039 }
1040 self.remove_simple_key()?;
1041 self.allow_simple_key();
1042
1043 let start_mark = self.mark;
1044 self.skip();
1045
1046 self.tokens
1047 .push_back(Token(start_mark, TokenType::BlockEntry));
1048 Ok(())
1049 }
1050
1051 fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult {
1052 self.unroll_indent(-1);
1053 self.remove_simple_key()?;
1054 self.disallow_simple_key();
1055
1056 let mark = self.mark;
1057
1058 self.skip();
1059 self.skip();
1060 self.skip();
1061
1062 self.tokens.push_back(Token(mark, t));
1063 Ok(())
1064 }
1065
1066 fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult {
1067 self.save_simple_key()?;
1068 self.allow_simple_key();
1069 let tok = self.scan_block_scalar(literal)?;
1070
1071 self.tokens.push_back(tok);
1072 Ok(())
1073 }
1074
1075 fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> {
1076 let start_mark = self.mark;
1077 let mut chomping: i32 = 0;
1078 let mut increment: usize = 0;
1079 let mut indent: usize = 0;
1080 let mut trailing_blank: bool;
1081 let mut leading_blank: bool = false;
1082
1083 let mut string = String::new();
1084 let mut leading_break = String::new();
1085 let mut trailing_breaks = String::new();
1086
1087 // skip '|' or '>'
1088 self.skip();
1089 self.lookahead(1);
1090
1091 if self.ch() == '+' || self.ch() == '-' {
1092 if self.ch() == '+' {
1093 chomping = 1;
1094 } else {
1095 chomping = -1;
1096 }
1097 self.skip();
1098 self.lookahead(1);
1099 if is_digit(self.ch()) {
1100 if self.ch() == '0' {
1101 return Err(ScanError::new(
1102 start_mark,
1103 "while scanning a block scalar, found an indentation indicator equal to 0",
1104 ));
1105 }
1106 increment = (self.ch() as usize) - ('0' as usize);
1107 self.skip();
1108 }
1109 } else if is_digit(self.ch()) {
1110 if self.ch() == '0' {
1111 return Err(ScanError::new(
1112 start_mark,
1113 "while scanning a block scalar, found an indentation indicator equal to 0",
1114 ));
1115 }
1116
1117 increment = (self.ch() as usize) - ('0' as usize);
1118 self.skip();
1119 self.lookahead(1);
1120 if self.ch() == '+' || self.ch() == '-' {
1121 if self.ch() == '+' {
1122 chomping = 1;
1123 } else {
1124 chomping = -1;
1125 }
1126 self.skip();
1127 }
1128 }
1129
1130 // Eat whitespaces and comments to the end of the line.
1131 self.lookahead(1);
1132
1133 while is_blank(self.ch()) {
1134 self.skip();
1135 self.lookahead(1);
1136 }
1137
1138 if self.ch() == '#' {
1139 while !is_breakz(self.ch()) {
1140 self.skip();
1141 self.lookahead(1);
1142 }
1143 }
1144
1145 // Check if we are at the end of the line.
1146 if !is_breakz(self.ch()) {
1147 return Err(ScanError::new(
1148 start_mark,
1149 "while scanning a block scalar, did not find expected comment or line break",
1150 ));
1151 }
1152
1153 if is_break(self.ch()) {
1154 self.lookahead(2);
1155 self.skip_line();
1156 }
1157
1158 if increment > 0 {
1159 indent = if self.indent >= 0 {
1160 (self.indent + increment as isize) as usize
1161 } else {
1162 increment
1163 }
1164 }
1165 // Scan the leading line breaks and determine the indentation level if needed.
1166 self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
1167
1168 self.lookahead(1);
1169
1170 let start_mark = self.mark;
1171
1172 while self.mark.col == indent && !is_z(self.ch()) {
1173 // We are at the beginning of a non-empty line.
1174 trailing_blank = is_blank(self.ch());
1175 if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank {
1176 if trailing_breaks.is_empty() {
1177 string.push(' ');
1178 }
1179 leading_break.clear();
1180 } else {
1181 string.push_str(&leading_break);
1182 leading_break.clear();
1183 }
1184
1185 string.push_str(&trailing_breaks);
1186 trailing_breaks.clear();
1187
1188 leading_blank = is_blank(self.ch());
1189
1190 while !is_breakz(self.ch()) {
1191 string.push(self.ch());
1192 self.skip();
1193 self.lookahead(1);
1194 }
1195 // break on EOF
1196 if is_z(self.ch()) {
1197 break;
1198 }
1199
1200 self.lookahead(2);
1201 self.read_break(&mut leading_break);
1202
1203 // Eat the following indentation spaces and line breaks.
1204 self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?;
1205 }
1206
1207 // Chomp the tail.
1208 if chomping != -1 {
1209 string.push_str(&leading_break);
1210 }
1211
1212 if chomping == 1 {
1213 string.push_str(&trailing_breaks);
1214 }
1215
1216 if literal {
1217 Ok(Token(
1218 start_mark,
1219 TokenType::Scalar(TScalarStyle::Literal, string),
1220 ))
1221 } else {
1222 Ok(Token(
1223 start_mark,
1224 TokenType::Scalar(TScalarStyle::Foled, string),
1225 ))
1226 }
1227 }
1228
1229 fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult {
1230 let mut max_indent = 0;
1231 loop {
1232 self.lookahead(1);
1233 while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' {
1234 self.skip();
1235 self.lookahead(1);
1236 }
1237
1238 if self.mark.col > max_indent {
1239 max_indent = self.mark.col;
1240 }
1241
1242 // Check for a tab character messing the indentation.
1243 if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' {
1244 return Err(ScanError::new(self.mark,
1245 "while scanning a block scalar, found a tab character where an indentation space is expected"));
1246 }
1247
1248 if !is_break(self.ch()) {
1249 break;
1250 }
1251
1252 self.lookahead(2);
1253 // Consume the line break.
1254 self.read_break(breaks);
1255 }
1256
1257 if *indent == 0 {
1258 *indent = max_indent;
1259 if *indent < (self.indent + 1) as usize {
1260 *indent = (self.indent + 1) as usize;
1261 }
1262 if *indent < 1 {
1263 *indent = 1;
1264 }
1265 }
1266 Ok(())
1267 }
1268
1269 fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult {
1270 self.save_simple_key()?;
1271 self.disallow_simple_key();
1272
1273 let tok = self.scan_flow_scalar(single)?;
1274
1275 // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like,
1276 // YAML allows the following value to be specified adjacent to the “:”.
1277 self.adjacent_value_allowed_at = self.mark.index;
1278
1279 self.tokens.push_back(tok);
1280 Ok(())
1281 }
1282
1283 fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> {
1284 let start_mark = self.mark;
1285
1286 let mut string = String::new();
1287 let mut leading_break = String::new();
1288 let mut trailing_breaks = String::new();
1289 let mut whitespaces = String::new();
1290 let mut leading_blanks;
1291
1292 /* Eat the left quote. */
1293 self.skip();
1294
1295 loop {
1296 /* Check for a document indicator. */
1297 self.lookahead(4);
1298
1299 if self.mark.col == 0
1300 && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
1301 || ((self.buffer[0] == '.')
1302 && (self.buffer[1] == '.')
1303 && (self.buffer[2] == '.')))
1304 && is_blankz(self.buffer[3])
1305 {
1306 return Err(ScanError::new(
1307 start_mark,
1308 "while scanning a quoted scalar, found unexpected document indicator",
1309 ));
1310 }
1311
1312 if is_z(self.ch()) {
1313 return Err(ScanError::new(
1314 start_mark,
1315 "while scanning a quoted scalar, found unexpected end of stream",
1316 ));
1317 }
1318
1319 self.lookahead(2);
1320
1321 leading_blanks = false;
1322 // Consume non-blank characters.
1323
1324 while !is_blankz(self.ch()) {
1325 match self.ch() {
1326 // Check for an escaped single quote.
1327 '\'' if self.buffer[1] == '\'' && single => {
1328 string.push('\'');
1329 self.skip();
1330 self.skip();
1331 }
1332 // Check for the right quote.
1333 '\'' if single => break,
1334 '"' if !single => break,
1335 // Check for an escaped line break.
1336 '\\' if !single && is_break(self.buffer[1]) => {
1337 self.lookahead(3);
1338 self.skip();
1339 self.skip_line();
1340 leading_blanks = true;
1341 break;
1342 }
1343 // Check for an escape sequence.
1344 '\\' if !single => {
1345 let mut code_length = 0usize;
1346 match self.buffer[1] {
1347 '0' => string.push('\0'),
1348 'a' => string.push('\x07'),
1349 'b' => string.push('\x08'),
1350 't' | '\t' => string.push('\t'),
1351 'n' => string.push('\n'),
1352 'v' => string.push('\x0b'),
1353 'f' => string.push('\x0c'),
1354 'r' => string.push('\x0d'),
1355 'e' => string.push('\x1b'),
1356 ' ' => string.push('\x20'),
1357 '"' => string.push('"'),
1358 '\'' => string.push('\''),
1359 '\\' => string.push('\\'),
1360 // NEL (#x85)
1361 'N' => string.push(char::from_u32(0x85).unwrap()),
1362 // #xA0
1363 '_' => string.push(char::from_u32(0xA0).unwrap()),
1364 // LS (#x2028)
1365 'L' => string.push(char::from_u32(0x2028).unwrap()),
1366 // PS (#x2029)
1367 'P' => string.push(char::from_u32(0x2029).unwrap()),
1368 'x' => code_length = 2,
1369 'u' => code_length = 4,
1370 'U' => code_length = 8,
1371 _ => {
1372 return Err(ScanError::new(
1373 start_mark,
1374 "while parsing a quoted scalar, found unknown escape character",
1375 ))
1376 }
1377 }
1378 self.skip();
1379 self.skip();
1380 // Consume an arbitrary escape code.
1381 if code_length > 0 {
1382 self.lookahead(code_length);
1383 let mut value = 0u32;
1384 for i in 0..code_length {
1385 if !is_hex(self.buffer[i]) {
1386 return Err(ScanError::new(start_mark,
1387 "while parsing a quoted scalar, did not find expected hexadecimal number"));
1388 }
1389 value = (value << 4) + as_hex(self.buffer[i]);
1390 }
1391
1392 let ch = match char::from_u32(value) {
1393 Some(v) => v,
1394 None => {
1395 return Err(ScanError::new(start_mark,
1396 "while parsing a quoted scalar, found invalid Unicode character escape code"));
1397 }
1398 };
1399 string.push(ch);
1400
1401 for _ in 0..code_length {
1402 self.skip();
1403 }
1404 }
1405 }
1406 c => {
1407 string.push(c);
1408 self.skip();
1409 }
1410 }
1411 self.lookahead(2);
1412 }
1413 self.lookahead(1);
1414 match self.ch() {
1415 '\'' if single => break,
1416 '"' if !single => break,
1417 _ => {}
1418 }
1419
1420 // Consume blank characters.
1421 while is_blank(self.ch()) || is_break(self.ch()) {
1422 if is_blank(self.ch()) {
1423 // Consume a space or a tab character.
1424 if leading_blanks {
1425 self.skip();
1426 } else {
1427 whitespaces.push(self.ch());
1428 self.skip();
1429 }
1430 } else {
1431 self.lookahead(2);
1432 // Check if it is a first line break.
1433 if leading_blanks {
1434 self.read_break(&mut trailing_breaks);
1435 } else {
1436 whitespaces.clear();
1437 self.read_break(&mut leading_break);
1438 leading_blanks = true;
1439 }
1440 }
1441 self.lookahead(1);
1442 }
1443 // Join the whitespaces or fold line breaks.
1444 if leading_blanks {
1445 if leading_break.is_empty() {
1446 string.push_str(&leading_break);
1447 string.push_str(&trailing_breaks);
1448 trailing_breaks.clear();
1449 leading_break.clear();
1450 } else {
1451 if trailing_breaks.is_empty() {
1452 string.push(' ');
1453 } else {
1454 string.push_str(&trailing_breaks);
1455 trailing_breaks.clear();
1456 }
1457 leading_break.clear();
1458 }
1459 } else {
1460 string.push_str(&whitespaces);
1461 whitespaces.clear();
1462 }
1463 } // loop
1464
1465 // Eat the right quote.
1466 self.skip();
1467
1468 if single {
1469 Ok(Token(
1470 start_mark,
1471 TokenType::Scalar(TScalarStyle::SingleQuoted, string),
1472 ))
1473 } else {
1474 Ok(Token(
1475 start_mark,
1476 TokenType::Scalar(TScalarStyle::DoubleQuoted, string),
1477 ))
1478 }
1479 }
1480
1481 fn fetch_plain_scalar(&mut self) -> ScanResult {
1482 self.save_simple_key()?;
1483 self.disallow_simple_key();
1484
1485 let tok = self.scan_plain_scalar()?;
1486
1487 self.tokens.push_back(tok);
1488 Ok(())
1489 }
1490
1491 fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> {
1492 let indent = self.indent + 1;
1493 let start_mark = self.mark;
1494
1495 let mut string = String::new();
1496 let mut leading_break = String::new();
1497 let mut trailing_breaks = String::new();
1498 let mut whitespaces = String::new();
1499 let mut leading_blanks = false;
1500
1501 loop {
1502 /* Check for a document indicator. */
1503 self.lookahead(4);
1504
1505 if self.mark.col == 0
1506 && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-'))
1507 || ((self.buffer[0] == '.')
1508 && (self.buffer[1] == '.')
1509 && (self.buffer[2] == '.')))
1510 && is_blankz(self.buffer[3])
1511 {
1512 break;
1513 }
1514
1515 if self.ch() == '#' {
1516 break;
1517 }
1518 while !is_blankz(self.ch()) {
1519 // indicators can end a plain scalar, see 7.3.3. Plain Style
1520 match self.ch() {
1521 ':' if is_blankz(self.buffer[1])
1522 || (self.flow_level > 0 && is_flow(self.buffer[1])) =>
1523 {
1524 break;
1525 }
1526 ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break,
1527 _ => {}
1528 }
1529
1530 if leading_blanks || !whitespaces.is_empty() {
1531 if leading_blanks {
1532 if leading_break.is_empty() {
1533 string.push_str(&leading_break);
1534 string.push_str(&trailing_breaks);
1535 trailing_breaks.clear();
1536 leading_break.clear();
1537 } else {
1538 if trailing_breaks.is_empty() {
1539 string.push(' ');
1540 } else {
1541 string.push_str(&trailing_breaks);
1542 trailing_breaks.clear();
1543 }
1544 leading_break.clear();
1545 }
1546 leading_blanks = false;
1547 } else {
1548 string.push_str(&whitespaces);
1549 whitespaces.clear();
1550 }
1551 }
1552
1553 string.push(self.ch());
1554 self.skip();
1555 self.lookahead(2);
1556 }
1557 // is the end?
1558 if !(is_blank(self.ch()) || is_break(self.ch())) {
1559 break;
1560 }
1561 self.lookahead(1);
1562
1563 while is_blank(self.ch()) || is_break(self.ch()) {
1564 if is_blank(self.ch()) {
1565 if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' {
1566 return Err(ScanError::new(
1567 start_mark,
1568 "while scanning a plain scalar, found a tab",
1569 ));
1570 }
1571
1572 if leading_blanks {
1573 self.skip();
1574 } else {
1575 whitespaces.push(self.ch());
1576 self.skip();
1577 }
1578 } else {
1579 self.lookahead(2);
1580 // Check if it is a first line break
1581 if leading_blanks {
1582 self.read_break(&mut trailing_breaks);
1583 } else {
1584 whitespaces.clear();
1585 self.read_break(&mut leading_break);
1586 leading_blanks = true;
1587 }
1588 }
1589 self.lookahead(1);
1590 }
1591
1592 // check indentation level
1593 if self.flow_level == 0 && (self.mark.col as isize) < indent {
1594 break;
1595 }
1596 }
1597
1598 if leading_blanks {
1599 self.allow_simple_key();
1600 }
1601
1602 Ok(Token(
1603 start_mark,
1604 TokenType::Scalar(TScalarStyle::Plain, string),
1605 ))
1606 }
1607
1608 fn fetch_key(&mut self) -> ScanResult {
1609 let start_mark = self.mark;
1610 if self.flow_level == 0 {
1611 // Check if we are allowed to start a new key (not necessarily simple).
1612 if !self.simple_key_allowed {
1613 return Err(ScanError::new(
1614 self.mark,
1615 "mapping keys are not allowed in this context",
1616 ));
1617 }
1618 self.roll_indent(
1619 start_mark.col,
1620 None,
1621 TokenType::BlockMappingStart,
1622 start_mark,
1623 );
1624 }
1625
1626 self.remove_simple_key()?;
1627
1628 if self.flow_level == 0 {
1629 self.allow_simple_key();
1630 } else {
1631 self.disallow_simple_key();
1632 }
1633
1634 self.skip();
1635 self.tokens.push_back(Token(start_mark, TokenType::Key));
1636 Ok(())
1637 }
1638
1639 fn fetch_value(&mut self) -> ScanResult {
1640 let sk = self.simple_keys.last().unwrap().clone();
1641 let start_mark = self.mark;
1642 if sk.possible {
1643 // insert simple key
1644 let tok = Token(sk.mark, TokenType::Key);
1645 let tokens_parsed = self.tokens_parsed;
1646 self.insert_token(sk.token_number - tokens_parsed, tok);
1647
1648 // Add the BLOCK-MAPPING-START token if needed.
1649 self.roll_indent(
1650 sk.mark.col,
1651 Some(sk.token_number),
1652 TokenType::BlockMappingStart,
1653 start_mark,
1654 );
1655
1656 self.simple_keys.last_mut().unwrap().possible = false;
1657 self.disallow_simple_key();
1658 } else {
1659 // The ':' indicator follows a complex key.
1660 if self.flow_level == 0 {
1661 if !self.simple_key_allowed {
1662 return Err(ScanError::new(
1663 start_mark,
1664 "mapping values are not allowed in this context",
1665 ));
1666 }
1667
1668 self.roll_indent(
1669 start_mark.col,
1670 None,
1671 TokenType::BlockMappingStart,
1672 start_mark,
1673 );
1674 }
1675
1676 if self.flow_level == 0 {
1677 self.allow_simple_key();
1678 } else {
1679 self.disallow_simple_key();
1680 }
1681 }
1682 self.skip();
1683 self.tokens.push_back(Token(start_mark, TokenType::Value));
1684
1685 Ok(())
1686 }
1687
1688 fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) {
1689 if self.flow_level > 0 {
1690 return;
1691 }
1692
1693 if self.indent < col as isize {
1694 self.indents.push(self.indent);
1695 self.indent = col as isize;
1696 let tokens_parsed = self.tokens_parsed;
1697 match number {
1698 Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)),
1699 None => self.tokens.push_back(Token(mark, tok)),
1700 }
1701 }
1702 }
1703
1704 fn unroll_indent(&mut self, col: isize) {
1705 if self.flow_level > 0 {
1706 return;
1707 }
1708 while self.indent > col {
1709 self.tokens.push_back(Token(self.mark, TokenType::BlockEnd));
1710 self.indent = self.indents.pop().unwrap();
1711 }
1712 }
1713
1714 fn save_simple_key(&mut self) -> Result<(), ScanError> {
1715 let required = self.flow_level > 0 && self.indent == (self.mark.col as isize);
1716 if self.simple_key_allowed {
1717 let mut sk = SimpleKey::new(self.mark);
1718 sk.possible = true;
1719 sk.required = required;
1720 sk.token_number = self.tokens_parsed + self.tokens.len();
1721
1722 self.remove_simple_key()?;
1723
1724 self.simple_keys.pop();
1725 self.simple_keys.push(sk);
1726 }
1727 Ok(())
1728 }
1729
1730 fn remove_simple_key(&mut self) -> ScanResult {
1731 let last = self.simple_keys.last_mut().unwrap();
1732 if last.possible && last.required {
1733 return Err(ScanError::new(self.mark, "simple key expected"));
1734 }
1735
1736 last.possible = false;
1737 Ok(())
1738 }
1739}
1740
// Token-level tests for the scanner. Each test feeds a small YAML document to
// `Scanner` and checks the exact sequence of tokens it yields.
//
// Leading whitespace inside the multi-line fixtures is significant: it encodes
// the YAML nesting, so it must not be reformatted.
#[cfg(test)]
mod test {
    use super::TokenType::*;
    use super::*;

    /// Pull the next token from scanner `$p` and require its type to match pattern `$tk`.
    macro_rules! next {
        ($p:ident, $tk:pat) => {{
            let tok = $p.next().unwrap();
            match tok.1 {
                $tk => {}
                _ => panic!("unexpected token: {:?}", tok),
            }
        }};
    }

    /// Pull the next token from scanner `$p` and require a scalar of style `$tk` with value `$v`.
    macro_rules! next_scalar {
        ($p:ident, $tk:expr, $v:expr) => {{
            let tok = $p.next().unwrap();
            match tok.1 {
                Scalar(style, ref v) => {
                    assert_eq!(style, $tk);
                    assert_eq!(v, $v);
                }
                _ => panic!("unexpected token: {:?}", tok),
            }
        }};
    }

    /// Require that scanner `$p` yields no further tokens.
    macro_rules! end {
        ($p:ident) => {{
            assert_eq!($p.next(), None);
        }};
    }

    /// test cases in libyaml scanner.c
    #[test]
    fn test_empty() {
        let s = "";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_scalar() {
        let s = "a scalar";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_explicit_scalar() {
        let s = "---
'a scalar'
...
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, DocumentStart);
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, DocumentEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_multiple_documents() {
        let s = "
'a scalar'
---
'a scalar'
---
'a scalar'
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, DocumentStart);
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, DocumentStart);
        next!(p, Scalar(TScalarStyle::SingleQuoted, _));
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_a_flow_sequence() {
        let s = "[item 1, item 2, item 3]";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowSequenceStart);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, FlowEntry);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowEntry);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowSequenceEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_a_flow_mapping() {
        let s = "
{
    a simple key: a value, # Note that the KEY token is produced.
    ? a complex key: another value,
}
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, Value);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowEntry);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a complex key");
        next!(p, Value);
        next!(p, Scalar(TScalarStyle::Plain, _));
        next!(p, FlowEntry);
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_block_sequences() {
        // The nested sequence and mapping must be indented deeper than the
        // enclosing `-` entries.
        let s = "
- item 1
- item 2
-
  - item 3.1
  - item 3.2
-
  key 1: value 1
  key 2: value 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEntry);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 3.1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 3.2");
        next!(p, BlockEnd);
        next!(p, BlockEntry);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 1");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 1");
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 2");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 2");
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_block_mappings() {
        let s = "
a simple key: a value # The KEY token is produced here.
? a complex key
: another value
a mapping:
  key 1: value 1
  key 2: value 2
a sequence:
  - item 1
  - item 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockMappingStart);
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value); // libyaml comment seems to be wrong
        next!(p, BlockMappingStart);
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, Scalar(_, _));
        next!(p, BlockEnd);
        next!(p, Key);
        next!(p, Scalar(_, _));
        next!(p, Value);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next!(p, Scalar(_, _));
        next!(p, BlockEntry);
        next!(p, Scalar(_, _));
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_no_block_sequence_start() {
        // A sequence at the same column as its parent key produces no
        // BlockSequenceStart token.
        let s = "
key:
- item 1
- item 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key");
        next!(p, Value);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_collections_in_sequence() {
        // Continuation lines are aligned with the nested collection opened
        // after the outer `- ` (column 2).
        let s = "
- - item 1
  - item 2
- key 1: value 1
  key 2: value 2
- ? complex key
  : complex value
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEnd);
        next!(p, BlockEntry);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 1");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 1");
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 2");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 2");
        next!(p, BlockEnd);
        next!(p, BlockEntry);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "complex key");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "complex value");
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_collections_in_mapping() {
        // Continuation lines are aligned with the nested collection opened
        // after the `: ` indicator (column 2).
        let s = "
? a sequence
: - item 1
  - item 2
? a mapping
: key 1: value 1
  key 2: value 2
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a sequence");
        next!(p, Value);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "item 2");
        next!(p, BlockEnd);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a mapping");
        next!(p, Value);
        next!(p, BlockMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 1");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 1");
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "key 2");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "value 2");
        next!(p, BlockEnd);
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_spec_ex7_3() {
        let s = "
{
    ? foo :,
    : bar,
}
";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "foo");
        next!(p, Value);
        next!(p, FlowEntry);
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "bar");
        next!(p, FlowEntry);
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_plain_scalar_starting_with_indicators_in_flow() {
        // "Plain scalars must not begin with most indicators, as this would cause ambiguity with
        // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first
        // character if followed by a non-space “safe” character, as this causes no ambiguity."

        let s = "{a: :b}";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, ":b");
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);

        let s = "{a: ?b}";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, FlowMappingStart);
        next!(p, Key);
        next_scalar!(p, TScalarStyle::Plain, "a");
        next!(p, Value);
        next_scalar!(p, TScalarStyle::Plain, "?b");
        next!(p, FlowMappingEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_plain_scalar_starting_with_indicators_in_block() {
        let s = ":a";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, ":a");
        next!(p, StreamEnd);
        end!(p);

        let s = "?a";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, "?a");
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_plain_scalar_containing_indicators_in_block() {
        // Outside flow collections `,` is ordinary scalar content, and `:` only
        // ends a scalar when followed by a blank or the end of input.
        let s = "a:,b";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, "a:,b");
        next!(p, StreamEnd);
        end!(p);

        let s = ":,b";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next_scalar!(p, TScalarStyle::Plain, ":,b");
        next!(p, StreamEnd);
        end!(p);
    }

    #[test]
    fn test_scanner_cr() {
        // CRLF line endings must scan like plain LF.
        let s = "---\r\n- tok1\r\n- tok2";
        let mut p = Scanner::new(s.chars());
        next!(p, StreamStart(..));
        next!(p, DocumentStart);
        next!(p, BlockSequenceStart);
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "tok1");
        next!(p, BlockEntry);
        next_scalar!(p, TScalarStyle::Plain, "tok2");
        next!(p, BlockEnd);
        next!(p, StreamEnd);
        end!(p);
    }

    // Placeholder: passes vacuously until URI scanning tests are written.
    #[test]
    fn test_uri() {
        // TODO
    }

    // Placeholder: passes vacuously until URI escape tests are written.
    #[test]
    fn test_uri_escapes() {
        // TODO
    }
}
2183