1 | use std::collections::VecDeque; |
2 | use std::error::Error; |
3 | use std::{char, fmt}; |
4 | |
/// Character encoding reported in the stream-start token.
///
/// UTF-8 is the only encoding this scanner represents.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TEncoding {
    /// UTF-8 encoded input.
    Utf8,
}
9 | |
/// Presentation style attached to a scalar token.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TScalarStyle {
    Any,
    Plain,
    SingleQuoted,
    DoubleQuoted,

    /// Literal block scalar (the `|` indicator).
    Literal,
    /// NOTE(review): presumably the folded (`>`) block style. The spelling is
    /// part of the public API, so it is left untouched here.
    Foled,
}
20 | |
/// A position inside the character stream being scanned.
///
/// As tracked by the scanner, `index` counts consumed characters, `line`
/// starts at 1 and `col` starts at 0.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Marker {
    index: usize,
    line: usize,
    col: usize,
}

impl Marker {
    /// Builds a marker from its three raw components.
    fn new(index: usize, line: usize, col: usize) -> Marker {
        Self { index, line, col }
    }

    /// Character offset from the start of the stream.
    pub fn index(&self) -> usize {
        self.index
    }

    /// Line number of this position.
    pub fn line(&self) -> usize {
        self.line
    }

    /// Column of this position within its line.
    pub fn col(&self) -> usize {
        self.col
    }
}
45 | |
46 | #[derive (Clone, PartialEq, Debug, Eq)] |
47 | pub struct ScanError { |
48 | mark: Marker, |
49 | info: String, |
50 | } |
51 | |
52 | impl ScanError { |
53 | pub fn new(loc: Marker, info: &str) -> ScanError { |
54 | ScanError { |
55 | mark: loc, |
56 | info: info.to_owned(), |
57 | } |
58 | } |
59 | |
60 | pub fn marker(&self) -> &Marker { |
61 | &self.mark |
62 | } |
63 | } |
64 | |
65 | impl Error for ScanError { |
66 | fn description(&self) -> &str { |
67 | self.info.as_ref() |
68 | } |
69 | |
70 | fn cause(&self) -> Option<&dyn Error> { |
71 | None |
72 | } |
73 | } |
74 | |
75 | impl fmt::Display for ScanError { |
76 | // col starts from 0 |
77 | fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { |
78 | write!( |
79 | formatter, |
80 | " {} at line {} column {}" , |
81 | self.info, |
82 | self.mark.line, |
83 | self.mark.col + 1 |
84 | ) |
85 | } |
86 | } |
87 | |
88 | #[derive (Clone, PartialEq, Debug, Eq)] |
89 | pub enum TokenType { |
90 | NoToken, |
91 | StreamStart(TEncoding), |
92 | StreamEnd, |
93 | /// major, minor |
94 | VersionDirective(u32, u32), |
95 | /// handle, prefix |
96 | TagDirective(String, String), |
97 | DocumentStart, |
98 | DocumentEnd, |
99 | BlockSequenceStart, |
100 | BlockMappingStart, |
101 | BlockEnd, |
102 | FlowSequenceStart, |
103 | FlowSequenceEnd, |
104 | FlowMappingStart, |
105 | FlowMappingEnd, |
106 | BlockEntry, |
107 | FlowEntry, |
108 | Key, |
109 | Value, |
110 | Alias(String), |
111 | Anchor(String), |
112 | /// handle, suffix |
113 | Tag(String, String), |
114 | Scalar(TScalarStyle, String), |
115 | } |
116 | |
117 | #[derive (Clone, PartialEq, Debug, Eq)] |
118 | pub struct Token(pub Marker, pub TokenType); |
119 | |
120 | #[derive (Clone, PartialEq, Debug, Eq)] |
121 | struct SimpleKey { |
122 | possible: bool, |
123 | required: bool, |
124 | token_number: usize, |
125 | mark: Marker, |
126 | } |
127 | |
128 | impl SimpleKey { |
129 | fn new(mark: Marker) -> SimpleKey { |
130 | SimpleKey { |
131 | possible: false, |
132 | required: false, |
133 | token_number: 0, |
134 | mark, |
135 | } |
136 | } |
137 | } |
138 | |
139 | #[derive (Debug)] |
140 | pub struct Scanner<T> { |
141 | rdr: T, |
142 | mark: Marker, |
143 | tokens: VecDeque<Token>, |
144 | buffer: VecDeque<char>, |
145 | error: Option<ScanError>, |
146 | |
147 | stream_start_produced: bool, |
148 | stream_end_produced: bool, |
149 | adjacent_value_allowed_at: usize, |
150 | simple_key_allowed: bool, |
151 | simple_keys: Vec<SimpleKey>, |
152 | indent: isize, |
153 | indents: Vec<isize>, |
154 | flow_level: u8, |
155 | tokens_parsed: usize, |
156 | token_available: bool, |
157 | } |
158 | |
159 | impl<T: Iterator<Item = char>> Iterator for Scanner<T> { |
160 | type Item = Token; |
161 | fn next(&mut self) -> Option<Token> { |
162 | if self.error.is_some() { |
163 | return None; |
164 | } |
165 | match self.next_token() { |
166 | Ok(tok: Option) => tok, |
167 | Err(e: ScanError) => { |
168 | self.error = Some(e); |
169 | None |
170 | } |
171 | } |
172 | } |
173 | } |
174 | |
/// Returns `true` for the NUL character, which the scanner uses as its
/// end-of-input sentinel.
#[inline]
fn is_z(c: char) -> bool {
    c == '\0'
}
/// Returns `true` for a line-break character (`\n` or `\r`).
#[inline]
fn is_break(c: char) -> bool {
    c == '\n' || c == '\r'
}
183 | #[inline ] |
184 | fn is_breakz(c: char) -> bool { |
185 | is_break(c) || is_z(c) |
186 | } |
/// Returns `true` for a space or a horizontal tab.
#[inline]
fn is_blank(c: char) -> bool {
    c == ' ' || c == '\t'
}
191 | #[inline ] |
192 | fn is_blankz(c: char) -> bool { |
193 | is_blank(c) || is_breakz(c) |
194 | } |
/// Returns `true` for an ASCII decimal digit (`0`-`9`).
#[inline]
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// Returns `true` for an ASCII letter, an ASCII digit, `_` or `-`.
#[inline]
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '_' || c == '-'
}
/// Returns `true` for an ASCII hexadecimal digit in either case.
#[inline]
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
/// Converts a hexadecimal digit to its numeric value (0-15).
///
/// # Panics
///
/// Panics if `c` is not an ASCII hexadecimal digit; check with `is_hex` first.
#[inline]
fn as_hex(c: char) -> u32 {
    match c.to_digit(16) {
        Some(value) => value,
        None => unreachable!(),
    }
}
/// Returns `true` for a flow-collection indicator: `,`, `[`, `]`, `{` or `}`.
#[inline]
fn is_flow(c: char) -> bool {
    [',', '[', ']', '{', '}'].contains(&c)
}
227 | |
228 | pub type ScanResult = Result<(), ScanError>; |
229 | |
230 | impl<T: Iterator<Item = char>> Scanner<T> { |
231 | /// Creates the YAML tokenizer. |
232 | pub fn new(rdr: T) -> Scanner<T> { |
233 | Scanner { |
234 | rdr, |
235 | buffer: VecDeque::new(), |
236 | mark: Marker::new(0, 1, 0), |
237 | tokens: VecDeque::new(), |
238 | error: None, |
239 | |
240 | stream_start_produced: false, |
241 | stream_end_produced: false, |
242 | adjacent_value_allowed_at: 0, |
243 | simple_key_allowed: true, |
244 | simple_keys: Vec::new(), |
245 | indent: -1, |
246 | indents: Vec::new(), |
247 | flow_level: 0, |
248 | tokens_parsed: 0, |
249 | token_available: false, |
250 | } |
251 | } |
252 | #[inline ] |
253 | pub fn get_error(&self) -> Option<ScanError> { |
254 | match self.error { |
255 | None => None, |
256 | Some(ref e) => Some(e.clone()), |
257 | } |
258 | } |
259 | |
260 | #[inline ] |
261 | fn lookahead(&mut self, count: usize) { |
262 | if self.buffer.len() >= count { |
263 | return; |
264 | } |
265 | for _ in 0..(count - self.buffer.len()) { |
266 | self.buffer.push_back(self.rdr.next().unwrap_or(' \0' )); |
267 | } |
268 | } |
269 | #[inline ] |
270 | fn skip(&mut self) { |
271 | let c = self.buffer.pop_front().unwrap(); |
272 | |
273 | self.mark.index += 1; |
274 | if c == ' \n' { |
275 | self.mark.line += 1; |
276 | self.mark.col = 0; |
277 | } else { |
278 | self.mark.col += 1; |
279 | } |
280 | } |
281 | #[inline ] |
282 | fn skip_line(&mut self) { |
283 | if self.buffer[0] == ' \r' && self.buffer[1] == ' \n' { |
284 | self.skip(); |
285 | self.skip(); |
286 | } else if is_break(self.buffer[0]) { |
287 | self.skip(); |
288 | } |
289 | } |
290 | #[inline ] |
291 | fn ch(&self) -> char { |
292 | self.buffer[0] |
293 | } |
294 | #[inline ] |
295 | fn ch_is(&self, c: char) -> bool { |
296 | self.buffer[0] == c |
297 | } |
298 | #[allow (dead_code)] |
299 | #[inline ] |
300 | fn eof(&self) -> bool { |
301 | self.ch_is(' \0' ) |
302 | } |
303 | #[inline ] |
304 | pub fn stream_started(&self) -> bool { |
305 | self.stream_start_produced |
306 | } |
307 | #[inline ] |
308 | pub fn stream_ended(&self) -> bool { |
309 | self.stream_end_produced |
310 | } |
311 | #[inline ] |
312 | pub fn mark(&self) -> Marker { |
313 | self.mark |
314 | } |
315 | #[inline ] |
316 | fn read_break(&mut self, s: &mut String) { |
317 | if self.buffer[0] == ' \r' && self.buffer[1] == ' \n' { |
318 | s.push(' \n' ); |
319 | self.skip(); |
320 | self.skip(); |
321 | } else if self.buffer[0] == ' \r' || self.buffer[0] == ' \n' { |
322 | s.push(' \n' ); |
323 | self.skip(); |
324 | } else { |
325 | unreachable!(); |
326 | } |
327 | } |
328 | fn insert_token(&mut self, pos: usize, tok: Token) { |
329 | let old_len = self.tokens.len(); |
330 | assert!(pos <= old_len); |
331 | self.tokens.push_back(tok); |
332 | for i in 0..old_len - pos { |
333 | self.tokens.swap(old_len - i, old_len - i - 1); |
334 | } |
335 | } |
336 | fn allow_simple_key(&mut self) { |
337 | self.simple_key_allowed = true; |
338 | } |
339 | fn disallow_simple_key(&mut self) { |
340 | self.simple_key_allowed = false; |
341 | } |
342 | |
343 | pub fn fetch_next_token(&mut self) -> ScanResult { |
344 | self.lookahead(1); |
345 | // println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch()); |
346 | |
347 | if !self.stream_start_produced { |
348 | self.fetch_stream_start(); |
349 | return Ok(()); |
350 | } |
351 | self.skip_to_next_token(); |
352 | |
353 | self.stale_simple_keys()?; |
354 | |
355 | let mark = self.mark; |
356 | self.unroll_indent(mark.col as isize); |
357 | |
358 | self.lookahead(4); |
359 | |
360 | if is_z(self.ch()) { |
361 | self.fetch_stream_end()?; |
362 | return Ok(()); |
363 | } |
364 | |
365 | // Is it a directive? |
366 | if self.mark.col == 0 && self.ch_is('%' ) { |
367 | return self.fetch_directive(); |
368 | } |
369 | |
370 | if self.mark.col == 0 |
371 | && self.buffer[0] == '-' |
372 | && self.buffer[1] == '-' |
373 | && self.buffer[2] == '-' |
374 | && is_blankz(self.buffer[3]) |
375 | { |
376 | self.fetch_document_indicator(TokenType::DocumentStart)?; |
377 | return Ok(()); |
378 | } |
379 | |
380 | if self.mark.col == 0 |
381 | && self.buffer[0] == '.' |
382 | && self.buffer[1] == '.' |
383 | && self.buffer[2] == '.' |
384 | && is_blankz(self.buffer[3]) |
385 | { |
386 | self.fetch_document_indicator(TokenType::DocumentEnd)?; |
387 | return Ok(()); |
388 | } |
389 | |
390 | let c = self.buffer[0]; |
391 | let nc = self.buffer[1]; |
392 | match c { |
393 | '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart), |
394 | '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart), |
395 | ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd), |
396 | '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd), |
397 | ',' => self.fetch_flow_entry(), |
398 | '-' if is_blankz(nc) => self.fetch_block_entry(), |
399 | '?' if is_blankz(nc) => self.fetch_key(), |
400 | ':' if is_blankz(nc) |
401 | || (self.flow_level > 0 |
402 | && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) => |
403 | { |
404 | self.fetch_value() |
405 | } |
406 | // Is it an alias? |
407 | '*' => self.fetch_anchor(true), |
408 | // Is it an anchor? |
409 | '&' => self.fetch_anchor(false), |
410 | '!' => self.fetch_tag(), |
411 | // Is it a literal scalar? |
412 | '|' if self.flow_level == 0 => self.fetch_block_scalar(true), |
413 | // Is it a folded scalar? |
414 | '>' if self.flow_level == 0 => self.fetch_block_scalar(false), |
415 | ' \'' => self.fetch_flow_scalar(true), |
416 | '"' => self.fetch_flow_scalar(false), |
417 | // plain scalar |
418 | '-' if !is_blankz(nc) => self.fetch_plain_scalar(), |
419 | ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(), |
420 | '%' | '@' | '`' => Err(ScanError::new( |
421 | self.mark, |
422 | &format!("unexpected character: ` {}'" , c), |
423 | )), |
424 | _ => self.fetch_plain_scalar(), |
425 | } |
426 | } |
427 | |
428 | pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> { |
429 | if self.stream_end_produced { |
430 | return Ok(None); |
431 | } |
432 | |
433 | if !self.token_available { |
434 | self.fetch_more_tokens()?; |
435 | } |
436 | let t = self.tokens.pop_front().unwrap(); |
437 | self.token_available = false; |
438 | self.tokens_parsed += 1; |
439 | |
440 | if let TokenType::StreamEnd = t.1 { |
441 | self.stream_end_produced = true; |
442 | } |
443 | Ok(Some(t)) |
444 | } |
445 | |
446 | pub fn fetch_more_tokens(&mut self) -> ScanResult { |
447 | let mut need_more; |
448 | loop { |
449 | need_more = false; |
450 | if self.tokens.is_empty() { |
451 | need_more = true; |
452 | } else { |
453 | self.stale_simple_keys()?; |
454 | for sk in &self.simple_keys { |
455 | if sk.possible && sk.token_number == self.tokens_parsed { |
456 | need_more = true; |
457 | break; |
458 | } |
459 | } |
460 | } |
461 | |
462 | if !need_more { |
463 | break; |
464 | } |
465 | self.fetch_next_token()?; |
466 | } |
467 | self.token_available = true; |
468 | |
469 | Ok(()) |
470 | } |
471 | |
472 | fn stale_simple_keys(&mut self) -> ScanResult { |
473 | for sk in &mut self.simple_keys { |
474 | if sk.possible |
475 | && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index) |
476 | { |
477 | if sk.required { |
478 | return Err(ScanError::new(self.mark, "simple key expect ':'" )); |
479 | } |
480 | sk.possible = false; |
481 | } |
482 | } |
483 | Ok(()) |
484 | } |
485 | |
486 | fn skip_to_next_token(&mut self) { |
487 | loop { |
488 | self.lookahead(1); |
489 | // TODO(chenyh) BOM |
490 | match self.ch() { |
491 | ' ' => self.skip(), |
492 | ' \t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(), |
493 | ' \n' | ' \r' => { |
494 | self.lookahead(2); |
495 | self.skip_line(); |
496 | if self.flow_level == 0 { |
497 | self.allow_simple_key(); |
498 | } |
499 | } |
500 | '#' => { |
501 | while !is_breakz(self.ch()) { |
502 | self.skip(); |
503 | self.lookahead(1); |
504 | } |
505 | } |
506 | _ => break, |
507 | } |
508 | } |
509 | } |
510 | |
511 | fn fetch_stream_start(&mut self) { |
512 | let mark = self.mark; |
513 | self.indent = -1; |
514 | self.stream_start_produced = true; |
515 | self.allow_simple_key(); |
516 | self.tokens |
517 | .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8))); |
518 | self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0))); |
519 | } |
520 | |
521 | fn fetch_stream_end(&mut self) -> ScanResult { |
522 | // force new line |
523 | if self.mark.col != 0 { |
524 | self.mark.col = 0; |
525 | self.mark.line += 1; |
526 | } |
527 | |
528 | self.unroll_indent(-1); |
529 | self.remove_simple_key()?; |
530 | self.disallow_simple_key(); |
531 | |
532 | self.tokens |
533 | .push_back(Token(self.mark, TokenType::StreamEnd)); |
534 | Ok(()) |
535 | } |
536 | |
537 | fn fetch_directive(&mut self) -> ScanResult { |
538 | self.unroll_indent(-1); |
539 | self.remove_simple_key()?; |
540 | |
541 | self.disallow_simple_key(); |
542 | |
543 | let tok = self.scan_directive()?; |
544 | |
545 | self.tokens.push_back(tok); |
546 | |
547 | Ok(()) |
548 | } |
549 | |
550 | fn scan_directive(&mut self) -> Result<Token, ScanError> { |
551 | let start_mark = self.mark; |
552 | self.skip(); |
553 | |
554 | let name = self.scan_directive_name()?; |
555 | let tok = match name.as_ref() { |
556 | "YAML" => self.scan_version_directive_value(&start_mark)?, |
557 | "TAG" => self.scan_tag_directive_value(&start_mark)?, |
558 | // XXX This should be a warning instead of an error |
559 | _ => { |
560 | // skip current line |
561 | self.lookahead(1); |
562 | while !is_breakz(self.ch()) { |
563 | self.skip(); |
564 | self.lookahead(1); |
565 | } |
566 | // XXX return an empty TagDirective token |
567 | Token( |
568 | start_mark, |
569 | TokenType::TagDirective(String::new(), String::new()), |
570 | ) |
571 | // return Err(ScanError::new(start_mark, |
572 | // "while scanning a directive, found unknown directive name")) |
573 | } |
574 | }; |
575 | self.lookahead(1); |
576 | |
577 | while is_blank(self.ch()) { |
578 | self.skip(); |
579 | self.lookahead(1); |
580 | } |
581 | |
582 | if self.ch() == '#' { |
583 | while !is_breakz(self.ch()) { |
584 | self.skip(); |
585 | self.lookahead(1); |
586 | } |
587 | } |
588 | |
589 | if !is_breakz(self.ch()) { |
590 | return Err(ScanError::new( |
591 | start_mark, |
592 | "while scanning a directive, did not find expected comment or line break" , |
593 | )); |
594 | } |
595 | |
596 | // Eat a line break |
597 | if is_break(self.ch()) { |
598 | self.lookahead(2); |
599 | self.skip_line(); |
600 | } |
601 | |
602 | Ok(tok) |
603 | } |
604 | |
605 | fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> { |
606 | self.lookahead(1); |
607 | |
608 | while is_blank(self.ch()) { |
609 | self.skip(); |
610 | self.lookahead(1); |
611 | } |
612 | |
613 | let major = self.scan_version_directive_number(mark)?; |
614 | |
615 | if self.ch() != '.' { |
616 | return Err(ScanError::new( |
617 | *mark, |
618 | "while scanning a YAML directive, did not find expected digit or '.' character" , |
619 | )); |
620 | } |
621 | |
622 | self.skip(); |
623 | |
624 | let minor = self.scan_version_directive_number(mark)?; |
625 | |
626 | Ok(Token(*mark, TokenType::VersionDirective(major, minor))) |
627 | } |
628 | |
629 | fn scan_directive_name(&mut self) -> Result<String, ScanError> { |
630 | let start_mark = self.mark; |
631 | let mut string = String::new(); |
632 | self.lookahead(1); |
633 | while is_alpha(self.ch()) { |
634 | string.push(self.ch()); |
635 | self.skip(); |
636 | self.lookahead(1); |
637 | } |
638 | |
639 | if string.is_empty() { |
640 | return Err(ScanError::new( |
641 | start_mark, |
642 | "while scanning a directive, could not find expected directive name" , |
643 | )); |
644 | } |
645 | |
646 | if !is_blankz(self.ch()) { |
647 | return Err(ScanError::new( |
648 | start_mark, |
649 | "while scanning a directive, found unexpected non-alphabetical character" , |
650 | )); |
651 | } |
652 | |
653 | Ok(string) |
654 | } |
655 | |
656 | fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> { |
657 | let mut val = 0u32; |
658 | let mut length = 0usize; |
659 | self.lookahead(1); |
660 | while is_digit(self.ch()) { |
661 | if length + 1 > 9 { |
662 | return Err(ScanError::new( |
663 | *mark, |
664 | "while scanning a YAML directive, found extremely long version number" , |
665 | )); |
666 | } |
667 | length += 1; |
668 | val = val * 10 + ((self.ch() as u32) - ('0' as u32)); |
669 | self.skip(); |
670 | self.lookahead(1); |
671 | } |
672 | |
673 | if length == 0 { |
674 | return Err(ScanError::new( |
675 | *mark, |
676 | "while scanning a YAML directive, did not find expected version number" , |
677 | )); |
678 | } |
679 | |
680 | Ok(val) |
681 | } |
682 | |
683 | fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> { |
684 | self.lookahead(1); |
685 | /* Eat whitespaces. */ |
686 | while is_blank(self.ch()) { |
687 | self.skip(); |
688 | self.lookahead(1); |
689 | } |
690 | let handle = self.scan_tag_handle(true, mark)?; |
691 | |
692 | self.lookahead(1); |
693 | /* Eat whitespaces. */ |
694 | while is_blank(self.ch()) { |
695 | self.skip(); |
696 | self.lookahead(1); |
697 | } |
698 | |
699 | let is_secondary = handle == "!!" ; |
700 | let prefix = self.scan_tag_uri(true, is_secondary, &String::new(), mark)?; |
701 | |
702 | self.lookahead(1); |
703 | |
704 | if is_blankz(self.ch()) { |
705 | Ok(Token(*mark, TokenType::TagDirective(handle, prefix))) |
706 | } else { |
707 | Err(ScanError::new( |
708 | *mark, |
709 | "while scanning TAG, did not find expected whitespace or line break" , |
710 | )) |
711 | } |
712 | } |
713 | |
714 | fn fetch_tag(&mut self) -> ScanResult { |
715 | self.save_simple_key()?; |
716 | self.disallow_simple_key(); |
717 | |
718 | let tok = self.scan_tag()?; |
719 | self.tokens.push_back(tok); |
720 | Ok(()) |
721 | } |
722 | |
723 | fn scan_tag(&mut self) -> Result<Token, ScanError> { |
724 | let start_mark = self.mark; |
725 | let mut handle = String::new(); |
726 | let mut suffix; |
727 | let mut secondary = false; |
728 | |
729 | // Check if the tag is in the canonical form (verbatim). |
730 | self.lookahead(2); |
731 | |
732 | if self.buffer[1] == '<' { |
733 | // Eat '!<' |
734 | self.skip(); |
735 | self.skip(); |
736 | suffix = self.scan_tag_uri(false, false, &String::new(), &start_mark)?; |
737 | |
738 | if self.ch() != '>' { |
739 | return Err(ScanError::new( |
740 | start_mark, |
741 | "while scanning a tag, did not find the expected '>'" , |
742 | )); |
743 | } |
744 | |
745 | self.skip(); |
746 | } else { |
747 | // The tag has either the '!suffix' or the '!handle!suffix' |
748 | handle = self.scan_tag_handle(false, &start_mark)?; |
749 | // Check if it is, indeed, handle. |
750 | if handle.len() >= 2 && handle.starts_with('!' ) && handle.ends_with('!' ) { |
751 | if handle == "!!" { |
752 | secondary = true; |
753 | } |
754 | suffix = self.scan_tag_uri(false, secondary, &String::new(), &start_mark)?; |
755 | } else { |
756 | suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?; |
757 | handle = "!" .to_owned(); |
758 | // A special case: the '!' tag. Set the handle to '' and the |
759 | // suffix to '!'. |
760 | if suffix.is_empty() { |
761 | handle.clear(); |
762 | suffix = "!" .to_owned(); |
763 | } |
764 | } |
765 | } |
766 | |
767 | self.lookahead(1); |
768 | if is_blankz(self.ch()) { |
769 | // XXX: ex 7.2, an empty scalar can follow a secondary tag |
770 | Ok(Token(start_mark, TokenType::Tag(handle, suffix))) |
771 | } else { |
772 | Err(ScanError::new( |
773 | start_mark, |
774 | "while scanning a tag, did not find expected whitespace or line break" , |
775 | )) |
776 | } |
777 | } |
778 | |
779 | fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> { |
780 | let mut string = String::new(); |
781 | self.lookahead(1); |
782 | if self.ch() != '!' { |
783 | return Err(ScanError::new( |
784 | *mark, |
785 | "while scanning a tag, did not find expected '!'" , |
786 | )); |
787 | } |
788 | |
789 | string.push(self.ch()); |
790 | self.skip(); |
791 | |
792 | self.lookahead(1); |
793 | while is_alpha(self.ch()) { |
794 | string.push(self.ch()); |
795 | self.skip(); |
796 | self.lookahead(1); |
797 | } |
798 | |
799 | // Check if the trailing character is '!' and copy it. |
800 | if self.ch() == '!' { |
801 | string.push(self.ch()); |
802 | self.skip(); |
803 | } else if directive && string != "!" { |
804 | // It's either the '!' tag or not really a tag handle. If it's a %TAG |
805 | // directive, it's an error. If it's a tag token, it must be a part of |
806 | // URI. |
807 | return Err(ScanError::new( |
808 | *mark, |
809 | "while parsing a tag directive, did not find expected '!'" , |
810 | )); |
811 | } |
812 | Ok(string) |
813 | } |
814 | |
815 | fn scan_tag_uri( |
816 | &mut self, |
817 | directive: bool, |
818 | _is_secondary: bool, |
819 | head: &str, |
820 | mark: &Marker, |
821 | ) -> Result<String, ScanError> { |
822 | let mut length = head.len(); |
823 | let mut string = String::new(); |
824 | |
825 | // Copy the head if needed. |
826 | // Note that we don't copy the leading '!' character. |
827 | if length > 1 { |
828 | string.extend(head.chars().skip(1)); |
829 | } |
830 | |
831 | self.lookahead(1); |
832 | /* |
833 | * The set of characters that may appear in URI is as follows: |
834 | * |
835 | * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', |
836 | * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', |
837 | * '%'. |
838 | */ |
839 | while match self.ch() { |
840 | ';' | '/' | '?' | ':' | '@' | '&' => true, |
841 | '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | ' \'' | '(' | ')' | '[' | ']' => true, |
842 | '%' => true, |
843 | c if is_alpha(c) => true, |
844 | _ => false, |
845 | } { |
846 | // Check if it is a URI-escape sequence. |
847 | if self.ch() == '%' { |
848 | string.push(self.scan_uri_escapes(directive, mark)?); |
849 | } else { |
850 | string.push(self.ch()); |
851 | self.skip(); |
852 | } |
853 | |
854 | length += 1; |
855 | self.lookahead(1); |
856 | } |
857 | |
858 | if length == 0 { |
859 | return Err(ScanError::new( |
860 | *mark, |
861 | "while parsing a tag, did not find expected tag URI" , |
862 | )); |
863 | } |
864 | |
865 | Ok(string) |
866 | } |
867 | |
868 | fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> { |
869 | let mut width = 0usize; |
870 | let mut code = 0u32; |
871 | loop { |
872 | self.lookahead(3); |
873 | |
874 | if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) { |
875 | return Err(ScanError::new( |
876 | *mark, |
877 | "while parsing a tag, did not find URI escaped octet" , |
878 | )); |
879 | } |
880 | |
881 | let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]); |
882 | if width == 0 { |
883 | width = match octet { |
884 | _ if octet & 0x80 == 0x00 => 1, |
885 | _ if octet & 0xE0 == 0xC0 => 2, |
886 | _ if octet & 0xF0 == 0xE0 => 3, |
887 | _ if octet & 0xF8 == 0xF0 => 4, |
888 | _ => { |
889 | return Err(ScanError::new( |
890 | *mark, |
891 | "while parsing a tag, found an incorrect leading UTF-8 octet" , |
892 | )); |
893 | } |
894 | }; |
895 | code = octet; |
896 | } else { |
897 | if octet & 0xc0 != 0x80 { |
898 | return Err(ScanError::new( |
899 | *mark, |
900 | "while parsing a tag, found an incorrect trailing UTF-8 octet" , |
901 | )); |
902 | } |
903 | code = (code << 8) + octet; |
904 | } |
905 | |
906 | self.skip(); |
907 | self.skip(); |
908 | self.skip(); |
909 | |
910 | width -= 1; |
911 | if width == 0 { |
912 | break; |
913 | } |
914 | } |
915 | |
916 | match char::from_u32(code) { |
917 | Some(ch) => Ok(ch), |
918 | None => Err(ScanError::new( |
919 | *mark, |
920 | "while parsing a tag, found an invalid UTF-8 codepoint" , |
921 | )), |
922 | } |
923 | } |
924 | |
925 | fn fetch_anchor(&mut self, alias: bool) -> ScanResult { |
926 | self.save_simple_key()?; |
927 | self.disallow_simple_key(); |
928 | |
929 | let tok = self.scan_anchor(alias)?; |
930 | |
931 | self.tokens.push_back(tok); |
932 | |
933 | Ok(()) |
934 | } |
935 | |
936 | fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> { |
937 | let mut string = String::new(); |
938 | let start_mark = self.mark; |
939 | |
940 | self.skip(); |
941 | self.lookahead(1); |
942 | while is_alpha(self.ch()) { |
943 | string.push(self.ch()); |
944 | self.skip(); |
945 | self.lookahead(1); |
946 | } |
947 | |
948 | if string.is_empty() |
949 | || match self.ch() { |
950 | c if is_blankz(c) => false, |
951 | '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false, |
952 | _ => true, |
953 | } |
954 | { |
955 | return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character" )); |
956 | } |
957 | |
958 | if alias { |
959 | Ok(Token(start_mark, TokenType::Alias(string))) |
960 | } else { |
961 | Ok(Token(start_mark, TokenType::Anchor(string))) |
962 | } |
963 | } |
964 | |
965 | fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult { |
966 | // The indicators '[' and '{' may start a simple key. |
967 | self.save_simple_key()?; |
968 | |
969 | self.increase_flow_level()?; |
970 | |
971 | self.allow_simple_key(); |
972 | |
973 | let start_mark = self.mark; |
974 | self.skip(); |
975 | |
976 | self.tokens.push_back(Token(start_mark, tok)); |
977 | Ok(()) |
978 | } |
979 | |
980 | fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult { |
981 | self.remove_simple_key()?; |
982 | self.decrease_flow_level(); |
983 | |
984 | self.disallow_simple_key(); |
985 | |
986 | let start_mark = self.mark; |
987 | self.skip(); |
988 | |
989 | self.tokens.push_back(Token(start_mark, tok)); |
990 | Ok(()) |
991 | } |
992 | |
993 | fn fetch_flow_entry(&mut self) -> ScanResult { |
994 | self.remove_simple_key()?; |
995 | self.allow_simple_key(); |
996 | |
997 | let start_mark = self.mark; |
998 | self.skip(); |
999 | |
1000 | self.tokens |
1001 | .push_back(Token(start_mark, TokenType::FlowEntry)); |
1002 | Ok(()) |
1003 | } |
1004 | |
1005 | fn increase_flow_level(&mut self) -> ScanResult { |
1006 | self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0))); |
1007 | self.flow_level = self |
1008 | .flow_level |
1009 | .checked_add(1) |
1010 | .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded" ))?; |
1011 | Ok(()) |
1012 | } |
1013 | fn decrease_flow_level(&mut self) { |
1014 | if self.flow_level > 0 { |
1015 | self.flow_level -= 1; |
1016 | self.simple_keys.pop().unwrap(); |
1017 | } |
1018 | } |
1019 | |
1020 | fn fetch_block_entry(&mut self) -> ScanResult { |
1021 | if self.flow_level == 0 { |
1022 | // Check if we are allowed to start a new entry. |
1023 | if !self.simple_key_allowed { |
1024 | return Err(ScanError::new( |
1025 | self.mark, |
1026 | "block sequence entries are not allowed in this context" , |
1027 | )); |
1028 | } |
1029 | |
1030 | let mark = self.mark; |
1031 | // generate BLOCK-SEQUENCE-START if indented |
1032 | self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); |
1033 | } else { |
1034 | // - * only allowed in block |
1035 | return Err(ScanError::new( |
1036 | self.mark, |
1037 | r#""-" is only valid inside a block"# , |
1038 | )); |
1039 | } |
1040 | self.remove_simple_key()?; |
1041 | self.allow_simple_key(); |
1042 | |
1043 | let start_mark = self.mark; |
1044 | self.skip(); |
1045 | |
1046 | self.tokens |
1047 | .push_back(Token(start_mark, TokenType::BlockEntry)); |
1048 | Ok(()) |
1049 | } |
1050 | |
1051 | fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult { |
1052 | self.unroll_indent(-1); |
1053 | self.remove_simple_key()?; |
1054 | self.disallow_simple_key(); |
1055 | |
1056 | let mark = self.mark; |
1057 | |
1058 | self.skip(); |
1059 | self.skip(); |
1060 | self.skip(); |
1061 | |
1062 | self.tokens.push_back(Token(mark, t)); |
1063 | Ok(()) |
1064 | } |
1065 | |
1066 | fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult { |
1067 | self.save_simple_key()?; |
1068 | self.allow_simple_key(); |
1069 | let tok = self.scan_block_scalar(literal)?; |
1070 | |
1071 | self.tokens.push_back(tok); |
1072 | Ok(()) |
1073 | } |
1074 | |
1075 | fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> { |
1076 | let start_mark = self.mark; |
1077 | let mut chomping: i32 = 0; |
1078 | let mut increment: usize = 0; |
1079 | let mut indent: usize = 0; |
1080 | let mut trailing_blank: bool; |
1081 | let mut leading_blank: bool = false; |
1082 | |
1083 | let mut string = String::new(); |
1084 | let mut leading_break = String::new(); |
1085 | let mut trailing_breaks = String::new(); |
1086 | |
1087 | // skip '|' or '>' |
1088 | self.skip(); |
1089 | self.lookahead(1); |
1090 | |
1091 | if self.ch() == '+' || self.ch() == '-' { |
1092 | if self.ch() == '+' { |
1093 | chomping = 1; |
1094 | } else { |
1095 | chomping = -1; |
1096 | } |
1097 | self.skip(); |
1098 | self.lookahead(1); |
1099 | if is_digit(self.ch()) { |
1100 | if self.ch() == '0' { |
1101 | return Err(ScanError::new( |
1102 | start_mark, |
1103 | "while scanning a block scalar, found an indentation indicator equal to 0" , |
1104 | )); |
1105 | } |
1106 | increment = (self.ch() as usize) - ('0' as usize); |
1107 | self.skip(); |
1108 | } |
1109 | } else if is_digit(self.ch()) { |
1110 | if self.ch() == '0' { |
1111 | return Err(ScanError::new( |
1112 | start_mark, |
1113 | "while scanning a block scalar, found an indentation indicator equal to 0" , |
1114 | )); |
1115 | } |
1116 | |
1117 | increment = (self.ch() as usize) - ('0' as usize); |
1118 | self.skip(); |
1119 | self.lookahead(1); |
1120 | if self.ch() == '+' || self.ch() == '-' { |
1121 | if self.ch() == '+' { |
1122 | chomping = 1; |
1123 | } else { |
1124 | chomping = -1; |
1125 | } |
1126 | self.skip(); |
1127 | } |
1128 | } |
1129 | |
1130 | // Eat whitespaces and comments to the end of the line. |
1131 | self.lookahead(1); |
1132 | |
1133 | while is_blank(self.ch()) { |
1134 | self.skip(); |
1135 | self.lookahead(1); |
1136 | } |
1137 | |
1138 | if self.ch() == '#' { |
1139 | while !is_breakz(self.ch()) { |
1140 | self.skip(); |
1141 | self.lookahead(1); |
1142 | } |
1143 | } |
1144 | |
1145 | // Check if we are at the end of the line. |
1146 | if !is_breakz(self.ch()) { |
1147 | return Err(ScanError::new( |
1148 | start_mark, |
1149 | "while scanning a block scalar, did not find expected comment or line break" , |
1150 | )); |
1151 | } |
1152 | |
1153 | if is_break(self.ch()) { |
1154 | self.lookahead(2); |
1155 | self.skip_line(); |
1156 | } |
1157 | |
1158 | if increment > 0 { |
1159 | indent = if self.indent >= 0 { |
1160 | (self.indent + increment as isize) as usize |
1161 | } else { |
1162 | increment |
1163 | } |
1164 | } |
1165 | // Scan the leading line breaks and determine the indentation level if needed. |
1166 | self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?; |
1167 | |
1168 | self.lookahead(1); |
1169 | |
1170 | let start_mark = self.mark; |
1171 | |
1172 | while self.mark.col == indent && !is_z(self.ch()) { |
1173 | // We are at the beginning of a non-empty line. |
1174 | trailing_blank = is_blank(self.ch()); |
1175 | if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank { |
1176 | if trailing_breaks.is_empty() { |
1177 | string.push(' ' ); |
1178 | } |
1179 | leading_break.clear(); |
1180 | } else { |
1181 | string.push_str(&leading_break); |
1182 | leading_break.clear(); |
1183 | } |
1184 | |
1185 | string.push_str(&trailing_breaks); |
1186 | trailing_breaks.clear(); |
1187 | |
1188 | leading_blank = is_blank(self.ch()); |
1189 | |
1190 | while !is_breakz(self.ch()) { |
1191 | string.push(self.ch()); |
1192 | self.skip(); |
1193 | self.lookahead(1); |
1194 | } |
1195 | // break on EOF |
1196 | if is_z(self.ch()) { |
1197 | break; |
1198 | } |
1199 | |
1200 | self.lookahead(2); |
1201 | self.read_break(&mut leading_break); |
1202 | |
1203 | // Eat the following indentation spaces and line breaks. |
1204 | self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?; |
1205 | } |
1206 | |
1207 | // Chomp the tail. |
1208 | if chomping != -1 { |
1209 | string.push_str(&leading_break); |
1210 | } |
1211 | |
1212 | if chomping == 1 { |
1213 | string.push_str(&trailing_breaks); |
1214 | } |
1215 | |
1216 | if literal { |
1217 | Ok(Token( |
1218 | start_mark, |
1219 | TokenType::Scalar(TScalarStyle::Literal, string), |
1220 | )) |
1221 | } else { |
1222 | Ok(Token( |
1223 | start_mark, |
1224 | TokenType::Scalar(TScalarStyle::Foled, string), |
1225 | )) |
1226 | } |
1227 | } |
1228 | |
1229 | fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult { |
1230 | let mut max_indent = 0; |
1231 | loop { |
1232 | self.lookahead(1); |
1233 | while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' { |
1234 | self.skip(); |
1235 | self.lookahead(1); |
1236 | } |
1237 | |
1238 | if self.mark.col > max_indent { |
1239 | max_indent = self.mark.col; |
1240 | } |
1241 | |
1242 | // Check for a tab character messing the indentation. |
1243 | if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' \t' { |
1244 | return Err(ScanError::new(self.mark, |
1245 | "while scanning a block scalar, found a tab character where an indentation space is expected" )); |
1246 | } |
1247 | |
1248 | if !is_break(self.ch()) { |
1249 | break; |
1250 | } |
1251 | |
1252 | self.lookahead(2); |
1253 | // Consume the line break. |
1254 | self.read_break(breaks); |
1255 | } |
1256 | |
1257 | if *indent == 0 { |
1258 | *indent = max_indent; |
1259 | if *indent < (self.indent + 1) as usize { |
1260 | *indent = (self.indent + 1) as usize; |
1261 | } |
1262 | if *indent < 1 { |
1263 | *indent = 1; |
1264 | } |
1265 | } |
1266 | Ok(()) |
1267 | } |
1268 | |
1269 | fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult { |
1270 | self.save_simple_key()?; |
1271 | self.disallow_simple_key(); |
1272 | |
1273 | let tok = self.scan_flow_scalar(single)?; |
1274 | |
1275 | // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like, |
1276 | // YAML allows the following value to be specified adjacent to the “:”. |
1277 | self.adjacent_value_allowed_at = self.mark.index; |
1278 | |
1279 | self.tokens.push_back(tok); |
1280 | Ok(()) |
1281 | } |
1282 | |
1283 | fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> { |
1284 | let start_mark = self.mark; |
1285 | |
1286 | let mut string = String::new(); |
1287 | let mut leading_break = String::new(); |
1288 | let mut trailing_breaks = String::new(); |
1289 | let mut whitespaces = String::new(); |
1290 | let mut leading_blanks; |
1291 | |
1292 | /* Eat the left quote. */ |
1293 | self.skip(); |
1294 | |
1295 | loop { |
1296 | /* Check for a document indicator. */ |
1297 | self.lookahead(4); |
1298 | |
1299 | if self.mark.col == 0 |
1300 | && (((self.buffer[0] == '-' ) && (self.buffer[1] == '-' ) && (self.buffer[2] == '-' )) |
1301 | || ((self.buffer[0] == '.' ) |
1302 | && (self.buffer[1] == '.' ) |
1303 | && (self.buffer[2] == '.' ))) |
1304 | && is_blankz(self.buffer[3]) |
1305 | { |
1306 | return Err(ScanError::new( |
1307 | start_mark, |
1308 | "while scanning a quoted scalar, found unexpected document indicator" , |
1309 | )); |
1310 | } |
1311 | |
1312 | if is_z(self.ch()) { |
1313 | return Err(ScanError::new( |
1314 | start_mark, |
1315 | "while scanning a quoted scalar, found unexpected end of stream" , |
1316 | )); |
1317 | } |
1318 | |
1319 | self.lookahead(2); |
1320 | |
1321 | leading_blanks = false; |
1322 | // Consume non-blank characters. |
1323 | |
1324 | while !is_blankz(self.ch()) { |
1325 | match self.ch() { |
1326 | // Check for an escaped single quote. |
1327 | ' \'' if self.buffer[1] == ' \'' && single => { |
1328 | string.push(' \'' ); |
1329 | self.skip(); |
1330 | self.skip(); |
1331 | } |
1332 | // Check for the right quote. |
1333 | ' \'' if single => break, |
1334 | '"' if !single => break, |
1335 | // Check for an escaped line break. |
1336 | ' \\' if !single && is_break(self.buffer[1]) => { |
1337 | self.lookahead(3); |
1338 | self.skip(); |
1339 | self.skip_line(); |
1340 | leading_blanks = true; |
1341 | break; |
1342 | } |
1343 | // Check for an escape sequence. |
1344 | ' \\' if !single => { |
1345 | let mut code_length = 0usize; |
1346 | match self.buffer[1] { |
1347 | '0' => string.push(' \0' ), |
1348 | 'a' => string.push(' \x07' ), |
1349 | 'b' => string.push(' \x08' ), |
1350 | 't' | ' \t' => string.push(' \t' ), |
1351 | 'n' => string.push(' \n' ), |
1352 | 'v' => string.push(' \x0b' ), |
1353 | 'f' => string.push(' \x0c' ), |
1354 | 'r' => string.push(' \x0d' ), |
1355 | 'e' => string.push(' \x1b' ), |
1356 | ' ' => string.push(' \x20' ), |
1357 | '"' => string.push('"' ), |
1358 | ' \'' => string.push(' \'' ), |
1359 | ' \\' => string.push(' \\' ), |
1360 | // NEL (#x85) |
1361 | 'N' => string.push(char::from_u32(0x85).unwrap()), |
1362 | // #xA0 |
1363 | '_' => string.push(char::from_u32(0xA0).unwrap()), |
1364 | // LS (#x2028) |
1365 | 'L' => string.push(char::from_u32(0x2028).unwrap()), |
1366 | // PS (#x2029) |
1367 | 'P' => string.push(char::from_u32(0x2029).unwrap()), |
1368 | 'x' => code_length = 2, |
1369 | 'u' => code_length = 4, |
1370 | 'U' => code_length = 8, |
1371 | _ => { |
1372 | return Err(ScanError::new( |
1373 | start_mark, |
1374 | "while parsing a quoted scalar, found unknown escape character" , |
1375 | )) |
1376 | } |
1377 | } |
1378 | self.skip(); |
1379 | self.skip(); |
1380 | // Consume an arbitrary escape code. |
1381 | if code_length > 0 { |
1382 | self.lookahead(code_length); |
1383 | let mut value = 0u32; |
1384 | for i in 0..code_length { |
1385 | if !is_hex(self.buffer[i]) { |
1386 | return Err(ScanError::new(start_mark, |
1387 | "while parsing a quoted scalar, did not find expected hexadecimal number" )); |
1388 | } |
1389 | value = (value << 4) + as_hex(self.buffer[i]); |
1390 | } |
1391 | |
1392 | let ch = match char::from_u32(value) { |
1393 | Some(v) => v, |
1394 | None => { |
1395 | return Err(ScanError::new(start_mark, |
1396 | "while parsing a quoted scalar, found invalid Unicode character escape code" )); |
1397 | } |
1398 | }; |
1399 | string.push(ch); |
1400 | |
1401 | for _ in 0..code_length { |
1402 | self.skip(); |
1403 | } |
1404 | } |
1405 | } |
1406 | c => { |
1407 | string.push(c); |
1408 | self.skip(); |
1409 | } |
1410 | } |
1411 | self.lookahead(2); |
1412 | } |
1413 | self.lookahead(1); |
1414 | match self.ch() { |
1415 | ' \'' if single => break, |
1416 | '"' if !single => break, |
1417 | _ => {} |
1418 | } |
1419 | |
1420 | // Consume blank characters. |
1421 | while is_blank(self.ch()) || is_break(self.ch()) { |
1422 | if is_blank(self.ch()) { |
1423 | // Consume a space or a tab character. |
1424 | if leading_blanks { |
1425 | self.skip(); |
1426 | } else { |
1427 | whitespaces.push(self.ch()); |
1428 | self.skip(); |
1429 | } |
1430 | } else { |
1431 | self.lookahead(2); |
1432 | // Check if it is a first line break. |
1433 | if leading_blanks { |
1434 | self.read_break(&mut trailing_breaks); |
1435 | } else { |
1436 | whitespaces.clear(); |
1437 | self.read_break(&mut leading_break); |
1438 | leading_blanks = true; |
1439 | } |
1440 | } |
1441 | self.lookahead(1); |
1442 | } |
1443 | // Join the whitespaces or fold line breaks. |
1444 | if leading_blanks { |
1445 | if leading_break.is_empty() { |
1446 | string.push_str(&leading_break); |
1447 | string.push_str(&trailing_breaks); |
1448 | trailing_breaks.clear(); |
1449 | leading_break.clear(); |
1450 | } else { |
1451 | if trailing_breaks.is_empty() { |
1452 | string.push(' ' ); |
1453 | } else { |
1454 | string.push_str(&trailing_breaks); |
1455 | trailing_breaks.clear(); |
1456 | } |
1457 | leading_break.clear(); |
1458 | } |
1459 | } else { |
1460 | string.push_str(&whitespaces); |
1461 | whitespaces.clear(); |
1462 | } |
1463 | } // loop |
1464 | |
1465 | // Eat the right quote. |
1466 | self.skip(); |
1467 | |
1468 | if single { |
1469 | Ok(Token( |
1470 | start_mark, |
1471 | TokenType::Scalar(TScalarStyle::SingleQuoted, string), |
1472 | )) |
1473 | } else { |
1474 | Ok(Token( |
1475 | start_mark, |
1476 | TokenType::Scalar(TScalarStyle::DoubleQuoted, string), |
1477 | )) |
1478 | } |
1479 | } |
1480 | |
1481 | fn fetch_plain_scalar(&mut self) -> ScanResult { |
1482 | self.save_simple_key()?; |
1483 | self.disallow_simple_key(); |
1484 | |
1485 | let tok = self.scan_plain_scalar()?; |
1486 | |
1487 | self.tokens.push_back(tok); |
1488 | Ok(()) |
1489 | } |
1490 | |
1491 | fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> { |
1492 | let indent = self.indent + 1; |
1493 | let start_mark = self.mark; |
1494 | |
1495 | let mut string = String::new(); |
1496 | let mut leading_break = String::new(); |
1497 | let mut trailing_breaks = String::new(); |
1498 | let mut whitespaces = String::new(); |
1499 | let mut leading_blanks = false; |
1500 | |
1501 | loop { |
1502 | /* Check for a document indicator. */ |
1503 | self.lookahead(4); |
1504 | |
1505 | if self.mark.col == 0 |
1506 | && (((self.buffer[0] == '-' ) && (self.buffer[1] == '-' ) && (self.buffer[2] == '-' )) |
1507 | || ((self.buffer[0] == '.' ) |
1508 | && (self.buffer[1] == '.' ) |
1509 | && (self.buffer[2] == '.' ))) |
1510 | && is_blankz(self.buffer[3]) |
1511 | { |
1512 | break; |
1513 | } |
1514 | |
1515 | if self.ch() == '#' { |
1516 | break; |
1517 | } |
1518 | while !is_blankz(self.ch()) { |
1519 | // indicators can end a plain scalar, see 7.3.3. Plain Style |
1520 | match self.ch() { |
1521 | ':' if is_blankz(self.buffer[1]) |
1522 | || (self.flow_level > 0 && is_flow(self.buffer[1])) => |
1523 | { |
1524 | break; |
1525 | } |
1526 | ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break, |
1527 | _ => {} |
1528 | } |
1529 | |
1530 | if leading_blanks || !whitespaces.is_empty() { |
1531 | if leading_blanks { |
1532 | if leading_break.is_empty() { |
1533 | string.push_str(&leading_break); |
1534 | string.push_str(&trailing_breaks); |
1535 | trailing_breaks.clear(); |
1536 | leading_break.clear(); |
1537 | } else { |
1538 | if trailing_breaks.is_empty() { |
1539 | string.push(' ' ); |
1540 | } else { |
1541 | string.push_str(&trailing_breaks); |
1542 | trailing_breaks.clear(); |
1543 | } |
1544 | leading_break.clear(); |
1545 | } |
1546 | leading_blanks = false; |
1547 | } else { |
1548 | string.push_str(&whitespaces); |
1549 | whitespaces.clear(); |
1550 | } |
1551 | } |
1552 | |
1553 | string.push(self.ch()); |
1554 | self.skip(); |
1555 | self.lookahead(2); |
1556 | } |
1557 | // is the end? |
1558 | if !(is_blank(self.ch()) || is_break(self.ch())) { |
1559 | break; |
1560 | } |
1561 | self.lookahead(1); |
1562 | |
1563 | while is_blank(self.ch()) || is_break(self.ch()) { |
1564 | if is_blank(self.ch()) { |
1565 | if leading_blanks && (self.mark.col as isize) < indent && self.ch() == ' \t' { |
1566 | return Err(ScanError::new( |
1567 | start_mark, |
1568 | "while scanning a plain scalar, found a tab" , |
1569 | )); |
1570 | } |
1571 | |
1572 | if leading_blanks { |
1573 | self.skip(); |
1574 | } else { |
1575 | whitespaces.push(self.ch()); |
1576 | self.skip(); |
1577 | } |
1578 | } else { |
1579 | self.lookahead(2); |
1580 | // Check if it is a first line break |
1581 | if leading_blanks { |
1582 | self.read_break(&mut trailing_breaks); |
1583 | } else { |
1584 | whitespaces.clear(); |
1585 | self.read_break(&mut leading_break); |
1586 | leading_blanks = true; |
1587 | } |
1588 | } |
1589 | self.lookahead(1); |
1590 | } |
1591 | |
1592 | // check indentation level |
1593 | if self.flow_level == 0 && (self.mark.col as isize) < indent { |
1594 | break; |
1595 | } |
1596 | } |
1597 | |
1598 | if leading_blanks { |
1599 | self.allow_simple_key(); |
1600 | } |
1601 | |
1602 | Ok(Token( |
1603 | start_mark, |
1604 | TokenType::Scalar(TScalarStyle::Plain, string), |
1605 | )) |
1606 | } |
1607 | |
1608 | fn fetch_key(&mut self) -> ScanResult { |
1609 | let start_mark = self.mark; |
1610 | if self.flow_level == 0 { |
1611 | // Check if we are allowed to start a new key (not necessarily simple). |
1612 | if !self.simple_key_allowed { |
1613 | return Err(ScanError::new( |
1614 | self.mark, |
1615 | "mapping keys are not allowed in this context" , |
1616 | )); |
1617 | } |
1618 | self.roll_indent( |
1619 | start_mark.col, |
1620 | None, |
1621 | TokenType::BlockMappingStart, |
1622 | start_mark, |
1623 | ); |
1624 | } |
1625 | |
1626 | self.remove_simple_key()?; |
1627 | |
1628 | if self.flow_level == 0 { |
1629 | self.allow_simple_key(); |
1630 | } else { |
1631 | self.disallow_simple_key(); |
1632 | } |
1633 | |
1634 | self.skip(); |
1635 | self.tokens.push_back(Token(start_mark, TokenType::Key)); |
1636 | Ok(()) |
1637 | } |
1638 | |
1639 | fn fetch_value(&mut self) -> ScanResult { |
1640 | let sk = self.simple_keys.last().unwrap().clone(); |
1641 | let start_mark = self.mark; |
1642 | if sk.possible { |
1643 | // insert simple key |
1644 | let tok = Token(sk.mark, TokenType::Key); |
1645 | let tokens_parsed = self.tokens_parsed; |
1646 | self.insert_token(sk.token_number - tokens_parsed, tok); |
1647 | |
1648 | // Add the BLOCK-MAPPING-START token if needed. |
1649 | self.roll_indent( |
1650 | sk.mark.col, |
1651 | Some(sk.token_number), |
1652 | TokenType::BlockMappingStart, |
1653 | start_mark, |
1654 | ); |
1655 | |
1656 | self.simple_keys.last_mut().unwrap().possible = false; |
1657 | self.disallow_simple_key(); |
1658 | } else { |
1659 | // The ':' indicator follows a complex key. |
1660 | if self.flow_level == 0 { |
1661 | if !self.simple_key_allowed { |
1662 | return Err(ScanError::new( |
1663 | start_mark, |
1664 | "mapping values are not allowed in this context" , |
1665 | )); |
1666 | } |
1667 | |
1668 | self.roll_indent( |
1669 | start_mark.col, |
1670 | None, |
1671 | TokenType::BlockMappingStart, |
1672 | start_mark, |
1673 | ); |
1674 | } |
1675 | |
1676 | if self.flow_level == 0 { |
1677 | self.allow_simple_key(); |
1678 | } else { |
1679 | self.disallow_simple_key(); |
1680 | } |
1681 | } |
1682 | self.skip(); |
1683 | self.tokens.push_back(Token(start_mark, TokenType::Value)); |
1684 | |
1685 | Ok(()) |
1686 | } |
1687 | |
1688 | fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) { |
1689 | if self.flow_level > 0 { |
1690 | return; |
1691 | } |
1692 | |
1693 | if self.indent < col as isize { |
1694 | self.indents.push(self.indent); |
1695 | self.indent = col as isize; |
1696 | let tokens_parsed = self.tokens_parsed; |
1697 | match number { |
1698 | Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)), |
1699 | None => self.tokens.push_back(Token(mark, tok)), |
1700 | } |
1701 | } |
1702 | } |
1703 | |
1704 | fn unroll_indent(&mut self, col: isize) { |
1705 | if self.flow_level > 0 { |
1706 | return; |
1707 | } |
1708 | while self.indent > col { |
1709 | self.tokens.push_back(Token(self.mark, TokenType::BlockEnd)); |
1710 | self.indent = self.indents.pop().unwrap(); |
1711 | } |
1712 | } |
1713 | |
1714 | fn save_simple_key(&mut self) -> Result<(), ScanError> { |
1715 | let required = self.flow_level > 0 && self.indent == (self.mark.col as isize); |
1716 | if self.simple_key_allowed { |
1717 | let mut sk = SimpleKey::new(self.mark); |
1718 | sk.possible = true; |
1719 | sk.required = required; |
1720 | sk.token_number = self.tokens_parsed + self.tokens.len(); |
1721 | |
1722 | self.remove_simple_key()?; |
1723 | |
1724 | self.simple_keys.pop(); |
1725 | self.simple_keys.push(sk); |
1726 | } |
1727 | Ok(()) |
1728 | } |
1729 | |
1730 | fn remove_simple_key(&mut self) -> ScanResult { |
1731 | let last = self.simple_keys.last_mut().unwrap(); |
1732 | if last.possible && last.required { |
1733 | return Err(ScanError::new(self.mark, "simple key expected" )); |
1734 | } |
1735 | |
1736 | last.possible = false; |
1737 | Ok(()) |
1738 | } |
1739 | } |
1740 | |
#[cfg(test)]
mod test {
    use super::TokenType::*;
    use super::*;

    /// Pulls the next token from the scanner and checks its type against a pattern.
    macro_rules! next {
        ($scanner:ident, $expected:pat) => {{
            let token = $scanner.next().unwrap();
            match token.1 {
                $expected => {}
                _ => panic!("unexpected token: {:?}", token),
            }
        }};
    }

    /// Pulls the next token and checks that it is a scalar with the given style and text.
    macro_rules! next_scalar {
        ($scanner:ident, $style:expr, $text:expr) => {{
            let token = $scanner.next().unwrap();
            match token.1 {
                Scalar(style, ref v) => {
                    assert_eq!(style, $style);
                    assert_eq!(v, $text);
                }
                _ => panic!("unexpected token: {:?}", token),
            }
        }};
    }

    /// Checks that the scanner has no more tokens.
    macro_rules! end {
        ($scanner:ident) => {{
            assert_eq!($scanner.next(), None);
        }};
    }

    // The following cases mirror the examples in libyaml's scanner.c.

    /// An empty stream yields only the stream delimiters.
    #[test]
    fn test_empty() {
        let yaml = "";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// A bare plain scalar.
    #[test]
    fn test_scalar() {
        let yaml = "a scalar";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, Scalar(TScalarStyle::Plain, _));
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// A quoted scalar wrapped in explicit document markers.
    #[test]
    fn test_explicit_scalar() {
        let yaml = "---
'a scalar'
...
";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, DocumentStart);
        next!(scanner, Scalar(TScalarStyle::SingleQuoted, _));
        next!(scanner, DocumentEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// Several documents separated by `---`.
    #[test]
    fn test_multiple_documents() {
        let yaml = "
'a scalar'
---
'a scalar'
---
'a scalar'
";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, Scalar(TScalarStyle::SingleQuoted, _));
        next!(scanner, DocumentStart);
        next!(scanner, Scalar(TScalarStyle::SingleQuoted, _));
        next!(scanner, DocumentStart);
        next!(scanner, Scalar(TScalarStyle::SingleQuoted, _));
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// A flow sequence of plain scalars.
    #[test]
    fn test_a_flow_sequence() {
        let yaml = "[item 1, item 2, item 3]";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, FlowSequenceStart);
        next_scalar!(scanner, TScalarStyle::Plain, "item 1");
        next!(scanner, FlowEntry);
        next!(scanner, Scalar(TScalarStyle::Plain, _));
        next!(scanner, FlowEntry);
        next!(scanner, Scalar(TScalarStyle::Plain, _));
        next!(scanner, FlowSequenceEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// A flow mapping with one simple and one explicit key.
    #[test]
    fn test_a_flow_mapping() {
        let yaml = "
{
    a simple key: a value,  # Note that the KEY token is produced.
    ? a complex key: another value,
}
";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, FlowMappingStart);
        next!(scanner, Key);
        next!(scanner, Scalar(TScalarStyle::Plain, _));
        next!(scanner, Value);
        next!(scanner, Scalar(TScalarStyle::Plain, _));
        next!(scanner, FlowEntry);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "a complex key");
        next!(scanner, Value);
        next!(scanner, Scalar(TScalarStyle::Plain, _));
        next!(scanner, FlowEntry);
        next!(scanner, FlowMappingEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// Block sequences nesting a sequence and a mapping.
    #[test]
    fn test_block_sequences() {
        let yaml = "
- item 1
- item 2
-
  - item 3.1
  - item 3.2
-
  key 1: value 1
  key 2: value 2
";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, BlockSequenceStart);
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "item 1");
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "item 2");
        next!(scanner, BlockEntry);
        next!(scanner, BlockSequenceStart);
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "item 3.1");
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "item 3.2");
        next!(scanner, BlockEnd);
        next!(scanner, BlockEntry);
        next!(scanner, BlockMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "key 1");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, "value 1");
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "key 2");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, "value 2");
        next!(scanner, BlockEnd);
        next!(scanner, BlockEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// Block mappings with simple keys, an explicit key, and nested collections.
    #[test]
    fn test_block_mappings() {
        let yaml = "
a simple key: a value   # The KEY token is produced here.
? a complex key
: another value
a mapping:
  key 1: value 1
  key 2: value 2
a sequence:
  - item 1
  - item 2
";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, BlockMappingStart);
        next!(scanner, Key);
        next!(scanner, Scalar(_, _));
        next!(scanner, Value);
        next!(scanner, Scalar(_, _));
        next!(scanner, Key);
        next!(scanner, Scalar(_, _));
        next!(scanner, Value);
        next!(scanner, Scalar(_, _));
        next!(scanner, Key);
        next!(scanner, Scalar(_, _));
        next!(scanner, Value); // libyaml comment seems to be wrong
        next!(scanner, BlockMappingStart);
        next!(scanner, Key);
        next!(scanner, Scalar(_, _));
        next!(scanner, Value);
        next!(scanner, Scalar(_, _));
        next!(scanner, Key);
        next!(scanner, Scalar(_, _));
        next!(scanner, Value);
        next!(scanner, Scalar(_, _));
        next!(scanner, BlockEnd);
        next!(scanner, Key);
        next!(scanner, Scalar(_, _));
        next!(scanner, Value);
        next!(scanner, BlockSequenceStart);
        next!(scanner, BlockEntry);
        next!(scanner, Scalar(_, _));
        next!(scanner, BlockEntry);
        next!(scanner, Scalar(_, _));
        next!(scanner, BlockEnd);
        next!(scanner, BlockEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// A sequence at the same indent as its parent key emits no BlockSequenceStart.
    #[test]
    fn test_no_block_sequence_start() {
        let yaml = "
key:
- item 1
- item 2
";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, BlockMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "key");
        next!(scanner, Value);
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "item 1");
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "item 2");
        next!(scanner, BlockEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// Compact nested collections inside sequence entries.
    #[test]
    fn test_collections_in_sequence() {
        let yaml = "
- - item 1
  - item 2
- key 1: value 1
  key 2: value 2
- ? complex key
  : complex value
";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, BlockSequenceStart);
        next!(scanner, BlockEntry);
        next!(scanner, BlockSequenceStart);
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "item 1");
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "item 2");
        next!(scanner, BlockEnd);
        next!(scanner, BlockEntry);
        next!(scanner, BlockMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "key 1");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, "value 1");
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "key 2");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, "value 2");
        next!(scanner, BlockEnd);
        next!(scanner, BlockEntry);
        next!(scanner, BlockMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "complex key");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, "complex value");
        next!(scanner, BlockEnd);
        next!(scanner, BlockEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// Compact nested collections as values of explicit keys.
    #[test]
    fn test_collections_in_mapping() {
        let yaml = "
? a sequence
: - item 1
  - item 2
? a mapping
: key 1: value 1
  key 2: value 2
";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, BlockMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "a sequence");
        next!(scanner, Value);
        next!(scanner, BlockSequenceStart);
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "item 1");
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "item 2");
        next!(scanner, BlockEnd);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "a mapping");
        next!(scanner, Value);
        next!(scanner, BlockMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "key 1");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, "value 1");
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "key 2");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, "value 2");
        next!(scanner, BlockEnd);
        next!(scanner, BlockEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// Flow mapping entries with an empty value and an empty key.
    #[test]
    fn test_spec_ex7_3() {
        let yaml = "
{
    ? foo :,
    : bar,
}
";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, FlowMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "foo");
        next!(scanner, Value);
        next!(scanner, FlowEntry);
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, "bar");
        next!(scanner, FlowEntry);
        next!(scanner, FlowMappingEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    #[test]
    fn test_plain_scalar_starting_with_indicators_in_flow() {
        // "Plain scalars must not begin with most indicators, as this would cause ambiguity with
        // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first
        // character if followed by a non-space “safe” character, as this causes no ambiguity."

        // Value starting with ':'.
        let yaml = "{a: :b}";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, FlowMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "a");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, ":b");
        next!(scanner, FlowMappingEnd);
        next!(scanner, StreamEnd);
        end!(scanner);

        // Value starting with '?'.
        let yaml = "{a: ?b}";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, FlowMappingStart);
        next!(scanner, Key);
        next_scalar!(scanner, TScalarStyle::Plain, "a");
        next!(scanner, Value);
        next_scalar!(scanner, TScalarStyle::Plain, "?b");
        next!(scanner, FlowMappingEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// In block context, ':' and '?' directly followed by text start a plain scalar.
    #[test]
    fn test_plain_scalar_starting_with_indicators_in_block() {
        let yaml = ":a";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next_scalar!(scanner, TScalarStyle::Plain, ":a");
        next!(scanner, StreamEnd);
        end!(scanner);

        let yaml = "?a";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next_scalar!(scanner, TScalarStyle::Plain, "?a");
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// In block context, flow indicators inside a plain scalar are ordinary characters.
    #[test]
    fn test_plain_scalar_containing_indicators_in_block() {
        let yaml = "a:,b";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next_scalar!(scanner, TScalarStyle::Plain, "a:,b");
        next!(scanner, StreamEnd);
        end!(scanner);

        let yaml = ":,b";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next_scalar!(scanner, TScalarStyle::Plain, ":,b");
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    /// CRLF line endings are handled like plain line feeds.
    #[test]
    fn test_scanner_cr() {
        let yaml = "---\r\n- tok1\r\n- tok2";
        let mut scanner = Scanner::new(yaml.chars());
        next!(scanner, StreamStart(..));
        next!(scanner, DocumentStart);
        next!(scanner, BlockSequenceStart);
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "tok1");
        next!(scanner, BlockEntry);
        next_scalar!(scanner, TScalarStyle::Plain, "tok2");
        next!(scanner, BlockEnd);
        next!(scanner, StreamEnd);
        end!(scanner);
    }

    #[test]
    fn test_uri() {
        // TODO
    }

    #[test]
    fn test_uri_escapes() {
        // TODO
    }
}
2183 | |