1use crate::error::{Error, ErrorCode, Result};
2use alloc::vec::Vec;
3use core::char;
4use core::cmp;
5use core::ops::Deref;
6use core::str;
7
8#[cfg(feature = "std")]
9use crate::io;
10#[cfg(feature = "std")]
11use crate::iter::LineColIterator;
12
13#[cfg(feature = "raw_value")]
14use crate::raw::BorrowedRawDeserializer;
15#[cfg(all(feature = "raw_value", feature = "std"))]
16use crate::raw::OwnedRawDeserializer;
17#[cfg(all(feature = "raw_value", feature = "std"))]
18use alloc::string::String;
19#[cfg(feature = "raw_value")]
20use serde::de::Visitor;
21
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`. Once `feature(specialization)` is
/// stable we can use actual specialization.
///
/// This trait is sealed and cannot be implemented for types outside of
/// `serde_json`.
pub trait Read<'de>: private::Sealed {
    /// Consumes and returns the next byte of input, or `Ok(None)` once the
    /// input is exhausted.
    #[doc(hidden)]
    fn next(&mut self) -> Result<Option<u8>>;
    /// Returns the next byte of input without consuming it, or `Ok(None)` once
    /// the input is exhausted.
    #[doc(hidden)]
    fn peek(&mut self) -> Result<Option<u8>>;

    /// Only valid after a call to `peek()`. Discards the peeked byte.
    #[doc(hidden)]
    fn discard(&mut self);

    /// Position of the most recent call to `next()`.
    ///
    /// The most recent call was probably `next()` and not `peek()`, but this
    /// method should try to return a sensible result if the most recent call
    /// was actually `peek()` because we don't always know.
    ///
    /// Only called in case of an error, so performance is not important.
    #[doc(hidden)]
    fn position(&self) -> Position;

    /// Position of the most recent call to `peek()`.
    ///
    /// The most recent call was probably `peek()` and not `next()`, but this
    /// method should try to return a sensible result if the most recent call
    /// was actually `next()` because we don't always know.
    ///
    /// Only called in case of an error, so performance is not important.
    #[doc(hidden)]
    fn peek_position(&self) -> Position;

    /// Offset from the beginning of the input to the next byte that would be
    /// returned by `next()` or `peek()`.
    #[doc(hidden)]
    fn byte_offset(&self) -> usize;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark using the given scratch space if
    /// necessary. The scratch space is initially empty.
    #[doc(hidden)]
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark using the given scratch space if
    /// necessary. The scratch space is initially empty.
    ///
    /// This function returns the raw bytes in the string with escape sequences
    /// expanded but without performing unicode validation.
    #[doc(hidden)]
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>>;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark but discards the data.
    #[doc(hidden)]
    fn ignore_str(&mut self) -> Result<()>;

    /// Assumes the previous bytes were the start of a hex escape sequence
    /// (`\u`) in a string. Parses the four hexadecimal digits that follow.
    #[doc(hidden)]
    fn decode_hex_escape(&mut self) -> Result<u16>;

    /// Switch raw buffering mode on.
    ///
    /// This is used when deserializing `RawValue`.
    #[cfg(feature = "raw_value")]
    #[doc(hidden)]
    fn begin_raw_buffering(&mut self);

    /// Switch raw buffering mode off and provide the raw buffered data to the
    /// given visitor.
    #[cfg(feature = "raw_value")]
    #[doc(hidden)]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>;

    /// Whether `StreamDeserializer::next` needs to check the failed flag. True
    /// for `IoRead`, false for `StrRead` and `SliceRead` which can track
    /// failure by truncating their input slice to avoid the extra check on
    /// every next call.
    #[doc(hidden)]
    const should_early_return_if_failed: bool;

    /// Mark a persistent failure of `StreamDeserializer`, either by setting
    /// the flag or by truncating the input data.
    #[doc(hidden)]
    fn set_failed(&mut self, failed: &mut bool);
}
118
/// A line/column location in the input, attached to syntax errors.
pub struct Position {
    /// Line number; `SliceRead` counts from 1.
    pub line: usize,
    /// Column within the line; `SliceRead` counts the bytes seen since the
    /// most recent `\n` (or since the start of the input).
    pub column: usize,
}
123
/// Result of parsing a string: a reference with one of two possible
/// lifetimes, depending on where the bytes ended up living.
pub enum Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    /// Data valid for `'b`. `SliceRead` returns this when the string could be
    /// sliced straight out of the input without copying.
    Borrowed(&'b T),
    /// Data valid for `'c`. Returned when the string contents were assembled
    /// in the caller-provided scratch buffer.
    Copied(&'c T),
}
131
132impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
133where
134 T: ?Sized + 'static,
135{
136 type Target = T;
137
138 fn deref(&self) -> &Self::Target {
139 match *self {
140 Reference::Borrowed(b: &T) => b,
141 Reference::Copied(c: &T) => c,
142 }
143 }
144}
145
/// JSON input source that reads from a std::io input stream.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub struct IoRead<R>
where
    R: io::Read,
{
    /// Byte iterator over the reader; also queried for the current line,
    /// column and byte offset.
    iter: LineColIterator<io::Bytes<R>>,
    /// Temporary storage of peeked byte.
    ch: Option<u8>,
    /// While `Some`, every byte consumed through `next()` or `discard()` is
    /// appended here so it can be handed out as a raw value later.
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
159
/// JSON input source that reads from a slice of bytes.
//
// This is more efficient than other iterators because peek() can be read-only
// and we can compute line/col position only if an error happens.
pub struct SliceRead<'a> {
    /// The input. `set_failed` truncates it at the current index.
    slice: &'a [u8],
    /// Index of the *next* byte that will be returned by next() or peek().
    index: usize,
    /// Value of `index` at the time raw buffering was last switched on.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
171
/// JSON input source that reads from a UTF-8 string.
//
// Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
pub struct StrRead<'a> {
    /// Byte-level reader over the string's bytes; does all the actual work.
    delegate: SliceRead<'a>,
    /// The original string, kept so raw values can be sliced out of it as
    /// `&str` directly.
    #[cfg(feature = "raw_value")]
    data: &'a str,
}
180
// Prevent users from implementing the Read trait: `Read` requires `Sealed` as
// a supertrait, and `Sealed` lives in this private module.
mod private {
    /// Marker supertrait of `Read`; implemented in this file for each
    /// supported input source.
    pub trait Sealed {}
}
185
186//////////////////////////////////////////////////////////////////////////////
187
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Create a JSON input source to read from a std::io input stream.
    ///
    /// The reader is consumed byte by byte; no byte is peeked yet and raw
    /// buffering starts switched off.
    pub fn new(reader: R) -> Self {
        IoRead {
            iter: LineColIterator::new(reader.bytes()),
            ch: None,
            #[cfg(feature = "raw_value")]
            raw_buffer: None,
        }
    }
}
203
// Opts `IoRead` into the sealed `Read` trait.
#[cfg(feature = "std")]
impl<R> private::Sealed for IoRead<R> where R: io::Read {}
206
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Reads bytes up to the closing quotation mark, accumulating the
    /// unescaped string contents in `scratch`, then hands the accumulated
    /// bytes to `result` to build the return value.
    ///
    /// With `validate` set, a raw control character inside the string is an
    /// error; otherwise it is copied through unchanged.
    fn parse_str_bytes<'s, T, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        validate: bool,
        result: F,
    ) -> Result<T>
    where
        T: 's,
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
    {
        loop {
            let byte = tri!(next_or_eof(self));
            match byte {
                // Ordinary byte: copy it through untouched.
                _ if !ESCAPE[byte as usize] => scratch.push(byte),
                // Closing quote: the string is complete.
                b'"' => return result(self, scratch),
                // Escape sequence: decode it into the scratch space.
                b'\\' => tri!(parse_escape(self, validate, scratch)),
                // What remains are control characters.
                _ if validate => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                _ => scratch.push(byte),
            }
        }
    }
}
245
#[cfg(feature = "std")]
impl<'de, R> Read<'de> for IoRead<R>
where
    R: io::Read,
{
    /// Hands out the byte stashed by an earlier `peek()` if there is one,
    /// otherwise pulls a fresh byte from the underlying iterator.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        let byte = match self.ch.take() {
            Some(peeked) => peeked,
            None => match self.iter.next() {
                Some(Ok(fresh)) => fresh,
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            },
        };
        // Mirror every consumed byte into the raw buffer while raw buffering
        // is switched on.
        #[cfg(feature = "raw_value")]
        {
            if let Some(buf) = &mut self.raw_buffer {
                buf.push(byte);
            }
        }
        Ok(Some(byte))
    }

    /// Fills the one-byte lookahead slot if it is empty and reports its
    /// contents without consuming them.
    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        if self.ch.is_none() {
            match self.iter.next() {
                Some(Ok(fresh)) => self.ch = Some(fresh),
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            }
        }
        Ok(self.ch)
    }

    /// Drops the peeked byte.
    #[cfg(not(feature = "raw_value"))]
    #[inline]
    fn discard(&mut self) {
        self.ch = None;
    }

    /// Drops the peeked byte, recording it in the raw buffer first when raw
    /// buffering is switched on.
    #[cfg(feature = "raw_value")]
    fn discard(&mut self) {
        if let (Some(ch), Some(buf)) = (self.ch.take(), &mut self.raw_buffer) {
            buf.push(ch);
        }
    }

    /// Line and column as tracked by the underlying iterator.
    fn position(&self) -> Position {
        let line = self.iter.line();
        let column = self.iter.col();
        Position { line, column }
    }

    fn peek_position(&self) -> Position {
        // peek() already advanced the LineColIterator, so its current
        // position is the position of the peeked byte.
        self.position()
    }

    /// Number of bytes consumed so far; a byte that has only been peeked does
    /// not count.
    fn byte_offset(&self) -> usize {
        let pulled = self.iter.byte_offset();
        if self.ch.is_some() {
            pulled - 1
        } else {
            pulled
        }
    }

    /// Parses a string into `scratch` and validates it as UTF-8.
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        self.parse_str_bytes(scratch, true, as_str)
            .map(Reference::Copied)
    }

    /// Parses a string into `scratch` without UTF-8 validation.
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
            .map(Reference::Copied)
    }

    /// Skips over the rest of a string, up to and including its closing
    /// quotation mark.
    fn ignore_str(&mut self) -> Result<()> {
        loop {
            let byte = tri!(next_or_eof(self));
            match byte {
                _ if !ESCAPE[byte as usize] => {}
                b'"' => return Ok(()),
                b'\\' => tri!(ignore_escape(self)),
                _ => return error(self, ErrorCode::ControlCharacterWhileParsingString),
            }
        }
    }

    /// Reads four hex digits and combines them, most significant first.
    fn decode_hex_escape(&mut self) -> Result<u16> {
        let mut value = 0;
        for _ in 0..4 {
            let digit = match decode_hex_val(tri!(next_or_eof(self))) {
                Some(digit) => digit,
                None => return error(self, ErrorCode::InvalidEscape),
            };
            value = (value << 4) + digit;
        }
        Ok(value)
    }

    /// Starts collecting consumed bytes into a fresh buffer.
    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.raw_buffer = Some(Vec::new());
    }

    /// Stops collecting and passes the collected bytes, as an owned string,
    /// to `visitor`.
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let collected = self.raw_buffer.take().unwrap();
        match String::from_utf8(collected) {
            Ok(raw) => visitor.visit_map(OwnedRawDeserializer {
                raw_value: Some(raw),
            }),
            Err(_) => error(self, ErrorCode::InvalidUnicodeCodePoint),
        }
    }

    const should_early_return_if_failed: bool = true;

    /// Records the failure in the caller's flag; this reader keeps no failure
    /// state of its own.
    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        *failed = true;
    }
}
403
404//////////////////////////////////////////////////////////////////////////////
405
406impl<'a> SliceRead<'a> {
407 /// Create a JSON input source to read from a slice of bytes.
408 pub fn new(slice: &'a [u8]) -> Self {
409 SliceRead {
410 slice,
411 index: 0,
412 #[cfg(feature = "raw_value")]
413 raw_buffering_start_index: 0,
414 }
415 }
416
417 fn position_of_index(&self, i: usize) -> Position {
418 let mut position = Position { line: 1, column: 0 };
419 for ch in &self.slice[..i] {
420 match *ch {
421 b'\n' => {
422 position.line += 1;
423 position.column = 0;
424 }
425 _ => {
426 position.column += 1;
427 }
428 }
429 }
430 position
431 }
432
433 /// The big optimization here over IoRead is that if the string contains no
434 /// backslash escape sequences, the returned &str is a slice of the raw JSON
435 /// data so we avoid copying into the scratch space.
436 fn parse_str_bytes<'s, T, F>(
437 &'s mut self,
438 scratch: &'s mut Vec<u8>,
439 validate: bool,
440 result: F,
441 ) -> Result<Reference<'a, 's, T>>
442 where
443 T: ?Sized + 's,
444 F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
445 {
446 // Index of the first byte not yet copied into the scratch space.
447 let mut start = self.index;
448
449 loop {
450 while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
451 self.index += 1;
452 }
453 if self.index == self.slice.len() {
454 return error(self, ErrorCode::EofWhileParsingString);
455 }
456 match self.slice[self.index] {
457 b'"' => {
458 if scratch.is_empty() {
459 // Fast path: return a slice of the raw JSON without any
460 // copying.
461 let borrowed = &self.slice[start..self.index];
462 self.index += 1;
463 return result(self, borrowed).map(Reference::Borrowed);
464 } else {
465 scratch.extend_from_slice(&self.slice[start..self.index]);
466 self.index += 1;
467 return result(self, scratch).map(Reference::Copied);
468 }
469 }
470 b'\\' => {
471 scratch.extend_from_slice(&self.slice[start..self.index]);
472 self.index += 1;
473 tri!(parse_escape(self, validate, scratch));
474 start = self.index;
475 }
476 _ => {
477 self.index += 1;
478 if validate {
479 return error(self, ErrorCode::ControlCharacterWhileParsingString);
480 }
481 }
482 }
483 }
484 }
485}
486
// Opts `SliceRead` into the sealed `Read` trait.
impl<'a> private::Sealed for SliceRead<'a> {}
488
489impl<'a> Read<'a> for SliceRead<'a> {
490 #[inline]
491 fn next(&mut self) -> Result<Option<u8>> {
492 // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
493 // is about 10% slower.
494 Ok(if self.index < self.slice.len() {
495 let ch = self.slice[self.index];
496 self.index += 1;
497 Some(ch)
498 } else {
499 None
500 })
501 }
502
503 #[inline]
504 fn peek(&mut self) -> Result<Option<u8>> {
505 // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
506 // for some reason.
507 Ok(if self.index < self.slice.len() {
508 Some(self.slice[self.index])
509 } else {
510 None
511 })
512 }
513
514 #[inline]
515 fn discard(&mut self) {
516 self.index += 1;
517 }
518
519 fn position(&self) -> Position {
520 self.position_of_index(self.index)
521 }
522
523 fn peek_position(&self) -> Position {
524 // Cap it at slice.len() just in case the most recent call was next()
525 // and it returned the last byte.
526 self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
527 }
528
529 fn byte_offset(&self) -> usize {
530 self.index
531 }
532
533 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
534 self.parse_str_bytes(scratch, true, as_str)
535 }
536
537 fn parse_str_raw<'s>(
538 &'s mut self,
539 scratch: &'s mut Vec<u8>,
540 ) -> Result<Reference<'a, 's, [u8]>> {
541 self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
542 }
543
544 fn ignore_str(&mut self) -> Result<()> {
545 loop {
546 while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
547 self.index += 1;
548 }
549 if self.index == self.slice.len() {
550 return error(self, ErrorCode::EofWhileParsingString);
551 }
552 match self.slice[self.index] {
553 b'"' => {
554 self.index += 1;
555 return Ok(());
556 }
557 b'\\' => {
558 self.index += 1;
559 tri!(ignore_escape(self));
560 }
561 _ => {
562 return error(self, ErrorCode::ControlCharacterWhileParsingString);
563 }
564 }
565 }
566 }
567
568 fn decode_hex_escape(&mut self) -> Result<u16> {
569 if self.index + 4 > self.slice.len() {
570 self.index = self.slice.len();
571 return error(self, ErrorCode::EofWhileParsingString);
572 }
573
574 let mut n = 0;
575 for _ in 0..4 {
576 let ch = decode_hex_val(self.slice[self.index]);
577 self.index += 1;
578 match ch {
579 None => return error(self, ErrorCode::InvalidEscape),
580 Some(val) => {
581 n = (n << 4) + val;
582 }
583 }
584 }
585 Ok(n)
586 }
587
588 #[cfg(feature = "raw_value")]
589 fn begin_raw_buffering(&mut self) {
590 self.raw_buffering_start_index = self.index;
591 }
592
593 #[cfg(feature = "raw_value")]
594 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
595 where
596 V: Visitor<'a>,
597 {
598 let raw = &self.slice[self.raw_buffering_start_index..self.index];
599 let raw = match str::from_utf8(raw) {
600 Ok(raw) => raw,
601 Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
602 };
603 visitor.visit_map(BorrowedRawDeserializer {
604 raw_value: Some(raw),
605 })
606 }
607
608 const should_early_return_if_failed: bool = false;
609
610 #[inline]
611 #[cold]
612 fn set_failed(&mut self, _failed: &mut bool) {
613 self.slice = &self.slice[..self.index];
614 }
615}
616
617//////////////////////////////////////////////////////////////////////////////
618
619impl<'a> StrRead<'a> {
620 /// Create a JSON input source to read from a UTF-8 string.
621 pub fn new(s: &'a str) -> Self {
622 StrRead {
623 delegate: SliceRead::new(slice:s.as_bytes()),
624 #[cfg(feature = "raw_value")]
625 data: s,
626 }
627 }
628}
629
// Opts `StrRead` into the sealed `Read` trait.
impl<'a> private::Sealed for StrRead<'a> {}
631
// Everything is forwarded to the inner `SliceRead`, except for the two places
// where knowing the input is a `&str` allows a UTF-8 check to be skipped:
// `parse_str` and `end_raw_buffering`.
impl<'a> Read<'a> for StrRead<'a> {
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        self.delegate.next()
    }

    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        self.delegate.peek()
    }

    #[inline]
    fn discard(&mut self) {
        self.delegate.discard();
    }

    fn position(&self) -> Position {
        self.delegate.position()
    }

    fn peek_position(&self) -> Position {
        self.delegate.peek_position()
    }

    fn byte_offset(&self) -> usize {
        self.delegate.byte_offset()
    }

    /// Parses a string, skipping the final UTF-8 validation pass that
    /// `SliceRead::parse_str` performs.
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
        self.delegate.parse_str_bytes(scratch, true, |_, bytes| {
            // SAFETY: The deserialization input came in as &str with a UTF-8
            // guarantee, and the \u-escapes are checked along the way, so
            // there is no need to check again here.
            Ok(unsafe { str::from_utf8_unchecked(bytes) })
        })
    }

    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'a, 's, [u8]>> {
        self.delegate.parse_str_raw(scratch)
    }

    fn ignore_str(&mut self) -> Result<()> {
        self.delegate.ignore_str()
    }

    fn decode_hex_escape(&mut self) -> Result<u16> {
        self.delegate.decode_hex_escape()
    }

    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.delegate.begin_raw_buffering();
    }

    /// Passes the raw value to `visitor` by slicing the original `&str`, so
    /// no UTF-8 validation call is needed.
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'a>,
    {
        let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
        visitor.visit_map(BorrowedRawDeserializer {
            raw_value: Some(raw),
        })
    }

    const should_early_return_if_failed: bool = false;

    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        self.delegate.set_failed(failed);
    }
}
708
709//////////////////////////////////////////////////////////////////////////////
710
// Opts mutable references to any `Read` implementation into the sealed trait.
impl<'a, 'de, R> private::Sealed for &'a mut R where R: Read<'de> {}
712
// Allows a `&mut R` to be used wherever a `Read` is expected. Every method
// forwards to the implementation on `R`.
impl<'a, 'de, R> Read<'de> for &'a mut R
where
    R: Read<'de>,
{
    fn next(&mut self) -> Result<Option<u8>> {
        R::next(self)
    }

    fn peek(&mut self) -> Result<Option<u8>> {
        R::peek(self)
    }

    fn discard(&mut self) {
        R::discard(self);
    }

    fn position(&self) -> Position {
        R::position(self)
    }

    fn peek_position(&self) -> Position {
        R::peek_position(self)
    }

    fn byte_offset(&self) -> usize {
        R::byte_offset(self)
    }

    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        R::parse_str(self, scratch)
    }

    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        R::parse_str_raw(self, scratch)
    }

    fn ignore_str(&mut self) -> Result<()> {
        R::ignore_str(self)
    }

    fn decode_hex_escape(&mut self) -> Result<u16> {
        R::decode_hex_escape(self)
    }

    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        R::begin_raw_buffering(self);
    }

    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        R::end_raw_buffering(self, visitor)
    }

    // Inherit the failure-tracking strategy of the underlying reader.
    const should_early_return_if_failed: bool = R::should_early_return_if_failed;

    fn set_failed(&mut self, failed: &mut bool) {
        R::set_failed(self, failed);
    }
}
779
780//////////////////////////////////////////////////////////////////////////////
781
/// Marker for whether StreamDeserializer can implement FusedIterator.
///
/// Implemented below for the two slice-backed readers, `SliceRead` and
/// `StrRead`.
pub trait Fused: private::Sealed {}
impl<'a> Fused for SliceRead<'a> {}
impl<'a> Fused for StrRead<'a> {}
786
// Lookup table of bytes that must be escaped. A value of true at index i means
// that byte i requires an escape sequence in the input.
//
// The string parsers use it to find the next byte that needs special handling:
// a control character, a quotation mark, or a backslash.
static ESCAPE: [bool; 256] = {
    const CT: bool = true; // control character \x00..=\x1F
    const QU: bool = true; // quote \x22
    const BS: bool = true; // backslash \x5C
    const __: bool = false; // allow unescaped
    [
        //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
        CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 0
        CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 1
        __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
        __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
    ]
};
814
815fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
816where
817 R: ?Sized + Read<'de>,
818{
819 match tri!(read.next()) {
820 Some(b: u8) => Ok(b),
821 None => error(read, reason:ErrorCode::EofWhileParsingString),
822 }
823}
824
825fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
826where
827 R: ?Sized + Read<'de>,
828{
829 match tri!(read.peek()) {
830 Some(b: u8) => Ok(b),
831 None => error(read, reason:ErrorCode::EofWhileParsingString),
832 }
833}
834
835fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
836where
837 R: ?Sized + Read<'de>,
838{
839 let position: Position = read.position();
840 Err(Error::syntax(code:reason, position.line, position.column))
841}
842
843fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
844 str::from_utf8(slice).or_else(|_| error(read, reason:ErrorCode::InvalidUnicodeCodePoint))
845}
846
/// Parses a JSON escape sequence and appends it into the scratch space. Assumes
/// the previous byte read was a backslash.
///
/// When `validate` is true, unpaired surrogates in `\u` escapes are rejected
/// with an error. When it is false they are written to `scratch` as three
/// bytes in the UTF-8 bit layout instead, so the scratch contents may then not
/// be valid UTF-8.
fn parse_escape<'de, R: Read<'de>>(
    read: &mut R,
    validate: bool,
    scratch: &mut Vec<u8>,
) -> Result<()> {
    let ch = tri!(next_or_eof(read));

    match ch {
        // Single-character escapes map directly to one output byte.
        b'"' => scratch.push(b'"'),
        b'\\' => scratch.push(b'\\'),
        b'/' => scratch.push(b'/'),
        b'b' => scratch.push(b'\x08'),
        b'f' => scratch.push(b'\x0c'),
        b'n' => scratch.push(b'\n'),
        b'r' => scratch.push(b'\r'),
        b't' => scratch.push(b'\t'),
        b'u' => {
            // Writes `n` in the three-byte UTF-8 layout
            // (1110xxxx 10xxxxxx 10xxxxxx). Only called on the non-validating
            // paths below, for surrogate values that have no `char` form.
            fn encode_surrogate(scratch: &mut Vec<u8>, n: u16) {
                scratch.extend_from_slice(&[
                    (n >> 12 & 0b0000_1111) as u8 | 0b1110_0000,
                    (n >> 6 & 0b0011_1111) as u8 | 0b1000_0000,
                    (n & 0b0011_1111) as u8 | 0b1000_0000,
                ]);
            }

            let c = match tri!(read.decode_hex_escape()) {
                // A value from the second-half (trailing) surrogate range
                // with no first half in front of it.
                n @ 0xDC00..=0xDFFF => {
                    return if validate {
                        error(read, ErrorCode::LoneLeadingSurrogateInHexEscape)
                    } else {
                        encode_surrogate(scratch, n);
                        Ok(())
                    };
                }

                // Non-BMP characters are encoded as a sequence of two hex
                // escapes, representing UTF-16 surrogates. If deserializing a
                // utf-8 string the surrogates are required to be paired,
                // whereas deserializing a byte string accepts lone surrogates.
                n1 @ 0xD800..=0xDBFF => {
                    // The second escape must start with a backslash...
                    if tri!(peek_or_eof(read)) == b'\\' {
                        read.discard();
                    } else {
                        return if validate {
                            // Consume the offending byte before reporting the
                            // error.
                            read.discard();
                            error(read, ErrorCode::UnexpectedEndOfHexEscape)
                        } else {
                            // Leave the peeked byte in place for the caller.
                            encode_surrogate(scratch, n1);
                            Ok(())
                        };
                    }

                    // ...followed by a `u`.
                    if tri!(peek_or_eof(read)) == b'u' {
                        read.discard();
                    } else {
                        return if validate {
                            read.discard();
                            error(read, ErrorCode::UnexpectedEndOfHexEscape)
                        } else {
                            encode_surrogate(scratch, n1);
                            // The \ prior to this byte started an escape sequence,
                            // so we need to parse that now. This recursive call
                            // does not blow the stack on malicious input because
                            // the escape is not \u, so it will be handled by one
                            // of the easy nonrecursive cases.
                            parse_escape(read, validate, scratch)
                        };
                    }

                    let n2 = tri!(read.decode_hex_escape());

                    // The second escape must be the second half of the pair.
                    // Note this is an error even when `validate` is false.
                    if n2 < 0xDC00 || n2 > 0xDFFF {
                        return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
                    }

                    // Combine the two halves: ten bits from each, offset by
                    // 0x1_0000.
                    let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;

                    match char::from_u32(n) {
                        Some(c) => c,
                        None => {
                            return error(read, ErrorCode::InvalidUnicodeCodePoint);
                        }
                    }
                }

                // Every u16 outside of the surrogate ranges above is guaranteed
                // to be a legal char.
                n => char::from_u32(n as u32).unwrap(),
            };

            // Append the decoded character as UTF-8.
            scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
        }
        _ => {
            return error(read, ErrorCode::InvalidEscape);
        }
    }

    Ok(())
}
948
949/// Parses a JSON escape sequence and discards the value. Assumes the previous
950/// byte read was a backslash.
951fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
952where
953 R: ?Sized + Read<'de>,
954{
955 let ch: u8 = tri!(next_or_eof(read));
956
957 match ch {
958 b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
959 b'u' => {
960 // At this point we don't care if the codepoint is valid. We just
961 // want to consume it. We don't actually know what is valid or not
962 // at this point, because that depends on if this string will
963 // ultimately be parsed into a string or a byte buffer in the "real"
964 // parse.
965
966 tri!(read.decode_hex_escape());
967 }
968 _ => {
969 return error(read, reason:ErrorCode::InvalidEscape);
970 }
971 }
972
973 Ok(())
974}
975
// Lookup table from an input byte to the value of the hex digit it represents.
// The ASCII digits and the letters A-F / a-f map to 0..=15; every other byte
// maps to the sentinel 255. Row comments give the high nibble of the byte, the
// header gives the low nibble.
static HEX: [u8; 256] = {
    const __: u8 = 255; // not a hex digit
    [
        //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
        00, 01, 02, 03, 04, 05, 06, 07, 08, 09, __, __, __, __, __, __, // 3
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
    ]
};
998
999fn decode_hex_val(val: u8) -> Option<u16> {
1000 let n: u16 = HEX[val as usize] as u16;
1001 if n == 255 {
1002 None
1003 } else {
1004 Some(n)
1005 }
1006}
1007