1use crate::error::{Error, ErrorCode, Result};
2use alloc::vec::Vec;
3use core::char;
4use core::cmp;
5use core::ops::Deref;
6use core::str;
7
8#[cfg(feature = "std")]
9use crate::io;
10#[cfg(feature = "std")]
11use crate::iter::LineColIterator;
12
13#[cfg(feature = "raw_value")]
14use crate::raw::BorrowedRawDeserializer;
15#[cfg(all(feature = "raw_value", feature = "std"))]
16use crate::raw::OwnedRawDeserializer;
17#[cfg(feature = "raw_value")]
18use serde::de::Visitor;
19
20/// Trait used by the deserializer for iterating over input. This is manually
21/// "specialized" for iterating over &[u8]. Once feature(specialization) is
22/// stable we can use actual specialization.
23///
24/// This trait is sealed and cannot be implemented for types outside of
25/// `serde_json`.
26pub trait Read<'de>: private::Sealed {
27 #[doc(hidden)]
28 fn next(&mut self) -> Result<Option<u8>>;
29 #[doc(hidden)]
30 fn peek(&mut self) -> Result<Option<u8>>;
31
32 /// Only valid after a call to peek(). Discards the peeked byte.
33 #[doc(hidden)]
34 fn discard(&mut self);
35
36 /// Position of the most recent call to next().
37 ///
38 /// The most recent call was probably next() and not peek(), but this method
39 /// should try to return a sensible result if the most recent call was
40 /// actually peek() because we don't always know.
41 ///
42 /// Only called in case of an error, so performance is not important.
43 #[doc(hidden)]
44 fn position(&self) -> Position;
45
46 /// Position of the most recent call to peek().
47 ///
48 /// The most recent call was probably peek() and not next(), but this method
49 /// should try to return a sensible result if the most recent call was
50 /// actually next() because we don't always know.
51 ///
52 /// Only called in case of an error, so performance is not important.
53 #[doc(hidden)]
54 fn peek_position(&self) -> Position;
55
56 /// Offset from the beginning of the input to the next byte that would be
57 /// returned by next() or peek().
58 #[doc(hidden)]
59 fn byte_offset(&self) -> usize;
60
61 /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
62 /// string until the next quotation mark using the given scratch space if
63 /// necessary. The scratch space is initially empty.
64 #[doc(hidden)]
65 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;
66
67 /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
68 /// string until the next quotation mark using the given scratch space if
69 /// necessary. The scratch space is initially empty.
70 ///
71 /// This function returns the raw bytes in the string with escape sequences
72 /// expanded but without performing unicode validation.
73 #[doc(hidden)]
74 fn parse_str_raw<'s>(
75 &'s mut self,
76 scratch: &'s mut Vec<u8>,
77 ) -> Result<Reference<'de, 's, [u8]>>;
78
79 /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
80 /// string until the next quotation mark but discards the data.
81 #[doc(hidden)]
82 fn ignore_str(&mut self) -> Result<()>;
83
84 /// Assumes the previous byte was a hex escape sequence ('\u') in a string.
85 /// Parses next hexadecimal sequence.
86 #[doc(hidden)]
87 fn decode_hex_escape(&mut self) -> Result<u16>;
88
89 /// Switch raw buffering mode on.
90 ///
91 /// This is used when deserializing `RawValue`.
92 #[cfg(feature = "raw_value")]
93 #[doc(hidden)]
94 fn begin_raw_buffering(&mut self);
95
96 /// Switch raw buffering mode off and provides the raw buffered data to the
97 /// given visitor.
98 #[cfg(feature = "raw_value")]
99 #[doc(hidden)]
100 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
101 where
102 V: Visitor<'de>;
103
104 /// Whether StreamDeserializer::next needs to check the failed flag. True
105 /// for IoRead, false for StrRead and SliceRead which can track failure by
106 /// truncating their input slice to avoid the extra check on every next
107 /// call.
108 #[doc(hidden)]
109 const should_early_return_if_failed: bool;
110
111 /// Mark a persistent failure of StreamDeserializer, either by setting the
112 /// flag or by truncating the input data.
113 #[doc(hidden)]
114 fn set_failed(&mut self, failed: &mut bool);
115}
116
/// A line/column location in the input, used when building syntax errors.
///
/// This is a small plain-data value, so it is cheap to copy, can be compared
/// for equality, and can be printed with `{:?}` for diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Position {
    /// Line number of the location.
    pub line: usize,
    /// Column number of the location within its line.
    pub column: usize,
}
121
/// Result of parsing a string: either a view straight into the input data
/// (lifetime `'b`) or a view into data that had to be copied elsewhere
/// (lifetime `'c`).
pub enum Reference<'b, 'c, T: ?Sized + 'static> {
    /// The value is borrowed for `'b`.
    Borrowed(&'b T),
    /// The value is borrowed for `'c`.
    Copied(&'c T),
}
129
130impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
131where
132 T: ?Sized + 'static,
133{
134 type Target = T;
135
136 fn deref(&self) -> &Self::Target {
137 match *self {
138 Reference::Borrowed(b) => b,
139 Reference::Copied(c) => c,
140 }
141 }
142}
143
/// JSON input source backed by a std::io input stream.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub struct IoRead<R: io::Read> {
    /// Byte iterator over the reader that also tracks line and column.
    iter: LineColIterator<io::Bytes<R>>,
    /// Holds a byte that has been peeked but not yet consumed.
    ch: Option<u8>,
    /// Present while raw buffering is switched on; collects consumed bytes.
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
157
/// JSON input source backed by a slice of bytes.
//
// More efficient than the other input sources: peek() is a read-only
// operation, and the line/column position is computed only when an error
// actually occurs.
pub struct SliceRead<'a> {
    /// The input data.
    slice: &'a [u8],
    /// Index of the *next* byte that next() or peek() will return.
    index: usize,
    /// Value of `index` at the moment raw buffering was switched on.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
169
170/// JSON input source that reads from a UTF-8 string.
171//
172// Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
173pub struct StrRead<'a> {
174 delegate: SliceRead<'a>,
175 #[cfg(feature = "raw_value")]
176 data: &'a str,
177}
178
// Sealing module: `Sealed` is a supertrait of `Read`, and because this module
// is private, users cannot implement the Read trait themselves.
mod private {
    /// Marker supertrait implemented only for this crate's input sources.
    pub trait Sealed {}
}
183
184//////////////////////////////////////////////////////////////////////////////
185
#[cfg(feature = "std")]
impl<R: io::Read> IoRead<R> {
    /// Create a JSON input source to read from a std::io input stream.
    ///
    /// The reader is consumed byte by byte; no byte is peeked initially and
    /// raw buffering starts switched off.
    pub fn new(reader: R) -> Self {
        let iter = LineColIterator::new(reader.bytes());
        IoRead {
            iter,
            ch: None,
            #[cfg(feature = "raw_value")]
            raw_buffer: None,
        }
    }
}
201
// Marks IoRead as one of the crate's own input sources.
#[cfg(feature = "std")]
impl<R: io::Read> private::Sealed for IoRead<R> {}
204
#[cfg(feature = "std")]
impl<R: io::Read> IoRead<R> {
    /// Reads string contents into `scratch` until the closing quotation mark,
    /// expanding escape sequences along the way, then passes the collected
    /// bytes to `result` to produce the final value.
    ///
    /// With `validate` set, an unescaped control character is an error;
    /// otherwise it is copied into `scratch` like any other byte.
    fn parse_str_bytes<'s, T, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        validate: bool,
        result: F,
    ) -> Result<T>
    where
        T: 's,
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
    {
        loop {
            let byte = tri!(next_or_eof(self));
            if ESCAPE[byte as usize] {
                // One of: closing quote, start of an escape, control byte.
                match byte {
                    b'"' => return result(self, scratch),
                    b'\\' => tri!(parse_escape(self, validate, scratch)),
                    _ if validate => {
                        return error(self, ErrorCode::ControlCharacterWhileParsingString);
                    }
                    _ => scratch.push(byte),
                }
            } else {
                // Ordinary byte: copy through unchanged.
                scratch.push(byte);
            }
        }
    }
}
243
#[cfg(feature = "std")]
impl<'de, R: io::Read> Read<'de> for IoRead<R> {
    /// Returns the previously peeked byte if there is one, otherwise pulls a
    /// fresh byte from the underlying iterator. While raw buffering is on,
    /// the returned byte is also appended to the raw buffer.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        let byte = match self.ch.take() {
            Some(peeked) => peeked,
            None => match self.iter.next() {
                Some(Ok(fresh)) => fresh,
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            },
        };
        #[cfg(feature = "raw_value")]
        {
            if let Some(buf) = &mut self.raw_buffer {
                buf.push(byte);
            }
        }
        Ok(Some(byte))
    }

    /// Returns the next byte without consuming it, reading it from the
    /// underlying iterator and stashing it in `ch` if nothing is stashed yet.
    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        if self.ch.is_none() {
            match self.iter.next() {
                Some(Ok(byte)) => self.ch = Some(byte),
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            }
        }
        Ok(self.ch)
    }

    /// Drops the peeked byte.
    #[cfg(not(feature = "raw_value"))]
    #[inline]
    fn discard(&mut self) {
        self.ch = None;
    }

    /// Drops the peeked byte, first recording it in the raw buffer when raw
    /// buffering is on.
    #[cfg(feature = "raw_value")]
    fn discard(&mut self) {
        if let (Some(ch), Some(buf)) = (self.ch.take(), self.raw_buffer.as_mut()) {
            buf.push(ch);
        }
    }

    /// Line and column as currently tracked by the underlying iterator.
    fn position(&self) -> Position {
        let line = self.iter.line();
        let column = self.iter.col();
        Position { line, column }
    }

    fn peek_position(&self) -> Position {
        // peek() already advances the LineColIterator, so its current
        // position is the right one to report here too.
        self.position()
    }

    /// Offset of the next byte to be returned; a stashed peeked byte has
    /// already been pulled from the iterator, so it is subtracted again.
    fn byte_offset(&self) -> usize {
        let pulled = self.iter.byte_offset();
        if self.ch.is_some() {
            pulled - 1
        } else {
            pulled
        }
    }

    /// Parses a validated string; the data always lives in `scratch`.
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        match self.parse_str_bytes(scratch, true, as_str) {
            Ok(text) => Ok(Reference::Copied(text)),
            Err(err) => Err(err),
        }
    }

    /// Parses string bytes without validation; the data always lives in
    /// `scratch`.
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        match self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)) {
            Ok(bytes) => Ok(Reference::Copied(bytes)),
            Err(err) => Err(err),
        }
    }

    /// Consumes string contents through the closing quotation mark without
    /// storing anything. Unescaped control characters are an error.
    fn ignore_str(&mut self) -> Result<()> {
        loop {
            match tri!(next_or_eof(self)) {
                b'"' => return Ok(()),
                b'\\' => tri!(ignore_escape(self)),
                byte if ESCAPE[byte as usize] => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                _ => {}
            }
        }
    }

    /// Reads four hex digits and combines them, most significant first.
    fn decode_hex_escape(&mut self) -> Result<u16> {
        let mut value: u16 = 0;
        for _ in 0..4 {
            let byte = tri!(next_or_eof(self));
            if let Some(digit) = decode_hex_val(byte) {
                value = (value << 4) + digit;
            } else {
                return error(self, ErrorCode::InvalidEscape);
            }
        }
        Ok(value)
    }

    /// Starts collecting consumed bytes into a fresh, empty buffer.
    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.raw_buffer = Some(Vec::new());
    }

    /// Stops collecting, checks the collected bytes are UTF-8 and passes them
    /// to `visitor` as an owned raw value.
    ///
    /// Panics if raw buffering was not on (the buffer is unwrapped).
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let collected = self.raw_buffer.take().unwrap();
        match String::from_utf8(collected) {
            Ok(raw) => visitor.visit_map(OwnedRawDeserializer {
                raw_value: Some(raw),
            }),
            Err(_) => error(self, ErrorCode::InvalidUnicodeCodePoint),
        }
    }

    const should_early_return_if_failed: bool = true;

    /// Records the failure in the caller's flag.
    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        *failed = true;
    }
}
401
402//////////////////////////////////////////////////////////////////////////////
403
404impl<'a> SliceRead<'a> {
405 /// Create a JSON input source to read from a slice of bytes.
406 pub fn new(slice: &'a [u8]) -> Self {
407 SliceRead {
408 slice,
409 index: 0,
410 #[cfg(feature = "raw_value")]
411 raw_buffering_start_index: 0,
412 }
413 }
414
415 fn position_of_index(&self, i: usize) -> Position {
416 let mut position = Position { line: 1, column: 0 };
417 for ch in &self.slice[..i] {
418 match *ch {
419 b'\n' => {
420 position.line += 1;
421 position.column = 0;
422 }
423 _ => {
424 position.column += 1;
425 }
426 }
427 }
428 position
429 }
430
431 /// The big optimization here over IoRead is that if the string contains no
432 /// backslash escape sequences, the returned &str is a slice of the raw JSON
433 /// data so we avoid copying into the scratch space.
434 fn parse_str_bytes<'s, T, F>(
435 &'s mut self,
436 scratch: &'s mut Vec<u8>,
437 validate: bool,
438 result: F,
439 ) -> Result<Reference<'a, 's, T>>
440 where
441 T: ?Sized + 's,
442 F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
443 {
444 // Index of the first byte not yet copied into the scratch space.
445 let mut start = self.index;
446
447 loop {
448 while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
449 self.index += 1;
450 }
451 if self.index == self.slice.len() {
452 return error(self, ErrorCode::EofWhileParsingString);
453 }
454 match self.slice[self.index] {
455 b'"' => {
456 if scratch.is_empty() {
457 // Fast path: return a slice of the raw JSON without any
458 // copying.
459 let borrowed = &self.slice[start..self.index];
460 self.index += 1;
461 return result(self, borrowed).map(Reference::Borrowed);
462 } else {
463 scratch.extend_from_slice(&self.slice[start..self.index]);
464 self.index += 1;
465 return result(self, scratch).map(Reference::Copied);
466 }
467 }
468 b'\\' => {
469 scratch.extend_from_slice(&self.slice[start..self.index]);
470 self.index += 1;
471 tri!(parse_escape(self, validate, scratch));
472 start = self.index;
473 }
474 _ => {
475 self.index += 1;
476 if validate {
477 return error(self, ErrorCode::ControlCharacterWhileParsingString);
478 }
479 }
480 }
481 }
482 }
483}
484
485impl<'a> private::Sealed for SliceRead<'a> {}
486
487impl<'a> Read<'a> for SliceRead<'a> {
488 #[inline]
489 fn next(&mut self) -> Result<Option<u8>> {
490 // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
491 // is about 10% slower.
492 Ok(if self.index < self.slice.len() {
493 let ch = self.slice[self.index];
494 self.index += 1;
495 Some(ch)
496 } else {
497 None
498 })
499 }
500
501 #[inline]
502 fn peek(&mut self) -> Result<Option<u8>> {
503 // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
504 // for some reason.
505 Ok(if self.index < self.slice.len() {
506 Some(self.slice[self.index])
507 } else {
508 None
509 })
510 }
511
512 #[inline]
513 fn discard(&mut self) {
514 self.index += 1;
515 }
516
517 fn position(&self) -> Position {
518 self.position_of_index(self.index)
519 }
520
521 fn peek_position(&self) -> Position {
522 // Cap it at slice.len() just in case the most recent call was next()
523 // and it returned the last byte.
524 self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
525 }
526
527 fn byte_offset(&self) -> usize {
528 self.index
529 }
530
531 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
532 self.parse_str_bytes(scratch, true, as_str)
533 }
534
535 fn parse_str_raw<'s>(
536 &'s mut self,
537 scratch: &'s mut Vec<u8>,
538 ) -> Result<Reference<'a, 's, [u8]>> {
539 self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
540 }
541
542 fn ignore_str(&mut self) -> Result<()> {
543 loop {
544 while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
545 self.index += 1;
546 }
547 if self.index == self.slice.len() {
548 return error(self, ErrorCode::EofWhileParsingString);
549 }
550 match self.slice[self.index] {
551 b'"' => {
552 self.index += 1;
553 return Ok(());
554 }
555 b'\\' => {
556 self.index += 1;
557 tri!(ignore_escape(self));
558 }
559 _ => {
560 return error(self, ErrorCode::ControlCharacterWhileParsingString);
561 }
562 }
563 }
564 }
565
566 fn decode_hex_escape(&mut self) -> Result<u16> {
567 if self.index + 4 > self.slice.len() {
568 self.index = self.slice.len();
569 return error(self, ErrorCode::EofWhileParsingString);
570 }
571
572 let mut n = 0;
573 for _ in 0..4 {
574 let ch = decode_hex_val(self.slice[self.index]);
575 self.index += 1;
576 match ch {
577 None => return error(self, ErrorCode::InvalidEscape),
578 Some(val) => {
579 n = (n << 4) + val;
580 }
581 }
582 }
583 Ok(n)
584 }
585
586 #[cfg(feature = "raw_value")]
587 fn begin_raw_buffering(&mut self) {
588 self.raw_buffering_start_index = self.index;
589 }
590
591 #[cfg(feature = "raw_value")]
592 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
593 where
594 V: Visitor<'a>,
595 {
596 let raw = &self.slice[self.raw_buffering_start_index..self.index];
597 let raw = match str::from_utf8(raw) {
598 Ok(raw) => raw,
599 Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
600 };
601 visitor.visit_map(BorrowedRawDeserializer {
602 raw_value: Some(raw),
603 })
604 }
605
606 const should_early_return_if_failed: bool = false;
607
608 #[inline]
609 #[cold]
610 fn set_failed(&mut self, _failed: &mut bool) {
611 self.slice = &self.slice[..self.index];
612 }
613}
614
615//////////////////////////////////////////////////////////////////////////////
616
617impl<'a> StrRead<'a> {
618 /// Create a JSON input source to read from a UTF-8 string.
619 pub fn new(s: &'a str) -> Self {
620 StrRead {
621 delegate: SliceRead::new(s.as_bytes()),
622 #[cfg(feature = "raw_value")]
623 data: s,
624 }
625 }
626}
627
628impl<'a> private::Sealed for StrRead<'a> {}
629
630impl<'a> Read<'a> for StrRead<'a> {
631 #[inline]
632 fn next(&mut self) -> Result<Option<u8>> {
633 self.delegate.next()
634 }
635
636 #[inline]
637 fn peek(&mut self) -> Result<Option<u8>> {
638 self.delegate.peek()
639 }
640
641 #[inline]
642 fn discard(&mut self) {
643 self.delegate.discard();
644 }
645
646 fn position(&self) -> Position {
647 self.delegate.position()
648 }
649
650 fn peek_position(&self) -> Position {
651 self.delegate.peek_position()
652 }
653
654 fn byte_offset(&self) -> usize {
655 self.delegate.byte_offset()
656 }
657
658 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
659 self.delegate.parse_str_bytes(scratch, true, |_, bytes| {
660 // The deserialization input came in as &str with a UTF-8 guarantee,
661 // and the \u-escapes are checked along the way, so don't need to
662 // check here.
663 Ok(unsafe { str::from_utf8_unchecked(bytes) })
664 })
665 }
666
667 fn parse_str_raw<'s>(
668 &'s mut self,
669 scratch: &'s mut Vec<u8>,
670 ) -> Result<Reference<'a, 's, [u8]>> {
671 self.delegate.parse_str_raw(scratch)
672 }
673
674 fn ignore_str(&mut self) -> Result<()> {
675 self.delegate.ignore_str()
676 }
677
678 fn decode_hex_escape(&mut self) -> Result<u16> {
679 self.delegate.decode_hex_escape()
680 }
681
682 #[cfg(feature = "raw_value")]
683 fn begin_raw_buffering(&mut self) {
684 self.delegate.begin_raw_buffering();
685 }
686
687 #[cfg(feature = "raw_value")]
688 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
689 where
690 V: Visitor<'a>,
691 {
692 let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
693 visitor.visit_map(BorrowedRawDeserializer {
694 raw_value: Some(raw),
695 })
696 }
697
698 const should_early_return_if_failed: bool = false;
699
700 #[inline]
701 #[cold]
702 fn set_failed(&mut self, failed: &mut bool) {
703 self.delegate.set_failed(failed);
704 }
705}
706
707//////////////////////////////////////////////////////////////////////////////
708
709impl<'a, 'de, R> private::Sealed for &'a mut R where R: Read<'de> {}
710
711impl<'a, 'de, R> Read<'de> for &'a mut R
712where
713 R: Read<'de>,
714{
715 fn next(&mut self) -> Result<Option<u8>> {
716 R::next(self)
717 }
718
719 fn peek(&mut self) -> Result<Option<u8>> {
720 R::peek(self)
721 }
722
723 fn discard(&mut self) {
724 R::discard(self);
725 }
726
727 fn position(&self) -> Position {
728 R::position(self)
729 }
730
731 fn peek_position(&self) -> Position {
732 R::peek_position(self)
733 }
734
735 fn byte_offset(&self) -> usize {
736 R::byte_offset(self)
737 }
738
739 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
740 R::parse_str(self, scratch)
741 }
742
743 fn parse_str_raw<'s>(
744 &'s mut self,
745 scratch: &'s mut Vec<u8>,
746 ) -> Result<Reference<'de, 's, [u8]>> {
747 R::parse_str_raw(self, scratch)
748 }
749
750 fn ignore_str(&mut self) -> Result<()> {
751 R::ignore_str(self)
752 }
753
754 fn decode_hex_escape(&mut self) -> Result<u16> {
755 R::decode_hex_escape(self)
756 }
757
758 #[cfg(feature = "raw_value")]
759 fn begin_raw_buffering(&mut self) {
760 R::begin_raw_buffering(self);
761 }
762
763 #[cfg(feature = "raw_value")]
764 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
765 where
766 V: Visitor<'de>,
767 {
768 R::end_raw_buffering(self, visitor)
769 }
770
771 const should_early_return_if_failed: bool = R::should_early_return_if_failed;
772
773 fn set_failed(&mut self, failed: &mut bool) {
774 R::set_failed(self, failed);
775 }
776}
777
778//////////////////////////////////////////////////////////////////////////////
779
780/// Marker for whether StreamDeserializer can implement FusedIterator.
781pub trait Fused: private::Sealed {}
782impl<'a> Fused for SliceRead<'a> {}
783impl<'a> Fused for StrRead<'a> {}
784
// Byte classification table for string parsing. ESCAPE[i] is true when byte i
// may not appear unescaped inside a JSON string: the control characters, the
// quotation mark and the backslash.
static ESCAPE: [bool; 256] = {
    const CTRL: bool = true; // control character \x00..=\x1F
    const QUOT: bool = true; // quote \x22
    const BKSL: bool = true; // backslash \x5C
    const ____: bool = false; // allow unescaped
    [
        // Row comment = high nibble; position within the row = low nibble.
        CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, // 0
        CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, CTRL, // 1
        ____, ____, QUOT, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // 2
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // 3
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // 4
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, BKSL, ____, ____, ____, // 5
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // 6
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // 7
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // 8
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // 9
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // A
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // B
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // C
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // D
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // E
        ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, ____, // F
    ]
};
812
813fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
814where
815 R: ?Sized + Read<'de>,
816{
817 match tri!(read.next()) {
818 Some(b) => Ok(b),
819 None => error(read, ErrorCode::EofWhileParsingString),
820 }
821}
822
823fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
824where
825 R: ?Sized + Read<'de>,
826{
827 match tri!(read.peek()) {
828 Some(b) => Ok(b),
829 None => error(read, ErrorCode::EofWhileParsingString),
830 }
831}
832
833fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
834where
835 R: ?Sized + Read<'de>,
836{
837 let position = read.position();
838 Err(Error::syntax(reason, position.line, position.column))
839}
840
841fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
842 str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint))
843}
844
845/// Parses a JSON escape sequence and appends it into the scratch space. Assumes
846/// the previous byte read was a backslash.
847fn parse_escape<'de, R: Read<'de>>(
848 read: &mut R,
849 validate: bool,
850 scratch: &mut Vec<u8>,
851) -> Result<()> {
852 let ch = tri!(next_or_eof(read));
853
854 match ch {
855 b'"' => scratch.push(b'"'),
856 b'\\' => scratch.push(b'\\'),
857 b'/' => scratch.push(b'/'),
858 b'b' => scratch.push(b'\x08'),
859 b'f' => scratch.push(b'\x0c'),
860 b'n' => scratch.push(b'\n'),
861 b'r' => scratch.push(b'\r'),
862 b't' => scratch.push(b'\t'),
863 b'u' => {
864 fn encode_surrogate(scratch: &mut Vec<u8>, n: u16) {
865 scratch.extend_from_slice(&[
866 (n >> 12 & 0b0000_1111) as u8 | 0b1110_0000,
867 (n >> 6 & 0b0011_1111) as u8 | 0b1000_0000,
868 (n & 0b0011_1111) as u8 | 0b1000_0000,
869 ]);
870 }
871
872 let c = match tri!(read.decode_hex_escape()) {
873 n @ 0xDC00..=0xDFFF => {
874 return if validate {
875 error(read, ErrorCode::LoneLeadingSurrogateInHexEscape)
876 } else {
877 encode_surrogate(scratch, n);
878 Ok(())
879 };
880 }
881
882 // Non-BMP characters are encoded as a sequence of two hex
883 // escapes, representing UTF-16 surrogates. If deserializing a
884 // utf-8 string the surrogates are required to be paired,
885 // whereas deserializing a byte string accepts lone surrogates.
886 n1 @ 0xD800..=0xDBFF => {
887 if tri!(peek_or_eof(read)) == b'\\' {
888 read.discard();
889 } else {
890 return if validate {
891 read.discard();
892 error(read, ErrorCode::UnexpectedEndOfHexEscape)
893 } else {
894 encode_surrogate(scratch, n1);
895 Ok(())
896 };
897 }
898
899 if tri!(peek_or_eof(read)) == b'u' {
900 read.discard();
901 } else {
902 return if validate {
903 read.discard();
904 error(read, ErrorCode::UnexpectedEndOfHexEscape)
905 } else {
906 encode_surrogate(scratch, n1);
907 // The \ prior to this byte started an escape sequence,
908 // so we need to parse that now. This recursive call
909 // does not blow the stack on malicious input because
910 // the escape is not \u, so it will be handled by one
911 // of the easy nonrecursive cases.
912 parse_escape(read, validate, scratch)
913 };
914 }
915
916 let n2 = tri!(read.decode_hex_escape());
917
918 if n2 < 0xDC00 || n2 > 0xDFFF {
919 return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
920 }
921
922 let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
923
924 match char::from_u32(n) {
925 Some(c) => c,
926 None => {
927 return error(read, ErrorCode::InvalidUnicodeCodePoint);
928 }
929 }
930 }
931
932 // Every u16 outside of the surrogate ranges above is guaranteed
933 // to be a legal char.
934 n => char::from_u32(n as u32).unwrap(),
935 };
936
937 scratch.extend_from_slice(c.encode_utf8(&mut [0_u8; 4]).as_bytes());
938 }
939 _ => {
940 return error(read, ErrorCode::InvalidEscape);
941 }
942 }
943
944 Ok(())
945}
946
947/// Parses a JSON escape sequence and discards the value. Assumes the previous
948/// byte read was a backslash.
949fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
950where
951 R: ?Sized + Read<'de>,
952{
953 let ch = tri!(next_or_eof(read));
954
955 match ch {
956 b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
957 b'u' => {
958 // At this point we don't care if the codepoint is valid. We just
959 // want to consume it. We don't actually know what is valid or not
960 // at this point, because that depends on if this string will
961 // ultimately be parsed into a string or a byte buffer in the "real"
962 // parse.
963
964 tri!(read.decode_hex_escape());
965 }
966 _ => {
967 return error(read, ErrorCode::InvalidEscape);
968 }
969 }
970
971 Ok(())
972}
973
// Maps a byte to the value of the hexadecimal digit it represents
// ('0'..='9', 'A'..='F', 'a'..='f'); every other byte maps to 255.
static HEX: [u8; 256] = {
    const ___: u8 = 255; // not a hex digit
    [
        // Row comment = high nibble; position within the row = low nibble.
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 0
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2
        0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, ___, ___, ___, ___, ___, ___, // 3
        ___, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 4
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 5
        ___, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 6
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 7
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 8
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 9
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // A
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // B
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // C
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // D
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // E
        ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // F
    ]
};
996
997fn decode_hex_val(val: u8) -> Option<u16> {
998 let n = HEX[val as usize] as u16;
999 if n == 255 {
1000 None
1001 } else {
1002 Some(n)
1003 }
1004}
1005