read.rs source code [crates/serde_json/src/read.rs]

1	use crate::error::{Error, ErrorCode, Result};
2	use alloc::vec::Vec;
3	use core::cmp;
4	use core::mem;
5	use core::ops::Deref;
6	use core::str;
7
8	#[cfg(feature = "std")]
9	use crate::io;
10	#[cfg(feature = "std")]
11	use crate::iter::LineColIterator;
12
13	#[cfg(feature = "raw_value")]
14	use crate::raw::BorrowedRawDeserializer;
15	#[cfg(all(feature = "raw_value", feature = "std"))]
16	use crate::raw::OwnedRawDeserializer;
17	#[cfg(all(feature = "raw_value", feature = "std"))]
18	use alloc::string::String;
19	#[cfg(feature = "raw_value")]
20	use serde::de::Visitor;
21
22	/// Trait used by the deserializer for iterating over input. This is manually
23	/// "specialized" for iterating over `&[u8]`. Once feature(specialization) is
24	/// stable we can use actual specialization.
25	///
26	/// This trait is sealed and cannot be implemented for types outside of
27	/// `serde_json`.
28	pub trait Read<'de>: private::Sealed {
29	#[doc(hidden)]
30	fn next(&mut self) -> Result<Option<u8>>;
31	#[doc(hidden)]
32	fn peek(&mut self) -> Result<Option<u8>>;
33
34	/// Only valid after a call to peek(). Discards the peeked byte.
35	#[doc(hidden)]
36	fn discard(&mut self);
37
38	/// Position of the most recent call to next().
39	///
40	/// The most recent call was probably next() and not peek(), but this method
41	/// should try to return a sensible result if the most recent call was
42	/// actually peek() because we don't always know.
43	///
44	/// Only called in case of an error, so performance is not important.
45	#[doc(hidden)]
46	fn position(&self) -> Position;
47
48	/// Position of the most recent call to peek().
49	///
50	/// The most recent call was probably peek() and not next(), but this method
51	/// should try to return a sensible result if the most recent call was
52	/// actually next() because we don't always know.
53	///
54	/// Only called in case of an error, so performance is not important.
55	#[doc(hidden)]
56	fn peek_position(&self) -> Position;
57
58	/// Offset from the beginning of the input to the next byte that would be
59	/// returned by next() or peek().
60	#[doc(hidden)]
61	fn byte_offset(&self) -> usize;
62
63	/// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
64	/// string until the next quotation mark using the given scratch space if
65	/// necessary. The scratch space is initially empty.
66	#[doc(hidden)]
67	fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;
68
69	/// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
70	/// string until the next quotation mark using the given scratch space if
71	/// necessary. The scratch space is initially empty.
72	///
73	/// This function returns the raw bytes in the string with escape sequences
74	/// expanded but without performing unicode validation.
75	#[doc(hidden)]
76	fn parse_str_raw<'s>(
77	&'s mut self,
78	scratch: &'s mut Vec<u8>,
79	) -> Result<Reference<'de, 's, [u8]>>;
80
81	/// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
82	/// string until the next quotation mark but discards the data.
83	#[doc(hidden)]
84	fn ignore_str(&mut self) -> Result<()>;
85
86	/// Assumes the previous byte was a hex escape sequence ('\u') in a string.
87	/// Parses next hexadecimal sequence.
88	#[doc(hidden)]
89	fn decode_hex_escape(&mut self) -> Result<u16>;
90
91	/// Switch raw buffering mode on.
92	///
93	/// This is used when deserializing `RawValue`.
94	#[cfg(feature = "raw_value")]
95	#[doc(hidden)]
96	fn begin_raw_buffering(&mut self);
97
98	/// Switch raw buffering mode off and provides the raw buffered data to the
99	/// given visitor.
100	#[cfg(feature = "raw_value")]
101	#[doc(hidden)]
102	fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
103	where
104	V: Visitor<'de>;
105
106	/// Whether StreamDeserializer::next needs to check the failed flag. True
107	/// for IoRead, false for StrRead and SliceRead which can track failure by
108	/// truncating their input slice to avoid the extra check on every next
109	/// call.
110	#[doc(hidden)]
111	const should_early_return_if_failed: bool;
112
113	/// Mark a persistent failure of StreamDeserializer, either by setting the
114	/// flag or by truncating the input data.
115	#[doc(hidden)]
116	fn set_failed(&mut self, failed: &mut bool);
117	}
118
/// A line/column pair, as produced by `Read::position` and
/// `Read::peek_position`.
pub struct Position {
    /// Line component of the position.
    pub line: usize,
    /// Column component of the position.
    pub column: usize,
}
123
/// A reference to parsed data carrying one of two lifetimes.
///
/// Within this file, `Borrowed` is produced when the data is a slice of the
/// original input, and `Copied` when the data lives in the scratch buffer.
pub enum Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    /// Reference valid for the longer-lived `'b` (the input data).
    Borrowed(&'b T),
    /// Reference valid for `'c` (the scratch space).
    Copied(&'c T),
}

impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    type Target = T;

    /// Returns the underlying reference regardless of which variant holds it.
    fn deref(&self) -> &Self::Target {
        match *self {
            Reference::Borrowed(b) => b,
            Reference::Copied(c) => c,
        }
    }
}
145
/// A JSON input source backed by a std::io input stream.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub struct IoRead<R>
where
    R: io::Read,
{
    // Byte iterator over the reader; it is also asked for the current line,
    // column and byte offset.
    iter: LineColIterator<io::Bytes<R>>,
    /// Holds a byte that has been peeked but not yet consumed.
    ch: Option<u8>,
    // `Some` while raw buffering is switched on; consumed bytes are appended.
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
159
/// A JSON input source backed by a slice of bytes.
//
// More efficient than the other input sources: peek() can be read-only, and
// the line/col position only has to be computed when an error happens.
pub struct SliceRead<'a> {
    slice: &'a [u8],
    /// Index of the *next* byte that will be returned by next() or peek().
    index: usize,
    // Value of `index` at the time raw buffering was switched on.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
171
172	/// JSON input source that reads from a UTF-8 string.
173	//
174	// Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
175	pub struct StrRead<'a> {
176	delegate: SliceRead<'a>,
177	#[cfg(feature = "raw_value")]
178	data: &'a str,
179	}
180
// `Sealed` is a supertrait of `Read`; keeping it in this private module
// prevents users from implementing the Read trait.
mod private {
    pub trait Sealed {}
}
185
186	//////////////////////////////////////////////////////////////////////////////
187
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Create a JSON input source to read from a std::io input stream.
    ///
    /// The reader is consumed byte by byte through `reader.bytes()`. No byte
    /// is peeked initially, and raw buffering starts switched off.
    pub fn new(reader: R) -> Self {
        IoRead {
            iter: LineColIterator::new(reader.bytes()),
            ch: None,
            #[cfg(feature = "raw_value")]
            raw_buffer: None,
        }
    }
}
203
// Opts IoRead into the sealed `Read` trait.
#[cfg(feature = "std")]
impl<R: io::Read> private::Sealed for IoRead<R> {}
206
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Reads string bytes into `scratch` until the closing quotation mark,
    /// then passes the reader and the accumulated bytes to `result`.
    ///
    /// Backslash escapes are expanded into `scratch` via `parse_escape`. A
    /// control character (byte below 0x20) is an error when `validate` is
    /// true and is copied through verbatim otherwise. Running out of input
    /// before the closing quote is reported by `next_or_eof`.
    fn parse_str_bytes<'s, T, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        validate: bool,
        result: F,
    ) -> Result<T>
    where
        T: 's,
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
    {
        loop {
            let ch = tri!(next_or_eof(self));
            // Ordinary bytes are copied straight into the scratch space.
            if !is_escape(ch, true) {
                scratch.push(ch);
                continue;
            }
            match ch {
                b'"' => {
                    return result(self, scratch);
                }
                b'\\' => {
                    tri!(parse_escape(self, validate, scratch));
                }
                _ => {
                    // Only control characters reach this arm.
                    if validate {
                        return error(self, ErrorCode::ControlCharacterWhileParsingString);
                    }
                    scratch.push(ch);
                }
            }
        }
    }
}
245
#[cfg(feature = "std")]
impl<'de, R> Read<'de> for IoRead<R>
where
    R: io::Read,
{
    /// Returns the peeked byte if there is one, otherwise pulls the next byte
    /// from the underlying iterator. I/O errors are wrapped with `Error::io`.
    /// While raw buffering is on, the returned byte is also recorded.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        match self.ch.take() {
            Some(ch) => {
                #[cfg(feature = "raw_value")]
                {
                    if let Some(buf) = &mut self.raw_buffer {
                        buf.push(ch);
                    }
                }
                Ok(Some(ch))
            }
            None => match self.iter.next() {
                Some(Err(err)) => Err(Error::io(err)),
                Some(Ok(ch)) => {
                    #[cfg(feature = "raw_value")]
                    {
                        if let Some(buf) = &mut self.raw_buffer {
                            buf.push(ch);
                        }
                    }
                    Ok(Some(ch))
                }
                None => Ok(None),
            },
        }
    }

    /// Returns the next byte without consuming it, reading it from the
    /// underlying iterator and stashing it in `self.ch` if necessary.
    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        match self.ch {
            Some(ch) => Ok(Some(ch)),
            None => match self.iter.next() {
                Some(Err(err)) => Err(Error::io(err)),
                Some(Ok(ch)) => {
                    self.ch = Some(ch);
                    Ok(self.ch)
                }
                None => Ok(None),
            },
        }
    }

    /// Drops the peeked byte.
    #[cfg(not(feature = "raw_value"))]
    #[inline]
    fn discard(&mut self) {
        self.ch = None;
    }

    /// Drops the peeked byte, recording it first if raw buffering is on.
    #[cfg(feature = "raw_value")]
    fn discard(&mut self) {
        if let Some(ch) = self.ch.take() {
            if let Some(buf) = &mut self.raw_buffer {
                buf.push(ch);
            }
        }
    }

    /// Line and column as tracked by the underlying `LineColIterator`.
    fn position(&self) -> Position {
        Position {
            line: self.iter.line(),
            column: self.iter.col(),
        }
    }

    fn peek_position(&self) -> Position {
        // The LineColIterator updates its position during peek() so it has the
        // right one here.
        self.position()
    }

    /// Byte offset of the next byte to be returned by next() or peek().
    fn byte_offset(&self) -> usize {
        match self.ch {
            // A peeked byte has already been pulled from the iterator but not
            // yet consumed, so it must not be counted.
            Some(_) => self.iter.byte_offset() - 1,
            None => self.iter.byte_offset(),
        }
    }

    /// Parses a string with validation; the result always refers to `scratch`.
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        self.parse_str_bytes(scratch, true, as_str)
            .map(Reference::Copied)
    }

    /// Parses a string without validation; the result always refers to
    /// `scratch`.
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
            .map(Reference::Copied)
    }

    /// Consumes a string up to and including its closing quotation mark
    /// without storing any of its contents.
    fn ignore_str(&mut self) -> Result<()> {
        loop {
            let ch = tri!(next_or_eof(self));
            if !is_escape(ch, true) {
                continue;
            }
            match ch {
                b'"' => {
                    return Ok(());
                }
                b'\\' => {
                    tri!(ignore_escape(self));
                }
                _ => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
            }
        }
    }

    /// Reads four bytes and decodes them as hexadecimal digits.
    fn decode_hex_escape(&mut self) -> Result<u16> {
        let a = tri!(next_or_eof(self));
        let b = tri!(next_or_eof(self));
        let c = tri!(next_or_eof(self));
        let d = tri!(next_or_eof(self));
        match decode_four_hex_digits(a, b, c, d) {
            Some(val) => Ok(val),
            None => error(self, ErrorCode::InvalidEscape),
        }
    }

    /// Starts recording consumed bytes into a fresh buffer.
    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.raw_buffer = Some(Vec::new());
    }

    /// Stops recording and passes the recorded bytes, as an owned string, to
    /// the visitor. Panics if raw buffering was not on, since the buffer is
    /// unwrapped.
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let raw = self.raw_buffer.take().unwrap();
        let raw = match String::from_utf8(raw) {
            Ok(raw) => raw,
            Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
        };
        visitor.visit_map(OwnedRawDeserializer {
            raw_value: Some(raw),
        })
    }

    const should_early_return_if_failed: bool = true;

    /// Records failure by setting the caller's flag.
    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        *failed = true;
    }
}
401
402	//////////////////////////////////////////////////////////////////////////////
403
404	impl<'a> SliceRead<'a> {
405	/// Create a JSON input source to read from a slice of bytes.
406	pub fn new(slice: &'a [u8]) -> Self {
407	SliceRead {
408	slice,
409	index: `0`,
410	#[cfg(feature = "raw_value")]
411	raw_buffering_start_index: `0`,
412	}
413	}
414
415	fn position_of_index(&self, i: usize) -> Position {
416	let start_of_line = match memchr::memrchr(b'`\n`', &self.slice[..i]) {
417	Some(position) => position + `1`,
418	None => `0`,
419	};
420	Position {
421	line: `1` + memchr::memchr_iter(b'`\n`', &self.slice[..start_of_line]).count(),
422	column: i - start_of_line,
423	}
424	}
425
426	fn skip_to_escape(&mut self, forbid_control_characters: bool) {
427	// Immediately bail-out on empty strings and consecutive escapes (e.g. \u041b\u0435)
428	if self.index == self.slice.len()
429	\|\| is_escape(self.slice[self.index], forbid_control_characters)
430	{
431	return;
432	}
433	self.index += `1`;
434
435	let rest = &self.slice[self.index..];
436
437	if !forbid_control_characters {
438	self.index += memchr::memchr2(b'"', b'`\\`', rest).unwrap_or(rest.len());
439	return;
440	}
441
442	// We wish to find the first byte in range 0x00..=0x1F or " or \. Ideally, we'd use
443	// something akin to memchr3, but the memchr crate does not support this at the moment.
444	// Therefore, we use a variation on Mycroft's algorithm [1] to provide performance better
445	// than a naive loop. It runs faster than equivalent two-pass memchr2+SWAR code on
446	// benchmarks and it's cross-platform, so probably the right fit.
447	// [1]: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
448
449	#[cfg(fast_arithmetic = "64")]
450	type Chunk = u64;
451	#[cfg(fast_arithmetic = "32")]
452	type Chunk = u32;
453
454	const STEP: usize = mem::size_of::<Chunk>();
455	const ONE_BYTES: Chunk = Chunk::MAX / `255`; // 0x0101...01
456
457	for chunk in rest.chunks_exact(STEP) {
458	let chars = Chunk::from_le_bytes(chunk.try_into().unwrap());
459	let contains_ctrl = chars.wrapping_sub(ONE_BYTES * `0x20`) & !chars;
460	let chars_quote = chars ^ (ONE_BYTES * Chunk::from(b'"'));
461	let contains_quote = chars_quote.wrapping_sub(ONE_BYTES) & !chars_quote;
462	let chars_backslash = chars ^ (ONE_BYTES * Chunk::from(b'`\\`'));
463	let contains_backslash = chars_backslash.wrapping_sub(ONE_BYTES) & !chars_backslash;
464	let masked = (contains_ctrl \| contains_quote \| contains_backslash) & (ONE_BYTES << `7`);
465	if masked != `0` {
466	// SAFETY: chunk is in-bounds for slice
467	self.index = unsafe { chunk.as_ptr().offset_from(self.slice.as_ptr()) } as usize
468	+ masked.trailing_zeros() as usize / `8`;
469	return;
470	}
471	}
472
473	self.index += rest.len() / STEP * STEP;
474	self.skip_to_escape_slow();
475	}
476
477	#[cold]
478	#[inline(never)]
479	fn skip_to_escape_slow(&mut self) {
480	while self.index < self.slice.len() && !is_escape(self.slice[self.index], `true`) {
481	self.index += `1`;
482	}
483	}
484
485	/// The big optimization here over IoRead is that if the string contains no
486	/// backslash escape sequences, the returned &str is a slice of the raw JSON
487	/// data so we avoid copying into the scratch space.
488	fn parse_str_bytes<'s, T, F>(
489	&'s mut self,
490	scratch: &'s mut Vec<u8>,
491	validate: bool,
492	result: F,
493	) -> Result<Reference<'a, 's, T>>
494	where
495	T: ?Sized + 's,
496	F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
497	{
498	// Index of the first byte not yet copied into the scratch space.
499	let mut start = self.index;
500
501	loop {
502	self.skip_to_escape(validate);
503	if self.index == self.slice.len() {
504	return error(self, ErrorCode::EofWhileParsingString);
505	}
506	match self.slice[self.index] {
507	b'"' => {
508	if scratch.is_empty() {
509	// Fast path: return a slice of the raw JSON without any
510	// copying.
511	let borrowed = &self.slice[start..self.index];
512	self.index += `1`;
513	return result(self, borrowed).map(Reference::Borrowed);
514	} else {
515	scratch.extend_from_slice(&self.slice[start..self.index]);
516	self.index += `1`;
517	return result(self, scratch).map(Reference::Copied);
518	}
519	}
520	b'`\\`' => {
521	scratch.extend_from_slice(&self.slice[start..self.index]);
522	self.index += `1`;
523	tri!(parse_escape(self, validate, scratch));
524	start = self.index;
525	}
526	_ => {
527	self.index += `1`;
528	return error(self, ErrorCode::ControlCharacterWhileParsingString);
529	}
530	}
531	}
532	}
533	}
534
535	impl<'a> private::Sealed for SliceRead<'a> {}
536
537	impl<'a> Read<'a> for SliceRead<'a> {
538	#[inline]
539	fn next(&mut self) -> Result<Option<u8>> {
540	// `Ok(self.slice.get(self.index).map(\|ch\| { self.index += 1; ch }))`*
541	// is about 10% slower.
542	Ok(if self.index < self.slice.len() {
543	let ch = self.slice[self.index];
544	self.index += `1`;
545	Some(ch)
546	} else {
547	None
548	})
549	}
550
551	#[inline]
552	fn peek(&mut self) -> Result<Option<u8>> {
553	// `Ok(self.slice.get(self.index).map(\|ch\| ch))` is about 10% slower*
554	// for some reason.
555	Ok(if self.index < self.slice.len() {
556	Some(self.slice[self.index])
557	} else {
558	None
559	})
560	}
561
562	#[inline]
563	fn discard(&mut self) {
564	self.index += `1`;
565	}
566
567	fn position(&self) -> Position {
568	self.position_of_index(self.index)
569	}
570
571	fn peek_position(&self) -> Position {
572	// Cap it at slice.len() just in case the most recent call was next()
573	// and it returned the last byte.
574	self.position_of_index(cmp::min(self.slice.len(), self.index + `1`))
575	}
576
577	fn byte_offset(&self) -> usize {
578	self.index
579	}
580
581	fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
582	self.parse_str_bytes(scratch, `true`, as_str)
583	}
584
585	fn parse_str_raw<'s>(
586	&'s mut self,
587	scratch: &'s mut Vec<u8>,
588	) -> Result<Reference<'a, 's, [u8]>> {
589	self.parse_str_bytes(scratch, `false`, \|_, bytes\| Ok(bytes))
590	}
591
592	fn ignore_str(&mut self) -> Result<()> {
593	loop {
594	self.skip_to_escape(`true`);
595	if self.index == self.slice.len() {
596	return error(self, ErrorCode::EofWhileParsingString);
597	}
598	match self.slice[self.index] {
599	b'"' => {
600	self.index += `1`;
601	return Ok(());
602	}
603	b'`\\`' => {
604	self.index += `1`;
605	tri!(ignore_escape(self));
606	}
607	_ => {
608	return error(self, ErrorCode::ControlCharacterWhileParsingString);
609	}
610	}
611	}
612	}
613
614	#[inline]
615	fn decode_hex_escape(&mut self) -> Result<u16> {
616	match self.slice[self.index..] {
617	[a, b, c, d, ..] => {
618	self.index += `4`;
619	match decode_four_hex_digits(a, b, c, d) {
620	Some(val) => Ok(val),
621	None => error(self, ErrorCode::InvalidEscape),
622	}
623	}
624	_ => {
625	self.index = self.slice.len();
626	error(self, ErrorCode::EofWhileParsingString)
627	}
628	}
629	}
630
631	#[cfg(feature = "raw_value")]
632	fn begin_raw_buffering(&mut self) {
633	self.raw_buffering_start_index = self.index;
634	}
635
636	#[cfg(feature = "raw_value")]
637	fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
638	where
639	V: Visitor<'a>,
640	{
641	let raw = &self.slice[self.raw_buffering_start_index..self.index];
642	let raw = match str::from_utf8(raw) {
643	Ok(raw) => raw,
644	Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
645	};
646	visitor.visit_map(BorrowedRawDeserializer {
647	raw_value: Some(raw),
648	})
649	}
650
651	const should_early_return_if_failed: bool = `false`;
652
653	#[inline]
654	#[cold]
655	fn set_failed(&mut self, _failed: &mut bool) {
656	self.slice = &self.slice[..self.index];
657	}
658	}
659
660	//////////////////////////////////////////////////////////////////////////////
661
662	impl<'a> StrRead<'a> {
663	/// Create a JSON input source to read from a UTF-8 string.
664	pub fn new(s: &'a str) -> Self {
665	StrRead {
666	delegate: SliceRead::new(slice:s.as_bytes()),
667	#[cfg(feature = "raw_value")]
668	data: s,
669	}
670	}
671	}
672
673	impl<'a> private::Sealed for StrRead<'a> {}
674
675	impl<'a> Read<'a> for StrRead<'a> {
676	#[inline]
677	fn next(&mut self) -> Result<Option<u8>> {
678	self.delegate.next()
679	}
680
681	#[inline]
682	fn peek(&mut self) -> Result<Option<u8>> {
683	self.delegate.peek()
684	}
685
686	#[inline]
687	fn discard(&mut self) {
688	self.delegate.discard();
689	}
690
691	fn position(&self) -> Position {
692	self.delegate.position()
693	}
694
695	fn peek_position(&self) -> Position {
696	self.delegate.peek_position()
697	}
698
699	fn byte_offset(&self) -> usize {
700	self.delegate.byte_offset()
701	}
702
703	fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
704	self.delegate.parse_str_bytes(scratch, `true`, \|_, bytes\| {
705	// The deserialization input came in as &str with a UTF-8 guarantee,
706	// and the \u-escapes are checked along the way, so don't need to
707	// check here.
708	Ok(unsafe { str::from_utf8_unchecked(bytes) })
709	})
710	}
711
712	fn parse_str_raw<'s>(
713	&'s mut self,
714	scratch: &'s mut Vec<u8>,
715	) -> Result<Reference<'a, 's, [u8]>> {
716	self.delegate.parse_str_raw(scratch)
717	}
718
719	fn ignore_str(&mut self) -> Result<()> {
720	self.delegate.ignore_str()
721	}
722
723	fn decode_hex_escape(&mut self) -> Result<u16> {
724	self.delegate.decode_hex_escape()
725	}
726
727	#[cfg(feature = "raw_value")]
728	fn begin_raw_buffering(&mut self) {
729	self.delegate.begin_raw_buffering();
730	}
731
732	#[cfg(feature = "raw_value")]
733	fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
734	where
735	V: Visitor<'a>,
736	{
737	let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
738	visitor.visit_map(BorrowedRawDeserializer {
739	raw_value: Some(raw),
740	})
741	}
742
743	const should_early_return_if_failed: bool = `false`;
744
745	#[inline]
746	#[cold]
747	fn set_failed(&mut self, failed: &mut bool) {
748	self.delegate.set_failed(failed);
749	}
750	}
751
752	//////////////////////////////////////////////////////////////////////////////
753
754	impl<'de, R> private::Sealed for &mut R where R: Read<'de> {}
755
756	impl<'de, R> Read<'de> for &mut R
757	where
758	R: Read<'de>,
759	{
760	fn next(&mut self) -> Result<Option<u8>> {
761	R::next(self)
762	}
763
764	fn peek(&mut self) -> Result<Option<u8>> {
765	R::peek(self)
766	}
767
768	fn discard(&mut self) {
769	R::discard(self);
770	}
771
772	fn position(&self) -> Position {
773	R::position(self)
774	}
775
776	fn peek_position(&self) -> Position {
777	R::peek_position(self)
778	}
779
780	fn byte_offset(&self) -> usize {
781	R::byte_offset(self)
782	}
783
784	fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
785	R::parse_str(self, scratch)
786	}
787
788	fn parse_str_raw<'s>(
789	&'s mut self,
790	scratch: &'s mut Vec<u8>,
791	) -> Result<Reference<'de, 's, [u8]>> {
792	R::parse_str_raw(self, scratch)
793	}
794
795	fn ignore_str(&mut self) -> Result<()> {
796	R::ignore_str(self)
797	}
798
799	fn decode_hex_escape(&mut self) -> Result<u16> {
800	R::decode_hex_escape(self)
801	}
802
803	#[cfg(feature = "raw_value")]
804	fn begin_raw_buffering(&mut self) {
805	R::begin_raw_buffering(self);
806	}
807
808	#[cfg(feature = "raw_value")]
809	fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
810	where
811	V: Visitor<'de>,
812	{
813	R::end_raw_buffering(self, visitor)
814	}
815
816	const should_early_return_if_failed: bool = R::should_early_return_if_failed;
817
818	fn set_failed(&mut self, failed: &mut bool) {
819	R::set_failed(self, failed);
820	}
821	}
822
823	//////////////////////////////////////////////////////////////////////////////
824
825	/// Marker for whether StreamDeserializer can implement FusedIterator.
826	pub trait Fused: private::Sealed {}
827	impl<'a> Fused for SliceRead<'a> {}
828	impl<'a> Fused for StrRead<'a> {}
829
/// Returns true if `ch` needs special handling inside a JSON string: a
/// quotation mark, a backslash, or, when `including_control_characters` is
/// set, any byte below 0x20.
fn is_escape(ch: u8, including_control_characters: bool) -> bool {
    ch == b'"' || ch == b'\\' || (including_control_characters && ch < 0x20)
}
833
834	fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
835	where
836	R: ?Sized + Read<'de>,
837	{
838	match tri!(read.next()) {
839	Some(b: u8) => Ok(b),
840	None => error(read, reason:ErrorCode::EofWhileParsingString),
841	}
842	}
843
844	fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
845	where
846	R: ?Sized + Read<'de>,
847	{
848	match tri!(read.peek()) {
849	Some(b: u8) => Ok(b),
850	None => error(read, reason:ErrorCode::EofWhileParsingString),
851	}
852	}
853
854	fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
855	where
856	R: ?Sized + Read<'de>,
857	{
858	let position: Position = read.position();
859	Err(Error::syntax(code:reason, position.line, position.column))
860	}
861
862	fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
863	str::from_utf8(slice).or_else(\|_\| error(read, reason:ErrorCode::InvalidUnicodeCodePoint))
864	}
865
866	/// Parses a JSON escape sequence and appends it into the scratch space. Assumes
867	/// the previous byte read was a backslash.
868	fn parse_escape<'de, R: Read<'de>>(
869	read: &mut R,
870	validate: bool,
871	scratch: &mut Vec<u8>,
872	) -> Result<()> {
873	let ch: u8 = tri!(next_or_eof(read));
874
875	match ch {
876	b'"' => scratch.push(b'"'),
877	b'`\\`' => scratch.push(b'`\\`'),
878	b'/' => scratch.push(b'/'),
879	b'b' => scratch.push(b'`\x08`'),
880	b'f' => scratch.push(b'`\x0c`'),
881	b'n' => scratch.push(b'`\n`'),
882	b'r' => scratch.push(b'`\r`'),
883	b't' => scratch.push(b'`\t`'),
884	b'u' => return parse_unicode_escape(read, validate, scratch),
885	_ => return error(read, reason:ErrorCode::InvalidEscape),
886	}
887
888	Ok(())
889	}
890
891	/// Parses a JSON \u escape and appends it into the scratch space. Assumes `\u`
892	/// has just been read.
893	#[cold]
894	fn parse_unicode_escape<'de, R: Read<'de>>(
895	read: &mut R,
896	validate: bool,
897	scratch: &mut Vec<u8>,
898	) -> Result<()> {
899	let mut n = tri!(read.decode_hex_escape());
900
901	// Non-BMP characters are encoded as a sequence of two hex escapes,
902	// representing UTF-16 surrogates. If deserializing a utf-8 string the
903	// surrogates are required to be paired, whereas deserializing a byte string
904	// accepts lone surrogates.
905	if validate && n >= `0xDC00` && n <= `0xDFFF` {
906	// XXX: This is actually a trailing surrogate.
907	return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
908	}
909
910	loop {
911	if n < `0xD800` \|\| n > `0xDBFF` {
912	// Every u16 outside of the surrogate ranges is guaranteed to be a
913	// legal char.
914	push_wtf8_codepoint(n as u32, scratch);
915	return Ok(());
916	}
917
918	// n is a leading surrogate, we now expect a trailing surrogate.
919	let n1 = n;
920
921	if tri!(peek_or_eof(read)) == b'`\\`' {
922	read.discard();
923	} else {
924	return if validate {
925	read.discard();
926	error(read, ErrorCode::UnexpectedEndOfHexEscape)
927	} else {
928	push_wtf8_codepoint(n1 as u32, scratch);
929	Ok(())
930	};
931	}
932
933	if tri!(peek_or_eof(read)) == b'u' {
934	read.discard();
935	} else {
936	return if validate {
937	read.discard();
938	error(read, ErrorCode::UnexpectedEndOfHexEscape)
939	} else {
940	push_wtf8_codepoint(n1 as u32, scratch);
941	// The \ prior to this byte started an escape sequence, so we
942	// need to parse that now. This recursive call does not blow the
943	// stack on malicious input because the escape is not \u, so it
944	// will be handled by one of the easy nonrecursive cases.
945	parse_escape(read, validate, scratch)
946	};
947	}
948
949	let n2 = tri!(read.decode_hex_escape());
950
951	if n2 < `0xDC00` \|\| n2 > `0xDFFF` {
952	if validate {
953	return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
954	}
955	push_wtf8_codepoint(n1 as u32, scratch);
956	// If n2 is a leading surrogate, we need to restart.
957	n = n2;
958	continue;
959	}
960
961	// This value is in range U+10000..=U+10FFFF, which is always a valid
962	// codepoint.
963	let n = ((((n1 - `0xD800`) as u32) << `10`) \| (n2 - `0xDC00`) as u32) + `0x1_0000`;
964	push_wtf8_codepoint(n, scratch);
965	return Ok(());
966	}
967	}
968
/// Adds a WTF-8 codepoint to the end of the buffer. This is a more efficient
/// implementation of String::push. The codepoint may be a surrogate.
///
/// `n` must be at most 0x10_FFFF; larger values hit `unreachable!()`.
#[inline]
fn push_wtf8_codepoint(n: u32, scratch: &mut Vec<u8>) {
    // Single-byte case needs no unsafe code.
    if n < 0x80 {
        scratch.push(n as u8);
        return;
    }

    scratch.reserve(4);

    // SAFETY: After the `reserve` call, `scratch` has at least 4 bytes of
    // allocated but uninitialized memory after its last initialized byte, which
    // is where `ptr` points. All reachable match arms write `encoded_len` bytes
    // to that region and update the length accordingly, and `encoded_len` is
    // always <= 4.
    unsafe {
        let ptr = scratch.as_mut_ptr().add(scratch.len());

        // Each arm writes every byte except the last; the final continuation
        // byte has the same form for all lengths and is written afterwards.
        let encoded_len = match n {
            0..=0x7F => unreachable!(),
            0x80..=0x7FF => {
                ptr.write(((n >> 6) & 0b0001_1111) as u8 | 0b1100_0000);
                2
            }
            0x800..=0xFFFF => {
                ptr.write(((n >> 12) & 0b0000_1111) as u8 | 0b1110_0000);
                ptr.add(1)
                    .write(((n >> 6) & 0b0011_1111) as u8 | 0b1000_0000);
                3
            }
            0x1_0000..=0x10_FFFF => {
                ptr.write(((n >> 18) & 0b0000_0111) as u8 | 0b1111_0000);
                ptr.add(1)
                    .write(((n >> 12) & 0b0011_1111) as u8 | 0b1000_0000);
                ptr.add(2)
                    .write(((n >> 6) & 0b0011_1111) as u8 | 0b1000_0000);
                4
            }
            0x11_0000.. => unreachable!(),
        };
        ptr.add(encoded_len - 1)
            .write((n & 0b0011_1111) as u8 | 0b1000_0000);

        scratch.set_len(scratch.len() + encoded_len);
    }
}
1016
1017	/// Parses a JSON escape sequence and discards the value. Assumes the previous
1018	/// byte read was a backslash.
1019	fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
1020	where
1021	R: ?Sized + Read<'de>,
1022	{
1023	let ch: u8 = tri!(next_or_eof(read));
1024
1025	match ch {
1026	b'"' \| b'`\\`' \| b'/' \| b'b' \| b'f' \| b'n' \| b'r' \| b't' => {}
1027	b'u' => {
1028	// At this point we don't care if the codepoint is valid. We just
1029	// want to consume it. We don't actually know what is valid or not
1030	// at this point, because that depends on if this string will
1031	// ultimately be parsed into a string or a byte buffer in the "real"
1032	// parse.
1033
1034	tri!(read.decode_hex_escape());
1035	}
1036	_ => {
1037	return error(read, reason:ErrorCode::InvalidEscape);
1038	}
1039	}
1040
1041	Ok(())
1042	}
1043
/// Decodes a single ASCII hexadecimal digit (either case) to its value 0-15,
/// or returns `None` for any other byte.
const fn decode_hex_val_slow(val: u8) -> Option<u8> {
    match val {
        b'0'..=b'9' => Some(val - b'0'),
        b'A'..=b'F' => Some(val - b'A' + 10),
        b'a'..=b'f' => Some(val - b'a' + 10),
        _ => None,
    }
}
1052
1053	const fn build_hex_table(shift: usize) -> [i16; `256`] {
1054	let mut table: [i16; 256] = [`0`; `256`];
1055	let mut ch: usize = `0`;
1056	while ch < `256` {
1057	table[ch] = match decode_hex_val_slow(val:ch as u8) {
1058	Some(val: u8) => (val as i16) << shift,
1059	None => `-1`,
1060	};
1061	ch += `1`;
1062	}
1063	table
1064	}
1065
1066	static HEX0: [i16; `256`] = build_hex_table(shift:`0`);
1067	static HEX1: [i16; `256`] = build_hex_table(shift:`4`);
1068
1069	fn decode_four_hex_digits(a: u8, b: u8, c: u8, d: u8) -> Option<u16> {
1070	let a: i32 = HEX1[a as usize] as i32;
1071	let b: i32 = HEX0[b as usize] as i32;
1072	let c: i32 = HEX1[c as usize] as i32;
1073	let d: i32 = HEX0[d as usize] as i32;
1074
1075	let codepoint: i32 = ((a \| b) << `8`) \| c \| d;
1076
1077	// A single sign bit check.
1078	if codepoint >= `0` {
1079	Some(codepoint as u16)
1080	} else {
1081	None
1082	}
1083	}
1084