read.rs - Codebrowser

1	use crate::error::{Error, ErrorCode, Result};
2	use alloc::vec::Vec;
3	use core::char;
4	use core::cmp;
5	use core::ops::Deref;
6	use core::str;
7
8	#[cfg(feature = "std")]
9	use crate::io;
10	#[cfg(feature = "std")]
11	use crate::iter::LineColIterator;
12
13	#[cfg(feature = "raw_value")]
14	use crate::raw::BorrowedRawDeserializer;
15	#[cfg(all(feature = "raw_value", feature = "std"))]
16	use crate::raw::OwnedRawDeserializer;
17	#[cfg(feature = "raw_value")]
18	use serde::de::Visitor;
19
/// Trait used by the deserializer for iterating over input. This is manually
/// "specialized" for iterating over `&[u8]`. Once `feature(specialization)` is
/// stable we can use actual specialization.
///
/// This trait is sealed (it requires `private::Sealed`) and cannot be
/// implemented for types outside of `serde_json`.
pub trait Read<'de>: private::Sealed {
    /// Consumes and returns the next input byte, or `Ok(None)` when the input
    /// is exhausted.
    #[doc(hidden)]
    fn next(&mut self) -> Result<Option<u8>>;
    /// Returns the next input byte without consuming it, or `Ok(None)` when
    /// the input is exhausted.
    #[doc(hidden)]
    fn peek(&mut self) -> Result<Option<u8>>;

    /// Only valid after a call to peek(). Discards the peeked byte.
    #[doc(hidden)]
    fn discard(&mut self);

    /// Position of the most recent call to next().
    ///
    /// The most recent call was probably next() and not peek(), but this method
    /// should try to return a sensible result if the most recent call was
    /// actually peek() because we don't always know.
    ///
    /// Only called in case of an error, so performance is not important.
    #[doc(hidden)]
    fn position(&self) -> Position;

    /// Position of the most recent call to peek().
    ///
    /// The most recent call was probably peek() and not next(), but this method
    /// should try to return a sensible result if the most recent call was
    /// actually next() because we don't always know.
    ///
    /// Only called in case of an error, so performance is not important.
    #[doc(hidden)]
    fn peek_position(&self) -> Position;

    /// Offset from the beginning of the input to the next byte that would be
    /// returned by next() or peek().
    #[doc(hidden)]
    fn byte_offset(&self) -> usize;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark using the given scratch space if
    /// necessary. The scratch space is initially empty.
    ///
    /// The result either borrows from the input (lifetime `'de`) or from the
    /// scratch buffer (lifetime `'s`).
    #[doc(hidden)]
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark using the given scratch space if
    /// necessary. The scratch space is initially empty.
    ///
    /// This function returns the raw bytes in the string with escape sequences
    /// expanded but without performing unicode validation.
    #[doc(hidden)]
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>>;

    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
    /// string until the next quotation mark but discards the data.
    #[doc(hidden)]
    fn ignore_str(&mut self) -> Result<()>;

    /// Assumes the previous bytes were the start of a hex escape sequence
    /// (`\u`) in a string. Parses the four hexadecimal digits that follow.
    #[doc(hidden)]
    fn decode_hex_escape(&mut self) -> Result<u16>;

    /// Switch raw buffering mode on.
    ///
    /// This is used when deserializing `RawValue`.
    #[cfg(feature = "raw_value")]
    #[doc(hidden)]
    fn begin_raw_buffering(&mut self);

    /// Switch raw buffering mode off and provides the raw buffered data to the
    /// given visitor.
    #[cfg(feature = "raw_value")]
    #[doc(hidden)]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>;

    /// Whether StreamDeserializer::next needs to check the failed flag. True
    /// for IoRead, false for StrRead and SliceRead which can track failure by
    /// truncating their input slice to avoid the extra check on every next
    /// call.
    #[doc(hidden)]
    const should_early_return_if_failed: bool;

    /// Mark a persistent failure of StreamDeserializer, either by setting the
    /// flag or by truncating the input data.
    #[doc(hidden)]
    fn set_failed(&mut self, failed: &mut bool);
}
116
/// Line/column location within the input, as reported by `Read::position`
/// and `Read::peek_position` when building error messages.
///
/// The slice-based reader in this file starts counting at line 1, column 0.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Position {
    /// Line number of the location.
    pub line: usize,
    /// Column within `line`, counted in bytes since the last `\n`.
    pub column: usize,
}
121
/// A reference to parsed string data that lives in one of two places.
///
/// `Borrowed` points directly into the deserializer input (lifetime `'b`),
/// while `Copied` points into a temporary buffer such as the scratch space
/// (lifetime `'c`).
pub enum Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    /// Data taken straight from the input without copying.
    Borrowed(&'b T),
    /// Data that had to be copied into a shorter-lived buffer.
    Copied(&'c T),
}

impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    type Target = T;

    /// Gives access to the referenced data regardless of where it lives.
    fn deref(&self) -> &Self::Target {
        match self {
            Reference::Borrowed(from_input) => *from_input,
            Reference::Copied(from_buffer) => *from_buffer,
        }
    }
}
143
/// JSON input source that reads from a std::io input stream.
///
/// Bytes are pulled one at a time from `reader.bytes()` through a
/// `LineColIterator`, which supplies the line, column and byte offset used
/// for error reporting.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub struct IoRead<R>
where
    R: io::Read,
{
    /// Byte iterator over the underlying reader.
    iter: LineColIterator<io::Bytes<R>>,
    /// Temporary storage of peeked byte.
    ch: Option<u8>,
    /// Bytes consumed since raw buffering was switched on; `None` while raw
    /// buffering is off.
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
157
/// JSON input source that reads from a slice of bytes.
//
// This is more efficient than other iterators because peek() can be read-only
// and we can compute line/col position only if an error happens.
pub struct SliceRead<'a> {
    /// The input bytes. Truncated at the current index by `set_failed`.
    slice: &'a [u8],
    /// Index of the *next* byte that will be returned by next() or peek().
    index: usize,
    /// Value of `index` at the moment raw buffering was switched on.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
169
/// JSON input source that reads from a UTF-8 string.
//
// Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
pub struct StrRead<'a> {
    /// Byte-level reader over the string's bytes; does all the actual work.
    delegate: SliceRead<'a>,
    /// The original string, kept so raw values can be sliced out as `&str`.
    #[cfg(feature = "raw_value")]
    data: &'a str,
}
178
// Prevent users from implementing the Read trait: `Read` requires `Sealed`,
// and `Sealed` lives in this private module.
mod private {
    /// Marker supertrait of `Read` and `Fused`.
    pub trait Sealed {}
}
183
184	//////////////////////////////////////////////////////////////////////////////
185
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Create a JSON input source to read from a std::io input stream.
    ///
    /// The reader is consumed byte by byte; no byte is peeked and raw
    /// buffering is off initially.
    pub fn new(reader: R) -> Self {
        let iter = LineColIterator::new(reader.bytes());
        IoRead {
            iter,
            ch: None,
            #[cfg(feature = "raw_value")]
            raw_buffer: None,
        }
    }
}
201
// Opts `IoRead` into the sealed marker trait that `Read` requires.
#[cfg(feature = "std")]
impl<R> private::Sealed for IoRead<R> where R: io::Read {}
204
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Reads the body of a JSON string (the opening quote has already been
    /// consumed) into `scratch`, expanding escape sequences, and hands the
    /// accumulated bytes to `result` once the closing quote is reached.
    ///
    /// When `validate` is true an unescaped control character is an error;
    /// otherwise it is copied through unchanged.
    fn parse_str_bytes<'s, T, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        validate: bool,
        result: F,
    ) -> Result<T>
    where
        T: 's,
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
    {
        loop {
            let ch = tri!(next_or_eof(self));
            match ch {
                // Ordinary byte: copy it verbatim.
                _ if !ESCAPE[ch as usize] => scratch.push(ch),
                // Closing quote: the string is complete.
                b'"' => return result(self, scratch),
                // Escape sequence: decode it into the scratch space.
                b'\\' => tri!(parse_escape(self, validate, scratch)),
                // Everything else flagged in ESCAPE is a control character.
                _ if validate => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                _ => scratch.push(ch),
            }
        }
    }
}
243
#[cfg(feature = "std")]
impl<'de, R> Read<'de> for IoRead<R>
where
    R: io::Read,
{
    /// Returns the peeked byte if there is one, otherwise pulls the next byte
    /// from the underlying iterator. Consumed bytes are appended to the raw
    /// buffer while raw buffering is on.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        let byte = match self.ch.take() {
            Some(peeked) => peeked,
            None => match self.iter.next() {
                Some(Ok(fresh)) => fresh,
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            },
        };
        #[cfg(feature = "raw_value")]
        {
            if let Some(buf) = &mut self.raw_buffer {
                buf.push(byte);
            }
        }
        Ok(Some(byte))
    }

    /// Returns the next byte without consuming it, reading it from the
    /// underlying iterator and caching it in `self.ch` if necessary.
    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        if self.ch.is_none() {
            match self.iter.next() {
                Some(Ok(fresh)) => self.ch = Some(fresh),
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            }
        }
        Ok(self.ch)
    }

    /// Drops the peeked byte.
    #[cfg(not(feature = "raw_value"))]
    #[inline]
    fn discard(&mut self) {
        self.ch = None;
    }

    /// Drops the peeked byte, recording it in the raw buffer when raw
    /// buffering is on.
    #[cfg(feature = "raw_value")]
    fn discard(&mut self) {
        if let (Some(ch), Some(buf)) = (self.ch.take(), self.raw_buffer.as_mut()) {
            buf.push(ch);
        }
    }

    /// Line and column as tracked by the underlying `LineColIterator`.
    fn position(&self) -> Position {
        let line = self.iter.line();
        let column = self.iter.col();
        Position { line, column }
    }

    fn peek_position(&self) -> Position {
        // peek() already advanced the LineColIterator, so its current position
        // is the position of the peeked byte.
        self.position()
    }

    /// Offset of the next byte to be returned; a peeked byte has already been
    /// counted by the iterator, so it is subtracted again.
    fn byte_offset(&self) -> usize {
        let consumed = self.iter.byte_offset();
        if self.ch.is_some() {
            consumed - 1
        } else {
            consumed
        }
    }

    /// Parses a string into the scratch space and validates it as UTF-8.
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        match self.parse_str_bytes(scratch, true, as_str) {
            Ok(text) => Ok(Reference::Copied(text)),
            Err(err) => Err(err),
        }
    }

    /// Parses a string into the scratch space without UTF-8 validation.
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        self.parse_str_bytes(scratch, false, |_reader, raw| Ok(raw))
            .map(Reference::Copied)
    }

    /// Skips over a string body up to and including the closing quote.
    fn ignore_str(&mut self) -> Result<()> {
        loop {
            let ch = tri!(next_or_eof(self));
            if !ESCAPE[ch as usize] {
                continue;
            }
            if ch == b'"' {
                return Ok(());
            }
            if ch != b'\\' {
                return error(self, ErrorCode::ControlCharacterWhileParsingString);
            }
            tri!(ignore_escape(self));
        }
    }

    /// Reads four hex digits and combines them into a 16-bit value.
    fn decode_hex_escape(&mut self) -> Result<u16> {
        let mut value = 0;
        for _ in 0..4 {
            let digit = match decode_hex_val(tri!(next_or_eof(self))) {
                Some(digit) => digit,
                None => return error(self, ErrorCode::InvalidEscape),
            };
            value = (value << 4) + digit;
        }
        Ok(value)
    }

    /// Starts collecting every consumed byte into a fresh buffer.
    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.raw_buffer = Some(Vec::new());
    }

    /// Stops collecting and passes the buffered bytes, as an owned string, to
    /// the visitor. Panics if raw buffering was not switched on.
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let bytes = self.raw_buffer.take().unwrap();
        match String::from_utf8(bytes) {
            Ok(raw) => visitor.visit_map(OwnedRawDeserializer {
                raw_value: Some(raw),
            }),
            Err(_) => error(self, ErrorCode::InvalidUnicodeCodePoint),
        }
    }

    const should_early_return_if_failed: bool = true;

    /// Records the failure in the caller's flag.
    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        *failed = true;
    }
}
401
402	//////////////////////////////////////////////////////////////////////////////
403
404	impl<'a> SliceRead<'a> {
405	/// Create a JSON input source to read from a slice of bytes.
406	pub fn new(slice: &'a [u8]) -> Self {
407	SliceRead {
408	slice,
409	index: `0`,
410	#[cfg(feature = "raw_value")]
411	raw_buffering_start_index: `0`,
412	}
413	}
414
415	fn position_of_index(&self, i: usize) -> Position {
416	let mut position = Position { line: `1`, column: `0` };
417	for ch in &self.slice[..i] {
418	match *ch {
419	b'`\n`' => {
420	position.line += `1`;
421	position.column = `0`;
422	}
423	_ => {
424	position.column += `1`;
425	}
426	}
427	}
428	position
429	}
430
431	/// The big optimization here over IoRead is that if the string contains no
432	/// backslash escape sequences, the returned &str is a slice of the raw JSON
433	/// data so we avoid copying into the scratch space.
434	fn parse_str_bytes<'s, T, F>(
435	&'s mut self,
436	scratch: &'s mut Vec<u8>,
437	validate: bool,
438	result: F,
439	) -> Result<Reference<'a, 's, T>>
440	where
441	T: ?Sized + 's,
442	F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
443	{
444	// Index of the first byte not yet copied into the scratch space.
445	let mut start = self.index;
446
447	loop {
448	while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
449	self.index += `1`;
450	}
451	if self.index == self.slice.len() {
452	return error(self, ErrorCode::EofWhileParsingString);
453	}
454	match self.slice[self.index] {
455	b'"' => {
456	if scratch.is_empty() {
457	// Fast path: return a slice of the raw JSON without any
458	// copying.
459	let borrowed = &self.slice[start..self.index];
460	self.index += `1`;
461	return result(self, borrowed).map(Reference::Borrowed);
462	} else {
463	scratch.extend_from_slice(&self.slice[start..self.index]);
464	self.index += `1`;
465	return result(self, scratch).map(Reference::Copied);
466	}
467	}
468	b'`\\`' => {
469	scratch.extend_from_slice(&self.slice[start..self.index]);
470	self.index += `1`;
471	tri!(parse_escape(self, validate, scratch));
472	start = self.index;
473	}
474	_ => {
475	self.index += `1`;
476	if validate {
477	return error(self, ErrorCode::ControlCharacterWhileParsingString);
478	}
479	}
480	}
481	}
482	}
483	}
484
// Opts `SliceRead` into the sealed marker trait that `Read` requires.
impl<'a> private::Sealed for SliceRead<'a> {}
486
487	impl<'a> Read<'a> for SliceRead<'a> {
488	#[inline]
489	fn next(&mut self) -> Result<Option<u8>> {
490	// `Ok(self.slice.get(self.index).map(\|ch\| { self.index += 1; ch }))`*
491	// is about 10% slower.
492	Ok(if self.index < self.slice.len() {
493	let ch = self.slice[self.index];
494	self.index += `1`;
495	Some(ch)
496	} else {
497	None
498	})
499	}
500
501	#[inline]
502	fn peek(&mut self) -> Result<Option<u8>> {
503	// `Ok(self.slice.get(self.index).map(\|ch\| ch))` is about 10% slower*
504	// for some reason.
505	Ok(if self.index < self.slice.len() {
506	Some(self.slice[self.index])
507	} else {
508	None
509	})
510	}
511
512	#[inline]
513	fn discard(&mut self) {
514	self.index += `1`;
515	}
516
517	fn position(&self) -> Position {
518	self.position_of_index(self.index)
519	}
520
521	fn peek_position(&self) -> Position {
522	// Cap it at slice.len() just in case the most recent call was next()
523	// and it returned the last byte.
524	self.position_of_index(cmp::min(self.slice.len(), self.index + `1`))
525	}
526
527	fn byte_offset(&self) -> usize {
528	self.index
529	}
530
531	fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
532	self.parse_str_bytes(scratch, `true`, as_str)
533	}
534
535	fn parse_str_raw<'s>(
536	&'s mut self,
537	scratch: &'s mut Vec<u8>,
538	) -> Result<Reference<'a, 's, [u8]>> {
539	self.parse_str_bytes(scratch, `false`, \|_, bytes\| Ok(bytes))
540	}
541
542	fn ignore_str(&mut self) -> Result<()> {
543	loop {
544	while self.index < self.slice.len() && !ESCAPE[self.slice[self.index] as usize] {
545	self.index += `1`;
546	}
547	if self.index == self.slice.len() {
548	return error(self, ErrorCode::EofWhileParsingString);
549	}
550	match self.slice[self.index] {
551	b'"' => {
552	self.index += `1`;
553	return Ok(());
554	}
555	b'`\\`' => {
556	self.index += `1`;
557	tri!(ignore_escape(self));
558	}
559	_ => {
560	return error(self, ErrorCode::ControlCharacterWhileParsingString);
561	}
562	}
563	}
564	}
565
566	fn decode_hex_escape(&mut self) -> Result<u16> {
567	if self.index + `4` > self.slice.len() {
568	self.index = self.slice.len();
569	return error(self, ErrorCode::EofWhileParsingString);
570	}
571
572	let mut n = `0`;
573	for _ in `0`..`4` {
574	let ch = decode_hex_val(self.slice[self.index]);
575	self.index += `1`;
576	match ch {
577	None => return error(self, ErrorCode::InvalidEscape),
578	Some(val) => {
579	n = (n << `4`) + val;
580	}
581	}
582	}
583	Ok(n)
584	}
585
586	#[cfg(feature = "raw_value")]
587	fn begin_raw_buffering(&mut self) {
588	self.raw_buffering_start_index = self.index;
589	}
590
591	#[cfg(feature = "raw_value")]
592	fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
593	where
594	V: Visitor<'a>,
595	{
596	let raw = &self.slice[self.raw_buffering_start_index..self.index];
597	let raw = match str::from_utf8(raw) {
598	Ok(raw) => raw,
599	Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
600	};
601	visitor.visit_map(BorrowedRawDeserializer {
602	raw_value: Some(raw),
603	})
604	}
605
606	const should_early_return_if_failed: bool = `false`;
607
608	#[inline]
609	#[cold]
610	fn set_failed(&mut self, _failed: &mut bool) {
611	self.slice = &self.slice[..self.index];
612	}
613	}
614
615	//////////////////////////////////////////////////////////////////////////////
616
617	impl<'a> StrRead<'a> {
618	/// Create a JSON input source to read from a UTF-8 string.
619	pub fn new(s: &'a str) -> Self {
620	StrRead {
621	delegate: SliceRead::new(s.as_bytes()),
622	#[cfg(feature = "raw_value")]
623	data: s,
624	}
625	}
626	}
627
// Opts `StrRead` into the sealed marker trait that `Read` requires.
impl<'a> private::Sealed for StrRead<'a> {}
629
630	impl<'a> Read<'a> for StrRead<'a> {
631	#[inline]
632	fn next(&mut self) -> Result<Option<u8>> {
633	self.delegate.next()
634	}
635
636	#[inline]
637	fn peek(&mut self) -> Result<Option<u8>> {
638	self.delegate.peek()
639	}
640
641	#[inline]
642	fn discard(&mut self) {
643	self.delegate.discard();
644	}
645
646	fn position(&self) -> Position {
647	self.delegate.position()
648	}
649
650	fn peek_position(&self) -> Position {
651	self.delegate.peek_position()
652	}
653
654	fn byte_offset(&self) -> usize {
655	self.delegate.byte_offset()
656	}
657
658	fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
659	self.delegate.parse_str_bytes(scratch, `true`, \|_, bytes\| {
660	// The deserialization input came in as &str with a UTF-8 guarantee,
661	// and the \u-escapes are checked along the way, so don't need to
662	// check here.
663	Ok(unsafe { str::from_utf8_unchecked(bytes) })
664	})
665	}
666
667	fn parse_str_raw<'s>(
668	&'s mut self,
669	scratch: &'s mut Vec<u8>,
670	) -> Result<Reference<'a, 's, [u8]>> {
671	self.delegate.parse_str_raw(scratch)
672	}
673
674	fn ignore_str(&mut self) -> Result<()> {
675	self.delegate.ignore_str()
676	}
677
678	fn decode_hex_escape(&mut self) -> Result<u16> {
679	self.delegate.decode_hex_escape()
680	}
681
682	#[cfg(feature = "raw_value")]
683	fn begin_raw_buffering(&mut self) {
684	self.delegate.begin_raw_buffering();
685	}
686
687	#[cfg(feature = "raw_value")]
688	fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
689	where
690	V: Visitor<'a>,
691	{
692	let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
693	visitor.visit_map(BorrowedRawDeserializer {
694	raw_value: Some(raw),
695	})
696	}
697
698	const should_early_return_if_failed: bool = `false`;
699
700	#[inline]
701	#[cold]
702	fn set_failed(&mut self, failed: &mut bool) {
703	self.delegate.set_failed(failed);
704	}
705	}
706
707	//////////////////////////////////////////////////////////////////////////////
708
// Opts mutable references to any reader into the sealed marker trait that
// `Read` requires.
impl<'a, 'de, R> private::Sealed for &'a mut R where R: Read<'de> {}
710
711	impl<'a, 'de, R> Read<'de> for &'a mut R
712	where
713	R: Read<'de>,
714	{
715	fn next(&mut self) -> Result<Option<u8>> {
716	R::next(self)
717	}
718
719	fn peek(&mut self) -> Result<Option<u8>> {
720	R::peek(self)
721	}
722
723	fn discard(&mut self) {
724	R::discard(self);
725	}
726
727	fn position(&self) -> Position {
728	R::position(self)
729	}
730
731	fn peek_position(&self) -> Position {
732	R::peek_position(self)
733	}
734
735	fn byte_offset(&self) -> usize {
736	R::byte_offset(self)
737	}
738
739	fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
740	R::parse_str(self, scratch)
741	}
742
743	fn parse_str_raw<'s>(
744	&'s mut self,
745	scratch: &'s mut Vec<u8>,
746	) -> Result<Reference<'de, 's, [u8]>> {
747	R::parse_str_raw(self, scratch)
748	}
749
750	fn ignore_str(&mut self) -> Result<()> {
751	R::ignore_str(self)
752	}
753
754	fn decode_hex_escape(&mut self) -> Result<u16> {
755	R::decode_hex_escape(self)
756	}
757
758	#[cfg(feature = "raw_value")]
759	fn begin_raw_buffering(&mut self) {
760	R::begin_raw_buffering(self);
761	}
762
763	#[cfg(feature = "raw_value")]
764	fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
765	where
766	V: Visitor<'de>,
767	{
768	R::end_raw_buffering(self, visitor)
769	}
770
771	const should_early_return_if_failed: bool = R::should_early_return_if_failed;
772
773	fn set_failed(&mut self, failed: &mut bool) {
774	R::set_failed(self, failed);
775	}
776	}
777
778	//////////////////////////////////////////////////////////////////////////////
779
/// Marker for whether StreamDeserializer can implement FusedIterator.
///
/// In this file it is implemented for `SliceRead` and `StrRead` only.
pub trait Fused: private::Sealed {}
impl<'a> Fused for SliceRead<'a> {}
impl<'a> Fused for StrRead<'a> {}
784
// Lookup table of bytes that must be escaped. A value of true at index i means
// that byte i requires an escape sequence in the input.
//
// The string parsers use it to skip quickly over ordinary bytes and stop at a
// quote, a backslash or a control character.
static ESCAPE: [bool; 256] = {
    const CT: bool = true; // control character \x00..=\x1F
    const QU: bool = true; // quote \x22
    const BS: bool = true; // backslash \x5C
    const __: bool = false; // allow unescaped
    [
        //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
        CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 0
        CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, CT, // 1
        __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
        __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
    ]
};
812
813	fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
814	where
815	R: ?Sized + Read<'de>,
816	{
817	match tri!(read.next()) {
818	Some(b) => Ok(b),
819	None => error(read, ErrorCode::EofWhileParsingString),
820	}
821	}
822
823	fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
824	where
825	R: ?Sized + Read<'de>,
826	{
827	match tri!(read.peek()) {
828	Some(b) => Ok(b),
829	None => error(read, ErrorCode::EofWhileParsingString),
830	}
831	}
832
833	fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
834	where
835	R: ?Sized + Read<'de>,
836	{
837	let position = read.position();
838	Err(Error::syntax(reason, position.line, position.column))
839	}
840
841	fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
842	str::from_utf8(slice).or_else(\|_\| error(read, ErrorCode::InvalidUnicodeCodePoint))
843	}
844
845	/// Parses a JSON escape sequence and appends it into the scratch space. Assumes
846	/// the previous byte read was a backslash.
847	fn parse_escape<'de, R: Read<'de>>(
848	read: &mut R,
849	validate: bool,
850	scratch: &mut Vec<u8>,
851	) -> Result<()> {
852	let ch = tri!(next_or_eof(read));
853
854	match ch {
855	b'"' => scratch.push(b'"'),
856	b'`\\`' => scratch.push(b'`\\`'),
857	b'/' => scratch.push(b'/'),
858	b'b' => scratch.push(b'`\x08`'),
859	b'f' => scratch.push(b'`\x0c`'),
860	b'n' => scratch.push(b'`\n`'),
861	b'r' => scratch.push(b'`\r`'),
862	b't' => scratch.push(b'`\t`'),
863	b'u' => {
864	fn encode_surrogate(scratch: &mut Vec<u8>, n: u16) {
865	scratch.extend_from_slice(&[
866	(n >> `12` & `0b0000_1111`) as u8 \| `0b1110_0000`,
867	(n >> `6` & `0b0011_1111`) as u8 \| `0b1000_0000`,
868	(n & `0b0011_1111`) as u8 \| `0b1000_0000`,
869	]);
870	}
871
872	let c = match tri!(read.decode_hex_escape()) {
873	n @ `0xDC00`..=`0xDFFF` => {
874	return if validate {
875	error(read, ErrorCode::LoneLeadingSurrogateInHexEscape)
876	} else {
877	encode_surrogate(scratch, n);
878	Ok(())
879	};
880	}
881
882	// Non-BMP characters are encoded as a sequence of two hex
883	// escapes, representing UTF-16 surrogates. If deserializing a
884	// utf-8 string the surrogates are required to be paired,
885	// whereas deserializing a byte string accepts lone surrogates.
886	n1 @ `0xD800`..=`0xDBFF` => {
887	if tri!(peek_or_eof(read)) == b'`\\`' {
888	read.discard();
889	} else {
890	return if validate {
891	read.discard();
892	error(read, ErrorCode::UnexpectedEndOfHexEscape)
893	} else {
894	encode_surrogate(scratch, n1);
895	Ok(())
896	};
897	}
898
899	if tri!(peek_or_eof(read)) == b'u' {
900	read.discard();
901	} else {
902	return if validate {
903	read.discard();
904	error(read, ErrorCode::UnexpectedEndOfHexEscape)
905	} else {
906	encode_surrogate(scratch, n1);
907	// The \ prior to this byte started an escape sequence,
908	// so we need to parse that now. This recursive call
909	// does not blow the stack on malicious input because
910	// the escape is not \u, so it will be handled by one
911	// of the easy nonrecursive cases.
912	parse_escape(read, validate, scratch)
913	};
914	}
915
916	let n2 = tri!(read.decode_hex_escape());
917
918	if n2 < `0xDC00` \|\| n2 > `0xDFFF` {
919	return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
920	}
921
922	let n = (((n1 - `0xD800`) as u32) << `10` \| (n2 - `0xDC00`) as u32) + `0x1_0000`;
923
924	match char::from_u32(n) {
925	Some(c) => c,
926	None => {
927	return error(read, ErrorCode::InvalidUnicodeCodePoint);
928	}
929	}
930	}
931
932	// Every u16 outside of the surrogate ranges above is guaranteed
933	// to be a legal char.
934	n => char::from_u32(n as u32).unwrap(),
935	};
936
937	scratch.extend_from_slice(c.encode_utf8(&mut [`0_u8`; `4`]).as_bytes());
938	}
939	_ => {
940	return error(read, ErrorCode::InvalidEscape);
941	}
942	}
943
944	Ok(())
945	}
946
947	/// Parses a JSON escape sequence and discards the value. Assumes the previous
948	/// byte read was a backslash.
949	fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
950	where
951	R: ?Sized + Read<'de>,
952	{
953	let ch = tri!(next_or_eof(read));
954
955	match ch {
956	b'"' \| b'`\\`' \| b'/' \| b'b' \| b'f' \| b'n' \| b'r' \| b't' => {}
957	b'u' => {
958	// At this point we don't care if the codepoint is valid. We just
959	// want to consume it. We don't actually know what is valid or not
960	// at this point, because that depends on if this string will
961	// ultimately be parsed into a string or a byte buffer in the "real"
962	// parse.
963
964	tri!(read.decode_hex_escape());
965	}
966	_ => {
967	return error(read, ErrorCode::InvalidEscape);
968	}
969	}
970
971	Ok(())
972	}
973
// Lookup table mapping an ASCII byte to the value of the hex digit it
// represents, or 255 if the byte is not a hex digit. Upper- and lowercase
// letters are both accepted.
static HEX: [u8; 256] = {
    const __: u8 = 255; // not a hex digit
    [
        //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
        00, 01, 02, 03, 04, 05, 06, 07, 08, 09, __, __, __, __, __, __, // 3
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5
        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
    ]
};

/// Returns the numeric value (0 to 15) of the ASCII hex digit `val`, or `None`
/// if `val` is not a hex digit.
fn decode_hex_val(val: u8) -> Option<u16> {
    match HEX[val as usize] {
        255 => None,
        digit => Some(u16::from(digit)),
    }
}
1005