1 | use std::borrow::{Borrow, Cow};
|
2 | use std::fmt::{self, Debug, Formatter};
|
3 | use std::io;
|
4 | use std::ops::Deref;
|
5 |
|
6 | #[cfg (feature = "async-tokio" )]
|
7 | use std::{
|
8 | pin::Pin,
|
9 | task::{Context, Poll},
|
10 | };
|
11 |
|
12 | #[cfg (feature = "serialize" )]
|
13 | use serde::de::{Deserialize, Deserializer, Error, Visitor};
|
14 | #[cfg (feature = "serialize" )]
|
15 | use serde::ser::{Serialize, Serializer};
|
16 |
|
17 | #[allow (clippy::ptr_arg)]
|
18 | pub fn write_cow_string(f: &mut Formatter, cow_string: &Cow<[u8]>) -> fmt::Result {
|
19 | match cow_string {
|
20 | Cow::Owned(s: &Vec) => {
|
21 | write!(f, "Owned(" )?;
|
22 | write_byte_string(f, byte_string:s)?;
|
23 | }
|
24 | Cow::Borrowed(s: &&[u8]) => {
|
25 | write!(f, "Borrowed(" )?;
|
26 | write_byte_string(f, byte_string:s)?;
|
27 | }
|
28 | }
|
29 | write!(f, ")" )
|
30 | }
|
31 |
|
32 | pub fn write_byte_string(f: &mut Formatter, byte_string: &[u8]) -> fmt::Result {
|
33 | write!(f, " \"" )?;
|
34 | for b: &u8 in byte_string {
|
35 | match *b {
|
36 | 32..=33 | 35..=126 => write!(f, " {}" , *b as char)?,
|
37 | 34 => write!(f, " \\\"" )?,
|
38 | _ => write!(f, " {:#02X}" , b)?,
|
39 | }
|
40 | }
|
41 | write!(f, " \"" )?;
|
42 | Ok(())
|
43 | }
|
44 |
|
45 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
46 |
|
47 | /// A version of [`Cow`] that can borrow from two different buffers, one of them
|
48 | /// is a deserializer input.
|
49 | ///
|
50 | /// # Lifetimes
|
51 | ///
|
52 | /// - `'i`: lifetime of the data that deserializer borrow from the parsed input
|
53 | /// - `'s`: lifetime of the data that owned by a deserializer
|
54 | pub enum CowRef<'i, 's, B>
|
55 | where
|
56 | B: ToOwned + ?Sized,
|
57 | {
|
58 | /// An input borrowed from the parsed data
|
59 | Input(&'i B),
|
60 | /// An input borrowed from the buffer owned by another deserializer
|
61 | Slice(&'s B),
|
62 | /// An input taken from an external deserializer, owned by that deserializer
|
63 | Owned(<B as ToOwned>::Owned),
|
64 | }
|
65 | impl<'i, 's, B> Deref for CowRef<'i, 's, B>
|
66 | where
|
67 | B: ToOwned + ?Sized,
|
68 | B::Owned: Borrow<B>,
|
69 | {
|
70 | type Target = B;
|
71 |
|
72 | fn deref(&self) -> &B {
|
73 | match *self {
|
74 | Self::Input(borrowed: &'i B) => borrowed,
|
75 | Self::Slice(borrowed: &'s B) => borrowed,
|
76 | Self::Owned(ref owned: &impl Borrow) => owned.borrow(),
|
77 | }
|
78 | }
|
79 | }
|
80 |
|
81 | impl<'i, 's, B> Debug for CowRef<'i, 's, B>
|
82 | where
|
83 | B: ToOwned + ?Sized + Debug,
|
84 | B::Owned: Debug,
|
85 | {
|
86 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
87 | match *self {
|
88 | Self::Input(borrowed: &'i B) => Debug::fmt(self:borrowed, f),
|
89 | Self::Slice(borrowed: &'s B) => Debug::fmt(self:borrowed, f),
|
90 | Self::Owned(ref owned: &impl Debug) => Debug::fmt(self:owned, f),
|
91 | }
|
92 | }
|
93 | }
|
94 |
|
95 | impl<'i, 's> CowRef<'i, 's, str> {
|
96 | /// Supply to the visitor a borrowed string, a string slice, or an owned
|
97 | /// string depending on the kind of input. Unlike [`Self::deserialize_all`],
|
98 | /// only part of [`Self::Owned`] string will be passed to the visitor.
|
99 | ///
|
100 | /// Calls
|
101 | /// - `visitor.visit_borrowed_str` if data borrowed from the input
|
102 | /// - `visitor.visit_str` if data borrowed from another source
|
103 | /// - `visitor.visit_string` if data owned by this type
|
104 | #[cfg (feature = "serialize" )]
|
105 | pub fn deserialize_str<V, E>(self, visitor: V) -> Result<V::Value, E>
|
106 | where
|
107 | V: Visitor<'i>,
|
108 | E: Error,
|
109 | {
|
110 | match self {
|
111 | Self::Input(s) => visitor.visit_borrowed_str(s),
|
112 | Self::Slice(s) => visitor.visit_str(s),
|
113 | Self::Owned(s) => visitor.visit_string(s),
|
114 | }
|
115 | }
|
116 |
|
117 | /// Calls [`Visitor::visit_bool`] with `true` or `false` if text contains
|
118 | /// [valid] boolean representation, otherwise calls [`Self::deserialize_str`].
|
119 | ///
|
120 | /// The valid boolean representations are only `"true"`, `"false"`, `"1"`, and `"0"`.
|
121 | ///
|
122 | /// [valid]: https://www.w3.org/TR/xmlschema11-2/#boolean
|
123 | #[cfg (feature = "serialize" )]
|
124 | pub fn deserialize_bool<V, E>(self, visitor: V) -> Result<V::Value, E>
|
125 | where
|
126 | V: Visitor<'i>,
|
127 | E: Error,
|
128 | {
|
129 | match self.as_ref() {
|
130 | "1" | "true" => visitor.visit_bool(true),
|
131 | "0" | "false" => visitor.visit_bool(false),
|
132 | _ => self.deserialize_str(visitor),
|
133 | }
|
134 | }
|
135 | }
|
136 |
|
137 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
138 |
|
139 | /// Wrapper around `Vec<u8>` that has a human-readable debug representation:
|
140 | /// printable ASCII symbols output as is, all other output in HEX notation.
|
141 | ///
|
142 | /// Also, when [`serialize`] feature is on, this type deserialized using
|
143 | /// [`deserialize_byte_buf`](serde::Deserializer::deserialize_byte_buf) instead
|
144 | /// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq)
|
145 | ///
|
146 | /// [`serialize`]: ../index.html#serialize
|
147 | #[derive (PartialEq, Eq)]
|
148 | pub struct ByteBuf(pub Vec<u8>);
|
149 |
|
150 | impl Debug for ByteBuf {
|
151 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
152 | write_byte_string(f, &self.0)
|
153 | }
|
154 | }
|
155 |
|
156 | #[cfg (feature = "serialize" )]
|
157 | impl<'de> Deserialize<'de> for ByteBuf {
|
158 | fn deserialize<D>(d: D) -> Result<Self, D::Error>
|
159 | where
|
160 | D: Deserializer<'de>,
|
161 | {
|
162 | struct ValueVisitor;
|
163 |
|
164 | impl<'de> Visitor<'de> for ValueVisitor {
|
165 | type Value = ByteBuf;
|
166 |
|
167 | fn expecting(&self, f: &mut Formatter) -> fmt::Result {
|
168 | f.write_str("byte data" )
|
169 | }
|
170 |
|
171 | fn visit_bytes<E: Error>(self, v: &[u8]) -> Result<Self::Value, E> {
|
172 | Ok(ByteBuf(v.to_vec()))
|
173 | }
|
174 |
|
175 | fn visit_byte_buf<E: Error>(self, v: Vec<u8>) -> Result<Self::Value, E> {
|
176 | Ok(ByteBuf(v))
|
177 | }
|
178 | }
|
179 |
|
180 | d.deserialize_byte_buf(ValueVisitor)
|
181 | }
|
182 | }
|
183 |
|
184 | #[cfg (feature = "serialize" )]
|
185 | impl Serialize for ByteBuf {
|
186 | fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
187 | where
|
188 | S: Serializer,
|
189 | {
|
190 | serializer.serialize_bytes(&self.0)
|
191 | }
|
192 | }
|
193 |
|
194 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
195 |
|
196 | /// Wrapper around `&[u8]` that has a human-readable debug representation:
|
197 | /// printable ASCII symbols output as is, all other output in HEX notation.
|
198 | ///
|
199 | /// Also, when [`serialize`] feature is on, this type deserialized using
|
200 | /// [`deserialize_bytes`](serde::Deserializer::deserialize_bytes) instead
|
201 | /// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq)
|
202 | ///
|
203 | /// [`serialize`]: ../index.html#serialize
|
204 | #[derive (PartialEq, Eq)]
|
205 | pub struct Bytes<'de>(pub &'de [u8]);
|
206 |
|
207 | impl<'de> Debug for Bytes<'de> {
|
208 | fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
209 | write_byte_string(f, self.0)
|
210 | }
|
211 | }
|
212 |
|
213 | #[cfg (feature = "serialize" )]
|
214 | impl<'de> Deserialize<'de> for Bytes<'de> {
|
215 | fn deserialize<D>(d: D) -> Result<Self, D::Error>
|
216 | where
|
217 | D: Deserializer<'de>,
|
218 | {
|
219 | struct ValueVisitor;
|
220 |
|
221 | impl<'de> Visitor<'de> for ValueVisitor {
|
222 | type Value = Bytes<'de>;
|
223 |
|
224 | fn expecting(&self, f: &mut Formatter) -> fmt::Result {
|
225 | f.write_str("borrowed bytes" )
|
226 | }
|
227 |
|
228 | fn visit_borrowed_bytes<E: Error>(self, v: &'de [u8]) -> Result<Self::Value, E> {
|
229 | Ok(Bytes(v))
|
230 | }
|
231 | }
|
232 |
|
233 | d.deserialize_bytes(ValueVisitor)
|
234 | }
|
235 | }
|
236 |
|
237 | #[cfg (feature = "serialize" )]
|
238 | impl<'de> Serialize for Bytes<'de> {
|
239 | fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
240 | where
|
241 | S: Serializer,
|
242 | {
|
243 | serializer.serialize_bytes(self.0)
|
244 | }
|
245 | }
|
246 |
|
247 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
248 |
|
249 | /// A simple producer of infinite stream of bytes, useful in tests.
|
250 | ///
|
251 | /// Will repeat `chunk` field indefinitely.
|
252 | pub struct Fountain<'a> {
|
253 | /// That piece of data repeated infinitely...
|
254 | pub chunk: &'a [u8],
|
255 | /// Part of `chunk` that was consumed by BufRead impl
|
256 | pub consumed: usize,
|
257 | /// The overall count of read bytes
|
258 | pub overall_read: u64,
|
259 | }
|
260 |
|
261 | impl<'a> io::Read for Fountain<'a> {
|
262 | fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
263 | let available: &[u8] = &self.chunk[self.consumed..];
|
264 | let len: usize = buf.len().min(available.len());
|
265 | let (portion: &[u8], _) = available.split_at(mid:len);
|
266 |
|
267 | buf.copy_from_slice(src:portion);
|
268 | Ok(len)
|
269 | }
|
270 | }
|
271 |
|
272 | impl<'a> io::BufRead for Fountain<'a> {
|
273 | #[inline ]
|
274 | fn fill_buf(&mut self) -> io::Result<&[u8]> {
|
275 | Ok(&self.chunk[self.consumed..])
|
276 | }
|
277 |
|
278 | fn consume(&mut self, amt: usize) {
|
279 | self.consumed += amt;
|
280 | if self.consumed == self.chunk.len() {
|
281 | self.consumed = 0;
|
282 | }
|
283 | self.overall_read += amt as u64;
|
284 | }
|
285 | }
|
286 |
|
287 | #[cfg (feature = "async-tokio" )]
|
288 | impl<'a> tokio::io::AsyncRead for Fountain<'a> {
|
289 | fn poll_read(
|
290 | self: Pin<&mut Self>,
|
291 | _cx: &mut Context<'_>,
|
292 | buf: &mut tokio::io::ReadBuf<'_>,
|
293 | ) -> Poll<io::Result<()>> {
|
294 | let available = &self.chunk[self.consumed..];
|
295 | let len = buf.remaining().min(available.len());
|
296 | let (portion, _) = available.split_at(len);
|
297 |
|
298 | buf.put_slice(portion);
|
299 | Poll::Ready(Ok(()))
|
300 | }
|
301 | }
|
302 |
|
303 | #[cfg (feature = "async-tokio" )]
|
304 | impl<'a> tokio::io::AsyncBufRead for Fountain<'a> {
|
305 | #[inline ]
|
306 | fn poll_fill_buf(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> {
|
307 | Poll::Ready(io::BufRead::fill_buf(self.get_mut()))
|
308 | }
|
309 |
|
310 | #[inline ]
|
311 | fn consume(self: Pin<&mut Self>, amt: usize) {
|
312 | io::BufRead::consume(self.get_mut(), amt);
|
313 | }
|
314 | }
|
315 |
|
316 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
317 |
|
318 | /// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab).
|
319 | #[inline ]
|
320 | pub const fn is_whitespace(b: u8) -> bool {
|
321 | matches!(b, b' ' | b' \r' | b' \n' | b' \t' )
|
322 | }
|
323 |
|
324 | /// Calculates name from an element-like content. Name is the first word in `content`,
|
325 | /// where word boundaries is XML whitespace characters.
|
326 | ///
|
327 | /// 'Whitespace' refers to the definition used by [`is_whitespace`].
|
328 | #[inline ]
|
329 | pub const fn name_len(mut bytes: &[u8]) -> usize {
|
330 | // Note: A pattern matching based approach (instead of indexing) allows
|
331 | // making the function const.
|
332 | let mut len: usize = 0;
|
333 | while let [first: &u8, rest: &[u8] @ ..] = bytes {
|
334 | if is_whitespace(*first) {
|
335 | break;
|
336 | }
|
337 | len += 1;
|
338 | bytes = rest;
|
339 | }
|
340 | len
|
341 | }
|
342 |
|
343 | /// Returns a byte slice with leading XML whitespace bytes removed.
|
344 | ///
|
345 | /// 'Whitespace' refers to the definition used by [`is_whitespace`].
|
346 | #[inline ]
|
347 | pub const fn trim_xml_start(mut bytes: &[u8]) -> &[u8] {
|
348 | // Note: A pattern matching based approach (instead of indexing) allows
|
349 | // making the function const.
|
350 | while let [first: &u8, rest: &[u8] @ ..] = bytes {
|
351 | if is_whitespace(*first) {
|
352 | bytes = rest;
|
353 | } else {
|
354 | break;
|
355 | }
|
356 | }
|
357 | bytes
|
358 | }
|
359 |
|
360 | /// Returns a byte slice with trailing XML whitespace bytes removed.
|
361 | ///
|
362 | /// 'Whitespace' refers to the definition used by [`is_whitespace`].
|
363 | #[inline ]
|
364 | pub const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
|
365 | // Note: A pattern matching based approach (instead of indexing) allows
|
366 | // making the function const.
|
367 | while let [rest: &[u8] @ .., last: &u8] = bytes {
|
368 | if is_whitespace(*last) {
|
369 | bytes = rest;
|
370 | } else {
|
371 | break;
|
372 | }
|
373 | }
|
374 | bytes
|
375 | }
|
376 |
|
377 | ////////////////////////////////////////////////////////////////////////////////////////////////////
|
378 |
|
379 | #[cfg (test)]
|
380 | mod tests {
|
381 | use super::*;
|
382 | use pretty_assertions::assert_eq;
|
383 |
|
384 | #[test ]
|
385 | fn write_byte_string0() {
|
386 | let bytes = ByteBuf(vec![10, 32, 32, 32, 32, 32, 32, 32, 32]);
|
387 | assert_eq!(format!("{:?}" , bytes), " \"0xA \"" );
|
388 | }
|
389 |
|
390 | #[test ]
|
391 | fn write_byte_string1() {
|
392 | let bytes = ByteBuf(vec![
|
393 | 104, 116, 116, 112, 58, 47, 47, 119, 119, 119, 46, 119, 51, 46, 111, 114, 103, 47, 50,
|
394 | 48, 48, 50, 47, 48, 55, 47, 111, 119, 108, 35,
|
395 | ]);
|
396 | assert_eq!(
|
397 | format!("{:?}" , bytes),
|
398 | r##""http://www.w3.org/2002/07/owl#""##
|
399 | );
|
400 | }
|
401 |
|
402 | #[test ]
|
403 | fn write_byte_string3() {
|
404 | let bytes = ByteBuf(vec![
|
405 | 67, 108, 97, 115, 115, 32, 73, 82, 73, 61, 34, 35, 66, 34,
|
406 | ]);
|
407 | assert_eq!(format!("{:?}" , bytes), r##""Class IRI=\"#B\"""## );
|
408 | }
|
409 |
|
410 | #[test ]
|
411 | fn name_len() {
|
412 | assert_eq!(super::name_len(b"" ), 0);
|
413 | assert_eq!(super::name_len(b" abc" ), 0);
|
414 | assert_eq!(super::name_len(b" \t\r\n" ), 0);
|
415 |
|
416 | assert_eq!(super::name_len(b"abc" ), 3);
|
417 | assert_eq!(super::name_len(b"abc " ), 3);
|
418 |
|
419 | assert_eq!(super::name_len(b"a bc" ), 1);
|
420 | assert_eq!(super::name_len(b"ab \tc" ), 2);
|
421 | assert_eq!(super::name_len(b"ab \rc" ), 2);
|
422 | assert_eq!(super::name_len(b"ab \nc" ), 2);
|
423 | }
|
424 |
|
425 | #[test ]
|
426 | fn trim_xml_start() {
|
427 | assert_eq!(Bytes(super::trim_xml_start(b"" )), Bytes(b"" ));
|
428 | assert_eq!(Bytes(super::trim_xml_start(b"abc" )), Bytes(b"abc" ));
|
429 | assert_eq!(
|
430 | Bytes(super::trim_xml_start(b" \r\n\t ab \t\r\nc \t\r\n" )),
|
431 | Bytes(b"ab \t\r\nc \t\r\n" )
|
432 | );
|
433 | }
|
434 |
|
435 | #[test ]
|
436 | fn trim_xml_end() {
|
437 | assert_eq!(Bytes(super::trim_xml_end(b"" )), Bytes(b"" ));
|
438 | assert_eq!(Bytes(super::trim_xml_end(b"abc" )), Bytes(b"abc" ));
|
439 | assert_eq!(
|
440 | Bytes(super::trim_xml_end(b" \r\n\t ab \t\r\nc \t\r\n" )),
|
441 | Bytes(b" \r\n\t ab \t\r\nc" )
|
442 | );
|
443 | }
|
444 | }
|
445 | |