ext_vec.rs source code [crates/bstr-1.5.0/src/ext_vec.rs]

1	use core::fmt;
2	use core::iter;
3	use core::ops;
4	use core::ptr;
5
6	use alloc::{borrow::Cow, string::String, vec, vec::Vec};
7
8	#[cfg(feature = "std")]
9	use std::{
10	error,
11	ffi::{OsStr, OsString},
12	path::{Path, PathBuf},
13	};
14
15	use crate::{
16	ext_slice::ByteSlice,
17	utf8::{self, Utf8Error},
18	};
19
/// Concatenate the elements given by the iterator together into a single
/// `Vec<u8>`.
///
/// The elements may be any type that can be cheaply converted into an `&[u8]`.
/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
/// An empty iterator yields an empty vector.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::concat(&["foo", "bar", "baz"]);
/// assert_eq!(s, "foobarbaz".as_bytes());
/// ```
#[inline]
pub fn concat<T, I>(elements: I) -> Vec<u8>
where
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    let mut dest: Vec<u8> = Vec::new();
    for element in elements {
        // Append the raw bytes of each element, in iteration order.
        dest.extend_from_slice(element.as_ref());
    }
    dest
}
48
/// Join the elements given by the iterator with the given separator into a
/// single `Vec<u8>`.
///
/// Both the separator and the elements may be any type that can be cheaply
/// converted into an `&[u8]`. This includes, but is not limited to,
/// `&str`, `&BStr` and `&[u8]` itself.
///
/// The separator is only written *between* elements: an empty iterator
/// yields an empty vector and a single element is returned without any
/// separator.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::join(",", &["foo", "bar", "baz"]);
/// assert_eq!(s, "foo,bar,baz".as_bytes());
/// ```
#[inline]
pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
where
    B: AsRef<[u8]>,
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    let mut it = elements.into_iter();
    let mut dest: Vec<u8> = Vec::new();
    // The first element is written without a leading separator; every
    // following element is preceded by exactly one separator.
    if let Some(first) = it.next() {
        dest.extend_from_slice(first.as_ref());
        let sep: &[u8] = separator.as_ref();
        for element in it {
            dest.extend_from_slice(sep);
            dest.extend_from_slice(element.as_ref());
        }
    }
    dest
}
87
// Implementation of `ByteVec` for `Vec<u8>`. Each accessor hands back the
// vector itself, so none of them performs any work.
impl ByteVec for Vec<u8> {
    // Shared view: the receiver already is the vector.
    #[inline]
    fn as_vec(&self) -> &Vec<u8> {
        self
    }

    // Mutable view: the receiver already is the vector.
    #[inline]
    fn as_vec_mut(&mut self) -> &mut Vec<u8> {
        self
    }

    // Owned conversion: returns the receiver unchanged.
    #[inline]
    fn into_vec(self) -> Vec<u8> {
        self
    }
}
104
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
mod private {
    pub trait Sealed {}
}
impl private::Sealed for Vec<u8> {}
111
112	/// A trait that extends `Vec<u8>` with string oriented methods.
113	///
114	/// Note that when using the constructor methods, such as
115	/// `ByteVec::from_slice`, one should actually call them using the concrete
116	/// type. For example:
117	///
118	/// ```
119	/// use bstr::{B, ByteVec};
120	///
121	/// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
122	/// assert_eq!(s, B("abc"));
123	/// ```
124	///
125	/// This trait is sealed and cannot be implemented outside of `bstr`.
126	pub trait ByteVec: private::Sealed {
/// A method for accessing the raw vector bytes of this type. This is
/// always a no-op and callers shouldn't care about it. This only exists
/// for making the extension trait work: the provided methods of this
/// trait read the underlying vector through it.
#[doc(hidden)]
fn as_vec(&self) -> &Vec<u8>;

/// A method for accessing the raw vector bytes of this type, mutably. This
/// is always a no-op and callers shouldn't care about it. This only exists
/// for making the extension trait work: the provided methods of this
/// trait modify the underlying vector through it.
#[doc(hidden)]
fn as_vec_mut(&mut self) -> &mut Vec<u8>;

/// A method for consuming ownership of this vector. This is always a no-op
/// and callers shouldn't care about it. This only exists for making the
/// extension trait work: the consuming provided methods of this trait take
/// the underlying vector through it.
#[doc(hidden)]
fn into_vec(self) -> Vec<u8>
where
    Self: Sized;
146
/// Build a new owned byte string by copying the given bytes.
///
/// Anything that can be viewed as a `&[u8]` through `AsRef<[u8]>` is
/// accepted.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteVec};
///
/// let s = Vec::from_slice(b"abc");
/// assert_eq!(s, B("abc"));
/// ```
#[inline]
fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
    let slice: &[u8] = bytes.as_ref();
    Vec::from(slice)
}
163
/// Convert an owned OS string into a byte string.
///
/// On Unix the bytes of the OS string are taken over directly, so the
/// conversion always succeeds. On every other platform the OS string must
/// be valid UTF-8; if it is not, the original `OsString` is handed back
/// as the error.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::ffi::OsString;
///
/// use bstr::{B, ByteVec};
///
/// let os_str = OsString::from("foo");
/// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
/// assert_eq!(bs, B("foo"));
/// ```
#[inline]
#[cfg(feature = "std")]
fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
    // Unix: reuse the OS string's own byte buffer.
    #[cfg(unix)]
    #[inline]
    fn convert(os: OsString) -> Result<Vec<u8>, OsString> {
        use std::os::unix::ffi::OsStringExt;

        let raw: Vec<u8> = os.into_vec();
        Ok(raw)
    }

    // Elsewhere: only a UTF-8 OS string can be turned into bytes.
    #[cfg(not(unix))]
    #[inline]
    fn convert(os: OsString) -> Result<Vec<u8>, OsString> {
        match os.into_string() {
            Ok(text) => Ok(text.into_bytes()),
            Err(rejected) => Err(rejected),
        }
    }

    convert(os_str)
}
202
/// Lossily view an OS string slice as bytes.
///
/// On Unix the bytes of the OS string are borrowed as-is. On every other
/// platform the OS string is lossily decoded: valid UTF-8 is borrowed,
/// and anything else is copied into a new buffer in which invalid
/// sequences have been replaced by the Unicode replacement codepoint.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::ffi::OsStr;
///
/// use bstr::{B, ByteVec};
///
/// let os_str = OsStr::new("foo");
/// let bs = Vec::from_os_str_lossy(os_str);
/// assert_eq!(bs, B("foo"));
/// ```
#[inline]
#[cfg(feature = "std")]
fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
    // Unix: borrow the underlying bytes directly.
    #[cfg(unix)]
    #[inline]
    fn convert<'a>(os: &'a OsStr) -> Cow<'a, [u8]> {
        use std::os::unix::ffi::OsStrExt;

        let raw: &'a [u8] = os.as_bytes();
        Cow::Borrowed(raw)
    }

    // Elsewhere: go through a lossy UTF-8 conversion, keeping the
    // borrowed/owned distinction of the intermediate string.
    #[cfg(not(unix))]
    #[inline]
    fn convert<'a>(os: &'a OsStr) -> Cow<'a, [u8]> {
        match os.to_string_lossy() {
            Cow::Owned(replaced) => Cow::Owned(replaced.into_bytes()),
            Cow::Borrowed(text) => Cow::Borrowed(text.as_bytes()),
        }
    }

    convert(os_str)
}
246
/// Convert an owned file path into a byte string.
///
/// This defers to `from_os_string` on the path's OS string, so it
/// succeeds under the same conditions. On failure the original path is
/// returned as a `PathBuf`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::path::PathBuf;
///
/// use bstr::{B, ByteVec};
///
/// let path = PathBuf::from("foo");
/// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
/// assert_eq!(bs, B("foo"));
/// ```
#[inline]
#[cfg(feature = "std")]
fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
    let os = path.into_os_string();
    match Vec::from_os_string(os) {
        Ok(bytes) => Ok(bytes),
        // Re-wrap the rejected OS string so the caller gets a path back.
        Err(rejected) => Err(PathBuf::from(rejected)),
    }
}
271
/// Lossily view a file path as bytes.
///
/// This defers to `from_os_str_lossy` on the path's OS string, so it
/// borrows or allocates under the same conditions.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::path::Path;
///
/// use bstr::{B, ByteVec};
///
/// let path = Path::new("foo");
/// let bs = Vec::from_path_lossy(path);
/// assert_eq!(bs, B("foo"));
/// ```
#[inline]
#[cfg(feature = "std")]
fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
    let os: &'a OsStr = path.as_os_str();
    Vec::from_os_str_lossy(os)
}
298
/// Unescapes the given string into its raw bytes.
///
/// This looks for the escape sequences `\xNN`, `\0`, `\r`, `\n`, `\t`
/// and `\\` and translates them into their corresponding unescaped form.
///
/// Incomplete escape sequences or things that look like escape sequences
/// but are not (for example, `\i` or `\xYZ`) are passed through literally.
///
/// This is the dual of [`ByteSlice::escape_bytes`].
///
/// Note that the zero or NUL byte may be represented as either `\0` or
/// `\x00`. Both will be unescaped into the zero byte.
///
/// # Examples
///
/// This shows basic usage:
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use bstr::{B, BString, ByteVec};
///
/// assert_eq!(
///     BString::from(b"foo\xFFbar"),
///     Vec::unescape_bytes(r"foo\xFFbar"),
/// );
/// assert_eq!(
///     BString::from(b"foo\nbar"),
///     Vec::unescape_bytes(r"foo\nbar"),
/// );
/// assert_eq!(
///     BString::from(b"foo\tbar"),
///     Vec::unescape_bytes(r"foo\tbar"),
/// );
/// assert_eq!(
///     BString::from(b"foo\\bar"),
///     Vec::unescape_bytes(r"foo\\bar"),
/// );
/// assert_eq!(
///     BString::from("foo☃bar"),
///     Vec::unescape_bytes(r"foo☃bar"),
/// );
///
/// # }
/// ```
///
/// This shows some examples of how incomplete or "incorrect" escape
/// sequences get passed through literally.
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use bstr::{B, BString, ByteVec};
///
/// // Show some incomplete escape sequences.
/// assert_eq!(
///     BString::from(br"\"),
///     Vec::unescape_bytes(r"\"),
/// );
/// assert_eq!(
///     BString::from(br"\"),
///     Vec::unescape_bytes(r"\\"),
/// );
/// assert_eq!(
///     BString::from(br"\x"),
///     Vec::unescape_bytes(r"\x"),
/// );
/// assert_eq!(
///     BString::from(br"\xA"),
///     Vec::unescape_bytes(r"\xA"),
/// );
/// // And now some that kind of look like escape
/// // sequences, but aren't.
/// assert_eq!(
///     BString::from(br"\xZ"),
///     Vec::unescape_bytes(r"\xZ"),
/// );
/// assert_eq!(
///     BString::from(br"\xZZ"),
///     Vec::unescape_bytes(r"\xZZ"),
/// );
/// assert_eq!(
///     BString::from(br"\i"),
///     Vec::unescape_bytes(r"\i"),
/// );
/// assert_eq!(
///     BString::from(br"\u"),
///     Vec::unescape_bytes(r"\u"),
/// );
/// assert_eq!(
///     BString::from(br"\u{2603}"),
///     Vec::unescape_bytes(r"\u{2603}"),
/// );
///
/// # }
/// ```
#[inline]
#[cfg(feature = "alloc")]
fn unescape_bytes<S: AsRef<str>>(escaped: S) -> Vec<u8> {
    // Feed the input's chars through the crate's unescaping iterator and
    // gather everything it yields into the result vector.
    let text: &str = escaped.as_ref();
    let unescaper = crate::escape_bytes::UnescapeBytes::new(text.chars());
    unescaper.collect()
}
399
400	/// Appends the given byte to the end of this byte string.
401	///
402	/// Note that this is equivalent to the generic `Vec::push` method. This
403	/// method is provided to permit callers to explicitly differentiate
404	/// between pushing bytes, codepoints and strings.
405	///
406	/// # Examples
407	///
408	/// Basic usage:
409	///
410	/// ```
411	/// use bstr::ByteVec;
412	///
413	/// let mut s = <Vec<u8>>::from("abc");
414	/// s.push_byte(b'`\xE2`');
415	/// s.push_byte(b'`\x98`');
416	/// s.push_byte(b'`\x83`');
417	/// assert_eq!(s, "abc☃".as_bytes());
418	/// ```
419	#[inline]
420	fn push_byte(&mut self, byte: u8) {
421	self.as_vec_mut().push(byte);
422	}
423
424	/// Appends the given `char` to the end of this byte string.
425	///
426	/// # Examples
427	///
428	/// Basic usage:
429	///
430	/// ```
431	/// use bstr::ByteVec;
432	///
433	/// let mut s = <Vec<u8>>::from("abc");
434	/// s.push_char('1');
435	/// s.push_char('2');
436	/// s.push_char('3');
437	/// assert_eq!(s, "abc123".as_bytes());
438	/// ```
439	#[inline]
440	fn push_char(&mut self, ch: char) {
441	if ch.len_utf8() == `1` {
442	self.push_byte(ch as u8);
443	return;
444	}
445	self.as_vec_mut()
446	.extend_from_slice(ch.encode_utf8(&mut [`0`; `4`]).as_bytes());
447	}
448
449	/// Appends the given slice to the end of this byte string. This accepts
450	/// any type that be converted to a `&[u8]`. This includes, but is not
451	/// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
452	///
453	/// # Examples
454	///
455	/// Basic usage:
456	///
457	/// ```
458	/// use bstr::ByteVec;
459	///
460	/// let mut s = <Vec<u8>>::from("abc");
461	/// s.push_str(b"123");
462	/// assert_eq!(s, "abc123".as_bytes());
463	/// ```
464	#[inline]
465	fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
466	self.as_vec_mut().extend_from_slice(bytes.as_ref());
467	}
468
469	/// Converts a `Vec<u8>` into a `String` if and only if this byte string is
470	/// valid UTF-8.
471	///
472	/// If it is not valid UTF-8, then a
473	/// [`FromUtf8Error`](struct.FromUtf8Error.html)
474	/// is returned. (This error can be used to examine why UTF-8 validation
475	/// failed, or to regain the original byte string.)
476	///
477	/// # Examples
478	///
479	/// Basic usage:
480	///
481	/// ```
482	/// use bstr::ByteVec;
483	///
484	/// let bytes = Vec::from("hello");
485	/// let string = bytes.into_string().unwrap();
486	///
487	/// assert_eq!("hello", string);
488	/// ```
489	///
490	/// If this byte string is not valid UTF-8, then an error will be returned.
491	/// That error can then be used to inspect the location at which invalid
492	/// UTF-8 was found, or to regain the original byte string:
493	///
494	/// ```
495	/// use bstr::{B, ByteVec};
496	///
497	/// let bytes = Vec::from_slice(b"foo`\xFF`bar");
498	/// let err = bytes.into_string().unwrap_err();
499	///
500	/// assert_eq!(err.utf8_error().valid_up_to(), `3`);
501	/// assert_eq!(err.utf8_error().error_len(), Some(`1`));
502	///
503	/// // At no point in this example is an allocation performed.
504	/// let bytes = Vec::from(err.into_vec());
505	/// assert_eq!(bytes, B(b"foo`\xFF`bar"));
506	/// ```
507	#[inline]
508	fn into_string(self) -> Result<String, FromUtf8Error>
509	where
510	Self: Sized,
511	{
512	match utf8::validate(self.as_vec()) {
513	Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
514	Ok(()) => {
515	// SAFETY: This is safe because of the guarantees provided by
516	// utf8::validate.
517	unsafe { Ok(self.into_string_unchecked()) }
518	}
519	}
520	}
521
522	/// Lossily converts a `Vec<u8>` into a `String`. If this byte string
523	/// contains invalid UTF-8, then the invalid bytes are replaced with the
524	/// Unicode replacement codepoint.
525	///
526	/// # Examples
527	///
528	/// Basic usage:
529	///
530	/// ```
531	/// use bstr::ByteVec;
532	///
533	/// let bytes = Vec::from_slice(b"foo`\xFF`bar");
534	/// let string = bytes.into_string_lossy();
535	/// assert_eq!(string, "foo`\u{FFFD}`bar");
536	/// ```
537	#[inline]
538	fn into_string_lossy(self) -> String
539	where
540	Self: Sized,
541	{
542	match self.as_vec().to_str_lossy() {
543	Cow::Borrowed(_) => {
544	// SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
545	// the entire string is valid utf8.
546	unsafe { self.into_string_unchecked() }
547	}
548	Cow::Owned(s) => s,
549	}
550	}
551
552	/// Unsafely convert this byte string into a `String`, without checking for
553	/// valid UTF-8.
554	///
555	/// # Safety
556	///
557	/// Callers must* ensure that this byte string is valid UTF-8 before*
558	/// calling this method. Converting a byte string into a `String` that is
559	/// not valid UTF-8 is considered undefined behavior.
560	///
561	/// This routine is useful in performance sensitive contexts where the
562	/// UTF-8 validity of the byte string is already known and it is
563	/// undesirable to pay the cost of an additional UTF-8 validation check
564	/// that [`into_string`](#method.into_string) performs.
565	///
566	/// # Examples
567	///
568	/// Basic usage:
569	///
570	/// ```
571	/// use bstr::ByteVec;
572	///
573	/// // SAFETY: This is safe because string literals are guaranteed to be
574	/// // valid UTF-8 by the Rust compiler.
575	/// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
576	/// assert_eq!("☃βツ", s);
577	/// ```
578	#[inline]
579	unsafe fn into_string_unchecked(self) -> String
580	where
581	Self: Sized,
582	{
583	String::from_utf8_unchecked(self.into_vec())
584	}
585
/// Converts this byte string into an OS string, in place.
///
/// On Unix the bytes are handed to the OS string directly, so this always
/// succeeds. On every other platform the bytes must be valid UTF-8;
/// otherwise an error (with the original byte string) is returned.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::ffi::OsStr;
///
/// use bstr::ByteVec;
///
/// let bs = Vec::from("foo");
/// let os_str = bs.into_os_string().expect("should be valid UTF-8");
/// assert_eq!(os_str, OsStr::new("foo"));
/// ```
#[cfg(feature = "std")]
#[inline]
fn into_os_string(self) -> Result<OsString, FromUtf8Error>
where
    Self: Sized,
{
    // Unix: build the OS string straight from the byte vector.
    #[cfg(unix)]
    #[inline]
    fn convert(raw: Vec<u8>) -> Result<OsString, FromUtf8Error> {
        use std::os::unix::ffi::OsStringExt;

        let os = OsString::from_vec(raw);
        Ok(os)
    }

    // Elsewhere: validate as UTF-8 first, propagating the failure.
    #[cfg(not(unix))]
    #[inline]
    fn convert(raw: Vec<u8>) -> Result<OsString, FromUtf8Error> {
        let text = raw.into_string()?;
        Ok(OsString::from(text))
    }

    let raw = self.into_vec();
    convert(raw)
}
627
/// Lossily converts this byte string into an OS string, in place.
///
/// On Unix the bytes are handed to the OS string directly. On every other
/// platform the bytes are first lossily converted to UTF-8, replacing
/// invalid sequences with the Unicode replacement codepoint.
///
/// Note that this can prevent the correct roundtripping of file paths when
/// the representation of `OsString` is opaque.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteVec;
///
/// let bs = Vec::from_slice(b"foo\xFFbar");
/// let os_str = bs.into_os_string_lossy();
/// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
/// ```
#[inline]
#[cfg(feature = "std")]
fn into_os_string_lossy(self) -> OsString
where
    Self: Sized,
{
    // Unix: build the OS string straight from the byte vector.
    #[cfg(unix)]
    #[inline]
    fn convert(raw: Vec<u8>) -> OsString {
        use std::os::unix::ffi::OsStringExt;

        OsString::from_vec(raw)
    }

    // Elsewhere: go through a lossy `String` conversion.
    #[cfg(not(unix))]
    #[inline]
    fn convert(raw: Vec<u8>) -> OsString {
        let text: String = raw.into_string_lossy();
        OsString::from(text)
    }

    let raw = self.into_vec();
    convert(raw)
}
671
/// Converts this byte string into an owned file path, in place.
///
/// This defers to `into_os_string`, so it succeeds and fails under the
/// same conditions; on failure the error carries the original byte
/// string.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteVec;
///
/// let bs = Vec::from("foo");
/// let path = bs.into_path_buf().expect("should be valid UTF-8");
/// assert_eq!(path.as_os_str(), "foo");
/// ```
#[cfg(feature = "std")]
#[inline]
fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error>
where
    Self: Sized,
{
    let os = self.into_os_string()?;
    Ok(PathBuf::from(os))
}
697
/// Lossily converts this byte string into an owned file path, in place.
///
/// This defers to `into_os_string_lossy`, so invalid UTF-8 is handled
/// the same way as there.
///
/// Note that this can prevent the correct roundtripping of file paths when
/// the representation of `PathBuf` is opaque.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteVec;
///
/// let bs = Vec::from_slice(b"foo\xFFbar");
/// let path = bs.into_path_buf_lossy();
/// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
/// ```
#[inline]
#[cfg(feature = "std")]
fn into_path_buf_lossy(self) -> PathBuf
where
    Self: Sized,
{
    let os = self.into_os_string_lossy();
    PathBuf::from(os)
}
727
728	/// Removes the last byte from this `Vec<u8>` and returns it.
729	///
730	/// If this byte string is empty, then `None` is returned.
731	///
732	/// If the last codepoint in this byte string is not ASCII, then removing
733	/// the last byte could make this byte string contain invalid UTF-8.
734	///
735	/// Note that this is equivalent to the generic `Vec::pop` method. This
736	/// method is provided to permit callers to explicitly differentiate
737	/// between popping bytes and codepoints.
738	///
739	/// # Examples
740	///
741	/// Basic usage:
742	///
743	/// ```
744	/// use bstr::ByteVec;
745	///
746	/// let mut s = Vec::from("foo");
747	/// assert_eq!(s.pop_byte(), Some(b'o'));
748	/// assert_eq!(s.pop_byte(), Some(b'o'));
749	/// assert_eq!(s.pop_byte(), Some(b'f'));
750	/// assert_eq!(s.pop_byte(), None);
751	/// ```
752	#[inline]
753	fn pop_byte(&mut self) -> Option<u8> {
754	self.as_vec_mut().pop()
755	}
756
757	/// Removes the last codepoint from this `Vec<u8>` and returns it.
758	///
759	/// If this byte string is empty, then `None` is returned. If the last
760	/// bytes of this byte string do not correspond to a valid UTF-8 code unit
761	/// sequence, then the Unicode replacement codepoint is yielded instead in
762	/// accordance with the
763	/// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
764	///
765	/// # Examples
766	///
767	/// Basic usage:
768	///
769	/// ```
770	/// use bstr::ByteVec;
771	///
772	/// let mut s = Vec::from("foo");
773	/// assert_eq!(s.pop_char(), Some('o'));
774	/// assert_eq!(s.pop_char(), Some('o'));
775	/// assert_eq!(s.pop_char(), Some('f'));
776	/// assert_eq!(s.pop_char(), None);
777	/// ```
778	///
779	/// This shows the replacement codepoint substitution policy. Note that
780	/// the first pop yields a replacement codepoint but actually removes two
781	/// bytes. This is in contrast with subsequent pops when encountering
782	/// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
783	/// code unit sequence.
784	///
785	/// ```
786	/// use bstr::ByteVec;
787	///
788	/// let mut s = Vec::from_slice(b"f`\xFF\xFF\xFF`oo`\xE2\x98`");
789	/// assert_eq!(s.pop_char(), Some('`\u{FFFD}`'));
790	/// assert_eq!(s.pop_char(), Some('o'));
791	/// assert_eq!(s.pop_char(), Some('o'));
792	/// assert_eq!(s.pop_char(), Some('`\u{FFFD}`'));
793	/// assert_eq!(s.pop_char(), Some('`\u{FFFD}`'));
794	/// assert_eq!(s.pop_char(), Some('`\u{FFFD}`'));
795	/// assert_eq!(s.pop_char(), Some('f'));
796	/// assert_eq!(s.pop_char(), None);
797	/// ```
798	#[inline]
799	fn pop_char(&mut self) -> Option<char> {
800	let (ch, size) = utf8::decode_last_lossy(self.as_vec());
801	if size == `0` {
802	return None;
803	}
804	let new_len = self.as_vec().len() - size;
805	self.as_vec_mut().truncate(new_len);
806	Some(ch)
807	}
808
809	/// Removes a `char` from this `Vec<u8>` at the given byte position and
810	/// returns it.
811	///
812	/// If the bytes at the given position do not lead to a valid UTF-8 code
813	/// unit sequence, then a
814	/// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
815	///
816	/// # Panics
817	///
818	/// Panics if `at` is larger than or equal to this byte string's length.
819	///
820	/// # Examples
821	///
822	/// Basic usage:
823	///
824	/// ```
825	/// use bstr::ByteVec;
826	///
827	/// let mut s = Vec::from("foo☃bar");
828	/// assert_eq!(s.remove_char(`3`), '☃');
829	/// assert_eq!(s, b"foobar");
830	/// ```
831	///
832	/// This example shows how the Unicode replacement codepoint policy is
833	/// used:
834	///
835	/// ```
836	/// use bstr::ByteVec;
837	///
838	/// let mut s = Vec::from_slice(b"foo`\xFF`bar");
839	/// assert_eq!(s.remove_char(`3`), '`\u{FFFD}`');
840	/// assert_eq!(s, b"foobar");
841	/// ```
842	#[inline]
843	fn remove_char(&mut self, at: usize) -> char {
844	let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
845	assert!(
846	size > `0`,
847	"expected {} to be less than {}",
848	at,
849	self.as_vec().len(),
850	);
851	self.as_vec_mut().drain(at..at + size);
852	ch
853	}
854
855	/// Inserts the given codepoint into this `Vec<u8>` at a particular byte
856	/// position.
857	///
858	/// This is an `O(n)` operation as it may copy a number of elements in this
859	/// byte string proportional to its length.
860	///
861	/// # Panics
862	///
863	/// Panics if `at` is larger than the byte string's length.
864	///
865	/// # Examples
866	///
867	/// Basic usage:
868	///
869	/// ```
870	/// use bstr::ByteVec;
871	///
872	/// let mut s = Vec::from("foobar");
873	/// s.insert_char(`3`, '☃');
874	/// assert_eq!(s, "foo☃bar".as_bytes());
875	/// ```
876	#[inline]
877	fn insert_char(&mut self, at: usize, ch: char) {
878	self.insert_str(at, ch.encode_utf8(&mut [`0`; `4`]).as_bytes());
879	}
880
881	/// Inserts the given byte string into this byte string at a particular
882	/// byte position.
883	///
884	/// This is an `O(n)` operation as it may copy a number of elements in this
885	/// byte string proportional to its length.
886	///
887	/// The given byte string may be any type that can be cheaply converted
888	/// into a `&[u8]`. This includes, but is not limited to, `&str` and
889	/// `&[u8]`.
890	///
891	/// # Panics
892	///
893	/// Panics if `at` is larger than the byte string's length.
894	///
895	/// # Examples
896	///
897	/// Basic usage:
898	///
899	/// ```
900	/// use bstr::ByteVec;
901	///
902	/// let mut s = Vec::from("foobar");
903	/// s.insert_str(`3`, "☃☃☃");
904	/// assert_eq!(s, "foo☃☃☃bar".as_bytes());
905	/// ```
906	#[inline]
907	fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
908	let bytes = bytes.as_ref();
909	let len = self.as_vec().len();
910	assert!(at <= len, "expected {} to be <= {}", at, len);
911
912	// SAFETY: We'd like to efficiently splice in the given bytes into
913	// this byte string. Since we are only working with `u8` elements here,
914	// we only need to consider whether our bounds are correct and whether
915	// our byte string has enough space.
916	self.as_vec_mut().reserve(bytes.len());
917	unsafe {
918	// Shift bytes after `at` over by the length of `bytes` to make
919	// room for it. This requires referencing two regions of memory
920	// that may overlap, so we use ptr::copy.
921	ptr::copy(
922	self.as_vec().as_ptr().add(at),
923	self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
924	len - at,
925	);
926	// Now copy the bytes given into the room we made above. In this
927	// case, we know that the given bytes cannot possibly overlap
928	// with this byte string since we have a mutable borrow of the
929	// latter. Thus, we can use a nonoverlapping copy.
930	ptr::copy_nonoverlapping(
931	bytes.as_ptr(),
932	self.as_vec_mut().as_mut_ptr().add(at),
933	bytes.len(),
934	);
935	self.as_vec_mut().set_len(len + bytes.len());
936	}
937	}
938
939	/// Removes the specified range in this byte string and replaces it with
940	/// the given bytes. The given bytes do not need to have the same length
941	/// as the range provided.
942	///
943	/// # Panics
944	///
945	/// Panics if the given range is invalid.
946	///
947	/// # Examples
948	///
949	/// Basic usage:
950	///
951	/// ```
952	/// use bstr::ByteVec;
953	///
954	/// let mut s = Vec::from("foobar");
955	/// s.replace_range(`2`..`4`, "xxxxx");
956	/// assert_eq!(s, "foxxxxxar".as_bytes());
957	/// ```
958	#[inline]
959	fn replace_range<R, B>(&mut self, range: R, replace_with: B)
960	where
961	R: ops::RangeBounds<usize>,
962	B: AsRef<[u8]>,
963	{
964	self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned());
965	}
966
967	/// Creates a draining iterator that removes the specified range in this
968	/// `Vec<u8>` and yields each of the removed bytes.
969	///
970	/// Note that the elements specified by the given range are removed
971	/// regardless of whether the returned iterator is fully exhausted.
972	///
973	/// Also note that is is unspecified how many bytes are removed from the
974	/// `Vec<u8>` if the `DrainBytes` iterator is leaked.
975	///
976	/// # Panics
977	///
978	/// Panics if the given range is not valid.
979	///
980	/// # Examples
981	///
982	/// Basic usage:
983	///
984	/// ```
985	/// use bstr::ByteVec;
986	///
987	/// let mut s = Vec::from("foobar");
988	/// {
989	/// let mut drainer = s.drain_bytes(`2`..`4`);
990	/// assert_eq!(drainer.next(), Some(b'o'));
991	/// assert_eq!(drainer.next(), Some(b'b'));
992	/// assert_eq!(drainer.next(), None);
993	/// }
994	/// assert_eq!(s, "foar".as_bytes());
995	/// ```
996	#[inline]
997	fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_>
998	where
999	R: ops::RangeBounds<usize>,
1000	{
1001	DrainBytes { it: self.as_vec_mut().drain(range) }
1002	}
1003	}
1004
/// A draining byte oriented iterator for `Vec<u8>`.
///
/// This iterator is created by
/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
///
/// It is a thin wrapper around the standard library's `vec::Drain` and
/// forwards every iterator operation to it.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteVec;
///
/// let mut s = Vec::from("foobar");
/// {
///     let mut drainer = s.drain_bytes(2..4);
///     assert_eq!(drainer.next(), Some(b'o'));
///     assert_eq!(drainer.next(), Some(b'b'));
///     assert_eq!(drainer.next(), None);
/// }
/// assert_eq!(s, "foar".as_bytes());
/// ```
#[derive(Debug)]
pub struct DrainBytes<'a> {
    it: vec::Drain<'a, u8>,
}

impl<'a> iter::FusedIterator for DrainBytes<'a> {}

impl<'a> Iterator for DrainBytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    /// Reports the exact number of remaining bytes.
    ///
    /// `ExactSizeIterator` requires `size_hint` to return the exact size,
    /// so the default `(0, None)` is not acceptable for this type.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for DrainBytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl<'a> ExactSizeIterator for DrainBytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
1055
1056	/// An error that may occur when converting a `Vec<u8>` to a `String`.
1057	///
1058	/// This error includes the original `Vec<u8>` that failed to convert to a
1059	/// `String`. This permits callers to recover the allocation used even if it
1060	/// it not valid UTF-8.
1061	///
1062	/// # Examples
1063	///
1064	/// Basic usage:
1065	///
1066	/// ```
1067	/// use bstr::{B, ByteVec};
1068	///
1069	/// let bytes = Vec::from_slice(b"foo`\xFF`bar");
1070	/// let err = bytes.into_string().unwrap_err();
1071	///
1072	/// assert_eq!(err.utf8_error().valid_up_to(), `3`);
1073	/// assert_eq!(err.utf8_error().error_len(), Some(`1`));
1074	///
1075	/// // At no point in this example is an allocation performed.
1076	/// let bytes = Vec::from(err.into_vec());
1077	/// assert_eq!(bytes, B(b"foo`\xFF`bar"));
1078	/// ```
1079	#[derive(Debug, Eq, PartialEq)]
1080	pub struct FromUtf8Error {
1081	original: Vec<u8>,
1082	err: Utf8Error,
1083	}
1084
1085	impl FromUtf8Error {
1086	/// Return the original bytes as a slice that failed to convert to a
1087	/// `String`.
1088	///
1089	/// # Examples
1090	///
1091	/// Basic usage:
1092	///
1093	/// ```
1094	/// use bstr::{B, ByteVec};
1095	///
1096	/// let bytes = Vec::from_slice(b"foo`\xFF`bar");
1097	/// let err = bytes.into_string().unwrap_err();
1098	///
1099	/// // At no point in this example is an allocation performed.
1100	/// assert_eq!(err.as_bytes(), B(b"foo`\xFF`bar"));
1101	/// ```
1102	#[inline]
1103	pub fn as_bytes(&self) -> &[u8] {
1104	&self.original
1105	}
1106
1107	/// Consume this error and return the original byte string that failed to
1108	/// convert to a `String`.
1109	///
1110	/// # Examples
1111	///
1112	/// Basic usage:
1113	///
1114	/// ```
1115	/// use bstr::{B, ByteVec};
1116	///
1117	/// let bytes = Vec::from_slice(b"foo`\xFF`bar");
1118	/// let err = bytes.into_string().unwrap_err();
1119	/// let original = err.into_vec();
1120	///
1121	/// // At no point in this example is an allocation performed.
1122	/// assert_eq!(original, B(b"foo`\xFF`bar"));
1123	/// ```
1124	#[inline]
1125	pub fn into_vec(self) -> Vec<u8> {
1126	self.original
1127	}
1128
1129	/// Return the underlying UTF-8 error that occurred. This error provides
1130	/// information on the nature and location of the invalid UTF-8 detected.
1131	///
1132	/// # Examples
1133	///
1134	/// Basic usage:
1135	///
1136	/// ```
1137	/// use bstr::{B, ByteVec};
1138	///
1139	/// let bytes = Vec::from_slice(b"foo`\xFF`bar");
1140	/// let err = bytes.into_string().unwrap_err();
1141	///
1142	/// assert_eq!(err.utf8_error().valid_up_to(), `3`);
1143	/// assert_eq!(err.utf8_error().error_len(), Some(`1`));
1144	/// ```
1145	#[inline]
1146	pub fn utf8_error(&self) -> &Utf8Error {
1147	&self.err
1148	}
1149	}
1150
1151	#[cfg(feature = "std")]
1152	impl error::Error for FromUtf8Error {
1153	#[inline]
1154	fn description(&self) -> &str {
1155	"invalid UTF-8 vector"
1156	}
1157	}
1158
1159	impl fmt::Display for FromUtf8Error {
1160	#[inline]
1161	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1162	write!(f, "{}", self.err)
1163	}
1164	}
1165
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::ext_vec::ByteVec;

    // Exercises `insert_str` at the start, middle and end of byte strings
    // of various lengths, including the empty one.
    #[test]
    fn insert() {
        let mut s = vec![];
        s.insert_str(0, "foo");
        assert_eq!(s, "foo".as_bytes());

        let mut s = Vec::from("a");
        s.insert_str(0, "foo");
        assert_eq!(s, "fooa".as_bytes());

        let mut s = Vec::from("a");
        s.insert_str(1, "foo");
        assert_eq!(s, "afoo".as_bytes());

        let mut s = Vec::from("foobar");
        s.insert_str(3, "quux");
        assert_eq!(s, "fooquuxbar".as_bytes());

        let mut s = Vec::from("foobar");
        s.insert_str(3, "x");
        assert_eq!(s, "fooxbar".as_bytes());

        let mut s = Vec::from("foobar");
        s.insert_str(0, "x");
        assert_eq!(s, "xfoobar".as_bytes());

        let mut s = Vec::from("foobar");
        s.insert_str(6, "x");
        assert_eq!(s, "foobarx".as_bytes());

        let mut s = Vec::from("foobar");
        s.insert_str(3, "quuxbazquux");
        assert_eq!(s, "fooquuxbazquuxbar".as_bytes());
    }

    // Inserting one past the end of an empty byte string must panic.
    #[test]
    #[should_panic]
    fn insert_fail1() {
        let mut s = vec![];
        s.insert_str(1, "foo");
    }

    // Inserting one past the end of a one-byte string must panic.
    #[test]
    #[should_panic]
    fn insert_fail2() {
        let mut s = Vec::from("a");
        s.insert_str(2, "foo");
    }

    // Inserting one past the end of a longer byte string must panic.
    #[test]
    #[should_panic]
    fn insert_fail3() {
        let mut s = Vec::from("foobar");
        s.insert_str(7, "foo");
    }
}
1226

Provided by KDAB

Definitions