ext_vec.rs source code [crates/bstr/src/ext_vec.rs]

1	use core::{fmt, iter, ops, ptr};
2
3	use alloc::{borrow::Cow, string::String, vec, vec::Vec};
4
5	#[cfg(feature = "std")]
6	use std::{
7	error,
8	ffi::{OsStr, OsString},
9	path::{Path, PathBuf},
10	};
11
12	use crate::{
13	ext_slice::ByteSlice,
14	utf8::{self, Utf8Error},
15	};
16
/// Concatenate the elements given by the iterator together into a single
/// `Vec<u8>`.
///
/// The elements may be any type that can be cheaply converted into an `&[u8]`.
/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
///
/// An empty iterator yields an empty `Vec<u8>`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::concat(&["foo", "bar", "baz"]);
/// assert_eq!(s, "foobarbaz".as_bytes());
/// ```
#[inline]
pub fn concat<T, I>(elements: I) -> Vec<u8>
where
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    let mut dest = Vec::new();
    for element in elements {
        // Same operation `ByteVec::push_str` performs: append the element's
        // bytes to the end of the destination.
        dest.extend_from_slice(element.as_ref());
    }
    dest
}
45
/// Join the elements given by the iterator with the given separator into a
/// single `Vec<u8>`.
///
/// Both the separator and the elements may be any type that can be cheaply
/// converted into an `&[u8]`. This includes, but is not limited to,
/// `&str`, `&BStr` and `&[u8]` itself.
///
/// The separator is written only *between* elements: an empty iterator
/// yields an empty `Vec<u8>` and a single element is returned unchanged.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::join(",", &["foo", "bar", "baz"]);
/// assert_eq!(s, "foo,bar,baz".as_bytes());
/// ```
#[inline]
pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
where
    B: AsRef<[u8]>,
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    let separator = separator.as_ref();
    let mut it = elements.into_iter();
    let mut dest = Vec::new();
    // The first element is written without a leading separator; every later
    // element is preceded by one.
    if let Some(first) = it.next() {
        dest.extend_from_slice(first.as_ref());
        for element in it {
            dest.extend_from_slice(separator);
            dest.extend_from_slice(element.as_ref());
        }
    }
    dest
}
84
85	impl ByteVec for Vec<u8> {
86	#[inline]
87	fn as_vec(&self) -> &Vec<u8> {
88	self
89	}
90
91	#[inline]
92	fn as_vec_mut(&mut self) -> &mut Vec<u8> {
93	self
94	}
95
96	#[inline]
97	fn into_vec(self) -> Vec<u8> {
98	self
99	}
100	}
101
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
///
/// `Sealed` is public, but it lives inside this private module, so code
/// outside this module tree cannot name it.
mod private {
    pub trait Sealed {}
}

// `Vec<u8>` is the type that is allowed to implement `ByteVec`.
impl private::Sealed for Vec<u8> {}
108
109	/// A trait that extends `Vec<u8>` with string oriented methods.
110	///
111	/// Note that when using the constructor methods, such as
112	/// `ByteVec::from_slice`, one should actually call them using the concrete
113	/// type. For example:
114	///
115	/// ```
116	/// use bstr::{B, ByteVec};
117	///
118	/// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
119	/// assert_eq!(s, B("abc"));
120	/// ```
121	///
122	/// This trait is sealed and cannot be implemented outside of `bstr`.
123	pub trait ByteVec: private::Sealed {
124	/// A method for accessing the raw vector bytes of this type. This is
125	/// always a no-op and callers shouldn't care about it. This only exists
126	/// for making the extension trait work.
127	#[doc(hidden)]
128	fn as_vec(&self) -> &Vec<u8>;
129
130	/// A method for accessing the raw vector bytes of this type, mutably. This
131	/// is always a no-op and callers shouldn't care about it. This only exists
132	/// for making the extension trait work.
133	#[doc(hidden)]
134	fn as_vec_mut(&mut self) -> &mut Vec<u8>;
135
136	/// A method for consuming ownership of this vector. This is always a no-op
137	/// and callers shouldn't care about it. This only exists for making the
138	/// extension trait work.
139	#[doc(hidden)]
140	fn into_vec(self) -> Vec<u8>
141	where
142	Self: Sized;
143
144	/// Create a new owned byte string from the given byte slice.
145	///
146	/// # Examples
147	///
148	/// Basic usage:
149	///
150	/// ```
151	/// use bstr::{B, ByteVec};
152	///
153	/// let s = Vec::from_slice(b"abc");
154	/// assert_eq!(s, B("abc"));
155	/// ```
#[inline]
fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
    // View the input as a byte slice, then copy it into a new vector.
    let slice: &[u8] = bytes.as_ref();
    Vec::from(slice)
}
160
161	/// Create a new byte string from an owned OS string.
162	///
163	/// When the underlying bytes of OS strings are accessible, then this
164	/// always succeeds and is zero cost. Otherwise, this returns the given
165	/// `OsString` if it is not valid UTF-8.
166	///
167	/// # Examples
168	///
169	/// Basic usage:
170	///
171	/// ```
172	/// use std::ffi::OsString;
173	///
174	/// use bstr::{B, ByteVec};
175	///
176	/// let os_str = OsString::from("foo");
177	/// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
178	/// assert_eq!(bs, B("foo"));
179	/// ```
#[inline]
#[cfg(feature = "std")]
fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
    // Unix: the bytes of an `OsString` are directly accessible, so this
    // branch always returns `Ok`.
    #[cfg(unix)]
    #[inline]
    fn convert(os: OsString) -> Result<Vec<u8>, OsString> {
        Ok(std::os::unix::ffi::OsStringExt::into_vec(os))
    }

    // Elsewhere: go through `String`; if the OS string is not valid UTF-8 it
    // is handed back to the caller as the error value.
    #[cfg(not(unix))]
    #[inline]
    fn convert(os: OsString) -> Result<Vec<u8>, OsString> {
        os.into_string().map(String::into_bytes)
    }

    convert(os_str)
}
199
200	/// Lossily create a new byte string from an OS string slice.
201	///
202	/// When the underlying bytes of OS strings are accessible, then this is
203	/// zero cost and always returns a slice. Otherwise, a UTF-8 check is
204	/// performed and if the given OS string is not valid UTF-8, then it is
205	/// lossily decoded into valid UTF-8 (with invalid bytes replaced by the
206	/// Unicode replacement codepoint).
207	///
208	/// # Examples
209	///
210	/// Basic usage:
211	///
212	/// ```
213	/// use std::ffi::OsStr;
214	///
215	/// use bstr::{B, ByteVec};
216	///
217	/// let os_str = OsStr::new("foo");
218	/// let bs = Vec::from_os_str_lossy(os_str);
219	/// assert_eq!(bs, B("foo"));
220	/// ```
#[inline]
#[cfg(feature = "std")]
fn from_os_str_lossy(os_str: &OsStr) -> Cow<'_, [u8]> {
    // Unix: borrow the OS string's bytes directly.
    #[cfg(unix)]
    #[inline]
    fn convert(os: &OsStr) -> Cow<'_, [u8]> {
        Cow::Borrowed(std::os::unix::ffi::OsStrExt::as_bytes(os))
    }

    // Elsewhere: decode lossily to a string and expose its bytes, borrowing
    // when the lossy conversion itself borrowed.
    #[cfg(not(unix))]
    #[inline]
    fn convert(os: &OsStr) -> Cow<'_, [u8]> {
        match os.to_string_lossy() {
            Cow::Owned(text) => Cow::Owned(text.into_bytes()),
            Cow::Borrowed(text) => Cow::Borrowed(text.as_bytes()),
        }
    }

    convert(os_str)
}
243
244	/// Create a new byte string from an owned file path.
245	///
246	/// When the underlying bytes of paths are accessible, then this always
247	/// succeeds and is zero cost. Otherwise, this returns the given `PathBuf`
248	/// if it is not valid UTF-8.
249	///
250	/// # Examples
251	///
252	/// Basic usage:
253	///
254	/// ```
255	/// use std::path::PathBuf;
256	///
257	/// use bstr::{B, ByteVec};
258	///
259	/// let path = PathBuf::from("foo");
260	/// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
261	/// assert_eq!(bs, B("foo"));
262	/// ```
#[inline]
#[cfg(feature = "std")]
fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
    // Delegate to the OS string conversion; on failure, wrap the rejected
    // OS string back up as a path for the caller.
    let os_string = path.into_os_string();
    match <Vec<u8>>::from_os_string(os_string) {
        Ok(bytes) => Ok(bytes),
        Err(rejected) => Err(PathBuf::from(rejected)),
    }
}
268
269	/// Lossily create a new byte string from a file path.
270	///
271	/// When the underlying bytes of paths are accessible, then this is
272	/// zero cost and always returns a slice. Otherwise, a UTF-8 check is
273	/// performed and if the given path is not valid UTF-8, then it is lossily
274	/// decoded into valid UTF-8 (with invalid bytes replaced by the Unicode
275	/// replacement codepoint).
276	///
277	/// # Examples
278	///
279	/// Basic usage:
280	///
281	/// ```
282	/// use std::path::Path;
283	///
284	/// use bstr::{B, ByteVec};
285	///
286	/// let path = Path::new("foo");
287	/// let bs = Vec::from_path_lossy(path);
288	/// assert_eq!(bs, B("foo"));
289	/// ```
#[inline]
#[cfg(feature = "std")]
fn from_path_lossy(path: &Path) -> Cow<'_, [u8]> {
    // A path is viewed as an OS string, so reuse the OS string conversion.
    let os_str = path.as_os_str();
    <Vec<u8>>::from_os_str_lossy(os_str)
}
295
296	/// Unescapes the given string into its raw bytes.
297	///
298	/// This looks for the escape sequences `\xNN`, `\0`, `\r`, `\n`, `\t`
299	/// and `\` and translates them into their corresponding unescaped form.
300	///
301	/// Incomplete escape sequences or things that look like escape sequences
302	/// but are not (for example, `\i` or `\xYZ`) are passed through literally.
303	///
304	/// This is the dual of [`ByteSlice::escape_bytes`].
305	///
306	/// Note that the zero or NUL byte may be represented as either `\0` or
307	/// `\x00`. Both will be unescaped into the zero byte.
308	///
309	/// # Examples
310	///
311	/// This shows basic usage:
312	///
313	/// ```
314	/// # #[cfg(feature = "alloc")] {
315	/// use bstr::{B, BString, ByteVec};
316	///
/// assert_eq!(
///     BString::from(b"foo\xFFbar"),
///     Vec::unescape_bytes(r"foo\xFFbar"),
/// );
/// assert_eq!(
///     BString::from(b"foo\nbar"),
///     Vec::unescape_bytes(r"foo\nbar"),
/// );
/// assert_eq!(
///     BString::from(b"foo\tbar"),
///     Vec::unescape_bytes(r"foo\tbar"),
/// );
/// assert_eq!(
///     BString::from(b"foo\\bar"),
///     Vec::unescape_bytes(r"foo\\bar"),
/// );
333	/// assert_eq!(
334	/// BString::from("foo☃bar"),
335	/// Vec::unescape_bytes(r"foo☃bar"),
336	/// );
337	///
338	/// # }
339	/// ```
340	///
341	/// This shows some examples of how incomplete or "incorrect" escape
342	/// sequences get passed through literally.
343	///
344	/// ```
345	/// # #[cfg(feature = "alloc")] {
346	/// use bstr::{B, BString, ByteVec};
347	///
348	/// // Show some incomplete escape sequences.
349	/// assert_eq!(
350	/// BString::from(br"\"),
351	/// Vec::unescape_bytes(r"\"),
352	/// );
353	/// assert_eq!(
354	/// BString::from(br"\"),
355	/// Vec::unescape_bytes(r"\\"),
356	/// );
357	/// assert_eq!(
358	/// BString::from(br"\x"),
359	/// Vec::unescape_bytes(r"\x"),
360	/// );
361	/// assert_eq!(
362	/// BString::from(br"\xA"),
363	/// Vec::unescape_bytes(r"\xA"),
364	/// );
365	/// // And now some that kind of look like escape
366	/// // sequences, but aren't.
367	/// assert_eq!(
368	/// BString::from(br"\xZ"),
369	/// Vec::unescape_bytes(r"\xZ"),
370	/// );
371	/// assert_eq!(
372	/// BString::from(br"\xZZ"),
373	/// Vec::unescape_bytes(r"\xZZ"),
374	/// );
375	/// assert_eq!(
376	/// BString::from(br"\i"),
377	/// Vec::unescape_bytes(r"\i"),
378	/// );
379	/// assert_eq!(
380	/// BString::from(br"\u"),
381	/// Vec::unescape_bytes(r"\u"),
382	/// );
383	/// assert_eq!(
384	/// BString::from(br"\u{2603}"),
385	/// Vec::unescape_bytes(r"\u{2603}"),
386	/// );
387	///
388	/// # }
389	/// ```
#[inline]
#[cfg(feature = "alloc")]
fn unescape_bytes<S: AsRef<str>>(escaped: S) -> Vec<u8> {
    // Feed the input's characters through the unescaping iterator and gather
    // the bytes it yields.
    let text: &str = escaped.as_ref();
    let unescaped = crate::escape_bytes::UnescapeBytes::new(text.chars());
    unescaped.collect()
}
396
397	/// Appends the given byte to the end of this byte string.
398	///
399	/// Note that this is equivalent to the generic `Vec::push` method. This
400	/// method is provided to permit callers to explicitly differentiate
401	/// between pushing bytes, codepoints and strings.
402	///
403	/// # Examples
404	///
405	/// Basic usage:
406	///
407	/// ```
408	/// use bstr::ByteVec;
409	///
410	/// let mut s = <Vec<u8>>::from("abc");
/// s.push_byte(b'\xE2');
/// s.push_byte(b'\x98');
/// s.push_byte(b'\x83');
414	/// assert_eq!(s, "abc☃".as_bytes());
415	/// ```
416	#[inline]
417	fn push_byte(&mut self, byte: u8) {
418	self.as_vec_mut().push(byte);
419	}
420
421	/// Appends the given `char` to the end of this byte string.
422	///
423	/// # Examples
424	///
425	/// Basic usage:
426	///
427	/// ```
428	/// use bstr::ByteVec;
429	///
430	/// let mut s = <Vec<u8>>::from("abc");
431	/// s.push_char('1');
432	/// s.push_char('2');
433	/// s.push_char('3');
434	/// assert_eq!(s, "abc123".as_bytes());
435	/// ```
436	#[inline]
437	fn push_char(&mut self, ch: char) {
438	if ch.len_utf8() == `1` {
439	self.push_byte(ch as u8);
440	return;
441	}
442	self.as_vec_mut()
443	.extend_from_slice(ch.encode_utf8(&mut [`0`; `4`]).as_bytes());
444	}
445
446	/// Appends the given slice to the end of this byte string. This accepts
/// any type that can be converted to a `&[u8]`. This includes, but is not
448	/// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
449	///
450	/// # Examples
451	///
452	/// Basic usage:
453	///
454	/// ```
455	/// use bstr::ByteVec;
456	///
457	/// let mut s = <Vec<u8>>::from("abc");
458	/// s.push_str(b"123");
459	/// assert_eq!(s, "abc123".as_bytes());
460	/// ```
461	#[inline]
462	fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
463	self.as_vec_mut().extend_from_slice(bytes.as_ref());
464	}
465
466	/// Converts a `Vec<u8>` into a `String` if and only if this byte string is
467	/// valid UTF-8.
468	///
469	/// If it is not valid UTF-8, then a
470	/// [`FromUtf8Error`](struct.FromUtf8Error.html)
471	/// is returned. (This error can be used to examine why UTF-8 validation
472	/// failed, or to regain the original byte string.)
473	///
474	/// # Examples
475	///
476	/// Basic usage:
477	///
478	/// ```
479	/// use bstr::ByteVec;
480	///
481	/// let bytes = Vec::from("hello");
482	/// let string = bytes.into_string().unwrap();
483	///
484	/// assert_eq!("hello", string);
485	/// ```
486	///
487	/// If this byte string is not valid UTF-8, then an error will be returned.
488	/// That error can then be used to inspect the location at which invalid
489	/// UTF-8 was found, or to regain the original byte string:
490	///
491	/// ```
492	/// use bstr::{B, ByteVec};
493	///
/// let bytes = Vec::from_slice(b"foo\xFFbar");
/// let err = bytes.into_string().unwrap_err();
///
/// assert_eq!(err.utf8_error().valid_up_to(), 3);
/// assert_eq!(err.utf8_error().error_len(), Some(1));
///
/// // At no point in this example is an allocation performed.
/// let bytes = Vec::from(err.into_vec());
/// assert_eq!(bytes, B(b"foo\xFFbar"));
503	/// ```
504	#[inline]
505	fn into_string(self) -> Result<String, FromUtf8Error>
506	where
507	Self: Sized,
508	{
509	match utf8::validate(self.as_vec()) {
510	Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
511	Ok(()) => {
512	// SAFETY: This is safe because of the guarantees provided by
513	// utf8::validate.
514	unsafe { Ok(self.into_string_unchecked()) }
515	}
516	}
517	}
518
519	/// Lossily converts a `Vec<u8>` into a `String`. If this byte string
520	/// contains invalid UTF-8, then the invalid bytes are replaced with the
521	/// Unicode replacement codepoint.
522	///
523	/// # Examples
524	///
525	/// Basic usage:
526	///
527	/// ```
528	/// use bstr::ByteVec;
529	///
/// let bytes = Vec::from_slice(b"foo\xFFbar");
/// let string = bytes.into_string_lossy();
/// assert_eq!(string, "foo\u{FFFD}bar");
533	/// ```
534	#[inline]
535	fn into_string_lossy(self) -> String
536	where
537	Self: Sized,
538	{
539	match self.as_vec().to_str_lossy() {
540	Cow::Borrowed(_) => {
541	// SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
542	// the entire string is valid utf8.
543	unsafe { self.into_string_unchecked() }
544	}
545	Cow::Owned(s) => s,
546	}
547	}
548
549	/// Unsafely convert this byte string into a `String`, without checking for
550	/// valid UTF-8.
551	///
552	/// # Safety
553	///
/// Callers *must* ensure that this byte string is valid UTF-8 *before*
555	/// calling this method. Converting a byte string into a `String` that is
556	/// not valid UTF-8 is considered undefined behavior.
557	///
558	/// This routine is useful in performance sensitive contexts where the
559	/// UTF-8 validity of the byte string is already known and it is
560	/// undesirable to pay the cost of an additional UTF-8 validation check
561	/// that [`into_string`](#method.into_string) performs.
562	///
563	/// # Examples
564	///
565	/// Basic usage:
566	///
567	/// ```
568	/// use bstr::ByteVec;
569	///
570	/// // SAFETY: This is safe because string literals are guaranteed to be
571	/// // valid UTF-8 by the Rust compiler.
572	/// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
573	/// assert_eq!("☃βツ", s);
574	/// ```
575	#[inline]
576	unsafe fn into_string_unchecked(self) -> String
577	where
578	Self: Sized,
579	{
580	String::from_utf8_unchecked(self.into_vec())
581	}
582
583	/// Converts this byte string into an OS string, in place.
584	///
585	/// When OS strings can be constructed from arbitrary byte sequences, this
586	/// always succeeds and is zero cost. Otherwise, if this byte string is not
587	/// valid UTF-8, then an error (with the original byte string) is returned.
588	///
589	/// # Examples
590	///
591	/// Basic usage:
592	///
593	/// ```
594	/// use std::ffi::OsStr;
595	///
596	/// use bstr::ByteVec;
597	///
598	/// let bs = Vec::from("foo");
599	/// let os_str = bs.into_os_string().expect("should be valid UTF-8");
600	/// assert_eq!(os_str, OsStr::new("foo"));
601	/// ```
#[cfg(feature = "std")]
#[inline]
fn into_os_string(self) -> Result<OsString, FromUtf8Error>
where
    Self: Sized,
{
    // Unix: an OS string is built straight from the bytes, so this branch
    // always returns `Ok`.
    #[cfg(unix)]
    #[inline]
    fn convert(bytes: Vec<u8>) -> Result<OsString, FromUtf8Error> {
        Ok(<OsString as std::os::unix::ffi::OsStringExt>::from_vec(bytes))
    }

    // Elsewhere: the bytes must first convert to a `String`; a UTF-8 error
    // is passed on to the caller.
    #[cfg(not(unix))]
    #[inline]
    fn convert(bytes: Vec<u8>) -> Result<OsString, FromUtf8Error> {
        let string = bytes.into_string()?;
        Ok(OsString::from(string))
    }

    convert(self.into_vec())
}
624
625	/// Lossily converts this byte string into an OS string, in place.
626	///
627	/// When OS strings can be constructed from arbitrary byte sequences, this
628	/// is zero cost and always returns a slice. Otherwise, this will perform a
629	/// UTF-8 check and lossily convert this byte string into valid UTF-8 using
630	/// the Unicode replacement codepoint.
631	///
632	/// Note that this can prevent the correct roundtripping of file paths when
633	/// the representation of `OsString` is opaque.
634	///
635	/// # Examples
636	///
637	/// Basic usage:
638	///
639	/// ```
640	/// use bstr::ByteVec;
641	///
/// let bs = Vec::from_slice(b"foo\xFFbar");
/// let os_str = bs.into_os_string_lossy();
/// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
645	/// ```
#[inline]
#[cfg(feature = "std")]
fn into_os_string_lossy(self) -> OsString
where
    Self: Sized,
{
    // Unix: build the OS string straight from the bytes.
    #[cfg(unix)]
    #[inline]
    fn convert(bytes: Vec<u8>) -> OsString {
        <OsString as std::os::unix::ffi::OsStringExt>::from_vec(bytes)
    }

    // Elsewhere: lossily decode to a `String` first, then wrap it.
    #[cfg(not(unix))]
    #[inline]
    fn convert(bytes: Vec<u8>) -> OsString {
        let string = bytes.into_string_lossy();
        OsString::from(string)
    }

    convert(self.into_vec())
}
668
669	/// Converts this byte string into an owned file path, in place.
670	///
671	/// When paths can be constructed from arbitrary byte sequences, this
672	/// always succeeds and is zero cost. Otherwise, if this byte string is not
673	/// valid UTF-8, then an error (with the original byte string) is returned.
674	///
675	/// # Examples
676	///
677	/// Basic usage:
678	///
679	/// ```
680	/// use bstr::ByteVec;
681	///
682	/// let bs = Vec::from("foo");
683	/// let path = bs.into_path_buf().expect("should be valid UTF-8");
684	/// assert_eq!(path.as_os_str(), "foo");
685	/// ```
#[cfg(feature = "std")]
#[inline]
fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error>
where
    Self: Sized,
{
    // Convert to an OS string first (propagating any UTF-8 error), then
    // wrap the result as a path.
    let os_string = self.into_os_string()?;
    Ok(PathBuf::from(os_string))
}
694
695	/// Lossily converts this byte string into an owned file path, in place.
696	///
697	/// When paths can be constructed from arbitrary byte sequences, this is
698	/// zero cost and always returns a slice. Otherwise, this will perform a
699	/// UTF-8 check and lossily convert this byte string into valid UTF-8 using
700	/// the Unicode replacement codepoint.
701	///
702	/// Note that this can prevent the correct roundtripping of file paths when
703	/// the representation of `PathBuf` is opaque.
704	///
705	/// # Examples
706	///
707	/// Basic usage:
708	///
709	/// ```
710	/// use bstr::ByteVec;
711	///
/// let bs = Vec::from_slice(b"foo\xFFbar");
/// let path = bs.into_path_buf_lossy();
/// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
715	/// ```
#[inline]
#[cfg(feature = "std")]
fn into_path_buf_lossy(self) -> PathBuf
where
    Self: Sized,
{
    // Reuse the lossy OS string conversion and wrap the result as a path.
    let os_string = self.into_os_string_lossy();
    PathBuf::from(os_string)
}
724
725	/// Removes the last byte from this `Vec<u8>` and returns it.
726	///
727	/// If this byte string is empty, then `None` is returned.
728	///
729	/// If the last codepoint in this byte string is not ASCII, then removing
730	/// the last byte could make this byte string contain invalid UTF-8.
731	///
732	/// Note that this is equivalent to the generic `Vec::pop` method. This
733	/// method is provided to permit callers to explicitly differentiate
734	/// between popping bytes and codepoints.
735	///
736	/// # Examples
737	///
738	/// Basic usage:
739	///
740	/// ```
741	/// use bstr::ByteVec;
742	///
743	/// let mut s = Vec::from("foo");
744	/// assert_eq!(s.pop_byte(), Some(b'o'));
745	/// assert_eq!(s.pop_byte(), Some(b'o'));
746	/// assert_eq!(s.pop_byte(), Some(b'f'));
747	/// assert_eq!(s.pop_byte(), None);
748	/// ```
749	#[inline]
750	fn pop_byte(&mut self) -> Option<u8> {
751	self.as_vec_mut().pop()
752	}
753
754	/// Removes the last codepoint from this `Vec<u8>` and returns it.
755	///
756	/// If this byte string is empty, then `None` is returned. If the last
757	/// bytes of this byte string do not correspond to a valid UTF-8 code unit
758	/// sequence, then the Unicode replacement codepoint is yielded instead in
759	/// accordance with the
760	/// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8).
761	///
762	/// # Examples
763	///
764	/// Basic usage:
765	///
766	/// ```
767	/// use bstr::ByteVec;
768	///
769	/// let mut s = Vec::from("foo");
770	/// assert_eq!(s.pop_char(), Some('o'));
771	/// assert_eq!(s.pop_char(), Some('o'));
772	/// assert_eq!(s.pop_char(), Some('f'));
773	/// assert_eq!(s.pop_char(), None);
774	/// ```
775	///
776	/// This shows the replacement codepoint substitution policy. Note that
777	/// the first pop yields a replacement codepoint but actually removes two
778	/// bytes. This is in contrast with subsequent pops when encountering
779	/// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
780	/// code unit sequence.
781	///
782	/// ```
783	/// use bstr::ByteVec;
784	///
/// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
/// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
/// assert_eq!(s.pop_char(), Some('o'));
/// assert_eq!(s.pop_char(), Some('o'));
/// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
/// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
/// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
792	/// assert_eq!(s.pop_char(), Some('f'));
793	/// assert_eq!(s.pop_char(), None);
794	/// ```
795	#[inline]
796	fn pop_char(&mut self) -> Option<char> {
797	let (ch, size) = utf8::decode_last_lossy(self.as_vec());
798	if size == `0` {
799	return None;
800	}
801	let new_len = self.as_vec().len() - size;
802	self.as_vec_mut().truncate(new_len);
803	Some(ch)
804	}
805
806	/// Removes a `char` from this `Vec<u8>` at the given byte position and
807	/// returns it.
808	///
809	/// If the bytes at the given position do not lead to a valid UTF-8 code
810	/// unit sequence, then a
811	/// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8).
812	///
813	/// # Panics
814	///
815	/// Panics if `at` is larger than or equal to this byte string's length.
816	///
817	/// # Examples
818	///
819	/// Basic usage:
820	///
821	/// ```
822	/// use bstr::ByteVec;
823	///
824	/// let mut s = Vec::from("foo☃bar");
/// assert_eq!(s.remove_char(3), '☃');
826	/// assert_eq!(s, b"foobar");
827	/// ```
828	///
829	/// This example shows how the Unicode replacement codepoint policy is
830	/// used:
831	///
832	/// ```
833	/// use bstr::ByteVec;
834	///
/// let mut s = Vec::from_slice(b"foo\xFFbar");
/// assert_eq!(s.remove_char(3), '\u{FFFD}');
837	/// assert_eq!(s, b"foobar");
838	/// ```
839	#[inline]
840	fn remove_char(&mut self, at: usize) -> char {
841	let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
842	assert!(
843	size > `0`,
844	"expected {} to be less than {}",
845	at,
846	self.as_vec().len(),
847	);
848	self.as_vec_mut().drain(at..at + size);
849	ch
850	}
851
852	/// Inserts the given codepoint into this `Vec<u8>` at a particular byte
853	/// position.
854	///
855	/// This is an `O(n)` operation as it may copy a number of elements in this
856	/// byte string proportional to its length.
857	///
858	/// # Panics
859	///
860	/// Panics if `at` is larger than the byte string's length.
861	///
862	/// # Examples
863	///
864	/// Basic usage:
865	///
866	/// ```
867	/// use bstr::ByteVec;
868	///
869	/// let mut s = Vec::from("foobar");
/// s.insert_char(3, '☃');
871	/// assert_eq!(s, "foo☃bar".as_bytes());
872	/// ```
873	#[inline]
874	fn insert_char(&mut self, at: usize, ch: char) {
875	self.insert_str(at, ch.encode_utf8(&mut [`0`; `4`]).as_bytes());
876	}
877
878	/// Inserts the given byte string into this byte string at a particular
879	/// byte position.
880	///
881	/// This is an `O(n)` operation as it may copy a number of elements in this
882	/// byte string proportional to its length.
883	///
884	/// The given byte string may be any type that can be cheaply converted
885	/// into a `&[u8]`. This includes, but is not limited to, `&str` and
886	/// `&[u8]`.
887	///
888	/// # Panics
889	///
890	/// Panics if `at` is larger than the byte string's length.
891	///
892	/// # Examples
893	///
894	/// Basic usage:
895	///
896	/// ```
897	/// use bstr::ByteVec;
898	///
899	/// let mut s = Vec::from("foobar");
/// s.insert_str(3, "☃☃☃");
901	/// assert_eq!(s, "foo☃☃☃bar".as_bytes());
902	/// ```
903	#[inline]
904	fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
905	let bytes = bytes.as_ref();
906	let len = self.as_vec().len();
907	assert!(at <= len, "expected {} to be <= {}", at, len);
908
909	// SAFETY: We'd like to efficiently splice in the given bytes into
910	// this byte string. Since we are only working with `u8` elements here,
911	// we only need to consider whether our bounds are correct and whether
912	// our byte string has enough space.
913	self.as_vec_mut().reserve(bytes.len());
914	unsafe {
915	// Shift bytes after `at` over by the length of `bytes` to make
916	// room for it. This requires referencing two regions of memory
917	// that may overlap, so we use ptr::copy.
918	ptr::copy(
919	self.as_vec().as_ptr().add(at),
920	self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
921	len - at,
922	);
923	// Now copy the bytes given into the room we made above. In this
924	// case, we know that the given bytes cannot possibly overlap
925	// with this byte string since we have a mutable borrow of the
926	// latter. Thus, we can use a nonoverlapping copy.
927	ptr::copy_nonoverlapping(
928	bytes.as_ptr(),
929	self.as_vec_mut().as_mut_ptr().add(at),
930	bytes.len(),
931	);
932	self.as_vec_mut().set_len(len + bytes.len());
933	}
934	}
935
936	/// Removes the specified range in this byte string and replaces it with
937	/// the given bytes. The given bytes do not need to have the same length
938	/// as the range provided.
939	///
940	/// # Panics
941	///
942	/// Panics if the given range is invalid.
943	///
944	/// # Examples
945	///
946	/// Basic usage:
947	///
948	/// ```
949	/// use bstr::ByteVec;
950	///
951	/// let mut s = Vec::from("foobar");
/// s.replace_range(2..4, "xxxxx");
953	/// assert_eq!(s, "foxxxxxar".as_bytes());
954	/// ```
955	#[inline]
956	fn replace_range<R, B>(&mut self, range: R, replace_with: B)
957	where
958	R: ops::RangeBounds<usize>,
959	B: AsRef<[u8]>,
960	{
961	self.as_vec_mut().splice(range, replace_with.as_ref().iter().copied());
962	}
963
964	/// Creates a draining iterator that removes the specified range in this
965	/// `Vec<u8>` and yields each of the removed bytes.
966	///
967	/// Note that the elements specified by the given range are removed
968	/// regardless of whether the returned iterator is fully exhausted.
969	///
/// Also note that it is unspecified how many bytes are removed from the
971	/// `Vec<u8>` if the `DrainBytes` iterator is leaked.
972	///
973	/// # Panics
974	///
975	/// Panics if the given range is not valid.
976	///
977	/// # Examples
978	///
979	/// Basic usage:
980	///
981	/// ```
982	/// use bstr::ByteVec;
983	///
984	/// let mut s = Vec::from("foobar");
985	/// {
///     let mut drainer = s.drain_bytes(2..4);
987	/// assert_eq!(drainer.next(), Some(b'o'));
988	/// assert_eq!(drainer.next(), Some(b'b'));
989	/// assert_eq!(drainer.next(), None);
990	/// }
991	/// assert_eq!(s, "foar".as_bytes());
992	/// ```
993	#[inline]
994	fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_>
995	where
996	R: ops::RangeBounds<usize>,
997	{
998	DrainBytes { it: self.as_vec_mut().drain(range) }
999	}
1000	}
1001
1002	/// A draining byte oriented iterator for `Vec<u8>`.
1003	///
1004	/// This iterator is created by
1005	/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
1006	///
1007	/// # Examples
1008	///
1009	/// Basic usage:
1010	///
1011	/// ```
1012	/// use bstr::ByteVec;
1013	///
1014	/// let mut s = Vec::from("foobar");
1015	/// {
///     let mut drainer = s.drain_bytes(2..4);
1017	/// assert_eq!(drainer.next(), Some(b'o'));
1018	/// assert_eq!(drainer.next(), Some(b'b'));
1019	/// assert_eq!(drainer.next(), None);
1020	/// }
1021	/// assert_eq!(s, "foar".as_bytes());
1022	/// ```
#[derive(Debug)]
pub struct DrainBytes<'a> {
    // The underlying `Vec<u8>` drain that does all of the work.
    it: vec::Drain<'a, u8>,
}

impl<'a> iter::FusedIterator for DrainBytes<'a> {}

impl<'a> Iterator for DrainBytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    // Forward the size hint of the inner drain. `ExactSizeIterator` requires
    // `size_hint` to report the exact remaining length; the default
    // implementation would report `(0, None)`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for DrainBytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl<'a> ExactSizeIterator for DrainBytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
1052
1053	/// An error that may occur when converting a `Vec<u8>` to a `String`.
1054	///
1055	/// This error includes the original `Vec<u8>` that failed to convert to a
1056	/// `String`. This permits callers to recover the allocation used even if it
/// is not valid UTF-8.
1058	///
1059	/// # Examples
1060	///
1061	/// Basic usage:
1062	///
1063	/// ```
1064	/// use bstr::{B, ByteVec};
1065	///
/// let bytes = Vec::from_slice(b"foo\xFFbar");
/// let err = bytes.into_string().unwrap_err();
///
/// assert_eq!(err.utf8_error().valid_up_to(), 3);
/// assert_eq!(err.utf8_error().error_len(), Some(1));
///
/// // At no point in this example is an allocation performed.
/// let bytes = Vec::from(err.into_vec());
/// assert_eq!(bytes, B(b"foo\xFFbar"));
1075	/// ```
1076	#[derive(Debug, Eq, PartialEq)]
1077	pub struct FromUtf8Error {
1078	original: Vec<u8>,
1079	err: Utf8Error,
1080	}
1081
1082	impl FromUtf8Error {
1083	/// Return the original bytes as a slice that failed to convert to a
1084	/// `String`.
1085	///
1086	/// # Examples
1087	///
1088	/// Basic usage:
1089	///
1090	/// ```
1091	/// use bstr::{B, ByteVec};
1092	///
1093	/// let bytes = Vec::from_slice(b"foo`\xFF`bar");
1094	/// let err = bytes.into_string().unwrap_err();
1095	///
1096	/// // At no point in this example is an allocation performed.
1097	/// assert_eq!(err.as_bytes(), B(b"foo`\xFF`bar"));
1098	/// ```
1099	#[inline]
1100	pub fn as_bytes(&self) -> &[u8] {
1101	&self.original
1102	}
1103
1104	/// Consume this error and return the original byte string that failed to
1105	/// convert to a `String`.
1106	///
1107	/// # Examples
1108	///
1109	/// Basic usage:
1110	///
1111	/// ```
1112	/// use bstr::{B, ByteVec};
1113	///
1114	/// let bytes = Vec::from_slice(b"foo`\xFF`bar");
1115	/// let err = bytes.into_string().unwrap_err();
1116	/// let original = err.into_vec();
1117	///
1118	/// // At no point in this example is an allocation performed.
1119	/// assert_eq!(original, B(b"foo`\xFF`bar"));
1120	/// ```
1121	#[inline]
1122	pub fn into_vec(self) -> Vec<u8> {
1123	self.original
1124	}
1125
1126	/// Return the underlying UTF-8 error that occurred. This error provides
1127	/// information on the nature and location of the invalid UTF-8 detected.
1128	///
1129	/// # Examples
1130	///
1131	/// Basic usage:
1132	///
1133	/// ```
1134	/// use bstr::{B, ByteVec};
1135	///
1136	/// let bytes = Vec::from_slice(b"foo`\xFF`bar");
1137	/// let err = bytes.into_string().unwrap_err();
1138	///
1139	/// assert_eq!(err.utf8_error().valid_up_to(), `3`);
1140	/// assert_eq!(err.utf8_error().error_len(), Some(`1`));
1141	/// ```
1142	#[inline]
1143	pub fn utf8_error(&self) -> &Utf8Error {
1144	&self.err
1145	}
1146	}
1147
#[cfg(feature = "std")]
impl error::Error for FromUtf8Error {
    // A fixed, static summary of this error.
    #[inline]
    fn description(&self) -> &str {
        "invalid UTF-8 vector"
    }
}
1155
1156	impl fmt::Display for FromUtf8Error {
1157	#[inline]
1158	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1159	write!(f, "{}", self.err)
1160	}
1161	}
1162
// Unit tests for `ByteVec::insert_str`, covering insertion at the start,
// middle and end of a byte string plus the out-of-bounds panics.
#[cfg(all(test, feature = "std"))]
mod tests {
    use alloc::{vec, vec::Vec};

    use crate::ext_vec::ByteVec;

    #[test]
    fn insert() {
        // Inserting into an empty byte string.
        let mut s = vec![];
        s.insert_str(0, "foo");
        assert_eq!(s, "foo".as_bytes());

        // Inserting before and after a single byte.
        let mut s = Vec::from("a");
        s.insert_str(0, "foo");
        assert_eq!(s, "fooa".as_bytes());

        let mut s = Vec::from("a");
        s.insert_str(1, "foo");
        assert_eq!(s, "afoo".as_bytes());

        // Inserting in the middle.
        let mut s = Vec::from("foobar");
        s.insert_str(3, "quux");
        assert_eq!(s, "fooquuxbar".as_bytes());

        let mut s = Vec::from("foobar");
        s.insert_str(3, "x");
        assert_eq!(s, "fooxbar".as_bytes());

        // Inserting at the very start and the very end.
        let mut s = Vec::from("foobar");
        s.insert_str(0, "x");
        assert_eq!(s, "xfoobar".as_bytes());

        let mut s = Vec::from("foobar");
        s.insert_str(6, "x");
        assert_eq!(s, "foobarx".as_bytes());

        // Inserting something longer than the existing contents.
        let mut s = Vec::from("foobar");
        s.insert_str(3, "quuxbazquux");
        assert_eq!(s, "fooquuxbazquuxbar".as_bytes());
    }

    // Each of the following inserts one past the end and must panic.

    #[test]
    #[should_panic]
    fn insert_fail1() {
        let mut s = vec![];
        s.insert_str(1, "foo");
    }

    #[test]
    #[should_panic]
    fn insert_fail2() {
        let mut s = Vec::from("a");
        s.insert_str(2, "foo");
    }

    #[test]
    #[should_panic]
    fn insert_fail3() {
        let mut s = Vec::from("foobar");
        s.insert_str(7, "foo");
    }
}
1225