1use core::fmt;
2use core::iter;
3use core::ops;
4use core::ptr;
5
6use alloc::{borrow::Cow, string::String, vec, vec::Vec};
7
8#[cfg(feature = "std")]
9use std::{
10 error,
11 ffi::{OsStr, OsString},
12 path::{Path, PathBuf},
13};
14
15use crate::{
16 ext_slice::ByteSlice,
17 utf8::{self, Utf8Error},
18};
19
20/// Concatenate the elements given by the iterator together into a single
21/// `Vec<u8>`.
22///
23/// The elements may be any type that can be cheaply converted into an `&[u8]`.
24/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
25///
26/// # Examples
27///
28/// Basic usage:
29///
30/// ```
31/// use bstr;
32///
33/// let s = bstr::concat(&["foo", "bar", "baz"]);
34/// assert_eq!(s, "foobarbaz".as_bytes());
35/// ```
36#[inline]
pub fn concat<T, I>(elements: I) -> Vec<u8>
where
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    // Append each element's bytes, in iteration order, to one buffer. An
    // empty iterator produces an empty vector.
    let mut dest: Vec<u8> = Vec::new();
    for element in elements {
        dest.extend_from_slice(element.as_ref());
    }
    dest
}
48
49/// Join the elements given by the iterator with the given separator into a
50/// single `Vec<u8>`.
51///
52/// Both the separator and the elements may be any type that can be cheaply
53/// converted into an `&[u8]`. This includes, but is not limited to,
54/// `&str`, `&BStr` and `&[u8]` itself.
55///
56/// # Examples
57///
58/// Basic usage:
59///
60/// ```
61/// use bstr;
62///
63/// let s = bstr::join(",", &["foo", "bar", "baz"]);
64/// assert_eq!(s, "foo,bar,baz".as_bytes());
65/// ```
66#[inline]
pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
where
    B: AsRef<[u8]>,
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    // Borrow the separator once; it is the same for every gap.
    let separator = separator.as_ref();
    let mut it = elements.into_iter();
    let mut dest: Vec<u8> = Vec::new();
    // The first element is written without a leading separator; every
    // later element is preceded by one. No elements means an empty result.
    if let Some(first) = it.next() {
        dest.extend_from_slice(first.as_ref());
        for element in it {
            dest.extend_from_slice(separator);
            dest.extend_from_slice(element.as_ref());
        }
    }
    dest
}
87
88impl ByteVec for Vec<u8> {
89 #[inline]
90 fn as_vec(&self) -> &Vec<u8> {
91 self
92 }
93
94 #[inline]
95 fn as_vec_mut(&mut self) -> &mut Vec<u8> {
96 self
97 }
98
99 #[inline]
100 fn into_vec(self) -> Vec<u8> {
101 self
102 }
103}
104
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
///
/// The module is not `pub`, so code outside this module tree cannot name
/// `Sealed` and therefore cannot satisfy the super trait bound.
mod private {
    pub trait Sealed {}
}
impl private::Sealed for Vec<u8> {}
111
112/// A trait that extends `Vec<u8>` with string oriented methods.
113///
114/// Note that when using the constructor methods, such as
115/// `ByteVec::from_slice`, one should actually call them using the concrete
116/// type. For example:
117///
118/// ```
119/// use bstr::{B, ByteVec};
120///
121/// let s = Vec::from_slice(b"abc"); // NOT ByteVec::from_slice("...")
122/// assert_eq!(s, B("abc"));
123/// ```
124///
125/// This trait is sealed and cannot be implemented outside of `bstr`.
126pub trait ByteVec: private::Sealed {
127 /// A method for accessing the raw vector bytes of this type. This is
128 /// always a no-op and callers shouldn't care about it. This only exists
129 /// for making the extension trait work.
130 #[doc(hidden)]
131 fn as_vec(&self) -> &Vec<u8>;
132
133 /// A method for accessing the raw vector bytes of this type, mutably. This
134 /// is always a no-op and callers shouldn't care about it. This only exists
135 /// for making the extension trait work.
136 #[doc(hidden)]
137 fn as_vec_mut(&mut self) -> &mut Vec<u8>;
138
139 /// A method for consuming ownership of this vector. This is always a no-op
140 /// and callers shouldn't care about it. This only exists for making the
141 /// extension trait work.
142 #[doc(hidden)]
143 fn into_vec(self) -> Vec<u8>
144 where
145 Self: Sized;
146
147 /// Create a new owned byte string from the given byte slice.
148 ///
149 /// # Examples
150 ///
151 /// Basic usage:
152 ///
153 /// ```
154 /// use bstr::{B, ByteVec};
155 ///
156 /// let s = Vec::from_slice(b"abc");
157 /// assert_eq!(s, B("abc"));
158 /// ```
159 #[inline]
fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> {
    // Copy the borrowed bytes into a new, owned vector.
    bytes.as_ref().to_vec()
}
163
164 /// Create a new byte string from an owned OS string.
165 ///
166 /// When the underlying bytes of OS strings are accessible, then this
167 /// always succeeds and is zero cost. Otherwise, this returns the given
168 /// `OsString` if it is not valid UTF-8.
169 ///
170 /// # Examples
171 ///
172 /// Basic usage:
173 ///
174 /// ```
175 /// use std::ffi::OsString;
176 ///
177 /// use bstr::{B, ByteVec};
178 ///
179 /// let os_str = OsString::from("foo");
180 /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8");
181 /// assert_eq!(bs, B("foo"));
182 /// ```
183 #[inline]
184 #[cfg(feature = "std")]
fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> {
    // Unix: the OS string's byte vector is taken over as-is, so this arm
    // always returns `Ok`.
    #[cfg(unix)]
    #[inline]
    fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
        use std::os::unix::ffi::OsStringExt;

        Ok(os_str.into_vec())
    }

    // Elsewhere: go through `String`. If the OS string is not valid
    // Unicode, `into_string` hands the original back and it becomes the
    // error value.
    #[cfg(not(unix))]
    #[inline]
    fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> {
        os_str.into_string().map(String::into_bytes)
    }

    imp(os_str)
}
202
203 /// Lossily create a new byte string from an OS string slice.
204 ///
205 /// When the underlying bytes of OS strings are accessible, then this is
206 /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
207 /// performed and if the given OS string is not valid UTF-8, then it is
208 /// lossily decoded into valid UTF-8 (with invalid bytes replaced by the
209 /// Unicode replacement codepoint).
210 ///
211 /// # Examples
212 ///
213 /// Basic usage:
214 ///
215 /// ```
216 /// use std::ffi::OsStr;
217 ///
218 /// use bstr::{B, ByteVec};
219 ///
220 /// let os_str = OsStr::new("foo");
221 /// let bs = Vec::from_os_str_lossy(os_str);
222 /// assert_eq!(bs, B("foo"));
223 /// ```
224 #[inline]
225 #[cfg(feature = "std")]
fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
    // Unix: expose the OS string's bytes directly; nothing is copied.
    #[cfg(unix)]
    #[inline]
    fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
        use std::os::unix::ffi::OsStrExt;

        Cow::Borrowed(os_str.as_bytes())
    }

    // Elsewhere: decode lossily. A borrowed result stays borrowed; an
    // owned (repaired) string gives up its buffer as the owned bytes.
    #[cfg(not(unix))]
    #[inline]
    fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> {
        match os_str.to_string_lossy() {
            Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()),
            Cow::Owned(x) => Cow::Owned(x.into_bytes()),
        }
    }

    imp(os_str)
}
246
247 /// Create a new byte string from an owned file path.
248 ///
249 /// When the underlying bytes of paths are accessible, then this always
250 /// succeeds and is zero cost. Otherwise, this returns the given `PathBuf`
251 /// if it is not valid UTF-8.
252 ///
253 /// # Examples
254 ///
255 /// Basic usage:
256 ///
257 /// ```
258 /// use std::path::PathBuf;
259 ///
260 /// use bstr::{B, ByteVec};
261 ///
262 /// let path = PathBuf::from("foo");
263 /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8");
264 /// assert_eq!(bs, B("foo"));
265 /// ```
266 #[inline]
267 #[cfg(feature = "std")]
268 fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> {
269 Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from)
270 }
271
272 /// Lossily create a new byte string from a file path.
273 ///
274 /// When the underlying bytes of paths are accessible, then this is
275 /// zero cost and always returns a slice. Otherwise, a UTF-8 check is
276 /// performed and if the given path is not valid UTF-8, then it is lossily
277 /// decoded into valid UTF-8 (with invalid bytes replaced by the Unicode
278 /// replacement codepoint).
279 ///
280 /// # Examples
281 ///
282 /// Basic usage:
283 ///
284 /// ```
285 /// use std::path::Path;
286 ///
287 /// use bstr::{B, ByteVec};
288 ///
289 /// let path = Path::new("foo");
290 /// let bs = Vec::from_path_lossy(path);
291 /// assert_eq!(bs, B("foo"));
292 /// ```
293 #[inline]
294 #[cfg(feature = "std")]
295 fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> {
296 Vec::from_os_str_lossy(path.as_os_str())
297 }
298
299 /// Unescapes the given string into its raw bytes.
300 ///
/// This looks for the escape sequences `\xNN`, `\0`, `\r`, `\n`, `\t`
/// and `\\` and translates them into their corresponding unescaped form.
303 ///
304 /// Incomplete escape sequences or things that look like escape sequences
305 /// but are not (for example, `\i` or `\xYZ`) are passed through literally.
306 ///
307 /// This is the dual of [`ByteSlice::escape_bytes`].
308 ///
309 /// Note that the zero or NUL byte may be represented as either `\0` or
310 /// `\x00`. Both will be unescaped into the zero byte.
311 ///
312 /// # Examples
313 ///
314 /// This shows basic usage:
315 ///
316 /// ```
317 /// # #[cfg(feature = "alloc")] {
318 /// use bstr::{B, BString, ByteVec};
319 ///
320 /// assert_eq!(
321 /// BString::from(b"foo\xFFbar"),
322 /// Vec::unescape_bytes(r"foo\xFFbar"),
323 /// );
324 /// assert_eq!(
325 /// BString::from(b"foo\nbar"),
326 /// Vec::unescape_bytes(r"foo\nbar"),
327 /// );
328 /// assert_eq!(
329 /// BString::from(b"foo\tbar"),
330 /// Vec::unescape_bytes(r"foo\tbar"),
331 /// );
332 /// assert_eq!(
333 /// BString::from(b"foo\\bar"),
334 /// Vec::unescape_bytes(r"foo\\bar"),
335 /// );
336 /// assert_eq!(
337 /// BString::from("foo☃bar"),
338 /// Vec::unescape_bytes(r"foo☃bar"),
339 /// );
340 ///
341 /// # }
342 /// ```
343 ///
344 /// This shows some examples of how incomplete or "incorrect" escape
345 /// sequences get passed through literally.
346 ///
347 /// ```
348 /// # #[cfg(feature = "alloc")] {
349 /// use bstr::{B, BString, ByteVec};
350 ///
351 /// // Show some incomplete escape sequences.
352 /// assert_eq!(
353 /// BString::from(br"\"),
354 /// Vec::unescape_bytes(r"\"),
355 /// );
356 /// assert_eq!(
357 /// BString::from(br"\"),
358 /// Vec::unescape_bytes(r"\\"),
359 /// );
360 /// assert_eq!(
361 /// BString::from(br"\x"),
362 /// Vec::unescape_bytes(r"\x"),
363 /// );
364 /// assert_eq!(
365 /// BString::from(br"\xA"),
366 /// Vec::unescape_bytes(r"\xA"),
367 /// );
368 /// // And now some that kind of look like escape
369 /// // sequences, but aren't.
370 /// assert_eq!(
371 /// BString::from(br"\xZ"),
372 /// Vec::unescape_bytes(r"\xZ"),
373 /// );
374 /// assert_eq!(
375 /// BString::from(br"\xZZ"),
376 /// Vec::unescape_bytes(r"\xZZ"),
377 /// );
378 /// assert_eq!(
379 /// BString::from(br"\i"),
380 /// Vec::unescape_bytes(r"\i"),
381 /// );
382 /// assert_eq!(
383 /// BString::from(br"\u"),
384 /// Vec::unescape_bytes(r"\u"),
385 /// );
386 /// assert_eq!(
387 /// BString::from(br"\u{2603}"),
388 /// Vec::unescape_bytes(r"\u{2603}"),
389 /// );
390 ///
391 /// # }
392 /// ```
393 #[inline]
394 #[cfg(feature = "alloc")]
395 fn unescape_bytes<S: AsRef<str>>(escaped: S) -> Vec<u8> {
396 let s = escaped.as_ref();
397 crate::escape_bytes::UnescapeBytes::new(s.chars()).collect()
398 }
399
400 /// Appends the given byte to the end of this byte string.
401 ///
402 /// Note that this is equivalent to the generic `Vec::push` method. This
403 /// method is provided to permit callers to explicitly differentiate
404 /// between pushing bytes, codepoints and strings.
405 ///
406 /// # Examples
407 ///
408 /// Basic usage:
409 ///
410 /// ```
411 /// use bstr::ByteVec;
412 ///
413 /// let mut s = <Vec<u8>>::from("abc");
414 /// s.push_byte(b'\xE2');
415 /// s.push_byte(b'\x98');
416 /// s.push_byte(b'\x83');
417 /// assert_eq!(s, "abc☃".as_bytes());
418 /// ```
419 #[inline]
420 fn push_byte(&mut self, byte: u8) {
421 self.as_vec_mut().push(byte);
422 }
423
424 /// Appends the given `char` to the end of this byte string.
425 ///
426 /// # Examples
427 ///
428 /// Basic usage:
429 ///
430 /// ```
431 /// use bstr::ByteVec;
432 ///
433 /// let mut s = <Vec<u8>>::from("abc");
434 /// s.push_char('1');
435 /// s.push_char('2');
436 /// s.push_char('3');
437 /// assert_eq!(s, "abc123".as_bytes());
438 /// ```
439 #[inline]
440 fn push_char(&mut self, ch: char) {
441 if ch.len_utf8() == 1 {
442 self.push_byte(ch as u8);
443 return;
444 }
445 self.as_vec_mut()
446 .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes());
447 }
448
/// Appends the given slice to the end of this byte string. This accepts
/// any type that can be converted to a `&[u8]`. This includes, but is not
/// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself.
452 ///
453 /// # Examples
454 ///
455 /// Basic usage:
456 ///
457 /// ```
458 /// use bstr::ByteVec;
459 ///
460 /// let mut s = <Vec<u8>>::from("abc");
461 /// s.push_str(b"123");
462 /// assert_eq!(s, "abc123".as_bytes());
463 /// ```
464 #[inline]
465 fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) {
466 self.as_vec_mut().extend_from_slice(bytes.as_ref());
467 }
468
469 /// Converts a `Vec<u8>` into a `String` if and only if this byte string is
470 /// valid UTF-8.
471 ///
472 /// If it is not valid UTF-8, then a
473 /// [`FromUtf8Error`](struct.FromUtf8Error.html)
474 /// is returned. (This error can be used to examine why UTF-8 validation
475 /// failed, or to regain the original byte string.)
476 ///
477 /// # Examples
478 ///
479 /// Basic usage:
480 ///
481 /// ```
482 /// use bstr::ByteVec;
483 ///
484 /// let bytes = Vec::from("hello");
485 /// let string = bytes.into_string().unwrap();
486 ///
487 /// assert_eq!("hello", string);
488 /// ```
489 ///
490 /// If this byte string is not valid UTF-8, then an error will be returned.
491 /// That error can then be used to inspect the location at which invalid
492 /// UTF-8 was found, or to regain the original byte string:
493 ///
494 /// ```
495 /// use bstr::{B, ByteVec};
496 ///
497 /// let bytes = Vec::from_slice(b"foo\xFFbar");
498 /// let err = bytes.into_string().unwrap_err();
499 ///
500 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
501 /// assert_eq!(err.utf8_error().error_len(), Some(1));
502 ///
503 /// // At no point in this example is an allocation performed.
504 /// let bytes = Vec::from(err.into_vec());
505 /// assert_eq!(bytes, B(b"foo\xFFbar"));
506 /// ```
507 #[inline]
508 fn into_string(self) -> Result<String, FromUtf8Error>
509 where
510 Self: Sized,
511 {
512 match utf8::validate(self.as_vec()) {
513 Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }),
514 Ok(()) => {
515 // SAFETY: This is safe because of the guarantees provided by
516 // utf8::validate.
517 unsafe { Ok(self.into_string_unchecked()) }
518 }
519 }
520 }
521
522 /// Lossily converts a `Vec<u8>` into a `String`. If this byte string
523 /// contains invalid UTF-8, then the invalid bytes are replaced with the
524 /// Unicode replacement codepoint.
525 ///
526 /// # Examples
527 ///
528 /// Basic usage:
529 ///
530 /// ```
531 /// use bstr::ByteVec;
532 ///
533 /// let bytes = Vec::from_slice(b"foo\xFFbar");
534 /// let string = bytes.into_string_lossy();
535 /// assert_eq!(string, "foo\u{FFFD}bar");
536 /// ```
537 #[inline]
538 fn into_string_lossy(self) -> String
539 where
540 Self: Sized,
541 {
542 match self.as_vec().to_str_lossy() {
543 Cow::Borrowed(_) => {
544 // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees
545 // the entire string is valid utf8.
546 unsafe { self.into_string_unchecked() }
547 }
548 Cow::Owned(s) => s,
549 }
550 }
551
552 /// Unsafely convert this byte string into a `String`, without checking for
553 /// valid UTF-8.
554 ///
555 /// # Safety
556 ///
557 /// Callers *must* ensure that this byte string is valid UTF-8 before
558 /// calling this method. Converting a byte string into a `String` that is
559 /// not valid UTF-8 is considered undefined behavior.
560 ///
561 /// This routine is useful in performance sensitive contexts where the
562 /// UTF-8 validity of the byte string is already known and it is
563 /// undesirable to pay the cost of an additional UTF-8 validation check
564 /// that [`into_string`](#method.into_string) performs.
565 ///
566 /// # Examples
567 ///
568 /// Basic usage:
569 ///
570 /// ```
571 /// use bstr::ByteVec;
572 ///
573 /// // SAFETY: This is safe because string literals are guaranteed to be
574 /// // valid UTF-8 by the Rust compiler.
575 /// let s = unsafe { Vec::from("☃βツ").into_string_unchecked() };
576 /// assert_eq!("☃βツ", s);
577 /// ```
578 #[inline]
579 unsafe fn into_string_unchecked(self) -> String
580 where
581 Self: Sized,
582 {
583 String::from_utf8_unchecked(self.into_vec())
584 }
585
586 /// Converts this byte string into an OS string, in place.
587 ///
588 /// When OS strings can be constructed from arbitrary byte sequences, this
589 /// always succeeds and is zero cost. Otherwise, if this byte string is not
590 /// valid UTF-8, then an error (with the original byte string) is returned.
591 ///
592 /// # Examples
593 ///
594 /// Basic usage:
595 ///
596 /// ```
597 /// use std::ffi::OsStr;
598 ///
599 /// use bstr::ByteVec;
600 ///
601 /// let bs = Vec::from("foo");
602 /// let os_str = bs.into_os_string().expect("should be valid UTF-8");
603 /// assert_eq!(os_str, OsStr::new("foo"));
604 /// ```
605 #[cfg(feature = "std")]
606 #[inline]
607 fn into_os_string(self) -> Result<OsString, FromUtf8Error>
608 where
609 Self: Sized,
610 {
611 #[cfg(unix)]
612 #[inline]
613 fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
614 use std::os::unix::ffi::OsStringExt;
615
616 Ok(OsString::from_vec(v))
617 }
618
619 #[cfg(not(unix))]
620 #[inline]
621 fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> {
622 v.into_string().map(OsString::from)
623 }
624
625 imp(self.into_vec())
626 }
627
628 /// Lossily converts this byte string into an OS string, in place.
629 ///
630 /// When OS strings can be constructed from arbitrary byte sequences, this
631 /// is zero cost and always returns a slice. Otherwise, this will perform a
632 /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
633 /// the Unicode replacement codepoint.
634 ///
635 /// Note that this can prevent the correct roundtripping of file paths when
636 /// the representation of `OsString` is opaque.
637 ///
638 /// # Examples
639 ///
640 /// Basic usage:
641 ///
642 /// ```
643 /// use bstr::ByteVec;
644 ///
645 /// let bs = Vec::from_slice(b"foo\xFFbar");
646 /// let os_str = bs.into_os_string_lossy();
647 /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
648 /// ```
649 #[inline]
650 #[cfg(feature = "std")]
651 fn into_os_string_lossy(self) -> OsString
652 where
653 Self: Sized,
654 {
655 #[cfg(unix)]
656 #[inline]
657 fn imp(v: Vec<u8>) -> OsString {
658 use std::os::unix::ffi::OsStringExt;
659
660 OsString::from_vec(v)
661 }
662
663 #[cfg(not(unix))]
664 #[inline]
665 fn imp(v: Vec<u8>) -> OsString {
666 OsString::from(v.into_string_lossy())
667 }
668
669 imp(self.into_vec())
670 }
671
672 /// Converts this byte string into an owned file path, in place.
673 ///
674 /// When paths can be constructed from arbitrary byte sequences, this
675 /// always succeeds and is zero cost. Otherwise, if this byte string is not
676 /// valid UTF-8, then an error (with the original byte string) is returned.
677 ///
678 /// # Examples
679 ///
680 /// Basic usage:
681 ///
682 /// ```
683 /// use bstr::ByteVec;
684 ///
685 /// let bs = Vec::from("foo");
686 /// let path = bs.into_path_buf().expect("should be valid UTF-8");
687 /// assert_eq!(path.as_os_str(), "foo");
688 /// ```
689 #[cfg(feature = "std")]
690 #[inline]
691 fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error>
692 where
693 Self: Sized,
694 {
695 self.into_os_string().map(PathBuf::from)
696 }
697
698 /// Lossily converts this byte string into an owned file path, in place.
699 ///
700 /// When paths can be constructed from arbitrary byte sequences, this is
701 /// zero cost and always returns a slice. Otherwise, this will perform a
702 /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
703 /// the Unicode replacement codepoint.
704 ///
705 /// Note that this can prevent the correct roundtripping of file paths when
706 /// the representation of `PathBuf` is opaque.
707 ///
708 /// # Examples
709 ///
710 /// Basic usage:
711 ///
712 /// ```
713 /// use bstr::ByteVec;
714 ///
715 /// let bs = Vec::from_slice(b"foo\xFFbar");
716 /// let path = bs.into_path_buf_lossy();
717 /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
718 /// ```
719 #[inline]
720 #[cfg(feature = "std")]
721 fn into_path_buf_lossy(self) -> PathBuf
722 where
723 Self: Sized,
724 {
725 PathBuf::from(self.into_os_string_lossy())
726 }
727
728 /// Removes the last byte from this `Vec<u8>` and returns it.
729 ///
730 /// If this byte string is empty, then `None` is returned.
731 ///
732 /// If the last codepoint in this byte string is not ASCII, then removing
733 /// the last byte could make this byte string contain invalid UTF-8.
734 ///
735 /// Note that this is equivalent to the generic `Vec::pop` method. This
736 /// method is provided to permit callers to explicitly differentiate
737 /// between popping bytes and codepoints.
738 ///
739 /// # Examples
740 ///
741 /// Basic usage:
742 ///
743 /// ```
744 /// use bstr::ByteVec;
745 ///
746 /// let mut s = Vec::from("foo");
747 /// assert_eq!(s.pop_byte(), Some(b'o'));
748 /// assert_eq!(s.pop_byte(), Some(b'o'));
749 /// assert_eq!(s.pop_byte(), Some(b'f'));
750 /// assert_eq!(s.pop_byte(), None);
751 /// ```
752 #[inline]
753 fn pop_byte(&mut self) -> Option<u8> {
754 self.as_vec_mut().pop()
755 }
756
757 /// Removes the last codepoint from this `Vec<u8>` and returns it.
758 ///
759 /// If this byte string is empty, then `None` is returned. If the last
760 /// bytes of this byte string do not correspond to a valid UTF-8 code unit
761 /// sequence, then the Unicode replacement codepoint is yielded instead in
762 /// accordance with the
/// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf-8).
764 ///
765 /// # Examples
766 ///
767 /// Basic usage:
768 ///
769 /// ```
770 /// use bstr::ByteVec;
771 ///
772 /// let mut s = Vec::from("foo");
773 /// assert_eq!(s.pop_char(), Some('o'));
774 /// assert_eq!(s.pop_char(), Some('o'));
775 /// assert_eq!(s.pop_char(), Some('f'));
776 /// assert_eq!(s.pop_char(), None);
777 /// ```
778 ///
779 /// This shows the replacement codepoint substitution policy. Note that
780 /// the first pop yields a replacement codepoint but actually removes two
781 /// bytes. This is in contrast with subsequent pops when encountering
782 /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8
783 /// code unit sequence.
784 ///
785 /// ```
786 /// use bstr::ByteVec;
787 ///
788 /// let mut s = Vec::from_slice(b"f\xFF\xFF\xFFoo\xE2\x98");
789 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
790 /// assert_eq!(s.pop_char(), Some('o'));
791 /// assert_eq!(s.pop_char(), Some('o'));
792 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
793 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
794 /// assert_eq!(s.pop_char(), Some('\u{FFFD}'));
795 /// assert_eq!(s.pop_char(), Some('f'));
796 /// assert_eq!(s.pop_char(), None);
797 /// ```
798 #[inline]
799 fn pop_char(&mut self) -> Option<char> {
800 let (ch, size) = utf8::decode_last_lossy(self.as_vec());
801 if size == 0 {
802 return None;
803 }
804 let new_len = self.as_vec().len() - size;
805 self.as_vec_mut().truncate(new_len);
806 Some(ch)
807 }
808
809 /// Removes a `char` from this `Vec<u8>` at the given byte position and
810 /// returns it.
811 ///
812 /// If the bytes at the given position do not lead to a valid UTF-8 code
813 /// unit sequence, then a
/// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf-8).
815 ///
816 /// # Panics
817 ///
818 /// Panics if `at` is larger than or equal to this byte string's length.
819 ///
820 /// # Examples
821 ///
822 /// Basic usage:
823 ///
824 /// ```
825 /// use bstr::ByteVec;
826 ///
827 /// let mut s = Vec::from("foo☃bar");
828 /// assert_eq!(s.remove_char(3), '☃');
829 /// assert_eq!(s, b"foobar");
830 /// ```
831 ///
832 /// This example shows how the Unicode replacement codepoint policy is
833 /// used:
834 ///
835 /// ```
836 /// use bstr::ByteVec;
837 ///
838 /// let mut s = Vec::from_slice(b"foo\xFFbar");
839 /// assert_eq!(s.remove_char(3), '\u{FFFD}');
840 /// assert_eq!(s, b"foobar");
841 /// ```
842 #[inline]
843 fn remove_char(&mut self, at: usize) -> char {
844 let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]);
845 assert!(
846 size > 0,
847 "expected {} to be less than {}",
848 at,
849 self.as_vec().len(),
850 );
851 self.as_vec_mut().drain(at..at + size);
852 ch
853 }
854
855 /// Inserts the given codepoint into this `Vec<u8>` at a particular byte
856 /// position.
857 ///
858 /// This is an `O(n)` operation as it may copy a number of elements in this
859 /// byte string proportional to its length.
860 ///
861 /// # Panics
862 ///
863 /// Panics if `at` is larger than the byte string's length.
864 ///
865 /// # Examples
866 ///
867 /// Basic usage:
868 ///
869 /// ```
870 /// use bstr::ByteVec;
871 ///
872 /// let mut s = Vec::from("foobar");
873 /// s.insert_char(3, '☃');
874 /// assert_eq!(s, "foo☃bar".as_bytes());
875 /// ```
876 #[inline]
877 fn insert_char(&mut self, at: usize, ch: char) {
878 self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes());
879 }
880
881 /// Inserts the given byte string into this byte string at a particular
882 /// byte position.
883 ///
884 /// This is an `O(n)` operation as it may copy a number of elements in this
885 /// byte string proportional to its length.
886 ///
887 /// The given byte string may be any type that can be cheaply converted
888 /// into a `&[u8]`. This includes, but is not limited to, `&str` and
889 /// `&[u8]`.
890 ///
891 /// # Panics
892 ///
893 /// Panics if `at` is larger than the byte string's length.
894 ///
895 /// # Examples
896 ///
897 /// Basic usage:
898 ///
899 /// ```
900 /// use bstr::ByteVec;
901 ///
902 /// let mut s = Vec::from("foobar");
903 /// s.insert_str(3, "☃☃☃");
904 /// assert_eq!(s, "foo☃☃☃bar".as_bytes());
905 /// ```
906 #[inline]
907 fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) {
908 let bytes = bytes.as_ref();
909 let len = self.as_vec().len();
910 assert!(at <= len, "expected {} to be <= {}", at, len);
911
912 // SAFETY: We'd like to efficiently splice in the given bytes into
913 // this byte string. Since we are only working with `u8` elements here,
914 // we only need to consider whether our bounds are correct and whether
915 // our byte string has enough space.
916 self.as_vec_mut().reserve(bytes.len());
917 unsafe {
918 // Shift bytes after `at` over by the length of `bytes` to make
919 // room for it. This requires referencing two regions of memory
920 // that may overlap, so we use ptr::copy.
921 ptr::copy(
922 self.as_vec().as_ptr().add(at),
923 self.as_vec_mut().as_mut_ptr().add(at + bytes.len()),
924 len - at,
925 );
926 // Now copy the bytes given into the room we made above. In this
927 // case, we know that the given bytes cannot possibly overlap
928 // with this byte string since we have a mutable borrow of the
929 // latter. Thus, we can use a nonoverlapping copy.
930 ptr::copy_nonoverlapping(
931 bytes.as_ptr(),
932 self.as_vec_mut().as_mut_ptr().add(at),
933 bytes.len(),
934 );
935 self.as_vec_mut().set_len(len + bytes.len());
936 }
937 }
938
939 /// Removes the specified range in this byte string and replaces it with
940 /// the given bytes. The given bytes do not need to have the same length
941 /// as the range provided.
942 ///
943 /// # Panics
944 ///
945 /// Panics if the given range is invalid.
946 ///
947 /// # Examples
948 ///
949 /// Basic usage:
950 ///
951 /// ```
952 /// use bstr::ByteVec;
953 ///
954 /// let mut s = Vec::from("foobar");
955 /// s.replace_range(2..4, "xxxxx");
956 /// assert_eq!(s, "foxxxxxar".as_bytes());
957 /// ```
958 #[inline]
959 fn replace_range<R, B>(&mut self, range: R, replace_with: B)
960 where
961 R: ops::RangeBounds<usize>,
962 B: AsRef<[u8]>,
963 {
964 self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned());
965 }
966
967 /// Creates a draining iterator that removes the specified range in this
968 /// `Vec<u8>` and yields each of the removed bytes.
969 ///
970 /// Note that the elements specified by the given range are removed
971 /// regardless of whether the returned iterator is fully exhausted.
972 ///
/// Also note that it is unspecified how many bytes are removed from the
/// `Vec<u8>` if the `DrainBytes` iterator is leaked.
975 ///
976 /// # Panics
977 ///
978 /// Panics if the given range is not valid.
979 ///
980 /// # Examples
981 ///
982 /// Basic usage:
983 ///
984 /// ```
985 /// use bstr::ByteVec;
986 ///
987 /// let mut s = Vec::from("foobar");
988 /// {
989 /// let mut drainer = s.drain_bytes(2..4);
990 /// assert_eq!(drainer.next(), Some(b'o'));
991 /// assert_eq!(drainer.next(), Some(b'b'));
992 /// assert_eq!(drainer.next(), None);
993 /// }
994 /// assert_eq!(s, "foar".as_bytes());
995 /// ```
996 #[inline]
997 fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_>
998 where
999 R: ops::RangeBounds<usize>,
1000 {
1001 DrainBytes { it: self.as_vec_mut().drain(range) }
1002 }
1003}
1004
1005/// A draining byte oriented iterator for `Vec<u8>`.
1006///
1007/// This iterator is created by
1008/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
1009///
1010/// # Examples
1011///
1012/// Basic usage:
1013///
1014/// ```
1015/// use bstr::ByteVec;
1016///
1017/// let mut s = Vec::from("foobar");
1018/// {
1019/// let mut drainer = s.drain_bytes(2..4);
1020/// assert_eq!(drainer.next(), Some(b'o'));
1021/// assert_eq!(drainer.next(), Some(b'b'));
1022/// assert_eq!(drainer.next(), None);
1023/// }
1024/// assert_eq!(s, "foar".as_bytes());
1025/// ```
1026#[derive(Debug)]
pub struct DrainBytes<'a> {
    // The underlying `Vec` drain; every iterator method forwards to it.
    it: vec::Drain<'a, u8>,
}

impl<'a> iter::FusedIterator for DrainBytes<'a> {}

impl<'a> Iterator for DrainBytes<'a> {
    type Item = u8;

    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    // `ExactSizeIterator` is implemented below, and its contract requires
    // `size_hint` to report the exact remaining length. The default
    // implementation would report `(0, None)`, so forward to the inner
    // iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for DrainBytes<'a> {
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl<'a> ExactSizeIterator for DrainBytes<'a> {
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
1055
1056/// An error that may occur when converting a `Vec<u8>` to a `String`.
1057///
/// This error includes the original `Vec<u8>` that failed to convert to a
/// `String`. This permits callers to recover the allocation used even if
/// it is not valid UTF-8.
1061///
1062/// # Examples
1063///
1064/// Basic usage:
1065///
1066/// ```
1067/// use bstr::{B, ByteVec};
1068///
1069/// let bytes = Vec::from_slice(b"foo\xFFbar");
1070/// let err = bytes.into_string().unwrap_err();
1071///
1072/// assert_eq!(err.utf8_error().valid_up_to(), 3);
1073/// assert_eq!(err.utf8_error().error_len(), Some(1));
1074///
1075/// // At no point in this example is an allocation performed.
1076/// let bytes = Vec::from(err.into_vec());
1077/// assert_eq!(bytes, B(b"foo\xFFbar"));
1078/// ```
1079#[derive(Debug, Eq, PartialEq)]
1080pub struct FromUtf8Error {
1081 original: Vec<u8>,
1082 err: Utf8Error,
1083}
1084
1085impl FromUtf8Error {
1086 /// Return the original bytes as a slice that failed to convert to a
1087 /// `String`.
1088 ///
1089 /// # Examples
1090 ///
1091 /// Basic usage:
1092 ///
1093 /// ```
1094 /// use bstr::{B, ByteVec};
1095 ///
1096 /// let bytes = Vec::from_slice(b"foo\xFFbar");
1097 /// let err = bytes.into_string().unwrap_err();
1098 ///
1099 /// // At no point in this example is an allocation performed.
1100 /// assert_eq!(err.as_bytes(), B(b"foo\xFFbar"));
1101 /// ```
1102 #[inline]
1103 pub fn as_bytes(&self) -> &[u8] {
1104 &self.original
1105 }
1106
1107 /// Consume this error and return the original byte string that failed to
1108 /// convert to a `String`.
1109 ///
1110 /// # Examples
1111 ///
1112 /// Basic usage:
1113 ///
1114 /// ```
1115 /// use bstr::{B, ByteVec};
1116 ///
1117 /// let bytes = Vec::from_slice(b"foo\xFFbar");
1118 /// let err = bytes.into_string().unwrap_err();
1119 /// let original = err.into_vec();
1120 ///
1121 /// // At no point in this example is an allocation performed.
1122 /// assert_eq!(original, B(b"foo\xFFbar"));
1123 /// ```
1124 #[inline]
1125 pub fn into_vec(self) -> Vec<u8> {
1126 self.original
1127 }
1128
1129 /// Return the underlying UTF-8 error that occurred. This error provides
1130 /// information on the nature and location of the invalid UTF-8 detected.
1131 ///
1132 /// # Examples
1133 ///
1134 /// Basic usage:
1135 ///
1136 /// ```
1137 /// use bstr::{B, ByteVec};
1138 ///
1139 /// let bytes = Vec::from_slice(b"foo\xFFbar");
1140 /// let err = bytes.into_string().unwrap_err();
1141 ///
1142 /// assert_eq!(err.utf8_error().valid_up_to(), 3);
1143 /// assert_eq!(err.utf8_error().error_len(), Some(1));
1144 /// ```
1145 #[inline]
1146 pub fn utf8_error(&self) -> &Utf8Error {
1147 &self.err
1148 }
1149}
1150
1151#[cfg(feature = "std")]
1152impl error::Error for FromUtf8Error {
1153 #[inline]
1154 fn description(&self) -> &str {
1155 "invalid UTF-8 vector"
1156 }
1157}
1158
1159impl fmt::Display for FromUtf8Error {
1160 #[inline]
1161 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1162 write!(f, "{}", self.err)
1163 }
1164}
1165
1166#[cfg(all(test, feature = "std"))]
1167mod tests {
1168 use crate::ext_vec::ByteVec;
1169
1170 #[test]
1171 fn insert() {
1172 let mut s = vec![];
1173 s.insert_str(0, "foo");
1174 assert_eq!(s, "foo".as_bytes());
1175
1176 let mut s = Vec::from("a");
1177 s.insert_str(0, "foo");
1178 assert_eq!(s, "fooa".as_bytes());
1179
1180 let mut s = Vec::from("a");
1181 s.insert_str(1, "foo");
1182 assert_eq!(s, "afoo".as_bytes());
1183
1184 let mut s = Vec::from("foobar");
1185 s.insert_str(3, "quux");
1186 assert_eq!(s, "fooquuxbar".as_bytes());
1187
1188 let mut s = Vec::from("foobar");
1189 s.insert_str(3, "x");
1190 assert_eq!(s, "fooxbar".as_bytes());
1191
1192 let mut s = Vec::from("foobar");
1193 s.insert_str(0, "x");
1194 assert_eq!(s, "xfoobar".as_bytes());
1195
1196 let mut s = Vec::from("foobar");
1197 s.insert_str(6, "x");
1198 assert_eq!(s, "foobarx".as_bytes());
1199
1200 let mut s = Vec::from("foobar");
1201 s.insert_str(3, "quuxbazquux");
1202 assert_eq!(s, "fooquuxbazquuxbar".as_bytes());
1203 }
1204
1205 #[test]
1206 #[should_panic]
1207 fn insert_fail1() {
1208 let mut s = vec![];
1209 s.insert_str(1, "foo");
1210 }
1211
1212 #[test]
1213 #[should_panic]
1214 fn insert_fail2() {
1215 let mut s = Vec::from("a");
1216 s.insert_str(2, "foo");
1217 }
1218
1219 #[test]
1220 #[should_panic]
1221 fn insert_fail3() {
1222 let mut s = Vec::from("foobar");
1223 s.insert_str(7, "foo");
1224 }
1225}
1226