1 | use core::fmt; |
2 | use core::iter; |
3 | use core::ops; |
4 | use core::ptr; |
5 | |
6 | use alloc::{borrow::Cow, string::String, vec, vec::Vec}; |
7 | |
8 | #[cfg (feature = "std" )] |
9 | use std::{ |
10 | error, |
11 | ffi::{OsStr, OsString}, |
12 | path::{Path, PathBuf}, |
13 | }; |
14 | |
15 | use crate::{ |
16 | ext_slice::ByteSlice, |
17 | utf8::{self, Utf8Error}, |
18 | }; |
19 | |
/// Concatenate the elements given by the iterator together into a single
/// `Vec<u8>`.
///
/// The elements may be any type that can be cheaply converted into an `&[u8]`.
/// This includes, but is not limited to, `&str`, `&BStr` and `&[u8]` itself.
///
/// An empty iterator produces an empty `Vec<u8>`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::concat(&["foo", "bar", "baz"]);
/// assert_eq!(s, "foobarbaz".as_bytes());
/// ```
#[inline]
pub fn concat<T, I>(elements: I) -> Vec<u8>
where
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    let mut dest: Vec<u8> = Vec::new();
    for element in elements {
        // Appending the raw bytes is exactly what `ByteVec::push_str` does.
        dest.extend_from_slice(element.as_ref());
    }
    dest
}
48 | |
/// Join the elements given by the iterator with the given separator into a
/// single `Vec<u8>`.
///
/// Both the separator and the elements may be any type that can be cheaply
/// converted into an `&[u8]`. This includes, but is not limited to,
/// `&str`, `&BStr` and `&[u8]` itself.
///
/// The separator is only written *between* elements: an empty iterator
/// produces an empty `Vec<u8>` and a single element is returned as-is.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr;
///
/// let s = bstr::join(",", &["foo", "bar", "baz"]);
/// assert_eq!(s, "foo,bar,baz".as_bytes());
/// ```
#[inline]
pub fn join<B, T, I>(separator: B, elements: I) -> Vec<u8>
where
    B: AsRef<[u8]>,
    T: AsRef<[u8]>,
    I: IntoIterator<Item = T>,
{
    // Convert the separator once instead of on every iteration.
    let separator = separator.as_ref();
    let mut it = elements.into_iter();
    let mut dest: Vec<u8> = Vec::new();
    // The first element is written on its own so that no separator ever
    // precedes it.
    match it.next() {
        None => return dest,
        Some(first) => dest.extend_from_slice(first.as_ref()),
    }
    for element in it {
        dest.extend_from_slice(separator);
        dest.extend_from_slice(element.as_ref());
    }
    dest
}
87 | |
// `Vec<u8>` is already the vector type these accessors expose, so each one
// simply hands `self` back without any conversion.
impl ByteVec for Vec<u8> {
    /// Returns a shared reference to this vector itself.
    #[inline]
    fn as_vec(&self) -> &Vec<u8> {
        self
    }

    /// Returns a mutable reference to this vector itself.
    #[inline]
    fn as_vec_mut(&mut self) -> &mut Vec<u8> {
        self
    }

    /// Returns this vector by value, unchanged.
    #[inline]
    fn into_vec(self) -> Vec<u8> {
        self
    }
}
104 | |
/// Ensure that callers cannot implement `ByteVec` by making an
/// unimplementable trait its super trait.
mod private {
    /// Marker trait with no items. It is `pub` so that it may appear as a
    /// super trait, but the enclosing module is private.
    pub trait Sealed {}
}
// Opt `Vec<u8>` into the sealed set so that it is able to implement `ByteVec`.
impl private::Sealed for Vec<u8> {}
111 | |
112 | /// A trait that extends `Vec<u8>` with string oriented methods. |
113 | /// |
114 | /// Note that when using the constructor methods, such as |
115 | /// `ByteVec::from_slice`, one should actually call them using the concrete |
116 | /// type. For example: |
117 | /// |
118 | /// ``` |
119 | /// use bstr::{B, ByteVec}; |
120 | /// |
121 | /// let s = Vec::from_slice(b"abc" ); // NOT ByteVec::from_slice("...") |
122 | /// assert_eq!(s, B("abc" )); |
123 | /// ``` |
124 | /// |
125 | /// This trait is sealed and cannot be implemented outside of `bstr`. |
126 | pub trait ByteVec: private::Sealed { |
127 | /// A method for accessing the raw vector bytes of this type. This is |
128 | /// always a no-op and callers shouldn't care about it. This only exists |
129 | /// for making the extension trait work. |
130 | #[doc (hidden)] |
131 | fn as_vec(&self) -> &Vec<u8>; |
132 | |
133 | /// A method for accessing the raw vector bytes of this type, mutably. This |
134 | /// is always a no-op and callers shouldn't care about it. This only exists |
135 | /// for making the extension trait work. |
136 | #[doc (hidden)] |
137 | fn as_vec_mut(&mut self) -> &mut Vec<u8>; |
138 | |
139 | /// A method for consuming ownership of this vector. This is always a no-op |
140 | /// and callers shouldn't care about it. This only exists for making the |
141 | /// extension trait work. |
142 | #[doc (hidden)] |
143 | fn into_vec(self) -> Vec<u8> |
144 | where |
145 | Self: Sized; |
146 | |
147 | /// Create a new owned byte string from the given byte slice. |
148 | /// |
149 | /// # Examples |
150 | /// |
151 | /// Basic usage: |
152 | /// |
153 | /// ``` |
154 | /// use bstr::{B, ByteVec}; |
155 | /// |
156 | /// let s = Vec::from_slice(b"abc" ); |
157 | /// assert_eq!(s, B("abc" )); |
158 | /// ``` |
159 | #[inline ] |
160 | fn from_slice<B: AsRef<[u8]>>(bytes: B) -> Vec<u8> { |
161 | bytes.as_ref().to_vec() |
162 | } |
163 | |
164 | /// Create a new byte string from an owned OS string. |
165 | /// |
166 | /// When the underlying bytes of OS strings are accessible, then this |
167 | /// always succeeds and is zero cost. Otherwise, this returns the given |
168 | /// `OsString` if it is not valid UTF-8. |
169 | /// |
170 | /// # Examples |
171 | /// |
172 | /// Basic usage: |
173 | /// |
174 | /// ``` |
175 | /// use std::ffi::OsString; |
176 | /// |
177 | /// use bstr::{B, ByteVec}; |
178 | /// |
179 | /// let os_str = OsString::from("foo" ); |
180 | /// let bs = Vec::from_os_string(os_str).expect("valid UTF-8" ); |
181 | /// assert_eq!(bs, B("foo" )); |
182 | /// ``` |
183 | #[inline ] |
184 | #[cfg (feature = "std" )] |
185 | fn from_os_string(os_str: OsString) -> Result<Vec<u8>, OsString> { |
186 | #[cfg (unix)] |
187 | #[inline ] |
188 | fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> { |
189 | use std::os::unix::ffi::OsStringExt; |
190 | |
191 | Ok(Vec::from(os_str.into_vec())) |
192 | } |
193 | |
194 | #[cfg (not(unix))] |
195 | #[inline ] |
196 | fn imp(os_str: OsString) -> Result<Vec<u8>, OsString> { |
197 | os_str.into_string().map(Vec::from) |
198 | } |
199 | |
200 | imp(os_str) |
201 | } |
202 | |
203 | /// Lossily create a new byte string from an OS string slice. |
204 | /// |
205 | /// When the underlying bytes of OS strings are accessible, then this is |
206 | /// zero cost and always returns a slice. Otherwise, a UTF-8 check is |
207 | /// performed and if the given OS string is not valid UTF-8, then it is |
208 | /// lossily decoded into valid UTF-8 (with invalid bytes replaced by the |
209 | /// Unicode replacement codepoint). |
210 | /// |
211 | /// # Examples |
212 | /// |
213 | /// Basic usage: |
214 | /// |
215 | /// ``` |
216 | /// use std::ffi::OsStr; |
217 | /// |
218 | /// use bstr::{B, ByteVec}; |
219 | /// |
220 | /// let os_str = OsStr::new("foo" ); |
221 | /// let bs = Vec::from_os_str_lossy(os_str); |
222 | /// assert_eq!(bs, B("foo" )); |
223 | /// ``` |
224 | #[inline ] |
225 | #[cfg (feature = "std" )] |
226 | fn from_os_str_lossy<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> { |
227 | #[cfg (unix)] |
228 | #[inline ] |
229 | fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> { |
230 | use std::os::unix::ffi::OsStrExt; |
231 | |
232 | Cow::Borrowed(os_str.as_bytes()) |
233 | } |
234 | |
235 | #[cfg (not(unix))] |
236 | #[inline ] |
237 | fn imp<'a>(os_str: &'a OsStr) -> Cow<'a, [u8]> { |
238 | match os_str.to_string_lossy() { |
239 | Cow::Borrowed(x) => Cow::Borrowed(x.as_bytes()), |
240 | Cow::Owned(x) => Cow::Owned(Vec::from(x)), |
241 | } |
242 | } |
243 | |
244 | imp(os_str) |
245 | } |
246 | |
247 | /// Create a new byte string from an owned file path. |
248 | /// |
249 | /// When the underlying bytes of paths are accessible, then this always |
250 | /// succeeds and is zero cost. Otherwise, this returns the given `PathBuf` |
251 | /// if it is not valid UTF-8. |
252 | /// |
253 | /// # Examples |
254 | /// |
255 | /// Basic usage: |
256 | /// |
257 | /// ``` |
258 | /// use std::path::PathBuf; |
259 | /// |
260 | /// use bstr::{B, ByteVec}; |
261 | /// |
262 | /// let path = PathBuf::from("foo" ); |
263 | /// let bs = Vec::from_path_buf(path).expect("must be valid UTF-8" ); |
264 | /// assert_eq!(bs, B("foo" )); |
265 | /// ``` |
266 | #[inline ] |
267 | #[cfg (feature = "std" )] |
268 | fn from_path_buf(path: PathBuf) -> Result<Vec<u8>, PathBuf> { |
269 | Vec::from_os_string(path.into_os_string()).map_err(PathBuf::from) |
270 | } |
271 | |
272 | /// Lossily create a new byte string from a file path. |
273 | /// |
274 | /// When the underlying bytes of paths are accessible, then this is |
275 | /// zero cost and always returns a slice. Otherwise, a UTF-8 check is |
276 | /// performed and if the given path is not valid UTF-8, then it is lossily |
277 | /// decoded into valid UTF-8 (with invalid bytes replaced by the Unicode |
278 | /// replacement codepoint). |
279 | /// |
280 | /// # Examples |
281 | /// |
282 | /// Basic usage: |
283 | /// |
284 | /// ``` |
285 | /// use std::path::Path; |
286 | /// |
287 | /// use bstr::{B, ByteVec}; |
288 | /// |
289 | /// let path = Path::new("foo" ); |
290 | /// let bs = Vec::from_path_lossy(path); |
291 | /// assert_eq!(bs, B("foo" )); |
292 | /// ``` |
293 | #[inline ] |
294 | #[cfg (feature = "std" )] |
295 | fn from_path_lossy<'a>(path: &'a Path) -> Cow<'a, [u8]> { |
296 | Vec::from_os_str_lossy(path.as_os_str()) |
297 | } |
298 | |
299 | /// Unescapes the given string into its raw bytes. |
300 | /// |
301 | /// This looks for the escape sequences `\xNN`, `\0`, `\r`, `\n`, `\t` |
302 | /// and `\` and translates them into their corresponding unescaped form. |
303 | /// |
304 | /// Incomplete escape sequences or things that look like escape sequences |
305 | /// but are not (for example, `\i` or `\xYZ`) are passed through literally. |
306 | /// |
307 | /// This is the dual of [`ByteSlice::escape_bytes`]. |
308 | /// |
309 | /// Note that the zero or NUL byte may be represented as either `\0` or |
310 | /// `\x00`. Both will be unescaped into the zero byte. |
311 | /// |
312 | /// # Examples |
313 | /// |
314 | /// This shows basic usage: |
315 | /// |
316 | /// ``` |
317 | /// # #[cfg (feature = "alloc" )] { |
318 | /// use bstr::{B, BString, ByteVec}; |
319 | /// |
320 | /// assert_eq!( |
321 | /// BString::from(b"foo \xFFbar" ), |
322 | /// Vec::unescape_bytes(r"foo\xFFbar" ), |
323 | /// ); |
324 | /// assert_eq!( |
325 | /// BString::from(b"foo \nbar" ), |
326 | /// Vec::unescape_bytes(r"foo\nbar" ), |
327 | /// ); |
328 | /// assert_eq!( |
329 | /// BString::from(b"foo \tbar" ), |
330 | /// Vec::unescape_bytes(r"foo\tbar" ), |
331 | /// ); |
332 | /// assert_eq!( |
333 | /// BString::from(b"foo \\bar" ), |
334 | /// Vec::unescape_bytes(r"foo\\bar" ), |
335 | /// ); |
336 | /// assert_eq!( |
337 | /// BString::from("foo☃bar" ), |
338 | /// Vec::unescape_bytes(r"foo☃bar" ), |
339 | /// ); |
340 | /// |
341 | /// # } |
342 | /// ``` |
343 | /// |
344 | /// This shows some examples of how incomplete or "incorrect" escape |
345 | /// sequences get passed through literally. |
346 | /// |
347 | /// ``` |
348 | /// # #[cfg (feature = "alloc" )] { |
349 | /// use bstr::{B, BString, ByteVec}; |
350 | /// |
351 | /// // Show some incomplete escape sequences. |
352 | /// assert_eq!( |
353 | /// BString::from(br"\" ), |
354 | /// Vec::unescape_bytes(r"\" ), |
355 | /// ); |
356 | /// assert_eq!( |
357 | /// BString::from(br"\" ), |
358 | /// Vec::unescape_bytes(r"\\" ), |
359 | /// ); |
360 | /// assert_eq!( |
361 | /// BString::from(br"\x" ), |
362 | /// Vec::unescape_bytes(r"\x" ), |
363 | /// ); |
364 | /// assert_eq!( |
365 | /// BString::from(br"\xA" ), |
366 | /// Vec::unescape_bytes(r"\xA" ), |
367 | /// ); |
368 | /// // And now some that kind of look like escape |
369 | /// // sequences, but aren't. |
370 | /// assert_eq!( |
371 | /// BString::from(br"\xZ" ), |
372 | /// Vec::unescape_bytes(r"\xZ" ), |
373 | /// ); |
374 | /// assert_eq!( |
375 | /// BString::from(br"\xZZ" ), |
376 | /// Vec::unescape_bytes(r"\xZZ" ), |
377 | /// ); |
378 | /// assert_eq!( |
379 | /// BString::from(br"\i" ), |
380 | /// Vec::unescape_bytes(r"\i" ), |
381 | /// ); |
382 | /// assert_eq!( |
383 | /// BString::from(br"\u" ), |
384 | /// Vec::unescape_bytes(r"\u" ), |
385 | /// ); |
386 | /// assert_eq!( |
387 | /// BString::from(br"\u{2603}" ), |
388 | /// Vec::unescape_bytes(r"\u{2603}" ), |
389 | /// ); |
390 | /// |
391 | /// # } |
392 | /// ``` |
393 | #[inline ] |
394 | #[cfg (feature = "alloc" )] |
395 | fn unescape_bytes<S: AsRef<str>>(escaped: S) -> Vec<u8> { |
396 | let s = escaped.as_ref(); |
397 | crate::escape_bytes::UnescapeBytes::new(s.chars()).collect() |
398 | } |
399 | |
400 | /// Appends the given byte to the end of this byte string. |
401 | /// |
402 | /// Note that this is equivalent to the generic `Vec::push` method. This |
403 | /// method is provided to permit callers to explicitly differentiate |
404 | /// between pushing bytes, codepoints and strings. |
405 | /// |
406 | /// # Examples |
407 | /// |
408 | /// Basic usage: |
409 | /// |
410 | /// ``` |
411 | /// use bstr::ByteVec; |
412 | /// |
413 | /// let mut s = <Vec<u8>>::from("abc" ); |
414 | /// s.push_byte(b' \xE2' ); |
415 | /// s.push_byte(b' \x98' ); |
416 | /// s.push_byte(b' \x83' ); |
417 | /// assert_eq!(s, "abc☃" .as_bytes()); |
418 | /// ``` |
419 | #[inline ] |
420 | fn push_byte(&mut self, byte: u8) { |
421 | self.as_vec_mut().push(byte); |
422 | } |
423 | |
424 | /// Appends the given `char` to the end of this byte string. |
425 | /// |
426 | /// # Examples |
427 | /// |
428 | /// Basic usage: |
429 | /// |
430 | /// ``` |
431 | /// use bstr::ByteVec; |
432 | /// |
433 | /// let mut s = <Vec<u8>>::from("abc" ); |
434 | /// s.push_char('1' ); |
435 | /// s.push_char('2' ); |
436 | /// s.push_char('3' ); |
437 | /// assert_eq!(s, "abc123" .as_bytes()); |
438 | /// ``` |
439 | #[inline ] |
440 | fn push_char(&mut self, ch: char) { |
441 | if ch.len_utf8() == 1 { |
442 | self.push_byte(ch as u8); |
443 | return; |
444 | } |
445 | self.as_vec_mut() |
446 | .extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()); |
447 | } |
448 | |
449 | /// Appends the given slice to the end of this byte string. This accepts |
450 | /// any type that be converted to a `&[u8]`. This includes, but is not |
451 | /// limited to, `&str`, `&BStr`, and of course, `&[u8]` itself. |
452 | /// |
453 | /// # Examples |
454 | /// |
455 | /// Basic usage: |
456 | /// |
457 | /// ``` |
458 | /// use bstr::ByteVec; |
459 | /// |
460 | /// let mut s = <Vec<u8>>::from("abc" ); |
461 | /// s.push_str(b"123" ); |
462 | /// assert_eq!(s, "abc123" .as_bytes()); |
463 | /// ``` |
464 | #[inline ] |
465 | fn push_str<B: AsRef<[u8]>>(&mut self, bytes: B) { |
466 | self.as_vec_mut().extend_from_slice(bytes.as_ref()); |
467 | } |
468 | |
469 | /// Converts a `Vec<u8>` into a `String` if and only if this byte string is |
470 | /// valid UTF-8. |
471 | /// |
472 | /// If it is not valid UTF-8, then a |
473 | /// [`FromUtf8Error`](struct.FromUtf8Error.html) |
474 | /// is returned. (This error can be used to examine why UTF-8 validation |
475 | /// failed, or to regain the original byte string.) |
476 | /// |
477 | /// # Examples |
478 | /// |
479 | /// Basic usage: |
480 | /// |
481 | /// ``` |
482 | /// use bstr::ByteVec; |
483 | /// |
484 | /// let bytes = Vec::from("hello" ); |
485 | /// let string = bytes.into_string().unwrap(); |
486 | /// |
487 | /// assert_eq!("hello" , string); |
488 | /// ``` |
489 | /// |
490 | /// If this byte string is not valid UTF-8, then an error will be returned. |
491 | /// That error can then be used to inspect the location at which invalid |
492 | /// UTF-8 was found, or to regain the original byte string: |
493 | /// |
494 | /// ``` |
495 | /// use bstr::{B, ByteVec}; |
496 | /// |
497 | /// let bytes = Vec::from_slice(b"foo \xFFbar" ); |
498 | /// let err = bytes.into_string().unwrap_err(); |
499 | /// |
500 | /// assert_eq!(err.utf8_error().valid_up_to(), 3); |
501 | /// assert_eq!(err.utf8_error().error_len(), Some(1)); |
502 | /// |
503 | /// // At no point in this example is an allocation performed. |
504 | /// let bytes = Vec::from(err.into_vec()); |
505 | /// assert_eq!(bytes, B(b"foo \xFFbar" )); |
506 | /// ``` |
507 | #[inline ] |
508 | fn into_string(self) -> Result<String, FromUtf8Error> |
509 | where |
510 | Self: Sized, |
511 | { |
512 | match utf8::validate(self.as_vec()) { |
513 | Err(err) => Err(FromUtf8Error { original: self.into_vec(), err }), |
514 | Ok(()) => { |
515 | // SAFETY: This is safe because of the guarantees provided by |
516 | // utf8::validate. |
517 | unsafe { Ok(self.into_string_unchecked()) } |
518 | } |
519 | } |
520 | } |
521 | |
522 | /// Lossily converts a `Vec<u8>` into a `String`. If this byte string |
523 | /// contains invalid UTF-8, then the invalid bytes are replaced with the |
524 | /// Unicode replacement codepoint. |
525 | /// |
526 | /// # Examples |
527 | /// |
528 | /// Basic usage: |
529 | /// |
530 | /// ``` |
531 | /// use bstr::ByteVec; |
532 | /// |
533 | /// let bytes = Vec::from_slice(b"foo \xFFbar" ); |
534 | /// let string = bytes.into_string_lossy(); |
535 | /// assert_eq!(string, "foo \u{FFFD}bar" ); |
536 | /// ``` |
537 | #[inline ] |
538 | fn into_string_lossy(self) -> String |
539 | where |
540 | Self: Sized, |
541 | { |
542 | match self.as_vec().to_str_lossy() { |
543 | Cow::Borrowed(_) => { |
544 | // SAFETY: to_str_lossy() returning a Cow::Borrowed guarantees |
545 | // the entire string is valid utf8. |
546 | unsafe { self.into_string_unchecked() } |
547 | } |
548 | Cow::Owned(s) => s, |
549 | } |
550 | } |
551 | |
552 | /// Unsafely convert this byte string into a `String`, without checking for |
553 | /// valid UTF-8. |
554 | /// |
555 | /// # Safety |
556 | /// |
557 | /// Callers *must* ensure that this byte string is valid UTF-8 before |
558 | /// calling this method. Converting a byte string into a `String` that is |
559 | /// not valid UTF-8 is considered undefined behavior. |
560 | /// |
561 | /// This routine is useful in performance sensitive contexts where the |
562 | /// UTF-8 validity of the byte string is already known and it is |
563 | /// undesirable to pay the cost of an additional UTF-8 validation check |
564 | /// that [`into_string`](#method.into_string) performs. |
565 | /// |
566 | /// # Examples |
567 | /// |
568 | /// Basic usage: |
569 | /// |
570 | /// ``` |
571 | /// use bstr::ByteVec; |
572 | /// |
573 | /// // SAFETY: This is safe because string literals are guaranteed to be |
574 | /// // valid UTF-8 by the Rust compiler. |
575 | /// let s = unsafe { Vec::from("☃βツ" ).into_string_unchecked() }; |
576 | /// assert_eq!("☃βツ" , s); |
577 | /// ``` |
578 | #[inline ] |
579 | unsafe fn into_string_unchecked(self) -> String |
580 | where |
581 | Self: Sized, |
582 | { |
583 | String::from_utf8_unchecked(self.into_vec()) |
584 | } |
585 | |
586 | /// Converts this byte string into an OS string, in place. |
587 | /// |
588 | /// When OS strings can be constructed from arbitrary byte sequences, this |
589 | /// always succeeds and is zero cost. Otherwise, if this byte string is not |
590 | /// valid UTF-8, then an error (with the original byte string) is returned. |
591 | /// |
592 | /// # Examples |
593 | /// |
594 | /// Basic usage: |
595 | /// |
596 | /// ``` |
597 | /// use std::ffi::OsStr; |
598 | /// |
599 | /// use bstr::ByteVec; |
600 | /// |
601 | /// let bs = Vec::from("foo" ); |
602 | /// let os_str = bs.into_os_string().expect("should be valid UTF-8" ); |
603 | /// assert_eq!(os_str, OsStr::new("foo" )); |
604 | /// ``` |
605 | #[cfg (feature = "std" )] |
606 | #[inline ] |
607 | fn into_os_string(self) -> Result<OsString, FromUtf8Error> |
608 | where |
609 | Self: Sized, |
610 | { |
611 | #[cfg (unix)] |
612 | #[inline ] |
613 | fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> { |
614 | use std::os::unix::ffi::OsStringExt; |
615 | |
616 | Ok(OsString::from_vec(v)) |
617 | } |
618 | |
619 | #[cfg (not(unix))] |
620 | #[inline ] |
621 | fn imp(v: Vec<u8>) -> Result<OsString, FromUtf8Error> { |
622 | v.into_string().map(OsString::from) |
623 | } |
624 | |
625 | imp(self.into_vec()) |
626 | } |
627 | |
628 | /// Lossily converts this byte string into an OS string, in place. |
629 | /// |
630 | /// When OS strings can be constructed from arbitrary byte sequences, this |
631 | /// is zero cost and always returns a slice. Otherwise, this will perform a |
632 | /// UTF-8 check and lossily convert this byte string into valid UTF-8 using |
633 | /// the Unicode replacement codepoint. |
634 | /// |
635 | /// Note that this can prevent the correct roundtripping of file paths when |
636 | /// the representation of `OsString` is opaque. |
637 | /// |
638 | /// # Examples |
639 | /// |
640 | /// Basic usage: |
641 | /// |
642 | /// ``` |
643 | /// use bstr::ByteVec; |
644 | /// |
645 | /// let bs = Vec::from_slice(b"foo \xFFbar" ); |
646 | /// let os_str = bs.into_os_string_lossy(); |
647 | /// assert_eq!(os_str.to_string_lossy(), "foo \u{FFFD}bar" ); |
648 | /// ``` |
649 | #[inline ] |
650 | #[cfg (feature = "std" )] |
651 | fn into_os_string_lossy(self) -> OsString |
652 | where |
653 | Self: Sized, |
654 | { |
655 | #[cfg (unix)] |
656 | #[inline ] |
657 | fn imp(v: Vec<u8>) -> OsString { |
658 | use std::os::unix::ffi::OsStringExt; |
659 | |
660 | OsString::from_vec(v) |
661 | } |
662 | |
663 | #[cfg (not(unix))] |
664 | #[inline ] |
665 | fn imp(v: Vec<u8>) -> OsString { |
666 | OsString::from(v.into_string_lossy()) |
667 | } |
668 | |
669 | imp(self.into_vec()) |
670 | } |
671 | |
672 | /// Converts this byte string into an owned file path, in place. |
673 | /// |
674 | /// When paths can be constructed from arbitrary byte sequences, this |
675 | /// always succeeds and is zero cost. Otherwise, if this byte string is not |
676 | /// valid UTF-8, then an error (with the original byte string) is returned. |
677 | /// |
678 | /// # Examples |
679 | /// |
680 | /// Basic usage: |
681 | /// |
682 | /// ``` |
683 | /// use bstr::ByteVec; |
684 | /// |
685 | /// let bs = Vec::from("foo" ); |
686 | /// let path = bs.into_path_buf().expect("should be valid UTF-8" ); |
687 | /// assert_eq!(path.as_os_str(), "foo" ); |
688 | /// ``` |
689 | #[cfg (feature = "std" )] |
690 | #[inline ] |
691 | fn into_path_buf(self) -> Result<PathBuf, FromUtf8Error> |
692 | where |
693 | Self: Sized, |
694 | { |
695 | self.into_os_string().map(PathBuf::from) |
696 | } |
697 | |
698 | /// Lossily converts this byte string into an owned file path, in place. |
699 | /// |
700 | /// When paths can be constructed from arbitrary byte sequences, this is |
701 | /// zero cost and always returns a slice. Otherwise, this will perform a |
702 | /// UTF-8 check and lossily convert this byte string into valid UTF-8 using |
703 | /// the Unicode replacement codepoint. |
704 | /// |
705 | /// Note that this can prevent the correct roundtripping of file paths when |
706 | /// the representation of `PathBuf` is opaque. |
707 | /// |
708 | /// # Examples |
709 | /// |
710 | /// Basic usage: |
711 | /// |
712 | /// ``` |
713 | /// use bstr::ByteVec; |
714 | /// |
715 | /// let bs = Vec::from_slice(b"foo \xFFbar" ); |
716 | /// let path = bs.into_path_buf_lossy(); |
717 | /// assert_eq!(path.to_string_lossy(), "foo \u{FFFD}bar" ); |
718 | /// ``` |
719 | #[inline ] |
720 | #[cfg (feature = "std" )] |
721 | fn into_path_buf_lossy(self) -> PathBuf |
722 | where |
723 | Self: Sized, |
724 | { |
725 | PathBuf::from(self.into_os_string_lossy()) |
726 | } |
727 | |
728 | /// Removes the last byte from this `Vec<u8>` and returns it. |
729 | /// |
730 | /// If this byte string is empty, then `None` is returned. |
731 | /// |
732 | /// If the last codepoint in this byte string is not ASCII, then removing |
733 | /// the last byte could make this byte string contain invalid UTF-8. |
734 | /// |
735 | /// Note that this is equivalent to the generic `Vec::pop` method. This |
736 | /// method is provided to permit callers to explicitly differentiate |
737 | /// between popping bytes and codepoints. |
738 | /// |
739 | /// # Examples |
740 | /// |
741 | /// Basic usage: |
742 | /// |
743 | /// ``` |
744 | /// use bstr::ByteVec; |
745 | /// |
746 | /// let mut s = Vec::from("foo" ); |
747 | /// assert_eq!(s.pop_byte(), Some(b'o' )); |
748 | /// assert_eq!(s.pop_byte(), Some(b'o' )); |
749 | /// assert_eq!(s.pop_byte(), Some(b'f' )); |
750 | /// assert_eq!(s.pop_byte(), None); |
751 | /// ``` |
752 | #[inline ] |
753 | fn pop_byte(&mut self) -> Option<u8> { |
754 | self.as_vec_mut().pop() |
755 | } |
756 | |
757 | /// Removes the last codepoint from this `Vec<u8>` and returns it. |
758 | /// |
759 | /// If this byte string is empty, then `None` is returned. If the last |
760 | /// bytes of this byte string do not correspond to a valid UTF-8 code unit |
761 | /// sequence, then the Unicode replacement codepoint is yielded instead in |
762 | /// accordance with the |
763 | /// [replacement codepoint substitution policy](index.html#handling-of-invalid-utf8-8). |
764 | /// |
765 | /// # Examples |
766 | /// |
767 | /// Basic usage: |
768 | /// |
769 | /// ``` |
770 | /// use bstr::ByteVec; |
771 | /// |
772 | /// let mut s = Vec::from("foo" ); |
773 | /// assert_eq!(s.pop_char(), Some('o' )); |
774 | /// assert_eq!(s.pop_char(), Some('o' )); |
775 | /// assert_eq!(s.pop_char(), Some('f' )); |
776 | /// assert_eq!(s.pop_char(), None); |
777 | /// ``` |
778 | /// |
779 | /// This shows the replacement codepoint substitution policy. Note that |
780 | /// the first pop yields a replacement codepoint but actually removes two |
781 | /// bytes. This is in contrast with subsequent pops when encountering |
782 | /// `\xFF` since `\xFF` is never a valid prefix for any valid UTF-8 |
783 | /// code unit sequence. |
784 | /// |
785 | /// ``` |
786 | /// use bstr::ByteVec; |
787 | /// |
788 | /// let mut s = Vec::from_slice(b"f \xFF\xFF\xFFoo \xE2\x98" ); |
789 | /// assert_eq!(s.pop_char(), Some(' \u{FFFD}' )); |
790 | /// assert_eq!(s.pop_char(), Some('o' )); |
791 | /// assert_eq!(s.pop_char(), Some('o' )); |
792 | /// assert_eq!(s.pop_char(), Some(' \u{FFFD}' )); |
793 | /// assert_eq!(s.pop_char(), Some(' \u{FFFD}' )); |
794 | /// assert_eq!(s.pop_char(), Some(' \u{FFFD}' )); |
795 | /// assert_eq!(s.pop_char(), Some('f' )); |
796 | /// assert_eq!(s.pop_char(), None); |
797 | /// ``` |
798 | #[inline ] |
799 | fn pop_char(&mut self) -> Option<char> { |
800 | let (ch, size) = utf8::decode_last_lossy(self.as_vec()); |
801 | if size == 0 { |
802 | return None; |
803 | } |
804 | let new_len = self.as_vec().len() - size; |
805 | self.as_vec_mut().truncate(new_len); |
806 | Some(ch) |
807 | } |
808 | |
809 | /// Removes a `char` from this `Vec<u8>` at the given byte position and |
810 | /// returns it. |
811 | /// |
812 | /// If the bytes at the given position do not lead to a valid UTF-8 code |
813 | /// unit sequence, then a |
814 | /// [replacement codepoint is returned instead](index.html#handling-of-invalid-utf8-8). |
815 | /// |
816 | /// # Panics |
817 | /// |
818 | /// Panics if `at` is larger than or equal to this byte string's length. |
819 | /// |
820 | /// # Examples |
821 | /// |
822 | /// Basic usage: |
823 | /// |
824 | /// ``` |
825 | /// use bstr::ByteVec; |
826 | /// |
827 | /// let mut s = Vec::from("foo☃bar" ); |
828 | /// assert_eq!(s.remove_char(3), '☃' ); |
829 | /// assert_eq!(s, b"foobar" ); |
830 | /// ``` |
831 | /// |
832 | /// This example shows how the Unicode replacement codepoint policy is |
833 | /// used: |
834 | /// |
835 | /// ``` |
836 | /// use bstr::ByteVec; |
837 | /// |
838 | /// let mut s = Vec::from_slice(b"foo \xFFbar" ); |
839 | /// assert_eq!(s.remove_char(3), ' \u{FFFD}' ); |
840 | /// assert_eq!(s, b"foobar" ); |
841 | /// ``` |
842 | #[inline ] |
843 | fn remove_char(&mut self, at: usize) -> char { |
844 | let (ch, size) = utf8::decode_lossy(&self.as_vec()[at..]); |
845 | assert!( |
846 | size > 0, |
847 | "expected {} to be less than {}" , |
848 | at, |
849 | self.as_vec().len(), |
850 | ); |
851 | self.as_vec_mut().drain(at..at + size); |
852 | ch |
853 | } |
854 | |
855 | /// Inserts the given codepoint into this `Vec<u8>` at a particular byte |
856 | /// position. |
857 | /// |
858 | /// This is an `O(n)` operation as it may copy a number of elements in this |
859 | /// byte string proportional to its length. |
860 | /// |
861 | /// # Panics |
862 | /// |
863 | /// Panics if `at` is larger than the byte string's length. |
864 | /// |
865 | /// # Examples |
866 | /// |
867 | /// Basic usage: |
868 | /// |
869 | /// ``` |
870 | /// use bstr::ByteVec; |
871 | /// |
872 | /// let mut s = Vec::from("foobar" ); |
873 | /// s.insert_char(3, '☃' ); |
874 | /// assert_eq!(s, "foo☃bar" .as_bytes()); |
875 | /// ``` |
876 | #[inline ] |
877 | fn insert_char(&mut self, at: usize, ch: char) { |
878 | self.insert_str(at, ch.encode_utf8(&mut [0; 4]).as_bytes()); |
879 | } |
880 | |
881 | /// Inserts the given byte string into this byte string at a particular |
882 | /// byte position. |
883 | /// |
884 | /// This is an `O(n)` operation as it may copy a number of elements in this |
885 | /// byte string proportional to its length. |
886 | /// |
887 | /// The given byte string may be any type that can be cheaply converted |
888 | /// into a `&[u8]`. This includes, but is not limited to, `&str` and |
889 | /// `&[u8]`. |
890 | /// |
891 | /// # Panics |
892 | /// |
893 | /// Panics if `at` is larger than the byte string's length. |
894 | /// |
895 | /// # Examples |
896 | /// |
897 | /// Basic usage: |
898 | /// |
899 | /// ``` |
900 | /// use bstr::ByteVec; |
901 | /// |
902 | /// let mut s = Vec::from("foobar" ); |
903 | /// s.insert_str(3, "☃☃☃" ); |
904 | /// assert_eq!(s, "foo☃☃☃bar" .as_bytes()); |
905 | /// ``` |
906 | #[inline ] |
907 | fn insert_str<B: AsRef<[u8]>>(&mut self, at: usize, bytes: B) { |
908 | let bytes = bytes.as_ref(); |
909 | let len = self.as_vec().len(); |
910 | assert!(at <= len, "expected {} to be <= {}" , at, len); |
911 | |
912 | // SAFETY: We'd like to efficiently splice in the given bytes into |
913 | // this byte string. Since we are only working with `u8` elements here, |
914 | // we only need to consider whether our bounds are correct and whether |
915 | // our byte string has enough space. |
916 | self.as_vec_mut().reserve(bytes.len()); |
917 | unsafe { |
918 | // Shift bytes after `at` over by the length of `bytes` to make |
919 | // room for it. This requires referencing two regions of memory |
920 | // that may overlap, so we use ptr::copy. |
921 | ptr::copy( |
922 | self.as_vec().as_ptr().add(at), |
923 | self.as_vec_mut().as_mut_ptr().add(at + bytes.len()), |
924 | len - at, |
925 | ); |
926 | // Now copy the bytes given into the room we made above. In this |
927 | // case, we know that the given bytes cannot possibly overlap |
928 | // with this byte string since we have a mutable borrow of the |
929 | // latter. Thus, we can use a nonoverlapping copy. |
930 | ptr::copy_nonoverlapping( |
931 | bytes.as_ptr(), |
932 | self.as_vec_mut().as_mut_ptr().add(at), |
933 | bytes.len(), |
934 | ); |
935 | self.as_vec_mut().set_len(len + bytes.len()); |
936 | } |
937 | } |
938 | |
939 | /// Removes the specified range in this byte string and replaces it with |
940 | /// the given bytes. The given bytes do not need to have the same length |
941 | /// as the range provided. |
942 | /// |
943 | /// # Panics |
944 | /// |
945 | /// Panics if the given range is invalid. |
946 | /// |
947 | /// # Examples |
948 | /// |
949 | /// Basic usage: |
950 | /// |
951 | /// ``` |
952 | /// use bstr::ByteVec; |
953 | /// |
954 | /// let mut s = Vec::from("foobar" ); |
955 | /// s.replace_range(2..4, "xxxxx" ); |
956 | /// assert_eq!(s, "foxxxxxar" .as_bytes()); |
957 | /// ``` |
958 | #[inline ] |
959 | fn replace_range<R, B>(&mut self, range: R, replace_with: B) |
960 | where |
961 | R: ops::RangeBounds<usize>, |
962 | B: AsRef<[u8]>, |
963 | { |
964 | self.as_vec_mut().splice(range, replace_with.as_ref().iter().cloned()); |
965 | } |
966 | |
967 | /// Creates a draining iterator that removes the specified range in this |
968 | /// `Vec<u8>` and yields each of the removed bytes. |
969 | /// |
970 | /// Note that the elements specified by the given range are removed |
971 | /// regardless of whether the returned iterator is fully exhausted. |
972 | /// |
973 | /// Also note that is is unspecified how many bytes are removed from the |
974 | /// `Vec<u8>` if the `DrainBytes` iterator is leaked. |
975 | /// |
976 | /// # Panics |
977 | /// |
978 | /// Panics if the given range is not valid. |
979 | /// |
980 | /// # Examples |
981 | /// |
982 | /// Basic usage: |
983 | /// |
984 | /// ``` |
985 | /// use bstr::ByteVec; |
986 | /// |
987 | /// let mut s = Vec::from("foobar" ); |
988 | /// { |
989 | /// let mut drainer = s.drain_bytes(2..4); |
990 | /// assert_eq!(drainer.next(), Some(b'o' )); |
991 | /// assert_eq!(drainer.next(), Some(b'b' )); |
992 | /// assert_eq!(drainer.next(), None); |
993 | /// } |
994 | /// assert_eq!(s, "foar" .as_bytes()); |
995 | /// ``` |
996 | #[inline ] |
997 | fn drain_bytes<R>(&mut self, range: R) -> DrainBytes<'_> |
998 | where |
999 | R: ops::RangeBounds<usize>, |
1000 | { |
1001 | DrainBytes { it: self.as_vec_mut().drain(range) } |
1002 | } |
1003 | } |
1004 | |
/// A draining byte oriented iterator for `Vec<u8>`.
///
/// This iterator is created by
/// [`ByteVec::drain_bytes`](trait.ByteVec.html#method.drain_bytes).
///
/// Every iterator operation is forwarded to the wrapped `vec::Drain`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteVec;
///
/// let mut s = Vec::from("foobar");
/// {
///     let mut drainer = s.drain_bytes(2..4);
///     assert_eq!(drainer.next(), Some(b'o'));
///     assert_eq!(drainer.next(), Some(b'b'));
///     assert_eq!(drainer.next(), None);
/// }
/// assert_eq!(s, "foar".as_bytes());
/// ```
#[derive(Debug)]
pub struct DrainBytes<'a> {
    /// The underlying drain over the byte vector.
    it: vec::Drain<'a, u8>,
}

impl<'a> iter::FusedIterator for DrainBytes<'a> {}

impl<'a> Iterator for DrainBytes<'a> {
    type Item = u8;

    /// Removes and returns the next byte from the front of the drained range.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next()
    }

    /// Forwards the size hint of the underlying drain.
    ///
    /// Without this override the default hint of `(0, None)` would be
    /// reported, which disagrees with `len` and breaks the contract that
    /// `ExactSizeIterator` places on `size_hint`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for DrainBytes<'a> {
    /// Removes and returns the next byte from the back of the drained range.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back()
    }
}

impl<'a> ExactSizeIterator for DrainBytes<'a> {
    /// Returns the number of bytes that have not yet been yielded.
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}
1055 | |
1056 | /// An error that may occur when converting a `Vec<u8>` to a `String`. |
1057 | /// |
1058 | /// This error includes the original `Vec<u8>` that failed to convert to a |
1059 | /// `String`. This permits callers to recover the allocation used even if it |
1060 | /// it not valid UTF-8. |
1061 | /// |
1062 | /// # Examples |
1063 | /// |
1064 | /// Basic usage: |
1065 | /// |
1066 | /// ``` |
1067 | /// use bstr::{B, ByteVec}; |
1068 | /// |
1069 | /// let bytes = Vec::from_slice(b"foo \xFFbar" ); |
1070 | /// let err = bytes.into_string().unwrap_err(); |
1071 | /// |
1072 | /// assert_eq!(err.utf8_error().valid_up_to(), 3); |
1073 | /// assert_eq!(err.utf8_error().error_len(), Some(1)); |
1074 | /// |
1075 | /// // At no point in this example is an allocation performed. |
1076 | /// let bytes = Vec::from(err.into_vec()); |
1077 | /// assert_eq!(bytes, B(b"foo \xFFbar" )); |
1078 | /// ``` |
1079 | #[derive (Debug, Eq, PartialEq)] |
1080 | pub struct FromUtf8Error { |
1081 | original: Vec<u8>, |
1082 | err: Utf8Error, |
1083 | } |
1084 | |
1085 | impl FromUtf8Error { |
1086 | /// Return the original bytes as a slice that failed to convert to a |
1087 | /// `String`. |
1088 | /// |
1089 | /// # Examples |
1090 | /// |
1091 | /// Basic usage: |
1092 | /// |
1093 | /// ``` |
1094 | /// use bstr::{B, ByteVec}; |
1095 | /// |
1096 | /// let bytes = Vec::from_slice(b"foo \xFFbar" ); |
1097 | /// let err = bytes.into_string().unwrap_err(); |
1098 | /// |
1099 | /// // At no point in this example is an allocation performed. |
1100 | /// assert_eq!(err.as_bytes(), B(b"foo \xFFbar" )); |
1101 | /// ``` |
1102 | #[inline ] |
1103 | pub fn as_bytes(&self) -> &[u8] { |
1104 | &self.original |
1105 | } |
1106 | |
1107 | /// Consume this error and return the original byte string that failed to |
1108 | /// convert to a `String`. |
1109 | /// |
1110 | /// # Examples |
1111 | /// |
1112 | /// Basic usage: |
1113 | /// |
1114 | /// ``` |
1115 | /// use bstr::{B, ByteVec}; |
1116 | /// |
1117 | /// let bytes = Vec::from_slice(b"foo \xFFbar" ); |
1118 | /// let err = bytes.into_string().unwrap_err(); |
1119 | /// let original = err.into_vec(); |
1120 | /// |
1121 | /// // At no point in this example is an allocation performed. |
1122 | /// assert_eq!(original, B(b"foo \xFFbar" )); |
1123 | /// ``` |
1124 | #[inline ] |
1125 | pub fn into_vec(self) -> Vec<u8> { |
1126 | self.original |
1127 | } |
1128 | |
1129 | /// Return the underlying UTF-8 error that occurred. This error provides |
1130 | /// information on the nature and location of the invalid UTF-8 detected. |
1131 | /// |
1132 | /// # Examples |
1133 | /// |
1134 | /// Basic usage: |
1135 | /// |
1136 | /// ``` |
1137 | /// use bstr::{B, ByteVec}; |
1138 | /// |
1139 | /// let bytes = Vec::from_slice(b"foo \xFFbar" ); |
1140 | /// let err = bytes.into_string().unwrap_err(); |
1141 | /// |
1142 | /// assert_eq!(err.utf8_error().valid_up_to(), 3); |
1143 | /// assert_eq!(err.utf8_error().error_len(), Some(1)); |
1144 | /// ``` |
1145 | #[inline ] |
1146 | pub fn utf8_error(&self) -> &Utf8Error { |
1147 | &self.err |
1148 | } |
1149 | } |
1150 | |
#[cfg(feature = "std")]
impl error::Error for FromUtf8Error {
    /// Returns a fixed, short description of this error. The details of the
    /// failure are available through the `Display` implementation.
    #[inline]
    fn description(&self) -> &str {
        const DESCRIPTION: &str = "invalid UTF-8 vector";
        DESCRIPTION
    }
}
1158 | |
1159 | impl fmt::Display for FromUtf8Error { |
1160 | #[inline ] |
1161 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
1162 | write!(f, " {}" , self.err) |
1163 | } |
1164 | } |
1165 | |
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::ext_vec::ByteVec;

    // Checks `insert_str` at the front, middle and end of byte strings of
    // various lengths, including an empty one.
    #[test]
    fn insert() {
        // (initial contents, insertion offset, inserted text, expected result)
        let cases: &[(&str, usize, &str, &str)] = &[
            ("", 0, "foo", "foo"),
            ("a", 0, "foo", "fooa"),
            ("a", 1, "foo", "afoo"),
            ("foobar", 3, "quux", "fooquuxbar"),
            ("foobar", 3, "x", "fooxbar"),
            ("foobar", 0, "x", "xfoobar"),
            ("foobar", 6, "x", "foobarx"),
            ("foobar", 3, "quuxbazquux", "fooquuxbazquuxbar"),
        ];
        for &(initial, at, text, expected) in cases {
            let mut s: Vec<u8> = Vec::from(initial);
            s.insert_str(at, text);
            assert_eq!(s, expected.as_bytes());
        }
    }

    // Inserting past the end of an empty byte string must panic.
    #[test]
    #[should_panic]
    fn insert_fail1() {
        let mut s: Vec<u8> = Vec::new();
        s.insert_str(1, "foo");
    }

    // Inserting more than one past the last byte must panic.
    #[test]
    #[should_panic]
    fn insert_fail2() {
        let mut s: Vec<u8> = Vec::from("a");
        s.insert_str(2, "foo");
    }

    // Same as above, with a longer byte string.
    #[test]
    #[should_panic]
    fn insert_fail3() {
        let mut s: Vec<u8> = Vec::from("foobar");
        s.insert_str(7, "foo");
    }
}
1226 | |