1use core::{iter, slice, str};
2
3#[cfg(all(feature = "alloc", feature = "unicode"))]
4use alloc::vec;
5#[cfg(feature = "alloc")]
6use alloc::{borrow::Cow, string::String, vec::Vec};
7
8#[cfg(feature = "std")]
9use std::{ffi::OsStr, path::Path};
10
11use memchr::{memchr, memmem, memrchr};
12
13use crate::escape_bytes::EscapeBytes;
14#[cfg(feature = "alloc")]
15use crate::ext_vec::ByteVec;
16#[cfg(feature = "unicode")]
17use crate::unicode::{
18 whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
19 SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
20 WordsWithBreaks,
21};
22use crate::{
23 ascii,
24 bstr::BStr,
25 byteset,
26 utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error},
27};
28
29/// A short-hand constructor for building a `&[u8]`.
30///
31/// This idiosyncratic constructor is useful for concisely building byte string
32/// slices. Its primary utility is in conveniently writing byte string literals
33/// in a uniform way. For example, consider this code that does not compile:
34///
35/// ```ignore
36/// let strs = vec![b"a", b"xy"];
37/// ```
38///
39/// The above code doesn't compile because the type of the byte string literal
40/// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is
41/// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored
42/// in the same `Vec`. (This is dissimilar from normal Unicode string slices,
43/// where both `"a"` and `"xy"` have the same type of `&'static str`.)
44///
45/// One way of getting the above code to compile is to convert byte strings to
46/// slices. You might try this:
47///
48/// ```ignore
49/// let strs = vec![&b"a", &b"xy"];
50/// ```
51///
52/// But this just creates values with type `& &'static [u8; 1]` and
53/// `& &'static [u8; 2]`. Instead, you need to force the issue like so:
54///
55/// ```
56/// let strs = vec![&b"a"[..], &b"xy"[..]];
57/// // or
58/// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
59/// ```
60///
61/// But neither of these are particularly convenient to type, especially when
62/// it's something as common as a string literal. Thus, this constructor
63/// permits writing the following instead:
64///
65/// ```
66/// use bstr::B;
67///
68/// let strs = vec![B("a"), B(b"xy")];
69/// ```
70///
71/// Notice that this also lets you mix and match both string literals and byte
72/// string literals. This can be quite convenient!
#[allow(non_snake_case)]
#[inline]
pub fn B<'a, B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> &'a [u8] {
    // Inside the body `B` names the type parameter (type namespace), not this
    // function (value namespace). The fully qualified call spells out which
    // `AsRef` target is requested: a plain byte slice borrowed from `bytes`.
    <B as AsRef<[u8]>>::as_ref(bytes)
}
78
79impl ByteSlice for [u8] {
80 #[inline]
81 fn as_bytes(&self) -> &[u8] {
82 self
83 }
84
85 #[inline]
86 fn as_bytes_mut(&mut self) -> &mut [u8] {
87 self
88 }
89}
90
91impl<const N: usize> ByteSlice for [u8; N] {
92 #[inline]
93 fn as_bytes(&self) -> &[u8] {
94 self
95 }
96
97 #[inline]
98 fn as_bytes_mut(&mut self) -> &mut [u8] {
99 self
100 }
101}
102
/// Seals `ByteSlice`: the trait requires `Sealed` as a supertrait, and
/// `Sealed` lives in this non-public module, so only the impls written next
/// to it can satisfy the bound.
mod private {
    /// Marker supertrait of `ByteSlice`; it carries no methods.
    pub trait Sealed {}
}

// The only types allowed to implement `ByteSlice`.
impl<const N: usize> private::Sealed for [u8; N] {}
impl private::Sealed for [u8] {}
110
111/// A trait that extends `&[u8]` with string oriented methods.
112///
113/// This trait is sealed and cannot be implemented outside of `bstr`.
114pub trait ByteSlice: private::Sealed {
115 /// A method for accessing the raw bytes of this type. This is always a
116 /// no-op and callers shouldn't care about it. This only exists for making
117 /// the extension trait work.
118 #[doc(hidden)]
119 fn as_bytes(&self) -> &[u8];
120
121 /// A method for accessing the raw bytes of this type, mutably. This is
122 /// always a no-op and callers shouldn't care about it. This only exists
123 /// for making the extension trait work.
124 #[doc(hidden)]
125 fn as_bytes_mut(&mut self) -> &mut [u8];
126
127 /// Return this byte slice as a `&BStr`.
128 ///
129 /// Using `&BStr` is useful because of its `fmt::Debug` representation
130 /// and various other trait implementations (such as `PartialEq` and
131 /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
132 /// shows its bytes as a normal string. For invalid UTF-8, hex escape
133 /// sequences are used.
134 ///
135 /// # Examples
136 ///
137 /// Basic usage:
138 ///
139 /// ```
140 /// use bstr::ByteSlice;
141 ///
142 /// println!("{:?}", b"foo\xFFbar".as_bstr());
143 /// ```
144 #[inline]
145 fn as_bstr(&self) -> &BStr {
146 BStr::new(self.as_bytes())
147 }
148
149 /// Return this byte slice as a `&mut BStr`.
150 ///
151 /// Using `&mut BStr` is useful because of its `fmt::Debug` representation
152 /// and various other trait implementations (such as `PartialEq` and
153 /// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
154 /// shows its bytes as a normal string. For invalid UTF-8, hex escape
155 /// sequences are used.
156 ///
157 /// # Examples
158 ///
159 /// Basic usage:
160 ///
161 /// ```
162 /// use bstr::ByteSlice;
163 ///
164 /// let mut bytes = *b"foo\xFFbar";
165 /// println!("{:?}", &mut bytes.as_bstr_mut());
166 /// ```
167 #[inline]
168 fn as_bstr_mut(&mut self) -> &mut BStr {
169 BStr::new_mut(self.as_bytes_mut())
170 }
171
172 /// Create an immutable byte string from an OS string slice.
173 ///
174 /// When the underlying bytes of OS strings are accessible, then this
175 /// always succeeds and is zero cost. Otherwise, this returns `None` if the
176 /// given OS string is not valid UTF-8. (For example, when the underlying
177 /// bytes are inaccessible on Windows, file paths are allowed to be a
178 /// sequence of arbitrary 16-bit integers. Not all such sequences can be
179 /// transcoded to valid UTF-8.)
180 ///
181 /// # Examples
182 ///
183 /// Basic usage:
184 ///
185 /// ```
186 /// use std::ffi::OsStr;
187 ///
188 /// use bstr::{B, ByteSlice};
189 ///
190 /// let os_str = OsStr::new("foo");
191 /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
192 /// assert_eq!(bs, B("foo"));
193 /// ```
#[cfg(feature = "std")]
#[inline]
fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
    // Unix: `OsStrExt` exposes the bytes of an `OsStr` directly, so the
    // conversion always succeeds.
    #[cfg(unix)]
    #[inline]
    fn raw_bytes(os_str: &OsStr) -> Option<&[u8]> {
        use std::os::unix::ffi::OsStrExt;

        let bytes = OsStrExt::as_bytes(os_str);
        Some(bytes)
    }

    // Elsewhere: go through `&str`, which yields `None` when the OS string
    // is not valid UTF-8.
    #[cfg(not(unix))]
    #[inline]
    fn raw_bytes(os_str: &OsStr) -> Option<&[u8]> {
        let text = os_str.to_str()?;
        Some(text.as_bytes())
    }

    raw_bytes(os_str)
}
213
214 /// Create an immutable byte string from a file path.
215 ///
216 /// When the underlying bytes of paths are accessible, then this always
217 /// succeeds and is zero cost. Otherwise, this returns `None` if the given
218 /// path is not valid UTF-8. (For example, when the underlying bytes are
219 /// inaccessible on Windows, file paths are allowed to be a sequence of
220 /// arbitrary 16-bit integers. Not all such sequences can be transcoded to
221 /// valid UTF-8.)
222 ///
223 /// # Examples
224 ///
225 /// Basic usage:
226 ///
227 /// ```
228 /// use std::path::Path;
229 ///
230 /// use bstr::{B, ByteSlice};
231 ///
232 /// let path = Path::new("foo");
233 /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
234 /// assert_eq!(bs, B("foo"));
235 /// ```
#[cfg(feature = "std")]
#[inline]
fn from_path(path: &Path) -> Option<&[u8]> {
    // A path is converted via its OS string, so the rules of `from_os_str`
    // apply unchanged.
    let os_str = path.as_os_str();
    Self::from_os_str(os_str)
}
241
242 /// Safely convert this byte string into a `&str` if it's valid UTF-8.
243 ///
244 /// If this byte string is not valid UTF-8, then an error is returned. The
245 /// error returned indicates the first invalid byte found and the length
246 /// of the error.
247 ///
248 /// In cases where a lossy conversion to `&str` is acceptable, then use one
249 /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
250 /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
251 /// methods.
252 ///
253 /// # Examples
254 ///
255 /// Basic usage:
256 ///
257 /// ```
258 /// # #[cfg(feature = "alloc")] {
259 /// use bstr::{B, ByteSlice, ByteVec};
260 ///
261 /// # fn example() -> Result<(), bstr::Utf8Error> {
262 /// let s = B("☃βツ").to_str()?;
263 /// assert_eq!("☃βツ", s);
264 ///
265 /// let mut bstring = <Vec<u8>>::from("☃βツ");
266 /// bstring.push(b'\xFF');
267 /// let err = bstring.to_str().unwrap_err();
268 /// assert_eq!(8, err.valid_up_to());
269 /// # Ok(()) }; example().unwrap()
270 /// # }
271 /// ```
272 #[inline]
273 fn to_str(&self) -> Result<&str, Utf8Error> {
274 utf8::validate(self.as_bytes()).map(|_| {
275 // SAFETY: This is safe because of the guarantees provided by
276 // utf8::validate.
277 unsafe { str::from_utf8_unchecked(self.as_bytes()) }
278 })
279 }
280
281 /// Unsafely convert this byte string into a `&str`, without checking for
282 /// valid UTF-8.
283 ///
284 /// # Safety
285 ///
286 /// Callers *must* ensure that this byte string is valid UTF-8 before
287 /// calling this method. Converting a byte string into a `&str` that is
288 /// not valid UTF-8 is considered undefined behavior.
289 ///
290 /// This routine is useful in performance sensitive contexts where the
291 /// UTF-8 validity of the byte string is already known and it is
292 /// undesirable to pay the cost of an additional UTF-8 validation check
293 /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
294 ///
295 /// # Examples
296 ///
297 /// Basic usage:
298 ///
299 /// ```
300 /// use bstr::{B, ByteSlice};
301 ///
302 /// // SAFETY: This is safe because string literals are guaranteed to be
303 /// // valid UTF-8 by the Rust compiler.
304 /// let s = unsafe { B("☃βツ").to_str_unchecked() };
305 /// assert_eq!("☃βツ", s);
306 /// ```
307 #[inline]
308 unsafe fn to_str_unchecked(&self) -> &str {
309 str::from_utf8_unchecked(self.as_bytes())
310 }
311
312 /// Convert this byte string to a valid UTF-8 string by replacing invalid
313 /// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
314 ///
315 /// If the byte string is already valid UTF-8, then no copying or
316 /// allocation is performed and a borrowed string slice is returned. If
317 /// the byte string is not valid UTF-8, then an owned string buffer is
318 /// returned with invalid bytes replaced by the replacement codepoint.
319 ///
320 /// This method uses the "substitution of maximal subparts" (Unicode
321 /// Standard, Chapter 3, Section 9) strategy for inserting the replacement
322 /// codepoint. Specifically, a replacement codepoint is inserted whenever a
323 /// byte is found that cannot possibly lead to a valid code unit sequence.
324 /// If there were previous bytes that represented a prefix of a well-formed
325 /// code unit sequence, then all of those bytes are substituted with a
326 /// single replacement codepoint. The "substitution of maximal subparts"
327 /// strategy is the same strategy used by
328 /// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
329 /// For a more precise description of the maximal subpart strategy, see
330 /// the Unicode Standard, Chapter 3, Section 9. See also
331 /// [Public Review Issue #121](https://www.unicode.org/review/pr-121.html).
332 ///
333 /// N.B. Rust's standard library also appears to use the same strategy,
334 /// but it does not appear to be an API guarantee.
335 ///
336 /// # Examples
337 ///
338 /// Basic usage:
339 ///
340 /// ```
341 /// use std::borrow::Cow;
342 ///
343 /// use bstr::ByteSlice;
344 ///
345 /// let mut bstring = <Vec<u8>>::from("☃βツ");
346 /// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
347 ///
348 /// // Add a byte that makes the sequence invalid.
349 /// bstring.push(b'\xFF');
350 /// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
351 /// ```
352 ///
353 /// This demonstrates the "maximal subpart" substitution logic.
354 ///
355 /// ```
356 /// use bstr::{B, ByteSlice};
357 ///
358 /// // \x61 is the ASCII codepoint for 'a'.
359 /// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
360 /// // \xE1\x80 is a valid 2-byte code unit prefix.
361 /// // \xC2 is a valid 1-byte code unit prefix.
362 /// // \x62 is the ASCII codepoint for 'b'.
363 /// //
364 /// // In sum, each of the prefixes is replaced by a single replacement
365 /// // codepoint since none of the prefixes are properly completed. This
366 /// // is in contrast to other strategies that might insert a replacement
367 /// // codepoint for every single byte.
368 /// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
369 /// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
370 /// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_str_lossy(&self) -> Cow<'_, str> {
    let bytes = self.as_bytes();

    // Fast path: already valid UTF-8, so borrow without copying.
    let err = match utf8::validate(bytes) {
        Ok(()) => {
            // SAFETY: This is safe because of the guarantees provided by
            // utf8::validate.
            return Cow::Borrowed(unsafe { str::from_utf8_unchecked(bytes) });
        }
        Err(err) => err,
    };

    // Slow path: copy the valid prefix, substitute the first invalid
    // sequence, then let `to_str_lossy_into` handle whatever follows it.
    let mut lossy = String::with_capacity(bytes.len());
    let (valid, rest) = bytes.split_at(err.valid_up_to());
    // SAFETY: This is safe because utf8::validate guarantees that all of
    // `valid` is valid UTF-8.
    lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
    lossy.push_str("\u{FFFD}");
    // `None` means the input ended inside the invalid sequence, so there is
    // nothing left to convert after the replacement codepoint.
    if let Some(len) = err.error_len() {
        rest[len..].to_str_lossy_into(&mut lossy);
    }
    Cow::Owned(lossy)
}
397
398 /// Copy the contents of this byte string into the given owned string
399 /// buffer, while replacing invalid UTF-8 code unit sequences with the
400 /// Unicode replacement codepoint (`U+FFFD`).
401 ///
402 /// This method uses the same "substitution of maximal subparts" strategy
403 /// for inserting the replacement codepoint as the
404 /// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
405 ///
406 /// This routine is useful for amortizing allocation. However, unlike
407 /// `to_str_lossy`, this routine will _always_ copy the contents of this
408 /// byte string into the destination buffer, even if this byte string is
409 /// valid UTF-8.
410 ///
411 /// # Examples
412 ///
413 /// Basic usage:
414 ///
415 /// ```
416 /// use std::borrow::Cow;
417 ///
418 /// use bstr::ByteSlice;
419 ///
420 /// let mut bstring = <Vec<u8>>::from("☃βツ");
421 /// // Add a byte that makes the sequence invalid.
422 /// bstring.push(b'\xFF');
423 ///
424 /// let mut dest = String::new();
425 /// bstring.to_str_lossy_into(&mut dest);
426 /// assert_eq!("☃βツ\u{FFFD}", dest);
427 /// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_str_lossy_into(&self, dest: &mut String) {
    let mut remaining = self.as_bytes();
    dest.reserve(remaining.len());

    // Each failed validation yields a valid prefix followed by one invalid
    // sequence: copy the prefix, emit a single U+FFFD, and resume after the
    // invalid bytes.
    while let Err(err) = utf8::validate(remaining) {
        let (valid, after) = remaining.split_at(err.valid_up_to());
        // SAFETY: This is safe because utf8::validate guarantees that all
        // of `valid` is valid UTF-8.
        dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
        dest.push_str("\u{FFFD}");
        match err.error_len() {
            Some(len) => remaining = &after[len..],
            // The input ended inside the invalid sequence; nothing follows.
            None => return,
        }
    }

    // SAFETY: This is safe because the loop only exits normally once
    // utf8::validate has accepted all of `remaining` as valid UTF-8.
    dest.push_str(unsafe { str::from_utf8_unchecked(remaining) });
}
455
456 /// Create an OS string slice from this byte string.
457 ///
458 /// When OS strings can be constructed from arbitrary byte sequences, this
459 /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
460 /// decoding error if this byte string is not valid UTF-8. (For example,
461 /// assuming the representation of `OsStr` is opaque on Windows, file paths
462 /// are allowed to be a sequence of arbitrary 16-bit integers. There is
463 /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
464 /// arbitrary sequence of 16-bit integers. If the representation of `OsStr`
465 /// is ever opened up, then this will convert any sequence of bytes to an
466 /// `OsStr` without cost.)
467 ///
468 /// # Examples
469 ///
470 /// Basic usage:
471 ///
472 /// ```
473 /// use bstr::{B, ByteSlice};
474 ///
475 /// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
476 /// assert_eq!(os_str, "foo");
477 /// ```
#[cfg(feature = "std")]
#[inline]
fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
    // Unix: any byte sequence can be viewed as an `OsStr` via `OsStrExt`,
    // so this arm never fails.
    #[cfg(unix)]
    #[inline]
    fn wrap(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
        use std::os::unix::ffi::OsStrExt;

        Ok(<OsStr as OsStrExt>::from_bytes(bytes))
    }

    // Elsewhere: the bytes must first pass UTF-8 validation; the resulting
    // `&str` is then viewed as an `OsStr`.
    #[cfg(not(unix))]
    #[inline]
    fn wrap(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
        let text = bytes.to_str()?;
        Ok(OsStr::new(text))
    }

    wrap(self.as_bytes())
}
497
498 /// Lossily create an OS string slice from this byte string.
499 ///
500 /// When OS strings can be constructed from arbitrary byte sequences, this
501 /// is zero cost and always returns a slice. Otherwise, this will perform a
502 /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
503 /// the Unicode replacement codepoint.
504 ///
505 /// Note that this can prevent the correct roundtripping of file paths when
506 /// the representation of `OsStr` is opaque.
507 ///
508 /// # Examples
509 ///
510 /// Basic usage:
511 ///
512 /// ```
513 /// use bstr::ByteSlice;
514 ///
515 /// let os_str = b"foo\xFFbar".to_os_str_lossy();
516 /// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
517 /// ```
#[cfg(feature = "std")]
#[inline]
fn to_os_str_lossy(&self) -> Cow<'_, OsStr> {
    // Unix: any byte sequence is a valid `OsStr`, so this always borrows and
    // never replaces anything.
    #[cfg(unix)]
    #[inline]
    fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
        use std::os::unix::ffi::OsStrExt;

        Cow::Borrowed(OsStr::from_bytes(bytes))
    }

    // Elsewhere: convert lossily to UTF-8 first, keeping the borrowed/owned
    // distinction reported by `to_str_lossy`. The return type spells out the
    // elided lifetime (`'_`) so that it matches the unix helper and the outer
    // signature instead of hiding the borrow.
    #[cfg(not(unix))]
    #[inline]
    fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
        use std::ffi::OsString;

        match bytes.to_str_lossy() {
            Cow::Borrowed(x) => Cow::Borrowed(OsStr::new(x)),
            Cow::Owned(x) => Cow::Owned(OsString::from(x)),
        }
    }

    imp(self.as_bytes())
}
542
543 /// Create a path slice from this byte string.
544 ///
545 /// When paths can be constructed from arbitrary byte sequences, this
546 /// always succeeds and is zero cost. Otherwise, this returns a UTF-8
547 /// decoding error if this byte string is not valid UTF-8. (For example,
548 /// assuming the representation of `Path` is opaque on Windows, file paths
549 /// are allowed to be a sequence of arbitrary 16-bit integers. There is
550 /// no obvious mapping from an arbitrary sequence of 8-bit integers to an
551 /// arbitrary sequence of 16-bit integers. If the representation of `Path`
552 /// is ever opened up, then this will convert any sequence of bytes to a
553 /// `Path` without cost.)
554 ///
555 /// # Examples
556 ///
557 /// Basic usage:
558 ///
559 /// ```
560 /// use bstr::ByteSlice;
561 ///
562 /// let path = b"foo".to_path().expect("should be valid UTF-8");
563 /// assert_eq!(path.as_os_str(), "foo");
564 /// ```
#[cfg(feature = "std")]
#[inline]
fn to_path(&self) -> Result<&Path, Utf8Error> {
    // A path is just a view over an OS string, so any failure comes from
    // `to_os_str`.
    let os_str = self.to_os_str()?;
    Ok(Path::new(os_str))
}
570
571 /// Lossily create a path slice from this byte string.
572 ///
573 /// When paths can be constructed from arbitrary byte sequences, this is
574 /// zero cost and always returns a slice. Otherwise, this will perform a
575 /// UTF-8 check and lossily convert this byte string into valid UTF-8 using
576 /// the Unicode replacement codepoint.
577 ///
578 /// Note that this can prevent the correct roundtripping of file paths when
579 /// the representation of `Path` is opaque.
580 ///
581 /// # Examples
582 ///
583 /// Basic usage:
584 ///
585 /// ```
586 /// use bstr::ByteSlice;
587 ///
588 /// let bs = b"foo\xFFbar";
589 /// let path = bs.to_path_lossy();
590 /// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
591 /// ```
#[cfg(feature = "std")]
#[inline]
fn to_path_lossy(&self) -> Cow<'_, Path> {
    // Convert to an OS string first, then mirror its borrowed/owned state in
    // the corresponding path type.
    let os_str = self.to_os_str_lossy();
    match os_str {
        Cow::Owned(owned) => Cow::Owned(std::path::PathBuf::from(owned)),
        Cow::Borrowed(borrowed) => Cow::Borrowed(Path::new(borrowed)),
    }
}
602
603 /// Create a new byte string by repeating this byte string `n` times.
604 ///
605 /// # Panics
606 ///
607 /// This function panics if the capacity of the new byte string would
608 /// overflow.
609 ///
610 /// # Examples
611 ///
612 /// Basic usage:
613 ///
614 /// ```
615 /// use bstr::{B, ByteSlice};
616 ///
617 /// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
618 /// assert_eq!(b"foo".repeatn(0), B(""));
619 /// ```
#[cfg(feature = "alloc")]
#[inline]
fn repeatn(&self, n: usize) -> Vec<u8> {
    // Delegates to the slice `repeat` method, which builds the new buffer.
    let bytes: &[u8] = self.as_bytes();
    bytes.repeat(n)
}
625
626 /// Returns true if and only if this byte string contains the given needle.
627 ///
628 /// # Examples
629 ///
630 /// Basic usage:
631 ///
632 /// ```
633 /// use bstr::ByteSlice;
634 ///
635 /// assert!(b"foo bar".contains_str("foo"));
636 /// assert!(b"foo bar".contains_str("bar"));
637 /// assert!(!b"foo".contains_str("foobar"));
638 /// ```
639 #[inline]
640 fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
641 self.find(needle).is_some()
642 }
643
644 /// Returns true if and only if this byte string has the given prefix.
645 ///
646 /// # Examples
647 ///
648 /// Basic usage:
649 ///
650 /// ```
651 /// use bstr::ByteSlice;
652 ///
653 /// assert!(b"foo bar".starts_with_str("foo"));
654 /// assert!(!b"foo bar".starts_with_str("bar"));
655 /// assert!(!b"foo".starts_with_str("foobar"));
656 /// ```
657 #[inline]
658 fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
659 self.as_bytes().starts_with(prefix.as_ref())
660 }
661
662 /// Returns true if and only if this byte string has the given suffix.
663 ///
664 /// # Examples
665 ///
666 /// Basic usage:
667 ///
668 /// ```
669 /// use bstr::ByteSlice;
670 ///
671 /// assert!(b"foo bar".ends_with_str("bar"));
672 /// assert!(!b"foo bar".ends_with_str("foo"));
673 /// assert!(!b"bar".ends_with_str("foobar"));
674 /// ```
675 #[inline]
676 fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
677 self.as_bytes().ends_with(suffix.as_ref())
678 }
679
680 /// Returns the index of the first occurrence of the given needle.
681 ///
682 /// The needle may be any type that can be cheaply converted into a
683 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
684 ///
685 /// Note that if you are searching for the same needle in many
686 /// different small haystacks, it may be faster to initialize a
687 /// [`Finder`](struct.Finder.html) once, and reuse it for each search.
688 ///
689 /// # Complexity
690 ///
691 /// This routine is guaranteed to have worst case linear time complexity
692 /// with respect to both the needle and the haystack. That is, this runs
693 /// in `O(needle.len() + haystack.len())` time.
694 ///
695 /// This routine is also guaranteed to have worst case constant space
696 /// complexity.
697 ///
698 /// # Examples
699 ///
700 /// Basic usage:
701 ///
702 /// ```
703 /// use bstr::ByteSlice;
704 ///
705 /// let s = b"foo bar baz";
706 /// assert_eq!(Some(0), s.find("foo"));
707 /// assert_eq!(Some(4), s.find("bar"));
708 /// assert_eq!(None, s.find("quux"));
709 /// ```
710 #[inline]
711 fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
712 Finder::new(needle.as_ref()).find(self.as_bytes())
713 }
714
715 /// Returns the index of the last occurrence of the given needle.
716 ///
717 /// The needle may be any type that can be cheaply converted into a
718 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
719 ///
720 /// Note that if you are searching for the same needle in many
721 /// different small haystacks, it may be faster to initialize a
722 /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
723 /// each search.
724 ///
725 /// # Complexity
726 ///
727 /// This routine is guaranteed to have worst case linear time complexity
728 /// with respect to both the needle and the haystack. That is, this runs
729 /// in `O(needle.len() + haystack.len())` time.
730 ///
731 /// This routine is also guaranteed to have worst case constant space
732 /// complexity.
733 ///
734 /// # Examples
735 ///
736 /// Basic usage:
737 ///
738 /// ```
739 /// use bstr::ByteSlice;
740 ///
741 /// let s = b"foo bar baz";
742 /// assert_eq!(Some(0), s.rfind("foo"));
743 /// assert_eq!(Some(4), s.rfind("bar"));
744 /// assert_eq!(Some(8), s.rfind("ba"));
745 /// assert_eq!(None, s.rfind("quux"));
746 /// ```
747 #[inline]
748 fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
749 FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
750 }
751
752 /// Returns an iterator of the non-overlapping occurrences of the given
753 /// needle. The iterator yields byte offset positions indicating the start
754 /// of each match.
755 ///
756 /// # Complexity
757 ///
758 /// This routine is guaranteed to have worst case linear time complexity
759 /// with respect to both the needle and the haystack. That is, this runs
760 /// in `O(needle.len() + haystack.len())` time.
761 ///
762 /// This routine is also guaranteed to have worst case constant space
763 /// complexity.
764 ///
765 /// # Examples
766 ///
767 /// Basic usage:
768 ///
769 /// ```
770 /// use bstr::ByteSlice;
771 ///
772 /// let s = b"foo bar foo foo quux foo";
773 /// let matches: Vec<usize> = s.find_iter("foo").collect();
774 /// assert_eq!(matches, vec![0, 8, 12, 21]);
775 /// ```
776 ///
777 /// An empty string matches at every position, including the position
778 /// immediately following the last byte:
779 ///
780 /// ```
781 /// use bstr::ByteSlice;
782 ///
783 /// let matches: Vec<usize> = b"foo".find_iter("").collect();
784 /// assert_eq!(matches, vec![0, 1, 2, 3]);
785 ///
786 /// let matches: Vec<usize> = b"".find_iter("").collect();
787 /// assert_eq!(matches, vec![0]);
788 /// ```
789 #[inline]
790 fn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
791 &'h self,
792 needle: &'n B,
793 ) -> Find<'h, 'n> {
794 Find::new(self.as_bytes(), needle.as_ref())
795 }
796
797 /// Returns an iterator of the non-overlapping occurrences of the given
798 /// needle in reverse. The iterator yields byte offset positions indicating
799 /// the start of each match.
800 ///
801 /// # Complexity
802 ///
803 /// This routine is guaranteed to have worst case linear time complexity
804 /// with respect to both the needle and the haystack. That is, this runs
805 /// in `O(needle.len() + haystack.len())` time.
806 ///
807 /// This routine is also guaranteed to have worst case constant space
808 /// complexity.
809 ///
810 /// # Examples
811 ///
812 /// Basic usage:
813 ///
814 /// ```
815 /// use bstr::ByteSlice;
816 ///
817 /// let s = b"foo bar foo foo quux foo";
818 /// let matches: Vec<usize> = s.rfind_iter("foo").collect();
819 /// assert_eq!(matches, vec![21, 12, 8, 0]);
820 /// ```
821 ///
822 /// An empty string matches at every position, including the position
823 /// immediately following the last byte:
824 ///
825 /// ```
826 /// use bstr::ByteSlice;
827 ///
828 /// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
829 /// assert_eq!(matches, vec![3, 2, 1, 0]);
830 ///
831 /// let matches: Vec<usize> = b"".rfind_iter("").collect();
832 /// assert_eq!(matches, vec![0]);
833 /// ```
834 #[inline]
835 fn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
836 &'h self,
837 needle: &'n B,
838 ) -> FindReverse<'h, 'n> {
839 FindReverse::new(self.as_bytes(), needle.as_ref())
840 }
841
842 /// Returns the index of the first occurrence of the given byte. If the
843 /// byte does not occur in this byte string, then `None` is returned.
844 ///
845 /// # Examples
846 ///
847 /// Basic usage:
848 ///
849 /// ```
850 /// use bstr::ByteSlice;
851 ///
852 /// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
853 /// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
854 /// ```
855 #[inline]
856 fn find_byte(&self, byte: u8) -> Option<usize> {
857 memchr(byte, self.as_bytes())
858 }
859
860 /// Returns the index of the last occurrence of the given byte. If the
861 /// byte does not occur in this byte string, then `None` is returned.
862 ///
863 /// # Examples
864 ///
865 /// Basic usage:
866 ///
867 /// ```
868 /// use bstr::ByteSlice;
869 ///
870 /// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
871 /// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
872 /// ```
873 #[inline]
874 fn rfind_byte(&self, byte: u8) -> Option<usize> {
875 memrchr(byte, self.as_bytes())
876 }
877
878 /// Returns the index of the first occurrence of the given codepoint.
879 /// If the codepoint does not occur in this byte string, then `None` is
880 /// returned.
881 ///
882 /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
883 /// then only explicit occurrences of that encoding will be found. Invalid
884 /// UTF-8 sequences will not be matched.
885 ///
886 /// # Examples
887 ///
888 /// Basic usage:
889 ///
890 /// ```
891 /// use bstr::{B, ByteSlice};
892 ///
893 /// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
894 /// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
895 /// assert_eq!(None, b"foo bar baz".find_char('y'));
896 /// ```
897 #[inline]
898 fn find_char(&self, ch: char) -> Option<usize> {
899 self.find(ch.encode_utf8(&mut [0; 4]))
900 }
901
902 /// Returns the index of the last occurrence of the given codepoint.
903 /// If the codepoint does not occur in this byte string, then `None` is
904 /// returned.
905 ///
906 /// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
907 /// then only explicit occurrences of that encoding will be found. Invalid
908 /// UTF-8 sequences will not be matched.
909 ///
910 /// # Examples
911 ///
912 /// Basic usage:
913 ///
914 /// ```
915 /// use bstr::{B, ByteSlice};
916 ///
917 /// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
918 /// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
919 /// assert_eq!(None, b"foo bar baz".rfind_char('y'));
920 /// ```
921 #[inline]
922 fn rfind_char(&self, ch: char) -> Option<usize> {
923 self.rfind(ch.encode_utf8(&mut [0; 4]))
924 }
925
926 /// Returns the index of the first occurrence of any of the bytes in the
927 /// provided set.
928 ///
929 /// The `byteset` may be any type that can be cheaply converted into a
930 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
931 /// note that passing a `&str` which contains multibyte characters may not
932 /// behave as you expect: each byte in the `&str` is treated as an
933 /// individual member of the byte set.
934 ///
935 /// Note that order is irrelevant for the `byteset` parameter, and
936 /// duplicate bytes present in its body are ignored.
937 ///
938 /// # Complexity
939 ///
940 /// This routine is guaranteed to have worst case linear time complexity
941 /// with respect to both the set of bytes and the haystack. That is, this
942 /// runs in `O(byteset.len() + haystack.len())` time.
943 ///
944 /// This routine is also guaranteed to have worst case constant space
945 /// complexity.
946 ///
947 /// # Examples
948 ///
949 /// Basic usage:
950 ///
951 /// ```
952 /// use bstr::ByteSlice;
953 ///
954 /// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
955 /// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
956 /// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
957 /// // The empty byteset never matches.
958 /// assert_eq!(None, b"abc".find_byteset(b""));
959 /// assert_eq!(None, b"".find_byteset(b""));
960 /// ```
961 #[inline]
962 fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
963 byteset::find(self.as_bytes(), byteset.as_ref())
964 }
965
966 /// Returns the index of the first occurrence of a byte that is not a
967 /// member of the provided set.
968 ///
969 /// The `byteset` may be any type that can be cheaply converted into a
970 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
971 /// note that passing a `&str` which contains multibyte characters may not
972 /// behave as you expect: each byte in the `&str` is treated as an
973 /// individual member of the byte set.
974 ///
975 /// Note that order is irrelevant for the `byteset` parameter, and
976 /// duplicate bytes present in its body are ignored.
977 ///
978 /// # Complexity
979 ///
980 /// This routine is guaranteed to have worst case linear time complexity
981 /// with respect to both the set of bytes and the haystack. That is, this
982 /// runs in `O(byteset.len() + haystack.len())` time.
983 ///
984 /// This routine is also guaranteed to have worst case constant space
985 /// complexity.
986 ///
987 /// # Examples
988 ///
989 /// Basic usage:
990 ///
991 /// ```
992 /// use bstr::ByteSlice;
993 ///
994 /// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(4));
995 /// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
996 /// assert_eq!(b"foo\nbaz\tbar".find_not_byteset(b"\t\n"), Some(0));
997 /// // The negation of the empty byteset matches everything.
998 /// assert_eq!(Some(0), b"abc".find_not_byteset(b""));
999 /// // But an empty string never contains anything.
1000 /// assert_eq!(None, b"".find_not_byteset(b""));
1001 /// ```
1002 #[inline]
1003 fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1004 byteset::find_not(self.as_bytes(), byteset.as_ref())
1005 }
1006
1007 /// Returns the index of the last occurrence of any of the bytes in the
1008 /// provided set.
1009 ///
1010 /// The `byteset` may be any type that can be cheaply converted into a
1011 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1012 /// note that passing a `&str` which contains multibyte characters may not
1013 /// behave as you expect: each byte in the `&str` is treated as an
1014 /// individual member of the byte set.
1015 ///
1016 /// Note that order is irrelevant for the `byteset` parameter, and duplicate
1017 /// bytes present in its body are ignored.
1018 ///
1019 /// # Complexity
1020 ///
1021 /// This routine is guaranteed to have worst case linear time complexity
1022 /// with respect to both the set of bytes and the haystack. That is, this
1023 /// runs in `O(byteset.len() + haystack.len())` time.
1024 ///
1025 /// This routine is also guaranteed to have worst case constant space
1026 /// complexity.
1027 ///
1028 /// # Examples
1029 ///
1030 /// Basic usage:
1031 ///
1032 /// ```
1033 /// use bstr::ByteSlice;
1034 ///
1035 /// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(9));
1036 /// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(10));
1037 /// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
1038 /// ```
1039 #[inline]
1040 fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1041 byteset::rfind(self.as_bytes(), byteset.as_ref())
1042 }
1043
1044 /// Returns the index of the last occurrence of a byte that is not a member
1045 /// of the provided set.
1046 ///
1047 /// The `byteset` may be any type that can be cheaply converted into a
1048 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1049 /// note that passing a `&str` which contains multibyte characters may not
1050 /// behave as you expect: each byte in the `&str` is treated as an
1051 /// individual member of the byte set.
1052 ///
1053 /// Note that order is irrelevant for the `byteset` parameter, and
1054 /// duplicate bytes present in its body are ignored.
1055 ///
1056 /// # Complexity
1057 ///
1058 /// This routine is guaranteed to have worst case linear time complexity
1059 /// with respect to both the set of bytes and the haystack. That is, this
1060 /// runs in `O(byteset.len() + haystack.len())` time.
1061 ///
1062 /// This routine is also guaranteed to have worst case constant space
1063 /// complexity.
1064 ///
1065 /// # Examples
1066 ///
1067 /// Basic usage:
1068 ///
1069 /// ```
1070 /// use bstr::ByteSlice;
1071 ///
1072 /// assert_eq!(b"foo bar baz,\t".rfind_not_byteset(b",\t"), Some(10));
1073 /// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(2));
1074 /// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
1075 /// ```
1076 #[inline]
1077 fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1078 byteset::rfind_not(self.as_bytes(), byteset.as_ref())
1079 }
1080
1081 /// Returns an iterator over the fields in a byte string, separated
1082 /// by contiguous whitespace (according to the Unicode property
1083 /// `White_Space`).
1084 ///
1085 /// # Example
1086 ///
1087 /// Basic usage:
1088 ///
1089 /// ```
1090 /// use bstr::{B, ByteSlice};
1091 ///
1092 /// let s = B(" foo\tbar\t\u{2003}\nquux \n");
1093 /// let fields: Vec<&[u8]> = s.fields().collect();
1094 /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1095 /// ```
1096 ///
1097 /// A byte string consisting of just whitespace yields no elements:
1098 ///
1099 /// ```
1100 /// use bstr::{B, ByteSlice};
1101 ///
1102 /// assert_eq!(0, B(" \n\t\u{2003}\n \t").fields().count());
1103 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn fields(&self) -> Fields<'_> {
    // The iterator borrows this byte string's raw bytes for its lifetime.
    let haystack = self.as_bytes();
    Fields::new(haystack)
}
1109
1110 /// Returns an iterator over the fields in a byte string, separated by
1111 /// contiguous codepoints satisfying the given predicate.
1112 ///
1113 /// If this byte string is not valid UTF-8, then the given closure will
1114 /// be called with a Unicode replacement codepoint when invalid UTF-8
1115 /// bytes are seen.
1116 ///
1117 /// # Example
1118 ///
1119 /// Basic usage:
1120 ///
1121 /// ```
1122 /// use bstr::{B, ByteSlice};
1123 ///
1124 /// let s = b"123foo999999bar1quux123456";
1125 /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect();
1126 /// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1127 /// ```
1128 ///
1129 /// A byte string consisting of all codepoints satisfying the predicate
1130 /// yields no elements:
1131 ///
1132 /// ```
1133 /// use bstr::ByteSlice;
1134 ///
1135 /// assert_eq!(0, b"1911354563".fields_with(|c| c.is_numeric()).count());
1136 /// ```
1137 #[inline]
1138 fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> {
1139 FieldsWith::new(self.as_bytes(), f)
1140 }
1141
1142 /// Returns an iterator over substrings of this byte string, separated
1143 /// by the given byte string. Each element yielded is guaranteed not to
1144 /// include the splitter substring.
1145 ///
1146 /// The splitter may be any type that can be cheaply converted into a
1147 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1148 ///
1149 /// # Examples
1150 ///
1151 /// Basic usage:
1152 ///
1153 /// ```
1154 /// use bstr::{B, ByteSlice};
1155 ///
1156 /// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
1157 /// assert_eq!(x, vec![
1158 /// B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
1159 /// ]);
1160 ///
1161 /// let x: Vec<&[u8]> = b"".split_str("X").collect();
1162 /// assert_eq!(x, vec![b""]);
1163 ///
1164 /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
1165 /// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
1166 ///
1167 /// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
1168 /// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
1169 /// ```
1170 ///
1171 /// If a string contains multiple contiguous separators, you will end up
1172 /// with empty strings yielded by the iterator:
1173 ///
1174 /// ```
1175 /// use bstr::{B, ByteSlice};
1176 ///
1177 /// let x: Vec<&[u8]> = b"||||a||b|c".split_str("|").collect();
1178 /// assert_eq!(x, vec![
1179 /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1180 /// ]);
1181 ///
1182 /// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
1183 /// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
1184 /// ```
1185 ///
1186 /// Separators at the start or end of a string are neighbored by empty
1187 /// strings.
1188 ///
1189 /// ```
1190 /// use bstr::{B, ByteSlice};
1191 ///
1192 /// let x: Vec<&[u8]> = b"010".split_str("0").collect();
1193 /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1194 /// ```
1195 ///
1196 /// When the empty string is used as a separator, it splits every **byte**
1197 /// in the byte string, along with the beginning and end of the byte
1198 /// string.
1199 ///
1200 /// ```
1201 /// use bstr::{B, ByteSlice};
1202 ///
1203 /// let x: Vec<&[u8]> = b"rust".split_str("").collect();
1204 /// assert_eq!(x, vec![
1205 /// B(""), B("r"), B("u"), B("s"), B("t"), B(""),
1206 /// ]);
1207 ///
1208 /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1209 /// // may not be valid UTF-8!
1210 /// let x: Vec<&[u8]> = B("☃").split_str("").collect();
1211 /// assert_eq!(x, vec![
1212 /// B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
1213 /// ]);
1214 /// ```
1215 ///
1216 /// Contiguous separators, especially whitespace, can lead to possibly
1217 /// surprising behavior. For example, this code is correct:
1218 ///
1219 /// ```
1220 /// use bstr::{B, ByteSlice};
1221 ///
1222 /// let x: Vec<&[u8]> = b" a b c".split_str(" ").collect();
1223 /// assert_eq!(x, vec![
1224 /// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1225 /// ]);
1226 /// ```
1227 ///
1228 /// It does *not* give you `["a", "b", "c"]`. For that behavior, use
1229 /// [`fields`](#method.fields) instead.
1230 #[inline]
1231 fn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1232 &'h self,
1233 splitter: &'s B,
1234 ) -> Split<'h, 's> {
1235 Split::new(self.as_bytes(), splitter.as_ref())
1236 }
1237
1238 /// Returns an iterator over substrings of this byte string, separated by
1239 /// the given byte string, in reverse. Each element yielded is guaranteed
1240 /// not to include the splitter substring.
1241 ///
1242 /// The splitter may be any type that can be cheaply converted into a
1243 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1244 ///
1245 /// # Examples
1246 ///
1247 /// Basic usage:
1248 ///
1249 /// ```
1250 /// use bstr::{B, ByteSlice};
1251 ///
1252 /// let x: Vec<&[u8]> =
1253 /// b"Mary had a little lamb".rsplit_str(" ").collect();
1254 /// assert_eq!(x, vec![
1255 /// B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
1256 /// ]);
1257 ///
1258 /// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
1259 /// assert_eq!(x, vec![b""]);
1260 ///
1261 /// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
1262 /// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
1263 ///
1264 /// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
1265 /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
1266 /// ```
1267 ///
1268 /// If a string contains multiple contiguous separators, you will end up
1269 /// with empty strings yielded by the iterator:
1270 ///
1271 /// ```
1272 /// use bstr::{B, ByteSlice};
1273 ///
1274 /// let x: Vec<&[u8]> = b"||||a||b|c".rsplit_str("|").collect();
1275 /// assert_eq!(x, vec![
1276 /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1277 /// ]);
1278 ///
1279 /// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
1280 /// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
1281 /// ```
1282 ///
1283 /// Separators at the start or end of a string are neighbored by empty
1284 /// strings.
1285 ///
1286 /// ```
1287 /// use bstr::{B, ByteSlice};
1288 ///
1289 /// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
1290 /// assert_eq!(x, vec![B(""), B("1"), B("")]);
1291 /// ```
1292 ///
1293 /// When the empty string is used as a separator, it splits every **byte**
1294 /// in the byte string, along with the beginning and end of the byte
1295 /// string.
1296 ///
1297 /// ```
1298 /// use bstr::{B, ByteSlice};
1299 ///
1300 /// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
1301 /// assert_eq!(x, vec![
1302 /// B(""), B("t"), B("s"), B("u"), B("r"), B(""),
1303 /// ]);
1304 ///
1305 /// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1306 /// // may not be valid UTF-8!
1307 /// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
1308 /// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
1309 /// ```
1310 ///
1311 /// Contiguous separators, especially whitespace, can lead to possibly
1312 /// surprising behavior. For example, this code is correct:
1313 ///
1314 /// ```
1315 /// use bstr::{B, ByteSlice};
1316 ///
1317 /// let x: Vec<&[u8]> = b" a b c".rsplit_str(" ").collect();
1318 /// assert_eq!(x, vec![
1319 /// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1320 /// ]);
1321 /// ```
1322 ///
1323 /// It does *not* give you `["a", "b", "c"]`.
1324 #[inline]
1325 fn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1326 &'h self,
1327 splitter: &'s B,
1328 ) -> SplitReverse<'h, 's> {
1329 SplitReverse::new(self.as_bytes(), splitter.as_ref())
1330 }
1331
1332 /// Split this byte string at the first occurrence of `splitter`.
1333 ///
1334 /// If the `splitter` is found in the byte string, returns a tuple
1335 /// containing the parts of the string before and after the first occurrence
1336 /// of `splitter` respectively. Otherwise, if there are no occurrences of
1337 /// `splitter` in the byte string, returns `None`.
1338 ///
1339 /// The splitter may be any type that can be cheaply converted into a
1340 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1341 ///
1342 /// If you need to split on the *last* instance of a delimiter instead, see
/// the [`ByteSlice::rsplit_once_str`](#method.rsplit_once_str) method.
1344 ///
1345 /// # Examples
1346 ///
1347 /// Basic usage:
1348 ///
1349 /// ```
1350 /// use bstr::{B, ByteSlice};
1351 ///
1352 /// assert_eq!(
1353 /// B("foo,bar").split_once_str(","),
1354 /// Some((B("foo"), B("bar"))),
1355 /// );
1356 /// assert_eq!(
1357 /// B("foo,bar,baz").split_once_str(","),
1358 /// Some((B("foo"), B("bar,baz"))),
1359 /// );
1360 /// assert_eq!(B("foo").split_once_str(","), None);
1361 /// assert_eq!(B("foo,").split_once_str(b","), Some((B("foo"), B(""))));
1362 /// assert_eq!(B(",foo").split_once_str(b","), Some((B(""), B("foo"))));
1363 /// ```
1364 #[inline]
1365 fn split_once_str<'a, B: ?Sized + AsRef<[u8]>>(
1366 &'a self,
1367 splitter: &B,
1368 ) -> Option<(&'a [u8], &'a [u8])> {
1369 let bytes = self.as_bytes();
1370 let splitter = splitter.as_ref();
1371 let start = Finder::new(splitter).find(bytes)?;
1372 let end = start + splitter.len();
1373 Some((&bytes[..start], &bytes[end..]))
1374 }
1375
1376 /// Split this byte string at the last occurrence of `splitter`.
1377 ///
1378 /// If the `splitter` is found in the byte string, returns a tuple
1379 /// containing the parts of the string before and after the last occurrence
1380 /// of `splitter`, respectively. Otherwise, if there are no occurrences of
1381 /// `splitter` in the byte string, returns `None`.
1382 ///
1383 /// The splitter may be any type that can be cheaply converted into a
1384 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1385 ///
1386 /// If you need to split on the *first* instance of a delimiter instead, see
1387 /// the [`ByteSlice::split_once_str`](#method.split_once_str) method.
1388 ///
1389 /// # Examples
1390 ///
1391 /// Basic usage:
1392 ///
1393 /// ```
1394 /// use bstr::{B, ByteSlice};
1395 ///
1396 /// assert_eq!(
1397 /// B("foo,bar").rsplit_once_str(","),
1398 /// Some((B("foo"), B("bar"))),
1399 /// );
1400 /// assert_eq!(
1401 /// B("foo,bar,baz").rsplit_once_str(","),
1402 /// Some((B("foo,bar"), B("baz"))),
1403 /// );
1404 /// assert_eq!(B("foo").rsplit_once_str(","), None);
1405 /// assert_eq!(B("foo,").rsplit_once_str(b","), Some((B("foo"), B(""))));
1406 /// assert_eq!(B(",foo").rsplit_once_str(b","), Some((B(""), B("foo"))));
1407 /// ```
1408 #[inline]
1409 fn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>(
1410 &'a self,
1411 splitter: &B,
1412 ) -> Option<(&'a [u8], &'a [u8])> {
1413 let bytes = self.as_bytes();
1414 let splitter = splitter.as_ref();
1415 let start = FinderReverse::new(splitter).rfind(bytes)?;
1416 let end = start + splitter.len();
1417 Some((&bytes[..start], &bytes[end..]))
1418 }
1419
1420 /// Returns an iterator of at most `limit` substrings of this byte string,
1421 /// separated by the given byte string. If `limit` substrings are yielded,
1422 /// then the last substring will contain the remainder of this byte string.
1423 ///
/// The splitter may be any type that can be cheaply converted into a
/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1426 ///
1427 /// # Examples
1428 ///
1429 /// Basic usage:
1430 ///
1431 /// ```
1432 /// use bstr::{B, ByteSlice};
1433 ///
1434 /// let x: Vec<_> = b"Mary had a little lamb".splitn_str(3, " ").collect();
1435 /// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
1436 ///
1437 /// let x: Vec<_> = b"".splitn_str(3, "X").collect();
1438 /// assert_eq!(x, vec![b""]);
1439 ///
1440 /// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(3, "X").collect();
1441 /// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
1442 ///
1443 /// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(2, "::").collect();
1444 /// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
1445 ///
1446 /// let x: Vec<_> = b"abcXdef".splitn_str(1, "X").collect();
1447 /// assert_eq!(x, vec![B("abcXdef")]);
1448 ///
1449 /// let x: Vec<_> = b"abcdef".splitn_str(2, "X").collect();
1450 /// assert_eq!(x, vec![B("abcdef")]);
1451 ///
1452 /// let x: Vec<_> = b"abcXdef".splitn_str(0, "X").collect();
1453 /// assert!(x.is_empty());
1454 /// ```
1455 #[inline]
1456 fn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1457 &'h self,
1458 limit: usize,
1459 splitter: &'s B,
1460 ) -> SplitN<'h, 's> {
1461 SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
1462 }
1463
1464 /// Returns an iterator of at most `limit` substrings of this byte string,
1465 /// separated by the given byte string, in reverse. If `limit` substrings
1466 /// are yielded, then the last substring will contain the remainder of this
1467 /// byte string.
1468 ///
/// The splitter may be any type that can be cheaply converted into a
/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1471 ///
1472 /// # Examples
1473 ///
1474 /// Basic usage:
1475 ///
1476 /// ```
1477 /// use bstr::{B, ByteSlice};
1478 ///
1479 /// let x: Vec<_> =
1480 /// b"Mary had a little lamb".rsplitn_str(3, " ").collect();
1481 /// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
1482 ///
1483 /// let x: Vec<_> = b"".rsplitn_str(3, "X").collect();
1484 /// assert_eq!(x, vec![b""]);
1485 ///
1486 /// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(3, "X").collect();
1487 /// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
1488 ///
1489 /// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(2, "::").collect();
1490 /// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
1491 ///
1492 /// let x: Vec<_> = b"abcXdef".rsplitn_str(1, "X").collect();
1493 /// assert_eq!(x, vec![B("abcXdef")]);
1494 ///
1495 /// let x: Vec<_> = b"abcdef".rsplitn_str(2, "X").collect();
1496 /// assert_eq!(x, vec![B("abcdef")]);
1497 ///
1498 /// let x: Vec<_> = b"abcXdef".rsplitn_str(0, "X").collect();
1499 /// assert!(x.is_empty());
1500 /// ```
1501 #[inline]
1502 fn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1503 &'h self,
1504 limit: usize,
1505 splitter: &'s B,
1506 ) -> SplitNReverse<'h, 's> {
1507 SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
1508 }
1509
/// Replace all matches of the given needle with the given replacement, and
/// return the result as a new `Vec<u8>`.
1512 ///
1513 /// This routine is useful as a convenience. If you need to reuse an
1514 /// allocation, use [`replace_into`](#method.replace_into) instead.
1515 ///
1516 /// # Examples
1517 ///
1518 /// Basic usage:
1519 ///
1520 /// ```
1521 /// use bstr::ByteSlice;
1522 ///
1523 /// let s = b"this is old".replace("old", "new");
1524 /// assert_eq!(s, "this is new".as_bytes());
1525 /// ```
1526 ///
1527 /// When the pattern doesn't match:
1528 ///
1529 /// ```
1530 /// use bstr::ByteSlice;
1531 ///
1532 /// let s = b"this is old".replace("nada nada", "limonada");
1533 /// assert_eq!(s, "this is old".as_bytes());
1534 /// ```
1535 ///
1536 /// When the needle is an empty string:
1537 ///
1538 /// ```
1539 /// use bstr::ByteSlice;
1540 ///
1541 /// let s = b"foo".replace("", "Z");
1542 /// assert_eq!(s, "ZfZoZoZ".as_bytes());
1543 /// ```
#[cfg(feature = "alloc")]
#[inline]
fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
) -> Vec<u8> {
    // Reserve as many bytes as the input holds, then let `replace_into`
    // append the substituted bytes to the fresh buffer.
    let input_len = self.as_bytes().len();
    let mut output = Vec::with_capacity(input_len);
    self.replace_into(needle, replacement, &mut output);
    output
}
1555
/// Replace up to `limit` matches of the given needle with the given
/// replacement, and return the result as a new `Vec<u8>`.
1558 ///
1559 /// This routine is useful as a convenience. If you need to reuse an
1560 /// allocation, use [`replacen_into`](#method.replacen_into) instead.
1561 ///
1562 /// # Examples
1563 ///
1564 /// Basic usage:
1565 ///
1566 /// ```
1567 /// use bstr::ByteSlice;
1568 ///
1569 /// let s = b"foofoo".replacen("o", "z", 2);
1570 /// assert_eq!(s, "fzzfoo".as_bytes());
1571 /// ```
1572 ///
1573 /// When the pattern doesn't match:
1574 ///
1575 /// ```
1576 /// use bstr::ByteSlice;
1577 ///
1578 /// let s = b"foofoo".replacen("a", "z", 2);
1579 /// assert_eq!(s, "foofoo".as_bytes());
1580 /// ```
1581 ///
1582 /// When the needle is an empty string:
1583 ///
1584 /// ```
1585 /// use bstr::ByteSlice;
1586 ///
1587 /// let s = b"foo".replacen("", "Z", 2);
1588 /// assert_eq!(s, "ZfZoo".as_bytes());
1589 /// ```
#[cfg(feature = "alloc")]
#[inline]
fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    limit: usize,
) -> Vec<u8> {
    // Reserve as many bytes as the input holds, then let `replacen_into`
    // append the substituted bytes to the fresh buffer.
    let input_len = self.as_bytes().len();
    let mut output = Vec::with_capacity(input_len);
    self.replacen_into(needle, replacement, limit, &mut output);
    output
}
1602
1603 /// Replace all matches of the given needle with the given replacement,
1604 /// and write the result into the provided `Vec<u8>`.
1605 ///
1606 /// This does **not** clear `dest` before writing to it.
1607 ///
/// This routine is useful for reusing an allocation. For a more convenient
1609 /// API, use [`replace`](#method.replace) instead.
1610 ///
1611 /// # Examples
1612 ///
1613 /// Basic usage:
1614 ///
1615 /// ```
1616 /// use bstr::ByteSlice;
1617 ///
1618 /// let s = b"this is old";
1619 ///
1620 /// let mut dest = vec![];
1621 /// s.replace_into("old", "new", &mut dest);
1622 /// assert_eq!(dest, "this is new".as_bytes());
1623 /// ```
1624 ///
1625 /// When the pattern doesn't match:
1626 ///
1627 /// ```
1628 /// use bstr::ByteSlice;
1629 ///
1630 /// let s = b"this is old";
1631 ///
1632 /// let mut dest = vec![];
1633 /// s.replace_into("nada nada", "limonada", &mut dest);
1634 /// assert_eq!(dest, "this is old".as_bytes());
1635 /// ```
1636 ///
1637 /// When the needle is an empty string:
1638 ///
1639 /// ```
1640 /// use bstr::ByteSlice;
1641 ///
1642 /// let s = b"foo";
1643 ///
1644 /// let mut dest = vec![];
1645 /// s.replace_into("", "Z", &mut dest);
1646 /// assert_eq!(dest, "ZfZoZoZ".as_bytes());
1647 /// ```
#[cfg(feature = "alloc")]
#[inline]
fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    dest: &mut Vec<u8>,
) {
    let haystack = self.as_bytes();
    let needle = needle.as_ref();
    let replacement = replacement.as_ref();

    // `copied_upto` is the haystack offset just past the most recently
    // replaced match; everything before it has already been emitted.
    let mut copied_upto = 0;
    for match_start in self.find_iter(needle) {
        // Emit the untouched gap before this match, then the substitute.
        dest.push_str(&haystack[copied_upto..match_start]);
        dest.push_str(replacement);
        copied_upto = match_start + needle.len();
    }
    // Emit whatever follows the final match (or the whole input when
    // nothing matched).
    dest.push_str(&haystack[copied_upto..]);
}
1666
1667 /// Replace up to `limit` matches of the given needle with the given
1668 /// replacement, and write the result into the provided `Vec<u8>`.
1669 ///
1670 /// This does **not** clear `dest` before writing to it.
1671 ///
/// This routine is useful for reusing an allocation. For a more convenient
1673 /// API, use [`replacen`](#method.replacen) instead.
1674 ///
1675 /// # Examples
1676 ///
1677 /// Basic usage:
1678 ///
1679 /// ```
1680 /// use bstr::ByteSlice;
1681 ///
1682 /// let s = b"foofoo";
1683 ///
1684 /// let mut dest = vec![];
1685 /// s.replacen_into("o", "z", 2, &mut dest);
1686 /// assert_eq!(dest, "fzzfoo".as_bytes());
1687 /// ```
1688 ///
1689 /// When the pattern doesn't match:
1690 ///
1691 /// ```
1692 /// use bstr::ByteSlice;
1693 ///
1694 /// let s = b"foofoo";
1695 ///
1696 /// let mut dest = vec![];
1697 /// s.replacen_into("a", "z", 2, &mut dest);
1698 /// assert_eq!(dest, "foofoo".as_bytes());
1699 /// ```
1700 ///
1701 /// When the needle is an empty string:
1702 ///
1703 /// ```
1704 /// use bstr::ByteSlice;
1705 ///
1706 /// let s = b"foo";
1707 ///
1708 /// let mut dest = vec![];
1709 /// s.replacen_into("", "Z", 2, &mut dest);
1710 /// assert_eq!(dest, "ZfZoo".as_bytes());
1711 /// ```
#[cfg(feature = "alloc")]
#[inline]
fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    limit: usize,
    dest: &mut Vec<u8>,
) {
    let haystack = self.as_bytes();
    let needle = needle.as_ref();
    let replacement = replacement.as_ref();

    // `copied_upto` is the haystack offset just past the most recently
    // replaced match; everything before it has already been emitted.
    let mut copied_upto = 0;
    // Only the first `limit` matches are substituted.
    for match_start in self.find_iter(needle).take(limit) {
        // Emit the untouched gap before this match, then the substitute.
        dest.push_str(&haystack[copied_upto..match_start]);
        dest.push_str(replacement);
        copied_upto = match_start + needle.len();
    }
    // Emit the remainder verbatim, including any matches beyond `limit`.
    dest.push_str(&haystack[copied_upto..]);
}
1731
1732 /// Returns an iterator over the bytes in this byte string.
1733 ///
1734 /// # Examples
1735 ///
1736 /// Basic usage:
1737 ///
1738 /// ```
1739 /// use bstr::ByteSlice;
1740 ///
1741 /// let bs = b"foobar";
1742 /// let bytes: Vec<u8> = bs.bytes().collect();
1743 /// assert_eq!(bytes, bs);
1744 /// ```
1745 #[inline]
1746 fn bytes(&self) -> Bytes<'_> {
1747 Bytes { it: self.as_bytes().iter() }
1748 }
1749
1750 /// Returns an iterator over the Unicode scalar values in this byte string.
1751 /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1752 /// is yielded instead.
1753 ///
1754 /// # Examples
1755 ///
1756 /// Basic usage:
1757 ///
1758 /// ```
1759 /// use bstr::ByteSlice;
1760 ///
1761 /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1762 /// let chars: Vec<char> = bs.chars().collect();
1763 /// assert_eq!(vec!['☃', '\u{FFFD}', '𝞃', '\u{FFFD}', 'a'], chars);
1764 /// ```
1765 ///
1766 /// Codepoints can also be iterated over in reverse:
1767 ///
1768 /// ```
1769 /// use bstr::ByteSlice;
1770 ///
1771 /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1772 /// let chars: Vec<char> = bs.chars().rev().collect();
1773 /// assert_eq!(vec!['a', '\u{FFFD}', '𝞃', '\u{FFFD}', '☃'], chars);
1774 /// ```
1775 #[inline]
1776 fn chars(&self) -> Chars<'_> {
1777 Chars::new(self.as_bytes())
1778 }
1779
1780 /// Returns an iterator over the Unicode scalar values in this byte string
1781 /// along with their starting and ending byte index positions. If invalid
1782 /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1783 /// instead.
1784 ///
1785 /// Note that this is slightly different from the `CharIndices` iterator
1786 /// provided by the standard library. Aside from working on possibly
1787 /// invalid UTF-8, this iterator provides both the corresponding starting
1788 /// and ending byte indices of each codepoint yielded. The ending position
1789 /// is necessary to slice the original byte string when invalid UTF-8 bytes
1790 /// are converted into a Unicode replacement codepoint, since a single
1791 /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
1792 /// (inclusive).
1793 ///
1794 /// # Examples
1795 ///
1796 /// Basic usage:
1797 ///
1798 /// ```
1799 /// use bstr::ByteSlice;
1800 ///
1801 /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1802 /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
1803 /// assert_eq!(chars, vec![
1804 /// (0, 3, '☃'),
1805 /// (3, 4, '\u{FFFD}'),
1806 /// (4, 8, '𝞃'),
1807 /// (8, 10, '\u{FFFD}'),
1808 /// (10, 11, 'a'),
1809 /// ]);
1810 /// ```
1811 ///
1812 /// Codepoints can also be iterated over in reverse:
1813 ///
1814 /// ```
1815 /// use bstr::ByteSlice;
1816 ///
1817 /// let bs = b"\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61";
1818 /// let chars: Vec<(usize, usize, char)> = bs
1819 /// .char_indices()
1820 /// .rev()
1821 /// .collect();
1822 /// assert_eq!(chars, vec![
1823 /// (10, 11, 'a'),
1824 /// (8, 10, '\u{FFFD}'),
1825 /// (4, 8, '𝞃'),
1826 /// (3, 4, '\u{FFFD}'),
1827 /// (0, 3, '☃'),
1828 /// ]);
1829 /// ```
1830 #[inline]
1831 fn char_indices(&self) -> CharIndices<'_> {
1832 CharIndices::new(self.as_bytes())
1833 }
1834
1835 /// Iterate over chunks of valid UTF-8.
1836 ///
1837 /// The iterator returned yields chunks of valid UTF-8 separated by invalid
1838 /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
1839 /// which are determined via the "substitution of maximal subparts"
1840 /// strategy described in the docs for the
1841 /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
1842 /// method.
1843 ///
1844 /// # Examples
1845 ///
1846 /// This example shows how to gather all valid and invalid chunks from a
1847 /// byte slice:
1848 ///
1849 /// ```
1850 /// use bstr::{ByteSlice, Utf8Chunk};
1851 ///
1852 /// let bytes = b"foo\xFD\xFEbar\xFF";
1853 ///
1854 /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
1855 /// for chunk in bytes.utf8_chunks() {
1856 /// if !chunk.valid().is_empty() {
1857 /// valid_chunks.push(chunk.valid());
1858 /// }
1859 /// if !chunk.invalid().is_empty() {
1860 /// invalid_chunks.push(chunk.invalid());
1861 /// }
1862 /// }
1863 ///
1864 /// assert_eq!(valid_chunks, vec!["foo", "bar"]);
1865 /// assert_eq!(invalid_chunks, vec![b"\xFD", b"\xFE", b"\xFF"]);
1866 /// ```
1867 #[inline]
1868 fn utf8_chunks(&self) -> Utf8Chunks<'_> {
1869 Utf8Chunks { bytes: self.as_bytes() }
1870 }
1871
1872 /// Returns an iterator over the grapheme clusters in this byte string.
1873 /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1874 /// is yielded instead.
1875 ///
1876 /// # Examples
1877 ///
1878 /// This example shows how multiple codepoints can combine to form a
1879 /// single grapheme cluster:
1880 ///
1881 /// ```
1882 /// use bstr::ByteSlice;
1883 ///
1884 /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1885 /// let graphemes: Vec<&str> = bs.graphemes().collect();
1886 /// assert_eq!(vec!["à̖", "🇺🇸"], graphemes);
1887 /// ```
1888 ///
1889 /// This shows that graphemes can be iterated over in reverse:
1890 ///
1891 /// ```
1892 /// use bstr::ByteSlice;
1893 ///
1894 /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1895 /// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
1896 /// assert_eq!(vec!["🇺🇸", "à̖"], graphemes);
1897 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn graphemes(&self) -> Graphemes<'_> {
    // The iterator borrows this byte string's raw bytes for its lifetime.
    let haystack = self.as_bytes();
    Graphemes::new(haystack)
}
1903
1904 /// Returns an iterator over the grapheme clusters in this byte string
1905 /// along with their starting and ending byte index positions. If invalid
1906 /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1907 /// instead.
1908 ///
1909 /// # Examples
1910 ///
1911 /// This example shows how to get the byte offsets of each individual
1912 /// grapheme cluster:
1913 ///
1914 /// ```
1915 /// use bstr::ByteSlice;
1916 ///
1917 /// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1918 /// let graphemes: Vec<(usize, usize, &str)> =
1919 /// bs.grapheme_indices().collect();
1920 /// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "🇺🇸")], graphemes);
1921 /// ```
1922 ///
1923 /// This example shows what happens when invalid UTF-8 is encountered. Note
1924 /// that the offsets are valid indices into the original string, and do
1925 /// not necessarily correspond to the length of the `&str` returned!
1926 ///
1927 /// ```
1928 /// # #[cfg(all(feature = "alloc"))] {
1929 /// use bstr::{ByteSlice, ByteVec};
1930 ///
1931 /// let mut bytes = vec![];
1932 /// bytes.push_str("a\u{0300}\u{0316}");
1933 /// bytes.push(b'\xFF');
1934 /// bytes.push_str("\u{1F1FA}\u{1F1F8}");
1935 ///
1936 /// let graphemes: Vec<(usize, usize, &str)> =
1937 /// bytes.grapheme_indices().collect();
1938 /// assert_eq!(
1939 /// graphemes,
1940 /// vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "🇺🇸")]
1941 /// );
1942 /// # }
1943 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn grapheme_indices(&self) -> GraphemeIndices<'_> {
    // Same input as `graphemes`; only the iterator type differs.
    let haystack = self.as_bytes();
    GraphemeIndices::new(haystack)
}
1949
1950 /// Returns an iterator over the words in this byte string. If invalid
1951 /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1952 /// instead.
1953 ///
1954 /// This is similar to
1955 /// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
1956 /// except it only returns elements that contain a "word" character. A word
1957 /// character is defined by UTS #18 (Annex C) to be the combination of the
1958 /// `Alphabetic` and `Join_Control` properties, along with the
1959 /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1960 /// categories.
1961 ///
1962 /// Since words are made up of one or more codepoints, this iterator
1963 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1964 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1965 ///
1966 /// # Examples
1967 ///
1968 /// Basic usage:
1969 ///
1970 /// ```
1971 /// use bstr::ByteSlice;
1972 ///
1973 /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1974 /// let words: Vec<&str> = bs.words().collect();
1975 /// assert_eq!(words, vec![
1976 /// "The", "quick", "brown", "fox", "can't",
1977 /// "jump", "32.3", "feet", "right",
1978 /// ]);
1979 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn words(&self) -> Words<'_> {
    // Hand the raw bytes to the word iterator.
    let haystack = self.as_bytes();
    Words::new(haystack)
}
1985
1986 /// Returns an iterator over the words in this byte string along with
1987 /// their starting and ending byte index positions.
1988 ///
1989 /// This is similar to
1990 /// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
1991 /// except it only returns elements that contain a "word" character. A word
1992 /// character is defined by UTS #18 (Annex C) to be the combination of the
1993 /// `Alphabetic` and `Join_Control` properties, along with the
1994 /// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1995 /// categories.
1996 ///
1997 /// Since words are made up of one or more codepoints, this iterator
1998 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1999 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2000 ///
2001 /// # Examples
2002 ///
2003 /// This example shows how to get the byte offsets of each individual
2004 /// word:
2005 ///
2006 /// ```
2007 /// use bstr::ByteSlice;
2008 ///
2009 /// let bs = b"can't jump 32.3 feet";
2010 /// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
2011 /// assert_eq!(words, vec![
2012 /// (0, 5, "can't"),
2013 /// (6, 10, "jump"),
2014 /// (11, 15, "32.3"),
2015 /// (16, 20, "feet"),
2016 /// ]);
2017 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn word_indices(&self) -> WordIndices<'_> {
    // Hand the raw bytes to the offset-reporting word iterator.
    let haystack = self.as_bytes();
    WordIndices::new(haystack)
}
2023
2024 /// Returns an iterator over the words in this byte string, along with
2025 /// all breaks between the words. Concatenating all elements yielded by
2026 /// the iterator results in the original string (modulo Unicode replacement
2027 /// codepoint substitutions if invalid UTF-8 is encountered).
2028 ///
2029 /// Since words are made up of one or more codepoints, this iterator
2030 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2031 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2032 ///
2033 /// # Examples
2034 ///
2035 /// Basic usage:
2036 ///
2037 /// ```
2038 /// use bstr::ByteSlice;
2039 ///
2040 /// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
2041 /// let words: Vec<&str> = bs.words_with_breaks().collect();
2042 /// assert_eq!(words, vec![
2043 /// "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
2044 /// " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
2045 /// ",", " ", "right", "?",
2046 /// ]);
2047 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn words_with_breaks(&self) -> WordsWithBreaks<'_> {
    // Hand the raw bytes to the iterator that also yields the breaks.
    let haystack = self.as_bytes();
    WordsWithBreaks::new(haystack)
}
2053
2054 /// Returns an iterator over the words and their byte offsets in this
2055 /// byte string, along with all breaks between the words. Concatenating
2056 /// all elements yielded by the iterator results in the original string
2057 /// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
2058 /// encountered).
2059 ///
2060 /// Since words are made up of one or more codepoints, this iterator
2061 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2062 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2063 ///
2064 /// # Examples
2065 ///
2066 /// This example shows how to get the byte offsets of each individual
2067 /// word:
2068 ///
2069 /// ```
2070 /// use bstr::ByteSlice;
2071 ///
2072 /// let bs = b"can't jump 32.3 feet";
2073 /// let words: Vec<(usize, usize, &str)> =
2074 /// bs.words_with_break_indices().collect();
2075 /// assert_eq!(words, vec![
2076 /// (0, 5, "can't"),
2077 /// (5, 6, " "),
2078 /// (6, 10, "jump"),
2079 /// (10, 11, " "),
2080 /// (11, 15, "32.3"),
2081 /// (15, 16, " "),
2082 /// (16, 20, "feet"),
2083 /// ]);
2084 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> {
    // Same input as `words_with_breaks`; only the iterator type differs.
    let haystack = self.as_bytes();
    WordsWithBreakIndices::new(haystack)
}
2090
2091 /// Returns an iterator over the sentences in this byte string.
2092 ///
2093 /// Typically, a sentence will include its trailing punctuation and
2094 /// whitespace. Concatenating all elements yielded by the iterator
2095 /// results in the original string (modulo Unicode replacement codepoint
2096 /// substitutions if invalid UTF-8 is encountered).
2097 ///
2098 /// Since sentences are made up of one or more codepoints, this iterator
2099 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2100 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2101 ///
2102 /// # Examples
2103 ///
2104 /// Basic usage:
2105 ///
2106 /// ```
2107 /// use bstr::ByteSlice;
2108 ///
2109 /// let bs = b"I want this. Not that. Right now.";
2110 /// let sentences: Vec<&str> = bs.sentences().collect();
2111 /// assert_eq!(sentences, vec![
2112 /// "I want this. ",
2113 /// "Not that. ",
2114 /// "Right now.",
2115 /// ]);
2116 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn sentences(&self) -> Sentences<'_> {
    // Hand the raw bytes to the sentence iterator.
    let haystack = self.as_bytes();
    Sentences::new(haystack)
}
2122
2123 /// Returns an iterator over the sentences in this byte string along with
2124 /// their starting and ending byte index positions.
2125 ///
2126 /// Typically, a sentence will include its trailing punctuation and
2127 /// whitespace. Concatenating all elements yielded by the iterator
2128 /// results in the original string (modulo Unicode replacement codepoint
2129 /// substitutions if invalid UTF-8 is encountered).
2130 ///
2131 /// Since sentences are made up of one or more codepoints, this iterator
2132 /// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2133 /// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2134 ///
2135 /// # Examples
2136 ///
2137 /// Basic usage:
2138 ///
2139 /// ```
2140 /// use bstr::ByteSlice;
2141 ///
2142 /// let bs = b"I want this. Not that. Right now.";
2143 /// let sentences: Vec<(usize, usize, &str)> =
2144 /// bs.sentence_indices().collect();
2145 /// assert_eq!(sentences, vec![
2146 /// (0, 13, "I want this. "),
2147 /// (13, 23, "Not that. "),
2148 /// (23, 33, "Right now."),
2149 /// ]);
2150 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn sentence_indices(&self) -> SentenceIndices<'_> {
    // Hand the raw bytes to the offset-reporting sentence iterator.
    let haystack = self.as_bytes();
    SentenceIndices::new(haystack)
}
2156
2157 /// An iterator over all lines in a byte string, without their
2158 /// terminators.
2159 ///
2160 /// For this iterator, the only line terminators recognized are `\r\n` and
2161 /// `\n`.
2162 ///
2163 /// # Examples
2164 ///
2165 /// Basic usage:
2166 ///
2167 /// ```
2168 /// use bstr::{B, ByteSlice};
2169 ///
2170 /// let s = b"\
2171 /// foo
2172 ///
2173 /// bar\r
2174 /// baz
2175 ///
2176 ///
2177 /// quux";
2178 /// let lines: Vec<&[u8]> = s.lines().collect();
2179 /// assert_eq!(lines, vec![
2180 /// B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
2181 /// ]);
2182 /// ```
2183 #[inline]
2184 fn lines(&self) -> Lines<'_> {
2185 Lines::new(self.as_bytes())
2186 }
2187
2188 /// An iterator over all lines in a byte string, including their
2189 /// terminators.
2190 ///
2191 /// For this iterator, the only line terminator recognized is `\n`. (Since
2192 /// line terminators are included, this also handles `\r\n` line endings.)
2193 ///
2194 /// Line terminators are only included if they are present in the original
2195 /// byte string. For example, the last line in a byte string may not end
2196 /// with a line terminator.
2197 ///
2198 /// Concatenating all elements yielded by this iterator is guaranteed to
2199 /// yield the original byte string.
2200 ///
2201 /// # Examples
2202 ///
2203 /// Basic usage:
2204 ///
2205 /// ```
2206 /// use bstr::{B, ByteSlice};
2207 ///
2208 /// let s = b"\
2209 /// foo
2210 ///
2211 /// bar\r
2212 /// baz
2213 ///
2214 ///
2215 /// quux";
2216 /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
2217 /// assert_eq!(lines, vec![
2218 /// B("foo\n"),
2219 /// B("\n"),
2220 /// B("bar\r\n"),
2221 /// B("baz\n"),
2222 /// B("\n"),
2223 /// B("\n"),
2224 /// B("quux"),
2225 /// ]);
2226 /// ```
2227 #[inline]
2228 fn lines_with_terminator(&self) -> LinesWithTerminator<'_> {
2229 LinesWithTerminator::new(self.as_bytes())
2230 }
2231
2232 /// Return a byte string slice with leading and trailing whitespace
2233 /// removed.
2234 ///
2235 /// Whitespace is defined according to the terms of the `White_Space`
2236 /// Unicode property.
2237 ///
2238 /// # Examples
2239 ///
2240 /// Basic usage:
2241 ///
2242 /// ```
2243 /// use bstr::{B, ByteSlice};
2244 ///
2245 /// let s = B(" foo\tbar\t\u{2003}\n");
2246 /// assert_eq!(s.trim(), B("foo\tbar"));
2247 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn trim(&self) -> &[u8] {
    // Strip the front first, then strip the back of what remains.
    let without_leading = self.trim_start();
    without_leading.trim_end()
}
2253
2254 /// Return a byte string slice with leading whitespace removed.
2255 ///
2256 /// Whitespace is defined according to the terms of the `White_Space`
2257 /// Unicode property.
2258 ///
2259 /// # Examples
2260 ///
2261 /// Basic usage:
2262 ///
2263 /// ```
2264 /// use bstr::{B, ByteSlice};
2265 ///
2266 /// let s = B(" foo\tbar\t\u{2003}\n");
2267 /// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
2268 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn trim_start(&self) -> &[u8] {
    let bytes = self.as_bytes();
    // `whitespace_len_fwd` yields the offset at which the kept tail begins.
    let skip = whitespace_len_fwd(bytes);
    &bytes[skip..]
}
2275
2276 /// Return a byte string slice with trailing whitespace removed.
2277 ///
2278 /// Whitespace is defined according to the terms of the `White_Space`
2279 /// Unicode property.
2280 ///
2281 /// # Examples
2282 ///
2283 /// Basic usage:
2284 ///
2285 /// ```
2286 /// use bstr::{B, ByteSlice};
2287 ///
2288 /// let s = B(" foo\tbar\t\u{2003}\n");
2289 /// assert_eq!(s.trim_end(), B(" foo\tbar"));
2290 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn trim_end(&self) -> &[u8] {
    let bytes = self.as_bytes();
    // `whitespace_len_rev` yields the offset at which the kept prefix ends.
    let keep = whitespace_len_rev(bytes);
    &bytes[..keep]
}
2297
2298 /// Return a byte string slice with leading and trailing characters
2299 /// satisfying the given predicate removed.
2300 ///
2301 /// # Examples
2302 ///
2303 /// Basic usage:
2304 ///
2305 /// ```
2306 /// use bstr::{B, ByteSlice};
2307 ///
2308 /// let s = b"123foo5bar789";
2309 /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
2310 /// ```
2311 #[inline]
2312 fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2313 self.trim_start_with(&mut trim).trim_end_with(&mut trim)
2314 }
2315
2316 /// Return a byte string slice with leading characters satisfying the given
2317 /// predicate removed.
2318 ///
2319 /// # Examples
2320 ///
2321 /// Basic usage:
2322 ///
2323 /// ```
2324 /// use bstr::{B, ByteSlice};
2325 ///
2326 /// let s = b"123foo5bar789";
2327 /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
2328 /// ```
2329 #[inline]
2330 fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2331 for (s, _, ch) in self.char_indices() {
2332 if !trim(ch) {
2333 return &self.as_bytes()[s..];
2334 }
2335 }
2336 b""
2337 }
2338
2339 /// Return a byte string slice with trailing characters satisfying the
2340 /// given predicate removed.
2341 ///
2342 /// # Examples
2343 ///
2344 /// Basic usage:
2345 ///
2346 /// ```
2347 /// use bstr::{B, ByteSlice};
2348 ///
2349 /// let s = b"123foo5bar789";
2350 /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
2351 /// ```
2352 #[inline]
2353 fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2354 for (_, e, ch) in self.char_indices().rev() {
2355 if !trim(ch) {
2356 return &self.as_bytes()[..e];
2357 }
2358 }
2359 b""
2360 }
2361
2362 /// Returns a new `Vec<u8>` containing the lowercase equivalent of this
2363 /// byte string.
2364 ///
2365 /// In this case, lowercase is defined according to the `Lowercase` Unicode
2366 /// property.
2367 ///
/// If invalid UTF-8 is seen, or if a character has no lowercase variant,
/// then it is copied to the returned byte string unchanged.
///
/// Note that some characters in this byte string may expand into multiple
/// characters when changing the case, so the number of bytes in the
/// returned byte string may not be equivalent to the number of bytes in
/// this byte string.
2375 ///
2376 /// If you'd like to reuse an allocation for performance reasons, then use
2377 /// [`to_lowercase_into`](#method.to_lowercase_into) instead.
2378 ///
2379 /// # Examples
2380 ///
2381 /// Basic usage:
2382 ///
2383 /// ```
2384 /// use bstr::{B, ByteSlice};
2385 ///
2386 /// let s = B("HELLO Β");
2387 /// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
2388 /// ```
2389 ///
2390 /// Scripts without case are not changed:
2391 ///
2392 /// ```
2393 /// use bstr::{B, ByteSlice};
2394 ///
2395 /// let s = B("农历新年");
2396 /// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
2397 /// ```
2398 ///
2399 /// Invalid UTF-8 remains as is:
2400 ///
2401 /// ```
2402 /// use bstr::{B, ByteSlice};
2403 ///
2404 /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2405 /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
2406 /// ```
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_lowercase(&self) -> Vec<u8> {
    // Start from an empty buffer and let the `_into` variant fill it.
    let mut lowered = vec![];
    self.to_lowercase_into(&mut lowered);
    lowered
}
2414
2415 /// Writes the lowercase equivalent of this byte string into the given
2416 /// buffer. The buffer is not cleared before written to.
2417 ///
2418 /// In this case, lowercase is defined according to the `Lowercase`
2419 /// Unicode property.
2420 ///
2421 /// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2422 /// then it is written to the given buffer unchanged.
2423 ///
2424 /// Note that some characters in this byte string may expand into multiple
2425 /// characters when changing the case, so the number of bytes written to
2426 /// the given byte string may not be equivalent to the number of bytes in
2427 /// this byte string.
2428 ///
2429 /// If you don't need to amortize allocation and instead prefer
2430 /// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
2431 ///
2432 /// # Examples
2433 ///
2434 /// Basic usage:
2435 ///
2436 /// ```
2437 /// use bstr::{B, ByteSlice};
2438 ///
2439 /// let s = B("HELLO Β");
2440 ///
2441 /// let mut buf = vec![];
2442 /// s.to_lowercase_into(&mut buf);
2443 /// assert_eq!("hello β".as_bytes(), buf.as_bytes());
2444 /// ```
2445 ///
2446 /// Scripts without case are not changed:
2447 ///
2448 /// ```
2449 /// use bstr::{B, ByteSlice};
2450 ///
2451 /// let s = B("农历新年");
2452 ///
2453 /// let mut buf = vec![];
2454 /// s.to_lowercase_into(&mut buf);
2455 /// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
2456 /// ```
2457 ///
2458 /// Invalid UTF-8 remains as is:
2459 ///
2460 /// ```
2461 /// use bstr::{B, ByteSlice};
2462 ///
2463 /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2464 ///
2465 /// let mut buf = vec![];
2466 /// s.to_lowercase_into(&mut buf);
2467 /// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
2468 /// ```
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
    // TODO: This is the best we can do given what std exposes I think.
    // If we roll our own case handling, then we might be able to do this
    // a bit faster. We shouldn't roll our own case handling unless we
    // need to, e.g., for doing caseless matching or case folding.

    // TODO(BUG): This doesn't handle any special casing rules.

    let bytes = self.as_bytes();
    buf.reserve(bytes.len());
    for (start, end, ch) in self.char_indices() {
        match ch {
            // A replacement codepoint: copy the original bytes of this
            // span verbatim rather than encoding the decoded char.
            '\u{FFFD}' => buf.push_str(&bytes[start..end]),
            // ASCII maps one-to-one.
            c if c.is_ascii() => buf.push_char(c.to_ascii_lowercase()),
            // Anything else may lowercase to several codepoints.
            c => c.to_lowercase().for_each(|lower| buf.push_char(lower)),
        }
    }
}
2492
2493 /// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of
2494 /// this byte string.
2495 ///
2496 /// In this case, lowercase is only defined in ASCII letters. Namely, the
2497 /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2498 /// In particular, the length of the byte string returned is always
2499 /// equivalent to the length of this byte string.
2500 ///
2501 /// If you'd like to reuse an allocation for performance reasons, then use
2502 /// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
2503 /// the conversion in place.
2504 ///
2505 /// # Examples
2506 ///
2507 /// Basic usage:
2508 ///
2509 /// ```
2510 /// use bstr::{B, ByteSlice};
2511 ///
2512 /// let s = B("HELLO Β");
2513 /// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
2514 /// ```
2515 ///
2516 /// Invalid UTF-8 remains as is:
2517 ///
2518 /// ```
2519 /// use bstr::{B, ByteSlice};
2520 ///
2521 /// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2522 /// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
2523 /// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_ascii_lowercase(&self) -> Vec<u8> {
    // Delegate to the inherent `[u8]` method, named explicitly here.
    <[u8]>::to_ascii_lowercase(self.as_bytes())
}
2529
2530 /// Convert this byte string to its lowercase ASCII equivalent in place.
2531 ///
2532 /// In this case, lowercase is only defined in ASCII letters. Namely, the
2533 /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2534 ///
2535 /// If you don't need to do the conversion in
2536 /// place and instead prefer convenience, then use
2537 /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
2538 ///
2539 /// # Examples
2540 ///
2541 /// Basic usage:
2542 ///
2543 /// ```
2544 /// use bstr::ByteSlice;
2545 ///
2546 /// let mut s = <Vec<u8>>::from("HELLO Β");
2547 /// s.make_ascii_lowercase();
2548 /// assert_eq!(s, "hello Β".as_bytes());
2549 /// ```
2550 ///
2551 /// Invalid UTF-8 remains as is:
2552 ///
2553 /// ```
2554 /// # #[cfg(feature = "alloc")] {
2555 /// use bstr::{B, ByteSlice, ByteVec};
2556 ///
2557 /// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
2558 /// s.make_ascii_lowercase();
2559 /// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
2560 /// # }
2561 /// ```
2562 #[inline]
2563 fn make_ascii_lowercase(&mut self) {
2564 self.as_bytes_mut().make_ascii_lowercase();
2565 }
2566
2567 /// Returns a new `Vec<u8>` containing the uppercase equivalent of this
2568 /// byte string.
2569 ///
2570 /// In this case, uppercase is defined according to the `Uppercase`
2571 /// Unicode property.
2572 ///
/// If invalid UTF-8 is seen, or if a character has no uppercase variant,
/// then it is copied to the returned byte string unchanged.
///
/// Note that some characters in this byte string may expand into multiple
/// characters when changing the case, so the number of bytes in the
/// returned byte string may not be equivalent to the number of bytes in
/// this byte string.
2580 ///
2581 /// If you'd like to reuse an allocation for performance reasons, then use
2582 /// [`to_uppercase_into`](#method.to_uppercase_into) instead.
2583 ///
2584 /// # Examples
2585 ///
2586 /// Basic usage:
2587 ///
2588 /// ```
2589 /// use bstr::{B, ByteSlice};
2590 ///
2591 /// let s = B("hello β");
2592 /// assert_eq!(s.to_uppercase(), B("HELLO Β"));
2593 /// ```
2594 ///
2595 /// Scripts without case are not changed:
2596 ///
2597 /// ```
2598 /// use bstr::{B, ByteSlice};
2599 ///
2600 /// let s = B("农历新年");
2601 /// assert_eq!(s.to_uppercase(), B("农历新年"));
2602 /// ```
2603 ///
2604 /// Invalid UTF-8 remains as is:
2605 ///
2606 /// ```
2607 /// use bstr::{B, ByteSlice};
2608 ///
2609 /// let s = B(b"foo\xFFbar\xE2\x98baz");
2610 /// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2611 /// ```
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_uppercase(&self) -> Vec<u8> {
    // Start from an empty buffer and let the `_into` variant fill it.
    let mut raised = vec![];
    self.to_uppercase_into(&mut raised);
    raised
}
2619
2620 /// Writes the uppercase equivalent of this byte string into the given
2621 /// buffer. The buffer is not cleared before written to.
2622 ///
2623 /// In this case, uppercase is defined according to the `Uppercase`
2624 /// Unicode property.
2625 ///
2626 /// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2627 /// then it is written to the given buffer unchanged.
2628 ///
2629 /// Note that some characters in this byte string may expand into multiple
2630 /// characters when changing the case, so the number of bytes written to
2631 /// the given byte string may not be equivalent to the number of bytes in
2632 /// this byte string.
2633 ///
2634 /// If you don't need to amortize allocation and instead prefer
2635 /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead.
2636 ///
2637 /// # Examples
2638 ///
2639 /// Basic usage:
2640 ///
2641 /// ```
2642 /// use bstr::{B, ByteSlice};
2643 ///
2644 /// let s = B("hello β");
2645 ///
2646 /// let mut buf = vec![];
2647 /// s.to_uppercase_into(&mut buf);
2648 /// assert_eq!(buf, B("HELLO Β"));
2649 /// ```
2650 ///
2651 /// Scripts without case are not changed:
2652 ///
2653 /// ```
2654 /// use bstr::{B, ByteSlice};
2655 ///
2656 /// let s = B("农历新年");
2657 ///
2658 /// let mut buf = vec![];
2659 /// s.to_uppercase_into(&mut buf);
2660 /// assert_eq!(buf, B("农历新年"));
2661 /// ```
2662 ///
2663 /// Invalid UTF-8 remains as is:
2664 ///
2665 /// ```
2666 /// use bstr::{B, ByteSlice};
2667 ///
2668 /// let s = B(b"foo\xFFbar\xE2\x98baz");
2669 ///
2670 /// let mut buf = vec![];
2671 /// s.to_uppercase_into(&mut buf);
2672 /// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2673 /// ```
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
    // TODO: This is the best we can do given what std exposes I think.
    // If we roll our own case handling, then we might be able to do this
    // a bit faster. We shouldn't roll our own case handling unless we
    // need to, e.g., for doing caseless matching or case folding.
    let bytes = self.as_bytes();
    buf.reserve(bytes.len());
    for (start, end, ch) in self.char_indices() {
        match ch {
            // A replacement codepoint: copy the original bytes of this
            // span verbatim rather than encoding the decoded char.
            '\u{FFFD}' => buf.push_str(&bytes[start..end]),
            // ASCII maps one-to-one.
            c if c.is_ascii() => buf.push_char(c.to_ascii_uppercase()),
            // Anything else may uppercase to several codepoints.
            c => c.to_uppercase().for_each(|upper| buf.push_char(upper)),
        }
    }
}
2694
2695 /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of
2696 /// this byte string.
2697 ///
2698 /// In this case, uppercase is only defined in ASCII letters. Namely, the
2699 /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2700 /// In particular, the length of the byte string returned is always
2701 /// equivalent to the length of this byte string.
2702 ///
2703 /// If you'd like to reuse an allocation for performance reasons, then use
2704 /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform
2705 /// the conversion in place.
2706 ///
2707 /// # Examples
2708 ///
2709 /// Basic usage:
2710 ///
2711 /// ```
2712 /// use bstr::{B, ByteSlice};
2713 ///
2714 /// let s = B("hello β");
2715 /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
2716 /// ```
2717 ///
2718 /// Invalid UTF-8 remains as is:
2719 ///
2720 /// ```
2721 /// use bstr::{B, ByteSlice};
2722 ///
2723 /// let s = B(b"foo\xFFbar\xE2\x98baz");
2724 /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2725 /// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_ascii_uppercase(&self) -> Vec<u8> {
    // Delegate to the inherent `[u8]` method, named explicitly here.
    <[u8]>::to_ascii_uppercase(self.as_bytes())
}
2731
2732 /// Convert this byte string to its uppercase ASCII equivalent in place.
2733 ///
2734 /// In this case, uppercase is only defined in ASCII letters. Namely, the
2735 /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2736 ///
2737 /// If you don't need to do the conversion in
2738 /// place and instead prefer convenience, then use
2739 /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
2740 ///
2741 /// # Examples
2742 ///
2743 /// Basic usage:
2744 ///
2745 /// ```
2746 /// use bstr::{B, ByteSlice};
2747 ///
2748 /// let mut s = <Vec<u8>>::from("hello β");
2749 /// s.make_ascii_uppercase();
2750 /// assert_eq!(s, B("HELLO β"));
2751 /// ```
2752 ///
2753 /// Invalid UTF-8 remains as is:
2754 ///
2755 /// ```
2756 /// # #[cfg(feature = "alloc")] {
2757 /// use bstr::{B, ByteSlice, ByteVec};
2758 ///
2759 /// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
2760 /// s.make_ascii_uppercase();
2761 /// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2762 /// # }
2763 /// ```
2764 #[inline]
2765 fn make_ascii_uppercase(&mut self) {
2766 self.as_bytes_mut().make_ascii_uppercase();
2767 }
2768
2769 /// Escapes this byte string into a sequence of `char` values.
2770 ///
2771 /// When the sequence of `char` values is concatenated into a string, the
/// result is always valid UTF-8. Any unprintable or invalid UTF-8 in this
/// byte string is escaped using `\xNN` notation. Moreover, the
2774 /// characters `\0`, `\r`, `\n`, `\t` and `\` are escaped as well.
2775 ///
2776 /// This is useful when one wants to get a human readable view of the raw
2777 /// bytes that is also valid UTF-8.
2778 ///
2779 /// The iterator returned implements the `Display` trait. So one can do
2780 /// `b"foo\xFFbar".escape_bytes().to_string()` to get a `String` with its
2781 /// bytes escaped.
2782 ///
2783 /// The dual of this function is [`ByteVec::unescape_bytes`].
2784 ///
2785 /// Note that this is similar to, but not equivalent to the `Debug`
2786 /// implementation on [`BStr`] and [`BString`]. The `Debug` implementations
2787 /// also use the debug representation for all Unicode codepoints. However,
2788 /// this escaping routine only escapes individual bytes. All Unicode
2789 /// codepoints above `U+007F` are passed through unchanged without any
2790 /// escaping.
2791 ///
2792 /// # Examples
2793 ///
2794 /// ```
2795 /// # #[cfg(feature = "alloc")] {
2796 /// use bstr::{B, ByteSlice};
2797 ///
2798 /// assert_eq!(r"foo\xFFbar", b"foo\xFFbar".escape_bytes().to_string());
2799 /// assert_eq!(r"foo\nbar", b"foo\nbar".escape_bytes().to_string());
2800 /// assert_eq!(r"foo\tbar", b"foo\tbar".escape_bytes().to_string());
2801 /// assert_eq!(r"foo\\bar", b"foo\\bar".escape_bytes().to_string());
2802 /// assert_eq!(r"foo☃bar", B("foo☃bar").escape_bytes().to_string());
2803 /// # }
2804 /// ```
2805 #[inline]
2806 fn escape_bytes(&self) -> EscapeBytes<'_> {
2807 EscapeBytes::new(self.as_bytes())
2808 }
2809
2810 /// Reverse the bytes in this string, in place.
2811 ///
2812 /// This is not necessarily a well formed operation! For example, if this
2813 /// byte string contains valid UTF-8 that isn't ASCII, then reversing the
2814 /// string will likely result in invalid UTF-8 and otherwise non-sensical
2815 /// content.
2816 ///
2817 /// Note that this is equivalent to the generic `[u8]::reverse` method.
2818 /// This method is provided to permit callers to explicitly differentiate
2819 /// between reversing bytes, codepoints and graphemes.
2820 ///
2821 /// # Examples
2822 ///
2823 /// Basic usage:
2824 ///
2825 /// ```
2826 /// use bstr::ByteSlice;
2827 ///
2828 /// let mut s = <Vec<u8>>::from("hello");
2829 /// s.reverse_bytes();
2830 /// assert_eq!(s, "olleh".as_bytes());
2831 /// ```
2832 #[inline]
2833 fn reverse_bytes(&mut self) {
2834 self.as_bytes_mut().reverse();
2835 }
2836
2837 /// Reverse the codepoints in this string, in place.
2838 ///
2839 /// If this byte string is valid UTF-8, then its reversal by codepoint
2840 /// is also guaranteed to be valid UTF-8.
2841 ///
2842 /// This operation is equivalent to the following, but without allocating:
2843 ///
2844 /// ```
2845 /// use bstr::ByteSlice;
2846 ///
2847 /// let mut s = <Vec<u8>>::from("foo☃bar");
2848 ///
2849 /// let mut chars: Vec<char> = s.chars().collect();
2850 /// chars.reverse();
2851 ///
2852 /// let reversed: String = chars.into_iter().collect();
2853 /// assert_eq!(reversed, "rab☃oof");
2854 /// ```
2855 ///
2856 /// Note that this is not necessarily a well formed operation. For example,
2857 /// if this byte string contains grapheme clusters with more than one
2858 /// codepoint, then those grapheme clusters will not necessarily be
2859 /// preserved. If you'd like to preserve grapheme clusters, then use
2860 /// [`reverse_graphemes`](#method.reverse_graphemes) instead.
2861 ///
2862 /// # Examples
2863 ///
2864 /// Basic usage:
2865 ///
2866 /// ```
2867 /// use bstr::ByteSlice;
2868 ///
2869 /// let mut s = <Vec<u8>>::from("foo☃bar");
2870 /// s.reverse_chars();
2871 /// assert_eq!(s, "rab☃oof".as_bytes());
2872 /// ```
2873 ///
2874 /// This example shows that not all reversals lead to a well formed string.
2875 /// For example, in this case, combining marks are used to put accents over
2876 /// some letters, and those accent marks must appear after the codepoints
2877 /// they modify.
2878 ///
2879 /// ```
2880 /// use bstr::{B, ByteSlice};
2881 ///
2882 /// let mut s = <Vec<u8>>::from("résumé");
2883 /// s.reverse_chars();
2884 /// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
2885 /// ```
2886 ///
2887 /// A word of warning: the above example relies on the fact that
2888 /// `résumé` is in decomposed normal form, which means there are separate
2889 /// codepoints for the accents above `e`. If it is instead in composed
2890 /// normal form, then the example works:
2891 ///
2892 /// ```
2893 /// use bstr::{B, ByteSlice};
2894 ///
2895 /// let mut s = <Vec<u8>>::from("résumé");
2896 /// s.reverse_chars();
2897 /// assert_eq!(s, B("émusér"));
2898 /// ```
2899 ///
2900 /// The point here is to be cautious and not assume that just because
2901 /// `reverse_chars` works in one case, that it therefore works in all
2902 /// cases.
2903 #[inline]
2904 fn reverse_chars(&mut self) {
2905 let mut i = 0;
2906 loop {
2907 let (_, size) = utf8::decode(&self.as_bytes()[i..]);
2908 if size == 0 {
2909 break;
2910 }
2911 if size > 1 {
2912 self.as_bytes_mut()[i..i + size].reverse_bytes();
2913 }
2914 i += size;
2915 }
2916 self.reverse_bytes();
2917 }
2918
2919 /// Reverse the graphemes in this string, in place.
2920 ///
2921 /// If this byte string is valid UTF-8, then its reversal by grapheme
2922 /// is also guaranteed to be valid UTF-8.
2923 ///
2924 /// This operation is equivalent to the following, but without allocating:
2925 ///
2926 /// ```
2927 /// use bstr::ByteSlice;
2928 ///
2929 /// let mut s = <Vec<u8>>::from("foo☃bar");
2930 ///
2931 /// let mut graphemes: Vec<&str> = s.graphemes().collect();
2932 /// graphemes.reverse();
2933 ///
2934 /// let reversed = graphemes.concat();
2935 /// assert_eq!(reversed, "rab☃oof");
2936 /// ```
2937 ///
2938 /// # Examples
2939 ///
2940 /// Basic usage:
2941 ///
2942 /// ```
2943 /// use bstr::ByteSlice;
2944 ///
2945 /// let mut s = <Vec<u8>>::from("foo☃bar");
2946 /// s.reverse_graphemes();
2947 /// assert_eq!(s, "rab☃oof".as_bytes());
2948 /// ```
2949 ///
2950 /// This example shows how this correctly handles grapheme clusters,
2951 /// unlike `reverse_chars`.
2952 ///
2953 /// ```
2954 /// use bstr::ByteSlice;
2955 ///
2956 /// let mut s = <Vec<u8>>::from("résumé");
2957 /// s.reverse_graphemes();
2958 /// assert_eq!(s, "émusér".as_bytes());
2959 /// ```
#[cfg(feature = "unicode")]
#[inline]
fn reverse_graphemes(&mut self) {
    use crate::unicode::decode_grapheme;

    // Pass 1: flip the bytes of every multi-byte grapheme in place.
    // Pass 2 (the final `reverse_bytes`) flips the whole string, which
    // puts each grapheme's bytes back in their original order while
    // reversing the order of the graphemes themselves.
    let mut offset = 0;
    loop {
        let width = decode_grapheme(&self.as_bytes()[offset..]).1;
        match width {
            // A zero-width decode ends the scan.
            0 => break,
            // A single byte is its own reversal.
            1 => {}
            _ => self.as_bytes_mut()[offset..offset + width].reverse_bytes(),
        }
        offset += width;
    }
    self.reverse_bytes();
}
2978
2979 /// Returns true if and only if every byte in this byte string is ASCII.
2980 ///
2981 /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
2982 /// an ASCII codepoint if and only if it is in the inclusive range
2983 /// `[0, 127]`.
2984 ///
2985 /// # Examples
2986 ///
2987 /// Basic usage:
2988 ///
2989 /// ```
2990 /// use bstr::{B, ByteSlice};
2991 ///
2992 /// assert!(B("abc").is_ascii());
2993 /// assert!(!B("☃βツ").is_ascii());
2994 /// assert!(!B(b"\xFF").is_ascii());
2995 /// ```
2996 #[inline]
2997 fn is_ascii(&self) -> bool {
2998 ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
2999 }
3000
3001 /// Returns true if and only if the entire byte string is valid UTF-8.
3002 ///
3003 /// If you need location information about where a byte string's first
3004 /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
3005 ///
3006 /// # Examples
3007 ///
3008 /// Basic usage:
3009 ///
3010 /// ```
3011 /// use bstr::{B, ByteSlice};
3012 ///
3013 /// assert!(B("abc").is_utf8());
3014 /// assert!(B("☃βツ").is_utf8());
3015 /// // invalid bytes
3016 /// assert!(!B(b"abc\xFF").is_utf8());
3017 /// // surrogate encoding
3018 /// assert!(!B(b"\xED\xA0\x80").is_utf8());
3019 /// // incomplete sequence
3020 /// assert!(!B(b"\xF0\x9D\x9Ca").is_utf8());
3021 /// // overlong sequence
3022 /// assert!(!B(b"\xF0\x82\x82\xAC").is_utf8());
3023 /// ```
3024 #[inline]
3025 fn is_utf8(&self) -> bool {
3026 utf8::validate(self.as_bytes()).is_ok()
3027 }
3028
3029 /// Returns the last byte in this byte string, if it's non-empty. If this
3030 /// byte string is empty, this returns `None`.
3031 ///
3032 /// Note that this is like the generic `[u8]::last`, except this returns
3033 /// the byte by value instead of a reference to the byte.
3034 ///
3035 /// # Examples
3036 ///
3037 /// Basic usage:
3038 ///
3039 /// ```
3040 /// use bstr::ByteSlice;
3041 ///
3042 /// assert_eq!(Some(b'z'), b"baz".last_byte());
3043 /// assert_eq!(None, b"".last_byte());
3044 /// ```
3045 #[inline]
3046 fn last_byte(&self) -> Option<u8> {
3047 let bytes = self.as_bytes();
3048 bytes.get(bytes.len().saturating_sub(1)).map(|&b| b)
3049 }
3050
3051 /// Returns the index of the first non-ASCII byte in this byte string (if
3052 /// any such indices exist). Specifically, it returns the index of the
3053 /// first byte with a value greater than or equal to `0x80`.
3054 ///
3055 /// # Examples
3056 ///
3057 /// Basic usage:
3058 ///
3059 /// ```
3060 /// use bstr::{ByteSlice, B};
3061 ///
3062 /// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
3063 /// assert_eq!(None, b"abcde".find_non_ascii_byte());
3064 /// assert_eq!(Some(0), B("😀").find_non_ascii_byte());
3065 /// ```
3066 #[inline]
3067 fn find_non_ascii_byte(&self) -> Option<usize> {
3068 let index = ascii::first_non_ascii_byte(self.as_bytes());
3069 if index == self.as_bytes().len() {
3070 None
3071 } else {
3072 Some(index)
3073 }
3074 }
3075}
3076
3077/// A single substring searcher fixed to a particular needle.
3078///
3079/// The purpose of this type is to permit callers to construct a substring
3080/// searcher that can be used to search haystacks without the overhead of
3081/// constructing the searcher in the first place. This is a somewhat niche
3082/// concern when it's necessary to re-use the same needle to search multiple
3083/// different haystacks with as little overhead as possible. In general, using
3084/// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
3085/// or
3086/// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
3087/// is good enough, but `Finder` is useful when you can meaningfully observe
3088/// searcher construction time in a profile.
3089///
3090/// When the `std` feature is enabled, then this type has an `into_owned`
3091/// version which permits building a `Finder` that is not connected to the
3092/// lifetime of its needle.
3093#[derive(Clone, Debug)]
3094pub struct Finder<'a>(memmem::Finder<'a>);
3095
3096impl<'a> Finder<'a> {
3097 /// Create a new finder for the given needle.
3098 #[inline]
3099 pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
3100 Finder(memmem::Finder::new(needle.as_ref()))
3101 }
3102
3103 /// Convert this finder into its owned variant, such that it no longer
3104 /// borrows the needle.
3105 ///
3106 /// If this is already an owned finder, then this is a no-op. Otherwise,
3107 /// this copies the needle.
3108 ///
3109 /// This is only available when the `std` feature is enabled.
3110 #[cfg(feature = "std")]
3111 #[inline]
3112 pub fn into_owned(self) -> Finder<'static> {
3113 Finder(self.0.into_owned())
3114 }
3115
3116 /// Returns the needle that this finder searches for.
3117 ///
3118 /// Note that the lifetime of the needle returned is tied to the lifetime
3119 /// of the finder, and may be shorter than the `'a` lifetime. Namely, a
3120 /// finder's needle can be either borrowed or owned, so the lifetime of the
3121 /// needle returned must necessarily be the shorter of the two.
3122 #[inline]
3123 pub fn needle(&self) -> &[u8] {
3124 self.0.needle()
3125 }
3126
3127 /// Returns the index of the first occurrence of this needle in the given
3128 /// haystack.
3129 ///
3130 /// The haystack may be any type that can be cheaply converted into a
3131 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3132 ///
3133 /// # Complexity
3134 ///
3135 /// This routine is guaranteed to have worst case linear time complexity
3136 /// with respect to both the needle and the haystack. That is, this runs
3137 /// in `O(needle.len() + haystack.len())` time.
3138 ///
3139 /// This routine is also guaranteed to have worst case constant space
3140 /// complexity.
3141 ///
3142 /// # Examples
3143 ///
3144 /// Basic usage:
3145 ///
3146 /// ```
3147 /// use bstr::Finder;
3148 ///
3149 /// let haystack = "foo bar baz";
3150 /// assert_eq!(Some(0), Finder::new("foo").find(haystack));
3151 /// assert_eq!(Some(4), Finder::new("bar").find(haystack));
3152 /// assert_eq!(None, Finder::new("quux").find(haystack));
3153 /// ```
3154 #[inline]
3155 pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3156 self.0.find(haystack.as_ref())
3157 }
3158}
3159
3160/// A single substring reverse searcher fixed to a particular needle.
3161///
3162/// The purpose of this type is to permit callers to construct a substring
3163/// searcher that can be used to search haystacks without the overhead of
3164/// constructing the searcher in the first place. This is a somewhat niche
3165/// concern when it's necessary to re-use the same needle to search multiple
3166/// different haystacks with as little overhead as possible. In general, using
3167/// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
3168/// or
3169/// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
3170/// is good enough, but `FinderReverse` is useful when you can meaningfully
3171/// observe searcher construction time in a profile.
3172///
3173/// When the `std` feature is enabled, then this type has an `into_owned`
3174/// version which permits building a `FinderReverse` that is not connected to
3175/// the lifetime of its needle.
3176#[derive(Clone, Debug)]
3177pub struct FinderReverse<'a>(memmem::FinderRev<'a>);
3178
3179impl<'a> FinderReverse<'a> {
3180 /// Create a new reverse finder for the given needle.
3181 #[inline]
3182 pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
3183 FinderReverse(memmem::FinderRev::new(needle.as_ref()))
3184 }
3185
3186 /// Convert this finder into its owned variant, such that it no longer
3187 /// borrows the needle.
3188 ///
3189 /// If this is already an owned finder, then this is a no-op. Otherwise,
3190 /// this copies the needle.
3191 ///
3192 /// This is only available when the `std` feature is enabled.
3193 #[cfg(feature = "std")]
3194 #[inline]
3195 pub fn into_owned(self) -> FinderReverse<'static> {
3196 FinderReverse(self.0.into_owned())
3197 }
3198
3199 /// Returns the needle that this finder searches for.
3200 ///
3201 /// Note that the lifetime of the needle returned is tied to the lifetime
3202 /// of this finder, and may be shorter than the `'a` lifetime. Namely,
3203 /// a finder's needle can be either borrowed or owned, so the lifetime of
3204 /// the needle returned must necessarily be the shorter of the two.
3205 #[inline]
3206 pub fn needle(&self) -> &[u8] {
3207 self.0.needle()
3208 }
3209
3210 /// Returns the index of the last occurrence of this needle in the given
3211 /// haystack.
3212 ///
3213 /// The haystack may be any type that can be cheaply converted into a
3214 /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3215 ///
3216 /// # Complexity
3217 ///
3218 /// This routine is guaranteed to have worst case linear time complexity
3219 /// with respect to both the needle and the haystack. That is, this runs
3220 /// in `O(needle.len() + haystack.len())` time.
3221 ///
3222 /// This routine is also guaranteed to have worst case constant space
3223 /// complexity.
3224 ///
3225 /// # Examples
3226 ///
3227 /// Basic usage:
3228 ///
3229 /// ```
3230 /// use bstr::FinderReverse;
3231 ///
3232 /// let haystack = "foo bar baz";
3233 /// assert_eq!(Some(0), FinderReverse::new("foo").rfind(haystack));
3234 /// assert_eq!(Some(4), FinderReverse::new("bar").rfind(haystack));
3235 /// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
3236 /// ```
3237 #[inline]
3238 pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3239 self.0.rfind(haystack.as_ref())
3240 }
3241}
3242
3243/// An iterator over non-overlapping substring matches.
3244///
3245/// Matches are reported by the byte offset at which they begin.
3246///
3247/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
3248/// needle.
3249#[derive(Debug)]
3250pub struct Find<'h, 'n> {
3251 it: memmem::FindIter<'h, 'n>,
3252 haystack: &'h [u8],
3253 needle: &'n [u8],
3254}
3255
3256impl<'h, 'n> Find<'h, 'n> {
3257 fn new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n> {
3258 Find { it: memmem::find_iter(haystack, needle), haystack, needle }
3259 }
3260}
3261
3262impl<'h, 'n> Iterator for Find<'h, 'n> {
3263 type Item = usize;
3264
3265 #[inline]
3266 fn next(&mut self) -> Option<usize> {
3267 self.it.next()
3268 }
3269}
3270
3271/// An iterator over non-overlapping substring matches in reverse.
3272///
3273/// Matches are reported by the byte offset at which they begin.
3274///
3275/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
3276/// needle.
3277#[derive(Debug)]
3278pub struct FindReverse<'h, 'n> {
3279 it: memmem::FindRevIter<'h, 'n>,
3280 haystack: &'h [u8],
3281 needle: &'n [u8],
3282}
3283
3284impl<'h, 'n> FindReverse<'h, 'n> {
3285 fn new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n> {
3286 FindReverse {
3287 it: memmem::rfind_iter(haystack, needle),
3288 haystack,
3289 needle,
3290 }
3291 }
3292
3293 fn haystack(&self) -> &'h [u8] {
3294 self.haystack
3295 }
3296
3297 fn needle(&self) -> &'n [u8] {
3298 self.needle
3299 }
3300}
3301
3302impl<'h, 'n> Iterator for FindReverse<'h, 'n> {
3303 type Item = usize;
3304
3305 #[inline]
3306 fn next(&mut self) -> Option<usize> {
3307 self.it.next()
3308 }
3309}
3310
3311/// An iterator over the bytes in a byte string.
3312///
3313/// `'a` is the lifetime of the byte string being traversed.
#[derive(Clone, Debug)]
pub struct Bytes<'a> {
    // The underlying slice iterator; it yields `&u8`, which this wrapper
    // hands out by value.
    it: slice::Iter<'a, u8>,
}

impl<'a> Bytes<'a> {
    /// Views the remaining underlying data as a subslice of the original data.
    /// This has the same lifetime as the original slice,
    /// and so the iterator can continue to be used while this exists.
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.it.as_slice()
    }
}

impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    /// Returns the next byte from the front, by value.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.it.next().copied()
    }

    /// Forwards the exact bounds reported by the slice iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for Bytes<'a> {
    /// Returns the next byte from the back, by value.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back().copied()
    }
}

impl<'a> ExactSizeIterator for Bytes<'a> {
    /// Returns the number of bytes not yet yielded.
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}

impl<'a> iter::FusedIterator for Bytes<'a> {}
3358
3359/// An iterator over the fields in a byte string, separated by whitespace.
3360///
3361/// Whitespace for this iterator is defined by the Unicode property
3362/// `White_Space`.
3363///
3364/// This iterator splits on contiguous runs of whitespace, such that the fields
3365/// in `foo\t\t\n \nbar` are `foo` and `bar`.
3366///
3367/// `'a` is the lifetime of the byte string being split.
#[cfg(feature = "unicode")]
#[derive(Debug)]
pub struct Fields<'a> {
    // Whitespace splitting is predicate splitting with a fixed predicate,
    // stored as a plain function pointer so the type stays nameable.
    it: FieldsWith<'a, fn(char) -> bool>,
}

#[cfg(feature = "unicode")]
impl<'a> Fields<'a> {
    // Split `bytes` on runs of codepoints for which `char::is_whitespace`
    // returns true.
    fn new(bytes: &'a [u8]) -> Fields<'a> {
        // Typed explicitly as a function pointer so that it matches the
        // predicate type named in the `it` field.
        let is_space: fn(char) -> bool = |ch: char| ch.is_whitespace();
        Fields { it: bytes.fields_with(is_space) }
    }
}

#[cfg(feature = "unicode")]
impl<'a> Iterator for Fields<'a> {
    type Item = &'a [u8];

    // All of the work is done by the wrapped `FieldsWith` iterator.
    #[inline]
    fn next(&mut self) -> Option<&'a [u8]> {
        let Fields { it } = self;
        it.next()
    }
}
3390
3391/// An iterator over fields in the byte string, separated by a predicate over
3392/// codepoints.
3393///
3394/// This iterator splits a byte string based on its predicate function such
3395/// that the elements returned are separated by contiguous runs of codepoints
3396/// for which the predicate returns true.
3397///
3398/// `'a` is the lifetime of the byte string being split, while `F` is the type
3399/// of the predicate, i.e., `FnMut(char) -> bool`.
3400#[derive(Debug)]
3401pub struct FieldsWith<'a, F> {
3402 f: F,
3403 bytes: &'a [u8],
3404 chars: CharIndices<'a>,
3405}
3406
3407impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
3408 fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
3409 FieldsWith { f, bytes, chars: bytes.char_indices() }
3410 }
3411}
3412
3413impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
3414 type Item = &'a [u8];
3415
3416 #[inline]
3417 fn next(&mut self) -> Option<&'a [u8]> {
3418 let (start, mut end);
3419 loop {
3420 match self.chars.next() {
3421 None => return None,
3422 Some((s, e, ch)) => {
3423 if !(self.f)(ch) {
3424 start = s;
3425 end = e;
3426 break;
3427 }
3428 }
3429 }
3430 }
3431 while let Some((_, e, ch)) = self.chars.next() {
3432 if (self.f)(ch) {
3433 break;
3434 }
3435 end = e;
3436 }
3437 Some(&self.bytes[start..end])
3438 }
3439}
3440
3441/// An iterator over substrings in a byte string, split by a separator.
3442///
3443/// `'h` is the lifetime of the byte string being split (the haystack), while
3444/// `'s` is the lifetime of the byte string doing the splitting.
3445#[derive(Debug)]
3446pub struct Split<'h, 's> {
3447 finder: Find<'h, 's>,
3448 /// The end position of the previous match of our splitter. The element
3449 /// we yield corresponds to the substring starting at `last` up to the
3450 /// beginning of the next match of the splitter.
3451 last: usize,
3452 /// Only set when iteration is complete. A corner case here is when a
3453 /// splitter is matched at the end of the haystack. At that point, we still
3454 /// need to yield an empty string following it.
3455 done: bool,
3456}
3457
3458impl<'h, 's> Split<'h, 's> {
3459 fn new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's> {
3460 let finder: Find<'_, '_> = haystack.find_iter(needle:splitter);
3461 Split { finder, last: 0, done: false }
3462 }
3463}
3464
3465impl<'h, 's> Iterator for Split<'h, 's> {
3466 type Item = &'h [u8];
3467
3468 #[inline]
3469 fn next(&mut self) -> Option<&'h [u8]> {
3470 let haystack = self.finder.haystack;
3471 match self.finder.next() {
3472 Some(start) => {
3473 let next = &haystack[self.last..start];
3474 self.last = start + self.finder.needle.len();
3475 Some(next)
3476 }
3477 None => {
3478 if self.last >= haystack.len() {
3479 if !self.done {
3480 self.done = true;
3481 Some(b"")
3482 } else {
3483 None
3484 }
3485 } else {
3486 let s = &haystack[self.last..];
3487 self.last = haystack.len();
3488 self.done = true;
3489 Some(s)
3490 }
3491 }
3492 }
3493 }
3494}
3495
3496/// An iterator over substrings in a byte string, split by a separator, in
3497/// reverse.
3498///
3499/// `'h` is the lifetime of the byte string being split (the haystack), while
3500/// `'s` is the lifetime of the byte string doing the splitting.
3501#[derive(Debug)]
3502pub struct SplitReverse<'h, 's> {
3503 finder: FindReverse<'h, 's>,
3504 /// The end position of the previous match of our splitter. The element
3505 /// we yield corresponds to the substring starting at `last` up to the
3506 /// beginning of the next match of the splitter.
3507 last: usize,
3508 /// Only set when iteration is complete. A corner case here is when a
3509 /// splitter is matched at the end of the haystack. At that point, we still
3510 /// need to yield an empty string following it.
3511 done: bool,
3512}
3513
3514impl<'h, 's> SplitReverse<'h, 's> {
3515 fn new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's> {
3516 let finder: FindReverse<'_, '_> = haystack.rfind_iter(needle:splitter);
3517 SplitReverse { finder, last: haystack.len(), done: false }
3518 }
3519}
3520
3521impl<'h, 's> Iterator for SplitReverse<'h, 's> {
3522 type Item = &'h [u8];
3523
3524 #[inline]
3525 fn next(&mut self) -> Option<&'h [u8]> {
3526 let haystack = self.finder.haystack();
3527 match self.finder.next() {
3528 Some(start) => {
3529 let nlen = self.finder.needle().len();
3530 let next = &haystack[start + nlen..self.last];
3531 self.last = start;
3532 Some(next)
3533 }
3534 None => {
3535 if self.last == 0 {
3536 if !self.done {
3537 self.done = true;
3538 Some(b"")
3539 } else {
3540 None
3541 }
3542 } else {
3543 let s = &haystack[..self.last];
3544 self.last = 0;
3545 self.done = true;
3546 Some(s)
3547 }
3548 }
3549 }
3550 }
3551}
3552
3553/// An iterator over at most `n` substrings in a byte string, split by a
3554/// separator.
3555///
3556/// `'h` is the lifetime of the byte string being split (the haystack), while
3557/// `'s` is the lifetime of the byte string doing the splitting.
3558#[derive(Debug)]
3559pub struct SplitN<'h, 's> {
3560 split: Split<'h, 's>,
3561 limit: usize,
3562 count: usize,
3563}
3564
3565impl<'h, 's> SplitN<'h, 's> {
3566 fn new(
3567 haystack: &'h [u8],
3568 splitter: &'s [u8],
3569 limit: usize,
3570 ) -> SplitN<'h, 's> {
3571 let split: Split<'_, '_> = haystack.split_str(splitter);
3572 SplitN { split, limit, count: 0 }
3573 }
3574}
3575
3576impl<'h, 's> Iterator for SplitN<'h, 's> {
3577 type Item = &'h [u8];
3578
3579 #[inline]
3580 fn next(&mut self) -> Option<&'h [u8]> {
3581 self.count += 1;
3582 if self.count > self.limit || self.split.done {
3583 None
3584 } else if self.count == self.limit {
3585 Some(&self.split.finder.haystack[self.split.last..])
3586 } else {
3587 self.split.next()
3588 }
3589 }
3590}
3591
3592/// An iterator over at most `n` substrings in a byte string, split by a
3593/// separator, in reverse.
3594///
3595/// `'h` is the lifetime of the byte string being split (the haystack), while
3596/// `'s` is the lifetime of the byte string doing the splitting.
3597#[derive(Debug)]
3598pub struct SplitNReverse<'h, 's> {
3599 split: SplitReverse<'h, 's>,
3600 limit: usize,
3601 count: usize,
3602}
3603
3604impl<'h, 's> SplitNReverse<'h, 's> {
3605 fn new(
3606 haystack: &'h [u8],
3607 splitter: &'s [u8],
3608 limit: usize,
3609 ) -> SplitNReverse<'h, 's> {
3610 let split: SplitReverse<'_, '_> = haystack.rsplit_str(splitter);
3611 SplitNReverse { split, limit, count: 0 }
3612 }
3613}
3614
3615impl<'h, 's> Iterator for SplitNReverse<'h, 's> {
3616 type Item = &'h [u8];
3617
3618 #[inline]
3619 fn next(&mut self) -> Option<&'h [u8]> {
3620 self.count += 1;
3621 if self.count > self.limit || self.split.done {
3622 None
3623 } else if self.count == self.limit {
3624 Some(&self.split.finder.haystack()[..self.split.last])
3625 } else {
3626 self.split.next()
3627 }
3628 }
3629}
3630
3631/// An iterator over all lines in a byte string, without their terminators.
3632///
3633/// For this iterator, the only line terminators recognized are `\r\n` and
3634/// `\n`.
3635///
3636/// `'a` is the lifetime of the byte string being iterated over.
3637#[derive(Clone, Debug)]
3638pub struct Lines<'a> {
3639 it: LinesWithTerminator<'a>,
3640}
3641
3642impl<'a> Lines<'a> {
3643 fn new(bytes: &'a [u8]) -> Lines<'a> {
3644 Lines { it: LinesWithTerminator::new(bytes) }
3645 }
3646
3647 /// Return a copy of the rest of the underlying bytes without affecting the
3648 /// iterator itself.
3649 ///
3650 /// # Examples
3651 ///
3652 /// Basic usage:
3653 ///
3654 /// ```
3655 /// use bstr::{B, ByteSlice};
3656 ///
3657 /// let s = b"\
3658 /// foo
3659 /// bar\r
3660 /// baz";
3661 /// let mut lines = s.lines();
3662 /// assert_eq!(lines.next(), Some(B("foo")));
3663 /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
3664 /// ```
3665 pub fn as_bytes(&self) -> &'a [u8] {
3666 self.it.bytes
3667 }
3668}
3669
3670impl<'a> Iterator for Lines<'a> {
3671 type Item = &'a [u8];
3672
3673 #[inline]
3674 fn next(&mut self) -> Option<&'a [u8]> {
3675 Some(trim_last_terminator(self.it.next()?))
3676 }
3677}
3678
3679impl<'a> DoubleEndedIterator for Lines<'a> {
3680 #[inline]
3681 fn next_back(&mut self) -> Option<Self::Item> {
3682 Some(trim_last_terminator(self.it.next_back()?))
3683 }
3684}
3685
3686impl<'a> iter::FusedIterator for Lines<'a> {}
3687
3688/// An iterator over all lines in a byte string, including their terminators.
3689///
3690/// For this iterator, the only line terminator recognized is `\n`. (Since
3691/// line terminators are included, this also handles `\r\n` line endings.)
3692///
3693/// Line terminators are only included if they are present in the original
3694/// byte string. For example, the last line in a byte string may not end with
3695/// a line terminator.
3696///
3697/// Concatenating all elements yielded by this iterator is guaranteed to yield
3698/// the original byte string.
3699///
3700/// `'a` is the lifetime of the byte string being iterated over.
3701#[derive(Clone, Debug)]
3702pub struct LinesWithTerminator<'a> {
3703 bytes: &'a [u8],
3704}
3705
3706impl<'a> LinesWithTerminator<'a> {
3707 fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
3708 LinesWithTerminator { bytes }
3709 }
3710
3711 /// Return a copy of the rest of the underlying bytes without affecting the
3712 /// iterator itself.
3713 ///
3714 /// # Examples
3715 ///
3716 /// Basic usage:
3717 ///
3718 /// ```
3719 /// use bstr::{B, ByteSlice};
3720 ///
3721 /// let s = b"\
3722 /// foo
3723 /// bar\r
3724 /// baz";
3725 /// let mut lines = s.lines_with_terminator();
3726 /// assert_eq!(lines.next(), Some(B("foo\n")));
3727 /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
3728 /// ```
3729 pub fn as_bytes(&self) -> &'a [u8] {
3730 self.bytes
3731 }
3732}
3733
3734impl<'a> Iterator for LinesWithTerminator<'a> {
3735 type Item = &'a [u8];
3736
3737 #[inline]
3738 fn next(&mut self) -> Option<&'a [u8]> {
3739 match self.bytes.find_byte(b'\n') {
3740 None if self.bytes.is_empty() => None,
3741 None => {
3742 let line: &[u8] = self.bytes;
3743 self.bytes = b"";
3744 Some(line)
3745 }
3746 Some(end: usize) => {
3747 let line: &[u8] = &self.bytes[..end + 1];
3748 self.bytes = &self.bytes[end + 1..];
3749 Some(line)
3750 }
3751 }
3752 }
3753}
3754
3755impl<'a> DoubleEndedIterator for LinesWithTerminator<'a> {
3756 #[inline]
3757 fn next_back(&mut self) -> Option<Self::Item> {
3758 let end: usize = self.bytes.len().checked_sub(1)?;
3759 match self.bytes[..end].rfind_byte(b'\n') {
3760 None => {
3761 let line: &[u8] = self.bytes;
3762 self.bytes = b"";
3763 Some(line)
3764 }
3765 Some(end: usize) => {
3766 let line: &[u8] = &self.bytes[end + 1..];
3767 self.bytes = &self.bytes[..end + 1];
3768 Some(line)
3769 }
3770 }
3771 }
3772}
3773
3774impl<'a> iter::FusedIterator for LinesWithTerminator<'a> {}
3775
// Strip one trailing line terminator from `s`: a final `\n`, together with
// a `\r` directly in front of it if there is one. A slice that does not end
// in `\n` is returned unchanged.
fn trim_last_terminator(s: &[u8]) -> &[u8] {
    match s.split_last() {
        Some((&b'\n', line)) => match line.split_last() {
            Some((&b'\r', line)) => line,
            _ => line,
        },
        _ => s,
    }
}
3785
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::{
        ext_slice::{ByteSlice, Lines, LinesWithTerminator, B},
        tests::LOSSY_TESTS,
    };

    // Every entry of the shared lossy-decoding table must come out the same
    // from `to_str_lossy`, from `to_str_lossy_into`, and from the standard
    // library's `String::from_utf8_lossy`.
    #[test]
    fn to_str_lossy() {
        for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
            let want = expected.as_bytes();

            let borrowed_or_owned = B(input).to_str_lossy();
            assert_eq!(
                want,
                borrowed_or_owned.as_bytes(),
                "to_str_lossy(ith: {:?}, given: {:?})",
                i,
                input,
            );

            let mut buf = String::new();
            B(input).to_str_lossy_into(&mut buf);
            assert_eq!(want, buf.as_bytes(), "to_str_lossy_into");

            let from_std = String::from_utf8_lossy(input);
            assert_eq!(want, from_std.as_bytes(), "std");
        }
    }

    // Both line iterators must yield the listed lines going forward, and
    // the same lines in the opposite order when driven from the back.
    #[test]
    fn lines_iteration() {
        // `$it` is evaluated twice, so each direction gets a fresh iterator.
        macro_rules! check {
            ($it:expr, $forward:expr) => {
                let mut expected: Vec<&[u8]> = Vec::from($forward);
                let front_to_back = $it.collect::<Vec<_>>();
                assert_eq!(front_to_back, expected);
                expected.reverse();
                let back_to_front = $it.rev().collect::<Vec<_>>();
                assert_eq!(back_to_front, expected);
            };
        }

        check!(Lines::new(b""), []);
        check!(LinesWithTerminator::new(b""), []);

        check!(Lines::new(b"\n"), [B("")]);
        check!(Lines::new(b"\r\n"), [B("")]);
        check!(LinesWithTerminator::new(b"\n"), [B("\n")]);

        check!(Lines::new(b"a"), [B("a")]);
        check!(LinesWithTerminator::new(b"a"), [B("a")]);

        check!(Lines::new(b"abc"), [B("abc")]);
        check!(LinesWithTerminator::new(b"abc"), [B("abc")]);

        check!(Lines::new(b"abc\n"), [B("abc")]);
        check!(Lines::new(b"abc\r\n"), [B("abc")]);
        check!(LinesWithTerminator::new(b"abc\n"), [B("abc\n")]);

        check!(Lines::new(b"abc\n\n"), [B("abc"), B("")]);
        check!(LinesWithTerminator::new(b"abc\n\n"), [B("abc\n"), B("\n")]);

        check!(Lines::new(b"abc\n\ndef"), [B("abc"), B(""), B("def")]);
        check!(
            LinesWithTerminator::new(b"abc\n\ndef"),
            [B("abc\n"), B("\n"), B("def")]
        );

        check!(Lines::new(b"abc\n\ndef\n"), [B("abc"), B(""), B("def")]);
        check!(
            LinesWithTerminator::new(b"abc\n\ndef\n"),
            [B("abc\n"), B("\n"), B("def\n")]
        );

        check!(Lines::new(b"\na\nb\n"), [B(""), B("a"), B("b")]);
        check!(
            LinesWithTerminator::new(b"\na\nb\n"),
            [B("\n"), B("a\n"), B("b\n")]
        );

        check!(Lines::new(b"\n\n\n"), [B(""), B(""), B("")]);
        check!(
            LinesWithTerminator::new(b"\n\n\n"),
            [B("\n"), B("\n"), B("\n")]
        );
    }
}
3871