ext_slice.rs source code [crates/bstr/src/ext_slice.rs]

1	use core::{iter, slice, str};
2
3	#[cfg(all(feature = "alloc", feature = "unicode"))]
4	use alloc::vec;
5	#[cfg(feature = "alloc")]
6	use alloc::{borrow::Cow, string::String, vec::Vec};
7
8	#[cfg(feature = "std")]
9	use std::{ffi::OsStr, path::Path};
10
11	use memchr::{memchr, memmem, memrchr};
12
13	use crate::escape_bytes::EscapeBytes;
14	#[cfg(feature = "alloc")]
15	use crate::ext_vec::ByteVec;
16	#[cfg(feature = "unicode")]
17	use crate::unicode::{
18	whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes,
19	SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices,
20	WordsWithBreaks,
21	};
22	use crate::{
23	ascii,
24	bstr::BStr,
25	byteset,
26	utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error},
27	};
28
29	/// A short-hand constructor for building a `&[u8]`.
30	///
31	/// This idiosyncratic constructor is useful for concisely building byte string
32	/// slices. Its primary utility is in conveniently writing byte string literals
33	/// in a uniform way. For example, consider this code that does not compile:
34	///
35	/// ```ignore
36	/// let strs = vec![b"a", b"xy"];
37	/// ```
38	///
39	/// The above code doesn't compile because the type of the byte string literal
40	/// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is
41	/// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored
42	/// in the same `Vec`. (This is dissimilar from normal Unicode string slices,
43	/// where both `"a"` and `"xy"` have the same type of `&'static str`.)
44	///
45	/// One way of getting the above code to compile is to convert byte strings to
46	/// slices. You might try this:
47	///
48	/// ```ignore
49	/// let strs = vec![&b"a", &b"xy"];
50	/// ```
51	///
52	/// But this just creates values with type `& &'static [u8; 1]` and
53	/// `& &'static [u8; 2]`. Instead, you need to force the issue like so:
54	///
55	/// ```
56	/// let strs = vec![&b"a"[..], &b"xy"[..]];
57	/// // or
58	/// let strs = vec![b"a".as_ref(), b"xy".as_ref()];
59	/// ```
60	///
61	/// But neither of these are particularly convenient to type, especially when
62	/// it's something as common as a string literal. Thus, this constructor
63	/// permits writing the following instead:
64	///
65	/// ```
66	/// use bstr::B;
67	///
68	/// let strs = vec![B("a"), B(b"xy")];
69	/// ```
70	///
71	/// Notice that this also lets you mix and match both string literals and byte
72	/// string literals. This can be quite convenient!
#[allow(non_snake_case)]
#[inline]
pub fn B<B: ?Sized + AsRef<[u8]>>(bytes: &B) -> &[u8] {
    // Go through `AsRef<[u8]>` explicitly so that every accepted input type
    // is handed back as the same `&[u8]` type.
    AsRef::<[u8]>::as_ref(bytes)
}
78
79	impl ByteSlice for [u8] {
80	#[inline]
81	fn as_bytes(&self) -> &[u8] {
82	self
83	}
84
85	#[inline]
86	fn as_bytes_mut(&mut self) -> &mut [u8] {
87	self
88	}
89	}
90
91	impl<const N: usize> ByteSlice for [u8; N] {
92	#[inline]
93	fn as_bytes(&self) -> &[u8] {
94	self
95	}
96
97	#[inline]
98	fn as_bytes_mut(&mut self) -> &mut [u8] {
99	self
100	}
101	}
102
103	/// Seals `ByteSlice`: its super trait `Sealed` lives in this private module,
104	/// which makes the trait unimplementable for callers outside of this crate.
105	mod private {
106	pub trait Sealed {}
107	}
108	impl private::Sealed for [u8] {}
109	impl<const N: usize> private::Sealed for [u8; N] {}
110
111	/// A trait that extends `&[u8]` with string oriented methods.
112	///
113	/// This trait is sealed and cannot be implemented outside of `bstr`.
114	pub trait ByteSlice: private::Sealed {
115	/// Returns the raw bytes backing this type. Implementations are expected
116	/// to be a no-op, and callers shouldn't care about this method: it only
117	/// exists so that the provided methods of this extension trait work.
118	#[doc(hidden)]
119	fn as_bytes(&self) -> &[u8];
120
121	/// Returns the raw bytes backing this type, mutably. Implementations are
122	/// expected to be a no-op, and callers shouldn't care about this method:
123	/// it only exists so that the provided methods of this extension trait work.
124	#[doc(hidden)]
125	fn as_bytes_mut(&mut self) -> &mut [u8];
126
127	/// Return this byte slice as a `&BStr`.
128	///
129	/// Using `&BStr` is useful because of its `fmt::Debug` representation
130	/// and various other trait implementations (such as `PartialEq` and
131	/// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
132	/// shows its bytes as a normal string. For invalid UTF-8, hex escape
133	/// sequences are used.
134	///
135	/// # Examples
136	///
137	/// Basic usage:
138	///
139	/// ```
140	/// use bstr::ByteSlice;
141	///
142	/// println!("{:?}", b"foo\xFFbar".as_bstr());
143	/// ```
144	#[inline]
145	fn as_bstr(&self) -> &BStr {
146	BStr::new(self.as_bytes())
147	}
148
149	/// Return this byte slice as a `&mut BStr`.
150	///
151	/// Using `&mut BStr` is useful because of its `fmt::Debug` representation
152	/// and various other trait implementations (such as `PartialEq` and
153	/// `PartialOrd`). In particular, the `Debug` implementation for `BStr`
154	/// shows its bytes as a normal string. For invalid UTF-8, hex escape
155	/// sequences are used.
156	///
157	/// # Examples
158	///
159	/// Basic usage:
160	///
161	/// ```
162	/// use bstr::ByteSlice;
163	///
164	/// let mut bytes = *b"foo\xFFbar";
165	/// println!("{:?}", &mut bytes.as_bstr_mut());
166	/// ```
167	#[inline]
168	fn as_bstr_mut(&mut self) -> &mut BStr {
169	BStr::new_mut(self.as_bytes_mut())
170	}
171
172	/// Create an immutable byte string from an OS string slice.
173	///
174	/// When the underlying bytes of OS strings are accessible, then this
175	/// always succeeds and is zero cost. Otherwise, this returns `None` if the
176	/// given OS string is not valid UTF-8. (For example, when the underlying
177	/// bytes are inaccessible on Windows, file paths are allowed to be a
178	/// sequence of arbitrary 16-bit integers. Not all such sequences can be
179	/// transcoded to valid UTF-8.)
180	///
181	/// # Examples
182	///
183	/// Basic usage:
184	///
185	/// ```
186	/// use std::ffi::OsStr;
187	///
188	/// use bstr::{B, ByteSlice};
189	///
190	/// let os_str = OsStr::new("foo");
191	/// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8");
192	/// assert_eq!(bs, B("foo"));
193	/// ```
#[cfg(feature = "std")]
#[inline]
fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
    // On Unix the bytes of an OS string are directly accessible, so this
    // conversion always succeeds.
    #[cfg(unix)]
    #[inline]
    fn imp(os_str: &OsStr) -> Option<&[u8]> {
        use std::os::unix::ffi::OsStrExt;

        Some(os_str.as_bytes())
    }

    // Elsewhere the bytes are only reachable through `&str`, which is
    // available only when the OS string is valid UTF-8.
    #[cfg(not(unix))]
    #[inline]
    fn imp(os_str: &OsStr) -> Option<&[u8]> {
        os_str.to_str().map(|text| text.as_bytes())
    }

    imp(os_str)
}
213
214	/// Create an immutable byte string from a file path.
215	///
216	/// When the underlying bytes of paths are accessible, then this always
217	/// succeeds and is zero cost. Otherwise, this returns `None` if the given
218	/// path is not valid UTF-8. (For example, when the underlying bytes are
219	/// inaccessible on Windows, file paths are allowed to be a sequence of
220	/// arbitrary 16-bit integers. Not all such sequences can be transcoded to
221	/// valid UTF-8.)
222	///
223	/// # Examples
224	///
225	/// Basic usage:
226	///
227	/// ```
228	/// use std::path::Path;
229	///
230	/// use bstr::{B, ByteSlice};
231	///
232	/// let path = Path::new("foo");
233	/// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8");
234	/// assert_eq!(bs, B("foo"));
235	/// ```
#[cfg(feature = "std")]
#[inline]
fn from_path(path: &Path) -> Option<&[u8]> {
    // A path exposes its underlying OS string, so defer to that conversion.
    let os_str = path.as_os_str();
    Self::from_os_str(os_str)
}
241
242	/// Safely convert this byte string into a `&str` if it's valid UTF-8.
243	///
244	/// If this byte string is not valid UTF-8, then an error is returned. The
245	/// error returned indicates the first invalid byte found and the length
246	/// of the error.
247	///
248	/// In cases where a lossy conversion to `&str` is acceptable, then use one
249	/// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or
250	/// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into)
251	/// methods.
252	///
253	/// # Examples
254	///
255	/// Basic usage:
256	///
257	/// ```
258	/// # #[cfg(feature = "alloc")] {
259	/// use bstr::{B, ByteSlice, ByteVec};
260	///
261	/// # fn example() -> Result<(), bstr::Utf8Error> {
262	/// let s = B("☃βツ").to_str()?;
263	/// assert_eq!("☃βツ", s);
264	///
265	/// let mut bstring = <Vec<u8>>::from("☃βツ");
266	/// bstring.push(b'\xFF');
267	/// let err = bstring.to_str().unwrap_err();
268	/// assert_eq!(8, err.valid_up_to());
269	/// # Ok(()) }; example().unwrap()
270	/// # }
271	/// ```
272	#[inline]
273	fn to_str(&self) -> Result<&str, Utf8Error> {
274	utf8::validate(self.as_bytes()).map(\|_\| {
275	// SAFETY: This is safe because of the guarantees provided by
276	// utf8::validate.
277	unsafe { str::from_utf8_unchecked(self.as_bytes()) }
278	})
279	}
280
281	/// Unsafely convert this byte string into a `&str`, without checking for
282	/// valid UTF-8.
283	///
284	/// # Safety
285	///
286	/// Callers *must* ensure that this byte string is valid UTF-8 before
287	/// calling this method. Converting a byte string into a `&str` that is
288	/// not valid UTF-8 is considered undefined behavior.
289	///
290	/// This routine is useful in performance sensitive contexts where the
291	/// UTF-8 validity of the byte string is already known and it is
292	/// undesirable to pay the cost of an additional UTF-8 validation check
293	/// that [`to_str`](trait.ByteSlice.html#method.to_str) performs.
294	///
295	/// # Examples
296	///
297	/// Basic usage:
298	///
299	/// ```
300	/// use bstr::{B, ByteSlice};
301	///
302	/// // SAFETY: This is safe because string literals are guaranteed to be
303	/// // valid UTF-8 by the Rust compiler.
304	/// let s = unsafe { B("☃βツ").to_str_unchecked() };
305	/// assert_eq!("☃βツ", s);
306	/// ```
307	#[inline]
308	unsafe fn to_str_unchecked(&self) -> &str {
309	str::from_utf8_unchecked(self.as_bytes())
310	}
311
312	/// Convert this byte string to a valid UTF-8 string by replacing invalid
313	/// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
314	///
315	/// If the byte string is already valid UTF-8, then no copying or
316	/// allocation is performed and a borrowed string slice is returned. If
317	/// the byte string is not valid UTF-8, then an owned string buffer is
318	/// returned with invalid bytes replaced by the replacement codepoint.
319	///
320	/// This method uses the "substitution of maximal subparts" (Unicode
321	/// Standard, Chapter 3, Section 9) strategy for inserting the replacement
322	/// codepoint. Specifically, a replacement codepoint is inserted whenever a
323	/// byte is found that cannot possibly lead to a valid code unit sequence.
324	/// If there were previous bytes that represented a prefix of a well-formed
325	/// code unit sequence, then all of those bytes are substituted with a
326	/// single replacement codepoint. The "substitution of maximal subparts"
327	/// strategy is the same strategy used by
328	/// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
329	/// For a more precise description of the maximal subpart strategy, see
330	/// the Unicode Standard, Chapter 3, Section 9. See also
331	/// [Public Review Issue #121](https://www.unicode.org/review/pr-121.html).
332	///
333	/// N.B. Rust's standard library also appears to use the same strategy,
334	/// but it does not appear to be an API guarantee.
335	///
336	/// # Examples
337	///
338	/// Basic usage:
339	///
340	/// ```
341	/// use std::borrow::Cow;
342	///
343	/// use bstr::ByteSlice;
344	///
345	/// let mut bstring = <Vec<u8>>::from("☃βツ");
346	/// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
347	///
348	/// // Add a byte that makes the sequence invalid.
349	/// bstring.push(b'\xFF');
350	/// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
351	/// ```
352	///
353	/// This demonstrates the "maximal subpart" substitution logic.
354	///
355	/// ```
356	/// use bstr::{B, ByteSlice};
357	///
358	/// // \x61 is the ASCII codepoint for 'a'.
359	/// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
360	/// // \xE1\x80 is a valid 2-byte code unit prefix.
361	/// // \xC2 is a valid 1-byte code unit prefix.
362	/// // \x62 is the ASCII codepoint for 'b'.
363	/// //
364	/// // In sum, each of the prefixes is replaced by a single replacement
365	/// // codepoint since none of the prefixes are properly completed. This
366	/// // is in contrast to other strategies that might insert a replacement
367	/// // codepoint for every single byte.
368	/// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
369	/// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
370	/// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_str_lossy(&self) -> Cow<'_, str> {
    let bytes = self.as_bytes();
    let err = match utf8::validate(bytes) {
        Ok(()) => {
            // SAFETY: This is safe because of the guarantees provided by
            // utf8::validate.
            return Cow::Borrowed(unsafe { str::from_utf8_unchecked(bytes) });
        }
        Err(err) => err,
    };

    // Slow path: build an owned copy, starting with the valid prefix.
    let mut lossy = String::with_capacity(bytes.len());
    let (valid, after) = bytes.split_at(err.valid_up_to());
    // SAFETY: This is safe because utf8::validate guarantees that all of
    // `valid` is valid UTF-8.
    lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
    lossy.push_str("\u{FFFD}");
    // When the error reports its length, skip the invalid bytes and lossily
    // convert whatever follows them.
    if let Some(len) = err.error_len() {
        after[len..].to_str_lossy_into(&mut lossy);
    }
    Cow::Owned(lossy)
}
397
398	/// Copy the contents of this byte string into the given owned string
399	/// buffer, while replacing invalid UTF-8 code unit sequences with the
400	/// Unicode replacement codepoint (`U+FFFD`).
401	///
402	/// This method uses the same "substitution of maximal subparts" strategy
403	/// for inserting the replacement codepoint as the
404	/// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
405	///
406	/// This routine is useful for amortizing allocation. However, unlike
407	/// `to_str_lossy`, this routine will _always_ copy the contents of this
408	/// byte string into the destination buffer, even if this byte string is
409	/// valid UTF-8.
410	///
411	/// # Examples
412	///
413	/// Basic usage:
414	///
415	/// ```
416	/// use std::borrow::Cow;
417	///
418	/// use bstr::ByteSlice;
419	///
420	/// let mut bstring = <Vec<u8>>::from("☃βツ");
421	/// // Add a byte that makes the sequence invalid.
422	/// bstring.push(b'\xFF');
423	///
424	/// let mut dest = String::new();
425	/// bstring.to_str_lossy_into(&mut dest);
426	/// assert_eq!("☃βツ\u{FFFD}", dest);
427	/// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_str_lossy_into(&self, dest: &mut String) {
    let mut bytes = self.as_bytes();
    dest.reserve(bytes.len());
    // Each iteration consumes one invalid sequence together with the valid
    // text that precedes it.
    while let Err(err) = utf8::validate(bytes) {
        let (valid, after) = bytes.split_at(err.valid_up_to());
        // SAFETY: This is safe because utf8::validate guarantees that all
        // of `valid` is valid UTF-8.
        dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
        dest.push_str("\u{FFFD}");
        match err.error_len() {
            // No error length reported: stop here, nothing more is copied.
            None => return,
            Some(len) => bytes = &after[len..],
        }
    }
    // SAFETY: This is safe because the loop above only ends once
    // utf8::validate has accepted all of `bytes` as valid UTF-8.
    dest.push_str(unsafe { str::from_utf8_unchecked(bytes) });
}
455
456	/// Create an OS string slice from this byte string.
457	///
458	/// When OS strings can be constructed from arbitrary byte sequences, this
459	/// always succeeds and is zero cost. Otherwise, this returns a UTF-8
460	/// decoding error if this byte string is not valid UTF-8. (For example,
461	/// assuming the representation of `OsStr` is opaque on Windows, file paths
462	/// are allowed to be a sequence of arbitrary 16-bit integers. There is
463	/// no obvious mapping from an arbitrary sequence of 8-bit integers to an
464	/// arbitrary sequence of 16-bit integers. If the representation of `OsStr`
465	/// is ever opened up, then this will convert any sequence of bytes to an
466	/// `OsStr` without cost.)
467	///
468	/// # Examples
469	///
470	/// Basic usage:
471	///
472	/// ```
473	/// use bstr::{B, ByteSlice};
474	///
475	/// let os_str = b"foo".to_os_str().expect("should be valid UTF-8");
476	/// assert_eq!(os_str, "foo");
477	/// ```
#[cfg(feature = "std")]
#[inline]
fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
    // On Unix an OS string can be built from arbitrary bytes, so this
    // variant never fails.
    #[cfg(unix)]
    #[inline]
    fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
        use std::os::unix::ffi::OsStrExt;

        let os_str = OsStr::from_bytes(bytes);
        Ok(os_str)
    }

    // Elsewhere the bytes must first pass UTF-8 validation.
    #[cfg(not(unix))]
    #[inline]
    fn imp(bytes: &[u8]) -> Result<&OsStr, Utf8Error> {
        let text = bytes.to_str()?;
        Ok(OsStr::new(text))
    }

    imp(self.as_bytes())
}
497
498	/// Lossily create an OS string slice from this byte string.
499	///
500	/// When OS strings can be constructed from arbitrary byte sequences, this
501	/// is zero cost and always returns a slice. Otherwise, this will perform a
502	/// UTF-8 check and lossily convert this byte string into valid UTF-8 using
503	/// the Unicode replacement codepoint.
504	///
505	/// Note that this can prevent the correct roundtripping of file paths when
506	/// the representation of `OsStr` is opaque.
507	///
508	/// # Examples
509	///
510	/// Basic usage:
511	///
512	/// ```
513	/// use bstr::ByteSlice;
514	///
515	/// let os_str = b"foo\xFFbar".to_os_str_lossy();
516	/// assert_eq!(os_str.to_string_lossy(), "foo\u{FFFD}bar");
517	/// ```
#[cfg(feature = "std")]
#[inline]
fn to_os_str_lossy(&self) -> Cow<'_, OsStr> {
    // On Unix an OS string can be built from arbitrary bytes, so the result
    // is always borrowed.
    #[cfg(unix)]
    #[inline]
    fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
        use std::os::unix::ffi::OsStrExt;

        Cow::Borrowed(OsStr::from_bytes(bytes))
    }

    // Elsewhere, convert lossily to UTF-8 first and keep the result
    // borrowed whenever that conversion did not need to allocate.
    #[cfg(not(unix))]
    #[inline]
    fn imp(bytes: &[u8]) -> Cow<'_, OsStr> {
        use std::ffi::OsString;

        match bytes.to_str_lossy() {
            Cow::Borrowed(text) => Cow::Borrowed(OsStr::new(text)),
            Cow::Owned(text) => Cow::Owned(OsString::from(text)),
        }
    }

    imp(self.as_bytes())
}
542
543	/// Create a path slice from this byte string.
544	///
545	/// When paths can be constructed from arbitrary byte sequences, this
546	/// always succeeds and is zero cost. Otherwise, this returns a UTF-8
547	/// decoding error if this byte string is not valid UTF-8. (For example,
548	/// assuming the representation of `Path` is opaque on Windows, file paths
549	/// are allowed to be a sequence of arbitrary 16-bit integers. There is
550	/// no obvious mapping from an arbitrary sequence of 8-bit integers to an
551	/// arbitrary sequence of 16-bit integers. If the representation of `Path`
552	/// is ever opened up, then this will convert any sequence of bytes to a
553	/// `Path` without cost.)
554	///
555	/// # Examples
556	///
557	/// Basic usage:
558	///
559	/// ```
560	/// use bstr::ByteSlice;
561	///
562	/// let path = b"foo".to_path().expect("should be valid UTF-8");
563	/// assert_eq!(path.as_os_str(), "foo");
564	/// ```
#[cfg(feature = "std")]
#[inline]
fn to_path(&self) -> Result<&Path, Utf8Error> {
    // Any failure comes from the OS string conversion; building the path
    // from the resulting OS string cannot fail.
    let os_str = self.to_os_str()?;
    Ok(Path::new(os_str))
}
570
571	/// Lossily create a path slice from this byte string.
572	///
573	/// When paths can be constructed from arbitrary byte sequences, this is
574	/// zero cost and always returns a slice. Otherwise, this will perform a
575	/// UTF-8 check and lossily convert this byte string into valid UTF-8 using
576	/// the Unicode replacement codepoint.
577	///
578	/// Note that this can prevent the correct roundtripping of file paths when
579	/// the representation of `Path` is opaque.
580	///
581	/// # Examples
582	///
583	/// Basic usage:
584	///
585	/// ```
586	/// use bstr::ByteSlice;
587	///
588	/// let bs = b"foo\xFFbar";
589	/// let path = bs.to_path_lossy();
590	/// assert_eq!(path.to_string_lossy(), "foo\u{FFFD}bar");
591	/// ```
#[cfg(feature = "std")]
#[inline]
fn to_path_lossy(&self) -> Cow<'_, Path> {
    use std::path::PathBuf;

    // Keep the borrowed/owned state of the lossy OS string conversion.
    let os_str = self.to_os_str_lossy();
    match os_str {
        Cow::Borrowed(borrowed) => Cow::Borrowed(Path::new(borrowed)),
        Cow::Owned(owned) => Cow::Owned(PathBuf::from(owned)),
    }
}
602
603	/// Create a new byte string by repeating this byte string `n` times.
604	///
605	/// # Panics
606	///
607	/// This function panics if the capacity of the new byte string would
608	/// overflow.
609	///
610	/// # Examples
611	///
612	/// Basic usage:
613	///
614	/// ```
615	/// use bstr::{B, ByteSlice};
616	///
617	/// assert_eq!(b"foo".repeatn(4), B("foofoofoofoo"));
618	/// assert_eq!(b"foo".repeatn(0), B(""));
619	/// ```
#[cfg(feature = "alloc")]
#[inline]
fn repeatn(&self, n: usize) -> Vec<u8> {
    // Defer to the standard slice routine for the actual repetition.
    let bytes = self.as_bytes();
    bytes.repeat(n)
}
625
626	/// Returns true if and only if this byte string contains the given needle.
627	///
628	/// # Examples
629	///
630	/// Basic usage:
631	///
632	/// ```
633	/// use bstr::ByteSlice;
634	///
635	/// assert!(b"foo bar".contains_str("foo"));
636	/// assert!(b"foo bar".contains_str("bar"));
637	/// assert!(!b"foo".contains_str("foobar"));
638	/// ```
639	#[inline]
640	fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool {
641	self.find(needle).is_some()
642	}
643
644	/// Returns true if and only if this byte string has the given prefix.
645	///
646	/// # Examples
647	///
648	/// Basic usage:
649	///
650	/// ```
651	/// use bstr::ByteSlice;
652	///
653	/// assert!(b"foo bar".starts_with_str("foo"));
654	/// assert!(!b"foo bar".starts_with_str("bar"));
655	/// assert!(!b"foo".starts_with_str("foobar"));
656	/// ```
657	#[inline]
658	fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool {
659	self.as_bytes().starts_with(prefix.as_ref())
660	}
661
662	/// Returns true if and only if this byte string has the given suffix.
663	///
664	/// # Examples
665	///
666	/// Basic usage:
667	///
668	/// ```
669	/// use bstr::ByteSlice;
670	///
671	/// assert!(b"foo bar".ends_with_str("bar"));
672	/// assert!(!b"foo bar".ends_with_str("foo"));
673	/// assert!(!b"bar".ends_with_str("foobar"));
674	/// ```
675	#[inline]
676	fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool {
677	self.as_bytes().ends_with(suffix.as_ref())
678	}
679
680	/// Returns the index of the first occurrence of the given needle.
681	///
682	/// The needle may be any type that can be cheaply converted into a
683	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
684	///
685	/// Note that if you are searching for the same needle in many
686	/// different small haystacks, it may be faster to initialize a
687	/// [`Finder`](struct.Finder.html) once, and reuse it for each search.
688	///
689	/// # Complexity
690	///
691	/// This routine is guaranteed to have worst case linear time complexity
692	/// with respect to both the needle and the haystack. That is, this runs
693	/// in `O(needle.len() + haystack.len())` time.
694	///
695	/// This routine is also guaranteed to have worst case constant space
696	/// complexity.
697	///
698	/// # Examples
699	///
700	/// Basic usage:
701	///
702	/// ```
703	/// use bstr::ByteSlice;
704	///
705	/// let s = b"foo bar baz";
706	/// assert_eq!(Some(0), s.find("foo"));
707	/// assert_eq!(Some(4), s.find("bar"));
708	/// assert_eq!(None, s.find("quux"));
709	/// ```
710	#[inline]
711	fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
712	Finder::new(needle.as_ref()).find(self.as_bytes())
713	}
714
715	/// Returns the index of the last occurrence of the given needle.
716	///
717	/// The needle may be any type that can be cheaply converted into a
718	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
719	///
720	/// Note that if you are searching for the same needle in many
721	/// different small haystacks, it may be faster to initialize a
722	/// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for
723	/// each search.
724	///
725	/// # Complexity
726	///
727	/// This routine is guaranteed to have worst case linear time complexity
728	/// with respect to both the needle and the haystack. That is, this runs
729	/// in `O(needle.len() + haystack.len())` time.
730	///
731	/// This routine is also guaranteed to have worst case constant space
732	/// complexity.
733	///
734	/// # Examples
735	///
736	/// Basic usage:
737	///
738	/// ```
739	/// use bstr::ByteSlice;
740	///
741	/// let s = b"foo bar baz";
742	/// assert_eq!(Some(0), s.rfind("foo"));
743	/// assert_eq!(Some(4), s.rfind("bar"));
744	/// assert_eq!(Some(8), s.rfind("ba"));
745	/// assert_eq!(None, s.rfind("quux"));
746	/// ```
747	#[inline]
748	fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> {
749	FinderReverse::new(needle.as_ref()).rfind(self.as_bytes())
750	}
751
752	/// Returns an iterator of the non-overlapping occurrences of the given
753	/// needle. The iterator yields byte offset positions indicating the start
754	/// of each match.
755	///
756	/// # Complexity
757	///
758	/// This routine is guaranteed to have worst case linear time complexity
759	/// with respect to both the needle and the haystack. That is, this runs
760	/// in `O(needle.len() + haystack.len())` time.
761	///
762	/// This routine is also guaranteed to have worst case constant space
763	/// complexity.
764	///
765	/// # Examples
766	///
767	/// Basic usage:
768	///
769	/// ```
770	/// use bstr::ByteSlice;
771	///
772	/// let s = b"foo bar foo foo quux foo";
773	/// let matches: Vec<usize> = s.find_iter("foo").collect();
774	/// assert_eq!(matches, vec![0, 8, 12, 21]);
775	/// ```
776	///
777	/// An empty string matches at every position, including the position
778	/// immediately following the last byte:
779	///
780	/// ```
781	/// use bstr::ByteSlice;
782	///
783	/// let matches: Vec<usize> = b"foo".find_iter("").collect();
784	/// assert_eq!(matches, vec![0, 1, 2, 3]);
785	///
786	/// let matches: Vec<usize> = b"".find_iter("").collect();
787	/// assert_eq!(matches, vec![0]);
788	/// ```
789	#[inline]
790	fn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
791	&'h self,
792	needle: &'n B,
793	) -> Find<'h, 'n> {
794	Find::new(self.as_bytes(), needle.as_ref())
795	}
796
797	/// Returns an iterator of the non-overlapping occurrences of the given
798	/// needle in reverse. The iterator yields byte offset positions indicating
799	/// the start of each match.
800	///
801	/// # Complexity
802	///
803	/// This routine is guaranteed to have worst case linear time complexity
804	/// with respect to both the needle and the haystack. That is, this runs
805	/// in `O(needle.len() + haystack.len())` time.
806	///
807	/// This routine is also guaranteed to have worst case constant space
808	/// complexity.
809	///
810	/// # Examples
811	///
812	/// Basic usage:
813	///
814	/// ```
815	/// use bstr::ByteSlice;
816	///
817	/// let s = b"foo bar foo foo quux foo";
818	/// let matches: Vec<usize> = s.rfind_iter("foo").collect();
819	/// assert_eq!(matches, vec![21, 12, 8, 0]);
820	/// ```
821	///
822	/// An empty string matches at every position, including the position
823	/// immediately following the last byte:
824	///
825	/// ```
826	/// use bstr::ByteSlice;
827	///
828	/// let matches: Vec<usize> = b"foo".rfind_iter("").collect();
829	/// assert_eq!(matches, vec![3, 2, 1, 0]);
830	///
831	/// let matches: Vec<usize> = b"".rfind_iter("").collect();
832	/// assert_eq!(matches, vec![0]);
833	/// ```
834	#[inline]
835	fn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>(
836	&'h self,
837	needle: &'n B,
838	) -> FindReverse<'h, 'n> {
839	FindReverse::new(self.as_bytes(), needle.as_ref())
840	}
841
842	/// Returns the index of the first occurrence of the given byte. If the
843	/// byte does not occur in this byte string, then `None` is returned.
844	///
845	/// # Examples
846	///
847	/// Basic usage:
848	///
849	/// ```
850	/// use bstr::ByteSlice;
851	///
852	/// assert_eq!(Some(10), b"foo bar baz".find_byte(b'z'));
853	/// assert_eq!(None, b"foo bar baz".find_byte(b'y'));
854	/// ```
855	#[inline]
856	fn find_byte(&self, byte: u8) -> Option<usize> {
857	memchr(byte, self.as_bytes())
858	}
859
860	/// Returns the index of the last occurrence of the given byte. If the
861	/// byte does not occur in this byte string, then `None` is returned.
862	///
863	/// # Examples
864	///
865	/// Basic usage:
866	///
867	/// ```
868	/// use bstr::ByteSlice;
869	///
870	/// assert_eq!(Some(10), b"foo bar baz".rfind_byte(b'z'));
871	/// assert_eq!(None, b"foo bar baz".rfind_byte(b'y'));
872	/// ```
873	#[inline]
874	fn rfind_byte(&self, byte: u8) -> Option<usize> {
875	memrchr(byte, self.as_bytes())
876	}
877
878	/// Returns the index of the first occurrence of the given codepoint.
879	/// If the codepoint does not occur in this byte string, then `None` is
880	/// returned.
881	///
882	/// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
883	/// then only explicit occurrences of that encoding will be found. Invalid
884	/// UTF-8 sequences will not be matched.
885	///
886	/// # Examples
887	///
888	/// Basic usage:
889	///
890	/// ```
891	/// use bstr::{B, ByteSlice};
892	///
893	/// assert_eq!(Some(10), b"foo bar baz".find_char('z'));
894	/// assert_eq!(Some(4), B("αβγγδ").find_char('γ'));
895	/// assert_eq!(None, b"foo bar baz".find_char('y'));
896	/// ```
897	#[inline]
898	fn find_char(&self, ch: char) -> Option<usize> {
899	self.find(ch.encode_utf8(&mut [`0`; `4`]))
900	}
901
902	/// Returns the index of the last occurrence of the given codepoint.
903	/// If the codepoint does not occur in this byte string, then `None` is
904	/// returned.
905	///
906	/// Note that if one searches for the replacement codepoint, `\u{FFFD}`,
907	/// then only explicit occurrences of that encoding will be found. Invalid
908	/// UTF-8 sequences will not be matched.
909	///
910	/// # Examples
911	///
912	/// Basic usage:
913	///
914	/// ```
915	/// use bstr::{B, ByteSlice};
916	///
917	/// assert_eq!(Some(10), b"foo bar baz".rfind_char('z'));
918	/// assert_eq!(Some(6), B("αβγγδ").rfind_char('γ'));
919	/// assert_eq!(None, b"foo bar baz".rfind_char('y'));
920	/// ```
921	#[inline]
922	fn rfind_char(&self, ch: char) -> Option<usize> {
923	self.rfind(ch.encode_utf8(&mut [`0`; `4`]))
924	}
925
926	/// Returns the index of the first occurrence of any of the bytes in the
927	/// provided set.
928	///
929	/// The `byteset` may be any type that can be cheaply converted into a
930	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
931	/// note that passing a `&str` which contains multibyte characters may not
932	/// behave as you expect: each byte in the `&str` is treated as an
933	/// individual member of the byte set.
934	///
935	/// Note that order is irrelevant for the `byteset` parameter, and
936	/// duplicate bytes present in its body are ignored.
937	///
938	/// # Complexity
939	///
940	/// This routine is guaranteed to have worst case linear time complexity
941	/// with respect to both the set of bytes and the haystack. That is, this
942	/// runs in `O(byteset.len() + haystack.len())` time.
943	///
944	/// This routine is also guaranteed to have worst case constant space
945	/// complexity.
946	///
947	/// # Examples
948	///
949	/// Basic usage:
950	///
951	/// ```
952	/// use bstr::ByteSlice;
953	///
954	/// assert_eq!(b"foo bar baz".find_byteset(b"zr"), Some(6));
955	/// assert_eq!(b"foo baz bar".find_byteset(b"bzr"), Some(4));
956	/// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
957	/// // The empty byteset never matches.
958	/// assert_eq!(None, b"abc".find_byteset(b""));
959	/// assert_eq!(None, b"".find_byteset(b""));
960	/// ```
961	#[inline]
962	fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
963	byteset::find(self.as_bytes(), byteset.as_ref())
964	}
965
966	/// Returns the index of the first occurrence of a byte that is not a
967	/// member of the provided set.
968	///
969	/// The `byteset` may be any type that can be cheaply converted into a
970	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
971	/// note that passing a `&str` which contains multibyte characters may not
972	/// behave as you expect: each byte in the `&str` is treated as an
973	/// individual member of the byte set.
974	///
975	/// Note that order is irrelevant for the `byteset` parameter, and
976	/// duplicate bytes present in its body are ignored.
977	///
978	/// # Complexity
979	///
980	/// This routine is guaranteed to have worst case linear time complexity
981	/// with respect to both the set of bytes and the haystack. That is, this
982	/// runs in `O(byteset.len() + haystack.len())` time.
983	///
984	/// This routine is also guaranteed to have worst case constant space
985	/// complexity.
986	///
987	/// # Examples
988	///
989	/// Basic usage:
990	///
991	/// ```
992	/// use bstr::ByteSlice;
993	///
994	/// assert_eq!(b"foo bar baz".find_not_byteset(b"fo "), Some(`4`));
995	/// assert_eq!(b"`\t\t`baz bar".find_not_byteset(b" `\t\r\n`"), Some(`2`));
996	/// assert_eq!(b"foo`\n`baz`\t`bar".find_not_byteset(b"`\t\n`"), Some(`0`));
997	/// // The negation of the empty byteset matches everything.
998	/// assert_eq!(Some(`0`), b"abc".find_not_byteset(b""));
999	/// // But an empty string never contains anything.
1000	/// assert_eq!(None, b"".find_not_byteset(b""));
1001	/// ```
1002	#[inline]
1003	fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1004	byteset::find_not(self.as_bytes(), byteset.as_ref())
1005	}
1006
1007	/// Returns the index of the last occurrence of any of the bytes in the
1008	/// provided set.
1009	///
1010	/// The `byteset` may be any type that can be cheaply converted into a
1011	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1012	/// note that passing a `&str` which contains multibyte characters may not
1013	/// behave as you expect: each byte in the `&str` is treated as an
1014	/// individual member of the byte set.
1015	///
1016	/// Note that order is irrelevant for the `byteset` parameter, and duplicate
1017	/// bytes present in its body are ignored.
1018	///
1019	/// # Complexity
1020	///
1021	/// This routine is guaranteed to have worst case linear time complexity
1022	/// with respect to both the set of bytes and the haystack. That is, this
1023	/// runs in `O(byteset.len() + haystack.len())` time.
1024	///
1025	/// This routine is also guaranteed to have worst case constant space
1026	/// complexity.
1027	///
1028	/// # Examples
1029	///
1030	/// Basic usage:
1031	///
1032	/// ```
1033	/// use bstr::ByteSlice;
1034	///
1035	/// assert_eq!(b"foo bar baz".rfind_byteset(b"agb"), Some(`9`));
1036	/// assert_eq!(b"foo baz bar".rfind_byteset(b"rabz "), Some(`10`));
1037	/// assert_eq!(b"foo baz bar".rfind_byteset(b"`\n`123"), None);
1038	/// ```
1039	#[inline]
1040	fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1041	byteset::rfind(self.as_bytes(), byteset.as_ref())
1042	}
1043
1044	/// Returns the index of the last occurrence of a byte that is not a member
1045	/// of the provided set.
1046	///
1047	/// The `byteset` may be any type that can be cheaply converted into a
1048	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but
1049	/// note that passing a `&str` which contains multibyte characters may not
1050	/// behave as you expect: each byte in the `&str` is treated as an
1051	/// individual member of the byte set.
1052	///
1053	/// Note that order is irrelevant for the `byteset` parameter, and
1054	/// duplicate bytes present in its body are ignored.
1055	///
1056	/// # Complexity
1057	///
1058	/// This routine is guaranteed to have worst case linear time complexity
1059	/// with respect to both the set of bytes and the haystack. That is, this
1060	/// runs in `O(byteset.len() + haystack.len())` time.
1061	///
1062	/// This routine is also guaranteed to have worst case constant space
1063	/// complexity.
1064	///
1065	/// # Examples
1066	///
1067	/// Basic usage:
1068	///
1069	/// ```
1070	/// use bstr::ByteSlice;
1071	///
1072	/// assert_eq!(b"foo bar baz,`\t`".rfind_not_byteset(b",`\t`"), Some(`10`));
1073	/// assert_eq!(b"foo baz bar".rfind_not_byteset(b"rabz "), Some(`2`));
1074	/// assert_eq!(None, b"foo baz bar".rfind_not_byteset(b"barfoz "));
1075	/// ```
1076	#[inline]
1077	fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> {
1078	byteset::rfind_not(self.as_bytes(), byteset.as_ref())
1079	}
1080
1081	/// Returns an iterator over the fields in a byte string, separated
1082	/// by contiguous whitespace (according to the Unicode property
1083	/// `White_Space`).
1084	///
1085	/// # Example
1086	///
1087	/// Basic usage:
1088	///
1089	/// ```
1090	/// use bstr::{B, ByteSlice};
1091	///
1092	/// let s = B(" foo`\t`bar`\t\u{2003}\n`quux `\n`");
1093	/// let fields: Vec<&[u8]> = s.fields().collect();
1094	/// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1095	/// ```
1096	///
1097	/// A byte string consisting of just whitespace yields no elements:
1098	///
1099	/// ```
1100	/// use bstr::{B, ByteSlice};
1101	///
1102	/// assert_eq!(`0`, B(" `\n\t\u{2003}\n` `\t`").fields().count());
1103	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn fields(&self) -> Fields<'_> {
    // The iterator borrows the underlying bytes for its whole lifetime.
    let haystack = self.as_bytes();
    Fields::new(haystack)
}
1109
1110	/// Returns an iterator over the fields in a byte string, separated by
1111	/// contiguous codepoints satisfying the given predicate.
1112	///
1113	/// If this byte string is not valid UTF-8, then the given closure will
1114	/// be called with a Unicode replacement codepoint when invalid UTF-8
1115	/// bytes are seen.
1116	///
1117	/// # Example
1118	///
1119	/// Basic usage:
1120	///
1121	/// ```
1122	/// use bstr::{B, ByteSlice};
1123	///
1124	/// let s = b"123foo999999bar1quux123456";
1125	/// let fields: Vec<&[u8]> = s.fields_with(\|c\| c.is_numeric()).collect();
1126	/// assert_eq!(fields, vec![B("foo"), B("bar"), B("quux")]);
1127	/// ```
1128	///
1129	/// A byte string consisting of all codepoints satisfying the predicate
1130	/// yields no elements:
1131	///
1132	/// ```
1133	/// use bstr::ByteSlice;
1134	///
1135	/// assert_eq!(`0`, b"1911354563".fields_with(\|c\| c.is_numeric()).count());
1136	/// ```
1137	#[inline]
1138	fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> {
1139	FieldsWith::new(self.as_bytes(), f)
1140	}
1141
1142	/// Returns an iterator over substrings of this byte string, separated
1143	/// by the given byte string. Each element yielded is guaranteed not to
1144	/// include the splitter substring.
1145	///
1146	/// The splitter may be any type that can be cheaply converted into a
1147	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1148	///
1149	/// # Examples
1150	///
1151	/// Basic usage:
1152	///
1153	/// ```
1154	/// use bstr::{B, ByteSlice};
1155	///
1156	/// let x: Vec<&[u8]> = b"Mary had a little lamb".split_str(" ").collect();
1157	/// assert_eq!(x, vec![
1158	/// B("Mary"), B("had"), B("a"), B("little"), B("lamb"),
1159	/// ]);
1160	///
1161	/// let x: Vec<&[u8]> = b"".split_str("X").collect();
1162	/// assert_eq!(x, vec![b""]);
1163	///
1164	/// let x: Vec<&[u8]> = b"lionXXtigerXleopard".split_str("X").collect();
1165	/// assert_eq!(x, vec![B("lion"), B(""), B("tiger"), B("leopard")]);
1166	///
1167	/// let x: Vec<&[u8]> = b"lion::tiger::leopard".split_str("::").collect();
1168	/// assert_eq!(x, vec![B("lion"), B("tiger"), B("leopard")]);
1169	/// ```
1170	///
1171	/// If a string contains multiple contiguous separators, you will end up
1172	/// with empty strings yielded by the iterator:
1173	///
1174	/// ```
1175	/// use bstr::{B, ByteSlice};
1176	///
1177	/// let x: Vec<&[u8]> = b"\|\|\|\|a\|\|b\|c".split_str("\|").collect();
1178	/// assert_eq!(x, vec![
1179	/// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1180	/// ]);
1181	///
1182	/// let x: Vec<&[u8]> = b"(///)".split_str("/").collect();
1183	/// assert_eq!(x, vec![B("("), B(""), B(""), B(")")]);
1184	/// ```
1185	///
1186	/// Separators at the start or end of a string are neighbored by empty
1187	/// strings.
1188	///
1189	/// ```
1190	/// use bstr::{B, ByteSlice};
1191	///
1192	/// let x: Vec<&[u8]> = b"010".split_str("0").collect();
1193	/// assert_eq!(x, vec![B(""), B("1"), B("")]);
1194	/// ```
1195	///
1196	/// When the empty string is used as a separator, it splits every byte
1197	/// in the byte string, along with the beginning and end of the byte
1198	/// string.
1199	///
1200	/// ```
1201	/// use bstr::{B, ByteSlice};
1202	///
1203	/// let x: Vec<&[u8]> = b"rust".split_str("").collect();
1204	/// assert_eq!(x, vec![
1205	/// B(""), B("r"), B("u"), B("s"), B("t"), B(""),
1206	/// ]);
1207	///
1208	/// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1209	/// // may not be valid UTF-8!
1210	/// let x: Vec<&[u8]> = B("☃").split_str("").collect();
1211	/// assert_eq!(x, vec![
1212	/// B(""), B(b"`\xE2`"), B(b"`\x98`"), B(b"`\x83`"), B(""),
1213	/// ]);
1214	/// ```
1215	///
1216	/// Contiguous separators, especially whitespace, can lead to possibly
1217	/// surprising behavior. For example, this code is correct:
1218	///
1219	/// ```
1220	/// use bstr::{B, ByteSlice};
1221	///
1222	/// let x: Vec<&[u8]> = b" a b c".split_str(" ").collect();
1223	/// assert_eq!(x, vec![
1224	/// B(""), B(""), B(""), B(""), B("a"), B(""), B("b"), B("c"),
1225	/// ]);
1226	/// ```
1227	///
1228	/// It does *not* give you `["a", "b", "c"]`. For that behavior, use
1229	/// [`fields`](#method.fields) instead.
1230	#[inline]
1231	fn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1232	&'h self,
1233	splitter: &'s B,
1234	) -> Split<'h, 's> {
1235	Split::new(self.as_bytes(), splitter.as_ref())
1236	}
1237
1238	/// Returns an iterator over substrings of this byte string, separated by
1239	/// the given byte string, in reverse. Each element yielded is guaranteed
1240	/// not to include the splitter substring.
1241	///
1242	/// The splitter may be any type that can be cheaply converted into a
1243	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1244	///
1245	/// # Examples
1246	///
1247	/// Basic usage:
1248	///
1249	/// ```
1250	/// use bstr::{B, ByteSlice};
1251	///
1252	/// let x: Vec<&[u8]> =
1253	/// b"Mary had a little lamb".rsplit_str(" ").collect();
1254	/// assert_eq!(x, vec![
1255	/// B("lamb"), B("little"), B("a"), B("had"), B("Mary"),
1256	/// ]);
1257	///
1258	/// let x: Vec<&[u8]> = b"".rsplit_str("X").collect();
1259	/// assert_eq!(x, vec![b""]);
1260	///
1261	/// let x: Vec<&[u8]> = b"lionXXtigerXleopard".rsplit_str("X").collect();
1262	/// assert_eq!(x, vec![B("leopard"), B("tiger"), B(""), B("lion")]);
1263	///
1264	/// let x: Vec<&[u8]> = b"lion::tiger::leopard".rsplit_str("::").collect();
1265	/// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lion")]);
1266	/// ```
1267	///
1268	/// If a string contains multiple contiguous separators, you will end up
1269	/// with empty strings yielded by the iterator:
1270	///
1271	/// ```
1272	/// use bstr::{B, ByteSlice};
1273	///
1274	/// let x: Vec<&[u8]> = b"\|\|\|\|a\|\|b\|c".rsplit_str("\|").collect();
1275	/// assert_eq!(x, vec![
1276	/// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1277	/// ]);
1278	///
1279	/// let x: Vec<&[u8]> = b"(///)".rsplit_str("/").collect();
1280	/// assert_eq!(x, vec![B(")"), B(""), B(""), B("(")]);
1281	/// ```
1282	///
1283	/// Separators at the start or end of a string are neighbored by empty
1284	/// strings.
1285	///
1286	/// ```
1287	/// use bstr::{B, ByteSlice};
1288	///
1289	/// let x: Vec<&[u8]> = b"010".rsplit_str("0").collect();
1290	/// assert_eq!(x, vec![B(""), B("1"), B("")]);
1291	/// ```
1292	///
1293	/// When the empty string is used as a separator, it splits every byte
1294	/// in the byte string, along with the beginning and end of the byte
1295	/// string.
1296	///
1297	/// ```
1298	/// use bstr::{B, ByteSlice};
1299	///
1300	/// let x: Vec<&[u8]> = b"rust".rsplit_str("").collect();
1301	/// assert_eq!(x, vec![
1302	/// B(""), B("t"), B("s"), B("u"), B("r"), B(""),
1303	/// ]);
1304	///
1305	/// // Splitting by an empty string is not UTF-8 aware. Elements yielded
1306	/// // may not be valid UTF-8!
1307	/// let x: Vec<&[u8]> = B("☃").rsplit_str("").collect();
1308	/// assert_eq!(x, vec![B(""), B(b"`\x83`"), B(b"`\x98`"), B(b"`\xE2`"), B("")]);
1309	/// ```
1310	///
1311	/// Contiguous separators, especially whitespace, can lead to possibly
1312	/// surprising behavior. For example, this code is correct:
1313	///
1314	/// ```
1315	/// use bstr::{B, ByteSlice};
1316	///
1317	/// let x: Vec<&[u8]> = b" a b c".rsplit_str(" ").collect();
1318	/// assert_eq!(x, vec![
1319	/// B("c"), B("b"), B(""), B("a"), B(""), B(""), B(""), B(""),
1320	/// ]);
1321	/// ```
1322	///
1323	/// It does *not* give you `["a", "b", "c"]`.
1324	#[inline]
1325	fn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1326	&'h self,
1327	splitter: &'s B,
1328	) -> SplitReverse<'h, 's> {
1329	SplitReverse::new(self.as_bytes(), splitter.as_ref())
1330	}
1331
1332	/// Split this byte string at the first occurrence of `splitter`.
1333	///
1334	/// If the `splitter` is found in the byte string, returns a tuple
1335	/// containing the parts of the string before and after the first occurrence
1336	/// of `splitter` respectively. Otherwise, if there are no occurrences of
1337	/// `splitter` in the byte string, returns `None`.
1338	///
1339	/// The splitter may be any type that can be cheaply converted into a
1340	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1341	///
1342	/// If you need to split on the *last* instance of a delimiter instead, see
1343	/// the [`ByteSlice::rsplit_once_str`](#method.rsplit_once_str) method.
1344	///
1345	/// # Examples
1346	///
1347	/// Basic usage:
1348	///
1349	/// ```
1350	/// use bstr::{B, ByteSlice};
1351	///
1352	/// assert_eq!(
1353	/// B("foo,bar").split_once_str(","),
1354	/// Some((B("foo"), B("bar"))),
1355	/// );
1356	/// assert_eq!(
1357	/// B("foo,bar,baz").split_once_str(","),
1358	/// Some((B("foo"), B("bar,baz"))),
1359	/// );
1360	/// assert_eq!(B("foo").split_once_str(","), None);
1361	/// assert_eq!(B("foo,").split_once_str(b","), Some((B("foo"), B(""))));
1362	/// assert_eq!(B(",foo").split_once_str(b","), Some((B(""), B("foo"))));
1363	/// ```
1364	#[inline]
1365	fn split_once_str<'a, B: ?Sized + AsRef<[u8]>>(
1366	&'a self,
1367	splitter: &B,
1368	) -> Option<(&'a [u8], &'a [u8])> {
1369	let bytes = self.as_bytes();
1370	let splitter = splitter.as_ref();
1371	let start = Finder::new(splitter).find(bytes)?;
1372	let end = start + splitter.len();
1373	Some((&bytes[..start], &bytes[end..]))
1374	}
1375
1376	/// Split this byte string at the last occurrence of `splitter`.
1377	///
1378	/// If the `splitter` is found in the byte string, returns a tuple
1379	/// containing the parts of the string before and after the last occurrence
1380	/// of `splitter`, respectively. Otherwise, if there are no occurrences of
1381	/// `splitter` in the byte string, returns `None`.
1382	///
1383	/// The splitter may be any type that can be cheaply converted into a
1384	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1385	///
1386	/// If you need to split on the *first* instance of a delimiter instead, see
1387	/// the [`ByteSlice::split_once_str`](#method.split_once_str) method.
1388	///
1389	/// # Examples
1390	///
1391	/// Basic usage:
1392	///
1393	/// ```
1394	/// use bstr::{B, ByteSlice};
1395	///
1396	/// assert_eq!(
1397	/// B("foo,bar").rsplit_once_str(","),
1398	/// Some((B("foo"), B("bar"))),
1399	/// );
1400	/// assert_eq!(
1401	/// B("foo,bar,baz").rsplit_once_str(","),
1402	/// Some((B("foo,bar"), B("baz"))),
1403	/// );
1404	/// assert_eq!(B("foo").rsplit_once_str(","), None);
1405	/// assert_eq!(B("foo,").rsplit_once_str(b","), Some((B("foo"), B(""))));
1406	/// assert_eq!(B(",foo").rsplit_once_str(b","), Some((B(""), B("foo"))));
1407	/// ```
1408	#[inline]
1409	fn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>(
1410	&'a self,
1411	splitter: &B,
1412	) -> Option<(&'a [u8], &'a [u8])> {
1413	let bytes = self.as_bytes();
1414	let splitter = splitter.as_ref();
1415	let start = FinderReverse::new(splitter).rfind(bytes)?;
1416	let end = start + splitter.len();
1417	Some((&bytes[..start], &bytes[end..]))
1418	}
1419
1420	/// Returns an iterator of at most `limit` substrings of this byte string,
1421	/// separated by the given byte string. If `limit` substrings are yielded,
1422	/// then the last substring will contain the remainder of this byte string.
1423	///
1424	/// The splitter may be any type that can be cheaply converted into a
1425	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1426	///
1427	/// # Examples
1428	///
1429	/// Basic usage:
1430	///
1431	/// ```
1432	/// use bstr::{B, ByteSlice};
1433	///
1434	/// let x: Vec<_> = b"Mary had a little lamb".splitn_str(`3`, " ").collect();
1435	/// assert_eq!(x, vec![B("Mary"), B("had"), B("a little lamb")]);
1436	///
1437	/// let x: Vec<_> = b"".splitn_str(`3`, "X").collect();
1438	/// assert_eq!(x, vec![b""]);
1439	///
1440	/// let x: Vec<_> = b"lionXXtigerXleopard".splitn_str(`3`, "X").collect();
1441	/// assert_eq!(x, vec![B("lion"), B(""), B("tigerXleopard")]);
1442	///
1443	/// let x: Vec<_> = b"lion::tiger::leopard".splitn_str(`2`, "::").collect();
1444	/// assert_eq!(x, vec![B("lion"), B("tiger::leopard")]);
1445	///
1446	/// let x: Vec<_> = b"abcXdef".splitn_str(`1`, "X").collect();
1447	/// assert_eq!(x, vec![B("abcXdef")]);
1448	///
1449	/// let x: Vec<_> = b"abcdef".splitn_str(`2`, "X").collect();
1450	/// assert_eq!(x, vec![B("abcdef")]);
1451	///
1452	/// let x: Vec<_> = b"abcXdef".splitn_str(`0`, "X").collect();
1453	/// assert!(x.is_empty());
1454	/// ```
1455	#[inline]
1456	fn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1457	&'h self,
1458	limit: usize,
1459	splitter: &'s B,
1460	) -> SplitN<'h, 's> {
1461	SplitN::new(self.as_bytes(), splitter.as_ref(), limit)
1462	}
1463
1464	/// Returns an iterator of at most `limit` substrings of this byte string,
1465	/// separated by the given byte string, in reverse. If `limit` substrings
1466	/// are yielded, then the last substring will contain the remainder of this
1467	/// byte string.
1468	///
1469	/// The splitter may be any type that can be cheaply converted into a
1470	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
1471	///
1472	/// # Examples
1473	///
1474	/// Basic usage:
1475	///
1476	/// ```
1477	/// use bstr::{B, ByteSlice};
1478	///
1479	/// let x: Vec<_> =
1480	/// b"Mary had a little lamb".rsplitn_str(`3`, " ").collect();
1481	/// assert_eq!(x, vec![B("lamb"), B("little"), B("Mary had a")]);
1482	///
1483	/// let x: Vec<_> = b"".rsplitn_str(`3`, "X").collect();
1484	/// assert_eq!(x, vec![b""]);
1485	///
1486	/// let x: Vec<_> = b"lionXXtigerXleopard".rsplitn_str(`3`, "X").collect();
1487	/// assert_eq!(x, vec![B("leopard"), B("tiger"), B("lionX")]);
1488	///
1489	/// let x: Vec<_> = b"lion::tiger::leopard".rsplitn_str(`2`, "::").collect();
1490	/// assert_eq!(x, vec![B("leopard"), B("lion::tiger")]);
1491	///
1492	/// let x: Vec<_> = b"abcXdef".rsplitn_str(`1`, "X").collect();
1493	/// assert_eq!(x, vec![B("abcXdef")]);
1494	///
1495	/// let x: Vec<_> = b"abcdef".rsplitn_str(`2`, "X").collect();
1496	/// assert_eq!(x, vec![B("abcdef")]);
1497	///
1498	/// let x: Vec<_> = b"abcXdef".rsplitn_str(`0`, "X").collect();
1499	/// assert!(x.is_empty());
1500	/// ```
1501	#[inline]
1502	fn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>(
1503	&'h self,
1504	limit: usize,
1505	splitter: &'s B,
1506	) -> SplitNReverse<'h, 's> {
1507	SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit)
1508	}
1509
1510	/// Replace all matches of the given needle with the given replacement, and
1511	/// return the result as a new `Vec<u8>`.
1512	///
1513	/// This routine is useful as a convenience. If you need to reuse an
1514	/// allocation, use [`replace_into`](#method.replace_into) instead.
1515	///
1516	/// # Examples
1517	///
1518	/// Basic usage:
1519	///
1520	/// ```
1521	/// use bstr::ByteSlice;
1522	///
1523	/// let s = b"this is old".replace("old", "new");
1524	/// assert_eq!(s, "this is new".as_bytes());
1525	/// ```
1526	///
1527	/// When the pattern doesn't match:
1528	///
1529	/// ```
1530	/// use bstr::ByteSlice;
1531	///
1532	/// let s = b"this is old".replace("nada nada", "limonada");
1533	/// assert_eq!(s, "this is old".as_bytes());
1534	/// ```
1535	///
1536	/// When the needle is an empty string:
1537	///
1538	/// ```
1539	/// use bstr::ByteSlice;
1540	///
1541	/// let s = b"foo".replace("", "Z");
1542	/// assert_eq!(s, "ZfZoZoZ".as_bytes());
1543	/// ```
#[cfg(feature = "alloc")]
#[inline]
fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
) -> Vec<u8> {
    // Start with room for as many bytes as the input holds; the vector
    // still grows on demand if the output ends up longer.
    let capacity = self.as_bytes().len();
    let mut replaced = Vec::with_capacity(capacity);
    self.replace_into(needle, replacement, &mut replaced);
    replaced
}
1555
1556	/// Replace up to `limit` matches of the given needle with the given
1557	/// replacement, and return the result as a new `Vec<u8>`.
1558	///
1559	/// This routine is useful as a convenience. If you need to reuse an
1560	/// allocation, use [`replacen_into`](#method.replacen_into) instead.
1561	///
1562	/// # Examples
1563	///
1564	/// Basic usage:
1565	///
1566	/// ```
1567	/// use bstr::ByteSlice;
1568	///
1569	/// let s = b"foofoo".replacen("o", "z", `2`);
1570	/// assert_eq!(s, "fzzfoo".as_bytes());
1571	/// ```
1572	///
1573	/// When the pattern doesn't match:
1574	///
1575	/// ```
1576	/// use bstr::ByteSlice;
1577	///
1578	/// let s = b"foofoo".replacen("a", "z", `2`);
1579	/// assert_eq!(s, "foofoo".as_bytes());
1580	/// ```
1581	///
1582	/// When the needle is an empty string:
1583	///
1584	/// ```
1585	/// use bstr::ByteSlice;
1586	///
1587	/// let s = b"foo".replacen("", "Z", `2`);
1588	/// assert_eq!(s, "ZfZoo".as_bytes());
1589	/// ```
#[cfg(feature = "alloc")]
#[inline]
fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    limit: usize,
) -> Vec<u8> {
    // Start with room for as many bytes as the input holds; the vector
    // still grows on demand if the output ends up longer.
    let capacity = self.as_bytes().len();
    let mut replaced = Vec::with_capacity(capacity);
    self.replacen_into(needle, replacement, limit, &mut replaced);
    replaced
}
1602
1603	/// Replace all matches of the given needle with the given replacement,
1604	/// and write the result into the provided `Vec<u8>`.
1605	///
1606	/// This does *not* clear `dest` before writing to it.
1607	///
1608	/// This routine is useful for reusing an allocation. For a more convenient
1609	/// API, use [`replace`](#method.replace) instead.
1610	///
1611	/// # Examples
1612	///
1613	/// Basic usage:
1614	///
1615	/// ```
1616	/// use bstr::ByteSlice;
1617	///
1618	/// let s = b"this is old";
1619	///
1620	/// let mut dest = vec![];
1621	/// s.replace_into("old", "new", &mut dest);
1622	/// assert_eq!(dest, "this is new".as_bytes());
1623	/// ```
1624	///
1625	/// When the pattern doesn't match:
1626	///
1627	/// ```
1628	/// use bstr::ByteSlice;
1629	///
1630	/// let s = b"this is old";
1631	///
1632	/// let mut dest = vec![];
1633	/// s.replace_into("nada nada", "limonada", &mut dest);
1634	/// assert_eq!(dest, "this is old".as_bytes());
1635	/// ```
1636	///
1637	/// When the needle is an empty string:
1638	///
1639	/// ```
1640	/// use bstr::ByteSlice;
1641	///
1642	/// let s = b"foo";
1643	///
1644	/// let mut dest = vec![];
1645	/// s.replace_into("", "Z", &mut dest);
1646	/// assert_eq!(dest, "ZfZoZoZ".as_bytes());
1647	/// ```
#[cfg(feature = "alloc")]
#[inline]
fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    dest: &mut Vec<u8>,
) {
    let haystack = self.as_bytes();
    let pattern = needle.as_ref();
    let substitute = replacement.as_ref();

    // `copied_upto` marks the end of the input already accounted for:
    // everything before it has been appended to `dest`, either verbatim
    // or as a substitution.
    let mut copied_upto = 0;
    for match_start in self.find_iter(pattern) {
        // Emit the untouched gap before this match, then the substitute.
        dest.push_str(&haystack[copied_upto..match_start]);
        dest.push_str(substitute);
        copied_upto = match_start + pattern.len();
    }
    // Whatever follows the final match is appended unchanged.
    dest.push_str(&haystack[copied_upto..]);
}
1666
1667	/// Replace up to `limit` matches of the given needle with the given
1668	/// replacement, and write the result into the provided `Vec<u8>`.
1669	///
1670	/// This does *not* clear `dest` before writing to it.
1671	///
1672	/// This routine is useful for reusing an allocation. For a more convenient
1673	/// API, use [`replacen`](#method.replacen) instead.
1674	///
1675	/// # Examples
1676	///
1677	/// Basic usage:
1678	///
1679	/// ```
1680	/// use bstr::ByteSlice;
1681	///
1682	/// let s = b"foofoo";
1683	///
1684	/// let mut dest = vec![];
1685	/// s.replacen_into("o", "z", `2`, &mut dest);
1686	/// assert_eq!(dest, "fzzfoo".as_bytes());
1687	/// ```
1688	///
1689	/// When the pattern doesn't match:
1690	///
1691	/// ```
1692	/// use bstr::ByteSlice;
1693	///
1694	/// let s = b"foofoo";
1695	///
1696	/// let mut dest = vec![];
1697	/// s.replacen_into("a", "z", `2`, &mut dest);
1698	/// assert_eq!(dest, "foofoo".as_bytes());
1699	/// ```
1700	///
1701	/// When the needle is an empty string:
1702	///
1703	/// ```
1704	/// use bstr::ByteSlice;
1705	///
1706	/// let s = b"foo";
1707	///
1708	/// let mut dest = vec![];
1709	/// s.replacen_into("", "Z", `2`, &mut dest);
1710	/// assert_eq!(dest, "ZfZoo".as_bytes());
1711	/// ```
#[cfg(feature = "alloc")]
#[inline]
fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    limit: usize,
    dest: &mut Vec<u8>,
) {
    let haystack = self.as_bytes();
    let pattern = needle.as_ref();
    let substitute = replacement.as_ref();

    // `copied_upto` marks the end of the input already accounted for:
    // everything before it has been appended to `dest`, either verbatim
    // or as a substitution.
    let mut copied_upto = 0;
    // Only the first `limit` matches are substituted.
    for match_start in self.find_iter(pattern).take(limit) {
        // Emit the untouched gap before this match, then the substitute.
        dest.push_str(&haystack[copied_upto..match_start]);
        dest.push_str(substitute);
        copied_upto = match_start + pattern.len();
    }
    // The rest of the input, including any matches past the limit, is
    // appended unchanged.
    dest.push_str(&haystack[copied_upto..]);
}
1731
1732	/// Returns an iterator over the bytes in this byte string.
1733	///
1734	/// # Examples
1735	///
1736	/// Basic usage:
1737	///
1738	/// ```
1739	/// use bstr::ByteSlice;
1740	///
1741	/// let bs = b"foobar";
1742	/// let bytes: Vec<u8> = bs.bytes().collect();
1743	/// assert_eq!(bytes, bs);
1744	/// ```
1745	#[inline]
1746	fn bytes(&self) -> Bytes<'_> {
1747	Bytes { it: self.as_bytes().iter() }
1748	}
1749
1750	/// Returns an iterator over the Unicode scalar values in this byte string.
1751	/// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1752	/// is yielded instead.
1753	///
1754	/// # Examples
1755	///
1756	/// Basic usage:
1757	///
1758	/// ```
1759	/// use bstr::ByteSlice;
1760	///
1761	/// let bs = b"`\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61`";
1762	/// let chars: Vec<char> = bs.chars().collect();
1763	/// assert_eq!(vec!['☃', '`\u{FFFD}`', '𝞃', '`\u{FFFD}`', 'a'], chars);
1764	/// ```
1765	///
1766	/// Codepoints can also be iterated over in reverse:
1767	///
1768	/// ```
1769	/// use bstr::ByteSlice;
1770	///
1771	/// let bs = b"`\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61`";
1772	/// let chars: Vec<char> = bs.chars().rev().collect();
1773	/// assert_eq!(vec!['a', '`\u{FFFD}`', '𝞃', '`\u{FFFD}`', '☃'], chars);
1774	/// ```
1775	#[inline]
1776	fn chars(&self) -> Chars<'_> {
1777	Chars::new(self.as_bytes())
1778	}
1779
1780	/// Returns an iterator over the Unicode scalar values in this byte string
1781	/// along with their starting and ending byte index positions. If invalid
1782	/// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1783	/// instead.
1784	///
1785	/// Note that this is slightly different from the `CharIndices` iterator
1786	/// provided by the standard library. Aside from working on possibly
1787	/// invalid UTF-8, this iterator provides both the corresponding starting
1788	/// and ending byte indices of each codepoint yielded. The ending position
1789	/// is necessary to slice the original byte string when invalid UTF-8 bytes
1790	/// are converted into a Unicode replacement codepoint, since a single
1791	/// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes
1792	/// (inclusive).
1793	///
1794	/// # Examples
1795	///
1796	/// Basic usage:
1797	///
1798	/// ```
1799	/// use bstr::ByteSlice;
1800	///
1801	/// let bs = b"`\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61`";
1802	/// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect();
1803	/// assert_eq!(chars, vec![
1804	/// (`0`, `3`, '☃'),
1805	/// (`3`, `4`, '`\u{FFFD}`'),
1806	/// (`4`, `8`, '𝞃'),
1807	/// (`8`, `10`, '`\u{FFFD}`'),
1808	/// (`10`, `11`, 'a'),
1809	/// ]);
1810	/// ```
1811	///
1812	/// Codepoints can also be iterated over in reverse:
1813	///
1814	/// ```
1815	/// use bstr::ByteSlice;
1816	///
1817	/// let bs = b"`\xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61`";
1818	/// let chars: Vec<(usize, usize, char)> = bs
1819	/// .char_indices()
1820	/// .rev()
1821	/// .collect();
1822	/// assert_eq!(chars, vec![
1823	/// (`10`, `11`, 'a'),
1824	/// (`8`, `10`, '`\u{FFFD}`'),
1825	/// (`4`, `8`, '𝞃'),
1826	/// (`3`, `4`, '`\u{FFFD}`'),
1827	/// (`0`, `3`, '☃'),
1828	/// ]);
1829	/// ```
1830	#[inline]
1831	fn char_indices(&self) -> CharIndices<'_> {
1832	CharIndices::new(self.as_bytes())
1833	}
1834
1835	/// Iterate over chunks of valid UTF-8.
1836	///
1837	/// The iterator returned yields chunks of valid UTF-8 separated by invalid
1838	/// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes,
1839	/// which are determined via the "substitution of maximal subparts"
1840	/// strategy described in the docs for the
1841	/// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy)
1842	/// method.
1843	///
1844	/// # Examples
1845	///
1846	/// This example shows how to gather all valid and invalid chunks from a
1847	/// byte slice:
1848	///
1849	/// ```
1850	/// use bstr::{ByteSlice, Utf8Chunk};
1851	///
1852	/// let bytes = b"foo`\xFD\xFE`bar`\xFF`";
1853	///
1854	/// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]);
1855	/// for chunk in bytes.utf8_chunks() {
1856	/// if !chunk.valid().is_empty() {
1857	/// valid_chunks.push(chunk.valid());
1858	/// }
1859	/// if !chunk.invalid().is_empty() {
1860	/// invalid_chunks.push(chunk.invalid());
1861	/// }
1862	/// }
1863	///
1864	/// assert_eq!(valid_chunks, vec!["foo", "bar"]);
1865	/// assert_eq!(invalid_chunks, vec![b"`\xFD`", b"`\xFE`", b"`\xFF`"]);
1866	/// ```
1867	#[inline]
1868	fn utf8_chunks(&self) -> Utf8Chunks<'_> {
1869	Utf8Chunks { bytes: self.as_bytes() }
1870	}
1871
1872	/// Returns an iterator over the grapheme clusters in this byte string.
1873	/// If invalid UTF-8 is encountered, then the Unicode replacement codepoint
1874	/// is yielded instead.
1875	///
1876	/// # Examples
1877	///
1878	/// This example shows how multiple codepoints can combine to form a
1879	/// single grapheme cluster:
1880	///
1881	/// ```
1882	/// use bstr::ByteSlice;
1883	///
1884	/// let bs = "a`\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}`".as_bytes();
1885	/// let graphemes: Vec<&str> = bs.graphemes().collect();
1886	/// assert_eq!(vec!["à̖", "🇺🇸"], graphemes);
1887	/// ```
1888	///
1889	/// This shows that graphemes can be iterated over in reverse:
1890	///
1891	/// ```
1892	/// use bstr::ByteSlice;
1893	///
1894	/// let bs = "a`\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}`".as_bytes();
1895	/// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
1896	/// assert_eq!(vec!["🇺🇸", "à̖"], graphemes);
1897	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn graphemes(&self) -> Graphemes<'_> {
    // The iterator borrows the underlying bytes for its whole lifetime.
    let haystack = self.as_bytes();
    Graphemes::new(haystack)
}
1903
1904	/// Returns an iterator over the grapheme clusters in this byte string
1905	/// along with their starting and ending byte index positions. If invalid
1906	/// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1907	/// instead.
1908	///
1909	/// # Examples
1910	///
1911	/// This example shows how to get the byte offsets of each individual
1912	/// grapheme cluster:
1913	///
1914	/// ```
1915	/// use bstr::ByteSlice;
1916	///
1917	/// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
1918	/// let graphemes: Vec<(usize, usize, &str)> =
1919	/// bs.grapheme_indices().collect();
1920	/// assert_eq!(vec![(0, 5, "à̖"), (5, 13, "🇺🇸")], graphemes);
1921	/// ```
1922	///
1923	/// This example shows what happens when invalid UTF-8 is encountered. Note
1924	/// that the offsets are valid indices into the original string, and do
1925	/// not necessarily correspond to the length of the `&str` returned!
1926	///
1927	/// ```
1928	/// # #[cfg(all(feature = "alloc"))] {
1929	/// use bstr::{ByteSlice, ByteVec};
1930	///
1931	/// let mut bytes = vec![];
1932	/// bytes.push_str("a\u{0300}\u{0316}");
1933	/// bytes.push(b'\xFF');
1934	/// bytes.push_str("\u{1F1FA}\u{1F1F8}");
1935	///
1936	/// let graphemes: Vec<(usize, usize, &str)> =
1937	/// bytes.grapheme_indices().collect();
1938	/// assert_eq!(
1939	/// graphemes,
1940	/// vec![(0, 5, "à̖"), (5, 6, "\u{FFFD}"), (6, 14, "🇺🇸")]
1941	/// );
1942	/// # }
1943	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn grapheme_indices(&self) -> GraphemeIndices<'_> {
    // Build the offset-reporting grapheme iterator over the raw bytes.
    let haystack = self.as_bytes();
    GraphemeIndices::new(haystack)
}
1949
1950	/// Returns an iterator over the words in this byte string. If invalid
1951	/// UTF-8 is encountered, then the Unicode replacement codepoint is yielded
1952	/// instead.
1953	///
1954	/// This is similar to
1955	/// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
1956	/// except it only returns elements that contain a "word" character. A word
1957	/// character is defined by UTS #18 (Annex C) to be the combination of the
1958	/// `Alphabetic` and `Join_Control` properties, along with the
1959	/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1960	/// categories.
1961	///
1962	/// Since words are made up of one or more codepoints, this iterator
1963	/// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1964	/// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
1965	///
1966	/// # Examples
1967	///
1968	/// Basic usage:
1969	///
1970	/// ```
1971	/// use bstr::ByteSlice;
1972	///
1973	/// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
1974	/// let words: Vec<&str> = bs.words().collect();
1975	/// assert_eq!(words, vec![
1976	/// "The", "quick", "brown", "fox", "can't",
1977	/// "jump", "32.3", "feet", "right",
1978	/// ]);
1979	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn words(&self) -> Words<'_> {
    // Build the word iterator over the raw bytes of this byte string.
    let haystack = self.as_bytes();
    Words::new(haystack)
}
1985
1986	/// Returns an iterator over the words in this byte string along with
1987	/// their starting and ending byte index positions.
1988	///
1989	/// This is similar to
1990	/// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
1991	/// except it only returns elements that contain a "word" character. A word
1992	/// character is defined by UTS #18 (Annex C) to be the combination of the
1993	/// `Alphabetic` and `Join_Control` properties, along with the
1994	/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
1995	/// categories.
1996	///
1997	/// Since words are made up of one or more codepoints, this iterator
1998	/// yields `&str` elements. When invalid UTF-8 is encountered, replacement
1999	/// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2000	///
2001	/// # Examples
2002	///
2003	/// This example shows how to get the byte offsets of each individual
2004	/// word:
2005	///
2006	/// ```
2007	/// use bstr::ByteSlice;
2008	///
2009	/// let bs = b"can't jump 32.3 feet";
2010	/// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
2011	/// assert_eq!(words, vec![
2012	/// (0, 5, "can't"),
2013	/// (6, 10, "jump"),
2014	/// (11, 15, "32.3"),
2015	/// (16, 20, "feet"),
2016	/// ]);
2017	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn word_indices(&self) -> WordIndices<'_> {
    // Build the offset-reporting word iterator over the raw bytes.
    let haystack = self.as_bytes();
    WordIndices::new(haystack)
}
2023
2024	/// Returns an iterator over the words in this byte string, along with
2025	/// all breaks between the words. Concatenating all elements yielded by
2026	/// the iterator results in the original string (modulo Unicode replacement
2027	/// codepoint substitutions if invalid UTF-8 is encountered).
2028	///
2029	/// Since words are made up of one or more codepoints, this iterator
2030	/// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2031	/// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2032	///
2033	/// # Examples
2034	///
2035	/// Basic usage:
2036	///
2037	/// ```
2038	/// use bstr::ByteSlice;
2039	///
2040	/// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
2041	/// let words: Vec<&str> = bs.words_with_breaks().collect();
2042	/// assert_eq!(words, vec![
2043	/// "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
2044	/// " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
2045	/// ",", " ", "right", "?",
2046	/// ]);
2047	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn words_with_breaks(&self) -> WordsWithBreaks<'_> {
    // Build the word-and-break iterator over the raw bytes.
    let haystack = self.as_bytes();
    WordsWithBreaks::new(haystack)
}
2053
2054	/// Returns an iterator over the words and their byte offsets in this
2055	/// byte string, along with all breaks between the words. Concatenating
2056	/// all elements yielded by the iterator results in the original string
2057	/// (modulo Unicode replacement codepoint substitutions if invalid UTF-8 is
2058	/// encountered).
2059	///
2060	/// Since words are made up of one or more codepoints, this iterator
2061	/// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2062	/// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2063	///
2064	/// # Examples
2065	///
2066	/// This example shows how to get the byte offsets of each individual
2067	/// word:
2068	///
2069	/// ```
2070	/// use bstr::ByteSlice;
2071	///
2072	/// let bs = b"can't jump 32.3 feet";
2073	/// let words: Vec<(usize, usize, &str)> =
2074	/// bs.words_with_break_indices().collect();
2075	/// assert_eq!(words, vec![
2076	/// (0, 5, "can't"),
2077	/// (5, 6, " "),
2078	/// (6, 10, "jump"),
2079	/// (10, 11, " "),
2080	/// (11, 15, "32.3"),
2081	/// (15, 16, " "),
2082	/// (16, 20, "feet"),
2083	/// ]);
2084	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> {
    // Build the offset-reporting word-and-break iterator over the raw bytes.
    let haystack = self.as_bytes();
    WordsWithBreakIndices::new(haystack)
}
2090
2091	/// Returns an iterator over the sentences in this byte string.
2092	///
2093	/// Typically, a sentence will include its trailing punctuation and
2094	/// whitespace. Concatenating all elements yielded by the iterator
2095	/// results in the original string (modulo Unicode replacement codepoint
2096	/// substitutions if invalid UTF-8 is encountered).
2097	///
2098	/// Since sentences are made up of one or more codepoints, this iterator
2099	/// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2100	/// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2101	///
2102	/// # Examples
2103	///
2104	/// Basic usage:
2105	///
2106	/// ```
2107	/// use bstr::ByteSlice;
2108	///
2109	/// let bs = b"I want this. Not that. Right now.";
2110	/// let sentences: Vec<&str> = bs.sentences().collect();
2111	/// assert_eq!(sentences, vec![
2112	/// "I want this. ",
2113	/// "Not that. ",
2114	/// "Right now.",
2115	/// ]);
2116	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn sentences(&self) -> Sentences<'_> {
    // Build the sentence iterator over the raw bytes of this byte string.
    let haystack = self.as_bytes();
    Sentences::new(haystack)
}
2122
2123	/// Returns an iterator over the sentences in this byte string along with
2124	/// their starting and ending byte index positions.
2125	///
2126	/// Typically, a sentence will include its trailing punctuation and
2127	/// whitespace. Concatenating all elements yielded by the iterator
2128	/// results in the original string (modulo Unicode replacement codepoint
2129	/// substitutions if invalid UTF-8 is encountered).
2130	///
2131	/// Since sentences are made up of one or more codepoints, this iterator
2132	/// yields `&str` elements. When invalid UTF-8 is encountered, replacement
2133	/// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
2134	///
2135	/// # Examples
2136	///
2137	/// Basic usage:
2138	///
2139	/// ```
2140	/// use bstr::ByteSlice;
2141	///
2142	/// let bs = b"I want this. Not that. Right now.";
2143	/// let sentences: Vec<(usize, usize, &str)> =
2144	/// bs.sentence_indices().collect();
2145	/// assert_eq!(sentences, vec![
2146	/// (0, 13, "I want this. "),
2147	/// (13, 23, "Not that. "),
2148	/// (23, 33, "Right now."),
2149	/// ]);
2150	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn sentence_indices(&self) -> SentenceIndices<'_> {
    // Build the offset-reporting sentence iterator over the raw bytes.
    let haystack = self.as_bytes();
    SentenceIndices::new(haystack)
}
2156
2157	/// An iterator over all lines in a byte string, without their
2158	/// terminators.
2159	///
2160	/// For this iterator, the only line terminators recognized are `\r\n` and
2161	/// `\n`.
2162	///
2163	/// # Examples
2164	///
2165	/// Basic usage:
2166	///
2167	/// ```
2168	/// use bstr::{B, ByteSlice};
2169	///
2170	/// let s = b"\
2171	/// foo
2172	///
2173	/// bar\r
2174	/// baz
2175	///
2176	///
2177	/// quux";
2178	/// let lines: Vec<&[u8]> = s.lines().collect();
2179	/// assert_eq!(lines, vec![
2180	/// B("foo"), B(""), B("bar"), B("baz"), B(""), B(""), B("quux"),
2181	/// ]);
2182	/// ```
2183	#[inline]
2184	fn lines(&self) -> Lines<'_> {
2185	Lines::new(self.as_bytes())
2186	}
2187
2188	/// An iterator over all lines in a byte string, including their
2189	/// terminators.
2190	///
2191	/// For this iterator, the only line terminator recognized is `\n`. (Since
2192	/// line terminators are included, this also handles `\r\n` line endings.)
2193	///
2194	/// Line terminators are only included if they are present in the original
2195	/// byte string. For example, the last line in a byte string may not end
2196	/// with a line terminator.
2197	///
2198	/// Concatenating all elements yielded by this iterator is guaranteed to
2199	/// yield the original byte string.
2200	///
2201	/// # Examples
2202	///
2203	/// Basic usage:
2204	///
2205	/// ```
2206	/// use bstr::{B, ByteSlice};
2207	///
2208	/// let s = b"\
2209	/// foo
2210	///
2211	/// bar\r
2212	/// baz
2213	///
2214	///
2215	/// quux";
2216	/// let lines: Vec<&[u8]> = s.lines_with_terminator().collect();
2217	/// assert_eq!(lines, vec![
2218	/// B("foo\n"),
2219	/// B("\n"),
2220	/// B("bar\r\n"),
2221	/// B("baz\n"),
2222	/// B("\n"),
2223	/// B("\n"),
2224	/// B("quux"),
2225	/// ]);
2226	/// ```
2227	#[inline]
2228	fn lines_with_terminator(&self) -> LinesWithTerminator<'_> {
2229	LinesWithTerminator::new(self.as_bytes())
2230	}
2231
2232	/// Return a byte string slice with leading and trailing whitespace
2233	/// removed.
2234	///
2235	/// Whitespace is defined according to the terms of the `White_Space`
2236	/// Unicode property.
2237	///
2238	/// # Examples
2239	///
2240	/// Basic usage:
2241	///
2242	/// ```
2243	/// use bstr::{B, ByteSlice};
2244	///
2245	/// let s = B(" foo\tbar\t\u{2003}\n");
2246	/// assert_eq!(s.trim(), B("foo\tbar"));
2247	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn trim(&self) -> &[u8] {
    // Strip the front first, then strip the back of what remains.
    let without_leading = self.trim_start();
    without_leading.trim_end()
}
2253
2254	/// Return a byte string slice with leading whitespace removed.
2255	///
2256	/// Whitespace is defined according to the terms of the `White_Space`
2257	/// Unicode property.
2258	///
2259	/// # Examples
2260	///
2261	/// Basic usage:
2262	///
2263	/// ```
2264	/// use bstr::{B, ByteSlice};
2265	///
2266	/// let s = B(" foo\tbar\t\u{2003}\n");
2267	/// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
2268	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn trim_start(&self) -> &[u8] {
    let bytes = self.as_bytes();
    // Number of leading bytes reported as whitespace; slice past them.
    let skip = whitespace_len_fwd(bytes);
    &bytes[skip..]
}
2275
2276	/// Return a byte string slice with trailing whitespace removed.
2277	///
2278	/// Whitespace is defined according to the terms of the `White_Space`
2279	/// Unicode property.
2280	///
2281	/// # Examples
2282	///
2283	/// Basic usage:
2284	///
2285	/// ```
2286	/// use bstr::{B, ByteSlice};
2287	///
2288	/// let s = B(" foo\tbar\t\u{2003}\n");
2289	/// assert_eq!(s.trim_end(), B(" foo\tbar"));
2290	/// ```
#[cfg(feature = "unicode")]
#[inline]
fn trim_end(&self) -> &[u8] {
    let bytes = self.as_bytes();
    // The reverse scan yields the end offset of the slice to keep.
    let keep = whitespace_len_rev(bytes);
    &bytes[..keep]
}
2297
2298	/// Return a byte string slice with leading and trailing characters
2299	/// satisfying the given predicate removed.
2300	///
2301	/// # Examples
2302	///
2303	/// Basic usage:
2304	///
2305	/// ```
2306	/// use bstr::{B, ByteSlice};
2307	///
2308	/// let s = b"123foo5bar789";
2309	/// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar"));
2310	/// ```
2311	#[inline]
2312	fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2313	self.trim_start_with(&mut trim).trim_end_with(&mut trim)
2314	}
2315
2316	/// Return a byte string slice with leading characters satisfying the given
2317	/// predicate removed.
2318	///
2319	/// # Examples
2320	///
2321	/// Basic usage:
2322	///
2323	/// ```
2324	/// use bstr::{B, ByteSlice};
2325	///
2326	/// let s = b"123foo5bar789";
2327	/// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789"));
2328	/// ```
2329	#[inline]
2330	fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2331	for (s, _, ch) in self.char_indices() {
2332	if !trim(ch) {
2333	return &self.as_bytes()[s..];
2334	}
2335	}
2336	b""
2337	}
2338
2339	/// Return a byte string slice with trailing characters satisfying the
2340	/// given predicate removed.
2341	///
2342	/// # Examples
2343	///
2344	/// Basic usage:
2345	///
2346	/// ```
2347	/// use bstr::{B, ByteSlice};
2348	///
2349	/// let s = b"123foo5bar789";
2350	/// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar"));
2351	/// ```
2352	#[inline]
2353	fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] {
2354	for (_, e, ch) in self.char_indices().rev() {
2355	if !trim(ch) {
2356	return &self.as_bytes()[..e];
2357	}
2358	}
2359	b""
2360	}
2361
2362	/// Returns a new `Vec<u8>` containing the lowercase equivalent of this
2363	/// byte string.
2364	///
2365	/// In this case, lowercase is defined according to the `Lowercase` Unicode
2366	/// property.
2367	///
2368	/// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2369	/// then it is copied into the returned `Vec<u8>` unchanged.
2370	///
2371	/// Note that some characters in this byte string may expand into multiple
2372	/// characters when changing the case, so the number of bytes in the
2373	/// returned `Vec<u8>` may not be equivalent to the number of bytes in
2374	/// this byte string.
2375	///
2376	/// If you'd like to reuse an allocation for performance reasons, then use
2377	/// [`to_lowercase_into`](#method.to_lowercase_into) instead.
2378	///
2379	/// # Examples
2380	///
2381	/// Basic usage:
2382	///
2383	/// ```
2384	/// use bstr::{B, ByteSlice};
2385	///
2386	/// let s = B("HELLO Β");
2387	/// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
2388	/// ```
2389	///
2390	/// Scripts without case are not changed:
2391	///
2392	/// ```
2393	/// use bstr::{B, ByteSlice};
2394	///
2395	/// let s = B("农历新年");
2396	/// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
2397	/// ```
2398	///
2399	/// Invalid UTF-8 remains as is:
2400	///
2401	/// ```
2402	/// use bstr::{B, ByteSlice};
2403	///
2404	/// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2405	/// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
2406	/// ```
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_lowercase(&self) -> Vec<u8> {
    // Start from an empty buffer and let the `_into` variant fill it.
    let mut lowered: Vec<u8> = vec![];
    self.to_lowercase_into(&mut lowered);
    lowered
}
2414
2415	/// Writes the lowercase equivalent of this byte string into the given
2416	/// buffer. The buffer is not cleared before written to.
2417	///
2418	/// In this case, lowercase is defined according to the `Lowercase`
2419	/// Unicode property.
2420	///
2421	/// If invalid UTF-8 is seen, or if a character has no lowercase variant,
2422	/// then it is written to the given buffer unchanged.
2423	///
2424	/// Note that some characters in this byte string may expand into multiple
2425	/// characters when changing the case, so the number of bytes written to
2426	/// the given byte string may not be equivalent to the number of bytes in
2427	/// this byte string.
2428	///
2429	/// If you don't need to amortize allocation and instead prefer
2430	/// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
2431	///
2432	/// # Examples
2433	///
2434	/// Basic usage:
2435	///
2436	/// ```
2437	/// use bstr::{B, ByteSlice};
2438	///
2439	/// let s = B("HELLO Β");
2440	///
2441	/// let mut buf = vec![];
2442	/// s.to_lowercase_into(&mut buf);
2443	/// assert_eq!("hello β".as_bytes(), buf.as_bytes());
2444	/// ```
2445	///
2446	/// Scripts without case are not changed:
2447	///
2448	/// ```
2449	/// use bstr::{B, ByteSlice};
2450	///
2451	/// let s = B("农历新年");
2452	///
2453	/// let mut buf = vec![];
2454	/// s.to_lowercase_into(&mut buf);
2455	/// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
2456	/// ```
2457	///
2458	/// Invalid UTF-8 remains as is:
2459	///
2460	/// ```
2461	/// use bstr::{B, ByteSlice};
2462	///
2463	/// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2464	///
2465	/// let mut buf = vec![];
2466	/// s.to_lowercase_into(&mut buf);
2467	/// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
2468	/// ```
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
    // TODO: This is the best we can do given what std exposes I think.
    // If we roll our own case handling, then we might be able to do this
    // a bit faster. We shouldn't roll our own case handling unless we
    // need to, e.g., for doing caseless matching or case folding.

    // TODO(BUG): This doesn't handle any special casing rules.

    // Reserve at least the input length up front; case mapping may still
    // produce more bytes than that.
    buf.reserve(self.as_bytes().len());
    for (s, e, ch) in self.char_indices() {
        if ch == '\u{FFFD}' {
            // A replacement codepoint stands for either invalid UTF-8 or a
            // literal U+FFFD; in both cases copy the source bytes verbatim
            // so that invalid input is preserved as-is.
            buf.push_str(&self.as_bytes()[s..e]);
        } else if ch.is_ascii() {
            // ASCII maps one-to-one, so skip the general iterator.
            buf.push_char(ch.to_ascii_lowercase());
        } else {
            // A single codepoint may lowercase to several codepoints.
            for lower in ch.to_lowercase() {
                buf.push_char(lower);
            }
        }
    }
}
2492
2493	/// Returns a new `Vec<u8>` containing the ASCII lowercase equivalent of
2494	/// this byte string.
2495	///
2496	/// In this case, lowercase is only defined in ASCII letters. Namely, the
2497	/// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2498	/// In particular, the length of the byte string returned is always
2499	/// equivalent to the length of this byte string.
2500	///
2501	/// If you'd like to reuse an allocation for performance reasons, then use
2502	/// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
2503	/// the conversion in place.
2504	///
2505	/// # Examples
2506	///
2507	/// Basic usage:
2508	///
2509	/// ```
2510	/// use bstr::{B, ByteSlice};
2511	///
2512	/// let s = B("HELLO Β");
2513	/// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
2514	/// ```
2515	///
2516	/// Invalid UTF-8 remains as is:
2517	///
2518	/// ```
2519	/// use bstr::{B, ByteSlice};
2520	///
2521	/// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
2522	/// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
2523	/// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_ascii_lowercase(&self) -> Vec<u8> {
    // Delegate to the method of the same name on the underlying byte slice.
    let bytes = self.as_bytes();
    bytes.to_ascii_lowercase()
}
2529
2530	/// Convert this byte string to its lowercase ASCII equivalent in place.
2531	///
2532	/// In this case, lowercase is only defined in ASCII letters. Namely, the
2533	/// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged.
2534	///
2535	/// If you don't need to do the conversion in
2536	/// place and instead prefer convenience, then use
2537	/// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead.
2538	///
2539	/// # Examples
2540	///
2541	/// Basic usage:
2542	///
2543	/// ```
2544	/// use bstr::ByteSlice;
2545	///
2546	/// let mut s = <Vec<u8>>::from("HELLO Β");
2547	/// s.make_ascii_lowercase();
2548	/// assert_eq!(s, "hello Β".as_bytes());
2549	/// ```
2550	///
2551	/// Invalid UTF-8 remains as is:
2552	///
2553	/// ```
2554	/// # #[cfg(feature = "alloc")] {
2555	/// use bstr::{B, ByteSlice, ByteVec};
2556	///
2557	/// let mut s = <Vec<u8>>::from_slice(b"FOO\xFFBAR\xE2\x98BAZ");
2558	/// s.make_ascii_lowercase();
2559	/// assert_eq!(s, B(b"foo\xFFbar\xE2\x98baz"));
2560	/// # }
2561	/// ```
2562	#[inline]
2563	fn make_ascii_lowercase(&mut self) {
2564	self.as_bytes_mut().make_ascii_lowercase();
2565	}
2566
2567	/// Returns a new `Vec<u8>` containing the uppercase equivalent of this
2568	/// byte string.
2569	///
2570	/// In this case, uppercase is defined according to the `Uppercase`
2571	/// Unicode property.
2572	///
2573	/// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2574	/// then it is copied into the returned `Vec<u8>` unchanged.
2575	///
2576	/// Note that some characters in this byte string may expand into multiple
2577	/// characters when changing the case, so the number of bytes in the
2578	/// returned `Vec<u8>` may not be equivalent to the number of bytes in
2579	/// this byte string.
2580	///
2581	/// If you'd like to reuse an allocation for performance reasons, then use
2582	/// [`to_uppercase_into`](#method.to_uppercase_into) instead.
2583	///
2584	/// # Examples
2585	///
2586	/// Basic usage:
2587	///
2588	/// ```
2589	/// use bstr::{B, ByteSlice};
2590	///
2591	/// let s = B("hello β");
2592	/// assert_eq!(s.to_uppercase(), B("HELLO Β"));
2593	/// ```
2594	///
2595	/// Scripts without case are not changed:
2596	///
2597	/// ```
2598	/// use bstr::{B, ByteSlice};
2599	///
2600	/// let s = B("农历新年");
2601	/// assert_eq!(s.to_uppercase(), B("农历新年"));
2602	/// ```
2603	///
2604	/// Invalid UTF-8 remains as is:
2605	///
2606	/// ```
2607	/// use bstr::{B, ByteSlice};
2608	///
2609	/// let s = B(b"foo\xFFbar\xE2\x98baz");
2610	/// assert_eq!(s.to_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2611	/// ```
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_uppercase(&self) -> Vec<u8> {
    // Start from an empty buffer and let the `_into` variant fill it.
    let mut raised: Vec<u8> = vec![];
    self.to_uppercase_into(&mut raised);
    raised
}
2619
2620	/// Writes the uppercase equivalent of this byte string into the given
2621	/// buffer. The buffer is not cleared before written to.
2622	///
2623	/// In this case, uppercase is defined according to the `Uppercase`
2624	/// Unicode property.
2625	///
2626	/// If invalid UTF-8 is seen, or if a character has no uppercase variant,
2627	/// then it is written to the given buffer unchanged.
2628	///
2629	/// Note that some characters in this byte string may expand into multiple
2630	/// characters when changing the case, so the number of bytes written to
2631	/// the given byte string may not be equivalent to the number of bytes in
2632	/// this byte string.
2633	///
2634	/// If you don't need to amortize allocation and instead prefer
2635	/// convenience, then use [`to_uppercase`](#method.to_uppercase) instead.
2636	///
2637	/// # Examples
2638	///
2639	/// Basic usage:
2640	///
2641	/// ```
2642	/// use bstr::{B, ByteSlice};
2643	///
2644	/// let s = B("hello β");
2645	///
2646	/// let mut buf = vec![];
2647	/// s.to_uppercase_into(&mut buf);
2648	/// assert_eq!(buf, B("HELLO Β"));
2649	/// ```
2650	///
2651	/// Scripts without case are not changed:
2652	///
2653	/// ```
2654	/// use bstr::{B, ByteSlice};
2655	///
2656	/// let s = B("农历新年");
2657	///
2658	/// let mut buf = vec![];
2659	/// s.to_uppercase_into(&mut buf);
2660	/// assert_eq!(buf, B("农历新年"));
2661	/// ```
2662	///
2663	/// Invalid UTF-8 remains as is:
2664	///
2665	/// ```
2666	/// use bstr::{B, ByteSlice};
2667	///
2668	/// let s = B(b"foo\xFFbar\xE2\x98baz");
2669	///
2670	/// let mut buf = vec![];
2671	/// s.to_uppercase_into(&mut buf);
2672	/// assert_eq!(buf, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2673	/// ```
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
    // TODO: This is the best we can do given what std exposes I think.
    // If we roll our own case handling, then we might be able to do this
    // a bit faster. We shouldn't roll our own case handling unless we
    // need to, e.g., for doing caseless matching or case folding.

    // Reserve at least the input length up front; case mapping may still
    // produce more bytes than that.
    buf.reserve(self.as_bytes().len());
    for (s, e, ch) in self.char_indices() {
        if ch == '\u{FFFD}' {
            // A replacement codepoint stands for either invalid UTF-8 or a
            // literal U+FFFD; in both cases copy the source bytes verbatim
            // so that invalid input is preserved as-is.
            buf.push_str(&self.as_bytes()[s..e]);
        } else if ch.is_ascii() {
            // ASCII maps one-to-one, so skip the general iterator.
            buf.push_char(ch.to_ascii_uppercase());
        } else {
            // A single codepoint may uppercase to several codepoints.
            for upper in ch.to_uppercase() {
                buf.push_char(upper);
            }
        }
    }
}
2694
2695	/// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of
2696	/// this byte string.
2697	///
2698	/// In this case, uppercase is only defined in ASCII letters. Namely, the
2699	/// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2700	/// In particular, the length of the byte string returned is always
2701	/// equivalent to the length of this byte string.
2702	///
2703	/// If you'd like to reuse an allocation for performance reasons, then use
2704	/// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform
2705	/// the conversion in place.
2706	///
2707	/// # Examples
2708	///
2709	/// Basic usage:
2710	///
2711	/// ```
2712	/// use bstr::{B, ByteSlice};
2713	///
2714	/// let s = B("hello β");
2715	/// assert_eq!(s.to_ascii_uppercase(), B("HELLO β"));
2716	/// ```
2717	///
2718	/// Invalid UTF-8 remains as is:
2719	///
2720	/// ```
2721	/// use bstr::{B, ByteSlice};
2722	///
2723	/// let s = B(b"foo\xFFbar\xE2\x98baz");
2724	/// assert_eq!(s.to_ascii_uppercase(), B(b"FOO\xFFBAR\xE2\x98BAZ"));
2725	/// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_ascii_uppercase(&self) -> Vec<u8> {
    // Delegate to the method of the same name on the underlying byte slice.
    let bytes = self.as_bytes();
    bytes.to_ascii_uppercase()
}
2731
2732	/// Convert this byte string to its uppercase ASCII equivalent in place.
2733	///
2734	/// In this case, uppercase is only defined in ASCII letters. Namely, the
2735	/// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged.
2736	///
2737	/// If you don't need to do the conversion in
2738	/// place and instead prefer convenience, then use
2739	/// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead.
2740	///
2741	/// # Examples
2742	///
2743	/// Basic usage:
2744	///
2745	/// ```
2746	/// use bstr::{B, ByteSlice};
2747	///
2748	/// let mut s = <Vec<u8>>::from("hello β");
2749	/// s.make_ascii_uppercase();
2750	/// assert_eq!(s, B("HELLO β"));
2751	/// ```
2752	///
2753	/// Invalid UTF-8 remains as is:
2754	///
2755	/// ```
2756	/// # #[cfg(feature = "alloc")] {
2757	/// use bstr::{B, ByteSlice, ByteVec};
2758	///
2759	/// let mut s = <Vec<u8>>::from_slice(b"foo\xFFbar\xE2\x98baz");
2760	/// s.make_ascii_uppercase();
2761	/// assert_eq!(s, B(b"FOO\xFFBAR\xE2\x98BAZ"));
2762	/// # }
2763	/// ```
2764	#[inline]
2765	fn make_ascii_uppercase(&mut self) {
2766	self.as_bytes_mut().make_ascii_uppercase();
2767	}
2768
2769	/// Escapes this byte string into a sequence of `char` values.
2770	///
2771	/// When the sequence of `char` values is concatenated into a string, the
2772	/// result is always valid UTF-8. Any unprintable or invalid UTF-8 in this
2773	/// byte string are escaped using `\xNN` notation. Moreover, the
2774	/// characters `\0`, `\r`, `\n`, `\t` and `\` are escaped as well.
2775	///
2776	/// This is useful when one wants to get a human readable view of the raw
2777	/// bytes that is also valid UTF-8.
2778	///
2779	/// The iterator returned implements the `Display` trait. So one can do
2780	/// `b"foo\xFFbar".escape_bytes().to_string()` to get a `String` with its
2781	/// bytes escaped.
2782	///
2783	/// The dual of this function is [`ByteVec::unescape_bytes`].
2784	///
2785	/// Note that this is similar to, but not equivalent to the `Debug`
2786	/// implementation on [`BStr`] and [`BString`]. The `Debug` implementations
2787	/// also use the debug representation for all Unicode codepoints. However,
2788	/// this escaping routine only escapes individual bytes. All Unicode
2789	/// codepoints above `U+007F` are passed through unchanged without any
2790	/// escaping.
2791	///
2792	/// # Examples
2793	///
2794	/// ```
2795	/// # #[cfg(feature = "alloc")] {
2796	/// use bstr::{B, ByteSlice};
2797	///
2798	/// assert_eq!(r"foo\xFFbar", b"foo\xFFbar".escape_bytes().to_string());
2799	/// assert_eq!(r"foo\nbar", b"foo\nbar".escape_bytes().to_string());
2800	/// assert_eq!(r"foo\tbar", b"foo\tbar".escape_bytes().to_string());
2801	/// assert_eq!(r"foo\\bar", b"foo\\bar".escape_bytes().to_string());
2802	/// assert_eq!(r"foo☃bar", B("foo☃bar").escape_bytes().to_string());
2803	/// # }
2804	/// ```
2805	#[inline]
2806	fn escape_bytes(&self) -> EscapeBytes<'_> {
2807	EscapeBytes::new(self.as_bytes())
2808	}
2809
2810	/// Reverse the bytes in this string, in place.
2811	///
2812	/// This is not necessarily a well formed operation! For example, if this
2813	/// byte string contains valid UTF-8 that isn't ASCII, then reversing the
2814	/// string will likely result in invalid UTF-8 and otherwise non-sensical
2815	/// content.
2816	///
2817	/// Note that this is equivalent to the generic `[u8]::reverse` method.
2818	/// This method is provided to permit callers to explicitly differentiate
2819	/// between reversing bytes, codepoints and graphemes.
2820	///
2821	/// # Examples
2822	///
2823	/// Basic usage:
2824	///
2825	/// ```
2826	/// use bstr::ByteSlice;
2827	///
2828	/// let mut s = <Vec<u8>>::from("hello");
2829	/// s.reverse_bytes();
2830	/// assert_eq!(s, "olleh".as_bytes());
2831	/// ```
2832	#[inline]
2833	fn reverse_bytes(&mut self) {
2834	self.as_bytes_mut().reverse();
2835	}
2836
2837	/// Reverse the codepoints in this string, in place.
2838	///
2839	/// If this byte string is valid UTF-8, then its reversal by codepoint
2840	/// is also guaranteed to be valid UTF-8.
2841	///
2842	/// This operation is equivalent to the following, but without allocating:
2843	///
2844	/// ```
2845	/// use bstr::ByteSlice;
2846	///
2847	/// let mut s = <Vec<u8>>::from("foo☃bar");
2848	///
2849	/// let mut chars: Vec<char> = s.chars().collect();
2850	/// chars.reverse();
2851	///
2852	/// let reversed: String = chars.into_iter().collect();
2853	/// assert_eq!(reversed, "rab☃oof");
2854	/// ```
2855	///
2856	/// Note that this is not necessarily a well formed operation. For example,
2857	/// if this byte string contains grapheme clusters with more than one
2858	/// codepoint, then those grapheme clusters will not necessarily be
2859	/// preserved. If you'd like to preserve grapheme clusters, then use
2860	/// [`reverse_graphemes`](#method.reverse_graphemes) instead.
2861	///
2862	/// # Examples
2863	///
2864	/// Basic usage:
2865	///
2866	/// ```
2867	/// use bstr::ByteSlice;
2868	///
2869	/// let mut s = <Vec<u8>>::from("foo☃bar");
2870	/// s.reverse_chars();
2871	/// assert_eq!(s, "rab☃oof".as_bytes());
2872	/// ```
2873	///
2874	/// This example shows that not all reversals lead to a well formed string.
2875	/// For example, in this case, combining marks are used to put accents over
2876	/// some letters, and those accent marks must appear after the codepoints
2877	/// they modify.
2878	///
2879	/// ```
2880	/// use bstr::{B, ByteSlice};
2881	///
2882	/// let mut s = <Vec<u8>>::from("résumé");
2883	/// s.reverse_chars();
2884	/// assert_eq!(s, B(b"`\xCC\x81`emus`\xCC\x81`er"));
2885	/// ```
2886	///
2887	/// A word of warning: the above example relies on the fact that
2888	/// `résumé` is in decomposed normal form, which means there are separate
2889	/// codepoints for the accents above `e`. If it is instead in composed
2890	/// normal form, then the example works:
2891	///
2892	/// ```
2893	/// use bstr::{B, ByteSlice};
2894	///
2895	/// let mut s = <Vec<u8>>::from("résumé");
2896	/// s.reverse_chars();
2897	/// assert_eq!(s, B("émusér"));
2898	/// ```
2899	///
2900	/// The point here is to be cautious and not assume that just because
2901	/// `reverse_chars` works in one case, that it therefore works in all
2902	/// cases.
2903	#[inline]
2904	fn reverse_chars(&mut self) {
2905	let mut i = `0`;
2906	loop {
2907	let (_, size) = utf8::decode(&self.as_bytes()[i..]);
2908	if size == `0` {
2909	break;
2910	}
2911	if size > `1` {
2912	self.as_bytes_mut()[i..i + size].reverse_bytes();
2913	}
2914	i += size;
2915	}
2916	self.reverse_bytes();
2917	}
2918
/// Reverse the graphemes in this string, in place.
///
/// If this byte string is valid UTF-8, then its reversal by grapheme
/// is also guaranteed to be valid UTF-8.
///
/// This operation is equivalent to the following, but without allocating:
///
/// ```
/// use bstr::ByteSlice;
///
/// let mut s = <Vec<u8>>::from("foo☃bar");
///
/// let mut graphemes: Vec<&str> = s.graphemes().collect();
/// graphemes.reverse();
///
/// let reversed = graphemes.concat();
/// assert_eq!(reversed, "rab☃oof");
/// ```
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteSlice;
///
/// let mut s = <Vec<u8>>::from("foo☃bar");
/// s.reverse_graphemes();
/// assert_eq!(s, "rab☃oof".as_bytes());
/// ```
///
/// This example shows how this correctly handles grapheme clusters,
/// unlike `reverse_chars`.
///
/// ```
/// use bstr::ByteSlice;
///
/// // "résumé" where each `é` is spelled as `e` followed by U+0301
/// // (COMBINING ACUTE ACCENT), i.e. a two-codepoint grapheme cluster.
/// let mut s = <Vec<u8>>::from("re\u{301}sume\u{301}");
/// s.reverse_graphemes();
/// assert_eq!(s, "e\u{301}muse\u{301}r".as_bytes());
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn reverse_graphemes(&mut self) {
    use crate::unicode::decode_grapheme;

    let bytes = self.as_bytes_mut();
    let mut offset = 0;
    loop {
        // A reported width of zero means there is nothing left to decode.
        let width = match decode_grapheme(&bytes[offset..]) {
            (_, 0) => break,
            (_, width) => width,
        };
        // Pre-reverse every multi-byte cluster so that the whole-string
        // reversal below restores its internal byte order.
        if width > 1 {
            bytes[offset..offset + width].reverse_bytes();
        }
        offset += width;
    }
    self.reverse_bytes();
}
2978
2979	/// Returns true if and only if every byte in this byte string is ASCII.
2980	///
2981	/// ASCII is an encoding that defines 128 codepoints. A byte corresponds to
2982	/// an ASCII codepoint if and only if it is in the inclusive range
2983	/// `[0, 127]`.
2984	///
2985	/// # Examples
2986	///
2987	/// Basic usage:
2988	///
2989	/// ```
2990	/// use bstr::{B, ByteSlice};
2991	///
2992	/// assert!(B("abc").is_ascii());
2993	/// assert!(!B("☃βツ").is_ascii());
2994	/// assert!(!B(b"`\xFF`").is_ascii());
2995	/// ```
2996	#[inline]
2997	fn is_ascii(&self) -> bool {
2998	ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len()
2999	}
3000
3001	/// Returns true if and only if the entire byte string is valid UTF-8.
3002	///
3003	/// If you need location information about where a byte string's first
3004	/// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method.
3005	///
3006	/// # Examples
3007	///
3008	/// Basic usage:
3009	///
3010	/// ```
3011	/// use bstr::{B, ByteSlice};
3012	///
3013	/// assert!(B("abc").is_utf8());
3014	/// assert!(B("☃βツ").is_utf8());
3015	/// // invalid bytes
3016	/// assert!(!B(b"abc`\xFF`").is_utf8());
3017	/// // surrogate encoding
3018	/// assert!(!B(b"`\xED\xA0\x80`").is_utf8());
3019	/// // incomplete sequence
3020	/// assert!(!B(b"`\xF0\x9D\x9C`a").is_utf8());
3021	/// // overlong sequence
3022	/// assert!(!B(b"`\xF0\x82\x82\xAC`").is_utf8());
3023	/// ```
3024	#[inline]
3025	fn is_utf8(&self) -> bool {
3026	utf8::validate(self.as_bytes()).is_ok()
3027	}
3028
3029	/// Returns the last byte in this byte string, if it's non-empty. If this
3030	/// byte string is empty, this returns `None`.
3031	///
3032	/// Note that this is like the generic `[u8]::last`, except this returns
3033	/// the byte by value instead of a reference to the byte.
3034	///
3035	/// # Examples
3036	///
3037	/// Basic usage:
3038	///
3039	/// ```
3040	/// use bstr::ByteSlice;
3041	///
3042	/// assert_eq!(Some(b'z'), b"baz".last_byte());
3043	/// assert_eq!(None, b"".last_byte());
3044	/// ```
3045	#[inline]
3046	fn last_byte(&self) -> Option<u8> {
3047	let bytes = self.as_bytes();
3048	bytes.last().copied()
3049	}
3050
3051	/// Returns the index of the first non-ASCII byte in this byte string (if
3052	/// any such indices exist). Specifically, it returns the index of the
3053	/// first byte with a value greater than or equal to `0x80`.
3054	///
3055	/// # Examples
3056	///
3057	/// Basic usage:
3058	///
3059	/// ```
3060	/// use bstr::{ByteSlice, B};
3061	///
3062	/// assert_eq!(Some(`3`), b"abc`\xff`".find_non_ascii_byte());
3063	/// assert_eq!(None, b"abcde".find_non_ascii_byte());
3064	/// assert_eq!(Some(`0`), B("😀").find_non_ascii_byte());
3065	/// ```
3066	#[inline]
3067	fn find_non_ascii_byte(&self) -> Option<usize> {
3068	let index = ascii::first_non_ascii_byte(self.as_bytes());
3069	if index == self.as_bytes().len() {
3070	None
3071	} else {
3072	Some(index)
3073	}
3074	}
3075	}
3076
3077	/// A single substring searcher fixed to a particular needle.
3078	///
3079	/// The purpose of this type is to permit callers to construct a substring
3080	/// searcher that can be used to search haystacks without the overhead of
3081	/// constructing the searcher in the first place. This is a somewhat niche
3082	/// concern when it's necessary to re-use the same needle to search multiple
3083	/// different haystacks with as little overhead as possible. In general, using
3084	/// [`ByteSlice::find`](trait.ByteSlice.html#method.find)
3085	/// or
3086	/// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter)
3087	/// is good enough, but `Finder` is useful when you can meaningfully observe
3088	/// searcher construction time in a profile.
3089	///
3090	/// When the `std` feature is enabled, then this type has an `into_owned`
3091	/// version which permits building a `Finder` that is not connected to the
3092	/// lifetime of its needle.
3093	#[derive(Clone, Debug)]
3094	pub struct Finder<'a>(memmem::Finder<'a>);
3095
3096	impl<'a> Finder<'a> {
3097	/// Create a new finder for the given needle.
3098	#[inline]
3099	pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> {
3100	Finder(memmem::Finder::new(needle.as_ref()))
3101	}
3102
3103	/// Convert this finder into its owned variant, such that it no longer
3104	/// borrows the needle.
3105	///
3106	/// If this is already an owned finder, then this is a no-op. Otherwise,
3107	/// this copies the needle.
3108	///
3109	/// This is only available when the `alloc` feature is enabled.
3110	#[cfg(feature = "alloc")]
3111	#[inline]
3112	pub fn into_owned(self) -> Finder<'static> {
3113	Finder(self.0.into_owned())
3114	}
3115
3116	/// Returns the needle that this finder searches for.
3117	///
3118	/// Note that the lifetime of the needle returned is tied to the lifetime
3119	/// of the finder, and may be shorter than the `'a` lifetime. Namely, a
3120	/// finder's needle can be either borrowed or owned, so the lifetime of the
3121	/// needle returned must necessarily be the shorter of the two.
3122	#[inline]
3123	pub fn needle(&self) -> &[u8] {
3124	self.0.needle()
3125	}
3126
3127	/// Returns the index of the first occurrence of this needle in the given
3128	/// haystack.
3129	///
3130	/// The haystack may be any type that can be cheaply converted into a
3131	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3132	///
3133	/// # Complexity
3134	///
3135	/// This routine is guaranteed to have worst case linear time complexity
3136	/// with respect to both the needle and the haystack. That is, this runs
3137	/// in `O(needle.len() + haystack.len())` time.
3138	///
3139	/// This routine is also guaranteed to have worst case constant space
3140	/// complexity.
3141	///
3142	/// # Examples
3143	///
3144	/// Basic usage:
3145	///
3146	/// ```
3147	/// use bstr::Finder;
3148	///
3149	/// let haystack = "foo bar baz";
3150	/// assert_eq!(Some(`0`), Finder::new("foo").find(haystack));
3151	/// assert_eq!(Some(`4`), Finder::new("bar").find(haystack));
3152	/// assert_eq!(None, Finder::new("quux").find(haystack));
3153	/// ```
3154	#[inline]
3155	pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3156	self.0.find(haystack.as_ref())
3157	}
3158	}
3159
3160	/// A single substring reverse searcher fixed to a particular needle.
3161	///
3162	/// The purpose of this type is to permit callers to construct a substring
3163	/// searcher that can be used to search haystacks without the overhead of
3164	/// constructing the searcher in the first place. This is a somewhat niche
3165	/// concern when it's necessary to re-use the same needle to search multiple
3166	/// different haystacks with as little overhead as possible. In general, using
3167	/// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind)
3168	/// or
3169	/// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter)
3170	/// is good enough, but `FinderReverse` is useful when you can meaningfully
3171	/// observe searcher construction time in a profile.
3172	///
3173	/// When the `std` feature is enabled, then this type has an `into_owned`
3174	/// version which permits building a `FinderReverse` that is not connected to
3175	/// the lifetime of its needle.
3176	#[derive(Clone, Debug)]
3177	pub struct FinderReverse<'a>(memmem::FinderRev<'a>);
3178
3179	impl<'a> FinderReverse<'a> {
3180	/// Create a new reverse finder for the given needle.
3181	#[inline]
3182	pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> {
3183	FinderReverse(memmem::FinderRev::new(needle.as_ref()))
3184	}
3185
3186	/// Convert this finder into its owned variant, such that it no longer
3187	/// borrows the needle.
3188	///
3189	/// If this is already an owned finder, then this is a no-op. Otherwise,
3190	/// this copies the needle.
3191	///
3192	/// This is only available when the `alloc` feature is enabled.
3193	#[cfg(feature = "alloc")]
3194	#[inline]
3195	pub fn into_owned(self) -> FinderReverse<'static> {
3196	FinderReverse(self.0.into_owned())
3197	}
3198
3199	/// Returns the needle that this finder searches for.
3200	///
3201	/// Note that the lifetime of the needle returned is tied to the lifetime
3202	/// of this finder, and may be shorter than the `'a` lifetime. Namely,
3203	/// a finder's needle can be either borrowed or owned, so the lifetime of
3204	/// the needle returned must necessarily be the shorter of the two.
3205	#[inline]
3206	pub fn needle(&self) -> &[u8] {
3207	self.0.needle()
3208	}
3209
3210	/// Returns the index of the last occurrence of this needle in the given
3211	/// haystack.
3212	///
3213	/// The haystack may be any type that can be cheaply converted into a
3214	/// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`.
3215	///
3216	/// # Complexity
3217	///
3218	/// This routine is guaranteed to have worst case linear time complexity
3219	/// with respect to both the needle and the haystack. That is, this runs
3220	/// in `O(needle.len() + haystack.len())` time.
3221	///
3222	/// This routine is also guaranteed to have worst case constant space
3223	/// complexity.
3224	///
3225	/// # Examples
3226	///
3227	/// Basic usage:
3228	///
3229	/// ```
3230	/// use bstr::FinderReverse;
3231	///
3232	/// let haystack = "foo bar baz";
3233	/// assert_eq!(Some(`0`), FinderReverse::new("foo").rfind(haystack));
3234	/// assert_eq!(Some(`4`), FinderReverse::new("bar").rfind(haystack));
3235	/// assert_eq!(None, FinderReverse::new("quux").rfind(haystack));
3236	/// ```
3237	#[inline]
3238	pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> {
3239	self.0.rfind(haystack.as_ref())
3240	}
3241	}
3242
3243	/// An iterator over non-overlapping substring matches.
3244	///
3245	/// Matches are reported by the byte offset at which they begin.
3246	///
3247	/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
3248	/// needle.
3249	#[derive(Clone, Debug)]
3250	pub struct Find<'h, 'n> {
3251	it: memmem::FindIter<'h, 'n>,
3252	haystack: &'h [u8],
3253	needle: &'n [u8],
3254	}
3255
3256	impl<'h, 'n> Find<'h, 'n> {
3257	fn new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n> {
3258	Find { it: memmem::find_iter(haystack, needle), haystack, needle }
3259	}
3260	}
3261
3262	impl<'h, 'n> Iterator for Find<'h, 'n> {
3263	type Item = usize;
3264
3265	#[inline]
3266	fn next(&mut self) -> Option<usize> {
3267	self.it.next()
3268	}
3269	}
3270
3271	/// An iterator over non-overlapping substring matches in reverse.
3272	///
3273	/// Matches are reported by the byte offset at which they begin.
3274	///
3275	/// `'h` is the lifetime of the haystack while `'n` is the lifetime of the
3276	/// needle.
3277	#[derive(Clone, Debug)]
3278	pub struct FindReverse<'h, 'n> {
3279	it: memmem::FindRevIter<'h, 'n>,
3280	haystack: &'h [u8],
3281	needle: &'n [u8],
3282	}
3283
3284	impl<'h, 'n> FindReverse<'h, 'n> {
3285	fn new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n> {
3286	FindReverse {
3287	it: memmem::rfind_iter(haystack, needle),
3288	haystack,
3289	needle,
3290	}
3291	}
3292
3293	fn haystack(&self) -> &'h [u8] {
3294	self.haystack
3295	}
3296
3297	fn needle(&self) -> &'n [u8] {
3298	self.needle
3299	}
3300	}
3301
3302	impl<'h, 'n> Iterator for FindReverse<'h, 'n> {
3303	type Item = usize;
3304
3305	#[inline]
3306	fn next(&mut self) -> Option<usize> {
3307	self.it.next()
3308	}
3309	}
3310
/// An iterator over the bytes in a byte string.
///
/// Bytes are yielded by value. `'a` is the lifetime of the byte string being
/// traversed.
#[derive(Clone, Debug)]
pub struct Bytes<'a> {
    /// The slice iterator that tracks the bytes not yet yielded.
    it: slice::Iter<'a, u8>,
}

impl<'a> Bytes<'a> {
    /// Views the bytes not yet yielded as a subslice of the original data.
    ///
    /// The returned slice has the same lifetime as the original slice, so
    /// the iterator can continue to be used while the slice is alive.
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        let remaining: &'a [u8] = self.it.as_slice();
        remaining
    }
}

impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    /// Yields the next byte from the front, by value.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        let byte = self.it.next()?;
        Some(*byte)
    }

    /// Reports the exact number of remaining bytes as both bounds.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.it.len();
        (remaining, Some(remaining))
    }
}

impl<'a> DoubleEndedIterator for Bytes<'a> {
    /// Yields the next byte from the back, by value.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        let byte = self.it.next_back()?;
        Some(*byte)
    }
}

impl<'a> ExactSizeIterator for Bytes<'a> {
    /// Returns the number of bytes not yet yielded.
    #[inline]
    fn len(&self) -> usize {
        self.as_bytes().len()
    }
}

impl<'a> iter::FusedIterator for Bytes<'a> {}
3358
/// An iterator over the fields in a byte string, separated by whitespace.
///
/// Whitespace for this iterator is defined by the Unicode property
/// `White_Space`.
///
/// Contiguous runs of whitespace act as a single separator, so the fields
/// in `foo\t\t\n \nbar` are `foo` and `bar`.
///
/// `'a` is the lifetime of the byte string being split.
#[cfg(feature = "unicode")]
#[derive(Clone, Debug)]
pub struct Fields<'a> {
    /// The predicate-based field iterator, fixed to a whitespace test.
    it: FieldsWith<'a, fn(char) -> bool>,
}

#[cfg(feature = "unicode")]
impl<'a> Fields<'a> {
    /// Builds a field iterator over `bytes` that splits on codepoints for
    /// which `char::is_whitespace` returns true.
    fn new(bytes: &'a [u8]) -> Fields<'a> {
        // Name the predicate as a plain function pointer so that it matches
        // the field's declared type.
        let is_separator: fn(char) -> bool = char::is_whitespace;
        Fields { it: bytes.fields_with(is_separator) }
    }
}

#[cfg(feature = "unicode")]
impl<'a> Iterator for Fields<'a> {
    type Item = &'a [u8];

    /// Yields the next whitespace-delimited field, if there is one.
    #[inline]
    fn next(&mut self) -> Option<&'a [u8]> {
        let Fields { it } = self;
        it.next()
    }
}
3390
3391	/// An iterator over fields in the byte string, separated by a predicate over
3392	/// codepoints.
3393	///
3394	/// This iterator splits a byte string based on its predicate function such
3395	/// that the elements returned are separated by contiguous runs of codepoints
3396	/// for which the predicate returns true.
3397	///
3398	/// `'a` is the lifetime of the byte string being split, while `F` is the type
3399	/// of the predicate, i.e., `FnMut(char) -> bool`.
3400	#[derive(Clone, Debug)]
3401	pub struct FieldsWith<'a, F> {
3402	f: F,
3403	bytes: &'a [u8],
3404	chars: CharIndices<'a>,
3405	}
3406
3407	impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> {
3408	fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> {
3409	FieldsWith { f, bytes, chars: bytes.char_indices() }
3410	}
3411	}
3412
3413	impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> {
3414	type Item = &'a [u8];
3415
3416	#[inline]
3417	fn next(&mut self) -> Option<&'a [u8]> {
3418	let (start, mut end);
3419	loop {
3420	match self.chars.next() {
3421	None => return None,
3422	Some((s, e, ch)) => {
3423	if !(self.f)(ch) {
3424	start = s;
3425	end = e;
3426	break;
3427	}
3428	}
3429	}
3430	}
3431	for (_, e, ch) in self.chars.by_ref() {
3432	if (self.f)(ch) {
3433	break;
3434	}
3435	end = e;
3436	}
3437	Some(&self.bytes[start..end])
3438	}
3439	}
3440
3441	/// An iterator over substrings in a byte string, split by a separator.
3442	///
3443	/// `'h` is the lifetime of the byte string being split (the haystack), while
3444	/// `'s` is the lifetime of the byte string doing the splitting.
3445	#[derive(Clone, Debug)]
3446	pub struct Split<'h, 's> {
3447	finder: Find<'h, 's>,
3448	/// The end position of the previous match of our splitter. The element
3449	/// we yield corresponds to the substring starting at `last` up to the
3450	/// beginning of the next match of the splitter.
3451	last: usize,
3452	/// Only set when iteration is complete. A corner case here is when a
3453	/// splitter is matched at the end of the haystack. At that point, we still
3454	/// need to yield an empty string following it.
3455	done: bool,
3456	}
3457
3458	impl<'h, 's> Split<'h, 's> {
3459	fn new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's> {
3460	let finder: Find<'_, '_> = haystack.find_iter(needle:splitter);
3461	Split { finder, last: `0`, done: `false` }
3462	}
3463	}
3464
3465	impl<'h, 's> Iterator for Split<'h, 's> {
3466	type Item = &'h [u8];
3467
3468	#[inline]
3469	fn next(&mut self) -> Option<&'h [u8]> {
3470	let haystack = self.finder.haystack;
3471	match self.finder.next() {
3472	Some(start) => {
3473	let next = &haystack[self.last..start];
3474	self.last = start + self.finder.needle.len();
3475	Some(next)
3476	}
3477	None => {
3478	if self.last >= haystack.len() {
3479	if !self.done {
3480	self.done = `true`;
3481	Some(b"")
3482	} else {
3483	None
3484	}
3485	} else {
3486	let s = &haystack[self.last..];
3487	self.last = haystack.len();
3488	self.done = `true`;
3489	Some(s)
3490	}
3491	}
3492	}
3493	}
3494	}
3495
3496	/// An iterator over substrings in a byte string, split by a separator, in
3497	/// reverse.
3498	///
3499	/// `'h` is the lifetime of the byte string being split (the haystack), while
3500	/// `'s` is the lifetime of the byte string doing the splitting.
3501	#[derive(Clone, Debug)]
3502	pub struct SplitReverse<'h, 's> {
3503	finder: FindReverse<'h, 's>,
3504	/// The end position of the previous match of our splitter. The element
3505	/// we yield corresponds to the substring starting at `last` up to the
3506	/// beginning of the next match of the splitter.
3507	last: usize,
3508	/// Only set when iteration is complete. A corner case here is when a
3509	/// splitter is matched at the end of the haystack. At that point, we still
3510	/// need to yield an empty string following it.
3511	done: bool,
3512	}
3513
3514	impl<'h, 's> SplitReverse<'h, 's> {
3515	fn new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's> {
3516	let finder: FindReverse<'_, '_> = haystack.rfind_iter(needle:splitter);
3517	SplitReverse { finder, last: haystack.len(), done: `false` }
3518	}
3519	}
3520
3521	impl<'h, 's> Iterator for SplitReverse<'h, 's> {
3522	type Item = &'h [u8];
3523
3524	#[inline]
3525	fn next(&mut self) -> Option<&'h [u8]> {
3526	let haystack = self.finder.haystack();
3527	match self.finder.next() {
3528	Some(start) => {
3529	let nlen = self.finder.needle().len();
3530	let next = &haystack[start + nlen..self.last];
3531	self.last = start;
3532	Some(next)
3533	}
3534	None => {
3535	if self.last == `0` {
3536	if !self.done {
3537	self.done = `true`;
3538	Some(b"")
3539	} else {
3540	None
3541	}
3542	} else {
3543	let s = &haystack[..self.last];
3544	self.last = `0`;
3545	self.done = `true`;
3546	Some(s)
3547	}
3548	}
3549	}
3550	}
3551	}
3552
3553	/// An iterator over at most `n` substrings in a byte string, split by a
3554	/// separator.
3555	///
3556	/// `'h` is the lifetime of the byte string being split (the haystack), while
3557	/// `'s` is the lifetime of the byte string doing the splitting.
3558	#[derive(Clone, Debug)]
3559	pub struct SplitN<'h, 's> {
3560	split: Split<'h, 's>,
3561	limit: usize,
3562	count: usize,
3563	}
3564
3565	impl<'h, 's> SplitN<'h, 's> {
3566	fn new(
3567	haystack: &'h [u8],
3568	splitter: &'s [u8],
3569	limit: usize,
3570	) -> SplitN<'h, 's> {
3571	let split: Split<'_, '_> = haystack.split_str(splitter);
3572	SplitN { split, limit, count: `0` }
3573	}
3574	}
3575
3576	impl<'h, 's> Iterator for SplitN<'h, 's> {
3577	type Item = &'h [u8];
3578
3579	#[inline]
3580	fn next(&mut self) -> Option<&'h [u8]> {
3581	self.count += `1`;
3582	if self.count > self.limit \|\| self.split.done {
3583	None
3584	} else if self.count == self.limit {
3585	Some(&self.split.finder.haystack[self.split.last..])
3586	} else {
3587	self.split.next()
3588	}
3589	}
3590	}
3591
3592	/// An iterator over at most `n` substrings in a byte string, split by a
3593	/// separator, in reverse.
3594	///
3595	/// `'h` is the lifetime of the byte string being split (the haystack), while
3596	/// `'s` is the lifetime of the byte string doing the splitting.
3597	#[derive(Clone, Debug)]
3598	pub struct SplitNReverse<'h, 's> {
3599	split: SplitReverse<'h, 's>,
3600	limit: usize,
3601	count: usize,
3602	}
3603
3604	impl<'h, 's> SplitNReverse<'h, 's> {
3605	fn new(
3606	haystack: &'h [u8],
3607	splitter: &'s [u8],
3608	limit: usize,
3609	) -> SplitNReverse<'h, 's> {
3610	let split: SplitReverse<'_, '_> = haystack.rsplit_str(splitter);
3611	SplitNReverse { split, limit, count: `0` }
3612	}
3613	}
3614
3615	impl<'h, 's> Iterator for SplitNReverse<'h, 's> {
3616	type Item = &'h [u8];
3617
3618	#[inline]
3619	fn next(&mut self) -> Option<&'h [u8]> {
3620	self.count += `1`;
3621	if self.count > self.limit \|\| self.split.done {
3622	None
3623	} else if self.count == self.limit {
3624	Some(&self.split.finder.haystack()[..self.split.last])
3625	} else {
3626	self.split.next()
3627	}
3628	}
3629	}
3630
3631	/// An iterator over all lines in a byte string, without their terminators.
3632	///
3633	/// For this iterator, the only line terminators recognized are `\r\n` and
3634	/// `\n`.
3635	///
3636	/// `'a` is the lifetime of the byte string being iterated over.
3637	#[derive(Clone, Debug)]
3638	pub struct Lines<'a> {
3639	it: LinesWithTerminator<'a>,
3640	}
3641
3642	impl<'a> Lines<'a> {
3643	fn new(bytes: &'a [u8]) -> Lines<'a> {
3644	Lines { it: LinesWithTerminator::new(bytes) }
3645	}
3646
3647	/// Return a copy of the rest of the underlying bytes without affecting the
3648	/// iterator itself.
3649	///
3650	/// # Examples
3651	///
3652	/// Basic usage:
3653	///
3654	/// ```
3655	/// use bstr::{B, ByteSlice};
3656	///
3657	/// let s = b"\
3658	/// foo
3659	/// bar`\r`
3660	/// baz";
3661	/// let mut lines = s.lines();
3662	/// assert_eq!(lines.next(), Some(B("foo")));
3663	/// assert_eq!(lines.as_bytes(), B("bar`\r\n`baz"));
3664	/// ```
3665	pub fn as_bytes(&self) -> &'a [u8] {
3666	self.it.bytes
3667	}
3668	}
3669
3670	impl<'a> Iterator for Lines<'a> {
3671	type Item = &'a [u8];
3672
3673	#[inline]
3674	fn next(&mut self) -> Option<&'a [u8]> {
3675	Some(trim_last_terminator(self.it.next()?))
3676	}
3677	}
3678
3679	impl<'a> DoubleEndedIterator for Lines<'a> {
3680	#[inline]
3681	fn next_back(&mut self) -> Option<Self::Item> {
3682	Some(trim_last_terminator(self.it.next_back()?))
3683	}
3684	}
3685
3686	impl<'a> iter::FusedIterator for Lines<'a> {}
3687
3688	/// An iterator over all lines in a byte string, including their terminators.
3689	///
3690	/// For this iterator, the only line terminator recognized is `\n`. (Since
3691	/// line terminators are included, this also handles `\r\n` line endings.)
3692	///
3693	/// Line terminators are only included if they are present in the original
3694	/// byte string. For example, the last line in a byte string may not end with
3695	/// a line terminator.
3696	///
3697	/// Concatenating all elements yielded by this iterator is guaranteed to yield
3698	/// the original byte string.
3699	///
3700	/// `'a` is the lifetime of the byte string being iterated over.
3701	#[derive(Clone, Debug)]
3702	pub struct LinesWithTerminator<'a> {
3703	bytes: &'a [u8],
3704	}
3705
3706	impl<'a> LinesWithTerminator<'a> {
3707	fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
3708	LinesWithTerminator { bytes }
3709	}
3710
3711	/// Return a copy of the rest of the underlying bytes without affecting the
3712	/// iterator itself.
3713	///
3714	/// # Examples
3715	///
3716	/// Basic usage:
3717	///
3718	/// ```
3719	/// use bstr::{B, ByteSlice};
3720	///
3721	/// let s = b"\
3722	/// foo
3723	/// bar`\r`
3724	/// baz";
3725	/// let mut lines = s.lines_with_terminator();
3726	/// assert_eq!(lines.next(), Some(B("foo`\n`")));
3727	/// assert_eq!(lines.as_bytes(), B("bar`\r\n`baz"));
3728	/// ```
3729	pub fn as_bytes(&self) -> &'a [u8] {
3730	self.bytes
3731	}
3732	}
3733
3734	impl<'a> Iterator for LinesWithTerminator<'a> {
3735	type Item = &'a [u8];
3736
3737	#[inline]
3738	fn next(&mut self) -> Option<&'a [u8]> {
3739	match self.bytes.find_byte(b'`\n`') {
3740	None if self.bytes.is_empty() => None,
3741	None => {
3742	let line: &'a [u8] = self.bytes;
3743	self.bytes = b"";
3744	Some(line)
3745	}
3746	Some(end: usize) => {
3747	let line: &[u8] = &self.bytes[..=end];
3748	self.bytes = &self.bytes[end + `1`..];
3749	Some(line)
3750	}
3751	}
3752	}
3753	}
3754
3755	impl<'a> DoubleEndedIterator for LinesWithTerminator<'a> {
3756	#[inline]
3757	fn next_back(&mut self) -> Option<Self::Item> {
3758	let end: usize = self.bytes.len().checked_sub(`1`)?;
3759	match self.bytes[..end].rfind_byte(b'`\n`') {
3760	None => {
3761	let line: &'a [u8] = self.bytes;
3762	self.bytes = b"";
3763	Some(line)
3764	}
3765	Some(end: usize) => {
3766	let line: &[u8] = &self.bytes[end + `1`..];
3767	self.bytes = &self.bytes[..=end];
3768	Some(line)
3769	}
3770	}
3771	}
3772	}
3773
3774	impl<'a> iter::FusedIterator for LinesWithTerminator<'a> {}
3775
/// Removes a single trailing line terminator (`\n` or `\r\n`) from `s`.
///
/// The input is returned unchanged when it does not end with `\n`; in
/// particular, a lone trailing `\r` is kept.
fn trim_last_terminator(s: &[u8]) -> &[u8] {
    // Without a final `\n` there is no terminator to remove.
    let line = match s.split_last() {
        Some((&b'\n', line)) => line,
        _ => return s,
    };
    // A `\r` directly before that `\n` is part of a `\r\n` terminator.
    match line.split_last() {
        Some((&b'\r', line)) => line,
        _ => line,
    }
}
3785
#[cfg(all(test, feature = "std"))]
mod tests {
    use alloc::{string::String, vec::Vec};

    use crate::{
        ext_slice::{ByteSlice, Lines, LinesWithTerminator, B},
        tests::LOSSY_TESTS,
    };

    /// Checks both lossy conversion routines against the shared fixture
    /// table, and checks the table itself against the standard library's
    /// lossy conversion.
    #[test]
    fn to_str_lossy() {
        for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
            let got = B(input).to_str_lossy();
            assert_eq!(
                expected.as_bytes(),
                got.as_bytes(),
                "to_str_lossy(ith: {:?}, given: {:?})",
                i,
                input,
            );

            let mut got = String::new();
            B(input).to_str_lossy_into(&mut got);
            assert_eq!(
                expected.as_bytes(),
                got.as_bytes(),
                "to_str_lossy_into(ith: {:?}, given: {:?})",
                i,
                input,
            );

            let got = String::from_utf8_lossy(input);
            assert_eq!(
                expected.as_bytes(),
                got.as_bytes(),
                "std(ith: {:?}, given: {:?})",
                i,
                input,
            );
        }
    }

    /// Checks that both line iterators yield the expected lines when driven
    /// from the front and, reversed, when driven from the back.
    #[test]
    fn lines_iteration() {
        // `$it` is evaluated twice on purpose: once for forward iteration
        // and once, on a fresh iterator, for reverse iteration.
        macro_rules! t {
            ($it:expr, $forward:expr) => {
                let mut res: Vec<&[u8]> = Vec::from($forward);
                assert_eq!($it.collect::<Vec<_>>(), res);
                res.reverse();
                assert_eq!($it.rev().collect::<Vec<_>>(), res);
            };
        }

        t!(Lines::new(b""), []);
        t!(LinesWithTerminator::new(b""), []);

        t!(Lines::new(b"\n"), [B("")]);
        t!(Lines::new(b"\r\n"), [B("")]);
        t!(LinesWithTerminator::new(b"\n"), [B("\n")]);
        t!(LinesWithTerminator::new(b"\r\n"), [B("\r\n")]);

        t!(Lines::new(b"a"), [B("a")]);
        t!(LinesWithTerminator::new(b"a"), [B("a")]);

        t!(Lines::new(b"abc"), [B("abc")]);
        t!(LinesWithTerminator::new(b"abc"), [B("abc")]);

        t!(Lines::new(b"abc\n"), [B("abc")]);
        t!(Lines::new(b"abc\r\n"), [B("abc")]);
        t!(LinesWithTerminator::new(b"abc\n"), [B("abc\n")]);

        t!(Lines::new(b"abc\n\n"), [B("abc"), B("")]);
        t!(LinesWithTerminator::new(b"abc\n\n"), [B("abc\n"), B("\n")]);

        t!(Lines::new(b"abc\n\ndef"), [B("abc"), B(""), B("def")]);
        t!(
            LinesWithTerminator::new(b"abc\n\ndef"),
            [B("abc\n"), B("\n"), B("def")]
        );

        t!(Lines::new(b"abc\n\ndef\n"), [B("abc"), B(""), B("def")]);
        t!(
            LinesWithTerminator::new(b"abc\n\ndef\n"),
            [B("abc\n"), B("\n"), B("def\n")]
        );

        // `\r\n` in the middle of the input, not only at its end.
        t!(Lines::new(b"abc\r\ndef"), [B("abc"), B("def")]);
        t!(
            LinesWithTerminator::new(b"abc\r\ndef"),
            [B("abc\r\n"), B("def")]
        );

        t!(Lines::new(b"\na\nb\n"), [B(""), B("a"), B("b")]);
        t!(
            LinesWithTerminator::new(b"\na\nb\n"),
            [B("\n"), B("a\n"), B("b\n")]
        );

        t!(Lines::new(b"\n\n\n"), [B(""), B(""), B("")]);
        t!(
            LinesWithTerminator::new(b"\n\n\n"),
            [B("\n"), B("\n"), B("\n")]
        );
    }
}
3873