1 | use core::{iter, slice, str}; |
2 | |
3 | #[cfg (all(feature = "alloc" , feature = "unicode" ))] |
4 | use alloc::vec; |
5 | #[cfg (feature = "alloc" )] |
6 | use alloc::{borrow::Cow, string::String, vec::Vec}; |
7 | |
8 | #[cfg (feature = "std" )] |
9 | use std::{ffi::OsStr, path::Path}; |
10 | |
11 | use memchr::{memchr, memmem, memrchr}; |
12 | |
13 | use crate::escape_bytes::EscapeBytes; |
14 | #[cfg (feature = "alloc" )] |
15 | use crate::ext_vec::ByteVec; |
16 | #[cfg (feature = "unicode" )] |
17 | use crate::unicode::{ |
18 | whitespace_len_fwd, whitespace_len_rev, GraphemeIndices, Graphemes, |
19 | SentenceIndices, Sentences, WordIndices, Words, WordsWithBreakIndices, |
20 | WordsWithBreaks, |
21 | }; |
22 | use crate::{ |
23 | ascii, |
24 | bstr::BStr, |
25 | byteset, |
26 | utf8::{self, CharIndices, Chars, Utf8Chunks, Utf8Error}, |
27 | }; |
28 | |
29 | /// A short-hand constructor for building a `&[u8]`. |
30 | /// |
31 | /// This idiosyncratic constructor is useful for concisely building byte string |
32 | /// slices. Its primary utility is in conveniently writing byte string literals |
33 | /// in a uniform way. For example, consider this code that does not compile: |
34 | /// |
35 | /// ```ignore |
36 | /// let strs = vec![b"a" , b"xy" ]; |
37 | /// ``` |
38 | /// |
39 | /// The above code doesn't compile because the type of the byte string literal |
40 | /// `b"a"` is `&'static [u8; 1]`, and the type of `b"xy"` is |
41 | /// `&'static [u8; 2]`. Since their types aren't the same, they can't be stored |
42 | /// in the same `Vec`. (This is dissimilar from normal Unicode string slices, |
43 | /// where both `"a"` and `"xy"` have the same type of `&'static str`.) |
44 | /// |
45 | /// One way of getting the above code to compile is to convert byte strings to |
46 | /// slices. You might try this: |
47 | /// |
48 | /// ```ignore |
49 | /// let strs = vec![&b"a" , &b"xy" ]; |
50 | /// ``` |
51 | /// |
52 | /// But this just creates values with type `& &'static [u8; 1]` and |
53 | /// `& &'static [u8; 2]`. Instead, you need to force the issue like so: |
54 | /// |
55 | /// ``` |
56 | /// let strs = vec![&b"a" [..], &b"xy" [..]]; |
57 | /// // or |
58 | /// let strs = vec![b"a" .as_ref(), b"xy" .as_ref()]; |
59 | /// ``` |
60 | /// |
61 | /// But neither of these are particularly convenient to type, especially when |
62 | /// it's something as common as a string literal. Thus, this constructor |
63 | /// permits writing the following instead: |
64 | /// |
65 | /// ``` |
66 | /// use bstr::B; |
67 | /// |
68 | /// let strs = vec![B("a" ), B(b"xy" )]; |
69 | /// ``` |
70 | /// |
71 | /// Notice that this also lets you mix and match both string literals and byte |
72 | /// string literals. This can be quite convenient! |
#[allow(non_snake_case)]
#[inline]
pub fn B<'a, B>(bytes: &'a B) -> &'a [u8]
where
    B: ?Sized + AsRef<[u8]>,
{
    // Works for `&str`, `&[u8]`, `&[u8; N]` and anything else that can be
    // viewed as bytes; the returned slice borrows from `bytes`.
    AsRef::<[u8]>::as_ref(bytes)
}
78 | |
79 | impl ByteSlice for [u8] { |
80 | #[inline ] |
81 | fn as_bytes(&self) -> &[u8] { |
82 | self |
83 | } |
84 | |
85 | #[inline ] |
86 | fn as_bytes_mut(&mut self) -> &mut [u8] { |
87 | self |
88 | } |
89 | } |
90 | |
91 | impl<const N: usize> ByteSlice for [u8; N] { |
92 | #[inline ] |
93 | fn as_bytes(&self) -> &[u8] { |
94 | self |
95 | } |
96 | |
97 | #[inline ] |
98 | fn as_bytes_mut(&mut self) -> &mut [u8] { |
99 | self |
100 | } |
101 | } |
102 | |
/// Ensure that callers cannot implement `ByteSlice` by making an
/// unimplementable trait its super trait.
mod private {
    /// Marker supertrait of `ByteSlice`; it is only implemented for the byte
    /// slice and byte array types listed right below this module.
    pub trait Sealed {}
}
// The complete list of types that are allowed to be a `ByteSlice`.
impl<const N: usize> private::Sealed for [u8; N] {}
impl private::Sealed for [u8] {}
110 | |
111 | /// A trait that extends `&[u8]` with string oriented methods. |
112 | /// |
113 | /// This trait is sealed and cannot be implemented outside of `bstr`. |
114 | pub trait ByteSlice: private::Sealed { |
115 | /// Returns the raw bytes of this type as a `&[u8]`. This is always a |
116 | /// no-op; it exists only so that the provided methods of this extension |
117 | /// trait can reach the underlying bytes. Callers shouldn't care about it. |
118 | #[doc (hidden)] |
119 | fn as_bytes(&self) -> &[u8]; |
120 | |
121 | /// Returns the raw bytes of this type as a `&mut [u8]`. This is always |
122 | /// a no-op; it exists only so that the provided methods of this extension |
123 | /// trait can mutate the underlying bytes. Callers shouldn't care about it. |
124 | #[doc (hidden)] |
125 | fn as_bytes_mut(&mut self) -> &mut [u8]; |
126 | |
127 | /// Return this byte slice as a `&BStr`. |
128 | /// |
129 | /// Using `&BStr` is useful because of its `fmt::Debug` representation |
130 | /// and various other trait implementations (such as `PartialEq` and |
131 | /// `PartialOrd`). In particular, the `Debug` implementation for `BStr` |
132 | /// shows its bytes as a normal string. For invalid UTF-8, hex escape |
133 | /// sequences are used. |
134 | /// |
135 | /// # Examples |
136 | /// |
137 | /// Basic usage: |
138 | /// |
139 | /// ``` |
140 | /// use bstr::ByteSlice; |
141 | /// |
142 | /// println!("{:?}" , b"foo \xFFbar" .as_bstr()); |
143 | /// ``` |
144 | #[inline ] |
145 | fn as_bstr(&self) -> &BStr { |
146 | BStr::new(self.as_bytes()) |
147 | } |
148 | |
149 | /// Return this byte slice as a `&mut BStr`. |
150 | /// |
151 | /// Using `&mut BStr` is useful because of its `fmt::Debug` representation |
152 | /// and various other trait implementations (such as `PartialEq` and |
153 | /// `PartialOrd`). In particular, the `Debug` implementation for `BStr` |
154 | /// shows its bytes as a normal string. For invalid UTF-8, hex escape |
155 | /// sequences are used. |
156 | /// |
157 | /// # Examples |
158 | /// |
159 | /// Basic usage: |
160 | /// |
161 | /// ``` |
162 | /// use bstr::ByteSlice; |
163 | /// |
164 | /// let mut bytes = *b"foo \xFFbar" ; |
165 | /// println!("{:?}" , &mut bytes.as_bstr_mut()); |
166 | /// ``` |
167 | #[inline ] |
168 | fn as_bstr_mut(&mut self) -> &mut BStr { |
169 | BStr::new_mut(self.as_bytes_mut()) |
170 | } |
171 | |
172 | /// Create an immutable byte string from an OS string slice. |
173 | /// |
174 | /// When the underlying bytes of OS strings are accessible, then this |
175 | /// always succeeds and is zero cost. Otherwise, this returns `None` if the |
176 | /// given OS string is not valid UTF-8. (For example, when the underlying |
177 | /// bytes are inaccessible on Windows, file paths are allowed to be a |
178 | /// sequence of arbitrary 16-bit integers. Not all such sequences can be |
179 | /// transcoded to valid UTF-8.) |
180 | /// |
181 | /// # Examples |
182 | /// |
183 | /// Basic usage: |
184 | /// |
185 | /// ``` |
186 | /// use std::ffi::OsStr; |
187 | /// |
188 | /// use bstr::{B, ByteSlice}; |
189 | /// |
190 | /// let os_str = OsStr::new("foo" ); |
191 | /// let bs = <[u8]>::from_os_str(os_str).expect("should be valid UTF-8" ); |
192 | /// assert_eq!(bs, B("foo" )); |
193 | /// ``` |
#[cfg(feature = "std")]
#[inline]
fn from_os_str(os_str: &OsStr) -> Option<&[u8]> {
    // Without access to the raw representation the only route to bytes is
    // through `&str`, which fails (`None`) when the OS string is not valid
    // UTF-8.
    #[cfg(not(unix))]
    #[inline]
    fn raw_bytes(source: &OsStr) -> Option<&[u8]> {
        let text = source.to_str()?;
        Some(text.as_bytes())
    }

    // On Unix the bytes of an OS string are directly accessible, so this
    // branch never fails.
    #[cfg(unix)]
    #[inline]
    fn raw_bytes(source: &OsStr) -> Option<&[u8]> {
        use std::os::unix::ffi::OsStrExt;

        Some(source.as_bytes())
    }

    raw_bytes(os_str)
}
213 | |
214 | /// Create an immutable byte string from a file path. |
215 | /// |
216 | /// When the underlying bytes of paths are accessible, then this always |
217 | /// succeeds and is zero cost. Otherwise, this returns `None` if the given |
218 | /// path is not valid UTF-8. (For example, when the underlying bytes are |
219 | /// inaccessible on Windows, file paths are allowed to be a sequence of |
220 | /// arbitrary 16-bit integers. Not all such sequences can be transcoded to |
221 | /// valid UTF-8.) |
222 | /// |
223 | /// # Examples |
224 | /// |
225 | /// Basic usage: |
226 | /// |
227 | /// ``` |
228 | /// use std::path::Path; |
229 | /// |
230 | /// use bstr::{B, ByteSlice}; |
231 | /// |
232 | /// let path = Path::new("foo" ); |
233 | /// let bs = <[u8]>::from_path(path).expect("should be valid UTF-8" ); |
234 | /// assert_eq!(bs, B("foo" )); |
235 | /// ``` |
#[cfg(feature = "std")]
#[inline]
fn from_path(path: &Path) -> Option<&[u8]> {
    // A path is converted by way of its OS string, so it succeeds and
    // fails under exactly the same conditions as `from_os_str`.
    let os_str = path.as_os_str();
    Self::from_os_str(os_str)
}
241 | |
242 | /// Safely convert this byte string into a `&str` if it's valid UTF-8. |
243 | /// |
244 | /// If this byte string is not valid UTF-8, then an error is returned. The |
245 | /// error returned indicates the first invalid byte found and the length |
246 | /// of the error. |
247 | /// |
248 | /// In cases where a lossy conversion to `&str` is acceptable, then use one |
249 | /// of the [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) or |
250 | /// [`to_str_lossy_into`](trait.ByteSlice.html#method.to_str_lossy_into) |
251 | /// methods. |
252 | /// |
253 | /// # Examples |
254 | /// |
255 | /// Basic usage: |
256 | /// |
257 | /// ``` |
258 | /// # #[cfg (feature = "alloc" )] { |
259 | /// use bstr::{B, ByteSlice, ByteVec}; |
260 | /// |
261 | /// # fn example() -> Result<(), bstr::Utf8Error> { |
262 | /// let s = B("☃βツ" ).to_str()?; |
263 | /// assert_eq!("☃βツ" , s); |
264 | /// |
265 | /// let mut bstring = <Vec<u8>>::from("☃βツ" ); |
266 | /// bstring.push(b' \xFF' ); |
267 | /// let err = bstring.to_str().unwrap_err(); |
268 | /// assert_eq!(8, err.valid_up_to()); |
269 | /// # Ok(()) }; example().unwrap() |
270 | /// # } |
271 | /// ``` |
272 | #[inline ] |
273 | fn to_str(&self) -> Result<&str, Utf8Error> { |
274 | utf8::validate(self.as_bytes()).map(|_| { |
275 | // SAFETY: This is safe because of the guarantees provided by |
276 | // utf8::validate. |
277 | unsafe { str::from_utf8_unchecked(self.as_bytes()) } |
278 | }) |
279 | } |
280 | |
281 | /// Unsafely convert this byte string into a `&str`, without checking for |
282 | /// valid UTF-8. |
283 | /// |
284 | /// # Safety |
285 | /// |
286 | /// Callers *must* ensure that this byte string is valid UTF-8 before |
287 | /// calling this method. Converting a byte string into a `&str` that is |
288 | /// not valid UTF-8 is considered undefined behavior. |
289 | /// |
290 | /// This routine is useful in performance sensitive contexts where the |
291 | /// UTF-8 validity of the byte string is already known and it is |
292 | /// undesirable to pay the cost of an additional UTF-8 validation check |
293 | /// that [`to_str`](trait.ByteSlice.html#method.to_str) performs. |
294 | /// |
295 | /// # Examples |
296 | /// |
297 | /// Basic usage: |
298 | /// |
299 | /// ``` |
300 | /// use bstr::{B, ByteSlice}; |
301 | /// |
302 | /// // SAFETY: This is safe because string literals are guaranteed to be |
303 | /// // valid UTF-8 by the Rust compiler. |
304 | /// let s = unsafe { B("☃βツ" ).to_str_unchecked() }; |
305 | /// assert_eq!("☃βツ" , s); |
306 | /// ``` |
307 | #[inline ] |
308 | unsafe fn to_str_unchecked(&self) -> &str { |
309 | str::from_utf8_unchecked(self.as_bytes()) |
310 | } |
311 | |
/// Convert this byte string to a valid UTF-8 string by replacing invalid
/// UTF-8 bytes with the Unicode replacement codepoint (`U+FFFD`).
///
/// If the byte string is already valid UTF-8, then no copying or
/// allocation is performed and a borrowed string slice is returned. If
/// the byte string is not valid UTF-8, then an owned string buffer is
/// returned with invalid bytes replaced by the replacement codepoint.
///
/// This method uses the "substitution of maximal subparts" (Unicode
/// Standard, Chapter 3, Section 9) strategy for inserting the replacement
/// codepoint. Specifically, a replacement codepoint is inserted whenever a
/// byte is found that cannot possibly lead to a valid code unit sequence.
/// If there were previous bytes that represented a prefix of a well-formed
/// code unit sequence, then all of those bytes are substituted with a
/// single replacement codepoint. The "substitution of maximal subparts"
/// strategy is the same strategy used by
/// [W3C's Encoding standard](https://www.w3.org/TR/encoding/).
/// For a more precise description of the maximal subpart strategy, see
/// the Unicode Standard, Chapter 3, Section 9. See also
/// [Public Review Issue #121](https://www.unicode.org/review/pr-121.html).
///
/// N.B. Rust's standard library also appears to use the same strategy,
/// but it does not appear to be an API guarantee.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use std::borrow::Cow;
///
/// use bstr::ByteSlice;
///
/// let mut bstring = <Vec<u8>>::from("☃βツ");
/// assert_eq!(Cow::Borrowed("☃βツ"), bstring.to_str_lossy());
///
/// // Add a byte that makes the sequence invalid.
/// bstring.push(b'\xFF');
/// assert_eq!(Cow::Borrowed("☃βツ\u{FFFD}"), bstring.to_str_lossy());
/// ```
///
/// This demonstrates the "maximal subpart" substitution logic.
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// // \x61 is the ASCII codepoint for 'a'.
/// // \xF1\x80\x80 is a valid 3-byte code unit prefix.
/// // \xE1\x80 is a valid 2-byte code unit prefix.
/// // \xC2 is a valid 1-byte code unit prefix.
/// // \x62 is the ASCII codepoint for 'b'.
/// //
/// // In sum, each of the prefixes is replaced by a single replacement
/// // codepoint since none of the prefixes are properly completed. This
/// // is in contrast to other strategies that might insert a replacement
/// // codepoint for every single byte.
/// let bs = B(b"\x61\xF1\x80\x80\xE1\x80\xC2\x62");
/// assert_eq!("a\u{FFFD}\u{FFFD}\u{FFFD}b", bs.to_str_lossy());
/// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_str_lossy(&self) -> Cow<'_, str> {
    let bytes = self.as_bytes();
    let err = match utf8::validate(bytes) {
        Ok(()) => {
            // Fast path: nothing to replace, so borrow instead of copying.
            //
            // SAFETY: This is safe because of the guarantees provided by
            // utf8::validate.
            return Cow::Borrowed(unsafe { str::from_utf8_unchecked(bytes) });
        }
        Err(err) => err,
    };

    // The first error was already located by the validation above, so
    // handle it here and only hand what follows it to `to_str_lossy_into`.
    let mut lossy = String::with_capacity(bytes.len());
    let (valid, after) = bytes.split_at(err.valid_up_to());
    // SAFETY: This is safe because utf8::validate guarantees that all of
    // `valid` is valid UTF-8.
    lossy.push_str(unsafe { str::from_utf8_unchecked(valid) });
    // Exactly one U+FFFD per maximal invalid subpart, with no padding
    // around it.
    lossy.push('\u{FFFD}');
    // `None` means the input ended in the middle of a code unit sequence:
    // the replacement codepoint just pushed covers the whole remainder.
    if let Some(len) = err.error_len() {
        after[len..].to_str_lossy_into(&mut lossy);
    }
    Cow::Owned(lossy)
}
397 | |
/// Copy the contents of this byte string into the given owned string
/// buffer, while replacing invalid UTF-8 code unit sequences with the
/// Unicode replacement codepoint (`U+FFFD`).
///
/// This method uses the same "substitution of maximal subparts" strategy
/// for inserting the replacement codepoint as the
/// [`to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) method.
///
/// This routine is useful for amortizing allocation. However, unlike
/// `to_str_lossy`, this routine will _always_ copy the contents of this
/// byte string into the destination buffer, even if this byte string is
/// valid UTF-8.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteSlice;
///
/// let mut bstring = <Vec<u8>>::from("☃βツ");
/// // Add a byte that makes the sequence invalid.
/// bstring.push(b'\xFF');
///
/// let mut dest = String::new();
/// bstring.to_str_lossy_into(&mut dest);
/// assert_eq!("☃βツ\u{FFFD}", dest);
/// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_str_lossy_into(&self, dest: &mut String) {
    let mut bytes = self.as_bytes();
    dest.reserve(bytes.len());
    // Each iteration consumes one valid run plus the invalid sequence that
    // terminates it.
    while let Err(err) = utf8::validate(bytes) {
        let (valid, after) = bytes.split_at(err.valid_up_to());
        // SAFETY: This is safe because utf8::validate guarantees that all
        // of `valid` is valid UTF-8.
        dest.push_str(unsafe { str::from_utf8_unchecked(valid) });
        // Exactly one U+FFFD per maximal invalid subpart, with no padding
        // around it.
        dest.push('\u{FFFD}');
        match err.error_len() {
            // The input ended in the middle of a code unit sequence, so
            // there is nothing left to copy.
            None => return,
            Some(len) => bytes = &after[len..],
        }
    }
    // SAFETY: This is safe because utf8::validate guarantees that all of
    // `bytes` is valid UTF-8.
    dest.push_str(unsafe { str::from_utf8_unchecked(bytes) });
}
455 | |
456 | /// Create an OS string slice from this byte string. |
457 | /// |
458 | /// When OS strings can be constructed from arbitrary byte sequences, this |
459 | /// always succeeds and is zero cost. Otherwise, this returns a UTF-8 |
460 | /// decoding error if this byte string is not valid UTF-8. (For example, |
461 | /// assuming the representation of `OsStr` is opaque on Windows, file paths |
462 | /// are allowed to be a sequence of arbitrary 16-bit integers. There is |
463 | /// no obvious mapping from an arbitrary sequence of 8-bit integers to an |
464 | /// arbitrary sequence of 16-bit integers. If the representation of `OsStr` |
465 | /// is ever opened up, then this will convert any sequence of bytes to an |
466 | /// `OsStr` without cost.) |
467 | /// |
468 | /// # Examples |
469 | /// |
470 | /// Basic usage: |
471 | /// |
472 | /// ``` |
473 | /// use bstr::{B, ByteSlice}; |
474 | /// |
475 | /// let os_str = b"foo" .to_os_str().expect("should be valid UTF-8" ); |
476 | /// assert_eq!(os_str, "foo" ); |
477 | /// ``` |
#[cfg(feature = "std")]
#[inline]
fn to_os_str(&self) -> Result<&OsStr, Utf8Error> {
    // Elsewhere an `OsStr` can only be built from a `&str`, so the bytes
    // have to pass UTF-8 validation first.
    #[cfg(not(unix))]
    #[inline]
    fn convert(raw: &[u8]) -> Result<&OsStr, Utf8Error> {
        let text = raw.to_str()?;
        Ok(OsStr::new(text))
    }

    // On Unix any byte sequence is accepted as an OS string as is.
    #[cfg(unix)]
    #[inline]
    fn convert(raw: &[u8]) -> Result<&OsStr, Utf8Error> {
        use std::os::unix::ffi::OsStrExt;

        Ok(OsStr::from_bytes(raw))
    }

    let bytes = self.as_bytes();
    convert(bytes)
}
497 | |
498 | /// Lossily create an OS string slice from this byte string. |
499 | /// |
500 | /// When OS strings can be constructed from arbitrary byte sequences, this |
501 | /// is zero cost and always returns a slice. Otherwise, this will perform a |
502 | /// UTF-8 check and lossily convert this byte string into valid UTF-8 using |
503 | /// the Unicode replacement codepoint. |
504 | /// |
505 | /// Note that this can prevent the correct roundtripping of file paths when |
506 | /// the representation of `OsStr` is opaque. |
507 | /// |
508 | /// # Examples |
509 | /// |
510 | /// Basic usage: |
511 | /// |
512 | /// ``` |
513 | /// use bstr::ByteSlice; |
514 | /// |
515 | /// let os_str = b"foo \xFFbar" .to_os_str_lossy(); |
516 | /// assert_eq!(os_str.to_string_lossy(), "foo \u{FFFD}bar" ); |
517 | /// ``` |
#[cfg(feature = "std")]
#[inline]
fn to_os_str_lossy(&self) -> Cow<'_, OsStr> {
    // Elsewhere, go through a lossy UTF-8 conversion and keep the result
    // borrowed whenever that conversion did not have to allocate.
    #[cfg(not(unix))]
    #[inline]
    fn convert(raw: &[u8]) -> Cow<'_, OsStr> {
        match raw.to_str_lossy() {
            Cow::Owned(text) => Cow::Owned(std::ffi::OsString::from(text)),
            Cow::Borrowed(text) => Cow::Borrowed(OsStr::new(text)),
        }
    }

    // On Unix any byte sequence is accepted as an OS string as is, so
    // nothing is ever replaced and the result is always borrowed.
    #[cfg(unix)]
    #[inline]
    fn convert(raw: &[u8]) -> Cow<'_, OsStr> {
        use std::os::unix::ffi::OsStrExt;

        Cow::Borrowed(OsStr::from_bytes(raw))
    }

    let bytes = self.as_bytes();
    convert(bytes)
}
542 | |
543 | /// Create a path slice from this byte string. |
544 | /// |
545 | /// When paths can be constructed from arbitrary byte sequences, this |
546 | /// always succeeds and is zero cost. Otherwise, this returns a UTF-8 |
547 | /// decoding error if this byte string is not valid UTF-8. (For example, |
548 | /// assuming the representation of `Path` is opaque on Windows, file paths |
549 | /// are allowed to be a sequence of arbitrary 16-bit integers. There is |
550 | /// no obvious mapping from an arbitrary sequence of 8-bit integers to an |
551 | /// arbitrary sequence of 16-bit integers. If the representation of `Path` |
552 | /// is ever opened up, then this will convert any sequence of bytes to a |
553 | /// `Path` without cost.) |
554 | /// |
555 | /// # Examples |
556 | /// |
557 | /// Basic usage: |
558 | /// |
559 | /// ``` |
560 | /// use bstr::ByteSlice; |
561 | /// |
562 | /// let path = b"foo" .to_path().expect("should be valid UTF-8" ); |
563 | /// assert_eq!(path.as_os_str(), "foo" ); |
564 | /// ``` |
#[cfg(feature = "std")]
#[inline]
fn to_path(&self) -> Result<&Path, Utf8Error> {
    // A path is just a view over an OS string, so any failure comes from
    // the OS string conversion.
    let os_str = self.to_os_str()?;
    Ok(Path::new(os_str))
}
570 | |
571 | /// Lossily create a path slice from this byte string. |
572 | /// |
573 | /// When paths can be constructed from arbitrary byte sequences, this is |
574 | /// zero cost and always returns a slice. Otherwise, this will perform a |
575 | /// UTF-8 check and lossily convert this byte string into valid UTF-8 using |
576 | /// the Unicode replacement codepoint. |
577 | /// |
578 | /// Note that this can prevent the correct roundtripping of file paths when |
579 | /// the representation of `Path` is opaque. |
580 | /// |
581 | /// # Examples |
582 | /// |
583 | /// Basic usage: |
584 | /// |
585 | /// ``` |
586 | /// use bstr::ByteSlice; |
587 | /// |
588 | /// let bs = b"foo \xFFbar" ; |
589 | /// let path = bs.to_path_lossy(); |
590 | /// assert_eq!(path.to_string_lossy(), "foo \u{FFFD}bar" ); |
591 | /// ``` |
#[cfg(feature = "std")]
#[inline]
fn to_path_lossy(&self) -> Cow<'_, Path> {
    // Mirror the borrowed/owned state of the lossy OS string so that no
    // extra allocation is introduced at this level.
    match self.to_os_str_lossy() {
        Cow::Owned(os_string) => {
            Cow::Owned(std::path::PathBuf::from(os_string))
        }
        Cow::Borrowed(os_str) => Cow::Borrowed(Path::new(os_str)),
    }
}
602 | |
603 | /// Create a new byte string by repeating this byte string `n` times. |
604 | /// |
605 | /// # Panics |
606 | /// |
607 | /// This function panics if the capacity of the new byte string would |
608 | /// overflow. |
609 | /// |
610 | /// # Examples |
611 | /// |
612 | /// Basic usage: |
613 | /// |
614 | /// ``` |
615 | /// use bstr::{B, ByteSlice}; |
616 | /// |
617 | /// assert_eq!(b"foo" .repeatn(4), B("foofoofoofoo" )); |
618 | /// assert_eq!(b"foo" .repeatn(0), B("" )); |
619 | /// ``` |
#[cfg(feature = "alloc")]
#[inline]
fn repeatn(&self, n: usize) -> Vec<u8> {
    // Delegates to the slice `repeat` method.
    let bytes = self.as_bytes();
    bytes.repeat(n)
}
625 | |
626 | /// Returns true if and only if this byte string contains the given needle. |
627 | /// |
628 | /// # Examples |
629 | /// |
630 | /// Basic usage: |
631 | /// |
632 | /// ``` |
633 | /// use bstr::ByteSlice; |
634 | /// |
635 | /// assert!(b"foo bar" .contains_str("foo" )); |
636 | /// assert!(b"foo bar" .contains_str("bar" )); |
637 | /// assert!(!b"foo" .contains_str("foobar" )); |
638 | /// ``` |
639 | #[inline ] |
640 | fn contains_str<B: AsRef<[u8]>>(&self, needle: B) -> bool { |
641 | self.find(needle).is_some() |
642 | } |
643 | |
644 | /// Returns true if and only if this byte string has the given prefix. |
645 | /// |
646 | /// # Examples |
647 | /// |
648 | /// Basic usage: |
649 | /// |
650 | /// ``` |
651 | /// use bstr::ByteSlice; |
652 | /// |
653 | /// assert!(b"foo bar" .starts_with_str("foo" )); |
654 | /// assert!(!b"foo bar" .starts_with_str("bar" )); |
655 | /// assert!(!b"foo" .starts_with_str("foobar" )); |
656 | /// ``` |
657 | #[inline ] |
658 | fn starts_with_str<B: AsRef<[u8]>>(&self, prefix: B) -> bool { |
659 | self.as_bytes().starts_with(prefix.as_ref()) |
660 | } |
661 | |
662 | /// Returns true if and only if this byte string has the given suffix. |
663 | /// |
664 | /// # Examples |
665 | /// |
666 | /// Basic usage: |
667 | /// |
668 | /// ``` |
669 | /// use bstr::ByteSlice; |
670 | /// |
671 | /// assert!(b"foo bar" .ends_with_str("bar" )); |
672 | /// assert!(!b"foo bar" .ends_with_str("foo" )); |
673 | /// assert!(!b"bar" .ends_with_str("foobar" )); |
674 | /// ``` |
675 | #[inline ] |
676 | fn ends_with_str<B: AsRef<[u8]>>(&self, suffix: B) -> bool { |
677 | self.as_bytes().ends_with(suffix.as_ref()) |
678 | } |
679 | |
680 | /// Returns the index of the first occurrence of the given needle. |
681 | /// |
682 | /// The needle may be any type that can be cheaply converted into a |
683 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
684 | /// |
685 | /// Note that if you're searching for the same needle in many |
686 | /// different small haystacks, it may be faster to initialize a |
687 | /// [`Finder`](struct.Finder.html) once, and reuse it for each search. |
688 | /// |
689 | /// # Complexity |
690 | /// |
691 | /// This routine is guaranteed to have worst case linear time complexity |
692 | /// with respect to both the needle and the haystack. That is, this runs |
693 | /// in `O(needle.len() + haystack.len())` time. |
694 | /// |
695 | /// This routine is also guaranteed to have worst case constant space |
696 | /// complexity. |
697 | /// |
698 | /// # Examples |
699 | /// |
700 | /// Basic usage: |
701 | /// |
702 | /// ``` |
703 | /// use bstr::ByteSlice; |
704 | /// |
705 | /// let s = b"foo bar baz" ; |
706 | /// assert_eq!(Some(0), s.find("foo" )); |
707 | /// assert_eq!(Some(4), s.find("bar" )); |
708 | /// assert_eq!(None, s.find("quux" )); |
709 | /// ``` |
710 | #[inline ] |
711 | fn find<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> { |
712 | Finder::new(needle.as_ref()).find(self.as_bytes()) |
713 | } |
714 | |
715 | /// Returns the index of the last occurrence of the given needle. |
716 | /// |
717 | /// The needle may be any type that can be cheaply converted into a |
718 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
719 | /// |
720 | /// Note that if you're searching for the same needle in many |
721 | /// different small haystacks, it may be faster to initialize a |
722 | /// [`FinderReverse`](struct.FinderReverse.html) once, and reuse it for |
723 | /// each search. |
724 | /// |
725 | /// # Complexity |
726 | /// |
727 | /// This routine is guaranteed to have worst case linear time complexity |
728 | /// with respect to both the needle and the haystack. That is, this runs |
729 | /// in `O(needle.len() + haystack.len())` time. |
730 | /// |
731 | /// This routine is also guaranteed to have worst case constant space |
732 | /// complexity. |
733 | /// |
734 | /// # Examples |
735 | /// |
736 | /// Basic usage: |
737 | /// |
738 | /// ``` |
739 | /// use bstr::ByteSlice; |
740 | /// |
741 | /// let s = b"foo bar baz" ; |
742 | /// assert_eq!(Some(0), s.rfind("foo" )); |
743 | /// assert_eq!(Some(4), s.rfind("bar" )); |
744 | /// assert_eq!(Some(8), s.rfind("ba" )); |
745 | /// assert_eq!(None, s.rfind("quux" )); |
746 | /// ``` |
747 | #[inline ] |
748 | fn rfind<B: AsRef<[u8]>>(&self, needle: B) -> Option<usize> { |
749 | FinderReverse::new(needle.as_ref()).rfind(self.as_bytes()) |
750 | } |
751 | |
752 | /// Returns an iterator of the non-overlapping occurrences of the given |
753 | /// needle. The iterator yields byte offset positions indicating the start |
754 | /// of each match. |
755 | /// |
756 | /// # Complexity |
757 | /// |
758 | /// This routine is guaranteed to have worst case linear time complexity |
759 | /// with respect to both the needle and the haystack. That is, this runs |
760 | /// in `O(needle.len() + haystack.len())` time. |
761 | /// |
762 | /// This routine is also guaranteed to have worst case constant space |
763 | /// complexity. |
764 | /// |
765 | /// # Examples |
766 | /// |
767 | /// Basic usage: |
768 | /// |
769 | /// ``` |
770 | /// use bstr::ByteSlice; |
771 | /// |
772 | /// let s = b"foo bar foo foo quux foo" ; |
773 | /// let matches: Vec<usize> = s.find_iter("foo" ).collect(); |
774 | /// assert_eq!(matches, vec![0, 8, 12, 21]); |
775 | /// ``` |
776 | /// |
777 | /// An empty string matches at every position, including the position |
778 | /// immediately following the last byte: |
779 | /// |
780 | /// ``` |
781 | /// use bstr::ByteSlice; |
782 | /// |
783 | /// let matches: Vec<usize> = b"foo" .find_iter("" ).collect(); |
784 | /// assert_eq!(matches, vec![0, 1, 2, 3]); |
785 | /// |
786 | /// let matches: Vec<usize> = b"" .find_iter("" ).collect(); |
787 | /// assert_eq!(matches, vec![0]); |
788 | /// ``` |
789 | #[inline ] |
790 | fn find_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>( |
791 | &'h self, |
792 | needle: &'n B, |
793 | ) -> Find<'h, 'n> { |
794 | Find::new(self.as_bytes(), needle.as_ref()) |
795 | } |
796 | |
797 | /// Returns an iterator of the non-overlapping occurrences of the given |
798 | /// needle in reverse. The iterator yields byte offset positions indicating |
799 | /// the start of each match. |
800 | /// |
801 | /// # Complexity |
802 | /// |
803 | /// This routine is guaranteed to have worst case linear time complexity |
804 | /// with respect to both the needle and the haystack. That is, this runs |
805 | /// in `O(needle.len() + haystack.len())` time. |
806 | /// |
807 | /// This routine is also guaranteed to have worst case constant space |
808 | /// complexity. |
809 | /// |
810 | /// # Examples |
811 | /// |
812 | /// Basic usage: |
813 | /// |
814 | /// ``` |
815 | /// use bstr::ByteSlice; |
816 | /// |
817 | /// let s = b"foo bar foo foo quux foo" ; |
818 | /// let matches: Vec<usize> = s.rfind_iter("foo" ).collect(); |
819 | /// assert_eq!(matches, vec![21, 12, 8, 0]); |
820 | /// ``` |
821 | /// |
822 | /// An empty string matches at every position, including the position |
823 | /// immediately following the last byte: |
824 | /// |
825 | /// ``` |
826 | /// use bstr::ByteSlice; |
827 | /// |
828 | /// let matches: Vec<usize> = b"foo" .rfind_iter("" ).collect(); |
829 | /// assert_eq!(matches, vec![3, 2, 1, 0]); |
830 | /// |
831 | /// let matches: Vec<usize> = b"" .rfind_iter("" ).collect(); |
832 | /// assert_eq!(matches, vec![0]); |
833 | /// ``` |
834 | #[inline ] |
835 | fn rfind_iter<'h, 'n, B: ?Sized + AsRef<[u8]>>( |
836 | &'h self, |
837 | needle: &'n B, |
838 | ) -> FindReverse<'h, 'n> { |
839 | FindReverse::new(self.as_bytes(), needle.as_ref()) |
840 | } |
841 | |
842 | /// Returns the index of the first occurrence of the given byte. If the |
843 | /// byte does not occur in this byte string, then `None` is returned. |
844 | /// |
845 | /// # Examples |
846 | /// |
847 | /// Basic usage: |
848 | /// |
849 | /// ``` |
850 | /// use bstr::ByteSlice; |
851 | /// |
852 | /// assert_eq!(Some(10), b"foo bar baz" .find_byte(b'z' )); |
853 | /// assert_eq!(None, b"foo bar baz" .find_byte(b'y' )); |
854 | /// ``` |
855 | #[inline ] |
856 | fn find_byte(&self, byte: u8) -> Option<usize> { |
857 | memchr(byte, self.as_bytes()) |
858 | } |
859 | |
860 | /// Returns the index of the last occurrence of the given byte. If the |
861 | /// byte does not occur in this byte string, then `None` is returned. |
862 | /// |
863 | /// # Examples |
864 | /// |
865 | /// Basic usage: |
866 | /// |
867 | /// ``` |
868 | /// use bstr::ByteSlice; |
869 | /// |
870 | /// assert_eq!(Some(10), b"foo bar baz" .rfind_byte(b'z' )); |
871 | /// assert_eq!(None, b"foo bar baz" .rfind_byte(b'y' )); |
872 | /// ``` |
873 | #[inline ] |
874 | fn rfind_byte(&self, byte: u8) -> Option<usize> { |
875 | memrchr(byte, self.as_bytes()) |
876 | } |
877 | |
878 | /// Returns the index of the first occurrence of the given codepoint. |
879 | /// If the codepoint does not occur in this byte string, then `None` is |
880 | /// returned. |
881 | /// |
882 | /// Note that if one searches for the replacement codepoint, `\u{FFFD}`, |
883 | /// then only explicit occurrences of that encoding will be found. Invalid |
884 | /// UTF-8 sequences will not be matched. |
885 | /// |
886 | /// # Examples |
887 | /// |
888 | /// Basic usage: |
889 | /// |
890 | /// ``` |
891 | /// use bstr::{B, ByteSlice}; |
892 | /// |
893 | /// assert_eq!(Some(10), b"foo bar baz" .find_char('z' )); |
894 | /// assert_eq!(Some(4), B("αβγγδ" ).find_char('γ' )); |
895 | /// assert_eq!(None, b"foo bar baz" .find_char('y' )); |
896 | /// ``` |
897 | #[inline ] |
898 | fn find_char(&self, ch: char) -> Option<usize> { |
899 | self.find(ch.encode_utf8(&mut [0; 4])) |
900 | } |
901 | |
902 | /// Returns the index of the last occurrence of the given codepoint. |
903 | /// If the codepoint does not occur in this byte string, then `None` is |
904 | /// returned. |
905 | /// |
906 | /// Note that if one searches for the replacement codepoint, `\u{FFFD}`, |
907 | /// then only explicit occurrences of that encoding will be found. Invalid |
908 | /// UTF-8 sequences will not be matched. |
909 | /// |
910 | /// # Examples |
911 | /// |
912 | /// Basic usage: |
913 | /// |
914 | /// ``` |
915 | /// use bstr::{B, ByteSlice}; |
916 | /// |
917 | /// assert_eq!(Some(10), b"foo bar baz" .rfind_char('z' )); |
918 | /// assert_eq!(Some(6), B("αβγγδ" ).rfind_char('γ' )); |
919 | /// assert_eq!(None, b"foo bar baz" .rfind_char('y' )); |
920 | /// ``` |
921 | #[inline ] |
922 | fn rfind_char(&self, ch: char) -> Option<usize> { |
923 | self.rfind(ch.encode_utf8(&mut [0; 4])) |
924 | } |
925 | |
926 | /// Returns the index of the first occurrence of any of the bytes in the |
927 | /// provided set. |
928 | /// |
929 | /// The `byteset` may be any type that can be cheaply converted into a |
930 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but |
931 | /// note that passing a `&str` which contains multibyte characters may not |
932 | /// behave as you expect: each byte in the `&str` is treated as an |
933 | /// individual member of the byte set. |
934 | /// |
935 | /// Note that order is irrelevant for the `byteset` parameter, and |
936 | /// duplicate bytes present in its body are ignored. |
937 | /// |
938 | /// # Complexity |
939 | /// |
940 | /// This routine is guaranteed to have worst case linear time complexity |
941 | /// with respect to both the set of bytes and the haystack. That is, this |
942 | /// runs in `O(byteset.len() + haystack.len())` time. |
943 | /// |
944 | /// This routine is also guaranteed to have worst case constant space |
945 | /// complexity. |
946 | /// |
947 | /// # Examples |
948 | /// |
949 | /// Basic usage: |
950 | /// |
951 | /// ``` |
952 | /// use bstr::ByteSlice; |
953 | /// |
954 | /// assert_eq!(b"foo bar baz" .find_byteset(b"zr" ), Some(6)); |
955 | /// assert_eq!(b"foo baz bar" .find_byteset(b"bzr" ), Some(4)); |
/// assert_eq!(None, b"foo baz bar".find_byteset(b"\t\n"));
957 | /// // The empty byteset never matches. |
958 | /// assert_eq!(None, b"abc" .find_byteset(b"" )); |
959 | /// assert_eq!(None, b"" .find_byteset(b"" )); |
960 | /// ``` |
961 | #[inline ] |
962 | fn find_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { |
963 | byteset::find(self.as_bytes(), byteset.as_ref()) |
964 | } |
965 | |
966 | /// Returns the index of the first occurrence of a byte that is not a |
967 | /// member of the provided set. |
968 | /// |
969 | /// The `byteset` may be any type that can be cheaply converted into a |
970 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but |
971 | /// note that passing a `&str` which contains multibyte characters may not |
972 | /// behave as you expect: each byte in the `&str` is treated as an |
973 | /// individual member of the byte set. |
974 | /// |
975 | /// Note that order is irrelevant for the `byteset` parameter, and |
976 | /// duplicate bytes present in its body are ignored. |
977 | /// |
978 | /// # Complexity |
979 | /// |
980 | /// This routine is guaranteed to have worst case linear time complexity |
981 | /// with respect to both the set of bytes and the haystack. That is, this |
982 | /// runs in `O(byteset.len() + haystack.len())` time. |
983 | /// |
984 | /// This routine is also guaranteed to have worst case constant space |
985 | /// complexity. |
986 | /// |
987 | /// # Examples |
988 | /// |
989 | /// Basic usage: |
990 | /// |
991 | /// ``` |
992 | /// use bstr::ByteSlice; |
993 | /// |
994 | /// assert_eq!(b"foo bar baz" .find_not_byteset(b"fo " ), Some(4)); |
/// assert_eq!(b"\t\tbaz bar".find_not_byteset(b" \t\r\n"), Some(2));
996 | /// assert_eq!(b"foo \nbaz \tbar" .find_not_byteset(b" \t\n" ), Some(0)); |
997 | /// // The negation of the empty byteset matches everything. |
998 | /// assert_eq!(Some(0), b"abc" .find_not_byteset(b"" )); |
999 | /// // But an empty string never contains anything. |
1000 | /// assert_eq!(None, b"" .find_not_byteset(b"" )); |
1001 | /// ``` |
1002 | #[inline ] |
1003 | fn find_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { |
1004 | byteset::find_not(self.as_bytes(), byteset.as_ref()) |
1005 | } |
1006 | |
1007 | /// Returns the index of the last occurrence of any of the bytes in the |
1008 | /// provided set. |
1009 | /// |
1010 | /// The `byteset` may be any type that can be cheaply converted into a |
1011 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but |
1012 | /// note that passing a `&str` which contains multibyte characters may not |
1013 | /// behave as you expect: each byte in the `&str` is treated as an |
1014 | /// individual member of the byte set. |
1015 | /// |
1016 | /// Note that order is irrelevant for the `byteset` parameter, and duplicate |
1017 | /// bytes present in its body are ignored. |
1018 | /// |
1019 | /// # Complexity |
1020 | /// |
1021 | /// This routine is guaranteed to have worst case linear time complexity |
1022 | /// with respect to both the set of bytes and the haystack. That is, this |
1023 | /// runs in `O(byteset.len() + haystack.len())` time. |
1024 | /// |
1025 | /// This routine is also guaranteed to have worst case constant space |
1026 | /// complexity. |
1027 | /// |
1028 | /// # Examples |
1029 | /// |
1030 | /// Basic usage: |
1031 | /// |
1032 | /// ``` |
1033 | /// use bstr::ByteSlice; |
1034 | /// |
1035 | /// assert_eq!(b"foo bar baz" .rfind_byteset(b"agb" ), Some(9)); |
1036 | /// assert_eq!(b"foo baz bar" .rfind_byteset(b"rabz " ), Some(10)); |
/// assert_eq!(b"foo baz bar".rfind_byteset(b"\n123"), None);
1038 | /// ``` |
1039 | #[inline ] |
1040 | fn rfind_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { |
1041 | byteset::rfind(self.as_bytes(), byteset.as_ref()) |
1042 | } |
1043 | |
1044 | /// Returns the index of the last occurrence of a byte that is not a member |
1045 | /// of the provided set. |
1046 | /// |
1047 | /// The `byteset` may be any type that can be cheaply converted into a |
1048 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`, but |
1049 | /// note that passing a `&str` which contains multibyte characters may not |
1050 | /// behave as you expect: each byte in the `&str` is treated as an |
1051 | /// individual member of the byte set. |
1052 | /// |
1053 | /// Note that order is irrelevant for the `byteset` parameter, and |
1054 | /// duplicate bytes present in its body are ignored. |
1055 | /// |
1056 | /// # Complexity |
1057 | /// |
1058 | /// This routine is guaranteed to have worst case linear time complexity |
1059 | /// with respect to both the set of bytes and the haystack. That is, this |
1060 | /// runs in `O(byteset.len() + haystack.len())` time. |
1061 | /// |
1062 | /// This routine is also guaranteed to have worst case constant space |
1063 | /// complexity. |
1064 | /// |
1065 | /// # Examples |
1066 | /// |
1067 | /// Basic usage: |
1068 | /// |
1069 | /// ``` |
1070 | /// use bstr::ByteSlice; |
1071 | /// |
1072 | /// assert_eq!(b"foo bar baz, \t" .rfind_not_byteset(b", \t" ), Some(10)); |
1073 | /// assert_eq!(b"foo baz bar" .rfind_not_byteset(b"rabz " ), Some(2)); |
1074 | /// assert_eq!(None, b"foo baz bar" .rfind_not_byteset(b"barfoz " )); |
1075 | /// ``` |
1076 | #[inline ] |
1077 | fn rfind_not_byteset<B: AsRef<[u8]>>(&self, byteset: B) -> Option<usize> { |
1078 | byteset::rfind_not(self.as_bytes(), byteset.as_ref()) |
1079 | } |
1080 | |
1081 | /// Returns an iterator over the fields in a byte string, separated |
1082 | /// by contiguous whitespace (according to the Unicode property |
1083 | /// `White_Space`). |
1084 | /// |
1085 | /// # Example |
1086 | /// |
1087 | /// Basic usage: |
1088 | /// |
1089 | /// ``` |
1090 | /// use bstr::{B, ByteSlice}; |
1091 | /// |
1092 | /// let s = B(" foo \tbar \t\u{2003}\nquux \n" ); |
1093 | /// let fields: Vec<&[u8]> = s.fields().collect(); |
1094 | /// assert_eq!(fields, vec![B("foo" ), B("bar" ), B("quux" )]); |
1095 | /// ``` |
1096 | /// |
1097 | /// A byte string consisting of just whitespace yields no elements: |
1098 | /// |
1099 | /// ``` |
1100 | /// use bstr::{B, ByteSlice}; |
1101 | /// |
1102 | /// assert_eq!(0, B(" \n\t\u{2003}\n \t" ).fields().count()); |
1103 | /// ``` |
#[cfg(feature = "unicode")]
#[inline]
fn fields(&self) -> Fields<'_> {
    // The returned iterator borrows this byte string's bytes.
    let haystack = self.as_bytes();
    Fields::new(haystack)
}
1109 | |
1110 | /// Returns an iterator over the fields in a byte string, separated by |
1111 | /// contiguous codepoints satisfying the given predicate. |
1112 | /// |
1113 | /// If this byte string is not valid UTF-8, then the given closure will |
1114 | /// be called with a Unicode replacement codepoint when invalid UTF-8 |
1115 | /// bytes are seen. |
1116 | /// |
1117 | /// # Example |
1118 | /// |
1119 | /// Basic usage: |
1120 | /// |
1121 | /// ``` |
1122 | /// use bstr::{B, ByteSlice}; |
1123 | /// |
1124 | /// let s = b"123foo999999bar1quux123456" ; |
1125 | /// let fields: Vec<&[u8]> = s.fields_with(|c| c.is_numeric()).collect(); |
1126 | /// assert_eq!(fields, vec![B("foo" ), B("bar" ), B("quux" )]); |
1127 | /// ``` |
1128 | /// |
1129 | /// A byte string consisting of all codepoints satisfying the predicate |
1130 | /// yields no elements: |
1131 | /// |
1132 | /// ``` |
1133 | /// use bstr::ByteSlice; |
1134 | /// |
1135 | /// assert_eq!(0, b"1911354563" .fields_with(|c| c.is_numeric()).count()); |
1136 | /// ``` |
1137 | #[inline ] |
1138 | fn fields_with<F: FnMut(char) -> bool>(&self, f: F) -> FieldsWith<'_, F> { |
1139 | FieldsWith::new(self.as_bytes(), f) |
1140 | } |
1141 | |
1142 | /// Returns an iterator over substrings of this byte string, separated |
1143 | /// by the given byte string. Each element yielded is guaranteed not to |
1144 | /// include the splitter substring. |
1145 | /// |
1146 | /// The splitter may be any type that can be cheaply converted into a |
1147 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
1148 | /// |
1149 | /// # Examples |
1150 | /// |
1151 | /// Basic usage: |
1152 | /// |
1153 | /// ``` |
1154 | /// use bstr::{B, ByteSlice}; |
1155 | /// |
1156 | /// let x: Vec<&[u8]> = b"Mary had a little lamb" .split_str(" " ).collect(); |
1157 | /// assert_eq!(x, vec![ |
1158 | /// B("Mary" ), B("had" ), B("a" ), B("little" ), B("lamb" ), |
1159 | /// ]); |
1160 | /// |
1161 | /// let x: Vec<&[u8]> = b"" .split_str("X" ).collect(); |
1162 | /// assert_eq!(x, vec![b"" ]); |
1163 | /// |
1164 | /// let x: Vec<&[u8]> = b"lionXXtigerXleopard" .split_str("X" ).collect(); |
1165 | /// assert_eq!(x, vec![B("lion" ), B("" ), B("tiger" ), B("leopard" )]); |
1166 | /// |
1167 | /// let x: Vec<&[u8]> = b"lion::tiger::leopard" .split_str("::" ).collect(); |
1168 | /// assert_eq!(x, vec![B("lion" ), B("tiger" ), B("leopard" )]); |
1169 | /// ``` |
1170 | /// |
1171 | /// If a string contains multiple contiguous separators, you will end up |
1172 | /// with empty strings yielded by the iterator: |
1173 | /// |
1174 | /// ``` |
1175 | /// use bstr::{B, ByteSlice}; |
1176 | /// |
1177 | /// let x: Vec<&[u8]> = b"||||a||b|c" .split_str("|" ).collect(); |
1178 | /// assert_eq!(x, vec![ |
1179 | /// B("" ), B("" ), B("" ), B("" ), B("a" ), B("" ), B("b" ), B("c" ), |
1180 | /// ]); |
1181 | /// |
1182 | /// let x: Vec<&[u8]> = b"(///)" .split_str("/" ).collect(); |
1183 | /// assert_eq!(x, vec![B("(" ), B("" ), B("" ), B(")" )]); |
1184 | /// ``` |
1185 | /// |
1186 | /// Separators at the start or end of a string are neighbored by empty |
1187 | /// strings. |
1188 | /// |
1189 | /// ``` |
1190 | /// use bstr::{B, ByteSlice}; |
1191 | /// |
1192 | /// let x: Vec<&[u8]> = b"010" .split_str("0" ).collect(); |
1193 | /// assert_eq!(x, vec![B("" ), B("1" ), B("" )]); |
1194 | /// ``` |
1195 | /// |
1196 | /// When the empty string is used as a separator, it splits every **byte** |
1197 | /// in the byte string, along with the beginning and end of the byte |
1198 | /// string. |
1199 | /// |
1200 | /// ``` |
1201 | /// use bstr::{B, ByteSlice}; |
1202 | /// |
1203 | /// let x: Vec<&[u8]> = b"rust" .split_str("" ).collect(); |
1204 | /// assert_eq!(x, vec![ |
1205 | /// B("" ), B("r" ), B("u" ), B("s" ), B("t" ), B("" ), |
1206 | /// ]); |
1207 | /// |
1208 | /// // Splitting by an empty string is not UTF-8 aware. Elements yielded |
1209 | /// // may not be valid UTF-8! |
1210 | /// let x: Vec<&[u8]> = B("☃" ).split_str("" ).collect(); |
1211 | /// assert_eq!(x, vec![ |
///     B(""), B(b"\xE2"), B(b"\x98"), B(b"\x83"), B(""),
1213 | /// ]); |
1214 | /// ``` |
1215 | /// |
1216 | /// Contiguous separators, especially whitespace, can lead to possibly |
1217 | /// surprising behavior. For example, this code is correct: |
1218 | /// |
1219 | /// ``` |
1220 | /// use bstr::{B, ByteSlice}; |
1221 | /// |
/// let x: Vec<&[u8]> = b"    a  b c".split_str(" ").collect();
1223 | /// assert_eq!(x, vec![ |
1224 | /// B("" ), B("" ), B("" ), B("" ), B("a" ), B("" ), B("b" ), B("c" ), |
1225 | /// ]); |
1226 | /// ``` |
1227 | /// |
1228 | /// It does *not* give you `["a", "b", "c"]`. For that behavior, use |
1229 | /// [`fields`](#method.fields) instead. |
1230 | #[inline ] |
1231 | fn split_str<'h, 's, B: ?Sized + AsRef<[u8]>>( |
1232 | &'h self, |
1233 | splitter: &'s B, |
1234 | ) -> Split<'h, 's> { |
1235 | Split::new(self.as_bytes(), splitter.as_ref()) |
1236 | } |
1237 | |
1238 | /// Returns an iterator over substrings of this byte string, separated by |
1239 | /// the given byte string, in reverse. Each element yielded is guaranteed |
1240 | /// not to include the splitter substring. |
1241 | /// |
1242 | /// The splitter may be any type that can be cheaply converted into a |
1243 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
1244 | /// |
1245 | /// # Examples |
1246 | /// |
1247 | /// Basic usage: |
1248 | /// |
1249 | /// ``` |
1250 | /// use bstr::{B, ByteSlice}; |
1251 | /// |
1252 | /// let x: Vec<&[u8]> = |
1253 | /// b"Mary had a little lamb" .rsplit_str(" " ).collect(); |
1254 | /// assert_eq!(x, vec![ |
1255 | /// B("lamb" ), B("little" ), B("a" ), B("had" ), B("Mary" ), |
1256 | /// ]); |
1257 | /// |
1258 | /// let x: Vec<&[u8]> = b"" .rsplit_str("X" ).collect(); |
1259 | /// assert_eq!(x, vec![b"" ]); |
1260 | /// |
1261 | /// let x: Vec<&[u8]> = b"lionXXtigerXleopard" .rsplit_str("X" ).collect(); |
1262 | /// assert_eq!(x, vec![B("leopard" ), B("tiger" ), B("" ), B("lion" )]); |
1263 | /// |
1264 | /// let x: Vec<&[u8]> = b"lion::tiger::leopard" .rsplit_str("::" ).collect(); |
1265 | /// assert_eq!(x, vec![B("leopard" ), B("tiger" ), B("lion" )]); |
1266 | /// ``` |
1267 | /// |
1268 | /// If a string contains multiple contiguous separators, you will end up |
1269 | /// with empty strings yielded by the iterator: |
1270 | /// |
1271 | /// ``` |
1272 | /// use bstr::{B, ByteSlice}; |
1273 | /// |
1274 | /// let x: Vec<&[u8]> = b"||||a||b|c" .rsplit_str("|" ).collect(); |
1275 | /// assert_eq!(x, vec![ |
1276 | /// B("c" ), B("b" ), B("" ), B("a" ), B("" ), B("" ), B("" ), B("" ), |
1277 | /// ]); |
1278 | /// |
1279 | /// let x: Vec<&[u8]> = b"(///)" .rsplit_str("/" ).collect(); |
1280 | /// assert_eq!(x, vec![B(")" ), B("" ), B("" ), B("(" )]); |
1281 | /// ``` |
1282 | /// |
1283 | /// Separators at the start or end of a string are neighbored by empty |
1284 | /// strings. |
1285 | /// |
1286 | /// ``` |
1287 | /// use bstr::{B, ByteSlice}; |
1288 | /// |
1289 | /// let x: Vec<&[u8]> = b"010" .rsplit_str("0" ).collect(); |
1290 | /// assert_eq!(x, vec![B("" ), B("1" ), B("" )]); |
1291 | /// ``` |
1292 | /// |
1293 | /// When the empty string is used as a separator, it splits every **byte** |
1294 | /// in the byte string, along with the beginning and end of the byte |
1295 | /// string. |
1296 | /// |
1297 | /// ``` |
1298 | /// use bstr::{B, ByteSlice}; |
1299 | /// |
1300 | /// let x: Vec<&[u8]> = b"rust" .rsplit_str("" ).collect(); |
1301 | /// assert_eq!(x, vec![ |
1302 | /// B("" ), B("t" ), B("s" ), B("u" ), B("r" ), B("" ), |
1303 | /// ]); |
1304 | /// |
1305 | /// // Splitting by an empty string is not UTF-8 aware. Elements yielded |
1306 | /// // may not be valid UTF-8! |
1307 | /// let x: Vec<&[u8]> = B("☃" ).rsplit_str("" ).collect(); |
/// assert_eq!(x, vec![B(""), B(b"\x83"), B(b"\x98"), B(b"\xE2"), B("")]);
1309 | /// ``` |
1310 | /// |
1311 | /// Contiguous separators, especially whitespace, can lead to possibly |
1312 | /// surprising behavior. For example, this code is correct: |
1313 | /// |
1314 | /// ``` |
1315 | /// use bstr::{B, ByteSlice}; |
1316 | /// |
/// let x: Vec<&[u8]> = b"    a  b c".rsplit_str(" ").collect();
1318 | /// assert_eq!(x, vec![ |
1319 | /// B("c" ), B("b" ), B("" ), B("a" ), B("" ), B("" ), B("" ), B("" ), |
1320 | /// ]); |
1321 | /// ``` |
1322 | /// |
1323 | /// It does *not* give you `["a", "b", "c"]`. |
1324 | #[inline ] |
1325 | fn rsplit_str<'h, 's, B: ?Sized + AsRef<[u8]>>( |
1326 | &'h self, |
1327 | splitter: &'s B, |
1328 | ) -> SplitReverse<'h, 's> { |
1329 | SplitReverse::new(self.as_bytes(), splitter.as_ref()) |
1330 | } |
1331 | |
1332 | /// Split this byte string at the first occurrence of `splitter`. |
1333 | /// |
1334 | /// If the `splitter` is found in the byte string, returns a tuple |
1335 | /// containing the parts of the string before and after the first occurrence |
1336 | /// of `splitter` respectively. Otherwise, if there are no occurrences of |
1337 | /// `splitter` in the byte string, returns `None`. |
1338 | /// |
1339 | /// The splitter may be any type that can be cheaply converted into a |
1340 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
1341 | /// |
1342 | /// If you need to split on the *last* instance of a delimiter instead, see |
/// the [`ByteSlice::rsplit_once_str`](#method.rsplit_once_str) method.
1344 | /// |
1345 | /// # Examples |
1346 | /// |
1347 | /// Basic usage: |
1348 | /// |
1349 | /// ``` |
1350 | /// use bstr::{B, ByteSlice}; |
1351 | /// |
1352 | /// assert_eq!( |
1353 | /// B("foo,bar" ).split_once_str("," ), |
1354 | /// Some((B("foo" ), B("bar" ))), |
1355 | /// ); |
1356 | /// assert_eq!( |
1357 | /// B("foo,bar,baz" ).split_once_str("," ), |
1358 | /// Some((B("foo" ), B("bar,baz" ))), |
1359 | /// ); |
1360 | /// assert_eq!(B("foo" ).split_once_str("," ), None); |
1361 | /// assert_eq!(B("foo," ).split_once_str(b"," ), Some((B("foo" ), B("" )))); |
1362 | /// assert_eq!(B(",foo" ).split_once_str(b"," ), Some((B("" ), B("foo" )))); |
1363 | /// ``` |
1364 | #[inline ] |
1365 | fn split_once_str<'a, B: ?Sized + AsRef<[u8]>>( |
1366 | &'a self, |
1367 | splitter: &B, |
1368 | ) -> Option<(&'a [u8], &'a [u8])> { |
1369 | let bytes = self.as_bytes(); |
1370 | let splitter = splitter.as_ref(); |
1371 | let start = Finder::new(splitter).find(bytes)?; |
1372 | let end = start + splitter.len(); |
1373 | Some((&bytes[..start], &bytes[end..])) |
1374 | } |
1375 | |
1376 | /// Split this byte string at the last occurrence of `splitter`. |
1377 | /// |
1378 | /// If the `splitter` is found in the byte string, returns a tuple |
1379 | /// containing the parts of the string before and after the last occurrence |
1380 | /// of `splitter`, respectively. Otherwise, if there are no occurrences of |
1381 | /// `splitter` in the byte string, returns `None`. |
1382 | /// |
1383 | /// The splitter may be any type that can be cheaply converted into a |
1384 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
1385 | /// |
1386 | /// If you need to split on the *first* instance of a delimiter instead, see |
1387 | /// the [`ByteSlice::split_once_str`](#method.split_once_str) method. |
1388 | /// |
1389 | /// # Examples |
1390 | /// |
1391 | /// Basic usage: |
1392 | /// |
1393 | /// ``` |
1394 | /// use bstr::{B, ByteSlice}; |
1395 | /// |
1396 | /// assert_eq!( |
1397 | /// B("foo,bar" ).rsplit_once_str("," ), |
1398 | /// Some((B("foo" ), B("bar" ))), |
1399 | /// ); |
1400 | /// assert_eq!( |
1401 | /// B("foo,bar,baz" ).rsplit_once_str("," ), |
1402 | /// Some((B("foo,bar" ), B("baz" ))), |
1403 | /// ); |
1404 | /// assert_eq!(B("foo" ).rsplit_once_str("," ), None); |
1405 | /// assert_eq!(B("foo," ).rsplit_once_str(b"," ), Some((B("foo" ), B("" )))); |
1406 | /// assert_eq!(B(",foo" ).rsplit_once_str(b"," ), Some((B("" ), B("foo" )))); |
1407 | /// ``` |
1408 | #[inline ] |
1409 | fn rsplit_once_str<'a, B: ?Sized + AsRef<[u8]>>( |
1410 | &'a self, |
1411 | splitter: &B, |
1412 | ) -> Option<(&'a [u8], &'a [u8])> { |
1413 | let bytes = self.as_bytes(); |
1414 | let splitter = splitter.as_ref(); |
1415 | let start = FinderReverse::new(splitter).rfind(bytes)?; |
1416 | let end = start + splitter.len(); |
1417 | Some((&bytes[..start], &bytes[end..])) |
1418 | } |
1419 | |
1420 | /// Returns an iterator of at most `limit` substrings of this byte string, |
1421 | /// separated by the given byte string. If `limit` substrings are yielded, |
1422 | /// then the last substring will contain the remainder of this byte string. |
1423 | /// |
/// The splitter may be any type that can be cheaply converted into a
1425 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
1426 | /// |
1427 | /// # Examples |
1428 | /// |
1429 | /// Basic usage: |
1430 | /// |
1431 | /// ``` |
1432 | /// use bstr::{B, ByteSlice}; |
1433 | /// |
1434 | /// let x: Vec<_> = b"Mary had a little lamb" .splitn_str(3, " " ).collect(); |
1435 | /// assert_eq!(x, vec![B("Mary" ), B("had" ), B("a little lamb" )]); |
1436 | /// |
1437 | /// let x: Vec<_> = b"" .splitn_str(3, "X" ).collect(); |
1438 | /// assert_eq!(x, vec![b"" ]); |
1439 | /// |
1440 | /// let x: Vec<_> = b"lionXXtigerXleopard" .splitn_str(3, "X" ).collect(); |
1441 | /// assert_eq!(x, vec![B("lion" ), B("" ), B("tigerXleopard" )]); |
1442 | /// |
1443 | /// let x: Vec<_> = b"lion::tiger::leopard" .splitn_str(2, "::" ).collect(); |
1444 | /// assert_eq!(x, vec![B("lion" ), B("tiger::leopard" )]); |
1445 | /// |
1446 | /// let x: Vec<_> = b"abcXdef" .splitn_str(1, "X" ).collect(); |
1447 | /// assert_eq!(x, vec![B("abcXdef" )]); |
1448 | /// |
1449 | /// let x: Vec<_> = b"abcdef" .splitn_str(2, "X" ).collect(); |
1450 | /// assert_eq!(x, vec![B("abcdef" )]); |
1451 | /// |
1452 | /// let x: Vec<_> = b"abcXdef" .splitn_str(0, "X" ).collect(); |
1453 | /// assert!(x.is_empty()); |
1454 | /// ``` |
1455 | #[inline ] |
1456 | fn splitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>( |
1457 | &'h self, |
1458 | limit: usize, |
1459 | splitter: &'s B, |
1460 | ) -> SplitN<'h, 's> { |
1461 | SplitN::new(self.as_bytes(), splitter.as_ref(), limit) |
1462 | } |
1463 | |
1464 | /// Returns an iterator of at most `limit` substrings of this byte string, |
1465 | /// separated by the given byte string, in reverse. If `limit` substrings |
1466 | /// are yielded, then the last substring will contain the remainder of this |
1467 | /// byte string. |
1468 | /// |
/// The splitter may be any type that can be cheaply converted into a
1470 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
1471 | /// |
1472 | /// # Examples |
1473 | /// |
1474 | /// Basic usage: |
1475 | /// |
1476 | /// ``` |
1477 | /// use bstr::{B, ByteSlice}; |
1478 | /// |
1479 | /// let x: Vec<_> = |
1480 | /// b"Mary had a little lamb" .rsplitn_str(3, " " ).collect(); |
1481 | /// assert_eq!(x, vec![B("lamb" ), B("little" ), B("Mary had a" )]); |
1482 | /// |
1483 | /// let x: Vec<_> = b"" .rsplitn_str(3, "X" ).collect(); |
1484 | /// assert_eq!(x, vec![b"" ]); |
1485 | /// |
1486 | /// let x: Vec<_> = b"lionXXtigerXleopard" .rsplitn_str(3, "X" ).collect(); |
1487 | /// assert_eq!(x, vec![B("leopard" ), B("tiger" ), B("lionX" )]); |
1488 | /// |
1489 | /// let x: Vec<_> = b"lion::tiger::leopard" .rsplitn_str(2, "::" ).collect(); |
1490 | /// assert_eq!(x, vec![B("leopard" ), B("lion::tiger" )]); |
1491 | /// |
1492 | /// let x: Vec<_> = b"abcXdef" .rsplitn_str(1, "X" ).collect(); |
1493 | /// assert_eq!(x, vec![B("abcXdef" )]); |
1494 | /// |
1495 | /// let x: Vec<_> = b"abcdef" .rsplitn_str(2, "X" ).collect(); |
1496 | /// assert_eq!(x, vec![B("abcdef" )]); |
1497 | /// |
1498 | /// let x: Vec<_> = b"abcXdef" .rsplitn_str(0, "X" ).collect(); |
1499 | /// assert!(x.is_empty()); |
1500 | /// ``` |
1501 | #[inline ] |
1502 | fn rsplitn_str<'h, 's, B: ?Sized + AsRef<[u8]>>( |
1503 | &'h self, |
1504 | limit: usize, |
1505 | splitter: &'s B, |
1506 | ) -> SplitNReverse<'h, 's> { |
1507 | SplitNReverse::new(self.as_bytes(), splitter.as_ref(), limit) |
1508 | } |
1509 | |
/// Replace all matches of the given needle with the given replacement, and
/// return the result as a new `Vec<u8>`.
1512 | /// |
1513 | /// This routine is useful as a convenience. If you need to reuse an |
1514 | /// allocation, use [`replace_into`](#method.replace_into) instead. |
1515 | /// |
1516 | /// # Examples |
1517 | /// |
1518 | /// Basic usage: |
1519 | /// |
1520 | /// ``` |
1521 | /// use bstr::ByteSlice; |
1522 | /// |
1523 | /// let s = b"this is old" .replace("old" , "new" ); |
1524 | /// assert_eq!(s, "this is new" .as_bytes()); |
1525 | /// ``` |
1526 | /// |
1527 | /// When the pattern doesn't match: |
1528 | /// |
1529 | /// ``` |
1530 | /// use bstr::ByteSlice; |
1531 | /// |
1532 | /// let s = b"this is old" .replace("nada nada" , "limonada" ); |
1533 | /// assert_eq!(s, "this is old" .as_bytes()); |
1534 | /// ``` |
1535 | /// |
1536 | /// When the needle is an empty string: |
1537 | /// |
1538 | /// ``` |
1539 | /// use bstr::ByteSlice; |
1540 | /// |
1541 | /// let s = b"foo" .replace("" , "Z" ); |
1542 | /// assert_eq!(s, "ZfZoZoZ" .as_bytes()); |
1543 | /// ``` |
#[cfg(feature = "alloc")]
#[inline]
fn replace<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
) -> Vec<u8> {
    // Start with capacity for the unmodified haystack, then let
    // `replace_into` append the rewritten bytes.
    let haystack_len = self.as_bytes().len();
    let mut replaced = Vec::with_capacity(haystack_len);
    self.replace_into(needle, replacement, &mut replaced);
    replaced
}
1555 | |
/// Replace up to `limit` matches of the given needle with the given
/// replacement, and return the result as a new `Vec<u8>`.
1558 | /// |
1559 | /// This routine is useful as a convenience. If you need to reuse an |
1560 | /// allocation, use [`replacen_into`](#method.replacen_into) instead. |
1561 | /// |
1562 | /// # Examples |
1563 | /// |
1564 | /// Basic usage: |
1565 | /// |
1566 | /// ``` |
1567 | /// use bstr::ByteSlice; |
1568 | /// |
1569 | /// let s = b"foofoo" .replacen("o" , "z" , 2); |
1570 | /// assert_eq!(s, "fzzfoo" .as_bytes()); |
1571 | /// ``` |
1572 | /// |
1573 | /// When the pattern doesn't match: |
1574 | /// |
1575 | /// ``` |
1576 | /// use bstr::ByteSlice; |
1577 | /// |
1578 | /// let s = b"foofoo" .replacen("a" , "z" , 2); |
1579 | /// assert_eq!(s, "foofoo" .as_bytes()); |
1580 | /// ``` |
1581 | /// |
1582 | /// When the needle is an empty string: |
1583 | /// |
1584 | /// ``` |
1585 | /// use bstr::ByteSlice; |
1586 | /// |
1587 | /// let s = b"foo" .replacen("" , "Z" , 2); |
1588 | /// assert_eq!(s, "ZfZoo" .as_bytes()); |
1589 | /// ``` |
#[cfg(feature = "alloc")]
#[inline]
fn replacen<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    limit: usize,
) -> Vec<u8> {
    // Start with capacity for the unmodified haystack, then let
    // `replacen_into` append the rewritten bytes.
    let haystack_len = self.as_bytes().len();
    let mut replaced = Vec::with_capacity(haystack_len);
    self.replacen_into(needle, replacement, limit, &mut replaced);
    replaced
}
1602 | |
1603 | /// Replace all matches of the given needle with the given replacement, |
1604 | /// and write the result into the provided `Vec<u8>`. |
1605 | /// |
1606 | /// This does **not** clear `dest` before writing to it. |
1607 | /// |
1608 | /// This routine is useful for reusing allocation. For a more convenient |
1609 | /// API, use [`replace`](#method.replace) instead. |
1610 | /// |
1611 | /// # Examples |
1612 | /// |
1613 | /// Basic usage: |
1614 | /// |
1615 | /// ``` |
1616 | /// use bstr::ByteSlice; |
1617 | /// |
1618 | /// let s = b"this is old" ; |
1619 | /// |
1620 | /// let mut dest = vec![]; |
1621 | /// s.replace_into("old" , "new" , &mut dest); |
1622 | /// assert_eq!(dest, "this is new" .as_bytes()); |
1623 | /// ``` |
1624 | /// |
1625 | /// When the pattern doesn't match: |
1626 | /// |
1627 | /// ``` |
1628 | /// use bstr::ByteSlice; |
1629 | /// |
1630 | /// let s = b"this is old" ; |
1631 | /// |
1632 | /// let mut dest = vec![]; |
1633 | /// s.replace_into("nada nada" , "limonada" , &mut dest); |
1634 | /// assert_eq!(dest, "this is old" .as_bytes()); |
1635 | /// ``` |
1636 | /// |
1637 | /// When the needle is an empty string: |
1638 | /// |
1639 | /// ``` |
1640 | /// use bstr::ByteSlice; |
1641 | /// |
1642 | /// let s = b"foo" ; |
1643 | /// |
1644 | /// let mut dest = vec![]; |
1645 | /// s.replace_into("" , "Z" , &mut dest); |
1646 | /// assert_eq!(dest, "ZfZoZoZ" .as_bytes()); |
1647 | /// ``` |
#[cfg(feature = "alloc")]
#[inline]
fn replace_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    dest: &mut Vec<u8>,
) {
    let pattern = needle.as_ref();
    let substitute = replacement.as_ref();
    let haystack = self.as_bytes();

    // `copied_upto` marks the end of the haystack prefix that has already
    // been accounted for in `dest` (either copied or replaced).
    let mut copied_upto = 0;
    for match_at in self.find_iter(pattern) {
        // Copy the untouched gap before this match, then the substitute.
        dest.push_str(&haystack[copied_upto..match_at]);
        dest.push_str(substitute);
        copied_upto = match_at + pattern.len();
    }
    // Whatever follows the final match is carried over unchanged.
    dest.push_str(&haystack[copied_upto..]);
}
1666 | |
/// Writes this byte string into the provided `Vec<u8>`, substituting the
/// given replacement for at most `limit` matches of the given needle.
///
/// `dest` is **not** cleared first; the output is appended to whatever it
/// already contains.
///
/// Use this routine when you want to reuse an allocation. If convenience
/// matters more, prefer [`replacen`](#method.replacen).
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteSlice;
///
/// let s = b"foofoo";
///
/// let mut dest = vec![];
/// s.replacen_into("o", "z", 2, &mut dest);
/// assert_eq!(dest, "fzzfoo".as_bytes());
/// ```
///
/// When the pattern doesn't match:
///
/// ```
/// use bstr::ByteSlice;
///
/// let s = b"foofoo";
///
/// let mut dest = vec![];
/// s.replacen_into("a", "z", 2, &mut dest);
/// assert_eq!(dest, "foofoo".as_bytes());
/// ```
///
/// When the needle is an empty string:
///
/// ```
/// use bstr::ByteSlice;
///
/// let s = b"foo";
///
/// let mut dest = vec![];
/// s.replacen_into("", "Z", 2, &mut dest);
/// assert_eq!(dest, "ZfZoo".as_bytes());
/// ```
#[cfg(feature = "alloc")]
#[inline]
fn replacen_into<N: AsRef<[u8]>, R: AsRef<[u8]>>(
    &self,
    needle: N,
    replacement: R,
    limit: usize,
    dest: &mut Vec<u8>,
) {
    let haystack = self.as_bytes();
    let (pattern, substitute) = (needle.as_ref(), replacement.as_ref());

    // Only the first `limit` matches are rewritten. The accumulator is the
    // offset just past the previous match.
    let resume =
        self.find_iter(pattern).take(limit).fold(0, |copied, at| {
            dest.push_str(&haystack[copied..at]);
            dest.push_str(substitute);
            at + pattern.len()
        });
    // The tail, including any matches beyond the limit, is copied as is.
    dest.push_str(&haystack[resume..]);
}
1731 | |
1732 | /// Returns an iterator over the bytes in this byte string. |
1733 | /// |
1734 | /// # Examples |
1735 | /// |
1736 | /// Basic usage: |
1737 | /// |
1738 | /// ``` |
1739 | /// use bstr::ByteSlice; |
1740 | /// |
1741 | /// let bs = b"foobar" ; |
1742 | /// let bytes: Vec<u8> = bs.bytes().collect(); |
1743 | /// assert_eq!(bytes, bs); |
1744 | /// ``` |
1745 | #[inline ] |
1746 | fn bytes(&self) -> Bytes<'_> { |
1747 | Bytes { it: self.as_bytes().iter() } |
1748 | } |
1749 | |
1750 | /// Returns an iterator over the Unicode scalar values in this byte string. |
1751 | /// If invalid UTF-8 is encountered, then the Unicode replacement codepoint |
1752 | /// is yielded instead. |
1753 | /// |
1754 | /// # Examples |
1755 | /// |
1756 | /// Basic usage: |
1757 | /// |
1758 | /// ``` |
1759 | /// use bstr::ByteSlice; |
1760 | /// |
1761 | /// let bs = b" \xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61" ; |
1762 | /// let chars: Vec<char> = bs.chars().collect(); |
1763 | /// assert_eq!(vec!['☃' , ' \u{FFFD}' , '𝞃' , ' \u{FFFD}' , 'a' ], chars); |
1764 | /// ``` |
1765 | /// |
1766 | /// Codepoints can also be iterated over in reverse: |
1767 | /// |
1768 | /// ``` |
1769 | /// use bstr::ByteSlice; |
1770 | /// |
1771 | /// let bs = b" \xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61" ; |
1772 | /// let chars: Vec<char> = bs.chars().rev().collect(); |
1773 | /// assert_eq!(vec!['a' , ' \u{FFFD}' , '𝞃' , ' \u{FFFD}' , '☃' ], chars); |
1774 | /// ``` |
1775 | #[inline ] |
1776 | fn chars(&self) -> Chars<'_> { |
1777 | Chars::new(self.as_bytes()) |
1778 | } |
1779 | |
1780 | /// Returns an iterator over the Unicode scalar values in this byte string |
1781 | /// along with their starting and ending byte index positions. If invalid |
1782 | /// UTF-8 is encountered, then the Unicode replacement codepoint is yielded |
1783 | /// instead. |
1784 | /// |
1785 | /// Note that this is slightly different from the `CharIndices` iterator |
1786 | /// provided by the standard library. Aside from working on possibly |
1787 | /// invalid UTF-8, this iterator provides both the corresponding starting |
1788 | /// and ending byte indices of each codepoint yielded. The ending position |
1789 | /// is necessary to slice the original byte string when invalid UTF-8 bytes |
1790 | /// are converted into a Unicode replacement codepoint, since a single |
1791 | /// replacement codepoint can substitute anywhere from 1 to 3 invalid bytes |
1792 | /// (inclusive). |
1793 | /// |
1794 | /// # Examples |
1795 | /// |
1796 | /// Basic usage: |
1797 | /// |
1798 | /// ``` |
1799 | /// use bstr::ByteSlice; |
1800 | /// |
1801 | /// let bs = b" \xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61" ; |
1802 | /// let chars: Vec<(usize, usize, char)> = bs.char_indices().collect(); |
1803 | /// assert_eq!(chars, vec![ |
1804 | /// (0, 3, '☃' ), |
1805 | /// (3, 4, ' \u{FFFD}' ), |
1806 | /// (4, 8, '𝞃' ), |
1807 | /// (8, 10, ' \u{FFFD}' ), |
1808 | /// (10, 11, 'a' ), |
1809 | /// ]); |
1810 | /// ``` |
1811 | /// |
1812 | /// Codepoints can also be iterated over in reverse: |
1813 | /// |
1814 | /// ``` |
1815 | /// use bstr::ByteSlice; |
1816 | /// |
1817 | /// let bs = b" \xE2\x98\x83\xFF\xF0\x9D\x9E\x83\xE2\x98\x61" ; |
1818 | /// let chars: Vec<(usize, usize, char)> = bs |
1819 | /// .char_indices() |
1820 | /// .rev() |
1821 | /// .collect(); |
1822 | /// assert_eq!(chars, vec![ |
1823 | /// (10, 11, 'a' ), |
1824 | /// (8, 10, ' \u{FFFD}' ), |
1825 | /// (4, 8, '𝞃' ), |
1826 | /// (3, 4, ' \u{FFFD}' ), |
1827 | /// (0, 3, '☃' ), |
1828 | /// ]); |
1829 | /// ``` |
1830 | #[inline ] |
1831 | fn char_indices(&self) -> CharIndices<'_> { |
1832 | CharIndices::new(self.as_bytes()) |
1833 | } |
1834 | |
1835 | /// Iterate over chunks of valid UTF-8. |
1836 | /// |
1837 | /// The iterator returned yields chunks of valid UTF-8 separated by invalid |
1838 | /// UTF-8 bytes, if they exist. Invalid UTF-8 bytes are always 1-3 bytes, |
1839 | /// which are determined via the "substitution of maximal subparts" |
1840 | /// strategy described in the docs for the |
1841 | /// [`ByteSlice::to_str_lossy`](trait.ByteSlice.html#method.to_str_lossy) |
1842 | /// method. |
1843 | /// |
1844 | /// # Examples |
1845 | /// |
1846 | /// This example shows how to gather all valid and invalid chunks from a |
1847 | /// byte slice: |
1848 | /// |
1849 | /// ``` |
1850 | /// use bstr::{ByteSlice, Utf8Chunk}; |
1851 | /// |
1852 | /// let bytes = b"foo \xFD\xFEbar \xFF" ; |
1853 | /// |
1854 | /// let (mut valid_chunks, mut invalid_chunks) = (vec![], vec![]); |
1855 | /// for chunk in bytes.utf8_chunks() { |
1856 | /// if !chunk.valid().is_empty() { |
1857 | /// valid_chunks.push(chunk.valid()); |
1858 | /// } |
1859 | /// if !chunk.invalid().is_empty() { |
1860 | /// invalid_chunks.push(chunk.invalid()); |
1861 | /// } |
1862 | /// } |
1863 | /// |
1864 | /// assert_eq!(valid_chunks, vec!["foo" , "bar" ]); |
1865 | /// assert_eq!(invalid_chunks, vec![b" \xFD" , b" \xFE" , b" \xFF" ]); |
1866 | /// ``` |
1867 | #[inline ] |
1868 | fn utf8_chunks(&self) -> Utf8Chunks<'_> { |
1869 | Utf8Chunks { bytes: self.as_bytes() } |
1870 | } |
1871 | |
/// Returns an iterator over the grapheme clusters of this byte string.
///
/// Wherever invalid UTF-8 is encountered, the Unicode replacement
/// codepoint is yielded in its place.
///
/// # Examples
///
/// This example shows how multiple codepoints can combine to form a
/// single grapheme cluster:
///
/// ```
/// use bstr::ByteSlice;
///
/// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
/// let graphemes: Vec<&str> = bs.graphemes().collect();
/// assert_eq!(
///     vec!["a\u{0300}\u{0316}", "\u{1F1FA}\u{1F1F8}"],
///     graphemes,
/// );
/// ```
///
/// This shows that graphemes can be iterated over in reverse:
///
/// ```
/// use bstr::ByteSlice;
///
/// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
/// let graphemes: Vec<&str> = bs.graphemes().rev().collect();
/// assert_eq!(
///     vec!["\u{1F1FA}\u{1F1F8}", "a\u{0300}\u{0316}"],
///     graphemes,
/// );
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn graphemes(&self) -> Graphemes<'_> {
    let haystack = self.as_bytes();
    Graphemes::new(haystack)
}
1903 | |
/// Returns an iterator over the grapheme clusters of this byte string,
/// each paired with the byte offsets at which it starts and ends. Invalid
/// UTF-8 is yielded as the Unicode replacement codepoint.
///
/// # Examples
///
/// This example shows how to get the byte offsets of each individual
/// grapheme cluster:
///
/// ```
/// use bstr::ByteSlice;
///
/// let bs = "a\u{0300}\u{0316}\u{1F1FA}\u{1F1F8}".as_bytes();
/// let graphemes: Vec<(usize, usize, &str)> =
///     bs.grapheme_indices().collect();
/// assert_eq!(
///     vec![
///         (0, 5, "a\u{0300}\u{0316}"),
///         (5, 13, "\u{1F1FA}\u{1F1F8}"),
///     ],
///     graphemes,
/// );
/// ```
///
/// This example shows what happens when invalid UTF-8 is encountered. Note
/// that the offsets are valid indices into the original string, and do
/// not necessarily correspond to the length of the `&str` returned!
///
/// ```
/// # #[cfg(all(feature = "alloc"))] {
/// use bstr::{ByteSlice, ByteVec};
///
/// let mut bytes = vec![];
/// bytes.push_str("a\u{0300}\u{0316}");
/// bytes.push(b'\xFF');
/// bytes.push_str("\u{1F1FA}\u{1F1F8}");
///
/// let graphemes: Vec<(usize, usize, &str)> =
///     bytes.grapheme_indices().collect();
/// assert_eq!(
///     graphemes,
///     vec![
///         (0, 5, "a\u{0300}\u{0316}"),
///         (5, 6, "\u{FFFD}"),
///         (6, 14, "\u{1F1FA}\u{1F1F8}"),
///     ]
/// );
/// # }
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn grapheme_indices(&self) -> GraphemeIndices<'_> {
    let haystack = self.as_bytes();
    GraphemeIndices::new(haystack)
}
1949 | |
/// Returns an iterator over the words of this byte string. Invalid UTF-8
/// is yielded as the Unicode replacement codepoint.
///
/// This behaves like
/// [`words_with_breaks`](trait.ByteSlice.html#method.words_with_breaks),
/// but keeps only the elements that contain a "word" character. UTS #18
/// (Annex C) defines a word character as the combination of the
/// `Alphabetic` and `Join_Control` properties, along with the
/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
/// categories.
///
/// A word consists of one or more codepoints, so the items are `&str`
/// values. When invalid UTF-8 is encountered, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8).
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteSlice;
///
/// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
/// let words: Vec<&str> = bs.words().collect();
/// assert_eq!(words, vec![
///     "The", "quick", "brown", "fox", "can't",
///     "jump", "32.3", "feet", "right",
/// ]);
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn words(&self) -> Words<'_> {
    let haystack = self.as_bytes();
    Words::new(haystack)
}
1985 | |
/// Returns an iterator over the words of this byte string, each paired
/// with the byte offsets at which it starts and ends.
///
/// This behaves like
/// [`words_with_break_indices`](trait.ByteSlice.html#method.words_with_break_indices),
/// but keeps only the elements that contain a "word" character. UTS #18
/// (Annex C) defines a word character as the combination of the
/// `Alphabetic` and `Join_Control` properties, along with the
/// `Decimal_Number`, `Mark` and `Connector_Punctuation` general
/// categories.
///
/// A word consists of one or more codepoints, so the items carry `&str`
/// values. When invalid UTF-8 is encountered, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8).
///
/// # Examples
///
/// This example shows how to get the byte offsets of each individual
/// word:
///
/// ```
/// use bstr::ByteSlice;
///
/// let bs = b"can't jump 32.3 feet";
/// let words: Vec<(usize, usize, &str)> = bs.word_indices().collect();
/// assert_eq!(words, vec![
///     (0, 5, "can't"),
///     (6, 10, "jump"),
///     (11, 15, "32.3"),
///     (16, 20, "feet"),
/// ]);
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn word_indices(&self) -> WordIndices<'_> {
    let haystack = self.as_bytes();
    WordIndices::new(haystack)
}
2023 | |
/// Returns an iterator over the words of this byte string together with
/// every break between them. Joining all yielded elements reproduces the
/// original string (modulo Unicode replacement codepoint substitutions if
/// invalid UTF-8 is encountered).
///
/// A word consists of one or more codepoints, so the items are `&str`
/// values. When invalid UTF-8 is encountered, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8).
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteSlice;
///
/// let bs = br#"The quick ("brown") fox can't jump 32.3 feet, right?"#;
/// let words: Vec<&str> = bs.words_with_breaks().collect();
/// assert_eq!(words, vec![
///     "The", " ", "quick", " ", "(", "\"", "brown", "\"", ")",
///     " ", "fox", " ", "can't", " ", "jump", " ", "32.3", " ", "feet",
///     ",", " ", "right", "?",
/// ]);
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn words_with_breaks(&self) -> WordsWithBreaks<'_> {
    let haystack = self.as_bytes();
    WordsWithBreaks::new(haystack)
}
2053 | |
/// Returns an iterator over the words of this byte string and the breaks
/// between them, each paired with its byte offsets. Joining all yielded
/// elements reproduces the original string (modulo Unicode replacement
/// codepoint substitutions if invalid UTF-8 is encountered).
///
/// A word consists of one or more codepoints, so the items carry `&str`
/// values. When invalid UTF-8 is encountered, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8).
///
/// # Examples
///
/// This example shows how to get the byte offsets of each individual
/// word:
///
/// ```
/// use bstr::ByteSlice;
///
/// let bs = b"can't jump 32.3 feet";
/// let words: Vec<(usize, usize, &str)> =
///     bs.words_with_break_indices().collect();
/// assert_eq!(words, vec![
///     (0, 5, "can't"),
///     (5, 6, " "),
///     (6, 10, "jump"),
///     (10, 11, " "),
///     (11, 15, "32.3"),
///     (15, 16, " "),
///     (16, 20, "feet"),
/// ]);
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn words_with_break_indices(&self) -> WordsWithBreakIndices<'_> {
    let haystack = self.as_bytes();
    WordsWithBreakIndices::new(haystack)
}
2090 | |
/// Returns an iterator over the sentences of this byte string.
///
/// A sentence typically includes its trailing punctuation and whitespace.
/// Joining all yielded elements reproduces the original string (modulo
/// Unicode replacement codepoint substitutions if invalid UTF-8 is
/// encountered).
///
/// A sentence consists of one or more codepoints, so the items are `&str`
/// values. When invalid UTF-8 is encountered, replacement codepoints are
/// [substituted](index.html#handling-of-invalid-utf-8).
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteSlice;
///
/// let bs = b"I want this. Not that. Right now.";
/// let sentences: Vec<&str> = bs.sentences().collect();
/// assert_eq!(sentences, vec![
///     "I want this. ",
///     "Not that. ",
///     "Right now.",
/// ]);
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn sentences(&self) -> Sentences<'_> {
    let haystack = self.as_bytes();
    Sentences::new(haystack)
}
2122 | |
/// Returns an iterator over the sentences of this byte string, each paired
/// with the byte offsets at which it starts and ends.
///
/// A sentence typically includes its trailing punctuation and whitespace.
/// Joining all yielded elements reproduces the original string (modulo
/// Unicode replacement codepoint substitutions if invalid UTF-8 is
/// encountered).
///
/// A sentence consists of one or more codepoints, so the items carry
/// `&str` values. When invalid UTF-8 is encountered, replacement
/// codepoints are [substituted](index.html#handling-of-invalid-utf-8).
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::ByteSlice;
///
/// let bs = b"I want this. Not that. Right now.";
/// let sentences: Vec<(usize, usize, &str)> =
///     bs.sentence_indices().collect();
/// assert_eq!(sentences, vec![
///     (0, 13, "I want this. "),
///     (13, 23, "Not that. "),
///     (23, 33, "Right now."),
/// ]);
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn sentence_indices(&self) -> SentenceIndices<'_> {
    let haystack = self.as_bytes();
    SentenceIndices::new(haystack)
}
2156 | |
2157 | /// An iterator over all lines in a byte string, without their |
2158 | /// terminators. |
2159 | /// |
2160 | /// For this iterator, the only line terminators recognized are `\r\n` and |
2161 | /// `\n`. |
2162 | /// |
2163 | /// # Examples |
2164 | /// |
2165 | /// Basic usage: |
2166 | /// |
2167 | /// ``` |
2168 | /// use bstr::{B, ByteSlice}; |
2169 | /// |
2170 | /// let s = b"\ |
2171 | /// foo |
2172 | /// |
2173 | /// bar \r |
2174 | /// baz |
2175 | /// |
2176 | /// |
2177 | /// quux" ; |
2178 | /// let lines: Vec<&[u8]> = s.lines().collect(); |
2179 | /// assert_eq!(lines, vec![ |
2180 | /// B("foo" ), B("" ), B("bar" ), B("baz" ), B("" ), B("" ), B("quux" ), |
2181 | /// ]); |
2182 | /// ``` |
2183 | #[inline ] |
2184 | fn lines(&self) -> Lines<'_> { |
2185 | Lines::new(self.as_bytes()) |
2186 | } |
2187 | |
2188 | /// An iterator over all lines in a byte string, including their |
2189 | /// terminators. |
2190 | /// |
2191 | /// For this iterator, the only line terminator recognized is `\n`. (Since |
2192 | /// line terminators are included, this also handles `\r\n` line endings.) |
2193 | /// |
2194 | /// Line terminators are only included if they are present in the original |
2195 | /// byte string. For example, the last line in a byte string may not end |
2196 | /// with a line terminator. |
2197 | /// |
2198 | /// Concatenating all elements yielded by this iterator is guaranteed to |
2199 | /// yield the original byte string. |
2200 | /// |
2201 | /// # Examples |
2202 | /// |
2203 | /// Basic usage: |
2204 | /// |
2205 | /// ``` |
2206 | /// use bstr::{B, ByteSlice}; |
2207 | /// |
2208 | /// let s = b"\ |
2209 | /// foo |
2210 | /// |
2211 | /// bar \r |
2212 | /// baz |
2213 | /// |
2214 | /// |
2215 | /// quux" ; |
2216 | /// let lines: Vec<&[u8]> = s.lines_with_terminator().collect(); |
2217 | /// assert_eq!(lines, vec![ |
2218 | /// B("foo \n" ), |
2219 | /// B(" \n" ), |
2220 | /// B("bar \r\n" ), |
2221 | /// B("baz \n" ), |
2222 | /// B(" \n" ), |
2223 | /// B(" \n" ), |
2224 | /// B("quux" ), |
2225 | /// ]); |
2226 | /// ``` |
2227 | #[inline ] |
2228 | fn lines_with_terminator(&self) -> LinesWithTerminator<'_> { |
2229 | LinesWithTerminator::new(self.as_bytes()) |
2230 | } |
2231 | |
/// Returns the sub-slice of this byte string that remains after removing
/// whitespace from both ends.
///
/// Whitespace is defined according to the terms of the `White_Space`
/// Unicode property.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B(" foo\tbar\t\u{2003}\n");
/// assert_eq!(s.trim(), B("foo\tbar"));
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn trim(&self) -> &[u8] {
    // Strip the front first, then strip the back of what is left.
    let without_leading = self.trim_start();
    without_leading.trim_end()
}
2253 | |
/// Returns the sub-slice of this byte string that remains after removing
/// whitespace from its beginning.
///
/// Whitespace is defined according to the terms of the `White_Space`
/// Unicode property.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B(" foo\tbar\t\u{2003}\n");
/// assert_eq!(s.trim_start(), B("foo\tbar\t\u{2003}\n"));
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn trim_start(&self) -> &[u8] {
    let bytes = self.as_bytes();
    let leading = whitespace_len_fwd(bytes);
    &bytes[leading..]
}
2275 | |
/// Returns the sub-slice of this byte string that remains after removing
/// whitespace from its end.
///
/// Whitespace is defined according to the terms of the `White_Space`
/// Unicode property.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B(" foo\tbar\t\u{2003}\n");
/// assert_eq!(s.trim_end(), B(" foo\tbar"));
/// ```
#[cfg(feature = "unicode")]
#[inline]
fn trim_end(&self) -> &[u8] {
    let bytes = self.as_bytes();
    // The value from `whitespace_len_rev` is used directly as the exclusive
    // end of the retained prefix.
    let keep = whitespace_len_rev(bytes);
    &bytes[..keep]
}
2297 | |
2298 | /// Return a byte string slice with leading and trailing characters |
2299 | /// satisfying the given predicate removed. |
2300 | /// |
2301 | /// # Examples |
2302 | /// |
2303 | /// Basic usage: |
2304 | /// |
2305 | /// ``` |
2306 | /// use bstr::{B, ByteSlice}; |
2307 | /// |
2308 | /// let s = b"123foo5bar789" ; |
2309 | /// assert_eq!(s.trim_with(|c| c.is_numeric()), B("foo5bar" )); |
2310 | /// ``` |
2311 | #[inline ] |
2312 | fn trim_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] { |
2313 | self.trim_start_with(&mut trim).trim_end_with(&mut trim) |
2314 | } |
2315 | |
2316 | /// Return a byte string slice with leading characters satisfying the given |
2317 | /// predicate removed. |
2318 | /// |
2319 | /// # Examples |
2320 | /// |
2321 | /// Basic usage: |
2322 | /// |
2323 | /// ``` |
2324 | /// use bstr::{B, ByteSlice}; |
2325 | /// |
2326 | /// let s = b"123foo5bar789" ; |
2327 | /// assert_eq!(s.trim_start_with(|c| c.is_numeric()), B("foo5bar789" )); |
2328 | /// ``` |
2329 | #[inline ] |
2330 | fn trim_start_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] { |
2331 | for (s, _, ch) in self.char_indices() { |
2332 | if !trim(ch) { |
2333 | return &self.as_bytes()[s..]; |
2334 | } |
2335 | } |
2336 | b"" |
2337 | } |
2338 | |
2339 | /// Return a byte string slice with trailing characters satisfying the |
2340 | /// given predicate removed. |
2341 | /// |
2342 | /// # Examples |
2343 | /// |
2344 | /// Basic usage: |
2345 | /// |
2346 | /// ``` |
2347 | /// use bstr::{B, ByteSlice}; |
2348 | /// |
2349 | /// let s = b"123foo5bar789" ; |
2350 | /// assert_eq!(s.trim_end_with(|c| c.is_numeric()), B("123foo5bar" )); |
2351 | /// ``` |
2352 | #[inline ] |
2353 | fn trim_end_with<F: FnMut(char) -> bool>(&self, mut trim: F) -> &[u8] { |
2354 | for (_, e, ch) in self.char_indices().rev() { |
2355 | if !trim(ch) { |
2356 | return &self.as_bytes()[..e]; |
2357 | } |
2358 | } |
2359 | b"" |
2360 | } |
2361 | |
/// Builds a new `Vec<u8>` holding the lowercase equivalent of this byte
/// string.
///
/// Here, lowercase is defined according to the `Lowercase` Unicode
/// property.
///
/// Invalid UTF-8, and any character without a lowercase variant, is
/// copied to the result unchanged.
///
/// Some characters expand into multiple characters when their case is
/// changed, so the length of the result may differ from the length of
/// this byte string.
///
/// If you'd like to reuse an allocation for performance reasons, then use
/// [`to_lowercase_into`](#method.to_lowercase_into) instead.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("HELLO Β");
/// assert_eq!("hello β".as_bytes(), s.to_lowercase().as_bytes());
/// ```
///
/// Scripts without case are not changed:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("农历新年");
/// assert_eq!("农历新年".as_bytes(), s.to_lowercase().as_bytes());
/// ```
///
/// Invalid UTF-8 remains as is:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
/// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), s.to_lowercase().as_bytes());
/// ```
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_lowercase(&self) -> Vec<u8> {
    // Start from an empty buffer and let the `_into` variant fill it.
    let mut lowered = vec![];
    self.to_lowercase_into(&mut lowered);
    lowered
}
2414 | |
/// Appends the lowercase equivalent of this byte string to the given
/// buffer. The buffer is not cleared first.
///
/// Here, lowercase is defined according to the `Lowercase` Unicode
/// property.
///
/// Invalid UTF-8, and any character without a lowercase variant, is
/// written to the given buffer unchanged.
///
/// Some characters expand into multiple characters when their case is
/// changed, so the number of bytes written to the buffer may differ from
/// the length of this byte string.
///
/// If you don't need to amortize allocation and instead prefer
/// convenience, then use [`to_lowercase`](#method.to_lowercase) instead.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("HELLO Β");
///
/// let mut buf = vec![];
/// s.to_lowercase_into(&mut buf);
/// assert_eq!("hello β".as_bytes(), buf.as_bytes());
/// ```
///
/// Scripts without case are not changed:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("农历新年");
///
/// let mut buf = vec![];
/// s.to_lowercase_into(&mut buf);
/// assert_eq!("农历新年".as_bytes(), buf.as_bytes());
/// ```
///
/// Invalid UTF-8 remains as is:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
///
/// let mut buf = vec![];
/// s.to_lowercase_into(&mut buf);
/// assert_eq!(B(b"foo\xFFbar\xE2\x98baz"), buf.as_bytes());
/// ```
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_lowercase_into(&self, buf: &mut Vec<u8>) {
    // TODO: Given what std exposes, this is probably the best available
    // approach. Rolling our own case handling might be faster, but that
    // should wait until it is needed anyway, e.g., for caseless matching
    // or case folding.

    // TODO(BUG): This doesn't handle any special casing rules.

    let bytes = self.as_bytes();
    buf.reserve(bytes.len());
    for (start, end, ch) in self.char_indices() {
        match ch {
            // A replacement codepoint means the source bytes are copied
            // verbatim, so invalid UTF-8 passes through unchanged.
            '\u{FFFD}' => buf.push_str(&bytes[start..end]),
            c if c.is_ascii() => buf.push_char(c.to_ascii_lowercase()),
            // One codepoint may lowercase to several.
            c => c.to_lowercase().for_each(|lower| buf.push_char(lower)),
        }
    }
}
2492 | |
/// Builds a new `Vec<u8>` holding the ASCII lowercase equivalent of this
/// byte string.
///
/// Here, lowercase is only defined for ASCII letters: `A-Z` become `a-z`
/// and every other byte is left unchanged. In particular, the result
/// always has the same length as this byte string.
///
/// If you'd like to reuse an allocation for performance reasons, then use
/// [`make_ascii_lowercase`](#method.make_ascii_lowercase) to perform
/// the conversion in place.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B("HELLO Β");
/// assert_eq!("hello Β".as_bytes(), s.to_ascii_lowercase().as_bytes());
/// ```
///
/// Invalid UTF-8 remains as is:
///
/// ```
/// use bstr::{B, ByteSlice};
///
/// let s = B(b"FOO\xFFBAR\xE2\x98BAZ");
/// assert_eq!(s.to_ascii_lowercase(), B(b"foo\xFFbar\xE2\x98baz"));
/// ```
#[cfg(feature = "alloc")]
#[inline]
fn to_ascii_lowercase(&self) -> Vec<u8> {
    let bytes = self.as_bytes();
    bytes.to_ascii_lowercase()
}
2529 | |
2530 | /// Convert this byte string to its lowercase ASCII equivalent in place. |
2531 | /// |
2532 | /// In this case, lowercase is only defined in ASCII letters. Namely, the |
2533 | /// letters `A-Z` are converted to `a-z`. All other bytes remain unchanged. |
2534 | /// |
2535 | /// If you don't need to do the conversion in |
2536 | /// place and instead prefer convenience, then use |
2537 | /// [`to_ascii_lowercase`](#method.to_ascii_lowercase) instead. |
2538 | /// |
2539 | /// # Examples |
2540 | /// |
2541 | /// Basic usage: |
2542 | /// |
2543 | /// ``` |
2544 | /// use bstr::ByteSlice; |
2545 | /// |
2546 | /// let mut s = <Vec<u8>>::from("HELLO Β" ); |
2547 | /// s.make_ascii_lowercase(); |
2548 | /// assert_eq!(s, "hello Β" .as_bytes()); |
2549 | /// ``` |
2550 | /// |
2551 | /// Invalid UTF-8 remains as is: |
2552 | /// |
2553 | /// ``` |
2554 | /// # #[cfg (feature = "alloc" )] { |
2555 | /// use bstr::{B, ByteSlice, ByteVec}; |
2556 | /// |
2557 | /// let mut s = <Vec<u8>>::from_slice(b"FOO \xFFBAR \xE2\x98BAZ" ); |
2558 | /// s.make_ascii_lowercase(); |
2559 | /// assert_eq!(s, B(b"foo \xFFbar \xE2\x98baz" )); |
2560 | /// # } |
2561 | /// ``` |
2562 | #[inline ] |
2563 | fn make_ascii_lowercase(&mut self) { |
2564 | self.as_bytes_mut().make_ascii_lowercase(); |
2565 | } |
2566 | |
2567 | /// Returns a new `Vec<u8>` containing the uppercase equivalent of this |
2568 | /// byte string. |
2569 | /// |
2570 | /// In this case, uppercase is defined according to the `Uppercase` |
2571 | /// Unicode property. |
2572 | /// |
/// If invalid UTF-8 is seen, or if a character has no uppercase variant,
/// then it is copied to the returned `Vec<u8>` unchanged.
///
/// Note that some characters in this byte string may expand into multiple
/// characters when changing the case, so the number of bytes in the
/// returned `Vec<u8>` may not be equivalent to the number of bytes in
/// this byte string.
2580 | /// |
2581 | /// If you'd like to reuse an allocation for performance reasons, then use |
2582 | /// [`to_uppercase_into`](#method.to_uppercase_into) instead. |
2583 | /// |
2584 | /// # Examples |
2585 | /// |
2586 | /// Basic usage: |
2587 | /// |
2588 | /// ``` |
2589 | /// use bstr::{B, ByteSlice}; |
2590 | /// |
2591 | /// let s = B("hello β" ); |
2592 | /// assert_eq!(s.to_uppercase(), B("HELLO Β" )); |
2593 | /// ``` |
2594 | /// |
2595 | /// Scripts without case are not changed: |
2596 | /// |
2597 | /// ``` |
2598 | /// use bstr::{B, ByteSlice}; |
2599 | /// |
2600 | /// let s = B("农历新年" ); |
2601 | /// assert_eq!(s.to_uppercase(), B("农历新年" )); |
2602 | /// ``` |
2603 | /// |
2604 | /// Invalid UTF-8 remains as is: |
2605 | /// |
2606 | /// ``` |
2607 | /// use bstr::{B, ByteSlice}; |
2608 | /// |
2609 | /// let s = B(b"foo \xFFbar \xE2\x98baz" ); |
2610 | /// assert_eq!(s.to_uppercase(), B(b"FOO \xFFBAR \xE2\x98BAZ" )); |
2611 | /// ``` |
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_uppercase(&self) -> Vec<u8> {
    // Start from an empty buffer and let the `_into` variant fill it.
    let mut upper = Vec::new();
    self.to_uppercase_into(&mut upper);
    upper
}
2619 | |
/// Writes the uppercase equivalent of this byte string into the given
/// buffer. The buffer is not cleared before it is written to.
2622 | /// |
2623 | /// In this case, uppercase is defined according to the `Uppercase` |
2624 | /// Unicode property. |
2625 | /// |
2626 | /// If invalid UTF-8 is seen, or if a character has no uppercase variant, |
2627 | /// then it is written to the given buffer unchanged. |
2628 | /// |
2629 | /// Note that some characters in this byte string may expand into multiple |
2630 | /// characters when changing the case, so the number of bytes written to |
2631 | /// the given byte string may not be equivalent to the number of bytes in |
2632 | /// this byte string. |
2633 | /// |
2634 | /// If you don't need to amortize allocation and instead prefer |
2635 | /// convenience, then use [`to_uppercase`](#method.to_uppercase) instead. |
2636 | /// |
2637 | /// # Examples |
2638 | /// |
2639 | /// Basic usage: |
2640 | /// |
2641 | /// ``` |
2642 | /// use bstr::{B, ByteSlice}; |
2643 | /// |
2644 | /// let s = B("hello β" ); |
2645 | /// |
2646 | /// let mut buf = vec![]; |
2647 | /// s.to_uppercase_into(&mut buf); |
2648 | /// assert_eq!(buf, B("HELLO Β" )); |
2649 | /// ``` |
2650 | /// |
2651 | /// Scripts without case are not changed: |
2652 | /// |
2653 | /// ``` |
2654 | /// use bstr::{B, ByteSlice}; |
2655 | /// |
2656 | /// let s = B("农历新年" ); |
2657 | /// |
2658 | /// let mut buf = vec![]; |
2659 | /// s.to_uppercase_into(&mut buf); |
2660 | /// assert_eq!(buf, B("农历新年" )); |
2661 | /// ``` |
2662 | /// |
2663 | /// Invalid UTF-8 remains as is: |
2664 | /// |
2665 | /// ``` |
2666 | /// use bstr::{B, ByteSlice}; |
2667 | /// |
2668 | /// let s = B(b"foo \xFFbar \xE2\x98baz" ); |
2669 | /// |
2670 | /// let mut buf = vec![]; |
2671 | /// s.to_uppercase_into(&mut buf); |
2672 | /// assert_eq!(buf, B(b"FOO \xFFBAR \xE2\x98BAZ" )); |
2673 | /// ``` |
#[cfg(all(feature = "alloc", feature = "unicode"))]
#[inline]
fn to_uppercase_into(&self, buf: &mut Vec<u8>) {
    // TODO: This is the best we can do given what std exposes I think.
    // If we roll our own case handling, then we might be able to do this
    // a bit faster. We shouldn't roll our own case handling unless we
    // need to, e.g., for doing caseless matching or case folding.
    let bytes = self.as_bytes();
    // Only a starting estimate: some codepoints expand to several
    // codepoints when uppercased, so the output may end up longer.
    buf.reserve(bytes.len());
    for (start, end, ch) in self.char_indices() {
        if ch == '\u{FFFD}' {
            // The replacement codepoint is copied from the source bytes
            // rather than re-encoded, so that invalid UTF-8 is written to
            // the buffer unchanged.
            buf.push_str(&bytes[start..end]);
        } else if ch.is_ascii() {
            // ASCII never expands, so the single-char mapping suffices.
            buf.push_char(ch.to_ascii_uppercase());
        } else {
            // A codepoint may map to multiple uppercase codepoints.
            for upper in ch.to_uppercase() {
                buf.push_char(upper);
            }
        }
    }
}
2694 | |
2695 | /// Returns a new `Vec<u8>` containing the ASCII uppercase equivalent of |
2696 | /// this byte string. |
2697 | /// |
2698 | /// In this case, uppercase is only defined in ASCII letters. Namely, the |
2699 | /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged. |
2700 | /// In particular, the length of the byte string returned is always |
2701 | /// equivalent to the length of this byte string. |
2702 | /// |
2703 | /// If you'd like to reuse an allocation for performance reasons, then use |
2704 | /// [`make_ascii_uppercase`](#method.make_ascii_uppercase) to perform |
2705 | /// the conversion in place. |
2706 | /// |
2707 | /// # Examples |
2708 | /// |
2709 | /// Basic usage: |
2710 | /// |
2711 | /// ``` |
2712 | /// use bstr::{B, ByteSlice}; |
2713 | /// |
2714 | /// let s = B("hello β" ); |
2715 | /// assert_eq!(s.to_ascii_uppercase(), B("HELLO β" )); |
2716 | /// ``` |
2717 | /// |
2718 | /// Invalid UTF-8 remains as is: |
2719 | /// |
2720 | /// ``` |
2721 | /// use bstr::{B, ByteSlice}; |
2722 | /// |
2723 | /// let s = B(b"foo \xFFbar \xE2\x98baz" ); |
2724 | /// assert_eq!(s.to_ascii_uppercase(), B(b"FOO \xFFBAR \xE2\x98BAZ" )); |
2725 | /// ``` |
#[cfg(feature = "alloc")]
#[inline]
fn to_ascii_uppercase(&self) -> Vec<u8> {
    // Hand the work to the slice primitive: it returns a fresh copy in
    // which only the ASCII letters `a-z` have been uppercased.
    let bytes: &[u8] = self.as_bytes();
    bytes.to_ascii_uppercase()
}
2731 | |
2732 | /// Convert this byte string to its uppercase ASCII equivalent in place. |
2733 | /// |
2734 | /// In this case, uppercase is only defined in ASCII letters. Namely, the |
2735 | /// letters `a-z` are converted to `A-Z`. All other bytes remain unchanged. |
2736 | /// |
2737 | /// If you don't need to do the conversion in |
2738 | /// place and instead prefer convenience, then use |
2739 | /// [`to_ascii_uppercase`](#method.to_ascii_uppercase) instead. |
2740 | /// |
2741 | /// # Examples |
2742 | /// |
2743 | /// Basic usage: |
2744 | /// |
2745 | /// ``` |
2746 | /// use bstr::{B, ByteSlice}; |
2747 | /// |
2748 | /// let mut s = <Vec<u8>>::from("hello β" ); |
2749 | /// s.make_ascii_uppercase(); |
2750 | /// assert_eq!(s, B("HELLO β" )); |
2751 | /// ``` |
2752 | /// |
2753 | /// Invalid UTF-8 remains as is: |
2754 | /// |
2755 | /// ``` |
2756 | /// # #[cfg (feature = "alloc" )] { |
2757 | /// use bstr::{B, ByteSlice, ByteVec}; |
2758 | /// |
2759 | /// let mut s = <Vec<u8>>::from_slice(b"foo \xFFbar \xE2\x98baz" ); |
2760 | /// s.make_ascii_uppercase(); |
2761 | /// assert_eq!(s, B(b"FOO \xFFBAR \xE2\x98BAZ" )); |
2762 | /// # } |
2763 | /// ``` |
2764 | #[inline ] |
2765 | fn make_ascii_uppercase(&mut self) { |
2766 | self.as_bytes_mut().make_ascii_uppercase(); |
2767 | } |
2768 | |
2769 | /// Escapes this byte string into a sequence of `char` values. |
2770 | /// |
2771 | /// When the sequence of `char` values is concatenated into a string, the |
2772 | /// result is always valid UTF-8. Any unprintable or invalid UTF-8 in this |
/// byte string are escaped using `\xNN` notation. Moreover, the
2774 | /// characters `\0`, `\r`, `\n`, `\t` and `\` are escaped as well. |
2775 | /// |
2776 | /// This is useful when one wants to get a human readable view of the raw |
2777 | /// bytes that is also valid UTF-8. |
2778 | /// |
2779 | /// The iterator returned implements the `Display` trait. So one can do |
2780 | /// `b"foo\xFFbar".escape_bytes().to_string()` to get a `String` with its |
2781 | /// bytes escaped. |
2782 | /// |
2783 | /// The dual of this function is [`ByteVec::unescape_bytes`]. |
2784 | /// |
2785 | /// Note that this is similar to, but not equivalent to the `Debug` |
2786 | /// implementation on [`BStr`] and [`BString`]. The `Debug` implementations |
2787 | /// also use the debug representation for all Unicode codepoints. However, |
2788 | /// this escaping routine only escapes individual bytes. All Unicode |
2789 | /// codepoints above `U+007F` are passed through unchanged without any |
2790 | /// escaping. |
2791 | /// |
2792 | /// # Examples |
2793 | /// |
2794 | /// ``` |
2795 | /// # #[cfg (feature = "alloc" )] { |
2796 | /// use bstr::{B, ByteSlice}; |
2797 | /// |
/// assert_eq!(r"foo\xFFbar", b"foo\xFFbar".escape_bytes().to_string());
/// assert_eq!(r"foo\nbar", b"foo\nbar".escape_bytes().to_string());
/// assert_eq!(r"foo\tbar", b"foo\tbar".escape_bytes().to_string());
/// assert_eq!(r"foo\\bar", b"foo\\bar".escape_bytes().to_string());
2802 | /// assert_eq!(r"foo☃bar" , B("foo☃bar" ).escape_bytes().to_string()); |
2803 | /// # } |
2804 | /// ``` |
2805 | #[inline ] |
2806 | fn escape_bytes(&self) -> EscapeBytes<'_> { |
2807 | EscapeBytes::new(self.as_bytes()) |
2808 | } |
2809 | |
2810 | /// Reverse the bytes in this string, in place. |
2811 | /// |
2812 | /// This is not necessarily a well formed operation! For example, if this |
2813 | /// byte string contains valid UTF-8 that isn't ASCII, then reversing the |
2814 | /// string will likely result in invalid UTF-8 and otherwise non-sensical |
2815 | /// content. |
2816 | /// |
2817 | /// Note that this is equivalent to the generic `[u8]::reverse` method. |
2818 | /// This method is provided to permit callers to explicitly differentiate |
2819 | /// between reversing bytes, codepoints and graphemes. |
2820 | /// |
2821 | /// # Examples |
2822 | /// |
2823 | /// Basic usage: |
2824 | /// |
2825 | /// ``` |
2826 | /// use bstr::ByteSlice; |
2827 | /// |
2828 | /// let mut s = <Vec<u8>>::from("hello" ); |
2829 | /// s.reverse_bytes(); |
2830 | /// assert_eq!(s, "olleh" .as_bytes()); |
2831 | /// ``` |
2832 | #[inline ] |
2833 | fn reverse_bytes(&mut self) { |
2834 | self.as_bytes_mut().reverse(); |
2835 | } |
2836 | |
2837 | /// Reverse the codepoints in this string, in place. |
2838 | /// |
2839 | /// If this byte string is valid UTF-8, then its reversal by codepoint |
2840 | /// is also guaranteed to be valid UTF-8. |
2841 | /// |
2842 | /// This operation is equivalent to the following, but without allocating: |
2843 | /// |
2844 | /// ``` |
2845 | /// use bstr::ByteSlice; |
2846 | /// |
2847 | /// let mut s = <Vec<u8>>::from("foo☃bar" ); |
2848 | /// |
2849 | /// let mut chars: Vec<char> = s.chars().collect(); |
2850 | /// chars.reverse(); |
2851 | /// |
2852 | /// let reversed: String = chars.into_iter().collect(); |
2853 | /// assert_eq!(reversed, "rab☃oof" ); |
2854 | /// ``` |
2855 | /// |
2856 | /// Note that this is not necessarily a well formed operation. For example, |
2857 | /// if this byte string contains grapheme clusters with more than one |
2858 | /// codepoint, then those grapheme clusters will not necessarily be |
2859 | /// preserved. If you'd like to preserve grapheme clusters, then use |
2860 | /// [`reverse_graphemes`](#method.reverse_graphemes) instead. |
2861 | /// |
2862 | /// # Examples |
2863 | /// |
2864 | /// Basic usage: |
2865 | /// |
2866 | /// ``` |
2867 | /// use bstr::ByteSlice; |
2868 | /// |
2869 | /// let mut s = <Vec<u8>>::from("foo☃bar" ); |
2870 | /// s.reverse_chars(); |
2871 | /// assert_eq!(s, "rab☃oof" .as_bytes()); |
2872 | /// ``` |
2873 | /// |
2874 | /// This example shows that not all reversals lead to a well formed string. |
2875 | /// For example, in this case, combining marks are used to put accents over |
2876 | /// some letters, and those accent marks must appear after the codepoints |
2877 | /// they modify. |
2878 | /// |
2879 | /// ``` |
2880 | /// use bstr::{B, ByteSlice}; |
2881 | /// |
2882 | /// let mut s = <Vec<u8>>::from("résumé" ); |
2883 | /// s.reverse_chars(); |
/// assert_eq!(s, B(b"\xCC\x81emus\xCC\x81er"));
2885 | /// ``` |
2886 | /// |
2887 | /// A word of warning: the above example relies on the fact that |
2888 | /// `résumé` is in decomposed normal form, which means there are separate |
2889 | /// codepoints for the accents above `e`. If it is instead in composed |
2890 | /// normal form, then the example works: |
2891 | /// |
2892 | /// ``` |
2893 | /// use bstr::{B, ByteSlice}; |
2894 | /// |
2895 | /// let mut s = <Vec<u8>>::from("résumé" ); |
2896 | /// s.reverse_chars(); |
2897 | /// assert_eq!(s, B("émusér" )); |
2898 | /// ``` |
2899 | /// |
2900 | /// The point here is to be cautious and not assume that just because |
2901 | /// `reverse_chars` works in one case, that it therefore works in all |
2902 | /// cases. |
2903 | #[inline ] |
2904 | fn reverse_chars(&mut self) { |
2905 | let mut i = 0; |
2906 | loop { |
2907 | let (_, size) = utf8::decode(&self.as_bytes()[i..]); |
2908 | if size == 0 { |
2909 | break; |
2910 | } |
2911 | if size > 1 { |
2912 | self.as_bytes_mut()[i..i + size].reverse_bytes(); |
2913 | } |
2914 | i += size; |
2915 | } |
2916 | self.reverse_bytes(); |
2917 | } |
2918 | |
2919 | /// Reverse the graphemes in this string, in place. |
2920 | /// |
2921 | /// If this byte string is valid UTF-8, then its reversal by grapheme |
2922 | /// is also guaranteed to be valid UTF-8. |
2923 | /// |
2924 | /// This operation is equivalent to the following, but without allocating: |
2925 | /// |
2926 | /// ``` |
2927 | /// use bstr::ByteSlice; |
2928 | /// |
2929 | /// let mut s = <Vec<u8>>::from("foo☃bar" ); |
2930 | /// |
2931 | /// let mut graphemes: Vec<&str> = s.graphemes().collect(); |
2932 | /// graphemes.reverse(); |
2933 | /// |
2934 | /// let reversed = graphemes.concat(); |
2935 | /// assert_eq!(reversed, "rab☃oof" ); |
2936 | /// ``` |
2937 | /// |
2938 | /// # Examples |
2939 | /// |
2940 | /// Basic usage: |
2941 | /// |
2942 | /// ``` |
2943 | /// use bstr::ByteSlice; |
2944 | /// |
2945 | /// let mut s = <Vec<u8>>::from("foo☃bar" ); |
2946 | /// s.reverse_graphemes(); |
2947 | /// assert_eq!(s, "rab☃oof" .as_bytes()); |
2948 | /// ``` |
2949 | /// |
2950 | /// This example shows how this correctly handles grapheme clusters, |
2951 | /// unlike `reverse_chars`. |
2952 | /// |
2953 | /// ``` |
2954 | /// use bstr::ByteSlice; |
2955 | /// |
2956 | /// let mut s = <Vec<u8>>::from("résumé" ); |
2957 | /// s.reverse_graphemes(); |
2958 | /// assert_eq!(s, "émusér" .as_bytes()); |
2959 | /// ``` |
#[cfg(feature = "unicode")]
#[inline]
fn reverse_graphemes(&mut self) {
    use crate::unicode::decode_grapheme;

    // Pass 1: reverse the bytes of every multi-byte grapheme in place.
    // Pass 2 (below) reverses the whole string, which restores the byte
    // order inside each grapheme while reversing the order of the
    // graphemes themselves.
    let mut offset = 0;
    loop {
        let width = match decode_grapheme(&self.as_bytes()[offset..]) {
            // A zero-length decode ends the scan.
            (_, 0) => break,
            (_, n) => n,
        };
        // Single-byte graphemes are their own reversal; skip them.
        if width >= 2 {
            self.as_bytes_mut()[offset..offset + width].reverse_bytes();
        }
        offset += width;
    }
    self.reverse_bytes();
}
2978 | |
2979 | /// Returns true if and only if every byte in this byte string is ASCII. |
2980 | /// |
2981 | /// ASCII is an encoding that defines 128 codepoints. A byte corresponds to |
2982 | /// an ASCII codepoint if and only if it is in the inclusive range |
2983 | /// `[0, 127]`. |
2984 | /// |
2985 | /// # Examples |
2986 | /// |
2987 | /// Basic usage: |
2988 | /// |
2989 | /// ``` |
2990 | /// use bstr::{B, ByteSlice}; |
2991 | /// |
2992 | /// assert!(B("abc" ).is_ascii()); |
2993 | /// assert!(!B("☃βツ" ).is_ascii()); |
2994 | /// assert!(!B(b" \xFF" ).is_ascii()); |
2995 | /// ``` |
2996 | #[inline ] |
2997 | fn is_ascii(&self) -> bool { |
2998 | ascii::first_non_ascii_byte(self.as_bytes()) == self.as_bytes().len() |
2999 | } |
3000 | |
3001 | /// Returns true if and only if the entire byte string is valid UTF-8. |
3002 | /// |
3003 | /// If you need location information about where a byte string's first |
3004 | /// invalid UTF-8 byte is, then use the [`to_str`](#method.to_str) method. |
3005 | /// |
3006 | /// # Examples |
3007 | /// |
3008 | /// Basic usage: |
3009 | /// |
3010 | /// ``` |
3011 | /// use bstr::{B, ByteSlice}; |
3012 | /// |
3013 | /// assert!(B("abc" ).is_utf8()); |
3014 | /// assert!(B("☃βツ" ).is_utf8()); |
3015 | /// // invalid bytes |
3016 | /// assert!(!B(b"abc \xFF" ).is_utf8()); |
3017 | /// // surrogate encoding |
3018 | /// assert!(!B(b" \xED\xA0\x80" ).is_utf8()); |
3019 | /// // incomplete sequence |
3020 | /// assert!(!B(b" \xF0\x9D\x9Ca" ).is_utf8()); |
3021 | /// // overlong sequence |
3022 | /// assert!(!B(b" \xF0\x82\x82\xAC" ).is_utf8()); |
3023 | /// ``` |
3024 | #[inline ] |
3025 | fn is_utf8(&self) -> bool { |
3026 | utf8::validate(self.as_bytes()).is_ok() |
3027 | } |
3028 | |
3029 | /// Returns the last byte in this byte string, if it's non-empty. If this |
3030 | /// byte string is empty, this returns `None`. |
3031 | /// |
3032 | /// Note that this is like the generic `[u8]::last`, except this returns |
3033 | /// the byte by value instead of a reference to the byte. |
3034 | /// |
3035 | /// # Examples |
3036 | /// |
3037 | /// Basic usage: |
3038 | /// |
3039 | /// ``` |
3040 | /// use bstr::ByteSlice; |
3041 | /// |
3042 | /// assert_eq!(Some(b'z' ), b"baz" .last_byte()); |
3043 | /// assert_eq!(None, b"" .last_byte()); |
3044 | /// ``` |
3045 | #[inline ] |
3046 | fn last_byte(&self) -> Option<u8> { |
3047 | let bytes = self.as_bytes(); |
3048 | bytes.get(bytes.len().saturating_sub(1)).map(|&b| b) |
3049 | } |
3050 | |
3051 | /// Returns the index of the first non-ASCII byte in this byte string (if |
3052 | /// any such indices exist). Specifically, it returns the index of the |
3053 | /// first byte with a value greater than or equal to `0x80`. |
3054 | /// |
3055 | /// # Examples |
3056 | /// |
3057 | /// Basic usage: |
3058 | /// |
3059 | /// ``` |
3060 | /// use bstr::{ByteSlice, B}; |
3061 | /// |
/// assert_eq!(Some(3), b"abc\xff".find_non_ascii_byte());
3063 | /// assert_eq!(None, b"abcde" .find_non_ascii_byte()); |
3064 | /// assert_eq!(Some(0), B("😀" ).find_non_ascii_byte()); |
3065 | /// ``` |
3066 | #[inline ] |
3067 | fn find_non_ascii_byte(&self) -> Option<usize> { |
3068 | let index = ascii::first_non_ascii_byte(self.as_bytes()); |
3069 | if index == self.as_bytes().len() { |
3070 | None |
3071 | } else { |
3072 | Some(index) |
3073 | } |
3074 | } |
3075 | } |
3076 | |
3077 | /// A single substring searcher fixed to a particular needle. |
3078 | /// |
3079 | /// The purpose of this type is to permit callers to construct a substring |
3080 | /// searcher that can be used to search haystacks without the overhead of |
3081 | /// constructing the searcher in the first place. This is a somewhat niche |
3082 | /// concern when it's necessary to re-use the same needle to search multiple |
3083 | /// different haystacks with as little overhead as possible. In general, using |
3084 | /// [`ByteSlice::find`](trait.ByteSlice.html#method.find) |
3085 | /// or |
3086 | /// [`ByteSlice::find_iter`](trait.ByteSlice.html#method.find_iter) |
3087 | /// is good enough, but `Finder` is useful when you can meaningfully observe |
3088 | /// searcher construction time in a profile. |
3089 | /// |
3090 | /// When the `std` feature is enabled, then this type has an `into_owned` |
3091 | /// version which permits building a `Finder` that is not connected to the |
3092 | /// lifetime of its needle. |
3093 | #[derive (Clone, Debug)] |
3094 | pub struct Finder<'a>(memmem::Finder<'a>); |
3095 | |
3096 | impl<'a> Finder<'a> { |
3097 | /// Create a new finder for the given needle. |
3098 | #[inline ] |
3099 | pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> Finder<'a> { |
3100 | Finder(memmem::Finder::new(needle.as_ref())) |
3101 | } |
3102 | |
3103 | /// Convert this finder into its owned variant, such that it no longer |
3104 | /// borrows the needle. |
3105 | /// |
3106 | /// If this is already an owned finder, then this is a no-op. Otherwise, |
3107 | /// this copies the needle. |
3108 | /// |
3109 | /// This is only available when the `std` feature is enabled. |
3110 | #[cfg (feature = "std" )] |
3111 | #[inline ] |
3112 | pub fn into_owned(self) -> Finder<'static> { |
3113 | Finder(self.0.into_owned()) |
3114 | } |
3115 | |
3116 | /// Returns the needle that this finder searches for. |
3117 | /// |
3118 | /// Note that the lifetime of the needle returned is tied to the lifetime |
3119 | /// of the finder, and may be shorter than the `'a` lifetime. Namely, a |
3120 | /// finder's needle can be either borrowed or owned, so the lifetime of the |
3121 | /// needle returned must necessarily be the shorter of the two. |
3122 | #[inline ] |
3123 | pub fn needle(&self) -> &[u8] { |
3124 | self.0.needle() |
3125 | } |
3126 | |
3127 | /// Returns the index of the first occurrence of this needle in the given |
3128 | /// haystack. |
3129 | /// |
3130 | /// The haystack may be any type that can be cheaply converted into a |
3131 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
3132 | /// |
3133 | /// # Complexity |
3134 | /// |
3135 | /// This routine is guaranteed to have worst case linear time complexity |
3136 | /// with respect to both the needle and the haystack. That is, this runs |
3137 | /// in `O(needle.len() + haystack.len())` time. |
3138 | /// |
3139 | /// This routine is also guaranteed to have worst case constant space |
3140 | /// complexity. |
3141 | /// |
3142 | /// # Examples |
3143 | /// |
3144 | /// Basic usage: |
3145 | /// |
3146 | /// ``` |
3147 | /// use bstr::Finder; |
3148 | /// |
3149 | /// let haystack = "foo bar baz" ; |
3150 | /// assert_eq!(Some(0), Finder::new("foo" ).find(haystack)); |
3151 | /// assert_eq!(Some(4), Finder::new("bar" ).find(haystack)); |
3152 | /// assert_eq!(None, Finder::new("quux" ).find(haystack)); |
3153 | /// ``` |
3154 | #[inline ] |
3155 | pub fn find<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { |
3156 | self.0.find(haystack.as_ref()) |
3157 | } |
3158 | } |
3159 | |
3160 | /// A single substring reverse searcher fixed to a particular needle. |
3161 | /// |
3162 | /// The purpose of this type is to permit callers to construct a substring |
3163 | /// searcher that can be used to search haystacks without the overhead of |
3164 | /// constructing the searcher in the first place. This is a somewhat niche |
3165 | /// concern when it's necessary to re-use the same needle to search multiple |
3166 | /// different haystacks with as little overhead as possible. In general, using |
3167 | /// [`ByteSlice::rfind`](trait.ByteSlice.html#method.rfind) |
3168 | /// or |
3169 | /// [`ByteSlice::rfind_iter`](trait.ByteSlice.html#method.rfind_iter) |
3170 | /// is good enough, but `FinderReverse` is useful when you can meaningfully |
3171 | /// observe searcher construction time in a profile. |
3172 | /// |
3173 | /// When the `std` feature is enabled, then this type has an `into_owned` |
3174 | /// version which permits building a `FinderReverse` that is not connected to |
3175 | /// the lifetime of its needle. |
3176 | #[derive (Clone, Debug)] |
3177 | pub struct FinderReverse<'a>(memmem::FinderRev<'a>); |
3178 | |
3179 | impl<'a> FinderReverse<'a> { |
3180 | /// Create a new reverse finder for the given needle. |
3181 | #[inline ] |
3182 | pub fn new<B: ?Sized + AsRef<[u8]>>(needle: &'a B) -> FinderReverse<'a> { |
3183 | FinderReverse(memmem::FinderRev::new(needle.as_ref())) |
3184 | } |
3185 | |
3186 | /// Convert this finder into its owned variant, such that it no longer |
3187 | /// borrows the needle. |
3188 | /// |
3189 | /// If this is already an owned finder, then this is a no-op. Otherwise, |
3190 | /// this copies the needle. |
3191 | /// |
3192 | /// This is only available when the `std` feature is enabled. |
3193 | #[cfg (feature = "std" )] |
3194 | #[inline ] |
3195 | pub fn into_owned(self) -> FinderReverse<'static> { |
3196 | FinderReverse(self.0.into_owned()) |
3197 | } |
3198 | |
3199 | /// Returns the needle that this finder searches for. |
3200 | /// |
3201 | /// Note that the lifetime of the needle returned is tied to the lifetime |
3202 | /// of this finder, and may be shorter than the `'a` lifetime. Namely, |
3203 | /// a finder's needle can be either borrowed or owned, so the lifetime of |
3204 | /// the needle returned must necessarily be the shorter of the two. |
3205 | #[inline ] |
3206 | pub fn needle(&self) -> &[u8] { |
3207 | self.0.needle() |
3208 | } |
3209 | |
3210 | /// Returns the index of the last occurrence of this needle in the given |
3211 | /// haystack. |
3212 | /// |
3213 | /// The haystack may be any type that can be cheaply converted into a |
3214 | /// `&[u8]`. This includes, but is not limited to, `&str` and `&[u8]`. |
3215 | /// |
3216 | /// # Complexity |
3217 | /// |
3218 | /// This routine is guaranteed to have worst case linear time complexity |
3219 | /// with respect to both the needle and the haystack. That is, this runs |
3220 | /// in `O(needle.len() + haystack.len())` time. |
3221 | /// |
3222 | /// This routine is also guaranteed to have worst case constant space |
3223 | /// complexity. |
3224 | /// |
3225 | /// # Examples |
3226 | /// |
3227 | /// Basic usage: |
3228 | /// |
3229 | /// ``` |
3230 | /// use bstr::FinderReverse; |
3231 | /// |
3232 | /// let haystack = "foo bar baz" ; |
3233 | /// assert_eq!(Some(0), FinderReverse::new("foo" ).rfind(haystack)); |
3234 | /// assert_eq!(Some(4), FinderReverse::new("bar" ).rfind(haystack)); |
3235 | /// assert_eq!(None, FinderReverse::new("quux" ).rfind(haystack)); |
3236 | /// ``` |
3237 | #[inline ] |
3238 | pub fn rfind<B: AsRef<[u8]>>(&self, haystack: B) -> Option<usize> { |
3239 | self.0.rfind(haystack.as_ref()) |
3240 | } |
3241 | } |
3242 | |
3243 | /// An iterator over non-overlapping substring matches. |
3244 | /// |
3245 | /// Matches are reported by the byte offset at which they begin. |
3246 | /// |
3247 | /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the |
3248 | /// needle. |
3249 | #[derive (Debug)] |
3250 | pub struct Find<'h, 'n> { |
3251 | it: memmem::FindIter<'h, 'n>, |
3252 | haystack: &'h [u8], |
3253 | needle: &'n [u8], |
3254 | } |
3255 | |
3256 | impl<'h, 'n> Find<'h, 'n> { |
3257 | fn new(haystack: &'h [u8], needle: &'n [u8]) -> Find<'h, 'n> { |
3258 | Find { it: memmem::find_iter(haystack, needle), haystack, needle } |
3259 | } |
3260 | } |
3261 | |
3262 | impl<'h, 'n> Iterator for Find<'h, 'n> { |
3263 | type Item = usize; |
3264 | |
3265 | #[inline ] |
3266 | fn next(&mut self) -> Option<usize> { |
3267 | self.it.next() |
3268 | } |
3269 | } |
3270 | |
3271 | /// An iterator over non-overlapping substring matches in reverse. |
3272 | /// |
3273 | /// Matches are reported by the byte offset at which they begin. |
3274 | /// |
3275 | /// `'h` is the lifetime of the haystack while `'n` is the lifetime of the |
3276 | /// needle. |
3277 | #[derive (Debug)] |
3278 | pub struct FindReverse<'h, 'n> { |
3279 | it: memmem::FindRevIter<'h, 'n>, |
3280 | haystack: &'h [u8], |
3281 | needle: &'n [u8], |
3282 | } |
3283 | |
3284 | impl<'h, 'n> FindReverse<'h, 'n> { |
3285 | fn new(haystack: &'h [u8], needle: &'n [u8]) -> FindReverse<'h, 'n> { |
3286 | FindReverse { |
3287 | it: memmem::rfind_iter(haystack, needle), |
3288 | haystack, |
3289 | needle, |
3290 | } |
3291 | } |
3292 | |
3293 | fn haystack(&self) -> &'h [u8] { |
3294 | self.haystack |
3295 | } |
3296 | |
3297 | fn needle(&self) -> &'n [u8] { |
3298 | self.needle |
3299 | } |
3300 | } |
3301 | |
3302 | impl<'h, 'n> Iterator for FindReverse<'h, 'n> { |
3303 | type Item = usize; |
3304 | |
3305 | #[inline ] |
3306 | fn next(&mut self) -> Option<usize> { |
3307 | self.it.next() |
3308 | } |
3309 | } |
3310 | |
3311 | /// An iterator over the bytes in a byte string. |
3312 | /// |
3313 | /// `'a` is the lifetime of the byte string being traversed. |
#[derive(Clone, Debug)]
pub struct Bytes<'a> {
    /// The underlying slice iterator; every operation delegates to it.
    it: slice::Iter<'a, u8>,
}

impl<'a> Bytes<'a> {
    /// Views the remaining underlying data as a subslice of the original data.
    /// This has the same lifetime as the original slice,
    /// and so the iterator can continue to be used while this exists.
    #[inline]
    pub fn as_bytes(&self) -> &'a [u8] {
        self.it.as_slice()
    }
}

impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    /// Returns the next byte from the front, by value.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        // The slice iterator yields `&u8`; `copied` converts it to `u8`.
        self.it.next().copied()
    }

    /// Forwards the size hint of the underlying slice iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.it.size_hint()
    }
}

impl<'a> DoubleEndedIterator for Bytes<'a> {
    /// Returns the next byte from the back, by value.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.it.next_back().copied()
    }
}

impl<'a> ExactSizeIterator for Bytes<'a> {
    /// Returns the number of bytes left to iterate.
    #[inline]
    fn len(&self) -> usize {
        self.it.len()
    }
}

impl<'a> iter::FusedIterator for Bytes<'a> {}
3358 | |
3359 | /// An iterator over the fields in a byte string, separated by whitespace. |
3360 | /// |
3361 | /// Whitespace for this iterator is defined by the Unicode property |
3362 | /// `White_Space`. |
3363 | /// |
3364 | /// This iterator splits on contiguous runs of whitespace, such that the fields |
3365 | /// in `foo\t\t\n \nbar` are `foo` and `bar`. |
3366 | /// |
3367 | /// `'a` is the lifetime of the byte string being split. |
#[cfg(feature = "unicode")]
#[derive(Debug)]
pub struct Fields<'a> {
    /// A predicate-based field iterator fixed to a whitespace predicate.
    it: FieldsWith<'a, fn(char) -> bool>,
}

#[cfg(feature = "unicode")]
impl<'a> Fields<'a> {
    /// Creates a whitespace-separated field iterator over `bytes`.
    fn new(bytes: &'a [u8]) -> Fields<'a> {
        // A non-capturing closure coerces to the plain function pointer
        // type that the `it` field names.
        let is_space: fn(char) -> bool = |ch| ch.is_whitespace();
        Fields { it: bytes.fields_with(is_space) }
    }
}

#[cfg(feature = "unicode")]
impl<'a> Iterator for Fields<'a> {
    type Item = &'a [u8];

    /// Yields the next field produced by the wrapped `FieldsWith`.
    #[inline]
    fn next(&mut self) -> Option<&'a [u8]> {
        Iterator::next(&mut self.it)
    }
}
3390 | |
3391 | /// An iterator over fields in the byte string, separated by a predicate over |
3392 | /// codepoints. |
3393 | /// |
3394 | /// This iterator splits a byte string based on its predicate function such |
3395 | /// that the elements returned are separated by contiguous runs of codepoints |
3396 | /// for which the predicate returns true. |
3397 | /// |
3398 | /// `'a` is the lifetime of the byte string being split, while `F` is the type |
3399 | /// of the predicate, i.e., `FnMut(char) -> bool`. |
3400 | #[derive (Debug)] |
3401 | pub struct FieldsWith<'a, F> { |
3402 | f: F, |
3403 | bytes: &'a [u8], |
3404 | chars: CharIndices<'a>, |
3405 | } |
3406 | |
3407 | impl<'a, F: FnMut(char) -> bool> FieldsWith<'a, F> { |
3408 | fn new(bytes: &'a [u8], f: F) -> FieldsWith<'a, F> { |
3409 | FieldsWith { f, bytes, chars: bytes.char_indices() } |
3410 | } |
3411 | } |
3412 | |
3413 | impl<'a, F: FnMut(char) -> bool> Iterator for FieldsWith<'a, F> { |
3414 | type Item = &'a [u8]; |
3415 | |
3416 | #[inline ] |
3417 | fn next(&mut self) -> Option<&'a [u8]> { |
3418 | let (start, mut end); |
3419 | loop { |
3420 | match self.chars.next() { |
3421 | None => return None, |
3422 | Some((s, e, ch)) => { |
3423 | if !(self.f)(ch) { |
3424 | start = s; |
3425 | end = e; |
3426 | break; |
3427 | } |
3428 | } |
3429 | } |
3430 | } |
3431 | while let Some((_, e, ch)) = self.chars.next() { |
3432 | if (self.f)(ch) { |
3433 | break; |
3434 | } |
3435 | end = e; |
3436 | } |
3437 | Some(&self.bytes[start..end]) |
3438 | } |
3439 | } |
3440 | |
3441 | /// An iterator over substrings in a byte string, split by a separator. |
3442 | /// |
3443 | /// `'h` is the lifetime of the byte string being split (the haystack), while |
3444 | /// `'s` is the lifetime of the byte string doing the splitting. |
3445 | #[derive (Debug)] |
3446 | pub struct Split<'h, 's> { |
3447 | finder: Find<'h, 's>, |
3448 | /// The end position of the previous match of our splitter. The element |
3449 | /// we yield corresponds to the substring starting at `last` up to the |
3450 | /// beginning of the next match of the splitter. |
3451 | last: usize, |
3452 | /// Only set when iteration is complete. A corner case here is when a |
3453 | /// splitter is matched at the end of the haystack. At that point, we still |
3454 | /// need to yield an empty string following it. |
3455 | done: bool, |
3456 | } |
3457 | |
3458 | impl<'h, 's> Split<'h, 's> { |
3459 | fn new(haystack: &'h [u8], splitter: &'s [u8]) -> Split<'h, 's> { |
3460 | let finder: Find<'_, '_> = haystack.find_iter(needle:splitter); |
3461 | Split { finder, last: 0, done: false } |
3462 | } |
3463 | } |
3464 | |
3465 | impl<'h, 's> Iterator for Split<'h, 's> { |
3466 | type Item = &'h [u8]; |
3467 | |
3468 | #[inline ] |
3469 | fn next(&mut self) -> Option<&'h [u8]> { |
3470 | let haystack = self.finder.haystack; |
3471 | match self.finder.next() { |
3472 | Some(start) => { |
3473 | let next = &haystack[self.last..start]; |
3474 | self.last = start + self.finder.needle.len(); |
3475 | Some(next) |
3476 | } |
3477 | None => { |
3478 | if self.last >= haystack.len() { |
3479 | if !self.done { |
3480 | self.done = true; |
3481 | Some(b"" ) |
3482 | } else { |
3483 | None |
3484 | } |
3485 | } else { |
3486 | let s = &haystack[self.last..]; |
3487 | self.last = haystack.len(); |
3488 | self.done = true; |
3489 | Some(s) |
3490 | } |
3491 | } |
3492 | } |
3493 | } |
3494 | } |
3495 | |
3496 | /// An iterator over substrings in a byte string, split by a separator, in |
3497 | /// reverse. |
3498 | /// |
3499 | /// `'h` is the lifetime of the byte string being split (the haystack), while |
3500 | /// `'s` is the lifetime of the byte string doing the splitting. |
3501 | #[derive (Debug)] |
3502 | pub struct SplitReverse<'h, 's> { |
3503 | finder: FindReverse<'h, 's>, |
3504 | /// The end position of the previous match of our splitter. The element |
3505 | /// we yield corresponds to the substring starting at `last` up to the |
3506 | /// beginning of the next match of the splitter. |
3507 | last: usize, |
3508 | /// Only set when iteration is complete. A corner case here is when a |
3509 | /// splitter is matched at the end of the haystack. At that point, we still |
3510 | /// need to yield an empty string following it. |
3511 | done: bool, |
3512 | } |
3513 | |
3514 | impl<'h, 's> SplitReverse<'h, 's> { |
3515 | fn new(haystack: &'h [u8], splitter: &'s [u8]) -> SplitReverse<'h, 's> { |
3516 | let finder: FindReverse<'_, '_> = haystack.rfind_iter(needle:splitter); |
3517 | SplitReverse { finder, last: haystack.len(), done: false } |
3518 | } |
3519 | } |
3520 | |
3521 | impl<'h, 's> Iterator for SplitReverse<'h, 's> { |
3522 | type Item = &'h [u8]; |
3523 | |
3524 | #[inline ] |
3525 | fn next(&mut self) -> Option<&'h [u8]> { |
3526 | let haystack = self.finder.haystack(); |
3527 | match self.finder.next() { |
3528 | Some(start) => { |
3529 | let nlen = self.finder.needle().len(); |
3530 | let next = &haystack[start + nlen..self.last]; |
3531 | self.last = start; |
3532 | Some(next) |
3533 | } |
3534 | None => { |
3535 | if self.last == 0 { |
3536 | if !self.done { |
3537 | self.done = true; |
3538 | Some(b"" ) |
3539 | } else { |
3540 | None |
3541 | } |
3542 | } else { |
3543 | let s = &haystack[..self.last]; |
3544 | self.last = 0; |
3545 | self.done = true; |
3546 | Some(s) |
3547 | } |
3548 | } |
3549 | } |
3550 | } |
3551 | } |
3552 | |
3553 | /// An iterator over at most `n` substrings in a byte string, split by a |
3554 | /// separator. |
3555 | /// |
3556 | /// `'h` is the lifetime of the byte string being split (the haystack), while |
3557 | /// `'s` is the lifetime of the byte string doing the splitting. |
3558 | #[derive (Debug)] |
3559 | pub struct SplitN<'h, 's> { |
3560 | split: Split<'h, 's>, |
3561 | limit: usize, |
3562 | count: usize, |
3563 | } |
3564 | |
3565 | impl<'h, 's> SplitN<'h, 's> { |
3566 | fn new( |
3567 | haystack: &'h [u8], |
3568 | splitter: &'s [u8], |
3569 | limit: usize, |
3570 | ) -> SplitN<'h, 's> { |
3571 | let split: Split<'_, '_> = haystack.split_str(splitter); |
3572 | SplitN { split, limit, count: 0 } |
3573 | } |
3574 | } |
3575 | |
3576 | impl<'h, 's> Iterator for SplitN<'h, 's> { |
3577 | type Item = &'h [u8]; |
3578 | |
3579 | #[inline ] |
3580 | fn next(&mut self) -> Option<&'h [u8]> { |
3581 | self.count += 1; |
3582 | if self.count > self.limit || self.split.done { |
3583 | None |
3584 | } else if self.count == self.limit { |
3585 | Some(&self.split.finder.haystack[self.split.last..]) |
3586 | } else { |
3587 | self.split.next() |
3588 | } |
3589 | } |
3590 | } |
3591 | |
3592 | /// An iterator over at most `n` substrings in a byte string, split by a |
3593 | /// separator, in reverse. |
3594 | /// |
3595 | /// `'h` is the lifetime of the byte string being split (the haystack), while |
3596 | /// `'s` is the lifetime of the byte string doing the splitting. |
3597 | #[derive (Debug)] |
3598 | pub struct SplitNReverse<'h, 's> { |
3599 | split: SplitReverse<'h, 's>, |
3600 | limit: usize, |
3601 | count: usize, |
3602 | } |
3603 | |
3604 | impl<'h, 's> SplitNReverse<'h, 's> { |
3605 | fn new( |
3606 | haystack: &'h [u8], |
3607 | splitter: &'s [u8], |
3608 | limit: usize, |
3609 | ) -> SplitNReverse<'h, 's> { |
3610 | let split: SplitReverse<'_, '_> = haystack.rsplit_str(splitter); |
3611 | SplitNReverse { split, limit, count: 0 } |
3612 | } |
3613 | } |
3614 | |
3615 | impl<'h, 's> Iterator for SplitNReverse<'h, 's> { |
3616 | type Item = &'h [u8]; |
3617 | |
3618 | #[inline ] |
3619 | fn next(&mut self) -> Option<&'h [u8]> { |
3620 | self.count += 1; |
3621 | if self.count > self.limit || self.split.done { |
3622 | None |
3623 | } else if self.count == self.limit { |
3624 | Some(&self.split.finder.haystack()[..self.split.last]) |
3625 | } else { |
3626 | self.split.next() |
3627 | } |
3628 | } |
3629 | } |
3630 | |
3631 | /// An iterator over all lines in a byte string, without their terminators. |
3632 | /// |
3633 | /// For this iterator, the only line terminators recognized are `\r\n` and |
3634 | /// `\n`. |
3635 | /// |
3636 | /// `'a` is the lifetime of the byte string being iterated over. |
3637 | #[derive (Clone, Debug)] |
3638 | pub struct Lines<'a> { |
3639 | it: LinesWithTerminator<'a>, |
3640 | } |
3641 | |
3642 | impl<'a> Lines<'a> { |
3643 | fn new(bytes: &'a [u8]) -> Lines<'a> { |
3644 | Lines { it: LinesWithTerminator::new(bytes) } |
3645 | } |
3646 | |
3647 | /// Return a copy of the rest of the underlying bytes without affecting the |
3648 | /// iterator itself. |
3649 | /// |
3650 | /// # Examples |
3651 | /// |
3652 | /// Basic usage: |
3653 | /// |
3654 | /// ``` |
3655 | /// use bstr::{B, ByteSlice}; |
3656 | /// |
3657 | /// let s = b"\ |
3658 | /// foo |
3659 | /// bar \r |
3660 | /// baz" ; |
3661 | /// let mut lines = s.lines(); |
3662 | /// assert_eq!(lines.next(), Some(B("foo" ))); |
3663 | /// assert_eq!(lines.as_bytes(), B("bar \r\nbaz" )); |
3664 | /// ``` |
3665 | pub fn as_bytes(&self) -> &'a [u8] { |
3666 | self.it.bytes |
3667 | } |
3668 | } |
3669 | |
3670 | impl<'a> Iterator for Lines<'a> { |
3671 | type Item = &'a [u8]; |
3672 | |
3673 | #[inline ] |
3674 | fn next(&mut self) -> Option<&'a [u8]> { |
3675 | Some(trim_last_terminator(self.it.next()?)) |
3676 | } |
3677 | } |
3678 | |
3679 | impl<'a> DoubleEndedIterator for Lines<'a> { |
3680 | #[inline ] |
3681 | fn next_back(&mut self) -> Option<Self::Item> { |
3682 | Some(trim_last_terminator(self.it.next_back()?)) |
3683 | } |
3684 | } |
3685 | |
3686 | impl<'a> iter::FusedIterator for Lines<'a> {} |
3687 | |
/// An iterator over all lines in a byte string, including their terminators.
///
/// For this iterator, the only line terminator recognized is `\n`. (Since
/// line terminators are included, this also handles `\r\n` line endings.)
///
/// Line terminators are only included if they are present in the original
/// byte string. For example, the last line in a byte string may not end with
/// a line terminator.
///
/// Concatenating all elements yielded by this iterator is guaranteed to yield
/// the original byte string.
///
/// `'a` is the lifetime of the byte string being iterated over.
#[derive(Clone, Debug)]
pub struct LinesWithTerminator<'a> {
    /// The part of the byte string that has not been yielded yet.
    bytes: &'a [u8],
}

impl<'a> LinesWithTerminator<'a> {
    /// Creates an iterator over the lines of `bytes`.
    fn new(bytes: &'a [u8]) -> LinesWithTerminator<'a> {
        LinesWithTerminator { bytes }
    }

    /// Returns the part of the underlying byte string that has not been
    /// yielded yet, without advancing the iterator.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use bstr::{B, ByteSlice};
    ///
    /// let s = b"\
    /// foo
    /// bar\r
    /// baz";
    /// let mut lines = s.lines_with_terminator();
    /// assert_eq!(lines.next(), Some(B("foo\n")));
    /// assert_eq!(lines.as_bytes(), B("bar\r\nbaz"));
    /// ```
    pub fn as_bytes(&self) -> &'a [u8] {
        self.bytes
    }
}
3733 | |
3734 | impl<'a> Iterator for LinesWithTerminator<'a> { |
3735 | type Item = &'a [u8]; |
3736 | |
3737 | #[inline ] |
3738 | fn next(&mut self) -> Option<&'a [u8]> { |
3739 | match self.bytes.find_byte(b' \n' ) { |
3740 | None if self.bytes.is_empty() => None, |
3741 | None => { |
3742 | let line: &[u8] = self.bytes; |
3743 | self.bytes = b"" ; |
3744 | Some(line) |
3745 | } |
3746 | Some(end: usize) => { |
3747 | let line: &[u8] = &self.bytes[..end + 1]; |
3748 | self.bytes = &self.bytes[end + 1..]; |
3749 | Some(line) |
3750 | } |
3751 | } |
3752 | } |
3753 | } |
3754 | |
3755 | impl<'a> DoubleEndedIterator for LinesWithTerminator<'a> { |
3756 | #[inline ] |
3757 | fn next_back(&mut self) -> Option<Self::Item> { |
3758 | let end: usize = self.bytes.len().checked_sub(1)?; |
3759 | match self.bytes[..end].rfind_byte(b' \n' ) { |
3760 | None => { |
3761 | let line: &[u8] = self.bytes; |
3762 | self.bytes = b"" ; |
3763 | Some(line) |
3764 | } |
3765 | Some(end: usize) => { |
3766 | let line: &[u8] = &self.bytes[end + 1..]; |
3767 | self.bytes = &self.bytes[..end + 1]; |
3768 | Some(line) |
3769 | } |
3770 | } |
3771 | } |
3772 | } |
3773 | |
3774 | impl<'a> iter::FusedIterator for LinesWithTerminator<'a> {} |
3775 | |
/// Strips a single trailing line terminator (`\n` or `\r\n`) from `s`.
///
/// A `\r` is only removed when it immediately precedes a trailing `\n`. If
/// `s` does not end with `\n`, it is returned unchanged.
fn trim_last_terminator(s: &[u8]) -> &[u8] {
    match s.strip_suffix(b"\n") {
        // Also drop the `\r` of a `\r\n` pair, if there is one.
        Some(line) => line.strip_suffix(b"\r").unwrap_or(line),
        None => s,
    }
}
3785 | |
#[cfg(all(test, feature = "std"))]
mod tests {
    use crate::{
        ext_slice::{ByteSlice, Lines, LinesWithTerminator, B},
        tests::LOSSY_TESTS,
    };

    /// Checks `to_str_lossy` and `to_str_lossy_into` against every entry of
    /// the shared lossy-decoding table, and confirms that std's
    /// `String::from_utf8_lossy` produces the same expected output.
    #[test]
    fn to_str_lossy() {
        for (i, &(expected, input)) in LOSSY_TESTS.iter().enumerate() {
            let got = B(input).to_str_lossy();
            assert_eq!(
                expected.as_bytes(),
                got.as_bytes(),
                "to_str_lossy(ith: {:?}, given: {:?})",
                i,
                input,
            );

            let mut got = String::new();
            B(input).to_str_lossy_into(&mut got);
            assert_eq!(
                expected.as_bytes(),
                got.as_bytes(),
                "to_str_lossy_into",
            );

            let got = String::from_utf8_lossy(input);
            assert_eq!(expected.as_bytes(), got.as_bytes(), "std");
        }
    }

    /// Checks that `Lines` and `LinesWithTerminator` yield the expected
    /// elements both forwards and backwards.
    #[test]
    fn lines_iteration() {
        // `$it` is evaluated twice, so each direction gets a fresh iterator.
        macro_rules! t {
            ($it:expr, $forward:expr) => {
                let mut res: Vec<&[u8]> = Vec::from($forward);
                assert_eq!($it.collect::<Vec<_>>(), res);
                res.reverse();
                assert_eq!($it.rev().collect::<Vec<_>>(), res);
            };
        }

        t!(Lines::new(b""), []);
        t!(LinesWithTerminator::new(b""), []);

        t!(Lines::new(b"\n"), [B("")]);
        t!(Lines::new(b"\r\n"), [B("")]);
        t!(LinesWithTerminator::new(b"\n"), [B("\n")]);

        t!(Lines::new(b"a"), [B("a")]);
        t!(LinesWithTerminator::new(b"a"), [B("a")]);

        t!(Lines::new(b"abc"), [B("abc")]);
        t!(LinesWithTerminator::new(b"abc"), [B("abc")]);

        t!(Lines::new(b"abc\n"), [B("abc")]);
        t!(Lines::new(b"abc\r\n"), [B("abc")]);
        t!(LinesWithTerminator::new(b"abc\n"), [B("abc\n")]);

        t!(Lines::new(b"abc\n\n"), [B("abc"), B("")]);
        t!(LinesWithTerminator::new(b"abc\n\n"), [B("abc\n"), B("\n")]);

        t!(Lines::new(b"abc\n\ndef"), [B("abc"), B(""), B("def")]);
        t!(
            LinesWithTerminator::new(b"abc\n\ndef"),
            [B("abc\n"), B("\n"), B("def")]
        );

        t!(Lines::new(b"abc\n\ndef\n"), [B("abc"), B(""), B("def")]);
        t!(
            LinesWithTerminator::new(b"abc\n\ndef\n"),
            [B("abc\n"), B("\n"), B("def\n")]
        );

        t!(Lines::new(b"\na\nb\n"), [B(""), B("a"), B("b")]);
        t!(
            LinesWithTerminator::new(b"\na\nb\n"),
            [B("\n"), B("a\n"), B("b\n")]
        );

        t!(Lines::new(b"\n\n\n"), [B(""), B(""), B("")]);
        t!(LinesWithTerminator::new(b"\n\n\n"), [B("\n"), B("\n"), B("\n")]);
    }
}
3871 | |