1//! Parallel iterator types for [strings][std::str]
2//!
3//! You will rarely need to interact with this module directly unless you need
4//! to name one of the iterator types.
5//!
6//! Note: [`ParallelString::par_split()`] and [`par_split_terminator()`]
7//! reference a `Pattern` trait which is not visible outside this crate.
8//! This trait is intentionally kept private, for use only by Rayon itself.
9//! It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
10//! and any function or closure `F: Fn(char) -> bool + Sync + Send`.
11//!
12//! [`ParallelString::par_split()`]: trait.ParallelString.html#method.par_split
13//! [`par_split_terminator()`]: trait.ParallelString.html#method.par_split_terminator
14//!
15//! [std::str]: https://doc.rust-lang.org/stable/std/str/
16
17use crate::iter::plumbing::*;
18use crate::iter::*;
19use crate::split_producer::*;
20
/// Reports whether `b` can begin a UTF-8 encoded character, i.e. whether it
/// is anything other than a continuation byte (`0b10xx_xxxx`).
/// (same test as the one inside `str::is_char_boundary`)
#[inline]
fn is_char_boundary(b: u8) -> bool {
    // Reinterpreted as a signed byte, the continuation range 0x80..=0xBF maps
    // to -128..=-65, so every value at or above -64 starts a character.
    // This is equivalent to: b < 128 || b >= 192
    const FIRST_NON_CONTINUATION: i8 = -0x40;
    FIRST_NON_CONTINUATION <= b as i8
}
28
29/// Find the index of a character boundary near the midpoint.
30#[inline]
31fn find_char_midpoint(chars: &str) -> usize {
32 let mid: usize = chars.len() / 2;
33
34 // We want to split near the midpoint, but we need to find an actual
35 // character boundary. So we look at the raw bytes, first scanning
36 // forward from the midpoint for a boundary, then trying backward.
37 let (left: &[u8], right: &[u8]) = chars.as_bytes().split_at(mid);
38 match right.iter().copied().position(is_char_boundary) {
39 Some(i: usize) => mid + i,
40 None => left
41 .iter()
42 .copied()
43 .rposition(is_char_boundary)
44 .unwrap_or(default:0),
45 }
46}
47
48/// Try to split a string near the midpoint.
49#[inline]
50fn split(chars: &str) -> Option<(&str, &str)> {
51 let index: usize = find_char_midpoint(chars);
52 if index > 0 {
53 Some(chars.split_at(mid:index))
54 } else {
55 None
56 }
57}
58
59/// Parallel extensions for strings.
60pub trait ParallelString {
61 /// Returns a plain string slice, which is used to implement the rest of
62 /// the parallel methods.
63 fn as_parallel_string(&self) -> &str;
64
65 /// Returns a parallel iterator over the characters of a string.
66 ///
67 /// # Examples
68 ///
69 /// ```
70 /// use rayon::prelude::*;
71 /// let max = "hello".par_chars().max_by_key(|c| *c as i32);
72 /// assert_eq!(Some('o'), max);
73 /// ```
74 fn par_chars(&self) -> Chars<'_> {
75 Chars {
76 chars: self.as_parallel_string(),
77 }
78 }
79
80 /// Returns a parallel iterator over the characters of a string, with their positions.
81 ///
82 /// # Examples
83 ///
84 /// ```
85 /// use rayon::prelude::*;
86 /// let min = "hello".par_char_indices().min_by_key(|&(_i, c)| c as i32);
87 /// assert_eq!(Some((1, 'e')), min);
88 /// ```
89 fn par_char_indices(&self) -> CharIndices<'_> {
90 CharIndices {
91 chars: self.as_parallel_string(),
92 }
93 }
94
95 /// Returns a parallel iterator over the bytes of a string.
96 ///
97 /// Note that multi-byte sequences (for code points greater than `U+007F`)
98 /// are produced as separate items, but will not be split across threads.
99 /// If you would prefer an indexed iterator without that guarantee, consider
100 /// `string.as_bytes().par_iter().copied()` instead.
101 ///
102 /// # Examples
103 ///
104 /// ```
105 /// use rayon::prelude::*;
106 /// let max = "hello".par_bytes().max();
107 /// assert_eq!(Some(b'o'), max);
108 /// ```
109 fn par_bytes(&self) -> Bytes<'_> {
110 Bytes {
111 chars: self.as_parallel_string(),
112 }
113 }
114
115 /// Returns a parallel iterator over a string encoded as UTF-16.
116 ///
117 /// Note that surrogate pairs (for code points greater than `U+FFFF`) are
118 /// produced as separate items, but will not be split across threads.
119 ///
120 /// # Examples
121 ///
122 /// ```
123 /// use rayon::prelude::*;
124 ///
125 /// let max = "hello".par_encode_utf16().max();
126 /// assert_eq!(Some(b'o' as u16), max);
127 ///
128 /// let text = "Zażółć gęślą jaźń";
129 /// let utf8_len = text.len();
130 /// let utf16_len = text.par_encode_utf16().count();
131 /// assert!(utf16_len <= utf8_len);
132 /// ```
133 fn par_encode_utf16(&self) -> EncodeUtf16<'_> {
134 EncodeUtf16 {
135 chars: self.as_parallel_string(),
136 }
137 }
138
139 /// Returns a parallel iterator over substrings separated by a
140 /// given character or predicate, similar to `str::split`.
141 ///
142 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
143 /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
144 /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
145 ///
146 /// # Examples
147 ///
148 /// ```
149 /// use rayon::prelude::*;
150 /// let total = "1, 2, buckle, 3, 4, door"
151 /// .par_split(',')
152 /// .filter_map(|s| s.trim().parse::<i32>().ok())
153 /// .sum();
154 /// assert_eq!(10, total);
155 /// ```
156 fn par_split<P: Pattern>(&self, separator: P) -> Split<'_, P> {
157 Split::new(self.as_parallel_string(), separator)
158 }
159
160 /// Returns a parallel iterator over substrings separated by a
161 /// given character or predicate, keeping the matched part as a terminator
162 /// of the substring similar to `str::split_inclusive`.
163 ///
164 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
165 /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
166 /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
167 ///
168 /// # Examples
169 ///
170 /// ```
171 /// use rayon::prelude::*;
172 /// let lines: Vec<_> = "Mary had a little lamb\nlittle lamb\nlittle lamb."
173 /// .par_split_inclusive('\n')
174 /// .collect();
175 /// assert_eq!(lines, ["Mary had a little lamb\n", "little lamb\n", "little lamb."]);
176 /// ```
177 fn par_split_inclusive<P: Pattern>(&self, separator: P) -> SplitInclusive<'_, P> {
178 SplitInclusive::new(self.as_parallel_string(), separator)
179 }
180
181 /// Returns a parallel iterator over substrings terminated by a
182 /// given character or predicate, similar to `str::split_terminator`.
183 /// It's equivalent to `par_split`, except it doesn't produce an empty
184 /// substring after a trailing terminator.
185 ///
186 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
187 /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
188 /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
189 ///
190 /// # Examples
191 ///
192 /// ```
193 /// use rayon::prelude::*;
194 /// let parts: Vec<_> = "((1 + 3) * 2)"
195 /// .par_split_terminator(|c| c == '(' || c == ')')
196 /// .collect();
197 /// assert_eq!(vec!["", "", "1 + 3", " * 2"], parts);
198 /// ```
199 fn par_split_terminator<P: Pattern>(&self, terminator: P) -> SplitTerminator<'_, P> {
200 SplitTerminator::new(self.as_parallel_string(), terminator)
201 }
202
203 /// Returns a parallel iterator over the lines of a string, ending with an
204 /// optional carriage return and with a newline (`\r\n` or just `\n`).
205 /// The final line ending is optional, and line endings are not included in
206 /// the output strings.
207 ///
208 /// # Examples
209 ///
210 /// ```
211 /// use rayon::prelude::*;
212 /// let lengths: Vec<_> = "hello world\nfizbuzz"
213 /// .par_lines()
214 /// .map(|l| l.len())
215 /// .collect();
216 /// assert_eq!(vec![11, 7], lengths);
217 /// ```
218 fn par_lines(&self) -> Lines<'_> {
219 Lines(self.as_parallel_string())
220 }
221
222 /// Returns a parallel iterator over the sub-slices of a string that are
223 /// separated by any amount of whitespace.
224 ///
225 /// As with `str::split_whitespace`, 'whitespace' is defined according to
226 /// the terms of the Unicode Derived Core Property `White_Space`.
227 /// If you only want to split on ASCII whitespace instead, use
228 /// [`par_split_ascii_whitespace`][`ParallelString::par_split_ascii_whitespace`].
229 ///
230 /// # Examples
231 ///
232 /// ```
233 /// use rayon::prelude::*;
234 /// let longest = "which is the longest word?"
235 /// .par_split_whitespace()
236 /// .max_by_key(|word| word.len());
237 /// assert_eq!(Some("longest"), longest);
238 /// ```
239 ///
240 /// All kinds of whitespace are considered:
241 ///
242 /// ```
243 /// use rayon::prelude::*;
244 /// let words: Vec<&str> = " Mary had\ta\u{2009}little \n\t lamb"
245 /// .par_split_whitespace()
246 /// .collect();
247 /// assert_eq!(words, ["Mary", "had", "a", "little", "lamb"]);
248 /// ```
249 ///
250 /// If the string is empty or all whitespace, the iterator yields no string slices:
251 ///
252 /// ```
253 /// use rayon::prelude::*;
254 /// assert_eq!("".par_split_whitespace().count(), 0);
255 /// assert_eq!(" ".par_split_whitespace().count(), 0);
256 /// ```
257 fn par_split_whitespace(&self) -> SplitWhitespace<'_> {
258 SplitWhitespace(self.as_parallel_string())
259 }
260
261 /// Returns a parallel iterator over the sub-slices of a string that are
262 /// separated by any amount of ASCII whitespace.
263 ///
264 /// To split by Unicode `White_Space` instead, use
265 /// [`par_split_whitespace`][`ParallelString::par_split_whitespace`].
266 ///
267 /// # Examples
268 ///
269 /// ```
270 /// use rayon::prelude::*;
271 /// let longest = "which is the longest word?"
272 /// .par_split_ascii_whitespace()
273 /// .max_by_key(|word| word.len());
274 /// assert_eq!(Some("longest"), longest);
275 /// ```
276 ///
277 /// All kinds of ASCII whitespace are considered, but not Unicode `White_Space`:
278 ///
279 /// ```
280 /// use rayon::prelude::*;
281 /// let words: Vec<&str> = " Mary had\ta\u{2009}little \n\t lamb"
282 /// .par_split_ascii_whitespace()
283 /// .collect();
284 /// assert_eq!(words, ["Mary", "had", "a\u{2009}little", "lamb"]);
285 /// ```
286 ///
287 /// If the string is empty or all ASCII whitespace, the iterator yields no string slices:
288 ///
289 /// ```
290 /// use rayon::prelude::*;
291 /// assert_eq!("".par_split_whitespace().count(), 0);
292 /// assert_eq!(" ".par_split_whitespace().count(), 0);
293 /// ```
294 fn par_split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
295 SplitAsciiWhitespace(self.as_parallel_string())
296 }
297
298 /// Returns a parallel iterator over substrings that match a
299 /// given character or predicate, similar to `str::matches`.
300 ///
301 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
302 /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
303 /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
304 ///
305 /// # Examples
306 ///
307 /// ```
308 /// use rayon::prelude::*;
309 /// let total = "1, 2, buckle, 3, 4, door"
310 /// .par_matches(char::is_numeric)
311 /// .map(|s| s.parse::<i32>().expect("digit"))
312 /// .sum();
313 /// assert_eq!(10, total);
314 /// ```
315 fn par_matches<P: Pattern>(&self, pattern: P) -> Matches<'_, P> {
316 Matches {
317 chars: self.as_parallel_string(),
318 pattern,
319 }
320 }
321
322 /// Returns a parallel iterator over substrings that match a given character
323 /// or predicate, with their positions, similar to `str::match_indices`.
324 ///
325 /// Note: the `Pattern` trait is private, for use only by Rayon itself.
326 /// It is implemented for `char`, `&[char]`, `[char; N]`, `&[char; N]`,
327 /// and any function or closure `F: Fn(char) -> bool + Sync + Send`.
328 ///
329 /// # Examples
330 ///
331 /// ```
332 /// use rayon::prelude::*;
333 /// let digits: Vec<_> = "1, 2, buckle, 3, 4, door"
334 /// .par_match_indices(char::is_numeric)
335 /// .collect();
336 /// assert_eq!(digits, vec![(0, "1"), (3, "2"), (14, "3"), (17, "4")]);
337 /// ```
338 fn par_match_indices<P: Pattern>(&self, pattern: P) -> MatchIndices<'_, P> {
339 MatchIndices {
340 chars: self.as_parallel_string(),
341 pattern,
342 }
343 }
344}
345
/// `str` is already a plain string slice, so it hands itself back unchanged;
/// every other `ParallelString` method is the trait's default implementation.
impl ParallelString for str {
    #[inline]
    fn as_parallel_string(&self) -> &str {
        self
    }
}
352
353// /////////////////////////////////////////////////////////////////////////
354
/// We hide the `Pattern` trait in a private module, as its API is not meant
/// for general consumption.  If we could have privacy on trait items, then it
/// would be nicer to have its basic existence and implementors public while
/// keeping all of the methods private.
mod private {
    use crate::iter::plumbing::Folder;

    /// Pattern-matching trait for `ParallelString`, somewhat like a mix of
    /// `std::str::pattern::{Pattern, Searcher}`.
    ///
    /// Implementing this trait is not permitted outside of `rayon`.
    ///
    /// The method descriptions below reflect the implementations generated
    /// in this module, which all delegate to the corresponding `str` method.
    pub trait Pattern: Sized + Sync + Send {
        // NOTE(review): `private_decl!` is defined elsewhere in the crate;
        // presumably it adds the hidden item that keeps other crates from
        // implementing this trait -- confirm against its definition.
        private_decl! {}
        /// Byte index of the first match in `haystack`, if any (`str::find`).
        fn find_in(&self, haystack: &str) -> Option<usize>;
        /// Byte index of the last match in `haystack`, if any (`str::rfind`).
        fn rfind_in(&self, haystack: &str) -> Option<usize>;
        /// Whether `haystack` ends with a match (`str::ends_with`).
        fn is_suffix_of(&self, haystack: &str) -> bool;
        /// Feeds the substrings of `haystack` separated by this pattern into
        /// `folder` (`str::split`); when `skip_last` is true the final
        /// substring is dropped first.
        fn fold_splits<'ch, F>(&self, haystack: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>;
        /// Feeds the substrings of `haystack` into `folder`, each keeping its
        /// trailing separator (`str::split_inclusive`).
        fn fold_inclusive_splits<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>;
        /// Feeds every match in `haystack` into `folder` (`str::matches`).
        fn fold_matches<'ch, F>(&self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>;
        /// Feeds every match in `haystack`, paired with its byte index shifted
        /// by `base`, into `folder` (`str::match_indices`).
        fn fold_match_indices<'ch, F>(&self, haystack: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>;
    }
}
385use self::private::Pattern;
386
/// Builds a mapping function that shifts the index of an `(index, item)`
/// pair by `base`, leaving the item untouched.
///
/// Used to turn indices relative to a sub-slice back into indices relative
/// to the whole string.
#[inline]
fn offset<T>(base: usize) -> impl Fn((usize, T)) -> (usize, T) {
    move |(i, x)| (base + i, x)
}
391
// Expands to the full body of a `Pattern` impl.  `$self` names the receiver
// and `$pattern` is the expression, built from it, that is handed to the
// corresponding `str` method for every operation.
macro_rules! impl_pattern {
    (&$self:ident => $pattern:expr) => {
        private_impl! {}

        #[inline]
        fn find_in(&$self, haystack: &str) -> Option<usize> {
            haystack.find($pattern)
        }

        #[inline]
        fn rfind_in(&$self, haystack: &str) -> Option<usize> {
            haystack.rfind($pattern)
        }

        #[inline]
        fn is_suffix_of(&$self, haystack: &str) -> bool {
            haystack.ends_with($pattern)
        }

        fn fold_splits<'ch, F>(&$self, haystack: &'ch str, folder: F, skip_last: bool) -> F
        where
            F: Folder<&'ch str>,
        {
            let mut pieces = haystack.split($pattern);
            if skip_last {
                // Drop the trailing piece before anything is consumed.
                let _ = pieces.next_back();
            }
            folder.consume_iter(pieces)
        }

        fn fold_inclusive_splits<'ch, F>(&$self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            let pieces = haystack.split_inclusive($pattern);
            folder.consume_iter(pieces)
        }

        fn fold_matches<'ch, F>(&$self, haystack: &'ch str, folder: F) -> F
        where
            F: Folder<&'ch str>,
        {
            let found = haystack.matches($pattern);
            folder.consume_iter(found)
        }

        fn fold_match_indices<'ch, F>(&$self, haystack: &'ch str, folder: F, base: usize) -> F
        where
            F: Folder<(usize, &'ch str)>,
        {
            // Indices come back relative to `haystack`; shift them by `base`.
            let found = haystack.match_indices($pattern).map(offset(base));
            folder.consume_iter(found)
        }
    }
}
444
/// A single `char` as a pattern: the dereferenced character is handed to the
/// `str` search methods.
impl Pattern for char {
    impl_pattern!(&self => *self);
}
448
/// A slice of `char`s as a pattern: the dereferenced slice reference is
/// handed to the `str` search methods.
impl Pattern for &[char] {
    impl_pattern!(&self => *self);
}
452
453// TODO (MSRV 1.75): use `*self` for array patterns too.
454// - Needs `DoubleEndedSearcher` so `split.next_back()` works.
455
/// An owned array of `char`s as a pattern; it is searched through its slice
/// view (`as_slice()`).
impl<const N: usize> Pattern for [char; N] {
    impl_pattern!(&self => self.as_slice());
}
459
/// A borrowed array of `char`s as a pattern; it is searched through its slice
/// view (`as_slice()`).
impl<const N: usize> Pattern for &[char; N] {
    impl_pattern!(&self => self.as_slice());
}
463
/// Any thread-safe `Fn(char) -> bool` predicate as a pattern; the predicate
/// is passed to the `str` search methods by reference.
impl<FN: Sync + Send + Fn(char) -> bool> Pattern for FN {
    impl_pattern!(&self => self);
}
467
468// /////////////////////////////////////////////////////////////////////////
469
/// Parallel iterator over the characters of a string
///
/// Returned by `ParallelString::par_chars`; its items are `char`s.
#[derive(Debug, Clone)]
pub struct Chars<'ch> {
    /// The string whose characters are iterated.
    chars: &'ch str,
}
475
/// Unindexed producer behind `Chars`: one contiguous piece of the string,
/// which can be split further on a character boundary or folded as `char`s.
struct CharsProducer<'ch> {
    /// The piece of the string this producer is responsible for.
    chars: &'ch str,
}
479
480impl<'ch> ParallelIterator for Chars<'ch> {
481 type Item = char;
482
483 fn drive_unindexed<C>(self, consumer: C) -> C::Result
484 where
485 C: UnindexedConsumer<Self::Item>,
486 {
487 bridge_unindexed(producer:CharsProducer { chars: self.chars }, consumer)
488 }
489}
490
491impl<'ch> UnindexedProducer for CharsProducer<'ch> {
492 type Item = char;
493
494 fn split(self) -> (Self, Option<Self>) {
495 match split(self.chars) {
496 Some((left: &str, right: &str)) => (
497 CharsProducer { chars: left },
498 Some(CharsProducer { chars: right }),
499 ),
500 None => (self, None),
501 }
502 }
503
504 fn fold_with<F>(self, folder: F) -> F
505 where
506 F: Folder<Self::Item>,
507 {
508 folder.consume_iter(self.chars.chars())
509 }
510}
511
512// /////////////////////////////////////////////////////////////////////////
513
/// Parallel iterator over the characters of a string, with their positions
///
/// Returned by `ParallelString::par_char_indices`; its items are
/// `(usize, char)` pairs.
#[derive(Debug, Clone)]
pub struct CharIndices<'ch> {
    /// The string whose characters are iterated.
    chars: &'ch str,
}
519
/// Unindexed producer behind `CharIndices`: one piece of the string together
/// with the position of that piece in the original string.
struct CharIndicesProducer<'ch> {
    /// Byte offset of `chars` within the original string; starts at 0 and
    /// grows by the length of the left half whenever a right half is split off.
    index: usize,
    /// The piece of the string this producer is responsible for.
    chars: &'ch str,
}
524
525impl<'ch> ParallelIterator for CharIndices<'ch> {
526 type Item = (usize, char);
527
528 fn drive_unindexed<C>(self, consumer: C) -> C::Result
529 where
530 C: UnindexedConsumer<Self::Item>,
531 {
532 let producer: CharIndicesProducer<'_> = CharIndicesProducer {
533 index: 0,
534 chars: self.chars,
535 };
536 bridge_unindexed(producer, consumer)
537 }
538}
539
540impl<'ch> UnindexedProducer for CharIndicesProducer<'ch> {
541 type Item = (usize, char);
542
543 fn split(self) -> (Self, Option<Self>) {
544 match split(self.chars) {
545 Some((left, right)) => (
546 CharIndicesProducer {
547 chars: left,
548 ..self
549 },
550 Some(CharIndicesProducer {
551 chars: right,
552 index: self.index + left.len(),
553 }),
554 ),
555 None => (self, None),
556 }
557 }
558
559 fn fold_with<F>(self, folder: F) -> F
560 where
561 F: Folder<Self::Item>,
562 {
563 let base = self.index;
564 folder.consume_iter(self.chars.char_indices().map(offset(base)))
565 }
566}
567
568// /////////////////////////////////////////////////////////////////////////
569
/// Parallel iterator over the bytes of a string
///
/// Returned by `ParallelString::par_bytes`; its items are `u8`s.
#[derive(Debug, Clone)]
pub struct Bytes<'ch> {
    /// The string whose bytes are iterated.
    chars: &'ch str,
}
575
/// Unindexed producer behind `Bytes`: one piece of the string, split only on
/// character boundaries and folded byte by byte.
struct BytesProducer<'ch> {
    /// The piece of the string this producer is responsible for.
    chars: &'ch str,
}
579
580impl<'ch> ParallelIterator for Bytes<'ch> {
581 type Item = u8;
582
583 fn drive_unindexed<C>(self, consumer: C) -> C::Result
584 where
585 C: UnindexedConsumer<Self::Item>,
586 {
587 bridge_unindexed(producer:BytesProducer { chars: self.chars }, consumer)
588 }
589}
590
591impl<'ch> UnindexedProducer for BytesProducer<'ch> {
592 type Item = u8;
593
594 fn split(self) -> (Self, Option<Self>) {
595 match split(self.chars) {
596 Some((left: &str, right: &str)) => (
597 BytesProducer { chars: left },
598 Some(BytesProducer { chars: right }),
599 ),
600 None => (self, None),
601 }
602 }
603
604 fn fold_with<F>(self, folder: F) -> F
605 where
606 F: Folder<Self::Item>,
607 {
608 folder.consume_iter(self.chars.bytes())
609 }
610}
611
612// /////////////////////////////////////////////////////////////////////////
613
/// Parallel iterator over a string encoded as UTF-16
///
/// Returned by `ParallelString::par_encode_utf16`; its items are `u16` code
/// units.
#[derive(Debug, Clone)]
pub struct EncodeUtf16<'ch> {
    /// The string to be encoded.
    chars: &'ch str,
}
619
/// Unindexed producer behind `EncodeUtf16`: one piece of the string, split
/// only on character boundaries and folded as UTF-16 code units.
struct EncodeUtf16Producer<'ch> {
    /// The piece of the string this producer is responsible for.
    chars: &'ch str,
}
623
624impl<'ch> ParallelIterator for EncodeUtf16<'ch> {
625 type Item = u16;
626
627 fn drive_unindexed<C>(self, consumer: C) -> C::Result
628 where
629 C: UnindexedConsumer<Self::Item>,
630 {
631 bridge_unindexed(producer:EncodeUtf16Producer { chars: self.chars }, consumer)
632 }
633}
634
635impl<'ch> UnindexedProducer for EncodeUtf16Producer<'ch> {
636 type Item = u16;
637
638 fn split(self) -> (Self, Option<Self>) {
639 match split(self.chars) {
640 Some((left: &str, right: &str)) => (
641 EncodeUtf16Producer { chars: left },
642 Some(EncodeUtf16Producer { chars: right }),
643 ),
644 None => (self, None),
645 }
646 }
647
648 fn fold_with<F>(self, folder: F) -> F
649 where
650 F: Folder<Self::Item>,
651 {
652 folder.consume_iter(self.chars.encode_utf16())
653 }
654}
655
656// /////////////////////////////////////////////////////////////////////////
657
/// Parallel iterator over substrings separated by a pattern
///
/// Returned by `ParallelString::par_split`; its items are `&str` slices of
/// the original string.
#[derive(Debug, Clone)]
pub struct Split<'ch, P: Pattern> {
    /// The string being split.
    chars: &'ch str,
    /// The pattern that separates the substrings.
    separator: P,
}
664
665impl<'ch, P: Pattern> Split<'ch, P> {
666 fn new(chars: &'ch str, separator: P) -> Self {
667 Split { chars, separator }
668 }
669}
670
671impl<'ch, P: Pattern> ParallelIterator for Split<'ch, P> {
672 type Item = &'ch str;
673
674 fn drive_unindexed<C>(self, consumer: C) -> C::Result
675 where
676 C: UnindexedConsumer<Self::Item>,
677 {
678 let producer: SplitProducer<'_, P, &str> = SplitProducer::new(self.chars, &self.separator);
679 bridge_unindexed(producer, consumer)
680 }
681}
682
683/// Implement support for `SplitProducer`.
684impl<'ch, P: Pattern> Fissile<P> for &'ch str {
685 fn length(&self) -> usize {
686 self.len()
687 }
688
689 fn midpoint(&self, end: usize) -> usize {
690 // First find a suitable UTF-8 boundary.
691 find_char_midpoint(&self[..end])
692 }
693
694 fn find(&self, separator: &P, start: usize, end: usize) -> Option<usize> {
695 separator.find_in(&self[start..end])
696 }
697
698 fn rfind(&self, separator: &P, end: usize) -> Option<usize> {
699 separator.rfind_in(&self[..end])
700 }
701
702 fn split_once<const INCL: bool>(self, index: usize) -> (Self, Self) {
703 if INCL {
704 // include the separator in the left side
705 let separator = self[index..].chars().next().unwrap();
706 self.split_at(index + separator.len_utf8())
707 } else {
708 let (left, right) = self.split_at(index);
709 let mut right_iter = right.chars();
710 right_iter.next(); // skip the separator
711 (left, right_iter.as_str())
712 }
713 }
714
715 fn fold_splits<F, const INCL: bool>(self, separator: &P, folder: F, skip_last: bool) -> F
716 where
717 F: Folder<Self>,
718 {
719 if INCL {
720 debug_assert!(!skip_last);
721 separator.fold_inclusive_splits(self, folder)
722 } else {
723 separator.fold_splits(self, folder, skip_last)
724 }
725 }
726}
727
728// /////////////////////////////////////////////////////////////////////////
729
/// Parallel iterator over substrings separated by a pattern, where each
/// substring keeps the separator that ends it
///
/// Returned by `ParallelString::par_split_inclusive`; its items are `&str`
/// slices of the original string.
#[derive(Debug, Clone)]
pub struct SplitInclusive<'ch, P: Pattern> {
    /// The string being split.
    chars: &'ch str,
    /// The pattern that ends each substring.
    separator: P,
}
736
737impl<'ch, P: Pattern> SplitInclusive<'ch, P> {
738 fn new(chars: &'ch str, separator: P) -> Self {
739 SplitInclusive { chars, separator }
740 }
741}
742
743impl<'ch, P: Pattern> ParallelIterator for SplitInclusive<'ch, P> {
744 type Item = &'ch str;
745
746 fn drive_unindexed<C>(self, consumer: C) -> C::Result
747 where
748 C: UnindexedConsumer<Self::Item>,
749 {
750 let producer: SplitProducer<'_, P, &str, true> = SplitInclusiveProducer::new_incl(self.chars, &self.separator);
751 bridge_unindexed(producer, consumer)
752 }
753}
754
755// /////////////////////////////////////////////////////////////////////////
756
/// Parallel iterator over substrings separated by a terminator pattern
///
/// Returned by `ParallelString::par_split_terminator`; its items are `&str`
/// slices of the original string.
#[derive(Debug, Clone)]
pub struct SplitTerminator<'ch, P: Pattern> {
    /// The string being split.
    chars: &'ch str,
    /// The pattern that terminates each substring.
    terminator: P,
}
763
/// Unindexed producer behind `SplitTerminator`: a `SplitProducer` plus the
/// bookkeeping needed to leave out the piece after a trailing terminator.
struct SplitTerminatorProducer<'ch, 'sep, P: Pattern> {
    /// The underlying producer that does the actual splitting.
    splitter: SplitProducer<'sep, P, &'ch str>,
    /// Whether this producer must drop its final piece.  Initially true when
    /// the input is empty or ends with the terminator; on a split the flag
    /// moves to the right-hand half.
    skip_last: bool,
}
768
769impl<'ch, P: Pattern> SplitTerminator<'ch, P> {
770 fn new(chars: &'ch str, terminator: P) -> Self {
771 SplitTerminator { chars, terminator }
772 }
773}
774
775impl<'ch, 'sep, P: Pattern + 'sep> SplitTerminatorProducer<'ch, 'sep, P> {
776 fn new(chars: &'ch str, terminator: &'sep P) -> Self {
777 SplitTerminatorProducer {
778 splitter: SplitProducer::new(data:chars, separator:terminator),
779 skip_last: chars.is_empty() || terminator.is_suffix_of(haystack:chars),
780 }
781 }
782}
783
784impl<'ch, P: Pattern> ParallelIterator for SplitTerminator<'ch, P> {
785 type Item = &'ch str;
786
787 fn drive_unindexed<C>(self, consumer: C) -> C::Result
788 where
789 C: UnindexedConsumer<Self::Item>,
790 {
791 let producer: SplitTerminatorProducer<'_, '_, …> = SplitTerminatorProducer::new(self.chars, &self.terminator);
792 bridge_unindexed(producer, consumer)
793 }
794}
795
796impl<'ch, 'sep, P: Pattern + 'sep> UnindexedProducer for SplitTerminatorProducer<'ch, 'sep, P> {
797 type Item = &'ch str;
798
799 fn split(mut self) -> (Self, Option<Self>) {
800 let (left: SplitProducer<'_, P, &str>, right: Option>) = self.splitter.split();
801 self.splitter = left;
802 let right: Option> = right.map(|right: SplitProducer<'_, P, &str>| {
803 let skip_last: bool = self.skip_last;
804 self.skip_last = false;
805 SplitTerminatorProducer {
806 splitter: right,
807 skip_last,
808 }
809 });
810 (self, right)
811 }
812
813 fn fold_with<F>(self, folder: F) -> F
814 where
815 F: Folder<Self::Item>,
816 {
817 self.splitter.fold_with(folder, self.skip_last)
818 }
819}
820
821// /////////////////////////////////////////////////////////////////////////
822
/// Parallel iterator over lines in a string
///
/// Returned by `ParallelString::par_lines`; wraps the string to be split.
/// Lines are the pieces terminated by `'\n'`, each with one trailing `'\r'`
/// removed if present.
#[derive(Debug, Clone)]
pub struct Lines<'ch>(&'ch str);
826
/// Removes a single trailing `'\r'` from `line`, if there is one; any other
/// input is returned unchanged.
#[inline]
fn no_carriage_return(line: &str) -> &str {
    line.strip_suffix('\r').unwrap_or(line)
}
831
832impl<'ch> ParallelIterator for Lines<'ch> {
833 type Item = &'ch str;
834
835 fn drive_unindexed<C>(self, consumer: C) -> C::Result
836 where
837 C: UnindexedConsumer<Self::Item>,
838 {
839 self.0
840 .par_split_terminator('\n')
841 .map(map_op:no_carriage_return)
842 .drive_unindexed(consumer)
843 }
844}
845
846// /////////////////////////////////////////////////////////////////////////
847
/// Parallel iterator over substrings separated by whitespace
///
/// Returned by `ParallelString::par_split_whitespace`; wraps the string to
/// be split.  Empty pieces are filtered out.
#[derive(Debug, Clone)]
pub struct SplitWhitespace<'ch>(&'ch str);
851
/// Filter predicate that keeps only the substrings containing at least one
/// byte.
#[inline]
fn not_empty(s: &&str) -> bool {
    let text: &str = s;
    !text.is_empty()
}
856
857impl<'ch> ParallelIterator for SplitWhitespace<'ch> {
858 type Item = &'ch str;
859
860 fn drive_unindexed<C>(self, consumer: C) -> C::Result
861 where
862 C: UnindexedConsumer<Self::Item>,
863 {
864 self.0
865 .par_split(char::is_whitespace)
866 .filter(filter_op:not_empty)
867 .drive_unindexed(consumer)
868 }
869}
870
871// /////////////////////////////////////////////////////////////////////////
872
/// Parallel iterator over substrings separated by ASCII whitespace
///
/// Returned by `ParallelString::par_split_ascii_whitespace`; wraps the
/// string to be split.  Empty pieces are filtered out.
#[derive(Debug, Clone)]
pub struct SplitAsciiWhitespace<'ch>(&'ch str);
876
/// By-value wrapper around the ASCII whitespace test, so it can be used as a
/// `Fn(char) -> bool` pattern.
///
/// Matches space, tab, line feed, form feed and carriage return -- the same
/// set as `char::is_ascii_whitespace`.
#[inline]
fn is_ascii_whitespace(c: char) -> bool {
    matches!(c, ' ' | '\t' | '\n' | '\x0C' | '\r')
}
881
882impl<'ch> ParallelIterator for SplitAsciiWhitespace<'ch> {
883 type Item = &'ch str;
884
885 fn drive_unindexed<C>(self, consumer: C) -> C::Result
886 where
887 C: UnindexedConsumer<Self::Item>,
888 {
889 self.0
890 .par_split(is_ascii_whitespace)
891 .filter(filter_op:not_empty)
892 .drive_unindexed(consumer)
893 }
894}
895
896// /////////////////////////////////////////////////////////////////////////
897
/// Parallel iterator over substrings that match a pattern
///
/// Returned by `ParallelString::par_matches`; its items are `&str` slices of
/// the original string.
#[derive(Debug, Clone)]
pub struct Matches<'ch, P: Pattern> {
    /// The string being searched.
    chars: &'ch str,
    /// The pattern to look for.
    pattern: P,
}
904
/// Unindexed producer behind `Matches`: one piece of the string plus a
/// borrow of the pattern, shared by every piece.
struct MatchesProducer<'ch, 'pat, P: Pattern> {
    /// The piece of the string this producer is responsible for.
    chars: &'ch str,
    /// The pattern owned by the parent `Matches`.
    pattern: &'pat P,
}
909
910impl<'ch, P: Pattern> ParallelIterator for Matches<'ch, P> {
911 type Item = &'ch str;
912
913 fn drive_unindexed<C>(self, consumer: C) -> C::Result
914 where
915 C: UnindexedConsumer<Self::Item>,
916 {
917 let producer: MatchesProducer<'_, '_, P> = MatchesProducer {
918 chars: self.chars,
919 pattern: &self.pattern,
920 };
921 bridge_unindexed(producer, consumer)
922 }
923}
924
925impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchesProducer<'ch, 'pat, P> {
926 type Item = &'ch str;
927
928 fn split(self) -> (Self, Option<Self>) {
929 match split(self.chars) {
930 Some((left, right)) => (
931 MatchesProducer {
932 chars: left,
933 ..self
934 },
935 Some(MatchesProducer {
936 chars: right,
937 ..self
938 }),
939 ),
940 None => (self, None),
941 }
942 }
943
944 fn fold_with<F>(self, folder: F) -> F
945 where
946 F: Folder<Self::Item>,
947 {
948 self.pattern.fold_matches(self.chars, folder)
949 }
950}
951
952// /////////////////////////////////////////////////////////////////////////
953
/// Parallel iterator over substrings that match a pattern, with their positions
///
/// Returned by `ParallelString::par_match_indices`; its items are
/// `(usize, &str)` pairs.
#[derive(Debug, Clone)]
pub struct MatchIndices<'ch, P: Pattern> {
    /// The string being searched.
    chars: &'ch str,
    /// The pattern to look for.
    pattern: P,
}
960
/// Unindexed producer behind `MatchIndices`: one piece of the string, its
/// position in the original string, and a borrow of the pattern.
struct MatchIndicesProducer<'ch, 'pat, P: Pattern> {
    /// Byte offset of `chars` within the original string; starts at 0 and
    /// grows by the length of the left half whenever a right half is split off.
    index: usize,
    /// The piece of the string this producer is responsible for.
    chars: &'ch str,
    /// The pattern owned by the parent `MatchIndices`.
    pattern: &'pat P,
}
966
967impl<'ch, P: Pattern> ParallelIterator for MatchIndices<'ch, P> {
968 type Item = (usize, &'ch str);
969
970 fn drive_unindexed<C>(self, consumer: C) -> C::Result
971 where
972 C: UnindexedConsumer<Self::Item>,
973 {
974 let producer: MatchIndicesProducer<'_, '_, …> = MatchIndicesProducer {
975 index: 0,
976 chars: self.chars,
977 pattern: &self.pattern,
978 };
979 bridge_unindexed(producer, consumer)
980 }
981}
982
983impl<'ch, 'pat, P: Pattern> UnindexedProducer for MatchIndicesProducer<'ch, 'pat, P> {
984 type Item = (usize, &'ch str);
985
986 fn split(self) -> (Self, Option<Self>) {
987 match split(self.chars) {
988 Some((left, right)) => (
989 MatchIndicesProducer {
990 chars: left,
991 ..self
992 },
993 Some(MatchIndicesProducer {
994 chars: right,
995 index: self.index + left.len(),
996 ..self
997 }),
998 ),
999 None => (self, None),
1000 }
1001 }
1002
1003 fn fold_with<F>(self, folder: F) -> F
1004 where
1005 F: Folder<Self::Item>,
1006 {
1007 self.pattern
1008 .fold_match_indices(self.chars, folder, self.index)
1009 }
1010}
1011