1 | // The code in this file was adapted from the CharIndices implementation of |
2 | // the Rust standard library at revision ab32548539ec38a939c1b58599249f3b54130026 |
3 | // (https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/library/core/src/str/iter.rs). |
4 | // |
5 | // Excerpt from https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/COPYRIGHT , |
6 | // which refers to |
7 | // https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-APACHE |
8 | // and |
9 | // https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-MIT |
10 | // : |
11 | // |
12 | // For full authorship information, see the version control history or |
13 | // https://thanks.rust-lang.org |
14 | // |
15 | // Except as otherwise noted (below and/or in individual files), Rust is |
16 | // licensed under the Apache License, Version 2.0 <LICENSE-APACHE> or |
17 | // <http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
18 | // <LICENSE-MIT> or <http://opensource.org/licenses/MIT>, at your option. |
19 | |
20 | use super::Utf8Chars; |
21 | use core::iter::FusedIterator; |
22 | |
23 | /// An iterator over the [`char`]s and their positions. |
24 | #[derive (Clone, Debug)] |
25 | #[must_use = "iterators are lazy and do nothing unless consumed" ] |
26 | pub struct Utf8CharIndices<'a> { |
27 | front_offset: usize, |
28 | iter: Utf8Chars<'a>, |
29 | } |
30 | |
31 | impl<'a> Iterator for Utf8CharIndices<'a> { |
32 | type Item = (usize, char); |
33 | |
34 | #[inline ] |
35 | fn next(&mut self) -> Option<(usize, char)> { |
36 | let pre_len = self.as_slice().len(); |
37 | match self.iter.next() { |
38 | None => None, |
39 | Some(ch) => { |
40 | let index = self.front_offset; |
41 | let len = self.as_slice().len(); |
42 | self.front_offset += pre_len - len; |
43 | Some((index, ch)) |
44 | } |
45 | } |
46 | } |
47 | |
48 | #[inline ] |
49 | fn count(self) -> usize { |
50 | self.iter.count() |
51 | } |
52 | |
53 | #[inline ] |
54 | fn size_hint(&self) -> (usize, Option<usize>) { |
55 | self.iter.size_hint() |
56 | } |
57 | |
58 | #[inline ] |
59 | fn last(mut self) -> Option<(usize, char)> { |
60 | // No need to go through the entire string. |
61 | self.next_back() |
62 | } |
63 | } |
64 | |
65 | impl<'a> DoubleEndedIterator for Utf8CharIndices<'a> { |
66 | #[inline ] |
67 | fn next_back(&mut self) -> Option<(usize, char)> { |
68 | self.iter.next_back().map(|ch: char| { |
69 | let index: usize = self.front_offset + self.as_slice().len(); |
70 | (index, ch) |
71 | }) |
72 | } |
73 | } |
74 | |
75 | impl FusedIterator for Utf8CharIndices<'_> {} |
76 | |
77 | impl<'a> Utf8CharIndices<'a> { |
78 | #[inline (always)] |
79 | /// Creates the iterator from a byte slice. |
80 | pub fn new(bytes: &'a [u8]) -> Self { |
81 | Utf8CharIndices::<'a> { |
82 | front_offset: 0, |
83 | iter: Utf8Chars::new(bytes), |
84 | } |
85 | } |
86 | |
87 | /// Views the underlying data as a subslice of the original data. |
88 | /// |
89 | /// This has the same lifetime as the original slice, and so the |
90 | /// iterator can continue to be used while this exists. |
91 | #[must_use ] |
92 | #[inline ] |
93 | pub fn as_slice(&self) -> &'a [u8] { |
94 | self.iter.as_slice() |
95 | } |
96 | |
97 | /// Returns the byte position of the next character, or the length |
98 | /// of the underlying string if there are no more characters. |
99 | /// |
100 | /// # Examples |
101 | /// |
102 | /// ``` |
103 | /// use utf8_iter::Utf8CharsEx; |
104 | /// let mut chars = "a楽" .as_bytes().char_indices(); |
105 | /// |
106 | /// assert_eq!(chars.offset(), 0); |
107 | /// assert_eq!(chars.next(), Some((0, 'a' ))); |
108 | /// |
109 | /// assert_eq!(chars.offset(), 1); |
110 | /// assert_eq!(chars.next(), Some((1, '楽' ))); |
111 | /// |
112 | /// assert_eq!(chars.offset(), 4); |
113 | /// assert_eq!(chars.next(), None); |
114 | /// ``` |
115 | #[inline ] |
116 | #[must_use ] |
117 | pub fn offset(&self) -> usize { |
118 | self.front_offset |
119 | } |
120 | } |
121 | |