indices.rs source code [crates/utf8_iter/src/indices.rs]

1	// The code in this file was adapted from the CharIndices implementation of
2	// the Rust standard library at revision ab32548539ec38a939c1b58599249f3b54130026
3	// (https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/library/core/src/str/iter.rs).
4	//
5	// Excerpt from https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/COPYRIGHT ,
6	// which refers to
7	// https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-APACHE
8	// and
9	// https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-MIT
10	// :
11	//
12	// For full authorship information, see the version control history or
13	// https://thanks.rust-lang.org
14	//
15	// Except as otherwise noted (below and/or in individual files), Rust is
16	// licensed under the Apache License, Version 2.0 <LICENSE-APACHE> or
17	// <http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
18	// <LICENSE-MIT> or <http://opensource.org/licenses/MIT>, at your option.
19
20	use super::Utf8Chars;
21	use core::iter::FusedIterator;
22
23	/// An iterator over the [`char`]s and their positions.
24	#[derive(Clone, Debug)]
25	#[must_use = "iterators are lazy and do nothing unless consumed"]
26	pub struct Utf8CharIndices<'a> {
27	front_offset: usize,
28	iter: Utf8Chars<'a>,
29	}
30
31	impl<'a> Iterator for Utf8CharIndices<'a> {
32	type Item = (usize, char);
33
34	#[inline]
35	fn next(&mut self) -> Option<(usize, char)> {
36	let pre_len = self.as_slice().len();
37	match self.iter.next() {
38	None => None,
39	Some(ch) => {
40	let index = self.front_offset;
41	let len = self.as_slice().len();
42	self.front_offset += pre_len - len;
43	Some((index, ch))
44	}
45	}
46	}
47
48	#[inline]
49	fn count(self) -> usize {
50	self.iter.count()
51	}
52
53	#[inline]
54	fn size_hint(&self) -> (usize, Option<usize>) {
55	self.iter.size_hint()
56	}
57
58	#[inline]
59	fn last(mut self) -> Option<(usize, char)> {
60	// No need to go through the entire string.
61	self.next_back()
62	}
63	}
64
65	impl<'a> DoubleEndedIterator for Utf8CharIndices<'a> {
66	#[inline]
67	fn next_back(&mut self) -> Option<(usize, char)> {
68	self.iter.next_back().map(\|ch: char\| {
69	let index: usize = self.front_offset + self.as_slice().len();
70	(index, ch)
71	})
72	}
73	}
74
75	impl FusedIterator for Utf8CharIndices<'_> {}
76
77	impl<'a> Utf8CharIndices<'a> {
78	#[inline(always)]
79	/// Creates the iterator from a byte slice.
80	pub fn new(bytes: &'a [u8]) -> Self {
81	Utf8CharIndices::<'a> {
82	front_offset: `0`,
83	iter: Utf8Chars::new(bytes),
84	}
85	}
86
87	/// Views the underlying data as a subslice of the original data.
88	///
89	/// This has the same lifetime as the original slice, and so the
90	/// iterator can continue to be used while this exists.
91	#[must_use]
92	#[inline]
93	pub fn as_slice(&self) -> &'a [u8] {
94	self.iter.as_slice()
95	}
96
97	/// Returns the byte position of the next character, or the length
98	/// of the underlying string if there are no more characters.
99	///
100	/// # Examples
101	///
102	/// ```
103	/// use utf8_iter::Utf8CharsEx;
104	/// let mut chars = "a楽".as_bytes().char_indices();
105	///
106	/// assert_eq!(chars.offset(), `0`);
107	/// assert_eq!(chars.next(), Some((`0`, 'a')));
108	///
109	/// assert_eq!(chars.offset(), `1`);
110	/// assert_eq!(chars.next(), Some((`1`, '楽')));
111	///
112	/// assert_eq!(chars.offset(), `4`);
113	/// assert_eq!(chars.next(), None);
114	/// ```
115	#[inline]
116	#[must_use]
117	pub fn offset(&self) -> usize {
118	self.front_offset
119	}
120	}
121