| 1 | // The code in this file was adapted from the CharIndices implementation of | 
| 2 | // the Rust standard library at revision ab32548539ec38a939c1b58599249f3b54130026 | 
|---|
| 3 | // (https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/library/core/src/str/iter.rs). | 
|---|
| 4 | // | 
|---|
| 5 | // Excerpt from https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/COPYRIGHT , | 
|---|
| 6 | // which refers to | 
|---|
| 7 | // https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-APACHE | 
|---|
| 8 | // and | 
|---|
| 9 | // https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-MIT | 
|---|
| 10 | // : | 
|---|
| 11 | // | 
|---|
| 12 | // For full authorship information, see the version control history or | 
|---|
| 13 | // https://thanks.rust-lang.org | 
|---|
| 14 | // | 
|---|
| 15 | // Except as otherwise noted (below and/or in individual files), Rust is | 
|---|
| 16 | // licensed under the Apache License, Version 2.0 <LICENSE-APACHE> or | 
|---|
| 17 | // <http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | 
|---|
| 18 | // <LICENSE-MIT> or <http://opensource.org/licenses/MIT>, at your option. | 
|---|
| 19 |  | 
|---|
| 20 | use super::Utf8Chars; | 
|---|
| 21 | use core::iter::FusedIterator; | 
|---|
| 22 |  | 
|---|
| 23 | /// An iterator over the [`char`]s  and their positions. | 
|---|
| 24 | #[ derive(Clone, Debug)] | 
|---|
| 25 | #[ must_use= "iterators are lazy and do nothing unless consumed"] | 
|---|
| 26 | pub struct Utf8CharIndices<'a> { | 
|---|
| 27 | front_offset: usize, | 
|---|
| 28 | iter: Utf8Chars<'a>, | 
|---|
| 29 | } | 
|---|
| 30 |  | 
|---|
| 31 | impl<'a> Iterator for Utf8CharIndices<'a> { | 
|---|
| 32 | type Item = (usize, char); | 
|---|
| 33 |  | 
|---|
| 34 | #[ inline] | 
|---|
| 35 | fn next(&mut self) -> Option<(usize, char)> { | 
|---|
| 36 | let pre_len = self.as_slice().len(); | 
|---|
| 37 | match self.iter.next() { | 
|---|
| 38 | None => None, | 
|---|
| 39 | Some(ch) => { | 
|---|
| 40 | let index = self.front_offset; | 
|---|
| 41 | let len = self.as_slice().len(); | 
|---|
| 42 | self.front_offset += pre_len - len; | 
|---|
| 43 | Some((index, ch)) | 
|---|
| 44 | } | 
|---|
| 45 | } | 
|---|
| 46 | } | 
|---|
| 47 |  | 
|---|
| 48 | #[ inline] | 
|---|
| 49 | fn count(self) -> usize { | 
|---|
| 50 | self.iter.count() | 
|---|
| 51 | } | 
|---|
| 52 |  | 
|---|
| 53 | #[ inline] | 
|---|
| 54 | fn size_hint(&self) -> (usize, Option<usize>) { | 
|---|
| 55 | self.iter.size_hint() | 
|---|
| 56 | } | 
|---|
| 57 |  | 
|---|
| 58 | #[ inline] | 
|---|
| 59 | fn last(mut self) -> Option<(usize, char)> { | 
|---|
| 60 | // No need to go through the entire string. | 
|---|
| 61 | self.next_back() | 
|---|
| 62 | } | 
|---|
| 63 | } | 
|---|
| 64 |  | 
|---|
| 65 | impl<'a> DoubleEndedIterator for Utf8CharIndices<'a> { | 
|---|
| 66 | #[ inline] | 
|---|
| 67 | fn next_back(&mut self) -> Option<(usize, char)> { | 
|---|
| 68 | self.iter.next_back().map(|ch: char| { | 
|---|
| 69 | let index: usize = self.front_offset + self.as_slice().len(); | 
|---|
| 70 | (index, ch) | 
|---|
| 71 | }) | 
|---|
| 72 | } | 
|---|
| 73 | } | 
|---|
| 74 |  | 
|---|
| 75 | impl FusedIterator for Utf8CharIndices<'_> {} | 
|---|
| 76 |  | 
|---|
| 77 | impl<'a> Utf8CharIndices<'a> { | 
|---|
| 78 | #[ inline(always)] | 
|---|
| 79 | /// Creates the iterator from a byte slice. | 
|---|
| 80 | pub fn new(bytes: &'a [u8]) -> Self { | 
|---|
| 81 | Utf8CharIndices::<'a> { | 
|---|
| 82 | front_offset: 0, | 
|---|
| 83 | iter: Utf8Chars::new(bytes), | 
|---|
| 84 | } | 
|---|
| 85 | } | 
|---|
| 86 |  | 
|---|
| 87 | /// Views the underlying data as a subslice of the original data. | 
|---|
| 88 | /// | 
|---|
| 89 | /// This has the same lifetime as the original slice, and so the | 
|---|
| 90 | /// iterator can continue to be used while this exists. | 
|---|
| 91 | #[ must_use] | 
|---|
| 92 | #[ inline] | 
|---|
| 93 | pub fn as_slice(&self) -> &'a [u8] { | 
|---|
| 94 | self.iter.as_slice() | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | /// Returns the byte position of the next character, or the length | 
|---|
| 98 | /// of the underlying string if there are no more characters. | 
|---|
| 99 | /// | 
|---|
| 100 | /// # Examples | 
|---|
| 101 | /// | 
|---|
| 102 | /// ``` | 
|---|
| 103 | /// use utf8_iter::Utf8CharsEx; | 
|---|
| 104 | /// let mut chars = "a楽".as_bytes().char_indices(); | 
|---|
| 105 | /// | 
|---|
| 106 | /// assert_eq!(chars.offset(), 0); | 
|---|
| 107 | /// assert_eq!(chars.next(), Some((0, 'a'))); | 
|---|
| 108 | /// | 
|---|
| 109 | /// assert_eq!(chars.offset(), 1); | 
|---|
| 110 | /// assert_eq!(chars.next(), Some((1, '楽'))); | 
|---|
| 111 | /// | 
|---|
| 112 | /// assert_eq!(chars.offset(), 4); | 
|---|
| 113 | /// assert_eq!(chars.next(), None); | 
|---|
| 114 | /// ``` | 
|---|
| 115 | #[ inline] | 
|---|
| 116 | #[ must_use] | 
|---|
| 117 | pub fn offset(&self) -> usize { | 
|---|
| 118 | self.front_offset | 
|---|
| 119 | } | 
|---|
| 120 | } | 
|---|
| 121 |  | 
|---|