re_trait.rs source code [crates/regex-1.8.4/src/re_trait.rs]

1	use std::fmt;
2	use std::iter::FusedIterator;
3
4	/// Slot is a single saved capture location. Note that there are two slots for
5	/// every capture in a regular expression (one slot each for the start and end
6	/// of the capture).
7	pub type Slot = Option<usize>;
8
9	/// Locations represents the offsets of each capturing group in a regex for
10	/// a single match.
11	///
12	/// Unlike `Captures`, a `Locations` value only stores offsets.
13	#[doc(hidden)]
14	#[derive(Clone, Debug)]
15	pub struct Locations(Vec<Slot>);
16
17	impl Locations {
18	/// Returns the start and end positions of the Nth capture group. Returns
19	/// `None` if `i` is not a valid capture group or if the capture group did
20	/// not match anything. The positions returned are always* byte indices*
21	/// with respect to the original string matched.
22	pub fn pos(&self, i: usize) -> Option<(usize, usize)> {
23	let (s, e) = (i.checked_mul(`2`)?, i.checked_mul(`2`)?.checked_add(`1`)?);
24	match (self.0.get(s), self.0.get(e)) {
25	(Some(&Some(s)), Some(&Some(e))) => Some((s, e)),
26	_ => None,
27	}
28	}
29
30	/// Creates an iterator of all the capture group positions in order of
31	/// appearance in the regular expression. Positions are byte indices
32	/// in terms of the original string matched.
33	pub fn iter(&self) -> SubCapturesPosIter<'_> {
34	SubCapturesPosIter { idx: `0`, locs: self }
35	}
36
37	/// Returns the total number of capturing groups.
38	///
39	/// This is always at least `1` since every regex has at least `1`
40	/// capturing group that corresponds to the entire match.
41	pub fn len(&self) -> usize {
42	self.0.len() / `2`
43	}
44
45	/// Return the individual slots as a slice.
46	pub(crate) fn as_slots(&mut self) -> &mut [Slot] {
47	&mut self.0
48	}
49	}
50
51	/// An iterator over capture group positions for a particular match of a
52	/// regular expression.
53	///
54	/// Positions are byte indices in terms of the original string matched.
55	///
56	/// `'c` is the lifetime of the captures.
57	#[derive(Clone, Debug)]
58	pub struct SubCapturesPosIter<'c> {
59	idx: usize,
60	locs: &'c Locations,
61	}
62
63	impl<'c> Iterator for SubCapturesPosIter<'c> {
64	type Item = Option<(usize, usize)>;
65
66	fn next(&mut self) -> Option<Option<(usize, usize)>> {
67	if self.idx >= self.locs.len() {
68	return None;
69	}
70	let x: Option> = match self.locs.pos(self.idx) {
71	None => Some(None),
72	Some((s: usize, e: usize)) => Some(Some((s, e))),
73	};
74	self.idx += `1`;
75	x
76	}
77
78	fn size_hint(&self) -> (usize, Option<usize>) {
79	let len: usize = self.locs.len() - self.idx;
80	(len, Some(len))
81	}
82
83	fn count(self) -> usize {
84	self.len()
85	}
86	}
87
88	impl<'c> ExactSizeIterator for SubCapturesPosIter<'c> {}
89
90	impl<'c> FusedIterator for SubCapturesPosIter<'c> {}
91
92	/// `RegularExpression` describes types that can implement regex searching.
93	///
94	/// This trait is my attempt at reducing code duplication and to standardize
95	/// the internal API. Specific duplication that is avoided are the `find`
96	/// and `capture` iterators, which are slightly tricky.
97	///
98	/// It's not clear whether this trait is worth it, and it also isn't
99	/// clear whether it's useful as a public trait or not. Methods like
100	/// `next_after_empty` reak of bad design, but the rest of the methods seem
101	/// somewhat reasonable. One particular thing this trait would expose would be
102	/// the ability to start the search of a regex anywhere in a haystack, which
103	/// isn't possible in the current public API.
104	pub trait RegularExpression: Sized + fmt::Debug {
105	/// The type of the haystack.
106	type Text: ?Sized + fmt::Debug;
107
108	/// The number of capture slots in the compiled regular expression. This is
109	/// always two times the number of capture groups (two slots per group).
110	fn slots_len(&self) -> usize;
111
112	/// Allocates fresh space for all capturing groups in this regex.
113	fn locations(&self) -> Locations {
114	Locations(vec![None; self.slots_len()])
115	}
116
117	/// Returns the position of the next character after `i`.
118	///
119	/// For example, a haystack with type `&[u8]` probably returns `i+1`,
120	/// whereas a haystack with type `&str` probably returns `i` plus the
121	/// length of the next UTF-8 sequence.
122	fn next_after_empty(&self, text: &Self::Text, i: usize) -> usize;
123
124	/// Returns the location of the shortest match.
125	fn shortest_match_at(
126	&self,
127	text: &Self::Text,
128	start: usize,
129	) -> Option<usize>;
130
131	/// Returns whether the regex matches the text given.
132	fn is_match_at(&self, text: &Self::Text, start: usize) -> bool;
133
134	/// Returns the leftmost-first match location if one exists.
135	fn find_at(
136	&self,
137	text: &Self::Text,
138	start: usize,
139	) -> Option<(usize, usize)>;
140
141	/// Returns the leftmost-first match location if one exists, and also
142	/// fills in any matching capture slot locations.
143	fn captures_read_at(
144	&self,
145	locs: &mut Locations,
146	text: &Self::Text,
147	start: usize,
148	) -> Option<(usize, usize)>;
149
150	/// Returns an iterator over all non-overlapping successive leftmost-first
151	/// matches.
152	fn find_iter(self, text: &Self::Text) -> Matches<'_, Self> {
153	Matches { re: self, text, last_end: `0`, last_match: None }
154	}
155
156	/// Returns an iterator over all non-overlapping successive leftmost-first
157	/// matches with captures.
158	fn captures_iter(self, text: &Self::Text) -> CaptureMatches<'_, Self> {
159	CaptureMatches(self.find_iter(text))
160	}
161	}
162
163	/// An iterator over all non-overlapping successive leftmost-first matches.
164	#[derive(Debug)]
165	pub struct Matches<'t, R>
166	where
167	R: RegularExpression,
168	R::Text: 't,
169	{
170	re: R,
171	text: &'t R::Text,
172	last_end: usize,
173	last_match: Option<usize>,
174	}
175
176	impl<'t, R> Matches<'t, R>
177	where
178	R: RegularExpression,
179	R::Text: 't,
180	{
181	/// Return the text being searched.
182	pub fn text(&self) -> &'t R::Text {
183	self.text
184	}
185
186	/// Return the underlying regex.
187	pub fn regex(&self) -> &R {
188	&self.re
189	}
190	}
191
192	impl<'t, R> Iterator for Matches<'t, R>
193	where
194	R: RegularExpression,
195	R::Text: 't + AsRef<[u8]>,
196	{
197	type Item = (usize, usize);
198
199	fn next(&mut self) -> Option<(usize, usize)> {
200	if self.last_end > self.text.as_ref().len() {
201	return None;
202	}
203	let (s, e) = match self.re.find_at(self.text, self.last_end) {
204	None => return None,
205	Some((s, e)) => (s, e),
206	};
207	if s == e {
208	// This is an empty match. To ensure we make progress, start
209	// the next search at the smallest possible starting position
210	// of the next match following this one.
211	self.last_end = self.re.next_after_empty(self.text, e);
212	// Don't accept empty matches immediately following a match.
213	// Just move on to the next match.
214	if Some(e) == self.last_match {
215	return self.next();
216	}
217	} else {
218	self.last_end = e;
219	}
220	self.last_match = Some(e);
221	Some((s, e))
222	}
223	}
224
225	impl<'t, R> FusedIterator for Matches<'t, R>
226	where
227	R: RegularExpression,
228	R::Text: 't + AsRef<[u8]>,
229	{
230	}
231
232	/// An iterator over all non-overlapping successive leftmost-first matches with
233	/// captures.
234	#[derive(Debug)]
235	pub struct CaptureMatches<'t, R>(Matches<'t, R>)
236	where
237	R: RegularExpression,
238	R::Text: 't;
239
240	impl<'t, R> CaptureMatches<'t, R>
241	where
242	R: RegularExpression,
243	R::Text: 't,
244	{
245	/// Return the text being searched.
246	pub fn text(&self) -> &'t R::Text {
247	self.0.text()
248	}
249
250	/// Return the underlying regex.
251	pub fn regex(&self) -> &R {
252	self.0.regex()
253	}
254	}
255
256	impl<'t, R> Iterator for CaptureMatches<'t, R>
257	where
258	R: RegularExpression,
259	R::Text: 't + AsRef<[u8]>,
260	{
261	type Item = Locations;
262
263	fn next(&mut self) -> Option<Locations> {
264	if self.0.last_end > self.0.text.as_ref().len() {
265	return None;
266	}
267	let mut locs = self.0.re.locations();
268	let (s, e) = match self.0.re.captures_read_at(
269	&mut locs,
270	self.0.text,
271	self.0.last_end,
272	) {
273	None => return None,
274	Some((s, e)) => (s, e),
275	};
276	if s == e {
277	self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
278	if Some(e) == self.0.last_match {
279	return self.next();
280	}
281	} else {
282	self.0.last_end = e;
283	}
284	self.0.last_match = Some(e);
285	Some(locs)
286	}
287	}
288
289	impl<'t, R> FusedIterator for CaptureMatches<'t, R>
290	where
291	R: RegularExpression,
292	R::Text: 't + AsRef<[u8]>,
293	{
294	}
295