1 | use std::borrow::Cow; |
2 | use std::collections::HashMap; |
3 | use std::fmt; |
4 | use std::iter::FusedIterator; |
5 | use std::ops::{Index, Range}; |
6 | use std::str::FromStr; |
7 | use std::sync::Arc; |
8 | |
9 | use crate::find_byte::find_byte; |
10 | |
11 | use crate::error::Error; |
12 | use crate::exec::{Exec, ExecNoSyncStr}; |
13 | use crate::expand::expand_str; |
14 | use crate::re_builder::unicode::RegexBuilder; |
15 | use crate::re_trait::{self, RegularExpression, SubCapturesPosIter}; |
16 | |
/// Escapes all regular expression meta characters in `text`.
///
/// The string returned may be safely used as a literal in a regular
/// expression.
///
/// This is a thin wrapper: `text` is forwarded to `regex_syntax::escape` and
/// its result is returned unchanged.
pub fn escape(text: &str) -> String {
    regex_syntax::escape(text)
}
24 | |
/// Match represents a single match of a regex in a haystack.
///
/// The lifetime parameter `'t` refers to the lifetime of the matched text.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Match<'t> {
    /// The whole haystack that was searched, not only the matched portion.
    text: &'t str,
    /// Byte offset in `text` at which the match begins.
    start: usize,
    /// Byte offset in `text` at which the match ends (exclusive bound of
    /// the slice returned by `as_str`).
    end: usize,
}

impl<'t> Match<'t> {
    /// Returns the starting byte offset of the match in the haystack.
    #[inline]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the ending byte offset of the match in the haystack.
    #[inline]
    pub fn end(&self) -> usize {
        self.end
    }

    /// Returns true if and only if this match has a length of zero.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.start == self.end
    }

    /// Returns the length, in bytes, of this match.
    #[inline]
    pub fn len(&self) -> usize {
        self.end - self.start
    }

    /// Returns the range over the starting and ending byte offsets of the
    /// match in the haystack.
    #[inline]
    pub fn range(&self) -> Range<usize> {
        self.start..self.end
    }

    /// Returns the matched text, i.e. the slice of the haystack covered by
    /// `range()`.
    #[inline]
    pub fn as_str(&self) -> &'t str {
        &self.text[self.range()]
    }

    /// Creates a new match from the given haystack and byte offsets.
    ///
    /// No validation is performed here; `start` and `end` are stored as
    /// given and are only used for slicing when `as_str` is called.
    #[inline]
    fn new(haystack: &'t str, start: usize, end: usize) -> Match<'t> {
        Match { text: haystack, start, end }
    }
}

impl<'t> std::fmt::Debug for Match<'t> {
    /// Formats the match as `Match { start, end, string }`, where `string`
    /// is the matched text rather than the whole haystack.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        f.debug_struct("Match")
            .field("start", &self.start)
            .field("end", &self.end)
            .field("string", &self.as_str())
            .finish()
    }
}

impl<'t> From<Match<'t>> for &'t str {
    /// Converts a match into the text it matched.
    fn from(m: Match<'t>) -> &'t str {
        m.as_str()
    }
}

impl<'t> From<Match<'t>> for Range<usize> {
    /// Converts a match into its byte range within the haystack.
    fn from(m: Match<'t>) -> Range<usize> {
        m.range()
    }
}
101 | |
/// A compiled regular expression for matching Unicode strings.
///
/// It is represented as either a sequence of bytecode instructions (dynamic)
/// or as a specialized Rust function (native). It can be used to search, split
/// or replace text. All searching is done with an implicit `.*?` at the
/// beginning and end of an expression. To force an expression to match the
/// whole string (or a prefix or a suffix), you must use an anchor like `^` or
/// `$` (or `\A` and `\z`).
///
/// While this crate will handle Unicode strings (whether in the regular
/// expression or in the search text), all positions returned are **byte
/// indices**. Every byte index is guaranteed to be at a Unicode code point
/// boundary.
///
/// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a
/// compiled regular expression and text to search, respectively.
///
/// The only methods that allocate new strings are the string replacement
/// methods. All other methods (searching and splitting) return borrowed
/// pointers into the string given.
///
/// # Examples
///
/// Find the location of a US phone number:
///
/// ```rust
/// # use regex::Regex;
/// let re = Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}").unwrap();
/// let mat = re.find("phone: 111-222-3333").unwrap();
/// assert_eq!((mat.start(), mat.end()), (7, 19));
/// ```
///
/// # Using the `std::str::pattern` methods with `Regex`
///
/// > **Note**: This section requires that this crate is compiled with the
/// > `pattern` Cargo feature enabled, which **requires nightly Rust**.
///
/// Since `Regex` implements `Pattern`, you can use regexes with methods
/// defined on `&str`. For example, `is_match`, `find`, `find_iter`
/// and `split` can be replaced with `str::contains`, `str::find`,
/// `str::match_indices` and `str::split`.
///
/// Here are some examples:
///
/// ```rust,ignore
/// # use regex::Regex;
/// let re = Regex::new(r"\d+").unwrap();
/// let haystack = "a111b222c";
///
/// assert!(haystack.contains(&re));
/// assert_eq!(haystack.find(&re), Some(1));
/// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(),
///            vec![(1, "111"), (5, "222")]);
/// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]);
/// ```
// `Regex` is a newtype over the crate-internal `Exec`; the search methods in
// this file reach the matching machinery through `self.0`.
#[derive(Clone)]
pub struct Regex(Exec);
159 | |
160 | impl fmt::Display for Regex { |
161 | /// Shows the original regular expression. |
162 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
163 | write!(f, " {}" , self.as_str()) |
164 | } |
165 | } |
166 | |
167 | impl fmt::Debug for Regex { |
168 | /// Shows the original regular expression. |
169 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
170 | fmt::Display::fmt(self, f) |
171 | } |
172 | } |
173 | |
174 | #[doc (hidden)] |
175 | impl From<Exec> for Regex { |
176 | fn from(exec: Exec) -> Regex { |
177 | Regex(exec) |
178 | } |
179 | } |
180 | |
181 | impl FromStr for Regex { |
182 | type Err = Error; |
183 | |
184 | /// Attempts to parse a string into a regular expression |
185 | fn from_str(s: &str) -> Result<Regex, Error> { |
186 | Regex::new(re:s) |
187 | } |
188 | } |
189 | |
190 | /// Core regular expression methods. |
191 | impl Regex { |
192 | /// Compiles a regular expression. Once compiled, it can be used repeatedly |
193 | /// to search, split or replace text in a string. |
194 | /// |
195 | /// If an invalid expression is given, then an error is returned. |
196 | pub fn new(re: &str) -> Result<Regex, Error> { |
197 | RegexBuilder::new(re).build() |
198 | } |
199 | |
200 | /// Returns true if and only if there is a match for the regex in the |
201 | /// string given. |
202 | /// |
203 | /// It is recommended to use this method if all you need to do is test |
204 | /// a match, since the underlying matching engine may be able to do less |
205 | /// work. |
206 | /// |
207 | /// # Example |
208 | /// |
209 | /// Test if some text contains at least one word with exactly 13 |
210 | /// Unicode word characters: |
211 | /// |
212 | /// ```rust |
213 | /// # use regex::Regex; |
214 | /// # fn main() { |
215 | /// let text = "I categorically deny having triskaidekaphobia." ; |
216 | /// assert!(Regex::new(r"\b\w{13}\b" ).unwrap().is_match(text)); |
217 | /// # } |
218 | /// ``` |
219 | pub fn is_match(&self, text: &str) -> bool { |
220 | self.is_match_at(text, 0) |
221 | } |
222 | |
223 | /// Returns the start and end byte range of the leftmost-first match in |
224 | /// `text`. If no match exists, then `None` is returned. |
225 | /// |
226 | /// Note that this should only be used if you want to discover the position |
227 | /// of the match. Testing the existence of a match is faster if you use |
228 | /// `is_match`. |
229 | /// |
230 | /// # Example |
231 | /// |
232 | /// Find the start and end location of the first word with exactly 13 |
233 | /// Unicode word characters: |
234 | /// |
235 | /// ```rust |
236 | /// # use regex::Regex; |
237 | /// # fn main() { |
238 | /// let text = "I categorically deny having triskaidekaphobia." ; |
239 | /// let mat = Regex::new(r"\b\w{13}\b" ).unwrap().find(text).unwrap(); |
240 | /// assert_eq!(mat.start(), 2); |
241 | /// assert_eq!(mat.end(), 15); |
242 | /// # } |
243 | /// ``` |
244 | pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> { |
245 | self.find_at(text, 0) |
246 | } |
247 | |
248 | /// Returns an iterator for each successive non-overlapping match in |
249 | /// `text`, returning the start and end byte indices with respect to |
250 | /// `text`. |
251 | /// |
252 | /// # Example |
253 | /// |
254 | /// Find the start and end location of every word with exactly 13 Unicode |
255 | /// word characters: |
256 | /// |
257 | /// ```rust |
258 | /// # use regex::Regex; |
259 | /// # fn main() { |
260 | /// let text = "Retroactively relinquishing remunerations is reprehensible." ; |
261 | /// for mat in Regex::new(r"\b\w{13}\b" ).unwrap().find_iter(text) { |
262 | /// println!("{:?}" , mat); |
263 | /// } |
264 | /// # } |
265 | /// ``` |
266 | pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> { |
267 | Matches(self.0.searcher_str().find_iter(text)) |
268 | } |
269 | |
270 | /// Returns the capture groups corresponding to the leftmost-first |
271 | /// match in `text`. Capture group `0` always corresponds to the entire |
272 | /// match. If no match is found, then `None` is returned. |
273 | /// |
274 | /// You should only use `captures` if you need access to the location of |
275 | /// capturing group matches. Otherwise, `find` is faster for discovering |
276 | /// the location of the overall match. |
277 | /// |
278 | /// # Examples |
279 | /// |
280 | /// Say you have some text with movie names and their release years, |
281 | /// like "'Citizen Kane' (1941)". It'd be nice if we could search for text |
282 | /// looking like that, while also extracting the movie name and its release |
283 | /// year separately. |
284 | /// |
285 | /// ```rust |
286 | /// # use regex::Regex; |
287 | /// # fn main() { |
288 | /// let re = Regex::new(r"'([^']+)'\s+\((\d{4})\)" ).unwrap(); |
289 | /// let text = "Not my favorite movie: 'Citizen Kane' (1941)." ; |
290 | /// let caps = re.captures(text).unwrap(); |
291 | /// assert_eq!(caps.get(1).unwrap().as_str(), "Citizen Kane" ); |
292 | /// assert_eq!(caps.get(2).unwrap().as_str(), "1941" ); |
293 | /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)" ); |
294 | /// // You can also access the groups by index using the Index notation. |
295 | /// // Note that this will panic on an invalid index. |
296 | /// assert_eq!(&caps[1], "Citizen Kane" ); |
297 | /// assert_eq!(&caps[2], "1941" ); |
298 | /// assert_eq!(&caps[0], "'Citizen Kane' (1941)" ); |
299 | /// # } |
300 | /// ``` |
301 | /// |
302 | /// Note that the full match is at capture group `0`. Each subsequent |
303 | /// capture group is indexed by the order of its opening `(`. |
304 | /// |
305 | /// We can make this example a bit clearer by using *named* capture groups: |
306 | /// |
307 | /// ```rust |
308 | /// # use regex::Regex; |
309 | /// # fn main() { |
310 | /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" ) |
311 | /// .unwrap(); |
312 | /// let text = "Not my favorite movie: 'Citizen Kane' (1941)." ; |
313 | /// let caps = re.captures(text).unwrap(); |
314 | /// assert_eq!(caps.name("title" ).unwrap().as_str(), "Citizen Kane" ); |
315 | /// assert_eq!(caps.name("year" ).unwrap().as_str(), "1941" ); |
316 | /// assert_eq!(caps.get(0).unwrap().as_str(), "'Citizen Kane' (1941)" ); |
317 | /// // You can also access the groups by name using the Index notation. |
318 | /// // Note that this will panic on an invalid group name. |
319 | /// assert_eq!(&caps["title" ], "Citizen Kane" ); |
320 | /// assert_eq!(&caps["year" ], "1941" ); |
321 | /// assert_eq!(&caps[0], "'Citizen Kane' (1941)" ); |
322 | /// |
323 | /// # } |
324 | /// ``` |
325 | /// |
326 | /// Here we name the capture groups, which we can access with the `name` |
327 | /// method or the `Index` notation with a `&str`. Note that the named |
328 | /// capture groups are still accessible with `get` or the `Index` notation |
329 | /// with a `usize`. |
330 | /// |
331 | /// The `0`th capture group is always unnamed, so it must always be |
332 | /// accessed with `get(0)` or `[0]`. |
333 | pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> { |
334 | self.captures_at(text, 0) |
335 | } |
336 | |
337 | /// Returns an iterator over all the non-overlapping capture groups matched |
338 | /// in `text`. This is operationally the same as `find_iter`, except it |
339 | /// yields information about capturing group matches. |
340 | /// |
341 | /// # Example |
342 | /// |
343 | /// We can use this to find all movie titles and their release years in |
344 | /// some text, where the movie is formatted like "'Title' (xxxx)": |
345 | /// |
346 | /// ```rust |
347 | /// # use regex::Regex; |
348 | /// # fn main() { |
349 | /// let re = Regex::new(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)" ) |
350 | /// .unwrap(); |
351 | /// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931)." ; |
352 | /// for caps in re.captures_iter(text) { |
353 | /// println!("Movie: {:?}, Released: {:?}" , |
354 | /// &caps["title" ], &caps["year" ]); |
355 | /// } |
356 | /// // Output: |
357 | /// // Movie: Citizen Kane, Released: 1941 |
358 | /// // Movie: The Wizard of Oz, Released: 1939 |
359 | /// // Movie: M, Released: 1931 |
360 | /// # } |
361 | /// ``` |
362 | pub fn captures_iter<'r, 't>( |
363 | &'r self, |
364 | text: &'t str, |
365 | ) -> CaptureMatches<'r, 't> { |
366 | CaptureMatches(self.0.searcher_str().captures_iter(text)) |
367 | } |
368 | |
369 | /// Returns an iterator of substrings of `text` delimited by a match of the |
370 | /// regular expression. Namely, each element of the iterator corresponds to |
371 | /// text that *isn't* matched by the regular expression. |
372 | /// |
373 | /// This method will *not* copy the text given. |
374 | /// |
375 | /// # Example |
376 | /// |
377 | /// To split a string delimited by arbitrary amounts of spaces or tabs: |
378 | /// |
379 | /// ```rust |
380 | /// # use regex::Regex; |
381 | /// # fn main() { |
382 | /// let re = Regex::new(r"[ \t]+" ).unwrap(); |
383 | /// let fields: Vec<&str> = re.split("a b \t c \td e" ).collect(); |
384 | /// assert_eq!(fields, vec!["a" , "b" , "c" , "d" , "e" ]); |
385 | /// # } |
386 | /// ``` |
387 | pub fn split<'r, 't>(&'r self, text: &'t str) -> Split<'r, 't> { |
388 | Split { finder: self.find_iter(text), last: 0 } |
389 | } |
390 | |
391 | /// Returns an iterator of at most `limit` substrings of `text` delimited |
392 | /// by a match of the regular expression. (A `limit` of `0` will return no |
393 | /// substrings.) Namely, each element of the iterator corresponds to text |
394 | /// that *isn't* matched by the regular expression. The remainder of the |
395 | /// string that is not split will be the last element in the iterator. |
396 | /// |
397 | /// This method will *not* copy the text given. |
398 | /// |
399 | /// # Example |
400 | /// |
401 | /// Get the first two words in some text: |
402 | /// |
403 | /// ```rust |
404 | /// # use regex::Regex; |
405 | /// # fn main() { |
406 | /// let re = Regex::new(r"\W+" ).unwrap(); |
407 | /// let fields: Vec<&str> = re.splitn("Hey! How are you?" , 3).collect(); |
408 | /// assert_eq!(fields, vec!("Hey" , "How" , "are you?" )); |
409 | /// # } |
410 | /// ``` |
411 | pub fn splitn<'r, 't>( |
412 | &'r self, |
413 | text: &'t str, |
414 | limit: usize, |
415 | ) -> SplitN<'r, 't> { |
416 | SplitN { splits: self.split(text), n: limit } |
417 | } |
418 | |
419 | /// Replaces the leftmost-first match with the replacement provided. |
420 | /// The replacement can be a regular string (where `$N` and `$name` are |
421 | /// expanded to match capture groups) or a function that takes the matches' |
422 | /// `Captures` and returns the replaced string. |
423 | /// |
424 | /// If no match is found, then a copy of the string is returned unchanged. |
425 | /// |
426 | /// # Replacement string syntax |
427 | /// |
428 | /// All instances of `$name` in the replacement text is replaced with the |
429 | /// corresponding capture group `name`. |
430 | /// |
431 | /// `name` may be an integer corresponding to the index of the |
432 | /// capture group (counted by order of opening parenthesis where `0` is the |
433 | /// entire match) or it can be a name (consisting of letters, digits or |
434 | /// underscores) corresponding to a named capture group. |
435 | /// |
436 | /// If `name` isn't a valid capture group (whether the name doesn't exist |
437 | /// or isn't a valid index), then it is replaced with the empty string. |
438 | /// |
439 | /// The longest possible name is used. e.g., `$1a` looks up the capture |
440 | /// group named `1a` and not the capture group at index `1`. To exert more |
441 | /// precise control over the name, use braces, e.g., `${1}a`. |
442 | /// |
443 | /// To write a literal `$` use `$$`. |
444 | /// |
445 | /// # Examples |
446 | /// |
447 | /// Note that this function is polymorphic with respect to the replacement. |
448 | /// In typical usage, this can just be a normal string: |
449 | /// |
450 | /// ```rust |
451 | /// # use regex::Regex; |
452 | /// # fn main() { |
453 | /// let re = Regex::new("[^01]+" ).unwrap(); |
454 | /// assert_eq!(re.replace("1078910" , "" ), "1010" ); |
455 | /// # } |
456 | /// ``` |
457 | /// |
458 | /// But anything satisfying the `Replacer` trait will work. For example, |
459 | /// a closure of type `|&Captures| -> String` provides direct access to the |
460 | /// captures corresponding to a match. This allows one to access |
461 | /// capturing group matches easily: |
462 | /// |
463 | /// ```rust |
464 | /// # use regex::Regex; |
465 | /// # use regex::Captures; fn main() { |
466 | /// let re = Regex::new(r"([^,\s]+),\s+(\S+)" ).unwrap(); |
467 | /// let result = re.replace("Springsteen, Bruce" , |caps: &Captures| { |
468 | /// format!("{} {}" , &caps[2], &caps[1]) |
469 | /// }); |
470 | /// assert_eq!(result, "Bruce Springsteen" ); |
471 | /// # } |
472 | /// ``` |
473 | /// |
474 | /// But this is a bit cumbersome to use all the time. Instead, a simple |
475 | /// syntax is supported that expands `$name` into the corresponding capture |
476 | /// group. Here's the last example, but using this expansion technique |
477 | /// with named capture groups: |
478 | /// |
479 | /// ```rust |
480 | /// # use regex::Regex; |
481 | /// # fn main() { |
482 | /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)" ).unwrap(); |
483 | /// let result = re.replace("Springsteen, Bruce" , "$first $last" ); |
484 | /// assert_eq!(result, "Bruce Springsteen" ); |
485 | /// # } |
486 | /// ``` |
487 | /// |
488 | /// Note that using `$2` instead of `$first` or `$1` instead of `$last` |
489 | /// would produce the same result. To write a literal `$` use `$$`. |
490 | /// |
491 | /// Sometimes the replacement string requires use of curly braces to |
492 | /// delineate a capture group replacement and surrounding literal text. |
493 | /// For example, if we wanted to join two words together with an |
494 | /// underscore: |
495 | /// |
496 | /// ```rust |
497 | /// # use regex::Regex; |
498 | /// # fn main() { |
499 | /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)" ).unwrap(); |
500 | /// let result = re.replace("deep fried" , "${first}_$second" ); |
501 | /// assert_eq!(result, "deep_fried" ); |
502 | /// # } |
503 | /// ``` |
504 | /// |
505 | /// Without the curly braces, the capture group name `first_` would be |
506 | /// used, and since it doesn't exist, it would be replaced with the empty |
507 | /// string. |
508 | /// |
509 | /// Finally, sometimes you just want to replace a literal string with no |
510 | /// regard for capturing group expansion. This can be done by wrapping a |
511 | /// byte string with `NoExpand`: |
512 | /// |
513 | /// ```rust |
514 | /// # use regex::Regex; |
515 | /// # fn main() { |
516 | /// use regex::NoExpand; |
517 | /// |
518 | /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)" ).unwrap(); |
519 | /// let result = re.replace("Springsteen, Bruce" , NoExpand("$2 $last" )); |
520 | /// assert_eq!(result, "$2 $last" ); |
521 | /// # } |
522 | /// ``` |
523 | pub fn replace<'t, R: Replacer>( |
524 | &self, |
525 | text: &'t str, |
526 | rep: R, |
527 | ) -> Cow<'t, str> { |
528 | self.replacen(text, 1, rep) |
529 | } |
530 | |
531 | /// Replaces all non-overlapping matches in `text` with the replacement |
532 | /// provided. This is the same as calling `replacen` with `limit` set to |
533 | /// `0`. |
534 | /// |
535 | /// See the documentation for `replace` for details on how to access |
536 | /// capturing group matches in the replacement string. |
537 | pub fn replace_all<'t, R: Replacer>( |
538 | &self, |
539 | text: &'t str, |
540 | rep: R, |
541 | ) -> Cow<'t, str> { |
542 | self.replacen(text, 0, rep) |
543 | } |
544 | |
545 | /// Replaces at most `limit` non-overlapping matches in `text` with the |
546 | /// replacement provided. If `limit` is 0, then all non-overlapping matches |
547 | /// are replaced. |
548 | /// |
549 | /// See the documentation for `replace` for details on how to access |
550 | /// capturing group matches in the replacement string. |
551 | pub fn replacen<'t, R: Replacer>( |
552 | &self, |
553 | text: &'t str, |
554 | limit: usize, |
555 | mut rep: R, |
556 | ) -> Cow<'t, str> { |
557 | // If we know that the replacement doesn't have any capture expansions, |
558 | // then we can use the fast path. The fast path can make a tremendous |
559 | // difference: |
560 | // |
561 | // 1) We use `find_iter` instead of `captures_iter`. Not asking for |
562 | // captures generally makes the regex engines faster. |
563 | // 2) We don't need to look up all of the capture groups and do |
564 | // replacements inside the replacement string. We just push it |
565 | // at each match and be done with it. |
566 | if let Some(rep) = rep.no_expansion() { |
567 | let mut it = self.find_iter(text).enumerate().peekable(); |
568 | if it.peek().is_none() { |
569 | return Cow::Borrowed(text); |
570 | } |
571 | let mut new = String::with_capacity(text.len()); |
572 | let mut last_match = 0; |
573 | for (i, m) in it { |
574 | new.push_str(&text[last_match..m.start()]); |
575 | new.push_str(&rep); |
576 | last_match = m.end(); |
577 | if limit > 0 && i >= limit - 1 { |
578 | break; |
579 | } |
580 | } |
581 | new.push_str(&text[last_match..]); |
582 | return Cow::Owned(new); |
583 | } |
584 | |
585 | // The slower path, which we use if the replacement needs access to |
586 | // capture groups. |
587 | let mut it = self.captures_iter(text).enumerate().peekable(); |
588 | if it.peek().is_none() { |
589 | return Cow::Borrowed(text); |
590 | } |
591 | let mut new = String::with_capacity(text.len()); |
592 | let mut last_match = 0; |
593 | for (i, cap) in it { |
594 | // unwrap on 0 is OK because captures only reports matches |
595 | let m = cap.get(0).unwrap(); |
596 | new.push_str(&text[last_match..m.start()]); |
597 | rep.replace_append(&cap, &mut new); |
598 | last_match = m.end(); |
599 | if limit > 0 && i >= limit - 1 { |
600 | break; |
601 | } |
602 | } |
603 | new.push_str(&text[last_match..]); |
604 | Cow::Owned(new) |
605 | } |
606 | } |
607 | |
608 | /// Advanced or "lower level" search methods. |
609 | impl Regex { |
610 | /// Returns the end location of a match in the text given. |
611 | /// |
612 | /// This method may have the same performance characteristics as |
613 | /// `is_match`, except it provides an end location for a match. In |
614 | /// particular, the location returned *may be shorter* than the proper end |
615 | /// of the leftmost-first match that you would find via `Regex::find`. |
616 | /// |
617 | /// Note that it is not guaranteed that this routine finds the shortest or |
618 | /// "earliest" possible match. Instead, the main idea of this API is that |
619 | /// it returns the offset at the point at which the internal regex engine |
620 | /// has determined that a match has occurred. This may vary depending on |
621 | /// which internal regex engine is used, and thus, the offset itself may |
622 | /// change. |
623 | /// |
624 | /// # Example |
625 | /// |
626 | /// Typically, `a+` would match the entire first sequence of `a` in some |
627 | /// text, but `shortest_match` can give up as soon as it sees the first |
628 | /// `a`. |
629 | /// |
630 | /// ```rust |
631 | /// # use regex::Regex; |
632 | /// # fn main() { |
633 | /// let text = "aaaaa" ; |
634 | /// let pos = Regex::new(r"a+" ).unwrap().shortest_match(text); |
635 | /// assert_eq!(pos, Some(1)); |
636 | /// # } |
637 | /// ``` |
638 | pub fn shortest_match(&self, text: &str) -> Option<usize> { |
639 | self.shortest_match_at(text, 0) |
640 | } |
641 | |
642 | /// Returns the same as `shortest_match`, but starts the search at the |
643 | /// given offset. |
644 | /// |
645 | /// The significance of the starting point is that it takes the surrounding |
646 | /// context into consideration. For example, the `\A` anchor can only match |
647 | /// when `start == 0`. |
648 | pub fn shortest_match_at( |
649 | &self, |
650 | text: &str, |
651 | start: usize, |
652 | ) -> Option<usize> { |
653 | self.0.searcher_str().shortest_match_at(text, start) |
654 | } |
655 | |
656 | /// Returns the same as is_match, but starts the search at the given |
657 | /// offset. |
658 | /// |
659 | /// The significance of the starting point is that it takes the surrounding |
660 | /// context into consideration. For example, the `\A` anchor can only |
661 | /// match when `start == 0`. |
662 | pub fn is_match_at(&self, text: &str, start: usize) -> bool { |
663 | self.0.searcher_str().is_match_at(text, start) |
664 | } |
665 | |
666 | /// Returns the same as find, but starts the search at the given |
667 | /// offset. |
668 | /// |
669 | /// The significance of the starting point is that it takes the surrounding |
670 | /// context into consideration. For example, the `\A` anchor can only |
671 | /// match when `start == 0`. |
672 | pub fn find_at<'t>( |
673 | &self, |
674 | text: &'t str, |
675 | start: usize, |
676 | ) -> Option<Match<'t>> { |
677 | self.0 |
678 | .searcher_str() |
679 | .find_at(text, start) |
680 | .map(|(s, e)| Match::new(text, s, e)) |
681 | } |
682 | |
683 | /// Returns the same as [`Regex::captures`], but starts the search at the |
684 | /// given offset. |
685 | /// |
686 | /// The significance of the starting point is that it takes the surrounding |
687 | /// context into consideration. For example, the `\A` anchor can only |
688 | /// match when `start == 0`. |
689 | pub fn captures_at<'t>( |
690 | &self, |
691 | text: &'t str, |
692 | start: usize, |
693 | ) -> Option<Captures<'t>> { |
694 | let mut locs = self.capture_locations(); |
695 | self.captures_read_at(&mut locs, text, start).map(move |_| Captures { |
696 | text, |
697 | locs: locs.0, |
698 | named_groups: self.0.capture_name_idx().clone(), |
699 | }) |
700 | } |
701 | |
702 | /// This is like `captures`, but uses |
703 | /// [`CaptureLocations`](struct.CaptureLocations.html) |
704 | /// instead of |
705 | /// [`Captures`](struct.Captures.html) in order to amortize allocations. |
706 | /// |
707 | /// To create a `CaptureLocations` value, use the |
708 | /// `Regex::capture_locations` method. |
709 | /// |
710 | /// This returns the overall match if this was successful, which is always |
711 | /// equivalence to the `0`th capture group. |
712 | pub fn captures_read<'t>( |
713 | &self, |
714 | locs: &mut CaptureLocations, |
715 | text: &'t str, |
716 | ) -> Option<Match<'t>> { |
717 | self.captures_read_at(locs, text, 0) |
718 | } |
719 | |
720 | /// Returns the same as captures, but starts the search at the given |
721 | /// offset and populates the capture locations given. |
722 | /// |
723 | /// The significance of the starting point is that it takes the surrounding |
724 | /// context into consideration. For example, the `\A` anchor can only |
725 | /// match when `start == 0`. |
726 | pub fn captures_read_at<'t>( |
727 | &self, |
728 | locs: &mut CaptureLocations, |
729 | text: &'t str, |
730 | start: usize, |
731 | ) -> Option<Match<'t>> { |
732 | self.0 |
733 | .searcher_str() |
734 | .captures_read_at(&mut locs.0, text, start) |
735 | .map(|(s, e)| Match::new(text, s, e)) |
736 | } |
737 | |
738 | /// An undocumented alias for `captures_read_at`. |
739 | /// |
740 | /// The `regex-capi` crate previously used this routine, so to avoid |
741 | /// breaking that crate, we continue to provide the name as an undocumented |
742 | /// alias. |
743 | #[doc (hidden)] |
744 | pub fn read_captures_at<'t>( |
745 | &self, |
746 | locs: &mut CaptureLocations, |
747 | text: &'t str, |
748 | start: usize, |
749 | ) -> Option<Match<'t>> { |
750 | self.captures_read_at(locs, text, start) |
751 | } |
752 | } |
753 | |
754 | /// Auxiliary methods. |
755 | impl Regex { |
756 | /// Returns the original string of this regex. |
757 | pub fn as_str(&self) -> &str { |
758 | &self.0.regex_strings()[0] |
759 | } |
760 | |
761 | /// Returns an iterator over the capture names. |
762 | pub fn capture_names(&self) -> CaptureNames<'_> { |
763 | CaptureNames(self.0.capture_names().iter()) |
764 | } |
765 | |
766 | /// Returns the number of captures. |
767 | pub fn captures_len(&self) -> usize { |
768 | self.0.capture_names().len() |
769 | } |
770 | |
771 | /// Returns the total number of capturing groups that appear in every |
772 | /// possible match. |
773 | /// |
774 | /// If the number of capture groups can vary depending on the match, then |
775 | /// this returns `None`. That is, a value is only returned when the number |
776 | /// of matching groups is invariant or "static." |
777 | /// |
778 | /// Note that like [`Regex::captures_len`], this **does** include the |
779 | /// implicit capturing group corresponding to the entire match. Therefore, |
780 | /// when a non-None value is returned, it is guaranteed to be at least `1`. |
781 | /// Stated differently, a return value of `Some(0)` is impossible. |
782 | /// |
783 | /// # Example |
784 | /// |
785 | /// This shows a few cases where a static number of capture groups is |
786 | /// available and a few cases where it is not. |
787 | /// |
788 | /// ``` |
789 | /// use regex::Regex; |
790 | /// |
791 | /// let len = |pattern| { |
792 | /// Regex::new(pattern).map(|re| re.static_captures_len()) |
793 | /// }; |
794 | /// |
795 | /// assert_eq!(Some(1), len("a" )?); |
796 | /// assert_eq!(Some(2), len("(a)" )?); |
797 | /// assert_eq!(Some(2), len("(a)|(b)" )?); |
798 | /// assert_eq!(Some(3), len("(a)(b)|(c)(d)" )?); |
799 | /// assert_eq!(None, len("(a)|b" )?); |
800 | /// assert_eq!(None, len("a|(b)" )?); |
801 | /// assert_eq!(None, len("(b)*" )?); |
802 | /// assert_eq!(Some(2), len("(b)+" )?); |
803 | /// |
804 | /// # Ok::<(), Box<dyn std::error::Error>>(()) |
805 | /// ``` |
806 | #[inline ] |
807 | pub fn static_captures_len(&self) -> Option<usize> { |
808 | self.0.static_captures_len().map(|len| len.saturating_add(1)) |
809 | } |
810 | |
811 | /// Returns an empty set of capture locations that can be reused in |
812 | /// multiple calls to `captures_read` or `captures_read_at`. |
813 | pub fn capture_locations(&self) -> CaptureLocations { |
814 | CaptureLocations(self.0.searcher_str().locations()) |
815 | } |
816 | |
817 | /// An alias for `capture_locations` to preserve backward compatibility. |
818 | /// |
819 | /// The `regex-capi` crate uses this method, so to avoid breaking that |
820 | /// crate, we continue to export it as an undocumented API. |
821 | #[doc (hidden)] |
822 | pub fn locations(&self) -> CaptureLocations { |
823 | CaptureLocations(self.0.searcher_str().locations()) |
824 | } |
825 | } |
826 | |
/// An iterator over the names of all possible captures.
///
/// `None` indicates an unnamed capture; the first element (capture 0, the
/// whole matched region) is always unnamed.
///
/// `'r` is the lifetime of the compiled regular expression.
#[derive(Clone, Debug)]
pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);

impl<'r> Iterator for CaptureNames<'r> {
    type Item = Option<&'r str>;

    /// Advances to the next capture group and yields its name as a borrowed
    /// `&str`, or `None` (inside `Some`) when the group is unnamed.
    fn next(&mut self) -> Option<Option<&'r str>> {
        // `as_deref` turns `&Option<String>` into `Option<&str>` directly.
        self.0.next().map(|slot| slot.as_deref())
    }

    /// Forwards the size hint of the wrapped slice iterator.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }

    /// Counts the remaining names using the wrapped slice iterator.
    fn count(self) -> usize {
        self.0.count()
    }
}

impl<'r> ExactSizeIterator for CaptureNames<'r> {}

impl<'r> FusedIterator for CaptureNames<'r> {}
858 | |
859 | /// Yields all substrings delimited by a regular expression match. |
860 | /// |
861 | /// `'r` is the lifetime of the compiled regular expression and `'t` is the |
862 | /// lifetime of the string being split. |
863 | #[derive (Debug)] |
864 | pub struct Split<'r, 't> { |
865 | finder: Matches<'r, 't>, |
866 | last: usize, |
867 | } |
868 | |
869 | impl<'r, 't> Iterator for Split<'r, 't> { |
870 | type Item = &'t str; |
871 | |
872 | fn next(&mut self) -> Option<&'t str> { |
873 | let text: &str = self.finder.0.text(); |
874 | match self.finder.next() { |
875 | None => { |
876 | if self.last > text.len() { |
877 | None |
878 | } else { |
879 | let s: &str = &text[self.last..]; |
880 | self.last = text.len() + 1; // Next call will return None |
881 | Some(s) |
882 | } |
883 | } |
884 | Some(m: Match<'_>) => { |
885 | let matched: &str = &text[self.last..m.start()]; |
886 | self.last = m.end(); |
887 | Some(matched) |
888 | } |
889 | } |
890 | } |
891 | } |
892 | |
893 | impl<'r, 't> FusedIterator for Split<'r, 't> {} |
894 | |
895 | /// Yields at most `N` substrings delimited by a regular expression match. |
896 | /// |
897 | /// The last substring will be whatever remains after splitting. |
898 | /// |
899 | /// `'r` is the lifetime of the compiled regular expression and `'t` is the |
900 | /// lifetime of the string being split. |
901 | #[derive (Debug)] |
902 | pub struct SplitN<'r, 't> { |
903 | splits: Split<'r, 't>, |
904 | n: usize, |
905 | } |
906 | |
907 | impl<'r, 't> Iterator for SplitN<'r, 't> { |
908 | type Item = &'t str; |
909 | |
910 | fn next(&mut self) -> Option<&'t str> { |
911 | if self.n == 0 { |
912 | return None; |
913 | } |
914 | |
915 | self.n -= 1; |
916 | if self.n > 0 { |
917 | return self.splits.next(); |
918 | } |
919 | |
920 | let text = self.splits.finder.0.text(); |
921 | if self.splits.last > text.len() { |
922 | // We've already returned all substrings. |
923 | None |
924 | } else { |
925 | // self.n == 0, so future calls will return None immediately |
926 | Some(&text[self.splits.last..]) |
927 | } |
928 | } |
929 | |
930 | fn size_hint(&self) -> (usize, Option<usize>) { |
931 | (0, Some(self.n)) |
932 | } |
933 | } |
934 | |
935 | impl<'r, 't> FusedIterator for SplitN<'r, 't> {} |
936 | |
/// CaptureLocations is a low level representation of the raw offsets of each
/// submatch.
///
/// You can think of this as a lower level
/// [`Captures`](struct.Captures.html), where this type does not support
/// named capturing groups directly and it does not borrow the text that these
/// offsets were matched on.
///
/// Primarily, this type is useful when using the lower level `Regex` APIs
/// such as `captures_read`, which permits amortizing the allocation in which
/// capture match locations are stored.
///
/// In order to build a value of this type, you'll need to call the
/// `capture_locations` method on the `Regex` being used to execute the search.
/// The value returned can then be reused in subsequent searches.
///
/// # Example
///
/// This example shows how to create and use `CaptureLocations` in a search.
///
/// ```
/// use regex::Regex;
///
/// let re = Regex::new(r"(?<first>\w+)\s+(?<last>\w+)").unwrap();
/// let mut locs = re.capture_locations();
/// let m = re.captures_read(&mut locs, "Bruce Springsteen").unwrap();
/// assert_eq!(0..17, m.range());
/// assert_eq!(Some((0, 17)), locs.get(0));
/// assert_eq!(Some((0, 5)), locs.get(1));
/// assert_eq!(Some((6, 17)), locs.get(2));
///
/// // Asking for an invalid capture group always returns None.
/// assert_eq!(None, locs.get(3));
/// assert_eq!(None, locs.get(34973498648));
/// assert_eq!(None, locs.get(9944060567225171988));
/// ```
#[derive (Clone, Debug)]
pub struct CaptureLocations(re_trait::Locations);
975 | |
/// A type alias for `CaptureLocations`, kept for backwards compatibility.
///
/// `CaptureLocations` used to be exported under the name `Locations` as an
/// undocumented API. To avoid breaking code that still uses the old name
/// (e.g., in `regex-capi`), the alias continues to be exported, hidden from
/// the generated documentation.
#[doc (hidden)]
pub type Locations = CaptureLocations;
983 | |
984 | impl CaptureLocations { |
985 | /// Returns the start and end positions of the Nth capture group. Returns |
986 | /// `None` if `i` is not a valid capture group or if the capture group did |
987 | /// not match anything. The positions returned are *always* byte indices |
988 | /// with respect to the original string matched. |
989 | #[inline ] |
990 | pub fn get(&self, i: usize) -> Option<(usize, usize)> { |
991 | self.0.pos(i) |
992 | } |
993 | |
994 | /// Returns the total number of capture groups (even if they didn't match). |
995 | /// |
996 | /// This is always at least `1` since every regex has at least `1` |
997 | /// capturing group that corresponds to the entire match. |
998 | #[inline ] |
999 | pub fn len(&self) -> usize { |
1000 | self.0.len() |
1001 | } |
1002 | |
1003 | /// An alias for the `get` method for backwards compatibility. |
1004 | /// |
1005 | /// Previously, we exported `get` as `pos` in an undocumented API. To |
1006 | /// prevent breaking that code (e.g., in `regex-capi`), we continue |
1007 | /// re-exporting the same undocumented API. |
1008 | #[doc (hidden)] |
1009 | #[inline ] |
1010 | pub fn pos(&self, i: usize) -> Option<(usize, usize)> { |
1011 | self.get(i) |
1012 | } |
1013 | } |
1014 | |
/// Captures represents a group of captured strings for a single match.
///
/// The 0th capture always corresponds to the entire match. Each subsequent
/// index corresponds to the next capture group in the regex. If a capture
/// group is named, then the matched string is *also* available via the `name`
/// method. (Note that the 0th capture is always unnamed and so must be
/// accessed with the `get` method.)
///
/// Positions returned from a capture group are always byte indices.
///
/// `'t` is the lifetime of the matched text.
pub struct Captures<'t> {
    // The text that was searched; capture offsets index into it.
    text: &'t str,
    // Start/end offsets for every capture group of this match.
    locs: re_trait::Locations,
    // Maps each capture group name to its group index.
    named_groups: Arc<HashMap<String, usize>>,
}
1031 | |
1032 | impl<'t> Captures<'t> { |
1033 | /// Returns the match associated with the capture group at index `i`. If |
1034 | /// `i` does not correspond to a capture group, or if the capture group |
1035 | /// did not participate in the match, then `None` is returned. |
1036 | /// |
1037 | /// # Examples |
1038 | /// |
1039 | /// Get the text of the match with a default of an empty string if this |
1040 | /// group didn't participate in the match: |
1041 | /// |
1042 | /// ```rust |
1043 | /// # use regex::Regex; |
1044 | /// let re = Regex::new(r"[a-z]+(?:([0-9]+)|([A-Z]+))" ).unwrap(); |
1045 | /// let caps = re.captures("abc123" ).unwrap(); |
1046 | /// |
1047 | /// let text1 = caps.get(1).map_or("" , |m| m.as_str()); |
1048 | /// let text2 = caps.get(2).map_or("" , |m| m.as_str()); |
1049 | /// assert_eq!(text1, "123" ); |
1050 | /// assert_eq!(text2, "" ); |
1051 | /// ``` |
1052 | pub fn get(&self, i: usize) -> Option<Match<'t>> { |
1053 | self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e)) |
1054 | } |
1055 | |
1056 | /// Returns the match for the capture group named `name`. If `name` isn't a |
1057 | /// valid capture group or didn't match anything, then `None` is returned. |
1058 | pub fn name(&self, name: &str) -> Option<Match<'t>> { |
1059 | self.named_groups.get(name).and_then(|&i| self.get(i)) |
1060 | } |
1061 | |
1062 | /// An iterator that yields all capturing matches in the order in which |
1063 | /// they appear in the regex. If a particular capture group didn't |
1064 | /// participate in the match, then `None` is yielded for that capture. |
1065 | /// |
1066 | /// The first match always corresponds to the overall match of the regex. |
1067 | pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> { |
1068 | SubCaptureMatches { caps: self, it: self.locs.iter() } |
1069 | } |
1070 | |
1071 | /// Expands all instances of `$name` in `replacement` to the corresponding |
1072 | /// capture group `name`, and writes them to the `dst` buffer given. |
1073 | /// |
1074 | /// `name` may be an integer corresponding to the index of the capture |
1075 | /// group (counted by order of opening parenthesis where `0` is the |
1076 | /// entire match) or it can be a name (consisting of letters, digits or |
1077 | /// underscores) corresponding to a named capture group. |
1078 | /// |
1079 | /// If `name` isn't a valid capture group (whether the name doesn't exist |
1080 | /// or isn't a valid index), then it is replaced with the empty string. |
1081 | /// |
1082 | /// The longest possible name consisting of the characters `[_0-9A-Za-z]` |
1083 | /// is used. e.g., `$1a` looks up the capture group named `1a` and not the |
1084 | /// capture group at index `1`. To exert more precise control over the |
1085 | /// name, or to refer to a capture group name that uses characters outside |
1086 | /// of `[_0-9A-Za-z]`, use braces, e.g., `${1}a` or `${foo[bar].baz}`. When |
1087 | /// using braces, any sequence of characters is permitted. If the sequence |
1088 | /// does not refer to a capture group name in the corresponding regex, then |
1089 | /// it is replaced with an empty string. |
1090 | /// |
1091 | /// To write a literal `$` use `$$`. |
1092 | pub fn expand(&self, replacement: &str, dst: &mut String) { |
1093 | expand_str(self, replacement, dst) |
1094 | } |
1095 | |
1096 | /// Returns the total number of capture groups (even if they didn't match). |
1097 | /// |
1098 | /// This is always at least `1`, since every regex has at least one capture |
1099 | /// group that corresponds to the full match. |
1100 | #[inline ] |
1101 | pub fn len(&self) -> usize { |
1102 | self.locs.len() |
1103 | } |
1104 | } |
1105 | |
1106 | impl<'t> fmt::Debug for Captures<'t> { |
1107 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
1108 | f.debug_tuple(name:"Captures" ).field(&CapturesDebug(self)).finish() |
1109 | } |
1110 | } |
1111 | |
1112 | struct CapturesDebug<'c, 't>(&'c Captures<'t>); |
1113 | |
1114 | impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { |
1115 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
1116 | // We'd like to show something nice here, even if it means an |
1117 | // allocation to build a reverse index. |
1118 | let slot_to_name: HashMap<&usize, &String> = |
1119 | self.0.named_groups.iter().map(|(a: &String, b: &usize)| (b, a)).collect(); |
1120 | let mut map: DebugMap<'_, '_> = f.debug_map(); |
1121 | for (slot: usize, m: Option<(usize, usize)>) in self.0.locs.iter().enumerate() { |
1122 | let m: Option<&str> = m.map(|(s: usize, e: usize)| &self.0.text[s..e]); |
1123 | if let Some(name: &&String) = slot_to_name.get(&slot) { |
1124 | map.entry(&name, &m); |
1125 | } else { |
1126 | map.entry(&slot, &m); |
1127 | } |
1128 | } |
1129 | map.finish() |
1130 | } |
1131 | } |
1132 | |
1133 | /// Get a group by index. |
1134 | /// |
1135 | /// `'t` is the lifetime of the matched text. |
1136 | /// |
1137 | /// The text can't outlive the `Captures` object if this method is |
1138 | /// used, because of how `Index` is defined (normally `a[i]` is part |
1139 | /// of `a` and can't outlive it); to do that, use `get()` instead. |
1140 | /// |
1141 | /// # Panics |
1142 | /// |
1143 | /// If there is no group at the given index. |
1144 | impl<'t> Index<usize> for Captures<'t> { |
1145 | type Output = str; |
1146 | |
1147 | fn index(&self, i: usize) -> &str { |
1148 | self.get(i) |
1149 | .map(|m: Match<'_>| m.as_str()) |
1150 | .unwrap_or_else(|| panic!("no group at index ' {}'" , i)) |
1151 | } |
1152 | } |
1153 | |
1154 | /// Get a group by name. |
1155 | /// |
1156 | /// `'t` is the lifetime of the matched text and `'i` is the lifetime |
1157 | /// of the group name (the index). |
1158 | /// |
1159 | /// The text can't outlive the `Captures` object if this method is |
1160 | /// used, because of how `Index` is defined (normally `a[i]` is part |
1161 | /// of `a` and can't outlive it); to do that, use `name` instead. |
1162 | /// |
1163 | /// # Panics |
1164 | /// |
1165 | /// If there is no group named by the given value. |
1166 | impl<'t, 'i> Index<&'i str> for Captures<'t> { |
1167 | type Output = str; |
1168 | |
1169 | fn index<'a>(&'a self, name: &'i str) -> &'a str { |
1170 | self.name(name) |
1171 | .map(|m: Match<'_>| m.as_str()) |
1172 | .unwrap_or_else(|| panic!("no group named ' {}'" , name)) |
1173 | } |
1174 | } |
1175 | |
1176 | /// An iterator that yields all capturing matches in the order in which they |
1177 | /// appear in the regex. |
1178 | /// |
1179 | /// If a particular capture group didn't participate in the match, then `None` |
1180 | /// is yielded for that capture. The first match always corresponds to the |
1181 | /// overall match of the regex. |
1182 | /// |
1183 | /// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and |
1184 | /// the lifetime `'t` corresponds to the originally matched text. |
1185 | #[derive (Clone, Debug)] |
1186 | pub struct SubCaptureMatches<'c, 't> { |
1187 | caps: &'c Captures<'t>, |
1188 | it: SubCapturesPosIter<'c>, |
1189 | } |
1190 | |
1191 | impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> { |
1192 | type Item = Option<Match<'t>>; |
1193 | |
1194 | fn next(&mut self) -> Option<Option<Match<'t>>> { |
1195 | self.it |
1196 | .next() |
1197 | .map(|cap: Option<(usize, usize)>| cap.map(|(s: usize, e: usize)| Match::new(self.caps.text, start:s, end:e))) |
1198 | } |
1199 | |
1200 | fn size_hint(&self) -> (usize, Option<usize>) { |
1201 | self.it.size_hint() |
1202 | } |
1203 | |
1204 | fn count(self) -> usize { |
1205 | self.it.count() |
1206 | } |
1207 | } |
1208 | |
1209 | impl<'c, 't> ExactSizeIterator for SubCaptureMatches<'c, 't> {} |
1210 | |
1211 | impl<'c, 't> FusedIterator for SubCaptureMatches<'c, 't> {} |
1212 | |
1213 | /// An iterator that yields all non-overlapping capture groups matching a |
1214 | /// particular regular expression. |
1215 | /// |
1216 | /// The iterator stops when no more matches can be found. |
1217 | /// |
1218 | /// `'r` is the lifetime of the compiled regular expression and `'t` is the |
1219 | /// lifetime of the matched string. |
1220 | #[derive (Debug)] |
1221 | pub struct CaptureMatches<'r, 't>( |
1222 | re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>, |
1223 | ); |
1224 | |
1225 | impl<'r, 't> Iterator for CaptureMatches<'r, 't> { |
1226 | type Item = Captures<'t>; |
1227 | |
1228 | fn next(&mut self) -> Option<Captures<'t>> { |
1229 | self.0.next().map(|locs: Locations| Captures { |
1230 | text: self.0.text(), |
1231 | locs, |
1232 | named_groups: self.0.regex().capture_name_idx().clone(), |
1233 | }) |
1234 | } |
1235 | } |
1236 | |
1237 | impl<'r, 't> FusedIterator for CaptureMatches<'r, 't> {} |
1238 | |
1239 | /// An iterator over all non-overlapping matches for a particular string. |
1240 | /// |
1241 | /// The iterator yields a `Match` value. The iterator stops when no more |
1242 | /// matches can be found. |
1243 | /// |
1244 | /// `'r` is the lifetime of the compiled regular expression and `'t` is the |
1245 | /// lifetime of the matched string. |
1246 | #[derive (Debug)] |
1247 | pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSyncStr<'r>>); |
1248 | |
1249 | impl<'r, 't> Iterator for Matches<'r, 't> { |
1250 | type Item = Match<'t>; |
1251 | |
1252 | fn next(&mut self) -> Option<Match<'t>> { |
1253 | let text: &str = self.0.text(); |
1254 | self.0.next().map(|(s: usize, e: usize)| Match::new(haystack:text, start:s, end:e)) |
1255 | } |
1256 | } |
1257 | |
1258 | impl<'r, 't> FusedIterator for Matches<'r, 't> {} |
1259 | |
/// Replacer describes types that can be used to replace matches in a string.
///
/// In general, users of this crate shouldn't need to implement this trait,
/// since implementations are already provided for `&str` along with other
/// variants of string types and `FnMut(&Captures) -> String` (or any
/// `FnMut(&Captures) -> T` where `T: AsRef<str>`), which covers most use cases.
pub trait Replacer {
    /// Appends text to `dst` to replace the current match.
    ///
    /// The current match is represented by `caps`, which is guaranteed to
    /// have a match at capture group `0`.
    ///
    /// For example, a no-op replacement would be
    /// `dst.push_str(caps.get(0).unwrap().as_str())`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String);

    /// Return a fixed unchanging replacement string.
    ///
    /// When doing replacements, if access to `Captures` is not needed (e.g.,
    /// the replacement string does not need `$` expansion), then it can
    /// be beneficial to avoid finding sub-captures.
    ///
    /// In general, this is called once for every call to `replacen`.
    ///
    /// The default implementation returns `None`, i.e. it reports that no
    /// fixed replacement string is available.
    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> {
        None
    }

    /// Return a `Replacer` that borrows and wraps this `Replacer`.
    ///
    /// This is useful when you want to take a generic `Replacer` (which might
    /// not be cloneable) and use it without consuming it, so it can be used
    /// more than once.
    ///
    /// # Example
    ///
    /// ```
    /// use regex::{Regex, Replacer};
    ///
    /// fn replace_all_twice<R: Replacer>(
    ///     re: Regex,
    ///     src: &str,
    ///     mut rep: R,
    /// ) -> String {
    ///     let dst = re.replace_all(src, rep.by_ref());
    ///     let dst = re.replace_all(&dst, rep.by_ref());
    ///     dst.into_owned()
    /// }
    /// ```
    fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
        ReplacerRef(self)
    }
}
1312 | |
1313 | /// By-reference adaptor for a `Replacer` |
1314 | /// |
1315 | /// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref). |
1316 | #[derive (Debug)] |
1317 | pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R); |
1318 | |
1319 | impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> { |
1320 | fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { |
1321 | self.0.replace_append(caps, dst) |
1322 | } |
1323 | fn no_expansion(&mut self) -> Option<Cow<'_, str>> { |
1324 | self.0.no_expansion() |
1325 | } |
1326 | } |
1327 | |
/// A `&str` replacement is treated as a template: it is expanded against the
/// captures of each match (see `Captures::expand`).
impl<'a> Replacer for &'a str {
    // Expand the template against `caps` and append the result to `dst`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        caps.expand(*self, dst);
    }

    // Delegates to the free function `no_expansion`, which inspects the
    // string for a `$` byte.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        no_expansion(self)
    }
}
1337 | |
/// A `&String` replacement behaves like the string slice it borrows.
impl<'a> Replacer for &'a String {
    // Forward to the replacer implementation of the `&str` view of the string.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        self.as_str().replace_append(caps, dst)
    }

    // Delegates to the free function `no_expansion`, which inspects the
    // string for a `$` byte.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        no_expansion(self)
    }
}
1347 | |
/// An owned `String` replacement behaves like its string slice.
impl Replacer for String {
    // Forward to the replacer implementation of the `&str` view of the string.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        self.as_str().replace_append(caps, dst)
    }

    // Delegates to the free function `no_expansion`, which inspects the
    // string for a `$` byte.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        no_expansion(self)
    }
}
1357 | |
/// A `Cow<str>` replacement, whether borrowed or owned, is used through the
/// reference obtained from `as_ref()`.
impl<'a> Replacer for Cow<'a, str> {
    // Forward to the replacer implementation of the value `as_ref()` yields.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        self.as_ref().replace_append(caps, dst)
    }

    // Delegates to the free function `no_expansion`, which inspects the
    // string for a `$` byte.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        no_expansion(self)
    }
}
1367 | |
/// A borrowed `Cow<str>` replacement is used through the reference obtained
/// from `as_ref()`.
impl<'a> Replacer for &'a Cow<'a, str> {
    // Forward to the replacer implementation of the value `as_ref()` yields.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        self.as_ref().replace_append(caps, dst)
    }

    // Delegates to the free function `no_expansion`, which inspects the
    // string for a `$` byte.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        no_expansion(self)
    }
}
1377 | |
1378 | fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> { |
1379 | let s: &str = t.as_ref(); |
1380 | match find_byte(needle:b'$' , haystack:s.as_bytes()) { |
1381 | Some(_) => None, |
1382 | None => Some(Cow::Borrowed(s)), |
1383 | } |
1384 | } |
1385 | |
1386 | impl<F, T> Replacer for F |
1387 | where |
1388 | F: FnMut(&Captures<'_>) -> T, |
1389 | T: AsRef<str>, |
1390 | { |
1391 | fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { |
1392 | dst.push_str((*self)(caps).as_ref()); |
1393 | } |
1394 | } |
1395 | |
1396 | /// `NoExpand` indicates literal string replacement. |
1397 | /// |
1398 | /// It can be used with `replace` and `replace_all` to do a literal string |
1399 | /// replacement without expanding `$name` to their corresponding capture |
1400 | /// groups. This can be both convenient (to avoid escaping `$`, for example) |
1401 | /// and performant (since capture groups don't need to be found). |
1402 | /// |
1403 | /// `'t` is the lifetime of the literal text. |
1404 | #[derive (Clone, Debug)] |
1405 | pub struct NoExpand<'t>(pub &'t str); |
1406 | |
1407 | impl<'t> Replacer for NoExpand<'t> { |
1408 | fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) { |
1409 | dst.push_str(self.0); |
1410 | } |
1411 | |
1412 | fn no_expansion(&mut self) -> Option<Cow<'_, str>> { |
1413 | Some(Cow::Borrowed(self.0)) |
1414 | } |
1415 | } |
1416 | |