1 | use alloc::string::String; |
---|---|

2 | |

3 | use regex_automata::{meta, Input, PatternID, PatternSet, PatternSetIter}; |

4 | |

5 | use crate::{bytes::RegexSetBuilder, Error}; |

6 | |

/// Match multiple, possibly overlapping, regexes in a single search.
///
/// A regex set corresponds to the union of zero or more regular expressions.
/// That is, a regex set will match a haystack when at least one of its
/// constituent regexes matches. A regex set as it's formulated here provides a
/// touch more power: it will also report *which* regular expressions in the
/// set match. Indeed, this is the key difference between regex sets and a
/// single `Regex` with many alternates, since only one alternate can match at
/// a time.
///
/// For example, consider regular expressions to match email addresses and
/// domains: `[a-z]+@[a-z]+\.(com|org|net)` and `[a-z]+\.(com|org|net)`. If a
/// regex set is constructed from those regexes, then searching the haystack
/// `foo@example.com` will report both regexes as matching. Of course, one
/// could accomplish this by compiling each regex on its own and doing two
/// searches over the haystack. The key advantage of using a regex set is
/// that it will report the matching regexes using a *single pass through the
/// haystack*. If one has hundreds or thousands of regexes to match repeatedly
/// (like a URL router for a complex web application or a user agent matcher),
/// then a regex set *can* realize huge performance gains.
///
/// Unlike the top-level [`RegexSet`](crate::RegexSet), this `RegexSet`
/// searches haystacks with type `&[u8]` instead of `&str`. Consequently, this
/// `RegexSet` is permitted to match invalid UTF-8.
///
/// # Limitations
///
/// Regex sets are limited to answering the following two questions:
///
/// 1. Does any regex in the set match?
/// 2. If so, which regexes in the set match?
///
/// As with the main [`Regex`][crate::bytes::Regex] type, it is cheaper to ask
/// (1) instead of (2) since the matching engines can stop after the first
/// match is found.
///
/// You cannot directly extract [`Match`][crate::bytes::Match] or
/// [`Captures`][crate::bytes::Captures] objects from a regex set. If you need
/// these operations, the recommended approach is to compile each pattern in
/// the set independently and scan the exact same haystack a second time with
/// those independently compiled patterns:
///
/// ```
/// use regex::bytes::{Regex, RegexSet};
///
/// let patterns = ["foo", "bar"];
/// // Both patterns will match different ranges of this string.
/// let hay = b"barfoo";
///
/// // Compile a set matching any of our patterns.
/// let set = RegexSet::new(patterns).unwrap();
/// // Compile each pattern independently.
/// let regexes: Vec<_> = set
///     .patterns()
///     .iter()
///     .map(|pat| Regex::new(pat).unwrap())
///     .collect();
///
/// // Match against the whole set first and identify the individual
/// // matching patterns.
/// let matches: Vec<&[u8]> = set
///     .matches(hay)
///     .into_iter()
///     // Dereference the match index to get the corresponding
///     // compiled pattern.
///     .map(|index| &regexes[index])
///     // To get match locations or any other info, we then have to search the
///     // exact same haystack again, using our separately-compiled pattern.
///     .map(|re| re.find(hay).unwrap().as_bytes())
///     .collect();
///
/// // Matches arrive in the order the constituent patterns were declared,
/// // not the order they appear in the haystack.
/// assert_eq!(vec![&b"foo"[..], &b"bar"[..]], matches);
/// ```
///
/// # Performance
///
/// A `RegexSet` has the same performance characteristics as `Regex`. Namely,
/// search takes `O(m * n)` time, where `m` is proportional to the size of the
/// regex set and `n` is proportional to the length of the haystack.
///
/// # Trait implementations
///
/// The `Default` trait is implemented for `RegexSet`. The default value
/// is an empty set. An empty set can also be explicitly constructed via
/// [`RegexSet::empty`].
///
/// # Example
///
/// This shows how the above two regexes (for matching email addresses and
/// domains) might work:
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new(&[
///     r"[a-z]+@[a-z]+\.(com|org|net)",
///     r"[a-z]+\.(com|org|net)",
/// ]).unwrap();
///
/// // Ask whether any regexes in the set match.
/// assert!(set.is_match(b"foo@example.com"));
///
/// // Identify which regexes in the set match.
/// let matches: Vec<_> = set.matches(b"foo@example.com").into_iter().collect();
/// assert_eq!(vec![0, 1], matches);
///
/// // Try again, but with a haystack that only matches one of the regexes.
/// let matches: Vec<_> = set.matches(b"example.com").into_iter().collect();
/// assert_eq!(vec![1], matches);
///
/// // Try again, but with a haystack that doesn't match any regex in the set.
/// let matches: Vec<_> = set.matches(b"example").into_iter().collect();
/// assert!(matches.is_empty());
/// ```
///
/// Note that it would be possible to adapt the above example to using `Regex`
/// with an expression like:
///
/// ```text
/// (?P<email>[a-z]+@(?P<email_domain>[a-z]+[.](com|org|net)))|(?P<domain>[a-z]+[.](com|org|net))
/// ```
///
/// After a match, one could then inspect the capture groups to figure out
/// which alternates matched. The problem is that it is hard to make this
/// approach scale when there are many regexes since the overlap between each
/// alternate isn't always obvious to reason about.
#[derive(Clone)]
pub struct RegexSet {
    /// The compiled multi-pattern matcher. Every search method on this type
    /// delegates to it, and its pattern count is what `len` reports.
    pub(crate) meta: meta::Regex,
    /// The pattern strings exposed through [`RegexSet::patterns`].
    pub(crate) patterns: alloc::sync::Arc<[String]>,
}

140 | |

141 | impl RegexSet { |

142 | /// Create a new regex set with the given regular expressions. |

143 | /// |

144 | /// This takes an iterator of `S`, where `S` is something that can produce |

145 | /// a `&str`. If any of the strings in the iterator are not valid regular |

146 | /// expressions, then an error is returned. |

147 | /// |

148 | /// # Example |

149 | /// |

150 | /// Create a new regex set from an iterator of strings: |

151 | /// |

152 | /// ``` |

153 | /// use regex::bytes::RegexSet; |

154 | /// |

155 | /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); |

156 | /// assert!(set.is_match(b"foo")); |

157 | /// ``` |

158 | pub fn new<I, S>(exprs: I) -> Result<RegexSet, Error> |

159 | where |

160 | S: AsRef<str>, |

161 | I: IntoIterator<Item = S>, |

162 | { |

163 | RegexSetBuilder::new(exprs).build() |

164 | } |

165 | |

166 | /// Create a new empty regex set. |

167 | /// |

168 | /// An empty regex never matches anything. |

169 | /// |

170 | /// This is a convenience function for `RegexSet::new([])`, but doesn't |

171 | /// require one to specify the type of the input. |

172 | /// |

173 | /// # Example |

174 | /// |

175 | /// ``` |

176 | /// use regex::bytes::RegexSet; |

177 | /// |

178 | /// let set = RegexSet::empty(); |

179 | /// assert!(set.is_empty()); |

180 | /// // an empty set matches nothing |

181 | /// assert!(!set.is_match(b"")); |

182 | /// ``` |

183 | pub fn empty() -> RegexSet { |

184 | let empty: [&str; 0] = []; |

185 | RegexSetBuilder::new(empty).build().unwrap() |

186 | } |

187 | |

188 | /// Returns true if and only if one of the regexes in this set matches |

189 | /// the haystack given. |

190 | /// |

191 | /// This method should be preferred if you only need to test whether any |

192 | /// of the regexes in the set should match, but don't care about *which* |

193 | /// regexes matched. This is because the underlying matching engine will |

194 | /// quit immediately after seeing the first match instead of continuing to |

195 | /// find all matches. |

196 | /// |

197 | /// Note that as with searches using [`Regex`](crate::bytes::Regex), the |

198 | /// expression is unanchored by default. That is, if the regex does not |

199 | /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted |

200 | /// to match anywhere in the haystack. |

201 | /// |

202 | /// # Example |

203 | /// |

204 | /// Tests whether a set matches somewhere in a haystack: |

205 | /// |

206 | /// ``` |

207 | /// use regex::bytes::RegexSet; |

208 | /// |

209 | /// let set = RegexSet::new([r"\w+", r"\d+"]).unwrap(); |

210 | /// assert!(set.is_match(b"foo")); |

211 | /// assert!(!set.is_match("☃".as_bytes())); |

212 | /// ``` |

213 | #[inline] |

214 | pub fn is_match(&self, haystack: &[u8]) -> bool { |

215 | self.is_match_at(haystack, 0) |

216 | } |

217 | |

218 | /// Returns true if and only if one of the regexes in this set matches the |

219 | /// haystack given, with the search starting at the offset given. |

220 | /// |

221 | /// The significance of the starting point is that it takes the surrounding |

222 | /// context into consideration. For example, the `\A` anchor can only |

223 | /// match when `start == 0`. |

224 | /// |

225 | /// # Panics |

226 | /// |

227 | /// This panics when `start >= haystack.len() + 1`. |

228 | /// |

229 | /// # Example |

230 | /// |

231 | /// This example shows the significance of `start`. Namely, consider a |

232 | /// haystack `foobar` and a desire to execute a search starting at offset |

233 | /// `3`. You could search a substring explicitly, but then the look-around |

234 | /// assertions won't work correctly. Instead, you can use this method to |

235 | /// specify the start position of a search. |

236 | /// |

237 | /// ``` |

238 | /// use regex::bytes::RegexSet; |

239 | /// |

240 | /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); |

241 | /// let hay = b"foobar"; |

242 | /// // We get a match here, but it's probably not intended. |

243 | /// assert!(set.is_match(&hay[3..])); |

244 | /// // No match because the assertions take the context into account. |

245 | /// assert!(!set.is_match_at(hay, 3)); |

246 | /// ``` |

247 | #[inline] |

248 | pub fn is_match_at(&self, haystack: &[u8], start: usize) -> bool { |

249 | self.meta.is_match(Input::new(haystack).span(start..haystack.len())) |

250 | } |

251 | |

252 | /// Returns the set of regexes that match in the given haystack. |

253 | /// |

254 | /// The set returned contains the index of each regex that matches in |

255 | /// the given haystack. The index is in correspondence with the order of |

256 | /// regular expressions given to `RegexSet`'s constructor. |

257 | /// |

258 | /// The set can also be used to iterate over the matched indices. The order |

259 | /// of iteration is always ascending with respect to the matching indices. |

260 | /// |

261 | /// Note that as with searches using [`Regex`](crate::bytes::Regex), the |

262 | /// expression is unanchored by default. That is, if the regex does not |

263 | /// start with `^` or `\A`, or end with `$` or `\z`, then it is permitted |

264 | /// to match anywhere in the haystack. |

265 | /// |

266 | /// # Example |

267 | /// |

268 | /// Tests which regular expressions match the given haystack: |

269 | /// |

270 | /// ``` |

271 | /// use regex::bytes::RegexSet; |

272 | /// |

273 | /// let set = RegexSet::new([ |

274 | /// r"\w+", |

275 | /// r"\d+", |

276 | /// r"\pL+", |

277 | /// r"foo", |

278 | /// r"bar", |

279 | /// r"barfoo", |

280 | /// r"foobar", |

281 | /// ]).unwrap(); |

282 | /// let matches: Vec<_> = set.matches(b"foobar").into_iter().collect(); |

283 | /// assert_eq!(matches, vec![0, 2, 3, 4, 6]); |

284 | /// |

285 | /// // You can also test whether a particular regex matched: |

286 | /// let matches = set.matches(b"foobar"); |

287 | /// assert!(!matches.matched(5)); |

288 | /// assert!(matches.matched(6)); |

289 | /// ``` |

290 | #[inline] |

291 | pub fn matches(&self, haystack: &[u8]) -> SetMatches { |

292 | self.matches_at(haystack, 0) |

293 | } |

294 | |

295 | /// Returns the set of regexes that match in the given haystack. |

296 | /// |

297 | /// The set returned contains the index of each regex that matches in |

298 | /// the given haystack. The index is in correspondence with the order of |

299 | /// regular expressions given to `RegexSet`'s constructor. |

300 | /// |

301 | /// The set can also be used to iterate over the matched indices. The order |

302 | /// of iteration is always ascending with respect to the matching indices. |

303 | /// |

304 | /// The significance of the starting point is that it takes the surrounding |

305 | /// context into consideration. For example, the `\A` anchor can only |

306 | /// match when `start == 0`. |

307 | /// |

308 | /// # Panics |

309 | /// |

310 | /// This panics when `start >= haystack.len() + 1`. |

311 | /// |

312 | /// # Example |

313 | /// |

314 | /// Tests which regular expressions match the given haystack: |

315 | /// |

316 | /// ``` |

317 | /// use regex::bytes::RegexSet; |

318 | /// |

319 | /// let set = RegexSet::new([r"\bbar\b", r"(?m)^bar$"]).unwrap(); |

320 | /// let hay = b"foobar"; |

321 | /// // We get matches here, but it's probably not intended. |

322 | /// let matches: Vec<_> = set.matches(&hay[3..]).into_iter().collect(); |

323 | /// assert_eq!(matches, vec![0, 1]); |

324 | /// // No matches because the assertions take the context into account. |

325 | /// let matches: Vec<_> = set.matches_at(hay, 3).into_iter().collect(); |

326 | /// assert_eq!(matches, vec![]); |

327 | /// ``` |

328 | #[inline] |

329 | pub fn matches_at(&self, haystack: &[u8], start: usize) -> SetMatches { |

330 | let input = Input::new(haystack).span(start..haystack.len()); |

331 | let mut patset = PatternSet::new(self.meta.pattern_len()); |

332 | self.meta.which_overlapping_matches(&input, &mut patset); |

333 | SetMatches(patset) |

334 | } |

335 | |

336 | /// Returns the same as matches, but starts the search at the given |

337 | /// offset and stores the matches into the slice given. |

338 | /// |

339 | /// The significance of the starting point is that it takes the surrounding |

340 | /// context into consideration. For example, the `\A` anchor can only |

341 | /// match when `start == 0`. |

342 | /// |

343 | /// `matches` must have a length that is at least the number of regexes |

344 | /// in this set. |

345 | /// |

346 | /// This method returns true if and only if at least one member of |

347 | /// `matches` is true after executing the set against `haystack`. |

348 | #[doc(hidden)] |

349 | #[inline] |

350 | pub fn matches_read_at( |

351 | &self, |

352 | matches: &mut [bool], |

353 | haystack: &[u8], |

354 | start: usize, |

355 | ) -> bool { |

356 | // This is pretty dumb. We should try to fix this, but the |

357 | // regex-automata API doesn't provide a way to store matches in an |

358 | // arbitrary &mut [bool]. Thankfully, this API is is doc(hidden) and |

359 | // thus not public... But regex-capi currently uses it. We should |

360 | // fix regex-capi to use a PatternSet, maybe? Not sure... PatternSet |

361 | // is in regex-automata, not regex. So maybe we should just accept a |

362 | // 'SetMatches', which is basically just a newtype around PatternSet. |

363 | let mut patset = PatternSet::new(self.meta.pattern_len()); |

364 | let mut input = Input::new(haystack); |

365 | input.set_start(start); |

366 | self.meta.which_overlapping_matches(&input, &mut patset); |

367 | for pid in patset.iter() { |

368 | matches[pid] = true; |

369 | } |

370 | !patset.is_empty() |

371 | } |

372 | |

373 | /// An alias for `matches_read_at` to preserve backward compatibility. |

374 | /// |

375 | /// The `regex-capi` crate used this method, so to avoid breaking that |

376 | /// crate, we continue to export it as an undocumented API. |

377 | #[doc(hidden)] |

378 | #[inline] |

379 | pub fn read_matches_at( |

380 | &self, |

381 | matches: &mut [bool], |

382 | haystack: &[u8], |

383 | start: usize, |

384 | ) -> bool { |

385 | self.matches_read_at(matches, haystack, start) |

386 | } |

387 | |

388 | /// Returns the total number of regexes in this set. |

389 | /// |

390 | /// # Example |

391 | /// |

392 | /// ``` |

393 | /// use regex::bytes::RegexSet; |

394 | /// |

395 | /// assert_eq!(0, RegexSet::empty().len()); |

396 | /// assert_eq!(1, RegexSet::new([r"[0-9]"]).unwrap().len()); |

397 | /// assert_eq!(2, RegexSet::new([r"[0-9]", r"[a-z]"]).unwrap().len()); |

398 | /// ``` |

399 | #[inline] |

400 | pub fn len(&self) -> usize { |

401 | self.meta.pattern_len() |

402 | } |

403 | |

404 | /// Returns `true` if this set contains no regexes. |

405 | /// |

406 | /// # Example |

407 | /// |

408 | /// ``` |

409 | /// use regex::bytes::RegexSet; |

410 | /// |

411 | /// assert!(RegexSet::empty().is_empty()); |

412 | /// assert!(!RegexSet::new([r"[0-9]"]).unwrap().is_empty()); |

413 | /// ``` |

414 | #[inline] |

415 | pub fn is_empty(&self) -> bool { |

416 | self.meta.pattern_len() == 0 |

417 | } |

418 | |

419 | /// Returns the regex patterns that this regex set was constructed from. |

420 | /// |

421 | /// This function can be used to determine the pattern for a match. The |

422 | /// slice returned has exactly as many patterns givens to this regex set, |

423 | /// and the order of the slice is the same as the order of the patterns |

424 | /// provided to the set. |

425 | /// |

426 | /// # Example |

427 | /// |

428 | /// ``` |

429 | /// use regex::bytes::RegexSet; |

430 | /// |

431 | /// let set = RegexSet::new(&[ |

432 | /// r"\w+", |

433 | /// r"\d+", |

434 | /// r"\pL+", |

435 | /// r"foo", |

436 | /// r"bar", |

437 | /// r"barfoo", |

438 | /// r"foobar", |

439 | /// ]).unwrap(); |

440 | /// let matches: Vec<_> = set |

441 | /// .matches(b"foobar") |

442 | /// .into_iter() |

443 | /// .map(|index| &set.patterns()[index]) |

444 | /// .collect(); |

445 | /// assert_eq!(matches, vec![r"\w+", r"\pL+", r"foo", r"bar", r"foobar"]); |

446 | /// ``` |

447 | #[inline] |

448 | pub fn patterns(&self) -> &[String] { |

449 | &self.patterns |

450 | } |

451 | } |

452 | |

453 | impl Default for RegexSet { |

454 | fn default() -> Self { |

455 | RegexSet::empty() |

456 | } |

457 | } |

458 | |

/// A set of matches returned by a regex set.
///
/// Values of this type are constructed by [`RegexSet::matches`] and
/// [`RegexSet::matches_at`].
///
/// This is a newtype around a `PatternSet` whose capacity is the number of
/// regexes in the set that produced it.
#[derive(Clone, Debug)]
pub struct SetMatches(PatternSet);

464 | |

465 | impl SetMatches { |

466 | /// Whether this set contains any matches. |

467 | /// |

468 | /// # Example |

469 | /// |

470 | /// ``` |

471 | /// use regex::bytes::RegexSet; |

472 | /// |

473 | /// let set = RegexSet::new(&[ |

474 | /// r"[a-z]+@[a-z]+\.(com|org|net)", |

475 | /// r"[a-z]+\.(com|org|net)", |

476 | /// ]).unwrap(); |

477 | /// let matches = set.matches(b"foo@example.com"); |

478 | /// assert!(matches.matched_any()); |

479 | /// ``` |

480 | #[inline] |

481 | pub fn matched_any(&self) -> bool { |

482 | !self.0.is_empty() |

483 | } |

484 | |

485 | /// Whether the regex at the given index matched. |

486 | /// |

487 | /// The index for a regex is determined by its insertion order upon the |

488 | /// initial construction of a `RegexSet`, starting at `0`. |

489 | /// |

490 | /// # Panics |

491 | /// |

492 | /// If `index` is greater than or equal to the number of regexes in the |

493 | /// original set that produced these matches. Equivalently, when `index` |

494 | /// is greater than or equal to [`SetMatches::len`]. |

495 | /// |

496 | /// # Example |

497 | /// |

498 | /// ``` |

499 | /// use regex::bytes::RegexSet; |

500 | /// |

501 | /// let set = RegexSet::new([ |

502 | /// r"[a-z]+@[a-z]+\.(com|org|net)", |

503 | /// r"[a-z]+\.(com|org|net)", |

504 | /// ]).unwrap(); |

505 | /// let matches = set.matches(b"example.com"); |

506 | /// assert!(!matches.matched(0)); |

507 | /// assert!(matches.matched(1)); |

508 | /// ``` |

509 | #[inline] |

510 | pub fn matched(&self, index: usize) -> bool { |

511 | self.0.contains(PatternID::new_unchecked(index)) |

512 | } |

513 | |

514 | /// The total number of regexes in the set that created these matches. |

515 | /// |

516 | /// **WARNING:** This always returns the same value as [`RegexSet::len`]. |

517 | /// In particular, it does *not* return the number of elements yielded by |

518 | /// [`SetMatches::iter`]. The only way to determine the total number of |

519 | /// matched regexes is to iterate over them. |

520 | /// |

521 | /// # Example |

522 | /// |

523 | /// Notice that this method returns the total number of regexes in the |

524 | /// original set, and *not* the total number of regexes that matched. |

525 | /// |

526 | /// ``` |

527 | /// use regex::bytes::RegexSet; |

528 | /// |

529 | /// let set = RegexSet::new([ |

530 | /// r"[a-z]+@[a-z]+\.(com|org|net)", |

531 | /// r"[a-z]+\.(com|org|net)", |

532 | /// ]).unwrap(); |

533 | /// let matches = set.matches(b"example.com"); |

534 | /// // Total number of patterns that matched. |

535 | /// assert_eq!(1, matches.iter().count()); |

536 | /// // Total number of patterns in the set. |

537 | /// assert_eq!(2, matches.len()); |

538 | /// ``` |

539 | #[inline] |

540 | pub fn len(&self) -> usize { |

541 | self.0.capacity() |

542 | } |

543 | |

544 | /// Returns an iterator over the indices of the regexes that matched. |

545 | /// |

546 | /// This will always produces matches in ascending order, where the index |

547 | /// yielded corresponds to the index of the regex that matched with respect |

548 | /// to its position when initially building the set. |

549 | /// |

550 | /// # Example |

551 | /// |

552 | /// ``` |

553 | /// use regex::bytes::RegexSet; |

554 | /// |

555 | /// let set = RegexSet::new([ |

556 | /// r"[0-9]", |

557 | /// r"[a-z]", |

558 | /// r"[A-Z]", |

559 | /// r"\p{Greek}", |

560 | /// ]).unwrap(); |

561 | /// let hay = "βa1".as_bytes(); |

562 | /// let matches: Vec<_> = set.matches(hay).iter().collect(); |

563 | /// assert_eq!(matches, vec![0, 1, 3]); |

564 | /// ``` |

565 | /// |

566 | /// Note that `SetMatches` also implemnets the `IntoIterator` trait, so |

567 | /// this method is not always needed. For example: |

568 | /// |

569 | /// ``` |

570 | /// use regex::bytes::RegexSet; |

571 | /// |

572 | /// let set = RegexSet::new([ |

573 | /// r"[0-9]", |

574 | /// r"[a-z]", |

575 | /// r"[A-Z]", |

576 | /// r"\p{Greek}", |

577 | /// ]).unwrap(); |

578 | /// let hay = "βa1".as_bytes(); |

579 | /// let mut matches = vec![]; |

580 | /// for index in set.matches(hay) { |

581 | /// matches.push(index); |

582 | /// } |

583 | /// assert_eq!(matches, vec![0, 1, 3]); |

584 | /// ``` |

585 | #[inline] |

586 | pub fn iter(&self) -> SetMatchesIter<'_> { |

587 | SetMatchesIter(self.0.iter()) |

588 | } |

589 | } |

590 | |

591 | impl IntoIterator for SetMatches { |

592 | type IntoIter = SetMatchesIntoIter; |

593 | type Item = usize; |

594 | |

595 | fn into_iter(self) -> Self::IntoIter { |

596 | let it = 0..self.0.capacity(); |

597 | SetMatchesIntoIter { patset: self.0, it } |

598 | } |

599 | } |

600 | |

601 | impl<'a> IntoIterator for &'a SetMatches { |

602 | type IntoIter = SetMatchesIter<'a>; |

603 | type Item = usize; |

604 | |

605 | fn into_iter(self) -> Self::IntoIter { |

606 | self.iter() |

607 | } |

608 | } |

609 | |

/// An owned iterator over the set of matches from a regex set.
///
/// This will always produce matches in ascending order of index, where the
/// index corresponds to the index of the regex that matched with respect to
/// its position when initially building the set.
///
/// This iterator is created by calling `SetMatches::into_iter` via the
/// `IntoIterator` trait. This is automatically done in `for` loops.
///
/// # Example
///
/// ```
/// use regex::bytes::RegexSet;
///
/// let set = RegexSet::new([
///     r"[0-9]",
///     r"[a-z]",
///     r"[A-Z]",
///     r"\p{Greek}",
/// ]).unwrap();
/// let hay = "βa1".as_bytes();
/// let mut matches = vec![];
/// for index in set.matches(hay) {
///     matches.push(index);
/// }
/// assert_eq!(matches, vec![0, 1, 3]);
/// ```
#[derive(Debug)]
pub struct SetMatchesIntoIter {
    // The set of matched patterns, owned by this iterator.
    patset: PatternSet,
    // Candidate indices not yet visited from either end. It starts out as
    // `0..patset.capacity()`.
    it: core::ops::Range<usize>,
}

642 | |

643 | impl Iterator for SetMatchesIntoIter { |

644 | type Item = usize; |

645 | |

646 | fn next(&mut self) -> Option<usize> { |

647 | loop { |

648 | let id = self.it.next()?; |

649 | if self.patset.contains(PatternID::new_unchecked(id)) { |

650 | return Some(id); |

651 | } |

652 | } |

653 | } |

654 | |

655 | fn size_hint(&self) -> (usize, Option<usize>) { |

656 | self.it.size_hint() |

657 | } |

658 | } |

659 | |

660 | impl DoubleEndedIterator for SetMatchesIntoIter { |

661 | fn next_back(&mut self) -> Option<usize> { |

662 | loop { |

663 | let id = self.it.next_back()?; |

664 | if self.patset.contains(PatternID::new_unchecked(id)) { |

665 | return Some(id); |

666 | } |

667 | } |

668 | } |

669 | } |

670 | |

// `next` only returns `None` when the inner `Range<usize>` does, and a
// `Range` keeps returning `None` once it is exhausted.
impl core::iter::FusedIterator for SetMatchesIntoIter {}

672 | |

/// A borrowed iterator over the set of matches from a regex set.
///
/// The lifetime `'a` refers to the lifetime of the [`SetMatches`] value that
/// created this iterator.
///
/// This will always produce matches in ascending order, where the index
/// corresponds to the index of the regex that matched with respect to its
/// position when initially building the set.
///
/// This iterator is created by the [`SetMatches::iter`] method.
#[derive(Clone, Debug)]
pub struct SetMatchesIter<'a>(PatternSetIter<'a>);

685 | |

686 | impl<'a> Iterator for SetMatchesIter<'a> { |

687 | type Item = usize; |

688 | |

689 | fn next(&mut self) -> Option<usize> { |

690 | self.0.next().map(|pid| pid.as_usize()) |

691 | } |

692 | |

693 | fn size_hint(&self) -> (usize, Option<usize>) { |

694 | self.0.size_hint() |

695 | } |

696 | } |

697 | |

698 | impl<'a> DoubleEndedIterator for SetMatchesIter<'a> { |

699 | fn next_back(&mut self) -> Option<usize> { |

700 | self.0.next_back().map(|pid| pid.as_usize()) |

701 | } |

702 | } |

703 | |

// NOTE(review): `next` forwards directly to the wrapped `PatternSetIter`, so
// this marker relies on that iterator continuing to return `None` after it
// is exhausted -- confirm against regex-automata.
impl<'a> core::iter::FusedIterator for SetMatchesIter<'a> {}

705 | |

706 | impl core::fmt::Debug for RegexSet { |

707 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |

708 | write!(f, "RegexSet({:?})", self.patterns()) |

709 | } |

710 | } |

711 |