| 1 | use { |
| 2 | anyhow::Result, |
| 3 | regex::bytes::{Regex, RegexBuilder}, |
| 4 | regex_test::{ |
| 5 | CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner, |
| 6 | }, |
| 7 | }; |
| 8 | |
/// Runs the shared test suite against `regex::bytes::Regex` as built by
/// `compiler`.
///
/// Tests for which `compiles()` is true are expanded with the additional
/// names `is_match`, `find` and `captures`; `run_test` dispatches on those
/// names. Entries in `super::BLACKLIST` are handed to the runner's blacklist.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    runner.expand(&["is_match", "find", "captures"], |test| test.compiles());
    runner.blacklist_iter(super::BLACKLIST);
    // The suite is loaded only after the runner is configured, as before.
    runner.test_iter(crate::suite()?.iter(), compiler);
    runner.assert();
    Ok(())
}
| 20 | |
| 21 | fn run_test(re: &Regex, test: &RegexTest) -> TestResult { |
| 22 | match test.additional_name() { |
| 23 | "is_match" => TestResult::matched(re.is_match(test.haystack())), |
| 24 | "find" => TestResult::matches( |
| 25 | re.find_iter(test.haystack()) |
| 26 | .take(test.match_limit().unwrap_or(std::usize::MAX)) |
| 27 | .map(|m| Match { |
| 28 | id: 0, |
| 29 | span: Span { start: m.start(), end: m.end() }, |
| 30 | }), |
| 31 | ), |
| 32 | "captures" => { |
| 33 | let it = re |
| 34 | .captures_iter(test.haystack()) |
| 35 | .take(test.match_limit().unwrap_or(std::usize::MAX)) |
| 36 | .map(|caps| testify_captures(&caps)); |
| 37 | TestResult::captures(it) |
| 38 | } |
| 39 | name => TestResult::fail(&format!("unrecognized test name: {}" , name)), |
| 40 | } |
| 41 | } |
| 42 | |
| 43 | /// Converts the given regex test to a closure that searches with a |
| 44 | /// `bytes::Regex`. If the test configuration is unsupported, then a |
| 45 | /// `CompiledRegex` that skips the test is returned. |
| 46 | fn compiler( |
| 47 | test: &RegexTest, |
| 48 | _patterns: &[String], |
| 49 | ) -> anyhow::Result<CompiledRegex> { |
| 50 | let skip = Ok(CompiledRegex::skip()); |
| 51 | |
| 52 | // We're only testing bytes::Regex here, which supports one pattern only. |
| 53 | let pattern = match test.regexes().len() { |
| 54 | 1 => &test.regexes()[0], |
| 55 | _ => return skip, |
| 56 | }; |
| 57 | // We only test is_match, find_iter and captures_iter. All of those are |
| 58 | // leftmost searches. |
| 59 | if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) { |
| 60 | return skip; |
| 61 | } |
| 62 | // The top-level single-pattern regex API always uses leftmost-first. |
| 63 | if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) { |
| 64 | return skip; |
| 65 | } |
| 66 | // The top-level regex API always runs unanchored searches. ... But we can |
| 67 | // handle tests that are anchored but have only one match. |
| 68 | if test.anchored() && test.match_limit() != Some(1) { |
| 69 | return skip; |
| 70 | } |
| 71 | // We don't support tests with explicit search bounds. We could probably |
| 72 | // support this by using the 'find_at' (and such) APIs. |
| 73 | let bounds = test.bounds(); |
| 74 | if !(bounds.start == 0 && bounds.end == test.haystack().len()) { |
| 75 | return skip; |
| 76 | } |
| 77 | // The bytes::Regex API specifically does not support enabling UTF-8 mode. |
| 78 | // It could I suppose, but currently it does not. That is, it permits |
| 79 | // matches to have offsets that split codepoints. |
| 80 | if test.utf8() { |
| 81 | return skip; |
| 82 | } |
| 83 | // If the test requires Unicode but the Unicode feature isn't enabled, |
| 84 | // skip it. This is a little aggressive, but the test suite doesn't |
| 85 | // have any easy way of communicating which Unicode features are needed. |
| 86 | if test.unicode() && !cfg!(feature = "unicode" ) { |
| 87 | return skip; |
| 88 | } |
| 89 | let re = RegexBuilder::new(pattern) |
| 90 | .case_insensitive(test.case_insensitive()) |
| 91 | .unicode(test.unicode()) |
| 92 | .line_terminator(test.line_terminator()) |
| 93 | .build()?; |
| 94 | Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) |
| 95 | } |
| 96 | |
| 97 | /// Convert `Captures` into the test suite's capture values. |
| 98 | fn testify_captures( |
| 99 | caps: ®ex::bytes::Captures<'_>, |
| 100 | ) -> regex_test::Captures { |
| 101 | let spans = caps.iter().map(|group| { |
| 102 | group.map(|m| regex_test::Span { start: m.start(), end: m.end() }) |
| 103 | }); |
| 104 | // This unwrap is OK because we assume our 'caps' represents a match, and |
| 105 | // a match always gives a non-zero number of groups with the first group |
| 106 | // being non-None. |
| 107 | regex_test::Captures::new(0, spans).unwrap() |
| 108 | } |
| 109 | |