| 1 | use { |
| 2 | anyhow::Result, |
| 3 | regex::{Regex, RegexBuilder}, |
| 4 | regex_test::{ |
| 5 | CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner, |
| 6 | }, |
| 7 | }; |
| 8 | |
/// Runs the shared test suite against the top-level `Regex` API.
///
/// Tests for which `compiles()` is true are expanded under the additional
/// names `is_match`, `find` and `captures`, which `run_test` dispatches on.
/// `super::BLACKLIST` is handed to the runner's blacklist, and the
/// accumulated results are asserted at the end.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    // One expansion per search API exercised by `run_test`.
    runner.expand(&["is_match", "find", "captures"], |t| t.compiles());
    runner.blacklist_iter(super::BLACKLIST);
    // The suite is loaded only after the runner has been configured; each
    // test is then passed to `compiler` to be built (or skipped) and run.
    let suite = crate::suite()?;
    runner.test_iter(suite.iter(), compiler);
    runner.assert();
    Ok(())
}
| 20 | |
| 21 | fn run_test(re: &Regex, test: &RegexTest) -> TestResult { |
| 22 | let hay = match std::str::from_utf8(test.haystack()) { |
| 23 | Ok(hay) => hay, |
| 24 | Err(err) => { |
| 25 | return TestResult::fail(&format!( |
| 26 | "haystack is not valid UTF-8: {}" , |
| 27 | err |
| 28 | )); |
| 29 | } |
| 30 | }; |
| 31 | match test.additional_name() { |
| 32 | "is_match" => TestResult::matched(re.is_match(hay)), |
| 33 | "find" => TestResult::matches( |
| 34 | re.find_iter(hay) |
| 35 | .take(test.match_limit().unwrap_or(std::usize::MAX)) |
| 36 | .map(|m| Match { |
| 37 | id: 0, |
| 38 | span: Span { start: m.start(), end: m.end() }, |
| 39 | }), |
| 40 | ), |
| 41 | "captures" => { |
| 42 | let it = re |
| 43 | .captures_iter(hay) |
| 44 | .take(test.match_limit().unwrap_or(std::usize::MAX)) |
| 45 | .map(|caps| testify_captures(&caps)); |
| 46 | TestResult::captures(it) |
| 47 | } |
| 48 | name => TestResult::fail(&format!("unrecognized test name: {}" , name)), |
| 49 | } |
| 50 | } |
| 51 | |
| 52 | /// Converts the given regex test to a closure that searches with a |
| 53 | /// `bytes::Regex`. If the test configuration is unsupported, then a |
| 54 | /// `CompiledRegex` that skips the test is returned. |
| 55 | fn compiler( |
| 56 | test: &RegexTest, |
| 57 | _patterns: &[String], |
| 58 | ) -> anyhow::Result<CompiledRegex> { |
| 59 | let skip = Ok(CompiledRegex::skip()); |
| 60 | |
| 61 | // We're only testing bytes::Regex here, which supports one pattern only. |
| 62 | let pattern = match test.regexes().len() { |
| 63 | 1 => &test.regexes()[0], |
| 64 | _ => return skip, |
| 65 | }; |
| 66 | // We only test is_match, find_iter and captures_iter. All of those are |
| 67 | // leftmost searches. |
| 68 | if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) { |
| 69 | return skip; |
| 70 | } |
| 71 | // The top-level single-pattern regex API always uses leftmost-first. |
| 72 | if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) { |
| 73 | return skip; |
| 74 | } |
| 75 | // The top-level regex API always runs unanchored searches. ... But we can |
| 76 | // handle tests that are anchored but have only one match. |
| 77 | if test.anchored() && test.match_limit() != Some(1) { |
| 78 | return skip; |
| 79 | } |
| 80 | // We don't support tests with explicit search bounds. We could probably |
| 81 | // support this by using the 'find_at' (and such) APIs. |
| 82 | let bounds = test.bounds(); |
| 83 | if !(bounds.start == 0 && bounds.end == test.haystack().len()) { |
| 84 | return skip; |
| 85 | } |
| 86 | // The Regex API specifically does not support disabling UTF-8 mode because |
| 87 | // it can only search &str which is always valid UTF-8. |
| 88 | if !test.utf8() { |
| 89 | return skip; |
| 90 | } |
| 91 | // If the test requires Unicode but the Unicode feature isn't enabled, |
| 92 | // skip it. This is a little aggressive, but the test suite doesn't |
| 93 | // have any easy way of communicating which Unicode features are needed. |
| 94 | if test.unicode() && !cfg!(feature = "unicode" ) { |
| 95 | return skip; |
| 96 | } |
| 97 | let re = RegexBuilder::new(pattern) |
| 98 | .case_insensitive(test.case_insensitive()) |
| 99 | .unicode(test.unicode()) |
| 100 | .line_terminator(test.line_terminator()) |
| 101 | .build()?; |
| 102 | Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) |
| 103 | } |
| 104 | |
| 105 | /// Convert `Captures` into the test suite's capture values. |
| 106 | fn testify_captures(caps: ®ex::Captures<'_>) -> regex_test::Captures { |
| 107 | let spans = caps.iter().map(|group| { |
| 108 | group.map(|m| regex_test::Span { start: m.start(), end: m.end() }) |
| 109 | }); |
| 110 | // This unwrap is OK because we assume our 'caps' represents a match, and |
| 111 | // a match always gives a non-zero number of groups with the first group |
| 112 | // being non-None. |
| 113 | regex_test::Captures::new(0, spans).unwrap() |
| 114 | } |
| 115 | |