1 | use { |
2 | anyhow::Result, |
3 | regex::{Regex, RegexBuilder}, |
4 | regex_test::{ |
5 | CompiledRegex, Match, RegexTest, Span, TestResult, TestRunner, |
6 | }, |
7 | }; |
8 | |
/// Runs the shared regex test suite against the default configuration of the
/// top-level `Regex` API, exercising the `is_match`, `find` and `captures`
/// variants of every test that is expected to compile.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    // Each compiling test is expanded into one sub-test per search routine;
    // `run_test` dispatches on that additional name.
    let configured = runner
        .expand(&["is_match", "find", "captures"], |t| t.compiles())
        .blacklist_iter(super::BLACKLIST);
    // The suite is loaded only after the runner has been configured.
    let suite = crate::suite()?;
    configured.test_iter(suite.iter(), compiler).assert();
    Ok(())
}
20 | |
21 | fn run_test(re: &Regex, test: &RegexTest) -> TestResult { |
22 | let hay = match std::str::from_utf8(test.haystack()) { |
23 | Ok(hay) => hay, |
24 | Err(err) => { |
25 | return TestResult::fail(&format!( |
26 | "haystack is not valid UTF-8: {}" , |
27 | err |
28 | )); |
29 | } |
30 | }; |
31 | match test.additional_name() { |
32 | "is_match" => TestResult::matched(re.is_match(hay)), |
33 | "find" => TestResult::matches( |
34 | re.find_iter(hay) |
35 | .take(test.match_limit().unwrap_or(std::usize::MAX)) |
36 | .map(|m| Match { |
37 | id: 0, |
38 | span: Span { start: m.start(), end: m.end() }, |
39 | }), |
40 | ), |
41 | "captures" => { |
42 | let it = re |
43 | .captures_iter(hay) |
44 | .take(test.match_limit().unwrap_or(std::usize::MAX)) |
45 | .map(|caps| testify_captures(&caps)); |
46 | TestResult::captures(it) |
47 | } |
48 | name => TestResult::fail(&format!("unrecognized test name: {}" , name)), |
49 | } |
50 | } |
51 | |
52 | /// Converts the given regex test to a closure that searches with a |
53 | /// `bytes::Regex`. If the test configuration is unsupported, then a |
54 | /// `CompiledRegex` that skips the test is returned. |
55 | fn compiler( |
56 | test: &RegexTest, |
57 | _patterns: &[String], |
58 | ) -> anyhow::Result<CompiledRegex> { |
59 | let skip = Ok(CompiledRegex::skip()); |
60 | |
61 | // We're only testing bytes::Regex here, which supports one pattern only. |
62 | let pattern = match test.regexes().len() { |
63 | 1 => &test.regexes()[0], |
64 | _ => return skip, |
65 | }; |
66 | // We only test is_match, find_iter and captures_iter. All of those are |
67 | // leftmost searches. |
68 | if !matches!(test.search_kind(), regex_test::SearchKind::Leftmost) { |
69 | return skip; |
70 | } |
71 | // The top-level single-pattern regex API always uses leftmost-first. |
72 | if !matches!(test.match_kind(), regex_test::MatchKind::LeftmostFirst) { |
73 | return skip; |
74 | } |
75 | // The top-level regex API always runs unanchored searches. ... But we can |
76 | // handle tests that are anchored but have only one match. |
77 | if test.anchored() && test.match_limit() != Some(1) { |
78 | return skip; |
79 | } |
80 | // We don't support tests with explicit search bounds. We could probably |
81 | // support this by using the 'find_at' (and such) APIs. |
82 | let bounds = test.bounds(); |
83 | if !(bounds.start == 0 && bounds.end == test.haystack().len()) { |
84 | return skip; |
85 | } |
86 | // The Regex API specifically does not support disabling UTF-8 mode because |
87 | // it can only search &str which is always valid UTF-8. |
88 | if !test.utf8() { |
89 | return skip; |
90 | } |
91 | // If the test requires Unicode but the Unicode feature isn't enabled, |
92 | // skip it. This is a little aggressive, but the test suite doesn't |
93 | // have any easy way of communicating which Unicode features are needed. |
94 | if test.unicode() && !cfg!(feature = "unicode" ) { |
95 | return skip; |
96 | } |
97 | let re = RegexBuilder::new(pattern) |
98 | .case_insensitive(test.case_insensitive()) |
99 | .unicode(test.unicode()) |
100 | .line_terminator(test.line_terminator()) |
101 | .build()?; |
102 | Ok(CompiledRegex::compiled(move |test| run_test(&re, test))) |
103 | } |
104 | |
105 | /// Convert `Captures` into the test suite's capture values. |
106 | fn testify_captures(caps: ®ex::Captures<'_>) -> regex_test::Captures { |
107 | let spans = caps.iter().map(|group| { |
108 | group.map(|m| regex_test::Span { start: m.start(), end: m.end() }) |
109 | }); |
110 | // This unwrap is OK because we assume our 'caps' represents a match, and |
111 | // a match always gives a non-zero number of groups with the first group |
112 | // being non-None. |
113 | regex_test::Captures::new(0, spans).unwrap() |
114 | } |
115 | |