1 | use std::{collections::HashMap, format, string::String, vec::Vec}; |
---|---|

2 | |

3 | use crate::{ |

4 | AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, Anchored, Input, Match, |

5 | MatchKind, StartKind, |

6 | }; |

7 | |

8 | /// A description of a single test against an Aho-Corasick automaton. |

9 | /// |

10 | /// A single test may not necessarily pass on every configuration of an |

11 | /// Aho-Corasick automaton. The tests are categorized and grouped appropriately |

12 | /// below. |

13 | #[derive(Clone, Debug, Eq, PartialEq)] |

14 | struct SearchTest { |

15 | /// The name of this test, for debugging. |

16 | name: &'static str, |

17 | /// The patterns to search for. |

18 | patterns: &'static [&'static str], |

19 | /// The text to search. |

20 | haystack: &'static str, |

21 | /// Each match is a triple of (pattern_index, start, end), where |

22 | /// pattern_index is an index into `patterns` and `start`/`end` are indices |

23 | /// into `haystack`. |

24 | matches: &'static [(usize, usize, usize)], |

25 | } |

26 | |

27 | /// Short-hand constructor for SearchTest. We use it a lot below. |

28 | macro_rules! t { |

29 | ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => { |

30 | SearchTest { |

31 | name: stringify!($name), |

32 | patterns: $patterns, |

33 | haystack: $haystack, |

34 | matches: $matches, |

35 | } |

36 | }; |

37 | } |

38 | |

39 | /// A collection of test groups. |

40 | type TestCollection = &'static [&'static [SearchTest]]; |

41 | |

42 | // Define several collections corresponding to the different type of match |

43 | // semantics supported by Aho-Corasick. These collections have some overlap, |

44 | // but each collection should have some tests that no other collection has. |

45 | |

46 | /// Tests for Aho-Corasick's standard non-overlapping match semantics. |

47 | const AC_STANDARD_NON_OVERLAPPING: TestCollection = |

48 | &[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION]; |

49 | |

50 | /// Tests for Aho-Corasick's anchored standard non-overlapping match semantics. |

51 | const AC_STANDARD_ANCHORED_NON_OVERLAPPING: TestCollection = |

52 | &[ANCHORED_BASICS, ANCHORED_NON_OVERLAPPING, STANDARD_ANCHORED]; |

53 | |

54 | /// Tests for Aho-Corasick's standard overlapping match semantics. |

55 | const AC_STANDARD_OVERLAPPING: TestCollection = |

56 | &[BASICS, OVERLAPPING, REGRESSION]; |

57 | |

58 | /* |

59 | Iterators of anchored overlapping searches were removed from the API in |

60 | after 0.7, but we leave the tests commented out for posterity. |

61 | /// Tests for Aho-Corasick's anchored standard overlapping match semantics. |

62 | const AC_STANDARD_ANCHORED_OVERLAPPING: TestCollection = |

63 | &[ANCHORED_BASICS, ANCHORED_OVERLAPPING]; |

64 | */ |

65 | |

66 | /// Tests for Aho-Corasick's leftmost-first match semantics. |

67 | const AC_LEFTMOST_FIRST: TestCollection = |

68 | &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION]; |

69 | |

70 | /// Tests for Aho-Corasick's anchored leftmost-first match semantics. |

71 | const AC_LEFTMOST_FIRST_ANCHORED: TestCollection = &[ |

72 | ANCHORED_BASICS, |

73 | ANCHORED_NON_OVERLAPPING, |

74 | ANCHORED_LEFTMOST, |

75 | ANCHORED_LEFTMOST_FIRST, |

76 | ]; |

77 | |

78 | /// Tests for Aho-Corasick's leftmost-longest match semantics. |

79 | const AC_LEFTMOST_LONGEST: TestCollection = |

80 | &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION]; |

81 | |

82 | /// Tests for Aho-Corasick's anchored leftmost-longest match semantics. |

83 | const AC_LEFTMOST_LONGEST_ANCHORED: TestCollection = &[ |

84 | ANCHORED_BASICS, |

85 | ANCHORED_NON_OVERLAPPING, |

86 | ANCHORED_LEFTMOST, |

87 | ANCHORED_LEFTMOST_LONGEST, |

88 | ]; |

89 | |

90 | // Now define the individual tests that make up the collections above. |

91 | |

92 | /// A collection of tests for the Aho-Corasick algorithm that should always be |

93 | /// true regardless of match semantics. That is, all combinations of |

94 | /// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} |

95 | /// should produce the same answer. |

96 | const BASICS: &'static [SearchTest] = &[ |

97 | t!(basic000, &[], "", &[]), |

98 | t!(basic001, &[""], "a", &[( 0, 0, 0), (0, 1, 1)]), |

99 | t!(basic002, &["a"], "", &[]), |

100 | t!(basic010, &["a"], "a", &[( 0, 0, 1)]), |

101 | t!(basic020, &["a"], "aa", &[( 0, 0, 1), (0, 1, 2)]), |

102 | t!(basic030, &["a"], "aaa", &[( 0, 0, 1), (0, 1, 2), (0, 2, 3)]), |

103 | t!(basic040, &["a"], "aba", &[( 0, 0, 1), (0, 2, 3)]), |

104 | t!(basic050, &["a"], "bba", &[( 0, 2, 3)]), |

105 | t!(basic060, &["a"], "bbb", &[]), |

106 | t!(basic070, &["a"], "bababbbba", &[( 0, 1, 2), (0, 3, 4), (0, 8, 9)]), |

107 | t!(basic100, &["aa"], "", &[]), |

108 | t!(basic110, &["aa"], "aa", &[( 0, 0, 2)]), |

109 | t!(basic120, &["aa"], "aabbaa", &[( 0, 0, 2), (0, 4, 6)]), |

110 | t!(basic130, &["aa"], "abbab", &[]), |

111 | t!(basic140, &["aa"], "abbabaa", &[( 0, 5, 7)]), |

112 | t!(basic200, &["abc"], "abc", &[( 0, 0, 3)]), |

113 | t!(basic210, &["abc"], "zazabzabcz", &[( 0, 6, 9)]), |

114 | t!(basic220, &["abc"], "zazabczabcz", &[( 0, 3, 6), (0, 7, 10)]), |

115 | t!(basic300, &["a", "b"], "", &[]), |

116 | t!(basic310, &["a", "b"], "z", &[]), |

117 | t!(basic320, &["a", "b"], "b", &[( 1, 0, 1)]), |

118 | t!(basic330, &["a", "b"], "a", &[( 0, 0, 1)]), |

119 | t!( |

120 | basic340, |

121 | &["a", "b"], |

122 | "abba", |

123 | &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),] |

124 | ), |

125 | t!( |

126 | basic350, |

127 | &["b", "a"], |

128 | "abba", |

129 | &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),] |

130 | ), |

131 | t!(basic360, &["abc", "bc"], "xbc", &[( 1, 1, 3),]), |

132 | t!(basic400, &["foo", "bar"], "", &[]), |

133 | t!(basic410, &["foo", "bar"], "foobar", &[( 0, 0, 3), (1, 3, 6),]), |

134 | t!(basic420, &["foo", "bar"], "barfoo", &[( 1, 0, 3), (0, 3, 6),]), |

135 | t!(basic430, &["foo", "bar"], "foofoo", &[( 0, 0, 3), (0, 3, 6),]), |

136 | t!(basic440, &["foo", "bar"], "barbar", &[( 1, 0, 3), (1, 3, 6),]), |

137 | t!(basic450, &["foo", "bar"], "bafofoo", &[( 0, 4, 7),]), |

138 | t!(basic460, &["bar", "foo"], "bafofoo", &[( 1, 4, 7),]), |

139 | t!(basic470, &["foo", "bar"], "fobabar", &[( 1, 4, 7),]), |

140 | t!(basic480, &["bar", "foo"], "fobabar", &[( 0, 4, 7),]), |

141 | t!(basic600, &[""], "", &[( 0, 0, 0)]), |

142 | t!(basic610, &[""], "a", &[( 0, 0, 0), (0, 1, 1)]), |

143 | t!(basic620, &[""], "abc", &[( 0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]), |

144 | t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[( 0, 0, 7),]), |

145 | t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[( 1, 1, 10),]), |

146 | t!( |

147 | basic720, |

148 | &["yabcdef", "bcdeyabc", "abcdezghi"], |

149 | "yabcdezghi", |

150 | &[(2, 1, 10),] |

151 | ), |

152 | ]; |

153 | |

154 | /// A collection of *anchored* tests for the Aho-Corasick algorithm that should |

155 | /// always be true regardless of match semantics. That is, all combinations of |

156 | /// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} should |

157 | /// produce the same answer. |

158 | const ANCHORED_BASICS: &'static [SearchTest] = &[ |

159 | t!(abasic000, &[], "", &[]), |

160 | t!(abasic001, &[], "a", &[]), |

161 | t!(abasic002, &[], "abc", &[]), |

162 | t!(abasic010, &[""], "", &[( 0, 0, 0)]), |

163 | t!(abasic020, &[""], "a", &[( 0, 0, 0), (0, 1, 1)]), |

164 | t!(abasic030, &[""], "abc", &[( 0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]), |

165 | t!(abasic100, &["a"], "a", &[( 0, 0, 1)]), |

166 | t!(abasic110, &["a"], "aa", &[( 0, 0, 1), (0, 1, 2)]), |

167 | t!(abasic120, &["a", "b"], "ab", &[( 0, 0, 1), (1, 1, 2)]), |

168 | t!(abasic130, &["a", "b"], "ba", &[( 1, 0, 1), (0, 1, 2)]), |

169 | t!(abasic140, &["foo", "foofoo"], "foo", &[( 0, 0, 3)]), |

170 | t!(abasic150, &["foofoo", "foo"], "foo", &[( 1, 0, 3)]), |

171 | t!(abasic200, &["foo"], "foofoo foo", &[( 0, 0, 3), (0, 3, 6)]), |

172 | ]; |

173 | |

174 | /// Tests for non-overlapping standard match semantics. |

175 | /// |

176 | /// These tests generally shouldn't pass for leftmost-{first,longest}, although |

177 | /// some do in order to write clearer tests. For example, standard000 will |

178 | /// pass with leftmost-first semantics, but standard010 will not. We write |

179 | /// both to emphasize how the match semantics work. |

180 | const STANDARD: &'static [SearchTest] = &[ |

181 | t!(standard000, &["ab", "abcd"], "abcd", &[( 0, 0, 2)]), |

182 | t!(standard010, &["abcd", "ab"], "abcd", &[( 1, 0, 2)]), |

183 | t!(standard020, &["abcd", "ab", "abc"], "abcd", &[( 1, 0, 2)]), |

184 | t!(standard030, &["abcd", "abc", "ab"], "abcd", &[( 2, 0, 2)]), |

185 | t!(standard040, &["a", ""], "a", &[( 1, 0, 0), (1, 1, 1)]), |

186 | t!( |

187 | standard400, |

188 | &["abcd", "bcd", "cd", "b"], |

189 | "abcd", |

190 | &[(3, 1, 2), (2, 2, 4),] |

191 | ), |

192 | t!(standard410, &["", "a"], "a", &[( 0, 0, 0), (0, 1, 1),]), |

193 | t!(standard420, &["", "a"], "aa", &[( 0, 0, 0), (0, 1, 1), (0, 2, 2),]), |

194 | t!(standard430, &["", "a", ""], "a", &[( 0, 0, 0), (0, 1, 1),]), |

195 | t!(standard440, &["a", "", ""], "a", &[( 1, 0, 0), (1, 1, 1),]), |

196 | t!(standard450, &["", "", "a"], "a", &[( 0, 0, 0), (0, 1, 1),]), |

197 | ]; |

198 | |

199 | /// Like STANDARD, but for anchored searches. |

200 | const STANDARD_ANCHORED: &'static [SearchTest] = &[ |

201 | t!(astandard000, &["ab", "abcd"], "abcd", &[( 0, 0, 2)]), |

202 | t!(astandard010, &["abcd", "ab"], "abcd", &[( 1, 0, 2)]), |

203 | t!(astandard020, &["abcd", "ab", "abc"], "abcd", &[( 1, 0, 2)]), |

204 | t!(astandard030, &["abcd", "abc", "ab"], "abcd", &[( 2, 0, 2)]), |

205 | t!(astandard040, &["a", ""], "a", &[( 1, 0, 0), (1, 1, 1)]), |

206 | t!(astandard050, &["abcd", "bcd", "cd", "b"], "abcd", &[( 0, 0, 4)]), |

207 | t!(astandard410, &["", "a"], "a", &[( 0, 0, 0), (0, 1, 1)]), |

208 | t!(astandard420, &["", "a"], "aa", &[( 0, 0, 0), (0, 1, 1), (0, 2, 2)]), |

209 | t!(astandard430, &["", "a", ""], "a", &[( 0, 0, 0), (0, 1, 1)]), |

210 | t!(astandard440, &["a", "", ""], "a", &[( 1, 0, 0), (1, 1, 1)]), |

211 | t!(astandard450, &["", "", "a"], "a", &[( 0, 0, 0), (0, 1, 1)]), |

212 | ]; |

213 | |

214 | /// Tests for non-overlapping leftmost match semantics. These should pass for |

215 | /// both leftmost-first and leftmost-longest match kinds. Stated differently, |

216 | /// among ambiguous matches, the longest match and the match that appeared |

217 | /// first when constructing the automaton should always be the same. |

218 | const LEFTMOST: &'static [SearchTest] = &[ |

219 | t!(leftmost000, &["ab", "ab"], "abcd", &[( 0, 0, 2)]), |

220 | t!(leftmost010, &["a", ""], "a", &[( 0, 0, 1)]), |

221 | t!(leftmost011, &["a", ""], "ab", &[( 0, 0, 1), (1, 2, 2)]), |

222 | t!(leftmost020, &["", ""], "a", &[( 0, 0, 0), (0, 1, 1)]), |

223 | t!(leftmost030, &["a", "ab"], "aa", &[( 0, 0, 1), (0, 1, 2)]), |

224 | t!(leftmost031, &["ab", "a"], "aa", &[( 1, 0, 1), (1, 1, 2)]), |

225 | t!(leftmost032, &["ab", "a"], "xayabbbz", &[( 1, 1, 2), (0, 3, 5)]), |

226 | t!(leftmost300, &["abcd", "bce", "b"], "abce", &[( 1, 1, 4)]), |

227 | t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[( 2, 1, 3)]), |

228 | t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[( 1, 1, 4)]), |

229 | t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[( 3, 1, 3)]), |

230 | t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[( 2, 0, 2)]), |

231 | t!(leftmost350, &["abc", "bd", "ab"], "abd", &[( 2, 0, 2)]), |

232 | t!( |

233 | leftmost360, |

234 | &["abcdefghi", "hz", "abcdefgh"], |

235 | "abcdefghz", |

236 | &[(2, 0, 8),] |

237 | ), |

238 | t!( |

239 | leftmost370, |

240 | &["abcdefghi", "cde", "hz", "abcdefgh"], |

241 | "abcdefghz", |

242 | &[(3, 0, 8),] |

243 | ), |

244 | t!( |

245 | leftmost380, |

246 | &["abcdefghi", "hz", "abcdefgh", "a"], |

247 | "abcdefghz", |

248 | &[(2, 0, 8),] |

249 | ), |

250 | t!( |

251 | leftmost390, |

252 | &["b", "abcdefghi", "hz", "abcdefgh"], |

253 | "abcdefghz", |

254 | &[(3, 0, 8),] |

255 | ), |

256 | t!( |

257 | leftmost400, |

258 | &["h", "abcdefghi", "hz", "abcdefgh"], |

259 | "abcdefghz", |

260 | &[(3, 0, 8),] |

261 | ), |

262 | t!( |

263 | leftmost410, |

264 | &["z", "abcdefghi", "hz", "abcdefgh"], |

265 | "abcdefghz", |

266 | &[(3, 0, 8), (0, 8, 9),] |

267 | ), |

268 | ]; |

269 | |

270 | /// Like LEFTMOST, but for anchored searches. |

271 | const ANCHORED_LEFTMOST: &'static [SearchTest] = &[ |

272 | t!(aleftmost000, &["ab", "ab"], "abcd", &[( 0, 0, 2)]), |

273 | // We shouldn't allow an empty match immediately following a match, right? |

274 | t!(aleftmost010, &["a", ""], "a", &[( 0, 0, 1)]), |

275 | t!(aleftmost020, &["", ""], "a", &[( 0, 0, 0), (0, 1, 1)]), |

276 | t!(aleftmost030, &["a", "ab"], "aa", &[( 0, 0, 1), (0, 1, 2)]), |

277 | t!(aleftmost031, &["ab", "a"], "aa", &[( 1, 0, 1), (1, 1, 2)]), |

278 | t!(aleftmost032, &["ab", "a"], "xayabbbz", &[]), |

279 | t!(aleftmost300, &["abcd", "bce", "b"], "abce", &[]), |

280 | t!(aleftmost301, &["abcd", "bcd", "cd", "b"], "abcd", &[( 0, 0, 4)]), |

281 | t!(aleftmost310, &["abcd", "ce", "bc"], "abce", &[]), |

282 | t!(aleftmost320, &["abcd", "bce", "ce", "b"], "abce", &[]), |

283 | t!(aleftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[]), |

284 | t!(aleftmost340, &["bce", "cz", "bc"], "bcz", &[( 2, 0, 2)]), |

285 | t!(aleftmost350, &["abc", "bd", "ab"], "abd", &[( 2, 0, 2)]), |

286 | t!( |

287 | aleftmost360, |

288 | &["abcdefghi", "hz", "abcdefgh"], |

289 | "abcdefghz", |

290 | &[(2, 0, 8),] |

291 | ), |

292 | t!( |

293 | aleftmost370, |

294 | &["abcdefghi", "cde", "hz", "abcdefgh"], |

295 | "abcdefghz", |

296 | &[(3, 0, 8),] |

297 | ), |

298 | t!( |

299 | aleftmost380, |

300 | &["abcdefghi", "hz", "abcdefgh", "a"], |

301 | "abcdefghz", |

302 | &[(2, 0, 8),] |

303 | ), |

304 | t!( |

305 | aleftmost390, |

306 | &["b", "abcdefghi", "hz", "abcdefgh"], |

307 | "abcdefghz", |

308 | &[(3, 0, 8),] |

309 | ), |

310 | t!( |

311 | aleftmost400, |

312 | &["h", "abcdefghi", "hz", "abcdefgh"], |

313 | "abcdefghz", |

314 | &[(3, 0, 8),] |

315 | ), |

316 | t!( |

317 | aleftmost410, |

318 | &["z", "abcdefghi", "hz", "abcdefgh"], |

319 | "abcdefghzyz", |

320 | &[(3, 0, 8), (0, 8, 9)] |

321 | ), |

322 | ]; |

323 | |

324 | /// Tests for non-overlapping leftmost-first match semantics. These tests |

325 | /// should generally be specific to leftmost-first, which means they should |

326 | /// generally fail under leftmost-longest semantics. |

327 | const LEFTMOST_FIRST: &'static [SearchTest] = &[ |

328 | t!(leftfirst000, &["ab", "abcd"], "abcd", &[( 0, 0, 2)]), |

329 | t!(leftfirst010, &["", "a"], "a", &[( 0, 0, 0), (0, 1, 1)]), |

330 | t!(leftfirst011, &["", "a", ""], "a", &[( 0, 0, 0), (0, 1, 1),]), |

331 | t!(leftfirst012, &["a", "", ""], "a", &[( 0, 0, 1)]), |

332 | t!(leftfirst013, &["", "", "a"], "a", &[( 0, 0, 0), (0, 1, 1)]), |

333 | t!(leftfirst014, &["a", ""], "a", &[( 0, 0, 1)]), |

334 | t!(leftfirst015, &["a", ""], "ab", &[( 0, 0, 1), (1, 2, 2)]), |

335 | t!(leftfirst020, &["abcd", "ab"], "abcd", &[( 0, 0, 4)]), |

336 | t!(leftfirst030, &["ab", "ab"], "abcd", &[( 0, 0, 2)]), |

337 | t!(leftfirst040, &["a", "ab"], "xayabbbz", &[( 0, 1, 2), (0, 3, 4)]), |

338 | t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[( 1, 1, 5)]), |

339 | t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[( 1, 1, 6)]), |

340 | t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[( 1, 1, 2)]), |

341 | t!( |

342 | leftfirst310, |

343 | &["abcd", "b", "bce", "ce"], |

344 | "abce", |

345 | &[(1, 1, 2), (3, 2, 4),] |

346 | ), |

347 | t!( |

348 | leftfirst320, |

349 | &["a", "abcdefghi", "hz", "abcdefgh"], |

350 | "abcdefghz", |

351 | &[(0, 0, 1), (2, 7, 9),] |

352 | ), |

353 | t!(leftfirst330, &["a", "abab"], "abab", &[( 0, 0, 1), (0, 2, 3)]), |

354 | t!(leftfirst400, &["amwix", "samwise", "sam"], "Zsamwix", &[( 2, 1, 4)]), |

355 | ]; |

356 | |

357 | /// Like LEFTMOST_FIRST, but for anchored searches. |

358 | const ANCHORED_LEFTMOST_FIRST: &'static [SearchTest] = &[ |

359 | t!(aleftfirst000, &["ab", "abcd"], "abcd", &[( 0, 0, 2)]), |

360 | t!(aleftfirst010, &["", "a"], "a", &[( 0, 0, 0), (0, 1, 1)]), |

361 | t!(aleftfirst011, &["", "a", ""], "a", &[( 0, 0, 0), (0, 1, 1)]), |

362 | t!(aleftfirst012, &["a", "", ""], "a", &[( 0, 0, 1)]), |

363 | t!(aleftfirst013, &["", "", "a"], "a", &[( 0, 0, 0), (0, 1, 1)]), |

364 | t!(aleftfirst020, &["abcd", "ab"], "abcd", &[( 0, 0, 4)]), |

365 | t!(aleftfirst030, &["ab", "ab"], "abcd", &[( 0, 0, 2)]), |

366 | t!(aleftfirst040, &["a", "ab"], "xayabbbz", &[]), |

367 | t!(aleftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]), |

368 | t!(aleftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]), |

369 | t!(aleftfirst300, &["abcd", "b", "bce"], "abce", &[]), |

370 | t!(aleftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[]), |

371 | t!( |

372 | aleftfirst320, |

373 | &["a", "abcdefghi", "hz", "abcdefgh"], |

374 | "abcdefghz", |

375 | &[(0, 0, 1)] |

376 | ), |

377 | t!(aleftfirst330, &["a", "abab"], "abab", &[( 0, 0, 1)]), |

378 | t!(aleftfirst400, &["wise", "samwise", "sam"], "samwix", &[( 2, 0, 3)]), |

379 | ]; |

380 | |

381 | /// Tests for non-overlapping leftmost-longest match semantics. These tests |

382 | /// should generally be specific to leftmost-longest, which means they should |

383 | /// generally fail under leftmost-first semantics. |

384 | const LEFTMOST_LONGEST: &'static [SearchTest] = &[ |

385 | t!(leftlong000, &["ab", "abcd"], "abcd", &[( 1, 0, 4)]), |

386 | t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[( 0, 0, 4),]), |

387 | t!(leftlong020, &["", "a"], "a", &[( 1, 0, 1)]), |

388 | t!(leftlong021, &["", "a", ""], "a", &[( 1, 0, 1)]), |

389 | t!(leftlong022, &["a", "", ""], "a", &[( 0, 0, 1)]), |

390 | t!(leftlong023, &["", "", "a"], "a", &[( 2, 0, 1)]), |

391 | t!(leftlong024, &["", "a"], "ab", &[( 1, 0, 1), (0, 2, 2)]), |

392 | t!(leftlong030, &["", "a"], "aa", &[( 1, 0, 1), (1, 1, 2)]), |

393 | t!(leftlong040, &["a", "ab"], "a", &[( 0, 0, 1)]), |

394 | t!(leftlong050, &["a", "ab"], "ab", &[( 1, 0, 2)]), |

395 | t!(leftlong060, &["ab", "a"], "a", &[( 1, 0, 1)]), |

396 | t!(leftlong070, &["ab", "a"], "ab", &[( 0, 0, 2)]), |

397 | t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[( 2, 1, 6)]), |

398 | t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[( 1, 1, 6)]), |

399 | t!(leftlong300, &["abcd", "b", "bce"], "abce", &[( 2, 1, 4)]), |

400 | t!( |

401 | leftlong310, |

402 | &["a", "abcdefghi", "hz", "abcdefgh"], |

403 | "abcdefghz", |

404 | &[(3, 0, 8),] |

405 | ), |

406 | t!(leftlong320, &["a", "abab"], "abab", &[( 1, 0, 4)]), |

407 | t!(leftlong330, &["abcd", "b", "ce"], "abce", &[( 1, 1, 2), (2, 2, 4),]), |

408 | t!(leftlong340, &["a", "ab"], "xayabbbz", &[( 0, 1, 2), (1, 3, 5)]), |

409 | ]; |

410 | |

411 | /// Like LEFTMOST_LONGEST, but for anchored searches. |

412 | const ANCHORED_LEFTMOST_LONGEST: &'static [SearchTest] = &[ |

413 | t!(aleftlong000, &["ab", "abcd"], "abcd", &[( 1, 0, 4)]), |

414 | t!(aleftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[( 0, 0, 4),]), |

415 | t!(aleftlong020, &["", "a"], "a", &[( 1, 0, 1)]), |

416 | t!(aleftlong021, &["", "a", ""], "a", &[( 1, 0, 1)]), |

417 | t!(aleftlong022, &["a", "", ""], "a", &[( 0, 0, 1)]), |

418 | t!(aleftlong023, &["", "", "a"], "a", &[( 2, 0, 1)]), |

419 | t!(aleftlong030, &["", "a"], "aa", &[( 1, 0, 1), (1, 1, 2)]), |

420 | t!(aleftlong040, &["a", "ab"], "a", &[( 0, 0, 1)]), |

421 | t!(aleftlong050, &["a", "ab"], "ab", &[( 1, 0, 2)]), |

422 | t!(aleftlong060, &["ab", "a"], "a", &[( 1, 0, 1)]), |

423 | t!(aleftlong070, &["ab", "a"], "ab", &[( 0, 0, 2)]), |

424 | t!(aleftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]), |

425 | t!(aleftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]), |

426 | t!(aleftlong300, &["abcd", "b", "bce"], "abce", &[]), |

427 | t!( |

428 | aleftlong310, |

429 | &["a", "abcdefghi", "hz", "abcdefgh"], |

430 | "abcdefghz", |

431 | &[(3, 0, 8),] |

432 | ), |

433 | t!(aleftlong320, &["a", "abab"], "abab", &[( 1, 0, 4)]), |

434 | t!(aleftlong330, &["abcd", "b", "ce"], "abce", &[]), |

435 | t!(aleftlong340, &["a", "ab"], "xayabbbz", &[]), |

436 | ]; |

437 | |

438 | /// Tests for non-overlapping match semantics. |

439 | /// |

440 | /// Generally these tests shouldn't pass when using overlapping semantics. |

441 | /// These should pass for both standard and leftmost match semantics. |

442 | const NON_OVERLAPPING: &'static [SearchTest] = &[ |

443 | t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[( 0, 0, 4),]), |

444 | t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[( 2, 0, 4),]), |

445 | t!(nover030, &["abc", "bc"], "zazabcz", &[( 0, 3, 6),]), |

446 | t!( |

447 | nover100, |

448 | &["ab", "ba"], |

449 | "abababa", |

450 | &[(0, 0, 2), (0, 2, 4), (0, 4, 6),] |

451 | ), |

452 | t!(nover200, &["foo", "foo"], "foobarfoo", &[( 0, 0, 3), (0, 6, 9),]), |

453 | t!(nover300, &["", ""], "", &[( 0, 0, 0),]), |

454 | t!(nover310, &["", ""], "a", &[( 0, 0, 0), (0, 1, 1),]), |

455 | ]; |

456 | |

457 | /// Like NON_OVERLAPPING, but for anchored searches. |

458 | const ANCHORED_NON_OVERLAPPING: &'static [SearchTest] = &[ |

459 | t!(anover010, &["abcd", "bcd", "cd"], "abcd", &[( 0, 0, 4),]), |

460 | t!(anover020, &["bcd", "cd", "abcd"], "abcd", &[( 2, 0, 4),]), |

461 | t!(anover030, &["abc", "bc"], "zazabcz", &[]), |

462 | t!( |

463 | anover100, |

464 | &["ab", "ba"], |

465 | "abababa", |

466 | &[(0, 0, 2), (0, 2, 4), (0, 4, 6)] |

467 | ), |

468 | t!(anover200, &["foo", "foo"], "foobarfoo", &[( 0, 0, 3)]), |

469 | t!(anover300, &["", ""], "", &[( 0, 0, 0)]), |

470 | t!(anover310, &["", ""], "a", &[( 0, 0, 0), (0, 1, 1)]), |

471 | ]; |

472 | |

473 | /// Tests for overlapping match semantics. |

474 | /// |

475 | /// This only supports standard match semantics, since leftmost-{first,longest} |

476 | /// do not support overlapping matches. |

477 | const OVERLAPPING: &'static [SearchTest] = &[ |

478 | t!( |

479 | over000, |

480 | &["abcd", "bcd", "cd", "b"], |

481 | "abcd", |

482 | &[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),] |

483 | ), |

484 | t!( |

485 | over010, |

486 | &["bcd", "cd", "b", "abcd"], |

487 | "abcd", |

488 | &[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),] |

489 | ), |

490 | t!( |

491 | over020, |

492 | &["abcd", "bcd", "cd"], |

493 | "abcd", |

494 | &[(0, 0, 4), (1, 1, 4), (2, 2, 4),] |

495 | ), |

496 | t!( |

497 | over030, |

498 | &["bcd", "abcd", "cd"], |

499 | "abcd", |

500 | &[(1, 0, 4), (0, 1, 4), (2, 2, 4),] |

501 | ), |

502 | t!( |

503 | over040, |

504 | &["bcd", "cd", "abcd"], |

505 | "abcd", |

506 | &[(2, 0, 4), (0, 1, 4), (1, 2, 4),] |

507 | ), |

508 | t!(over050, &["abc", "bc"], "zazabcz", &[( 0, 3, 6), (1, 4, 6),]), |

509 | t!( |

510 | over100, |

511 | &["ab", "ba"], |

512 | "abababa", |

513 | &[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),] |

514 | ), |

515 | t!( |

516 | over200, |

517 | &["foo", "foo"], |

518 | "foobarfoo", |

519 | &[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),] |

520 | ), |

521 | t!(over300, &["", ""], "", &[( 0, 0, 0), (1, 0, 0),]), |

522 | t!( |

523 | over310, |

524 | &["", ""], |

525 | "a", |

526 | &[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),] |

527 | ), |

528 | t!(over320, &["", "a"], "a", &[( 0, 0, 0), (1, 0, 1), (0, 1, 1),]), |

529 | t!( |

530 | over330, |

531 | &["", "a", ""], |

532 | "a", |

533 | &[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),] |

534 | ), |

535 | t!( |

536 | over340, |

537 | &["a", "", ""], |

538 | "a", |

539 | &[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),] |

540 | ), |

541 | t!( |

542 | over350, |

543 | &["", "", "a"], |

544 | "a", |

545 | &[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),] |

546 | ), |

547 | t!( |

548 | over360, |

549 | &["foo", "foofoo"], |

550 | "foofoo", |

551 | &[(0, 0, 3), (1, 0, 6), (0, 3, 6)] |

552 | ), |

553 | ]; |

554 | |

555 | /* |

556 | Iterators of anchored overlapping searches were removed from the API in |

557 | after 0.7, but we leave the tests commented out for posterity. |

558 | /// Like OVERLAPPING, but for anchored searches. |

559 | const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[ |

560 | t!(aover000, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]), |

561 | t!(aover010, &["bcd", "cd", "b", "abcd"], "abcd", &[(3, 0, 4)]), |

562 | t!(aover020, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4)]), |

563 | t!(aover030, &["bcd", "abcd", "cd"], "abcd", &[(1, 0, 4)]), |

564 | t!(aover040, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4)]), |

565 | t!(aover050, &["abc", "bc"], "zazabcz", &[]), |

566 | t!(aover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]), |

567 | t!(aover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (1, 0, 3)]), |

568 | t!(aover300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]), |

569 | t!(aover310, &["", ""], "a", &[(0, 0, 0), (1, 0, 0)]), |

570 | t!(aover320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1)]), |

571 | t!(aover330, &["", "a", ""], "a", &[(0, 0, 0), (2, 0, 0), (1, 0, 1)]), |

572 | t!(aover340, &["a", "", ""], "a", &[(1, 0, 0), (2, 0, 0), (0, 0, 1)]), |

573 | t!(aover350, &["", "", "a"], "a", &[(0, 0, 0), (1, 0, 0), (2, 0, 1)]), |

574 | t!(aover360, &["foo", "foofoo"], "foofoo", &[(0, 0, 3), (1, 0, 6)]), |

575 | ]; |

576 | */ |

577 | |

578 | /// Tests for ASCII case insensitivity. |

579 | /// |

580 | /// These tests should all have the same behavior regardless of match semantics |

581 | /// or whether the search is overlapping. |

582 | const ASCII_CASE_INSENSITIVE: &'static [SearchTest] = &[ |

583 | t!(acasei000, &["a"], "A", &[( 0, 0, 1)]), |

584 | t!(acasei010, &["Samwise"], "SAMWISE", &[( 0, 0, 7)]), |

585 | t!(acasei011, &["Samwise"], "SAMWISE.abcd", &[( 0, 0, 7)]), |

586 | t!(acasei020, &["fOoBaR"], "quux foobar baz", &[( 0, 5, 11)]), |

587 | ]; |

588 | |

589 | /// Like ASCII_CASE_INSENSITIVE, but specifically for non-overlapping tests. |

590 | const ASCII_CASE_INSENSITIVE_NON_OVERLAPPING: &'static [SearchTest] = &[ |

591 | t!(acasei000, &["foo", "FOO"], "fOo", &[( 0, 0, 3)]), |

592 | t!(acasei000, &["FOO", "foo"], "fOo", &[( 0, 0, 3)]), |

593 | t!(acasei010, &["abc", "def"], "abcdef", &[( 0, 0, 3), (1, 3, 6)]), |

594 | ]; |

595 | |

596 | /// Like ASCII_CASE_INSENSITIVE, but specifically for overlapping tests. |

597 | const ASCII_CASE_INSENSITIVE_OVERLAPPING: &'static [SearchTest] = &[ |

598 | t!(acasei000, &["foo", "FOO"], "fOo", &[( 0, 0, 3), (1, 0, 3)]), |

599 | t!(acasei001, &["FOO", "foo"], "fOo", &[( 0, 0, 3), (1, 0, 3)]), |

600 | // This is a regression test from: |

601 | // https://github.com/BurntSushi/aho-corasick/issues/68 |

602 | // Previously, it was reporting a duplicate (1, 3, 6) match. |

603 | t!( |

604 | acasei010, |

605 | &["abc", "def", "abcdef"], |

606 | "abcdef", |

607 | &[(0, 0, 3), (2, 0, 6), (1, 3, 6)] |

608 | ), |

609 | ]; |

610 | |

611 | /// Regression tests that are applied to all Aho-Corasick combinations. |

612 | /// |

613 | /// If regression tests are needed for specific match semantics, then add them |

614 | /// to the appropriate group above. |

615 | const REGRESSION: &'static [SearchTest] = &[ |

616 | t!(regression010, &["inf", "ind"], "infind", &[( 0, 0, 3), (1, 3, 6),]), |

617 | t!(regression020, &["ind", "inf"], "infind", &[( 1, 0, 3), (0, 3, 6),]), |

618 | t!( |

619 | regression030, |

620 | &["libcore/", "libstd/"], |

621 | "libcore/char/methods.rs", |

622 | &[(0, 0, 8),] |

623 | ), |

624 | t!( |

625 | regression040, |

626 | &["libstd/", "libcore/"], |

627 | "libcore/char/methods.rs", |

628 | &[(1, 0, 8),] |

629 | ), |

630 | t!( |

631 | regression050, |

632 | &[" \x00\x00\x01", " \x00\x00\x00"], |

633 | " \x00\x00\x00", |

634 | &[(1, 0, 3),] |

635 | ), |

636 | t!( |

637 | regression060, |

638 | &[" \x00\x00\x00", " \x00\x00\x01"], |

639 | " \x00\x00\x00", |

640 | &[(0, 0, 3),] |

641 | ), |

642 | ]; |

643 | |

644 | // Now define a test for each combination of things above that we want to run. |

645 | // Since there are a few different combinations for each collection of tests, |

646 | // we define a couple of macros to avoid repetition drudgery. The testconfig |

647 | // macro constructs the automaton from a given match kind, and runs the search |

648 | // tests one-by-one over the given collection. The `with` parameter allows one |

649 | // to configure the builder with additional parameters. The testcombo macro |

650 | // invokes testconfig in precisely this way: it sets up several tests where |

651 | // each one turns a different knob on AhoCorasickBuilder. |

652 | |

653 | macro_rules! testconfig { |

654 | (anchored, $name:ident, $collection:expr, $kind:ident, $with:expr) => { |

655 | #[test] |

656 | fn $name() { |

657 | run_search_tests($collection, |test| { |

658 | let mut builder = AhoCorasick::builder(); |

659 | $with(&mut builder); |

660 | let input = Input::new(test.haystack).anchored(Anchored::Yes); |

661 | builder |

662 | .match_kind(MatchKind::$kind) |

663 | .build(test.patterns) |

664 | .unwrap() |

665 | .try_find_iter(input) |

666 | .unwrap() |

667 | .collect() |

668 | }); |

669 | } |

670 | }; |

671 | (overlapping, $name:ident, $collection:expr, $kind:ident, $with:expr) => { |

672 | #[test] |

673 | fn $name() { |

674 | run_search_tests($collection, |test| { |

675 | let mut builder = AhoCorasick::builder(); |

676 | $with(&mut builder); |

677 | builder |

678 | .match_kind(MatchKind::$kind) |

679 | .build(test.patterns) |

680 | .unwrap() |

681 | .find_overlapping_iter(test.haystack) |

682 | .collect() |

683 | }); |

684 | } |

685 | }; |

686 | (stream, $name:ident, $collection:expr, $kind:ident, $with:expr) => { |

687 | #[test] |

688 | fn $name() { |

689 | run_stream_search_tests($collection, |test| { |

690 | let buf = std::io::BufReader::with_capacity( |

691 | 1, |

692 | test.haystack.as_bytes(), |

693 | ); |

694 | let mut builder = AhoCorasick::builder(); |

695 | $with(&mut builder); |

696 | builder |

697 | .match_kind(MatchKind::$kind) |

698 | .build(test.patterns) |

699 | .unwrap() |

700 | .stream_find_iter(buf) |

701 | .map(|result| result.unwrap()) |

702 | .collect() |

703 | }); |

704 | } |

705 | }; |

706 | ($name:ident, $collection:expr, $kind:ident, $with:expr) => { |

707 | #[test] |

708 | fn $name() { |

709 | run_search_tests($collection, |test| { |

710 | let mut builder = AhoCorasick::builder(); |

711 | $with(&mut builder); |

712 | builder |

713 | .match_kind(MatchKind::$kind) |

714 | .build(test.patterns) |

715 | .unwrap() |

716 | .find_iter(test.haystack) |

717 | .collect() |

718 | }); |

719 | } |

720 | }; |

721 | } |

722 | |

723 | macro_rules! testcombo { |

724 | ($name:ident, $collection:expr, $kind:ident) => { |

725 | mod $name { |

726 | use super::*; |

727 | |

728 | testconfig!(default, $collection, $kind, |_| ()); |

729 | testconfig!( |

730 | nfa_default, |

731 | $collection, |

732 | $kind, |

733 | |b: &mut AhoCorasickBuilder| { |

734 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)); |

735 | } |

736 | ); |

737 | testconfig!( |

738 | nfa_noncontig_no_prefilter, |

739 | $collection, |

740 | $kind, |

741 | |b: &mut AhoCorasickBuilder| { |

742 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |

743 | .prefilter(false); |

744 | } |

745 | ); |

746 | testconfig!( |

747 | nfa_noncontig_all_sparse, |

748 | $collection, |

749 | $kind, |

750 | |b: &mut AhoCorasickBuilder| { |

751 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |

752 | .dense_depth(0); |

753 | } |

754 | ); |

755 | testconfig!( |

756 | nfa_noncontig_all_dense, |

757 | $collection, |

758 | $kind, |

759 | |b: &mut AhoCorasickBuilder| { |

760 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |

761 | .dense_depth(usize::MAX); |

762 | } |

763 | ); |

764 | testconfig!( |

765 | nfa_contig_default, |

766 | $collection, |

767 | $kind, |

768 | |b: &mut AhoCorasickBuilder| { |

769 | b.kind(Some(AhoCorasickKind::ContiguousNFA)); |

770 | } |

771 | ); |

772 | testconfig!( |

773 | nfa_contig_no_prefilter, |

774 | $collection, |

775 | $kind, |

776 | |b: &mut AhoCorasickBuilder| { |

777 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |

778 | .prefilter(false); |

779 | } |

780 | ); |

781 | testconfig!( |

782 | nfa_contig_all_sparse, |

783 | $collection, |

784 | $kind, |

785 | |b: &mut AhoCorasickBuilder| { |

786 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |

787 | .dense_depth(0); |

788 | } |

789 | ); |

790 | testconfig!( |

791 | nfa_contig_all_dense, |

792 | $collection, |

793 | $kind, |

794 | |b: &mut AhoCorasickBuilder| { |

795 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |

796 | .dense_depth(usize::MAX); |

797 | } |

798 | ); |

799 | testconfig!( |

800 | nfa_contig_no_byte_class, |

801 | $collection, |

802 | $kind, |

803 | |b: &mut AhoCorasickBuilder| { |

804 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |

805 | .byte_classes(false); |

806 | } |

807 | ); |

808 | testconfig!( |

809 | dfa_default, |

810 | $collection, |

811 | $kind, |

812 | |b: &mut AhoCorasickBuilder| { |

813 | b.kind(Some(AhoCorasickKind::DFA)); |

814 | } |

815 | ); |

816 | testconfig!( |

817 | dfa_start_both, |

818 | $collection, |

819 | $kind, |

820 | |b: &mut AhoCorasickBuilder| { |

821 | b.kind(Some(AhoCorasickKind::DFA)) |

822 | .start_kind(StartKind::Both); |

823 | } |

824 | ); |

825 | testconfig!( |

826 | dfa_no_prefilter, |

827 | $collection, |

828 | $kind, |

829 | |b: &mut AhoCorasickBuilder| { |

830 | b.kind(Some(AhoCorasickKind::DFA)).prefilter(false); |

831 | } |

832 | ); |

833 | testconfig!( |

834 | dfa_start_both_no_prefilter, |

835 | $collection, |

836 | $kind, |

837 | |b: &mut AhoCorasickBuilder| { |

838 | b.kind(Some(AhoCorasickKind::DFA)) |

839 | .start_kind(StartKind::Both) |

840 | .prefilter(false); |

841 | } |

842 | ); |

843 | testconfig!( |

844 | dfa_no_byte_class, |

845 | $collection, |

846 | $kind, |

847 | |b: &mut AhoCorasickBuilder| { |

848 | b.kind(Some(AhoCorasickKind::DFA)).byte_classes(false); |

849 | } |

850 | ); |

851 | testconfig!( |

852 | dfa_start_both_no_byte_class, |

853 | $collection, |

854 | $kind, |

855 | |b: &mut AhoCorasickBuilder| { |

856 | b.kind(Some(AhoCorasickKind::DFA)) |

857 | .start_kind(StartKind::Both) |

858 | .byte_classes(false); |

859 | } |

860 | ); |

861 | } |

862 | }; |

863 | } |

864 | |

865 | // Write out the various combinations of match semantics given the variety of |

866 | // configurations tested by 'testcombo!'. |

867 | testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest); |

868 | testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst); |

869 | testcombo!( |

870 | search_standard_nonoverlapping, |

871 | AC_STANDARD_NON_OVERLAPPING, |

872 | Standard |

873 | ); |

874 | |

875 | // Write out the overlapping combo by hand since there is only one of them. |

876 | testconfig!( |

877 | overlapping, |

878 | search_standard_overlapping_default, |

879 | AC_STANDARD_OVERLAPPING, |

880 | Standard, |

881 | |_| () |

882 | ); |

883 | testconfig!( |

884 | overlapping, |

885 | search_standard_overlapping_nfa_noncontig_default, |

886 | AC_STANDARD_OVERLAPPING, |

887 | Standard, |

888 | |b: &mut AhoCorasickBuilder| { |

889 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)); |

890 | } |

891 | ); |

892 | testconfig!( |

893 | overlapping, |

894 | search_standard_overlapping_nfa_noncontig_no_prefilter, |

895 | AC_STANDARD_OVERLAPPING, |

896 | Standard, |

897 | |b: &mut AhoCorasickBuilder| { |

898 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)).prefilter(false); |

899 | } |

900 | ); |

901 | testconfig!( |

902 | overlapping, |

903 | search_standard_overlapping_nfa_contig_default, |

904 | AC_STANDARD_OVERLAPPING, |

905 | Standard, |

906 | |b: &mut AhoCorasickBuilder| { |

907 | b.kind(Some(AhoCorasickKind::ContiguousNFA)); |

908 | } |

909 | ); |

910 | testconfig!( |

911 | overlapping, |

912 | search_standard_overlapping_nfa_contig_no_prefilter, |

913 | AC_STANDARD_OVERLAPPING, |

914 | Standard, |

915 | |b: &mut AhoCorasickBuilder| { |

916 | b.kind(Some(AhoCorasickKind::ContiguousNFA)).prefilter(false); |

917 | } |

918 | ); |

919 | testconfig!( |

920 | overlapping, |

921 | search_standard_overlapping_nfa_contig_all_sparse, |

922 | AC_STANDARD_OVERLAPPING, |

923 | Standard, |

924 | |b: &mut AhoCorasickBuilder| { |

925 | b.kind(Some(AhoCorasickKind::ContiguousNFA)).dense_depth(0); |

926 | } |

927 | ); |

928 | testconfig!( |

929 | overlapping, |

930 | search_standard_overlapping_nfa_contig_all_dense, |

931 | AC_STANDARD_OVERLAPPING, |

932 | Standard, |

933 | |b: &mut AhoCorasickBuilder| { |

934 | b.kind(Some(AhoCorasickKind::ContiguousNFA)).dense_depth(usize::MAX); |

935 | } |

936 | ); |

937 | testconfig!( |

938 | overlapping, |

939 | search_standard_overlapping_dfa_default, |

940 | AC_STANDARD_OVERLAPPING, |

941 | Standard, |

942 | |b: &mut AhoCorasickBuilder| { |

943 | b.kind(Some(AhoCorasickKind::DFA)); |

944 | } |

945 | ); |

946 | testconfig!( |

947 | overlapping, |

948 | search_standard_overlapping_dfa_start_both, |

949 | AC_STANDARD_OVERLAPPING, |

950 | Standard, |

951 | |b: &mut AhoCorasickBuilder| { |

952 | b.kind(Some(AhoCorasickKind::DFA)).start_kind(StartKind::Both); |

953 | } |

954 | ); |

955 | testconfig!( |

956 | overlapping, |

957 | search_standard_overlapping_dfa_no_prefilter, |

958 | AC_STANDARD_OVERLAPPING, |

959 | Standard, |

960 | |b: &mut AhoCorasickBuilder| { |

961 | b.kind(Some(AhoCorasickKind::DFA)).prefilter(false); |

962 | } |

963 | ); |

964 | testconfig!( |

965 | overlapping, |

966 | search_standard_overlapping_dfa_start_both_no_prefilter, |

967 | AC_STANDARD_OVERLAPPING, |

968 | Standard, |

969 | |b: &mut AhoCorasickBuilder| { |

970 | b.kind(Some(AhoCorasickKind::DFA)) |

971 | .start_kind(StartKind::Both) |

972 | .prefilter(false); |

973 | } |

974 | ); |

975 | testconfig!( |

976 | overlapping, |

977 | search_standard_overlapping_dfa_no_byte_class, |

978 | AC_STANDARD_OVERLAPPING, |

979 | Standard, |

980 | |b: &mut AhoCorasickBuilder| { |

981 | b.kind(Some(AhoCorasickKind::DFA)).byte_classes(false); |

982 | } |

983 | ); |

984 | testconfig!( |

985 | overlapping, |

986 | search_standard_overlapping_dfa_start_both_no_byte_class, |

987 | AC_STANDARD_OVERLAPPING, |

988 | Standard, |

989 | |b: &mut AhoCorasickBuilder| { |

990 | b.kind(Some(AhoCorasickKind::DFA)) |

991 | .start_kind(StartKind::Both) |

992 | .byte_classes(false); |

993 | } |

994 | ); |

995 | |

996 | // Also write out tests manually for streams, since we only test the standard |

997 | // match semantics. We also don't bother testing different automaton |

998 | // configurations, since those are well covered by tests above. |

999 | #[cfg(feature = "std")] |

1000 | testconfig!( |

1001 | stream, |

1002 | search_standard_stream_default, |

1003 | AC_STANDARD_NON_OVERLAPPING, |

1004 | Standard, |

1005 | |_| () |

1006 | ); |

1007 | #[cfg(feature = "std")] |

1008 | testconfig!( |

1009 | stream, |

1010 | search_standard_stream_nfa_noncontig_default, |

1011 | AC_STANDARD_NON_OVERLAPPING, |

1012 | Standard, |

1013 | |b: &mut AhoCorasickBuilder| { |

1014 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)); |

1015 | } |

1016 | ); |

1017 | #[cfg(feature = "std")] |

1018 | testconfig!( |

1019 | stream, |

1020 | search_standard_stream_nfa_contig_default, |

1021 | AC_STANDARD_NON_OVERLAPPING, |

1022 | Standard, |

1023 | |b: &mut AhoCorasickBuilder| { |

1024 | b.kind(Some(AhoCorasickKind::ContiguousNFA)); |

1025 | } |

1026 | ); |

1027 | #[cfg(feature = "std")] |

1028 | testconfig!( |

1029 | stream, |

1030 | search_standard_stream_dfa_default, |

1031 | AC_STANDARD_NON_OVERLAPPING, |

1032 | Standard, |

1033 | |b: &mut AhoCorasickBuilder| { |

1034 | b.kind(Some(AhoCorasickKind::DFA)); |

1035 | } |

1036 | ); |

1037 | |

1038 | // Same thing for anchored searches. Write them out manually. |

1039 | testconfig!( |

1040 | anchored, |

1041 | search_standard_anchored_default, |

1042 | AC_STANDARD_ANCHORED_NON_OVERLAPPING, |

1043 | Standard, |

1044 | |b: &mut AhoCorasickBuilder| { |

1045 | b.start_kind(StartKind::Anchored); |

1046 | } |

1047 | ); |

1048 | testconfig!( |

1049 | anchored, |

1050 | search_standard_anchored_nfa_noncontig_default, |

1051 | AC_STANDARD_ANCHORED_NON_OVERLAPPING, |

1052 | Standard, |

1053 | |b: &mut AhoCorasickBuilder| { |

1054 | b.start_kind(StartKind::Anchored) |

1055 | .kind(Some(AhoCorasickKind::NoncontiguousNFA)); |

1056 | } |

1057 | ); |

1058 | testconfig!( |

1059 | anchored, |

1060 | search_standard_anchored_nfa_contig_default, |

1061 | AC_STANDARD_ANCHORED_NON_OVERLAPPING, |

1062 | Standard, |

1063 | |b: &mut AhoCorasickBuilder| { |

1064 | b.start_kind(StartKind::Anchored) |

1065 | .kind(Some(AhoCorasickKind::ContiguousNFA)); |

1066 | } |

1067 | ); |

1068 | testconfig!( |

1069 | anchored, |

1070 | search_standard_anchored_dfa_default, |

1071 | AC_STANDARD_ANCHORED_NON_OVERLAPPING, |

1072 | Standard, |

1073 | |b: &mut AhoCorasickBuilder| { |

1074 | b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA)); |

1075 | } |

1076 | ); |

1077 | testconfig!( |

1078 | anchored, |

1079 | search_standard_anchored_dfa_start_both, |

1080 | AC_STANDARD_ANCHORED_NON_OVERLAPPING, |

1081 | Standard, |

1082 | |b: &mut AhoCorasickBuilder| { |

1083 | b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA)); |

1084 | } |

1085 | ); |

1086 | testconfig!( |

1087 | anchored, |

1088 | search_leftmost_first_anchored_default, |

1089 | AC_LEFTMOST_FIRST_ANCHORED, |

1090 | LeftmostFirst, |

1091 | |b: &mut AhoCorasickBuilder| { |

1092 | b.start_kind(StartKind::Anchored); |

1093 | } |

1094 | ); |

1095 | testconfig!( |

1096 | anchored, |

1097 | search_leftmost_first_anchored_nfa_noncontig_default, |

1098 | AC_LEFTMOST_FIRST_ANCHORED, |

1099 | LeftmostFirst, |

1100 | |b: &mut AhoCorasickBuilder| { |

1101 | b.start_kind(StartKind::Anchored) |

1102 | .kind(Some(AhoCorasickKind::NoncontiguousNFA)); |

1103 | } |

1104 | ); |

1105 | testconfig!( |

1106 | anchored, |

1107 | search_leftmost_first_anchored_nfa_contig_default, |

1108 | AC_LEFTMOST_FIRST_ANCHORED, |

1109 | LeftmostFirst, |

1110 | |b: &mut AhoCorasickBuilder| { |

1111 | b.start_kind(StartKind::Anchored) |

1112 | .kind(Some(AhoCorasickKind::ContiguousNFA)); |

1113 | } |

1114 | ); |

1115 | testconfig!( |

1116 | anchored, |

1117 | search_leftmost_first_anchored_dfa_default, |

1118 | AC_LEFTMOST_FIRST_ANCHORED, |

1119 | LeftmostFirst, |

1120 | |b: &mut AhoCorasickBuilder| { |

1121 | b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA)); |

1122 | } |

1123 | ); |

1124 | testconfig!( |

1125 | anchored, |

1126 | search_leftmost_first_anchored_dfa_start_both, |

1127 | AC_LEFTMOST_FIRST_ANCHORED, |

1128 | LeftmostFirst, |

1129 | |b: &mut AhoCorasickBuilder| { |

1130 | b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA)); |

1131 | } |

1132 | ); |

1133 | testconfig!( |

1134 | anchored, |

1135 | search_leftmost_longest_anchored_default, |

1136 | AC_LEFTMOST_LONGEST_ANCHORED, |

1137 | LeftmostLongest, |

1138 | |b: &mut AhoCorasickBuilder| { |

1139 | b.start_kind(StartKind::Anchored); |

1140 | } |

1141 | ); |

1142 | testconfig!( |

1143 | anchored, |

1144 | search_leftmost_longest_anchored_nfa_noncontig_default, |

1145 | AC_LEFTMOST_LONGEST_ANCHORED, |

1146 | LeftmostLongest, |

1147 | |b: &mut AhoCorasickBuilder| { |

1148 | b.start_kind(StartKind::Anchored) |

1149 | .kind(Some(AhoCorasickKind::NoncontiguousNFA)); |

1150 | } |

1151 | ); |

1152 | testconfig!( |

1153 | anchored, |

1154 | search_leftmost_longest_anchored_nfa_contig_default, |

1155 | AC_LEFTMOST_LONGEST_ANCHORED, |

1156 | LeftmostLongest, |

1157 | |b: &mut AhoCorasickBuilder| { |

1158 | b.start_kind(StartKind::Anchored) |

1159 | .kind(Some(AhoCorasickKind::ContiguousNFA)); |

1160 | } |

1161 | ); |

1162 | testconfig!( |

1163 | anchored, |

1164 | search_leftmost_longest_anchored_dfa_default, |

1165 | AC_LEFTMOST_LONGEST_ANCHORED, |

1166 | LeftmostLongest, |

1167 | |b: &mut AhoCorasickBuilder| { |

1168 | b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA)); |

1169 | } |

1170 | ); |

1171 | testconfig!( |

1172 | anchored, |

1173 | search_leftmost_longest_anchored_dfa_start_both, |

1174 | AC_LEFTMOST_LONGEST_ANCHORED, |

1175 | LeftmostLongest, |

1176 | |b: &mut AhoCorasickBuilder| { |

1177 | b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA)); |

1178 | } |

1179 | ); |

1180 | |

1181 | // And also write out the test combinations for ASCII case insensitivity. |

1182 | testconfig!( |

1183 | acasei_standard_default, |

1184 | &[ASCII_CASE_INSENSITIVE], |

1185 | Standard, |

1186 | |b: &mut AhoCorasickBuilder| { |

1187 | b.prefilter(false).ascii_case_insensitive(true); |

1188 | } |

1189 | ); |

1190 | testconfig!( |

1191 | acasei_standard_nfa_noncontig_default, |

1192 | &[ASCII_CASE_INSENSITIVE], |

1193 | Standard, |

1194 | |b: &mut AhoCorasickBuilder| { |

1195 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |

1196 | .prefilter(false) |

1197 | .ascii_case_insensitive(true); |

1198 | } |

1199 | ); |

1200 | testconfig!( |

1201 | acasei_standard_nfa_contig_default, |

1202 | &[ASCII_CASE_INSENSITIVE], |

1203 | Standard, |

1204 | |b: &mut AhoCorasickBuilder| { |

1205 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |

1206 | .prefilter(false) |

1207 | .ascii_case_insensitive(true); |

1208 | } |

1209 | ); |

1210 | testconfig!( |

1211 | acasei_standard_dfa_default, |

1212 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |

1213 | Standard, |

1214 | |b: &mut AhoCorasickBuilder| { |

1215 | b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); |

1216 | } |

1217 | ); |

1218 | testconfig!( |

1219 | overlapping, |

1220 | acasei_standard_overlapping_default, |

1221 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], |

1222 | Standard, |

1223 | |b: &mut AhoCorasickBuilder| { |

1224 | b.ascii_case_insensitive(true); |

1225 | } |

1226 | ); |

1227 | testconfig!( |

1228 | overlapping, |

1229 | acasei_standard_overlapping_nfa_noncontig_default, |

1230 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], |

1231 | Standard, |

1232 | |b: &mut AhoCorasickBuilder| { |

1233 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |

1234 | .ascii_case_insensitive(true); |

1235 | } |

1236 | ); |

1237 | testconfig!( |

1238 | overlapping, |

1239 | acasei_standard_overlapping_nfa_contig_default, |

1240 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], |

1241 | Standard, |

1242 | |b: &mut AhoCorasickBuilder| { |

1243 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |

1244 | .ascii_case_insensitive(true); |

1245 | } |

1246 | ); |

1247 | testconfig!( |

1248 | overlapping, |

1249 | acasei_standard_overlapping_dfa_default, |

1250 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], |

1251 | Standard, |

1252 | |b: &mut AhoCorasickBuilder| { |

1253 | b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); |

1254 | } |

1255 | ); |

1256 | testconfig!( |

1257 | acasei_leftmost_first_default, |

1258 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |

1259 | LeftmostFirst, |

1260 | |b: &mut AhoCorasickBuilder| { |

1261 | b.ascii_case_insensitive(true); |

1262 | } |

1263 | ); |

1264 | testconfig!( |

1265 | acasei_leftmost_first_nfa_noncontig_default, |

1266 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |

1267 | LeftmostFirst, |

1268 | |b: &mut AhoCorasickBuilder| { |

1269 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |

1270 | .ascii_case_insensitive(true); |

1271 | } |

1272 | ); |

1273 | testconfig!( |

1274 | acasei_leftmost_first_nfa_contig_default, |

1275 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |

1276 | LeftmostFirst, |

1277 | |b: &mut AhoCorasickBuilder| { |

1278 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |

1279 | .ascii_case_insensitive(true); |

1280 | } |

1281 | ); |

1282 | testconfig!( |

1283 | acasei_leftmost_first_dfa_default, |

1284 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |

1285 | LeftmostFirst, |

1286 | |b: &mut AhoCorasickBuilder| { |

1287 | b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); |

1288 | } |

1289 | ); |

1290 | testconfig!( |

1291 | acasei_leftmost_longest_default, |

1292 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |

1293 | LeftmostLongest, |

1294 | |b: &mut AhoCorasickBuilder| { |

1295 | b.ascii_case_insensitive(true); |

1296 | } |

1297 | ); |

1298 | testconfig!( |

1299 | acasei_leftmost_longest_nfa_noncontig_default, |

1300 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |

1301 | LeftmostLongest, |

1302 | |b: &mut AhoCorasickBuilder| { |

1303 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |

1304 | .ascii_case_insensitive(true); |

1305 | } |

1306 | ); |

1307 | testconfig!( |

1308 | acasei_leftmost_longest_nfa_contig_default, |

1309 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |

1310 | LeftmostLongest, |

1311 | |b: &mut AhoCorasickBuilder| { |

1312 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |

1313 | .ascii_case_insensitive(true); |

1314 | } |

1315 | ); |

1316 | testconfig!( |

1317 | acasei_leftmost_longest_dfa_default, |

1318 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |

1319 | LeftmostLongest, |

1320 | |b: &mut AhoCorasickBuilder| { |

1321 | b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); |

1322 | } |

1323 | ); |

1324 | |

1325 | fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>( |

1326 | which: TestCollection, |

1327 | mut f: F, |

1328 | ) { |

1329 | let get_match_triples = |

1330 | |matches: Vec<Match>| -> Vec<(usize, usize, usize)> { |

1331 | matches |

1332 | .into_iter() |

1333 | .map(|m| (m.pattern().as_usize(), m.start(), m.end())) |

1334 | .collect() |

1335 | }; |

1336 | for &tests in which { |

1337 | for test in tests { |

1338 | assert_eq!( |

1339 | test.matches, |

1340 | get_match_triples(f(&test)).as_slice(), |

1341 | "test: {}, patterns: {:?}, haystack: {:?}", |

1342 | test.name, |

1343 | test.patterns, |

1344 | test.haystack |

1345 | ); |

1346 | } |

1347 | } |

1348 | } |

1349 | |

1350 | // Like 'run_search_tests', but we skip any tests that contain the empty |

1351 | // pattern because stream searching doesn't support it. |

1352 | #[cfg(feature = "std")] |

1353 | fn run_stream_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>( |

1354 | which: TestCollection, |

1355 | mut f: F, |

1356 | ) { |

1357 | let get_match_triples = |

1358 | |matches: Vec<Match>| -> Vec<(usize, usize, usize)> { |

1359 | matches |

1360 | .into_iter() |

1361 | .map(|m| (m.pattern().as_usize(), m.start(), m.end())) |

1362 | .collect() |

1363 | }; |

1364 | for &tests in which { |

1365 | for test in tests { |

1366 | if test.patterns.iter().any(|p| p.is_empty()) { |

1367 | continue; |

1368 | } |

1369 | assert_eq!( |

1370 | test.matches, |

1371 | get_match_triples(f(&test)).as_slice(), |

1372 | "test: {}, patterns: {:?}, haystack: {:?}", |

1373 | test.name, |

1374 | test.patterns, |

1375 | test.haystack |

1376 | ); |

1377 | } |

1378 | } |

1379 | } |

1380 | |

1381 | #[test] |

1382 | fn search_tests_have_unique_names() { |

1383 | let assert = |constname, tests: &[SearchTest]| { |

1384 | let mut seen = HashMap::new(); // map from test name to position |

1385 | for (i, test) in tests.iter().enumerate() { |

1386 | if !seen.contains_key(test.name) { |

1387 | seen.insert(test.name, i); |

1388 | } else { |

1389 | let last = seen[test.name]; |

1390 | panic!( |

1391 | "{} tests have duplicate names at positions {} and {}", |

1392 | constname, last, i |

1393 | ); |

1394 | } |

1395 | } |

1396 | }; |

1397 | assert("BASICS", BASICS); |

1398 | assert("STANDARD", STANDARD); |

1399 | assert("LEFTMOST", LEFTMOST); |

1400 | assert("LEFTMOST_FIRST", LEFTMOST_FIRST); |

1401 | assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST); |

1402 | assert("NON_OVERLAPPING", NON_OVERLAPPING); |

1403 | assert("OVERLAPPING", OVERLAPPING); |

1404 | assert("REGRESSION", REGRESSION); |

1405 | } |

1406 | |

1407 | #[cfg(feature = "std")] |

1408 | #[test] |

1409 | #[should_panic] |

1410 | fn stream_not_allowed_leftmost_first() { |

1411 | let fsm = AhoCorasick::builder() |

1412 | .match_kind(MatchKind::LeftmostFirst) |

1413 | .build(None::<String>) |

1414 | .unwrap(); |

1415 | assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0); |

1416 | } |

1417 | |

1418 | #[cfg(feature = "std")] |

1419 | #[test] |

1420 | #[should_panic] |

1421 | fn stream_not_allowed_leftmost_longest() { |

1422 | let fsm = AhoCorasick::builder() |

1423 | .match_kind(MatchKind::LeftmostLongest) |

1424 | .build(None::<String>) |

1425 | .unwrap(); |

1426 | assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0); |

1427 | } |

1428 | |

1429 | #[test] |

1430 | #[should_panic] |

1431 | fn overlapping_not_allowed_leftmost_first() { |

1432 | let fsm = AhoCorasick::builder() |

1433 | .match_kind(MatchKind::LeftmostFirst) |

1434 | .build(None::<String>) |

1435 | .unwrap(); |

1436 | assert_eq!(fsm.find_overlapping_iter("").count(), 0); |

1437 | } |

1438 | |

1439 | #[test] |

1440 | #[should_panic] |

1441 | fn overlapping_not_allowed_leftmost_longest() { |

1442 | let fsm = AhoCorasick::builder() |

1443 | .match_kind(MatchKind::LeftmostLongest) |

1444 | .build(None::<String>) |

1445 | .unwrap(); |

1446 | assert_eq!(fsm.find_overlapping_iter("").count(), 0); |

1447 | } |

1448 | |

1449 | // This tests that if we build an AC matcher with an "unanchored" start kind, |

1450 | // then we can't run an anchored search even if the underlying searcher |

1451 | // supports it. |

1452 | // |

1453 | // The key bit here is that both of the NFAs in this crate unconditionally |

1454 | // support both unanchored and anchored searches, but the DFA does not because |

1455 | // of the added cost of doing so. To avoid the top-level AC matcher sometimes |

1456 | // supporting anchored and sometimes not (depending on which searcher it |

1457 | // chooses to use internally), we ensure that the given 'StartKind' is always |

1458 | // respected. |

1459 | #[test] |

1460 | fn anchored_not_allowed_even_if_technically_available() { |

1461 | let ac = AhoCorasick::builder() |

1462 | .kind(Some(AhoCorasickKind::NoncontiguousNFA)) |

1463 | .start_kind(StartKind::Unanchored) |

1464 | .build(&["foo"]) |

1465 | .unwrap(); |

1466 | assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err()); |

1467 | |

1468 | let ac = AhoCorasick::builder() |

1469 | .kind(Some(AhoCorasickKind::ContiguousNFA)) |

1470 | .start_kind(StartKind::Unanchored) |

1471 | .build(&["foo"]) |

1472 | .unwrap(); |

1473 | assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err()); |

1474 | |

1475 | // For completeness, check that the DFA returns an error too. |

1476 | let ac = AhoCorasick::builder() |

1477 | .kind(Some(AhoCorasickKind::DFA)) |

1478 | .start_kind(StartKind::Unanchored) |

1479 | .build(&["foo"]) |

1480 | .unwrap(); |

1481 | assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err()); |

1482 | } |

1483 | |

1484 | // This is like the test aboved, but with unanchored and anchored flipped. That |

1485 | // is, we asked for an AC searcher with anchored support and we check that |

1486 | // unanchored searches return an error even if the underlying searcher would |

1487 | // technically support it. |

1488 | #[test] |

1489 | fn unanchored_not_allowed_even_if_technically_available() { |

1490 | let ac = AhoCorasick::builder() |

1491 | .kind(Some(AhoCorasickKind::NoncontiguousNFA)) |

1492 | .start_kind(StartKind::Anchored) |

1493 | .build(&["foo"]) |

1494 | .unwrap(); |

1495 | assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err()); |

1496 | |

1497 | let ac = AhoCorasick::builder() |

1498 | .kind(Some(AhoCorasickKind::ContiguousNFA)) |

1499 | .start_kind(StartKind::Anchored) |

1500 | .build(&["foo"]) |

1501 | .unwrap(); |

1502 | assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err()); |

1503 | |

1504 | // For completeness, check that the DFA returns an error too. |

1505 | let ac = AhoCorasick::builder() |

1506 | .kind(Some(AhoCorasickKind::DFA)) |

1507 | .start_kind(StartKind::Anchored) |

1508 | .build(&["foo"]) |

1509 | .unwrap(); |

1510 | assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err()); |

1511 | } |

1512 | |

1513 | // This tests that a prefilter does not cause a search to report a match |

1514 | // outside the bounds provided by the caller. |

1515 | // |

1516 | // This is a regression test for a bug I introduced during the rewrite of most |

1517 | // of the crate after 0.7. It was never released. The tricky part here is |

1518 | // ensuring we get a prefilter that can report matches on its own (such as the |

1519 | // packed searcher). Otherwise, prefilters that report false positives might |

1520 | // have searched past the bounds provided by the caller, but confirming the |

1521 | // match would subsequently fail. |

1522 | #[test] |

1523 | fn prefilter_stays_in_bounds() { |

1524 | let ac = AhoCorasick::builder() |

1525 | .match_kind(MatchKind::LeftmostFirst) |

1526 | .build(&["sam", "frodo", "pippin", "merry", "gandalf", "sauron"]) |

1527 | .unwrap(); |

1528 | let haystack = "foo gandalf"; |

1529 | assert_eq!(None, ac.find(Input::new(haystack).range(0..10))); |

1530 | } |

1531 | |

1532 | // See: https://github.com/BurntSushi/aho-corasick/issues/44 |

1533 | // |

1534 | // In short, this test ensures that enabling ASCII case insensitivity does not |

1535 | // visit an exponential number of states when filling in failure transitions. |

1536 | #[test] |

1537 | fn regression_ascii_case_insensitive_no_exponential() { |

1538 | let ac = AhoCorasick::builder() |

1539 | .ascii_case_insensitive(true) |

1540 | .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹"]) |

1541 | .unwrap(); |

1542 | assert!(ac.find("").is_none()); |

1543 | } |

1544 | |

1545 | // See: https://github.com/BurntSushi/aho-corasick/issues/53 |

1546 | // |

1547 | // This test ensures that the rare byte prefilter works in a particular corner |

1548 | // case. In particular, the shift offset detected for '/' in the patterns below |

1549 | // was incorrect, leading to a false negative. |

1550 | #[test] |

1551 | fn regression_rare_byte_prefilter() { |

1552 | use crate::AhoCorasick; |

1553 | |

1554 | let ac = AhoCorasick::new(&["ab/j/", "x/"]).unwrap(); |

1555 | assert!(ac.is_match("ab/j/")); |

1556 | } |

1557 | |

1558 | #[test] |

1559 | fn regression_case_insensitive_prefilter() { |

1560 | for c in b'a'.. b'z'{ |

1561 | for c2 in b'a'.. b'z'{ |

1562 | let c = c as char; |

1563 | let c2 = c2 as char; |

1564 | let needle = format!("{}{}", c, c2).to_lowercase(); |

1565 | let haystack = needle.to_uppercase(); |

1566 | let ac = AhoCorasick::builder() |

1567 | .ascii_case_insensitive(true) |

1568 | .prefilter(true) |

1569 | .build(&[&needle]) |

1570 | .unwrap(); |

1571 | assert_eq!( |

1572 | 1, |

1573 | ac.find_iter(&haystack).count(), |

1574 | "failed to find {:?} in {:?} \n\nautomaton: \n{:?}", |

1575 | needle, |

1576 | haystack, |

1577 | ac, |

1578 | ); |

1579 | } |

1580 | } |

1581 | } |

1582 | |

1583 | // See: https://github.com/BurntSushi/aho-corasick/issues/64 |

1584 | // |

1585 | // This occurs when the rare byte prefilter is active. |

1586 | #[cfg(feature = "std")] |

1587 | #[test] |

1588 | fn regression_stream_rare_byte_prefilter() { |

1589 | use std::io::Read; |

1590 | |

1591 | // NOTE: The test only fails if this ends with j. |

1592 | const MAGIC: [u8; 5] = *b"1234j"; |

1593 | |

1594 | // NOTE: The test fails for value in 8188..=8191 These value put the string |

1595 | // to search accross two call to read because the buffer size is 64KB by |

1596 | // default. |

1597 | const BEGIN: usize = 65_535; |

1598 | |

1599 | /// This is just a structure that implements Reader. The reader |

1600 | /// implementation will simulate a file filled with 0, except for the MAGIC |

1601 | /// string at offset BEGIN. |

1602 | #[derive(Default)] |

1603 | struct R { |

1604 | read: usize, |

1605 | } |

1606 | |

1607 | impl Read for R { |

1608 | fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { |

1609 | if self.read > 100000 { |

1610 | return Ok(0); |

1611 | } |

1612 | let mut from = 0; |

1613 | if self.read < BEGIN { |

1614 | from = buf.len().min(BEGIN - self.read); |

1615 | for x in 0..from { |

1616 | buf[x] = 0; |

1617 | } |

1618 | self.read += from; |

1619 | } |

1620 | if self.read >= BEGIN && self.read <= BEGIN + MAGIC.len() { |

1621 | let to = buf.len().min(BEGIN + MAGIC.len() - self.read + from); |

1622 | if to > from { |

1623 | buf[from..to].copy_from_slice( |

1624 | &MAGIC |

1625 | [self.read - BEGIN..self.read - BEGIN + to - from], |

1626 | ); |

1627 | self.read += to - from; |

1628 | from = to; |

1629 | } |

1630 | } |

1631 | for x in from..buf.len() { |

1632 | buf[x] = 0; |

1633 | self.read += 1; |

1634 | } |

1635 | Ok(buf.len()) |

1636 | } |

1637 | } |

1638 | |

1639 | fn run() -> std::io::Result<()> { |

1640 | let aut = AhoCorasick::builder() |

1641 | // Enable byte classes to make debugging the automaton easier. It |

1642 | // should have no effect on the test result. |

1643 | .byte_classes(false) |

1644 | .build(&[&MAGIC]) |

1645 | .unwrap(); |

1646 | |

1647 | // While reading from a vector, it works: |

1648 | let mut buf = alloc::vec![]; |

1649 | R::default().read_to_end(&mut buf)?; |

1650 | let from_whole = aut.find_iter(&buf).next().unwrap().start(); |

1651 | |

1652 | // But using stream_find_iter fails! |

1653 | let mut file = std::io::BufReader::new(R::default()); |

1654 | let begin = aut |

1655 | .stream_find_iter(&mut file) |

1656 | .next() |

1657 | .expect("NOT FOUND!!!!")? // Panic here |

1658 | .start(); |

1659 | assert_eq!(from_whole, begin); |

1660 | Ok(()) |

1661 | } |

1662 | |

1663 | run().unwrap() |

1664 | } |

1665 |