twoway.rs - Codebrowser

1	/!*
2	An implementation of the [Two-Way substring search algorithm][two-way].
3
4	[`Finder`] can be built for forward searches, while [`FinderRev`] can be built
5	for reverse searches.
6
7	Two-Way makes for a nice general purpose substring search algorithm because of
8	its time and space complexity properties. It also performs well in practice.
9	Namely, with `m = len(needle)` and `n = len(haystack)`, Two-Way takes `O(m)`
10	time to create a finder, `O(1)` space and `O(n)` search time. In other words,
11	the preprocessing step is quick, doesn't require any heap memory and the worst
12	case search time is guaranteed to be linear in the haystack regardless of the
13	size of the needle.
14
15	While vector algorithms will usually beat Two-Way handedly, vector algorithms
16	also usually have pathological or edge cases that are better handled by Two-Way.
17	Moreover, not all targets support vector algorithms or implementations for them
18	simply may not exist yet.
19
20	Two-Way can be found in the `memmem` implementations in at least [GNU libc] and
21	[musl].
22
23	[two-way]: https://en.wikipedia.org/wiki/Two-way_string-matching_algorithm
24	[GNU libc]: https://www.gnu.org/software/libc/
25	[musl]: https://www.musl-libc.org/
26	*/
27
28	use core::cmp;
29
30	use crate::{
31	arch::all::{is_prefix, is_suffix},
32	memmem::Pre,
33	};
34
35	/// A forward substring searcher that uses the Two-Way algorithm.
36	#[derive(Clone, Copy, Debug)]
37	pub struct Finder(TwoWay);
38
39	/// A reverse substring searcher that uses the Two-Way algorithm.
40	#[derive(Clone, Copy, Debug)]
41	pub struct FinderRev(TwoWay);
42
43	/// An implementation of the TwoWay substring search algorithm.
44	///
45	/// This searcher supports forward and reverse search, although not
46	/// simultaneously. It runs in `O(n + m)` time and `O(1)` space, where
47	/// `n ~ len(needle)` and `m ~ len(haystack)`.
48	///
49	/// The implementation here roughly matches that which was developed by
50	/// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The
51	/// changes in this implementation are 1) the use of zero-based indices, 2) a
52	/// heuristic skip table based on the last byte (borrowed from Rust's standard
53	/// library) and 3) the addition of heuristics for a fast skip loop. For (3),
54	/// callers can pass any kind of prefilter they want, but usually it's one
55	/// based on a heuristic that uses an approximate background frequency of bytes
56	/// to choose rare bytes to quickly look for candidate match positions. Note
57	/// though that currently, this prefilter functionality is not exposed directly
58	/// in the public API. (File an issue if you want it and provide a use case
59	/// please.)
60	///
61	/// The heuristic for fast skipping is automatically shut off if it's
62	/// detected to be ineffective at search time. Generally, this only occurs in
63	/// pathological cases. But this is generally necessary in order to preserve
64	/// a `O(n + m)` time bound.
65	///
66	/// The code below is fairly complex and not obviously correct at all. It's
67	/// likely necessary to read the Two-Way paper cited above in order to fully
68	/// grok this code. The essence of it is:
69	///
70	/// 1. Do something to detect a "critical" position in the needle.
71	/// 2. For the current position in the haystack, look if `needle[critical..]`
72	/// matches at that position.
73	/// 3. If so, look if `needle[..critical]` matches.
74	/// 4. If a mismatch occurs, shift the search by some amount based on the
75	/// critical position and a pre-computed shift.
76	///
77	/// This type is wrapped in the forward and reverse finders that expose
78	/// consistent forward or reverse APIs.
79	#[derive(Clone, Copy, Debug)]
80	struct TwoWay {
81	/// A small bitset used as a quick prefilter (in addition to any prefilter
82	/// given by the caller). Namely, a bit `i` is set if and only if `b%64==i`
83	/// for any `b == needle[i]`.
84	///
85	/// When used as a prefilter, if the last byte at the current candidate
86	/// position is NOT in this set, then we can skip that entire candidate
87	/// position (the length of the needle). This is essentially the shift
88	/// trick found in Boyer-Moore, but only applied to bytes that don't appear
89	/// in the needle.
90	///
91	/// N.B. This trick was inspired by something similar in std's
92	/// implementation of Two-Way.
93	byteset: ApproximateByteSet,
94	/// A critical position in needle. Specifically, this position corresponds
95	/// to beginning of either the minimal or maximal suffix in needle. (N.B.
96	/// See SuffixType below for why "minimal" isn't quite the correct word
97	/// here.)
98	///
99	/// This is the position at which every search begins. Namely, search
100	/// starts by scanning text to the right of this position, and only if
101	/// there's a match does the text to the left of this position get scanned.
102	critical_pos: usize,
103	/// The amount we shift by in the Two-Way search algorithm. This
104	/// corresponds to the "small period" and "large period" cases.
105	shift: Shift,
106	}
107
108	impl Finder {
109	/// Create a searcher that finds occurrences of the given `needle`.
110	///
111	/// An empty `needle` results in a match at every position in a haystack,
112	/// including at `haystack.len()`.
113	#[inline]
114	pub fn new(needle: &[u8]) -> Finder {
115	let byteset = ApproximateByteSet::new(needle);
116	let min_suffix = Suffix::forward(needle, SuffixKind::Minimal);
117	let max_suffix = Suffix::forward(needle, SuffixKind::Maximal);
118	let (period_lower_bound, critical_pos) =
119	if min_suffix.pos > max_suffix.pos {
120	(min_suffix.period, min_suffix.pos)
121	} else {
122	(max_suffix.period, max_suffix.pos)
123	};
124	let shift = Shift::forward(needle, period_lower_bound, critical_pos);
125	Finder(TwoWay { byteset, critical_pos, shift })
126	}
127
128	/// Returns the first occurrence of `needle` in the given `haystack`, or
129	/// `None` if no such occurrence could be found.
130	///
131	/// The `needle` given must be the same as the `needle` provided to
132	/// [`Finder::new`].
133	///
134	/// An empty `needle` results in a match at every position in a haystack,
135	/// including at `haystack.len()`.
136	#[inline]
137	pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
138	self.find_with_prefilter(None, haystack, needle)
139	}
140
141	/// This is like [`Finder::find`], but it accepts a prefilter for
142	/// accelerating searches.
143	///
144	/// Currently this is not exposed in the public API because, at the time
145	/// of writing, I didn't want to spend time thinking about how to expose
146	/// the prefilter infrastructure (if at all). If you have a compelling use
147	/// case for exposing this routine, please create an issue. Do not* open*
148	/// a PR that just exposes `Pre` and friends. Exporting this routine will
149	/// require API design.
150	#[inline(always)]
151	pub(crate) fn find_with_prefilter(
152	&self,
153	pre: Option<Pre<'_>>,
154	haystack: &[u8],
155	needle: &[u8],
156	) -> Option<usize> {
157	match self.0.shift {
158	Shift::Small { period } => {
159	self.find_small_imp(pre, haystack, needle, period)
160	}
161	Shift::Large { shift } => {
162	self.find_large_imp(pre, haystack, needle, shift)
163	}
164	}
165	}
166
167	// Each of the two search implementations below can be accelerated by a
168	// prefilter, but it is not always enabled. To avoid its overhead when
169	// its disabled, we explicitly inline each search implementation based on
170	// whether a prefilter will be used or not. The decision on which to use
171	// is made in the parent meta searcher.
172
173	#[inline(always)]
174	fn find_small_imp(
175	&self,
176	mut pre: Option<Pre<'_>>,
177	haystack: &[u8],
178	needle: &[u8],
179	period: usize,
180	) -> Option<usize> {
181	let mut pos = `0`;
182	let mut shift = `0`;
183	let last_byte_pos = match needle.len().checked_sub(`1`) {
184	None => return Some(pos),
185	Some(last_byte) => last_byte,
186	};
187	while pos + needle.len() <= haystack.len() {
188	let mut i = cmp::max(self.0.critical_pos, shift);
189	if let Some(pre) = pre.as_mut() {
190	if pre.is_effective() {
191	pos += pre.find(&haystack[pos..])?;
192	shift = `0`;
193	i = self.0.critical_pos;
194	if pos + needle.len() > haystack.len() {
195	return None;
196	}
197	}
198	}
199	if !self.0.byteset.contains(haystack[pos + last_byte_pos]) {
200	pos += needle.len();
201	shift = `0`;
202	continue;
203	}
204	while i < needle.len() && needle[i] == haystack[pos + i] {
205	i += `1`;
206	}
207	if i < needle.len() {
208	pos += i - self.0.critical_pos + `1`;
209	shift = `0`;
210	} else {
211	let mut j = self.0.critical_pos;
212	while j > shift && needle[j] == haystack[pos + j] {
213	j -= `1`;
214	}
215	if j <= shift && needle[shift] == haystack[pos + shift] {
216	return Some(pos);
217	}
218	pos += period;
219	shift = needle.len() - period;
220	}
221	}
222	None
223	}
224
225	#[inline(always)]
226	fn find_large_imp(
227	&self,
228	mut pre: Option<Pre<'_>>,
229	haystack: &[u8],
230	needle: &[u8],
231	shift: usize,
232	) -> Option<usize> {
233	let mut pos = `0`;
234	let last_byte_pos = match needle.len().checked_sub(`1`) {
235	None => return Some(pos),
236	Some(last_byte) => last_byte,
237	};
238	'outer: while pos + needle.len() <= haystack.len() {
239	if let Some(pre) = pre.as_mut() {
240	if pre.is_effective() {
241	pos += pre.find(&haystack[pos..])?;
242	if pos + needle.len() > haystack.len() {
243	return None;
244	}
245	}
246	}
247
248	if !self.0.byteset.contains(haystack[pos + last_byte_pos]) {
249	pos += needle.len();
250	continue;
251	}
252	let mut i = self.0.critical_pos;
253	while i < needle.len() && needle[i] == haystack[pos + i] {
254	i += `1`;
255	}
256	if i < needle.len() {
257	pos += i - self.0.critical_pos + `1`;
258	} else {
259	for j in (`0`..self.0.critical_pos).rev() {
260	if needle[j] != haystack[pos + j] {
261	pos += shift;
262	continue 'outer;
263	}
264	}
265	return Some(pos);
266	}
267	}
268	None
269	}
270	}
271
272	impl FinderRev {
273	/// Create a searcher that finds occurrences of the given `needle`.
274	///
275	/// An empty `needle` results in a match at every position in a haystack,
276	/// including at `haystack.len()`.
277	#[inline]
278	pub fn new(needle: &[u8]) -> FinderRev {
279	let byteset = ApproximateByteSet::new(needle);
280	let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal);
281	let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal);
282	let (period_lower_bound, critical_pos) =
283	if min_suffix.pos < max_suffix.pos {
284	(min_suffix.period, min_suffix.pos)
285	} else {
286	(max_suffix.period, max_suffix.pos)
287	};
288	let shift = Shift::reverse(needle, period_lower_bound, critical_pos);
289	FinderRev(TwoWay { byteset, critical_pos, shift })
290	}
291
292	/// Returns the last occurrence of `needle` in the given `haystack`, or
293	/// `None` if no such occurrence could be found.
294	///
295	/// The `needle` given must be the same as the `needle` provided to
296	/// [`FinderRev::new`].
297	///
298	/// An empty `needle` results in a match at every position in a haystack,
299	/// including at `haystack.len()`.
300	#[inline]
301	pub fn rfind(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
302	// For the reverse case, we don't use a prefilter. It's plausible that
303	// perhaps we should, but it's a lot of additional code to do it, and
304	// it's not clear that it's actually worth it. If you have a really
305	// compelling use case for this, please file an issue.
306	match self.0.shift {
307	Shift::Small { period } => {
308	self.rfind_small_imp(haystack, needle, period)
309	}
310	Shift::Large { shift } => {
311	self.rfind_large_imp(haystack, needle, shift)
312	}
313	}
314	}
315
316	#[inline(always)]
317	fn rfind_small_imp(
318	&self,
319	haystack: &[u8],
320	needle: &[u8],
321	period: usize,
322	) -> Option<usize> {
323	let nlen = needle.len();
324	let mut pos = haystack.len();
325	let mut shift = nlen;
326	let first_byte = match needle.get(`0`) {
327	None => return Some(pos),
328	Some(&first_byte) => first_byte,
329	};
330	while pos >= nlen {
331	if !self.0.byteset.contains(haystack[pos - nlen]) {
332	pos -= nlen;
333	shift = nlen;
334	continue;
335	}
336	let mut i = cmp::min(self.0.critical_pos, shift);
337	while i > `0` && needle[i - `1`] == haystack[pos - nlen + i - `1`] {
338	i -= `1`;
339	}
340	if i > `0` \|\| first_byte != haystack[pos - nlen] {
341	pos -= self.0.critical_pos - i + `1`;
342	shift = nlen;
343	} else {
344	let mut j = self.0.critical_pos;
345	while j < shift && needle[j] == haystack[pos - nlen + j] {
346	j += `1`;
347	}
348	if j >= shift {
349	return Some(pos - nlen);
350	}
351	pos -= period;
352	shift = period;
353	}
354	}
355	None
356	}
357
358	#[inline(always)]
359	fn rfind_large_imp(
360	&self,
361	haystack: &[u8],
362	needle: &[u8],
363	shift: usize,
364	) -> Option<usize> {
365	let nlen = needle.len();
366	let mut pos = haystack.len();
367	let first_byte = match needle.get(`0`) {
368	None => return Some(pos),
369	Some(&first_byte) => first_byte,
370	};
371	while pos >= nlen {
372	if !self.0.byteset.contains(haystack[pos - nlen]) {
373	pos -= nlen;
374	continue;
375	}
376	let mut i = self.0.critical_pos;
377	while i > `0` && needle[i - `1`] == haystack[pos - nlen + i - `1`] {
378	i -= `1`;
379	}
380	if i > `0` \|\| first_byte != haystack[pos - nlen] {
381	pos -= self.0.critical_pos - i + `1`;
382	} else {
383	let mut j = self.0.critical_pos;
384	while j < nlen && needle[j] == haystack[pos - nlen + j] {
385	j += `1`;
386	}
387	if j == nlen {
388	return Some(pos - nlen);
389	}
390	pos -= shift;
391	}
392	}
393	None
394	}
395	}
396
397	/// A representation of the amount we're allowed to shift by during Two-Way
398	/// search.
399	///
400	/// When computing a critical factorization of the needle, we find the position
401	/// of the critical factorization by finding the needle's maximal (or minimal)
402	/// suffix, along with the period of that suffix. It turns out that the period
403	/// of that suffix is a lower bound on the period of the needle itself.
404	///
405	/// This lower bound is equivalent to the actual period of the needle in
406	/// some cases. To describe that case, we denote the needle as `x` where
407	/// `x = uv` and `v` is the lexicographic maximal suffix of `v`. The lower
408	/// bound given here is always the period of `v`, which is `<= period(x)`. The
409	/// case where `period(v) == period(x)` occurs when `len(u) < (len(x) / 2)` and
410	/// where `u` is a suffix of `v[0..period(v)]`.
411	///
412	/// This case is important because the search algorithm for when the
413	/// periods are equivalent is slightly different than the search algorithm
414	/// for when the periods are not equivalent. In particular, when they aren't
415	/// equivalent, we know that the period of the needle is no less than half its
416	/// length. In this case, we shift by an amount less than or equal to the
417	/// period of the needle (determined by the maximum length of the components
418	/// of the critical factorization of `x`, i.e., `max(len(u), len(v))`)..
419	///
420	/// The above two cases are represented by the variants below. Each entails
421	/// a different instantiation of the Two-Way search algorithm.
422	///
423	/// N.B. If we could find a way to compute the exact period in all cases,
424	/// then we could collapse this case analysis and simplify the algorithm. The
425	/// Two-Way paper suggests this is possible, but more reading is required to
426	/// grok why the authors didn't pursue that path.
427	#[derive(Clone, Copy, Debug)]
428	enum Shift {
429	Small { period: usize },
430	Large { shift: usize },
431	}
432
433	impl Shift {
434	/// Compute the shift for a given needle in the forward direction.
435	///
436	/// This requires a lower bound on the period and a critical position.
437	/// These can be computed by extracting both the minimal and maximal
438	/// lexicographic suffixes, and choosing the right-most starting position.
439	/// The lower bound on the period is then the period of the chosen suffix.
440	fn forward(
441	needle: &[u8],
442	period_lower_bound: usize,
443	critical_pos: usize,
444	) -> Shift {
445	let large = cmp::max(critical_pos, needle.len() - critical_pos);
446	if critical_pos * `2` >= needle.len() {
447	return Shift::Large { shift: large };
448	}
449
450	let (u, v) = needle.split_at(critical_pos);
451	if !is_suffix(&v[..period_lower_bound], u) {
452	return Shift::Large { shift: large };
453	}
454	Shift::Small { period: period_lower_bound }
455	}
456
457	/// Compute the shift for a given needle in the reverse direction.
458	///
459	/// This requires a lower bound on the period and a critical position.
460	/// These can be computed by extracting both the minimal and maximal
461	/// lexicographic suffixes, and choosing the left-most starting position.
462	/// The lower bound on the period is then the period of the chosen suffix.
463	fn reverse(
464	needle: &[u8],
465	period_lower_bound: usize,
466	critical_pos: usize,
467	) -> Shift {
468	let large = cmp::max(critical_pos, needle.len() - critical_pos);
469	if (needle.len() - critical_pos) * `2` >= needle.len() {
470	return Shift::Large { shift: large };
471	}
472
473	let (v, u) = needle.split_at(critical_pos);
474	if !is_prefix(&v[v.len() - period_lower_bound..], u) {
475	return Shift::Large { shift: large };
476	}
477	Shift::Small { period: period_lower_bound }
478	}
479	}
480
481	/// A suffix extracted from a needle along with its period.
482	#[derive(Debug)]
483	struct Suffix {
484	/// The starting position of this suffix.
485	///
486	/// If this is a forward suffix, then `&bytes[pos..]` can be used. If this
487	/// is a reverse suffix, then `&bytes[..pos]` can be used. That is, for
488	/// forward suffixes, this is an inclusive starting position, where as for
489	/// reverse suffixes, this is an exclusive ending position.
490	pos: usize,
491	/// The period of this suffix.
492	///
493	/// Note that this is NOT necessarily the period of the string from which
494	/// this suffix comes from. (It is always less than or equal to the period
495	/// of the original string.)
496	period: usize,
497	}
498
499	impl Suffix {
500	fn forward(needle: &[u8], kind: SuffixKind) -> Suffix {
501	// suffix represents our maximal (or minimal) suffix, along with
502	// its period.
503	let mut suffix = Suffix { pos: `0`, period: `1` };
504	// The start of a suffix in `needle` that we are considering as a
505	// more maximal (or minimal) suffix than what's in `suffix`.
506	let mut candidate_start = `1`;
507	// The current offset of our suffixes that we're comparing.
508	//
509	// When the characters at this offset are the same, then we mush on
510	// to the next position since no decision is possible. When the
511	// candidate's character is greater (or lesser) than the corresponding
512	// character than our current maximal (or minimal) suffix, then the
513	// current suffix is changed over to the candidate and we restart our
514	// search. Otherwise, the candidate suffix is no good and we restart
515	// our search on the next candidate.
516	//
517	// The three cases above correspond to the three cases in the loop
518	// below.
519	let mut offset = `0`;
520
521	while candidate_start + offset < needle.len() {
522	let current = needle[suffix.pos + offset];
523	let candidate = needle[candidate_start + offset];
524	match kind.cmp(current, candidate) {
525	SuffixOrdering::Accept => {
526	suffix = Suffix { pos: candidate_start, period: `1` };
527	candidate_start += `1`;
528	offset = `0`;
529	}
530	SuffixOrdering::Skip => {
531	candidate_start += offset + `1`;
532	offset = `0`;
533	suffix.period = candidate_start - suffix.pos;
534	}
535	SuffixOrdering::Push => {
536	if offset + `1` == suffix.period {
537	candidate_start += suffix.period;
538	offset = `0`;
539	} else {
540	offset += `1`;
541	}
542	}
543	}
544	}
545	suffix
546	}
547
548	fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix {
549	// See the comments in `forward` for how this works.
550	let mut suffix = Suffix { pos: needle.len(), period: `1` };
551	if needle.len() == `1` {
552	return suffix;
553	}
554	let mut candidate_start = match needle.len().checked_sub(`1`) {
555	None => return suffix,
556	Some(candidate_start) => candidate_start,
557	};
558	let mut offset = `0`;
559
560	while offset < candidate_start {
561	let current = needle[suffix.pos - offset - `1`];
562	let candidate = needle[candidate_start - offset - `1`];
563	match kind.cmp(current, candidate) {
564	SuffixOrdering::Accept => {
565	suffix = Suffix { pos: candidate_start, period: `1` };
566	candidate_start -= `1`;
567	offset = `0`;
568	}
569	SuffixOrdering::Skip => {
570	candidate_start -= offset + `1`;
571	offset = `0`;
572	suffix.period = suffix.pos - candidate_start;
573	}
574	SuffixOrdering::Push => {
575	if offset + `1` == suffix.period {
576	candidate_start -= suffix.period;
577	offset = `0`;
578	} else {
579	offset += `1`;
580	}
581	}
582	}
583	}
584	suffix
585	}
586	}
587
588	/// The kind of suffix to extract.
589	#[derive(Clone, Copy, Debug)]
590	enum SuffixKind {
591	/// Extract the smallest lexicographic suffix from a string.
592	///
593	/// Technically, this doesn't actually pick the smallest lexicographic
594	/// suffix. e.g., Given the choice between `a` and `aa`, this will choose
595	/// the latter over the former, even though `a < aa`. The reasoning for
596	/// this isn't clear from the paper, but it still smells like a minimal
597	/// suffix.
598	Minimal,
599	/// Extract the largest lexicographic suffix from a string.
600	///
601	/// Unlike `Minimal`, this really does pick the maximum suffix. e.g., Given
602	/// the choice between `z` and `zz`, this will choose the latter over the
603	/// former.
604	Maximal,
605	}
606
607	/// The result of comparing corresponding bytes between two suffixes.
608	#[derive(Clone, Copy, Debug)]
609	enum SuffixOrdering {
610	/// This occurs when the given candidate byte indicates that the candidate
611	/// suffix is better than the current maximal (or minimal) suffix. That is,
612	/// the current candidate suffix should supplant the current maximal (or
613	/// minimal) suffix.
614	Accept,
615	/// This occurs when the given candidate byte excludes the candidate suffix
616	/// from being better than the current maximal (or minimal) suffix. That
617	/// is, the current candidate suffix should be dropped and the next one
618	/// should be considered.
619	Skip,
620	/// This occurs when no decision to accept or skip the candidate suffix
621	/// can be made, e.g., when corresponding bytes are equivalent. In this
622	/// case, the next corresponding bytes should be compared.
623	Push,
624	}
625
626	impl SuffixKind {
627	/// Returns true if and only if the given candidate byte indicates that
628	/// it should replace the current suffix as the maximal (or minimal)
629	/// suffix.
630	fn cmp(self, current: u8, candidate: u8) -> SuffixOrdering {
631	use self::SuffixOrdering::*;
632
633	match self {
634	SuffixKind::Minimal if candidate < current => Accept,
635	SuffixKind::Minimal if candidate > current => Skip,
636	SuffixKind::Minimal => Push,
637	SuffixKind::Maximal if candidate > current => Accept,
638	SuffixKind::Maximal if candidate < current => Skip,
639	SuffixKind::Maximal => Push,
640	}
641	}
642	}
643
644	/// A bitset used to track whether a particular byte exists in a needle or not.
645	///
646	/// Namely, bit 'i' is set if and only if byte%64==i for any byte in the
647	/// needle. If a particular byte in the haystack is NOT in this set, then one
648	/// can conclude that it is also not in the needle, and thus, one can advance
649	/// in the haystack by needle.len() bytes.
650	#[derive(Clone, Copy, Debug)]
651	struct ApproximateByteSet(u64);
652
653	impl ApproximateByteSet {
654	/// Create a new set from the given needle.
655	fn new(needle: &[u8]) -> ApproximateByteSet {
656	let mut bits = `0`;
657	for &b in needle {
658	bits \|= `1` << (b % `64`);
659	}
660	ApproximateByteSet(bits)
661	}
662
663	/// Return true if and only if the given byte might be in this set. This
664	/// may return a false positive, but will never return a false negative.
665	#[inline(always)]
666	fn contains(&self, byte: u8) -> bool {
667	self.0 & (`1` << (byte % `64`)) != `0`
668	}
669	}
670
671	#[cfg(test)]
672	mod tests {
673	use alloc::vec::Vec;
674
675	use super::*;
676
677	/// Convenience wrapper for computing the suffix as a byte string.
678	fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) {
679	let s = Suffix::forward(needle, kind);
680	(&needle[s.pos..], s.period)
681	}
682
683	/// Convenience wrapper for computing the reverse suffix as a byte string.
684	fn get_suffix_reverse(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) {
685	let s = Suffix::reverse(needle, kind);
686	(&needle[..s.pos], s.period)
687	}
688
689	/// Return all of the non-empty suffixes in the given byte string.
690	fn suffixes(bytes: &[u8]) -> Vec<&[u8]> {
691	(`0`..bytes.len()).map(\|i\| &bytes[i..]).collect()
692	}
693
694	/// Return the lexicographically maximal suffix of the given byte string.
695	fn naive_maximal_suffix_forward(needle: &[u8]) -> &[u8] {
696	let mut sufs = suffixes(needle);
697	sufs.sort();
698	sufs.pop().unwrap()
699	}
700
701	/// Return the lexicographically maximal suffix of the reverse of the given
702	/// byte string.
703	fn naive_maximal_suffix_reverse(needle: &[u8]) -> Vec<u8> {
704	let mut reversed = needle.to_vec();
705	reversed.reverse();
706	let mut got = naive_maximal_suffix_forward(&reversed).to_vec();
707	got.reverse();
708	got
709	}
710
711	define_substring_forward_quickcheck!(\|h, n\| Some(
712	Finder::new(n).find(h, n)
713	));
714	define_substring_reverse_quickcheck!(\|h, n\| Some(
715	FinderRev::new(n).rfind(h, n)
716	));
717
718	#[test]
719	fn forward() {
720	crate::tests::substring::Runner::new()
721	.fwd(\|h, n\| Some(Finder::new(n).find(h, n)))
722	.run();
723	}
724
725	#[test]
726	fn reverse() {
727	crate::tests::substring::Runner::new()
728	.rev(\|h, n\| Some(FinderRev::new(n).rfind(h, n)))
729	.run();
730	}
731
732	#[test]
733	fn suffix_forward() {
734	macro_rules! assert_suffix_min {
735	($given:expr, $expected:expr, $period:expr) => {
736	let (got_suffix, got_period) =
737	get_suffix_forward($given.as_bytes(), SuffixKind::Minimal);
738	let got_suffix = core::str::from_utf8(got_suffix).unwrap();
739	assert_eq!(($expected, $period), (got_suffix, got_period));
740	};
741	}
742
743	macro_rules! assert_suffix_max {
744	($given:expr, $expected:expr, $period:expr) => {
745	let (got_suffix, got_period) =
746	get_suffix_forward($given.as_bytes(), SuffixKind::Maximal);
747	let got_suffix = core::str::from_utf8(got_suffix).unwrap();
748	assert_eq!(($expected, $period), (got_suffix, got_period));
749	};
750	}
751
752	assert_suffix_min!("a", "a", `1`);
753	assert_suffix_max!("a", "a", `1`);
754
755	assert_suffix_min!("ab", "ab", `2`);
756	assert_suffix_max!("ab", "b", `1`);
757
758	assert_suffix_min!("ba", "a", `1`);
759	assert_suffix_max!("ba", "ba", `2`);
760
761	assert_suffix_min!("abc", "abc", `3`);
762	assert_suffix_max!("abc", "c", `1`);
763
764	assert_suffix_min!("acb", "acb", `3`);
765	assert_suffix_max!("acb", "cb", `2`);
766
767	assert_suffix_min!("cba", "a", `1`);
768	assert_suffix_max!("cba", "cba", `3`);
769
770	assert_suffix_min!("abcabc", "abcabc", `3`);
771	assert_suffix_max!("abcabc", "cabc", `3`);
772
773	assert_suffix_min!("abcabcabc", "abcabcabc", `3`);
774	assert_suffix_max!("abcabcabc", "cabcabc", `3`);
775
776	assert_suffix_min!("abczz", "abczz", `5`);
777	assert_suffix_max!("abczz", "zz", `1`);
778
779	assert_suffix_min!("zzabc", "abc", `3`);
780	assert_suffix_max!("zzabc", "zzabc", `5`);
781
782	assert_suffix_min!("aaa", "aaa", `1`);
783	assert_suffix_max!("aaa", "aaa", `1`);
784
785	assert_suffix_min!("foobar", "ar", `2`);
786	assert_suffix_max!("foobar", "r", `1`);
787	}
788
789	#[test]
790	fn suffix_reverse() {
791	macro_rules! assert_suffix_min {
792	($given:expr, $expected:expr, $period:expr) => {
793	let (got_suffix, got_period) =
794	get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal);
795	let got_suffix = core::str::from_utf8(got_suffix).unwrap();
796	assert_eq!(($expected, $period), (got_suffix, got_period));
797	};
798	}
799
800	macro_rules! assert_suffix_max {
801	($given:expr, $expected:expr, $period:expr) => {
802	let (got_suffix, got_period) =
803	get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal);
804	let got_suffix = core::str::from_utf8(got_suffix).unwrap();
805	assert_eq!(($expected, $period), (got_suffix, got_period));
806	};
807	}
808
809	assert_suffix_min!("a", "a", `1`);
810	assert_suffix_max!("a", "a", `1`);
811
812	assert_suffix_min!("ab", "a", `1`);
813	assert_suffix_max!("ab", "ab", `2`);
814
815	assert_suffix_min!("ba", "ba", `2`);
816	assert_suffix_max!("ba", "b", `1`);
817
818	assert_suffix_min!("abc", "a", `1`);
819	assert_suffix_max!("abc", "abc", `3`);
820
821	assert_suffix_min!("acb", "a", `1`);
822	assert_suffix_max!("acb", "ac", `2`);
823
824	assert_suffix_min!("cba", "cba", `3`);
825	assert_suffix_max!("cba", "c", `1`);
826
827	assert_suffix_min!("abcabc", "abca", `3`);
828	assert_suffix_max!("abcabc", "abcabc", `3`);
829
830	assert_suffix_min!("abcabcabc", "abcabca", `3`);
831	assert_suffix_max!("abcabcabc", "abcabcabc", `3`);
832
833	assert_suffix_min!("abczz", "a", `1`);
834	assert_suffix_max!("abczz", "abczz", `5`);
835
836	assert_suffix_min!("zzabc", "zza", `3`);
837	assert_suffix_max!("zzabc", "zz", `1`);
838
839	assert_suffix_min!("aaa", "aaa", `1`);
840	assert_suffix_max!("aaa", "aaa", `1`);
841	}
842
843	#[cfg(not(miri))]
844	quickcheck::quickcheck! {
845	fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool {
846	if bytes.is_empty() {
847	return `true`;
848	}
849
850	let (got, _) = get_suffix_forward(&bytes, SuffixKind::Maximal);
851	let expected = naive_maximal_suffix_forward(&bytes);
852	got == expected
853	}
854
855	fn qc_suffix_reverse_maximal(bytes: Vec<u8>) -> bool {
856	if bytes.is_empty() {
857	return `true`;
858	}
859
860	let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal);
861	let expected = naive_maximal_suffix_reverse(&bytes);
862	expected == got
863	}
864	}
865
866	// This is a regression test caught by quickcheck that exercised a bug in
867	// the reverse small period handling. The bug was that we were using 'if j
868	// == shift' to determine if a match occurred, but the correct guard is 'if
869	// j >= shift', which matches the corresponding guard in the forward impl.
870	#[test]
871	fn regression_rev_small_period() {
872	let rfind = \|h, n\| FinderRev::new(n).rfind(h, n);
873	let haystack = "ababaz";
874	let needle = "abab";
875	assert_eq!(Some(`0`), rfind(haystack.as_bytes(), needle.as_bytes()));
876	}
877	}
878