1use core::{
2 fmt::Debug,
3 panic::{RefUnwindSafe, UnwindSafe},
4};
5
6use alloc::sync::Arc;
7
8use crate::packed::{ext::Pointer, pattern::Patterns, teddy::generic::Match};
9
10/// A builder for constructing a Teddy matcher.
11///
12/// The builder primarily permits fine grained configuration of the Teddy
13/// matcher. Most options are made only available for testing/benchmarking
14/// purposes. In reality, options are automatically determined by the nature
15/// and number of patterns given to the builder.
16#[derive(Clone, Debug)]
17pub(crate) struct Builder {
18 /// When none, this is automatically determined. Otherwise, `false` means
19 /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used
20 /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't
21 /// available and Fat Teddy was requested, no matcher will be built.
22 only_fat: Option<bool>,
23 /// When none, this is automatically determined. Otherwise, `false` means
24 /// that 128-bit vectors will be used (up to SSSE3 instructions) where as
25 /// `true` means that 256-bit vectors will be used. As with `fat`, if
26 /// 256-bit vectors are requested and they aren't available, then a
27 /// searcher will not be built.
28 only_256bit: Option<bool>,
29 /// When true (the default), the number of patterns will be used as a
30 /// heuristic for refusing construction of a Teddy searcher. The point here
31 /// is that too many patterns can overwhelm Teddy. But this can be disabled
32 /// in cases where the caller knows better.
33 heuristic_pattern_limits: bool,
34}
35
36impl Default for Builder {
37 fn default() -> Builder {
38 Builder::new()
39 }
40}
41
42impl Builder {
43 /// Create a new builder for configuring a Teddy matcher.
44 pub(crate) fn new() -> Builder {
45 Builder {
46 only_fat: None,
47 only_256bit: None,
48 heuristic_pattern_limits: true,
49 }
50 }
51
52 /// Build a matcher for the set of patterns given. If a matcher could not
53 /// be built, then `None` is returned.
54 ///
55 /// Generally, a matcher isn't built if the necessary CPU features aren't
56 /// available, an unsupported target or if the searcher is believed to be
57 /// slower than standard techniques (i.e., if there are too many literals).
58 pub(crate) fn build(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
59 self.build_imp(patterns)
60 }
61
62 /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses
63 /// 16 buckets where as Slim Teddy uses 8 buckets. More buckets are useful
64 /// for a larger set of literals.
65 ///
66 /// `None` is the default, which results in an automatic selection based
67 /// on the number of literals and available CPU features.
68 pub(crate) fn only_fat(&mut self, yes: Option<bool>) -> &mut Builder {
69 self.only_fat = yes;
70 self
71 }
72
73 /// Request the use of 256-bit vectors (true) or 128-bit vectors (false).
74 /// Generally, a larger vector size is better since it either permits
75 /// matching more patterns or matching more bytes in the haystack at once.
76 ///
77 /// `None` is the default, which results in an automatic selection based on
78 /// the number of literals and available CPU features.
79 pub(crate) fn only_256bit(&mut self, yes: Option<bool>) -> &mut Builder {
80 self.only_256bit = yes;
81 self
82 }
83
84 /// Request that heuristic limitations on the number of patterns be
85 /// employed. This useful to disable for benchmarking where one wants to
86 /// explore how Teddy performs on large number of patterns even if the
87 /// heuristics would otherwise refuse construction.
88 ///
89 /// This is enabled by default.
90 pub(crate) fn heuristic_pattern_limits(
91 &mut self,
92 yes: bool,
93 ) -> &mut Builder {
94 self.heuristic_pattern_limits = yes;
95 self
96 }
97
98 fn build_imp(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
99 let patlimit = self.heuristic_pattern_limits;
100 // There's no particular reason why we limit ourselves to little endian
101 // here, but it seems likely that some parts of Teddy as they are
102 // currently written (e.g., the uses of `trailing_zeros`) are likely
103 // wrong on non-little-endian targets. Such things are likely easy to
104 // fix, but at the time of writing (2023/09/18), I actually do not know
105 // how to test this code on a big-endian target. So for now, we're
106 // conservative and just bail out.
107 if !cfg!(target_endian = "little") {
108 debug!("skipping Teddy because target isn't little endian");
109 return None;
110 }
111 // Too many patterns will overwhelm Teddy and likely lead to slow
112 // downs, typically in the verification step.
113 if patlimit && patterns.len() > 64 {
114 debug!("skipping Teddy because of too many patterns");
115 return None;
116 }
117
118 #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
119 {
120 use self::x86_64::{FatAVX2, SlimAVX2, SlimSSSE3};
121
122 let mask_len = core::cmp::min(4, patterns.minimum_len());
123 let beefy = patterns.len() > 32;
124 let has_avx2 = self::x86_64::is_available_avx2();
125 let has_ssse3 = has_avx2 || self::x86_64::is_available_ssse3();
126 let use_avx2 = if self.only_256bit == Some(true) {
127 if !has_avx2 {
128 debug!(
129 "skipping Teddy because avx2 was demanded but unavailable"
130 );
131 return None;
132 }
133 true
134 } else if self.only_256bit == Some(false) {
135 if !has_ssse3 {
136 debug!(
137 "skipping Teddy because ssse3 was demanded but unavailable"
138 );
139 return None;
140 }
141 false
142 } else if !has_ssse3 && !has_avx2 {
143 debug!(
144 "skipping Teddy because ssse3 and avx2 are unavailable"
145 );
146 return None;
147 } else {
148 has_avx2
149 };
150 let fat = match self.only_fat {
151 None => use_avx2 && beefy,
152 Some(false) => false,
153 Some(true) if !use_avx2 => {
154 debug!(
155 "skipping Teddy because fat was demanded, but fat \
156 Teddy requires avx2 which is unavailable"
157 );
158 return None;
159 }
160 Some(true) => true,
161 };
162 // Just like for aarch64, it's possible that too many patterns will
163 // overhwelm Teddy. Unlike aarch64 though, we have Fat teddy which
164 // helps things scale a bit more by spreading patterns over more
165 // buckets.
166 //
167 // These thresholds were determined by looking at the measurements
168 // for the rust/aho-corasick/packed/leftmost-first and
169 // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
170 // benchmarks.
171 if patlimit && mask_len == 1 && patterns.len() > 16 {
172 debug!(
173 "skipping Teddy (mask len: 1) because there are \
174 too many patterns",
175 );
176 return None;
177 }
178 match (mask_len, use_avx2, fat) {
179 (1, false, _) => {
180 debug!("Teddy choice: 128-bit slim, 1 byte");
181 SlimSSSE3::<1>::new(&patterns)
182 }
183 (1, true, false) => {
184 debug!("Teddy choice: 256-bit slim, 1 byte");
185 SlimAVX2::<1>::new(&patterns)
186 }
187 (1, true, true) => {
188 debug!("Teddy choice: 256-bit fat, 1 byte");
189 FatAVX2::<1>::new(&patterns)
190 }
191 (2, false, _) => {
192 debug!("Teddy choice: 128-bit slim, 2 bytes");
193 SlimSSSE3::<2>::new(&patterns)
194 }
195 (2, true, false) => {
196 debug!("Teddy choice: 256-bit slim, 2 bytes");
197 SlimAVX2::<2>::new(&patterns)
198 }
199 (2, true, true) => {
200 debug!("Teddy choice: 256-bit fat, 2 bytes");
201 FatAVX2::<2>::new(&patterns)
202 }
203 (3, false, _) => {
204 debug!("Teddy choice: 128-bit slim, 3 bytes");
205 SlimSSSE3::<3>::new(&patterns)
206 }
207 (3, true, false) => {
208 debug!("Teddy choice: 256-bit slim, 3 bytes");
209 SlimAVX2::<3>::new(&patterns)
210 }
211 (3, true, true) => {
212 debug!("Teddy choice: 256-bit fat, 3 bytes");
213 FatAVX2::<3>::new(&patterns)
214 }
215 (4, false, _) => {
216 debug!("Teddy choice: 128-bit slim, 4 bytes");
217 SlimSSSE3::<4>::new(&patterns)
218 }
219 (4, true, false) => {
220 debug!("Teddy choice: 256-bit slim, 4 bytes");
221 SlimAVX2::<4>::new(&patterns)
222 }
223 (4, true, true) => {
224 debug!("Teddy choice: 256-bit fat, 4 bytes");
225 FatAVX2::<4>::new(&patterns)
226 }
227 _ => {
228 debug!("no supported Teddy configuration found");
229 None
230 }
231 }
232 }
233 #[cfg(target_arch = "aarch64")]
234 {
235 use self::aarch64::SlimNeon;
236
237 let mask_len = core::cmp::min(4, patterns.minimum_len());
238 if self.only_256bit == Some(true) {
239 debug!(
240 "skipping Teddy because 256-bits were demanded \
241 but unavailable"
242 );
243 return None;
244 }
245 if self.only_fat == Some(true) {
246 debug!(
247 "skipping Teddy because fat was demanded but unavailable"
248 );
249 }
250 // Since we don't have Fat teddy in aarch64 (I think we'd want at
251 // least 256-bit vectors for that), we need to be careful not to
252 // allow too many patterns as it might overwhelm Teddy. Generally
253 // speaking, as the mask length goes up, the more patterns we can
254 // handle because the mask length results in fewer candidates
255 // generated.
256 //
257 // These thresholds were determined by looking at the measurements
258 // for the rust/aho-corasick/packed/leftmost-first and
259 // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
260 // benchmarks.
261 match mask_len {
262 1 => {
263 if patlimit && patterns.len() > 16 {
264 debug!(
265 "skipping Teddy (mask len: 1) because there are \
266 too many patterns",
267 );
268 }
269 debug!("Teddy choice: 128-bit slim, 1 byte");
270 SlimNeon::<1>::new(&patterns)
271 }
272 2 => {
273 if patlimit && patterns.len() > 32 {
274 debug!(
275 "skipping Teddy (mask len: 2) because there are \
276 too many patterns",
277 );
278 }
279 debug!("Teddy choice: 128-bit slim, 2 bytes");
280 SlimNeon::<2>::new(&patterns)
281 }
282 3 => {
283 if patlimit && patterns.len() > 48 {
284 debug!(
285 "skipping Teddy (mask len: 3) because there are \
286 too many patterns",
287 );
288 }
289 debug!("Teddy choice: 128-bit slim, 3 bytes");
290 SlimNeon::<3>::new(&patterns)
291 }
292 4 => {
293 debug!("Teddy choice: 128-bit slim, 4 bytes");
294 SlimNeon::<4>::new(&patterns)
295 }
296 _ => {
297 debug!("no supported Teddy configuration found");
298 None
299 }
300 }
301 }
302 #[cfg(not(any(
303 all(target_arch = "x86_64", target_feature = "sse2"),
304 target_arch = "aarch64"
305 )))]
306 {
307 None
308 }
309 }
310}
311
312/// A searcher that dispatches to one of several possible Teddy variants.
313#[derive(Clone, Debug)]
314pub(crate) struct Searcher {
315 /// The Teddy variant we use. We use dynamic dispatch under the theory that
316 /// it results in better codegen then a enum, although this is a specious
317 /// claim.
318 ///
319 /// This `Searcher` is essentially a wrapper for a `SearcherT` trait
320 /// object. We just make `memory_usage` and `minimum_len` available without
321 /// going through dynamic dispatch.
322 imp: Arc<dyn SearcherT>,
323 /// Total heap memory used by the Teddy variant.
324 memory_usage: usize,
325 /// The minimum haystack length this searcher can handle. It is intended
326 /// for callers to use some other search routine (such as Rabin-Karp) in
327 /// cases where the haystack (or remainer of the haystack) is too short.
328 minimum_len: usize,
329}
330
331impl Searcher {
332 /// Look for the leftmost occurrence of any pattern in this search in the
333 /// given haystack starting at the given position.
334 ///
335 /// # Panics
336 ///
337 /// This panics when `haystack[at..].len()` is less than the minimum length
338 /// for this haystack.
339 #[inline(always)]
340 pub(crate) fn find(
341 &self,
342 haystack: &[u8],
343 at: usize,
344 ) -> Option<crate::Match> {
345 // SAFETY: The Teddy implementations all require a minimum haystack
346 // length, and this is required for safety. Therefore, we assert it
347 // here in order to make this method sound.
348 assert!(haystack[at..].len() >= self.minimum_len);
349 let hayptr = haystack.as_ptr();
350 // SAFETY: Construction of the searcher guarantees that we are able
351 // to run it in the current environment (i.e., we won't get an AVX2
352 // searcher on a x86-64 CPU without AVX2 support). Also, the pointers
353 // are valid as they are derived directly from a borrowed slice.
354 let teddym = unsafe {
355 self.imp.find(hayptr.add(at), hayptr.add(haystack.len()))?
356 };
357 let start = teddym.start().as_usize().wrapping_sub(hayptr.as_usize());
358 let end = teddym.end().as_usize().wrapping_sub(hayptr.as_usize());
359 let span = crate::Span { start, end };
360 // OK because we won't permit the construction of a searcher that
361 // could report a pattern ID bigger than what can fit in the crate-wide
362 // PatternID type.
363 let pid = crate::PatternID::new_unchecked(teddym.pattern().as_usize());
364 let m = crate::Match::new(pid, span);
365 Some(m)
366 }
367
368 /// Returns the approximate total amount of heap used by this type, in
369 /// units of bytes.
370 #[inline(always)]
371 pub(crate) fn memory_usage(&self) -> usize {
372 self.memory_usage
373 }
374
375 /// Returns the minimum length, in bytes, that a haystack must be in order
376 /// to use it with this searcher.
377 #[inline(always)]
378 pub(crate) fn minimum_len(&self) -> usize {
379 self.minimum_len
380 }
381}
382
383/// A trait that provides dynamic dispatch over the different possible Teddy
384/// variants on the same algorithm.
385///
386/// On `x86_64` for example, it isn't known until runtime which of 12 possible
387/// variants will be used. One might use one of the four slim 128-bit vector
388/// variants, or one of the four 256-bit vector variants or even one of the
389/// four fat 256-bit vector variants.
390///
391/// Since this choice is generally made when the Teddy searcher is constructed
392/// and this choice is based on the patterns given and what the current CPU
393/// supports, it follows that there must be some kind of indirection at search
394/// time that "selects" the variant chosen at build time.
395///
396/// There are a few different ways to go about this. One approach is to use an
397/// enum. It works fine, but in my experiments, this generally results in worse
398/// codegen. Another approach, which is what we use here, is dynamic dispatch
399/// via a trait object. We basically implement this trait for each possible
400/// variant, select the variant we want at build time and convert it to a
401/// trait object for use at search time.
402///
403/// Another approach is to use function pointers and stick each of the possible
404/// variants into a union. This is essentially isomorphic to the dynamic
405/// dispatch approach, but doesn't require any allocations. Since this crate
406/// requires `alloc`, there's no real reason (AFAIK) to go down this path. (The
407/// `memchr` crate does this.)
408trait SearcherT:
409 Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static
410{
411 /// Execute a search on the given haystack (identified by `start` and `end`
412 /// raw pointers).
413 ///
414 /// # Safety
415 ///
416 /// Essentially, the `start` and `end` pointers must be valid and point
417 /// to a haystack one can read. As long as you derive them from, for
418 /// example, a `&[u8]`, they should automatically satisfy all of the safety
419 /// obligations:
420 ///
421 /// * Both `start` and `end` must be valid for reads.
422 /// * Both `start` and `end` must point to an initialized value.
423 /// * Both `start` and `end` must point to the same allocated object and
424 /// must either be in bounds or at most one byte past the end of the
425 /// allocated object.
426 /// * Both `start` and `end` must be _derived from_ a pointer to the same
427 /// object.
428 /// * The distance between `start` and `end` must not overflow `isize`.
429 /// * The distance being in bounds must not rely on "wrapping around" the
430 /// address space.
431 /// * It must be the case that `start <= end`.
432 /// * `end - start` must be greater than the minimum length for this
433 /// searcher.
434 ///
435 /// Also, it is expected that implementations of this trait will tag this
436 /// method with a `target_feature` attribute. Callers must ensure that
437 /// they are executing this method in an environment where that attribute
438 /// is valid.
439 unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match>;
440}
441
442#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
443mod x86_64 {
444 use core::arch::x86_64::{__m128i, __m256i};
445
446 use alloc::sync::Arc;
447
448 use crate::packed::{
449 ext::Pointer,
450 pattern::Patterns,
451 teddy::generic::{self, Match},
452 };
453
454 use super::{Searcher, SearcherT};
455
456 #[derive(Clone, Debug)]
457 pub(super) struct SlimSSSE3<const BYTES: usize> {
458 slim128: generic::Slim<__m128i, BYTES>,
459 }
460
461 // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
462 macro_rules! slim_ssse3 {
463 ($len:expr) => {
464 impl SlimSSSE3<$len> {
465 /// Creates a new searcher using "slim" Teddy with 128-bit
466 /// vectors. If SSSE3 is not available in the current
467 /// environment, then this returns `None`.
468 pub(super) fn new(
469 patterns: &Arc<Patterns>,
470 ) -> Option<Searcher> {
471 if !is_available_ssse3() {
472 return None;
473 }
474 Some(unsafe { SlimSSSE3::<$len>::new_unchecked(patterns) })
475 }
476
477 /// Creates a new searcher using "slim" Teddy with 256-bit
478 /// vectors without checking whether SSSE3 is available or not.
479 ///
480 /// # Safety
481 ///
482 /// Callers must ensure that SSSE3 is available in the current
483 /// environment.
484 #[target_feature(enable = "ssse3")]
485 unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
486 let slim128 = generic::Slim::<__m128i, $len>::new(
487 Arc::clone(patterns),
488 );
489 let memory_usage = slim128.memory_usage();
490 let minimum_len = slim128.minimum_len();
491 let imp = Arc::new(SlimSSSE3 { slim128 });
492 Searcher { imp, memory_usage, minimum_len }
493 }
494 }
495
496 impl SearcherT for SlimSSSE3<$len> {
497 #[target_feature(enable = "ssse3")]
498 #[inline]
499 unsafe fn find(
500 &self,
501 start: *const u8,
502 end: *const u8,
503 ) -> Option<Match> {
504 // SAFETY: All obligations except for `target_feature` are
505 // passed to the caller. Our use of `target_feature` is
506 // safe because construction of this type requires that the
507 // requisite target features are available.
508 self.slim128.find(start, end)
509 }
510 }
511 };
512 }
513
514 slim_ssse3!(1);
515 slim_ssse3!(2);
516 slim_ssse3!(3);
517 slim_ssse3!(4);
518
519 #[derive(Clone, Debug)]
520 pub(super) struct SlimAVX2<const BYTES: usize> {
521 slim128: generic::Slim<__m128i, BYTES>,
522 slim256: generic::Slim<__m256i, BYTES>,
523 }
524
525 // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
526 macro_rules! slim_avx2 {
527 ($len:expr) => {
528 impl SlimAVX2<$len> {
529 /// Creates a new searcher using "slim" Teddy with 256-bit
530 /// vectors. If AVX2 is not available in the current
531 /// environment, then this returns `None`.
532 pub(super) fn new(
533 patterns: &Arc<Patterns>,
534 ) -> Option<Searcher> {
535 if !is_available_avx2() {
536 return None;
537 }
538 Some(unsafe { SlimAVX2::<$len>::new_unchecked(patterns) })
539 }
540
541 /// Creates a new searcher using "slim" Teddy with 256-bit
542 /// vectors without checking whether AVX2 is available or not.
543 ///
544 /// # Safety
545 ///
546 /// Callers must ensure that AVX2 is available in the current
547 /// environment.
548 #[target_feature(enable = "avx2")]
549 unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
550 let slim128 = generic::Slim::<__m128i, $len>::new(
551 Arc::clone(&patterns),
552 );
553 let slim256 = generic::Slim::<__m256i, $len>::new(
554 Arc::clone(&patterns),
555 );
556 let memory_usage =
557 slim128.memory_usage() + slim256.memory_usage();
558 let minimum_len = slim128.minimum_len();
559 let imp = Arc::new(SlimAVX2 { slim128, slim256 });
560 Searcher { imp, memory_usage, minimum_len }
561 }
562 }
563
564 impl SearcherT for SlimAVX2<$len> {
565 #[target_feature(enable = "avx2")]
566 #[inline]
567 unsafe fn find(
568 &self,
569 start: *const u8,
570 end: *const u8,
571 ) -> Option<Match> {
572 // SAFETY: All obligations except for `target_feature` are
573 // passed to the caller. Our use of `target_feature` is
574 // safe because construction of this type requires that the
575 // requisite target features are available.
576 let len = end.distance(start);
577 if len < self.slim256.minimum_len() {
578 self.slim128.find(start, end)
579 } else {
580 self.slim256.find(start, end)
581 }
582 }
583 }
584 };
585 }
586
587 slim_avx2!(1);
588 slim_avx2!(2);
589 slim_avx2!(3);
590 slim_avx2!(4);
591
592 #[derive(Clone, Debug)]
593 pub(super) struct FatAVX2<const BYTES: usize> {
594 fat256: generic::Fat<__m256i, BYTES>,
595 }
596
597 // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
598 macro_rules! fat_avx2 {
599 ($len:expr) => {
600 impl FatAVX2<$len> {
601 /// Creates a new searcher using "slim" Teddy with 256-bit
602 /// vectors. If AVX2 is not available in the current
603 /// environment, then this returns `None`.
604 pub(super) fn new(
605 patterns: &Arc<Patterns>,
606 ) -> Option<Searcher> {
607 if !is_available_avx2() {
608 return None;
609 }
610 Some(unsafe { FatAVX2::<$len>::new_unchecked(patterns) })
611 }
612
613 /// Creates a new searcher using "slim" Teddy with 256-bit
614 /// vectors without checking whether AVX2 is available or not.
615 ///
616 /// # Safety
617 ///
618 /// Callers must ensure that AVX2 is available in the current
619 /// environment.
620 #[target_feature(enable = "avx2")]
621 unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
622 let fat256 = generic::Fat::<__m256i, $len>::new(
623 Arc::clone(&patterns),
624 );
625 let memory_usage = fat256.memory_usage();
626 let minimum_len = fat256.minimum_len();
627 let imp = Arc::new(FatAVX2 { fat256 });
628 Searcher { imp, memory_usage, minimum_len }
629 }
630 }
631
632 impl SearcherT for FatAVX2<$len> {
633 #[target_feature(enable = "avx2")]
634 #[inline]
635 unsafe fn find(
636 &self,
637 start: *const u8,
638 end: *const u8,
639 ) -> Option<Match> {
640 // SAFETY: All obligations except for `target_feature` are
641 // passed to the caller. Our use of `target_feature` is
642 // safe because construction of this type requires that the
643 // requisite target features are available.
644 self.fat256.find(start, end)
645 }
646 }
647 };
648 }
649
650 fat_avx2!(1);
651 fat_avx2!(2);
652 fat_avx2!(3);
653 fat_avx2!(4);
654
655 #[inline]
656 pub(super) fn is_available_ssse3() -> bool {
657 #[cfg(not(target_feature = "sse2"))]
658 {
659 false
660 }
661 #[cfg(target_feature = "sse2")]
662 {
663 #[cfg(target_feature = "ssse3")]
664 {
665 true
666 }
667 #[cfg(not(target_feature = "ssse3"))]
668 {
669 #[cfg(feature = "std")]
670 {
671 std::is_x86_feature_detected!("ssse3")
672 }
673 #[cfg(not(feature = "std"))]
674 {
675 false
676 }
677 }
678 }
679 }
680
681 #[inline]
682 pub(super) fn is_available_avx2() -> bool {
683 #[cfg(not(target_feature = "sse2"))]
684 {
685 false
686 }
687 #[cfg(target_feature = "sse2")]
688 {
689 #[cfg(target_feature = "avx2")]
690 {
691 true
692 }
693 #[cfg(not(target_feature = "avx2"))]
694 {
695 #[cfg(feature = "std")]
696 {
697 std::is_x86_feature_detected!("avx2")
698 }
699 #[cfg(not(feature = "std"))]
700 {
701 false
702 }
703 }
704 }
705 }
706}
707
708#[cfg(target_arch = "aarch64")]
709mod aarch64 {
710 use core::arch::aarch64::uint8x16_t;
711
712 use alloc::sync::Arc;
713
714 use crate::packed::{
715 pattern::Patterns,
716 teddy::generic::{self, Match},
717 };
718
719 use super::{Searcher, SearcherT};
720
721 #[derive(Clone, Debug)]
722 pub(super) struct SlimNeon<const BYTES: usize> {
723 slim128: generic::Slim<uint8x16_t, BYTES>,
724 }
725
726 // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
727 macro_rules! slim_neon {
728 ($len:expr) => {
729 impl SlimNeon<$len> {
730 /// Creates a new searcher using "slim" Teddy with 128-bit
731 /// vectors. If SSSE3 is not available in the current
732 /// environment, then this returns `None`.
733 pub(super) fn new(
734 patterns: &Arc<Patterns>,
735 ) -> Option<Searcher> {
736 Some(unsafe { SlimNeon::<$len>::new_unchecked(patterns) })
737 }
738
739 /// Creates a new searcher using "slim" Teddy with 256-bit
740 /// vectors without checking whether SSSE3 is available or not.
741 ///
742 /// # Safety
743 ///
744 /// Callers must ensure that SSSE3 is available in the current
745 /// environment.
746 #[target_feature(enable = "neon")]
747 unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
748 let slim128 = generic::Slim::<uint8x16_t, $len>::new(
749 Arc::clone(patterns),
750 );
751 let memory_usage = slim128.memory_usage();
752 let minimum_len = slim128.minimum_len();
753 let imp = Arc::new(SlimNeon { slim128 });
754 Searcher { imp, memory_usage, minimum_len }
755 }
756 }
757
758 impl SearcherT for SlimNeon<$len> {
759 #[target_feature(enable = "neon")]
760 #[inline]
761 unsafe fn find(
762 &self,
763 start: *const u8,
764 end: *const u8,
765 ) -> Option<Match> {
766 // SAFETY: All obligations except for `target_feature` are
767 // passed to the caller. Our use of `target_feature` is
768 // safe because construction of this type requires that the
769 // requisite target features are available.
770 self.slim128.find(start, end)
771 }
772 }
773 };
774 }
775
776 slim_neon!(1);
777 slim_neon!(2);
778 slim_neon!(3);
779 slim_neon!(4);
780}
781