1use core::{
2 fmt::Debug,
3 panic::{RefUnwindSafe, UnwindSafe},
4};
5
6use alloc::sync::Arc;
7
8use crate::packed::{ext::Pointer, pattern::Patterns, teddy::generic::Match};
9
10/// A builder for constructing a Teddy matcher.
11///
12/// The builder primarily permits fine grained configuration of the Teddy
13/// matcher. Most options are made only available for testing/benchmarking
14/// purposes. In reality, options are automatically determined by the nature
15/// and number of patterns given to the builder.
16#[derive(Clone, Debug)]
17pub(crate) struct Builder {
18 /// When none, this is automatically determined. Otherwise, `false` means
19 /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used
20 /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't
21 /// available and Fat Teddy was requested, no matcher will be built.
22 only_fat: Option<bool>,
23 /// When none, this is automatically determined. Otherwise, `false` means
24 /// that 128-bit vectors will be used (up to SSSE3 instructions) where as
25 /// `true` means that 256-bit vectors will be used. As with `fat`, if
26 /// 256-bit vectors are requested and they aren't available, then a
27 /// searcher will not be built.
28 only_256bit: Option<bool>,
29 /// When true (the default), the number of patterns will be used as a
30 /// heuristic for refusing construction of a Teddy searcher. The point here
31 /// is that too many patterns can overwhelm Teddy. But this can be disabled
32 /// in cases where the caller knows better.
33 heuristic_pattern_limits: bool,
34}
35
36impl Default for Builder {
37 fn default() -> Builder {
38 Builder::new()
39 }
40}
41
42impl Builder {
43 /// Create a new builder for configuring a Teddy matcher.
44 pub(crate) fn new() -> Builder {
45 Builder {
46 only_fat: None,
47 only_256bit: None,
48 heuristic_pattern_limits: true,
49 }
50 }
51
52 /// Build a matcher for the set of patterns given. If a matcher could not
53 /// be built, then `None` is returned.
54 ///
55 /// Generally, a matcher isn't built if the necessary CPU features aren't
56 /// available, an unsupported target or if the searcher is believed to be
57 /// slower than standard techniques (i.e., if there are too many literals).
58 pub(crate) fn build(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
59 self.build_imp(patterns)
60 }
61
62 /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses
63 /// 16 buckets where as Slim Teddy uses 8 buckets. More buckets are useful
64 /// for a larger set of literals.
65 ///
66 /// `None` is the default, which results in an automatic selection based
67 /// on the number of literals and available CPU features.
68 pub(crate) fn only_fat(&mut self, yes: Option<bool>) -> &mut Builder {
69 self.only_fat = yes;
70 self
71 }
72
73 /// Request the use of 256-bit vectors (true) or 128-bit vectors (false).
74 /// Generally, a larger vector size is better since it either permits
75 /// matching more patterns or matching more bytes in the haystack at once.
76 ///
77 /// `None` is the default, which results in an automatic selection based on
78 /// the number of literals and available CPU features.
79 pub(crate) fn only_256bit(&mut self, yes: Option<bool>) -> &mut Builder {
80 self.only_256bit = yes;
81 self
82 }
83
84 /// Request that heuristic limitations on the number of patterns be
85 /// employed. This useful to disable for benchmarking where one wants to
86 /// explore how Teddy performs on large number of patterns even if the
87 /// heuristics would otherwise refuse construction.
88 ///
89 /// This is enabled by default.
90 pub(crate) fn heuristic_pattern_limits(
91 &mut self,
92 yes: bool,
93 ) -> &mut Builder {
94 self.heuristic_pattern_limits = yes;
95 self
96 }
97
98 fn build_imp(&self, patterns: Arc<Patterns>) -> Option<Searcher> {
99 let patlimit = self.heuristic_pattern_limits;
100 // There's no particular reason why we limit ourselves to little endian
101 // here, but it seems likely that some parts of Teddy as they are
102 // currently written (e.g., the uses of `trailing_zeros`) are likely
103 // wrong on non-little-endian targets. Such things are likely easy to
104 // fix, but at the time of writing (2023/09/18), I actually do not know
105 // how to test this code on a big-endian target. So for now, we're
106 // conservative and just bail out.
107 if !cfg!(target_endian = "little") {
108 debug!("skipping Teddy because target isn't little endian");
109 return None;
110 }
111 // Too many patterns will overwhelm Teddy and likely lead to slow
112 // downs, typically in the verification step.
113 if patlimit && patterns.len() > 64 {
114 debug!("skipping Teddy because of too many patterns");
115 return None;
116 }
117
118 #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
119 {
120 use self::x86_64::{FatAVX2, SlimAVX2, SlimSSSE3};
121
122 let mask_len = core::cmp::min(4, patterns.minimum_len());
123 let beefy = patterns.len() > 32;
124 let has_avx2 = self::x86_64::is_available_avx2();
125 let has_ssse3 = has_avx2 || self::x86_64::is_available_ssse3();
126 let use_avx2 = if self.only_256bit == Some(true) {
127 if !has_avx2 {
128 debug!(
129 "skipping Teddy because avx2 was demanded but unavailable"
130 );
131 return None;
132 }
133 true
134 } else if self.only_256bit == Some(false) {
135 if !has_ssse3 {
136 debug!(
137 "skipping Teddy because ssse3 was demanded but unavailable"
138 );
139 return None;
140 }
141 false
142 } else if !has_ssse3 && !has_avx2 {
143 debug!(
144 "skipping Teddy because ssse3 and avx2 are unavailable"
145 );
146 return None;
147 } else {
148 has_avx2
149 };
150 let fat = match self.only_fat {
151 None => use_avx2 && beefy,
152 Some(false) => false,
153 Some(true) if !use_avx2 => {
154 debug!(
155 "skipping Teddy because fat was demanded, but fat \
156 Teddy requires avx2 which is unavailable"
157 );
158 return None;
159 }
160 Some(true) => true,
161 };
162 // Just like for aarch64, it's possible that too many patterns will
163 // overhwelm Teddy. Unlike aarch64 though, we have Fat teddy which
164 // helps things scale a bit more by spreading patterns over more
165 // buckets.
166 //
167 // These thresholds were determined by looking at the measurements
168 // for the rust/aho-corasick/packed/leftmost-first and
169 // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
170 // benchmarks.
171 if patlimit && mask_len == 1 && patterns.len() > 16 {
172 debug!(
173 "skipping Teddy (mask len: 1) because there are \
174 too many patterns",
175 );
176 return None;
177 }
178 match (mask_len, use_avx2, fat) {
179 (1, false, _) => {
180 debug!("Teddy choice: 128-bit slim, 1 byte");
181 SlimSSSE3::<1>::new(&patterns)
182 }
183 (1, true, false) => {
184 debug!("Teddy choice: 256-bit slim, 1 byte");
185 SlimAVX2::<1>::new(&patterns)
186 }
187 (1, true, true) => {
188 debug!("Teddy choice: 256-bit fat, 1 byte");
189 FatAVX2::<1>::new(&patterns)
190 }
191 (2, false, _) => {
192 debug!("Teddy choice: 128-bit slim, 2 bytes");
193 SlimSSSE3::<2>::new(&patterns)
194 }
195 (2, true, false) => {
196 debug!("Teddy choice: 256-bit slim, 2 bytes");
197 SlimAVX2::<2>::new(&patterns)
198 }
199 (2, true, true) => {
200 debug!("Teddy choice: 256-bit fat, 2 bytes");
201 FatAVX2::<2>::new(&patterns)
202 }
203 (3, false, _) => {
204 debug!("Teddy choice: 128-bit slim, 3 bytes");
205 SlimSSSE3::<3>::new(&patterns)
206 }
207 (3, true, false) => {
208 debug!("Teddy choice: 256-bit slim, 3 bytes");
209 SlimAVX2::<3>::new(&patterns)
210 }
211 (3, true, true) => {
212 debug!("Teddy choice: 256-bit fat, 3 bytes");
213 FatAVX2::<3>::new(&patterns)
214 }
215 (4, false, _) => {
216 debug!("Teddy choice: 128-bit slim, 4 bytes");
217 SlimSSSE3::<4>::new(&patterns)
218 }
219 (4, true, false) => {
220 debug!("Teddy choice: 256-bit slim, 4 bytes");
221 SlimAVX2::<4>::new(&patterns)
222 }
223 (4, true, true) => {
224 debug!("Teddy choice: 256-bit fat, 4 bytes");
225 FatAVX2::<4>::new(&patterns)
226 }
227 _ => {
228 debug!("no supported Teddy configuration found");
229 None
230 }
231 }
232 }
233 #[cfg(all(
234 target_arch = "aarch64",
235 target_feature = "neon",
236 target_endian = "little"
237 ))]
238 {
239 use self::aarch64::SlimNeon;
240
241 let mask_len = core::cmp::min(4, patterns.minimum_len());
242 if self.only_256bit == Some(true) {
243 debug!(
244 "skipping Teddy because 256-bits were demanded \
245 but unavailable"
246 );
247 return None;
248 }
249 if self.only_fat == Some(true) {
250 debug!(
251 "skipping Teddy because fat was demanded but unavailable"
252 );
253 }
254 // Since we don't have Fat teddy in aarch64 (I think we'd want at
255 // least 256-bit vectors for that), we need to be careful not to
256 // allow too many patterns as it might overwhelm Teddy. Generally
257 // speaking, as the mask length goes up, the more patterns we can
258 // handle because the mask length results in fewer candidates
259 // generated.
260 //
261 // These thresholds were determined by looking at the measurements
262 // for the rust/aho-corasick/packed/leftmost-first and
263 // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
264 // benchmarks.
265 match mask_len {
266 1 => {
267 if patlimit && patterns.len() > 16 {
268 debug!(
269 "skipping Teddy (mask len: 1) because there are \
270 too many patterns",
271 );
272 }
273 debug!("Teddy choice: 128-bit slim, 1 byte");
274 SlimNeon::<1>::new(&patterns)
275 }
276 2 => {
277 if patlimit && patterns.len() > 32 {
278 debug!(
279 "skipping Teddy (mask len: 2) because there are \
280 too many patterns",
281 );
282 }
283 debug!("Teddy choice: 128-bit slim, 2 bytes");
284 SlimNeon::<2>::new(&patterns)
285 }
286 3 => {
287 if patlimit && patterns.len() > 48 {
288 debug!(
289 "skipping Teddy (mask len: 3) because there are \
290 too many patterns",
291 );
292 }
293 debug!("Teddy choice: 128-bit slim, 3 bytes");
294 SlimNeon::<3>::new(&patterns)
295 }
296 4 => {
297 debug!("Teddy choice: 128-bit slim, 4 bytes");
298 SlimNeon::<4>::new(&patterns)
299 }
300 _ => {
301 debug!("no supported Teddy configuration found");
302 None
303 }
304 }
305 }
306 #[cfg(not(any(
307 all(target_arch = "x86_64", target_feature = "sse2"),
308 all(
309 target_arch = "aarch64",
310 target_feature = "neon",
311 target_endian = "little"
312 )
313 )))]
314 {
315 None
316 }
317 }
318}
319
320/// A searcher that dispatches to one of several possible Teddy variants.
321#[derive(Clone, Debug)]
322pub(crate) struct Searcher {
323 /// The Teddy variant we use. We use dynamic dispatch under the theory that
324 /// it results in better codegen then a enum, although this is a specious
325 /// claim.
326 ///
327 /// This `Searcher` is essentially a wrapper for a `SearcherT` trait
328 /// object. We just make `memory_usage` and `minimum_len` available without
329 /// going through dynamic dispatch.
330 imp: Arc<dyn SearcherT>,
331 /// Total heap memory used by the Teddy variant.
332 memory_usage: usize,
333 /// The minimum haystack length this searcher can handle. It is intended
334 /// for callers to use some other search routine (such as Rabin-Karp) in
335 /// cases where the haystack (or remainer of the haystack) is too short.
336 minimum_len: usize,
337}
338
339impl Searcher {
340 /// Look for the leftmost occurrence of any pattern in this search in the
341 /// given haystack starting at the given position.
342 ///
343 /// # Panics
344 ///
345 /// This panics when `haystack[at..].len()` is less than the minimum length
346 /// for this haystack.
347 #[inline(always)]
348 pub(crate) fn find(
349 &self,
350 haystack: &[u8],
351 at: usize,
352 ) -> Option<crate::Match> {
353 // SAFETY: The Teddy implementations all require a minimum haystack
354 // length, and this is required for safety. Therefore, we assert it
355 // here in order to make this method sound.
356 assert!(haystack[at..].len() >= self.minimum_len);
357 let hayptr = haystack.as_ptr();
358 // SAFETY: Construction of the searcher guarantees that we are able
359 // to run it in the current environment (i.e., we won't get an AVX2
360 // searcher on a x86-64 CPU without AVX2 support). Also, the pointers
361 // are valid as they are derived directly from a borrowed slice.
362 let teddym = unsafe {
363 self.imp.find(hayptr.add(at), hayptr.add(haystack.len()))?
364 };
365 let start = teddym.start().as_usize().wrapping_sub(hayptr.as_usize());
366 let end = teddym.end().as_usize().wrapping_sub(hayptr.as_usize());
367 let span = crate::Span { start, end };
368 // OK because we won't permit the construction of a searcher that
369 // could report a pattern ID bigger than what can fit in the crate-wide
370 // PatternID type.
371 let pid = crate::PatternID::new_unchecked(teddym.pattern().as_usize());
372 let m = crate::Match::new(pid, span);
373 Some(m)
374 }
375
376 /// Returns the approximate total amount of heap used by this type, in
377 /// units of bytes.
378 #[inline(always)]
379 pub(crate) fn memory_usage(&self) -> usize {
380 self.memory_usage
381 }
382
383 /// Returns the minimum length, in bytes, that a haystack must be in order
384 /// to use it with this searcher.
385 #[inline(always)]
386 pub(crate) fn minimum_len(&self) -> usize {
387 self.minimum_len
388 }
389}
390
391/// A trait that provides dynamic dispatch over the different possible Teddy
392/// variants on the same algorithm.
393///
394/// On `x86_64` for example, it isn't known until runtime which of 12 possible
395/// variants will be used. One might use one of the four slim 128-bit vector
396/// variants, or one of the four 256-bit vector variants or even one of the
397/// four fat 256-bit vector variants.
398///
399/// Since this choice is generally made when the Teddy searcher is constructed
400/// and this choice is based on the patterns given and what the current CPU
401/// supports, it follows that there must be some kind of indirection at search
402/// time that "selects" the variant chosen at build time.
403///
404/// There are a few different ways to go about this. One approach is to use an
405/// enum. It works fine, but in my experiments, this generally results in worse
406/// codegen. Another approach, which is what we use here, is dynamic dispatch
407/// via a trait object. We basically implement this trait for each possible
408/// variant, select the variant we want at build time and convert it to a
409/// trait object for use at search time.
410///
411/// Another approach is to use function pointers and stick each of the possible
412/// variants into a union. This is essentially isomorphic to the dynamic
413/// dispatch approach, but doesn't require any allocations. Since this crate
414/// requires `alloc`, there's no real reason (AFAIK) to go down this path. (The
415/// `memchr` crate does this.)
416trait SearcherT:
417 Debug + Send + Sync + UnwindSafe + RefUnwindSafe + 'static
418{
419 /// Execute a search on the given haystack (identified by `start` and `end`
420 /// raw pointers).
421 ///
422 /// # Safety
423 ///
424 /// Essentially, the `start` and `end` pointers must be valid and point
425 /// to a haystack one can read. As long as you derive them from, for
426 /// example, a `&[u8]`, they should automatically satisfy all of the safety
427 /// obligations:
428 ///
429 /// * Both `start` and `end` must be valid for reads.
430 /// * Both `start` and `end` must point to an initialized value.
431 /// * Both `start` and `end` must point to the same allocated object and
432 /// must either be in bounds or at most one byte past the end of the
433 /// allocated object.
434 /// * Both `start` and `end` must be _derived from_ a pointer to the same
435 /// object.
436 /// * The distance between `start` and `end` must not overflow `isize`.
437 /// * The distance being in bounds must not rely on "wrapping around" the
438 /// address space.
439 /// * It must be the case that `start <= end`.
440 /// * `end - start` must be greater than the minimum length for this
441 /// searcher.
442 ///
443 /// Also, it is expected that implementations of this trait will tag this
444 /// method with a `target_feature` attribute. Callers must ensure that
445 /// they are executing this method in an environment where that attribute
446 /// is valid.
447 unsafe fn find(&self, start: *const u8, end: *const u8) -> Option<Match>;
448}
449
450#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
451mod x86_64 {
452 use core::arch::x86_64::{__m128i, __m256i};
453
454 use alloc::sync::Arc;
455
456 use crate::packed::{
457 ext::Pointer,
458 pattern::Patterns,
459 teddy::generic::{self, Match},
460 };
461
462 use super::{Searcher, SearcherT};
463
464 #[derive(Clone, Debug)]
465 pub(super) struct SlimSSSE3<const BYTES: usize> {
466 slim128: generic::Slim<__m128i, BYTES>,
467 }
468
469 // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
470 macro_rules! slim_ssse3 {
471 ($len:expr) => {
472 impl SlimSSSE3<$len> {
473 /// Creates a new searcher using "slim" Teddy with 128-bit
474 /// vectors. If SSSE3 is not available in the current
475 /// environment, then this returns `None`.
476 pub(super) fn new(
477 patterns: &Arc<Patterns>,
478 ) -> Option<Searcher> {
479 if !is_available_ssse3() {
480 return None;
481 }
482 Some(unsafe { SlimSSSE3::<$len>::new_unchecked(patterns) })
483 }
484
485 /// Creates a new searcher using "slim" Teddy with 256-bit
486 /// vectors without checking whether SSSE3 is available or not.
487 ///
488 /// # Safety
489 ///
490 /// Callers must ensure that SSSE3 is available in the current
491 /// environment.
492 #[target_feature(enable = "ssse3")]
493 unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
494 let slim128 = generic::Slim::<__m128i, $len>::new(
495 Arc::clone(patterns),
496 );
497 let memory_usage = slim128.memory_usage();
498 let minimum_len = slim128.minimum_len();
499 let imp = Arc::new(SlimSSSE3 { slim128 });
500 Searcher { imp, memory_usage, minimum_len }
501 }
502 }
503
504 impl SearcherT for SlimSSSE3<$len> {
505 #[target_feature(enable = "ssse3")]
506 #[inline]
507 unsafe fn find(
508 &self,
509 start: *const u8,
510 end: *const u8,
511 ) -> Option<Match> {
512 // SAFETY: All obligations except for `target_feature` are
513 // passed to the caller. Our use of `target_feature` is
514 // safe because construction of this type requires that the
515 // requisite target features are available.
516 self.slim128.find(start, end)
517 }
518 }
519 };
520 }
521
522 slim_ssse3!(1);
523 slim_ssse3!(2);
524 slim_ssse3!(3);
525 slim_ssse3!(4);
526
527 #[derive(Clone, Debug)]
528 pub(super) struct SlimAVX2<const BYTES: usize> {
529 slim128: generic::Slim<__m128i, BYTES>,
530 slim256: generic::Slim<__m256i, BYTES>,
531 }
532
533 // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
534 macro_rules! slim_avx2 {
535 ($len:expr) => {
536 impl SlimAVX2<$len> {
537 /// Creates a new searcher using "slim" Teddy with 256-bit
538 /// vectors. If AVX2 is not available in the current
539 /// environment, then this returns `None`.
540 pub(super) fn new(
541 patterns: &Arc<Patterns>,
542 ) -> Option<Searcher> {
543 if !is_available_avx2() {
544 return None;
545 }
546 Some(unsafe { SlimAVX2::<$len>::new_unchecked(patterns) })
547 }
548
549 /// Creates a new searcher using "slim" Teddy with 256-bit
550 /// vectors without checking whether AVX2 is available or not.
551 ///
552 /// # Safety
553 ///
554 /// Callers must ensure that AVX2 is available in the current
555 /// environment.
556 #[target_feature(enable = "avx2")]
557 unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
558 let slim128 = generic::Slim::<__m128i, $len>::new(
559 Arc::clone(&patterns),
560 );
561 let slim256 = generic::Slim::<__m256i, $len>::new(
562 Arc::clone(&patterns),
563 );
564 let memory_usage =
565 slim128.memory_usage() + slim256.memory_usage();
566 let minimum_len = slim128.minimum_len();
567 let imp = Arc::new(SlimAVX2 { slim128, slim256 });
568 Searcher { imp, memory_usage, minimum_len }
569 }
570 }
571
572 impl SearcherT for SlimAVX2<$len> {
573 #[target_feature(enable = "avx2")]
574 #[inline]
575 unsafe fn find(
576 &self,
577 start: *const u8,
578 end: *const u8,
579 ) -> Option<Match> {
580 // SAFETY: All obligations except for `target_feature` are
581 // passed to the caller. Our use of `target_feature` is
582 // safe because construction of this type requires that the
583 // requisite target features are available.
584 let len = end.distance(start);
585 if len < self.slim256.minimum_len() {
586 self.slim128.find(start, end)
587 } else {
588 self.slim256.find(start, end)
589 }
590 }
591 }
592 };
593 }
594
595 slim_avx2!(1);
596 slim_avx2!(2);
597 slim_avx2!(3);
598 slim_avx2!(4);
599
600 #[derive(Clone, Debug)]
601 pub(super) struct FatAVX2<const BYTES: usize> {
602 fat256: generic::Fat<__m256i, BYTES>,
603 }
604
605 // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
606 macro_rules! fat_avx2 {
607 ($len:expr) => {
608 impl FatAVX2<$len> {
609 /// Creates a new searcher using "slim" Teddy with 256-bit
610 /// vectors. If AVX2 is not available in the current
611 /// environment, then this returns `None`.
612 pub(super) fn new(
613 patterns: &Arc<Patterns>,
614 ) -> Option<Searcher> {
615 if !is_available_avx2() {
616 return None;
617 }
618 Some(unsafe { FatAVX2::<$len>::new_unchecked(patterns) })
619 }
620
621 /// Creates a new searcher using "slim" Teddy with 256-bit
622 /// vectors without checking whether AVX2 is available or not.
623 ///
624 /// # Safety
625 ///
626 /// Callers must ensure that AVX2 is available in the current
627 /// environment.
628 #[target_feature(enable = "avx2")]
629 unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
630 let fat256 = generic::Fat::<__m256i, $len>::new(
631 Arc::clone(&patterns),
632 );
633 let memory_usage = fat256.memory_usage();
634 let minimum_len = fat256.minimum_len();
635 let imp = Arc::new(FatAVX2 { fat256 });
636 Searcher { imp, memory_usage, minimum_len }
637 }
638 }
639
640 impl SearcherT for FatAVX2<$len> {
641 #[target_feature(enable = "avx2")]
642 #[inline]
643 unsafe fn find(
644 &self,
645 start: *const u8,
646 end: *const u8,
647 ) -> Option<Match> {
648 // SAFETY: All obligations except for `target_feature` are
649 // passed to the caller. Our use of `target_feature` is
650 // safe because construction of this type requires that the
651 // requisite target features are available.
652 self.fat256.find(start, end)
653 }
654 }
655 };
656 }
657
658 fat_avx2!(1);
659 fat_avx2!(2);
660 fat_avx2!(3);
661 fat_avx2!(4);
662
663 #[inline]
664 pub(super) fn is_available_ssse3() -> bool {
665 #[cfg(not(target_feature = "sse2"))]
666 {
667 false
668 }
669 #[cfg(target_feature = "sse2")]
670 {
671 #[cfg(target_feature = "ssse3")]
672 {
673 true
674 }
675 #[cfg(not(target_feature = "ssse3"))]
676 {
677 #[cfg(feature = "std")]
678 {
679 std::is_x86_feature_detected!("ssse3")
680 }
681 #[cfg(not(feature = "std"))]
682 {
683 false
684 }
685 }
686 }
687 }
688
689 #[inline]
690 pub(super) fn is_available_avx2() -> bool {
691 #[cfg(not(target_feature = "sse2"))]
692 {
693 false
694 }
695 #[cfg(target_feature = "sse2")]
696 {
697 #[cfg(target_feature = "avx2")]
698 {
699 true
700 }
701 #[cfg(not(target_feature = "avx2"))]
702 {
703 #[cfg(feature = "std")]
704 {
705 std::is_x86_feature_detected!("avx2")
706 }
707 #[cfg(not(feature = "std"))]
708 {
709 false
710 }
711 }
712 }
713 }
714}
715
716#[cfg(all(
717 target_arch = "aarch64",
718 target_feature = "neon",
719 target_endian = "little"
720))]
721mod aarch64 {
722 use core::arch::aarch64::uint8x16_t;
723
724 use alloc::sync::Arc;
725
726 use crate::packed::{
727 pattern::Patterns,
728 teddy::generic::{self, Match},
729 };
730
731 use super::{Searcher, SearcherT};
732
733 #[derive(Clone, Debug)]
734 pub(super) struct SlimNeon<const BYTES: usize> {
735 slim128: generic::Slim<uint8x16_t, BYTES>,
736 }
737
738 // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
739 macro_rules! slim_neon {
740 ($len:expr) => {
741 impl SlimNeon<$len> {
742 /// Creates a new searcher using "slim" Teddy with 128-bit
743 /// vectors. If SSSE3 is not available in the current
744 /// environment, then this returns `None`.
745 pub(super) fn new(
746 patterns: &Arc<Patterns>,
747 ) -> Option<Searcher> {
748 Some(unsafe { SlimNeon::<$len>::new_unchecked(patterns) })
749 }
750
751 /// Creates a new searcher using "slim" Teddy with 256-bit
752 /// vectors without checking whether SSSE3 is available or not.
753 ///
754 /// # Safety
755 ///
756 /// Callers must ensure that SSSE3 is available in the current
757 /// environment.
758 #[target_feature(enable = "neon")]
759 unsafe fn new_unchecked(patterns: &Arc<Patterns>) -> Searcher {
760 let slim128 = generic::Slim::<uint8x16_t, $len>::new(
761 Arc::clone(patterns),
762 );
763 let memory_usage = slim128.memory_usage();
764 let minimum_len = slim128.minimum_len();
765 let imp = Arc::new(SlimNeon { slim128 });
766 Searcher { imp, memory_usage, minimum_len }
767 }
768 }
769
770 impl SearcherT for SlimNeon<$len> {
771 #[target_feature(enable = "neon")]
772 #[inline]
773 unsafe fn find(
774 &self,
775 start: *const u8,
776 end: *const u8,
777 ) -> Option<Match> {
778 // SAFETY: All obligations except for `target_feature` are
779 // passed to the caller. Our use of `target_feature` is
780 // safe because construction of this type requires that the
781 // requisite target features are available.
782 self.slim128.find(start, end)
783 }
784 }
785 };
786 }
787
788 slim_neon!(1);
789 slim_neon!(2);
790 slim_neon!(3);
791 slim_neon!(4);
792}
793