| 1 | #[ cfg(not(feature = "runtime-dispatch-simd"))] | 
| 2 | use core::{mem, ptr, usize}; | 
|---|
| 3 | #[ cfg(feature = "runtime-dispatch-simd")] | 
|---|
| 4 | use std::{mem, ptr, usize}; | 
|---|
| 5 |  | 
|---|
| 6 | fn splat(byte: u8) -> usize { | 
|---|
| 7 | let lo: usize = usize::MAX / 0xFF; | 
|---|
| 8 | lo * byte as usize | 
|---|
| 9 | } | 
|---|
| 10 |  | 
|---|
| 11 | unsafe fn usize_load_unchecked(bytes: &[u8], offset: usize) -> usize { | 
|---|
| 12 | let mut output: usize = 0; | 
|---|
| 13 | ptr::copy_nonoverlapping( | 
|---|
| 14 | src:bytes.as_ptr().add(offset), | 
|---|
| 15 | &mut output as *mut usize as *mut u8, | 
|---|
| 16 | count:mem::size_of::<usize>(), | 
|---|
| 17 | ); | 
|---|
| 18 | output | 
|---|
| 19 | } | 
|---|
| 20 |  | 
|---|
| 21 | fn bytewise_equal(lhs: usize, rhs: usize) -> usize { | 
|---|
| 22 | let lo: usize = usize::MAX / 0xFF; | 
|---|
| 23 | let hi: usize = lo << 7; | 
|---|
| 24 |  | 
|---|
| 25 | let x: usize = lhs ^ rhs; | 
|---|
| 26 | !((((x & !hi) + !hi) | x) >> 7) & lo | 
|---|
| 27 | } | 
|---|
| 28 |  | 
|---|
| 29 | fn sum_usize(values: usize) -> usize { | 
|---|
| 30 | let every_other_byte_lo: usize = usize::MAX / 0xFFFF; | 
|---|
| 31 | let every_other_byte: usize = every_other_byte_lo * 0xFF; | 
|---|
| 32 |  | 
|---|
| 33 | // Pairwise reduction to avoid overflow on next step. | 
|---|
| 34 | let pair_sum: usize = (values & every_other_byte) + ((values >> 8) & every_other_byte); | 
|---|
| 35 |  | 
|---|
| 36 | // Multiplication results in top two bytes holding sum. | 
|---|
| 37 | pair_sum.wrapping_mul(every_other_byte_lo) >> ((mem::size_of::<usize>() - 2) * 8) | 
|---|
| 38 | } | 
|---|
| 39 |  | 
|---|
| 40 | fn is_leading_utf8_byte(values: usize) -> usize { | 
|---|
| 41 | // a leading UTF-8 byte is one which does not start with the bits 10. | 
|---|
| 42 | ((!values >> 7) | (values >> 6)) & splat(byte:1) | 
|---|
| 43 | } | 
|---|
| 44 |  | 
|---|
| 45 | pub fn chunk_count(haystack: &[u8], needle: u8) -> usize { | 
|---|
| 46 | let chunksize = mem::size_of::<usize>(); | 
|---|
| 47 | assert!(haystack.len() >= chunksize); | 
|---|
| 48 |  | 
|---|
| 49 | unsafe { | 
|---|
| 50 | let mut offset = 0; | 
|---|
| 51 | let mut count = 0; | 
|---|
| 52 |  | 
|---|
| 53 | let needles = splat(needle); | 
|---|
| 54 |  | 
|---|
| 55 | // 2040 | 
|---|
| 56 | while haystack.len() >= offset + chunksize * 255 { | 
|---|
| 57 | let mut counts = 0; | 
|---|
| 58 | for _ in 0..255 { | 
|---|
| 59 | counts += bytewise_equal(usize_load_unchecked(haystack, offset), needles); | 
|---|
| 60 | offset += chunksize; | 
|---|
| 61 | } | 
|---|
| 62 | count += sum_usize(counts); | 
|---|
| 63 | } | 
|---|
| 64 |  | 
|---|
| 65 | // 8 | 
|---|
| 66 | let mut counts = 0; | 
|---|
| 67 | for i in 0..(haystack.len() - offset) / chunksize { | 
|---|
| 68 | counts += bytewise_equal( | 
|---|
| 69 | usize_load_unchecked(haystack, offset + i * chunksize), | 
|---|
| 70 | needles, | 
|---|
| 71 | ); | 
|---|
| 72 | } | 
|---|
| 73 | if haystack.len() % 8 != 0 { | 
|---|
| 74 | let mask = usize::from_le(!(!0 >> ((haystack.len() % chunksize) * 8))); | 
|---|
| 75 | counts += bytewise_equal( | 
|---|
| 76 | usize_load_unchecked(haystack, haystack.len() - chunksize), | 
|---|
| 77 | needles, | 
|---|
| 78 | ) & mask; | 
|---|
| 79 | } | 
|---|
| 80 | count += sum_usize(counts); | 
|---|
| 81 |  | 
|---|
| 82 | count | 
|---|
| 83 | } | 
|---|
| 84 | } | 
|---|
| 85 |  | 
|---|
| 86 | pub fn chunk_num_chars(utf8_chars: &[u8]) -> usize { | 
|---|
| 87 | let chunksize = mem::size_of::<usize>(); | 
|---|
| 88 | assert!(utf8_chars.len() >= chunksize); | 
|---|
| 89 |  | 
|---|
| 90 | unsafe { | 
|---|
| 91 | let mut offset = 0; | 
|---|
| 92 | let mut count = 0; | 
|---|
| 93 |  | 
|---|
| 94 | // 2040 | 
|---|
| 95 | while utf8_chars.len() >= offset + chunksize * 255 { | 
|---|
| 96 | let mut counts = 0; | 
|---|
| 97 | for _ in 0..255 { | 
|---|
| 98 | counts += is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset)); | 
|---|
| 99 | offset += chunksize; | 
|---|
| 100 | } | 
|---|
| 101 | count += sum_usize(counts); | 
|---|
| 102 | } | 
|---|
| 103 |  | 
|---|
| 104 | // 8 | 
|---|
| 105 | let mut counts = 0; | 
|---|
| 106 | for i in 0..(utf8_chars.len() - offset) / chunksize { | 
|---|
| 107 | counts += | 
|---|
| 108 | is_leading_utf8_byte(usize_load_unchecked(utf8_chars, offset + i * chunksize)); | 
|---|
| 109 | } | 
|---|
| 110 | if utf8_chars.len() % 8 != 0 { | 
|---|
| 111 | let mask = usize::from_le(!(!0 >> ((utf8_chars.len() % chunksize) * 8))); | 
|---|
| 112 | counts += is_leading_utf8_byte(usize_load_unchecked( | 
|---|
| 113 | utf8_chars, | 
|---|
| 114 | utf8_chars.len() - chunksize, | 
|---|
| 115 | )) & mask; | 
|---|
| 116 | } | 
|---|
| 117 | count += sum_usize(counts); | 
|---|
| 118 |  | 
|---|
| 119 | count | 
|---|
| 120 | } | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|