/// SWAR: SIMD Within A Register
/// SIMD validator backend that validates register-sized chunks of data at a time.
use crate::{is_header_name_token, is_header_value_token, is_uri_token, Bytes};

// Adapt block-size to match native register size, i.e: 32bit => 4, 64bit => 8
const BLOCK_SIZE: usize = core::mem::size_of::<usize>();
type ByteBlock = [u8; BLOCK_SIZE];
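
// Each matcher below walks the input in BLOCK_SIZE chunks, stopping at the first byte that
// is not a valid token; tails shorter than a full block are handled byte-at-a-time.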

#[inline]
pub fn match_uri_vectored(bytes: &mut Bytes) {
    loop {
        if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
            let n = match_uri_char_8_swar(bytes8);
            // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
            // in `bytes`, so calling `advance(n)` is safe.
            unsafe {
                bytes.advance(n);
            }
            if n == BLOCK_SIZE {
                continue;
            }
        }
        if let Some(b) = bytes.peek() {
            if is_uri_token(b) {
                // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte
                // in bytes, so calling advance is safe.
                unsafe {
                    bytes.advance(1);
                }
                continue;
            }
        }
        break;
    }
}

#[inline]
pub fn match_header_value_vectored(bytes: &mut Bytes) {
    loop {
        if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
            let n = match_header_value_char_8_swar(bytes8);
            // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
            // in `bytes`, so calling `advance(n)` is safe.
            unsafe {
                bytes.advance(n);
            }
            if n == BLOCK_SIZE {
                continue;
            }
        }
        if let Some(b) = bytes.peek() {
            if is_header_value_token(b) {
                // SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte
                // in bytes, so calling advance is safe.
                unsafe {
                    bytes.advance(1);
                }
                continue;
            }
        }
        break;
    }
}

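// Header names are matched with the scalar `is_header_name_token` predicate applied to each
// block: unlike URI and header-value characters, the accepted set is not a contiguous byte
// range (':', '(', '"', and similar are rejected), so the word-sized range checks defined
// below do not apply here.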
#[inline]
pub fn match_header_name_vectored(bytes: &mut Bytes) {
    while let Some(block) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
        let n = match_block(is_header_name_token, block);
        // SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
        // in `bytes`, so calling `advance(n)` is safe.
        unsafe {
            bytes.advance(n);
        }
        if n != BLOCK_SIZE {
            return;
        }
    }
    // SAFETY: `match_tail` returns at most the number of bytes remaining in `bytes`, so this
    // advances `bytes` to the end, but no further.
    unsafe { bytes.advance(match_tail(is_header_name_token, bytes.as_ref())) };
}

// Matches "tail", i.e: when we have <BLOCK_SIZE bytes in the buffer, should be uncommon
#[cold]
#[inline]
fn match_tail(f: impl Fn(u8) -> bool, bytes: &[u8]) -> usize {
    for (i, &b) in bytes.iter().enumerate() {
        if !f(b) {
            return i;
        }
    }
    bytes.len()
}

// Naive fallback block matcher
#[inline(always)]
fn match_block(f: impl Fn(u8) -> bool, block: ByteBlock) -> usize {
    for (i, &b) in block.iter().enumerate() {
        if !f(b) {
            return i;
        }
    }
    BLOCK_SIZE
}

// A const alternative to u64::from_ne_bytes to avoid bumping MSRV (1.36 => 1.44)
// creates a u64 whose bytes are each equal to b
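// e.g. uniform_block(0x21) == 0x2121_2121_2121_2121 on a 64-bit target (truncated to
// 0x2121_2121 when usize is 32 bits).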
const fn uniform_block(b: u8) -> usize {
    (b as u64 * 0x01_01_01_01_01_01_01_01 /* [1_u8; 8] */) as usize
}

// A byte-wise range-check on an entire word/block,
// ensuring all bytes in the word satisfy `33 <= (x != 127) <= 255`
#[inline]
fn match_uri_char_8_swar(block: ByteBlock) -> usize {
    // 33 <= (x != 127) <= 255
    const M: u8 = 0x21;
    // uniform block full of exclamation mark (!) (33).
    const BM: usize = uniform_block(M);
    // uniform block full of 1.
    const ONE: usize = uniform_block(0x01);
    // uniform block full of DEL (127).
    const DEL: usize = uniform_block(0x7f);
    // uniform block full of 128.
    const M128: usize = uniform_block(128);

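    // SWAR "byte < N" / "byte == N" tricks: for M <= 128, `x.wrapping_sub(BM) & !x` sets the
    // high bit of every byte lane whose value is below M, and the XOR/subtract-one step below
    // does the same for lanes equal to DEL. After masking with M128, the offset of the first
    // non-zero byte gives the number of leading valid bytes; it can never overshoot past an
    // invalid byte, so the byte-at-a-time fallback in the callers stays correct.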
    let x = usize::from_ne_bytes(block); // Really just a transmute
    let lt = x.wrapping_sub(BM) & !x; // byte < M

    let xor_del = x ^ DEL;
    let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // byte == DEL

    offsetnz((lt | eq_del) & M128)
}

// A byte-wise range-check on an entire word/block,
// ensuring all bytes in the word satisfy `32 <= (x != 127) <= 255`
#[inline]
fn match_header_value_char_8_swar(block: ByteBlock) -> usize {
    // 32 <= (x != 127) <= 255
    const M: u8 = 0x20;
    // uniform block full of space (0x20, 32).
    const BM: usize = uniform_block(M);
    // uniform block full of 1.
    const ONE: usize = uniform_block(0x01);
    // uniform block full of DEL (127).
    const DEL: usize = uniform_block(0x7f);
    // uniform block full of 128.
    const M128: usize = uniform_block(128);

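    // Same bit tricks as match_uri_char_8_swar above, with the lower bound at 0x20 (space)
    // rather than 0x21 ('!').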
    let x = usize::from_ne_bytes(block); // Really just a transmute
    let lt = x.wrapping_sub(BM) & !x; // byte < M

    let xor_del = x ^ DEL;
    let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // byte == DEL

    offsetnz((lt | eq_del) & M128)
}

/// Check block to find offset of first non-zero byte
// NOTE: Curiously `block.trailing_zeros() >> 3` appears to be slower, maybe revisit
#[inline]
fn offsetnz(block: usize) -> usize {
    // fast path optimistic case (common for long valid sequences)
    if block == 0 {
        return BLOCK_SIZE;
    }

    // perf: rust will unroll this loop
    for (i, b) in block.to_ne_bytes().iter().copied().enumerate() {
        if b != 0 {
            return i;
        }
    }
    unreachable!()
}

#[test]
fn test_is_header_value_block() {
    let is_header_value_block = |b| match_header_value_char_8_swar(b) == BLOCK_SIZE;

    // 0..32 => false
    for b in 0..32_u8 {
        assert!(!is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
    }
    // 32..=126 => true
    for b in 32..=126_u8 {
        assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
    }
    // 127 => false
    assert!(!is_header_value_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F');
    // 128..=255 => true
    for b in 128..=255_u8 {
        assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
    }

    #[cfg(target_pointer_width = "64")]
    {
        // A few sanity checks on non-uniform bytes for safe-measure
        assert!(!is_header_value_block(*b"foo.com\n"));
        assert!(!is_header_value_block(*b"o.com\r\nU"));
    }
}

#[test]
fn test_is_uri_block() {
    let is_uri_block = |b| match_uri_char_8_swar(b) == BLOCK_SIZE;

    // 0..33 => false
    for b in 0..33_u8 {
        assert!(!is_uri_block([b; BLOCK_SIZE]), "b={}", b);
    }
    // 33..=126 => true
    for b in 33..=126_u8 {
        assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b);
    }
    // 127 => false
    assert!(!is_uri_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F');
    // 128..=255 => true
    for b in 128..=255_u8 {
        assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b);
    }
}

#[test]
fn test_offsetnz() {
    let seq = [0_u8; BLOCK_SIZE];
    for i in 0..BLOCK_SIZE {
        let mut seq = seq;
        seq[i] = 1;
        let x = usize::from_ne_bytes(seq);
        assert_eq!(offsetnz(x), i);
    }
}
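
// Illustrative sketch (not part of the upstream test suite): exercises the scalar helpers
// used by `match_header_name_vectored`, assuming the usual `is_header_name_token` semantics
// (token characters accepted; ':' and control characters rejected).
#[test]
fn test_match_block_and_tail() {
    // All-token block: the whole block is consumed.
    let all_valid = [b'a'; BLOCK_SIZE];
    assert_eq!(match_block(is_header_name_token, all_valid), BLOCK_SIZE);

    // A ':' terminates a header name, so matching stops at its index.
    let mut with_colon = all_valid;
    with_colon[1] = b':';
    assert_eq!(match_block(is_header_name_token, with_colon), 1);

    // match_tail behaves the same way on arbitrary-length slices.
    assert_eq!(match_tail(is_header_name_token, b"abc"), 3);
    assert_eq!(match_tail(is_header_name_token, b"ab\rcd"), 2);
}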