swar.rs source code [crates/httparse/src/simd/swar.rs]

1	/// SWAR: SIMD Within A Register
2	/// SIMD validator backend that validates register-sized chunks of data at a time.
3	use crate::{is_header_name_token, is_header_value_token, is_uri_token, Bytes};
4
5	// Adapt block-size to match native register size, i.e: 32bit => 4, 64bit => 8
6	const BLOCK_SIZE: usize = core::mem::size_of::<usize>();
7	type ByteBlock = [u8; BLOCK_SIZE];
8
9	#[inline]
10	pub fn match_uri_vectored(bytes: &mut Bytes) {
11	loop {
12	if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
13	let n = match_uri_char_8_swar(bytes8);
14	// SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
15	// in `bytes`, so calling `advance(n)` is safe.
16	unsafe {
17	bytes.advance(n);
18	}
19	if n == BLOCK_SIZE {
20	continue;
21	}
22	}
23	if let Some(b) = bytes.peek() {
24	if is_uri_token(b) {
25	// SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte
26	// in bytes, so calling advance is safe.
27	unsafe {
28	bytes.advance(`1`);
29	}
30	continue;
31	}
32	}
33	break;
34	}
35	}
36
37	#[inline]
38	pub fn match_header_value_vectored(bytes: &mut Bytes) {
39	loop {
40	if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
41	let n = match_header_value_char_8_swar(bytes8);
42	// SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
43	// in `bytes`, so calling `advance(n)` is safe.
44	unsafe {
45	bytes.advance(n);
46	}
47	if n == BLOCK_SIZE {
48	continue;
49	}
50	}
51	if let Some(b) = bytes.peek() {
52	if is_header_value_token(b) {
53	// SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte
54	// in bytes, so calling advance is safe.
55	unsafe {
56	bytes.advance(`1`);
57	}
58	continue;
59	}
60	}
61	break;
62	}
63	}
64
65	#[inline]
66	pub fn match_header_name_vectored(bytes: &mut Bytes) {
67	while let Some(block: [u8; 8]) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
68	let n: usize = match_block(f:is_header_name_token, block);
69	// SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
70	// in `bytes`, so calling `advance(n)` is safe.
71	unsafe {
72	bytes.advance(n);
73	}
74	if n != BLOCK_SIZE {
75	return;
76	}
77	}
78	// SAFETY: match_tail processes at most the remaining data in `bytes`. advances `bytes` to the
79	// end, but no further.
80	unsafe { bytes.advance(match_tail(f:is_header_name_token, bytes.as_ref())) };
81	}
82
83	// Matches "tail", i.e: when we have <BLOCK_SIZE bytes in the buffer, should be uncommon
84	#[cold]
85	#[inline]
86	fn match_tail(f: impl Fn(u8) -> bool, bytes: &[u8]) -> usize {
87	for (i: usize, &b: u8) in bytes.iter().enumerate() {
88	if !f(b) {
89	return i;
90	}
91	}
92	bytes.len()
93	}
94
95	// Naive fallback block matcher
96	#[inline(always)]
97	fn match_block(f: impl Fn(u8) -> bool, block: ByteBlock) -> usize {
98	for (i: usize, &b: u8) in block.iter().enumerate() {
99	if !f(b) {
100	return i;
101	}
102	}
103	BLOCK_SIZE
104	}
105
106	// A const alternative to u64::from_ne_bytes to avoid bumping MSRV (1.36 => 1.44)
107	// creates a u64 whose bytes are each equal to b
108	const fn uniform_block(b: u8) -> usize {
109	(b as u64 * `0x01_01_01_01_01_01_01_01` / [1_u8; 8] /) as usize
110	}
111
112	// A byte-wise range-check on an entire word/block,
113	// ensuring all bytes in the word satisfy `33 <= (x != 127) <= 255`
114	#[inline]
115	fn match_uri_char_8_swar(block: ByteBlock) -> usize {
116	// 33 <= (x != 127) <= 255
117	const M: u8 = `0x21`;
118	// uniform block full of exclamation mark (!) (33).
119	const BM: usize = uniform_block(M);
120	// uniform block full of 1.
121	const ONE: usize = uniform_block(`0x01`);
122	// uniform block full of DEL (127).
123	const DEL: usize = uniform_block(`0x7f`);
124	// uniform block full of 128.
125	const M128: usize = uniform_block(`128`);
126
127	let x: usize = usize::from_ne_bytes(block); // Really just a transmute
128	let lt: usize = x.wrapping_sub(BM) & !x; // <= m
129
130	let xor_del: usize = x ^ DEL;
131	let eq_del: usize = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL
132
133	offsetnz((lt \| eq_del) & M128)
134	}
135
136	// A byte-wise range-check on an entire word/block,
137	// ensuring all bytes in the word satisfy `32 <= (x != 127) <= 255`
138	#[inline]
139	fn match_header_value_char_8_swar(block: ByteBlock) -> usize {
140	// 32 <= (x != 127) <= 255
141	const M: u8 = `0x20`;
142	// uniform block full of exclamation mark (!) (33).
143	const BM: usize = uniform_block(M);
144	// uniform block full of 1.
145	const ONE: usize = uniform_block(`0x01`);
146	// uniform block full of DEL (127).
147	const DEL: usize = uniform_block(`0x7f`);
148	// uniform block full of 128.
149	const M128: usize = uniform_block(`128`);
150
151	let x: usize = usize::from_ne_bytes(block); // Really just a transmute
152	let lt: usize = x.wrapping_sub(BM) & !x; // <= m
153
154	let xor_del: usize = x ^ DEL;
155	let eq_del: usize = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL
156
157	offsetnz((lt \| eq_del) & M128)
158	}
159
160	/// Check block to find offset of first non-zero byte
161	// NOTE: Curiously `block.trailing_zeros() >> 3` appears to be slower, maybe revisit
162	#[inline]
163	fn offsetnz(block: usize) -> usize {
164	// fast path optimistic case (common for long valid sequences)
165	if block == `0` {
166	return BLOCK_SIZE;
167	}
168
169	// perf: rust will unroll this loop
170	for (i: usize, b: u8) in block.to_ne_bytes().iter().copied().enumerate() {
171	if b != `0` {
172	return i;
173	}
174	}
175	unreachable!()
176	}
177
178	#[test]
179	fn test_is_header_value_block() {
180	let is_header_value_block = \|b\| match_header_value_char_8_swar(b) == BLOCK_SIZE;
181
182	// 0..32 => false
183	for b in `0`..`32_u8` {
184	assert!(!is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
185	}
186	// 32..=126 => true
187	for b in `32`..=`126_u8` {
188	assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
189	}
190	// 127 => false
191	assert!(!is_header_value_block([b'`\x7F`'; BLOCK_SIZE]), "b={}", b'`\x7F`');
192	// 128..=255 => true
193	for b in `128`..=`255_u8` {
194	assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
195	}
196
197
198	#[cfg(target_pointer_width = "64")]
199	{
200	// A few sanity checks on non-uniform bytes for safe-measure
201	assert!(!is_header_value_block(*b"foo.com`\n`"));
202	assert!(!is_header_value_block(*b"o.com`\r\n`U"));
203	}
204	}
205
206	#[test]
207	fn test_is_uri_block() {
208	let is_uri_block = \|b\| match_uri_char_8_swar(b) == BLOCK_SIZE;
209
210	// 0..33 => false
211	for b in `0`..`33_u8` {
212	assert!(!is_uri_block([b; BLOCK_SIZE]), "b={}", b);
213	}
214	// 33..=126 => true
215	for b in `33`..=`126_u8` {
216	assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b);
217	}
218	// 127 => false
219	assert!(!is_uri_block([b'`\x7F`'; BLOCK_SIZE]), "b={}", b'`\x7F`');
220	// 128..=255 => true
221	for b in `128`..=`255_u8` {
222	assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b);
223	}
224	}
225
226	#[test]
227	fn test_offsetnz() {
228	let seq = [`0_u8`; BLOCK_SIZE];
229	for i in `0`..BLOCK_SIZE {
230	let mut seq = seq;
231	seq[i] = `1`;
232	let x = usize::from_ne_bytes(seq);
233	assert_eq!(offsetnz(x), i);
234	}
235	}
236