1 | //! Streaming SIMD Extensions 4.2 (SSE4.2) |
2 | //! |
3 | //! Extends SSE4.1 with STTNI (String and Text New Instructions). |
4 | |
5 | #[cfg (test)] |
6 | use stdarch_test::assert_instr; |
7 | |
8 | use crate::{ |
9 | core_arch::{simd::*, simd_llvm::*, x86::*}, |
10 | mem::transmute, |
11 | }; |
12 | |
13 | /// String contains unsigned 8-bit characters *(Default)* |
14 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
15 | pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000; |
16 | /// String contains unsigned 16-bit characters |
17 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
18 | pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001; |
19 | /// String contains signed 8-bit characters |
20 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
21 | pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010; |
22 | /// String contains unsigned 16-bit characters |
23 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
24 | pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011; |
25 | |
26 | /// For each character in `a`, find if it is in `b` *(Default)* |
27 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
28 | pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000; |
29 | /// For each character in `a`, determine if |
30 | /// `b[0] <= c <= b[1] or b[1] <= c <= b[2]...` |
31 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
32 | pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100; |
33 | /// The strings defined by `a` and `b` are equal |
34 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
35 | pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000; |
36 | /// Search for the defined substring in the target |
37 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
38 | pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100; |
39 | |
40 | /// Do not negate results *(Default)* |
41 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
42 | pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000; |
43 | /// Negates results |
44 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
45 | pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000; |
46 | /// Do not negate results before the end of the string |
47 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
48 | pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000; |
49 | /// Negates results only before the end of the string |
50 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
51 | pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000; |
52 | |
53 | /// **Index only**: return the least significant bit *(Default)* |
54 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
55 | pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000; |
56 | /// **Index only**: return the most significant bit |
57 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
58 | pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000; |
59 | |
60 | /// **Mask only**: return the bit mask |
61 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
62 | pub const _SIDD_BIT_MASK: i32 = 0b0000_0000; |
63 | /// **Mask only**: return the byte mask |
64 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
65 | pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000; |
66 | |
67 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
68 | /// control in `IMM8`, and return the generated mask. |
69 | /// |
70 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrm) |
71 | #[inline ] |
72 | #[target_feature (enable = "sse4.2" )] |
73 | #[cfg_attr (test, assert_instr(pcmpistrm, IMM8 = 0))] |
74 | #[rustc_legacy_const_generics (2)] |
75 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
76 | pub unsafe fn _mm_cmpistrm<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { |
77 | static_assert_uimm_bits!(IMM8, 8); |
78 | transmute(src:pcmpistrm128(a:a.as_i8x16(), b:b.as_i8x16(), IMM8 as i8)) |
79 | } |
80 | |
81 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
82 | /// control in `IMM8` and return the generated index. Similar to |
83 | /// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the |
84 | /// lengths of `a` and `b` to be explicitly specified. |
85 | /// |
86 | /// # Control modes |
87 | /// |
88 | /// The control specified by `IMM8` may be one or more of the following. |
89 | /// |
90 | /// ## Data size and signedness |
91 | /// |
92 | /// - [`_SIDD_UBYTE_OPS`] - Default |
93 | /// - [`_SIDD_UWORD_OPS`] |
94 | /// - [`_SIDD_SBYTE_OPS`] |
95 | /// - [`_SIDD_SWORD_OPS`] |
96 | /// |
97 | /// ## Comparison options |
98 | /// - [`_SIDD_CMP_EQUAL_ANY`] - Default |
99 | /// - [`_SIDD_CMP_RANGES`] |
100 | /// - [`_SIDD_CMP_EQUAL_EACH`] |
101 | /// - [`_SIDD_CMP_EQUAL_ORDERED`] |
102 | /// |
103 | /// ## Result polarity |
104 | /// - [`_SIDD_POSITIVE_POLARITY`] - Default |
105 | /// - [`_SIDD_NEGATIVE_POLARITY`] |
106 | /// |
107 | /// ## Bit returned |
108 | /// - [`_SIDD_LEAST_SIGNIFICANT`] - Default |
109 | /// - [`_SIDD_MOST_SIGNIFICANT`] |
110 | /// |
111 | /// # Examples |
112 | /// |
113 | /// Finds a substring using [`_SIDD_CMP_EQUAL_ORDERED`] |
114 | /// |
115 | /// ``` |
116 | /// #[cfg(target_arch = "x86" )] |
117 | /// use std::arch::x86::*; |
118 | /// #[cfg(target_arch = "x86_64" )] |
119 | /// use std::arch::x86_64::*; |
120 | /// |
121 | /// # fn main() { |
122 | /// # if is_x86_feature_detected!("sse4.2" ) { |
123 | /// # #[target_feature (enable = "sse4.2" )] |
124 | /// # unsafe fn worker() { |
125 | /// let haystack = b"This is a long string of text data \r\n\tthat extends |
126 | /// multiple lines" ; |
127 | /// let needle = b" \r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0" ; |
128 | /// |
129 | /// let a = _mm_loadu_si128(needle.as_ptr() as *const _); |
130 | /// let hop = 16; |
131 | /// let mut indexes = Vec::new(); |
132 | /// |
133 | /// // Chunk the haystack into 16 byte chunks and find |
134 | /// // the first "\r\n\t" in the chunk. |
135 | /// for (i, chunk) in haystack.chunks(hop).enumerate() { |
136 | /// let b = _mm_loadu_si128(chunk.as_ptr() as *const _); |
137 | /// let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED); |
138 | /// if idx != 16 { |
139 | /// indexes.push((idx as usize) + (i * hop)); |
140 | /// } |
141 | /// } |
142 | /// assert_eq!(indexes, vec![34]); |
143 | /// # } |
144 | /// # unsafe { worker(); } |
145 | /// # } |
146 | /// # } |
147 | /// ``` |
148 | /// |
149 | /// The `_mm_cmpistri` intrinsic may also be used to find the existence of |
150 | /// one or more of a given set of characters in the haystack. |
151 | /// |
152 | /// ``` |
153 | /// #[cfg(target_arch = "x86" )] |
154 | /// use std::arch::x86::*; |
155 | /// #[cfg(target_arch = "x86_64" )] |
156 | /// use std::arch::x86_64::*; |
157 | /// |
158 | /// # fn main() { |
159 | /// # if is_x86_feature_detected!("sse4.2" ) { |
160 | /// # #[target_feature (enable = "sse4.2" )] |
161 | /// # unsafe fn worker() { |
162 | /// // Ensure your input is 16 byte aligned |
163 | /// let password = b"hunter2 \0\0\0\0\0\0\0\0\0" ; |
164 | /// let special_chars = b"!@#$%^&*()[]:;<>" ; |
165 | /// |
166 | /// // Load the input |
167 | /// let a = _mm_loadu_si128(special_chars.as_ptr() as *const _); |
168 | /// let b = _mm_loadu_si128(password.as_ptr() as *const _); |
169 | /// |
170 | /// // Use _SIDD_CMP_EQUAL_ANY to find the index of any bytes in b |
171 | /// let idx = _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY); |
172 | /// |
173 | /// if idx < 16 { |
174 | /// println!("Congrats! Your password contains a special character" ); |
175 | /// # panic!("{:?} does not contain a special character" , password); |
176 | /// } else { |
177 | /// println!("Your password should contain a special character" ); |
178 | /// } |
179 | /// # } |
180 | /// # unsafe { worker(); } |
181 | /// # } |
182 | /// # } |
183 | /// ``` |
184 | /// |
185 | /// Finds the index of the first character in the haystack that is within a |
186 | /// range of characters. |
187 | /// |
188 | /// ``` |
189 | /// #[cfg(target_arch = "x86" )] |
190 | /// use std::arch::x86::*; |
191 | /// #[cfg(target_arch = "x86_64" )] |
192 | /// use std::arch::x86_64::*; |
193 | /// |
194 | /// # fn main() { |
195 | /// # if is_x86_feature_detected!("sse4.2" ) { |
196 | /// # #[target_feature (enable = "sse4.2" )] |
197 | /// # unsafe fn worker() { |
198 | /// # let b = b":;<=>?@[ \\]^_`abc" ; |
199 | /// # let b = _mm_loadu_si128(b.as_ptr() as *const _); |
200 | /// |
201 | /// // Specify the ranges of values to be searched for [A-Za-z0-9]. |
202 | /// let a = b"AZaz09 \0\0\0\0\0\0\0\0\0\0" ; |
203 | /// let a = _mm_loadu_si128(a.as_ptr() as *const _); |
204 | /// |
205 | /// // Use _SIDD_CMP_RANGES to find the index of first byte in ranges. |
206 | /// // Which in this case will be the first alpha numeric byte found |
207 | /// // in the string. |
208 | /// let idx = _mm_cmpistri(a, b, _SIDD_CMP_RANGES); |
209 | /// |
210 | /// if idx < 16 { |
211 | /// println!("Found an alpha numeric character" ); |
212 | /// # assert_eq!(idx, 13); |
213 | /// } else { |
214 | /// println!("Did not find an alpha numeric character" ); |
215 | /// } |
216 | /// # } |
217 | /// # unsafe { worker(); } |
218 | /// # } |
219 | /// # } |
220 | /// ``` |
221 | /// |
222 | /// Working with 16-bit characters. |
223 | /// |
224 | /// ``` |
225 | /// #[cfg(target_arch = "x86" )] |
226 | /// use std::arch::x86::*; |
227 | /// #[cfg(target_arch = "x86_64" )] |
228 | /// use std::arch::x86_64::*; |
229 | /// |
230 | /// # fn main() { |
231 | /// # if is_x86_feature_detected!("sse4.2" ) { |
232 | /// # #[target_feature (enable = "sse4.2" )] |
233 | /// # unsafe fn worker() { |
234 | /// # let mut some_utf16_words = [0u16; 8]; |
235 | /// # let mut more_utf16_words = [0u16; 8]; |
236 | /// # '❤' .encode_utf16(&mut some_utf16_words); |
237 | /// # '𝕊' .encode_utf16(&mut more_utf16_words); |
238 | /// // Load the input |
239 | /// let a = _mm_loadu_si128(some_utf16_words.as_ptr() as *const _); |
240 | /// let b = _mm_loadu_si128(more_utf16_words.as_ptr() as *const _); |
241 | /// |
242 | /// // Specify _SIDD_UWORD_OPS to compare words instead of bytes, and |
243 | /// // use _SIDD_CMP_EQUAL_EACH to compare the two strings. |
244 | /// let idx = _mm_cmpistri(a, b, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH); |
245 | /// |
246 | /// if idx == 0 { |
247 | /// println!("16-bit unicode strings were equal!" ); |
248 | /// # panic!("Strings should not be equal!" ) |
249 | /// } else { |
250 | /// println!("16-bit unicode strings were not equal!" ); |
251 | /// } |
252 | /// # } |
253 | /// # unsafe { worker(); } |
254 | /// # } |
255 | /// # } |
256 | /// ``` |
257 | /// |
258 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistri) |
259 | #[inline ] |
260 | #[target_feature (enable = "sse4.2" )] |
261 | #[cfg_attr (test, assert_instr(pcmpistri, IMM8 = 0))] |
262 | #[rustc_legacy_const_generics (2)] |
263 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
264 | pub unsafe fn _mm_cmpistri<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { |
265 | static_assert_uimm_bits!(IMM8, 8); |
266 | pcmpistri128(a:a.as_i8x16(), b:b.as_i8x16(), IMM8 as i8) |
267 | } |
268 | |
269 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
270 | /// control in `IMM8`, and return `1` if any character in `b` was null. |
271 | /// and `0` otherwise. |
272 | /// |
273 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrz) |
274 | #[inline ] |
275 | #[target_feature (enable = "sse4.2" )] |
276 | #[cfg_attr (test, assert_instr(pcmpistri, IMM8 = 0))] |
277 | #[rustc_legacy_const_generics (2)] |
278 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
279 | pub unsafe fn _mm_cmpistrz<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { |
280 | static_assert_uimm_bits!(IMM8, 8); |
281 | pcmpistriz128(a:a.as_i8x16(), b:b.as_i8x16(), IMM8 as i8) |
282 | } |
283 | |
284 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
285 | /// control in `IMM8`, and return `1` if the resulting mask was non-zero, |
286 | /// and `0` otherwise. |
287 | /// |
288 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrc) |
289 | #[inline ] |
290 | #[target_feature (enable = "sse4.2" )] |
291 | #[cfg_attr (test, assert_instr(pcmpistri, IMM8 = 0))] |
292 | #[rustc_legacy_const_generics (2)] |
293 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
294 | pub unsafe fn _mm_cmpistrc<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { |
295 | static_assert_uimm_bits!(IMM8, 8); |
296 | pcmpistric128(a:a.as_i8x16(), b:b.as_i8x16(), IMM8 as i8) |
297 | } |
298 | |
299 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
300 | /// control in `IMM8`, and returns `1` if any character in `a` was null, |
301 | /// and `0` otherwise. |
302 | /// |
303 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrs) |
304 | #[inline ] |
305 | #[target_feature (enable = "sse4.2" )] |
306 | #[cfg_attr (test, assert_instr(pcmpistri, IMM8 = 0))] |
307 | #[rustc_legacy_const_generics (2)] |
308 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
309 | pub unsafe fn _mm_cmpistrs<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { |
310 | static_assert_uimm_bits!(IMM8, 8); |
311 | pcmpistris128(a:a.as_i8x16(), b:b.as_i8x16(), IMM8 as i8) |
312 | } |
313 | |
314 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
315 | /// control in `IMM8`, and return bit `0` of the resulting bit mask. |
316 | /// |
317 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistro) |
318 | #[inline ] |
319 | #[target_feature (enable = "sse4.2" )] |
320 | #[cfg_attr (test, assert_instr(pcmpistri, IMM8 = 0))] |
321 | #[rustc_legacy_const_generics (2)] |
322 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
323 | pub unsafe fn _mm_cmpistro<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { |
324 | static_assert_uimm_bits!(IMM8, 8); |
325 | pcmpistrio128(a:a.as_i8x16(), b:b.as_i8x16(), IMM8 as i8) |
326 | } |
327 | |
328 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
329 | /// control in `IMM8`, and return `1` if `b` did not contain a null |
330 | /// character and the resulting mask was zero, and `0` otherwise. |
331 | /// |
332 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistra) |
333 | #[inline ] |
334 | #[target_feature (enable = "sse4.2" )] |
335 | #[cfg_attr (test, assert_instr(pcmpistri, IMM8 = 0))] |
336 | #[rustc_legacy_const_generics (2)] |
337 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
338 | pub unsafe fn _mm_cmpistra<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { |
339 | static_assert_uimm_bits!(IMM8, 8); |
340 | pcmpistria128(a:a.as_i8x16(), b:b.as_i8x16(), IMM8 as i8) |
341 | } |
342 | |
343 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
344 | /// using the control in `IMM8`, and return the generated mask. |
345 | /// |
346 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm) |
347 | #[inline ] |
348 | #[target_feature (enable = "sse4.2" )] |
349 | #[cfg_attr (test, assert_instr(pcmpestrm, IMM8 = 0))] |
350 | #[rustc_legacy_const_generics (4)] |
351 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
352 | pub unsafe fn _mm_cmpestrm<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> __m128i { |
353 | static_assert_uimm_bits!(IMM8, 8); |
354 | transmute(src:pcmpestrm128(a:a.as_i8x16(), la, b:b.as_i8x16(), lb, IMM8 as i8)) |
355 | } |
356 | |
357 | /// Compares packed strings `a` and `b` with lengths `la` and `lb` using the |
358 | /// control in `IMM8` and return the generated index. Similar to |
359 | /// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly |
360 | /// determines the length of `a` and `b`. |
361 | /// |
362 | /// # Control modes |
363 | /// |
364 | /// The control specified by `IMM8` may be one or more of the following. |
365 | /// |
366 | /// ## Data size and signedness |
367 | /// |
368 | /// - [`_SIDD_UBYTE_OPS`] - Default |
369 | /// - [`_SIDD_UWORD_OPS`] |
370 | /// - [`_SIDD_SBYTE_OPS`] |
371 | /// - [`_SIDD_SWORD_OPS`] |
372 | /// |
373 | /// ## Comparison options |
374 | /// - [`_SIDD_CMP_EQUAL_ANY`] - Default |
375 | /// - [`_SIDD_CMP_RANGES`] |
376 | /// - [`_SIDD_CMP_EQUAL_EACH`] |
377 | /// - [`_SIDD_CMP_EQUAL_ORDERED`] |
378 | /// |
379 | /// ## Result polarity |
380 | /// - [`_SIDD_POSITIVE_POLARITY`] - Default |
381 | /// - [`_SIDD_NEGATIVE_POLARITY`] |
382 | /// |
383 | /// ## Bit returned |
384 | /// - [`_SIDD_LEAST_SIGNIFICANT`] - Default |
385 | /// - [`_SIDD_MOST_SIGNIFICANT`] |
386 | /// |
387 | /// # Examples |
388 | /// |
389 | /// ``` |
390 | /// #[cfg(target_arch = "x86" )] |
391 | /// use std::arch::x86::*; |
392 | /// #[cfg(target_arch = "x86_64" )] |
393 | /// use std::arch::x86_64::*; |
394 | /// |
395 | /// # fn main() { |
396 | /// # if is_x86_feature_detected!("sse4.2" ) { |
397 | /// # #[target_feature (enable = "sse4.2" )] |
398 | /// # unsafe fn worker() { |
399 | /// |
400 | /// // The string we want to find a substring in |
401 | /// let haystack = b"Split \r\n\t line " ; |
402 | /// |
403 | /// // The string we want to search for with some |
404 | /// // extra bytes we do not want to search for. |
405 | /// let needle = b" \r\n\t ignore this " ; |
406 | /// |
407 | /// let a = _mm_loadu_si128(needle.as_ptr() as *const _); |
408 | /// let b = _mm_loadu_si128(haystack.as_ptr() as *const _); |
409 | /// |
410 | /// // Note: We explicitly specify we only want to search `b` for the |
411 | /// // first 3 characters of a. |
412 | /// let idx = _mm_cmpestri(a, 3, b, 15, _SIDD_CMP_EQUAL_ORDERED); |
413 | /// |
414 | /// assert_eq!(idx, 6); |
415 | /// # } |
416 | /// # unsafe { worker(); } |
417 | /// # } |
418 | /// # } |
419 | /// ``` |
420 | /// |
421 | /// [`_SIDD_UBYTE_OPS`]: constant._SIDD_UBYTE_OPS.html |
422 | /// [`_SIDD_UWORD_OPS`]: constant._SIDD_UWORD_OPS.html |
423 | /// [`_SIDD_SBYTE_OPS`]: constant._SIDD_SBYTE_OPS.html |
424 | /// [`_SIDD_SWORD_OPS`]: constant._SIDD_SWORD_OPS.html |
425 | /// [`_SIDD_CMP_EQUAL_ANY`]: constant._SIDD_CMP_EQUAL_ANY.html |
426 | /// [`_SIDD_CMP_RANGES`]: constant._SIDD_CMP_RANGES.html |
427 | /// [`_SIDD_CMP_EQUAL_EACH`]: constant._SIDD_CMP_EQUAL_EACH.html |
428 | /// [`_SIDD_CMP_EQUAL_ORDERED`]: constant._SIDD_CMP_EQUAL_ORDERED.html |
429 | /// [`_SIDD_POSITIVE_POLARITY`]: constant._SIDD_POSITIVE_POLARITY.html |
430 | /// [`_SIDD_NEGATIVE_POLARITY`]: constant._SIDD_NEGATIVE_POLARITY.html |
431 | /// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html |
432 | /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html |
433 | /// [`_mm_cmpistri`]: fn._mm_cmpistri.html |
434 | /// |
435 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri) |
436 | #[inline ] |
437 | #[target_feature (enable = "sse4.2" )] |
438 | #[cfg_attr (test, assert_instr(pcmpestri, IMM8 = 0))] |
439 | #[rustc_legacy_const_generics (4)] |
440 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
441 | pub unsafe fn _mm_cmpestri<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { |
442 | static_assert_uimm_bits!(IMM8, 8); |
443 | pcmpestri128(a:a.as_i8x16(), la, b:b.as_i8x16(), lb, IMM8 as i8) |
444 | } |
445 | |
446 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
447 | /// using the control in `IMM8`, and return `1` if any character in |
448 | /// `b` was null, and `0` otherwise. |
449 | /// |
450 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrz) |
451 | #[inline ] |
452 | #[target_feature (enable = "sse4.2" )] |
453 | #[cfg_attr (test, assert_instr(pcmpestri, IMM8 = 0))] |
454 | #[rustc_legacy_const_generics (4)] |
455 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
456 | pub unsafe fn _mm_cmpestrz<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { |
457 | static_assert_uimm_bits!(IMM8, 8); |
458 | pcmpestriz128(a:a.as_i8x16(), la, b:b.as_i8x16(), lb, IMM8 as i8) |
459 | } |
460 | |
461 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
462 | /// using the control in `IMM8`, and return `1` if the resulting mask |
463 | /// was non-zero, and `0` otherwise. |
464 | /// |
465 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrc) |
466 | #[inline ] |
467 | #[target_feature (enable = "sse4.2" )] |
468 | #[cfg_attr (test, assert_instr(pcmpestri, IMM8 = 0))] |
469 | #[rustc_legacy_const_generics (4)] |
470 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
471 | pub unsafe fn _mm_cmpestrc<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { |
472 | static_assert_uimm_bits!(IMM8, 8); |
473 | pcmpestric128(a:a.as_i8x16(), la, b:b.as_i8x16(), lb, IMM8 as i8) |
474 | } |
475 | |
476 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
477 | /// using the control in `IMM8`, and return `1` if any character in |
478 | /// a was null, and `0` otherwise. |
479 | /// |
480 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrs) |
481 | #[inline ] |
482 | #[target_feature (enable = "sse4.2" )] |
483 | #[cfg_attr (test, assert_instr(pcmpestri, IMM8 = 0))] |
484 | #[rustc_legacy_const_generics (4)] |
485 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
486 | pub unsafe fn _mm_cmpestrs<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { |
487 | static_assert_uimm_bits!(IMM8, 8); |
488 | pcmpestris128(a:a.as_i8x16(), la, b:b.as_i8x16(), lb, IMM8 as i8) |
489 | } |
490 | |
491 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
492 | /// using the control in `IMM8`, and return bit `0` of the resulting |
493 | /// bit mask. |
494 | /// |
495 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestro) |
496 | #[inline ] |
497 | #[target_feature (enable = "sse4.2" )] |
498 | #[cfg_attr (test, assert_instr(pcmpestri, IMM8 = 0))] |
499 | #[rustc_legacy_const_generics (4)] |
500 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
501 | pub unsafe fn _mm_cmpestro<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { |
502 | static_assert_uimm_bits!(IMM8, 8); |
503 | pcmpestrio128(a:a.as_i8x16(), la, b:b.as_i8x16(), lb, IMM8 as i8) |
504 | } |
505 | |
506 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
507 | /// using the control in `IMM8`, and return `1` if `b` did not |
508 | /// contain a null character and the resulting mask was zero, and `0` |
509 | /// otherwise. |
510 | /// |
511 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestra) |
512 | #[inline ] |
513 | #[target_feature (enable = "sse4.2" )] |
514 | #[cfg_attr (test, assert_instr(pcmpestri, IMM8 = 0))] |
515 | #[rustc_legacy_const_generics (4)] |
516 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
517 | pub unsafe fn _mm_cmpestra<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { |
518 | static_assert_uimm_bits!(IMM8, 8); |
519 | pcmpestria128(a:a.as_i8x16(), la, b:b.as_i8x16(), lb, IMM8 as i8) |
520 | } |
521 | |
522 | /// Starting with the initial value in `crc`, return the accumulated |
523 | /// CRC32-C value for unsigned 8-bit integer `v`. |
524 | /// |
525 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u8) |
526 | #[inline ] |
527 | #[target_feature (enable = "sse4.2" )] |
528 | #[cfg_attr (test, assert_instr(crc32))] |
529 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
530 | pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 { |
531 | crc32_32_8(crc, v) |
532 | } |
533 | |
534 | /// Starting with the initial value in `crc`, return the accumulated |
535 | /// CRC32-C value for unsigned 16-bit integer `v`. |
536 | /// |
537 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u16) |
538 | #[inline ] |
539 | #[target_feature (enable = "sse4.2" )] |
540 | #[cfg_attr (test, assert_instr(crc32))] |
541 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
542 | pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 { |
543 | crc32_32_16(crc, v) |
544 | } |
545 | |
546 | /// Starting with the initial value in `crc`, return the accumulated |
547 | /// CRC32-C value for unsigned 32-bit integer `v`. |
548 | /// |
549 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u32) |
550 | #[inline ] |
551 | #[target_feature (enable = "sse4.2" )] |
552 | #[cfg_attr (test, assert_instr(crc32))] |
553 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
554 | pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { |
555 | crc32_32_32(crc, v) |
556 | } |
557 | |
558 | /// Compares packed 64-bit integers in `a` and `b` for greater-than, |
559 | /// return the results. |
560 | /// |
561 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64) |
562 | #[inline ] |
563 | #[target_feature (enable = "sse4.2" )] |
564 | #[cfg_attr (test, assert_instr(pcmpgtq))] |
565 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
566 | pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { |
567 | transmute(src:simd_gt::<_, i64x2>(x:a.as_i64x2(), y:b.as_i64x2())) |
568 | } |
569 | |
570 | #[allow (improper_ctypes)] |
571 | extern "C" { |
572 | // SSE 4.2 string and text comparison ops |
573 | #[link_name = "llvm.x86.sse42.pcmpestrm128" ] |
574 | fn pcmpestrm128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> u8x16; |
575 | #[link_name = "llvm.x86.sse42.pcmpestri128" ] |
576 | fn pcmpestri128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; |
577 | #[link_name = "llvm.x86.sse42.pcmpestriz128" ] |
578 | fn pcmpestriz128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; |
579 | #[link_name = "llvm.x86.sse42.pcmpestric128" ] |
580 | fn pcmpestric128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; |
581 | #[link_name = "llvm.x86.sse42.pcmpestris128" ] |
582 | fn pcmpestris128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; |
583 | #[link_name = "llvm.x86.sse42.pcmpestrio128" ] |
584 | fn pcmpestrio128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; |
585 | #[link_name = "llvm.x86.sse42.pcmpestria128" ] |
586 | fn pcmpestria128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; |
587 | #[link_name = "llvm.x86.sse42.pcmpistrm128" ] |
588 | fn pcmpistrm128(a: i8x16, b: i8x16, imm8: i8) -> i8x16; |
589 | #[link_name = "llvm.x86.sse42.pcmpistri128" ] |
590 | fn pcmpistri128(a: i8x16, b: i8x16, imm8: i8) -> i32; |
591 | #[link_name = "llvm.x86.sse42.pcmpistriz128" ] |
592 | fn pcmpistriz128(a: i8x16, b: i8x16, imm8: i8) -> i32; |
593 | #[link_name = "llvm.x86.sse42.pcmpistric128" ] |
594 | fn pcmpistric128(a: i8x16, b: i8x16, imm8: i8) -> i32; |
595 | #[link_name = "llvm.x86.sse42.pcmpistris128" ] |
596 | fn pcmpistris128(a: i8x16, b: i8x16, imm8: i8) -> i32; |
597 | #[link_name = "llvm.x86.sse42.pcmpistrio128" ] |
598 | fn pcmpistrio128(a: i8x16, b: i8x16, imm8: i8) -> i32; |
599 | #[link_name = "llvm.x86.sse42.pcmpistria128" ] |
600 | fn pcmpistria128(a: i8x16, b: i8x16, imm8: i8) -> i32; |
601 | // SSE 4.2 CRC instructions |
602 | #[link_name = "llvm.x86.sse42.crc32.32.8" ] |
603 | fn crc32_32_8(crc: u32, v: u8) -> u32; |
604 | #[link_name = "llvm.x86.sse42.crc32.32.16" ] |
605 | fn crc32_32_16(crc: u32, v: u16) -> u32; |
606 | #[link_name = "llvm.x86.sse42.crc32.32.32" ] |
607 | fn crc32_32_32(crc: u32, v: u32) -> u32; |
608 | } |
609 | |
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    /// Builds a `__m128i` from at most 16 bytes, zero-padding the tail.
    ///
    /// Currently one cannot `load` a `&[u8]` that is less than 16 in
    /// length, which makes loading short strings a bit difficult. Rather
    /// than `load` and mutate the `__m128i`, it is easier to copy the given
    /// string into a zeroed local buffer of length 16 and `load` that.
    ///
    /// Panics if `s` is longer than 16 bytes.
    #[target_feature(enable = "sse4.2")]
    unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
        assert!(s.len() <= 16);
        let mut padded = [0u8; 16];
        padded[..s.len()].copy_from_slice(s);
        _mm_loadu_si128(padded.as_ptr() as *const _)
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrm() {
        let a = str_to_m128i(b"Hello! Good-Bye!");
        let b = str_to_m128i(b"hello! good-bye!");
        let i = _mm_cmpistrm::<_SIDD_UNIT_MASK>(a, b);
        // Bytes 0, 7 and 12 of `b` ('h', 'g', 'b') do not occur in `a`.
        #[rustfmt::skip]
        let res = _mm_setr_epi8(
            0x00, !0, !0, !0, !0, !0, !0, 0x00,
            !0, !0, !0, !0, 0x00, !0, !0, !0,
        );
        assert_eq_m128i(i, res);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistri() {
        let a = str_to_m128i(b"Hello");
        // Three leading spaces put the match at index 3.
        let b = str_to_m128i(b"   Hello        ");
        let i = _mm_cmpistri::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
        assert_eq!(3, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrz() {
        let a = str_to_m128i(b"");
        let b = str_to_m128i(b"Hello");
        let i = _mm_cmpistrz::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrc() {
        let a = str_to_m128i(b" ");
        let b = str_to_m128i(b" ! ");
        let i = _mm_cmpistrc::<_SIDD_UNIT_MASK>(a, b);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrs() {
        let a = str_to_m128i(b"Hello");
        let b = str_to_m128i(b"");
        let i = _mm_cmpistrs::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistro() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        let i = _mm_cmpistro::<{ _SIDD_UWORD_OPS | _SIDD_UNIT_MASK }>(a, b);
        assert_eq!(0, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistra() {
        let a = str_to_m128i(b"");
        let b = str_to_m128i(b"Hello!!!!!!!!!!!");
        let i = _mm_cmpistra::<_SIDD_UNIT_MASK>(a, b);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrm() {
        let a = str_to_m128i(b"Hello!");
        let b = str_to_m128i(b"Hello.");
        let i = _mm_cmpestrm::<_SIDD_UNIT_MASK>(a, 5, b, 5);
        #[rustfmt::skip]
        let r = _mm_setr_epi8(
            !0, !0, !0, !0, !0, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
        );
        assert_eq_m128i(i, r);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestri() {
        let a = str_to_m128i(b"bar - garbage");
        let b = str_to_m128i(b"foobar");
        let i = _mm_cmpestri::<_SIDD_CMP_EQUAL_ORDERED>(a, 3, b, 6);
        assert_eq!(3, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrz() {
        let a = str_to_m128i(b"");
        let b = str_to_m128i(b"Hello");
        let i = _mm_cmpestrz::<_SIDD_CMP_EQUAL_ORDERED>(a, 16, b, 6);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrc() {
        let va = str_to_m128i(b"!!!!!!!!");
        let vb = str_to_m128i(b" ");
        let i = _mm_cmpestrc::<_SIDD_UNIT_MASK>(va, 7, vb, 7);
        assert_eq!(0, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrs() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        let b = _mm_set1_epi8(0x00);
        let i = _mm_cmpestrs::<_SIDD_UWORD_OPS>(a, 8, b, 0);
        assert_eq!(0, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestro() {
        let a = str_to_m128i(b"Hello");
        let b = str_to_m128i(b"World");
        let i = _mm_cmpestro::<_SIDD_UBYTE_OPS>(a, 5, b, 5);
        assert_eq!(0, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestra() {
        let a = str_to_m128i(b"Cannot match a");
        let b = str_to_m128i(b"Null after 14");
        let i = _mm_cmpestra::<{ _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK }>(a, 14, b, 16);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u8() {
        let crc = 0x2aa1e72b;
        let v = 0x2a;
        let i = _mm_crc32_u8(crc, v);
        assert_eq!(i, 0xf24122e4);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u16() {
        let crc = 0x8ecec3b5;
        let v = 0x22b;
        let i = _mm_crc32_u16(crc, v);
        assert_eq!(i, 0x13bb2fb);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u32() {
        let crc = 0xae2912c8;
        let v = 0x845fed;
        let i = _mm_crc32_u32(crc, v);
        assert_eq!(i, 0xffae2ed1);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpgt_epi64() {
        let a = _mm_setr_epi64x(0, 0x2a);
        let b = _mm_set1_epi64x(0x00);
        let i = _mm_cmpgt_epi64(a, b);
        // Only the second lane of `a` exceeds `b`, so only it is all-ones.
        assert_eq_m128i(i, _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64));
    }
}
799 | |