1 | //! Bit Manipulation Instruction (BMI) Set 2.0. |
2 | //! |
3 | //! The reference is [Intel 64 and IA-32 Architectures Software Developer's |
4 | //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. |
5 | //! |
6 | //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions |
7 | //! available. |
8 | //! |
9 | //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf |
//! [wikipedia_bmi]:
//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI2_(Bit_Manipulation_Instruction_Set_2)
12 | |
13 | #[cfg (test)] |
14 | use stdarch_test::assert_instr; |
15 | |
16 | /// Unsigned multiply without affecting flags. |
17 | /// |
18 | /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with |
19 | /// the low half and the high half of the result. |
20 | /// |
21 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mulx_u32) |
22 | #[inline ] |
23 | // LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232 |
24 | #[cfg_attr (all(test, target_arch = "x86_64" ), assert_instr(imul))] |
25 | #[cfg_attr (all(test, target_arch = "x86" ), assert_instr(mul))] |
26 | #[target_feature (enable = "bmi2" )] |
27 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
28 | pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 { |
29 | let result: u64 = (a as u64) * (b as u64); |
30 | *hi = (result >> 32) as u32; |
31 | result as u32 |
32 | } |
33 | |
34 | /// Zeroes higher bits of `a` >= `index`. |
35 | /// |
36 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u32) |
37 | #[inline ] |
38 | #[target_feature (enable = "bmi2" )] |
39 | #[cfg_attr (test, assert_instr(bzhi))] |
40 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
41 | pub unsafe fn _bzhi_u32(a: u32, index: u32) -> u32 { |
42 | x86_bmi2_bzhi_32(x:a, y:index) |
43 | } |
44 | |
45 | /// Scatter contiguous low order bits of `a` to the result at the positions |
46 | /// specified by the `mask`. |
47 | /// |
48 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32) |
49 | #[inline ] |
50 | #[target_feature (enable = "bmi2" )] |
51 | #[cfg_attr (test, assert_instr(pdep))] |
52 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
53 | pub unsafe fn _pdep_u32(a: u32, mask: u32) -> u32 { |
54 | x86_bmi2_pdep_32(x:a, y:mask) |
55 | } |
56 | |
57 | /// Gathers the bits of `x` specified by the `mask` into the contiguous low |
58 | /// order bit positions of the result. |
59 | /// |
60 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32) |
61 | #[inline ] |
62 | #[target_feature (enable = "bmi2" )] |
63 | #[cfg_attr (test, assert_instr(pext))] |
64 | #[stable (feature = "simd_x86" , since = "1.27.0" )] |
65 | pub unsafe fn _pext_u32(a: u32, mask: u32) -> u32 { |
66 | x86_bmi2_pext_32(x:a, y:mask) |
67 | } |
68 | |
69 | extern "C" { |
70 | #[link_name = "llvm.x86.bmi.bzhi.32" ] |
71 | fn x86_bmi2_bzhi_32(x: u32, y: u32) -> u32; |
72 | #[link_name = "llvm.x86.bmi.pdep.32" ] |
73 | fn x86_bmi2_pdep_32(x: u32, y: u32) -> u32; |
74 | #[link_name = "llvm.x86.bmi.pext.32" ] |
75 | fn x86_bmi2_pext_32(x: u32, y: u32) -> u32; |
76 | } |
77 | |
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    /// `pext` pulls the source bits selected by each mask down into the low
    /// bits of the result.
    #[simd_test(enable = "bmi2")]
    unsafe fn test_pext_u32() {
        let source = 0b1011_1110_1001_0011u32;

        // (mask, expected result) pairs.
        let cases = [
            (0b0110_0011_1000_0101u32, 0b0000_0000_0011_0101u32),
            (0b1110_1011_1110_1111u32, 0b0001_0111_0100_0011u32),
        ];

        for (mask, expected) in cases {
            assert_eq!(_pext_u32(source, mask), expected);
        }
    }

    /// `pdep` spreads the low bits of the source out to the positions set in
    /// each mask.
    #[simd_test(enable = "bmi2")]
    unsafe fn test_pdep_u32() {
        let source = 0b1011_1110_1001_0011u32;

        // (mask, expected result) pairs.
        let cases = [
            (0b0110_0011_1000_0101u32, 0b0000_0010_0000_0101u32),
            (0b1110_1011_1110_1111u32, 0b1110_1001_0010_0011u32),
        ];

        for (mask, expected) in cases {
            assert_eq!(_pdep_u32(source, mask), expected);
        }
    }

    /// `bzhi` with index 5 keeps bits 0..=4 and clears everything above.
    #[simd_test(enable = "bmi2")]
    unsafe fn test_bzhi_u32() {
        let input = 0b1111_0010u32;
        let expected = 0b0001_0010u32;
        let actual = _bzhi_u32(input, 5);
        assert_eq!(actual, expected);
    }

    /// A product that does not fit in 32 bits is split into low and high halves.
    #[simd_test(enable = "bmi2")]
    unsafe fn test_mulx_u32() {
        let lhs: u32 = 4_294_967_200;
        let rhs: u32 = 2;
        let mut high = 0u32;
        let low = _mulx_u32(lhs, rhs, &mut high);
        // lhs * rhs = 8_589_934_400 = 0x1_FFFF_FF40:
        // the high half is 0x1 and the low half is 0xFFFF_FF40.
        assert_eq!(low, 0xFFFF_FF40u32);
        assert_eq!(high, 0x1u32);
    }
}
134 | |