| 1 | //! Bit Manipulation Instruction (BMI) Set 2.0. | 
| 2 | //! | 
|---|
| 3 | //! The reference is [Intel 64 and IA-32 Architectures Software Developer's | 
|---|
| 4 | //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. | 
|---|
| 5 | //! | 
|---|
| 6 | //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions | 
|---|
| 7 | //! available. | 
|---|
| 8 | //! | 
|---|
| 9 | //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf | 
|---|
| 10 | //! [wikipedia_bmi]: | 
|---|
| 11 | //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 | 
|---|
| 12 |  | 
|---|
| 13 | #[ cfg(test)] | 
|---|
| 14 | use stdarch_test::assert_instr; | 
|---|
| 15 |  | 
|---|
| 16 | /// Unsigned multiply without affecting flags. | 
|---|
| 17 | /// | 
|---|
| 18 | /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with | 
|---|
| 19 | /// the low half and the high half of the result. | 
|---|
| 20 | /// | 
|---|
| 21 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mulx_u64) | 
|---|
| 22 | #[ inline] | 
|---|
| 23 | #[ cfg_attr(test, assert_instr(mul))] | 
|---|
| 24 | #[ target_feature(enable = "bmi2")] | 
|---|
| 25 | #[ cfg(not(target_arch = "x86"))] // calls an intrinsic | 
|---|
| 26 | #[ stable(feature = "simd_x86", since = "1.27.0")] | 
|---|
| 27 | pub fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { | 
|---|
| 28 | let result: u128 = (a as u128) * (b as u128); | 
|---|
| 29 | *hi = (result >> 64) as u64; | 
|---|
| 30 | result as u64 | 
|---|
| 31 | } | 
|---|
| 32 |  | 
|---|
| 33 | /// Zeroes higher bits of `a` >= `index`. | 
|---|
| 34 | /// | 
|---|
| 35 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u64) | 
|---|
| 36 | #[ inline] | 
|---|
| 37 | #[ target_feature(enable = "bmi2")] | 
|---|
| 38 | #[ cfg_attr(test, assert_instr(bzhi))] | 
|---|
| 39 | #[ cfg(not(target_arch = "x86"))] | 
|---|
| 40 | #[ stable(feature = "simd_x86", since = "1.27.0")] | 
|---|
| 41 | pub fn _bzhi_u64(a: u64, index: u32) -> u64 { | 
|---|
| 42 | unsafe { x86_bmi2_bzhi_64(x:a, y:index as u64) } | 
|---|
| 43 | } | 
|---|
| 44 |  | 
|---|
| 45 | /// Scatter contiguous low order bits of `a` to the result at the positions | 
|---|
| 46 | /// specified by the `mask`. | 
|---|
| 47 | /// | 
|---|
| 48 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u64) | 
|---|
| 49 | #[ inline] | 
|---|
| 50 | #[ target_feature(enable = "bmi2")] | 
|---|
| 51 | #[ cfg_attr(test, assert_instr(pdep))] | 
|---|
| 52 | #[ cfg(not(target_arch = "x86"))] | 
|---|
| 53 | #[ stable(feature = "simd_x86", since = "1.27.0")] | 
|---|
| 54 | pub fn _pdep_u64(a: u64, mask: u64) -> u64 { | 
|---|
| 55 | unsafe { x86_bmi2_pdep_64(x:a, y:mask) } | 
|---|
| 56 | } | 
|---|
| 57 |  | 
|---|
| 58 | /// Gathers the bits of `x` specified by the `mask` into the contiguous low | 
|---|
| 59 | /// order bit positions of the result. | 
|---|
| 60 | /// | 
|---|
| 61 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u64) | 
|---|
| 62 | #[ inline] | 
|---|
| 63 | #[ target_feature(enable = "bmi2")] | 
|---|
| 64 | #[ cfg_attr(test, assert_instr(pext))] | 
|---|
| 65 | #[ cfg(not(target_arch = "x86"))] | 
|---|
| 66 | #[ stable(feature = "simd_x86", since = "1.27.0")] | 
|---|
| 67 | pub fn _pext_u64(a: u64, mask: u64) -> u64 { | 
|---|
| 68 | unsafe { x86_bmi2_pext_64(x:a, y:mask) } | 
|---|
| 69 | } | 
|---|
| 70 |  | 
|---|
| 71 | unsafe extern "C"{ | 
|---|
| 72 | #[ link_name= "llvm.x86.bmi.bzhi.64"] | 
|---|
| 73 | unsafefn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64; | 
|---|
| 74 | #[ link_name= "llvm.x86.bmi.pdep.64"] | 
|---|
| 75 | unsafefn x86_bmi2_pdep_64(x: u64, y: u64) -> u64; | 
|---|
| 76 | #[ link_name= "llvm.x86.bmi.pext.64"] | 
|---|
| 77 | unsafefn x86_bmi2_pext_64(x: u64, y: u64) -> u64; | 
|---|
| 78 | } | 
|---|
| 79 |  | 
|---|
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86_64::*;

    /// `_pext_u64` packs the bits of the source selected by each mask into
    /// the low-order bits of the result.
    #[simd_test(enable = "bmi2")]
    unsafe fn test_pext_u64() {
        let source = 0b1011_1110_1001_0011u64;

        // Mask selecting 7 bit positions.
        let sparse_mask = 0b0110_0011_1000_0101u64;
        let sparse_expected = 0b0000_0000_0011_0101u64;
        assert_eq!(_pext_u64(source, sparse_mask), sparse_expected);

        // Mask selecting 13 bit positions.
        let dense_mask = 0b1110_1011_1110_1111u64;
        let dense_expected = 0b0001_0111_0100_0011u64;
        assert_eq!(_pext_u64(source, dense_mask), dense_expected);
    }

    /// `_pdep_u64` spreads the low-order bits of the source over the set
    /// positions of each mask.
    #[simd_test(enable = "bmi2")]
    unsafe fn test_pdep_u64() {
        let source = 0b1011_1110_1001_0011u64;

        // Mask with 7 target positions.
        let sparse_mask = 0b0110_0011_1000_0101u64;
        let sparse_expected = 0b0000_0010_0000_0101u64;
        assert_eq!(_pdep_u64(source, sparse_mask), sparse_expected);

        // Mask with 13 target positions.
        let dense_mask = 0b1110_1011_1110_1111u64;
        let dense_expected = 0b1110_1001_0010_0011u64;
        assert_eq!(_pdep_u64(source, dense_mask), dense_expected);
    }

    /// `_bzhi_u64` with index 5 keeps bits 0..=4 and clears the rest.
    #[simd_test(enable = "bmi2")]
    unsafe fn test_bzhi_u64() {
        let input = 0b1111_0010u64;
        let expected = 0b0001_0010u64;
        assert_eq!(_bzhi_u64(input, 5), expected);
    }

    /// `_mulx_u64` returns the low half of the product and stores the high
    /// half through its third argument.
    #[simd_test(enable = "bmi2")]
    #[rustfmt::skip]
    unsafe fn test_mulx_u64() {
        let a: u64 = 9_223_372_036_854_775_800;
        let b: u64 = 100;
        let mut high = 0;
        let low = _mulx_u64(a, b, &mut high);
        // a * b = 922337203685477580000, which does not fit in 64 bits:
        //
        //   0b00110001_1111111111111111_1111111111111111_1111111111111111_1111110011100000
        //     ^~ high  ^~ low (64 bits) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        assert_eq!(
            low,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64
        );
        assert_eq!(high, 0b00110001u64);
    }
}
|---|
| 140 |  | 
|---|