| 1 | use crate::arch::asm; | 
| 2 | use crate::core_arch::x86::*; | 
|---|
| 3 |  | 
|---|
| 4 | #[ cfg(test)] | 
|---|
| 5 | use stdarch_test::assert_instr; | 
|---|
| 6 |  | 
|---|
| 7 | /// Convert scalar BF16 (16-bit) floating point element stored at memory locations starting at location | 
|---|
| 8 | /// a to single precision (32-bit) floating-point, broadcast it to packed single precision (32-bit) | 
|---|
| 9 | /// floating-point elements, and store the results in dst. | 
|---|
| 10 | /// | 
|---|
| 11 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnebf16_ps) | 
|---|
| 12 | #[ inline] | 
|---|
| 13 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 14 | #[ cfg_attr(test, assert_instr(vbcstnebf162ps))] | 
|---|
| 15 | #[ unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] | 
|---|
| 16 | pub unsafe fn _mm_bcstnebf16_ps(a: *const bf16) -> __m128 { | 
|---|
| 17 | bcstnebf162ps_128(a) | 
|---|
| 18 | } | 
|---|
| 19 |  | 
|---|
| 20 | /// Convert scalar BF16 (16-bit) floating point element stored at memory locations starting at location | 
|---|
| 21 | /// a to single precision (32-bit) floating-point, broadcast it to packed single precision (32-bit) floating-point | 
|---|
| 22 | /// elements, and store the results in dst. | 
|---|
| 23 | /// | 
|---|
| 24 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnebf16_ps) | 
|---|
| 25 | #[ inline] | 
|---|
| 26 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 27 | #[ cfg_attr(test, assert_instr(vbcstnebf162ps))] | 
|---|
| 28 | #[ unstable(feature = "stdarch_x86_avx512_bf16", issue = "127356")] | 
|---|
| 29 | pub unsafe fn _mm256_bcstnebf16_ps(a: *const bf16) -> __m256 { | 
|---|
| 30 | bcstnebf162ps_256(a) | 
|---|
| 31 | } | 
|---|
| 32 |  | 
|---|
| 33 | /// Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting | 
|---|
| 34 | /// at location a to a single-precision (32-bit) floating-point, broadcast it to packed single-precision | 
|---|
| 35 | /// (32-bit) floating-point elements, and store the results in dst. | 
|---|
| 36 | /// | 
|---|
| 37 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnesh_ps) | 
|---|
| 38 | #[ inline] | 
|---|
| 39 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 40 | #[ cfg_attr(test, assert_instr(vbcstnesh2ps))] | 
|---|
| 41 | #[ unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] | 
|---|
| 42 | pub unsafe fn _mm_bcstnesh_ps(a: *const f16) -> __m128 { | 
|---|
| 43 | bcstnesh2ps_128(a) | 
|---|
| 44 | } | 
|---|
| 45 |  | 
|---|
| 46 | /// Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting | 
|---|
| 47 | /// at location a to a single-precision (32-bit) floating-point, broadcast it to packed single-precision | 
|---|
| 48 | /// (32-bit) floating-point elements, and store the results in dst. | 
|---|
| 49 | /// | 
|---|
| 50 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnesh_ps) | 
|---|
| 51 | #[ inline] | 
|---|
| 52 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 53 | #[ cfg_attr(test, assert_instr(vbcstnesh2ps))] | 
|---|
| 54 | #[ unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] | 
|---|
| 55 | pub unsafe fn _mm256_bcstnesh_ps(a: *const f16) -> __m256 { | 
|---|
| 56 | bcstnesh2ps_256(a) | 
|---|
| 57 | } | 
|---|
| 58 |  | 
|---|
| 59 | /// Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at | 
|---|
| 60 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. | 
|---|
| 61 | /// | 
|---|
| 62 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneebf16_ps) | 
|---|
| 63 | #[ inline] | 
|---|
| 64 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 65 | #[ cfg_attr(test, assert_instr(vcvtneebf162ps))] | 
|---|
| 66 | #[ stable(feature = "stdarch_x86_avx512", since = "1.89")] | 
|---|
| 67 | pub unsafe fn _mm_cvtneebf16_ps(a: *const __m128bh) -> __m128 { | 
|---|
| 68 | transmute(src:cvtneebf162ps_128(a)) | 
|---|
| 69 | } | 
|---|
| 70 |  | 
|---|
| 71 | /// Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at | 
|---|
| 72 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. | 
|---|
| 73 | /// | 
|---|
| 74 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneebf16_ps) | 
|---|
| 75 | #[ inline] | 
|---|
| 76 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 77 | #[ cfg_attr(test, assert_instr(vcvtneebf162ps))] | 
|---|
| 78 | #[ stable(feature = "stdarch_x86_avx512", since = "1.89")] | 
|---|
| 79 | pub unsafe fn _mm256_cvtneebf16_ps(a: *const __m256bh) -> __m256 { | 
|---|
| 80 | transmute(src:cvtneebf162ps_256(a)) | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | /// Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at | 
|---|
| 84 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. | 
|---|
| 85 | /// | 
|---|
| 86 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneeph_ps) | 
|---|
| 87 | #[ inline] | 
|---|
| 88 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 89 | #[ cfg_attr(test, assert_instr(vcvtneeph2ps))] | 
|---|
| 90 | #[ unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] | 
|---|
| 91 | pub unsafe fn _mm_cvtneeph_ps(a: *const __m128h) -> __m128 { | 
|---|
| 92 | transmute(src:cvtneeph2ps_128(a)) | 
|---|
| 93 | } | 
|---|
| 94 |  | 
|---|
| 95 | /// Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at | 
|---|
| 96 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. | 
|---|
| 97 | /// | 
|---|
| 98 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneeph_ps) | 
|---|
| 99 | #[ inline] | 
|---|
| 100 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 101 | #[ cfg_attr(test, assert_instr(vcvtneeph2ps))] | 
|---|
| 102 | #[ unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] | 
|---|
| 103 | pub unsafe fn _mm256_cvtneeph_ps(a: *const __m256h) -> __m256 { | 
|---|
| 104 | transmute(src:cvtneeph2ps_256(a)) | 
|---|
| 105 | } | 
|---|
| 106 |  | 
|---|
| 107 | /// Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at | 
|---|
| 108 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. | 
|---|
| 109 | /// | 
|---|
| 110 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneobf16_ps) | 
|---|
| 111 | #[ inline] | 
|---|
| 112 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 113 | #[ cfg_attr(test, assert_instr(vcvtneobf162ps))] | 
|---|
| 114 | #[ stable(feature = "stdarch_x86_avx512", since = "1.89")] | 
|---|
| 115 | pub unsafe fn _mm_cvtneobf16_ps(a: *const __m128bh) -> __m128 { | 
|---|
| 116 | transmute(src:cvtneobf162ps_128(a)) | 
|---|
| 117 | } | 
|---|
| 118 |  | 
|---|
| 119 | /// Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at | 
|---|
| 120 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. | 
|---|
| 121 | /// | 
|---|
| 122 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneobf16_ps) | 
|---|
| 123 | #[ inline] | 
|---|
| 124 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 125 | #[ cfg_attr(test, assert_instr(vcvtneobf162ps))] | 
|---|
| 126 | #[ stable(feature = "stdarch_x86_avx512", since = "1.89")] | 
|---|
| 127 | pub unsafe fn _mm256_cvtneobf16_ps(a: *const __m256bh) -> __m256 { | 
|---|
| 128 | transmute(src:cvtneobf162ps_256(a)) | 
|---|
| 129 | } | 
|---|
| 130 |  | 
|---|
| 131 | /// Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at | 
|---|
| 132 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. | 
|---|
| 133 | /// | 
|---|
| 134 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneoph_ps) | 
|---|
| 135 | #[ inline] | 
|---|
| 136 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 137 | #[ cfg_attr(test, assert_instr(vcvtneoph2ps))] | 
|---|
| 138 | #[ unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] | 
|---|
| 139 | pub unsafe fn _mm_cvtneoph_ps(a: *const __m128h) -> __m128 { | 
|---|
| 140 | transmute(src:cvtneoph2ps_128(a)) | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | /// Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at | 
|---|
| 144 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. | 
|---|
| 145 | /// | 
|---|
| 146 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneoph_ps) | 
|---|
| 147 | #[ inline] | 
|---|
| 148 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 149 | #[ cfg_attr(test, assert_instr(vcvtneoph2ps))] | 
|---|
| 150 | #[ unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")] | 
|---|
| 151 | pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 { | 
|---|
| 152 | transmute(src:cvtneoph2ps_256(a)) | 
|---|
| 153 | } | 
|---|
| 154 |  | 
|---|
| 155 | /// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point | 
|---|
| 156 | /// elements, and store the results in dst. | 
|---|
| 157 | /// | 
|---|
| 158 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneps_avx_pbh) | 
|---|
| 159 | #[ inline] | 
|---|
| 160 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 161 | #[ cfg_attr(test, assert_instr(vcvtneps2bf16))] | 
|---|
| 162 | #[ stable(feature = "stdarch_x86_avx512", since = "1.89")] | 
|---|
| 163 | pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh { | 
|---|
| 164 | unsafe { | 
|---|
| 165 | let mut dst: __m128bh; | 
|---|
| 166 | asm!( | 
|---|
| 167 | "{{vex}}vcvtneps2bf16 {dst },{src }", | 
|---|
| 168 | dst = lateout(xmm_reg) dst, | 
|---|
| 169 | src = in(xmm_reg) a, | 
|---|
| 170 | options(pure, nomem, nostack, preserves_flags) | 
|---|
| 171 | ); | 
|---|
| 172 | dst | 
|---|
| 173 | } | 
|---|
| 174 | } | 
|---|
| 175 |  | 
|---|
| 176 | /// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point | 
|---|
| 177 | /// elements, and store the results in dst. | 
|---|
| 178 | /// | 
|---|
| 179 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneps_avx_pbh) | 
|---|
| 180 | #[ inline] | 
|---|
| 181 | #[ target_feature(enable = "avxneconvert")] | 
|---|
| 182 | #[ cfg_attr(test, assert_instr(vcvtneps2bf16))] | 
|---|
| 183 | #[ stable(feature = "stdarch_x86_avx512", since = "1.89")] | 
|---|
| 184 | pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh { | 
|---|
| 185 | unsafe { | 
|---|
| 186 | let mut dst: __m128bh; | 
|---|
| 187 | asm!( | 
|---|
| 188 | "{{vex}}vcvtneps2bf16 {dst },{src }", | 
|---|
| 189 | dst = lateout(xmm_reg) dst, | 
|---|
| 190 | src = in(ymm_reg) a, | 
|---|
| 191 | options(pure, nomem, nostack, preserves_flags) | 
|---|
| 192 | ); | 
|---|
| 193 | dst | 
|---|
| 194 | } | 
|---|
| 195 | } | 
|---|
| 196 |  | 
|---|
| 197 | #[ allow(improper_ctypes)] | 
|---|
| 198 | unsafe extern "C"{ | 
|---|
| 199 | #[ link_name= "llvm.x86.vbcstnebf162ps128"] | 
|---|
| 200 | unsafefn bcstnebf162ps_128(a: *const bf16) -> __m128; | 
|---|
| 201 | #[ link_name= "llvm.x86.vbcstnebf162ps256"] | 
|---|
| 202 | unsafefn bcstnebf162ps_256(a: *const bf16) -> __m256; | 
|---|
| 203 | #[ link_name= "llvm.x86.vbcstnesh2ps128"] | 
|---|
| 204 | unsafefn bcstnesh2ps_128(a: *const f16) -> __m128; | 
|---|
| 205 | #[ link_name= "llvm.x86.vbcstnesh2ps256"] | 
|---|
| 206 | unsafefn bcstnesh2ps_256(a: *const f16) -> __m256; | 
|---|
| 207 |  | 
|---|
| 208 | #[ link_name= "llvm.x86.vcvtneebf162ps128"] | 
|---|
| 209 | unsafefn cvtneebf162ps_128(a: *const __m128bh) -> __m128; | 
|---|
| 210 | #[ link_name= "llvm.x86.vcvtneebf162ps256"] | 
|---|
| 211 | unsafefn cvtneebf162ps_256(a: *const __m256bh) -> __m256; | 
|---|
| 212 | #[ link_name= "llvm.x86.vcvtneeph2ps128"] | 
|---|
| 213 | unsafefn cvtneeph2ps_128(a: *const __m128h) -> __m128; | 
|---|
| 214 | #[ link_name= "llvm.x86.vcvtneeph2ps256"] | 
|---|
| 215 | unsafefn cvtneeph2ps_256(a: *const __m256h) -> __m256; | 
|---|
| 216 |  | 
|---|
| 217 | #[ link_name= "llvm.x86.vcvtneobf162ps128"] | 
|---|
| 218 | unsafefn cvtneobf162ps_128(a: *const __m128bh) -> __m128; | 
|---|
| 219 | #[ link_name= "llvm.x86.vcvtneobf162ps256"] | 
|---|
| 220 | unsafefn cvtneobf162ps_256(a: *const __m256bh) -> __m256; | 
|---|
| 221 | #[ link_name= "llvm.x86.vcvtneoph2ps128"] | 
|---|
| 222 | unsafefn cvtneoph2ps_128(a: *const __m128h) -> __m128; | 
|---|
| 223 | #[ link_name= "llvm.x86.vcvtneoph2ps256"] | 
|---|
| 224 | unsafefn cvtneoph2ps_256(a: *const __m256h) -> __m256; | 
|---|
| 225 | } | 
|---|
| 226 |  | 
|---|
// Unit tests for the AVX-NE-CONVERT intrinsics defined in this file. Each test feeds a
// small known input through one intrinsic and compares against a hand-built vector.
#[cfg(test)]
mod tests {
    use crate::core_arch::simd::{u16x4, u16x8};
    use crate::core_arch::x86::*;
    use crate::mem::transmute_copy;
    use std::ptr::addr_of;
    use stdarch_test::simd_test;

    // Raw BF16 bit patterns, written as sign _ 8-bit exponent _ 7-bit mantissa,
    // for the values 1.0 through 8.0.
    const BF16_ONE: u16 = 0b0_01111111_0000000;
    const BF16_TWO: u16 = 0b0_10000000_0000000;
    const BF16_THREE: u16 = 0b0_10000000_1000000;
    const BF16_FOUR: u16 = 0b0_10000001_0000000;
    const BF16_FIVE: u16 = 0b0_10000001_0100000;
    const BF16_SIX: u16 = 0b0_10000001_1000000;
    const BF16_SEVEN: u16 = 0b0_10000001_1100000;
    const BF16_EIGHT: u16 = 0b0_10000010_0000000;

    // Broadcasting BF16 1.0 must fill all four f32 lanes with 1.0.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_bcstnebf16_ps() {
        let want = _mm_set_ps(1., 1., 1., 1.);
        let scalar = bf16::from_bits(BF16_ONE);
        let got = _mm_bcstnebf16_ps(addr_of!(scalar));
        assert_eq_m128(got, want);
    }

    // Broadcasting BF16 1.0 must fill all eight f32 lanes with 1.0.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_bcstnebf16_ps() {
        let want = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        let scalar = bf16::from_bits(BF16_ONE);
        let got = _mm256_bcstnebf16_ps(addr_of!(scalar));
        assert_eq_m256(got, want);
    }

    // Broadcasting half-precision 1.0 must fill all four f32 lanes with 1.0.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_bcstnesh_ps() {
        let want = _mm_set_ps(1., 1., 1., 1.);
        let scalar = 1.0_f16;
        let got = _mm_bcstnesh_ps(addr_of!(scalar));
        assert_eq_m128(got, want);
    }

    // Broadcasting half-precision 1.0 must fill all eight f32 lanes with 1.0.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_bcstnesh_ps() {
        let want = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
        let scalar = 1.0_f16;
        let got = _mm256_bcstnesh_ps(addr_of!(scalar));
        assert_eq_m256(got, want);
    }

    // Even-indexed BF16 elements of 1..=8 are 1, 3, 5, 7.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneebf16_ps() {
        let want = _mm_setr_ps(1., 3., 5., 7.);
        let packed = __m128bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let got = _mm_cvtneebf16_ps(addr_of!(packed));
        assert_eq_m128(got, want);
    }

    // Even-indexed BF16 elements of 1..=8 repeated twice are 1, 3, 5, 7 repeated twice.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneebf16_ps() {
        let want = _mm256_setr_ps(1., 3., 5., 7., 1., 3., 5., 7.);
        let packed = __m256bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let got = _mm256_cvtneebf16_ps(addr_of!(packed));
        assert_eq_m256(got, want);
    }

    // Even-indexed half-precision elements of 1..=8 are 1, 3, 5, 7.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneeph_ps() {
        let want = _mm_setr_ps(1., 3., 5., 7.);
        let packed = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let got = _mm_cvtneeph_ps(addr_of!(packed));
        assert_eq_m128(got, want);
    }

    // Even-indexed half-precision elements of 1..=16 are 1, 3, ..., 15.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneeph_ps() {
        let want = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
        let packed = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let got = _mm256_cvtneeph_ps(addr_of!(packed));
        assert_eq_m256(got, want);
    }

    // Odd-indexed BF16 elements of 1..=8 are 2, 4, 6, 8.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneobf16_ps() {
        let want = _mm_setr_ps(2., 4., 6., 8.);
        let packed = __m128bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let got = _mm_cvtneobf16_ps(addr_of!(packed));
        assert_eq_m128(got, want);
    }

    // Odd-indexed BF16 elements of 1..=8 repeated twice are 2, 4, 6, 8 repeated twice.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneobf16_ps() {
        let want = _mm256_setr_ps(2., 4., 6., 8., 2., 4., 6., 8.);
        let packed = __m256bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let got = _mm256_cvtneobf16_ps(addr_of!(packed));
        assert_eq_m256(got, want);
    }

    // Odd-indexed half-precision elements of 1..=8 are 2, 4, 6, 8.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneoph_ps() {
        let want = _mm_setr_ps(2., 4., 6., 8.);
        let packed = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let got = _mm_cvtneoph_ps(addr_of!(packed));
        assert_eq_m128(got, want);
    }

    // Odd-indexed half-precision elements of 1..=16 are 2, 4, ..., 16.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneoph_ps() {
        let want = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
        let packed = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let got = _mm256_cvtneoph_ps(addr_of!(packed));
        assert_eq_m256(got, want);
    }

    // Only the first four 16-bit elements of the result are compared: `transmute_copy`
    // reads just a `u16x4` worth of bytes from the returned `__m128bh`.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneps_avx_pbh() {
        let want = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
        let input = _mm_setr_ps(1., 2., 3., 4.);
        let got: u16x4 = transmute_copy(&_mm_cvtneps_avx_pbh(input));
        assert_eq!(got, want);
    }

    // Eight f32 inputs fill all eight 16-bit elements, so the whole result is compared.
    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneps_avx_pbh() {
        let want = u16x8::new(
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        );
        let input = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let got: u16x8 = transmute(_mm256_cvtneps_avx_pbh(input));
        assert_eq!(got, want);
    }
}
|---|
| 372 |  | 
|---|