| 1 | use crate::arch::asm; |
| 2 | use crate::core_arch::x86::*; |
| 3 | |
| 4 | #[cfg (test)] |
| 5 | use stdarch_test::assert_instr; |
| 6 | |
| 7 | /// Convert scalar BF16 (16-bit) floating point element stored at memory locations starting at location |
| 8 | /// a to single precision (32-bit) floating-point, broadcast it to packed single precision (32-bit) |
| 9 | /// floating-point elements, and store the results in dst. |
| 10 | /// |
| 11 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnebf16_ps) |
| 12 | #[inline ] |
| 13 | #[target_feature (enable = "avxneconvert" )] |
| 14 | #[cfg_attr ( |
| 15 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 16 | assert_instr(vbcstnebf162ps) |
| 17 | )] |
| 18 | #[unstable (feature = "stdarch_x86_avx512_bf16" , issue = "127356" )] |
| 19 | pub unsafe fn _mm_bcstnebf16_ps(a: *const bf16) -> __m128 { |
| 20 | bcstnebf162ps_128(a) |
| 21 | } |
| 22 | |
| 23 | /// Convert scalar BF16 (16-bit) floating point element stored at memory locations starting at location |
| 24 | /// a to single precision (32-bit) floating-point, broadcast it to packed single precision (32-bit) floating-point |
| 25 | /// elements, and store the results in dst. |
| 26 | /// |
| 27 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnebf16_ps) |
| 28 | #[inline ] |
| 29 | #[target_feature (enable = "avxneconvert" )] |
| 30 | #[cfg_attr ( |
| 31 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 32 | assert_instr(vbcstnebf162ps) |
| 33 | )] |
| 34 | #[unstable (feature = "stdarch_x86_avx512_bf16" , issue = "127356" )] |
| 35 | pub unsafe fn _mm256_bcstnebf16_ps(a: *const bf16) -> __m256 { |
| 36 | bcstnebf162ps_256(a) |
| 37 | } |
| 38 | |
| 39 | /// Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting |
| 40 | /// at location a to a single-precision (32-bit) floating-point, broadcast it to packed single-precision |
| 41 | /// (32-bit) floating-point elements, and store the results in dst. |
| 42 | /// |
| 43 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bcstnesh_ps) |
| 44 | #[inline ] |
| 45 | #[target_feature (enable = "avxneconvert" )] |
| 46 | #[cfg_attr ( |
| 47 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 48 | assert_instr(vbcstnesh2ps) |
| 49 | )] |
| 50 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
| 51 | pub unsafe fn _mm_bcstnesh_ps(a: *const f16) -> __m128 { |
| 52 | bcstnesh2ps_128(a) |
| 53 | } |
| 54 | |
| 55 | /// Convert scalar half-precision (16-bit) floating-point element stored at memory locations starting |
| 56 | /// at location a to a single-precision (32-bit) floating-point, broadcast it to packed single-precision |
| 57 | /// (32-bit) floating-point elements, and store the results in dst. |
| 58 | /// |
| 59 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bcstnesh_ps) |
| 60 | #[inline ] |
| 61 | #[target_feature (enable = "avxneconvert" )] |
| 62 | #[cfg_attr ( |
| 63 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 64 | assert_instr(vbcstnesh2ps) |
| 65 | )] |
| 66 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
| 67 | pub unsafe fn _mm256_bcstnesh_ps(a: *const f16) -> __m256 { |
| 68 | bcstnesh2ps_256(a) |
| 69 | } |
| 70 | |
| 71 | /// Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at |
| 72 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. |
| 73 | /// |
| 74 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneebf16_ps) |
| 75 | #[inline ] |
| 76 | #[target_feature (enable = "avxneconvert" )] |
| 77 | #[cfg_attr ( |
| 78 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 79 | assert_instr(vcvtneebf162ps) |
| 80 | )] |
| 81 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
| 82 | pub unsafe fn _mm_cvtneebf16_ps(a: *const __m128bh) -> __m128 { |
| 83 | transmute(src:cvtneebf162ps_128(a)) |
| 84 | } |
| 85 | |
| 86 | /// Convert packed BF16 (16-bit) floating-point even-indexed elements stored at memory locations starting at |
| 87 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. |
| 88 | /// |
| 89 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneebf16_ps) |
| 90 | #[inline ] |
| 91 | #[target_feature (enable = "avxneconvert" )] |
| 92 | #[cfg_attr ( |
| 93 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 94 | assert_instr(vcvtneebf162ps) |
| 95 | )] |
| 96 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
| 97 | pub unsafe fn _mm256_cvtneebf16_ps(a: *const __m256bh) -> __m256 { |
| 98 | transmute(src:cvtneebf162ps_256(a)) |
| 99 | } |
| 100 | |
| 101 | /// Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at |
| 102 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. |
| 103 | /// |
| 104 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneeph_ps) |
| 105 | #[inline ] |
| 106 | #[target_feature (enable = "avxneconvert" )] |
| 107 | #[cfg_attr ( |
| 108 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 109 | assert_instr(vcvtneeph2ps) |
| 110 | )] |
| 111 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
| 112 | pub unsafe fn _mm_cvtneeph_ps(a: *const __m128h) -> __m128 { |
| 113 | transmute(src:cvtneeph2ps_128(a)) |
| 114 | } |
| 115 | |
| 116 | /// Convert packed half-precision (16-bit) floating-point even-indexed elements stored at memory locations starting at |
| 117 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. |
| 118 | /// |
| 119 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneeph_ps) |
| 120 | #[inline ] |
| 121 | #[target_feature (enable = "avxneconvert" )] |
| 122 | #[cfg_attr ( |
| 123 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 124 | assert_instr(vcvtneeph2ps) |
| 125 | )] |
| 126 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
| 127 | pub unsafe fn _mm256_cvtneeph_ps(a: *const __m256h) -> __m256 { |
| 128 | transmute(src:cvtneeph2ps_256(a)) |
| 129 | } |
| 130 | |
| 131 | /// Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at |
| 132 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. |
| 133 | /// |
| 134 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneobf16_ps) |
| 135 | #[inline ] |
| 136 | #[target_feature (enable = "avxneconvert" )] |
| 137 | #[cfg_attr ( |
| 138 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 139 | assert_instr(vcvtneobf162ps) |
| 140 | )] |
| 141 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
| 142 | pub unsafe fn _mm_cvtneobf16_ps(a: *const __m128bh) -> __m128 { |
| 143 | transmute(src:cvtneobf162ps_128(a)) |
| 144 | } |
| 145 | |
| 146 | /// Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at memory locations starting at |
| 147 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. |
| 148 | /// |
| 149 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneobf16_ps) |
| 150 | #[inline ] |
| 151 | #[target_feature (enable = "avxneconvert" )] |
| 152 | #[cfg_attr ( |
| 153 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 154 | assert_instr(vcvtneobf162ps) |
| 155 | )] |
| 156 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
| 157 | pub unsafe fn _mm256_cvtneobf16_ps(a: *const __m256bh) -> __m256 { |
| 158 | transmute(src:cvtneobf162ps_256(a)) |
| 159 | } |
| 160 | |
| 161 | /// Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at |
| 162 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. |
| 163 | /// |
| 164 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneoph_ps) |
| 165 | #[inline ] |
| 166 | #[target_feature (enable = "avxneconvert" )] |
| 167 | #[cfg_attr ( |
| 168 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 169 | assert_instr(vcvtneoph2ps) |
| 170 | )] |
| 171 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
| 172 | pub unsafe fn _mm_cvtneoph_ps(a: *const __m128h) -> __m128 { |
| 173 | transmute(src:cvtneoph2ps_128(a)) |
| 174 | } |
| 175 | |
| 176 | /// Convert packed half-precision (16-bit) floating-point odd-indexed elements stored at memory locations starting at |
| 177 | /// location a to single precision (32-bit) floating-point elements, and store the results in dst. |
| 178 | /// |
| 179 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneoph_ps) |
| 180 | #[inline ] |
| 181 | #[target_feature (enable = "avxneconvert" )] |
| 182 | #[cfg_attr ( |
| 183 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 184 | assert_instr(vcvtneoph2ps) |
| 185 | )] |
| 186 | #[unstable (feature = "stdarch_x86_avx512_f16" , issue = "127213" )] |
| 187 | pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 { |
| 188 | transmute(src:cvtneoph2ps_256(a)) |
| 189 | } |
| 190 | |
| 191 | /// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point |
| 192 | /// elements, and store the results in dst. |
| 193 | /// |
| 194 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtneps_avx_pbh) |
| 195 | #[inline ] |
| 196 | #[target_feature (enable = "avxneconvert" )] |
| 197 | #[cfg_attr ( |
| 198 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 199 | assert_instr(vcvtneps2bf16) |
| 200 | )] |
| 201 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
| 202 | pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh { |
| 203 | unsafe { |
| 204 | let mut dst: __m128bh; |
| 205 | asm!( |
| 206 | "{{vex}}vcvtneps2bf16 { dst},{ src}" , |
| 207 | dst = lateout(xmm_reg) dst, |
| 208 | src = in(xmm_reg) a, |
| 209 | options(pure, nomem, nostack, preserves_flags) |
| 210 | ); |
| 211 | dst |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | /// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point |
| 216 | /// elements, and store the results in dst. |
| 217 | /// |
| 218 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtneps_avx_pbh) |
| 219 | #[inline ] |
| 220 | #[target_feature (enable = "avxneconvert" )] |
| 221 | #[cfg_attr ( |
| 222 | all(test, any(target_os = "linux" , target_env = "msvc" )), |
| 223 | assert_instr(vcvtneps2bf16) |
| 224 | )] |
| 225 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
| 226 | pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh { |
| 227 | unsafe { |
| 228 | let mut dst: __m128bh; |
| 229 | asm!( |
| 230 | "{{vex}}vcvtneps2bf16 { dst},{ src}" , |
| 231 | dst = lateout(xmm_reg) dst, |
| 232 | src = in(ymm_reg) a, |
| 233 | options(pure, nomem, nostack, preserves_flags) |
| 234 | ); |
| 235 | dst |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | #[allow (improper_ctypes)] |
| 240 | unsafe extern "C" { |
| 241 | #[link_name = "llvm.x86.vbcstnebf162ps128" ] |
| 242 | unsafefn bcstnebf162ps_128(a: *const bf16) -> __m128; |
| 243 | #[link_name = "llvm.x86.vbcstnebf162ps256" ] |
| 244 | unsafefn bcstnebf162ps_256(a: *const bf16) -> __m256; |
| 245 | #[link_name = "llvm.x86.vbcstnesh2ps128" ] |
| 246 | unsafefn bcstnesh2ps_128(a: *const f16) -> __m128; |
| 247 | #[link_name = "llvm.x86.vbcstnesh2ps256" ] |
| 248 | unsafefn bcstnesh2ps_256(a: *const f16) -> __m256; |
| 249 | |
| 250 | #[link_name = "llvm.x86.vcvtneebf162ps128" ] |
| 251 | unsafefn cvtneebf162ps_128(a: *const __m128bh) -> __m128; |
| 252 | #[link_name = "llvm.x86.vcvtneebf162ps256" ] |
| 253 | unsafefn cvtneebf162ps_256(a: *const __m256bh) -> __m256; |
| 254 | #[link_name = "llvm.x86.vcvtneeph2ps128" ] |
| 255 | unsafefn cvtneeph2ps_128(a: *const __m128h) -> __m128; |
| 256 | #[link_name = "llvm.x86.vcvtneeph2ps256" ] |
| 257 | unsafefn cvtneeph2ps_256(a: *const __m256h) -> __m256; |
| 258 | |
| 259 | #[link_name = "llvm.x86.vcvtneobf162ps128" ] |
| 260 | unsafefn cvtneobf162ps_128(a: *const __m128bh) -> __m128; |
| 261 | #[link_name = "llvm.x86.vcvtneobf162ps256" ] |
| 262 | unsafefn cvtneobf162ps_256(a: *const __m256bh) -> __m256; |
| 263 | #[link_name = "llvm.x86.vcvtneoph2ps128" ] |
| 264 | unsafefn cvtneoph2ps_128(a: *const __m128h) -> __m128; |
| 265 | #[link_name = "llvm.x86.vcvtneoph2ps256" ] |
| 266 | unsafefn cvtneoph2ps_256(a: *const __m256h) -> __m256; |
| 267 | } |
| 268 | |
#[cfg(test)]
mod tests {
    use crate::core_arch::simd::{u16x4, u16x8};
    use crate::core_arch::x86::*;
    use crate::mem::transmute_copy;
    use std::ptr::addr_of;
    use stdarch_test::simd_test;

    // BF16 bit patterns for 1.0 through 8.0, written as sign_exponent_mantissa
    // (1 + 8 + 7 bits).
    const BF16_ONE: u16 = 0b0_01111111_0000000;
    const BF16_TWO: u16 = 0b0_10000000_0000000;
    const BF16_THREE: u16 = 0b0_10000000_1000000;
    const BF16_FOUR: u16 = 0b0_10000001_0000000;
    const BF16_FIVE: u16 = 0b0_10000001_0100000;
    const BF16_SIX: u16 = 0b0_10000001_1000000;
    const BF16_SEVEN: u16 = 0b0_10000001_1100000;
    const BF16_EIGHT: u16 = 0b0_10000010_0000000;

    // --- scalar load + broadcast -------------------------------------------------------------

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_bcstnebf16_ps() {
        let scalar = bf16::from_bits(BF16_ONE);
        let got = _mm_bcstnebf16_ps(addr_of!(scalar));
        let want = _mm_set1_ps(1.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_bcstnebf16_ps() {
        let scalar = bf16::from_bits(BF16_ONE);
        let got = _mm256_bcstnebf16_ps(addr_of!(scalar));
        let want = _mm256_set1_ps(1.);
        assert_eq_m256(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_bcstnesh_ps() {
        let scalar = 1.0_f16;
        let got = _mm_bcstnesh_ps(addr_of!(scalar));
        let want = _mm_set1_ps(1.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_bcstnesh_ps() {
        let scalar = 1.0_f16;
        let got = _mm256_bcstnesh_ps(addr_of!(scalar));
        let want = _mm256_set1_ps(1.);
        assert_eq_m256(got, want);
    }

    // --- even-indexed elements ---------------------------------------------------------------

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneebf16_ps() {
        let packed = __m128bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let got = _mm_cvtneebf16_ps(addr_of!(packed));
        // Lanes 0, 2, 4 and 6 of the input.
        let want = _mm_setr_ps(1., 3., 5., 7.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneebf16_ps() {
        let packed = __m256bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let got = _mm256_cvtneebf16_ps(addr_of!(packed));
        let want = _mm256_setr_ps(1., 3., 5., 7., 1., 3., 5., 7.);
        assert_eq_m256(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneeph_ps() {
        let packed = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let got = _mm_cvtneeph_ps(addr_of!(packed));
        let want = _mm_setr_ps(1., 3., 5., 7.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneeph_ps() {
        let packed = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let got = _mm256_cvtneeph_ps(addr_of!(packed));
        let want = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
        assert_eq_m256(got, want);
    }

    // --- odd-indexed elements ----------------------------------------------------------------

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneobf16_ps() {
        let packed = __m128bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let got = _mm_cvtneobf16_ps(addr_of!(packed));
        // Lanes 1, 3, 5 and 7 of the input.
        let want = _mm_setr_ps(2., 4., 6., 8.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneobf16_ps() {
        let packed = __m256bh([
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        ]);
        let got = _mm256_cvtneobf16_ps(addr_of!(packed));
        let want = _mm256_setr_ps(2., 4., 6., 8., 2., 4., 6., 8.);
        assert_eq_m256(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneoph_ps() {
        let packed = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
        let got = _mm_cvtneoph_ps(addr_of!(packed));
        let want = _mm_setr_ps(2., 4., 6., 8.);
        assert_eq_m128(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneoph_ps() {
        let packed = __m256h([
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
        ]);
        let got = _mm256_cvtneoph_ps(addr_of!(packed));
        let want = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
        assert_eq_m256(got, want);
    }

    // --- single precision -> BF16 ------------------------------------------------------------

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm_cvtneps_avx_pbh() {
        let floats = _mm_setr_ps(1., 2., 3., 4.);
        // Only the four lowest BF16 lanes are compared, hence the narrower `u16x4` view.
        let got: u16x4 = transmute_copy(&_mm_cvtneps_avx_pbh(floats));
        let want = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
        assert_eq!(got, want);
    }

    #[simd_test(enable = "avxneconvert")]
    unsafe fn test_mm256_cvtneps_avx_pbh() {
        let floats = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
        let got: u16x8 = transmute(_mm256_cvtneps_avx_pbh(floats));
        let want = u16x8::new(
            BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
        );
        assert_eq!(got, want);
    }
}
| 414 | |