1 | //===-- x86_64 implementations of the fma function --------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FMA_H |
10 | #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FMA_H |
11 | |
12 | #include "src/__support/macros/attributes.h" // LIBC_INLINE |
13 | #include "src/__support/macros/properties/architectures.h" |
14 | #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA |
15 | |
16 | #if !defined(LIBC_TARGET_ARCH_IS_X86_64) |
17 | #error "Invalid include" |
18 | #endif |
19 | |
20 | #if !defined(LIBC_TARGET_CPU_HAS_FMA) |
21 | #error "FMA instructions are not supported" |
22 | #endif |
23 | |
24 | #include "src/__support/CPP/type_traits.h" |
25 | #include <immintrin.h> |
26 | |
27 | namespace LIBC_NAMESPACE { |
28 | namespace fputil { |
29 | |
30 | template <typename T> |
31 | LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, float>, T> fma(T x, T y, T z) { |
32 | float result; |
33 | __m128 xmm = _mm_load_ss(&x); // NOLINT |
34 | __m128 ymm = _mm_load_ss(&y); // NOLINT |
35 | __m128 zmm = _mm_load_ss(&z); // NOLINT |
36 | __m128 r = _mm_fmadd_ss(A: xmm, B: ymm, C: zmm); // NOLINT |
37 | _mm_store_ss(p: &result, a: r); // NOLINT |
38 | return result; |
39 | } |
40 | |
41 | template <typename T> |
42 | LIBC_INLINE cpp::enable_if_t<cpp::is_same_v<T, double>, T> fma(T x, T y, T z) { |
43 | double result; |
44 | __m128d xmm = _mm_load_sd(&x); // NOLINT |
45 | __m128d ymm = _mm_load_sd(&y); // NOLINT |
46 | __m128d zmm = _mm_load_sd(&z); // NOLINT |
47 | __m128d r = _mm_fmadd_sd(A: xmm, B: ymm, C: zmm); // NOLINT |
48 | _mm_store_sd(dp: &result, a: r); // NOLINT |
49 | return result; |
50 | } |
51 | |
52 | } // namespace fputil |
53 | } // namespace LIBC_NAMESPACE |
54 | |
55 | #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FMA_H |
56 | |