1#include "./common.h"
2
3#ifndef SIGNALSMITH_DSP_PERF_H
4#define SIGNALSMITH_DSP_PERF_H
5
6#include <complex>
7
8#if defined(__SSE__) || defined(_M_X64)
9# include <xmmintrin.h>
10#else
11# include <cstdint> // for uintptr_t
12#endif
13
14namespace signalsmith {
15namespace perf {
16 /** @defgroup Performance Performance helpers
17 @brief Nothing serious, just some `#defines` and helpers
18
19 @{
20 @file
21 */
22
/// *Really* insist that a function/method is inlined (mostly for performance in DEBUG builds).
///
/// The macro is only defined here if it isn't already, so a build can supply its own definition
/// before this header is included.
#ifndef SIGNALSMITH_INLINE
#  if defined(__GNUC__)
     // GCC-compatible compilers: force inlining via the attribute
#    define SIGNALSMITH_INLINE __attribute__((always_inline)) inline
#  elif defined(_MSC_VER)
     // MSVC identifies itself with `_MSC_VER` (it never defines `__MSVC__`).
     // `__forceinline` is used on its own: MSVC reports a repeated inline specifier (C4141)
     // when `inline` is added alongside it.
#    define SIGNALSMITH_INLINE __forceinline
#  else
     // Unknown compiler: fall back to a plain inline hint
#    define SIGNALSMITH_INLINE inline
#  endif
#endif
33
34 /** @brief Complex-multiplication (with optional conjugate second-arg), without handling NaN/Infinity
35 The `std::complex` multiplication has edge-cases around NaNs which slow things down and prevent auto-vectorisation. Flags like `-ffast-math` sort this out anyway, but this helps with Debug builds.
36 */
37 template <bool conjugateSecond=false, typename V>
38 SIGNALSMITH_INLINE static std::complex<V> mul(const std::complex<V> &a, const std::complex<V> &b) {
39 return conjugateSecond ? std::complex<V>{
40 b.real()*a.real() + b.imag()*a.imag(),
41 b.real()*a.imag() - b.imag()*a.real()
42 } : std::complex<V>{
43 a.real()*b.real() - a.imag()*b.imag(),
44 a.real()*b.imag() + a.imag()*b.real()
45 };
46 }
47
/** @brief Scoped guard which turns off denormal (subnormal) float handling while it exists

The constructor remembers the current floating-point control register and then enables the flush-to-zero mode(s) available on the architecture.  The destructor writes the remembered value back, so whatever mode was active beforehand is restored.

On architectures without an implementation below, the class is empty and does nothing.
*/
#if defined(__SSE__) || defined(_M_X64)
class StopDenormals {
    unsigned int controlStatusRegister; // MXCSR value from before construction
public:
    StopDenormals() : controlStatusRegister(_mm_getcsr()) {
        _mm_setcsr(controlStatusRegister|0x8040); // Flush-to-Zero (0x8000) and Denormals-Are-Zero (0x0040)
    }
    ~StopDenormals() {
        _mm_setcsr(controlStatusRegister);
    }
};
#elif (defined (__ARM_NEON) || defined (__ARM_NEON__))
class StopDenormals {
    uintptr_t status; // control register value from before construction

    /// Reads the floating-point control register (FPCR on AArch64, FPSCR on 32-bit ARM)
    static uintptr_t readControl() {
        uintptr_t value;
#   if defined(__aarch64__) || defined(__arm64__)
        asm volatile("mrs %0, fpcr" : "=r"(value));
#   else
        asm volatile("vmrs %0, fpscr" : "=r"(value));
#   endif
        return value;
    }
    /// Writes the floating-point control register; the operand has to be in a register
    static void writeControl(uintptr_t value) {
#   if defined(__aarch64__) || defined(__arm64__)
        asm volatile("msr fpcr, %0" : : "r"(value));
#   else
        asm volatile("vmsr fpscr, %0" : : "r"(value));
#   endif
    }
public:
    // Store the *unmodified* register, so the destructor restores the previous mode
    StopDenormals() : status(readControl()) {
        writeControl(status|0x01000000U); // Flush to Zero (bit 24)
    }
    ~StopDenormals() {
        writeControl(status);
    }
};
#else
# if __cplusplus >= 202302L
#  warning "The `StopDenormals` class doesn't do anything for this architecture"
# endif
class StopDenormals {}; // FIXME: add for other architectures
#endif
80
81/** @} */
82}} // signalsmith::perf::
83
84#endif // include guard
85

// Source: qtmultimedia/src/3rdparty/signalsmith-stretch/dsp/perf.h