1 | //===-- Common utility class for differential analysis --------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "src/__support/CPP/algorithm.h" |
10 | #include "src/__support/FPUtil/FPBits.h" |
11 | #include "src/__support/macros/config.h" |
12 | #include "test/src/math/performance_testing/Timer.h" |
13 | |
14 | #include <cstddef> |
15 | #include <fstream> |
16 | |
17 | namespace LIBC_NAMESPACE_DECL { |
18 | namespace testing { |
19 | template <typename OutputType, typename InputType> class PerfTest { |
20 | using FPBits = fputil::FPBits<OutputType>; |
21 | using StorageType = typename FPBits::StorageType; |
22 | static constexpr StorageType U_INT_MAX = |
23 | cpp::numeric_limits<StorageType>::max(); |
24 | |
25 | public: |
26 | using BinaryFuncPtr = OutputType (*)(InputType, InputType); |
27 | using UnaryFuncPtr = OutputType (*)(InputType); |
28 | |
29 | template <bool binary, typename Func> |
30 | static void run_perf_in_range(Func FuncA, Func FuncB, StorageType startingBit, |
31 | StorageType endingBit, size_t N, size_t rounds, |
32 | const char *name_a, const char *name_b, |
33 | std::ofstream &log) { |
34 | if (sizeof(StorageType) <= sizeof(size_t)) |
35 | N = cpp::min(N, static_cast<size_t>(endingBit - startingBit)); |
36 | |
37 | auto runner = [=](Func func) { |
38 | [[maybe_unused]] volatile OutputType result; |
39 | if (endingBit < startingBit) { |
40 | return; |
41 | } |
42 | |
43 | StorageType step = (endingBit - startingBit) / N; |
44 | if (step == 0) |
45 | step = 1; |
46 | for (size_t i = 0; i < rounds; i++) { |
47 | for (StorageType bits_x = startingBit, bits_y = endingBit;; |
48 | bits_x += step, bits_y -= step) { |
49 | InputType x = FPBits(bits_x).get_val(); |
50 | if constexpr (binary) { |
51 | InputType y = FPBits(bits_y).get_val(); |
52 | result = func(x, y); |
53 | } else { |
54 | result = func(x); |
55 | } |
56 | if (endingBit - bits_x < step) { |
57 | break; |
58 | } |
59 | } |
60 | } |
61 | }; |
62 | |
63 | Timer timer; |
64 | timer.start(); |
65 | runner(FuncA); |
66 | timer.stop(); |
67 | |
68 | double a_average = static_cast<double>(timer.nanoseconds()) / N / rounds; |
69 | log << "-- Function A: " << name_a << " --\n" ; |
70 | log << " Total time : " << timer.nanoseconds() << " ns \n" ; |
71 | log << " Average runtime : " << a_average << " ns/op \n" ; |
72 | log << " Ops per second : " |
73 | << static_cast<uint64_t>(1'000'000'000.0 / a_average) << " op/s \n" ; |
74 | |
75 | timer.start(); |
76 | runner(FuncB); |
77 | timer.stop(); |
78 | |
79 | double b_average = static_cast<double>(timer.nanoseconds()) / N / rounds; |
80 | log << "-- Function B: " << name_b << " --\n" ; |
81 | log << " Total time : " << timer.nanoseconds() << " ns \n" ; |
82 | log << " Average runtime : " << b_average << " ns/op \n" ; |
83 | log << " Ops per second : " |
84 | << static_cast<uint64_t>(1'000'000'000.0 / b_average) << " op/s \n" ; |
85 | |
86 | log << "-- Average ops per second ratio --\n" ; |
87 | log << " A / B : " << b_average / a_average << " \n" ; |
88 | } |
89 | |
90 | template <bool binary, typename Func> |
91 | static void run_perf(Func FuncA, Func FuncB, int rounds, const char *name_a, |
92 | const char *name_b, const char *logFile) { |
93 | std::ofstream log(logFile); |
94 | log << " Performance tests with inputs in denormal range:\n" ; |
95 | run_perf_in_range<binary>( |
96 | FuncA, FuncB, /* startingBit= */ StorageType(0), |
97 | /* endingBit= */ FPBits::max_subnormal().uintval(), 1'000'001, rounds, |
98 | name_a, name_b, log); |
99 | log << "\n Performance tests with inputs in normal range:\n" ; |
100 | run_perf_in_range<binary>(FuncA, FuncB, |
101 | /* startingBit= */ FPBits::min_normal().uintval(), |
102 | /* endingBit= */ FPBits::max_normal().uintval(), |
103 | 1'000'001, rounds, name_a, name_b, log); |
104 | log << "\n Performance tests with inputs in normal range with exponents " |
105 | "close to each other:\n" ; |
106 | run_perf_in_range<binary>( |
107 | FuncA, FuncB, |
108 | /* startingBit= */ FPBits(OutputType(0x1.0p-10)).uintval(), |
109 | /* endingBit= */ FPBits(OutputType(0x1.0p+10)).uintval(), 1'000'001, |
110 | rounds, name_a, name_b, log); |
111 | } |
112 | }; |
113 | |
114 | } // namespace testing |
115 | } // namespace LIBC_NAMESPACE_DECL |
116 | |
// Expands to a braced block that compares the two-argument functions FuncA
// and FuncB with a single round via PerfTest<OutputType, InputType>::run_perf,
// writing the report to `filename`. The stringified FuncA / FuncB tokens are
// used as the names in the report. The block ends with `return 0;`, so it is
// meant to be expanded inside a function that returns an integer. The
// static_casts pick the overload matching BinaryFuncPtr.
#define BINARY_INPUT_SINGLE_OUTPUT_PERF(OutputType, InputType, FuncA, FuncB,   \
                                        filename)                              \
  {                                                                            \
    using PerfHarness =                                                        \
        LIBC_NAMESPACE::testing::PerfTest<OutputType, InputType>;              \
    using TargetFuncPtr = typename PerfHarness::BinaryFuncPtr;                 \
    PerfHarness::run_perf<true>(static_cast<TargetFuncPtr>(&FuncA),            \
                                static_cast<TargetFuncPtr>(&FuncB), 1, #FuncA, \
                                #FuncB, filename);                             \
    return 0;                                                                  \
  }
128 | |
// Same as the single-round binary comparison, but the number of passes over
// each input range is given by `rounds`. Expands to a braced block that calls
// PerfTest<OutputType, InputType>::run_perf<true> with FuncA and FuncB cast to
// BinaryFuncPtr, uses the stringified function tokens as report names, writes
// to `filename`, and finishes with `return 0;`.
#define BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(OutputType, InputType, FuncA,       \
                                           FuncB, rounds, filename)            \
  {                                                                            \
    using PerfHarness =                                                        \
        LIBC_NAMESPACE::testing::PerfTest<OutputType, InputType>;              \
    using TargetFuncPtr = typename PerfHarness::BinaryFuncPtr;                 \
    PerfHarness::run_perf<true>(static_cast<TargetFuncPtr>(&FuncA),            \
                                static_cast<TargetFuncPtr>(&FuncB), rounds,    \
                                #FuncA, #FuncB, filename);                     \
    return 0;                                                                  \
  }
140 | |
// Expands to a braced block that compares the one-argument functions FuncA
// and FuncB (both taking and returning T) with a single round via
// PerfTest<T, T>::run_perf<false>, writing the report to `filename`. The
// stringified FuncA / FuncB tokens are used as the names in the report, and
// the block ends with `return 0;`. The static_casts pick the overload matching
// UnaryFuncPtr.
#define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, FuncA, FuncB, filename)             \
  {                                                                            \
    using PerfHarness = LIBC_NAMESPACE::testing::PerfTest<T, T>;               \
    using TargetFuncPtr = typename PerfHarness::UnaryFuncPtr;                  \
    PerfHarness::run_perf<false>(static_cast<TargetFuncPtr>(&FuncA),           \
                                 static_cast<TargetFuncPtr>(&FuncB), 1,        \
                                 #FuncA, #FuncB, filename);                    \
    return 0;                                                                  \
  }
150 | |
// Same as the single-round unary comparison, but the number of passes over
// each input range is given by `rounds`. Expands to a braced block that calls
// PerfTest<T, T>::run_perf<false> with FuncA and FuncB cast to UnaryFuncPtr,
// uses the stringified function tokens as report names, writes to `filename`,
// and finishes with `return 0;`.
#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, FuncA, FuncB, rounds, filename)  \
  {                                                                            \
    using PerfHarness = LIBC_NAMESPACE::testing::PerfTest<T, T>;               \
    using TargetFuncPtr = typename PerfHarness::UnaryFuncPtr;                  \
    PerfHarness::run_perf<false>(static_cast<TargetFuncPtr>(&FuncA),           \
                                 static_cast<TargetFuncPtr>(&FuncB), rounds,   \
                                 #FuncA, #FuncB, filename);                    \
    return 0;                                                                  \
  }
160 | |