| 1 | //===-- Common utility class for differential analysis --------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "src/__support/CPP/algorithm.h" |
| 10 | #include "src/__support/FPUtil/FPBits.h" |
| 11 | #include "src/__support/macros/config.h" |
| 12 | #include "test/src/math/performance_testing/Timer.h" |
| 13 | |
| 14 | #include <cstddef> |
| 15 | #include <fstream> |
| 16 | |
| 17 | namespace LIBC_NAMESPACE_DECL { |
| 18 | namespace testing { |
| 19 | template <typename OutputType, typename InputType> class PerfTest { |
| 20 | using FPBits = fputil::FPBits<OutputType>; |
| 21 | using StorageType = typename FPBits::StorageType; |
| 22 | static constexpr StorageType U_INT_MAX = |
| 23 | cpp::numeric_limits<StorageType>::max(); |
| 24 | |
| 25 | public: |
| 26 | using BinaryFuncPtr = OutputType (*)(InputType, InputType); |
| 27 | using UnaryFuncPtr = OutputType (*)(InputType); |
| 28 | |
| 29 | template <bool binary, typename Func> |
| 30 | static void run_perf_in_range(Func FuncA, Func FuncB, StorageType startingBit, |
| 31 | StorageType endingBit, size_t N, size_t rounds, |
| 32 | const char *name_a, const char *name_b, |
| 33 | std::ofstream &log) { |
| 34 | if (sizeof(StorageType) <= sizeof(size_t)) |
| 35 | N = cpp::min(N, static_cast<size_t>(endingBit - startingBit)); |
| 36 | |
| 37 | auto runner = [=](Func func) { |
| 38 | [[maybe_unused]] volatile OutputType result; |
| 39 | if (endingBit < startingBit) { |
| 40 | return; |
| 41 | } |
| 42 | |
| 43 | StorageType step = (endingBit - startingBit) / N; |
| 44 | if (step == 0) |
| 45 | step = 1; |
| 46 | for (size_t i = 0; i < rounds; i++) { |
| 47 | for (StorageType bits_x = startingBit, bits_y = endingBit;; |
| 48 | bits_x += step, bits_y -= step) { |
| 49 | InputType x = FPBits(bits_x).get_val(); |
| 50 | if constexpr (binary) { |
| 51 | InputType y = FPBits(bits_y).get_val(); |
| 52 | result = func(x, y); |
| 53 | } else { |
| 54 | result = func(x); |
| 55 | } |
| 56 | if (endingBit - bits_x < step) { |
| 57 | break; |
| 58 | } |
| 59 | } |
| 60 | } |
| 61 | }; |
| 62 | |
| 63 | Timer timer; |
| 64 | timer.start(); |
| 65 | runner(FuncA); |
| 66 | timer.stop(); |
| 67 | |
| 68 | double a_average = static_cast<double>(timer.nanoseconds()) / N / rounds; |
| 69 | log << "-- Function A: " << name_a << " --\n" ; |
| 70 | log << " Total time : " << timer.nanoseconds() << " ns \n" ; |
| 71 | log << " Average runtime : " << a_average << " ns/op \n" ; |
| 72 | log << " Ops per second : " |
| 73 | << static_cast<uint64_t>(1'000'000'000.0 / a_average) << " op/s \n" ; |
| 74 | |
| 75 | timer.start(); |
| 76 | runner(FuncB); |
| 77 | timer.stop(); |
| 78 | |
| 79 | double b_average = static_cast<double>(timer.nanoseconds()) / N / rounds; |
| 80 | log << "-- Function B: " << name_b << " --\n" ; |
| 81 | log << " Total time : " << timer.nanoseconds() << " ns \n" ; |
| 82 | log << " Average runtime : " << b_average << " ns/op \n" ; |
| 83 | log << " Ops per second : " |
| 84 | << static_cast<uint64_t>(1'000'000'000.0 / b_average) << " op/s \n" ; |
| 85 | |
| 86 | log << "-- Average ops per second ratio --\n" ; |
| 87 | log << " A / B : " << b_average / a_average << " \n" ; |
| 88 | } |
| 89 | |
| 90 | template <bool binary, typename Func> |
| 91 | static void run_perf(Func FuncA, Func FuncB, int rounds, const char *name_a, |
| 92 | const char *name_b, const char *logFile) { |
| 93 | std::ofstream log(logFile); |
| 94 | log << " Performance tests with inputs in denormal range:\n" ; |
| 95 | run_perf_in_range<binary>( |
| 96 | FuncA, FuncB, /* startingBit= */ StorageType(0), |
| 97 | /* endingBit= */ FPBits::max_subnormal().uintval(), 1'000'001, rounds, |
| 98 | name_a, name_b, log); |
| 99 | log << "\n Performance tests with inputs in normal range:\n" ; |
| 100 | run_perf_in_range<binary>(FuncA, FuncB, |
| 101 | /* startingBit= */ FPBits::min_normal().uintval(), |
| 102 | /* endingBit= */ FPBits::max_normal().uintval(), |
| 103 | 1'000'001, rounds, name_a, name_b, log); |
| 104 | log << "\n Performance tests with inputs in normal range with exponents " |
| 105 | "close to each other:\n" ; |
| 106 | run_perf_in_range<binary>( |
| 107 | FuncA, FuncB, |
| 108 | /* startingBit= */ FPBits(OutputType(0x1.0p-10)).uintval(), |
| 109 | /* endingBit= */ FPBits(OutputType(0x1.0p+10)).uintval(), 1'000'001, |
| 110 | rounds, name_a, name_b, log); |
| 111 | } |
| 112 | }; |
| 113 | |
| 114 | } // namespace testing |
| 115 | } // namespace LIBC_NAMESPACE_DECL |
| 116 | |
// Expands to a braced block that compares two two-argument functions with
// PerfTest<OutputType, InputType>::run_perf, using a single round and writing
// the report to `filename`. The stringified FuncA / FuncB arguments are used
// as the labels in the report. The block ends with `return 0;`, so it is meant
// to serve as the body of a function returning an integer.
#define BINARY_INPUT_SINGLE_OUTPUT_PERF(OutputType, InputType, FuncA, FuncB,   \
                                        filename)                              \
  {                                                                            \
    using PerfHarness =                                                        \
        LIBC_NAMESPACE::testing::PerfTest<OutputType, InputType>;              \
    using BinaryFn = typename PerfHarness::BinaryFuncPtr;                      \
    PerfHarness::run_perf<true>(static_cast<BinaryFn>(&FuncA),                 \
                                static_cast<BinaryFn>(&FuncB), 1, #FuncA,      \
                                #FuncB, filename);                             \
    return 0;                                                                  \
  }
| 128 | |
// Same as the single-round binary macro, but the number of rounds passed to
// PerfTest<OutputType, InputType>::run_perf is supplied by the caller through
// `rounds`. Expands to a braced block ending in `return 0;`.
#define BINARY_INPUT_SINGLE_OUTPUT_PERF_EX(OutputType, InputType, FuncA,       \
                                           FuncB, rounds, filename)            \
  {                                                                            \
    using PerfHarness =                                                        \
        LIBC_NAMESPACE::testing::PerfTest<OutputType, InputType>;              \
    using BinaryFn = typename PerfHarness::BinaryFuncPtr;                      \
    PerfHarness::run_perf<true>(static_cast<BinaryFn>(&FuncA),                 \
                                static_cast<BinaryFn>(&FuncB), rounds, #FuncA, \
                                #FuncB, filename);                             \
    return 0;                                                                  \
  }
| 140 | |
// Expands to a braced block that compares two one-argument functions whose
// argument and result are both of type T, via PerfTest<T, T>::run_perf with a
// single round. The stringified FuncA / FuncB arguments label the report
// written to `filename`. The block ends with `return 0;`.
#define SINGLE_INPUT_SINGLE_OUTPUT_PERF(T, FuncA, FuncB, filename)             \
  {                                                                            \
    using PerfHarness = LIBC_NAMESPACE::testing::PerfTest<T, T>;               \
    using UnaryFn = typename PerfHarness::UnaryFuncPtr;                        \
    PerfHarness::run_perf<false>(static_cast<UnaryFn>(&FuncA),                 \
                                 static_cast<UnaryFn>(&FuncB), 1, #FuncA,      \
                                 #FuncB, filename);                            \
    return 0;                                                                  \
  }
| 150 | |
// Same as the single-round unary macro, but the number of rounds passed to
// PerfTest<T, T>::run_perf is supplied by the caller through `rounds`.
// Expands to a braced block ending in `return 0;`.
#define SINGLE_INPUT_SINGLE_OUTPUT_PERF_EX(T, FuncA, FuncB, rounds, filename)  \
  {                                                                            \
    using PerfHarness = LIBC_NAMESPACE::testing::PerfTest<T, T>;               \
    using UnaryFn = typename PerfHarness::UnaryFuncPtr;                        \
    PerfHarness::run_perf<false>(static_cast<UnaryFn>(&FuncA),                 \
                                 static_cast<UnaryFn>(&FuncB), rounds, #FuncA, \
                                 #FuncB, filename);                            \
    return 0;                                                                  \
  }
| 160 | |