1//===-- Performance test for nearest integer functions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "src/__support/FPUtil/FPBits.h"
10#include "src/math/ceilf.h"
11#include "src/math/ceilf16.h"
12#include "src/math/floorf.h"
13#include "src/math/floorf16.h"
14#include "src/math/rintf.h"
15#include "src/math/rintf16.h"
16#include "src/math/roundevenf.h"
17#include "src/math/roundevenf16.h"
18#include "src/math/roundf.h"
19#include "src/math/roundf16.h"
20#include "src/math/truncf.h"
21#include "src/math/truncf16.h"
22#include "test/UnitTest/RoundingModeUtils.h"
23#include "test/src/math/performance_testing/Timer.h"
24
25#include <fstream>
26#include <math.h>
27
28using LIBC_NAMESPACE::fputil::testing::ForceRoundingMode;
29using LIBC_NAMESPACE::fputil::testing::RoundingMode;
30
31namespace LIBC_NAMESPACE::testing {
32
33template <typename T> class NearestIntegerPerf {
34 using FPBits = fputil::FPBits<T>;
35 using StorageType = typename FPBits::StorageType;
36
37public:
38 typedef T Func(T);
39
40 static void run_perf_in_range(Func my_func, Func other_func,
41 StorageType starting_bit,
42 StorageType ending_bit, StorageType step,
43 size_t rounds, const char *name_a,
44 const char *name_b, std::ofstream &log) {
45 auto runner = [=](Func func) {
46 [[maybe_unused]] volatile T result;
47 for (size_t i = 0; i < rounds; i++) {
48 for (StorageType bits = starting_bit; bits <= ending_bit;
49 bits += step) {
50 T x = FPBits(bits).get_val();
51 result = func(x);
52 }
53 }
54 };
55
56 Timer timer;
57 timer.start();
58 runner(my_func);
59 timer.stop();
60
61 size_t number_of_runs = (ending_bit - starting_bit) / step + 1;
62 double my_average =
63 static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds;
64 log << "-- Function A: " << name_a << " --\n";
65 log << " Total time : " << timer.nanoseconds() << " ns \n";
66 log << " Average runtime : " << my_average << " ns/op \n";
67 log << " Ops per second : "
68 << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n";
69
70 timer.start();
71 runner(other_func);
72 timer.stop();
73
74 double other_average =
75 static_cast<double>(timer.nanoseconds()) / number_of_runs / rounds;
76 log << "-- Function B: " << name_b << " --\n";
77 log << " Total time : " << timer.nanoseconds() << " ns \n";
78 log << " Average runtime : " << other_average << " ns/op \n";
79 log << " Ops per second : "
80 << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
81
82 log << "-- Average ops per second ratio --\n";
83 log << " A / B : " << other_average / my_average << " \n";
84 }
85
86 static void run_perf(Func my_func, Func other_func, size_t rounds,
87 const char *name_a, const char *name_b,
88 const char *log_file) {
89 std::ofstream log(log_file);
90 log << "Performance tests with inputs in normal integral range:\n";
91 run_perf_in_range(
92 my_func, other_func,
93 /*starting_bit=*/StorageType((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN),
94 /*ending_bit=*/
95 StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN - 1)
96 << FPBits::SIG_LEN),
97 /*step=*/StorageType(1 << FPBits::SIG_LEN),
98 rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, name_a, name_b, log);
99 log << "\n Performance tests with inputs in low integral range:\n";
100 run_perf_in_range(
101 my_func, other_func,
102 /*starting_bit=*/StorageType(1 << FPBits::SIG_LEN),
103 /*ending_bit=*/StorageType((FPBits::EXP_BIAS - 1) << FPBits::SIG_LEN),
104 /*step_bit=*/StorageType(1 << FPBits::SIG_LEN),
105 rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, name_a, name_b, log);
106 log << "\n Performance tests with inputs in high integral range:\n";
107 run_perf_in_range(
108 my_func, other_func,
109 /*starting_bit=*/
110 StorageType((FPBits::EXP_BIAS + FPBits::FRACTION_LEN)
111 << FPBits::SIG_LEN),
112 /*ending_bit=*/
113 StorageType(FPBits::MAX_BIASED_EXPONENT << FPBits::SIG_LEN),
114 /*step=*/StorageType(1 << FPBits::SIG_LEN),
115 rounds * FPBits::EXP_BIAS * FPBits::EXP_BIAS * 2, name_a, name_b, log);
116 log << "\n Performance tests with inputs in normal fractional range:\n";
117 run_perf_in_range(
118 my_func, other_func,
119 /*starting_bit=*/
120 StorageType(((FPBits::EXP_BIAS + 1) << FPBits::SIG_LEN) + 1),
121 /*ending_bit=*/
122 StorageType(((FPBits::EXP_BIAS + 2) << FPBits::SIG_LEN) - 1),
123 /*step=*/StorageType(1), rounds * 2, name_a, name_b, log);
124 log << "\n Performance tests with inputs in subnormal fractional range:\n";
125 run_perf_in_range(my_func, other_func, /*starting_bit=*/StorageType(1),
126 /*ending_bit=*/StorageType(FPBits::SIG_MASK),
127 /*step=*/StorageType(1), rounds, name_a, name_b, log);
128 }
129};
130
131} // namespace LIBC_NAMESPACE::testing
132
// Runs the NearestIntegerPerf<T> benchmark suite for my_func against
// other_func and writes the report to filename. The two functions are labelled
// in the report with the exact text passed as my_func and other_func.
// Expands to a braced block, so call sites need no trailing semicolon.
#define NEAREST_INTEGER_PERF(T, my_func, other_func, rounds, filename)         \
  {                                                                            \
    using PerfSuite = LIBC_NAMESPACE::testing::NearestIntegerPerf<T>;          \
    PerfSuite::run_perf(&my_func, &other_func, rounds, #my_func, #other_func,  \
                        filename);                                             \
  }
138
// Base repetition counts passed as `rounds` to the float16 and float
// benchmarks respectively.
static constexpr size_t FLOAT16_ROUNDS{20'000};
static constexpr size_t FLOAT_ROUNDS{40};
141
142// LLVM libc might be the only libc implementation with support for float16 math
143// functions currently. We can't compare our float16 functions against the
144// system libc, so we compare them against this placeholder function.
145float16 placeholderf16(float16 x) { return x; }
146
// Identity function used as the comparison baseline where the system libc
// might not provide a counterpart, such as the C23 roundeven* math functions.
float placeholderf(float x) {
  return x;
}
149
150int main() {
151 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::ceilf16, ::placeholderf16,
152 FLOAT16_ROUNDS, "ceilf16_perf.log")
153 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::floorf16, ::placeholderf16,
154 FLOAT16_ROUNDS, "floorf16_perf.log")
155 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundf16, ::placeholderf16,
156 FLOAT16_ROUNDS, "roundf16_perf.log")
157 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::roundevenf16, ::placeholderf16,
158 FLOAT16_ROUNDS, "roundevenf16_perf.log")
159 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::truncf16, ::placeholderf16,
160 FLOAT16_ROUNDS, "truncf16_perf.log")
161
162 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::ceilf, ::ceilf, FLOAT_ROUNDS,
163 "ceilf_perf.log")
164 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::floorf, ::floorf, FLOAT_ROUNDS,
165 "floorf_perf.log")
166 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundf, ::roundf, FLOAT_ROUNDS,
167 "roundf_perf.log")
168 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::roundevenf, ::placeholderf,
169 FLOAT_ROUNDS, "roundevenf_perf.log")
170 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::truncf, ::truncf, FLOAT_ROUNDS,
171 "truncf_perf.log")
172
173 if (ForceRoundingMode r(RoundingMode::Upward); r.success) {
174 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
175 FLOAT16_ROUNDS, "rintf16_upward_perf.log")
176 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
177 "rintf_upward_perf.log")
178 }
179 if (ForceRoundingMode r(RoundingMode::Downward); r.success) {
180 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
181 FLOAT16_ROUNDS, "rintf16_downward_perf.log")
182 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
183 "rintf_downward_perf.log")
184 }
185 if (ForceRoundingMode r(RoundingMode::TowardZero); r.success) {
186 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
187 FLOAT16_ROUNDS, "rintf16_towardzero_perf.log")
188 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
189 "rintf_towardzero_perf.log")
190 }
191 if (ForceRoundingMode r(RoundingMode::Nearest); r.success) {
192 NEAREST_INTEGER_PERF(float16, LIBC_NAMESPACE::rintf16, ::placeholderf16,
193 FLOAT16_ROUNDS, "rintf16_nearest_perf.log")
194 NEAREST_INTEGER_PERF(float, LIBC_NAMESPACE::rintf, ::rintf, FLOAT_ROUNDS,
195 "rintf_nearest_perf.log")
196 }
197
198 return 0;
199}
200

// Source: libc/test/src/math/performance_testing/nearest_integer_funcs_perf.cpp