1 | use criterion::{criterion_group, criterion_main, Bencher, BenchmarkId, Criterion}; |
2 | use half::prelude::*; |
3 | use std::{f32, f64, iter}; |
4 | |
/// Element count (2^10 = 1024) of the "large" source and destination buffers used by the
/// slice conversion benchmarks in this file.
const SIMD_LARGE_BENCH_SLICE_LEN: usize = 1 << 10;
6 | |
7 | fn bench_f32_to_f16(c: &mut Criterion) { |
8 | let mut group = c.benchmark_group("Convert f16 From f32" ); |
9 | for val in &[ |
10 | 0., |
11 | -0., |
12 | 1., |
13 | f32::MIN, |
14 | f32::MAX, |
15 | f32::MIN_POSITIVE, |
16 | f32::NEG_INFINITY, |
17 | f32::INFINITY, |
18 | f32::NAN, |
19 | f32::consts::E, |
20 | f32::consts::PI, |
21 | ] { |
22 | group.bench_with_input(BenchmarkId::new("f16::from_f32" , val), val, |b, i| { |
23 | b.iter(|| f16::from_f32(*i)) |
24 | }); |
25 | } |
26 | } |
27 | |
28 | fn bench_f64_to_f16(c: &mut Criterion) { |
29 | let mut group = c.benchmark_group("Convert f16 From f64" ); |
30 | for val in &[ |
31 | 0., |
32 | -0., |
33 | 1., |
34 | f64::MIN, |
35 | f64::MAX, |
36 | f64::MIN_POSITIVE, |
37 | f64::NEG_INFINITY, |
38 | f64::INFINITY, |
39 | f64::NAN, |
40 | f64::consts::E, |
41 | f64::consts::PI, |
42 | ] { |
43 | group.bench_with_input(BenchmarkId::new("f16::from_f64" , val), val, |b, i| { |
44 | b.iter(|| f16::from_f64(*i)) |
45 | }); |
46 | } |
47 | } |
48 | |
49 | fn bench_f16_to_f32(c: &mut Criterion) { |
50 | let mut group = c.benchmark_group("Convert f16 to f32" ); |
51 | for val in &[ |
52 | f16::ZERO, |
53 | f16::NEG_ZERO, |
54 | f16::ONE, |
55 | f16::MIN, |
56 | f16::MAX, |
57 | f16::MIN_POSITIVE, |
58 | f16::NEG_INFINITY, |
59 | f16::INFINITY, |
60 | f16::NAN, |
61 | f16::E, |
62 | f16::PI, |
63 | ] { |
64 | group.bench_with_input(BenchmarkId::new("f16::to_f32" , val), val, |b, i| { |
65 | b.iter(|| i.to_f32()) |
66 | }); |
67 | } |
68 | } |
69 | |
70 | fn bench_f16_to_f64(c: &mut Criterion) { |
71 | let mut group = c.benchmark_group("Convert f16 to f64" ); |
72 | for val in &[ |
73 | f16::ZERO, |
74 | f16::NEG_ZERO, |
75 | f16::ONE, |
76 | f16::MIN, |
77 | f16::MAX, |
78 | f16::MIN_POSITIVE, |
79 | f16::NEG_INFINITY, |
80 | f16::INFINITY, |
81 | f16::NAN, |
82 | f16::E, |
83 | f16::PI, |
84 | ] { |
85 | group.bench_with_input(BenchmarkId::new("f16::to_f64" , val), val, |b, i| { |
86 | b.iter(|| i.to_f64()) |
87 | }); |
88 | } |
89 | } |
90 | |
91 | criterion_group!( |
92 | f16_sisd, |
93 | bench_f32_to_f16, |
94 | bench_f64_to_f16, |
95 | bench_f16_to_f32, |
96 | bench_f16_to_f64 |
97 | ); |
98 | |
99 | fn bench_slice_f32_to_f16(c: &mut Criterion) { |
100 | let mut constant_buffer = [f16::ZERO; 11]; |
101 | let constants = [ |
102 | 0., |
103 | -0., |
104 | 1., |
105 | f32::MIN, |
106 | f32::MAX, |
107 | f32::MIN_POSITIVE, |
108 | f32::NEG_INFINITY, |
109 | f32::INFINITY, |
110 | f32::NAN, |
111 | f32::consts::E, |
112 | f32::consts::PI, |
113 | ]; |
114 | c.bench_function( |
115 | "HalfFloatSliceExt::convert_from_f32_slice/constants" , |
116 | |b: &mut Bencher<'_>| b.iter(|| constant_buffer.convert_from_f32_slice(&constants)), |
117 | ); |
118 | |
119 | let large: Vec<_> = iter::repeat(0) |
120 | .enumerate() |
121 | .map(|(i, _)| i as f32) |
122 | .take(SIMD_LARGE_BENCH_SLICE_LEN) |
123 | .collect(); |
124 | let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN]; |
125 | c.bench_function( |
126 | "HalfFloatSliceExt::convert_from_f32_slice/large" , |
127 | |b: &mut Bencher<'_>| b.iter(|| large_buffer.convert_from_f32_slice(&large)), |
128 | ); |
129 | } |
130 | |
131 | fn bench_slice_f64_to_f16(c: &mut Criterion) { |
132 | let mut constant_buffer = [f16::ZERO; 11]; |
133 | let constants = [ |
134 | 0., |
135 | -0., |
136 | 1., |
137 | f64::MIN, |
138 | f64::MAX, |
139 | f64::MIN_POSITIVE, |
140 | f64::NEG_INFINITY, |
141 | f64::INFINITY, |
142 | f64::NAN, |
143 | f64::consts::E, |
144 | f64::consts::PI, |
145 | ]; |
146 | c.bench_function( |
147 | "HalfFloatSliceExt::convert_from_f64_slice/constants" , |
148 | |b: &mut Bencher<'_>| b.iter(|| constant_buffer.convert_from_f64_slice(&constants)), |
149 | ); |
150 | |
151 | let large: Vec<_> = iter::repeat(0) |
152 | .enumerate() |
153 | .map(|(i, _)| i as f64) |
154 | .take(SIMD_LARGE_BENCH_SLICE_LEN) |
155 | .collect(); |
156 | let mut large_buffer = [f16::ZERO; SIMD_LARGE_BENCH_SLICE_LEN]; |
157 | c.bench_function( |
158 | "HalfFloatSliceExt::convert_from_f64_slice/large" , |
159 | |b: &mut Bencher<'_>| b.iter(|| large_buffer.convert_from_f64_slice(&large)), |
160 | ); |
161 | } |
162 | |
163 | fn bench_slice_f16_to_f32(c: &mut Criterion) { |
164 | let mut constant_buffer = [0f32; 11]; |
165 | let constants = [ |
166 | f16::ZERO, |
167 | f16::NEG_ZERO, |
168 | f16::ONE, |
169 | f16::MIN, |
170 | f16::MAX, |
171 | f16::MIN_POSITIVE, |
172 | f16::NEG_INFINITY, |
173 | f16::INFINITY, |
174 | f16::NAN, |
175 | f16::E, |
176 | f16::PI, |
177 | ]; |
178 | c.bench_function( |
179 | "HalfFloatSliceExt::convert_to_f32_slice/constants" , |
180 | |b: &mut Bencher<'_>| b.iter(|| constants.convert_to_f32_slice(&mut constant_buffer)), |
181 | ); |
182 | |
183 | let large: Vec<_> = iter::repeat(0) |
184 | .enumerate() |
185 | .map(|(i, _)| f16::from_f32(i as f32)) |
186 | .take(SIMD_LARGE_BENCH_SLICE_LEN) |
187 | .collect(); |
188 | let mut large_buffer = [0f32; SIMD_LARGE_BENCH_SLICE_LEN]; |
189 | c.bench_function( |
190 | "HalfFloatSliceExt::convert_to_f32_slice/large" , |
191 | |b: &mut Bencher<'_>| b.iter(|| large.convert_to_f32_slice(&mut large_buffer)), |
192 | ); |
193 | } |
194 | |
195 | fn bench_slice_f16_to_f64(c: &mut Criterion) { |
196 | let mut constant_buffer = [0f64; 11]; |
197 | let constants = [ |
198 | f16::ZERO, |
199 | f16::NEG_ZERO, |
200 | f16::ONE, |
201 | f16::MIN, |
202 | f16::MAX, |
203 | f16::MIN_POSITIVE, |
204 | f16::NEG_INFINITY, |
205 | f16::INFINITY, |
206 | f16::NAN, |
207 | f16::E, |
208 | f16::PI, |
209 | ]; |
210 | c.bench_function( |
211 | "HalfFloatSliceExt::convert_to_f64_slice/constants" , |
212 | |b: &mut Bencher<'_>| b.iter(|| constants.convert_to_f64_slice(&mut constant_buffer)), |
213 | ); |
214 | |
215 | let large: Vec<_> = iter::repeat(0) |
216 | .enumerate() |
217 | .map(|(i, _)| f16::from_f64(i as f64)) |
218 | .take(SIMD_LARGE_BENCH_SLICE_LEN) |
219 | .collect(); |
220 | let mut large_buffer = [0f64; SIMD_LARGE_BENCH_SLICE_LEN]; |
221 | c.bench_function( |
222 | "HalfFloatSliceExt::convert_to_f64_slice/large" , |
223 | |b: &mut Bencher<'_>| b.iter(|| large.convert_to_f64_slice(&mut large_buffer)), |
224 | ); |
225 | } |
226 | |
227 | criterion_group!( |
228 | f16_simd, |
229 | bench_slice_f32_to_f16, |
230 | bench_slice_f64_to_f16, |
231 | bench_slice_f16_to_f32, |
232 | bench_slice_f16_to_f64 |
233 | ); |
234 | |
235 | fn bench_f32_to_bf16(c: &mut Criterion) { |
236 | let mut group = c.benchmark_group("Convert bf16 From f32" ); |
237 | for val in &[ |
238 | 0., |
239 | -0., |
240 | 1., |
241 | f32::MIN, |
242 | f32::MAX, |
243 | f32::MIN_POSITIVE, |
244 | f32::NEG_INFINITY, |
245 | f32::INFINITY, |
246 | f32::NAN, |
247 | f32::consts::E, |
248 | f32::consts::PI, |
249 | ] { |
250 | group.bench_with_input(BenchmarkId::new("bf16::from_f32" , val), val, |b, i| { |
251 | b.iter(|| bf16::from_f32(*i)) |
252 | }); |
253 | } |
254 | } |
255 | |
256 | fn bench_f64_to_bf16(c: &mut Criterion) { |
257 | let mut group = c.benchmark_group("Convert bf16 From f64" ); |
258 | for val in &[ |
259 | 0., |
260 | -0., |
261 | 1., |
262 | f64::MIN, |
263 | f64::MAX, |
264 | f64::MIN_POSITIVE, |
265 | f64::NEG_INFINITY, |
266 | f64::INFINITY, |
267 | f64::NAN, |
268 | f64::consts::E, |
269 | f64::consts::PI, |
270 | ] { |
271 | group.bench_with_input(BenchmarkId::new("bf16::from_f64" , val), val, |b, i| { |
272 | b.iter(|| bf16::from_f64(*i)) |
273 | }); |
274 | } |
275 | } |
276 | |
277 | fn bench_bf16_to_f32(c: &mut Criterion) { |
278 | let mut group = c.benchmark_group("Convert bf16 to f32" ); |
279 | for val in &[ |
280 | bf16::ZERO, |
281 | bf16::NEG_ZERO, |
282 | bf16::ONE, |
283 | bf16::MIN, |
284 | bf16::MAX, |
285 | bf16::MIN_POSITIVE, |
286 | bf16::NEG_INFINITY, |
287 | bf16::INFINITY, |
288 | bf16::NAN, |
289 | bf16::E, |
290 | bf16::PI, |
291 | ] { |
292 | group.bench_with_input(BenchmarkId::new("bf16::to_f32" , val), val, |b, i| { |
293 | b.iter(|| i.to_f32()) |
294 | }); |
295 | } |
296 | } |
297 | |
298 | fn bench_bf16_to_f64(c: &mut Criterion) { |
299 | let mut group = c.benchmark_group("Convert bf16 to f64" ); |
300 | for val in &[ |
301 | bf16::ZERO, |
302 | bf16::NEG_ZERO, |
303 | bf16::ONE, |
304 | bf16::MIN, |
305 | bf16::MAX, |
306 | bf16::MIN_POSITIVE, |
307 | bf16::NEG_INFINITY, |
308 | bf16::INFINITY, |
309 | bf16::NAN, |
310 | bf16::E, |
311 | bf16::PI, |
312 | ] { |
313 | group.bench_with_input(BenchmarkId::new("bf16::to_f64" , val), val, |b, i| { |
314 | b.iter(|| i.to_f64()) |
315 | }); |
316 | } |
317 | } |
318 | |
319 | criterion_group!( |
320 | bf16_sisd, |
321 | bench_f32_to_bf16, |
322 | bench_f64_to_bf16, |
323 | bench_bf16_to_f32, |
324 | bench_bf16_to_f64 |
325 | ); |
326 | |
327 | criterion_main!(f16_sisd, bf16_sisd, f16_simd); |
328 | |