1 | /* |
2 | * Microbenchmark for math functions. |
3 | * |
4 | * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
5 | * See https://llvm.org/LICENSE.txt for license information. |
6 | * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
7 | */ |
8 | |
9 | #undef _GNU_SOURCE |
10 | #define _GNU_SOURCE 1 |
11 | #include <stdint.h> |
12 | #include <stdlib.h> |
13 | #include <stdio.h> |
14 | #include <string.h> |
15 | #include <time.h> |
16 | #include <math.h> |
17 | #include "mathlib.h" |
18 | |
/* Vector math code is built unless WANT_VMATH was already defined.  */
#ifndef WANT_VMATH
# define WANT_VMATH 1
#endif

/* Default number of timed measurements; the best one is reported.  */
#define MEASURE 60
/* Number of elements in the input arrays.  */
#define N 8000
/* Default number of passes over the input array per measurement.  */
#define ITER 125

/* Trace samples read from a trace file, and how many there are.  */
static double *Trace;
static size_t trace_size;
/* Input arrays for double and single precision functions.  */
static double A[N];
static float Af[N];
/* Run-time measurement and iteration counts (set by -m and -c).  */
static long measurecount = MEASURE;
static long itercount = ITER;
37 | |
#if __aarch64__ && WANT_VMATH
/* Two-lane double vector.  */
typedef __f64x2_t v_double;

#define v_double_len() 2

/* Build a vector from the two doubles starting at P.  */
static inline v_double
v_double_load (const double *p)
{
  v_double v = {p[0], p[1]};
  return v;
}

/* Build a vector with X in every lane.  */
static inline v_double
v_double_dup (double x)
{
  v_double v = {x, x};
  return v;
}

/* Four-lane float vector.  */
typedef __f32x4_t v_float;

#define v_float_len() 4

/* Build a vector from the four floats starting at P.  */
static inline v_float
v_float_load (const float *p)
{
  v_float v = {p[0], p[1], p[2], p[3]};
  return v;
}

/* Build a vector with X in every lane.  */
static inline v_float
v_float_dup (float x)
{
  v_float v = {x, x, x, x};
  return v;
}
#else
/* Scalar stand-ins so that the vector code paths still compile.  */
typedef double v_double;
typedef float v_float;
#define v_double_len(x) 1
#define v_double_load(x) (x)[0]
#define v_double_dup(x) (x)
#define v_float_len(x) 1
#define v_float_load(x) (x)[0]
#define v_float_dup(x) (x)
#endif
81 | |
/* Identity function on double; the "dummy" entry of funtab.  */
static double
dummy (double x)
{
  double same = x;
  return same;
}
87 | |
/* Identity function on float; the "dummyf" entry of funtab.  */
static float
dummyf (float x)
{
  float same = x;
  return same;
}
93 | |
#if WANT_VMATH
# if __aarch64__
/* Identity on a double vector.  */
static v_double
__v_dummy (v_double x)
{
  return x;
}

/* Identity on a float vector.  */
static v_float
__v_dummyf (v_float x)
{
  return x;
}

#  ifdef __vpcs
/* Identity functions declared with __vpcs.  */
__vpcs static v_double
__vn_dummy (v_double x)
{
  return x;
}

__vpcs static v_float
__vn_dummyf (v_float x)
{
  return x;
}

/* One-argument adapters for the two-argument __vpcs pow variants: the
   input is passed as both arguments.  */
__vpcs static v_float
xy__vn_powf (v_float x)
{
  return __vn_powf (x, x);
}

__vpcs static v_float
xy_Z_powf (v_float x)
{
  return _ZGVnN4vv_powf (x, x);
}

__vpcs static v_double
xy__vn_pow (v_double x)
{
  return __vn_pow (x, x);
}

__vpcs static v_double
xy_Z_pow (v_double x)
{
  return _ZGVnN2vv_pow (x, x);
}
#  endif /* __vpcs */

/* One-argument adapters for __v_powf and __v_pow.  */
static v_float
xy__v_powf (v_float x)
{
  return __v_powf (x, x);
}

static v_double
xy__v_pow (v_double x)
{
  return __v_pow (x, x);
}
# endif /* __aarch64__ */

/* One-argument adapters for __s_powf and __s_pow.  */
static float
xy__s_powf (float x)
{
  return __s_powf (x, x);
}

static double
xy__s_pow (double x)
{
  return __s_pow (x, x);
}
#endif /* WANT_VMATH */
171 | |
172 | static double |
173 | xypow (double x) |
174 | { |
175 | return pow (x: x, y: x); |
176 | } |
177 | |
178 | static float |
179 | xypowf (float x) |
180 | { |
181 | return powf (x: x, y: x); |
182 | } |
183 | |
184 | static double |
185 | xpow (double x) |
186 | { |
187 | return pow (x: x, y: 23.4); |
188 | } |
189 | |
190 | static float |
191 | xpowf (float x) |
192 | { |
193 | return powf (x: x, y: 23.4f); |
194 | } |
195 | |
196 | static double |
197 | ypow (double x) |
198 | { |
199 | return pow (x: 2.34, y: x); |
200 | } |
201 | |
202 | static float |
203 | ypowf (float x) |
204 | { |
205 | return powf (x: 2.34f, y: x); |
206 | } |
207 | |
208 | static float |
209 | sincosf_wrap (float x) |
210 | { |
211 | float s, c; |
212 | sincosf (x: x, sinx: &s, cosx: &c); |
213 | return s + c; |
214 | } |
215 | |
216 | static const struct fun |
217 | { |
218 | const char *name; |
219 | int prec; |
220 | int vec; |
221 | double lo; |
222 | double hi; |
223 | union |
224 | { |
225 | double (*d) (double); |
226 | float (*f) (float); |
227 | v_double (*vd) (v_double); |
228 | v_float (*vf) (v_float); |
229 | #ifdef __vpcs |
230 | __vpcs v_double (*vnd) (v_double); |
231 | __vpcs v_float (*vnf) (v_float); |
232 | #endif |
233 | } fun; |
234 | } funtab[] = { |
235 | #define D(func, lo, hi) {#func, 'd', 0, lo, hi, {.d = func}}, |
236 | #define F(func, lo, hi) {#func, 'f', 0, lo, hi, {.f = func}}, |
237 | #define VD(func, lo, hi) {#func, 'd', 'v', lo, hi, {.vd = func}}, |
238 | #define VF(func, lo, hi) {#func, 'f', 'v', lo, hi, {.vf = func}}, |
239 | #define VND(func, lo, hi) {#func, 'd', 'n', lo, hi, {.vnd = func}}, |
240 | #define VNF(func, lo, hi) {#func, 'f', 'n', lo, hi, {.vnf = func}}, |
241 | D (dummy, 1.0, 2.0) |
242 | D (exp, -9.9, 9.9) |
243 | D (exp, 0.5, 1.0) |
244 | D (exp2, -9.9, 9.9) |
245 | D (log, 0.01, 11.1) |
246 | D (log, 0.999, 1.001) |
247 | D (log2, 0.01, 11.1) |
248 | D (log2, 0.999, 1.001) |
249 | {"pow" , 'd', 0, 0.01, 11.1, {.d = xypow}}, |
250 | D (xpow, 0.01, 11.1) |
251 | D (ypow, -9.9, 9.9) |
252 | |
253 | F (dummyf, 1.0, 2.0) |
254 | F (expf, -9.9, 9.9) |
255 | F (exp2f, -9.9, 9.9) |
256 | F (logf, 0.01, 11.1) |
257 | F (log2f, 0.01, 11.1) |
258 | {"powf" , 'f', 0, 0.01, 11.1, {.f = xypowf}}, |
259 | F (xpowf, 0.01, 11.1) |
260 | F (ypowf, -9.9, 9.9) |
261 | {"sincosf" , 'f', 0, 0.1, 0.7, {.f = sincosf_wrap}}, |
262 | {"sincosf" , 'f', 0, 0.8, 3.1, {.f = sincosf_wrap}}, |
263 | {"sincosf" , 'f', 0, -3.1, 3.1, {.f = sincosf_wrap}}, |
264 | {"sincosf" , 'f', 0, 3.3, 33.3, {.f = sincosf_wrap}}, |
265 | {"sincosf" , 'f', 0, 100, 1000, {.f = sincosf_wrap}}, |
266 | {"sincosf" , 'f', 0, 1e6, 1e32, {.f = sincosf_wrap}}, |
267 | F (sinf, 0.1, 0.7) |
268 | F (sinf, 0.8, 3.1) |
269 | F (sinf, -3.1, 3.1) |
270 | F (sinf, 3.3, 33.3) |
271 | F (sinf, 100, 1000) |
272 | F (sinf, 1e6, 1e32) |
273 | F (cosf, 0.1, 0.7) |
274 | F (cosf, 0.8, 3.1) |
275 | F (cosf, -3.1, 3.1) |
276 | F (cosf, 3.3, 33.3) |
277 | F (cosf, 100, 1000) |
278 | F (cosf, 1e6, 1e32) |
279 | #if WANT_VMATH |
280 | D (__s_sin, -3.1, 3.1) |
281 | D (__s_cos, -3.1, 3.1) |
282 | D (__s_exp, -9.9, 9.9) |
283 | D (__s_log, 0.01, 11.1) |
284 | {"__s_pow" , 'd', 0, 0.01, 11.1, {.d = xy__s_pow}}, |
285 | F (__s_expf, -9.9, 9.9) |
286 | F (__s_expf_1u, -9.9, 9.9) |
287 | F (__s_exp2f, -9.9, 9.9) |
288 | F (__s_exp2f_1u, -9.9, 9.9) |
289 | F (__s_logf, 0.01, 11.1) |
290 | {"__s_powf" , 'f', 0, 0.01, 11.1, {.f = xy__s_powf}}, |
291 | F (__s_sinf, -3.1, 3.1) |
292 | F (__s_cosf, -3.1, 3.1) |
293 | #if __aarch64__ |
294 | VD (__v_dummy, 1.0, 2.0) |
295 | VD (__v_sin, -3.1, 3.1) |
296 | VD (__v_cos, -3.1, 3.1) |
297 | VD (__v_exp, -9.9, 9.9) |
298 | VD (__v_log, 0.01, 11.1) |
299 | {"__v_pow" , 'd', 'v', 0.01, 11.1, {.vd = xy__v_pow}}, |
300 | VF (__v_dummyf, 1.0, 2.0) |
301 | VF (__v_expf, -9.9, 9.9) |
302 | VF (__v_expf_1u, -9.9, 9.9) |
303 | VF (__v_exp2f, -9.9, 9.9) |
304 | VF (__v_exp2f_1u, -9.9, 9.9) |
305 | VF (__v_logf, 0.01, 11.1) |
306 | {"__v_powf" , 'f', 'v', 0.01, 11.1, {.vf = xy__v_powf}}, |
307 | VF (__v_sinf, -3.1, 3.1) |
308 | VF (__v_cosf, -3.1, 3.1) |
309 | #ifdef __vpcs |
310 | VND (__vn_dummy, 1.0, 2.0) |
311 | VND (__vn_exp, -9.9, 9.9) |
312 | VND (_ZGVnN2v_exp, -9.9, 9.9) |
313 | VND (__vn_log, 0.01, 11.1) |
314 | VND (_ZGVnN2v_log, 0.01, 11.1) |
315 | {"__vn_pow" , 'd', 'n', 0.01, 11.1, {.vnd = xy__vn_pow}}, |
316 | {"_ZGVnN2vv_pow" , 'd', 'n', 0.01, 11.1, {.vnd = xy_Z_pow}}, |
317 | VND (__vn_sin, -3.1, 3.1) |
318 | VND (_ZGVnN2v_sin, -3.1, 3.1) |
319 | VND (__vn_cos, -3.1, 3.1) |
320 | VND (_ZGVnN2v_cos, -3.1, 3.1) |
321 | VNF (__vn_dummyf, 1.0, 2.0) |
322 | VNF (__vn_expf, -9.9, 9.9) |
323 | VNF (_ZGVnN4v_expf, -9.9, 9.9) |
324 | VNF (__vn_expf_1u, -9.9, 9.9) |
325 | VNF (__vn_exp2f, -9.9, 9.9) |
326 | VNF (_ZGVnN4v_exp2f, -9.9, 9.9) |
327 | VNF (__vn_exp2f_1u, -9.9, 9.9) |
328 | VNF (__vn_logf, 0.01, 11.1) |
329 | VNF (_ZGVnN4v_logf, 0.01, 11.1) |
330 | {"__vn_powf" , 'f', 'n', 0.01, 11.1, {.vnf = xy__vn_powf}}, |
331 | {"_ZGVnN4vv_powf" , 'f', 'n', 0.01, 11.1, {.vnf = xy_Z_powf}}, |
332 | VNF (__vn_sinf, -3.1, 3.1) |
333 | VNF (_ZGVnN4v_sinf, -3.1, 3.1) |
334 | VNF (__vn_cosf, -3.1, 3.1) |
335 | VNF (_ZGVnN4v_cosf, -3.1, 3.1) |
336 | #endif |
337 | #endif |
338 | #endif |
339 | {0}, |
340 | #undef F |
341 | #undef D |
342 | #undef VF |
343 | #undef VD |
344 | #undef VNF |
345 | #undef VND |
346 | }; |
347 | |
348 | static void |
349 | gen_linear (double lo, double hi) |
350 | { |
351 | for (int i = 0; i < N; i++) |
352 | A[i] = (lo * (N - i) + hi * i) / N; |
353 | } |
354 | |
355 | static void |
356 | genf_linear (double lo, double hi) |
357 | { |
358 | for (int i = 0; i < N; i++) |
359 | Af[i] = (float)(lo * (N - i) + hi * i) / N; |
360 | } |
361 | |
/* Reinterpret the 64 bits of I as a double.  */
static inline double
asdouble (uint64_t i)
{
  union
  {
    uint64_t bits;
    double value;
  } pun;

  pun.bits = i;
  return pun.value;
}
372 | |
/* State of the pseudo-random generator used by frand.  */
static uint64_t seed = 0x0123456789abcdef;

/* Advance the generator and return a value in [LO, HI): the top 52 bits
   of the state become the mantissa of a double in [1, 2), which is then
   shifted and scaled to the requested interval.  */
static double
frand (double lo, double hi)
{
  seed = 6364136223846793005ULL * seed + 1;
  double unit = asdouble ((seed >> 12) | (0x3ffULL << 52)) - 1.0;
  return lo + (hi - lo) * unit;
}
381 | |
382 | static void |
383 | gen_rand (double lo, double hi) |
384 | { |
385 | for (int i = 0; i < N; i++) |
386 | A[i] = frand (lo, hi); |
387 | } |
388 | |
389 | static void |
390 | genf_rand (double lo, double hi) |
391 | { |
392 | for (int i = 0; i < N; i++) |
393 | Af[i] = (float)frand (lo, hi); |
394 | } |
395 | |
396 | static void |
397 | gen_trace (int index) |
398 | { |
399 | for (int i = 0; i < N; i++) |
400 | A[i] = Trace[index + i]; |
401 | } |
402 | |
403 | static void |
404 | genf_trace (int index) |
405 | { |
406 | for (int i = 0; i < N; i++) |
407 | Af[i] = (float)Trace[index + i]; |
408 | } |
409 | |
410 | static void |
411 | run_thruput (double f (double)) |
412 | { |
413 | for (int i = 0; i < N; i++) |
414 | f (A[i]); |
415 | } |
416 | |
417 | static void |
418 | runf_thruput (float f (float)) |
419 | { |
420 | for (int i = 0; i < N; i++) |
421 | f (Af[i]); |
422 | } |
423 | |
424 | volatile double zero = 0; |
425 | |
426 | static void |
427 | run_latency (double f (double)) |
428 | { |
429 | double z = zero; |
430 | double prev = z; |
431 | for (int i = 0; i < N; i++) |
432 | prev = f (A[i] + prev * z); |
433 | } |
434 | |
435 | static void |
436 | runf_latency (float f (float)) |
437 | { |
438 | float z = (float)zero; |
439 | float prev = z; |
440 | for (int i = 0; i < N; i++) |
441 | prev = f (Af[i] + prev * z); |
442 | } |
443 | |
444 | static void |
445 | run_v_thruput (v_double f (v_double)) |
446 | { |
447 | for (int i = 0; i < N; i += v_double_len ()) |
448 | f (v_double_load (A+i)); |
449 | } |
450 | |
451 | static void |
452 | runf_v_thruput (v_float f (v_float)) |
453 | { |
454 | for (int i = 0; i < N; i += v_float_len ()) |
455 | f (v_float_load (Af+i)); |
456 | } |
457 | |
458 | static void |
459 | run_v_latency (v_double f (v_double)) |
460 | { |
461 | v_double z = v_double_dup (zero); |
462 | v_double prev = z; |
463 | for (int i = 0; i < N; i += v_double_len ()) |
464 | prev = f (v_double_load (A+i) + prev * z); |
465 | } |
466 | |
467 | static void |
468 | runf_v_latency (v_float f (v_float)) |
469 | { |
470 | v_float z = v_float_dup (zero); |
471 | v_float prev = z; |
472 | for (int i = 0; i < N; i += v_float_len ()) |
473 | prev = f (v_float_load (Af+i) + prev * z); |
474 | } |
475 | |
#ifdef __vpcs
/* Same four runners as the plain vector ones, for functions declared
   with __vpcs.  */

/* Call F on A one vector at a time; the results are discarded.  */
static void
run_vn_thruput (__vpcs v_double f (v_double))
{
  const int step = v_double_len ();

  for (int pos = 0; pos < N; pos += step)
    f (v_double_load (A + pos));
}

/* Call F on Af one vector at a time; the results are discarded.  */
static void
runf_vn_thruput (__vpcs v_float f (v_float))
{
  const int step = v_float_len ();

  for (int pos = 0; pos < N; pos += step)
    f (v_float_load (Af + pos));
}

/* Call F on A one vector at a time, chaining each argument to the
   previous result through a multiplication by a run-time zero.  */
static void
run_vn_latency (__vpcs v_double f (v_double))
{
  const int step = v_double_len ();
  v_double scale = v_double_dup (zero);
  v_double carry = scale;

  for (int pos = 0; pos < N; pos += step)
    {
      v_double in = v_double_load (A + pos);
      carry = f (in + carry * scale);
    }
}

/* Call F on Af one vector at a time, chaining each argument to the
   previous result through a multiplication by a run-time zero.  */
static void
runf_vn_latency (__vpcs v_float f (v_float))
{
  const int step = v_float_len ();
  v_float scale = v_float_dup (zero);
  v_float carry = scale;

  for (int pos = 0; pos < N; pos += step)
    {
      v_float in = v_float_load (Af + pos);
      carry = f (in + carry * scale);
    }
}
#endif /* __vpcs */
509 | |
/* Return the current time in nanoseconds for measuring intervals.

   The monotonic clock is used rather than the wall clock: only the
   difference of two readings matters here, and the wall clock can be
   stepped while a measurement is running, which would corrupt that
   difference.  Aborts if the clock cannot be read.  */
static uint64_t
tic (void)
{
  struct timespec ts;

  if (clock_gettime (CLOCK_MONOTONIC, &ts) != 0)
    abort ();
  return (uint64_t) ts.tv_sec * 1000000000ULL + (uint64_t) ts.tv_nsec;
}
518 | |
/* Time RUN (F) and leave the best result in the caller's variable `dt'.
   After one untimed warm-up call, measurecount measurements are taken,
   each timing itercount consecutive calls of RUN (F); dt receives the
   smallest elapsed time in nanoseconds.  dt starts at the largest
   uint64_t value so that any measurement replaces it.  */
#define TIMEIT(run, f) do { \
  dt = -1; \
  run (f); /* Warm up. */ \
  for (int measure_ = 0; measure_ < measurecount; measure_++) \
    { \
      uint64_t start_ = tic (); \
      for (int iter_ = 0; iter_ < itercount; iter_++) \
        run (f); \
      uint64_t elapsed_ = tic () - start_; \
      if (elapsed_ < dt) \
        dt = elapsed_; \
    } \
} while (0)
532 | |
533 | static void |
534 | bench1 (const struct fun *f, int type, double lo, double hi) |
535 | { |
536 | uint64_t dt = 0; |
537 | uint64_t ns100; |
538 | const char *s = type == 't' ? "rthruput" : "latency" ; |
539 | int vlen = 1; |
540 | |
541 | if (f->vec && f->prec == 'd') |
542 | vlen = v_double_len(); |
543 | else if (f->vec && f->prec == 'f') |
544 | vlen = v_float_len(); |
545 | |
546 | if (f->prec == 'd' && type == 't' && f->vec == 0) |
547 | TIMEIT (run_thruput, f->fun.d); |
548 | else if (f->prec == 'd' && type == 'l' && f->vec == 0) |
549 | TIMEIT (run_latency, f->fun.d); |
550 | else if (f->prec == 'f' && type == 't' && f->vec == 0) |
551 | TIMEIT (runf_thruput, f->fun.f); |
552 | else if (f->prec == 'f' && type == 'l' && f->vec == 0) |
553 | TIMEIT (runf_latency, f->fun.f); |
554 | else if (f->prec == 'd' && type == 't' && f->vec == 'v') |
555 | TIMEIT (run_v_thruput, f->fun.vd); |
556 | else if (f->prec == 'd' && type == 'l' && f->vec == 'v') |
557 | TIMEIT (run_v_latency, f->fun.vd); |
558 | else if (f->prec == 'f' && type == 't' && f->vec == 'v') |
559 | TIMEIT (runf_v_thruput, f->fun.vf); |
560 | else if (f->prec == 'f' && type == 'l' && f->vec == 'v') |
561 | TIMEIT (runf_v_latency, f->fun.vf); |
562 | #ifdef __vpcs |
563 | else if (f->prec == 'd' && type == 't' && f->vec == 'n') |
564 | TIMEIT (run_vn_thruput, f->fun.vnd); |
565 | else if (f->prec == 'd' && type == 'l' && f->vec == 'n') |
566 | TIMEIT (run_vn_latency, f->fun.vnd); |
567 | else if (f->prec == 'f' && type == 't' && f->vec == 'n') |
568 | TIMEIT (runf_vn_thruput, f->fun.vnf); |
569 | else if (f->prec == 'f' && type == 'l' && f->vec == 'n') |
570 | TIMEIT (runf_vn_latency, f->fun.vnf); |
571 | #endif |
572 | |
573 | if (type == 't') |
574 | { |
575 | ns100 = (100 * dt + itercount * N / 2) / (itercount * N); |
576 | printf (format: "%9s %8s: %4u.%02u ns/elem %10llu ns in [%g %g]\n" , f->name, s, |
577 | (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), |
578 | (unsigned long long) dt, lo, hi); |
579 | } |
580 | else if (type == 'l') |
581 | { |
582 | ns100 = (100 * dt + itercount * N / vlen / 2) / (itercount * N / vlen); |
583 | printf (format: "%9s %8s: %4u.%02u ns/call %10llu ns in [%g %g]\n" , f->name, s, |
584 | (unsigned) (ns100 / 100), (unsigned) (ns100 % 100), |
585 | (unsigned long long) dt, lo, hi); |
586 | } |
587 | fflush (stdout); |
588 | } |
589 | |
590 | static void |
591 | bench (const struct fun *f, double lo, double hi, int type, int gen) |
592 | { |
593 | if (f->prec == 'd' && gen == 'r') |
594 | gen_rand (lo, hi); |
595 | else if (f->prec == 'd' && gen == 'l') |
596 | gen_linear (lo, hi); |
597 | else if (f->prec == 'd' && gen == 't') |
598 | gen_trace (index: 0); |
599 | else if (f->prec == 'f' && gen == 'r') |
600 | genf_rand (lo, hi); |
601 | else if (f->prec == 'f' && gen == 'l') |
602 | genf_linear (lo, hi); |
603 | else if (f->prec == 'f' && gen == 't') |
604 | genf_trace (index: 0); |
605 | |
606 | if (gen == 't') |
607 | hi = trace_size / N; |
608 | |
609 | if (type == 'b' || type == 't') |
610 | bench1 (f, type: 't', lo, hi); |
611 | |
612 | if (type == 'b' || type == 'l') |
613 | bench1 (f, type: 'l', lo, hi); |
614 | |
615 | for (int i = N; i < trace_size; i += N) |
616 | { |
617 | if (f->prec == 'd') |
618 | gen_trace (index: i); |
619 | else |
620 | genf_trace (index: i); |
621 | |
622 | lo = i / N; |
623 | if (type == 'b' || type == 't') |
624 | bench1 (f, type: 't', lo, hi); |
625 | |
626 | if (type == 'b' || type == 'l') |
627 | bench1 (f, type: 'l', lo, hi); |
628 | } |
629 | } |
630 | |
631 | static void |
632 | readtrace (const char *name) |
633 | { |
634 | int n = 0; |
635 | FILE *f = strcmp (s1: name, s2: "-" ) == 0 ? stdin : fopen (filename: name, modes: "r" ); |
636 | if (!f) |
637 | { |
638 | printf (format: "opening \"%s\" failed: %m\n" , name); |
639 | exit (status: 1); |
640 | } |
641 | for (;;) |
642 | { |
643 | if (n >= trace_size) |
644 | { |
645 | trace_size += N; |
646 | Trace = realloc (ptr: Trace, size: trace_size * sizeof (Trace[0])); |
647 | if (Trace == NULL) |
648 | { |
649 | printf (format: "out of memory\n" ); |
650 | exit (status: 1); |
651 | } |
652 | } |
653 | if (fscanf (stream: f, format: "%lf" , Trace + n) != 1) |
654 | break; |
655 | n++; |
656 | } |
657 | if (ferror (stream: f) || n == 0) |
658 | { |
659 | printf (format: "reading \"%s\" failed: %m\n" , name); |
660 | exit (status: 1); |
661 | } |
662 | fclose (stream: f); |
663 | if (n % N == 0) |
664 | trace_size = n; |
665 | for (int i = 0; n < trace_size; n++, i++) |
666 | Trace[n] = Trace[i]; |
667 | } |
668 | |
669 | static void |
670 | usage (void) |
671 | { |
672 | printf (format: "usage: ./mathbench [-g rand|linear|trace] [-t latency|thruput|both] " |
673 | "[-i low high] [-f tracefile] [-m measurements] [-c iterations] func " |
674 | "[func2 ..]\n" ); |
675 | printf (format: "func:\n" ); |
676 | printf (format: "%7s [run all benchmarks]\n" , "all" ); |
677 | for (const struct fun *f = funtab; f->name; f++) |
678 | printf (format: "%7s [low: %g high: %g]\n" , f->name, f->lo, f->hi); |
679 | exit (status: 1); |
680 | } |
681 | |
682 | int |
683 | main (int argc, char *argv[]) |
684 | { |
685 | int usergen = 0, gen = 'r', type = 'b', all = 0; |
686 | double lo = 0, hi = 0; |
687 | const char *tracefile = "-" ; |
688 | |
689 | argv++; |
690 | argc--; |
691 | for (;;) |
692 | { |
693 | if (argc <= 0) |
694 | usage (); |
695 | if (argv[0][0] != '-') |
696 | break; |
697 | else if (argc >= 3 && strcmp (s1: argv[0], s2: "-i" ) == 0) |
698 | { |
699 | usergen = 1; |
700 | lo = strtod (nptr: argv[1], endptr: 0); |
701 | hi = strtod (nptr: argv[2], endptr: 0); |
702 | argv += 3; |
703 | argc -= 3; |
704 | } |
705 | else if (argc >= 2 && strcmp (s1: argv[0], s2: "-m" ) == 0) |
706 | { |
707 | measurecount = strtol (nptr: argv[1], endptr: 0, base: 0); |
708 | argv += 2; |
709 | argc -= 2; |
710 | } |
711 | else if (argc >= 2 && strcmp (s1: argv[0], s2: "-c" ) == 0) |
712 | { |
713 | itercount = strtol (nptr: argv[1], endptr: 0, base: 0); |
714 | argv += 2; |
715 | argc -= 2; |
716 | } |
717 | else if (argc >= 2 && strcmp (s1: argv[0], s2: "-g" ) == 0) |
718 | { |
719 | gen = argv[1][0]; |
720 | if (strchr (s: "rlt" , c: gen) == 0) |
721 | usage (); |
722 | argv += 2; |
723 | argc -= 2; |
724 | } |
725 | else if (argc >= 2 && strcmp (s1: argv[0], s2: "-f" ) == 0) |
726 | { |
727 | gen = 't'; /* -f implies -g trace. */ |
728 | tracefile = argv[1]; |
729 | argv += 2; |
730 | argc -= 2; |
731 | } |
732 | else if (argc >= 2 && strcmp (s1: argv[0], s2: "-t" ) == 0) |
733 | { |
734 | type = argv[1][0]; |
735 | if (strchr (s: "ltb" , c: type) == 0) |
736 | usage (); |
737 | argv += 2; |
738 | argc -= 2; |
739 | } |
740 | else |
741 | usage (); |
742 | } |
743 | if (gen == 't') |
744 | { |
745 | readtrace (name: tracefile); |
746 | lo = hi = 0; |
747 | usergen = 1; |
748 | } |
749 | while (argc > 0) |
750 | { |
751 | int found = 0; |
752 | all = strcmp (s1: argv[0], s2: "all" ) == 0; |
753 | for (const struct fun *f = funtab; f->name; f++) |
754 | if (all || strcmp (s1: argv[0], s2: f->name) == 0) |
755 | { |
756 | found = 1; |
757 | if (!usergen) |
758 | { |
759 | lo = f->lo; |
760 | hi = f->hi; |
761 | } |
762 | bench (f, lo, hi, type, gen); |
763 | if (usergen && !all) |
764 | break; |
765 | } |
766 | if (!found) |
767 | printf (format: "unknown function: %s\n" , argv[0]); |
768 | argv++; |
769 | argc--; |
770 | } |
771 | return 0; |
772 | } |
773 | |