1 | /* |
2 | * Copyright (c) 2003, 2007-14 Matteo Frigo |
3 | * Copyright (c) 2003, 2007-14 Massachusetts Institute of Technology |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining |
6 | * a copy of this software and associated documentation files (the |
7 | * "Software"), to deal in the Software without restriction, including |
8 | * without limitation the rights to use, copy, modify, merge, publish, |
9 | * distribute, sublicense, and/or sell copies of the Software, and to |
10 | * permit persons to whom the Software is furnished to do so, subject to |
11 | * the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be |
14 | * included in all copies or substantial portions of the Software. |
15 | * |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
19 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
20 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
21 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
22 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
23 | * |
24 | */ |
25 | |
26 | |
27 | /* machine-dependent cycle counters code. Needs to be inlined. */ |
28 | |
29 | /***************************************************************************/ |
30 | /* To use the cycle counters in your code, simply #include "cycle.h" (this |
31 | file), and then use the functions/macros: |
32 | |
33 | ticks getticks(void); |
34 | |
   ticks is an opaque typedef defined below, representing the current time.
   You extract the elapsed time between two calls to getticks() via:
37 | |
38 | double elapsed(ticks t1, ticks t0); |
39 | |
40 | which returns a double-precision variable in arbitrary units. You |
41 | are not expected to convert this into human units like seconds; it |
42 | is intended only for *comparisons* of time intervals. |
43 | |
44 | (In order to use some of the OS-dependent timer routines like |
45 | Solaris' gethrtime, you need to paste the autoconf snippet below |
46 | into your configure.ac file and #include "config.h" before cycle.h, |
47 | or define the relevant macros manually if you are not using autoconf.) |
48 | */ |
49 | |
50 | /***************************************************************************/ |
51 | /* This file uses macros like HAVE_GETHRTIME that are assumed to be |
52 | defined according to whether the corresponding function/type/header |
53 | is available on your system. The necessary macros are most |
54 | conveniently defined if you are using GNU autoconf, via the tests: |
55 | |
56 | dnl --------------------------------------------------------------------- |
57 | |
58 | AC_C_INLINE |
59 | AC_HEADER_TIME |
60 | AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h]) |
61 | |
62 | AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H |
63 | #include <sys/time.h> |
64 | #endif]) |
65 | |
66 | AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time]) |
67 | |
68 | dnl Cray UNICOS _rtc() (real-time clock) intrinsic |
69 | AC_MSG_CHECKING([for _rtc intrinsic]) |
70 | rtc_ok=yes |
71 | AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H |
72 | #include <intrinsics.h> |
73 | #endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no]) |
74 | AC_MSG_RESULT($rtc_ok) |
75 | |
76 | dnl --------------------------------------------------------------------- |
77 | */ |
78 | |
79 | #ifndef QBENCHLIB_CYCLE_H |
80 | #define QBENCHLIB_CYCLE_H |
81 | #define ticks CycleCounterTicks |
82 | |
83 | /***************************************************************************/ |
84 | |
85 | #if TIME_WITH_SYS_TIME |
86 | # include <sys/time.h> |
87 | # include <time.h> |
88 | #else |
89 | # if HAVE_SYS_TIME_H |
90 | # include <sys/time.h> |
91 | # else |
92 | # include <time.h> |
93 | # endif |
94 | #endif |
95 | |
/*
 * Expands to the default elapsed() used by every platform section whose
 * `ticks` type is a plain arithmetic type.  INL is the spelling of the
 * inline keyword accepted by the compiler that selected the section
 * (inline, __inline or __inline__).
 *
 * The generated function converts each reading to double first and only
 * then subtracts, returning t1 - t0 in raw counter units.
 */
#define INLINE_ELAPSED(INL)                               \
    static INL double elapsed(ticks t1, ticks t0)         \
    {                                                     \
        const double finish = (double)t1;                 \
        const double start = (double)t0;                  \
        return finish - start;                            \
    }
100 | |
101 | /*----------------------------------------------------------------*/ |
/*
 * Solaris: gethrtime() is used directly as the counter.  The section is
 * selected only when both the function and the hrtime_t type were detected
 * and no earlier section has already provided a counter.
 */
#if !defined(HAVE_TICK_COUNTER)
#  if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T)
typedef hrtime_t ticks;

/* getticks() is just another name for gethrtime(); no wrapper is needed. */
#    define getticks gethrtime

INLINE_ELAPSED(inline)

#    define HAVE_TICK_COUNTER
#  endif
#endif
112 | |
113 | /*----------------------------------------------------------------*/ |
/*
 * AIX v. 4+: read the real-time clock or time-base register through
 * read_real_time() and convert readings with time_base_to_time().
 */
#if !defined(HAVE_TICK_COUNTER) && defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME)
typedef timebasestruct_t ticks;

/* Returns the raw (not yet converted) reading filled in by read_real_time(). */
static __inline ticks getticks(void)
{
    ticks now;

    read_real_time(&now, TIMEBASE_SZ);
    return now;
}

/*
 * Returns t1 - t0.  Both readings arrive by value, so converting them in
 * place with time_base_to_time() leaves the caller's copies untouched.  The
 * tb_high difference is scaled by 1e9 and added to the tb_low difference;
 * per the original author's note the result is a time in nanoseconds.
 */
static __inline double elapsed(ticks t1, ticks t0)
{
    double high_diff;
    double low_diff;

    time_base_to_time(&t1, TIMEBASE_SZ);
    time_base_to_time(&t0, TIMEBASE_SZ);

    high_diff = (double)t1.tb_high - (double)t0.tb_high;
    low_diff = (double)t1.tb_low - (double)t0.tb_low;
    return high_diff * 1.0e9 + low_diff;
}

#define HAVE_TICK_COUNTER
#endif
135 | |
136 | /*----------------------------------------------------------------*/ |
/*
 * PowerPC "cycle" counter using the time base register.  Selected for GCC
 * or IBM compilers with GCC-style asm on PowerPC, and for Metrowerks on
 * the classic Macintosh.
 */
#if !defined(HAVE_TICK_COUNTER) \
    && (((defined(__GNUC__) || defined(__IBM_GCC_ASM)) && (defined(__powerpc__) || defined(__ppc__))) \
        || (defined(__MWERKS__) && defined(macintosh)))
typedef unsigned long long ticks;

/*
 * Assembles a 64-bit value from two 32-bit reads.  The upper half is read
 * before and after the lower half; the sequence is repeated until both
 * upper reads agree, so the two halves belong to the same 64-bit value.
 */
static __inline__ ticks getticks(void)
{
    unsigned int lower, upper_before, upper_after;

    for (;;) {
        __asm__ __volatile__ ("mftbu %0" : "=r" (upper_before));
        __asm__ __volatile__ ("mftb %0" : "=r" (lower));
        __asm__ __volatile__ ("mftbu %0" : "=r" (upper_after));
        if (upper_before == upper_after) {
            break;
        }
    }

    return ((unsigned long long)upper_before << 32) | lower;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
160 | |
/*
 * MacOS/Mach (Darwin) time-base register interface.  Unlike UpTime from
 * Carbon, it requires no additional libraries to be linked.
 */
#if !defined(HAVE_TICK_COUNTER) && defined(HAVE_MACH_MACH_TIME_H) && defined(HAVE_MACH_ABSOLUTE_TIME)
#  include <mach/mach_time.h>

typedef uint64_t ticks;

/* getticks() is simply another name for mach_absolute_time(). */
#  define getticks mach_absolute_time

INLINE_ELAPSED(__inline__)

#  define HAVE_TICK_COUNTER
#endif
170 | |
171 | /*----------------------------------------------------------------*/ |
/*
 * Pentium cycle counter (32-bit x86 with GCC or ICC).
 */
#if !defined(HAVE_TICK_COUNTER) && defined(__i386__) && (defined(__GNUC__) || defined(__ICC))
typedef unsigned long long ticks;

/* Executes rdtsc and returns the 64-bit value delivered through the "=A" output. */
static __inline__ ticks getticks(void)
{
    ticks tsc;

    /* no input, nothing else clobbered */
    __asm__ __volatile__("rdtsc" : "=A" (tsc));
    return tsc;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
#endif
192 | |
/* Visual C++ on 32-bit x86 -- thanks to Morten Nissov for his help with this */
#ifdef _MSC_VER
#  if !defined(HAVE_TICK_COUNTER) && _MSC_VER >= 1200 && _M_IX86 >= 500
#    include <windows.h>
typedef LARGE_INTEGER ticks;

/* hack for VC++ 5.0: the instruction is emitted as two raw opcode bytes */
#    define RDTSC __asm __emit 0fh __asm __emit 031h

/* Stores EDX into the high part and EAX into the low part of the result. */
static __inline ticks getticks(void)
{
    ticks stamp;

    __asm {
        RDTSC
        mov stamp.HighPart, edx
        mov stamp.LowPart, eax
    }
    return stamp;
}

/* Returns t1 - t0, computed on the QuadPart views after conversion to double. */
static __inline double elapsed(ticks t1, ticks t0)
{
    const double finish = (double)t1.QuadPart;
    const double start = (double)t0.QuadPart;

    return finish - start;
}

#    define HAVE_TICK_COUNTER
#    define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
#  endif
#endif /* _MSC_VER */
221 | |
222 | /*----------------------------------------------------------------*/ |
223 | /* |
224 | * X86-64 cycle counter |
225 | */ |
226 | #if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) |
227 | typedef unsigned long long ticks; |
228 | |
229 | static __inline__ ticks getticks(void) |
230 | { |
231 | unsigned a, d; |
232 | __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); |
233 | return ((ticks)a) | (((ticks)d) << 32); |
234 | } |
235 | |
236 | INLINE_ELAPSED(__inline__) |
237 | |
238 | #define HAVE_TICK_COUNTER |
239 | #define TIME_MIN 5000.0 |
240 | #endif |
241 | |
/*
 * PGI compiler on x86-64, courtesy Cristiano Calonaci, Andrea Tarsi, &
 * Roberto Gori.
 * NOTE: this code will fail to link unless you use the -Masmkeyword
 * compiler option (grrr).
 */
#if !defined(HAVE_TICK_COUNTER) && defined(__x86_64__) && defined(__PGI)
typedef unsigned long long ticks;

/*
 * There is deliberately no return statement: the asm sequence merges the
 * two halves produced by rdtsc into %rax and the function ends right after.
 * NOTE(review): this presumably relies on the compiler leaving %rax
 * untouched as the return register -- confirm before restructuring.
 */
static ticks getticks(void)
{
    asm(" rdtsc; shl $0x20,%rdx; mov %eax,%eax; or %rdx,%rax; ");
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
#endif
255 | |
/* Visual C++ on x64, courtesy of Dirk Michaelis */
#ifdef _MSC_VER
#  if !defined(HAVE_TICK_COUNTER) && _MSC_VER >= 1400 && (defined(_M_X64) || defined(_M_AMD64))

#    include <intrin.h>
#pragma intrinsic(__rdtsc)

typedef unsigned __int64 ticks;

/* getticks() is an alias for the __rdtsc compiler intrinsic. */
#    define getticks __rdtsc

INLINE_ELAPSED(__inline)

#    define HAVE_TICK_COUNTER
#    define TIME_MIN 5000.0
#  endif
#endif /* _MSC_VER */
270 | |
271 | /*----------------------------------------------------------------*/ |
272 | /* |
273 | * IA64 cycle counter |
274 | */ |
275 | |
/* Intel's icc/ecc compiler on IA64 */
#if !defined(HAVE_TICK_COUNTER) && defined(__ia64__) && (defined(__ECC) || defined(__EDG_VERSION))
#include <ia64intrin.h>

typedef unsigned long ticks;

/* Reads the register identified by _IA64_REG_AR_ITC through __getReg(). */
static __inline__ ticks getticks(void)
{
    const ticks itc = __getReg(_IA64_REG_AR_ITC);

    return itc;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
290 | |
/* GCC on IA64 */
#if !defined(HAVE_TICK_COUNTER) && defined(__ia64__) && defined(__GNUC__)
typedef unsigned long ticks;

/* Copies ar.itc into a general register with a single mov instruction. */
static __inline__ ticks getticks(void)
{
    ticks itc;

    __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (itc));
    return itc;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
307 | |
/* HP/UX IA64 compiler, courtesy Teresa L. Johnson */
#if !defined(HAVE_TICK_COUNTER) && defined(__ia64) && defined(__hpux)
#include <machine/sys/inline.h>
typedef unsigned long ticks;

/* Returns the value of the _AREG_ITC register via _Asm_mov_from_ar(). */
static inline ticks getticks(void)
{
    return _Asm_mov_from_ar (_AREG_ITC);
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif
325 | |
/*
 * Microsoft Visual C++ on IA64: the counter is read through the __getReg
 * compiler intrinsic, which is declared by hand here.
 */
#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER)
typedef unsigned __int64 ticks;

#  ifdef __cplusplus
extern "C"
#  endif
ticks __getReg(int whichReg);
#pragma intrinsic(__getReg)

/*
 * NOTE(review): 3116 is presumably the numeric id of the AR.ITC register
 * (the icc/ecc section reads _IA64_REG_AR_ITC through the same intrinsic)
 * -- confirm against the compiler documentation.
 */
static __inline ticks getticks(void)
{
    volatile ticks temp;
    temp = __getReg(3116);
    return temp;
}

/*
 * __inline is used here, as for getticks() above, because the MSVC releases
 * that target IA64 accept plain `inline` only in C++ sources; __inline works
 * in both C and C++.
 */
INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif
347 | |
348 | /*----------------------------------------------------------------*/ |
/*
 * PA-RISC cycle counter (GCC only).
 *
 * No implementation is provided for HP's native compiler: an attempt based
 * on _MFCTL(16, ret) from <machine/inline.h> does not compile.  The whole
 * section, including the ticks typedef, is therefore restricted to GCC so
 * that other compilers do not get a ticks type without a matching
 * getticks() and can still fall through to a later section.
 */
#if defined(__GNUC__) && (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

/* Executes "mfctl 16" and returns the value it places in the output register. */
static __inline__ ticks getticks(void)
{
    ticks ret;

    /* no input, nothing else clobbered */
    __asm__ __volatile__("mfctl 16, %0" : "=r" (ret));
    return ret;
}

/* __inline__ (same spelling as getticks above) also works in strict C89 mode. */
INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
378 | |
379 | /*----------------------------------------------------------------*/ |
/* S390, courtesy of James Treacy */
#if !defined(HAVE_TICK_COUNTER) && defined(__s390__) && defined(__GNUC__)
typedef unsigned long long ticks;

/*
 * Has the stck instruction store into a local variable whose address is
 * passed as the input operand, then returns that variable.
 */
static __inline__ ticks getticks(void)
{
    ticks clock_value;

    /* the instruction writes through the pointer, hence the "memory" clobber */
    __asm__("stck 0(%0)" : : "a" (&(clock_value)) : "memory", "cc");
    return clock_value;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
395 | /*----------------------------------------------------------------*/ |
#if !defined(HAVE_TICK_COUNTER) && defined(__alpha__) && defined(__GNUC__)
/*
 * GCC on Alpha.  The 32-bit cycle counter on alpha overflows pretty
 * quickly, unfortunately.  A 1GHz machine overflows in 4 seconds.
 */
typedef unsigned int ticks;

/* Executes rpcc and keeps only the low 32 bits of the register it fills. */
static __inline__ ticks getticks(void)
{
    unsigned long raw;

    __asm__ __volatile__ ("rpcc %0" : "=r" (raw));
    return (ticks)(raw & 0xFFFFFFFF);
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
414 | |
415 | /*----------------------------------------------------------------*/ |
/* GCC on SPARC v9 */
#if !defined(HAVE_TICK_COUNTER) && defined(__sparc_v9__) && defined(__GNUC__)
typedef unsigned long ticks;

/* Reads %tick into a general register and returns it. */
static __inline__ ticks getticks(void)
{
    ticks tick_value;

    __asm__ __volatile__("rd %%tick, %0" : "=r" (tick_value));
    return tick_value;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
430 | |
431 | /*----------------------------------------------------------------*/ |
/* DEC C / C++ on Alpha, using the asm() facility declared in <c_asm.h> */
#if !defined(HAVE_TICK_COUNTER) && defined(HAVE_C_ASM_H) && defined(__alpha) \
    && (defined(__DECC) || defined(__DECCXX))
#  include <c_asm.h>
typedef unsigned int ticks;

/* Executes rpcc and keeps only the low 32 bits of the value asm() returns. */
static __inline ticks getticks(void)
{
    unsigned long raw;

    raw = asm("rpcc %v0");
    return (ticks)(raw & 0xFFFFFFFF);
}

INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif
447 | /*----------------------------------------------------------------*/ |
/*
 * SGI/Irix: clock_gettime() with the CLOCK_SGI_CYCLE clock.  Never selected
 * on Android.
 */
#if !defined(HAVE_TICK_COUNTER) && !defined(__ANDROID__) \
    && defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE)
typedef struct timespec ticks;

/* Returns the timespec filled in by clock_gettime(); its status is not checked. */
static inline ticks getticks(void)
{
    struct timespec now;

    clock_gettime(CLOCK_SGI_CYCLE, &now);
    return now;
}

/* Returns t1 - t0: the tv_sec difference scaled by 1e9 plus the tv_nsec difference. */
static inline double elapsed(ticks t1, ticks t0)
{
    const double sec_diff = (double)t1.tv_sec - (double)t0.tv_sec;
    const double nsec_diff = (double)t1.tv_nsec - (double)t0.tv_nsec;

    return sec_diff * 1.0E9 + nsec_diff;
}

#define HAVE_TICK_COUNTER
#endif
466 | |
467 | /*----------------------------------------------------------------*/ |
/* Cray UNICOS _rtc() intrinsic function */
#if !defined(HAVE_TICK_COUNTER) && defined(HAVE__RTC)
#  ifdef HAVE_INTRINSICS_H
#    include <intrinsics.h>
#  endif

typedef long long ticks;

/* getticks() is an alias for the _rtc() intrinsic. */
#  define getticks _rtc

INLINE_ELAPSED(inline)

#  define HAVE_TICK_COUNTER
#endif
482 | |
483 | /*----------------------------------------------------------------*/ |
/* MIPS ZBus timer, read through a read-only mapping of /dev/mem */
#if HAVE_MIPS_ZBUS_TIMER
#if defined(__mips__) && !defined(HAVE_TICK_COUNTER)
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdint.h>   /* uint32_t, uint64_t */
#include <stdio.h>    /* perror */

typedef uint64_t ticks;

/*
 * Returns the current 64-bit value of the memory-mapped timer register.
 *
 * The first successful call maps one page of /dev/mem and caches the
 * pointer; later calls only read through it.  If /dev/mem cannot be opened
 * or mapped, the error is reported with perror() and 0 is returned; nothing
 * is cached in that case, so the next call tries again.
 */
static inline ticks getticks(void)
{
    /* volatile: every call must perform a real load from the device register */
    static volatile uint64_t *counter = 0;

    if (counter == 0) {
        /* NOTE(review): presumably the physical address of the ZBus timer
           register -- confirm against the SoC documentation. */
        const uint32_t rq_addr = 0x10030000;
        void *mapping;
        int pgsize;
        int fd;

        pgsize = getpagesize();
        fd = open("/dev/mem", O_RDONLY | O_SYNC, 0);
        if (fd < 0) {
            perror("open");
            return 0;
        }

        mapping = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr);
        if (mapping == MAP_FAILED) {
            /* report before close() so that errno still belongs to mmap() */
            perror("mmap");
            close(fd);
            return 0;
        }
        /* the descriptor is not needed once the mapping exists */
        close(fd);

        /* publish the pointer only after success, so MAP_FAILED is never dereferenced */
        counter = (volatile uint64_t *)mapping;
    }

    return *counter;
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif
#endif /* HAVE_MIPS_ZBUS_TIMER */
525 | |
/*
 * ARMv7-A CNTVCT counter, enabled by defining HAVE_ARMV7A_CNTVCT.  Like all
 * other sections it is skipped when a counter has already been selected,
 * which avoids duplicate definitions of ticks/getticks/elapsed.
 */
#if defined(HAVE_ARMV7A_CNTVCT) && !defined(HAVE_TICK_COUNTER)
#include <stdint.h>   /* uint32_t, uint64_t */

typedef uint64_t ticks;

/* Reads the two 32-bit halves with one mrrc transfer and joins them into 64 bits. */
static inline ticks getticks(void)
{
    uint32_t lo, hi;

    /* __asm__/__volatile__ are also accepted in strict ISO modes
       (-std=c99, -std=c11), where the plain asm keyword is not available */
    __asm__ __volatile__("mrrc p15, 1, %0, %1, c14" : "=r" (lo), "=r" (hi));
    return ((uint64_t)lo) | (((uint64_t)hi) << 32);
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif
537 | |
/*
 * ARMv7-A PMCCNTR counter, enabled by defining HAVE_ARMV7A_PMCCNTR.  Like
 * all other sections it is skipped when a counter has already been selected,
 * which avoids duplicate definitions of ticks/getticks/elapsed.
 */
#if defined(HAVE_ARMV7A_PMCCNTR) && !defined(HAVE_TICK_COUNTER)
#include <stdint.h>   /* uint32_t, uint64_t */

typedef uint64_t ticks;

/*
 * Reads the register with a single mrc transfer.  The value is read into a
 * 32-bit variable, so only the low 32 bits of the returned ticks are ever
 * populated.
 */
static inline ticks getticks(void)
{
    uint32_t count;

    /* __asm__/__volatile__ are also accepted in strict ISO modes
       (-std=c99, -std=c11), where the plain asm keyword is not available */
    __asm__ __volatile__("mrc p15, 0, %0, c9, c13, 0" : "=r" (count));
    return count;
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif
549 | |
/*
 * AArch64 CNTVCT_EL0 counter, enabled by defining HAVE_ARMV8_CNTVCT_EL0.
 * It yields to the PMCCNTR_EL0 variant when that one is requested as well,
 * and is skipped when any counter has already been selected.
 */
#if defined(__aarch64__) && defined(HAVE_ARMV8_CNTVCT_EL0) \
    && !defined(HAVE_ARMV8_PMCCNTR_EL0) && !defined(HAVE_TICK_COUNTER)
#include <stdint.h>   /* uint64_t */

typedef uint64_t ticks;

/* Reads the 64-bit system register CNTVCT_EL0 with mrs. */
static inline ticks getticks(void)
{
    uint64_t count;

    /* __asm__/__volatile__ are also accepted in strict ISO modes
       (-std=c99, -std=c11), where the plain asm keyword is not available */
    __asm__ __volatile__("mrs %0, CNTVCT_EL0" : "=r" (count));
    return count;
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif
561 | |
/*
 * AArch64 PMCCNTR_EL0 counter, enabled by defining HAVE_ARMV8_PMCCNTR_EL0.
 * Like all other sections it is skipped when a counter has already been
 * selected, which avoids duplicate definitions of ticks/getticks/elapsed.
 */
#if defined(__aarch64__) && defined(HAVE_ARMV8_PMCCNTR_EL0) && !defined(HAVE_TICK_COUNTER)
#include <stdint.h>   /* uint64_t */

typedef uint64_t ticks;

/* Reads the 64-bit system register PMCCNTR_EL0 with mrs. */
static inline ticks getticks(void)
{
    uint64_t count = 0;

    /* __asm__/__volatile__ are also accepted in strict ISO modes
       (-std=c99, -std=c11), where the plain asm keyword is not available */
    __asm__ __volatile__("mrs %0, PMCCNTR_EL0" : "=r" (count));
    return count;
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif
573 | |
574 | #undef ticks |
575 | #endif // QBENCHLIB_CYCLE_H |
576 | |