1 | /* |
2 | * Copyright (c) 2003, 2006 Matteo Frigo |
3 | * Copyright (c) 2003, 2006 Massachusetts Institute of Technology |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining |
6 | * a copy of this software and associated documentation files (the |
7 | * "Software"), to deal in the Software without restriction, including |
8 | * without limitation the rights to use, copy, modify, merge, publish, |
9 | * distribute, sublicense, and/or sell copies of the Software, and to |
10 | * permit persons to whom the Software is furnished to do so, subject to |
11 | * the following conditions: |
12 | * |
13 | * The above copyright notice and this permission notice shall be |
14 | * included in all copies or substantial portions of the Software. |
15 | * |
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
17 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
18 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
19 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
20 | * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
21 | * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
22 | * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
23 | * |
24 | */ |
25 | |
26 | /* $Id: cycle.h,v 1.52 2006-02-08 02:36:47 athena Exp $ */ |
27 | |
28 | /* machine-dependent cycle counters code. Needs to be inlined. */ |
29 | |
30 | /***************************************************************************/ |
31 | /* To use the cycle counters in your code, simply #include "cycle.h" (this |
32 | file), and then use the functions/macros: |
33 | |
34 | CycleCounterTicks getticks(void); |
35 | |
36 | CycleCounterTicks is an opaque typedef defined below, representing the current time. |
You extract the elapsed time between two calls to getticks() via:
38 | |
39 | double elapsed(CycleCounterTicks t1, CycleCounterTicks t0); |
40 | |
41 | which returns a double-precision variable in arbitrary units. You |
42 | are not expected to convert this into human units like seconds; it |
43 | is intended only for *comparisons* of time intervals. |
44 | |
45 | (In order to use some of the OS-dependent timer routines like |
46 | Solaris' gethrtime, you need to paste the autoconf snippet below |
47 | into your configure.ac file and #include "config.h" before cycle.h, |
48 | or define the relevant macros manually if you are not using autoconf.) |
49 | */ |
50 | |
51 | /***************************************************************************/ |
52 | /* This file uses macros like HAVE_GETHRTIME that are assumed to be |
53 | defined according to whether the corresponding function/type/header |
54 | is available on your system. The necessary macros are most |
55 | conveniently defined if you are using GNU autoconf, via the tests: |
56 | |
57 | dnl --------------------------------------------------------------------- |
58 | |
59 | AC_C_INLINE |
60 | AC_HEADER_TIME |
61 | AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h]) |
62 | |
63 | AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H |
64 | #include <sys/time.h> |
65 | #endif]) |
66 | |
67 | AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time]) |
68 | |
69 | dnl Cray UNICOS _rtc() (real-time clock) intrinsic |
70 | AC_MSG_CHECKING([for _rtc intrinsic]) |
71 | rtc_ok=yes |
72 | AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H |
73 | #include <intrinsics.h> |
74 | #endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no]) |
75 | AC_MSG_RESULT($rtc_ok) |
76 | |
77 | dnl --------------------------------------------------------------------- |
78 | */ |
79 | |
80 | /***************************************************************************/ |
81 | |
82 | #ifndef QBENCHLIB_CYCLE_H |
83 | #define QBENCHLIB_CYCLE_H |
84 | |
85 | #if TIME_WITH_SYS_TIME |
86 | # include <sys/time.h> |
87 | # include <time.h> |
88 | #else |
89 | # if HAVE_SYS_TIME_H |
90 | # include <sys/time.h> |
91 | # else |
92 | # include <time.h> |
93 | # endif |
94 | #endif |
95 | |
/*
 * INLINE_ELAPSED(INL) expands to the complete definition of
 *
 *     static INL double elapsed(CycleCounterTicks t1, CycleCounterTicks t0)
 *
 * which returns the difference t1 - t0 converted to double.
 *
 * INL is the inline keyword spelling accepted by the compiler in use
 * (the sections below pass inline, __inline__ or __inline).  Because the
 * body subtracts the two arguments directly, the macro is only usable
 * where CycleCounterTicks is an arithmetic type.  The expansion has no
 * trailing semicolon, so it is invoked as a bare line.
 */
#define INLINE_ELAPSED(INL)                                               \
    static INL double elapsed(CycleCounterTicks t1, CycleCounterTicks t0) \
    {                                                                     \
        return (double)(t1 - t0);                                         \
    }
100 | |
101 | /*----------------------------------------------------------------*/ |
102 | /* Solaris */ |
#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
/* Ticks are stored as hrtime_t values. */
typedef hrtime_t CycleCounterTicks;

/* getticks() is a plain alias for gethrtime(); no wrapper function is emitted. */
#define getticks gethrtime

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(inline)

/* Record that a counter has been selected so later sections are skipped. */
#define HAVE_TICK_COUNTER
#endif
112 | |
113 | /*----------------------------------------------------------------*/ |
114 | /* AIX v. 4+ routines to read the real-time clock or time-base register */ |
#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
/* A tick is the raw structure filled in by read_real_time(). */
typedef timebasestruct_t CycleCounterTicks;

/* Sample the real-time clock / time-base register. */
static inline CycleCounterTicks getticks(void)
{
    CycleCounterTicks t;

    read_real_time(&t, TIMEBASE_SZ);
    return t;
}

/*
 * Return the interval t1 - t0 in nanoseconds.
 *
 * Both samples are first normalised with time_base_to_time(); the result
 * then combines the tb_high difference scaled by 1e9 with the tb_low
 * difference.  The arguments are passed by value, so the caller's samples
 * are not modified.
 *
 * Each field is converted to double *before* subtracting.  Subtracting
 * the raw fields first is wrong when they are unsigned (as they are in
 * AIX's timebasestruct_t): whenever t1.tb_low < t0.tb_low the difference
 * wraps around to a huge positive value instead of going negative, which
 * corrupts the computed interval.
 */
static inline double elapsed(CycleCounterTicks t1, CycleCounterTicks t0)
{
    time_base_to_time(&t1, TIMEBASE_SZ);
    time_base_to_time(&t0, TIMEBASE_SZ);
    return ((double)t1.tb_high - (double)t0.tb_high) * 1.0e9
         + ((double)t1.tb_low - (double)t0.tb_low);
}

#define HAVE_TICK_COUNTER
#endif
134 | |
135 | /*----------------------------------------------------------------*/ |
136 | /* |
137 | * PowerPC ``cycle'' counter using the time base register. |
138 | */ |
#if ((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh))) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long CycleCounterTicks;

/*
 * Assemble a 64-bit time-base value from separate 32-bit reads.
 *
 * The upper half is read both before and after the lower half; the reads
 * are repeated until the two upper-half values agree, so that the returned
 * upper and lower halves belong to the same 64-bit value.
 */
static __inline__ CycleCounterTicks getticks(void)
{
    unsigned int lower, upper_first, upper_second;

    for (;;) {
        __asm__ __volatile__ ("mftbu %0" : "=r" (upper_first));
        __asm__ __volatile__ ("mftb %0" : "=r" (lower));
        __asm__ __volatile__ ("mftbu %0" : "=r" (upper_second));
        if (upper_first == upper_second)
            break;
    }

    return (((unsigned long long)upper_first) << 32) | lower;
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
159 | |
160 | /* MacOS/Mach (Darwin) time-base register interface (unlike UpTime, |
161 | from Carbon, requires no additional libraries to be linked). */ |
#if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER)
#include <mach/mach_time.h>

/* Ticks are 64-bit unsigned values. */
typedef uint64_t CycleCounterTicks;

/* getticks() is a plain alias for mach_absolute_time(). */
#define getticks mach_absolute_time

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
169 | |
170 | /*----------------------------------------------------------------*/ |
171 | /* |
172 | * Pentium cycle counter |
173 | */ |
#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long CycleCounterTicks;

/* Execute rdtsc and return its 64-bit result. */
static __inline__ CycleCounterTicks getticks(void)
{
    CycleCounterTicks tsc;

    /* The asm takes no inputs and declares no clobbers beyond its output. */
    __asm__ __volatile__("rdtsc" : "=A" (tsc));
    return tsc;
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */
#endif
191 | |
192 | /* Visual C++ -- thanks to Morten Nissov for his help with this */ |
#if defined(_MSC_VER)
#if _MSC_VER >= 1200 && (_M_IX86 >= 500 || (defined(_WIN32_WCE) && defined(_X86_))) && !defined(HAVE_TICK_COUNTER)
#include <windows.h>

/* Ticks are kept in a LARGE_INTEGER so the two halves can be stored separately. */
typedef LARGE_INTEGER CycleCounterTicks;

/* Emit the two opcode bytes 0Fh 31h directly instead of using a mnemonic. */
#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */

/* Run RDTSC and store edx/eax into the high/low parts of the result. */
static __inline CycleCounterTicks getticks(void)
{
    CycleCounterTicks stamp;

    __asm {
        RDTSC
        mov stamp.HighPart, edx
        mov stamp.LowPart, eax
    }
    return stamp;
}

/* Difference of the two 64-bit QuadPart views, converted to double. */
static __inline double elapsed(CycleCounterTicks t1, CycleCounterTicks t0)
{
    return (double)(t1.QuadPart - t0.QuadPart);
}

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0 /* unreliable pentium IV cycle counter */
#endif
#endif
220 | |
#if _MSC_VER >= 1400 && defined(_WIN32_WCE) && !defined(HAVE_TICK_COUNTER)
#include <windows.h>

/* Ticks are the DWORD values returned by GetTickCount(). */
typedef DWORD CycleCounterTicks;

/* Thin wrapper around GetTickCount(). */
static __inline CycleCounterTicks getticks(void)
{
    return GetTickCount();
}

/*
 * elapsed(t1, t0) is (double)(t1 - t0); the shared macro expands to exactly
 * the function this section previously spelled out by hand.
 */
INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0
#endif
238 | |
239 | /*----------------------------------------------------------------*/ |
240 | /* |
241 | * X86-64 cycle counter |
242 | */ |
243 | #if (defined(__GNUC__) || defined(__ICC)) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER) |
244 | typedef unsigned long long CycleCounterTicks; |
245 | |
246 | static __inline__ CycleCounterTicks getticks(void) |
247 | { |
248 | unsigned a, d; |
249 | asm volatile("rdtsc" : "=a" (a), "=d" (d)); |
250 | return ((CycleCounterTicks)a) | (((CycleCounterTicks)d) << 32); |
251 | } |
252 | |
253 | INLINE_ELAPSED(__inline__) |
254 | |
255 | #define HAVE_TICK_COUNTER |
256 | #endif |
257 | |
258 | /* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori. |
259 | NOTE: this code will fail to link unless you use the -Masmkeyword compiler |
260 | option (grrr). */ |
#if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long CycleCounterTicks;

/*
 * Run rdtsc and merge edx:eax into rax inside a single asm string.
 *
 * NOTE(review): there is deliberately no C `return` statement here; the
 * function appears to rely on the value left in %rax by the asm being
 * taken as the return value.  Confirm this against the PGI compiler's
 * behaviour before changing anything in this function.
 */
static CycleCounterTicks getticks(void)
{
    asm(" rdtsc; shl $0x20,%rdx; mov %eax,%eax; or %rdx,%rax; " );
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
270 | |
271 | /* Visual C++ */ |
#if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)
#include <intrin.h>

/* Ticks are unsigned 64-bit values. */
typedef unsigned __int64 CycleCounterTicks;

/* getticks() is a plain alias for the __rdtsc intrinsic. */
#define getticks __rdtsc

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif
283 | |
284 | /*----------------------------------------------------------------*/ |
285 | /* |
286 | * IA64 cycle counter |
287 | */ |
288 | |
289 | /* intel's icc/ecc compiler */ |
#if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long CycleCounterTicks;
#include <ia64intrin.h>

/* Read the register named by _IA64_REG_AR_ITC through the __getReg intrinsic. */
static __inline__ CycleCounterTicks getticks(void)
{
    return __getReg(_IA64_REG_AR_ITC);
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
303 | |
304 | /* gcc */ |
#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long CycleCounterTicks;

/* Copy the ar.itc application register into a general register and return it. */
static __inline__ CycleCounterTicks getticks(void)
{
    CycleCounterTicks itc;

    __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (itc));
    return itc;
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
320 | |
321 | /* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */ |
#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
#include <machine/sys/inline.h>
typedef unsigned long CycleCounterTicks;

/* Read the register named by _AREG_ITC via the _Asm_mov_from_ar intrinsic. */
static inline CycleCounterTicks getticks(void)
{
    CycleCounterTicks itc = _Asm_mov_from_ar (_AREG_ITC);

    return itc;
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif
338 | |
339 | /* Microsoft Visual C++ */ |
#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER)
typedef unsigned __int64 CycleCounterTicks;

/*
 * Hand-written prototype for the __getReg compiler intrinsic.  The return
 * type must be CycleCounterTicks: the former spelling `ticks` names a type
 * that is not defined anywhere in this header, so this section could not
 * compile.
 */
#  ifdef __cplusplus
extern "C"
#  endif
CycleCounterTicks __getReg(int whichReg);
#pragma intrinsic(__getReg)

/*
 * Read register 3116 through __getReg.
 * NOTE(review): 3116 is presumably the numeric id of ar.itc, the register
 * the other IA64 sections read by name -- confirm against the compiler's
 * register constants.
 */
static __inline CycleCounterTicks getticks(void)
{
    volatile CycleCounterTicks temp;

    temp = __getReg(3116);
    return temp;
}

/*
 * elapsed(t1, t0) is (double)(t1 - t0).  This section previously set
 * HAVE_TICK_COUNTER without providing elapsed() at all, leaving the
 * documented API incomplete on this platform.
 */
INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif
358 | |
359 | /*----------------------------------------------------------------*/ |
360 | /* |
361 | * PA-RISC cycle counter |
362 | */ |
#if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long CycleCounterTicks;

#  ifdef __GNUC__
/* Read control register 16 with mfctl and return its value. */
static __inline__ CycleCounterTicks getticks(void)
{
    CycleCounterTicks cr16;

    /* The asm takes no inputs and declares no clobbers beyond its output. */
    __asm__ __volatile__("mfctl 16, %0" : "=r" (cr16));
    return cr16;
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER

#  elif 0 /* Doesn't compile */
/*
 * Disabled non-GCC variant, kept for reference only.  With this branch
 * switched off, non-GCC builds get the typedef above but no getticks(),
 * and HAVE_TICK_COUNTER stays undefined.
 */
#  include <machine/inline.h>
static inline unsigned long getticks(void)
{
    register CycleCounterTicks ret;
    _MFCTL(16, ret);
    return ret;
}
#  endif

#endif
391 | |
392 | /*----------------------------------------------------------------*/ |
393 | /* S390, courtesy of James Treacy */ |
#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long CycleCounterTicks;

/*
 * Have the stck instruction store its value into a local and return it.
 *
 * The asm receives only the address of the local as an input and writes
 * the result through that address, which is why "memory" is listed as
 * clobbered; "cc" is declared clobbered as well.
 */
static __inline__ CycleCounterTicks getticks(void)
{
    CycleCounterTicks clock_value;

    __asm__("stck 0(%0)" : : "a" (&(clock_value)) : "memory" , "cc" );
    return clock_value;
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
408 | /*----------------------------------------------------------------*/ |
#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
/*
 * The cycle counter on alpha is only 32 bits wide and therefore wraps
 * quickly: at 1GHz it overflows in about 4 seconds.
 */
typedef unsigned int CycleCounterTicks;

/* Execute rpcc and keep only the low 32 bits of the register it fills. */
static __inline__ CycleCounterTicks getticks(void)
{
    unsigned long raw;

    __asm__ __volatile__ ("rpcc %0" : "=r" (raw));
    return (raw & 0xFFFFFFFF);
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
427 | |
428 | /*----------------------------------------------------------------*/ |
#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long CycleCounterTicks;

/* Read the %tick register into a general register and return it. */
static __inline__ CycleCounterTicks getticks(void)
{
    CycleCounterTicks tick;

    __asm__ __volatile__("rd %%tick, %0" : "=r" (tick));
    return tick;
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
443 | |
444 | /*----------------------------------------------------------------*/ |
#if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
#  include <c_asm.h>
typedef unsigned int CycleCounterTicks;

/* Execute rpcc through the asm() facility and keep only its low 32 bits. */
static __inline CycleCounterTicks getticks(void)
{
    unsigned long raw = asm("rpcc %v0" );

    return (raw & 0xFFFFFFFF);
}

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif
460 | /*----------------------------------------------------------------*/ |
461 | /* SGI/Irix */ |
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER)
/* A tick is a full struct timespec sample. */
typedef struct timespec CycleCounterTicks;

/* Sample the CLOCK_SGI_CYCLE clock; the clock_gettime() result is not checked. */
static inline CycleCounterTicks getticks(void)
{
    struct timespec now;

    clock_gettime(CLOCK_SGI_CYCLE, &now);
    return now;
}

/*
 * Return t1 - t0 in nanoseconds: the tv_sec difference scaled by 1e9 plus
 * the tv_nsec difference.
 */
static inline double elapsed(CycleCounterTicks t1, CycleCounterTicks t0)
{
    return (double)(t1.tv_sec - t0.tv_sec) * 1.0E9 +
           (double)(t1.tv_nsec - t0.tv_nsec);
}

#define HAVE_TICK_COUNTER
#endif
479 | |
480 | /*----------------------------------------------------------------*/ |
481 | /* Cray UNICOS _rtc() intrinsic function */ |
#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
#  ifdef HAVE_INTRINSICS_H
#    include <intrinsics.h>
#  endif

/* Ticks are stored as long long values. */
typedef long long CycleCounterTicks;

/* getticks() is a plain alias for the _rtc intrinsic. */
#define getticks _rtc

/* elapsed(t1, t0) is (double)(t1 - t0). */
INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif
495 | |
496 | #endif // QBENCHLIB_CYCLE_H |
497 | |