1 | // Copyright 2015 Google Inc. All rights reserved. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #if defined(_MSC_VER) |
16 | // FIXME: This must be defined before any other includes to disable deprecation |
17 | // warnings for use of codecvt from C++17. We should remove our reliance on |
18 | // the deprecated functionality instead. |
19 | #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING |
20 | #endif |
21 | |
22 | #include "internal_macros.h" |
23 | |
24 | #ifdef BENCHMARK_OS_WINDOWS |
25 | #if !defined(WINVER) || WINVER < 0x0600 |
26 | #undef WINVER |
27 | #define WINVER 0x0600 |
28 | #endif // WINVER handling |
29 | #include <shlwapi.h> |
30 | #undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA |
31 | #include <versionhelpers.h> |
32 | #include <windows.h> |
33 | |
34 | #include <codecvt> |
35 | #else |
36 | #include <fcntl.h> |
37 | #if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) |
38 | #include <sys/resource.h> |
39 | #endif |
40 | #include <sys/time.h> |
41 | #include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD |
42 | #include <unistd.h> |
43 | #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \ |
44 | defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD || \ |
45 | defined BENCHMARK_OS_DRAGONFLY |
46 | #define BENCHMARK_HAS_SYSCTL |
47 | #include <sys/sysctl.h> |
48 | #endif |
49 | #endif |
50 | #if defined(BENCHMARK_OS_SOLARIS) |
51 | #include <kstat.h> |
52 | #include <netdb.h> |
53 | #endif |
54 | #if defined(BENCHMARK_OS_QNX) |
55 | #include <sys/syspage.h> |
56 | #endif |
57 | #if defined(BENCHMARK_OS_QURT) |
58 | #include <qurt.h> |
59 | #endif |
60 | #if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) |
61 | #include <pthread.h> |
62 | #endif |
63 | |
64 | #include <algorithm> |
65 | #include <array> |
66 | #include <bitset> |
67 | #include <cerrno> |
68 | #include <climits> |
69 | #include <cstdint> |
70 | #include <cstdio> |
71 | #include <cstdlib> |
72 | #include <cstring> |
73 | #include <fstream> |
74 | #include <iostream> |
75 | #include <iterator> |
76 | #include <limits> |
77 | #include <locale> |
78 | #include <memory> |
79 | #include <random> |
80 | #include <sstream> |
81 | #include <utility> |
82 | |
83 | #include "benchmark/benchmark.h" |
84 | #include "check.h" |
85 | #include "cycleclock.h" |
86 | #include "internal_macros.h" |
87 | #include "log.h" |
88 | #include "string_util.h" |
89 | #include "timers.h" |
90 | |
91 | namespace benchmark { |
92 | namespace { |
93 | |
// Base case of the PrintImp recursion: ends the line and flushes `out`.
void PrintImp(std::ostream& out) { std::endl(out); }

// Streams `head` into `out` and then recurses on the remaining arguments.
// Once the pack is exhausted the zero-argument overload above terminates the
// line.
template <class First, class... Rest>
void PrintImp(std::ostream& out, First&& head, Rest&&... tail) {
  out << std::forward<First>(head);
  PrintImp(out, std::forward<Rest>(tail)...);
}
101 | |
102 | template <class... Args> |
103 | BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) { |
104 | PrintImp(std::cerr, std::forward<Args>(args)...); |
105 | std::exit(EXIT_FAILURE); |
106 | } |
107 | |
108 | #ifdef BENCHMARK_HAS_SYSCTL |
109 | |
110 | /// ValueUnion - A type used to correctly alias the byte-for-byte output of |
111 | /// `sysctl` with the result type it's to be interpreted as. |
112 | struct ValueUnion { |
113 | union DataT { |
114 | int32_t int32_value; |
115 | int64_t int64_value; |
116 | // For correct aliasing of union members from bytes. |
117 | char bytes[8]; |
118 | }; |
119 | using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>; |
120 | |
121 | // The size of the data union member + its trailing array size. |
122 | std::size_t size; |
123 | DataPtr buff; |
124 | |
125 | public: |
126 | ValueUnion() : size(0), buff(nullptr, &std::free) {} |
127 | |
128 | explicit ValueUnion(std::size_t buff_size) |
129 | : size(sizeof(DataT) + buff_size), |
130 | buff(::new (std::malloc(size)) DataT(), &std::free) {} |
131 | |
132 | ValueUnion(ValueUnion&& other) = default; |
133 | |
134 | explicit operator bool() const { return bool(buff); } |
135 | |
136 | char* data() const { return buff->bytes; } |
137 | |
138 | std::string GetAsString() const { return std::string(data()); } |
139 | |
140 | int64_t GetAsInteger() const { |
141 | if (size == sizeof(buff->int32_value)) |
142 | return buff->int32_value; |
143 | else if (size == sizeof(buff->int64_value)) |
144 | return buff->int64_value; |
145 | BENCHMARK_UNREACHABLE(); |
146 | } |
147 | |
148 | template <class T, int N> |
149 | std::array<T, N> GetAsArray() { |
150 | const int arr_size = sizeof(T) * N; |
151 | BM_CHECK_LE(arr_size, size); |
152 | std::array<T, N> arr; |
153 | std::memcpy(arr.data(), data(), arr_size); |
154 | return arr; |
155 | } |
156 | }; |
157 | |
158 | ValueUnion GetSysctlImp(std::string const& name) { |
159 | #if defined BENCHMARK_OS_OPENBSD |
160 | int mib[2]; |
161 | |
162 | mib[0] = CTL_HW; |
163 | if ((name == "hw.ncpu" ) || (name == "hw.cpuspeed" )) { |
164 | ValueUnion buff(sizeof(int)); |
165 | |
166 | if (name == "hw.ncpu" ) { |
167 | mib[1] = HW_NCPU; |
168 | } else { |
169 | mib[1] = HW_CPUSPEED; |
170 | } |
171 | |
172 | if (sysctl(mib, 2, buff.data(), &buff.Size, nullptr, 0) == -1) { |
173 | return ValueUnion(); |
174 | } |
175 | return buff; |
176 | } |
177 | return ValueUnion(); |
178 | #else |
179 | std::size_t cur_buff_size = 0; |
180 | if (sysctlbyname(name.c_str(), nullptr, &cur_buff_size, nullptr, 0) == -1) |
181 | return ValueUnion(); |
182 | |
183 | ValueUnion buff(cur_buff_size); |
184 | if (sysctlbyname(name.c_str(), buff.data(), &buff.size, nullptr, 0) == 0) |
185 | return buff; |
186 | return ValueUnion(); |
187 | #endif |
188 | } |
189 | |
190 | BENCHMARK_MAYBE_UNUSED |
191 | bool GetSysctl(std::string const& name, std::string* out) { |
192 | out->clear(); |
193 | auto buff = GetSysctlImp(name); |
194 | if (!buff) return false; |
195 | out->assign(buff.data()); |
196 | return true; |
197 | } |
198 | |
199 | template <class Tp, |
200 | class = typename std::enable_if<std::is_integral<Tp>::value>::type> |
201 | bool GetSysctl(std::string const& name, Tp* out) { |
202 | *out = 0; |
203 | auto buff = GetSysctlImp(name); |
204 | if (!buff) return false; |
205 | *out = static_cast<Tp>(buff.GetAsInteger()); |
206 | return true; |
207 | } |
208 | |
209 | template <class Tp, size_t N> |
210 | bool GetSysctl(std::string const& name, std::array<Tp, N>* out) { |
211 | auto buff = GetSysctlImp(name); |
212 | if (!buff) return false; |
213 | *out = buff.GetAsArray<Tp, N>(); |
214 | return true; |
215 | } |
216 | #endif |
217 | |
// Reads a single value from the file `fname` into `*arg` using `operator>>`.
// `*arg` is reset to a value-initialized ArgT before anything else happens.
// Returns true iff the file could be opened and a value was extracted.
template <class ArgT>
bool ReadFromFile(std::string const& fname, ArgT* arg) {
  *arg = ArgT();
  std::ifstream f(fname.c_str());
  if (!f.is_open()) return false;
  f >> *arg;
  // Test fail() rather than good(): when the value is the last thing in a
  // file that has no trailing newline, extraction sets eofbit even though the
  // value was parsed successfully, and good() would report that as an error.
  return !f.fail();
}
226 | |
227 | CPUInfo::Scaling CpuScaling(int num_cpus) { |
228 | // We don't have a valid CPU count, so don't even bother. |
229 | if (num_cpus <= 0) return CPUInfo::Scaling::UNKNOWN; |
230 | #if defined(BENCHMARK_OS_QNX) |
231 | return CPUInfo::Scaling::UNKNOWN; |
232 | #elif !defined(BENCHMARK_OS_WINDOWS) |
233 | // On Linux, the CPUfreq subsystem exposes CPU information as files on the |
234 | // local file system. If reading the exported files fails, then we may not be |
235 | // running on Linux, so we silently ignore all the read errors. |
236 | std::string res; |
237 | for (int cpu = 0; cpu < num_cpus; ++cpu) { |
238 | std::string governor_file = |
239 | StrCat(args: "/sys/devices/system/cpu/cpu" , args&: cpu, args: "/cpufreq/scaling_governor" ); |
240 | if (ReadFromFile(fname: governor_file, arg: &res) && res != "performance" ) |
241 | return CPUInfo::Scaling::ENABLED; |
242 | } |
243 | return CPUInfo::Scaling::DISABLED; |
244 | #else |
245 | return CPUInfo::Scaling::UNKNOWN; |
246 | #endif |
247 | } |
248 | |
249 | int CountSetBitsInCPUMap(std::string val) { |
250 | auto CountBits = [](std::string part) { |
251 | using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>; |
252 | part = "0x" + part; |
253 | CPUMask mask(benchmark::stoul(str: part, idx: nullptr, base: 16)); |
254 | return static_cast<int>(mask.count()); |
255 | }; |
256 | std::size_t pos; |
257 | int total = 0; |
258 | while ((pos = val.find(c: ',')) != std::string::npos) { |
259 | total += CountBits(val.substr(pos: 0, n: pos)); |
260 | val = val.substr(pos: pos + 1); |
261 | } |
262 | if (!val.empty()) { |
263 | total += CountBits(val); |
264 | } |
265 | return total; |
266 | } |
267 | |
268 | BENCHMARK_MAYBE_UNUSED |
269 | std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() { |
270 | std::vector<CPUInfo::CacheInfo> res; |
271 | std::string dir = "/sys/devices/system/cpu/cpu0/cache/" ; |
272 | int idx = 0; |
273 | while (true) { |
274 | CPUInfo::CacheInfo info; |
275 | std::string fpath = StrCat(args&: dir, args: "index" , args: idx++, args: "/" ); |
276 | std::ifstream f(StrCat(args&: fpath, args: "size" ).c_str()); |
277 | if (!f.is_open()) break; |
278 | std::string suffix; |
279 | f >> info.size; |
280 | if (f.fail()) |
281 | PrintErrorAndDie(args: "Failed while reading file '" , args&: fpath, args: "size'" ); |
282 | if (f.good()) { |
283 | f >> suffix; |
284 | if (f.bad()) |
285 | PrintErrorAndDie( |
286 | args: "Invalid cache size format: failed to read size suffix" ); |
287 | else if (f && suffix != "K" ) |
288 | PrintErrorAndDie(args: "Invalid cache size format: Expected bytes " , args&: suffix); |
289 | else if (suffix == "K" ) |
290 | info.size *= 1024; |
291 | } |
292 | if (!ReadFromFile(fname: StrCat(args&: fpath, args: "type" ), arg: &info.type)) |
293 | PrintErrorAndDie(args: "Failed to read from file " , args&: fpath, args: "type" ); |
294 | if (!ReadFromFile(fname: StrCat(args&: fpath, args: "level" ), arg: &info.level)) |
295 | PrintErrorAndDie(args: "Failed to read from file " , args&: fpath, args: "level" ); |
296 | std::string map_str; |
297 | if (!ReadFromFile(fname: StrCat(args&: fpath, args: "shared_cpu_map" ), arg: &map_str)) |
298 | PrintErrorAndDie(args: "Failed to read from file " , args&: fpath, args: "shared_cpu_map" ); |
299 | info.num_sharing = CountSetBitsInCPUMap(val: map_str); |
300 | res.push_back(x: info); |
301 | } |
302 | |
303 | return res; |
304 | } |
305 | |
306 | #ifdef BENCHMARK_OS_MACOSX |
307 | std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() { |
308 | std::vector<CPUInfo::CacheInfo> res; |
309 | std::array<int, 4> cache_counts{{0, 0, 0, 0}}; |
310 | GetSysctl("hw.cacheconfig" , &cache_counts); |
311 | |
312 | struct { |
313 | std::string name; |
314 | std::string type; |
315 | int level; |
316 | int num_sharing; |
317 | } cases[] = {{"hw.l1dcachesize" , "Data" , 1, cache_counts[1]}, |
318 | {"hw.l1icachesize" , "Instruction" , 1, cache_counts[1]}, |
319 | {"hw.l2cachesize" , "Unified" , 2, cache_counts[2]}, |
320 | {"hw.l3cachesize" , "Unified" , 3, cache_counts[3]}}; |
321 | for (auto& c : cases) { |
322 | int val; |
323 | if (!GetSysctl(c.name, &val)) continue; |
324 | CPUInfo::CacheInfo info; |
325 | info.type = c.type; |
326 | info.level = c.level; |
327 | info.size = val; |
328 | info.num_sharing = c.num_sharing; |
329 | res.push_back(std::move(info)); |
330 | } |
331 | return res; |
332 | } |
333 | #elif defined(BENCHMARK_OS_WINDOWS) |
334 | std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() { |
335 | std::vector<CPUInfo::CacheInfo> res; |
336 | DWORD buffer_size = 0; |
337 | using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; |
338 | using CInfo = CACHE_DESCRIPTOR; |
339 | |
340 | using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>; |
341 | GetLogicalProcessorInformation(nullptr, &buffer_size); |
342 | UPtr buff(static_cast<PInfo*>(std::malloc(buffer_size)), &std::free); |
343 | if (!GetLogicalProcessorInformation(buff.get(), &buffer_size)) |
344 | PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: " , |
345 | GetLastError()); |
346 | |
347 | PInfo* it = buff.get(); |
348 | PInfo* end = buff.get() + (buffer_size / sizeof(PInfo)); |
349 | |
350 | for (; it != end; ++it) { |
351 | if (it->Relationship != RelationCache) continue; |
352 | using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>; |
353 | BitSet b(it->ProcessorMask); |
354 | // To prevent duplicates, only consider caches where CPU 0 is specified |
355 | if (!b.test(0)) continue; |
356 | const CInfo& cache = it->Cache; |
357 | CPUInfo::CacheInfo C; |
358 | C.num_sharing = static_cast<int>(b.count()); |
359 | C.level = cache.Level; |
360 | C.size = cache.Size; |
361 | C.type = "Unknown" ; |
362 | switch (cache.Type) { |
363 | case CacheUnified: |
364 | C.type = "Unified" ; |
365 | break; |
366 | case CacheInstruction: |
367 | C.type = "Instruction" ; |
368 | break; |
369 | case CacheData: |
370 | C.type = "Data" ; |
371 | break; |
372 | case CacheTrace: |
373 | C.type = "Trace" ; |
374 | break; |
375 | } |
376 | res.push_back(C); |
377 | } |
378 | return res; |
379 | } |
380 | #elif BENCHMARK_OS_QNX |
381 | std::vector<CPUInfo::CacheInfo> GetCacheSizesQNX() { |
382 | std::vector<CPUInfo::CacheInfo> res; |
383 | struct cacheattr_entry* cache = SYSPAGE_ENTRY(cacheattr); |
384 | uint32_t const elsize = SYSPAGE_ELEMENT_SIZE(cacheattr); |
385 | int num = SYSPAGE_ENTRY_SIZE(cacheattr) / elsize; |
386 | for (int i = 0; i < num; ++i) { |
387 | CPUInfo::CacheInfo info; |
388 | switch (cache->flags) { |
389 | case CACHE_FLAG_INSTR: |
390 | info.type = "Instruction" ; |
391 | info.level = 1; |
392 | break; |
393 | case CACHE_FLAG_DATA: |
394 | info.type = "Data" ; |
395 | info.level = 1; |
396 | break; |
397 | case CACHE_FLAG_UNIFIED: |
398 | info.type = "Unified" ; |
399 | info.level = 2; |
400 | break; |
401 | case CACHE_FLAG_SHARED: |
402 | info.type = "Shared" ; |
403 | info.level = 3; |
404 | break; |
405 | default: |
406 | continue; |
407 | break; |
408 | } |
409 | info.size = cache->line_size * cache->num_lines; |
410 | info.num_sharing = 0; |
411 | res.push_back(std::move(info)); |
412 | cache = SYSPAGE_ARRAY_ADJ_OFFSET(cacheattr, cache, elsize); |
413 | } |
414 | return res; |
415 | } |
416 | #endif |
417 | |
418 | std::vector<CPUInfo::CacheInfo> GetCacheSizes() { |
419 | #ifdef BENCHMARK_OS_MACOSX |
420 | return GetCacheSizesMacOSX(); |
421 | #elif defined(BENCHMARK_OS_WINDOWS) |
422 | return GetCacheSizesWindows(); |
423 | #elif defined(BENCHMARK_OS_QNX) |
424 | return GetCacheSizesQNX(); |
425 | #elif defined(BENCHMARK_OS_QURT) |
426 | return std::vector<CPUInfo::CacheInfo>(); |
427 | #else |
428 | return GetCacheSizesFromKVFS(); |
429 | #endif |
430 | } |
431 | |
// Returns a name identifying the machine: the computer name on Windows,
// "Hexagon DSP" (plus the architecture version when available) on QuRT, and
// the result of gethostname() elsewhere. Returns an empty string when the
// name cannot be obtained.
std::string GetSystemName() {
#if defined(BENCHMARK_OS_WINDOWS)
  std::string str;
  static constexpr int COUNT = MAX_COMPUTERNAME_LENGTH + 1;
  TCHAR hostname[COUNT] = {'\0'};
  DWORD DWCOUNT = COUNT;
  if (!GetComputerName(hostname, &DWCOUNT)) return std::string("");
#ifndef UNICODE
  str = std::string(hostname, DWCOUNT);
#else
  // `WideCharToMultiByte` returns `0` when conversion fails.
  int len = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname,
                                DWCOUNT, nullptr, 0, nullptr, nullptr);
  // Nothing to convert (or conversion failed): report an empty name instead
  // of running the second conversion on an empty buffer.
  if (len <= 0) return std::string("");
  str.resize(len);
  WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, hostname, DWCOUNT, &str[0],
                      static_cast<int>(str.size()), nullptr, nullptr);
#endif
  return str;
#elif defined(BENCHMARK_OS_QURT)
  std::string str = "Hexagon DSP";
  qurt_arch_version_t arch_version_struct;
  if (qurt_sysenv_get_arch_version(&arch_version_struct) == QURT_EOK) {
    str += " v";
    str += std::to_string(arch_version_struct.arch_version);
  }
  return str;
#else
#ifndef HOST_NAME_MAX
#ifdef BENCHMARK_HAS_SYSCTL  // BSD/Mac doesn't have HOST_NAME_MAX defined
#define HOST_NAME_MAX 64
#elif defined(BENCHMARK_OS_NACL)
#define HOST_NAME_MAX 64
#elif defined(BENCHMARK_OS_QNX)
#define HOST_NAME_MAX 154
#elif defined(BENCHMARK_OS_RTEMS)
#define HOST_NAME_MAX 256
#elif defined(BENCHMARK_OS_SOLARIS)
#define HOST_NAME_MAX MAXHOSTNAMELEN
#elif defined(BENCHMARK_OS_ZOS)
#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
#else
#pragma message("HOST_NAME_MAX not defined. using 64")
#define HOST_NAME_MAX 64
#endif
#endif  // def HOST_NAME_MAX
  // HOST_NAME_MAX does not count the terminating NUL, and gethostname() may
  // leave a truncated name unterminated. Reserve one extra byte and terminate
  // explicitly so that the std::string construction below cannot read past
  // the buffer.
  char hostname[HOST_NAME_MAX + 1];
  int retVal = gethostname(hostname, sizeof(hostname));
  if (retVal != 0) return std::string("");
  hostname[HOST_NAME_MAX] = '\0';
  return std::string(hostname);
#endif  // Catch-all POSIX block.
}
483 | |
484 | int GetNumCPUsImpl() { |
485 | #ifdef BENCHMARK_HAS_SYSCTL |
486 | int num_cpu = -1; |
487 | if (GetSysctl("hw.ncpu" , &num_cpu)) return num_cpu; |
488 | PrintErrorAndDie("Err: " , strerror(errno)); |
489 | #elif defined(BENCHMARK_OS_WINDOWS) |
490 | SYSTEM_INFO sysinfo; |
491 | // Use memset as opposed to = {} to avoid GCC missing initializer false |
492 | // positives. |
493 | std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO)); |
494 | GetSystemInfo(&sysinfo); |
495 | return sysinfo.dwNumberOfProcessors; // number of logical |
496 | // processors in the current |
497 | // group |
498 | #elif defined(BENCHMARK_OS_SOLARIS) |
499 | // Returns -1 in case of a failure. |
500 | long num_cpu = sysconf(_SC_NPROCESSORS_ONLN); |
501 | if (num_cpu < 0) { |
502 | PrintErrorAndDie("sysconf(_SC_NPROCESSORS_ONLN) failed with error: " , |
503 | strerror(errno)); |
504 | } |
505 | return (int)num_cpu; |
506 | #elif defined(BENCHMARK_OS_QNX) |
507 | return static_cast<int>(_syspage_ptr->num_cpu); |
508 | #elif defined(BENCHMARK_OS_QURT) |
509 | qurt_sysenv_max_hthreads_t hardware_threads; |
510 | if (qurt_sysenv_get_max_hw_threads(&hardware_threads) != QURT_EOK) { |
511 | hardware_threads.max_hthreads = 1; |
512 | } |
513 | return hardware_threads.max_hthreads; |
514 | #else |
515 | int num_cpus = 0; |
516 | int max_id = -1; |
517 | std::ifstream f("/proc/cpuinfo" ); |
518 | if (!f.is_open()) { |
519 | PrintErrorAndDie(args: "Failed to open /proc/cpuinfo" ); |
520 | } |
521 | #if defined(__alpha__) |
522 | const std::string Key = "cpus detected" ; |
523 | #else |
524 | const std::string Key = "processor" ; |
525 | #endif |
526 | std::string ln; |
527 | while (std::getline(is&: f, str&: ln)) { |
528 | if (ln.empty()) continue; |
529 | std::size_t split_idx = ln.find(c: ':'); |
530 | std::string value; |
531 | #if defined(__s390__) |
532 | // s390 has another format in /proc/cpuinfo |
533 | // it needs to be parsed differently |
534 | if (split_idx != std::string::npos) |
535 | value = ln.substr(Key.size() + 1, split_idx - Key.size() - 1); |
536 | #else |
537 | if (split_idx != std::string::npos) value = ln.substr(pos: split_idx + 1); |
538 | #endif |
539 | if (ln.size() >= Key.size() && ln.compare(pos: 0, n: Key.size(), str: Key) == 0) { |
540 | num_cpus++; |
541 | if (!value.empty()) { |
542 | const int cur_id = benchmark::stoi(str: value); |
543 | max_id = std::max(a: cur_id, b: max_id); |
544 | } |
545 | } |
546 | } |
547 | if (f.bad()) { |
548 | PrintErrorAndDie(args: "Failure reading /proc/cpuinfo" ); |
549 | } |
550 | if (!f.eof()) { |
551 | PrintErrorAndDie(args: "Failed to read to end of /proc/cpuinfo" ); |
552 | } |
553 | f.close(); |
554 | |
555 | if ((max_id + 1) != num_cpus) { |
556 | fprintf(stderr, |
557 | format: "CPU ID assignments in /proc/cpuinfo seem messed up." |
558 | " This is usually caused by a bad BIOS.\n" ); |
559 | } |
560 | return num_cpus; |
561 | #endif |
562 | BENCHMARK_UNREACHABLE(); |
563 | } |
564 | |
565 | int GetNumCPUs() { |
566 | const int num_cpus = GetNumCPUsImpl(); |
567 | if (num_cpus < 1) { |
568 | PrintErrorAndDie( |
569 | args: "Unable to extract number of CPUs. If your platform uses " |
570 | "/proc/cpuinfo, custom support may need to be added." ); |
571 | } |
572 | return num_cpus; |
573 | } |
574 | |
575 | class ThreadAffinityGuard final { |
576 | public: |
577 | ThreadAffinityGuard() : reset_affinity(SetAffinity()) { |
578 | if (!reset_affinity) |
579 | std::cerr << "***WARNING*** Failed to set thread affinity. Estimated CPU " |
580 | "frequency may be incorrect." |
581 | << std::endl; |
582 | } |
583 | |
584 | ~ThreadAffinityGuard() { |
585 | if (!reset_affinity) return; |
586 | |
587 | #if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) |
588 | int ret = pthread_setaffinity_np(th: self, cpusetsize: sizeof(previous_affinity), |
589 | cpuset: &previous_affinity); |
590 | if (ret == 0) return; |
591 | #elif defined(BENCHMARK_OS_WINDOWS_WIN32) |
592 | DWORD_PTR ret = SetThreadAffinityMask(self, previous_affinity); |
593 | if (ret != 0) return; |
594 | #endif // def BENCHMARK_HAS_PTHREAD_AFFINITY |
595 | PrintErrorAndDie(args: "Failed to reset thread affinity" ); |
596 | } |
597 | |
598 | ThreadAffinityGuard(ThreadAffinityGuard&&) = delete; |
599 | ThreadAffinityGuard(const ThreadAffinityGuard&) = delete; |
600 | ThreadAffinityGuard& operator=(ThreadAffinityGuard&&) = delete; |
601 | ThreadAffinityGuard& operator=(const ThreadAffinityGuard&) = delete; |
602 | |
603 | private: |
604 | bool SetAffinity() { |
605 | #if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) |
606 | int ret; |
607 | self = pthread_self(); |
608 | ret = pthread_getaffinity_np(th: self, cpusetsize: sizeof(previous_affinity), |
609 | cpuset: &previous_affinity); |
610 | if (ret != 0) return false; |
611 | |
612 | cpu_set_t affinity; |
613 | memcpy(dest: &affinity, src: &previous_affinity, n: sizeof(affinity)); |
614 | |
615 | bool is_first_cpu = true; |
616 | |
617 | for (int i = 0; i < CPU_SETSIZE; ++i) |
618 | if (CPU_ISSET(i, &affinity)) { |
619 | if (is_first_cpu) |
620 | is_first_cpu = false; |
621 | else |
622 | CPU_CLR(i, &affinity); |
623 | } |
624 | |
625 | if (is_first_cpu) return false; |
626 | |
627 | ret = pthread_setaffinity_np(th: self, cpusetsize: sizeof(affinity), cpuset: &affinity); |
628 | return ret == 0; |
629 | #elif defined(BENCHMARK_OS_WINDOWS_WIN32) |
630 | self = GetCurrentThread(); |
631 | DWORD_PTR mask = static_cast<DWORD_PTR>(1) << GetCurrentProcessorNumber(); |
632 | previous_affinity = SetThreadAffinityMask(self, mask); |
633 | return previous_affinity != 0; |
634 | #else |
635 | return false; |
636 | #endif // def BENCHMARK_HAS_PTHREAD_AFFINITY |
637 | } |
638 | |
639 | #if defined(BENCHMARK_HAS_PTHREAD_AFFINITY) |
640 | pthread_t self; |
641 | cpu_set_t previous_affinity; |
642 | #elif defined(BENCHMARK_OS_WINDOWS_WIN32) |
643 | HANDLE self; |
644 | DWORD_PTR previous_affinity; |
645 | #endif // def BENCHMARK_HAS_PTHREAD_AFFINITY |
646 | bool reset_affinity; |
647 | }; |
648 | |
649 | double GetCPUCyclesPerSecond(CPUInfo::Scaling scaling) { |
650 | // Currently, scaling is only used on linux path here, |
651 | // suppress diagnostics about it being unused on other paths. |
652 | (void)scaling; |
653 | |
654 | #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN |
655 | long freq; |
656 | |
657 | // If the kernel is exporting the tsc frequency use that. There are issues |
658 | // where cpuinfo_max_freq cannot be relied on because the BIOS may be |
659 | // exporintg an invalid p-state (on x86) or p-states may be used to put the |
660 | // processor in a new mode (turbo mode). Essentially, those frequencies |
661 | // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as |
662 | // well. |
663 | if (ReadFromFile(fname: "/sys/devices/system/cpu/cpu0/tsc_freq_khz" , arg: &freq) |
664 | // If CPU scaling is disabled, use the *current* frequency. |
665 | // Note that we specifically don't want to read cpuinfo_cur_freq, |
666 | // because it is only readable by root. |
667 | || (scaling == CPUInfo::Scaling::DISABLED && |
668 | ReadFromFile(fname: "/sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq" , |
669 | arg: &freq)) |
670 | // Otherwise, if CPU scaling may be in effect, we want to use |
671 | // the *maximum* frequency, not whatever CPU speed some random processor |
672 | // happens to be using now. |
673 | || ReadFromFile(fname: "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq" , |
674 | arg: &freq)) { |
675 | // The value is in kHz (as the file name suggests). For example, on a |
676 | // 2GHz warpstation, the file contains the value "2000000". |
677 | return static_cast<double>(freq) * 1000.0; |
678 | } |
679 | |
680 | const double error_value = -1; |
681 | double bogo_clock = error_value; |
682 | |
683 | std::ifstream f("/proc/cpuinfo" ); |
684 | if (!f.is_open()) { |
685 | std::cerr << "failed to open /proc/cpuinfo\n" ; |
686 | return error_value; |
687 | } |
688 | |
689 | auto StartsWithKey = [](std::string const& Value, std::string const& Key) { |
690 | if (Key.size() > Value.size()) return false; |
691 | auto Cmp = [&](char X, char Y) { |
692 | return std::tolower(c: X) == std::tolower(c: Y); |
693 | }; |
694 | return std::equal(first1: Key.begin(), last1: Key.end(), first2: Value.begin(), binary_pred: Cmp); |
695 | }; |
696 | |
697 | std::string ln; |
698 | while (std::getline(is&: f, str&: ln)) { |
699 | if (ln.empty()) continue; |
700 | std::size_t split_idx = ln.find(c: ':'); |
701 | std::string value; |
702 | if (split_idx != std::string::npos) value = ln.substr(pos: split_idx + 1); |
703 | // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only |
704 | // accept positive values. Some environments (virtual machines) report zero, |
705 | // which would cause infinite looping in WallTime_Init. |
706 | if (StartsWithKey(ln, "cpu MHz" )) { |
707 | if (!value.empty()) { |
708 | double cycles_per_second = benchmark::stod(str: value) * 1000000.0; |
709 | if (cycles_per_second > 0) return cycles_per_second; |
710 | } |
711 | } else if (StartsWithKey(ln, "bogomips" )) { |
712 | if (!value.empty()) { |
713 | bogo_clock = benchmark::stod(str: value) * 1000000.0; |
714 | if (bogo_clock < 0.0) bogo_clock = error_value; |
715 | } |
716 | } |
717 | } |
718 | if (f.bad()) { |
719 | std::cerr << "Failure reading /proc/cpuinfo\n" ; |
720 | return error_value; |
721 | } |
722 | if (!f.eof()) { |
723 | std::cerr << "Failed to read to end of /proc/cpuinfo\n" ; |
724 | return error_value; |
725 | } |
726 | f.close(); |
727 | // If we found the bogomips clock, but nothing better, we'll use it (but |
728 | // we're not happy about it); otherwise, fallback to the rough estimation |
729 | // below. |
730 | if (bogo_clock >= 0.0) return bogo_clock; |
731 | |
732 | #elif defined BENCHMARK_HAS_SYSCTL |
733 | constexpr auto* freqStr = |
734 | #if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD) |
735 | "machdep.tsc_freq" ; |
736 | #elif defined BENCHMARK_OS_OPENBSD |
737 | "hw.cpuspeed" ; |
738 | #elif defined BENCHMARK_OS_DRAGONFLY |
739 | "hw.tsc_frequency" ; |
740 | #else |
741 | "hw.cpufrequency" ; |
742 | #endif |
743 | unsigned long long hz = 0; |
744 | #if defined BENCHMARK_OS_OPENBSD |
745 | if (GetSysctl(freqStr, &hz)) return hz * 1000000; |
746 | #else |
747 | if (GetSysctl(freqStr, &hz)) return hz; |
748 | #endif |
749 | fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n" , |
750 | freqStr, strerror(errno)); |
751 | fprintf(stderr, |
752 | "This does not affect benchmark measurements, only the " |
753 | "metadata output.\n" ); |
754 | |
755 | #elif defined BENCHMARK_OS_WINDOWS_WIN32 |
756 | // In NT, read MHz from the registry. If we fail to do so or we're in win9x |
757 | // then make a crude estimate. |
758 | DWORD data, data_size = sizeof(data); |
759 | if (IsWindowsXPOrGreater() && |
760 | SUCCEEDED( |
761 | SHGetValueA(HKEY_LOCAL_MACHINE, |
762 | "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0" , |
763 | "~MHz" , nullptr, &data, &data_size))) |
764 | return static_cast<double>(static_cast<int64_t>(data) * |
765 | static_cast<int64_t>(1000 * 1000)); // was mhz |
766 | #elif defined(BENCHMARK_OS_SOLARIS) |
767 | kstat_ctl_t* kc = kstat_open(); |
768 | if (!kc) { |
769 | std::cerr << "failed to open /dev/kstat\n" ; |
770 | return -1; |
771 | } |
772 | kstat_t* ksp = kstat_lookup(kc, const_cast<char*>("cpu_info" ), -1, |
773 | const_cast<char*>("cpu_info0" )); |
774 | if (!ksp) { |
775 | std::cerr << "failed to lookup in /dev/kstat\n" ; |
776 | return -1; |
777 | } |
778 | if (kstat_read(kc, ksp, NULL) < 0) { |
779 | std::cerr << "failed to read from /dev/kstat\n" ; |
780 | return -1; |
781 | } |
782 | kstat_named_t* knp = (kstat_named_t*)kstat_data_lookup( |
783 | ksp, const_cast<char*>("current_clock_Hz" )); |
784 | if (!knp) { |
785 | std::cerr << "failed to lookup data in /dev/kstat\n" ; |
786 | return -1; |
787 | } |
788 | if (knp->data_type != KSTAT_DATA_UINT64) { |
789 | std::cerr << "current_clock_Hz is of unexpected data type: " |
790 | << knp->data_type << "\n" ; |
791 | return -1; |
792 | } |
793 | double clock_hz = knp->value.ui64; |
794 | kstat_close(kc); |
795 | return clock_hz; |
796 | #elif defined(BENCHMARK_OS_QNX) |
797 | return static_cast<double>( |
798 | static_cast<int64_t>(SYSPAGE_ENTRY(cpuinfo)->speed) * |
799 | static_cast<int64_t>(1000 * 1000)); |
800 | #elif defined(BENCHMARK_OS_QURT) |
801 | // QuRT doesn't provide any API to query Hexagon frequency. |
802 | return 1000000000; |
803 | #endif |
804 | // If we've fallen through, attempt to roughly estimate the CPU clock rate. |
805 | |
806 | // Make sure to use the same cycle counter when starting and stopping the |
807 | // cycle timer. We just pin the current thread to a cpu in the previous |
808 | // affinity set. |
809 | ThreadAffinityGuard affinity_guard; |
810 | |
811 | static constexpr double estimate_time_s = 1.0; |
812 | const double start_time = ChronoClockNow(); |
813 | const auto start_ticks = cycleclock::Now(); |
814 | |
815 | // Impose load instead of calling sleep() to make sure the cycle counter |
816 | // works. |
817 | using PRNG = std::minstd_rand; |
818 | using Result = PRNG::result_type; |
819 | PRNG rng(static_cast<Result>(start_ticks)); |
820 | |
821 | Result state = 0; |
822 | |
823 | do { |
824 | static constexpr size_t batch_size = 10000; |
825 | rng.discard(z: batch_size); |
826 | state += rng(); |
827 | |
828 | } while (ChronoClockNow() - start_time < estimate_time_s); |
829 | |
830 | DoNotOptimize(value&: state); |
831 | |
832 | const auto end_ticks = cycleclock::Now(); |
833 | const double end_time = ChronoClockNow(); |
834 | |
835 | return static_cast<double>(end_ticks - start_ticks) / (end_time - start_time); |
836 | // Reset the affinity of current thread when the lifetime of affinity_guard |
837 | // ends. |
838 | } |
839 | |
// Returns the load averages reported by getloadavg() (at most three
// samples). The result is empty when getloadavg() provides no sample or when
// the platform is not one of those enabled by the preprocessor check below.
std::vector<double> GetLoadAvg() {
#if (defined BENCHMARK_OS_FREEBSD || defined(BENCHMARK_OS_LINUX) ||    \
     defined BENCHMARK_OS_MACOSX || defined BENCHMARK_OS_NETBSD ||     \
     defined BENCHMARK_OS_OPENBSD || defined BENCHMARK_OS_DRAGONFLY) && \
    !(defined(__ANDROID__) && __ANDROID_API__ < 29)
  static constexpr int kMaxSamples = 3;
  double samples[kMaxSamples] = {0.0, 0.0, 0.0};
  const int filled = getloadavg(samples, kMaxSamples);
  if (filled < 1) {
    return {};
  }
  // Keep only the samples that were actually provided.
  return std::vector<double>(samples, samples + filled);
#else
  return {};
#endif
}
858 | |
859 | } // end namespace |
860 | |
861 | const CPUInfo& CPUInfo::Get() { |
862 | static const CPUInfo* info = new CPUInfo(); |
863 | return *info; |
864 | } |
865 | |
866 | CPUInfo::CPUInfo() |
867 | : num_cpus(GetNumCPUs()), |
868 | scaling(CpuScaling(num_cpus)), |
869 | cycles_per_second(GetCPUCyclesPerSecond(scaling)), |
870 | caches(GetCacheSizes()), |
871 | load_avg(GetLoadAvg()) {} |
872 | |
873 | const SystemInfo& SystemInfo::Get() { |
874 | static const SystemInfo* info = new SystemInfo(); |
875 | return *info; |
876 | } |
877 | |
878 | SystemInfo::SystemInfo() : name(GetSystemName()) {} |
879 | } // end namespace benchmark |
880 | |