| 1 | /* |
| 2 | * kmp_affinity.h -- header for affinity management |
| 3 | */ |
| 4 | |
| 5 | //===----------------------------------------------------------------------===// |
| 6 | // |
| 7 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 8 | // See https://llvm.org/LICENSE.txt for license information. |
| 9 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #ifndef KMP_AFFINITY_H |
| 14 | #define KMP_AFFINITY_H |
| 15 | |
| 16 | #include "kmp.h" |
| 17 | #include "kmp_os.h" |
| 18 | #include <limits> |
| 19 | |
| 20 | #if KMP_AFFINITY_SUPPORTED |
| 21 | #if KMP_USE_HWLOC |
| 22 | class KMPHwlocAffinity : public KMPAffinity { |
| 23 | public: |
| 24 | class Mask : public KMPAffinity::Mask { |
| 25 | hwloc_cpuset_t mask; |
| 26 | |
| 27 | public: |
| 28 | Mask() { |
| 29 | mask = hwloc_bitmap_alloc(); |
| 30 | this->zero(); |
| 31 | } |
| 32 | Mask(const Mask &other) = delete; |
| 33 | Mask &operator=(const Mask &other) = delete; |
| 34 | ~Mask() { hwloc_bitmap_free(mask); } |
| 35 | void set(int i) override { hwloc_bitmap_set(mask, i); } |
| 36 | bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } |
| 37 | void clear(int i) override { hwloc_bitmap_clr(mask, i); } |
| 38 | void zero() override { hwloc_bitmap_zero(mask); } |
| 39 | bool empty() const override { return hwloc_bitmap_iszero(mask); } |
| 40 | void copy(const KMPAffinity::Mask *src) override { |
| 41 | const Mask *convert = static_cast<const Mask *>(src); |
| 42 | hwloc_bitmap_copy(mask, convert->mask); |
| 43 | } |
| 44 | void bitwise_and(const KMPAffinity::Mask *rhs) override { |
| 45 | const Mask *convert = static_cast<const Mask *>(rhs); |
| 46 | hwloc_bitmap_and(mask, mask, convert->mask); |
| 47 | } |
| 48 | void bitwise_or(const KMPAffinity::Mask *rhs) override { |
| 49 | const Mask *convert = static_cast<const Mask *>(rhs); |
| 50 | hwloc_bitmap_or(mask, mask, convert->mask); |
| 51 | } |
| 52 | void bitwise_not() override { hwloc_bitmap_not(mask, mask); } |
| 53 | bool is_equal(const KMPAffinity::Mask *rhs) const override { |
| 54 | const Mask *convert = static_cast<const Mask *>(rhs); |
| 55 | return hwloc_bitmap_isequal(mask, convert->mask); |
| 56 | } |
| 57 | int begin() const override { return hwloc_bitmap_first(mask); } |
| 58 | int end() const override { return -1; } |
| 59 | int next(int previous) const override { |
| 60 | return hwloc_bitmap_next(mask, previous); |
| 61 | } |
| 62 | int get_system_affinity(bool abort_on_error) override { |
| 63 | KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), |
| 64 | "Illegal get affinity operation when not capable" ); |
| 65 | long retval = |
| 66 | hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); |
| 67 | if (retval >= 0) { |
| 68 | return 0; |
| 69 | } |
| 70 | int error = errno; |
| 71 | if (abort_on_error) { |
| 72 | __kmp_fatal(KMP_MSG(FunctionError, "hwloc_get_cpubind()" ), |
| 73 | KMP_ERR(error), __kmp_msg_null); |
| 74 | } |
| 75 | return error; |
| 76 | } |
| 77 | int set_system_affinity(bool abort_on_error) const override { |
| 78 | KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), |
| 79 | "Illegal set affinity operation when not capable" ); |
| 80 | long retval = |
| 81 | hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); |
| 82 | if (retval >= 0) { |
| 83 | return 0; |
| 84 | } |
| 85 | int error = errno; |
| 86 | if (abort_on_error) { |
| 87 | __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()" ), |
| 88 | KMP_ERR(error), __kmp_msg_null); |
| 89 | } |
| 90 | return error; |
| 91 | } |
| 92 | #if KMP_OS_WINDOWS |
| 93 | int set_process_affinity(bool abort_on_error) const override { |
| 94 | KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), |
| 95 | "Illegal set process affinity operation when not capable" ); |
| 96 | int error = 0; |
| 97 | const hwloc_topology_support *support = |
| 98 | hwloc_topology_get_support(__kmp_hwloc_topology); |
| 99 | if (support->cpubind->set_proc_cpubind) { |
| 100 | int retval; |
| 101 | retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, |
| 102 | HWLOC_CPUBIND_PROCESS); |
| 103 | if (retval >= 0) |
| 104 | return 0; |
| 105 | error = errno; |
| 106 | if (abort_on_error) |
| 107 | __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()" ), |
| 108 | KMP_ERR(error), __kmp_msg_null); |
| 109 | } |
| 110 | return error; |
| 111 | } |
| 112 | #endif |
| 113 | int get_proc_group() const override { |
| 114 | int group = -1; |
| 115 | #if KMP_OS_WINDOWS |
| 116 | if (__kmp_num_proc_groups == 1) { |
| 117 | return 1; |
| 118 | } |
| 119 | for (int i = 0; i < __kmp_num_proc_groups; i++) { |
| 120 | // On windows, the long type is always 32 bits |
| 121 | unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2); |
| 122 | unsigned long second_32_bits = |
| 123 | hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1); |
| 124 | if (first_32_bits == 0 && second_32_bits == 0) { |
| 125 | continue; |
| 126 | } |
| 127 | if (group >= 0) { |
| 128 | return -1; |
| 129 | } |
| 130 | group = i; |
| 131 | } |
| 132 | #endif /* KMP_OS_WINDOWS */ |
| 133 | return group; |
| 134 | } |
| 135 | }; |
| 136 | void determine_capable(const char *var) override { |
| 137 | const hwloc_topology_support *topology_support; |
| 138 | if (__kmp_hwloc_topology == NULL) { |
| 139 | if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) { |
| 140 | __kmp_hwloc_error = TRUE; |
| 141 | if (__kmp_affinity.flags.verbose) { |
| 142 | KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()" ); |
| 143 | } |
| 144 | } |
| 145 | if (hwloc_topology_load(__kmp_hwloc_topology) < 0) { |
| 146 | __kmp_hwloc_error = TRUE; |
| 147 | if (__kmp_affinity.flags.verbose) { |
| 148 | KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()" ); |
| 149 | } |
| 150 | } |
| 151 | } |
| 152 | topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); |
| 153 | // Is the system capable of setting/getting this thread's affinity? |
| 154 | // Also, is topology discovery possible? (pu indicates ability to discover |
| 155 | // processing units). And finally, were there no errors when calling any |
| 156 | // hwloc_* API functions? |
| 157 | if (topology_support && topology_support->cpubind->set_thisthread_cpubind && |
| 158 | topology_support->cpubind->get_thisthread_cpubind && |
| 159 | topology_support->discovery->pu && !__kmp_hwloc_error) { |
| 160 | // enables affinity according to KMP_AFFINITY_CAPABLE() macro |
| 161 | KMP_AFFINITY_ENABLE(TRUE); |
| 162 | } else { |
| 163 | // indicate that hwloc didn't work and disable affinity |
| 164 | __kmp_hwloc_error = TRUE; |
| 165 | KMP_AFFINITY_DISABLE(); |
| 166 | } |
| 167 | } |
| 168 | void bind_thread(int which) override { |
| 169 | KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), |
| 170 | "Illegal set affinity operation when not capable" ); |
| 171 | KMPAffinity::Mask *mask; |
| 172 | KMP_CPU_ALLOC_ON_STACK(mask); |
| 173 | KMP_CPU_ZERO(mask); |
| 174 | KMP_CPU_SET(which, mask); |
| 175 | __kmp_set_system_affinity(mask, TRUE); |
| 176 | KMP_CPU_FREE_FROM_STACK(mask); |
| 177 | } |
| 178 | KMPAffinity::Mask *allocate_mask() override { return new Mask(); } |
| 179 | void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } |
| 180 | KMPAffinity::Mask *allocate_mask_array(int num) override { |
| 181 | return new Mask[num]; |
| 182 | } |
| 183 | void deallocate_mask_array(KMPAffinity::Mask *array) override { |
| 184 | Mask *hwloc_array = static_cast<Mask *>(array); |
| 185 | delete[] hwloc_array; |
| 186 | } |
| 187 | KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, |
| 188 | int index) override { |
| 189 | Mask *hwloc_array = static_cast<Mask *>(array); |
| 190 | return &(hwloc_array[index]); |
| 191 | } |
| 192 | api_type get_api_type() const override { return HWLOC; } |
| 193 | }; |
| 194 | #endif /* KMP_USE_HWLOC */ |
| 195 | |
| 196 | #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY || \ |
| 197 | KMP_OS_AIX |
#if KMP_OS_LINUX
/* On some of the older OS's that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change: they are set in stone forever.

   Each architecture branch below follows the same pattern for both syscalls:
   if the headers did not define the number, define it here; if they did and
   the value differs from the one expected, fail the build. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_LOONGARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_RISCV64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_VE
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_S390X
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 239
#elif __NR_sched_setaffinity != 239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 240
#elif __NR_sched_getaffinity != 240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_SPARC
/* Note: on this branch getaffinity (260) is numbered below setaffinity (261),
   the reverse of the ordering in the other branches. */
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 261
#elif __NR_sched_setaffinity != 261
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 260
#elif __NR_sched_getaffinity != 260
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD || KMP_OS_DRAGONFLY
/* The native Mask below uses pthread_getaffinity_np/pthread_setaffinity_np
   on these systems. */
#include <pthread.h>
#include <pthread_np.h>
#elif KMP_OS_NETBSD
#include <pthread.h>
#include <sched.h>
#elif KMP_OS_AIX
#include <sys/dr.h>
#include <sys/rset.h>
#define VMI_MAXRADS 64 // Maximum number of RADs allowed by AIX.
/* NOTE(review): presumably a flag value for the syssmt() call declared below
   -- confirm against its users. */
#define GET_NUMBER_SMT_SETS 0x0004
extern "C" int syssmt(int flags, int, int, int *);
#endif
| 341 | class KMPNativeAffinity : public KMPAffinity { |
| 342 | class Mask : public KMPAffinity::Mask { |
| 343 | typedef unsigned long mask_t; |
| 344 | typedef decltype(__kmp_affin_mask_size) mask_size_type; |
| 345 | static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; |
| 346 | static const mask_t ONE = 1; |
| 347 | mask_size_type get_num_mask_types() const { |
| 348 | return __kmp_affin_mask_size / sizeof(mask_t); |
| 349 | } |
| 350 | |
| 351 | public: |
| 352 | mask_t *mask; |
| 353 | Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); } |
| 354 | ~Mask() { |
| 355 | if (mask) |
| 356 | __kmp_free(mask); |
| 357 | } |
| 358 | void set(int i) override { |
| 359 | mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T)); |
| 360 | } |
| 361 | bool is_set(int i) const override { |
| 362 | return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T))); |
| 363 | } |
| 364 | void clear(int i) override { |
| 365 | mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T)); |
| 366 | } |
| 367 | void zero() override { |
| 368 | mask_size_type e = get_num_mask_types(); |
| 369 | for (mask_size_type i = 0; i < e; ++i) |
| 370 | mask[i] = (mask_t)0; |
| 371 | } |
| 372 | bool empty() const override { |
| 373 | mask_size_type e = get_num_mask_types(); |
| 374 | for (mask_size_type i = 0; i < e; ++i) |
| 375 | if (mask[i] != (mask_t)0) |
| 376 | return false; |
| 377 | return true; |
| 378 | } |
| 379 | void copy(const KMPAffinity::Mask *src) override { |
| 380 | const Mask *convert = static_cast<const Mask *>(src); |
| 381 | mask_size_type e = get_num_mask_types(); |
| 382 | for (mask_size_type i = 0; i < e; ++i) |
| 383 | mask[i] = convert->mask[i]; |
| 384 | } |
| 385 | void bitwise_and(const KMPAffinity::Mask *rhs) override { |
| 386 | const Mask *convert = static_cast<const Mask *>(rhs); |
| 387 | mask_size_type e = get_num_mask_types(); |
| 388 | for (mask_size_type i = 0; i < e; ++i) |
| 389 | mask[i] &= convert->mask[i]; |
| 390 | } |
| 391 | void bitwise_or(const KMPAffinity::Mask *rhs) override { |
| 392 | const Mask *convert = static_cast<const Mask *>(rhs); |
| 393 | mask_size_type e = get_num_mask_types(); |
| 394 | for (mask_size_type i = 0; i < e; ++i) |
| 395 | mask[i] |= convert->mask[i]; |
| 396 | } |
| 397 | void bitwise_not() override { |
| 398 | mask_size_type e = get_num_mask_types(); |
| 399 | for (mask_size_type i = 0; i < e; ++i) |
| 400 | mask[i] = ~(mask[i]); |
| 401 | } |
| 402 | bool is_equal(const KMPAffinity::Mask *rhs) const override { |
| 403 | const Mask *convert = static_cast<const Mask *>(rhs); |
| 404 | mask_size_type e = get_num_mask_types(); |
| 405 | for (mask_size_type i = 0; i < e; ++i) |
| 406 | if (mask[i] != convert->mask[i]) |
| 407 | return false; |
| 408 | return true; |
| 409 | } |
| 410 | int begin() const override { |
| 411 | int retval = 0; |
| 412 | while (retval < end() && !is_set(i: retval)) |
| 413 | ++retval; |
| 414 | return retval; |
| 415 | } |
| 416 | int end() const override { |
| 417 | int e; |
| 418 | __kmp_type_convert(src: get_num_mask_types() * BITS_PER_MASK_T, dest: &e); |
| 419 | return e; |
| 420 | } |
| 421 | int next(int previous) const override { |
| 422 | int retval = previous + 1; |
| 423 | while (retval < end() && !is_set(i: retval)) |
| 424 | ++retval; |
| 425 | return retval; |
| 426 | } |
| 427 | #if KMP_OS_AIX |
| 428 | // On AIX, we don't have a way to get CPU(s) a thread is bound to. |
| 429 | // This routine is only used to get the full mask. |
| 430 | int get_system_affinity(bool abort_on_error) override { |
| 431 | KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), |
| 432 | "Illegal get affinity operation when not capable" ); |
| 433 | |
| 434 | (void)abort_on_error; |
| 435 | |
| 436 | // Set the mask with all CPUs that are available. |
| 437 | for (int i = 0; i < __kmp_xproc; ++i) |
| 438 | KMP_CPU_SET(i, this); |
| 439 | return 0; |
| 440 | } |
| 441 | int set_system_affinity(bool abort_on_error) const override { |
| 442 | KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), |
| 443 | |
| 444 | "Illegal set affinity operation when not capable" ); |
| 445 | |
| 446 | int location; |
| 447 | int gtid = __kmp_entry_gtid(); |
| 448 | int tid = thread_self(); |
| 449 | |
| 450 | // Unbind the thread if it was bound to any processors before so that |
| 451 | // we can bind the thread to CPUs specified by the mask not others. |
| 452 | int retval = bindprocessor(BINDTHREAD, tid, PROCESSOR_CLASS_ANY); |
| 453 | |
| 454 | // On AIX, we can only bind to one instead of a set of CPUs with the |
| 455 | // bindprocessor() system call. |
| 456 | KMP_CPU_SET_ITERATE(location, this) { |
| 457 | if (KMP_CPU_ISSET(location, this)) { |
| 458 | retval = bindprocessor(BINDTHREAD, tid, location); |
| 459 | if (retval == -1 && errno == 1) { |
| 460 | rsid_t rsid; |
| 461 | rsethandle_t rsh; |
| 462 | // Put something in rsh to prevent compiler warning |
| 463 | // about uninitalized use |
| 464 | rsh = rs_alloc(RS_EMPTY); |
| 465 | rsid.at_pid = getpid(); |
| 466 | if (RS_DEFAULT_RSET != ra_getrset(R_PROCESS, rsid, 0, rsh)) { |
| 467 | retval = ra_detachrset(R_PROCESS, rsid, 0); |
| 468 | retval = bindprocessor(BINDTHREAD, tid, location); |
| 469 | } |
| 470 | } |
| 471 | if (retval == 0) { |
| 472 | KA_TRACE(10, ("__kmp_set_system_affinity: Done binding " |
| 473 | "T#%d to cpu=%d.\n" , |
| 474 | gtid, location)); |
| 475 | continue; |
| 476 | } |
| 477 | int error = errno; |
| 478 | if (abort_on_error) { |
| 479 | __kmp_fatal(KMP_MSG(FunctionError, "bindprocessor()" ), |
| 480 | KMP_ERR(error), __kmp_msg_null); |
| 481 | KA_TRACE(10, ("__kmp_set_system_affinity: Error binding " |
| 482 | "T#%d to cpu=%d, errno=%d.\n" , |
| 483 | gtid, location, error)); |
| 484 | return error; |
| 485 | } |
| 486 | } |
| 487 | } |
| 488 | return 0; |
| 489 | } |
| 490 | #else // !KMP_OS_AIX |
| 491 | int get_system_affinity(bool abort_on_error) override { |
| 492 | KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), |
| 493 | "Illegal get affinity operation when not capable" ); |
| 494 | #if KMP_OS_LINUX |
| 495 | long retval = |
| 496 | syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask); |
| 497 | #elif KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY |
| 498 | int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, |
| 499 | reinterpret_cast<cpuset_t *>(mask)); |
| 500 | int retval = (r == 0 ? 0 : -1); |
| 501 | #endif |
| 502 | if (retval >= 0) { |
| 503 | return 0; |
| 504 | } |
| 505 | int error = errno; |
| 506 | if (abort_on_error) { |
| 507 | __kmp_fatal(KMP_MSG(FunctionError, "pthread_getaffinity_np()" ), |
| 508 | KMP_ERR(error), __kmp_msg_null); |
| 509 | } |
| 510 | return error; |
| 511 | } |
| 512 | int set_system_affinity(bool abort_on_error) const override { |
| 513 | KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), |
| 514 | "Illegal set affinity operation when not capable" ); |
| 515 | #if KMP_OS_LINUX |
| 516 | long retval = |
| 517 | syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask); |
| 518 | #elif KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY |
| 519 | int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, |
| 520 | reinterpret_cast<cpuset_t *>(mask)); |
| 521 | int retval = (r == 0 ? 0 : -1); |
| 522 | #endif |
| 523 | if (retval >= 0) { |
| 524 | return 0; |
| 525 | } |
| 526 | int error = errno; |
| 527 | if (abort_on_error) { |
| 528 | __kmp_fatal(KMP_MSG(FunctionError, "pthread_setaffinity_np()" ), |
| 529 | KMP_ERR(error), __kmp_msg_null); |
| 530 | } |
| 531 | return error; |
| 532 | } |
| 533 | #endif // KMP_OS_AIX |
| 534 | }; |
| 535 | void determine_capable(const char *env_var) override { |
| 536 | __kmp_affinity_determine_capable(env_var); |
| 537 | } |
| 538 | void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } |
| 539 | KMPAffinity::Mask *allocate_mask() override { |
| 540 | KMPNativeAffinity::Mask *retval = new Mask(); |
| 541 | return retval; |
| 542 | } |
| 543 | void deallocate_mask(KMPAffinity::Mask *m) override { |
| 544 | KMPNativeAffinity::Mask *native_mask = |
| 545 | static_cast<KMPNativeAffinity::Mask *>(m); |
| 546 | delete native_mask; |
| 547 | } |
| 548 | KMPAffinity::Mask *allocate_mask_array(int num) override { |
| 549 | return new Mask[num]; |
| 550 | } |
| 551 | void deallocate_mask_array(KMPAffinity::Mask *array) override { |
| 552 | Mask *linux_array = static_cast<Mask *>(array); |
| 553 | delete[] linux_array; |
| 554 | } |
| 555 | KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, |
| 556 | int index) override { |
| 557 | Mask *linux_array = static_cast<Mask *>(array); |
| 558 | return &(linux_array[index]); |
| 559 | } |
| 560 | api_type get_api_type() const override { return NATIVE_OS; } |
| 561 | }; |
| 562 | #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY \ |
| 563 | || KMP_OS_AIX */ |
| 564 | |
| 565 | #if KMP_OS_WINDOWS |
| 566 | class KMPNativeAffinity : public KMPAffinity { |
| 567 | class Mask : public KMPAffinity::Mask { |
| 568 | typedef ULONG_PTR mask_t; |
| 569 | static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; |
| 570 | mask_t *mask; |
| 571 | |
| 572 | public: |
| 573 | Mask() { |
| 574 | mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups); |
| 575 | } |
| 576 | ~Mask() { |
| 577 | if (mask) |
| 578 | __kmp_free(mask); |
| 579 | } |
| 580 | void set(int i) override { |
| 581 | mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); |
| 582 | } |
| 583 | bool is_set(int i) const override { |
| 584 | return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); |
| 585 | } |
| 586 | void clear(int i) override { |
| 587 | mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); |
| 588 | } |
| 589 | void zero() override { |
| 590 | for (int i = 0; i < __kmp_num_proc_groups; ++i) |
| 591 | mask[i] = 0; |
| 592 | } |
| 593 | bool empty() const override { |
| 594 | for (size_t i = 0; i < __kmp_num_proc_groups; ++i) |
| 595 | if (mask[i]) |
| 596 | return false; |
| 597 | return true; |
| 598 | } |
| 599 | void copy(const KMPAffinity::Mask *src) override { |
| 600 | const Mask *convert = static_cast<const Mask *>(src); |
| 601 | for (int i = 0; i < __kmp_num_proc_groups; ++i) |
| 602 | mask[i] = convert->mask[i]; |
| 603 | } |
| 604 | void bitwise_and(const KMPAffinity::Mask *rhs) override { |
| 605 | const Mask *convert = static_cast<const Mask *>(rhs); |
| 606 | for (int i = 0; i < __kmp_num_proc_groups; ++i) |
| 607 | mask[i] &= convert->mask[i]; |
| 608 | } |
| 609 | void bitwise_or(const KMPAffinity::Mask *rhs) override { |
| 610 | const Mask *convert = static_cast<const Mask *>(rhs); |
| 611 | for (int i = 0; i < __kmp_num_proc_groups; ++i) |
| 612 | mask[i] |= convert->mask[i]; |
| 613 | } |
| 614 | void bitwise_not() override { |
| 615 | for (int i = 0; i < __kmp_num_proc_groups; ++i) |
| 616 | mask[i] = ~(mask[i]); |
| 617 | } |
| 618 | bool is_equal(const KMPAffinity::Mask *rhs) const override { |
| 619 | const Mask *convert = static_cast<const Mask *>(rhs); |
| 620 | for (size_t i = 0; i < __kmp_num_proc_groups; ++i) |
| 621 | if (mask[i] != convert->mask[i]) |
| 622 | return false; |
| 623 | return true; |
| 624 | } |
| 625 | int begin() const override { |
| 626 | int retval = 0; |
| 627 | while (retval < end() && !is_set(retval)) |
| 628 | ++retval; |
| 629 | return retval; |
| 630 | } |
| 631 | int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; } |
| 632 | int next(int previous) const override { |
| 633 | int retval = previous + 1; |
| 634 | while (retval < end() && !is_set(retval)) |
| 635 | ++retval; |
| 636 | return retval; |
| 637 | } |
| 638 | int set_process_affinity(bool abort_on_error) const override { |
| 639 | if (__kmp_num_proc_groups <= 1) { |
| 640 | if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) { |
| 641 | DWORD error = GetLastError(); |
| 642 | if (abort_on_error) { |
| 643 | __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), |
| 644 | __kmp_msg_null); |
| 645 | } |
| 646 | return error; |
| 647 | } |
| 648 | } |
| 649 | return 0; |
| 650 | } |
| 651 | int set_system_affinity(bool abort_on_error) const override { |
| 652 | if (__kmp_num_proc_groups > 1) { |
| 653 | // Check for a valid mask. |
| 654 | GROUP_AFFINITY ga; |
| 655 | int group = get_proc_group(); |
| 656 | if (group < 0) { |
| 657 | if (abort_on_error) { |
| 658 | KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity" ); |
| 659 | } |
| 660 | return -1; |
| 661 | } |
| 662 | // Transform the bit vector into a GROUP_AFFINITY struct |
| 663 | // and make the system call to set affinity. |
| 664 | ga.Group = group; |
| 665 | ga.Mask = mask[group]; |
| 666 | ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; |
| 667 | |
| 668 | KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); |
| 669 | if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { |
| 670 | DWORD error = GetLastError(); |
| 671 | if (abort_on_error) { |
| 672 | __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), |
| 673 | __kmp_msg_null); |
| 674 | } |
| 675 | return error; |
| 676 | } |
| 677 | } else { |
| 678 | if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) { |
| 679 | DWORD error = GetLastError(); |
| 680 | if (abort_on_error) { |
| 681 | __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error), |
| 682 | __kmp_msg_null); |
| 683 | } |
| 684 | return error; |
| 685 | } |
| 686 | } |
| 687 | return 0; |
| 688 | } |
| 689 | int get_system_affinity(bool abort_on_error) override { |
| 690 | if (__kmp_num_proc_groups > 1) { |
| 691 | this->zero(); |
| 692 | GROUP_AFFINITY ga; |
| 693 | KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); |
| 694 | if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { |
| 695 | DWORD error = GetLastError(); |
| 696 | if (abort_on_error) { |
| 697 | __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()" ), |
| 698 | KMP_ERR(error), __kmp_msg_null); |
| 699 | } |
| 700 | return error; |
| 701 | } |
| 702 | if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || |
| 703 | (ga.Mask == 0)) { |
| 704 | return -1; |
| 705 | } |
| 706 | mask[ga.Group] = ga.Mask; |
| 707 | } else { |
| 708 | mask_t newMask, sysMask, retval; |
| 709 | if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { |
| 710 | DWORD error = GetLastError(); |
| 711 | if (abort_on_error) { |
| 712 | __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()" ), |
| 713 | KMP_ERR(error), __kmp_msg_null); |
| 714 | } |
| 715 | return error; |
| 716 | } |
| 717 | retval = SetThreadAffinityMask(GetCurrentThread(), newMask); |
| 718 | if (!retval) { |
| 719 | DWORD error = GetLastError(); |
| 720 | if (abort_on_error) { |
| 721 | __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()" ), |
| 722 | KMP_ERR(error), __kmp_msg_null); |
| 723 | } |
| 724 | return error; |
| 725 | } |
| 726 | newMask = SetThreadAffinityMask(GetCurrentThread(), retval); |
| 727 | if (!newMask) { |
| 728 | DWORD error = GetLastError(); |
| 729 | if (abort_on_error) { |
| 730 | __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()" ), |
| 731 | KMP_ERR(error), __kmp_msg_null); |
| 732 | } |
| 733 | } |
| 734 | *mask = retval; |
| 735 | } |
| 736 | return 0; |
| 737 | } |
| 738 | int get_proc_group() const override { |
| 739 | int group = -1; |
| 740 | if (__kmp_num_proc_groups == 1) { |
| 741 | return 1; |
| 742 | } |
| 743 | for (int i = 0; i < __kmp_num_proc_groups; i++) { |
| 744 | if (mask[i] == 0) |
| 745 | continue; |
| 746 | if (group >= 0) |
| 747 | return -1; |
| 748 | group = i; |
| 749 | } |
| 750 | return group; |
| 751 | } |
| 752 | }; |
| 753 | void determine_capable(const char *env_var) override { |
| 754 | __kmp_affinity_determine_capable(env_var); |
| 755 | } |
| 756 | void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } |
| 757 | KMPAffinity::Mask *allocate_mask() override { return new Mask(); } |
| 758 | void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } |
| 759 | KMPAffinity::Mask *allocate_mask_array(int num) override { |
| 760 | return new Mask[num]; |
| 761 | } |
| 762 | void deallocate_mask_array(KMPAffinity::Mask *array) override { |
| 763 | Mask *windows_array = static_cast<Mask *>(array); |
| 764 | delete[] windows_array; |
| 765 | } |
| 766 | KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, |
| 767 | int index) override { |
| 768 | Mask *windows_array = static_cast<Mask *>(array); |
| 769 | return &(windows_array[index]); |
| 770 | } |
| 771 | api_type get_api_type() const override { return NATIVE_OS; } |
| 772 | }; |
| 773 | #endif /* KMP_OS_WINDOWS */ |
| 774 | #endif /* KMP_AFFINITY_SUPPORTED */ |
| 775 | |
| 776 | // Describe an attribute for a level in the machine topology |
| 777 | struct kmp_hw_attr_t { |
| 778 | int core_type : 8; |
| 779 | int core_eff : 8; |
| 780 | unsigned valid : 1; |
| 781 | unsigned reserved : 15; |
| 782 | |
| 783 | static const int UNKNOWN_CORE_EFF = -1; |
| 784 | |
| 785 | kmp_hw_attr_t() |
| 786 | : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF), |
| 787 | valid(0), reserved(0) {} |
| 788 | void set_core_type(kmp_hw_core_type_t type) { |
| 789 | valid = 1; |
| 790 | core_type = type; |
| 791 | } |
| 792 | void set_core_eff(int eff) { |
| 793 | valid = 1; |
| 794 | core_eff = eff; |
| 795 | } |
| 796 | kmp_hw_core_type_t get_core_type() const { |
| 797 | return (kmp_hw_core_type_t)core_type; |
| 798 | } |
| 799 | int get_core_eff() const { return core_eff; } |
| 800 | bool is_core_type_valid() const { |
| 801 | return core_type != KMP_HW_CORE_TYPE_UNKNOWN; |
| 802 | } |
| 803 | bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; } |
| 804 | operator bool() const { return valid; } |
| 805 | void clear() { |
| 806 | core_type = KMP_HW_CORE_TYPE_UNKNOWN; |
| 807 | core_eff = UNKNOWN_CORE_EFF; |
| 808 | valid = 0; |
| 809 | } |
| 810 | bool contains(const kmp_hw_attr_t &other) const { |
| 811 | if (!valid && !other.valid) |
| 812 | return true; |
| 813 | if (valid && other.valid) { |
| 814 | if (other.is_core_type_valid()) { |
| 815 | if (!is_core_type_valid() || (get_core_type() != other.get_core_type())) |
| 816 | return false; |
| 817 | } |
| 818 | if (other.is_core_eff_valid()) { |
| 819 | if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff())) |
| 820 | return false; |
| 821 | } |
| 822 | return true; |
| 823 | } |
| 824 | return false; |
| 825 | } |
| 826 | #if KMP_AFFINITY_SUPPORTED |
| 827 | bool contains(const kmp_affinity_attrs_t &attr) const { |
| 828 | if (!valid && !attr.valid) |
| 829 | return true; |
| 830 | if (valid && attr.valid) { |
| 831 | if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN) |
| 832 | return (is_core_type_valid() && |
| 833 | (get_core_type() == (kmp_hw_core_type_t)attr.core_type)); |
| 834 | if (attr.core_eff != UNKNOWN_CORE_EFF) |
| 835 | return (is_core_eff_valid() && (get_core_eff() == attr.core_eff)); |
| 836 | return true; |
| 837 | } |
| 838 | return false; |
| 839 | } |
| 840 | #endif // KMP_AFFINITY_SUPPORTED |
| 841 | bool operator==(const kmp_hw_attr_t &rhs) const { |
| 842 | return (rhs.valid == valid && rhs.core_eff == core_eff && |
| 843 | rhs.core_type == core_type); |
| 844 | } |
| 845 | bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); } |
| 846 | }; |
| 847 | |
#if KMP_AFFINITY_SUPPORTED
// Build-time check that kmp_hw_attr_t and kmp_affinity_attrs_t have the same
// size.
KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
#endif
| 851 | |
| 852 | class kmp_hw_thread_t { |
| 853 | public: |
| 854 | static const int UNKNOWN_ID = -1; |
| 855 | static const int MULTIPLE_ID = -2; |
| 856 | static int compare_ids(const void *a, const void *b); |
| 857 | static int compare_compact(const void *a, const void *b); |
| 858 | int ids[KMP_HW_LAST]; |
| 859 | int sub_ids[KMP_HW_LAST]; |
| 860 | bool leader; |
| 861 | int os_id; |
| 862 | int original_idx; |
| 863 | kmp_hw_attr_t attrs; |
| 864 | |
| 865 | void print() const; |
| 866 | void clear() { |
| 867 | for (int i = 0; i < (int)KMP_HW_LAST; ++i) |
| 868 | ids[i] = UNKNOWN_ID; |
| 869 | leader = false; |
| 870 | attrs.clear(); |
| 871 | } |
| 872 | }; |
| 873 | |
| 874 | class kmp_topology_t { |
| 875 | |
| 876 | struct flags_t { |
| 877 | int uniform : 1; |
| 878 | int reserved : 31; |
| 879 | }; |
| 880 | |
| 881 | int depth; |
| 882 | |
| 883 | // The following arrays are all 'depth' long and have been |
| 884 | // allocated to hold up to KMP_HW_LAST number of objects if |
| 885 | // needed so layers can be added without reallocation of any array |
| 886 | |
| 887 | // Orderd array of the types in the topology |
| 888 | kmp_hw_t *types; |
| 889 | |
| 890 | // Keep quick topology ratios, for non-uniform topologies, |
| 891 | // this ratio holds the max number of itemAs per itemB |
| 892 | // e.g., [ 4 packages | 6 cores / package | 2 threads / core ] |
| 893 | int *ratio; |
| 894 | |
| 895 | // Storage containing the absolute number of each topology layer |
| 896 | int *count; |
| 897 | |
| 898 | // The number of core efficiencies. This is only useful for hybrid |
| 899 | // topologies. Core efficiencies will range from 0 to num efficiencies - 1 |
| 900 | int num_core_efficiencies; |
| 901 | int num_core_types; |
| 902 | kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES]; |
| 903 | |
| 904 | // The hardware threads array |
| 905 | // hw_threads is num_hw_threads long |
| 906 | // Each hw_thread's ids and sub_ids are depth deep |
| 907 | int num_hw_threads; |
| 908 | kmp_hw_thread_t *hw_threads; |
| 909 | |
| 910 | // Equivalence hash where the key is the hardware topology item |
| 911 | // and the value is the equivalent hardware topology type in the |
| 912 | // types[] array, if the value is KMP_HW_UNKNOWN, then there is no |
| 913 | // known equivalence for the topology type |
| 914 | kmp_hw_t equivalent[KMP_HW_LAST]; |
| 915 | |
| 916 | // Flags describing the topology |
| 917 | flags_t flags; |
| 918 | |
| 919 | // Compact value used during sort_compact() |
| 920 | int compact; |
| 921 | |
| 922 | #if KMP_GROUP_AFFINITY |
| 923 | // Insert topology information about Windows Processor groups |
| 924 | void _insert_windows_proc_groups(); |
| 925 | #endif |
| 926 | |
| 927 | // Count each item & get the num x's per y |
| 928 | // e.g., get the number of cores and the number of threads per core |
| 929 | // for each (x, y) in (KMP_HW_* , KMP_HW_*) |
| 930 | void _gather_enumeration_information(); |
| 931 | |
| 932 | // Remove layers that don't add information to the topology. |
| 933 | // This is done by having the layer take on the id = UNKNOWN_ID (-1) |
| 934 | void _remove_radix1_layers(); |
| 935 | |
| 936 | // Find out if the topology is uniform |
| 937 | void _discover_uniformity(); |
| 938 | |
| 939 | // Set all the sub_ids for each hardware thread |
| 940 | void _set_sub_ids(); |
| 941 | |
| 942 | // Set global affinity variables describing the number of threads per |
| 943 | // core, the number of packages, the number of cores per package, and |
| 944 | // the number of cores. |
| 945 | void _set_globals(); |
| 946 | |
| 947 | // Set the last level cache equivalent type |
| 948 | void _set_last_level_cache(); |
| 949 | |
| 950 | // Return the number of cores with a particular attribute, 'attr'. |
| 951 | // If 'find_all' is true, then find all cores on the machine, otherwise find |
| 952 | // all cores per the layer 'above' |
| 953 | int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above, |
| 954 | bool find_all = false) const; |
| 955 | |
| 956 | public: |
| 957 | // Force use of allocate()/deallocate() |
| 958 | kmp_topology_t() = delete; |
| 959 | kmp_topology_t(const kmp_topology_t &t) = delete; |
| 960 | kmp_topology_t(kmp_topology_t &&t) = delete; |
| 961 | kmp_topology_t &operator=(const kmp_topology_t &t) = delete; |
| 962 | kmp_topology_t &operator=(kmp_topology_t &&t) = delete; |
| 963 | |
| 964 | static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types); |
| 965 | static void deallocate(kmp_topology_t *); |
| 966 | |
| 967 | // Functions used in create_map() routines |
| 968 | kmp_hw_thread_t &at(int index) { |
| 969 | KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads); |
| 970 | return hw_threads[index]; |
| 971 | } |
| 972 | const kmp_hw_thread_t &at(int index) const { |
| 973 | KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads); |
| 974 | return hw_threads[index]; |
| 975 | } |
| 976 | int get_num_hw_threads() const { return num_hw_threads; } |
| 977 | void sort_ids() { |
| 978 | qsort(base: hw_threads, nmemb: num_hw_threads, size: sizeof(kmp_hw_thread_t), |
| 979 | compar: kmp_hw_thread_t::compare_ids); |
| 980 | } |
| 981 | |
| 982 | // Insert a new topology layer after allocation |
| 983 | void insert_layer(kmp_hw_t type, const int *ids); |
| 984 | |
| 985 | // Check if the hardware ids are unique, if they are |
| 986 | // return true, otherwise return false |
| 987 | bool check_ids() const; |
| 988 | |
| 989 | // Function to call after the create_map() routine |
| 990 | void canonicalize(); |
| 991 | void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores); |
| 992 | |
| 993 | // Functions used after canonicalize() called |
| 994 | |
| 995 | #if KMP_AFFINITY_SUPPORTED |
| 996 | // Set the granularity for affinity settings |
| 997 | void set_granularity(kmp_affinity_t &stgs) const; |
| 998 | bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const; |
| 999 | bool restrict_to_mask(const kmp_affin_mask_t *mask); |
| 1000 | bool filter_hw_subset(); |
| 1001 | #endif |
| 1002 | bool is_uniform() const { return flags.uniform; } |
| 1003 | // Tell whether a type is a valid type in the topology |
| 1004 | // returns KMP_HW_UNKNOWN when there is no equivalent type |
| 1005 | kmp_hw_t get_equivalent_type(kmp_hw_t type) const { |
| 1006 | if (type == KMP_HW_UNKNOWN) |
| 1007 | return KMP_HW_UNKNOWN; |
| 1008 | return equivalent[type]; |
| 1009 | } |
| 1010 | // Set type1 = type2 |
| 1011 | void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) { |
| 1012 | KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1); |
| 1013 | KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2); |
| 1014 | kmp_hw_t real_type2 = equivalent[type2]; |
| 1015 | if (real_type2 == KMP_HW_UNKNOWN) |
| 1016 | real_type2 = type2; |
| 1017 | equivalent[type1] = real_type2; |
| 1018 | // This loop is required since any of the types may have been set to |
| 1019 | // be equivalent to type1. They all must be checked and reset to type2. |
| 1020 | KMP_FOREACH_HW_TYPE(type) { |
| 1021 | if (equivalent[type] == type1) { |
| 1022 | equivalent[type] = real_type2; |
| 1023 | } |
| 1024 | } |
| 1025 | } |
| 1026 | // Calculate number of types corresponding to level1 |
| 1027 | // per types corresponding to level2 (e.g., number of threads per core) |
| 1028 | int calculate_ratio(int level1, int level2) const { |
| 1029 | KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth); |
| 1030 | KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth); |
| 1031 | int r = 1; |
| 1032 | for (int level = level1; level > level2; --level) |
| 1033 | r *= ratio[level]; |
| 1034 | return r; |
| 1035 | } |
| 1036 | int get_ratio(int level) const { |
| 1037 | KMP_DEBUG_ASSERT(level >= 0 && level < depth); |
| 1038 | return ratio[level]; |
| 1039 | } |
| 1040 | int get_depth() const { return depth; }; |
| 1041 | kmp_hw_t get_type(int level) const { |
| 1042 | KMP_DEBUG_ASSERT(level >= 0 && level < depth); |
| 1043 | return types[level]; |
| 1044 | } |
| 1045 | int get_level(kmp_hw_t type) const { |
| 1046 | KMP_DEBUG_ASSERT_VALID_HW_TYPE(type); |
| 1047 | int eq_type = equivalent[type]; |
| 1048 | if (eq_type == KMP_HW_UNKNOWN) |
| 1049 | return -1; |
| 1050 | for (int i = 0; i < depth; ++i) |
| 1051 | if (types[i] == eq_type) |
| 1052 | return i; |
| 1053 | return -1; |
| 1054 | } |
| 1055 | int get_count(int level) const { |
| 1056 | KMP_DEBUG_ASSERT(level >= 0 && level < depth); |
| 1057 | return count[level]; |
| 1058 | } |
| 1059 | // Return the total number of cores with attribute 'attr' |
| 1060 | int get_ncores_with_attr(const kmp_hw_attr_t &attr) const { |
| 1061 | return _get_ncores_with_attr(attr, above: -1, find_all: true); |
| 1062 | } |
| 1063 | // Return the number of cores with attribute |
| 1064 | // 'attr' per topology level 'above' |
| 1065 | int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const { |
| 1066 | return _get_ncores_with_attr(attr, above, find_all: false); |
| 1067 | } |
| 1068 | |
| 1069 | #if KMP_AFFINITY_SUPPORTED |
| 1070 | friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b); |
| 1071 | void sort_compact(kmp_affinity_t &affinity) { |
| 1072 | compact = affinity.compact; |
| 1073 | qsort(base: hw_threads, nmemb: num_hw_threads, size: sizeof(kmp_hw_thread_t), |
| 1074 | compar: kmp_hw_thread_t::compare_compact); |
| 1075 | } |
| 1076 | #endif |
| 1077 | void print(const char *env_var = "KMP_AFFINITY" ) const; |
| 1078 | void dump() const; |
| 1079 | }; |
| 1080 | extern kmp_topology_t *__kmp_topology; |
| 1081 | |
| 1082 | class kmp_hw_subset_t { |
| 1083 | const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS; |
| 1084 | |
| 1085 | public: |
| 1086 | // Describe a machine topology item in KMP_HW_SUBSET |
| 1087 | struct item_t { |
| 1088 | kmp_hw_t type; |
| 1089 | int num_attrs; |
| 1090 | int num[MAX_ATTRS]; |
| 1091 | int offset[MAX_ATTRS]; |
| 1092 | kmp_hw_attr_t attr[MAX_ATTRS]; |
| 1093 | }; |
| 1094 | // Put parenthesis around max to avoid accidental use of Windows max macro. |
| 1095 | const static int USE_ALL = (std::numeric_limits<int>::max)(); |
| 1096 | |
| 1097 | private: |
| 1098 | int depth; |
| 1099 | int capacity; |
| 1100 | item_t *items; |
| 1101 | kmp_uint64 set; |
| 1102 | bool absolute; |
| 1103 | // The set must be able to handle up to KMP_HW_LAST number of layers |
| 1104 | KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST); |
| 1105 | // Sorting the KMP_HW_SUBSET items to follow topology order |
| 1106 | // All unknown topology types will be at the beginning of the subset |
| 1107 | static int hw_subset_compare(const void *i1, const void *i2) { |
| 1108 | kmp_hw_t type1 = ((const item_t *)i1)->type; |
| 1109 | kmp_hw_t type2 = ((const item_t *)i2)->type; |
| 1110 | int level1 = __kmp_topology->get_level(type: type1); |
| 1111 | int level2 = __kmp_topology->get_level(type: type2); |
| 1112 | return level1 - level2; |
| 1113 | } |
| 1114 | |
| 1115 | public: |
| 1116 | // Force use of allocate()/deallocate() |
| 1117 | kmp_hw_subset_t() = delete; |
| 1118 | kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete; |
| 1119 | kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete; |
| 1120 | kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete; |
| 1121 | kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete; |
| 1122 | |
| 1123 | static kmp_hw_subset_t *allocate() { |
| 1124 | int initial_capacity = 5; |
| 1125 | kmp_hw_subset_t *retval = |
| 1126 | (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t)); |
| 1127 | retval->depth = 0; |
| 1128 | retval->capacity = initial_capacity; |
| 1129 | retval->set = 0ull; |
| 1130 | retval->absolute = false; |
| 1131 | retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity); |
| 1132 | return retval; |
| 1133 | } |
| 1134 | static void deallocate(kmp_hw_subset_t *subset) { |
| 1135 | __kmp_free(subset->items); |
| 1136 | __kmp_free(subset); |
| 1137 | } |
| 1138 | void set_absolute() { absolute = true; } |
| 1139 | bool is_absolute() const { return absolute; } |
| 1140 | void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) { |
| 1141 | for (int i = 0; i < depth; ++i) { |
| 1142 | // Found an existing item for this layer type |
| 1143 | // Add the num, offset, and attr to this item |
| 1144 | if (items[i].type == type) { |
| 1145 | int idx = items[i].num_attrs++; |
| 1146 | if ((size_t)idx >= MAX_ATTRS) |
| 1147 | return; |
| 1148 | items[i].num[idx] = num; |
| 1149 | items[i].offset[idx] = offset; |
| 1150 | items[i].attr[idx] = attr; |
| 1151 | return; |
| 1152 | } |
| 1153 | } |
| 1154 | if (depth == capacity - 1) { |
| 1155 | capacity *= 2; |
| 1156 | item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity); |
| 1157 | for (int i = 0; i < depth; ++i) |
| 1158 | new_items[i] = items[i]; |
| 1159 | __kmp_free(items); |
| 1160 | items = new_items; |
| 1161 | } |
| 1162 | items[depth].num_attrs = 1; |
| 1163 | items[depth].type = type; |
| 1164 | items[depth].num[0] = num; |
| 1165 | items[depth].offset[0] = offset; |
| 1166 | items[depth].attr[0] = attr; |
| 1167 | depth++; |
| 1168 | set |= (1ull << type); |
| 1169 | } |
| 1170 | int get_depth() const { return depth; } |
| 1171 | const item_t &at(int index) const { |
| 1172 | KMP_DEBUG_ASSERT(index >= 0 && index < depth); |
| 1173 | return items[index]; |
| 1174 | } |
| 1175 | item_t &at(int index) { |
| 1176 | KMP_DEBUG_ASSERT(index >= 0 && index < depth); |
| 1177 | return items[index]; |
| 1178 | } |
| 1179 | void remove(int index) { |
| 1180 | KMP_DEBUG_ASSERT(index >= 0 && index < depth); |
| 1181 | set &= ~(1ull << items[index].type); |
| 1182 | for (int j = index + 1; j < depth; ++j) { |
| 1183 | items[j - 1] = items[j]; |
| 1184 | } |
| 1185 | depth--; |
| 1186 | } |
| 1187 | void sort() { |
| 1188 | KMP_DEBUG_ASSERT(__kmp_topology); |
| 1189 | qsort(base: items, nmemb: depth, size: sizeof(item_t), compar: hw_subset_compare); |
| 1190 | } |
| 1191 | bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); } |
| 1192 | |
| 1193 | // Canonicalize the KMP_HW_SUBSET value if it is not an absolute subset. |
| 1194 | // This means putting each of {sockets, cores, threads} in the topology if |
| 1195 | // they are not specified: |
| 1196 | // e.g., 1s,2c => 1s,2c,*t | 2c,1t => *s,2c,1t | 1t => *s,*c,1t | etc. |
| 1197 | // e.g., 3module => *s,3module,*c,*t |
| 1198 | // By doing this, the runtime assumes users who fiddle with KMP_HW_SUBSET |
| 1199 | // are expecting the traditional sockets/cores/threads topology. For newer |
| 1200 | // hardware, there can be intervening layers like dies/tiles/modules |
| 1201 | // (usually corresponding to a cache level). So when a user asks for |
| 1202 | // 1s,6c,2t and the topology is really 1s,2modules,4cores,2threads, the user |
| 1203 | // should get 12 hardware threads across 6 cores and effectively ignore the |
| 1204 | // module layer. |
| 1205 | void canonicalize(const kmp_topology_t *top) { |
| 1206 | // Layers to target for KMP_HW_SUBSET canonicalization |
| 1207 | kmp_hw_t targeted[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD}; |
| 1208 | |
| 1209 | // Do not target-layer-canonicalize absolute KMP_HW_SUBSETS |
| 1210 | if (is_absolute()) |
| 1211 | return; |
| 1212 | |
| 1213 | // Do not target-layer-canonicalize KMP_HW_SUBSETS when the |
| 1214 | // topology doesn't have these layers |
| 1215 | for (kmp_hw_t type : targeted) |
| 1216 | if (top->get_level(type) == KMP_HW_UNKNOWN) |
| 1217 | return; |
| 1218 | |
| 1219 | // Put targeted layers in topology if they do not exist |
| 1220 | for (kmp_hw_t type : targeted) { |
| 1221 | bool found = false; |
| 1222 | for (int i = 0; i < get_depth(); ++i) { |
| 1223 | if (top->get_equivalent_type(type: items[i].type) == type) { |
| 1224 | found = true; |
| 1225 | break; |
| 1226 | } |
| 1227 | } |
| 1228 | if (!found) { |
| 1229 | push_back(num: USE_ALL, type, offset: 0, attr: kmp_hw_attr_t{}); |
| 1230 | } |
| 1231 | } |
| 1232 | sort(); |
| 1233 | // Set as an absolute topology that only targets the targeted layers |
| 1234 | set_absolute(); |
| 1235 | } |
| 1236 | void dump() const { |
| 1237 | printf(format: "**********************\n" ); |
| 1238 | printf(format: "*** kmp_hw_subset: ***\n" ); |
| 1239 | printf(format: "* depth: %d\n" , depth); |
| 1240 | printf(format: "* items:\n" ); |
| 1241 | for (int i = 0; i < depth; ++i) { |
| 1242 | printf(format: " type: %s\n" , __kmp_hw_get_keyword(type: items[i].type)); |
| 1243 | for (int j = 0; j < items[i].num_attrs; ++j) { |
| 1244 | printf(format: " num: %d, offset: %d, attr: " , items[i].num[j], |
| 1245 | items[i].offset[j]); |
| 1246 | if (!items[i].attr[j]) { |
| 1247 | printf(format: " (none)\n" ); |
| 1248 | } else { |
| 1249 | printf( |
| 1250 | format: " core_type = %s, core_eff = %d\n" , |
| 1251 | __kmp_hw_get_core_type_string(type: items[i].attr[j].get_core_type()), |
| 1252 | items[i].attr[j].get_core_eff()); |
| 1253 | } |
| 1254 | } |
| 1255 | } |
| 1256 | printf(format: "* set: 0x%llx\n" , set); |
| 1257 | printf(format: "* absolute: %d\n" , absolute); |
| 1258 | printf(format: "**********************\n" ); |
| 1259 | } |
| 1260 | }; |
| 1261 | extern kmp_hw_subset_t *__kmp_hw_subset; |
| 1262 | |
| 1263 | /* A structure for holding machine-specific hierarchy info to be computed once |
| 1264 | at init. This structure represents a mapping of threads to the actual machine |
| 1265 | hierarchy, or to our best guess at what the hierarchy might be, for the |
| 1266 | purpose of performing an efficient barrier. In the worst case, when there is |
| 1267 | no machine hierarchy information, it produces a tree suitable for a barrier, |
| 1268 | similar to the tree used in the hyper barrier. */ |
| 1269 | class hierarchy_info { |
| 1270 | public: |
| 1271 | /* Good default values for number of leaves and branching factor, given no |
| 1272 | affinity information. Behaves a bit like hyper barrier. */ |
| 1273 | static const kmp_uint32 maxLeaves = 4; |
| 1274 | static const kmp_uint32 minBranch = 4; |
| 1275 | /** Number of levels in the hierarchy. Typical levels are threads/core, |
| 1276 | cores/package or socket, packages/node, nodes/machine, etc. We don't want |
| 1277 | to get specific with nomenclature. When the machine is oversubscribed we |
| 1278 | add levels to duplicate the hierarchy, doubling the thread capacity of the |
| 1279 | hierarchy each time we add a level. */ |
| 1280 | kmp_uint32 maxLevels; |
| 1281 | |
| 1282 | /** This is specifically the depth of the machine configuration hierarchy, in |
| 1283 | terms of the number of levels along the longest path from root to any |
| 1284 | leaf. It corresponds to the number of entries in numPerLevel if we exclude |
| 1285 | all but one trailing 1. */ |
| 1286 | kmp_uint32 depth; |
| 1287 | kmp_uint32 base_num_threads = 0; |
| 1288 | enum init_status { initialized = 0, not_initialized = 1, initializing = 2 }; |
| 1289 | volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, |
| 1290 | // 2=initialization in progress |
| 1291 | volatile kmp_int8 resizing; // 0=not resizing, 1=resizing |
| 1292 | |
| 1293 | /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children |
| 1294 | the parent of a node at level i has. For example, if we have a machine |
| 1295 | with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel = |
| 1296 | {2, 4, 4, 1, 1}. All empty levels are set to 1. */ |
| 1297 | kmp_uint32 *numPerLevel = nullptr; |
| 1298 | kmp_uint32 *skipPerLevel = nullptr; |
| 1299 | |
| 1300 | void deriveLevels() { |
| 1301 | int hier_depth = __kmp_topology->get_depth(); |
| 1302 | for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) { |
| 1303 | numPerLevel[level] = __kmp_topology->get_ratio(level: i); |
| 1304 | } |
| 1305 | } |
| 1306 | |
| 1307 | hierarchy_info() |
| 1308 | : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} |
| 1309 | |
| 1310 | void fini() { |
| 1311 | if (!uninitialized && numPerLevel) { |
| 1312 | __kmp_free(numPerLevel); |
| 1313 | numPerLevel = NULL; |
| 1314 | uninitialized = not_initialized; |
| 1315 | } |
| 1316 | } |
| 1317 | |
| 1318 | void init(int num_addrs) { |
| 1319 | kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8( |
| 1320 | &uninitialized, not_initialized, initializing); |
| 1321 | if (bool_result == 0) { // Wait for initialization |
| 1322 | while (TCR_1(uninitialized) != initialized) |
| 1323 | KMP_CPU_PAUSE(); |
| 1324 | return; |
| 1325 | } |
| 1326 | KMP_DEBUG_ASSERT(bool_result == 1); |
| 1327 | |
| 1328 | /* Added explicit initialization of the data fields here to prevent usage of |
| 1329 | dirty value observed when static library is re-initialized multiple times |
| 1330 | (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses |
| 1331 | OpenMP). */ |
| 1332 | depth = 1; |
| 1333 | resizing = 0; |
| 1334 | maxLevels = 7; |
| 1335 | numPerLevel = |
| 1336 | (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); |
| 1337 | skipPerLevel = &(numPerLevel[maxLevels]); |
| 1338 | for (kmp_uint32 i = 0; i < maxLevels; |
| 1339 | ++i) { // init numPerLevel[*] to 1 item per level |
| 1340 | numPerLevel[i] = 1; |
| 1341 | skipPerLevel[i] = 1; |
| 1342 | } |
| 1343 | |
| 1344 | // Sort table by physical ID |
| 1345 | if (__kmp_topology && __kmp_topology->get_depth() > 0) { |
| 1346 | deriveLevels(); |
| 1347 | } else { |
| 1348 | numPerLevel[0] = maxLeaves; |
| 1349 | numPerLevel[1] = num_addrs / maxLeaves; |
| 1350 | if (num_addrs % maxLeaves) |
| 1351 | numPerLevel[1]++; |
| 1352 | } |
| 1353 | |
| 1354 | base_num_threads = num_addrs; |
| 1355 | for (int i = maxLevels - 1; i >= 0; |
| 1356 | --i) // count non-empty levels to get depth |
| 1357 | if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' |
| 1358 | depth++; |
| 1359 | |
| 1360 | kmp_uint32 branch = minBranch; |
| 1361 | if (numPerLevel[0] == 1) |
| 1362 | branch = num_addrs / maxLeaves; |
| 1363 | if (branch < minBranch) |
| 1364 | branch = minBranch; |
| 1365 | for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width |
| 1366 | while (numPerLevel[d] > branch || |
| 1367 | (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0! |
| 1368 | if (numPerLevel[d] & 1) |
| 1369 | numPerLevel[d]++; |
| 1370 | numPerLevel[d] = numPerLevel[d] >> 1; |
| 1371 | if (numPerLevel[d + 1] == 1) |
| 1372 | depth++; |
| 1373 | numPerLevel[d + 1] = numPerLevel[d + 1] << 1; |
| 1374 | } |
| 1375 | if (numPerLevel[0] == 1) { |
| 1376 | branch = branch >> 1; |
| 1377 | if (branch < 4) |
| 1378 | branch = minBranch; |
| 1379 | } |
| 1380 | } |
| 1381 | |
| 1382 | for (kmp_uint32 i = 1; i < depth; ++i) |
| 1383 | skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1]; |
| 1384 | // Fill in hierarchy in the case of oversubscription |
| 1385 | for (kmp_uint32 i = depth; i < maxLevels; ++i) |
| 1386 | skipPerLevel[i] = 2 * skipPerLevel[i - 1]; |
| 1387 | |
| 1388 | uninitialized = initialized; // One writer |
| 1389 | } |
| 1390 | |
| 1391 | // Resize the hierarchy if nproc changes to something larger than before |
| 1392 | void resize(kmp_uint32 nproc) { |
| 1393 | kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); |
| 1394 | while (bool_result == 0) { // someone else is trying to resize |
| 1395 | KMP_CPU_PAUSE(); |
| 1396 | if (nproc <= base_num_threads) // happy with other thread's resize |
| 1397 | return; |
| 1398 | else // try to resize |
| 1399 | bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); |
| 1400 | } |
| 1401 | KMP_DEBUG_ASSERT(bool_result != 0); |
| 1402 | if (nproc <= base_num_threads) |
| 1403 | return; // happy with other thread's resize |
| 1404 | |
| 1405 | // Calculate new maxLevels |
| 1406 | kmp_uint32 old_sz = skipPerLevel[depth - 1]; |
| 1407 | kmp_uint32 incs = 0, old_maxLevels = maxLevels; |
| 1408 | // First see if old maxLevels is enough to contain new size |
| 1409 | for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) { |
| 1410 | skipPerLevel[i] = 2 * skipPerLevel[i - 1]; |
| 1411 | numPerLevel[i - 1] *= 2; |
| 1412 | old_sz *= 2; |
| 1413 | depth++; |
| 1414 | } |
| 1415 | if (nproc > old_sz) { // Not enough space, need to expand hierarchy |
| 1416 | while (nproc > old_sz) { |
| 1417 | old_sz *= 2; |
| 1418 | incs++; |
| 1419 | depth++; |
| 1420 | } |
| 1421 | maxLevels += incs; |
| 1422 | |
| 1423 | // Resize arrays |
| 1424 | kmp_uint32 *old_numPerLevel = numPerLevel; |
| 1425 | kmp_uint32 *old_skipPerLevel = skipPerLevel; |
| 1426 | numPerLevel = skipPerLevel = NULL; |
| 1427 | numPerLevel = |
| 1428 | (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); |
| 1429 | skipPerLevel = &(numPerLevel[maxLevels]); |
| 1430 | |
| 1431 | // Copy old elements from old arrays |
| 1432 | for (kmp_uint32 i = 0; i < old_maxLevels; ++i) { |
| 1433 | // init numPerLevel[*] to 1 item per level |
| 1434 | numPerLevel[i] = old_numPerLevel[i]; |
| 1435 | skipPerLevel[i] = old_skipPerLevel[i]; |
| 1436 | } |
| 1437 | |
| 1438 | // Init new elements in arrays to 1 |
| 1439 | for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) { |
| 1440 | // init numPerLevel[*] to 1 item per level |
| 1441 | numPerLevel[i] = 1; |
| 1442 | skipPerLevel[i] = 1; |
| 1443 | } |
| 1444 | |
| 1445 | // Free old arrays |
| 1446 | __kmp_free(old_numPerLevel); |
| 1447 | } |
| 1448 | |
| 1449 | // Fill in oversubscription levels of hierarchy |
| 1450 | for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) |
| 1451 | skipPerLevel[i] = 2 * skipPerLevel[i - 1]; |
| 1452 | |
| 1453 | base_num_threads = nproc; |
| 1454 | resizing = 0; // One writer |
| 1455 | } |
| 1456 | }; |
| 1457 | #endif // KMP_AFFINITY_H |
| 1458 | |