| 1 | // Copyright (c) 2005, 2007, Google Inc. |
| 2 | // All rights reserved. |
| 3 | // Copyright (C) 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. |
| 4 | // |
| 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are |
| 7 | // met: |
| 8 | // |
| 9 | // * Redistributions of source code must retain the above copyright |
| 10 | // notice, this list of conditions and the following disclaimer. |
| 11 | // * Redistributions in binary form must reproduce the above |
| 12 | // copyright notice, this list of conditions and the following disclaimer |
| 13 | // in the documentation and/or other materials provided with the |
| 14 | // distribution. |
| 15 | // * Neither the name of Google Inc. nor the names of its |
| 16 | // contributors may be used to endorse or promote products derived from |
| 17 | // this software without specific prior written permission. |
| 18 | // |
| 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | |
| 31 | // --- |
| 32 | // Author: Sanjay Ghemawat <opensource@google.com> |
| 33 | // |
| 34 | // A malloc that uses a per-thread cache to satisfy small malloc requests. |
| 35 | // (The time for malloc/free of a small object drops from 300 ns to 50 ns.) |
| 36 | // |
| 37 | // See doc/tcmalloc.html for a high-level |
| 38 | // description of how this malloc works. |
| 39 | // |
| 40 | // SYNCHRONIZATION |
| 41 | // 1. The thread-specific lists are accessed without acquiring any locks. |
| 42 | // This is safe because each such list is only accessed by one thread. |
| 43 | // 2. We have a lock per central free-list, and hold it while manipulating |
| 44 | // the central free list for a particular size. |
| 45 | // 3. The central page allocator is protected by "pageheap_lock". |
| 46 | // 4. The pagemap (which maps from page-number to descriptor), |
| 47 | // can be read without holding any locks, and written while holding |
| 48 | // the "pageheap_lock". |
| 49 | // 5. To improve performance, a subset of the information one can get |
| 50 | // from the pagemap is cached in a data structure, pagemap_cache_, |
| 51 | // that atomically reads and writes its entries. This cache can be |
| 52 | // read and written without locking. |
| 53 | // |
| 54 | // This multi-threaded access to the pagemap is safe for fairly |
| 55 | // subtle reasons. We basically assume that when an object X is |
| 56 | // allocated by thread A and deallocated by thread B, there must |
| 57 | // have been appropriate synchronization in the handoff of object |
| 58 | // X from thread A to thread B. The same logic applies to pagemap_cache_. |
| 59 | // |
| 60 | // THE PAGEID-TO-SIZECLASS CACHE |
| 61 | // Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache |
| 62 | // returns 0 for a particular PageID then that means "no information," not that |
| 63 | // the sizeclass is 0. The cache may have stale information for pages that do |
| 64 | // not hold the beginning of any free()'able object. Staleness is eliminated |
| 65 | // in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and |
| 66 | // do_memalign() for all other relevant pages. |
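// For example (an illustrative sketch, not taken from this file's callers): a
// reader of the cache must treat a zero result as "no information" and fall
// back to the full pagemap, e.g.
//
//   size_t cl = pageheap->GetSizeClassIfCached(p);   // 'pageheap' is a hypothetical heap instance
//   if (cl == 0) {
//       // Unknown: consult GetDescriptor(p) on the page heap instead.
//   }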
| 67 | // |
| 68 | // TODO: Bias reclamation to larger addresses |
| 69 | // TODO: implement mallinfo/mallopt |
| 70 | // TODO: Better testing |
| 71 | // |
| 72 | // 9/28/2003 (new page-level allocator replaces ptmalloc2): |
| 73 | // * malloc/free of small objects goes from ~300 ns to ~50 ns. |
| 74 | // * allocation of a reasonably complicated struct |
| 75 | // goes from about 1100 ns to about 300 ns. |
| 76 | |
| 77 | #include "config.h" |
| 78 | #include "FastMalloc.h" |
| 79 | |
| 80 | #include "Assertions.h" |
| 81 | #include <limits> |
| 82 | #if ENABLE(JSC_MULTIPLE_THREADS) |
| 83 | #include <pthread.h> |
| 84 | #endif |
| 85 | |
| 86 | #ifndef NO_TCMALLOC_SAMPLES |
| 87 | #ifdef WTF_CHANGES |
| 88 | #define NO_TCMALLOC_SAMPLES |
| 89 | #endif |
| 90 | #endif |
| 91 | |
| 92 | #if !(defined(USE_SYSTEM_MALLOC) && USE_SYSTEM_MALLOC) && defined(NDEBUG) |
| 93 | #define FORCE_SYSTEM_MALLOC 0 |
| 94 | #else |
| 95 | #define FORCE_SYSTEM_MALLOC 1 |
| 96 | #endif |
| 97 | |
| 98 | // Use a background thread to periodically scavenge memory to release back to the system |
| 99 | // https://bugs.webkit.org/show_bug.cgi?id=27900: don't turn this on for Tiger until we have figured out why it caused a crash. |
| 100 | #if defined(BUILDING_ON_TIGER) |
| 101 | #define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 0 |
| 102 | #else |
| 103 | #define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1 |
| 104 | #endif |
| 105 | |
| 106 | #if defined(__HP_aCC) |
| 107 | // HP's aCC compiler has broken "for" scoping
| 108 | # define for if(0){}else for |
| 109 | #endif |
| 110 | |
| 111 | #ifndef NDEBUG |
| 112 | namespace WTF { |
| 113 | |
| 114 | #if ENABLE(JSC_MULTIPLE_THREADS) |
| 115 | static pthread_key_t isForbiddenKey; |
| 116 | static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT; |
| 117 | static void initializeIsForbiddenKey() |
| 118 | { |
| 119 | pthread_key_create(&isForbiddenKey, 0); |
| 120 | } |
| 121 | |
| 122 | #if !ASSERT_DISABLED |
| 123 | static bool isForbidden() |
| 124 | { |
| 125 | pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); |
| 126 | return !!pthread_getspecific(isForbiddenKey); |
| 127 | } |
| 128 | #endif |
| 129 | |
| 130 | void fastMallocForbid() |
| 131 | { |
| 132 | pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); |
| 133 | pthread_setspecific(isForbiddenKey, &isForbiddenKey); |
| 134 | } |
| 135 | |
| 136 | void fastMallocAllow() |
| 137 | { |
| 138 | pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey); |
| 139 | pthread_setspecific(isForbiddenKey, 0); |
| 140 | } |
| 141 | |
| 142 | #else |
| 143 | |
| 144 | static bool staticIsForbidden; |
| 145 | static bool isForbidden() |
| 146 | { |
| 147 | return staticIsForbidden; |
| 148 | } |
| 149 | |
| 150 | void fastMallocForbid() |
| 151 | { |
| 152 | staticIsForbidden = true; |
| 153 | } |
| 154 | |
| 155 | void fastMallocAllow() |
| 156 | { |
| 157 | staticIsForbidden = false; |
| 158 | } |
| 159 | #endif // ENABLE(JSC_MULTIPLE_THREADS) |
| 160 | |
| 161 | } // namespace WTF |
| 162 | #endif // NDEBUG |
| 163 | |
| 164 | #include <string.h> |
| 165 | |
| 166 | namespace WTF { |
| 167 | |
| 168 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 169 | |
| 170 | namespace Internal { |
| 171 | |
| 172 | void fastMallocMatchFailed(void*) |
| 173 | { |
| 174 | CRASH(); |
| 175 | } |
| 176 | |
| 177 | } // namespace Internal |
| 178 | |
| 179 | #endif |
| 180 | |
| 181 | void* fastZeroedMalloc(size_t n) |
| 182 | { |
| 183 | void* result = fastMalloc(n); |
| 184 | memset(result, 0, n);
| 185 | return result; |
| 186 | } |
| 187 | |
| 188 | char* fastStrDup(const char* src) |
| 189 | { |
| 190 | int len = strlen(src) + 1;
| 191 | char* dup = static_cast<char*>(fastMalloc(len));
| 192 | 
| 193 | if (dup)
| 194 | memcpy(dup, src, len);
| 195 | |
| 196 | return dup; |
| 197 | } |
| 198 | |
| 199 | TryMallocReturnValue tryFastZeroedMalloc(size_t n) |
| 200 | { |
| 201 | void* result; |
| 202 | if (!tryFastMalloc(n).getValue(result))
| 203 | return 0;
| 204 | memset(result, 0, n);
| 205 | return result; |
| 206 | } |
| 207 | |
| 208 | } // namespace WTF |
| 209 | |
| 210 | #if FORCE_SYSTEM_MALLOC |
| 211 | |
| 212 | namespace WTF { |
| 213 | |
| 214 | TryMallocReturnValue tryFastMalloc(size_t n) |
| 215 | { |
| 216 | ASSERT(!isForbidden()); |
| 217 | |
| 218 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 219 | if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur... |
| 220 | return 0; |
| 221 | |
| 222 | void* result = malloc(n + sizeof(AllocAlignmentInteger)); |
| 223 | if (!result) |
| 224 | return 0; |
| 225 | |
| 226 | *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; |
| 227 | result = static_cast<AllocAlignmentInteger*>(result) + 1; |
| 228 | |
| 229 | return result; |
| 230 | #else |
| 231 | return malloc(n);
| 232 | #endif |
| 233 | } |
| 234 | |
| 235 | void* fastMalloc(size_t n) |
| 236 | { |
| 237 | ASSERT(!isForbidden()); |
| 238 | |
| 239 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 240 | TryMallocReturnValue returnValue = tryFastMalloc(n); |
| 241 | void* result; |
| 242 | returnValue.getValue(result); |
| 243 | #else |
| 244 | void* result = malloc(n);
| 245 | #endif |
| 246 | |
| 247 | if (!result) |
| 248 | CRASH(); |
| 249 | return result; |
| 250 | } |
| 251 | |
| 252 | TryMallocReturnValue tryFastCalloc(size_t n_elements, size_t element_size) |
| 253 | { |
| 254 | ASSERT(!isForbidden()); |
| 255 | |
| 256 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 257 | size_t totalBytes = n_elements * element_size; |
| 258 | if (n_elements > 1 && element_size && (totalBytes / element_size) != n_elements || (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes)) |
| 259 | return 0; |
| 260 | |
| 261 | totalBytes += sizeof(AllocAlignmentInteger); |
| 262 | void* result = malloc(totalBytes); |
| 263 | if (!result) |
| 264 | return 0; |
| 265 | |
| 266 | memset(result, 0, totalBytes); |
| 267 | *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; |
| 268 | result = static_cast<AllocAlignmentInteger*>(result) + 1; |
| 269 | return result; |
| 270 | #else |
| 271 | return calloc(n_elements, element_size);
| 272 | #endif |
| 273 | } |
| 274 | |
| 275 | void* fastCalloc(size_t n_elements, size_t element_size) |
| 276 | { |
| 277 | ASSERT(!isForbidden()); |
| 278 | |
| 279 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 280 | TryMallocReturnValue returnValue = tryFastCalloc(n_elements, element_size); |
| 281 | void* result; |
| 282 | returnValue.getValue(result); |
| 283 | #else |
| 284 | void* result = calloc(n_elements, element_size);
| 285 | #endif |
| 286 | |
| 287 | if (!result) |
| 288 | CRASH(); |
| 289 | return result; |
| 290 | } |
| 291 | |
| 292 | void fastFree(void* p) |
| 293 | { |
| 294 | ASSERT(!isForbidden()); |
| 295 | |
| 296 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 297 | if (!p) |
| 298 | return; |
| 299 | |
| 300 | AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p); |
| 301 | if (*header != Internal::AllocTypeMalloc) |
| 302 | Internal::fastMallocMatchFailed(p); |
| 303 | free(header); |
| 304 | #else |
| 305 | free(p);
| 306 | #endif |
| 307 | } |
| 308 | |
| 309 | TryMallocReturnValue tryFastRealloc(void* p, size_t n) |
| 310 | { |
| 311 | ASSERT(!isForbidden()); |
| 312 | |
| 313 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 314 | if (p) { |
| 315 | if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur... |
| 316 | return 0; |
| 317 | AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p); |
| 318 | if (*header != Internal::AllocTypeMalloc) |
| 319 | Internal::fastMallocMatchFailed(p); |
| 320 | void* result = realloc(header, n + sizeof(AllocAlignmentInteger)); |
| 321 | if (!result) |
| 322 | return 0; |
| 323 | |
| 324 | // This should not be needed because the value is already there: |
| 325 | // *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; |
| 326 | result = static_cast<AllocAlignmentInteger*>(result) + 1; |
| 327 | return result; |
| 328 | } else { |
| 329 | return fastMalloc(n); |
| 330 | } |
| 331 | #else |
| 332 | return realloc(p, n);
| 333 | #endif |
| 334 | } |
| 335 | |
| 336 | void* fastRealloc(void* p, size_t n) |
| 337 | { |
| 338 | ASSERT(!isForbidden()); |
| 339 | |
| 340 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 341 | TryMallocReturnValue returnValue = tryFastRealloc(p, n); |
| 342 | void* result; |
| 343 | returnValue.getValue(result); |
| 344 | #else |
| 345 | void* result = realloc(p, n);
| 346 | #endif |
| 347 | |
| 348 | if (!result) |
| 349 | CRASH(); |
| 350 | return result; |
| 351 | } |
| 352 | |
| 353 | void releaseFastMallocFreeMemory() { } |
| 354 | |
| 355 | FastMallocStatistics fastMallocStatistics() |
| 356 | { |
| 357 | FastMallocStatistics statistics = { 0, 0, 0, 0 };
| 358 | return statistics; |
| 359 | } |
| 360 | |
| 361 | } // namespace WTF |
| 362 | |
| 363 | #if OS(DARWIN) |
| 364 | // This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled. |
| 365 | // It will never be used in this case, so its type and value are less interesting than its presence.
| 366 | extern "C" const int jscore_fastmalloc_introspection = 0; |
| 367 | #endif |
| 368 | |
| 369 | #else // FORCE_SYSTEM_MALLOC |
| 370 | |
| 371 | #if HAVE(STDINT_H) |
| 372 | #include <stdint.h> |
| 373 | #elif HAVE(INTTYPES_H) |
| 374 | #include <inttypes.h> |
| 375 | #else |
| 376 | #include <sys/types.h> |
| 377 | #endif |
| 378 | |
| 379 | #include "AlwaysInline.h" |
| 380 | #include "Assertions.h" |
| 381 | #include "TCPackedCache.h" |
| 382 | #include "TCPageMap.h" |
| 383 | #include "TCSpinLock.h" |
| 384 | #include "TCSystemAlloc.h" |
| 385 | #include <algorithm> |
| 386 | #include <errno.h> |
| 387 | #include <limits> |
| 388 | #include <new> |
| 389 | #include <pthread.h> |
| 390 | #include <stdarg.h> |
| 391 | #include <stddef.h> |
| 392 | #include <stdio.h> |
| 393 | #if OS(UNIX) |
| 394 | #include <unistd.h> |
| 395 | #endif |
| 396 | #if COMPILER(MSVC) |
| 397 | #ifndef WIN32_LEAN_AND_MEAN |
| 398 | #define WIN32_LEAN_AND_MEAN |
| 399 | #endif |
| 400 | #include <windows.h> |
| 401 | #endif |
| 402 | |
| 403 | #if WTF_CHANGES |
| 404 | |
| 405 | #if OS(DARWIN) |
| 406 | #include "MallocZoneSupport.h" |
| 407 | #include <wtf/HashSet.h> |
| 408 | #include <wtf/Vector.h> |
| 409 | #endif |
| 410 | #if HAVE(DISPATCH_H) |
| 411 | #include <dispatch/dispatch.h> |
| 412 | #endif |
| 413 | |
| 414 | |
| 415 | #ifndef PRIuS |
| 416 | #define PRIuS "zu" |
| 417 | #endif |
| 418 | |
| 419 | // Calling pthread_getspecific through a global function pointer is faster than a normal |
| 420 | // call to the function on Mac OS X, and it's used in performance-critical code. So we |
| 421 | // use a function pointer. But that's not necessarily faster on other platforms, and we had |
| 422 | // problems with this technique on Windows, so we'll do this only on Mac OS X. |
| 423 | #if OS(DARWIN) |
| 424 | static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific; |
| 425 | #define pthread_getspecific(key) pthread_getspecific_function_pointer(key) |
| 426 | #endif |
| 427 | |
| 428 | #define DEFINE_VARIABLE(type, name, value, meaning) \ |
| 429 | namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \ |
| 430 | type FLAGS_##name(value); \ |
| 431 | char FLAGS_no##name; \ |
| 432 | } \ |
| 433 | using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name |
| 434 | |
| 435 | #define DEFINE_int64(name, value, meaning) \ |
| 436 | DEFINE_VARIABLE(int64_t, name, value, meaning) |
| 437 | |
| 438 | #define DEFINE_double(name, value, meaning) \ |
| 439 | DEFINE_VARIABLE(double, name, value, meaning) |
| 440 | |
| 441 | namespace WTF { |
| 442 | |
| 443 | #define malloc fastMalloc |
| 444 | #define calloc fastCalloc |
| 445 | #define free fastFree |
| 446 | #define realloc fastRealloc |
| 447 | |
| 448 | #define MESSAGE LOG_ERROR |
| 449 | #define CHECK_CONDITION ASSERT |
| 450 | |
| 451 | #if OS(DARWIN) |
| 452 | class Span; |
| 453 | class TCMalloc_Central_FreeListPadded; |
| 454 | class TCMalloc_PageHeap; |
| 455 | class TCMalloc_ThreadCache; |
| 456 | template <typename T> class PageHeapAllocator; |
| 457 | |
| 458 | class FastMallocZone { |
| 459 | public: |
| 460 | static void init(); |
| 461 | |
| 462 | static kern_return_t enumerate(task_t, void*, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
| 463 | static size_t goodSize(malloc_zone_t*, size_t size) { return size; } |
| 464 | static boolean_t check(malloc_zone_t*) { return true; } |
| 465 | static void print(malloc_zone_t*, boolean_t) { } |
| 466 | static void log(malloc_zone_t*, void*) { } |
| 467 | static void forceLock(malloc_zone_t*) { } |
| 468 | static void forceUnlock(malloc_zone_t*) { } |
| 469 | static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); } |
| 470 | |
| 471 | private: |
| 472 | FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*); |
| 473 | static size_t size(malloc_zone_t*, const void*); |
| 474 | static void* zoneMalloc(malloc_zone_t*, size_t); |
| 475 | static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size); |
| 476 | static void zoneFree(malloc_zone_t*, void*); |
| 477 | static void* zoneRealloc(malloc_zone_t*, void*, size_t); |
| 478 | static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
| 479 | static void zoneDestroy(malloc_zone_t*) { } |
| 480 | |
| 481 | malloc_zone_t m_zone; |
| 482 | TCMalloc_PageHeap* m_pageHeap; |
| 483 | TCMalloc_ThreadCache** m_threadHeaps; |
| 484 | TCMalloc_Central_FreeListPadded* m_centralCaches; |
| 485 | PageHeapAllocator<Span>* m_spanAllocator; |
| 486 | PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator; |
| 487 | }; |
| 488 | |
| 489 | #endif |
| 490 | |
| 491 | #endif |
| 492 | |
| 493 | #ifndef WTF_CHANGES |
| 494 | // This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if |
| 495 | // you're porting to a system where you really can't get a stacktrace. |
| 496 | #ifdef NO_TCMALLOC_SAMPLES |
| 497 | // We use #define so code compiles even if you #include stacktrace.h somehow. |
| 498 | # define GetStackTrace(stack, depth, skip) (0) |
| 499 | #else |
| 500 | # include <google/stacktrace.h> |
| 501 | #endif |
| 502 | #endif |
| 503 | |
| 504 | // Even if we have support for thread-local storage in the compiler |
| 505 | // and linker, the OS may not support it. We need to check that at |
| 506 | // runtime. Right now, we have to keep a manual set of "bad" OSes. |
| 507 | #if defined(HAVE_TLS) |
| 508 | static bool kernel_supports_tls = false; // be conservative |
| 509 | static inline bool KernelSupportsTLS() { |
| 510 | return kernel_supports_tls; |
| 511 | } |
| 512 | # if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS |
| 513 | static void CheckIfKernelSupportsTLS() { |
| 514 | kernel_supports_tls = false; |
| 515 | } |
| 516 | # else |
| 517 | # include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too |
| 518 | static void CheckIfKernelSupportsTLS() { |
| 519 | struct utsname buf; |
| 520 | if (uname(&buf) != 0) { // should be impossible |
| 521 | MESSAGE("uname failed assuming no TLS support (errno=%d)\n" , errno); |
| 522 | kernel_supports_tls = false; |
| 523 | } else if (strcasecmp(buf.sysname, "linux" ) == 0) { |
| 524 | // The linux case: the first kernel to support TLS was 2.6.0 |
| 525 | if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x |
| 526 | kernel_supports_tls = false; |
| 527 | else if (buf.release[0] == '2' && buf.release[1] == '.' && |
| 528 | buf.release[2] >= '0' && buf.release[2] < '6' && |
| 529 | buf.release[3] == '.') // 2.0 - 2.5 |
| 530 | kernel_supports_tls = false; |
| 531 | else |
| 532 | kernel_supports_tls = true; |
| 533 | } else { // some other kernel, we'll be optimistic
| 534 | kernel_supports_tls = true; |
| 535 | } |
| 536 | // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG |
| 537 | } |
| 538 | # endif // HAVE_DECL_UNAME |
| 539 | #endif // HAVE_TLS |
| 540 | |
| 541 | // __THROW is defined in glibc systems. It means, counter-intuitively, |
| 542 | // "This function will never throw an exception." It's an optional |
| 543 | // optimization tool, but we may need to use it to match glibc prototypes. |
| 544 | #ifndef __THROW // I guess we're not on a glibc system |
| 545 | # define __THROW // __THROW is just an optimization, so ok to make it "" |
| 546 | #endif |
| 547 | |
| 548 | //------------------------------------------------------------------- |
| 549 | // Configuration |
| 550 | //------------------------------------------------------------------- |
| 551 | |
| 552 | // Not all possible combinations of the following parameters make |
| 553 | // sense. In particular, if kMaxSize increases, you may have to |
| 554 | // increase kNumClasses as well. |
| 555 | static const size_t kPageShift = 12; |
| 556 | static const size_t kPageSize = 1 << kPageShift; |
| 557 | static const size_t kMaxSize = 8u * kPageSize; |
| 558 | static const size_t kAlignShift = 3; |
| 559 | static const size_t kAlignment = 1 << kAlignShift; |
| 560 | static const size_t kNumClasses = 68; |
| 561 | |
| 562 | // Allocates a big block of memory for the pagemap once we reach more than |
| 563 | // 128MB |
| 564 | static const size_t kPageMapBigAllocationThreshold = 128 << 20; |
| 565 | |
| 566 | // Minimum number of pages to fetch from system at a time. Must be |
| 567 | // significantly bigger than kPageSize to amortize system-call |
| 568 | // overhead, and also to reduce external fragmentation. Also, we
| 569 | // should keep this value big because various incarnations of Linux |
| 570 | // have small limits on the number of mmap() regions per |
| 571 | // address-space. |
| 572 | static const size_t kMinSystemAlloc = 1 << (20 - kPageShift); |
| 573 | |
| 574 | // Number of objects to move between a per-thread list and a central |
| 575 | // list in one shot. We want this to be not too small so we can |
| 576 | // amortize the lock overhead for accessing the central list. Making |
| 577 | // it too big may temporarily cause unnecessary memory wastage in the |
| 578 | // per-thread free list until the scavenger cleans up the list. |
| 579 | static int num_objects_to_move[kNumClasses]; |
| 580 | |
| 581 | // Maximum length we allow a per-thread free-list to have before we |
| 582 | // move objects from it into the corresponding central free-list. We |
| 583 | // want this big to avoid locking the central free-list too often. It |
| 584 | // should not hurt to make this list somewhat big because the |
| 585 | // scavenging code will shrink it down when its contents are not in use. |
| 586 | static const int kMaxFreeListLength = 256; |
| 587 | |
| 588 | // Lower and upper bounds on the per-thread cache sizes |
| 589 | static const size_t kMinThreadCacheSize = kMaxSize * 2; |
| 590 | static const size_t kMaxThreadCacheSize = 2 << 20; |
| 591 | |
| 592 | // Default bound on the total amount of thread caches |
| 593 | static const size_t kDefaultOverallThreadCacheSize = 16 << 20; |
| 594 | |
| 595 | // For all span-lengths < kMaxPages we keep an exact-size list. |
| 596 | // REQUIRED: kMaxPages >= kMinSystemAlloc; |
| 597 | static const size_t kMaxPages = kMinSystemAlloc; |
| 598 | |
| 599 | /* The smallest prime > 2^n */ |
| 600 | static int primes_list[] = { |
| 601 | // Small values might cause high rates of sampling |
| 602 | // and hence commented out. |
| 603 | // 2, 5, 11, 17, 37, 67, 131, 257, |
| 604 | // 521, 1031, 2053, 4099, 8209, 16411, |
| 605 | 32771, 65537, 131101, 262147, 524309, 1048583, |
| 606 | 2097169, 4194319, 8388617, 16777259, 33554467 }; |
| 607 | |
| 608 | // Twice the approximate gap between sampling actions. |
| 609 | // I.e., we take one sample approximately once every |
| 610 | // tcmalloc_sample_parameter/2 |
| 611 | // bytes of allocation, i.e., ~ once every 128KB. |
| 612 | // Must be a prime number. |
| 613 | #ifdef NO_TCMALLOC_SAMPLES |
| 614 | DEFINE_int64(tcmalloc_sample_parameter, 0, |
| 615 | "Unused: code is compiled with NO_TCMALLOC_SAMPLES" ); |
| 616 | static size_t sample_period = 0; |
| 617 | #else |
| 618 | DEFINE_int64(tcmalloc_sample_parameter, 262147, |
| 619 | "Twice the approximate gap between sampling actions." |
| 620 | " Must be a prime number. Otherwise will be rounded up to a " |
| 621 | " larger prime number" ); |
| 622 | static size_t sample_period = 262147; |
| 623 | #endif |
| 624 | |
| 625 | // Protects sample_period above |
| 626 | static SpinLock sample_period_lock = SPINLOCK_INITIALIZER; |
| 627 | |
| 628 | // Parameters for controlling how fast memory is returned to the OS. |
| 629 | |
| 630 | DEFINE_double(tcmalloc_release_rate, 1, |
| 631 | "Rate at which we release unused memory to the system. " |
| 632 | "Zero means we never release memory back to the system. " |
| 633 | "Increase this flag to return memory faster; decrease it " |
| 634 | "to return memory slower. Reasonable rates are in the " |
| 635 | "range [0,10]" ); |
| 636 | |
| 637 | //------------------------------------------------------------------- |
| 638 | // Mapping from size to size_class and vice versa |
| 639 | //------------------------------------------------------------------- |
| 640 | |
| 641 | // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an |
| 642 | // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. |
| 643 | // So for these larger sizes we have an array indexed by ceil(size/128). |
| 644 | // |
| 645 | // We flatten both logical arrays into one physical array and use |
| 646 | // arithmetic to compute an appropriate index. The constants used by |
| 647 | // ClassIndex() were selected to make the flattening work. |
| 648 | // |
| 649 | // Examples: |
| 650 | // Size Expression Index |
| 651 | // ------------------------------------------------------- |
| 652 | // 0 (0 + 7) / 8 0 |
| 653 | // 1 (1 + 7) / 8 1 |
| 654 | // ... |
| 655 | // 1024 (1024 + 7) / 8 128 |
| 656 | // 1025 (1025 + 127 + (120<<7)) / 128 129 |
| 657 | // ... |
| 658 | // 32768 (32768 + 127 + (120<<7)) / 128 376 |
| 659 | static const size_t kMaxSmallSize = 1024; |
| 660 | static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128 |
| 661 | static const int add_amount[2] = { 7, 127 + (120 << 7) }; |
| 662 | static unsigned char class_array[377]; |
| 663 | |
| 664 | // Compute index of the class_array[] entry for a given size |
| 665 | static inline int ClassIndex(size_t s) { |
| 666 | const int i = (s > kMaxSmallSize); |
| 667 | return static_cast<int>((s + add_amount[i]) >> shift_amount[i]); |
| 668 | } |
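// A quick sanity sketch (illustrative only, kept disabled like the other unused
// helpers in this file): ClassIndex() reproduces the examples tabulated above.
#if 0
static void ClassIndexExamples() {
  ASSERT(ClassIndex(0) == 0);
  ASSERT(ClassIndex(1) == 1);
  ASSERT(ClassIndex(1024) == 128);   // last index computed as (size + 7) / 8
  ASSERT(ClassIndex(1025) == 129);   // first index computed as (size + 127 + (120 << 7)) / 128
  ASSERT(ClassIndex(32768) == 376);  // kMaxSize, the largest "small" size
}
#endif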
| 669 | |
| 670 | // Mapping from size class to max size storable in that class |
| 671 | static size_t class_to_size[kNumClasses]; |
| 672 | |
| 673 | // Mapping from size class to number of pages to allocate at a time |
| 674 | static size_t class_to_pages[kNumClasses]; |
| 675 | |
| 676 | // TransferCache is used to cache transfers of num_objects_to_move[size_class] |
| 677 | // back and forth between thread caches and the central cache for a given size |
| 678 | // class. |
| 679 | struct TCEntry { |
| 680 | void *head; // Head of chain of objects. |
| 681 | void *tail; // Tail of chain of objects. |
| 682 | }; |
| 683 | // A central cache freelist can have anywhere from 0 to kNumTransferEntries |
| 684 | // slots to put link list chains into. To keep memory usage bounded the total |
| 685 | // number of TCEntries across size classes is fixed. Currently each size |
| 686 | // class is initially given one TCEntry which also means that the maximum any |
| 687 | // one class can have is kNumClasses. |
| 688 | static const int kNumTransferEntries = kNumClasses; |
| 689 | |
| 690 | // Note: the following only works for "n"s that fit in 32-bits, but |
| 691 | // that is fine since we only use it for small sizes. |
| 692 | static inline int LgFloor(size_t n) { |
| 693 | int log = 0; |
| 694 | for (int i = 4; i >= 0; --i) { |
| 695 | int shift = (1 << i); |
| 696 | size_t x = n >> shift; |
| 697 | if (x != 0) { |
| 698 | n = x; |
| 699 | log += shift; |
| 700 | } |
| 701 | } |
| 702 | ASSERT(n == 1); |
| 703 | return log; |
| 704 | } |
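// Illustrative values (a disabled sketch): LgFloor(n) is floor(log2(n)) for
// n >= 1, computed with five conditional shifts.
#if 0
static void LgFloorExamples() {
  ASSERT(LgFloor(1) == 0);
  ASSERT(LgFloor(7) == 2);                   // floor(log2(7)) == 2
  ASSERT(LgFloor(kPageSize) == kPageShift);  // 4096 -> 12
}
#endif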
| 705 | |
| 706 | // Some very basic linked list functions for dealing with using void * as |
| 707 | // storage. |
| 708 | |
| 709 | static inline void *SLL_Next(void *t) { |
| 710 | return *(reinterpret_cast<void**>(t)); |
| 711 | } |
| 712 | |
| 713 | static inline void SLL_SetNext(void *t, void *n) { |
| 714 | *(reinterpret_cast<void**>(t)) = n; |
| 715 | } |
| 716 | |
| 717 | static inline void SLL_Push(void **list, void *element) { |
| 718 | SLL_SetNext(element, *list); |
| 719 | *list = element; |
| 720 | } |
| 721 | |
| 722 | static inline void *SLL_Pop(void **list) { |
| 723 | void *result = *list; |
| 724 | *list = SLL_Next(*list); |
| 725 | return result; |
| 726 | } |
| 727 | |
| 728 | |
| 729 | // Remove N elements from a linked list to which head points. head will be |
| 730 | // modified to point to the new head. start and end will point to the first |
| 731 | // and last nodes of the range. Note that end will point to NULL after this |
| 732 | // function is called. |
| 733 | static inline void SLL_PopRange(void **head, int N, void **start, void **end) { |
| 734 | if (N == 0) { |
| 735 | *start = NULL; |
| 736 | *end = NULL; |
| 737 | return; |
| 738 | } |
| 739 | |
| 740 | void *tmp = *head; |
| 741 | for (int i = 1; i < N; ++i) { |
| 742 | tmp = SLL_Next(tmp); |
| 743 | } |
| 744 | |
| 745 | *start = *head; |
| 746 | *end = tmp; |
| 747 | *head = SLL_Next(tmp); |
| 748 | // Unlink range from list. |
| 749 | SLL_SetNext(tmp, NULL); |
| 750 | } |
| 751 | |
| 752 | static inline void SLL_PushRange(void **head, void *start, void *end) { |
| 753 | if (!start) return; |
| 754 | SLL_SetNext(end, *head); |
| 755 | *head = start; |
| 756 | } |
| 757 | |
| 758 | static inline size_t SLL_Size(void *head) { |
| 759 | int count = 0; |
| 760 | while (head) { |
| 761 | count++; |
| 762 | head = SLL_Next(head); |
| 763 | } |
| 764 | return count; |
| 765 | } |
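// Illustrative usage of the SLL_* helpers above (a disabled sketch). Each list
// element stores its "next" pointer in its own first word, so any free block of
// at least sizeof(void*) bytes can act as a node.
#if 0
static void SLL_Example(void* a, void* b) {
  // 'a' and 'b' are assumed to be distinct scratch blocks of >= sizeof(void*) bytes.
  void* list = NULL;
  SLL_Push(&list, a);             // list: a -> NULL
  SLL_Push(&list, b);             // list: b -> a -> NULL
  ASSERT(SLL_Size(list) == 2);
  void* popped = SLL_Pop(&list);  // removes 'b'
  ASSERT(popped == b);
  ASSERT(SLL_Size(list) == 1);
}
#endif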
| 766 | |
| 767 | // Setup helper functions. |
| 768 | |
| 769 | static ALWAYS_INLINE size_t SizeClass(size_t size) { |
| 770 | return class_array[ClassIndex(size)]; |
| 771 | } |
| 772 | |
| 773 | // Get the byte-size for a specified class |
| 774 | static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) { |
| 775 | return class_to_size[cl]; |
| 776 | } |
| 777 | static int NumMoveSize(size_t size) { |
| 778 | if (size == 0) return 0; |
| 779 | // Use approx 64k transfers between thread and central caches. |
| 780 | int num = static_cast<int>(64.0 * 1024.0 / size); |
| 781 | if (num < 2) num = 2; |
| 782 | // Clamp well below kMaxFreeListLength to avoid ping pong between central |
| 783 | // and thread caches. |
| 784 | if (num > static_cast<int>(0.8 * kMaxFreeListLength)) |
| 785 | num = static_cast<int>(0.8 * kMaxFreeListLength); |
| 786 | |
| 787 | // Also, avoid bringing in too many objects into small object free |
| 788 | // lists. There are lots of such lists, and if we allow each one to |
| 789 | // fetch too many at a time, we end up having to scavenge too often |
| 790 | // (especially when there are lots of threads and each thread gets a |
| 791 | // small allowance for its thread cache). |
| 792 | // |
| 793 | // TODO: Make thread cache free list sizes dynamic so that we do not |
| 794 | // have to equally divide a fixed resource amongst lots of threads. |
| 795 | if (num > 32) num = 32; |
| 796 | |
| 797 | return num; |
| 798 | } |
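// Worked examples of the clamping above (a disabled sketch): NumMoveSize(64)
// starts at 65536/64 = 1024, is clamped to 0.8 * kMaxFreeListLength (truncated
// to 204), and finally to the hard cap of 32; NumMoveSize(kMaxSize) is
// 65536/32768 = 2, the minimum batch size.
#if 0
static void NumMoveSizeExamples() {
  ASSERT(NumMoveSize(64) == 32);
  ASSERT(NumMoveSize(kMaxSize) == 2);
}
#endif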
| 799 | |
| 800 | // Initialize the mapping arrays |
| 801 | static void InitSizeClasses() { |
| 802 | // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] |
| 803 | if (ClassIndex(0) < 0) { |
| 804 | MESSAGE("Invalid class index %d for size 0\n" , ClassIndex(0)); |
| 805 | CRASH(); |
| 806 | } |
| 807 | if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) { |
| 808 | MESSAGE("Invalid class index %d for kMaxSize\n" , ClassIndex(kMaxSize)); |
| 809 | CRASH(); |
| 810 | } |
| 811 | |
| 812 | // Compute the size classes we want to use |
| 813 | size_t sc = 1; // Next size class to assign |
| 814 | unsigned char alignshift = kAlignShift; |
| 815 | int last_lg = -1; |
| 816 | for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) { |
| 817 | int lg = LgFloor(size); |
| 818 | if (lg > last_lg) { |
| 819 | // Increase alignment every so often. |
| 820 | // |
| 821 | // Since we double the alignment every time size doubles and |
| 822 | // size >= 128, this means that space wasted due to alignment is |
| 823 | // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256 |
| 824 | // bytes, so the space wasted as a percentage starts falling for |
| 825 | // sizes > 2K. |
| 826 | if ((lg >= 7) && (alignshift < 8)) { |
| 827 | alignshift++; |
| 828 | } |
| 829 | last_lg = lg; |
| 830 | } |
| 831 | |
| 832 | // Allocate enough pages so leftover is less than 1/8 of total. |
| 833 | // This bounds wasted space to at most 12.5%. |
| 834 | size_t psize = kPageSize; |
| 835 | while ((psize % size) > (psize >> 3)) { |
| 836 | psize += kPageSize; |
| 837 | } |
| 838 | const size_t my_pages = psize >> kPageShift; |
| 839 | |
| 840 | if (sc > 1 && my_pages == class_to_pages[sc-1]) { |
| 841 | // See if we can merge this into the previous class without |
| 842 | // increasing the fragmentation of the previous class. |
| 843 | const size_t my_objects = (my_pages << kPageShift) / size; |
| 844 | const size_t prev_objects = (class_to_pages[sc-1] << kPageShift) |
| 845 | / class_to_size[sc-1]; |
| 846 | if (my_objects == prev_objects) { |
| 847 | // Adjust last class to include this size |
| 848 | class_to_size[sc-1] = size; |
| 849 | continue; |
| 850 | } |
| 851 | } |
| 852 | |
| 853 | // Add new class |
| 854 | class_to_pages[sc] = my_pages; |
| 855 | class_to_size[sc] = size; |
| 856 | sc++; |
| 857 | } |
| 858 | if (sc != kNumClasses) { |
| 859 | MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n" , |
| 860 | sc, int(kNumClasses)); |
| 861 | CRASH(); |
| 862 | } |
| 863 | |
| 864 | // Initialize the mapping arrays |
| 865 | int next_size = 0; |
| 866 | for (unsigned char c = 1; c < kNumClasses; c++) { |
| 867 | const size_t max_size_in_class = class_to_size[c]; |
| 868 | for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) { |
| 869 | class_array[ClassIndex(s)] = c; |
| 870 | } |
| 871 | next_size = static_cast<int>(max_size_in_class + kAlignment); |
| 872 | } |
| 873 | |
| 874 | // Double-check sizes just to be safe |
| 875 | for (size_t size = 0; size <= kMaxSize; size++) { |
| 876 | const size_t sc = SizeClass(size); |
| 877 | if (sc == 0) { |
| 878 | MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n" , sc, size); |
| 879 | CRASH(); |
| 880 | } |
| 881 | if (sc > 1 && size <= class_to_size[sc-1]) { |
| 882 | MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS |
| 883 | "\n" , sc, size); |
| 884 | CRASH(); |
| 885 | } |
| 886 | if (sc >= kNumClasses) { |
| 887 | MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n" , sc, size); |
| 888 | CRASH(); |
| 889 | } |
| 890 | const size_t s = class_to_size[sc]; |
| 891 | if (size > s) { |
| 892 | MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n" , s, size, sc); |
| 893 | CRASH(); |
| 894 | } |
| 895 | if (s == 0) { |
| 896 | MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n" , s, size, sc); |
| 897 | CRASH(); |
| 898 | } |
| 899 | } |
| 900 | |
| 901 | // Initialize the num_objects_to_move array. |
| 902 | for (size_t cl = 1; cl < kNumClasses; ++cl) { |
| 903 | num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl)); |
| 904 | } |
| 905 | |
| 906 | #ifndef WTF_CHANGES |
| 907 | if (false) { |
| 908 | // Dump class sizes and maximum external wastage per size class |
| 909 | for (size_t cl = 1; cl < kNumClasses; ++cl) { |
| 910 | const int alloc_size = class_to_pages[cl] << kPageShift; |
| 911 | const int alloc_objs = alloc_size / class_to_size[cl]; |
| 912 | const int min_used = (class_to_size[cl-1] + 1) * alloc_objs; |
| 913 | const int max_waste = alloc_size - min_used; |
| 914 | MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n" , |
| 915 | int(cl), |
| 916 | int(class_to_size[cl-1] + 1), |
| 917 | int(class_to_size[cl]), |
| 918 | int(class_to_pages[cl] << kPageShift), |
| 919 | max_waste * 100.0 / alloc_size |
| 920 | ); |
| 921 | } |
| 922 | } |
| 923 | #endif |
| 924 | } |
| 925 | |
| 926 | // ------------------------------------------------------------------------- |
| 927 | // Simple allocator for objects of a specified type. External locking |
| 928 | // is required before accessing one of these objects. |
| 929 | // ------------------------------------------------------------------------- |
| 930 | |
| 931 | // Metadata allocator -- keeps stats about how many bytes allocated |
| 932 | static uint64_t metadata_system_bytes = 0; |
| 933 | static void* MetaDataAlloc(size_t bytes) { |
| 934 | void* result = TCMalloc_SystemAlloc(bytes, 0); |
| 935 | if (result != NULL) { |
| 936 | metadata_system_bytes += bytes; |
| 937 | } |
| 938 | return result; |
| 939 | } |
| 940 | |
| 941 | template <class T> |
| 942 | class PageHeapAllocator { |
| 943 | private: |
| 944 | // How much to allocate from system at a time |
| 945 | static const size_t kAllocIncrement = 32 << 10; |
| 946 | |
| 947 | // Aligned size of T |
| 948 | static const size_t kAlignedSize |
| 949 | = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment); |
| 950 | |
| 951 | // Free area from which to carve new objects |
| 952 | char* free_area_; |
| 953 | size_t free_avail_; |
| 954 | |
| 955 | // Linked list of all regions allocated by this allocator |
| 956 | void* allocated_regions_; |
| 957 | |
| 958 | // Free list of already carved objects |
| 959 | void* free_list_; |
| 960 | |
| 961 | // Number of allocated but unfreed objects |
| 962 | int inuse_; |
| 963 | |
| 964 | public: |
| 965 | void Init() { |
| 966 | ASSERT(kAlignedSize <= kAllocIncrement); |
| 967 | inuse_ = 0; |
| 968 | allocated_regions_ = 0; |
| 969 | free_area_ = NULL; |
| 970 | free_avail_ = 0; |
| 971 | free_list_ = NULL; |
| 972 | } |
| 973 | |
| 974 | T* New() { |
| 975 | // Consult free list |
| 976 | void* result; |
| 977 | if (free_list_ != NULL) { |
| 978 | result = free_list_; |
| 979 | free_list_ = *(reinterpret_cast<void**>(result)); |
| 980 | } else { |
| 981 | if (free_avail_ < kAlignedSize) { |
| 982 | // Need more room |
| 983 | char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement)); |
| 984 | if (!new_allocation) |
| 985 | CRASH(); |
| 986 | |
| 987 | *(void**)new_allocation = allocated_regions_; |
| 988 | allocated_regions_ = new_allocation; |
| 989 | free_area_ = new_allocation + kAlignedSize; |
| 990 | free_avail_ = kAllocIncrement - kAlignedSize; |
| 991 | } |
| 992 | result = free_area_; |
| 993 | free_area_ += kAlignedSize; |
| 994 | free_avail_ -= kAlignedSize; |
| 995 | } |
| 996 | inuse_++; |
| 997 | return reinterpret_cast<T*>(result); |
| 998 | } |
| 999 | |
| 1000 | void Delete(T* p) { |
| 1001 | *(reinterpret_cast<void**>(p)) = free_list_; |
| 1002 | free_list_ = p; |
| 1003 | inuse_--; |
| 1004 | } |
| 1005 | |
| 1006 | int inuse() const { return inuse_; } |
| 1007 | |
| 1008 | #if defined(WTF_CHANGES) && OS(DARWIN) |
| 1009 | template <class Recorder> |
| 1010 | void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader) |
| 1011 | { |
| 1012 | vm_address_t adminAllocation = reinterpret_cast<vm_address_t>(allocated_regions_); |
| 1013 | while (adminAllocation) { |
| 1014 | recorder.recordRegion(adminAllocation, kAllocIncrement); |
| 1015 | adminAllocation = *reader(reinterpret_cast<vm_address_t*>(adminAllocation)); |
| 1016 | } |
| 1017 | } |
| 1018 | #endif |
| 1019 | }; |
| 1020 | |
| 1021 | // ------------------------------------------------------------------------- |
| 1022 | // Span - a contiguous run of pages |
| 1023 | // ------------------------------------------------------------------------- |
| 1024 | |
| 1025 | // Type that can hold a page number |
| 1026 | typedef uintptr_t PageID; |
| 1027 | |
| 1028 | // Type that can hold the length of a run of pages |
| 1029 | typedef uintptr_t Length; |
| 1030 | |
| 1031 | static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift; |
| 1032 | |
| 1033 | // Convert byte size into pages. This won't overflow, but may return |
| 1034 | // an unreasonably large value if bytes is huge enough. |
| 1035 | static inline Length pages(size_t bytes) { |
| 1036 | return (bytes >> kPageShift) + |
| 1037 | ((bytes & (kPageSize - 1)) > 0 ? 1 : 0); |
| 1038 | } |
| 1039 | |
| 1040 | // Convert a user size into the number of bytes that will actually be |
| 1041 | // allocated |
| 1042 | static size_t AllocationSize(size_t bytes) { |
| 1043 | if (bytes > kMaxSize) { |
| 1044 | // Large object: we allocate an integral number of pages |
| 1045 | ASSERT(bytes <= (kMaxValidPages << kPageShift)); |
| 1046 | return pages(bytes) << kPageShift; |
| 1047 | } else { |
| 1048 | // Small object: find the size class to which it belongs |
| 1049 | return ByteSizeForClass(SizeClass(bytes)); |
| 1050 | } |
| 1051 | } |
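// Illustrative behaviour (a disabled sketch; the small-object case assumes the
// size-class tables have already been set up by InitSizeClasses()):
#if 0
static void AllocationSizeExamples() {
  ASSERT(pages(1) == 1);
  ASSERT(pages(kPageSize) == 1);
  ASSERT(pages(kPageSize + 1) == 2);
  ASSERT(AllocationSize(kMaxSize + 1) == 9 * kPageSize);  // large objects use whole pages
  ASSERT(AllocationSize(10) >= 10);                       // small objects round up to a size class
}
#endif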
| 1052 | |
| 1053 | // Information kept for a span (a contiguous run of pages). |
| 1054 | struct Span { |
| 1055 | PageID start; // Starting page number |
| 1056 | Length length; // Number of pages in span |
| 1057 | Span* next; // Used when in linked list
| 1058 | Span* prev; // Used when in linked list
| 1059 | void* objects; // Linked list of free objects |
| 1060 | unsigned int free : 1; // Is the span free |
| 1061 | #ifndef NO_TCMALLOC_SAMPLES |
| 1062 | unsigned int sample : 1; // Sampled object? |
| 1063 | #endif |
| 1064 | unsigned int sizeclass : 8; // Size-class for small objects (or 0) |
| 1065 | unsigned int refcount : 11; // Number of non-free objects |
| 1066 | bool decommitted : 1; |
| 1067 | |
| 1068 | #undef SPAN_HISTORY |
| 1069 | #ifdef SPAN_HISTORY |
| 1070 | // For debugging, we can keep a log of events per span
| 1071 | int nexthistory; |
| 1072 | char history[64]; |
| 1073 | int value[64]; |
| 1074 | #endif |
| 1075 | }; |
| 1076 | |
| 1077 | #define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted) |
| 1078 | |
| 1079 | #ifdef SPAN_HISTORY |
| 1080 | void Event(Span* span, char op, int v = 0) { |
| 1081 | span->history[span->nexthistory] = op; |
| 1082 | span->value[span->nexthistory] = v; |
| 1083 | span->nexthistory++; |
| 1084 | if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0; |
| 1085 | } |
| 1086 | #else |
| 1087 | #define Event(s,o,v) ((void) 0) |
| 1088 | #endif |
| 1089 | |
| 1090 | // Allocator/deallocator for spans |
| 1091 | static PageHeapAllocator<Span> span_allocator; |
| 1092 | static Span* NewSpan(PageID p, Length len) { |
| 1093 | Span* result = span_allocator.New(); |
| 1094 | memset(result, 0, sizeof(*result)); |
| 1095 | result->start = p; |
| 1096 | result->length = len; |
| 1097 | #ifdef SPAN_HISTORY |
| 1098 | result->nexthistory = 0; |
| 1099 | #endif |
| 1100 | return result; |
| 1101 | } |
| 1102 | |
| 1103 | static inline void DeleteSpan(Span* span) { |
| 1104 | #ifndef NDEBUG |
| 1105 | // In debug mode, trash the contents of deleted Spans |
| 1106 | memset(span, 0x3f, sizeof(*span)); |
| 1107 | #endif |
| 1108 | span_allocator.Delete(span); |
| 1109 | } |
| 1110 | |
| 1111 | // ------------------------------------------------------------------------- |
| 1112 | // Doubly linked list of spans. |
| 1113 | // ------------------------------------------------------------------------- |
| 1114 | |
| 1115 | static inline void DLL_Init(Span* list) { |
| 1116 | list->next = list; |
| 1117 | list->prev = list; |
| 1118 | } |
| 1119 | |
| 1120 | static inline void DLL_Remove(Span* span) { |
| 1121 | span->prev->next = span->next; |
| 1122 | span->next->prev = span->prev; |
| 1123 | span->prev = NULL; |
| 1124 | span->next = NULL; |
| 1125 | } |
| 1126 | |
| 1127 | static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) { |
| 1128 | return list->next == list; |
| 1129 | } |
| 1130 | |
| 1131 | static int DLL_Length(const Span* list) { |
| 1132 | int result = 0; |
| 1133 | for (Span* s = list->next; s != list; s = s->next) { |
| 1134 | result++; |
| 1135 | } |
| 1136 | return result; |
| 1137 | } |
| 1138 | |
| 1139 | #if 0 /* Not needed at the moment -- causes compiler warnings if not used */ |
| 1140 | static void DLL_Print(const char* label, const Span* list) { |
| 1141 | MESSAGE("%-10s %p:" , label, list); |
| 1142 | for (const Span* s = list->next; s != list; s = s->next) { |
| 1143 | MESSAGE(" <%p,%u,%u>" , s, s->start, s->length); |
| 1144 | } |
| 1145 | MESSAGE("\n" ); |
| 1146 | } |
| 1147 | #endif |
| 1148 | |
| 1149 | static inline void DLL_Prepend(Span* list, Span* span) { |
| 1150 | ASSERT(span->next == NULL); |
| 1151 | ASSERT(span->prev == NULL); |
| 1152 | span->next = list->next; |
| 1153 | span->prev = list; |
| 1154 | list->next->prev = span; |
| 1155 | list->next = span; |
| 1156 | } |
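// Illustrative usage of the DLL_* helpers above (a disabled sketch). A list is
// represented by a header Span whose next/prev point back at itself when empty.
#if 0
static void DLL_Example(Span* header, Span* span) {
  DLL_Init(header);             // empty circular list
  ASSERT(DLL_IsEmpty(header));
  span->next = NULL;
  span->prev = NULL;
  DLL_Prepend(header, span);    // header <-> span
  ASSERT(DLL_Length(header) == 1);
  DLL_Remove(span);             // list is empty again
  ASSERT(DLL_IsEmpty(header));
}
#endif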
| 1157 | |
| 1158 | // ------------------------------------------------------------------------- |
| 1159 | // Stack traces kept for sampled allocations |
| 1160 | // The following state is protected by pageheap_lock_. |
| 1161 | // ------------------------------------------------------------------------- |
| 1162 | |
| 1163 | // size/depth are made the same size as a pointer so that some generic |
| 1164 | // code below can conveniently cast them back and forth to void*. |
| 1165 | static const int kMaxStackDepth = 31; |
| 1166 | struct StackTrace { |
| 1167 | uintptr_t size; // Size of object |
| 1168 | uintptr_t depth; // Number of PC values stored in array below |
| 1169 | void* stack[kMaxStackDepth]; |
| 1170 | }; |
| 1171 | static PageHeapAllocator<StackTrace> stacktrace_allocator; |
| 1172 | static Span sampled_objects; |
| 1173 | |
| 1174 | // ------------------------------------------------------------------------- |
| 1175 | // Map from page-id to per-page data |
| 1176 | // ------------------------------------------------------------------------- |
| 1177 | |
| 1178 | // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines. |
| 1179 | // We also use a simple one-level cache for hot PageID-to-sizeclass mappings, |
| 1180 | // because sometimes the sizeclass is all the information we need. |
| 1181 | |
| 1182 | // Selector class -- general selector uses 3-level map |
| 1183 | template <int BITS> class MapSelector { |
| 1184 | public: |
| 1185 | typedef TCMalloc_PageMap3<BITS-kPageShift> Type; |
| 1186 | typedef PackedCache<BITS, uint64_t> CacheType; |
| 1187 | }; |
| 1188 | |
| 1189 | #if defined(WTF_CHANGES) |
| 1190 | #if CPU(X86_64) |
| 1191 | // On all known X86-64 platforms, the upper 16 bits are always unused and therefore |
| 1192 | // can be excluded from the PageMap key. |
| 1193 | // See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details |
| 1194 | |
| 1195 | static const size_t kBitsUnusedOn64Bit = 16; |
| 1196 | #else |
| 1197 | static const size_t kBitsUnusedOn64Bit = 0; |
| 1198 | #endif |
| 1199 | |
| 1200 | // A three-level map for 64-bit machines |
| 1201 | template <> class MapSelector<64> { |
| 1202 | public: |
| 1203 | typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type; |
| 1204 | typedef PackedCache<64, uint64_t> CacheType; |
| 1205 | }; |
| 1206 | #endif |
| 1207 | |
| 1208 | // A two-level map for 32-bit machines |
| 1209 | template <> class MapSelector<32> { |
| 1210 | public: |
| 1211 | typedef TCMalloc_PageMap2<32 - kPageShift> Type; |
| 1212 | typedef PackedCache<32 - kPageShift, uint16_t> CacheType; |
| 1213 | }; |
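// For example (illustrative arithmetic only): with kPageShift == 12, a 32-bit
// build maps 32 - 12 = 20 bits of page number through the two-level map and
// caches sizeclasses in 16-bit entries, while an x86-64 build with WTF_CHANGES
// maps 64 - 12 - 16 = 36 bits of page number through the three-level map and
// uses 64-bit cache entries.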
| 1214 | |
| 1215 | // ------------------------------------------------------------------------- |
| 1216 | // Page-level allocator |
| 1217 | // * Eager coalescing |
| 1218 | // |
| 1219 | // Heap for page-level allocation. We allow allocating and freeing a |
| 1220 | // contiguous run of pages (called a "span").
| 1221 | // ------------------------------------------------------------------------- |
| 1222 | |
| 1223 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1224 | // The central page heap collects spans of memory that have been deleted but are still committed until they are released |
| 1225 | // back to the system. We use a background thread to periodically scan the list of free spans and release some back to the |
| 1226 | // system. Every 5 seconds, the background thread wakes up and does the following: |
| 1227 | // - Check if we needed to commit memory in the last 5 seconds. If so, skip this scavenge because it's a sign that we are short |
| 1228 | // of free committed pages and so we should not release them back to the system yet. |
| 1229 | // - Otherwise, go through the list of free spans (from largest to smallest) and release up to a fraction of the free committed pages |
| 1230 | // back to the system. |
| 1231 | // - If the number of free committed pages reaches kMinimumFreeCommittedPageCount, we can stop the scavenging and block the |
| 1232 | // scavenging thread until the number of free committed pages goes above kMinimumFreeCommittedPageCount. |
| 1233 | |
| 1234 | // Background thread wakes up every 5 seconds to scavenge as long as there is memory available to return to the system. |
| 1235 | static const int kScavengeTimerDelayInSeconds = 5; |
| 1236 | |
| 1237 | // Number of free committed pages that we want to keep around. |
| 1238 | static const size_t kMinimumFreeCommittedPageCount = 512; |
| 1239 | |
| 1240 | // During a scavenge, we'll release up to a fraction of the free committed pages. |
| 1241 | #if OS(WINDOWS) |
| 1242 | // We are slightly less aggressive in releasing memory on Windows due to performance reasons. |
| 1243 | static const int kMaxScavengeAmountFactor = 3; |
| 1244 | #else |
| 1245 | static const int kMaxScavengeAmountFactor = 2; |
| 1246 | #endif |
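// A sketch of the per-wakeup policy described above (illustrative pseudocode in
// comment form; the real logic lives in TCMalloc_PageHeap::scavenge(), declared
// below):
//
//   every kScavengeTimerDelayInSeconds seconds:
//     if (pages_committed_since_last_scavenge_ > 0)
//       skip this round;                        // we are currently growing
//     else
//       release committed free spans, largest first, presumably up to
//       free_committed_pages_ / kMaxScavengeAmountFactor pages, stopping once
//       free_committed_pages_ <= kMinimumFreeCommittedPageCount.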
| 1247 | #endif |
| 1248 | |
| 1249 | class TCMalloc_PageHeap { |
| 1250 | public: |
| 1251 | void init(); |
| 1252 | |
| 1253 | // Allocate a run of "n" pages. Returns zero if out of memory. |
| 1254 | Span* New(Length n); |
| 1255 | |
| 1256 | // Delete the span "[p, p+n-1]". |
| 1257 | // REQUIRES: span was returned by earlier call to New() and |
| 1258 | // has not yet been deleted. |
| 1259 | void Delete(Span* span); |
| 1260 | |
| 1261 | // Mark an allocated span as being used for small objects of the |
| 1262 | // specified size-class. |
| 1263 | // REQUIRES: span was returned by an earlier call to New() |
| 1264 | // and has not yet been deleted. |
| 1265 | void RegisterSizeClass(Span* span, size_t sc); |
| 1266 | |
| 1267 | // Split an allocated span into two spans: one of length "n" pages |
| 1268 | // followed by another span of length "span->length - n" pages. |
| 1269 | // Modifies "*span" to point to the first span of length "n" pages. |
| 1270 | // Returns a pointer to the second span. |
| 1271 | // |
| 1272 | // REQUIRES: "0 < n < span->length" |
| 1273 | // REQUIRES: !span->free |
| 1274 | // REQUIRES: span->sizeclass == 0 |
| 1275 | Span* Split(Span* span, Length n); |
| 1276 | |
| 1277 | // Return the descriptor for the specified page. |
| 1278 | inline Span* GetDescriptor(PageID p) const { |
| 1279 | return reinterpret_cast<Span*>(pagemap_.get(p)); |
| 1280 | } |
| 1281 | |
| 1282 | #ifdef WTF_CHANGES |
| 1283 | inline Span* GetDescriptorEnsureSafe(PageID p) |
| 1284 | { |
| 1285 | pagemap_.Ensure(p, 1); |
| 1286 | return GetDescriptor(p); |
| 1287 | } |
| 1288 | |
| 1289 | size_t ReturnedBytes() const; |
| 1290 | #endif |
| 1291 | |
| 1292 | // Dump state to stderr |
| 1293 | #ifndef WTF_CHANGES |
| 1294 | void Dump(TCMalloc_Printer* out); |
| 1295 | #endif |
| 1296 | |
| 1297 | // Return number of bytes allocated from system |
| 1298 | inline uint64_t SystemBytes() const { return system_bytes_; } |
| 1299 | |
| 1300 | // Return number of free bytes in heap |
| 1301 | uint64_t FreeBytes() const { |
| 1302 | return (static_cast<uint64_t>(free_pages_) << kPageShift); |
| 1303 | } |
| 1304 | |
| 1305 | bool Check(); |
| 1306 | bool CheckList(Span* list, Length min_pages, Length max_pages); |
| 1307 | |
| 1308 | // Release all pages on the free list for reuse by the OS: |
| 1309 | void ReleaseFreePages(); |
| 1310 | |
| 1311 | // Return 0 if we have no information, or else the correct sizeclass for p. |
| 1312 | // Reads and writes to pagemap_cache_ do not require locking. |
| 1313 | // The entries are 64 bits on 64-bit hardware and 16 bits on |
| 1314 | // 32-bit hardware, and we don't mind raciness as long as each read of |
| 1315 | // an entry yields a valid entry, not a partially updated entry. |
| 1316 | size_t GetSizeClassIfCached(PageID p) const { |
| 1317 | return pagemap_cache_.GetOrDefault(p, 0); |
| 1318 | } |
| 1319 | void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); } |
| 1320 | |
| 1321 | private: |
| 1322 | // Pick the appropriate map and cache types based on pointer size |
| 1323 | typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap; |
| 1324 | typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache; |
| 1325 | PageMap pagemap_; |
| 1326 | mutable PageMapCache pagemap_cache_; |
| 1327 | |
| 1328 | // We segregate spans of a given size into two circular linked |
| 1329 | // lists: one for normal spans, and one for spans whose memory |
| 1330 | // has been returned to the system. |
| 1331 | struct SpanList { |
| 1332 | Span normal; |
| 1333 | Span returned; |
| 1334 | }; |
| 1335 | |
| 1336 | // List of free spans of length >= kMaxPages |
| 1337 | SpanList large_; |
| 1338 | |
| 1339 | // Array mapping from span length to a doubly linked list of free spans |
| 1340 | SpanList free_[kMaxPages]; |
| 1341 | |
| 1342 | // Number of pages kept in free lists |
| 1343 | uintptr_t free_pages_; |
| 1344 | |
| 1345 | // Bytes allocated from system |
| 1346 | uint64_t system_bytes_; |
| 1347 | |
| 1348 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1349 | // Number of pages kept in free lists that are still committed. |
| 1350 | Length free_committed_pages_; |
| 1351 | |
| 1352 | // Number of pages that we committed in the last scavenge wait interval. |
| 1353 | Length pages_committed_since_last_scavenge_; |
| 1354 | #endif |
| 1355 | |
| 1356 | bool GrowHeap(Length n); |
| 1357 | |
| 1358 | // REQUIRES span->length >= n |
| 1359 | // Remove span from its free list, and move any leftover part of |
| 1360 | // span into appropriate free lists. Also update "span" to have |
| 1361 | // length exactly "n" and mark it as non-free so it can be returned |
| 1362 | // to the client. |
| 1363 | // |
| 1364 | // "released" is true iff "span" was found on a "returned" list. |
| 1365 | void Carve(Span* span, Length n, bool released); |
| 1366 | |
| 1367 | void RecordSpan(Span* span) { |
| 1368 | pagemap_.set(span->start, span); |
| 1369 | if (span->length > 1) { |
| 1370 | pagemap_.set(span->start + span->length - 1, span); |
| 1371 | } |
| 1372 | } |
| 1373 | |
| 1374 | // Allocate a large span of length == n. If successful, returns a |
| 1375 | // span of exactly the specified length. Else, returns NULL. |
| 1376 | Span* AllocLarge(Length n); |
| 1377 | |
| 1378 | #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1379 | // Incrementally release some memory to the system. |
| 1380 | // IncrementalScavenge(n) is called whenever n pages are freed. |
| 1381 | void IncrementalScavenge(Length n); |
| 1382 | #endif |
| 1383 | |
| 1384 | // Number of pages to deallocate before doing more scavenging |
| 1385 | int64_t scavenge_counter_; |
| 1386 | |
| 1387 | // Index of last free list we scavenged |
| 1388 | size_t scavenge_index_; |
| 1389 | |
| 1390 | #if defined(WTF_CHANGES) && OS(DARWIN) |
| 1391 | friend class FastMallocZone; |
| 1392 | #endif |
| 1393 | |
| 1394 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1395 | void initializeScavenger(); |
| 1396 | ALWAYS_INLINE void signalScavenger(); |
| 1397 | void scavenge(); |
| 1398 | ALWAYS_INLINE bool shouldContinueScavenging() const; |
| 1399 | |
| 1400 | #if !HAVE(DISPATCH_H) |
| 1401 | static NO_RETURN void* runScavengerThread(void*); |
| 1402 | NO_RETURN void scavengerThread(); |
| 1403 | |
// Keeps track of whether the background thread is actively scavenging memory every kScavengeTimerDelayInSeconds
// or is blocked waiting for more pages to be deleted.
| 1406 | bool m_scavengeThreadActive; |
| 1407 | |
| 1408 | pthread_mutex_t m_scavengeMutex; |
| 1409 | pthread_cond_t m_scavengeCondition; |
| 1410 | #else // !HAVE(DISPATCH_H) |
| 1411 | void periodicScavenge(); |
| 1412 | |
| 1413 | dispatch_queue_t m_scavengeQueue; |
| 1414 | dispatch_source_t m_scavengeTimer; |
| 1415 | bool m_scavengingScheduled; |
| 1416 | #endif |
| 1417 | |
| 1418 | #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1419 | }; |
| 1420 | |
| 1421 | void TCMalloc_PageHeap::init() |
| 1422 | { |
| 1423 | pagemap_.init(MetaDataAlloc); |
| 1424 | pagemap_cache_ = PageMapCache(0); |
| 1425 | free_pages_ = 0; |
| 1426 | system_bytes_ = 0; |
| 1427 | |
| 1428 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1429 | free_committed_pages_ = 0; |
| 1430 | pages_committed_since_last_scavenge_ = 0; |
| 1431 | #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1432 | |
| 1433 | scavenge_counter_ = 0; |
| 1434 | // Start scavenging at kMaxPages list |
| 1435 | scavenge_index_ = kMaxPages-1; |
| 1436 | COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits); |
| 1437 | DLL_Init(&large_.normal); |
| 1438 | DLL_Init(&large_.returned); |
| 1439 | for (size_t i = 0; i < kMaxPages; i++) { |
| 1440 | DLL_Init(&free_[i].normal); |
| 1441 | DLL_Init(&free_[i].returned); |
| 1442 | } |
| 1443 | |
| 1444 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1445 | initializeScavenger(); |
| 1446 | #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1447 | } |
| 1448 | |
| 1449 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1450 | |
| 1451 | #if !HAVE(DISPATCH_H) |
| 1452 | |
| 1453 | void TCMalloc_PageHeap::initializeScavenger() |
| 1454 | { |
| 1455 | pthread_mutex_init(&m_scavengeMutex, 0); |
| 1456 | pthread_cond_init(&m_scavengeCondition, 0); |
| 1457 | m_scavengeThreadActive = true; |
| 1458 | pthread_t thread; |
| 1459 | pthread_create(&thread, 0, runScavengerThread, this); |
| 1460 | } |
| 1461 | |
| 1462 | void* TCMalloc_PageHeap::runScavengerThread(void* context) |
| 1463 | { |
| 1464 | static_cast<TCMalloc_PageHeap*>(context)->scavengerThread(); |
| 1465 | #if COMPILER(MSVC) || OS(SOLARIS) |
| 1466 | // Without this, Visual Studio will complain that this method does not return a value. |
| 1467 | return 0; |
| 1468 | #endif |
| 1469 | } |
| 1470 | |
| 1471 | ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() |
| 1472 | { |
| 1473 | if (!m_scavengeThreadActive && shouldContinueScavenging()) |
| 1474 | pthread_cond_signal(&m_scavengeCondition); |
| 1475 | } |
| 1476 | |
| 1477 | #else // !HAVE(DISPATCH_H) |
| 1478 | |
| 1479 | void TCMalloc_PageHeap::initializeScavenger() |
| 1480 | { |
m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocSavenger", NULL);
| 1482 | m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue); |
| 1483 | dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, kScavengeTimerDelayInSeconds * NSEC_PER_SEC); |
| 1484 | dispatch_source_set_timer(m_scavengeTimer, startTime, kScavengeTimerDelayInSeconds * NSEC_PER_SEC, 1000 * NSEC_PER_USEC); |
| 1485 | dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); }); |
| 1486 | m_scavengingScheduled = false; |
| 1487 | } |
| 1488 | |
| 1489 | ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger() |
| 1490 | { |
| 1491 | if (!m_scavengingScheduled && shouldContinueScavenging()) { |
| 1492 | m_scavengingScheduled = true; |
| 1493 | dispatch_resume(m_scavengeTimer); |
| 1494 | } |
| 1495 | } |
| 1496 | |
| 1497 | #endif |
| 1498 | |
| 1499 | void TCMalloc_PageHeap::scavenge() |
| 1500 | { |
// If we had to commit memory during the last scavenge wait interval, it means we don't have enough free
// committed pages for the amount of allocation we are doing, so hold off on releasing memory back to the system.
| 1503 | if (pages_committed_since_last_scavenge_ > 0) { |
| 1504 | pages_committed_since_last_scavenge_ = 0; |
| 1505 | return; |
| 1506 | } |
| 1507 | Length pagesDecommitted = 0; |
| 1508 | for (int i = kMaxPages; i >= 0; i--) { |
| 1509 | SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i]; |
| 1510 | if (!DLL_IsEmpty(&slist->normal)) { |
| 1511 | // Release the last span on the normal portion of this list |
| 1512 | Span* s = slist->normal.prev; |
// Only decommit up to 1/kMaxScavengeAmountFactor of the free committed pages in a single pass.
| 1514 | if ((pagesDecommitted + s->length) * kMaxScavengeAmountFactor > free_committed_pages_) |
| 1515 | continue; |
| 1516 | DLL_Remove(s); |
| 1517 | TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), |
| 1518 | static_cast<size_t>(s->length << kPageShift)); |
| 1519 | if (!s->decommitted) { |
| 1520 | pagesDecommitted += s->length; |
| 1521 | s->decommitted = true; |
| 1522 | } |
| 1523 | DLL_Prepend(&slist->returned, s); |
| 1524 | // We can stop scavenging if the number of free committed pages left is less than or equal to the minimum number we want to keep around. |
| 1525 | if (free_committed_pages_ <= kMinimumFreeCommittedPageCount + pagesDecommitted) |
| 1526 | break; |
| 1527 | } |
| 1528 | } |
| 1529 | pages_committed_since_last_scavenge_ = 0; |
| 1530 | ASSERT(free_committed_pages_ >= pagesDecommitted); |
| 1531 | free_committed_pages_ -= pagesDecommitted; |
| 1532 | } |
| 1533 | |
| 1534 | ALWAYS_INLINE bool TCMalloc_PageHeap::shouldContinueScavenging() const |
| 1535 | { |
| 1536 | return free_committed_pages_ > kMinimumFreeCommittedPageCount; |
| 1537 | } |
| 1538 | |
| 1539 | #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1540 | |
| 1541 | inline Span* TCMalloc_PageHeap::New(Length n) { |
| 1542 | ASSERT(Check()); |
| 1543 | ASSERT(n > 0); |
| 1544 | |
| 1545 | // Find first size >= n that has a non-empty list |
| 1546 | for (Length s = n; s < kMaxPages; s++) { |
| 1547 | Span* ll = NULL; |
| 1548 | bool released = false; |
| 1549 | if (!DLL_IsEmpty(&free_[s].normal)) { |
| 1550 | // Found normal span |
| 1551 | ll = &free_[s].normal; |
| 1552 | } else if (!DLL_IsEmpty(&free_[s].returned)) { |
| 1553 | // Found returned span; reallocate it |
| 1554 | ll = &free_[s].returned; |
| 1555 | released = true; |
| 1556 | } else { |
| 1557 | // Keep looking in larger classes |
| 1558 | continue; |
| 1559 | } |
| 1560 | |
| 1561 | Span* result = ll->next; |
| 1562 | Carve(result, n, released); |
| 1563 | if (result->decommitted) { |
| 1564 | TCMalloc_SystemCommit(reinterpret_cast<void*>(result->start << kPageShift), static_cast<size_t>(n << kPageShift)); |
| 1565 | result->decommitted = false; |
| 1566 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1567 | pages_committed_since_last_scavenge_ += n; |
| 1568 | #endif |
| 1569 | } |
| 1570 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1571 | else { |
| 1572 | // The newly allocated memory is from a span that's in the normal span list (already committed). Update the |
| 1573 | // free committed pages count. |
| 1574 | ASSERT(free_committed_pages_ >= n); |
| 1575 | free_committed_pages_ -= n; |
| 1576 | } |
| 1577 | #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1578 | ASSERT(Check()); |
| 1579 | free_pages_ -= n; |
| 1580 | return result; |
| 1581 | } |
| 1582 | |
| 1583 | Span* result = AllocLarge(n); |
| 1584 | if (result != NULL) { |
| 1585 | ASSERT_SPAN_COMMITTED(result); |
| 1586 | return result; |
| 1587 | } |
| 1588 | |
| 1589 | // Grow the heap and try again |
| 1590 | if (!GrowHeap(n)) { |
| 1591 | ASSERT(Check()); |
| 1592 | return NULL; |
| 1593 | } |
| 1594 | |
| 1595 | return AllocLarge(n); |
| 1596 | } |
| 1597 | |
| 1598 | Span* TCMalloc_PageHeap::AllocLarge(Length n) { |
// Find the best span (closest to n in size).
// The following loops implement address-ordered best-fit.
| 1601 | bool from_released = false; |
| 1602 | Span *best = NULL; |
| 1603 | |
| 1604 | // Search through normal list |
| 1605 | for (Span* span = large_.normal.next; |
| 1606 | span != &large_.normal; |
| 1607 | span = span->next) { |
| 1608 | if (span->length >= n) { |
| 1609 | if ((best == NULL) |
| 1610 | || (span->length < best->length) |
| 1611 | || ((span->length == best->length) && (span->start < best->start))) { |
| 1612 | best = span; |
| 1613 | from_released = false; |
| 1614 | } |
| 1615 | } |
| 1616 | } |
| 1617 | |
| 1618 | // Search through released list in case it has a better fit |
| 1619 | for (Span* span = large_.returned.next; |
| 1620 | span != &large_.returned; |
| 1621 | span = span->next) { |
| 1622 | if (span->length >= n) { |
| 1623 | if ((best == NULL) |
| 1624 | || (span->length < best->length) |
| 1625 | || ((span->length == best->length) && (span->start < best->start))) { |
| 1626 | best = span; |
| 1627 | from_released = true; |
| 1628 | } |
| 1629 | } |
| 1630 | } |
| 1631 | |
| 1632 | if (best != NULL) { |
| 1633 | Carve(best, n, from_released); |
| 1634 | if (best->decommitted) { |
| 1635 | TCMalloc_SystemCommit(reinterpret_cast<void*>(best->start << kPageShift), static_cast<size_t>(n << kPageShift)); |
| 1636 | best->decommitted = false; |
| 1637 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1638 | pages_committed_since_last_scavenge_ += n; |
| 1639 | #endif |
| 1640 | } |
| 1641 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1642 | else { |
| 1643 | // The newly allocated memory is from a span that's in the normal span list (already committed). Update the |
| 1644 | // free committed pages count. |
| 1645 | ASSERT(free_committed_pages_ >= n); |
| 1646 | free_committed_pages_ -= n; |
| 1647 | } |
| 1648 | #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1649 | ASSERT(Check()); |
| 1650 | free_pages_ -= n; |
| 1651 | return best; |
| 1652 | } |
| 1653 | return NULL; |
| 1654 | } |
| 1655 | |
| 1656 | Span* TCMalloc_PageHeap::Split(Span* span, Length n) { |
| 1657 | ASSERT(0 < n); |
| 1658 | ASSERT(n < span->length); |
| 1659 | ASSERT(!span->free); |
| 1660 | ASSERT(span->sizeclass == 0); |
| 1661 | Event(span, 'T', n); |
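// Split off the trailing pages into a new span and update the pagemap so that
// the boundary pages of both spans point at the right descriptor.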
| 1662 | |
| 1663 | const Length extra = span->length - n; |
| 1664 | Span* leftover = NewSpan(span->start + n, extra); |
| 1665 | Event(leftover, 'U', extra); |
| 1666 | RecordSpan(leftover); |
| 1667 | pagemap_.set(span->start + n - 1, span); // Update map from pageid to span |
| 1668 | span->length = n; |
| 1669 | |
| 1670 | return leftover; |
| 1671 | } |
| 1672 | |
| 1673 | static ALWAYS_INLINE void propagateDecommittedState(Span* destination, Span* source) |
| 1674 | { |
| 1675 | destination->decommitted = source->decommitted; |
| 1676 | } |
| 1677 | |
| 1678 | inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) { |
| 1679 | ASSERT(n > 0); |
| 1680 | DLL_Remove(span); |
| 1681 | span->free = 0; |
| 1682 | Event(span, 'A', n); |
| 1683 | |
| 1684 | const int extra = static_cast<int>(span->length - n); |
| 1685 | ASSERT(extra >= 0); |
| 1686 | if (extra > 0) { |
| 1687 | Span* leftover = NewSpan(span->start + n, extra); |
| 1688 | leftover->free = 1; |
| 1689 | propagateDecommittedState(leftover, span); |
| 1690 | Event(leftover, 'S', extra); |
| 1691 | RecordSpan(leftover); |
| 1692 | |
| 1693 | // Place leftover span on appropriate free list |
| 1694 | SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_; |
| 1695 | Span* dst = released ? &listpair->returned : &listpair->normal; |
| 1696 | DLL_Prepend(dst, leftover); |
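// The leftover span has been queued; now shrink the original span to exactly
// n pages and fix up the pagemap entry for its new last page.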
| 1697 | |
| 1698 | span->length = n; |
| 1699 | pagemap_.set(span->start + n - 1, span); |
| 1700 | } |
| 1701 | } |
| 1702 | |
| 1703 | static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other) |
| 1704 | { |
| 1705 | if (destination->decommitted && !other->decommitted) { |
| 1706 | TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift), |
| 1707 | static_cast<size_t>(other->length << kPageShift)); |
| 1708 | } else if (other->decommitted && !destination->decommitted) { |
| 1709 | TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift), |
| 1710 | static_cast<size_t>(destination->length << kPageShift)); |
| 1711 | destination->decommitted = true; |
| 1712 | } |
| 1713 | } |
| 1714 | |
| 1715 | inline void TCMalloc_PageHeap::Delete(Span* span) { |
| 1716 | ASSERT(Check()); |
| 1717 | ASSERT(!span->free); |
| 1718 | ASSERT(span->length > 0); |
| 1719 | ASSERT(GetDescriptor(span->start) == span); |
| 1720 | ASSERT(GetDescriptor(span->start + span->length - 1) == span); |
| 1721 | span->sizeclass = 0; |
| 1722 | #ifndef NO_TCMALLOC_SAMPLES |
| 1723 | span->sample = 0; |
| 1724 | #endif |
| 1725 | |
| 1726 | // Coalesce -- we guarantee that "p" != 0, so no bounds checking |
| 1727 | // necessary. We do not bother resetting the stale pagemap |
| 1728 | // entries for the pieces we are merging together because we only |
| 1729 | // care about the pagemap entries for the boundaries. |
| 1730 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1731 | // Track the total size of the neighboring free spans that are committed. |
| 1732 | Length neighboringCommittedSpansLength = 0; |
| 1733 | #endif |
| 1734 | const PageID p = span->start; |
| 1735 | const Length n = span->length; |
| 1736 | Span* prev = GetDescriptor(p-1); |
| 1737 | if (prev != NULL && prev->free) { |
| 1738 | // Merge preceding span into this span |
| 1739 | ASSERT(prev->start + prev->length == p); |
| 1740 | const Length len = prev->length; |
| 1741 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1742 | if (!prev->decommitted) |
| 1743 | neighboringCommittedSpansLength += len; |
| 1744 | #endif |
| 1745 | mergeDecommittedStates(span, prev); |
| 1746 | DLL_Remove(prev); |
| 1747 | DeleteSpan(prev); |
| 1748 | span->start -= len; |
| 1749 | span->length += len; |
| 1750 | pagemap_.set(span->start, span); |
| 1751 | Event(span, 'L', len); |
| 1752 | } |
| 1753 | Span* next = GetDescriptor(p+n); |
| 1754 | if (next != NULL && next->free) { |
| 1755 | // Merge next span into this span |
| 1756 | ASSERT(next->start == p+n); |
| 1757 | const Length len = next->length; |
| 1758 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1759 | if (!next->decommitted) |
| 1760 | neighboringCommittedSpansLength += len; |
| 1761 | #endif |
| 1762 | mergeDecommittedStates(span, next); |
| 1763 | DLL_Remove(next); |
| 1764 | DeleteSpan(next); |
| 1765 | span->length += len; |
| 1766 | pagemap_.set(span->start + span->length - 1, span); |
| 1767 | Event(span, 'R', len); |
| 1768 | } |
| 1769 | |
| 1770 | Event(span, 'D', span->length); |
| 1771 | span->free = 1; |
| 1772 | if (span->decommitted) { |
| 1773 | if (span->length < kMaxPages) |
| 1774 | DLL_Prepend(&free_[span->length].returned, span); |
| 1775 | else |
| 1776 | DLL_Prepend(&large_.returned, span); |
| 1777 | } else { |
| 1778 | if (span->length < kMaxPages) |
| 1779 | DLL_Prepend(&free_[span->length].normal, span); |
| 1780 | else |
| 1781 | DLL_Prepend(&large_.normal, span); |
| 1782 | } |
| 1783 | free_pages_ += n; |
| 1784 | |
| 1785 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1786 | if (span->decommitted) { |
| 1787 | // If the merged span is decommitted, that means we decommitted any neighboring spans that were |
| 1788 | // committed. Update the free committed pages count. |
| 1789 | free_committed_pages_ -= neighboringCommittedSpansLength; |
| 1790 | } else { |
| 1791 | // If the merged span remains committed, add the deleted span's size to the free committed pages count. |
| 1792 | free_committed_pages_ += n; |
| 1793 | } |
| 1794 | |
| 1795 | // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system. |
| 1796 | signalScavenger(); |
| 1797 | #else |
| 1798 | IncrementalScavenge(n); |
| 1799 | #endif |
| 1800 | |
| 1801 | ASSERT(Check()); |
| 1802 | } |
| 1803 | |
| 1804 | #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1805 | void TCMalloc_PageHeap::IncrementalScavenge(Length n) { |
| 1806 | // Fast path; not yet time to release memory |
| 1807 | scavenge_counter_ -= n; |
| 1808 | if (scavenge_counter_ >= 0) return; // Not yet time to scavenge |
| 1809 | |
// If there is nothing to release, wait for this many pages before
// scavenging again. With 4K pages, this comes to 1MB of memory.
| 1812 | static const size_t kDefaultReleaseDelay = 1 << 8; |
| 1813 | |
| 1814 | // Find index of free list to scavenge |
| 1815 | size_t index = scavenge_index_ + 1; |
| 1816 | for (size_t i = 0; i < kMaxPages+1; i++) { |
| 1817 | if (index > kMaxPages) index = 0; |
| 1818 | SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index]; |
| 1819 | if (!DLL_IsEmpty(&slist->normal)) { |
| 1820 | // Release the last span on the normal portion of this list |
| 1821 | Span* s = slist->normal.prev; |
| 1822 | DLL_Remove(s); |
| 1823 | TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), |
| 1824 | static_cast<size_t>(s->length << kPageShift)); |
| 1825 | s->decommitted = true; |
| 1826 | DLL_Prepend(&slist->returned, s); |
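// Schedule the next scavenge pass: the more pages sitting on free lists, the
// smaller the delay, clamped to the range [64, kDefaultReleaseDelay] pages.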
| 1827 | |
| 1828 | scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay))); |
| 1829 | |
| 1830 | if (index == kMaxPages && !DLL_IsEmpty(&slist->normal)) |
| 1831 | scavenge_index_ = index - 1; |
| 1832 | else |
| 1833 | scavenge_index_ = index; |
| 1834 | return; |
| 1835 | } |
| 1836 | index++; |
| 1837 | } |
| 1838 | |
| 1839 | // Nothing to scavenge, delay for a while |
| 1840 | scavenge_counter_ = kDefaultReleaseDelay; |
| 1841 | } |
| 1842 | #endif |
| 1843 | |
| 1844 | void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) { |
| 1845 | // Associate span object with all interior pages as well |
| 1846 | ASSERT(!span->free); |
| 1847 | ASSERT(GetDescriptor(span->start) == span); |
| 1848 | ASSERT(GetDescriptor(span->start+span->length-1) == span); |
| 1849 | Event(span, 'C', sc); |
| 1850 | span->sizeclass = static_cast<unsigned int>(sc); |
| 1851 | for (Length i = 1; i < span->length-1; i++) { |
| 1852 | pagemap_.set(span->start+i, span); |
| 1853 | } |
| 1854 | } |
| 1855 | |
| 1856 | #ifdef WTF_CHANGES |
| 1857 | size_t TCMalloc_PageHeap::ReturnedBytes() const { |
| 1858 | size_t result = 0; |
| 1859 | for (unsigned s = 0; s < kMaxPages; s++) { |
| 1860 | const int r_length = DLL_Length(&free_[s].returned); |
| 1861 | unsigned r_pages = s * r_length; |
| 1862 | result += r_pages << kPageShift; |
| 1863 | } |
| 1864 | |
| 1865 | for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) |
| 1866 | result += s->length << kPageShift; |
| 1867 | return result; |
| 1868 | } |
| 1869 | #endif |
| 1870 | |
| 1871 | #ifndef WTF_CHANGES |
| 1872 | static double PagesToMB(uint64_t pages) { |
| 1873 | return (pages << kPageShift) / 1048576.0; |
| 1874 | } |
| 1875 | |
| 1876 | void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) { |
| 1877 | int nonempty_sizes = 0; |
| 1878 | for (int s = 0; s < kMaxPages; s++) { |
| 1879 | if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) { |
| 1880 | nonempty_sizes++; |
| 1881 | } |
| 1882 | } |
out->printf("------------------------------------------------\n");
out->printf("PageHeap: %d sizes; %6.1f MB free\n",
nonempty_sizes, PagesToMB(free_pages_));
out->printf("------------------------------------------------\n");
| 1887 | uint64_t total_normal = 0; |
| 1888 | uint64_t total_returned = 0; |
| 1889 | for (int s = 0; s < kMaxPages; s++) { |
| 1890 | const int n_length = DLL_Length(&free_[s].normal); |
| 1891 | const int r_length = DLL_Length(&free_[s].returned); |
| 1892 | if (n_length + r_length > 0) { |
| 1893 | uint64_t n_pages = s * n_length; |
| 1894 | uint64_t r_pages = s * r_length; |
| 1895 | total_normal += n_pages; |
| 1896 | total_returned += r_pages; |
out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
"; unmapped: %6.1f MB; %6.1f MB cum\n",
| 1899 | s, |
| 1900 | (n_length + r_length), |
| 1901 | PagesToMB(n_pages + r_pages), |
| 1902 | PagesToMB(total_normal + total_returned), |
| 1903 | PagesToMB(r_pages), |
| 1904 | PagesToMB(total_returned)); |
| 1905 | } |
| 1906 | } |
| 1907 | |
| 1908 | uint64_t n_pages = 0; |
| 1909 | uint64_t r_pages = 0; |
| 1910 | int n_spans = 0; |
| 1911 | int r_spans = 0; |
out->printf("Normal large spans:\n");
| 1913 | for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) { |
out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
| 1915 | s->length, PagesToMB(s->length)); |
| 1916 | n_pages += s->length; |
| 1917 | n_spans++; |
| 1918 | } |
out->printf("Unmapped large spans:\n");
| 1920 | for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) { |
out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
| 1922 | s->length, PagesToMB(s->length)); |
| 1923 | r_pages += s->length; |
| 1924 | r_spans++; |
| 1925 | } |
| 1926 | total_normal += n_pages; |
| 1927 | total_returned += r_pages; |
out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum"
"; unmapped: %6.1f MB; %6.1f MB cum\n",
| 1930 | (n_spans + r_spans), |
| 1931 | PagesToMB(n_pages + r_pages), |
| 1932 | PagesToMB(total_normal + total_returned), |
| 1933 | PagesToMB(r_pages), |
| 1934 | PagesToMB(total_returned)); |
| 1935 | } |
| 1936 | #endif |
| 1937 | |
| 1938 | bool TCMalloc_PageHeap::GrowHeap(Length n) { |
| 1939 | ASSERT(kMaxPages >= kMinSystemAlloc); |
| 1940 | if (n > kMaxValidPages) return false; |
| 1941 | Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc); |
| 1942 | size_t actual_size; |
| 1943 | void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); |
| 1944 | if (ptr == NULL) { |
| 1945 | if (n < ask) { |
| 1946 | // Try growing just "n" pages |
| 1947 | ask = n; |
| 1948 | ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize); |
| 1949 | } |
| 1950 | if (ptr == NULL) return false; |
| 1951 | } |
| 1952 | ask = actual_size >> kPageShift; |
| 1953 | |
| 1954 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 1955 | pages_committed_since_last_scavenge_ += ask; |
| 1956 | #endif |
| 1957 | |
| 1958 | uint64_t old_system_bytes = system_bytes_; |
| 1959 | system_bytes_ += (ask << kPageShift); |
| 1960 | const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; |
| 1961 | ASSERT(p > 0); |
| 1962 | |
// If we already have a lot of pages allocated, just preallocate a bunch of
// memory for the page map. This prevents fragmentation by pagemap metadata
// when a program keeps allocating and freeing large blocks.
| 1966 | |
| 1967 | if (old_system_bytes < kPageMapBigAllocationThreshold |
| 1968 | && system_bytes_ >= kPageMapBigAllocationThreshold) { |
| 1969 | pagemap_.PreallocateMoreMemory(); |
| 1970 | } |
| 1971 | |
| 1972 | // Make sure pagemap_ has entries for all of the new pages. |
| 1973 | // Plus ensure one before and one after so coalescing code |
| 1974 | // does not need bounds-checking. |
| 1975 | if (pagemap_.Ensure(p-1, ask+2)) { |
| 1976 | // Pretend the new area is allocated and then Delete() it to |
| 1977 | // cause any necessary coalescing to occur. |
| 1978 | // |
| 1979 | // We do not adjust free_pages_ here since Delete() will do it for us. |
| 1980 | Span* span = NewSpan(p, ask); |
| 1981 | RecordSpan(span); |
| 1982 | Delete(span); |
| 1983 | ASSERT(Check()); |
| 1984 | return true; |
| 1985 | } else { |
| 1986 | // We could not allocate memory within "pagemap_" |
| 1987 | // TODO: Once we can return memory to the system, return the new span |
| 1988 | return false; |
| 1989 | } |
| 1990 | } |
| 1991 | |
| 1992 | bool TCMalloc_PageHeap::Check() { |
| 1993 | ASSERT(free_[0].normal.next == &free_[0].normal); |
| 1994 | ASSERT(free_[0].returned.next == &free_[0].returned); |
| 1995 | CheckList(&large_.normal, kMaxPages, 1000000000); |
| 1996 | CheckList(&large_.returned, kMaxPages, 1000000000); |
| 1997 | for (Length s = 1; s < kMaxPages; s++) { |
| 1998 | CheckList(&free_[s].normal, s, s); |
| 1999 | CheckList(&free_[s].returned, s, s); |
| 2000 | } |
| 2001 | return true; |
| 2002 | } |
| 2003 | |
| 2004 | #if ASSERT_DISABLED |
| 2005 | bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) { |
| 2006 | return true; |
| 2007 | } |
| 2008 | #else |
| 2009 | bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) { |
| 2010 | for (Span* s = list->next; s != list; s = s->next) { |
| 2011 | CHECK_CONDITION(s->free); |
| 2012 | CHECK_CONDITION(s->length >= min_pages); |
| 2013 | CHECK_CONDITION(s->length <= max_pages); |
| 2014 | CHECK_CONDITION(GetDescriptor(s->start) == s); |
| 2015 | CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s); |
| 2016 | } |
| 2017 | return true; |
| 2018 | } |
| 2019 | #endif |
| 2020 | |
| 2021 | static void ReleaseFreeList(Span* list, Span* returned) { |
| 2022 | // Walk backwards through list so that when we push these |
| 2023 | // spans on the "returned" list, we preserve the order. |
| 2024 | while (!DLL_IsEmpty(list)) { |
| 2025 | Span* s = list->prev; |
| 2026 | DLL_Remove(s); |
| 2027 | DLL_Prepend(returned, s); |
| 2028 | TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift), |
| 2029 | static_cast<size_t>(s->length << kPageShift)); |
| 2030 | } |
| 2031 | } |
| 2032 | |
| 2033 | void TCMalloc_PageHeap::ReleaseFreePages() { |
| 2034 | for (Length s = 0; s < kMaxPages; s++) { |
| 2035 | ReleaseFreeList(&free_[s].normal, &free_[s].returned); |
| 2036 | } |
| 2037 | ReleaseFreeList(&large_.normal, &large_.returned); |
| 2038 | ASSERT(Check()); |
| 2039 | } |
| 2040 | |
| 2041 | //------------------------------------------------------------------- |
| 2042 | // Free list |
| 2043 | //------------------------------------------------------------------- |
| 2044 | |
| 2045 | class TCMalloc_ThreadCache_FreeList { |
| 2046 | private: |
| 2047 | void* list_; // Linked list of nodes |
| 2048 | uint16_t length_; // Current length |
| 2049 | uint16_t lowater_; // Low water mark for list length |
| 2050 | |
| 2051 | public: |
| 2052 | void Init() { |
| 2053 | list_ = NULL; |
| 2054 | length_ = 0; |
| 2055 | lowater_ = 0; |
| 2056 | } |
| 2057 | |
| 2058 | // Return current length of list |
| 2059 | int length() const { |
| 2060 | return length_; |
| 2061 | } |
| 2062 | |
| 2063 | // Is list empty? |
| 2064 | bool empty() const { |
| 2065 | return list_ == NULL; |
| 2066 | } |
| 2067 | |
| 2068 | // Low-water mark management |
| 2069 | int lowwatermark() const { return lowater_; } |
| 2070 | void clear_lowwatermark() { lowater_ = length_; } |
| 2071 | |
| 2072 | ALWAYS_INLINE void Push(void* ptr) { |
| 2073 | SLL_Push(&list_, ptr); |
| 2074 | length_++; |
| 2075 | } |
| 2076 | |
| 2077 | void PushRange(int N, void *start, void *end) { |
| 2078 | SLL_PushRange(&list_, start, end); |
| 2079 | length_ = length_ + static_cast<uint16_t>(N); |
| 2080 | } |
| 2081 | |
| 2082 | void PopRange(int N, void **start, void **end) { |
| 2083 | SLL_PopRange(&list_, N, start, end); |
| 2084 | ASSERT(length_ >= N); |
| 2085 | length_ = length_ - static_cast<uint16_t>(N); |
| 2086 | if (length_ < lowater_) lowater_ = length_; |
| 2087 | } |
| 2088 | |
| 2089 | ALWAYS_INLINE void* Pop() { |
| 2090 | ASSERT(list_ != NULL); |
| 2091 | length_--; |
| 2092 | if (length_ < lowater_) lowater_ = length_; |
| 2093 | return SLL_Pop(&list_); |
| 2094 | } |
| 2095 | |
| 2096 | #ifdef WTF_CHANGES |
| 2097 | template <class Finder, class Reader> |
| 2098 | void enumerateFreeObjects(Finder& finder, const Reader& reader) |
| 2099 | { |
| 2100 | for (void* nextObject = list_; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject))) |
| 2101 | finder.visit(nextObject); |
| 2102 | } |
| 2103 | #endif |
| 2104 | }; |
| 2105 | |
| 2106 | //------------------------------------------------------------------- |
| 2107 | // Data kept per thread |
| 2108 | //------------------------------------------------------------------- |
| 2109 | |
| 2110 | class TCMalloc_ThreadCache { |
| 2111 | private: |
| 2112 | typedef TCMalloc_ThreadCache_FreeList FreeList; |
| 2113 | #if COMPILER(MSVC) |
| 2114 | typedef DWORD ThreadIdentifier; |
| 2115 | #else |
| 2116 | typedef pthread_t ThreadIdentifier; |
| 2117 | #endif |
| 2118 | |
| 2119 | size_t size_; // Combined size of data |
| 2120 | ThreadIdentifier tid_; // Which thread owns it |
| 2121 | bool in_setspecific_; // Called pthread_setspecific? |
| 2122 | FreeList list_[kNumClasses]; // Array indexed by size-class |
| 2123 | |
| 2124 | // We sample allocations, biased by the size of the allocation |
| 2125 | uint32_t rnd_; // Cheap random number generator |
| 2126 | size_t bytes_until_sample_; // Bytes until we sample next |
| 2127 | |
| 2128 | // Allocate a new heap. REQUIRES: pageheap_lock is held. |
| 2129 | static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid); |
| 2130 | |
// Use only as a pthread thread-specific destructor function.
| 2132 | static void DestroyThreadCache(void* ptr); |
| 2133 | public: |
| 2134 | // All ThreadCache objects are kept in a linked list (for stats collection) |
| 2135 | TCMalloc_ThreadCache* next_; |
| 2136 | TCMalloc_ThreadCache* prev_; |
| 2137 | |
| 2138 | void Init(ThreadIdentifier tid); |
| 2139 | void Cleanup(); |
| 2140 | |
| 2141 | // Accessors (mostly just for printing stats) |
| 2142 | int freelist_length(size_t cl) const { return list_[cl].length(); } |
| 2143 | |
| 2144 | // Total byte size in cache |
| 2145 | size_t Size() const { return size_; } |
| 2146 | |
| 2147 | void* Allocate(size_t size); |
| 2148 | void Deallocate(void* ptr, size_t size_class); |
| 2149 | |
| 2150 | void FetchFromCentralCache(size_t cl, size_t allocationSize); |
| 2151 | void ReleaseToCentralCache(size_t cl, int N); |
| 2152 | void Scavenge(); |
| 2153 | void Print() const; |
| 2154 | |
| 2155 | // Record allocation of "k" bytes. Return true iff allocation |
| 2156 | // should be sampled |
| 2157 | bool SampleAllocation(size_t k); |
| 2158 | |
| 2159 | // Pick next sampling point |
| 2160 | void PickNextSample(size_t k); |
| 2161 | |
| 2162 | static void InitModule(); |
| 2163 | static void InitTSD(); |
| 2164 | static TCMalloc_ThreadCache* GetThreadHeap(); |
| 2165 | static TCMalloc_ThreadCache* GetCache(); |
| 2166 | static TCMalloc_ThreadCache* GetCacheIfPresent(); |
| 2167 | static TCMalloc_ThreadCache* CreateCacheIfNecessary(); |
| 2168 | static void DeleteCache(TCMalloc_ThreadCache* heap); |
| 2169 | static void BecomeIdle(); |
| 2170 | static void RecomputeThreadCacheSize(); |
| 2171 | |
| 2172 | #ifdef WTF_CHANGES |
| 2173 | template <class Finder, class Reader> |
| 2174 | void enumerateFreeObjects(Finder& finder, const Reader& reader) |
| 2175 | { |
| 2176 | for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++) |
| 2177 | list_[sizeClass].enumerateFreeObjects(finder, reader); |
| 2178 | } |
| 2179 | #endif |
| 2180 | }; |
| 2181 | |
| 2182 | //------------------------------------------------------------------- |
| 2183 | // Data kept per size-class in central cache |
| 2184 | //------------------------------------------------------------------- |
| 2185 | |
| 2186 | class TCMalloc_Central_FreeList { |
| 2187 | public: |
| 2188 | void Init(size_t cl); |
| 2189 | |
| 2190 | // These methods all do internal locking. |
| 2191 | |
| 2192 | // Insert the specified range into the central freelist. N is the number of |
| 2193 | // elements in the range. |
| 2194 | void InsertRange(void *start, void *end, int N); |
| 2195 | |
// Remove a range of objects from the central freelist. On return, *start and *end
// bound the fetched chain and *N holds the actual number of fetched elements.
| 2197 | void RemoveRange(void **start, void **end, int *N); |
| 2198 | |
| 2199 | // Returns the number of free objects in cache. |
| 2200 | size_t length() { |
| 2201 | SpinLockHolder h(&lock_); |
| 2202 | return counter_; |
| 2203 | } |
| 2204 | |
| 2205 | // Returns the number of free objects in the transfer cache. |
| 2206 | int tc_length() { |
| 2207 | SpinLockHolder h(&lock_); |
| 2208 | return used_slots_ * num_objects_to_move[size_class_]; |
| 2209 | } |
| 2210 | |
| 2211 | #ifdef WTF_CHANGES |
| 2212 | template <class Finder, class Reader> |
| 2213 | void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList) |
| 2214 | { |
| 2215 | for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0)) |
| 2216 | ASSERT(!span->objects); |
| 2217 | |
| 2218 | ASSERT(!nonempty_.objects); |
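// Compute the remote address of this free list's nonempty_ header and walk the
// remote span list through the Reader, visiting every free object threaded
// through each span.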
| 2219 | static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this); |
| 2220 | |
| 2221 | Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset); |
| 2222 | Span* remoteSpan = nonempty_.next; |
| 2223 | |
| 2224 | for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) { |
| 2225 | for (void* nextObject = span->objects; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject))) |
| 2226 | finder.visit(nextObject); |
| 2227 | } |
| 2228 | } |
| 2229 | #endif |
| 2230 | |
| 2231 | private: |
| 2232 | // REQUIRES: lock_ is held |
| 2233 | // Remove object from cache and return. |
| 2234 | // Return NULL if no free entries in cache. |
| 2235 | void* FetchFromSpans(); |
| 2236 | |
| 2237 | // REQUIRES: lock_ is held |
| 2238 | // Remove object from cache and return. Fetches |
| 2239 | // from pageheap if cache is empty. Only returns |
| 2240 | // NULL on allocation failure. |
| 2241 | void* FetchFromSpansSafe(); |
| 2242 | |
| 2243 | // REQUIRES: lock_ is held |
| 2244 | // Release a linked list of objects to spans. |
| 2245 | // May temporarily release lock_. |
| 2246 | void ReleaseListToSpans(void *start); |
| 2247 | |
| 2248 | // REQUIRES: lock_ is held |
| 2249 | // Release an object to spans. |
| 2250 | // May temporarily release lock_. |
| 2251 | void ReleaseToSpans(void* object); |
| 2252 | |
| 2253 | // REQUIRES: lock_ is held |
| 2254 | // Populate cache by fetching from the page heap. |
| 2255 | // May temporarily release lock_. |
| 2256 | void Populate(); |
| 2257 | |
// REQUIRES: lock_ is held.
| 2259 | // Tries to make room for a TCEntry. If the cache is full it will try to |
| 2260 | // expand it at the cost of some other cache size. Return false if there is |
| 2261 | // no space. |
| 2262 | bool MakeCacheSpace(); |
| 2263 | |
| 2264 | // REQUIRES: lock_ for locked_size_class is held. |
| 2265 | // Picks a "random" size class to steal TCEntry slot from. In reality it |
| 2266 | // just iterates over the sizeclasses but does so without taking a lock. |
| 2267 | // Returns true on success. |
| 2268 | // May temporarily lock a "random" size class. |
| 2269 | static bool EvictRandomSizeClass(size_t locked_size_class, bool force); |
| 2270 | |
| 2271 | // REQUIRES: lock_ is *not* held. |
// Tries to shrink the cache. If force is true it will release objects to
// spans if that allows it to shrink the cache. Returns false if it failed to
// shrink the cache. Decrements cache_size_ on success.
// May temporarily take lock_. If it takes lock_, the locked_size_class
// lock is released to keep the thread from holding two size class locks
// concurrently, which could lead to a deadlock.
| 2278 | bool ShrinkCache(int locked_size_class, bool force); |
| 2279 | |
// This lock protects all the data members. used_slots_ and cache_size_
// may be looked at without holding the lock.
| 2282 | SpinLock lock_; |
| 2283 | |
| 2284 | // We keep linked lists of empty and non-empty spans. |
| 2285 | size_t size_class_; // My size class |
| 2286 | Span empty_; // Dummy header for list of empty spans |
| 2287 | Span nonempty_; // Dummy header for list of non-empty spans |
| 2288 | size_t counter_; // Number of free objects in cache entry |
| 2289 | |
// Here we reserve space for TCEntry cache slots. Since one size class can
// end up getting all the TCEntry quota in the system, we simply preallocate
// a sufficient number of entries here.
| 2293 | TCEntry tc_slots_[kNumTransferEntries]; |
| 2294 | |
| 2295 | // Number of currently used cached entries in tc_slots_. This variable is |
| 2296 | // updated under a lock but can be read without one. |
| 2297 | int32_t used_slots_; |
| 2298 | // The current number of slots for this size class. This is an |
| 2299 | // adaptive value that is increased if there is lots of traffic |
| 2300 | // on a given size class. |
| 2301 | int32_t cache_size_; |
| 2302 | }; |
| 2303 | |
| 2304 | // Pad each CentralCache object to multiple of 64 bytes |
| 2305 | class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList { |
| 2306 | private: |
| 2307 | char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64]; |
| 2308 | }; |
| 2309 | |
| 2310 | //------------------------------------------------------------------- |
| 2311 | // Global variables |
| 2312 | //------------------------------------------------------------------- |
| 2313 | |
| 2314 | // Central cache -- a collection of free-lists, one per size-class. |
| 2315 | // We have a separate lock per free-list to reduce contention. |
| 2316 | static TCMalloc_Central_FreeListPadded central_cache[kNumClasses]; |
| 2317 | |
| 2318 | // Page-level allocator |
| 2319 | static SpinLock pageheap_lock = SPINLOCK_INITIALIZER; |
| 2320 | static void* pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(void*) - 1) / sizeof(void*)]; |
| 2321 | static bool phinited = false; |
| 2322 | |
| 2323 | // Avoid extra level of indirection by making "pageheap" be just an alias |
| 2324 | // of pageheap_memory. |
| 2325 | typedef union { |
| 2326 | void* m_memory; |
| 2327 | TCMalloc_PageHeap* m_pageHeap; |
| 2328 | } PageHeapUnion; |
| 2329 | |
| 2330 | static inline TCMalloc_PageHeap* getPageHeap() |
| 2331 | { |
| 2332 | PageHeapUnion u = { &pageheap_memory[0] }; |
| 2333 | return u.m_pageHeap; |
| 2334 | } |
| 2335 | |
| 2336 | #define pageheap getPageHeap() |
| 2337 | |
| 2338 | #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY |
| 2339 | |
| 2340 | #if !HAVE(DISPATCH_H) |
| 2341 | #if OS(WINDOWS) |
| 2342 | static void sleep(unsigned seconds) |
| 2343 | { |
| 2344 | ::Sleep(seconds * 1000); |
| 2345 | } |
| 2346 | #endif |
| 2347 | |
| 2348 | void TCMalloc_PageHeap::scavengerThread() |
| 2349 | { |
| 2350 | #if HAVE(PTHREAD_SETNAME_NP) |
| 2351 | pthread_setname_np("JavaScriptCore: FastMalloc scavenger" ); |
| 2352 | #endif |
| 2353 | |
| 2354 | while (1) { |
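// When there is nothing left to release, park on the condition variable until
// Delete() signals (via signalScavenger) that enough pages have been freed;
// otherwise sleep between scavenge passes.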
| 2355 | if (!shouldContinueScavenging()) { |
| 2356 | pthread_mutex_lock(&m_scavengeMutex); |
| 2357 | m_scavengeThreadActive = false; |
| 2358 | // Block until there are enough freed pages to release back to the system. |
| 2359 | pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex); |
| 2360 | m_scavengeThreadActive = true; |
| 2361 | pthread_mutex_unlock(&m_scavengeMutex); |
| 2362 | } |
| 2363 | sleep(kScavengeTimerDelayInSeconds); |
| 2364 | { |
| 2365 | SpinLockHolder h(&pageheap_lock); |
| 2366 | pageheap->scavenge(); |
| 2367 | } |
| 2368 | } |
| 2369 | } |
| 2370 | |
| 2371 | #else |
| 2372 | |
| 2373 | void TCMalloc_PageHeap::periodicScavenge() |
| 2374 | { |
| 2375 | { |
| 2376 | SpinLockHolder h(&pageheap_lock); |
| 2377 | pageheap->scavenge(); |
| 2378 | } |
| 2379 | |
| 2380 | if (!shouldContinueScavenging()) { |
| 2381 | m_scavengingScheduled = false; |
| 2382 | dispatch_suspend(m_scavengeTimer); |
| 2383 | } |
| 2384 | } |
| 2385 | #endif // HAVE(DISPATCH_H) |
| 2386 | |
| 2387 | #endif |
| 2388 | |
| 2389 | // If TLS is available, we also store a copy |
| 2390 | // of the per-thread object in a __thread variable |
| 2391 | // since __thread variables are faster to read |
| 2392 | // than pthread_getspecific(). We still need |
| 2393 | // pthread_setspecific() because __thread |
| 2394 | // variables provide no way to run cleanup |
| 2395 | // code when a thread is destroyed. |
| 2396 | #ifdef HAVE_TLS |
| 2397 | static __thread TCMalloc_ThreadCache *threadlocal_heap; |
| 2398 | #endif |
| 2399 | // Thread-specific key. Initialization here is somewhat tricky |
| 2400 | // because some Linux startup code invokes malloc() before it |
// is in a good enough state to handle pthread_key_create().
| 2402 | // Therefore, we use TSD keys only after tsd_inited is set to true. |
| 2403 | // Until then, we use a slow path to get the heap object. |
| 2404 | static bool tsd_inited = false; |
| 2405 | static pthread_key_t heap_key; |
| 2406 | #if COMPILER(MSVC) |
| 2407 | DWORD tlsIndex = TLS_OUT_OF_INDEXES; |
| 2408 | #endif |
| 2409 | |
| 2410 | static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap) |
| 2411 | { |
// Still do pthread_setspecific even when using MSVC fast TLS so that we
// benefit from the thread-exit delete callback.
| 2414 | pthread_setspecific(heap_key, heap); |
| 2415 | #if COMPILER(MSVC) |
| 2416 | TlsSetValue(tlsIndex, heap); |
| 2417 | #endif |
| 2418 | } |
| 2419 | |
| 2420 | // Allocator for thread heaps |
| 2421 | static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator; |
| 2422 | |
| 2423 | // Linked list of heap objects. Protected by pageheap_lock. |
| 2424 | static TCMalloc_ThreadCache* thread_heaps = NULL; |
| 2425 | static int thread_heap_count = 0; |
| 2426 | |
| 2427 | // Overall thread cache size. Protected by pageheap_lock. |
| 2428 | static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize; |
| 2429 | |
| 2430 | // Global per-thread cache size. Writes are protected by |
| 2431 | // pageheap_lock. Reads are done without any locking, which should be |
| 2432 | // fine as long as size_t can be written atomically and we don't place |
| 2433 | // invariants between this variable and other pieces of state. |
| 2434 | static volatile size_t per_thread_cache_size = kMaxThreadCacheSize; |
| 2435 | |
| 2436 | //------------------------------------------------------------------- |
| 2437 | // Central cache implementation |
| 2438 | //------------------------------------------------------------------- |
| 2439 | |
| 2440 | void TCMalloc_Central_FreeList::Init(size_t cl) { |
| 2441 | lock_.Init(); |
| 2442 | size_class_ = cl; |
| 2443 | DLL_Init(&empty_); |
| 2444 | DLL_Init(&nonempty_); |
| 2445 | counter_ = 0; |
| 2446 | |
| 2447 | cache_size_ = 1; |
| 2448 | used_slots_ = 0; |
| 2449 | ASSERT(cache_size_ <= kNumTransferEntries); |
| 2450 | } |
| 2451 | |
| 2452 | void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) { |
| 2453 | while (start) { |
| 2454 | void *next = SLL_Next(start); |
| 2455 | ReleaseToSpans(start); |
| 2456 | start = next; |
| 2457 | } |
| 2458 | } |
| 2459 | |
| 2460 | ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) { |
| 2461 | const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift; |
| 2462 | Span* span = pageheap->GetDescriptor(p); |
| 2463 | ASSERT(span != NULL); |
| 2464 | ASSERT(span->refcount > 0); |
| 2465 | |
| 2466 | // If span is empty, move it to non-empty list |
| 2467 | if (span->objects == NULL) { |
| 2468 | DLL_Remove(span); |
| 2469 | DLL_Prepend(&nonempty_, span); |
| 2470 | Event(span, 'N', 0); |
| 2471 | } |
| 2472 | |
| 2473 | // The following check is expensive, so it is disabled by default |
| 2474 | if (false) { |
| 2475 | // Check that object does not occur in list |
| 2476 | unsigned got = 0; |
| 2477 | for (void* p = span->objects; p != NULL; p = *((void**) p)) { |
| 2478 | ASSERT(p != object); |
| 2479 | got++; |
| 2480 | } |
| 2481 | ASSERT(got + span->refcount == |
| 2482 | (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass)); |
| 2483 | } |
| 2484 | |
| 2485 | counter_++; |
| 2486 | span->refcount--; |
| 2487 | if (span->refcount == 0) { |
| 2488 | Event(span, '#', 0); |
| 2489 | counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass); |
| 2490 | DLL_Remove(span); |
| 2491 | |
| 2492 | // Release central list lock while operating on pageheap |
| 2493 | lock_.Unlock(); |
| 2494 | { |
| 2495 | SpinLockHolder h(&pageheap_lock); |
| 2496 | pageheap->Delete(span); |
| 2497 | } |
| 2498 | lock_.Lock(); |
| 2499 | } else { |
| 2500 | *(reinterpret_cast<void**>(object)) = span->objects; |
| 2501 | span->objects = object; |
| 2502 | } |
| 2503 | } |
| 2504 | |
| 2505 | ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass( |
| 2506 | size_t locked_size_class, bool force) { |
| 2507 | static int race_counter = 0; |
| 2508 | int t = race_counter++; // Updated without a lock, but who cares. |
| 2509 | if (t >= static_cast<int>(kNumClasses)) { |
| 2510 | while (t >= static_cast<int>(kNumClasses)) { |
| 2511 | t -= kNumClasses; |
| 2512 | } |
| 2513 | race_counter = t; |
| 2514 | } |
| 2515 | ASSERT(t >= 0); |
| 2516 | ASSERT(t < static_cast<int>(kNumClasses)); |
| 2517 | if (t == static_cast<int>(locked_size_class)) return false; |
| 2518 | return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force); |
| 2519 | } |
| 2520 | |
| 2521 | bool TCMalloc_Central_FreeList::MakeCacheSpace() { |
| 2522 | // Is there room in the cache? |
| 2523 | if (used_slots_ < cache_size_) return true; |
// Can we expand this cache?
| 2525 | if (cache_size_ == kNumTransferEntries) return false; |
| 2526 | // Ok, we'll try to grab an entry from some other size class. |
| 2527 | if (EvictRandomSizeClass(size_class_, false) || |
| 2528 | EvictRandomSizeClass(size_class_, true)) { |
| 2529 | // Succeeded in evicting, we're going to make our cache larger. |
| 2530 | cache_size_++; |
| 2531 | return true; |
| 2532 | } |
| 2533 | return false; |
| 2534 | } |
| 2535 | |
| 2536 | |
| 2537 | namespace { |
| 2538 | class LockInverter { |
| 2539 | private: |
| 2540 | SpinLock *held_, *temp_; |
| 2541 | public: |
| 2542 | inline explicit LockInverter(SpinLock* held, SpinLock *temp) |
| 2543 | : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); } |
| 2544 | inline ~LockInverter() { temp_->Unlock(); held_->Lock(); } |
| 2545 | }; |
| 2546 | } |
| 2547 | |
| 2548 | bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) { |
| 2549 | // Start with a quick check without taking a lock. |
| 2550 | if (cache_size_ == 0) return false; |
| 2551 | // We don't evict from a full cache unless we are 'forcing'. |
| 2552 | if (force == false && used_slots_ == cache_size_) return false; |
| 2553 | |
| 2554 | // Grab lock, but first release the other lock held by this thread. We use |
| 2555 | // the lock inverter to ensure that we never hold two size class locks |
| 2556 | // concurrently. That can create a deadlock because there is no well |
| 2557 | // defined nesting order. |
| 2558 | LockInverter li(¢ral_cache[locked_size_class].lock_, &lock_); |
| 2559 | ASSERT(used_slots_ <= cache_size_); |
| 2560 | ASSERT(0 <= cache_size_); |
| 2561 | if (cache_size_ == 0) return false; |
| 2562 | if (used_slots_ == cache_size_) { |
| 2563 | if (force == false) return false; |
| 2564 | // ReleaseListToSpans releases the lock, so we have to make all the |
| 2565 | // updates to the central list before calling it. |
| 2566 | cache_size_--; |
| 2567 | used_slots_--; |
| 2568 | ReleaseListToSpans(tc_slots_[used_slots_].head); |
| 2569 | return true; |
| 2570 | } |
| 2571 | cache_size_--; |
| 2572 | return true; |
| 2573 | } |
| 2574 | |
| 2575 | void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) { |
| 2576 | SpinLockHolder h(&lock_); |
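// If the chain is a full batch and the transfer cache has (or can make) room,
// stash it in a TCEntry slot; otherwise release the objects back onto their spans.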
| 2577 | if (N == num_objects_to_move[size_class_] && |
| 2578 | MakeCacheSpace()) { |
| 2579 | int slot = used_slots_++; |
| 2580 | ASSERT(slot >=0); |
| 2581 | ASSERT(slot < kNumTransferEntries); |
| 2582 | TCEntry *entry = &tc_slots_[slot]; |
| 2583 | entry->head = start; |
| 2584 | entry->tail = end; |
| 2585 | return; |
| 2586 | } |
| 2587 | ReleaseListToSpans(start); |
| 2588 | } |
| 2589 | |
| 2590 | void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) { |
| 2591 | int num = *N; |
| 2592 | ASSERT(num > 0); |
| 2593 | |
| 2594 | SpinLockHolder h(&lock_); |
| 2595 | if (num == num_objects_to_move[size_class_] && used_slots_ > 0) { |
| 2596 | int slot = --used_slots_; |
| 2597 | ASSERT(slot >= 0); |
| 2598 | TCEntry *entry = &tc_slots_[slot]; |
| 2599 | *start = entry->head; |
| 2600 | *end = entry->tail; |
| 2601 | return; |
| 2602 | } |
| 2603 | |
| 2604 | // TODO: Prefetch multiple TCEntries? |
| 2605 | void *tail = FetchFromSpansSafe(); |
| 2606 | if (!tail) { |
| 2607 | // We are completely out of memory. |
| 2608 | *start = *end = NULL; |
| 2609 | *N = 0; |
| 2610 | return; |
| 2611 | } |
| 2612 | |
| 2613 | SLL_SetNext(tail, NULL); |
| 2614 | void *head = tail; |
| 2615 | int count = 1; |
| 2616 | while (count < num) { |
| 2617 | void *t = FetchFromSpans(); |
| 2618 | if (!t) break; |
| 2619 | SLL_Push(&head, t); |
| 2620 | count++; |
| 2621 | } |
| 2622 | *start = head; |
| 2623 | *end = tail; |
| 2624 | *N = count; |
| 2625 | } |
| 2626 | |
| 2627 | |
| 2628 | void* TCMalloc_Central_FreeList::FetchFromSpansSafe() { |
| 2629 | void *t = FetchFromSpans(); |
| 2630 | if (!t) { |
| 2631 | Populate(); |
| 2632 | t = FetchFromSpans(); |
| 2633 | } |
| 2634 | return t; |
| 2635 | } |
| 2636 | |
| 2637 | void* TCMalloc_Central_FreeList::FetchFromSpans() { |
| 2638 | // Intel compiler bug; issue id 6000056746 |
| 2639 | // if (DLL_IsEmpty(&nonempty_)) return NULL; |
| 2640 | Span* span = nonempty_.next; |
| 2641 | if (span == &nonempty_) |
| 2642 | return NULL; |
| 2643 | |
| 2644 | ASSERT(span->objects != NULL); |
| 2645 | ASSERT_SPAN_COMMITTED(span); |
| 2646 | span->refcount++; |
| 2647 | void* result = span->objects; |
| 2648 | span->objects = *(reinterpret_cast<void**>(result)); |
| 2649 | if (span->objects == NULL) { |
| 2650 | // Move to empty list |
| 2651 | DLL_Remove(span); |
| 2652 | DLL_Prepend(&empty_, span); |
| 2653 | Event(span, 'E', 0); |
| 2654 | } |
| 2655 | counter_--; |
| 2656 | return result; |
| 2657 | } |
| 2658 | |
| 2659 | // Fetch memory from the system and add to the central cache freelist. |
| 2660 | ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() { |
| 2661 | // Release central list lock while operating on pageheap |
| 2662 | lock_.Unlock(); |
| 2663 | const size_t npages = class_to_pages[size_class_]; |
| 2664 | |
| 2665 | Span* span; |
| 2666 | { |
| 2667 | SpinLockHolder h(&pageheap_lock); |
| 2668 | span = pageheap->New(npages); |
| 2669 | if (span) pageheap->RegisterSizeClass(span, size_class_); |
| 2670 | } |
| 2671 | if (span == NULL) { |
| 2672 | MESSAGE("allocation failed: %d\n" , errno); |
| 2673 | lock_.Lock(); |
| 2674 | return; |
| 2675 | } |
| 2676 | ASSERT_SPAN_COMMITTED(span); |
| 2677 | ASSERT(span->length == npages); |
| 2678 | // Cache sizeclass info eagerly. Locking is not necessary. |
| 2679 | // (Instead of being eager, we could just replace any stale info |
| 2680 | // about this span, but that seems to be no better in practice.) |
| 2681 | for (size_t i = 0; i < npages; i++) { |
| 2682 | pageheap->CacheSizeClass(span->start + i, size_class_); |
| 2683 | } |
| 2684 | |
| 2685 | // Split the block into pieces and add to the free-list |
| 2686 | // TODO: coloring of objects to avoid cache conflicts? |
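// Thread a singly-linked free list through the span's memory: the first word of
// each object points at the next object, and the list is NULL-terminated.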
| 2687 | void** tail = &span->objects; |
| 2688 | char* ptr = reinterpret_cast<char*>(span->start << kPageShift); |
| 2689 | char* limit = ptr + (npages << kPageShift); |
| 2690 | const size_t size = ByteSizeForClass(size_class_); |
| 2691 | int num = 0; |
| 2692 | char* nptr; |
| 2693 | while ((nptr = ptr + size) <= limit) { |
| 2694 | *tail = ptr; |
| 2695 | tail = reinterpret_cast<void**>(ptr); |
| 2696 | ptr = nptr; |
| 2697 | num++; |
| 2698 | } |
| 2699 | ASSERT(ptr <= limit); |
| 2700 | *tail = NULL; |
| 2701 | span->refcount = 0; // No sub-object in use yet |
| 2702 | |
| 2703 | // Add span to list of non-empty spans |
| 2704 | lock_.Lock(); |
| 2705 | DLL_Prepend(&nonempty_, span); |
| 2706 | counter_ += num; |
| 2707 | } |
| 2708 | |
| 2709 | //------------------------------------------------------------------- |
| 2710 | // TCMalloc_ThreadCache implementation |
| 2711 | //------------------------------------------------------------------- |
| 2712 | |
| 2713 | inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) { |
| 2714 | if (bytes_until_sample_ < k) { |
| 2715 | PickNextSample(k); |
| 2716 | return true; |
| 2717 | } else { |
| 2718 | bytes_until_sample_ -= k; |
| 2719 | return false; |
| 2720 | } |
| 2721 | } |
| 2722 | |
| 2723 | void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) { |
| 2724 | size_ = 0; |
| 2725 | next_ = NULL; |
| 2726 | prev_ = NULL; |
| 2727 | tid_ = tid; |
| 2728 | in_setspecific_ = false; |
| 2729 | for (size_t cl = 0; cl < kNumClasses; ++cl) { |
| 2730 | list_[cl].Init(); |
| 2731 | } |
| 2732 | |
| 2733 | // Initialize RNG -- run it for a bit to get to good values |
| 2734 | bytes_until_sample_ = 0; |
| 2735 | rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this)); |
| 2736 | for (int i = 0; i < 100; i++) { |
| 2737 | PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2)); |
| 2738 | } |
| 2739 | } |
| 2740 | |
| 2741 | void TCMalloc_ThreadCache::Cleanup() { |
| 2742 | // Put unused memory back into central cache |
| 2743 | for (size_t cl = 0; cl < kNumClasses; ++cl) { |
| 2744 | if (list_[cl].length() > 0) { |
| 2745 | ReleaseToCentralCache(cl, list_[cl].length()); |
| 2746 | } |
| 2747 | } |
| 2748 | } |
| 2749 | |
| 2750 | ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) { |
| 2751 | ASSERT(size <= kMaxSize); |
| 2752 | const size_t cl = SizeClass(size); |
| 2753 | FreeList* list = &list_[cl]; |
| 2754 | size_t allocationSize = ByteSizeForClass(cl); |
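// If the per-thread list for this size class is empty, refill it from the central
// cache; a second empty check catches the case where the central cache was out of
// memory as well.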
| 2755 | if (list->empty()) { |
| 2756 | FetchFromCentralCache(cl, allocationSize); |
| 2757 | if (list->empty()) return NULL; |
| 2758 | } |
| 2759 | size_ -= allocationSize; |
| 2760 | return list->Pop(); |
| 2761 | } |
| 2762 | |
| 2763 | inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) { |
| 2764 | size_ += ByteSizeForClass(cl); |
| 2765 | FreeList* list = &list_[cl]; |
| 2766 | list->Push(ptr); |
| 2767 | // If enough data is free, put back into central cache |
| 2768 | if (list->length() > kMaxFreeListLength) { |
| 2769 | ReleaseToCentralCache(cl, num_objects_to_move[cl]); |
| 2770 | } |
| 2771 | if (size_ >= per_thread_cache_size) Scavenge(); |
| 2772 | } |
| 2773 | |
| 2774 | // Remove some objects of class "cl" from central cache and add to thread heap |
| 2775 | ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) { |
| 2776 | int fetch_count = num_objects_to_move[cl]; |
| 2777 | void *start, *end; |
| 2778 | central_cache[cl].RemoveRange(&start, &end, &fetch_count); |
| 2779 | list_[cl].PushRange(fetch_count, start, end); |
| 2780 | size_ += allocationSize * fetch_count; |
| 2781 | } |
| 2782 | |
| 2783 | // Remove some objects of class "cl" from thread heap and add to central cache |
| 2784 | inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) { |
| 2785 | ASSERT(N > 0); |
| 2786 | FreeList* src = &list_[cl]; |
| 2787 | if (N > src->length()) N = src->length(); |
| 2788 | size_ -= N*ByteSizeForClass(cl); |
| 2789 | |
| 2790 | // We return prepackaged chains of the correct size to the central cache. |
| 2791 | // TODO: Use the same format internally in the thread caches? |
| 2792 | int batch_size = num_objects_to_move[cl]; |
| 2793 | while (N > batch_size) { |
| 2794 | void *tail, *head; |
| 2795 | src->PopRange(batch_size, &head, &tail); |
| 2796 | central_cache[cl].InsertRange(head, tail, batch_size); |
| 2797 | N -= batch_size; |
| 2798 | } |
| 2799 | void *tail, *head; |
| 2800 | src->PopRange(N, &head, &tail); |
| 2801 | central_cache[cl].InsertRange(head, tail, N); |
| 2802 | } |
| 2803 | |
| 2804 | // Release idle memory to the central cache |
| 2805 | inline void TCMalloc_ThreadCache::Scavenge() { |
| 2806 | // If the low-water mark for the free list is L, it means we would |
| 2807 | // not have had to allocate anything from the central cache even if |
| 2808 | // we had reduced the free list size by L. We aim to get closer to |
| 2809 | // that situation by dropping L/2 nodes from the free list. This |
| 2810 | // may not release much memory, but if so we will call scavenge again |
| 2811 | // pretty soon and the low-water marks will be high on that call. |
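// For example, if a class's free list never dropped below 8 objects since
// the last scavenge, we hand 4 of them back to the central cache; a
// low-water mark of 1 releases a single object.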
| 2812 | //int64 start = CycleClock::Now(); |
| 2813 | |
| 2814 | for (size_t cl = 0; cl < kNumClasses; cl++) { |
| 2815 | FreeList* list = &list_[cl]; |
| 2816 | const int lowmark = list->lowwatermark(); |
| 2817 | if (lowmark > 0) { |
| 2818 | const int drop = (lowmark > 1) ? lowmark/2 : 1; |
| 2819 | ReleaseToCentralCache(cl, drop); |
| 2820 | } |
| 2821 | list->clear_lowwatermark(); |
| 2822 | } |
| 2823 | |
| 2824 | //int64 finish = CycleClock::Now(); |
| 2825 | //CycleTimer ct; |
| 2826 | //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0); |
| 2827 | } |
| 2828 | |
| 2829 | void TCMalloc_ThreadCache::PickNextSample(size_t k) { |
| 2830 | // Make next "random" number |
  // x^32+x^22+x^2+x^1+1 is a primitive polynomial; we use it as the feedback
  // term of a 32-bit linear feedback shift register
| 2832 | static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0); |
| 2833 | uint32_t r = rnd_; |
| 2834 | rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly); |
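// This is one Galois LFSR step: shift left by one bit and, if the bit shifted
// out (bit 31) was set, XOR in kPoly. The arithmetic right shift turns that
// bit into an all-ones or all-zeros mask for kPoly.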
| 2835 | |
| 2836 | // Next point is "rnd_ % (sample_period)". I.e., average |
| 2837 | // increment is "sample_period/2". |
| 2838 | const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter); |
| 2839 | static int last_flag_value = -1; |
| 2840 | |
| 2841 | if (flag_value != last_flag_value) { |
| 2842 | SpinLockHolder h(&sample_period_lock); |
| 2843 | int i; |
| 2844 | for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) { |
| 2845 | if (primes_list[i] >= flag_value) { |
| 2846 | break; |
| 2847 | } |
| 2848 | } |
| 2849 | sample_period = primes_list[i]; |
| 2850 | last_flag_value = flag_value; |
| 2851 | } |
| 2852 | |
| 2853 | bytes_until_sample_ += rnd_ % sample_period; |
| 2854 | |
| 2855 | if (k > (static_cast<size_t>(-1) >> 2)) { |
| 2856 | // If the user has asked for a huge allocation then it is possible |
| 2857 | // for the code below to loop infinitely. Just return (note that |
| 2858 | // this throws off the sampling accuracy somewhat, but a user who |
| 2859 | // is allocating more than 1G of memory at a time can live with a |
| 2860 | // minor inaccuracy in profiling of small allocations, and also |
| 2861 | // would rather not wait for the loop below to terminate). |
| 2862 | return; |
| 2863 | } |
| 2864 | |
| 2865 | while (bytes_until_sample_ < k) { |
| 2866 | // Increase bytes_until_sample_ by enough average sampling periods |
| 2867 | // (sample_period >> 1) to allow us to sample past the current |
| 2868 | // allocation. |
| 2869 | bytes_until_sample_ += (sample_period >> 1); |
| 2870 | } |
| 2871 | |
| 2872 | bytes_until_sample_ -= k; |
| 2873 | } |
| 2874 | |
| 2875 | void TCMalloc_ThreadCache::InitModule() { |
| 2876 | // There is a slight potential race here because of double-checked |
| 2877 | // locking idiom. However, as long as the program does a small |
| 2878 | // allocation before switching to multi-threaded mode, we will be |
| 2879 | // fine. We increase the chances of doing such a small allocation |
| 2880 | // by doing one in the constructor of the module_enter_exit_hook |
| 2881 | // object declared below. |
| 2882 | SpinLockHolder h(&pageheap_lock); |
| 2883 | if (!phinited) { |
| 2884 | #ifdef WTF_CHANGES |
| 2885 | InitTSD(); |
| 2886 | #endif |
| 2887 | InitSizeClasses(); |
| 2888 | threadheap_allocator.Init(); |
| 2889 | span_allocator.Init(); |
| 2890 | span_allocator.New(); // Reduce cache conflicts |
| 2891 | span_allocator.New(); // Reduce cache conflicts |
| 2892 | stacktrace_allocator.Init(); |
| 2893 | DLL_Init(&sampled_objects); |
| 2894 | for (size_t i = 0; i < kNumClasses; ++i) { |
| 2895 | central_cache[i].Init(i); |
| 2896 | } |
| 2897 | pageheap->init(); |
| 2898 | phinited = 1; |
| 2899 | #if defined(WTF_CHANGES) && OS(DARWIN) |
| 2900 | FastMallocZone::init(); |
| 2901 | #endif |
| 2902 | } |
| 2903 | } |
| 2904 | |
| 2905 | inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) { |
| 2906 | // Create the heap and add it to the linked list |
| 2907 | TCMalloc_ThreadCache *heap = threadheap_allocator.New(); |
| 2908 | heap->Init(tid); |
| 2909 | heap->next_ = thread_heaps; |
| 2910 | heap->prev_ = NULL; |
| 2911 | if (thread_heaps != NULL) thread_heaps->prev_ = heap; |
| 2912 | thread_heaps = heap; |
| 2913 | thread_heap_count++; |
| 2914 | RecomputeThreadCacheSize(); |
| 2915 | return heap; |
| 2916 | } |
| 2917 | |
| 2918 | inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() { |
| 2919 | #ifdef HAVE_TLS |
| 2920 | // __thread is faster, but only when the kernel supports it |
| 2921 | if (KernelSupportsTLS()) |
| 2922 | return threadlocal_heap; |
| 2923 | #elif COMPILER(MSVC) |
| 2924 | return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex)); |
| 2925 | #else |
| 2926 | return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key)); |
| 2927 | #endif |
| 2928 | } |
| 2929 | |
| 2930 | inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() { |
| 2931 | TCMalloc_ThreadCache* ptr = NULL; |
| 2932 | if (!tsd_inited) { |
| 2933 | InitModule(); |
| 2934 | } else { |
| 2935 | ptr = GetThreadHeap(); |
| 2936 | } |
| 2937 | if (ptr == NULL) ptr = CreateCacheIfNecessary(); |
| 2938 | return ptr; |
| 2939 | } |
| 2940 | |
| 2941 | // In deletion paths, we do not try to create a thread-cache. This is |
| 2942 | // because we may be in the thread destruction code and may have |
| 2943 | // already cleaned up the cache for this thread. |
| 2944 | inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() { |
| 2945 | if (!tsd_inited) return NULL; |
| 2946 | void* const p = GetThreadHeap(); |
| 2947 | return reinterpret_cast<TCMalloc_ThreadCache*>(p); |
| 2948 | } |
| 2949 | |
| 2950 | void TCMalloc_ThreadCache::InitTSD() { |
| 2951 | ASSERT(!tsd_inited); |
| 2952 | pthread_key_create(&heap_key, DestroyThreadCache); |
| 2953 | #if COMPILER(MSVC) |
| 2954 | tlsIndex = TlsAlloc(); |
| 2955 | #endif |
| 2956 | tsd_inited = true; |
| 2957 | |
| 2958 | #if !COMPILER(MSVC) |
| 2959 | // We may have used a fake pthread_t for the main thread. Fix it. |
| 2960 | pthread_t zero; |
| 2961 | memset(&zero, 0, sizeof(zero)); |
| 2962 | #endif |
| 2963 | #ifndef WTF_CHANGES |
| 2964 | SpinLockHolder h(&pageheap_lock); |
| 2965 | #else |
| 2966 | ASSERT(pageheap_lock.IsHeld()); |
| 2967 | #endif |
| 2968 | for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { |
| 2969 | #if COMPILER(MSVC) |
| 2970 | if (h->tid_ == 0) { |
| 2971 | h->tid_ = GetCurrentThreadId(); |
| 2972 | } |
| 2973 | #else |
| 2974 | if (pthread_equal(h->tid_, zero)) { |
| 2975 | h->tid_ = pthread_self(); |
| 2976 | } |
| 2977 | #endif |
| 2978 | } |
| 2979 | } |
| 2980 | |
| 2981 | TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() { |
| 2982 | // Initialize per-thread data if necessary |
| 2983 | TCMalloc_ThreadCache* heap = NULL; |
| 2984 | { |
| 2985 | SpinLockHolder lockholder(&pageheap_lock); |
| 2986 | |
| 2987 | #if COMPILER(MSVC) |
| 2988 | DWORD me; |
| 2989 | if (!tsd_inited) { |
| 2990 | me = 0; |
| 2991 | } else { |
| 2992 | me = GetCurrentThreadId(); |
| 2993 | } |
| 2994 | #else |
| 2995 | // Early on in glibc's life, we cannot even call pthread_self() |
| 2996 | pthread_t me; |
| 2997 | if (!tsd_inited) { |
| 2998 | memset(&me, 0, sizeof(me)); |
| 2999 | } else { |
| 3000 | me = pthread_self(); |
| 3001 | } |
| 3002 | #endif |
| 3003 | |
| 3004 | // This may be a recursive malloc call from pthread_setspecific() |
| 3005 | // In that case, the heap for this thread has already been created |
| 3006 | // and added to the linked list. So we search for that first. |
| 3007 | for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { |
| 3008 | #if COMPILER(MSVC) |
| 3009 | if (h->tid_ == me) { |
| 3010 | #else |
| 3011 | if (pthread_equal(h->tid_, me)) { |
| 3012 | #endif |
| 3013 | heap = h; |
| 3014 | break; |
| 3015 | } |
| 3016 | } |
| 3017 | |
| 3018 | if (heap == NULL) heap = NewHeap(me); |
| 3019 | } |
| 3020 | |
| 3021 | // We call pthread_setspecific() outside the lock because it may |
| 3022 | // call malloc() recursively. The recursive call will never get |
| 3023 | // here again because it will find the already allocated heap in the |
| 3024 | // linked list of heaps. |
| 3025 | if (!heap->in_setspecific_ && tsd_inited) { |
| 3026 | heap->in_setspecific_ = true; |
| 3027 | setThreadHeap(heap); |
| 3028 | } |
| 3029 | return heap; |
| 3030 | } |
| 3031 | |
| 3032 | void TCMalloc_ThreadCache::BecomeIdle() { |
| 3033 | if (!tsd_inited) return; // No caches yet |
| 3034 | TCMalloc_ThreadCache* heap = GetThreadHeap(); |
| 3035 | if (heap == NULL) return; // No thread cache to remove |
| 3036 | if (heap->in_setspecific_) return; // Do not disturb the active caller |
| 3037 | |
| 3038 | heap->in_setspecific_ = true; |
| 3039 | pthread_setspecific(heap_key, NULL); |
| 3040 | #ifdef HAVE_TLS |
| 3041 | // Also update the copy in __thread |
| 3042 | threadlocal_heap = NULL; |
| 3043 | #endif |
| 3044 | heap->in_setspecific_ = false; |
| 3045 | if (GetThreadHeap() == heap) { |
| 3046 | // Somehow heap got reinstated by a recursive call to malloc |
| 3047 | // from pthread_setspecific. We give up in this case. |
| 3048 | return; |
| 3049 | } |
| 3050 | |
| 3051 | // We can now get rid of the heap |
| 3052 | DeleteCache(heap); |
| 3053 | } |
| 3054 | |
| 3055 | void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) { |
| 3056 | // Note that "ptr" cannot be NULL since pthread promises not |
| 3057 | // to invoke the destructor on NULL values, but for safety, |
| 3058 | // we check anyway. |
| 3059 | if (ptr == NULL) return; |
| 3060 | #ifdef HAVE_TLS |
| 3061 | // Prevent fast path of GetThreadHeap() from returning heap. |
| 3062 | threadlocal_heap = NULL; |
| 3063 | #endif |
| 3064 | DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr)); |
| 3065 | } |
| 3066 | |
| 3067 | void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) { |
| 3068 | // Remove all memory from heap |
| 3069 | heap->Cleanup(); |
| 3070 | |
| 3071 | // Remove from linked list |
| 3072 | SpinLockHolder h(&pageheap_lock); |
| 3073 | if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_; |
| 3074 | if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_; |
| 3075 | if (thread_heaps == heap) thread_heaps = heap->next_; |
| 3076 | thread_heap_count--; |
| 3077 | RecomputeThreadCacheSize(); |
| 3078 | |
| 3079 | threadheap_allocator.Delete(heap); |
| 3080 | } |
| 3081 | |
| 3082 | void TCMalloc_ThreadCache::RecomputeThreadCacheSize() { |
| 3083 | // Divide available space across threads |
| 3084 | int n = thread_heap_count > 0 ? thread_heap_count : 1; |
| 3085 | size_t space = overall_thread_cache_size / n; |
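// For example (illustrative numbers): a 16MB overall budget shared by 8 live
// threads yields 2MB per thread cache, before the clamp below.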
| 3086 | |
| 3087 | // Limit to allowed range |
| 3088 | if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; |
| 3089 | if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; |
| 3090 | |
| 3091 | per_thread_cache_size = space; |
| 3092 | } |
| 3093 | |
| 3094 | void TCMalloc_ThreadCache::Print() const { |
| 3095 | for (size_t cl = 0; cl < kNumClasses; ++cl) { |
| 3096 | MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n" , |
| 3097 | ByteSizeForClass(cl), |
| 3098 | list_[cl].length(), |
| 3099 | list_[cl].lowwatermark()); |
| 3100 | } |
| 3101 | } |
| 3102 | |
| 3103 | // Extract interesting stats |
| 3104 | struct TCMallocStats { |
| 3105 | uint64_t system_bytes; // Bytes alloced from system |
| 3106 | uint64_t thread_bytes; // Bytes in thread caches |
| 3107 | uint64_t central_bytes; // Bytes in central cache |
| 3108 | uint64_t transfer_bytes; // Bytes in central transfer cache |
| 3109 | uint64_t pageheap_bytes; // Bytes in page heap |
| 3110 | uint64_t metadata_bytes; // Bytes alloced for metadata |
| 3111 | }; |
| 3112 | |
| 3113 | #ifndef WTF_CHANGES |
| 3114 | // Get stats into "r". Also get per-size-class counts if class_count != NULL |
| 3115 | static void ExtractStats(TCMallocStats* r, uint64_t* class_count) { |
| 3116 | r->central_bytes = 0; |
| 3117 | r->transfer_bytes = 0; |
| 3118 | for (int cl = 0; cl < kNumClasses; ++cl) { |
| 3119 | const int length = central_cache[cl].length(); |
| 3120 | const int tc_length = central_cache[cl].tc_length(); |
| 3121 | r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length; |
| 3122 | r->transfer_bytes += |
| 3123 | static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length; |
| 3124 | if (class_count) class_count[cl] = length + tc_length; |
| 3125 | } |
| 3126 | |
| 3127 | // Add stats from per-thread heaps |
| 3128 | r->thread_bytes = 0; |
| 3129 | { // scope |
| 3130 | SpinLockHolder h(&pageheap_lock); |
| 3131 | for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) { |
| 3132 | r->thread_bytes += h->Size(); |
| 3133 | if (class_count) { |
| 3134 | for (size_t cl = 0; cl < kNumClasses; ++cl) { |
| 3135 | class_count[cl] += h->freelist_length(cl); |
| 3136 | } |
| 3137 | } |
| 3138 | } |
| 3139 | } |
| 3140 | |
| 3141 | { //scope |
| 3142 | SpinLockHolder h(&pageheap_lock); |
| 3143 | r->system_bytes = pageheap->SystemBytes(); |
| 3144 | r->metadata_bytes = metadata_system_bytes; |
| 3145 | r->pageheap_bytes = pageheap->FreeBytes(); |
| 3146 | } |
| 3147 | } |
| 3148 | #endif |
| 3149 | |
| 3150 | #ifndef WTF_CHANGES |
| 3151 | // WRITE stats to "out" |
| 3152 | static void DumpStats(TCMalloc_Printer* out, int level) { |
| 3153 | TCMallocStats stats; |
| 3154 | uint64_t class_count[kNumClasses]; |
| 3155 | ExtractStats(&stats, (level >= 2 ? class_count : NULL)); |
| 3156 | |
| 3157 | if (level >= 2) { |
| 3158 | out->printf("------------------------------------------------\n" ); |
| 3159 | uint64_t cumulative = 0; |
| 3160 | for (int cl = 0; cl < kNumClasses; ++cl) { |
| 3161 | if (class_count[cl] > 0) { |
| 3162 | uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl); |
| 3163 | cumulative += class_bytes; |
| 3164 | out->printf("class %3d [ %8" PRIuS " bytes ] : " |
| 3165 | "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n" , |
| 3166 | cl, ByteSizeForClass(cl), |
| 3167 | class_count[cl], |
| 3168 | class_bytes / 1048576.0, |
| 3169 | cumulative / 1048576.0); |
| 3170 | } |
| 3171 | } |
| 3172 | |
| 3173 | SpinLockHolder h(&pageheap_lock); |
| 3174 | pageheap->Dump(out); |
| 3175 | } |
| 3176 | |
| 3177 | const uint64_t bytes_in_use = stats.system_bytes |
| 3178 | - stats.pageheap_bytes |
| 3179 | - stats.central_bytes |
| 3180 | - stats.transfer_bytes |
| 3181 | - stats.thread_bytes; |
| 3182 | |
| 3183 | out->printf("------------------------------------------------\n" |
| 3184 | "MALLOC: %12" PRIu64 " Heap size\n" |
| 3185 | "MALLOC: %12" PRIu64 " Bytes in use by application\n" |
| 3186 | "MALLOC: %12" PRIu64 " Bytes free in page heap\n" |
| 3187 | "MALLOC: %12" PRIu64 " Bytes free in central cache\n" |
| 3188 | "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n" |
| 3189 | "MALLOC: %12" PRIu64 " Bytes free in thread caches\n" |
| 3190 | "MALLOC: %12" PRIu64 " Spans in use\n" |
| 3191 | "MALLOC: %12" PRIu64 " Thread heaps in use\n" |
| 3192 | "MALLOC: %12" PRIu64 " Metadata allocated\n" |
| 3193 | "------------------------------------------------\n" , |
| 3194 | stats.system_bytes, |
| 3195 | bytes_in_use, |
| 3196 | stats.pageheap_bytes, |
| 3197 | stats.central_bytes, |
| 3198 | stats.transfer_bytes, |
| 3199 | stats.thread_bytes, |
| 3200 | uint64_t(span_allocator.inuse()), |
| 3201 | uint64_t(threadheap_allocator.inuse()), |
| 3202 | stats.metadata_bytes); |
| 3203 | } |
| 3204 | |
| 3205 | static void PrintStats(int level) { |
| 3206 | const int kBufferSize = 16 << 10; |
| 3207 | char* buffer = new char[kBufferSize]; |
| 3208 | TCMalloc_Printer printer(buffer, kBufferSize); |
| 3209 | DumpStats(&printer, level); |
| 3210 | write(STDERR_FILENO, buffer, strlen(buffer)); |
| 3211 | delete[] buffer; |
| 3212 | } |
| 3213 | |
| 3214 | static void** DumpStackTraces() { |
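  // The returned buffer holds one record per sampled object: a count (always
  // 1), the allocation size, the stack depth, and then the stack frames
  // themselves. The list is terminated by a 0 entry.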
| 3215 | // Count how much space we need |
| 3216 | int needed_slots = 0; |
| 3217 | { |
| 3218 | SpinLockHolder h(&pageheap_lock); |
| 3219 | for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) { |
| 3220 | StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects); |
| 3221 | needed_slots += 3 + stack->depth; |
| 3222 | } |
| 3223 | needed_slots += 100; // Slop in case sample grows |
| 3224 | needed_slots += needed_slots/8; // An extra 12.5% slop |
| 3225 | } |
| 3226 | |
| 3227 | void** result = new void*[needed_slots]; |
| 3228 | if (result == NULL) { |
| 3229 | MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n" , |
| 3230 | needed_slots); |
| 3231 | return NULL; |
| 3232 | } |
| 3233 | |
| 3234 | SpinLockHolder h(&pageheap_lock); |
| 3235 | int used_slots = 0; |
| 3236 | for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) { |
| 3237 | ASSERT(used_slots < needed_slots); // Need to leave room for terminator |
| 3238 | StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects); |
| 3239 | if (used_slots + 3 + stack->depth >= needed_slots) { |
| 3240 | // No more room |
| 3241 | break; |
| 3242 | } |
| 3243 | |
| 3244 | result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1)); |
| 3245 | result[used_slots+1] = reinterpret_cast<void*>(stack->size); |
| 3246 | result[used_slots+2] = reinterpret_cast<void*>(stack->depth); |
| 3247 | for (int d = 0; d < stack->depth; d++) { |
| 3248 | result[used_slots+3+d] = stack->stack[d]; |
| 3249 | } |
| 3250 | used_slots += 3 + stack->depth; |
| 3251 | } |
| 3252 | result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0)); |
| 3253 | return result; |
| 3254 | } |
| 3255 | #endif |
| 3256 | |
| 3257 | #ifndef WTF_CHANGES |
| 3258 | |
| 3259 | // TCMalloc's support for extra malloc interfaces |
| 3260 | class TCMallocImplementation : public MallocExtension { |
| 3261 | public: |
| 3262 | virtual void GetStats(char* buffer, int buffer_length) { |
| 3263 | ASSERT(buffer_length > 0); |
| 3264 | TCMalloc_Printer printer(buffer, buffer_length); |
| 3265 | |
| 3266 | // Print level one stats unless lots of space is available |
| 3267 | if (buffer_length < 10000) { |
| 3268 | DumpStats(&printer, 1); |
| 3269 | } else { |
| 3270 | DumpStats(&printer, 2); |
| 3271 | } |
| 3272 | } |
| 3273 | |
| 3274 | virtual void** ReadStackTraces() { |
| 3275 | return DumpStackTraces(); |
| 3276 | } |
| 3277 | |
| 3278 | virtual bool GetNumericProperty(const char* name, size_t* value) { |
| 3279 | ASSERT(name != NULL); |
| 3280 | |
| 3281 | if (strcmp(name, "generic.current_allocated_bytes" ) == 0) { |
| 3282 | TCMallocStats stats; |
| 3283 | ExtractStats(&stats, NULL); |
| 3284 | *value = stats.system_bytes |
| 3285 | - stats.thread_bytes |
| 3286 | - stats.central_bytes |
| 3287 | - stats.pageheap_bytes; |
| 3288 | return true; |
| 3289 | } |
| 3290 | |
| 3291 | if (strcmp(name, "generic.heap_size" ) == 0) { |
| 3292 | TCMallocStats stats; |
| 3293 | ExtractStats(&stats, NULL); |
| 3294 | *value = stats.system_bytes; |
| 3295 | return true; |
| 3296 | } |
| 3297 | |
| 3298 | if (strcmp(name, "tcmalloc.slack_bytes" ) == 0) { |
| 3299 | // We assume that bytes in the page heap are not fragmented too |
| 3300 | // badly, and are therefore available for allocation. |
| 3301 | SpinLockHolder l(&pageheap_lock); |
| 3302 | *value = pageheap->FreeBytes(); |
| 3303 | return true; |
| 3304 | } |
| 3305 | |
| 3306 | if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes" ) == 0) { |
| 3307 | SpinLockHolder l(&pageheap_lock); |
| 3308 | *value = overall_thread_cache_size; |
| 3309 | return true; |
| 3310 | } |
| 3311 | |
| 3312 | if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes" ) == 0) { |
| 3313 | TCMallocStats stats; |
| 3314 | ExtractStats(&stats, NULL); |
| 3315 | *value = stats.thread_bytes; |
| 3316 | return true; |
| 3317 | } |
| 3318 | |
| 3319 | return false; |
| 3320 | } |
| 3321 | |
| 3322 | virtual bool SetNumericProperty(const char* name, size_t value) { |
| 3323 | ASSERT(name != NULL); |
| 3324 | |
| 3325 | if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes" ) == 0) { |
| 3326 | // Clip the value to a reasonable range |
| 3327 | if (value < kMinThreadCacheSize) value = kMinThreadCacheSize; |
| 3328 | if (value > (1<<30)) value = (1<<30); // Limit to 1GB |
| 3329 | |
| 3330 | SpinLockHolder l(&pageheap_lock); |
| 3331 | overall_thread_cache_size = static_cast<size_t>(value); |
| 3332 | TCMalloc_ThreadCache::RecomputeThreadCacheSize(); |
| 3333 | return true; |
| 3334 | } |
| 3335 | |
| 3336 | return false; |
| 3337 | } |
| 3338 | |
| 3339 | virtual void MarkThreadIdle() { |
| 3340 | TCMalloc_ThreadCache::BecomeIdle(); |
| 3341 | } |
| 3342 | |
| 3343 | virtual void ReleaseFreeMemory() { |
| 3344 | SpinLockHolder h(&pageheap_lock); |
| 3345 | pageheap->ReleaseFreePages(); |
| 3346 | } |
| 3347 | }; |
| 3348 | #endif |
| 3349 | |
| 3350 | // The constructor allocates an object to ensure that initialization |
| 3351 | // runs before main(), and therefore we do not have a chance to become |
| 3352 | // multi-threaded before initialization. We also create the TSD key |
| 3353 | // here. Presumably by the time this constructor runs, glibc is in |
| 3354 | // good enough shape to handle pthread_key_create(). |
| 3355 | // |
| 3356 | // The constructor also takes the opportunity to tell STL to use |
// tcmalloc. We want to do this early, before other static constructors run, so
| 3358 | // all user STL allocations go through tcmalloc (which works really |
| 3359 | // well for STL). |
| 3360 | // |
| 3361 | // The destructor prints stats when the program exits. |
| 3362 | class TCMallocGuard { |
| 3363 | public: |
| 3364 | |
| 3365 | TCMallocGuard() { |
| 3366 | #ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS |
| 3367 | // Check whether the kernel also supports TLS (needs to happen at runtime) |
| 3368 | CheckIfKernelSupportsTLS(); |
| 3369 | #endif |
| 3370 | #ifndef WTF_CHANGES |
| 3371 | #ifdef WIN32 // patch the windows VirtualAlloc, etc. |
| 3372 | PatchWindowsFunctions(); // defined in windows/patch_functions.cc |
| 3373 | #endif |
| 3374 | #endif |
| 3375 | free(malloc(1)); |
| 3376 | TCMalloc_ThreadCache::InitTSD(); |
| 3377 | free(malloc(1)); |
| 3378 | #ifndef WTF_CHANGES |
| 3379 | MallocExtension::Register(new TCMallocImplementation); |
| 3380 | #endif |
| 3381 | } |
| 3382 | |
| 3383 | #ifndef WTF_CHANGES |
| 3384 | ~TCMallocGuard() { |
| 3385 | const char* env = getenv("MALLOCSTATS" ); |
| 3386 | if (env != NULL) { |
| 3387 | int level = atoi(env); |
| 3388 | if (level < 1) level = 1; |
| 3389 | PrintStats(level); |
| 3390 | } |
| 3391 | #ifdef WIN32 |
| 3392 | UnpatchWindowsFunctions(); |
| 3393 | #endif |
| 3394 | } |
| 3395 | #endif |
| 3396 | }; |
| 3397 | |
| 3398 | #ifndef WTF_CHANGES |
| 3399 | static TCMallocGuard module_enter_exit_hook; |
| 3400 | #endif |
| 3401 | |
| 3402 | |
| 3403 | //------------------------------------------------------------------- |
| 3404 | // Helpers for the exported routines below |
| 3405 | //------------------------------------------------------------------- |
| 3406 | |
| 3407 | #ifndef WTF_CHANGES |
| 3408 | |
| 3409 | static Span* DoSampledAllocation(size_t size) { |
| 3410 | |
| 3411 | // Grab the stack trace outside the heap lock |
| 3412 | StackTrace tmp; |
| 3413 | tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1); |
| 3414 | tmp.size = size; |
| 3415 | |
| 3416 | SpinLockHolder h(&pageheap_lock); |
| 3417 | // Allocate span |
| 3418 | Span *span = pageheap->New(pages(size == 0 ? 1 : size)); |
| 3419 | if (span == NULL) { |
| 3420 | return NULL; |
| 3421 | } |
| 3422 | |
| 3423 | // Allocate stack trace |
| 3424 | StackTrace *stack = stacktrace_allocator.New(); |
| 3425 | if (stack == NULL) { |
| 3426 | // Sampling failed because of lack of memory |
| 3427 | return span; |
| 3428 | } |
| 3429 | |
| 3430 | *stack = tmp; |
| 3431 | span->sample = 1; |
| 3432 | span->objects = stack; |
| 3433 | DLL_Prepend(&sampled_objects, span); |
| 3434 | |
| 3435 | return span; |
| 3436 | } |
| 3437 | #endif |
| 3438 | |
| 3439 | static inline bool CheckCachedSizeClass(void *ptr) { |
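  // A cached value of 0 (no entry, or a large object) always passes; any other
  // cached value must agree with the authoritative span descriptor.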
| 3440 | PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; |
| 3441 | size_t cached_value = pageheap->GetSizeClassIfCached(p); |
| 3442 | return cached_value == 0 || |
| 3443 | cached_value == pageheap->GetDescriptor(p)->sizeclass; |
| 3444 | } |
| 3445 | |
| 3446 | static inline void* CheckedMallocResult(void *result) |
| 3447 | { |
| 3448 | ASSERT(result == 0 || CheckCachedSizeClass(result)); |
| 3449 | return result; |
| 3450 | } |
| 3451 | |
| 3452 | static inline void* SpanToMallocResult(Span *span) { |
| 3453 | ASSERT_SPAN_COMMITTED(span); |
| 3454 | pageheap->CacheSizeClass(span->start, 0); |
| 3455 | return |
| 3456 | CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift)); |
| 3457 | } |
| 3458 | |
| 3459 | #ifdef WTF_CHANGES |
| 3460 | template <bool crashOnFailure> |
| 3461 | #endif |
| 3462 | static ALWAYS_INLINE void* do_malloc(size_t size) { |
| 3463 | void* ret = NULL; |
| 3464 | |
| 3465 | #ifdef WTF_CHANGES |
| 3466 | ASSERT(!isForbidden()); |
| 3467 | #endif |
| 3468 | |
| 3469 | // The following call forces module initialization |
| 3470 | TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); |
| 3471 | #ifndef WTF_CHANGES |
| 3472 | if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) { |
| 3473 | Span* span = DoSampledAllocation(size); |
| 3474 | if (span != NULL) { |
| 3475 | ret = SpanToMallocResult(span); |
| 3476 | } |
| 3477 | } else |
| 3478 | #endif |
| 3479 | if (size > kMaxSize) { |
| 3480 | // Use page-level allocator |
| 3481 | SpinLockHolder h(&pageheap_lock); |
| 3482 | Span* span = pageheap->New(pages(size)); |
| 3483 | if (span != NULL) { |
| 3484 | ret = SpanToMallocResult(span); |
| 3485 | } |
| 3486 | } else { |
| 3487 | // The common case, and also the simplest. This just pops the |
    // size-appropriate freelist, after replenishing it if it's empty.
| 3489 | ret = CheckedMallocResult(heap->Allocate(size)); |
| 3490 | } |
| 3491 | if (!ret) { |
| 3492 | #ifdef WTF_CHANGES |
| 3493 | if (crashOnFailure) // This branch should be optimized out by the compiler. |
| 3494 | CRASH(); |
| 3495 | #else |
| 3496 | errno = ENOMEM; |
| 3497 | #endif |
| 3498 | } |
| 3499 | return ret; |
| 3500 | } |
| 3501 | |
| 3502 | static ALWAYS_INLINE void do_free(void* ptr) { |
| 3503 | if (ptr == NULL) return; |
| 3504 | ASSERT(pageheap != NULL); // Should not call free() before malloc() |
| 3505 | const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift; |
| 3506 | Span* span = NULL; |
| 3507 | size_t cl = pageheap->GetSizeClassIfCached(p); |
| 3508 | |
| 3509 | if (cl == 0) { |
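    // A cached value of 0 means either "not cached" or "large object"; the
    // pagemap descriptor disambiguates, and we refresh the cache on the way.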
| 3510 | span = pageheap->GetDescriptor(p); |
| 3511 | cl = span->sizeclass; |
| 3512 | pageheap->CacheSizeClass(p, cl); |
| 3513 | } |
| 3514 | if (cl != 0) { |
| 3515 | #ifndef NO_TCMALLOC_SAMPLES |
| 3516 | ASSERT(!pageheap->GetDescriptor(p)->sample); |
| 3517 | #endif |
| 3518 | TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent(); |
| 3519 | if (heap != NULL) { |
| 3520 | heap->Deallocate(ptr, cl); |
| 3521 | } else { |
| 3522 | // Delete directly into central cache |
| 3523 | SLL_SetNext(ptr, NULL); |
| 3524 | central_cache[cl].InsertRange(ptr, ptr, 1); |
| 3525 | } |
| 3526 | } else { |
| 3527 | SpinLockHolder h(&pageheap_lock); |
| 3528 | ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); |
| 3529 | ASSERT(span != NULL && span->start == p); |
| 3530 | #ifndef NO_TCMALLOC_SAMPLES |
| 3531 | if (span->sample) { |
| 3532 | DLL_Remove(span); |
| 3533 | stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects)); |
| 3534 | span->objects = NULL; |
| 3535 | } |
| 3536 | #endif |
| 3537 | pageheap->Delete(span); |
| 3538 | } |
| 3539 | } |
| 3540 | |
| 3541 | #ifndef WTF_CHANGES |
| 3542 | // For use by exported routines below that want specific alignments |
| 3543 | // |
| 3544 | // Note: this code can be slow, and can significantly fragment memory. |
| 3545 | // The expectation is that memalign/posix_memalign/valloc/pvalloc will |
| 3546 | // not be invoked very often. This requirement simplifies our |
| 3547 | // implementation and allows us to tune for expected allocation |
| 3548 | // patterns. |
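//
// Strategy: for small requests, walk up the size classes until one happens
// to be suitably aligned; otherwise allocate whole pages, over-allocating by
// "align" bytes if necessary and trimming the misaligned head and the unused
// tail back to the page heap.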
| 3549 | static void* do_memalign(size_t align, size_t size) { |
| 3550 | ASSERT((align & (align - 1)) == 0); |
| 3551 | ASSERT(align > 0); |
| 3552 | if (pageheap == NULL) TCMalloc_ThreadCache::InitModule(); |
| 3553 | |
| 3554 | // Allocate at least one byte to avoid boundary conditions below |
| 3555 | if (size == 0) size = 1; |
| 3556 | |
| 3557 | if (size <= kMaxSize && align < kPageSize) { |
| 3558 | // Search through acceptable size classes looking for one with |
| 3559 | // enough alignment. This depends on the fact that |
| 3560 | // InitSizeClasses() currently produces several size classes that |
| 3561 | // are aligned at powers of two. We will waste time and space if |
| 3562 | // we miss in the size class array, but that is deemed acceptable |
| 3563 | // since memalign() should be used rarely. |
| 3564 | size_t cl = SizeClass(size); |
| 3565 | while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) { |
| 3566 | cl++; |
| 3567 | } |
| 3568 | if (cl < kNumClasses) { |
| 3569 | TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache(); |
| 3570 | return CheckedMallocResult(heap->Allocate(class_to_size[cl])); |
| 3571 | } |
| 3572 | } |
| 3573 | |
| 3574 | // We will allocate directly from the page heap |
| 3575 | SpinLockHolder h(&pageheap_lock); |
| 3576 | |
| 3577 | if (align <= kPageSize) { |
| 3578 | // Any page-level allocation will be fine |
| 3579 | // TODO: We could put the rest of this page in the appropriate |
| 3580 | // TODO: cache but it does not seem worth it. |
| 3581 | Span* span = pageheap->New(pages(size)); |
| 3582 | return span == NULL ? NULL : SpanToMallocResult(span); |
| 3583 | } |
| 3584 | |
| 3585 | // Allocate extra pages and carve off an aligned portion |
| 3586 | const Length alloc = pages(size + align); |
| 3587 | Span* span = pageheap->New(alloc); |
| 3588 | if (span == NULL) return NULL; |
| 3589 | |
| 3590 | // Skip starting portion so that we end up aligned |
| 3591 | Length skip = 0; |
| 3592 | while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) { |
| 3593 | skip++; |
| 3594 | } |
| 3595 | ASSERT(skip < alloc); |
| 3596 | if (skip > 0) { |
| 3597 | Span* rest = pageheap->Split(span, skip); |
| 3598 | pageheap->Delete(span); |
| 3599 | span = rest; |
| 3600 | } |
| 3601 | |
| 3602 | // Skip trailing portion that we do not need to return |
| 3603 | const Length needed = pages(size); |
| 3604 | ASSERT(span->length >= needed); |
| 3605 | if (span->length > needed) { |
| 3606 | Span* trailer = pageheap->Split(span, needed); |
| 3607 | pageheap->Delete(trailer); |
| 3608 | } |
| 3609 | return SpanToMallocResult(span); |
| 3610 | } |
| 3611 | #endif |
| 3612 | |
| 3613 | // Helpers for use by exported routines below: |
| 3614 | |
| 3615 | #ifndef WTF_CHANGES |
| 3616 | static inline void do_malloc_stats() { |
| 3617 | PrintStats(1); |
| 3618 | } |
| 3619 | #endif |
| 3620 | |
| 3621 | static inline int do_mallopt(int, int) { |
| 3622 | return 1; // Indicates error |
| 3623 | } |
| 3624 | |
| 3625 | #ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance |
| 3626 | static inline struct mallinfo do_mallinfo() { |
| 3627 | TCMallocStats stats; |
| 3628 | ExtractStats(&stats, NULL); |
| 3629 | |
| 3630 | // Just some of the fields are filled in. |
| 3631 | struct mallinfo info; |
| 3632 | memset(&info, 0, sizeof(info)); |
| 3633 | |
  // Unfortunately, the struct contains "int" fields, so some of the
| 3635 | // size values will be truncated. |
| 3636 | info.arena = static_cast<int>(stats.system_bytes); |
| 3637 | info.fsmblks = static_cast<int>(stats.thread_bytes |
| 3638 | + stats.central_bytes |
| 3639 | + stats.transfer_bytes); |
| 3640 | info.fordblks = static_cast<int>(stats.pageheap_bytes); |
| 3641 | info.uordblks = static_cast<int>(stats.system_bytes |
| 3642 | - stats.thread_bytes |
| 3643 | - stats.central_bytes |
| 3644 | - stats.transfer_bytes |
| 3645 | - stats.pageheap_bytes); |
| 3646 | |
| 3647 | return info; |
| 3648 | } |
| 3649 | #endif |
| 3650 | |
| 3651 | //------------------------------------------------------------------- |
| 3652 | // Exported routines |
| 3653 | //------------------------------------------------------------------- |
| 3654 | |
| 3655 | // CAVEAT: The code structure below ensures that MallocHook methods are always |
| 3656 | // called from the stack frame of the invoked allocation function. |
| 3657 | // heap-checker.cc depends on this to start a stack trace from |
| 3658 | // the call to the (de)allocation function. |
| 3659 | |
| 3660 | #ifndef WTF_CHANGES |
| 3661 | extern "C" |
| 3662 | #else |
| 3663 | #define do_malloc do_malloc<crashOnFailure> |
| 3664 | |
| 3665 | template <bool crashOnFailure> |
| 3666 | void* malloc(size_t); |
| 3667 | |
| 3668 | void* fastMalloc(size_t size) |
| 3669 | { |
| 3670 | return malloc<true>(size); |
| 3671 | } |
| 3672 | |
| 3673 | TryMallocReturnValue tryFastMalloc(size_t size) |
| 3674 | { |
| 3675 | return malloc<false>(size); |
| 3676 | } |
| 3677 | |
| 3678 | template <bool crashOnFailure> |
| 3679 | ALWAYS_INLINE |
| 3680 | #endif |
| 3681 | void* malloc(size_t size) { |
| 3682 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 3683 | if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= size) // If overflow would occur... |
| 3684 | return 0; |
| 3685 | size += sizeof(AllocAlignmentInteger); |
| 3686 | void* result = do_malloc(size); |
| 3687 | if (!result) |
| 3688 | return 0; |
| 3689 | |
| 3690 | *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; |
| 3691 | result = static_cast<AllocAlignmentInteger*>(result) + 1; |
| 3692 | #else |
| 3693 | void* result = do_malloc(size); |
| 3694 | #endif |
| 3695 | |
| 3696 | #ifndef WTF_CHANGES |
| 3697 | MallocHook::InvokeNewHook(result, size); |
| 3698 | #endif |
| 3699 | return result; |
| 3700 | } |
| 3701 | |
| 3702 | #ifndef WTF_CHANGES |
| 3703 | extern "C" |
| 3704 | #endif |
| 3705 | void free(void* ptr) { |
| 3706 | #ifndef WTF_CHANGES |
| 3707 | MallocHook::InvokeDeleteHook(ptr); |
| 3708 | #endif |
| 3709 | |
| 3710 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 3711 | if (!ptr) |
| 3712 | return; |
| 3713 | |
| 3714 | AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(ptr); |
| 3715 | if (*header != Internal::AllocTypeMalloc) |
| 3716 | Internal::fastMallocMatchFailed(ptr); |
| 3717 | do_free(header); |
| 3718 | #else |
| 3719 | do_free(ptr); |
| 3720 | #endif |
| 3721 | } |
| 3722 | |
| 3723 | #ifndef WTF_CHANGES |
| 3724 | extern "C" |
| 3725 | #else |
| 3726 | template <bool crashOnFailure> |
| 3727 | void* calloc(size_t, size_t); |
| 3728 | |
| 3729 | void* fastCalloc(size_t n, size_t elem_size) |
| 3730 | { |
| 3731 | return calloc<true>(n, elem_size); |
| 3732 | } |
| 3733 | |
| 3734 | TryMallocReturnValue tryFastCalloc(size_t n, size_t elem_size) |
| 3735 | { |
| 3736 | return calloc<false>(n, elem_size); |
| 3737 | } |
| 3738 | |
| 3739 | template <bool crashOnFailure> |
| 3740 | ALWAYS_INLINE |
| 3741 | #endif |
| 3742 | void* calloc(size_t n, size_t elem_size) { |
| 3743 | size_t totalBytes = n * elem_size; |
| 3744 | |
| 3745 | // Protect against overflow |
| 3746 | if (n > 1 && elem_size && (totalBytes / elem_size) != n) |
| 3747 | return 0; |
| 3748 | |
| 3749 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 3750 | if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes) // If overflow would occur... |
| 3751 | return 0; |
| 3752 | |
| 3753 | totalBytes += sizeof(AllocAlignmentInteger); |
| 3754 | void* result = do_malloc(totalBytes); |
| 3755 | if (!result) |
| 3756 | return 0; |
| 3757 | |
| 3758 | memset(result, 0, totalBytes); |
| 3759 | *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc; |
| 3760 | result = static_cast<AllocAlignmentInteger*>(result) + 1; |
| 3761 | #else |
| 3762 | void* result = do_malloc(totalBytes); |
| 3763 | if (result != NULL) { |
| 3764 | memset(result, 0, totalBytes); |
| 3765 | } |
| 3766 | #endif |
| 3767 | |
| 3768 | #ifndef WTF_CHANGES |
| 3769 | MallocHook::InvokeNewHook(result, totalBytes); |
| 3770 | #endif |
| 3771 | return result; |
| 3772 | } |
| 3773 | |
| 3774 | // Since cfree isn't used anywhere, we don't compile it in. |
| 3775 | #ifndef WTF_CHANGES |
| 3776 | #ifndef WTF_CHANGES |
| 3777 | extern "C" |
| 3778 | #endif |
| 3779 | void cfree(void* ptr) { |
| 3780 | #ifndef WTF_CHANGES |
| 3781 | MallocHook::InvokeDeleteHook(ptr); |
| 3782 | #endif |
| 3783 | do_free(ptr); |
| 3784 | } |
| 3785 | #endif |
| 3786 | |
| 3787 | #ifndef WTF_CHANGES |
| 3788 | extern "C" |
| 3789 | #else |
| 3790 | template <bool crashOnFailure> |
| 3791 | void* realloc(void*, size_t); |
| 3792 | |
| 3793 | void* fastRealloc(void* old_ptr, size_t new_size) |
| 3794 | { |
| 3795 | return realloc<true>(old_ptr, new_size); |
| 3796 | } |
| 3797 | |
| 3798 | TryMallocReturnValue tryFastRealloc(void* old_ptr, size_t new_size) |
| 3799 | { |
| 3800 | return realloc<false>(old_ptr, new_size); |
| 3801 | } |
| 3802 | |
| 3803 | template <bool crashOnFailure> |
| 3804 | ALWAYS_INLINE |
| 3805 | #endif |
| 3806 | void* realloc(void* old_ptr, size_t new_size) { |
| 3807 | if (old_ptr == NULL) { |
| 3808 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 3809 | void* result = malloc(new_size); |
| 3810 | #else |
| 3811 | void* result = do_malloc(new_size); |
| 3812 | #ifndef WTF_CHANGES |
| 3813 | MallocHook::InvokeNewHook(result, new_size); |
| 3814 | #endif |
| 3815 | #endif |
| 3816 | return result; |
| 3817 | } |
| 3818 | if (new_size == 0) { |
| 3819 | #ifndef WTF_CHANGES |
| 3820 | MallocHook::InvokeDeleteHook(old_ptr); |
| 3821 | #endif |
| 3822 | free(old_ptr); |
| 3823 | return NULL; |
| 3824 | } |
| 3825 | |
| 3826 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 3827 | if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= new_size) // If overflow would occur... |
| 3828 | return 0; |
| 3829 | new_size += sizeof(AllocAlignmentInteger); |
| 3830 | AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(old_ptr); |
| 3831 | if (*header != Internal::AllocTypeMalloc) |
| 3832 | Internal::fastMallocMatchFailed(old_ptr); |
| 3833 | old_ptr = header; |
| 3834 | #endif |
| 3835 | |
| 3836 | // Get the size of the old entry |
| 3837 | const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift; |
| 3838 | size_t cl = pageheap->GetSizeClassIfCached(p); |
| 3839 | Span *span = NULL; |
| 3840 | size_t old_size; |
| 3841 | if (cl == 0) { |
| 3842 | span = pageheap->GetDescriptor(p); |
| 3843 | cl = span->sizeclass; |
| 3844 | pageheap->CacheSizeClass(p, cl); |
| 3845 | } |
| 3846 | if (cl != 0) { |
| 3847 | old_size = ByteSizeForClass(cl); |
| 3848 | } else { |
| 3849 | ASSERT(span != NULL); |
| 3850 | old_size = span->length << kPageShift; |
| 3851 | } |
| 3852 | |
| 3853 | // Reallocate if the new size is larger than the old size, |
| 3854 | // or if the new size is significantly smaller than the old size. |
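  // "Significantly smaller" means the allocation that would back new_size
  // (its size class, or its page count for large objects) is strictly smaller
  // than the old allocation; shrinking within the same size class keeps the
  // old block.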
| 3855 | if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) { |
| 3856 | // Need to reallocate |
| 3857 | void* new_ptr = do_malloc(new_size); |
| 3858 | if (new_ptr == NULL) { |
| 3859 | return NULL; |
| 3860 | } |
| 3861 | #ifndef WTF_CHANGES |
| 3862 | MallocHook::InvokeNewHook(new_ptr, new_size); |
| 3863 | #endif |
| 3864 | memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); |
| 3865 | #ifndef WTF_CHANGES |
| 3866 | MallocHook::InvokeDeleteHook(old_ptr); |
| 3867 | #endif |
| 3868 | // We could use a variant of do_free() that leverages the fact |
| 3869 | // that we already know the sizeclass of old_ptr. The benefit |
| 3870 | // would be small, so don't bother. |
| 3871 | do_free(old_ptr); |
| 3872 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 3873 | new_ptr = static_cast<AllocAlignmentInteger*>(new_ptr) + 1; |
| 3874 | #endif |
| 3875 | return new_ptr; |
| 3876 | } else { |
| 3877 | #if ENABLE(FAST_MALLOC_MATCH_VALIDATION) |
| 3878 | old_ptr = static_cast<AllocAlignmentInteger*>(old_ptr) + 1; // Set old_ptr back to the user pointer. |
| 3879 | #endif |
| 3880 | return old_ptr; |
| 3881 | } |
| 3882 | } |
| 3883 | |
| 3884 | #ifdef WTF_CHANGES |
| 3885 | #undef do_malloc |
| 3886 | #else |
| 3887 | |
| 3888 | static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER; |
| 3889 | |
| 3890 | static inline void* cpp_alloc(size_t size, bool nothrow) { |
| 3891 | for (;;) { |
| 3892 | void* p = do_malloc(size); |
| 3893 | #ifdef PREANSINEW |
| 3894 | return p; |
| 3895 | #else |
| 3896 | if (p == NULL) { // allocation failed |
| 3897 | // Get the current new handler. NB: this function is not |
| 3898 | // thread-safe. We make a feeble stab at making it so here, but |
| 3899 | // this lock only protects against tcmalloc interfering with |
| 3900 | // itself, not with other libraries calling set_new_handler. |
| 3901 | std::new_handler nh; |
| 3902 | { |
| 3903 | SpinLockHolder h(&set_new_handler_lock); |
| 3904 | nh = std::set_new_handler(0); |
| 3905 | (void) std::set_new_handler(nh); |
| 3906 | } |
| 3907 | // If no new_handler is established, the allocation failed. |
| 3908 | if (!nh) { |
| 3909 | if (nothrow) return 0; |
| 3910 | throw std::bad_alloc(); |
| 3911 | } |
| 3912 | // Otherwise, try the new_handler. If it returns, retry the |
| 3913 | // allocation. If it throws std::bad_alloc, fail the allocation. |
      // If it throws something else, don't interfere.
| 3915 | try { |
| 3916 | (*nh)(); |
| 3917 | } catch (const std::bad_alloc&) { |
| 3918 | if (!nothrow) throw; |
| 3919 | return p; |
| 3920 | } |
| 3921 | } else { // allocation success |
| 3922 | return p; |
| 3923 | } |
| 3924 | #endif |
| 3925 | } |
| 3926 | } |
| 3927 | |
| 3928 | void* operator new(size_t size) { |
| 3929 | void* p = cpp_alloc(size, false); |
| 3930 | // We keep this next instruction out of cpp_alloc for a reason: when |
| 3931 | // it's in, and new just calls cpp_alloc, the optimizer may fold the |
| 3932 | // new call into cpp_alloc, which messes up our whole section-based |
| 3933 | // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc |
| 3934 | // isn't the last thing this fn calls, and prevents the folding. |
| 3935 | MallocHook::InvokeNewHook(p, size); |
| 3936 | return p; |
| 3937 | } |
| 3938 | |
| 3939 | void* operator new(size_t size, const std::nothrow_t&) __THROW { |
| 3940 | void* p = cpp_alloc(size, true); |
| 3941 | MallocHook::InvokeNewHook(p, size); |
| 3942 | return p; |
| 3943 | } |
| 3944 | |
| 3945 | void operator delete(void* p) __THROW { |
| 3946 | MallocHook::InvokeDeleteHook(p); |
| 3947 | do_free(p); |
| 3948 | } |
| 3949 | |
| 3950 | void operator delete(void* p, const std::nothrow_t&) __THROW { |
| 3951 | MallocHook::InvokeDeleteHook(p); |
| 3952 | do_free(p); |
| 3953 | } |
| 3954 | |
| 3955 | void* operator new[](size_t size) { |
| 3956 | void* p = cpp_alloc(size, false); |
| 3957 | // We keep this next instruction out of cpp_alloc for a reason: when |
| 3958 | // it's in, and new just calls cpp_alloc, the optimizer may fold the |
| 3959 | // new call into cpp_alloc, which messes up our whole section-based |
| 3960 | // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc |
| 3961 | // isn't the last thing this fn calls, and prevents the folding. |
| 3962 | MallocHook::InvokeNewHook(p, size); |
| 3963 | return p; |
| 3964 | } |
| 3965 | |
| 3966 | void* operator new[](size_t size, const std::nothrow_t&) __THROW { |
| 3967 | void* p = cpp_alloc(size, true); |
| 3968 | MallocHook::InvokeNewHook(p, size); |
| 3969 | return p; |
| 3970 | } |
| 3971 | |
| 3972 | void operator delete[](void* p) __THROW { |
| 3973 | MallocHook::InvokeDeleteHook(p); |
| 3974 | do_free(p); |
| 3975 | } |
| 3976 | |
| 3977 | void operator delete[](void* p, const std::nothrow_t&) __THROW { |
| 3978 | MallocHook::InvokeDeleteHook(p); |
| 3979 | do_free(p); |
| 3980 | } |
| 3981 | |
| 3982 | extern "C" void* memalign(size_t align, size_t size) __THROW { |
| 3983 | void* result = do_memalign(align, size); |
| 3984 | MallocHook::InvokeNewHook(result, size); |
| 3985 | return result; |
| 3986 | } |
| 3987 | |
| 3988 | extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size) |
| 3989 | __THROW { |
| 3990 | if (((align % sizeof(void*)) != 0) || |
| 3991 | ((align & (align - 1)) != 0) || |
| 3992 | (align == 0)) { |
| 3993 | return EINVAL; |
| 3994 | } |
| 3995 | |
| 3996 | void* result = do_memalign(align, size); |
| 3997 | MallocHook::InvokeNewHook(result, size); |
| 3998 | if (result == NULL) { |
| 3999 | return ENOMEM; |
| 4000 | } else { |
| 4001 | *result_ptr = result; |
| 4002 | return 0; |
| 4003 | } |
| 4004 | } |
| 4005 | |
| 4006 | static size_t pagesize = 0; |
| 4007 | |
| 4008 | extern "C" void* valloc(size_t size) __THROW { |
| 4009 | // Allocate page-aligned object of length >= size bytes |
| 4010 | if (pagesize == 0) pagesize = getpagesize(); |
| 4011 | void* result = do_memalign(pagesize, size); |
| 4012 | MallocHook::InvokeNewHook(result, size); |
| 4013 | return result; |
| 4014 | } |
| 4015 | |
| 4016 | extern "C" void* pvalloc(size_t size) __THROW { |
| 4017 | // Round up size to a multiple of pagesize |
| 4018 | if (pagesize == 0) pagesize = getpagesize(); |
| 4019 | size = (size + pagesize - 1) & ~(pagesize - 1); |
| 4020 | void* result = do_memalign(pagesize, size); |
| 4021 | MallocHook::InvokeNewHook(result, size); |
| 4022 | return result; |
| 4023 | } |
| 4024 | |
| 4025 | extern "C" void malloc_stats(void) { |
| 4026 | do_malloc_stats(); |
| 4027 | } |
| 4028 | |
| 4029 | extern "C" int mallopt(int cmd, int value) { |
| 4030 | return do_mallopt(cmd, value); |
| 4031 | } |
| 4032 | |
| 4033 | #ifdef HAVE_STRUCT_MALLINFO |
| 4034 | extern "C" struct mallinfo mallinfo(void) { |
| 4035 | return do_mallinfo(); |
| 4036 | } |
| 4037 | #endif |
| 4038 | |
| 4039 | //------------------------------------------------------------------- |
| 4040 | // Some library routines on RedHat 9 allocate memory using malloc() |
| 4041 | // and free it using __libc_free() (or vice-versa). Since we provide |
| 4042 | // our own implementations of malloc/free, we need to make sure that |
| 4043 | // the __libc_XXX variants (defined as part of glibc) also point to |
| 4044 | // the same implementations. |
| 4045 | //------------------------------------------------------------------- |
| 4046 | |
| 4047 | #if defined(__GLIBC__) |
| 4048 | extern "C" { |
| 4049 | #if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__) |
| 4050 | // Potentially faster variants that use the gcc alias extension. |
| 4051 | // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check. |
| 4052 | # define ALIAS(x) __attribute__ ((weak, alias (x))) |
  void* __libc_malloc(size_t size)              ALIAS("malloc");
  void  __libc_free(void* ptr)                  ALIAS("free");
  void* __libc_realloc(void* ptr, size_t size)  ALIAS("realloc");
  void* __libc_calloc(size_t n, size_t size)    ALIAS("calloc");
  void  __libc_cfree(void* ptr)                 ALIAS("cfree");
  void* __libc_memalign(size_t align, size_t s) ALIAS("memalign");
  void* __libc_valloc(size_t size)              ALIAS("valloc");
  void* __libc_pvalloc(size_t size)             ALIAS("pvalloc");
  int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
| 4062 | # undef ALIAS |
| 4063 | # else /* not __GNUC__ */ |
| 4064 | // Portable wrappers |
| 4065 | void* __libc_malloc(size_t size) { return malloc(size); } |
| 4066 | void __libc_free(void* ptr) { free(ptr); } |
| 4067 | void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); } |
| 4068 | void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); } |
| 4069 | void __libc_cfree(void* ptr) { cfree(ptr); } |
| 4070 | void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); } |
| 4071 | void* __libc_valloc(size_t size) { return valloc(size); } |
| 4072 | void* __libc_pvalloc(size_t size) { return pvalloc(size); } |
| 4073 | int __posix_memalign(void** r, size_t a, size_t s) { |
| 4074 | return posix_memalign(r, a, s); |
| 4075 | } |
| 4076 | # endif /* __GNUC__ */ |
| 4077 | } |
| 4078 | #endif /* __GLIBC__ */ |
| 4079 | |
| 4080 | // Override __libc_memalign in libc on linux boxes specially. |
| 4081 | // They have a bug in libc that causes them to (very rarely) allocate |
| 4082 | // with __libc_memalign() yet deallocate with free() and the |
| 4083 | // definitions above don't catch it. |
| 4084 | // This function is an exception to the rule of calling MallocHook method |
| 4085 | // from the stack frame of the allocation function; |
| 4086 | // heap-checker handles this special case explicitly. |
| 4087 | static void *MemalignOverride(size_t align, size_t size, const void *caller) |
| 4088 | __THROW { |
| 4089 | void* result = do_memalign(align, size); |
| 4090 | MallocHook::InvokeNewHook(result, size); |
| 4091 | return result; |
| 4092 | } |
| 4093 | void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride; |
| 4094 | |
| 4095 | #endif |
| 4096 | |
| 4097 | #if defined(WTF_CHANGES) && OS(DARWIN) |
| 4098 | |
| 4099 | class FreeObjectFinder { |
| 4100 | const RemoteMemoryReader& m_reader; |
| 4101 | HashSet<void*> m_freeObjects; |
| 4102 | |
| 4103 | public: |
| 4104 | FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { } |
| 4105 | |
| 4106 | void visit(void* ptr) { m_freeObjects.add(ptr); } |
| 4107 | bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); } |
| 4108 | bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); } |
| 4109 | size_t freeObjectCount() const { return m_freeObjects.size(); } |
| 4110 | |
| 4111 | void findFreeObjects(TCMalloc_ThreadCache* threadCache) |
| 4112 | { |
| 4113 | for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0)) |
| 4114 | threadCache->enumerateFreeObjects(*this, m_reader); |
| 4115 | } |
| 4116 | |
| 4117 | void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList) |
| 4118 | { |
| 4119 | for (unsigned i = 0; i < numSizes; i++) |
| 4120 | centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i); |
| 4121 | } |
| 4122 | }; |
| 4123 | |
| 4124 | class PageMapFreeObjectFinder { |
| 4125 | const RemoteMemoryReader& m_reader; |
| 4126 | FreeObjectFinder& m_freeObjectFinder; |
| 4127 | |
| 4128 | public: |
| 4129 | PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder) |
| 4130 | : m_reader(reader) |
| 4131 | , m_freeObjectFinder(freeObjectFinder) |
| 4132 | { } |
| 4133 | |
| 4134 | int visit(void* ptr) const |
| 4135 | { |
| 4136 | if (!ptr) |
| 4137 | return 1; |
| 4138 | |
| 4139 | Span* span = m_reader(reinterpret_cast<Span*>(ptr)); |
| 4140 | if (span->free) { |
| 4141 | void* ptr = reinterpret_cast<void*>(span->start << kPageShift); |
| 4142 | m_freeObjectFinder.visit(ptr); |
| 4143 | } else if (span->sizeclass) { |
| 4144 | // Walk the free list of the small-object span, keeping track of each object seen |
| 4145 | for (void* nextObject = span->objects; nextObject; nextObject = *m_reader(reinterpret_cast<void**>(nextObject))) |
| 4146 | m_freeObjectFinder.visit(nextObject); |
| 4147 | } |
| 4148 | return span->length; |
| 4149 | } |
| 4150 | }; |
| 4151 | |
| 4152 | class PageMapMemoryUsageRecorder { |
| 4153 | task_t m_task; |
| 4154 | void* m_context; |
| 4155 | unsigned m_typeMask; |
| 4156 | vm_range_recorder_t* m_recorder; |
| 4157 | const RemoteMemoryReader& m_reader; |
| 4158 | const FreeObjectFinder& m_freeObjectFinder; |
| 4159 | |
| 4160 | HashSet<void*> m_seenPointers; |
| 4161 | Vector<Span*> m_coalescedSpans; |
| 4162 | |
| 4163 | public: |
| 4164 | PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder) |
| 4165 | : m_task(task) |
| 4166 | , m_context(context) |
| 4167 | , m_typeMask(typeMask) |
| 4168 | , m_recorder(recorder) |
| 4169 | , m_reader(reader) |
| 4170 | , m_freeObjectFinder(freeObjectFinder) |
| 4171 | { } |
| 4172 | |
| 4173 | ~PageMapMemoryUsageRecorder() |
| 4174 | { |
| 4175 | ASSERT(!m_coalescedSpans.size()); |
| 4176 | } |
| 4177 | |
| 4178 | void recordPendingRegions() |
| 4179 | { |
| 4180 | Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1]; |
| 4181 | vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 }; |
| 4182 | ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize); |
| 4183 | |
| 4184 | // Mark the memory region the spans represent as a candidate for containing pointers |
| 4185 | if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE) |
| 4186 | (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1); |
| 4187 | |
| 4188 | if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) { |
| 4189 | m_coalescedSpans.clear(); |
| 4190 | return; |
| 4191 | } |
| 4192 | |
| 4193 | Vector<vm_range_t, 1024> allocatedPointers; |
| 4194 | for (size_t i = 0; i < m_coalescedSpans.size(); ++i) { |
| 4195 | Span *theSpan = m_coalescedSpans[i]; |
| 4196 | if (theSpan->free) |
| 4197 | continue; |
| 4198 | |
| 4199 | vm_address_t spanStartAddress = theSpan->start << kPageShift; |
| 4200 | vm_size_t spanSizeInBytes = theSpan->length * kPageSize; |
| 4201 | |
| 4202 | if (!theSpan->sizeclass) { |
| 4203 | // If it's an allocated large object span, mark it as in use |
| 4204 | if (!m_freeObjectFinder.isFreeObject(spanStartAddress)) |
| 4205 | allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes}); |
| 4206 | } else { |
| 4207 | const size_t objectSize = ByteSizeForClass(theSpan->sizeclass); |
| 4208 | |
| 4209 | // Mark each allocated small object within the span as in use |
| 4210 | const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes; |
| 4211 | for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) { |
| 4212 | if (!m_freeObjectFinder.isFreeObject(object)) |
| 4213 | allocatedPointers.append((vm_range_t){object, objectSize}); |
| 4214 | } |
| 4215 | } |
| 4216 | } |
| 4217 | |
| 4218 | (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size()); |
| 4219 | |
| 4220 | m_coalescedSpans.clear(); |
| 4221 | } |
| 4222 | |
| 4223 | int visit(void* ptr) |
| 4224 | { |
| 4225 | if (!ptr) |
| 4226 | return 1; |
| 4227 | |
| 4228 | Span* span = m_reader(reinterpret_cast<Span*>(ptr)); |
| 4229 | if (!span->start) |
| 4230 | return 1; |
| 4231 | |
| 4232 | if (m_seenPointers.contains(ptr)) |
| 4233 | return span->length; |
| 4234 | m_seenPointers.add(ptr); |
| 4235 | |
| 4236 | if (!m_coalescedSpans.size()) { |
| 4237 | m_coalescedSpans.append(span); |
| 4238 | return span->length; |
| 4239 | } |
| 4240 | |
| 4241 | Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1]; |
| 4242 | vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift; |
| 4243 | vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize; |
| 4244 | |
| 4245 | // If the new span is adjacent to the previous span, do nothing for now. |
| 4246 | vm_address_t spanStartAddress = span->start << kPageShift; |
| 4247 | if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) { |
| 4248 | m_coalescedSpans.append(span); |
| 4249 | return span->length; |
| 4250 | } |
| 4251 | |
| 4252 | // New span is not adjacent to previous span, so record the spans coalesced so far. |
| 4253 | recordPendingRegions(); |
| 4254 | m_coalescedSpans.append(span); |
| 4255 | |
| 4256 | return span->length; |
| 4257 | } |
| 4258 | }; |
| 4259 | |
| 4260 | class AdminRegionRecorder { |
| 4261 | task_t m_task; |
| 4262 | void* m_context; |
| 4263 | unsigned m_typeMask; |
| 4264 | vm_range_recorder_t* m_recorder; |
| 4265 | const RemoteMemoryReader& m_reader; |
| 4266 | |
| 4267 | Vector<vm_range_t, 1024> m_pendingRegions; |
| 4268 | |
| 4269 | public: |
| 4270 | AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader) |
| 4271 | : m_task(task) |
| 4272 | , m_context(context) |
| 4273 | , m_typeMask(typeMask) |
| 4274 | , m_recorder(recorder) |
| 4275 | , m_reader(reader) |
| 4276 | { } |
| 4277 | |
| 4278 | void recordRegion(vm_address_t ptr, size_t size) |
| 4279 | { |
| 4280 | if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE) |
| 4281 | m_pendingRegions.append((vm_range_t){ ptr, size }); |
| 4282 | } |
| 4283 | |
void visit(void* ptr, size_t size)
| 4285 | { |
| 4286 | recordRegion(reinterpret_cast<vm_address_t>(ptr), size); |
| 4287 | } |
| 4288 | |
| 4289 | void recordPendingRegions() |
| 4290 | { |
| 4291 | if (m_pendingRegions.size()) { |
| 4292 | (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size()); |
| 4293 | m_pendingRegions.clear(); |
| 4294 | } |
| 4295 | } |
| 4296 | |
| 4297 | ~AdminRegionRecorder() |
| 4298 | { |
| 4299 | ASSERT(!m_pendingRegions.size()); |
| 4300 | } |
| 4301 | }; |
| 4302 | |
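// Zone enumeration entry point, invoked by malloc introspection tools (such as
// leaks or heap) to inspect this zone's memory in another task. Every data
// structure is read through the supplied memory_reader_t: free objects in the
// thread caches and central free lists are collected first, then the pagemap is
// walked to report in-use pointers and regions, and finally the allocator metadata
// is reported as administrative regions. Returns 0 (KERN_SUCCESS).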
| 4303 | kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder) |
| 4304 | { |
| 4305 | RemoteMemoryReader memoryReader(task, reader); |
| 4306 | |
| 4307 | InitSizeClasses(); |
| 4308 | |
| 4309 | FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress)); |
| 4310 | TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap); |
| 4311 | TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps); |
| 4312 | TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer); |
| 4313 | |
| 4314 | TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses); |
| 4315 | |
| 4316 | FreeObjectFinder finder(memoryReader); |
| 4317 | finder.findFreeObjects(threadHeaps); |
| 4318 | finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches); |
| 4319 | |
| 4320 | TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_; |
| 4321 | PageMapFreeObjectFinder pageMapFinder(memoryReader, finder); |
| 4322 | pageMap->visitValues(pageMapFinder, memoryReader); |
| 4323 | |
| 4324 | PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder); |
| 4325 | pageMap->visitValues(usageRecorder, memoryReader); |
| 4326 | usageRecorder.recordPendingRegions(); |
| 4327 | |
| 4328 | AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader); |
| 4329 | pageMap->visitAllocations(adminRegionRecorder, memoryReader); |
| 4330 | |
| 4331 | PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator); |
| 4332 | PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator); |
| 4333 | |
| 4334 | spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); |
| 4335 | pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader); |
| 4336 | |
| 4337 | adminRegionRecorder.recordPendingRegions(); |
| 4338 | |
| 4339 | return 0; |
| 4340 | } |
| 4341 | |
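// The zone entry points below are stubs: the zone is registered so that
// memory-analysis tools can introspect FastMalloc's memory via enumerate() above,
// not to service allocations through the malloc_zone_t interface.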
| 4342 | size_t FastMallocZone::size(malloc_zone_t*, const void*) |
| 4343 | { |
| 4344 | return 0; |
| 4345 | } |
| 4346 | |
| 4347 | void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t) |
| 4348 | { |
| 4349 | return 0; |
| 4350 | } |
| 4351 | |
| 4352 | void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t) |
| 4353 | { |
| 4354 | return 0; |
| 4355 | } |
| 4356 | |
| 4357 | void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr) |
| 4358 | { |
// Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
// is not in this zone. When this happens, the pointer being freed was not allocated by any
// zone, so we print a useful error for the application developer.
malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
| 4363 | } |
| 4364 | |
| 4365 | void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t) |
| 4366 | { |
| 4367 | return 0; |
| 4368 | } |
| 4369 | |
| 4370 | |
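// Undo any earlier macro remapping of malloc/free/realloc/calloc (if present) so the
// allocator names used in the zone setup below are not macro-expanded.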
| 4371 | #undef malloc |
| 4372 | #undef free |
| 4373 | #undef realloc |
| 4374 | #undef calloc |
| 4375 | |
| 4376 | extern "C" { |
malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
    &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics
#if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !OS(IPHONE_OS)
    , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher.
#endif
};
| 4385 | } |
| 4386 | |
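// Fills in the malloc_zone_t with the stub entry points and the introspection table
// defined above, then registers the zone with the system allocator so that heap
// tools can find it.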
| 4387 | FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator) |
| 4388 | : m_pageHeap(pageHeap) |
| 4389 | , m_threadHeaps(threadHeaps) |
| 4390 | , m_centralCaches(centralCaches) |
| 4391 | , m_spanAllocator(spanAllocator) |
| 4392 | , m_pageHeapAllocator(pageHeapAllocator) |
| 4393 | { |
| 4394 | memset(&m_zone, 0, sizeof(m_zone)); |
| 4395 | m_zone.version = 4; |
m_zone.zone_name = "JavaScriptCore FastMalloc";
| 4397 | m_zone.size = &FastMallocZone::size; |
| 4398 | m_zone.malloc = &FastMallocZone::zoneMalloc; |
| 4399 | m_zone.calloc = &FastMallocZone::zoneCalloc; |
| 4400 | m_zone.realloc = &FastMallocZone::zoneRealloc; |
| 4401 | m_zone.free = &FastMallocZone::zoneFree; |
| 4402 | m_zone.valloc = &FastMallocZone::zoneValloc; |
| 4403 | m_zone.destroy = &FastMallocZone::zoneDestroy; |
| 4404 | m_zone.introspect = &jscore_fastmalloc_introspection; |
| 4405 | malloc_zone_register(&m_zone); |
| 4406 | } |
| 4407 | |
| 4408 | |
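// Registers the zone exactly once by constructing a function-local static
// FastMallocZone wired up to the global page heap, thread heaps, central caches,
// and metadata allocators.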
| 4409 | void FastMallocZone::init() |
| 4410 | { |
| 4411 | static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator); |
| 4412 | } |
| 4413 | |
| 4414 | #endif |
| 4415 | |
| 4416 | #if WTF_CHANGES |
| 4417 | void releaseFastMallocFreeMemory() |
| 4418 | { |
// Flush free pages in the current thread cache back to the page heap.
// Scavenge() is called twice because its low-watermark mechanism prevents a full
// return on the first pass; the second pass flushes everything remaining.
| 4422 | if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) { |
| 4423 | threadCache->Scavenge(); |
| 4424 | threadCache->Scavenge(); |
| 4425 | } |
| 4426 | |
| 4427 | SpinLockHolder h(&pageheap_lock); |
| 4428 | pageheap->ReleaseFreePages(); |
| 4429 | } |
| 4430 | |
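// Gathers approximate heap statistics under pageheap_lock: total bytes obtained from
// the system, free bytes held in the page heap, bytes already returned to the system,
// and free bytes sitting in the per-thread and central caches. The central free-list
// counts are read outside the lock, so the result is a snapshot rather than an exact
// figure.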
| 4431 | FastMallocStatistics fastMallocStatistics() |
| 4432 | { |
| 4433 | FastMallocStatistics statistics; |
| 4434 | { |
| 4435 | SpinLockHolder lockHolder(&pageheap_lock); |
| 4436 | statistics.heapSize = static_cast<size_t>(pageheap->SystemBytes()); |
| 4437 | statistics.freeSizeInHeap = static_cast<size_t>(pageheap->FreeBytes()); |
| 4438 | statistics.returnedSize = pageheap->ReturnedBytes(); |
| 4439 | statistics.freeSizeInCaches = 0; |
for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache; threadCache = threadCache->next_)
| 4441 | statistics.freeSizeInCaches += threadCache->Size(); |
| 4442 | } |
| 4443 | for (unsigned cl = 0; cl < kNumClasses; ++cl) { |
| 4444 | const int length = central_cache[cl].length(); |
| 4445 | const int tc_length = central_cache[cl].tc_length(); |
| 4446 | statistics.freeSizeInCaches += ByteSizeForClass(cl) * (length + tc_length); |
| 4447 | } |
| 4448 | return statistics; |
| 4449 | } |
| 4450 | |
| 4451 | } // namespace WTF |
| 4452 | #endif |
| 4453 | |
| 4454 | #endif // FORCE_SYSTEM_MALLOC |
| 4455 | |