
/JavaScriptCore/wtf/FastMalloc.cpp

https://github.com/kvlasov/qtwebkit
C++ | 3701 lines | 2555 code | 482 blank | 664 comment | 452 complexity | MD5: 7b96ce74bc292ff118fa06de30a59ff5
Possible License(s): BSD-3-Clause, MPL-2.0-no-copyleft-exception, LGPL-2.1, LGPL-2.0

Large files are truncated, but you can view the full file in the repository

  1. // Copyright (c) 2005, 2007, Google Inc.
  2. // All rights reserved.
  3. // Copyright (C) 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // ---
  31. // Author: Sanjay Ghemawat <opensource@google.com>
  32. //
  33. // A malloc that uses a per-thread cache to satisfy small malloc requests.
  34. // (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
  35. //
  36. // See doc/tcmalloc.html for a high-level
  37. // description of how this malloc works.
  38. //
  39. // SYNCHRONIZATION
  40. // 1. The thread-specific lists are accessed without acquiring any locks.
  41. // This is safe because each such list is only accessed by one thread.
  42. // 2. We have a lock per central free-list, and hold it while manipulating
  43. // the central free list for a particular size.
  44. // 3. The central page allocator is protected by "pageheap_lock".
  45. // 4. The pagemap (which maps from page-number to descriptor)
  46. // can be read without holding any locks, and written while holding
  47. // the "pageheap_lock".
  48. // 5. To improve performance, a subset of the information one can get
  49. // from the pagemap is cached in a data structure, pagemap_cache_,
  50. // that atomically reads and writes its entries. This cache can be
  51. // read and written without locking.
  52. //
  53. // This multi-threaded access to the pagemap is safe for fairly
  54. // subtle reasons. We basically assume that when an object X is
  55. // allocated by thread A and deallocated by thread B, there must
  56. // have been appropriate synchronization in the handoff of object
  57. // X from thread A to thread B. The same logic applies to pagemap_cache_.
  58. //
  59. // THE PAGEID-TO-SIZECLASS CACHE
  60. // Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
  61. // returns 0 for a particular PageID then that means "no information," not that
  62. // the sizeclass is 0. The cache may have stale information for pages that do
  63. // not hold the beginning of any free()'able object. Staleness is eliminated
  64. // in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
  65. // do_memalign() for all other relevant pages.
  66. //
  67. // TODO: Bias reclamation to larger addresses
  68. // TODO: implement mallinfo/mallopt
  69. // TODO: Better testing
  70. //
  71. // 9/28/2003 (new page-level allocator replaces ptmalloc2):
  72. // * malloc/free of small objects goes from ~300 ns to ~50 ns.
  73. // * allocation of a reasonably complicated struct
  74. // goes from about 1100 ns to about 300 ns.
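//
// Illustrative sketch (not in the original listing) of the cache-usage
// pattern described above: a cached value of 0 means "no information",
// not sizeclass 0, so callers fall back to the pagemap. GetSizeClassIfCached,
// CacheSizeClass and GetDescriptor are defined on TCMalloc_PageHeap later in
// this file; "heap" is a hypothetical pointer used only for illustration.
//
//   size_t cl = heap->GetSizeClassIfCached(p);
//   if (cl == 0) {                          // cache miss, NOT "sizeclass 0"
//       Span* span = heap->GetDescriptor(p);
//       if (span && span->sizeclass > 0) {
//           cl = span->sizeclass;
//           heap->CacheSizeClass(p, cl);    // repopulate the hot cache
//       }
//   }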
  75. #include "config.h"
  76. #include "FastMalloc.h"
  77. #include "Assertions.h"
  78. #if ENABLE(JSC_MULTIPLE_THREADS)
  79. #include <pthread.h>
  80. #endif
  81. #ifndef NO_TCMALLOC_SAMPLES
  82. #ifdef WTF_CHANGES
  83. #define NO_TCMALLOC_SAMPLES
  84. #endif
  85. #endif
  86. #if !defined(USE_SYSTEM_MALLOC) && defined(NDEBUG)
  87. #define FORCE_SYSTEM_MALLOC 0
  88. #else
  89. #define FORCE_SYSTEM_MALLOC 1
  90. #endif
  91. #define TCMALLOC_TRACK_DECOMMITED_SPANS (HAVE(VIRTUALALLOC))
  92. #ifndef NDEBUG
  93. namespace WTF {
  94. #if ENABLE(JSC_MULTIPLE_THREADS)
  95. static pthread_key_t isForbiddenKey;
  96. static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
  97. static void initializeIsForbiddenKey()
  98. {
  99. pthread_key_create(&isForbiddenKey, 0);
  100. }
  101. static bool isForbidden()
  102. {
  103. pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
  104. return !!pthread_getspecific(isForbiddenKey);
  105. }
  106. void fastMallocForbid()
  107. {
  108. pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
  109. pthread_setspecific(isForbiddenKey, &isForbiddenKey);
  110. }
  111. void fastMallocAllow()
  112. {
  113. pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
  114. pthread_setspecific(isForbiddenKey, 0);
  115. }
  116. #else
  117. static bool staticIsForbidden;
  118. static bool isForbidden()
  119. {
  120. return staticIsForbidden;
  121. }
  122. void fastMallocForbid()
  123. {
  124. staticIsForbidden = true;
  125. }
  126. void fastMallocAllow()
  127. {
  128. staticIsForbidden = false;
  129. }
  130. #endif // ENABLE(JSC_MULTIPLE_THREADS)
  131. } // namespace WTF
  132. #endif // NDEBUG
  133. #include <string.h>
  134. namespace WTF {
  135. void *fastZeroedMalloc(size_t n)
  136. {
  137. void *result = fastMalloc(n);
  138. if (!result)
  139. return 0;
  140. memset(result, 0, n);
  141. #ifndef WTF_CHANGES
  142. MallocHook::InvokeNewHook(result, n);
  143. #endif
  144. return result;
  145. }
  146. }
  147. #if FORCE_SYSTEM_MALLOC
  148. #include <stdlib.h>
  149. #if !PLATFORM(WIN_OS)
  150. #include <pthread.h>
  151. #endif
  152. namespace WTF {
  153. void *fastMalloc(size_t n)
  154. {
  155. ASSERT(!isForbidden());
  156. return malloc(n);
  157. }
  158. void *fastCalloc(size_t n_elements, size_t element_size)
  159. {
  160. ASSERT(!isForbidden());
  161. return calloc(n_elements, element_size);
  162. }
  163. void fastFree(void* p)
  164. {
  165. ASSERT(!isForbidden());
  166. free(p);
  167. }
  168. void *fastRealloc(void* p, size_t n)
  169. {
  170. ASSERT(!isForbidden());
  171. return realloc(p, n);
  172. }
  173. void releaseFastMallocFreeMemory() { }
  174. } // namespace WTF
  175. #if PLATFORM(DARWIN)
  176. // This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
  177. // It will never be used in this case, so its type and value are less interesting than its presence.
  178. extern "C" const int jscore_fastmalloc_introspection = 0;
  179. #endif
  180. #else // FORCE_SYSTEM_MALLOC
  181. #if HAVE(STDINT_H)
  182. #include <stdint.h>
  183. #elif HAVE(INTTYPES_H)
  184. #include <inttypes.h>
  185. #else
  186. #include <sys/types.h>
  187. #endif
  188. #include "AlwaysInline.h"
  189. #include "Assertions.h"
  190. #include "TCPackedCache.h"
  191. #include "TCPageMap.h"
  192. #include "TCSpinLock.h"
  193. #include "TCSystemAlloc.h"
  194. #include <algorithm>
  195. #include <errno.h>
  196. #include <new>
  197. #include <pthread.h>
  198. #include <stdarg.h>
  199. #include <stddef.h>
  200. #include <stdio.h>
  201. #if COMPILER(MSVC)
  202. #ifndef WIN32_LEAN_AND_MEAN
  203. #define WIN32_LEAN_AND_MEAN
  204. #endif
  205. #include <windows.h>
  206. #endif
  207. #if WTF_CHANGES
  208. #if PLATFORM(DARWIN)
  209. #include "MallocZoneSupport.h"
  210. #include <wtf/HashSet.h>
  211. #endif
  212. #ifndef PRIuS
  213. #define PRIuS "zu"
  214. #endif
  215. // Calling pthread_getspecific through a global function pointer is faster than a normal
  216. // call to the function on Mac OS X, and it's used in performance-critical code. So we
  217. // use a function pointer. But that's not necessarily faster on other platforms, and we had
  218. // problems with this technique on Windows, so we'll do this only on Mac OS X.
  219. #if PLATFORM(DARWIN)
  220. static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
  221. #define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
  222. #endif
  223. #define DEFINE_VARIABLE(type, name, value, meaning) \
  224. namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \
  225. type FLAGS_##name(value); \
  226. char FLAGS_no##name; \
  227. } \
  228. using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
  229. #define DEFINE_int64(name, value, meaning) \
  230. DEFINE_VARIABLE(int64_t, name, value, meaning)
  231. #define DEFINE_double(name, value, meaning) \
  232. DEFINE_VARIABLE(double, name, value, meaning)
  233. namespace WTF {
  234. #define malloc fastMalloc
  235. #define calloc fastCalloc
  236. #define free fastFree
  237. #define realloc fastRealloc
  238. #define MESSAGE LOG_ERROR
  239. #define CHECK_CONDITION ASSERT
  240. #if PLATFORM(DARWIN)
  241. class TCMalloc_PageHeap;
  242. class TCMalloc_ThreadCache;
  243. class TCMalloc_Central_FreeListPadded;
  244. class FastMallocZone {
  245. public:
  246. static void init();
  247. static kern_return_t enumerate(task_t, void*, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
  248. static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
  249. static boolean_t check(malloc_zone_t*) { return true; }
  250. static void print(malloc_zone_t*, boolean_t) { }
  251. static void log(malloc_zone_t*, void*) { }
  252. static void forceLock(malloc_zone_t*) { }
  253. static void forceUnlock(malloc_zone_t*) { }
  254. static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }
  255. private:
  256. FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*);
  257. static size_t size(malloc_zone_t*, const void*);
  258. static void* zoneMalloc(malloc_zone_t*, size_t);
  259. static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
  260. static void zoneFree(malloc_zone_t*, void*);
  261. static void* zoneRealloc(malloc_zone_t*, void*, size_t);
  262. static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
  263. static void zoneDestroy(malloc_zone_t*) { }
  264. malloc_zone_t m_zone;
  265. TCMalloc_PageHeap* m_pageHeap;
  266. TCMalloc_ThreadCache** m_threadHeaps;
  267. TCMalloc_Central_FreeListPadded* m_centralCaches;
  268. };
  269. #endif
  270. #endif
  271. #ifndef WTF_CHANGES
  272. // This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if
  273. // you're porting to a system where you really can't get a stacktrace.
  274. #ifdef NO_TCMALLOC_SAMPLES
  275. // We use #define so code compiles even if you #include stacktrace.h somehow.
  276. # define GetStackTrace(stack, depth, skip) (0)
  277. #else
  278. # include <google/stacktrace.h>
  279. #endif
  280. #endif
  281. // Even if we have support for thread-local storage in the compiler
  282. // and linker, the OS may not support it. We need to check that at
  283. // runtime. Right now, we have to keep a manual set of "bad" OSes.
  284. #if defined(HAVE_TLS)
  285. static bool kernel_supports_tls = false; // be conservative
  286. static inline bool KernelSupportsTLS() {
  287. return kernel_supports_tls;
  288. }
  289. # if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS
  290. static void CheckIfKernelSupportsTLS() {
  291. kernel_supports_tls = false;
  292. }
  293. # else
  294. # include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too
  295. static void CheckIfKernelSupportsTLS() {
  296. struct utsname buf;
  297. if (uname(&buf) != 0) { // should be impossible
  298. MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
  299. kernel_supports_tls = false;
  300. } else if (strcasecmp(buf.sysname, "linux") == 0) {
  301. // The linux case: the first kernel to support TLS was 2.6.0
  302. if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x
  303. kernel_supports_tls = false;
  304. else if (buf.release[0] == '2' && buf.release[1] == '.' &&
  305. buf.release[2] >= '0' && buf.release[2] < '6' &&
  306. buf.release[3] == '.') // 2.0 - 2.5
  307. kernel_supports_tls = false;
  308. else
  309. kernel_supports_tls = true;
  310. } else { // some other kernel, we'll be optimistic
  311. kernel_supports_tls = true;
  312. }
  313. // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
  314. }
  315. # endif // HAVE_DECL_UNAME
  316. #endif // HAVE_TLS
  317. // __THROW is defined in glibc systems. It means, counter-intuitively,
  318. // "This function will never throw an exception." It's an optional
  319. // optimization tool, but we may need to use it to match glibc prototypes.
  320. #ifndef __THROW // I guess we're not on a glibc system
  321. # define __THROW // __THROW is just an optimization, so ok to make it ""
  322. #endif
  323. //-------------------------------------------------------------------
  324. // Configuration
  325. //-------------------------------------------------------------------
  326. // Not all possible combinations of the following parameters make
  327. // sense. In particular, if kMaxSize increases, you may have to
  328. // increase kNumClasses as well.
  329. static const size_t kPageShift = 12;
  330. static const size_t kPageSize = 1 << kPageShift;
  331. static const size_t kMaxSize = 8u * kPageSize;
  332. static const size_t kAlignShift = 3;
  333. static const size_t kAlignment = 1 << kAlignShift;
  334. static const size_t kNumClasses = 68;
  335. // Allocates a big block of memory for the pagemap once we reach more than
  336. // 128MB
  337. static const size_t kPageMapBigAllocationThreshold = 128 << 20;
  338. // Minimum number of pages to fetch from system at a time. Must be
  339. // significantly bigger than kBlockSize to amortize system-call
  340. // overhead, and also to reduce external fragmentation. Also, we
  341. // should keep this value big because various incarnations of Linux
  342. // have small limits on the number of mmap() regions per
  343. // address-space.
  344. static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);
  345. // Number of objects to move between a per-thread list and a central
  346. // list in one shot. We want this to be not too small so we can
  347. // amortize the lock overhead for accessing the central list. Making
  348. // it too big may temporarily cause unnecessary memory wastage in the
  349. // per-thread free list until the scavenger cleans up the list.
  350. static int num_objects_to_move[kNumClasses];
  351. // Maximum length we allow a per-thread free-list to have before we
  352. // move objects from it into the corresponding central free-list. We
  353. // want this big to avoid locking the central free-list too often. It
  354. // should not hurt to make this list somewhat big because the
  355. // scavenging code will shrink it down when its contents are not in use.
  356. static const int kMaxFreeListLength = 256;
  357. // Lower and upper bounds on the per-thread cache sizes
  358. static const size_t kMinThreadCacheSize = kMaxSize * 2;
  359. static const size_t kMaxThreadCacheSize = 2 << 20;
  360. // Default bound on the total amount of thread caches
  361. static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
  362. // For all span-lengths < kMaxPages we keep an exact-size list.
  363. // REQUIRED: kMaxPages >= kMinSystemAlloc;
  364. static const size_t kMaxPages = kMinSystemAlloc;
  365. /* The smallest prime > 2^n */
  366. static int primes_list[] = {
  367. // Small values might cause high rates of sampling
  368. // and hence are commented out.
  369. // 2, 5, 11, 17, 37, 67, 131, 257,
  370. // 521, 1031, 2053, 4099, 8209, 16411,
  371. 32771, 65537, 131101, 262147, 524309, 1048583,
  372. 2097169, 4194319, 8388617, 16777259, 33554467 };
  373. // Twice the approximate gap between sampling actions.
  374. // I.e., we take one sample approximately once every
  375. // tcmalloc_sample_parameter/2
  376. // bytes of allocation, i.e., ~ once every 128KB.
  377. // Must be a prime number.
  378. #ifdef NO_TCMALLOC_SAMPLES
  379. DEFINE_int64(tcmalloc_sample_parameter, 0,
  380. "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
  381. static size_t sample_period = 0;
  382. #else
  383. DEFINE_int64(tcmalloc_sample_parameter, 262147,
  384. "Twice the approximate gap between sampling actions."
  385. " Must be a prime number. Otherwise will be rounded up to a "
  386. " larger prime number");
  387. static size_t sample_period = 262147;
  388. #endif
  389. // Protects sample_period above
  390. static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
  391. // Parameters for controlling how fast memory is returned to the OS.
  392. DEFINE_double(tcmalloc_release_rate, 1,
  393. "Rate at which we release unused memory to the system. "
  394. "Zero means we never release memory back to the system. "
  395. "Increase this flag to return memory faster; decrease it "
  396. "to return memory slower. Reasonable rates are in the "
  397. "range [0,10]");
  398. //-------------------------------------------------------------------
  399. // Mapping from size to size_class and vice versa
  400. //-------------------------------------------------------------------
  401. // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
  402. // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
  403. // So for these larger sizes we have an array indexed by ceil(size/128).
  404. //
  405. // We flatten both logical arrays into one physical array and use
  406. // arithmetic to compute an appropriate index. The constants used by
  407. // ClassIndex() were selected to make the flattening work.
  408. //
  409. // Examples:
  410. // Size Expression Index
  411. // -------------------------------------------------------
  412. // 0 (0 + 7) / 8 0
  413. // 1 (1 + 7) / 8 1
  414. // ...
  415. // 1024 (1024 + 7) / 8 128
  416. // 1025 (1025 + 127 + (120<<7)) / 128 129
  417. // ...
  418. // 32768 (32768 + 127 + (120<<7)) / 128 376
  419. static const size_t kMaxSmallSize = 1024;
  420. static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128
  421. static const int add_amount[2] = { 7, 127 + (120 << 7) };
  422. static unsigned char class_array[377];
  423. // Compute index of the class_array[] entry for a given size
  424. static inline int ClassIndex(size_t s) {
  425. const int i = (s > kMaxSmallSize);
  426. return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
  427. }
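// Worked examples for ClassIndex() above (illustrative, not in the original
// listing), using kMaxSmallSize = 1024, shift_amount = {3, 7} and
// add_amount = {7, 127 + (120 << 7) = 15487} as defined above:
//
//   ClassIndex(8)     = (8 + 7) >> 3         = 1
//   ClassIndex(1024)  = (1024 + 7) >> 3      = 128
//   ClassIndex(1025)  = (1025 + 15487) >> 7  = 129
//   ClassIndex(32768) = (32768 + 15487) >> 7 = 376    (32768 == kMaxSize)
//
// The largest index is 376, which is why class_array[] has 377 entries.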
  428. // Mapping from size class to max size storable in that class
  429. static size_t class_to_size[kNumClasses];
  430. // Mapping from size class to number of pages to allocate at a time
  431. static size_t class_to_pages[kNumClasses];
  432. // TransferCache is used to cache transfers of num_objects_to_move[size_class]
  433. // back and forth between thread caches and the central cache for a given size
  434. // class.
  435. struct TCEntry {
  436. void *head; // Head of chain of objects.
  437. void *tail; // Tail of chain of objects.
  438. };
  439. // A central cache freelist can have anywhere from 0 to kNumTransferEntries
  440. // slots to put linked-list chains into. To keep memory usage bounded, the total
  441. // number of TCEntries across size classes is fixed. Currently each size
  442. // class is initially given one TCEntry which also means that the maximum any
  443. // one class can have is kNumClasses.
  444. static const int kNumTransferEntries = kNumClasses;
  445. // Note: the following only works for "n"s that fit in 32-bits, but
  446. // that is fine since we only use it for small sizes.
  447. static inline int LgFloor(size_t n) {
  448. int log = 0;
  449. for (int i = 4; i >= 0; --i) {
  450. int shift = (1 << i);
  451. size_t x = n >> shift;
  452. if (x != 0) {
  453. n = x;
  454. log += shift;
  455. }
  456. }
  457. ASSERT(n == 1);
  458. return log;
  459. }
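// Illustrative trace of LgFloor() above (not in the original listing) for
// n = 1536, whose binary representation is 110 0000 0000 (floor(log2) = 10):
//   shift 16: 1536 >> 16 == 0  -> skip
//   shift  8: 1536 >> 8  == 6  -> n = 6, log = 8
//   shift  4: 6 >> 4     == 0  -> skip
//   shift  2: 6 >> 2     == 1  -> n = 1, log = 10
//   shift  1: 1 >> 1     == 0  -> skip
// n ends at 1, satisfying the ASSERT, and the function returns 10.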
  460. // Some very basic linked-list functions that use void* as
  461. // storage.
  462. static inline void *SLL_Next(void *t) {
  463. return *(reinterpret_cast<void**>(t));
  464. }
  465. static inline void SLL_SetNext(void *t, void *n) {
  466. *(reinterpret_cast<void**>(t)) = n;
  467. }
  468. static inline void SLL_Push(void **list, void *element) {
  469. SLL_SetNext(element, *list);
  470. *list = element;
  471. }
  472. static inline void *SLL_Pop(void **list) {
  473. void *result = *list;
  474. *list = SLL_Next(*list);
  475. return result;
  476. }
  477. // Remove N elements from a linked list to which head points. head will be
  478. // modified to point to the new head. start and end will point to the first
  479. // and last nodes of the range. Note that the node at *end will have its
  480. // next pointer set to NULL after this function is called.
  481. static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
  482. if (N == 0) {
  483. *start = NULL;
  484. *end = NULL;
  485. return;
  486. }
  487. void *tmp = *head;
  488. for (int i = 1; i < N; ++i) {
  489. tmp = SLL_Next(tmp);
  490. }
  491. *start = *head;
  492. *end = tmp;
  493. *head = SLL_Next(tmp);
  494. // Unlink range from list.
  495. SLL_SetNext(tmp, NULL);
  496. }
  497. static inline void SLL_PushRange(void **head, void *start, void *end) {
  498. if (!start) return;
  499. SLL_SetNext(end, *head);
  500. *head = start;
  501. }
  502. static inline size_t SLL_Size(void *head) {
  503. int count = 0;
  504. while (head) {
  505. count++;
  506. head = SLL_Next(head);
  507. }
  508. return count;
  509. }
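// Illustrative helper (not in the original listing): the SLL_* functions above
// treat the first word of each element as its "next" pointer, so any chunk at
// least one pointer wide can serve as a list node. SLL_Example is a
// hypothetical name used only for this sketch.
static inline void SLL_Example() {
    void* chunks[3][2];                // three fake objects, each one pointer wide or more
    void* list = NULL;
    SLL_Push(&list, chunks[0]);
    SLL_Push(&list, chunks[1]);
    SLL_Push(&list, chunks[2]);        // list is now chunks[2] -> chunks[1] -> chunks[0]
    ASSERT(SLL_Size(list) == 3);
    void* popped = SLL_Pop(&list);     // removes and returns chunks[2]
    ASSERT(popped == chunks[2]);
    (void)popped;
    ASSERT(SLL_Size(list) == 2);
}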
  510. // Setup helper functions.
  511. static ALWAYS_INLINE size_t SizeClass(size_t size) {
  512. return class_array[ClassIndex(size)];
  513. }
  514. // Get the byte-size for a specified class
  515. static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
  516. return class_to_size[cl];
  517. }
  518. static int NumMoveSize(size_t size) {
  519. if (size == 0) return 0;
  520. // Use approx 64k transfers between thread and central caches.
  521. int num = static_cast<int>(64.0 * 1024.0 / size);
  522. if (num < 2) num = 2;
  523. // Clamp well below kMaxFreeListLength to avoid ping pong between central
  524. // and thread caches.
  525. if (num > static_cast<int>(0.8 * kMaxFreeListLength))
  526. num = static_cast<int>(0.8 * kMaxFreeListLength);
  527. // Also, avoid bringing too many objects into small-object free
  528. // lists. There are lots of such lists, and if we allow each one to
  529. // fetch too many at a time, we end up having to scavenge too often
  530. // (especially when there are lots of threads and each thread gets a
  531. // small allowance for its thread cache).
  532. //
  533. // TODO: Make thread cache free list sizes dynamic so that we do not
  534. // have to equally divide a fixed resource amongst lots of threads.
  535. if (num > 32) num = 32;
  536. return num;
  537. }
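// Worked examples for NumMoveSize() above (illustrative, not in the original
// listing), targeting ~64KB batches and applying the clamps described above:
//   size 32:    65536 / 32    = 2048 -> clamped to 0.8 * 256 = 204 -> clamped to 32
//   size 4096:  65536 / 4096  = 16   -> no clamping applies, result is 16
//   size 40000: 65536 / 40000 rounds down to 1 -> raised to the minimum of 2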
  538. // Initialize the mapping arrays
  539. static void InitSizeClasses() {
  540. // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
  541. if (ClassIndex(0) < 0) {
  542. MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
  543. abort();
  544. }
  545. if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
  546. MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
  547. abort();
  548. }
  549. // Compute the size classes we want to use
  550. size_t sc = 1; // Next size class to assign
  551. unsigned char alignshift = kAlignShift;
  552. int last_lg = -1;
  553. for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
  554. int lg = LgFloor(size);
  555. if (lg > last_lg) {
  556. // Increase alignment every so often.
  557. //
  558. // Since we double the alignment every time size doubles and
  559. // size >= 128, this means that space wasted due to alignment is
  560. // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256
  561. // bytes, so the space wasted as a percentage starts falling for
  562. // sizes > 2K.
  563. if ((lg >= 7) && (alignshift < 8)) {
  564. alignshift++;
  565. }
  566. last_lg = lg;
  567. }
  568. // Allocate enough pages so leftover is less than 1/8 of total.
  569. // This bounds wasted space to at most 12.5%.
  570. size_t psize = kPageSize;
  571. while ((psize % size) > (psize >> 3)) {
  572. psize += kPageSize;
  573. }
  574. const size_t my_pages = psize >> kPageShift;
  575. if (sc > 1 && my_pages == class_to_pages[sc-1]) {
  576. // See if we can merge this into the previous class without
  577. // increasing the fragmentation of the previous class.
  578. const size_t my_objects = (my_pages << kPageShift) / size;
  579. const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
  580. / class_to_size[sc-1];
  581. if (my_objects == prev_objects) {
  582. // Adjust last class to include this size
  583. class_to_size[sc-1] = size;
  584. continue;
  585. }
  586. }
  587. // Add new class
  588. class_to_pages[sc] = my_pages;
  589. class_to_size[sc] = size;
  590. sc++;
  591. }
  592. if (sc != kNumClasses) {
  593. MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
  594. sc, int(kNumClasses));
  595. abort();
  596. }
  597. // Initialize the mapping arrays
  598. int next_size = 0;
  599. for (unsigned char c = 1; c < kNumClasses; c++) {
  600. const size_t max_size_in_class = class_to_size[c];
  601. for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
  602. class_array[ClassIndex(s)] = c;
  603. }
  604. next_size = static_cast<int>(max_size_in_class + kAlignment);
  605. }
  606. // Double-check sizes just to be safe
  607. for (size_t size = 0; size <= kMaxSize; size++) {
  608. const size_t sc = SizeClass(size);
  609. if (sc == 0) {
  610. MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
  611. abort();
  612. }
  613. if (sc > 1 && size <= class_to_size[sc-1]) {
  614. MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
  615. "\n", sc, size);
  616. abort();
  617. }
  618. if (sc >= kNumClasses) {
  619. MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
  620. abort();
  621. }
  622. const size_t s = class_to_size[sc];
  623. if (size > s) {
  624. MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
  625. abort();
  626. }
  627. if (s == 0) {
  628. MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
  629. abort();
  630. }
  631. }
  632. // Initialize the num_objects_to_move array.
  633. for (size_t cl = 1; cl < kNumClasses; ++cl) {
  634. num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
  635. }
  636. #ifndef WTF_CHANGES
  637. if (false) {
  638. // Dump class sizes and maximum external wastage per size class
  639. for (size_t cl = 1; cl < kNumClasses; ++cl) {
  640. const int alloc_size = class_to_pages[cl] << kPageShift;
  641. const int alloc_objs = alloc_size / class_to_size[cl];
  642. const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
  643. const int max_waste = alloc_size - min_used;
  644. MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
  645. int(cl),
  646. int(class_to_size[cl-1] + 1),
  647. int(class_to_size[cl]),
  648. int(class_to_pages[cl] << kPageShift),
  649. max_waste * 100.0 / alloc_size
  650. );
  651. }
  652. }
  653. #endif
  654. }
  655. // -------------------------------------------------------------------------
  656. // Simple allocator for objects of a specified type. External locking
  657. // is required before accessing one of these objects.
  658. // -------------------------------------------------------------------------
  659. // Metadata allocator -- keeps stats about how many bytes allocated
  660. static uint64_t metadata_system_bytes = 0;
  661. static void* MetaDataAlloc(size_t bytes) {
  662. void* result = TCMalloc_SystemAlloc(bytes, 0);
  663. if (result != NULL) {
  664. metadata_system_bytes += bytes;
  665. }
  666. return result;
  667. }
  668. template <class T>
  669. class PageHeapAllocator {
  670. private:
  671. // How much to allocate from system at a time
  672. static const size_t kAllocIncrement = 32 << 10;
  673. // Aligned size of T
  674. static const size_t kAlignedSize
  675. = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
  676. // Free area from which to carve new objects
  677. char* free_area_;
  678. size_t free_avail_;
  679. // Free list of already carved objects
  680. void* free_list_;
  681. // Number of allocated but unfreed objects
  682. int inuse_;
  683. public:
  684. void Init() {
  685. ASSERT(kAlignedSize <= kAllocIncrement);
  686. inuse_ = 0;
  687. free_area_ = NULL;
  688. free_avail_ = 0;
  689. free_list_ = NULL;
  690. }
  691. T* New() {
  692. // Consult free list
  693. void* result;
  694. if (free_list_ != NULL) {
  695. result = free_list_;
  696. free_list_ = *(reinterpret_cast<void**>(result));
  697. } else {
  698. if (free_avail_ < kAlignedSize) {
  699. // Need more room
  700. free_area_ = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
  701. if (free_area_ == NULL) abort();
  702. free_avail_ = kAllocIncrement;
  703. }
  704. result = free_area_;
  705. free_area_ += kAlignedSize;
  706. free_avail_ -= kAlignedSize;
  707. }
  708. inuse_++;
  709. return reinterpret_cast<T*>(result);
  710. }
  711. void Delete(T* p) {
  712. *(reinterpret_cast<void**>(p)) = free_list_;
  713. free_list_ = p;
  714. inuse_--;
  715. }
  716. int inuse() const { return inuse_; }
  717. };
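// Illustrative usage (not in the original listing): span_allocator and
// stacktrace_allocator below are instances of this template. A typical call
// sequence, performed while holding the appropriate external lock, is:
//
//   static PageHeapAllocator<Span> allocator;
//   allocator.Init();            // once, before the first New()
//   Span* s = allocator.New();   // carved from a 32KB metadata block
//   ...
//   allocator.Delete(s);         // pushes the object onto the free list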
  718. // -------------------------------------------------------------------------
  719. // Span - a contiguous run of pages
  720. // -------------------------------------------------------------------------
  721. // Type that can hold a page number
  722. typedef uintptr_t PageID;
  723. // Type that can hold the length of a run of pages
  724. typedef uintptr_t Length;
  725. static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
  726. // Convert byte size into pages. This won't overflow, but may return
  727. // an unreasonably large value if bytes is huge enough.
  728. static inline Length pages(size_t bytes) {
  729. return (bytes >> kPageShift) +
  730. ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
  731. }
  732. // Convert a user size into the number of bytes that will actually be
  733. // allocated
  734. static size_t AllocationSize(size_t bytes) {
  735. if (bytes > kMaxSize) {
  736. // Large object: we allocate an integral number of pages
  737. ASSERT(bytes <= (kMaxValidPages << kPageShift));
  738. return pages(bytes) << kPageShift;
  739. } else {
  740. // Small object: find the size class to which it belongs
  741. return ByteSizeForClass(SizeClass(bytes));
  742. }
  743. }
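// Worked examples (illustrative, not in the original listing), with
// kPageShift = 12 (4K pages) and kMaxSize = 32768:
//   pages(1) = 1, pages(4096) = 1, pages(4097) = 2
//   AllocationSize(40000) = pages(40000) << 12 = 10 * 4096 = 40960
//   AllocationSize(10)    = ByteSizeForClass(SizeClass(10)), typically the
//                           16-byte class given the tables built above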
  744. // Information kept for a span (a contiguous run of pages).
  745. struct Span {
  746. PageID start; // Starting page number
  747. Length length; // Number of pages in span
  748. Span* next; // Used when in linked list
  749. Span* prev; // Used when in linked list
  750. void* objects; // Linked list of free objects
  751. unsigned int free : 1; // Is the span free
  752. #ifndef NO_TCMALLOC_SAMPLES
  753. unsigned int sample : 1; // Sampled object?
  754. #endif
  755. unsigned int sizeclass : 8; // Size-class for small objects (or 0)
  756. unsigned int refcount : 11; // Number of non-free objects
  757. bool decommitted : 1;
  758. #undef SPAN_HISTORY
  759. #ifdef SPAN_HISTORY
  760. // For debugging, we can keep a log of events per span
  761. int nexthistory;
  762. char history[64];
  763. int value[64];
  764. #endif
  765. };
  766. #if TCMALLOC_TRACK_DECOMMITED_SPANS
  767. #define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)
  768. #else
  769. #define ASSERT_SPAN_COMMITTED(span)
  770. #endif
  771. #ifdef SPAN_HISTORY
  772. void Event(Span* span, char op, int v = 0) {
  773. span->history[span->nexthistory] = op;
  774. span->value[span->nexthistory] = v;
  775. span->nexthistory++;
  776. if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
  777. }
  778. #else
  779. #define Event(s,o,v) ((void) 0)
  780. #endif
  781. // Allocator/deallocator for spans
  782. static PageHeapAllocator<Span> span_allocator;
  783. static Span* NewSpan(PageID p, Length len) {
  784. Span* result = span_allocator.New();
  785. memset(result, 0, sizeof(*result));
  786. result->start = p;
  787. result->length = len;
  788. #ifdef SPAN_HISTORY
  789. result->nexthistory = 0;
  790. #endif
  791. return result;
  792. }
  793. static inline void DeleteSpan(Span* span) {
  794. #ifndef NDEBUG
  795. // In debug mode, trash the contents of deleted Spans
  796. memset(span, 0x3f, sizeof(*span));
  797. #endif
  798. span_allocator.Delete(span);
  799. }
  800. // -------------------------------------------------------------------------
  801. // Doubly linked list of spans.
  802. // -------------------------------------------------------------------------
  803. static inline void DLL_Init(Span* list) {
  804. list->next = list;
  805. list->prev = list;
  806. }
  807. static inline void DLL_Remove(Span* span) {
  808. span->prev->next = span->next;
  809. span->next->prev = span->prev;
  810. span->prev = NULL;
  811. span->next = NULL;
  812. }
  813. static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) {
  814. return list->next == list;
  815. }
  816. #ifndef WTF_CHANGES
  817. static int DLL_Length(const Span* list) {
  818. int result = 0;
  819. for (Span* s = list->next; s != list; s = s->next) {
  820. result++;
  821. }
  822. return result;
  823. }
  824. #endif
  825. #if 0 /* Not needed at the moment -- causes compiler warnings if not used */
  826. static void DLL_Print(const char* label, const Span* list) {
  827. MESSAGE("%-10s %p:", label, list);
  828. for (const Span* s = list->next; s != list; s = s->next) {
  829. MESSAGE(" <%p,%u,%u>", s, s->start, s->length);
  830. }
  831. MESSAGE("\n");
  832. }
  833. #endif
  834. static inline void DLL_Prepend(Span* list, Span* span) {
  835. ASSERT(span->next == NULL);
  836. ASSERT(span->prev == NULL);
  837. span->next = list->next;
  838. span->prev = list;
  839. list->next->prev = span;
  840. list->next = span;
  841. }
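// Illustrative helper (not in the original listing): the span lists used below
// are circular with a sentinel node, so a list is empty exactly when the
// sentinel points at itself. DLL_Example is a hypothetical name used only for
// this sketch.
static inline void DLL_Example() {
    Span sentinel;
    DLL_Init(&sentinel);             // sentinel.next == sentinel.prev == &sentinel
    ASSERT(DLL_IsEmpty(&sentinel));
    Span s;
    s.next = NULL;
    s.prev = NULL;                   // DLL_Prepend requires an unlinked span
    DLL_Prepend(&sentinel, &s);      // list is now sentinel <-> s <-> sentinel
    ASSERT(!DLL_IsEmpty(&sentinel));
    DLL_Remove(&s);                  // unlinks s and clears its next/prev pointers
    ASSERT(DLL_IsEmpty(&sentinel));
}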
  842. // -------------------------------------------------------------------------
  843. // Stack traces kept for sampled allocations
  844. // The following state is protected by pageheap_lock_.
  845. // -------------------------------------------------------------------------
  846. // size/depth are made the same size as a pointer so that some generic
  847. // code below can conveniently cast them back and forth to void*.
  848. static const int kMaxStackDepth = 31;
  849. struct StackTrace {
  850. uintptr_t size; // Size of object
  851. uintptr_t depth; // Number of PC values stored in array below
  852. void* stack[kMaxStackDepth];
  853. };
  854. static PageHeapAllocator<StackTrace> stacktrace_allocator;
  855. static Span sampled_objects;
  856. // -------------------------------------------------------------------------
  857. // Map from page-id to per-page data
  858. // -------------------------------------------------------------------------
  859. // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
  860. // We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
  861. // because sometimes the sizeclass is all the information we need.
  862. // Selector class -- general selector uses 3-level map
  863. template <int BITS> class MapSelector {
  864. public:
  865. typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
  866. typedef PackedCache<BITS, uint64_t> CacheType;
  867. };
  868. // A two-level map for 32-bit machines
  869. template <> class MapSelector<32> {
  870. public:
  871. typedef TCMalloc_PageMap2<32-kPageShift> Type;
  872. typedef PackedCache<32-kPageShift, uint16_t> CacheType;
  873. };
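// Illustrative expansion (not in the original listing): the page heap below
// instantiates MapSelector<8 * sizeof(uintptr_t)>, so with kPageShift = 12:
//   64-bit builds: TCMalloc_PageMap3<52> with PackedCache<64, uint64_t>
//   32-bit builds: TCMalloc_PageMap2<20> with PackedCache<20, uint16_t>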
  874. // -------------------------------------------------------------------------
  875. // Page-level allocator
  876. // * Eager coalescing
  877. //
  878. // Heap for page-level allocation. We allow allocating and freeing
  879. // contiguous runs of pages (called a "span").
  880. // -------------------------------------------------------------------------
  881. class TCMalloc_PageHeap {
  882. public:
  883. void init();
  884. // Allocate a run of "n" pages. Returns zero if out of memory.
  885. Span* New(Length n);
  886. // Delete the span "[p, p+n-1]".
  887. // REQUIRES: span was returned by earlier call to New() and
  888. // has not yet been deleted.
  889. void Delete(Span* span);
  890. // Mark an allocated span as being used for small objects of the
  891. // specified size-class.
  892. // REQUIRES: span was returned by an earlier call to New()
  893. // and has not yet been deleted.
  894. void RegisterSizeClass(Span* span, size_t sc);
  895. // Split an allocated span into two spans: one of length "n" pages
  896. // followed by another span of length "span->length - n" pages.
  897. // Modifies "*span" to point to the first span of length "n" pages.
  898. // Returns a pointer to the second span.
  899. //
  900. // REQUIRES: "0 < n < span->length"
  901. // REQUIRES: !span->free
  902. // REQUIRES: span->sizeclass == 0
  903. Span* Split(Span* span, Length n);
  904. // Return the descriptor for the specified page.
  905. inline Span* GetDescriptor(PageID p) const {
  906. return reinterpret_cast<Span*>(pagemap_.get(p));
  907. }
  908. #ifdef WTF_CHANGES
  909. inline Span* GetDescriptorEnsureSafe(PageID p)
  910. {
  911. pagemap_.Ensure(p, 1);
  912. return GetDescriptor(p);
  913. }
  914. #endif
  915. // Dump state to stderr
  916. #ifndef WTF_CHANGES
  917. void Dump(TCMalloc_Printer* out);
  918. #endif
  919. // Return number of bytes allocated from system
  920. inline uint64_t SystemBytes() const { return system_bytes_; }
  921. // Return number of free bytes in heap
  922. uint64_t FreeBytes() const {
  923. return (static_cast<uint64_t>(free_pages_) << kPageShift);
  924. }
  925. bool Check();
  926. bool CheckList(Span* list, Length min_pages, Length max_pages);
  927. // Release all pages on the free list for reuse by the OS:
  928. void ReleaseFreePages();
  929. // Return 0 if we have no information, or else the correct sizeclass for p.
  930. // Reads and writes to pagemap_cache_ do not require locking.
  931. // The entries are 64 bits on 64-bit hardware and 16 bits on
  932. // 32-bit hardware, and we don't mind raciness as long as each read of
  933. // an entry yields a valid entry, not a partially updated entry.
  934. size_t GetSizeClassIfCached(PageID p) const {
  935. return pagemap_cache_.GetOrDefault(p, 0);
  936. }
  937. void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
  938. private:
  939. // Pick the appropriate map and cache types based on pointer size
  940. typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
  941. typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
  942. PageMap pagemap_;
  943. mutable PageMapCache pagemap_cache_;
  944. // We segregate spans of a given size into two circular linked
  945. // lists: one for normal spans, and one for spans whose memory
  946. // has been returned to the system.
  947. struct SpanList {
  948. Span normal;
  949. Span returned;
  950. };
  951. // List of free spans of length >= kMaxPages
  952. SpanList large_;
  953. // Array mapping from span length to a doubly linked list of free spans
  954. SpanList free_[kMaxPages];
  955. // Number of pages kept in free lists
  956. uintptr_t free_pages_;
  957. // Bytes allocated from system
  958. uint64_t system_bytes_;
  959. bool GrowHeap(Length n);
  960. // REQUIRES span->length >= n
  961. // Remove span from its free list, and move any leftover part of
  962. // span into appropriate free lists. Also update "span" to have
  963. // length exactly "n" and mark it as non-free so it can be returned
  964. // to the client.
  965. //
  966. // "released" is true iff "span" was found on a "returned" list.
  967. void Carve(Span* span, Length n, bool released);
  968. void RecordSpan(Span* span) {
  969. pagemap_.set(span->start, span);
  970. if (span->length > 1) {
  971. pagemap_.set(span->start + span->length - 1, span);
  972. }
  973. }
  974. // Allocate a large span of length == n. If successful, returns a
  975. // span of exactly the specified length. Else, returns NULL.
  976. Span* AllocLarge(Length n);
  977. // Incrementally release some memory to the system.
  978. // IncrementalScavenge(n) is called whenever n pages are freed.
  979. void IncrementalScavenge(Length n);
  980. // Number of pages to deallocate before doing more scavenging
  981. int64_t scavenge_counter_;
  982. // Index of last free list we scavenged
  983. size_t scavenge_index_;
  984. #if defined(WTF_CHANGES) && PLATFORM(DARWIN)
  985. friend class FastMallocZone;
  986. #endif
  987. };
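// Illustrative usage of the page heap (not in the original listing); these
// calls are made with the pageheap lock held, as described in the header
// comment. "heap" and "cl" are hypothetical names used only for this sketch.
//
//   Span* span = heap->New(3);               // a run of 3 contiguous pages, or NULL
//   if (span)
//       heap->RegisterSizeClass(span, cl);   // only if it will hold small objects
//   ...
//   heap->Delete(span);                      // return the pages, coalescing neighbors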
  988. void TCMalloc_PageHeap::init()
  989. {
  990. pagemap_.init(MetaDataAlloc);
  991. pagemap_cache_ = PageMapCache(0);
  992. free_pages_ = 0;
  993. system_bytes_ = 0;
  994. scavenge_counter_ = 0;
  995. // Start scavenging at kMaxPages list
  996. scavenge_index_ = kMaxPages-1;
  997. COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
  998. DLL_Init(&large_.normal);
  999. DLL_Init(&large_.returned);
  1000. for (size_t i = 0; i < kMaxPages; i++) {
  1001. DLL_Init(&free_[i].normal);
  1002. DLL_Init(&free_[i].returned);
  1003. }
  1004. }
  1005. inline Span* TCMalloc_PageHeap::New(Length n) {
  1006. ASSERT(Check());
  1007. ASSERT(n > 0);
  1008. // Find first size >= n that has a non-empty list
  1009. for (Length s = n; s < kMaxPages; s++) {
  1010. Span* ll = NULL;
  1011. bool released = false;
  1012. if (!DLL_IsEmpty(&free_[s].normal)) {
  1013. // Found normal span
  1014. ll = &free_[s].normal;
  1015. } else if (!DLL_IsEmpty(&free_[s].returned)) {
  1016. // Found returned span; reallocate it
  1017. ll = &free_[s].returned;
  1018. released = true;
  1019. } else {
  1020. // Keep looking in larger classes
  1021. continue;
  1022. }
  1023. Span* result = ll->next;
  1024. Carve(result, n, released);
  1025. #if TCMALLOC_TRACK_DECOMMITED_SPANS
  1026. if (result->decommitted) {
  1027. TCMalloc_SystemCommit(reinterpret_cast<void*>(result->start << kPageShift), static_cast<size_t>(n << kPageShift));
  1028. result->decommitted = false;
  1029. }
  1030. #endif
  1031. ASSERT(Check());
  1032. free_pages_ -= n;
  1033. return result;
  1034. }
  1035. Span* result = AllocLarge(n);
  1036. if (result != NULL) {
  1037. ASSERT_SPAN_COMMITTED(result);
  1038. return result;
  1039. }
  1040. // Grow the heap and try again
  1041. if (!GrowHeap(n)) {
  1042. ASSERT(Check());
  1043. return NULL;
  1044. }
  1045. return AllocLarge(n);
  1046. }
  1047. Span* TCMalloc_PageHeap::AllocLarge(Length n) {
  1048. // find the best span (closest to n in size).
  1049. // The following loops implement address-ordered best-fit.
  1050. bool from_released = false;
  1051. Span *best = NULL;
  1052. // Search through normal list
  1053. for (Span* span = large_.normal.next;
  1054. span != &large_.normal;
  1055. span = span->next) {
  1056. if (span->length >= n) {
  1057. if ((best == NULL)
  1058. || (span->length < best->length)
  1059. || ((span->length == best->length) && (span->start < best->start))) {
  1060. best = span;
  1061. from_released = false;
  1062. }
  1063. }
  1064. }
  1065. // Search through released list in case it has a better fit
  1066. for (Span* span = large_.returned.next;
  1067. span != &large_.returned;
  1068. span = span->next) {
  1069. if (span->length >= n) {
  1070. if ((best == NULL)
  1071. || (span->length < best->length)
  1072. || ((span->length == best->length) && (span->start < best->start))) {
  1073. best = span;
  1074. from_released = true;
  1075. }
  1076. }
  1077. }
  1078. if (best != NULL) {
  1079. Carve(best, n, from_released);
  1080. #if TCMALLOC_TRACK_DECOMMITED_SPANS
  1081. if (best->decommitted) {
  1082. TCMalloc_SystemCommit(reinterpret_cast<void*>(best->start << kPageShift), static_cast<size_t>(n << kPageShift));
  1083. best->decommitted = false;
  1084. }
  1085. #endif
  1086. ASSERT(Check());
  1087. free_pages_ -= n;
  1088. return best;
  1089. }
  1090. return NULL;
  1091. }
  1092. Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
  1093. ASSERT(0 < n);
  1094. ASSERT(n < span->length);
  1095. ASSERT(!span->free);
  1096. ASSERT(span->sizeclass == 0);
  1097. Event(span, 'T', n);
  1098. const Length extra = span->length - n;
  1099. Span* leftover = NewSpan(span->start + n, extra);
  1100. Event(leftover, 'U', extra);
  1101. RecordSpan(leftover);
  1102. pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
  1103. span->length = n;
  1104. return leftover;
  1105. }
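// Illustrative example (not in the original listing): calling Split(span, 2)
// on a 5-page span starting at page P shrinks *span to cover [P, P+1] and
// returns a new leftover span covering [P+2, P+4]; both spans keep their
// boundary pages registered in the pagemap.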
  1106. #if !TCMALLOC_TRACK_DECOMMITED_SPANS
  1107. static ALWAYS_INLINE void propagateDecommittedState(Span*, Span*) { }
  1108. #else
  1109. static ALWAYS_INLINE void propagateDecommittedState(Span* destination, Span* source)
  1110. {
  1111. destination->decommitted = source->decommitted;
  1112. }
  1113. #endif
  1114. inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
  1115. ASSERT(n > 0);
  1116. DLL_Remove(span);
  1117. span->free = 0;
  1118. Event(span, 'A', n);
  1119. const int extra = static_cast<int>(span->length - n);
  1120. ASSERT(extra >= 0);
  1121. if (extra > 0) {
  1122. Span* leftover = NewSpan(span->start + n, extra);
  1123. leftover->free = 1;
  1124. propagateDecommittedState(leftover, span);
  1125. Event(leftover, 'S', extra);
  1126. RecordSpan(leftover);
  1127. // Place leftover span on appropriate free list
  1128. SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_;
  1129. Span* dst = released ? &listpair->returned : &listpair->normal;
  1130. DLL_Prepend(dst, leftover);
  1131. span->length = n;
  1132. pagemap_.set(span->start + n - 1, span);
  1133. }
  1134. }
  1135. #if !TCMALLOC_TRACK_DECOMMITED_SPANS
  1136. static ALWAYS_INLINE void mergeDecommittedStates(Span*, Span*) { }
  1137. #else
  1138. static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other)
  1139. {
  1140. if (other->decommitted)
  1141. destination->decommitted = true;
  1142. }
  1143. #endif
  1144. inline void TCMalloc_PageHeap::Delete(Span* span) {
  1145. ASSERT(Check());
  1146. ASSERT(!span->free);
  1147. ASSERT(span->length > 0);
  1148. ASSERT(GetDescriptor(span->start) == span);
  1149. ASSERT(GetDescriptor(span->start + span->length - 1) == span);
  1150. span->sizeclass = 0;
  1151. #ifndef NO_TCMALLOC_SAMPLES
  1152. span->sample = 0;
  1153. #endif
  1154. // Coalesce -- we guarantee that "p" != 0, so no bounds checking
  1155. // necessary. We do not bother resetting the stale pagemap
  1156. // entries for the pieces we are merging together because we only
  1157. // care about the pagemap entries for the boundaries.
  1158. //
  1159. // Note that the spans we merge into "span" may come out of
  1160. // a "returned" list. For simplicity, we move these into the
  1161. // "normal" list of the appropriate size class.
  1162. const PageID p = span->start;
  1163. const Length n = span->length;
  1164. Span* prev = GetDescriptor(p-1);
  1165. if (prev != NULL && prev->free) {
  1166. // Merge preceding span into this span
  1167. ASSERT(prev->start + prev->length == p);
  1168. const Length len = prev->length;
  1169. mergeDecommittedStates(span, prev);
  1170. DLL_Remove(prev);
  1171. DeleteSpan(prev);
  1172. span->start -= len;
  1173. span->length += len;
  1174. pagemap_.set(span->start, span);
  1175. Event(span, 'L', len);
  1176. }
  1177. Span* next = GetDescriptor(p+n);
  1178. if (next != NULL && next->free) {
  1179. // Merge next span into this span
  1180. ASSERT(next->start == p+n);
  1181. const Length len = next->length;
  1182. mergeDecommittedStates(span, next);
  1183. DLL_Remove(next);
  1184. DeleteSpan(next);
  1185. span->length += len;
  1186. pagemap_.set(span->start + span->length - 1, span);
  1187. Event(span, 'R', len);
  1188. }
  1189. Event(span, 'D', span->length);
  1190. span->free = 1;
  1191. if (span->length < kMaxPages) {
  1192. DLL_Prepend(&free_[span->length].normal, span);
  1193. } else {
  1194. DLL_Prepend(&large_.normal, span);
  1195. }
  1196. free_pages_ += n;
  1197. IncrementalScavenge(n);
  1198. ASSERT(Check());
  1199. }
  1200. void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
  1201. // Fast path; not yet time to release memory
  1202. scavenge_counter_ -= n;
  1203. if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
  1204. // If there is nothing to release, wait for so many pages before
  1205. // scavenging again. With 4K pages, this comes to 1MB of memory.
  1206. static const size_t kDefaultReleaseDelay = 1 << 8;
  1207. // Find index of free list to scavenge
  1208. size_t index = scavenge_index_ + 1;
  1209. for (size_t i = 0; i < kMaxPages+1; i++) {
  1210. if (index > kMaxPages) index = 0;
  1211. SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
  1212. if (!DLL_IsEmpty(&slist->normal)) {
  1213. // Release the last span on the normal portion of this list
  1214. Span* s = slist->normal.prev;
  1215. DLL_Remove(s);
  1216. TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
  1217. static_cast<size_t>(s->length << kPageShift));
  1218. #if TCMALLOC_TRACK_DECOMMITED_SPANS
  1219. s->decommitted = true;
  1220. #endif
  1221. DLL_Prepend(&slist->returned, s);
  1222. scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
  1223. if (index == kMaxPages && !DLL_IsEmpty(&slist->normal))
  1224. scavenge_index_ = index - 1;
  1225. else
  1226. scavenge_index_ = index;
  1227. return;
  1228. }
  1229. index++;
  1230. }
  1231. // Nothing to scavenge, delay for a while
  1232. scavenge_counter_ = kDefaultReleaseDelay;
  1233. }
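// Worked examples for the scavenge_counter_ formula above (illustrative, not
// in the original listing), with kDefaultReleaseDelay = 256:
//   free_pages_ = 0     -> min(256, 256 - 0)   = 256 -> max(64, 256) = 256
//   free_pages_ = 40960 -> min(256, 256 - 160) = 96  -> max(64, 96)  = 96
//   free_pages_ = 61440 -> min(256, 256 - 240) = 16  -> max(64, 16)  = 64
// i.e. the more free pages the heap is holding, the sooner the next release
// happens, with a floor of 64 freed pages between scavenges.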
  1234. void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
  1235. // Associate span object with all interior pages as well
  1236. ASSERT(!span->free);
  1237. ASSERT(GetDescriptor(span->start) == span);
  1238. ASSERT(GetDescriptor(span->start+span->length-1) == span);
  1239. Event(span, 'C', sc);
  1240. span->sizeclass = static_cast<unsigned int>(sc);
  1241. for (Length i = 1; i < span->length-1; i++) {
  1242. pagemap_.set(span->start+i, span);
  1243. }
  1244. }
  1245. #ifndef WTF_CHANGES
  1246. static double PagesToMB(uint64_t pages) {
  1247. return (pages << kPageShift) / 1048576.0;
  1248. }
  1249. void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
  1250. int nonempty_sizes = 0;
  1251. for (int s = 0; s < kMaxPages; s++) {
  1252. if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
  1253. nonempty_sizes++;
  1254. }
  1255. }
  1256. out->printf("------------------------------------------------\n");
  1257. out->printf("PageHeap: %d sizes; %6.1f MB free\n",
  1258. nonempty_sizes, PagesToMB(free_pages_));
  1259. out->printf("------------------------------------------------\n");
  1260. uint64_t total_normal = 0;
  1261. uint64_t total_returned = 0;
  1262. for (int s = 0; s < kMaxPages; s++) {
  1263. const int n_length = DLL_Length(&free_[s].normal);
  1264. const int r_length = DLL_Length(&free_[s].returned);
  1265. if (n_length + r_length > 0) {
  1266. uint64_t n_pages = s * n_length;
  1267. uint64_t r_pages = s * r_length;
  1268. total_normal += n_pages;
  1269. total_returned += r_pages;
  1270. out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
  1271. "; unmapped: %6.1f MB; %6.1f MB cum\n",
  1272. s,
  1273. (n_length + r_length),
  1274. PagesToMB(n_pages + r_pages),
  1275. PagesToMB(total_normal + total_returned),
  1276. PagesToMB(r_pages),
  1277. PagesToMB(total_returned));
  1278. }
  1279. }
  1280. uint64_t n_pages = 0;
  1281. uint64_t r_pages = 0;
  1282. int n_spans = 0;
  1283. int r_spans = 0;
  1284. out->printf("Normal large spans:\n");
  1285. for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
  1286. out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
  1287. s->length, PagesToMB(s->length));
  1288. n_pages += s->length;
  1289. n_spans++;
  1290. }
  1291. out->printf("Unmapped large spans:\n");
  1292. for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
  1293. out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
  1294. s->length, PagesToMB(s->length));
  1295. r_pages += s->length;
  1296. r_spans++;
  1297. }
  1298. total_normal += n_pages;
  1299. total_returned += r_pages;
  1300. out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum"
  1301. "; unmapped: %6.1f MB; %6.1f MB cum\n",
  1302. (n_spans + r_spans),
  1303. PagesToMB(n_pages + r_pages),
  1304. PagesToMB(total_normal + total_returned),
  1305. PagesToMB(r_pages),
  1306. PagesToMB(total_returned));
  1307. }
  1308. #endif
  1309. bool TCMalloc_PageHeap::GrowHeap(Length n) {
  1310. ASSERT(kMaxPages >= kMinSystemAlloc);
  1311. if (n > kMaxValidPages) return false;
  1312. Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
  1313. size_t actual_size;
  1314. void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
  1315. if (ptr == NULL) {
  1316. if (n < ask) {
  1317. // Try growing just "n" pages
  1318. ask = n;
  1319. ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
  1320. }
  1321. if (ptr == NULL) return false;
  1322. }
  1323. ask = actual_size >> kPageShift;
  1324. uint64_t old_system_bytes = system_bytes_;
  1325. system_bytes_ += (ask << kPageShift);
  1326. const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
  1327. ASSERT(p > 0);
  1328. // If we already have a lot of pages allocated, just preallocate a bunch of
  1329. // memory for the page map. This prevents fragmentation by pagemap metadata
  1330. // when a program keeps allocating and freeing large blocks.
  1331. if (old_system_bytes < kPageMapBigAllocationThreshold
  1332. && system_bytes_ >= kPageMapBigAllocationThreshold) {
  1333. pagemap_.PreallocateMoreMemory();
  1334. }
  1335. // Make sure pagemap_ has entries for all of the new pages.
  1336. // Plus ensure one before and one after so coalescing code
  1337. // does not need bounds-checking.
  1338. if (pagemap_.Ensure(p-1, ask+2)) {
