
/Objects/obmalloc.c

http://unladen-swallow.googlecode.com/


   1#include "Python.h"
   2
   3#ifdef WITH_PYMALLOC
   4
   5/* An object allocator for Python.
   6
   7   Here is an introduction to the layers of the Python memory architecture,
   8   showing where the object allocator is actually used (layer +2). It is
   9   called for every object allocation and deallocation (PyObject_New/Del),
  10   unless the object-specific allocators implement a proprietary allocation
  11   scheme (ex.: ints use a simple free list). This is also the place where
  12   the cyclic garbage collector operates selectively on container objects.
  13
  14
  15        Object-specific allocators
  16    _____   ______   ______       ________
  17   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
  18+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
  19    _______________________________       |                           |
  20   [   Python's object allocator   ]      |                           |
  21+2 | ####### Object memory ####### | <------ Internal buffers ------> |
  22    ______________________________________________________________    |
  23   [          Python's raw memory allocator (PyMem_ API)          ]   |
  24+1 | <----- Python memory (under PyMem manager's control) ------> |   |
  25    __________________________________________________________________
  26   [    Underlying general-purpose allocator (ex: C library malloc)   ]
  27 0 | <------ Virtual memory allocated for the python process -------> |
  28
  29   =========================================================================
  30    _______________________________________________________________________
  31   [                OS-specific Virtual Memory Manager (VMM)               ]
  32-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
  33    __________________________________   __________________________________
  34   [                                  ] [                                  ]
  35-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
  36
  37*/
  38/*==========================================================================*/
  39
  40/* A fast, special-purpose memory allocator for small blocks, to be used
  41   on top of a general-purpose malloc -- heavily based on previous art. */
  42
  43/* Vladimir Marangozov -- August 2000 */
  44
  45/*
  46 * "Memory management is where the rubber meets the road -- if we do the wrong
  47 * thing at any level, the results will not be good. And if we don't make the
  48 * levels work well together, we are in serious trouble." (1)
  49 *
  50 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
  51 *    "Dynamic Storage Allocation: A Survey and Critical Review",
  52 *    in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
  53 */
  54
  55/* #undef WITH_MEMORY_LIMITS */		/* disable mem limit checks  */
  56
  57/*==========================================================================*/
  58
  59/*
  60 * Allocation strategy abstract:
  61 *
  62 * For small requests, the allocator sub-allocates <Big> blocks of memory.
  63 * Requests greater than 256 bytes are routed to the system's allocator.
  64 *
  65 * Small requests are grouped in size classes spaced 8 bytes apart, due
  66 * to the required valid alignment of the returned address. Requests of
  67 * a particular size are serviced from memory pools of 4K (one VMM page).
  68 * Pools are fragmented on demand and contain free lists of blocks of one
  69 * particular size class. In other words, there is a fixed-size allocator
  70 * for each size class. Free pools are shared by the different allocators
  71 * thus minimizing the space reserved for a particular size class.
  72 *
  73 * This allocation strategy is a variant of what is known as "simple
  74 * segregated storage based on array of free lists". The main drawback of
  75 * simple segregated storage is that we might end up with a lot of reserved
  76 * memory for the different free lists, which degenerate over time. To avoid
  77 * this, we partition each free list in pools and we share dynamically the
  78 * reserved space between all free lists. This technique is quite efficient
  79 * for memory intensive programs which allocate mainly small-sized blocks.
  80 *
  81 * For small requests we have the following table:
  82 *
  83 * Request in bytes	Size of allocated block      Size class idx
  84 * ----------------------------------------------------------------
  85 *        1-8                     8                       0
  86 *	  9-16                   16                       1
  87 *	 17-24                   24                       2
  88 *	 25-32                   32                       3
  89 *	 33-40                   40                       4
  90 *	 41-48                   48                       5
  91 *	 49-56                   56                       6
  92 *	 57-64                   64                       7
  93 *	 65-72                   72                       8
  94 *	  ...                   ...                     ...
  95 *	241-248                 248                      30
  96 *	249-256                 256                      31
  97 *
  98 *	0, 257 and up: routed to the underlying allocator.
  99 */
 100
 101/*==========================================================================*/
 102
 103/*
 104 * -- Main tunable settings section --
 105 */
 106
 107/*
 108 * Alignment of addresses returned to the user. 8-bytes alignment works
 109 * on most current architectures (with 32-bit or 64-bit address busses).
 110 * The alignment value is also used for grouping small requests in size
 111 * classes spaced ALIGNMENT bytes apart.
 112 *
 113 * You shouldn't change this unless you know what you are doing.
 114 */
 115#define ALIGNMENT		8		/* must be 2^N */
 116#define ALIGNMENT_SHIFT		3
 117#define ALIGNMENT_MASK		(ALIGNMENT - 1)
 118
 119/* Return the number of bytes in size class I, as a uint. */
 120#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
 121
 122/*
 123 * Max size threshold below which malloc requests are considered to be
 124 * small enough in order to use preallocated memory pools. You can tune
 125 * this value according to your application behaviour and memory needs.
 126 *
 127 * The following invariants must hold:
 128 *	1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 256
 129 *	2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
 130 *
 131 * Although not required, for better performance and space efficiency,
 132 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
 133 */
 134#define SMALL_REQUEST_THRESHOLD	256
 135#define NB_SMALL_SIZE_CLASSES	(SMALL_REQUEST_THRESHOLD / ALIGNMENT)
 136
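/*
 * Illustrative sketch of the size-class mapping in the table above; it
 * mirrors the computation PyObject_Malloc performs later in this file
 * ((nbytes - 1) >> ALIGNMENT_SHIFT), and (size + 1) << ALIGNMENT_SHIFT is
 * what the INDEX2SIZE macro expands to.  The helper name
 * `size_class_demo` is hypothetical and not part of the allocator.
 */
static void
size_class_demo(size_t nbytes)
{
	unsigned int size;	/* size class index */

	assert(nbytes > 0 && nbytes <= SMALL_REQUEST_THRESHOLD);
	size = (unsigned int)(nbytes - 1) >> ALIGNMENT_SHIFT;
	assert(size < NB_SMALL_SIZE_CLASSES);
	/* e.g. nbytes == 20  ->  size == 2  ->  block size == 24 */
	assert(((size + 1) << ALIGNMENT_SHIFT) >= nbytes);
}
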
 137/*
 138 * The system's VMM page size can be obtained on most unices with a
 139 * getpagesize() call or deduced from various header files. To make
 140 * things simpler, we assume that it is 4K, which is OK for most systems.
 141 * It is probably better if this is the native page size, but it doesn't
 142 * have to be.  In theory, if SYSTEM_PAGE_SIZE is larger than the native page
 143 * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
 144 * violation fault.  4K is apparently OK for all the platforms that python
 145 * currently targets.
 146 */
 147#define SYSTEM_PAGE_SIZE	(4 * 1024)
 148#define SYSTEM_PAGE_SIZE_MASK	(SYSTEM_PAGE_SIZE - 1)
 149
 150/*
 151 * Maximum amount of memory managed by the allocator for small requests.
 152 */
 153#ifdef WITH_MEMORY_LIMITS
 154#ifndef SMALL_MEMORY_LIMIT
 155#define SMALL_MEMORY_LIMIT	(64 * 1024 * 1024)	/* 64 MB -- more? */
 156#endif
 157#endif
 158
 159/*
 160 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 161 * on a page boundary. This is a reserved virtual address space for the
 162 * current process (obtained through a malloc call). This in no way means
 163 * that the memory arenas will be used entirely. A malloc(<Big>) is usually
 164 * an address range reservation for <Big> bytes, unless all pages within this
 165 * space are referenced subsequently. So malloc'ing big blocks and not using
 166 * them does not mean "wasting memory": it only wastes addressable range.
 167 *
 168 * Therefore, allocating arenas with malloc is not optimal, because there is
 169 * some address space wastage, but this is the most portable way to request
 170 * memory from the system across various platforms.
 171 */
 172#define ARENA_SIZE		(256 << 10)	/* 256KB */
 173
 174#ifdef WITH_MEMORY_LIMITS
 175#define MAX_ARENAS		(SMALL_MEMORY_LIMIT / ARENA_SIZE)
 176#endif
 177
 178/*
 179 * Size of the pools used for small blocks. Should be a power of 2,
 180 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
 181 */
 182#define POOL_SIZE		SYSTEM_PAGE_SIZE	/* must be 2^N */
 183#define POOL_SIZE_MASK		SYSTEM_PAGE_SIZE_MASK
 184
 185/*
 186 * -- End of tunable settings section --
 187 */
 188
 189/*==========================================================================*/
 190
 191/*
 192 * Locking
 193 *
 194 * To reduce lock contention, it would probably be better to refine the
 195 * crude function locking with per-size-class locking. I'm not positive,
 196 * however, that it's worth switching to such a locking policy because
 197 * of the performance penalty it might introduce.
 198 *
 199 * The following macros describe the simplest (should also be the fastest)
 200 * lock object on a particular platform and the init/fini/lock/unlock
 201 * operations on it. The locks defined here are not expected to be recursive
 202 * because it is assumed that they will always be called in the order:
 203 * INIT, [LOCK, UNLOCK]*, FINI.
 204 */
 205
 206/*
 207 * Python's threads are serialized, so object malloc locking is disabled.
 208 */
 209#define SIMPLELOCK_DECL(lock)	/* simple lock declaration		*/
 210#define SIMPLELOCK_INIT(lock)	/* allocate (if needed) and initialize	*/
 211#define SIMPLELOCK_FINI(lock)	/* free/destroy an existing lock 	*/
 212#define SIMPLELOCK_LOCK(lock)	/* acquire released lock */
 213#define SIMPLELOCK_UNLOCK(lock)	/* release acquired lock */
 214
 215/*
 216 * Basic types
 217 * I don't care if these are defined in <sys/types.h> or elsewhere. Axiom.
 218 */
 219#undef  uchar
 220#define uchar	unsigned char	/* assuming == 8 bits  */
 221
 222#undef  uint
 223#define uint	unsigned int	/* assuming >= 16 bits */
 224
 225#undef  ulong
 226#define ulong	unsigned long	/* assuming >= 32 bits */
 227
 228#undef uptr
 229#define uptr	Py_uintptr_t
 230
 231/* When you say memory, my mind reasons in terms of (pointers to) blocks */
 232typedef uchar block;
 233
 234/* Pool for small blocks. */
 235struct pool_header {
 236	union { block *_padding;
 237		uint count; } ref;	/* number of allocated blocks    */
 238	block *freeblock;		/* pool's free list head         */
 239	struct pool_header *nextpool;	/* next pool of this size class  */
 240	struct pool_header *prevpool;	/* previous pool       ""        */
 241	uint arenaindex;		/* index into arenas of base adr */
 242	uint szidx;			/* block size class index	 */
 243	uint nextoffset;		/* bytes to virgin block	 */
 244	uint maxnextoffset;		/* largest valid nextoffset	 */
 245};
 246
 247typedef struct pool_header *poolp;
 248
 249/* Record keeping for arenas. */
 250struct arena_object {
 251	/* The address of the arena, as returned by malloc.  Note that 0
 252	 * will never be returned by a successful malloc, and is used
 253	 * here to mark an arena_object that doesn't correspond to an
 254	 * allocated arena.
 255	 */
 256	uptr address;
 257
 258	/* Pool-aligned pointer to the next pool to be carved off. */
 259	block* pool_address;
 260
 261	/* The number of available pools in the arena:  free pools + never-
 262	 * allocated pools.
 263	 */
 264	uint nfreepools;
 265
 266	/* The total number of pools in the arena, whether or not available. */
 267	uint ntotalpools;
 268
 269	/* Singly-linked list of available pools. */
 270	struct pool_header* freepools;
 271
 272	/* Whenever this arena_object is not associated with an allocated
 273	 * arena, the nextarena member is used to link all unassociated
 274	 * arena_objects in the singly-linked `unused_arena_objects` list.
 275	 * The prevarena member is unused in this case.
 276	 *
 277	 * When this arena_object is associated with an allocated arena
 278	 * with at least one available pool, both members are used in the
 279	 * doubly-linked `usable_arenas` list, which is maintained in
 280	 * increasing order of `nfreepools` values.
 281	 *
 282	 * Else this arena_object is associated with an allocated arena
 283	 * all of whose pools are in use.  `nextarena` and `prevarena`
 284	 * are both meaningless in this case.
 285	 */
 286	struct arena_object* nextarena;
 287	struct arena_object* prevarena;
 288};
 289
 290#undef  ROUNDUP
 291#define ROUNDUP(x)		(((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK)
 292#define POOL_OVERHEAD		ROUNDUP(sizeof(struct pool_header))
 293
 294#define DUMMY_SIZE_IDX		0xffff	/* size class of newly cached pools */
 295
 296/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
 297#define POOL_ADDR(P) ((poolp)((uptr)(P) & ~(uptr)POOL_SIZE_MASK))
 298
 299/* Return total number of blocks in pool of size index I, as a uint. */
 300#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
 301
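/*
 * Minimal sketch of what the two macros above compute; the helper name
 * `pool_macros_demo` is hypothetical.  POOL_ADDR rounds an address down
 * to the nearest POOL_SIZE boundary, which for a pymalloc-issued address
 * is the pool_header of the pool containing it, and NUMBLOCKS counts how
 * many blocks of a size class fit after the rounded-up header.
 */
static void
pool_macros_demo(void *p)
{
	poolp pool = POOL_ADDR(p);

	/* The computed base is POOL_SIZE-aligned and at most
	 * POOL_SIZE - 1 bytes below p. */
	assert(((uptr)pool & POOL_SIZE_MASK) == 0);
	assert((uptr)p - (uptr)pool < POOL_SIZE);

	/* Size class 0 (8-byte blocks): everything after the rounded-up
	 * pool_header is carved into 8-byte blocks. */
	assert(NUMBLOCKS(0) == (POOL_SIZE - POOL_OVERHEAD) / 8);
}
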
 302/*==========================================================================*/
 303
 304/*
 305 * This malloc lock
 306 */
 307SIMPLELOCK_DECL(_malloc_lock)
 308#define LOCK()		SIMPLELOCK_LOCK(_malloc_lock)
 309#define UNLOCK()	SIMPLELOCK_UNLOCK(_malloc_lock)
 310#define LOCK_INIT()	SIMPLELOCK_INIT(_malloc_lock)
 311#define LOCK_FINI()	SIMPLELOCK_FINI(_malloc_lock)
 312
 313/*
 314 * Pool table -- headed, circular, doubly-linked lists of partially used pools.
 315
 316This is involved.  For an index i, usedpools[i+i] is the header for a list of
 317all partially used pools holding small blocks with "size class idx" i. So
 318usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
 31916, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
 320
 321Pools are carved off an arena's highwater mark (an arena_object's pool_address
 322member) as needed.  Once carved off, a pool is in one of three states forever
 323after:
 324
 325used == partially used, neither empty nor full
 326    At least one block in the pool is currently allocated, and at least one
 327    block in the pool is not currently allocated (note this implies a pool
 328    has room for at least two blocks).
 329    This is a pool's initial state, as a pool is created only when malloc
 330    needs space.
 331    The pool holds blocks of a fixed size, and is in the circular list headed
 332    at usedpools[i] (see above).  It's linked to the other used pools of the
 333    same size class via the pool_header's nextpool and prevpool members.
 334    If all but one block is currently allocated, a malloc can cause a
 335    transition to the full state.  If all but one block is not currently
 336    allocated, a free can cause a transition to the empty state.
 337
 338full == all the pool's blocks are currently allocated
 339    On transition to full, a pool is unlinked from its usedpools[] list.
 340    It's not linked to from anything then anymore, and its nextpool and
 341    prevpool members are meaningless until it transitions back to used.
 342    A free of a block in a full pool puts the pool back in the used state.
 343    Then it's linked in at the front of the appropriate usedpools[] list, so
 344    that the next allocation for its size class will reuse the freed block.
 345
 346empty == all the pool's blocks are currently available for allocation
 347    On transition to empty, a pool is unlinked from its usedpools[] list,
 348    and linked to the front of its arena_object's singly-linked freepools list,
 349    via its nextpool member.  The prevpool member has no meaning in this case.
 350    Empty pools have no inherent size class:  the next time a malloc finds
 351    an empty list in usedpools[], it takes the first pool off of freepools.
 352    If the size class needed happens to be the same as the size class the pool
 353    last had, some pool initialization can be skipped.
 354
 355
 356Block Management
 357
 358Blocks within pools are again carved out as needed.  pool->freeblock points to
 359the start of a singly-linked list of free blocks within the pool.  When a
 360block is freed, it's inserted at the front of its pool's freeblock list.  Note
 361that the available blocks in a pool are *not* linked all together when a pool
 362is initialized.  Instead only "the first two" (lowest addresses) blocks are
 363set up, returning the first such block, and setting pool->freeblock to a
 364 one-block list holding the second such block.  This is consistent with the fact that
 365pymalloc strives at all levels (arena, pool, and block) never to touch a piece
 366of memory until it's actually needed.
 367
 368So long as a pool is in the used state, we're certain there *is* a block
 369available for allocating, and pool->freeblock is not NULL.  If pool->freeblock
 370points to the end of the free list before we've carved the entire pool into
 371blocks, that means we simply haven't yet gotten to one of the higher-address
 372blocks.  The offset from the pool_header to the start of "the next" virgin
 373block is stored in the pool_header nextoffset member, and the largest value
 374of nextoffset that makes sense is stored in the maxnextoffset member when a
 375pool is initialized.  All the blocks in a pool have been passed out at least
 376once when and only when nextoffset > maxnextoffset.
 377
 378
 379Major obscurity:  While the usedpools vector is declared to have poolp
 380entries, it doesn't really.  It really contains two pointers per (conceptual)
 381poolp entry, the nextpool and prevpool members of a pool_header.  The
 382excruciating initialization code below fools C so that
 383
 384    usedpools[i+i]
 385
 386"acts like" a genuine poolp, but only so long as you only reference its
 387nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
 388 compensating for the fact that a pool_header's nextpool and prevpool members
 389immediately follow a pool_header's first two members:
 390
 391	union { block *_padding;
 392		uint count; } ref;
 393	block *freeblock;
 394
 395 each of which consumes sizeof(block *) bytes.  So what usedpools[i+i] really
 396contains is a fudged-up pointer p such that *if* C believes it's a poolp
 397pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
 398circular list is empty).
 399
 400It's unclear why the usedpools setup is so convoluted.  It could be to
 401minimize the amount of cache required to hold this heavily-referenced table
 402(which only *needs* the two interpool pointer members of a pool_header). OTOH,
 403referencing code has to remember to "double the index" and doing so isn't
 404free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
 405 on the fact that C doesn't insert any padding anywhere in a pool_header at or before
 406the prevpool member.
 407**************************************************************************** */
 408
 409#define PTA(x)	((poolp )((uchar *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
 410#define PT(x)	PTA(x), PTA(x)
 411
 412static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
 413	PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
 414#if NB_SMALL_SIZE_CLASSES > 8
 415	, PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
 416#if NB_SMALL_SIZE_CLASSES > 16
 417	, PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
 418#if NB_SMALL_SIZE_CLASSES > 24
 419	, PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
 420#if NB_SMALL_SIZE_CLASSES > 32
 421	, PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
 422#if NB_SMALL_SIZE_CLASSES > 40
 423	, PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
 424#if NB_SMALL_SIZE_CLASSES > 48
 425	, PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
 426#if NB_SMALL_SIZE_CLASSES > 56
 427	, PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
 428#endif /* NB_SMALL_SIZE_CLASSES > 56 */
 429#endif /* NB_SMALL_SIZE_CLASSES > 48 */
 430#endif /* NB_SMALL_SIZE_CLASSES > 40 */
 431#endif /* NB_SMALL_SIZE_CLASSES > 32 */
 432#endif /* NB_SMALL_SIZE_CLASSES > 24 */
 433#endif /* NB_SMALL_SIZE_CLASSES > 16 */
 434#endif /* NB_SMALL_SIZE_CLASSES >  8 */
 435};
 436
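/*
 * Sketch of the "fudged-up pointer" property described above, valid in
 * the initial state before any pool has been linked in: as long as only
 * nextpool and prevpool are referenced, usedpools[i+i] behaves like the
 * header of an empty circular list, pointing at itself in both
 * directions.  The name `usedpools_demo` is hypothetical.
 */
static void
usedpools_demo(void)
{
	uint i;

	for (i = 0; i < NB_SMALL_SIZE_CLASSES; ++i) {
		poolp hd = usedpools[i + i];

		/* Empty headed circular list: both links point back at
		 * the header itself. */
		assert(hd->nextpool == hd && hd->prevpool == hd);
	}
}
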
 437/*==========================================================================
 438Arena management.
 439
 440`arenas` is a vector of arena_objects.  It contains maxarenas entries, some of
 441which may not be currently used (== they're arena_objects that aren't
 442currently associated with an allocated arena).  Note that arenas proper are
 443separately malloc'ed.
 444
 445Prior to Python 2.5, arenas were never free()'ed.  Starting with Python 2.5,
 446we do try to free() arenas, and use some mild heuristic strategies to increase
 447the likelihood that arenas eventually can be freed.
 448
 449unused_arena_objects
 450
 451    This is a singly-linked list of the arena_objects that are currently not
 452    being used (no arena is associated with them).  Objects are taken off the
 453    head of the list in new_arena(), and are pushed on the head of the list in
 454    PyObject_Free() when the arena is empty.  Key invariant:  an arena_object
 455    is on this list if and only if its .address member is 0.
 456
 457usable_arenas
 458
 459    This is a doubly-linked list of the arena_objects associated with arenas
 460    that have pools available.  These pools are either waiting to be reused,
 461    or have not been used before.  The list is sorted to have the most-
 462    allocated arenas first (ascending order based on the nfreepools member).
 463    This means that the next allocation will come from a heavily used arena,
 464    which gives the nearly empty arenas a chance to be returned to the system.
 465    In my unscientific tests this dramatically improved the number of arenas
 466    that could be freed.
 467
 468Note that an arena_object associated with an arena all of whose pools are
 469currently in use isn't on either list.
 470*/
 471
 472/* Array of objects used to track chunks of memory (arenas). */
 473static struct arena_object* arenas = NULL;
 474/* Number of slots currently allocated in the `arenas` vector. */
 475static uint maxarenas = 0;
 476
 477/* The head of the singly-linked, NULL-terminated list of available
 478 * arena_objects.
 479 */
 480static struct arena_object* unused_arena_objects = NULL;
 481
 482/* The head of the doubly-linked, NULL-terminated at each end, list of
 483 * arena_objects associated with arenas that have pools available.
 484 */
 485static struct arena_object* usable_arenas = NULL;
 486
 487/* How many arena_objects do we initially allocate?
 488 * 16 = can allocate 16 arenas = 16 * ARENA_SIZE = 4MB before growing the
 489 * `arenas` vector.
 490 */
 491#define INITIAL_ARENA_OBJECTS 16
 492
 493/* Number of arenas allocated that haven't been free()'d. */
 494static size_t narenas_currently_allocated = 0;
 495
 496#ifdef PYMALLOC_DEBUG
 497/* Total number of times malloc() called to allocate an arena. */
 498static size_t ntimes_arena_allocated = 0;
 499/* High water mark (max value ever seen) for narenas_currently_allocated. */
 500static size_t narenas_highwater = 0;
 501#endif
 502
 503/* Allocate a new arena.  If we run out of memory, return NULL.  Else
 504 * allocate a new arena, and return the address of an arena_object
 505 * describing the new arena.  It's expected that the caller will set
 506 * `usable_arenas` to the return value.
 507 */
 508static struct arena_object*
 509new_arena(void)
 510{
 511	struct arena_object* arenaobj;
 512	uint excess;	/* number of bytes above pool alignment */
 513
 514#ifdef PYMALLOC_DEBUG
 515	if (Py_GETENV("PYTHONMALLOCSTATS"))
 516		_PyObject_DebugMallocStats();
 517#endif
 518	if (unused_arena_objects == NULL) {
 519		uint i;
 520		uint numarenas;
 521		size_t nbytes;
 522
 523		/* Double the number of arena objects on each allocation.
 524		 * Note that it's possible for `numarenas` to overflow.
 525		 */
 526		numarenas = maxarenas ? maxarenas << 1 : INITIAL_ARENA_OBJECTS;
 527		if (numarenas <= maxarenas)
 528			return NULL;	/* overflow */
 529#if SIZEOF_SIZE_T <= SIZEOF_INT
 530		if (numarenas > PY_SIZE_MAX / sizeof(*arenas))
 531			return NULL;	/* overflow */
 532#endif
 533		nbytes = numarenas * sizeof(*arenas);
 534		arenaobj = (struct arena_object *)realloc(arenas, nbytes);
 535		if (arenaobj == NULL)
 536			return NULL;
 537		arenas = arenaobj;
 538
 539		/* We might need to fix pointers that were copied.  However,
 540		 * new_arena only gets called when all the pages in the
 541		 * previous arenas are full.  Thus, there are *no* pointers
 542		 * into the old array. Thus, we don't have to worry about
 543		 * invalid pointers.  Just to be sure, some asserts:
 544		 */
 545		assert(usable_arenas == NULL);
 546		assert(unused_arena_objects == NULL);
 547
 548		/* Put the new arenas on the unused_arena_objects list. */
 549		for (i = maxarenas; i < numarenas; ++i) {
 550			arenas[i].address = 0;	/* mark as unassociated */
 551			arenas[i].nextarena = i < numarenas - 1 ?
 552					       &arenas[i+1] : NULL;
 553		}
 554
 555		/* Update globals. */
 556		unused_arena_objects = &arenas[maxarenas];
 557		maxarenas = numarenas;
 558	}
 559
 560	/* Take the next available arena object off the head of the list. */
 561	assert(unused_arena_objects != NULL);
 562	arenaobj = unused_arena_objects;
 563	unused_arena_objects = arenaobj->nextarena;
 564	assert(arenaobj->address == 0);
 565	arenaobj->address = (uptr)malloc(ARENA_SIZE);
 566	if (arenaobj->address == 0) {
 567		/* The allocation failed: return NULL after putting the
 568		 * arenaobj back.
 569		 */
 570		arenaobj->nextarena = unused_arena_objects;
 571		unused_arena_objects = arenaobj;
 572		return NULL;
 573	}
 574
 575	++narenas_currently_allocated;
 576#ifdef PYMALLOC_DEBUG
 577	++ntimes_arena_allocated;
 578	if (narenas_currently_allocated > narenas_highwater)
 579		narenas_highwater = narenas_currently_allocated;
 580#endif
 581	arenaobj->freepools = NULL;
 582	/* pool_address <- first pool-aligned address in the arena
 583	   nfreepools <- number of whole pools that fit after alignment */
 584	arenaobj->pool_address = (block*)arenaobj->address;
 585	arenaobj->nfreepools = ARENA_SIZE / POOL_SIZE;
 586	assert(POOL_SIZE * arenaobj->nfreepools == ARENA_SIZE);
 587	excess = (uint)(arenaobj->address & POOL_SIZE_MASK);
 588	if (excess != 0) {
 589		--arenaobj->nfreepools;
 590		arenaobj->pool_address += POOL_SIZE - excess;
 591	}
 592	arenaobj->ntotalpools = arenaobj->nfreepools;
 593
 594	return arenaobj;
 595}
 596
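/*
 * Worked illustration of the pool-alignment arithmetic at the end of
 * new_arena() above, under the default settings (ARENA_SIZE == 256KB,
 * POOL_SIZE == 4KB).  The helper name `arena_alignment_demo` and its
 * `address` argument are hypothetical; they just replay the computation.
 */
static void
arena_alignment_demo(uptr address)
{
	uint nfreepools = ARENA_SIZE / POOL_SIZE;	/* 64 whole pools */
	uint excess = (uint)(address & POOL_SIZE_MASK);

	if (excess != 0) {
		/* A misaligned arena loses one pool: the first
		 * POOL_SIZE - excess bytes are skipped so that every
		 * pool starts on a POOL_SIZE boundary. */
		--nfreepools;
	}
	assert(nfreepools == 64 || nfreepools == 63);
}
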
 597/*
 598Py_ADDRESS_IN_RANGE(P, POOL)
 599
 600Return true if and only if P is an address that was allocated by pymalloc.
 601POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P)
 602(the caller is asked to compute this because the macro expands POOL more than
 603once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a
 604variable and pass the latter to the macro; because Py_ADDRESS_IN_RANGE is
 605called on every alloc/realloc/free, micro-efficiency is important here).
 606
 607Tricky:  Let B be the arena base address associated with the pool, B =
 608arenas[(POOL)->arenaindex].address.  Then P belongs to the arena if and only if
 609
 610	B <= P < B + ARENA_SIZE
 611
 612Subtracting B throughout, this is true iff
 613
 614	0 <= P-B < ARENA_SIZE
 615
 616By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
 617
 618Obscure:  A PyMem "free memory" function can call the pymalloc free or realloc
 619before the first arena has been allocated.  `arenas` is still NULL in that
 620 case.  We're relying on the fact that maxarenas is also 0 in that case, so that
 621(POOL)->arenaindex < maxarenas  must be false, saving us from trying to index
 622into a NULL arenas.
 623
 624Details:  given P and POOL, the arena_object corresponding to P is AO =
 625arenas[(POOL)->arenaindex].  Suppose obmalloc controls P.  Then (barring wild
 626stores, etc), POOL is the correct address of P's pool, AO.address is the
 627correct base address of the pool's arena, and P must be within ARENA_SIZE of
 628AO.address.  In addition, AO.address is not 0 (no arena can start at address 0
 629(NULL)).  Therefore Py_ADDRESS_IN_RANGE correctly reports that obmalloc
 630controls P.
 631
 632Now suppose obmalloc does not control P (e.g., P was obtained via a direct
 633call to the system malloc() or realloc()).  (POOL)->arenaindex may be anything
 634in this case -- it may even be uninitialized trash.  If the trash arenaindex
 635is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't
 636control P.
 637
 638 Else arenaindex is < maxarenas, and AO is read up.  If AO corresponds to an
 639allocated arena, obmalloc controls all the memory in slice AO.address :
 640AO.address+ARENA_SIZE.  By case assumption, P is not controlled by obmalloc,
 641so P doesn't lie in that slice, so the macro correctly reports that P is not
 642controlled by obmalloc.
 643
 644Finally, if P is not controlled by obmalloc and AO corresponds to an unused
 645arena_object (one not currently associated with an allocated arena),
 646AO.address is 0, and the second test in the macro reduces to:
 647
 648	P < ARENA_SIZE
 649
 650If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes
 651that P is not controlled by obmalloc.  However, if P < ARENA_SIZE, this part
 652of the test still passes, and the third clause (AO.address != 0) is necessary
 653to get the correct result:  AO.address is 0 in this case, so the macro
 654 correctly reports that P is not controlled by obmalloc (despite the fact that P lies in
 655slice AO.address : AO.address + ARENA_SIZE).
 656
 657Note:  The third (AO.address != 0) clause was added in Python 2.5.  Before
 6582.5, arenas were never free()'ed, and an arenaindex < maxarena always
 659corresponded to a currently-allocated arena, so the "P is not controlled by
 660obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case
 661was impossible.
 662
 663Note that the logic is excruciating, and reading up possibly uninitialized
 664memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex)
 665creates problems for some memory debuggers.  The overwhelming advantage is
 666that this test determines whether an arbitrary address is controlled by
 667obmalloc in a small constant time, independent of the number of arenas
 668obmalloc controls.  Since this test is needed at every entry point, it's
 669extremely desirable that it be this fast.
 670*/
 671#define Py_ADDRESS_IN_RANGE(P, POOL)			\
 672	((POOL)->arenaindex < maxarenas &&		\
 673	 (uptr)(P) - arenas[(POOL)->arenaindex].address < (uptr)ARENA_SIZE && \
 674	 arenas[(POOL)->arenaindex].address != 0)
 675
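/*
 * Sketch of the unsigned-arithmetic trick in the middle clause of
 * Py_ADDRESS_IN_RANGE: because the subtraction is done on unsigned uptr
 * values, a P below the arena base wraps around to a huge value, so the
 * single comparison covers both "P >= B" and "P < B + ARENA_SIZE".  The
 * helper name `address_in_arena_demo` is hypothetical.
 */
static int
address_in_arena_demo(uptr p, uptr base)
{
	/* Equivalent to: base <= p && p < base + ARENA_SIZE */
	return p - base < (uptr)ARENA_SIZE;
}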
 676
 677/* This is only useful when running memory debuggers such as
 678 * Purify or Valgrind.  Uncomment to use.
 679 *
 680#define Py_USING_MEMORY_DEBUGGER
 681 */
 682
 683#ifdef Py_USING_MEMORY_DEBUGGER
 684
 685/* Py_ADDRESS_IN_RANGE may access uninitialized memory by design
 686 * This leads to thousands of spurious warnings when using
 687 * Purify or Valgrind.  By making a function, we can easily
 688 * suppress the uninitialized memory reads in this one function.
 689 * So we won't ignore real errors elsewhere.
 690 *
 691 * Disable the macro and use a function.
 692 */
 693
 694#undef Py_ADDRESS_IN_RANGE
 695
 696#if defined(__GNUC__) && ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) || \
 697			  (__GNUC__ >= 4))
 698#define Py_NO_INLINE __attribute__((__noinline__))
 699#else
 700#define Py_NO_INLINE
 701#endif
 702
 703/* Don't make static, to try to ensure this isn't inlined. */
 704int Py_ADDRESS_IN_RANGE(void *P, poolp pool) Py_NO_INLINE;
 705#undef Py_NO_INLINE
 706#endif
 707
 708/*==========================================================================*/
 709
 710/* malloc.  Note that nbytes==0 tries to return a non-NULL pointer, distinct
 711 * from all other currently live pointers.  This may not be possible.
 712 */
 713
 714/*
 715 * The basic blocks are ordered by decreasing execution frequency,
 716 * which minimizes the number of jumps in the most common cases,
 717 * improves branching prediction and instruction scheduling (small
 718 * block allocations typically result in a couple of instructions).
 719 * Unless the optimizer reorders everything, being too smart...
 720 */
 721
 722#undef PyObject_Malloc
 723void *
 724PyObject_Malloc(size_t nbytes)
 725{
 726	block *bp;
 727	poolp pool;
 728	poolp next;
 729	uint size;
 730
 731	/*
 732	 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
 733	 * Most python internals blindly use a signed Py_ssize_t to track
 734	 * things without checking for overflows or negatives.
 735	 * As size_t is unsigned, checking for nbytes < 0 is not required.
 736	 */
 737	if (nbytes > PY_SSIZE_T_MAX)
 738		return NULL;
 739
 740	/*
 741	 * This implicitly redirects malloc(0).
 742	 */
 743	if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
 744		LOCK();
 745		/*
 746		 * Most frequent paths first
 747		 */
 748		size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
 749		pool = usedpools[size + size];
 750		if (pool != pool->nextpool) {
 751			/*
 752			 * There is a used pool for this size class.
 753			 * Pick up the head block of its free list.
 754			 */
 755			++pool->ref.count;
 756			bp = pool->freeblock;
 757			assert(bp != NULL);
 758			if ((pool->freeblock = *(block **)bp) != NULL) {
 759				UNLOCK();
 760				return (void *)bp;
 761			}
 762			/*
 763			 * Reached the end of the free list, try to extend it.
 764			 */
 765			if (pool->nextoffset <= pool->maxnextoffset) {
 766				/* There is room for another block. */
 767				pool->freeblock = (block*)pool +
 768						  pool->nextoffset;
 769				pool->nextoffset += INDEX2SIZE(size);
 770				*(block **)(pool->freeblock) = NULL;
 771				UNLOCK();
 772				return (void *)bp;
 773			}
 774			/* Pool is full, unlink from used pools. */
 775			next = pool->nextpool;
 776			pool = pool->prevpool;
 777			next->prevpool = pool;
 778			pool->nextpool = next;
 779			UNLOCK();
 780			return (void *)bp;
 781		}
 782
 783		/* There isn't a pool of the right size class immediately
 784		 * available:  use a free pool.
 785		 */
 786		if (usable_arenas == NULL) {
 787			/* No arena has a free pool:  allocate a new arena. */
 788#ifdef WITH_MEMORY_LIMITS
 789			if (narenas_currently_allocated >= MAX_ARENAS) {
 790				UNLOCK();
 791				goto redirect;
 792			}
 793#endif
 794			usable_arenas = new_arena();
 795			if (usable_arenas == NULL) {
 796				UNLOCK();
 797				goto redirect;
 798			}
 799			usable_arenas->nextarena =
 800				usable_arenas->prevarena = NULL;
 801		}
 802		assert(usable_arenas->address != 0);
 803
 804		/* Try to get a cached free pool. */
 805		pool = usable_arenas->freepools;
 806		if (pool != NULL) {
 807			/* Unlink from cached pools. */
 808			usable_arenas->freepools = pool->nextpool;
 809
 810			/* This arena already had the smallest nfreepools
 811			 * value, so decreasing nfreepools doesn't change
 812			 * that, and we don't need to rearrange the
 813			 * usable_arenas list.  However, if the arena has
 814			 * become wholly allocated, we need to remove its
 815			 * arena_object from usable_arenas.
 816			 */
 817			--usable_arenas->nfreepools;
 818			if (usable_arenas->nfreepools == 0) {
 819				/* Wholly allocated:  remove. */
 820				assert(usable_arenas->freepools == NULL);
 821				assert(usable_arenas->nextarena == NULL ||
 822				       usable_arenas->nextarena->prevarena ==
 823					   usable_arenas);
 824
 825				usable_arenas = usable_arenas->nextarena;
 826				if (usable_arenas != NULL) {
 827					usable_arenas->prevarena = NULL;
 828					assert(usable_arenas->address != 0);
 829				}
 830			}
 831			else {
 832				/* nfreepools > 0:  it must be that freepools
 833				 * isn't NULL, or that we haven't yet carved
 834				 * off all the arena's pools for the first
 835				 * time.
 836				 */
 837				assert(usable_arenas->freepools != NULL ||
 838				       usable_arenas->pool_address <=
 839				           (block*)usable_arenas->address +
 840				               ARENA_SIZE - POOL_SIZE);
 841			}
 842		init_pool:
 843			/* Frontlink to used pools. */
 844			next = usedpools[size + size]; /* == prev */
 845			pool->nextpool = next;
 846			pool->prevpool = next;
 847			next->nextpool = pool;
 848			next->prevpool = pool;
 849			pool->ref.count = 1;
 850			if (pool->szidx == size) {
 851				/* Luckily, this pool last contained blocks
 852				 * of the same size class, so its header
 853				 * and free list are already initialized.
 854				 */
 855				bp = pool->freeblock;
 856				pool->freeblock = *(block **)bp;
 857				UNLOCK();
 858				return (void *)bp;
 859			}
 860			/*
 861			 * Initialize the pool header, set up the free list to
 862			 * contain just the second block, and return the first
 863			 * block.
 864			 */
 865			pool->szidx = size;
 866			size = INDEX2SIZE(size);
 867			bp = (block *)pool + POOL_OVERHEAD;
 868			pool->nextoffset = POOL_OVERHEAD + (size << 1);
 869			pool->maxnextoffset = POOL_SIZE - size;
 870			pool->freeblock = bp + size;
 871			*(block **)(pool->freeblock) = NULL;
 872			UNLOCK();
 873			return (void *)bp;
 874		}
 875
 876		/* Carve off a new pool. */
 877		assert(usable_arenas->nfreepools > 0);
 878		assert(usable_arenas->freepools == NULL);
 879		pool = (poolp)usable_arenas->pool_address;
 880		assert((block*)pool <= (block*)usable_arenas->address +
 881		                       ARENA_SIZE - POOL_SIZE);
 882		pool->arenaindex = usable_arenas - arenas;
 883		assert(&arenas[pool->arenaindex] == usable_arenas);
 884		pool->szidx = DUMMY_SIZE_IDX;
 885		usable_arenas->pool_address += POOL_SIZE;
 886		--usable_arenas->nfreepools;
 887
 888		if (usable_arenas->nfreepools == 0) {
 889			assert(usable_arenas->nextarena == NULL ||
 890			       usable_arenas->nextarena->prevarena ==
 891			       	   usable_arenas);
 892			/* Unlink the arena:  it is completely allocated. */
 893			usable_arenas = usable_arenas->nextarena;
 894			if (usable_arenas != NULL) {
 895				usable_arenas->prevarena = NULL;
 896				assert(usable_arenas->address != 0);
 897			}
 898		}
 899
 900		goto init_pool;
 901	}
 902
 903        /* The small block allocator ends here. */
 904
 905redirect:
 906	/* Redirect the original request to the underlying (libc) allocator.
 907	 * We jump here on bigger requests, on error in the code above (as a
 908	 * last chance to serve the request) or when the max memory limit
 909	 * has been reached.
 910	 */
 911	if (nbytes == 0)
 912		nbytes = 1;
 913	return (void *)malloc(nbytes);
 914}
 915
 916/* free */
 917
 918#undef PyObject_Free
 919void
 920PyObject_Free(void *p)
 921{
 922	poolp pool;
 923	block *lastfree;
 924	poolp next, prev;
 925	uint size;
 926
 927	if (p == NULL)	/* free(NULL) has no effect */
 928		return;
 929
 930	pool = POOL_ADDR(p);
 931	if (Py_ADDRESS_IN_RANGE(p, pool)) {
 932		/* We allocated this address. */
 933		LOCK();
 934		/* Link p to the start of the pool's freeblock list.  Since
 935		 * the pool had at least the p block outstanding, the pool
 936		 * wasn't empty (so it's already in a usedpools[] list, or
 937		 * was full and is in no list -- it's not in the freeblocks
 938		 * list in any case).
 939		 */
 940		assert(pool->ref.count > 0);	/* else it was empty */
 941		*(block **)p = lastfree = pool->freeblock;
 942		pool->freeblock = (block *)p;
 943		if (lastfree) {
 944			struct arena_object* ao;
 945			uint nf;  /* ao->nfreepools */
 946
 947			/* freeblock wasn't NULL, so the pool wasn't full,
 948			 * and the pool is in a usedpools[] list.
 949			 */
 950			if (--pool->ref.count != 0) {
 951				/* pool isn't empty:  leave it in usedpools */
 952				UNLOCK();
 953				return;
 954			}
 955			/* Pool is now empty:  unlink from usedpools, and
 956			 * link to the front of freepools.  This ensures that
 957			 * previously freed pools will be allocated later
 958			 * (being not referenced, they are perhaps paged out).
 959			 */
 960			next = pool->nextpool;
 961			prev = pool->prevpool;
 962			next->prevpool = prev;
 963			prev->nextpool = next;
 964
 965			/* Link the pool to freepools.  This is a singly-linked
 966			 * list, and pool->prevpool isn't used there.
 967			 */
 968			ao = &arenas[pool->arenaindex];
 969			pool->nextpool = ao->freepools;
 970			ao->freepools = pool;
 971			nf = ++ao->nfreepools;
 972
 973			/* All the rest is arena management.  We just freed
 974			 * a pool, and there are 4 cases for arena mgmt:
 975			 * 1. If all the pools are free, return the arena to
 976			 *    the system free().
 977			 * 2. If this is the only free pool in the arena,
 978			 *    add the arena back to the `usable_arenas` list.
 979			 * 3. If the "next" arena has a smaller count of free
 980			 *    pools, we have to "slide this arena right" to
 981			 *    restore that usable_arenas is sorted in order of
 982			 *    nfreepools.
 983			 * 4. Else there's nothing more to do.
 984			 */
 985			if (nf == ao->ntotalpools) {
 986				/* Case 1.  First unlink ao from usable_arenas.
 987				 */
 988				assert(ao->prevarena == NULL ||
 989				       ao->prevarena->address != 0);
 990				assert(ao->nextarena == NULL ||
 991				       ao->nextarena->address != 0);
 992
 993				/* Fix the pointer in the prevarena, or the
 994				 * usable_arenas pointer.
 995				 */
 996				if (ao->prevarena == NULL) {
 997					usable_arenas = ao->nextarena;
 998					assert(usable_arenas == NULL ||
 999					       usable_arenas->address != 0);
1000				}
1001				else {
1002					assert(ao->prevarena->nextarena == ao);
1003					ao->prevarena->nextarena =
1004						ao->nextarena;
1005				}
1006				/* Fix the pointer in the nextarena. */
1007				if (ao->nextarena != NULL) {
1008					assert(ao->nextarena->prevarena == ao);
1009					ao->nextarena->prevarena =
1010						ao->prevarena;
1011				}
1012				/* Record that this arena_object slot is
1013				 * available to be reused.
1014				 */
1015				ao->nextarena = unused_arena_objects;
1016				unused_arena_objects = ao;
1017
1018				/* Free the entire arena. */
1019				free((void *)ao->address);
1020				ao->address = 0;	/* mark unassociated */
1021				--narenas_currently_allocated;
1022
1023				UNLOCK();
1024				return;
1025			}
1026			if (nf == 1) {
1027				/* Case 2.  Put ao at the head of
1028				 * usable_arenas.  Note that because
1029				 * ao->nfreepools was 0 before, ao isn't
1030				 * currently on the usable_arenas list.
1031				 */
1032				ao->nextarena = usable_arenas;
1033				ao->prevarena = NULL;
1034				if (usable_arenas)
1035					usable_arenas->prevarena = ao;
1036				usable_arenas = ao;
1037				assert(usable_arenas->address != 0);
1038
1039				UNLOCK();
1040				return;
1041			}
1042			/* If this arena is now out of order, we need to keep
1043			 * the list sorted.  The list is kept sorted so that
1044			 * the "most full" arenas are used first, which allows
1045			 * the nearly empty arenas to be completely freed.  In
1046			 * a few un-scientific tests, it seems like this
1047			 * approach allowed a lot more memory to be freed.
1048			 */
1049			if (ao->nextarena == NULL ||
1050				     nf <= ao->nextarena->nfreepools) {
1051				/* Case 4.  Nothing to do. */
1052				UNLOCK();
1053				return;
1054			}
1055			/* Case 3:  We have to move the arena towards the end
1056			 * of the list, because it has more free pools than
1057			 * the arena to its right.
1058			 * First unlink ao from usable_arenas.
1059			 */
1060			if (ao->prevarena != NULL) {
1061				/* ao isn't at the head of the list */
1062				assert(ao->prevarena->nextarena == ao);
1063				ao->prevarena->nextarena = ao->nextarena;
1064			}
1065			else {
1066				/* ao is at the head of the list */
1067				assert(usable_arenas == ao);
1068				usable_arenas = ao->nextarena;
1069			}
1070			ao->nextarena->prevarena = ao->prevarena;
1071
1072			/* Locate the new insertion point by iterating over
1073			 * the list, using our nextarena pointer.
1074			 */
1075			while (ao->nextarena != NULL &&
1076					nf > ao->nextarena->nfreepools) {
1077				ao->prevarena = ao->nextarena;
1078				ao->nextarena = ao->nextarena->nextarena;
1079			}
1080
1081			/* Insert ao at this point. */
1082			assert(ao->nextarena == NULL ||
1083				ao->prevarena == ao->nextarena->prevarena);
1084			assert(ao->prevarena->nextarena == ao->nextarena);
1085
1086			ao->prevarena->nextarena = ao;
1087			if (ao->nextarena != NULL)
1088				ao->nextarena->prevarena = ao;
1089
1090			/* Verify that the swaps worked. */
1091			assert(ao->nextarena == NULL ||
1092				  nf <= ao->nextarena->nfreepools);
1093			assert(ao->prevarena == NULL ||
1094				  nf > ao->prevarena->nfreepools);
1095			assert(ao->nextarena == NULL ||
1096				ao->nextarena->prevarena == ao);
1097			assert((usable_arenas == ao &&
1098				ao->prevarena == NULL) ||
1099				ao->prevarena->nextarena == ao);
1100
1101			UNLOCK();
1102			return;
1103		}
1104		/* Pool was full, so doesn't currently live in any list:
1105		 * link it to the front of the appropriate usedpools[] list.
1106		 * This mimics LRU pool usage for new allocations and
1107		 * targets optimal filling when several pools contain
1108		 * blocks of the same size class.
1109		 */
1110		--pool->ref.count;
1111		assert(pool->ref.count > 0);	/* else the pool is empty */
1112		size = pool->szidx;
1113		next = usedpools[size + size];
1114		prev = next->prevpool;
1115		/* insert pool before next:   prev <-> pool <-> next */
1116		pool->nextpool = next;
1117		pool->prevpool = prev;
1118		next->prevpool = pool;
1119		prev->nextpool = pool;
1120		UNLOCK();
1121		return;
1122	}
1123
1124	/* We didn't allocate this address. */
1125	free(p);
1126}
1127
1128/* realloc.  If p is NULL, this acts like malloc(nbytes).  Else if nbytes==0,
1129 * then as the Python docs promise, we do not treat this like free(p), and
1130 * return a non-NULL result.
1131 */
1132
1133#undef PyObject_Realloc
1134void *
1135PyObject_Realloc(void *p, size_t nbytes)
1136{
1137	void *bp;
1138	poolp pool;
1139	size_t size;
1140
1141	if (p == NULL)
1142		return PyObject_Malloc(nbytes);
1143
1144	/*
1145	 * Limit ourselves to PY_SSIZE_T_MAX bytes to prevent security holes.
1146	 * Most python internals blindly use a signed Py_ssize_t to track
1147	 * things without checking for overflows or negatives.
1148	 * As size_t is unsigned, checking for nbytes < 0 is not required.
1149	 */
1150	if (nbytes > PY_SSIZE_T_MAX)
1151		return NULL;
1152
1153	pool = POOL_ADDR(p);
1154	if (Py_ADDRESS_IN_RANGE(p, pool)) {
1155		/* We're in charge of this block */
1156		size = INDEX2SIZE(pool->szidx);
1157		if (nbytes <= size) {
1158			/* The block is staying the same or shrinking.  If
1159			 * it's shrinking, there's a tradeoff:  it costs
1160			 * cycles to copy the block to a smaller size class,
1161			 * but it wastes memory not to copy it.  The
1162			 * compromise here is to copy on shrink only if at
1163			 * least 25% of size can be shaved off.
1164			 */
1165			if (4 * nbytes > 3 * size) {
1166				/* It's the same,
1167				 * or shrinking and new/old > 3/4.
1168				 */
1169				return p;
1170			}
1171			size = nbytes;
1172		}
1173		bp = PyObject_Malloc(nbytes);
1174		if (bp != NULL) {
1175			memcpy(bp, p, size);
1176			PyObject_Free(p);
1177		}
1178		return bp;
1179	}
1180	/* We're not managing this block.  If nbytes <=
1181	 * SMALL_REQUEST_THRESHOLD, it's tempting to try to take over this
1182	 * block.  However, if we do, we need to copy the valid data from
1183	 * the C-managed block to one of our blocks, and there's no portable
1184	 * way to know how much of the memory space starting at p is valid.
1185	 * As bug 1185883 pointed out the hard way, it's possible that the
1186	 * C-managed block is "at the end" of allocated VM space, so that
1187	 * a memory fault can occur if we try to copy nbytes bytes starting
1188	 * at p.  Instead we punt:  let C continue to manage this block.
1189         */
1190	if (nbytes)
1191		return realloc(p, nbytes);
1192	/* C doesn't define the result of realloc(p, 0) (it may or may not
1193	 * return NULL then), but Python's docs promise that nbytes==0 never
1194	 * returns NULL.  We don't pass 0 to realloc(), to avoid that endcase
1195	 * to begin with.  Even then, we can't be sure that realloc() won't
1196	 * return NULL.
1197	 */
1198	bp = realloc(p, 1);
1199   	return bp ? bp : p;
1200}
1201
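/*
 * Minimal usage sketch of the three entry points above; the name
 * `object_alloc_demo` and the request sizes are illustrative only.  A
 * 24-byte request is served from a size class 2 pool, while 1024 bytes
 * exceeds SMALL_REQUEST_THRESHOLD and is redirected to the system
 * allocator, yet both are released through the same PyObject_Free.
 */
static void
object_alloc_demo(void)
{
	void *small = PyObject_Malloc(24);	/* size class idx 2 */
	void *big = PyObject_Malloc(1024);	/* routed to malloc() */

	if (small != NULL && big != NULL) {
		/* Growing a block may move it, as with realloc(). */
		void *grown = PyObject_Realloc(small, 200);
		if (grown != NULL)
			small = grown;
	}
	PyObject_Free(small);	/* freeing NULL is a no-op */
	PyObject_Free(big);
}
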
1202#else	/* ! WITH_PYMALLOC */
1203
1204/*==========================================================================*/
1205/* pymalloc not enabled:  Redirect the entry points to malloc.  These will
1206 * only be used by extensions that are compiled with pymalloc enabled. */
1207
1208void *
1209PyObject_Malloc(size_t n)
1210{
1211	return PyMem_MALLOC(n);
1212}
1213
1214void *
1215PyObject_Realloc(void *p, size_t n)
1216{
1217	return PyMem_REALLOC(p, n);
1218}
1219
1220void
1221PyObject_Free(void *p)
1222{
1223	PyMem_FREE(p);
1224}
1225#endif /* WITH_PYMALLOC */
1226
1227#ifdef PYMALLOC_DEBUG
1228/*==========================================================================*/
1229/* A cross-platform debugging allocator.  This doesn't manage memory directly,
1230 * it wraps a real allocator, adding extra debugging info to the memory blocks.
1231 */
1232
1233/* Special bytes broadcast into debug memory blocks at appropriate times.
1234 * Strings of these are unlikely to be valid addresses, floats, ints or
1235 * 7-bit ASCII.
1236 */
1237#undef CLEANBYTE
1238#undef DEADBYTE
1239#undef FORBIDDENBYTE
1240#define CLEANBYTE      0xCB    /* clean (newly allocated) memory */
1241#define DEADBYTE       0xDB    /* dead (newly freed) memory */
1242#define FORBIDDENBYTE  0xFB    /* untouchable bytes at each end of a block */
1243
1244static size_t serialno = 0;	/* incremented on each debug {m,re}alloc */
1245
1246/* serialno is always incremented via calling this routine.  The point is
1247 * to supply a single place to set a breakpoint.
1248 */
1249static void
1250bumpserialno(void)
1251{
1252	++serialno;
1253}
1254
1255#define SST SIZEOF_SIZE_T
1256
1257/* Read sizeof(size_t) bytes at p as a big-endian size_t. */
1258static size_t
1259read_size_t(const void *p)
1260{
1261	const uchar *q = (const uchar *)p;
1262	size_t result = *q++;
1263	int i;
1264
1265	for (i = SST; --i > 0; ++q)
1266		result = (result << 8) | *q;
1267	return result;
1268}
1269
1270/* Write n as a big-endian size_t, MSB at address p, LSB at
1271 * p + sizeof(size_t) - 1.
1272 */
1273static void
1274write_size_t(void *p, size_t n)
1275{
1276	uchar *q = (uchar *)p + SST - 1;
1277	int i;
1278
1279	for (i = SST; --i >= 0; --q) {
1280		*q = (uchar)(n & 0xff);
1281		n >>= 8;
1282	}
1283}
1284
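/*
 * Round-trip sketch for the two helpers above: a size_t written
 * big-endian by write_size_t() is read back unchanged by read_size_t().
 * The buffer and the name `size_t_roundtrip_demo` are illustrative only.
 */
static void
size_t_roundtrip_demo(void)
{
	uchar buf[SST];
	size_t n = (size_t)0xC0FFEE;

	write_size_t(buf, n);
	/* The most significant byte lands at buf[0] (easy to spot in a
	 * memory dump), the least significant at buf[SST - 1]. */
	assert(read_size_t(buf) == n);
}
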
1285#ifdef Py_DEBUG
1286/* Is target in the list?  The list is traversed via the nextpool pointers.
1287 * The list may be NULL-terminated, or circular.  Return 1 if target is in
1288 * list, else 0.
1289 */
1290static int
1291pool_is_in_list(const poolp target, poolp list)
1292{
1293	poolp origlist = list;
1294	assert(target != NULL);
1295	if (list == NULL)
1296		return 0;
1297	do {
1298		if (target == list)
1299			return 1;
1300		list = list->nextpool;
1301	} while (list != NULL && list != origlist);
1302	return 0;
1303}
1304
1305#else
1306#define pool_is_in_list(X, Y) 1
1307
1308#endif	/* Py_DEBUG */
1309
1310/* Let S = sizeof(size_t).  The debug malloc asks for 4*S extra bytes and
1311   fills them with useful stuff, here calling the underlying malloc's result p:
1312
1313p[0: S]
1314    Number of bytes originally asked for.  This is a size_t, big-endian (easier
1315    to read in a memory dump).
1316p[S: 2*S]
1317    Copies of FORBIDDENBYTE.  Used to catch under-writes and under-reads.
1318p[2*S: 2*S+n]
1319    The requested memory, filled with copies of CLEANBYTE.
1320    Used to catch reference to uninitialized memory.
1321    &p[2*S] is returned.  Note that this is 8-byte aligned if pymalloc
1322    handled the request itself.
1323p[2*S+n: 2*S+n+S]
1324    Copies of FORBIDDENBYTE.  Used to catch over-writes and over-reads.
1325p[2*S+n+S: 2*S+n+2*S]
1326    A serial number, incremented by 1 on each call to _PyObject_DebugMalloc
1327    and _PyObject_DebugRealloc.
1328    This is a big-endian size_t.
1329    If "bad memory" is detected later, the serial number gives an
1330    excellent way to set a breakpoint on the next run, to capture the
1331    instant at which this block was passed out.
1332*/
1333
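/*
 * Sketch of how the layout above is navigated, given a pointer q as
 * handed back to the user by _PyObject_DebugMalloc (compare
 * _PyObject_DebugFree and _PyObject_DebugRealloc below).  The helper
 * name `debug_layout_demo` is hypothetical.
 */
static void
debug_layout_demo(void *q)
{
	uchar *base = (uchar *)q - 2*SST;	/* what malloc returned */
	size_t nbytes = read_size_t(base);	/* original request size */
	uchar *tail = (uchar *)q + nbytes;	/* first tail pad byte */
	size_t serial = read_size_t(tail + SST);
	int i;

	/* Both pad strips must still hold FORBIDDENBYTE. */
	for (i = 0; i < SST; ++i) {
		assert(base[SST + i] == FORBIDDENBYTE);
		assert(tail[i] == FORBIDDENBYTE);
	}
	(void)serial;	/* identifies when this block was handed out */
}
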
1334void *
1335_PyObject_DebugMalloc(size_t nbytes)
1336{
1337	uchar *p;	/* base address of malloc'ed block */
1338	uchar *tail;	/* p + 2*SST + nbytes == pointer to tail pad bytes */
1339	size_t total;	/* nbytes + 4*SST */
1340
1341	bumpserialno();
1342	total = nbytes + 4*SST;
1343	if (total < nbytes)
1344		/* overflow:  can't represent total as a size_t */
1345		return NULL;
1346
1347	p = (uchar *)PyObject_Malloc(total);
1348	if (p == NULL)
1349		return NULL;
1350
1351	write_size_t(p, nbytes);
1352	memset(p + SST, FORBIDDENBYTE, SST);
1353
1354	if (nbytes > 0)
1355		memset(p + 2*SST, CLEANBYTE, nbytes);
1356
1357	tail = p + 2*SST + nbytes;
1358	memset(tail, FORBIDDENBYTE, SST);
1359	write_size_t(tail + SST, serialno);
1360
1361	return p + 2*SST;
1362}
1363
1364/* The debug free first checks the 2*SST bytes on each end for sanity (in
1365   particular, that the FORBIDDENBYTEs are still intact).
1366   Then fills the original bytes with DEADBYTE.
1367   Then calls the underlying free.
1368*/
1369void
1370_PyObject_DebugFree(void *p)
1371{
1372	uchar *q = (uchar *)p - 2*SST;  /* address returned from malloc */
1373	size_t nbytes;
1374
1375	if (p == NULL)
1376		return;
1377	_PyObject_DebugCheckAddress(p);
1378	nbytes = read_size_t(q);
1379	if (nbytes > 0)
1380		memset(q, DEADBYTE, nbytes);
1381	PyObject_Free(q);
1382}
1383
1384void *
1385_PyObject_DebugRealloc(void *p, size_t nbytes)
1386{
1387	uchar *q = (uchar *)p;
1388	uchar *tail;
1389	size_t total;	/* nbytes + 4*SST */
1390	size_t original_nbytes;
1391	int i;
1392
1393	if (p == NULL)
1394		return _PyObject_DebugMalloc(nbytes);
1395
1396	_PyObject_DebugCheckAddress(p);
1397	bumpserialno();
1398	original_nbytes = read_size_t(q - 2*SST);
1399	total = nbytes + 4*SST;
1400	if (total < nbytes)
1401		/* overflow:  can't represent total as a size_t */
1402		return NULL;
1403
1404	if (nbytes < original_nbytes) {
1405		/* shrinking:  mark old extra memory dead */
1406		memset(q + nbytes, DEADBYTE, original_nbytes - nbytes);
1407	}
1408
1409	/* Resize and add decorations. */
1410	q = (uchar *)PyObject_Realloc(q - 2*SST, total);
1411	if (q == NULL)
1412		return NULL;
1413
1414	write_size_t(q, nbytes);
1415	for (i = 0; i < SST; ++i)
1416		assert(q[SST + i] == FORBIDDENBYTE);
1417	q += 2*SST;
1418	tail = q + nbytes;
1419	memset(tail, FORBIDDENBYTE, SST);
1420	write_size_t(tail + SST, serialno);
1421
1422	if (nbytes > original_nbytes) {
1423		/* growing:  mark new extra memory clean */
1424		memset(q + original_nbytes, CLEANBYTE,
1425			nbytes - original_nbytes);
1426	}
1427
1428	return q;
1429}
1430
1431/* Check the forbidden bytes on both ends of the memory allocated for p.
1432 * If anything is wrong,…

The file is truncated at this point; the remainder of obmalloc.c is not shown.