#include "Python.h"

#ifdef WITH_PYMALLOC

/* An object allocator for Python.

   Here is an introduction to the layers of the Python memory architecture,
   showing where the object allocator is actually used (layer +2).  It is
   called for every object allocation and deallocation (PyObject_New/Del),
   unless the object-specific allocators implement a proprietary allocation
   scheme (ex.: ints use a simple free list).  This is also the place where
   the cyclic garbage collector operates selectively on container objects.


        Object-specific allocators
            _____   ______   ______       ________
           [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
+3 | <----- Object-specific memory -----> | <-- Non-object memory -->          |
            _______________________________       |                           |
           [   Python's object allocator   ]      |                           |
+2 | ####### Object memory #######        | <------ Internal buffers ------>  |
            ______________________________________________________________    |
           [          Python's raw memory allocator (PyMem_ API)          ]   |
+1 | <----- Python memory (under PyMem manager's control) ------>         |   |
            __________________________________________________________________
           [    Underlying general-purpose allocator (ex: C library malloc)   ]
 0 | <------ Virtual memory allocated for the python process ------->          |

   =========================================================================
            _______________________________________________________________________
           [                OS-specific Virtual Memory Manager (VMM)               ]
-1 | <--- Kernel dynamic storage allocation & management (page-based) --->        |
            __________________________________   __________________________________
           [                                  ] [                                  ]
-2 | <-- Physical memory: ROM/RAM -->       | | <-- Secondary storage (swap) -->   |

*/
/*==========================================================================*/

/* A fast, special-purpose memory allocator for small blocks, to be used
   on top of a general-purpose malloc -- heavily based on previous art. */

/* Vladimir Marangozov -- August 2000 */

/*
 * "Memory management is where the rubber meets the road -- if we do the wrong
 * thing at any level, the results will not be good. And if we don't make the
 * levels work well together, we are in serious trouble." (1)
 *
 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
 *     "Dynamic Storage Allocation: A Survey and Critical Review",
 *     in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
 */

/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks */

/*==========================================================================*/

/*
 * Allocation strategy abstract:
 *
 * For small requests, the allocator sub-allocates <Big> blocks of memory.
 * Requests greater than 256 bytes are routed to the system's allocator.
 *
 * Small requests are grouped in size classes spaced 8 bytes apart, due
 * to the required valid alignment of the returned address. Requests of
 * a particular size are serviced from memory pools of 4K (one VMM page).
 * Pools are fragmented on demand and contain free lists of blocks of one
 * particular size class. In other words, there is a fixed-size allocator
 * for each size class. Free pools are shared by the different allocators,
 * thus minimizing the space reserved for a particular size class.
 *
 * This allocation strategy is a variant of what is known as "simple
 * segregated storage based on array of free lists". The main drawback of
 * simple segregated storage is that we might end up with a lot of reserved
 * memory for the different free lists, which degenerate over time. To avoid
 * this, we partition each free list in pools and we share dynamically the
 * reserved space between all free lists. This technique is quite efficient
 * for memory intensive programs which allocate mainly small-sized blocks.
 *
 * For small requests we have the following table (a code sketch of this
 * mapping follows this comment):
 *
 * Request in bytes     Size of allocated block      Size class idx
 * ----------------------------------------------------------------
 *        1-8                     8                       0
 *       9-16                    16                       1
 *      17-24                    24                       2
 *      25-32                    32                       3
 *      33-40                    40                       4
 *      41-48                    48                       5
 *      49-56                    56                       6
 *      57-64                    64                       7
 *      65-72                    72                       8
 *        ...                   ...                     ...
 *    241-248                   248                      30
 *    249-256                   256                      31
 *
 *      0, 257 and up: routed to the underlying allocator.
 */
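
/*
 * Illustrative sketch (not compiled): how a request size maps to a size
 * class index and to the block size actually handed out, matching the
 * table above.  The function name is hypothetical and exists only for
 * illustration.
 */
#if 0
static void
size_class_example(void)
{
        size_t nbytes = 20;                     /* requested size, 1..256 */
        unsigned int idx = (unsigned int)(nbytes - 1) >> 3;    /* class index: 2 */
        unsigned int blocksize = (idx + 1) << 3;                /* bytes handed out: 24 */
        assert(idx == 2 && blocksize == 24);
}
#endif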

/*==========================================================================*/

/*
 * -- Main tunable settings section --
 */

/*
 * Alignment of addresses returned to the user. 8-byte alignment works
 * on most current architectures (with 32-bit or 64-bit address busses).
 * The alignment value is also used for grouping small requests in size
 * classes spaced ALIGNMENT bytes apart.
 *
 * You shouldn't change this unless you know what you are doing.
 */
#define ALIGNMENT               8       /* must be 2^N */
#define ALIGNMENT_SHIFT         3
#define ALIGNMENT_MASK          (ALIGNMENT - 1)

/* Return the number of bytes in size class I, as a uint. */
#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)

/*
 * Max size threshold below which malloc requests are considered to be
 * small enough in order to use preallocated memory pools. You can tune
 * this value according to your application behaviour and memory needs.
 *
 * The following invariants must hold:
 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 256
 *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
 *
 * Although not required, for better performance and space efficiency,
 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
 */
#define SMALL_REQUEST_THRESHOLD 256
#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)

/*
 * The system's VMM page size can be obtained on most unices with a
 * getpagesize() call or deduced from various header files. To make
 * things simpler, we assume that it is 4K, which is OK for most systems.
 * It is probably better if this is the native page size, but it doesn't
 * have to be.
 */
#define SYSTEM_PAGE_SIZE        (4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)

/*
 * Maximum amount of memory managed by the allocator for small requests.
 */
#ifdef WITH_MEMORY_LIMITS
#ifndef SMALL_MEMORY_LIMIT
#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
#endif
#endif

/*
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 * on a page boundary.  This is a reserved virtual address space for the
 * current process (obtained through a malloc call).  In no way does this mean
 * that the memory arenas will be used entirely.  A malloc(<Big>) is usually
 * an address range reservation for <Big> bytes, unless all pages within this
 * space are referenced subsequently.  So malloc'ing big blocks and not using
 * them does not mean "wasting memory".  It's an addressable range wastage...
 *
 * Therefore, allocating arenas with malloc is not optimal, because there is
 * some address space wastage, but this is the most portable way to request
 * memory from the system across various platforms.
 */
#define ARENA_SIZE              (256 << 10)     /* 256KB */

#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif

/*
 * Size of the pools used for small blocks.  Should be a power of 2,
 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
 */
#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK

/*
 * -- End of tunable settings section --
 */

/*==========================================================================*/

/*
 * Locking
 *
 * To reduce lock contention, it would probably be better to refine the
 * crude function locking with per size class locking.  I'm not positive,
 * however, that it's worth switching to such a locking policy because
 * of the performance penalty it might introduce.
 *
 * The following macros describe the simplest (should also be the fastest)
 * lock object on a particular platform and the init/fini/lock/unlock
 * operations on it.  The locks defined here are not expected to be recursive
 * because it is assumed that they will always be called in the order:
 * INIT, [LOCK, UNLOCK]*, FINI.
 */

/*
 * Python's threads are serialized, so object malloc locking is disabled.
 * (A hypothetical mapping onto real locks is sketched after these macros.)
 */
#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
#define SIMPLELOCK_LOCK(lock)   /* acquire released lock                */
#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock                */
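
/*
 * A hypothetical sketch (not used): if pymalloc ever needed real locking --
 * e.g. for callers that enter it without holding the GIL -- the macros above
 * could be mapped onto the existing PyThread lock API roughly as follows.
 * This is only an assumption about how such a build might look.
 */
#if 0
#include "pythread.h"
#define SIMPLELOCK_DECL(lock)   static PyThread_type_lock lock;
#define SIMPLELOCK_INIT(lock)   ((lock) = PyThread_allocate_lock())
#define SIMPLELOCK_FINI(lock)   PyThread_free_lock(lock)
#define SIMPLELOCK_LOCK(lock)   PyThread_acquire_lock((lock), WAIT_LOCK)
#define SIMPLELOCK_UNLOCK(lock) PyThread_release_lock(lock)
#endif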

/*
 * Basic types
 * I don't care if these are defined in <sys/types.h> or elsewhere. Axiom.
 */
#undef  uchar
#define uchar   unsigned char   /* assuming == 8 bits  */

#undef  uint
#define uint    unsigned int    /* assuming >= 16 bits */

#undef  ulong
#define ulong   unsigned long   /* assuming >= 32 bits */

#undef  uptr
#define uptr    Py_uintptr_t

/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uchar block;

/* Pool for small blocks. */
struct pool_header {
        union { block *_padding;
                uint count; } ref;      /* number of allocated blocks    */
        block *freeblock;               /* pool's free list head         */
        struct pool_header *nextpool;   /* next pool of this size class  */
        struct pool_header *prevpool;   /* previous pool       ""        */
        uint arenaindex;                /* index into arenas of base adr */
        uint szidx;                     /* block size class index        */
        uint nextoffset;                /* bytes to virgin block         */
        uint maxnextoffset;             /* largest valid nextoffset      */
};

typedef struct pool_header *poolp;

#undef  ROUNDUP
#define ROUNDUP(x)              (((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK)
#define POOL_OVERHEAD           ROUNDUP(sizeof(struct pool_header))

#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */

/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
#define POOL_ADDR(P) ((poolp)((uptr)(P) & ~(uptr)POOL_SIZE_MASK))

/* Return total number of blocks in pool of size index I, as a uint. */
#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))

/*==========================================================================*/

/*
 * This malloc lock
 */
SIMPLELOCK_DECL(_malloc_lock);
#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)

/*
 * Pool table -- headed, circular, doubly-linked lists of partially used pools.

This is involved.  For an index i, usedpools[i+i] is the header for a list of
all partially used pools holding small blocks with "size class idx" i. So
usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.

Pools are carved off the current arena highwater mark (file static arenabase)
as needed.  Once carved off, a pool is in one of three states forever after:

used == partially used, neither empty nor full
    At least one block in the pool is currently allocated, and at least one
    block in the pool is not currently allocated (note this implies a pool
    has room for at least two blocks).
    This is a pool's initial state, as a pool is created only when malloc
    needs space.
    The pool holds blocks of a fixed size, and is in the circular list headed
    at usedpools[i] (see above).  It's linked to the other used pools of the
    same size class via the pool_header's nextpool and prevpool members.
    If all but one block is currently allocated, a malloc can cause a
    transition to the full state.  If all but one block is not currently
    allocated, a free can cause a transition to the empty state.

full == all the pool's blocks are currently allocated
    On transition to full, a pool is unlinked from its usedpools[] list.
    It's not linked to from anything anymore, and its nextpool and
    prevpool members are meaningless until it transitions back to used.
    A free of a block in a full pool puts the pool back in the used state.
    Then it's linked in at the front of the appropriate usedpools[] list, so
    that the next allocation for its size class will reuse the freed block.

empty == all the pool's blocks are currently available for allocation
    On transition to empty, a pool is unlinked from its usedpools[] list,
    and linked to the front of the (file static) singly-linked freepools list,
    via its nextpool member.  The prevpool member has no meaning in this case.
    Empty pools have no inherent size class:  the next time a malloc finds
    an empty list in usedpools[], it takes the first pool off of freepools.
    If the size class needed happens to be the same as the size class the pool
    last had, some pool initialization can be skipped.


Block Management

Blocks within pools are again carved out as needed.  pool->freeblock points to
the start of a singly-linked list of free blocks within the pool.  When a
block is freed, it's inserted at the front of its pool's freeblock list.  Note
that the available blocks in a pool are *not* linked all together when a pool
is initialized.  Instead only "the first two" (lowest addresses) blocks are
set up, returning the first such block, and setting pool->freeblock to a
one-block list holding the second such block.  This is consistent with the
fact that pymalloc strives at all levels (arena, pool, and block) never to
touch a piece of memory until it's actually needed.  (A sketch of this
carving logic follows this comment.)

So long as a pool is in the used state, we're certain there *is* a block
available for allocating.  If pool->freeblock is NULL then, that means we
simply haven't yet gotten to one of the higher-address blocks.  The offset
from the pool_header to the start of "the next" virgin block is stored in
the pool_header nextoffset member, and the largest value of nextoffset that
makes sense is stored in the maxnextoffset member when a pool is initialized.
All the blocks in a pool have been passed out at least once when and only
when nextoffset > maxnextoffset.


Major obscurity:  While the usedpools vector is declared to have poolp
entries, it doesn't really.  It really contains two pointers per (conceptual)
poolp entry, the nextpool and prevpool members of a pool_header.  The
excruciating initialization code below fools C so that

    usedpools[i+i]

"acts like" a genuine poolp, but only so long as you only reference its
nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
compensating for the fact that a pool_header's nextpool and prevpool members
immediately follow a pool_header's first two members:

    union { block *_padding;
            uint count; } ref;
    block *freeblock;

each of which consumes sizeof(block *) bytes.  So what usedpools[i+i] really
contains is a fudged-up pointer p such that *if* C believes it's a poolp
pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
circular list is empty).  (A sketch after the usedpools[] initializer below
spells this out.)

It's unclear why the usedpools setup is so convoluted.  It could be to
minimize the amount of cache required to hold this heavily-referenced table
(which only *needs* the two interpool pointer members of a pool_header). OTOH,
referencing code has to remember to "double the index" and doing so isn't
free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
on the fact that C doesn't insert any padding anywhere in a pool_header at or
before the prevpool member.
**************************************************************************** */
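
/*
 * Illustrative sketch (not compiled): the per-pool free list and the lazy
 * carving of virgin blocks described above.  The function name is
 * hypothetical; the real logic lives in _PyMalloc_Malloc and _PyMalloc_Free
 * below.
 */
#if 0
static block *
pool_take_block_sketch(poolp pool, uint size_idx)
{
        block *bp = pool->freeblock;
        if (bp != NULL) {
                /* Pop the head of the singly-linked free list; the next
                 * link lives in the first word of each free block. */
                pool->freeblock = *(block **)bp;
                if (pool->freeblock == NULL &&
                    pool->nextoffset <= pool->maxnextoffset) {
                        /* Free list exhausted: carve one more virgin block
                         * off the pool's high-water mark and leave it as a
                         * one-block free list. */
                        pool->freeblock = (block *)pool + pool->nextoffset;
                        pool->nextoffset += INDEX2SIZE(size_idx);
                        *(block **)(pool->freeblock) = NULL;
                }
        }
        return bp;
}
#endif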

#define PTA(x)  ((poolp )((uchar *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
#define PT(x)   PTA(x), PTA(x)

static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
        PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
#if NB_SMALL_SIZE_CLASSES > 8
        , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
#if NB_SMALL_SIZE_CLASSES > 16
        , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
#if NB_SMALL_SIZE_CLASSES > 24
        , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
#if NB_SMALL_SIZE_CLASSES > 32
        , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
#if NB_SMALL_SIZE_CLASSES > 40
        , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
#if NB_SMALL_SIZE_CLASSES > 48
        , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
#if NB_SMALL_SIZE_CLASSES > 56
        , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
#endif /* NB_SMALL_SIZE_CLASSES > 32 */
#endif /* NB_SMALL_SIZE_CLASSES > 24 */
#endif /* NB_SMALL_SIZE_CLASSES > 16 */
#endif /* NB_SMALL_SIZE_CLASSES >  8 */
};
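
/*
 * Illustrative sketch (not compiled): why the PTA() trickery above works.
 * The fake poolp stored at usedpools[2*i] points 2*sizeof(block *) bytes
 * *before* &usedpools[2*i], so its nextpool member (which follows the ref
 * union and the freeblock pointer in pool_header) lands exactly on
 * usedpools[2*i], and prevpool lands on usedpools[2*i + 1].  An empty list
 * is then simply a header whose nextpool and prevpool point back at itself.
 * The function name is hypothetical.
 */
#if 0
static void
usedpools_sanity_sketch(uint sizeclass)
{
        poolp header = usedpools[sizeclass + sizeclass];
        /* For an empty list the fake header points at itself ... */
        assert(header->nextpool == header && header->prevpool == header);
        /* ... because nextpool overlays the usedpools[] slot itself. */
        assert((void *)&header->nextpool ==
               (void *)&usedpools[sizeclass + sizeclass]);
}
#endif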

/*
 * Free (cached) pools
 */
static poolp freepools = NULL;          /* free list for cached pools */

/*==========================================================================*/
/* Arena management. */

/* arenas is a vector of arena base addresses, in order of allocation time.
 * arenas currently contains narenas entries, and has space allocated
 * for at most maxarenas entries.
 *
 * CAUTION:  See the long comment block about thread safety in new_arena():
 * the code currently relies in deep ways on the fact that this vector only
 * grows, and only grows by appending at the end.  For now we never return an
 * arena to the OS.
 */
static uptr *volatile arenas = NULL;    /* the pointer itself is volatile */
static volatile uint narenas = 0;
static uint maxarenas = 0;

/* Number of pools still available to be allocated in the current arena. */
static uint nfreepools = 0;

/* Free space start address in current arena.  This is pool-aligned. */
static block *arenabase = NULL;

#if 0
static ulong wasmine = 0;
static ulong wasntmine = 0;

static void
dumpem(void *ptr)
{
        if (ptr)
                printf("inserted new arena at %08x\n", ptr);
        printf("# arenas %u\n", narenas);
        printf("was mine %lu wasn't mine %lu\n", wasmine, wasntmine);
}
#define INCMINE ++wasmine
#define INCTHEIRS ++wasntmine

#else
#define dumpem(ptr)
#define INCMINE
#define INCTHEIRS
#endif

/* Allocate a new arena and return its base address.  If we run out of
 * memory, return NULL.
 */
static block *
new_arena(void)
{
        uint excess;    /* number of bytes above pool alignment */
        block *bp = (block *)malloc(ARENA_SIZE);
        if (bp == NULL)
                return NULL;

        /* arenabase <- first pool-aligned address in the arena
           nfreepools <- number of whole pools that fit after alignment */
        arenabase = bp;
        nfreepools = ARENA_SIZE / POOL_SIZE;
        assert(POOL_SIZE * nfreepools == ARENA_SIZE);
        excess = (uint)bp & POOL_SIZE_MASK;
        if (excess != 0) {
                --nfreepools;
                arenabase += POOL_SIZE - excess;
        }

        /* Make room for a new entry in the arenas vector. */
        if (arenas == NULL) {
                assert(narenas == 0 && maxarenas == 0);
                arenas = (uptr *)malloc(16 * sizeof(*arenas));
                if (arenas == NULL)
                        goto error;
                maxarenas = 16;
        }
        else if (narenas == maxarenas) {
                /* Grow arenas.  Don't use realloc:  if this fails, we
                 * don't want to lose the base addresses we already have.
                 *
                 * Exceedingly subtle:  Someone may be calling the pymalloc
                 * free via PyMem_{DEL, Del, FREE, Free} without holding the
                 * GIL.  Someone else may simultaneously be calling the
                 * pymalloc malloc while holding the GIL via, e.g.,
                 * PyObject_New.  Now the pymalloc free may index into arenas
                 * for an address check, while the pymalloc malloc calls
                 * new_arena and we end up here to grow a new arena *and*
                 * grow the arenas vector.  If the value for arenas pymalloc
                 * free picks up "vanishes" during this resize, anything may
                 * happen, and it would be an incredibly rare bug.  Therefore
                 * the code here takes great pains to make sure that, at every
                 * moment, arenas always points to an intact vector of
                 * addresses.  It doesn't matter whether arenas points to a
                 * wholly up-to-date vector when pymalloc free checks it in
                 * this case, because the only legal (and that even this is
                 * legal is debatable) way to call PyMem_{Del, etc} while not
                 * holding the GIL is if the memory being released is not
                 * object memory, i.e. if the address check in pymalloc free
                 * is supposed to fail.  Having an incomplete vector can't
                 * make a supposed-to-fail case succeed by mistake (it could
                 * only make a supposed-to-succeed case fail by mistake).
                 *
                 * In addition, without a lock we can't know for sure when
                 * an old vector is no longer referenced, so we simply let
                 * old vectors leak.
                 *
                 * And on top of that, since narenas and arenas can't be
                 * changed as-a-pair atomically without a lock, we're also
                 * careful to declare them volatile and ensure that we change
                 * arenas first.  This prevents another thread from picking
                 * up an narenas value too large for the arenas value it
                 * reads up (arenas never shrinks).
                 *
                 * Read the above 50 times before changing anything in this
                 * block.
                 */
                uptr *p;
                uint newmax = maxarenas << 1;
                if (newmax <= maxarenas)        /* overflow */
                        goto error;
                p = (uptr *)malloc(newmax * sizeof(*arenas));
                if (p == NULL)
                        goto error;
                memcpy(p, arenas, narenas * sizeof(*arenas));
                arenas = p;     /* old arenas deliberately leaked */
                maxarenas = newmax;
        }

        /* Append the new arena address to arenas. */
        assert(narenas < maxarenas);
        arenas[narenas] = (uptr)bp;
        ++narenas;      /* can't overflow, since narenas < maxarenas before */
        dumpem(bp);
        return bp;

error:
        free(bp);
        nfreepools = 0;
        return NULL;
}
534/* Return true if and only if P is an address that was allocated by
535 * pymalloc. I must be the index into arenas that the address claims
536 * to come from.
Tim Petersc2ce91a2002-03-30 21:36:04 +0000537 *
Tim Petersd97a1c02002-03-30 06:09:22 +0000538 * Tricky: Letting B be the arena base address in arenas[I], P belongs to the
539 * arena if and only if
Tim Peters3c83df22002-03-30 07:04:41 +0000540 * B <= P < B + ARENA_SIZE
Tim Petersd97a1c02002-03-30 06:09:22 +0000541 * Subtracting B throughout, this is true iff
Tim Peters3c83df22002-03-30 07:04:41 +0000542 * 0 <= P-B < ARENA_SIZE
Tim Petersd97a1c02002-03-30 06:09:22 +0000543 * By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
Tim Petersc2ce91a2002-03-30 21:36:04 +0000544 *
545 * Obscure: A PyMem "free memory" function can call the pymalloc free or
546 * realloc before the first arena has been allocated. arenas is still
547 * NULL in that case. We're relying on that narenas is also 0 in that case,
548 * so the (I) < narenas must be false, saving us from trying to index into
549 * a NULL arenas.
Tim Petersd97a1c02002-03-30 06:09:22 +0000550 */
551#define ADDRESS_IN_RANGE(P, I) \
Tim Peters3c83df22002-03-30 07:04:41 +0000552 ((I) < narenas && (uptr)(P) - arenas[I] < (uptr)ARENA_SIZE)
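
/*
 * Illustrative sketch (not compiled): the unsigned subtraction above folds
 * the two comparisons "B <= P" and "P < B + ARENA_SIZE" into one, because
 * when P < B the unsigned difference wraps around to a huge value that is
 * certainly >= ARENA_SIZE.  The function name is hypothetical and exists
 * only to spell out the equivalence.
 */
#if 0
static int
address_in_range_longhand(void *p, uint i)
{
        uptr base;
        if (i >= narenas)       /* no such arena (or no arenas yet) */
                return 0;
        base = arenas[i];
        return base <= (uptr)p && (uptr)p < base + ARENA_SIZE;
}
#endif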

/*==========================================================================*/

/* malloc.  Note that nbytes==0 tries to return a non-NULL pointer, distinct
 * from all other currently live pointers.  This may not be possible.
 */

/*
 * The basic blocks are ordered by decreasing execution frequency,
 * which minimizes the number of jumps in the most common cases,
 * improves branching prediction and instruction scheduling (small
 * block allocations typically result in a couple of instructions).
 * Unless the optimizer reorders everything, being too smart...
 */

void *
_PyMalloc_Malloc(size_t nbytes)
{
        block *bp;
        poolp pool;
        poolp next;
        uint size;

        /*
         * This implicitly redirects malloc(0).
         */
        if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
                LOCK();
                /*
                 * Most frequent paths first
                 */
                size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
                pool = usedpools[size + size];
                if (pool != pool->nextpool) {
                        /*
                         * There is a used pool for this size class.
                         * Pick up the head block of its free list.
                         */
                        ++pool->ref.count;
                        bp = pool->freeblock;
                        if ((pool->freeblock = *(block **)bp) != NULL) {
                                UNLOCK();
                                return (void *)bp;
                        }
                        /*
                         * Reached the end of the free list, try to extend it
                         */
                        if (pool->nextoffset <= pool->maxnextoffset) {
                                /*
                                 * There is room for another block
                                 */
                                pool->freeblock = (block *)pool +
                                                  pool->nextoffset;
                                pool->nextoffset += INDEX2SIZE(size);
                                *(block **)(pool->freeblock) = NULL;
                                UNLOCK();
                                return (void *)bp;
                        }
                        /*
                         * Pool is full, unlink from used pools
                         */
                        next = pool->nextpool;
                        pool = pool->prevpool;
                        next->prevpool = pool;
                        pool->nextpool = next;
                        UNLOCK();
                        return (void *)bp;
                }
                /*
                 * Try to get a cached free pool
                 */
                pool = freepools;
                if (pool != NULL) {
                        /*
                         * Unlink from cached pools
                         */
                        freepools = pool->nextpool;
                init_pool:
                        /*
                         * Frontlink to used pools
                         */
                        next = usedpools[size + size]; /* == prev */
                        pool->nextpool = next;
                        pool->prevpool = next;
                        next->nextpool = pool;
                        next->prevpool = pool;
                        pool->ref.count = 1;
                        if (pool->szidx == size) {
                                /*
                                 * Luckily, this pool last contained blocks
                                 * of the same size class, so its header
                                 * and free list are already initialized.
                                 */
                                bp = pool->freeblock;
                                pool->freeblock = *(block **)bp;
                                UNLOCK();
                                return (void *)bp;
                        }
                        /*
                         * Initialize the pool header, set up the free list to
                         * contain just the second block, and return the first
                         * block.
                         */
                        pool->szidx = size;
                        size = INDEX2SIZE(size);
                        bp = (block *)pool + POOL_OVERHEAD;
                        pool->nextoffset = POOL_OVERHEAD + (size << 1);
                        pool->maxnextoffset = POOL_SIZE - size;
                        pool->freeblock = bp + size;
                        *(block **)(pool->freeblock) = NULL;
                        UNLOCK();
                        return (void *)bp;
                }
                /*
                 * Allocate new pool
                 */
                if (nfreepools) {
                commit_pool:
                        --nfreepools;
                        pool = (poolp)arenabase;
                        arenabase += POOL_SIZE;
                        pool->arenaindex = narenas - 1;
                        pool->szidx = DUMMY_SIZE_IDX;
                        goto init_pool;
                }
                /*
                 * Allocate new arena
                 */
#ifdef WITH_MEMORY_LIMITS
                if (!(narenas < MAX_ARENAS)) {
                        UNLOCK();
                        goto redirect;
                }
#endif
                bp = new_arena();
                if (bp != NULL)
                        goto commit_pool;
                UNLOCK();
                goto redirect;
        }

        /* The small block allocator ends here. */

redirect:
        /*
         * Redirect the original request to the underlying (libc) allocator.
         * We jump here on bigger requests, on error in the code above (as a
         * last chance to serve the request) or when the max memory limit
         * has been reached.
         */
        return (void *)malloc(nbytes ? nbytes : 1);
}

/* free */

void
_PyMalloc_Free(void *p)
{
        poolp pool;
        block *lastfree;
        poolp next, prev;
        uint size;

        if (p == NULL)  /* free(NULL) has no effect */
                return;

        pool = POOL_ADDR(p);
        if (ADDRESS_IN_RANGE(p, pool->arenaindex)) {
                /* We allocated this address. */
                LOCK();
                INCMINE;
                /*
                 * Link p to the start of the pool's freeblock list.  Since
                 * the pool had at least the p block outstanding, the pool
                 * wasn't empty (so it's already in a usedpools[] list, or
                 * was full and is in no list -- it's not in the freeblocks
                 * list in any case).
                 */
                assert(pool->ref.count > 0);    /* else it was empty */
                *(block **)p = lastfree = pool->freeblock;
                pool->freeblock = (block *)p;
                if (lastfree) {
                        /*
                         * freeblock wasn't NULL, so the pool wasn't full,
                         * and the pool is in a usedpools[] list.
                         */
                        if (--pool->ref.count != 0) {
                                /* pool isn't empty:  leave it in usedpools */
                                UNLOCK();
                                return;
                        }
                        /*
                         * Pool is now empty:  unlink from usedpools, and
                         * link to the front of freepools.  This ensures that
                         * previously freed pools will be allocated later
                         * (being not referenced, they are perhaps paged out).
                         */
                        next = pool->nextpool;
                        prev = pool->prevpool;
                        next->prevpool = prev;
                        prev->nextpool = next;
                        /* Link to freepools.  This is a singly-linked list,
                         * and pool->prevpool isn't used there.
                         */
                        pool->nextpool = freepools;
                        freepools = pool;
                        UNLOCK();
                        return;
                }
                /*
                 * Pool was full, so doesn't currently live in any list:
                 * link it to the front of the appropriate usedpools[] list.
                 * This mimics LRU pool usage for new allocations and
                 * targets optimal filling when several pools contain
                 * blocks of the same size class.
                 */
                --pool->ref.count;
                assert(pool->ref.count > 0);    /* else the pool is empty */
                size = pool->szidx;
                next = usedpools[size + size];
                prev = next->prevpool;
                /* insert pool before next:   prev <-> pool <-> next */
                pool->nextpool = next;
                pool->prevpool = prev;
                next->prevpool = pool;
                prev->nextpool = pool;
                UNLOCK();
                return;
        }

        /* We didn't allocate this address. */
        INCTHEIRS;
        free(p);
}

/* realloc.  If p is NULL, this acts like malloc(nbytes).  Else if nbytes==0,
 * then as the Python docs promise, we do not treat this like free(p), and
 * return a non-NULL result.
 */

void *
_PyMalloc_Realloc(void *p, size_t nbytes)
{
        void *bp;
        poolp pool;
        uint size;

        if (p == NULL)
                return _PyMalloc_Malloc(nbytes);

        pool = POOL_ADDR(p);
        if (ADDRESS_IN_RANGE(p, pool->arenaindex)) {
                /* We're in charge of this block */
                INCMINE;
                size = INDEX2SIZE(pool->szidx);
                if (size >= nbytes)
                        /* Don't bother if a smaller size was requested. */
                        return p;
                /* We need more memory. */
                assert(nbytes != 0);
                bp = _PyMalloc_Malloc(nbytes);
                if (bp != NULL) {
                        memcpy(bp, p, size);
                        _PyMalloc_Free(p);
                }
                return bp;
        }
        /* We're not managing this block. */
        INCTHEIRS;
        if (nbytes <= SMALL_REQUEST_THRESHOLD) {
                /* Take over this block. */
                bp = _PyMalloc_Malloc(nbytes ? nbytes : 1);
                if (bp != NULL) {
                        memcpy(bp, p, nbytes);
                        free(p);
                }
                else if (nbytes == 0) {
                        /* Meet the doc's promise that nbytes==0 will
                         * never return a NULL pointer when p isn't NULL.
                         */
                        bp = p;
                }
        }
        else {
                assert(nbytes != 0);
                bp = realloc(p, nbytes);
        }
        return bp;
}

#else   /* ! WITH_PYMALLOC */

/*==========================================================================*/
/* pymalloc not enabled:  Redirect the entry points to the PyMem family. */

void *
_PyMalloc_Malloc(size_t n)
{
        return PyMem_MALLOC(n);
}

void *
_PyMalloc_Realloc(void *p, size_t n)
{
        return PyMem_REALLOC(p, n);
}

void
_PyMalloc_Free(void *p)
{
        PyMem_FREE(p);
}
#endif /* WITH_PYMALLOC */

/*==========================================================================*/
/* Regardless of whether pymalloc is enabled, export entry points for
 * the object-oriented pymalloc functions.
 */

PyObject *
_PyMalloc_New(PyTypeObject *tp)
{
        PyObject *op;
        op = (PyObject *) _PyMalloc_MALLOC(_PyObject_SIZE(tp));
        if (op == NULL)
                return PyErr_NoMemory();
        return PyObject_INIT(op, tp);
}

PyVarObject *
_PyMalloc_NewVar(PyTypeObject *tp, int nitems)
{
        PyVarObject *op;
        const size_t size = _PyObject_VAR_SIZE(tp, nitems);
        op = (PyVarObject *) _PyMalloc_MALLOC(size);
        if (op == NULL)
                return (PyVarObject *)PyErr_NoMemory();
        return PyObject_INIT_VAR(op, tp, nitems);
}

void
_PyMalloc_Del(PyObject *op)
{
        _PyMalloc_FREE(op);
}

#ifdef PYMALLOC_DEBUG
/*==========================================================================*/
/* A cross-platform debugging allocator.  This doesn't manage memory directly,
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
 */

#define PYMALLOC_CLEANBYTE      0xCB    /* uninitialized memory */
#define PYMALLOC_DEADBYTE       0xDB    /* free()ed memory */
#define PYMALLOC_FORBIDDENBYTE  0xFB    /* unusable memory */

static ulong serialno = 0;      /* incremented on each debug {m,re}alloc */

/* serialno is always incremented via calling this routine.  The point is
   to supply a single place to set a breakpoint.
*/
static void
bumpserialno(void)
{
        ++serialno;
}


/* Read 4 bytes at p as a big-endian ulong. */
static ulong
read4(const void *p)
{
        const uchar *q = (const uchar *)p;
        return ((ulong)q[0] << 24) |
               ((ulong)q[1] << 16) |
               ((ulong)q[2] <<  8) |
                (ulong)q[3];
}

/* Write the 4 least-significant bytes of n as a big-endian unsigned int,
   MSB at address p, LSB at p+3. */
static void
write4(void *p, ulong n)
{
        uchar *q = (uchar *)p;
        q[0] = (uchar)((n >> 24) & 0xff);
        q[1] = (uchar)((n >> 16) & 0xff);
        q[2] = (uchar)((n >>  8) & 0xff);
        q[3] = (uchar)( n        & 0xff);
}

/* The debug malloc asks for 16 extra bytes and fills them with useful stuff,
   here calling the underlying malloc's result p:

p[0:4]
    Number of bytes originally asked for.  4-byte unsigned integer,
    big-endian (easier to read in a memory dump).
p[4:8]
    Copies of PYMALLOC_FORBIDDENBYTE.  Used to catch under-writes
    and reads.
p[8:8+n]
    The requested memory, filled with copies of PYMALLOC_CLEANBYTE.
    Used to catch reference to uninitialized memory.
    &p[8] is returned.  Note that this is 8-byte aligned if PyMalloc
    handled the request itself.
p[8+n:8+n+4]
    Copies of PYMALLOC_FORBIDDENBYTE.  Used to catch over-writes
    and reads.
p[8+n+4:8+n+8]
    A serial number, incremented by 1 on each call to _PyMalloc_DebugMalloc
    and _PyMalloc_DebugRealloc.
    4-byte unsigned integer, big-endian.
    If "bad memory" is detected later, the serial number gives an
    excellent way to set a breakpoint on the next run, to capture the
    instant at which this block was passed out.

(A sketch relating these fields to the pointer handed back to the caller
follows this comment.)
*/
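
/*
 * Illustrative sketch (not compiled): given a pointer q returned by the
 * debug malloc, this is where the surrounding debug fields live relative
 * to it.  The helper name is hypothetical and exists only for illustration.
 */
#if 0
static void
dump_debug_fields_sketch(const void *block_returned_to_caller)
{
        const uchar *q = (const uchar *)block_returned_to_caller;
        ulong nbytes = read4(q - 8);            /* original request size */
        ulong serial = read4(q + nbytes + 4);   /* which debug call made it */
        /* q[-4] .. q[-1]            should all be PYMALLOC_FORBIDDENBYTE */
        /* q[0]  .. q[nbytes-1]      is the caller's data                 */
        /* q[nbytes] .. q[nbytes+3]  should all be PYMALLOC_FORBIDDENBYTE */
        fprintf(stderr, "%lu bytes, serial #%lu\n", nbytes, serial);
}
#endif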

void *
_PyMalloc_DebugMalloc(size_t nbytes)
{
        uchar *p;       /* base address of malloc'ed block */
        uchar *tail;    /* p + 8 + nbytes == pointer to tail pad bytes */
        size_t total;   /* nbytes + 16 */

        bumpserialno();
        total = nbytes + 16;
        if (total < nbytes || (total >> 31) > 1) {
                /* overflow, or we can't represent it in 4 bytes */
                /* Obscure:  can't do (total >> 32) != 0 instead, because
                   C doesn't define what happens for a right-shift of 32
                   when size_t is a 32-bit type.  At least C guarantees
                   size_t is an unsigned type. */
                return NULL;
        }

        p = _PyMalloc_Malloc(total);
        if (p == NULL)
                return NULL;

        write4(p, nbytes);
        p[4] = p[5] = p[6] = p[7] = PYMALLOC_FORBIDDENBYTE;

        if (nbytes > 0)
                memset(p+8, PYMALLOC_CLEANBYTE, nbytes);

        tail = p + 8 + nbytes;
        tail[0] = tail[1] = tail[2] = tail[3] = PYMALLOC_FORBIDDENBYTE;
        write4(tail + 4, serialno);

        return p+8;
}

/* The debug free first checks the 8 bytes on each end for sanity (in
   particular, that the PYMALLOC_FORBIDDENBYTEs are still intact).
   Then fills the original bytes with PYMALLOC_DEADBYTE.
   Then calls the underlying free.
*/
void
_PyMalloc_DebugFree(void *p)
{
        uchar *q = (uchar *)p;
        size_t nbytes;

        if (p == NULL)
                return;
        _PyMalloc_DebugCheckAddress(p);
        nbytes = read4(q-8);
        if (nbytes > 0)
                memset(q, PYMALLOC_DEADBYTE, nbytes);
        _PyMalloc_Free(q-8);
}

void *
_PyMalloc_DebugRealloc(void *p, size_t nbytes)
{
        uchar *q = (uchar *)p;
        size_t original_nbytes;
        void *fresh;    /* new memory block, if needed */

        if (p == NULL)
                return _PyMalloc_DebugMalloc(nbytes);

        _PyMalloc_DebugCheckAddress(p);
        original_nbytes = read4(q-8);
        if (nbytes == original_nbytes) {
                /* note that this case is likely to be common due to the
                   way Python appends to lists */
                bumpserialno();
                write4(q + nbytes + 4, serialno);
                return p;
        }

        if (nbytes < original_nbytes) {
                /* shrinking -- leave the guts alone, except to
                   fill the excess with DEADBYTE */
                const size_t excess = original_nbytes - nbytes;
                bumpserialno();
                write4(q-8, nbytes);
                /* kill the excess bytes plus the trailing 8 pad bytes */
                q += nbytes;
                q[0] = q[1] = q[2] = q[3] = PYMALLOC_FORBIDDENBYTE;
                write4(q+4, serialno);
                memset(q+8, PYMALLOC_DEADBYTE, excess);
                return p;
        }

        /* More memory is needed:  get it, copy over the first original_nbytes
           of the original data, and free the original memory. */
        fresh = _PyMalloc_DebugMalloc(nbytes);
        if (fresh != NULL && original_nbytes > 0)
                memcpy(fresh, p, original_nbytes);
        _PyMalloc_DebugFree(p);
        return fresh;
}

/* Check the forbidden bytes on both ends of the memory allocated for p.
 * If anything is wrong, print info to stderr via _PyMalloc_DebugDumpAddress,
 * and call Py_FatalError to kill the program.
 */
void
_PyMalloc_DebugCheckAddress(const void *p)
{
        const uchar *q = (const uchar *)p;
        char *msg;
        int i;

        if (p == NULL) {
                msg = "didn't expect a NULL pointer";
                goto error;
        }

        for (i = 4; i >= 1; --i) {
                if (*(q-i) != PYMALLOC_FORBIDDENBYTE) {
                        msg = "bad leading pad byte";
                        goto error;
                }
        }

        {
                const ulong nbytes = read4(q-8);
                const uchar *tail = q + nbytes;
                for (i = 0; i < 4; ++i) {
                        if (tail[i] != PYMALLOC_FORBIDDENBYTE) {
                                msg = "bad trailing pad byte";
                                goto error;
                        }
                }
        }

        return;

error:
        _PyMalloc_DebugDumpAddress(p);
        Py_FatalError(msg);
}

/* Display info to stderr about the memory block at p. */
void
_PyMalloc_DebugDumpAddress(const void *p)
{
        const uchar *q = (const uchar *)p;
        const uchar *tail;
        ulong nbytes, serial;
        int i;

        fprintf(stderr, "Debug memory block at address p=%p:\n", p);
        if (p == NULL)
                return;

        nbytes = read4(q-8);
        fprintf(stderr, "    %lu bytes originally allocated\n", nbytes);

        /* In case this is nuts, check the pad bytes before trying to read up
           the serial number (the address deref could blow up). */

        fputs("    the 4 pad bytes at p-4 are ", stderr);
        if (*(q-4) == PYMALLOC_FORBIDDENBYTE &&
            *(q-3) == PYMALLOC_FORBIDDENBYTE &&
            *(q-2) == PYMALLOC_FORBIDDENBYTE &&
            *(q-1) == PYMALLOC_FORBIDDENBYTE) {
                fputs("PYMALLOC_FORBIDDENBYTE, as expected\n", stderr);
        }
        else {
                fprintf(stderr, "not all PYMALLOC_FORBIDDENBYTE (0x%02x):\n",
                        PYMALLOC_FORBIDDENBYTE);
                for (i = 4; i >= 1; --i) {
                        const uchar byte = *(q-i);
                        fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
                        if (byte != PYMALLOC_FORBIDDENBYTE)
                                fputs(" *** OUCH", stderr);
                        fputc('\n', stderr);
                }
        }

        tail = q + nbytes;
        fprintf(stderr, "    the 4 pad bytes at tail=%p are ", tail);
        if (tail[0] == PYMALLOC_FORBIDDENBYTE &&
            tail[1] == PYMALLOC_FORBIDDENBYTE &&
            tail[2] == PYMALLOC_FORBIDDENBYTE &&
            tail[3] == PYMALLOC_FORBIDDENBYTE) {
                fputs("PYMALLOC_FORBIDDENBYTE, as expected\n", stderr);
        }
        else {
                fprintf(stderr, "not all PYMALLOC_FORBIDDENBYTE (0x%02x):\n",
                        PYMALLOC_FORBIDDENBYTE);
                for (i = 0; i < 4; ++i) {
                        const uchar byte = tail[i];
                        fprintf(stderr, "        at tail+%d: 0x%02x",
                                i, byte);
                        if (byte != PYMALLOC_FORBIDDENBYTE)
                                fputs(" *** OUCH", stderr);
                        fputc('\n', stderr);
                }
        }

        serial = read4(tail+4);
        fprintf(stderr, "    the block was made by call #%lu to "
                        "debug malloc/realloc\n", serial);

        if (nbytes > 0) {
                int i = 0;
                fputs("    data at p:", stderr);
                /* print up to 8 bytes at the start */
                while (q < tail && i < 8) {
                        fprintf(stderr, " %02x", *q);
                        ++i;
                        ++q;
                }
                /* and up to 8 at the end */
                if (q < tail) {
                        if (tail - q > 8) {
                                fputs(" ...", stderr);
                                q = tail - 8;
                        }
                        while (q < tail) {
                                fprintf(stderr, " %02x", *q);
                                ++q;
                        }
                }
                fputc('\n', stderr);
        }
}

static ulong
printone(const char* msg, ulong value)
{
        int i, k;
        char buf[100];
        ulong origvalue = value;

        fputs(msg, stderr);
        for (i = (int)strlen(msg); i < 35; ++i)
                fputc(' ', stderr);
        fputc('=', stderr);

        /* Write the value with commas. */
        i = 22;
        buf[i--] = '\0';
        buf[i--] = '\n';
        k = 3;
        do {
                ulong nextvalue = value / 10UL;
                uint digit = value - nextvalue * 10UL;
                value = nextvalue;
                buf[i--] = (char)(digit + '0');
                --k;
                if (k == 0 && value && i >= 0) {
                        k = 3;
                        buf[i--] = ',';
                }
        } while (value && i >= 0);

        while (i >= 0)
                buf[i--] = ' ';
        fputs(buf, stderr);

        return origvalue;
}

/* Print summary info to stderr about the state of pymalloc's structures. */
void
_PyMalloc_DebugDumpStats(void)
{
        uint i;
        const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
        /* # of pools, allocated blocks, and free blocks per class index */
        ulong numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
        ulong numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
        ulong numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
        /* total # of allocated bytes in used and full pools */
        ulong allocated_bytes = 0;
        /* total # of available bytes in used pools */
        ulong available_bytes = 0;
        /* # of free pools + pools not yet carved out of current arena */
        uint numfreepools = 0;
        /* # of bytes for arena alignment padding */
        uint arena_alignment = 0;
        /* # of bytes in used and full pools used for pool_headers */
        ulong pool_header_bytes = 0;
        /* # of bytes in used and full pools wasted due to quantization,
         * i.e. the necessarily leftover space at the ends of used and
         * full pools.
         */
        ulong quantization = 0;
        /* running total -- should equal narenas * ARENA_SIZE */
        ulong total;
        char buf[128];

        fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
                SMALL_REQUEST_THRESHOLD, numclasses);
        fprintf(stderr, "pymalloc malloc+realloc called %lu times.\n",
                serialno);

        for (i = 0; i < numclasses; ++i)
                numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

        /* Because full pools aren't linked to from anything, it's easiest
         * to march over all the arenas.  If we're lucky, most of the memory
         * will be living in full pools -- would be a shame to miss them.
         */
        for (i = 0; i < narenas; ++i) {
                uint poolsinarena;
                uint j;
                uptr base = arenas[i];

                /* round up to pool alignment */
                poolsinarena = ARENA_SIZE / POOL_SIZE;
                if (base & (uptr)POOL_SIZE_MASK) {
                        --poolsinarena;
                        arena_alignment += POOL_SIZE;
                        base &= ~(uptr)POOL_SIZE_MASK;
                        base += POOL_SIZE;
                }

                if (i == narenas - 1) {
                        /* current arena may have raw memory at the end */
                        numfreepools += nfreepools;
                        poolsinarena -= nfreepools;
                }

                /* visit every pool in the arena */
                for (j = 0; j < poolsinarena; ++j, base += POOL_SIZE) {
                        poolp p = (poolp)base;
                        if (p->ref.count == 0) {
                                /* currently unused */
                                ++numfreepools;
                                continue;
                        }
                        ++numpools[p->szidx];
                        numblocks[p->szidx] += p->ref.count;
                        numfreeblocks[p->szidx] += NUMBLOCKS(p->szidx) -
                                                   p->ref.count;
                }
        }

        fputc('\n', stderr);
        fputs("class   size   num pools   blocks in use  avail blocks\n"
              "-----   ----   ---------   -------------  ------------\n",
              stderr);

        for (i = 0; i < numclasses; ++i) {
                ulong p = numpools[i];
                ulong b = numblocks[i];
                ulong f = numfreeblocks[i];
                uint size = INDEX2SIZE(i);
                if (p == 0) {
                        assert(b == 0 && f == 0);
                        continue;
                }
                fprintf(stderr, "%5u %6u %11lu %15lu %13lu\n",
                        i, size, p, b, f);
                allocated_bytes += b * size;
                available_bytes += f * size;
                pool_header_bytes += p * POOL_OVERHEAD;
                quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
        }
        fputc('\n', stderr);

        PyOS_snprintf(buf, sizeof(buf),
                "%u arenas * %d bytes/arena", narenas, ARENA_SIZE);
        (void)printone(buf, (ulong)narenas * ARENA_SIZE);

        fputc('\n', stderr);

        total = printone("# bytes in allocated blocks", allocated_bytes);

        PyOS_snprintf(buf, sizeof(buf),
                "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
        total += printone(buf, (ulong)numfreepools * POOL_SIZE);

        total += printone("# bytes in available blocks", available_bytes);
        total += printone("# bytes lost to pool headers", pool_header_bytes);
        total += printone("# bytes lost to quantization", quantization);
        total += printone("# bytes lost to arena alignment", arena_alignment);
        (void)printone("Total", total);
}

#endif  /* PYMALLOC_DEBUG */