#include "Python.h"

#ifdef WITH_PYMALLOC

/* An object allocator for Python.

   Here is an introduction to the layers of the Python memory architecture,
   showing where the object allocator is actually used (layer +2). It is
   called for every object allocation and deallocation (PyObject_New/Del),
   unless the object-specific allocators implement a proprietary allocation
   scheme (ex.: ints use a simple free list). This is also the place where
   the cyclic garbage collector operates selectively on container objects.


    Object-specific allocators
    _____   ______   ______       ________
   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
    _______________________________       |                           |
   [   Python's object allocator   ]      |                           |
+2 | ####### Object memory ####### | <------ Internal buffers ------> |
    ______________________________________________________________    |
   [          Python's raw memory allocator (PyMem_ API)          ]   |
+1 | <----- Python memory (under PyMem manager's control) ------> |   |
    __________________________________________________________________
   [    Underlying general-purpose allocator (ex: C library malloc)   ]
 0 | <------ Virtual memory allocated for the python process -------> |

   =========================================================================
    _______________________________________________________________________
   [                OS-specific Virtual Memory Manager (VMM)               ]
-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
    __________________________________   __________________________________
   [                                  ] [                                  ]
-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |

*/
/*==========================================================================*/

/* A fast, special-purpose memory allocator for small blocks, to be used
   on top of a general-purpose malloc -- heavily based on previous art. */

/* Vladimir Marangozov -- August 2000 */

/*
 * "Memory management is where the rubber meets the road -- if we do the wrong
 * thing at any level, the results will not be good. And if we don't make the
 * levels work well together, we are in serious trouble." (1)
 *
 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
 *     "Dynamic Storage Allocation: A Survey and Critical Review",
 *     in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
 */

/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks */

/*==========================================================================*/

/*
 * Allocation strategy abstract:
 *
 * For small requests, the allocator sub-allocates <Big> blocks of memory.
 * Requests greater than 256 bytes are routed to the system's allocator.
 *
 * Small requests are grouped in size classes spaced 8 bytes apart, due
 * to the required valid alignment of the returned address. Requests of
 * a particular size are serviced from memory pools of 4K (one VMM page).
 * Pools are fragmented on demand and contain free lists of blocks of one
 * particular size class. In other words, there is a fixed-size allocator
 * for each size class. Free pools are shared by the different allocators,
 * thus minimizing the space reserved for a particular size class.
 *
 * This allocation strategy is a variant of what is known as "simple
 * segregated storage based on array of free lists". The main drawback of
 * simple segregated storage is that we might end up with a lot of reserved
 * memory for the different free lists, which degenerate over time. To avoid
 * this, we partition each free list into pools and we dynamically share the
 * reserved space between all free lists. This technique is quite efficient
 * for memory-intensive programs which allocate mainly small-sized blocks.
 *
 * For small requests we have the following table:
 *
 * Request in bytes     Size of allocated block      Size class idx
 * ----------------------------------------------------------------
 *        1-8                     8                       0
 *        9-16                   16                       1
 *       17-24                   24                       2
 *       25-32                   32                       3
 *       33-40                   40                       4
 *       41-48                   48                       5
 *       49-56                   56                       6
 *       57-64                   64                       7
 *       65-72                   72                       8
 *        ...                   ...                     ...
 *      241-248                 248                      30
 *      249-256                 256                      31
 *
 * 0, 257 and up: routed to the underlying allocator.
 */

/*==========================================================================*/

/*
 * -- Main tunable settings section --
 */

/*
 * Alignment of addresses returned to the user. 8-byte alignment works
 * on most current architectures (with 32-bit or 64-bit address buses).
 * The alignment value is also used for grouping small requests in size
 * classes spaced ALIGNMENT bytes apart.
 *
 * You shouldn't change this unless you know what you are doing.
 */
#define ALIGNMENT               8       /* must be 2^N */
#define ALIGNMENT_SHIFT         3
#define ALIGNMENT_MASK          (ALIGNMENT - 1)

/* Return the number of bytes in size class I, as a uint. */
#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)

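/* Compiled-out sketch (illustration only, not part of the allocator): how a
 * request size maps to a size class index and back, using the same
 * expression the small-object malloc below uses.  For example, a 20-byte
 * request lands in class 2 and is served by a 24-byte block.  Uses the uint
 * shorthand and assert() convenience employed elsewhere in this file.
 */
#if 0
static void
example_size_class_mapping(void)
{
        size_t nbytes = 20;                                 /* sample request */
        uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;  /* size class idx */
        assert(size == 2);
        assert(INDEX2SIZE(size) == 24);     /* block size actually handed out */
}
#endif
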
/*
 * Max size threshold below which malloc requests are considered to be
 * small enough in order to use preallocated memory pools. You can tune
 * this value according to your application behaviour and memory needs.
 *
 * The following invariants must hold:
 *      1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 256
 *      2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
 *
 * Although not required, for better performance and space efficiency,
 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
 */
#define SMALL_REQUEST_THRESHOLD 256
#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)

/*
 * The system's VMM page size can be obtained on most unices with a
 * getpagesize() call or deduced from various header files. To make
 * things simpler, we assume that it is 4K, which is OK for most systems.
 * It is probably better if this is the native page size, but it doesn't
 * have to be.
 */
#define SYSTEM_PAGE_SIZE        (4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK   (SYSTEM_PAGE_SIZE - 1)

/*
 * Maximum amount of memory managed by the allocator for small requests.
 */
#ifdef WITH_MEMORY_LIMITS
#ifndef SMALL_MEMORY_LIMIT
#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
#endif
#endif

/*
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 * on a page boundary. This is a reserved virtual address space for the
 * current process (obtained through a malloc call). In no way does this mean
 * that the memory arenas will be used entirely. A malloc(<Big>) is usually
 * an address range reservation for <Big> bytes, unless all pages within this
 * space are referenced subsequently. So malloc'ing big blocks and not using
 * them does not mean "wasting memory"; it's addressable-range wastage...
 *
 * Therefore, allocating arenas with malloc is not optimal, because there is
 * some address space wastage, but this is the most portable way to request
 * memory from the system across various platforms.
 */
#define ARENA_SIZE              (256 << 10)     /* 256KB */

#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS              (SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif

/*
 * Size of the pools used for small blocks. Should be a power of 2,
 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
 */
#define POOL_SIZE               SYSTEM_PAGE_SIZE        /* must be 2^N */
#define POOL_SIZE_MASK          SYSTEM_PAGE_SIZE_MASK

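/* Compiled-out sketch (illustration only): the arena/pool arithmetic implied
 * by the settings above, mirroring what new_arena() below does.  With a
 * 256KB arena and 4K pools, an arena yields 64 pools, minus one if the
 * malloc'ed base address happens not to be pool-aligned.  The base address
 * used here is a made-up value; uptr/uint are the shorthands defined below.
 */
#if 0
static void
example_arena_carving(void)
{
        uint nfreepools = ARENA_SIZE / POOL_SIZE;       /* 64 with the defaults */
        uptr base = (uptr)0x20c010;                     /* hypothetical malloc result */
        uint excess = (uint)(base & POOL_SIZE_MASK);    /* bytes past a pool boundary */

        assert(nfreepools == 64);
        if (excess != 0) {
                --nfreepools;                   /* sacrifice the partial pool */
                base += POOL_SIZE - excess;     /* first pool-aligned address */
        }
        assert((base & POOL_SIZE_MASK) == 0);
}
#endif
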
/*
 * -- End of tunable settings section --
 */

/*==========================================================================*/

/*
 * Locking
 *
 * To reduce lock contention, it would probably be better to refine the
 * crude function locking with per-size-class locking. I'm not positive,
 * however, whether it's worth switching to such a locking policy because
 * of the performance penalty it might introduce.
 *
 * The following macros describe the simplest (should also be the fastest)
 * lock object on a particular platform and the init/fini/lock/unlock
 * operations on it. The locks defined here are not expected to be recursive
 * because it is assumed that they will always be called in the order:
 * INIT, [LOCK, UNLOCK]*, FINI.
 */

/*
 * Python's threads are serialized, so object malloc locking is disabled.
 */
#define SIMPLELOCK_DECL(lock)   /* simple lock declaration              */
#define SIMPLELOCK_INIT(lock)   /* allocate (if needed) and initialize  */
#define SIMPLELOCK_FINI(lock)   /* free/destroy an existing lock        */
#define SIMPLELOCK_LOCK(lock)   /* acquire released lock                */
#define SIMPLELOCK_UNLOCK(lock) /* release acquired lock                */

/*
 * Basic types
 * I don't care if these are defined in <sys/types.h> or elsewhere. Axiom.
 */
#undef  uchar
#define uchar   unsigned char   /* assuming == 8 bits  */

#undef  uint
#define uint    unsigned int    /* assuming >= 16 bits */

#undef  ulong
#define ulong   unsigned long   /* assuming >= 32 bits */

#undef  uptr
#define uptr    Py_uintptr_t

/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uchar block;

/* Pool for small blocks. */
struct pool_header {
        union { block *_padding;
                uint count; } ref;      /* number of allocated blocks    */
        block *freeblock;               /* pool's free list head         */
        struct pool_header *nextpool;   /* next pool of this size class  */
        struct pool_header *prevpool;   /* previous pool ""              */
        uint arenaindex;                /* index into arenas of base adr */
        uint szidx;                     /* block size class index        */
        uint nextoffset;                /* bytes to virgin block         */
        uint maxnextoffset;             /* largest valid nextoffset      */
};

typedef struct pool_header *poolp;

#undef  ROUNDUP
#define ROUNDUP(x)              (((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK)
#define POOL_OVERHEAD           ROUNDUP(sizeof(struct pool_header))

#define DUMMY_SIZE_IDX          0xffff  /* size class of newly cached pools */

/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
#define POOL_ADDR(P) ((poolp)((uptr)(P) & ~(uptr)POOL_SIZE_MASK))

/* Return total number of blocks in poolp P, as a uint. */
#define NUMBLOCKS(P) \
        ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE((P)->szidx))

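/* Compiled-out sketch (illustration only): what the two macros above do.
 * POOL_ADDR masks off the low bits of any address inside a pool to recover
 * the pool_header at the pool's start; NUMBLOCKS divides the space left
 * after the (aligned) header by the block size of that pool's class.  The
 * concrete numbers assume the default 4K POOL_SIZE and 8-byte ALIGNMENT.
 */
#if 0
static void
example_pool_macros(poolp pool)
{
        block *somewhere = (block *)pool + 100;         /* any address in the pool */
        assert(POOL_ADDR(somewhere) == pool);           /* low 12 bits masked away */

        if (pool->szidx == 0)                           /* 8-byte blocks */
                assert(NUMBLOCKS(pool) ==
                       (uint)(POOL_SIZE - POOL_OVERHEAD) / 8);
}
#endif
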
/*==========================================================================*/

/*
 * This malloc lock
 */
SIMPLELOCK_DECL(_malloc_lock);
#define LOCK()          SIMPLELOCK_LOCK(_malloc_lock)
#define UNLOCK()        SIMPLELOCK_UNLOCK(_malloc_lock)
#define LOCK_INIT()     SIMPLELOCK_INIT(_malloc_lock)
#define LOCK_FINI()     SIMPLELOCK_FINI(_malloc_lock)

/*
 * Pool table -- headed, circular, doubly-linked lists of partially used pools.

This is involved.  For an index i, usedpools[i+i] is the header for a list of
all partially used pools holding small blocks with "size class idx" i.  So
usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
16, and so on:  index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.

Pools are carved off the current arena highwater mark (file static arenabase)
as needed.  Once carved off, a pool is in one of three states forever after:

used == partially used, neither empty nor full
    At least one block in the pool is currently allocated, and at least one
    block in the pool is not currently allocated (note this implies a pool
    has room for at least two blocks).
    This is a pool's initial state, as a pool is created only when malloc
    needs space.
    The pool holds blocks of a fixed size, and is in the circular list headed
    at usedpools[i+i] (see above).  It's linked to the other used pools of the
    same size class via the pool_header's nextpool and prevpool members.
    If all but one block is currently allocated, a malloc can cause a
    transition to the full state.  If all but one block is not currently
    allocated, a free can cause a transition to the empty state.

full == all the pool's blocks are currently allocated
    On transition to full, a pool is unlinked from its usedpools[] list.
    It's not linked to from anything then anymore, and its nextpool and
    prevpool members are meaningless until it transitions back to used.
    A free of a block in a full pool puts the pool back in the used state.
    Then it's linked in at the front of the appropriate usedpools[] list, so
    that the next allocation for its size class will reuse the freed block.

empty == all the pool's blocks are currently available for allocation
    On transition to empty, a pool is unlinked from its usedpools[] list,
    and linked to the front of the (file static) singly-linked freepools list,
    via its nextpool member.  The prevpool member has no meaning in this case.
    Empty pools have no inherent size class:  the next time a malloc finds
    an empty list in usedpools[], it takes the first pool off of freepools.
    If the size class needed happens to be the same as the size class the pool
    last had, some pool initialization can be skipped.


Block Management

Blocks within pools are again carved out as needed.  pool->freeblock points to
the start of a singly-linked list of free blocks within the pool.  When a
block is freed, it's inserted at the front of its pool's freeblock list.  Note
that the available blocks in a pool are *not* linked all together when a pool
is initialized.  Instead only "the first two" (lowest addresses) blocks are
set up, returning the first such block, and setting pool->freeblock to a
one-block list holding the second such block.  This is consistent with
pymalloc's strategy of never touching a piece of memory, at any level (arena,
pool, or block), until it's actually needed.

So long as a pool is in the used state, we're certain there *is* a block
available for allocating.  If pool->freeblock is NULL then, that means we
simply haven't yet reached one of the higher-address blocks.  The offset
from the pool_header to the start of "the next" virgin block is stored in
the pool_header nextoffset member, and the largest value of nextoffset that
makes sense is stored in the maxnextoffset member when a pool is initialized.
All the blocks in a pool have been passed out at least once when and only
when nextoffset > maxnextoffset.


Major obscurity:  While the usedpools vector is declared to have poolp
entries, it doesn't really.  It really contains two pointers per (conceptual)
poolp entry, the nextpool and prevpool members of a pool_header.  The
excruciating initialization code below fools C so that

    usedpools[i+i]

"acts like" a genuine poolp, but only so long as you only reference its
nextpool and prevpool members.  The "- 2*sizeof(block *)" gibberish is
compensating for the fact that a pool_header's nextpool and prevpool members
immediately follow a pool_header's first two members:

    union { block *_padding;
            uint count; } ref;
    block *freeblock;

each of which consumes sizeof(block *) bytes.  So what usedpools[i+i] really
contains is a fudged-up pointer p such that *if* C believes it's a poolp
pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
circular list is empty).

It's unclear why the usedpools setup is so convoluted.  It could be to
minimize the amount of cache required to hold this heavily-referenced table
(which only *needs* the two interpool pointer members of a pool_header). OTOH,
referencing code has to remember to "double the index" and doing so isn't
free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
on the fact that C doesn't insert any padding anywhere in a pool_header at or
before the prevpool member.
**************************************************************************** */

#define PTA(x)  ((poolp )((uchar *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
#define PT(x)   PTA(x), PTA(x)

static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
        PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
#if NB_SMALL_SIZE_CLASSES > 8
        , PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
#if NB_SMALL_SIZE_CLASSES > 16
        , PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
#if NB_SMALL_SIZE_CLASSES > 24
        , PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
#if NB_SMALL_SIZE_CLASSES > 32
        , PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
#if NB_SMALL_SIZE_CLASSES > 40
        , PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
#if NB_SMALL_SIZE_CLASSES > 48
        , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
#if NB_SMALL_SIZE_CLASSES > 56
        , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
#endif /* NB_SMALL_SIZE_CLASSES > 32 */
#endif /* NB_SMALL_SIZE_CLASSES > 24 */
#endif /* NB_SMALL_SIZE_CLASSES > 16 */
#endif /* NB_SMALL_SIZE_CLASSES >  8 */
};

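/* Compiled-out sketch (illustration only): what the PTA() trickery above buys
 * us.  Right after the static initialization, each conceptual list header
 * usedpools[i+i] points back at itself through its nextpool/prevpool members,
 * which is exactly the "headed circular list is empty" test the small-object
 * malloc below performs (pool != pool->nextpool).
 */
#if 0
static void
example_usedpools_invariant(void)
{
        uint size;      /* a size class index */
        for (size = 0; size < NB_SMALL_SIZE_CLASSES; ++size) {
                poolp pool = usedpools[size + size];
                /* empty headed list: the header "points" at itself */
                assert(pool->nextpool == pool && pool->prevpool == pool);
        }
}
#endif
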
/*
 * Free (cached) pools
 */
static poolp freepools = NULL;          /* free list for cached pools */

/*==========================================================================*/
/* Arena management. */

/* arenas is a vector of arena base addresses, in order of allocation time.
 * arenas currently contains narenas entries, and has space allocated
 * for at most maxarenas entries.
 *
 * CAUTION:  See the long comment block about thread safety in new_arena():
 * the code currently relies in deep ways on the fact that this vector only
 * grows, and only grows by appending at the end.  For now we never return
 * an arena to the OS.
 */
static uptr *volatile arenas = NULL;    /* the pointer itself is volatile */
static volatile uint narenas = 0;
static uint maxarenas = 0;

/* Number of pools still available to be allocated in the current arena. */
static uint nfreepools = 0;

/* Free space start address in current arena.  This is pool-aligned. */
static block *arenabase = NULL;

#if 0
static ulong wasmine = 0;
static ulong wasntmine = 0;

static void
dumpem(void *ptr)
{
        if (ptr)
                printf("inserted new arena at %08x\n", ptr);
        printf("# arenas %u\n", narenas);
        printf("was mine %lu wasn't mine %lu\n", wasmine, wasntmine);
}
#define INCMINE ++wasmine
#define INCTHEIRS ++wasntmine

#else
#define dumpem(ptr)
#define INCMINE
#define INCTHEIRS
#endif

/* Allocate a new arena and return its base address.  If we run out of
 * memory, return NULL.
 */
static block *
new_arena(void)
{
        uint excess;    /* number of bytes above pool alignment */
        block *bp = (block *)malloc(ARENA_SIZE);
        if (bp == NULL)
                return NULL;

        /* arenabase <- first pool-aligned address in the arena
           nfreepools <- number of whole pools that fit after alignment */
        arenabase = bp;
        nfreepools = ARENA_SIZE / POOL_SIZE;
        assert(POOL_SIZE * nfreepools == ARENA_SIZE);
        excess = (uint)bp & POOL_SIZE_MASK;
        if (excess != 0) {
                --nfreepools;
                arenabase += POOL_SIZE - excess;
        }

        /* Make room for a new entry in the arenas vector. */
        if (arenas == NULL) {
                assert(narenas == 0 && maxarenas == 0);
                arenas = (uptr *)malloc(16 * sizeof(*arenas));
                if (arenas == NULL)
                        goto error;
                maxarenas = 16;
        }
        else if (narenas == maxarenas) {
                /* Grow arenas.  Don't use realloc:  if this fails, we
                 * don't want to lose the base addresses we already have.
                 *
                 * Exceedingly subtle:  Someone may be calling the pymalloc
                 * free via PyMem_{DEL, Del, FREE, Free} without holding the
                 * GIL.  Someone else may simultaneously be calling the
                 * pymalloc malloc while holding the GIL via, e.g.,
                 * PyObject_New.  Now the pymalloc free may index into arenas
                 * for an address check, while the pymalloc malloc calls
                 * new_arena and we end up here to grow a new arena *and*
                 * grow the arenas vector.  If the value for arenas pymalloc
                 * free picks up "vanishes" during this resize, anything may
                 * happen, and it would be an incredibly rare bug.  Therefore
                 * the code here takes great pains to make sure that, at every
                 * moment, arenas always points to an intact vector of
                 * addresses.  It doesn't matter whether arenas points to a
                 * wholly up-to-date vector when pymalloc free checks it in
                 * this case, because the only legal (and that even this is
                 * legal is debatable) way to call PyMem_{Del, etc} while not
                 * holding the GIL is if the memory being released is not
                 * object memory, i.e. if the address check in pymalloc free
                 * is supposed to fail.  Having an incomplete vector can't
                 * make a supposed-to-fail case succeed by mistake (it could
                 * only make a supposed-to-succeed case fail by mistake).
                 *
                 * In addition, without a lock we can't know for sure when
                 * an old vector is no longer referenced, so we simply let
                 * old vectors leak.
                 *
                 * And on top of that, since narenas and arenas can't be
                 * changed as-a-pair atomically without a lock, we're also
                 * careful to declare them volatile and ensure that we change
                 * arenas first.  This prevents another thread from picking
                 * up a narenas value too large for the arenas vector it
                 * reads (arenas never shrinks).
                 *
                 * Read the above 50 times before changing anything in this
                 * block.
                 */
                uptr *p;
                uint newmax = maxarenas << 1;
                if (newmax <= maxarenas)        /* overflow */
                        goto error;
                p = (uptr *)malloc(newmax * sizeof(*arenas));
                if (p == NULL)
                        goto error;
                memcpy(p, arenas, narenas * sizeof(*arenas));
                arenas = p;     /* old arenas deliberately leaked */
                maxarenas = newmax;
        }

        /* Append the new arena address to arenas. */
        assert(narenas < maxarenas);
        arenas[narenas] = (uptr)bp;
        ++narenas;      /* can't overflow, since narenas < maxarenas before */
        dumpem(bp);
        return bp;

error:
        free(bp);
        nfreepools = 0;
        return NULL;
}

/* Return true if and only if P is an address that was allocated by
 * pymalloc.  I must be the index into arenas that the address claims
 * to come from.
 *
 * Tricky:  Letting B be the arena base address in arenas[I], P belongs to the
 * arena if and only if
 *      B <= P < B + ARENA_SIZE
 * Subtracting B throughout, this is true iff
 *      0 <= P-B < ARENA_SIZE
 * By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
 *
 * Obscure:  A PyMem "free memory" function can call the pymalloc free or
 * realloc before the first arena has been allocated.  arenas is still
 * NULL in that case.  We're relying on narenas also being 0 in that case,
 * so the (I) < narenas check must be false, saving us from trying to index
 * into a NULL arenas.
 */
#define ADDRESS_IN_RANGE(P, I) \
        ((I) < narenas && (uptr)(P) - arenas[I] < (uptr)ARENA_SIZE)

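/* Compiled-out sketch (illustration only): why the single unsigned
 * comparison in ADDRESS_IN_RANGE covers both bounds.  If P < B, the
 * subtraction (uptr)(P) - B wraps around to a huge unsigned value, which
 * is certainly >= ARENA_SIZE, so the test fails just as it does when P is
 * at or beyond B + ARENA_SIZE.
 */
#if 0
static void
example_address_in_range(uptr b)        /* b: some arena base address */
{
        assert((b - b) < (uptr)ARENA_SIZE);                  /* first byte: in     */
        assert(!((b + ARENA_SIZE) - b < (uptr)ARENA_SIZE));  /* one past end: out  */
        assert(!((b - 1) - b < (uptr)ARENA_SIZE));           /* below base wraps: out */
}
#endif
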
/*==========================================================================*/

/* malloc.  Note that nbytes==0 tries to return a non-NULL pointer, distinct
 * from all other currently live pointers.  This may not be possible.
 */

/*
 * The basic blocks are ordered by decreasing execution frequency,
 * which minimizes the number of jumps in the most common cases,
 * improves branching prediction and instruction scheduling (small
 * block allocations typically result in a couple of instructions).
 * Unless the optimizer reorders everything, being too smart...
 */

void *
_PyMalloc_Malloc(size_t nbytes)
{
        block *bp;
        poolp pool;
        poolp next;
        uint size;

        /*
         * This implicitly redirects malloc(0).
         */
        if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
                LOCK();
                /*
                 * Most frequent paths first
                 */
                size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
                pool = usedpools[size + size];
                if (pool != pool->nextpool) {
                        /*
                         * There is a used pool for this size class.
                         * Pick up the head block of its free list.
                         */
                        ++pool->ref.count;
                        bp = pool->freeblock;
                        if ((pool->freeblock = *(block **)bp) != NULL) {
                                UNLOCK();
                                return (void *)bp;
                        }
                        /*
                         * Reached the end of the free list, try to extend it
                         */
                        if (pool->nextoffset <= pool->maxnextoffset) {
                                /*
                                 * There is room for another block
                                 */
                                pool->freeblock = (block *)pool +
                                                  pool->nextoffset;
                                pool->nextoffset += INDEX2SIZE(size);
                                *(block **)(pool->freeblock) = NULL;
                                UNLOCK();
                                return (void *)bp;
                        }
                        /*
                         * Pool is full, unlink from used pools
                         */
                        next = pool->nextpool;
                        pool = pool->prevpool;
                        next->prevpool = pool;
                        pool->nextpool = next;
                        UNLOCK();
                        return (void *)bp;
                }
                /*
                 * Try to get a cached free pool
                 */
                pool = freepools;
                if (pool != NULL) {
                        /*
                         * Unlink from cached pools
                         */
                        freepools = pool->nextpool;
        init_pool:
                        /*
                         * Frontlink to used pools
                         */
                        next = usedpools[size + size]; /* == prev */
                        pool->nextpool = next;
                        pool->prevpool = next;
                        next->nextpool = pool;
                        next->prevpool = pool;
                        pool->ref.count = 1;
                        if (pool->szidx == size) {
                                /*
                                 * Luckily, this pool last contained blocks
                                 * of the same size class, so its header
                                 * and free list are already initialized.
                                 */
                                bp = pool->freeblock;
                                pool->freeblock = *(block **)bp;
                                UNLOCK();
                                return (void *)bp;
                        }
                        /*
                         * Initialize the pool header, set up the free list to
                         * contain just the second block, and return the first
                         * block.
                         */
                        pool->szidx = size;
                        size = INDEX2SIZE(size);
                        bp = (block *)pool + POOL_OVERHEAD;
                        pool->nextoffset = POOL_OVERHEAD + (size << 1);
                        pool->maxnextoffset = POOL_SIZE - size;
                        pool->freeblock = bp + size;
                        *(block **)(pool->freeblock) = NULL;
                        UNLOCK();
                        return (void *)bp;
                }
                /*
                 * Allocate new pool
                 */
                if (nfreepools) {
        commit_pool:
                        --nfreepools;
                        pool = (poolp)arenabase;
                        arenabase += POOL_SIZE;
                        pool->arenaindex = narenas - 1;
                        pool->szidx = DUMMY_SIZE_IDX;
                        goto init_pool;
                }
                /*
                 * Allocate new arena
                 */
#ifdef WITH_MEMORY_LIMITS
                if (!(narenas < MAX_ARENAS)) {
                        UNLOCK();
                        goto redirect;
                }
#endif
                bp = new_arena();
                if (bp != NULL)
                        goto commit_pool;
                UNLOCK();
                goto redirect;
        }

        /* The small block allocator ends here. */

redirect:
        /*
         * Redirect the original request to the underlying (libc) allocator.
         * We jump here on bigger requests, on error in the code above (as a
         * last chance to serve the request) or when the max memory limit
         * has been reached.
         */
        return (void *)malloc(nbytes ? nbytes : 1);
}

/* free */

void
_PyMalloc_Free(void *p)
{
        poolp pool;
        block *lastfree;
        poolp next, prev;
        uint size;

        if (p == NULL)  /* free(NULL) has no effect */
                return;

        pool = POOL_ADDR(p);
        if (ADDRESS_IN_RANGE(p, pool->arenaindex)) {
                /* We allocated this address. */
                LOCK();
                INCMINE;
                /*
                 * Link p to the start of the pool's freeblock list.  Since
                 * the pool had at least the p block outstanding, the pool
                 * wasn't empty (so it's already in a usedpools[] list, or
                 * was full and is in no list -- it's not in the freepools
                 * list in any case).
                 */
                assert(pool->ref.count > 0);    /* else it was empty */
                *(block **)p = lastfree = pool->freeblock;
                pool->freeblock = (block *)p;
                if (lastfree) {
                        /*
                         * freeblock wasn't NULL, so the pool wasn't full,
                         * and the pool is in a usedpools[] list.
                         */
                        if (--pool->ref.count != 0) {
                                /* pool isn't empty:  leave it in usedpools */
                                UNLOCK();
                                return;
                        }
                        /*
                         * Pool is now empty:  unlink from usedpools, and
                         * link to the front of freepools.  This ensures that
                         * previously freed pools will be allocated later
                         * (since they're no longer referenced, they may have
                         * been paged out).
                         */
                        next = pool->nextpool;
                        prev = pool->prevpool;
                        next->prevpool = prev;
                        prev->nextpool = next;
                        /* Link to freepools.  This is a singly-linked list,
                         * and pool->prevpool isn't used there.
                         */
                        pool->nextpool = freepools;
                        freepools = pool;
                        UNLOCK();
                        return;
                }
                /*
                 * Pool was full, so doesn't currently live in any list:
                 * link it to the front of the appropriate usedpools[] list.
                 * This mimics LRU pool usage for new allocations and
                 * targets optimal filling when several pools contain
                 * blocks of the same size class.
                 */
                --pool->ref.count;
                assert(pool->ref.count > 0);    /* else the pool is empty */
                size = pool->szidx;
                next = usedpools[size + size];
                prev = next->prevpool;
                /* insert pool before next:   prev <-> pool <-> next */
                pool->nextpool = next;
                pool->prevpool = prev;
                next->prevpool = pool;
                prev->nextpool = pool;
                UNLOCK();
                return;
        }

        /* We didn't allocate this address. */
        INCTHEIRS;
        free(p);
}

/* realloc.  If p is NULL, this acts like malloc(nbytes).  Else if nbytes==0,
 * then as the Python docs promise, we do not treat this like free(p), and
 * return a non-NULL result.
 */

void *
_PyMalloc_Realloc(void *p, size_t nbytes)
{
        void *bp;
        poolp pool;
        uint size;

        if (p == NULL)
                return _PyMalloc_Malloc(nbytes);

        pool = POOL_ADDR(p);
        if (ADDRESS_IN_RANGE(p, pool->arenaindex)) {
                /* We're in charge of this block */
                INCMINE;
                size = INDEX2SIZE(pool->szidx);
                if (size >= nbytes)
                        /* Don't bother if a smaller size was requested. */
                        return p;
                /* We need more memory. */
                assert(nbytes != 0);
                bp = _PyMalloc_Malloc(nbytes);
                if (bp != NULL) {
                        memcpy(bp, p, size);
                        _PyMalloc_Free(p);
                }
                return bp;
        }
        /* We're not managing this block. */
        INCTHEIRS;
        if (nbytes <= SMALL_REQUEST_THRESHOLD) {
                /* Take over this block. */
                bp = _PyMalloc_Malloc(nbytes ? nbytes : 1);
                if (bp != NULL) {
                        memcpy(bp, p, nbytes);
                        free(p);
                }
                else if (nbytes == 0) {
                        /* Meet the doc's promise that nbytes==0 will
                         * never return a NULL pointer when p isn't NULL.
                         */
                        bp = p;
                }
        }
        else {
                assert(nbytes != 0);
                bp = realloc(p, nbytes);
        }
        return bp;
}

#else   /* ! WITH_PYMALLOC */

/*==========================================================================*/
/* pymalloc not enabled:  Redirect the entry points to the PyMem family. */

void *
_PyMalloc_Malloc(size_t n)
{
        return PyMem_MALLOC(n);
}

void *
_PyMalloc_Realloc(void *p, size_t n)
{
        return PyMem_REALLOC(p, n);
}

void
_PyMalloc_Free(void *p)
{
        PyMem_FREE(p);
}
#endif /* WITH_PYMALLOC */

/*==========================================================================*/
/* Regardless of whether pymalloc is enabled, export entry points for
 * the object-oriented pymalloc functions.
 */

PyObject *
_PyMalloc_New(PyTypeObject *tp)
{
        PyObject *op;
        op = (PyObject *) _PyMalloc_MALLOC(_PyObject_SIZE(tp));
        if (op == NULL)
                return PyErr_NoMemory();
        return PyObject_INIT(op, tp);
}

PyVarObject *
_PyMalloc_NewVar(PyTypeObject *tp, int nitems)
{
        PyVarObject *op;
        const size_t size = _PyObject_VAR_SIZE(tp, nitems);
        op = (PyVarObject *) _PyMalloc_MALLOC(size);
        if (op == NULL)
                return (PyVarObject *)PyErr_NoMemory();
        return PyObject_INIT_VAR(op, tp, nitems);
}

void
_PyMalloc_Del(PyObject *op)
{
        _PyMalloc_FREE(op);
}

#ifdef PYMALLOC_DEBUG
/*==========================================================================*/
/* A cross-platform debugging allocator.  This doesn't manage memory directly,
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
 */

#define PYMALLOC_CLEANBYTE      0xCB    /* uninitialized memory */
#define PYMALLOC_DEADBYTE       0xDB    /* free()ed memory      */
#define PYMALLOC_FORBIDDENBYTE  0xFB    /* unusable memory      */

static ulong serialno = 0;      /* incremented on each debug {m,re}alloc */

/* serialno is always incremented via calling this routine.  The point is
   to supply a single place to set a breakpoint.
*/
static void
bumpserialno(void)
{
        ++serialno;
}


/* Read 4 bytes at p as a big-endian ulong. */
static ulong
read4(const void *p)
{
        const uchar *q = (const uchar *)p;
        return ((ulong)q[0] << 24) |
               ((ulong)q[1] << 16) |
               ((ulong)q[2] <<  8) |
                (ulong)q[3];
}

/* Write the 4 least-significant bytes of n as a big-endian unsigned int,
   MSB at address p, LSB at p+3. */
static void
write4(void *p, ulong n)
{
        uchar *q = (uchar *)p;
        q[0] = (uchar)((n >> 24) & 0xff);
        q[1] = (uchar)((n >> 16) & 0xff);
        q[2] = (uchar)((n >>  8) & 0xff);
        q[3] = (uchar)( n        & 0xff);
}

/* The debug malloc asks for 16 extra bytes and fills them with useful stuff,
   here calling the underlying malloc's result p:

p[0:4]
    Number of bytes originally asked for.  4-byte unsigned integer,
    big-endian (easier to read in a memory dump).
p[4:8]
    Copies of PYMALLOC_FORBIDDENBYTE.  Used to catch under-writes
    and reads.
p[8:8+n]
    The requested memory, filled with copies of PYMALLOC_CLEANBYTE.
    Used to catch references to uninitialized memory.
    &p[8] is returned.  Note that this is 8-byte aligned if PyMalloc
    handled the request itself.
p[8+n:8+n+4]
    Copies of PYMALLOC_FORBIDDENBYTE.  Used to catch over-writes
    and reads.
p[8+n+4:8+n+8]
    A serial number, incremented by 1 on each call to _PyMalloc_DebugMalloc
    and _PyMalloc_DebugRealloc.
    4-byte unsigned integer, big-endian.
    If "bad memory" is detected later, the serial number gives an
    excellent way to set a breakpoint on the next run, to capture the
    instant at which this block was passed out.
*/

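/* Compiled-out sketch (illustration only): the layout described above for a
 * hypothetical 3-byte request.  The total allocation is nbytes + 16, the
 * caller sees only the middle n bytes, and the two 4-byte pad fields plus
 * the trailing serial number bracket them.
 */
#if 0
static void
example_debug_layout(void)
{
        size_t nbytes = 3;
        uchar *p = (uchar *)_PyMalloc_Malloc(nbytes + 16);  /* what DebugMalloc grabs */
        if (p == NULL)
                return;
        write4(p, nbytes);          /* p[0:4]   request size, big-endian             */
        /* p[4:8]   four PYMALLOC_FORBIDDENBYTEs (leading pad)                        */
        /* p[8:11]  the 3 user bytes, initially PYMALLOC_CLEANBYTE; caller gets &p[8] */
        /* p[11:15] four PYMALLOC_FORBIDDENBYTEs (trailing pad)                       */
        /* p[15:19] the serial number, big-endian                                     */
        _PyMalloc_Free(p);
}
#endif
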
void *
_PyMalloc_DebugMalloc(size_t nbytes)
{
        uchar *p;       /* base address of malloc'ed block */
        uchar *tail;    /* p + 8 + nbytes == pointer to tail pad bytes */
        size_t total;   /* nbytes + 16 */

        bumpserialno();
        total = nbytes + 16;
        if (total < nbytes || (total >> 31) > 1) {
                /* overflow, or we can't represent it in 4 bytes */
                /* Obscure:  can't do (total >> 32) != 0 instead, because
                   C doesn't define what happens for a right-shift of 32
                   when size_t is a 32-bit type.  At least C guarantees
                   size_t is an unsigned type. */
                return NULL;
        }

        p = _PyMalloc_Malloc(total);
        if (p == NULL)
                return NULL;

        write4(p, nbytes);
        p[4] = p[5] = p[6] = p[7] = PYMALLOC_FORBIDDENBYTE;

        if (nbytes > 0)
                memset(p+8, PYMALLOC_CLEANBYTE, nbytes);

        tail = p + 8 + nbytes;
        tail[0] = tail[1] = tail[2] = tail[3] = PYMALLOC_FORBIDDENBYTE;
        write4(tail + 4, serialno);

        return p+8;
}

/* The debug free first checks the 8 bytes on each end for sanity (in
   particular, that the PYMALLOC_FORBIDDENBYTEs are still intact).
   Then fills the original bytes with PYMALLOC_DEADBYTE.
   Then calls the underlying free.
*/
void
_PyMalloc_DebugFree(void *p)
{
        uchar *q = (uchar *)p;
        size_t nbytes;

        if (p == NULL)
                return;
        _PyMalloc_DebugCheckAddress(p);
        nbytes = read4(q-8);
        if (nbytes > 0)
                memset(q, PYMALLOC_DEADBYTE, nbytes);
        _PyMalloc_Free(q-8);
}

void *
_PyMalloc_DebugRealloc(void *p, size_t nbytes)
{
        uchar *q = (uchar *)p;
        size_t original_nbytes;
        void *fresh;    /* new memory block, if needed */

        if (p == NULL)
                return _PyMalloc_DebugMalloc(nbytes);

        _PyMalloc_DebugCheckAddress(p);
        original_nbytes = read4(q-8);
        if (nbytes == original_nbytes) {
                /* note that this case is likely to be common due to the
                   way Python appends to lists */
                bumpserialno();
                write4(q + nbytes + 4, serialno);
                return p;
        }

        if (nbytes < original_nbytes) {
                /* shrinking -- leave the guts alone, except to
                   fill the excess with DEADBYTE */
                const size_t excess = original_nbytes - nbytes;
                bumpserialno();
                write4(q-8, nbytes);
                /* kill the excess bytes plus the trailing 8 pad bytes */
                q += nbytes;
                q[0] = q[1] = q[2] = q[3] = PYMALLOC_FORBIDDENBYTE;
                write4(q+4, serialno);
                memset(q+8, PYMALLOC_DEADBYTE, excess);
                return p;
        }

        /* More memory is needed:  get it, copy over the first original_nbytes
           of the original data, and free the original memory. */
        fresh = _PyMalloc_DebugMalloc(nbytes);
        if (fresh != NULL && original_nbytes > 0)
                memcpy(fresh, p, original_nbytes);
        _PyMalloc_DebugFree(p);
        return fresh;
}

/* Check the forbidden bytes on both ends of the memory allocated for p.
 * If anything is wrong, print info to stderr via _PyMalloc_DebugDumpAddress,
 * and call Py_FatalError to kill the program.
 */
void
_PyMalloc_DebugCheckAddress(const void *p)
{
        const uchar *q = (const uchar *)p;
        char *msg;
        int i;

        if (p == NULL) {
                msg = "didn't expect a NULL pointer";
                goto error;
        }

        for (i = 4; i >= 1; --i) {
                if (*(q-i) != PYMALLOC_FORBIDDENBYTE) {
                        msg = "bad leading pad byte";
                        goto error;
                }
        }

        {
                const ulong nbytes = read4(q-8);
                const uchar *tail = q + nbytes;
                for (i = 0; i < 4; ++i) {
                        if (tail[i] != PYMALLOC_FORBIDDENBYTE) {
                                msg = "bad trailing pad byte";
                                goto error;
                        }
                }
        }

        return;

error:
        _PyMalloc_DebugDumpAddress(p);
        Py_FatalError(msg);
}

/* Display info to stderr about the memory block at p. */
void
_PyMalloc_DebugDumpAddress(const void *p)
{
        const uchar *q = (const uchar *)p;
        const uchar *tail;
        ulong nbytes, serial;
        int i;

        fprintf(stderr, "Debug memory block at address p=%p:\n", p);
        if (p == NULL)
                return;

        nbytes = read4(q-8);
        fprintf(stderr, " %lu bytes originally allocated\n", nbytes);

        /* In case this is nuts, check the pad bytes before trying to read up
           the serial number (the address deref could blow up). */

        fputs(" the 4 pad bytes at p-4 are ", stderr);
        if (*(q-4) == PYMALLOC_FORBIDDENBYTE &&
            *(q-3) == PYMALLOC_FORBIDDENBYTE &&
            *(q-2) == PYMALLOC_FORBIDDENBYTE &&
            *(q-1) == PYMALLOC_FORBIDDENBYTE) {
                fputs("PYMALLOC_FORBIDDENBYTE, as expected\n", stderr);
        }
        else {
                fprintf(stderr, "not all PYMALLOC_FORBIDDENBYTE (0x%02x):\n",
                        PYMALLOC_FORBIDDENBYTE);
                for (i = 4; i >= 1; --i) {
                        const uchar byte = *(q-i);
                        fprintf(stderr, " at p-%d: 0x%02x", i, byte);
                        if (byte != PYMALLOC_FORBIDDENBYTE)
                                fputs(" *** OUCH", stderr);
                        fputc('\n', stderr);
                }
        }

        tail = q + nbytes;
        fprintf(stderr, " the 4 pad bytes at tail=%p are ", tail);
        if (tail[0] == PYMALLOC_FORBIDDENBYTE &&
            tail[1] == PYMALLOC_FORBIDDENBYTE &&
            tail[2] == PYMALLOC_FORBIDDENBYTE &&
            tail[3] == PYMALLOC_FORBIDDENBYTE) {
                fputs("PYMALLOC_FORBIDDENBYTE, as expected\n", stderr);
        }
        else {
                fprintf(stderr, "not all PYMALLOC_FORBIDDENBYTE (0x%02x):\n",
                        PYMALLOC_FORBIDDENBYTE);
                for (i = 0; i < 4; ++i) {
                        const uchar byte = tail[i];
                        fprintf(stderr, " at tail+%d: 0x%02x",
                                i, byte);
                        if (byte != PYMALLOC_FORBIDDENBYTE)
                                fputs(" *** OUCH", stderr);
                        fputc('\n', stderr);
                }
        }

        serial = read4(tail+4);
        fprintf(stderr, " the block was made by call #%lu to "
                        "debug malloc/realloc\n", serial);

        if (nbytes > 0) {
                int i = 0;
                fputs(" data at p:", stderr);
                /* print up to 8 bytes at the start */
                while (q < tail && i < 8) {
                        fprintf(stderr, " %02x", *q);
                        ++i;
                        ++q;
                }
                /* and up to 8 at the end */
                if (q < tail) {
                        if (tail - q > 8) {
                                fputs(" ...", stderr);
                                q = tail - 8;
                        }
                        while (q < tail) {
                                fprintf(stderr, " %02x", *q);
                                ++q;
                        }
                }
                fputc('\n', stderr);
        }
}
Tim Peters7ccfadf2002-04-01 06:04:21 +00001198/* Print summary info to stderr about the state of pymalloc's structures. */
1199void
1200_PyMalloc_DebugDumpStats(void)
1201{
1202 uint i;
1203 const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
1204 uint numfreepools = 0;
1205 /* # of pools per class index */
1206 ulong numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
1207 /* # of allocated blocks per class index */
1208 ulong numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
1209 /* # of free blocks per class index */
1210 ulong numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
1211 ulong grandtotal; /* total # of allocated bytes */
Tim Peters6169f092002-04-01 20:12:59 +00001212 ulong freegrandtotal; /* total # of available bytes in used pools */
Tim Peters7ccfadf2002-04-01 06:04:21 +00001213
1214 fprintf(stderr, "%u arenas * %d bytes/arena = %lu total bytes.\n",
1215 narenas, ARENA_SIZE, narenas * (ulong)ARENA_SIZE);
1216 fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
1217 SMALL_REQUEST_THRESHOLD, numclasses);
1218 fprintf(stderr, "pymalloc malloc+realloc called %lu times.\n",
1219 serialno);
1220
1221 for (i = 0; i < numclasses; ++i)
1222 numpools[i] = numblocks[i] = numfreeblocks[i] = 0;
1223
Tim Peters6169f092002-04-01 20:12:59 +00001224 /* Because full pools aren't linked to from anything, it's easiest
1225 * to march over all the arenas. If we're lucky, most of the memory
1226 * will be living in full pools -- would be a shame to miss them.
Tim Peters7ccfadf2002-04-01 06:04:21 +00001227 */
1228 for (i = 0; i < narenas; ++i) {
1229 uint poolsinarena;
1230 uint j;
1231 uptr base = arenas[i];
1232
1233 /* round up to pool alignment */
1234 poolsinarena = ARENA_SIZE / POOL_SIZE;
1235 if (base & (uptr)POOL_SIZE_MASK) {
1236 --poolsinarena;
1237 base &= ~(uptr)POOL_SIZE_MASK;
1238 base += POOL_SIZE;
1239 }
1240
1241 if (i == narenas - 1) {
1242 /* current arena may have raw memory at the end */
1243 numfreepools += nfreepools;
1244 poolsinarena -= nfreepools;
1245 }
1246
1247 /* visit every pool in the arena */
1248 for (j = 0; j < poolsinarena; ++j, base += POOL_SIZE) {
1249 poolp p = (poolp)base;
1250 if (p->ref.count == 0) {
1251 /* currently unused */
1252 ++numfreepools;
1253 continue;
1254 }
1255 ++numpools[p->szidx];
1256 numblocks[p->szidx] += p->ref.count;
Tim Peterse70ddf32002-04-05 04:32:29 +00001257 numfreeblocks[p->szidx] += NUMBLOCKS(p) - p->ref.count;
Tim Peters7ccfadf2002-04-01 06:04:21 +00001258 }
1259 }
1260
1261 fputc('\n', stderr);
1262 fprintf(stderr, "Number of unused pools: %u\n", numfreepools);
1263 fputc('\n', stderr);
1264 fputs("class num bytes num pools blocks in use avail blocks\n"
1265 "----- --------- --------- ------------- ------------\n",
1266 stderr);
1267
1268 grandtotal = freegrandtotal = 0;
1269 for (i = 0; i < numclasses; ++i) {
1270 ulong p = numpools[i];
1271 ulong b = numblocks[i];
1272 ulong f = numfreeblocks[i];
Tim Peterse70ddf32002-04-05 04:32:29 +00001273 uint size = INDEX2SIZE(i);
Tim Peters7ccfadf2002-04-01 06:04:21 +00001274 if (p == 0) {
1275 assert(b == 0 && f == 0);
1276 continue;
1277 }
1278 fprintf(stderr, "%5u %11u %11lu %15lu %13lu\n",
1279 i, size, p, b, f);
1280 grandtotal += b * size;
1281 freegrandtotal += f * size;
1282 }
1283 fputc('\n', stderr);
1284 fprintf(stderr, "Total bytes in allocated blocks: %lu\n",
1285 grandtotal);
1286 fprintf(stderr, "Total free bytes in used pools: %lu\n",
1287 freegrandtotal);
1288}
1289
Tim Petersddea2082002-03-23 10:03:50 +00001290#endif /* PYMALLOC_DEBUG */