#include "Python.h"

#ifdef WITH_PYMALLOC

/* An object allocator for Python.

   Here is an introduction to the layers of the Python memory architecture,
   showing where the object allocator is actually used (layer +2). It is
   called for every object allocation and deallocation (PyObject_New/Del),
   unless the object-specific allocators implement a proprietary allocation
   scheme (ex.: ints use a simple free list). This is also the place where
   the cyclic garbage collector operates selectively on container objects.


    Object-specific allocators
    _____   ______   ______       ________
   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
+3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
    _______________________________       |                           |
   [   Python's object allocator   ]      |                           |
+2 | ####### Object memory ####### | <------ Internal buffers ------> |
    ______________________________________________________________    |
   [          Python's raw memory allocator (PyMem_ API)          ]   |
+1 | <----- Python memory (under PyMem manager's control) ------> |   |
    __________________________________________________________________
   [    Underlying general-purpose allocator (ex: C library malloc)   ]
 0 | <------ Virtual memory allocated for the python process -------> |

   =========================================================================
    _______________________________________________________________________
   [                OS-specific Virtual Memory Manager (VMM)               ]
-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
    __________________________________   __________________________________
   [                                  ] [                                  ]
-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |

*/
/*==========================================================================*/

/* A fast, special-purpose memory allocator for small blocks, to be used
   on top of a general-purpose malloc -- heavily based on previous art. */

/* Vladimir Marangozov -- August 2000 */

/*
 * "Memory management is where the rubber meets the road -- if we do the wrong
 * thing at any level, the results will not be good. And if we don't make the
 * levels work well together, we are in serious trouble." (1)
 *
 * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
 *     "Dynamic Storage Allocation: A Survey and Critical Review",
 *     in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
 */

/* #undef WITH_MEMORY_LIMITS */		/* disable mem limit checks */

/*==========================================================================*/

/*
 * Allocation strategy abstract:
 *
 * For small requests, the allocator sub-allocates <Big> blocks of memory.
 * Requests greater than 256 bytes are routed to the system's allocator.
 *
 * Small requests are grouped in size classes spaced 8 bytes apart, due
 * to the required valid alignment of the returned address. Requests of
 * a particular size are serviced from memory pools of 4K (one VMM page).
 * Pools are fragmented on demand and contain free lists of blocks of one
 * particular size class. In other words, there is a fixed-size allocator
 * for each size class. Free pools are shared by the different allocators,
 * thus minimizing the space reserved for a particular size class.
 *
 * This allocation strategy is a variant of what is known as "simple
 * segregated storage based on array of free lists". The main drawback of
 * simple segregated storage is that we might end up with a lot of reserved
 * memory for the different free lists, which can degenerate over time. To
 * avoid this, we partition each free list into pools and dynamically share
 * the reserved space between all free lists. This technique is quite
 * efficient for memory-intensive programs that allocate mainly small blocks.
 *
 * For small requests we have the following table:
 *
 * Request in bytes	Size of allocated block      Size class idx
 * ----------------------------------------------------------------
 *        1-8                     8                       0
 *        9-16                   16                       1
 *       17-24                   24                       2
 *       25-32                   32                       3
 *       33-40                   40                       4
 *       41-48                   48                       5
 *       49-56                   56                       6
 *       57-64                   64                       7
 *       65-72                   72                       8
 *        ...                   ...                     ...
 *      241-248                 248                      30
 *      249-256                 256                      31
 *
 * 0, 257 and up: routed to the underlying allocator.
 */
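
/*
 * Illustration only: the mapping from a request size to its size class
 * index and block size is simple shift arithmetic (the same arithmetic
 * _PyMalloc_Malloc uses below). A minimal sketch, for a request of 20 bytes:
 *
 *	uint idx       = (uint)(20 - 1) >> ALIGNMENT_SHIFT;	-> 19 >> 3 == 2
 *	uint blocksize = (idx + 1) << ALIGNMENT_SHIFT;		-> 3 << 3  == 24
 *
 * i.e. a 20-byte request is served from the 24-byte size class (idx 2),
 * matching the table above.
 */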

/*==========================================================================*/

/*
 * -- Main tunable settings section --
 */

/*
 * Alignment of addresses returned to the user. 8-byte alignment works
 * on most current architectures (with 32-bit or 64-bit address buses).
 * The alignment value is also used for grouping small requests in size
 * classes spaced ALIGNMENT bytes apart.
 *
 * You shouldn't change this unless you know what you are doing.
 */
#define ALIGNMENT		8		/* must be 2^N */
#define ALIGNMENT_SHIFT		3
#define ALIGNMENT_MASK		(ALIGNMENT - 1)

/*
 * Max size threshold below which malloc requests are considered to be
 * small enough in order to use preallocated memory pools. You can tune
 * this value according to your application behaviour and memory needs.
 *
 * The following invariants must hold:
 *	1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 256
 *	2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
 *
 * Although not required, for better performance and space efficiency,
 * it is recommended that SMALL_REQUEST_THRESHOLD be set to a power of 2.
 */
#define SMALL_REQUEST_THRESHOLD	256
#define NB_SMALL_SIZE_CLASSES	(SMALL_REQUEST_THRESHOLD / ALIGNMENT)

/*
 * The system's VMM page size can be obtained on most unices with a
 * getpagesize() call or deduced from various header files. To make
 * things simpler, we assume that it is 4K, which is OK for most systems.
 * It is probably better if this is the native page size, but it doesn't
 * have to be.
 */
#define SYSTEM_PAGE_SIZE	(4 * 1024)
#define SYSTEM_PAGE_SIZE_MASK	(SYSTEM_PAGE_SIZE - 1)

/*
 * Maximum amount of memory managed by the allocator for small requests.
 */
#ifdef WITH_MEMORY_LIMITS
#ifndef SMALL_MEMORY_LIMIT
#define SMALL_MEMORY_LIMIT	(64 * 1024 * 1024)	/* 64 MB -- more? */
#endif
#endif

/*
 * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
 * on a page boundary. This is a reserved virtual address space for the
 * current process (obtained through a malloc call). In no way does this mean
 * that the memory arenas will be used entirely. A malloc(<Big>) is usually
 * just an address range reservation for <Big> bytes; physical pages are
 * committed only as the space is actually referenced. So malloc'ing big
 * blocks and not using them does not mean "wasting memory": it wastes
 * addressable range, not storage.
 *
 * Therefore, allocating arenas with malloc is not optimal, because there is
 * some address space wastage, but this is the most portable way to request
 * memory from the system across various platforms.
 */
#define ARENA_SIZE		(256 << 10)	/* 256KB */

#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS		(SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif

/*
 * Size of the pools used for small blocks. Should be a power of 2,
 * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
 */
#define POOL_SIZE		SYSTEM_PAGE_SIZE	/* must be 2^N */
#define POOL_SIZE_MASK		SYSTEM_PAGE_SIZE_MASK

/*
 * -- End of tunable settings section --
 */

/*==========================================================================*/

/*
 * Locking
 *
 * To reduce lock contention, it would probably be better to refine the
 * crude function locking with per size class locking. I'm not positive,
 * however, whether it's worth switching to such a locking policy because
 * of the performance penalty it might introduce.
 *
 * The following macros describe the simplest (should also be the fastest)
 * lock object on a particular platform and the init/fini/lock/unlock
 * operations on it. The locks defined here are not expected to be recursive
 * because it is assumed that they will always be called in the order:
 * INIT, [LOCK, UNLOCK]*, FINI.
 */

/*
 * Python's threads are serialized, so object malloc locking is disabled.
 */
#define SIMPLELOCK_DECL(lock)	/* simple lock declaration		*/
#define SIMPLELOCK_INIT(lock)	/* allocate (if needed) and initialize	*/
#define SIMPLELOCK_FINI(lock)	/* free/destroy an existing lock	*/
#define SIMPLELOCK_LOCK(lock)	/* acquire released lock		*/
#define SIMPLELOCK_UNLOCK(lock)	/* release acquired lock		*/

/*
 * Basic types
 * I don't care if these are defined in <sys/types.h> or elsewhere. Axiom.
 */
#undef  uchar
#define uchar	unsigned char	/* assuming == 8 bits  */

#undef  uint
#define uint	unsigned int	/* assuming >= 16 bits */

#undef  ulong
#define ulong	unsigned long	/* assuming >= 32 bits */

#undef  uptr
#define uptr	Py_uintptr_t

/* When you say memory, my mind reasons in terms of (pointers to) blocks */
typedef uchar block;

/* Pool for small blocks */
struct pool_header {
	union { block *_padding;
		uint count; } ref;	/* number of allocated blocks    */
	block *freeblock;		/* pool's free list head         */
	struct pool_header *nextpool;	/* next pool of this size class  */
	struct pool_header *prevpool;	/* previous pool       ""        */
	uint arenaindex;		/* index into arenas of base adr */
	uint szidx;			/* block size class index        */
	uint capacity;			/* pool capacity in # of blocks  */
};

typedef struct pool_header *poolp;

#undef  ROUNDUP
#define ROUNDUP(x)		(((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK)
#define POOL_OVERHEAD		ROUNDUP(sizeof(struct pool_header))
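
/*
 * Worked example (the exact numbers depend on the platform ABI): assuming a
 * 32-bit build with 4-byte pointers and 4-byte uints, sizeof(struct
 * pool_header) is 4 + 4 + 4 + 4 + 4 + 4 + 4 == 28 bytes, so
 *
 *	POOL_OVERHEAD == ROUNDUP(28) == (28 + 7) & ~7 == 32
 *
 * leaving POOL_SIZE - 32 bytes of each pool for blocks.
 */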

#define DUMMY_SIZE_IDX		0xffff	/* size class of newly cached pools */

/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
#define POOL_ADDR(P)	\
	((poolp)((uptr)(P) & ~(uptr)POOL_SIZE_MASK))
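
/*
 * For example (illustrative addresses only): with 4K pools, POOL_SIZE_MASK
 * is 0xfff, so a block address such as 0x2a5b38 maps to the pool_header at
 *
 *	POOL_ADDR(0x2a5b38) == (poolp)(0x2a5b38 & ~0xfff) == (poolp)0x2a5000
 *
 * This works because every pool starts at a POOL_SIZE-aligned address.
 */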

/*==========================================================================*/

/*
 * This malloc lock
 */
SIMPLELOCK_DECL(_malloc_lock);
#define LOCK()		SIMPLELOCK_LOCK(_malloc_lock)
#define UNLOCK()	SIMPLELOCK_UNLOCK(_malloc_lock)
#define LOCK_INIT()	SIMPLELOCK_INIT(_malloc_lock)
#define LOCK_FINI()	SIMPLELOCK_FINI(_malloc_lock)

/*
 * Pool table -- headed, circular, doubly-linked lists of partially used pools.

This is involved. For an index i, usedpools[i+i] is the header for a list of
all partially used pools holding small blocks with "size class idx" i. So
usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.

Pools are carved off the current arena highwater mark (file static arenabase)
as needed. Once carved off, a pool is in one of three states forever after:

used == partially used, neither empty nor full
    At least one block in the pool is currently allocated, and at least one
    block in the pool is not currently allocated (note this implies a pool
    has room for at least two blocks).
    This is a pool's initial state, as a pool is created only when malloc
    needs space.
    The pool holds blocks of a fixed size, and is in the circular list headed
    at usedpools[i + i] (see above). It's linked to the other used pools of
    the same size class via the pool_header's nextpool and prevpool members.
    If all but one block is currently allocated, a malloc can cause a
    transition to the full state. If all but one block is not currently
    allocated, a free can cause a transition to the empty state.

full == all the pool's blocks are currently allocated
    On transition to full, a pool is unlinked from its usedpools[] list.
    It's then no longer linked to from anything, and its nextpool and
    prevpool members are meaningless until it transitions back to used.
    A free of a block in a full pool puts the pool back in the used state.
    Then it's linked in at the front of the appropriate usedpools[] list, so
    that the next allocation for its size class will reuse the freed block.

empty == all the pool's blocks are currently available for allocation
    On transition to empty, a pool is unlinked from its usedpools[] list,
    and linked to the front of the (file static) singly-linked freepools list,
    via its nextpool member. The prevpool member has no meaning in this case.
    Empty pools have no inherent size class: the next time a malloc finds
    an empty list in usedpools[], it takes the first pool off of freepools.
    If the size class needed happens to be the same as the size class the pool
    last had, some expensive initialization can be skipped (including an
    integer division -- XXX since the value

        pool->capacity = (POOL_SIZE - POOL_OVERHEAD) / size;

    is invariant across all pools of a given size class, it may make more
    sense to compute those at compile-time into a const vector indexed by
    size class, and lose the pool->capacity member and the runtime divisions).
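
    A possible shape for such a table (a sketch only, not used anywhere in
    this file; it relies on POOL_SIZE, POOL_OVERHEAD and ALIGNMENT_SHIFT all
    being compile-time constants, which they are):

        #define CAPACITY(I)	((POOL_SIZE - POOL_OVERHEAD) / \
				 (((I) + 1) << ALIGNMENT_SHIFT))
        static const uint pool_capacity[NB_SMALL_SIZE_CLASSES] = {
            CAPACITY(0), CAPACITY(1), CAPACITY(2), CAPACITY(3),
            ... one entry per size class, up to CAPACITY(31) ...
        };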


Block Management

Blocks within pools are again carved out as needed. pool->freeblock points to
the start of a singly-linked list of free blocks within the pool. When a
block is freed, it's inserted at the front of its pool's freeblock list. Note
that the available blocks in a pool are *not* all linked together when a pool
is initialized. Instead only the two lowest-address blocks are touched: the
first is returned to the caller, and pool->freeblock points at the second,
whose own "next" pointer is set to NULL. This is consistent with pymalloc's
policy of never touching a piece of memory, at any level (arena, pool, or
block), until it's actually needed. So long as a pool is in the used state,
we're certain there *is* a block available for allocating. If pool->freeblock
is NULL then, that means we simply haven't yet gotten to one of the higher-
address blocks. The address of "the next" available block can then be
computed from pool->ref.count (the number of currently allocated blocks).
This computation can be expensive, because it requires an integer multiply.
However, so long as the pool's size class doesn't change, it's a one-time
cost for that block; the computation could be made cheaper via adding a
highwater pointer to the pool_header, but the tradeoff is murky.
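
For example (illustration only; this mirrors the code in _PyMalloc_Malloc
below): when freeblock comes up NULL but ref.count is still less than
capacity, the next virgin block is conjured up directly from the counts,

    pool->freeblock = (block *)pool + POOL_OVERHEAD +
                      pool->ref.count * blocksize;
    *(block **)(pool->freeblock) = NULL;

where blocksize is the pool's fixed block size and ref.count has already
been bumped to account for the block being handed out.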


Major obscurity: While the usedpools vector is declared to have poolp
entries, it doesn't really. It really contains two pointers per (conceptual)
poolp entry, the nextpool and prevpool members of a pool_header. The
excruciating initialization code below fools C so that

    usedpools[i+i]

"acts like" a genuine poolp, but only so long as you only reference its
nextpool and prevpool members. The "- 2*sizeof(block *)" gibberish
compensates for the fact that a pool_header's nextpool and prevpool members
immediately follow a pool_header's first two members:

    union { block *_padding;
            uint count; } ref;
    block *freeblock;

each of which consumes sizeof(block *) bytes. So what usedpools[i+i] really
contains is a fudged-up pointer p such that *if* C believes it's a poolp
pointer, then p->nextpool and p->prevpool are both p (meaning that the headed
circular list is empty).

It's unclear why the usedpools setup is so convoluted. It could be to
minimize the amount of cache required to hold this heavily-referenced table
(which only *needs* the two interpool pointer members of a pool_header). OTOH,
referencing code has to remember to "double the index" and doing so isn't
free, usedpools[0] isn't a strictly legal pointer, and we're crucially relying
on the fact that C doesn't insert any padding anywhere in a pool_header at or
before the prevpool member.
**************************************************************************** */

#define PTA(x)	((poolp )((uchar *)&(usedpools[2*(x)]) - 2*sizeof(block *)))
#define PT(x)	PTA(x), PTA(x)

static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
	PT(0), PT(1), PT(2), PT(3), PT(4), PT(5), PT(6), PT(7)
#if NB_SMALL_SIZE_CLASSES > 8
	, PT(8), PT(9), PT(10), PT(11), PT(12), PT(13), PT(14), PT(15)
#if NB_SMALL_SIZE_CLASSES > 16
	, PT(16), PT(17), PT(18), PT(19), PT(20), PT(21), PT(22), PT(23)
#if NB_SMALL_SIZE_CLASSES > 24
	, PT(24), PT(25), PT(26), PT(27), PT(28), PT(29), PT(30), PT(31)
#if NB_SMALL_SIZE_CLASSES > 32
	, PT(32), PT(33), PT(34), PT(35), PT(36), PT(37), PT(38), PT(39)
#if NB_SMALL_SIZE_CLASSES > 40
	, PT(40), PT(41), PT(42), PT(43), PT(44), PT(45), PT(46), PT(47)
#if NB_SMALL_SIZE_CLASSES > 48
	, PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
#if NB_SMALL_SIZE_CLASSES > 56
	, PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
#endif /* NB_SMALL_SIZE_CLASSES > 32 */
#endif /* NB_SMALL_SIZE_CLASSES > 24 */
#endif /* NB_SMALL_SIZE_CLASSES > 16 */
#endif /* NB_SMALL_SIZE_CLASSES >  8 */
};
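
/*
 * To see why the PTA() trick yields empty circular lists: for size class i,
 * &usedpools[2*i] is exactly where a pool_header's nextpool member would sit
 * if a header started 2*sizeof(block *) bytes earlier, and &usedpools[2*i+1]
 * is where its prevpool member would sit. Both slots are initialized to
 * PTA(i), i.e. to that fictitious header itself, so for an empty list
 *
 *	pool = usedpools[i + i];
 *	pool == pool->nextpool  and  pool == pool->prevpool
 *
 * hold, which is exactly the emptiness test _PyMalloc_Malloc performs below.
 * (Illustrative sketch only; nothing in this comment is executed.)
 */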

/*
 * Free (cached) pools
 */
static poolp freepools = NULL;		/* free list for cached pools */

/*==========================================================================*/
/* Arena management. */

/* arenas is a vector of arena base addresses, in order of allocation time.
 * arenas currently contains narenas entries, and has space allocated
 * for at most maxarenas entries.
 *
 * CAUTION: See the long comment block about thread safety in new_arena():
 * the code currently relies in deep ways on the fact that this vector only
 * grows, and only grows by appending at the end. For now we never return an
 * arena to the OS.
 */
static uptr *volatile arenas = NULL;	/* the pointer itself is volatile */
static volatile uint narenas = 0;
static uint maxarenas = 0;

/* Number of pools still available to be allocated in the current arena. */
static uint nfreepools = 0;

/* Free space start address in current arena. This is pool-aligned. */
static block *arenabase = NULL;

#if 0
static ulong wasmine = 0;
static ulong wasntmine = 0;

static void
dumpem(void *ptr)
{
	if (ptr)
		printf("inserted new arena at %08x\n", ptr);
	printf("# arenas %u\n", narenas);
	printf("was mine %lu wasn't mine %lu\n", wasmine, wasntmine);
}
#define INCMINE ++wasmine
#define INCTHEIRS ++wasntmine

#else
#define dumpem(ptr)
#define INCMINE
#define INCTHEIRS
#endif

/* Allocate a new arena and return its base address. If we run out of
 * memory, return NULL.
 */
static block *
new_arena(void)
{
	uint excess;	/* number of bytes above pool alignment */
	block *bp = (block *)malloc(ARENA_SIZE);
	if (bp == NULL)
		return NULL;

	/* arenabase <- first pool-aligned address in the arena
	   nfreepools <- number of whole pools that fit after alignment */
	arenabase = bp;
	nfreepools = ARENA_SIZE / POOL_SIZE;
	assert(POOL_SIZE * nfreepools == ARENA_SIZE);
	excess = (uint)bp & POOL_SIZE_MASK;
	if (excess != 0) {
		--nfreepools;
		arenabase += POOL_SIZE - excess;
	}

	/* Make room for a new entry in the arenas vector. */
	if (arenas == NULL) {
		assert(narenas == 0 && maxarenas == 0);
		arenas = (uptr *)malloc(16 * sizeof(*arenas));
		if (arenas == NULL)
			goto error;
		maxarenas = 16;
	}
	else if (narenas == maxarenas) {
		/* Grow arenas. Don't use realloc: if this fails, we
		 * don't want to lose the base addresses we already have.
		 *
		 * Exceedingly subtle: Someone may be calling the pymalloc
		 * free via PyMem_{DEL, Del, FREE, Free} without holding the
		 * GIL. Someone else may simultaneously be calling the
		 * pymalloc malloc while holding the GIL via, e.g.,
		 * PyObject_New. Now the pymalloc free may index into arenas
		 * for an address check, while the pymalloc malloc calls
		 * new_arena and we end up here to grow a new arena *and*
		 * grow the arenas vector. If the value for arenas pymalloc
		 * free picks up "vanishes" during this resize, anything may
		 * happen, and it would be an incredibly rare bug. Therefore
		 * the code here takes great pains to make sure that, at every
		 * moment, arenas always points to an intact vector of
		 * addresses. It doesn't matter whether arenas points to a
		 * wholly up-to-date vector when pymalloc free checks it in
		 * this case, because the only legal (and that even this is
		 * legal is debatable) way to call PyMem_{Del, etc} while not
		 * holding the GIL is if the memory being released is not
		 * object memory, i.e. if the address check in pymalloc free
		 * is supposed to fail. Having an incomplete vector can't
		 * make a supposed-to-fail case succeed by mistake (it could
		 * only make a supposed-to-succeed case fail by mistake).
		 *
		 * In addition, without a lock we can't know for sure when
		 * an old vector is no longer referenced, so we simply let
		 * old vectors leak.
		 *
		 * And on top of that, since narenas and arenas can't be
		 * changed as-a-pair atomically without a lock, we're also
		 * careful to declare them volatile and ensure that we change
		 * arenas first. This prevents another thread from picking
		 * up a narenas value too large for the arenas vector it
		 * reads (arenas never shrinks).
		 *
		 * Read the above 50 times before changing anything in this
		 * block.
		 */
		uptr *p;
		uint newmax = maxarenas << 1;
		if (newmax <= maxarenas)	/* overflow */
			goto error;
		p = (uptr *)malloc(newmax * sizeof(*arenas));
		if (p == NULL)
			goto error;
		memcpy(p, arenas, narenas * sizeof(*arenas));
		arenas = p;	/* old arenas deliberately leaked */
		maxarenas = newmax;
	}

	/* Append the new arena address to arenas. */
	assert(narenas < maxarenas);
	arenas[narenas] = (uptr)bp;
	++narenas;	/* can't overflow, since narenas < maxarenas before */
	dumpem(bp);
	return bp;

error:
	free(bp);
	nfreepools = 0;
	return NULL;
}

533/* Return true if and only if P is an address that was allocated by
534 * pymalloc. I must be the index into arenas that the address claims
535 * to come from.
Tim Petersc2ce91a2002-03-30 21:36:04 +0000536 *
Tim Petersd97a1c02002-03-30 06:09:22 +0000537 * Tricky: Letting B be the arena base address in arenas[I], P belongs to the
538 * arena if and only if
Tim Peters3c83df22002-03-30 07:04:41 +0000539 * B <= P < B + ARENA_SIZE
Tim Petersd97a1c02002-03-30 06:09:22 +0000540 * Subtracting B throughout, this is true iff
Tim Peters3c83df22002-03-30 07:04:41 +0000541 * 0 <= P-B < ARENA_SIZE
Tim Petersd97a1c02002-03-30 06:09:22 +0000542 * By using unsigned arithmetic, the "0 <=" half of the test can be skipped.
Tim Petersc2ce91a2002-03-30 21:36:04 +0000543 *
544 * Obscure: A PyMem "free memory" function can call the pymalloc free or
545 * realloc before the first arena has been allocated. arenas is still
546 * NULL in that case. We're relying on that narenas is also 0 in that case,
547 * so the (I) < narenas must be false, saving us from trying to index into
548 * a NULL arenas.
Tim Petersd97a1c02002-03-30 06:09:22 +0000549 */
550#define ADDRESS_IN_RANGE(P, I) \
Tim Peters3c83df22002-03-30 07:04:41 +0000551 ((I) < narenas && (uptr)(P) - arenas[I] < (uptr)ARENA_SIZE)
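
/*
 * Putting POOL_ADDR and ADDRESS_IN_RANGE together, the ownership test that
 * the free and realloc paths below perform looks like this (sketch only):
 *
 *	poolp pool = POOL_ADDR(p);
 *	if (ADDRESS_IN_RANGE(p, pool->arenaindex)) {
 *		... p was handed out by pymalloc ...
 *	}
 *	else {
 *		... p came from the system allocator ...
 *	}
 *
 * For a pointer that did not come from pymalloc, pool->arenaindex picks up
 * whatever happens to be stored at the pool-aligned address below p;
 * ADDRESS_IN_RANGE is written so that any such garbage index fails the test.
 */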

/*==========================================================================*/

/* malloc. Note that nbytes==0 tries to return a non-NULL pointer, distinct
 * from all other currently live pointers. This may not be possible.
 */

/*
 * The basic blocks are ordered by decreasing execution frequency,
 * which minimizes the number of jumps in the most common cases,
 * improves branching prediction and instruction scheduling (small
 * block allocations typically result in a couple of instructions).
 * Unless the optimizer reorders everything, being too smart...
 */

void *
_PyMalloc_Malloc(size_t nbytes)
{
	block *bp;
	poolp pool;
	poolp next;
	uint size;

	/*
	 * This implicitly redirects malloc(0).
	 */
	if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
		LOCK();
		/*
		 * Most frequent paths first
		 */
		size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
		pool = usedpools[size + size];
		if (pool != pool->nextpool) {
			/*
			 * There is a used pool for this size class.
			 * Pick up the head block of its free list.
			 */
			++pool->ref.count;
			bp = pool->freeblock;
			if ((pool->freeblock = *(block **)bp) != NULL) {
				UNLOCK();
				return (void *)bp;
			}
			/*
			 * Reached the end of the free list, try to extend it
			 */
			if (pool->ref.count < pool->capacity) {
				/*
				 * There is room for another block
				 */
				size++;
				size <<= ALIGNMENT_SHIFT; /* block size */
				pool->freeblock = (block *)pool +
						  POOL_OVERHEAD +
						  pool->ref.count * size;
				*(block **)(pool->freeblock) = NULL;
				UNLOCK();
				return (void *)bp;
			}
			/*
			 * Pool is full, unlink from used pools
			 */
			next = pool->nextpool;
			pool = pool->prevpool;
			next->prevpool = pool;
			pool->nextpool = next;
			UNLOCK();
			return (void *)bp;
		}
		/*
		 * Try to get a cached free pool
		 */
		pool = freepools;
		if (pool != NULL) {
			/*
			 * Unlink from cached pools
			 */
			freepools = pool->nextpool;
		init_pool:
			/*
			 * Frontlink to used pools
			 */
			next = usedpools[size + size]; /* == prev */
			pool->nextpool = next;
			pool->prevpool = next;
			next->nextpool = pool;
			next->prevpool = pool;
			pool->ref.count = 1;
			if (pool->szidx == size) {
				/*
				 * Luckily, this pool last contained blocks
				 * of the same size class, so its header
				 * and free list are already initialized.
				 */
				bp = pool->freeblock;
				pool->freeblock = *(block **)bp;
				UNLOCK();
				return (void *)bp;
			}
			/*
			 * Initialize the pool header and free list
			 * then return the first block.
			 */
			pool->szidx = size;
			size++;
			size <<= ALIGNMENT_SHIFT; /* block size */
			bp = (block *)pool + POOL_OVERHEAD;
			pool->freeblock = bp + size;
			*(block **)(pool->freeblock) = NULL;
			pool->capacity = (POOL_SIZE - POOL_OVERHEAD) / size;
			UNLOCK();
			return (void *)bp;
		}
		/*
		 * Allocate new pool
		 */
		if (nfreepools) {
		commit_pool:
			--nfreepools;
			pool = (poolp)arenabase;
			arenabase += POOL_SIZE;
			pool->arenaindex = narenas - 1;
			pool->szidx = DUMMY_SIZE_IDX;
			goto init_pool;
		}
		/*
		 * Allocate new arena
		 */
#ifdef WITH_MEMORY_LIMITS
		if (!(narenas < MAX_ARENAS)) {
			UNLOCK();
			goto redirect;
		}
#endif
		bp = new_arena();
		if (bp != NULL)
			goto commit_pool;
		UNLOCK();
		goto redirect;
	}

	/* The small block allocator ends here. */

redirect:
	/*
	 * Redirect the original request to the underlying (libc) allocator.
	 * We jump here on bigger requests, on error in the code above (as a
	 * last chance to serve the request) or when the max memory limit
	 * has been reached.
	 */
	return (void *)malloc(nbytes ? nbytes : 1);
}

/* free */

void
_PyMalloc_Free(void *p)
{
	poolp pool;
	block *lastfree;
	poolp next, prev;
	uint size;

	if (p == NULL)	/* free(NULL) has no effect */
		return;

	pool = POOL_ADDR(p);
	if (ADDRESS_IN_RANGE(p, pool->arenaindex)) {
		/* We allocated this address. */
		LOCK();
		INCMINE;
		/*
		 * Link p to the start of the pool's freeblock list. Since
		 * the pool had at least the p block outstanding, the pool
		 * wasn't empty (so it's already in a usedpools[] list, or
		 * was full and is in no list -- it's not in the freepools
		 * list in any case).
		 */
		assert(pool->ref.count > 0);	/* else it was empty */
		*(block **)p = lastfree = pool->freeblock;
		pool->freeblock = (block *)p;
		if (lastfree) {
			/*
			 * freeblock wasn't NULL, so the pool wasn't full,
			 * and the pool is in a usedpools[] list.
			 */
			assert(pool->ref.count < pool->capacity);
			if (--pool->ref.count != 0) {
				/* pool isn't empty:  leave it in usedpools */
				UNLOCK();
				return;
			}
			/*
			 * Pool is now empty:  unlink from usedpools, and
			 * link to the front of freepools. Linking at the
			 * front means the most recently emptied pool is
			 * reused first, while pools emptied earlier (which,
			 * being unreferenced, may have been paged out) are
			 * reused later.
			 */
			next = pool->nextpool;
			prev = pool->prevpool;
			next->prevpool = prev;
			prev->nextpool = next;
			/* Link to freepools. This is a singly-linked list,
			 * and pool->prevpool isn't used there.
			 */
			pool->nextpool = freepools;
			freepools = pool;
			UNLOCK();
			return;
		}
		/*
		 * Pool was full, so doesn't currently live in any list:
		 * link it to the front of the appropriate usedpools[] list.
		 * This mimics LRU pool usage for new allocations and
		 * targets optimal filling when several pools contain
		 * blocks of the same size class.
		 */
		assert(pool->ref.count == pool->capacity); /* else not full */
		--pool->ref.count;
		assert(pool->ref.count > 0);	/* else the pool is empty */
		size = pool->szidx;
		next = usedpools[size + size];
		prev = next->prevpool;
		/* insert pool before next:   prev <-> pool <-> next */
		pool->nextpool = next;
		pool->prevpool = prev;
		next->prevpool = pool;
		prev->nextpool = pool;
		UNLOCK();
		return;
	}

	/* We didn't allocate this address. */
	INCTHEIRS;
	free(p);
}

/* realloc. If p is NULL, this acts like malloc(nbytes). Else if nbytes==0,
 * then as the Python docs promise, we do not treat this like free(p), and
 * return a non-NULL result.
 */

void *
_PyMalloc_Realloc(void *p, size_t nbytes)
{
	void *bp;
	poolp pool;
	uint size;

	if (p == NULL)
		return _PyMalloc_Malloc(nbytes);

	pool = POOL_ADDR(p);
	if (ADDRESS_IN_RANGE(p, pool->arenaindex)) {
		/* We're in charge of this block */
		INCMINE;
		size = (pool->szidx + 1) << ALIGNMENT_SHIFT; /* block size */
		if (size >= nbytes)
			/* Don't bother if a smaller size was requested. */
			return p;
		/* We need more memory. */
		assert(nbytes != 0);
		bp = _PyMalloc_Malloc(nbytes);
		if (bp != NULL) {
			memcpy(bp, p, size);
			_PyMalloc_Free(p);
		}
		return bp;
	}
	/* We're not managing this block. */
	INCTHEIRS;
	if (nbytes <= SMALL_REQUEST_THRESHOLD) {
		/* Take over this block. */
		bp = _PyMalloc_Malloc(nbytes ? nbytes : 1);
		if (bp != NULL) {
			memcpy(bp, p, nbytes);
			free(p);
		}
		else if (nbytes == 0) {
			/* Meet the doc's promise that nbytes==0 will
			 * never return a NULL pointer when p isn't NULL.
			 */
			bp = p;
		}
	}
	else {
		assert(nbytes != 0);
		bp = realloc(p, nbytes);
	}
	return bp;
}

#else	/* ! WITH_PYMALLOC */

/*==========================================================================*/
/* pymalloc not enabled:  Redirect the entry points to the PyMem family. */

void *
_PyMalloc_Malloc(size_t n)
{
	return PyMem_MALLOC(n);
}

void *
_PyMalloc_Realloc(void *p, size_t n)
{
	return PyMem_REALLOC(p, n);
}

void
_PyMalloc_Free(void *p)
{
	PyMem_FREE(p);
}
#endif /* WITH_PYMALLOC */

/*==========================================================================*/
/* Regardless of whether pymalloc is enabled, export entry points for
 * the object-oriented pymalloc functions.
 */

PyObject *
_PyMalloc_New(PyTypeObject *tp)
{
	PyObject *op;
	op = (PyObject *) _PyMalloc_MALLOC(_PyObject_SIZE(tp));
	if (op == NULL)
		return PyErr_NoMemory();
	return PyObject_INIT(op, tp);
}

PyVarObject *
_PyMalloc_NewVar(PyTypeObject *tp, int nitems)
{
	PyVarObject *op;
	const size_t size = _PyObject_VAR_SIZE(tp, nitems);
	op = (PyVarObject *) _PyMalloc_MALLOC(size);
	if (op == NULL)
		return (PyVarObject *)PyErr_NoMemory();
	return PyObject_INIT_VAR(op, tp, nitems);
}

void
_PyMalloc_Del(PyObject *op)
{
	_PyMalloc_FREE(op);
}

#ifdef PYMALLOC_DEBUG
/*==========================================================================*/
/* A cross-platform debugging allocator. This doesn't manage memory directly,
 * it wraps a real allocator, adding extra debugging info to the memory blocks.
 */

#define PYMALLOC_CLEANBYTE      0xCB	/* uninitialized memory */
#define PYMALLOC_DEADBYTE       0xDB	/* free()ed memory */
#define PYMALLOC_FORBIDDENBYTE  0xFB	/* unusable memory */

static ulong serialno = 0;	/* incremented on each debug {m,re}alloc */

/* serialno is always incremented via calling this routine. The point is
   to supply a single place to set a breakpoint.
*/
static void
bumpserialno(void)
{
	++serialno;
}


/* Read 4 bytes at p as a big-endian ulong. */
static ulong
read4(const void *p)
{
	const uchar *q = (const uchar *)p;
	return ((ulong)q[0] << 24) |
	       ((ulong)q[1] << 16) |
	       ((ulong)q[2] <<  8) |
	        (ulong)q[3];
}

/* Write the 4 least-significant bytes of n as a big-endian unsigned int,
   MSB at address p, LSB at p+3. */
static void
write4(void *p, ulong n)
{
	uchar *q = (uchar *)p;
	q[0] = (uchar)((n >> 24) & 0xff);
	q[1] = (uchar)((n >> 16) & 0xff);
	q[2] = (uchar)((n >>  8) & 0xff);
	q[3] = (uchar)( n        & 0xff);
}

/* The debug malloc asks for 16 extra bytes and fills them with useful stuff,
   here calling the underlying malloc's result p:

p[0:4]
    Number of bytes originally asked for. 4-byte unsigned integer,
    big-endian (easier to read in a memory dump).
p[4:8]
    Copies of PYMALLOC_FORBIDDENBYTE. Used to catch under-writes
    and reads.
p[8:8+n]
    The requested memory, filled with copies of PYMALLOC_CLEANBYTE.
    Used to catch reference to uninitialized memory.
    &p[8] is returned. Note that this is 8-byte aligned if PyMalloc
    handled the request itself.
p[8+n:8+n+4]
    Copies of PYMALLOC_FORBIDDENBYTE. Used to catch over-writes
    and reads.
p[8+n+4:8+n+8]
    A serial number, incremented by 1 on each call to _PyMalloc_DebugMalloc
    and _PyMalloc_DebugRealloc.
    4-byte unsigned integer, big-endian.
    If "bad memory" is detected later, the serial number gives an
    excellent way to set a breakpoint on the next run, to capture the
    instant at which this block was passed out.
*/
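
/*
 * Concrete example (illustration only): for a debug request of 6 bytes, the
 * underlying allocator is asked for 6 + 16 == 22 bytes, laid out as
 *
 *	p[0:4]    00 00 00 06		original request size, big-endian
 *	p[4:8]    FB FB FB FB		leading pad (PYMALLOC_FORBIDDENBYTE)
 *	p[8:14]   CB CB CB CB CB CB	the caller's 6 bytes (PYMALLOC_CLEANBYTE)
 *	p[14:18]  FB FB FB FB		trailing pad
 *	p[18:22]  serial number, big-endian
 *
 * and &p[8] is what the caller gets back.
 */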

void *
_PyMalloc_DebugMalloc(size_t nbytes)
{
	uchar *p;	/* base address of malloc'ed block */
	uchar *tail;	/* p + 8 + nbytes == pointer to tail pad bytes */
	size_t total;	/* nbytes + 16 */

	bumpserialno();
	total = nbytes + 16;
	if (total < nbytes || (total >> 31) > 1) {
		/* overflow, or we can't represent it in 4 bytes */
		/* Obscure: can't do (total >> 32) != 0 instead, because
		   C doesn't define what happens for a right-shift of 32
		   when size_t is a 32-bit type. At least C guarantees
		   size_t is an unsigned type. */
		return NULL;
	}

	p = _PyMalloc_Malloc(total);
	if (p == NULL)
		return NULL;

	write4(p, nbytes);
	p[4] = p[5] = p[6] = p[7] = PYMALLOC_FORBIDDENBYTE;

	if (nbytes > 0)
		memset(p+8, PYMALLOC_CLEANBYTE, nbytes);

	tail = p + 8 + nbytes;
	tail[0] = tail[1] = tail[2] = tail[3] = PYMALLOC_FORBIDDENBYTE;
	write4(tail + 4, serialno);

	return p+8;
}

/* The debug free first checks the 8 bytes on each end for sanity (in
   particular, that the PYMALLOC_FORBIDDENBYTEs are still intact).
   Then fills the original bytes with PYMALLOC_DEADBYTE.
   Then calls the underlying free.
*/
void
_PyMalloc_DebugFree(void *p)
{
	uchar *q = (uchar *)p;
	size_t nbytes;

	if (p == NULL)
		return;
	_PyMalloc_DebugCheckAddress(p);
	nbytes = read4(q-8);
	if (nbytes > 0)
		memset(q, PYMALLOC_DEADBYTE, nbytes);
	_PyMalloc_Free(q-8);
}

void *
_PyMalloc_DebugRealloc(void *p, size_t nbytes)
{
	uchar *q = (uchar *)p;
	size_t original_nbytes;
	void *fresh;	/* new memory block, if needed */

	if (p == NULL)
		return _PyMalloc_DebugMalloc(nbytes);

	_PyMalloc_DebugCheckAddress(p);
	original_nbytes = read4(q-8);
	if (nbytes == original_nbytes) {
		/* note that this case is likely to be common due to the
		   way Python appends to lists */
		bumpserialno();
		write4(q + nbytes + 4, serialno);
		return p;
	}

	if (nbytes < original_nbytes) {
		/* shrinking -- leave the guts alone, except to
		   fill the excess with DEADBYTE */
		const size_t excess = original_nbytes - nbytes;
		bumpserialno();
		write4(q-8, nbytes);
		/* kill the excess bytes plus the trailing 8 pad bytes */
		q += nbytes;
		q[0] = q[1] = q[2] = q[3] = PYMALLOC_FORBIDDENBYTE;
		write4(q+4, serialno);
		memset(q+8, PYMALLOC_DEADBYTE, excess);
		return p;
	}

	/* More memory is needed: get it, copy over the first original_nbytes
	   of the original data, and free the original memory. */
	fresh = _PyMalloc_DebugMalloc(nbytes);
	if (fresh != NULL && original_nbytes > 0)
		memcpy(fresh, p, original_nbytes);
	_PyMalloc_DebugFree(p);
	return fresh;
}

/* Check the forbidden bytes on both ends of the memory allocated for p.
 * If anything is wrong, print info to stderr via _PyMalloc_DebugDumpAddress,
 * and call Py_FatalError to kill the program.
 */
void
_PyMalloc_DebugCheckAddress(const void *p)
{
	const uchar *q = (const uchar *)p;
	char *msg;
	int i;

	if (p == NULL) {
		msg = "didn't expect a NULL pointer";
		goto error;
	}

	for (i = 4; i >= 1; --i) {
		if (*(q-i) != PYMALLOC_FORBIDDENBYTE) {
			msg = "bad leading pad byte";
			goto error;
		}
	}

	{
		const ulong nbytes = read4(q-8);
		const uchar *tail = q + nbytes;
		for (i = 0; i < 4; ++i) {
			if (tail[i] != PYMALLOC_FORBIDDENBYTE) {
				msg = "bad trailing pad byte";
				goto error;
			}
		}
	}

	return;

error:
	_PyMalloc_DebugDumpAddress(p);
	Py_FatalError(msg);
}

/* Display info to stderr about the memory block at p. */
void
_PyMalloc_DebugDumpAddress(const void *p)
{
	const uchar *q = (const uchar *)p;
	const uchar *tail;
	ulong nbytes, serial;
	int i;

	fprintf(stderr, "Debug memory block at address p=%p:\n", p);
	if (p == NULL)
		return;

	nbytes = read4(q-8);
	fprintf(stderr, "    %lu bytes originally allocated\n", nbytes);

	/* In case this is nuts, check the pad bytes before trying to read up
	   the serial number (the address deref could blow up). */

	fputs("    the 4 pad bytes at p-4 are ", stderr);
	if (*(q-4) == PYMALLOC_FORBIDDENBYTE &&
	    *(q-3) == PYMALLOC_FORBIDDENBYTE &&
	    *(q-2) == PYMALLOC_FORBIDDENBYTE &&
	    *(q-1) == PYMALLOC_FORBIDDENBYTE) {
		fputs("PYMALLOC_FORBIDDENBYTE, as expected\n", stderr);
	}
	else {
		fprintf(stderr, "not all PYMALLOC_FORBIDDENBYTE (0x%02x):\n",
			PYMALLOC_FORBIDDENBYTE);
		for (i = 4; i >= 1; --i) {
			const uchar byte = *(q-i);
			fprintf(stderr, "        at p-%d: 0x%02x", i, byte);
			if (byte != PYMALLOC_FORBIDDENBYTE)
				fputs(" *** OUCH", stderr);
			fputc('\n', stderr);
		}
	}

	tail = q + nbytes;
	fprintf(stderr, "    the 4 pad bytes at tail=%p are ", tail);
	if (tail[0] == PYMALLOC_FORBIDDENBYTE &&
	    tail[1] == PYMALLOC_FORBIDDENBYTE &&
	    tail[2] == PYMALLOC_FORBIDDENBYTE &&
	    tail[3] == PYMALLOC_FORBIDDENBYTE) {
		fputs("PYMALLOC_FORBIDDENBYTE, as expected\n", stderr);
	}
	else {
		fprintf(stderr, "not all PYMALLOC_FORBIDDENBYTE (0x%02x):\n",
			PYMALLOC_FORBIDDENBYTE);
		for (i = 0; i < 4; ++i) {
			const uchar byte = tail[i];
			fprintf(stderr, "        at tail+%d: 0x%02x",
				i, byte);
			if (byte != PYMALLOC_FORBIDDENBYTE)
				fputs(" *** OUCH", stderr);
			fputc('\n', stderr);
		}
	}

	serial = read4(tail+4);
	fprintf(stderr, "    the block was made by call #%lu to "
	                "debug malloc/realloc\n", serial);

	if (nbytes > 0) {
		int i = 0;
		fputs("    data at p:", stderr);
		/* print up to 8 bytes at the start */
		while (q < tail && i < 8) {
			fprintf(stderr, " %02x", *q);
			++i;
			++q;
		}
		/* and up to 8 at the end */
		if (q < tail) {
			if (tail - q > 8) {
				fputs(" ...", stderr);
				q = tail - 8;
			}
			while (q < tail) {
				fprintf(stderr, " %02x", *q);
				++q;
			}
		}
		fputc('\n', stderr);
	}
}

/* Print summary info to stderr about the state of pymalloc's structures. */
void
_PyMalloc_DebugDumpStats(void)
{
	uint i;
	const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
	uint numfreepools = 0;
	/* # of pools per class index */
	ulong numpools[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
	/* # of allocated blocks per class index */
	ulong numblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
	/* # of free blocks per class index */
	ulong numfreeblocks[SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT];
	ulong grandtotal;	/* total # of allocated bytes */
	ulong freegrandtotal;	/* total # of available bytes in used pools */

	fprintf(stderr, "%u arenas * %d bytes/arena = %lu total bytes.\n",
		narenas, ARENA_SIZE, narenas * (ulong)ARENA_SIZE);
	fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
		SMALL_REQUEST_THRESHOLD, numclasses);
	fprintf(stderr, "pymalloc malloc+realloc called %lu times.\n",
		serialno);

	for (i = 0; i < numclasses; ++i)
		numpools[i] = numblocks[i] = numfreeblocks[i] = 0;

	/* Because full pools aren't linked to from anything, it's easiest
	 * to march over all the arenas. If we're lucky, most of the memory
	 * will be living in full pools -- would be a shame to miss them.
	 */
	for (i = 0; i < narenas; ++i) {
		uint poolsinarena;
		uint j;
		uptr base = arenas[i];

		/* round up to pool alignment */
		poolsinarena = ARENA_SIZE / POOL_SIZE;
		if (base & (uptr)POOL_SIZE_MASK) {
			--poolsinarena;
			base &= ~(uptr)POOL_SIZE_MASK;
			base += POOL_SIZE;
		}

		if (i == narenas - 1) {
			/* current arena may have raw memory at the end */
			numfreepools += nfreepools;
			poolsinarena -= nfreepools;
		}

		/* visit every pool in the arena */
		for (j = 0; j < poolsinarena; ++j, base += POOL_SIZE) {
			poolp p = (poolp)base;
			if (p->ref.count == 0) {
				/* currently unused */
				++numfreepools;
				continue;
			}
			++numpools[p->szidx];
			numblocks[p->szidx] += p->ref.count;
			numfreeblocks[p->szidx] += p->capacity - p->ref.count;
		}
	}

	fputc('\n', stderr);
	fprintf(stderr, "Number of unused pools: %u\n", numfreepools);
	fputc('\n', stderr);
	fputs("class   num bytes   num pools   blocks in use   avail blocks\n"
	      "-----   ---------   ---------   -------------   ------------\n",
	      stderr);

	grandtotal = freegrandtotal = 0;
	for (i = 0; i < numclasses; ++i) {
		ulong p = numpools[i];
		ulong b = numblocks[i];
		ulong f = numfreeblocks[i];
		uint size = (i+1) << ALIGNMENT_SHIFT;
		if (p == 0) {
			assert(b == 0 && f == 0);
			continue;
		}
		fprintf(stderr, "%5u %11u %11lu %15lu %13lu\n",
			i, size, p, b, f);
		grandtotal += b * size;
		freegrandtotal += f * size;
	}
	fputc('\n', stderr);
	fprintf(stderr, "Total bytes in allocated blocks:  %lu\n",
		grandtotal);
	fprintf(stderr, "Total free bytes in used pools:   %lu\n",
		freegrandtotal);
}

#endif	/* PYMALLOC_DEBUG */