1/*
 2 * kmp_alloc.c -- private/shared dynamic memory allocation and management
 3 * $Revision: 42810 $
 4 * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
 5 */
6
7
8//===----------------------------------------------------------------------===//
9//
10// The LLVM Compiler Infrastructure
11//
12// This file is dual licensed under the MIT and the University of Illinois Open
13// Source Licenses. See LICENSE.txt for details.
14//
15//===----------------------------------------------------------------------===//
16
17
18#include "kmp.h"
19#include "kmp_wrapper_malloc.h"
20#include "kmp_io.h"
21
22// Disable bget when it is not used
23#if KMP_USE_BGET
24
25/* Thread private buffer management code */
26
27typedef int (*bget_compact_t)(size_t, int);
28typedef void *(*bget_acquire_t)(size_t);
29typedef void (*bget_release_t)(void *);
30
31/* NOTE: bufsize must be a signed datatype */
32
33#if KMP_OS_WINDOWS
34# if KMP_ARCH_X86 || KMP_ARCH_ARM
35 typedef kmp_int32 bufsize;
36# else
37 typedef kmp_int64 bufsize;
38# endif
39#else
40 typedef ssize_t bufsize;
41#endif
42
43/* The three modes of operation are fifo search, lifo search, and best-fit. */
44
45typedef enum bget_mode {
46 bget_mode_fifo = 0,
47 bget_mode_lifo = 1,
48 bget_mode_best = 2
49} bget_mode_t;
50
51
52static void bpool( kmp_info_t *th, void *buffer, bufsize len);
53static void *bget( kmp_info_t *th, bufsize size);
54static void *bgetz( kmp_info_t *th, bufsize size);
55static void *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
56static void brel( kmp_info_t *th, void *buf);
57static void bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );
58
59#ifdef KMP_DEBUG
60static void bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
61static void bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
62static void bufdump( kmp_info_t *th, void *buf);
63static void bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
64static int bpoolv( kmp_info_t *th, void *pool);
65#endif
66
67/* BGET CONFIGURATION */
68 /* Buffer allocation size quantum:
69 all buffers allocated are a
70 multiple of this size. This
71 MUST be a power of two. */
72
73 /* On IA-32 architecture with Linux* OS,
74 malloc() does not
 75 ensure 16 byte alignment */
76
77#if KMP_ARCH_X86 || !KMP_HAVE_QUAD
78
79#define SizeQuant 8
80#define AlignType double
81
82#else
83
84#define SizeQuant 16
85#define AlignType _Quad
86
87#endif
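/* Illustrative sketch (kept under #if 0, not part of the allocator; the helper
   name is hypothetical): how a request is rounded up to the SizeQuant
   boundary, mirroring the rounding done in bget() below. With SizeQuant == 8
   a 13-byte request becomes 16; with SizeQuant == 16 it also becomes 16. */
#if 0
static bufsize
__kmp_example_round_to_quant( bufsize size )
{
    return (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
}
#endif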
88
89#define BufStats 1 /* Define this symbol to enable the
90 bstats() function which calculates
91 the total free space in the buffer
92 pool, the largest available
93 buffer, and the total space
94 currently allocated. */
95
96#ifdef KMP_DEBUG
97
98#define BufDump 1 /* Define this symbol to enable the
99 bpoold() function which dumps the
100 buffers in a buffer pool. */
101
102#define BufValid 1 /* Define this symbol to enable the
103 bpoolv() function for validating
104 a buffer pool. */
105
106#define DumpData 1 /* Define this symbol to enable the
107 bufdump() function which allows
108 dumping the contents of an allocated
109 or free buffer. */
110#ifdef NOT_USED_NOW
111
112#define FreeWipe 1 /* Wipe free buffers to a guaranteed
113 pattern of garbage to trip up
114 miscreants who attempt to use
115 pointers into released buffers. */
116
117#define BestFit 1 /* Use a best fit algorithm when
118 searching for space for an
119 allocation request. This uses
120 memory more efficiently, but
121 allocation will be much slower. */
122#endif /* NOT_USED_NOW */
123#endif /* KMP_DEBUG */
124
125
126static bufsize bget_bin_size[ ] = {
127 0,
128// 1 << 6, /* .5 Cache line */
129 1 << 7, /* 1 Cache line, new */
130 1 << 8, /* 2 Cache lines */
131 1 << 9, /* 4 Cache lines, new */
132 1 << 10, /* 8 Cache lines */
133 1 << 11, /* 16 Cache lines, new */
134 1 << 12,
135 1 << 13, /* new */
136 1 << 14,
137 1 << 15, /* new */
138 1 << 16,
139 1 << 17,
140 1 << 18,
141 1 << 19,
142 1 << 20, /* 1MB */
143 1 << 21, /* 2MB */
144 1 << 22, /* 4MB */
145 1 << 23, /* 8MB */
146 1 << 24, /* 16MB */
147 1 << 25, /* 32MB */
148};
149
150#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
151
152struct bfhead;
153
154/* Declare the interface, including the requested buffer size type,
155 bufsize. */
156
157/* Queue links */
158
159typedef struct qlinks {
160 struct bfhead *flink; /* Forward link */
161 struct bfhead *blink; /* Backward link */
162} qlinks_t;
163
164/* Header in allocated and free buffers */
165
166typedef struct bhead2 {
167 kmp_info_t *bthr; /* The thread which owns the buffer pool */
168 bufsize prevfree; /* Relative link back to previous
169 free buffer in memory or 0 if
170 previous buffer is allocated. */
171 bufsize bsize; /* Buffer size: positive if free,
172 negative if allocated. */
173} bhead2_t;
174
175/* Make sure the bhead structure is a multiple of SizeQuant in size. */
176
177typedef union bhead {
178 KMP_ALIGN( SizeQuant )
179 AlignType b_align;
180 char b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
181 bhead2_t bb;
182} bhead_t;
183#define BH(p) ((bhead_t *) (p))
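/* Illustrative compile-time check (kept under #if 0; the negative-array-size
   trick and the typedef name are assumptions, not used elsewhere in this
   file): the b_pad member above is sized so that sizeof(bhead_t) always comes
   out as a multiple of SizeQuant. */
#if 0
typedef char __kmp_bhead_size_is_multiple_of_SizeQuant
             [ (sizeof(bhead_t) % SizeQuant == 0) ? 1 : -1 ];
#endif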
184
185/* Header in directly allocated buffers (by acqfcn) */
186
187typedef struct bdhead
188{
189 bufsize tsize; /* Total size, including overhead */
190 bhead_t bh; /* Common header */
191} bdhead_t;
192#define BDH(p) ((bdhead_t *) (p))
193
194/* Header in free buffers */
195
196typedef struct bfhead {
197 bhead_t bh; /* Common allocated/free header */
198 qlinks_t ql; /* Links on free list */
199} bfhead_t;
200#define BFH(p) ((bfhead_t *) (p))
201
202typedef struct thr_data {
203 bfhead_t freelist[ MAX_BGET_BINS ];
204#if BufStats
205 size_t totalloc; /* Total space currently allocated */
206 long numget, numrel; /* Number of bget() and brel() calls */
207 long numpblk; /* Number of pool blocks */
208 long numpget, numprel; /* Number of block gets and rels */
209 long numdget, numdrel; /* Number of direct gets and rels */
210#endif /* BufStats */
211
212 /* Automatic expansion block management functions */
213 bget_compact_t compfcn;
214 bget_acquire_t acqfcn;
215 bget_release_t relfcn;
216
217 bget_mode_t mode; /* what allocation mode to use? */
218
219 bufsize exp_incr; /* Expansion block size */
220 bufsize pool_len; /* 0: no bpool calls have been made
221 -1: not all pool blocks are
222 the same size
223 >0: (common) block size for all
224 bpool calls made so far
225 */
226 bfhead_t * last_pool; /* Last pool owned by this thread (delayed deallocation) */
227} thr_data_t;
228
229/* Minimum allocation quantum: */
230
231#define QLSize (sizeof(qlinks_t))
232#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
233#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
 234 // Maximum for the requested size.
235
236/* End sentinel: value placed in bsize field of dummy block delimiting
237 end of pool block. The most negative number which will fit in a
238 bufsize, defined in a way that the compiler will accept. */
239
240#define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
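/* Worked values (illustrative, assuming a 32-bit bufsize and SizeQuant == 8):
       MaxSize = ~(0x80000000 | 0x7)          ==  0x7FFFFFF8
                 (largest positive multiple of SizeQuant)
       ESent   = -((0x40000000 - 1) * 2) - 2  ==  -0x80000000
                 (the most negative 32-bit value, built without overflowing
                  inside the constant expression) */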
241
242/* ------------------------------------------------------------------------ */
243
244/* Thread Data management routines */
245
246static int
247bget_get_bin( bufsize size )
248{
249 // binary chop bins
250 int lo = 0, hi = MAX_BGET_BINS - 1;
251
252 KMP_DEBUG_ASSERT( size > 0 );
253
254 while ( (hi - lo) > 1 ) {
255 int mid = (lo + hi) >> 1;
256 if (size < bget_bin_size[ mid ])
257 hi = mid - 1;
258 else
259 lo = mid;
260 }
261
262 KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
263
264 return lo;
265}
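/* Worked examples for the binary chop above (illustrative only, traced by
   hand against the bget_bin_size[] table):
       bget_get_bin( 200 )  == 1    ( bget_bin_size[1] == 1 << 7  == 128  )
       bget_get_bin( 4096 ) == 6    ( bget_bin_size[6] == 1 << 12 == 4096 )
   The returned index selects the freelist[] bin in thr_data_t where bget()
   starts searching for a large-enough free buffer. */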
266
267static void
268set_thr_data( kmp_info_t *th )
269{
270 int i;
271 thr_data_t *data;
272
273 data =
274 (thr_data_t *)(
275 ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
276 );
277
278 memset( data, '\0', sizeof( *data ) );
279
280 for (i = 0; i < MAX_BGET_BINS; ++i) {
281 data->freelist[ i ].ql.flink = & data->freelist[ i ];
282 data->freelist[ i ].ql.blink = & data->freelist[ i ];
283 }
284
285 th->th.th_local.bget_data = data;
286 th->th.th_local.bget_list = 0;
287#if ! USE_CMP_XCHG_FOR_BGET
288#ifdef USE_QUEUING_LOCK_FOR_BGET
289 __kmp_init_lock( & th->th.th_local.bget_lock );
290#else
291 __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
292#endif /* USE_LOCK_FOR_BGET */
293#endif /* ! USE_CMP_XCHG_FOR_BGET */
294}
295
296static thr_data_t *
297get_thr_data( kmp_info_t *th )
298{
299 thr_data_t *data;
300
301 data = (thr_data_t *) th->th.th_local.bget_data;
302
303 KMP_DEBUG_ASSERT( data != 0 );
304
305 return data;
306}
307
308
309#ifdef KMP_DEBUG
310
311static void
312__kmp_bget_validate_queue( kmp_info_t *th )
313{
314 /* NOTE: assume that the global_lock is held */
315
316 void *p = (void *) th->th.th_local.bget_list;
317
318 while (p != 0) {
319 bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
320
321 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
322 p = (void *) b->ql.flink;
323 }
324}
325
326#endif
327
328/* Walk the free list and release the enqueued buffers */
329
330static void
331__kmp_bget_dequeue( kmp_info_t *th )
332{
333 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
334
335 if (p != 0) {
336 #if USE_CMP_XCHG_FOR_BGET
337 {
338 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
339 while ( ! KMP_COMPARE_AND_STORE_PTR(
340 & th->th.th_local.bget_list, old_value, NULL ) )
341 {
342 KMP_CPU_PAUSE();
343 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
344 }
345 p = (void *) old_value;
346 }
347 #else /* ! USE_CMP_XCHG_FOR_BGET */
348 #ifdef USE_QUEUING_LOCK_FOR_BGET
349 __kmp_acquire_lock( & th->th.th_local.bget_lock,
350 __kmp_gtid_from_thread(th) );
351 #else
352 __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
353 #endif /* USE_QUEUING_LOCK_FOR_BGET */
354
355 p = (void *) th->th.th_local.bget_list;
356 th->th.th_local.bget_list = 0;
357
358 #ifdef USE_QUEUING_LOCK_FOR_BGET
359 __kmp_release_lock( & th->th.th_local.bget_lock,
360 __kmp_gtid_from_thread(th) );
361 #else
362 __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
363 #endif
364 #endif /* USE_CMP_XCHG_FOR_BGET */
365
366 /* Check again to make sure the list is not empty */
367
368 while (p != 0) {
369 void *buf = p;
370 bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
371
372 KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
373 KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
374 (kmp_uintptr_t)th ); // clear possible mark
375 KMP_DEBUG_ASSERT( b->ql.blink == 0 );
376
377 p = (void *) b->ql.flink;
378
379 brel( th, buf );
380 }
381 }
382}
383
384/* Chain together the free buffers by using the thread owner field */
385
386static void
387__kmp_bget_enqueue( kmp_info_t *th, void *buf
388#ifdef USE_QUEUING_LOCK_FOR_BGET
389 , kmp_int32 rel_gtid
390#endif
391 )
392{
393 bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));
394
395 KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
396 KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
397 (kmp_uintptr_t)th ); // clear possible mark
398
399 b->ql.blink = 0;
400
401 KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
402 __kmp_gtid_from_thread( th ) ) );
403
404#if USE_CMP_XCHG_FOR_BGET
405 {
406 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
407 /* the next pointer must be set before setting bget_list to buf to avoid
408 exposing a broken list to other threads, even for an instant. */
409 b->ql.flink = BFH( old_value );
410
411 while ( ! KMP_COMPARE_AND_STORE_PTR(
412 & th->th.th_local.bget_list, old_value, buf ) )
413 {
414 KMP_CPU_PAUSE();
415 old_value = TCR_PTR(th->th.th_local.bget_list);
416 /* the next pointer must be set before setting bget_list to buf to avoid
417 exposing a broken list to other threads, even for an instant. */
418 b->ql.flink = BFH( old_value );
419 }
420 }
421#else /* ! USE_CMP_XCHG_FOR_BGET */
422# ifdef USE_QUEUING_LOCK_FOR_BGET
423 __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
424# else
425 __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
426 # endif
427
428 b->ql.flink = BFH( th->th.th_local.bget_list );
429 th->th.th_local.bget_list = (void *) buf;
430
431# ifdef USE_QUEUING_LOCK_FOR_BGET
432 __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
433# else
434 __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
435# endif
436#endif /* USE_CMP_XCHG_FOR_BGET */
437}
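/* Cross-thread release flow (summary of the two routines above): when brel()
   below finds that a buffer's owner (bthr) is a different thread, it calls
   __kmp_bget_enqueue() on the owner, pushing the buffer onto that thread's
   bget_list with the lock-free CAS loop. The buffer only rejoins the owner's
   freelists when the owner itself next calls __kmp_bget_dequeue(), which
   bget(), bpool(), and kmpc_free() do on entry. */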
438
439/* insert buffer back onto a new freelist */
440
441static void
442__kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
443{
444 int bin;
445
446 KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
447 KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
448
449 bin = bget_get_bin( b->bh.bb.bsize );
450
451 KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
452 KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
453
454 b->ql.flink = &thr->freelist[ bin ];
455 b->ql.blink = thr->freelist[ bin ].ql.blink;
456
457 thr->freelist[ bin ].ql.blink = b;
458 b->ql.blink->ql.flink = b;
459}
460
461/* unlink the buffer from the old freelist */
462
463static void
464__kmp_bget_remove_from_freelist( bfhead_t *b )
465{
466 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
467 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
468
469 b->ql.blink->ql.flink = b->ql.flink;
470 b->ql.flink->ql.blink = b->ql.blink;
471}
472
473/* ------------------------------------------------------------------------ */
474
475/* GET STATS -- check info on free list */
476
477static void
478bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
479{
480 thr_data_t *thr = get_thr_data( th );
481 int bin;
482
483 *total_free = *max_free = 0;
484
485 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
486 bfhead_t *b, *best;
487
488 best = &thr->freelist[ bin ];
489 b = best->ql.flink;
490
491 while (b != &thr->freelist[ bin ]) {
492 *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
493 if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
494 best = b;
495
496 /* Link to next buffer */
497 b = b->ql.flink;
498 }
499
500 if (*max_free < best->bh.bb.bsize)
501 *max_free = best->bh.bb.bsize;
502 }
503
504 if (*max_free > (bufsize)sizeof( bhead_t ))
505 *max_free -= sizeof( bhead_t );
506}
507
508/* ------------------------------------------------------------------------ */
509
510/* BGET -- Allocate a buffer. */
511
512static void *
513bget( kmp_info_t *th, bufsize requested_size )
514{
515 thr_data_t *thr = get_thr_data( th );
516 bufsize size = requested_size;
517 bfhead_t *b;
518 void *buf;
519 int compactseq = 0;
520 int use_blink = 0;
521/* For BestFit */
522 bfhead_t *best;
523
524 if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
525 return NULL;
526 }; // if
527
528 __kmp_bget_dequeue( th ); /* Release any queued buffers */
529
530 if (size < (bufsize)SizeQ) { /* Need at least room for the */
531 size = SizeQ; /* queue links. */
532 }
533 #if defined( SizeQuant ) && ( SizeQuant > 1 )
534 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
535 #endif
536
537 size += sizeof(bhead_t); /* Add overhead in allocated buffer
538 to size required. */
539 KMP_DEBUG_ASSERT( size >= 0 );
540 KMP_DEBUG_ASSERT( size % SizeQuant == 0 );
541
542 use_blink = ( thr->mode == bget_mode_lifo );
543
544 /* If a compact function was provided in the call to bectl(), wrap
545 a loop around the allocation process to allow compaction to
546 intervene in case we don't find a suitable buffer in the chain. */
547
548 for (;;) {
549 int bin;
550
551 for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
552 /* Link to next buffer */
553 b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );
554
555 if (thr->mode == bget_mode_best) {
556 best = &thr->freelist[ bin ];
557
558 /* Scan the free list searching for the first buffer big enough
559 to hold the requested size buffer. */
560
561 while (b != &thr->freelist[ bin ]) {
562 if (b->bh.bb.bsize >= (bufsize) size) {
563 if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
564 best = b;
565 }
566 }
567
568 /* Link to next buffer */
569 b = ( use_blink ? b->ql.blink : b->ql.flink );
570 }
571 b = best;
572 }
573
574 while (b != &thr->freelist[ bin ]) {
575 if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {
576
577 /* Buffer is big enough to satisfy the request. Allocate it
578 to the caller. We must decide whether the buffer is large
579 enough to split into the part given to the caller and a
580 free buffer that remains on the free list, or whether the
581 entire buffer should be removed from the free list and
582 given to the caller in its entirety. We only split the
583 buffer if enough room remains for a header plus the minimum
584 quantum of allocation. */
585
586 if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
587 bhead_t *ba, *bn;
588
589 ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
590 bn = BH(((char *) ba) + size);
591
592 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
593
594 /* Subtract size from length of free block. */
595 b->bh.bb.bsize -= (bufsize) size;
596
597 /* Link allocated buffer to the previous free buffer. */
598 ba->bb.prevfree = b->bh.bb.bsize;
599
600 /* Plug negative size into user buffer. */
601 ba->bb.bsize = -size;
602
603 /* Mark this buffer as owned by this thread. */
604 TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
605 /* Mark buffer after this one not preceded by free block. */
606 bn->bb.prevfree = 0;
607
608 /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
609 __kmp_bget_remove_from_freelist( b );
610 __kmp_bget_insert_into_freelist( thr, b );
611#if BufStats
612 thr->totalloc += (size_t) size;
613 thr->numget++; /* Increment number of bget() calls */
614#endif
615 buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
616 KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
617 return buf;
618 } else {
619 bhead_t *ba;
620
621 ba = BH(((char *) b) + b->bh.bb.bsize);
622
623 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
624
625 /* The buffer isn't big enough to split. Give the whole
626 shebang to the caller and remove it from the free list. */
627
628 __kmp_bget_remove_from_freelist( b );
629#if BufStats
630 thr->totalloc += (size_t) b->bh.bb.bsize;
631 thr->numget++; /* Increment number of bget() calls */
632#endif
633 /* Negate size to mark buffer allocated. */
634 b->bh.bb.bsize = -(b->bh.bb.bsize);
635
636 /* Mark this buffer as owned by this thread. */
637 TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
638 /* Zero the back pointer in the next buffer in memory
639 to indicate that this buffer is allocated. */
640 ba->bb.prevfree = 0;
641
642 /* Give user buffer starting at queue links. */
643 buf = (void *) &(b->ql);
644 KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
645 return buf;
646 }
647 }
648
649 /* Link to next buffer */
650 b = ( use_blink ? b->ql.blink : b->ql.flink );
651 }
652 }
653
654 /* We failed to find a buffer. If there's a compact function
655 defined, notify it of the size requested. If it returns
656 TRUE, try the allocation again. */
657
658 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
659 break;
660 }
661 }
662
663 /* No buffer available with requested size free. */
664
665 /* Don't give up yet -- look in the reserve supply. */
666
667 if (thr->acqfcn != 0) {
668 if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {
669
670 /* Request is too large to fit in a single expansion
 671 block. Try to satisfy it by a direct buffer acquisition. */
672
673 bdhead_t *bdh;
674
675 size += sizeof(bdhead_t) - sizeof(bhead_t);
676
677 KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );
678
679 /* richryan */
680 bdh = BDH((*thr->acqfcn)((bufsize) size));
681 if (bdh != NULL) {
682
683 /* Mark the buffer special by setting the size field
684 of its header to zero. */
685 bdh->bh.bb.bsize = 0;
686
687 /* Mark this buffer as owned by this thread. */
688 TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
689 // because direct buffer never goes to free list
690 bdh->bh.bb.prevfree = 0;
691 bdh->tsize = size;
692#if BufStats
693 thr->totalloc += (size_t) size;
694 thr->numget++; /* Increment number of bget() calls */
695 thr->numdget++; /* Direct bget() call count */
696#endif
697 buf = (void *) (bdh + 1);
698 KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
699 return buf;
700 }
701
702 } else {
703
704 /* Try to obtain a new expansion block */
705
706 void *newpool;
707
708 KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );
709
710 /* richryan */
711 newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
712 KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
713 if (newpool != NULL) {
714 bpool( th, newpool, thr->exp_incr);
715 buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */
716 return buf;
717 }
718 }
719 }
720
721 /* Still no buffer available */
722
723 return NULL;
724}
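/* Illustrative split (numbers are made up; sizes include the bhead_t header):
   serving a 256-byte request from a 1024-byte free buffer takes the splitting
   path above: the free buffer keeps the low 768 bytes (its bsize shrinks to
   768 and it is re-binned), the allocated piece is carved from the high end
   with bsize == -256, and the header that follows it gets prevfree == 0.
   Had the remainder been no more than SizeQ + sizeof(bhead_t), the whole
   buffer would instead have been unlinked and handed out in its entirety. */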
725
726/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
727 the entire contents of the buffer to zero, not just the
728 region requested by the caller. */
729
730static void *
731bgetz( kmp_info_t *th, bufsize size )
732{
733 char *buf = (char *) bget( th, size);
734
735 if (buf != NULL) {
736 bhead_t *b;
737 bufsize rsize;
738
739 b = BH(buf - sizeof(bhead_t));
740 rsize = -(b->bb.bsize);
741 if (rsize == 0) {
742 bdhead_t *bd;
743
744 bd = BDH(buf - sizeof(bdhead_t));
745 rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
746 } else {
747 rsize -= sizeof(bhead_t);
748 }
749
750 KMP_DEBUG_ASSERT(rsize >= size);
751
752 (void) memset(buf, 0, (bufsize) rsize);
753 }
754 return ((void *) buf);
755}
756
757/* BGETR -- Reallocate a buffer. This is a minimal implementation,
758 simply in terms of brel() and bget(). It could be
759 enhanced to allow the buffer to grow into adjacent free
760 blocks and to avoid moving data unnecessarily. */
761
762static void *
763bgetr( kmp_info_t *th, void *buf, bufsize size)
764{
765 void *nbuf;
766 bufsize osize; /* Old size of buffer */
767 bhead_t *b;
768
769 nbuf = bget( th, size );
770 if ( nbuf == NULL ) { /* Acquire new buffer */
771 return NULL;
772 }
773 if ( buf == NULL ) {
774 return nbuf;
775 }
776 b = BH(((char *) buf) - sizeof(bhead_t));
777 osize = -b->bb.bsize;
778 if (osize == 0) {
779 /* Buffer acquired directly through acqfcn. */
780 bdhead_t *bd;
781
782 bd = BDH(((char *) buf) - sizeof(bdhead_t));
783 osize = bd->tsize - (bufsize) sizeof(bdhead_t);
784 } else {
785 osize -= sizeof(bhead_t);
786 };
787
788 KMP_DEBUG_ASSERT(osize > 0);
789
790 (void) memcpy((char *) nbuf, (char *) buf, /* Copy the data */
791 (size_t) ((size < osize) ? size : osize));
792 brel( th, buf );
793
794 return nbuf;
795}
796
797/* BREL -- Release a buffer. */
798
799static void
800brel( kmp_info_t *th, void *buf )
801{
802 thr_data_t *thr = get_thr_data( th );
803 bfhead_t *b, *bn;
804 kmp_info_t *bth;
805
806 KMP_DEBUG_ASSERT(buf != NULL);
807 KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
808
809 b = BFH(((char *) buf) - sizeof(bhead_t));
810
811 if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
812 bdhead_t *bdh;
813
814 bdh = BDH(((char *) buf) - sizeof(bdhead_t));
815 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
816#if BufStats
817 thr->totalloc -= (size_t) bdh->tsize;
818 thr->numdrel++; /* Number of direct releases */
819 thr->numrel++; /* Increment number of brel() calls */
820#endif /* BufStats */
821#ifdef FreeWipe
822 (void) memset((char *) buf, 0x55,
823 (size_t) (bdh->tsize - sizeof(bdhead_t)));
824#endif /* FreeWipe */
825
826 KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );
827
828 KMP_DEBUG_ASSERT( thr->relfcn != 0 );
829 (*thr->relfcn)((void *) bdh); /* Release it directly. */
830 return;
831 }
832
833 bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
834 if ( bth != th ) {
835 /* Add this buffer to be released by the owning thread later */
836 __kmp_bget_enqueue( bth, buf
837#ifdef USE_QUEUING_LOCK_FOR_BGET
838 , __kmp_gtid_from_thread( th )
839#endif
840 );
841 return;
842 }
843
844 /* Buffer size must be negative, indicating that the buffer is
845 allocated. */
846
847 if (b->bh.bb.bsize >= 0) {
848 bn = NULL;
849 }
850 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
851
852 /* Back pointer in next buffer must be zero, indicating the
853 same thing: */
854
855 KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);
856
857#if BufStats
858 thr->numrel++; /* Increment number of brel() calls */
859 thr->totalloc += (size_t) b->bh.bb.bsize;
860#endif
861
862 /* If the back link is nonzero, the previous buffer is free. */
863
864 if (b->bh.bb.prevfree != 0) {
865 /* The previous buffer is free. Consolidate this buffer with it
866 by adding the length of this buffer to the previous free
867 buffer. Note that we subtract the size in the buffer being
868 released, since it's negative to indicate that the buffer is
869 allocated. */
870
871 register bufsize size = b->bh.bb.bsize;
872
873 /* Make the previous buffer the one we're working on. */
874 KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
875 b = BFH(((char *) b) - b->bh.bb.prevfree);
876 b->bh.bb.bsize -= size;
877
878 /* unlink the buffer from the old freelist */
879 __kmp_bget_remove_from_freelist( b );
880 }
881 else {
 882 /* The previous buffer is allocated. Mark this buffer's
 883 size as positive (i.e. free) and fall through to place
 884 the buffer on the free list as an isolated free block. */
885
886 b->bh.bb.bsize = -b->bh.bb.bsize;
887 }
888
889 /* insert buffer back onto a new freelist */
890 __kmp_bget_insert_into_freelist( thr, b );
891
892
893 /* Now we look at the next buffer in memory, located by advancing from
894 the start of this buffer by its size, to see if that buffer is
895 free. If it is, we combine this buffer with the next one in
896 memory, dechaining the second buffer from the free list. */
897
898 bn = BFH(((char *) b) + b->bh.bb.bsize);
899 if (bn->bh.bb.bsize > 0) {
900
901 /* The buffer is free. Remove it from the free list and add
902 its size to that of our buffer. */
903
904 KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);
905
906 __kmp_bget_remove_from_freelist( bn );
907
908 b->bh.bb.bsize += bn->bh.bb.bsize;
909
910 /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
911
912 __kmp_bget_remove_from_freelist( b );
913 __kmp_bget_insert_into_freelist( thr, b );
914
915 /* Finally, advance to the buffer that follows the newly
916 consolidated free block. We must set its backpointer to the
917 head of the consolidated free block. We know the next block
918 must be an allocated block because the process of recombination
919 guarantees that two free blocks will never be contiguous in
920 memory. */
921
922 bn = BFH(((char *) b) + b->bh.bb.bsize);
923 }
924#ifdef FreeWipe
925 (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
926 (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
927#endif
928 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
929
930 /* The next buffer is allocated. Set the backpointer in it to point
931 to this buffer; the previous free buffer in memory. */
932
933 bn->bh.bb.prevfree = b->bh.bb.bsize;
934
935 /* If a block-release function is defined, and this free buffer
936 constitutes the entire block, release it. Note that pool_len
937 is defined in such a way that the test will fail unless all
938 pool blocks are the same size. */
939
940 if (thr->relfcn != 0 &&
941 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
942 {
943#if BufStats
944 if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */
945#endif
946
947 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
948 KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
949 KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
950
951 /* Unlink the buffer from the free list */
952 __kmp_bget_remove_from_freelist( b );
953
954 KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
955
956 (*thr->relfcn)(b);
957#if BufStats
958 thr->numprel++; /* Nr of expansion block releases */
959 thr->numpblk--; /* Total number of blocks */
960 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
961
 962 /* avoid leaving stale last_pool pointer around if it is being deallocated */
963 if (thr->last_pool == b) thr->last_pool = 0;
964 }
965 else {
966 thr->last_pool = b;
967 }
968#endif /* BufStats */
969 }
970}
971
972/* BECTL -- Establish automatic pool expansion control */
973
974static void
975bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
976{
977 thr_data_t *thr = get_thr_data( th );
978
979 thr->compfcn = compact;
980 thr->acqfcn = acquire;
981 thr->relfcn = release;
982 thr->exp_incr = pool_incr;
983}
984
985/* BPOOL -- Add a region of memory to the buffer pool. */
986
987static void
988bpool( kmp_info_t *th, void *buf, bufsize len)
989{
990/* int bin = 0; */
991 thr_data_t *thr = get_thr_data( th );
992 bfhead_t *b = BFH(buf);
993 bhead_t *bn;
994
995 __kmp_bget_dequeue( th ); /* Release any queued buffers */
996
997#ifdef SizeQuant
998 len &= ~(SizeQuant - 1);
999#endif
1000 if (thr->pool_len == 0) {
1001 thr->pool_len = len;
1002 } else if (len != thr->pool_len) {
1003 thr->pool_len = -1;
1004 }
1005#if BufStats
1006 thr->numpget++; /* Number of block acquisitions */
1007 thr->numpblk++; /* Number of blocks total */
1008 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1009#endif /* BufStats */
1010
1011 /* Since the block is initially occupied by a single free buffer,
1012 it had better not be (much) larger than the largest buffer
1013 whose size we can store in bhead.bb.bsize. */
1014
1015 KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));
1016
1017 /* Clear the backpointer at the start of the block to indicate that
1018 there is no free block prior to this one. That blocks
1019 recombination when the first block in memory is released. */
1020
1021 b->bh.bb.prevfree = 0;
1022
1023 /* Create a dummy allocated buffer at the end of the pool. This dummy
1024 buffer is seen when a buffer at the end of the pool is released and
1025 blocks recombination of the last buffer with the dummy buffer at
1026 the end. The length in the dummy buffer is set to the largest
1027 negative number to denote the end of the pool for diagnostic
1028 routines (this specific value is not counted on by the actual
1029 allocation and release functions). */
1030
1031 len -= sizeof(bhead_t);
1032 b->bh.bb.bsize = (bufsize) len;
1033 /* Set the owner of this buffer */
1034 TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address
1035
1036 /* Chain the new block to the free list. */
1037 __kmp_bget_insert_into_freelist( thr, b );
1038
1039#ifdef FreeWipe
1040 (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
1041 (size_t) (len - sizeof(bfhead_t)));
1042#endif
1043 bn = BH(((char *) b) + len);
1044 bn->bb.prevfree = (bufsize) len;
1045 /* Definition of ESent assumes two's complement! */
1046 KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );
1047
1048 bn->bb.bsize = ESent;
1049}
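/* Illustrative layout of a fresh pool block right after bpool( th, buf, len )
   (len already rounded down to a multiple of SizeQuant):
       buf + 0                      : bfhead_t  prevfree = 0
                                                bsize    = len - sizeof(bhead_t)
       buf + len - sizeof(bhead_t)  : bhead_t   prevfree = len - sizeof(bhead_t)
                                                bsize    = ESent  (end sentinel)
   The single free buffer spanning the block is linked into the freelist bin
   chosen by bget_get_bin(), and its owner field is stored with the low bit
   set (see the TCW_PTR above). */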
1050
1051/* ------------------------------------------------------------------------ */
1052
1053/* BFREED -- Dump the free lists for this thread. */
1054
1055static void
1056bfreed( kmp_info_t *th )
1057{
1058 int bin = 0, count = 0;
1059 int gtid = __kmp_gtid_from_thread( th );
1060 thr_data_t *thr = get_thr_data( th );
1061
1062#if BufStats
1063 __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
1064 KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
1065 " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
1066 gtid, (kmp_uint64) thr->totalloc,
1067 (kmp_int64) thr->numget, (kmp_int64) thr->numrel,
1068 (kmp_int64) thr->numpblk,
1069 (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
1070 (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
1071#endif
1072
1073 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1074 bfhead_t *b;
1075
1076 for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
1077 bufsize bs = b->bh.bb.bsize;
1078
1079 KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
1080 KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
1081 KMP_DEBUG_ASSERT( bs > 0 );
1082
1083 count += 1;
1084
1085 __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
1086#ifdef FreeWipe
1087 {
1088 char *lerr = ((char *) b) + sizeof(bfhead_t);
1089 if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1090 __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid );
1091 }
1092 }
1093#endif
1094 }
1095 }
1096
1097 if (count == 0)
1098 __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
1099}
1100
1101/* ------------------------------------------------------------------------ */
1102
1103#ifdef KMP_DEBUG
1104
1105#if BufStats
1106
1107/* BSTATS -- Return buffer allocation free space statistics. */
1108
1109static void
1110bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
1111{
1112 int bin = 0;
1113 thr_data_t *thr = get_thr_data( th );
1114
1115 *nget = thr->numget;
1116 *nrel = thr->numrel;
1117 *curalloc = (bufsize) thr->totalloc;
1118 *totfree = 0;
1119 *maxfree = -1;
1120
1121 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1122 bfhead_t *b = thr->freelist[ bin ].ql.flink;
1123
1124 while (b != &thr->freelist[ bin ]) {
1125 KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
1126 *totfree += b->bh.bb.bsize;
1127 if (b->bh.bb.bsize > *maxfree) {
1128 *maxfree = b->bh.bb.bsize;
1129 }
1130 b = b->ql.flink; /* Link to next buffer */
1131 }
1132 }
1133}
1134
1135/* BSTATSE -- Return extended statistics */
1136
1137static void
1138bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
1139{
1140 thr_data_t *thr = get_thr_data( th );
1141
1142 *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
1143 *npool = thr->numpblk;
1144 *npget = thr->numpget;
1145 *nprel = thr->numprel;
1146 *ndget = thr->numdget;
1147 *ndrel = thr->numdrel;
1148}
1149
1150#endif /* BufStats */
1151
1152/* BUFDUMP -- Dump the data in a buffer. This is called with the user
1153 data pointer, and backs up to the buffer header. It will
1154 dump either a free block or an allocated one. */
1155
1156static void
1157bufdump( kmp_info_t *th, void *buf )
1158{
1159 bfhead_t *b;
1160 unsigned char *bdump;
1161 bufsize bdlen;
1162
1163 b = BFH(((char *) buf) - sizeof(bhead_t));
1164 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
1165 if (b->bh.bb.bsize < 0) {
1166 bdump = (unsigned char *) buf;
1167 bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
1168 } else {
1169 bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
1170 bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
1171 }
1172
1173 while (bdlen > 0) {
1174 int i, dupes = 0;
1175 bufsize l = bdlen;
1176 char bhex[50], bascii[20];
1177
1178 if (l > 16) {
1179 l = 16;
1180 }
1181
1182 for (i = 0; i < l; i++) {
1183 (void) sprintf(bhex + i * 3, "%02X ", bdump[i]);
1184 if (bdump[i] > 0x20 && bdump[i] < 0x7F)
1185 bascii[ i ] = bdump[ i ];
1186 else
1187 bascii[ i ] = ' ';
1188 }
1189 bascii[i] = 0;
1190 (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
1191 bdump += l;
1192 bdlen -= l;
1193 while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
1194 (char *) bdump, 16) == 0)) {
1195 dupes++;
1196 bdump += 16;
1197 bdlen -= 16;
1198 }
1199 if (dupes > 1) {
1200 (void) __kmp_printf_no_lock(
1201 " (%d lines [%d bytes] identical to above line skipped)\n",
1202 dupes, dupes * 16);
1203 } else if (dupes == 1) {
1204 bdump -= 16;
1205 bdlen += 16;
1206 }
1207 }
1208}
1209
1210/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
1211 If DUMPALLOC is nonzero, the contents of allocated buffers
1212 are dumped. If DUMPFREE is nonzero, free blocks are
1213 dumped as well. If FreeWipe checking is enabled, free
1214 blocks which have been clobbered will always be dumped. */
1215
1216static void
1217bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
1218{
1219 bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));
1220
1221 while (b->bh.bb.bsize != ESent) {
1222 bufsize bs = b->bh.bb.bsize;
1223
1224 if (bs < 0) {
1225 bs = -bs;
1226 (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
1227 if (dumpalloc) {
1228 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1229 }
1230 } else {
1231 char *lerr = "";
1232
1233 KMP_DEBUG_ASSERT(bs > 0);
1234 if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1235 lerr = " (Bad free list links)";
1236 }
1237 (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
1238 (long) bs, lerr);
1239#ifdef FreeWipe
1240 lerr = ((char *) b) + sizeof(bfhead_t);
1241 if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1242 (memcmp(lerr, lerr + 1,
1243 (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1244 (void) __kmp_printf_no_lock(
1245 "(Contents of above free block have been overstored.)\n");
1246 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1247 } else
1248#endif
1249 if (dumpfree) {
1250 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1251 }
1252 }
1253 b = BFH(((char *) b) + bs);
1254 }
1255}
1256
1257/* BPOOLV -- Validate a buffer pool. */
1258
1259static int
1260bpoolv( kmp_info_t *th, void *buf )
1261{
1262 bfhead_t *b = BFH(buf);
1263
1264 while (b->bh.bb.bsize != ESent) {
1265 bufsize bs = b->bh.bb.bsize;
1266
1267 if (bs < 0) {
1268 bs = -bs;
1269 } else {
1270#ifdef FreeWipe
1271 char *lerr = "";
1272#endif
1273
1274 KMP_DEBUG_ASSERT(bs > 0);
1275 if (bs <= 0) {
1276 return 0;
1277 }
1278 if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1279 (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n",
1280 (long) bs);
1281 KMP_DEBUG_ASSERT(0);
1282 return 0;
1283 }
1284#ifdef FreeWipe
1285 lerr = ((char *) b) + sizeof(bfhead_t);
1286 if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1287 (memcmp(lerr, lerr + 1,
1288 (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1289 (void) __kmp_printf_no_lock(
1290 "(Contents of above free block have been overstored.)\n");
1291 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1292 KMP_DEBUG_ASSERT(0);
1293 return 0;
1294 }
1295#endif /* FreeWipe */
1296 }
1297 b = BFH(((char *) b) + bs);
1298 }
1299 return 1;
1300}
1301
1302#endif /* KMP_DEBUG */
1303
1304/* ------------------------------------------------------------------------ */
1305
1306void
1307__kmp_initialize_bget( kmp_info_t *th )
1308{
1309 KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
1310
1311 set_thr_data( th );
1312
1313 bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
1314 (bufsize) __kmp_malloc_pool_incr );
1315}
1316
1317void
1318__kmp_finalize_bget( kmp_info_t *th )
1319{
1320 thr_data_t *thr;
1321 bfhead_t *b;
1322
1323 KMP_DEBUG_ASSERT( th != 0 );
1324
1325#if BufStats
1326 thr = (thr_data_t *) th->th.th_local.bget_data;
1327 KMP_DEBUG_ASSERT( thr != NULL );
1328 b = thr->last_pool;
1329
1330 /* If a block-release function is defined, and this free buffer
1331 constitutes the entire block, release it. Note that pool_len
1332 is defined in such a way that the test will fail unless all
1333 pool blocks are the same size. */
1334
1335 /* Deallocate the last pool if one exists because we no longer do it in brel() */
1336 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1337 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
1338 {
1339 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1340 KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
1341 KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
1342
1343 /* Unlink the buffer from the free list */
1344 __kmp_bget_remove_from_freelist( b );
1345
1346 KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
1347
1348 (*thr->relfcn)(b);
1349 thr->numprel++; /* Nr of expansion block releases */
1350 thr->numpblk--; /* Total number of blocks */
1351 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1352 }
1353#endif /* BufStats */
1354
1355 /* Deallocate bget_data */
1356 if ( th->th.th_local.bget_data != NULL ) {
1357 __kmp_free( th->th.th_local.bget_data );
1358 th->th.th_local.bget_data = NULL;
1359 }; // if
1360}
1361
1362void
1363kmpc_set_poolsize( size_t size )
1364{
1365 bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
1366 (bget_release_t) free, (bufsize) size );
1367}
1368
1369size_t
1370kmpc_get_poolsize( void )
1371{
1372 thr_data_t *p;
1373
1374 p = get_thr_data( __kmp_get_thread() );
1375
1376 return p->exp_incr;
1377}
1378
1379void
1380kmpc_set_poolmode( int mode )
1381{
1382 thr_data_t *p;
1383
1384 if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
1385 p = get_thr_data( __kmp_get_thread() );
1386 p->mode = (bget_mode_t) mode;
1387 }
1388}
1389
1390int
1391kmpc_get_poolmode( void )
1392{
1393 thr_data_t *p;
1394
1395 p = get_thr_data( __kmp_get_thread() );
1396
1397 return p->mode;
1398}
1399
1400void
1401kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
1402{
1403 kmp_info_t *th = __kmp_get_thread();
1404 bufsize a, b;
1405
1406 __kmp_bget_dequeue( th ); /* Release any queued buffers */
1407
1408 bcheck( th, &a, &b );
1409
1410 *maxmem = a;
1411 *allmem = b;
1412}
1413
1414void
1415kmpc_poolprint( void )
1416{
1417 kmp_info_t *th = __kmp_get_thread();
1418
1419 __kmp_bget_dequeue( th ); /* Release any queued buffers */
1420
1421 bfreed( th );
1422}
1423
1424#endif // #if KMP_USE_BGET
1425
1426/* ------------------------------------------------------------------------ */
1427
1428void *
1429kmpc_malloc( size_t size )
1430{
1431 void * ptr;
1432 ptr = bget( __kmp_entry_thread(), (bufsize) size );
1433
1434 return ptr;
1435}
1436
1437void *
1438kmpc_calloc( size_t nelem, size_t elsize )
1439{
1440 void * ptr;
1441 ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) );
1442
1443 return ptr;
1444}
1445
1446void *
1447kmpc_realloc( void * ptr, size_t size )
1448{
1449 void * result = NULL;
1450
1451 if ( ptr == NULL ) {
1452 // If pointer is NULL, realloc behaves like malloc.
1453 result = bget( __kmp_entry_thread(), (bufsize) size );
1454 } else if ( size == 0 ) {
1455 // If size is 0, realloc behaves like free.
1456 // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before.
1457 // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread().
1458 brel( __kmp_get_thread(), ptr );
1459 } else {
1460 result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size );
1461 }; // if
1462
1463 return result;
1464}
1465
1466/* NOTE: the library must have already been initialized by a previous allocate */
1467
1468void
1469kmpc_free( void * ptr )
1470{
1471 if ( ! __kmp_init_serial ) {
1472 return;
1473 }; // if
1474 if ( ptr != NULL ) {
1475 kmp_info_t *th = __kmp_get_thread();
1476 __kmp_bget_dequeue( th ); /* Release any queued buffers */
1477 brel( th, ptr );
1478 };
1479}
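/* Illustrative usage of the kmpc_* heap entry points defined above (kept
   under #if 0; the wrapper function name is hypothetical and not part of the
   library): */
#if 0
static void
__kmp_example_kmpc_heap_usage( void )
{
    int *v = (int *) kmpc_calloc( 100, sizeof( int ) );      /* zero-filled   */
    v      = (int *) kmpc_realloc( v, 200 * sizeof( int ) ); /* grows, copies */
    kmpc_free( v );                                          /* back to pool  */
}
#endif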
1480
1481
1482/* ------------------------------------------------------------------------ */
1483
1484void *
1485___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
1486{
1487 void * ptr;
1488 KE_TRACE( 30, (
1489 "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
1490 th,
1491 (int) size
1492 KMP_SRC_LOC_PARM
1493 ) );
1494 ptr = bget( th, (bufsize) size );
1495 KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
1496 return ptr;
1497}
1498
1499void *
1500___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
1501{
1502 void * ptr;
1503 KE_TRACE( 30, (
1504 "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
1505 th,
1506 (int) nelem,
1507 (int) elsize
1508 KMP_SRC_LOC_PARM
1509 ) );
1510 ptr = bgetz( th, (bufsize) (nelem * elsize) );
1511 KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
1512 return ptr;
1513}
1514
1515void *
1516___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
1517{
1518 KE_TRACE( 30, (
1519 "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
1520 th,
1521 ptr,
1522 (int) size
1523 KMP_SRC_LOC_PARM
1524 ) );
1525 ptr = bgetr( th, ptr, (bufsize) size );
1526 KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
1527 return ptr;
1528}
1529
1530void
1531___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
1532{
1533 KE_TRACE( 30, (
1534 "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
1535 th,
1536 ptr
1537 KMP_SRC_LOC_PARM
1538 ) );
1539 if ( ptr != NULL ) {
1540 __kmp_bget_dequeue( th ); /* Release any queued buffers */
1541 brel( th, ptr );
1542 }
1543 KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
1544}
1545
1546/* ------------------------------------------------------------------------ */
1547/* ------------------------------------------------------------------------ */
1548/*
1549 If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it
1550 may be useful for debugging memory corruption, use of freed pointers, etc.
1551*/
1552/* #define LEAK_MEMORY */
1553
1554struct kmp_mem_descr { // Memory block descriptor.
1555 void * ptr_allocated; // Pointer returned by malloc(), subject for free().
1556 size_t size_allocated; // Size of allocated memory block.
1557 void * ptr_aligned; // Pointer to aligned memory, to be used by client code.
1558 size_t size_aligned; // Size of aligned memory block.
1559};
1560typedef struct kmp_mem_descr kmp_mem_descr_t;
1561
1562/*
1563 Allocate memory on requested boundary, fill allocated memory with 0x00.
1564 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1565 Must use __kmp_free when freeing memory allocated by this routine!
1566 */
1567static
1568void *
1569___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
1570{
1571 /*
1572 __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to
1573 return properly aligned pointer. Original pointer returned by malloc() and size of allocated
1574 block is saved in descriptor just before the aligned pointer. This information used by
1575 __kmp_free() -- it has to pass to free() original pointer, not aligned one.
1576
1577 +---------+------------+-----------------------------------+---------+
1578 | padding | descriptor | aligned block | padding |
1579 +---------+------------+-----------------------------------+---------+
1580 ^ ^
1581 | |
1582 | +- Aligned pointer returned to caller
1583 +- Pointer returned by malloc()
1584
1585 Aligned block is filled with zeros, paddings are filled with 0xEF.
1586 */
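    /* Worked example (illustrative; assumes a 64-bit build where
       sizeof(kmp_mem_descr_t) == 32): with alignment == 64 and malloc()
       returning 0x1008,
           addr_aligned = (0x1008 + 32 + 64) & ~63  ==  0x1040
           addr_descr   =  0x1040 - 32              ==  0x1020
       so the descriptor sits immediately below the pointer handed back to
       the caller, and __kmp_free() can recover the original 0x1008. */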
1587
1588 kmp_mem_descr_t descr;
1589 kmp_uintptr_t addr_allocated; // Address returned by malloc().
1590 kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1591 kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1592
1593 KE_TRACE( 25, (
1594 "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1595 (int) size,
1596 (int) alignment
1597 KMP_SRC_LOC_PARM
1598 ) );
1599
1600 KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big.
1601 KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
1602 // Make sure kmp_uintptr_t is enough to store addresses.
1603
1604 descr.size_aligned = size;
1605 descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
1606
1607 #if KMP_DEBUG
1608 descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
1609 #else
1610 descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
1611 #endif
1612 KE_TRACE( 10, (
1613 " malloc( %d ) returned %p\n",
1614 (int) descr.size_allocated,
1615 descr.ptr_allocated
1616 ) );
1617 if ( descr.ptr_allocated == NULL ) {
1618 KMP_FATAL( OutOfHeapMemory );
1619 };
1620
1621 addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1622 addr_aligned =
1623 ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
1624 & ~ ( alignment - 1 );
1625 addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
1626
1627 descr.ptr_aligned = (void *) addr_aligned;
1628
1629 KE_TRACE( 26, (
1630 " ___kmp_allocate_align: "
1631 "ptr_allocated=%p, size_allocated=%d, "
1632 "ptr_aligned=%p, size_aligned=%d\n",
1633 descr.ptr_allocated,
1634 (int) descr.size_allocated,
1635 descr.ptr_aligned,
1636 (int) descr.size_aligned
1637 ) );
1638
1639 KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
1640 KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
1641 KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1642 KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
1643
1644 #ifdef KMP_DEBUG
1645 memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1646 // Fill allocated memory block with 0xEF.
1647 #endif
1648 memset( descr.ptr_aligned, 0x00, descr.size_aligned );
1649 // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not
1650 // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
1651 // bytes remain filled with 0xEF in debugging library.)
1652 * ( (kmp_mem_descr_t *) addr_descr ) = descr;
1653
1654 KMP_MB();
1655
1656 KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
1657 return descr.ptr_aligned;
1658
1659} // func ___kmp_allocate_align
1660
1661
1662/*
1663 Allocate memory on cache line boundary, fill allocated memory with 0x00.
1664 Do not call this func directly! Use __kmp_allocate macro instead.
1665 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1666 Must use __kmp_free when freeing memory allocated by this routine!
1667 */
1668void *
1669___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
1670{
1671
1672 void * ptr;
1673 KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
1674 ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
1675 KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
1676 return ptr;
1677
1678} // func ___kmp_allocate
1679
1680#if (BUILD_MEMORY==FIRST_TOUCH)
1681void *
1682__kmp_ft_page_allocate(size_t size)
1683{
1684 void *adr, *aadr;
1685#if KMP_OS_LINUX
1686 /* TODO: Use this function to get page size everywhere */
1687 int page_size = getpagesize();
1688#else
1689 /* TODO: Find windows function to get page size and use it everywhere */
1690 int page_size = PAGE_SIZE;
1691#endif /* KMP_OS_LINUX */
1692
1693 adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
1694 size + page_size + KMP_PTR_SKIP);
1695 if ( adr == 0 )
1696 KMP_FATAL( OutOfHeapMemory );
1697
1698 /* check to see if adr is on a page boundary. */
1699 if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
1700 /* nothing to do if adr is already on a page boundary. */
1701 aadr = adr;
1702 else
1703 /* else set aadr to the first page boundary in the allocated memory. */
1704 aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
1705
1706 /* the first touch by the owner thread. */
1707 *((void**)aadr) = adr;
1708
1709 /* skip the memory space used for storing adr above. */
1710 return (void*)((char*)aadr + KMP_PTR_SKIP);
1711}
1712#endif
1713
1714/*
1715 Allocate memory on page boundary, fill allocated memory with 0x00.
1716 Do not call this func directly! Use __kmp_page_allocate macro instead.
1717 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1718 Must use __kmp_free when freeing memory allocated by this routine!
1719 */
1720void *
1721___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
1722{
1723 int page_size = 8 * 1024;
1724 void * ptr;
1725
1726 KE_TRACE( 25, (
1727 "-> __kmp_page_allocate( %d ) called from %s:%d\n",
1728 (int) size
1729 KMP_SRC_LOC_PARM
1730 ) );
1731 ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
1732 KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
1733 return ptr;
1734} // ___kmp_page_allocate
1735
1736/*
1737 Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1738 In debug mode, fill the memory block with 0xEF before call to free().
1739*/
1740void
1741___kmp_free( void * ptr KMP_SRC_LOC_DECL )
1742{
1743
1744 kmp_mem_descr_t descr;
1745 kmp_uintptr_t addr_allocated; // Address returned by malloc().
1746 kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1747
1748 KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
1749 KMP_ASSERT( ptr != NULL );
1750
1751 descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
1752
1753 KE_TRACE( 26, ( " __kmp_free: "
1754 "ptr_allocated=%p, size_allocated=%d, "
1755 "ptr_aligned=%p, size_aligned=%d\n",
1756 descr.ptr_allocated, (int) descr.size_allocated,
1757 descr.ptr_aligned, (int) descr.size_aligned ));
1758
1759 addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1760 addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
1761
1762 KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
1763 KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
1764 KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
1765 KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
1766 KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1767
1768 #ifdef KMP_DEBUG
1769 memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1770 // Fill memory block with 0xEF, it helps catch using freed memory.
1771 #endif
1772
1773 #ifndef LEAK_MEMORY
1774 KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
1775 free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
1776 #endif
1777
1778 KMP_MB();
1779
1780 KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
1781
1782} // func ___kmp_free
1783
1784/* ------------------------------------------------------------------------ */
1785/* ------------------------------------------------------------------------ */
1786
1787#if USE_FAST_MEMORY == 3
1788// Allocate fast memory by first scanning the thread's free lists
1789// If a chunk the right size exists, grab it off the free list.
1790// Otherwise allocate normally using kmp_thread_malloc.
1791
1792// AC: How to choose the limit? Just get 16 for now...
1793static int const __kmp_free_list_limit = 16;
1794
1795// Always use 128 bytes for determining buckets for caching memory blocks
1796#define DCACHE_LINE 128
1797
1798void *
1799___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
1800{
1801 void * ptr;
1802 int num_lines;
1803 int idx;
1804 int index;
1805 void * alloc_ptr;
1806 size_t alloc_size;
1807 kmp_mem_descr_t * descr;
1808
1809 KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1810 __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
1811
1812 num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
1813 idx = num_lines - 1;
1814 KMP_DEBUG_ASSERT( idx >= 0 );
1815 if ( idx < 2 ) {
1816 index = 0; // idx is [ 0, 1 ], use first free list
1817 num_lines = 2; // 1, 2 cache lines or less than cache line
1818 } else if ( ( idx >>= 2 ) == 0 ) {
1819 index = 1; // idx is [ 2, 3 ], use second free list
1820 num_lines = 4; // 3, 4 cache lines
1821 } else if ( ( idx >>= 2 ) == 0 ) {
1822 index = 2; // idx is [ 4, 15 ], use third free list
1823 num_lines = 16; // 5, 6, ..., 16 cache lines
1824 } else if ( ( idx >>= 2 ) == 0 ) {
1825 index = 3; // idx is [ 16, 63 ], use fourth free list
1826 num_lines = 64; // 17, 18, ..., 64 cache lines
1827 } else {
1828 goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1829 }
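    /* Illustrative mapping for the bucketing above (DCACHE_LINE == 128):
           size  200 ->  2 lines -> index 0, rounded up to  2 * 128 =  256 bytes
           size  700 ->  6 lines -> index 2, rounded up to 16 * 128 = 2048 bytes
           size 9000 -> 71 lines -> no bucket, falls through to alloc_call */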
1830
1831 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1832 if ( ptr != NULL ) {
1833 // pop the head of no-sync free list
1834 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1835 KMP_DEBUG_ASSERT( this_thr ==
1836 ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1837 goto end;
1838 }
1839 ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1840 if ( ptr != NULL ) {
1841 // no-sync free list is empty, use sync free list (filled in by other threads only)
1842 // pop the head of the sync free list, push NULL instead
1843 while ( ! KMP_COMPARE_AND_STORE_PTR(
1844 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
1845 {
1846 KMP_CPU_PAUSE();
1847 ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1848 }
1849 // push the rest of the chain onto the no-sync free list (can be NULL if this was the only block)
1850 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1851 KMP_DEBUG_ASSERT( this_thr ==
1852 ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1853 goto end;
1854 }
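    /*
        The loop above is the usual lock-free "steal the whole list" idiom: swap the
        sync head with NULL in one compare-and-swap, then walk the captured chain
        privately.  A minimal sketch of the same idiom, assuming a hypothetical
        'list' object with a 'head' field and the primitives already used in this
        file (TCR_SYNC_PTR, KMP_COMPARE_AND_STORE_PTR, KMP_CPU_PAUSE):
    */
    #if 0   // illustrative sketch only, never compiled
    void * chain = TCR_SYNC_PTR( list->head );
    while ( chain != NULL &&
            ! KMP_COMPARE_AND_STORE_PTR( &list->head, chain, NULL ) ) {
        KMP_CPU_PAUSE();
        chain = TCR_SYNC_PTR( list->head );
    }
    // 'chain' is now a private singly linked list; no further atomics are needed.
    #endif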
1855
1856 alloc_call:
1857 // no block found in the free lists, so allocate one
1858 size = num_lines * DCACHE_LINE;
1859
1860 alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
1861 KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
1862 __kmp_gtid_from_thread( this_thr ), (int) alloc_size ) );
1863 alloc_ptr = bget( this_thr, (bufsize) alloc_size );
1864
1865 // align ptr to DCACHE_LINE
1866 ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
1867 descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1868
1869 descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1870 // we don't need size_allocated
1871 descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1872 // (it is already saved in the bget buffer,
1873 // but we may want to use another allocator in the future)
1874 descr->size_aligned = size;
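    /*
        Alignment arithmetic worked through on made-up numbers (assuming, purely
        for illustration, sizeof( kmp_mem_descr_t ) == 32 and alloc_ptr == 0x1010):
            0x1010 + 32 + 128      = 0x10B0
            0x10B0 & ~( 128 - 1 )  = 0x1080     <- ptr, 128-byte aligned
            0x1080 - 32            = 0x1060     <- descr, still >= alloc_ptr
        so both the descriptor and the aligned block fit inside the bget buffer of
        alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE bytes.
    */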
1875
1876 end:
1877 KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
1878 __kmp_gtid_from_thread( this_thr ), ptr ) );
1879 return ptr;
1880} // func __kmp_fast_allocate
1881
1882// Free fast memory and place it on the thread's free list if it is of
1883// the correct size.
1884void
1885___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
1886{
1887 kmp_mem_descr_t * descr;
1888 kmp_info_t * alloc_thr;
1889 size_t size;
1890 size_t idx;
1891 int index;
1892
1893 KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1894 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
1895 KMP_ASSERT( ptr != NULL );
1896
1897 descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1898
1899 KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1900 (int) descr->size_aligned ) );
1901
1902 size = descr->size_aligned; // size in bytes, i.e. 2, 4, 16, 64, 65, 66, ... cache lines
1903
1904 idx = DCACHE_LINE * 2; // 2 cache lines is the minimal block size
1905 if ( idx == size ) {
1906 index = 0; // 2 cache lines
1907 } else if ( ( idx <<= 1 ) == size ) {
1908 index = 1; // 4 cache lines
1909 } else if ( ( idx <<= 2 ) == size ) {
1910 index = 2; // 16 cache lines
1911 } else if ( ( idx <<= 2 ) == size ) {
1912 index = 3; // 64 cache lines
1913 } else {
1914 KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
1915 goto free_call; // 65 or more cache lines ( > 8KB )
1916 }
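    /*
        The shifts above invert the bucket sizes chosen in ___kmp_fast_allocate:
        size_aligned 256 -> index 0, 512 -> index 1, 2048 -> index 2, 8192 -> index 3.
        For example, a block rounded up to 2048 bytes at allocation time matches the
        third test ( 512 << 2 == 2048 ) and goes back to free list 2; any other size
        must exceed 64 cache lines and is released through the allocator instead.
    */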
1917
1918 alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1919 if ( alloc_thr == this_thr ) {
1920 // push block to self no-sync free list, linking previous head (LIFO)
1921 *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1922 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1923 } else {
1924 void * head = this_thr->th.th_free_lists[index].th_free_list_other;
1925 if ( head == NULL ) {
1926 // Create new free list
1927 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1928 *((void **)ptr) = NULL; // mark the tail of the list
1929 descr->size_allocated = (size_t)1; // head of the list keeps its length
1930 } else {
1931 // need to check the existing "other" list's owner thread and the queue size
1932 kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
1933 kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
1934 size_t q_sz = dsc->size_allocated + 1; // new size if we add the current block
1935 if ( q_th == alloc_thr && q_sz <= __kmp_free_list_limit ) {
1936 // we can add the current block to the "other" list, no sync needed
1937 *((void **)ptr) = head;
1938 descr->size_allocated = q_sz;
1939 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1940 } else {
1941 // either the owner of the queued blocks differs from this block's owner, or the size limit was exceeded;
1942 // return the old queue to the allocating thread (q_th) synchronously,
1943 // and start a new list for alloc_thr's blocks
1944 void * old_ptr;
1945 void * tail = head;
1946 void * next = *((void **)head);
1947 while ( next != NULL ) {
1948 KMP_DEBUG_ASSERT(
1949 // queue size should decrease by 1 each step through the list
1950 ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
1951 ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
1952 tail = next; // remember tail node
1953 next = *((void **)next);
1954 }
1955 KMP_DEBUG_ASSERT( q_th != NULL );
1956 // push block to owner's sync free list
1957 old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1958 /* the next pointer must be set before setting free_list to ptr to avoid
1959 exposing a broken list to other threads, even for an instant. */
1960 *((void **)tail) = old_ptr;
1961
1962 while ( ! KMP_COMPARE_AND_STORE_PTR(
1963 &q_th->th.th_free_lists[index].th_free_list_sync,
1964 old_ptr,
1965 head ) )
1966 {
1967 KMP_CPU_PAUSE();
1968 old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1969 *((void **)tail) = old_ptr;
1970 }
1971
1972 // start a new list of not-self blocks
1973 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1974 *((void **)ptr) = NULL;
1975 descr->size_allocated = (size_t)1; // head of queue keeps its length
1976 }
1977 }
1978 }
1979 goto end;
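    /*
        Summary of the cross-thread path above: blocks owned by another thread are
        first parked on this thread's private "other" list, where the head's
        descriptor doubles as a length counter.  Only when the owner changes or the
        list reaches __kmp_free_list_limit entries is the whole chain handed back to
        the owner's sync free list with the compare-and-swap loop, so the atomic
        operation is paid once per batch rather than once per freed block.
    */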
1980
1981 free_call:
1982 KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
1983 __kmp_gtid_from_thread( this_thr ), (int) size ) );
1984 __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
1985 brel( this_thr, descr->ptr_allocated );
1986
1987 end:
1988 KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
1989
1990} // func __kmp_fast_free
1991
1992
1993// Initialize the thread free lists related to fast memory
1994// Only do this when a thread is initially created.
1995void
1996__kmp_initialize_fast_memory( kmp_info_t *this_thr )
1997{
1998 KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
1999
2000 memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
2001}
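/*
    The lists start out empty (all zeros) and are populated lazily: a block only
    lands on them when ___kmp_fast_free() decides the freed block is small enough
    (64 cache lines or fewer) to be worth caching.
*/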
2002
2003// Free the memory in the thread free lists related to fast memory
2004// Only do this when a thread is being reaped (destroyed).
2005void
2006__kmp_free_fast_memory( kmp_info_t *th )
2007{
2008 // Assuming the underlying allocator is BGET, walk through its structures...
2009 int bin;
2010 thr_data_t * thr = get_thr_data( th );
2011 void ** lst = NULL;
2012
2013 KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
2014 __kmp_gtid_from_thread( th ) ) );
2015
2016 __kmp_bget_dequeue( th ); // Release any queued buffers
2017
2018 // Dig through free lists and extract all allocated blocks
2019 for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
2020 bfhead_t * b = thr->freelist[ bin ].ql.flink;
2021 while ( b != &thr->freelist[ bin ] ) {
2022 if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // is this buffer an allocated address?
2023 *((void**)b) = lst; // link the list (overwrite bthr, but keep flink for now)
2024 lst = (void**)b; // push b into lst
2025 }
2026 b = b->ql.flink; // get next buffer
2027 }
2028 }
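    /*
        Collection and release happen in two separate passes: calling (*thr->relfcn)()
        while still walking the freelist ring would free the memory that holds the
        ql.flink pointer being followed.  Candidate blocks are therefore re-linked
        through 'lst' first (reusing their headers as a simple singly linked list)
        and released only in the loop below.
    */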
2029 while ( lst != NULL ) {
2030 void * next = *lst;
2031 KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2032 lst, next, th, __kmp_gtid_from_thread( th ) ) );
2033 (*thr->relfcn)(lst);
2034 #if BufStats
2035 // count blocks to prevent problems in __kmp_finalize_bget()
2036 thr->numprel++; /* Nr of expansion block releases */
2037 thr->numpblk--; /* Total number of blocks */
2038 #endif
2039 lst = (void**)next;
2040 }
2041
2042 KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
2043 __kmp_gtid_from_thread( th ) ) );
2044}
2045
2046#endif // USE_FAST_MEMORY