1/*
2 * kmp_alloc.c -- private/shared dynamic memory allocation and management
3 */
4
5
6//===----------------------------------------------------------------------===//
7//
8// The LLVM Compiler Infrastructure
9//
10// This file is dual licensed under the MIT and the University of Illinois Open
11// Source Licenses. See LICENSE.txt for details.
12//
13//===----------------------------------------------------------------------===//
14
15
16#include "kmp.h"
17#include "kmp_wrapper_malloc.h"
18#include "kmp_io.h"
19
20// Disable bget when it is not used
21#if KMP_USE_BGET
22
23/* Thread private buffer management code */
24
25typedef int (*bget_compact_t)(size_t, int);
26typedef void *(*bget_acquire_t)(size_t);
27typedef void (*bget_release_t)(void *);
28
29/* NOTE: bufsize must be a signed datatype */
30
31#if KMP_OS_WINDOWS
32# if KMP_ARCH_X86 || KMP_ARCH_ARM
33    typedef kmp_int32 bufsize;
34# else
35 typedef kmp_int64 bufsize;
36# endif
37#else
38 typedef ssize_t bufsize;
39#endif
40
41/* The three modes of operation are: FIFO search, LIFO search, and best fit. */
42
43typedef enum bget_mode {
44 bget_mode_fifo = 0,
45 bget_mode_lifo = 1,
46 bget_mode_best = 2
47} bget_mode_t;
48
49
50static void bpool( kmp_info_t *th, void *buffer, bufsize len);
51static void *bget( kmp_info_t *th, bufsize size);
52static void *bgetz( kmp_info_t *th, bufsize size);
53static void *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
54static void brel( kmp_info_t *th, void *buf);
55static void bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );
56
57#ifdef KMP_DEBUG
58static void bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
59static void bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
60static void bufdump( kmp_info_t *th, void *buf);
61static void bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
62static int bpoolv( kmp_info_t *th, void *pool);
63#endif
64
65/* BGET CONFIGURATION */
66 /* Buffer allocation size quantum:
67 all buffers allocated are a
68 multiple of this size. This
69 MUST be a power of two. */
70
71 /* On IA-32 architecture with Linux* OS,
72 malloc() does not
73       ensure 16-byte alignment */
74
75#if KMP_ARCH_X86 || !KMP_HAVE_QUAD
76
77#define SizeQuant 8
78#define AlignType double
79
80#else
81
82#define SizeQuant 16
83#define AlignType _Quad
84
85#endif
86
87#define BufStats 1 /* Define this symbol to enable the
88 bstats() function which calculates
89 the total free space in the buffer
90 pool, the largest available
91 buffer, and the total space
92 currently allocated. */
93
94#ifdef KMP_DEBUG
95
96#define BufDump 1 /* Define this symbol to enable the
97 bpoold() function which dumps the
98 buffers in a buffer pool. */
99
100#define BufValid 1 /* Define this symbol to enable the
101 bpoolv() function for validating
102 a buffer pool. */
103
104#define DumpData 1 /* Define this symbol to enable the
105 bufdump() function which allows
106 dumping the contents of an allocated
107 or free buffer. */
108#ifdef NOT_USED_NOW
109
110#define FreeWipe 1 /* Wipe free buffers to a guaranteed
111 pattern of garbage to trip up
112 miscreants who attempt to use
113 pointers into released buffers. */
114
115#define BestFit 1 /* Use a best fit algorithm when
116 searching for space for an
117 allocation request. This uses
118 memory more efficiently, but
119 allocation will be much slower. */
120#endif /* NOT_USED_NOW */
121#endif /* KMP_DEBUG */
122
123
124static bufsize bget_bin_size[ ] = {
125 0,
126// 1 << 6, /* .5 Cache line */
127 1 << 7, /* 1 Cache line, new */
128 1 << 8, /* 2 Cache lines */
129 1 << 9, /* 4 Cache lines, new */
130 1 << 10, /* 8 Cache lines */
131 1 << 11, /* 16 Cache lines, new */
132 1 << 12,
133 1 << 13, /* new */
134 1 << 14,
135 1 << 15, /* new */
136 1 << 16,
137 1 << 17,
138 1 << 18,
139 1 << 19,
140 1 << 20, /* 1MB */
141 1 << 21, /* 2MB */
142 1 << 22, /* 4MB */
143 1 << 23, /* 8MB */
144 1 << 24, /* 16MB */
145 1 << 25, /* 32MB */
146};
147
148#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
149
150struct bfhead;
151
152/* Declare the interface, including the requested buffer size type,
153 bufsize. */
154
155/* Queue links */
156
157typedef struct qlinks {
158 struct bfhead *flink; /* Forward link */
159 struct bfhead *blink; /* Backward link */
160} qlinks_t;
161
162/* Header in allocated and free buffers */
163
164typedef struct bhead2 {
165 kmp_info_t *bthr; /* The thread which owns the buffer pool */
166 bufsize prevfree; /* Relative link back to previous
167 free buffer in memory or 0 if
168 previous buffer is allocated. */
169 bufsize bsize; /* Buffer size: positive if free,
170 negative if allocated. */
171} bhead2_t;
172
173/* Make sure the bhead structure is a multiple of SizeQuant in size. */
174
175typedef union bhead {
176 KMP_ALIGN( SizeQuant )
177 AlignType b_align;
178 char b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
179 bhead2_t bb;
180} bhead_t;
181#define BH(p) ((bhead_t *) (p))
182
183/* Header in directly allocated buffers (by acqfcn) */
184
185typedef struct bdhead
186{
187 bufsize tsize; /* Total size, including overhead */
188 bhead_t bh; /* Common header */
189} bdhead_t;
190#define BDH(p) ((bdhead_t *) (p))
191
192/* Header in free buffers */
193
194typedef struct bfhead {
195 bhead_t bh; /* Common allocated/free header */
196 qlinks_t ql; /* Links on free list */
197} bfhead_t;
198#define BFH(p) ((bfhead_t *) (p))
199
200typedef struct thr_data {
201 bfhead_t freelist[ MAX_BGET_BINS ];
202#if BufStats
203 size_t totalloc; /* Total space currently allocated */
204 long numget, numrel; /* Number of bget() and brel() calls */
205 long numpblk; /* Number of pool blocks */
206 long numpget, numprel; /* Number of block gets and rels */
207 long numdget, numdrel; /* Number of direct gets and rels */
208#endif /* BufStats */
209
210 /* Automatic expansion block management functions */
211 bget_compact_t compfcn;
212 bget_acquire_t acqfcn;
213 bget_release_t relfcn;
214
215 bget_mode_t mode; /* what allocation mode to use? */
216
217 bufsize exp_incr; /* Expansion block size */
218 bufsize pool_len; /* 0: no bpool calls have been made
219 -1: not all pool blocks are
220 the same size
221 >0: (common) block size for all
222 bpool calls made so far
223 */
224    bfhead_t * last_pool; /* Last pool owned by this thread (delayed deallocation) */
225} thr_data_t;
226
227/* Minimum allocation quantum: */
228
229#define QLSize (sizeof(qlinks_t))
230#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
231#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
232    // Maximum for the requested size.
233
234/* End sentinel: value placed in bsize field of dummy block delimiting
235 end of pool block. The most negative number which will fit in a
236 bufsize, defined in a way that the compiler will accept. */
237
238#define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
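/* For illustration (not part of the original source): assuming a 64-bit
   bufsize and SizeQuant == 16, MaxSize evaluates to 0x7ffffffffffffff0 --
   the largest positive multiple of SizeQuant representable in a bufsize --
   and ESent evaluates to -2^63, the most negative value a bufsize can hold.
   With a 32-bit bufsize the same expressions yield 0x7ffffff0 and -2^31. */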
239
240/* ------------------------------------------------------------------------ */
241
242/* Thread Data management routines */
243
244static int
245bget_get_bin( bufsize size )
246{
247 // binary chop bins
248 int lo = 0, hi = MAX_BGET_BINS - 1;
249
250 KMP_DEBUG_ASSERT( size > 0 );
251
252 while ( (hi - lo) > 1 ) {
253 int mid = (lo + hi) >> 1;
254 if (size < bget_bin_size[ mid ])
255 hi = mid - 1;
256 else
257 lo = mid;
258 }
259
260 KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
261
262 return lo;
263}
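/* Illustrative sketch (kept out of the build): a few concrete mappings of
   request sizes to bins, assuming the bget_bin_size table above. The chop is
   approximate -- a size exactly equal to a bin boundary may map to the bin
   just below it -- which is fine because callers only use the result as a
   starting point for the upward free-list scan in bget(). The helper name
   below is hypothetical. */
#if 0
static void
__kmp_bget_bin_example( void )
{
    KMP_DEBUG_ASSERT( bget_get_bin( (bufsize) 60 ) == 0 );    /* below 128       */
    KMP_DEBUG_ASSERT( bget_get_bin( (bufsize) 200 ) == 1 );   /* in [128, 256)   */
    KMP_DEBUG_ASSERT( bget_get_bin( (bufsize) 4097 ) == 6 );  /* in [4096, 8192) */
}
#endif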
264
265static void
266set_thr_data( kmp_info_t *th )
267{
268 int i;
269 thr_data_t *data;
270
271 data =
272 (thr_data_t *)(
273 ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
274 );
275
276 memset( data, '\0', sizeof( *data ) );
277
278 for (i = 0; i < MAX_BGET_BINS; ++i) {
279 data->freelist[ i ].ql.flink = & data->freelist[ i ];
280 data->freelist[ i ].ql.blink = & data->freelist[ i ];
281 }
282
283 th->th.th_local.bget_data = data;
284 th->th.th_local.bget_list = 0;
285#if ! USE_CMP_XCHG_FOR_BGET
286#ifdef USE_QUEUING_LOCK_FOR_BGET
287 __kmp_init_lock( & th->th.th_local.bget_lock );
288#else
289 __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
290#endif /* USE_LOCK_FOR_BGET */
291#endif /* ! USE_CMP_XCHG_FOR_BGET */
292}
293
294static thr_data_t *
295get_thr_data( kmp_info_t *th )
296{
297 thr_data_t *data;
298
299 data = (thr_data_t *) th->th.th_local.bget_data;
300
301 KMP_DEBUG_ASSERT( data != 0 );
302
303 return data;
304}
305
306
307#ifdef KMP_DEBUG
308
309static void
310__kmp_bget_validate_queue( kmp_info_t *th )
311{
312 /* NOTE: assume that the global_lock is held */
313
314 void *p = (void *) th->th.th_local.bget_list;
315
316 while (p != 0) {
317 bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
318
319 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
320 p = (void *) b->ql.flink;
321 }
322}
323
324#endif
325
326/* Walk the free list and release the enqueued buffers */
327
328static void
329__kmp_bget_dequeue( kmp_info_t *th )
330{
331 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
332
333 if (p != 0) {
334 #if USE_CMP_XCHG_FOR_BGET
335 {
336 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
337 while ( ! KMP_COMPARE_AND_STORE_PTR(
338 & th->th.th_local.bget_list, old_value, NULL ) )
339 {
340 KMP_CPU_PAUSE();
341 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
342 }
343 p = (void *) old_value;
344 }
345 #else /* ! USE_CMP_XCHG_FOR_BGET */
346 #ifdef USE_QUEUING_LOCK_FOR_BGET
347 __kmp_acquire_lock( & th->th.th_local.bget_lock,
348 __kmp_gtid_from_thread(th) );
349 #else
350 __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
351 #endif /* USE_QUEUING_LOCK_FOR_BGET */
352
353 p = (void *) th->th.th_local.bget_list;
354 th->th.th_local.bget_list = 0;
355
356 #ifdef USE_QUEUING_LOCK_FOR_BGET
357 __kmp_release_lock( & th->th.th_local.bget_lock,
358 __kmp_gtid_from_thread(th) );
359 #else
360 __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
361 #endif
362 #endif /* USE_CMP_XCHG_FOR_BGET */
363
364 /* Check again to make sure the list is not empty */
365
366 while (p != 0) {
367 void *buf = p;
368 bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
369
370 KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
371 KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
372 (kmp_uintptr_t)th ); // clear possible mark
373 KMP_DEBUG_ASSERT( b->ql.blink == 0 );
374
375 p = (void *) b->ql.flink;
376
377 brel( th, buf );
378 }
379 }
380}
381
382/* Chain together the free buffers by using the thread owner field */
383
384static void
385__kmp_bget_enqueue( kmp_info_t *th, void *buf
386#ifdef USE_QUEUING_LOCK_FOR_BGET
387 , kmp_int32 rel_gtid
388#endif
389 )
390{
391 bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));
392
393 KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
394 KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
395 (kmp_uintptr_t)th ); // clear possible mark
396
397 b->ql.blink = 0;
398
399 KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
400 __kmp_gtid_from_thread( th ) ) );
401
402#if USE_CMP_XCHG_FOR_BGET
403 {
404 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
405 /* the next pointer must be set before setting bget_list to buf to avoid
406 exposing a broken list to other threads, even for an instant. */
407 b->ql.flink = BFH( old_value );
408
409 while ( ! KMP_COMPARE_AND_STORE_PTR(
410 & th->th.th_local.bget_list, old_value, buf ) )
411 {
412 KMP_CPU_PAUSE();
413 old_value = TCR_PTR(th->th.th_local.bget_list);
414 /* the next pointer must be set before setting bget_list to buf to avoid
415 exposing a broken list to other threads, even for an instant. */
416 b->ql.flink = BFH( old_value );
417 }
418 }
419#else /* ! USE_CMP_XCHG_FOR_BGET */
420# ifdef USE_QUEUING_LOCK_FOR_BGET
421 __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
422# else
423 __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
424 # endif
425
426 b->ql.flink = BFH( th->th.th_local.bget_list );
427 th->th.th_local.bget_list = (void *) buf;
428
429# ifdef USE_QUEUING_LOCK_FOR_BGET
430 __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
431# else
432 __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
433# endif
434#endif /* USE_CMP_XCHG_FOR_BGET */
435}
436
437/* insert buffer back onto a new freelist */
438
439static void
440__kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
441{
442 int bin;
443
444 KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
445 KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
446
447 bin = bget_get_bin( b->bh.bb.bsize );
448
449 KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
450 KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
451
452 b->ql.flink = &thr->freelist[ bin ];
453 b->ql.blink = thr->freelist[ bin ].ql.blink;
454
455 thr->freelist[ bin ].ql.blink = b;
456 b->ql.blink->ql.flink = b;
457}
458
459/* unlink the buffer from the old freelist */
460
461static void
462__kmp_bget_remove_from_freelist( bfhead_t *b )
463{
464 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
465 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
466
467 b->ql.blink->ql.flink = b->ql.flink;
468 b->ql.flink->ql.blink = b->ql.blink;
469}
470
471/* ------------------------------------------------------------------------ */
472
473/* GET STATS -- check info on free list */
474
475static void
476bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
477{
478 thr_data_t *thr = get_thr_data( th );
479 int bin;
480
481 *total_free = *max_free = 0;
482
483 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
484 bfhead_t *b, *best;
485
486 best = &thr->freelist[ bin ];
487 b = best->ql.flink;
488
489 while (b != &thr->freelist[ bin ]) {
490 *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
491 if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
492 best = b;
493
494 /* Link to next buffer */
495 b = b->ql.flink;
496 }
497
498 if (*max_free < best->bh.bb.bsize)
499 *max_free = best->bh.bb.bsize;
500 }
501
502 if (*max_free > (bufsize)sizeof( bhead_t ))
503 *max_free -= sizeof( bhead_t );
504}
505
506/* ------------------------------------------------------------------------ */
507
508/* BGET -- Allocate a buffer. */
509
510static void *
511bget( kmp_info_t *th, bufsize requested_size )
512{
513 thr_data_t *thr = get_thr_data( th );
514 bufsize size = requested_size;
515 bfhead_t *b;
516 void *buf;
517 int compactseq = 0;
518 int use_blink = 0;
519/* For BestFit */
520 bfhead_t *best;
521
522 if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
523 return NULL;
524 }; // if
525
526 __kmp_bget_dequeue( th ); /* Release any queued buffers */
527
528 if (size < (bufsize)SizeQ) { /* Need at least room for the */
529 size = SizeQ; /* queue links. */
530 }
531 #if defined( SizeQuant ) && ( SizeQuant > 1 )
532 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
533 #endif
534
535 size += sizeof(bhead_t); /* Add overhead in allocated buffer
536 to size required. */
537 KMP_DEBUG_ASSERT( size >= 0 );
538 KMP_DEBUG_ASSERT( size % SizeQuant == 0 );
539
540 use_blink = ( thr->mode == bget_mode_lifo );
541
542 /* If a compact function was provided in the call to bectl(), wrap
543 a loop around the allocation process to allow compaction to
544 intervene in case we don't find a suitable buffer in the chain. */
545
546 for (;;) {
547 int bin;
548
549 for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
550 /* Link to next buffer */
551 b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );
552
553 if (thr->mode == bget_mode_best) {
554 best = &thr->freelist[ bin ];
555
556 /* Scan the free list searching for the first buffer big enough
557 to hold the requested size buffer. */
558
559 while (b != &thr->freelist[ bin ]) {
560 if (b->bh.bb.bsize >= (bufsize) size) {
561 if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
562 best = b;
563 }
564 }
565
566 /* Link to next buffer */
567 b = ( use_blink ? b->ql.blink : b->ql.flink );
568 }
569 b = best;
570 }
571
572 while (b != &thr->freelist[ bin ]) {
573 if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {
574
575 /* Buffer is big enough to satisfy the request. Allocate it
576 to the caller. We must decide whether the buffer is large
577 enough to split into the part given to the caller and a
578 free buffer that remains on the free list, or whether the
579 entire buffer should be removed from the free list and
580 given to the caller in its entirety. We only split the
581 buffer if enough room remains for a header plus the minimum
582 quantum of allocation. */
583
584 if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) {
585 bhead_t *ba, *bn;
586
587 ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
588 bn = BH(((char *) ba) + size);
589
590 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
591
592 /* Subtract size from length of free block. */
593 b->bh.bb.bsize -= (bufsize) size;
594
595 /* Link allocated buffer to the previous free buffer. */
596 ba->bb.prevfree = b->bh.bb.bsize;
597
598 /* Plug negative size into user buffer. */
599 ba->bb.bsize = -size;
600
601 /* Mark this buffer as owned by this thread. */
602 TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
603 /* Mark buffer after this one not preceded by free block. */
604 bn->bb.prevfree = 0;
605
606 /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
607 __kmp_bget_remove_from_freelist( b );
608 __kmp_bget_insert_into_freelist( thr, b );
609#if BufStats
610 thr->totalloc += (size_t) size;
611 thr->numget++; /* Increment number of bget() calls */
612#endif
613 buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
614 KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
615 return buf;
616 } else {
617 bhead_t *ba;
618
619 ba = BH(((char *) b) + b->bh.bb.bsize);
620
621 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
622
623 /* The buffer isn't big enough to split. Give the whole
624 shebang to the caller and remove it from the free list. */
625
626 __kmp_bget_remove_from_freelist( b );
627#if BufStats
628 thr->totalloc += (size_t) b->bh.bb.bsize;
629 thr->numget++; /* Increment number of bget() calls */
630#endif
631 /* Negate size to mark buffer allocated. */
632 b->bh.bb.bsize = -(b->bh.bb.bsize);
633
634 /* Mark this buffer as owned by this thread. */
635 TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
636 /* Zero the back pointer in the next buffer in memory
637 to indicate that this buffer is allocated. */
638 ba->bb.prevfree = 0;
639
640 /* Give user buffer starting at queue links. */
641 buf = (void *) &(b->ql);
642 KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
643 return buf;
644 }
645 }
646
647 /* Link to next buffer */
648 b = ( use_blink ? b->ql.blink : b->ql.flink );
649 }
650 }
651
652 /* We failed to find a buffer. If there's a compact function
653 defined, notify it of the size requested. If it returns
654 TRUE, try the allocation again. */
655
656 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
657 break;
658 }
659 }
660
661 /* No buffer available with requested size free. */
662
663 /* Don't give up yet -- look in the reserve supply. */
664
665 if (thr->acqfcn != 0) {
666 if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {
667
668 /* Request is too large to fit in a single expansion
669                block. Try to satisfy it by a direct buffer acquisition. */
670
671 bdhead_t *bdh;
672
673 size += sizeof(bdhead_t) - sizeof(bhead_t);
674
675 KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );
676
677 /* richryan */
678 bdh = BDH((*thr->acqfcn)((bufsize) size));
679 if (bdh != NULL) {
680
681 /* Mark the buffer special by setting the size field
682 of its header to zero. */
683 bdh->bh.bb.bsize = 0;
684
685 /* Mark this buffer as owned by this thread. */
686 TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
687 // because direct buffer never goes to free list
688 bdh->bh.bb.prevfree = 0;
689 bdh->tsize = size;
690#if BufStats
691 thr->totalloc += (size_t) size;
692 thr->numget++; /* Increment number of bget() calls */
693 thr->numdget++; /* Direct bget() call count */
694#endif
695 buf = (void *) (bdh + 1);
696 KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
697 return buf;
698 }
699
700 } else {
701
702 /* Try to obtain a new expansion block */
703
704 void *newpool;
705
706 KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );
707
708 /* richryan */
709 newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
710 KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
711 if (newpool != NULL) {
712 bpool( th, newpool, thr->exp_incr);
713 buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */
714 return buf;
715 }
716 }
717 }
718
719 /* Still no buffer available */
720
721 return NULL;
722}
723
724/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
725 the entire contents of the buffer to zero, not just the
726 region requested by the caller. */
727
728static void *
729bgetz( kmp_info_t *th, bufsize size )
730{
731 char *buf = (char *) bget( th, size);
732
733 if (buf != NULL) {
734 bhead_t *b;
735 bufsize rsize;
736
737 b = BH(buf - sizeof(bhead_t));
738 rsize = -(b->bb.bsize);
739 if (rsize == 0) {
740 bdhead_t *bd;
741
742 bd = BDH(buf - sizeof(bdhead_t));
743 rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
744 } else {
745 rsize -= sizeof(bhead_t);
746 }
747
748 KMP_DEBUG_ASSERT(rsize >= size);
749
750 (void) memset(buf, 0, (bufsize) rsize);
751 }
752 return ((void *) buf);
753}
754
755/* BGETR -- Reallocate a buffer. This is a minimal implementation,
756 simply in terms of brel() and bget(). It could be
757 enhanced to allow the buffer to grow into adjacent free
758 blocks and to avoid moving data unnecessarily. */
759
760static void *
761bgetr( kmp_info_t *th, void *buf, bufsize size)
762{
763 void *nbuf;
764 bufsize osize; /* Old size of buffer */
765 bhead_t *b;
766
767 nbuf = bget( th, size );
768 if ( nbuf == NULL ) { /* Acquire new buffer */
769 return NULL;
770 }
771 if ( buf == NULL ) {
772 return nbuf;
773 }
774 b = BH(((char *) buf) - sizeof(bhead_t));
775 osize = -b->bb.bsize;
776 if (osize == 0) {
777 /* Buffer acquired directly through acqfcn. */
778 bdhead_t *bd;
779
780 bd = BDH(((char *) buf) - sizeof(bdhead_t));
781 osize = bd->tsize - (bufsize) sizeof(bdhead_t);
782 } else {
783 osize -= sizeof(bhead_t);
784 };
785
786 KMP_DEBUG_ASSERT(osize > 0);
787
788    (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */
789                 (size_t) ((size < osize) ? size : osize));
790 brel( th, buf );
791
792 return nbuf;
793}
794
795/* BREL -- Release a buffer. */
796
797static void
798brel( kmp_info_t *th, void *buf )
799{
800 thr_data_t *thr = get_thr_data( th );
801 bfhead_t *b, *bn;
802 kmp_info_t *bth;
803
804 KMP_DEBUG_ASSERT(buf != NULL);
805 KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
806
807 b = BFH(((char *) buf) - sizeof(bhead_t));
808
809 if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
810 bdhead_t *bdh;
811
812 bdh = BDH(((char *) buf) - sizeof(bdhead_t));
813 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
814#if BufStats
815 thr->totalloc -= (size_t) bdh->tsize;
816 thr->numdrel++; /* Number of direct releases */
817 thr->numrel++; /* Increment number of brel() calls */
818#endif /* BufStats */
819#ifdef FreeWipe
820 (void) memset((char *) buf, 0x55,
821 (size_t) (bdh->tsize - sizeof(bdhead_t)));
822#endif /* FreeWipe */
823
824 KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );
825
826 KMP_DEBUG_ASSERT( thr->relfcn != 0 );
827 (*thr->relfcn)((void *) bdh); /* Release it directly. */
828 return;
829 }
830
831 bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
832 if ( bth != th ) {
833 /* Add this buffer to be released by the owning thread later */
834 __kmp_bget_enqueue( bth, buf
835#ifdef USE_QUEUING_LOCK_FOR_BGET
836 , __kmp_gtid_from_thread( th )
837#endif
838 );
839 return;
840 }
841
842 /* Buffer size must be negative, indicating that the buffer is
843 allocated. */
844
845 if (b->bh.bb.bsize >= 0) {
846 bn = NULL;
847 }
848 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
849
850 /* Back pointer in next buffer must be zero, indicating the
851 same thing: */
852
853 KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);
854
855#if BufStats
856 thr->numrel++; /* Increment number of brel() calls */
857 thr->totalloc += (size_t) b->bh.bb.bsize;
858#endif
859
860 /* If the back link is nonzero, the previous buffer is free. */
861
862 if (b->bh.bb.prevfree != 0) {
863 /* The previous buffer is free. Consolidate this buffer with it
864 by adding the length of this buffer to the previous free
865 buffer. Note that we subtract the size in the buffer being
866 released, since it's negative to indicate that the buffer is
867 allocated. */
868
869 register bufsize size = b->bh.bb.bsize;
870
871 /* Make the previous buffer the one we're working on. */
872 KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
873 b = BFH(((char *) b) - b->bh.bb.prevfree);
874 b->bh.bb.bsize -= size;
875
876 /* unlink the buffer from the old freelist */
877 __kmp_bget_remove_from_freelist( b );
878 }
879 else {
880        /* The previous buffer is allocated (not free). Mark this buffer
881           size as positive (i.e. free) and fall through to place
882           the buffer on the free list as an isolated free block. */
883
884 b->bh.bb.bsize = -b->bh.bb.bsize;
885 }
886
887 /* insert buffer back onto a new freelist */
888 __kmp_bget_insert_into_freelist( thr, b );
889
890
891 /* Now we look at the next buffer in memory, located by advancing from
892 the start of this buffer by its size, to see if that buffer is
893 free. If it is, we combine this buffer with the next one in
894 memory, dechaining the second buffer from the free list. */
895
896 bn = BFH(((char *) b) + b->bh.bb.bsize);
897 if (bn->bh.bb.bsize > 0) {
898
899 /* The buffer is free. Remove it from the free list and add
900 its size to that of our buffer. */
901
902 KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);
903
904 __kmp_bget_remove_from_freelist( bn );
905
906 b->bh.bb.bsize += bn->bh.bb.bsize;
907
908 /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
909
910 __kmp_bget_remove_from_freelist( b );
911 __kmp_bget_insert_into_freelist( thr, b );
912
913 /* Finally, advance to the buffer that follows the newly
914 consolidated free block. We must set its backpointer to the
915 head of the consolidated free block. We know the next block
916 must be an allocated block because the process of recombination
917 guarantees that two free blocks will never be contiguous in
918 memory. */
919
920 bn = BFH(((char *) b) + b->bh.bb.bsize);
921 }
922#ifdef FreeWipe
923 (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
924 (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
925#endif
926 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
927
928 /* The next buffer is allocated. Set the backpointer in it to point
929 to this buffer; the previous free buffer in memory. */
930
931 bn->bh.bb.prevfree = b->bh.bb.bsize;
932
933 /* If a block-release function is defined, and this free buffer
934 constitutes the entire block, release it. Note that pool_len
935 is defined in such a way that the test will fail unless all
936 pool blocks are the same size. */
937
938 if (thr->relfcn != 0 &&
939 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
940 {
941#if BufStats
942 if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */
943#endif
944
945 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
946 KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
947 KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
948
949 /* Unlink the buffer from the free list */
950 __kmp_bget_remove_from_freelist( b );
951
952 KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
953
954 (*thr->relfcn)(b);
955#if BufStats
956 thr->numprel++; /* Nr of expansion block releases */
957 thr->numpblk--; /* Total number of blocks */
958 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
959
960 /* avoid leaving stale last_pool pointer around if it is being dealloced */
961 if (thr->last_pool == b) thr->last_pool = 0;
962 }
963 else {
964 thr->last_pool = b;
965 }
966#endif /* BufStats */
967 }
968}
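/* Illustrative coalescing scenario (not in the original source): releasing an
   allocated buffer B whose memory neighbours are a free buffer P before it
   (B's prevfree != 0) and a free buffer N after it proceeds as follows: P
   absorbs B (P.bsize grows by -B.bsize, since B.bsize is negative), then N is
   unlinked from its free list and absorbed (P.bsize += N.bsize), and the
   allocated buffer following N gets prevfree = P.bsize. The result is a
   single free buffer, so two free buffers are never adjacent in memory. */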
969
970/* BECTL -- Establish automatic pool expansion control */
971
972static void
973bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
974{
975 thr_data_t *thr = get_thr_data( th );
976
977 thr->compfcn = compact;
978 thr->acqfcn = acquire;
979 thr->relfcn = release;
980 thr->exp_incr = pool_incr;
981}
982
983/* BPOOL -- Add a region of memory to the buffer pool. */
984
985static void
986bpool( kmp_info_t *th, void *buf, bufsize len)
987{
988/* int bin = 0; */
989 thr_data_t *thr = get_thr_data( th );
990 bfhead_t *b = BFH(buf);
991 bhead_t *bn;
992
993 __kmp_bget_dequeue( th ); /* Release any queued buffers */
994
995#ifdef SizeQuant
996 len &= ~(SizeQuant - 1);
997#endif
998 if (thr->pool_len == 0) {
999 thr->pool_len = len;
1000 } else if (len != thr->pool_len) {
1001 thr->pool_len = -1;
1002 }
1003#if BufStats
1004 thr->numpget++; /* Number of block acquisitions */
1005 thr->numpblk++; /* Number of blocks total */
1006 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1007#endif /* BufStats */
1008
1009 /* Since the block is initially occupied by a single free buffer,
1010 it had better not be (much) larger than the largest buffer
1011 whose size we can store in bhead.bb.bsize. */
1012
1013 KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));
1014
1015 /* Clear the backpointer at the start of the block to indicate that
1016 there is no free block prior to this one. That blocks
1017 recombination when the first block in memory is released. */
1018
1019 b->bh.bb.prevfree = 0;
1020
1021 /* Create a dummy allocated buffer at the end of the pool. This dummy
1022 buffer is seen when a buffer at the end of the pool is released and
1023 blocks recombination of the last buffer with the dummy buffer at
1024 the end. The length in the dummy buffer is set to the largest
1025 negative number to denote the end of the pool for diagnostic
1026 routines (this specific value is not counted on by the actual
1027 allocation and release functions). */
1028
1029 len -= sizeof(bhead_t);
1030 b->bh.bb.bsize = (bufsize) len;
1031 /* Set the owner of this buffer */
1032 TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address
1033
1034 /* Chain the new block to the free list. */
1035 __kmp_bget_insert_into_freelist( thr, b );
1036
1037#ifdef FreeWipe
1038 (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
1039 (size_t) (len - sizeof(bfhead_t)));
1040#endif
1041 bn = BH(((char *) b) + len);
1042 bn->bb.prevfree = (bufsize) len;
1043 /* Definition of ESent assumes two's complement! */
1044 KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );
1045
1046 bn->bb.bsize = ESent;
1047}
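/* Illustrative picture (not in the original source) of one pool block right
   after bpool() has processed it, following the description above:

   +-----------------------+----------------------------------+--------------+
   | bfhead_t              | remaining free space             | dummy bhead_t|
   | prevfree = 0          | (wiped with 0x55 when FreeWipe   | prevfree =   |
   | bsize = len - header  |  is defined)                     |  len - header|
   |                       |                                  | bsize = ESent|
   +-----------------------+----------------------------------+--------------+

   Everything up to the dummy sentinel is a single free buffer linked into the
   owning thread's free list. */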
1048
1049/* ------------------------------------------------------------------------ */
1050
1051/* BFREED -- Dump the free lists for this thread. */
1052
1053static void
1054bfreed( kmp_info_t *th )
1055{
1056 int bin = 0, count = 0;
1057 int gtid = __kmp_gtid_from_thread( th );
1058 thr_data_t *thr = get_thr_data( th );
1059
1060#if BufStats
1061 __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
1062 KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
1063 " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
1064 gtid, (kmp_uint64) thr->totalloc,
1065 (kmp_int64) thr->numget, (kmp_int64) thr->numrel,
1066 (kmp_int64) thr->numpblk,
1067 (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
1068 (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
1069#endif
1070
1071 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1072 bfhead_t *b;
1073
1074 for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
1075 bufsize bs = b->bh.bb.bsize;
1076
1077 KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
1078 KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
1079 KMP_DEBUG_ASSERT( bs > 0 );
1080
1081 count += 1;
1082
1083 __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
1084#ifdef FreeWipe
1085 {
1086 char *lerr = ((char *) b) + sizeof(bfhead_t);
1087 if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1088 __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid );
1089 }
1090 }
1091#endif
1092 }
1093 }
1094
1095 if (count == 0)
1096 __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
1097}
1098
1099/* ------------------------------------------------------------------------ */
1100
1101#ifdef KMP_DEBUG
1102
1103#if BufStats
1104
1105/* BSTATS -- Return buffer allocation free space statistics. */
1106
1107static void
1108bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
1109{
1110 int bin = 0;
1111 thr_data_t *thr = get_thr_data( th );
1112
1113 *nget = thr->numget;
1114 *nrel = thr->numrel;
1115 *curalloc = (bufsize) thr->totalloc;
1116 *totfree = 0;
1117 *maxfree = -1;
1118
1119 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1120 bfhead_t *b = thr->freelist[ bin ].ql.flink;
1121
1122 while (b != &thr->freelist[ bin ]) {
1123 KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
1124 *totfree += b->bh.bb.bsize;
1125 if (b->bh.bb.bsize > *maxfree) {
1126 *maxfree = b->bh.bb.bsize;
1127 }
1128 b = b->ql.flink; /* Link to next buffer */
1129 }
1130 }
1131}
1132
1133/* BSTATSE -- Return extended statistics */
1134
1135static void
1136bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
1137{
1138 thr_data_t *thr = get_thr_data( th );
1139
1140 *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
1141 *npool = thr->numpblk;
1142 *npget = thr->numpget;
1143 *nprel = thr->numprel;
1144 *ndget = thr->numdget;
1145 *ndrel = thr->numdrel;
1146}
1147
1148#endif /* BufStats */
1149
1150/* BUFDUMP -- Dump the data in a buffer. This is called with the user
1151 data pointer, and backs up to the buffer header. It will
1152 dump either a free block or an allocated one. */
1153
1154static void
1155bufdump( kmp_info_t *th, void *buf )
1156{
1157 bfhead_t *b;
1158 unsigned char *bdump;
1159 bufsize bdlen;
1160
1161 b = BFH(((char *) buf) - sizeof(bhead_t));
1162 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
1163 if (b->bh.bb.bsize < 0) {
1164 bdump = (unsigned char *) buf;
1165 bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
1166 } else {
1167 bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
1168 bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
1169 }
1170
1171 while (bdlen > 0) {
1172 int i, dupes = 0;
1173 bufsize l = bdlen;
1174 char bhex[50], bascii[20];
1175
1176 if (l > 16) {
1177 l = 16;
1178 }
1179
1180 for (i = 0; i < l; i++) {
1181            (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex), "%02X ", bdump[i]);
1182            if (bdump[i] > 0x20 && bdump[i] < 0x7F)
1183 bascii[ i ] = bdump[ i ];
1184 else
1185 bascii[ i ] = ' ';
1186 }
1187 bascii[i] = 0;
1188 (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
1189 bdump += l;
1190 bdlen -= l;
1191 while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
1192 (char *) bdump, 16) == 0)) {
1193 dupes++;
1194 bdump += 16;
1195 bdlen -= 16;
1196 }
1197 if (dupes > 1) {
1198 (void) __kmp_printf_no_lock(
1199 " (%d lines [%d bytes] identical to above line skipped)\n",
1200 dupes, dupes * 16);
1201 } else if (dupes == 1) {
1202 bdump -= 16;
1203 bdlen += 16;
1204 }
1205 }
1206}
1207
1208/* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
1209 If DUMPALLOC is nonzero, the contents of allocated buffers
1210 are dumped. If DUMPFREE is nonzero, free blocks are
1211 dumped as well. If FreeWipe checking is enabled, free
1212 blocks which have been clobbered will always be dumped. */
1213
1214static void
1215bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
1216{
1217 bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));
1218
1219 while (b->bh.bb.bsize != ESent) {
1220 bufsize bs = b->bh.bb.bsize;
1221
1222 if (bs < 0) {
1223 bs = -bs;
1224 (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
1225 if (dumpalloc) {
1226 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1227 }
1228 } else {
1229            const char *lerr = "";
1230
1231 KMP_DEBUG_ASSERT(bs > 0);
1232 if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1233 lerr = " (Bad free list links)";
1234 }
1235 (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
1236 (long) bs, lerr);
1237#ifdef FreeWipe
1238 lerr = ((char *) b) + sizeof(bfhead_t);
1239 if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1240 (memcmp(lerr, lerr + 1,
1241 (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1242 (void) __kmp_printf_no_lock(
1243 "(Contents of above free block have been overstored.)\n");
1244 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1245 } else
1246#endif
1247 if (dumpfree) {
1248 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1249 }
1250 }
1251 b = BFH(((char *) b) + bs);
1252 }
1253}
1254
1255/* BPOOLV -- Validate a buffer pool. */
1256
1257static int
1258bpoolv( kmp_info_t *th, void *buf )
1259{
1260 bfhead_t *b = BFH(buf);
1261
1262 while (b->bh.bb.bsize != ESent) {
1263 bufsize bs = b->bh.bb.bsize;
1264
1265 if (bs < 0) {
1266 bs = -bs;
1267 } else {
1268#ifdef FreeWipe
1269 char *lerr = "";
1270#endif
1271
1272 KMP_DEBUG_ASSERT(bs > 0);
1273 if (bs <= 0) {
1274 return 0;
1275 }
1276 if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1277 (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n",
1278 (long) bs);
1279 KMP_DEBUG_ASSERT(0);
1280 return 0;
1281 }
1282#ifdef FreeWipe
1283 lerr = ((char *) b) + sizeof(bfhead_t);
1284 if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1285 (memcmp(lerr, lerr + 1,
1286 (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1287 (void) __kmp_printf_no_lock(
1288 "(Contents of above free block have been overstored.)\n");
1289 bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1290 KMP_DEBUG_ASSERT(0);
1291 return 0;
1292 }
1293#endif /* FreeWipe */
1294 }
1295 b = BFH(((char *) b) + bs);
1296 }
1297 return 1;
1298}
1299
1300#endif /* KMP_DEBUG */
1301
1302/* ------------------------------------------------------------------------ */
1303
1304void
1305__kmp_initialize_bget( kmp_info_t *th )
1306{
1307 KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
1308
1309 set_thr_data( th );
1310
1311 bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
1312 (bufsize) __kmp_malloc_pool_incr );
1313}
1314
1315void
1316__kmp_finalize_bget( kmp_info_t *th )
1317{
1318 thr_data_t *thr;
1319 bfhead_t *b;
1320
1321 KMP_DEBUG_ASSERT( th != 0 );
1322
1323#if BufStats
1324 thr = (thr_data_t *) th->th.th_local.bget_data;
1325 KMP_DEBUG_ASSERT( thr != NULL );
1326 b = thr->last_pool;
1327
1328 /* If a block-release function is defined, and this free buffer
1329 constitutes the entire block, release it. Note that pool_len
1330 is defined in such a way that the test will fail unless all
1331 pool blocks are the same size. */
1332
1333 /* Deallocate the last pool if one exists because we no longer do it in brel() */
1334 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1335 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
1336 {
1337 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1338 KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
1339 KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
1340
1341 /* Unlink the buffer from the free list */
1342 __kmp_bget_remove_from_freelist( b );
1343
1344 KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
1345
1346 (*thr->relfcn)(b);
1347 thr->numprel++; /* Nr of expansion block releases */
1348 thr->numpblk--; /* Total number of blocks */
1349 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1350 }
1351#endif /* BufStats */
1352
1353 /* Deallocate bget_data */
1354 if ( th->th.th_local.bget_data != NULL ) {
1355 __kmp_free( th->th.th_local.bget_data );
1356 th->th.th_local.bget_data = NULL;
1357 }; // if
1358}
1359
1360void
1361kmpc_set_poolsize( size_t size )
1362{
1363 bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
1364 (bget_release_t) free, (bufsize) size );
1365}
1366
1367size_t
1368kmpc_get_poolsize( void )
1369{
1370 thr_data_t *p;
1371
1372 p = get_thr_data( __kmp_get_thread() );
1373
1374 return p->exp_incr;
1375}
1376
1377void
1378kmpc_set_poolmode( int mode )
1379{
1380 thr_data_t *p;
1381
1382 if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
1383 p = get_thr_data( __kmp_get_thread() );
1384 p->mode = (bget_mode_t) mode;
1385 }
1386}
1387
1388int
1389kmpc_get_poolmode( void )
1390{
1391 thr_data_t *p;
1392
1393 p = get_thr_data( __kmp_get_thread() );
1394
1395 return p->mode;
1396}
1397
1398void
1399kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
1400{
1401 kmp_info_t *th = __kmp_get_thread();
1402 bufsize a, b;
1403
1404 __kmp_bget_dequeue( th ); /* Release any queued buffers */
1405
1406 bcheck( th, &a, &b );
1407
1408 *maxmem = a;
1409 *allmem = b;
1410}
1411
1412void
1413kmpc_poolprint( void )
1414{
1415 kmp_info_t *th = __kmp_get_thread();
1416
1417 __kmp_bget_dequeue( th ); /* Release any queued buffers */
1418
1419 bfreed( th );
1420}
1421
1422#endif // #if KMP_USE_BGET
1423
1424/* ------------------------------------------------------------------------ */
1425
1426void *
1427kmpc_malloc( size_t size )
1428{
1429 void * ptr;
1430 ptr = bget( __kmp_entry_thread(), (bufsize) size );
1431
1432 return ptr;
1433}
1434
1435void *
1436kmpc_calloc( size_t nelem, size_t elsize )
1437{
1438 void * ptr;
1439 ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) );
1440
1441 return ptr;
1442}
1443
1444void *
1445kmpc_realloc( void * ptr, size_t size )
1446{
1447 void * result = NULL;
1448
1449 if ( ptr == NULL ) {
1450 // If pointer is NULL, realloc behaves like malloc.
1451 result = bget( __kmp_entry_thread(), (bufsize) size );
1452 } else if ( size == 0 ) {
1453 // If size is 0, realloc behaves like free.
1454 // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before.
1455 // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread().
1456 brel( __kmp_get_thread(), ptr );
1457 } else {
1458 result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size );
1459 }; // if
1460
1461 return result;
1462}
1463
1464/* NOTE: the library must have already been initialized by a previous allocate */
1465
1466void
1467kmpc_free( void * ptr )
1468{
1469 if ( ! __kmp_init_serial ) {
1470 return;
1471 }; // if
1472 if ( ptr != NULL ) {
1473 kmp_info_t *th = __kmp_get_thread();
1474 __kmp_bget_dequeue( th ); /* Release any queued buffers */
1475 brel( th, ptr );
1476 };
1477}
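/* Illustrative usage sketch (kept out of the build): the kmpc_* entry points
   serve user code and operate on the calling thread's private bget pool, so
   the usual malloc/realloc/free discipline applies per thread. A buffer freed
   by a thread other than its owner is queued back to the owner via
   __kmp_bget_enqueue() inside brel(). The function name is hypothetical. */
#if 0
static void
kmpc_alloc_example( void )
{
    int *a = (int *) kmpc_malloc( 100 * sizeof( int ) );   /* thread-private allocation   */
    a = (int *) kmpc_realloc( a, 200 * sizeof( int ) );    /* may move the data           */
    kmpc_free( a );                                        /* must be freed via kmpc_free */
}
#endif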
1478
1479
1480/* ------------------------------------------------------------------------ */
1481
1482void *
1483___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
1484{
1485 void * ptr;
1486 KE_TRACE( 30, (
1487 "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
1488 th,
1489 (int) size
1490 KMP_SRC_LOC_PARM
1491 ) );
1492 ptr = bget( th, (bufsize) size );
1493 KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
1494 return ptr;
1495}
1496
1497void *
1498___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
1499{
1500 void * ptr;
1501 KE_TRACE( 30, (
1502 "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
1503 th,
1504 (int) nelem,
1505 (int) elsize
1506 KMP_SRC_LOC_PARM
1507 ) );
1508 ptr = bgetz( th, (bufsize) (nelem * elsize) );
1509 KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
1510 return ptr;
1511}
1512
1513void *
1514___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
1515{
1516 KE_TRACE( 30, (
1517 "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
1518 th,
1519 ptr,
1520 (int) size
1521 KMP_SRC_LOC_PARM
1522 ) );
1523 ptr = bgetr( th, ptr, (bufsize) size );
1524 KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
1525 return ptr;
1526}
1527
1528void
1529___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
1530{
1531 KE_TRACE( 30, (
1532 "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
1533 th,
1534 ptr
1535 KMP_SRC_LOC_PARM
1536 ) );
1537 if ( ptr != NULL ) {
1538 __kmp_bget_dequeue( th ); /* Release any queued buffers */
1539 brel( th, ptr );
1540 }
1541 KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
1542}
1543
1544/* ------------------------------------------------------------------------ */
1545/* ------------------------------------------------------------------------ */
1546/*
1547 If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it
1548    may be useful for debugging memory corruption, use of freed pointers, etc.
1549*/
1550/* #define LEAK_MEMORY */
1551
1552struct kmp_mem_descr { // Memory block descriptor.
1553 void * ptr_allocated; // Pointer returned by malloc(), subject for free().
1554 size_t size_allocated; // Size of allocated memory block.
1555 void * ptr_aligned; // Pointer to aligned memory, to be used by client code.
1556 size_t size_aligned; // Size of aligned memory block.
1557};
1558typedef struct kmp_mem_descr kmp_mem_descr_t;
1559
1560/*
1561 Allocate memory on requested boundary, fill allocated memory with 0x00.
1562 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1563 Must use __kmp_free when freeing memory allocated by this routine!
1564 */
1565static
1566void *
1567___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
1568{
1569 /*
1570        __kmp_allocate() allocates (via a call to malloc()) a bigger memory block than requested to
1571        return a properly aligned pointer. The original pointer returned by malloc() and the size of
1572        the allocated block are saved in a descriptor just before the aligned pointer. This information
1573        is used by __kmp_free() -- it has to pass to free() the original pointer, not the aligned one.
1574
1575 +---------+------------+-----------------------------------+---------+
1576 | padding | descriptor | aligned block | padding |
1577 +---------+------------+-----------------------------------+---------+
1578 ^ ^
1579 | |
1580 | +- Aligned pointer returned to caller
1581 +- Pointer returned by malloc()
1582
1583 Aligned block is filled with zeros, paddings are filled with 0xEF.
1584 */
1585
1586 kmp_mem_descr_t descr;
1587 kmp_uintptr_t addr_allocated; // Address returned by malloc().
1588 kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1589 kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1590
1591 KE_TRACE( 25, (
1592 "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1593 (int) size,
1594 (int) alignment
1595 KMP_SRC_LOC_PARM
1596 ) );
1597
1598    KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big.
1599 KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
1600 // Make sure kmp_uintptr_t is enough to store addresses.
1601
1602 descr.size_aligned = size;
1603 descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
1604
1605 #if KMP_DEBUG
1606 descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
1607 #else
1608 descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
1609 #endif
1610 KE_TRACE( 10, (
1611 " malloc( %d ) returned %p\n",
1612 (int) descr.size_allocated,
1613 descr.ptr_allocated
1614 ) );
1615 if ( descr.ptr_allocated == NULL ) {
1616 KMP_FATAL( OutOfHeapMemory );
1617 };
1618
1619 addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1620 addr_aligned =
1621 ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
1622 & ~ ( alignment - 1 );
1623 addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
1624
1625 descr.ptr_aligned = (void *) addr_aligned;
1626
1627 KE_TRACE( 26, (
1628 " ___kmp_allocate_align: "
1629 "ptr_allocated=%p, size_allocated=%d, "
1630 "ptr_aligned=%p, size_aligned=%d\n",
1631 descr.ptr_allocated,
1632 (int) descr.size_allocated,
1633 descr.ptr_aligned,
1634 (int) descr.size_aligned
1635 ) );
1636
1637 KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
1638 KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
1639 KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1640 KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
1641
1642 #ifdef KMP_DEBUG
1643 memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1644 // Fill allocated memory block with 0xEF.
1645 #endif
1646 memset( descr.ptr_aligned, 0x00, descr.size_aligned );
1647 // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not
1648 // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
1649 // bytes remain filled with 0xEF in debugging library.)
1650 * ( (kmp_mem_descr_t *) addr_descr ) = descr;
1651
1652 KMP_MB();
1653
1654 KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
1655 return descr.ptr_aligned;
1656
1657} // func ___kmp_allocate_align
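/* Worked example (illustrative only): assume malloc() returns 0x1008, the
   requested alignment is 64, and sizeof(kmp_mem_descr_t) is 32. Then
       addr_aligned = (0x1008 + 32 + 64) & ~(64 - 1) = 0x1068 & ~0x3f = 0x1040
       addr_descr   = 0x1040 - 32                    = 0x1020
   so the descriptor sits immediately below the aligned pointer and the
   assertions above (allocated <= descr, aligned % alignment == 0) hold. */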
1658
1659
1660/*
1661 Allocate memory on cache line boundary, fill allocated memory with 0x00.
1662 Do not call this func directly! Use __kmp_allocate macro instead.
1663 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1664 Must use __kmp_free when freeing memory allocated by this routine!
1665 */
1666void *
1667___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
1668{
1669
1670 void * ptr;
1671 KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
1672 ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
1673 KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
1674 return ptr;
1675
1676} // func ___kmp_allocate
1677
1678#if (BUILD_MEMORY==FIRST_TOUCH)
1679void *
1680__kmp_ft_page_allocate(size_t size)
1681{
1682 void *adr, *aadr;
1683#if KMP_OS_LINUX
1684 /* TODO: Use this function to get page size everywhere */
1685 int page_size = getpagesize();
1686#else
1687 /* TODO: Find windows function to get page size and use it everywhere */
1688 int page_size = PAGE_SIZE;
1689#endif /* KMP_OS_LINUX */
1690
1691 adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
1692 size + page_size + KMP_PTR_SKIP);
1693 if ( adr == 0 )
1694 KMP_FATAL( OutOfHeapMemory );
1695
1696 /* check to see if adr is on a page boundary. */
1697 if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
1698 /* nothing to do if adr is already on a page boundary. */
1699 aadr = adr;
1700 else
1701 /* else set aadr to the first page boundary in the allocated memory. */
1702 aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
1703
1704 /* the first touch by the owner thread. */
1705 *((void**)aadr) = adr;
1706
1707 /* skip the memory space used for storing adr above. */
1708 return (void*)((char*)aadr + KMP_PTR_SKIP);
1709}
1710#endif
1711
1712/*
1713 Allocate memory on page boundary, fill allocated memory with 0x00.
1714    Do not call this func directly! Use __kmp_page_allocate macro instead.
1715 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1716 Must use __kmp_free when freeing memory allocated by this routine!
1717 */
1718void *
1719___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
1720{
1721 int page_size = 8 * 1024;
1722 void * ptr;
1723
1724 KE_TRACE( 25, (
1725 "-> __kmp_page_allocate( %d ) called from %s:%d\n",
1726 (int) size
1727 KMP_SRC_LOC_PARM
1728 ) );
1729 ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
1730 KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
1731 return ptr;
1732} // ___kmp_page_allocate
1733
1734/*
1735 Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1736 In debug mode, fill the memory block with 0xEF before call to free().
1737*/
1738void
1739___kmp_free( void * ptr KMP_SRC_LOC_DECL )
1740{
1741
1742 kmp_mem_descr_t descr;
1743 kmp_uintptr_t addr_allocated; // Address returned by malloc().
1744 kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1745
1746 KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
1747 KMP_ASSERT( ptr != NULL );
1748
1749 descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
1750
1751 KE_TRACE( 26, ( " __kmp_free: "
1752 "ptr_allocated=%p, size_allocated=%d, "
1753 "ptr_aligned=%p, size_aligned=%d\n",
1754 descr.ptr_allocated, (int) descr.size_allocated,
1755 descr.ptr_aligned, (int) descr.size_aligned ));
1756
1757 addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1758 addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
1759
1760 KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
1761 KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
1762 KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
1763 KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
1764 KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1765
1766 #ifdef KMP_DEBUG
1767 memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1768 // Fill memory block with 0xEF, it helps catch using freed memory.
1769 #endif
1770
1771 #ifndef LEAK_MEMORY
1772 KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
1773        # ifdef KMP_DEBUG
1774            _free_src_loc( descr.ptr_allocated, _file_, _line_ );
1775        # else
1776            free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
1777        # endif
1778    #endif
1779
1780 KMP_MB();
1781
1782 KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
1783
1784} // func ___kmp_free
1785
1786/* ------------------------------------------------------------------------ */
1787/* ------------------------------------------------------------------------ */
1788
1789#if USE_FAST_MEMORY == 3
1790// Allocate fast memory by first scanning the thread's free lists
1791// If a chunk the right size exists, grab it off the free list.
1792// Otherwise allocate normally using kmp_thread_malloc.
1793
1794// AC: How to choose the limit? Just get 16 for now...
1795#define KMP_FREE_LIST_LIMIT 16
1796
1797// Always use 128 bytes for determining buckets for caching memory blocks
1798#define DCACHE_LINE 128
1799
1800void *
1801___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
1802{
1803 void * ptr;
1804 int num_lines;
1805 int idx;
1806 int index;
1807 void * alloc_ptr;
1808 size_t alloc_size;
1809 kmp_mem_descr_t * descr;
1810
1811 KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1812 __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
1813
1814 num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
1815 idx = num_lines - 1;
1816 KMP_DEBUG_ASSERT( idx >= 0 );
1817 if ( idx < 2 ) {
1818 index = 0; // idx is [ 0, 1 ], use first free list
1819 num_lines = 2; // 1, 2 cache lines or less than cache line
1820 } else if ( ( idx >>= 2 ) == 0 ) {
1821 index = 1; // idx is [ 2, 3 ], use second free list
1822 num_lines = 4; // 3, 4 cache lines
1823 } else if ( ( idx >>= 2 ) == 0 ) {
1824 index = 2; // idx is [ 4, 15 ], use third free list
1825 num_lines = 16; // 5, 6, ..., 16 cache lines
1826 } else if ( ( idx >>= 2 ) == 0 ) {
1827 index = 3; // idx is [ 16, 63 ], use fourth free list
1828 num_lines = 64; // 17, 18, ..., 64 cache lines
1829 } else {
1830 goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1831 }
1832
1833 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1834 if ( ptr != NULL ) {
1835 // pop the head of no-sync free list
1836 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1837 KMP_DEBUG_ASSERT( this_thr ==
1838 ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1839 goto end;
1840    }
1841 ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1842 if ( ptr != NULL ) {
1843 // no-sync free list is empty, use sync free list (filled in by other threads only)
1844 // pop the head of the sync free list, push NULL instead
1845 while ( ! KMP_COMPARE_AND_STORE_PTR(
1846 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
1847 {
1848 KMP_CPU_PAUSE();
1849 ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1850 }
1851        // push the rest of the chain onto the no-sync free list (may be NULL if this was the only block)
1852 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1853 KMP_DEBUG_ASSERT( this_thr ==
1854 ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1855 goto end;
1856 }
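    /*
        The CAS loop above detaches the whole sync chain atomically by swapping
        the head with NULL; other threads may keep pushing while it retries.
        The first block of the detached chain satisfies the current request and
        the remainder becomes the new no-sync list, which only this thread reads.
    */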
1857
1858 alloc_call:
1859    // no suitable block was found in the free lists, so allocate a new one
1860 size = num_lines * DCACHE_LINE;
1861
1862 alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
1863 KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
1864 __kmp_gtid_from_thread( this_thr ), alloc_size ) );
1865 alloc_ptr = bget( this_thr, (bufsize) alloc_size );
1866
1867 // align ptr to DCACHE_LINE
1868 ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
1869 descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
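    /* Worked example of the alignment arithmetic above (the descriptor size is
       hypothetical, purely for illustration): with alloc_ptr == 0x1008 and
       sizeof( kmp_mem_descr_t ) == 32,
           ptr = ( 0x1008 + 32 + 128 ) & ~127 = 0x10A8 & ~0x7F = 0x1080,
       which is DCACHE_LINE-aligned, leaves at least 32 bytes below it for the
       descriptor, and keeps ptr + size within the alloc_size bytes from bget. */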
1870
1871 descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1872 // we don't need size_allocated
1873 descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1874                                            // (it is already saved in the bget buffer,
1875                                            // but we may want to use another allocator in the future)
1876 descr->size_aligned = size;
1877
1878 end:
1879 KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
1880 __kmp_gtid_from_thread( this_thr ), ptr ) );
1881 return ptr;
1882} // func __kmp_fast_allocate
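/*
    These entry points are normally reached through thin wrapper macros
    (presumably defined in kmp.h next to KMP_SRC_LOC_CURR) that splice in the
    caller's source location. A hypothetical illustration, not a definition
    taken from this file:

        #define __kmp_fast_allocate( this_thr, size ) \
            ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR )
        #define __kmp_fast_free( this_thr, ptr ) \
            ___kmp_fast_free( (this_thr), (ptr) KMP_SRC_LOC_CURR )

    so callers use __kmp_fast_allocate()/__kmp_fast_free() and never pass the
    trailing file/line arguments explicitly.
*/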
1883
1884// Free fast memory and place it on the thread's free list if it is of
1885// the correct size.
1886void
1887___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
1888{
1889 kmp_mem_descr_t * descr;
1890 kmp_info_t * alloc_thr;
1891 size_t size;
1892 size_t idx;
1893 int index;
1894
1895 KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1896 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
1897 KMP_ASSERT( ptr != NULL );
1898
1899 descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1900
1901 KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1902 (int) descr->size_aligned ) );
1903
1904 size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1905
1906    idx = DCACHE_LINE * 2; // 2 cache lines is the minimal block size
1907 if ( idx == size ) {
1908 index = 0; // 2 cache lines
1909 } else if ( ( idx <<= 1 ) == size ) {
1910 index = 1; // 4 cache lines
1911 } else if ( ( idx <<= 2 ) == size ) {
1912 index = 2; // 16 cache lines
1913 } else if ( ( idx <<= 2 ) == size ) {
1914 index = 3; // 64 cache lines
1915 } else {
1916 KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
1917 goto free_call; // 65 or more cache lines ( > 8KB )
1918 }
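    /*
        Only the exact bucket sizes produced by ___kmp_fast_allocate() (2, 4, 16
        or 64 cache lines) are recycled onto the free lists; the debug assert in
        the fall-through branch above confirms that the only other possibility
        is a block larger than 64 cache lines, which goes back to bget via
        brel() at free_call.
    */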
1919
1920 alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1921 if ( alloc_thr == this_thr ) {
1922        // push the block onto this thread's no-sync free list, linking in the previous head (LIFO)
1923 *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1924 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1925 } else {
1926 void * head = this_thr->th.th_free_lists[index].th_free_list_other;
1927 if ( head == NULL ) {
1928 // Create new free list
1929 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1930 *((void **)ptr) = NULL; // mark the tail of the list
1931 descr->size_allocated = (size_t)1; // head of the list keeps its length
1932 } else {
1933            // check the owning thread and queue size of the existing "other" list
1934 kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
1935 kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
1936 size_t q_sz = dsc->size_allocated + 1; // new size in case we add current task
1937            if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) {
1938                // we can add current task to "other" list, no sync needed
1939 *((void **)ptr) = head;
1940 descr->size_allocated = q_sz;
1941 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1942 } else {
1943                // either the queue's owner differs from this block's owner or the size limit was exceeded:
1944                // return the old queue to the allocating thread (q_th) synchronously,
1945                // and start a new list for alloc_thr's tasks
1946 void * old_ptr;
1947 void * tail = head;
1948 void * next = *((void **)head);
1949 while ( next != NULL ) {
1950 KMP_DEBUG_ASSERT(
1951 // queue size should decrease by 1 each step through the list
1952 ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
1953 ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
1954 tail = next; // remember tail node
1955 next = *((void **)next);
1956 }
1957 KMP_DEBUG_ASSERT( q_th != NULL );
1958 // push block to owner's sync free list
1959 old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1960 /* the next pointer must be set before setting free_list to ptr to avoid
1961 exposing a broken list to other threads, even for an instant. */
1962 *((void **)tail) = old_ptr;
1963
1964 while ( ! KMP_COMPARE_AND_STORE_PTR(
1965 &q_th->th.th_free_lists[index].th_free_list_sync,
1966 old_ptr,
1967 head ) )
1968 {
1969 KMP_CPU_PAUSE();
1970 old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1971 *((void **)tail) = old_ptr;
1972 }
1973
1974                // start a new list of not-self tasks
1975 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1976 *((void **)ptr) = NULL;
1977 descr->size_allocated = (size_t)1; // head of queue keeps its length
1978 }
1979 }
1980 }
1981 goto end;
1982
1983 free_call:
1984 KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
1985 __kmp_gtid_from_thread( this_thr), size ) );
1986 __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
1987 brel( this_thr, descr->ptr_allocated );
1988
1989 end:
1990 KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
1991
1992} // func __kmp_fast_free
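/*
    Summary of the cross-thread path in ___kmp_fast_free(): a block freed by a
    thread other than its allocator is first parked on the freeing thread's
    th_free_list_other queue; the queue head records the queue length in its
    size_allocated field. When a block owned by a different thread arrives, or
    the queue would exceed KMP_FREE_LIST_LIMIT entries, the whole queue is
    pushed onto the owning thread's th_free_list_sync list with a
    compare-and-swap loop, and the owner later drains that list lock-free in
    ___kmp_fast_allocate().
*/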
1993
1994
1995// Initialize the thread free lists related to fast memory
1996// Only do this when a thread is initially created.
1997void
1998__kmp_initialize_fast_memory( kmp_info_t *this_thr )
1999{
2000 KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
2001
2002 memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
2003}
2004
2005// Free the memory in the thread free lists related to fast memory
2006// Only do this when a thread is being reaped (destroyed).
2007void
2008__kmp_free_fast_memory( kmp_info_t *th )
2009{
2010    // Assuming BGET is the underlying allocator, walk through its structures...
2011 int bin;
2012 thr_data_t * thr = get_thr_data( th );
2013 void ** lst = NULL;
2014
2015 KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
2016 __kmp_gtid_from_thread( th ) ) );
2017
2018 __kmp_bget_dequeue( th ); // Release any queued buffers
2019
2020 // Dig through free lists and extract all allocated blocks
2021 for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
2022 bfhead_t * b = thr->freelist[ bin ].ql.flink;
2023 while ( b != &thr->freelist[ bin ] ) {
2024            if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // is this buffer an allocated address?
2025                *((void**)b) = lst; // link into the list (overwrites bthr but keeps flink intact)
2026 lst = (void**)b; // push b into lst
2027 }
2028 b = b->ql.flink; // get next buffer
2029 }
2030 }
2031 while ( lst != NULL ) {
2032 void * next = *lst;
2033 KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2034 lst, next, th, __kmp_gtid_from_thread( th ) ) );
2035 (*thr->relfcn)(lst);
2036 #if BufStats
2037 // count blocks to prevent problems in __kmp_finalize_bget()
2038 thr->numprel++; /* Nr of expansion block releases */
2039 thr->numpblk--; /* Total number of blocks */
2040 #endif
2041 lst = (void**)next;
2042 }
2043
2044 KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
2045 __kmp_gtid_from_thread( th ) ) );
2046}
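/*
    Note on the walk above: every bget bin is scanned and each buffer whose bthr
    field has its low bit set (the tag this file appears to use to mark an
    allocated address) is collected onto a private chain, then handed back to
    the system through the user-supplied release callback (thr->relfcn). The
    BufStats counters are adjusted so __kmp_finalize_bget() still sees a
    consistent block count afterwards.
*/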
2047
2048#endif // USE_FAST_MEMORY