blob: c269fb96ce8b6a402beb05f590e02d93f7927b7d [file] [log] [blame]
Jim Cownie5e8470a2013-09-27 10:38:44 +00001/*
Jonathan Peytonde4749b2016-12-14 23:01:24 +00002 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
Jim Cownie5e8470a2013-09-27 10:38:44 +00003 */
4
Jim Cownie5e8470a2013-09-27 10:38:44 +00005//===----------------------------------------------------------------------===//
6//
Chandler Carruth57b08b02019-01-19 10:56:40 +00007// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Jim Cownie5e8470a2013-09-27 10:38:44 +000010//
11//===----------------------------------------------------------------------===//
12
Jim Cownie5e8470a2013-09-27 10:38:44 +000013#include "kmp.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000014#include "kmp_io.h"
Jonathan Peyton30419822017-05-12 18:01:32 +000015#include "kmp_wrapper_malloc.h"
Jim Cownie5e8470a2013-09-27 10:38:44 +000016
17// Disable bget when it is not used
18#if KMP_USE_BGET
19
20/* Thread private buffer management code */
21
/* Callback types installed through bectl(). */

/* Compaction hook: bget() calls it with the (rounded) size it failed to
   find and a sequence number that is incremented on every retry; a nonzero
   return makes bget() search the free lists again. */
typedef int (*bget_compact_t)(size_t, int);
/* Acquisition hook: bget() calls it with a byte count to obtain memory for
   a direct buffer or a new expansion block; NULL means failure. */
typedef void *(*bget_acquire_t)(size_t);
/* Release hook: brel() hands back memory obtained through the acquisition
   hook. */
typedef void (*bget_release_t)(void *);
Jim Cownie5e8470a2013-09-27 10:38:44 +000025
/* NOTE: bufsize must be a signed datatype: the sign of a header's bsize
   field distinguishes free (positive) from allocated (negative) buffers. */

#if KMP_OS_WINDOWS
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#else
typedef kmp_int64 bufsize;
#endif
#else
typedef ssize_t bufsize;
#endif
37
/* The three modes of operation are fifo search, lifo search, and best-fit.
   bget() walks a bin backwards (via blink) in lifo mode and forwards (via
   flink) otherwise; best-fit first scans the whole bin for the smallest
   buffer that is large enough. */

typedef enum bget_mode {
  bget_mode_fifo = 0,
  bget_mode_lifo = 1,
  bget_mode_best = 2
} bget_mode_t;
45
/* Forward declarations of the per-thread BGET primitives. Each one operates
   on the buffer pool that belongs to thread th. */
static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);
Jim Cownie5e8470a2013-09-27 10:38:44 +000054
Jim Cownie5e8470a2013-09-27 10:38:44 +000055/* BGET CONFIGURATION */
Jonathan Peyton30419822017-05-12 18:01:32 +000056/* Buffer allocation size quantum: all buffers allocated are a
57 multiple of this size. This MUST be a power of two. */
Jim Cownie5e8470a2013-09-27 10:38:44 +000058
/* On IA-32 architecture with Linux* OS, malloc() does not
   ensure 16 byte alignment */
Jim Cownie5e8470a2013-09-27 10:38:44 +000061
/* Pick the allocation quantum and the type used to force header alignment:
   8 bytes / double on IA-32 or when _Quad is unavailable, 16 bytes / _Quad
   otherwise. */
#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant 8
#define AlignType double

#else

#define SizeQuant 16
#define AlignType _Quad

#endif
73
// Define this symbol to enable the bstats() function which calculates the
// total free space in the buffer pool, the largest available buffer, and the
// total space currently allocated.
// It is defined unconditionally here, so the statistics fields of thr_data_t
// are always present.
#define BufStats 1

// The remaining switches are only turned on in debug builds.
#ifdef KMP_DEBUG

// Define this symbol to enable the bpoold() function which dumps the buffers
// in a buffer pool.
#define BufDump 1

// Define this symbol to enable the bpoolv() function for validating a buffer
// pool.
#define BufValid 1

// Define this symbol to enable the bufdump() function which allows dumping the
// contents of an allocated or free buffer.
#define DumpData 1

// The switches below stay off unless NOT_USED_NOW is defined.
#ifdef NOT_USED_NOW

// Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
// who attempt to use pointers into released buffers.
#define FreeWipe 1

// Use a best fit algorithm when searching for space for an allocation request.
// This uses memory more efficiently, but allocation will be much slower.
#define BestFit 1

#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */
105
/* Lower size bound of each free-list bin, in ascending order. Entry 0 is 0
   so that every positive size falls into some bin; bget_get_bin() searches
   this table. */
static bufsize bget_bin_size[] = {
    0,
    //    1 << 6,    /* .5 Cache line */
    1 << 7, /* 1 Cache line, new */
    1 << 8, /* 2 Cache lines */
    1 << 9, /* 4 Cache lines, new */
    1 << 10, /* 8 Cache lines */
    1 << 11, /* 16 Cache lines, new */
    1 << 12, 1 << 13, /* new */
    1 << 14, 1 << 15, /* new */
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /*  1MB */
    1 << 21, /*  2MB */
    1 << 22, /*  4MB */
    1 << 23, /*  8MB */
    1 << 24, /* 16MB */
    1 << 25, /* 32MB */
};

/* Number of bins, i.e. number of entries in bget_bin_size[]. */
#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
Jim Cownie5e8470a2013-09-27 10:38:44 +0000125
126struct bfhead;
127
Jonathan Peyton30419822017-05-12 18:01:32 +0000128// Declare the interface, including the requested buffer size type, bufsize.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000129
/* Queue links: the doubly-linked list pointers stored in a free buffer.
   When a whole free buffer is handed out, the user area starts at these
   links (see bget()). */
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;
135
/* Header in allocated and free buffers */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool. Readers mask
                       off the low bit, which may carry a mark. */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated. */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated;
                    0 marks a buffer acquired directly through acqfcn. */
} bhead2_t;
143
/* Make sure the bhead structure is a multiple of SizeQuant in size.
   b_align forces SizeQuant alignment and b_pad rounds the union up to the
   next multiple of SizeQuant. Note that the padding expression adds a full
   extra SizeQuant when sizeof(bhead2_t) is already a multiple. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;
/* View an arbitrary address as a buffer header. */
#define BH(p) ((bhead_t *)(p))
Jim Cownie5e8470a2013-09-27 10:38:44 +0000152
/* Header in directly allocated buffers (by acqfcn). The common header comes
   last so that the user area is preceded by a bhead_t exactly as for pool
   buffers; its bsize is set to 0 to identify the buffer as direct. */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;
/* View an arbitrary address as a direct-buffer header. */
#define BDH(p) ((bdhead_t *)(p))
Jim Cownie5e8470a2013-09-27 10:38:44 +0000159
/* Header in free buffers: the common header followed by the free-list
   links, which occupy the start of what is the user area once the buffer is
   allocated. */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;
/* View an arbitrary address as a free-buffer header. */
#define BFH(p) ((bfhead_t *)(p))
Jim Cownie5e8470a2013-09-27 10:38:44 +0000166
/* Per-thread allocator state, reachable through th->th.th_local.bget_data. */
typedef struct thr_data {
  /* One circular free list per size bin; each entry is the list's sentinel
     (an empty bin links to itself). */
  bfhead_t freelist[MAX_BGET_BINS];
#if BufStats
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
#endif /* BufStats */

  /* Automatic expansion block management functions */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;

  bget_mode_t mode; /* what allocation mode to use? */

  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so far
                    */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;
191
/* Minimum allocation quantum: a buffer must be able to hold the free-list
   links, so the minimum is the larger of SizeQuant and sizeof(qlinks_t). */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
/* Largest positive bufsize with the low (SizeQuant - 1) bits cleared, i.e.
   the sign bit and the sub-quantum bits are masked off. */
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
// Maximum for the requested size.
Jim Cownie5e8470a2013-09-27 10:38:44 +0000199
/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block. The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept: it is built as
   -(2 * (2^(bits-2) - 1)) - 2 so that no intermediate value overflows. */

#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
Jim Cownie5e8470a2013-09-27 10:38:44 +0000206
207/* Thread Data management routines */
Jonathan Peyton30419822017-05-12 18:01:32 +0000208static int bget_get_bin(bufsize size) {
209 // binary chop bins
210 int lo = 0, hi = MAX_BGET_BINS - 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000211
Jonathan Peyton30419822017-05-12 18:01:32 +0000212 KMP_DEBUG_ASSERT(size > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000213
Jonathan Peyton30419822017-05-12 18:01:32 +0000214 while ((hi - lo) > 1) {
215 int mid = (lo + hi) >> 1;
216 if (size < bget_bin_size[mid])
217 hi = mid - 1;
218 else
219 lo = mid;
220 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000221
Jonathan Peyton30419822017-05-12 18:01:32 +0000222 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000223
Jonathan Peyton30419822017-05-12 18:01:32 +0000224 return lo;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000225}
226
Jonathan Peyton30419822017-05-12 18:01:32 +0000227static void set_thr_data(kmp_info_t *th) {
228 int i;
229 thr_data_t *data;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000230
Jonathan Peyton30419822017-05-12 18:01:32 +0000231 data = (thr_data_t *)((!th->th.th_local.bget_data)
232 ? __kmp_allocate(sizeof(*data))
233 : th->th.th_local.bget_data);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000234
Jonathan Peyton30419822017-05-12 18:01:32 +0000235 memset(data, '\0', sizeof(*data));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000236
Jonathan Peyton30419822017-05-12 18:01:32 +0000237 for (i = 0; i < MAX_BGET_BINS; ++i) {
238 data->freelist[i].ql.flink = &data->freelist[i];
239 data->freelist[i].ql.blink = &data->freelist[i];
240 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000241
Jonathan Peyton30419822017-05-12 18:01:32 +0000242 th->th.th_local.bget_data = data;
243 th->th.th_local.bget_list = 0;
244#if !USE_CMP_XCHG_FOR_BGET
Jim Cownie5e8470a2013-09-27 10:38:44 +0000245#ifdef USE_QUEUING_LOCK_FOR_BGET
Jonathan Peyton30419822017-05-12 18:01:32 +0000246 __kmp_init_lock(&th->th.th_local.bget_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000247#else
Jonathan Peyton30419822017-05-12 18:01:32 +0000248 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000249#endif /* USE_LOCK_FOR_BGET */
250#endif /* ! USE_CMP_XCHG_FOR_BGET */
251}
252
Jonathan Peyton30419822017-05-12 18:01:32 +0000253static thr_data_t *get_thr_data(kmp_info_t *th) {
254 thr_data_t *data;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000255
Jonathan Peyton30419822017-05-12 18:01:32 +0000256 data = (thr_data_t *)th->th.th_local.bget_data;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000257
Jonathan Peyton30419822017-05-12 18:01:32 +0000258 KMP_DEBUG_ASSERT(data != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000259
Jonathan Peyton30419822017-05-12 18:01:32 +0000260 return data;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000261}
262
Jim Cownie5e8470a2013-09-27 10:38:44 +0000263/* Walk the free list and release the enqueued buffers */
Jonathan Peyton30419822017-05-12 18:01:32 +0000264static void __kmp_bget_dequeue(kmp_info_t *th) {
265 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000266
Jonathan Peyton30419822017-05-12 18:01:32 +0000267 if (p != 0) {
268#if USE_CMP_XCHG_FOR_BGET
269 {
270 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
Andrey Churbanovc47afcd2017-07-03 11:24:08 +0000271 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
Andrey Churbanov5ba90c72017-07-17 09:03:14 +0000272 CCAST(void *, old_value), nullptr)) {
Jonathan Peyton30419822017-05-12 18:01:32 +0000273 KMP_CPU_PAUSE();
274 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
275 }
Andrey Churbanovc47afcd2017-07-03 11:24:08 +0000276 p = CCAST(void *, old_value);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000277 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000278#else /* ! USE_CMP_XCHG_FOR_BGET */
279#ifdef USE_QUEUING_LOCK_FOR_BGET
280 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
281#else
282 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
283#endif /* USE_QUEUING_LOCK_FOR_BGET */
284
285 p = (void *)th->th.th_local.bget_list;
286 th->th.th_local.bget_list = 0;
287
288#ifdef USE_QUEUING_LOCK_FOR_BGET
289 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
290#else
291 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
292#endif
293#endif /* USE_CMP_XCHG_FOR_BGET */
294
295 /* Check again to make sure the list is not empty */
296 while (p != 0) {
297 void *buf = p;
298 bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
299
300 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
301 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
302 (kmp_uintptr_t)th); // clear possible mark
303 KMP_DEBUG_ASSERT(b->ql.blink == 0);
304
305 p = (void *)b->ql.flink;
306
307 brel(th, buf);
308 }
309 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000310}
311
312/* Chain together the free buffers by using the thread owner field */
Jonathan Peyton30419822017-05-12 18:01:32 +0000313static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
Jim Cownie5e8470a2013-09-27 10:38:44 +0000314#ifdef USE_QUEUING_LOCK_FOR_BGET
Jonathan Peyton30419822017-05-12 18:01:32 +0000315 ,
316 kmp_int32 rel_gtid
Jim Cownie5e8470a2013-09-27 10:38:44 +0000317#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000318 ) {
319 bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000320
Jonathan Peyton30419822017-05-12 18:01:32 +0000321 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
322 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
323 (kmp_uintptr_t)th); // clear possible mark
Jim Cownie5e8470a2013-09-27 10:38:44 +0000324
Jonathan Peyton30419822017-05-12 18:01:32 +0000325 b->ql.blink = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000326
Jonathan Peyton30419822017-05-12 18:01:32 +0000327 KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
328 __kmp_gtid_from_thread(th)));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000329
330#if USE_CMP_XCHG_FOR_BGET
Jonathan Peyton30419822017-05-12 18:01:32 +0000331 {
332 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
333 /* the next pointer must be set before setting bget_list to buf to avoid
334 exposing a broken list to other threads, even for an instant. */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +0000335 b->ql.flink = BFH(CCAST(void *, old_value));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000336
Andrey Churbanovc47afcd2017-07-03 11:24:08 +0000337 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
338 CCAST(void *, old_value), buf)) {
Jonathan Peyton30419822017-05-12 18:01:32 +0000339 KMP_CPU_PAUSE();
340 old_value = TCR_PTR(th->th.th_local.bget_list);
341 /* the next pointer must be set before setting bget_list to buf to avoid
342 exposing a broken list to other threads, even for an instant. */
Andrey Churbanovc47afcd2017-07-03 11:24:08 +0000343 b->ql.flink = BFH(CCAST(void *, old_value));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000344 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000345 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000346#else /* ! USE_CMP_XCHG_FOR_BGET */
Jonathan Peyton30419822017-05-12 18:01:32 +0000347#ifdef USE_QUEUING_LOCK_FOR_BGET
348 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
349#else
350 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
351#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000352
Jonathan Peyton30419822017-05-12 18:01:32 +0000353 b->ql.flink = BFH(th->th.th_local.bget_list);
354 th->th.th_local.bget_list = (void *)buf;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000355
Jonathan Peyton30419822017-05-12 18:01:32 +0000356#ifdef USE_QUEUING_LOCK_FOR_BGET
357 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
358#else
359 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
360#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000361#endif /* USE_CMP_XCHG_FOR_BGET */
362}
363
364/* insert buffer back onto a new freelist */
Jonathan Peyton30419822017-05-12 18:01:32 +0000365static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
366 int bin;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000367
Jonathan Peyton30419822017-05-12 18:01:32 +0000368 KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
369 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000370
Jonathan Peyton30419822017-05-12 18:01:32 +0000371 bin = bget_get_bin(b->bh.bb.bsize);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000372
Jonathan Peyton30419822017-05-12 18:01:32 +0000373 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
374 &thr->freelist[bin]);
375 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
376 &thr->freelist[bin]);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000377
Jonathan Peyton30419822017-05-12 18:01:32 +0000378 b->ql.flink = &thr->freelist[bin];
379 b->ql.blink = thr->freelist[bin].ql.blink;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000380
Jonathan Peyton30419822017-05-12 18:01:32 +0000381 thr->freelist[bin].ql.blink = b;
382 b->ql.blink->ql.flink = b;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000383}
384
385/* unlink the buffer from the old freelist */
Jonathan Peyton30419822017-05-12 18:01:32 +0000386static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
387 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
388 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000389
Jonathan Peyton30419822017-05-12 18:01:32 +0000390 b->ql.blink->ql.flink = b->ql.flink;
391 b->ql.flink->ql.blink = b->ql.blink;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000392}
393
Jim Cownie5e8470a2013-09-27 10:38:44 +0000394/* GET STATS -- check info on free list */
Jonathan Peyton30419822017-05-12 18:01:32 +0000395static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
396 thr_data_t *thr = get_thr_data(th);
397 int bin;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000398
Jonathan Peyton30419822017-05-12 18:01:32 +0000399 *total_free = *max_free = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000400
Jonathan Peyton30419822017-05-12 18:01:32 +0000401 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
402 bfhead_t *b, *best;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000403
Jonathan Peyton30419822017-05-12 18:01:32 +0000404 best = &thr->freelist[bin];
405 b = best->ql.flink;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000406
Jonathan Peyton30419822017-05-12 18:01:32 +0000407 while (b != &thr->freelist[bin]) {
408 *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
409 if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
410 best = b;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000411
Jonathan Peyton30419822017-05-12 18:01:32 +0000412 /* Link to next buffer */
413 b = b->ql.flink;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000414 }
415
Jonathan Peyton30419822017-05-12 18:01:32 +0000416 if (*max_free < best->bh.bb.bsize)
417 *max_free = best->bh.bb.bsize;
418 }
419
420 if (*max_free > (bufsize)sizeof(bhead_t))
421 *max_free -= sizeof(bhead_t);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000422}
423
/* BGET -- Allocate a buffer.

   Returns a pointer to a user area of at least requested_size bytes taken
   from the pool of thread th, or NULL when the request is negative, exceeds
   MaxSize once the header is added, or cannot be satisfied.

   Steps:
     1. Release buffers other threads queued for th (__kmp_bget_dequeue).
     2. Round the size up to the minimum quantum and to SizeQuant, and add
        the header size.
     3. Search the bins from the one matching the size upwards, in the order
        given by thr->mode; split the found buffer if the remainder is large
        enough, otherwise hand it out whole.
     4. On failure ask the compact callback (if any) and retry while it
        returns nonzero.
     5. On final failure use the acquire callback (if any): directly for
        requests larger than an expansion block, otherwise to add a new
        expansion block and retry once via recursion. */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0;
  int use_blink = 0;
  /* For BestFit */
  bfhead_t *best;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  // Round up to the next multiple of SizeQuant (a power of two).
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  // lifo mode walks each bin backwards, from the most recently inserted end.
  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process to allow compaction to
     intervene in case we don't find a suitable buffer in the chain. */

  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to next buffer */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        // "best" stays at the sentinel until a large-enough buffer is seen.
        best = &thr->freelist[bin];

        /* Scan the whole free list of this bin for the smallest buffer that
           is big enough to hold the requested size. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        // Either the best candidate, or the sentinel (skips the loop below).
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          // Buffer is big enough to satisfy the request. Allocate it to the
          // caller. We must decide whether the buffer is large enough to split
          // into the part given to the caller and a free buffer that remains
          // on the free list, or whether the entire buffer should be removed
          // from the free list and given to the caller in its entirety. We
          // only split the buffer if enough room remains for a header plus the
          // minimum quantum of allocation.
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            // ba: header of the allocated part, carved from the END of b.
            // bn: header of the buffer that follows b in memory.
            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr,
                    th); // not an allocated address (do not mark it)
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            // unlink buffer from old freelist, and reinsert into new freelist
            // (the shrunken remainder may now belong to a smaller bin)
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);
#if BufStats
            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */
#endif
            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            // Here ba is the header of the buffer FOLLOWING b in memory.
            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split.  Give  the  whole
               shebang to the caller and remove it from the free list. */

            __kmp_bget_remove_from_freelist(b);
#if BufStats
            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            // NOTE(review): the owner is written to the following header
            // (ba), not to b itself; b keeps whatever bthr it already had.
            TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the allocation
       again. */

    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. */

  /* Don't give up yet -- look in the reserve supply. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      // Swap the pool-buffer header overhead for the direct-buffer overhead.
      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      /* richryan */
      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        // Mark the buffer special by setting size field of its header to zero.
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
        // because direct buffer never goes to free list
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;
#if BufStats
        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */
#endif
        // The user area starts right after the direct-buffer header.
        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      }

    } else {

      /* Try to obtain a new expansion block */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      /* richryan */
      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        // Add the block to the pool, then retry with the ORIGINAL request
        // size (the recursive call redoes the rounding itself).
        bpool(th, newpool, thr->exp_incr);
        buf = bget(
            th, requested_size); /* This can't, I say, can't get into a loop. */
        return buf;
      }
    }
  }

  /* Still no buffer available */

  return NULL;
}
632
633/* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
634 the entire contents of the buffer to zero, not just the
635 region requested by the caller. */
636
Jonathan Peyton30419822017-05-12 18:01:32 +0000637static void *bgetz(kmp_info_t *th, bufsize size) {
638 char *buf = (char *)bget(th, size);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000639
Jonathan Peyton30419822017-05-12 18:01:32 +0000640 if (buf != NULL) {
641 bhead_t *b;
642 bufsize rsize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000643
Jonathan Peyton30419822017-05-12 18:01:32 +0000644 b = BH(buf - sizeof(bhead_t));
645 rsize = -(b->bb.bsize);
646 if (rsize == 0) {
647 bdhead_t *bd;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000648
Jonathan Peyton30419822017-05-12 18:01:32 +0000649 bd = BDH(buf - sizeof(bdhead_t));
650 rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
651 } else {
652 rsize -= sizeof(bhead_t);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000653 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000654
655 KMP_DEBUG_ASSERT(rsize >= size);
656
657 (void)memset(buf, 0, (bufsize)rsize);
658 }
659 return ((void *)buf);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000660}
661
662/* BGETR -- Reallocate a buffer. This is a minimal implementation,
663 simply in terms of brel() and bget(). It could be
664 enhanced to allow the buffer to grow into adjacent free
665 blocks and to avoid moving data unnecessarily. */
666
Jonathan Peyton30419822017-05-12 18:01:32 +0000667static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
668 void *nbuf;
669 bufsize osize; /* Old size of buffer */
670 bhead_t *b;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000671
Jonathan Peyton30419822017-05-12 18:01:32 +0000672 nbuf = bget(th, size);
673 if (nbuf == NULL) { /* Acquire new buffer */
674 return NULL;
675 }
676 if (buf == NULL) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000677 return nbuf;
Jonathan Peyton30419822017-05-12 18:01:32 +0000678 }
679 b = BH(((char *)buf) - sizeof(bhead_t));
680 osize = -b->bb.bsize;
681 if (osize == 0) {
682 /* Buffer acquired directly through acqfcn. */
683 bdhead_t *bd;
684
685 bd = BDH(((char *)buf) - sizeof(bdhead_t));
686 osize = bd->tsize - (bufsize)sizeof(bdhead_t);
687 } else {
688 osize -= sizeof(bhead_t);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +0000689 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000690
691 KMP_DEBUG_ASSERT(osize > 0);
692
693 (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
694 (size_t)((size < osize) ? size : osize));
695 brel(th, buf);
696
697 return nbuf;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000698}
699
700/* BREL -- Release a buffer. */
Jonathan Peyton30419822017-05-12 18:01:32 +0000701static void brel(kmp_info_t *th, void *buf) {
702 thr_data_t *thr = get_thr_data(th);
703 bfhead_t *b, *bn;
704 kmp_info_t *bth;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000705
Jonathan Peyton30419822017-05-12 18:01:32 +0000706 KMP_DEBUG_ASSERT(buf != NULL);
707 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000708
Jonathan Peyton30419822017-05-12 18:01:32 +0000709 b = BFH(((char *)buf) - sizeof(bhead_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000710
Jonathan Peyton30419822017-05-12 18:01:32 +0000711 if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
712 bdhead_t *bdh;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000713
Jonathan Peyton30419822017-05-12 18:01:32 +0000714 bdh = BDH(((char *)buf) - sizeof(bdhead_t));
715 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000716#if BufStats
Jonathan Peyton30419822017-05-12 18:01:32 +0000717 thr->totalloc -= (size_t)bdh->tsize;
718 thr->numdrel++; /* Number of direct releases */
719 thr->numrel++; /* Increment number of brel() calls */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000720#endif /* BufStats */
721#ifdef FreeWipe
Jonathan Peyton30419822017-05-12 18:01:32 +0000722 (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000723#endif /* FreeWipe */
724
Jonathan Peyton30419822017-05-12 18:01:32 +0000725 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000726
Jonathan Peyton30419822017-05-12 18:01:32 +0000727 KMP_DEBUG_ASSERT(thr->relfcn != 0);
728 (*thr->relfcn)((void *)bdh); /* Release it directly. */
729 return;
730 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000731
Jonathan Peyton30419822017-05-12 18:01:32 +0000732 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
733 ~1); // clear possible mark before comparison
734 if (bth != th) {
735 /* Add this buffer to be released by the owning thread later */
736 __kmp_bget_enqueue(bth, buf
Jim Cownie5e8470a2013-09-27 10:38:44 +0000737#ifdef USE_QUEUING_LOCK_FOR_BGET
Jonathan Peyton30419822017-05-12 18:01:32 +0000738 ,
739 __kmp_gtid_from_thread(th)
Jim Cownie5e8470a2013-09-27 10:38:44 +0000740#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000741 );
742 return;
743 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000744
Jonathan Peyton30419822017-05-12 18:01:32 +0000745 /* Buffer size must be negative, indicating that the buffer is allocated. */
746 if (b->bh.bb.bsize >= 0) {
747 bn = NULL;
748 }
749 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000750
Jonathan Peyton30419822017-05-12 18:01:32 +0000751 /* Back pointer in next buffer must be zero, indicating the same thing: */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000752
Jonathan Peyton30419822017-05-12 18:01:32 +0000753 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000754
755#if BufStats
Jonathan Peyton30419822017-05-12 18:01:32 +0000756 thr->numrel++; /* Increment number of brel() calls */
757 thr->totalloc += (size_t)b->bh.bb.bsize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000758#endif
759
Jonathan Peyton30419822017-05-12 18:01:32 +0000760 /* If the back link is nonzero, the previous buffer is free. */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000761
Jonathan Peyton30419822017-05-12 18:01:32 +0000762 if (b->bh.bb.prevfree != 0) {
763 /* The previous buffer is free. Consolidate this buffer with it by adding
764 the length of this buffer to the previous free buffer. Note that we
765 subtract the size in the buffer being released, since it's negative to
766 indicate that the buffer is allocated. */
Ed Maste414544c2017-07-07 21:06:05 +0000767 bufsize size = b->bh.bb.bsize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000768
Jonathan Peyton30419822017-05-12 18:01:32 +0000769 /* Make the previous buffer the one we're working on. */
770 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
771 b->bh.bb.prevfree);
772 b = BFH(((char *)b) - b->bh.bb.prevfree);
773 b->bh.bb.bsize -= size;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000774
Jonathan Peyton30419822017-05-12 18:01:32 +0000775 /* unlink the buffer from the old freelist */
776 __kmp_bget_remove_from_freelist(b);
777 } else {
778 /* The previous buffer isn't allocated. Mark this buffer size as positive
779 (i.e. free) and fall through to place the buffer on the free list as an
780 isolated free block. */
781 b->bh.bb.bsize = -b->bh.bb.bsize;
782 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000783
Jonathan Peyton30419822017-05-12 18:01:32 +0000784 /* insert buffer back onto a new freelist */
785 __kmp_bget_insert_into_freelist(thr, b);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000786
Jonathan Peyton30419822017-05-12 18:01:32 +0000787 /* Now we look at the next buffer in memory, located by advancing from
788 the start of this buffer by its size, to see if that buffer is
789 free. If it is, we combine this buffer with the next one in
790 memory, dechaining the second buffer from the free list. */
791 bn = BFH(((char *)b) + b->bh.bb.bsize);
792 if (bn->bh.bb.bsize > 0) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000793
Jonathan Peyton30419822017-05-12 18:01:32 +0000794 /* The buffer is free. Remove it from the free list and add
795 its size to that of our buffer. */
796 KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
797 bn->bh.bb.bsize);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000798
Jonathan Peyton30419822017-05-12 18:01:32 +0000799 __kmp_bget_remove_from_freelist(bn);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000800
Jonathan Peyton30419822017-05-12 18:01:32 +0000801 b->bh.bb.bsize += bn->bh.bb.bsize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000802
Jonathan Peyton30419822017-05-12 18:01:32 +0000803 /* unlink the buffer from the old freelist, and reinsert it into the new
804 * freelist */
805 __kmp_bget_remove_from_freelist(b);
806 __kmp_bget_insert_into_freelist(thr, b);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000807
Jonathan Peyton30419822017-05-12 18:01:32 +0000808 /* Finally, advance to the buffer that follows the newly
809 consolidated free block. We must set its backpointer to the
810 head of the consolidated free block. We know the next block
811 must be an allocated block because the process of recombination
812 guarantees that two free blocks will never be contiguous in
813 memory. */
814 bn = BFH(((char *)b) + b->bh.bb.bsize);
815 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000816#ifdef FreeWipe
Jonathan Peyton30419822017-05-12 18:01:32 +0000817 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
818 (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000819#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000820 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000821
Jonathan Peyton30419822017-05-12 18:01:32 +0000822 /* The next buffer is allocated. Set the backpointer in it to point
823 to this buffer; the previous free buffer in memory. */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000824
Jonathan Peyton30419822017-05-12 18:01:32 +0000825 bn->bh.bb.prevfree = b->bh.bb.bsize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000826
Jonathan Peyton30419822017-05-12 18:01:32 +0000827 /* If a block-release function is defined, and this free buffer
828 constitutes the entire block, release it. Note that pool_len
829 is defined in such a way that the test will fail unless all
830 pool blocks are the same size. */
831 if (thr->relfcn != 0 &&
832 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
Jim Cownie5e8470a2013-09-27 10:38:44 +0000833#if BufStats
Jonathan Peyton30419822017-05-12 18:01:32 +0000834 if (thr->numpblk !=
835 1) { /* Do not release the last buffer until finalization time */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000836#endif
837
Jonathan Peyton30419822017-05-12 18:01:32 +0000838 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
839 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
840 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
841 b->bh.bb.bsize);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000842
Jonathan Peyton30419822017-05-12 18:01:32 +0000843 /* Unlink the buffer from the free list */
844 __kmp_bget_remove_from_freelist(b);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000845
Jonathan Peyton30419822017-05-12 18:01:32 +0000846 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000847
Jonathan Peyton30419822017-05-12 18:01:32 +0000848 (*thr->relfcn)(b);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000849#if BufStats
Jonathan Peyton30419822017-05-12 18:01:32 +0000850 thr->numprel++; /* Nr of expansion block releases */
851 thr->numpblk--; /* Total number of blocks */
852 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000853
Jonathan Peyton30419822017-05-12 18:01:32 +0000854 // avoid leaving stale last_pool pointer around if it is being dealloced
855 if (thr->last_pool == b)
856 thr->last_pool = 0;
857 } else {
858 thr->last_pool = b;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000859 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000860#endif /* BufStats */
861 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000862}
863
864/* BECTL -- Establish automatic pool expansion control */
Jonathan Peyton30419822017-05-12 18:01:32 +0000865static void bectl(kmp_info_t *th, bget_compact_t compact,
866 bget_acquire_t acquire, bget_release_t release,
867 bufsize pool_incr) {
868 thr_data_t *thr = get_thr_data(th);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000869
Jonathan Peyton30419822017-05-12 18:01:32 +0000870 thr->compfcn = compact;
871 thr->acqfcn = acquire;
872 thr->relfcn = release;
873 thr->exp_incr = pool_incr;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000874}
875
876/* BPOOL -- Add a region of memory to the buffer pool. */
Jonathan Peyton30419822017-05-12 18:01:32 +0000877static void bpool(kmp_info_t *th, void *buf, bufsize len) {
878 /* int bin = 0; */
879 thr_data_t *thr = get_thr_data(th);
880 bfhead_t *b = BFH(buf);
881 bhead_t *bn;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000882
Jonathan Peyton30419822017-05-12 18:01:32 +0000883 __kmp_bget_dequeue(th); /* Release any queued buffers */
Jim Cownie5e8470a2013-09-27 10:38:44 +0000884
885#ifdef SizeQuant
Jonathan Peyton30419822017-05-12 18:01:32 +0000886 len &= ~(SizeQuant - 1);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000887#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000888 if (thr->pool_len == 0) {
889 thr->pool_len = len;
890 } else if (len != thr->pool_len) {
891 thr->pool_len = -1;
892 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000893#if BufStats
Jonathan Peyton30419822017-05-12 18:01:32 +0000894 thr->numpget++; /* Number of block acquisitions */
895 thr->numpblk++; /* Number of blocks total */
896 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000897#endif /* BufStats */
898
Jonathan Peyton30419822017-05-12 18:01:32 +0000899 /* Since the block is initially occupied by a single free buffer,
900 it had better not be (much) larger than the largest buffer
901 whose size we can store in bhead.bb.bsize. */
902 KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000903
Jonathan Peyton30419822017-05-12 18:01:32 +0000904 /* Clear the backpointer at the start of the block to indicate that
905 there is no free block prior to this one. That blocks
906 recombination when the first block in memory is released. */
907 b->bh.bb.prevfree = 0;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000908
Jonathan Peyton30419822017-05-12 18:01:32 +0000909 /* Create a dummy allocated buffer at the end of the pool. This dummy
910 buffer is seen when a buffer at the end of the pool is released and
911 blocks recombination of the last buffer with the dummy buffer at
912 the end. The length in the dummy buffer is set to the largest
913 negative number to denote the end of the pool for diagnostic
914 routines (this specific value is not counted on by the actual
915 allocation and release functions). */
916 len -= sizeof(bhead_t);
917 b->bh.bb.bsize = (bufsize)len;
918 /* Set the owner of this buffer */
919 TCW_PTR(b->bh.bb.bthr,
920 (kmp_info_t *)((kmp_uintptr_t)th |
921 1)); // mark the buffer as allocated address
Jim Cownie5e8470a2013-09-27 10:38:44 +0000922
Jonathan Peyton30419822017-05-12 18:01:32 +0000923 /* Chain the new block to the free list. */
924 __kmp_bget_insert_into_freelist(thr, b);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000925
926#ifdef FreeWipe
Jonathan Peyton30419822017-05-12 18:01:32 +0000927 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
928 (size_t)(len - sizeof(bfhead_t)));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000929#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000930 bn = BH(((char *)b) + len);
931 bn->bb.prevfree = (bufsize)len;
932 /* Definition of ESent assumes two's complement! */
933 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000934
Jonathan Peyton30419822017-05-12 18:01:32 +0000935 bn->bb.bsize = ESent;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000936}
937
Jim Cownie5e8470a2013-09-27 10:38:44 +0000938/* BFREED -- Dump the free lists for this thread. */
Jonathan Peyton30419822017-05-12 18:01:32 +0000939static void bfreed(kmp_info_t *th) {
940 int bin = 0, count = 0;
941 int gtid = __kmp_gtid_from_thread(th);
942 thr_data_t *thr = get_thr_data(th);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000943
944#if BufStats
Jonathan Peyton30419822017-05-12 18:01:32 +0000945 __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
946 " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
947 " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
948 " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
949 " drel=%" KMP_INT64_SPEC "\n",
950 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
951 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
952 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
953 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000954#endif
955
Jonathan Peyton30419822017-05-12 18:01:32 +0000956 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
957 bfhead_t *b;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000958
Jonathan Peyton30419822017-05-12 18:01:32 +0000959 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
960 b = b->ql.flink) {
961 bufsize bs = b->bh.bb.bsize;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000962
Jonathan Peyton30419822017-05-12 18:01:32 +0000963 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
964 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
965 KMP_DEBUG_ASSERT(bs > 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000966
Jonathan Peyton30419822017-05-12 18:01:32 +0000967 count += 1;
Jim Cownie5e8470a2013-09-27 10:38:44 +0000968
Jonathan Peyton30419822017-05-12 18:01:32 +0000969 __kmp_printf_no_lock(
970 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
971 (long)bs);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000972#ifdef FreeWipe
Jonathan Peyton30419822017-05-12 18:01:32 +0000973 {
974 char *lerr = ((char *)b) + sizeof(bfhead_t);
975 if ((bs > sizeof(bfhead_t)) &&
976 ((*lerr != 0x55) ||
977 (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
978 0))) {
979 __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
980 "free block have been overstored.)\n",
981 gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000982 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000983 }
984#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +0000985 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000986 }
Jim Cownie5e8470a2013-09-27 10:38:44 +0000987
Jonathan Peyton30419822017-05-12 18:01:32 +0000988 if (count == 0)
989 __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000990}
991
Jonathan Peyton30419822017-05-12 18:01:32 +0000992void __kmp_initialize_bget(kmp_info_t *th) {
993 KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
Jim Cownie5e8470a2013-09-27 10:38:44 +0000994
Jonathan Peyton30419822017-05-12 18:01:32 +0000995 set_thr_data(th);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000996
Jonathan Peyton30419822017-05-12 18:01:32 +0000997 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
998 (bufsize)__kmp_malloc_pool_incr);
Jim Cownie5e8470a2013-09-27 10:38:44 +0000999}
1000
Jonathan Peyton30419822017-05-12 18:01:32 +00001001void __kmp_finalize_bget(kmp_info_t *th) {
1002 thr_data_t *thr;
1003 bfhead_t *b;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001004
Jonathan Peyton30419822017-05-12 18:01:32 +00001005 KMP_DEBUG_ASSERT(th != 0);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001006
1007#if BufStats
Jonathan Peyton30419822017-05-12 18:01:32 +00001008 thr = (thr_data_t *)th->th.th_local.bget_data;
1009 KMP_DEBUG_ASSERT(thr != NULL);
1010 b = thr->last_pool;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001011
Jonathan Peyton30419822017-05-12 18:01:32 +00001012 /* If a block-release function is defined, and this free buffer constitutes
1013 the entire block, release it. Note that pool_len is defined in such a way
1014 that the test will fail unless all pool blocks are the same size. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001015
Jonathan Peyton30419822017-05-12 18:01:32 +00001016 // Deallocate the last pool if one exists because we no longer do it in brel()
1017 if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1018 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
1019 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1020 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
1021 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
1022 b->bh.bb.bsize);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001023
Jonathan Peyton30419822017-05-12 18:01:32 +00001024 /* Unlink the buffer from the free list */
1025 __kmp_bget_remove_from_freelist(b);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001026
Jonathan Peyton30419822017-05-12 18:01:32 +00001027 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001028
Jonathan Peyton30419822017-05-12 18:01:32 +00001029 (*thr->relfcn)(b);
1030 thr->numprel++; /* Nr of expansion block releases */
1031 thr->numpblk--; /* Total number of blocks */
1032 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1033 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001034#endif /* BufStats */
1035
Jonathan Peyton30419822017-05-12 18:01:32 +00001036 /* Deallocate bget_data */
1037 if (th->th.th_local.bget_data != NULL) {
1038 __kmp_free(th->th.th_local.bget_data);
1039 th->th.th_local.bget_data = NULL;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001040 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001041}
1042
Jonathan Peyton30419822017-05-12 18:01:32 +00001043void kmpc_set_poolsize(size_t size) {
1044 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1045 (bget_release_t)free, (bufsize)size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001046}
1047
Jonathan Peyton30419822017-05-12 18:01:32 +00001048size_t kmpc_get_poolsize(void) {
1049 thr_data_t *p;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001050
Jonathan Peyton30419822017-05-12 18:01:32 +00001051 p = get_thr_data(__kmp_get_thread());
Jim Cownie5e8470a2013-09-27 10:38:44 +00001052
Jonathan Peyton30419822017-05-12 18:01:32 +00001053 return p->exp_incr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001054}
1055
Jonathan Peyton30419822017-05-12 18:01:32 +00001056void kmpc_set_poolmode(int mode) {
1057 thr_data_t *p;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001058
Jonathan Peyton30419822017-05-12 18:01:32 +00001059 if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1060 mode == bget_mode_best) {
1061 p = get_thr_data(__kmp_get_thread());
1062 p->mode = (bget_mode_t)mode;
1063 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001064}
1065
Jonathan Peyton30419822017-05-12 18:01:32 +00001066int kmpc_get_poolmode(void) {
1067 thr_data_t *p;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001068
Jonathan Peyton30419822017-05-12 18:01:32 +00001069 p = get_thr_data(__kmp_get_thread());
Jim Cownie5e8470a2013-09-27 10:38:44 +00001070
Jonathan Peyton30419822017-05-12 18:01:32 +00001071 return p->mode;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001072}
1073
Jonathan Peyton30419822017-05-12 18:01:32 +00001074void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
1075 kmp_info_t *th = __kmp_get_thread();
1076 bufsize a, b;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001077
Jonathan Peyton30419822017-05-12 18:01:32 +00001078 __kmp_bget_dequeue(th); /* Release any queued buffers */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001079
Jonathan Peyton30419822017-05-12 18:01:32 +00001080 bcheck(th, &a, &b);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001081
Jonathan Peyton30419822017-05-12 18:01:32 +00001082 *maxmem = a;
1083 *allmem = b;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001084}
1085
Jonathan Peyton30419822017-05-12 18:01:32 +00001086void kmpc_poolprint(void) {
1087 kmp_info_t *th = __kmp_get_thread();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001088
Jonathan Peyton30419822017-05-12 18:01:32 +00001089 __kmp_bget_dequeue(th); /* Release any queued buffers */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001090
Jonathan Peyton30419822017-05-12 18:01:32 +00001091 bfreed(th);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001092}
1093
1094#endif // #if KMP_USE_BGET
1095
Jonathan Peyton30419822017-05-12 18:01:32 +00001096void *kmpc_malloc(size_t size) {
1097 void *ptr;
1098 ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1099 if (ptr != NULL) {
1100 // save allocated pointer just before one returned to user
1101 *(void **)ptr = ptr;
1102 ptr = (void **)ptr + 1;
1103 }
1104 return ptr;
1105}
Jim Cownie5e8470a2013-09-27 10:38:44 +00001106
// Nonzero when at most one bit of n is set. Note that this also holds for
// n == 0, so callers that must reject zero have to test for it separately.
#define IS_POWER_OF_TWO(n) (!((n) & ((n) - 1)))
1108
1109void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1110 void *ptr;
1111 void *ptr_allocated;
1112 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1113 if (!IS_POWER_OF_TWO(alignment)) {
1114 // AC: do we need to issue a warning here?
1115 errno = EINVAL;
1116 return NULL;
1117 }
1118 size = size + sizeof(void *) + alignment;
1119 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1120 if (ptr_allocated != NULL) {
1121 // save allocated pointer just before one returned to user
1122 ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1123 ~(alignment - 1));
1124 *((void **)ptr - 1) = ptr_allocated;
1125 } else {
1126 ptr = NULL;
1127 }
1128 return ptr;
1129}
1130
1131void *kmpc_calloc(size_t nelem, size_t elsize) {
1132 void *ptr;
1133 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1134 if (ptr != NULL) {
1135 // save allocated pointer just before one returned to user
1136 *(void **)ptr = ptr;
1137 ptr = (void **)ptr + 1;
1138 }
1139 return ptr;
1140}
1141
1142void *kmpc_realloc(void *ptr, size_t size) {
1143 void *result = NULL;
1144 if (ptr == NULL) {
1145 // If pointer is NULL, realloc behaves like malloc.
1146 result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1147 // save allocated pointer just before one returned to user
1148 if (result != NULL) {
1149 *(void **)result = result;
1150 result = (void **)result + 1;
Jonathan Peytonf83ae312016-05-12 22:00:37 +00001151 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001152 } else if (size == 0) {
1153 // If size is 0, realloc behaves like free.
1154 // The thread must be registered by the call to kmpc_malloc() or
1155 // kmpc_calloc() before.
1156 // So it should be safe to call __kmp_get_thread(), not
1157 // __kmp_entry_thread().
1158 KMP_ASSERT(*((void **)ptr - 1));
1159 brel(__kmp_get_thread(), *((void **)ptr - 1));
1160 } else {
1161 result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
1162 (bufsize)(size + sizeof(ptr)));
1163 if (result != NULL) {
1164 *(void **)result = result;
1165 result = (void **)result + 1;
Jonathan Peytonf83ae312016-05-12 22:00:37 +00001166 }
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001167 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001168 return result;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001169}
1170
Jonathan Peyton30419822017-05-12 18:01:32 +00001171// NOTE: the library must have already been initialized by a previous allocate
1172void kmpc_free(void *ptr) {
1173 if (!__kmp_init_serial) {
1174 return;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001175 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001176 if (ptr != NULL) {
1177 kmp_info_t *th = __kmp_get_thread();
1178 __kmp_bget_dequeue(th); /* Release any queued buffers */
1179 // extract allocated pointer and free it
1180 KMP_ASSERT(*((void **)ptr - 1));
1181 brel(th, *((void **)ptr - 1));
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001182 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001183}
1184
Jonathan Peyton30419822017-05-12 18:01:32 +00001185void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
1186 void *ptr;
1187 KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1188 (int)size KMP_SRC_LOC_PARM));
1189 ptr = bget(th, (bufsize)size);
1190 KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
1191 return ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001192}
1193
Jonathan Peyton30419822017-05-12 18:01:32 +00001194void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1195 size_t elsize KMP_SRC_LOC_DECL) {
1196 void *ptr;
1197 KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1198 (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1199 ptr = bgetz(th, (bufsize)(nelem * elsize));
1200 KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
1201 return ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001202}
1203
Jonathan Peyton30419822017-05-12 18:01:32 +00001204void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
1205 size_t size KMP_SRC_LOC_DECL) {
1206 KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1207 ptr, (int)size KMP_SRC_LOC_PARM));
1208 ptr = bgetr(th, ptr, (bufsize)size);
1209 KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
1210 return ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001211}
1212
Jonathan Peyton30419822017-05-12 18:01:32 +00001213void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
1214 KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
1215 ptr KMP_SRC_LOC_PARM));
1216 if (ptr != NULL) {
1217 __kmp_bget_dequeue(th); /* Release any queued buffers */
1218 brel(th, ptr);
1219 }
1220 KE_TRACE(30, ("<- __kmp_thread_free()\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001221}
1222
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001223#if OMP_50_ENABLED
1224/* OMP 5.0 Memory Management support */
// Name passed to dlopen() for the memkind library, and the handle it returned.
static const char *kmp_mk_lib_name;
static void *h_memkind;

/* Entry points of the memkind experimental API, resolved at run time: */
static void *(*kmp_mk_alloc)(void *k, size_t sz); // memkind_malloc
static void (*kmp_mk_free)(void *kind, void *ptr); // memkind_free
static int (*kmp_mk_check)(void *kind); // memkind_check_available

/* Kinds we are going to use. Each variable holds the address of the
   corresponding MEMKIND_* symbol, or NULL when the kind is not usable. */
static void **mk_default;
static void **mk_interleave;
static void **mk_hbw;
static void **mk_hbw_interleave;
static void **mk_hbw_preferred;
static void **mk_hugetlb;
static void **mk_hbw_hugetlb;
static void **mk_hbw_preferred_hugetlb;
1243
1244#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
1245static inline void chk_kind(void ***pkind) {
1246 KMP_DEBUG_ASSERT(pkind);
1247 if (*pkind) // symbol found
1248 if (kmp_mk_check(**pkind)) // kind not available or error
1249 *pkind = NULL;
1250}
1251#endif
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001252
1253void __kmp_init_memkind() {
Jonathan Peytonebf18302019-04-08 17:59:28 +00001254// as of 2018-07-31 memkind does not support Windows*, exclude it for now
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001255#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
Jonathan Peytonebf18302019-04-08 17:59:28 +00001256 // use of statically linked memkind is problematic, as it depends on libnuma
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001257 kmp_mk_lib_name = "libmemkind.so";
1258 h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
1259 if (h_memkind) {
Jonathan Peytonebf18302019-04-08 17:59:28 +00001260 kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
1261 kmp_mk_alloc =
1262 (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
1263 kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
1264 mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
1265 if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
1266 !kmp_mk_check(*mk_default)) {
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001267 __kmp_memkind_available = 1;
Jonathan Peytonebf18302019-04-08 17:59:28 +00001268 mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
1269 chk_kind(&mk_interleave);
1270 mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
1271 chk_kind(&mk_hbw);
1272 mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
1273 chk_kind(&mk_hbw_interleave);
1274 mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
1275 chk_kind(&mk_hbw_preferred);
1276 mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
1277 chk_kind(&mk_hugetlb);
1278 mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
1279 chk_kind(&mk_hbw_hugetlb);
1280 mk_hbw_preferred_hugetlb =
1281 (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
1282 chk_kind(&mk_hbw_preferred_hugetlb);
1283 KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
1284 return; // success
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001285 }
1286 dlclose(h_memkind); // failure
1287 h_memkind = NULL;
1288 }
Jonathan Peytonebf18302019-04-08 17:59:28 +00001289 kmp_mk_check = NULL;
1290 kmp_mk_alloc = NULL;
1291 kmp_mk_free = NULL;
1292 mk_default = NULL;
1293 mk_interleave = NULL;
1294 mk_hbw = NULL;
1295 mk_hbw_interleave = NULL;
1296 mk_hbw_preferred = NULL;
1297 mk_hugetlb = NULL;
1298 mk_hbw_hugetlb = NULL;
1299 mk_hbw_preferred_hugetlb = NULL;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001300#else
1301 kmp_mk_lib_name = "";
1302 h_memkind = NULL;
Jonathan Peytonebf18302019-04-08 17:59:28 +00001303 kmp_mk_check = NULL;
1304 kmp_mk_alloc = NULL;
1305 kmp_mk_free = NULL;
1306 mk_default = NULL;
1307 mk_interleave = NULL;
1308 mk_hbw = NULL;
1309 mk_hbw_interleave = NULL;
1310 mk_hbw_preferred = NULL;
1311 mk_hugetlb = NULL;
1312 mk_hbw_hugetlb = NULL;
1313 mk_hbw_preferred_hugetlb = NULL;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001314#endif
1315}
1316
// Undo __kmp_init_memkind(): close the memkind library if it is open and
// clear every entry point and kind pointer resolved from it. Does nothing on
// platforms where memkind is not used.
void __kmp_fini_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
  if (__kmp_memkind_available) {
    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
  }
  if (h_memkind != NULL) {
    dlclose(h_memkind);
    h_memkind = NULL;
  }

  // Entry points.
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;

  // Kinds.
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
#endif
}
1338
Jonathan Peytonebf18302019-04-08 17:59:28 +00001339omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
1340 int ntraits,
1341 omp_alloctrait_t traits[]) {
1342 // OpenMP 5.0 only allows predefined memspaces
1343 KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
1344 ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
1345 ms == omp_high_bw_mem_space);
1346 kmp_allocator_t *al;
1347 int i;
1348 al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
1349 al->memspace = ms; // not used currently
1350 for (i = 0; i < ntraits; ++i) {
1351 switch (traits[i].key) {
1352 case OMP_ATK_THREADMODEL:
1353 case OMP_ATK_ACCESS:
1354 case OMP_ATK_PINNED:
1355 break;
1356 case OMP_ATK_ALIGNMENT:
1357 al->alignment = traits[i].value;
1358 KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
1359 break;
1360 case OMP_ATK_POOL_SIZE:
1361 al->pool_size = traits[i].value;
1362 break;
1363 case OMP_ATK_FALLBACK:
1364 al->fb = (omp_alloctrait_value_t)traits[i].value;
1365 KMP_DEBUG_ASSERT(
1366 al->fb == OMP_ATV_DEFAULT_MEM_FB || al->fb == OMP_ATV_NULL_FB ||
1367 al->fb == OMP_ATV_ABORT_FB || al->fb == OMP_ATV_ALLOCATOR_FB);
1368 break;
1369 case OMP_ATK_FB_DATA:
1370 al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
1371 break;
1372 case OMP_ATK_PARTITION:
1373 al->memkind = RCAST(void **, traits[i].value);
1374 break;
1375 default:
1376 KMP_ASSERT2(0, "Unexpected allocator trait");
1377 }
1378 }
1379 if (al->fb == 0) {
1380 // set default allocator
1381 al->fb = OMP_ATV_DEFAULT_MEM_FB;
1382 al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
1383 } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
1384 KMP_ASSERT(al->fb_data != NULL);
1385 } else if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
1386 al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
1387 }
1388 if (__kmp_memkind_available) {
1389 // Let's use memkind library if available
1390 if (ms == omp_high_bw_mem_space) {
1391 if (al->memkind == (void *)OMP_ATV_INTERLEAVED && mk_hbw_interleave) {
1392 al->memkind = mk_hbw_interleave;
1393 } else if (mk_hbw_preferred) {
1394 // AC: do not try to use MEMKIND_HBW for now, because memkind library
1395 // cannot reliably detect exhaustion of HBW memory.
1396 // It could be possible using hbw_verify_memory_region() but memkind
1397 // manual says: "Using this function in production code may result in
1398 // serious performance penalty".
1399 al->memkind = mk_hbw_preferred;
1400 } else {
1401 // HBW is requested but not available --> return NULL allocator
1402 __kmp_free(al);
1403 return omp_null_allocator;
1404 }
1405 } else {
1406 if (al->memkind == (void *)OMP_ATV_INTERLEAVED && mk_interleave) {
1407 al->memkind = mk_interleave;
1408 } else {
1409 al->memkind = mk_default;
1410 }
1411 }
1412 } else {
1413 if (ms == omp_high_bw_mem_space) {
1414 // cannot detect HBW memory presence without memkind library
1415 __kmp_free(al);
1416 return omp_null_allocator;
1417 }
1418 }
1419 return (omp_allocator_handle_t)al;
1420}
1421
1422void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
1423 if (allocator > kmp_max_mem_alloc)
1424 __kmp_free(allocator);
1425}
1426
1427void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
1428 if (allocator == omp_null_allocator)
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001429 allocator = omp_default_mem_alloc;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001430 __kmp_threads[gtid]->th.th_def_allocator = allocator;
1431}
Jonathan Peytonebf18302019-04-08 17:59:28 +00001432
1433omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001434 return __kmp_threads[gtid]->th.th_def_allocator;
1435}
1436
1437typedef struct kmp_mem_desc { // Memory block descriptor
1438 void *ptr_alloc; // Pointer returned by allocator
1439 size_t size_a; // Size of allocated memory block (initial+descriptor+align)
1440 void *ptr_align; // Pointer to aligned memory, returned
Jonathan Peytonebf18302019-04-08 17:59:28 +00001441 kmp_allocator_t *allocator; // allocator
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001442} kmp_mem_desc_t;
1443static int alignment = sizeof(void *); // let's align to pointer size
1444
Jonathan Peytonebf18302019-04-08 17:59:28 +00001445void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
1446 void *ptr = NULL;
1447 kmp_allocator_t *al;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001448 KMP_DEBUG_ASSERT(__kmp_init_serial);
Jonathan Peytonebf18302019-04-08 17:59:28 +00001449 if (allocator == omp_null_allocator)
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001450 allocator = __kmp_threads[gtid]->th.th_def_allocator;
1451
Jonathan Peytonebf18302019-04-08 17:59:28 +00001452 KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1453 al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
1454
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001455 int sz_desc = sizeof(kmp_mem_desc_t);
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001456 kmp_mem_desc_t desc;
1457 kmp_uintptr_t addr; // address returned by allocator
1458 kmp_uintptr_t addr_align; // address to return to caller
1459 kmp_uintptr_t addr_descr; // address of memory block descriptor
Jonathan Peytonebf18302019-04-08 17:59:28 +00001460 int align = alignment; // default alignment
1461 if (allocator > kmp_max_mem_alloc && al->alignment > 0) {
1462 align = al->alignment; // alignment requested by user
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001463 }
Jonathan Peytonebf18302019-04-08 17:59:28 +00001464 desc.size_a = size + sz_desc + align;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001465
Jonathan Peytonebf18302019-04-08 17:59:28 +00001466 if (__kmp_memkind_available) {
1467 if (allocator < kmp_max_mem_alloc) {
1468 // pre-defined allocator
1469 if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
1470 ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
1471 } else {
1472 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1473 }
1474 } else if (al->pool_size > 0) {
1475 // custom allocator with pool size requested
1476 kmp_uint64 used =
1477 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
1478 if (used + desc.size_a > al->pool_size) {
1479 // not enough space, need to go fallback path
1480 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
1481 if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
1482 al = (kmp_allocator_t *)omp_default_mem_alloc;
1483 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1484 } else if (al->fb == OMP_ATV_ABORT_FB) {
1485 KMP_ASSERT(0); // abort fallback requested
1486 } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
1487 KMP_ASSERT(al != al->fb_data);
1488 al = al->fb_data;
1489 return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
1490 } // else ptr == NULL;
1491 } else {
1492 // pool has enough space
1493 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
1494 if (ptr == NULL) {
1495 if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
1496 al = (kmp_allocator_t *)omp_default_mem_alloc;
1497 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1498 } else if (al->fb == OMP_ATV_ABORT_FB) {
1499 KMP_ASSERT(0); // abort fallback requested
1500 } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
1501 KMP_ASSERT(al != al->fb_data);
1502 al = al->fb_data;
1503 return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
1504 }
1505 }
1506 }
1507 } else {
1508 // custom allocator, pool size not requested
1509 ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
1510 if (ptr == NULL) {
1511 if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
1512 al = (kmp_allocator_t *)omp_default_mem_alloc;
1513 ptr = kmp_mk_alloc(*mk_default, desc.size_a);
1514 } else if (al->fb == OMP_ATV_ABORT_FB) {
1515 KMP_ASSERT(0); // abort fallback requested
1516 } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
1517 KMP_ASSERT(al != al->fb_data);
1518 al = al->fb_data;
1519 return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
1520 }
1521 }
1522 }
1523 } else if (allocator < kmp_max_mem_alloc) {
1524 // pre-defined allocator
1525 if (allocator == omp_high_bw_mem_alloc) {
1526 // ptr = NULL;
1527 } else {
1528 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1529 }
1530 } else if (al->pool_size > 0) {
1531 // custom allocator with pool size requested
1532 kmp_uint64 used =
1533 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
1534 if (used + desc.size_a > al->pool_size) {
1535 // not enough space, need to go fallback path
1536 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
1537 if (al->fb == OMP_ATV_DEFAULT_MEM_FB) {
1538 al = (kmp_allocator_t *)omp_default_mem_alloc;
1539 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1540 } else if (al->fb == OMP_ATV_ABORT_FB) {
1541 KMP_ASSERT(0); // abort fallback requested
1542 } else if (al->fb == OMP_ATV_ALLOCATOR_FB) {
1543 KMP_ASSERT(al != al->fb_data);
1544 al = al->fb_data;
1545 return __kmpc_alloc(gtid, size, (omp_allocator_handle_t)al);
1546 } // else ptr == NULL;
1547 } else {
1548 // pool has enough space
1549 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1550 if (ptr == NULL && al->fb == OMP_ATV_ABORT_FB) {
1551 KMP_ASSERT(0); // abort fallback requested
1552 } // no sense to look for another fallback because of same internal alloc
1553 }
1554 } else {
1555 // custom allocator, pool size not requested
1556 ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
1557 if (ptr == NULL && al->fb == OMP_ATV_ABORT_FB) {
1558 KMP_ASSERT(0); // abort fallback requested
1559 } // no sense to look for another fallback because of same internal alloc
1560 }
1561 KE_TRACE(10, ("__kmpc_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001562 if (ptr == NULL)
1563 return NULL;
1564
1565 addr = (kmp_uintptr_t)ptr;
Jonathan Peytonebf18302019-04-08 17:59:28 +00001566 addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001567 addr_descr = addr_align - sz_desc;
1568
1569 desc.ptr_alloc = ptr;
1570 desc.ptr_align = (void *)addr_align;
Jonathan Peytonebf18302019-04-08 17:59:28 +00001571 desc.allocator = al;
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001572 *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
1573 KMP_MB();
1574
1575 KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", desc.ptr_align, gtid));
1576 return desc.ptr_align;
1577}
1578
Jonathan Peytonebf18302019-04-08 17:59:28 +00001579void __kmpc_free(int gtid, void *ptr, const omp_allocator_handle_t allocator) {
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001580 KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1581 if (ptr == NULL)
1582 return;
1583
Jonathan Peytonebf18302019-04-08 17:59:28 +00001584 kmp_allocator_t *al;
1585 omp_allocator_handle_t oal;
1586 al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001587 kmp_mem_desc_t desc;
1588 kmp_uintptr_t addr_align; // address to return to caller
1589 kmp_uintptr_t addr_descr; // address of memory block descriptor
1590
1591 addr_align = (kmp_uintptr_t)ptr;
1592 addr_descr = addr_align - sizeof(kmp_mem_desc_t);
1593 desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor
1594
1595 KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
1596 if (allocator) {
Jonathan Peytonebf18302019-04-08 17:59:28 +00001597 KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001598 }
Jonathan Peytonebf18302019-04-08 17:59:28 +00001599 al = desc.allocator;
1600 oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
1601 KMP_DEBUG_ASSERT(al);
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001602
Jonathan Peytonebf18302019-04-08 17:59:28 +00001603 if (__kmp_memkind_available) {
1604 if (oal < kmp_max_mem_alloc) {
1605 // pre-defined allocator
1606 if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
1607 kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
1608 } else {
1609 kmp_mk_free(*mk_default, desc.ptr_alloc);
1610 }
1611 } else {
1612 if (al->pool_size > 0) { // custom allocator with pool size requested
1613 kmp_uint64 used =
1614 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
1615 (void)used; // to suppress compiler warning
1616 KMP_DEBUG_ASSERT(used >= desc.size_a);
1617 }
1618 kmp_mk_free(*al->memkind, desc.ptr_alloc);
1619 }
1620 } else {
1621 if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
1622 kmp_uint64 used =
1623 KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
1624 (void)used; // to suppress compiler warning
1625 KMP_DEBUG_ASSERT(used >= desc.size_a);
1626 }
1627 __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
Jonathan Peyton92ca6182018-09-07 18:25:49 +00001628 }
1629 KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, desc.ptr_alloc,
1630 allocator));
1631}
1632
1633#endif
1634
Jonathan Peyton30419822017-05-12 18:01:32 +00001635/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
1636 memory leaks, but it may be useful for debugging memory corruptions, used
1637 freed pointers, etc. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001638/* #define LEAK_MEMORY */
// Memory block descriptor stored immediately before every aligned pointer
// handed out by the allocation routines below.
typedef struct kmp_mem_descr {
  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
  size_t size_allocated; // Size of allocated memory block.
  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
  size_t size_aligned; // Size of aligned memory block.
} kmp_mem_descr_t;
1646
Jonathan Peyton30419822017-05-12 18:01:32 +00001647/* Allocate memory on requested boundary, fill allocated memory with 0x00.
1648 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1649 error. Must use __kmp_free when freeing memory allocated by this routine! */
1650static void *___kmp_allocate_align(size_t size,
1651 size_t alignment KMP_SRC_LOC_DECL) {
1652 /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
1653 requested to return properly aligned pointer. Original pointer returned
1654 by malloc() and size of allocated block is saved in descriptor just
1655 before the aligned pointer. This information used by __kmp_free() -- it
1656 has to pass to free() original pointer, not aligned one.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001657
Jonathan Peyton30419822017-05-12 18:01:32 +00001658 +---------+------------+-----------------------------------+---------+
1659 | padding | descriptor | aligned block | padding |
1660 +---------+------------+-----------------------------------+---------+
1661 ^ ^
1662 | |
1663 | +- Aligned pointer returned to caller
1664 +- Pointer returned by malloc()
Jim Cownie5e8470a2013-09-27 10:38:44 +00001665
Jonathan Peyton30419822017-05-12 18:01:32 +00001666 Aligned block is filled with zeros, paddings are filled with 0xEF. */
Jim Cownie5e8470a2013-09-27 10:38:44 +00001667
Jonathan Peyton30419822017-05-12 18:01:32 +00001668 kmp_mem_descr_t descr;
1669 kmp_uintptr_t addr_allocated; // Address returned by malloc().
1670 kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1671 kmp_uintptr_t addr_descr; // Address of memory block descriptor.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001672
Jonathan Peyton30419822017-05-12 18:01:32 +00001673 KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1674 (int)size, (int)alignment KMP_SRC_LOC_PARM));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001675
Jonathan Peyton30419822017-05-12 18:01:32 +00001676 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too
1677 KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
1678 // Make sure kmp_uintptr_t is enough to store addresses.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001679
Jonathan Peyton30419822017-05-12 18:01:32 +00001680 descr.size_aligned = size;
1681 descr.size_allocated =
1682 descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001683
Jonathan Peytonee2f96c2016-03-29 20:10:00 +00001684#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001685 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
Jonathan Peytonee2f96c2016-03-29 20:10:00 +00001686#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001687 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
Jonathan Peytonee2f96c2016-03-29 20:10:00 +00001688#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001689 KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
1690 descr.ptr_allocated));
1691 if (descr.ptr_allocated == NULL) {
1692 KMP_FATAL(OutOfHeapMemory);
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001693 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001694
Jonathan Peyton30419822017-05-12 18:01:32 +00001695 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1696 addr_aligned =
1697 (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
1698 addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001699
Jonathan Peyton30419822017-05-12 18:01:32 +00001700 descr.ptr_aligned = (void *)addr_aligned;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001701
Jonathan Peyton30419822017-05-12 18:01:32 +00001702 KE_TRACE(26, (" ___kmp_allocate_align: "
1703 "ptr_allocated=%p, size_allocated=%d, "
1704 "ptr_aligned=%p, size_aligned=%d\n",
1705 descr.ptr_allocated, (int)descr.size_allocated,
1706 descr.ptr_aligned, (int)descr.size_aligned));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001707
Jonathan Peyton30419822017-05-12 18:01:32 +00001708 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
1709 KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
1710 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1711 addr_allocated + descr.size_allocated);
1712 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
Jonathan Peytonee2f96c2016-03-29 20:10:00 +00001713#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001714 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1715// Fill allocated memory block with 0xEF.
Jonathan Peytonee2f96c2016-03-29 20:10:00 +00001716#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001717 memset(descr.ptr_aligned, 0x00, descr.size_aligned);
1718 // Fill the aligned memory block (which is intended for using by caller) with
1719 // 0x00. Do not
1720 // put this filling under KMP_DEBUG condition! Many callers expect zeroed
1721 // memory. (Padding
1722 // bytes remain filled with 0xEF in debugging library.)
1723 *((kmp_mem_descr_t *)addr_descr) = descr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001724
Jonathan Peyton30419822017-05-12 18:01:32 +00001725 KMP_MB();
Jim Cownie5e8470a2013-09-27 10:38:44 +00001726
Jonathan Peyton30419822017-05-12 18:01:32 +00001727 KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
1728 return descr.ptr_aligned;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001729} // func ___kmp_allocate_align
1730
Jonathan Peyton30419822017-05-12 18:01:32 +00001731/* Allocate memory on cache line boundary, fill allocated memory with 0x00.
1732 Do not call this func directly! Use __kmp_allocate macro instead.
1733 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1734 error. Must use __kmp_free when freeing memory allocated by this routine! */
1735void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
1736 void *ptr;
1737 KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
1738 (int)size KMP_SRC_LOC_PARM));
1739 ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
1740 KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
1741 return ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001742} // func ___kmp_allocate
1743
Jonathan Peyton30419822017-05-12 18:01:32 +00001744/* Allocate memory on page boundary, fill allocated memory with 0x00.
1745 Does not call this func directly! Use __kmp_page_allocate macro instead.
1746 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1747 error. Must use __kmp_free when freeing memory allocated by this routine! */
1748void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
1749 int page_size = 8 * 1024;
1750 void *ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001751
Jonathan Peyton30419822017-05-12 18:01:32 +00001752 KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
1753 (int)size KMP_SRC_LOC_PARM));
1754 ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
1755 KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
1756 return ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001757} // ___kmp_page_allocate
1758
Jonathan Peyton30419822017-05-12 18:01:32 +00001759/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1760 In debug mode, fill the memory block with 0xEF before call to free(). */
1761void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
1762 kmp_mem_descr_t descr;
1763 kmp_uintptr_t addr_allocated; // Address returned by malloc().
1764 kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
Jim Cownie5e8470a2013-09-27 10:38:44 +00001765
Jonathan Peyton30419822017-05-12 18:01:32 +00001766 KE_TRACE(25,
1767 ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
1768 KMP_ASSERT(ptr != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001769
Jonathan Peyton30419822017-05-12 18:01:32 +00001770 descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001771
Jonathan Peyton30419822017-05-12 18:01:32 +00001772 KE_TRACE(26, (" __kmp_free: "
1773 "ptr_allocated=%p, size_allocated=%d, "
1774 "ptr_aligned=%p, size_aligned=%d\n",
1775 descr.ptr_allocated, (int)descr.size_allocated,
1776 descr.ptr_aligned, (int)descr.size_aligned));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001777
Jonathan Peyton30419822017-05-12 18:01:32 +00001778 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1779 addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001780
Jonathan Peyton30419822017-05-12 18:01:32 +00001781 KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
1782 KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
1783 KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
1784 KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
1785 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1786 addr_allocated + descr.size_allocated);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001787
Jonathan Peyton30419822017-05-12 18:01:32 +00001788#ifdef KMP_DEBUG
1789 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1790// Fill memory block with 0xEF, it helps catch using freed memory.
1791#endif
Jim Cownie5e8470a2013-09-27 10:38:44 +00001792
Jonathan Peyton30419822017-05-12 18:01:32 +00001793#ifndef LEAK_MEMORY
1794 KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
1795#ifdef KMP_DEBUG
1796 _free_src_loc(descr.ptr_allocated, _file_, _line_);
1797#else
1798 free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
1799#endif
1800#endif
1801 KMP_MB();
1802 KE_TRACE(25, ("<- __kmp_free() returns\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001803} // func ___kmp_free
1804
Jim Cownie5e8470a2013-09-27 10:38:44 +00001805#if USE_FAST_MEMORY == 3
1806// Allocate fast memory by first scanning the thread's free lists
1807// If a chunk the right size exists, grab it off the free list.
1808// Otherwise allocate normally using kmp_thread_malloc.
1809
1810// AC: How to choose the limit? Just get 16 for now...
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001811#define KMP_FREE_LIST_LIMIT 16
Jim Cownie5e8470a2013-09-27 10:38:44 +00001812
1813// Always use 128 bytes for determining buckets for caching memory blocks
Jonathan Peyton30419822017-05-12 18:01:32 +00001814#define DCACHE_LINE 128
Jim Cownie5e8470a2013-09-27 10:38:44 +00001815
Jonathan Peyton30419822017-05-12 18:01:32 +00001816void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
1817 void *ptr;
1818 int num_lines;
1819 int idx;
1820 int index;
1821 void *alloc_ptr;
1822 size_t alloc_size;
1823 kmp_mem_descr_t *descr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001824
Jonathan Peyton30419822017-05-12 18:01:32 +00001825 KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1826 __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001827
Jonathan Peyton30419822017-05-12 18:01:32 +00001828 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
1829 idx = num_lines - 1;
1830 KMP_DEBUG_ASSERT(idx >= 0);
1831 if (idx < 2) {
1832 index = 0; // idx is [ 0, 1 ], use first free list
1833 num_lines = 2; // 1, 2 cache lines or less than cache line
1834 } else if ((idx >>= 2) == 0) {
1835 index = 1; // idx is [ 2, 3 ], use second free list
1836 num_lines = 4; // 3, 4 cache lines
1837 } else if ((idx >>= 2) == 0) {
1838 index = 2; // idx is [ 4, 15 ], use third free list
1839 num_lines = 16; // 5, 6, ..., 16 cache lines
1840 } else if ((idx >>= 2) == 0) {
1841 index = 3; // idx is [ 16, 63 ], use fourth free list
1842 num_lines = 64; // 17, 18, ..., 64 cache lines
1843 } else {
1844 goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1845 }
1846
1847 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1848 if (ptr != NULL) {
1849 // pop the head of no-sync free list
1850 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1851 KMP_DEBUG_ASSERT(
1852 this_thr ==
1853 ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
1854 ->ptr_aligned);
1855 goto end;
Jonathan Peytonbd3a7632017-09-27 20:36:27 +00001856 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001857 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
1858 if (ptr != NULL) {
1859 // no-sync free list is empty, use sync free list (filled in by other
1860 // threads only)
1861 // pop the head of the sync free list, push NULL instead
1862 while (!KMP_COMPARE_AND_STORE_PTR(
Andrey Churbanov5ba90c72017-07-17 09:03:14 +00001863 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001864 KMP_CPU_PAUSE();
1865 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001866 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001867 // push the rest of chain into no-sync free list (can be NULL if there was
1868 // the only block)
1869 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1870 KMP_DEBUG_ASSERT(
1871 this_thr ==
1872 ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)))
1873 ->ptr_aligned);
1874 goto end;
1875 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00001876
Jonathan Peyton30419822017-05-12 18:01:32 +00001877alloc_call:
1878 // haven't found block in the free lists, thus allocate it
1879 size = num_lines * DCACHE_LINE;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001880
Jonathan Peyton30419822017-05-12 18:01:32 +00001881 alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
1882 KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
1883 "alloc_size %d\n",
1884 __kmp_gtid_from_thread(this_thr), alloc_size));
1885 alloc_ptr = bget(this_thr, (bufsize)alloc_size);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001886
Jonathan Peyton30419822017-05-12 18:01:32 +00001887 // align ptr to DCACHE_LINE
1888 ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
1889 DCACHE_LINE) &
1890 ~(DCACHE_LINE - 1));
1891 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001892
Jonathan Peyton30419822017-05-12 18:01:32 +00001893 descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1894 // we don't need size_allocated
1895 descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1896 // (it is already saved in bget buffer,
1897 // but we may want to use another allocator in future)
1898 descr->size_aligned = size;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001899
Jonathan Peyton30419822017-05-12 18:01:32 +00001900end:
1901 KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
1902 __kmp_gtid_from_thread(this_thr), ptr));
1903 return ptr;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001904} // func __kmp_fast_allocate
1905
1906// Free fast memory and place it on the thread's free list if it is of
1907// the correct size.
Jonathan Peyton30419822017-05-12 18:01:32 +00001908void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
1909 kmp_mem_descr_t *descr;
1910 kmp_info_t *alloc_thr;
1911 size_t size;
1912 size_t idx;
1913 int index;
Jim Cownie5e8470a2013-09-27 10:38:44 +00001914
Jonathan Peyton30419822017-05-12 18:01:32 +00001915 KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1916 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
1917 KMP_ASSERT(ptr != NULL);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001918
Jonathan Peyton30419822017-05-12 18:01:32 +00001919 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001920
Jonathan Peyton30419822017-05-12 18:01:32 +00001921 KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1922 (int)descr->size_aligned));
Jim Cownie5e8470a2013-09-27 10:38:44 +00001923
Jonathan Peyton30419822017-05-12 18:01:32 +00001924 size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
Jim Cownie5e8470a2013-09-27 10:38:44 +00001925
Jonathan Peyton30419822017-05-12 18:01:32 +00001926 idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
1927 if (idx == size) {
1928 index = 0; // 2 cache lines
1929 } else if ((idx <<= 1) == size) {
1930 index = 1; // 4 cache lines
1931 } else if ((idx <<= 2) == size) {
1932 index = 2; // 16 cache lines
1933 } else if ((idx <<= 2) == size) {
1934 index = 3; // 64 cache lines
1935 } else {
1936 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
1937 goto free_call; // 65 or more cache lines ( > 8KB )
1938 }
1939
1940 alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1941 if (alloc_thr == this_thr) {
1942 // push block to self no-sync free list, linking previous head (LIFO)
1943 *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1944 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1945 } else {
1946 void *head = this_thr->th.th_free_lists[index].th_free_list_other;
1947 if (head == NULL) {
1948 // Create new free list
1949 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1950 *((void **)ptr) = NULL; // mark the tail of the list
1951 descr->size_allocated = (size_t)1; // head of the list keeps its length
Jim Cownie5e8470a2013-09-27 10:38:44 +00001952 } else {
Jonathan Peyton30419822017-05-12 18:01:32 +00001953 // need to check existed "other" list's owner thread and size of queue
1954 kmp_mem_descr_t *dsc =
1955 (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
1956 // allocating thread, same for all queue nodes
1957 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
1958 size_t q_sz =
1959 dsc->size_allocated + 1; // new size in case we add current task
1960 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
1961 // we can add current task to "other" list, no sync needed
1962 *((void **)ptr) = head;
1963 descr->size_allocated = q_sz;
1964 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1965 } else {
1966 // either queue blocks owner is changing or size limit exceeded
1967 // return old queue to allocating thread (q_th) synchroneously,
1968 // and start new list for alloc_thr's tasks
1969 void *old_ptr;
1970 void *tail = head;
1971 void *next = *((void **)head);
1972 while (next != NULL) {
1973 KMP_DEBUG_ASSERT(
1974 // queue size should decrease by 1 each step through the list
1975 ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
1976 ->size_allocated +
1977 1 ==
1978 ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
1979 ->size_allocated);
1980 tail = next; // remember tail node
1981 next = *((void **)next);
Jim Cownie5e8470a2013-09-27 10:38:44 +00001982 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001983 KMP_DEBUG_ASSERT(q_th != NULL);
1984 // push block to owner's sync free list
1985 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1986 /* the next pointer must be set before setting free_list to ptr to avoid
1987 exposing a broken list to other threads, even for an instant. */
1988 *((void **)tail) = old_ptr;
1989
1990 while (!KMP_COMPARE_AND_STORE_PTR(
1991 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
1992 KMP_CPU_PAUSE();
1993 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
1994 *((void **)tail) = old_ptr;
1995 }
1996
1997 // start new list of not-selt tasks
1998 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1999 *((void **)ptr) = NULL;
2000 descr->size_allocated = (size_t)1; // head of queue keeps its length
2001 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002002 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002003 }
2004 goto end;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002005
Jonathan Peyton30419822017-05-12 18:01:32 +00002006free_call:
2007 KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2008 __kmp_gtid_from_thread(this_thr), size));
2009 __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
2010 brel(this_thr, descr->ptr_allocated);
Jim Cownie5e8470a2013-09-27 10:38:44 +00002011
Jonathan Peyton30419822017-05-12 18:01:32 +00002012end:
2013 KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002014
2015} // func __kmp_fast_free
2016
Jim Cownie5e8470a2013-09-27 10:38:44 +00002017// Initialize the thread free lists related to fast memory
2018// Only do this when a thread is initially created.
Jonathan Peyton30419822017-05-12 18:01:32 +00002019void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
2020 KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002021
Jonathan Peyton30419822017-05-12 18:01:32 +00002022 memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002023}
2024
2025// Free the memory in the thread free lists related to fast memory
2026// Only do this when a thread is being reaped (destroyed).
Jonathan Peyton30419822017-05-12 18:01:32 +00002027void __kmp_free_fast_memory(kmp_info_t *th) {
2028 // Suppose we use BGET underlying allocator, walk through its structures...
2029 int bin;
2030 thr_data_t *thr = get_thr_data(th);
2031 void **lst = NULL;
Jim Cownie5e8470a2013-09-27 10:38:44 +00002032
Jonathan Peyton30419822017-05-12 18:01:32 +00002033 KE_TRACE(
2034 5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002035
Jonathan Peyton30419822017-05-12 18:01:32 +00002036 __kmp_bget_dequeue(th); // Release any queued buffers
Jim Cownie5e8470a2013-09-27 10:38:44 +00002037
Jonathan Peyton30419822017-05-12 18:01:32 +00002038 // Dig through free lists and extract all allocated blocks
2039 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
2040 bfhead_t *b = thr->freelist[bin].ql.flink;
2041 while (b != &thr->freelist[bin]) {
2042 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address
2043 *((void **)b) =
2044 lst; // link the list (override bthr, but keep flink yet)
2045 lst = (void **)b; // push b into lst
2046 }
2047 b = b->ql.flink; // get next buffer
Jim Cownie5e8470a2013-09-27 10:38:44 +00002048 }
Jonathan Peyton30419822017-05-12 18:01:32 +00002049 }
2050 while (lst != NULL) {
2051 void *next = *lst;
2052 KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2053 lst, next, th, __kmp_gtid_from_thread(th)));
2054 (*thr->relfcn)(lst);
2055#if BufStats
2056 // count blocks to prevent problems in __kmp_finalize_bget()
2057 thr->numprel++; /* Nr of expansion block releases */
2058 thr->numpblk--; /* Total number of blocks */
2059#endif
2060 lst = (void **)next;
2061 }
Jim Cownie5e8470a2013-09-27 10:38:44 +00002062
Jonathan Peyton30419822017-05-12 18:01:32 +00002063 KE_TRACE(
2064 5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
Jim Cownie5e8470a2013-09-27 10:38:44 +00002065}
2066
2067#endif // USE_FAST_MEMORY