/*
 *
 * Copyright 2015-2016, Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

// Implements an efficient in-memory log, optimized for multiple writers and
// a single reader. Available log space is divided up into blocks of
// CENSUS_LOG_MAX_RECORD_SIZE bytes (2^CENSUS_LOG_2_MAX_RECORD_SIZE). A block
// can be in one of the following three data structures:
// - Free blocks (free_block_list)
// - Blocks with unread data (dirty_block_list)
// - Blocks currently attached to cores (core_local_blocks[])
//
// census_log_start_write() moves a block from core_local_blocks[] to the end
// of dirty_block_list when the block:
// - is out-of-space OR
// - has an incomplete record (an incomplete record occurs when a thread calls
//   census_log_start_write() and is context-switched before calling
//   census_log_end_write()).
// So, blocks in dirty_block_list are ordered, from oldest to newest, by the
// time when the block is detached from the core.
//
// census_log_read_next() first iterates over dirty_block_list and then
// core_local_blocks[]. It moves completely read blocks from dirty_block_list
// to free_block_list. Blocks in core_local_blocks[] are not freed, even when
// completely read.
//
// If the log is configured to discard old records and free_block_list is
// empty, census_log_start_write() iterates over dirty_block_list to allocate
// a new block. It moves the oldest available block (no pending read/write) to
// core_local_blocks[].
//
// core_local_block_struct is used to implement a map from core id to the
// block associated with that core. This mapping is advisory. It is possible
// that the block returned by this mapping is no longer associated with that
// core. This mapping is updated, lazily, by census_log_start_write().
//
// Locking in block struct:
//
// Exclusive g_log.lock must be held before calling any functions operating on
// block structs except census_log_start_write() and census_log_end_write().
//
// Writes to a block are serialized via writer_lock. census_log_start_write()
// acquires this lock and census_log_end_write() releases it. On failure to
// acquire the lock, the writer allocates a new block for the current core and
// updates core_local_block accordingly.
//
// Simultaneous read and write access is allowed. Readers can safely read up
// to committed bytes (bytes_committed).
//
// reader_lock protects the block currently being read from getting recycled.
// start_read() acquires reader_lock and end_read() releases the lock.
//
// Read/write access to a block is disabled via try_disable_access(). It
// returns with both writer_lock and reader_lock held. These locks are
// subsequently released by enable_access() to enable access to the block.
//
// A note on naming: most function/struct names are prefixed with cl_
// (shorthand for census_log). Further, functions that manipulate structures
// include the name of the structure, which will be passed as the first
// argument. E.g. cl_block_initialize() will initialize a cl_block.
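//
// Example usage (an illustrative sketch only, not part of the implementation;
// 'record', 'record_size' and 'process' are hypothetical), assuming
// census_log_initialize() has already been called:
//
//   // Writer side:
//   void* buf = census_log_start_write(record_size);
//   if (buf != NULL) {
//     memcpy(buf, record, record_size);
//     census_log_end_write(buf, record_size);
//   }
//
//   // Reader side (single reader):
//   census_log_init_reader();
//   size_t len;
//   for (const void* data = census_log_read_next(&len); data != NULL;
//        data = census_log_read_next(&len)) {
//     process(data, len);
//   }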

#include "src/core/census/log.h"
#include <grpc/support/alloc.h>
#include <grpc/support/atm.h>
#include <grpc/support/cpu.h>
#include <grpc/support/log.h>
#include <grpc/support/sync.h>
#include <grpc/support/useful.h>
#include <stdbool.h>
#include <string.h>

// End of platform specific code

typedef struct census_log_block_list_struct {
  struct census_log_block_list_struct* next;
  struct census_log_block_list_struct* prev;
  struct census_log_block* block;
} cl_block_list_struct;

typedef struct census_log_block {
  // Pointer to underlying buffer.
  char* buffer;
  gpr_atm writer_lock;
  gpr_atm reader_lock;
  // Keeps completely written bytes. Declared atomic because accessed
  // simultaneously by reader and writer.
  gpr_atm bytes_committed;
  // Bytes already read.
  size_t bytes_read;
  // Links for list.
  cl_block_list_struct link;
// We want this structure to be cacheline aligned. We assume the following
// sizes for the various parts on 32/64bit systems:
// type                 32b size    64b size
// char*                   4           8
// 3x gpr_atm             12          24
// size_t                  4           8
// cl_block_list_struct   12          24
// TOTAL                  32          64
//
// Depending on the size of our cacheline and the architecture, we
// selectively add char buffering to this structure. The size is checked
// via assert in census_log_initialize().
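// For example, if GPR_CACHELINE_SIZE is 64 (a common but platform-defined
// value), CL_BLOCK_PAD_SIZE below works out to 0 on 64-bit builds and 32 on
// 32-bit builds, so the padding field is only emitted in the latter case.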
#if defined(GPR_ARCH_64)
#define CL_BLOCK_PAD_SIZE (GPR_CACHELINE_SIZE - 64)
#else
#if defined(GPR_ARCH_32)
#define CL_BLOCK_PAD_SIZE (GPR_CACHELINE_SIZE - 32)
#else
#error "Unknown architecture"
#endif
#endif
#if CL_BLOCK_PAD_SIZE > 0
  char padding[CL_BLOCK_PAD_SIZE];
#endif
} cl_block;

// A list of cl_blocks, doubly-linked through cl_block::link.
typedef struct census_log_block_list {
  int32_t count;            // Number of items in list.
  cl_block_list_struct ht;  // Head/tail of linked list.
} cl_block_list;

// Cacheline aligned block pointers to avoid false sharing. Block pointer must
// be initialized via set_block() before calling other functions.
typedef struct census_log_core_local_block {
  gpr_atm block;
// Ensure cacheline alignment: we assume sizeof(gpr_atm) == 4 or 8.
#if defined(GPR_ARCH_64)
#define CL_CORE_LOCAL_BLOCK_PAD_SIZE (GPR_CACHELINE_SIZE - 8)
#else
#if defined(GPR_ARCH_32)
#define CL_CORE_LOCAL_BLOCK_PAD_SIZE (GPR_CACHELINE_SIZE - 4)
#else
#error "Unknown architecture"
#endif
#endif
#if CL_CORE_LOCAL_BLOCK_PAD_SIZE > 0
  char padding[CL_CORE_LOCAL_BLOCK_PAD_SIZE];
#endif
} cl_core_local_block;

struct census_log {
  int discard_old_records;
  // Number of cores (aka hardware-contexts).
  unsigned num_cores;
  // Number of CENSUS_LOG_MAX_RECORD_SIZE-sized blocks in the log.
  uint32_t num_blocks;
  cl_block* blocks;                        // Block metadata.
  cl_core_local_block* core_local_blocks;  // Keeps core to block mappings.
  gpr_mu lock;
  int initialized;  // Has the log been initialized?
  // Keeps the state of the reader iterator. A value of 0 indicates that the
  // iterator has reached the end. census_log_init_reader() resets the value
  // to num_cores to restart iteration.
  uint32_t read_iterator_state;
  // Points to the block being read. If non-NULL, the block is locked for
  // reading (block_being_read->reader_lock is held).
  cl_block* block_being_read;
  char* buffer;
  cl_block_list free_block_list;
  cl_block_list dirty_block_list;
  gpr_atm out_of_space_count;
};

// Single internal log.
static struct census_log g_log;

// Functions that operate on an atomic memory location used as a lock.

// Returns non-zero if lock is acquired.
static int cl_try_lock(gpr_atm* lock) { return gpr_atm_acq_cas(lock, 0, 1); }

static void cl_unlock(gpr_atm* lock) { gpr_atm_rel_store(lock, 0); }

// Functions that operate on cl_core_local_block's.

static void cl_core_local_block_set_block(cl_core_local_block* clb,
                                          cl_block* block) {
  gpr_atm_rel_store(&clb->block, (gpr_atm)block);
}

static cl_block* cl_core_local_block_get_block(cl_core_local_block* clb) {
  return (cl_block*)gpr_atm_acq_load(&clb->block);
}

// Functions that operate on cl_block_list_struct's.

static void cl_block_list_struct_initialize(cl_block_list_struct* bls,
                                            cl_block* block) {
  bls->next = bls->prev = bls;
  bls->block = block;
}

// Functions that operate on cl_block_list's.

static void cl_block_list_initialize(cl_block_list* list) {
  list->count = 0;
  cl_block_list_struct_initialize(&list->ht, NULL);
}
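
// Note: 'ht' is a sentinel node; in an empty list it links to itself and its
// block pointer is NULL, which is why cl_block_list_head() below returns NULL
// for an empty list.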

// Returns the head of the list, or NULL if the list is empty.
static cl_block* cl_block_list_head(cl_block_list* list) {
  return list->ht.next->block;
}

// Insert element *e after *pos.
static void cl_block_list_insert(cl_block_list* list, cl_block_list_struct* pos,
                                 cl_block_list_struct* e) {
  list->count++;
  e->next = pos->next;
  e->prev = pos;
  e->next->prev = e;
  e->prev->next = e;
}

// Insert block at the head of the list.
static void cl_block_list_insert_at_head(cl_block_list* list, cl_block* block) {
  cl_block_list_insert(list, &list->ht, &block->link);
}

// Insert block at the tail of the list.
static void cl_block_list_insert_at_tail(cl_block_list* list, cl_block* block) {
  cl_block_list_insert(list, list->ht.prev, &block->link);
}

// Removes block *b. Requires that *b be in the list.
static void cl_block_list_remove(cl_block_list* list, cl_block* b) {
  list->count--;
  b->link.next->prev = b->link.prev;
  b->link.prev->next = b->link.next;
}

// Functions that operate on cl_block's

static void cl_block_initialize(cl_block* block, char* buffer) {
  block->buffer = buffer;
  gpr_atm_rel_store(&block->writer_lock, 0);
  gpr_atm_rel_store(&block->reader_lock, 0);
  gpr_atm_rel_store(&block->bytes_committed, 0);
  block->bytes_read = 0;
  cl_block_list_struct_initialize(&block->link, block);
}

// Guards against exposing a partially written buffer to the reader.
static void cl_block_set_bytes_committed(cl_block* block,
                                         size_t bytes_committed) {
  gpr_atm_rel_store(&block->bytes_committed, (gpr_atm)bytes_committed);
}

static size_t cl_block_get_bytes_committed(cl_block* block) {
  return (size_t)gpr_atm_acq_load(&block->bytes_committed);
}

// Tries to disable future read/write access to this block. Succeeds if:
// - no write is in progress AND
// - no read is in progress AND
// - 'discard_data' is set to true OR there is no unread data
// On success, clears the block state and returns with writer_lock and
// reader_lock held. These locks are released by a subsequent
// cl_block_enable_access() call.
static bool cl_block_try_disable_access(cl_block* block, int discard_data) {
  if (!cl_try_lock(&block->writer_lock)) {
    return false;
  }
  if (!cl_try_lock(&block->reader_lock)) {
    cl_unlock(&block->writer_lock);
    return false;
  }
  if (!discard_data &&
      (block->bytes_read != cl_block_get_bytes_committed(block))) {
    cl_unlock(&block->reader_lock);
    cl_unlock(&block->writer_lock);
    return false;
  }
  cl_block_set_bytes_committed(block, 0);
  block->bytes_read = 0;
  return true;
}

static void cl_block_enable_access(cl_block* block) {
  cl_unlock(&block->reader_lock);
  cl_unlock(&block->writer_lock);
}

// Reserves 'size' bytes in the block for writing and returns a pointer to the
// start of the reserved space, with writer_lock held. Returns NULL (without
// holding the lock) if the lock is already held or the block does not have
// 'size' bytes available.
static void* cl_block_start_write(cl_block* block, size_t size) {
  if (!cl_try_lock(&block->writer_lock)) {
    return NULL;
  }
  size_t bytes_committed = cl_block_get_bytes_committed(block);
  if (bytes_committed + size > CENSUS_LOG_MAX_RECORD_SIZE) {
    cl_unlock(&block->writer_lock);
    return NULL;
  }
  return block->buffer + bytes_committed;
}

// Releases writer_lock and increments committed bytes by 'bytes_written'.
// 'bytes_written' must be <= 'size' specified in the corresponding
// cl_block_start_write() call. This function is thread-safe.
static void cl_block_end_write(cl_block* block, size_t bytes_written) {
  cl_block_set_bytes_committed(
      block, cl_block_get_bytes_committed(block) + bytes_written);
  cl_unlock(&block->writer_lock);
}

// Returns a pointer to the first unread byte in the buffer. The number of
// bytes available is returned in 'bytes_available'. Acquires the reader lock,
// which is released by a subsequent cl_block_end_read() call. Returns NULL if:
// - a read is in progress
// - no data is available
static void* cl_block_start_read(cl_block* block, size_t* bytes_available) {
  if (!cl_try_lock(&block->reader_lock)) {
    return NULL;
  }
  // bytes_committed may change from under us. Use bytes_available to update
  // bytes_read below.
  size_t bytes_committed = cl_block_get_bytes_committed(block);
  GPR_ASSERT(bytes_committed >= block->bytes_read);
  *bytes_available = bytes_committed - block->bytes_read;
  if (*bytes_available == 0) {
    cl_unlock(&block->reader_lock);
    return NULL;
  }
  void* record = block->buffer + block->bytes_read;
  block->bytes_read += *bytes_available;
  return record;
}

static void cl_block_end_read(cl_block* block) {
  cl_unlock(&block->reader_lock);
}

// Internal functions operating on g_log

// Allocates a new free block (or recycles an available dirty block if the log
// is configured to discard old records). Returns NULL if out-of-space.
static cl_block* cl_allocate_block(void) {
  cl_block* block = cl_block_list_head(&g_log.free_block_list);
  if (block != NULL) {
    cl_block_list_remove(&g_log.free_block_list, block);
    return block;
  }
  if (!g_log.discard_old_records) {
    // No free block and the log is configured to keep old records.
    return NULL;
  }
  // Recycle a dirty block. Start from the oldest.
  for (block = cl_block_list_head(&g_log.dirty_block_list); block != NULL;
       block = block->link.next->block) {
    if (cl_block_try_disable_access(block, 1 /* discard data */)) {
      cl_block_list_remove(&g_log.dirty_block_list, block);
      return block;
    }
  }
  return NULL;
}

// Allocates a new block and updates core id => block mapping. 'old_block'
// points to the block that the caller thinks is attached to
// 'core_id'. 'old_block' may be NULL. Returns true if:
// - allocated a new block OR
// - 'core_id' => 'old_block' mapping changed (another thread allocated a
//   block before lock was acquired).
static bool cl_allocate_core_local_block(uint32_t core_id,
                                         cl_block* old_block) {
  // Now that we have the lock, check if core-local mapping has changed.
  cl_core_local_block* core_local_block = &g_log.core_local_blocks[core_id];
  cl_block* block = cl_core_local_block_get_block(core_local_block);
  if ((block != NULL) && (block != old_block)) {
    return true;
  }
  if (block != NULL) {
    cl_core_local_block_set_block(core_local_block, NULL);
    cl_block_list_insert_at_tail(&g_log.dirty_block_list, block);
  }
  block = cl_allocate_block();
  if (block == NULL) {
    return false;
  }
  cl_core_local_block_set_block(core_local_block, block);
  cl_block_enable_access(block);
  return true;
}

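// For example, if CENSUS_LOG_2_MAX_RECORD_SIZE were 14 (the actual value is
// defined in log.h), a record starting 40000 bytes into g_log.buffer would
// map to block index 40000 >> 14 == 2 in the function below.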
static cl_block* cl_get_block(void* record) {
  uintptr_t p = (uintptr_t)((char*)record - g_log.buffer);
  uintptr_t index = p >> CENSUS_LOG_2_MAX_RECORD_SIZE;
  return &g_log.blocks[index];
}

// Gets the next block to read and tries to free the 'prev' block (if not
// NULL). Returns NULL if the end has been reached.
static cl_block* cl_next_block_to_read(cl_block* prev) {
  cl_block* block = NULL;
  if (g_log.read_iterator_state == g_log.num_cores) {
    // We are traversing the dirty list; find the next dirty block.
    if (prev != NULL) {
      // Try to free the previous block if there is no unread data. This block
      // may have unread data if a previously incomplete record completed
      // between read_next() calls.
      block = prev->link.next->block;
      if (cl_block_try_disable_access(prev, 0 /* do not discard data */)) {
        cl_block_list_remove(&g_log.dirty_block_list, prev);
        cl_block_list_insert_at_head(&g_log.free_block_list, prev);
      }
    } else {
      block = cl_block_list_head(&g_log.dirty_block_list);
    }
    if (block != NULL) {
      return block;
    }
    // We are done with the dirty list; moving on to core-local blocks.
  }
  while (g_log.read_iterator_state > 0) {
    g_log.read_iterator_state--;
    block = cl_core_local_block_get_block(
        &g_log.core_local_blocks[g_log.read_iterator_state]);
    if (block != NULL) {
      return block;
    }
  }
  return NULL;
}

#define CL_LOG_2_MB 20  // 2^20 = 1MB

// External functions: primary stats_log interface
void census_log_initialize(size_t size_in_mb, int discard_old_records) {
  // Check cacheline alignment.
  GPR_ASSERT(sizeof(cl_block) % GPR_CACHELINE_SIZE == 0);
  GPR_ASSERT(sizeof(cl_core_local_block) % GPR_CACHELINE_SIZE == 0);
  GPR_ASSERT(!g_log.initialized);
  g_log.discard_old_records = discard_old_records;
  g_log.num_cores = gpr_cpu_num_cores();
  // Ensure that we will not get any overflow in calculating num_blocks.
  GPR_ASSERT(CL_LOG_2_MB >= CENSUS_LOG_2_MAX_RECORD_SIZE);
  GPR_ASSERT(size_in_mb < 1000);
  // Ensure at least 2x as many blocks as there are cores.
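  // For example, with size_in_mb == 1 and CENSUS_LOG_2_MAX_RECORD_SIZE == 14
  // (the actual value is defined in log.h), the computation below gives
  // (1 << 20) >> 14 == 64 blocks, unless 2 * num_cores is larger.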
  g_log.num_blocks =
      (uint32_t)GPR_MAX(2 * g_log.num_cores, (size_in_mb << CL_LOG_2_MB) >>
                                                 CENSUS_LOG_2_MAX_RECORD_SIZE);
  gpr_mu_init(&g_log.lock);
  g_log.read_iterator_state = 0;
  g_log.block_being_read = NULL;
  g_log.core_local_blocks = (cl_core_local_block*)gpr_malloc_aligned(
      g_log.num_cores * sizeof(cl_core_local_block), GPR_CACHELINE_SIZE_LOG);
  memset(g_log.core_local_blocks, 0,
         g_log.num_cores * sizeof(cl_core_local_block));
  g_log.blocks = (cl_block*)gpr_malloc_aligned(
      g_log.num_blocks * sizeof(cl_block), GPR_CACHELINE_SIZE_LOG);
  memset(g_log.blocks, 0, g_log.num_blocks * sizeof(cl_block));
  g_log.buffer = gpr_malloc(g_log.num_blocks * CENSUS_LOG_MAX_RECORD_SIZE);
  memset(g_log.buffer, 0, g_log.num_blocks * CENSUS_LOG_MAX_RECORD_SIZE);
  cl_block_list_initialize(&g_log.free_block_list);
  cl_block_list_initialize(&g_log.dirty_block_list);
  for (uint32_t i = 0; i < g_log.num_blocks; ++i) {
    cl_block* block = g_log.blocks + i;
    cl_block_initialize(block, g_log.buffer + (CENSUS_LOG_MAX_RECORD_SIZE * i));
    cl_block_try_disable_access(block, 1 /* discard data */);
    cl_block_list_insert_at_tail(&g_log.free_block_list, block);
  }
  gpr_atm_rel_store(&g_log.out_of_space_count, 0);
  g_log.initialized = 1;
}

void census_log_shutdown(void) {
  GPR_ASSERT(g_log.initialized);
  gpr_mu_destroy(&g_log.lock);
  gpr_free_aligned(g_log.core_local_blocks);
  g_log.core_local_blocks = NULL;
  gpr_free_aligned(g_log.blocks);
  g_log.blocks = NULL;
  gpr_free(g_log.buffer);
  g_log.buffer = NULL;
  g_log.initialized = 0;
}

void* census_log_start_write(size_t size) {
  GPR_ASSERT(size > 0);
  GPR_ASSERT(g_log.initialized);
  if (size > CENSUS_LOG_MAX_RECORD_SIZE) {
    return NULL;
  }
  // Used to bound the number of times block allocation is attempted.
  uint32_t attempts_remaining = g_log.num_blocks;
  uint32_t core_id = gpr_cpu_current_cpu();
  do {
    void* record = NULL;
    cl_block* block =
        cl_core_local_block_get_block(&g_log.core_local_blocks[core_id]);
    if (block && (record = cl_block_start_write(block, size))) {
      return record;
    }
    // Need to allocate a new block. We are here if:
    // - no block is associated with the core OR
    // - a write is in progress on the block OR
    // - the block is out of space
    gpr_mu_lock(&g_log.lock);
    bool allocated = cl_allocate_core_local_block(core_id, block);
    gpr_mu_unlock(&g_log.lock);
    if (!allocated) {
      gpr_atm_no_barrier_fetch_add(&g_log.out_of_space_count, 1);
      return NULL;
    }
  } while (attempts_remaining--);
  // Give up.
  gpr_atm_no_barrier_fetch_add(&g_log.out_of_space_count, 1);
  return NULL;
}

void census_log_end_write(void* record, size_t bytes_written) {
  GPR_ASSERT(g_log.initialized);
  cl_block_end_write(cl_get_block(record), bytes_written);
}

void census_log_init_reader(void) {
  GPR_ASSERT(g_log.initialized);
  gpr_mu_lock(&g_log.lock);
  // If a block is locked for reading, unlock it.
  if (g_log.block_being_read != NULL) {
    cl_block_end_read(g_log.block_being_read);
    g_log.block_being_read = NULL;
  }
  g_log.read_iterator_state = g_log.num_cores;
  gpr_mu_unlock(&g_log.lock);
}

const void* census_log_read_next(size_t* bytes_available) {
  GPR_ASSERT(g_log.initialized);
  gpr_mu_lock(&g_log.lock);
  if (g_log.block_being_read != NULL) {
    cl_block_end_read(g_log.block_being_read);
  }
  do {
    g_log.block_being_read = cl_next_block_to_read(g_log.block_being_read);
    if (g_log.block_being_read != NULL) {
      void* record =
          cl_block_start_read(g_log.block_being_read, bytes_available);
      if (record != NULL) {
        gpr_mu_unlock(&g_log.lock);
        return record;
      }
    }
  } while (g_log.block_being_read != NULL);
  gpr_mu_unlock(&g_log.lock);
  return NULL;
}

size_t census_log_remaining_space(void) {
  GPR_ASSERT(g_log.initialized);
  size_t space = 0;
  gpr_mu_lock(&g_log.lock);
  if (g_log.discard_old_records) {
    // Remaining space is not meaningful; just return the entire log space.
    space = g_log.num_blocks << CENSUS_LOG_2_MAX_RECORD_SIZE;
  } else {
    GPR_ASSERT(g_log.free_block_list.count >= 0);
    space = (size_t)g_log.free_block_list.count * CENSUS_LOG_MAX_RECORD_SIZE;
  }
  gpr_mu_unlock(&g_log.lock);
  return space;
}

int64_t census_log_out_of_space_count(void) {
  GPR_ASSERT(g_log.initialized);
  return gpr_atm_acq_load(&g_log.out_of_space_count);
}