Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 1 | /* |
| 2 | * |
| 3 | * Copyright 2015-2016, Google Inc. |
| 4 | * All rights reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions are |
| 8 | * met: |
| 9 | * |
| 10 | * * Redistributions of source code must retain the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer. |
| 12 | * * Redistributions in binary form must reproduce the above |
| 13 | * copyright notice, this list of conditions and the following disclaimer |
| 14 | * in the documentation and/or other materials provided with the |
| 15 | * distribution. |
| 16 | * * Neither the name of Google Inc. nor the names of its |
| 17 | * contributors may be used to endorse or promote products derived from |
| 18 | * this software without specific prior written permission. |
| 19 | * |
| 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 24 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 25 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 26 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 27 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 28 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | * |
| 32 | */ |
| 33 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 34 | // Implements an efficient in-memory log, optimized for multiple writers and |
| 35 | // a single reader. Available log space is divided up in blocks of |
| 36 | // CENSUS_LOG_MAX_RECORD_SIZE bytes. A block can be in one of the following |
| 37 | // three data structures: |
| 38 | // - Free blocks (free_block_list) |
| 39 | // - Blocks with unread data (dirty_block_list) |
| 40 | // - Blocks currently attached to cores (core_local_blocks[]) |
| 41 | // |
| 42 | // census_log_start_write() moves a block from core_local_blocks[] to the end of |
| 43 | // dirty_block_list when block: |
| 44 | // - is out-of-space OR |
| 45 | // - has an incomplete record (an incomplete record occurs when a thread calls |
| 46 | // census_log_start_write() and is context-switched before calling |
| 47 | // census_log_end_write()). |
| 48 | // So, blocks in dirty_block_list are ordered, from oldest to newest, by the |
| 49 | // time when block is detached from the core. |
| 50 | // |
| 51 | // census_log_read_next() first iterates over dirty_block_list and then |
| 52 | // core_local_blocks[]. It moves completely read blocks from dirty_block_list |
| 53 | // to free_block_list. Blocks in core_local_blocks[] are not freed, even when |
| 54 | // completely read. |
| 55 | // |
| 56 | // If the log is configured to discard old records and free_block_list is empty, |
| 57 | // census_log_start_write() iterates over dirty_block_list to allocate a |
| 58 | // new block. It moves the oldest available block (no pending read/write) to |
| 59 | // core_local_blocks[]. |
| 60 | // |
| 61 | // core_local_block_struct is used to implement a map from core id to the block |
| 62 | // associated with that core. This mapping is advisory. It is possible that the |
| 63 | // block returned by this mapping is no longer associated with that core. This |
| 64 | // mapping is updated, lazily, by census_log_start_write(). |
| 65 | // |
| 66 | // Locking in block struct: |
| 67 | // |
| 68 | // Exclusive g_log.lock must be held before calling any functions operating on |
| 69 | // block structs except census_log_start_write() and census_log_end_write(). |
| 70 | // |
| 71 | // Writes to a block are serialized via writer_lock. census_log_start_write() |
| 72 | // acquires this lock and census_log_end_write() releases it. On failure to |
| 73 | // acquire the lock, writer allocates a new block for the current core and |
| 74 | // updates core_local_block accordingly. |
| 75 | // |
| 76 | // Simultaneous read and write access is allowed. Readers can safely read up to |
| 77 | // committed bytes (bytes_committed). |
| 78 | // |
| 79 | // reader_lock protects the block, currently being read, from getting recycled. |
| 80 | // start_read() acquires reader_lock and end_read() releases the lock. |
| 81 | // |
| 82 | // Read/write access to a block is disabled via try_disable_access(). It returns |
| 83 | // with both writer_lock and reader_lock held. These locks are subsequently |
| 84 | // released by enable_access() to enable access to the block. |
| 85 | // |
| 86 | // A note on naming: Most function/struct names are prepended by cl_ |
| 87 | // (shorthand for census_log). Further, functions that manipulate structures |
| 88 | // include the name of the structure, which will be passed as the first |
| 89 | // argument. E.g. cl_block_initialize() will initialize a cl_block. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 90 | |
| 91 | #include "src/core/census/log.h" |
| 92 | #include <grpc/support/alloc.h> |
| 93 | #include <grpc/support/atm.h> |
| 94 | #include <grpc/support/cpu.h> |
| 95 | #include <grpc/support/log.h> |
| 96 | #include <grpc/support/sync.h> |
| 97 | #include <grpc/support/useful.h> |
| 98 | #include <stdbool.h> |
| 99 | #include <string.h> |
| 100 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 101 | // End of platform specific code |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 102 | |
// Node of an intrusive doubly-linked list. The same type is used for the link
// embedded in every cl_block and for the head/tail sentinel of a
// cl_block_list.
struct census_log_block;
typedef struct census_log_block_list_struct cl_block_list_struct;
struct census_log_block_list_struct {
  cl_block_list_struct* next;      // Following node.
  cl_block_list_struct* prev;      // Preceding node.
  struct census_log_block* block;  // Block owning this node; NULL for a
                                   // list's sentinel node.
};
| 108 | |
| 109 | typedef struct census_log_block { |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 110 | // Pointer to underlying buffer. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 111 | char* buffer; |
| 112 | gpr_atm writer_lock; |
| 113 | gpr_atm reader_lock; |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 114 | // Keeps completely written bytes. Declared atomic because accessed |
| 115 | // simultaneously by reader and writer. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 116 | gpr_atm bytes_committed; |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 117 | // Bytes already read. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 118 | size_t bytes_read; |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 119 | // Links for list. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 120 | cl_block_list_struct link; |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 121 | // We want this structure to be cacheline aligned. We assume the following |
| 122 | // sizes for the various parts on 32/64bit systems: |
| 123 | // type 32b size 64b size |
| 124 | // char* 4 8 |
| 125 | // 3x gpr_atm 12 24 |
| 126 | // size_t 4 8 |
| 127 | // cl_block_list_struct 12 24 |
| 128 | // TOTAL 32 64 |
| 129 | // |
| 130 | // Depending on the size of our cacheline and the architecture, we |
| 131 | // selectively add char buffering to this structure. The size is checked |
| 132 | // via assert in census_log_initialize(). |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 133 | #if defined(GPR_ARCH_64) |
| 134 | #define CL_BLOCK_PAD_SIZE (GPR_CACHELINE_SIZE - 64) |
| 135 | #else |
| 136 | #if defined(GPR_ARCH_32) |
| 137 | #define CL_BLOCK_PAD_SIZE (GPR_CACHELINE_SIZE - 32) |
| 138 | #else |
| 139 | #error "Unknown architecture" |
| 140 | #endif |
| 141 | #endif |
| 142 | #if CL_BLOCK_PAD_SIZE > 0 |
| 143 | char padding[CL_BLOCK_PAD_SIZE]; |
| 144 | #endif |
| 145 | } cl_block; |
| 146 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 147 | // A list of cl_blocks, doubly-linked through cl_block::link. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 148 | typedef struct census_log_block_list { |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 149 | int32_t count; // Number of items in list. |
| 150 | cl_block_list_struct ht; // head/tail of linked list. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 151 | } cl_block_list; |
| 152 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 153 | // Cacheline aligned block pointers to avoid false sharing. Block pointer must |
| 154 | // be initialized via set_block(), before calling other functions |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 155 | typedef struct census_log_core_local_block { |
| 156 | gpr_atm block; |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 157 | // Ensure cachline alignment: we assume sizeof(gpr_atm) == 4 or 8 |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 158 | #if defined(GPR_ARCH_64) |
| 159 | #define CL_CORE_LOCAL_BLOCK_PAD_SIZE (GPR_CACHELINE_SIZE - 8) |
| 160 | #else |
| 161 | #if defined(GPR_ARCH_32) |
| 162 | #define CL_CORE_LOCAL_BLOCK_PAD_SIZE (GPR_CACHELINE_SIZE - 4) |
| 163 | #else |
| 164 | #error "Unknown architecture" |
| 165 | #endif |
| 166 | #endif |
| 167 | #if CL_CORE_LOCAL_BLOCK_PAD_SIZE > 0 |
| 168 | char padding[CL_CORE_LOCAL_BLOCK_PAD_SIZE]; |
| 169 | #endif |
| 170 | } cl_core_local_block; |
| 171 | |
| 172 | struct census_log { |
| 173 | int discard_old_records; |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 174 | // Number of cores (aka hardware-contexts) |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 175 | unsigned num_cores; |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 176 | // number of CENSUS_LOG_2_MAX_RECORD_SIZE blocks in log |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 177 | uint32_t num_blocks; |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 178 | cl_block* blocks; // Block metadata. |
| 179 | cl_core_local_block* core_local_blocks; // Keeps core to block mappings. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 180 | gpr_mu lock; |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 181 | int initialized; // has log been initialized? |
| 182 | // Keeps the state of the reader iterator. A value of 0 indicates that |
| 183 | // iterator has reached the end. census_log_init_reader() resets the value |
| 184 | // to num_core to restart iteration. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 185 | uint32_t read_iterator_state; |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 186 | // Points to the block being read. If non-NULL, the block is locked for |
| 187 | // reading(block_being_read_->reader_lock is held). |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 188 | cl_block* block_being_read; |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 189 | char* buffer; |
| 190 | cl_block_list free_block_list; |
| 191 | cl_block_list dirty_block_list; |
| 192 | gpr_atm out_of_space_count; |
| 193 | }; |
| 194 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 195 | // Single internal log. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 196 | static struct census_log g_log; |
| 197 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 198 | // Functions that operate on an atomic memory location used as a lock. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 199 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 200 | // Returns non-zero if lock is acquired. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 201 | static int cl_try_lock(gpr_atm* lock) { return gpr_atm_acq_cas(lock, 0, 1); } |
| 202 | |
| 203 | static void cl_unlock(gpr_atm* lock) { gpr_atm_rel_store(lock, 0); } |
| 204 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 205 | // Functions that operate on cl_core_local_block's. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 206 | |
| 207 | static void cl_core_local_block_set_block(cl_core_local_block* clb, |
| 208 | cl_block* block) { |
| 209 | gpr_atm_rel_store(&clb->block, (gpr_atm)block); |
| 210 | } |
| 211 | |
| 212 | static cl_block* cl_core_local_block_get_block(cl_core_local_block* clb) { |
| 213 | return (cl_block*)gpr_atm_acq_load(&clb->block); |
| 214 | } |
| 215 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 216 | // Functions that operate on cl_block_list_struct's. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 217 | |
| 218 | static void cl_block_list_struct_initialize(cl_block_list_struct* bls, |
| 219 | cl_block* block) { |
| 220 | bls->next = bls->prev = bls; |
| 221 | bls->block = block; |
| 222 | } |
| 223 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 224 | // Functions that operate on cl_block_list's. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 225 | |
| 226 | static void cl_block_list_initialize(cl_block_list* list) { |
| 227 | list->count = 0; |
| 228 | cl_block_list_struct_initialize(&list->ht, NULL); |
| 229 | } |
| 230 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 231 | // Returns head of *this, or NULL if empty. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 232 | static cl_block* cl_block_list_head(cl_block_list* list) { |
| 233 | return list->ht.next->block; |
| 234 | } |
| 235 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 236 | // Insert element *e after *pos. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 237 | static void cl_block_list_insert(cl_block_list* list, cl_block_list_struct* pos, |
| 238 | cl_block_list_struct* e) { |
| 239 | list->count++; |
| 240 | e->next = pos->next; |
| 241 | e->prev = pos; |
| 242 | e->next->prev = e; |
| 243 | e->prev->next = e; |
| 244 | } |
| 245 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 246 | // Insert block at the head of the list |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 247 | static void cl_block_list_insert_at_head(cl_block_list* list, cl_block* block) { |
| 248 | cl_block_list_insert(list, &list->ht, &block->link); |
| 249 | } |
| 250 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 251 | // Insert block at the tail of the list. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 252 | static void cl_block_list_insert_at_tail(cl_block_list* list, cl_block* block) { |
| 253 | cl_block_list_insert(list, list->ht.prev, &block->link); |
| 254 | } |
| 255 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 256 | // Removes block *b. Requires *b be in the list. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 257 | static void cl_block_list_remove(cl_block_list* list, cl_block* b) { |
| 258 | list->count--; |
| 259 | b->link.next->prev = b->link.prev; |
| 260 | b->link.prev->next = b->link.next; |
| 261 | } |
| 262 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 263 | // Functions that operate on cl_block's |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 264 | |
| 265 | static void cl_block_initialize(cl_block* block, char* buffer) { |
| 266 | block->buffer = buffer; |
| 267 | gpr_atm_rel_store(&block->writer_lock, 0); |
| 268 | gpr_atm_rel_store(&block->reader_lock, 0); |
| 269 | gpr_atm_rel_store(&block->bytes_committed, 0); |
| 270 | block->bytes_read = 0; |
| 271 | cl_block_list_struct_initialize(&block->link, block); |
| 272 | } |
| 273 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 274 | // Guards against exposing partially written buffer to the reader. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 275 | static void cl_block_set_bytes_committed(cl_block* block, |
| 276 | size_t bytes_committed) { |
| 277 | gpr_atm_rel_store(&block->bytes_committed, (gpr_atm)bytes_committed); |
| 278 | } |
| 279 | |
| 280 | static size_t cl_block_get_bytes_committed(cl_block* block) { |
| 281 | return (size_t)gpr_atm_acq_load(&block->bytes_committed); |
| 282 | } |
| 283 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 284 | // Tries to disable future read/write access to this block. Succeeds if: |
| 285 | // - no in-progress write AND |
| 286 | // - no in-progress read AND |
| 287 | // - 'discard_data' set to true OR no unread data |
| 288 | // On success, clears the block state and returns with writer_lock_ and |
| 289 | // reader_lock_ held. These locks are released by a subsequent |
| 290 | // cl_block_access_enable() call. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 291 | static bool cl_block_try_disable_access(cl_block* block, int discard_data) { |
| 292 | if (!cl_try_lock(&block->writer_lock)) { |
| 293 | return false; |
| 294 | } |
| 295 | if (!cl_try_lock(&block->reader_lock)) { |
| 296 | cl_unlock(&block->writer_lock); |
| 297 | return false; |
| 298 | } |
| 299 | if (!discard_data && |
| 300 | (block->bytes_read != cl_block_get_bytes_committed(block))) { |
| 301 | cl_unlock(&block->reader_lock); |
| 302 | cl_unlock(&block->writer_lock); |
| 303 | return false; |
| 304 | } |
| 305 | cl_block_set_bytes_committed(block, 0); |
| 306 | block->bytes_read = 0; |
| 307 | return true; |
| 308 | } |
| 309 | |
| 310 | static void cl_block_enable_access(cl_block* block) { |
| 311 | cl_unlock(&block->reader_lock); |
| 312 | cl_unlock(&block->writer_lock); |
| 313 | } |
| 314 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 315 | // Returns with writer_lock held. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 316 | static void* cl_block_start_write(cl_block* block, size_t size) { |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 317 | if (!cl_try_lock(&block->writer_lock)) { |
| 318 | return NULL; |
| 319 | } |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 320 | size_t bytes_committed = cl_block_get_bytes_committed(block); |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 321 | if (bytes_committed + size > CENSUS_LOG_MAX_RECORD_SIZE) { |
| 322 | cl_unlock(&block->writer_lock); |
| 323 | return NULL; |
| 324 | } |
| 325 | return block->buffer + bytes_committed; |
| 326 | } |
| 327 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 328 | // Releases writer_lock and increments committed bytes by 'bytes_written'. |
| 329 | // 'bytes_written' must be <= 'size' specified in the corresponding |
| 330 | // StartWrite() call. This function is thread-safe. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 331 | static void cl_block_end_write(cl_block* block, size_t bytes_written) { |
| 332 | cl_block_set_bytes_committed( |
| 333 | block, cl_block_get_bytes_committed(block) + bytes_written); |
| 334 | cl_unlock(&block->writer_lock); |
| 335 | } |
| 336 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 337 | // Returns a pointer to the first unread byte in buffer. The number of bytes |
| 338 | // available are returned in 'bytes_available'. Acquires reader lock that is |
| 339 | // released by a subsequent cl_block_end_read() call. Returns NULL if: |
| 340 | // - read in progress |
| 341 | // - no data available |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 342 | static void* cl_block_start_read(cl_block* block, size_t* bytes_available) { |
| 343 | if (!cl_try_lock(&block->reader_lock)) { |
| 344 | return NULL; |
| 345 | } |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 346 | // bytes_committed may change from under us. Use bytes_available to update |
| 347 | // bytes_read below. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 348 | size_t bytes_committed = cl_block_get_bytes_committed(block); |
| 349 | GPR_ASSERT(bytes_committed >= block->bytes_read); |
| 350 | *bytes_available = bytes_committed - block->bytes_read; |
| 351 | if (*bytes_available == 0) { |
| 352 | cl_unlock(&block->reader_lock); |
| 353 | return NULL; |
| 354 | } |
| 355 | void* record = block->buffer + block->bytes_read; |
| 356 | block->bytes_read += *bytes_available; |
| 357 | return record; |
| 358 | } |
| 359 | |
| 360 | static void cl_block_end_read(cl_block* block) { |
| 361 | cl_unlock(&block->reader_lock); |
| 362 | } |
| 363 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 364 | // Internal functions operating on g_log |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 365 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 366 | // Allocates a new free block (or recycles an available dirty block if log is |
| 367 | // configured to discard old records). Returns NULL if out-of-space. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 368 | static cl_block* cl_allocate_block(void) { |
| 369 | cl_block* block = cl_block_list_head(&g_log.free_block_list); |
| 370 | if (block != NULL) { |
| 371 | cl_block_list_remove(&g_log.free_block_list, block); |
| 372 | return block; |
| 373 | } |
| 374 | if (!g_log.discard_old_records) { |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 375 | // No free block and log is configured to keep old records. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 376 | return NULL; |
| 377 | } |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 378 | // Recycle dirty block. Start from the oldest. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 379 | for (block = cl_block_list_head(&g_log.dirty_block_list); block != NULL; |
| 380 | block = block->link.next->block) { |
| 381 | if (cl_block_try_disable_access(block, 1 /* discard data */)) { |
| 382 | cl_block_list_remove(&g_log.dirty_block_list, block); |
| 383 | return block; |
| 384 | } |
| 385 | } |
| 386 | return NULL; |
| 387 | } |
| 388 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 389 | // Allocates a new block and updates core id => block mapping. 'old_block' |
| 390 | // points to the block that the caller thinks is attached to |
| 391 | // 'core_id'. 'old_block' may be NULL. Returns true if: |
| 392 | // - allocated a new block OR |
| 393 | // - 'core_id' => 'old_block' mapping changed (another thread allocated a |
| 394 | // block before lock was acquired). |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 395 | static bool cl_allocate_core_local_block(uint32_t core_id, |
| 396 | cl_block* old_block) { |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 397 | // Now that we have the lock, check if core-local mapping has changed. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 398 | cl_core_local_block* core_local_block = &g_log.core_local_blocks[core_id]; |
| 399 | cl_block* block = cl_core_local_block_get_block(core_local_block); |
| 400 | if ((block != NULL) && (block != old_block)) { |
| 401 | return true; |
| 402 | } |
| 403 | if (block != NULL) { |
| 404 | cl_core_local_block_set_block(core_local_block, NULL); |
| 405 | cl_block_list_insert_at_tail(&g_log.dirty_block_list, block); |
| 406 | } |
| 407 | block = cl_allocate_block(); |
| 408 | if (block == NULL) { |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 409 | return false; |
| 410 | } |
| 411 | cl_core_local_block_set_block(core_local_block, block); |
| 412 | cl_block_enable_access(block); |
| 413 | return true; |
| 414 | } |
| 415 | |
| 416 | static cl_block* cl_get_block(void* record) { |
| 417 | uintptr_t p = (uintptr_t)((char*)record - g_log.buffer); |
| 418 | uintptr_t index = p >> CENSUS_LOG_2_MAX_RECORD_SIZE; |
| 419 | return &g_log.blocks[index]; |
| 420 | } |
| 421 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 422 | // Gets the next block to read and tries to free 'prev' block (if not NULL). |
| 423 | // Returns NULL if reached the end. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 424 | static cl_block* cl_next_block_to_read(cl_block* prev) { |
| 425 | cl_block* block = NULL; |
| 426 | if (g_log.read_iterator_state == g_log.num_cores) { |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 427 | // We are traversing dirty list; find the next dirty block. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 428 | if (prev != NULL) { |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 429 | // Try to free the previous block if there is no unread data. This |
| 430 | // block |
| 431 | // may have unread data if previously incomplete record completed |
| 432 | // between |
| 433 | // read_next() calls. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 434 | block = prev->link.next->block; |
| 435 | if (cl_block_try_disable_access(prev, 0 /* do not discard data */)) { |
| 436 | cl_block_list_remove(&g_log.dirty_block_list, prev); |
| 437 | cl_block_list_insert_at_head(&g_log.free_block_list, prev); |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 438 | } |
| 439 | } else { |
| 440 | block = cl_block_list_head(&g_log.dirty_block_list); |
| 441 | } |
| 442 | if (block != NULL) { |
| 443 | return block; |
| 444 | } |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 445 | // We are done with the dirty list; moving on to core-local blocks. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 446 | } |
| 447 | while (g_log.read_iterator_state > 0) { |
| 448 | g_log.read_iterator_state--; |
| 449 | block = cl_core_local_block_get_block( |
| 450 | &g_log.core_local_blocks[g_log.read_iterator_state]); |
| 451 | if (block != NULL) { |
| 452 | return block; |
| 453 | } |
| 454 | } |
| 455 | return NULL; |
| 456 | } |
| 457 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 458 | #define CL_LOG_2_MB 20 // 2^20 = 1MB |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 459 | |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 460 | // External functions: primary stats_log interface |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 461 | void census_log_initialize(size_t size_in_mb, int discard_old_records) { |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 462 | // Check cacheline alignment. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 463 | GPR_ASSERT(sizeof(cl_block) % GPR_CACHELINE_SIZE == 0); |
| 464 | GPR_ASSERT(sizeof(cl_core_local_block) % GPR_CACHELINE_SIZE == 0); |
| 465 | GPR_ASSERT(!g_log.initialized); |
| 466 | g_log.discard_old_records = discard_old_records; |
| 467 | g_log.num_cores = gpr_cpu_num_cores(); |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 468 | // Ensure that we will not get any overflow in calaculating num_blocks |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 469 | GPR_ASSERT(CL_LOG_2_MB >= CENSUS_LOG_2_MAX_RECORD_SIZE); |
| 470 | GPR_ASSERT(size_in_mb < 1000); |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 471 | // Ensure at least 2x as many blocks as there are cores. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 472 | g_log.num_blocks = |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 473 | (uint32_t)GPR_MAX(2 * g_log.num_cores, (size_in_mb << CL_LOG_2_MB) >> |
| 474 | CENSUS_LOG_2_MAX_RECORD_SIZE); |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 475 | gpr_mu_init(&g_log.lock); |
| 476 | g_log.read_iterator_state = 0; |
| 477 | g_log.block_being_read = NULL; |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 478 | g_log.core_local_blocks = (cl_core_local_block*)gpr_malloc_aligned( |
| 479 | g_log.num_cores * sizeof(cl_core_local_block), GPR_CACHELINE_SIZE_LOG); |
| 480 | memset(g_log.core_local_blocks, 0, |
| 481 | g_log.num_cores * sizeof(cl_core_local_block)); |
| 482 | g_log.blocks = (cl_block*)gpr_malloc_aligned( |
| 483 | g_log.num_blocks * sizeof(cl_block), GPR_CACHELINE_SIZE_LOG); |
| 484 | memset(g_log.blocks, 0, g_log.num_blocks * sizeof(cl_block)); |
| 485 | g_log.buffer = gpr_malloc(g_log.num_blocks * CENSUS_LOG_MAX_RECORD_SIZE); |
| 486 | memset(g_log.buffer, 0, g_log.num_blocks * CENSUS_LOG_MAX_RECORD_SIZE); |
| 487 | cl_block_list_initialize(&g_log.free_block_list); |
| 488 | cl_block_list_initialize(&g_log.dirty_block_list); |
| 489 | for (uint32_t i = 0; i < g_log.num_blocks; ++i) { |
| 490 | cl_block* block = g_log.blocks + i; |
| 491 | cl_block_initialize(block, g_log.buffer + (CENSUS_LOG_MAX_RECORD_SIZE * i)); |
| 492 | cl_block_try_disable_access(block, 1 /* discard data */); |
| 493 | cl_block_list_insert_at_tail(&g_log.free_block_list, block); |
| 494 | } |
| 495 | gpr_atm_rel_store(&g_log.out_of_space_count, 0); |
| 496 | g_log.initialized = 1; |
| 497 | } |
| 498 | |
| 499 | void census_log_shutdown(void) { |
| 500 | GPR_ASSERT(g_log.initialized); |
| 501 | gpr_mu_destroy(&g_log.lock); |
| 502 | gpr_free_aligned(g_log.core_local_blocks); |
| 503 | g_log.core_local_blocks = NULL; |
| 504 | gpr_free_aligned(g_log.blocks); |
| 505 | g_log.blocks = NULL; |
| 506 | gpr_free(g_log.buffer); |
| 507 | g_log.buffer = NULL; |
| 508 | g_log.initialized = 0; |
| 509 | } |
| 510 | |
| 511 | void* census_log_start_write(size_t size) { |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 512 | // Used to bound number of times block allocation is attempted. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 513 | GPR_ASSERT(size > 0); |
| 514 | GPR_ASSERT(g_log.initialized); |
| 515 | if (size > CENSUS_LOG_MAX_RECORD_SIZE) { |
| 516 | return NULL; |
| 517 | } |
| 518 | uint32_t attempts_remaining = g_log.num_blocks; |
| 519 | uint32_t core_id = gpr_cpu_current_cpu(); |
| 520 | do { |
| 521 | void* record = NULL; |
| 522 | cl_block* block = |
| 523 | cl_core_local_block_get_block(&g_log.core_local_blocks[core_id]); |
| 524 | if (block && (record = cl_block_start_write(block, size))) { |
| 525 | return record; |
| 526 | } |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 527 | // Need to allocate a new block. We are here if: |
| 528 | // - No block associated with the core OR |
| 529 | // - Write in-progress on the block OR |
| 530 | // - block is out of space |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 531 | gpr_mu_lock(&g_log.lock); |
| 532 | bool allocated = cl_allocate_core_local_block(core_id, block); |
| 533 | gpr_mu_unlock(&g_log.lock); |
| 534 | if (!allocated) { |
| 535 | gpr_atm_no_barrier_fetch_add(&g_log.out_of_space_count, 1); |
| 536 | return NULL; |
| 537 | } |
| 538 | } while (attempts_remaining--); |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 539 | // Give up. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 540 | gpr_atm_no_barrier_fetch_add(&g_log.out_of_space_count, 1); |
| 541 | return NULL; |
| 542 | } |
| 543 | |
| 544 | void census_log_end_write(void* record, size_t bytes_written) { |
| 545 | GPR_ASSERT(g_log.initialized); |
| 546 | cl_block_end_write(cl_get_block(record), bytes_written); |
| 547 | } |
| 548 | |
| 549 | void census_log_init_reader(void) { |
| 550 | GPR_ASSERT(g_log.initialized); |
| 551 | gpr_mu_lock(&g_log.lock); |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 552 | // If a block is locked for reading unlock it. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 553 | if (g_log.block_being_read != NULL) { |
| 554 | cl_block_end_read(g_log.block_being_read); |
| 555 | g_log.block_being_read = NULL; |
| 556 | } |
| 557 | g_log.read_iterator_state = g_log.num_cores; |
| 558 | gpr_mu_unlock(&g_log.lock); |
| 559 | } |
| 560 | |
| 561 | const void* census_log_read_next(size_t* bytes_available) { |
| 562 | GPR_ASSERT(g_log.initialized); |
| 563 | gpr_mu_lock(&g_log.lock); |
| 564 | if (g_log.block_being_read != NULL) { |
| 565 | cl_block_end_read(g_log.block_being_read); |
| 566 | } |
| 567 | do { |
| 568 | g_log.block_being_read = cl_next_block_to_read(g_log.block_being_read); |
| 569 | if (g_log.block_being_read != NULL) { |
| 570 | void* record = |
| 571 | cl_block_start_read(g_log.block_being_read, bytes_available); |
| 572 | if (record != NULL) { |
| 573 | gpr_mu_unlock(&g_log.lock); |
| 574 | return record; |
| 575 | } |
| 576 | } |
| 577 | } while (g_log.block_being_read != NULL); |
| 578 | gpr_mu_unlock(&g_log.lock); |
| 579 | return NULL; |
| 580 | } |
| 581 | |
| 582 | size_t census_log_remaining_space(void) { |
| 583 | GPR_ASSERT(g_log.initialized); |
| 584 | size_t space = 0; |
| 585 | gpr_mu_lock(&g_log.lock); |
| 586 | if (g_log.discard_old_records) { |
Alistair Veitch | 532519a | 2016-02-11 09:17:12 -0800 | [diff] [blame] | 587 | // Remaining space is not meaningful; just return the entire log space. |
Alistair Veitch | 85afe71 | 2016-02-02 17:58:15 -0800 | [diff] [blame] | 588 | space = g_log.num_blocks << CENSUS_LOG_2_MAX_RECORD_SIZE; |
| 589 | } else { |
| 590 | GPR_ASSERT(g_log.free_block_list.count >= 0); |
| 591 | space = (size_t)g_log.free_block_list.count * CENSUS_LOG_MAX_RECORD_SIZE; |
| 592 | } |
| 593 | gpr_mu_unlock(&g_log.lock); |
| 594 | return space; |
| 595 | } |
| 596 | |
| 597 | int64_t census_log_out_of_space_count(void) { |
| 598 | GPR_ASSERT(g_log.initialized); |
| 599 | return gpr_atm_acq_load(&g_log.out_of_space_count); |
| 600 | } |