Alistair Veitch | 9686dab | 2015-05-26 14:26:47 -0700 | [diff] [blame] | 1 | /* |
| 2 | * |
Alistair Veitch | 0f69072 | 2016-01-13 09:08:38 -0800 | [diff] [blame] | 3 | * Copyright 2015-2016, Google Inc. |
Alistair Veitch | 9686dab | 2015-05-26 14:26:47 -0700 | [diff] [blame] | 4 | * All rights reserved. |
| 5 | * |
| 6 | * Redistribution and use in source and binary forms, with or without |
| 7 | * modification, are permitted provided that the following conditions are |
| 8 | * met: |
| 9 | * |
| 10 | * * Redistributions of source code must retain the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer. |
| 12 | * * Redistributions in binary form must reproduce the above |
| 13 | * copyright notice, this list of conditions and the following disclaimer |
| 14 | * in the documentation and/or other materials provided with the |
| 15 | * distribution. |
| 16 | * * Neither the name of Google Inc. nor the names of its |
| 17 | * contributors may be used to endorse or promote products derived from |
| 18 | * this software without specific prior written permission. |
| 19 | * |
| 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 24 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 25 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 26 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 27 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 28 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | * |
| 32 | */ |
| 33 | |
| 34 | /* RPC-internal Census API's. These are designed to be generic enough that |
| 35 | * they can (ultimately) be used in many different RPC systems (with differing |
| 36 | * implementations). */ |
| 37 | |
| 38 | #ifndef CENSUS_CENSUS_H |
| 39 | #define CENSUS_CENSUS_H |
| 40 | |
| 41 | #include <grpc/grpc.h> |
Alistair Veitch | 9686dab | 2015-05-26 14:26:47 -0700 | [diff] [blame] | 42 | |
Nicolas "Pixel" Noble | 1ed15e2 | 2015-06-09 02:24:35 +0200 | [diff] [blame] | 43 | #ifdef __cplusplus |
| 44 | extern "C" { |
| 45 | #endif |
| 46 | |
Alistair Veitch | 925e4a6 | 2015-07-26 16:51:23 -0700 | [diff] [blame] | 47 | /* Identify census features that can be enabled via census_initialize(). */ |
| 48 | enum census_features { |
| 49 | CENSUS_FEATURE_NONE = 0, /* Do not enable census. */ |
| 50 | CENSUS_FEATURE_TRACING = 1, /* Enable census tracing. */ |
| 51 | CENSUS_FEATURE_STATS = 2, /* Enable Census stats collection. */ |
| 52 | CENSUS_FEATURE_CPU = 4, /* Enable Census CPU usage collection. */ |
| 53 | CENSUS_FEATURE_ALL = |
| 54 | CENSUS_FEATURE_TRACING | CENSUS_FEATURE_STATS | CENSUS_FEATURE_CPU |
Alistair Veitch | 9686dab | 2015-05-26 14:26:47 -0700 | [diff] [blame] | 55 | }; |
| 56 | |
Alistair Veitch | a4c4d3c | 2015-07-28 14:36:22 -0700 | [diff] [blame] | 57 | /** Start up and shut down the census subsystem. The 'features' argument to |
Alistair Veitch | 925e4a6 | 2015-07-26 16:51:23 -0700 | [diff] [blame] | 58 | * census_initialize() should be the bitwise OR (|) of census_features values. |
Alistair Veitch | 2696762 | 2015-06-01 10:45:54 -0700 | [diff] [blame] | 59 | * census_initialize() returns a non-zero value if census fails to initialize. |
| 60 | * It is an error to call census_initialize() more than once without an |
| 61 | * intervening census_shutdown(). */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 62 | CENSUS_API int census_initialize(int features); |
| 63 | CENSUS_API void census_shutdown(void); |
Alistair Veitch | 9686dab | 2015-05-26 14:26:47 -0700 | [diff] [blame] | 64 | |
Alistair Veitch | 925e4a6 | 2015-07-26 16:51:23 -0700 | [diff] [blame] | 65 | /** Return the features supported by the current census implementation (not all |
| 66 | * features will be available on all platforms). */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 67 | CENSUS_API int census_supported(void); |
Alistair Veitch | 925e4a6 | 2015-07-26 16:51:23 -0700 | [diff] [blame] | 68 | |
| 69 | /** Return the census features currently enabled. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 70 | CENSUS_API int census_enabled(void); |
Alistair Veitch | fc62ddd | 2015-06-29 02:52:46 -0700 | [diff] [blame] | 71 | |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 72 | /** |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 73 | A Census Context is a handle used by Census to represent the current tracing |
| 74 | and stats collection information. Contexts should be propagated across RPC's |
| 75 | (this is the responsibility of the local RPC system). A context is typically |
| 76 | used as the first argument to most census functions. Conceptually, they |
| 77 | should be thought of as specific to a single RPC/thread. The user visible |
| 78 | context representation is that of a collection of key:value string pairs, |
| 79 | each of which is termed a 'tag'; these form the basis against which Census |
| 80 | metrics will be recorded. Keys are unique within a context. */ |
Alistair Veitch | 9686dab | 2015-05-26 14:26:47 -0700 | [diff] [blame] | 81 | typedef struct census_context census_context; |
| 82 | |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 83 | /* A tag is a key:value pair. The key is a non-empty, printable (UTF-8 |
| 84 | encoded), NUL-terminated string. The value is a binary string that may be |
| 85 | printable. There are limits on the sizes of both keys and values (see the |
| 86 | CENSUS_MAX_TAG_KV_LEN definition below), and on the number of tags that can |
| 87 | be propagated (CENSUS_MAX_PROPAGATED_TAGS). Users should also remember that |
| 88 | some systems may have limits on, e.g., the number of bytes that can be |
| 89 | transmitted as metadata, and that larger tags mean more memory consumed |
| 90 | and more time spent in processing. */ |
| 91 | typedef struct { |
| 92 | const char *key; /* NUL-terminated tag key */ |
| 93 | const char *value; /* tag value; may be binary, so do not assume NUL termination */ |
| 94 | size_t value_len; /* presumably the length of 'value' in bytes - confirm */ |
| 95 | uint8_t flags; /* presumably an OR of the CENSUS_TAG_* flag values - confirm */ |
| 96 | } census_tag; |
| 97 | |
| 98 | /* Maximum length of a tag's key or value. */ |
| 99 | #define CENSUS_MAX_TAG_KV_LEN 255 |
| 100 | /* Maximum number of propagatable tags. */ |
| 101 | #define CENSUS_MAX_PROPAGATED_TAGS 255 |
| 102 | |
/* Tag flags. Each flag occupies its own bit, so several flags can be combined
   in a single value with bitwise OR. */
#define CENSUS_TAG_PROPAGATE 1 /* Tag should be propagated over RPC */
#define CENSUS_TAG_STATS 2     /* Tag will be used for statistics aggregation */
#define CENSUS_TAG_BINARY 4    /* Tag value is not printable */
#define CENSUS_TAG_RESERVED 8  /* Reserved for internal use. */
/* Flag values 8,16,32,64,128 are reserved for future/internal use. Clients
   should not use or rely on their values. */

/* Flag tests. Each macro evaluates to a non-zero value (the flag's bit value,
   not necessarily 1) when the corresponding bit is set in 'flags', and to 0
   otherwise. The argument is parenthesized so that a compound expression such
   as CENSUS_TAG_IS_BINARY(a | b) tests the whole expression; without the
   parentheses '&' would bind only to the last operand of the argument. */
#define CENSUS_TAG_IS_PROPAGATED(flags) ((flags) & CENSUS_TAG_PROPAGATE)
#define CENSUS_TAG_IS_STATS(flags) ((flags) & CENSUS_TAG_STATS)
#define CENSUS_TAG_IS_BINARY(flags) ((flags) & CENSUS_TAG_BINARY)
| 114 | |
| 115 | /* An instance of this structure is kept by every context, and records the |
| 116 | basic information associated with the creation of that context. */ |
| 117 | typedef struct { |
| 118 | int n_propagated_tags; /* number of propagated printable tags */ |
| 119 | int n_propagated_binary_tags; /* number of propagated binary tags */ |
| 120 | int n_local_tags; /* number of non-propagated (local) tags */ |
| 121 | int n_deleted_tags; /* number of tags that were deleted */ |
| 122 | int n_added_tags; /* number of tags that were added */ |
| 123 | int n_modified_tags; /* number of tags that were modified */ |
| 124 | int n_invalid_tags; /* number of tags with bad keys or values (e.g. |
| 125 | longer than CENSUS_MAX_TAG_KV_LEN) */ |
| 126 | int n_ignored_tags; /* number of tags ignored because of |
| 127 | CENSUS_MAX_PROPAGATED_TAGS limit. */ |
| 128 | } census_context_status; |
| 129 | |
| 130 | /* Create a new context, adding and removing tags from an existing context. |
| 131 | This will copy all tags from the 'tags' input, so it is recommended |
| 132 | to add as many tags in a single operation as is practical for the client. |
| 133 | @param base Base context to build upon. Can be NULL. |
| 134 | @param tags A set of tags to be added/changed/deleted. Tags with keys that |
| 135 | are in 'tags', but not 'base', are added to the tag set. Keys that are in |
| 136 | both 'tags' and 'base' will have their value/flags modified. Tags with keys |
| 137 | in both, but with NULL or zero-length values, will be deleted from the tag |
| 138 | set. Tags with invalid (too long or short) keys or values will be ignored. |
| 139 | If adding a tag will result in more than CENSUS_MAX_PROPAGATED_TAGS in either |
| 140 | binary or non-binary tags, they will be ignored, as will deletions of |
| 141 | tags that don't exist. |
| 142 | @param ntags number of tags in 'tags' |
| 143 | @param status If not NULL, will return a pointer to a census_context_status |
| 144 | structure containing information about the new context and status of the |
| 145 | tags used in its creation. |
| 146 | @return A new, valid census_context. |
| 147 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 148 | CENSUS_API census_context *census_context_create( |
| 149 | const census_context *base, const census_tag *tags, int ntags, |
| 150 | census_context_status const **status); |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 151 | |
| 152 | /* Destroy a context. Once this function has been called, the context cannot |
| 153 | be reused. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 154 | CENSUS_API void census_context_destroy(census_context *context); |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 155 | |
| 156 | /* Get a pointer to the original status from the context creation. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 157 | CENSUS_API const census_context_status *census_context_get_status( |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 158 | const census_context *context); |
| 159 | |
| 160 | /* Structure used for iterating over the tags in a context. API clients should |
| 161 | not use or reference internal fields - neither their contents nor their |
| 162 | presence/absence is guaranteed. */ |
| 163 | typedef struct { |
| 164 | const census_context *context; |
| 165 | int base; |
| 166 | int index; |
| 167 | char *kvm; |
| 168 | } census_context_iterator; |
| 169 | |
| 170 | /* Initialize a census_context_iterator. Must be called before first use. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 171 | CENSUS_API void census_context_initialize_iterator( |
| 172 | const census_context *context, census_context_iterator *iterator); |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 173 | |
| 174 | /* Get the contents of the "next" tag in the context. If there are no more |
| 175 | tags, returns 0 (and 'tag' contents will be unchanged), otherwise returns 1. |
| 176 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 177 | CENSUS_API int census_context_next_tag(census_context_iterator *iterator, |
| 178 | census_tag *tag); |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 179 | |
| 180 | /* Get a context tag by key. Returns 0 if the key is not present. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 181 | CENSUS_API int census_context_get_tag(const census_context *context, |
| 182 | const char *key, census_tag *tag); |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 183 | |
| 184 | /* Tag set encode/decode functionality. These functions are intended |
| 185 | for use by RPC systems only, for purposes of transmitting/receiving contexts. |
| 186 | */ |
| 187 | |
| 188 | /* Encode a context into a buffer. The propagated tags are encoded into the |
| 189 | buffer in two regions: one for printable tags, and one for binary tags. |
| 190 | @param context context to be encoded |
| 191 | @param buffer pointer to buffer. This address will be used to encode the |
| 192 | printable tags. |
| 193 | @param buf_size number of available bytes in buffer. |
| 194 | @param print_buf_size Will be set to the number of bytes consumed by |
| 195 | printable tags. |
| 196 | @param bin_buf_size Will be set to the number of bytes used to encode the |
| 197 | binary tags. |
| 198 | @return A pointer to the encoded binary tags, or NULL if the buffer was |
| 199 | insufficiently large to hold the encoded tags. Thus, if successful, |
| 200 | printable tags are encoded into |
| 201 | [buffer, buffer + *print_buf_size) and binary tags into |
| 202 | [returned-ptr, returned-ptr + *bin_buf_size) (and the returned |
| 203 | pointer should be buffer + *print_buf_size) */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 204 | CENSUS_API char *census_context_encode(const census_context *context, |
| 205 | char *buffer, size_t buf_size, |
| 206 | size_t *print_buf_size, |
| 207 | size_t *bin_buf_size); |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 208 | |
| 209 | /* Decode context buffers encoded with census_context_encode(). Returns NULL |
| 210 | if there is an error in parsing either buffer. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 211 | CENSUS_API census_context *census_context_decode(const char *buffer, |
| 212 | size_t size, |
| 213 | const char *bin_buffer, |
| 214 | size_t bin_size); |
Alistair Veitch | 9686dab | 2015-05-26 14:26:47 -0700 | [diff] [blame] | 215 | |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 216 | /* Distributed traces can have a number of options. */ |
| 217 | enum census_trace_mask_values { |
Alistair Veitch | 0383d49 | 2015-07-26 15:29:00 -0700 | [diff] [blame] | 218 | CENSUS_TRACE_MASK_NONE = 0, /* Default, empty flags */ |
| 219 | CENSUS_TRACE_MASK_IS_SAMPLED = 1 /* RPC tracing enabled for this context. */ |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 220 | }; |
Alistair Veitch | 9686dab | 2015-05-26 14:26:47 -0700 | [diff] [blame] | 221 | |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 222 | /** Get the current trace mask associated with this context. The value returned |
| 223 | will be the logical or of census_trace_mask_values values. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 224 | CENSUS_API int census_trace_mask(const census_context *context); |
Alistair Veitch | 491a9d4 | 2015-07-21 10:08:37 -0700 | [diff] [blame] | 225 | |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 226 | /** Set the trace mask associated with a context. NOTE(review): this function takes no context argument, so it is unclear from the declaration which context is affected - confirm against the implementation. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 227 | CENSUS_API void census_set_trace_mask(int trace_mask); |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 228 | |
| 229 | /* The concept of "operation" is a fundamental concept for Census. In an RPC |
| 230 | system, an operation typically represents a single RPC, or a significant |
| 231 | sub-part thereof (e.g. a single logical "read" RPC to a distributed storage |
| 232 | system might do several other actions in parallel, from looking up metadata |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 233 | indices to making requests of other services - each of these could be a |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 234 | sub-operation with the larger RPC operation). Census uses operations for the |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 235 | following: |
Alistair Veitch | 491a9d4 | 2015-07-21 10:08:37 -0700 | [diff] [blame] | 236 | |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 237 | CPU accounting: If enabled, census will measure the thread CPU time |
| 238 | consumed between operation start and end times. |
| 239 | |
| 240 | Active operations: Census will maintain information on all currently |
| 241 | active operations. |
| 242 | |
| 243 | Distributed tracing: Each operation serves as a logical trace span. |
| 244 | |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 245 | Stats collection: Stats are broken down by operation (e.g. latency |
| 246 | breakdown for each unique RPC path). |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 247 | |
| 248 | The following functions serve to delineate the start and stop points for |
| 249 | each logical operation. */ |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 250 | |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 251 | /** |
Alistair Veitch | f886985 | 2015-08-25 15:24:49 -0700 | [diff] [blame] | 252 | This structure represents a timestamp as used by census to record the time |
| 253 | at which an operation begins. |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 254 | */ |
Alistair Veitch | f886985 | 2015-08-25 15:24:49 -0700 | [diff] [blame] | 255 | typedef struct { |
| 256 | /* Use gpr_timespec for default implementation. High performance |
| 257 | * implementations should use a cycle-counter based timestamp. */ |
| 258 | gpr_timespec ts; |
| 259 | } census_timestamp; |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 260 | |
| 261 | /** |
| 262 | Mark the beginning of an RPC operation. The information required to call the |
| 263 | functions to record the start of RPC operations (both client and server) may |
| 264 | not be callable at the true start time of the operation, due to information |
| 265 | not being available (e.g. the census context data will not be available in a |
| 266 | server RPC until at least initial metadata has been processed). To ensure |
| 267 | correct CPU accounting and latency recording, RPC systems can call this |
| 268 | function to get the timestamp of operation beginning. This can later be used |
| 269 | as an argument to census_start_{client,server}_rpc_op(). NB: for correct |
| 270 | CPU accounting, the system must guarantee that the same thread is used |
| 271 | for all request processing after this function is called. |
| 272 | |
| 273 | @return A timestamp representing the operation start time. |
| 274 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 275 | CENSUS_API census_timestamp census_start_rpc_op_timestamp(void); |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 276 | |
| 277 | /** |
| 278 | Represent functions to map RPC name ID to service/method names. Census |
| 279 | breaks down all RPC stats by service and method names. We leave the |
| 280 | definition and format of these to the RPC system. For efficiency purposes, |
| 281 | we encode these as a single 64 bit identifier, and allow the RPC system to |
| 282 | provide a structure for functions that can convert these to service and |
| 283 | method strings. |
| 284 | |
| 285 | TODO(aveitch): Instead of providing this as an argument to the rpc_start_op() |
| 286 | functions, maybe it should be set once at census initialization. |
| 287 | */ |
| 288 | typedef struct { |
Craig Tiller | 7536af0 | 2015-12-22 13:49:30 -0800 | [diff] [blame] | 289 | const char *(*get_rpc_service_name)(int64_t id); |
| 290 | const char *(*get_rpc_method_name)(int64_t id); |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 291 | } census_rpc_name_info; |
| 292 | |
| 293 | /** |
| 294 | Start a client rpc operation. This function should be called as early in the |
| 295 | client RPC path as possible. This function will create a new context. If |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 296 | the context argument is non-null, then the new context will inherit all |
| 297 | its properties, with the following changes: |
| 298 | - create a new operation ID for the new context, marking it as a child of |
| 299 | the previous operation. |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 300 | - use the new RPC path and peer information for tracing and stats |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 301 | collection purposes, rather than those from the original context |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 302 | |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 303 | If the context argument is NULL, then a new root context is created. This |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 304 | is particularly important for tracing purposes (the trace spans generated |
| 305 | will be unassociated with any other trace spans, except those |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 306 | downstream). The trace_mask will be used for tracing operations associated |
| 307 | with the new context. |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 308 | |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 309 | In some RPC systems (e.g. where load balancing is used), peer information |
| 310 | may not be available at the time the operation starts. In this case, use a |
| 311 | NULL value for peer, and set it later using the |
| 312 | census_set_rpc_client_peer() function. |
| 313 | |
| 314 | @param context The parent context. Can be NULL. |
| 315 | @param rpc_name_id The rpc name identifier to be associated with this RPC. |
| 316 | @param rpc_name_info Used to decode rpc_name_id. |
| 317 | @param peer RPC peer. If not available at the time, NULL can be used, |
| 318 | and a later census_set_rpc_client_peer() call made. |
| 319 | @param trace_mask An OR of census_trace_mask_values values. Only used in |
| 320 | the creation of a new root context (context == NULL). |
| 321 | @param start_time A timestamp returned from census_start_rpc_op_timestamp(). |
| 322 | Can be NULL. Used to set the true time the operation |
| 323 | begins. |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 324 | |
| 325 | @return A new census context. |
| 326 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 327 | CENSUS_API census_context *census_start_client_rpc_op( |
Craig Tiller | 7536af0 | 2015-12-22 13:49:30 -0800 | [diff] [blame] | 328 | const census_context *context, int64_t rpc_name_id, |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 329 | const census_rpc_name_info *rpc_name_info, const char *peer, int trace_mask, |
| 330 | const census_timestamp *start_time); |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 331 | |
| 332 | /** |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 333 | Add peer information to a context representing a client RPC operation. |
| 334 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 335 | CENSUS_API void census_set_rpc_client_peer(census_context *context, |
| 336 | const char *peer); |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 337 | |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 338 | /** |
| 339 | Start a server RPC operation. Returns a new context to be used in future |
| 340 | census calls. If buffer is non-NULL, then the buffer contents should |
| 341 | represent the client context, as generated by census_context_serialize(). |
| 342 | If buffer is NULL, a new root context is created. |
| 343 | |
| 344 | @param buffer Buffer containing bytes output from census_context_serialize(). |
| 345 | @param rpc_name_id The rpc name identifier to be associated with this RPC. |
| 346 | @param rpc_name_info Used to decode rpc_name_id. |
| 347 | @param peer RPC peer. |
| 348 | @param trace_mask An OR of census_trace_mask_values values. Only used in |
| 349 | the creation of a new root context (buffer == NULL). |
| 350 | @param start_time A timestamp returned from census_start_rpc_op_timestamp(). |
| 351 | Can be NULL. Used to set the true time the operation |
| 352 | begins. |
| 353 | |
| 354 | @return A new census context. |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 355 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 356 | CENSUS_API census_context *census_start_server_rpc_op( |
Craig Tiller | 7536af0 | 2015-12-22 13:49:30 -0800 | [diff] [blame] | 357 | const char *buffer, int64_t rpc_name_id, |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 358 | const census_rpc_name_info *rpc_name_info, const char *peer, int trace_mask, |
| 359 | census_timestamp *start_time); |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 360 | |
| 361 | /** |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 362 | Start a new, non-RPC operation. In general, this function works very |
| 363 | similarly to census_start_client_rpc_op, with the primary difference being |
| 364 | the replacement of host/path information with the more generic family/name |
| 365 | tags. If the context argument is non-null, then the new context will |
| 366 | inherit all its properties, with the following changes: |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 367 | - create a new operation ID for the new context, marking it as a child of |
| 368 | the previous operation. |
| 369 | - use the family and name information for tracing and stats collection |
| 370 | purposes, rather than those from the original context |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 371 | |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 372 | If the context argument is NULL, then a new root context is created. This |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 373 | is particularly important for tracing purposes (the trace spans generated |
| 374 | will be unassociated with any other trace spans, except those |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 375 | downstream). The trace_mask will be used for tracing |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 376 | operations associated with the new context. |
| 377 | |
| 378 | @param context The base context. Can be NULL. |
| 379 | @param family Family name to associate with the trace |
| 380 | @param name Name within family to associate with traces/stats |
Alistair Veitch | e6d0ad3 | 2015-08-13 09:59:48 -0700 | [diff] [blame] | 381 | @param trace_mask An OR of census_trace_mask_values values. Only used if |
| 382 | context is NULL. |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 383 | |
| 384 | @return A new census context. |
| 385 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 386 | CENSUS_API census_context *census_start_op(census_context *context, |
| 387 | const char *family, const char *name, |
| 388 | int trace_mask); |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 389 | |
Alistair Veitch | 0879df2 | 2015-08-13 14:32:05 -0700 | [diff] [blame] | 390 | /** |
| 391 | End an operation started by any of the census_start_*_op*() calls. The |
| 392 | context used in this call will no longer be valid once this function |
| 393 | completes. |
| 394 | |
| 395 | @param context Context associated with operation which is ending. |
| 396 | @param status status associated with the operation. Not interpreted by |
| 397 | census. |
| 398 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 399 | CENSUS_API void census_end_op(census_context *context, int status); |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 400 | |
Craig Tiller | 7536af0 | 2015-12-22 13:49:30 -0800 | [diff] [blame] | 401 | #define CENSUS_TRACE_RECORD_START_OP ((uint32_t)0) |
| 402 | #define CENSUS_TRACE_RECORD_END_OP ((uint32_t)1) |
Alistair Veitch | 6afe53f | 2015-08-28 14:05:15 -0700 | [diff] [blame] | 403 | |
Alistair Veitch | af5002f | 2015-07-26 15:11:50 -0700 | [diff] [blame] | 404 | /** Insert a trace record into the trace stream. The record consists of an |
Alistair Veitch | 6afe53f | 2015-08-28 14:05:15 -0700 | [diff] [blame] | 405 | arbitrary size buffer, the size of which is provided in 'n'. |
| 406 | @param context Trace context |
| 407 | @param type User-defined type to associate with trace entry. |
| 408 | @param buffer Pointer to buffer to use |
| 409 | @param n Number of bytes in buffer |
| 410 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 411 | CENSUS_API void census_trace_print(census_context *context, uint32_t type, |
| 412 | const char *buffer, size_t n); |
Alistair Veitch | 6afe53f | 2015-08-28 14:05:15 -0700 | [diff] [blame] | 413 | |
| 414 | /** Trace record. */ |
| 415 | typedef struct { |
| 416 | census_timestamp timestamp; /* Time of record creation */ |
Craig Tiller | 7536af0 | 2015-12-22 13:49:30 -0800 | [diff] [blame] | 417 | uint64_t trace_id; /* Trace ID associated with record */ |
| 418 | uint64_t op_id; /* Operation ID associated with record */ |
| 419 | uint32_t type; /* Type (as used in census_trace_print()) */ |
Alistair Veitch | 6afe53f | 2015-08-28 14:05:15 -0700 | [diff] [blame] | 420 | const char *buffer; /* Buffer (from census_trace_print()) */ |
| 421 | size_t buf_size; /* Number of bytes inside buffer */ |
| 422 | } census_trace_record; |
| 423 | |
| 424 | /** Start a scan of existing trace records. While a scan is ongoing, addition |
| 425 | of new trace records will be blocked if the underlying trace buffers |
| 426 | fill up, so trace processing systems should endeavor to complete |
| 427 | reading as soon as possible. |
| 428 | @param consume if non-zero, indicates that reading records also "consumes" |
| 429 | the previously read record - i.e. releases space in the trace log |
| 430 | while scanning is ongoing. |
| 431 | @returns 0 on success, non-zero on failure (e.g. if a scan is already ongoing) |
| 432 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 433 | CENSUS_API int census_trace_scan_start(int consume); |
Alistair Veitch | 6afe53f | 2015-08-28 14:05:15 -0700 | [diff] [blame] | 434 | |
| 435 | /** Get a trace record. The data pointed to by the trace buffer is guaranteed |
| 436 | stable until the next census_get_trace_record() call (if the consume |
| 437 | argument to census_trace_scan_start was non-zero) or census_trace_scan_end() |
| 438 | is called (otherwise). |
| 439 | @param trace_record structure that will be filled in with oldest trace record. |
| 440 | @returns -1 if an error occurred (e.g. no previous call to |
| 441 | census_trace_scan_start()), 0 if there is no more trace data (and |
| 442 | trace_record will not be modified) or 1 otherwise. |
| 443 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 444 | CENSUS_API int census_get_trace_record(census_trace_record *trace_record); |
Alistair Veitch | 6afe53f | 2015-08-28 14:05:15 -0700 | [diff] [blame] | 445 | |
| 446 | /** End a scan previously started by census_trace_scan_start() */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 447 | CENSUS_API void census_trace_scan_end(); |
Alistair Veitch | 9686dab | 2015-05-26 14:26:47 -0700 | [diff] [blame] | 448 | |
Alistair Veitch | e62f68c | 2015-08-27 16:24:27 -0700 | [diff] [blame] | 449 | /* Core stats collection APIs. The following concepts are used: |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 450 | * Aggregation: A collection of values. Census supports the following |
| 451 | aggregation types: |
Alistair Veitch | aafe972 | 2015-08-31 13:15:00 -0700 | [diff] [blame] | 452 | Sum - a single summation type. Typically used for keeping (e.g.) |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 453 | counts of events. |
| 454 | Distribution - statistical distribution information, used for |
| 455 | recording average, standard deviation etc. |
| 456 | Histogram - a histogram of measurements falling in defined bucket |
| 457 | boundaries. |
| 458 | Window - a count of events that happen in a rolling time window. |
| 459 | New aggregation types can be added by the user, if desired (see |
| 460 | census_register_aggregation()). |
| 461 | * Metric: Each measurement is for a single metric. Examples include RPC |
| 462 | latency, CPU seconds consumed, and bytes transmitted. |
Alistair Veitch | b855202 | 2015-08-28 10:54:17 -0700 | [diff] [blame] | 463 | * View: A view is a combination of a metric, a tag set (in which the tag |
| 464 | values are regular expressions) and a set of aggregations. When a |
| 465 | measurement for a metric matches the view tags, it is recorded (for each |
| 466 | unique set of tags) against each aggregation. Each metric can have an |
| 467 | arbitrary number of views by which it will be broken down. |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 468 | */ |
| 469 | |
| 470 | /* A single value to be recorded comprises two parts: an ID for the particular |
| 471 | * metric and the value to be recorded against it. */ |
Alistair Veitch | 851032a | 2015-07-20 11:59:13 -0700 | [diff] [blame] | 472 | typedef struct { |
Craig Tiller | 7536af0 | 2015-12-22 13:49:30 -0800 | [diff] [blame] | 473 | uint32_t metric_id; |
Alistair Veitch | 851032a | 2015-07-20 11:59:13 -0700 | [diff] [blame] | 474 | double value; |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 475 | } census_value; |
Alistair Veitch | 851032a | 2015-07-20 11:59:13 -0700 | [diff] [blame] | 476 | |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 477 | /* Record new usage values against the given context. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 478 | CENSUS_API void census_record_values(census_context *context, |
| 479 | census_value *values, size_t nvalues); |
Alistair Veitch | 4d1589a | 2015-07-17 15:13:04 -0700 | [diff] [blame] | 480 | |
Alistair Veitch | 1c09acc | 2015-08-31 16:57:32 -0700 | [diff] [blame] | 481 | /** Type representing a particular aggregation */ |
| 482 | typedef struct census_aggregation_ops census_aggregation_ops; |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 483 | |
Alistair Veitch | 1c09acc | 2015-08-31 16:57:32 -0700 | [diff] [blame] | 484 | /* Predefined aggregation types, for use with census_view_create(). */ |
Alistair Veitch | aafe972 | 2015-08-31 13:15:00 -0700 | [diff] [blame] | 485 | extern census_aggregation_ops census_agg_sum; |
Alistair Veitch | a24148e | 2015-08-31 08:30:32 -0700 | [diff] [blame] | 486 | extern census_aggregation_ops census_agg_distribution; |
| 487 | extern census_aggregation_ops census_agg_histogram; |
| 488 | extern census_aggregation_ops census_agg_window; |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 489 | |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 490 | /** Information needed to instantiate a new aggregation. Used in view |
| 491 | construction via census_view_create(). */ |
| 492 | typedef struct { |
Alistair Veitch | a24148e | 2015-08-31 08:30:32 -0700 | [diff] [blame] | 493 | const census_aggregation_ops *ops; |
Alistair Veitch | a628ac9 | 2016-02-02 10:02:30 -0800 | [diff] [blame] | 494 | const void *create_arg; /* Aggregation initialization argument. */ |
Alistair Veitch | a24148e | 2015-08-31 08:30:32 -0700 | [diff] [blame] | 495 | } census_aggregation; |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 496 | |
Alistair Veitch | b855202 | 2015-08-28 10:54:17 -0700 | [diff] [blame] | 497 | /** A census view type. Opaque. */ |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 498 | typedef struct census_view census_view; |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 499 | |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 500 | /** Create a new view. |
Alistair Veitch | e62f68c | 2015-08-27 16:24:27 -0700 | [diff] [blame] | 501 | @param metric_id Metric with which this view is associated. |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 502 | @param tags tags that define the view. |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 503 | @param aggregations aggregations to associate with the view |
| 504 | @param naggregations number of aggregations |
| 505 | |
| 506 | @return A new census view |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 507 | */ |
Alistair Veitch | ddb163a | 2016-02-02 13:33:44 -0800 | [diff] [blame^] | 508 | |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 509 | /* TODO(aveitch): consider if context is the right argument type to pass in |
| 510 | tags. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 511 | CENSUS_API census_view *census_view_create( |
| 512 | uint32_t metric_id, const census_context *tags, |
| 513 | const census_aggregation *aggregations, size_t naggregations); |
Alistair Veitch | b855202 | 2015-08-28 10:54:17 -0700 | [diff] [blame] | 514 | |
| 515 | /** Destroy a previously created view. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 516 | CENSUS_API void census_view_delete(census_view *view); |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 517 | |
Alistair Veitch | e62f68c | 2015-08-27 16:24:27 -0700 | [diff] [blame] | 518 | /** Metric ID associated with a view. NOTE(review): returned as size_t, whereas census_view_create() and census_value carry the metric ID as uint32_t - confirm the intended type. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 519 | CENSUS_API size_t census_view_metric(const census_view *view); |
Alistair Veitch | e62f68c | 2015-08-27 16:24:27 -0700 | [diff] [blame] | 520 | |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 521 | /** Number of aggregations associated with view. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 522 | CENSUS_API size_t census_view_naggregations(const census_view *view); |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 523 | |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 524 | /** Get tags associated with view. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 525 | CENSUS_API const census_context *census_view_tags(const census_view *view); |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 526 | |
Alistair Veitch | b855202 | 2015-08-28 10:54:17 -0700 | [diff] [blame] | 527 | /** Get aggregation descriptors associated with a view. NOTE(review): the function name is spelled aggregrations (sic); it is part of the public interface, so it is left as declared. */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 528 | CENSUS_API const census_aggregation *census_view_aggregrations( |
| 529 | const census_view *view); |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 530 | |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 531 | /** Holds all the aggregation data for a particular view instantiation. Forms |
Alistair Veitch | b855202 | 2015-08-28 10:54:17 -0700 | [diff] [blame] | 532 | part of the data returned by census_view_get_data(). */ |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 533 | typedef struct { |
Alistair Veitch | 75d5c0f | 2016-02-02 09:43:02 -0800 | [diff] [blame] | 534 | const census_context *tags; /* Tags for this set of aggregations. */ |
Alistair Veitch | b855202 | 2015-08-28 10:54:17 -0700 | [diff] [blame] | 535 | const void **data; /* One data set for every aggregation in the view. */ |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 536 | } census_view_aggregation_data; |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 537 | |
Alistair Veitch | b855202 | 2015-08-28 10:54:17 -0700 | [diff] [blame] | 538 | /** Census view data as returned by census_view_get_data(). */ |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 539 | typedef struct { |
Alistair Veitch | e62f68c | 2015-08-27 16:24:27 -0700 | [diff] [blame] | 540 | size_t n_tag_sets; /* Number of unique tag sets that matched view. */ |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 541 | const census_view_aggregation_data *data; /* n_tag_sets entries */ |
| 542 | } census_view_data; |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 543 | |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 544 | /** Get data from aggregations associated with a view. |
Alistair Veitch | e62f68c | 2015-08-27 16:24:27 -0700 | [diff] [blame] | 545 | @param view View from which to get data. |
Alistair Veitch | b855202 | 2015-08-28 10:54:17 -0700 | [diff] [blame] | 546 | @return Full set of data for all aggregations for the view. |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 547 | */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 548 | CENSUS_API const census_view_data *census_view_get_data( |
| 549 | const census_view *view); |
Alistair Veitch | 9a09982 | 2015-08-27 13:16:00 -0700 | [diff] [blame] | 550 | |
Alistair Veitch | b855202 | 2015-08-28 10:54:17 -0700 | [diff] [blame] | 551 | /** Reset all view data to zero for the specified view */ |
Alistair Veitch | 2e6c182 | 2016-02-02 09:51:56 -0800 | [diff] [blame] | 552 | CENSUS_API void census_view_reset(census_view *view); |
Alistair Veitch | ade0021 | 2015-08-25 15:00:26 -0700 | [diff] [blame] | 553 | |
Nicolas "Pixel" Noble | 1ed15e2 | 2015-06-09 02:24:35 +0200 | [diff] [blame] | 554 | #ifdef __cplusplus |
| 555 | } |
| 556 | #endif |
| 557 | |
Alistair Veitch | 9686dab | 2015-05-26 14:26:47 -0700 | [diff] [blame] | 558 | #endif /* CENSUS_CENSUS_H */ |