Upstream | cc2ee17 | 1970-01-12 13:46:40 +0000 | [diff] [blame] | 1 | /** |
| 2 | * @file odb.h |
| 3 | * This file contains various definitions and interface for management |
| 4 | * of in-memory, through mmaped file, growable hash table, that stores |
| 5 | * sample files. |
| 6 | * |
| 7 | * @remark Copyright 2002 OProfile authors |
| 8 | * @remark Read the file COPYING |
| 9 | * |
| 10 | * @author Philippe Elie |
| 11 | */ |
| 12 | |
| 13 | #ifndef ODB_HASH_H |
| 14 | #define ODB_HASH_H |
| 15 | |
| 16 | #include <stddef.h> |
| 17 | #include <stdint.h> |
| 18 | |
| 19 | #include "op_list.h" |
| 20 | |
| 21 | /** the type of key. 64-bit because CG needs 32-bit pair {from,to} */ |
| 22 | typedef uint64_t odb_key_t; |
| 23 | /** the type of an information in the database */ |
| 24 | typedef unsigned int odb_value_t; |
| 25 | /** the type of index (node number), list are implemented through index */ |
| 26 | typedef unsigned int odb_index_t; |
| 27 | /** the type store node number */ |
| 28 | typedef odb_index_t odb_node_nr_t; |
| 29 | /** store the hash mask, hash table size are always power of two */ |
| 30 | typedef odb_index_t odb_hash_mask_t; |
| 31 | |
| 32 | /* there is (bucket factor * nr node) entry in hash table, this can seem |
| 33 | * excessive but hash coding eip don't give a good distributions and our |
| 34 | * goal is to get a O(1) amortized insert time. bucket factor must be a |
| 35 | * power of two. FIXME: see big comment in odb_hash_add_node, you must |
| 36 | * re-enable zeroing hash table if BUCKET_FACTOR > 2 (roughly exact, you |
| 37 | * want to read the comment in odb_hash_add_node() if you tune this define) |
| 38 | */ |
| 39 | #define BUCKET_FACTOR 1 |
| 40 | |
| 41 | /** a db hash node */ |
| 42 | typedef struct { |
| 43 | odb_key_t key; /**< eip */ |
| 44 | odb_value_t value; /**< samples count */ |
| 45 | odb_index_t next; /**< next entry for this bucket */ |
| 46 | } odb_node_t; |
| 47 | |
| 48 | /** the minimal information which must be stored in the file to reload |
| 49 | * properly the data base, following this header is the node array then |
| 50 | * the hash table (when growing we avoid to copy node array) |
| 51 | */ |
| 52 | typedef struct { |
| 53 | odb_node_nr_t size; /**< in node nr (power of two) */ |
| 54 | odb_node_nr_t current_size; /**< nr used node + 1, node 0 unused */ |
| 55 | int padding[6]; /**< for padding and future use */ |
| 56 | } odb_descr_t; |
| 57 | |
| 58 | /** a "database". this is an in memory only description. |
| 59 | * |
| 60 | * We allow to manage a database inside a mapped file with an "header" of |
| 61 | * unknown size so odb_open get a parameter to specify the size of this header. |
| 62 | * A typical use is: |
| 63 | * |
| 64 | * struct header { int etc; ... }; |
| 65 | * odb_open(&hash, filename, ODB_RW, sizeof(header)); |
| 66 | * so on this library have no dependency on the header type. |
| 67 | * |
| 68 | * the internal memory layout from base_memory is: |
| 69 | * the unknown header (sizeof_header) |
| 70 | * odb_descr_t |
| 71 | * the node array: (descr->size * sizeof(odb_node_t) entries |
| 72 | * the hash table: array of odb_index_t indexing the node array |
| 73 | * (descr->size * BUCKET_FACTOR) entries |
| 74 | */ |
| 75 | typedef struct odb_data { |
| 76 | odb_node_t * node_base; /**< base memory area of the page */ |
| 77 | odb_index_t * hash_base; /**< base memory of hash table */ |
| 78 | odb_descr_t * descr; /**< the current state of database */ |
| 79 | odb_hash_mask_t hash_mask; /**< == descr->size - 1 */ |
| 80 | unsigned int sizeof_header; /**< from base_memory to odb header */ |
| 81 | unsigned int offset_node; /**< from base_memory to node array */ |
| 82 | void * base_memory; /**< base memory of the maped memory */ |
| 83 | int fd; /**< mmaped memory file descriptor */ |
| 84 | char * filename; /**< full path name of sample file */ |
| 85 | int ref_count; /**< reference count */ |
| 86 | struct list_head list; /**< hash bucket list */ |
| 87 | } odb_data_t; |
| 88 | |
| 89 | typedef struct { |
| 90 | odb_data_t * data; |
| 91 | } odb_t; |
| 92 | |
| 93 | #ifdef __cplusplus |
| 94 | extern "C" { |
| 95 | #endif |
| 96 | |
| 97 | /* db_manage.c */ |
| 98 | |
| 99 | /** how to open the DB file */ |
| 100 | enum odb_rw { |
| 101 | ODB_RDONLY = 0, /**< open for read only */ |
| 102 | ODB_RDWR = 1 /**< open for read and/or write */ |
| 103 | }; |
| 104 | |
| 105 | /** |
| 106 | * odb_init - initialize a DB file |
| 107 | * @param odb the DB file to init |
| 108 | */ |
| 109 | void odb_init(odb_t * odb); |
| 110 | |
| 111 | /** |
| 112 | * odb_open - open a DB file |
| 113 | * @param odb the data base object to setup |
| 114 | * @param filename the filename where go the maped memory |
| 115 | * @param rw \enum ODB_RW if opening for writing, else \enum ODB_RDONLY |
| 116 | * @param sizeof_header size of the file header if any |
| 117 | * |
| 118 | * The sizeof_header parameter allows the data file to have a header |
| 119 | * at the start of the file which is skipped. |
| 120 | * odb_open() always preallocate a few number of pages. |
| 121 | * returns 0 on success, errno on failure |
| 122 | */ |
| 123 | int odb_open(odb_t * odb, char const * filename, |
| 124 | enum odb_rw rw, size_t sizeof_header); |
| 125 | |
| 126 | /** Close the given ODB file */ |
| 127 | void odb_close(odb_t * odb); |
| 128 | |
| 129 | /** return the number of times this sample file is open */ |
| 130 | int odb_open_count(odb_t const * odb); |
| 131 | |
| 132 | /** return the start of the mapped data */ |
| 133 | void * odb_get_data(odb_t * odb); |
| 134 | |
| 135 | /** issue a msync on the used size of the mmaped file */ |
| 136 | void odb_sync(odb_t const * odb); |
| 137 | |
Ben Cheng | 2b16b5f | 2008-10-23 16:07:43 -0700 | [diff] [blame^] | 138 | /** |
| 139 | * grow the hashtable in such way current_size is the index of the first free |
| 140 | * node. Take care all node pointer can be invalidated by this call. |
| 141 | * |
| 142 | * Node allocation is done in a two step way 1st) ensure a free node exist |
| 143 | * eventually, caller can setup it, 2nd) commit the node allocation with |
| 144 | * odb_commit_reservation(). |
| 145 | * This is done in this way to ensure node setup is visible from another |
| 146 | * process like pp tools in an atomic way. |
| 147 | * |
| 148 | * returns 0 on success, non zero on failure in this case this function do |
| 149 | * nothing and errno is set by the first libc call failure allowing to retry |
| 150 | * after cleanup some program resource. |
Upstream | cc2ee17 | 1970-01-12 13:46:40 +0000 | [diff] [blame] | 151 | */ |
Ben Cheng | 2b16b5f | 2008-10-23 16:07:43 -0700 | [diff] [blame^] | 152 | int odb_grow_hashtable(odb_data_t * data); |
| 153 | /** |
| 154 | * commit a previously successfull node reservation. This can't fail. |
| 155 | */ |
| 156 | static __inline void odb_commit_reservation(odb_data_t * data) |
| 157 | { |
| 158 | ++data->descr->current_size; |
| 159 | } |
Upstream | cc2ee17 | 1970-01-12 13:46:40 +0000 | [diff] [blame] | 160 | |
| 161 | /** "immpossible" node number to indicate an error from odb_hash_add_node() */ |
| 162 | #define ODB_NODE_NR_INVALID ((odb_node_nr_t)-1) |
| 163 | |
| 164 | /* db_debug.c */ |
| 165 | /** check that the hash is well built */ |
| 166 | int odb_check_hash(odb_t const * odb); |
| 167 | |
| 168 | /* db_stat.c */ |
| 169 | typedef struct odb_hash_stat_t odb_hash_stat_t; |
| 170 | odb_hash_stat_t * odb_hash_stat(odb_t const * odb); |
| 171 | void odb_hash_display_stat(odb_hash_stat_t const * stats); |
| 172 | void odb_hash_free_stat(odb_hash_stat_t * stats); |
| 173 | |
| 174 | /* db_insert.c */ |
Ben Cheng | 2b16b5f | 2008-10-23 16:07:43 -0700 | [diff] [blame^] | 175 | /** update info at key by incrementing its associated value by one, |
| 176 | * if the key does not exist a new node is created and the value associated |
| 177 | * is set to one. |
| 178 | * |
Upstream | cc2ee17 | 1970-01-12 13:46:40 +0000 | [diff] [blame] | 179 | * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure |
| 180 | */ |
Ben Cheng | 2b16b5f | 2008-10-23 16:07:43 -0700 | [diff] [blame^] | 181 | int odb_update_node(odb_t * odb, odb_key_t key); |
| 182 | |
| 183 | /** Add a new node w/o regarding if a node with the same key already exists |
| 184 | * |
| 185 | * returns EXIT_SUCCESS on success, EXIT_FAILURE on failure |
| 186 | */ |
| 187 | int odb_add_node(odb_t * odb, odb_key_t key, odb_value_t value); |
Upstream | cc2ee17 | 1970-01-12 13:46:40 +0000 | [diff] [blame] | 188 | |
| 189 | /* db_travel.c */ |
| 190 | /** |
| 191 | * return a base pointer to the node array and number of node in this array |
| 192 | * caller then will iterate through: |
| 193 | * |
| 194 | * odb_node_nr_t node_nr, pos; |
| 195 | * odb_node_t * node = odb_get_iterator(odb, &node_nr); |
Ben Cheng | 2b16b5f | 2008-10-23 16:07:43 -0700 | [diff] [blame^] | 196 | * for ( pos = 0 ; pos < node_nr ; ++pos) |
| 197 | * // do something |
Upstream | cc2ee17 | 1970-01-12 13:46:40 +0000 | [diff] [blame] | 198 | * |
Ben Cheng | 2b16b5f | 2008-10-23 16:07:43 -0700 | [diff] [blame^] | 199 | * note than caller does not need to filter nil key as it's a valid key, |
| 200 | * The returned range is all valid (i.e. should never contain zero value). |
Upstream | cc2ee17 | 1970-01-12 13:46:40 +0000 | [diff] [blame] | 201 | */ |
| 202 | odb_node_t * odb_get_iterator(odb_t const * odb, odb_node_nr_t * nr); |
| 203 | |
| 204 | static __inline unsigned int |
| 205 | odb_do_hash(odb_data_t const * data, odb_key_t value) |
| 206 | { |
| 207 | /* FIXME: better hash for eip value, needs to instrument code |
| 208 | * and do a lot of tests ... */ |
| 209 | /* trying to combine high order bits his a no-op: inside a binary image |
| 210 | * high order bits don't vary a lot, hash table start with 7 bits mask |
| 211 | * so this hash coding use bits 0-7, 8-15. Hash table is stored in |
| 212 | * files avoiding to rebuilding them at profiling re-start so |
| 213 | * on changing do_hash() change the file format! |
| 214 | */ |
| 215 | uint32_t temp = (value >> 32) ^ value; |
| 216 | return ((temp << 0) ^ (temp >> 8)) & data->hash_mask; |
| 217 | } |
| 218 | |
| 219 | #ifdef __cplusplus |
| 220 | } |
| 221 | #endif |
| 222 | |
| 223 | #endif /* !ODB_H */ |