Dan Magenheimer | 19ee3ef | 2012-02-15 07:54:16 -0800 | [diff] [blame] | 1 | /* |
| 2 | * tmem.h |
| 3 | * |
| 4 | * Transcendent memory |
| 5 | * |
| 6 | * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp. |
| 7 | */ |
| 8 | |
| 9 | #ifndef _TMEM_H_ |
| 10 | #define _TMEM_H_ |
| 11 | |
#include <linux/highmem.h>
#include <linux/hash.h>
#include <linux/atomic.h>
#include <linux/lockdep.h>
| 15 | |
| 16 | /* |
| 17 | * These are pre-defined by the Xen<->Linux ABI |
| 18 | */ |
/* Page/object request codes (presumably the tmem operation numbers). */
#define TMEM_PUT_PAGE			4
#define TMEM_GET_PAGE			5
#define TMEM_FLUSH_PAGE			6
#define TMEM_FLUSH_OBJECT		7

/* Pool flag bits and pool page-size field (shift/mask). */
#define TMEM_POOL_PERSIST		1
#define TMEM_POOL_SHARED		2
#define TMEM_POOL_PRECOMPRESSED		4
#define TMEM_POOL_PAGESIZE_SHIFT	4
#define TMEM_POOL_PAGESIZE_MASK		0xf
#define TMEM_POOL_RESERVED_BITS		0x00ffff00
| 29 | |
/*
 * Sentinels are a debugging aid: a structure embeds a 32-bit marker
 * (DECL_SENTINEL) that is set to <TYPE>_SENTINEL, may later be set to
 * the bitwise inverse of that value, and can be checked with WARN_ON.
 * They have proven very useful for debugging but can be removed or
 * disabled (by not defining SENTINELS) before final merge.
 *
 * _x is a pointer to the structure; _y is the prefix of the
 * <prefix>_SENTINEL constant to use.  _x is parenthesized in every
 * expansion so that non-trivial pointer expressions (e.g. "p + 1")
 * are dereferenced as a whole.
 */
#define SENTINELS
#ifdef SENTINELS
#define DECL_SENTINEL uint32_t sentinel;
#define SET_SENTINEL(_x, _y) ((_x)->sentinel = _y##_SENTINEL)
#define INVERT_SENTINEL(_x, _y) ((_x)->sentinel = ~_y##_SENTINEL)
#define ASSERT_SENTINEL(_x, _y) WARN_ON((_x)->sentinel != _y##_SENTINEL)
#define ASSERT_INVERTED_SENTINEL(_x, _y) \
	WARN_ON((_x)->sentinel != ~_y##_SENTINEL)
#else
#define DECL_SENTINEL
#define SET_SENTINEL(_x, _y) do { } while (0)
#define INVERT_SENTINEL(_x, _y) do { } while (0)
#define ASSERT_SENTINEL(_x, _y) do { } while (0)
#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0)
#endif
| 48 | |
/*
 * Assert that the caller holds spinlock _l.
 *
 * This uses lockdep rather than spin_is_locked(): spin_is_locked()
 * evaluates to 0 on uniprocessor builds without spinlock debugging
 * (which would make the assertion fire unconditionally) and, on SMP,
 * only reports that *someone* holds the lock.  Without lockdep the
 * check compiles away.
 */
#define ASSERT_SPINLOCK(_l) lockdep_assert_held(_l)
| 50 | |
| 51 | /* |
| 52 | * A pool is the highest-level data structure managed by tmem and |
| 53 | * usually corresponds to a large independent set of pages such as |
| 54 | * a filesystem. Each pool has an id, and certain attributes and counters. |
| 55 | * It also contains a set of hash buckets, each of which contains an rbtree |
| 56 | * of objects and a lock to manage concurrency within the pool. |
| 57 | */ |
| 58 | |
/* Number of hash buckets per pool, expressed as a power of two. */
#define TMEM_HASH_BUCKET_BITS	8
#define TMEM_HASH_BUCKETS	(1 << TMEM_HASH_BUCKET_BITS)
| 61 | |
| 62 | struct tmem_hashbucket { |
| 63 | struct rb_root obj_rb_root; |
| 64 | spinlock_t lock; |
| 65 | }; |
| 66 | |
| 67 | struct tmem_pool { |
| 68 | void *client; /* "up" for some clients, avoids table lookup */ |
| 69 | struct list_head pool_list; |
| 70 | uint32_t pool_id; |
| 71 | bool persistent; |
| 72 | bool shared; |
| 73 | atomic_t obj_count; |
| 74 | atomic_t refcount; |
| 75 | struct tmem_hashbucket hashbucket[TMEM_HASH_BUCKETS]; |
| 76 | DECL_SENTINEL |
| 77 | }; |
| 78 | |
/*
 * Pool-type predicates; _p is a pointer to a struct with a boolean
 * "persistent" member (struct tmem_pool).  A pool is ephemeral exactly
 * when it is not persistent.  _p is parenthesized so that arbitrary
 * pointer expressions are dereferenced as a whole.
 */
#define is_persistent(_p) ((_p)->persistent)
#define is_ephemeral(_p) (!((_p)->persistent))
| 81 | |
| 82 | /* |
| 83 | * An object id ("oid") is large: 192-bits (to ensure, for example, files |
| 84 | * in a modern filesystem can be uniquely identified). |
| 85 | */ |
| 86 | |
/* 192-bit object id, stored as three 64-bit words. */
struct tmem_oid {
	uint64_t	oid[3];
};
| 90 | |
Dan Magenheimer | c89126e | 2012-02-15 07:54:18 -0800 | [diff] [blame] | 91 | struct tmem_xhandle { |
| 92 | uint8_t client_id; |
| 93 | uint8_t xh_data_cksum; |
| 94 | uint16_t xh_data_size; |
| 95 | uint16_t pool_id; |
| 96 | struct tmem_oid oid; |
| 97 | uint32_t index; |
| 98 | void *extra; |
| 99 | }; |
| 100 | |
| 101 | static inline struct tmem_xhandle tmem_xhandle_fill(uint16_t client_id, |
| 102 | struct tmem_pool *pool, |
| 103 | struct tmem_oid *oidp, |
| 104 | uint32_t index) |
| 105 | { |
| 106 | struct tmem_xhandle xh; |
| 107 | xh.client_id = client_id; |
| 108 | xh.xh_data_cksum = (uint8_t)-1; |
| 109 | xh.xh_data_size = (uint16_t)-1; |
| 110 | xh.pool_id = pool->pool_id; |
| 111 | xh.oid = *oidp; |
| 112 | xh.index = index; |
| 113 | return xh; |
| 114 | } |
| 115 | |
Dan Magenheimer | 19ee3ef | 2012-02-15 07:54:16 -0800 | [diff] [blame] | 116 | static inline void tmem_oid_set_invalid(struct tmem_oid *oidp) |
| 117 | { |
| 118 | oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL; |
| 119 | } |
| 120 | |
| 121 | static inline bool tmem_oid_valid(struct tmem_oid *oidp) |
| 122 | { |
| 123 | return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL || |
| 124 | oidp->oid[2] != -1UL; |
| 125 | } |
| 126 | |
| 127 | static inline int tmem_oid_compare(struct tmem_oid *left, |
| 128 | struct tmem_oid *right) |
| 129 | { |
| 130 | int ret; |
| 131 | |
| 132 | if (left->oid[2] == right->oid[2]) { |
| 133 | if (left->oid[1] == right->oid[1]) { |
| 134 | if (left->oid[0] == right->oid[0]) |
| 135 | ret = 0; |
| 136 | else if (left->oid[0] < right->oid[0]) |
| 137 | ret = -1; |
| 138 | else |
| 139 | return 1; |
| 140 | } else if (left->oid[1] < right->oid[1]) |
| 141 | ret = -1; |
| 142 | else |
| 143 | ret = 1; |
| 144 | } else if (left->oid[2] < right->oid[2]) |
| 145 | ret = -1; |
| 146 | else |
| 147 | ret = 1; |
| 148 | return ret; |
| 149 | } |
| 150 | |
| 151 | static inline unsigned tmem_oid_hash(struct tmem_oid *oidp) |
| 152 | { |
| 153 | return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2], |
| 154 | TMEM_HASH_BUCKET_BITS); |
| 155 | } |
| 156 | |
| 157 | /* |
| 158 | * A tmem_obj contains an identifier (oid), pointers to the parent |
| 159 | * pool and the rb_tree to which it belongs, counters, and an ordered |
| 160 | * set of pampds, structured in a radix-tree-like tree. The intermediate |
| 161 | * nodes of the tree are called tmem_objnodes. |
| 162 | */ |
| 163 | |
| 164 | struct tmem_objnode; |
| 165 | |
| 166 | struct tmem_obj { |
| 167 | struct tmem_oid oid; |
| 168 | struct tmem_pool *pool; |
| 169 | struct rb_node rb_tree_node; |
| 170 | struct tmem_objnode *objnode_tree_root; |
| 171 | unsigned int objnode_tree_height; |
| 172 | unsigned long objnode_count; |
| 173 | long pampd_count; |
Dan Magenheimer | c89126e | 2012-02-15 07:54:18 -0800 | [diff] [blame] | 174 | /* for current design of ramster, all pages belonging to |
| 175 | * an object reside on the same remotenode and extra is |
| 176 | * used to record the number of the remotenode so a |
| 177 | * flush-object operation can specify it */ |
| 178 | void *extra; /* for use by pampd implementation */ |
Dan Magenheimer | 19ee3ef | 2012-02-15 07:54:16 -0800 | [diff] [blame] | 179 | DECL_SENTINEL |
| 180 | }; |
| 181 | |
/*
 * Objnode tree geometry: each node has 2^OBJNODE_TREE_MAP_SHIFT slots;
 * an index is as wide as an unsigned long.
 */
#define OBJNODE_TREE_MAP_SHIFT	6
#define OBJNODE_TREE_MAP_SIZE	(1UL << OBJNODE_TREE_MAP_SHIFT)
#define OBJNODE_TREE_MAP_MASK	(OBJNODE_TREE_MAP_SIZE - 1)
#define OBJNODE_TREE_INDEX_BITS	(8 /* CHAR_BIT */ * sizeof(unsigned long))
#define OBJNODE_TREE_MAX_PATH	\
	(OBJNODE_TREE_INDEX_BITS / OBJNODE_TREE_MAP_SHIFT + 2)
| 188 | |
| 189 | struct tmem_objnode { |
| 190 | struct tmem_obj *obj; |
| 191 | DECL_SENTINEL |
| 192 | void *slots[OBJNODE_TREE_MAP_SIZE]; |
| 193 | unsigned int slots_in_use; |
| 194 | }; |
| 195 | |
/*
 * Methods of the pampd abstract datatype, provided by the PAM
 * implementation.  pampds are handled as opaque void pointers here.
 * tmem_register_pamops() takes a pointer to such a method table.
 */
struct tmem_pamops {
	void *(*create)(char *, size_t, bool, int,
			struct tmem_pool *pool, struct tmem_oid *oidp,
			uint32_t index);
	int (*get_data)(char *, size_t *, bool, void *,
			struct tmem_pool *pool, struct tmem_oid *oidp,
			uint32_t index);
	int (*get_data_and_free)(char *, size_t *, bool, void *,
				 struct tmem_pool *pool,
				 struct tmem_oid *oidp, uint32_t index);
	void (*free)(void *, struct tmem_pool *pool,
		     struct tmem_oid *oidp, uint32_t index, bool);
	void (*free_obj)(struct tmem_pool *pool, struct tmem_obj *obj);
	bool (*is_remote)(void *);
	void *(*repatriate_preload)(void *, struct tmem_pool *pool,
				    struct tmem_oid *oidp, uint32_t index,
				    bool *);
	int (*repatriate)(void *, void *, struct tmem_pool *pool,
			  struct tmem_oid *oidp, uint32_t index, bool,
			  void *);
	void (*new_obj)(struct tmem_obj *obj);
	int (*replace_in_obj)(void *, struct tmem_obj *obj);
};
void tmem_register_pamops(struct tmem_pamops *m);
| 217 | |
/*
 * Memory allocation methods provided by the host implementation:
 * alloc/free pairs for tmem_obj and tmem_objnode, each given the pool
 * concerned.  tmem_register_hostops() takes a pointer to such a table.
 */
struct tmem_hostops {
	struct tmem_obj *(*obj_alloc)(struct tmem_pool *pool);
	void (*obj_free)(struct tmem_obj *obj, struct tmem_pool *pool);
	struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *pool);
	void (*objnode_free)(struct tmem_objnode *objnode,
			     struct tmem_pool *pool);
};
void tmem_register_hostops(struct tmem_hostops *m);
| 226 | |
/*
 * Core tmem accessor functions (defined outside this header).  A page
 * is addressed by the triple (pool, oid, index).
 */
int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
	     char *, size_t, bool, int);
int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
	     char *, size_t *, bool, int);
int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
		 uint32_t index, void *);
void *tmem_localify_get_pampd(struct tmem_pool *pool, struct tmem_oid *oidp,
			      uint32_t index, struct tmem_obj **,
			      void **);
void tmem_localify_finish(struct tmem_obj *obj, uint32_t index,
			  void *, void *, bool);
int tmem_flush_page(struct tmem_pool *pool, struct tmem_oid *oidp,
		    uint32_t index);
int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp);
int tmem_destroy_pool(struct tmem_pool *pool);
void tmem_new_pool(struct tmem_pool *pool, uint32_t);
#endif /* _TMEM_H_ */