Dan Williams | 8c2f7e8 | 2015-06-25 04:20:04 -0400 | [diff] [blame] | 1 | /* |
| 2 | * Block Translation Table library |
| 3 | * Copyright (c) 2014-2015, Intel Corporation. |
| 4 | * |
| 5 | * This program is free software; you can redistribute it and/or modify it |
| 6 | * under the terms and conditions of the GNU General Public License, |
| 7 | * version 2, as published by the Free Software Foundation. |
| 8 | * |
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| 12 | * more details. |
| 13 | */ |
| 14 | |
| 15 | #ifndef _LINUX_BTT_H |
| 16 | #define _LINUX_BTT_H |
| 17 | |
Vishal Verma | d9b83c7 | 2017-08-30 19:36:03 -0600 | [diff] [blame] | 18 | #include <linux/badblocks.h> |
Dan Williams | 8c2f7e8 | 2015-06-25 04:20:04 -0400 | [diff] [blame] | 19 | #include <linux/types.h> |
| 20 | |
/*
 * Signature stored in the 'signature' field of struct btt_sb.
 * The literal is 14 characters plus an explicit '\0' plus the implicit
 * string terminator, i.e. exactly BTT_SIG_LEN bytes.
 */
#define BTT_SIG_LEN 16
#define BTT_SIG "BTT_ARENA_INFO\0"

/*
 * Map entry layout (MAP_ENT_SIZE bytes per entry):
 *   bit 31    - trim ('z') flag
 *   bit 30    - error ('e') flag
 *   bits 29:0 - LBA
 *
 * The masks are built from unsigned constants: (1 << 31) on a signed int
 * shifts into the sign bit, which is undefined behaviour, and the resulting
 * negative value would sign-extend if it were ever widened to 64 bits.
 */
#define MAP_ENT_SIZE 4
#define MAP_TRIM_SHIFT 31
#define MAP_TRIM_MASK (1U << MAP_TRIM_SHIFT)
#define MAP_ERR_SHIFT 30
#define MAP_ERR_MASK (1U << MAP_ERR_SHIFT)
#define MAP_LBA_MASK (~(MAP_TRIM_MASK | MAP_ERR_MASK))
#define MAP_ENT_NORMAL 0xC0000000

/* Sizes of the log structures (defined later in this header) */
#define LOG_GRP_SIZE sizeof(struct log_group)
#define LOG_ENT_SIZE sizeof(struct log_entry)

/* Arena size limits */
#define ARENA_MIN_SIZE (1UL << 24)	/* 16 MB */
#define ARENA_MAX_SIZE (1ULL << 39)	/* 512 GB */

/* Read Tracking Table (RTT) entry markers */
#define RTT_VALID (1UL << 31)
#define RTT_INVALID 0

#define BTT_PG_SIZE 4096
#define BTT_DEFAULT_NFREE ND_MAX_LANES
#define LOG_SEQ_INIT 1

/* Info block flags */
#define IB_FLAG_ERROR 0x00000001
#define IB_FLAG_ERROR_MASK 0x00000001

/*
 * Map entry accessors. The argument is parenthesized so that an expression
 * such as ent_lba(a | b) masks the whole value rather than only 'b'.
 * set_e_flag() modifies its argument, which must therefore be an lvalue.
 * ent_normal() evaluates its argument twice; avoid side effects in it.
 */
#define ent_lba(ent) ((ent) & MAP_LBA_MASK)
#define ent_e_flag(ent) (!!((ent) & MAP_ERR_MASK))
#define ent_z_flag(ent) (!!((ent) & MAP_TRIM_MASK))
#define set_e_flag(ent) ((ent) |= MAP_ERR_MASK)
/* 'normal' is both e and z flags set */
#define ent_normal(ent) (ent_e_flag(ent) && ent_z_flag(ent))
Vishal Verma | 0595d53 | 2017-08-30 19:35:59 -0600 | [diff] [blame] | 49 | |
/*
 * Initialization states of a BTT instance.
 * NOTE(review): presumably the values held in the 'init_state' member of
 * struct btt - confirm against the users of that field.
 */
enum btt_init_state {
	INIT_UNCHECKED	= 0,
	INIT_NOTFOUND	= 1,
	INIT_READY	= 2,
};
| 55 | |
Vishal Verma | 24e3a7f | 2017-12-18 09:28:39 -0700 | [diff] [blame] | 56 | /* |
| 57 | * A log group represents one log 'lane', and consists of four log entries. |
| 58 | * Two of the four entries are valid entries, and the remaining two are |
| 59 | * padding. Due to an old bug in the padding location, we need to perform a |
| 60 | * test to determine the padding scheme being used, and use that scheme |
| 61 | * thereafter. |
| 62 | * |
| 63 | * In kernels prior to 4.15, 'log group' would have actual log entries at |
| 64 | * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated |
| 65 | * format has log entries at indices (0, 1) and padding at indices (2, 3). |
| 66 | * |
| 67 | * Old (pre 4.15) format: |
| 68 | * +-----------------+-----------------+ |
| 69 | * | ent[0] | ent[1] | |
| 70 | * | 16B | 16B | |
| 71 | * | lba/old/new/seq | pad | |
| 72 | * +-----------------------------------+ |
| 73 | * | ent[2] | ent[3] | |
| 74 | * | 16B | 16B | |
| 75 | * | lba/old/new/seq | pad | |
| 76 | * +-----------------+-----------------+ |
| 77 | * |
| 78 | * New format: |
| 79 | * +-----------------+-----------------+ |
| 80 | * | ent[0] | ent[1] | |
| 81 | * | 16B | 16B | |
| 82 | * | lba/old/new/seq | lba/old/new/seq | |
| 83 | * +-----------------------------------+ |
| 84 | * | ent[2] | ent[3] | |
| 85 | * | 16B | 16B | |
| 86 | * | pad | pad | |
| 87 | * +-----------------+-----------------+ |
| 88 | * |
| 89 | * We detect during start-up which format is in use, and set |
| 90 | * arena->log_index[(0, 1)] with the detected format. |
| 91 | */ |
| 92 | |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 93 | struct log_entry { |
| 94 | __le32 lba; |
| 95 | __le32 old_map; |
| 96 | __le32 new_map; |
| 97 | __le32 seq; |
Vishal Verma | 24e3a7f | 2017-12-18 09:28:39 -0700 | [diff] [blame] | 98 | }; |
| 99 | |
| 100 | struct log_group { |
| 101 | struct log_entry ent[4]; |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 102 | }; |
Dan Williams | 8c2f7e8 | 2015-06-25 04:20:04 -0400 | [diff] [blame] | 103 | |
| 104 | struct btt_sb { |
| 105 | u8 signature[BTT_SIG_LEN]; |
| 106 | u8 uuid[16]; |
| 107 | u8 parent_uuid[16]; |
| 108 | __le32 flags; |
| 109 | __le16 version_major; |
| 110 | __le16 version_minor; |
| 111 | __le32 external_lbasize; |
| 112 | __le32 external_nlba; |
| 113 | __le32 internal_lbasize; |
| 114 | __le32 internal_nlba; |
| 115 | __le32 nfree; |
| 116 | __le32 infosize; |
| 117 | __le64 nextoff; |
| 118 | __le64 dataoff; |
| 119 | __le64 mapoff; |
| 120 | __le64 logoff; |
| 121 | __le64 info2off; |
| 122 | u8 padding[3968]; |
| 123 | __le64 checksum; |
| 124 | }; |
| 125 | |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 126 | struct free_entry { |
| 127 | u32 block; |
| 128 | u8 sub; |
| 129 | u8 seq; |
Vishal Verma | d9b83c7 | 2017-08-30 19:36:03 -0600 | [diff] [blame] | 130 | u8 has_err; |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 131 | }; |
| 132 | |
| 133 | struct aligned_lock { |
| 134 | union { |
| 135 | spinlock_t lock; |
| 136 | u8 cacheline_padding[L1_CACHE_BYTES]; |
| 137 | }; |
| 138 | }; |
| 139 | |
| 140 | /** |
| 141 | * struct arena_info - handle for an arena |
| 142 | * @size: Size in bytes this arena occupies on the raw device. |
| 143 | * This includes arena metadata. |
| 144 | * @external_lba_start: The first external LBA in this arena. |
| 145 | * @internal_nlba: Number of internal blocks available in the arena |
| 146 | * including nfree reserved blocks |
| 147 | * @internal_lbasize: Internal and external lba sizes may be different as |
| 148 | * we can round up 'odd' external lbasizes such as 520B |
| 149 | * to be aligned. |
| 150 | * @external_nlba: Number of blocks contributed by the arena to the number |
| 151 | * reported to upper layers. (internal_nlba - nfree) |
| 152 | * @external_lbasize: LBA size as exposed to upper layers. |
| 153 | * @nfree: A reserve number of 'free' blocks that is used to |
| 154 | * handle incoming writes. |
| 155 | * @version_major: Metadata layout version major. |
| 156 | * @version_minor: Metadata layout version minor. |
Vishal Verma | 7589200 | 2017-08-30 19:36:01 -0600 | [diff] [blame] | 157 | * @sector_size: The Linux sector size - 512 or 4096 |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 158 | * @nextoff: Offset in bytes to the start of the next arena. |
| 159 | * @infooff: Offset in bytes to the info block of this arena. |
| 160 | * @dataoff: Offset in bytes to the data area of this arena. |
| 161 | * @mapoff: Offset in bytes to the map area of this arena. |
| 162 | * @logoff: Offset in bytes to the log area of this arena. |
| 163 | * @info2off: Offset in bytes to the backup info block of this arena. |
| 164 | * @freelist: Pointer to in-memory list of free blocks |
| 165 | * @rtt: Pointer to in-memory "Read Tracking Table" |
| 166 | * @map_locks: Spinlocks protecting concurrent map writes |
| 167 | * @nd_btt: Pointer to parent nd_btt structure. |
| 168 | * @list: List head for list of arenas |
| 169 | * @debugfs_dir: Debugfs dentry |
| 170 | * @flags: Arena flags - may signify error states. |
Vishal Verma | 13b7954 | 2017-12-14 17:26:13 -0700 | [diff] [blame] | 171 | * @err_lock: Mutex for synchronizing error clearing. |
Vishal Verma | 24e3a7f | 2017-12-18 09:28:39 -0700 | [diff] [blame] | 172 | * @log_index: Indices of the valid log entries in a log_group |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 173 | * |
| 174 | * arena_info is a per-arena handle. Once an arena is narrowed down for an |
| 175 | * IO, this struct is passed around for the duration of the IO. |
| 176 | */ |
| 177 | struct arena_info { |
| 178 | u64 size; /* Total bytes for this arena */ |
| 179 | u64 external_lba_start; |
| 180 | u32 internal_nlba; |
| 181 | u32 internal_lbasize; |
| 182 | u32 external_nlba; |
| 183 | u32 external_lbasize; |
| 184 | u32 nfree; |
| 185 | u16 version_major; |
| 186 | u16 version_minor; |
Vishal Verma | 7589200 | 2017-08-30 19:36:01 -0600 | [diff] [blame] | 187 | u32 sector_size; |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 188 | /* Byte offsets to the different on-media structures */ |
| 189 | u64 nextoff; |
| 190 | u64 infooff; |
| 191 | u64 dataoff; |
| 192 | u64 mapoff; |
| 193 | u64 logoff; |
| 194 | u64 info2off; |
| 195 | /* Pointers to other in-memory structures for this arena */ |
| 196 | struct free_entry *freelist; |
| 197 | u32 *rtt; |
| 198 | struct aligned_lock *map_locks; |
| 199 | struct nd_btt *nd_btt; |
| 200 | struct list_head list; |
| 201 | struct dentry *debugfs_dir; |
| 202 | /* Arena flags */ |
| 203 | u32 flags; |
Vishal Verma | d9b83c7 | 2017-08-30 19:36:03 -0600 | [diff] [blame] | 204 | struct mutex err_lock; |
Vishal Verma | 24e3a7f | 2017-12-18 09:28:39 -0700 | [diff] [blame] | 205 | int log_index[2]; |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 206 | }; |
| 207 | |
| 208 | /** |
| 209 | * struct btt - handle for a BTT instance |
| 210 | * @btt_disk: Pointer to the gendisk for BTT device |
| 211 | * @btt_queue: Pointer to the request queue for the BTT device |
| 212 | * @arena_list: Head of the list of arenas |
| 213 | * @debugfs_dir: Debugfs dentry |
| 214 | * @nd_btt: Parent nd_btt struct |
| 215 | * @nlba: Number of logical blocks exposed to the upper layers |
| 216 | * after removing the amount of space needed by metadata |
| 217 | * @rawsize: Total size in bytes of the available backing device |
| 218 | * @lbasize: LBA size as requested and presented to upper layers. |
| 219 | * This is sector_size + size of any metadata. |
| 220 | * @sector_size: The Linux sector size - 512 or 4096 |
| 221 | * @lanes: Per-lane spinlocks |
| 222 | * @init_lock: Mutex used for the BTT initialization |
| 223 | * @init_state: Flag describing the initialization state for the BTT |
| 224 | * @num_arenas: Number of arenas in the BTT instance |
Vishal Verma | 13b7954 | 2017-12-14 17:26:13 -0700 | [diff] [blame] | 225 | * @phys_bb: Pointer to the namespace's badblocks structure |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 226 | */ |
| 227 | struct btt { |
| 228 | struct gendisk *btt_disk; |
| 229 | struct request_queue *btt_queue; |
| 230 | struct list_head arena_list; |
| 231 | struct dentry *debugfs_dir; |
| 232 | struct nd_btt *nd_btt; |
| 233 | u64 nlba; |
| 234 | unsigned long long rawsize; |
| 235 | u32 lbasize; |
| 236 | u32 sector_size; |
| 237 | struct nd_region *nd_region; |
| 238 | struct mutex init_lock; |
| 239 | int init_state; |
| 240 | int num_arenas; |
Vishal Verma | d9b83c7 | 2017-08-30 19:36:03 -0600 | [diff] [blame] | 241 | struct badblocks *phys_bb; |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 242 | }; |
Vishal Verma | ab45e76 | 2015-07-29 14:58:08 -0600 | [diff] [blame] | 243 | |
/*
 * Forward declarations for types defined outside this header. Without a
 * prior file-scope declaration, a struct tag that first appears inside a
 * parameter list only has prototype scope, which makes the prototype
 * incompatible with the real definition seen later.
 */
struct nd_btt;
struct nd_namespace_common;

/* BTT helpers implemented outside this header. */
bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
		struct btt_sb *btt_sb);
Vishal Verma | ab45e76 | 2015-07-29 14:58:08 -0600 | [diff] [blame] | 247 | |
Dan Williams | 8c2f7e8 | 2015-06-25 04:20:04 -0400 | [diff] [blame] | 248 | #endif |