/******************************************************************************
 * blkif.h
 *
 * Unified block-device I/O interface for Xen guest OSes.
 *
 * Copyright (c) 2003-2004, Keir Fraser
 */

#ifndef __XEN_PUBLIC_IO_BLKIF_H__
#define __XEN_PUBLIC_IO_BLKIF_H__

#include <xen/interface/io/ring.h>
#include <xen/interface/grant_table.h>

/*
 * Front->back notifications: When enqueuing a new request, sending a
 * notification can be made conditional on req_event (i.e., the generic
 * hold-off mechanism provided by the ring macros). Backends must set
 * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
 *
 * Back->front notifications: When enqueuing a new response, sending a
 * notification can be made conditional on rsp_event (i.e., the generic
 * hold-off mechanism provided by the ring macros). Frontends must set
 * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
 */

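/*
 * For illustration only (not part of the ABI): a minimal sketch of the
 * backend-side hold-off pattern described above. The ring macros come from
 * xen/interface/io/ring.h; process_request() is a hypothetical handler and
 * all error handling is omitted.
 *
 *     struct blkif_back_ring *ring = ...;
 *     struct blkif_request req;
 *     int more_to_do;
 *
 *     do {
 *         while (RING_HAS_UNCONSUMED_REQUESTS(ring)) {
 *             // Copy the request out of the shared ring before using it.
 *             memcpy(&req, RING_GET_REQUEST(ring, ring->req_cons),
 *                    sizeof(req));
 *             ring->req_cons++;
 *             process_request(&req);
 *         }
 *         // Re-arms req_event and reports any late arrivals.
 *         RING_FINAL_CHECK_FOR_REQUESTS(ring, more_to_do);
 *     } while (more_to_do);
 */
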
typedef uint16_t blkif_vdev_t;
typedef uint64_t blkif_sector_t;

/*
 * Multiple hardware queues/rings:
 * If supported, the backend will write the key "multi-queue-max-queues" to
 * the directory for that vbd, and set its value to the maximum supported
 * number of queues.
 * Frontends that are aware of this feature and wish to use it can write the
 * key "multi-queue-num-queues" with the number they wish to use, which must be
 * greater than zero, and no more than the value reported by the backend in
 * "multi-queue-max-queues".
 *
 * For frontends requesting just one queue, the usual event-channel and
 * ring-ref keys are written as before, simplifying the backend processing
 * to avoid distinguishing between a frontend that doesn't understand the
 * multi-queue feature, and one that does, but requested only one queue.
 *
 * Frontends requesting two or more queues must not write the toplevel
 * event-channel and ring-ref keys, instead writing those keys under sub-keys
 * having the name "queue-N" where N is the integer ID of the queue/ring for
 * which those keys belong. Queues are indexed from zero.
 * For example, a frontend with two queues must write the following set of
 * queue-related keys:
 *
 * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
 * /local/domain/1/device/vbd/0/queue-0 = ""
 * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
 * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
 * /local/domain/1/device/vbd/0/queue-1 = ""
 * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
 * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
 *
 * It is also possible to use multiple queues/rings together with the
 * multi-page ring buffer feature.
 * For example, a frontend requesting two queues/rings, where each ring
 * buffer is two pages, must write the following set of related keys:
 *
 * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
 * /local/domain/1/device/vbd/0/ring-page-order = "1"
 * /local/domain/1/device/vbd/0/queue-0 = ""
 * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
 * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
 * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
 * /local/domain/1/device/vbd/0/queue-1 = ""
 * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
 * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
 * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
 *
 */

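/*
 * For illustration only: how a frontend might act on the negotiation
 * described above, using the Linux xenbus helpers. The queue count chosen
 * here and the per-queue setup are hypothetical; error handling and
 * transaction management are omitted.
 *
 *     unsigned int max_queues, nr_queues, i;
 *     char path[16];
 *
 *     max_queues = xenbus_read_unsigned(dev->otherend,
 *                                       "multi-queue-max-queues", 1);
 *     nr_queues = min(max_queues, (unsigned int)num_online_cpus());
 *     if (nr_queues > 1) {
 *         xenbus_printf(XBT_NIL, dev->nodename,
 *                       "multi-queue-num-queues", "%u", nr_queues);
 *         for (i = 0; i < nr_queues; i++) {
 *             snprintf(path, sizeof(path), "queue-%u", i);
 *             // Write ring-ref and event-channel (or ring-ref0..N for
 *             // multi-page rings) under dev->nodename/path.
 *         }
 *     }
 */
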
/*
 * REQUEST CODES.
 */
#define BLKIF_OP_READ              0
#define BLKIF_OP_WRITE             1
/*
 * Recognised only if "feature-barrier" is present in backend xenbus info.
 * The "feature-barrier" node contains a boolean indicating whether barrier
 * requests are likely to succeed or fail. Either way, a barrier request
 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
 * the underlying block-device hardware. The boolean simply indicates whether
 * or not it is worthwhile for the frontend to attempt barrier requests.
 * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
 * create the "feature-barrier" node!
 */
#define BLKIF_OP_WRITE_BARRIER     2

/*
 * Recognised if "feature-flush-cache" is present in backend xenbus
 * info. A flush will ask the underlying storage hardware to flush its
 * non-volatile caches as appropriate. The "feature-flush-cache" node
 * contains a boolean indicating whether flush requests are likely to
 * succeed or fail. Either way, a flush request may fail at any time
 * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
 * block-device hardware. The boolean simply indicates whether or not it
 * is worthwhile for the frontend to attempt flushes. If a backend does
 * not recognise BLKIF_OP_FLUSH_DISKCACHE, it should *not* create the
 * "feature-flush-cache" node!
 */
#define BLKIF_OP_FLUSH_DISKCACHE   3

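/*
 * For illustration only: a frontend probing the feature nodes above with
 * the Linux xenbus helpers; a missing node reads back as the default value
 * (here 0, i.e. "unsupported"). The dev pointer is hypothetical.
 *
 *     unsigned int barrier, flush;
 *
 *     barrier = xenbus_read_unsigned(dev->otherend, "feature-barrier", 0);
 *     flush = xenbus_read_unsigned(dev->otherend, "feature-flush-cache", 0);
 *     // Only issue BLKIF_OP_WRITE_BARRIER / BLKIF_OP_FLUSH_DISKCACHE when
 *     // the corresponding boolean is non-zero, and still be prepared for
 *     // BLKIF_RSP_EOPNOTSUPP either way.
 */
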
/*
 * Recognised only if "feature-discard" is present in backend xenbus info.
 * The "feature-discard" node contains a boolean indicating whether trim
 * (ATA) or unmap (SCSI) requests - conveniently called discard requests -
 * are likely to succeed or fail. Either way, a discard request
 * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
 * the underlying block-device hardware. The boolean simply indicates whether
 * or not it is worthwhile for the frontend to attempt discard requests.
 * If a backend does not recognise BLKIF_OP_DISCARD, it should *not*
 * create the "feature-discard" node!
 *
 * A discard operation is a request for the underlying block device to mark
 * extents to be erased. However, discard does not guarantee that the blocks
 * will be erased from the device - it is just a hint to the device
 * controller that these blocks are no longer in use. What the device
 * controller does with that information is left to the controller.
 * Discard operations are passed with sector_number as the
 * sector index to begin discard operations at and nr_sectors as the number of
 * sectors to be discarded. The specified sectors should be discarded if the
 * underlying block device supports trim (ATA) or unmap (SCSI) operations,
 * or a BLKIF_RSP_EOPNOTSUPP should be returned.
 * More information about trim/unmap operations can be found at:
 * http://t13.org/Documents/UploadedDocuments/docs2008/
 *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
 * http://www.seagate.com/staticfiles/support/disc/manuals/
 *     Interface%20manuals/100293068c.pdf
 * The backend can optionally provide three extra XenBus attributes to
 * further optimize the discard functionality:
 * 'discard-alignment' - Devices that support discard functionality may
 * internally allocate space in units that are bigger than the exported
 * logical block size. The discard-alignment parameter indicates how many bytes
 * the beginning of the partition is offset from the internal allocation unit's
 * natural alignment.
 * 'discard-granularity' - Devices that support discard functionality may
 * internally allocate space using units that are bigger than the logical block
 * size. The discard-granularity parameter indicates the size of the internal
 * allocation unit in bytes if reported by the device. Otherwise the
 * discard-granularity will be set to match the device's physical block size.
 * 'discard-secure' - All copies of the discarded sectors (potentially created
 * by garbage collection) must also be erased. To use this feature, the flag
 * BLKIF_DISCARD_SECURE must be set in the blkif_request_discard.
 */
#define BLKIF_OP_DISCARD           5

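/*
 * For illustration only: filling in a discard request. Ring-slot
 * acquisition and notification work as for any other request; my_id,
 * start, count and secure are hypothetical frontend state.
 *
 *     struct blkif_request *req =
 *         RING_GET_REQUEST(&ring, ring.req_prod_pvt);
 *
 *     req->operation = BLKIF_OP_DISCARD;
 *     req->u.discard.flag = secure ? BLKIF_DISCARD_SECURE : 0;
 *     req->u.discard.id = my_id;            // echoed back in the response
 *     req->u.discard.sector_number = start; // first sector to discard
 *     req->u.discard.nr_sectors = count;    // number of sectors to discard
 *     ring.req_prod_pvt++;
 */
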
/*
 * Recognised if "feature-max-indirect-segments" is present in the backend
 * xenbus info. The "feature-max-indirect-segments" node contains the maximum
 * number of segments allowed by the backend per request. If the node is
 * present, the frontend might use blkif_request_indirect structs in order to
 * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
 * maximum number of indirect segments is fixed by the backend, but the
 * frontend can issue requests with any number of indirect segments as long as
 * it's less than the number provided by the backend. The indirect_grefs field
 * in blkif_request_indirect should be filled by the frontend with the
 * grant references of the pages that are holding the indirect segments.
 * These pages are filled with an array of blkif_request_segment that hold the
 * information about the segments. The number of indirect pages to use is
 * determined by the number of segments an indirect request contains. Every
 * indirect page can contain a maximum of
 * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
 * calculate the number of indirect pages to use we have to do
 * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
 *
 * If a backend does not recognise BLKIF_OP_INDIRECT, it should *not*
 * create the "feature-max-indirect-segments" node!
 */
#define BLKIF_OP_INDIRECT          6

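/*
 * For illustration only: the indirect-page count from the formula above,
 * for a request carrying nr_segments indirect segments.
 *
 *     #define SEGS_PER_INDIRECT_FRAME \
 *         (PAGE_SIZE / sizeof(struct blkif_request_segment))
 *
 *     unsigned int nr_indirect_pages =
 *         DIV_ROUND_UP(nr_segments, SEGS_PER_INDIRECT_FRAME);
 */
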
/*
 * Maximum scatter/gather segments per request.
 * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
 * NB. This could be 12 if the ring indexes weren't stored in the same page.
 */
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11

#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8

struct blkif_request_segment {
        grant_ref_t gref;        /* reference to I/O buffer frame        */
        /* @first_sect: first sector in frame to transfer (inclusive).   */
        /* @last_sect: last sector in frame to transfer (inclusive).     */
        uint8_t     first_sect, last_sect;
};

struct blkif_request_rw {
        uint8_t        nr_segments;  /* number of segments                   */
        blkif_vdev_t   handle;       /* only for read/write requests         */
#ifndef CONFIG_X86_32
        uint32_t       _pad1;        /* offsetof(blkif_request,u.rw.id) == 8 */
#endif
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
        struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__));

struct blkif_request_discard {
        uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero.        */
#define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0          */
        blkif_vdev_t   _pad1;        /* only for read/write requests         */
#ifndef CONFIG_X86_32
        uint32_t       _pad2;        /* offsetof(blkif_req..,u.discard.id)==8*/
#endif
        uint64_t       id;           /* private guest value, echoed in resp  */
        blkif_sector_t sector_number;
        uint64_t       nr_sectors;
        uint8_t        _pad3;
} __attribute__((__packed__));

struct blkif_request_other {
        uint8_t        _pad1;
        blkif_vdev_t   _pad2;        /* only for read/write requests         */
#ifndef CONFIG_X86_32
        uint32_t       _pad3;        /* offsetof(blkif_req..,u.other.id)==8  */
#endif
        uint64_t       id;           /* private guest value, echoed in resp  */
} __attribute__((__packed__));

struct blkif_request_indirect {
        uint8_t        indirect_op;
        uint16_t       nr_segments;
#ifndef CONFIG_X86_32
        uint32_t       _pad1;        /* offsetof(blkif_...,u.indirect.id) == 8 */
#endif
        uint64_t       id;
        blkif_sector_t sector_number;
        blkif_vdev_t   handle;
        uint16_t       _pad2;
        grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
#ifndef CONFIG_X86_32
        uint32_t       _pad3;        /* make it 64 byte aligned */
#else
        uint64_t       _pad3;        /* make it 64 byte aligned */
#endif
} __attribute__((__packed__));

struct blkif_request {
        uint8_t        operation;    /* BLKIF_OP_???                         */
        union {
                struct blkif_request_rw rw;
                struct blkif_request_discard discard;
                struct blkif_request_other other;
                struct blkif_request_indirect indirect;
        } u;
} __attribute__((__packed__));

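/*
 * For illustration only: a frontend queuing a one-segment 4 KiB read
 * (sectors 0-7 of the granted frame, with 512-byte sectors) and notifying
 * the backend only when it asked for an event. The gref, handle, sector
 * and irq values are hypothetical.
 *
 *     struct blkif_front_ring ring;   // set up with FRONT_RING_INIT()
 *     struct blkif_request *req;
 *     int notify;
 *
 *     req = RING_GET_REQUEST(&ring, ring.req_prod_pvt);
 *     req->operation = BLKIF_OP_READ;
 *     req->u.rw.nr_segments = 1;
 *     req->u.rw.handle = handle;
 *     req->u.rw.id = my_id;
 *     req->u.rw.sector_number = sector;
 *     req->u.rw.seg[0].gref = gref;
 *     req->u.rw.seg[0].first_sect = 0;
 *     req->u.rw.seg[0].last_sect = 7;
 *     ring.req_prod_pvt++;
 *
 *     RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ring, notify);
 *     if (notify)
 *         notify_remote_via_irq(irq);
 */
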
struct blkif_response {
        uint64_t       id;           /* copied from request */
        uint8_t        operation;    /* copied from request */
        int16_t        status;       /* BLKIF_RSP_???       */
};

/*
 * STATUS RETURN CODES.
 */
/* Operation not supported (only happens on barrier writes). */
#define BLKIF_RSP_EOPNOTSUPP  -2
/* Operation failed for some unspecified reason (-EIO). */
#define BLKIF_RSP_ERROR       -1
/* Operation completed successfully. */
#define BLKIF_RSP_OKAY         0

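/*
 * For illustration only: a sketch of frontend response handling. On
 * BLKIF_RSP_EOPNOTSUPP a frontend typically disables the offending
 * feature and retries the I/O in a supported form; complete_io(),
 * disable_feature() and requeue() are hypothetical.
 *
 *     struct blkif_response *rsp = RING_GET_RESPONSE(&ring, cons);
 *
 *     switch (rsp->status) {
 *     case BLKIF_RSP_OKAY:
 *         complete_io(rsp->id, 0);
 *         break;
 *     case BLKIF_RSP_EOPNOTSUPP:
 *         disable_feature(rsp->operation);
 *         requeue(rsp->id);
 *         break;
 *     default:                        // BLKIF_RSP_ERROR
 *         complete_io(rsp->id, -EIO);
 *         break;
 *     }
 */
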
/*
 * Generate blkif ring structures and types.
 */

DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);

#define VDISK_CDROM        0x1
#define VDISK_REMOVABLE    0x2
#define VDISK_READONLY     0x4

/* Xen-defined major numbers for virtual disks; they look strangely
 * familiar */
#define XEN_IDE0_MAJOR         3
#define XEN_IDE1_MAJOR        22
#define XEN_SCSI_DISK0_MAJOR   8
#define XEN_SCSI_DISK1_MAJOR  65
#define XEN_SCSI_DISK2_MAJOR  66
#define XEN_SCSI_DISK3_MAJOR  67
#define XEN_SCSI_DISK4_MAJOR  68
#define XEN_SCSI_DISK5_MAJOR  69
#define XEN_SCSI_DISK6_MAJOR  70
#define XEN_SCSI_DISK7_MAJOR  71
#define XEN_SCSI_DISK8_MAJOR  128
#define XEN_SCSI_DISK9_MAJOR  129
#define XEN_SCSI_DISK10_MAJOR 130
#define XEN_SCSI_DISK11_MAJOR 131
#define XEN_SCSI_DISK12_MAJOR 132
#define XEN_SCSI_DISK13_MAJOR 133
#define XEN_SCSI_DISK14_MAJOR 134
#define XEN_SCSI_DISK15_MAJOR 135

#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */