/*
 * blkfront.c
 *
 * XenLinux virtual block device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 * Copyright (c) 2004, Andrew Warfield
 * Copyright (c) 2005, Christopher Clark
 * Copyright (c) 2005, XenSource Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/interrupt.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/hdreg.h>
#include <linux/cdrom.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>
#include <linux/list.h>

#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/page.h>
#include <xen/platform_pci.h>

#include <xen/interface/grant_table.h>
#include <xen/interface/io/blkif.h>
#include <xen/interface/io/protocols.h>

#include <asm/xen/hypervisor.h>

enum blkif_state {
	BLKIF_STATE_DISCONNECTED,
	BLKIF_STATE_CONNECTED,
	BLKIF_STATE_SUSPENDED,
};

struct grant {
	grant_ref_t gref;
	struct page *page;
	struct list_head node;
};

struct blk_shadow {
	struct blkif_request req;
	struct request *request;
	struct grant **grants_used;
	struct grant **indirect_grants;
	struct scatterlist *sg;
	unsigned int num_sg;
};

struct split_bio {
	struct bio *bio;
	atomic_t pending;
};

static DEFINE_MUTEX(blkfront_mutex);
static const struct block_device_operations xlvbd_block_fops;

/*
 * Maximum number of segments in indirect requests, the actual value used by
 * the frontend driver is the minimum of this value and the value provided
 * by the backend driver.
 */

static unsigned int xen_blkif_max_segments = 32;
module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");

/*
 * Maximum order of pages to be used for the shared ring between front and
 * backend, 4KB page granularity is used.
 */
static unsigned int xen_blkif_max_ring_order;
module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");

#define BLK_RING_SIZE(info)	\
	__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)

#define BLK_MAX_RING_SIZE	\
	__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS)

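/*
 * Rough sketch of the sizing (assuming the usual 64-byte shared-ring header
 * and ~112-byte request/response union): a single 4KB ring page yields 32
 * ring slots, and each additional ring page roughly doubles that, so
 * nr_ring_pages bounds how many requests can be in flight at once.
 */
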
/*
 * ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
 * characters are enough. Define to 20 to keep consistent with the backend.
 */
#define RINGREF_NAME_LEN (20)

/*
 * Per-ring info.
 * Every blkfront device can associate with one or more blkfront_ring_info,
 * depending on how many hardware queues/rings to be used.
 */
struct blkfront_ring_info {
	struct blkif_front_ring ring;
	unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
	unsigned int evtchn, irq;
	struct work_struct work;
	struct gnttab_free_callback callback;
	struct blk_shadow shadow[BLK_MAX_RING_SIZE];
	struct list_head indirect_pages;
	unsigned long shadow_free;
	struct blkfront_info *dev_info;
};

/*
 * We have one of these per vbd, whether ide, scsi or 'other'. They
 * hang in private_data off the gendisk structure. We may end up
 * putting all kinds of interesting stuff here :-)
 */
struct blkfront_info
{
	spinlock_t io_lock;
	struct mutex mutex;
	struct xenbus_device *xbdev;
	struct gendisk *gd;
	int vdevice;
	blkif_vdev_t handle;
	enum blkif_state connected;
	unsigned int nr_ring_pages;
	struct request_queue *rq;
	struct list_head grants;
	unsigned int persistent_gnts_c;
	unsigned int feature_flush;
	unsigned int feature_discard:1;
	unsigned int feature_secdiscard:1;
	unsigned int discard_granularity;
	unsigned int discard_alignment;
	unsigned int feature_persistent:1;
	/* Number of 4KB segments handled */
	unsigned int max_indirect_segments;
	int is_ready;
	struct blk_mq_tag_set tag_set;
	struct blkfront_ring_info rinfo;
};

static unsigned int nr_minors;
static unsigned long *minors;
static DEFINE_SPINLOCK(minor_lock);

#define GRANT_INVALID_REF	0

#define PARTS_PER_DISK		16
#define PARTS_PER_EXT_DISK	256

#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)

#define EXT_SHIFT 28
#define EXTENDED (1<<EXT_SHIFT)
#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
#define EMULATED_HD_DISK_MINOR_OFFSET (0)
#define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256)
#define EMULATED_SD_DISK_MINOR_OFFSET (0)
#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256)

#define DEV_NAME	"xvd"	/* name in /dev */

/*
 * Grants are always the same size as a Xen page (i.e. 4KB).
 * A physical segment is always the same size as a Linux page.
 * Number of grants per physical segment
 */
#define GRANTS_PER_PSEG	(PAGE_SIZE / XEN_PAGE_SIZE)

#define GRANTS_PER_INDIRECT_FRAME \
	(XEN_PAGE_SIZE / sizeof(struct blkif_request_segment))

#define PSEGS_PER_INDIRECT_FRAME	\
	(GRANTS_PER_INDIRECT_FRAME / GRANTS_PER_PSEG)

#define INDIRECT_GREFS(_grants)		\
	DIV_ROUND_UP(_grants, GRANTS_PER_INDIRECT_FRAME)

#define GREFS(_psegs)	((_psegs) * GRANTS_PER_PSEG)

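/*
 * Worked example (assuming 4KB Xen grants and the 8-byte
 * blkif_request_segment layout): on a 4KB-page build GRANTS_PER_PSEG is 1
 * and GRANTS_PER_INDIRECT_FRAME is 512, so one indirect frame can describe
 * up to 512 grants; INDIRECT_GREFS(512) == 1 while INDIRECT_GREFS(513) == 2.
 */
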
static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo);
static int blkfront_gather_backend_features(struct blkfront_info *info);

static int get_id_from_freelist(struct blkfront_ring_info *rinfo)
{
	unsigned long free = rinfo->shadow_free;

	BUG_ON(free >= BLK_RING_SIZE(rinfo->dev_info));
	rinfo->shadow_free = rinfo->shadow[free].req.u.rw.id;
	rinfo->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */
	return free;
}

static int add_id_to_freelist(struct blkfront_ring_info *rinfo,
			      unsigned long id)
{
	if (rinfo->shadow[id].req.u.rw.id != id)
		return -EINVAL;
	if (rinfo->shadow[id].request == NULL)
		return -EINVAL;
	rinfo->shadow[id].req.u.rw.id = rinfo->shadow_free;
	rinfo->shadow[id].request = NULL;
	rinfo->shadow_free = id;
	return 0;
}

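/*
 * The shadow entries double as a free list: shadow_free holds the index of
 * the first unused entry, and each unused entry chains to the next through
 * the otherwise idle req.u.rw.id field. get_id_from_freelist() pops from
 * that list and add_id_to_freelist() pushes a completed id back, so both
 * operations are O(1).
 */
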
static int fill_grant_buffer(struct blkfront_ring_info *rinfo, int num)
{
	struct blkfront_info *info = rinfo->dev_info;
	struct page *granted_page;
	struct grant *gnt_list_entry, *n;
	int i = 0;

	while (i < num) {
		gnt_list_entry = kzalloc(sizeof(struct grant), GFP_NOIO);
		if (!gnt_list_entry)
			goto out_of_memory;

		if (info->feature_persistent) {
			granted_page = alloc_page(GFP_NOIO);
			if (!granted_page) {
				kfree(gnt_list_entry);
				goto out_of_memory;
			}
			gnt_list_entry->page = granted_page;
		}

		gnt_list_entry->gref = GRANT_INVALID_REF;
		list_add(&gnt_list_entry->node, &info->grants);
		i++;
	}

	return 0;

out_of_memory:
	list_for_each_entry_safe(gnt_list_entry, n,
				 &info->grants, node) {
		list_del(&gnt_list_entry->node);
		if (info->feature_persistent)
			__free_page(gnt_list_entry->page);
		kfree(gnt_list_entry);
		i--;
	}
	BUG_ON(i != 0);
	return -ENOMEM;
}

static struct grant *get_free_grant(struct blkfront_info *info)
{
	struct grant *gnt_list_entry;

	BUG_ON(list_empty(&info->grants));
	gnt_list_entry = list_first_entry(&info->grants, struct grant,
					  node);
	list_del(&gnt_list_entry->node);

	if (gnt_list_entry->gref != GRANT_INVALID_REF)
		info->persistent_gnts_c--;

	return gnt_list_entry;
}

static inline void grant_foreign_access(const struct grant *gnt_list_entry,
					const struct blkfront_info *info)
{
	gnttab_page_grant_foreign_access_ref_one(gnt_list_entry->gref,
						 info->xbdev->otherend_id,
						 gnt_list_entry->page,
						 0);
}

static struct grant *get_grant(grant_ref_t *gref_head,
			       unsigned long gfn,
			       struct blkfront_info *info)
{
	struct grant *gnt_list_entry = get_free_grant(info);

	if (gnt_list_entry->gref != GRANT_INVALID_REF)
		return gnt_list_entry;

	/* Assign a gref to this page */
	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
	BUG_ON(gnt_list_entry->gref == -ENOSPC);
	if (info->feature_persistent)
		grant_foreign_access(gnt_list_entry, info);
	else {
		/* Grant access to the GFN passed by the caller */
		gnttab_grant_foreign_access_ref(gnt_list_entry->gref,
						info->xbdev->otherend_id,
						gfn, 0);
	}

	return gnt_list_entry;
}

static struct grant *get_indirect_grant(grant_ref_t *gref_head,
					struct blkfront_info *info)
{
	struct grant *gnt_list_entry = get_free_grant(info);

	if (gnt_list_entry->gref != GRANT_INVALID_REF)
		return gnt_list_entry;

	/* Assign a gref to this page */
	gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
	BUG_ON(gnt_list_entry->gref == -ENOSPC);
	if (!info->feature_persistent) {
		struct page *indirect_page;

		/* Fetch a pre-allocated page to use for indirect grefs */
		BUG_ON(list_empty(&info->rinfo.indirect_pages));
		indirect_page = list_first_entry(&info->rinfo.indirect_pages,
						 struct page, lru);
		list_del(&indirect_page->lru);
		gnt_list_entry->page = indirect_page;
	}
	grant_foreign_access(gnt_list_entry, info);

	return gnt_list_entry;
}

static const char *op_name(int op)
{
	static const char *const names[] = {
		[BLKIF_OP_READ] = "read",
		[BLKIF_OP_WRITE] = "write",
		[BLKIF_OP_WRITE_BARRIER] = "barrier",
		[BLKIF_OP_FLUSH_DISKCACHE] = "flush",
		[BLKIF_OP_DISCARD] = "discard" };

	if (op < 0 || op >= ARRAY_SIZE(names))
		return "unknown";

	if (!names[op])
		return "reserved";

	return names[op];
}
static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
{
	unsigned int end = minor + nr;
	int rc;

	if (end > nr_minors) {
		unsigned long *bitmap, *old;

		bitmap = kcalloc(BITS_TO_LONGS(end), sizeof(*bitmap),
				 GFP_KERNEL);
		if (bitmap == NULL)
			return -ENOMEM;

		spin_lock(&minor_lock);
		if (end > nr_minors) {
			old = minors;
			memcpy(bitmap, minors,
			       BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
			minors = bitmap;
			nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
		} else
			old = bitmap;
		spin_unlock(&minor_lock);
		kfree(old);
	}

	spin_lock(&minor_lock);
	if (find_next_bit(minors, end, minor) >= end) {
		bitmap_set(minors, minor, nr);
		rc = 0;
	} else
		rc = -EBUSY;
	spin_unlock(&minor_lock);

	return rc;
}

static void xlbd_release_minors(unsigned int minor, unsigned int nr)
{
	unsigned int end = minor + nr;

	BUG_ON(end > nr_minors);
	spin_lock(&minor_lock);
	bitmap_clear(minors, minor, nr);
	spin_unlock(&minor_lock);
}

static void blkif_restart_queue_callback(void *arg)
{
	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)arg;
	schedule_work(&rinfo->work);
}

static int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
	/* We don't have real geometry info, but let's at least return
	   values consistent with the size of the device */
	sector_t nsect = get_capacity(bd->bd_disk);
	sector_t cylinders = nsect;

	hg->heads = 0xff;
	hg->sectors = 0x3f;
	sector_div(cylinders, hg->heads * hg->sectors);
	hg->cylinders = cylinders;
	if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
		hg->cylinders = 0xffff;
	return 0;
}

static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
		       unsigned command, unsigned long argument)
{
	struct blkfront_info *info = bdev->bd_disk->private_data;
	int i;

	dev_dbg(&info->xbdev->dev, "command: 0x%x, argument: 0x%lx\n",
		command, (long)argument);

	switch (command) {
	case CDROMMULTISESSION:
		dev_dbg(&info->xbdev->dev, "FIXME: support multisession CDs later\n");
		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
			if (put_user(0, (char __user *)(argument + i)))
				return -EFAULT;
		return 0;

	case CDROM_GET_CAPABILITY: {
		struct gendisk *gd = info->gd;
		if (gd->flags & GENHD_FL_CD)
			return 0;
		return -EINVAL;
	}

	default:
		/*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
		  command);*/
		return -EINVAL; /* same return as native Linux */
	}

	return 0;
}

static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo)
{
	struct blkfront_info *info = rinfo->dev_info;
	struct blkif_request *ring_req;
	unsigned long id;

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&rinfo->ring, rinfo->ring.req_prod_pvt);
	id = get_id_from_freelist(rinfo);
	rinfo->shadow[id].request = req;

	ring_req->operation = BLKIF_OP_DISCARD;
	ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
	ring_req->u.discard.id = id;
	ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
	if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
		ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
	else
		ring_req->u.discard.flag = 0;

	rinfo->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	rinfo->shadow[id].req = *ring_req;

	return 0;
}

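/*
 * Note that a discard consumes a single ring slot and carries no data
 * segments, so unlike the read/write path below it does not need to claim
 * any grant references.
 */
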
struct setup_rw_req {
	unsigned int grant_idx;
	struct blkif_request_segment *segments;
	struct blkfront_ring_info *rinfo;
	struct blkif_request *ring_req;
	grant_ref_t gref_head;
	unsigned int id;
	/* Only used when persistent grant is used and it's a read request */
	bool need_copy;
	unsigned int bvec_off;
	char *bvec_data;
};

static void blkif_setup_rw_req_grant(unsigned long gfn, unsigned int offset,
				     unsigned int len, void *data)
{
	struct setup_rw_req *setup = data;
	int n, ref;
	struct grant *gnt_list_entry;
	unsigned int fsect, lsect;
	/* Convenient aliases */
	unsigned int grant_idx = setup->grant_idx;
	struct blkif_request *ring_req = setup->ring_req;
	struct blkfront_ring_info *rinfo = setup->rinfo;
	struct blkfront_info *info = rinfo->dev_info;
	struct blk_shadow *shadow = &rinfo->shadow[setup->id];

	if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
	    (grant_idx % GRANTS_PER_INDIRECT_FRAME == 0)) {
		if (setup->segments)
			kunmap_atomic(setup->segments);

		n = grant_idx / GRANTS_PER_INDIRECT_FRAME;
		gnt_list_entry = get_indirect_grant(&setup->gref_head, info);
		shadow->indirect_grants[n] = gnt_list_entry;
		setup->segments = kmap_atomic(gnt_list_entry->page);
		ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
	}

	gnt_list_entry = get_grant(&setup->gref_head, gfn, info);
	ref = gnt_list_entry->gref;
	shadow->grants_used[grant_idx] = gnt_list_entry;

	if (setup->need_copy) {
		void *shared_data;

		shared_data = kmap_atomic(gnt_list_entry->page);
		/*
		 * this does not wipe data stored outside the
		 * range sg->offset..sg->offset+sg->length.
		 * Therefore, blkback *could* see data from
		 * previous requests. This is OK as long as
		 * persistent grants are shared with just one
		 * domain. It may need refactoring if this
		 * changes
		 */
		memcpy(shared_data + offset,
		       setup->bvec_data + setup->bvec_off,
		       len);

		kunmap_atomic(shared_data);
		setup->bvec_off += len;
	}

	fsect = offset >> 9;
	lsect = fsect + (len >> 9) - 1;
	if (ring_req->operation != BLKIF_OP_INDIRECT) {
		ring_req->u.rw.seg[grant_idx] =
			(struct blkif_request_segment) {
				.gref       = ref,
				.first_sect = fsect,
				.last_sect  = lsect };
	} else {
		setup->segments[grant_idx % GRANTS_PER_INDIRECT_FRAME] =
			(struct blkif_request_segment) {
				.gref       = ref,
				.first_sect = fsect,
				.last_sect  = lsect };
	}

	(setup->grant_idx)++;
}

static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *rinfo)
{
	struct blkfront_info *info = rinfo->dev_info;
	struct blkif_request *ring_req;
	unsigned long id;
	int i;
	struct setup_rw_req setup = {
		.grant_idx = 0,
		.segments = NULL,
		.rinfo = rinfo,
		.need_copy = rq_data_dir(req) && info->feature_persistent,
	};

	/*
	 * Used to store if we are able to queue the request by just using
	 * existing persistent grants, or if we have to get new grants,
	 * as there are not sufficiently many free.
	 */
	bool new_persistent_gnts;
	struct scatterlist *sg;
	int num_sg, max_grefs, num_grant;

	max_grefs = req->nr_phys_segments * GRANTS_PER_PSEG;
	if (max_grefs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
		/*
		 * If we are using indirect segments we need to account
		 * for the indirect grefs used in the request.
		 */
		max_grefs += INDIRECT_GREFS(max_grefs);

	/* Check if we have enough grants to allocate a request */
	if (info->persistent_gnts_c < max_grefs) {
		new_persistent_gnts = 1;
		if (gnttab_alloc_grant_references(
		    max_grefs - info->persistent_gnts_c,
		    &setup.gref_head) < 0) {
			gnttab_request_free_callback(
				&rinfo->callback,
				blkif_restart_queue_callback,
				rinfo,
				max_grefs);
			return 1;
		}
	} else
		new_persistent_gnts = 0;

	/* Fill out a communications ring structure. */
	ring_req = RING_GET_REQUEST(&rinfo->ring, rinfo->ring.req_prod_pvt);
	id = get_id_from_freelist(rinfo);
	rinfo->shadow[id].request = req;

	BUG_ON(info->max_indirect_segments == 0 &&
	       GREFS(req->nr_phys_segments) > BLKIF_MAX_SEGMENTS_PER_REQUEST);
	BUG_ON(info->max_indirect_segments &&
	       GREFS(req->nr_phys_segments) > info->max_indirect_segments);

	num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg);
	num_grant = 0;
	/* Calculate the number of grants used */
	for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i)
		num_grant += gnttab_count_grant(sg->offset, sg->length);

	ring_req->u.rw.id = id;
	rinfo->shadow[id].num_sg = num_sg;
	if (num_grant > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
		/*
		 * The indirect operation can only be a BLKIF_OP_READ or
		 * BLKIF_OP_WRITE
		 */
		BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA));
		ring_req->operation = BLKIF_OP_INDIRECT;
		ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
			BLKIF_OP_WRITE : BLKIF_OP_READ;
		ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req);
		ring_req->u.indirect.handle = info->handle;
		ring_req->u.indirect.nr_segments = num_grant;
	} else {
		ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
		ring_req->u.rw.handle = info->handle;
		ring_req->operation = rq_data_dir(req) ?
			BLKIF_OP_WRITE : BLKIF_OP_READ;
		if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
			/*
			 * Ideally we can do an unordered flush-to-disk.
			 * In case the backend only supports barriers, use that.
			 * A barrier request is a superset of FUA, so we can
			 * implement it the same way.  (It's also a FLUSH+FUA,
			 * since it is guaranteed ordered WRT previous writes.)
			 */
			switch (info->feature_flush &
				((REQ_FLUSH|REQ_FUA))) {
			case REQ_FLUSH|REQ_FUA:
				ring_req->operation =
					BLKIF_OP_WRITE_BARRIER;
				break;
			case REQ_FLUSH:
				ring_req->operation =
					BLKIF_OP_FLUSH_DISKCACHE;
				break;
			default:
				ring_req->operation = 0;
			}
		}
		ring_req->u.rw.nr_segments = num_grant;
	}

	setup.ring_req = ring_req;
	setup.id = id;
	for_each_sg(rinfo->shadow[id].sg, sg, num_sg, i) {
		BUG_ON(sg->offset + sg->length > PAGE_SIZE);

		if (setup.need_copy) {
			setup.bvec_off = sg->offset;
			setup.bvec_data = kmap_atomic(sg_page(sg));
		}

		gnttab_foreach_grant_in_range(sg_page(sg),
					      sg->offset,
					      sg->length,
					      blkif_setup_rw_req_grant,
					      &setup);

		if (setup.need_copy)
			kunmap_atomic(setup.bvec_data);
	}
	if (setup.segments)
		kunmap_atomic(setup.segments);

	rinfo->ring.req_prod_pvt++;

	/* Keep a private copy so we can reissue requests when recovering. */
	rinfo->shadow[id].req = *ring_req;

	if (new_persistent_gnts)
		gnttab_free_grant_references(setup.gref_head);

	return 0;
}

/*
 * Generate a Xen blkfront IO request from a blk layer request.  Reads
 * and writes are handled as expected.
 *
 * @req: a request struct
 */
static int blkif_queue_request(struct request *req, struct blkfront_ring_info *rinfo)
{
	if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED))
		return 1;

	if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE)))
		return blkif_queue_discard_req(req, rinfo);
	else
		return blkif_queue_rw_req(req, rinfo);
}

static inline void flush_requests(struct blkfront_ring_info *rinfo)
{
	int notify;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&rinfo->ring, notify);

	if (notify)
		notify_remote_via_irq(rinfo->irq);
}

static inline bool blkif_request_flush_invalid(struct request *req,
					       struct blkfront_info *info)
{
	return ((req->cmd_type != REQ_TYPE_FS) ||
		((req->cmd_flags & REQ_FLUSH) &&
		 !(info->feature_flush & REQ_FLUSH)) ||
		((req->cmd_flags & REQ_FUA) &&
		 !(info->feature_flush & REQ_FUA)));
}

static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
			  const struct blk_mq_queue_data *qd)
{
	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data;
	struct blkfront_info *info = rinfo->dev_info;

	blk_mq_start_request(qd->rq);
	spin_lock_irq(&info->io_lock);
	if (RING_FULL(&rinfo->ring))
		goto out_busy;

	if (blkif_request_flush_invalid(qd->rq, rinfo->dev_info))
		goto out_err;

	if (blkif_queue_request(qd->rq, rinfo))
		goto out_busy;

	flush_requests(rinfo);
	spin_unlock_irq(&info->io_lock);
	return BLK_MQ_RQ_QUEUE_OK;

out_err:
	spin_unlock_irq(&info->io_lock);
	return BLK_MQ_RQ_QUEUE_ERROR;

out_busy:
	spin_unlock_irq(&info->io_lock);
	blk_mq_stop_hw_queue(hctx);
	return BLK_MQ_RQ_QUEUE_BUSY;
}

static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			    unsigned int index)
{
	struct blkfront_info *info = (struct blkfront_info *)data;

	hctx->driver_data = &info->rinfo;
	return 0;
}

static struct blk_mq_ops blkfront_mq_ops = {
	.queue_rq = blkif_queue_rq,
	.map_queue = blk_mq_map_queue,
	.init_hctx = blk_mq_init_hctx,
};

static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
				unsigned int physical_sector_size,
				unsigned int segments)
{
	struct request_queue *rq;
	struct blkfront_info *info = gd->private_data;

	memset(&info->tag_set, 0, sizeof(info->tag_set));
	info->tag_set.ops = &blkfront_mq_ops;
	info->tag_set.nr_hw_queues = 1;
	info->tag_set.queue_depth = BLK_RING_SIZE(info);
	info->tag_set.numa_node = NUMA_NO_NODE;
	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
	info->tag_set.cmd_size = 0;
	info->tag_set.driver_data = info;

	if (blk_mq_alloc_tag_set(&info->tag_set))
		return -1;
	rq = blk_mq_init_queue(&info->tag_set);
	if (IS_ERR(rq)) {
		blk_mq_free_tag_set(&info->tag_set);
		return -1;
	}

	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);

	if (info->feature_discard) {
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
		blk_queue_max_discard_sectors(rq, get_capacity(gd));
		rq->limits.discard_granularity = info->discard_granularity;
		rq->limits.discard_alignment = info->discard_alignment;
		if (info->feature_secdiscard)
			queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, rq);
	}

	/* Hard sector size and max sectors impersonate the equiv. hardware. */
	blk_queue_logical_block_size(rq, sector_size);
	blk_queue_physical_block_size(rq, physical_sector_size);
	blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);

	/* Each segment in a request is up to an aligned page in size. */
	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
	blk_queue_max_segment_size(rq, PAGE_SIZE);

	/* Ensure a merged request will fit in a single I/O ring slot. */
	blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);

	/* Make sure buffer addresses are sector-aligned. */
	blk_queue_dma_alignment(rq, 511);

	/* Make sure we don't use bounce buffers. */
	blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);

	gd->queue = rq;

	return 0;
}

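/*
 * The tag set above sizes the blk-mq queue depth to BLK_RING_SIZE(info),
 * which presumably keeps the number of outstanding requests within the
 * number of ring slots (and shadow entries) available, so blkif_queue_rq()
 * should only rarely find the ring full.
 */
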
static const char *flush_info(unsigned int feature_flush)
{
	switch (feature_flush & ((REQ_FLUSH | REQ_FUA))) {
	case REQ_FLUSH|REQ_FUA:
		return "barrier: enabled;";
	case REQ_FLUSH:
		return "flush diskcache: enabled;";
	default:
		return "barrier or flush: disabled;";
	}
}

static void xlvbd_flush(struct blkfront_info *info)
{
	blk_queue_flush(info->rq, info->feature_flush);
	pr_info("blkfront: %s: %s %s %s %s %s\n",
		info->gd->disk_name, flush_info(info->feature_flush),
		"persistent grants:", info->feature_persistent ?
		"enabled;" : "disabled;", "indirect descriptors:",
		info->max_indirect_segments ? "enabled;" : "disabled;");
}

static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
{
	int major;
	major = BLKIF_MAJOR(vdevice);
	*minor = BLKIF_MINOR(vdevice);
	switch (major) {
	case XEN_IDE0_MAJOR:
		*offset = (*minor / 64) + EMULATED_HD_DISK_NAME_OFFSET;
		*minor = ((*minor / 64) * PARTS_PER_DISK) +
			EMULATED_HD_DISK_MINOR_OFFSET;
		break;
	case XEN_IDE1_MAJOR:
		*offset = (*minor / 64) + 2 + EMULATED_HD_DISK_NAME_OFFSET;
		*minor = (((*minor / 64) + 2) * PARTS_PER_DISK) +
			EMULATED_HD_DISK_MINOR_OFFSET;
		break;
	case XEN_SCSI_DISK0_MAJOR:
		*offset = (*minor / PARTS_PER_DISK) + EMULATED_SD_DISK_NAME_OFFSET;
		*minor = *minor + EMULATED_SD_DISK_MINOR_OFFSET;
		break;
	case XEN_SCSI_DISK1_MAJOR:
	case XEN_SCSI_DISK2_MAJOR:
	case XEN_SCSI_DISK3_MAJOR:
	case XEN_SCSI_DISK4_MAJOR:
	case XEN_SCSI_DISK5_MAJOR:
	case XEN_SCSI_DISK6_MAJOR:
	case XEN_SCSI_DISK7_MAJOR:
		*offset = (*minor / PARTS_PER_DISK) +
			((major - XEN_SCSI_DISK1_MAJOR + 1) * 16) +
			EMULATED_SD_DISK_NAME_OFFSET;
		*minor = *minor +
			((major - XEN_SCSI_DISK1_MAJOR + 1) * 16 * PARTS_PER_DISK) +
			EMULATED_SD_DISK_MINOR_OFFSET;
		break;
	case XEN_SCSI_DISK8_MAJOR:
	case XEN_SCSI_DISK9_MAJOR:
	case XEN_SCSI_DISK10_MAJOR:
	case XEN_SCSI_DISK11_MAJOR:
	case XEN_SCSI_DISK12_MAJOR:
	case XEN_SCSI_DISK13_MAJOR:
	case XEN_SCSI_DISK14_MAJOR:
	case XEN_SCSI_DISK15_MAJOR:
		*offset = (*minor / PARTS_PER_DISK) +
			((major - XEN_SCSI_DISK8_MAJOR + 8) * 16) +
			EMULATED_SD_DISK_NAME_OFFSET;
		*minor = *minor +
			((major - XEN_SCSI_DISK8_MAJOR + 8) * 16 * PARTS_PER_DISK) +
			EMULATED_SD_DISK_MINOR_OFFSET;
		break;
	case XENVBD_MAJOR:
		*offset = *minor / PARTS_PER_DISK;
		break;
	default:
		printk(KERN_WARNING "blkfront: your disk configuration is "
			"incorrect, please use an xvd device instead\n");
		return -ENODEV;
	}
	return 0;
}

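/*
 * Illustrative example (assuming the standard Linux IDE device numbering):
 * the emulated hda (XEN_IDE0_MAJOR, minor 0) maps to name offset 0 and
 * minor 0, i.e. it is presented as xvda, while hdb (minor 64) maps to name
 * offset 1 and minor 16, i.e. xvdb.
 */
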
static char *encode_disk_name(char *ptr, unsigned int n)
{
	if (n >= 26)
		ptr = encode_disk_name(ptr, n / 26 - 1);
	*ptr = 'a' + n % 26;
	return ptr + 1;
}

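/*
 * For example, offset 0 encodes to "a" (xvda), offset 25 to "z" (xvdz) and
 * offset 26 to "aa" (xvdaa), so the naming extends past 26 disks the same
 * way spreadsheet columns do.
 */
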
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
			       struct blkfront_info *info,
			       u16 vdisk_info, u16 sector_size,
			       unsigned int physical_sector_size)
{
	struct gendisk *gd;
	int nr_minors = 1;
	int err;
	unsigned int offset;
	int minor;
	int nr_parts;
	char *ptr;

	BUG_ON(info->gd != NULL);
	BUG_ON(info->rq != NULL);

	if ((info->vdevice>>EXT_SHIFT) > 1) {
		/* this is above the extended range; something is wrong */
		printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice);
		return -ENODEV;
	}

	if (!VDEV_IS_EXTENDED(info->vdevice)) {
		err = xen_translate_vdev(info->vdevice, &minor, &offset);
		if (err)
			return err;
		nr_parts = PARTS_PER_DISK;
	} else {
		minor = BLKIF_MINOR_EXT(info->vdevice);
		nr_parts = PARTS_PER_EXT_DISK;
		offset = minor / nr_parts;
		if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4)
			printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with "
				"emulated IDE disks,\n\t choose an xvd device name"
				"from xvde on\n", info->vdevice);
	}
	if (minor >> MINORBITS) {
		pr_warn("blkfront: %#x's minor (%#x) out of range; ignoring\n",
			info->vdevice, minor);
		return -ENODEV;
	}

	if ((minor % nr_parts) == 0)
		nr_minors = nr_parts;

	err = xlbd_reserve_minors(minor, nr_minors);
	if (err)
		goto out;
	err = -ENODEV;

	gd = alloc_disk(nr_minors);
	if (gd == NULL)
		goto release;

	strcpy(gd->disk_name, DEV_NAME);
	ptr = encode_disk_name(gd->disk_name + sizeof(DEV_NAME) - 1, offset);
	BUG_ON(ptr >= gd->disk_name + DISK_NAME_LEN);
	if (nr_minors > 1)
		*ptr = 0;
	else
		snprintf(ptr, gd->disk_name + DISK_NAME_LEN - ptr,
			 "%d", minor & (nr_parts - 1));

	gd->major = XENVBD_MAJOR;
	gd->first_minor = minor;
	gd->fops = &xlvbd_block_fops;
	gd->private_data = info;
	gd->driverfs_dev = &(info->xbdev->dev);
	set_capacity(gd, capacity);

	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
				 info->max_indirect_segments ? :
				 BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
		del_gendisk(gd);
		goto release;
	}

	info->rq = gd->queue;
	info->gd = gd;

	xlvbd_flush(info);

	if (vdisk_info & VDISK_READONLY)
		set_disk_ro(gd, 1);

	if (vdisk_info & VDISK_REMOVABLE)
		gd->flags |= GENHD_FL_REMOVABLE;

	if (vdisk_info & VDISK_CDROM)
		gd->flags |= GENHD_FL_CD;

	return 0;

 release:
	xlbd_release_minors(minor, nr_minors);
 out:
	return err;
}

static void xlvbd_release_gendisk(struct blkfront_info *info)
{
	unsigned int minor, nr_minors;
	struct blkfront_ring_info *rinfo = &info->rinfo;

	if (info->rq == NULL)
		return;

	/* No more blkif_request(). */
	blk_mq_stop_hw_queues(info->rq);

	/* No more gnttab callback work. */
	gnttab_cancel_free_callback(&rinfo->callback);

	/* Flush gnttab callback work. Must be done with no locks held. */
	flush_work(&rinfo->work);

	del_gendisk(info->gd);

	minor = info->gd->first_minor;
	nr_minors = info->gd->minors;
	xlbd_release_minors(minor, nr_minors);

	blk_cleanup_queue(info->rq);
	blk_mq_free_tag_set(&info->tag_set);
	info->rq = NULL;

	put_disk(info->gd);
	info->gd = NULL;
}

/* Must be called with io_lock held */
static void kick_pending_request_queues(struct blkfront_ring_info *rinfo)
{
	if (!RING_FULL(&rinfo->ring))
		blk_mq_start_stopped_hw_queues(rinfo->dev_info->rq, true);
}

static void blkif_restart_queue(struct work_struct *work)
{
	struct blkfront_ring_info *rinfo = container_of(work, struct blkfront_ring_info, work);

	spin_lock_irq(&rinfo->dev_info->io_lock);
	if (rinfo->dev_info->connected == BLKIF_STATE_CONNECTED)
		kick_pending_request_queues(rinfo);
	spin_unlock_irq(&rinfo->dev_info->io_lock);
}

static void blkif_free(struct blkfront_info *info, int suspend)
{
	struct grant *persistent_gnt;
	struct grant *n;
	int i, j, segs;
	struct blkfront_ring_info *rinfo = &info->rinfo;

	/* Prevent new requests being issued until we fix things up. */
	spin_lock_irq(&info->io_lock);
	info->connected = suspend ?
		BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
	/* No more blkif_request(). */
	if (info->rq)
		blk_mq_stop_hw_queues(info->rq);

	/* Remove all persistent grants */
	if (!list_empty(&info->grants)) {
		list_for_each_entry_safe(persistent_gnt, n,
					 &info->grants, node) {
			list_del(&persistent_gnt->node);
			if (persistent_gnt->gref != GRANT_INVALID_REF) {
				gnttab_end_foreign_access(persistent_gnt->gref,
							  0, 0UL);
				info->persistent_gnts_c--;
			}
			if (info->feature_persistent)
				__free_page(persistent_gnt->page);
			kfree(persistent_gnt);
		}
	}
	BUG_ON(info->persistent_gnts_c != 0);

	/*
	 * Remove indirect pages, this only happens when using indirect
	 * descriptors but not persistent grants
	 */
	if (!list_empty(&rinfo->indirect_pages)) {
		struct page *indirect_page, *n;

		BUG_ON(info->feature_persistent);
		list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
			list_del(&indirect_page->lru);
			__free_page(indirect_page);
		}
	}

	for (i = 0; i < BLK_RING_SIZE(info); i++) {
		/*
		 * Clear persistent grants present in requests already
		 * on the shared ring
		 */
		if (!rinfo->shadow[i].request)
			goto free_shadow;

		segs = rinfo->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
		       rinfo->shadow[i].req.u.indirect.nr_segments :
		       rinfo->shadow[i].req.u.rw.nr_segments;
		for (j = 0; j < segs; j++) {
			persistent_gnt = rinfo->shadow[i].grants_used[j];
			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
			if (info->feature_persistent)
				__free_page(persistent_gnt->page);
			kfree(persistent_gnt);
		}

		if (rinfo->shadow[i].req.operation != BLKIF_OP_INDIRECT)
			/*
			 * If this is not an indirect operation don't try to
			 * free indirect segments
			 */
			goto free_shadow;

		for (j = 0; j < INDIRECT_GREFS(segs); j++) {
			persistent_gnt = rinfo->shadow[i].indirect_grants[j];
			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
			__free_page(persistent_gnt->page);
			kfree(persistent_gnt);
		}

free_shadow:
		kfree(rinfo->shadow[i].grants_used);
		rinfo->shadow[i].grants_used = NULL;
		kfree(rinfo->shadow[i].indirect_grants);
		rinfo->shadow[i].indirect_grants = NULL;
		kfree(rinfo->shadow[i].sg);
		rinfo->shadow[i].sg = NULL;
	}

	/* No more gnttab callback work. */
	gnttab_cancel_free_callback(&rinfo->callback);
	spin_unlock_irq(&info->io_lock);

	/* Flush gnttab callback work. Must be done with no locks held. */
	flush_work(&rinfo->work);

	/* Free resources associated with old device channel. */
	for (i = 0; i < info->nr_ring_pages; i++) {
		if (rinfo->ring_ref[i] != GRANT_INVALID_REF) {
			gnttab_end_foreign_access(rinfo->ring_ref[i], 0, 0);
			rinfo->ring_ref[i] = GRANT_INVALID_REF;
		}
	}
	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
	rinfo->ring.sring = NULL;

	if (rinfo->irq)
		unbind_from_irqhandler(rinfo->irq, rinfo);
	rinfo->evtchn = rinfo->irq = 0;

}

struct copy_from_grant {
	const struct blk_shadow *s;
	unsigned int grant_idx;
	unsigned int bvec_offset;
	char *bvec_data;
};

static void blkif_copy_from_grant(unsigned long gfn, unsigned int offset,
				  unsigned int len, void *data)
{
	struct copy_from_grant *info = data;
	char *shared_data;
	/* Convenient aliases */
	const struct blk_shadow *s = info->s;

	shared_data = kmap_atomic(s->grants_used[info->grant_idx]->page);

	memcpy(info->bvec_data + info->bvec_offset,
	       shared_data + offset, len);

	info->bvec_offset += len;
	info->grant_idx++;

	kunmap_atomic(shared_data);
}

Bob Liu81f35162015-11-14 11:12:11 +08001236static void blkif_completion(struct blk_shadow *s, struct blkfront_ring_info *rinfo,
Roger Pau Monne0a8704a2012-10-24 18:58:45 +02001237 struct blkif_response *bret)
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001238{
Roger Pau Monned62f6912012-12-07 19:00:31 +01001239 int i = 0;
Roger Pau Monneb7649152013-05-02 10:58:50 +02001240 struct scatterlist *sg;
Julien Grallc004a6f2015-07-22 16:44:54 +01001241 int num_sg, num_grant;
Bob Liu81f35162015-11-14 11:12:11 +08001242 struct blkfront_info *info = rinfo->dev_info;
Julien Grallc004a6f2015-07-22 16:44:54 +01001243 struct copy_from_grant data = {
1244 .s = s,
1245 .grant_idx = 0,
1246 };
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001247
Julien Grallc004a6f2015-07-22 16:44:54 +01001248 num_grant = s->req.operation == BLKIF_OP_INDIRECT ?
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001249 s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
Julien Grallc004a6f2015-07-22 16:44:54 +01001250 num_sg = s->num_sg;
Roger Pau Monne0a8704a2012-10-24 18:58:45 +02001251
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001252 if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
Julien Grallc004a6f2015-07-22 16:44:54 +01001253 for_each_sg(s->sg, sg, num_sg, i) {
Roger Pau Monneb7649152013-05-02 10:58:50 +02001254 BUG_ON(sg->offset + sg->length > PAGE_SIZE);
Julien Grallc004a6f2015-07-22 16:44:54 +01001255
1256 data.bvec_offset = sg->offset;
1257 data.bvec_data = kmap_atomic(sg_page(sg));
1258
1259 gnttab_foreach_grant_in_range(sg_page(sg),
1260 sg->offset,
1261 sg->length,
1262 blkif_copy_from_grant,
1263 &data);
1264
1265 kunmap_atomic(data.bvec_data);
Roger Pau Monne0a8704a2012-10-24 18:58:45 +02001266 }
1267 }
1268 /* Add the used grants back to the list of available grants */
Julien Grallc004a6f2015-07-22 16:44:54 +01001269 for (i = 0; i < num_grant; i++) {
Roger Pau Monnefbe363c2013-08-12 12:53:44 +02001270 if (gnttab_query_foreign_access(s->grants_used[i]->gref)) {
1271 /*
1272 * If the grant is still mapped by the backend (the
1273 * backend has chosen to make this grant persistent)
1274 * we add it at the head of the list, so it will be
1275 * reused first.
1276 */
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001277 if (!info->feature_persistent)
1278 pr_alert_ratelimited("backend has not unmapped grant: %u\n",
1279 s->grants_used[i]->gref);
1280 list_add(&s->grants_used[i]->node, &info->grants);
Roger Pau Monnefbe363c2013-08-12 12:53:44 +02001281 info->persistent_gnts_c++;
1282 } else {
1283 /*
1284 * If the grant is not mapped by the backend we end the
1285 * foreign access and add it to the tail of the list,
1286 * so it will not be picked again unless we run out of
1287 * persistent grants.
1288 */
1289 gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL);
1290 s->grants_used[i]->gref = GRANT_INVALID_REF;
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001291 list_add_tail(&s->grants_used[i]->node, &info->grants);
Roger Pau Monnefbe363c2013-08-12 12:53:44 +02001292 }
Roger Pau Monne0a8704a2012-10-24 18:58:45 +02001293 }
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001294 if (s->req.operation == BLKIF_OP_INDIRECT) {
Julien Grallc004a6f2015-07-22 16:44:54 +01001295 for (i = 0; i < INDIRECT_GREFS(num_grant); i++) {
Roger Pau Monnefbe363c2013-08-12 12:53:44 +02001296 if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) {
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001297 if (!info->feature_persistent)
1298 pr_alert_ratelimited("backend has not unmapped grant: %u\n",
1299 s->indirect_grants[i]->gref);
1300 list_add(&s->indirect_grants[i]->node, &info->grants);
Roger Pau Monnefbe363c2013-08-12 12:53:44 +02001301 info->persistent_gnts_c++;
1302 } else {
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001303 struct page *indirect_page;
1304
Roger Pau Monnefbe363c2013-08-12 12:53:44 +02001305 gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL);
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001306 /*
1307 * Add the used indirect page back to the list of
1308 * available pages for indirect grefs.
1309 */
Bob Liu7b076752015-07-22 14:40:09 +08001310 if (!info->feature_persistent) {
Julien Gralla7a6df22015-06-30 11:58:51 +01001311 indirect_page = s->indirect_grants[i]->page;
Bob Liu81f35162015-11-14 11:12:11 +08001312 list_add(&indirect_page->lru, &rinfo->indirect_pages);
Bob Liu7b076752015-07-22 14:40:09 +08001313 }
Roger Pau Monnefbe363c2013-08-12 12:53:44 +02001314 s->indirect_grants[i]->gref = GRANT_INVALID_REF;
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001315 list_add_tail(&s->indirect_grants[i]->node, &info->grants);
Roger Pau Monnefbe363c2013-08-12 12:53:44 +02001316 }
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001317 }
1318 }
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001319}
1320
1321static irqreturn_t blkif_interrupt(int irq, void *dev_id)
1322{
1323 struct request *req;
1324 struct blkif_response *bret;
1325 RING_IDX i, rp;
1326 unsigned long flags;
Bob Liu81f35162015-11-14 11:12:11 +08001327 struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id;
1328 struct blkfront_info *info = rinfo->dev_info;
Christoph Hellwigf4829a92015-09-27 21:01:50 +02001329 int error;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001330
Steven Noonan34678112012-02-17 12:04:44 -08001331 spin_lock_irqsave(&info->io_lock, flags);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001332
1333 if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
Steven Noonan34678112012-02-17 12:04:44 -08001334 spin_unlock_irqrestore(&info->io_lock, flags);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001335 return IRQ_HANDLED;
1336 }
1337
1338 again:
Bob Liu81f35162015-11-14 11:12:11 +08001339 rp = rinfo->ring.sring->rsp_prod;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001340 rmb(); /* Ensure we see queued responses up to 'rp'. */
1341
Bob Liu81f35162015-11-14 11:12:11 +08001342 for (i = rinfo->ring.rsp_cons; i != rp; i++) {
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001343 unsigned long id;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001344
Bob Liu81f35162015-11-14 11:12:11 +08001345 bret = RING_GET_RESPONSE(&rinfo->ring, i);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001346 id = bret->id;
Konrad Rzeszutek Wilk6878c322012-05-25 17:34:51 -04001347 /*
1348 * The backend has messed up and given us an id that we would
1349 * never have given to it (we stamp it up to BLK_RING_SIZE -
1350 * look in get_id_from_freelist).
1351 */
Bob Liu86839c52015-06-03 13:40:03 +08001352 if (id >= BLK_RING_SIZE(info)) {
Konrad Rzeszutek Wilk6878c322012-05-25 17:34:51 -04001353 WARN(1, "%s: response to %s has incorrect id (%ld)\n",
1354 info->gd->disk_name, op_name(bret->operation), id);
1355 /* We can't safely get the 'struct request' as
1356 * the id is busted. */
1357 continue;
1358 }
Bob Liu81f35162015-11-14 11:12:11 +08001359 req = rinfo->shadow[id].request;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001360
Konrad Rzeszutek Wilk5ea42982011-10-12 16:23:30 -04001361 if (bret->operation != BLKIF_OP_DISCARD)
Bob Liu81f35162015-11-14 11:12:11 +08001362 blkif_completion(&rinfo->shadow[id], rinfo, bret);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001363
Bob Liu81f35162015-11-14 11:12:11 +08001364 if (add_id_to_freelist(rinfo, id)) {
Konrad Rzeszutek Wilk6878c322012-05-25 17:34:51 -04001365 WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
1366 info->gd->disk_name, op_name(bret->operation), id);
1367 continue;
1368 }
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001369
Christoph Hellwigf4829a92015-09-27 21:01:50 +02001370 error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001371 switch (bret->operation) {
Li Dongyanged30bf32011-09-01 18:39:09 +08001372 case BLKIF_OP_DISCARD:
1373 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
1374 struct request_queue *rq = info->rq;
Konrad Rzeszutek Wilk6878c322012-05-25 17:34:51 -04001375 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
1376 info->gd->disk_name, op_name(bret->operation));
Christoph Hellwigf4829a92015-09-27 21:01:50 +02001377 error = -EOPNOTSUPP;
Li Dongyanged30bf32011-09-01 18:39:09 +08001378 info->feature_discard = 0;
Konrad Rzeszutek Wilk5ea42982011-10-12 16:23:30 -04001379 info->feature_secdiscard = 0;
Li Dongyanged30bf32011-09-01 18:39:09 +08001380 queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
Konrad Rzeszutek Wilk5ea42982011-10-12 16:23:30 -04001381 queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
Li Dongyanged30bf32011-09-01 18:39:09 +08001382 }
Christoph Hellwigf4829a92015-09-27 21:01:50 +02001383 blk_mq_complete_request(req, error);
Li Dongyanged30bf32011-09-01 18:39:09 +08001384 break;
Konrad Rzeszutek Wilkedf6ef52011-05-03 12:01:11 -04001385 case BLKIF_OP_FLUSH_DISKCACHE:
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001386 case BLKIF_OP_WRITE_BARRIER:
1387 if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
Konrad Rzeszutek Wilk6878c322012-05-25 17:34:51 -04001388 printk(KERN_WARNING "blkfront: %s: %s op failed\n",
1389 info->gd->disk_name, op_name(bret->operation));
Christoph Hellwigf4829a92015-09-27 21:01:50 +02001390 error = -EOPNOTSUPP;
Jeremy Fitzhardingedcb8bae2010-11-02 11:55:58 -04001391 }
1392 if (unlikely(bret->status == BLKIF_RSP_ERROR &&
Bob Liu81f35162015-11-14 11:12:11 +08001393 rinfo->shadow[id].req.u.rw.nr_segments == 0)) {
Konrad Rzeszutek Wilk6878c322012-05-25 17:34:51 -04001394 printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
1395 info->gd->disk_name, op_name(bret->operation));
Christoph Hellwigf4829a92015-09-27 21:01:50 +02001396 error = -EOPNOTSUPP;
Jeremy Fitzhardingedcb8bae2010-11-02 11:55:58 -04001397 }
Christoph Hellwigf4829a92015-09-27 21:01:50 +02001398 if (unlikely(error)) {
1399 if (error == -EOPNOTSUPP)
1400 error = 0;
Tejun Heo4913efe2010-09-03 11:56:16 +02001401 info->feature_flush = 0;
1402 xlvbd_flush(info);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001403 }
1404 /* fall through */
1405 case BLKIF_OP_READ:
1406 case BLKIF_OP_WRITE:
1407 if (unlikely(bret->status != BLKIF_RSP_OKAY))
1408 dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
1409 "request: %x\n", bret->status);
1410
Christoph Hellwigf4829a92015-09-27 21:01:50 +02001411 blk_mq_complete_request(req, error);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001412 break;
1413 default:
1414 BUG();
1415 }
1416 }
1417
Bob Liu81f35162015-11-14 11:12:11 +08001418 rinfo->ring.rsp_cons = i;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001419
Bob Liu81f35162015-11-14 11:12:11 +08001420 if (i != rinfo->ring.req_prod_pvt) {
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001421 int more_to_do;
Bob Liu81f35162015-11-14 11:12:11 +08001422 RING_FINAL_CHECK_FOR_RESPONSES(&rinfo->ring, more_to_do);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001423 if (more_to_do)
1424 goto again;
1425 } else
Bob Liu81f35162015-11-14 11:12:11 +08001426 rinfo->ring.sring->rsp_event = i + 1;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001427
Bob Liu81f35162015-11-14 11:12:11 +08001428 kick_pending_request_queues(rinfo);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001429
Steven Noonan34678112012-02-17 12:04:44 -08001430 spin_unlock_irqrestore(&info->io_lock, flags);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001431
1432 return IRQ_HANDLED;
1433}
1434
1435
1436static int setup_blkring(struct xenbus_device *dev,
Bob Liu81f35162015-11-14 11:12:11 +08001437 struct blkfront_ring_info *rinfo)
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001438{
1439 struct blkif_sring *sring;
Bob Liu86839c52015-06-03 13:40:03 +08001440 int err, i;
Bob Liu81f35162015-11-14 11:12:11 +08001441 struct blkfront_info *info = rinfo->dev_info;
Julien Grallc004a6f2015-07-22 16:44:54 +01001442 unsigned long ring_size = info->nr_ring_pages * XEN_PAGE_SIZE;
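	/* The shared ring spans nr_ring_pages Xen pages (4 KiB each), e.g. 4 pages give a 16 KiB ring. */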
Julien Grall9cce2912015-10-13 17:50:11 +01001443 grant_ref_t gref[XENBUS_MAX_RING_GRANTS];
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001444
Bob Liu86839c52015-06-03 13:40:03 +08001445 for (i = 0; i < info->nr_ring_pages; i++)
Bob Liu81f35162015-11-14 11:12:11 +08001446 rinfo->ring_ref[i] = GRANT_INVALID_REF;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001447
Bob Liu86839c52015-06-03 13:40:03 +08001448 sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH,
1449 get_order(ring_size));
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001450 if (!sring) {
1451 xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
1452 return -ENOMEM;
1453 }
1454 SHARED_RING_INIT(sring);
Bob Liu81f35162015-11-14 11:12:11 +08001455 FRONT_RING_INIT(&rinfo->ring, sring, ring_size);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001456
Bob Liu81f35162015-11-14 11:12:11 +08001457 err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001458 if (err < 0) {
Bob Liu86839c52015-06-03 13:40:03 +08001459 free_pages((unsigned long)sring, get_order(ring_size));
Bob Liu81f35162015-11-14 11:12:11 +08001460 rinfo->ring.sring = NULL;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001461 goto fail;
1462 }
Bob Liu86839c52015-06-03 13:40:03 +08001463 for (i = 0; i < info->nr_ring_pages; i++)
Bob Liu81f35162015-11-14 11:12:11 +08001464 rinfo->ring_ref[i] = gref[i];
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001465
Bob Liu81f35162015-11-14 11:12:11 +08001466 err = xenbus_alloc_evtchn(dev, &rinfo->evtchn);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001467 if (err)
1468 goto fail;
1469
Bob Liu81f35162015-11-14 11:12:11 +08001470 err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0,
1471 "blkif", rinfo);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001472 if (err <= 0) {
1473 xenbus_dev_fatal(dev, err,
1474 "bind_evtchn_to_irqhandler failed");
1475 goto fail;
1476 }
Bob Liu81f35162015-11-14 11:12:11 +08001477 rinfo->irq = err;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001478
1479 return 0;
1480fail:
1481 blkif_free(info, 0);
1482 return err;
1483}
1484
1485
1486/* Common code used when first setting up, and when resuming. */
Ian Campbell203fd612009-12-04 15:33:54 +00001487static int talk_to_blkback(struct xenbus_device *dev,
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001488 struct blkfront_info *info)
1489{
1490 const char *message = NULL;
1491 struct xenbus_transaction xbt;
Bob Liu86839c52015-06-03 13:40:03 +08001492 int err, i;
1493 unsigned int max_page_order = 0;
1494 unsigned int ring_page_order = 0;
Bob Liu81f35162015-11-14 11:12:11 +08001495 struct blkfront_ring_info *rinfo = &info->rinfo;
Bob Liu86839c52015-06-03 13:40:03 +08001496
1497 err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
1498 "max-ring-page-order", "%u", &max_page_order);
1499 if (err != 1)
1500 info->nr_ring_pages = 1;
1501 else {
1502 ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
1503 info->nr_ring_pages = 1 << ring_page_order;
1504 }
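	/*
	 * Example: a backend advertising max-ring-page-order 4 with
	 * xen_blkif_max_ring_order set to 2 yields ring_page_order 2,
	 * i.e. a ring of 1 << 2 = 4 pages.
	 */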
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001505
1506 /* Create shared ring, alloc event channel. */
Bob Liu81f35162015-11-14 11:12:11 +08001507 err = setup_blkring(dev, rinfo);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001508 if (err)
1509 goto out;
1510
1511again:
1512 err = xenbus_transaction_start(&xbt);
1513 if (err) {
1514 xenbus_dev_fatal(dev, err, "starting transaction");
1515 goto destroy_blkring;
1516 }
1517
Bob Liu86839c52015-06-03 13:40:03 +08001518 if (info->nr_ring_pages == 1) {
1519 err = xenbus_printf(xbt, dev->nodename,
Bob Liu81f35162015-11-14 11:12:11 +08001520 "ring-ref", "%u", rinfo->ring_ref[0]);
Bob Liu86839c52015-06-03 13:40:03 +08001521 if (err) {
1522 message = "writing ring-ref";
1523 goto abort_transaction;
1524 }
1525 } else {
1526 err = xenbus_printf(xbt, dev->nodename,
1527 "ring-page-order", "%u", ring_page_order);
1528 if (err) {
1529 message = "writing ring-page-order";
1530 goto abort_transaction;
1531 }
1532
1533 for (i = 0; i < info->nr_ring_pages; i++) {
1534 char ring_ref_name[RINGREF_NAME_LEN];
1535
1536 snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
1537 err = xenbus_printf(xbt, dev->nodename, ring_ref_name,
Bob Liu81f35162015-11-14 11:12:11 +08001538 "%u", rinfo->ring_ref[i]);
Bob Liu86839c52015-06-03 13:40:03 +08001539 if (err) {
1540 message = "writing ring-ref";
1541 goto abort_transaction;
1542 }
1543 }
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001544 }
1545 err = xenbus_printf(xbt, dev->nodename,
Bob Liu81f35162015-11-14 11:12:11 +08001546 "event-channel", "%u", rinfo->evtchn);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001547 if (err) {
1548 message = "writing event-channel";
1549 goto abort_transaction;
1550 }
Markus Armbruster3e334232008-04-02 10:54:02 -07001551 err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
1552 XEN_IO_PROTO_ABI_NATIVE);
1553 if (err) {
1554 message = "writing protocol";
1555 goto abort_transaction;
1556 }
Roger Pau Monne0a8704a2012-10-24 18:58:45 +02001557 err = xenbus_printf(xbt, dev->nodename,
Roger Pau Monnecb5bd4d2012-11-02 16:43:04 +01001558 "feature-persistent", "%u", 1);
Roger Pau Monne0a8704a2012-10-24 18:58:45 +02001559 if (err)
1560 dev_warn(&dev->dev,
1561 "writing persistent grants feature to xenbus");
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001562
1563 err = xenbus_transaction_end(xbt, 0);
1564 if (err) {
1565 if (err == -EAGAIN)
1566 goto again;
1567 xenbus_dev_fatal(dev, err, "completing transaction");
1568 goto destroy_blkring;
1569 }
1570
Bob Liu86839c52015-06-03 13:40:03 +08001571 for (i = 0; i < BLK_RING_SIZE(info); i++)
Bob Liu81f35162015-11-14 11:12:11 +08001572 rinfo->shadow[i].req.u.rw.id = i+1;
1573 rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
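	/* The ids thread a free list through req.u.rw.id; 0x0fffffff terminates it (see add_id_to_freelist/get_id_from_freelist). */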
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001574 xenbus_switch_state(dev, XenbusStateInitialised);
1575
1576 return 0;
1577
1578 abort_transaction:
1579 xenbus_transaction_end(xbt, 1);
1580 if (message)
1581 xenbus_dev_fatal(dev, err, "%s", message);
1582 destroy_blkring:
1583 blkif_free(info, 0);
1584 out:
1585 return err;
1586}
1587
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001588/**
1589 * Entry point to this code when a new device is created. Allocate the basic
1590 * structures and the ring buffer for communication with the backend, and
1591 * inform the backend of the appropriate details for those. Switch to
1592 * Initialised state.
1593 */
1594static int blkfront_probe(struct xenbus_device *dev,
1595 const struct xenbus_device_id *id)
1596{
Bob Liu86839c52015-06-03 13:40:03 +08001597 int err, vdevice;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001598 struct blkfront_info *info;
Bob Liu81f35162015-11-14 11:12:11 +08001599 struct blkfront_ring_info *rinfo;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001600
1601 /* FIXME: Use dynamic device id if this is not set. */
1602 err = xenbus_scanf(XBT_NIL, dev->nodename,
1603 "virtual-device", "%i", &vdevice);
1604 if (err != 1) {
Chris Lalancette9246b5f2008-09-17 14:30:32 -07001605 /* go looking in the extended area instead */
1606 err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
1607 "%i", &vdevice);
1608 if (err != 1) {
1609 xenbus_dev_fatal(dev, err, "reading virtual-device");
1610 return err;
1611 }
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001612 }
1613
Stefano Stabellinib98a4092010-07-29 14:53:16 +01001614 if (xen_hvm_domain()) {
1615 char *type;
1616 int len;
1617 /* no unplug has been done: do not hook devices != xen vbds */
Konrad Rzeszutek Wilk51c71a32013-11-26 15:05:40 -05001618 if (xen_has_pv_and_legacy_disk_devices()) {
Stefano Stabellinib98a4092010-07-29 14:53:16 +01001619 int major;
Stefano Stabellinic1c54132010-05-14 12:44:30 +01001620
Stefano Stabellinib98a4092010-07-29 14:53:16 +01001621 if (!VDEV_IS_EXTENDED(vdevice))
1622 major = BLKIF_MAJOR(vdevice);
1623 else
1624 major = XENVBD_MAJOR;
Stefano Stabellinic1c54132010-05-14 12:44:30 +01001625
Stefano Stabellinib98a4092010-07-29 14:53:16 +01001626 if (major != XENVBD_MAJOR) {
1627 printk(KERN_INFO
1628 "%s: HVM does not support vbd %d as xen block device\n",
Rasmus Villemoes02f1f212015-02-12 15:01:31 -08001629 __func__, vdevice);
Stefano Stabellinib98a4092010-07-29 14:53:16 +01001630 return -ENODEV;
1631 }
1632 }
1633 /* do not create a PV cdrom device if we are an HVM guest */
1634 type = xenbus_read(XBT_NIL, dev->nodename, "device-type", &len);
1635 if (IS_ERR(type))
1636 return -ENODEV;
1637 if (strncmp(type, "cdrom", 5) == 0) {
1638 kfree(type);
Stefano Stabellinic1c54132010-05-14 12:44:30 +01001639 return -ENODEV;
1640 }
Stefano Stabellinib98a4092010-07-29 14:53:16 +01001641 kfree(type);
Stefano Stabellinic1c54132010-05-14 12:44:30 +01001642 }
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001643 info = kzalloc(sizeof(*info), GFP_KERNEL);
1644 if (!info) {
1645 xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
1646 return -ENOMEM;
1647 }
1648
Bob Liu81f35162015-11-14 11:12:11 +08001649 rinfo = &info->rinfo;
1650 INIT_LIST_HEAD(&rinfo->indirect_pages);
1651 rinfo->dev_info = info;
1652 INIT_WORK(&rinfo->work, blkif_restart_queue);
1653
Daniel Stoddenb70f5fa2010-04-30 22:01:19 +00001654 mutex_init(&info->mutex);
Steven Noonan34678112012-02-17 12:04:44 -08001655 spin_lock_init(&info->io_lock);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001656 info->xbdev = dev;
1657 info->vdevice = vdevice;
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001658 INIT_LIST_HEAD(&info->grants);
Roger Pau Monne0a8704a2012-10-24 18:58:45 +02001659 info->persistent_gnts_c = 0;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001660 info->connected = BLKIF_STATE_DISCONNECTED;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001661
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001662 /* Front end dir is a number, which is used as the id. */
1663 info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
Greg Kroah-Hartmana1b4b122009-04-30 14:43:31 -07001664 dev_set_drvdata(&dev->dev, info);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001665
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001666 return 0;
1667}
1668
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02001669static void split_bio_end(struct bio *bio)
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001670{
1671 struct split_bio *split_bio = bio->bi_private;
1672
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001673 if (atomic_dec_and_test(&split_bio->pending)) {
1674 split_bio->bio->bi_phys_segments = 0;
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02001675 split_bio->bio->bi_error = bio->bi_error;
1676 bio_endio(split_bio->bio);
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001677 kfree(split_bio);
1678 }
1679 bio_put(bio);
1680}
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001681
1682static int blkif_recover(struct blkfront_info *info)
1683{
1684 int i;
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001685 struct request *req, *n;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001686 struct blk_shadow *copy;
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001687 int rc;
1688 struct bio *bio, *cloned_bio;
1689 struct bio_list bio_list, merge_bio;
1690 unsigned int segs, offset;
1691 int pending, size;
1692 struct split_bio *split_bio;
1693 struct list_head requests;
Bob Liu81f35162015-11-14 11:12:11 +08001694 struct blkfront_ring_info *rinfo = &info->rinfo;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001695
1696 /* Stage 1: Make a safe copy of the shadow state. */
Bob Liu81f35162015-11-14 11:12:11 +08001697 copy = kmemdup(rinfo->shadow, sizeof(rinfo->shadow),
Ian Campbella144ff02008-06-17 10:47:08 +02001698 GFP_NOIO | __GFP_REPEAT | __GFP_HIGH);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001699 if (!copy)
1700 return -ENOMEM;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001701
1702 /* Stage 2: Set up free list. */
Bob Liu81f35162015-11-14 11:12:11 +08001703 memset(&rinfo->shadow, 0, sizeof(rinfo->shadow));
Bob Liu86839c52015-06-03 13:40:03 +08001704 for (i = 0; i < BLK_RING_SIZE(info); i++)
Bob Liu81f35162015-11-14 11:12:11 +08001705 rinfo->shadow[i].req.u.rw.id = i+1;
1706 rinfo->shadow_free = rinfo->ring.req_prod_pvt;
1707 rinfo->shadow[BLK_RING_SIZE(info)-1].req.u.rw.id = 0x0fffffff;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001708
Bob Liud50babb2015-07-22 14:40:08 +08001709 rc = blkfront_gather_backend_features(info);
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001710 if (rc) {
1711 kfree(copy);
1712 return rc;
1713 }
1714
1715 segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
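	/* Use the negotiated indirect limit if any, otherwise the protocol baseline of BLKIF_MAX_SEGMENTS_PER_REQUEST (11) segments per request. */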
1716 blk_queue_max_segments(info->rq, segs);
1717 bio_list_init(&bio_list);
1718 INIT_LIST_HEAD(&requests);
Bob Liu86839c52015-06-03 13:40:03 +08001719 for (i = 0; i < BLK_RING_SIZE(info); i++) {
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001720 /* Not in use? */
Jeremy Fitzhardingea945b982010-11-01 17:03:14 -04001721 if (!copy[i].request)
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001722 continue;
1723
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001724 /*
1725 * Get the bios in the request so we can re-queue them.
1726 */
1727 if (copy[i].request->cmd_flags &
1728 (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
1729 /*
1730 * Flush operations don't contain bios, so
1731 * we need to requeue the whole request
1732 */
1733 list_add(&copy[i].request->queuelist, &requests);
1734 continue;
Konrad Rzeszutek Wilk5ea42982011-10-12 16:23:30 -04001735 }
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001736 merge_bio.head = copy[i].request->bio;
1737 merge_bio.tail = copy[i].request->biotail;
1738 bio_list_merge(&bio_list, &merge_bio);
1739 copy[i].request->bio = NULL;
Roger Pau Monne3bb8c982015-02-02 11:28:21 +00001740 blk_end_request_all(copy[i].request, 0);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001741 }
1742
1743 kfree(copy);
1744
1745 xenbus_switch_state(info->xbdev, XenbusStateConnected);
1746
Steven Noonan34678112012-02-17 12:04:44 -08001747 spin_lock_irq(&info->io_lock);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001748
1749 /* Now safe for us to use the shared ring */
1750 info->connected = BLKIF_STATE_CONNECTED;
1751
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001752 /* Kick any other new requests queued since we resumed */
Bob Liu81f35162015-11-14 11:12:11 +08001753 kick_pending_request_queues(rinfo);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001754
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001755 list_for_each_entry_safe(req, n, &requests, queuelist) {
1756 /* Requeue pending requests (flush or discard) */
1757 list_del_init(&req->queuelist);
1758 BUG_ON(req->nr_phys_segments > segs);
Bob Liu907c3eb2015-07-13 17:55:24 +08001759 blk_mq_requeue_request(req);
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001760 }
Steven Noonan34678112012-02-17 12:04:44 -08001761 spin_unlock_irq(&info->io_lock);
Bob Liu907c3eb2015-07-13 17:55:24 +08001762 blk_mq_kick_requeue_list(info->rq);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001763
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001764 while ((bio = bio_list_pop(&bio_list)) != NULL) {
1765 /* Traverse the list of pending bios and re-queue them */
1766 if (bio_segments(bio) > segs) {
1767 /*
1768 * This bio has more segments than what we can
1769 * handle, we have to split it.
1770 */
1771 pending = (bio_segments(bio) + segs - 1) / segs;
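			/* Round up: e.g. a 100-segment bio with segs == 32 splits into (100 + 31) / 32 = 4 clones. */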
1772 split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
1773 BUG_ON(split_bio == NULL);
1774 atomic_set(&split_bio->pending, pending);
1775 split_bio->bio = bio;
1776 for (i = 0; i < pending; i++) {
Julien Grallc004a6f2015-07-22 16:44:54 +01001777 offset = (i * segs * XEN_PAGE_SIZE) >> 9;
1778 size = min((unsigned int)(segs * XEN_PAGE_SIZE) >> 9,
Kent Overstreet4f024f32013-10-11 15:44:27 -07001779 (unsigned int)bio_sectors(bio) - offset);
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001780 cloned_bio = bio_clone(bio, GFP_NOIO);
1781 BUG_ON(cloned_bio == NULL);
Kent Overstreet6678d832013-08-07 11:14:32 -07001782 bio_trim(cloned_bio, offset, size);
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001783 cloned_bio->bi_private = split_bio;
1784 cloned_bio->bi_end_io = split_bio_end;
1785 submit_bio(cloned_bio->bi_rw, cloned_bio);
1786 }
1787 /*
1788 * Now we have to wait for all those smaller bios to
1789 * end, so we can also end the "parent" bio.
1790 */
1791 continue;
1792 }
1793 /* We don't need to split this bio */
1794 submit_bio(bio->bi_rw, bio);
1795 }
1796
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001797 return 0;
1798}
1799
1800/**
1801 * We are reconnecting to the backend, due to a suspend/resume, or a backend
1802 * driver restart. We tear down our blkif structure and recreate it, but
1803 * leave the device-layer structures intact so that this is transparent to the
1804 * rest of the kernel.
1805 */
1806static int blkfront_resume(struct xenbus_device *dev)
1807{
Greg Kroah-Hartmana1b4b122009-04-30 14:43:31 -07001808 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001809 int err;
1810
1811 dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);
1812
1813 blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
1814
Ian Campbell203fd612009-12-04 15:33:54 +00001815 err = talk_to_blkback(dev, info);
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001816
1817 /*
1818 * We have to wait for the backend to switch to
1819 * connected state, since we want to read which
1820 * features it supports.
1821 */
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001822
1823 return err;
1824}
1825
Daniel Stoddenb70f5fa2010-04-30 22:01:19 +00001826static void
1827blkfront_closing(struct blkfront_info *info)
1828{
1829 struct xenbus_device *xbdev = info->xbdev;
1830 struct block_device *bdev = NULL;
1831
1832 mutex_lock(&info->mutex);
1833
1834 if (xbdev->state == XenbusStateClosing) {
1835 mutex_unlock(&info->mutex);
1836 return;
1837 }
1838
1839 if (info->gd)
1840 bdev = bdget_disk(info->gd, 0);
1841
1842 mutex_unlock(&info->mutex);
1843
1844 if (!bdev) {
1845 xenbus_frontend_closed(xbdev);
1846 return;
1847 }
1848
1849 mutex_lock(&bdev->bd_mutex);
1850
Daniel Stodden7b32d102010-04-30 22:01:23 +00001851 if (bdev->bd_openers) {
Daniel Stoddenb70f5fa2010-04-30 22:01:19 +00001852 xenbus_dev_error(xbdev, -EBUSY,
1853 "Device in use; refusing to close");
1854 xenbus_switch_state(xbdev, XenbusStateClosing);
1855 } else {
1856 xlvbd_release_gendisk(info);
1857 xenbus_frontend_closed(xbdev);
1858 }
1859
1860 mutex_unlock(&bdev->bd_mutex);
1861 bdput(bdev);
1862}
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001863
Li Dongyanged30bf32011-09-01 18:39:09 +08001864static void blkfront_setup_discard(struct blkfront_info *info)
1865{
1866 int err;
Li Dongyanged30bf32011-09-01 18:39:09 +08001867 unsigned int discard_granularity;
1868 unsigned int discard_alignment;
Konrad Rzeszutek Wilk5ea42982011-10-12 16:23:30 -04001869 unsigned int discard_secure;
Li Dongyanged30bf32011-09-01 18:39:09 +08001870
Olaf Hering1c8cad62014-05-21 16:32:40 +02001871 info->feature_discard = 1;
1872 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1873 "discard-granularity", "%u", &discard_granularity,
1874 "discard-alignment", "%u", &discard_alignment,
1875 NULL);
1876 if (!err) {
1877 info->discard_granularity = discard_granularity;
1878 info->discard_alignment = discard_alignment;
1879 }
1880 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1881 "discard-secure", "%d", &discard_secure,
1882 NULL);
1883 if (!err)
1884 info->feature_secdiscard = !!discard_secure;
Li Dongyanged30bf32011-09-01 18:39:09 +08001885}
1886
Bob Liu81f35162015-11-14 11:12:11 +08001887static int blkfront_setup_indirect(struct blkfront_ring_info *rinfo)
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001888{
Julien Grallc004a6f2015-07-22 16:44:54 +01001889 unsigned int psegs, grants;
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001890 int err, i;
Bob Liu81f35162015-11-14 11:12:11 +08001891 struct blkfront_info *info = rinfo->dev_info;
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001892
Bob Liud50babb2015-07-22 14:40:08 +08001893 if (info->max_indirect_segments == 0)
Julien Grallc004a6f2015-07-22 16:44:54 +01001894 grants = BLKIF_MAX_SEGMENTS_PER_REQUEST;
Bob Liud50babb2015-07-22 14:40:08 +08001895 else
Julien Grallc004a6f2015-07-22 16:44:54 +01001896 grants = info->max_indirect_segments;
1897 psegs = grants / GRANTS_PER_PSEG;
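	/* Each physical segment covers one grant per 4 KiB Xen page, so a single grant when the kernel page size is also 4 KiB. */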
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001898
Bob Liu81f35162015-11-14 11:12:11 +08001899 err = fill_grant_buffer(rinfo,
Julien Grallc004a6f2015-07-22 16:44:54 +01001900 (grants + INDIRECT_GREFS(grants)) * BLK_RING_SIZE(info));
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001901 if (err)
1902 goto out_of_memory;
1903
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001904 if (!info->feature_persistent && info->max_indirect_segments) {
1905 /*
1906 * We are using indirect descriptors but not persistent
1907 * grants, so we need to allocate a set of pages that can be
1908 * used for mapping indirect grefs
1909 */
Julien Grallc004a6f2015-07-22 16:44:54 +01001910 int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001911
Bob Liu81f35162015-11-14 11:12:11 +08001912 BUG_ON(!list_empty(&rinfo->indirect_pages));
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001913 for (i = 0; i < num; i++) {
1914 struct page *indirect_page = alloc_page(GFP_NOIO);
1915 if (!indirect_page)
1916 goto out_of_memory;
Bob Liu81f35162015-11-14 11:12:11 +08001917 list_add(&indirect_page->lru, &rinfo->indirect_pages);
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001918 }
1919 }
1920
Bob Liu86839c52015-06-03 13:40:03 +08001921 for (i = 0; i < BLK_RING_SIZE(info); i++) {
Bob Liu81f35162015-11-14 11:12:11 +08001922 rinfo->shadow[i].grants_used = kzalloc(
1923 sizeof(rinfo->shadow[i].grants_used[0]) * grants,
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001924 GFP_NOIO);
Bob Liu81f35162015-11-14 11:12:11 +08001925 rinfo->shadow[i].sg = kzalloc(sizeof(rinfo->shadow[i].sg[0]) * psegs, GFP_NOIO);
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001926 if (info->max_indirect_segments)
Bob Liu81f35162015-11-14 11:12:11 +08001927 rinfo->shadow[i].indirect_grants = kzalloc(
1928 sizeof(rinfo->shadow[i].indirect_grants[0]) *
Julien Grallc004a6f2015-07-22 16:44:54 +01001929 INDIRECT_GREFS(grants),
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001930 GFP_NOIO);
Bob Liu81f35162015-11-14 11:12:11 +08001931 if ((rinfo->shadow[i].grants_used == NULL) ||
1932 (rinfo->shadow[i].sg == NULL) ||
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001933 (info->max_indirect_segments &&
Bob Liu81f35162015-11-14 11:12:11 +08001934 (rinfo->shadow[i].indirect_grants == NULL)))
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001935 goto out_of_memory;
Bob Liu81f35162015-11-14 11:12:11 +08001936 sg_init_table(rinfo->shadow[i].sg, psegs);
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001937 }
1938
1939
1940 return 0;
1941
1942out_of_memory:
Bob Liu86839c52015-06-03 13:40:03 +08001943 for (i = 0; i < BLK_RING_SIZE(info); i++) {
Bob Liu81f35162015-11-14 11:12:11 +08001944 kfree(rinfo->shadow[i].grants_used);
1945 rinfo->shadow[i].grants_used = NULL;
1946 kfree(rinfo->shadow[i].sg);
1947 rinfo->shadow[i].sg = NULL;
1948 kfree(rinfo->shadow[i].indirect_grants);
1949 rinfo->shadow[i].indirect_grants = NULL;
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001950 }
Bob Liu81f35162015-11-14 11:12:11 +08001951 if (!list_empty(&rinfo->indirect_pages)) {
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001952 struct page *indirect_page, *n;
Bob Liu81f35162015-11-14 11:12:11 +08001953 list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
Roger Pau Monnebfe11d62013-10-29 18:31:14 +01001954 list_del(&indirect_page->lru);
1955 __free_page(indirect_page);
1956 }
1957 }
Roger Pau Monne402b27f2013-04-18 16:06:54 +02001958 return -ENOMEM;
1959}
1960
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07001961/*
Bob Liud50babb2015-07-22 14:40:08 +08001962 * Gather all backend feature-*
1963 */
1964static int blkfront_gather_backend_features(struct blkfront_info *info)
1965{
1966 int err;
1967 int barrier, flush, discard, persistent;
1968 unsigned int indirect_segments;
1969
1970 info->feature_flush = 0;
1971
1972 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1973 "feature-barrier", "%d", &barrier,
1974 NULL);
1975
1976 /*
1977 * If there's no "feature-barrier" defined, then it means
1978 * we're dealing with a very old backend which writes
1979 * synchronously; nothing to do.
1980 *
1981 * If there are barriers, then we use flush.
1982 */
1983 if (!err && barrier)
1984 info->feature_flush = REQ_FLUSH | REQ_FUA;
1985 /*
1986 * And if there is "feature-flush-cache", use that in
1987 * preference to barriers.
1988 */
1989 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1990 "feature-flush-cache", "%d", &flush,
1991 NULL);
1992
1993 if (!err && flush)
1994 info->feature_flush = REQ_FLUSH;
1995
1996 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
1997 "feature-discard", "%d", &discard,
1998 NULL);
1999
2000 if (!err && discard)
2001 blkfront_setup_discard(info);
2002
2003 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
2004 "feature-persistent", "%u", &persistent,
2005 NULL);
2006 if (err)
2007 info->feature_persistent = 0;
2008 else
2009 info->feature_persistent = persistent;
2010
2011 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
2012 "feature-max-indirect-segments", "%u", &indirect_segments,
2013 NULL);
2014 if (err)
2015 info->max_indirect_segments = 0;
2016 else
2017 info->max_indirect_segments = min(indirect_segments,
2018 xen_blkif_max_segments);
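	/* Clamped to this driver's xen_blkif_max_segments limit so we never promise more segments than we are willing to allocate grants for. */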
2019
Bob Liu81f35162015-11-14 11:12:11 +08002020 return blkfront_setup_indirect(&info->rinfo);
Bob Liud50babb2015-07-22 14:40:08 +08002021}
2022
2023/*
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002024 * Invoked when the backend is finally 'ready' (and has produced
2025 * the details about the physical device - #sectors, size, etc).
2026 */
2027static void blkfront_connect(struct blkfront_info *info)
2028{
2029 unsigned long long sectors;
2030 unsigned long sector_size;
Stefan Bader7c4d7d72013-05-13 16:28:15 +02002031 unsigned int physical_sector_size;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002032 unsigned int binfo;
2033 int err;
Bob Liu81f35162015-11-14 11:12:11 +08002034 struct blkfront_ring_info *rinfo = &info->rinfo;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002035
K. Y. Srinivasan1fa73be2010-03-11 13:42:26 -08002036 switch (info->connected) {
2037 case BLKIF_STATE_CONNECTED:
2038 /*
2039 * Potentially, the back-end may be signalling
2040 * a capacity change; update the capacity.
2041 */
2042 err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
2043 "sectors", "%Lu", &sectors);
2044 if (XENBUS_EXIST_ERR(err))
2045 return;
2046 printk(KERN_INFO "Setting capacity to %Lu\n",
2047 sectors);
2048 set_capacity(info->gd, sectors);
K. Y. Srinivasan2def1412010-03-18 15:00:54 -07002049 revalidate_disk(info->gd);
K. Y. Srinivasan1fa73be2010-03-11 13:42:26 -08002050
Roger Pau Monne402b27f2013-04-18 16:06:54 +02002051 return;
K. Y. Srinivasan1fa73be2010-03-11 13:42:26 -08002052 case BLKIF_STATE_SUSPENDED:
Roger Pau Monne402b27f2013-04-18 16:06:54 +02002053 /*
2054 * If we are recovering from suspension, we need to wait
2055 * for the backend to announce its features before
2056 * reconnecting, at least we need to know if the backend
2057 * supports indirect descriptors, and how many.
2058 */
2059 blkif_recover(info);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002060 return;
2061
Jeremy Fitzhardingeb4dddb42010-03-11 15:10:40 -08002062 default:
2063 break;
K. Y. Srinivasan1fa73be2010-03-11 13:42:26 -08002064 }
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002065
2066 dev_dbg(&info->xbdev->dev, "%s:%s.\n",
2067 __func__, info->xbdev->otherend);
2068
2069 err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
2070 "sectors", "%llu", &sectors,
2071 "info", "%u", &binfo,
2072 "sector-size", "%lu", &sector_size,
2073 NULL);
2074 if (err) {
2075 xenbus_dev_fatal(info->xbdev, err,
2076 "reading backend fields at %s",
2077 info->xbdev->otherend);
2078 return;
2079 }
2080
Stefan Bader7c4d7d72013-05-13 16:28:15 +02002081 /*
2082 * physcial-sector-size is a newer field, so old backends may not
2083 * provide this. Assume physical sector size to be the same as
2084 * sector_size in that case.
2085 */
2086 err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
2087 "physical-sector-size", "%u", &physical_sector_size);
2088 if (err != 1)
2089 physical_sector_size = sector_size;
2090
Bob Liud50babb2015-07-22 14:40:08 +08002091 err = blkfront_gather_backend_features(info);
Roger Pau Monne402b27f2013-04-18 16:06:54 +02002092 if (err) {
2093 xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
2094 info->xbdev->otherend);
2095 return;
2096 }
2097
Stefan Bader7c4d7d72013-05-13 16:28:15 +02002098 err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
2099 physical_sector_size);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002100 if (err) {
2101 xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
2102 info->xbdev->otherend);
2103 return;
2104 }
2105
2106 xenbus_switch_state(info->xbdev, XenbusStateConnected);
2107
2108 /* Kick pending requests. */
Steven Noonan34678112012-02-17 12:04:44 -08002109 spin_lock_irq(&info->io_lock);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002110 info->connected = BLKIF_STATE_CONNECTED;
Bob Liu81f35162015-11-14 11:12:11 +08002111 kick_pending_request_queues(rinfo);
Steven Noonan34678112012-02-17 12:04:44 -08002112 spin_unlock_irq(&info->io_lock);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002113
2114 add_disk(info->gd);
Christian Limpach1d78d702008-04-02 10:54:04 -07002115
2116 info->is_ready = 1;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002117}
2118
2119/**
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002120 * Callback received when the backend's state changes.
2121 */
Ian Campbell203fd612009-12-04 15:33:54 +00002122static void blkback_changed(struct xenbus_device *dev,
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002123 enum xenbus_state backend_state)
2124{
Greg Kroah-Hartmana1b4b122009-04-30 14:43:31 -07002125 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002126
Ian Campbell203fd612009-12-04 15:33:54 +00002127 dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002128
2129 switch (backend_state) {
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002130 case XenbusStateInitWait:
Bob Liua9b54bb2015-06-19 00:23:00 -04002131 if (dev->state != XenbusStateInitialising)
2132 break;
Bob Liu8ab01442015-06-03 13:40:02 +08002133 if (talk_to_blkback(dev, info)) {
2134 kfree(info);
2135 dev_set_drvdata(&dev->dev, NULL);
2136 break;
2137 }
2138 case XenbusStateInitialising:
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002139 case XenbusStateInitialised:
Noboru Iwamatsub78c9512009-10-13 17:22:29 -04002140 case XenbusStateReconfiguring:
2141 case XenbusStateReconfigured:
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002142 case XenbusStateUnknown:
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002143 break;
2144
2145 case XenbusStateConnected:
2146 blkfront_connect(info);
2147 break;
2148
David Vrabel36613712014-02-04 18:53:56 +00002149 case XenbusStateClosed:
2150 if (dev->state == XenbusStateClosed)
2151 break;
2152 /* Missed the backend's Closing state -- fallthrough */
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002153 case XenbusStateClosing:
Cathy Averya54c8f02015-10-02 09:35:01 -04002154 if (info)
2155 blkfront_closing(info);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002156 break;
2157 }
2158}
2159
Daniel Stoddenfa1bd352010-04-30 22:01:22 +00002160static int blkfront_remove(struct xenbus_device *xbdev)
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002161{
Daniel Stoddenfa1bd352010-04-30 22:01:22 +00002162 struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
2163 struct block_device *bdev = NULL;
2164 struct gendisk *disk;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002165
Daniel Stoddenfa1bd352010-04-30 22:01:22 +00002166 dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002167
2168 blkif_free(info, 0);
2169
Daniel Stoddenfa1bd352010-04-30 22:01:22 +00002170 mutex_lock(&info->mutex);
2171
2172 disk = info->gd;
2173 if (disk)
2174 bdev = bdget_disk(disk, 0);
2175
2176 info->xbdev = NULL;
2177 mutex_unlock(&info->mutex);
2178
2179 if (!bdev) {
Jan Beulich0e345822010-08-07 18:28:55 +02002180 kfree(info);
Daniel Stoddenfa1bd352010-04-30 22:01:22 +00002181 return 0;
2182 }
2183
2184 /*
2185 * The xbdev was removed before we reached the Closed
2186 * state. See if it's safe to remove the disk. If the bdev
2187 * isn't closed yet, we let release take care of it.
2188 */
2189
2190 mutex_lock(&bdev->bd_mutex);
2191 info = disk->private_data;
2192
Daniel Stoddend54142c2010-08-07 18:51:21 +02002193 dev_warn(disk_to_dev(disk),
2194 "%s was hot-unplugged, %d stale handles\n",
2195 xbdev->nodename, bdev->bd_openers);
2196
Daniel Stodden7b32d102010-04-30 22:01:23 +00002197 if (info && !bdev->bd_openers) {
Daniel Stoddenfa1bd352010-04-30 22:01:22 +00002198 xlvbd_release_gendisk(info);
2199 disk->private_data = NULL;
2200 kfree(info);
2201 }
2202
2203 mutex_unlock(&bdev->bd_mutex);
2204 bdput(bdev);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002205
2206 return 0;
2207}
2208
Christian Limpach1d78d702008-04-02 10:54:04 -07002209static int blkfront_is_ready(struct xenbus_device *dev)
2210{
Greg Kroah-Hartmana1b4b122009-04-30 14:43:31 -07002211 struct blkfront_info *info = dev_get_drvdata(&dev->dev);
Christian Limpach1d78d702008-04-02 10:54:04 -07002212
Jan Beulich5d7ed202010-08-07 18:31:12 +02002213 return info->is_ready && info->xbdev;
Christian Limpach1d78d702008-04-02 10:54:04 -07002214}
2215
Al Viroa63c8482008-03-02 10:23:47 -05002216static int blkif_open(struct block_device *bdev, fmode_t mode)
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002217{
Daniel Stodden13961742010-08-07 18:36:53 +02002218 struct gendisk *disk = bdev->bd_disk;
2219 struct blkfront_info *info;
2220 int err = 0;
Arnd Bergmann6e9624b2010-08-07 18:25:34 +02002221
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02002222 mutex_lock(&blkfront_mutex);
Arnd Bergmann6e9624b2010-08-07 18:25:34 +02002223
Daniel Stodden13961742010-08-07 18:36:53 +02002224 info = disk->private_data;
2225 if (!info) {
2226 /* xbdev gone */
2227 err = -ERESTARTSYS;
2228 goto out;
2229 }
2230
2231 mutex_lock(&info->mutex);
2232
2233 if (!info->gd)
2234 /* xbdev is closed */
2235 err = -ERESTARTSYS;
2236
2237 mutex_unlock(&info->mutex);
2238
Daniel Stodden13961742010-08-07 18:36:53 +02002239out:
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02002240 mutex_unlock(&blkfront_mutex);
Daniel Stodden13961742010-08-07 18:36:53 +02002241 return err;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002242}
2243
Al Virodb2a1442013-05-05 21:52:57 -04002244static void blkif_release(struct gendisk *disk, fmode_t mode)
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002245{
Al Viroa63c8482008-03-02 10:23:47 -05002246 struct blkfront_info *info = disk->private_data;
Daniel Stodden7fd152f2010-08-07 18:45:12 +02002247 struct block_device *bdev;
2248 struct xenbus_device *xbdev;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002249
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02002250 mutex_lock(&blkfront_mutex);
Daniel Stodden7fd152f2010-08-07 18:45:12 +02002251
2252 bdev = bdget_disk(disk, 0);
Daniel Stodden7fd152f2010-08-07 18:45:12 +02002253
Felipe Pena2f089cb2013-11-09 13:36:09 -02002254 if (!bdev) {
2255 WARN(1, "Block device %s yanked out from us!\n", disk->disk_name);
2256 goto out_mutex;
2257 }
Daniel Stoddenacfca3c2010-08-07 18:47:26 +02002258 if (bdev->bd_openers)
2259 goto out;
2260
Daniel Stodden7fd152f2010-08-07 18:45:12 +02002261 /*
2262 * Check if we have been instructed to close. We will have
2263 * deferred this request, because the bdev was still open.
2264 */
2265
2266 mutex_lock(&info->mutex);
2267 xbdev = info->xbdev;
2268
2269 if (xbdev && xbdev->state == XenbusStateClosing) {
2270 /* pending switch to state closed */
Daniel Stoddend54142c2010-08-07 18:51:21 +02002271 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
Daniel Stodden7fd152f2010-08-07 18:45:12 +02002272 xlvbd_release_gendisk(info);
2273 xenbus_frontend_closed(info->xbdev);
2274 }
2275
2276 mutex_unlock(&info->mutex);
2277
2278 if (!xbdev) {
2279 /* sudden device removal */
Daniel Stoddend54142c2010-08-07 18:51:21 +02002280 dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
Daniel Stodden7fd152f2010-08-07 18:45:12 +02002281 xlvbd_release_gendisk(info);
2282 disk->private_data = NULL;
2283 kfree(info);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002284 }
Daniel Stodden7fd152f2010-08-07 18:45:12 +02002285
Jens Axboea4cc14e2010-08-08 21:50:05 -04002286out:
Andrew Jonesdad5cf62012-02-16 13:16:25 +01002287 bdput(bdev);
Felipe Pena2f089cb2013-11-09 13:36:09 -02002288out_mutex:
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02002289 mutex_unlock(&blkfront_mutex);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002290}
2291
Alexey Dobriyan83d5cde2009-09-21 17:01:13 -07002292static const struct block_device_operations xlvbd_block_fops =
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002293{
2294 .owner = THIS_MODULE,
Al Viroa63c8482008-03-02 10:23:47 -05002295 .open = blkif_open,
2296 .release = blkif_release,
Ian Campbell597592d2008-02-21 13:03:45 -08002297 .getgeo = blkif_getgeo,
Arnd Bergmann8a6cfeb2010-07-08 10:18:46 +02002298 .ioctl = blkif_ioctl,
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002299};
2300
2301
Márton Némethec9c42e2010-01-10 13:39:52 +01002302static const struct xenbus_device_id blkfront_ids[] = {
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002303 { "vbd" },
2304 { "" }
2305};
2306
David Vrabel95afae42014-09-08 17:30:41 +01002307static struct xenbus_driver blkfront_driver = {
2308 .ids = blkfront_ids,
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002309 .probe = blkfront_probe,
2310 .remove = blkfront_remove,
2311 .resume = blkfront_resume,
Ian Campbell203fd612009-12-04 15:33:54 +00002312 .otherend_changed = blkback_changed,
Christian Limpach1d78d702008-04-02 10:54:04 -07002313 .is_ready = blkfront_is_ready,
David Vrabel95afae42014-09-08 17:30:41 +01002314};
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002315
2316static int __init xlblk_init(void)
2317{
Laszlo Ersek469738e2011-10-07 21:34:38 +02002318 int ret;
2319
Jeremy Fitzhardinge6e833582008-08-19 13:16:17 -07002320 if (!xen_domain())
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002321 return -ENODEV;
2322
Julien Grall9cce2912015-10-13 17:50:11 +01002323 if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) {
Bob Liu86839c52015-06-03 13:40:03 +08002324 pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
Julien Grall9cce2912015-10-13 17:50:11 +01002325 xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER);
Bob Liu86839c52015-06-03 13:40:03 +08002326 xen_blkif_max_ring_order = 0;
2327 }
2328
Konrad Rzeszutek Wilk51c71a32013-11-26 15:05:40 -05002329 if (!xen_has_pv_disk_devices())
Igor Mammedovb9136d22012-03-21 15:08:38 +01002330 return -ENODEV;
2331
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002332 if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
2333 printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
2334 XENVBD_MAJOR, DEV_NAME);
2335 return -ENODEV;
2336 }
2337
Jan Beulich73db1442011-12-22 09:08:13 +00002338 ret = xenbus_register_frontend(&blkfront_driver);
Laszlo Ersek469738e2011-10-07 21:34:38 +02002339 if (ret) {
2340 unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
2341 return ret;
2342 }
2343
2344 return 0;
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002345}
2346module_init(xlblk_init);
2347
2348
Jan Beulich5a60d0c2008-06-17 10:47:08 +02002349static void __exit xlblk_exit(void)
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002350{
Jan Beulich86050672012-04-05 16:04:52 +01002351 xenbus_unregister_driver(&blkfront_driver);
2352 unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
2353 kfree(minors);
Jeremy Fitzhardinge9f27ee52007-07-17 18:37:06 -07002354}
2355module_exit(xlblk_exit);
2356
2357MODULE_DESCRIPTION("Xen virtual block device frontend");
2358MODULE_LICENSE("GPL");
2359MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);
Mark McLoughlind2f0c522008-04-02 10:54:05 -07002360MODULE_ALIAS("xen:vbd");
Mark McLoughlin4f93f09b2008-04-02 10:54:06 -07002361MODULE_ALIAS("xenblk");