/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>

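/*
 * Return the start sector of the zone containing @sector. This relies
 * on the zone size being a power-of-two number of sectors.
 */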
static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

	return sector & ~zone_mask;
}

/*
 * Check that a zone report belongs to this partition.
 * If yes, fix its start sector and write pointer, copy it into the
 * zone information array and return true. Return false otherwise.
 */
static bool blkdev_report_zone(struct block_device *bdev,
			       struct blk_zone *rep,
			       struct blk_zone *zone)
{
	sector_t offset = get_start_sect(bdev);

	if (rep->start < offset)
		return false;

	rep->start -= offset;
	if (rep->start + rep->len > bdev->bd_part->nr_sects)
		return false;

	if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
		rep->wp = rep->start + rep->len;
	else
		rep->wp -= offset;
	memcpy(zone, rep, sizeof(struct blk_zone));

	return true;
}

/**
 * blkdev_report_zones - Get zone information
 * @bdev: Target block device
 * @sector: Sector from which to report zones
 * @zones: Array of zone structures in which to return the zone information
 * @nr_zones: Number of zone structures in the zone array
 * @gfp_mask: Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Get zone information starting from the zone containing @sector.
 *    The number of zones reported may be less than the number requested
 *    by @nr_zones. The number of zones actually reported is returned
 *    in @nr_zones.
 */
int blkdev_report_zones(struct block_device *bdev,
			sector_t sector,
			struct blk_zone *zones,
			unsigned int *nr_zones,
			gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct blk_zone_report_hdr *hdr;
	unsigned int nrz = *nr_zones;
	struct page *page;
	unsigned int nr_rep;
	size_t rep_bytes;
	unsigned int nr_pages;
	struct bio *bio;
	struct bio_vec *bv;
	unsigned int i, n, nz;
	unsigned int ofst;
	void *addr;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (!nrz)
		return 0;

	if (sector > bdev->bd_part->nr_sects) {
		*nr_zones = 0;
		return 0;
	}

	/*
	 * The zone report has a header. So make room for it in the
	 * payload. Also make sure that the report fits in a single BIO
	 * that will not be split down the stack.
	 */
	rep_bytes = sizeof(struct blk_zone_report_hdr) +
		sizeof(struct blk_zone) * nrz;
	rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
	if (rep_bytes > (queue_max_sectors(q) << 9))
		rep_bytes = queue_max_sectors(q) << 9;

	nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
			 rep_bytes >> PAGE_SHIFT);
	nr_pages = min_t(unsigned int, nr_pages,
			 queue_max_segments(q));

	bio = bio_alloc(gfp_mask, nr_pages);
	if (!bio)
		return -ENOMEM;

	bio->bi_bdev = bdev;
	bio->bi_iter.bi_sector = blk_zone_start(q, sector);
	bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(gfp_mask);
		if (!page) {
			ret = -ENOMEM;
			goto out;
		}
		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
			__free_page(page);
			break;
		}
	}

	if (i == 0)
		ret = -ENOMEM;
	else
		ret = submit_bio_wait(bio);
	if (ret)
		goto out;

	/*
	 * Process the report result: skip the header and go through the
	 * reported zones to fix up the zone information for partitions.
	 * At the same time, copy the zone information into the @zones
	 * array.
	 */
	n = 0;
	nz = 0;
	nr_rep = 0;
	bio_for_each_segment_all(bv, bio, i) {

		if (!bv->bv_page)
			break;

		addr = kmap_atomic(bv->bv_page);

		/* Get header in the first page */
		ofst = 0;
		if (!nr_rep) {
			hdr = (struct blk_zone_report_hdr *) addr;
			nr_rep = hdr->nr_zones;
			ofst = sizeof(struct blk_zone_report_hdr);
		}

		/* Fixup and report zones */
		while (ofst < bv->bv_len &&
		       n < nr_rep && nz < nrz) {
			if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
				nz++;
			ofst += sizeof(struct blk_zone);
			n++;
		}

		kunmap_atomic(addr);

		if (n >= nr_rep || nz >= nrz)
			break;

	}

	*nr_zones = nz;
out:
	bio_for_each_segment_all(bv, bio, i)
		__free_page(bv->bv_page);
	bio_put(bio);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
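
/*
 * Example (illustrative sketch, not part of the original file): how a
 * kernel caller might use blkdev_report_zones() to inspect the first
 * few zones of a device. The function name and the on-stack array size
 * are hypothetical.
 */
static int __maybe_unused example_report_zones(struct block_device *bdev)
{
	struct blk_zone zones[16];
	unsigned int i, nr_zones = ARRAY_SIZE(zones);
	int ret;

	/* On return, nr_zones holds the number of zones actually reported */
	ret = blkdev_report_zones(bdev, 0, zones, &nr_zones, GFP_KERNEL);
	if (ret)
		return ret;

	for (i = 0; i < nr_zones; i++)
		pr_info("zone %u: start %llu, len %llu, wp %llu\n", i,
			(unsigned long long)zones[i].start,
			(unsigned long long)zones[i].len,
			(unsigned long long)zones[i].wp);

	return 0;
}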

/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev: Target block device
 * @sector: Start sector of the first zone to reset
 * @nr_sectors: Number of sectors, at least the length of one zone
 * @gfp_mask: Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 */
int blkdev_reset_zones(struct block_device *bdev,
		       sector_t sector, sector_t nr_sectors,
		       gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t zone_sectors;
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (end_sector > bdev->bd_part->nr_sects)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (allow a possibly smaller last zone) */
	zone_sectors = blk_queue_zone_sectors(q);
	if (sector & (zone_sectors - 1))
		return -EINVAL;

	if ((nr_sectors & (zone_sectors - 1)) &&
	    end_sector != bdev->bd_part->nr_sects)
		return -EINVAL;

	while (sector < end_sector) {

		bio = bio_alloc(gfp_mask, 0);
		bio->bi_iter.bi_sector = sector;
		bio->bi_bdev = bdev;
		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

		ret = submit_bio_wait(bio);
		bio_put(bio);

		if (ret)
			return ret;

		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();

	}

	return 0;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
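
/*
 * Example (illustrative sketch, not part of the original file): reset
 * the single zone containing @sector by aligning down to the zone start,
 * which satisfies the alignment checks above. The function name is
 * hypothetical.
 */
static int __maybe_unused example_reset_one_zone(struct block_device *bdev,
						 sector_t sector)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q)
		return -ENXIO;

	return blkdev_reset_zones(bdev, blk_zone_start(q, sector),
				  blk_queue_zone_sectors(q), GFP_KERNEL);
}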

/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_report rep;
	struct blk_zone *zones;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL);
	if (!zones)
		return -ENOMEM;

	ret = blkdev_report_zones(bdev, rep.sector,
				  zones, &rep.nr_zones,
				  GFP_KERNEL);
	if (ret)
		goto out;

	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
		ret = -EFAULT;
		goto out;
	}

	if (rep.nr_zones) {
		if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
				 sizeof(struct blk_zone) * rep.nr_zones))
			ret = -EFAULT;
	}

out:
	kfree(zones);

	return ret;
}
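
/*
 * Example (illustrative userspace sketch, compiled out of this file):
 * issuing the BLKREPORTZONE ioctl handled above. The report buffer is a
 * struct blk_zone_report header directly followed by the zone array, as
 * defined in <linux/blkzoned.h>. The helper name and zone count are
 * arbitrary.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>

static int report_zones_example(const char *dev_path)
{
	unsigned int i, nr_zones = 16;
	struct blk_zone_report *rep;
	int fd, ret;

	rep = calloc(1, sizeof(*rep) + nr_zones * sizeof(struct blk_zone));
	if (!rep)
		return -1;

	fd = open(dev_path, O_RDONLY);
	if (fd < 0) {
		free(rep);
		return -1;
	}

	rep->sector = 0;
	rep->nr_zones = nr_zones;
	ret = ioctl(fd, BLKREPORTZONE, rep);
	if (!ret)
		for (i = 0; i < rep->nr_zones; i++)
			printf("zone %u: start %llu, wp %llu\n", i,
			       (unsigned long long)rep->zones[i].start,
			       (unsigned long long)rep->zones[i].wp);

	close(fd);
	free(rep);
	return ret;
}
#endif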

/*
 * BLKRESETZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_range zrange;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (!(mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
				  GFP_KERNEL);
}
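
/*
 * Example (illustrative userspace sketch, compiled out of this file):
 * issuing the BLKRESETZONE ioctl handled above. The device must be
 * opened with write access, matching the FMODE_WRITE check. The helper
 * name and parameters are arbitrary.
 */
#if 0
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>

static int reset_zone_example(const char *dev_path, __u64 zone_start,
			      __u64 zone_len)
{
	struct blk_zone_range zrange = {
		.sector		= zone_start,
		.nr_sectors	= zone_len,
	};
	int fd, ret;

	fd = open(dev_path, O_WRONLY);
	if (fd < 0)
		return -1;

	ret = ioctl(fd, BLKRESETZONE, &zrange);
	close(fd);
	return ret;
}
#endif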