/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>

static inline sector_t blk_zone_start(struct request_queue *q,
                                      sector_t sector)
{
        sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

        return sector & ~zone_mask;
}

/*
 * Return true if a request is a write request that needs zone write locking.
 */
bool blk_req_needs_zone_write_lock(struct request *rq)
{
        if (!rq->q->seq_zones_wlock)
                return false;

        if (blk_rq_is_passthrough(rq))
                return false;

        switch (req_op(rq)) {
        case REQ_OP_WRITE_ZEROES:
        case REQ_OP_WRITE_SAME:
        case REQ_OP_WRITE:
                return blk_rq_zone_is_seq(rq);
        default:
                return false;
        }
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);

void __blk_req_zone_write_lock(struct request *rq)
{
        if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
                                          rq->q->seq_zones_wlock)))
                return;

        WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
        rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);

void __blk_req_zone_write_unlock(struct request *rq)
{
        rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
        if (rq->q->seq_zones_wlock)
                WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
                                                 rq->q->seq_zones_wlock));
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
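
/*
 * Illustrative sketch only, not part of this file: callers such as the
 * mq-deadline I/O scheduler normally use the wrappers declared in
 * <linux/blkdev.h> around the helpers above, taking the zone write lock
 * when dispatching a write to a sequential zone and releasing it when the
 * request completes or is requeued:
 *
 *	if (!blk_req_can_dispatch_to_zone(rq))
 *		return NULL;			// zone already write-locked
 *	blk_req_zone_write_lock(rq);		// calls __blk_req_zone_write_lock()
 *	...
 *	blk_req_zone_write_unlock(rq);		// on completion or requeue
 */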

static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
                                             sector_t nr_sectors)
{
        unsigned long zone_sectors = blk_queue_zone_sectors(q);

        return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
}

/**
 * blkdev_nr_zones - Get number of zones
 * @bdev:	Target block device
 *
 * Description:
 *    Return the total number of zones of a zoned block device.
 *    For a regular block device, the number of zones is always 0.
 */
unsigned int blkdev_nr_zones(struct block_device *bdev)
{
        struct request_queue *q = bdev_get_queue(bdev);

        if (!blk_queue_is_zoned(q))
                return 0;

        return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
}
EXPORT_SYMBOL_GPL(blkdev_nr_zones);

/*
 * Check that a zone report belongs to the partition.
 * If yes, fix its start sector and write pointer, copy it into the
 * zone information array and return true. Return false otherwise.
 */
static bool blkdev_report_zone(struct block_device *bdev,
                               struct blk_zone *rep,
                               struct blk_zone *zone)
{
        sector_t offset = get_start_sect(bdev);

        if (rep->start < offset)
                return false;

        rep->start -= offset;
        if (rep->start + rep->len > bdev->bd_part->nr_sects)
                return false;

        if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
                rep->wp = rep->start + rep->len;
        else
                rep->wp -= offset;
        memcpy(zone, rep, sizeof(struct blk_zone));

        return true;
}

/**
 * blkdev_report_zones - Get zones information
 * @bdev:	Target block device
 * @sector:	Sector from which to report zones
 * @zones:	Array of zone structures where to return the zones information
 * @nr_zones:	Number of zone structures in the zone array
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Get zone information starting from the zone containing @sector.
 *    The number of zones reported may be less than the number requested
 *    by @nr_zones. The number of zones actually reported is returned
 *    in @nr_zones.
 */
int blkdev_report_zones(struct block_device *bdev,
                        sector_t sector,
                        struct blk_zone *zones,
                        unsigned int *nr_zones,
                        gfp_t gfp_mask)
{
        struct request_queue *q = bdev_get_queue(bdev);
        struct blk_zone_report_hdr *hdr;
        unsigned int nrz = *nr_zones;
        struct page *page;
        unsigned int nr_rep;
        size_t rep_bytes;
        unsigned int nr_pages;
        struct bio *bio;
        struct bio_vec *bv;
        unsigned int i, n, nz;
        unsigned int ofst;
        void *addr;
        int ret;

        if (!q)
                return -ENXIO;

        if (!blk_queue_is_zoned(q))
                return -EOPNOTSUPP;

        if (!nrz)
                return 0;

        if (sector > bdev->bd_part->nr_sects) {
                *nr_zones = 0;
                return 0;
        }

        /*
         * The zone report has a header. So make room for it in the
         * payload. Also make sure that the report fits in a single BIO
         * that will not be split down the stack.
         */
        rep_bytes = sizeof(struct blk_zone_report_hdr) +
                sizeof(struct blk_zone) * nrz;
        rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
        if (rep_bytes > (queue_max_sectors(q) << 9))
                rep_bytes = queue_max_sectors(q) << 9;
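
        /*
         * Illustrative arithmetic, assuming 4 KiB pages and the 64-byte
         * struct blk_zone and struct blk_zone_report_hdr layouts: a request
         * for nrz = 32 zones needs 64 + 32 * 64 = 2112 bytes of payload,
         * which the rounding above grows to a single 4096-byte page.
         */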

        nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
                         rep_bytes >> PAGE_SHIFT);
        nr_pages = min_t(unsigned int, nr_pages,
                         queue_max_segments(q));

        bio = bio_alloc(gfp_mask, nr_pages);
        if (!bio)
                return -ENOMEM;

        bio_set_dev(bio, bdev);
        bio->bi_iter.bi_sector = blk_zone_start(q, sector);
        bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);

        for (i = 0; i < nr_pages; i++) {
                page = alloc_page(gfp_mask);
                if (!page) {
                        ret = -ENOMEM;
                        goto out;
                }
                if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
                        __free_page(page);
                        break;
                }
        }

        if (i == 0)
                ret = -ENOMEM;
        else
                ret = submit_bio_wait(bio);
        if (ret)
                goto out;

        /*
         * Process the report result: skip the header and go through the
         * reported zones to fix up the zone information for partitions.
         * At the same time, copy the fixed-up zone information into the
         * @zones array.
         */
        n = 0;
        nz = 0;
        nr_rep = 0;
        bio_for_each_segment_all(bv, bio, i) {

                if (!bv->bv_page)
                        break;

                addr = kmap_atomic(bv->bv_page);

                /* Get header in the first page */
                ofst = 0;
                if (!nr_rep) {
                        hdr = addr;
                        nr_rep = hdr->nr_zones;
                        ofst = sizeof(struct blk_zone_report_hdr);
                }

                /* Fixup and report zones */
                while (ofst < bv->bv_len &&
                       n < nr_rep && nz < nrz) {
                        if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
                                nz++;
                        ofst += sizeof(struct blk_zone);
                        n++;
                }

                kunmap_atomic(addr);

                if (n >= nr_rep || nz >= nrz)
                        break;

        }

        *nr_zones = nz;
out:
        bio_for_each_segment_all(bv, bio, i)
                __free_page(bv->bv_page);
        bio_put(bio);

        return ret;
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
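
/*
 * Illustrative sketch, not taken from an in-tree caller: because
 * blkdev_report_zones() may return fewer zones than asked for, a caller
 * walking a whole device typically loops, advancing the start sector past
 * the zones already reported (in-tree users such as f2fs and dm-zoned
 * iterate in a similar way). "capacity" stands for the device size in
 * sectors and is assumed to be known to the caller:
 *
 *	unsigned int i, nrz = 128;
 *	sector_t sector = 0;
 *	struct blk_zone *zones;
 *	int ret = 0;
 *
 *	zones = kvmalloc_array(nrz, sizeof(*zones), GFP_KERNEL);
 *	if (!zones)
 *		return -ENOMEM;
 *	while (sector < capacity) {
 *		unsigned int n = nrz;
 *
 *		ret = blkdev_report_zones(bdev, sector, zones, &n, GFP_KERNEL);
 *		if (ret || !n)
 *			break;
 *		for (i = 0; i < n; i++)
 *			sector = zones[i].start + zones[i].len;
 *	}
 *	kvfree(zones);
 */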

/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev:	Target block device
 * @sector:	Start sector of the first zone to reset
 * @nr_sectors:	Number of sectors, at least the length of one zone
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 */
int blkdev_reset_zones(struct block_device *bdev,
                       sector_t sector, sector_t nr_sectors,
                       gfp_t gfp_mask)
{
        struct request_queue *q = bdev_get_queue(bdev);
        sector_t zone_sectors;
        sector_t end_sector = sector + nr_sectors;
        struct bio *bio;
        int ret;

        if (!q)
                return -ENXIO;

        if (!blk_queue_is_zoned(q))
                return -EOPNOTSUPP;

        if (end_sector > bdev->bd_part->nr_sects)
                /* Out of range */
                return -EINVAL;

        /* Check alignment (handle the possible smaller last zone) */
        zone_sectors = blk_queue_zone_sectors(q);
        if (sector & (zone_sectors - 1))
                return -EINVAL;

        if ((nr_sectors & (zone_sectors - 1)) &&
            end_sector != bdev->bd_part->nr_sects)
                return -EINVAL;

        while (sector < end_sector) {

                bio = bio_alloc(gfp_mask, 0);
                bio->bi_iter.bi_sector = sector;
                bio_set_dev(bio, bdev);
                bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

                ret = submit_bio_wait(bio);
                bio_put(bio);

                if (ret)
                        return ret;

                sector += zone_sectors;

                /* This may take a while, so be nice to others */
                cond_resched();

        }

        return 0;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
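
/*
 * Illustrative sketch of a hypothetical caller that already knows the zone
 * geometry; "pos" is an arbitrary sector and "bdev" the zoned block device.
 * Resetting one zone or the whole device is only a matter of choosing
 * @sector and @nr_sectors (zone sizes are a power of two, so masking with
 * zone_sectors - 1 yields the zone start):
 *
 *	sector_t zone_sectors = blk_queue_zone_sectors(bdev_get_queue(bdev));
 *
 *	// reset only the zone containing "pos"
 *	ret = blkdev_reset_zones(bdev, pos & ~(zone_sectors - 1),
 *				 zone_sectors, GFP_KERNEL);
 *
 *	// reset every zone of the device (the range must not contain
 *	// conventional zones)
 *	ret = blkdev_reset_zones(bdev, 0, bdev->bd_part->nr_sects, GFP_KERNEL);
 */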

/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
                              unsigned int cmd, unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        struct request_queue *q;
        struct blk_zone_report rep;
        struct blk_zone *zones;
        int ret;

        if (!argp)
                return -EINVAL;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        if (!blk_queue_is_zoned(q))
                return -ENOTTY;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;

        if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
                return -EFAULT;

        if (!rep.nr_zones)
                return -EINVAL;

        rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);

        zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
                               GFP_KERNEL | __GFP_ZERO);
        if (!zones)
                return -ENOMEM;

        ret = blkdev_report_zones(bdev, rep.sector,
                                  zones, &rep.nr_zones,
                                  GFP_KERNEL);
        if (ret)
                goto out;

        if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
                ret = -EFAULT;
                goto out;
        }

        if (rep.nr_zones) {
                if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
                                 sizeof(struct blk_zone) * rep.nr_zones))
                        ret = -EFAULT;
        }

out:
        kvfree(zones);

        return ret;
}
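
/*
 * Illustrative sketch of the matching user space call, assuming "fd" is a
 * file descriptor opened on the zoned block device and <linux/blkzoned.h>
 * is included; the caller allocates struct blk_zone_report with room for
 * the zone array right behind it:
 *
 *	struct blk_zone_report *rep;
 *	unsigned int nr_zones = 64;
 *	int ret;
 *
 *	rep = calloc(1, sizeof(*rep) + nr_zones * sizeof(struct blk_zone));
 *	rep->sector = 0;		// report from the start of the device
 *	rep->nr_zones = nr_zones;	// room available in rep->zones[]
 *	ret = ioctl(fd, BLKREPORTZONE, rep);
 *	// on success, rep->nr_zones holds the number of entries filled in
 *	// rep->zones[0 .. rep->nr_zones - 1]
 */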

/*
 * BLKRESETZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
                             unsigned int cmd, unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        struct request_queue *q;
        struct blk_zone_range zrange;

        if (!argp)
                return -EINVAL;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        if (!blk_queue_is_zoned(q))
                return -ENOTTY;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;

        if (!(mode & FMODE_WRITE))
                return -EBADF;

        if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
                return -EFAULT;

        return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
                                  GFP_KERNEL);
}