/*
 * NVM Express device driver
 * Copyright (c) 2011-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/hdreg.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/list_sort.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/pr.h>
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
#include <linux/t10-pi.h>
#include <scsi/sg.h>
#include <asm/unaligned.h>

#include "nvme.h"

#define NVME_MINORS     (1U << MINORBITS)

static int nvme_major;
module_param(nvme_major, int, 0);

static int nvme_char_major;
module_param(nvme_char_major, int, 0);

static LIST_HEAD(nvme_ctrl_list);
DEFINE_SPINLOCK(dev_list_lock);

static struct class *nvme_class;

static void nvme_free_ns(struct kref *kref)
{
        struct nvme_ns *ns = container_of(kref, struct nvme_ns, kref);

        if (ns->type == NVME_NS_LIGHTNVM)
                nvme_nvm_unregister(ns->queue, ns->disk->disk_name);

        spin_lock(&dev_list_lock);
        ns->disk->private_data = NULL;
        spin_unlock(&dev_list_lock);

        put_disk(ns->disk);
        ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
        nvme_put_ctrl(ns->ctrl);
        kfree(ns);
}

static void nvme_put_ns(struct nvme_ns *ns)
{
        kref_put(&ns->kref, nvme_free_ns);
}

static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
{
        struct nvme_ns *ns;

        spin_lock(&dev_list_lock);
        ns = disk->private_data;
        if (ns && !kref_get_unless_zero(&ns->kref))
                ns = NULL;
        spin_unlock(&dev_list_lock);

        return ns;
}

void nvme_requeue_req(struct request *req)
{
        unsigned long flags;

        blk_mq_requeue_request(req);
        spin_lock_irqsave(req->q->queue_lock, flags);
        if (!blk_queue_stopped(req->q))
                blk_mq_kick_requeue_list(req->q);
        spin_unlock_irqrestore(req->q->queue_lock, flags);
}

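/*
 * Allocate a block layer request for an NVMe passthrough command.  The
 * command is attached via req->cmd so the transport driver can copy it into
 * a submission queue entry when the request is issued.
 */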
struct request *nvme_alloc_request(struct request_queue *q,
                struct nvme_command *cmd, unsigned int flags)
{
        bool write = cmd->common.opcode & 1;
        struct request *req;

        req = blk_mq_alloc_request(q, write, flags);
        if (IS_ERR(req))
                return req;

        req->cmd_type = REQ_TYPE_DRV_PRIV;
        req->cmd_flags |= REQ_FAILFAST_DRIVER;
        req->__data_len = 0;
        req->__sector = (sector_t) -1;
        req->bio = req->biotail = NULL;

        req->cmd = (unsigned char *)cmd;
        req->cmd_len = sizeof(struct nvme_command);
        req->special = (void *)0;

        return req;
}

/*
 * Returns 0 on success.  If the result is negative, it's a Linux error code;
 * if the result is positive, it's an NVM Express status code
 */
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                void *buffer, unsigned bufflen, u32 *result, unsigned timeout)
{
        struct request *req;
        int ret;

        req = nvme_alloc_request(q, cmd, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);

        req->timeout = timeout ? timeout : ADMIN_TIMEOUT;

        if (buffer && bufflen) {
                ret = blk_rq_map_kern(q, req, buffer, bufflen, GFP_KERNEL);
                if (ret)
                        goto out;
        }

        blk_execute_rq(req->q, NULL, req, 0);
        if (result)
                *result = (u32)(uintptr_t)req->special;
        ret = req->errors;
out:
        blk_mq_free_request(req);
        return ret;
}

int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
                void *buffer, unsigned bufflen)
{
        return __nvme_submit_sync_cmd(q, cmd, buffer, bufflen, NULL, 0);
}

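/*
 * Map a user space data buffer (and, for the block device ioctls, an optional
 * per-I/O metadata buffer) into a request and execute it synchronously.
 */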
int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                void __user *ubuffer, unsigned bufflen,
                void __user *meta_buffer, unsigned meta_len, u32 meta_seed,
                u32 *result, unsigned timeout)
{
        bool write = cmd->common.opcode & 1;
        struct nvme_ns *ns = q->queuedata;
        struct gendisk *disk = ns ? ns->disk : NULL;
        struct request *req;
        struct bio *bio = NULL;
        void *meta = NULL;
        int ret;

        req = nvme_alloc_request(q, cmd, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);

        req->timeout = timeout ? timeout : ADMIN_TIMEOUT;

        if (ubuffer && bufflen) {
                ret = blk_rq_map_user(q, req, NULL, ubuffer, bufflen,
                                GFP_KERNEL);
                if (ret)
                        goto out;
                bio = req->bio;

                if (!disk)
                        goto submit;
                bio->bi_bdev = bdget_disk(disk, 0);
                if (!bio->bi_bdev) {
                        ret = -ENODEV;
                        goto out_unmap;
                }

                if (meta_buffer) {
                        struct bio_integrity_payload *bip;

                        meta = kmalloc(meta_len, GFP_KERNEL);
                        if (!meta) {
                                ret = -ENOMEM;
                                goto out_unmap;
                        }

                        if (write) {
                                if (copy_from_user(meta, meta_buffer,
                                                meta_len)) {
                                        ret = -EFAULT;
                                        goto out_free_meta;
                                }
                        }

                        bip = bio_integrity_alloc(bio, GFP_KERNEL, 1);
                        if (IS_ERR(bip)) {
                                ret = PTR_ERR(bip);
                                goto out_free_meta;
                        }

                        bip->bip_iter.bi_size = meta_len;
                        bip->bip_iter.bi_sector = meta_seed;

                        ret = bio_integrity_add_page(bio, virt_to_page(meta),
                                        meta_len, offset_in_page(meta));
                        if (ret != meta_len) {
                                ret = -ENOMEM;
                                goto out_free_meta;
                        }
                }
        }
submit:
        blk_execute_rq(req->q, disk, req, 0);
        ret = req->errors;
        if (result)
                *result = (u32)(uintptr_t)req->special;
        if (meta && !ret && !write) {
                if (copy_to_user(meta_buffer, meta, meta_len))
                        ret = -EFAULT;
        }
out_free_meta:
        kfree(meta);
out_unmap:
        if (bio) {
                if (disk && bio->bi_bdev)
                        bdput(bio->bi_bdev);
                blk_rq_unmap_user(bio);
        }
out:
        blk_mq_free_request(req);
        return ret;
}

int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
                void __user *ubuffer, unsigned bufflen, u32 *result,
                unsigned timeout)
{
        return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0,
                        result, timeout);
}

int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
        struct nvme_command c = { };
        int error;

        /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
        c.identify.opcode = nvme_admin_identify;
        c.identify.cns = cpu_to_le32(1);

        *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
        if (!*id)
                return -ENOMEM;

        error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
                        sizeof(struct nvme_id_ctrl));
        if (error)
                kfree(*id);
        return error;
}

static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
{
        struct nvme_command c = { };

        c.identify.opcode = nvme_admin_identify;
        c.identify.cns = cpu_to_le32(2);
        c.identify.nsid = cpu_to_le32(nsid);
        return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
}

int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
                struct nvme_id_ns **id)
{
        struct nvme_command c = { };
        int error;

        /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
        c.identify.opcode = nvme_admin_identify;
        c.identify.nsid = cpu_to_le32(nsid);

        *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
        if (!*id)
                return -ENOMEM;

        error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
                        sizeof(struct nvme_id_ns));
        if (error)
                kfree(*id);
        return error;
}

int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid,
                dma_addr_t dma_addr, u32 *result)
{
        struct nvme_command c;

        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_get_features;
        c.features.nsid = cpu_to_le32(nsid);
        c.features.prp1 = cpu_to_le64(dma_addr);
        c.features.fid = cpu_to_le32(fid);

        return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
}

int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
                dma_addr_t dma_addr, u32 *result)
{
        struct nvme_command c;

        memset(&c, 0, sizeof(c));
        c.features.opcode = nvme_admin_set_features;
        c.features.prp1 = cpu_to_le64(dma_addr);
        c.features.fid = cpu_to_le32(fid);
        c.features.dword11 = cpu_to_le32(dword11);

        return __nvme_submit_sync_cmd(dev->admin_q, &c, NULL, 0, result, 0);
}

int nvme_get_log_page(struct nvme_ctrl *dev, struct nvme_smart_log **log)
{
        struct nvme_command c = { };
        int error;

        c.common.opcode = nvme_admin_get_log_page;
        c.common.nsid = cpu_to_le32(0xFFFFFFFF);
        c.common.cdw10[0] = cpu_to_le32(
                        (((sizeof(struct nvme_smart_log) / 4) - 1) << 16) |
                        NVME_LOG_SMART);

        *log = kmalloc(sizeof(struct nvme_smart_log), GFP_KERNEL);
        if (!*log)
                return -ENOMEM;

        error = nvme_submit_sync_cmd(dev->admin_q, &c, *log,
                        sizeof(struct nvme_smart_log));
        if (error)
                kfree(*log);
        return error;
}

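/*
 * Request the given number of I/O submission and completion queues via the
 * Number of Queues feature and clamp *count to what the controller granted.
 */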
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
        u32 q_count = (*count - 1) | ((*count - 1) << 16);
        u32 result;
        int status, nr_io_queues;

        status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, 0,
                        &result);
        if (status)
                return status;

        nr_io_queues = min(result & 0xffff, result >> 16) + 1;
        *count = min(*count, nr_io_queues);
        return 0;
}

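/* Handler for the legacy NVME_IOCTL_SUBMIT_IO read/write/compare ioctl. */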
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{
        struct nvme_user_io io;
        struct nvme_command c;
        unsigned length, meta_len;
        void __user *metadata;

        if (copy_from_user(&io, uio, sizeof(io)))
                return -EFAULT;

        switch (io.opcode) {
        case nvme_cmd_write:
        case nvme_cmd_read:
        case nvme_cmd_compare:
                break;
        default:
                return -EINVAL;
        }

        length = (io.nblocks + 1) << ns->lba_shift;
        meta_len = (io.nblocks + 1) * ns->ms;
        metadata = (void __user *)(uintptr_t)io.metadata;

        if (ns->ext) {
                length += meta_len;
                meta_len = 0;
        } else if (meta_len) {
                if ((io.metadata & 3) || !io.metadata)
                        return -EINVAL;
        }

        memset(&c, 0, sizeof(c));
        c.rw.opcode = io.opcode;
        c.rw.flags = io.flags;
        c.rw.nsid = cpu_to_le32(ns->ns_id);
        c.rw.slba = cpu_to_le64(io.slba);
        c.rw.length = cpu_to_le16(io.nblocks);
        c.rw.control = cpu_to_le16(io.control);
        c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
        c.rw.reftag = cpu_to_le32(io.reftag);
        c.rw.apptag = cpu_to_le16(io.apptag);
        c.rw.appmask = cpu_to_le16(io.appmask);

        return __nvme_submit_user_cmd(ns->queue, &c,
                        (void __user *)(uintptr_t)io.addr, length,
                        metadata, meta_len, io.slba, NULL, 0);
}

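/* Generic passthrough for NVME_IOCTL_ADMIN_CMD and NVME_IOCTL_IO_CMD. */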
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
                        struct nvme_passthru_cmd __user *ucmd)
{
        struct nvme_passthru_cmd cmd;
        struct nvme_command c;
        unsigned timeout = 0;
        int status;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
        if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
                return -EFAULT;

        memset(&c, 0, sizeof(c));
        c.common.opcode = cmd.opcode;
        c.common.flags = cmd.flags;
        c.common.nsid = cpu_to_le32(cmd.nsid);
        c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
        c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
        c.common.cdw10[0] = cpu_to_le32(cmd.cdw10);
        c.common.cdw10[1] = cpu_to_le32(cmd.cdw11);
        c.common.cdw10[2] = cpu_to_le32(cmd.cdw12);
        c.common.cdw10[3] = cpu_to_le32(cmd.cdw13);
        c.common.cdw10[4] = cpu_to_le32(cmd.cdw14);
        c.common.cdw10[5] = cpu_to_le32(cmd.cdw15);

        if (cmd.timeout_ms)
                timeout = msecs_to_jiffies(cmd.timeout_ms);

        status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
                        (void __user *)(uintptr_t)cmd.addr, cmd.data_len,
                        &cmd.result, timeout);
        if (status >= 0) {
                if (put_user(cmd.result, &ucmd->result))
                        return -EFAULT;
        }

        return status;
}

static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
                unsigned int cmd, unsigned long arg)
{
        struct nvme_ns *ns = bdev->bd_disk->private_data;

        switch (cmd) {
        case NVME_IOCTL_ID:
                force_successful_syscall_return();
                return ns->ns_id;
        case NVME_IOCTL_ADMIN_CMD:
                return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
        case NVME_IOCTL_IO_CMD:
                return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
        case NVME_IOCTL_SUBMIT_IO:
                return nvme_submit_io(ns, (void __user *)arg);
#ifdef CONFIG_BLK_DEV_NVME_SCSI
        case SG_GET_VERSION_NUM:
                return nvme_sg_get_version_num((void __user *)arg);
        case SG_IO:
                return nvme_sg_io(ns, (void __user *)arg);
#endif
        default:
                return -ENOTTY;
        }
}

#ifdef CONFIG_COMPAT
static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
                        unsigned int cmd, unsigned long arg)
{
        switch (cmd) {
        case SG_IO:
                return -ENOIOCTLCMD;
        }
        return nvme_ioctl(bdev, mode, cmd, arg);
}
#else
#define nvme_compat_ioctl       NULL
#endif

static int nvme_open(struct block_device *bdev, fmode_t mode)
{
        return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
}

static void nvme_release(struct gendisk *disk, fmode_t mode)
{
        nvme_put_ns(disk->private_data);
}

static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
        /* some standard values */
        geo->heads = 1 << 6;
        geo->sectors = 1 << 5;
        geo->cylinders = get_capacity(bdev->bd_disk) >> 11;
        return 0;
}

#ifdef CONFIG_BLK_DEV_INTEGRITY
static void nvme_init_integrity(struct nvme_ns *ns)
{
        struct blk_integrity integrity;

        switch (ns->pi_type) {
        case NVME_NS_DPS_PI_TYPE3:
                integrity.profile = &t10_pi_type3_crc;
                break;
        case NVME_NS_DPS_PI_TYPE1:
        case NVME_NS_DPS_PI_TYPE2:
                integrity.profile = &t10_pi_type1_crc;
                break;
        default:
                integrity.profile = NULL;
                break;
        }
        integrity.tuple_size = ns->ms;
        blk_integrity_register(ns->disk, &integrity);
        blk_queue_max_integrity_segments(ns->queue, 1);
}
#else
static void nvme_init_integrity(struct nvme_ns *ns)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */

static void nvme_config_discard(struct nvme_ns *ns)
{
        u32 logical_block_size = queue_logical_block_size(ns->queue);
        ns->queue->limits.discard_zeroes_data = 0;
        ns->queue->limits.discard_alignment = logical_block_size;
        ns->queue->limits.discard_granularity = logical_block_size;
        blk_queue_max_discard_sectors(ns->queue, 0xffffffff);
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue);
}

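/*
 * Re-read the Identify Namespace data and bring the block size, capacity,
 * integrity profile and discard settings of the gendisk back in sync with it.
 */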
static int nvme_revalidate_disk(struct gendisk *disk)
{
        struct nvme_ns *ns = disk->private_data;
        struct nvme_id_ns *id;
        u8 lbaf, pi_type;
        u16 old_ms;
        unsigned short bs;

        if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
                dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
                                __func__, ns->ctrl->instance, ns->ns_id);
                return -ENODEV;
        }
        if (id->ncap == 0) {
                kfree(id);
                return -ENODEV;
        }

        if (nvme_nvm_ns_supported(ns, id) && ns->type != NVME_NS_LIGHTNVM) {
                if (nvme_nvm_register(ns->queue, disk->disk_name)) {
                        dev_warn(ns->ctrl->dev,
                                "%s: LightNVM init failure\n", __func__);
                        kfree(id);
                        return -ENODEV;
                }
                ns->type = NVME_NS_LIGHTNVM;
        }

        if (ns->ctrl->vs >= NVME_VS(1, 1))
                memcpy(ns->eui, id->eui64, sizeof(ns->eui));
        if (ns->ctrl->vs >= NVME_VS(1, 2))
                memcpy(ns->uuid, id->nguid, sizeof(ns->uuid));

        old_ms = ns->ms;
        lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
        ns->lba_shift = id->lbaf[lbaf].ds;
        ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
        ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);

        /*
         * If identify namespace failed, use a default 512 byte block size so
         * the block layer can use it before failing reads/writes for 0
         * capacity.
         */
        if (ns->lba_shift == 0)
                ns->lba_shift = 9;
        bs = 1 << ns->lba_shift;
        /* XXX: PI implementation requires metadata equal t10 pi tuple size */
        pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
                                        id->dps & NVME_NS_DPS_PI_MASK : 0;

        blk_mq_freeze_queue(disk->queue);
        if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
                                ns->ms != old_ms ||
                                bs != queue_logical_block_size(disk->queue) ||
                                (ns->ms && ns->ext)))
                blk_integrity_unregister(disk);

        ns->pi_type = pi_type;
        blk_queue_logical_block_size(ns->queue, bs);

        if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
                nvme_init_integrity(ns);
        if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
                set_capacity(disk, 0);
        else
                set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));

        if (ns->ctrl->oncs & NVME_CTRL_ONCS_DSM)
                nvme_config_discard(ns);
        blk_mq_unfreeze_queue(disk->queue);

        kfree(id);
        return 0;
}

static char nvme_pr_type(enum pr_type type)
{
        switch (type) {
        case PR_WRITE_EXCLUSIVE:
                return 1;
        case PR_EXCLUSIVE_ACCESS:
                return 2;
        case PR_WRITE_EXCLUSIVE_REG_ONLY:
                return 3;
        case PR_EXCLUSIVE_ACCESS_REG_ONLY:
                return 4;
        case PR_WRITE_EXCLUSIVE_ALL_REGS:
                return 5;
        case PR_EXCLUSIVE_ACCESS_ALL_REGS:
                return 6;
        default:
                return 0;
        }
}

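/*
 * Build and submit a reservation command: the current and new keys go into a
 * 16-byte data buffer, the action and reservation type into cdw10.
 */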
static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
                                u64 key, u64 sa_key, u8 op)
{
        struct nvme_ns *ns = bdev->bd_disk->private_data;
        struct nvme_command c;
        u8 data[16] = { 0, };

        put_unaligned_le64(key, &data[0]);
        put_unaligned_le64(sa_key, &data[8]);

        memset(&c, 0, sizeof(c));
        c.common.opcode = op;
        c.common.nsid = cpu_to_le32(ns->ns_id);
        c.common.cdw10[0] = cpu_to_le32(cdw10);

        return nvme_submit_sync_cmd(ns->queue, &c, data, 16);
}

static int nvme_pr_register(struct block_device *bdev, u64 old,
                u64 new, unsigned flags)
{
        u32 cdw10;

        if (flags & ~PR_FL_IGNORE_KEY)
                return -EOPNOTSUPP;

        cdw10 = old ? 2 : 0;
        cdw10 |= (flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0;
        cdw10 |= (1 << 30) | (1 << 31); /* PTPL=1 */
        return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_register);
}

static int nvme_pr_reserve(struct block_device *bdev, u64 key,
                enum pr_type type, unsigned flags)
{
        u32 cdw10;

        if (flags & ~PR_FL_IGNORE_KEY)
                return -EOPNOTSUPP;

        cdw10 = nvme_pr_type(type) << 8;
        cdw10 |= ((flags & PR_FL_IGNORE_KEY) ? 1 << 3 : 0);
        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_acquire);
}

static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new,
                enum pr_type type, bool abort)
{
        u32 cdw10 = (nvme_pr_type(type) << 8) | (abort ? 2 : 1);
        return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire);
}

static int nvme_pr_clear(struct block_device *bdev, u64 key)
{
        u32 cdw10 = 1 | (key ? 1 << 3 : 0);
        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register);
}

static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
        u32 cdw10 = (nvme_pr_type(type) << 8) | (key ? 1 << 3 : 0);
        return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release);
}

static const struct pr_ops nvme_pr_ops = {
        .pr_register    = nvme_pr_register,
        .pr_reserve     = nvme_pr_reserve,
        .pr_release     = nvme_pr_release,
        .pr_preempt     = nvme_pr_preempt,
        .pr_clear       = nvme_pr_clear,
};

static const struct block_device_operations nvme_fops = {
        .owner          = THIS_MODULE,
        .ioctl          = nvme_ioctl,
        .compat_ioctl   = nvme_compat_ioctl,
        .open           = nvme_open,
        .release        = nvme_release,
        .getgeo         = nvme_getgeo,
        .revalidate_disk= nvme_revalidate_disk,
        .pr_ops         = &nvme_pr_ops,
};

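/*
 * Poll CSTS.RDY until it matches the expected state, bounded by the timeout
 * advertised in CAP.TO (units of 500 milliseconds).
 */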
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
{
        unsigned long timeout =
                ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
        u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
        int ret;

        while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
                if ((csts & NVME_CSTS_RDY) == bit)
                        break;

                msleep(100);
                if (fatal_signal_pending(current))
                        return -EINTR;
                if (time_after(jiffies, timeout)) {
                        dev_err(ctrl->dev,
                                "Device not ready; aborting %s\n", enabled ?
                                                "initialisation" : "reset");
                        return -ENODEV;
                }
        }

        return ret;
}

/*
 * If the device has been passed off to us in an enabled state, just clear
 * the enabled bit.  The spec says we should set the 'shutdown notification
 * bits', but doing so may cause the device to complete commands to the
 * admin queue ... and we don't know what memory that might be pointing at!
 */
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
        int ret;

        ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
        ctrl->ctrl_config &= ~NVME_CC_ENABLE;

        ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
        if (ret)
                return ret;
        return nvme_wait_ready(ctrl, cap, false);
}

int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
        /*
         * Default to a 4K page size, with the intention to update this
         * path in the future to accommodate architectures with differing
         * kernel and IO page sizes.
         */
        unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
        int ret;

        if (page_shift < dev_page_min) {
                dev_err(ctrl->dev,
                        "Minimum device page size %u too large for host (%u)\n",
                        1 << dev_page_min, 1 << page_shift);
                return -ENODEV;
        }

        ctrl->page_size = 1 << page_shift;

        ctrl->ctrl_config = NVME_CC_CSS_NVM;
        ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
        ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
        ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
        ctrl->ctrl_config |= NVME_CC_ENABLE;

        ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
        if (ret)
                return ret;
        return nvme_wait_ready(ctrl, cap, true);
}

int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
{
        unsigned long timeout = SHUTDOWN_TIMEOUT + jiffies;
        u32 csts;
        int ret;

        ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
        ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;

        ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
        if (ret)
                return ret;

        while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
                if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
                        break;

                msleep(100);
                if (fatal_signal_pending(current))
                        return -EINTR;
                if (time_after(jiffies, timeout)) {
                        dev_err(ctrl->dev,
                                "Device shutdown incomplete; abort shutdown\n");
                        return -ENODEV;
                }
        }

        return ret;
}

/*
 * Initialize the cached copies of the Identify data and various controller
 * registers in our nvme_ctrl structure.  This should be called as soon as
 * the admin queue is fully up and running.
 */
int nvme_init_identify(struct nvme_ctrl *ctrl)
{
        struct nvme_id_ctrl *id;
        u64 cap;
        int ret, page_shift;

        ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
        if (ret) {
                dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
                return ret;
        }

        ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
        if (ret) {
                dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
                return ret;
        }
        page_shift = NVME_CAP_MPSMIN(cap) + 12;

        if (ctrl->vs >= NVME_VS(1, 1))
                ctrl->subsystem = NVME_CAP_NSSRC(cap);

        ret = nvme_identify_ctrl(ctrl, &id);
        if (ret) {
                dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
                return -EIO;
        }

        ctrl->oncs = le16_to_cpup(&id->oncs);
        atomic_set(&ctrl->abort_limit, id->acl + 1);
        ctrl->vwc = id->vwc;
        memcpy(ctrl->serial, id->sn, sizeof(id->sn));
        memcpy(ctrl->model, id->mn, sizeof(id->mn));
        memcpy(ctrl->firmware_rev, id->fr, sizeof(id->fr));
        if (id->mdts)
                ctrl->max_hw_sectors = 1 << (id->mdts + page_shift - 9);
        else
                ctrl->max_hw_sectors = UINT_MAX;

        if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && id->vs[3]) {
                unsigned int max_hw_sectors;

                ctrl->stripe_size = 1 << (id->vs[3] + page_shift);
                max_hw_sectors = ctrl->stripe_size >> (page_shift - 9);
                if (ctrl->max_hw_sectors) {
                        ctrl->max_hw_sectors = min(max_hw_sectors,
                                                        ctrl->max_hw_sectors);
                } else {
                        ctrl->max_hw_sectors = max_hw_sectors;
                }
        }

        kfree(id);
        return 0;
}

static int nvme_dev_open(struct inode *inode, struct file *file)
{
        struct nvme_ctrl *ctrl;
        int instance = iminor(inode);
        int ret = -ENODEV;

        spin_lock(&dev_list_lock);
        list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
                if (ctrl->instance != instance)
                        continue;

                if (!ctrl->admin_q) {
                        ret = -EWOULDBLOCK;
                        break;
                }
                if (!kref_get_unless_zero(&ctrl->kref))
                        break;
                file->private_data = ctrl;
                ret = 0;
                break;
        }
        spin_unlock(&dev_list_lock);

        return ret;
}

static int nvme_dev_release(struct inode *inode, struct file *file)
{
        nvme_put_ctrl(file->private_data);
        return 0;
}

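/*
 * NVME_IOCTL_IO_CMD on the controller character device is only supported
 * when the controller has exactly one namespace; forward it to that one.
 */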
static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp)
{
        struct nvme_ns *ns;
        int ret;

        mutex_lock(&ctrl->namespaces_mutex);
        if (list_empty(&ctrl->namespaces)) {
                ret = -ENOTTY;
                goto out_unlock;
        }

        ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
        if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
                dev_warn(ctrl->dev,
                        "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
                ret = -EINVAL;
                goto out_unlock;
        }

        dev_warn(ctrl->dev,
                "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
        kref_get(&ns->kref);
        mutex_unlock(&ctrl->namespaces_mutex);

        ret = nvme_user_cmd(ctrl, ns, argp);
        nvme_put_ns(ns);
        return ret;

out_unlock:
        mutex_unlock(&ctrl->namespaces_mutex);
        return ret;
}

static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
                unsigned long arg)
{
        struct nvme_ctrl *ctrl = file->private_data;
        void __user *argp = (void __user *)arg;

        switch (cmd) {
        case NVME_IOCTL_ADMIN_CMD:
                return nvme_user_cmd(ctrl, NULL, argp);
        case NVME_IOCTL_IO_CMD:
                return nvme_dev_user_cmd(ctrl, argp);
        case NVME_IOCTL_RESET:
                dev_warn(ctrl->dev, "resetting controller\n");
                return ctrl->ops->reset_ctrl(ctrl);
        case NVME_IOCTL_SUBSYS_RESET:
                return nvme_reset_subsystem(ctrl);
        default:
                return -ENOTTY;
        }
}

static const struct file_operations nvme_dev_fops = {
        .owner          = THIS_MODULE,
        .open           = nvme_dev_open,
        .release        = nvme_dev_release,
        .unlocked_ioctl = nvme_dev_ioctl,
        .compat_ioctl   = nvme_dev_ioctl,
};

static ssize_t nvme_sysfs_reset(struct device *dev,
                                struct device_attribute *attr, const char *buf,
                                size_t count)
{
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
        int ret;

        ret = ctrl->ops->reset_ctrl(ctrl);
        if (ret < 0)
                return ret;
        return count;
}
static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);

static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
                                                                char *buf)
{
        struct nvme_ns *ns = dev_to_disk(dev)->private_data;
        return sprintf(buf, "%pU\n", ns->uuid);
}
static DEVICE_ATTR(uuid, S_IRUGO, uuid_show, NULL);

static ssize_t eui_show(struct device *dev, struct device_attribute *attr,
                                                                char *buf)
{
        struct nvme_ns *ns = dev_to_disk(dev)->private_data;
        return sprintf(buf, "%8ph\n", ns->eui);
}
static DEVICE_ATTR(eui, S_IRUGO, eui_show, NULL);

static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
                                                                char *buf)
{
        struct nvme_ns *ns = dev_to_disk(dev)->private_data;
        return sprintf(buf, "%d\n", ns->ns_id);
}
static DEVICE_ATTR(nsid, S_IRUGO, nsid_show, NULL);

static struct attribute *nvme_ns_attrs[] = {
        &dev_attr_uuid.attr,
        &dev_attr_eui.attr,
        &dev_attr_nsid.attr,
        NULL,
};

static umode_t nvme_attrs_are_visible(struct kobject *kobj,
                struct attribute *a, int n)
{
        struct device *dev = container_of(kobj, struct device, kobj);
        struct nvme_ns *ns = dev_to_disk(dev)->private_data;

        if (a == &dev_attr_uuid.attr) {
                if (!memchr_inv(ns->uuid, 0, sizeof(ns->uuid)))
                        return 0;
        }
        if (a == &dev_attr_eui.attr) {
                if (!memchr_inv(ns->eui, 0, sizeof(ns->eui)))
                        return 0;
        }
        return a->mode;
}

static const struct attribute_group nvme_ns_attr_group = {
        .attrs          = nvme_ns_attrs,
        .is_visible     = nvme_attrs_are_visible,
};

#define nvme_show_function(field)                                              \
static ssize_t field##_show(struct device *dev,                                \
                            struct device_attribute *attr, char *buf)          \
{                                                                              \
        struct nvme_ctrl *ctrl = dev_get_drvdata(dev);                         \
        return sprintf(buf, "%.*s\n", (int)sizeof(ctrl->field), ctrl->field);  \
}                                                                              \
static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);

nvme_show_function(model);
nvme_show_function(serial);
nvme_show_function(firmware_rev);

static struct attribute *nvme_dev_attrs[] = {
        &dev_attr_reset_controller.attr,
        &dev_attr_model.attr,
        &dev_attr_serial.attr,
        &dev_attr_firmware_rev.attr,
        NULL
};

static struct attribute_group nvme_dev_attrs_group = {
        .attrs = nvme_dev_attrs,
};

static const struct attribute_group *nvme_dev_attr_groups[] = {
        &nvme_dev_attrs_group,
        NULL,
};

static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
{
        struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
        struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);

        return nsa->ns_id - nsb->ns_id;
}

static struct nvme_ns *nvme_find_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
        struct nvme_ns *ns;

        lockdep_assert_held(&ctrl->namespaces_mutex);

        list_for_each_entry(ns, &ctrl->namespaces, list) {
                if (ns->ns_id == nsid)
                        return ns;
                if (ns->ns_id > nsid)
                        break;
        }
        return NULL;
}

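/*
 * Allocate a new namespace: set up its request queue and gendisk, validate it
 * against the Identify Namespace data and register it with sysfs.
 */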
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
        struct nvme_ns *ns;
        struct gendisk *disk;
        int node = dev_to_node(ctrl->dev);

        lockdep_assert_held(&ctrl->namespaces_mutex);

        ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
        if (!ns)
                return;

        ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
        if (ns->instance < 0)
                goto out_free_ns;

        ns->queue = blk_mq_init_queue(ctrl->tagset);
        if (IS_ERR(ns->queue))
                goto out_release_instance;
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
        ns->queue->queuedata = ns;
        ns->ctrl = ctrl;

        disk = alloc_disk_node(0, node);
        if (!disk)
                goto out_free_queue;

        kref_init(&ns->kref);
        ns->ns_id = nsid;
        ns->disk = disk;
        ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */

        blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
        if (ctrl->max_hw_sectors) {
                blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
                blk_queue_max_segments(ns->queue,
                        (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
        }
        if (ctrl->stripe_size)
                blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
        if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
                blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
        blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);

        disk->major = nvme_major;
        disk->first_minor = 0;
        disk->fops = &nvme_fops;
        disk->private_data = ns;
        disk->queue = ns->queue;
        disk->driverfs_dev = ctrl->device;
        disk->flags = GENHD_FL_EXT_DEVT;
        sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance);

        if (nvme_revalidate_disk(ns->disk))
                goto out_free_disk;

        list_add_tail(&ns->list, &ctrl->namespaces);
        kref_get(&ctrl->kref);
        if (ns->type == NVME_NS_LIGHTNVM)
                return;

        add_disk(ns->disk);
        if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvme_ns_attr_group))
                pr_warn("%s: failed to create sysfs group for identification\n",
                        ns->disk->disk_name);
        return;
out_free_disk:
        kfree(disk);
out_free_queue:
        blk_cleanup_queue(ns->queue);
out_release_instance:
        ida_simple_remove(&ctrl->ns_ida, ns->instance);
out_free_ns:
        kfree(ns);
}

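/* Tear down a namespace: remove the gendisk and release the request queue. */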
static void nvme_ns_remove(struct nvme_ns *ns)
{
        bool kill;

        if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
                return;

        kill = nvme_io_incapable(ns->ctrl) &&
                        !blk_queue_dying(ns->queue);
        if (kill) {
                blk_set_queue_dying(ns->queue);

                /*
                 * The controller was shutdown first if we got here through
                 * device removal. The shutdown may requeue outstanding
                 * requests. These need to be aborted immediately so
                 * del_gendisk doesn't block indefinitely for their completion.
                 */
                blk_mq_abort_requeue_list(ns->queue);
        }
        if (ns->disk->flags & GENHD_FL_UP) {
                if (blk_get_integrity(ns->disk))
                        blk_integrity_unregister(ns->disk);
                sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
                                        &nvme_ns_attr_group);
                del_gendisk(ns->disk);
        }
        if (kill || !blk_queue_dying(ns->queue)) {
                blk_mq_abort_requeue_list(ns->queue);
                blk_cleanup_queue(ns->queue);
        }
        mutex_lock(&ns->ctrl->namespaces_mutex);
        list_del_init(&ns->list);
        mutex_unlock(&ns->ctrl->namespaces_mutex);
        nvme_put_ns(ns);
}

static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
        struct nvme_ns *ns;

        ns = nvme_find_ns(ctrl, nsid);
        if (ns) {
                if (revalidate_disk(ns->disk))
                        nvme_ns_remove(ns);
        } else
                nvme_alloc_ns(ctrl, nsid);
}

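/*
 * Scan using the Identify active namespace ID list (CNS 0x02), removing any
 * namespace that is no longer reported by the controller.
 */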
static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
{
        struct nvme_ns *ns;
        __le32 *ns_list;
        unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
        int ret = 0;

        ns_list = kzalloc(0x1000, GFP_KERNEL);
        if (!ns_list)
                return -ENOMEM;

        for (i = 0; i < num_lists; i++) {
                ret = nvme_identify_ns_list(ctrl, prev, ns_list);
                if (ret)
                        goto out;

                for (j = 0; j < min(nn, 1024U); j++) {
                        nsid = le32_to_cpu(ns_list[j]);
                        if (!nsid)
                                goto out;

                        nvme_validate_ns(ctrl, nsid);

                        while (++prev < nsid) {
                                ns = nvme_find_ns(ctrl, prev);
                                if (ns)
                                        nvme_ns_remove(ns);
                        }
                }
                nn -= j;
        }
out:
        kfree(ns_list);
        return ret;
}

static void __nvme_scan_namespaces(struct nvme_ctrl *ctrl, unsigned nn)
{
        struct nvme_ns *ns, *next;
        unsigned i;

        lockdep_assert_held(&ctrl->namespaces_mutex);

        for (i = 1; i <= nn; i++)
                nvme_validate_ns(ctrl, i);

        list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
                if (ns->ns_id > nn)
                        nvme_ns_remove(ns);
        }
}

void nvme_scan_namespaces(struct nvme_ctrl *ctrl)
{
        struct nvme_id_ctrl *id;
        unsigned nn;

        if (nvme_identify_ctrl(ctrl, &id))
                return;

        mutex_lock(&ctrl->namespaces_mutex);
        nn = le32_to_cpu(id->nn);
        if (ctrl->vs >= NVME_VS(1, 1) &&
            !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
                if (!nvme_scan_ns_list(ctrl, nn))
                        goto done;
        }
        __nvme_scan_namespaces(ctrl, le32_to_cpup(&id->nn));
done:
        list_sort(NULL, &ctrl->namespaces, ns_cmp);
        mutex_unlock(&ctrl->namespaces_mutex);
        kfree(id);
}

void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
        struct nvme_ns *ns, *next;

        list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
                nvme_ns_remove(ns);
}

static DEFINE_IDA(nvme_instance_ida);

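/* Allocate a unique instance number for a new controller. */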
static int nvme_set_instance(struct nvme_ctrl *ctrl)
{
        int instance, error;

        do {
                if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
                        return -ENODEV;

                spin_lock(&dev_list_lock);
                error = ida_get_new(&nvme_instance_ida, &instance);
                spin_unlock(&dev_list_lock);
        } while (error == -EAGAIN);

        if (error)
                return -ENODEV;

        ctrl->instance = instance;
        return 0;
}

static void nvme_release_instance(struct nvme_ctrl *ctrl)
{
        spin_lock(&dev_list_lock);
        ida_remove(&nvme_instance_ida, ctrl->instance);
        spin_unlock(&dev_list_lock);
}

void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
        device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));

        spin_lock(&dev_list_lock);
        list_del(&ctrl->node);
        spin_unlock(&dev_list_lock);
}

static void nvme_free_ctrl(struct kref *kref)
{
        struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);

        put_device(ctrl->device);
        nvme_release_instance(ctrl);
        ida_destroy(&ctrl->ns_ida);

        ctrl->ops->free_ctrl(ctrl);
}

void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
        kref_put(&ctrl->kref, nvme_free_ctrl);
}

/*
 * Initialize an NVMe controller structure.  This needs to be called during
 * earliest initialization so that we have the initialized structure around
 * during probing.
 */
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
                const struct nvme_ctrl_ops *ops, unsigned long quirks)
{
        int ret;

        INIT_LIST_HEAD(&ctrl->namespaces);
        mutex_init(&ctrl->namespaces_mutex);
        kref_init(&ctrl->kref);
        ctrl->dev = dev;
        ctrl->ops = ops;
        ctrl->quirks = quirks;

        ret = nvme_set_instance(ctrl);
        if (ret)
                goto out;

        ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
                                MKDEV(nvme_char_major, ctrl->instance),
                                dev, nvme_dev_attr_groups,
                                "nvme%d", ctrl->instance);
        if (IS_ERR(ctrl->device)) {
                ret = PTR_ERR(ctrl->device);
                goto out_release_instance;
        }
        get_device(ctrl->device);
        dev_set_drvdata(ctrl->device, ctrl);
        ida_init(&ctrl->ns_ida);

        spin_lock(&dev_list_lock);
        list_add_tail(&ctrl->node, &nvme_ctrl_list);
        spin_unlock(&dev_list_lock);

        return 0;
out_release_instance:
        nvme_release_instance(ctrl);
out:
        return ret;
}

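/*
 * Stop all namespace queues so no new requests reach the driver, e.g. while
 * a controller is being reset or shut down.
 */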
void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
        struct nvme_ns *ns;

        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                spin_lock_irq(ns->queue->queue_lock);
                queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
                spin_unlock_irq(ns->queue->queue_lock);

                blk_mq_cancel_requeue_work(ns->queue);
                blk_mq_stop_hw_queues(ns->queue);
        }
        mutex_unlock(&ctrl->namespaces_mutex);
}

void nvme_start_queues(struct nvme_ctrl *ctrl)
{
        struct nvme_ns *ns;

        mutex_lock(&ctrl->namespaces_mutex);
        list_for_each_entry(ns, &ctrl->namespaces, list) {
                queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
                blk_mq_start_stopped_hw_queues(ns->queue, true);
                blk_mq_kick_requeue_list(ns->queue);
        }
        mutex_unlock(&ctrl->namespaces_mutex);
}

int __init nvme_core_init(void)
{
        int result;

        result = register_blkdev(nvme_major, "nvme");
        if (result < 0)
                return result;
        else if (result > 0)
                nvme_major = result;

        result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
                                                        &nvme_dev_fops);
        if (result < 0)
                goto unregister_blkdev;
        else if (result > 0)
                nvme_char_major = result;

        nvme_class = class_create(THIS_MODULE, "nvme");
        if (IS_ERR(nvme_class)) {
                result = PTR_ERR(nvme_class);
                goto unregister_chrdev;
        }

        return 0;

unregister_chrdev:
        __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
unregister_blkdev:
        unregister_blkdev(nvme_major, "nvme");
        return result;
}

void nvme_core_exit(void)
{
        unregister_blkdev(nvme_major, "nvme");
        class_destroy(nvme_class);
        __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
}