lightnvm: implement generic path for sync I/O
Implement a generic path for sending sync I/O on LightNVM. This allows
reusing the standard synchronous path through blk_execute_rq(), instead
of implementing a wait_for_completion on the target side (e.g., pblk).
Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 0e5f772..fe21f4d 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -720,12 +720,25 @@
}
EXPORT_SYMBOL(nvm_submit_io);
-static void nvm_end_io_sync(struct nvm_rq *rqd)
+int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
{
- struct completion *waiting = rqd->private;
+ struct nvm_dev *dev = tgt_dev->parent;
+ int ret;
- complete(waiting);
+ if (!dev->ops->submit_io_sync)
+ return -ENODEV;
+
+ nvm_rq_tgt_to_dev(tgt_dev, rqd);
+
+ rqd->dev = tgt_dev;
+
+ /* In case of error, fail with right address format */
+ ret = dev->ops->submit_io_sync(dev, rqd);
+ nvm_rq_dev_to_tgt(tgt_dev, rqd);
+
+ return ret;
}
+EXPORT_SYMBOL(nvm_submit_io_sync);
int nvm_erase_sync(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
int nr_ppas)
@@ -733,25 +746,21 @@
struct nvm_geo *geo = &tgt_dev->geo;
struct nvm_rq rqd;
int ret;
- DECLARE_COMPLETION_ONSTACK(wait);
memset(&rqd, 0, sizeof(struct nvm_rq));
rqd.opcode = NVM_OP_ERASE;
- rqd.end_io = nvm_end_io_sync;
- rqd.private = &wait;
rqd.flags = geo->plane_mode >> 1;
ret = nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
if (ret)
return ret;
- ret = nvm_submit_io(tgt_dev, &rqd);
+ ret = nvm_submit_io_sync(tgt_dev, &rqd);
if (ret) {
pr_err("rrpr: erase I/O submission failed: %d\n", ret);
goto free_ppa_list;
}
- wait_for_completion_io(&wait);
free_ppa_list:
nvm_free_rqd_ppalist(tgt_dev, &rqd);
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 4199119..ce90213 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -412,34 +412,11 @@
struct nvm_tgt_dev *dev = pblk->dev;
#ifdef CONFIG_NVM_DEBUG
- struct ppa_addr *ppa_list;
+ int ret;
- ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
- if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
- WARN_ON(1);
- return -EINVAL;
- }
-
- if (rqd->opcode == NVM_OP_PWRITE) {
- struct pblk_line *line;
- struct ppa_addr ppa;
- int i;
-
- for (i = 0; i < rqd->nr_ppas; i++) {
- ppa = ppa_list[i];
- line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
-
- spin_lock(&line->lock);
- if (line->state != PBLK_LINESTATE_OPEN) {
- pr_err("pblk: bad ppa: line:%d,state:%d\n",
- line->id, line->state);
- WARN_ON(1);
- spin_unlock(&line->lock);
- return -EINVAL;
- }
- spin_unlock(&line->lock);
- }
- }
+ ret = pblk_check_io(pblk, rqd);
+ if (ret)
+ return ret;
#endif
atomic_inc(&pblk->inflight_io);
@@ -447,6 +424,23 @@
return nvm_submit_io(dev, rqd);
}
+int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+
+#ifdef CONFIG_NVM_DEBUG
+ int ret;
+
+ ret = pblk_check_io(pblk, rqd);
+ if (ret)
+ return ret;
+#endif
+
+ atomic_inc(&pblk->inflight_io);
+
+ return nvm_submit_io_sync(dev, rqd);
+}
+
static void pblk_bio_map_addr_endio(struct bio *bio)
{
bio_put(bio);
@@ -597,7 +591,6 @@
int cmd_op, bio_op;
int i, j;
int ret;
- DECLARE_COMPLETION_ONSTACK(wait);
if (dir == PBLK_WRITE) {
bio_op = REQ_OP_WRITE;
@@ -639,8 +632,6 @@
rqd.dma_ppa_list = dma_ppa_list;
rqd.opcode = cmd_op;
rqd.nr_ppas = rq_ppas;
- rqd.end_io = pblk_end_io_sync;
- rqd.private = &wait;
if (dir == PBLK_WRITE) {
struct pblk_sec_meta *meta_list = rqd.meta_list;
@@ -694,19 +685,14 @@
}
}
- ret = pblk_submit_io(pblk, &rqd);
+ ret = pblk_submit_io_sync(pblk, &rqd);
if (ret) {
pr_err("pblk: emeta I/O submission failed: %d\n", ret);
bio_put(bio);
goto free_rqd_dma;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: emeta I/O timed out\n");
- }
atomic_dec(&pblk->inflight_io);
- reinit_completion(&wait);
if (rqd.error) {
if (dir == PBLK_WRITE)
@@ -750,7 +736,6 @@
int i, ret;
int cmd_op, bio_op;
int flags;
- DECLARE_COMPLETION_ONSTACK(wait);
if (dir == PBLK_WRITE) {
bio_op = REQ_OP_WRITE;
@@ -787,8 +772,6 @@
rqd.opcode = cmd_op;
rqd.flags = flags;
rqd.nr_ppas = lm->smeta_sec;
- rqd.end_io = pblk_end_io_sync;
- rqd.private = &wait;
for (i = 0; i < lm->smeta_sec; i++, paddr++) {
struct pblk_sec_meta *meta_list = rqd.meta_list;
@@ -807,17 +790,13 @@
* the write thread is the only one sending write and erase commands,
* there is no need to take the LUN semaphore.
*/
- ret = pblk_submit_io(pblk, &rqd);
+ ret = pblk_submit_io_sync(pblk, &rqd);
if (ret) {
pr_err("pblk: smeta I/O submission failed: %d\n", ret);
bio_put(bio);
goto free_ppa_list;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: smeta I/O timed out\n");
- }
atomic_dec(&pblk->inflight_io);
if (rqd.error) {
@@ -861,19 +840,15 @@
{
struct nvm_rq rqd;
int ret = 0;
- DECLARE_COMPLETION_ONSTACK(wait);
memset(&rqd, 0, sizeof(struct nvm_rq));
pblk_setup_e_rq(pblk, &rqd, ppa);
- rqd.end_io = pblk_end_io_sync;
- rqd.private = &wait;
-
/* The write thread schedules erases so that it minimizes disturbances
* with writes. Thus, there is no need to take the LUN semaphore.
*/
- ret = pblk_submit_io(pblk, &rqd);
+ ret = pblk_submit_io_sync(pblk, &rqd);
if (ret) {
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
@@ -886,11 +861,6 @@
goto out;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: sync erase timed out\n");
- }
-
out:
rqd.private = pblk;
__pblk_end_io_erase(pblk, &rqd);
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 71c5850..ca79d8f 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -207,7 +207,6 @@
int nr_secs = rqd->nr_ppas;
int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
int i, ret, hole;
- DECLARE_COMPLETION_ONSTACK(wait);
/* Re-use allocated memory for intermediate lbas */
lba_list_mem = (((void *)rqd->ppa_list) + pblk_dma_ppa_size);
@@ -232,8 +231,6 @@
rqd->bio = new_bio;
rqd->nr_ppas = nr_holes;
rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
- rqd->end_io = pblk_end_io_sync;
- rqd->private = &wait;
if (unlikely(nr_holes == 1)) {
ppa_ptr = rqd->ppa_list;
@@ -241,18 +238,13 @@
rqd->ppa_addr = rqd->ppa_list[0];
}
- ret = pblk_submit_read_io(pblk, rqd);
+ ret = pblk_submit_io_sync(pblk, rqd);
if (ret) {
bio_put(rqd->bio);
- pr_err("pblk: read IO submission failed\n");
+ pr_err("pblk: sync read IO submission failed\n");
goto err;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: partial read I/O timed out\n");
- }
-
if (rqd->error) {
atomic_long_inc(&pblk->read_failed);
#ifdef CONFIG_NVM_DEBUG
@@ -537,7 +529,6 @@
struct nvm_rq rqd;
int data_len;
int ret = NVM_IO_OK;
- DECLARE_COMPLETION_ONSTACK(wait);
memset(&rqd, 0, sizeof(struct nvm_rq));
@@ -577,22 +568,16 @@
bio_set_op_attrs(bio, REQ_OP_READ, 0);
rqd.opcode = NVM_OP_PREAD;
- rqd.end_io = pblk_end_io_sync;
- rqd.private = &wait;
rqd.nr_ppas = gc_rq->secs_to_gc;
rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
rqd.bio = bio;
- if (pblk_submit_read_io(pblk, &rqd)) {
+ if (pblk_submit_io_sync(pblk, &rqd)) {
ret = -EIO;
pr_err("pblk: GC read request failed\n");
goto err_free_bio;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: GC read I/O timed out\n");
- }
atomic_dec(&pblk->inflight_io);
if (rqd.error) {
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index 9772a94..eadb3eb 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -216,7 +216,6 @@
int rq_ppas, rq_len;
int i, j;
int ret = 0;
- DECLARE_COMPLETION_ONSTACK(wait);
ppa_list = p.ppa_list;
meta_list = p.meta_list;
@@ -253,8 +252,6 @@
rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list;
- rqd->end_io = pblk_end_io_sync;
- rqd->private = &wait;
if (pblk_io_aligned(pblk, rq_ppas))
rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
@@ -280,19 +277,13 @@
}
/* If read fails, more padding is needed */
- ret = pblk_submit_io(pblk, rqd);
+ ret = pblk_submit_io_sync(pblk, rqd);
if (ret) {
pr_err("pblk: I/O submission failed: %d\n", ret);
return ret;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: L2P recovery read timed out\n");
- return -EINTR;
- }
atomic_dec(&pblk->inflight_io);
- reinit_completion(&wait);
/* At this point, the read should not fail. If it does, it is a problem
* we cannot recover from here. Need FTL log.
@@ -504,7 +495,6 @@
int ret = 0;
int rec_round;
int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec;
- DECLARE_COMPLETION_ONSTACK(wait);
ppa_list = p.ppa_list;
meta_list = p.meta_list;
@@ -539,8 +529,6 @@
rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list;
- rqd->end_io = pblk_end_io_sync;
- rqd->private = &wait;
if (pblk_io_aligned(pblk, rq_ppas))
rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
@@ -566,18 +554,13 @@
addr_to_gen_ppa(pblk, w_ptr, line->id);
}
- ret = pblk_submit_io(pblk, rqd);
+ ret = pblk_submit_io_sync(pblk, rqd);
if (ret) {
pr_err("pblk: I/O submission failed: %d\n", ret);
return ret;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: L2P recovery read timed out\n");
- }
atomic_dec(&pblk->inflight_io);
- reinit_completion(&wait);
/* This should not happen since the read failed during normal recovery,
* but the media works funny sometimes...
@@ -645,7 +628,6 @@
int i, j;
int ret = 0;
int left_ppas = pblk_calc_sec_in_line(pblk, line);
- DECLARE_COMPLETION_ONSTACK(wait);
ppa_list = p.ppa_list;
meta_list = p.meta_list;
@@ -678,8 +660,6 @@
rqd->ppa_list = ppa_list;
rqd->dma_ppa_list = dma_ppa_list;
rqd->dma_meta_list = dma_meta_list;
- rqd->end_io = pblk_end_io_sync;
- rqd->private = &wait;
if (pblk_io_aligned(pblk, rq_ppas))
rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
@@ -705,19 +685,14 @@
addr_to_gen_ppa(pblk, paddr, line->id);
}
- ret = pblk_submit_io(pblk, rqd);
+ ret = pblk_submit_io_sync(pblk, rqd);
if (ret) {
pr_err("pblk: I/O submission failed: %d\n", ret);
bio_put(bio);
return ret;
}
- if (!wait_for_completion_io_timeout(&wait,
- msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
- pr_err("pblk: L2P recovery read timed out\n");
- }
atomic_dec(&pblk->inflight_io);
- reinit_completion(&wait);
/* Reached the end of the written line */
if (rqd->error) {
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 6c9ea9a..6b64288 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -714,6 +714,7 @@
void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd);
int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
unsigned int nr_secs, unsigned int len,
@@ -1203,7 +1204,6 @@
pr_err("error:%d, ppa_status:%llx\n", error, rqd->ppa_status);
}
-#endif
static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev,
struct ppa_addr *ppas, int nr_ppas)
@@ -1224,14 +1224,50 @@
ppa->g.sec < geo->sec_per_pg)
continue;
-#ifdef CONFIG_NVM_DEBUG
print_ppa(ppa, "boundary", i);
-#endif
+
return 1;
}
return 0;
}
+static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd)
+{
+ struct nvm_tgt_dev *dev = pblk->dev;
+ struct ppa_addr *ppa_list;
+
+ ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
+
+ if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if (rqd->opcode == NVM_OP_PWRITE) {
+ struct pblk_line *line;
+ struct ppa_addr ppa;
+ int i;
+
+ for (i = 0; i < rqd->nr_ppas; i++) {
+ ppa = ppa_list[i];
+ line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
+
+ spin_lock(&line->lock);
+ if (line->state != PBLK_LINESTATE_OPEN) {
+ pr_err("pblk: bad ppa: line:%d,state:%d\n",
+ line->id, line->state);
+ WARN_ON(1);
+ spin_unlock(&line->lock);
+ return -EINVAL;
+ }
+ spin_unlock(&line->lock);
+ }
+ }
+
+ return 0;
+}
+#endif
+
static inline int pblk_boundary_paddr_checks(struct pblk *pblk, u64 paddr)
{
struct pblk_line_meta *lm = &pblk->lm;
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 6017153..8fc949c 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -492,33 +492,46 @@
blk_mq_free_request(rq);
}
+static struct request *nvme_nvm_alloc_request(struct request_queue *q,
+ struct nvm_rq *rqd,
+ struct nvme_nvm_command *cmd)
+{
+ struct nvme_ns *ns = q->queuedata;
+ struct request *rq;
+
+ nvme_nvm_rqtocmd(rqd, ns, cmd);
+
+ rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
+ if (IS_ERR(rq))
+ return rq;
+
+ rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
+
+ if (rqd->bio) {
+ blk_init_request_from_bio(rq, rqd->bio);
+ } else {
+ rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ rq->__data_len = 0;
+ }
+
+ return rq;
+}
+
static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
{
struct request_queue *q = dev->q;
- struct nvme_ns *ns = q->queuedata;
- struct request *rq;
- struct bio *bio = rqd->bio;
struct nvme_nvm_command *cmd;
+ struct request *rq;
cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
- nvme_nvm_rqtocmd(rqd, ns, cmd);
-
- rq = nvme_alloc_request(q, (struct nvme_command *)cmd, 0, NVME_QID_ANY);
+ rq = nvme_nvm_alloc_request(q, rqd, cmd);
if (IS_ERR(rq)) {
kfree(cmd);
return PTR_ERR(rq);
}
- rq->cmd_flags &= ~REQ_FAILFAST_DRIVER;
-
- if (bio) {
- blk_init_request_from_bio(rq, bio);
- } else {
- rq->ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
- rq->__data_len = 0;
- }
rq->end_io_data = rqd;
@@ -527,6 +540,34 @@
return 0;
}
+static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+ struct request_queue *q = dev->q;
+ struct request *rq;
+ struct nvme_nvm_command cmd;
+ int ret = 0;
+
+ memset(&cmd, 0, sizeof(struct nvme_nvm_command));
+
+ rq = nvme_nvm_alloc_request(q, rqd, &cmd);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
+ /* I/Os can fail and the error is signaled through rqd. Callers must
+ * handle the error accordingly.
+ */
+ blk_execute_rq(q, NULL, rq, 0);
+ if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
+ ret = -EINTR;
+
+ rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
+ rqd->error = nvme_req(rq)->status;
+
+ blk_mq_free_request(rq);
+
+ return ret;
+}
+
static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name)
{
struct nvme_ns *ns = nvmdev->q->queuedata;
@@ -562,6 +603,7 @@
.set_bb_tbl = nvme_nvm_set_bb_tbl,
.submit_io = nvme_nvm_submit_io,
+ .submit_io_sync = nvme_nvm_submit_io_sync,
.create_dma_pool = nvme_nvm_create_dma_pool,
.destroy_dma_pool = nvme_nvm_destroy_dma_pool,
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 4f0e4a0..b7f111f 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -56,6 +56,7 @@
typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
typedef void (nvm_destroy_dma_pool_fn)(void *);
typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
@@ -69,6 +70,7 @@
nvm_op_set_bb_fn *set_bb_tbl;
nvm_submit_io_fn *submit_io;
+ nvm_submit_io_sync_fn *submit_io_sync;
nvm_create_dma_pool_fn *create_dma_pool;
nvm_destroy_dma_pool_fn *destroy_dma_pool;
@@ -477,6 +479,7 @@
int, int);
extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
+extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *);
extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int);
extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
void *);