libnvdimm, nfit, nd_blk: driver for BLK-mode access persistent memory
The libnvdimm implementation handles allocating dimm address space (DPA)
between PMEM and BLK mode interfaces. After DPA has been allocated from
a BLK-region to a BLK-namespace, the nd_blk driver attaches to handle I/O
as a struct bio based block device. Unlike PMEM, BLK is required to
handle platform specific details like mmio register formats and memory
controller interleave. For this reason the libnvdimm generic nd_blk
driver calls back into the bus provider to carry out the I/O.
This initial implementation handles the BLK interface defined by the
ACPI 6 NFIT [1] and the NVDIMM DSM Interface Example [2] composed from
DCR (dimm control region), BDW (block data window), IDT (interleave
descriptor) NFIT structures and the hardware register format.
[1]: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf
[2]: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 204ee07..72226ac 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -34,6 +34,19 @@
Say Y if you want to use an NVDIMM
+config ND_BLK
+ tristate "BLK: Block data window (aperture) device support"
+ default LIBNVDIMM
+ select ND_BTT if BTT
+ help
+ Support NVDIMMs, or other devices, that implement a BLK-mode
+ access capability. BLK-mode access uses memory-mapped-i/o
+ apertures to access persistent media.
+
+ Say Y if your platform firmware emits an ACPI.NFIT table
+ (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode
+ capabilities.
+
config ND_BTT
tristate
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index d2aab6c..594bb97 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -1,11 +1,14 @@
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
obj-$(CONFIG_ND_BTT) += nd_btt.o
+obj-$(CONFIG_ND_BLK) += nd_blk.o
nd_pmem-y := pmem.o
nd_btt-y := btt.o
+nd_blk-y := blk.o
+
libnvdimm-y := core.o
libnvdimm-y += bus.o
libnvdimm-y += dimm_devs.o
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
new file mode 100644
index 0000000..9ac0c26
--- /dev/null
+++ b/drivers/nvdimm/blk.c
@@ -0,0 +1,245 @@
+/*
+ * NVDIMM Block Window Driver
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/blkdev.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/nd.h>
+#include <linux/sizes.h>
+#include "nd.h"
+
+/*
+ * Per-namespace driver state, allocated in nd_blk_probe() and stored as
+ * the driver data of the probed device.
+ */
+struct nd_blk_device {
+ struct request_queue *queue; /* queue serviced by nd_blk_make_request() */
+ struct gendisk *disk; /* disk created by nd_blk_attach_disk() */
+ struct nd_namespace_blk *nsblk; /* namespace whose resources back the I/O */
+ struct nd_blk_region *ndbr; /* parent region; provides do_io() */
+ size_t disk_size; /* namespace capacity in bytes */
+};
+
+/* block major returned by register_blkdev() in nd_blk_init() */
+static int nd_blk_major;
+
+/*
+ * Translate a namespace-relative byte offset into a device address.
+ *
+ * The namespace is walked as the concatenation of nsblk->res[] in index
+ * order.  Returns the address inside the resource that contains
+ * @ns_offset, or SIZE_MAX (after a one-time warning) when the offset
+ * lies past the last resource or the @len byte span would run beyond
+ * the end of the resource it starts in.
+ */
+static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
+		resource_size_t ns_offset, unsigned int len)
+{
+	int idx;
+
+	for (idx = 0; idx < nsblk->num_resources; idx++) {
+		struct resource *res = nsblk->res[idx];
+		resource_size_t extent = resource_size(res);
+
+		if (ns_offset >= extent) {
+			/* not in this resource; rebase onto the next one */
+			ns_offset -= extent;
+			continue;
+		}
+
+		if (ns_offset + len <= extent)
+			return res->start + ns_offset;
+
+		dev_WARN_ONCE(&nsblk->common.dev, 1, "illegal request\n");
+		return SIZE_MAX;
+	}
+
+	dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n");
+	return SIZE_MAX;
+}
+
+/*
+ * bio entry point for the BLK disk.
+ *
+ * Each segment of @bio is translated to a device address and handed to
+ * the region's do_io() callback.  Processing stops at the first segment
+ * that cannot be translated (-EIO) or whose do_io() call fails, and the
+ * bio is completed with that status (0 when every segment succeeded).
+ */
+static void nd_blk_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct nd_blk_device *blk_dev = bio->bi_bdev->bd_disk->private_data;
+	struct nd_namespace_blk *nsblk = blk_dev->nsblk;
+	struct nd_blk_region *ndbr = blk_dev->ndbr;
+	int rw = bio_data_dir(bio);
+	struct bvec_iter iter;
+	struct bio_vec bvec;
+	int err = 0;
+
+	bio_for_each_segment(bvec, bio, iter) {
+		unsigned int len = bvec.bv_len;
+		resource_size_t dev_offset;
+		void *kaddr;
+
+		BUG_ON(len > PAGE_SIZE);
+
+		dev_offset = to_dev_offset(nsblk,
+				iter.bi_sector << SECTOR_SHIFT, len);
+		if (dev_offset == SIZE_MAX) {
+			err = -EIO;
+			break;
+		}
+
+		/* the page is mapped only for the duration of the transfer */
+		kaddr = kmap_atomic(bvec.bv_page);
+		err = ndbr->do_io(ndbr, dev_offset, kaddr + bvec.bv_offset,
+				len, rw);
+		kunmap_atomic(kaddr);
+		if (err)
+			break;
+	}
+
+	bio_endio(bio, err);
+}
+
+/*
+ * Byte-granular accessor installed as ndns->rw_bytes by nd_blk_probe().
+ *
+ * @ndns:   namespace being accessed; the driver data of its claim device
+ *          is the nd_blk_device
+ * @offset: byte offset from the start of the namespace
+ * @iobuf:  buffer passed through to the region's do_io() callback
+ * @n:      number of bytes to transfer
+ * @rw:     direction, passed through to do_io()
+ *
+ * Returns -EFAULT when the span does not fit inside the disk, -EIO when
+ * it cannot be translated to a device address, otherwise the result of
+ * do_io().
+ */
+static int nd_blk_rw_bytes(struct nd_namespace_common *ndns,
+		resource_size_t offset, void *iobuf, size_t n, int rw)
+{
+	struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim);
+	struct nd_namespace_blk *nsblk = blk_dev->nsblk;
+	struct nd_blk_region *ndbr = blk_dev->ndbr;
+	resource_size_t dev_offset;
+
+	/*
+	 * Validate the range before translating it: to_dev_offset() emits
+	 * its own warning for offsets it cannot resolve, so translating
+	 * first can report the same bad request twice.  The comparison is
+	 * arranged so that offset + n is never computed and cannot wrap.
+	 */
+	if (unlikely(n > blk_dev->disk_size
+			|| offset > blk_dev->disk_size - n)) {
+		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
+		return -EFAULT;
+	}
+
+	dev_offset = to_dev_offset(nsblk, offset, n);
+	if (unlikely(dev_offset == SIZE_MAX))
+		return -EIO;
+
+	return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw);
+}
+
+/* no block-device callbacks are provided; only the owning module is set */
+static const struct block_device_operations nd_blk_fops = {
+ .owner = THIS_MODULE,
+};
+
+/*
+ * Allocate the request queue and gendisk for a BLK namespace, configure
+ * them and add the disk.
+ *
+ * The queue's logical block size is the namespace lbasize and the disk
+ * capacity is blk_dev->disk_size expressed in sectors.
+ *
+ * Returns 0 on success or -ENOMEM if the queue or the disk cannot be
+ * allocated (the queue is cleaned up again when the disk allocation
+ * fails).
+ */
+static int nd_blk_attach_disk(struct nd_namespace_common *ndns,
+		struct nd_blk_device *blk_dev)
+{
+	struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev);
+	struct request_queue *q;
+	struct gendisk *disk;
+
+	q = blk_alloc_queue(GFP_KERNEL);
+	blk_dev->queue = q;
+	if (!q)
+		return -ENOMEM;
+
+	blk_queue_make_request(q, nd_blk_make_request);
+	blk_queue_max_hw_sectors(q, UINT_MAX);
+	blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
+	blk_queue_logical_block_size(q, nsblk->lbasize);
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+
+	disk = alloc_disk(0);
+	blk_dev->disk = disk;
+	if (!disk) {
+		blk_cleanup_queue(q);
+		return -ENOMEM;
+	}
+
+	disk->driverfs_dev = &ndns->dev;
+	disk->major = nd_blk_major;
+	disk->first_minor = 0;
+	disk->fops = &nd_blk_fops;
+	disk->private_data = blk_dev;
+	disk->queue = q;
+	disk->flags = GENHD_FL_EXT_DEVT;
+	nvdimm_namespace_disk_name(ndns, disk->disk_name);
+	set_capacity(disk, blk_dev->disk_size >> SECTOR_SHIFT);
+	add_disk(disk);
+
+	return 0;
+}
+
+/*
+ * Probe a BLK namespace (or a BTT stacked on one).
+ *
+ * Allocates the nd_blk_device, records capacity, region and namespace,
+ * and installs nd_blk_rw_bytes() as the namespace byte accessor.  Then
+ * one of three things happens:
+ *  - @dev is a BTT device: attach the BTT to the namespace;
+ *  - nd_btt_probe() returns 0: fail this probe with -ENXIO, since the
+ *    namespace will be probed again as a BTT device;
+ *  - otherwise: expose the namespace as a raw disk.
+ *
+ * The nd_blk_device is freed again on any non-zero result.
+ */
+static int nd_blk_probe(struct device *dev)
+{
+	struct nd_namespace_common *ndns = nvdimm_namespace_common_probe(dev);
+	struct nd_blk_device *blk_dev;
+	int rc;
+
+	if (IS_ERR(ndns))
+		return PTR_ERR(ndns);
+
+	blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL);
+	if (!blk_dev)
+		return -ENOMEM;
+
+	blk_dev->disk_size = nvdimm_namespace_capacity(ndns);
+	blk_dev->ndbr = to_nd_blk_region(dev->parent);
+	blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev);
+	dev_set_drvdata(dev, blk_dev);
+	ndns->rw_bytes = nd_blk_rw_bytes;
+
+	if (is_nd_btt(dev)) {
+		rc = nvdimm_namespace_attach_btt(ndns);
+	} else if (nd_btt_probe(ndns, blk_dev) == 0) {
+		/* we'll come back as btt-blk */
+		rc = -ENXIO;
+	} else {
+		rc = nd_blk_attach_disk(ndns, blk_dev);
+	}
+
+	if (rc)
+		kfree(blk_dev);
+	return rc;
+}
+
+/*
+ * Undo nd_blk_attach_disk(): delete the disk, drop the disk reference
+ * and clean up the request queue, in that order.
+ */
+static void nd_blk_detach_disk(struct nd_blk_device *blk_dev)
+{
+	struct gendisk *disk = blk_dev->disk;
+
+	del_gendisk(disk);
+	put_disk(disk);
+	blk_cleanup_queue(blk_dev->queue);
+}
+
+/*
+ * Driver remove callback: detach the BTT for BTT devices, otherwise tear
+ * down the raw disk, then free the per-device state.  Always returns 0.
+ */
+static int nd_blk_remove(struct device *dev)
+{
+	struct nd_blk_device *blk_dev = dev_get_drvdata(dev);
+
+	if (!is_nd_btt(dev))
+		nd_blk_detach_disk(blk_dev);
+	else
+		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
+
+	kfree(blk_dev);
+	return 0;
+}
+
+/*
+ * Driver descriptor registered in nd_blk_init(): routes probe/remove to
+ * nd_blk_probe()/nd_blk_remove() for ND_DRIVER_NAMESPACE_BLK devices.
+ */
+static struct nd_device_driver nd_blk_driver = {
+ .probe = nd_blk_probe,
+ .remove = nd_blk_remove,
+ .drv = {
+ .name = "nd_blk",
+ },
+ .type = ND_DRIVER_NAMESPACE_BLK,
+};
+
+/*
+ * Module init: register the "nd_blk" block major, remember it in
+ * nd_blk_major, then register the driver.  The block major is released
+ * again if driver registration fails.
+ */
+static int __init nd_blk_init(void)
+{
+	int major, rc;
+
+	major = register_blkdev(0, "nd_blk");
+	if (major < 0)
+		return major;
+	nd_blk_major = major;
+
+	rc = nd_driver_register(&nd_blk_driver);
+	if (rc >= 0)
+		return rc;
+
+	unregister_blkdev(nd_blk_major, "nd_blk");
+	return rc;
+}
+
+/*
+ * Module exit: unregister the driver first, then release the block major
+ * obtained in nd_blk_init().
+ */
+static void __exit nd_blk_exit(void)
+{
+ driver_unregister(&nd_blk_driver.drv);
+ unregister_blkdev(nd_blk_major, "nd_blk");
+}
+
+/* module metadata, nd device alias and init/exit entry points */
+MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK);
+module_init(nd_blk_init);
+module_exit(nd_blk_exit);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 83b179e..c05eb80 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -209,6 +209,15 @@
}
EXPORT_SYMBOL_GPL(to_nvdimm);
+/*
+ * Return the nvdimm referenced by the first mapping (mapping[0]) of the
+ * region embedded in @ndbr.
+ */
+struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr)
+{
+	return ndbr->nd_region.mapping[0].nvdimm;
+}
+EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm);
+
struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
{
struct nvdimm *nvdimm = nd_mapping->nvdimm;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 4aa647c..1ce1e70 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -173,6 +173,65 @@
return size;
}
+/*
+ * Check a BLK namespace against the DPA resources of the dimm behind the
+ * first mapping of its parent region.
+ *
+ * The namespace is valid only when all of the following hold:
+ *  - it has a uuid and a non-zero lbasize, and the dimm has drvdata;
+ *  - no DPA resource named after the namespace's label id is flagged
+ *    DPA_RESOURCE_ADJUSTED;
+ *  - the number of such resources equals nsblk->num_resources;
+ *  - every entry of nsblk->res[] is still present in the DPA list.
+ */
+static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
+{
+	struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent);
+	struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[0]);
+	struct nd_label_id label_id;
+	struct resource *res;
+	int matched = 0, i;
+
+	if (!nsblk->uuid || !nsblk->lbasize || !ndd)
+		return false;
+
+	nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
+	for_each_dpa_resource(ndd, res) {
+		if (strcmp(res->name, label_id.id) == 0) {
+			/*
+			 * Resources with unacknowledged adjustments
+			 * indicate a failure to update labels
+			 */
+			if (res->flags & DPA_RESOURCE_ADJUSTED)
+				return false;
+			matched++;
+		}
+	}
+
+	/* These values match after a successful label update */
+	if (matched != nsblk->num_resources)
+		return false;
+
+	for (i = 0; i < nsblk->num_resources; i++) {
+		bool live = false;
+
+		for_each_dpa_resource(ndd, res) {
+			if (res == nsblk->res[i]) {
+				live = true;
+				break;
+			}
+		}
+		/* stale resource */
+		if (!live)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Run __nd_namespace_blk_validate() under the nvdimm bus lock.
+ *
+ * Despite the resource_size_t return type the result is a truth value:
+ * non-zero when the namespace passed validation, 0 otherwise.
+ */
+resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
+{
+	struct device *dev = &nsblk->common.dev;
+	bool valid;
+
+	nvdimm_bus_lock(dev);
+	valid = __nd_namespace_blk_validate(nsblk);
+	nvdimm_bus_unlock(dev);
+
+	return valid;
+}
+EXPORT_SYMBOL(nd_namespace_blk_validate);
+
+
static int nd_namespace_label_update(struct nd_region *nd_region,
struct device *dev)
{
@@ -1224,7 +1283,11 @@
return ERR_PTR(-ENODEV);
}
} else if (is_namespace_blk(&ndns->dev)) {
- return ERR_PTR(-ENODEV); /* TODO */
+ struct nd_namespace_blk *nsblk;
+
+ nsblk = to_nd_namespace_blk(&ndns->dev);
+ if (!nd_namespace_blk_validate(nsblk))
+ return ERR_PTR(-ENODEV);
}
return ndns;
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 5e64139..e1970c7 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -43,9 +43,8 @@
};
bool is_nvdimm(struct device *dev);
-bool is_nd_blk(struct device *dev);
bool is_nd_pmem(struct device *dev);
-struct nd_btt;
+bool is_nd_blk(struct device *dev);
struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
int __init nvdimm_bus_init(void);
void nvdimm_bus_exit(void);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 1b937c23..f153f43 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -102,6 +102,15 @@
struct nd_mapping mapping[0];
};
+/*
+ * A BLK region: a struct nd_region plus the callbacks supplied by the bus
+ * provider.  The callbacks are copied from the region descriptor when the
+ * region is created.
+ */
+struct nd_blk_region {
+ /* called by nd_blk_region_init() when a BLK region is probed */
+ int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
+ /* called when the region driver is removed from a BLK region */
+ void (*disable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
+ /* carries out the I/O that nd_blk issues at device address @dpa */
+ int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
+ void *iobuf, u64 len, int rw);
+ /* opaque pointer, see nd_blk_region_{set_,}provider_data() */
+ void *blk_provider_data;
+ /* kept last: nd_region ends in the variable-length mapping[] array */
+ struct nd_region nd_region;
+};
+
/*
* Lookup next in the repeating sequence of 01, 10, and 11.
*/
@@ -171,8 +180,6 @@
#endif
struct nd_region *to_nd_region(struct device *dev);
-unsigned int nd_region_acquire_lane(struct nd_region *nd_region);
-void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
int nd_region_to_nstype(struct nd_region *nd_region);
int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
u64 nd_region_interleave_set_cookie(struct nd_region *nd_region);
@@ -192,4 +199,6 @@
int nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns);
const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
char *name);
+/* runs the provider enable() callback for BLK regions; returns 0 for others */
+int nd_blk_region_init(struct nd_region *nd_region);
+/* non-zero when the BLK namespace passes validation, 0 otherwise */
+resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk);
#endif /* __ND_H__ */
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index eb8aebc..f28f78c 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -18,11 +18,10 @@
static int nd_region_probe(struct device *dev)
{
- int err;
+ int err, rc;
static unsigned long once;
struct nd_region_namespaces *num_ns;
struct nd_region *nd_region = to_nd_region(dev);
- int rc = nd_region_register_namespaces(nd_region, &err);
if (nd_region->num_lanes > num_online_cpus()
&& nd_region->num_lanes < num_possible_cpus()
@@ -34,6 +33,11 @@
nd_region->num_lanes);
}
+ rc = nd_blk_region_init(nd_region);
+ if (rc)
+ return rc;
+
+ rc = nd_region_register_namespaces(nd_region, &err);
num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
if (!num_ns)
return -ENOMEM;
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index fe8ec21..2cfb3f7 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -11,6 +11,7 @@
* General Public License for more details.
*/
#include <linux/scatterlist.h>
+#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sort.h>
@@ -34,7 +35,10 @@
}
free_percpu(nd_region->lane);
ida_simple_remove(®ion_ida, nd_region->id);
- kfree(nd_region);
+ if (is_nd_blk(dev))
+ kfree(to_nd_blk_region(dev));
+ else
+ kfree(nd_region);
}
static struct device_type nd_blk_device_type = {
@@ -71,6 +75,33 @@
}
EXPORT_SYMBOL_GPL(to_nd_region);
+/*
+ * Convert a region device to its enclosing nd_blk_region.  Warns when
+ * @dev is not a BLK region, but performs the conversion regardless.
+ */
+struct nd_blk_region *to_nd_blk_region(struct device *dev)
+{
+	struct nd_blk_region *ndbr;
+
+	ndbr = container_of(to_nd_region(dev), struct nd_blk_region,
+			nd_region);
+	WARN_ON(!is_nd_blk(dev));
+	return ndbr;
+}
+EXPORT_SYMBOL_GPL(to_nd_blk_region);
+
+/* Return the provider_data pointer stored in @nd_region. */
+void *nd_region_provider_data(struct nd_region *nd_region)
+{
+ return nd_region->provider_data;
+}
+EXPORT_SYMBOL_GPL(nd_region_provider_data);
+
+/* Return the pointer last stored with nd_blk_region_set_provider_data(). */
+void *nd_blk_region_provider_data(struct nd_blk_region *ndbr)
+{
+ return ndbr->blk_provider_data;
+}
+EXPORT_SYMBOL_GPL(nd_blk_region_provider_data);
+
+/* Store @data as the BLK-specific provider pointer of @ndbr. */
+void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data)
+{
+ ndbr->blk_provider_data = data;
+}
+EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);
+
/**
* nd_region_to_nstype() - region to an integer namespace type
* @nd_region: region-device to interrogate
@@ -365,7 +396,8 @@
/*
* Upon successful probe/remove, take/release a reference on the
* associated interleave set (if present), and plant new btt + namespace
- * seeds.
+ * seeds. Also, on the removal of a BLK region, notify the provider to
+ * disable the region.
*/
static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
struct device *dev, bool probe)
@@ -385,8 +417,14 @@
nd_mapping->labels = NULL;
put_ndd(ndd);
nd_mapping->ndd = NULL;
- atomic_dec(&nvdimm->busy);
+ if (ndd)
+ atomic_dec(&nvdimm->busy);
}
+
+ if (is_nd_pmem(dev))
+ return;
+
+ to_nd_blk_region(dev)->disable(nvdimm_bus, dev);
}
if (dev->parent && is_nd_blk(dev->parent) && probe) {
nd_region = to_nd_region(dev->parent);
@@ -526,11 +564,21 @@
};
EXPORT_SYMBOL_GPL(nd_mapping_attribute_group);
-void *nd_region_provider_data(struct nd_region *nd_region)
+/*
+ * Enable a BLK region through its provider callback.
+ *
+ * Returns 0 without doing anything for non-BLK regions, -ENXIO for a BLK
+ * region that has no mappings, otherwise the result of the region's
+ * enable() callback.
+ */
+int nd_blk_region_init(struct nd_region *nd_region)
{
- return nd_region->provider_data;
+ struct device *dev = &nd_region->dev;
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+ /* only BLK regions carry enable/disable callbacks */
+ if (!is_nd_blk(dev))
+ return 0;
+
+ if (nd_region->ndr_mappings < 1) {
+ dev_err(dev, "invalid BLK region\n");
+ return -ENXIO;
+ }
+
+ return to_nd_blk_region(dev)->enable(nvdimm_bus, dev);
}
-EXPORT_SYMBOL_GPL(nd_region_provider_data);
/**
* nd_region_acquire_lane - allocate and lock a lane
@@ -591,6 +639,7 @@
{
struct nd_region *nd_region;
struct device *dev;
+ void *region_buf;
unsigned int i;
for (i = 0; i < ndr_desc->num_mappings; i++) {
@@ -605,10 +654,30 @@
}
}
- nd_region = kzalloc(sizeof(struct nd_region)
- + sizeof(struct nd_mapping) * ndr_desc->num_mappings,
- GFP_KERNEL);
- if (!nd_region)
+ if (dev_type == &nd_blk_device_type) {
+ struct nd_blk_region_desc *ndbr_desc;
+ struct nd_blk_region *ndbr;
+
+ ndbr_desc = to_blk_region_desc(ndr_desc);
+ ndbr = kzalloc(sizeof(*ndbr) + sizeof(struct nd_mapping)
+ * ndr_desc->num_mappings,
+ GFP_KERNEL);
+ if (ndbr) {
+ nd_region = &ndbr->nd_region;
+ ndbr->enable = ndbr_desc->enable;
+ ndbr->disable = ndbr_desc->disable;
+ ndbr->do_io = ndbr_desc->do_io;
+ }
+ region_buf = ndbr;
+ } else {
+ nd_region = kzalloc(sizeof(struct nd_region)
+ + sizeof(struct nd_mapping)
+ * ndr_desc->num_mappings,
+ GFP_KERNEL);
+ region_buf = nd_region;
+ }
+
+ if (!region_buf)
return NULL;
nd_region->id = ida_simple_get(®ion_ida, 0, 0, GFP_KERNEL);
if (nd_region->id < 0)
@@ -654,7 +723,7 @@
err_percpu:
ida_simple_remove(®ion_ida, nd_region->id);
err_id:
- kfree(nd_region);
+ kfree(region_buf);
return NULL;
}