/*
 * Copyright(c) 2017 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/magic.h>
#include <linux/cdev.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/fs.h>

static int nr_dax = CONFIG_NR_DEV_DAX;
module_param(nr_dax, int, S_IRUGO);
MODULE_PARM_DESC(nr_dax, "max number of dax device instances");

static dev_t dax_devt;
DEFINE_STATIC_SRCU(dax_srcu);
static struct vfsmount *dax_mnt;
static DEFINE_IDA(dax_minor_ida);
static struct kmem_cache *dax_cache __read_mostly;
static struct super_block *dax_superblock __read_mostly;

#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
static struct hlist_head dax_host_list[DAX_HASH_SIZE];
static DEFINE_SPINLOCK(dax_host_lock);

int dax_read_lock(void)
{
	return srcu_read_lock(&dax_srcu);
}
EXPORT_SYMBOL_GPL(dax_read_lock);

void dax_read_unlock(int id)
{
	srcu_read_unlock(&dax_srcu, id);
}
EXPORT_SYMBOL_GPL(dax_read_unlock);

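/*
 * Example calling pattern (a sketch, not lifted from an in-tree caller;
 * "dax_dev" stands for a dax_device reference the caller already holds):
 *
 *	int id = dax_read_lock();
 *
 *	if (dax_alive(dax_dev))
 *		...issue operations via dax_dev->ops...
 *	dax_read_unlock(id);
 *
 * The id returned by dax_read_lock() must be passed back to
 * dax_read_unlock() to exit the srcu read-side critical section.
 */
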
/**
 * struct dax_device - anchor object for dax services
 * @list: node for this device's membership in the dax_host_list
 * @inode: core vfs
 * @cdev: optional character interface for "device dax"
 * @host: optional name for lookups where the device path is not available
 * @private: dax driver private data
 * @alive: !alive + rcu grace period == no new operations / mappings
 * @ops: operations vectored through this device
 */
struct dax_device {
	struct hlist_node list;
	struct inode inode;
	struct cdev cdev;
	const char *host;
	void *private;
	bool alive;
	const struct dax_operations *ops;
};

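/*
 * dax_alive() must be called while holding dax_read_lock(); the result
 * is only stable for the duration of that srcu read-side critical
 * section (see the kill_dax() comment below).
 */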
bool dax_alive(struct dax_device *dax_dev)
{
	lockdep_assert_held(&dax_srcu);
	return dax_dev->alive;
}
EXPORT_SYMBOL_GPL(dax_alive);

static int dax_host_hash(const char *host)
{
	return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
}

/*
 * Note, rcu is not protecting the liveness of dax_dev; rcu is ensuring
 * that any fault handlers or operations that might have seen
 * dax_alive() have completed.  Any operations that start after
 * synchronize_srcu() has run will abort upon seeing !dax_alive().
 */
void kill_dax(struct dax_device *dax_dev)
{
	if (!dax_dev)
		return;

	dax_dev->alive = false;

	synchronize_srcu(&dax_srcu);

	spin_lock(&dax_host_lock);
	hlist_del_init(&dax_dev->list);
	spin_unlock(&dax_host_lock);

	dax_dev->private = NULL;
}
EXPORT_SYMBOL_GPL(kill_dax);

static struct inode *dax_alloc_inode(struct super_block *sb)
{
	struct dax_device *dax_dev;

	/* kmem_cache_alloc() can fail; the vfs treats NULL as -ENOMEM */
	dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
	if (!dax_dev)
		return NULL;

	return &dax_dev->inode;
}

static struct dax_device *to_dax_dev(struct inode *inode)
{
	return container_of(inode, struct dax_device, inode);
}

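/*
 * Final teardown of the anchor inode: dax_i_callback() runs after an
 * rcu grace period, so the host name and the dax_device itself are
 * only freed once concurrent lookups can no longer observe the inode.
 */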
static void dax_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);
	struct dax_device *dax_dev = to_dax_dev(inode);

	kfree(dax_dev->host);
	dax_dev->host = NULL;
	ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
	kmem_cache_free(dax_cache, dax_dev);
}

static void dax_destroy_inode(struct inode *inode)
{
	struct dax_device *dax_dev = to_dax_dev(inode);

	WARN_ONCE(dax_dev->alive,
			"kill_dax() must be called before final iput()\n");
	call_rcu(&inode->i_rcu, dax_i_callback);
}

static const struct super_operations dax_sops = {
	.statfs = simple_statfs,
	.alloc_inode = dax_alloc_inode,
	.destroy_inode = dax_destroy_inode,
	.drop_inode = generic_delete_inode,
};

static struct dentry *dax_mount(struct file_system_type *fs_type,
		int flags, const char *dev_name, void *data)
{
	return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
}

static struct file_system_type dax_fs_type = {
	.name = "dax",
	.mount = dax_mount,
	.kill_sb = kill_anon_super,
};

static int dax_test(struct inode *inode, void *data)
{
	dev_t devt = *(dev_t *) data;

	return inode->i_rdev == devt;
}

static int dax_set(struct inode *inode, void *data)
{
	dev_t devt = *(dev_t *) data;

	inode->i_rdev = devt;
	return 0;
}

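/*
 * Find or create the anchor inode for @devt in the dax pseudo
 * filesystem.  iget5_locked() keys the lookup by devt (compared via
 * dax_test() above), so repeat callers with the same devt share one
 * dax_device until the final iput().
 */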
static struct dax_device *dax_dev_get(dev_t devt)
{
	struct dax_device *dax_dev;
	struct inode *inode;

	inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31),
			dax_test, dax_set, &devt);

	if (!inode)
		return NULL;

	dax_dev = to_dax_dev(inode);
	if (inode->i_state & I_NEW) {
		dax_dev->alive = true;
		inode->i_cdev = &dax_dev->cdev;
		inode->i_mode = S_IFCHR;
		inode->i_flags = S_DAX;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		unlock_new_inode(inode);
	}

	return dax_dev;
}

static void dax_add_host(struct dax_device *dax_dev, const char *host)
{
	int hash;

	/*
	 * Unconditionally init dax_dev since it's coming from a
	 * non-zeroed slab cache
	 */
	INIT_HLIST_NODE(&dax_dev->list);
	dax_dev->host = host;
	if (!host)
		return;

	hash = dax_host_hash(host);
	spin_lock(&dax_host_lock);
	hlist_add_head(&dax_dev->list, &dax_host_list[hash]);
	spin_unlock(&dax_host_lock);
}

struct dax_device *alloc_dax(void *private, const char *__host,
		const struct dax_operations *ops)
{
	struct dax_device *dax_dev;
	const char *host;
	dev_t devt;
	int minor;

	host = kstrdup(__host, GFP_KERNEL);
	if (__host && !host)
		return NULL;

	minor = ida_simple_get(&dax_minor_ida, 0, nr_dax, GFP_KERNEL);
	if (minor < 0)
		goto err_minor;

	devt = MKDEV(MAJOR(dax_devt), minor);
	dax_dev = dax_dev_get(devt);
	if (!dax_dev)
		goto err_dev;

	dax_add_host(dax_dev, host);
	dax_dev->ops = ops;
	dax_dev->private = private;
	return dax_dev;

 err_dev:
	ida_simple_remove(&dax_minor_ida, minor);
 err_minor:
	kfree(host);
	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_dax);
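
/*
 * Typical registration/teardown sequence from a dax-capable driver (a
 * sketch; error handling elided, and "my_ops" / "my_private" are
 * hypothetical driver-provided values):
 *
 *	struct dax_device *dax_dev;
 *
 *	dax_dev = alloc_dax(my_private, dev_name(dev), &my_ops);
 *
 *	...service operations until driver unbind...
 *
 *	kill_dax(dax_dev);	// fail new ops, wait for in-flight ones
 *	put_dax(dax_dev);	// drop the inode reference
 */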

void put_dax(struct dax_device *dax_dev)
{
	if (!dax_dev)
		return;
	iput(&dax_dev->inode);
}
EXPORT_SYMBOL_GPL(put_dax);

/**
 * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
 * @host: alternate name for the device registered by a dax driver
 */
struct dax_device *dax_get_by_host(const char *host)
{
	struct dax_device *dax_dev, *found = NULL;
	int hash, id;

	if (!host)
		return NULL;

	hash = dax_host_hash(host);

	id = dax_read_lock();
	spin_lock(&dax_host_lock);
	hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
		if (!dax_alive(dax_dev)
				|| strcmp(host, dax_dev->host) != 0)
			continue;

		if (igrab(&dax_dev->inode))
			found = dax_dev;
		break;
	}
	spin_unlock(&dax_host_lock);
	dax_read_unlock(id);

	return found;
}
EXPORT_SYMBOL_GPL(dax_get_by_host);
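
/*
 * Example lookup (a sketch; assumes the dax driver registered its
 * device with the block disk name as @host):
 *
 *	struct dax_device *dax_dev;
 *
 *	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
 *	if (!dax_dev)
 *		return -EOPNOTSUPP;
 *	...
 *	put_dax(dax_dev);	// drop the reference igrab() took
 */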

/**
 * inode_dax() - convert a public inode into its dax_dev
 * @inode: An inode with i_cdev pointing to a dax_dev
 *
 * Note this is not equivalent to to_dax_dev() which is for private
 * internal use where we know the inode filesystem type == dax_fs_type.
 */
struct dax_device *inode_dax(struct inode *inode)
{
	struct cdev *cdev = inode->i_cdev;

	return container_of(cdev, struct dax_device, cdev);
}
EXPORT_SYMBOL_GPL(inode_dax);

struct inode *dax_inode(struct dax_device *dax_dev)
{
	return &dax_dev->inode;
}
EXPORT_SYMBOL_GPL(dax_inode);

void *dax_get_private(struct dax_device *dax_dev)
{
	return dax_dev->private;
}
EXPORT_SYMBOL_GPL(dax_get_private);

static void init_once(void *_dax_dev)
{
	struct dax_device *dax_dev = _dax_dev;
	struct inode *inode = &dax_dev->inode;

	inode_init_once(inode);
}

static int __dax_fs_init(void)
{
	int rc;

	dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0,
			(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
			 SLAB_MEM_SPREAD|SLAB_ACCOUNT),
			init_once);
	if (!dax_cache)
		return -ENOMEM;

	rc = register_filesystem(&dax_fs_type);
	if (rc)
		goto err_register_fs;

	dax_mnt = kern_mount(&dax_fs_type);
	if (IS_ERR(dax_mnt)) {
		rc = PTR_ERR(dax_mnt);
		goto err_mount;
	}
	dax_superblock = dax_mnt->mnt_sb;

	return 0;

 err_mount:
	unregister_filesystem(&dax_fs_type);
 err_register_fs:
	kmem_cache_destroy(dax_cache);

	return rc;
}

static void __dax_fs_exit(void)
{
	kern_unmount(dax_mnt);
	unregister_filesystem(&dax_fs_type);
	kmem_cache_destroy(dax_cache);
}

static int __init dax_fs_init(void)
{
	int rc;

	rc = __dax_fs_init();
	if (rc)
		return rc;

	/* reserve at least 256 minors regardless of the nr_dax setting */
	nr_dax = max(nr_dax, 256);
	rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax");
	if (rc)
		__dax_fs_exit();
	return rc;
}

static void __exit dax_fs_exit(void)
{
	unregister_chrdev_region(dax_devt, nr_dax);
	ida_destroy(&dax_minor_ida);
	__dax_fs_exit();
}

MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL v2");
subsys_initcall(dax_fs_init);
module_exit(dax_fs_exit);