/*
 * Network block device - make block devices work over TCP
 *
 * Note that you can not swap over this thing, yet. Seems to work but
 * deadlocks sometimes - you can not swap over TCP in general.
 *
 * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz>
 * Parts copyright 2001 Steven Whitehouse <steve@chygwyn.com>
 *
 * This file is released under GPLv2 or later.
 *
 * (part of code stolen from loop.c)
 */

#include <linux/major.h>

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/ioctl.h>
#include <linux/mutex.h>
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/net.h>
#include <linux/kthread.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/blk-mq.h>

#include <linux/uaccess.h>
#include <asm/types.h>

#include <linux/nbd.h>

static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);

struct nbd_sock {
	struct socket *sock;
	struct mutex tx_lock;
	struct request *pending;
	int sent;
	bool dead;
	int fallback_index;
};

#define NBD_TIMEDOUT			0
#define NBD_DISCONNECT_REQUESTED	1
#define NBD_DISCONNECTED		2
#define NBD_RUNNING			3

struct nbd_device {
	u32 flags;
	unsigned long runtime_flags;
	struct nbd_sock **socks;
	int magic;

	struct blk_mq_tag_set tag_set;

	struct mutex config_lock;
	struct gendisk *disk;
	int num_connections;
	atomic_t recv_threads;
	wait_queue_head_t recv_wq;
	loff_t blksize;
	loff_t bytesize;

	struct task_struct *task_recv;
	struct task_struct *task_setup;

#if IS_ENABLED(CONFIG_DEBUG_FS)
	struct dentry *dbg_dir;
#endif
};

struct nbd_cmd {
	struct nbd_device *nbd;
	int index;
	struct completion send_complete;
};

#if IS_ENABLED(CONFIG_DEBUG_FS)
static struct dentry *nbd_dbg_dir;
#endif

#define nbd_name(nbd) ((nbd)->disk->disk_name)

#define NBD_MAGIC 0x68797548

static unsigned int nbds_max = 16;
static int max_part;
static struct workqueue_struct *recv_workqueue;
static int part_shift;

static int nbd_dev_dbg_init(struct nbd_device *nbd);
static void nbd_dev_dbg_close(struct nbd_device *nbd);


static inline struct device *nbd_to_dev(struct nbd_device *nbd)
{
	return disk_to_dev(nbd->disk);
}

static bool nbd_is_connected(struct nbd_device *nbd)
{
	return !!nbd->task_recv;
}

static const char *nbdcmd_to_ascii(int cmd)
{
	switch (cmd) {
	case NBD_CMD_READ: return "read";
	case NBD_CMD_WRITE: return "write";
	case NBD_CMD_DISC: return "disconnect";
	case NBD_CMD_FLUSH: return "flush";
	case NBD_CMD_TRIM: return "trim/discard";
	}
	return "invalid";
}

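/*
 * Mark a connection as dead: shut the socket down and drop any partially
 * sent request so the submit path stops using it and retries elsewhere.
 * Callers hold the nsock's tx_lock.
 */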
static void nbd_mark_nsock_dead(struct nbd_sock *nsock)
{
	if (!nsock->dead)
		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
	nsock->dead = true;
	nsock->pending = NULL;
	nsock->sent = 0;
}

static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
{
	if (bdev->bd_openers <= 1)
		bd_set_size(bdev, 0);
	set_capacity(nbd->disk, 0);
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);

	return 0;
}

static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
{
	blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize);
	blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize);
	bd_set_size(bdev, nbd->bytesize);
	set_capacity(nbd->disk, nbd->bytesize >> 9);
	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
}

static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
			 loff_t blocksize, loff_t nr_blocks)
{
	nbd->blksize = blocksize;
	nbd->bytesize = blocksize * nr_blocks;
	if (nbd_is_connected(nbd))
		nbd_size_update(nbd, bdev);
}

static void nbd_end_request(struct nbd_cmd *cmd)
{
	struct nbd_device *nbd = cmd->nbd;
	struct request *req = blk_mq_rq_from_pdu(cmd);
	int error = req->errors ? -EIO : 0;

	dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
		error ? "failed" : "done");

	blk_mq_complete_request(req, error);
}

/*
 * Forcibly shutdown the socket causing all listeners to error
 */
static void sock_shutdown(struct nbd_device *nbd)
{
	int i;

	if (nbd->num_connections == 0)
		return;
	if (test_and_set_bit(NBD_DISCONNECTED, &nbd->runtime_flags))
		return;

	for (i = 0; i < nbd->num_connections; i++) {
		struct nbd_sock *nsock = nbd->socks[i];
		mutex_lock(&nsock->tx_lock);
		kernel_sock_shutdown(nsock->sock, SHUT_RDWR);
		mutex_unlock(&nsock->tx_lock);
	}
	dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n");
}

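/*
 * Block-layer timeout handler: with more than one connection the offending
 * socket is marked dead and the request is requeued; with a single
 * connection the request is failed and all sockets are shut down.
 */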
static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
						 bool reserved)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
	struct nbd_device *nbd = cmd->nbd;

	if (nbd->num_connections > 1) {
		dev_err_ratelimited(nbd_to_dev(nbd),
				    "Connection timed out, retrying\n");
		mutex_lock(&nbd->config_lock);
		/*
		 * Hooray we have more connections, requeue this IO, the submit
		 * path will put it on a real connection.
		 */
		if (nbd->socks && nbd->num_connections > 1) {
			if (cmd->index < nbd->num_connections) {
				struct nbd_sock *nsock =
					nbd->socks[cmd->index];
				mutex_lock(&nsock->tx_lock);
				nbd_mark_nsock_dead(nsock);
				mutex_unlock(&nsock->tx_lock);
			}
			mutex_unlock(&nbd->config_lock);
			blk_mq_requeue_request(req, true);
			return BLK_EH_NOT_HANDLED;
		}
		mutex_unlock(&nbd->config_lock);
	} else {
		dev_err_ratelimited(nbd_to_dev(nbd),
				    "Connection timed out\n");
	}
	set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
	req->errors = -EIO;

	mutex_lock(&nbd->config_lock);
	sock_shutdown(nbd);
	mutex_unlock(&nbd->config_lock);
	return BLK_EH_HANDLED;
}

/*
 * Send or receive packet.
 */
static int sock_xmit(struct nbd_device *nbd, int index, int send,
		     struct iov_iter *iter, int msg_flags, int *sent)
{
	struct socket *sock = nbd->socks[index]->sock;
	int result;
	struct msghdr msg;
	unsigned long pflags = current->flags;

	if (unlikely(!sock)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
			"Attempted %s on closed socket in sock_xmit\n",
			(send ? "send" : "recv"));
		return -EINVAL;
	}

	msg.msg_iter = *iter;

	current->flags |= PF_MEMALLOC;
	do {
		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
		msg.msg_name = NULL;
		msg.msg_namelen = 0;
		msg.msg_control = NULL;
		msg.msg_controllen = 0;
		msg.msg_flags = msg_flags | MSG_NOSIGNAL;

		if (send)
			result = sock_sendmsg(sock, &msg);
		else
			result = sock_recvmsg(sock, &msg, msg.msg_flags);

		if (result <= 0) {
			if (result == 0)
				result = -EPIPE; /* short read */
			break;
		}
		if (sent)
			*sent += result;
	} while (msg_data_left(&msg));

	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	return result;
}

/* always call with the tx_lock held */
static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_sock *nsock = nbd->socks[index];
	int result;
	struct nbd_request request = {.magic = htonl(NBD_REQUEST_MAGIC)};
	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
	struct iov_iter from;
	unsigned long size = blk_rq_bytes(req);
	struct bio *bio;
	u32 type;
	u32 tag = blk_mq_unique_tag(req);
	int sent = nsock->sent, skip = 0;

	iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));

	switch (req_op(req)) {
	case REQ_OP_DISCARD:
		type = NBD_CMD_TRIM;
		break;
	case REQ_OP_FLUSH:
		type = NBD_CMD_FLUSH;
		break;
	case REQ_OP_WRITE:
		type = NBD_CMD_WRITE;
		break;
	case REQ_OP_READ:
		type = NBD_CMD_READ;
		break;
	default:
		return -EIO;
	}

	if (rq_data_dir(req) == WRITE &&
	    (nbd->flags & NBD_FLAG_READ_ONLY)) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Write on read-only\n");
		return -EIO;
	}

	/* We did a partial send previously, and we at least sent the whole
	 * request struct, so just go and send the rest of the pages in the
	 * request.
	 */
	if (sent) {
		if (sent >= sizeof(request)) {
			skip = sent - sizeof(request);
			goto send_pages;
		}
		iov_iter_advance(&from, sent);
	}
	cmd->index = index;
	request.type = htonl(type);
	if (type != NBD_CMD_FLUSH) {
		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
		request.len = htonl(size);
	}
	memcpy(request.handle, &tag, sizeof(tag));

	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
		cmd, nbdcmd_to_ascii(type),
		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
	result = sock_xmit(nbd, index, 1, &from,
			(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
	if (result <= 0) {
		if (result == -ERESTARTSYS) {
			/* If we haven't sent anything we can just return BUSY,
			 * however if we have sent something we need to make
			 * sure we only allow this req to be sent until we are
			 * completely done.
			 */
			if (sent) {
				nsock->pending = req;
				nsock->sent = sent;
			}
			return BLK_MQ_RQ_QUEUE_BUSY;
		}
		dev_err_ratelimited(disk_to_dev(nbd->disk),
			"Send control failed (result %d)\n", result);
		return -EAGAIN;
	}
send_pages:
	if (type != NBD_CMD_WRITE)
		goto out;

	bio = req->bio;
	while (bio) {
		struct bio *next = bio->bi_next;
		struct bvec_iter iter;
		struct bio_vec bvec;

		bio_for_each_segment(bvec, bio, iter) {
			bool is_last = !next && bio_iter_last(bvec, iter);
			int flags = is_last ? 0 : MSG_MORE;

			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
				cmd, bvec.bv_len);
			iov_iter_bvec(&from, ITER_BVEC | WRITE,
				      &bvec, 1, bvec.bv_len);
			if (skip) {
				if (skip >= iov_iter_count(&from)) {
					skip -= iov_iter_count(&from);
					continue;
				}
				iov_iter_advance(&from, skip);
				skip = 0;
			}
			result = sock_xmit(nbd, index, 1, &from, flags, &sent);
			if (result <= 0) {
				if (result == -ERESTARTSYS) {
					/* We've already sent the header, we
					 * have no choice but to set pending and
					 * return BUSY.
					 */
					nsock->pending = req;
					nsock->sent = sent;
					return BLK_MQ_RQ_QUEUE_BUSY;
				}
				dev_err(disk_to_dev(nbd->disk),
					"Send data failed (result %d)\n",
					result);
				return -EAGAIN;
			}
			/*
			 * The completion might already have come in,
			 * so break for the last one instead of letting
			 * the iterator do it. This prevents use-after-free
			 * of the bio.
			 */
			if (is_last)
				break;
		}
		bio = next;
	}
out:
	nsock->pending = NULL;
	nsock->sent = 0;
	return 0;
}

static int nbd_disconnected(struct nbd_device *nbd)
{
	return test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) ||
		test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags);
}

/* NULL returned = something went wrong, inform userspace */
static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
{
	int result;
	struct nbd_reply reply;
	struct nbd_cmd *cmd;
	struct request *req = NULL;
	u16 hwq;
	u32 tag;
	struct kvec iov = {.iov_base = &reply, .iov_len = sizeof(reply)};
	struct iov_iter to;

	reply.magic = 0;
	iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
	if (result <= 0) {
		if (!nbd_disconnected(nbd))
			dev_err(disk_to_dev(nbd->disk),
				"Receive control failed (result %d)\n", result);
		return ERR_PTR(result);
	}

	if (ntohl(reply.magic) != NBD_REPLY_MAGIC) {
		dev_err(disk_to_dev(nbd->disk), "Wrong magic (0x%lx)\n",
				(unsigned long)ntohl(reply.magic));
		return ERR_PTR(-EPROTO);
	}

	memcpy(&tag, reply.handle, sizeof(u32));

	hwq = blk_mq_unique_tag_to_hwq(tag);
	if (hwq < nbd->tag_set.nr_hw_queues)
		req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
				       blk_mq_unique_tag_to_tag(tag));
	if (!req || !blk_mq_request_started(req)) {
		dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
			tag, req);
		return ERR_PTR(-ENOENT);
	}
	cmd = blk_mq_rq_to_pdu(req);
	if (ntohl(reply.error)) {
		dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
			ntohl(reply.error));
		req->errors = -EIO;
		return cmd;
	}

	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
	if (rq_data_dir(req) != WRITE) {
		struct req_iterator iter;
		struct bio_vec bvec;

		rq_for_each_segment(bvec, req, iter) {
			iov_iter_bvec(&to, ITER_BVEC | READ,
				      &bvec, 1, bvec.bv_len);
			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
			if (result <= 0) {
				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
					result);
				/*
				 * If we've disconnected or we only have 1
				 * connection then we need to make sure we
				 * complete this request, otherwise error out
				 * and let the timeout stuff handle resubmitting
				 * this request onto another connection.
				 */
				if (nbd_disconnected(nbd) ||
				    nbd->num_connections <= 1) {
					req->errors = -EIO;
					return cmd;
				}
				return ERR_PTR(-EIO);
			}
			dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
				cmd, bvec.bv_len);
		}
	} else {
		/* See the comment in nbd_queue_rq. */
		wait_for_completion(&cmd->send_complete);
	}
	return cmd;
}

static ssize_t pid_show(struct device *dev,
			struct device_attribute *attr, char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	struct nbd_device *nbd = (struct nbd_device *)disk->private_data;

	return sprintf(buf, "%d\n", task_pid_nr(nbd->task_recv));
}

static struct device_attribute pid_attr = {
	.attr = { .name = "pid", .mode = S_IRUGO},
	.show = pid_show,
};

struct recv_thread_args {
	struct work_struct work;
	struct nbd_device *nbd;
	int index;
};

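/*
 * Per-connection receive worker: reads replies and completes requests until
 * the socket fails, then marks it dead and wakes the waiter in
 * nbd_start_device().
 */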
static void recv_work(struct work_struct *work)
{
	struct recv_thread_args *args = container_of(work,
						     struct recv_thread_args,
						     work);
	struct nbd_device *nbd = args->nbd;
	struct nbd_cmd *cmd;
	int ret = 0;

	BUG_ON(nbd->magic != NBD_MAGIC);
	while (1) {
		cmd = nbd_read_stat(nbd, args->index);
		if (IS_ERR(cmd)) {
			struct nbd_sock *nsock = nbd->socks[args->index];

			mutex_lock(&nsock->tx_lock);
			nbd_mark_nsock_dead(nsock);
			mutex_unlock(&nsock->tx_lock);
			ret = PTR_ERR(cmd);
			break;
		}

		nbd_end_request(cmd);
	}
	atomic_dec(&nbd->recv_threads);
	wake_up(&nbd->recv_wq);
}

static void nbd_clear_req(struct request *req, void *data, bool reserved)
{
	struct nbd_cmd *cmd;

	if (!blk_mq_request_started(req))
		return;
	cmd = blk_mq_rq_to_pdu(req);
	req->errors = -EIO;
	nbd_end_request(cmd);
}

static void nbd_clear_que(struct nbd_device *nbd)
{
	BUG_ON(nbd->magic != NBD_MAGIC);

	blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
	dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
}

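/*
 * Pick another live connection to retry a request on once the socket at
 * @index has died. Returns the replacement index, or a negative value if
 * the device is disconnected or no live socket remains.
 */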
static int find_fallback(struct nbd_device *nbd, int index)
{
	int new_index = -1;
	struct nbd_sock *nsock = nbd->socks[index];
	int fallback = nsock->fallback_index;

	if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags))
		return new_index;

	if (nbd->num_connections <= 1) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on invalid socket\n");
		return new_index;
	}

	if (fallback >= 0 && fallback < nbd->num_connections &&
	    !nbd->socks[fallback]->dead)
		return fallback;

	if (nsock->fallback_index < 0 ||
	    nsock->fallback_index >= nbd->num_connections ||
	    nbd->socks[nsock->fallback_index]->dead) {
		int i;
		for (i = 0; i < nbd->num_connections; i++) {
			if (i == index)
				continue;
			if (!nbd->socks[i]->dead) {
				new_index = i;
				break;
			}
		}
		nsock->fallback_index = new_index;
		if (new_index < 0) {
			dev_err_ratelimited(disk_to_dev(nbd->disk),
					    "Dead connection, failed to find a fallback\n");
			return new_index;
		}
	}
	new_index = nsock->fallback_index;
	return new_index;
}

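/*
 * Route a request to the socket backing its hardware queue, falling back to
 * another live connection if that socket has died. If the socket still has a
 * partially sent request pending, this request is requeued so the pending
 * one finishes first.
 */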
static int nbd_handle_cmd(struct nbd_cmd *cmd, int index)
{
	struct request *req = blk_mq_rq_from_pdu(cmd);
	struct nbd_device *nbd = cmd->nbd;
	struct nbd_sock *nsock;
	int ret;

	if (index >= nbd->num_connections) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Attempted send on invalid socket\n");
		return -EINVAL;
	}
	req->errors = 0;
again:
	nsock = nbd->socks[index];
	mutex_lock(&nsock->tx_lock);
	if (nsock->dead) {
		index = find_fallback(nbd, index);
		mutex_unlock(&nsock->tx_lock);
		if (index < 0)
			return -EIO;
		goto again;
	}

	/* Handle the case that we have a pending request that was partially
	 * transmitted that _has_ to be serviced first. We need to call requeue
	 * here so that it gets put _after_ the request that is already on the
	 * dispatch list.
	 */
	if (unlikely(nsock->pending && nsock->pending != req)) {
		blk_mq_requeue_request(req, true);
		ret = 0;
		goto out;
	}
	/*
	 * Some failures are related to the link going down, so anything that
	 * returns EAGAIN can be retried on a different socket.
	 */
	ret = nbd_send_cmd(nbd, cmd, index);
	if (ret == -EAGAIN) {
		dev_err_ratelimited(disk_to_dev(nbd->disk),
				    "Request send failed trying another connection\n");
		nbd_mark_nsock_dead(nsock);
		mutex_unlock(&nsock->tx_lock);
		goto again;
	}
out:
	mutex_unlock(&nsock->tx_lock);
	return ret;
}

static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
	int ret;

	/*
	 * Since we look at the bios to send the request over the network we
	 * need to make sure the completion work doesn't mark this request done
	 * before we are done doing our send. This keeps us from dereferencing
	 * freed data if we have particularly fast completions (ie we get the
	 * completion before we exit sock_xmit on the last bvec) or in the case
	 * that the server is misbehaving (or there was an error) before we're
	 * done sending everything over the wire.
	 */
	init_completion(&cmd->send_complete);
	blk_mq_start_request(bd->rq);

	/* We can be called directly from the user space process, which means we
	 * could possibly have signals pending so our sendmsg will fail. In
	 * this case we need to return that we are busy, otherwise error out as
	 * appropriate.
	 */
	ret = nbd_handle_cmd(cmd, hctx->queue_num);
	if (ret < 0)
		ret = BLK_MQ_RQ_QUEUE_ERROR;
	if (!ret)
		ret = BLK_MQ_RQ_QUEUE_OK;
	complete(&cmd->send_complete);

	return ret;
}

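/*
 * NBD_SET_SOCK: look up the socket fd passed by userspace and append it to
 * the device's connection array.
 */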
static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
			  unsigned long arg)
{
	struct socket *sock;
	struct nbd_sock **socks;
	struct nbd_sock *nsock;
	int err;

	sock = sockfd_lookup(arg, &err);
	if (!sock)
		return err;

	if (!nbd->task_setup)
		nbd->task_setup = current;
	if (nbd->task_setup != current) {
		dev_err(disk_to_dev(nbd->disk),
			"Device being setup by another task");
		sockfd_put(sock);
		return -EINVAL;
	}

	socks = krealloc(nbd->socks, (nbd->num_connections + 1) *
			 sizeof(struct nbd_sock *), GFP_KERNEL);
	if (!socks) {
		sockfd_put(sock);
		return -ENOMEM;
	}
	nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
	if (!nsock) {
		sockfd_put(sock);
		return -ENOMEM;
	}

	nbd->socks = socks;

	nsock->fallback_index = -1;
	nsock->dead = false;
	mutex_init(&nsock->tx_lock);
	nsock->sock = sock;
	nsock->pending = NULL;
	nsock->sent = 0;
	socks[nbd->num_connections++] = nsock;

	if (max_part)
		bdev->bd_invalidated = 1;
	return 0;
}

/* Reset all properties of an NBD device */
static void nbd_reset(struct nbd_device *nbd)
{
	nbd->runtime_flags = 0;
	nbd->blksize = 1024;
	nbd->bytesize = 0;
	set_capacity(nbd->disk, 0);
	nbd->flags = 0;
	nbd->tag_set.timeout = 0;
	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
}

static void nbd_bdev_reset(struct block_device *bdev)
{
	if (bdev->bd_openers > 1)
		return;
	set_device_ro(bdev, false);
	bdev->bd_inode->i_size = 0;
	if (max_part > 0) {
		blkdev_reread_part(bdev);
		bdev->bd_invalidated = 1;
	}
}

static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev)
{
	if (nbd->flags & NBD_FLAG_READ_ONLY)
		set_device_ro(bdev, true);
	if (nbd->flags & NBD_FLAG_SEND_TRIM)
		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
	if (nbd->flags & NBD_FLAG_SEND_FLUSH)
		blk_queue_write_cache(nbd->disk->queue, true, false);
	else
		blk_queue_write_cache(nbd->disk->queue, false, false);
}

static void send_disconnects(struct nbd_device *nbd)
{
	struct nbd_request request = {
		.magic = htonl(NBD_REQUEST_MAGIC),
		.type = htonl(NBD_CMD_DISC),
	};
	struct kvec iov = {.iov_base = &request, .iov_len = sizeof(request)};
	struct iov_iter from;
	int i, ret;

	for (i = 0; i < nbd->num_connections; i++) {
		iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
		ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
		if (ret <= 0)
			dev_err(disk_to_dev(nbd->disk),
				"Send disconnect failed %d\n", ret);
	}
}

static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev)
{
	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
	if (!nbd->socks)
		return -EINVAL;

	mutex_unlock(&nbd->config_lock);
	fsync_bdev(bdev);
	mutex_lock(&nbd->config_lock);

	/* Check again after getting mutex back. */
	if (!nbd->socks)
		return -EINVAL;

	if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
			      &nbd->runtime_flags))
		send_disconnects(nbd);
	return 0;
}

static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
{
	sock_shutdown(nbd);
	nbd_clear_que(nbd);

	__invalidate_device(bdev, true);
	nbd_bdev_reset(bdev);
	/*
	 * We want to give the run thread a chance to wait for everybody
	 * to clean up and then do its own cleanup.
	 */
	if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) &&
	    nbd->num_connections) {
		int i;

		for (i = 0; i < nbd->num_connections; i++) {
			sockfd_put(nbd->socks[i]->sock);
			kfree(nbd->socks[i]);
		}
		kfree(nbd->socks);
		nbd->socks = NULL;
		nbd->num_connections = 0;
	}
	nbd->task_setup = NULL;

	return 0;
}

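/*
 * NBD_DO_IT: start one receive worker per connection and block until they
 * have all exited (disconnect, timeout or fatal error), then tear the
 * device back down.
 */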
static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev)
{
	struct recv_thread_args *args;
	int num_connections = nbd->num_connections;
	int error = 0, i;

	if (nbd->task_recv)
		return -EBUSY;
	if (!nbd->socks)
		return -EINVAL;
	if (num_connections > 1 &&
	    !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
		dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
		error = -EINVAL;
		goto out_err;
	}

	set_bit(NBD_RUNNING, &nbd->runtime_flags);
	blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
	args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
	if (!args) {
		error = -ENOMEM;
		goto out_err;
	}
	nbd->task_recv = current;
	mutex_unlock(&nbd->config_lock);

	nbd_parse_flags(nbd, bdev);

	error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
	if (error) {
		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
		goto out_recv;
	}

	nbd_size_update(nbd, bdev);

	nbd_dev_dbg_init(nbd);
	for (i = 0; i < num_connections; i++) {
		sk_set_memalloc(nbd->socks[i]->sock->sk);
		atomic_inc(&nbd->recv_threads);
		INIT_WORK(&args[i].work, recv_work);
		args[i].nbd = nbd;
		args[i].index = i;
		queue_work(recv_workqueue, &args[i].work);
	}
	wait_event_interruptible(nbd->recv_wq,
				 atomic_read(&nbd->recv_threads) == 0);
	for (i = 0; i < num_connections; i++)
		flush_work(&args[i].work);
	nbd_dev_dbg_close(nbd);
	nbd_size_clear(nbd, bdev);
	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
out_recv:
	mutex_lock(&nbd->config_lock);
	nbd->task_recv = NULL;
out_err:
	clear_bit(NBD_RUNNING, &nbd->runtime_flags);
	nbd_clear_sock(nbd, bdev);

	/* user requested, ignore socket errors */
	if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
		error = 0;
	if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
		error = -ETIMEDOUT;

	nbd_reset(nbd);
	return error;
}

/* Must be called with config_lock held */
static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
		       unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case NBD_DISCONNECT:
		return nbd_disconnect(nbd, bdev);
	case NBD_CLEAR_SOCK:
		return nbd_clear_sock(nbd, bdev);
	case NBD_SET_SOCK:
		return nbd_add_socket(nbd, bdev, arg);
	case NBD_SET_BLKSIZE:
		nbd_size_set(nbd, bdev, arg,
			     div_s64(nbd->bytesize, arg));
		return 0;
	case NBD_SET_SIZE:
		nbd_size_set(nbd, bdev, nbd->blksize,
			     div_s64(arg, nbd->blksize));
		return 0;
	case NBD_SET_SIZE_BLOCKS:
		nbd_size_set(nbd, bdev, nbd->blksize, arg);
		return 0;
	case NBD_SET_TIMEOUT:
		if (arg) {
			nbd->tag_set.timeout = arg * HZ;
			blk_queue_rq_timeout(nbd->disk->queue, arg * HZ);
		}
		return 0;

	case NBD_SET_FLAGS:
		nbd->flags = arg;
		return 0;
	case NBD_DO_IT:
		return nbd_start_device(nbd, bdev);
	case NBD_CLEAR_QUE:
		/*
		 * This is for compatibility only. The queue is always cleared
		 * by NBD_DO_IT or NBD_CLEAR_SOCK.
		 */
		return 0;
	case NBD_PRINT_DEBUG:
		/*
		 * For compatibility only, we no longer keep a list of
		 * outstanding requests.
		 */
		return 0;
	}
	return -ENOTTY;
}

static int nbd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct nbd_device *nbd = bdev->bd_disk->private_data;
	int error;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	BUG_ON(nbd->magic != NBD_MAGIC);

	mutex_lock(&nbd->config_lock);
	error = __nbd_ioctl(bdev, nbd, cmd, arg);
	mutex_unlock(&nbd->config_lock);

	return error;
}

static const struct block_device_operations nbd_fops =
{
	.owner =	THIS_MODULE,
	.ioctl =	nbd_ioctl,
	.compat_ioctl =	nbd_ioctl,
};

#if IS_ENABLED(CONFIG_DEBUG_FS)

static int nbd_dbg_tasks_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;

	if (nbd->task_recv)
		seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv));

	return 0;
}

static int nbd_dbg_tasks_open(struct inode *inode, struct file *file)
{
	return single_open(file, nbd_dbg_tasks_show, inode->i_private);
}

static const struct file_operations nbd_dbg_tasks_ops = {
	.open = nbd_dbg_tasks_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int nbd_dbg_flags_show(struct seq_file *s, void *unused)
{
	struct nbd_device *nbd = s->private;
	u32 flags = nbd->flags;

	seq_printf(s, "Hex: 0x%08x\n\n", flags);

	seq_puts(s, "Known flags:\n");

	if (flags & NBD_FLAG_HAS_FLAGS)
		seq_puts(s, "NBD_FLAG_HAS_FLAGS\n");
	if (flags & NBD_FLAG_READ_ONLY)
		seq_puts(s, "NBD_FLAG_READ_ONLY\n");
	if (flags & NBD_FLAG_SEND_FLUSH)
		seq_puts(s, "NBD_FLAG_SEND_FLUSH\n");
	if (flags & NBD_FLAG_SEND_TRIM)
		seq_puts(s, "NBD_FLAG_SEND_TRIM\n");

	return 0;
}

static int nbd_dbg_flags_open(struct inode *inode, struct file *file)
{
	return single_open(file, nbd_dbg_flags_show, inode->i_private);
}

static const struct file_operations nbd_dbg_flags_ops = {
	.open = nbd_dbg_flags_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	struct dentry *dir;

	if (!nbd_dbg_dir)
		return -EIO;

	dir = debugfs_create_dir(nbd_name(nbd), nbd_dbg_dir);
	if (!dir) {
		dev_err(nbd_to_dev(nbd), "Failed to create debugfs dir for '%s'\n",
			nbd_name(nbd));
		return -EIO;
	}
	nbd->dbg_dir = dir;

	debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
	debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
	debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
	debugfs_create_u64("blocksize", 0444, dir, &nbd->blksize);
	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);

	return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
	debugfs_remove_recursive(nbd->dbg_dir);
}

static int nbd_dbg_init(void)
{
	struct dentry *dbg_dir;

	dbg_dir = debugfs_create_dir("nbd", NULL);
	if (!dbg_dir)
		return -EIO;

	nbd_dbg_dir = dbg_dir;

	return 0;
}

static void nbd_dbg_close(void)
{
	debugfs_remove_recursive(nbd_dbg_dir);
}

#else  /* IS_ENABLED(CONFIG_DEBUG_FS) */

static int nbd_dev_dbg_init(struct nbd_device *nbd)
{
	return 0;
}

static void nbd_dev_dbg_close(struct nbd_device *nbd)
{
}

static int nbd_dbg_init(void)
{
	return 0;
}

static void nbd_dbg_close(void)
{
}

#endif

static int nbd_init_request(void *data, struct request *rq,
			    unsigned int hctx_idx, unsigned int request_idx,
			    unsigned int numa_node)
{
	struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
	cmd->nbd = data;
	return 0;
}

static const struct blk_mq_ops nbd_mq_ops = {
	.queue_rq	= nbd_queue_rq,
	.init_request	= nbd_init_request,
	.timeout	= nbd_xmit_timeout,
};

static void nbd_dev_remove(struct nbd_device *nbd)
{
	struct gendisk *disk = nbd->disk;
	nbd->magic = 0;
	if (disk) {
		del_gendisk(disk);
		blk_cleanup_queue(disk->queue);
		blk_mq_free_tag_set(&nbd->tag_set);
		put_disk(disk);
	}
	kfree(nbd);
}

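/*
 * Allocate an nbd device together with its blk-mq tag set and request queue
 * and register the gendisk. A negative index means "use the first free slot
 * in the idr".
 */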
static int nbd_dev_add(int index)
{
	struct nbd_device *nbd;
	struct gendisk *disk;
	struct request_queue *q;
	int err = -ENOMEM;

	nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL);
	if (!nbd)
		goto out;

	disk = alloc_disk(1 << part_shift);
	if (!disk)
		goto out_free_nbd;

	if (index >= 0) {
		err = idr_alloc(&nbd_index_idr, nbd, index, index + 1,
				GFP_KERNEL);
		if (err == -ENOSPC)
			err = -EEXIST;
	} else {
		err = idr_alloc(&nbd_index_idr, nbd, 0, 0, GFP_KERNEL);
		if (err >= 0)
			index = err;
	}
	if (err < 0)
		goto out_free_disk;

	nbd->disk = disk;
	nbd->tag_set.ops = &nbd_mq_ops;
	nbd->tag_set.nr_hw_queues = 1;
	nbd->tag_set.queue_depth = 128;
	nbd->tag_set.numa_node = NUMA_NO_NODE;
	nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
		BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
	nbd->tag_set.driver_data = nbd;

	err = blk_mq_alloc_tag_set(&nbd->tag_set);
	if (err)
		goto out_free_idr;

	q = blk_mq_init_queue(&nbd->tag_set);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_free_tags;
	}
	disk->queue = q;

	/*
	 * Tell the block layer that we are not a rotational device
	 */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, disk->queue);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, disk->queue);
	disk->queue->limits.discard_granularity = 512;
	blk_queue_max_discard_sectors(disk->queue, UINT_MAX);
	blk_queue_max_hw_sectors(disk->queue, 65536);
	disk->queue->limits.max_sectors = 256;

	nbd->magic = NBD_MAGIC;
	mutex_init(&nbd->config_lock);
	disk->major = NBD_MAJOR;
	disk->first_minor = index << part_shift;
	disk->fops = &nbd_fops;
	disk->private_data = nbd;
	sprintf(disk->disk_name, "nbd%d", index);
	init_waitqueue_head(&nbd->recv_wq);
	nbd_reset(nbd);
	add_disk(disk);
	return index;

out_free_tags:
	blk_mq_free_tag_set(&nbd->tag_set);
out_free_idr:
	idr_remove(&nbd_index_idr, index);
out_free_disk:
	put_disk(disk);
out_free_nbd:
	kfree(nbd);
out:
	return err;
}

/*
 * And here should be modules and kernel interface
 * (Just smiley confuses emacs :-)
 */

static int __init nbd_init(void)
{
	int i;

	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);

	if (max_part < 0) {
		printk(KERN_ERR "nbd: max_part must be >= 0\n");
		return -EINVAL;
	}

	part_shift = 0;
	if (max_part > 0) {
		part_shift = fls(max_part);

		/*
		 * Adjust max_part according to part_shift as it is exported
		 * to user space so that users can know the max number of
		 * partitions the kernel should be able to manage.
		 *
		 * Note that -1 is required because partition 0 is reserved
		 * for the whole disk.
		 */
		max_part = (1UL << part_shift) - 1;
	}

	if ((1UL << part_shift) > DISK_MAX_PARTS)
		return -EINVAL;

	if (nbds_max > 1UL << (MINORBITS - part_shift))
		return -EINVAL;
	recv_workqueue = alloc_workqueue("knbd-recv",
					 WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
	if (!recv_workqueue)
		return -ENOMEM;

	if (register_blkdev(NBD_MAJOR, "nbd")) {
		destroy_workqueue(recv_workqueue);
		return -EIO;
	}

	nbd_dbg_init();

	mutex_lock(&nbd_index_mutex);
	for (i = 0; i < nbds_max; i++)
		nbd_dev_add(i);
	mutex_unlock(&nbd_index_mutex);
	return 0;
}

static int nbd_exit_cb(int id, void *ptr, void *data)
{
	struct nbd_device *nbd = ptr;
	nbd_dev_remove(nbd);
	return 0;
}

static void __exit nbd_cleanup(void)
{
	nbd_dbg_close();

	idr_for_each(&nbd_index_idr, &nbd_exit_cb, NULL);
	idr_destroy(&nbd_index_idr);
	destroy_workqueue(recv_workqueue);
	unregister_blkdev(NBD_MAJOR, "nbd");
}

module_init(nbd_init);
module_exit(nbd_cleanup);

MODULE_DESCRIPTION("Network Block Device");
MODULE_LICENSE("GPL");

module_param(nbds_max, int, 0444);
MODULE_PARM_DESC(nbds_max, "number of network block devices to initialize (default: 16)");
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "number of partitions per device (default: 0)");