blob: 6d139d20ec994be98481309a20f03186e04a6c13 [file] [log] [blame]
Jens Axboe3d6392c2007-07-09 12:38:05 +02001/*
2 * bsg.c - block layer implementation of the sg v3 interface
3 *
4 * Copyright (C) 2004 Jens Axboe <axboe@suse.de> SUSE Labs
5 * Copyright (C) 2004 Peter M. Jones <pjones@redhat.com>
6 *
7 * This file is subject to the terms and conditions of the GNU General Public
8 * License version 2. See the file "COPYING" in the main directory of this
9 * archive for more details.
10 *
11 */
/*
 * TODO
 *	- Should this get merged, block/scsi_ioctl.c will be migrated into
 *	  this file. To keep maintenance down, it's easier to have them
 *	  separated right now.
 *
 */
Jens Axboe3d6392c2007-07-09 12:38:05 +020019#include <linux/module.h>
20#include <linux/init.h>
21#include <linux/file.h>
22#include <linux/blkdev.h>
23#include <linux/poll.h>
24#include <linux/cdev.h>
25#include <linux/percpu.h>
26#include <linux/uio.h>
27#include <linux/bsg.h>
28
29#include <scsi/scsi.h>
30#include <scsi/scsi_ioctl.h>
31#include <scsi/scsi_cmnd.h>
32#include <scsi/sg.h>
33
/*
 * Banner printed when the driver has been set up. The string is only ever
 * read, so keep it const.
 */
static const char bsg_version[] = "block layer sg (bsg) 0.4";

/* defined further down; struct bsg_device only needs a pointer to it */
struct bsg_command;
37
38struct bsg_device {
39 struct gendisk *disk;
40 request_queue_t *queue;
41 spinlock_t lock;
42 struct list_head busy_list;
43 struct list_head done_list;
44 struct hlist_node dev_list;
45 atomic_t ref_count;
46 int minor;
47 int queued_cmds;
48 int done_cmds;
49 unsigned long *cmd_bitmap;
50 struct bsg_command *cmd_map;
51 wait_queue_head_t wq_done;
52 wait_queue_head_t wq_free;
53 char name[BDEVNAME_SIZE];
54 int max_queue;
55 unsigned long flags;
56};
57
/*
 * Bit numbers (not masks) for bsg_device->flags; they are used with
 * set_bit(), clear_bit() and test_bit().
 */
enum {
	BSG_F_BLOCK		= 1,	/* blocking io allowed */
	BSG_F_WRITE_PERM	= 2,	/* file was opened for writing */
};
62
/*
 * Command allocation bitmap defines.
 *
 * The command slots live in one allocation of 2^BSG_CMDS_PAGE_ORDER pages;
 * BSG_CMDS is the number of struct bsg_command that fit into it.
 */
#define BSG_CMDS_PAGE_ORDER	(1)
#define BSG_CMDS_PER_LONG	(sizeof(unsigned long) * 8)
#define BSG_CMDS_MASK		(BSG_CMDS_PER_LONG - 1)
#define BSG_CMDS_BYTES		(PAGE_SIZE * (1 << BSG_CMDS_PAGE_ORDER))
#define BSG_CMDS		(BSG_CMDS_BYTES / sizeof(struct bsg_command))
71
72#undef BSG_DEBUG
73
74#ifdef BSG_DEBUG
75#define dprintk(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ##args)
76#else
77#define dprintk(fmt, args...)
78#endif
79
80#define list_entry_bc(entry) list_entry((entry), struct bsg_command, list)
81
82/*
83 * just for testing
84 */
85#define BSG_MAJOR (240)
86
87static DEFINE_MUTEX(bsg_mutex);
88static int bsg_device_nr;
89
90#define BSG_LIST_SIZE (8)
91#define bsg_list_idx(minor) ((minor) & (BSG_LIST_SIZE - 1))
92static struct hlist_head bsg_device_list[BSG_LIST_SIZE];
93
94static struct class *bsg_class;
95static LIST_HEAD(bsg_class_list);
96
97/*
98 * our internal command type
99 */
100struct bsg_command {
101 struct bsg_device *bd;
102 struct list_head list;
103 struct request *rq;
104 struct bio *bio;
105 int err;
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100106 struct sg_io_v4 hdr;
107 struct sg_io_v4 __user *uhdr;
Jens Axboe3d6392c2007-07-09 12:38:05 +0200108 char sense[SCSI_SENSE_BUFFERSIZE];
109};
110
111static void bsg_free_command(struct bsg_command *bc)
112{
113 struct bsg_device *bd = bc->bd;
114 unsigned long bitnr = bc - bd->cmd_map;
115 unsigned long flags;
116
117 dprintk("%s: command bit offset %lu\n", bd->name, bitnr);
118
119 spin_lock_irqsave(&bd->lock, flags);
120 bd->queued_cmds--;
121 __clear_bit(bitnr, bd->cmd_bitmap);
122 spin_unlock_irqrestore(&bd->lock, flags);
123
124 wake_up(&bd->wq_free);
125}
126
127static struct bsg_command *__bsg_alloc_command(struct bsg_device *bd)
128{
129 struct bsg_command *bc = NULL;
130 unsigned long *map;
131 int free_nr;
132
133 spin_lock_irq(&bd->lock);
134
135 if (bd->queued_cmds >= bd->max_queue)
136 goto out;
137
138 for (free_nr = 0, map = bd->cmd_bitmap; *map == ~0UL; map++)
139 free_nr += BSG_CMDS_PER_LONG;
140
141 BUG_ON(*map == ~0UL);
142
143 bd->queued_cmds++;
144 free_nr += ffz(*map);
145 __set_bit(free_nr, bd->cmd_bitmap);
146 spin_unlock_irq(&bd->lock);
147
148 bc = bd->cmd_map + free_nr;
149 memset(bc, 0, sizeof(*bc));
150 bc->bd = bd;
151 INIT_LIST_HEAD(&bc->list);
152 dprintk("%s: returning free cmd %p (bit %d)\n", bd->name, bc, free_nr);
153 return bc;
154out:
155 dprintk("%s: failed (depth %d)\n", bd->name, bd->queued_cmds);
156 spin_unlock_irq(&bd->lock);
157 return bc;
158}
159
160static inline void
161bsg_del_done_cmd(struct bsg_device *bd, struct bsg_command *bc)
162{
163 bd->done_cmds--;
164 list_del(&bc->list);
165}
166
167static inline void
168bsg_add_done_cmd(struct bsg_device *bd, struct bsg_command *bc)
169{
170 bd->done_cmds++;
171 list_add_tail(&bc->list, &bd->done_list);
172 wake_up(&bd->wq_done);
173}
174
175static inline int bsg_io_schedule(struct bsg_device *bd, int state)
176{
177 DEFINE_WAIT(wait);
178 int ret = 0;
179
180 spin_lock_irq(&bd->lock);
181
182 BUG_ON(bd->done_cmds > bd->queued_cmds);
183
184 /*
185 * -ENOSPC or -ENODATA? I'm going for -ENODATA, meaning "I have no
186 * work to do", even though we return -ENOSPC after this same test
187 * during bsg_write() -- there, it means our buffer can't have more
188 * bsg_commands added to it, thus has no space left.
189 */
190 if (bd->done_cmds == bd->queued_cmds) {
191 ret = -ENODATA;
192 goto unlock;
193 }
194
195 if (!test_bit(BSG_F_BLOCK, &bd->flags)) {
196 ret = -EAGAIN;
197 goto unlock;
198 }
199
200 prepare_to_wait(&bd->wq_done, &wait, state);
201 spin_unlock_irq(&bd->lock);
202 io_schedule();
203 finish_wait(&bd->wq_done, &wait);
204
205 if ((state == TASK_INTERRUPTIBLE) && signal_pending(current))
206 ret = -ERESTARTSYS;
207
208 return ret;
209unlock:
210 spin_unlock_irq(&bd->lock);
211 return ret;
212}
213
214/*
215 * get a new free command, blocking if needed and specified
216 */
217static struct bsg_command *bsg_get_command(struct bsg_device *bd)
218{
219 struct bsg_command *bc;
220 int ret;
221
222 do {
223 bc = __bsg_alloc_command(bd);
224 if (bc)
225 break;
226
227 ret = bsg_io_schedule(bd, TASK_INTERRUPTIBLE);
228 if (ret) {
229 bc = ERR_PTR(ret);
230 break;
231 }
232
233 } while (1);
234
235 return bc;
236}
237
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100238static int blk_fill_sgv4_hdr_rq(request_queue_t *q, struct request *rq,
239 struct sg_io_v4 *hdr, int has_write_perm)
Jens Axboe3d6392c2007-07-09 12:38:05 +0200240{
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100241 memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
242
243 if (copy_from_user(rq->cmd, (void *)(unsigned long)hdr->request,
244 hdr->request_len))
245 return -EFAULT;
246 if (blk_verify_command(rq->cmd, has_write_perm))
247 return -EPERM;
Jens Axboe3d6392c2007-07-09 12:38:05 +0200248
249 /*
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100250 * fill in request structure
Jens Axboe3d6392c2007-07-09 12:38:05 +0200251 */
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100252 rq->cmd_len = hdr->request_len;
253 rq->cmd_type = REQ_TYPE_BLOCK_PC;
Jens Axboe3d6392c2007-07-09 12:38:05 +0200254
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100255 rq->timeout = (hdr->timeout * HZ) / 1000;
256 if (!rq->timeout)
257 rq->timeout = q->sg_timeout;
258 if (!rq->timeout)
259 rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
Jens Axboe3d6392c2007-07-09 12:38:05 +0200260
261 return 0;
262}
263
264/*
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100265 * Check if sg_io_v4 from user is allowed and valid
266 */
267static int
268bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw)
269{
270 if (hdr->guard != 'Q')
271 return -EINVAL;
272 if (hdr->request_len > BLK_MAX_CDB)
273 return -EINVAL;
274 if (hdr->dout_xfer_len > (q->max_sectors << 9) ||
275 hdr->din_xfer_len > (q->max_sectors << 9))
276 return -EIO;
277
278 /* not supported currently */
279 if (hdr->protocol || hdr->subprotocol)
280 return -EINVAL;
281
282 /*
283 * looks sane, if no data then it should be fine from our POV
284 */
285 if (!hdr->dout_xfer_len && !hdr->din_xfer_len)
286 return 0;
287
288 /* not supported currently */
289 if (hdr->dout_xfer_len && hdr->din_xfer_len)
290 return -EINVAL;
291
292 *rw = hdr->dout_xfer_len ? WRITE : READ;
293
294 return 0;
295}
296
297/*
298 * map sg_io_v4 to a request.
Jens Axboe3d6392c2007-07-09 12:38:05 +0200299 */
300static struct request *
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100301bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
Jens Axboe3d6392c2007-07-09 12:38:05 +0200302{
303 request_queue_t *q = bd->queue;
Jens Axboe3d6392c2007-07-09 12:38:05 +0200304 struct request *rq;
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100305 int ret, rw;
306 unsigned int dxfer_len;
307 void *dxferp = NULL;
Jens Axboe3d6392c2007-07-09 12:38:05 +0200308
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100309 dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
310 hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
311 hdr->din_xfer_len);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200312
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100313 ret = bsg_validate_sgv4_hdr(q, hdr, &rw);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200314 if (ret)
315 return ERR_PTR(ret);
316
317 /*
318 * map scatter-gather elements seperately and string them to request
319 */
320 rq = blk_get_request(q, rw, GFP_KERNEL);
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100321 ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
322 &bd->flags));
Jens Axboe3d6392c2007-07-09 12:38:05 +0200323 if (ret) {
324 blk_put_request(rq);
325 return ERR_PTR(ret);
326 }
327
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100328 if (hdr->dout_xfer_len) {
329 dxfer_len = hdr->dout_xfer_len;
330 dxferp = (void*)(unsigned long)hdr->dout_xferp;
331 } else if (hdr->din_xfer_len) {
332 dxfer_len = hdr->din_xfer_len;
333 dxferp = (void*)(unsigned long)hdr->din_xferp;
334 } else
335 dxfer_len = 0;
Jens Axboe3d6392c2007-07-09 12:38:05 +0200336
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100337 if (dxfer_len) {
338 ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
339 if (ret) {
340 dprintk("failed map at %d\n", ret);
341 blk_put_request(rq);
342 rq = ERR_PTR(ret);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200343 }
Jens Axboe3d6392c2007-07-09 12:38:05 +0200344 }
345
346 return rq;
347}
348
349/*
350 * async completion call-back from the block layer, when scsi/ide/whatever
351 * calls end_that_request_last() on a request
352 */
353static void bsg_rq_end_io(struct request *rq, int uptodate)
354{
355 struct bsg_command *bc = rq->end_io_data;
356 struct bsg_device *bd = bc->bd;
357 unsigned long flags;
358
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100359 dprintk("%s: finished rq %p bc %p, bio %p offset %Zd stat %d\n",
FUJITA Tomonori9e69fbb2006-12-20 11:18:22 +0100360 bd->name, rq, bc, bc->bio, bc - bd->cmd_map, uptodate);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200361
362 bc->hdr.duration = jiffies_to_msecs(jiffies - bc->hdr.duration);
363
364 spin_lock_irqsave(&bd->lock, flags);
365 list_del(&bc->list);
366 bsg_add_done_cmd(bd, bc);
367 spin_unlock_irqrestore(&bd->lock, flags);
368}
369
370/*
371 * do final setup of a 'bc' and submit the matching 'rq' to the block
372 * layer for io
373 */
374static void bsg_add_command(struct bsg_device *bd, request_queue_t *q,
375 struct bsg_command *bc, struct request *rq)
376{
377 rq->sense = bc->sense;
378 rq->sense_len = 0;
379
380 /*
381 * add bc command to busy queue and submit rq for io
382 */
383 bc->rq = rq;
384 bc->bio = rq->bio;
385 bc->hdr.duration = jiffies;
386 spin_lock_irq(&bd->lock);
387 list_add_tail(&bc->list, &bd->busy_list);
388 spin_unlock_irq(&bd->lock);
389
390 dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc);
391
392 rq->end_io_data = bc;
393 blk_execute_rq_nowait(q, bd->disk, rq, 1, bsg_rq_end_io);
394}
395
396static inline struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd)
397{
398 struct bsg_command *bc = NULL;
399
400 spin_lock_irq(&bd->lock);
401 if (bd->done_cmds) {
402 bc = list_entry_bc(bd->done_list.next);
403 bsg_del_done_cmd(bd, bc);
404 }
405 spin_unlock_irq(&bd->lock);
406
407 return bc;
408}
409
410/*
411 * Get a finished command from the done list
412 */
413static struct bsg_command *__bsg_get_done_cmd(struct bsg_device *bd, int state)
414{
415 struct bsg_command *bc;
416 int ret;
417
418 do {
419 bc = bsg_next_done_cmd(bd);
420 if (bc)
421 break;
422
423 ret = bsg_io_schedule(bd, state);
424 if (ret) {
425 bc = ERR_PTR(ret);
426 break;
427 }
428 } while (1);
429
430 dprintk("%s: returning done %p\n", bd->name, bc);
431
432 return bc;
433}
434
435static struct bsg_command *
436bsg_get_done_cmd(struct bsg_device *bd, const struct iovec *iov)
437{
438 return __bsg_get_done_cmd(bd, TASK_INTERRUPTIBLE);
439}
440
441static struct bsg_command *
442bsg_get_done_cmd_nosignals(struct bsg_device *bd)
443{
444 return __bsg_get_done_cmd(bd, TASK_UNINTERRUPTIBLE);
445}
446
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100447static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
448 struct bio *bio)
449{
450 int ret = 0;
451
452 dprintk("rq %p bio %p %u\n", rq, bio, rq->errors);
453 /*
454 * fill in all the output members
455 */
456 hdr->device_status = status_byte(rq->errors);
457 hdr->transport_status = host_byte(rq->errors);
458 hdr->driver_status = driver_byte(rq->errors);
459 hdr->info = 0;
460 if (hdr->device_status || hdr->transport_status || hdr->driver_status)
461 hdr->info |= SG_INFO_CHECK;
462 hdr->din_resid = rq->data_len;
463 hdr->response_len = 0;
464
465 if (rq->sense_len && hdr->response) {
466 int len = min((unsigned int) hdr->max_response_len,
467 rq->sense_len);
468
469 ret = copy_to_user((void*)(unsigned long)hdr->response,
470 rq->sense, len);
471 if (!ret)
472 hdr->response_len = len;
473 else
474 ret = -EFAULT;
475 }
476
477 blk_rq_unmap_user(bio);
478 blk_put_request(rq);
479
480 return ret;
481}
482
Jens Axboe3d6392c2007-07-09 12:38:05 +0200483static int bsg_complete_all_commands(struct bsg_device *bd)
484{
485 struct bsg_command *bc;
486 int ret, tret;
487
488 dprintk("%s: entered\n", bd->name);
489
490 set_bit(BSG_F_BLOCK, &bd->flags);
491
492 /*
493 * wait for all commands to complete
494 */
495 ret = 0;
496 do {
497 ret = bsg_io_schedule(bd, TASK_UNINTERRUPTIBLE);
498 /*
499 * look for -ENODATA specifically -- we'll sometimes get
500 * -ERESTARTSYS when we've taken a signal, but we can't
501 * return until we're done freeing the queue, so ignore
502 * it. The signal will get handled when we're done freeing
503 * the bsg_device.
504 */
505 } while (ret != -ENODATA);
506
507 /*
508 * discard done commands
509 */
510 ret = 0;
511 do {
512 bc = bsg_get_done_cmd_nosignals(bd);
513
514 /*
515 * we _must_ complete before restarting, because
516 * bsg_release can't handle this failing.
517 */
518 if (PTR_ERR(bc) == -ERESTARTSYS)
519 continue;
520 if (IS_ERR(bc)) {
521 ret = PTR_ERR(bc);
522 break;
523 }
524
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100525 tret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200526 if (!ret)
527 ret = tret;
528
529 bsg_free_command(bc);
530 } while (1);
531
532 return ret;
533}
534
535typedef struct bsg_command *(*bsg_command_callback)(struct bsg_device *bd, const struct iovec *iov);
536
537static ssize_t
538__bsg_read(char __user *buf, size_t count, bsg_command_callback get_bc,
539 struct bsg_device *bd, const struct iovec *iov, ssize_t *bytes_read)
540{
541 struct bsg_command *bc;
542 int nr_commands, ret;
543
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100544 if (count % sizeof(struct sg_io_v4))
Jens Axboe3d6392c2007-07-09 12:38:05 +0200545 return -EINVAL;
546
547 ret = 0;
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100548 nr_commands = count / sizeof(struct sg_io_v4);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200549 while (nr_commands) {
550 bc = get_bc(bd, iov);
551 if (IS_ERR(bc)) {
552 ret = PTR_ERR(bc);
553 break;
554 }
555
556 /*
557 * this is the only case where we need to copy data back
558 * after completing the request. so do that here,
559 * bsg_complete_work() cannot do that for us
560 */
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100561 ret = blk_complete_sgv4_hdr_rq(bc->rq, &bc->hdr, bc->bio);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200562
563 if (copy_to_user(buf, (char *) &bc->hdr, sizeof(bc->hdr)))
564 ret = -EFAULT;
565
566 bsg_free_command(bc);
567
568 if (ret)
569 break;
570
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100571 buf += sizeof(struct sg_io_v4);
572 *bytes_read += sizeof(struct sg_io_v4);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200573 nr_commands--;
574 }
575
576 return ret;
577}
578
579static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
580{
581 if (file->f_flags & O_NONBLOCK)
582 clear_bit(BSG_F_BLOCK, &bd->flags);
583 else
584 set_bit(BSG_F_BLOCK, &bd->flags);
585}
586
587static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file)
588{
589 if (file->f_mode & FMODE_WRITE)
590 set_bit(BSG_F_WRITE_PERM, &bd->flags);
591 else
592 clear_bit(BSG_F_WRITE_PERM, &bd->flags);
593}
594
/*
 * Tell real errors from the codes that only mean "could not go on right
 * now". Returns 0 for success, -ENOSPC, -ENODATA and -EAGAIN, and 1 for
 * any other value.
 */
static inline int err_block_err(int ret)
{
	switch (ret) {
	case 0:
	case -ENOSPC:
	case -ENODATA:
	case -EAGAIN:
		return 0;
	default:
		return 1;
	}
}
602
603static ssize_t
604bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
605{
606 struct bsg_device *bd = file->private_data;
607 int ret;
608 ssize_t bytes_read;
609
FUJITA Tomonori9e69fbb2006-12-20 11:18:22 +0100610 dprintk("%s: read %Zd bytes\n", bd->name, count);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200611
612 bsg_set_block(bd, file);
613 bytes_read = 0;
614 ret = __bsg_read(buf, count, bsg_get_done_cmd,
615 bd, NULL, &bytes_read);
616 *ppos = bytes_read;
617
618 if (!bytes_read || (bytes_read && err_block_err(ret)))
619 bytes_read = ret;
620
621 return bytes_read;
622}
623
624static ssize_t __bsg_write(struct bsg_device *bd, const char __user *buf,
625 size_t count, ssize_t *bytes_read)
626{
627 struct bsg_command *bc;
628 struct request *rq;
629 int ret, nr_commands;
630
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100631 if (count % sizeof(struct sg_io_v4))
Jens Axboe3d6392c2007-07-09 12:38:05 +0200632 return -EINVAL;
633
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100634 nr_commands = count / sizeof(struct sg_io_v4);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200635 rq = NULL;
636 bc = NULL;
637 ret = 0;
638 while (nr_commands) {
639 request_queue_t *q = bd->queue;
Jens Axboe3d6392c2007-07-09 12:38:05 +0200640
641 bc = bsg_get_command(bd);
642 if (!bc)
643 break;
644 if (IS_ERR(bc)) {
645 ret = PTR_ERR(bc);
646 bc = NULL;
647 break;
648 }
649
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100650 bc->uhdr = (struct sg_io_v4 __user *) buf;
Jens Axboe3d6392c2007-07-09 12:38:05 +0200651 if (copy_from_user(&bc->hdr, buf, sizeof(bc->hdr))) {
652 ret = -EFAULT;
653 break;
654 }
655
656 /*
657 * get a request, fill in the blanks, and add to request queue
658 */
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100659 rq = bsg_map_hdr(bd, &bc->hdr);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200660 if (IS_ERR(rq)) {
661 ret = PTR_ERR(rq);
662 rq = NULL;
663 break;
664 }
665
666 bsg_add_command(bd, q, bc, rq);
667 bc = NULL;
668 rq = NULL;
669 nr_commands--;
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100670 buf += sizeof(struct sg_io_v4);
671 *bytes_read += sizeof(struct sg_io_v4);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200672 }
673
Jens Axboe3d6392c2007-07-09 12:38:05 +0200674 if (bc)
675 bsg_free_command(bc);
676
677 return ret;
678}
679
680static ssize_t
681bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
682{
683 struct bsg_device *bd = file->private_data;
684 ssize_t bytes_read;
685 int ret;
686
FUJITA Tomonori9e69fbb2006-12-20 11:18:22 +0100687 dprintk("%s: write %Zd bytes\n", bd->name, count);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200688
689 bsg_set_block(bd, file);
690 bsg_set_write_perm(bd, file);
691
692 bytes_read = 0;
693 ret = __bsg_write(bd, buf, count, &bytes_read);
694 *ppos = bytes_read;
695
696 /*
697 * return bytes written on non-fatal errors
698 */
699 if (!bytes_read || (bytes_read && err_block_err(ret)))
700 bytes_read = ret;
701
FUJITA Tomonori9e69fbb2006-12-20 11:18:22 +0100702 dprintk("%s: returning %Zd\n", bd->name, bytes_read);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200703 return bytes_read;
704}
705
706static void bsg_free_device(struct bsg_device *bd)
707{
708 if (bd->cmd_map)
709 free_pages((unsigned long) bd->cmd_map, BSG_CMDS_PAGE_ORDER);
710
711 kfree(bd->cmd_bitmap);
712 kfree(bd);
713}
714
715static struct bsg_device *bsg_alloc_device(void)
716{
717 struct bsg_command *cmd_map;
718 unsigned long *cmd_bitmap;
719 struct bsg_device *bd;
720 int bits;
721
722 bd = kzalloc(sizeof(struct bsg_device), GFP_KERNEL);
723 if (unlikely(!bd))
724 return NULL;
725
726 spin_lock_init(&bd->lock);
727
728 bd->max_queue = BSG_CMDS;
729
730 bits = (BSG_CMDS / BSG_CMDS_PER_LONG) + 1;
731 cmd_bitmap = kzalloc(bits * sizeof(unsigned long), GFP_KERNEL);
732 if (!cmd_bitmap)
733 goto out_free_bd;
734 bd->cmd_bitmap = cmd_bitmap;
735
736 cmd_map = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
737 BSG_CMDS_PAGE_ORDER);
738 if (!cmd_map)
739 goto out_free_bitmap;
740 bd->cmd_map = cmd_map;
741
742 INIT_LIST_HEAD(&bd->busy_list);
743 INIT_LIST_HEAD(&bd->done_list);
744 INIT_HLIST_NODE(&bd->dev_list);
745
746 init_waitqueue_head(&bd->wq_free);
747 init_waitqueue_head(&bd->wq_done);
748 return bd;
749
750out_free_bitmap:
751 kfree(cmd_bitmap);
752out_free_bd:
753 kfree(bd);
754 return NULL;
755}
756
757static int bsg_put_device(struct bsg_device *bd)
758{
759 int ret = 0;
760
761 mutex_lock(&bsg_mutex);
762
763 if (!atomic_dec_and_test(&bd->ref_count))
764 goto out;
765
766 dprintk("%s: tearing down\n", bd->name);
767
768 /*
769 * close can always block
770 */
771 set_bit(BSG_F_BLOCK, &bd->flags);
772
773 /*
774 * correct error detection baddies here again. it's the responsibility
775 * of the app to properly reap commands before close() if it wants
776 * fool-proof error detection
777 */
778 ret = bsg_complete_all_commands(bd);
779
780 blk_put_queue(bd->queue);
781 hlist_del(&bd->dev_list);
782 bsg_free_device(bd);
783out:
784 mutex_unlock(&bsg_mutex);
785 return ret;
786}
787
788static struct bsg_device *bsg_add_device(struct inode *inode,
789 struct gendisk *disk,
790 struct file *file)
791{
792 struct bsg_device *bd = NULL;
793#ifdef BSG_DEBUG
794 unsigned char buf[32];
795#endif
796
797 bd = bsg_alloc_device();
798 if (!bd)
799 return ERR_PTR(-ENOMEM);
800
801 bd->disk = disk;
802 bd->queue = disk->queue;
803 kobject_get(&disk->queue->kobj);
804 bsg_set_block(bd, file);
805
806 atomic_set(&bd->ref_count, 1);
807 bd->minor = iminor(inode);
808 mutex_lock(&bsg_mutex);
809 hlist_add_head(&bd->dev_list,&bsg_device_list[bsg_list_idx(bd->minor)]);
810
811 strncpy(bd->name, disk->disk_name, sizeof(bd->name) - 1);
812 dprintk("bound to <%s>, max queue %d\n",
FUJITA Tomonori9e69fbb2006-12-20 11:18:22 +0100813 format_dev_t(buf, inode->i_rdev), bd->max_queue);
Jens Axboe3d6392c2007-07-09 12:38:05 +0200814
815 mutex_unlock(&bsg_mutex);
816 return bd;
817}
818
819static struct bsg_device *__bsg_get_device(int minor)
820{
821 struct hlist_head *list = &bsg_device_list[bsg_list_idx(minor)];
822 struct bsg_device *bd = NULL;
823 struct hlist_node *entry;
824
825 mutex_lock(&bsg_mutex);
826
827 hlist_for_each(entry, list) {
828 bd = hlist_entry(entry, struct bsg_device, dev_list);
829 if (bd->minor == minor) {
830 atomic_inc(&bd->ref_count);
831 break;
832 }
833
834 bd = NULL;
835 }
836
837 mutex_unlock(&bsg_mutex);
838 return bd;
839}
840
841static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
842{
843 struct bsg_device *bd = __bsg_get_device(iminor(inode));
844 struct bsg_class_device *bcd, *__bcd;
845
846 if (bd)
847 return bd;
848
849 /*
850 * find the class device
851 */
852 bcd = NULL;
853 mutex_lock(&bsg_mutex);
854 list_for_each_entry(__bcd, &bsg_class_list, list) {
855 if (__bcd->minor == iminor(inode)) {
856 bcd = __bcd;
857 break;
858 }
859 }
860 mutex_unlock(&bsg_mutex);
861
862 if (!bcd)
863 return ERR_PTR(-ENODEV);
864
865 return bsg_add_device(inode, bcd->disk, file);
866}
867
868static int bsg_open(struct inode *inode, struct file *file)
869{
870 struct bsg_device *bd = bsg_get_device(inode, file);
871
872 if (IS_ERR(bd))
873 return PTR_ERR(bd);
874
875 file->private_data = bd;
876 return 0;
877}
878
879static int bsg_release(struct inode *inode, struct file *file)
880{
881 struct bsg_device *bd = file->private_data;
882
883 file->private_data = NULL;
884 return bsg_put_device(bd);
885}
886
887static unsigned int bsg_poll(struct file *file, poll_table *wait)
888{
889 struct bsg_device *bd = file->private_data;
890 unsigned int mask = 0;
891
892 poll_wait(file, &bd->wq_done, wait);
893 poll_wait(file, &bd->wq_free, wait);
894
895 spin_lock_irq(&bd->lock);
896 if (!list_empty(&bd->done_list))
897 mask |= POLLIN | POLLRDNORM;
898 if (bd->queued_cmds >= bd->max_queue)
899 mask |= POLLOUT;
900 spin_unlock_irq(&bd->lock);
901
902 return mask;
903}
904
905static int
906bsg_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
907 unsigned long arg)
908{
909 struct bsg_device *bd = file->private_data;
910 int __user *uarg = (int __user *) arg;
911
912 if (!bd)
913 return -ENXIO;
914
915 switch (cmd) {
916 /*
917 * our own ioctls
918 */
919 case SG_GET_COMMAND_Q:
920 return put_user(bd->max_queue, uarg);
921 case SG_SET_COMMAND_Q: {
922 int queue;
923
924 if (get_user(queue, uarg))
925 return -EFAULT;
926 if (queue > BSG_CMDS || queue < 1)
927 return -EINVAL;
928
929 bd->max_queue = queue;
930 return 0;
931 }
932
933 /*
934 * SCSI/sg ioctls
935 */
936 case SG_GET_VERSION_NUM:
937 case SCSI_IOCTL_GET_IDLUN:
938 case SCSI_IOCTL_GET_BUS_NUMBER:
939 case SG_SET_TIMEOUT:
940 case SG_GET_TIMEOUT:
941 case SG_GET_RESERVED_SIZE:
942 case SG_SET_RESERVED_SIZE:
943 case SG_EMULATED_HOST:
Jens Axboe3d6392c2007-07-09 12:38:05 +0200944 case SCSI_IOCTL_SEND_COMMAND: {
945 void __user *uarg = (void __user *) arg;
946 return scsi_cmd_ioctl(file, bd->disk, cmd, uarg);
947 }
FUJITA Tomonori70e36ec2006-12-20 11:20:15 +0100948 case SG_IO:
949 return -EINVAL;
Jens Axboe3d6392c2007-07-09 12:38:05 +0200950 /*
951 * block device ioctls
952 */
953 default:
954#if 0
955 return ioctl_by_bdev(bd->bdev, cmd, arg);
956#else
957 return -ENOTTY;
958#endif
959 }
960}
961
962static struct file_operations bsg_fops = {
963 .read = bsg_read,
964 .write = bsg_write,
965 .poll = bsg_poll,
966 .open = bsg_open,
967 .release = bsg_release,
968 .ioctl = bsg_ioctl,
969 .owner = THIS_MODULE,
970};
971
972void bsg_unregister_disk(struct gendisk *disk)
973{
974 struct bsg_class_device *bcd = &disk->bsg_dev;
975
976 if (!bcd->class_dev)
977 return;
978
979 mutex_lock(&bsg_mutex);
980 sysfs_remove_link(&bcd->disk->queue->kobj, "bsg");
981 class_device_destroy(bsg_class, MKDEV(BSG_MAJOR, bcd->minor));
982 bcd->class_dev = NULL;
983 list_del_init(&bcd->list);
984 mutex_unlock(&bsg_mutex);
985}
986
987int bsg_register_disk(struct gendisk *disk)
988{
989 request_queue_t *q = disk->queue;
990 struct bsg_class_device *bcd;
991 dev_t dev;
992
993 /*
994 * we need a proper transport to send commands, not a stacked device
995 */
996 if (!q->request_fn)
997 return 0;
998
999 bcd = &disk->bsg_dev;
1000 memset(bcd, 0, sizeof(*bcd));
1001 INIT_LIST_HEAD(&bcd->list);
1002
1003 mutex_lock(&bsg_mutex);
1004 dev = MKDEV(BSG_MAJOR, bsg_device_nr);
1005 bcd->minor = bsg_device_nr;
1006 bsg_device_nr++;
1007 bcd->disk = disk;
1008 bcd->class_dev = class_device_create(bsg_class, NULL, dev, bcd->dev, "%s", disk->disk_name);
1009 list_add_tail(&bcd->list, &bsg_class_list);
1010 sysfs_create_link(&q->kobj, &bcd->class_dev->kobj, "bsg");
1011 mutex_unlock(&bsg_mutex);
1012 return 0;
1013}
1014
1015static int __init bsg_init(void)
1016{
1017 int ret, i;
1018
1019 for (i = 0; i < BSG_LIST_SIZE; i++)
1020 INIT_HLIST_HEAD(&bsg_device_list[i]);
1021
1022 bsg_class = class_create(THIS_MODULE, "bsg");
1023 if (IS_ERR(bsg_class))
1024 return PTR_ERR(bsg_class);
1025
1026 ret = register_chrdev(BSG_MAJOR, "bsg", &bsg_fops);
1027 if (ret) {
1028 class_destroy(bsg_class);
1029 return ret;
1030 }
1031
1032 printk(KERN_INFO "%s loaded\n", bsg_version);
1033 return 0;
1034}
1035
1036MODULE_AUTHOR("Jens Axboe");
1037MODULE_DESCRIPTION("Block layer SGSI generic (sg) driver");
1038MODULE_LICENSE("GPL");
1039
1040subsys_initcall(bsg_init);