blob: c7067674dcb77ca8d1a2e4ee51f032a2bfb9aac8 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright (C) 2004 SUSE LINUX Products GmbH. All rights reserved.
3 * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
4 *
5 * This file is released under the GPL.
6 *
7 * Multipath support for EMC CLARiiON AX/CX-series hardware.
8 */
9
10#include "dm.h"
11#include "dm-hw-handler.h"
12#include <scsi/scsi.h>
13#include <scsi/scsi_cmnd.h>
14
15struct emc_handler {
16 spinlock_t lock;
17
18 /* Whether we should send the short trespass command (FC-series)
19 * or the long version (default for AX/CX CLARiiON arrays). */
20 unsigned short_trespass;
21 /* Whether or not to honor SCSI reservations when initiating a
22 * switch-over. Default: Don't. */
23 unsigned hr;
24
25 unsigned char sense[SCSI_SENSE_BUFFERSIZE];
26};
27
/* Page code placed in the trespass mode page sent with MODE SELECT. */
#define TRESPASS_PAGE		0x22
/* Timeout assigned to the failover request. */
#define EMC_FAILOVER_TIMEOUT	(60 * HZ)
30
31/* Code borrowed from dm-lsi-rdac by Mike Christie */
32
33static inline void free_bio(struct bio *bio)
34{
35 __free_page(bio->bi_io_vec[0].bv_page);
36 bio_put(bio);
37}
38
39static int emc_endio(struct bio *bio, unsigned int bytes_done, int error)
40{
41 struct path *path = bio->bi_private;
42
43 if (bio->bi_size)
44 return 1;
45
46 /* We also need to look at the sense keys here whether or not to
47 * switch to the next PG etc.
48 *
49 * For now simple logic: either it works or it doesn't.
50 */
51 if (error)
52 dm_pg_init_complete(path, MP_FAIL_PATH);
53 else
54 dm_pg_init_complete(path, 0);
55
56 /* request is freed in block layer */
57 free_bio(bio);
58
59 return 0;
60}
61
62static struct bio *get_failover_bio(struct path *path, unsigned data_size)
63{
64 struct bio *bio;
65 struct page *page;
66
67 bio = bio_alloc(GFP_ATOMIC, 1);
68 if (!bio) {
69 DMERR("dm-emc: get_failover_bio: bio_alloc() failed.");
70 return NULL;
71 }
72
73 bio->bi_rw |= (1 << BIO_RW);
74 bio->bi_bdev = path->dev->bdev;
75 bio->bi_sector = 0;
76 bio->bi_private = path;
77 bio->bi_end_io = emc_endio;
78
79 page = alloc_page(GFP_ATOMIC);
80 if (!page) {
81 DMERR("dm-emc: get_failover_bio: alloc_page() failed.");
82 bio_put(bio);
83 return NULL;
84 }
85
86 if (bio_add_page(bio, page, data_size, 0) != data_size) {
87 DMERR("dm-emc: get_failover_bio: alloc_page() failed.");
88 __free_page(page);
89 bio_put(bio);
90 return NULL;
91 }
92
93 return bio;
94}
95
96static struct request *get_failover_req(struct emc_handler *h,
97 struct bio *bio, struct path *path)
98{
99 struct request *rq;
100 struct block_device *bdev = bio->bi_bdev;
101 struct request_queue *q = bdev_get_queue(bdev);
102
103 /* FIXME: Figure out why it fails with GFP_ATOMIC. */
104 rq = blk_get_request(q, WRITE, __GFP_WAIT);
105 if (!rq) {
106 DMERR("dm-emc: get_failover_req: blk_get_request failed");
107 return NULL;
108 }
109
110 rq->bio = rq->biotail = bio;
111 blk_rq_bio_prep(q, rq, bio);
112
113 rq->rq_disk = bdev->bd_contains->bd_disk;
114
115 /* bio backed don't set data */
116 rq->buffer = rq->data = NULL;
117 /* rq data_len used for pc cmd's request_bufflen */
118 rq->data_len = bio->bi_size;
119
120 rq->sense = h->sense;
121 memset(rq->sense, 0, SCSI_SENSE_BUFFERSIZE);
122 rq->sense_len = 0;
123
124 memset(&rq->cmd, 0, BLK_MAX_CDB);
125
126 rq->timeout = EMC_FAILOVER_TIMEOUT;
127 rq->flags |= (REQ_BLOCK_PC | REQ_FAILFAST | REQ_NOMERGE);
128
129 return rq;
130}
131
132static struct request *emc_trespass_get(struct emc_handler *h,
133 struct path *path)
134{
135 struct bio *bio;
136 struct request *rq;
137 unsigned char *page22;
138 unsigned char long_trespass_pg[] = {
139 0, 0, 0, 0,
140 TRESPASS_PAGE, /* Page code */
141 0x09, /* Page length - 2 */
142 h->hr ? 0x01 : 0x81, /* Trespass code + Honor reservation bit */
143 0xff, 0xff, /* Trespass target */
144 0, 0, 0, 0, 0, 0 /* Reserved bytes / unknown */
145 };
146 unsigned char short_trespass_pg[] = {
147 0, 0, 0, 0,
148 TRESPASS_PAGE, /* Page code */
149 0x02, /* Page length - 2 */
150 h->hr ? 0x01 : 0x81, /* Trespass code + Honor reservation bit */
151 0xff, /* Trespass target */
152 };
153 unsigned data_size = h->short_trespass ? sizeof(short_trespass_pg) :
154 sizeof(long_trespass_pg);
155
156 /* get bio backing */
157 if (data_size > PAGE_SIZE)
158 /* this should never happen */
159 return NULL;
160
161 bio = get_failover_bio(path, data_size);
162 if (!bio) {
163 DMERR("dm-emc: emc_trespass_get: no bio");
164 return NULL;
165 }
166
167 page22 = (unsigned char *)bio_data(bio);
168 memset(page22, 0, data_size);
169
170 memcpy(page22, h->short_trespass ?
171 short_trespass_pg : long_trespass_pg, data_size);
172
173 /* get request for block layer packet command */
174 rq = get_failover_req(h, bio, path);
175 if (!rq) {
176 DMERR("dm-emc: emc_trespass_get: no rq");
177 free_bio(bio);
178 return NULL;
179 }
180
181 /* Prepare the command. */
182 rq->cmd[0] = MODE_SELECT;
183 rq->cmd[1] = 0x10;
184 rq->cmd[4] = data_size;
185 rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
186
187 return rq;
188}
189
190static void emc_pg_init(struct hw_handler *hwh, unsigned bypassed,
191 struct path *path)
192{
193 struct request *rq;
194 struct request_queue *q = bdev_get_queue(path->dev->bdev);
195
196 /*
197 * We can either blindly init the pg (then look at the sense),
198 * or we can send some commands to get the state here (then
199 * possibly send the fo cmnd), or we can also have the
200 * initial state passed into us and then get an update here.
201 */
202 if (!q) {
203 DMINFO("dm-emc: emc_pg_init: no queue");
204 goto fail_path;
205 }
206
207 /* FIXME: The request should be pre-allocated. */
208 rq = emc_trespass_get(hwh->context, path);
209 if (!rq) {
210 DMERR("dm-emc: emc_pg_init: no rq");
211 goto fail_path;
212 }
213
214 DMINFO("dm-emc: emc_pg_init: sending switch-over command");
215 elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 1);
216 return;
217
218fail_path:
219 dm_pg_init_complete(path, MP_FAIL_PATH);
220}
221
222static struct emc_handler *alloc_emc_handler(void)
223{
224 struct emc_handler *h = kmalloc(sizeof(*h), GFP_KERNEL);
225
Alasdair G Kergonf1daa402005-05-05 16:16:08 -0700226 if (h) {
227 memset(h, 0, sizeof(*h));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228 spin_lock_init(&h->lock);
Alasdair G Kergonf1daa402005-05-05 16:16:08 -0700229 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230
231 return h;
232}
233
234static int emc_create(struct hw_handler *hwh, unsigned argc, char **argv)
235{
236 struct emc_handler *h;
237 unsigned hr, short_trespass;
238
239 if (argc == 0) {
240 /* No arguments: use defaults */
241 hr = 0;
242 short_trespass = 0;
243 } else if (argc != 2) {
244 DMWARN("dm-emc hwhandler: incorrect number of arguments");
245 return -EINVAL;
246 } else {
247 if ((sscanf(argv[0], "%u", &short_trespass) != 1)
248 || (short_trespass > 1)) {
249 DMWARN("dm-emc: invalid trespass mode selected");
250 return -EINVAL;
251 }
252
253 if ((sscanf(argv[1], "%u", &hr) != 1)
254 || (hr > 1)) {
255 DMWARN("dm-emc: invalid honor reservation flag selected");
256 return -EINVAL;
257 }
258 }
259
260 h = alloc_emc_handler();
261 if (!h)
262 return -ENOMEM;
263
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 hwh->context = h;
265
266 if ((h->short_trespass = short_trespass))
267 DMWARN("dm-emc: short trespass command will be send");
268 else
269 DMWARN("dm-emc: long trespass command will be send");
270
271 if ((h->hr = hr))
272 DMWARN("dm-emc: honor reservation bit will be set");
273 else
274 DMWARN("dm-emc: honor reservation bit will not be set (default)");
275
276 return 0;
277}
278
279static void emc_destroy(struct hw_handler *hwh)
280{
281 struct emc_handler *h = (struct emc_handler *) hwh->context;
282
283 kfree(h);
284 hwh->context = NULL;
285}
286
/*
 * error hook: decide how multipath should react to a failed bio.
 *
 * The EMC-specific sense decoding below is compiled out; at present every
 * error is passed to dm_scsi_err_handler().
 */
static unsigned emc_error(struct hw_handler *hwh, struct bio *bio)
{
	/* FIXME: Patch from axboe still missing */
#if 0
	if (bio->bi_error & BIO_SENSE) {
		/* sense key / asc / ascq */
		int sense = bio->bi_error & 0xffffff;

		switch (sense) {
		case 0x020403:
			/* LUN Not Ready - Manual Intervention Required
			 * indicates this is a passive path.
			 *
			 * FIXME: However, if this is seen and EVPD C0
			 * indicates that this is due to a NDU in
			 * progress, we should set FAIL_PATH too.
			 * This indicates we might have to do a SCSI
			 * inquiry in the end_io path. Ugh. */
			return MP_BYPASS_PG | MP_RETRY_IO;
		case 0x052501:
			/* An array based copy is in progress. Do not
			 * fail the path, do not bypass to another PG,
			 * do not retry. Fail the IO immediately.
			 * (Actually this is the same conclusion as in
			 * the default handler, but lets make sure.) */
			return 0;
		case 0x062900:
			/* Unit Attention Code. This is the first IO
			 * to the new path, so just retry. */
			return MP_RETRY_IO;
		default:
			break;
		}
	}
#endif

	/* Try default handler */
	return dm_scsi_err_handler(hwh, bio);
}
324
325static struct hw_handler_type emc_hwh = {
326 .name = "emc",
327 .module = THIS_MODULE,
328 .create = emc_create,
329 .destroy = emc_destroy,
330 .pg_init = emc_pg_init,
331 .error = emc_error,
332};
333
334static int __init dm_emc_init(void)
335{
336 int r = dm_register_hw_handler(&emc_hwh);
337
338 if (r < 0)
339 DMERR("emc: register failed %d", r);
340
341 DMINFO("dm-emc version 0.0.3 loaded");
342
343 return r;
344}
345
346static void __exit dm_emc_exit(void)
347{
348 int r = dm_unregister_hw_handler(&emc_hwh);
349
350 if (r < 0)
351 DMERR("emc: unregister failed %d", r);
352}
353
354module_init(dm_emc_init);
355module_exit(dm_emc_exit);
356
357MODULE_DESCRIPTION(DM_NAME " EMC CX/AX/FC-family multipath");
358MODULE_AUTHOR("Lars Marowsky-Bree <lmb@suse.de>");
359MODULE_LICENSE("GPL");