/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

Mikulas Patocka586e80e2008-10-21 17:44:59 +01008#include <linux/device-mapper.h>
9
Mike Snitzer4cc96132016-05-12 16:28:10 -040010#include "dm-rq.h"
Mike Snitzer76e33fe2016-05-19 16:15:14 -040011#include "dm-bio-record.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070012#include "dm-path-selector.h"
Mike Andersonb15546f2007-10-19 22:48:02 +010013#include "dm-uevent.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070014
Mike Snitzere5863d92014-12-17 21:08:12 -050015#include <linux/blkdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <linux/ctype.h>
17#include <linux/init.h>
18#include <linux/mempool.h>
19#include <linux/module.h>
20#include <linux/pagemap.h>
21#include <linux/slab.h>
22#include <linux/time.h>
23#include <linux/workqueue.h>
Mikulas Patocka35991652012-06-03 00:29:58 +010024#include <linux/delay.h>
Chandra Seetharamancfae5c92008-05-01 14:50:11 -070025#include <scsi/scsi_dh.h>
Arun Sharma600634972011-07-26 16:09:06 -070026#include <linux/atomic.h>
Mike Snitzer78ce23b2016-01-31 17:38:28 -050027#include <linux/blk-mq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028
/* Message prefix for this target (consumed by the DM logging macros). */
#define DM_MSG_PREFIX "multipath"
/* Fallback pg_init retry delay (msecs) when none was configured. */
#define DM_PG_INIT_DELAY_MSECS 2000
/* Sentinel for multipath.pg_init_delay_msecs meaning "not configured". */
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
33/* Path properties */
34struct pgpath {
35 struct list_head list;
36
37 struct priority_group *pg; /* Owning PG */
38 unsigned fail_count; /* Cumulative failure count */
39
Josef "Jeff" Sipekc922d5f2006-12-08 02:36:33 -080040 struct dm_path path;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +000041 struct delayed_work activate_path;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -050042
43 bool is_active:1; /* Path status */
Linus Torvalds1da177e2005-04-16 15:20:36 -070044};
45
/* Map a pointer to the embedded 'path' member back to its struct pgpath. */
#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
47
48/*
49 * Paths are grouped into Priority Groups and numbered from 1 upwards.
50 * Each has a path selector which controls which path gets used.
51 */
52struct priority_group {
53 struct list_head list;
54
55 struct multipath *m; /* Owning multipath instance */
56 struct path_selector ps;
57
58 unsigned pg_num; /* Reference number */
Linus Torvalds1da177e2005-04-16 15:20:36 -070059 unsigned nr_pgpaths; /* Number of paths in PG */
60 struct list_head pgpaths;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -050061
62 bool bypassed:1; /* Temporarily bypass this PG? */
Linus Torvalds1da177e2005-04-16 15:20:36 -070063};
64
65/* Multipath context */
66struct multipath {
67 struct list_head list;
68 struct dm_target *ti;
69
Chandra Seetharamancfae5c92008-05-01 14:50:11 -070070 const char *hw_handler_name;
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -070071 char *hw_handler_params;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +000072
Mike Snitzer1fbdd2b2012-06-03 00:29:43 +010073 spinlock_t lock;
74
Linus Torvalds1da177e2005-04-16 15:20:36 -070075 unsigned nr_priority_groups;
76 struct list_head priority_groups;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +000077
78 wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
79
Linus Torvalds1da177e2005-04-16 15:20:36 -070080 struct pgpath *current_pgpath;
81 struct priority_group *current_pg;
82 struct priority_group *next_pg; /* Switch to this PG if set */
Linus Torvalds1da177e2005-04-16 15:20:36 -070083
Mike Snitzer518257b2016-03-17 16:32:10 -040084 unsigned long flags; /* Multipath state flags */
Mike Snitzer1fbdd2b2012-06-03 00:29:43 +010085
Dave Wysochanskic9e45582007-10-19 22:47:53 +010086 unsigned pg_init_retries; /* Number of times to retry pg_init */
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +000087 unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
Mike Snitzer91e968a2016-03-17 17:10:15 -040089 atomic_t nr_valid_paths; /* Total number of usable paths */
90 atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
91 atomic_t pg_init_count; /* Number of times pg_init called */
92
Linus Torvalds1da177e2005-04-16 15:20:36 -070093 /*
Alasdair G Kergon028867a2007-07-12 17:26:32 +010094 * We must use a mempool of dm_mpath_io structs so that we
Linus Torvalds1da177e2005-04-16 15:20:36 -070095 * can resubmit bios on error.
96 */
97 mempool_t *mpio_pool;
Mike Anderson6380f262009-12-10 23:52:21 +000098
99 struct mutex work_mutex;
Mike Snitzer20800cb2016-03-17 17:13:10 -0400100 struct work_struct trigger_event;
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400101
102 struct work_struct process_queued_bios;
103 struct bio_list queued_bios;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104};
105
106/*
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400107 * Context information attached to each io we process.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700108 */
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100109struct dm_mpath_io {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110 struct pgpath *pgpath;
Kiyoshi Ueda02ab8232009-06-22 10:12:27 +0100111 size_t nr_bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112};
113
/* Callback type taking a single pgpath; returns an int status. */
typedef int (*action_fn) (struct pgpath *pgpath);

/* Slab cache backing multipath.mpio_pool (see alloc_multipath()). */
static struct kmem_cache *_mpio_cache;

/*
 * kmultipathd runs the process_queued_bios work;
 * kmpath_handlerd runs the per-path activate_path delayed work.
 */
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);
static void process_queued_bios(struct work_struct *work);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700122
/*-----------------------------------------------
 * Multipath state flags.
 *
 * Bit numbers within multipath.flags, manipulated
 * with set_bit()/clear_bit()/test_bit().
 *-----------------------------------------------*/

#define MPATHF_QUEUE_IO 0			/* Must we queue all I/O? */
#define MPATHF_QUEUE_IF_NO_PATH 1		/* Queue I/O if last path fails? */
#define MPATHF_SAVED_QUEUE_IF_NO_PATH 2		/* Saved state during suspension */
#define MPATHF_RETAIN_ATTACHED_HW_HANDLER 3	/* If there's already a hw_handler present, don't change it. */
#define MPATHF_PG_INIT_DISABLED 4		/* pg_init is not currently allowed */
#define MPATHF_PG_INIT_REQUIRED 5		/* pg_init needs calling? */
#define MPATHF_PG_INIT_DELAY_RETRY 6		/* Delay pg_init retry? */
#define MPATHF_BIO_BASED 7			/* Device is bio-based? */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135
136/*-----------------------------------------------
137 * Allocation routines
138 *-----------------------------------------------*/
139
140static struct pgpath *alloc_pgpath(void)
141{
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700142 struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143
Mike Anderson224cb3e2008-08-29 09:36:09 +0200144 if (pgpath) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -0500145 pgpath->is_active = true;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000146 INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
Mike Anderson224cb3e2008-08-29 09:36:09 +0200147 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
149 return pgpath;
150}
151
/* Release a pgpath obtained from alloc_pgpath(). */
static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}
156
157static struct priority_group *alloc_priority_group(void)
158{
159 struct priority_group *pg;
160
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700161 pg = kzalloc(sizeof(*pg), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700163 if (pg)
164 INIT_LIST_HEAD(&pg->pgpaths);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700165
166 return pg;
167}
168
169static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
170{
171 struct pgpath *pgpath, *tmp;
172
173 list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
174 list_del(&pgpath->list);
175 dm_put_device(ti, pgpath->path.dev);
176 free_pgpath(pgpath);
177 }
178}
179
180static void free_priority_group(struct priority_group *pg,
181 struct dm_target *ti)
182{
183 struct path_selector *ps = &pg->ps;
184
185 if (ps->type) {
186 ps->type->destroy(ps);
187 dm_put_path_selector(ps->type);
188 }
189
190 free_pgpaths(&pg->pgpaths, ti);
191 kfree(pg);
192}
193
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400194static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq,
195 bool bio_based)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196{
197 struct multipath *m;
198
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700199 m = kzalloc(sizeof(*m), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 if (m) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 INIT_LIST_HEAD(&m->priority_groups);
202 spin_lock_init(&m->lock);
Mike Snitzer518257b2016-03-17 16:32:10 -0400203 set_bit(MPATHF_QUEUE_IO, &m->flags);
Mike Snitzer91e968a2016-03-17 17:10:15 -0400204 atomic_set(&m->nr_valid_paths, 0);
205 atomic_set(&m->pg_init_in_progress, 0);
206 atomic_set(&m->pg_init_count, 0);
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000207 m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
David Howellsc4028952006-11-22 14:57:56 +0000208 INIT_WORK(&m->trigger_event, trigger_event);
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000209 init_waitqueue_head(&m->pg_init_wait);
Mike Anderson6380f262009-12-10 23:52:21 +0000210 mutex_init(&m->work_mutex);
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500211
212 m->mpio_pool = NULL;
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400213 if (!use_blk_mq && !bio_based) {
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500214 unsigned min_ios = dm_get_reserved_rq_based_ios();
215
216 m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
217 if (!m->mpio_pool) {
218 kfree(m);
219 return NULL;
220 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700221 }
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500222
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400223 if (bio_based) {
224 INIT_WORK(&m->process_queued_bios, process_queued_bios);
225 set_bit(MPATHF_BIO_BASED, &m->flags);
226 /*
227 * bio-based doesn't support any direct scsi_dh management;
228 * it just discovers if a scsi_dh is attached.
229 */
230 set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
231 }
232
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700233 m->ti = ti;
234 ti->private = m;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700235 }
236
237 return m;
238}
239
240static void free_multipath(struct multipath *m)
241{
242 struct priority_group *pg, *tmp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243
244 list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
245 list_del(&pg->list);
246 free_priority_group(pg, m->ti);
247 }
248
Chandra Seetharamancfae5c92008-05-01 14:50:11 -0700249 kfree(m->hw_handler_name);
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700250 kfree(m->hw_handler_params);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 mempool_destroy(m->mpio_pool);
252 kfree(m);
253}
254
Mike Snitzer2eff1922016-02-03 09:13:14 -0500255static struct dm_mpath_io *get_mpio(union map_info *info)
256{
257 return info->ptr;
258}
259
260static struct dm_mpath_io *set_mpio(struct multipath *m, union map_info *info)
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100261{
262 struct dm_mpath_io *mpio;
263
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500264 if (!m->mpio_pool) {
265 /* Use blk-mq pdu memory requested via per_io_data_size */
Mike Snitzer2eff1922016-02-03 09:13:14 -0500266 mpio = get_mpio(info);
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500267 memset(mpio, 0, sizeof(*mpio));
268 return mpio;
269 }
270
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100271 mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
272 if (!mpio)
Mike Snitzer2eff1922016-02-03 09:13:14 -0500273 return NULL;
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100274
275 memset(mpio, 0, sizeof(*mpio));
276 info->ptr = mpio;
277
Mike Snitzer2eff1922016-02-03 09:13:14 -0500278 return mpio;
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100279}
280
Mike Snitzer2eff1922016-02-03 09:13:14 -0500281static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100282{
Mike Snitzer2eff1922016-02-03 09:13:14 -0500283 /* Only needed for non blk-mq (.request_fn) multipath */
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500284 if (m->mpio_pool) {
285 struct dm_mpath_io *mpio = info->ptr;
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100286
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500287 info->ptr = NULL;
288 mempool_free(mpio, m->mpio_pool);
289 }
Jun'ichi Nomura466891f2012-03-28 18:41:25 +0100290}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291
Mike Snitzerbf661be2016-05-24 15:48:08 -0400292static size_t multipath_per_bio_data_size(void)
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400293{
Mike Snitzerbf661be2016-05-24 15:48:08 -0400294 return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400295}
296
/* Return the dm_mpath_io at the start of @bio's per-bio data. */
static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
{
	return dm_per_bio_data(bio, multipath_per_bio_data_size());
}
301
302static struct dm_bio_details *get_bio_details_from_bio(struct bio *bio)
303{
304 /* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */
305 struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
306 void *bio_details = mpio + 1;
307
308 return bio_details;
309}
310
311static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p,
312 struct dm_bio_details **bio_details_p)
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400313{
314 struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
Mike Snitzerbf661be2016-05-24 15:48:08 -0400315 struct dm_bio_details *bio_details = get_bio_details_from_bio(bio);
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400316
317 memset(mpio, 0, sizeof(*mpio));
Mike Snitzerbf661be2016-05-24 15:48:08 -0400318 memset(bio_details, 0, sizeof(*bio_details));
319 dm_bio_record(bio_details, bio);
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400320
Mike Snitzerbf661be2016-05-24 15:48:08 -0400321 if (mpio_p)
322 *mpio_p = mpio;
323 if (bio_details_p)
324 *bio_details_p = bio_details;
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400325}
326
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327/*-----------------------------------------------
328 * Path selection
329 *-----------------------------------------------*/
330
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +0100331static int __pg_init_all_paths(struct multipath *m)
Kiyoshi Uedafb612642010-03-06 02:32:18 +0000332{
333 struct pgpath *pgpath;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000334 unsigned long pg_init_delay = 0;
Kiyoshi Uedafb612642010-03-06 02:32:18 +0000335
Mike Snitzer91e968a2016-03-17 17:10:15 -0400336 if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +0100337 return 0;
Hannes Reinecke17f4ff42014-02-28 15:33:42 +0100338
Mike Snitzer91e968a2016-03-17 17:10:15 -0400339 atomic_inc(&m->pg_init_count);
Mike Snitzer518257b2016-03-17 16:32:10 -0400340 clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +0100341
342 /* Check here to reset pg_init_required */
343 if (!m->current_pg)
344 return 0;
345
Mike Snitzer518257b2016-03-17 16:32:10 -0400346 if (test_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags))
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000347 pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
348 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
Kiyoshi Uedafb612642010-03-06 02:32:18 +0000349 list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
350 /* Skip failed paths */
351 if (!pgpath->is_active)
352 continue;
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +0000353 if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
354 pg_init_delay))
Mike Snitzer91e968a2016-03-17 17:10:15 -0400355 atomic_inc(&m->pg_init_in_progress);
Kiyoshi Uedafb612642010-03-06 02:32:18 +0000356 }
Mike Snitzer91e968a2016-03-17 17:10:15 -0400357 return atomic_read(&m->pg_init_in_progress);
Kiyoshi Uedafb612642010-03-06 02:32:18 +0000358}
359
Mike Snitzer2da16102016-03-17 18:38:17 -0400360static int pg_init_all_paths(struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361{
Mike Snitzer2da16102016-03-17 18:38:17 -0400362 int r;
363 unsigned long flags;
364
365 spin_lock_irqsave(&m->lock, flags);
366 r = __pg_init_all_paths(m);
367 spin_unlock_irqrestore(&m->lock, flags);
368
369 return r;
370}
371
372static void __switch_pg(struct multipath *m, struct priority_group *pg)
373{
374 m->current_pg = pg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375
376 /* Must we initialise the PG first, and queue I/O till it's ready? */
Chandra Seetharamancfae5c92008-05-01 14:50:11 -0700377 if (m->hw_handler_name) {
Mike Snitzer518257b2016-03-17 16:32:10 -0400378 set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
379 set_bit(MPATHF_QUEUE_IO, &m->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380 } else {
Mike Snitzer518257b2016-03-17 16:32:10 -0400381 clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
382 clear_bit(MPATHF_QUEUE_IO, &m->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 }
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100384
Mike Snitzer91e968a2016-03-17 17:10:15 -0400385 atomic_set(&m->pg_init_count, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386}
387
Mike Snitzer2da16102016-03-17 18:38:17 -0400388static struct pgpath *choose_path_in_pg(struct multipath *m,
389 struct priority_group *pg,
390 size_t nr_bytes)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391{
Mike Snitzer2da16102016-03-17 18:38:17 -0400392 unsigned long flags;
Josef "Jeff" Sipekc922d5f2006-12-08 02:36:33 -0800393 struct dm_path *path;
Mike Snitzer2da16102016-03-17 18:38:17 -0400394 struct pgpath *pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395
Mike Snitzer90a43232016-02-17 21:29:17 -0500396 path = pg->ps.type->select_path(&pg->ps, nr_bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 if (!path)
Mike Snitzer2da16102016-03-17 18:38:17 -0400398 return ERR_PTR(-ENXIO);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
Mike Snitzer2da16102016-03-17 18:38:17 -0400400 pgpath = path_to_pgpath(path);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401
Mike Snitzer2da16102016-03-17 18:38:17 -0400402 if (unlikely(lockless_dereference(m->current_pg) != pg)) {
403 /* Only update current_pgpath if pg changed */
404 spin_lock_irqsave(&m->lock, flags);
405 m->current_pgpath = pgpath;
406 __switch_pg(m, pg);
407 spin_unlock_irqrestore(&m->lock, flags);
408 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409
Mike Snitzer2da16102016-03-17 18:38:17 -0400410 return pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411}
412
Mike Snitzer2da16102016-03-17 18:38:17 -0400413static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414{
Mike Snitzer2da16102016-03-17 18:38:17 -0400415 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 struct priority_group *pg;
Mike Snitzer2da16102016-03-17 18:38:17 -0400417 struct pgpath *pgpath;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -0500418 bool bypassed = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
Mike Snitzer91e968a2016-03-17 17:10:15 -0400420 if (!atomic_read(&m->nr_valid_paths)) {
Mike Snitzer518257b2016-03-17 16:32:10 -0400421 clear_bit(MPATHF_QUEUE_IO, &m->flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700422 goto failed;
Benjamin Marzinski1f271972014-08-13 13:53:42 -0500423 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424
425 /* Were we instructed to switch PG? */
Mike Snitzer2da16102016-03-17 18:38:17 -0400426 if (lockless_dereference(m->next_pg)) {
427 spin_lock_irqsave(&m->lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428 pg = m->next_pg;
Mike Snitzer2da16102016-03-17 18:38:17 -0400429 if (!pg) {
430 spin_unlock_irqrestore(&m->lock, flags);
431 goto check_current_pg;
432 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433 m->next_pg = NULL;
Mike Snitzer2da16102016-03-17 18:38:17 -0400434 spin_unlock_irqrestore(&m->lock, flags);
435 pgpath = choose_path_in_pg(m, pg, nr_bytes);
436 if (!IS_ERR_OR_NULL(pgpath))
437 return pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 }
439
440 /* Don't change PG until it has no remaining paths */
Mike Snitzer2da16102016-03-17 18:38:17 -0400441check_current_pg:
442 pg = lockless_dereference(m->current_pg);
443 if (pg) {
444 pgpath = choose_path_in_pg(m, pg, nr_bytes);
445 if (!IS_ERR_OR_NULL(pgpath))
446 return pgpath;
447 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448
449 /*
450 * Loop through priority groups until we find a valid path.
451 * First time we skip PGs marked 'bypassed'.
Mike Christief220fd42012-06-03 00:29:45 +0100452 * Second time we only try the ones we skipped, but set
453 * pg_init_delay_retry so we do not hammer controllers.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 */
455 do {
456 list_for_each_entry(pg, &m->priority_groups, list) {
457 if (pg->bypassed == bypassed)
458 continue;
Mike Snitzer2da16102016-03-17 18:38:17 -0400459 pgpath = choose_path_in_pg(m, pg, nr_bytes);
460 if (!IS_ERR_OR_NULL(pgpath)) {
Mike Christief220fd42012-06-03 00:29:45 +0100461 if (!bypassed)
Mike Snitzer518257b2016-03-17 16:32:10 -0400462 set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
Mike Snitzer2da16102016-03-17 18:38:17 -0400463 return pgpath;
Mike Christief220fd42012-06-03 00:29:45 +0100464 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700465 }
466 } while (bypassed--);
467
468failed:
Mike Snitzer2da16102016-03-17 18:38:17 -0400469 spin_lock_irqsave(&m->lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470 m->current_pgpath = NULL;
471 m->current_pg = NULL;
Mike Snitzer2da16102016-03-17 18:38:17 -0400472 spin_unlock_irqrestore(&m->lock, flags);
473
474 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475}
476
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800477/*
478 * Check whether bios must be queued in the device-mapper core rather
479 * than here in the target.
480 *
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800481 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
482 * same value then we are not between multipath_presuspend()
483 * and multipath_resume() calls and we have no need to check
484 * for the DMF_NOFLUSH_SUSPENDING flag.
485 */
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400486static bool __must_push_back(struct multipath *m)
487{
488 return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
489 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
490 dm_noflush_suspending(m->ti));
491}
492
493static bool must_push_back_rq(struct multipath *m)
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800494{
Mike Snitzer518257b2016-03-17 16:32:10 -0400495 return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400496 __must_push_back(m));
497}
498
499static bool must_push_back_bio(struct multipath *m)
500{
501 return __must_push_back(m);
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800502}
503
Hannes Reinecke36fcffc2014-02-28 15:33:47 +0100504/*
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400505 * Map cloned requests (request-based multipath)
Hannes Reinecke36fcffc2014-02-28 15:33:47 +0100506 */
Mike Snitzere5863d92014-12-17 21:08:12 -0500507static int __multipath_map(struct dm_target *ti, struct request *clone,
508 union map_info *map_context,
509 struct request *rq, struct request **__clone)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510{
Mike Snitzer7943bd62016-02-02 21:53:15 -0500511 struct multipath *m = ti->private;
Hannes Reineckee3bde042014-02-28 15:33:46 +0100512 int r = DM_MAPIO_REQUEUE;
Mike Snitzere5863d92014-12-17 21:08:12 -0500513 size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 struct pgpath *pgpath;
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +0100515 struct block_device *bdev;
Hannes Reineckee3bde042014-02-28 15:33:46 +0100516 struct dm_mpath_io *mpio;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518 /* Do we need to select a new pgpath? */
Mike Snitzer2da16102016-03-17 18:38:17 -0400519 pgpath = lockless_dereference(m->current_pgpath);
520 if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
521 pgpath = choose_pgpath(m, nr_bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700522
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100523 if (!pgpath) {
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400524 if (!must_push_back_rq(m))
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100525 r = -EIO; /* Failed */
Mike Snitzer2da16102016-03-17 18:38:17 -0400526 return r;
Mike Snitzer518257b2016-03-17 16:32:10 -0400527 } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
528 test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
Mike Snitzer2da16102016-03-17 18:38:17 -0400529 pg_init_all_paths(m);
530 return r;
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100531 }
Mike Snitzer6afbc012014-07-08 11:55:09 -0400532
Mike Snitzer2eff1922016-02-03 09:13:14 -0500533 mpio = set_mpio(m, map_context);
534 if (!mpio)
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100535 /* ENOMEM, requeue */
Mike Snitzer2da16102016-03-17 18:38:17 -0400536 return r;
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100537
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100538 mpio->pgpath = pgpath;
539 mpio->nr_bytes = nr_bytes;
Keith Busch2eb6e1e2014-10-17 17:46:36 -0600540
541 bdev = pgpath->path.dev->bdev;
542
Mike Snitzere5863d92014-12-17 21:08:12 -0500543 if (clone) {
Mike Snitzerc5248f72016-02-20 14:02:49 -0500544 /*
545 * Old request-based interface: allocated clone is passed in.
546 * Used by: .request_fn stacked on .request_fn path(s).
547 */
Mike Snitzere5863d92014-12-17 21:08:12 -0500548 clone->q = bdev_get_queue(bdev);
549 clone->rq_disk = bdev->bd_disk;
550 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
551 } else {
Mike Snitzereca7ee62016-02-20 13:45:38 -0500552 /*
553 * blk-mq request-based interface; used by both:
554 * .request_fn stacked on blk-mq path(s) and
555 * blk-mq stacked on blk-mq path(s).
556 */
Mike Snitzer78ce23b2016-01-31 17:38:28 -0500557 *__clone = blk_mq_alloc_request(bdev_get_queue(bdev),
558 rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
Mike Snitzer4c6dd532015-05-27 15:23:56 -0400559 if (IS_ERR(*__clone)) {
Mike Snitzere5863d92014-12-17 21:08:12 -0500560 /* ENOMEM, requeue */
Mike Snitzer2eff1922016-02-03 09:13:14 -0500561 clear_request_fn_mpio(m, map_context);
Mike Snitzere5863d92014-12-17 21:08:12 -0500562 return r;
Mike Snitzer4c6dd532015-05-27 15:23:56 -0400563 }
Mike Snitzere5863d92014-12-17 21:08:12 -0500564 (*__clone)->bio = (*__clone)->biotail = NULL;
565 (*__clone)->rq_disk = bdev->bd_disk;
566 (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
567 }
568
Mike Snitzer9bf59a62014-02-28 15:33:48 +0100569 if (pgpath->pg->ps.type->start_io)
570 pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
571 &pgpath->path,
572 nr_bytes);
Keith Busch2eb6e1e2014-10-17 17:46:36 -0600573 return DM_MAPIO_REMAPPED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574}
575
Mike Snitzere5863d92014-12-17 21:08:12 -0500576static int multipath_map(struct dm_target *ti, struct request *clone,
577 union map_info *map_context)
578{
579 return __multipath_map(ti, clone, map_context, NULL, NULL);
580}
581
582static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
583 union map_info *map_context,
584 struct request **clone)
585{
586 return __multipath_map(ti, NULL, map_context, rq, clone);
587}
588
/* Free a clone request allocated with blk_mq_alloc_request(). */
static void multipath_release_clone(struct request *clone)
{
	blk_mq_free_request(clone);
}
593
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594/*
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400595 * Map cloned bios (bio-based multipath)
596 */
597static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_mpath_io *mpio)
598{
599 size_t nr_bytes = bio->bi_iter.bi_size;
600 struct pgpath *pgpath;
601 unsigned long flags;
602 bool queue_io;
603
604 /* Do we need to select a new pgpath? */
605 pgpath = lockless_dereference(m->current_pgpath);
606 queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
607 if (!pgpath || !queue_io)
608 pgpath = choose_pgpath(m, nr_bytes);
609
610 if ((pgpath && queue_io) ||
611 (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
612 /* Queue for the daemon to resubmit */
613 spin_lock_irqsave(&m->lock, flags);
614 bio_list_add(&m->queued_bios, bio);
615 spin_unlock_irqrestore(&m->lock, flags);
616 /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */
617 if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
618 pg_init_all_paths(m);
619 else if (!queue_io)
620 queue_work(kmultipathd, &m->process_queued_bios);
621 return DM_MAPIO_SUBMITTED;
622 }
623
624 if (!pgpath) {
625 if (!must_push_back_bio(m))
626 return -EIO;
627 return DM_MAPIO_REQUEUE;
628 }
629
630 mpio->pgpath = pgpath;
631 mpio->nr_bytes = nr_bytes;
632
633 bio->bi_error = 0;
634 bio->bi_bdev = pgpath->path.dev->bdev;
635 bio->bi_rw |= REQ_FAILFAST_TRANSPORT;
636
637 if (pgpath->pg->ps.type->start_io)
638 pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
639 &pgpath->path,
640 nr_bytes);
641 return DM_MAPIO_REMAPPED;
642}
643
644static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
645{
646 struct multipath *m = ti->private;
Mike Snitzerbf661be2016-05-24 15:48:08 -0400647 struct dm_mpath_io *mpio = NULL;
648
649 multipath_init_per_bio_data(bio, &mpio, NULL);
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400650
651 return __multipath_map_bio(m, bio, mpio);
652}
653
654static void process_queued_bios_list(struct multipath *m)
655{
656 if (test_bit(MPATHF_BIO_BASED, &m->flags))
657 queue_work(kmultipathd, &m->process_queued_bios);
658}
659
660static void process_queued_bios(struct work_struct *work)
661{
662 int r;
663 unsigned long flags;
664 struct bio *bio;
665 struct bio_list bios;
666 struct blk_plug plug;
667 struct multipath *m =
668 container_of(work, struct multipath, process_queued_bios);
669
670 bio_list_init(&bios);
671
672 spin_lock_irqsave(&m->lock, flags);
673
674 if (bio_list_empty(&m->queued_bios)) {
675 spin_unlock_irqrestore(&m->lock, flags);
676 return;
677 }
678
679 bio_list_merge(&bios, &m->queued_bios);
680 bio_list_init(&m->queued_bios);
681
682 spin_unlock_irqrestore(&m->lock, flags);
683
684 blk_start_plug(&plug);
685 while ((bio = bio_list_pop(&bios))) {
686 r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
687 if (r < 0 || r == DM_MAPIO_REQUEUE) {
688 bio->bi_error = r;
689 bio_endio(bio);
690 } else if (r == DM_MAPIO_REMAPPED)
691 generic_make_request(bio);
692 }
693 blk_finish_plug(&plug);
694}
695
696/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 * If we run out of usable paths, should we queue I/O or error it?
698 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -0500699static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
700 bool save_old_value)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700701{
702 unsigned long flags;
703
704 spin_lock_irqsave(&m->lock, flags);
705
Mike Snitzer518257b2016-03-17 16:32:10 -0400706 if (save_old_value) {
707 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
708 set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
709 else
710 clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
711 } else {
712 if (queue_if_no_path)
713 set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
714 else
715 clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
716 }
717 if (queue_if_no_path)
718 set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
Alasdair G Kergon485ef692005-09-27 21:45:45 -0700719 else
Mike Snitzer518257b2016-03-17 16:32:10 -0400720 clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
721
Linus Torvalds1da177e2005-04-16 15:20:36 -0700722 spin_unlock_irqrestore(&m->lock, flags);
723
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400724 if (!queue_if_no_path) {
Hannes Reinecke63d832c2014-05-26 14:45:39 +0200725 dm_table_run_md_queue_async(m->ti->table);
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400726 process_queued_bios_list(m);
727 }
Hannes Reinecke63d832c2014-05-26 14:45:39 +0200728
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729 return 0;
730}
731
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732/*
733 * An event is triggered whenever a path is taken out of use.
734 * Includes path failure and PG bypass.
735 */
David Howellsc4028952006-11-22 14:57:56 +0000736static void trigger_event(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700737{
David Howellsc4028952006-11-22 14:57:56 +0000738 struct multipath *m =
739 container_of(work, struct multipath, trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740
741 dm_table_event(m->ti->table);
742}
743
744/*-----------------------------------------------------------------
745 * Constructor/argument parsing:
746 * <#multipath feature args> [<arg>]*
747 * <#hw_handler args> [hw_handler [<arg>]*]
748 * <#priority groups>
749 * <initial priority group>
750 * [<selector> <#selector args> [<arg>]*
751 * <#paths> <#per-path selector args>
752 * [<path> [<arg>]* ]+ ]+
753 *---------------------------------------------------------------*/
Mike Snitzer498f0102011-08-02 12:32:04 +0100754static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700755 struct dm_target *ti)
756{
757 int r;
758 struct path_selector_type *pst;
759 unsigned ps_argc;
760
Mike Snitzer498f0102011-08-02 12:32:04 +0100761 static struct dm_arg _args[] = {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700762 {0, 1024, "invalid number of path selector args"},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 };
764
Mike Snitzer498f0102011-08-02 12:32:04 +0100765 pst = dm_get_path_selector(dm_shift_arg(as));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 if (!pst) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700767 ti->error = "unknown path selector type";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 return -EINVAL;
769 }
770
Mike Snitzer498f0102011-08-02 12:32:04 +0100771 r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
Mikulas Patocka371b2e32008-07-21 12:00:24 +0100772 if (r) {
773 dm_put_path_selector(pst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 return -EINVAL;
Mikulas Patocka371b2e32008-07-21 12:00:24 +0100775 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776
777 r = pst->create(&pg->ps, ps_argc, as->argv);
778 if (r) {
779 dm_put_path_selector(pst);
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700780 ti->error = "path selector constructor failed";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 return r;
782 }
783
784 pg->ps.type = pst;
Mike Snitzer498f0102011-08-02 12:32:04 +0100785 dm_consume_args(as, ps_argc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786
787 return 0;
788}
789
Mike Snitzer498f0102011-08-02 12:32:04 +0100790static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700791 struct dm_target *ti)
792{
793 int r;
794 struct pgpath *p;
Hannes Reineckeae11b1b2008-07-17 17:49:02 -0700795 struct multipath *m = ti->private;
Mike Snitzera58a9352012-07-27 15:08:04 +0100796 struct request_queue *q = NULL;
797 const char *attached_handler_name;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798
799 /* we need at least a path arg */
800 if (as->argc < 1) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700801 ti->error = "no device given";
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100802 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803 }
804
805 p = alloc_pgpath();
806 if (!p)
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100807 return ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808
Mike Snitzer498f0102011-08-02 12:32:04 +0100809 r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
Nikanth Karthikesan8215d6e2010-03-06 02:32:27 +0000810 &p->path.dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811 if (r) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700812 ti->error = "error getting device";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 goto bad;
814 }
815
Mike Snitzer518257b2016-03-17 16:32:10 -0400816 if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) || m->hw_handler_name)
Mike Snitzera58a9352012-07-27 15:08:04 +0100817 q = bdev_get_queue(p->path.dev->bdev);
Hannes Reineckea0cf7ea2009-06-22 10:12:11 +0100818
Mike Snitzer518257b2016-03-17 16:32:10 -0400819 if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
Christoph Hellwig1bab0de2015-08-27 14:16:54 +0200820retain:
Mike Snitzera58a9352012-07-27 15:08:04 +0100821 attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
822 if (attached_handler_name) {
823 /*
824 * Reset hw_handler_name to match the attached handler
825 * and clear any hw_handler_params associated with the
826 * ignored handler.
827 *
828 * NB. This modifies the table line to show the actual
829 * handler instead of the original table passed in.
830 */
831 kfree(m->hw_handler_name);
832 m->hw_handler_name = attached_handler_name;
833
834 kfree(m->hw_handler_params);
835 m->hw_handler_params = NULL;
836 }
837 }
838
839 if (m->hw_handler_name) {
Hannes Reineckea0cf7ea2009-06-22 10:12:11 +0100840 r = scsi_dh_attach(q, m->hw_handler_name);
841 if (r == -EBUSY) {
Christoph Hellwig1bab0de2015-08-27 14:16:54 +0200842 char b[BDEVNAME_SIZE];
Hannes Reineckea0cf7ea2009-06-22 10:12:11 +0100843
Christoph Hellwig1bab0de2015-08-27 14:16:54 +0200844 printk(KERN_INFO "dm-mpath: retaining handler on device %s\n",
845 bdevname(p->path.dev->bdev, b));
846 goto retain;
847 }
Hannes Reineckeae11b1b2008-07-17 17:49:02 -0700848 if (r < 0) {
Hannes Reineckea0cf7ea2009-06-22 10:12:11 +0100849 ti->error = "error attaching hardware handler";
Hannes Reineckeae11b1b2008-07-17 17:49:02 -0700850 dm_put_device(ti, p->path.dev);
851 goto bad;
852 }
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700853
854 if (m->hw_handler_params) {
855 r = scsi_dh_set_params(q, m->hw_handler_params);
856 if (r < 0) {
857 ti->error = "unable to set hardware "
858 "handler parameters";
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700859 dm_put_device(ti, p->path.dev);
860 goto bad;
861 }
862 }
Hannes Reineckeae11b1b2008-07-17 17:49:02 -0700863 }
864
Linus Torvalds1da177e2005-04-16 15:20:36 -0700865 r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
866 if (r) {
867 dm_put_device(ti, p->path.dev);
868 goto bad;
869 }
870
871 return p;
872
873 bad:
874 free_pgpath(p);
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100875 return ERR_PTR(r);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876}
877
Mike Snitzer498f0102011-08-02 12:32:04 +0100878static struct priority_group *parse_priority_group(struct dm_arg_set *as,
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700879 struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880{
Mike Snitzer498f0102011-08-02 12:32:04 +0100881 static struct dm_arg _args[] = {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700882 {1, 1024, "invalid number of paths"},
883 {0, 1024, "invalid number of selector args"}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884 };
885
886 int r;
Mike Snitzer498f0102011-08-02 12:32:04 +0100887 unsigned i, nr_selector_args, nr_args;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888 struct priority_group *pg;
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700889 struct dm_target *ti = m->ti;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890
891 if (as->argc < 2) {
892 as->argc = 0;
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100893 ti->error = "not enough priority group arguments";
894 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700895 }
896
897 pg = alloc_priority_group();
898 if (!pg) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700899 ti->error = "couldn't allocate priority group";
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100900 return ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901 }
902 pg->m = m;
903
904 r = parse_path_selector(as, pg, ti);
905 if (r)
906 goto bad;
907
908 /*
909 * read the paths
910 */
Mike Snitzer498f0102011-08-02 12:32:04 +0100911 r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 if (r)
913 goto bad;
914
Mike Snitzer498f0102011-08-02 12:32:04 +0100915 r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916 if (r)
917 goto bad;
918
Mike Snitzer498f0102011-08-02 12:32:04 +0100919 nr_args = 1 + nr_selector_args;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700920 for (i = 0; i < pg->nr_pgpaths; i++) {
921 struct pgpath *pgpath;
Mike Snitzer498f0102011-08-02 12:32:04 +0100922 struct dm_arg_set path_args;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700923
Mike Snitzer498f0102011-08-02 12:32:04 +0100924 if (as->argc < nr_args) {
Mikulas Patocka148acff2008-07-21 12:00:30 +0100925 ti->error = "not enough path parameters";
Alasdair G Kergon6bbf79a2010-08-12 04:13:49 +0100926 r = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927 goto bad;
Mikulas Patocka148acff2008-07-21 12:00:30 +0100928 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929
Mike Snitzer498f0102011-08-02 12:32:04 +0100930 path_args.argc = nr_args;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700931 path_args.argv = as->argv;
932
933 pgpath = parse_path(&path_args, &pg->ps, ti);
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100934 if (IS_ERR(pgpath)) {
935 r = PTR_ERR(pgpath);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 goto bad;
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100937 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700938
939 pgpath->pg = pg;
940 list_add_tail(&pgpath->list, &pg->pgpaths);
Mike Snitzer498f0102011-08-02 12:32:04 +0100941 dm_consume_args(as, nr_args);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 }
943
944 return pg;
945
946 bad:
947 free_priority_group(pg, ti);
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100948 return ERR_PTR(r);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949}
950
Mike Snitzer498f0102011-08-02 12:32:04 +0100951static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 unsigned hw_argc;
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700954 int ret;
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700955 struct dm_target *ti = m->ti;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956
Mike Snitzer498f0102011-08-02 12:32:04 +0100957 static struct dm_arg _args[] = {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700958 {0, 1024, "invalid number of hardware handler args"},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 };
960
Mike Snitzer498f0102011-08-02 12:32:04 +0100961 if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962 return -EINVAL;
963
964 if (!hw_argc)
965 return 0;
966
Mike Snitzer76e33fe2016-05-19 16:15:14 -0400967 if (test_bit(MPATHF_BIO_BASED, &m->flags)) {
968 dm_consume_args(as, hw_argc);
969 DMERR("bio-based multipath doesn't allow hardware handler args");
970 return 0;
971 }
972
Mike Snitzer498f0102011-08-02 12:32:04 +0100973 m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
Chandra Seetharaman14e98c52008-11-13 23:39:06 +0000974
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700975 if (hw_argc > 1) {
976 char *p;
977 int i, j, len = 4;
978
979 for (i = 0; i <= hw_argc - 2; i++)
980 len += strlen(as->argv[i]) + 1;
981 p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
982 if (!p) {
983 ti->error = "memory allocation failed";
984 ret = -ENOMEM;
985 goto fail;
986 }
987 j = sprintf(p, "%d", hw_argc - 1);
988 for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
989 j = sprintf(p, "%s", as->argv[i]);
990 }
Mike Snitzer498f0102011-08-02 12:32:04 +0100991 dm_consume_args(as, hw_argc - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992
993 return 0;
Chandra Seetharaman2bfd2e12009-08-03 12:42:45 -0700994fail:
995 kfree(m->hw_handler_name);
996 m->hw_handler_name = NULL;
997 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998}
999
Mike Snitzer498f0102011-08-02 12:32:04 +01001000static int parse_features(struct dm_arg_set *as, struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001001{
1002 int r;
1003 unsigned argc;
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -07001004 struct dm_target *ti = m->ti;
Mike Snitzer498f0102011-08-02 12:32:04 +01001005 const char *arg_name;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006
Mike Snitzer498f0102011-08-02 12:32:04 +01001007 static struct dm_arg _args[] = {
Mike Snitzera58a9352012-07-27 15:08:04 +01001008 {0, 6, "invalid number of feature args"},
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001009 {1, 50, "pg_init_retries must be between 1 and 50"},
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001010 {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
Linus Torvalds1da177e2005-04-16 15:20:36 -07001011 };
1012
Mike Snitzer498f0102011-08-02 12:32:04 +01001013 r = dm_read_arg_group(_args, as, &argc, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 if (r)
1015 return -EINVAL;
1016
1017 if (!argc)
1018 return 0;
1019
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001020 do {
Mike Snitzer498f0102011-08-02 12:32:04 +01001021 arg_name = dm_shift_arg(as);
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001022 argc--;
1023
Mike Snitzer498f0102011-08-02 12:32:04 +01001024 if (!strcasecmp(arg_name, "queue_if_no_path")) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001025 r = queue_if_no_path(m, true, false);
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001026 continue;
1027 }
1028
Mike Snitzera58a9352012-07-27 15:08:04 +01001029 if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
Mike Snitzer518257b2016-03-17 16:32:10 -04001030 set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
Mike Snitzera58a9352012-07-27 15:08:04 +01001031 continue;
1032 }
1033
Mike Snitzer498f0102011-08-02 12:32:04 +01001034 if (!strcasecmp(arg_name, "pg_init_retries") &&
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001035 (argc >= 1)) {
Mike Snitzer498f0102011-08-02 12:32:04 +01001036 r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001037 argc--;
1038 continue;
1039 }
1040
Mike Snitzer498f0102011-08-02 12:32:04 +01001041 if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001042 (argc >= 1)) {
Mike Snitzer498f0102011-08-02 12:32:04 +01001043 r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001044 argc--;
1045 continue;
1046 }
1047
Linus Torvalds1da177e2005-04-16 15:20:36 -07001048 ti->error = "Unrecognised multipath feature request";
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001049 r = -EINVAL;
1050 } while (argc && !r);
1051
1052 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001053}
1054
Mike Snitzer76e33fe2016-05-19 16:15:14 -04001055static int __multipath_ctr(struct dm_target *ti, unsigned int argc,
1056 char **argv, bool bio_based)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057{
Mike Snitzer498f0102011-08-02 12:32:04 +01001058 /* target arguments */
1059 static struct dm_arg _args[] = {
Mike Snitzera490a072011-03-24 13:54:33 +00001060 {0, 1024, "invalid number of priority groups"},
1061 {0, 1024, "invalid initial priority group number"},
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 };
1063
1064 int r;
1065 struct multipath *m;
Mike Snitzer498f0102011-08-02 12:32:04 +01001066 struct dm_arg_set as;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001067 unsigned pg_count = 0;
1068 unsigned next_pg_num;
Mike Snitzer8637a6b2016-01-31 12:08:36 -05001069 bool use_blk_mq = dm_use_blk_mq(dm_table_get_md(ti->table));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001070
1071 as.argc = argc;
1072 as.argv = argv;
1073
Mike Snitzer76e33fe2016-05-19 16:15:14 -04001074 m = alloc_multipath(ti, use_blk_mq, bio_based);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 if (!m) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001076 ti->error = "can't allocate multipath";
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077 return -EINVAL;
1078 }
1079
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -07001080 r = parse_features(&as, m);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 if (r)
1082 goto bad;
1083
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -07001084 r = parse_hw_handler(&as, m);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085 if (r)
1086 goto bad;
1087
Mike Snitzer498f0102011-08-02 12:32:04 +01001088 r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 if (r)
1090 goto bad;
1091
Mike Snitzer498f0102011-08-02 12:32:04 +01001092 r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 if (r)
1094 goto bad;
1095
Mike Snitzera490a072011-03-24 13:54:33 +00001096 if ((!m->nr_priority_groups && next_pg_num) ||
1097 (m->nr_priority_groups && !next_pg_num)) {
1098 ti->error = "invalid initial priority group";
1099 r = -EINVAL;
1100 goto bad;
1101 }
1102
Linus Torvalds1da177e2005-04-16 15:20:36 -07001103 /* parse the priority groups */
1104 while (as.argc) {
1105 struct priority_group *pg;
Mike Snitzer91e968a2016-03-17 17:10:15 -04001106 unsigned nr_valid_paths = atomic_read(&m->nr_valid_paths);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -07001108 pg = parse_priority_group(&as, m);
Benjamin Marzinski01460f32008-10-10 13:36:57 +01001109 if (IS_ERR(pg)) {
1110 r = PTR_ERR(pg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111 goto bad;
1112 }
1113
Mike Snitzer91e968a2016-03-17 17:10:15 -04001114 nr_valid_paths += pg->nr_pgpaths;
1115 atomic_set(&m->nr_valid_paths, nr_valid_paths);
1116
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 list_add_tail(&pg->list, &m->priority_groups);
1118 pg_count++;
1119 pg->pg_num = pg_count;
1120 if (!--next_pg_num)
1121 m->next_pg = pg;
1122 }
1123
1124 if (pg_count != m->nr_priority_groups) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001125 ti->error = "priority group count mismatch";
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 r = -EINVAL;
1127 goto bad;
1128 }
1129
Alasdair G Kergon55a62ee2013-03-01 22:45:47 +00001130 ti->num_flush_bios = 1;
1131 ti->num_discard_bios = 1;
Mike Snitzer042bcef2013-05-10 14:37:16 +01001132 ti->num_write_same_bios = 1;
Mike Snitzerbf661be2016-05-24 15:48:08 -04001133 if (bio_based)
1134 ti->per_io_data_size = multipath_per_bio_data_size();
1135 else if (use_blk_mq)
Mike Snitzer8637a6b2016-01-31 12:08:36 -05001136 ti->per_io_data_size = sizeof(struct dm_mpath_io);
Mikulas Patocka86279212009-06-22 10:12:24 +01001137
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 return 0;
1139
1140 bad:
1141 free_multipath(m);
1142 return r;
1143}
1144
Mike Snitzer76e33fe2016-05-19 16:15:14 -04001145static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
1146{
1147 return __multipath_ctr(ti, argc, argv, false);
1148}
1149
1150static int multipath_bio_ctr(struct dm_target *ti, unsigned argc, char **argv)
1151{
1152 return __multipath_ctr(ti, argc, argv, true);
1153}
1154
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001155static void multipath_wait_for_pg_init_completion(struct multipath *m)
1156{
1157 DECLARE_WAITQUEUE(wait, current);
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001158
1159 add_wait_queue(&m->pg_init_wait, &wait);
1160
1161 while (1) {
1162 set_current_state(TASK_UNINTERRUPTIBLE);
1163
Mike Snitzer91e968a2016-03-17 17:10:15 -04001164 if (!atomic_read(&m->pg_init_in_progress))
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001165 break;
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001166
1167 io_schedule();
1168 }
1169 set_current_state(TASK_RUNNING);
1170
1171 remove_wait_queue(&m->pg_init_wait, &wait);
1172}
1173
1174static void flush_multipath_work(struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175{
Mike Snitzer518257b2016-03-17 16:32:10 -04001176 set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
1177 smp_mb__after_atomic();
Shiva Krishna Merla954a73d2013-10-30 03:26:38 +00001178
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001179 flush_workqueue(kmpath_handlerd);
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001180 multipath_wait_for_pg_init_completion(m);
Alasdair G Kergona044d012005-07-12 15:53:02 -07001181 flush_workqueue(kmultipathd);
Tejun Heo43829732012-08-20 14:51:24 -07001182 flush_work(&m->trigger_event);
Shiva Krishna Merla954a73d2013-10-30 03:26:38 +00001183
Mike Snitzer518257b2016-03-17 16:32:10 -04001184 clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
1185 smp_mb__after_atomic();
Kiyoshi Ueda6df400a2009-12-10 23:52:19 +00001186}
1187
1188static void multipath_dtr(struct dm_target *ti)
1189{
1190 struct multipath *m = ti->private;
1191
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001192 flush_multipath_work(m);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001193 free_multipath(m);
1194}
1195
1196/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001197 * Take a path out of use.
1198 */
1199static int fail_path(struct pgpath *pgpath)
1200{
1201 unsigned long flags;
1202 struct multipath *m = pgpath->pg->m;
1203
1204 spin_lock_irqsave(&m->lock, flags);
1205
Kiyoshi Ueda66800732008-10-10 13:36:58 +01001206 if (!pgpath->is_active)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001207 goto out;
1208
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001209 DMWARN("Failing path %s.", pgpath->path.dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001210
1211 pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001212 pgpath->is_active = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213 pgpath->fail_count++;
1214
Mike Snitzer91e968a2016-03-17 17:10:15 -04001215 atomic_dec(&m->nr_valid_paths);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216
1217 if (pgpath == m->current_pgpath)
1218 m->current_pgpath = NULL;
1219
Mike Andersonb15546f2007-10-19 22:48:02 +01001220 dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
Mike Snitzer91e968a2016-03-17 17:10:15 -04001221 pgpath->path.dev->name, atomic_read(&m->nr_valid_paths));
Mike Andersonb15546f2007-10-19 22:48:02 +01001222
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001223 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224
1225out:
1226 spin_unlock_irqrestore(&m->lock, flags);
1227
1228 return 0;
1229}
1230
1231/*
1232 * Reinstate a previously-failed path
1233 */
1234static int reinstate_path(struct pgpath *pgpath)
1235{
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001236 int r = 0, run_queue = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001237 unsigned long flags;
1238 struct multipath *m = pgpath->pg->m;
Mike Snitzer91e968a2016-03-17 17:10:15 -04001239 unsigned nr_valid_paths;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001240
1241 spin_lock_irqsave(&m->lock, flags);
1242
Kiyoshi Ueda66800732008-10-10 13:36:58 +01001243 if (pgpath->is_active)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001244 goto out;
1245
Mike Snitzerec31f3f2016-02-20 12:49:43 -05001246 DMWARN("Reinstating path %s.", pgpath->path.dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001247
1248 r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
1249 if (r)
1250 goto out;
1251
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001252 pgpath->is_active = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001253
Mike Snitzer91e968a2016-03-17 17:10:15 -04001254 nr_valid_paths = atomic_inc_return(&m->nr_valid_paths);
1255 if (nr_valid_paths == 1) {
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001256 m->current_pgpath = NULL;
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001257 run_queue = 1;
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001258 } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001259 if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
Mike Snitzer91e968a2016-03-17 17:10:15 -04001260 atomic_inc(&m->pg_init_in_progress);
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001261 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262
Mike Andersonb15546f2007-10-19 22:48:02 +01001263 dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
Mike Snitzer91e968a2016-03-17 17:10:15 -04001264 pgpath->path.dev->name, nr_valid_paths);
Mike Andersonb15546f2007-10-19 22:48:02 +01001265
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001266 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267
1268out:
1269 spin_unlock_irqrestore(&m->lock, flags);
Mike Snitzer76e33fe2016-05-19 16:15:14 -04001270 if (run_queue) {
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001271 dm_table_run_md_queue_async(m->ti->table);
Mike Snitzer76e33fe2016-05-19 16:15:14 -04001272 process_queued_bios_list(m);
1273 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001274
1275 return r;
1276}
1277
1278/*
1279 * Fail or reinstate all paths that match the provided struct dm_dev.
1280 */
1281static int action_dev(struct multipath *m, struct dm_dev *dev,
1282 action_fn action)
1283{
Mike Snitzer19040c02011-03-24 13:54:31 +00001284 int r = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001285 struct pgpath *pgpath;
1286 struct priority_group *pg;
1287
1288 list_for_each_entry(pg, &m->priority_groups, list) {
1289 list_for_each_entry(pgpath, &pg->pgpaths, list) {
1290 if (pgpath->path.dev == dev)
1291 r = action(pgpath);
1292 }
1293 }
1294
1295 return r;
1296}
1297
1298/*
1299 * Temporarily try to avoid having to use the specified PG
1300 */
1301static void bypass_pg(struct multipath *m, struct priority_group *pg,
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001302 bool bypassed)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001303{
1304 unsigned long flags;
1305
1306 spin_lock_irqsave(&m->lock, flags);
1307
1308 pg->bypassed = bypassed;
1309 m->current_pgpath = NULL;
1310 m->current_pg = NULL;
1311
1312 spin_unlock_irqrestore(&m->lock, flags);
1313
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001314 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001315}
1316
1317/*
1318 * Switch to using the specified PG from the next I/O that gets mapped
1319 */
1320static int switch_pg_num(struct multipath *m, const char *pgstr)
1321{
1322 struct priority_group *pg;
1323 unsigned pgnum;
1324 unsigned long flags;
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001325 char dummy;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001327 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 (pgnum > m->nr_priority_groups)) {
1329 DMWARN("invalid PG number supplied to switch_pg_num");
1330 return -EINVAL;
1331 }
1332
1333 spin_lock_irqsave(&m->lock, flags);
1334 list_for_each_entry(pg, &m->priority_groups, list) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001335 pg->bypassed = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 if (--pgnum)
1337 continue;
1338
1339 m->current_pgpath = NULL;
1340 m->current_pg = NULL;
1341 m->next_pg = pg;
1342 }
1343 spin_unlock_irqrestore(&m->lock, flags);
1344
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001345 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346 return 0;
1347}
1348
1349/*
1350 * Set/clear bypassed status of a PG.
1351 * PGs are numbered upwards from 1 in the order they were declared.
1352 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001353static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354{
1355 struct priority_group *pg;
1356 unsigned pgnum;
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001357 char dummy;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001358
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001359 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 (pgnum > m->nr_priority_groups)) {
1361 DMWARN("invalid PG number supplied to bypass_pg");
1362 return -EINVAL;
1363 }
1364
1365 list_for_each_entry(pg, &m->priority_groups, list) {
1366 if (!--pgnum)
1367 break;
1368 }
1369
1370 bypass_pg(m, pg, bypassed);
1371 return 0;
1372}
1373
1374/*
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001375 * Should we retry pg_init immediately?
1376 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001377static bool pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001378{
1379 unsigned long flags;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001380 bool limit_reached = false;
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001381
1382 spin_lock_irqsave(&m->lock, flags);
1383
Mike Snitzer91e968a2016-03-17 17:10:15 -04001384 if (atomic_read(&m->pg_init_count) <= m->pg_init_retries &&
1385 !test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
Mike Snitzer518257b2016-03-17 16:32:10 -04001386 set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001387 else
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001388 limit_reached = true;
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001389
1390 spin_unlock_irqrestore(&m->lock, flags);
1391
1392 return limit_reached;
1393}
1394
Chandra Seetharaman3ae31f62009-10-21 09:22:46 -07001395static void pg_init_done(void *data, int errors)
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001396{
Moger, Babu83c0d5d2010-03-06 02:29:45 +00001397 struct pgpath *pgpath = data;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001398 struct priority_group *pg = pgpath->pg;
1399 struct multipath *m = pg->m;
1400 unsigned long flags;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001401 bool delay_retry = false;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001402
1403 /* device or driver problems */
1404 switch (errors) {
1405 case SCSI_DH_OK:
1406 break;
1407 case SCSI_DH_NOSYS:
1408 if (!m->hw_handler_name) {
1409 errors = 0;
1410 break;
1411 }
Moger, Babuf7b934c2010-03-06 02:29:49 +00001412 DMERR("Could not failover the device: Handler scsi_dh_%s "
1413 "Error %d.", m->hw_handler_name, errors);
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001414 /*
1415 * Fail path for now, so we do not ping pong
1416 */
1417 fail_path(pgpath);
1418 break;
1419 case SCSI_DH_DEV_TEMP_BUSY:
1420 /*
1421 * Probably doing something like FW upgrade on the
1422 * controller so try the other pg.
1423 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001424 bypass_pg(m, pg, true);
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001425 break;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001426 case SCSI_DH_RETRY:
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001427 /* Wait before retrying. */
1428 delay_retry = 1;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001429 case SCSI_DH_IMM_RETRY:
1430 case SCSI_DH_RES_TEMP_UNAVAIL:
1431 if (pg_init_limit_reached(m, pgpath))
1432 fail_path(pgpath);
1433 errors = 0;
1434 break;
Mike Snitzerec31f3f2016-02-20 12:49:43 -05001435 case SCSI_DH_DEV_OFFLINED:
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001436 default:
1437 /*
1438 * We probably do not want to fail the path for a device
1439 * error, but this is what the old dm did. In future
1440 * patches we can do more advanced handling.
1441 */
1442 fail_path(pgpath);
1443 }
1444
1445 spin_lock_irqsave(&m->lock, flags);
1446 if (errors) {
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001447 if (pgpath == m->current_pgpath) {
1448 DMERR("Could not failover device. Error %d.", errors);
1449 m->current_pgpath = NULL;
1450 m->current_pg = NULL;
1451 }
Mike Snitzer518257b2016-03-17 16:32:10 -04001452 } else if (!test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001453 pg->bypassed = false;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001454
Mike Snitzer91e968a2016-03-17 17:10:15 -04001455 if (atomic_dec_return(&m->pg_init_in_progress) > 0)
Kiyoshi Uedad0259bf2010-03-06 02:30:02 +00001456 /* Activations of other paths are still on going */
1457 goto out;
1458
Mike Snitzer518257b2016-03-17 16:32:10 -04001459 if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
1460 if (delay_retry)
1461 set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
1462 else
1463 clear_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
1464
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +01001465 if (__pg_init_all_paths(m))
1466 goto out;
1467 }
Mike Snitzer518257b2016-03-17 16:32:10 -04001468 clear_bit(MPATHF_QUEUE_IO, &m->flags);
Kiyoshi Uedad0259bf2010-03-06 02:30:02 +00001469
Mike Snitzer76e33fe2016-05-19 16:15:14 -04001470 process_queued_bios_list(m);
1471
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001472 /*
1473 * Wake up any thread waiting to suspend.
1474 */
1475 wake_up(&m->pg_init_wait);
1476
Kiyoshi Uedad0259bf2010-03-06 02:30:02 +00001477out:
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001478 spin_unlock_irqrestore(&m->lock, flags);
1479}
1480
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001481static void activate_path(struct work_struct *work)
1482{
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001483 struct pgpath *pgpath =
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001484 container_of(work, struct pgpath, activate_path.work);
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001485
Hannes Reinecke3a017502014-02-28 15:33:49 +01001486 if (pgpath->is_active)
1487 scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
1488 pg_init_done, pgpath);
1489 else
1490 pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED);
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001491}
1492
/*
 * Tell whether @error is one of the codes that must not be retried.
 *
 * Returns 1 for -EOPNOTSUPP, -EREMOTEIO, -EILSEQ, -ENODATA and -ENOSPC,
 * and 0 for everything else: any other error could be a path failure,
 * so it should be retried.
 */
static int noretry_error(int error)
{
	if (error == -EOPNOTSUPP ||
	    error == -EREMOTEIO ||
	    error == -EILSEQ ||
	    error == -ENODATA ||
	    error == -ENOSPC)
		return 1;

	return 0;
}
1507
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508/*
1509 * end_io handling
1510 */
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001511static int do_end_io(struct multipath *m, struct request *clone,
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001512 int error, struct dm_mpath_io *mpio)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513{
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001514 /*
1515 * We don't queue any clone request inside the multipath target
1516 * during end I/O handling, since those clone requests don't have
1517 * bio clones. If we queue them inside the multipath target,
1518 * we need to make bio clones, that requires memory allocation.
Mike Snitzer4cc96132016-05-12 16:28:10 -04001519 * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001520 * don't have bio clones.)
1521 * Instead of queueing the clone request here, we queue the original
1522 * request into dm core, which will remake a clone request and
1523 * clone bios for it and resubmit it later.
1524 */
1525 int r = DM_ENDIO_REQUEUE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001526
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001527 if (!error && !clone->errors)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001528 return 0; /* I/O complete */
1529
Mike Snitzer7eee4ae2014-06-02 15:50:06 -04001530 if (noretry_error(error))
Mike Snitzer959eb4e2010-08-12 04:14:32 +01001531 return error;
1532
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001533 if (mpio->pgpath)
1534 fail_path(mpio->pgpath);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001535
Mike Snitzer91e968a2016-03-17 17:10:15 -04001536 if (!atomic_read(&m->nr_valid_paths)) {
Mike Snitzer518257b2016-03-17 16:32:10 -04001537 if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
Mike Snitzer76e33fe2016-05-19 16:15:14 -04001538 if (!must_push_back_rq(m))
Hannes Reinecke751b2a72011-01-18 10:13:12 +01001539 r = -EIO;
1540 } else {
1541 if (error == -EBADE)
1542 r = error;
1543 }
1544 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001545
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001546 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001547}
1548
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001549static int multipath_end_io(struct dm_target *ti, struct request *clone,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001550 int error, union map_info *map_context)
1551{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001552 struct multipath *m = ti->private;
Mike Snitzer2eff1922016-02-03 09:13:14 -05001553 struct dm_mpath_io *mpio = get_mpio(map_context);
Wei Yongjuna71a2612012-10-12 16:59:42 +01001554 struct pgpath *pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001555 struct path_selector *ps;
1556 int r;
1557
Jun'ichi Nomura466891f2012-03-28 18:41:25 +01001558 BUG_ON(!mpio);
1559
Mike Snitzer2eff1922016-02-03 09:13:14 -05001560 r = do_end_io(m, clone, error, mpio);
Wei Yongjuna71a2612012-10-12 16:59:42 +01001561 pgpath = mpio->pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562 if (pgpath) {
1563 ps = &pgpath->pg->ps;
1564 if (ps->type->end_io)
Kiyoshi Ueda02ab8232009-06-22 10:12:27 +01001565 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566 }
Mike Snitzer2eff1922016-02-03 09:13:14 -05001567 clear_request_fn_mpio(m, map_context);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001568
1569 return r;
1570}
1571
Mike Snitzer76e33fe2016-05-19 16:15:14 -04001572static int do_end_io_bio(struct multipath *m, struct bio *clone,
1573 int error, struct dm_mpath_io *mpio)
1574{
1575 unsigned long flags;
1576
1577 if (!error)
1578 return 0; /* I/O complete */
1579
1580 if (noretry_error(error))
1581 return error;
1582
1583 if (mpio->pgpath)
1584 fail_path(mpio->pgpath);
1585
1586 if (!atomic_read(&m->nr_valid_paths)) {
1587 if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
1588 if (!must_push_back_bio(m))
1589 return -EIO;
1590 return DM_ENDIO_REQUEUE;
1591 } else {
1592 if (error == -EBADE)
1593 return error;
1594 }
1595 }
1596
1597 /* Queue for the daemon to resubmit */
Mike Snitzerbf661be2016-05-24 15:48:08 -04001598 dm_bio_restore(get_bio_details_from_bio(clone), clone);
Mike Snitzer76e33fe2016-05-19 16:15:14 -04001599
1600 spin_lock_irqsave(&m->lock, flags);
1601 bio_list_add(&m->queued_bios, clone);
1602 spin_unlock_irqrestore(&m->lock, flags);
1603 if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
1604 queue_work(kmultipathd, &m->process_queued_bios);
1605
1606 return DM_ENDIO_INCOMPLETE;
1607}
1608
1609static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, int error)
1610{
1611 struct multipath *m = ti->private;
1612 struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
1613 struct pgpath *pgpath;
1614 struct path_selector *ps;
1615 int r;
1616
1617 BUG_ON(!mpio);
1618
1619 r = do_end_io_bio(m, clone, error, mpio);
1620 pgpath = mpio->pgpath;
1621 if (pgpath) {
1622 ps = &pgpath->pg->ps;
1623 if (ps->type->end_io)
1624 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1625 }
1626
1627 return r;
1628}
1629
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630/*
1631 * Suspend can't complete until all the I/O is processed so if
Alasdair G Kergon436d4102005-07-12 15:53:03 -07001632 * the last path fails we must error any remaining I/O.
1633 * Note that if the freeze_bdev fails while suspending, the
1634 * queue_if_no_path state is lost - userspace should reset it.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635 */
1636static void multipath_presuspend(struct dm_target *ti)
1637{
Mike Snitzer7943bd62016-02-02 21:53:15 -05001638 struct multipath *m = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001639
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001640 queue_if_no_path(m, false, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641}
1642
Kiyoshi Ueda6df400a2009-12-10 23:52:19 +00001643static void multipath_postsuspend(struct dm_target *ti)
1644{
Mike Anderson6380f262009-12-10 23:52:21 +00001645 struct multipath *m = ti->private;
1646
1647 mutex_lock(&m->work_mutex);
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001648 flush_multipath_work(m);
Mike Anderson6380f262009-12-10 23:52:21 +00001649 mutex_unlock(&m->work_mutex);
Kiyoshi Ueda6df400a2009-12-10 23:52:19 +00001650}
1651
Alasdair G Kergon436d4102005-07-12 15:53:03 -07001652/*
1653 * Restore the queue_if_no_path setting.
1654 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001655static void multipath_resume(struct dm_target *ti)
1656{
Mike Snitzer7943bd62016-02-02 21:53:15 -05001657 struct multipath *m = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658
Mike Snitzer518257b2016-03-17 16:32:10 -04001659 if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags))
1660 set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
1661 else
1662 clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
1663 smp_mb__after_atomic();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664}
1665
1666/*
1667 * Info output has the following format:
1668 * num_multipath_feature_args [multipath_feature_args]*
1669 * num_handler_status_args [handler_status_args]*
1670 * num_groups init_group_number
1671 * [A|D|E num_ps_status_args [ps_status_args]*
1672 * num_paths num_selector_args
1673 * [path_dev A|F fail_count [selector_args]* ]+ ]+
1674 *
1675 * Table output has the following format (identical to the constructor string):
1676 * num_feature_args [features_args]*
1677 * num_handler_args hw_handler [hw_handler_args]*
1678 * num_groups init_group_number
1679 * [priority selector-name num_ps_args [ps_args]*
1680 * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
1681 */
Mikulas Patockafd7c0922013-03-01 22:45:44 +00001682static void multipath_status(struct dm_target *ti, status_type_t type,
1683 unsigned status_flags, char *result, unsigned maxlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684{
1685 int sz = 0;
1686 unsigned long flags;
Mike Snitzer7943bd62016-02-02 21:53:15 -05001687 struct multipath *m = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001688 struct priority_group *pg;
1689 struct pgpath *p;
1690 unsigned pg_num;
1691 char state;
1692
1693 spin_lock_irqsave(&m->lock, flags);
1694
1695 /* Features */
1696 if (type == STATUSTYPE_INFO)
Mike Snitzer91e968a2016-03-17 17:10:15 -04001697 DMEMIT("2 %u %u ", test_bit(MPATHF_QUEUE_IO, &m->flags),
1698 atomic_read(&m->pg_init_count));
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001699 else {
Mike Snitzer518257b2016-03-17 16:32:10 -04001700 DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) +
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001701 (m->pg_init_retries > 0) * 2 +
Mike Snitzera58a9352012-07-27 15:08:04 +01001702 (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
Mike Snitzer518257b2016-03-17 16:32:10 -04001703 test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags));
1704 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001705 DMEMIT("queue_if_no_path ");
1706 if (m->pg_init_retries)
1707 DMEMIT("pg_init_retries %u ", m->pg_init_retries);
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001708 if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
1709 DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
Mike Snitzer518257b2016-03-17 16:32:10 -04001710 if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
Mike Snitzera58a9352012-07-27 15:08:04 +01001711 DMEMIT("retain_attached_hw_handler ");
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001712 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001714 if (!m->hw_handler_name || type == STATUSTYPE_INFO)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715 DMEMIT("0 ");
1716 else
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001717 DMEMIT("1 %s ", m->hw_handler_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718
1719 DMEMIT("%u ", m->nr_priority_groups);
1720
1721 if (m->next_pg)
1722 pg_num = m->next_pg->pg_num;
1723 else if (m->current_pg)
1724 pg_num = m->current_pg->pg_num;
1725 else
Mike Snitzera490a072011-03-24 13:54:33 +00001726 pg_num = (m->nr_priority_groups ? 1 : 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001727
1728 DMEMIT("%u ", pg_num);
1729
1730 switch (type) {
1731 case STATUSTYPE_INFO:
1732 list_for_each_entry(pg, &m->priority_groups, list) {
1733 if (pg->bypassed)
1734 state = 'D'; /* Disabled */
1735 else if (pg == m->current_pg)
1736 state = 'A'; /* Currently Active */
1737 else
1738 state = 'E'; /* Enabled */
1739
1740 DMEMIT("%c ", state);
1741
1742 if (pg->ps.type->status)
1743 sz += pg->ps.type->status(&pg->ps, NULL, type,
1744 result + sz,
1745 maxlen - sz);
1746 else
1747 DMEMIT("0 ");
1748
1749 DMEMIT("%u %u ", pg->nr_pgpaths,
1750 pg->ps.type->info_args);
1751
1752 list_for_each_entry(p, &pg->pgpaths, list) {
1753 DMEMIT("%s %s %u ", p->path.dev->name,
Kiyoshi Ueda66800732008-10-10 13:36:58 +01001754 p->is_active ? "A" : "F",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755 p->fail_count);
1756 if (pg->ps.type->status)
1757 sz += pg->ps.type->status(&pg->ps,
1758 &p->path, type, result + sz,
1759 maxlen - sz);
1760 }
1761 }
1762 break;
1763
1764 case STATUSTYPE_TABLE:
1765 list_for_each_entry(pg, &m->priority_groups, list) {
1766 DMEMIT("%s ", pg->ps.type->name);
1767
1768 if (pg->ps.type->status)
1769 sz += pg->ps.type->status(&pg->ps, NULL, type,
1770 result + sz,
1771 maxlen - sz);
1772 else
1773 DMEMIT("0 ");
1774
1775 DMEMIT("%u %u ", pg->nr_pgpaths,
1776 pg->ps.type->table_args);
1777
1778 list_for_each_entry(p, &pg->pgpaths, list) {
1779 DMEMIT("%s ", p->path.dev->name);
1780 if (pg->ps.type->status)
1781 sz += pg->ps.type->status(&pg->ps,
1782 &p->path, type, result + sz,
1783 maxlen - sz);
1784 }
1785 }
1786 break;
1787 }
1788
1789 spin_unlock_irqrestore(&m->lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790}
1791
1792static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1793{
Mike Anderson6380f262009-12-10 23:52:21 +00001794 int r = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001795 struct dm_dev *dev;
Mike Snitzer7943bd62016-02-02 21:53:15 -05001796 struct multipath *m = ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001797 action_fn action;
1798
Mike Anderson6380f262009-12-10 23:52:21 +00001799 mutex_lock(&m->work_mutex);
1800
Kiyoshi Uedac2f3d242009-12-10 23:52:27 +00001801 if (dm_suspended(ti)) {
1802 r = -EBUSY;
1803 goto out;
1804 }
1805
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806 if (argc == 1) {
Mike Snitzer498f0102011-08-02 12:32:04 +01001807 if (!strcasecmp(argv[0], "queue_if_no_path")) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001808 r = queue_if_no_path(m, true, false);
Mike Anderson6380f262009-12-10 23:52:21 +00001809 goto out;
Mike Snitzer498f0102011-08-02 12:32:04 +01001810 } else if (!strcasecmp(argv[0], "fail_if_no_path")) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001811 r = queue_if_no_path(m, false, false);
Mike Anderson6380f262009-12-10 23:52:21 +00001812 goto out;
1813 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 }
1815
Mike Anderson6380f262009-12-10 23:52:21 +00001816 if (argc != 2) {
Jose Castilloa356e422014-01-29 17:52:45 +01001817 DMWARN("Invalid multipath message arguments. Expected 2 arguments, got %d.", argc);
Mike Anderson6380f262009-12-10 23:52:21 +00001818 goto out;
1819 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820
Mike Snitzer498f0102011-08-02 12:32:04 +01001821 if (!strcasecmp(argv[0], "disable_group")) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001822 r = bypass_pg_num(m, argv[1], true);
Mike Anderson6380f262009-12-10 23:52:21 +00001823 goto out;
Mike Snitzer498f0102011-08-02 12:32:04 +01001824 } else if (!strcasecmp(argv[0], "enable_group")) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001825 r = bypass_pg_num(m, argv[1], false);
Mike Anderson6380f262009-12-10 23:52:21 +00001826 goto out;
Mike Snitzer498f0102011-08-02 12:32:04 +01001827 } else if (!strcasecmp(argv[0], "switch_group")) {
Mike Anderson6380f262009-12-10 23:52:21 +00001828 r = switch_pg_num(m, argv[1]);
1829 goto out;
Mike Snitzer498f0102011-08-02 12:32:04 +01001830 } else if (!strcasecmp(argv[0], "reinstate_path"))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 action = reinstate_path;
Mike Snitzer498f0102011-08-02 12:32:04 +01001832 else if (!strcasecmp(argv[0], "fail_path"))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001833 action = fail_path;
Mike Anderson6380f262009-12-10 23:52:21 +00001834 else {
Jose Castilloa356e422014-01-29 17:52:45 +01001835 DMWARN("Unrecognised multipath message received: %s", argv[0]);
Mike Anderson6380f262009-12-10 23:52:21 +00001836 goto out;
1837 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838
Nikanth Karthikesan8215d6e2010-03-06 02:32:27 +00001839 r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 if (r) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001841 DMWARN("message: error getting device %s",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842 argv[1]);
Mike Anderson6380f262009-12-10 23:52:21 +00001843 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001844 }
1845
1846 r = action_dev(m, dev, action);
1847
1848 dm_put_device(ti, dev);
1849
Mike Anderson6380f262009-12-10 23:52:21 +00001850out:
1851 mutex_unlock(&m->work_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001852 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001853}
1854
Christoph Hellwige56f81e2015-10-15 14:10:50 +02001855static int multipath_prepare_ioctl(struct dm_target *ti,
1856 struct block_device **bdev, fmode_t *mode)
Milan Broz9af4aa32006-10-03 01:15:20 -07001857{
Mikulas Patocka35991652012-06-03 00:29:58 +01001858 struct multipath *m = ti->private;
Mike Snitzer2da16102016-03-17 18:38:17 -04001859 struct pgpath *current_pgpath;
Mikulas Patocka35991652012-06-03 00:29:58 +01001860 int r;
1861
Mike Snitzer2da16102016-03-17 18:38:17 -04001862 current_pgpath = lockless_dereference(m->current_pgpath);
1863 if (!current_pgpath)
1864 current_pgpath = choose_pgpath(m, 0);
Milan Broz9af4aa32006-10-03 01:15:20 -07001865
Mike Snitzer2da16102016-03-17 18:38:17 -04001866 if (current_pgpath) {
Mike Snitzer518257b2016-03-17 16:32:10 -04001867 if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) {
Mike Snitzer2da16102016-03-17 18:38:17 -04001868 *bdev = current_pgpath->path.dev->bdev;
1869 *mode = current_pgpath->path.dev->mode;
Junichi Nomura43e43c92015-11-17 09:36:56 +00001870 r = 0;
1871 } else {
1872 /* pg_init has not started or completed */
1873 r = -ENOTCONN;
1874 }
1875 } else {
1876 /* No path is available */
Mike Snitzer518257b2016-03-17 16:32:10 -04001877 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
Junichi Nomura43e43c92015-11-17 09:36:56 +00001878 r = -ENOTCONN;
1879 else
1880 r = -EIO;
Milan Broze90dae12006-10-03 01:15:22 -07001881 }
Milan Broz9af4aa32006-10-03 01:15:20 -07001882
Junichi Nomura5bbbfdf2015-11-17 09:39:26 +00001883 if (r == -ENOTCONN) {
Mike Snitzer2da16102016-03-17 18:38:17 -04001884 if (!lockless_dereference(m->current_pg)) {
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +01001885 /* Path status changed, redo selection */
Mike Snitzer2da16102016-03-17 18:38:17 -04001886 (void) choose_pgpath(m, 0);
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +01001887 }
Mike Snitzer518257b2016-03-17 16:32:10 -04001888 if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
Mike Snitzer2da16102016-03-17 18:38:17 -04001889 pg_init_all_paths(m);
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001890 dm_table_run_md_queue_async(m->ti->table);
Mike Snitzer76e33fe2016-05-19 16:15:14 -04001891 process_queued_bios_list(m);
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +01001892 }
Mikulas Patocka35991652012-06-03 00:29:58 +01001893
Christoph Hellwige56f81e2015-10-15 14:10:50 +02001894 /*
1895 * Only pass ioctls through if the device sizes match exactly.
1896 */
1897 if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
1898 return 1;
1899 return r;
Milan Broz9af4aa32006-10-03 01:15:20 -07001900}
1901
Mike Snitzeraf4874e2009-06-22 10:12:33 +01001902static int multipath_iterate_devices(struct dm_target *ti,
1903 iterate_devices_callout_fn fn, void *data)
1904{
1905 struct multipath *m = ti->private;
1906 struct priority_group *pg;
1907 struct pgpath *p;
1908 int ret = 0;
1909
1910 list_for_each_entry(pg, &m->priority_groups, list) {
1911 list_for_each_entry(p, &pg->pgpaths, list) {
Mike Snitzer5dea2712009-07-23 20:30:42 +01001912 ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
Mike Snitzeraf4874e2009-06-22 10:12:33 +01001913 if (ret)
1914 goto out;
1915 }
1916 }
1917
1918out:
1919 return ret;
1920}
1921
Mike Snitzer9f54cec2016-02-11 21:42:28 -05001922static int pgpath_busy(struct pgpath *pgpath)
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001923{
1924 struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1925
Mike Snitzer52b09912015-02-23 16:36:41 -05001926 return blk_lld_busy(q);
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001927}
1928
1929/*
1930 * We return "busy", only when we can map I/Os but underlying devices
1931 * are busy (so even if we map I/Os now, the I/Os will wait on
1932 * the underlying queue).
1933 * In other words, if we want to kill I/Os or queue them inside us
1934 * due to map unavailability, we don't return "busy". Otherwise,
1935 * dm core won't give us the I/Os and we can't do what we want.
1936 */
1937static int multipath_busy(struct dm_target *ti)
1938{
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001939 bool busy = false, has_active = false;
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001940 struct multipath *m = ti->private;
Mike Snitzer2da16102016-03-17 18:38:17 -04001941 struct priority_group *pg, *next_pg;
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001942 struct pgpath *pgpath;
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001943
Jun'ichi Nomura7a7a3b42014-07-08 00:55:14 +00001944 /* pg_init in progress or no paths available */
Mike Snitzer91e968a2016-03-17 17:10:15 -04001945 if (atomic_read(&m->pg_init_in_progress) ||
Mike Snitzer2da16102016-03-17 18:38:17 -04001946 (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)))
1947 return true;
1948
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001949 /* Guess which priority_group will be used at next mapping time */
Mike Snitzer2da16102016-03-17 18:38:17 -04001950 pg = lockless_dereference(m->current_pg);
1951 next_pg = lockless_dereference(m->next_pg);
1952 if (unlikely(!lockless_dereference(m->current_pgpath) && next_pg))
1953 pg = next_pg;
1954
1955 if (!pg) {
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001956 /*
1957 * We don't know which pg will be used at next mapping time.
Mike Snitzer2da16102016-03-17 18:38:17 -04001958 * We don't call choose_pgpath() here to avoid to trigger
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001959 * pg_init just by busy checking.
1960 * So we don't know whether underlying devices we will be using
1961 * at next mapping time are busy or not. Just try mapping.
1962 */
Mike Snitzer2da16102016-03-17 18:38:17 -04001963 return busy;
1964 }
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001965
1966 /*
1967 * If there is one non-busy active path at least, the path selector
1968 * will be able to select it. So we consider such a pg as not busy.
1969 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001970 busy = true;
Mike Snitzer2da16102016-03-17 18:38:17 -04001971 list_for_each_entry(pgpath, &pg->pgpaths, list) {
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001972 if (pgpath->is_active) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001973 has_active = true;
Mike Snitzer9f54cec2016-02-11 21:42:28 -05001974 if (!pgpath_busy(pgpath)) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001975 busy = false;
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001976 break;
1977 }
1978 }
Mike Snitzer2da16102016-03-17 18:38:17 -04001979 }
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001980
Mike Snitzer2da16102016-03-17 18:38:17 -04001981 if (!has_active) {
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001982 /*
1983 * No active path in this pg, so this pg won't be used and
1984 * the current_pg will be changed at next mapping time.
1985 * We need to try mapping to determine it.
1986 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001987 busy = false;
Mike Snitzer2da16102016-03-17 18:38:17 -04001988 }
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01001989
1990 return busy;
1991}
1992
Linus Torvalds1da177e2005-04-16 15:20:36 -07001993/*-----------------------------------------------------------------
1994 * Module setup
1995 *---------------------------------------------------------------*/
1996static struct target_type multipath_target = {
1997 .name = "multipath",
Mike Snitzer16f12262016-01-31 17:22:27 -05001998 .version = {1, 11, 0},
1999 .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000 .module = THIS_MODULE,
2001 .ctr = multipath_ctr,
2002 .dtr = multipath_dtr,
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01002003 .map_rq = multipath_map,
Mike Snitzere5863d92014-12-17 21:08:12 -05002004 .clone_and_map_rq = multipath_clone_and_map,
2005 .release_clone_rq = multipath_release_clone,
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01002006 .rq_end_io = multipath_end_io,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002007 .presuspend = multipath_presuspend,
Kiyoshi Ueda6df400a2009-12-10 23:52:19 +00002008 .postsuspend = multipath_postsuspend,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002009 .resume = multipath_resume,
2010 .status = multipath_status,
2011 .message = multipath_message,
Christoph Hellwige56f81e2015-10-15 14:10:50 +02002012 .prepare_ioctl = multipath_prepare_ioctl,
Mike Snitzeraf4874e2009-06-22 10:12:33 +01002013 .iterate_devices = multipath_iterate_devices,
Kiyoshi Uedaf40c67f2009-06-22 10:12:37 +01002014 .busy = multipath_busy,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015};
2016
Mike Snitzer76e33fe2016-05-19 16:15:14 -04002017static struct target_type multipath_bio_target = {
2018 .name = "multipath-bio",
2019 .version = {1, 0, 0},
2020 .module = THIS_MODULE,
2021 .ctr = multipath_bio_ctr,
2022 .dtr = multipath_dtr,
2023 .map = multipath_map_bio,
2024 .end_io = multipath_end_io_bio,
2025 .presuspend = multipath_presuspend,
2026 .postsuspend = multipath_postsuspend,
2027 .resume = multipath_resume,
2028 .status = multipath_status,
2029 .message = multipath_message,
2030 .prepare_ioctl = multipath_prepare_ioctl,
2031 .iterate_devices = multipath_iterate_devices,
2032 .busy = multipath_busy,
2033};
2034
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035static int __init dm_multipath_init(void)
2036{
2037 int r;
2038
Mike Snitzer76e33fe2016-05-19 16:15:14 -04002039 /* allocate a slab for the dm_mpath_ios */
Alasdair G Kergon028867a2007-07-12 17:26:32 +01002040 _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002041 if (!_mpio_cache)
2042 return -ENOMEM;
2043
2044 r = dm_register_target(&multipath_target);
2045 if (r < 0) {
Mike Snitzer76e33fe2016-05-19 16:15:14 -04002046 DMERR("request-based register failed %d", r);
Johannes Thumshirnff658e92015-01-11 12:45:23 +01002047 r = -EINVAL;
2048 goto bad_register_target;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049 }
2050
Mike Snitzer76e33fe2016-05-19 16:15:14 -04002051 r = dm_register_target(&multipath_bio_target);
2052 if (r < 0) {
2053 DMERR("bio-based register failed %d", r);
2054 r = -EINVAL;
2055 goto bad_register_bio_based_target;
2056 }
2057
Tejun Heo4d4d66a2011-01-13 19:59:57 +00002058 kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
Alasdair G Kergonc5573082005-05-05 16:16:07 -07002059 if (!kmultipathd) {
Alasdair G Kergon0cd33122007-07-12 17:27:01 +01002060 DMERR("failed to create workqueue kmpathd");
Johannes Thumshirnff658e92015-01-11 12:45:23 +01002061 r = -ENOMEM;
2062 goto bad_alloc_kmultipathd;
Alasdair G Kergonc5573082005-05-05 16:16:07 -07002063 }
2064
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07002065 /*
2066 * A separate workqueue is used to handle the device handlers
2067 * to avoid overloading existing workqueue. Overloading the
2068 * old workqueue would also create a bottleneck in the
2069 * path of the storage hardware device activation.
2070 */
Tejun Heo4d4d66a2011-01-13 19:59:57 +00002071 kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
2072 WQ_MEM_RECLAIM);
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07002073 if (!kmpath_handlerd) {
2074 DMERR("failed to create workqueue kmpath_handlerd");
Johannes Thumshirnff658e92015-01-11 12:45:23 +01002075 r = -ENOMEM;
2076 goto bad_alloc_kmpath_handlerd;
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07002077 }
2078
Johannes Thumshirnff658e92015-01-11 12:45:23 +01002079 return 0;
2080
2081bad_alloc_kmpath_handlerd:
2082 destroy_workqueue(kmultipathd);
2083bad_alloc_kmultipathd:
Mike Snitzer76e33fe2016-05-19 16:15:14 -04002084 dm_unregister_target(&multipath_bio_target);
2085bad_register_bio_based_target:
Johannes Thumshirnff658e92015-01-11 12:45:23 +01002086 dm_unregister_target(&multipath_target);
2087bad_register_target:
2088 kmem_cache_destroy(_mpio_cache);
2089
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090 return r;
2091}
2092
2093static void __exit dm_multipath_exit(void)
2094{
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07002095 destroy_workqueue(kmpath_handlerd);
Alasdair G Kergonc5573082005-05-05 16:16:07 -07002096 destroy_workqueue(kmultipathd);
2097
Mikulas Patocka10d3bd02009-01-06 03:04:58 +00002098 dm_unregister_target(&multipath_target);
Mike Snitzer76e33fe2016-05-19 16:15:14 -04002099 dm_unregister_target(&multipath_bio_target);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002100 kmem_cache_destroy(_mpio_cache);
2101}
2102
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103module_init(dm_multipath_init);
2104module_exit(dm_multipath_exit);
2105
2106MODULE_DESCRIPTION(DM_NAME " multipath target");
2107MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
2108MODULE_LICENSE("GPL");