/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/device-mapper.h>

#include "dm.h"
#include "dm-path-selector.h"
#include "dm-uevent.h"

#include <linux/blkdev.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>

#define DM_MSG_PREFIX "multipath"
#define DM_PG_INIT_DELAY_MSECS 2000
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)

/* Path properties */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned fail_count;		/* Cumulative failure count */

	struct dm_path path;
	struct delayed_work activate_path;

	bool is_active:1;		/* Path status */
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;

	unsigned pg_num;		/* Reference number */
	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;

	bool bypassed:1;		/* Temporarily bypass this PG? */
};

/* Multipath context */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	const char *hw_handler_name;
	char *hw_handler_params;

	spinlock_t lock;

	unsigned nr_priority_groups;
	struct list_head priority_groups;

	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */

	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */

	unsigned long flags;		/* Multipath state flags */

	unsigned pg_init_retries;	/* Number of times to retry pg_init */
	unsigned pg_init_delay_msecs;	/* Number of msecs before pg_init retry */

	atomic_t nr_valid_paths;	/* Total number of usable paths */
	atomic_t pg_init_in_progress;	/* Only one pg_init allowed at once */
	atomic_t pg_init_count;		/* Number of times pg_init called */

	/*
	 * We must use a mempool of dm_mpath_io structs so that we
	 * can resubmit bios on error.
	 */
	mempool_t *mpio_pool;

	struct mutex work_mutex;
	struct work_struct trigger_event;
};

/*
 * Context information attached to each bio we process.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;
	size_t nr_bytes;
};

typedef int (*action_fn) (struct pgpath *pgpath);

static struct kmem_cache *_mpio_cache;

static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);

/*-----------------------------------------------
 * Multipath state flags.
 *-----------------------------------------------*/

#define MPATHF_QUEUE_IO 0			/* Must we queue all I/O? */
#define MPATHF_QUEUE_IF_NO_PATH 1		/* Queue I/O if last path fails? */
#define MPATHF_SAVED_QUEUE_IF_NO_PATH 2		/* Saved state during suspension */
#define MPATHF_RETAIN_ATTACHED_HW_HANDLER 3	/* If there's already a hw_handler present, don't change it. */
#define MPATHF_PG_INIT_DISABLED 4		/* pg_init is not currently allowed */
#define MPATHF_PG_INIT_REQUIRED 5		/* pg_init needs calling? */
#define MPATHF_PG_INIT_DELAY_RETRY 6		/* Delay pg_init retry? */
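
/*
 * These MPATHF_* values are bit numbers into the (unsigned long)
 * m->flags word, so the state is read and updated with the atomic
 * bitops rather than plain read-modify-write, e.g.:
 *
 *	set_bit(MPATHF_QUEUE_IO, &m->flags);
 *	if (test_bit(MPATHF_QUEUE_IO, &m->flags))
 *		...
 */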

/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);

	if (pgpath) {
		pgpath->is_active = true;
		INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
	}

	return pgpath;
}

static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}

static struct priority_group *alloc_priority_group(void)
{
	struct priority_group *pg;

	pg = kzalloc(sizeof(*pg), GFP_KERNEL);

	if (pg)
		INIT_LIST_HEAD(&pg->pgpaths);

	return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	struct pgpath *pgpath, *tmp;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		dm_put_device(ti, pgpath->path.dev);
		free_pgpath(pgpath);
	}
}

static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}

static struct multipath *alloc_multipath(struct dm_target *ti, bool use_blk_mq)
{
	struct multipath *m;

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (m) {
		INIT_LIST_HEAD(&m->priority_groups);
		spin_lock_init(&m->lock);
		set_bit(MPATHF_QUEUE_IO, &m->flags);
		atomic_set(&m->nr_valid_paths, 0);
		atomic_set(&m->pg_init_in_progress, 0);
		atomic_set(&m->pg_init_count, 0);
		m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
		INIT_WORK(&m->trigger_event, trigger_event);
		init_waitqueue_head(&m->pg_init_wait);
		mutex_init(&m->work_mutex);

		m->mpio_pool = NULL;
		if (!use_blk_mq) {
			unsigned min_ios = dm_get_reserved_rq_based_ios();

			m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
			if (!m->mpio_pool) {
				kfree(m);
				return NULL;
			}
		}

		m->ti = ti;
		ti->private = m;
	}

	return m;
}

static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	kfree(m->hw_handler_name);
	kfree(m->hw_handler_params);
	mempool_destroy(m->mpio_pool);
	kfree(m);
}

static struct dm_mpath_io *get_mpio(union map_info *info)
{
	return info->ptr;
}

static struct dm_mpath_io *set_mpio(struct multipath *m, union map_info *info)
{
	struct dm_mpath_io *mpio;

	if (!m->mpio_pool) {
		/* Use blk-mq pdu memory requested via per_io_data_size */
		mpio = get_mpio(info);
		memset(mpio, 0, sizeof(*mpio));
		return mpio;
	}

	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
	if (!mpio)
		return NULL;

	memset(mpio, 0, sizeof(*mpio));
	info->ptr = mpio;

	return mpio;
}

static void clear_request_fn_mpio(struct multipath *m, union map_info *info)
{
	/* Only needed for non blk-mq (.request_fn) multipath */
	if (m->mpio_pool) {
		struct dm_mpath_io *mpio = info->ptr;

		info->ptr = NULL;
		mempool_free(mpio, m->mpio_pool);
	}
}

/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

static int __pg_init_all_paths(struct multipath *m)
{
	struct pgpath *pgpath;
	unsigned long pg_init_delay = 0;

	if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
		return 0;

	atomic_inc(&m->pg_init_count);
	clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);

	/* Check here to reset pg_init_required */
	if (!m->current_pg)
		return 0;

	if (test_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags))
		pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
						 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
	list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
		/* Skip failed paths */
		if (!pgpath->is_active)
			continue;
		if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
				       pg_init_delay))
			atomic_inc(&m->pg_init_in_progress);
	}
	return atomic_read(&m->pg_init_in_progress);
}

static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
	m->current_pg = pgpath->pg;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (m->hw_handler_name) {
		set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
		set_bit(MPATHF_QUEUE_IO, &m->flags);
	} else {
		clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
		clear_bit(MPATHF_QUEUE_IO, &m->flags);
	}

	atomic_set(&m->pg_init_count, 0);
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
			       size_t nr_bytes)
{
	struct dm_path *path;

	path = pg->ps.type->select_path(&pg->ps, nr_bytes);
	if (!path)
		return -ENXIO;

	m->current_pgpath = path_to_pgpath(path);

	if (m->current_pg != pg)
		__switch_pg(m, m->current_pgpath);

	return 0;
}

static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
{
	struct priority_group *pg;
	bool bypassed = true;

	if (!atomic_read(&m->nr_valid_paths)) {
		clear_bit(MPATHF_QUEUE_IO, &m->flags);
		goto failed;
	}

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg, nr_bytes))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped, but set
	 * pg_init_delay_retry so we do not hammer controllers.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg, nr_bytes)) {
				if (!bypassed)
					set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
				return;
			}
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}

/*
 * Check whether bios must be queued in the device-mapper core rather
 * than here in the target.
 *
 * m->lock must be held on entry.
 *
 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
 * same value then we are not between multipath_presuspend()
 * and multipath_resume() calls and we have no need to check
 * for the DMF_NOFLUSH_SUSPENDING flag.
 */
static int __must_push_back(struct multipath *m)
{
	return (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) ||
		((test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) !=
		  test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) &&
		 dm_noflush_suspending(m->ti)));
}

/*
 * Map cloned requests
 */
static int __multipath_map(struct dm_target *ti, struct request *clone,
			   union map_info *map_context,
			   struct request *rq, struct request **__clone)
{
	struct multipath *m = ti->private;
	int r = DM_MAPIO_REQUEUE;
	size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
	struct pgpath *pgpath;
	struct block_device *bdev;
	struct dm_mpath_io *mpio;

	spin_lock_irq(&m->lock);

	/* Do we need to select a new pgpath? */
	if (!m->current_pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
		__choose_pgpath(m, nr_bytes);

	pgpath = m->current_pgpath;

	if (!pgpath) {
		if (!__must_push_back(m))
			r = -EIO;	/* Failed */
		goto out_unlock;
	} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
		   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
		__pg_init_all_paths(m);
		goto out_unlock;
	}

	mpio = set_mpio(m, map_context);
	if (!mpio)
		/* ENOMEM, requeue */
		goto out_unlock;

	mpio->pgpath = pgpath;
	mpio->nr_bytes = nr_bytes;

	bdev = pgpath->path.dev->bdev;

	spin_unlock_irq(&m->lock);

	if (clone) {
		/*
		 * Old request-based interface: allocated clone is passed in.
		 * Used by: .request_fn stacked on .request_fn path(s).
		 */
		clone->q = bdev_get_queue(bdev);
		clone->rq_disk = bdev->bd_disk;
		clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
	} else {
		/*
		 * blk-mq request-based interface; used by both:
		 * .request_fn stacked on blk-mq path(s) and
		 * blk-mq stacked on blk-mq path(s).
		 */
		*__clone = blk_mq_alloc_request(bdev_get_queue(bdev),
						rq_data_dir(rq), BLK_MQ_REQ_NOWAIT);
		if (IS_ERR(*__clone)) {
			/* ENOMEM, requeue */
			clear_request_fn_mpio(m, map_context);
			return r;
		}
		(*__clone)->bio = (*__clone)->biotail = NULL;
		(*__clone)->rq_disk = bdev->bd_disk;
		(*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
	}

	if (pgpath->pg->ps.type->start_io)
		pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
					      &pgpath->path,
					      nr_bytes);
	return DM_MAPIO_REMAPPED;

out_unlock:
	spin_unlock_irq(&m->lock);

	return r;
}

static int multipath_map(struct dm_target *ti, struct request *clone,
			 union map_info *map_context)
{
	return __multipath_map(ti, clone, map_context, NULL, NULL);
}

static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
				   union map_info *map_context,
				   struct request **clone)
{
	return __multipath_map(ti, NULL, map_context, rq, clone);
}

static void multipath_release_clone(struct request *clone)
{
	blk_mq_free_request(clone);
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
			    bool save_old_value)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (save_old_value) {
		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
			set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
		else
			clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
	} else {
		if (queue_if_no_path)
			set_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
		else
			clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
	}
	if (queue_if_no_path)
		set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
	else
		clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);

	spin_unlock_irqrestore(&m->lock, flags);

	if (!queue_if_no_path)
		dm_table_run_md_queue_async(m->ti->table);

	return 0;
}
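
/*
 * Note on queue_if_no_path() above: when save_old_value is true the
 * current MPATHF_QUEUE_IF_NO_PATH setting is preserved in
 * MPATHF_SAVED_QUEUE_IF_NO_PATH before being overwritten, so the
 * pre-suspend behaviour can be restored after a resume; otherwise the
 * saved bit simply tracks the newly requested value.
 */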

/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, trigger_event);

	dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
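
/*
 * For illustration only (the device numbers here are hypothetical): a
 * dmsetup table line with no feature args, no hardware handler and two
 * priority groups, each containing a single round-robin path, might
 * look like:
 *
 *	0 2097152 multipath 0 0 2 1 \
 *		round-robin 0 1 1 8:16 1000 \
 *		round-robin 0 1 1 8:32 1000
 *
 * i.e. 0 feature args, 0 hw_handler args, 2 priority groups starting
 * with PG 1; each PG uses the "round-robin" selector with 0 selector
 * args, 1 path and 1 per-path selector arg (the repeat count).
 */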
static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of path selector args"},
	};

	pst = dm_get_path_selector(dm_shift_arg(as));
	if (!pst) {
		ti->error = "unknown path selector type";
		return -EINVAL;
	}

	r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
	if (r) {
		dm_put_path_selector(pst);
		return -EINVAL;
	}

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = "path selector constructor failed";
		return r;
	}

	pg->ps.type = pst;
	dm_consume_args(as, ps_argc);

	return 0;
}

static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;
	struct multipath *m = ti->private;
	struct request_queue *q = NULL;
	const char *attached_handler_name;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = "no device given";
		return ERR_PTR(-EINVAL);
	}

	p = alloc_pgpath();
	if (!p)
		return ERR_PTR(-ENOMEM);

	r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
			  &p->path.dev);
	if (r) {
		ti->error = "error getting device";
		goto bad;
	}

	if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) || m->hw_handler_name)
		q = bdev_get_queue(p->path.dev->bdev);

	if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
retain:
		attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
		if (attached_handler_name) {
			/*
			 * Reset hw_handler_name to match the attached handler
			 * and clear any hw_handler_params associated with the
			 * ignored handler.
			 *
			 * NB. This modifies the table line to show the actual
			 * handler instead of the original table passed in.
			 */
			kfree(m->hw_handler_name);
			m->hw_handler_name = attached_handler_name;

			kfree(m->hw_handler_params);
			m->hw_handler_params = NULL;
		}
	}

	if (m->hw_handler_name) {
		r = scsi_dh_attach(q, m->hw_handler_name);
		if (r == -EBUSY) {
			char b[BDEVNAME_SIZE];

			printk(KERN_INFO "dm-mpath: retaining handler on device %s\n",
			       bdevname(p->path.dev->bdev, b));
			goto retain;
		}
		if (r < 0) {
			ti->error = "error attaching hardware handler";
			dm_put_device(ti, p->path.dev);
			goto bad;
		}

		if (m->hw_handler_params) {
			r = scsi_dh_set_params(q, m->hw_handler_params);
			if (r < 0) {
				ti->error = "unable to set hardware handler parameters";
				dm_put_device(ti, p->path.dev);
				goto bad;
			}
		}
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

 bad:
	free_pgpath(p);
	return ERR_PTR(r);
}

static struct priority_group *parse_priority_group(struct dm_arg_set *as,
						   struct multipath *m)
{
	static struct dm_arg _args[] = {
		{1, 1024, "invalid number of paths"},
		{0, 1024, "invalid number of selector args"}
	};

	int r;
	unsigned i, nr_selector_args, nr_args;
	struct priority_group *pg;
	struct dm_target *ti = m->ti;

	if (as->argc < 2) {
		as->argc = 0;
		ti->error = "not enough priority group arguments";
		return ERR_PTR(-EINVAL);
	}

	pg = alloc_priority_group();
	if (!pg) {
		ti->error = "couldn't allocate priority group";
		return ERR_PTR(-ENOMEM);
	}
	pg->m = m;

	r = parse_path_selector(as, pg, ti);
	if (r)
		goto bad;

	/*
	 * read the paths
	 */
	r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
	if (r)
		goto bad;

	r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
	if (r)
		goto bad;

	nr_args = 1 + nr_selector_args;
	for (i = 0; i < pg->nr_pgpaths; i++) {
		struct pgpath *pgpath;
		struct dm_arg_set path_args;

		if (as->argc < nr_args) {
			ti->error = "not enough path parameters";
			r = -EINVAL;
			goto bad;
		}

		path_args.argc = nr_args;
		path_args.argv = as->argv;

		pgpath = parse_path(&path_args, &pg->ps, ti);
		if (IS_ERR(pgpath)) {
			r = PTR_ERR(pgpath);
			goto bad;
		}

		pgpath->pg = pg;
		list_add_tail(&pgpath->list, &pg->pgpaths);
		dm_consume_args(as, nr_args);
	}

	return pg;

 bad:
	free_priority_group(pg, ti);
	return ERR_PTR(r);
}

static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
{
	unsigned hw_argc;
	int ret;
	struct dm_target *ti = m->ti;

	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of hardware handler args"},
	};

	if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
		return -EINVAL;

	if (!hw_argc)
		return 0;

	m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);

	if (hw_argc > 1) {
		char *p;
		int i, j, len = 4;

		for (i = 0; i <= hw_argc - 2; i++)
			len += strlen(as->argv[i]) + 1;
		p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
		if (!p) {
			ti->error = "memory allocation failed";
			ret = -ENOMEM;
			goto fail;
		}
		j = sprintf(p, "%d", hw_argc - 1);
		for (i = 0, p += j + 1; i <= hw_argc - 2; i++, p += j + 1)
			j = sprintf(p, "%s", as->argv[i]);
	}
	dm_consume_args(as, hw_argc - 1);

	return 0;
fail:
	kfree(m->hw_handler_name);
	m->hw_handler_name = NULL;
	return ret;
}

static int parse_features(struct dm_arg_set *as, struct multipath *m)
{
	int r;
	unsigned argc;
	struct dm_target *ti = m->ti;
	const char *arg_name;

	static struct dm_arg _args[] = {
		{0, 6, "invalid number of feature args"},
		{1, 50, "pg_init_retries must be between 1 and 50"},
		{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
	};

	r = dm_read_arg_group(_args, as, &argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!argc)
		return 0;

	do {
		arg_name = dm_shift_arg(as);
		argc--;

		if (!strcasecmp(arg_name, "queue_if_no_path")) {
			r = queue_if_no_path(m, true, false);
			continue;
		}

		if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
			set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
			continue;
		}

		if (!strcasecmp(arg_name, "pg_init_retries") &&
		    (argc >= 1)) {
			r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
			argc--;
			continue;
		}

		if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
		    (argc >= 1)) {
			r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
			argc--;
			continue;
		}

		ti->error = "Unrecognised multipath feature request";
		r = -EINVAL;
	} while (argc && !r);

	return r;
}
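
/*
 * Illustrative feature strings accepted by parse_features() above (the
 * leading count covers the feature names and their values):
 *
 *	0
 *	1 queue_if_no_path
 *	3 queue_if_no_path pg_init_retries 5
 *	4 pg_init_retries 5 pg_init_delay_msecs 2000
 */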
855
856static int multipath_ctr(struct dm_target *ti, unsigned int argc,
857 char **argv)
858{
Mike Snitzer498f0102011-08-02 12:32:04 +0100859 /* target arguments */
860 static struct dm_arg _args[] = {
Mike Snitzera490a072011-03-24 13:54:33 +0000861 {0, 1024, "invalid number of priority groups"},
862 {0, 1024, "invalid initial priority group number"},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863 };
864
865 int r;
866 struct multipath *m;
Mike Snitzer498f0102011-08-02 12:32:04 +0100867 struct dm_arg_set as;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868 unsigned pg_count = 0;
869 unsigned next_pg_num;
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500870 bool use_blk_mq = dm_use_blk_mq(dm_table_get_md(ti->table));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871
872 as.argc = argc;
873 as.argv = argv;
874
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500875 m = alloc_multipath(ti, use_blk_mq);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 if (!m) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700877 ti->error = "can't allocate multipath";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700878 return -EINVAL;
879 }
880
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700881 r = parse_features(&as, m);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882 if (r)
883 goto bad;
884
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700885 r = parse_hw_handler(&as, m);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700886 if (r)
887 goto bad;
888
Mike Snitzer498f0102011-08-02 12:32:04 +0100889 r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890 if (r)
891 goto bad;
892
Mike Snitzer498f0102011-08-02 12:32:04 +0100893 r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894 if (r)
895 goto bad;
896
Mike Snitzera490a072011-03-24 13:54:33 +0000897 if ((!m->nr_priority_groups && next_pg_num) ||
898 (m->nr_priority_groups && !next_pg_num)) {
899 ti->error = "invalid initial priority group";
900 r = -EINVAL;
901 goto bad;
902 }
903
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904 /* parse the priority groups */
905 while (as.argc) {
906 struct priority_group *pg;
Mike Snitzer91e968a2016-03-17 17:10:15 -0400907 unsigned nr_valid_paths = atomic_read(&m->nr_valid_paths);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700908
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700909 pg = parse_priority_group(&as, m);
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100910 if (IS_ERR(pg)) {
911 r = PTR_ERR(pg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 goto bad;
913 }
914
Mike Snitzer91e968a2016-03-17 17:10:15 -0400915 nr_valid_paths += pg->nr_pgpaths;
916 atomic_set(&m->nr_valid_paths, nr_valid_paths);
917
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 list_add_tail(&pg->list, &m->priority_groups);
919 pg_count++;
920 pg->pg_num = pg_count;
921 if (!--next_pg_num)
922 m->next_pg = pg;
923 }
924
925 if (pg_count != m->nr_priority_groups) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700926 ti->error = "priority group count mismatch";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927 r = -EINVAL;
928 goto bad;
929 }
930
Alasdair G Kergon55a62ee2013-03-01 22:45:47 +0000931 ti->num_flush_bios = 1;
932 ti->num_discard_bios = 1;
Mike Snitzer042bcef2013-05-10 14:37:16 +0100933 ti->num_write_same_bios = 1;
Mike Snitzer8637a6b2016-01-31 12:08:36 -0500934 if (use_blk_mq)
935 ti->per_io_data_size = sizeof(struct dm_mpath_io);
Mikulas Patocka86279212009-06-22 10:12:24 +0100936
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 return 0;
938
939 bad:
940 free_multipath(m);
941 return r;
942}
943
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000944static void multipath_wait_for_pg_init_completion(struct multipath *m)
945{
946 DECLARE_WAITQUEUE(wait, current);
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000947
948 add_wait_queue(&m->pg_init_wait, &wait);
949
950 while (1) {
951 set_current_state(TASK_UNINTERRUPTIBLE);
952
Mike Snitzer91e968a2016-03-17 17:10:15 -0400953 if (!atomic_read(&m->pg_init_in_progress))
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000954 break;
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000955
956 io_schedule();
957 }
958 set_current_state(TASK_RUNNING);
959
960 remove_wait_queue(&m->pg_init_wait, &wait);
961}
962
963static void flush_multipath_work(struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964{
Mike Snitzer518257b2016-03-17 16:32:10 -0400965 set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
966 smp_mb__after_atomic();
Shiva Krishna Merla954a73d2013-10-30 03:26:38 +0000967
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -0700968 flush_workqueue(kmpath_handlerd);
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000969 multipath_wait_for_pg_init_completion(m);
Alasdair G Kergona044d012005-07-12 15:53:02 -0700970 flush_workqueue(kmultipathd);
Tejun Heo43829732012-08-20 14:51:24 -0700971 flush_work(&m->trigger_event);
Shiva Krishna Merla954a73d2013-10-30 03:26:38 +0000972
Mike Snitzer518257b2016-03-17 16:32:10 -0400973 clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
974 smp_mb__after_atomic();
Kiyoshi Ueda6df400a2009-12-10 23:52:19 +0000975}
976
977static void multipath_dtr(struct dm_target *ti)
978{
979 struct multipath *m = ti->private;
980
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +0000981 flush_multipath_work(m);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700982 free_multipath(m);
983}
984
985/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 * Take a path out of use.
987 */
988static int fail_path(struct pgpath *pgpath)
989{
990 unsigned long flags;
991 struct multipath *m = pgpath->pg->m;
992
993 spin_lock_irqsave(&m->lock, flags);
994
Kiyoshi Ueda66800732008-10-10 13:36:58 +0100995 if (!pgpath->is_active)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700996 goto out;
997
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700998 DMWARN("Failing path %s.", pgpath->path.dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999
1000 pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001001 pgpath->is_active = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 pgpath->fail_count++;
1003
Mike Snitzer91e968a2016-03-17 17:10:15 -04001004 atomic_dec(&m->nr_valid_paths);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001005
1006 if (pgpath == m->current_pgpath)
1007 m->current_pgpath = NULL;
1008
Mike Andersonb15546f2007-10-19 22:48:02 +01001009 dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
Mike Snitzer91e968a2016-03-17 17:10:15 -04001010 pgpath->path.dev->name, atomic_read(&m->nr_valid_paths));
Mike Andersonb15546f2007-10-19 22:48:02 +01001011
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001012 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013
1014out:
1015 spin_unlock_irqrestore(&m->lock, flags);
1016
1017 return 0;
1018}
1019
1020/*
1021 * Reinstate a previously-failed path
1022 */
1023static int reinstate_path(struct pgpath *pgpath)
1024{
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001025 int r = 0, run_queue = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 unsigned long flags;
1027 struct multipath *m = pgpath->pg->m;
Mike Snitzer91e968a2016-03-17 17:10:15 -04001028 unsigned nr_valid_paths;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029
1030 spin_lock_irqsave(&m->lock, flags);
1031
Kiyoshi Ueda66800732008-10-10 13:36:58 +01001032 if (pgpath->is_active)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033 goto out;
1034
Mike Snitzerec31f3f2016-02-20 12:49:43 -05001035 DMWARN("Reinstating path %s.", pgpath->path.dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001036
1037 r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
1038 if (r)
1039 goto out;
1040
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001041 pgpath->is_active = true;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001042
Mike Snitzer91e968a2016-03-17 17:10:15 -04001043 nr_valid_paths = atomic_inc_return(&m->nr_valid_paths);
1044 if (nr_valid_paths == 1) {
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001045 m->current_pgpath = NULL;
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001046 run_queue = 1;
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001047 } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001048 if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
Mike Snitzer91e968a2016-03-17 17:10:15 -04001049 atomic_inc(&m->pg_init_in_progress);
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001050 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001051
Mike Andersonb15546f2007-10-19 22:48:02 +01001052 dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
Mike Snitzer91e968a2016-03-17 17:10:15 -04001053 pgpath->path.dev->name, nr_valid_paths);
Mike Andersonb15546f2007-10-19 22:48:02 +01001054
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001055 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001056
1057out:
1058 spin_unlock_irqrestore(&m->lock, flags);
Hannes Reinecke63d832c2014-05-26 14:45:39 +02001059 if (run_queue)
1060 dm_table_run_md_queue_async(m->ti->table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061
1062 return r;
1063}
1064
1065/*
1066 * Fail or reinstate all paths that match the provided struct dm_dev.
1067 */
1068static int action_dev(struct multipath *m, struct dm_dev *dev,
1069 action_fn action)
1070{
Mike Snitzer19040c02011-03-24 13:54:31 +00001071 int r = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 struct pgpath *pgpath;
1073 struct priority_group *pg;
1074
1075 list_for_each_entry(pg, &m->priority_groups, list) {
1076 list_for_each_entry(pgpath, &pg->pgpaths, list) {
1077 if (pgpath->path.dev == dev)
1078 r = action(pgpath);
1079 }
1080 }
1081
1082 return r;
1083}
1084
1085/*
1086 * Temporarily try to avoid having to use the specified PG
1087 */
1088static void bypass_pg(struct multipath *m, struct priority_group *pg,
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001089 bool bypassed)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001090{
1091 unsigned long flags;
1092
1093 spin_lock_irqsave(&m->lock, flags);
1094
1095 pg->bypassed = bypassed;
1096 m->current_pgpath = NULL;
1097 m->current_pg = NULL;
1098
1099 spin_unlock_irqrestore(&m->lock, flags);
1100
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001101 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102}
1103
1104/*
1105 * Switch to using the specified PG from the next I/O that gets mapped
1106 */
1107static int switch_pg_num(struct multipath *m, const char *pgstr)
1108{
1109 struct priority_group *pg;
1110 unsigned pgnum;
1111 unsigned long flags;
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001112 char dummy;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001114 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 (pgnum > m->nr_priority_groups)) {
1116 DMWARN("invalid PG number supplied to switch_pg_num");
1117 return -EINVAL;
1118 }
1119
1120 spin_lock_irqsave(&m->lock, flags);
1121 list_for_each_entry(pg, &m->priority_groups, list) {
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001122 pg->bypassed = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001123 if (--pgnum)
1124 continue;
1125
1126 m->current_pgpath = NULL;
1127 m->current_pg = NULL;
1128 m->next_pg = pg;
1129 }
1130 spin_unlock_irqrestore(&m->lock, flags);
1131
Alasdair G Kergonfe9cf302009-01-06 03:05:13 +00001132 schedule_work(&m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001133 return 0;
1134}
1135
1136/*
1137 * Set/clear bypassed status of a PG.
1138 * PGs are numbered upwards from 1 in the order they were declared.
1139 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001140static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141{
1142 struct priority_group *pg;
1143 unsigned pgnum;
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001144 char dummy;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001145
Mikulas Patocka31998ef2012-03-28 18:41:26 +01001146 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001147 (pgnum > m->nr_priority_groups)) {
1148 DMWARN("invalid PG number supplied to bypass_pg");
1149 return -EINVAL;
1150 }
1151
1152 list_for_each_entry(pg, &m->priority_groups, list) {
1153 if (!--pgnum)
1154 break;
1155 }
1156
1157 bypass_pg(m, pg, bypassed);
1158 return 0;
1159}
1160
1161/*
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001162 * Should we retry pg_init immediately?
1163 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001164static bool pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001165{
1166 unsigned long flags;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001167 bool limit_reached = false;
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001168
1169 spin_lock_irqsave(&m->lock, flags);
1170
Mike Snitzer91e968a2016-03-17 17:10:15 -04001171 if (atomic_read(&m->pg_init_count) <= m->pg_init_retries &&
1172 !test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
Mike Snitzer518257b2016-03-17 16:32:10 -04001173 set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001174 else
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001175 limit_reached = true;
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001176
1177 spin_unlock_irqrestore(&m->lock, flags);
1178
1179 return limit_reached;
1180}
1181
Chandra Seetharaman3ae31f62009-10-21 09:22:46 -07001182static void pg_init_done(void *data, int errors)
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001183{
Moger, Babu83c0d5d2010-03-06 02:29:45 +00001184 struct pgpath *pgpath = data;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001185 struct priority_group *pg = pgpath->pg;
1186 struct multipath *m = pg->m;
1187 unsigned long flags;
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001188 bool delay_retry = false;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001189
1190 /* device or driver problems */
1191 switch (errors) {
1192 case SCSI_DH_OK:
1193 break;
1194 case SCSI_DH_NOSYS:
1195 if (!m->hw_handler_name) {
1196 errors = 0;
1197 break;
1198 }
Moger, Babuf7b934c2010-03-06 02:29:49 +00001199 DMERR("Could not failover the device: Handler scsi_dh_%s "
1200 "Error %d.", m->hw_handler_name, errors);
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001201 /*
1202 * Fail path for now, so we do not ping pong
1203 */
1204 fail_path(pgpath);
1205 break;
1206 case SCSI_DH_DEV_TEMP_BUSY:
1207 /*
1208 * Probably doing something like FW upgrade on the
1209 * controller so try the other pg.
1210 */
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001211 bypass_pg(m, pg, true);
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001212 break;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001213 case SCSI_DH_RETRY:
Chandra Seetharaman4e2d19e2011-01-13 20:00:01 +00001214 /* Wait before retrying. */
1215 delay_retry = 1;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001216 case SCSI_DH_IMM_RETRY:
1217 case SCSI_DH_RES_TEMP_UNAVAIL:
1218 if (pg_init_limit_reached(m, pgpath))
1219 fail_path(pgpath);
1220 errors = 0;
1221 break;
Mike Snitzerec31f3f2016-02-20 12:49:43 -05001222 case SCSI_DH_DEV_OFFLINED:
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001223 default:
1224 /*
1225 * We probably do not want to fail the path for a device
1226 * error, but this is what the old dm did. In future
1227 * patches we can do more advanced handling.
1228 */
1229 fail_path(pgpath);
1230 }
1231
1232 spin_lock_irqsave(&m->lock, flags);
1233 if (errors) {
Chandra Seetharamane54f77d2009-06-22 10:12:12 +01001234 if (pgpath == m->current_pgpath) {
1235 DMERR("Could not failover device. Error %d.", errors);
1236 m->current_pgpath = NULL;
1237 m->current_pg = NULL;
1238 }
Mike Snitzer518257b2016-03-17 16:32:10 -04001239 } else if (!test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
Mike Snitzerbe7d31c2016-02-10 13:02:21 -05001240 pg->bypassed = false;
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001241
Mike Snitzer91e968a2016-03-17 17:10:15 -04001242 if (atomic_dec_return(&m->pg_init_in_progress) > 0)
Kiyoshi Uedad0259bf2010-03-06 02:30:02 +00001243 /* Activations of other paths are still on going */
1244 goto out;
1245
Mike Snitzer518257b2016-03-17 16:32:10 -04001246 if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
1247 if (delay_retry)
1248 set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
1249 else
1250 clear_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
1251
Hannes Reinecke3e9f1be2014-02-28 15:33:45 +01001252 if (__pg_init_all_paths(m))
1253 goto out;
1254 }
Mike Snitzer518257b2016-03-17 16:32:10 -04001255 clear_bit(MPATHF_QUEUE_IO, &m->flags);
Kiyoshi Uedad0259bf2010-03-06 02:30:02 +00001256
Kiyoshi Ueda2bded7b2010-03-06 02:32:13 +00001257 /*
1258 * Wake up any thread waiting to suspend.
1259 */
1260 wake_up(&m->pg_init_wait);
1261
Kiyoshi Uedad0259bf2010-03-06 02:30:02 +00001262out:
Chandra Seetharamancfae5c92008-05-01 14:50:11 -07001263 spin_unlock_irqrestore(&m->lock, flags);
1264}
1265
static void activate_path(struct work_struct *work)
{
	struct pgpath *pgpath =
		container_of(work, struct pgpath, activate_path.work);

	if (pgpath->is_active)
		scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
				 pg_init_done, pgpath);
	else
		pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED);
}

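/*
 * Completion codes that indicate a problem with the target itself
 * rather than with the transport, so retrying on another path would
 * not help.
 */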
static int noretry_error(int error)
{
	switch (error) {
	case -EOPNOTSUPP:
	case -EREMOTEIO:
	case -EILSEQ:
	case -ENODATA:
	case -ENOSPC:
		return 1;
	}

	/* Anything else could be a path failure, so should be retried */
	return 0;
}

/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct request *clone,
		     int error, struct dm_mpath_io *mpio)
{
	/*
	 * We don't queue any clone request inside the multipath target
	 * during end I/O handling, since those clone requests don't have
	 * bio clones.  If we queue them inside the multipath target,
	 * we need to make bio clones, which requires memory allocation.
	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
	 *  don't have bio clones.)
	 * Instead of queueing the clone request here, we queue the original
	 * request into dm core, which will remake a clone request and
	 * clone bios for it and resubmit it later.
	 */
	int r = DM_ENDIO_REQUEUE;
	unsigned long flags;

	if (!error && !clone->errors)
		return 0;	/* I/O complete */

	if (noretry_error(error))
		return error;

	if (mpio->pgpath)
		fail_path(mpio->pgpath);

	spin_lock_irqsave(&m->lock, flags);
	if (!atomic_read(&m->nr_valid_paths)) {
		if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
			if (!__must_push_back(m))
				r = -EIO;
		} else {
			if (error == -EBADE)
				r = error;
		}
	}
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

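/*
 * Request-based end_io hook: route the completion through do_end_io()
 * and let the path selector account the bytes completed on the path.
 */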
static int multipath_end_io(struct dm_target *ti, struct request *clone,
			    int error, union map_info *map_context)
{
	struct multipath *m = ti->private;
	struct dm_mpath_io *mpio = get_mpio(map_context);
	struct pgpath *pgpath;
	struct path_selector *ps;
	int r;

	BUG_ON(!mpio);

	r = do_end_io(m, clone, error, mpio);
	pgpath = mpio->pgpath;
	if (pgpath) {
		ps = &pgpath->pg->ps;
		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
	}
	clear_request_fn_mpio(m, map_context);

	return r;
}

/*
 * Suspend can't complete until all the I/O is processed, so if
 * the last path fails we must error any remaining I/O.
 * Note that if the freeze_bdev fails while suspending, the
 * queue_if_no_path state is lost - userspace should reset it.
 */
static void multipath_presuspend(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	queue_if_no_path(m, false, true);
}

static void multipath_postsuspend(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	mutex_lock(&m->work_mutex);
	flush_multipath_work(m);
	mutex_unlock(&m->work_mutex);
}

/*
 * Restore the queue_if_no_path setting.
 */
static void multipath_resume(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags))
		set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
	else
		clear_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
	smp_mb__after_atomic();
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *            [A|D|E num_ps_status_args [ps_status_args]*
 *             num_paths num_selector_args
 *             [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 * num_feature_args [features_args]*
 * num_handler_args hw_handler [hw_handler_args]*
 * num_groups init_group_number
 *     [priority selector-name num_ps_args [ps_args]*
 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
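/*
 * A hypothetical STATUSTYPE_INFO line for a map with one priority group
 * of two healthy paths, no hardware handler and a path selector that
 * emits no status of its own (illustrative only; real output depends on
 * the selector in use):
 *
 *   2 0 1 0 1 1 A 0 2 0 8:16 A 0 8:32 A 0
 *
 * i.e. two feature args (the QUEUE_IO flag and pg_init_count), zero
 * handler status args, one group, initial group 1, group Active, two
 * paths, both Active with zero failures.
 */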
static void multipath_status(struct dm_target *ti, status_type_t type,
			     unsigned status_flags, char *result, unsigned maxlen)
{
	int sz = 0;
	unsigned long flags;
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	unsigned pg_num;
	char state;

	spin_lock_irqsave(&m->lock, flags);

	/* Features */
	if (type == STATUSTYPE_INFO)
		DMEMIT("2 %u %u ", test_bit(MPATHF_QUEUE_IO, &m->flags),
		       atomic_read(&m->pg_init_count));
	else {
		DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) +
			      (m->pg_init_retries > 0) * 2 +
			      (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
			      test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags));
		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
			DMEMIT("queue_if_no_path ");
		if (m->pg_init_retries)
			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
		if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
			DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
		if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
			DMEMIT("retain_attached_hw_handler ");
	}

	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
		DMEMIT("0 ");
	else
		DMEMIT("1 %s ", m->hw_handler_name);

	DMEMIT("%u ", m->nr_priority_groups);

	if (m->next_pg)
		pg_num = m->next_pg->pg_num;
	else if (m->current_pg)
		pg_num = m->current_pg->pg_num;
	else
		pg_num = (m->nr_priority_groups ? 1 : 0);

	DMEMIT("%u ", pg_num);

	switch (type) {
	case STATUSTYPE_INFO:
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed)
				state = 'D';	/* Disabled */
			else if (pg == m->current_pg)
				state = 'A';	/* Currently Active */
			else
				state = 'E';	/* Enabled */

			DMEMIT("%c ", state);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->info_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s %s %u ", p->path.dev->name,
				       p->is_active ? "A" : "F",
				       p->fail_count);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;

	case STATUSTYPE_TABLE:
		list_for_each_entry(pg, &m->priority_groups, list) {
			DMEMIT("%s ", pg->ps.type->name);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->table_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s ", p->path.dev->name);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;
	}

	spin_unlock_irqrestore(&m->lock, flags);
}

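/*
 * Messages arrive via the device-mapper message ioctl, e.g. through
 * dmsetup ("mpatha" below is a made-up map name):
 *
 *   dmsetup message mpatha 0 queue_if_no_path
 *   dmsetup message mpatha 0 fail_path 8:32
 */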
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct dm_dev *dev;
	struct multipath *m = ti->private;
	action_fn action;

	mutex_lock(&m->work_mutex);

	if (dm_suspended(ti)) {
		r = -EBUSY;
		goto out;
	}

	if (argc == 1) {
		if (!strcasecmp(argv[0], "queue_if_no_path")) {
			r = queue_if_no_path(m, true, false);
			goto out;
		} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
			r = queue_if_no_path(m, false, false);
			goto out;
		}
	}

	if (argc != 2) {
		DMWARN("Invalid multipath message arguments. Expected 2 arguments, got %d.", argc);
		goto out;
	}

	if (!strcasecmp(argv[0], "disable_group")) {
		r = bypass_pg_num(m, argv[1], true);
		goto out;
	} else if (!strcasecmp(argv[0], "enable_group")) {
		r = bypass_pg_num(m, argv[1], false);
		goto out;
	} else if (!strcasecmp(argv[0], "switch_group")) {
		r = switch_pg_num(m, argv[1]);
		goto out;
	} else if (!strcasecmp(argv[0], "reinstate_path"))
		action = reinstate_path;
	else if (!strcasecmp(argv[0], "fail_path"))
		action = fail_path;
	else {
		DMWARN("Unrecognised multipath message received: %s", argv[0]);
		goto out;
	}

	r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
	if (r) {
		DMWARN("message: error getting device %s",
		       argv[1]);
		goto out;
	}

	r = action_dev(m, dev, action);

	dm_put_device(ti, dev);

out:
	mutex_unlock(&m->work_mutex);
	return r;
}

static int multipath_prepare_ioctl(struct dm_target *ti,
				   struct block_device **bdev, fmode_t *mode)
{
	struct multipath *m = ti->private;
	unsigned long flags;
	int r;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	if (m->current_pgpath) {
		if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) {
			*bdev = m->current_pgpath->path.dev->bdev;
			*mode = m->current_pgpath->path.dev->mode;
			r = 0;
		} else {
			/* pg_init has not started or completed */
			r = -ENOTCONN;
		}
	} else {
		/* No path is available */
		if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
			r = -ENOTCONN;
		else
			r = -EIO;
	}

	spin_unlock_irqrestore(&m->lock, flags);

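	/*
	 * -ENOTCONN means a usable path may appear once pg_init completes:
	 * kick off initialisation and run the request queue so that a
	 * retried ioctl can succeed later.
	 */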
	if (r == -ENOTCONN) {
		spin_lock_irqsave(&m->lock, flags);
		if (!m->current_pg) {
			/* Path status changed, redo selection */
			__choose_pgpath(m, 0);
		}
		if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
			__pg_init_all_paths(m);
		spin_unlock_irqrestore(&m->lock, flags);
		dm_table_run_md_queue_async(m->ti->table);
	}

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
		return 1;
	return r;
}

static int multipath_iterate_devices(struct dm_target *ti,
				     iterate_devices_callout_fn fn, void *data)
{
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	int ret = 0;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(p, &pg->pgpaths, list) {
			ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
			if (ret)
				goto out;
		}
	}

out:
	return ret;
}

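/*
 * Ask the low-level driver behind the path's block device whether it
 * is currently too busy to accept more requests.
 */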
static int pgpath_busy(struct pgpath *pgpath)
{
	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);

	return blk_lld_busy(q);
}

/*
 * We return "busy" only when we can map I/Os but the underlying devices
 * are busy (so even if we map I/Os now, the I/Os will wait on
 * the underlying queue).
 * In other words, if we want to kill I/Os or queue them inside us
 * due to map unavailability, we don't return "busy".  Otherwise,
 * dm core won't give us the I/Os and we can't do what we want.
 */
static int multipath_busy(struct dm_target *ti)
{
	bool busy = false, has_active = false;
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *pgpath;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	/* pg_init in progress or no paths available */
	if (atomic_read(&m->pg_init_in_progress) ||
	    (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
		busy = true;
		goto out;
	}
	/* Guess which priority_group will be used at next mapping time */
	if (unlikely(!m->current_pgpath && m->next_pg))
		pg = m->next_pg;
	else if (likely(m->current_pg))
		pg = m->current_pg;
	else
		/*
		 * We don't know which pg will be used at next mapping time.
		 * We don't call __choose_pgpath() here to avoid triggering
		 * pg_init just by busy checking.
		 * So we don't know whether the underlying devices we will be
		 * using at next mapping time are busy or not. Just try mapping.
		 */
		goto out;

	/*
	 * If there is at least one non-busy active path, the path selector
	 * will be able to select it.  So we consider such a pg as not busy.
	 */
	busy = true;
	list_for_each_entry(pgpath, &pg->pgpaths, list)
		if (pgpath->is_active) {
			has_active = true;
			if (!pgpath_busy(pgpath)) {
				busy = false;
				break;
			}
		}

	if (!has_active)
		/*
		 * No active path in this pg, so this pg won't be used and
		 * the current_pg will be changed at next mapping time.
		 * We need to try mapping to determine it.
		 */
		busy = false;

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return busy;
}

/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
static struct target_type multipath_target = {
	.name = "multipath",
	.version = {1, 11, 0},
	.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
	.module = THIS_MODULE,
	.ctr = multipath_ctr,
	.dtr = multipath_dtr,
	.map_rq = multipath_map,
	.clone_and_map_rq = multipath_clone_and_map,
	.release_clone_rq = multipath_release_clone,
	.rq_end_io = multipath_end_io,
	.presuspend = multipath_presuspend,
	.postsuspend = multipath_postsuspend,
	.resume = multipath_resume,
	.status = multipath_status,
	.message = multipath_message,
	.prepare_ioctl = multipath_prepare_ioctl,
	.iterate_devices = multipath_iterate_devices,
	.busy = multipath_busy,
};

static int __init dm_multipath_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
	if (!_mpio_cache)
		return -ENOMEM;

	r = dm_register_target(&multipath_target);
	if (r < 0) {
		DMERR("register failed %d", r);
		r = -EINVAL;
		goto bad_register_target;
	}

	kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
	if (!kmultipathd) {
		DMERR("failed to create workqueue kmpathd");
		r = -ENOMEM;
		goto bad_alloc_kmultipathd;
	}

	/*
	 * A separate workqueue is used to handle the device handlers
	 * to avoid overloading the existing workqueue. Overloading the
	 * old workqueue would also create a bottleneck in the
	 * path of the storage hardware device activation.
	 */
	kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
						  WQ_MEM_RECLAIM);
	if (!kmpath_handlerd) {
		DMERR("failed to create workqueue kmpath_handlerd");
		r = -ENOMEM;
		goto bad_alloc_kmpath_handlerd;
	}

	DMINFO("version %u.%u.%u loaded",
	       multipath_target.version[0], multipath_target.version[1],
	       multipath_target.version[2]);

	return 0;

bad_alloc_kmpath_handlerd:
	destroy_workqueue(kmultipathd);
bad_alloc_kmultipathd:
	dm_unregister_target(&multipath_target);
bad_register_target:
	kmem_cache_destroy(_mpio_cache);

	return r;
}

static void __exit dm_multipath_exit(void)
{
	destroy_workqueue(kmpath_handlerd);
	destroy_workqueue(kmultipathd);

	dm_unregister_target(&multipath_target);
	kmem_cache_destroy(_mpio_cache);
}

module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");