s390/dasd: fix incorrect locking order for LCU device add/remove
The correct lock order for LCU lock and cdev lock is to take the cdev
lock first and afterwards the LCU lock. This is caused by the fact
that LCU functions are called in an interrupt context with the cdev
lock implicitly hold by CIO.
To assure the right locking order but also be able to iterate over
devices in a LCU introduce a trylock block that can be called with
the device lock for one device hold and then takes the LCU lock and
try to lock all devices accounted to this LCU. Afterwards all devices
and the LCU itself are locked.
Signed-off-by: Stefan Haberland <stefan.haberland@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
index 286782c..cbbdd3e 100644
--- a/drivers/s390/block/dasd_alias.c
+++ b/drivers/s390/block/dasd_alias.c
@@ -319,22 +319,14 @@
struct dasd_eckd_private *private;
struct alias_pav_group *group;
struct dasd_uid uid;
- unsigned long flags;
private = (struct dasd_eckd_private *) device->private;
- /* only lock if not already locked */
- if (device != pos)
- spin_lock_irqsave_nested(get_ccwdev_lock(device->cdev), flags,
- CDEV_NESTED_SECOND);
private->uid.type = lcu->uac->unit[private->uid.real_unit_addr].ua_type;
private->uid.base_unit_addr =
lcu->uac->unit[private->uid.real_unit_addr].base_ua;
uid = private->uid;
- if (device != pos)
- spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
-
/* if we have no PAV anyway, we don't need to bother with PAV groups */
if (lcu->pav == NO_PAV) {
list_move(&device->alias_list, &lcu->active_devices);
@@ -411,6 +403,130 @@
return 0;
}
+/*
+ * This function tries to lock all devices on an lcu via trylock
+ * return NULL on success otherwise return first failed device
+ */
+static struct dasd_device *_trylock_all_devices_on_lcu(struct alias_lcu *lcu,
+ struct dasd_device *pos)
+
+{
+ struct alias_pav_group *pavgroup;
+ struct dasd_device *device;
+
+ list_for_each_entry(device, &lcu->active_devices, alias_list) {
+ if (device == pos)
+ continue;
+ if (!spin_trylock(get_ccwdev_lock(device->cdev)))
+ return device;
+ }
+ list_for_each_entry(device, &lcu->inactive_devices, alias_list) {
+ if (device == pos)
+ continue;
+ if (!spin_trylock(get_ccwdev_lock(device->cdev)))
+ return device;
+ }
+ list_for_each_entry(pavgroup, &lcu->grouplist, group) {
+ list_for_each_entry(device, &pavgroup->baselist, alias_list) {
+ if (device == pos)
+ continue;
+ if (!spin_trylock(get_ccwdev_lock(device->cdev)))
+ return device;
+ }
+ list_for_each_entry(device, &pavgroup->aliaslist, alias_list) {
+ if (device == pos)
+ continue;
+ if (!spin_trylock(get_ccwdev_lock(device->cdev)))
+ return device;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * unlock all devices except the one that is specified as pos
+ * stop if enddev is specified and reached
+ */
+static void _unlock_all_devices_on_lcu(struct alias_lcu *lcu,
+ struct dasd_device *pos,
+ struct dasd_device *enddev)
+
+{
+ struct alias_pav_group *pavgroup;
+ struct dasd_device *device;
+
+ list_for_each_entry(device, &lcu->active_devices, alias_list) {
+ if (device == pos)
+ continue;
+ if (device == enddev)
+ return;
+ spin_unlock(get_ccwdev_lock(device->cdev));
+ }
+ list_for_each_entry(device, &lcu->inactive_devices, alias_list) {
+ if (device == pos)
+ continue;
+ if (device == enddev)
+ return;
+ spin_unlock(get_ccwdev_lock(device->cdev));
+ }
+ list_for_each_entry(pavgroup, &lcu->grouplist, group) {
+ list_for_each_entry(device, &pavgroup->baselist, alias_list) {
+ if (device == pos)
+ continue;
+ if (device == enddev)
+ return;
+ spin_unlock(get_ccwdev_lock(device->cdev));
+ }
+ list_for_each_entry(device, &pavgroup->aliaslist, alias_list) {
+ if (device == pos)
+ continue;
+ if (device == enddev)
+ return;
+ spin_unlock(get_ccwdev_lock(device->cdev));
+ }
+ }
+}
+
+/*
+ * this function is needed because the locking order
+ * device lock -> lcu lock
+ * needs to be assured when iterating over devices in an LCU
+ *
+ * if a device is specified in pos then the device lock is already hold
+ */
+static void _trylock_and_lock_lcu_irqsave(struct alias_lcu *lcu,
+ struct dasd_device *pos,
+ unsigned long *flags)
+{
+ struct dasd_device *failed;
+
+ do {
+ spin_lock_irqsave(&lcu->lock, *flags);
+ failed = _trylock_all_devices_on_lcu(lcu, pos);
+ if (failed) {
+ _unlock_all_devices_on_lcu(lcu, pos, failed);
+ spin_unlock_irqrestore(&lcu->lock, *flags);
+ cpu_relax();
+ }
+ } while (failed);
+}
+
+static void _trylock_and_lock_lcu(struct alias_lcu *lcu,
+ struct dasd_device *pos)
+{
+ struct dasd_device *failed;
+
+ do {
+ spin_lock(&lcu->lock);
+ failed = _trylock_all_devices_on_lcu(lcu, pos);
+ if (failed) {
+ _unlock_all_devices_on_lcu(lcu, pos, failed);
+ spin_unlock(&lcu->lock);
+ cpu_relax();
+ }
+ } while (failed);
+}
+
static int read_unit_address_configuration(struct dasd_device *device,
struct alias_lcu *lcu)
{
@@ -505,10 +621,7 @@
if (rc)
return rc;
- /* need to take cdev lock before lcu lock */
- spin_lock_irqsave_nested(get_ccwdev_lock(refdev->cdev), flags,
- CDEV_NESTED_FIRST);
- spin_lock(&lcu->lock);
+ _trylock_and_lock_lcu_irqsave(lcu, NULL, &flags);
lcu->pav = NO_PAV;
for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) {
switch (lcu->uac->unit[i].ua_type) {
@@ -527,8 +640,8 @@
alias_list) {
_add_device_to_lcu(lcu, device, refdev);
}
- spin_unlock(&lcu->lock);
- spin_unlock_irqrestore(get_ccwdev_lock(refdev->cdev), flags);
+ _unlock_all_devices_on_lcu(lcu, NULL, NULL);
+ spin_unlock_irqrestore(&lcu->lock, flags);
return 0;
}
@@ -616,8 +729,6 @@
private = (struct dasd_eckd_private *) device->private;
lcu = private->lcu;
rc = 0;
-
- /* need to take cdev lock before lcu lock */
spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
spin_lock(&lcu->lock);
if (!(lcu->flags & UPDATE_PENDING)) {
@@ -754,30 +865,19 @@
struct alias_pav_group *pavgroup;
struct dasd_device *device;
struct dasd_eckd_private *private;
- unsigned long flags;
/* active and inactive list can contain alias as well as base devices */
list_for_each_entry(device, &lcu->active_devices, alias_list) {
private = (struct dasd_eckd_private *) device->private;
- spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
- if (private->uid.type != UA_BASE_DEVICE) {
- spin_unlock_irqrestore(get_ccwdev_lock(device->cdev),
- flags);
+ if (private->uid.type != UA_BASE_DEVICE)
continue;
- }
- spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
dasd_schedule_block_bh(device->block);
dasd_schedule_device_bh(device);
}
list_for_each_entry(device, &lcu->inactive_devices, alias_list) {
private = (struct dasd_eckd_private *) device->private;
- spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
- if (private->uid.type != UA_BASE_DEVICE) {
- spin_unlock_irqrestore(get_ccwdev_lock(device->cdev),
- flags);
+ if (private->uid.type != UA_BASE_DEVICE)
continue;
- }
- spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
dasd_schedule_block_bh(device->block);
dasd_schedule_device_bh(device);
}
@@ -841,38 +941,20 @@
spin_unlock_irqrestore(&lcu->lock, flags);
}
-static void __stop_device_on_lcu(struct dasd_device *device,
- struct dasd_device *pos)
-{
- /* If pos == device then device is already locked! */
- if (pos == device) {
- dasd_device_set_stop_bits(pos, DASD_STOPPED_SU);
- return;
- }
- spin_lock(get_ccwdev_lock(pos->cdev));
- dasd_device_set_stop_bits(pos, DASD_STOPPED_SU);
- spin_unlock(get_ccwdev_lock(pos->cdev));
-}
-
-/*
- * This function is called in interrupt context, so the
- * cdev lock for device is already locked!
- */
-static void _stop_all_devices_on_lcu(struct alias_lcu *lcu,
- struct dasd_device *device)
+static void _stop_all_devices_on_lcu(struct alias_lcu *lcu)
{
struct alias_pav_group *pavgroup;
- struct dasd_device *pos;
+ struct dasd_device *device;
- list_for_each_entry(pos, &lcu->active_devices, alias_list)
- __stop_device_on_lcu(device, pos);
- list_for_each_entry(pos, &lcu->inactive_devices, alias_list)
- __stop_device_on_lcu(device, pos);
+ list_for_each_entry(device, &lcu->active_devices, alias_list)
+ dasd_device_set_stop_bits(device, DASD_STOPPED_SU);
+ list_for_each_entry(device, &lcu->inactive_devices, alias_list)
+ dasd_device_set_stop_bits(device, DASD_STOPPED_SU);
list_for_each_entry(pavgroup, &lcu->grouplist, group) {
- list_for_each_entry(pos, &pavgroup->baselist, alias_list)
- __stop_device_on_lcu(device, pos);
- list_for_each_entry(pos, &pavgroup->aliaslist, alias_list)
- __stop_device_on_lcu(device, pos);
+ list_for_each_entry(device, &pavgroup->baselist, alias_list)
+ dasd_device_set_stop_bits(device, DASD_STOPPED_SU);
+ list_for_each_entry(device, &pavgroup->aliaslist, alias_list)
+ dasd_device_set_stop_bits(device, DASD_STOPPED_SU);
}
}
@@ -880,33 +962,16 @@
{
struct alias_pav_group *pavgroup;
struct dasd_device *device;
- unsigned long flags;
- list_for_each_entry(device, &lcu->active_devices, alias_list) {
- spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
+ list_for_each_entry(device, &lcu->active_devices, alias_list)
dasd_device_remove_stop_bits(device, DASD_STOPPED_SU);
- spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
- }
-
- list_for_each_entry(device, &lcu->inactive_devices, alias_list) {
- spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
+ list_for_each_entry(device, &lcu->inactive_devices, alias_list)
dasd_device_remove_stop_bits(device, DASD_STOPPED_SU);
- spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
- }
-
list_for_each_entry(pavgroup, &lcu->grouplist, group) {
- list_for_each_entry(device, &pavgroup->baselist, alias_list) {
- spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
+ list_for_each_entry(device, &pavgroup->baselist, alias_list)
dasd_device_remove_stop_bits(device, DASD_STOPPED_SU);
- spin_unlock_irqrestore(get_ccwdev_lock(device->cdev),
- flags);
- }
- list_for_each_entry(device, &pavgroup->aliaslist, alias_list) {
- spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
+ list_for_each_entry(device, &pavgroup->aliaslist, alias_list)
dasd_device_remove_stop_bits(device, DASD_STOPPED_SU);
- spin_unlock_irqrestore(get_ccwdev_lock(device->cdev),
- flags);
- }
}
}
@@ -932,13 +997,14 @@
spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
reset_summary_unit_check(lcu, device, suc_data->reason);
- spin_lock_irqsave(&lcu->lock, flags);
+ _trylock_and_lock_lcu_irqsave(lcu, NULL, &flags);
_unstop_all_devices_on_lcu(lcu);
_restart_all_base_devices_on_lcu(lcu);
/* 3. read new alias configuration */
_schedule_lcu_update(lcu, device);
lcu->suc_data.device = NULL;
dasd_put_device(device);
+ _unlock_all_devices_on_lcu(lcu, NULL, NULL);
spin_unlock_irqrestore(&lcu->lock, flags);
}
@@ -974,10 +1040,7 @@
" unit check (no lcu structure)");
return;
}
- spin_lock(&lcu->lock);
- _stop_all_devices_on_lcu(lcu, device);
- /* prepare for lcu_update */
- private->lcu->flags |= NEED_UAC_UPDATE | UPDATE_PENDING;
+ _trylock_and_lock_lcu(lcu, device);
/* If this device is about to be removed just return and wait for
* the next interrupt on a different device
*/
@@ -985,6 +1048,7 @@
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
"device is in offline processing,"
" don't do summary unit check handling");
+ _unlock_all_devices_on_lcu(lcu, device, NULL);
spin_unlock(&lcu->lock);
return;
}
@@ -993,12 +1057,17 @@
DBF_DEV_EVENT(DBF_WARNING, device, "%s",
"previous instance of summary unit check worker"
" still pending");
+ _unlock_all_devices_on_lcu(lcu, device, NULL);
spin_unlock(&lcu->lock);
return ;
}
+ _stop_all_devices_on_lcu(lcu);
+ /* prepare for lcu_update */
+ private->lcu->flags |= NEED_UAC_UPDATE | UPDATE_PENDING;
lcu->suc_data.reason = reason;
lcu->suc_data.device = device;
dasd_get_device(device);
+ _unlock_all_devices_on_lcu(lcu, device, NULL);
spin_unlock(&lcu->lock);
if (!schedule_work(&lcu->suc_data.worker))
dasd_put_device(device);