[DLM] abort recovery more quickly
When one recovery is aborted so that another can start, have ping_members()
check for the stop before contacting each member and return early, and wake
up the dlm_recoverd thread right away instead of waiting for it to time out.
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index cd0c51e..ebb33c3 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -162,11 +162,22 @@
/* send a status request to all members just to establish comms connections */
-static void ping_members(struct dlm_ls *ls)
+static int ping_members(struct dlm_ls *ls)
{
struct dlm_member *memb;
- list_for_each_entry(memb, &ls->ls_nodes, list)
- dlm_rcom_status(ls, memb->nodeid);
+ int error = 0;
+
+ list_for_each_entry(memb, &ls->ls_nodes, list) {
+ error = dlm_recovery_stopped(ls);
+ if (error)
+ break;
+ error = dlm_rcom_status(ls, memb->nodeid);
+ if (error)
+ break;
+ }
+ if (error)
+ log_debug(ls, "ping_members aborted %d", error);
+ return error;
}
int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
@@ -212,10 +223,13 @@
dlm_set_recover_status(ls, DLM_RS_NODES);
*neg_out = neg;
- ping_members(ls);
+ error = ping_members(ls);
+ if (error)
+ goto out;
error = dlm_recover_members_wait(ls);
- log_debug(ls, "total members %d", ls->ls_num_nodes);
+ out:
+ log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error);
return error;
}
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 7010353..eac8e9f 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -275,6 +275,7 @@
void dlm_recoverd_suspend(struct dlm_ls *ls)
{
+ wake_up(&ls->ls_wait_general);
mutex_lock(&ls->ls_recoverd_active);
}