md-cluster: gather resync infos and enable recv_thread after bitmap is ready The in-memory bitmap is not ready when node joins cluster, so it doesn't make sense to make gather_all_resync_info() called so earlier, we need to call it after the node's bitmap is setup. Also, recv_thread could be wake up after node joins cluster, but it could cause problem if node receives RESYNCING message without persionality since mddev->pers->quiesce is called in process_suspend_info. This commit introduces a new cluster interface load_bitmaps to fix above problems, load_bitmaps is called in bitmap_load where bitmap and persionality are ready, and load_bitmaps does the following tasks: 1. call gather_all_resync_info to load all the node's bitmap info. 2. set MD_CLUSTER_ALREADY_IN_CLUSTER bit to recv_thread could be wake up, and wake up recv_thread if there is pending recv event. Then ack_bast only wakes up recv_thread after IN_CLUSTER bit is ready otherwise MD_CLUSTER_PENDING_RESYNC_EVENT is set. Reviewed-by: NeilBrown <neilb@suse.com> Signed-off-by: Guoqing Jiang <gqjiang@suse.com> Signed-off-by: Shaohua Li <shli@fb.com>

commit: 51e453aecb267b6a99b1d2853bccd5bba7340236 [log] [tgz]
author: Guoqing Jiang <gqjiang@suse.com> Wed May 04 02:17:09 2016 -0400
committer: Shaohua Li <shli@fb.com> Mon May 09 09:24:03 2016 -0700
tree: c1a405b2f33b0798da25fa3e410d760c1364dc22
parent: 85ad1d13ee9b3db00615ea24b031c15e5ba14fd1 [diff] [blame]
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index a55b5f4..bef6a47 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c

@@ -61,6 +61,10 @@
  * the lock.
  */
 #define		MD_CLUSTER_SEND_LOCKED_ALREADY		5
+/* We should receive message after node joined cluster and
+ * set up all the related infos such as bitmap and personality */
+#define		MD_CLUSTER_ALREADY_IN_CLUSTER		6
+#define		MD_CLUSTER_PENDING_RECV_EVENT		7
 
 
 struct md_cluster_info {
@@ -376,8 +380,12 @@
 	struct dlm_lock_resource *res = arg;
 	struct md_cluster_info *cinfo = res->mddev->cluster_info;
 
-	if (mode == DLM_LOCK_EX)
-		md_wakeup_thread(cinfo->recv_thread);
+	if (mode == DLM_LOCK_EX) {
+		if (test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
+			md_wakeup_thread(cinfo->recv_thread);
+		else
+			set_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state);
+	}
 }
 
 static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
@@ -846,10 +854,6 @@
 	if (!cinfo->resync_lockres)
 		goto err;
 
-	ret = gather_all_resync_info(mddev, nodes);
-	if (ret)
-		goto err;
-
 	return 0;
 err:
 	md_unregister_thread(&cinfo->recovery_thread);
@@ -867,6 +871,19 @@
 	return ret;
 }
 
+static void load_bitmaps(struct mddev *mddev, int total_slots)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	/* load all the node's bitmap info for resync */
+	if (gather_all_resync_info(mddev, total_slots))
+		pr_err("md-cluster: failed to gather all resyn infos\n");
+	set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state);
+	/* wake up recv thread in case something need to be handled */
+	if (test_and_clear_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state))
+		md_wakeup_thread(cinfo->recv_thread);
+}
+
 static void resync_bitmap(struct mddev *mddev)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -1208,6 +1225,7 @@
 	.add_new_disk_cancel = add_new_disk_cancel,
 	.new_disk_ack = new_disk_ack,
 	.remove_disk = remove_disk,
+	.load_bitmaps = load_bitmaps,
 	.gather_bitmaps = gather_bitmaps,
 	.lock_all_bitmaps = lock_all_bitmaps,
 	.unlock_all_bitmaps = unlock_all_bitmaps,
commit	51e453aecb267b6a99b1d2853bccd5bba7340236	[log] [tgz]
author	Guoqing Jiang <gqjiang@suse.com>	Wed May 04 02:17:09 2016 -0400
committer	Shaohua Li <shli@fb.com>	Mon May 09 09:24:03 2016 -0700
tree	c1a405b2f33b0798da25fa3e410d760c1364dc22
parent	85ad1d13ee9b3db00615ea24b031c15e5ba14fd1 [diff] [blame]