Gather on-going resync information of other nodes When a node joins, it does not know of other nodes performing resync. So, each node keeps the resync information in it's LVB. When a new node joins, it reads the LVB of each "online" bitmap. [TODO] The new node attempts to get the PW lock on other bitmap, if it is successful, it reads the bitmap and performs the resync (if required) on it's behalf. If the node does not get the PW, it requests CR and reads the LVB for the resync information. Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>

commit: 96ae923ab659e37dd5fc1e05ecbf654e2f94bcbe [log] [tgz]
author: Goldwyn Rodrigues <rgoldwyn@suse.com> Fri Jun 06 12:35:34 2014 -0500
committer: Goldwyn Rodrigues <rgoldwyn@suse.com> Mon Feb 23 07:30:11 2015 -0600
tree: 9a8868524e7dacb26caec999dd2a65884b22893d
parent: 54519c5f4b398bcfe599f652b4ef4004d5fa63ff [diff]
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 75c6602..b59c3a0 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c

@@ -27,6 +27,18 @@
 	struct mddev *mddev; /* pointing back to mddev. */
 };
 
+struct suspend_info {
+	int slot;
+	sector_t lo;
+	sector_t hi;
+	struct list_head list;
+};
+
+struct resync_info {
+	__le64 lo;
+	__le64 hi;
+};
+
 struct md_cluster_info {
 	/* dlm lock space and resources for clustered raid. */
 	dlm_lockspace_t *lockspace;
@@ -35,6 +47,8 @@
 	struct dlm_lock_resource *sb_lock;
 	struct mutex sb_mutex;
 	struct dlm_lock_resource *bitmap_lockres;
+	struct list_head suspend_list;
+	spinlock_t suspend_lock;
 };
 
 static void sync_ast(void *arg)
@@ -139,6 +153,37 @@
 	return dest;
 }
 
+static void add_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres,
+		sector_t lo, sector_t hi)
+{
+	struct resync_info *ri;
+
+	ri = (struct resync_info *)lockres->lksb.sb_lvbptr;
+	ri->lo = cpu_to_le64(lo);
+	ri->hi = cpu_to_le64(hi);
+}
+
+static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
+{
+	struct resync_info ri;
+	struct suspend_info *s = NULL;
+	sector_t hi = 0;
+
+	dlm_lock_sync(lockres, DLM_LOCK_CR);
+	memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
+	hi = le64_to_cpu(ri.hi);
+	if (ri.hi > 0) {
+		s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
+		if (!s)
+			goto out;
+		s->hi = hi;
+		s->lo = le64_to_cpu(ri.lo);
+	}
+	dlm_unlock_sync(lockres);
+out:
+	return s;
+}
+
 static void recover_prep(void *arg)
 {
 }
@@ -171,6 +216,53 @@
 	.recover_done = recover_done,
 };
 
+static int gather_all_resync_info(struct mddev *mddev, int total_slots)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+	int i, ret = 0;
+	struct dlm_lock_resource *bm_lockres;
+	struct suspend_info *s;
+	char str[64];
+
+
+	for (i = 0; i < total_slots; i++) {
+		memset(str, '\0', 64);
+		snprintf(str, 64, "bitmap%04d", i);
+		bm_lockres = lockres_init(mddev, str, NULL, 1);
+		if (!bm_lockres)
+			return -ENOMEM;
+		if (i == (cinfo->slot_number - 1))
+			continue;
+
+		bm_lockres->flags |= DLM_LKF_NOQUEUE;
+		ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
+		if (ret == -EAGAIN) {
+			memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
+			s = read_resync_info(mddev, bm_lockres);
+			if (s) {
+				pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
+						__func__, __LINE__,
+						(unsigned long long) s->lo,
+						(unsigned long long) s->hi, i);
+				spin_lock_irq(&cinfo->suspend_lock);
+				s->slot = i;
+				list_add(&s->list, &cinfo->suspend_list);
+				spin_unlock_irq(&cinfo->suspend_lock);
+			}
+			ret = 0;
+			lockres_free(bm_lockres);
+			continue;
+		}
+		if (ret)
+			goto out;
+		/* TODO: Read the disk bitmap sb and check if it needs recovery */
+		dlm_unlock_sync(bm_lockres);
+		lockres_free(bm_lockres);
+	}
+out:
+	return ret;
+}
+
 static int join(struct mddev *mddev, int nodes)
 {
 	struct md_cluster_info *cinfo;
@@ -221,8 +313,17 @@
 		goto err;
 	}
 
+	INIT_LIST_HEAD(&cinfo->suspend_list);
+	spin_lock_init(&cinfo->suspend_lock);
+
+	ret = gather_all_resync_info(mddev, nodes);
+	if (ret)
+		goto err;
+
 	return 0;
 err:
+	lockres_free(cinfo->bitmap_lockres);
+	lockres_free(cinfo->sb_lock);
 	if (cinfo->lockspace)
 		dlm_release_lockspace(cinfo->lockspace, 2);
 	mddev->cluster_info = NULL;
@@ -254,10 +355,20 @@
 	return cinfo->slot_number - 1;
 }
 
+static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
+	/* Re-acquire the lock to refresh LVB */
+	dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
+}
+
 static struct md_cluster_operations cluster_ops = {
 	.join   = join,
 	.leave  = leave,
 	.slot_number = slot_number,
+	.resync_info_update = resync_info_update,
 };
 
 static int __init cluster_init(void)

diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 52a21e0..51a24df 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h

@@ -11,6 +11,7 @@
 	int (*join)(struct mddev *mddev, int nodes);
 	int (*leave)(struct mddev *mddev);
 	int (*slot_number)(struct mddev *mddev);
+	void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
 };
 
 #endif /* _MD_CLUSTER_H */

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8f310d9..71f6550 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c

@@ -7626,6 +7626,9 @@
 	md_new_event(mddev);
 	update_time = jiffies;
 
+	if (mddev_is_clustered(mddev))
+		md_cluster_ops->resync_info_update(mddev, j, max_sectors);
+
 	blk_start_plug(&plug);
 	while (j < max_sectors) {
 		sector_t sectors;
@@ -7686,6 +7689,8 @@
 		j += sectors;
 		if (j > 2)
 			mddev->curr_resync = j;
+		if (mddev_is_clustered(mddev))
+			md_cluster_ops->resync_info_update(mddev, j, max_sectors);
 		mddev->curr_mark_cnt = io_sectors;
 		if (last_check == 0)
 			/* this is the earliest that rebuild will be
@@ -7746,6 +7751,9 @@
 	/* tell personality that we are finished */
 	mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
 
+	if (mddev_is_clustered(mddev))
+		md_cluster_ops->resync_info_update(mddev, 0, 0);
+
 	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
 	    mddev->curr_resync > 2) {
 		if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
commit	96ae923ab659e37dd5fc1e05ecbf654e2f94bcbe	[log] [tgz]
author	Goldwyn Rodrigues <rgoldwyn@suse.com>	Fri Jun 06 12:35:34 2014 -0500
committer	Goldwyn Rodrigues <rgoldwyn@suse.com>	Mon Feb 23 07:30:11 2015 -0600
tree	9a8868524e7dacb26caec999dd2a65884b22893d
parent	54519c5f4b398bcfe599f652b4ef4004d5fa63ff [diff]