ceph: re-send AIO write request when getting -EOLDSNAPC error

When receiving -EOLDSNAPC from the OSD, we need to re-send the
corresponding write request. Due to a locking issue, we can not send
a new request from inside another OSD request's complete callback.
So for AIO writes we queue a work item to re-send the request.
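
For reference, the deferral follows the usual "queue work from a
completion callback" pattern; a minimal illustrative sketch (the
struct/function names below are hypothetical, not part of the patch):

	#include <linux/workqueue.h>
	#include <linux/slab.h>

	struct retry_work {
		struct work_struct work;
		struct my_request *req;		/* request to re-send */
	};

	static void retry_worker(struct work_struct *work)
	{
		struct retry_work *rw =
			container_of(work, struct retry_work, work);

		/* Process context: it is now safe to take the locks
		 * needed to rebuild and re-send the request. */
		resend_request(rw->req);	/* hypothetical helper */
		kfree(rw);
	}

	static void complete_callback(struct my_request *req, int rc)
	{
		if (rc == -EOLDSNAPC) {
			struct retry_work *rw =
				kmalloc(sizeof(*rw), GFP_NOFS);
			if (rw) {
				INIT_WORK(&rw->work, retry_worker);
				rw->req = req;
				queue_work(system_wq, &rw->work);
				return;	/* re-sent from the worker */
			}
			rc = -ENOMEM;	/* fall through and fail */
		}
		/* ... normal completion handling ... */
	}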

Signed-off-by: Yan, Zheng <zyan@redhat.com>
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8e924b7..41c2267 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -554,9 +554,17 @@
 	struct list_head osd_reqs;
 	unsigned num_reqs;
 	atomic_t pending_reqs;
+	struct timespec mtime;
 	struct ceph_cap_flush *prealloc_cf;
 };
 
+struct ceph_aio_work {
+	struct work_struct work;
+	struct ceph_osd_request *req;
+};
+
+static void ceph_aio_retry_work(struct work_struct *work);
+
 static void ceph_aio_complete(struct inode *inode,
 			      struct ceph_aio_request *aio_req)
 {
@@ -614,10 +622,19 @@
 	     inode, rc, osd_data->length);
 
 	if (rc == -EOLDSNAPC) {
-		BUG_ON(1);
-	}
+		struct ceph_aio_work *aio_work;
+		BUG_ON(!aio_req->write);
 
-	if (!aio_req->write) {
+		aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS);
+		if (aio_work) {
+			INIT_WORK(&aio_work->work, ceph_aio_retry_work);
+			aio_work->req = req;
+			queue_work(ceph_inode_to_client(inode)->wb_wq,
+				   &aio_work->work);
+			return;
+		}
+		rc = -ENOMEM;
+	} else if (!aio_req->write) {
 		if (rc == -ENOENT)
 			rc = 0;
 		if (rc >= 0 && osd_data->length > rc) {
@@ -653,6 +670,69 @@
 	return;
 }
 
+static void ceph_aio_retry_work(struct work_struct *work)
+{
+	struct ceph_aio_work *aio_work =
+		container_of(work, struct ceph_aio_work, work);
+	struct ceph_osd_request *orig_req = aio_work->req;
+	struct ceph_aio_request *aio_req = orig_req->r_priv;
+	struct inode *inode = orig_req->r_inode;
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_snap_context *snapc;
+	struct ceph_osd_request *req;
+	int ret;
+
+	spin_lock(&ci->i_ceph_lock);
+	if (__ceph_have_pending_cap_snap(ci)) {
+		struct ceph_cap_snap *capsnap =
+			list_last_entry(&ci->i_cap_snaps,
+					struct ceph_cap_snap,
+					ci_item);
+		snapc = ceph_get_snap_context(capsnap->context);
+	} else {
+		BUG_ON(!ci->i_head_snapc);
+		snapc = ceph_get_snap_context(ci->i_head_snapc);
+	}
+	spin_unlock(&ci->i_ceph_lock);
+
+	req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
+			false, GFP_NOFS);
+	if (IS_ERR(req)) {
+		ret = PTR_ERR(req);
+		req = orig_req;
+		goto out;
+	}
+
+	req->r_flags =	CEPH_OSD_FLAG_ORDERSNAP |
+			CEPH_OSD_FLAG_ONDISK |
+			CEPH_OSD_FLAG_WRITE;
+	req->r_base_oloc = orig_req->r_base_oloc;
+	req->r_base_oid = orig_req->r_base_oid;
+
+	req->r_ops[0] = orig_req->r_ops[0];
+	osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+
+	ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
+				snapc, CEPH_NOSNAP, &aio_req->mtime);
+
+	ceph_put_snap_context(snapc);
+	ceph_osdc_put_request(orig_req);
+
+	req->r_callback = ceph_aio_complete_req;
+	req->r_inode = inode;
+	req->r_priv = aio_req;
+
+	ret = ceph_osdc_start_request(req->r_osdc, req, false);
+out:
+	if (ret < 0) {
+		BUG_ON(ret == -EOLDSNAPC);
+		req->r_result = ret;
+		ceph_aio_complete_req(req, NULL);
+	}
+
+	kfree(aio_work);
+}
+
 /*
  * Write commit request unsafe callback, called to tell us when a
  * request is unsafe (that is, in flight--has been handed to the
@@ -772,6 +852,7 @@
 				aio_req->write = write;
 				INIT_LIST_HEAD(&aio_req->osd_reqs);
 				if (write) {
+					aio_req->mtime = mtime;
 					swap(aio_req->prealloc_cf, *pcf);
 				}
 			}
@@ -867,6 +948,7 @@
 				ret = ceph_osdc_start_request(req->r_osdc,
 							      req, false);
 			if (ret < 0) {
+				BUG_ON(ret == -EOLDSNAPC);
 				req->r_result = ret;
 				ceph_aio_complete_req(req, NULL);
 			}