libceph: follow {read,write}_tier fields on osd request submission
Overwrite ceph_osd_request::r_oloc.pool with read_tier for read ops and
write_tier for write and read+write ops (aka basic tiering support).
{read,write}_tier are part of pg_pool_t since v9. This commit bumps
our pg_pool_t decode compat version from v7 to v9, all new fields
except for {read,write}_tier are ignored.
Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 10360de..0eb009e 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1250,6 +1250,32 @@
}
/*
+ * Calculate mapping of a request to a PG. Takes tiering into account.
+ */
+static int __calc_request_pg(struct ceph_osdmap *osdmap,
+ struct ceph_osd_request *req,
+ struct ceph_pg *pg_out)
+{
+ if ((req->r_flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
+ struct ceph_pg_pool_info *pi;
+
+ pi = ceph_pg_pool_by_id(osdmap, req->r_oloc.pool);
+ if (pi) {
+ if ((req->r_flags & CEPH_OSD_FLAG_READ) &&
+ pi->read_tier >= 0)
+ req->r_oloc.pool = pi->read_tier;
+ if ((req->r_flags & CEPH_OSD_FLAG_WRITE) &&
+ pi->write_tier >= 0)
+ req->r_oloc.pool = pi->write_tier;
+ }
+ /* !pi is caught in ceph_oloc_oid_to_pg() */
+ }
+
+ return ceph_oloc_oid_to_pg(osdmap, &req->r_oloc,
+ &req->r_oid, pg_out);
+}
+
+/*
* Pick an osd (the first 'up' osd in the pg), allocate the osd struct
* (as needed), and set the request r_osd appropriately. If there is
* no up osd, set r_osd to NULL. Move the request to the appropriate list
@@ -1269,8 +1295,8 @@
bool was_paused;
dout("map_request %p tid %lld\n", req, req->r_tid);
- err = ceph_oloc_oid_to_pg(osdc->osdmap, &req->r_oloc, &req->r_oid,
- &pgid);
+
+ err = __calc_request_pg(osdc->osdmap, req, &pgid);
if (err) {
list_move(&req->r_req_lru_item, &osdc->req_notarget);
return err;