ceph: drop support for preferred_osd pgs
This was an ill-conceived feature that has been removed from Ceph. Do
this gracefully:
- reject attempts to specify a preferred_osd via the ioctl
- stop exposing this information via virtual xattrs
- always fill in -1 for requests, in case we talk to an older server
- don't calculate preferred_osd placements/pgids
Reviewed-by: Alex Elder <elder@inktank.com>
Signed-off-by: Sage Weil <sage@inktank.com>
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index c1f7701..a67fa63 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -935,7 +935,6 @@
layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
layout->fl_stripe_count = cpu_to_le32(1);
layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
- layout->fl_pg_preferred = cpu_to_le32(-1);
layout->fl_pg_pool = cpu_to_le32(dev->poolid);
ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
req, ops);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ed72428..988d4f3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -54,7 +54,6 @@
req->r_fmode = ceph_flags_to_mode(flags);
req->r_args.open.flags = cpu_to_le32(flags);
req->r_args.open.mode = cpu_to_le32(create_mode);
- req->r_args.open.preferred = cpu_to_le32(-1);
out:
return req;
}
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 790914a59..4feab52 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -26,8 +26,7 @@
l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
l.object_size = ceph_file_layout_object_size(ci->i_layout);
l.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
- l.preferred_osd =
- (s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
+ l.preferred_osd = (s32)-1;
if (copy_to_user(arg, &l, sizeof(l)))
return -EFAULT;
}
@@ -49,6 +48,10 @@
if (copy_from_user(&l, arg, sizeof(l)))
return -EFAULT;
+ /* preferred_osd is no longer supported */
+ if (l.preferred_osd != -1)
+ return -EINVAL;
+
/* validate changed params against current layout */
err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
if (!err) {
@@ -56,8 +59,6 @@
nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
nl.object_size = ceph_file_layout_object_size(ci->i_layout);
nl.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
- nl.preferred_osd =
- (s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
} else
return err;
@@ -69,8 +70,6 @@
nl.object_size = l.object_size;
if (l.data_pool)
nl.data_pool = l.data_pool;
- if (l.preferred_osd)
- nl.preferred_osd = l.preferred_osd;
if ((nl.object_size & ~PAGE_MASK) ||
(nl.stripe_unit & ~PAGE_MASK) ||
@@ -106,8 +105,6 @@
req->r_args.setlayout.layout.fl_object_size =
cpu_to_le32(l.object_size);
req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool);
- req->r_args.setlayout.layout.fl_pg_preferred =
- cpu_to_le32(l.preferred_osd);
parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
@@ -171,8 +168,6 @@
cpu_to_le32(l.object_size);
req->r_args.setlayout.layout.fl_pg_pool =
cpu_to_le32(l.data_pool);
- req->r_args.setlayout.layout.fl_pg_preferred =
- cpu_to_le32(l.preferred_osd);
err = ceph_mdsc_do_request(mdsc, inode, req);
ceph_mdsc_put_request(req);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 35b8633..785cb30 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -118,15 +118,6 @@
(unsigned long long)ceph_file_layout_su(ci->i_layout),
(unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
(unsigned long long)ceph_file_layout_object_size(ci->i_layout));
-
- if (ceph_file_layout_pg_preferred(ci->i_layout) >= 0) {
- val += ret;
- size -= ret;
- ret += snprintf(val, size, "preferred_osd=%lld\n",
- (unsigned long long)ceph_file_layout_pg_preferred(
- ci->i_layout));
- }
-
return ret;
}
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index b8c6069..e81ab30 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -65,7 +65,7 @@
__le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */
/* object -> pg layout */
- __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */
+ __le32 fl_unused; /* unused; used to be preferred primary (-1) */
__le32 fl_pg_pool; /* namespace, crush ruleset, rep level */
} __attribute__ ((packed));
@@ -384,7 +384,7 @@
__le32 stripe_count; /* ... */
__le32 object_size;
__le32 file_replication;
- __le32 preferred;
+ __le32 unused; /* used to be preferred osd */
} __attribute__ ((packed)) open;
struct {
__le32 flags;
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index ba4c205..311ef8d 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -65,8 +65,6 @@
#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
#define ceph_file_layout_object_su(l) \
((__s32)le32_to_cpu((l).fl_object_stripe_unit))
-#define ceph_file_layout_pg_preferred(l) \
- ((__s32)le32_to_cpu((l).fl_pg_preferred))
#define ceph_file_layout_pg_pool(l) \
((__s32)le32_to_cpu((l).fl_pg_pool))
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 29ad46e..7d39f3c 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1000,7 +1000,6 @@
{
unsigned num, num_mask;
struct ceph_pg pgid;
- s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred);
int poolid = le32_to_cpu(fl->fl_pg_pool);
struct ceph_pg_pool_info *pool;
unsigned ps;
@@ -1011,23 +1010,13 @@
if (!pool)
return -EIO;
ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid));
- if (preferred >= 0) {
- ps += preferred;
- num = le32_to_cpu(pool->v.lpg_num);
- num_mask = pool->lpg_num_mask;
- } else {
- num = le32_to_cpu(pool->v.pg_num);
- num_mask = pool->pg_num_mask;
- }
+ num = le32_to_cpu(pool->v.pg_num);
+ num_mask = pool->pg_num_mask;
pgid.ps = cpu_to_le16(ps);
- pgid.preferred = cpu_to_le16(preferred);
+ pgid.preferred = cpu_to_le16(-1);
pgid.pool = fl->fl_pg_pool;
- if (preferred >= 0)
- dout("calc_object_layout '%s' pgid %d.%xp%d\n", oid, poolid, ps,
- (int)preferred);
- else
- dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
+ dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps);
ol->ol_pgid = pgid;
ol->ol_stripe_unit = fl->fl_object_stripe_unit;
@@ -1046,23 +1035,17 @@
struct ceph_pg_pool_info *pool;
int ruleno;
unsigned poolid, ps, pps, t;
- int preferred;
poolid = le32_to_cpu(pgid.pool);
ps = le16_to_cpu(pgid.ps);
- preferred = (s16)le16_to_cpu(pgid.preferred);
pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
if (!pool)
return NULL;
/* pg_temp? */
- if (preferred >= 0)
- t = ceph_stable_mod(ps, le32_to_cpu(pool->v.lpg_num),
- pool->lpgp_num_mask);
- else
- t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
- pool->pgp_num_mask);
+ t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num),
+ pool->pgp_num_mask);
pgid.ps = cpu_to_le16(t);
pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
if (pg) {
@@ -1080,23 +1063,13 @@
return NULL;
}
- /* don't forcefeed bad device ids to crush */
- if (preferred >= osdmap->max_osd ||
- preferred >= osdmap->crush->max_devices)
- preferred = -1;
-
- if (preferred >= 0)
- pps = ceph_stable_mod(ps,
- le32_to_cpu(pool->v.lpgp_num),
- pool->lpgp_num_mask);
- else
- pps = ceph_stable_mod(ps,
- le32_to_cpu(pool->v.pgp_num),
- pool->pgp_num_mask);
+ pps = ceph_stable_mod(ps,
+ le32_to_cpu(pool->v.pgp_num),
+ pool->pgp_num_mask);
pps += poolid;
*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
min_t(int, pool->v.size, *num),
- preferred, osdmap->osd_weight);
+ -1, osdmap->osd_weight);
return osds;
}