blob: 532c95a6c9fa907d967909e760a8217799789b6d [file] [log] [blame]
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -07001#include <linux/ceph/ceph_debug.h>
2
Sage Weil355da1e2009-10-06 11:31:08 -07003#include "super.h"
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -07004#include "mds_client.h"
5
6#include <linux/ceph/decode.h>
Sage Weil355da1e2009-10-06 11:31:08 -07007
8#include <linux/xattr.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +09009#include <linux/slab.h>
Sage Weil355da1e2009-10-06 11:31:08 -070010
Alex Elder22891902012-01-23 15:49:28 -060011#define XATTR_CEPH_PREFIX "ceph."
12#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
13
Sage Weil355da1e2009-10-06 11:31:08 -070014static bool ceph_is_valid_xattr(const char *name)
15{
Alex Elder22891902012-01-23 15:49:28 -060016 return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
Sage Weil1a756272010-05-11 11:40:25 -070017 !strncmp(name, XATTR_SECURITY_PREFIX,
Sage Weil355da1e2009-10-06 11:31:08 -070018 XATTR_SECURITY_PREFIX_LEN) ||
19 !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
20 !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
21}
22
/*
 * These define virtual xattrs exposing the recursive directory
 * statistics and layout metadata.
 *
 * Tables of these entries are terminated by an entry whose name is NULL.
 */
struct ceph_vxattr {
	/* full attribute name; NULL marks the end of a table */
	char *name;
	/* strlen(name) + 1 (for '\0') */
	size_t name_size;
	/* formats the attribute value into val, given size bytes of room */
	size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
			      size_t size);
	/* when set, setxattr/removexattr on this name are refused */
	bool readonly;
	/* when set, the name is left out of listxattr and of the name-size total */
	bool hidden;
	/* optional; when present and returning false the vxattr is treated as absent */
	bool (*exists_cb)(struct ceph_inode_info *ci);
};
35
Sage Weil32ab0bd2013-01-19 16:46:32 -080036/* layouts */
37
38static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
39{
40 size_t s;
41 char *p = (char *)&ci->i_layout;
42
43 for (s = 0; s < sizeof(ci->i_layout); s++, p++)
44 if (*p)
45 return true;
46 return false;
47}
48
49static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
50 size_t size)
51{
52 int ret;
53 struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
54 struct ceph_osd_client *osdc = &fsc->client->osdc;
55 s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
56 const char *pool_name;
57
58 dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
59 down_read(&osdc->map_sem);
60 pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
61 if (pool_name)
62 ret = snprintf(val, size,
63 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
64 (unsigned long long)ceph_file_layout_su(ci->i_layout),
65 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
66 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
67 pool_name);
68 else
69 ret = snprintf(val, size,
70 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
71 (unsigned long long)ceph_file_layout_su(ci->i_layout),
72 (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
73 (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
74 (unsigned long long)pool);
75
76 up_read(&osdc->map_sem);
77 return ret;
78}
79
Sage Weil355da1e2009-10-06 11:31:08 -070080/* directories */
81
Alex Elderaa4066e2012-01-23 15:49:28 -060082static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
Sage Weil355da1e2009-10-06 11:31:08 -070083 size_t size)
84{
85 return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
86}
87
Alex Elderaa4066e2012-01-23 15:49:28 -060088static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
Sage Weil355da1e2009-10-06 11:31:08 -070089 size_t size)
90{
91 return snprintf(val, size, "%lld", ci->i_files);
92}
93
Alex Elderaa4066e2012-01-23 15:49:28 -060094static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
Sage Weil355da1e2009-10-06 11:31:08 -070095 size_t size)
96{
97 return snprintf(val, size, "%lld", ci->i_subdirs);
98}
99
Alex Elderaa4066e2012-01-23 15:49:28 -0600100static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
Sage Weil355da1e2009-10-06 11:31:08 -0700101 size_t size)
102{
103 return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
104}
105
Alex Elderaa4066e2012-01-23 15:49:28 -0600106static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
Sage Weil355da1e2009-10-06 11:31:08 -0700107 size_t size)
108{
109 return snprintf(val, size, "%lld", ci->i_rfiles);
110}
111
Alex Elderaa4066e2012-01-23 15:49:28 -0600112static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
Sage Weil355da1e2009-10-06 11:31:08 -0700113 size_t size)
114{
115 return snprintf(val, size, "%lld", ci->i_rsubdirs);
116}
117
Alex Elderaa4066e2012-01-23 15:49:28 -0600118static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
Sage Weil355da1e2009-10-06 11:31:08 -0700119 size_t size)
120{
121 return snprintf(val, size, "%lld", ci->i_rbytes);
122}
123
Alex Elderaa4066e2012-01-23 15:49:28 -0600124static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
Sage Weil355da1e2009-10-06 11:31:08 -0700125 size_t size)
126{
Alex Elder3489b422012-03-08 16:50:09 -0600127 return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
Sage Weil355da1e2009-10-06 11:31:08 -0700128 (long)ci->i_rctime.tv_nsec);
129}
130
Sage Weil32ab0bd2013-01-19 16:46:32 -0800131
/*
 * Paste together the full vxattr name as a string literal, e.g.
 * CEPH_XATTR_NAME(dir, files) yields "ceph.dir.files".
 */
#define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name

/*
 * Table initializer for the vxattr ceph.<_type>.<_name>: read-only,
 * not hidden, no exists callback, value produced by
 * ceph_vxattrcb_<_type>_<_name>().
 */
#define XATTR_NAME_CEPH(_type, _name)					\
	{								\
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
		.exists_cb = NULL,					\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof (CEPH_XATTR_NAME(_type, _name)),	\
		.readonly = true,					\
		.hidden = false,					\
	}
Alex Eldereb788082012-01-23 15:49:28 -0600143
Alex Elder881a5fa2012-01-23 15:49:28 -0600144static struct ceph_vxattr ceph_dir_vxattrs[] = {
Alex Eldereb788082012-01-23 15:49:28 -0600145 XATTR_NAME_CEPH(dir, entries),
146 XATTR_NAME_CEPH(dir, files),
147 XATTR_NAME_CEPH(dir, subdirs),
148 XATTR_NAME_CEPH(dir, rentries),
149 XATTR_NAME_CEPH(dir, rfiles),
150 XATTR_NAME_CEPH(dir, rsubdirs),
151 XATTR_NAME_CEPH(dir, rbytes),
152 XATTR_NAME_CEPH(dir, rctime),
153 { 0 } /* Required table terminator */
Sage Weil355da1e2009-10-06 11:31:08 -0700154};
Alex Elder3ce6cd12012-01-23 15:49:28 -0600155static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
Sage Weil355da1e2009-10-06 11:31:08 -0700156
157/* files */
158
Alex Elder881a5fa2012-01-23 15:49:28 -0600159static struct ceph_vxattr ceph_file_vxattrs[] = {
Sage Weil32ab0bd2013-01-19 16:46:32 -0800160 {
161 .name = "ceph.file.layout",
162 .name_size = sizeof("ceph.file.layout"),
163 .getxattr_cb = ceph_vxattrcb_layout,
164 .readonly = false,
165 .hidden = false,
166 .exists_cb = ceph_vxattrcb_layout_exists,
167 },
Alex Eldereb788082012-01-23 15:49:28 -0600168 { 0 } /* Required table terminator */
Sage Weil355da1e2009-10-06 11:31:08 -0700169};
Alex Elder3ce6cd12012-01-23 15:49:28 -0600170static size_t ceph_file_vxattrs_name_size; /* total size of all names */
Sage Weil355da1e2009-10-06 11:31:08 -0700171
Alex Elder881a5fa2012-01-23 15:49:28 -0600172static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
Sage Weil355da1e2009-10-06 11:31:08 -0700173{
174 if (S_ISDIR(inode->i_mode))
175 return ceph_dir_vxattrs;
176 else if (S_ISREG(inode->i_mode))
177 return ceph_file_vxattrs;
178 return NULL;
179}
180
Alex Elder3ce6cd12012-01-23 15:49:28 -0600181static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
182{
183 if (vxattrs == ceph_dir_vxattrs)
184 return ceph_dir_vxattrs_name_size;
185 if (vxattrs == ceph_file_vxattrs)
186 return ceph_file_vxattrs_name_size;
187 BUG();
188
189 return 0;
190}
191
192/*
193 * Compute the aggregate size (including terminating '\0') of all
194 * virtual extended attribute names in the given vxattr table.
195 */
196static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
197{
198 struct ceph_vxattr *vxattr;
199 size_t size = 0;
200
201 for (vxattr = vxattrs; vxattr->name; vxattr++)
Sage Weil88601472013-01-31 11:53:27 -0800202 if (!vxattr->hidden)
203 size += vxattr->name_size;
Alex Elder3ce6cd12012-01-23 15:49:28 -0600204
205 return size;
206}
207
208/* Routines called at initialization and exit time */
209
210void __init ceph_xattr_init(void)
211{
212 ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
213 ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
214}
215
216void ceph_xattr_exit(void)
217{
218 ceph_dir_vxattrs_name_size = 0;
219 ceph_file_vxattrs_name_size = 0;
220}
221
Alex Elder881a5fa2012-01-23 15:49:28 -0600222static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
Sage Weil355da1e2009-10-06 11:31:08 -0700223 const char *name)
224{
Alex Elder881a5fa2012-01-23 15:49:28 -0600225 struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
Alex Elder06476a62012-01-23 15:49:27 -0600226
227 if (vxattr) {
228 while (vxattr->name) {
229 if (!strcmp(vxattr->name, name))
230 return vxattr;
231 vxattr++;
232 }
233 }
234
Sage Weil355da1e2009-10-06 11:31:08 -0700235 return NULL;
236}
237
238static int __set_xattr(struct ceph_inode_info *ci,
239 const char *name, int name_len,
240 const char *val, int val_len,
241 int dirty,
242 int should_free_name, int should_free_val,
243 struct ceph_inode_xattr **newxattr)
244{
245 struct rb_node **p;
246 struct rb_node *parent = NULL;
247 struct ceph_inode_xattr *xattr = NULL;
248 int c;
249 int new = 0;
250
251 p = &ci->i_xattrs.index.rb_node;
252 while (*p) {
253 parent = *p;
254 xattr = rb_entry(parent, struct ceph_inode_xattr, node);
255 c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
256 if (c < 0)
257 p = &(*p)->rb_left;
258 else if (c > 0)
259 p = &(*p)->rb_right;
260 else {
261 if (name_len == xattr->name_len)
262 break;
263 else if (name_len < xattr->name_len)
264 p = &(*p)->rb_left;
265 else
266 p = &(*p)->rb_right;
267 }
268 xattr = NULL;
269 }
270
271 if (!xattr) {
272 new = 1;
273 xattr = *newxattr;
274 xattr->name = name;
275 xattr->name_len = name_len;
276 xattr->should_free_name = should_free_name;
277
278 ci->i_xattrs.count++;
279 dout("__set_xattr count=%d\n", ci->i_xattrs.count);
280 } else {
281 kfree(*newxattr);
282 *newxattr = NULL;
283 if (xattr->should_free_val)
284 kfree((void *)xattr->val);
285
286 if (should_free_name) {
287 kfree((void *)name);
288 name = xattr->name;
289 }
290 ci->i_xattrs.names_size -= xattr->name_len;
291 ci->i_xattrs.vals_size -= xattr->val_len;
292 }
Sage Weil355da1e2009-10-06 11:31:08 -0700293 ci->i_xattrs.names_size += name_len;
294 ci->i_xattrs.vals_size += val_len;
295 if (val)
296 xattr->val = val;
297 else
298 xattr->val = "";
299
300 xattr->val_len = val_len;
301 xattr->dirty = dirty;
302 xattr->should_free_val = (val && should_free_val);
303
304 if (new) {
305 rb_link_node(&xattr->node, parent, p);
306 rb_insert_color(&xattr->node, &ci->i_xattrs.index);
307 dout("__set_xattr_val p=%p\n", p);
308 }
309
310 dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
311 ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
312
313 return 0;
314}
315
316static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
317 const char *name)
318{
319 struct rb_node **p;
320 struct rb_node *parent = NULL;
321 struct ceph_inode_xattr *xattr = NULL;
Sage Weil17db1432011-01-13 15:27:29 -0800322 int name_len = strlen(name);
Sage Weil355da1e2009-10-06 11:31:08 -0700323 int c;
324
325 p = &ci->i_xattrs.index.rb_node;
326 while (*p) {
327 parent = *p;
328 xattr = rb_entry(parent, struct ceph_inode_xattr, node);
329 c = strncmp(name, xattr->name, xattr->name_len);
Sage Weil17db1432011-01-13 15:27:29 -0800330 if (c == 0 && name_len > xattr->name_len)
331 c = 1;
Sage Weil355da1e2009-10-06 11:31:08 -0700332 if (c < 0)
333 p = &(*p)->rb_left;
334 else if (c > 0)
335 p = &(*p)->rb_right;
336 else {
337 dout("__get_xattr %s: found %.*s\n", name,
338 xattr->val_len, xattr->val);
339 return xattr;
340 }
341 }
342
343 dout("__get_xattr %s: not found\n", name);
344
345 return NULL;
346}
347
348static void __free_xattr(struct ceph_inode_xattr *xattr)
349{
350 BUG_ON(!xattr);
351
352 if (xattr->should_free_name)
353 kfree((void *)xattr->name);
354 if (xattr->should_free_val)
355 kfree((void *)xattr->val);
356
357 kfree(xattr);
358}
359
360static int __remove_xattr(struct ceph_inode_info *ci,
361 struct ceph_inode_xattr *xattr)
362{
363 if (!xattr)
364 return -EOPNOTSUPP;
365
366 rb_erase(&xattr->node, &ci->i_xattrs.index);
367
368 if (xattr->should_free_name)
369 kfree((void *)xattr->name);
370 if (xattr->should_free_val)
371 kfree((void *)xattr->val);
372
373 ci->i_xattrs.names_size -= xattr->name_len;
374 ci->i_xattrs.vals_size -= xattr->val_len;
375 ci->i_xattrs.count--;
376 kfree(xattr);
377
378 return 0;
379}
380
/*
 * Look up @name in the inode's xattr index and remove it.
 *
 * Returns whatever __remove_xattr() returns; a failed lookup is passed
 * to it as NULL and comes back as its "not found" error.
 */
static int __remove_xattr_by_name(struct ceph_inode_info *ci,
			   const char *name)
{
	struct ceph_inode_xattr *xattr;

	xattr = __get_xattr(ci, name);
	return __remove_xattr(ci, xattr);
}
393
394static char *__copy_xattr_names(struct ceph_inode_info *ci,
395 char *dest)
396{
397 struct rb_node *p;
398 struct ceph_inode_xattr *xattr = NULL;
399
400 p = rb_first(&ci->i_xattrs.index);
401 dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
402
403 while (p) {
404 xattr = rb_entry(p, struct ceph_inode_xattr, node);
405 memcpy(dest, xattr->name, xattr->name_len);
406 dest[xattr->name_len] = '\0';
407
408 dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
409 xattr->name_len, ci->i_xattrs.names_size);
410
411 dest += xattr->name_len + 1;
412 p = rb_next(p);
413 }
414
415 return dest;
416}
417
418void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
419{
420 struct rb_node *p, *tmp;
421 struct ceph_inode_xattr *xattr = NULL;
422
423 p = rb_first(&ci->i_xattrs.index);
424
425 dout("__ceph_destroy_xattrs p=%p\n", p);
426
427 while (p) {
428 xattr = rb_entry(p, struct ceph_inode_xattr, node);
429 tmp = p;
430 p = rb_next(tmp);
431 dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
432 xattr->name_len, xattr->name);
433 rb_erase(tmp, &ci->i_xattrs.index);
434
435 __free_xattr(xattr);
436 }
437
438 ci->i_xattrs.names_size = 0;
439 ci->i_xattrs.vals_size = 0;
440 ci->i_xattrs.index_version = 0;
441 ci->i_xattrs.count = 0;
442 ci->i_xattrs.index = RB_ROOT;
443}
444
445static int __build_xattrs(struct inode *inode)
Sage Weilbe655592011-11-30 09:47:09 -0800446 __releases(ci->i_ceph_lock)
447 __acquires(ci->i_ceph_lock)
Sage Weil355da1e2009-10-06 11:31:08 -0700448{
449 u32 namelen;
450 u32 numattr = 0;
451 void *p, *end;
452 u32 len;
453 const char *name, *val;
454 struct ceph_inode_info *ci = ceph_inode(inode);
455 int xattr_version;
456 struct ceph_inode_xattr **xattrs = NULL;
Sage Weil63ff78b2009-11-01 17:51:15 -0800457 int err = 0;
Sage Weil355da1e2009-10-06 11:31:08 -0700458 int i;
459
460 dout("__build_xattrs() len=%d\n",
461 ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
462
463 if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
464 return 0; /* already built */
465
466 __ceph_destroy_xattrs(ci);
467
468start:
469 /* updated internal xattr rb tree */
470 if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
471 p = ci->i_xattrs.blob->vec.iov_base;
472 end = p + ci->i_xattrs.blob->vec.iov_len;
473 ceph_decode_32_safe(&p, end, numattr, bad);
474 xattr_version = ci->i_xattrs.version;
Sage Weilbe655592011-11-30 09:47:09 -0800475 spin_unlock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700476
477 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
478 GFP_NOFS);
479 err = -ENOMEM;
480 if (!xattrs)
481 goto bad_lock;
482 memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
483 for (i = 0; i < numattr; i++) {
484 xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
485 GFP_NOFS);
486 if (!xattrs[i])
487 goto bad_lock;
488 }
489
Sage Weilbe655592011-11-30 09:47:09 -0800490 spin_lock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700491 if (ci->i_xattrs.version != xattr_version) {
492 /* lost a race, retry */
493 for (i = 0; i < numattr; i++)
494 kfree(xattrs[i]);
495 kfree(xattrs);
Alan Cox21ec6ff2012-07-20 08:18:36 -0500496 xattrs = NULL;
Sage Weil355da1e2009-10-06 11:31:08 -0700497 goto start;
498 }
499 err = -EIO;
500 while (numattr--) {
501 ceph_decode_32_safe(&p, end, len, bad);
502 namelen = len;
503 name = p;
504 p += len;
505 ceph_decode_32_safe(&p, end, len, bad);
506 val = p;
507 p += len;
508
509 err = __set_xattr(ci, name, namelen, val, len,
510 0, 0, 0, &xattrs[numattr]);
511
512 if (err < 0)
513 goto bad;
514 }
515 kfree(xattrs);
516 }
517 ci->i_xattrs.index_version = ci->i_xattrs.version;
518 ci->i_xattrs.dirty = false;
519
520 return err;
521bad_lock:
Sage Weilbe655592011-11-30 09:47:09 -0800522 spin_lock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700523bad:
524 if (xattrs) {
525 for (i = 0; i < numattr; i++)
526 kfree(xattrs[i]);
527 kfree(xattrs);
528 }
529 ci->i_xattrs.names_size = 0;
530 return err;
531}
532
533static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
534 int val_size)
535{
536 /*
537 * 4 bytes for the length, and additional 4 bytes per each xattr name,
538 * 4 bytes per each value
539 */
540 int size = 4 + ci->i_xattrs.count*(4 + 4) +
541 ci->i_xattrs.names_size +
542 ci->i_xattrs.vals_size;
543 dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
544 ci->i_xattrs.count, ci->i_xattrs.names_size,
545 ci->i_xattrs.vals_size);
546
547 if (name_size)
548 size += 4 + 4 + name_size + val_size;
549
550 return size;
551}
552
553/*
554 * If there are dirty xattrs, reencode xattrs into the prealloc_blob
555 * and swap into place.
556 */
557void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
558{
559 struct rb_node *p;
560 struct ceph_inode_xattr *xattr = NULL;
561 void *dest;
562
563 dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
564 if (ci->i_xattrs.dirty) {
565 int need = __get_required_blob_size(ci, 0, 0);
566
567 BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
568
569 p = rb_first(&ci->i_xattrs.index);
570 dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
571
572 ceph_encode_32(&dest, ci->i_xattrs.count);
573 while (p) {
574 xattr = rb_entry(p, struct ceph_inode_xattr, node);
575
576 ceph_encode_32(&dest, xattr->name_len);
577 memcpy(dest, xattr->name, xattr->name_len);
578 dest += xattr->name_len;
579 ceph_encode_32(&dest, xattr->val_len);
580 memcpy(dest, xattr->val, xattr->val_len);
581 dest += xattr->val_len;
582
583 p = rb_next(p);
584 }
585
586 /* adjust buffer len; it may be larger than we need */
587 ci->i_xattrs.prealloc_blob->vec.iov_len =
588 dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
589
Sage Weilb6c1d5b2009-12-07 12:17:17 -0800590 if (ci->i_xattrs.blob)
591 ceph_buffer_put(ci->i_xattrs.blob);
Sage Weil355da1e2009-10-06 11:31:08 -0700592 ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
593 ci->i_xattrs.prealloc_blob = NULL;
594 ci->i_xattrs.dirty = false;
Sage Weil4a625be2010-08-22 15:03:56 -0700595 ci->i_xattrs.version++;
Sage Weil355da1e2009-10-06 11:31:08 -0700596 }
597}
598
599ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
600 size_t size)
601{
602 struct inode *inode = dentry->d_inode;
603 struct ceph_inode_info *ci = ceph_inode(inode);
Sage Weil355da1e2009-10-06 11:31:08 -0700604 int err;
605 struct ceph_inode_xattr *xattr;
Alex Elder881a5fa2012-01-23 15:49:28 -0600606 struct ceph_vxattr *vxattr = NULL;
Sage Weil355da1e2009-10-06 11:31:08 -0700607
608 if (!ceph_is_valid_xattr(name))
609 return -ENODATA;
610
Sage Weilbe655592011-11-30 09:47:09 -0800611 spin_lock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700612 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
613 ci->i_xattrs.version, ci->i_xattrs.index_version);
614
Sage Weil0bee82f2013-01-20 22:00:58 -0800615 /* let's see if a virtual xattr was requested */
616 vxattr = ceph_match_vxattr(inode, name);
617 if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
618 err = vxattr->getxattr_cb(ci, value, size);
619 goto out;
620 }
621
Sage Weil355da1e2009-10-06 11:31:08 -0700622 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
623 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
624 goto get_xattr;
625 } else {
Sage Weilbe655592011-11-30 09:47:09 -0800626 spin_unlock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700627 /* get xattrs from mds (if we don't already have them) */
628 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
629 if (err)
630 return err;
631 }
632
Sage Weilbe655592011-11-30 09:47:09 -0800633 spin_lock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700634
Sage Weil355da1e2009-10-06 11:31:08 -0700635 err = __build_xattrs(inode);
636 if (err < 0)
637 goto out;
638
639get_xattr:
640 err = -ENODATA; /* == ENOATTR */
641 xattr = __get_xattr(ci, name);
Sage Weil0bee82f2013-01-20 22:00:58 -0800642 if (!xattr)
Sage Weil355da1e2009-10-06 11:31:08 -0700643 goto out;
Sage Weil355da1e2009-10-06 11:31:08 -0700644
645 err = -ERANGE;
646 if (size && size < xattr->val_len)
647 goto out;
648
649 err = xattr->val_len;
650 if (size == 0)
651 goto out;
652
653 memcpy(value, xattr->val, xattr->val_len);
654
655out:
Sage Weilbe655592011-11-30 09:47:09 -0800656 spin_unlock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700657 return err;
658}
659
660ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
661{
662 struct inode *inode = dentry->d_inode;
663 struct ceph_inode_info *ci = ceph_inode(inode);
Alex Elder881a5fa2012-01-23 15:49:28 -0600664 struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
Sage Weil355da1e2009-10-06 11:31:08 -0700665 u32 vir_namelen = 0;
666 u32 namelen;
667 int err;
668 u32 len;
669 int i;
670
Sage Weilbe655592011-11-30 09:47:09 -0800671 spin_lock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700672 dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
673 ci->i_xattrs.version, ci->i_xattrs.index_version);
674
675 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
Henry C Changbddfa3c2010-04-29 09:32:28 -0700676 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
Sage Weil355da1e2009-10-06 11:31:08 -0700677 goto list_xattr;
678 } else {
Sage Weilbe655592011-11-30 09:47:09 -0800679 spin_unlock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700680 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
681 if (err)
682 return err;
683 }
684
Sage Weilbe655592011-11-30 09:47:09 -0800685 spin_lock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700686
687 err = __build_xattrs(inode);
688 if (err < 0)
689 goto out;
690
691list_xattr:
Alex Elder3ce6cd12012-01-23 15:49:28 -0600692 /*
693 * Start with virtual dir xattr names (if any) (including
694 * terminating '\0' characters for each).
695 */
696 vir_namelen = ceph_vxattrs_name_size(vxattrs);
697
Sage Weil355da1e2009-10-06 11:31:08 -0700698 /* adding 1 byte per each variable due to the null termination */
Sage Weilb65917d2013-01-20 22:02:39 -0800699 namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
Sage Weil355da1e2009-10-06 11:31:08 -0700700 err = -ERANGE;
Sage Weilb65917d2013-01-20 22:02:39 -0800701 if (size && vir_namelen + namelen > size)
Sage Weil355da1e2009-10-06 11:31:08 -0700702 goto out;
703
Sage Weilb65917d2013-01-20 22:02:39 -0800704 err = namelen + vir_namelen;
Sage Weil355da1e2009-10-06 11:31:08 -0700705 if (size == 0)
706 goto out;
707
708 names = __copy_xattr_names(ci, names);
709
710 /* virtual xattr names, too */
Sage Weilb65917d2013-01-20 22:02:39 -0800711 err = namelen;
712 if (vxattrs) {
Sage Weil355da1e2009-10-06 11:31:08 -0700713 for (i = 0; vxattrs[i].name; i++) {
Sage Weilb65917d2013-01-20 22:02:39 -0800714 if (!vxattrs[i].hidden &&
715 !(vxattrs[i].exists_cb &&
716 !vxattrs[i].exists_cb(ci))) {
717 len = sprintf(names, "%s", vxattrs[i].name);
718 names += len + 1;
719 err += len + 1;
720 }
Sage Weil355da1e2009-10-06 11:31:08 -0700721 }
Sage Weilb65917d2013-01-20 22:02:39 -0800722 }
Sage Weil355da1e2009-10-06 11:31:08 -0700723
724out:
Sage Weilbe655592011-11-30 09:47:09 -0800725 spin_unlock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700726 return err;
727}
728
729static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
730 const char *value, size_t size, int flags)
731{
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700732 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
Sage Weil355da1e2009-10-06 11:31:08 -0700733 struct inode *inode = dentry->d_inode;
734 struct ceph_inode_info *ci = ceph_inode(inode);
Sage Weil5f21c962011-07-26 11:30:29 -0700735 struct inode *parent_inode;
Sage Weil355da1e2009-10-06 11:31:08 -0700736 struct ceph_mds_request *req;
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700737 struct ceph_mds_client *mdsc = fsc->mdsc;
Sage Weil355da1e2009-10-06 11:31:08 -0700738 int err;
739 int i, nr_pages;
740 struct page **pages = NULL;
741 void *kaddr;
742
743 /* copy value into some pages */
744 nr_pages = calc_pages_for(0, size);
745 if (nr_pages) {
746 pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
747 if (!pages)
748 return -ENOMEM;
749 err = -ENOMEM;
750 for (i = 0; i < nr_pages; i++) {
Yehuda Sadeh31459fe2010-03-17 13:54:02 -0700751 pages[i] = __page_cache_alloc(GFP_NOFS);
Sage Weil355da1e2009-10-06 11:31:08 -0700752 if (!pages[i]) {
753 nr_pages = i;
754 goto out;
755 }
756 kaddr = kmap(pages[i]);
757 memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
758 min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
759 }
760 }
761
762 dout("setxattr value=%.*s\n", (int)size, value);
763
764 /* do request */
765 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
766 USE_AUTH_MDS);
Julia Lawall60d87732009-11-21 12:53:08 +0100767 if (IS_ERR(req)) {
768 err = PTR_ERR(req);
769 goto out;
770 }
Sage Weil70b666c2011-05-27 09:24:26 -0700771 req->r_inode = inode;
772 ihold(inode);
Sage Weil355da1e2009-10-06 11:31:08 -0700773 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
774 req->r_num_caps = 1;
775 req->r_args.setxattr.flags = cpu_to_le32(flags);
776 req->r_path2 = kstrdup(name, GFP_NOFS);
777
778 req->r_pages = pages;
779 req->r_num_pages = nr_pages;
780 req->r_data_len = size;
781
782 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
Sage Weil5f21c962011-07-26 11:30:29 -0700783 parent_inode = ceph_get_dentry_parent_inode(dentry);
Sage Weil355da1e2009-10-06 11:31:08 -0700784 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
Sage Weil5f21c962011-07-26 11:30:29 -0700785 iput(parent_inode);
Sage Weil355da1e2009-10-06 11:31:08 -0700786 ceph_mdsc_put_request(req);
787 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
788
789out:
790 if (pages) {
791 for (i = 0; i < nr_pages; i++)
792 __free_page(pages[i]);
793 kfree(pages);
794 }
795 return err;
796}
797
798int ceph_setxattr(struct dentry *dentry, const char *name,
799 const void *value, size_t size, int flags)
800{
801 struct inode *inode = dentry->d_inode;
Alex Elder881a5fa2012-01-23 15:49:28 -0600802 struct ceph_vxattr *vxattr;
Sage Weil355da1e2009-10-06 11:31:08 -0700803 struct ceph_inode_info *ci = ceph_inode(inode);
Alex Elder18fa8b32012-01-23 15:49:28 -0600804 int issued;
Sage Weil355da1e2009-10-06 11:31:08 -0700805 int err;
Alex Elder18fa8b32012-01-23 15:49:28 -0600806 int dirty;
Sage Weil355da1e2009-10-06 11:31:08 -0700807 int name_len = strlen(name);
808 int val_len = size;
809 char *newname = NULL;
810 char *newval = NULL;
811 struct ceph_inode_xattr *xattr = NULL;
Sage Weil355da1e2009-10-06 11:31:08 -0700812 int required_blob_size;
813
814 if (ceph_snap(inode) != CEPH_NOSNAP)
815 return -EROFS;
816
817 if (!ceph_is_valid_xattr(name))
818 return -EOPNOTSUPP;
819
Alex Elder06476a62012-01-23 15:49:27 -0600820 vxattr = ceph_match_vxattr(inode, name);
821 if (vxattr && vxattr->readonly)
822 return -EOPNOTSUPP;
Sage Weil355da1e2009-10-06 11:31:08 -0700823
Sage Weil3adf6542013-01-31 11:53:41 -0800824 /* pass any unhandled ceph.* xattrs through to the MDS */
825 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
826 goto do_sync_unlocked;
827
Sage Weil355da1e2009-10-06 11:31:08 -0700828 /* preallocate memory for xattr name, value, index node */
829 err = -ENOMEM;
Julia Lawall61413c22010-10-17 21:55:21 +0200830 newname = kmemdup(name, name_len + 1, GFP_NOFS);
Sage Weil355da1e2009-10-06 11:31:08 -0700831 if (!newname)
832 goto out;
Sage Weil355da1e2009-10-06 11:31:08 -0700833
834 if (val_len) {
Alex Elderb829c192012-01-23 15:49:27 -0600835 newval = kmemdup(value, val_len, GFP_NOFS);
Sage Weil355da1e2009-10-06 11:31:08 -0700836 if (!newval)
837 goto out;
Sage Weil355da1e2009-10-06 11:31:08 -0700838 }
839
840 xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
841 if (!xattr)
842 goto out;
843
Sage Weilbe655592011-11-30 09:47:09 -0800844 spin_lock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700845retry:
846 issued = __ceph_caps_issued(ci, NULL);
Alex Elder18fa8b32012-01-23 15:49:28 -0600847 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
Sage Weil355da1e2009-10-06 11:31:08 -0700848 if (!(issued & CEPH_CAP_XATTR_EXCL))
849 goto do_sync;
850 __build_xattrs(inode);
851
852 required_blob_size = __get_required_blob_size(ci, name_len, val_len);
853
854 if (!ci->i_xattrs.prealloc_blob ||
855 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
Alex Elder18fa8b32012-01-23 15:49:28 -0600856 struct ceph_buffer *blob;
Sage Weil355da1e2009-10-06 11:31:08 -0700857
Sage Weilbe655592011-11-30 09:47:09 -0800858 spin_unlock(&ci->i_ceph_lock);
Sage Weil355da1e2009-10-06 11:31:08 -0700859 dout(" preaallocating new blob size=%d\n", required_blob_size);
Sage Weilb6c1d5b2009-12-07 12:17:17 -0800860 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
Sage Weil355da1e2009-10-06 11:31:08 -0700861 if (!blob)
862 goto out;
Sage Weilbe655592011-11-30 09:47:09 -0800863 spin_lock(&ci->i_ceph_lock);
Sage Weilb6c1d5b2009-12-07 12:17:17 -0800864 if (ci->i_xattrs.prealloc_blob)
865 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
Sage Weil355da1e2009-10-06 11:31:08 -0700866 ci->i_xattrs.prealloc_blob = blob;
867 goto retry;
868 }
869
Sage Weil355da1e2009-10-06 11:31:08 -0700870 err = __set_xattr(ci, newname, name_len, newval,
871 val_len, 1, 1, 1, &xattr);
Alex Elder18fa8b32012-01-23 15:49:28 -0600872
Sage Weilfca65b42011-05-04 11:33:47 -0700873 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
Sage Weil355da1e2009-10-06 11:31:08 -0700874 ci->i_xattrs.dirty = true;
875 inode->i_ctime = CURRENT_TIME;
Alex Elder18fa8b32012-01-23 15:49:28 -0600876
Sage Weilbe655592011-11-30 09:47:09 -0800877 spin_unlock(&ci->i_ceph_lock);
Sage Weilfca65b42011-05-04 11:33:47 -0700878 if (dirty)
879 __mark_inode_dirty(inode, dirty);
Sage Weil355da1e2009-10-06 11:31:08 -0700880 return err;
881
882do_sync:
Sage Weilbe655592011-11-30 09:47:09 -0800883 spin_unlock(&ci->i_ceph_lock);
Sage Weil3adf6542013-01-31 11:53:41 -0800884do_sync_unlocked:
Sage Weil355da1e2009-10-06 11:31:08 -0700885 err = ceph_sync_setxattr(dentry, name, value, size, flags);
886out:
887 kfree(newname);
888 kfree(newval);
889 kfree(xattr);
890 return err;
891}
892
893static int ceph_send_removexattr(struct dentry *dentry, const char *name)
894{
Yehuda Sadeh3d14c5d2010-04-06 15:14:15 -0700895 struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
896 struct ceph_mds_client *mdsc = fsc->mdsc;
Sage Weil355da1e2009-10-06 11:31:08 -0700897 struct inode *inode = dentry->d_inode;
Sage Weil5f21c962011-07-26 11:30:29 -0700898 struct inode *parent_inode;
Sage Weil355da1e2009-10-06 11:31:08 -0700899 struct ceph_mds_request *req;
900 int err;
901
902 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
903 USE_AUTH_MDS);
904 if (IS_ERR(req))
905 return PTR_ERR(req);
Sage Weil70b666c2011-05-27 09:24:26 -0700906 req->r_inode = inode;
907 ihold(inode);
Sage Weil355da1e2009-10-06 11:31:08 -0700908 req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
909 req->r_num_caps = 1;
910 req->r_path2 = kstrdup(name, GFP_NOFS);
911
Sage Weil5f21c962011-07-26 11:30:29 -0700912 parent_inode = ceph_get_dentry_parent_inode(dentry);
Sage Weil355da1e2009-10-06 11:31:08 -0700913 err = ceph_mdsc_do_request(mdsc, parent_inode, req);
Sage Weil5f21c962011-07-26 11:30:29 -0700914 iput(parent_inode);
Sage Weil355da1e2009-10-06 11:31:08 -0700915 ceph_mdsc_put_request(req);
916 return err;
917}
918
919int ceph_removexattr(struct dentry *dentry, const char *name)
920{
921 struct inode *inode = dentry->d_inode;
Alex Elder881a5fa2012-01-23 15:49:28 -0600922 struct ceph_vxattr *vxattr;
Sage Weil355da1e2009-10-06 11:31:08 -0700923 struct ceph_inode_info *ci = ceph_inode(inode);
Sage Weil355da1e2009-10-06 11:31:08 -0700924 int issued;
925 int err;
Alex Elder83eb26a2012-01-11 17:41:01 -0800926 int required_blob_size;
Sage Weilfca65b42011-05-04 11:33:47 -0700927 int dirty;
Sage Weil355da1e2009-10-06 11:31:08 -0700928
929 if (ceph_snap(inode) != CEPH_NOSNAP)
930 return -EROFS;
931
932 if (!ceph_is_valid_xattr(name))
933 return -EOPNOTSUPP;
934
Alex Elder06476a62012-01-23 15:49:27 -0600935 vxattr = ceph_match_vxattr(inode, name);
936 if (vxattr && vxattr->readonly)
937 return -EOPNOTSUPP;
Sage Weil355da1e2009-10-06 11:31:08 -0700938
Sage Weild421acb2013-01-20 21:55:30 -0800939 /* pass any unhandled ceph.* xattrs through to the MDS */
940 if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
941 goto do_sync_unlocked;
942
Alex Elder83eb26a2012-01-11 17:41:01 -0800943 err = -ENOMEM;
Sage Weilbe655592011-11-30 09:47:09 -0800944 spin_lock(&ci->i_ceph_lock);
Alex Elder83eb26a2012-01-11 17:41:01 -0800945retry:
Sage Weil355da1e2009-10-06 11:31:08 -0700946 issued = __ceph_caps_issued(ci, NULL);
947 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
948
949 if (!(issued & CEPH_CAP_XATTR_EXCL))
950 goto do_sync;
Alex Elder18fa8b32012-01-23 15:49:28 -0600951 __build_xattrs(inode);
Sage Weil355da1e2009-10-06 11:31:08 -0700952
Alex Elder83eb26a2012-01-11 17:41:01 -0800953 required_blob_size = __get_required_blob_size(ci, 0, 0);
954
955 if (!ci->i_xattrs.prealloc_blob ||
956 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
957 struct ceph_buffer *blob;
958
959 spin_unlock(&ci->i_ceph_lock);
960 dout(" preaallocating new blob size=%d\n", required_blob_size);
961 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
962 if (!blob)
963 goto out;
964 spin_lock(&ci->i_ceph_lock);
965 if (ci->i_xattrs.prealloc_blob)
966 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
967 ci->i_xattrs.prealloc_blob = blob;
968 goto retry;
969 }
970
Sage Weil355da1e2009-10-06 11:31:08 -0700971 err = __remove_xattr_by_name(ceph_inode(inode), name);
Alex Elder18fa8b32012-01-23 15:49:28 -0600972
Sage Weilfca65b42011-05-04 11:33:47 -0700973 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
Sage Weil355da1e2009-10-06 11:31:08 -0700974 ci->i_xattrs.dirty = true;
975 inode->i_ctime = CURRENT_TIME;
Sage Weilbe655592011-11-30 09:47:09 -0800976 spin_unlock(&ci->i_ceph_lock);
Sage Weilfca65b42011-05-04 11:33:47 -0700977 if (dirty)
978 __mark_inode_dirty(inode, dirty);
Sage Weil355da1e2009-10-06 11:31:08 -0700979 return err;
980do_sync:
Sage Weilbe655592011-11-30 09:47:09 -0800981 spin_unlock(&ci->i_ceph_lock);
Sage Weild421acb2013-01-20 21:55:30 -0800982do_sync_unlocked:
Sage Weil355da1e2009-10-06 11:31:08 -0700983 err = ceph_send_removexattr(dentry, name);
Alex Elder83eb26a2012-01-11 17:41:01 -0800984out:
Sage Weil355da1e2009-10-06 11:31:08 -0700985 return err;
986}
987