blob: 9d67572fb3285490eefbf8d8b12b3aafaddcb224 [file] [log] [blame]
Sage Weila8e63b72009-10-06 11:31:13 -07001#include "ceph_debug.h"
2
3#include <linux/exportfs.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +09004#include <linux/slab.h>
Sage Weila8e63b72009-10-06 11:31:13 -07005#include <asm/unaligned.h>
6
7#include "super.h"
8
9/*
10 * NFS export support
11 *
12 * NFS re-export of a ceph mount is, at present, only semi-reliable.
13 * The basic issue is that the Ceph architecture doesn't lend itself
14 * well to generating filehandles that will remain valid forever.
15 *
16 * So, we do our best. If you're lucky, your inode will be in the
17 * client's cache. If it's not, and you have a connectable fh, then
18 * the MDS server may be able to find it for you. Otherwise, you get
19 * ESTALE.
20 *
21 * There are ways to make this more reliable, but in the non-connectable
22 * fh case, it won't ever work perfectly, and in the connectable case,
23 * some changes are needed on the MDS side to work better.
24 */
25
26/*
27 * Basic fh
28 */
29struct ceph_nfs_fh {
30 u64 ino;
31} __attribute__ ((packed));
32
33/*
34 * Larger 'connectable' fh that includes parent ino and name hash.
35 * Use this whenever possible, as it works more reliably.
36 */
37struct ceph_nfs_confh {
38 u64 ino, parent_ino;
39 u32 parent_name_hash;
40} __attribute__ ((packed));
41
42static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
43 int connectable)
44{
45 struct ceph_nfs_fh *fh = (void *)rawfh;
46 struct ceph_nfs_confh *cfh = (void *)rawfh;
47 struct dentry *parent = dentry->d_parent;
48 struct inode *inode = dentry->d_inode;
49 int type;
50
51 /* don't re-export snaps */
52 if (ceph_snap(inode) != CEPH_NOSNAP)
53 return -EINVAL;
54
55 if (*max_len >= sizeof(*cfh)) {
56 dout("encode_fh %p connectable\n", dentry);
57 cfh->ino = ceph_ino(dentry->d_inode);
58 cfh->parent_ino = ceph_ino(parent->d_inode);
59 cfh->parent_name_hash = parent->d_name.hash;
60 *max_len = sizeof(*cfh);
61 type = 2;
62 } else if (*max_len > sizeof(*fh)) {
63 if (connectable)
64 return -ENOSPC;
65 dout("encode_fh %p\n", dentry);
66 fh->ino = ceph_ino(dentry->d_inode);
67 *max_len = sizeof(*fh);
68 type = 1;
69 } else {
70 return -ENOSPC;
71 }
72 return type;
73}
74
75/*
76 * convert regular fh to dentry
77 *
78 * FIXME: we should try harder by querying the mds for the ino.
79 */
80static struct dentry *__fh_to_dentry(struct super_block *sb,
81 struct ceph_nfs_fh *fh)
82{
83 struct inode *inode;
84 struct dentry *dentry;
85 struct ceph_vino vino;
86 int err;
87
88 dout("__fh_to_dentry %llx\n", fh->ino);
89 vino.ino = fh->ino;
90 vino.snap = CEPH_NOSNAP;
91 inode = ceph_find_inode(sb, vino);
92 if (!inode)
93 return ERR_PTR(-ESTALE);
94
95 dentry = d_obtain_alias(inode);
96 if (!dentry) {
97 pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n",
98 fh->ino, inode);
99 iput(inode);
100 return ERR_PTR(-ENOMEM);
101 }
102 err = ceph_init_dentry(dentry);
103
104 if (err < 0) {
105 iput(inode);
106 return ERR_PTR(err);
107 }
108 dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry);
109 return dentry;
110}
111
112/*
113 * convert connectable fh to dentry
114 */
115static struct dentry *__cfh_to_dentry(struct super_block *sb,
116 struct ceph_nfs_confh *cfh)
117{
118 struct ceph_mds_client *mdsc = &ceph_client(sb)->mdsc;
119 struct inode *inode;
120 struct dentry *dentry;
121 struct ceph_vino vino;
122 int err;
123
124 dout("__cfh_to_dentry %llx (%llx/%x)\n",
125 cfh->ino, cfh->parent_ino, cfh->parent_name_hash);
126
127 vino.ino = cfh->ino;
128 vino.snap = CEPH_NOSNAP;
129 inode = ceph_find_inode(sb, vino);
130 if (!inode) {
131 struct ceph_mds_request *req;
132
133 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH,
134 USE_ANY_MDS);
135 if (IS_ERR(req))
136 return ERR_PTR(PTR_ERR(req));
137
138 req->r_ino1 = vino;
139 req->r_ino2.ino = cfh->parent_ino;
140 req->r_ino2.snap = CEPH_NOSNAP;
141 req->r_path2 = kmalloc(16, GFP_NOFS);
142 snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
143 req->r_num_caps = 1;
144 err = ceph_mdsc_do_request(mdsc, NULL, req);
145 ceph_mdsc_put_request(req);
146 inode = ceph_find_inode(sb, vino);
147 if (!inode)
148 return ERR_PTR(err ? err : -ESTALE);
149 }
150
151 dentry = d_obtain_alias(inode);
152 if (!dentry) {
153 pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n",
154 cfh->ino, inode);
155 iput(inode);
156 return ERR_PTR(-ENOMEM);
157 }
158 err = ceph_init_dentry(dentry);
159 if (err < 0) {
160 iput(inode);
161 return ERR_PTR(err);
162 }
163 dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry);
164 return dentry;
165}
166
167static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid,
168 int fh_len, int fh_type)
169{
170 if (fh_type == 1)
171 return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw);
172 else
173 return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw);
174}
175
176/*
177 * get parent, if possible.
178 *
179 * FIXME: we could do better by querying the mds to discover the
180 * parent.
181 */
182static struct dentry *ceph_fh_to_parent(struct super_block *sb,
183 struct fid *fid,
184 int fh_len, int fh_type)
185{
186 struct ceph_nfs_confh *cfh = (void *)fid->raw;
187 struct ceph_vino vino;
188 struct inode *inode;
189 struct dentry *dentry;
190 int err;
191
192 if (fh_type == 1)
193 return ERR_PTR(-ESTALE);
194
195 pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino,
196 cfh->parent_name_hash);
197
198 vino.ino = cfh->ino;
199 vino.snap = CEPH_NOSNAP;
200 inode = ceph_find_inode(sb, vino);
201 if (!inode)
202 return ERR_PTR(-ESTALE);
203
204 dentry = d_obtain_alias(inode);
205 if (!dentry) {
206 pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n",
207 cfh->ino, inode);
208 iput(inode);
209 return ERR_PTR(-ENOMEM);
210 }
211 err = ceph_init_dentry(dentry);
212 if (err < 0) {
213 iput(inode);
214 return ERR_PTR(err);
215 }
216 dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry);
217 return dentry;
218}
219
220const struct export_operations ceph_export_ops = {
221 .encode_fh = ceph_encode_fh,
222 .fh_to_dentry = ceph_fh_to_dentry,
223 .fh_to_parent = ceph_fh_to_parent,
224};