blob: ba7dec40771e6d902e43551efcba9f528034faf1 [file] [log] [blame]
Mike Marshall5db11c22015-07-17 10:38:12 -04001/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7#include "protocol.h"
Mike Marshall575e9462015-12-04 12:56:14 -05008#include "orangefs-kernel.h"
9#include "orangefs-bufmap.h"
Mike Marshall5db11c22015-07-17 10:38:12 -040010
Mike Marshall5db11c22015-07-17 10:38:12 -040011/*
Mike Marshall1808f8c2016-01-15 13:10:52 -050012 * decode routine used by kmod to deal with the blob sent from
13 * userspace for readdirs. The blob contains zero or more of these
14 * sub-blobs:
15 * __u32 - represents length of the character string that follows.
16 * string - between 1 and ORANGEFS_NAME_MAX bytes long.
17 * padding - (if needed) to cause the __u32 plus the string to be
18 * eight byte aligned.
19 * khandle - sizeof(khandle) bytes.
Mike Marshall5db11c22015-07-17 10:38:12 -040020 */
Al Viro80928952015-10-09 18:11:10 -040021static long decode_dirents(char *ptr, size_t size,
Yi Liu8bb8aef2015-11-24 15:12:14 -050022 struct orangefs_readdir_response_s *readdir)
Mike Marshall5db11c22015-07-17 10:38:12 -040023{
24 int i;
Yi Liu8bb8aef2015-11-24 15:12:14 -050025 struct orangefs_readdir_response_s *rd =
26 (struct orangefs_readdir_response_s *) ptr;
Mike Marshall5db11c22015-07-17 10:38:12 -040027 char *buf = ptr;
Mike Marshall1808f8c2016-01-15 13:10:52 -050028 int khandle_size = sizeof(struct orangefs_khandle);
29 size_t offset = offsetof(struct orangefs_readdir_response_s,
30 dirent_array);
31 /* 8 reflects eight byte alignment */
32 int smallest_blob = khandle_size + 8;
33 __u32 len;
34 int aligned_len;
35 int sizeof_u32 = sizeof(__u32);
36 long ret;
Mike Marshall5db11c22015-07-17 10:38:12 -040037
Mike Marshall1808f8c2016-01-15 13:10:52 -050038 gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);
39
40 /* size is = offset on empty dirs, > offset on non-empty dirs... */
41 if (size < offset) {
42 gossip_err("%s: size:%zu: offset:%zu:\n",
43 __func__,
44 size,
45 offset);
46 ret = -EINVAL;
47 goto out;
48 }
49
50 if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
51 gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
52 __func__,
53 size,
54 readdir->orangefs_dirent_outcount);
55 ret = -EINVAL;
56 goto out;
57 }
Al Viro80928952015-10-09 18:11:10 -040058
Mike Marshall5db11c22015-07-17 10:38:12 -040059 readdir->token = rd->token;
Yi Liu8bb8aef2015-11-24 15:12:14 -050060 readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
61 readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
Mike Marshall5db11c22015-07-17 10:38:12 -040062 sizeof(*readdir->dirent_array),
63 GFP_KERNEL);
Mike Marshall1808f8c2016-01-15 13:10:52 -050064 if (readdir->dirent_array == NULL) {
65 gossip_err("%s: kcalloc failed.\n", __func__);
66 ret = -ENOMEM;
67 goto out;
68 }
Al Viro80928952015-10-09 18:11:10 -040069
Mike Marshall1808f8c2016-01-15 13:10:52 -050070 buf += offset;
71 size -= offset;
Al Viro80928952015-10-09 18:11:10 -040072
Yi Liu8bb8aef2015-11-24 15:12:14 -050073 for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
Mike Marshall1808f8c2016-01-15 13:10:52 -050074 if (size < smallest_blob) {
75 gossip_err("%s: size:%zu: smallest_blob:%d:\n",
76 __func__,
77 size,
78 smallest_blob);
79 ret = -EINVAL;
80 goto free;
81 }
Al Viro80928952015-10-09 18:11:10 -040082
83 len = *(__u32 *)buf;
Mike Marshall1808f8c2016-01-15 13:10:52 -050084 if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
85 gossip_err("%s: len:%d:\n", __func__, len);
86 ret = -EINVAL;
87 goto free;
88 }
Al Viro80928952015-10-09 18:11:10 -040089
Mike Marshall1808f8c2016-01-15 13:10:52 -050090 gossip_debug(GOSSIP_DIR_DEBUG,
91 "%s: size:%zu: len:%d:\n",
92 __func__,
93 size,
94 len);
95
96 readdir->dirent_array[i].d_name = buf + sizeof_u32;
Al Viro9be68b02015-10-09 17:43:15 -040097 readdir->dirent_array[i].d_length = len;
Al Viro80928952015-10-09 18:11:10 -040098
Martin Brandenburg7d221482016-01-04 15:05:28 -050099 /*
Mike Marshall1808f8c2016-01-15 13:10:52 -0500100 * Calculate "aligned" length of this string and its
101 * associated __u32 descriptor.
Martin Brandenburg7d221482016-01-04 15:05:28 -0500102 */
Mike Marshall1808f8c2016-01-15 13:10:52 -0500103 aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
104 gossip_debug(GOSSIP_DIR_DEBUG,
105 "%s: aligned_len:%d:\n",
106 __func__,
107 aligned_len);
Al Viro80928952015-10-09 18:11:10 -0400108
Mike Marshall1808f8c2016-01-15 13:10:52 -0500109 /*
110 * The end of the blob should coincide with the end
111 * of the last sub-blob.
112 */
113 if (size < aligned_len + khandle_size) {
114 gossip_err("%s: ran off the end of the blob.\n",
115 __func__);
116 ret = -EINVAL;
117 goto free;
118 }
119 size -= aligned_len + khandle_size;
120
121 buf += aligned_len;
Al Viro80928952015-10-09 18:11:10 -0400122
Mike Marshall5db11c22015-07-17 10:38:12 -0400123 readdir->dirent_array[i].khandle =
Yi Liu8bb8aef2015-11-24 15:12:14 -0500124 *(struct orangefs_khandle *) buf;
Mike Marshall1808f8c2016-01-15 13:10:52 -0500125 buf += khandle_size;
Mike Marshall5db11c22015-07-17 10:38:12 -0400126 }
Mike Marshall1808f8c2016-01-15 13:10:52 -0500127 ret = buf - ptr;
128 gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
129 goto out;
130
131free:
Al Viro80928952015-10-09 18:11:10 -0400132 kfree(readdir->dirent_array);
133 readdir->dirent_array = NULL;
Mike Marshall1808f8c2016-01-15 13:10:52 -0500134
135out:
136 return ret;
Mike Marshall5db11c22015-07-17 10:38:12 -0400137}
138
Mike Marshall5db11c22015-07-17 10:38:12 -0400139/*
140 * Read directory entries from an instance of an open directory.
Mike Marshall5db11c22015-07-17 10:38:12 -0400141 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500142static int orangefs_readdir(struct file *file, struct dir_context *ctx)
Mike Marshall5db11c22015-07-17 10:38:12 -0400143{
Mike Marshall5db11c22015-07-17 10:38:12 -0400144 int ret = 0;
145 int buffer_index;
Mike Marshall88309aa2015-09-23 16:48:40 -0400146 /*
147 * ptoken supports Orangefs' distributed directory logic, added
148 * in 2.9.2.
149 */
Mike Marshall5db11c22015-07-17 10:38:12 -0400150 __u64 *ptoken = file->private_data;
151 __u64 pos = 0;
152 ino_t ino = 0;
153 struct dentry *dentry = file->f_path.dentry;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500154 struct orangefs_kernel_op_s *new_op = NULL;
155 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
Mike Marshall5db11c22015-07-17 10:38:12 -0400156 int buffer_full = 0;
Al Viro9f5e2f72016-02-16 19:54:13 -0500157 struct orangefs_readdir_response_s readdir_response;
158 void *dents_buf;
Mike Marshall5db11c22015-07-17 10:38:12 -0400159 int i = 0;
160 int len = 0;
161 ino_t current_ino = 0;
162 char *current_entry = NULL;
163 long bytes_decoded;
164
Mike Marshall88309aa2015-09-23 16:48:40 -0400165 gossip_debug(GOSSIP_DIR_DEBUG,
166 "%s: ctx->pos:%lld, ptoken = %llu\n",
167 __func__,
168 lld(ctx->pos),
169 llu(*ptoken));
Mike Marshall5db11c22015-07-17 10:38:12 -0400170
171 pos = (__u64) ctx->pos;
172
173 /* are we done? */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500174 if (pos == ORANGEFS_READDIR_END) {
Mike Marshall5db11c22015-07-17 10:38:12 -0400175 gossip_debug(GOSSIP_DIR_DEBUG,
176 "Skipping to termination path\n");
177 return 0;
178 }
179
180 gossip_debug(GOSSIP_DIR_DEBUG,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500181 "orangefs_readdir called on %s (pos=%llu)\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400182 dentry->d_name.name, llu(pos));
183
Al Viro9f5e2f72016-02-16 19:54:13 -0500184 memset(&readdir_response, 0, sizeof(readdir_response));
Mike Marshall5db11c22015-07-17 10:38:12 -0400185
Yi Liu8bb8aef2015-11-24 15:12:14 -0500186 new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
Mike Marshall5db11c22015-07-17 10:38:12 -0400187 if (!new_op)
188 return -ENOMEM;
189
Martin Brandenburgee3b8d32016-02-17 12:55:42 -0500190 /*
191 * Only the indices are shared. No memory is actually shared, but the
192 * mechanism is used.
193 */
Mike Marshall5db11c22015-07-17 10:38:12 -0400194 new_op->uses_shared_memory = 1;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500195 new_op->upcall.req.readdir.refn = orangefs_inode->refn;
Martin Brandenburg7d221482016-01-04 15:05:28 -0500196 new_op->upcall.req.readdir.max_dirent_count =
197 ORANGEFS_MAX_DIRENT_COUNT_READDIR;
Mike Marshall5db11c22015-07-17 10:38:12 -0400198
199 gossip_debug(GOSSIP_DIR_DEBUG,
200 "%s: upcall.req.readdir.refn.khandle: %pU\n",
201 __func__,
202 &new_op->upcall.req.readdir.refn.khandle);
203
Mike Marshall5db11c22015-07-17 10:38:12 -0400204 new_op->upcall.req.readdir.token = *ptoken;
205
206get_new_buffer_index:
Al Virob8a99a82016-02-16 20:10:26 -0500207 buffer_index = orangefs_readdir_index_get();
208 if (buffer_index < 0) {
209 ret = buffer_index;
Martin Brandenburg7d221482016-01-04 15:05:28 -0500210 gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400211 ret);
212 goto out_free_op;
213 }
214 new_op->upcall.req.readdir.buf_index = buffer_index;
215
216 ret = service_operation(new_op,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500217 "orangefs_readdir",
Mike Marshall5db11c22015-07-17 10:38:12 -0400218 get_interruptible_flag(dentry->d_inode));
219
220 gossip_debug(GOSSIP_DIR_DEBUG,
221 "Readdir downcall status is %d. ret:%d\n",
222 new_op->downcall.status,
223 ret);
224
Martin Brandenburgee3b8d32016-02-17 12:55:42 -0500225 orangefs_readdir_index_put(buffer_index);
226
Mike Marshall5db11c22015-07-17 10:38:12 -0400227 if (ret == -EAGAIN && op_state_purged(new_op)) {
Martin Brandenburgee3b8d32016-02-17 12:55:42 -0500228 /* Client-core indices are invalid after it restarted. */
Mike Marshall5db11c22015-07-17 10:38:12 -0400229 gossip_debug(GOSSIP_DIR_DEBUG,
230 "%s: Getting new buffer_index for retry of readdir..\n",
231 __func__);
Mike Marshall5db11c22015-07-17 10:38:12 -0400232 goto get_new_buffer_index;
233 }
234
235 if (ret == -EIO && op_state_purged(new_op)) {
236 gossip_err("%s: Client is down. Aborting readdir call.\n",
237 __func__);
Martin Brandenburg641bb322016-03-28 17:18:27 -0400238 goto out_free_op;
Mike Marshall5db11c22015-07-17 10:38:12 -0400239 }
240
241 if (ret < 0 || new_op->downcall.status != 0) {
242 gossip_debug(GOSSIP_DIR_DEBUG,
243 "Readdir request failed. Status:%d\n",
244 new_op->downcall.status);
Mike Marshall5db11c22015-07-17 10:38:12 -0400245 if (ret >= 0)
246 ret = new_op->downcall.status;
Martin Brandenburg641bb322016-03-28 17:18:27 -0400247 goto out_free_op;
Mike Marshall5db11c22015-07-17 10:38:12 -0400248 }
249
Al Viro9f5e2f72016-02-16 19:54:13 -0500250 dents_buf = new_op->downcall.trailer_buf;
251 if (dents_buf == NULL) {
252 gossip_err("Invalid NULL buffer in readdir response\n");
253 ret = -ENOMEM;
Martin Brandenburg641bb322016-03-28 17:18:27 -0400254 goto out_free_op;
Al Viro9f5e2f72016-02-16 19:54:13 -0500255 }
256
257 bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
258 &readdir_response);
Mike Marshall5db11c22015-07-17 10:38:12 -0400259 if (bytes_decoded < 0) {
Mike Marshall5db11c22015-07-17 10:38:12 -0400260 ret = bytes_decoded;
Al Viro9f5e2f72016-02-16 19:54:13 -0500261 gossip_err("Could not decode readdir from buffer %d\n", ret);
262 goto out_vfree;
Mike Marshall5db11c22015-07-17 10:38:12 -0400263 }
264
265 if (bytes_decoded != new_op->downcall.trailer_size) {
Yi Liu8bb8aef2015-11-24 15:12:14 -0500266 gossip_err("orangefs_readdir: # bytes decoded (%ld) "
Mike Marshall88309aa2015-09-23 16:48:40 -0400267 "!= trailer size (%ld)\n",
268 bytes_decoded,
269 (long)new_op->downcall.trailer_size);
Mike Marshall5db11c22015-07-17 10:38:12 -0400270 ret = -EINVAL;
271 goto out_destroy_handle;
272 }
273
Mike Marshall88309aa2015-09-23 16:48:40 -0400274 /*
Yi Liu8bb8aef2015-11-24 15:12:14 -0500275 * orangefs doesn't actually store dot and dot-dot, but
Mike Marshall88309aa2015-09-23 16:48:40 -0400276 * we need to have them represented.
277 */
Mike Marshall5db11c22015-07-17 10:38:12 -0400278 if (pos == 0) {
279 ino = get_ino_from_khandle(dentry->d_inode);
280 gossip_debug(GOSSIP_DIR_DEBUG,
281 "%s: calling dir_emit of \".\" with pos = %llu\n",
282 __func__,
283 llu(pos));
284 ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
Mike Marshall88309aa2015-09-23 16:48:40 -0400285 pos += 1;
Mike Marshall5db11c22015-07-17 10:38:12 -0400286 }
287
288 if (pos == 1) {
289 ino = get_parent_ino_from_dentry(dentry);
290 gossip_debug(GOSSIP_DIR_DEBUG,
291 "%s: calling dir_emit of \"..\" with pos = %llu\n",
292 __func__,
293 llu(pos));
294 ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
Mike Marshall88309aa2015-09-23 16:48:40 -0400295 pos += 1;
Mike Marshall5db11c22015-07-17 10:38:12 -0400296 }
297
Mike Marshall88309aa2015-09-23 16:48:40 -0400298 /*
Yi Liu8bb8aef2015-11-24 15:12:14 -0500299 * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
Mike Marshall88309aa2015-09-23 16:48:40 -0400300 * to prevent "finding" dot and dot-dot on any iteration
301 * other than the first.
302 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500303 if (ctx->pos == ORANGEFS_ITERATE_NEXT)
Mike Marshall88309aa2015-09-23 16:48:40 -0400304 ctx->pos = 0;
305
Mike Marshallcf07c0b2016-03-09 13:11:45 -0500306 gossip_debug(GOSSIP_DIR_DEBUG,
307 "%s: dirent_outcount:%d:\n",
308 __func__,
Al Viro9f5e2f72016-02-16 19:54:13 -0500309 readdir_response.orangefs_dirent_outcount);
Mike Marshall88309aa2015-09-23 16:48:40 -0400310 for (i = ctx->pos;
Al Viro9f5e2f72016-02-16 19:54:13 -0500311 i < readdir_response.orangefs_dirent_outcount;
Mike Marshall88309aa2015-09-23 16:48:40 -0400312 i++) {
Al Viro9f5e2f72016-02-16 19:54:13 -0500313 len = readdir_response.dirent_array[i].d_length;
314 current_entry = readdir_response.dirent_array[i].d_name;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500315 current_ino = orangefs_khandle_to_ino(
Al Viro9f5e2f72016-02-16 19:54:13 -0500316 &readdir_response.dirent_array[i].khandle);
Mike Marshall5db11c22015-07-17 10:38:12 -0400317
318 gossip_debug(GOSSIP_DIR_DEBUG,
Mike Marshall88309aa2015-09-23 16:48:40 -0400319 "calling dir_emit for %s with len %d"
320 ", ctx->pos %ld\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400321 current_entry,
322 len,
Mike Marshall88309aa2015-09-23 16:48:40 -0400323 (unsigned long)ctx->pos);
324 /*
325 * type is unknown. We don't return object type
326 * in the dirent_array. This leaves getdents
327 * clueless about type.
328 */
Mike Marshall5db11c22015-07-17 10:38:12 -0400329 ret =
330 dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
Mike Marshall88309aa2015-09-23 16:48:40 -0400331 if (!ret)
332 break;
Mike Marshall5db11c22015-07-17 10:38:12 -0400333 ctx->pos++;
Mike Marshall88309aa2015-09-23 16:48:40 -0400334 gossip_debug(GOSSIP_DIR_DEBUG,
Mike Marshall5db11c22015-07-17 10:38:12 -0400335 "%s: ctx->pos:%lld\n",
336 __func__,
337 lld(ctx->pos));
338
Mike Marshall5db11c22015-07-17 10:38:12 -0400339 }
340
Mike Marshall54804942015-10-05 13:44:24 -0400341 /*
Mike Marshall88309aa2015-09-23 16:48:40 -0400342 * we ran all the way through the last batch, set up for
343 * getting another batch...
344 */
345 if (ret) {
Al Viro9f5e2f72016-02-16 19:54:13 -0500346 *ptoken = readdir_response.token;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500347 ctx->pos = ORANGEFS_ITERATE_NEXT;
Mike Marshall5db11c22015-07-17 10:38:12 -0400348 }
349
350 /*
351 * Did we hit the end of the directory?
352 */
Al Viro9f5e2f72016-02-16 19:54:13 -0500353 if (readdir_response.token == ORANGEFS_READDIR_END &&
Mike Marshall5db11c22015-07-17 10:38:12 -0400354 !buffer_full) {
Mike Marshall88309aa2015-09-23 16:48:40 -0400355 gossip_debug(GOSSIP_DIR_DEBUG,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500356 "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
357 ctx->pos = ORANGEFS_READDIR_END;
Mike Marshall5db11c22015-07-17 10:38:12 -0400358 }
359
Mike Marshall5db11c22015-07-17 10:38:12 -0400360out_destroy_handle:
Al Viro9f5e2f72016-02-16 19:54:13 -0500361 /* kfree(NULL) is safe */
362 kfree(readdir_response.dirent_array);
363out_vfree:
364 gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
365 vfree(dents_buf);
Mike Marshall5db11c22015-07-17 10:38:12 -0400366out_free_op:
367 op_release(new_op);
Yi Liu8bb8aef2015-11-24 15:12:14 -0500368 gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
Mike Marshall5db11c22015-07-17 10:38:12 -0400369 return ret;
370}
371
Yi Liu8bb8aef2015-11-24 15:12:14 -0500372static int orangefs_dir_open(struct inode *inode, struct file *file)
Mike Marshall5db11c22015-07-17 10:38:12 -0400373{
374 __u64 *ptoken;
375
376 file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
377 if (!file->private_data)
378 return -ENOMEM;
379
380 ptoken = file->private_data;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500381 *ptoken = ORANGEFS_READDIR_START;
Mike Marshall5db11c22015-07-17 10:38:12 -0400382 return 0;
383}
384
Yi Liu8bb8aef2015-11-24 15:12:14 -0500385static int orangefs_dir_release(struct inode *inode, struct file *file)
Mike Marshall5db11c22015-07-17 10:38:12 -0400386{
Yi Liu8bb8aef2015-11-24 15:12:14 -0500387 orangefs_flush_inode(inode);
Mike Marshall5db11c22015-07-17 10:38:12 -0400388 kfree(file->private_data);
389 return 0;
390}
391
Yi Liu8bb8aef2015-11-24 15:12:14 -0500392/** ORANGEFS implementation of VFS directory operations */
393const struct file_operations orangefs_dir_operations = {
Mike Marshall5db11c22015-07-17 10:38:12 -0400394 .read = generic_read_dir,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500395 .iterate = orangefs_readdir,
396 .open = orangefs_dir_open,
397 .release = orangefs_dir_release,
Mike Marshall5db11c22015-07-17 10:38:12 -0400398};