blob: e2c2699d8016274dbe2225b14c0ab8828ceae00f [file] [log] [blame]
Greg Kroah-Hartmanb2441312017-11-01 15:07:57 +01001// SPDX-License-Identifier: GPL-2.0
Mike Marshall5db11c22015-07-17 10:38:12 -04002/*
Martin Brandenburg382f4582017-04-25 15:37:59 -04003 * Copyright 2017 Omnibond Systems, L.L.C.
Mike Marshall5db11c22015-07-17 10:38:12 -04004 */
5
6#include "protocol.h"
Mike Marshall575e9462015-12-04 12:56:14 -05007#include "orangefs-kernel.h"
8#include "orangefs-bufmap.h"
Mike Marshall5db11c22015-07-17 10:38:12 -04009
/*
 * Header of one buffered chunk ("part") of directory data.
 *
 * parse_readdir() overlays this structure on the very start of a
 * received trailer buffer, so the structure and the entry data that
 * follows it live in the same allocation.
 */
struct orangefs_dir_part {
	/* Following part in the singly linked list, NULL for the last. */
	struct orangefs_dir_part *next;
	/*
	 * Number of bytes of directory entry data in this part, i.e. the
	 * trailer size minus the readdir response header.
	 */
	size_t len;
};
14
15struct orangefs_dir {
16 __u64 token;
17 struct orangefs_dir_part *part;
18 loff_t end;
19 int error;
20};
21
/*
 * Layout of a directory position: the bits at and above PART_SHIFT
 * select the part, the bits below it are the byte offset within that
 * part.  PART_SIZE and PART_MASK are derived from PART_SHIFT so the
 * three values cannot drift apart.
 */
#define PART_SHIFT (24)
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
25
Mike Marshall5db11c22015-07-17 10:38:12 -040026/*
Martin Brandenburg382f4582017-04-25 15:37:59 -040027 * There can be up to 512 directory entries. Each entry is encoded as
28 * follows:
29 * 4 bytes: string size (n)
30 * n bytes: string
31 * 1 byte: trailing zero
32 * padding to 8 bytes
33 * 16 bytes: khandle
34 * padding to 8 bytes
Martin Brandenburg382f4582017-04-25 15:37:59 -040035 *
36 * The trailer_buf starts with a struct orangefs_readdir_response_s
37 * which must be skipped to get to the directory data.
Martin Brandenburg480e3e52017-04-25 15:38:01 -040038 *
39 * The data which is received from the userspace daemon is termed a
40 * part and is stored in a linked list in case more than one part is
41 * needed for a large directory.
42 *
43 * The position pointer (ctx->pos) encodes the part and offset on which
44 * to begin reading at. Bits above PART_SHIFT encode the part and bits
45 * below PART_SHIFT encode the offset. Parts are stored in a linked
46 * list which grows as data is received from the server. The overhead
47 * associated with managing the list is presumed to be small compared to
48 * the overhead of communicating with the server.
49 *
50 * As data is received from the server, it is placed at the end of the
51 * part list. Data is parsed from the current position as it is needed.
52 * When data is determined to be corrupt, it is either because the
53 * userspace component has sent back corrupt data or because the file
54 * pointer has been moved to an invalid location. Since the two cannot
55 * be differentiated, return EIO.
56 *
 * Part zero is synthesized to contain `.' and `..'.  Part one is the
 * first part of the part list.
Mike Marshall5db11c22015-07-17 10:38:12 -040059 */
Martin Brandenburg382f4582017-04-25 15:37:59 -040060
Martin Brandenburg480e3e52017-04-25 15:38:01 -040061static int do_readdir(struct orangefs_inode_s *oi,
62 struct orangefs_dir *od, struct dentry *dentry,
63 struct orangefs_kernel_op_s *op)
Mike Marshall5db11c22015-07-17 10:38:12 -040064{
Martin Brandenburg382f4582017-04-25 15:37:59 -040065 struct orangefs_readdir_response_s *resp;
Martin Brandenburg382f4582017-04-25 15:37:59 -040066 int bufi, r;
Mike Marshall5db11c22015-07-17 10:38:12 -040067
Martin Brandenburgee3b8d32016-02-17 12:55:42 -050068 /*
Martin Brandenburg382f4582017-04-25 15:37:59 -040069 * Despite the badly named field, readdir does not use shared
70 * memory. However, there are a limited number of readdir
71 * slots, which must be allocated here. This flag simply tells
72 * the op scheduler to return the op here for retry.
Martin Brandenburgee3b8d32016-02-17 12:55:42 -050073 */
Martin Brandenburg382f4582017-04-25 15:37:59 -040074 op->uses_shared_memory = 1;
75 op->upcall.req.readdir.refn = oi->refn;
76 op->upcall.req.readdir.token = od->token;
77 op->upcall.req.readdir.max_dirent_count =
Martin Brandenburg7d221482016-01-04 15:05:28 -050078 ORANGEFS_MAX_DIRENT_COUNT_READDIR;
Mike Marshall5db11c22015-07-17 10:38:12 -040079
Martin Brandenburg382f4582017-04-25 15:37:59 -040080again:
81 bufi = orangefs_readdir_index_get();
82 if (bufi < 0) {
Martin Brandenburg382f4582017-04-25 15:37:59 -040083 od->error = bufi;
84 return bufi;
Mike Marshall5db11c22015-07-17 10:38:12 -040085 }
86
Martin Brandenburg382f4582017-04-25 15:37:59 -040087 op->upcall.req.readdir.buf_index = bufi;
88
89 r = service_operation(op, "orangefs_readdir",
90 get_interruptible_flag(dentry->d_inode));
91
92 orangefs_readdir_index_put(bufi);
93
94 if (op_state_purged(op)) {
95 if (r == -EAGAIN) {
96 vfree(op->downcall.trailer_buf);
97 goto again;
98 } else if (r == -EIO) {
99 vfree(op->downcall.trailer_buf);
Martin Brandenburg382f4582017-04-25 15:37:59 -0400100 od->error = r;
101 return r;
102 }
Mike Marshall5db11c22015-07-17 10:38:12 -0400103 }
104
Martin Brandenburg382f4582017-04-25 15:37:59 -0400105 if (r < 0) {
106 vfree(op->downcall.trailer_buf);
Martin Brandenburg382f4582017-04-25 15:37:59 -0400107 od->error = r;
108 return r;
109 } else if (op->downcall.status) {
110 vfree(op->downcall.trailer_buf);
Martin Brandenburg382f4582017-04-25 15:37:59 -0400111 od->error = op->downcall.status;
112 return op->downcall.status;
Mike Marshall5db11c22015-07-17 10:38:12 -0400113 }
114
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400115 /*
116 * The maximum size is size per entry times the 512 entries plus
117 * the header. This is well under the limit.
118 */
119 if (op->downcall.trailer_size > PART_SIZE) {
120 vfree(op->downcall.trailer_buf);
121 od->error = -EIO;
122 return -EIO;
123 }
124
Martin Brandenburg382f4582017-04-25 15:37:59 -0400125 resp = (struct orangefs_readdir_response_s *)
126 op->downcall.trailer_buf;
127 od->token = resp->token;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400128 return 0;
129}
Martin Brandenburg382f4582017-04-25 15:37:59 -0400130
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400131static int parse_readdir(struct orangefs_dir *od,
132 struct orangefs_kernel_op_s *op)
133{
134 struct orangefs_dir_part *part, *new;
135 size_t count;
136
137 count = 1;
138 part = od->part;
Martin Brandenburg2f713b52017-05-04 13:16:04 -0400139 while (part) {
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400140 count++;
Martin Brandenburg2f713b52017-05-04 13:16:04 -0400141 if (part->next)
142 part = part->next;
143 else
144 break;
Al Viro9f5e2f72016-02-16 19:54:13 -0500145 }
146
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400147 new = (void *)op->downcall.trailer_buf;
148 new->next = NULL;
149 new->len = op->downcall.trailer_size -
150 sizeof(struct orangefs_readdir_response_s);
151 if (!od->part)
152 od->part = new;
153 else
154 part->next = new;
155 count++;
156 od->end = count << PART_SHIFT;
157
Martin Brandenburg382f4582017-04-25 15:37:59 -0400158 return 0;
159}
Mike Marshall5db11c22015-07-17 10:38:12 -0400160
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400161static int orangefs_dir_more(struct orangefs_inode_s *oi,
162 struct orangefs_dir *od, struct dentry *dentry)
163{
164 struct orangefs_kernel_op_s *op;
165 int r;
166
167 op = op_alloc(ORANGEFS_VFS_OP_READDIR);
168 if (!op) {
169 od->error = -ENOMEM;
170 return -ENOMEM;
171 }
172 r = do_readdir(oi, od, dentry, op);
173 if (r) {
174 od->error = r;
175 goto out;
176 }
177 r = parse_readdir(od, op);
178 if (r) {
179 od->error = r;
180 goto out;
181 }
182
183 od->error = 0;
184out:
185 op_release(op);
186 return od->error;
187}
188
189static int fill_from_part(struct orangefs_dir_part *part,
190 struct dir_context *ctx)
191{
192 const int offset = sizeof(struct orangefs_readdir_response_s);
193 struct orangefs_khandle *khandle;
194 __u32 *len, padlen;
195 loff_t i;
196 char *s;
197 i = ctx->pos & ~PART_MASK;
198
199 /* The file offset from userspace is too large. */
200 if (i > part->len)
Martin Brandenburgbf15ba72017-05-02 12:15:10 -0400201 return 1;
202
203 /*
204 * If the seek pointer is positioned just before an entry it
205 * should find the next entry.
206 */
207 if (i % 8)
208 i = i + (8 - i%8)%8;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400209
210 while (i < part->len) {
211 if (part->len < i + sizeof *len)
Martin Brandenburgbf15ba72017-05-02 12:15:10 -0400212 break;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400213 len = (void *)part + offset + i;
214 /*
215 * len is the size of the string itself. padlen is the
216 * total size of the encoded string.
217 */
218 padlen = (sizeof *len + *len + 1) +
219 (8 - (sizeof *len + *len + 1)%8)%8;
220 if (part->len < i + padlen + sizeof *khandle)
Martin Brandenburgbf15ba72017-05-02 12:15:10 -0400221 goto next;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400222 s = (void *)part + offset + i + sizeof *len;
223 if (s[*len] != 0)
Martin Brandenburgbf15ba72017-05-02 12:15:10 -0400224 goto next;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400225 khandle = (void *)part + offset + i + padlen;
226 if (!dir_emit(ctx, s, *len,
227 orangefs_khandle_to_ino(khandle),
228 DT_UNKNOWN))
229 return 0;
230 i += padlen + sizeof *khandle;
231 i = i + (8 - i%8)%8;
232 BUG_ON(i > part->len);
233 ctx->pos = (ctx->pos & PART_MASK) | i;
Martin Brandenburgbf15ba72017-05-02 12:15:10 -0400234 continue;
235next:
236 i += 8;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400237 }
238 return 1;
239}
240
Martin Brandenburg382f4582017-04-25 15:37:59 -0400241static int orangefs_dir_fill(struct orangefs_inode_s *oi,
242 struct orangefs_dir *od, struct dentry *dentry,
243 struct dir_context *ctx)
244{
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400245 struct orangefs_dir_part *part;
246 size_t count;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400247
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400248 count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
249
250 part = od->part;
251 while (part->next && count) {
252 count--;
253 part = part->next;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400254 }
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400255 /* This means the userspace file offset is invalid. */
256 if (count) {
257 od->error = -EIO;
258 return -EIO;
259 }
260
261 while (part && part->len) {
262 int r;
263 r = fill_from_part(part, ctx);
264 if (r < 0) {
265 od->error = r;
266 return r;
267 } else if (r == 0) {
268 /* Userspace buffer is full. */
269 break;
270 } else {
271 /*
272 * The part ran out of data. Move to the next
273 * part. */
274 ctx->pos = (ctx->pos & PART_MASK) +
275 (1 << PART_SHIFT);
276 part = part->next;
277 }
278 }
Martin Brandenburg382f4582017-04-25 15:37:59 -0400279 return 0;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400280}
281
Martin Brandenburg942835d2017-05-02 12:15:11 -0400282static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
283 int whence)
284{
285 struct orangefs_dir *od = file->private_data;
286 /*
287 * Delete the stored data so userspace sees new directory
288 * entries.
289 */
290 if (!whence && offset < od->end) {
291 struct orangefs_dir_part *part = od->part;
292 while (part) {
293 struct orangefs_dir_part *next = part->next;
294 vfree(part);
295 part = next;
296 }
297 od->token = ORANGEFS_ITERATE_START;
298 od->part = NULL;
299 od->end = 1 << PART_SHIFT;
300 }
301 return default_llseek(file, offset, whence);
302}
303
Martin Brandenburg382f4582017-04-25 15:37:59 -0400304static int orangefs_dir_iterate(struct file *file,
305 struct dir_context *ctx)
306{
307 struct orangefs_inode_s *oi;
308 struct orangefs_dir *od;
309 struct dentry *dentry;
310 int r;
311
312 dentry = file->f_path.dentry;
313 oi = ORANGEFS_I(dentry->d_inode);
314 od = file->private_data;
315
316 if (od->error)
317 return od->error;
318
319 if (ctx->pos == 0) {
320 if (!dir_emit_dot(file, ctx))
321 return 0;
Mike Marshall5db11c22015-07-17 10:38:12 -0400322 ctx->pos++;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400323 }
324 if (ctx->pos == 1) {
325 if (!dir_emit_dotdot(file, ctx))
326 return 0;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400327 ctx->pos = 1 << PART_SHIFT;
Mike Marshall5db11c22015-07-17 10:38:12 -0400328 }
329
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400330 /*
331 * The seek position is in the first synthesized part but is not
332 * valid.
333 */
334 if ((ctx->pos & PART_MASK) == 0)
335 return -EIO;
336
Martin Brandenburg382f4582017-04-25 15:37:59 -0400337 r = 0;
338
Martin Brandenburg72f66b82017-04-25 15:38:00 -0400339 /*
340 * Must read more if the user has sought past what has been read
341 * so far. Stop a user who has sought past the end.
342 */
Martin Brandenburg7b796ae2017-04-25 15:38:02 -0400343 while (od->token != ORANGEFS_ITERATE_END &&
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400344 ctx->pos > od->end) {
Martin Brandenburg72f66b82017-04-25 15:38:00 -0400345 r = orangefs_dir_more(oi, od, dentry);
346 if (r)
347 return r;
348 }
Martin Brandenburg7b796ae2017-04-25 15:38:02 -0400349 if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
Martin Brandenburg72f66b82017-04-25 15:38:00 -0400350 return -EIO;
Martin Brandenburg72f66b82017-04-25 15:38:00 -0400351
352 /* Then try to fill if there's any left in the buffer. */
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400353 if (ctx->pos < od->end) {
Martin Brandenburg382f4582017-04-25 15:37:59 -0400354 r = orangefs_dir_fill(oi, od, dentry, ctx);
355 if (r)
356 return r;
Mike Marshall5db11c22015-07-17 10:38:12 -0400357 }
358
Martin Brandenburg72f66b82017-04-25 15:38:00 -0400359 /* Finally get some more and try to fill. */
Martin Brandenburg7b796ae2017-04-25 15:38:02 -0400360 if (od->token != ORANGEFS_ITERATE_END) {
Martin Brandenburg382f4582017-04-25 15:37:59 -0400361 r = orangefs_dir_more(oi, od, dentry);
362 if (r)
363 return r;
364 r = orangefs_dir_fill(oi, od, dentry, ctx);
Mike Marshall5db11c22015-07-17 10:38:12 -0400365 }
366
Martin Brandenburg382f4582017-04-25 15:37:59 -0400367 return r;
Mike Marshall5db11c22015-07-17 10:38:12 -0400368}
369
Yi Liu8bb8aef2015-11-24 15:12:14 -0500370static int orangefs_dir_open(struct inode *inode, struct file *file)
Mike Marshall5db11c22015-07-17 10:38:12 -0400371{
Martin Brandenburg382f4582017-04-25 15:37:59 -0400372 struct orangefs_dir *od;
373 file->private_data = kmalloc(sizeof(struct orangefs_dir),
374 GFP_KERNEL);
Mike Marshall5db11c22015-07-17 10:38:12 -0400375 if (!file->private_data)
376 return -ENOMEM;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400377 od = file->private_data;
Martin Brandenburg7b796ae2017-04-25 15:38:02 -0400378 od->token = ORANGEFS_ITERATE_START;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400379 od->part = NULL;
380 od->end = 1 << PART_SHIFT;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400381 od->error = 0;
Mike Marshall5db11c22015-07-17 10:38:12 -0400382 return 0;
383}
384
Yi Liu8bb8aef2015-11-24 15:12:14 -0500385static int orangefs_dir_release(struct inode *inode, struct file *file)
Mike Marshall5db11c22015-07-17 10:38:12 -0400386{
Martin Brandenburg382f4582017-04-25 15:37:59 -0400387 struct orangefs_dir *od = file->private_data;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400388 struct orangefs_dir_part *part = od->part;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400389 while (part) {
390 struct orangefs_dir_part *next = part->next;
391 vfree(part);
392 part = next;
393 }
Martin Brandenburg382f4582017-04-25 15:37:59 -0400394 kfree(od);
Mike Marshall5db11c22015-07-17 10:38:12 -0400395 return 0;
396}
397
Yi Liu8bb8aef2015-11-24 15:12:14 -0500398const struct file_operations orangefs_dir_operations = {
Martin Brandenburg942835d2017-05-02 12:15:11 -0400399 .llseek = orangefs_dir_llseek,
Mike Marshall5db11c22015-07-17 10:38:12 -0400400 .read = generic_read_dir,
Martin Brandenburg382f4582017-04-25 15:37:59 -0400401 .iterate = orangefs_dir_iterate,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500402 .open = orangefs_dir_open,
Martin Brandenburg382f4582017-04-25 15:37:59 -0400403 .release = orangefs_dir_release
Mike Marshall5db11c22015-07-17 10:38:12 -0400404};