blob: d327cbd17756dcbc18ca787602e436c24e70d198 [file] [log] [blame]
Mike Marshall5db11c22015-07-17 10:38:12 -04001/*
Martin Brandenburg382f4582017-04-25 15:37:59 -04002 * Copyright 2017 Omnibond Systems, L.L.C.
Mike Marshall5db11c22015-07-17 10:38:12 -04003 */
4
5#include "protocol.h"
Mike Marshall575e9462015-12-04 12:56:14 -05006#include "orangefs-kernel.h"
7#include "orangefs-bufmap.h"
Mike Marshall5db11c22015-07-17 10:38:12 -04008
/*
 * Header of one chunk ("part") of directory data received from the
 * userspace daemon.  parse_readdir() places this header over the start
 * of the trailer buffer itself, so the field order and sizes must stay
 * exactly as they are.
 */
struct orangefs_dir_part {
	/* Next part in the singly linked list, or NULL at the tail. */
	struct orangefs_dir_part *next;
	/* Bytes of directory data that follow the response header. */
	size_t len;
};
13
14struct orangefs_dir {
15 __u64 token;
16 struct orangefs_dir_part *part;
17 loff_t end;
18 int error;
19};
20
/*
 * A directory position is split in two: the bits at and above
 * PART_SHIFT select the part, the bits below it are the byte offset
 * inside that part.  PART_SIZE is therefore the largest size a single
 * part may have.
 */
#define PART_SHIFT (24)
#define PART_SIZE (1 << PART_SHIFT)
#define PART_MASK (~(PART_SIZE - 1))
24
/*
 * There can be up to 512 directory entries.  Each entry is encoded as
 * follows:
 * 4 bytes: string size (n)
 * n bytes: string
 * 1 byte: trailing zero
 * padding to 8 bytes
 * 16 bytes: khandle
 * padding to 8 bytes
 *
 * The trailer_buf starts with a struct orangefs_readdir_response_s
 * which must be skipped to get to the directory data.
 *
 * The data which is received from the userspace daemon is termed a
 * part and is stored in a linked list in case more than one part is
 * needed for a large directory.
 *
 * The position pointer (ctx->pos) encodes the part and offset on which
 * to begin reading at.  Bits above PART_SHIFT encode the part and bits
 * below PART_SHIFT encode the offset.  Parts are stored in a linked
 * list which grows as data is received from the server.  The overhead
 * associated with managing the list is presumed to be small compared to
 * the overhead of communicating with the server.
 *
 * As data is received from the server, it is placed at the end of the
 * part list.  Data is parsed from the current position as it is needed.
 * When data is determined to be corrupt, it is either because the
 * userspace component has sent back corrupt data or because the file
 * pointer has been moved to an invalid location.  Since the two cannot
 * be differentiated, return EIO.
 *
 * Part zero is synthesized to contain `.' and `..'.  Part one is the
 * first part of the part list.
 */
Martin Brandenburg382f4582017-04-25 15:37:59 -040059
Martin Brandenburg480e3e52017-04-25 15:38:01 -040060static int do_readdir(struct orangefs_inode_s *oi,
61 struct orangefs_dir *od, struct dentry *dentry,
62 struct orangefs_kernel_op_s *op)
Mike Marshall5db11c22015-07-17 10:38:12 -040063{
Martin Brandenburg382f4582017-04-25 15:37:59 -040064 struct orangefs_readdir_response_s *resp;
Martin Brandenburg382f4582017-04-25 15:37:59 -040065 int bufi, r;
Mike Marshall5db11c22015-07-17 10:38:12 -040066
Martin Brandenburgee3b8d32016-02-17 12:55:42 -050067 /*
Martin Brandenburg382f4582017-04-25 15:37:59 -040068 * Despite the badly named field, readdir does not use shared
69 * memory. However, there are a limited number of readdir
70 * slots, which must be allocated here. This flag simply tells
71 * the op scheduler to return the op here for retry.
Martin Brandenburgee3b8d32016-02-17 12:55:42 -050072 */
Martin Brandenburg382f4582017-04-25 15:37:59 -040073 op->uses_shared_memory = 1;
74 op->upcall.req.readdir.refn = oi->refn;
75 op->upcall.req.readdir.token = od->token;
76 op->upcall.req.readdir.max_dirent_count =
Martin Brandenburg7d221482016-01-04 15:05:28 -050077 ORANGEFS_MAX_DIRENT_COUNT_READDIR;
Mike Marshall5db11c22015-07-17 10:38:12 -040078
Martin Brandenburg382f4582017-04-25 15:37:59 -040079again:
80 bufi = orangefs_readdir_index_get();
81 if (bufi < 0) {
Martin Brandenburg382f4582017-04-25 15:37:59 -040082 od->error = bufi;
83 return bufi;
Mike Marshall5db11c22015-07-17 10:38:12 -040084 }
85
Martin Brandenburg382f4582017-04-25 15:37:59 -040086 op->upcall.req.readdir.buf_index = bufi;
87
88 r = service_operation(op, "orangefs_readdir",
89 get_interruptible_flag(dentry->d_inode));
90
91 orangefs_readdir_index_put(bufi);
92
93 if (op_state_purged(op)) {
94 if (r == -EAGAIN) {
95 vfree(op->downcall.trailer_buf);
96 goto again;
97 } else if (r == -EIO) {
98 vfree(op->downcall.trailer_buf);
Martin Brandenburg382f4582017-04-25 15:37:59 -040099 od->error = r;
100 return r;
101 }
Mike Marshall5db11c22015-07-17 10:38:12 -0400102 }
103
Martin Brandenburg382f4582017-04-25 15:37:59 -0400104 if (r < 0) {
105 vfree(op->downcall.trailer_buf);
Martin Brandenburg382f4582017-04-25 15:37:59 -0400106 od->error = r;
107 return r;
108 } else if (op->downcall.status) {
109 vfree(op->downcall.trailer_buf);
Martin Brandenburg382f4582017-04-25 15:37:59 -0400110 od->error = op->downcall.status;
111 return op->downcall.status;
Mike Marshall5db11c22015-07-17 10:38:12 -0400112 }
113
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400114 /*
115 * The maximum size is size per entry times the 512 entries plus
116 * the header. This is well under the limit.
117 */
118 if (op->downcall.trailer_size > PART_SIZE) {
119 vfree(op->downcall.trailer_buf);
120 od->error = -EIO;
121 return -EIO;
122 }
123
Martin Brandenburg382f4582017-04-25 15:37:59 -0400124 resp = (struct orangefs_readdir_response_s *)
125 op->downcall.trailer_buf;
126 od->token = resp->token;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400127 return 0;
128}
Martin Brandenburg382f4582017-04-25 15:37:59 -0400129
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400130static int parse_readdir(struct orangefs_dir *od,
131 struct orangefs_kernel_op_s *op)
132{
133 struct orangefs_dir_part *part, *new;
134 size_t count;
135
136 count = 1;
137 part = od->part;
Martin Brandenburg2f713b52017-05-04 13:16:04 -0400138 while (part) {
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400139 count++;
Martin Brandenburg2f713b52017-05-04 13:16:04 -0400140 if (part->next)
141 part = part->next;
142 else
143 break;
Al Viro9f5e2f72016-02-16 19:54:13 -0500144 }
145
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400146 new = (void *)op->downcall.trailer_buf;
147 new->next = NULL;
148 new->len = op->downcall.trailer_size -
149 sizeof(struct orangefs_readdir_response_s);
150 if (!od->part)
151 od->part = new;
152 else
153 part->next = new;
154 count++;
155 od->end = count << PART_SHIFT;
156
Martin Brandenburg382f4582017-04-25 15:37:59 -0400157 return 0;
158}
Mike Marshall5db11c22015-07-17 10:38:12 -0400159
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400160static int orangefs_dir_more(struct orangefs_inode_s *oi,
161 struct orangefs_dir *od, struct dentry *dentry)
162{
163 struct orangefs_kernel_op_s *op;
164 int r;
165
166 op = op_alloc(ORANGEFS_VFS_OP_READDIR);
167 if (!op) {
168 od->error = -ENOMEM;
169 return -ENOMEM;
170 }
171 r = do_readdir(oi, od, dentry, op);
172 if (r) {
173 od->error = r;
174 goto out;
175 }
176 r = parse_readdir(od, op);
177 if (r) {
178 od->error = r;
179 goto out;
180 }
181
182 od->error = 0;
183out:
184 op_release(op);
185 return od->error;
186}
187
188static int fill_from_part(struct orangefs_dir_part *part,
189 struct dir_context *ctx)
190{
191 const int offset = sizeof(struct orangefs_readdir_response_s);
192 struct orangefs_khandle *khandle;
193 __u32 *len, padlen;
194 loff_t i;
195 char *s;
196 i = ctx->pos & ~PART_MASK;
197
198 /* The file offset from userspace is too large. */
199 if (i > part->len)
Martin Brandenburgbf15ba72017-05-02 12:15:10 -0400200 return 1;
201
202 /*
203 * If the seek pointer is positioned just before an entry it
204 * should find the next entry.
205 */
206 if (i % 8)
207 i = i + (8 - i%8)%8;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400208
209 while (i < part->len) {
210 if (part->len < i + sizeof *len)
Martin Brandenburgbf15ba72017-05-02 12:15:10 -0400211 break;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400212 len = (void *)part + offset + i;
213 /*
214 * len is the size of the string itself. padlen is the
215 * total size of the encoded string.
216 */
217 padlen = (sizeof *len + *len + 1) +
218 (8 - (sizeof *len + *len + 1)%8)%8;
219 if (part->len < i + padlen + sizeof *khandle)
Martin Brandenburgbf15ba72017-05-02 12:15:10 -0400220 goto next;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400221 s = (void *)part + offset + i + sizeof *len;
222 if (s[*len] != 0)
Martin Brandenburgbf15ba72017-05-02 12:15:10 -0400223 goto next;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400224 khandle = (void *)part + offset + i + padlen;
225 if (!dir_emit(ctx, s, *len,
226 orangefs_khandle_to_ino(khandle),
227 DT_UNKNOWN))
228 return 0;
229 i += padlen + sizeof *khandle;
230 i = i + (8 - i%8)%8;
231 BUG_ON(i > part->len);
232 ctx->pos = (ctx->pos & PART_MASK) | i;
Martin Brandenburgbf15ba72017-05-02 12:15:10 -0400233 continue;
234next:
235 i += 8;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400236 }
237 return 1;
238}
239
Martin Brandenburg382f4582017-04-25 15:37:59 -0400240static int orangefs_dir_fill(struct orangefs_inode_s *oi,
241 struct orangefs_dir *od, struct dentry *dentry,
242 struct dir_context *ctx)
243{
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400244 struct orangefs_dir_part *part;
245 size_t count;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400246
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400247 count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
248
249 part = od->part;
250 while (part->next && count) {
251 count--;
252 part = part->next;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400253 }
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400254 /* This means the userspace file offset is invalid. */
255 if (count) {
256 od->error = -EIO;
257 return -EIO;
258 }
259
260 while (part && part->len) {
261 int r;
262 r = fill_from_part(part, ctx);
263 if (r < 0) {
264 od->error = r;
265 return r;
266 } else if (r == 0) {
267 /* Userspace buffer is full. */
268 break;
269 } else {
270 /*
271 * The part ran out of data. Move to the next
272 * part. */
273 ctx->pos = (ctx->pos & PART_MASK) +
274 (1 << PART_SHIFT);
275 part = part->next;
276 }
277 }
Martin Brandenburg382f4582017-04-25 15:37:59 -0400278 return 0;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400279}
280
Martin Brandenburg942835d2017-05-02 12:15:11 -0400281static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
282 int whence)
283{
284 struct orangefs_dir *od = file->private_data;
285 /*
286 * Delete the stored data so userspace sees new directory
287 * entries.
288 */
289 if (!whence && offset < od->end) {
290 struct orangefs_dir_part *part = od->part;
291 while (part) {
292 struct orangefs_dir_part *next = part->next;
293 vfree(part);
294 part = next;
295 }
296 od->token = ORANGEFS_ITERATE_START;
297 od->part = NULL;
298 od->end = 1 << PART_SHIFT;
299 }
300 return default_llseek(file, offset, whence);
301}
302
Martin Brandenburg382f4582017-04-25 15:37:59 -0400303static int orangefs_dir_iterate(struct file *file,
304 struct dir_context *ctx)
305{
306 struct orangefs_inode_s *oi;
307 struct orangefs_dir *od;
308 struct dentry *dentry;
309 int r;
310
311 dentry = file->f_path.dentry;
312 oi = ORANGEFS_I(dentry->d_inode);
313 od = file->private_data;
314
315 if (od->error)
316 return od->error;
317
318 if (ctx->pos == 0) {
319 if (!dir_emit_dot(file, ctx))
320 return 0;
Mike Marshall5db11c22015-07-17 10:38:12 -0400321 ctx->pos++;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400322 }
323 if (ctx->pos == 1) {
324 if (!dir_emit_dotdot(file, ctx))
325 return 0;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400326 ctx->pos = 1 << PART_SHIFT;
Mike Marshall5db11c22015-07-17 10:38:12 -0400327 }
328
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400329 /*
330 * The seek position is in the first synthesized part but is not
331 * valid.
332 */
333 if ((ctx->pos & PART_MASK) == 0)
334 return -EIO;
335
Martin Brandenburg382f4582017-04-25 15:37:59 -0400336 r = 0;
337
Martin Brandenburg72f66b82017-04-25 15:38:00 -0400338 /*
339 * Must read more if the user has sought past what has been read
340 * so far. Stop a user who has sought past the end.
341 */
Martin Brandenburg7b796ae2017-04-25 15:38:02 -0400342 while (od->token != ORANGEFS_ITERATE_END &&
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400343 ctx->pos > od->end) {
Martin Brandenburg72f66b82017-04-25 15:38:00 -0400344 r = orangefs_dir_more(oi, od, dentry);
345 if (r)
346 return r;
347 }
Martin Brandenburg7b796ae2017-04-25 15:38:02 -0400348 if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
Martin Brandenburg72f66b82017-04-25 15:38:00 -0400349 return -EIO;
Martin Brandenburg72f66b82017-04-25 15:38:00 -0400350
351 /* Then try to fill if there's any left in the buffer. */
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400352 if (ctx->pos < od->end) {
Martin Brandenburg382f4582017-04-25 15:37:59 -0400353 r = orangefs_dir_fill(oi, od, dentry, ctx);
354 if (r)
355 return r;
Mike Marshall5db11c22015-07-17 10:38:12 -0400356 }
357
Martin Brandenburg72f66b82017-04-25 15:38:00 -0400358 /* Finally get some more and try to fill. */
Martin Brandenburg7b796ae2017-04-25 15:38:02 -0400359 if (od->token != ORANGEFS_ITERATE_END) {
Martin Brandenburg382f4582017-04-25 15:37:59 -0400360 r = orangefs_dir_more(oi, od, dentry);
361 if (r)
362 return r;
363 r = orangefs_dir_fill(oi, od, dentry, ctx);
Mike Marshall5db11c22015-07-17 10:38:12 -0400364 }
365
Martin Brandenburg382f4582017-04-25 15:37:59 -0400366 return r;
Mike Marshall5db11c22015-07-17 10:38:12 -0400367}
368
Yi Liu8bb8aef2015-11-24 15:12:14 -0500369static int orangefs_dir_open(struct inode *inode, struct file *file)
Mike Marshall5db11c22015-07-17 10:38:12 -0400370{
Martin Brandenburg382f4582017-04-25 15:37:59 -0400371 struct orangefs_dir *od;
372 file->private_data = kmalloc(sizeof(struct orangefs_dir),
373 GFP_KERNEL);
Mike Marshall5db11c22015-07-17 10:38:12 -0400374 if (!file->private_data)
375 return -ENOMEM;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400376 od = file->private_data;
Martin Brandenburg7b796ae2017-04-25 15:38:02 -0400377 od->token = ORANGEFS_ITERATE_START;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400378 od->part = NULL;
379 od->end = 1 << PART_SHIFT;
Martin Brandenburg382f4582017-04-25 15:37:59 -0400380 od->error = 0;
Mike Marshall5db11c22015-07-17 10:38:12 -0400381 return 0;
382}
383
Yi Liu8bb8aef2015-11-24 15:12:14 -0500384static int orangefs_dir_release(struct inode *inode, struct file *file)
Mike Marshall5db11c22015-07-17 10:38:12 -0400385{
Martin Brandenburg382f4582017-04-25 15:37:59 -0400386 struct orangefs_dir *od = file->private_data;
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400387 struct orangefs_dir_part *part = od->part;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500388 orangefs_flush_inode(inode);
Martin Brandenburg480e3e52017-04-25 15:38:01 -0400389 while (part) {
390 struct orangefs_dir_part *next = part->next;
391 vfree(part);
392 part = next;
393 }
Martin Brandenburg382f4582017-04-25 15:37:59 -0400394 kfree(od);
Mike Marshall5db11c22015-07-17 10:38:12 -0400395 return 0;
396}
397
Yi Liu8bb8aef2015-11-24 15:12:14 -0500398const struct file_operations orangefs_dir_operations = {
Martin Brandenburg942835d2017-05-02 12:15:11 -0400399 .llseek = orangefs_dir_llseek,
Mike Marshall5db11c22015-07-17 10:38:12 -0400400 .read = generic_read_dir,
Martin Brandenburg382f4582017-04-25 15:37:59 -0400401 .iterate = orangefs_dir_iterate,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500402 .open = orangefs_dir_open,
Martin Brandenburg382f4582017-04-25 15:37:59 -0400403 .release = orangefs_dir_release
Mike Marshall5db11c22015-07-17 10:38:12 -0400404};