Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 1 | /* |
| 2 | * (C) 2001 Clemson University and The University of Chicago |
| 3 | * |
| 4 | * See COPYING in top-level directory. |
| 5 | */ |
| 6 | |
| 7 | #include "protocol.h" |
| 8 | #include "pvfs2-kernel.h" |
| 9 | #include "pvfs2-bufmap.h" |
| 10 | |
| 11 | struct readdir_handle_s { |
| 12 | int buffer_index; |
| 13 | struct pvfs2_readdir_response_s readdir_response; |
| 14 | void *dents_buf; |
| 15 | }; |
| 16 | |
| 17 | /* |
| 18 | * decode routine needed by kmod to make sense of the shared page for readdirs. |
| 19 | */ |
Al Viro | 8092895 | 2015-10-09 18:11:10 -0400 | [diff] [blame^] | 20 | static long decode_dirents(char *ptr, size_t size, |
| 21 | struct pvfs2_readdir_response_s *readdir) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 22 | { |
| 23 | int i; |
| 24 | struct pvfs2_readdir_response_s *rd = |
| 25 | (struct pvfs2_readdir_response_s *) ptr; |
| 26 | char *buf = ptr; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 27 | |
Al Viro | 8092895 | 2015-10-09 18:11:10 -0400 | [diff] [blame^] | 28 | if (size < offsetof(struct pvfs2_readdir_response_s, dirent_array)) |
| 29 | return -EINVAL; |
| 30 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 31 | readdir->token = rd->token; |
| 32 | readdir->pvfs_dirent_outcount = rd->pvfs_dirent_outcount; |
Al Viro | ef4af94 | 2015-10-09 13:23:16 -0400 | [diff] [blame] | 33 | readdir->dirent_array = kcalloc(readdir->pvfs_dirent_outcount, |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 34 | sizeof(*readdir->dirent_array), |
| 35 | GFP_KERNEL); |
| 36 | if (readdir->dirent_array == NULL) |
| 37 | return -ENOMEM; |
Al Viro | 8092895 | 2015-10-09 18:11:10 -0400 | [diff] [blame^] | 38 | |
Al Viro | 9be68b0 | 2015-10-09 17:43:15 -0400 | [diff] [blame] | 39 | buf += offsetof(struct pvfs2_readdir_response_s, dirent_array); |
Al Viro | 8092895 | 2015-10-09 18:11:10 -0400 | [diff] [blame^] | 40 | size -= offsetof(struct pvfs2_readdir_response_s, dirent_array); |
| 41 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 42 | for (i = 0; i < readdir->pvfs_dirent_outcount; i++) { |
Al Viro | 8092895 | 2015-10-09 18:11:10 -0400 | [diff] [blame^] | 43 | __u32 len; |
| 44 | |
| 45 | if (size < 4) |
| 46 | goto Einval; |
| 47 | |
| 48 | len = *(__u32 *)buf; |
| 49 | if (len >= (unsigned)-24) |
| 50 | goto Einval; |
| 51 | |
Al Viro | 9be68b0 | 2015-10-09 17:43:15 -0400 | [diff] [blame] | 52 | readdir->dirent_array[i].d_name = buf + 4; |
Al Viro | 9be68b0 | 2015-10-09 17:43:15 -0400 | [diff] [blame] | 53 | readdir->dirent_array[i].d_length = len; |
Al Viro | 8092895 | 2015-10-09 18:11:10 -0400 | [diff] [blame^] | 54 | |
| 55 | len = roundup8(4 + len + 1); |
| 56 | if (size < len + 16) |
| 57 | goto Einval; |
| 58 | size -= len + 16; |
| 59 | |
| 60 | buf += len; |
| 61 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 62 | readdir->dirent_array[i].khandle = |
Al Viro | 9be68b0 | 2015-10-09 17:43:15 -0400 | [diff] [blame] | 63 | *(struct pvfs2_khandle *) buf; |
| 64 | buf += 16; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 65 | } |
Al Viro | 9be68b0 | 2015-10-09 17:43:15 -0400 | [diff] [blame] | 66 | return buf - ptr; |
Al Viro | 8092895 | 2015-10-09 18:11:10 -0400 | [diff] [blame^] | 67 | Einval: |
| 68 | kfree(readdir->dirent_array); |
| 69 | readdir->dirent_array = NULL; |
| 70 | return -EINVAL; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 71 | } |
| 72 | |
| 73 | static long readdir_handle_ctor(struct readdir_handle_s *rhandle, void *buf, |
Al Viro | 8092895 | 2015-10-09 18:11:10 -0400 | [diff] [blame^] | 74 | size_t size, int buffer_index) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 75 | { |
| 76 | long ret; |
| 77 | |
| 78 | if (buf == NULL) { |
| 79 | gossip_err |
| 80 | ("Invalid NULL buffer specified in readdir_handle_ctor\n"); |
| 81 | return -ENOMEM; |
| 82 | } |
| 83 | if (buffer_index < 0) { |
| 84 | gossip_err |
| 85 | ("Invalid buffer index specified in readdir_handle_ctor\n"); |
| 86 | return -EINVAL; |
| 87 | } |
| 88 | rhandle->buffer_index = buffer_index; |
| 89 | rhandle->dents_buf = buf; |
Al Viro | 8092895 | 2015-10-09 18:11:10 -0400 | [diff] [blame^] | 90 | ret = decode_dirents(buf, size, &rhandle->readdir_response); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 91 | if (ret < 0) { |
| 92 | gossip_err("Could not decode readdir from buffer %ld\n", ret); |
| 93 | rhandle->buffer_index = -1; |
| 94 | gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", buf); |
| 95 | vfree(buf); |
| 96 | rhandle->dents_buf = NULL; |
| 97 | } |
| 98 | return ret; |
| 99 | } |
| 100 | |
| 101 | static void readdir_handle_dtor(struct pvfs2_bufmap *bufmap, |
| 102 | struct readdir_handle_s *rhandle) |
| 103 | { |
| 104 | if (rhandle == NULL) |
| 105 | return; |
| 106 | |
| 107 | /* kfree(NULL) is safe */ |
| 108 | kfree(rhandle->readdir_response.dirent_array); |
| 109 | rhandle->readdir_response.dirent_array = NULL; |
| 110 | |
| 111 | if (rhandle->buffer_index >= 0) { |
| 112 | readdir_index_put(bufmap, rhandle->buffer_index); |
| 113 | rhandle->buffer_index = -1; |
| 114 | } |
| 115 | if (rhandle->dents_buf) { |
| 116 | gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", |
| 117 | rhandle->dents_buf); |
| 118 | vfree(rhandle->dents_buf); |
| 119 | rhandle->dents_buf = NULL; |
| 120 | } |
| 121 | } |
| 122 | |
| 123 | /* |
| 124 | * Read directory entries from an instance of an open directory. |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 125 | */ |
| 126 | static int pvfs2_readdir(struct file *file, struct dir_context *ctx) |
| 127 | { |
| 128 | struct pvfs2_bufmap *bufmap = NULL; |
| 129 | int ret = 0; |
| 130 | int buffer_index; |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 131 | /* |
| 132 | * ptoken supports Orangefs' distributed directory logic, added |
| 133 | * in 2.9.2. |
| 134 | */ |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 135 | __u64 *ptoken = file->private_data; |
| 136 | __u64 pos = 0; |
| 137 | ino_t ino = 0; |
| 138 | struct dentry *dentry = file->f_path.dentry; |
| 139 | struct pvfs2_kernel_op_s *new_op = NULL; |
| 140 | struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(dentry->d_inode); |
| 141 | int buffer_full = 0; |
| 142 | struct readdir_handle_s rhandle; |
| 143 | int i = 0; |
| 144 | int len = 0; |
| 145 | ino_t current_ino = 0; |
| 146 | char *current_entry = NULL; |
| 147 | long bytes_decoded; |
| 148 | |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 149 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 150 | "%s: ctx->pos:%lld, ptoken = %llu\n", |
| 151 | __func__, |
| 152 | lld(ctx->pos), |
| 153 | llu(*ptoken)); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 154 | |
| 155 | pos = (__u64) ctx->pos; |
| 156 | |
| 157 | /* are we done? */ |
| 158 | if (pos == PVFS_READDIR_END) { |
| 159 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 160 | "Skipping to termination path\n"); |
| 161 | return 0; |
| 162 | } |
| 163 | |
| 164 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 165 | "pvfs2_readdir called on %s (pos=%llu)\n", |
| 166 | dentry->d_name.name, llu(pos)); |
| 167 | |
| 168 | rhandle.buffer_index = -1; |
| 169 | rhandle.dents_buf = NULL; |
| 170 | memset(&rhandle.readdir_response, 0, sizeof(rhandle.readdir_response)); |
| 171 | |
| 172 | new_op = op_alloc(PVFS2_VFS_OP_READDIR); |
| 173 | if (!new_op) |
| 174 | return -ENOMEM; |
| 175 | |
| 176 | new_op->uses_shared_memory = 1; |
| 177 | new_op->upcall.req.readdir.refn = pvfs2_inode->refn; |
| 178 | new_op->upcall.req.readdir.max_dirent_count = MAX_DIRENT_COUNT_READDIR; |
| 179 | |
| 180 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 181 | "%s: upcall.req.readdir.refn.khandle: %pU\n", |
| 182 | __func__, |
| 183 | &new_op->upcall.req.readdir.refn.khandle); |
| 184 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 185 | new_op->upcall.req.readdir.token = *ptoken; |
| 186 | |
| 187 | get_new_buffer_index: |
| 188 | ret = readdir_index_get(&bufmap, &buffer_index); |
| 189 | if (ret < 0) { |
| 190 | gossip_lerr("pvfs2_readdir: readdir_index_get() failure (%d)\n", |
| 191 | ret); |
| 192 | goto out_free_op; |
| 193 | } |
| 194 | new_op->upcall.req.readdir.buf_index = buffer_index; |
| 195 | |
| 196 | ret = service_operation(new_op, |
| 197 | "pvfs2_readdir", |
| 198 | get_interruptible_flag(dentry->d_inode)); |
| 199 | |
| 200 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 201 | "Readdir downcall status is %d. ret:%d\n", |
| 202 | new_op->downcall.status, |
| 203 | ret); |
| 204 | |
| 205 | if (ret == -EAGAIN && op_state_purged(new_op)) { |
| 206 | /* |
| 207 | * readdir shared memory aread has been wiped due to |
| 208 | * pvfs2-client-core restarting, so we must get a new |
| 209 | * index into the shared memory. |
| 210 | */ |
| 211 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 212 | "%s: Getting new buffer_index for retry of readdir..\n", |
| 213 | __func__); |
| 214 | readdir_index_put(bufmap, buffer_index); |
| 215 | goto get_new_buffer_index; |
| 216 | } |
| 217 | |
| 218 | if (ret == -EIO && op_state_purged(new_op)) { |
| 219 | gossip_err("%s: Client is down. Aborting readdir call.\n", |
| 220 | __func__); |
| 221 | readdir_index_put(bufmap, buffer_index); |
| 222 | goto out_free_op; |
| 223 | } |
| 224 | |
| 225 | if (ret < 0 || new_op->downcall.status != 0) { |
| 226 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 227 | "Readdir request failed. Status:%d\n", |
| 228 | new_op->downcall.status); |
| 229 | readdir_index_put(bufmap, buffer_index); |
| 230 | if (ret >= 0) |
| 231 | ret = new_op->downcall.status; |
| 232 | goto out_free_op; |
| 233 | } |
| 234 | |
| 235 | bytes_decoded = |
| 236 | readdir_handle_ctor(&rhandle, |
| 237 | new_op->downcall.trailer_buf, |
Al Viro | 8092895 | 2015-10-09 18:11:10 -0400 | [diff] [blame^] | 238 | new_op->downcall.trailer_size, |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 239 | buffer_index); |
| 240 | if (bytes_decoded < 0) { |
| 241 | gossip_err("pvfs2_readdir: Could not decode trailer buffer into a readdir response %d\n", |
| 242 | ret); |
| 243 | ret = bytes_decoded; |
| 244 | readdir_index_put(bufmap, buffer_index); |
| 245 | goto out_free_op; |
| 246 | } |
| 247 | |
| 248 | if (bytes_decoded != new_op->downcall.trailer_size) { |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 249 | gossip_err("pvfs2_readdir: # bytes decoded (%ld) " |
| 250 | "!= trailer size (%ld)\n", |
| 251 | bytes_decoded, |
| 252 | (long)new_op->downcall.trailer_size); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 253 | ret = -EINVAL; |
| 254 | goto out_destroy_handle; |
| 255 | } |
| 256 | |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 257 | /* |
| 258 | * pvfs2 doesn't actually store dot and dot-dot, but |
| 259 | * we need to have them represented. |
| 260 | */ |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 261 | if (pos == 0) { |
| 262 | ino = get_ino_from_khandle(dentry->d_inode); |
| 263 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 264 | "%s: calling dir_emit of \".\" with pos = %llu\n", |
| 265 | __func__, |
| 266 | llu(pos)); |
| 267 | ret = dir_emit(ctx, ".", 1, ino, DT_DIR); |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 268 | pos += 1; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 269 | } |
| 270 | |
| 271 | if (pos == 1) { |
| 272 | ino = get_parent_ino_from_dentry(dentry); |
| 273 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 274 | "%s: calling dir_emit of \"..\" with pos = %llu\n", |
| 275 | __func__, |
| 276 | llu(pos)); |
| 277 | ret = dir_emit(ctx, "..", 2, ino, DT_DIR); |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 278 | pos += 1; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 279 | } |
| 280 | |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 281 | /* |
| 282 | * we stored PVFS_ITERATE_NEXT in ctx->pos last time around |
| 283 | * to prevent "finding" dot and dot-dot on any iteration |
| 284 | * other than the first. |
| 285 | */ |
| 286 | if (ctx->pos == PVFS_ITERATE_NEXT) |
| 287 | ctx->pos = 0; |
| 288 | |
| 289 | for (i = ctx->pos; |
| 290 | i < rhandle.readdir_response.pvfs_dirent_outcount; |
| 291 | i++) { |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 292 | len = rhandle.readdir_response.dirent_array[i].d_length; |
| 293 | current_entry = rhandle.readdir_response.dirent_array[i].d_name; |
| 294 | current_ino = pvfs2_khandle_to_ino( |
| 295 | &(rhandle.readdir_response.dirent_array[i].khandle)); |
| 296 | |
| 297 | gossip_debug(GOSSIP_DIR_DEBUG, |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 298 | "calling dir_emit for %s with len %d" |
| 299 | ", ctx->pos %ld\n", |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 300 | current_entry, |
| 301 | len, |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 302 | (unsigned long)ctx->pos); |
| 303 | /* |
| 304 | * type is unknown. We don't return object type |
| 305 | * in the dirent_array. This leaves getdents |
| 306 | * clueless about type. |
| 307 | */ |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 308 | ret = |
| 309 | dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN); |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 310 | if (!ret) |
| 311 | break; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 312 | ctx->pos++; |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 313 | gossip_debug(GOSSIP_DIR_DEBUG, |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 314 | "%s: ctx->pos:%lld\n", |
| 315 | __func__, |
| 316 | lld(ctx->pos)); |
| 317 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 318 | } |
| 319 | |
Mike Marshall | 5480494 | 2015-10-05 13:44:24 -0400 | [diff] [blame] | 320 | /* |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 321 | * we ran all the way through the last batch, set up for |
| 322 | * getting another batch... |
| 323 | */ |
| 324 | if (ret) { |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 325 | *ptoken = rhandle.readdir_response.token; |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 326 | ctx->pos = PVFS_ITERATE_NEXT; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 327 | } |
| 328 | |
| 329 | /* |
| 330 | * Did we hit the end of the directory? |
| 331 | */ |
| 332 | if (rhandle.readdir_response.token == PVFS_READDIR_END && |
| 333 | !buffer_full) { |
Mike Marshall | 88309aa | 2015-09-23 16:48:40 -0400 | [diff] [blame] | 334 | gossip_debug(GOSSIP_DIR_DEBUG, |
| 335 | "End of dir detected; setting ctx->pos to PVFS_READDIR_END.\n"); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 336 | ctx->pos = PVFS_READDIR_END; |
| 337 | } |
| 338 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 339 | out_destroy_handle: |
| 340 | readdir_handle_dtor(bufmap, &rhandle); |
| 341 | out_free_op: |
| 342 | op_release(new_op); |
| 343 | gossip_debug(GOSSIP_DIR_DEBUG, "pvfs2_readdir returning %d\n", ret); |
| 344 | return ret; |
| 345 | } |
| 346 | |
| 347 | static int pvfs2_dir_open(struct inode *inode, struct file *file) |
| 348 | { |
| 349 | __u64 *ptoken; |
| 350 | |
| 351 | file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL); |
| 352 | if (!file->private_data) |
| 353 | return -ENOMEM; |
| 354 | |
| 355 | ptoken = file->private_data; |
| 356 | *ptoken = PVFS_READDIR_START; |
| 357 | return 0; |
| 358 | } |
| 359 | |
| 360 | static int pvfs2_dir_release(struct inode *inode, struct file *file) |
| 361 | { |
| 362 | pvfs2_flush_inode(inode); |
| 363 | kfree(file->private_data); |
| 364 | return 0; |
| 365 | } |
| 366 | |
| 367 | /** PVFS2 implementation of VFS directory operations */ |
| 368 | const struct file_operations pvfs2_dir_operations = { |
| 369 | .read = generic_read_dir, |
| 370 | .iterate = pvfs2_readdir, |
| 371 | .open = pvfs2_dir_open, |
| 372 | .release = pvfs2_dir_release, |
| 373 | }; |