blob: 41b356fefaa55ed25034851297d9ff4c5a0c021d [file] [log] [blame]
Mark Fashehccd979b2005-12-15 14:31:24 -08001/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * dir.c
5 *
6 * Creates, reads, walks and deletes directory-nodes
7 *
8 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
9 *
10 * Portions of this code from linux/fs/ext3/dir.c
11 *
12 * Copyright (C) 1992, 1993, 1994, 1995
13 * Remy Card (card@masi.ibp.fr)
14 * Laboratoire MASI - Institut Blaise pascal
15 * Universite Pierre et Marie Curie (Paris VI)
16 *
17 * from
18 *
19 * linux/fs/minix/dir.c
20 *
21 * Copyright (C) 1991, 1992 Linus Torvalds
22 *
23 * This program is free software; you can redistribute it and/or
24 * modify it under the terms of the GNU General Public
25 * License as published by the Free Software Foundation; either
26 * version 2 of the License, or (at your option) any later version.
27 *
28 * This program is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
31 * General Public License for more details.
32 *
33 * You should have received a copy of the GNU General Public
34 * License along with this program; if not, write to the
35 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
36 * Boston, MA 02111-1307, USA.
37 */
38
39#include <linux/fs.h>
40#include <linux/types.h>
41#include <linux/slab.h>
42#include <linux/highmem.h>
Jan Karaa90714c2008-10-09 19:38:40 +020043#include <linux/quotaops.h>
Mark Fasheh9b7895e2008-11-12 16:27:44 -080044#include <linux/sort.h>
Mark Fashehccd979b2005-12-15 14:31:24 -080045
46#define MLOG_MASK_PREFIX ML_NAMEI
47#include <cluster/masklog.h>
48
49#include "ocfs2.h"
50
51#include "alloc.h"
Joel Beckerc175a512008-12-10 17:58:22 -080052#include "blockcheck.h"
Mark Fashehccd979b2005-12-15 14:31:24 -080053#include "dir.h"
54#include "dlmglue.h"
55#include "extent_map.h"
56#include "file.h"
57#include "inode.h"
58#include "journal.h"
59#include "namei.h"
60#include "suballoc.h"
Mark Fasheh316f4b92007-09-07 18:21:26 -070061#include "super.h"
Mark Fasheh9b7895e2008-11-12 16:27:44 -080062#include "sysfile.h"
Mark Fashehccd979b2005-12-15 14:31:24 -080063#include "uptodate.h"
64
65#include "buffer_head_io.h"
66
/*
 * Geometry of the readahead buffer used when scanning directory blocks:
 * NAMEI_RA_CHUNKS chunks of NAMEI_RA_BLOCKS blocks each.
 */
#define NAMEI_RA_CHUNKS		2
#define NAMEI_RA_BLOCKS		4
#define NAMEI_RA_SIZE		(NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
/* Flat index of block 'b' inside chunk 'c'. */
#define NAMEI_RA_INDEX(c,b)	(((c) * NAMEI_RA_BLOCKS) + (b))
71
Mark Fashehccd979b2005-12-15 14:31:24 -080072static unsigned char ocfs2_filetype_table[] = {
73 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
74};
75
Mark Fasheh316f4b92007-09-07 18:21:26 -070076static int ocfs2_do_extend_dir(struct super_block *sb,
77 handle_t *handle,
78 struct inode *dir,
79 struct buffer_head *parent_fe_bh,
80 struct ocfs2_alloc_context *data_ac,
81 struct ocfs2_alloc_context *meta_ac,
82 struct buffer_head **new_bh);
Mark Fashehe7c17e42009-01-29 18:17:46 -080083static int ocfs2_dir_indexed(struct inode *inode);
Mark Fasheh316f4b92007-09-07 18:21:26 -070084
Mark Fasheh23193e52007-09-12 13:01:18 -070085/*
Mark Fasheh87d35a72008-12-10 17:36:25 -080086 * These are distinct checks because future versions of the file system will
87 * want to have a trailing dirent structure independent of indexing.
88 */
Mark Fashehe7c17e42009-01-29 18:17:46 -080089static int ocfs2_supports_dir_trailer(struct inode *dir)
Mark Fasheh87d35a72008-12-10 17:36:25 -080090{
Mark Fashehe7c17e42009-01-29 18:17:46 -080091 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
92
Mark Fasheh87d35a72008-12-10 17:36:25 -080093 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
94 return 0;
95
Mark Fashehe7c17e42009-01-29 18:17:46 -080096 return ocfs2_meta_ecc(osb) || ocfs2_dir_indexed(dir);
Mark Fasheh87d35a72008-12-10 17:36:25 -080097}
98
Mark Fashehe7c17e42009-01-29 18:17:46 -080099/*
100 * "new' here refers to the point at which we're creating a new
101 * directory via "mkdir()", but also when we're expanding an inline
102 * directory. In either case, we don't yet have the indexing bit set
103 * on the directory, so the standard checks will fail in when metaecc
104 * is turned off. Only directory-initialization type functions should
105 * use this then. Everything else wants ocfs2_supports_dir_trailer()
106 */
107static int ocfs2_new_dir_wants_trailer(struct inode *dir)
Mark Fasheh87d35a72008-12-10 17:36:25 -0800108{
Mark Fashehe7c17e42009-01-29 18:17:46 -0800109 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
110
111 return ocfs2_meta_ecc(osb) ||
112 ocfs2_supports_indexed_dirs(osb);
Mark Fasheh87d35a72008-12-10 17:36:25 -0800113}
114
115static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
116{
117 return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer);
118}
119
/*
 * Locate the directory block trailer inside the data of buffer head _bh,
 * using the block size of super block _sb.
 */
#define ocfs2_trailer_from_bh(_bh, _sb)					\
	((struct ocfs2_dir_block_trailer *)				\
	 ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
121
Joel Beckerc175a512008-12-10 17:58:22 -0800122/* XXX ocfs2_block_dqtrailer() is similar but not quite - can we make
123 * them more consistent? */
124struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
125 void *data)
126{
127 char *p = data;
128
129 p += blocksize - sizeof(struct ocfs2_dir_block_trailer);
130 return (struct ocfs2_dir_block_trailer *)p;
131}
132
Mark Fasheh87d35a72008-12-10 17:36:25 -0800133/*
134 * XXX: This is executed once on every dirent. We should consider optimizing
135 * it.
136 */
137static int ocfs2_skip_dir_trailer(struct inode *dir,
138 struct ocfs2_dir_entry *de,
139 unsigned long offset,
140 unsigned long blklen)
141{
142 unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
143
Mark Fashehe7c17e42009-01-29 18:17:46 -0800144 if (!ocfs2_supports_dir_trailer(dir))
Mark Fasheh87d35a72008-12-10 17:36:25 -0800145 return 0;
146
147 if (offset != toff)
148 return 0;
149
150 return 1;
151}
152
153static void ocfs2_init_dir_trailer(struct inode *inode,
Mark Fashehe7c17e42009-01-29 18:17:46 -0800154 struct buffer_head *bh, u16 rec_len)
Mark Fasheh87d35a72008-12-10 17:36:25 -0800155{
156 struct ocfs2_dir_block_trailer *trailer;
157
158 trailer = ocfs2_trailer_from_bh(bh, inode->i_sb);
159 strcpy(trailer->db_signature, OCFS2_DIR_TRAILER_SIGNATURE);
160 trailer->db_compat_rec_len =
161 cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
162 trailer->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
163 trailer->db_blkno = cpu_to_le64(bh->b_blocknr);
Mark Fashehe7c17e42009-01-29 18:17:46 -0800164 trailer->db_free_rec_len = cpu_to_le16(rec_len);
165}
166/*
167 * Link an unindexed block with a dir trailer structure into the index free
168 * list. This function will modify dirdata_bh, but assumes you've already
169 * passed it to the journal.
170 */
171static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle,
172 struct buffer_head *dx_root_bh,
173 struct buffer_head *dirdata_bh)
174{
175 int ret;
176 struct ocfs2_dx_root_block *dx_root;
177 struct ocfs2_dir_block_trailer *trailer;
178
Joel Becker0cf2f762009-02-12 16:41:25 -0800179 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
Mark Fashehe7c17e42009-01-29 18:17:46 -0800180 OCFS2_JOURNAL_ACCESS_WRITE);
181 if (ret) {
182 mlog_errno(ret);
183 goto out;
184 }
185 trailer = ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
186 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
187
188 trailer->db_free_next = dx_root->dr_free_blk;
189 dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
190
191 ocfs2_journal_dirty(handle, dx_root_bh);
192
193out:
194 return ret;
195}
196
197static int ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result *res)
198{
199 return res->dl_prev_leaf_bh == NULL;
Mark Fasheh87d35a72008-12-10 17:36:25 -0800200}
201
Mark Fasheh4a12ca32008-11-12 15:43:34 -0800202void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
203{
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -0800204 brelse(res->dl_dx_root_bh);
Mark Fasheh4a12ca32008-11-12 15:43:34 -0800205 brelse(res->dl_leaf_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800206 brelse(res->dl_dx_leaf_bh);
Mark Fashehe7c17e42009-01-29 18:17:46 -0800207 brelse(res->dl_prev_leaf_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800208}
209
210static int ocfs2_dir_indexed(struct inode *inode)
211{
212 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INDEXED_DIR_FL)
213 return 1;
214 return 0;
215}
216
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -0800217static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root)
218{
219 return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE;
220}
221
/*
 * Hashing code adapted from ext3
 */
#define DELTA 0x9E3779B9

/*
 * Mix the four input words in[0..3] into buf[0] and buf[1] using 16 TEA
 * rounds. buf[2] and buf[3] are neither read nor written.
 */
static void TEA_transform(__u32 buf[4], __u32 const in[])
{
	__u32 v0 = buf[0], v1 = buf[1];
	__u32 sum = 0;
	int round;

	for (round = 0; round < 16; round++) {
		sum += DELTA;
		v0 += ((v1 << 4) + in[0]) ^ (v1 + sum) ^ ((v1 >> 5) + in[1]);
		v1 += ((v0 << 4) + in[2]) ^ (v0 + sum) ^ ((v0 >> 5) + in[3]);
	}

	/* Fold the result back into the running hash state. */
	buf[0] += v0;
	buf[1] += v1;
}
243
/*
 * Pack up to num * 4 bytes of 'msg' into 'num' 32-bit words for hashing.
 *
 * Each word is built big-endian style from four consecutive bytes.
 * A partial last word and any remaining words are filled from a padding
 * pattern derived from the original 'len' (its low byte repeated in all
 * four byte positions for len < 256).
 *
 * At most 'num' words are written to 'buf'.
 */
static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
{
	__u32 pad, word;
	int pos;

	pad = (__u32)len | ((__u32)len << 8);
	pad |= pad << 16;

	/* Never consume more input than fits into the output words. */
	if (len > num * 4)
		len = num * 4;

	word = pad;
	for (pos = 0; pos < len; pos++) {
		if ((pos & 3) == 0)
			word = pad;
		word = msg[pos] + (word << 8);
		if ((pos & 3) == 3) {
			/* Four bytes collected: emit a full word. */
			*buf++ = word;
			word = pad;
			num--;
		}
	}

	/* Emit the partial (or pure padding) word, then pad the rest. */
	num--;
	if (num >= 0)
		*buf++ = word;
	for (num--; num >= 0; num--)
		*buf++ = pad;
}
270
271static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
272 struct ocfs2_dx_hinfo *hinfo)
273{
274 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
275 const char *p;
276 __u32 in[8], buf[4];
277
278 /*
279 * XXX: Is this really necessary, if the index is never looked
280 * at by readdir? Is a hash value of '0' a bad idea?
281 */
282 if ((len == 1 && !strncmp(".", name, 1)) ||
283 (len == 2 && !strncmp("..", name, 2))) {
284 buf[0] = buf[1] = 0;
285 goto out;
286 }
287
288#ifdef OCFS2_DEBUG_DX_DIRS
289 /*
290 * This makes it very easy to debug indexing problems. We
291 * should never allow this to be selected without hand editing
292 * this file though.
293 */
294 buf[0] = buf[1] = len;
295 goto out;
296#endif
297
298 memcpy(buf, osb->osb_dx_seed, sizeof(buf));
299
300 p = name;
301 while (len > 0) {
302 str2hashbuf(p, len, in, 4);
303 TEA_transform(buf, in);
304 len -= 16;
305 p += 16;
306 }
307
308out:
309 hinfo->major_hash = buf[0];
310 hinfo->minor_hash = buf[1];
Mark Fasheh4a12ca32008-11-12 15:43:34 -0800311}
312
Mark Fasheh87d35a72008-12-10 17:36:25 -0800313/*
Mark Fasheh23193e52007-09-12 13:01:18 -0700314 * bh passed here can be an inode block or a dir data block, depending
315 * on the inode inline data flag.
316 */
Mark Fasheh5eae5b92007-09-10 17:50:51 -0700317static int ocfs2_check_dir_entry(struct inode * dir,
318 struct ocfs2_dir_entry * de,
319 struct buffer_head * bh,
320 unsigned long offset)
Mark Fasheh316f4b92007-09-07 18:21:26 -0700321{
322 const char *error_msg = NULL;
323 const int rlen = le16_to_cpu(de->rec_len);
324
Tao Ma1dd9ffc2011-01-24 23:23:30 +0800325 if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
Mark Fasheh316f4b92007-09-07 18:21:26 -0700326 error_msg = "rec_len is smaller than minimal";
Tao Ma1dd9ffc2011-01-24 23:23:30 +0800327 else if (unlikely(rlen % 4 != 0))
Mark Fasheh316f4b92007-09-07 18:21:26 -0700328 error_msg = "rec_len % 4 != 0";
Tao Ma1dd9ffc2011-01-24 23:23:30 +0800329 else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
Mark Fasheh316f4b92007-09-07 18:21:26 -0700330 error_msg = "rec_len is too small for name_len";
Tao Ma1dd9ffc2011-01-24 23:23:30 +0800331 else if (unlikely(
332 ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
Mark Fasheh316f4b92007-09-07 18:21:26 -0700333 error_msg = "directory entry across blocks";
334
Tao Ma1dd9ffc2011-01-24 23:23:30 +0800335 if (unlikely(error_msg != NULL))
Mark Fasheh316f4b92007-09-07 18:21:26 -0700336 mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
337 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
338 (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
339 offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
340 de->name_len);
Tao Ma1dd9ffc2011-01-24 23:23:30 +0800341
Mark Fasheh316f4b92007-09-07 18:21:26 -0700342 return error_msg == NULL ? 1 : 0;
343}
344
345static inline int ocfs2_match(int len,
346 const char * const name,
347 struct ocfs2_dir_entry *de)
348{
349 if (len != de->name_len)
350 return 0;
351 if (!de->inode)
352 return 0;
353 return !memcmp(name, de->name, len);
354}
355
356/*
357 * Returns 0 if not found, -1 on failure, and 1 on success
358 */
359static int inline ocfs2_search_dirblock(struct buffer_head *bh,
360 struct inode *dir,
361 const char *name, int namelen,
362 unsigned long offset,
Mark Fasheh23193e52007-09-12 13:01:18 -0700363 char *first_de,
364 unsigned int bytes,
Mark Fasheh316f4b92007-09-07 18:21:26 -0700365 struct ocfs2_dir_entry **res_dir)
366{
367 struct ocfs2_dir_entry *de;
368 char *dlimit, *de_buf;
369 int de_len;
370 int ret = 0;
371
372 mlog_entry_void();
373
Mark Fasheh23193e52007-09-12 13:01:18 -0700374 de_buf = first_de;
375 dlimit = de_buf + bytes;
Mark Fasheh316f4b92007-09-07 18:21:26 -0700376
377 while (de_buf < dlimit) {
378 /* this code is executed quadratically often */
379 /* do minimal checking `by hand' */
380
381 de = (struct ocfs2_dir_entry *) de_buf;
382
383 if (de_buf + namelen <= dlimit &&
384 ocfs2_match(namelen, name, de)) {
385 /* found a match - just to be sure, do a full check */
386 if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
387 ret = -1;
388 goto bail;
389 }
390 *res_dir = de;
391 ret = 1;
392 goto bail;
393 }
394
395 /* prevent looping on a bad block */
396 de_len = le16_to_cpu(de->rec_len);
397 if (de_len <= 0) {
398 ret = -1;
399 goto bail;
400 }
401
402 de_buf += de_len;
403 offset += de_len;
404 }
405
406bail:
407 mlog_exit(ret);
408 return ret;
409}
410
Mark Fasheh23193e52007-09-12 13:01:18 -0700411static struct buffer_head *ocfs2_find_entry_id(const char *name,
412 int namelen,
413 struct inode *dir,
414 struct ocfs2_dir_entry **res_dir)
415{
416 int ret, found;
417 struct buffer_head *di_bh = NULL;
418 struct ocfs2_dinode *di;
419 struct ocfs2_inline_data *data;
420
Joel Beckerb657c952008-11-13 14:49:11 -0800421 ret = ocfs2_read_inode_block(dir, &di_bh);
Mark Fasheh23193e52007-09-12 13:01:18 -0700422 if (ret) {
423 mlog_errno(ret);
424 goto out;
425 }
426
427 di = (struct ocfs2_dinode *)di_bh->b_data;
428 data = &di->id2.i_data;
429
430 found = ocfs2_search_dirblock(di_bh, dir, name, namelen, 0,
431 data->id_data, i_size_read(dir), res_dir);
432 if (found == 1)
433 return di_bh;
434
435 brelse(di_bh);
436out:
437 return NULL;
438}
439
Joel Beckera22305c2008-11-13 14:49:17 -0800440static int ocfs2_validate_dir_block(struct super_block *sb,
441 struct buffer_head *bh)
442{
Joel Beckerc175a512008-12-10 17:58:22 -0800443 int rc;
444 struct ocfs2_dir_block_trailer *trailer =
445 ocfs2_trailer_from_bh(bh, sb);
446
447
Joel Beckera22305c2008-11-13 14:49:17 -0800448 /*
Joel Beckerc175a512008-12-10 17:58:22 -0800449 * We don't validate dirents here, that's handled
Joel Beckera22305c2008-11-13 14:49:17 -0800450 * in-place when the code walks them.
451 */
Joel Becker970e4932008-11-13 14:49:19 -0800452 mlog(0, "Validating dirblock %llu\n",
453 (unsigned long long)bh->b_blocknr);
Joel Beckera22305c2008-11-13 14:49:17 -0800454
Joel Beckerc175a512008-12-10 17:58:22 -0800455 BUG_ON(!buffer_uptodate(bh));
456
457 /*
458 * If the ecc fails, we return the error but otherwise
459 * leave the filesystem running. We know any error is
460 * local to this block.
461 *
462 * Note that we are safe to call this even if the directory
463 * doesn't have a trailer. Filesystems without metaecc will do
464 * nothing, and filesystems with it will have one.
465 */
466 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &trailer->db_check);
467 if (rc)
468 mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
469 (unsigned long long)bh->b_blocknr);
470
471 return rc;
Joel Beckera22305c2008-11-13 14:49:17 -0800472}
473
474/*
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800475 * Validate a directory trailer.
476 *
477 * We check the trailer here rather than in ocfs2_validate_dir_block()
478 * because that function doesn't have the inode to test.
479 */
480static int ocfs2_check_dir_trailer(struct inode *dir, struct buffer_head *bh)
481{
482 int rc = 0;
483 struct ocfs2_dir_block_trailer *trailer;
484
485 trailer = ocfs2_trailer_from_bh(bh, dir->i_sb);
486 if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
487 rc = -EINVAL;
488 ocfs2_error(dir->i_sb,
489 "Invalid dirblock #%llu: "
490 "signature = %.*s\n",
491 (unsigned long long)bh->b_blocknr, 7,
492 trailer->db_signature);
493 goto out;
494 }
495 if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) {
496 rc = -EINVAL;
497 ocfs2_error(dir->i_sb,
498 "Directory block #%llu has an invalid "
499 "db_blkno of %llu",
500 (unsigned long long)bh->b_blocknr,
501 (unsigned long long)le64_to_cpu(trailer->db_blkno));
502 goto out;
503 }
504 if (le64_to_cpu(trailer->db_parent_dinode) !=
505 OCFS2_I(dir)->ip_blkno) {
506 rc = -EINVAL;
507 ocfs2_error(dir->i_sb,
508 "Directory block #%llu on dinode "
509 "#%llu has an invalid parent_dinode "
510 "of %llu",
511 (unsigned long long)bh->b_blocknr,
512 (unsigned long long)OCFS2_I(dir)->ip_blkno,
513 (unsigned long long)le64_to_cpu(trailer->db_blkno));
514 goto out;
515 }
516out:
517 return rc;
518}
519
520/*
Joel Beckera22305c2008-11-13 14:49:17 -0800521 * This function forces all errors to -EIO for consistency with its
522 * predecessor, ocfs2_bread(). We haven't audited what returning the
523 * real error codes would do to callers. We log the real codes with
524 * mlog_errno() before we squash them.
525 */
526static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
527 struct buffer_head **bh, int flags)
528{
529 int rc = 0;
530 struct buffer_head *tmp = *bh;
Joel Beckera22305c2008-11-13 14:49:17 -0800531
Joel Becker511308d2008-11-13 14:49:21 -0800532 rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
533 ocfs2_validate_dir_block);
Mark Fasheh87d35a72008-12-10 17:36:25 -0800534 if (rc) {
Joel Beckera22305c2008-11-13 14:49:17 -0800535 mlog_errno(rc);
Mark Fasheh87d35a72008-12-10 17:36:25 -0800536 goto out;
537 }
538
Mark Fasheh87d35a72008-12-10 17:36:25 -0800539 if (!(flags & OCFS2_BH_READAHEAD) &&
Mark Fashehe7c17e42009-01-29 18:17:46 -0800540 ocfs2_supports_dir_trailer(inode)) {
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800541 rc = ocfs2_check_dir_trailer(inode, tmp);
542 if (rc) {
543 if (!*bh)
544 brelse(tmp);
545 mlog_errno(rc);
Mark Fasheh87d35a72008-12-10 17:36:25 -0800546 goto out;
547 }
548 }
Joel Beckera22305c2008-11-13 14:49:17 -0800549
Joel Becker511308d2008-11-13 14:49:21 -0800550 /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
Mark Fasheh87d35a72008-12-10 17:36:25 -0800551 if (!*bh)
Joel Beckera22305c2008-11-13 14:49:17 -0800552 *bh = tmp;
553
Mark Fasheh87d35a72008-12-10 17:36:25 -0800554out:
Joel Beckera22305c2008-11-13 14:49:17 -0800555 return rc ? -EIO : 0;
556}
557
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800558/*
559 * Read the block at 'phys' which belongs to this directory
560 * inode. This function does no virtual->physical block translation -
561 * what's passed in is assumed to be a valid directory block.
562 */
563static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
564 struct buffer_head **bh)
565{
566 int ret;
567 struct buffer_head *tmp = *bh;
568
Joel Becker8cb471e2009-02-10 20:00:41 -0800569 ret = ocfs2_read_block(INODE_CACHE(dir), phys, &tmp,
570 ocfs2_validate_dir_block);
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800571 if (ret) {
572 mlog_errno(ret);
573 goto out;
574 }
575
576 if (ocfs2_supports_dir_trailer(dir)) {
577 ret = ocfs2_check_dir_trailer(dir, tmp);
578 if (ret) {
579 if (!*bh)
580 brelse(tmp);
581 mlog_errno(ret);
582 goto out;
583 }
584 }
585
586 if (!ret && !*bh)
587 *bh = tmp;
588out:
589 return ret;
590}
591
592static int ocfs2_validate_dx_root(struct super_block *sb,
593 struct buffer_head *bh)
594{
595 int ret;
596 struct ocfs2_dx_root_block *dx_root;
597
598 BUG_ON(!buffer_uptodate(bh));
599
600 dx_root = (struct ocfs2_dx_root_block *) bh->b_data;
601
602 ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_root->dr_check);
603 if (ret) {
604 mlog(ML_ERROR,
605 "Checksum failed for dir index root block %llu\n",
606 (unsigned long long)bh->b_blocknr);
607 return ret;
608 }
609
610 if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) {
611 ocfs2_error(sb,
612 "Dir Index Root # %llu has bad signature %.*s",
613 (unsigned long long)le64_to_cpu(dx_root->dr_blkno),
614 7, dx_root->dr_signature);
615 return -EINVAL;
616 }
617
618 return 0;
619}
620
621static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
622 struct buffer_head **dx_root_bh)
623{
624 int ret;
625 u64 blkno = le64_to_cpu(di->i_dx_root);
626 struct buffer_head *tmp = *dx_root_bh;
627
Joel Becker8cb471e2009-02-10 20:00:41 -0800628 ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
629 ocfs2_validate_dx_root);
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800630
631 /* If ocfs2_read_block() got us a new bh, pass it up. */
632 if (!ret && !*dx_root_bh)
633 *dx_root_bh = tmp;
634
635 return ret;
636}
637
638static int ocfs2_validate_dx_leaf(struct super_block *sb,
639 struct buffer_head *bh)
640{
641 int ret;
642 struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)bh->b_data;
643
644 BUG_ON(!buffer_uptodate(bh));
645
646 ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_leaf->dl_check);
647 if (ret) {
648 mlog(ML_ERROR,
649 "Checksum failed for dir index leaf block %llu\n",
650 (unsigned long long)bh->b_blocknr);
651 return ret;
652 }
653
654 if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) {
655 ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s",
656 7, dx_leaf->dl_signature);
657 return -EROFS;
658 }
659
660 return 0;
661}
662
663static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
664 struct buffer_head **dx_leaf_bh)
665{
666 int ret;
667 struct buffer_head *tmp = *dx_leaf_bh;
668
Joel Becker8cb471e2009-02-10 20:00:41 -0800669 ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
670 ocfs2_validate_dx_leaf);
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800671
672 /* If ocfs2_read_block() got us a new bh, pass it up. */
673 if (!ret && !*dx_leaf_bh)
674 *dx_leaf_bh = tmp;
675
676 return ret;
677}
678
679/*
680 * Read a series of dx_leaf blocks. This expects all buffer_head
681 * pointers to be NULL on function entry.
682 */
683static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num,
684 struct buffer_head **dx_leaf_bhs)
685{
686 int ret;
687
Joel Becker8cb471e2009-02-10 20:00:41 -0800688 ret = ocfs2_read_blocks(INODE_CACHE(dir), start, num, dx_leaf_bhs, 0,
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800689 ocfs2_validate_dx_leaf);
690 if (ret)
691 mlog_errno(ret);
692
693 return ret;
694}
695
Adrian Bunk0af4bd32007-10-24 18:23:27 +0200696static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
697 struct inode *dir,
698 struct ocfs2_dir_entry **res_dir)
Mark Fasheh316f4b92007-09-07 18:21:26 -0700699{
700 struct super_block *sb;
701 struct buffer_head *bh_use[NAMEI_RA_SIZE];
702 struct buffer_head *bh, *ret = NULL;
703 unsigned long start, block, b;
704 int ra_max = 0; /* Number of bh's in the readahead
705 buffer, bh_use[] */
706 int ra_ptr = 0; /* Current index into readahead
707 buffer */
708 int num = 0;
709 int nblocks, i, err;
710
711 mlog_entry_void();
712
Mark Fasheh316f4b92007-09-07 18:21:26 -0700713 sb = dir->i_sb;
714
715 nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
716 start = OCFS2_I(dir)->ip_dir_start_lookup;
717 if (start >= nblocks)
718 start = 0;
719 block = start;
720
721restart:
722 do {
723 /*
724 * We deal with the read-ahead logic here.
725 */
726 if (ra_ptr >= ra_max) {
727 /* Refill the readahead buffer */
728 ra_ptr = 0;
729 b = block;
730 for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
731 /*
732 * Terminate if we reach the end of the
733 * directory and must wrap, or if our
734 * search has finished at this block.
735 */
736 if (b >= nblocks || (num && block == start)) {
737 bh_use[ra_max] = NULL;
738 break;
739 }
740 num++;
741
Joel Beckera22305c2008-11-13 14:49:17 -0800742 bh = NULL;
743 err = ocfs2_read_dir_block(dir, b++, &bh,
744 OCFS2_BH_READAHEAD);
Mark Fasheh316f4b92007-09-07 18:21:26 -0700745 bh_use[ra_max] = bh;
746 }
747 }
748 if ((bh = bh_use[ra_ptr++]) == NULL)
749 goto next;
Joel Beckera22305c2008-11-13 14:49:17 -0800750 if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
Joel Becker5e0b3de2008-10-09 17:20:33 -0700751 /* read error, skip block & hope for the best.
Joel Beckera22305c2008-11-13 14:49:17 -0800752 * ocfs2_read_dir_block() has released the bh. */
Mark Fasheh316f4b92007-09-07 18:21:26 -0700753 ocfs2_error(dir->i_sb, "reading directory %llu, "
754 "offset %lu\n",
755 (unsigned long long)OCFS2_I(dir)->ip_blkno,
756 block);
Mark Fasheh316f4b92007-09-07 18:21:26 -0700757 goto next;
758 }
759 i = ocfs2_search_dirblock(bh, dir, name, namelen,
760 block << sb->s_blocksize_bits,
Mark Fasheh23193e52007-09-12 13:01:18 -0700761 bh->b_data, sb->s_blocksize,
Mark Fasheh316f4b92007-09-07 18:21:26 -0700762 res_dir);
763 if (i == 1) {
764 OCFS2_I(dir)->ip_dir_start_lookup = block;
765 ret = bh;
766 goto cleanup_and_exit;
767 } else {
768 brelse(bh);
769 if (i < 0)
770 goto cleanup_and_exit;
771 }
772 next:
773 if (++block >= nblocks)
774 block = 0;
775 } while (block != start);
776
777 /*
778 * If the directory has grown while we were searching, then
779 * search the last part of the directory before giving up.
780 */
781 block = nblocks;
782 nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
783 if (block < nblocks) {
784 start = 0;
785 goto restart;
786 }
787
788cleanup_and_exit:
789 /* Clean up the read-ahead blocks */
790 for (; ra_ptr < ra_max; ra_ptr++)
791 brelse(bh_use[ra_ptr]);
792
793 mlog_exit_ptr(ret);
794 return ret;
795}
796
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800797static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
798 struct ocfs2_extent_list *el,
799 u32 major_hash,
800 u32 *ret_cpos,
801 u64 *ret_phys_blkno,
802 unsigned int *ret_clen)
803{
804 int ret = 0, i, found;
805 struct buffer_head *eb_bh = NULL;
806 struct ocfs2_extent_block *eb;
807 struct ocfs2_extent_rec *rec = NULL;
808
809 if (el->l_tree_depth) {
Joel Beckerfacdb772009-02-12 18:08:48 -0800810 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash,
811 &eb_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800812 if (ret) {
813 mlog_errno(ret);
814 goto out;
815 }
816
817 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
818 el = &eb->h_list;
819
820 if (el->l_tree_depth) {
821 ocfs2_error(inode->i_sb,
822 "Inode %lu has non zero tree depth in "
823 "btree tree block %llu\n", inode->i_ino,
824 (unsigned long long)eb_bh->b_blocknr);
825 ret = -EROFS;
826 goto out;
827 }
828 }
829
830 found = 0;
831 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
832 rec = &el->l_recs[i];
833
834 if (le32_to_cpu(rec->e_cpos) <= major_hash) {
835 found = 1;
836 break;
837 }
838 }
839
840 if (!found) {
841 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
842 "record (%u, %u, 0) in btree", inode->i_ino,
843 le32_to_cpu(rec->e_cpos),
844 ocfs2_rec_clusters(el, rec));
845 ret = -EROFS;
846 goto out;
847 }
848
849 if (ret_phys_blkno)
850 *ret_phys_blkno = le64_to_cpu(rec->e_blkno);
851 if (ret_cpos)
852 *ret_cpos = le32_to_cpu(rec->e_cpos);
853 if (ret_clen)
854 *ret_clen = le16_to_cpu(rec->e_leaf_clusters);
855
856out:
857 brelse(eb_bh);
858 return ret;
859}
860
861/*
862 * Returns the block index, from the start of the cluster which this
863 * hash belongs too.
864 */
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -0800865static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
866 u32 minor_hash)
867{
868 return minor_hash & osb->osb_dx_mask;
869}
870
871static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800872 struct ocfs2_dx_hinfo *hinfo)
873{
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -0800874 return __ocfs2_dx_dir_hash_idx(osb, hinfo->minor_hash);
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800875}
876
877static int ocfs2_dx_dir_lookup(struct inode *inode,
878 struct ocfs2_extent_list *el,
879 struct ocfs2_dx_hinfo *hinfo,
880 u32 *ret_cpos,
881 u64 *ret_phys_blkno)
882{
883 int ret = 0;
884 unsigned int cend, uninitialized_var(clen);
885 u32 uninitialized_var(cpos);
886 u64 uninitialized_var(blkno);
887 u32 name_hash = hinfo->major_hash;
888
889 ret = ocfs2_dx_dir_lookup_rec(inode, el, name_hash, &cpos, &blkno,
890 &clen);
891 if (ret) {
892 mlog_errno(ret);
893 goto out;
894 }
895
896 cend = cpos + clen;
897 if (name_hash >= cend) {
898 /* We want the last cluster */
899 blkno += ocfs2_clusters_to_blocks(inode->i_sb, clen - 1);
900 cpos += clen - 1;
901 } else {
902 blkno += ocfs2_clusters_to_blocks(inode->i_sb,
903 name_hash - cpos);
904 cpos = name_hash;
905 }
906
907 /*
908 * We now have the cluster which should hold our entry. To
909 * find the exact block from the start of the cluster to
910 * search, we take the lower bits of the hash.
911 */
912 blkno += ocfs2_dx_dir_hash_idx(OCFS2_SB(inode->i_sb), hinfo);
913
914 if (ret_phys_blkno)
915 *ret_phys_blkno = blkno;
916 if (ret_cpos)
917 *ret_cpos = cpos;
918
919out:
920
921 return ret;
922}
923
924static int ocfs2_dx_dir_search(const char *name, int namelen,
925 struct inode *dir,
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -0800926 struct ocfs2_dx_root_block *dx_root,
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800927 struct ocfs2_dir_lookup_result *res)
928{
929 int ret, i, found;
930 u64 uninitialized_var(phys);
931 struct buffer_head *dx_leaf_bh = NULL;
932 struct ocfs2_dx_leaf *dx_leaf;
933 struct ocfs2_dx_entry *dx_entry = NULL;
934 struct buffer_head *dir_ent_bh = NULL;
935 struct ocfs2_dir_entry *dir_ent = NULL;
936 struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -0800937 struct ocfs2_extent_list *dr_el;
938 struct ocfs2_dx_entry_list *entry_list;
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800939
940 ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo);
941
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -0800942 if (ocfs2_dx_root_inline(dx_root)) {
943 entry_list = &dx_root->dr_entries;
944 goto search;
945 }
946
947 dr_el = &dx_root->dr_list;
948
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800949 ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys);
950 if (ret) {
951 mlog_errno(ret);
952 goto out;
953 }
954
955 mlog(0, "Dir %llu: name: \"%.*s\", lookup of hash: %u.0x%x "
956 "returns: %llu\n",
957 (unsigned long long)OCFS2_I(dir)->ip_blkno,
958 namelen, name, hinfo->major_hash, hinfo->minor_hash,
959 (unsigned long long)phys);
960
961 ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh);
962 if (ret) {
963 mlog_errno(ret);
964 goto out;
965 }
966
967 dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data;
968
969 mlog(0, "leaf info: num_used: %d, count: %d\n",
970 le16_to_cpu(dx_leaf->dl_list.de_num_used),
971 le16_to_cpu(dx_leaf->dl_list.de_count));
972
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -0800973 entry_list = &dx_leaf->dl_list;
974
975search:
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800976 /*
977 * Empty leaf is legal, so no need to check for that.
978 */
979 found = 0;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -0800980 for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
981 dx_entry = &entry_list->de_entries[i];
Mark Fasheh9b7895e2008-11-12 16:27:44 -0800982
983 if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash)
984 || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash))
985 continue;
986
987 /*
988 * Search unindexed leaf block now. We're not
989 * guaranteed to find anything.
990 */
991 ret = ocfs2_read_dir_block_direct(dir,
992 le64_to_cpu(dx_entry->dx_dirent_blk),
993 &dir_ent_bh);
994 if (ret) {
995 mlog_errno(ret);
996 goto out;
997 }
998
999 /*
1000 * XXX: We should check the unindexed block here,
1001 * before using it.
1002 */
1003
1004 found = ocfs2_search_dirblock(dir_ent_bh, dir, name, namelen,
1005 0, dir_ent_bh->b_data,
1006 dir->i_sb->s_blocksize, &dir_ent);
1007 if (found == 1)
1008 break;
1009
1010 if (found == -1) {
1011 /* This means we found a bad directory entry. */
1012 ret = -EIO;
1013 mlog_errno(ret);
1014 goto out;
1015 }
1016
1017 brelse(dir_ent_bh);
1018 dir_ent_bh = NULL;
1019 }
1020
1021 if (found <= 0) {
1022 ret = -ENOENT;
1023 goto out;
1024 }
1025
1026 res->dl_leaf_bh = dir_ent_bh;
1027 res->dl_entry = dir_ent;
1028 res->dl_dx_leaf_bh = dx_leaf_bh;
1029 res->dl_dx_entry = dx_entry;
1030
1031 ret = 0;
1032out:
1033 if (ret) {
1034 brelse(dx_leaf_bh);
1035 brelse(dir_ent_bh);
1036 }
1037 return ret;
1038}
1039
1040static int ocfs2_find_entry_dx(const char *name, int namelen,
1041 struct inode *dir,
1042 struct ocfs2_dir_lookup_result *lookup)
1043{
1044 int ret;
1045 struct buffer_head *di_bh = NULL;
1046 struct ocfs2_dinode *di;
1047 struct buffer_head *dx_root_bh = NULL;
1048 struct ocfs2_dx_root_block *dx_root;
1049
1050 ret = ocfs2_read_inode_block(dir, &di_bh);
1051 if (ret) {
1052 mlog_errno(ret);
1053 goto out;
1054 }
1055
1056 di = (struct ocfs2_dinode *)di_bh->b_data;
1057
1058 ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
1059 if (ret) {
1060 mlog_errno(ret);
1061 goto out;
1062 }
1063 dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
1064
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001065 ret = ocfs2_dx_dir_search(name, namelen, dir, dx_root, lookup);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001066 if (ret) {
1067 if (ret != -ENOENT)
1068 mlog_errno(ret);
1069 goto out;
1070 }
1071
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001072 lookup->dl_dx_root_bh = dx_root_bh;
1073 dx_root_bh = NULL;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001074out:
1075 brelse(di_bh);
1076 brelse(dx_root_bh);
1077 return ret;
1078}
1079
Mark Fasheh23193e52007-09-12 13:01:18 -07001080/*
1081 * Try to find an entry of the provided name within 'dir'.
1082 *
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001083 * If nothing was found, -ENOENT is returned. Otherwise, zero is
1084 * returned and the struct 'res' will contain information useful to
1085 * other directory manipulation functions.
Mark Fasheh23193e52007-09-12 13:01:18 -07001086 *
1087 * Caller can NOT assume anything about the contents of the
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001088 * buffer_heads - they are passed back only so that it can be passed
1089 * into any one of the manipulation functions (add entry, delete
1090 * entry, etc). As an example, bh in the extent directory case is a
1091 * data block, in the inline-data case it actually points to an inode,
1092 * in the indexed directory case, multiple buffers are involved.
Mark Fasheh23193e52007-09-12 13:01:18 -07001093 */
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001094int ocfs2_find_entry(const char *name, int namelen,
1095 struct inode *dir, struct ocfs2_dir_lookup_result *lookup)
Mark Fasheh23193e52007-09-12 13:01:18 -07001096{
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001097 struct buffer_head *bh;
1098 struct ocfs2_dir_entry *res_dir = NULL;
Mark Fasheh23193e52007-09-12 13:01:18 -07001099
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001100 if (ocfs2_dir_indexed(dir))
1101 return ocfs2_find_entry_dx(name, namelen, dir, lookup);
1102
1103 /*
1104 * The unindexed dir code only uses part of the lookup
1105 * structure, so there's no reason to push it down further
1106 * than this.
1107 */
Mark Fasheh23193e52007-09-12 13:01:18 -07001108 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001109 bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir);
1110 else
1111 bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir);
Mark Fasheh23193e52007-09-12 13:01:18 -07001112
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001113 if (bh == NULL)
1114 return -ENOENT;
1115
1116 lookup->dl_leaf_bh = bh;
1117 lookup->dl_entry = res_dir;
1118 return 0;
Mark Fasheh23193e52007-09-12 13:01:18 -07001119}
1120
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001121/*
1122 * Update inode number and type of a previously found directory entry.
1123 */
Mark Fasheh38760e22007-09-11 17:21:56 -07001124int ocfs2_update_entry(struct inode *dir, handle_t *handle,
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001125 struct ocfs2_dir_lookup_result *res,
Mark Fasheh38760e22007-09-11 17:21:56 -07001126 struct inode *new_entry_inode)
1127{
1128 int ret;
Joel Becker13723d02008-10-17 19:25:01 -07001129 ocfs2_journal_access_func access = ocfs2_journal_access_db;
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001130 struct ocfs2_dir_entry *de = res->dl_entry;
1131 struct buffer_head *de_bh = res->dl_leaf_bh;
Mark Fasheh38760e22007-09-11 17:21:56 -07001132
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001133 /*
1134 * The same code works fine for both inline-data and extent
Joel Becker13723d02008-10-17 19:25:01 -07001135 * based directories, so no need to split this up. The only
1136 * difference is the journal_access function.
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001137 */
1138
Joel Becker13723d02008-10-17 19:25:01 -07001139 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1140 access = ocfs2_journal_access_di;
1141
Joel Becker0cf2f762009-02-12 16:41:25 -08001142 ret = access(handle, INODE_CACHE(dir), de_bh,
1143 OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh38760e22007-09-11 17:21:56 -07001144 if (ret) {
1145 mlog_errno(ret);
1146 goto out;
1147 }
1148
1149 de->inode = cpu_to_le64(OCFS2_I(new_entry_inode)->ip_blkno);
1150 ocfs2_set_de_type(de, new_entry_inode->i_mode);
1151
1152 ocfs2_journal_dirty(handle, de_bh);
1153
1154out:
1155 return ret;
1156}
1157
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001158/*
1159 * __ocfs2_delete_entry deletes a directory entry by merging it with the
1160 * previous entry
1161 */
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001162static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1163 struct ocfs2_dir_entry *de_del,
1164 struct buffer_head *bh, char *first_de,
1165 unsigned int bytes)
Mark Fasheh316f4b92007-09-07 18:21:26 -07001166{
1167 struct ocfs2_dir_entry *de, *pde;
1168 int i, status = -ENOENT;
Joel Becker13723d02008-10-17 19:25:01 -07001169 ocfs2_journal_access_func access = ocfs2_journal_access_db;
Mark Fasheh316f4b92007-09-07 18:21:26 -07001170
1171 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
1172
Joel Becker13723d02008-10-17 19:25:01 -07001173 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1174 access = ocfs2_journal_access_di;
1175
Mark Fasheh316f4b92007-09-07 18:21:26 -07001176 i = 0;
1177 pde = NULL;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001178 de = (struct ocfs2_dir_entry *) first_de;
1179 while (i < bytes) {
Mark Fasheh316f4b92007-09-07 18:21:26 -07001180 if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
1181 status = -EIO;
1182 mlog_errno(status);
1183 goto bail;
1184 }
1185 if (de == de_del) {
Joel Becker0cf2f762009-02-12 16:41:25 -08001186 status = access(handle, INODE_CACHE(dir), bh,
Joel Becker13723d02008-10-17 19:25:01 -07001187 OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh316f4b92007-09-07 18:21:26 -07001188 if (status < 0) {
1189 status = -EIO;
1190 mlog_errno(status);
1191 goto bail;
1192 }
1193 if (pde)
Marcin Slusarz0dd32562008-02-13 00:06:18 +01001194 le16_add_cpu(&pde->rec_len,
1195 le16_to_cpu(de->rec_len));
Mark Fasheh316f4b92007-09-07 18:21:26 -07001196 else
1197 de->inode = 0;
1198 dir->i_version++;
Joel Beckerec20cec2010-03-19 14:13:52 -07001199 ocfs2_journal_dirty(handle, bh);
Mark Fasheh316f4b92007-09-07 18:21:26 -07001200 goto bail;
1201 }
1202 i += le16_to_cpu(de->rec_len);
1203 pde = de;
1204 de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
1205 }
1206bail:
1207 mlog_exit(status);
1208 return status;
1209}
1210
Mark Fashehe7c17e42009-01-29 18:17:46 -08001211static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de)
1212{
1213 unsigned int hole;
1214
1215 if (le64_to_cpu(de->inode) == 0)
1216 hole = le16_to_cpu(de->rec_len);
1217 else
1218 hole = le16_to_cpu(de->rec_len) -
1219 OCFS2_DIR_REC_LEN(de->name_len);
1220
1221 return hole;
1222}
1223
1224static int ocfs2_find_max_rec_len(struct super_block *sb,
1225 struct buffer_head *dirblock_bh)
1226{
1227 int size, this_hole, largest_hole = 0;
1228 char *trailer, *de_buf, *limit, *start = dirblock_bh->b_data;
1229 struct ocfs2_dir_entry *de;
1230
1231 trailer = (char *)ocfs2_trailer_from_bh(dirblock_bh, sb);
1232 size = ocfs2_dir_trailer_blk_off(sb);
1233 limit = start + size;
1234 de_buf = start;
1235 de = (struct ocfs2_dir_entry *)de_buf;
1236 do {
1237 if (de_buf != trailer) {
1238 this_hole = ocfs2_figure_dirent_hole(de);
1239 if (this_hole > largest_hole)
1240 largest_hole = this_hole;
1241 }
1242
1243 de_buf += le16_to_cpu(de->rec_len);
1244 de = (struct ocfs2_dir_entry *)de_buf;
1245 } while (de_buf < limit);
1246
1247 if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
1248 return largest_hole;
1249 return 0;
1250}
1251
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001252static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list,
1253 int index)
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001254{
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001255 int num_used = le16_to_cpu(entry_list->de_num_used);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001256
1257 if (num_used == 1 || index == (num_used - 1))
1258 goto clear;
1259
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001260 memmove(&entry_list->de_entries[index],
1261 &entry_list->de_entries[index + 1],
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001262 (num_used - index - 1)*sizeof(struct ocfs2_dx_entry));
1263clear:
1264 num_used--;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001265 memset(&entry_list->de_entries[num_used], 0,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001266 sizeof(struct ocfs2_dx_entry));
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001267 entry_list->de_num_used = cpu_to_le16(num_used);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001268}
1269
1270static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1271 struct ocfs2_dir_lookup_result *lookup)
1272{
Mark Fashehe7c17e42009-01-29 18:17:46 -08001273 int ret, index, max_rec_len, add_to_free_list = 0;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001274 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001275 struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
1276 struct ocfs2_dx_leaf *dx_leaf;
1277 struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry;
Mark Fashehe7c17e42009-01-29 18:17:46 -08001278 struct ocfs2_dir_block_trailer *trailer;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001279 struct ocfs2_dx_root_block *dx_root;
1280 struct ocfs2_dx_entry_list *entry_list;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001281
Mark Fashehe7c17e42009-01-29 18:17:46 -08001282 /*
1283 * This function gets a bit messy because we might have to
1284 * modify the root block, regardless of whether the indexed
1285 * entries are stored inline.
1286 */
1287
1288 /*
1289 * *Only* set 'entry_list' here, based on where we're looking
1290 * for the indexed entries. Later, we might still want to
1291 * journal both blocks, based on free list state.
1292 */
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001293 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
1294 if (ocfs2_dx_root_inline(dx_root)) {
1295 entry_list = &dx_root->dr_entries;
1296 } else {
1297 dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
1298 entry_list = &dx_leaf->dl_list;
1299 }
1300
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001301 /* Neither of these are a disk corruption - that should have
1302 * been caught by lookup, before we got here. */
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001303 BUG_ON(le16_to_cpu(entry_list->de_count) <= 0);
1304 BUG_ON(le16_to_cpu(entry_list->de_num_used) <= 0);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001305
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001306 index = (char *)dx_entry - (char *)entry_list->de_entries;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001307 index /= sizeof(*dx_entry);
1308
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001309 if (index >= le16_to_cpu(entry_list->de_num_used)) {
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001310 mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n",
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001311 (unsigned long long)OCFS2_I(dir)->ip_blkno, index,
1312 entry_list, dx_entry);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001313 return -EIO;
1314 }
1315
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001316 /*
Mark Fashehe7c17e42009-01-29 18:17:46 -08001317 * We know that removal of this dirent will leave enough room
1318 * for a new one, so add this block to the free list if it
1319 * isn't already there.
1320 */
1321 trailer = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);
1322 if (trailer->db_free_rec_len == 0)
1323 add_to_free_list = 1;
1324
1325 /*
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001326 * Add the block holding our index into the journal before
1327 * removing the unindexed entry. If we get an error return
1328 * from __ocfs2_delete_entry(), then it hasn't removed the
1329 * entry yet. Likewise, successful return means we *must*
1330 * remove the indexed entry.
1331 *
Mark Fashehe3a93c22009-02-17 15:29:35 -08001332 * We're also careful to journal the root tree block here as
1333 * the entry count needs to be updated. Also, we might be
1334 * adding to the start of the free list.
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001335 */
Joel Becker0cf2f762009-02-12 16:41:25 -08001336 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
Mark Fashehe3a93c22009-02-17 15:29:35 -08001337 OCFS2_JOURNAL_ACCESS_WRITE);
1338 if (ret) {
1339 mlog_errno(ret);
1340 goto out;
Mark Fashehe7c17e42009-01-29 18:17:46 -08001341 }
1342
1343 if (!ocfs2_dx_root_inline(dx_root)) {
Joel Becker0cf2f762009-02-12 16:41:25 -08001344 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001345 lookup->dl_dx_leaf_bh,
1346 OCFS2_JOURNAL_ACCESS_WRITE);
1347 if (ret) {
1348 mlog_errno(ret);
1349 goto out;
1350 }
1351 }
1352
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001353 mlog(0, "Dir %llu: delete entry at index: %d\n",
1354 (unsigned long long)OCFS2_I(dir)->ip_blkno, index);
1355
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001356 ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry,
1357 leaf_bh, leaf_bh->b_data, leaf_bh->b_size);
1358 if (ret) {
1359 mlog_errno(ret);
1360 goto out;
1361 }
1362
Mark Fashehe7c17e42009-01-29 18:17:46 -08001363 max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, leaf_bh);
1364 trailer->db_free_rec_len = cpu_to_le16(max_rec_len);
1365 if (add_to_free_list) {
1366 trailer->db_free_next = dx_root->dr_free_blk;
1367 dx_root->dr_free_blk = cpu_to_le64(leaf_bh->b_blocknr);
1368 ocfs2_journal_dirty(handle, dx_root_bh);
1369 }
1370
1371 /* leaf_bh was journal_accessed for us in __ocfs2_delete_entry */
1372 ocfs2_journal_dirty(handle, leaf_bh);
1373
Mark Fashehe3a93c22009-02-17 15:29:35 -08001374 le32_add_cpu(&dx_root->dr_num_entries, -1);
1375 ocfs2_journal_dirty(handle, dx_root_bh);
1376
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001377 ocfs2_dx_list_remove_entry(entry_list, index);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001378
Mark Fashehe3a93c22009-02-17 15:29:35 -08001379 if (!ocfs2_dx_root_inline(dx_root))
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001380 ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001381
1382out:
1383 return ret;
1384}
1385
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001386static inline int ocfs2_delete_entry_id(handle_t *handle,
1387 struct inode *dir,
1388 struct ocfs2_dir_entry *de_del,
1389 struct buffer_head *bh)
1390{
1391 int ret;
1392 struct buffer_head *di_bh = NULL;
1393 struct ocfs2_dinode *di;
1394 struct ocfs2_inline_data *data;
1395
Joel Beckerb657c952008-11-13 14:49:11 -08001396 ret = ocfs2_read_inode_block(dir, &di_bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001397 if (ret) {
1398 mlog_errno(ret);
1399 goto out;
1400 }
1401
1402 di = (struct ocfs2_dinode *)di_bh->b_data;
1403 data = &di->id2.i_data;
1404
1405 ret = __ocfs2_delete_entry(handle, dir, de_del, bh, data->id_data,
1406 i_size_read(dir));
1407
1408 brelse(di_bh);
1409out:
1410 return ret;
1411}
1412
1413static inline int ocfs2_delete_entry_el(handle_t *handle,
1414 struct inode *dir,
1415 struct ocfs2_dir_entry *de_del,
1416 struct buffer_head *bh)
1417{
1418 return __ocfs2_delete_entry(handle, dir, de_del, bh, bh->b_data,
1419 bh->b_size);
1420}
1421
1422/*
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001423 * Delete a directory entry. Hide the details of directory
1424 * implementation from the caller.
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001425 */
1426int ocfs2_delete_entry(handle_t *handle,
1427 struct inode *dir,
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001428 struct ocfs2_dir_lookup_result *res)
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001429{
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001430 if (ocfs2_dir_indexed(dir))
1431 return ocfs2_delete_entry_dx(handle, dir, res);
1432
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001433 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001434 return ocfs2_delete_entry_id(handle, dir, res->dl_entry,
1435 res->dl_leaf_bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001436
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001437 return ocfs2_delete_entry_el(handle, dir, res->dl_entry,
1438 res->dl_leaf_bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001439}
1440
Mark Fasheh8553cf42007-09-13 16:29:01 -07001441/*
1442 * Check whether 'de' has enough room to hold an entry of
1443 * 'new_rec_len' bytes.
1444 */
1445static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de,
1446 unsigned int new_rec_len)
1447{
1448 unsigned int de_really_used;
1449
1450 /* Check whether this is an empty record with enough space */
1451 if (le64_to_cpu(de->inode) == 0 &&
1452 le16_to_cpu(de->rec_len) >= new_rec_len)
1453 return 1;
1454
1455 /*
1456 * Record might have free space at the end which we can
1457 * use.
1458 */
1459 de_really_used = OCFS2_DIR_REC_LEN(de->name_len);
1460 if (le16_to_cpu(de->rec_len) >= (de_really_used + new_rec_len))
1461 return 1;
1462
1463 return 0;
1464}
1465
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001466static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf,
1467 struct ocfs2_dx_entry *dx_new_entry)
1468{
1469 int i;
1470
1471 i = le16_to_cpu(dx_leaf->dl_list.de_num_used);
1472 dx_leaf->dl_list.de_entries[i] = *dx_new_entry;
1473
1474 le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
1475}
1476
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001477static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list,
1478 struct ocfs2_dx_hinfo *hinfo,
1479 u64 dirent_blk)
1480{
1481 int i;
1482 struct ocfs2_dx_entry *dx_entry;
1483
1484 i = le16_to_cpu(entry_list->de_num_used);
1485 dx_entry = &entry_list->de_entries[i];
1486
1487 memset(dx_entry, 0, sizeof(*dx_entry));
1488 dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash);
1489 dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);
1490 dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);
1491
1492 le16_add_cpu(&entry_list->de_num_used, 1);
1493}
1494
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001495static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
1496 struct ocfs2_dx_hinfo *hinfo,
1497 u64 dirent_blk,
1498 struct buffer_head *dx_leaf_bh)
1499{
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001500 int ret;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001501 struct ocfs2_dx_leaf *dx_leaf;
1502
Joel Becker0cf2f762009-02-12 16:41:25 -08001503 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001504 OCFS2_JOURNAL_ACCESS_WRITE);
1505 if (ret) {
1506 mlog_errno(ret);
1507 goto out;
1508 }
1509
1510 dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001511 ocfs2_dx_entry_list_insert(&dx_leaf->dl_list, hinfo, dirent_blk);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001512 ocfs2_journal_dirty(handle, dx_leaf_bh);
1513
1514out:
1515 return ret;
1516}
1517
Mark Fashehe3a93c22009-02-17 15:29:35 -08001518static void ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle,
1519 struct ocfs2_dx_hinfo *hinfo,
1520 u64 dirent_blk,
1521 struct ocfs2_dx_root_block *dx_root)
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001522{
Mark Fashehe3a93c22009-02-17 15:29:35 -08001523 ocfs2_dx_entry_list_insert(&dx_root->dr_entries, hinfo, dirent_blk);
1524}
1525
1526static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
1527 struct ocfs2_dir_lookup_result *lookup)
1528{
1529 int ret = 0;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001530 struct ocfs2_dx_root_block *dx_root;
Mark Fashehe3a93c22009-02-17 15:29:35 -08001531 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001532
Joel Becker0cf2f762009-02-12 16:41:25 -08001533 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001534 OCFS2_JOURNAL_ACCESS_WRITE);
1535 if (ret) {
1536 mlog_errno(ret);
1537 goto out;
1538 }
1539
Mark Fashehe3a93c22009-02-17 15:29:35 -08001540 dx_root = (struct ocfs2_dx_root_block *)lookup->dl_dx_root_bh->b_data;
1541 if (ocfs2_dx_root_inline(dx_root)) {
1542 ocfs2_dx_inline_root_insert(dir, handle,
1543 &lookup->dl_hinfo,
1544 lookup->dl_leaf_bh->b_blocknr,
1545 dx_root);
1546 } else {
1547 ret = __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo,
1548 lookup->dl_leaf_bh->b_blocknr,
1549 lookup->dl_dx_leaf_bh);
1550 if (ret)
1551 goto out;
1552 }
1553
1554 le32_add_cpu(&dx_root->dr_num_entries, 1);
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001555 ocfs2_journal_dirty(handle, dx_root_bh);
1556
1557out:
1558 return ret;
1559}
1560
Mark Fashehe7c17e42009-01-29 18:17:46 -08001561static void ocfs2_remove_block_from_free_list(struct inode *dir,
1562 handle_t *handle,
1563 struct ocfs2_dir_lookup_result *lookup)
1564{
1565 struct ocfs2_dir_block_trailer *trailer, *prev;
1566 struct ocfs2_dx_root_block *dx_root;
1567 struct buffer_head *bh;
1568
1569 trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb);
1570
1571 if (ocfs2_free_list_at_root(lookup)) {
1572 bh = lookup->dl_dx_root_bh;
1573 dx_root = (struct ocfs2_dx_root_block *)bh->b_data;
1574 dx_root->dr_free_blk = trailer->db_free_next;
1575 } else {
1576 bh = lookup->dl_prev_leaf_bh;
1577 prev = ocfs2_trailer_from_bh(bh, dir->i_sb);
1578 prev->db_free_next = trailer->db_free_next;
1579 }
1580
1581 trailer->db_free_rec_len = cpu_to_le16(0);
1582 trailer->db_free_next = cpu_to_le64(0);
1583
1584 ocfs2_journal_dirty(handle, bh);
1585 ocfs2_journal_dirty(handle, lookup->dl_leaf_bh);
1586}
1587
1588/*
1589 * This expects that a journal write has been reserved on
1590 * lookup->dl_prev_leaf_bh or lookup->dl_dx_root_bh
1591 */
1592static void ocfs2_recalc_free_list(struct inode *dir, handle_t *handle,
1593 struct ocfs2_dir_lookup_result *lookup)
1594{
1595 int max_rec_len;
1596 struct ocfs2_dir_block_trailer *trailer;
1597
1598 /* Walk dl_leaf_bh to figure out what the new free rec_len is. */
1599 max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, lookup->dl_leaf_bh);
1600 if (max_rec_len) {
1601 /*
1602 * There's still room in this block, so no need to remove it
1603 * from the free list. In this case, we just want to update
1604 * the rec len accounting.
1605 */
1606 trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb);
1607 trailer->db_free_rec_len = cpu_to_le16(max_rec_len);
1608 ocfs2_journal_dirty(handle, lookup->dl_leaf_bh);
1609 } else {
1610 ocfs2_remove_block_from_free_list(dir, handle, lookup);
1611 }
1612}
1613
Mark Fasheh316f4b92007-09-07 18:21:26 -07001614/* we don't always have a dentry for what we want to add, so people
1615 * like orphan dir can call this instead.
1616 *
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001617 * The lookup context must have been filled from
1618 * ocfs2_prepare_dir_for_insert.
Mark Fasheh316f4b92007-09-07 18:21:26 -07001619 */
1620int __ocfs2_add_entry(handle_t *handle,
1621 struct inode *dir,
1622 const char *name, int namelen,
1623 struct inode *inode, u64 blkno,
1624 struct buffer_head *parent_fe_bh,
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001625 struct ocfs2_dir_lookup_result *lookup)
Mark Fasheh316f4b92007-09-07 18:21:26 -07001626{
1627 unsigned long offset;
1628 unsigned short rec_len;
1629 struct ocfs2_dir_entry *de, *de1;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001630 struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_fe_bh->b_data;
1631 struct super_block *sb = dir->i_sb;
Mark Fasheh316f4b92007-09-07 18:21:26 -07001632 int retval, status;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001633 unsigned int size = sb->s_blocksize;
Mark Fasheh4a12ca32008-11-12 15:43:34 -08001634 struct buffer_head *insert_bh = lookup->dl_leaf_bh;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001635 char *data_start = insert_bh->b_data;
Mark Fasheh316f4b92007-09-07 18:21:26 -07001636
1637 mlog_entry_void();
1638
Mark Fasheh316f4b92007-09-07 18:21:26 -07001639 if (!namelen)
1640 return -EINVAL;
1641
Mark Fashehe7c17e42009-01-29 18:17:46 -08001642 if (ocfs2_dir_indexed(dir)) {
1643 struct buffer_head *bh;
1644
1645 /*
1646 * An indexed dir may require that we update the free space
1647 * list. Reserve a write to the previous node in the list so
1648 * that we don't fail later.
1649 *
1650 * XXX: This can be either a dx_root_block, or an unindexed
1651 * directory tree leaf block.
1652 */
1653 if (ocfs2_free_list_at_root(lookup)) {
1654 bh = lookup->dl_dx_root_bh;
Joel Becker0cf2f762009-02-12 16:41:25 -08001655 retval = ocfs2_journal_access_dr(handle,
1656 INODE_CACHE(dir), bh,
Mark Fashehe7c17e42009-01-29 18:17:46 -08001657 OCFS2_JOURNAL_ACCESS_WRITE);
1658 } else {
1659 bh = lookup->dl_prev_leaf_bh;
Joel Becker0cf2f762009-02-12 16:41:25 -08001660 retval = ocfs2_journal_access_db(handle,
1661 INODE_CACHE(dir), bh,
Mark Fashehe7c17e42009-01-29 18:17:46 -08001662 OCFS2_JOURNAL_ACCESS_WRITE);
1663 }
1664 if (retval) {
1665 mlog_errno(retval);
1666 return retval;
1667 }
1668 } else if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001669 data_start = di->id2.i_data.id_data;
1670 size = i_size_read(dir);
1671
1672 BUG_ON(insert_bh != parent_fe_bh);
1673 }
1674
Mark Fasheh316f4b92007-09-07 18:21:26 -07001675 rec_len = OCFS2_DIR_REC_LEN(namelen);
1676 offset = 0;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001677 de = (struct ocfs2_dir_entry *) data_start;
Mark Fasheh316f4b92007-09-07 18:21:26 -07001678 while (1) {
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07001679 BUG_ON((char *)de >= (size + data_start));
1680
Mark Fasheh316f4b92007-09-07 18:21:26 -07001681 /* These checks should've already been passed by the
1682 * prepare function, but I guess we can leave them
1683 * here anyway. */
1684 if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
1685 retval = -ENOENT;
1686 goto bail;
1687 }
1688 if (ocfs2_match(namelen, name, de)) {
1689 retval = -EEXIST;
1690 goto bail;
1691 }
Mark Fasheh8553cf42007-09-13 16:29:01 -07001692
Mark Fasheh87d35a72008-12-10 17:36:25 -08001693 /* We're guaranteed that we should have space, so we
1694 * can't possibly have hit the trailer...right? */
1695 mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size),
1696 "Hit dir trailer trying to insert %.*s "
1697 "(namelen %d) into directory %llu. "
1698 "offset is %lu, trailer offset is %d\n",
1699 namelen, name, namelen,
1700 (unsigned long long)parent_fe_bh->b_blocknr,
1701 offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
1702
Mark Fasheh8553cf42007-09-13 16:29:01 -07001703 if (ocfs2_dirent_would_fit(de, rec_len)) {
Mark Fasheh316f4b92007-09-07 18:21:26 -07001704 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
1705 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
1706 if (retval < 0) {
1707 mlog_errno(retval);
1708 goto bail;
1709 }
1710
Joel Becker13723d02008-10-17 19:25:01 -07001711 if (insert_bh == parent_fe_bh)
Joel Becker0cf2f762009-02-12 16:41:25 -08001712 status = ocfs2_journal_access_di(handle,
1713 INODE_CACHE(dir),
Joel Becker13723d02008-10-17 19:25:01 -07001714 insert_bh,
1715 OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001716 else {
Joel Becker0cf2f762009-02-12 16:41:25 -08001717 status = ocfs2_journal_access_db(handle,
1718 INODE_CACHE(dir),
Joel Becker13723d02008-10-17 19:25:01 -07001719 insert_bh,
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001720 OCFS2_JOURNAL_ACCESS_WRITE);
1721
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001722 if (ocfs2_dir_indexed(dir)) {
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08001723 status = ocfs2_dx_dir_insert(dir,
1724 handle,
1725 lookup);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001726 if (status) {
1727 mlog_errno(status);
1728 goto bail;
1729 }
1730 }
1731 }
1732
Mark Fasheh316f4b92007-09-07 18:21:26 -07001733 /* By now the buffer is marked for journaling */
1734 offset += le16_to_cpu(de->rec_len);
1735 if (le64_to_cpu(de->inode)) {
1736 de1 = (struct ocfs2_dir_entry *)((char *) de +
1737 OCFS2_DIR_REC_LEN(de->name_len));
1738 de1->rec_len =
1739 cpu_to_le16(le16_to_cpu(de->rec_len) -
1740 OCFS2_DIR_REC_LEN(de->name_len));
1741 de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1742 de = de1;
1743 }
1744 de->file_type = OCFS2_FT_UNKNOWN;
1745 if (blkno) {
1746 de->inode = cpu_to_le64(blkno);
1747 ocfs2_set_de_type(de, inode->i_mode);
1748 } else
1749 de->inode = 0;
1750 de->name_len = namelen;
1751 memcpy(de->name, name, namelen);
1752
Mark Fashehe7c17e42009-01-29 18:17:46 -08001753 if (ocfs2_dir_indexed(dir))
1754 ocfs2_recalc_free_list(dir, handle, lookup);
1755
Mark Fasheh316f4b92007-09-07 18:21:26 -07001756 dir->i_version++;
Joel Beckerec20cec2010-03-19 14:13:52 -07001757 ocfs2_journal_dirty(handle, insert_bh);
Mark Fasheh316f4b92007-09-07 18:21:26 -07001758 retval = 0;
1759 goto bail;
1760 }
Mark Fasheh87d35a72008-12-10 17:36:25 -08001761
Mark Fasheh316f4b92007-09-07 18:21:26 -07001762 offset += le16_to_cpu(de->rec_len);
1763 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
1764 }
1765
1766 /* when you think about it, the assert above should prevent us
1767 * from ever getting here. */
1768 retval = -ENOSPC;
1769bail:
1770
1771 mlog_exit(retval);
1772 return retval;
1773}
1774
Mark Fasheh23193e52007-09-12 13:01:18 -07001775static int ocfs2_dir_foreach_blk_id(struct inode *inode,
Mathieu Desnoyers2b47c362007-10-16 23:27:21 -07001776 u64 *f_version,
Mark Fasheh23193e52007-09-12 13:01:18 -07001777 loff_t *f_pos, void *priv,
Mark Fashehe7b34012007-09-24 14:25:27 -07001778 filldir_t filldir, int *filldir_err)
Mark Fasheh23193e52007-09-12 13:01:18 -07001779{
1780 int ret, i, filldir_ret;
1781 unsigned long offset = *f_pos;
1782 struct buffer_head *di_bh = NULL;
1783 struct ocfs2_dinode *di;
1784 struct ocfs2_inline_data *data;
1785 struct ocfs2_dir_entry *de;
1786
Joel Beckerb657c952008-11-13 14:49:11 -08001787 ret = ocfs2_read_inode_block(inode, &di_bh);
Mark Fasheh23193e52007-09-12 13:01:18 -07001788 if (ret) {
1789 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
1790 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1791 goto out;
1792 }
1793
1794 di = (struct ocfs2_dinode *)di_bh->b_data;
1795 data = &di->id2.i_data;
1796
1797 while (*f_pos < i_size_read(inode)) {
1798revalidate:
1799 /* If the dir block has changed since the last call to
1800 * readdir(2), then we might be pointing to an invalid
1801 * dirent right now. Scan from the start of the block
1802 * to make sure. */
1803 if (*f_version != inode->i_version) {
1804 for (i = 0; i < i_size_read(inode) && i < offset; ) {
1805 de = (struct ocfs2_dir_entry *)
1806 (data->id_data + i);
1807 /* It's too expensive to do a full
1808 * dirent test each time round this
1809 * loop, but we do have to test at
1810 * least that it is non-zero. A
1811 * failure will be detected in the
1812 * dirent test below. */
1813 if (le16_to_cpu(de->rec_len) <
1814 OCFS2_DIR_REC_LEN(1))
1815 break;
1816 i += le16_to_cpu(de->rec_len);
1817 }
1818 *f_pos = offset = i;
1819 *f_version = inode->i_version;
1820 }
1821
1822 de = (struct ocfs2_dir_entry *) (data->id_data + *f_pos);
1823 if (!ocfs2_check_dir_entry(inode, de, di_bh, *f_pos)) {
1824 /* On error, skip the f_pos to the end. */
1825 *f_pos = i_size_read(inode);
1826 goto out;
1827 }
1828 offset += le16_to_cpu(de->rec_len);
1829 if (le64_to_cpu(de->inode)) {
1830 /* We might block in the next section
1831 * if the data destination is
1832 * currently swapped out. So, use a
1833 * version stamp to detect whether or
1834 * not the directory has been modified
1835 * during the copy operation.
1836 */
Mathieu Desnoyers2b47c362007-10-16 23:27:21 -07001837 u64 version = *f_version;
Mark Fasheh23193e52007-09-12 13:01:18 -07001838 unsigned char d_type = DT_UNKNOWN;
1839
1840 if (de->file_type < OCFS2_FT_MAX)
1841 d_type = ocfs2_filetype_table[de->file_type];
1842
1843 filldir_ret = filldir(priv, de->name,
1844 de->name_len,
1845 *f_pos,
1846 le64_to_cpu(de->inode),
1847 d_type);
Mark Fashehe7b34012007-09-24 14:25:27 -07001848 if (filldir_ret) {
1849 if (filldir_err)
1850 *filldir_err = filldir_ret;
Mark Fasheh23193e52007-09-12 13:01:18 -07001851 break;
Mark Fashehe7b34012007-09-24 14:25:27 -07001852 }
Mark Fasheh23193e52007-09-12 13:01:18 -07001853 if (version != *f_version)
1854 goto revalidate;
1855 }
1856 *f_pos += le16_to_cpu(de->rec_len);
1857 }
1858
1859out:
1860 brelse(di_bh);
1861
1862 return 0;
1863}
1864
Mark Fasheh9b7895e2008-11-12 16:27:44 -08001865/*
1866 * NOTE: This function can be called against unindexed directories,
1867 * and indexed ones.
1868 */
Mark Fasheh23193e52007-09-12 13:01:18 -07001869static int ocfs2_dir_foreach_blk_el(struct inode *inode,
Mathieu Desnoyers2b47c362007-10-16 23:27:21 -07001870 u64 *f_version,
Mark Fasheh23193e52007-09-12 13:01:18 -07001871 loff_t *f_pos, void *priv,
Mark Fashehe7b34012007-09-24 14:25:27 -07001872 filldir_t filldir, int *filldir_err)
Mark Fashehccd979b2005-12-15 14:31:24 -08001873{
1874 int error = 0;
Mark Fashehaa958872006-04-21 13:49:02 -07001875 unsigned long offset, blk, last_ra_blk = 0;
1876 int i, stored;
Mark Fashehccd979b2005-12-15 14:31:24 -08001877 struct buffer_head * bh, * tmp;
1878 struct ocfs2_dir_entry * de;
Mark Fashehccd979b2005-12-15 14:31:24 -08001879 struct super_block * sb = inode->i_sb;
Mark Fashehaa958872006-04-21 13:49:02 -07001880 unsigned int ra_sectors = 16;
Mark Fashehccd979b2005-12-15 14:31:24 -08001881
1882 stored = 0;
1883 bh = NULL;
1884
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001885 offset = (*f_pos) & (sb->s_blocksize - 1);
Mark Fashehccd979b2005-12-15 14:31:24 -08001886
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001887 while (!error && !stored && *f_pos < i_size_read(inode)) {
1888 blk = (*f_pos) >> sb->s_blocksize_bits;
Joel Beckera22305c2008-11-13 14:49:17 -08001889 if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
1890 /* Skip the corrupt dirblock and keep trying */
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001891 *f_pos += sb->s_blocksize - offset;
Mark Fashehccd979b2005-12-15 14:31:24 -08001892 continue;
1893 }
1894
Mark Fashehaa958872006-04-21 13:49:02 -07001895 /* The idea here is to begin with 8k read-ahead and to stay
1896 * 4k ahead of our current position.
1897 *
1898 * TODO: Use the pagecache for this. We just need to
1899 * make sure it's cluster-safe... */
1900 if (!last_ra_blk
1901 || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
1902 for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
Mark Fashehccd979b2005-12-15 14:31:24 -08001903 i > 0; i--) {
Joel Beckera22305c2008-11-13 14:49:17 -08001904 tmp = NULL;
1905 if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
1906 OCFS2_BH_READAHEAD))
1907 brelse(tmp);
Mark Fashehccd979b2005-12-15 14:31:24 -08001908 }
Mark Fashehaa958872006-04-21 13:49:02 -07001909 last_ra_blk = blk;
1910 ra_sectors = 8;
Mark Fashehccd979b2005-12-15 14:31:24 -08001911 }
1912
1913revalidate:
1914 /* If the dir block has changed since the last call to
1915 * readdir(2), then we might be pointing to an invalid
1916 * dirent right now. Scan from the start of the block
1917 * to make sure. */
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001918 if (*f_version != inode->i_version) {
Mark Fashehccd979b2005-12-15 14:31:24 -08001919 for (i = 0; i < sb->s_blocksize && i < offset; ) {
1920 de = (struct ocfs2_dir_entry *) (bh->b_data + i);
1921 /* It's too expensive to do a full
1922 * dirent test each time round this
1923 * loop, but we do have to test at
1924 * least that it is non-zero. A
1925 * failure will be detected in the
1926 * dirent test below. */
1927 if (le16_to_cpu(de->rec_len) <
1928 OCFS2_DIR_REC_LEN(1))
1929 break;
1930 i += le16_to_cpu(de->rec_len);
1931 }
1932 offset = i;
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001933 *f_pos = ((*f_pos) & ~(sb->s_blocksize - 1))
Mark Fashehccd979b2005-12-15 14:31:24 -08001934 | offset;
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001935 *f_version = inode->i_version;
Mark Fashehccd979b2005-12-15 14:31:24 -08001936 }
1937
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001938 while (!error && *f_pos < i_size_read(inode)
Mark Fashehccd979b2005-12-15 14:31:24 -08001939 && offset < sb->s_blocksize) {
1940 de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
1941 if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
1942 /* On error, skip the f_pos to the
1943 next block. */
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001944 *f_pos = ((*f_pos) | (sb->s_blocksize - 1)) + 1;
Mark Fashehccd979b2005-12-15 14:31:24 -08001945 brelse(bh);
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001946 goto out;
Mark Fashehccd979b2005-12-15 14:31:24 -08001947 }
1948 offset += le16_to_cpu(de->rec_len);
1949 if (le64_to_cpu(de->inode)) {
1950 /* We might block in the next section
1951 * if the data destination is
1952 * currently swapped out. So, use a
1953 * version stamp to detect whether or
1954 * not the directory has been modified
1955 * during the copy operation.
1956 */
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001957 unsigned long version = *f_version;
Mark Fashehccd979b2005-12-15 14:31:24 -08001958 unsigned char d_type = DT_UNKNOWN;
1959
1960 if (de->file_type < OCFS2_FT_MAX)
1961 d_type = ocfs2_filetype_table[de->file_type];
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001962 error = filldir(priv, de->name,
Mark Fashehccd979b2005-12-15 14:31:24 -08001963 de->name_len,
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001964 *f_pos,
Mark Fasheh7e853672007-09-10 17:30:26 -07001965 le64_to_cpu(de->inode),
Mark Fashehccd979b2005-12-15 14:31:24 -08001966 d_type);
Mark Fashehe7b34012007-09-24 14:25:27 -07001967 if (error) {
1968 if (filldir_err)
1969 *filldir_err = error;
Mark Fashehccd979b2005-12-15 14:31:24 -08001970 break;
Mark Fashehe7b34012007-09-24 14:25:27 -07001971 }
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001972 if (version != *f_version)
Mark Fashehccd979b2005-12-15 14:31:24 -08001973 goto revalidate;
1974 stored ++;
1975 }
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001976 *f_pos += le16_to_cpu(de->rec_len);
Mark Fashehccd979b2005-12-15 14:31:24 -08001977 }
1978 offset = 0;
1979 brelse(bh);
Joel Beckera22305c2008-11-13 14:49:17 -08001980 bh = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -08001981 }
1982
1983 stored = 0;
Mark Fashehb8bc5f42007-09-10 17:17:52 -07001984out:
1985 return stored;
1986}
1987
Mathieu Desnoyers2b47c362007-10-16 23:27:21 -07001988static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version,
Mark Fashehe7b34012007-09-24 14:25:27 -07001989 loff_t *f_pos, void *priv, filldir_t filldir,
1990 int *filldir_err)
Mark Fasheh23193e52007-09-12 13:01:18 -07001991{
1992 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1993 return ocfs2_dir_foreach_blk_id(inode, f_version, f_pos, priv,
Mark Fashehe7b34012007-09-24 14:25:27 -07001994 filldir, filldir_err);
Mark Fasheh23193e52007-09-12 13:01:18 -07001995
Mark Fashehe7b34012007-09-24 14:25:27 -07001996 return ocfs2_dir_foreach_blk_el(inode, f_version, f_pos, priv, filldir,
1997 filldir_err);
Mark Fasheh23193e52007-09-12 13:01:18 -07001998}
1999
Mark Fashehb8bc5f42007-09-10 17:17:52 -07002000/*
Mark Fasheh5eae5b92007-09-10 17:50:51 -07002001 * This is intended to be called from inside other kernel functions,
2002 * so we fake some arguments.
2003 */
2004int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
2005 filldir_t filldir)
2006{
Mark Fashehe7b34012007-09-24 14:25:27 -07002007 int ret = 0, filldir_err = 0;
Mathieu Desnoyers2b47c362007-10-16 23:27:21 -07002008 u64 version = inode->i_version;
Mark Fasheh5eae5b92007-09-10 17:50:51 -07002009
2010 while (*f_pos < i_size_read(inode)) {
2011 ret = ocfs2_dir_foreach_blk(inode, &version, f_pos, priv,
Mark Fashehe7b34012007-09-24 14:25:27 -07002012 filldir, &filldir_err);
2013 if (ret || filldir_err)
Mark Fasheh5eae5b92007-09-10 17:50:51 -07002014 break;
2015 }
2016
Mark Fashehe7b34012007-09-24 14:25:27 -07002017 if (ret > 0)
2018 ret = -EIO;
2019
Mark Fasheh5eae5b92007-09-10 17:50:51 -07002020 return 0;
2021}
2022
2023/*
Mark Fashehb8bc5f42007-09-10 17:17:52 -07002024 * ocfs2_readdir()
2025 *
2026 */
2027int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
2028{
2029 int error = 0;
2030 struct inode *inode = filp->f_path.dentry->d_inode;
2031 int lock_level = 0;
2032
2033 mlog_entry("dirino=%llu\n",
2034 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2035
Mark Fashehe63aecb62007-10-18 15:30:42 -07002036 error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
Mark Fashehb8bc5f42007-09-10 17:17:52 -07002037 if (lock_level && error >= 0) {
2038 /* We release EX lock which used to update atime
2039 * and get PR lock again to reduce contention
2040 * on commonly accessed directories. */
Mark Fashehe63aecb62007-10-18 15:30:42 -07002041 ocfs2_inode_unlock(inode, 1);
Mark Fashehb8bc5f42007-09-10 17:17:52 -07002042 lock_level = 0;
Mark Fashehe63aecb62007-10-18 15:30:42 -07002043 error = ocfs2_inode_lock(inode, NULL, 0);
Mark Fashehb8bc5f42007-09-10 17:17:52 -07002044 }
2045 if (error < 0) {
2046 if (error != -ENOENT)
2047 mlog_errno(error);
2048 /* we haven't got any yet, so propagate the error. */
2049 goto bail_nolock;
2050 }
2051
2052 error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos,
Mark Fashehe7b34012007-09-24 14:25:27 -07002053 dirent, filldir, NULL);
Mark Fashehb8bc5f42007-09-10 17:17:52 -07002054
Mark Fashehe63aecb62007-10-18 15:30:42 -07002055 ocfs2_inode_unlock(inode, lock_level);
Mark Fashehccd979b2005-12-15 14:31:24 -08002056
Mark Fashehaa958872006-04-21 13:49:02 -07002057bail_nolock:
Mark Fashehb8bc5f42007-09-10 17:17:52 -07002058 mlog_exit(error);
Mark Fashehccd979b2005-12-15 14:31:24 -08002059
Mark Fashehb8bc5f42007-09-10 17:17:52 -07002060 return error;
Mark Fashehccd979b2005-12-15 14:31:24 -08002061}
2062
2063/*
Jes Sorensen1b1dcc12006-01-09 15:59:24 -08002064 * NOTE: this should always be called with parent dir i_mutex taken.
Mark Fashehccd979b2005-12-15 14:31:24 -08002065 */
2066int ocfs2_find_files_on_disk(const char *name,
2067 int namelen,
2068 u64 *blkno,
2069 struct inode *inode,
Mark Fasheh4a12ca32008-11-12 15:43:34 -08002070 struct ocfs2_dir_lookup_result *lookup)
Mark Fashehccd979b2005-12-15 14:31:24 -08002071{
2072 int status = -ENOENT;
Mark Fashehccd979b2005-12-15 14:31:24 -08002073
Mark Fasheh4a12ca32008-11-12 15:43:34 -08002074 mlog(0, "name=%.*s, blkno=%p, inode=%llu\n", namelen, name, blkno,
2075 (unsigned long long)OCFS2_I(inode)->ip_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08002076
Mark Fasheh4a12ca32008-11-12 15:43:34 -08002077 status = ocfs2_find_entry(name, namelen, inode, lookup);
2078 if (status)
Mark Fashehccd979b2005-12-15 14:31:24 -08002079 goto leave;
Mark Fashehccd979b2005-12-15 14:31:24 -08002080
Mark Fasheh4a12ca32008-11-12 15:43:34 -08002081 *blkno = le64_to_cpu(lookup->dl_entry->inode);
Mark Fashehccd979b2005-12-15 14:31:24 -08002082
2083 status = 0;
2084leave:
Mark Fashehccd979b2005-12-15 14:31:24 -08002085
Mark Fashehccd979b2005-12-15 14:31:24 -08002086 return status;
2087}
2088
Mark Fashehbe94d112007-09-11 15:22:06 -07002089/*
2090 * Convenience function for callers which just want the block number
2091 * mapped to a name and don't require the full dirent info, etc.
2092 */
2093int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
2094 int namelen, u64 *blkno)
2095{
2096 int ret;
Mark Fasheh4a12ca32008-11-12 15:43:34 -08002097 struct ocfs2_dir_lookup_result lookup = { NULL, };
Mark Fashehbe94d112007-09-11 15:22:06 -07002098
Mark Fasheh4a12ca32008-11-12 15:43:34 -08002099 ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &lookup);
2100 ocfs2_free_dir_lookup_result(&lookup);
Mark Fashehbe94d112007-09-11 15:22:06 -07002101
2102 return ret;
2103}
2104
Mark Fashehccd979b2005-12-15 14:31:24 -08002105/* Check for a name within a directory.
2106 *
2107 * Return 0 if the name does not exist
2108 * Return -EEXIST if the directory contains the name
2109 *
Jes Sorensen1b1dcc12006-01-09 15:59:24 -08002110 * Callers should have i_mutex + a cluster lock on dir
Mark Fashehccd979b2005-12-15 14:31:24 -08002111 */
2112int ocfs2_check_dir_for_entry(struct inode *dir,
2113 const char *name,
2114 int namelen)
2115{
2116 int ret;
Mark Fasheh4a12ca32008-11-12 15:43:34 -08002117 struct ocfs2_dir_lookup_result lookup = { NULL, };
Mark Fashehccd979b2005-12-15 14:31:24 -08002118
Mark Fashehb06970532006-03-03 10:24:33 -08002119 mlog_entry("dir %llu, name '%.*s'\n",
2120 (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
Mark Fashehccd979b2005-12-15 14:31:24 -08002121
2122 ret = -EEXIST;
Mark Fasheh4a12ca32008-11-12 15:43:34 -08002123 if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0)
Mark Fashehccd979b2005-12-15 14:31:24 -08002124 goto bail;
2125
2126 ret = 0;
2127bail:
Mark Fasheh4a12ca32008-11-12 15:43:34 -08002128 ocfs2_free_dir_lookup_result(&lookup);
Mark Fashehccd979b2005-12-15 14:31:24 -08002129
2130 mlog_exit(ret);
2131 return ret;
2132}
2133
/* Scan state shared between ocfs2_empty_dir() and its filldir callback. */
struct ocfs2_empty_dir_priv {
	unsigned int seen_dot;		/* "." was found at position 0 */
	unsigned int seen_dot_dot;	/* ".." was found right after "." */
	unsigned int seen_other;	/* something besides "." / ".." exists */
	unsigned int dx_dir;		/* set by ocfs2_empty_dir_dx() */
};
2140static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
2141 loff_t pos, u64 ino, unsigned type)
2142{
2143 struct ocfs2_empty_dir_priv *p = priv;
2144
2145 /*
2146 * Check the positions of "." and ".." records to be sure
2147 * they're in the correct place.
Mark Fashehe3a93c22009-02-17 15:29:35 -08002148 *
2149 * Indexed directories don't need to proceed past the first
2150 * two entries, so we end the scan after seeing '..'. Despite
2151 * that, we allow the scan to proceed In the event that we
2152 * have a corrupted indexed directory (no dot or dot dot
2153 * entries). This allows us to double check for existing
2154 * entries which might not have been found in the index.
Mark Fasheh0bfbbf62007-09-12 11:19:00 -07002155 */
2156 if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) {
2157 p->seen_dot = 1;
2158 return 0;
2159 }
2160
2161 if (name_len == 2 && !strncmp("..", name, 2) &&
2162 pos == OCFS2_DIR_REC_LEN(1)) {
2163 p->seen_dot_dot = 1;
Mark Fashehe3a93c22009-02-17 15:29:35 -08002164
2165 if (p->dx_dir && p->seen_dot)
2166 return 1;
2167
Mark Fasheh0bfbbf62007-09-12 11:19:00 -07002168 return 0;
2169 }
2170
2171 p->seen_other = 1;
2172 return 1;
2173}
Mark Fashehe3a93c22009-02-17 15:29:35 -08002174
2175static int ocfs2_empty_dir_dx(struct inode *inode,
2176 struct ocfs2_empty_dir_priv *priv)
2177{
2178 int ret;
2179 struct buffer_head *di_bh = NULL;
2180 struct buffer_head *dx_root_bh = NULL;
2181 struct ocfs2_dinode *di;
2182 struct ocfs2_dx_root_block *dx_root;
2183
2184 priv->dx_dir = 1;
2185
2186 ret = ocfs2_read_inode_block(inode, &di_bh);
2187 if (ret) {
2188 mlog_errno(ret);
2189 goto out;
2190 }
2191 di = (struct ocfs2_dinode *)di_bh->b_data;
2192
2193 ret = ocfs2_read_dx_root(inode, di, &dx_root_bh);
2194 if (ret) {
2195 mlog_errno(ret);
2196 goto out;
2197 }
2198 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
2199
2200 if (le32_to_cpu(dx_root->dr_num_entries) != 2)
2201 priv->seen_other = 1;
2202
2203out:
2204 brelse(di_bh);
2205 brelse(dx_root_bh);
2206 return ret;
2207}
2208
Mark Fashehccd979b2005-12-15 14:31:24 -08002209/*
2210 * routine to check that the specified directory is empty (for rmdir)
Mark Fasheh0bfbbf62007-09-12 11:19:00 -07002211 *
2212 * Returns 1 if dir is empty, zero otherwise.
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002213 *
Mark Fashehe3a93c22009-02-17 15:29:35 -08002214 * XXX: This is a performance problem for unindexed directories.
Mark Fashehccd979b2005-12-15 14:31:24 -08002215 */
2216int ocfs2_empty_dir(struct inode *inode)
2217{
Mark Fasheh0bfbbf62007-09-12 11:19:00 -07002218 int ret;
2219 loff_t start = 0;
2220 struct ocfs2_empty_dir_priv priv;
Mark Fashehccd979b2005-12-15 14:31:24 -08002221
Mark Fasheh0bfbbf62007-09-12 11:19:00 -07002222 memset(&priv, 0, sizeof(priv));
2223
Mark Fashehe3a93c22009-02-17 15:29:35 -08002224 if (ocfs2_dir_indexed(inode)) {
2225 ret = ocfs2_empty_dir_dx(inode, &priv);
2226 if (ret)
2227 mlog_errno(ret);
2228 /*
2229 * We still run ocfs2_dir_foreach to get the checks
2230 * for "." and "..".
2231 */
2232 }
2233
Mark Fasheh0bfbbf62007-09-12 11:19:00 -07002234 ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir);
2235 if (ret)
2236 mlog_errno(ret);
2237
2238 if (!priv.seen_dot || !priv.seen_dot_dot) {
2239 mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
Mark Fashehb06970532006-03-03 10:24:33 -08002240 (unsigned long long)OCFS2_I(inode)->ip_blkno);
Mark Fasheh0bfbbf62007-09-12 11:19:00 -07002241 /*
2242 * XXX: Is it really safe to allow an unlink to continue?
2243 */
Mark Fashehccd979b2005-12-15 14:31:24 -08002244 return 1;
2245 }
2246
Mark Fasheh0bfbbf62007-09-12 11:19:00 -07002247 return !priv.seen_other;
Mark Fashehccd979b2005-12-15 14:31:24 -08002248}
2249
Mark Fasheh87d35a72008-12-10 17:36:25 -08002250/*
2251 * Fills "." and ".." dirents in a new directory block. Returns dirent for
2252 * "..", which might be used during creation of a directory with a trailing
2253 * header. It is otherwise safe to ignore the return code.
2254 */
2255static struct ocfs2_dir_entry *ocfs2_fill_initial_dirents(struct inode *inode,
2256 struct inode *parent,
2257 char *start,
2258 unsigned int size)
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002259{
2260 struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
2261
2262 de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
2263 de->name_len = 1;
2264 de->rec_len =
2265 cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
2266 strcpy(de->name, ".");
2267 ocfs2_set_de_type(de, S_IFDIR);
2268
2269 de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
2270 de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
2271 de->rec_len = cpu_to_le16(size - OCFS2_DIR_REC_LEN(1));
2272 de->name_len = 2;
2273 strcpy(de->name, "..");
2274 ocfs2_set_de_type(de, S_IFDIR);
Mark Fasheh87d35a72008-12-10 17:36:25 -08002275
2276 return de;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002277}
2278
2279/*
2280 * This works together with code in ocfs2_mknod_locked() which sets
2281 * the inline-data flag and initializes the inline-data section.
2282 */
2283static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
2284 handle_t *handle,
2285 struct inode *parent,
2286 struct inode *inode,
2287 struct buffer_head *di_bh)
2288{
2289 int ret;
2290 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2291 struct ocfs2_inline_data *data = &di->id2.i_data;
2292 unsigned int size = le16_to_cpu(data->id_count);
2293
Joel Becker0cf2f762009-02-12 16:41:25 -08002294 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
Joel Becker13723d02008-10-17 19:25:01 -07002295 OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002296 if (ret) {
2297 mlog_errno(ret);
2298 goto out;
2299 }
2300
2301 ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002302 ocfs2_journal_dirty(handle, di_bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002303
2304 i_size_write(inode, size);
2305 inode->i_nlink = 2;
2306 inode->i_blocks = ocfs2_inode_sector_count(inode);
2307
2308 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
2309 if (ret < 0)
2310 mlog_errno(ret);
2311
2312out:
2313 return ret;
2314}
2315
2316static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2317 handle_t *handle,
2318 struct inode *parent,
2319 struct inode *inode,
2320 struct buffer_head *fe_bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002321 struct ocfs2_alloc_context *data_ac,
2322 struct buffer_head **ret_new_bh)
Mark Fasheh316f4b92007-09-07 18:21:26 -07002323{
2324 int status;
Mark Fasheh87d35a72008-12-10 17:36:25 -08002325 unsigned int size = osb->sb->s_blocksize;
Mark Fasheh316f4b92007-09-07 18:21:26 -07002326 struct buffer_head *new_bh = NULL;
Mark Fasheh87d35a72008-12-10 17:36:25 -08002327 struct ocfs2_dir_entry *de;
Mark Fasheh316f4b92007-09-07 18:21:26 -07002328
2329 mlog_entry_void();
2330
Mark Fashehe7c17e42009-01-29 18:17:46 -08002331 if (ocfs2_new_dir_wants_trailer(inode))
Mark Fasheh87d35a72008-12-10 17:36:25 -08002332 size = ocfs2_dir_trailer_blk_off(parent->i_sb);
2333
Mark Fasheh316f4b92007-09-07 18:21:26 -07002334 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
2335 data_ac, NULL, &new_bh);
2336 if (status < 0) {
2337 mlog_errno(status);
2338 goto bail;
2339 }
2340
Joel Becker8cb471e2009-02-10 20:00:41 -08002341 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
Mark Fasheh316f4b92007-09-07 18:21:26 -07002342
Joel Becker0cf2f762009-02-12 16:41:25 -08002343 status = ocfs2_journal_access_db(handle, INODE_CACHE(inode), new_bh,
Joel Becker13723d02008-10-17 19:25:01 -07002344 OCFS2_JOURNAL_ACCESS_CREATE);
Mark Fasheh316f4b92007-09-07 18:21:26 -07002345 if (status < 0) {
2346 mlog_errno(status);
2347 goto bail;
2348 }
2349 memset(new_bh->b_data, 0, osb->sb->s_blocksize);
2350
Mark Fasheh87d35a72008-12-10 17:36:25 -08002351 de = ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data, size);
Mark Fashehe7c17e42009-01-29 18:17:46 -08002352 if (ocfs2_new_dir_wants_trailer(inode)) {
2353 int size = le16_to_cpu(de->rec_len);
2354
2355 /*
2356 * Figure out the size of the hole left over after
2357 * insertion of '.' and '..'. The trailer wants this
2358 * information.
2359 */
2360 size -= OCFS2_DIR_REC_LEN(2);
2361 size -= sizeof(struct ocfs2_dir_block_trailer);
2362
2363 ocfs2_init_dir_trailer(inode, new_bh, size);
2364 }
Mark Fasheh316f4b92007-09-07 18:21:26 -07002365
Joel Beckerec20cec2010-03-19 14:13:52 -07002366 ocfs2_journal_dirty(handle, new_bh);
Mark Fasheh316f4b92007-09-07 18:21:26 -07002367
2368 i_size_write(inode, inode->i_sb->s_blocksize);
2369 inode->i_nlink = 2;
2370 inode->i_blocks = ocfs2_inode_sector_count(inode);
2371 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
2372 if (status < 0) {
2373 mlog_errno(status);
2374 goto bail;
2375 }
2376
2377 status = 0;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002378 if (ret_new_bh) {
2379 *ret_new_bh = new_bh;
2380 new_bh = NULL;
2381 }
Mark Fasheh316f4b92007-09-07 18:21:26 -07002382bail:
Mark Fasheha81cb882008-10-07 14:25:16 -07002383 brelse(new_bh);
Mark Fasheh316f4b92007-09-07 18:21:26 -07002384
2385 mlog_exit(status);
2386 return status;
2387}
2388
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002389static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2390 handle_t *handle, struct inode *dir,
2391 struct buffer_head *di_bh,
Mark Fashehe7c17e42009-01-29 18:17:46 -08002392 struct buffer_head *dirdata_bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002393 struct ocfs2_alloc_context *meta_ac,
Mark Fashehe3a93c22009-02-17 15:29:35 -08002394 int dx_inline, u32 num_entries,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002395 struct buffer_head **ret_dx_root_bh)
2396{
2397 int ret;
2398 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
2399 u16 dr_suballoc_bit;
Joel Becker2b6cb572010-03-26 10:09:15 +08002400 u64 suballoc_loc, dr_blkno;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002401 unsigned int num_bits;
2402 struct buffer_head *dx_root_bh = NULL;
2403 struct ocfs2_dx_root_block *dx_root;
Mark Fashehe7c17e42009-01-29 18:17:46 -08002404 struct ocfs2_dir_block_trailer *trailer =
2405 ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002406
Joel Becker2b6cb572010-03-26 10:09:15 +08002407 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
2408 &dr_suballoc_bit, &num_bits, &dr_blkno);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002409 if (ret) {
2410 mlog_errno(ret);
2411 goto out;
2412 }
2413
2414 mlog(0, "Dir %llu, attach new index block: %llu\n",
2415 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2416 (unsigned long long)dr_blkno);
2417
2418 dx_root_bh = sb_getblk(osb->sb, dr_blkno);
2419 if (dx_root_bh == NULL) {
2420 ret = -EIO;
2421 goto out;
2422 }
Joel Becker8cb471e2009-02-10 20:00:41 -08002423 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002424
Joel Becker0cf2f762009-02-12 16:41:25 -08002425 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002426 OCFS2_JOURNAL_ACCESS_CREATE);
2427 if (ret < 0) {
2428 mlog_errno(ret);
2429 goto out;
2430 }
2431
2432 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
2433 memset(dx_root, 0, osb->sb->s_blocksize);
2434 strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
Tiger Yangb89c5422010-01-25 14:11:06 +08002435 dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
Joel Becker2b6cb572010-03-26 10:09:15 +08002436 dx_root->dr_suballoc_loc = cpu_to_le64(suballoc_loc);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002437 dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit);
2438 dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
2439 dx_root->dr_blkno = cpu_to_le64(dr_blkno);
2440 dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);
Mark Fashehe3a93c22009-02-17 15:29:35 -08002441 dx_root->dr_num_entries = cpu_to_le32(num_entries);
Mark Fashehe7c17e42009-01-29 18:17:46 -08002442 if (le16_to_cpu(trailer->db_free_rec_len))
2443 dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
2444 else
2445 dx_root->dr_free_blk = cpu_to_le64(0);
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002446
2447 if (dx_inline) {
2448 dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE;
2449 dx_root->dr_entries.de_count =
2450 cpu_to_le16(ocfs2_dx_entries_per_root(osb->sb));
2451 } else {
2452 dx_root->dr_list.l_count =
2453 cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
2454 }
Joel Beckerec20cec2010-03-19 14:13:52 -07002455 ocfs2_journal_dirty(handle, dx_root_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002456
Joel Becker0cf2f762009-02-12 16:41:25 -08002457 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002458 OCFS2_JOURNAL_ACCESS_CREATE);
2459 if (ret) {
2460 mlog_errno(ret);
2461 goto out;
2462 }
2463
2464 di->i_dx_root = cpu_to_le64(dr_blkno);
2465
Tao Ma8ac33dc2010-12-15 16:30:00 +08002466 spin_lock(&OCFS2_I(dir)->ip_lock);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002467 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
2468 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
Tao Ma8ac33dc2010-12-15 16:30:00 +08002469 spin_unlock(&OCFS2_I(dir)->ip_lock);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002470
Joel Beckerec20cec2010-03-19 14:13:52 -07002471 ocfs2_journal_dirty(handle, di_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002472
2473 *ret_dx_root_bh = dx_root_bh;
2474 dx_root_bh = NULL;
2475
2476out:
2477 brelse(dx_root_bh);
2478 return ret;
2479}
2480
2481static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
2482 handle_t *handle, struct inode *dir,
2483 struct buffer_head **dx_leaves,
2484 int num_dx_leaves, u64 start_blk)
2485{
2486 int ret, i;
2487 struct ocfs2_dx_leaf *dx_leaf;
2488 struct buffer_head *bh;
2489
2490 for (i = 0; i < num_dx_leaves; i++) {
2491 bh = sb_getblk(osb->sb, start_blk + i);
2492 if (bh == NULL) {
2493 ret = -EIO;
2494 goto out;
2495 }
2496 dx_leaves[i] = bh;
2497
Joel Becker8cb471e2009-02-10 20:00:41 -08002498 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002499
Joel Becker0cf2f762009-02-12 16:41:25 -08002500 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002501 OCFS2_JOURNAL_ACCESS_CREATE);
2502 if (ret < 0) {
2503 mlog_errno(ret);
2504 goto out;
2505 }
2506
2507 dx_leaf = (struct ocfs2_dx_leaf *) bh->b_data;
2508
2509 memset(dx_leaf, 0, osb->sb->s_blocksize);
2510 strcpy(dx_leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE);
2511 dx_leaf->dl_fs_generation = cpu_to_le32(osb->fs_generation);
2512 dx_leaf->dl_blkno = cpu_to_le64(bh->b_blocknr);
2513 dx_leaf->dl_list.de_count =
2514 cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb));
2515
2516 mlog(0,
2517 "Dir %llu, format dx_leaf: %llu, entry count: %u\n",
2518 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2519 (unsigned long long)bh->b_blocknr,
2520 le16_to_cpu(dx_leaf->dl_list.de_count));
2521
2522 ocfs2_journal_dirty(handle, bh);
2523 }
2524
2525 ret = 0;
2526out:
2527 return ret;
2528}
2529
2530/*
2531 * Allocates and formats a new cluster for use in an indexed dir
2532 * leaf. This version will not do the extent insert, so that it can be
2533 * used by operations which need careful ordering.
2534 */
2535static int __ocfs2_dx_dir_new_cluster(struct inode *dir,
2536 u32 cpos, handle_t *handle,
2537 struct ocfs2_alloc_context *data_ac,
2538 struct buffer_head **dx_leaves,
2539 int num_dx_leaves, u64 *ret_phys_blkno)
2540{
2541 int ret;
2542 u32 phys, num;
2543 u64 phys_blkno;
2544 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2545
2546 /*
2547 * XXX: For create, this should claim cluster for the index
2548 * *before* the unindexed insert so that we have a better
2549 * chance of contiguousness as the directory grows in number
2550 * of entries.
2551 */
Joel Becker1ed9b772010-05-06 13:59:06 +08002552 ret = __ocfs2_claim_clusters(handle, data_ac, 1, 1, &phys, &num);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002553 if (ret) {
2554 mlog_errno(ret);
2555 goto out;
2556 }
2557
2558 /*
2559 * Format the new cluster first. That way, we're inserting
2560 * valid data.
2561 */
2562 phys_blkno = ocfs2_clusters_to_blocks(osb->sb, phys);
2563 ret = ocfs2_dx_dir_format_cluster(osb, handle, dir, dx_leaves,
2564 num_dx_leaves, phys_blkno);
2565 if (ret) {
2566 mlog_errno(ret);
2567 goto out;
2568 }
2569
2570 *ret_phys_blkno = phys_blkno;
2571out:
2572 return ret;
2573}
2574
2575static int ocfs2_dx_dir_new_cluster(struct inode *dir,
2576 struct ocfs2_extent_tree *et,
2577 u32 cpos, handle_t *handle,
2578 struct ocfs2_alloc_context *data_ac,
2579 struct ocfs2_alloc_context *meta_ac,
2580 struct buffer_head **dx_leaves,
2581 int num_dx_leaves)
2582{
2583 int ret;
2584 u64 phys_blkno;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002585
2586 ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves,
2587 num_dx_leaves, &phys_blkno);
2588 if (ret) {
2589 mlog_errno(ret);
2590 goto out;
2591 }
2592
Joel Beckercc79d8c2009-02-13 03:24:43 -08002593 ret = ocfs2_insert_extent(handle, et, cpos, phys_blkno, 1, 0,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002594 meta_ac);
2595 if (ret)
2596 mlog_errno(ret);
2597out:
2598 return ret;
2599}
2600
2601static struct buffer_head **ocfs2_dx_dir_kmalloc_leaves(struct super_block *sb,
2602 int *ret_num_leaves)
2603{
2604 int num_dx_leaves = ocfs2_clusters_to_blocks(sb, 1);
2605 struct buffer_head **dx_leaves;
2606
2607 dx_leaves = kcalloc(num_dx_leaves, sizeof(struct buffer_head *),
2608 GFP_NOFS);
2609 if (dx_leaves && ret_num_leaves)
2610 *ret_num_leaves = num_dx_leaves;
2611
2612 return dx_leaves;
2613}
2614
2615static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
2616 handle_t *handle,
2617 struct inode *parent,
2618 struct inode *inode,
2619 struct buffer_head *di_bh,
2620 struct ocfs2_alloc_context *data_ac,
2621 struct ocfs2_alloc_context *meta_ac)
2622{
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002623 int ret;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002624 struct buffer_head *leaf_bh = NULL;
2625 struct buffer_head *dx_root_bh = NULL;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002626 struct ocfs2_dx_hinfo hinfo;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002627 struct ocfs2_dx_root_block *dx_root;
2628 struct ocfs2_dx_entry_list *entry_list;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002629
2630 /*
2631 * Our strategy is to create the directory as though it were
2632 * unindexed, then add the index block. This works with very
2633 * little complication since the state of a new directory is a
2634 * very well known quantity.
2635 *
2636 * Essentially, we have two dirents ("." and ".."), in the 1st
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002637 * block which need indexing. These are easily inserted into
2638 * the index block.
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002639 */
2640
2641 ret = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh,
2642 data_ac, &leaf_bh);
2643 if (ret) {
2644 mlog_errno(ret);
2645 goto out;
2646 }
2647
Mark Fashehe7c17e42009-01-29 18:17:46 -08002648 ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, leaf_bh,
Mark Fashehe3a93c22009-02-17 15:29:35 -08002649 meta_ac, 1, 2, &dx_root_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002650 if (ret) {
2651 mlog_errno(ret);
2652 goto out;
2653 }
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002654 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
2655 entry_list = &dx_root->dr_entries;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002656
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002657 /* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */
Mark Fashehe7c17e42009-01-29 18:17:46 -08002658 ocfs2_dx_dir_name_hash(inode, ".", 1, &hinfo);
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002659 ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002660
2661 ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo);
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002662 ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002663
2664out:
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002665 brelse(dx_root_bh);
2666 brelse(leaf_bh);
2667 return ret;
2668}
2669
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002670int ocfs2_fill_new_dir(struct ocfs2_super *osb,
2671 handle_t *handle,
2672 struct inode *parent,
2673 struct inode *inode,
2674 struct buffer_head *fe_bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002675 struct ocfs2_alloc_context *data_ac,
2676 struct ocfs2_alloc_context *meta_ac)
2677
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002678{
2679 BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL);
2680
2681 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
2682 return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh);
2683
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002684 if (ocfs2_supports_indexed_dirs(osb))
2685 return ocfs2_fill_new_dir_dx(osb, handle, parent, inode, fe_bh,
2686 data_ac, meta_ac);
2687
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002688 return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002689 data_ac, NULL);
2690}
2691
2692static int ocfs2_dx_dir_index_block(struct inode *dir,
2693 handle_t *handle,
2694 struct buffer_head **dx_leaves,
2695 int num_dx_leaves,
Mark Fashehe3a93c22009-02-17 15:29:35 -08002696 u32 *num_dx_entries,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002697 struct buffer_head *dirent_bh)
2698{
Tao Ma0fba8132009-03-19 05:08:43 +08002699 int ret = 0, namelen, i;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002700 char *de_buf, *limit;
2701 struct ocfs2_dir_entry *de;
2702 struct buffer_head *dx_leaf_bh;
2703 struct ocfs2_dx_hinfo hinfo;
2704 u64 dirent_blk = dirent_bh->b_blocknr;
2705
2706 de_buf = dirent_bh->b_data;
2707 limit = de_buf + dir->i_sb->s_blocksize;
2708
2709 while (de_buf < limit) {
2710 de = (struct ocfs2_dir_entry *)de_buf;
2711
2712 namelen = de->name_len;
2713 if (!namelen || !de->inode)
2714 goto inc;
2715
2716 ocfs2_dx_dir_name_hash(dir, de->name, namelen, &hinfo);
2717
2718 i = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), &hinfo);
2719 dx_leaf_bh = dx_leaves[i];
2720
2721 ret = __ocfs2_dx_dir_leaf_insert(dir, handle, &hinfo,
2722 dirent_blk, dx_leaf_bh);
2723 if (ret) {
2724 mlog_errno(ret);
2725 goto out;
2726 }
2727
Mark Fashehe3a93c22009-02-17 15:29:35 -08002728 *num_dx_entries = *num_dx_entries + 1;
2729
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002730inc:
2731 de_buf += le16_to_cpu(de->rec_len);
2732 }
2733
2734out:
2735 return ret;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002736}
Mark Fashehe7c17e42009-01-29 18:17:46 -08002737
2738/*
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002739 * XXX: This expects dx_root_bh to already be part of the transaction.
2740 */
2741static void ocfs2_dx_dir_index_root_block(struct inode *dir,
2742 struct buffer_head *dx_root_bh,
2743 struct buffer_head *dirent_bh)
2744{
2745 char *de_buf, *limit;
2746 struct ocfs2_dx_root_block *dx_root;
2747 struct ocfs2_dir_entry *de;
2748 struct ocfs2_dx_hinfo hinfo;
2749 u64 dirent_blk = dirent_bh->b_blocknr;
2750
2751 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
2752
2753 de_buf = dirent_bh->b_data;
2754 limit = de_buf + dir->i_sb->s_blocksize;
2755
2756 while (de_buf < limit) {
2757 de = (struct ocfs2_dir_entry *)de_buf;
2758
2759 if (!de->name_len || !de->inode)
2760 goto inc;
2761
2762 ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo);
2763
2764 mlog(0,
2765 "dir: %llu, major: 0x%x minor: 0x%x, index: %u, name: %.*s\n",
2766 (unsigned long long)dir->i_ino, hinfo.major_hash,
2767 hinfo.minor_hash,
2768 le16_to_cpu(dx_root->dr_entries.de_num_used),
2769 de->name_len, de->name);
2770
2771 ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo,
2772 dirent_blk);
Mark Fashehe3a93c22009-02-17 15:29:35 -08002773
2774 le32_add_cpu(&dx_root->dr_num_entries, 1);
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002775inc:
2776 de_buf += le16_to_cpu(de->rec_len);
2777 }
2778}
2779
2780/*
2781 * Count the number of inline directory entries in di_bh and compare
2782 * them against the number of entries we can hold in an inline dx root
2783 * block.
2784 */
2785static int ocfs2_new_dx_should_be_inline(struct inode *dir,
2786 struct buffer_head *di_bh)
2787{
2788 int dirent_count = 0;
2789 char *de_buf, *limit;
2790 struct ocfs2_dir_entry *de;
2791 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2792
2793 de_buf = di->id2.i_data.id_data;
2794 limit = de_buf + i_size_read(dir);
2795
2796 while (de_buf < limit) {
2797 de = (struct ocfs2_dir_entry *)de_buf;
2798
2799 if (de->name_len && de->inode)
2800 dirent_count++;
2801
2802 de_buf += le16_to_cpu(de->rec_len);
2803 }
2804
2805 /* We are careful to leave room for one extra record. */
2806 return dirent_count < ocfs2_dx_entries_per_root(dir->i_sb);
2807}
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002808
Mark Fasheh87d35a72008-12-10 17:36:25 -08002809/*
2810 * Expand rec_len of the rightmost dirent in a directory block so that it
2811 * contains the end of our valid space for dirents. We do this during
2812 * expansion from an inline directory to one with extents. The first dir block
2813 * in that case is taken from the inline data portion of the inode block.
2814 *
Mark Fashehe7c17e42009-01-29 18:17:46 -08002815 * This will also return the largest amount of contiguous space for a dirent
2816 * in the block. That value is *not* necessarily the last dirent, even after
2817 * expansion. The directory indexing code wants this value for free space
2818 * accounting. We do this here since we're already walking the entire dir
2819 * block.
2820 *
Mark Fasheh87d35a72008-12-10 17:36:25 -08002821 * We add the dir trailer if this filesystem wants it.
2822 */
Mark Fashehe7c17e42009-01-29 18:17:46 -08002823static unsigned int ocfs2_expand_last_dirent(char *start, unsigned int old_size,
2824 struct inode *dir)
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002825{
Mark Fashehe7c17e42009-01-29 18:17:46 -08002826 struct super_block *sb = dir->i_sb;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002827 struct ocfs2_dir_entry *de;
2828 struct ocfs2_dir_entry *prev_de;
2829 char *de_buf, *limit;
Mark Fasheh87d35a72008-12-10 17:36:25 -08002830 unsigned int new_size = sb->s_blocksize;
Mark Fashehe7c17e42009-01-29 18:17:46 -08002831 unsigned int bytes, this_hole;
2832 unsigned int largest_hole = 0;
Mark Fasheh87d35a72008-12-10 17:36:25 -08002833
Mark Fashehe7c17e42009-01-29 18:17:46 -08002834 if (ocfs2_new_dir_wants_trailer(dir))
Mark Fasheh87d35a72008-12-10 17:36:25 -08002835 new_size = ocfs2_dir_trailer_blk_off(sb);
2836
2837 bytes = new_size - old_size;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002838
2839 limit = start + old_size;
2840 de_buf = start;
2841 de = (struct ocfs2_dir_entry *)de_buf;
2842 do {
Mark Fashehe7c17e42009-01-29 18:17:46 -08002843 this_hole = ocfs2_figure_dirent_hole(de);
2844 if (this_hole > largest_hole)
2845 largest_hole = this_hole;
2846
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002847 prev_de = de;
2848 de_buf += le16_to_cpu(de->rec_len);
2849 de = (struct ocfs2_dir_entry *)de_buf;
2850 } while (de_buf < limit);
2851
2852 le16_add_cpu(&prev_de->rec_len, bytes);
Mark Fashehe7c17e42009-01-29 18:17:46 -08002853
2854 /* We need to double check this after modification of the final
2855 * dirent. */
2856 this_hole = ocfs2_figure_dirent_hole(prev_de);
2857 if (this_hole > largest_hole)
2858 largest_hole = this_hole;
2859
2860 if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
2861 return largest_hole;
2862 return 0;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002863}
2864
2865/*
2866 * We allocate enough clusters to fulfill "blocks_wanted", but set
2867 * i_size to exactly one block. Ocfs2_extend_dir() will handle the
2868 * rest automatically for us.
2869 *
2870 * *first_block_bh is a pointer to the 1st data block allocated to the
2871 * directory.
2872 */
2873static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2874 unsigned int blocks_wanted,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002875 struct ocfs2_dir_lookup_result *lookup,
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002876 struct buffer_head **first_block_bh)
2877{
Mark Fashehe3a93c22009-02-17 15:29:35 -08002878 u32 alloc, dx_alloc, bit_off, len, num_dx_entries = 0;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002879 struct super_block *sb = dir->i_sb;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002880 int ret, i, num_dx_leaves = 0, dx_inline = 0,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002881 credits = ocfs2_inline_to_extents_credits(sb);
2882 u64 dx_insert_blkno, blkno,
2883 bytes = blocks_wanted << sb->s_blocksize_bits;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002884 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2885 struct ocfs2_inode_info *oi = OCFS2_I(dir);
2886 struct ocfs2_alloc_context *data_ac;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002887 struct ocfs2_alloc_context *meta_ac = NULL;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002888 struct buffer_head *dirdata_bh = NULL;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002889 struct buffer_head *dx_root_bh = NULL;
2890 struct buffer_head **dx_leaves = NULL;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002891 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2892 handle_t *handle;
Joel Beckerf99b9b72008-08-20 19:36:33 -07002893 struct ocfs2_extent_tree et;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002894 struct ocfs2_extent_tree dx_et;
2895 int did_quota = 0, bytes_allocated = 0;
Joel Beckerf99b9b72008-08-20 19:36:33 -07002896
Joel Becker5e404e92009-02-13 03:54:22 -08002897 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir), di_bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002898
2899 alloc = ocfs2_clusters_for_bytes(sb, bytes);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002900 dx_alloc = 0;
2901
Jan Karaedd45c02009-06-02 14:24:03 +02002902 down_write(&oi->ip_alloc_sem);
2903
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002904 if (ocfs2_supports_indexed_dirs(osb)) {
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002905 credits += ocfs2_add_dir_index_credits(sb);
2906
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002907 dx_inline = ocfs2_new_dx_should_be_inline(dir, di_bh);
2908 if (!dx_inline) {
2909 /* Add one more cluster for an index leaf */
2910 dx_alloc++;
2911 dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb,
2912 &num_dx_leaves);
2913 if (!dx_leaves) {
2914 ret = -ENOMEM;
2915 mlog_errno(ret);
2916 goto out;
2917 }
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002918 }
2919
2920 /* This gets us the dx_root */
2921 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
2922 if (ret) {
2923 mlog_errno(ret);
2924 goto out;
2925 }
2926 }
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002927
2928 /*
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002929 * We should never need more than 2 clusters for the unindexed
2930 * tree - maximum dirent size is far less than one block. In
2931 * fact, the only time we'd need more than one cluster is if
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002932 * blocksize == clustersize and the dirent won't fit in the
2933 * extra space that the expansion to a single block gives. As
2934 * of today, that only happens on 4k/4k file systems.
2935 */
2936 BUG_ON(alloc > 2);
2937
Tao Ma035a5712009-04-07 07:40:57 +08002938 ret = ocfs2_reserve_clusters(osb, alloc + dx_alloc, &data_ac);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002939 if (ret) {
2940 mlog_errno(ret);
2941 goto out;
2942 }
2943
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002944 /*
Joe Perchesc78bad12008-02-03 17:33:42 +02002945 * Prepare for worst case allocation scenario of two separate
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002946 * extents in the unindexed tree.
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002947 */
2948 if (alloc == 2)
2949 credits += OCFS2_SUBALLOC_ALLOC;
2950
2951 handle = ocfs2_start_trans(osb, credits);
2952 if (IS_ERR(handle)) {
2953 ret = PTR_ERR(handle);
2954 mlog_errno(ret);
Jan Karaedd45c02009-06-02 14:24:03 +02002955 goto out;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002956 }
2957
Christoph Hellwig5dd40562010-03-03 09:05:00 -05002958 ret = dquot_alloc_space_nodirty(dir,
2959 ocfs2_clusters_to_bytes(osb->sb, alloc + dx_alloc));
2960 if (ret)
Jan Karaa90714c2008-10-09 19:38:40 +02002961 goto out_commit;
Jan Karaa90714c2008-10-09 19:38:40 +02002962 did_quota = 1;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002963
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08002964 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002965 /*
2966 * Allocate our index cluster first, to maximize the
2967 * possibility that unindexed leaves grow
2968 * contiguously.
2969 */
2970 ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac,
2971 dx_leaves, num_dx_leaves,
2972 &dx_insert_blkno);
2973 if (ret) {
2974 mlog_errno(ret);
2975 goto out_commit;
2976 }
2977 bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
2978 }
2979
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002980 /*
2981 * Try to claim as many clusters as the bitmap can give though
2982 * if we only get one now, that's enough to continue. The rest
2983 * will be claimed after the conversion to extents.
2984 */
Mark Fasheh83f92312010-04-05 18:17:16 -07002985 if (ocfs2_dir_resv_allowed(osb))
2986 data_ac->ac_resv = &oi->ip_la_data_resv;
Joel Becker1ed9b772010-05-06 13:59:06 +08002987 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, &len);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002988 if (ret) {
2989 mlog_errno(ret);
2990 goto out_commit;
2991 }
Mark Fasheh9b7895e2008-11-12 16:27:44 -08002992 bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07002993
2994 /*
2995 * Operations are carefully ordered so that we set up the new
2996 * data block first. The conversion from inline data to
2997 * extents follows.
2998 */
2999 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
3000 dirdata_bh = sb_getblk(sb, blkno);
3001 if (!dirdata_bh) {
3002 ret = -EIO;
3003 mlog_errno(ret);
3004 goto out_commit;
3005 }
3006
Joel Becker8cb471e2009-02-10 20:00:41 -08003007 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dirdata_bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003008
Joel Becker0cf2f762009-02-12 16:41:25 -08003009 ret = ocfs2_journal_access_db(handle, INODE_CACHE(dir), dirdata_bh,
Joel Becker13723d02008-10-17 19:25:01 -07003010 OCFS2_JOURNAL_ACCESS_CREATE);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003011 if (ret) {
3012 mlog_errno(ret);
3013 goto out_commit;
3014 }
3015
3016 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
3017 memset(dirdata_bh->b_data + i_size_read(dir), 0,
3018 sb->s_blocksize - i_size_read(dir));
Mark Fashehe7c17e42009-01-29 18:17:46 -08003019 i = ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), dir);
3020 if (ocfs2_new_dir_wants_trailer(dir)) {
3021 /*
3022 * Prepare the dir trailer up front. It will otherwise look
3023 * like a valid dirent. Even if inserting the index fails
3024 * (unlikely), then all we'll have done is given first dir
3025 * block a small amount of fragmentation.
3026 */
3027 ocfs2_init_dir_trailer(dir, dirdata_bh, i);
3028 }
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003029
Joel Beckerec20cec2010-03-19 14:13:52 -07003030 ocfs2_journal_dirty(handle, dirdata_bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003031
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08003032 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
3033 /*
3034 * Dx dirs with an external cluster need to do this up
3035 * front. Inline dx root's get handled later, after
Mark Fashehe3a93c22009-02-17 15:29:35 -08003036 * we've allocated our root block. We get passed back
3037 * a total number of items so that dr_num_entries can
3038 * be correctly set once the dx_root has been
3039 * allocated.
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08003040 */
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003041 ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves,
Mark Fashehe3a93c22009-02-17 15:29:35 -08003042 num_dx_leaves, &num_dx_entries,
3043 dirdata_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003044 if (ret) {
3045 mlog_errno(ret);
3046 goto out_commit;
3047 }
3048 }
3049
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003050 /*
3051 * Set extent, i_size, etc on the directory. After this, the
3052 * inode should contain the same exact dirents as before and
3053 * be fully accessible from system calls.
3054 *
3055 * We let the later dirent insert modify c/mtime - to the user
3056 * the data hasn't changed.
3057 */
Joel Becker0cf2f762009-02-12 16:41:25 -08003058 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
Joel Becker13723d02008-10-17 19:25:01 -07003059 OCFS2_JOURNAL_ACCESS_CREATE);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003060 if (ret) {
3061 mlog_errno(ret);
3062 goto out_commit;
3063 }
3064
3065 spin_lock(&oi->ip_lock);
3066 oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
3067 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
3068 spin_unlock(&oi->ip_lock);
3069
3070 ocfs2_dinode_new_extent_list(dir, di);
3071
3072 i_size_write(dir, sb->s_blocksize);
3073 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3074
3075 di->i_size = cpu_to_le64(sb->s_blocksize);
3076 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
3077 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003078
3079 /*
3080 * This should never fail as our extent list is empty and all
3081 * related blocks have been journaled already.
3082 */
Joel Beckercc79d8c2009-02-13 03:24:43 -08003083 ret = ocfs2_insert_extent(handle, &et, 0, blkno, len,
Joel Beckerf99b9b72008-08-20 19:36:33 -07003084 0, NULL);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003085 if (ret) {
3086 mlog_errno(ret);
Tao Ma83cab532008-08-21 14:14:27 +08003087 goto out_commit;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003088 }
3089
Mark Fasheh9780eb62008-08-05 11:32:46 -07003090 /*
3091 * Set i_blocks after the extent insert for the most up to
3092 * date ip_clusters value.
3093 */
3094 dir->i_blocks = ocfs2_inode_sector_count(dir);
3095
Joel Beckerec20cec2010-03-19 14:13:52 -07003096 ocfs2_journal_dirty(handle, di_bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003097
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003098 if (ocfs2_supports_indexed_dirs(osb)) {
3099 ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
Mark Fashehe7c17e42009-01-29 18:17:46 -08003100 dirdata_bh, meta_ac, dx_inline,
Mark Fashehe3a93c22009-02-17 15:29:35 -08003101 num_dx_entries, &dx_root_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003102 if (ret) {
3103 mlog_errno(ret);
3104 goto out_commit;
3105 }
3106
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08003107 if (dx_inline) {
3108 ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
3109 dirdata_bh);
3110 } else {
Joel Becker5e404e92009-02-13 03:54:22 -08003111 ocfs2_init_dx_root_extent_tree(&dx_et,
3112 INODE_CACHE(dir),
3113 dx_root_bh);
Joel Beckercc79d8c2009-02-13 03:24:43 -08003114 ret = ocfs2_insert_extent(handle, &dx_et, 0,
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08003115 dx_insert_blkno, 1, 0, NULL);
3116 if (ret)
3117 mlog_errno(ret);
3118 }
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003119 }
3120
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003121 /*
3122 * We asked for two clusters, but only got one in the 1st
3123 * pass. Claim the 2nd cluster as a separate extent.
3124 */
3125 if (alloc > len) {
Joel Becker1ed9b772010-05-06 13:59:06 +08003126 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003127 &len);
3128 if (ret) {
3129 mlog_errno(ret);
3130 goto out_commit;
3131 }
3132 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
3133
Joel Beckercc79d8c2009-02-13 03:24:43 -08003134 ret = ocfs2_insert_extent(handle, &et, 1,
Joel Beckerf99b9b72008-08-20 19:36:33 -07003135 blkno, len, 0, NULL);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003136 if (ret) {
3137 mlog_errno(ret);
Tao Ma83cab532008-08-21 14:14:27 +08003138 goto out_commit;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003139 }
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003140 bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003141 }
3142
3143 *first_block_bh = dirdata_bh;
3144 dirdata_bh = NULL;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003145 if (ocfs2_supports_indexed_dirs(osb)) {
3146 unsigned int off;
3147
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08003148 if (!dx_inline) {
3149 /*
3150 * We need to return the correct block within the
3151 * cluster which should hold our entry.
3152 */
3153 off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb),
3154 &lookup->dl_hinfo);
3155 get_bh(dx_leaves[off]);
3156 lookup->dl_dx_leaf_bh = dx_leaves[off];
3157 }
3158 lookup->dl_dx_root_bh = dx_root_bh;
3159 dx_root_bh = NULL;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003160 }
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003161
3162out_commit:
Jan Karaa90714c2008-10-09 19:38:40 +02003163 if (ret < 0 && did_quota)
Christoph Hellwig5dd40562010-03-03 09:05:00 -05003164 dquot_free_space_nodirty(dir, bytes_allocated);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003165
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003166 ocfs2_commit_trans(osb, handle);
3167
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003168out:
Jan Karaedd45c02009-06-02 14:24:03 +02003169 up_write(&oi->ip_alloc_sem);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003170 if (data_ac)
3171 ocfs2_free_alloc_context(data_ac);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003172 if (meta_ac)
3173 ocfs2_free_alloc_context(meta_ac);
3174
3175 if (dx_leaves) {
3176 for (i = 0; i < num_dx_leaves; i++)
3177 brelse(dx_leaves[i]);
3178 kfree(dx_leaves);
3179 }
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003180
3181 brelse(dirdata_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003182 brelse(dx_root_bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003183
3184 return ret;
3185}
3186
Mark Fashehccd979b2005-12-15 14:31:24 -08003187/* returns a bh of the 1st new block in the allocation. */
Mark Fasheh316f4b92007-09-07 18:21:26 -07003188static int ocfs2_do_extend_dir(struct super_block *sb,
3189 handle_t *handle,
3190 struct inode *dir,
3191 struct buffer_head *parent_fe_bh,
3192 struct ocfs2_alloc_context *data_ac,
3193 struct ocfs2_alloc_context *meta_ac,
3194 struct buffer_head **new_bh)
Mark Fashehccd979b2005-12-15 14:31:24 -08003195{
3196 int status;
Jan Karaa90714c2008-10-09 19:38:40 +02003197 int extend, did_quota = 0;
Mark Fasheh8110b072007-03-22 16:53:23 -07003198 u64 p_blkno, v_blkno;
Mark Fashehccd979b2005-12-15 14:31:24 -08003199
3200 spin_lock(&OCFS2_I(dir)->ip_lock);
3201 extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters));
3202 spin_unlock(&OCFS2_I(dir)->ip_lock);
3203
3204 if (extend) {
Mark Fashehdcd05382007-01-16 11:32:23 -08003205 u32 offset = OCFS2_I(dir)->ip_clusters;
3206
Christoph Hellwig5dd40562010-03-03 09:05:00 -05003207 status = dquot_alloc_space_nodirty(dir,
3208 ocfs2_clusters_to_bytes(sb, 1));
3209 if (status)
Jan Karaa90714c2008-10-09 19:38:40 +02003210 goto bail;
Jan Karaa90714c2008-10-09 19:38:40 +02003211 did_quota = 1;
3212
Tao Ma0eb8d472008-08-18 17:38:45 +08003213 status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
3214 1, 0, parent_fe_bh, handle,
3215 data_ac, meta_ac, NULL);
Mark Fashehccd979b2005-12-15 14:31:24 -08003216 BUG_ON(status == -EAGAIN);
3217 if (status < 0) {
3218 mlog_errno(status);
3219 goto bail;
3220 }
3221 }
3222
Mark Fasheh8110b072007-03-22 16:53:23 -07003223 v_blkno = ocfs2_blocks_for_bytes(sb, i_size_read(dir));
3224 status = ocfs2_extent_map_get_blocks(dir, v_blkno, &p_blkno, NULL, NULL);
Mark Fashehccd979b2005-12-15 14:31:24 -08003225 if (status < 0) {
3226 mlog_errno(status);
3227 goto bail;
3228 }
3229
3230 *new_bh = sb_getblk(sb, p_blkno);
3231 if (!*new_bh) {
3232 status = -EIO;
3233 mlog_errno(status);
3234 goto bail;
3235 }
3236 status = 0;
3237bail:
Jan Karaa90714c2008-10-09 19:38:40 +02003238 if (did_quota && status < 0)
Christoph Hellwig5dd40562010-03-03 09:05:00 -05003239 dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
Mark Fashehccd979b2005-12-15 14:31:24 -08003240 mlog_exit(status);
3241 return status;
3242}
3243
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003244/*
3245 * Assumes you already have a cluster lock on the directory.
3246 *
3247 * 'blocks_wanted' is only used if we have an inline directory which
3248 * is to be turned into an extent based one. The size of the dirent to
3249 * insert might be larger than the space gained by growing to just one
3250 * block, so we may have to grow the inode by two blocks in that case.
Mark Fashehe7c17e42009-01-29 18:17:46 -08003251 *
3252 * If the directory is already indexed, dx_root_bh must be provided.
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003253 */
Mark Fashehccd979b2005-12-15 14:31:24 -08003254static int ocfs2_extend_dir(struct ocfs2_super *osb,
3255 struct inode *dir,
3256 struct buffer_head *parent_fe_bh,
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003257 unsigned int blocks_wanted,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003258 struct ocfs2_dir_lookup_result *lookup,
Mark Fashehccd979b2005-12-15 14:31:24 -08003259 struct buffer_head **new_de_bh)
3260{
3261 int status = 0;
Joel Beckeree19a772007-03-28 18:27:07 -07003262 int credits, num_free_extents, drop_alloc_sem = 0;
Mark Fashehccd979b2005-12-15 14:31:24 -08003263 loff_t dir_i_size;
3264 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
Tao Ma811f9332008-08-18 17:38:43 +08003265 struct ocfs2_extent_list *el = &fe->id2.i_list;
Mark Fashehccd979b2005-12-15 14:31:24 -08003266 struct ocfs2_alloc_context *data_ac = NULL;
3267 struct ocfs2_alloc_context *meta_ac = NULL;
Mark Fasheh1fabe142006-10-09 18:11:45 -07003268 handle_t *handle = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -08003269 struct buffer_head *new_bh = NULL;
3270 struct ocfs2_dir_entry * de;
3271 struct super_block *sb = osb->sb;
Joel Beckerf99b9b72008-08-20 19:36:33 -07003272 struct ocfs2_extent_tree et;
Mark Fashehe7c17e42009-01-29 18:17:46 -08003273 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
Mark Fashehccd979b2005-12-15 14:31:24 -08003274
3275 mlog_entry_void();
3276
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003277 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
Mark Fashehe7c17e42009-01-29 18:17:46 -08003278 /*
3279 * This would be a code error as an inline directory should
3280 * never have an index root.
3281 */
3282 BUG_ON(dx_root_bh);
3283
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003284 status = ocfs2_expand_inline_dir(dir, parent_fe_bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003285 blocks_wanted, lookup,
3286 &new_bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003287 if (status) {
3288 mlog_errno(status);
3289 goto bail;
3290 }
3291
Mark Fashehe7c17e42009-01-29 18:17:46 -08003292 /* Expansion from inline to an indexed directory will
3293 * have given us this. */
3294 dx_root_bh = lookup->dl_dx_root_bh;
3295
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003296 if (blocks_wanted == 1) {
3297 /*
3298 * If the new dirent will fit inside the space
3299 * created by pushing out to one block, then
3300 * we can complete the operation
3301 * here. Otherwise we have to expand i_size
3302 * and format the 2nd block below.
3303 */
3304 BUG_ON(new_bh == NULL);
3305 goto bail_bh;
3306 }
3307
3308 /*
3309 * Get rid of 'new_bh' - we want to format the 2nd
3310 * data block and return that instead.
3311 */
3312 brelse(new_bh);
3313 new_bh = NULL;
3314
Jan Karaedd45c02009-06-02 14:24:03 +02003315 down_write(&OCFS2_I(dir)->ip_alloc_sem);
3316 drop_alloc_sem = 1;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003317 dir_i_size = i_size_read(dir);
3318 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
3319 goto do_extend;
3320 }
3321
Jan Karaedd45c02009-06-02 14:24:03 +02003322 down_write(&OCFS2_I(dir)->ip_alloc_sem);
3323 drop_alloc_sem = 1;
Mark Fashehccd979b2005-12-15 14:31:24 -08003324 dir_i_size = i_size_read(dir);
Mark Fashehb06970532006-03-03 10:24:33 -08003325 mlog(0, "extending dir %llu (i_size = %lld)\n",
3326 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
Mark Fashehccd979b2005-12-15 14:31:24 -08003327
Mark Fashehccd979b2005-12-15 14:31:24 -08003328 /* dir->i_size is always block aligned. */
3329 spin_lock(&OCFS2_I(dir)->ip_lock);
3330 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
3331 spin_unlock(&OCFS2_I(dir)->ip_lock);
Joel Becker5e404e92009-02-13 03:54:22 -08003332 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir),
3333 parent_fe_bh);
Joel Becker3d03a302009-02-12 17:49:26 -08003334 num_free_extents = ocfs2_num_free_extents(osb, &et);
Mark Fashehccd979b2005-12-15 14:31:24 -08003335 if (num_free_extents < 0) {
3336 status = num_free_extents;
3337 mlog_errno(status);
3338 goto bail;
3339 }
3340
3341 if (!num_free_extents) {
Tao Ma811f9332008-08-18 17:38:43 +08003342 status = ocfs2_reserve_new_metadata(osb, el, &meta_ac);
Mark Fashehccd979b2005-12-15 14:31:24 -08003343 if (status < 0) {
3344 if (status != -ENOSPC)
3345 mlog_errno(status);
3346 goto bail;
3347 }
3348 }
3349
Mark Fashehda5cbf22006-10-06 18:34:35 -07003350 status = ocfs2_reserve_clusters(osb, 1, &data_ac);
Mark Fashehccd979b2005-12-15 14:31:24 -08003351 if (status < 0) {
3352 if (status != -ENOSPC)
3353 mlog_errno(status);
3354 goto bail;
3355 }
3356
Mark Fasheh83f92312010-04-05 18:17:16 -07003357 if (ocfs2_dir_resv_allowed(osb))
3358 data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
Mark Fashehe3b4a972009-12-07 13:16:07 -08003359
Tao Ma811f9332008-08-18 17:38:43 +08003360 credits = ocfs2_calc_extend_credits(sb, el, 1);
Mark Fashehccd979b2005-12-15 14:31:24 -08003361 } else {
3362 spin_unlock(&OCFS2_I(dir)->ip_lock);
3363 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
3364 }
3365
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003366do_extend:
Mark Fashehe7c17e42009-01-29 18:17:46 -08003367 if (ocfs2_dir_indexed(dir))
3368 credits++; /* For attaching the new dirent block to the
3369 * dx_root */
3370
Mark Fasheh65eff9c2006-10-09 17:26:22 -07003371 handle = ocfs2_start_trans(osb, credits);
Mark Fashehccd979b2005-12-15 14:31:24 -08003372 if (IS_ERR(handle)) {
3373 status = PTR_ERR(handle);
3374 handle = NULL;
3375 mlog_errno(status);
3376 goto bail;
3377 }
3378
3379 status = ocfs2_do_extend_dir(osb->sb, handle, dir, parent_fe_bh,
3380 data_ac, meta_ac, &new_bh);
3381 if (status < 0) {
3382 mlog_errno(status);
3383 goto bail;
3384 }
3385
Joel Becker8cb471e2009-02-10 20:00:41 -08003386 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), new_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08003387
Joel Becker0cf2f762009-02-12 16:41:25 -08003388 status = ocfs2_journal_access_db(handle, INODE_CACHE(dir), new_bh,
Joel Becker13723d02008-10-17 19:25:01 -07003389 OCFS2_JOURNAL_ACCESS_CREATE);
Mark Fashehccd979b2005-12-15 14:31:24 -08003390 if (status < 0) {
3391 mlog_errno(status);
3392 goto bail;
3393 }
3394 memset(new_bh->b_data, 0, sb->s_blocksize);
Mark Fasheh87d35a72008-12-10 17:36:25 -08003395
Mark Fashehccd979b2005-12-15 14:31:24 -08003396 de = (struct ocfs2_dir_entry *) new_bh->b_data;
3397 de->inode = 0;
Mark Fashehe7c17e42009-01-29 18:17:46 -08003398 if (ocfs2_supports_dir_trailer(dir)) {
Mark Fasheh87d35a72008-12-10 17:36:25 -08003399 de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
Mark Fashehe7c17e42009-01-29 18:17:46 -08003400
3401 ocfs2_init_dir_trailer(dir, new_bh, le16_to_cpu(de->rec_len));
3402
3403 if (ocfs2_dir_indexed(dir)) {
3404 status = ocfs2_dx_dir_link_trailer(dir, handle,
3405 dx_root_bh, new_bh);
3406 if (status) {
3407 mlog_errno(status);
3408 goto bail;
3409 }
3410 }
Mark Fasheh87d35a72008-12-10 17:36:25 -08003411 } else {
3412 de->rec_len = cpu_to_le16(sb->s_blocksize);
3413 }
Joel Beckerec20cec2010-03-19 14:13:52 -07003414 ocfs2_journal_dirty(handle, new_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08003415
3416 dir_i_size += dir->i_sb->s_blocksize;
3417 i_size_write(dir, dir_i_size);
Mark Fasheh8110b072007-03-22 16:53:23 -07003418 dir->i_blocks = ocfs2_inode_sector_count(dir);
Mark Fashehccd979b2005-12-15 14:31:24 -08003419 status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
3420 if (status < 0) {
3421 mlog_errno(status);
3422 goto bail;
3423 }
3424
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003425bail_bh:
Mark Fashehccd979b2005-12-15 14:31:24 -08003426 *new_de_bh = new_bh;
3427 get_bh(*new_de_bh);
3428bail:
3429 if (handle)
Mark Fasheh02dc1af2006-10-09 16:48:10 -07003430 ocfs2_commit_trans(osb, handle);
Jan Karaedd45c02009-06-02 14:24:03 +02003431 if (drop_alloc_sem)
3432 up_write(&OCFS2_I(dir)->ip_alloc_sem);
Mark Fashehccd979b2005-12-15 14:31:24 -08003433
3434 if (data_ac)
3435 ocfs2_free_alloc_context(data_ac);
3436 if (meta_ac)
3437 ocfs2_free_alloc_context(meta_ac);
3438
Mark Fasheha81cb882008-10-07 14:25:16 -07003439 brelse(new_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08003440
3441 mlog_exit(status);
3442 return status;
3443}
3444
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003445static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
3446 const char *name, int namelen,
3447 struct buffer_head **ret_de_bh,
3448 unsigned int *blocks_wanted)
3449{
3450 int ret;
Mark Fasheh87d35a72008-12-10 17:36:25 -08003451 struct super_block *sb = dir->i_sb;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003452 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3453 struct ocfs2_dir_entry *de, *last_de = NULL;
3454 char *de_buf, *limit;
3455 unsigned long offset = 0;
Mark Fasheh87d35a72008-12-10 17:36:25 -08003456 unsigned int rec_len, new_rec_len, free_space = dir->i_sb->s_blocksize;
3457
3458 /*
3459 * This calculates how many free bytes we'd have in block zero, should
3460 * this function force expansion to an extent tree.
3461 */
Mark Fashehe7c17e42009-01-29 18:17:46 -08003462 if (ocfs2_new_dir_wants_trailer(dir))
Mark Fasheh87d35a72008-12-10 17:36:25 -08003463 free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir);
3464 else
3465 free_space = dir->i_sb->s_blocksize - i_size_read(dir);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003466
3467 de_buf = di->id2.i_data.id_data;
3468 limit = de_buf + i_size_read(dir);
3469 rec_len = OCFS2_DIR_REC_LEN(namelen);
3470
3471 while (de_buf < limit) {
3472 de = (struct ocfs2_dir_entry *)de_buf;
3473
3474 if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
3475 ret = -ENOENT;
3476 goto out;
3477 }
3478 if (ocfs2_match(namelen, name, de)) {
3479 ret = -EEXIST;
3480 goto out;
3481 }
Mark Fasheh87d35a72008-12-10 17:36:25 -08003482 /*
3483 * No need to check for a trailing dirent record here as
3484 * they're not used for inline dirs.
3485 */
3486
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003487 if (ocfs2_dirent_would_fit(de, rec_len)) {
3488 /* Ok, we found a spot. Return this bh and let
3489 * the caller actually fill it in. */
3490 *ret_de_bh = di_bh;
3491 get_bh(*ret_de_bh);
3492 ret = 0;
3493 goto out;
3494 }
3495
3496 last_de = de;
3497 de_buf += le16_to_cpu(de->rec_len);
3498 offset += le16_to_cpu(de->rec_len);
3499 }
3500
3501 /*
3502 * We're going to require expansion of the directory - figure
3503 * out how many blocks we'll need so that a place for the
3504 * dirent can be found.
3505 */
3506 *blocks_wanted = 1;
Mark Fasheh87d35a72008-12-10 17:36:25 -08003507 new_rec_len = le16_to_cpu(last_de->rec_len) + free_space;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003508 if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
3509 *blocks_wanted = 2;
3510
3511 ret = -ENOSPC;
3512out:
3513 return ret;
3514}
3515
3516static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
3517 int namelen, struct buffer_head **ret_de_bh)
Mark Fashehccd979b2005-12-15 14:31:24 -08003518{
3519 unsigned long offset;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003520 struct buffer_head *bh = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -08003521 unsigned short rec_len;
Mark Fashehccd979b2005-12-15 14:31:24 -08003522 struct ocfs2_dir_entry *de;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003523 struct super_block *sb = dir->i_sb;
Mark Fashehccd979b2005-12-15 14:31:24 -08003524 int status;
Mark Fasheh87d35a72008-12-10 17:36:25 -08003525 int blocksize = dir->i_sb->s_blocksize;
Mark Fashehccd979b2005-12-15 14:31:24 -08003526
Joel Beckera22305c2008-11-13 14:49:17 -08003527 status = ocfs2_read_dir_block(dir, 0, &bh, 0);
3528 if (status) {
Mark Fashehccd979b2005-12-15 14:31:24 -08003529 mlog_errno(status);
3530 goto bail;
3531 }
3532
3533 rec_len = OCFS2_DIR_REC_LEN(namelen);
3534 offset = 0;
3535 de = (struct ocfs2_dir_entry *) bh->b_data;
3536 while (1) {
3537 if ((char *)de >= sb->s_blocksize + bh->b_data) {
3538 brelse(bh);
3539 bh = NULL;
3540
3541 if (i_size_read(dir) <= offset) {
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003542 /*
3543 * Caller will have to expand this
3544 * directory.
3545 */
3546 status = -ENOSPC;
Mark Fashehccd979b2005-12-15 14:31:24 -08003547 goto bail;
3548 }
Joel Beckera22305c2008-11-13 14:49:17 -08003549 status = ocfs2_read_dir_block(dir,
3550 offset >> sb->s_blocksize_bits,
3551 &bh, 0);
3552 if (status) {
Mark Fashehccd979b2005-12-15 14:31:24 -08003553 mlog_errno(status);
3554 goto bail;
3555 }
3556 /* move to next block */
3557 de = (struct ocfs2_dir_entry *) bh->b_data;
3558 }
3559 if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
3560 status = -ENOENT;
3561 goto bail;
3562 }
3563 if (ocfs2_match(namelen, name, de)) {
3564 status = -EEXIST;
3565 goto bail;
3566 }
Mark Fasheh87d35a72008-12-10 17:36:25 -08003567
3568 if (ocfs2_skip_dir_trailer(dir, de, offset % blocksize,
3569 blocksize))
3570 goto next;
3571
Mark Fasheh8553cf42007-09-13 16:29:01 -07003572 if (ocfs2_dirent_would_fit(de, rec_len)) {
Mark Fashehccd979b2005-12-15 14:31:24 -08003573 /* Ok, we found a spot. Return this bh and let
3574 * the caller actually fill it in. */
3575 *ret_de_bh = bh;
3576 get_bh(*ret_de_bh);
3577 status = 0;
3578 goto bail;
3579 }
Mark Fasheh87d35a72008-12-10 17:36:25 -08003580next:
Mark Fashehccd979b2005-12-15 14:31:24 -08003581 offset += le16_to_cpu(de->rec_len);
3582 de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
3583 }
3584
3585 status = 0;
3586bail:
Mark Fasheha81cb882008-10-07 14:25:16 -07003587 brelse(bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08003588
3589 mlog_exit(status);
3590 return status;
3591}
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07003592
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003593static int dx_leaf_sort_cmp(const void *a, const void *b)
3594{
3595 const struct ocfs2_dx_entry *entry1 = a;
3596 const struct ocfs2_dx_entry *entry2 = b;
3597 u32 major_hash1 = le32_to_cpu(entry1->dx_major_hash);
3598 u32 major_hash2 = le32_to_cpu(entry2->dx_major_hash);
3599 u32 minor_hash1 = le32_to_cpu(entry1->dx_minor_hash);
3600 u32 minor_hash2 = le32_to_cpu(entry2->dx_minor_hash);
3601
3602 if (major_hash1 > major_hash2)
3603 return 1;
3604 if (major_hash1 < major_hash2)
3605 return -1;
3606
3607 /*
3608 * It is not strictly necessary to sort by minor
3609 */
3610 if (minor_hash1 > minor_hash2)
3611 return 1;
3612 if (minor_hash1 < minor_hash2)
3613 return -1;
3614 return 0;
3615}
3616
3617static void dx_leaf_sort_swap(void *a, void *b, int size)
3618{
3619 struct ocfs2_dx_entry *entry1 = a;
3620 struct ocfs2_dx_entry *entry2 = b;
3621 struct ocfs2_dx_entry tmp;
3622
3623 BUG_ON(size != sizeof(*entry1));
3624
3625 tmp = *entry1;
3626 *entry1 = *entry2;
3627 *entry2 = tmp;
3628}
3629
3630static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf)
3631{
3632 struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
3633 int i, num = le16_to_cpu(dl_list->de_num_used);
3634
3635 for (i = 0; i < (num - 1); i++) {
3636 if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) !=
3637 le32_to_cpu(dl_list->de_entries[i + 1].dx_major_hash))
3638 return 0;
3639 }
3640
3641 return 1;
3642}
3643
3644/*
3645 * Find the optimal value to split this leaf on. This expects the leaf
3646 * entries to be in sorted order.
3647 *
3648 * leaf_cpos is the cpos of the leaf we're splitting. insert_hash is
3649 * the hash we want to insert.
3650 *
3651 * This function is only concerned with the major hash - that which
3652 * determines which cluster an item belongs to.
3653 */
3654static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf,
3655 u32 leaf_cpos, u32 insert_hash,
3656 u32 *split_hash)
3657{
3658 struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
3659 int i, num_used = le16_to_cpu(dl_list->de_num_used);
3660 int allsame;
3661
3662 /*
3663 * There's a couple rare, but nasty corner cases we have to
3664 * check for here. All of them involve a leaf where all value
3665 * have the same hash, which is what we look for first.
3666 *
3667 * Most of the time, all of the above is false, and we simply
3668 * pick the median value for a split.
3669 */
3670 allsame = ocfs2_dx_leaf_same_major(dx_leaf);
3671 if (allsame) {
3672 u32 val = le32_to_cpu(dl_list->de_entries[0].dx_major_hash);
3673
3674 if (val == insert_hash) {
3675 /*
3676 * No matter where we would choose to split,
3677 * the new entry would want to occupy the same
3678 * block as these. Since there's no space left
3679 * in their existing block, we know there
3680 * won't be space after the split.
3681 */
3682 return -ENOSPC;
3683 }
3684
3685 if (val == leaf_cpos) {
3686 /*
3687 * Because val is the same as leaf_cpos (which
3688 * is the smallest value this leaf can have),
3689 * yet is not equal to insert_hash, then we
3690 * know that insert_hash *must* be larger than
3691 * val (and leaf_cpos). At least cpos+1 in value.
3692 *
3693 * We also know then, that there cannot be an
3694 * adjacent extent (otherwise we'd be looking
3695 * at it). Choosing this value gives us a
3696 * chance to get some contiguousness.
3697 */
3698 *split_hash = leaf_cpos + 1;
3699 return 0;
3700 }
3701
3702 if (val > insert_hash) {
3703 /*
3704 * val can not be the same as insert hash, and
3705 * also must be larger than leaf_cpos. Also,
3706 * we know that there can't be a leaf between
3707 * cpos and val, otherwise the entries with
3708 * hash 'val' would be there.
3709 */
3710 *split_hash = val;
3711 return 0;
3712 }
3713
3714 *split_hash = insert_hash;
3715 return 0;
3716 }
3717
3718 /*
3719 * Since the records are sorted and the checks above
3720 * guaranteed that not all records in this block are the same,
3721 * we simple travel forward, from the median, and pick the 1st
3722 * record whose value is larger than leaf_cpos.
3723 */
3724 for (i = (num_used / 2); i < num_used; i++)
3725 if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) >
3726 leaf_cpos)
3727 break;
3728
3729 BUG_ON(i == num_used); /* Should be impossible */
3730 *split_hash = le32_to_cpu(dl_list->de_entries[i].dx_major_hash);
3731 return 0;
3732}
3733
3734/*
3735 * Transfer all entries in orig_dx_leaves whose major hash is equal to or
3736 * larger than split_hash into new_dx_leaves. We use a temporary
3737 * buffer (tmp_dx_leaf) to make the changes to the original leaf blocks.
3738 *
3739 * Since the block offset inside a leaf (cluster) is a constant mask
3740 * of minor_hash, we can optimize - an item at block offset X within
3741 * the original cluster, will be at offset X within the new cluster.
3742 */
3743static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
3744 handle_t *handle,
3745 struct ocfs2_dx_leaf *tmp_dx_leaf,
3746 struct buffer_head **orig_dx_leaves,
3747 struct buffer_head **new_dx_leaves,
3748 int num_dx_leaves)
3749{
3750 int i, j, num_used;
3751 u32 major_hash;
3752 struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf;
3753 struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list;
3754 struct ocfs2_dx_entry *dx_entry;
3755
3756 tmp_list = &tmp_dx_leaf->dl_list;
3757
3758 for (i = 0; i < num_dx_leaves; i++) {
3759 orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data;
3760 orig_list = &orig_dx_leaf->dl_list;
3761 new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data;
3762 new_list = &new_dx_leaf->dl_list;
3763
3764 num_used = le16_to_cpu(orig_list->de_num_used);
3765
3766 memcpy(tmp_dx_leaf, orig_dx_leaf, dir->i_sb->s_blocksize);
3767 tmp_list->de_num_used = cpu_to_le16(0);
3768 memset(&tmp_list->de_entries, 0, sizeof(*dx_entry)*num_used);
3769
3770 for (j = 0; j < num_used; j++) {
3771 dx_entry = &orig_list->de_entries[j];
3772 major_hash = le32_to_cpu(dx_entry->dx_major_hash);
3773 if (major_hash >= split_hash)
3774 ocfs2_dx_dir_leaf_insert_tail(new_dx_leaf,
3775 dx_entry);
3776 else
3777 ocfs2_dx_dir_leaf_insert_tail(tmp_dx_leaf,
3778 dx_entry);
3779 }
3780 memcpy(orig_dx_leaf, tmp_dx_leaf, dir->i_sb->s_blocksize);
3781
3782 ocfs2_journal_dirty(handle, orig_dx_leaves[i]);
3783 ocfs2_journal_dirty(handle, new_dx_leaves[i]);
3784 }
3785}
3786
3787static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb,
3788 struct ocfs2_dx_root_block *dx_root)
3789{
3790 int credits = ocfs2_clusters_to_blocks(osb->sb, 2);
3791
3792 credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list, 1);
3793 credits += ocfs2_quota_trans_credits(osb->sb);
3794 return credits;
3795}
3796
3797/*
3798 * Find the median value in dx_leaf_bh and allocate a new leaf to move
3799 * half our entries into.
3800 */
3801static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3802 struct buffer_head *dx_root_bh,
3803 struct buffer_head *dx_leaf_bh,
3804 struct ocfs2_dx_hinfo *hinfo, u32 leaf_cpos,
3805 u64 leaf_blkno)
3806{
3807 struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
3808 int credits, ret, i, num_used, did_quota = 0;
3809 u32 cpos, split_hash, insert_hash = hinfo->major_hash;
3810 u64 orig_leaves_start;
3811 int num_dx_leaves;
3812 struct buffer_head **orig_dx_leaves = NULL;
3813 struct buffer_head **new_dx_leaves = NULL;
3814 struct ocfs2_alloc_context *data_ac = NULL, *meta_ac = NULL;
3815 struct ocfs2_extent_tree et;
3816 handle_t *handle = NULL;
3817 struct ocfs2_dx_root_block *dx_root;
3818 struct ocfs2_dx_leaf *tmp_dx_leaf = NULL;
3819
3820 mlog(0, "DX Dir: %llu, rebalance leaf leaf_blkno: %llu insert: %u\n",
3821 (unsigned long long)OCFS2_I(dir)->ip_blkno,
3822 (unsigned long long)leaf_blkno, insert_hash);
3823
Joel Becker5e404e92009-02-13 03:54:22 -08003824 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003825
3826 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3827 /*
3828 * XXX: This is a rather large limit. We should use a more
3829 * realistic value.
3830 */
3831 if (le32_to_cpu(dx_root->dr_clusters) == UINT_MAX)
3832 return -ENOSPC;
3833
3834 num_used = le16_to_cpu(dx_leaf->dl_list.de_num_used);
3835 if (num_used < le16_to_cpu(dx_leaf->dl_list.de_count)) {
3836 mlog(ML_ERROR, "DX Dir: %llu, Asked to rebalance empty leaf: "
3837 "%llu, %d\n", (unsigned long long)OCFS2_I(dir)->ip_blkno,
3838 (unsigned long long)leaf_blkno, num_used);
3839 ret = -EIO;
3840 goto out;
3841 }
3842
3843 orig_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
3844 if (!orig_dx_leaves) {
3845 ret = -ENOMEM;
3846 mlog_errno(ret);
3847 goto out;
3848 }
3849
3850 new_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, NULL);
3851 if (!new_dx_leaves) {
3852 ret = -ENOMEM;
3853 mlog_errno(ret);
3854 goto out;
3855 }
3856
3857 ret = ocfs2_lock_allocators(dir, &et, 1, 0, &data_ac, &meta_ac);
3858 if (ret) {
3859 if (ret != -ENOSPC)
3860 mlog_errno(ret);
3861 goto out;
3862 }
3863
3864 credits = ocfs2_dx_dir_rebalance_credits(osb, dx_root);
3865 handle = ocfs2_start_trans(osb, credits);
3866 if (IS_ERR(handle)) {
3867 ret = PTR_ERR(handle);
3868 handle = NULL;
3869 mlog_errno(ret);
3870 goto out;
3871 }
3872
Christoph Hellwig5dd40562010-03-03 09:05:00 -05003873 ret = dquot_alloc_space_nodirty(dir,
3874 ocfs2_clusters_to_bytes(dir->i_sb, 1));
3875 if (ret)
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003876 goto out_commit;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003877 did_quota = 1;
3878
Joel Becker0cf2f762009-02-12 16:41:25 -08003879 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003880 OCFS2_JOURNAL_ACCESS_WRITE);
3881 if (ret) {
3882 mlog_errno(ret);
3883 goto out_commit;
3884 }
3885
3886 /*
3887 * This block is changing anyway, so we can sort it in place.
3888 */
3889 sort(dx_leaf->dl_list.de_entries, num_used,
3890 sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
3891 dx_leaf_sort_swap);
3892
Joel Beckerec20cec2010-03-19 14:13:52 -07003893 ocfs2_journal_dirty(handle, dx_leaf_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003894
3895 ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
3896 &split_hash);
3897 if (ret) {
3898 mlog_errno(ret);
3899 goto out_commit;
3900 }
3901
3902 mlog(0, "Split leaf (%u) at %u, insert major hash is %u\n",
3903 leaf_cpos, split_hash, insert_hash);
3904
3905 /*
3906 * We have to carefully order operations here. There are items
3907 * which want to be in the new cluster before insert, but in
3908 * order to put those items in the new cluster, we alter the
3909 * old cluster. A failure to insert gets nasty.
3910 *
3911 * So, start by reserving writes to the old
3912 * cluster. ocfs2_dx_dir_new_cluster will reserve writes on
3913 * the new cluster for us, before inserting it. The insert
3914 * won't happen if there's an error before that. Once the
3915 * insert is done then, we can transfer from one leaf into the
3916 * other without fear of hitting any error.
3917 */
3918
3919 /*
3920 * The leaf transfer wants some scratch space so that we don't
3921 * wind up doing a bunch of expensive memmove().
3922 */
3923 tmp_dx_leaf = kmalloc(osb->sb->s_blocksize, GFP_NOFS);
3924 if (!tmp_dx_leaf) {
3925 ret = -ENOMEM;
3926 mlog_errno(ret);
3927 goto out_commit;
3928 }
3929
Mark Fasheh1d46dc02009-02-19 13:17:05 -08003930 orig_leaves_start = ocfs2_block_to_cluster_start(dir->i_sb, leaf_blkno);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003931 ret = ocfs2_read_dx_leaves(dir, orig_leaves_start, num_dx_leaves,
3932 orig_dx_leaves);
3933 if (ret) {
3934 mlog_errno(ret);
3935 goto out_commit;
3936 }
3937
Tristan Ye0f4da212010-09-08 17:12:38 +08003938 cpos = split_hash;
3939 ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
3940 data_ac, meta_ac, new_dx_leaves,
3941 num_dx_leaves);
3942 if (ret) {
3943 mlog_errno(ret);
3944 goto out_commit;
3945 }
3946
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003947 for (i = 0; i < num_dx_leaves; i++) {
Joel Becker0cf2f762009-02-12 16:41:25 -08003948 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
3949 orig_dx_leaves[i],
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003950 OCFS2_JOURNAL_ACCESS_WRITE);
3951 if (ret) {
3952 mlog_errno(ret);
3953 goto out_commit;
3954 }
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003955
Tristan Ye0f4da212010-09-08 17:12:38 +08003956 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
3957 new_dx_leaves[i],
3958 OCFS2_JOURNAL_ACCESS_WRITE);
3959 if (ret) {
3960 mlog_errno(ret);
3961 goto out_commit;
3962 }
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003963 }
3964
3965 ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
3966 orig_dx_leaves, new_dx_leaves, num_dx_leaves);
3967
3968out_commit:
3969 if (ret < 0 && did_quota)
Christoph Hellwig5dd40562010-03-03 09:05:00 -05003970 dquot_free_space_nodirty(dir,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08003971 ocfs2_clusters_to_bytes(dir->i_sb, 1));
3972
3973 ocfs2_commit_trans(osb, handle);
3974
3975out:
3976 if (orig_dx_leaves || new_dx_leaves) {
3977 for (i = 0; i < num_dx_leaves; i++) {
3978 if (orig_dx_leaves)
3979 brelse(orig_dx_leaves[i]);
3980 if (new_dx_leaves)
3981 brelse(new_dx_leaves[i]);
3982 }
3983 kfree(orig_dx_leaves);
3984 kfree(new_dx_leaves);
3985 }
3986
3987 if (meta_ac)
3988 ocfs2_free_alloc_context(meta_ac);
3989 if (data_ac)
3990 ocfs2_free_alloc_context(data_ac);
3991
3992 kfree(tmp_dx_leaf);
3993 return ret;
3994}
3995
Mark Fashehe7c17e42009-01-29 18:17:46 -08003996static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
3997 struct buffer_head *di_bh,
3998 struct buffer_head *dx_root_bh,
3999 const char *name, int namelen,
4000 struct ocfs2_dir_lookup_result *lookup)
4001{
4002 int ret, rebalanced = 0;
4003 struct ocfs2_dx_root_block *dx_root;
4004 struct buffer_head *dx_leaf_bh = NULL;
4005 struct ocfs2_dx_leaf *dx_leaf;
4006 u64 blkno;
4007 u32 leaf_cpos;
4008
4009 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
4010
4011restart_search:
4012 ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo,
4013 &leaf_cpos, &blkno);
4014 if (ret) {
4015 mlog_errno(ret);
4016 goto out;
4017 }
4018
4019 ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh);
4020 if (ret) {
4021 mlog_errno(ret);
4022 goto out;
4023 }
4024
4025 dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
4026
4027 if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >=
4028 le16_to_cpu(dx_leaf->dl_list.de_count)) {
4029 if (rebalanced) {
4030 /*
4031 * Rebalancing should have provided us with
4032 * space in an appropriate leaf.
4033 *
4034 * XXX: Is this an abnormal condition then?
4035 * Should we print a message here?
4036 */
4037 ret = -ENOSPC;
4038 goto out;
4039 }
4040
4041 ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh,
4042 &lookup->dl_hinfo, leaf_cpos,
4043 blkno);
4044 if (ret) {
4045 if (ret != -ENOSPC)
4046 mlog_errno(ret);
4047 goto out;
4048 }
4049
4050 /*
4051 * Restart the lookup. The rebalance might have
4052 * changed which block our item fits into. Mark our
4053 * progress, so we only execute this once.
4054 */
4055 brelse(dx_leaf_bh);
4056 dx_leaf_bh = NULL;
4057 rebalanced = 1;
4058 goto restart_search;
4059 }
4060
4061 lookup->dl_dx_leaf_bh = dx_leaf_bh;
4062 dx_leaf_bh = NULL;
4063
4064out:
4065 brelse(dx_leaf_bh);
4066 return ret;
4067}
4068
4069static int ocfs2_search_dx_free_list(struct inode *dir,
4070 struct buffer_head *dx_root_bh,
4071 int namelen,
4072 struct ocfs2_dir_lookup_result *lookup)
4073{
4074 int ret = -ENOSPC;
4075 struct buffer_head *leaf_bh = NULL, *prev_leaf_bh = NULL;
4076 struct ocfs2_dir_block_trailer *db;
4077 u64 next_block;
4078 int rec_len = OCFS2_DIR_REC_LEN(namelen);
4079 struct ocfs2_dx_root_block *dx_root;
4080
4081 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
4082 next_block = le64_to_cpu(dx_root->dr_free_blk);
4083
4084 while (next_block) {
4085 brelse(prev_leaf_bh);
4086 prev_leaf_bh = leaf_bh;
4087 leaf_bh = NULL;
4088
4089 ret = ocfs2_read_dir_block_direct(dir, next_block, &leaf_bh);
4090 if (ret) {
4091 mlog_errno(ret);
4092 goto out;
4093 }
4094
4095 db = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);
4096 if (rec_len <= le16_to_cpu(db->db_free_rec_len)) {
4097 lookup->dl_leaf_bh = leaf_bh;
4098 lookup->dl_prev_leaf_bh = prev_leaf_bh;
4099 leaf_bh = NULL;
4100 prev_leaf_bh = NULL;
4101 break;
4102 }
4103
4104 next_block = le64_to_cpu(db->db_free_next);
4105 }
4106
4107 if (!next_block)
4108 ret = -ENOSPC;
4109
4110out:
4111
4112 brelse(leaf_bh);
4113 brelse(prev_leaf_bh);
4114 return ret;
4115}
4116
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08004117static int ocfs2_expand_inline_dx_root(struct inode *dir,
4118 struct buffer_head *dx_root_bh)
4119{
4120 int ret, num_dx_leaves, i, j, did_quota = 0;
4121 struct buffer_head **dx_leaves = NULL;
4122 struct ocfs2_extent_tree et;
4123 u64 insert_blkno;
4124 struct ocfs2_alloc_context *data_ac = NULL;
4125 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
4126 handle_t *handle = NULL;
4127 struct ocfs2_dx_root_block *dx_root;
4128 struct ocfs2_dx_entry_list *entry_list;
4129 struct ocfs2_dx_entry *dx_entry;
4130 struct ocfs2_dx_leaf *target_leaf;
4131
4132 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
4133 if (ret) {
4134 mlog_errno(ret);
4135 goto out;
4136 }
4137
4138 dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
4139 if (!dx_leaves) {
4140 ret = -ENOMEM;
4141 mlog_errno(ret);
4142 goto out;
4143 }
4144
4145 handle = ocfs2_start_trans(osb, ocfs2_calc_dxi_expand_credits(osb->sb));
4146 if (IS_ERR(handle)) {
4147 ret = PTR_ERR(handle);
4148 mlog_errno(ret);
4149 goto out;
4150 }
4151
Christoph Hellwig5dd40562010-03-03 09:05:00 -05004152 ret = dquot_alloc_space_nodirty(dir,
4153 ocfs2_clusters_to_bytes(osb->sb, 1));
4154 if (ret)
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08004155 goto out_commit;
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08004156 did_quota = 1;
4157
4158 /*
4159 * We do this up front, before the allocation, so that a
4160 * failure to add the dx_root_bh to the journal won't result
4161 * us losing clusters.
4162 */
Joel Becker0cf2f762009-02-12 16:41:25 -08004163 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08004164 OCFS2_JOURNAL_ACCESS_WRITE);
4165 if (ret) {
4166 mlog_errno(ret);
4167 goto out_commit;
4168 }
4169
4170 ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, dx_leaves,
4171 num_dx_leaves, &insert_blkno);
4172 if (ret) {
4173 mlog_errno(ret);
4174 goto out_commit;
4175 }
4176
4177 /*
4178 * Transfer the entries from our dx_root into the appropriate
4179 * block
4180 */
4181 dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
4182 entry_list = &dx_root->dr_entries;
4183
4184 for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
4185 dx_entry = &entry_list->de_entries[i];
4186
4187 j = __ocfs2_dx_dir_hash_idx(osb,
4188 le32_to_cpu(dx_entry->dx_minor_hash));
4189 target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]->b_data;
4190
4191 ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry);
4192
4193 /* Each leaf has been passed to the journal already
4194 * via __ocfs2_dx_dir_new_cluster() */
4195 }
4196
4197 dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
4198 memset(&dx_root->dr_list, 0, osb->sb->s_blocksize -
4199 offsetof(struct ocfs2_dx_root_block, dr_list));
4200 dx_root->dr_list.l_count =
4201 cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
4202
4203 /* This should never fail considering we start with an empty
4204 * dx_root. */
Joel Becker5e404e92009-02-13 03:54:22 -08004205 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
Joel Beckercc79d8c2009-02-13 03:24:43 -08004206 ret = ocfs2_insert_extent(handle, &et, 0, insert_blkno, 1, 0, NULL);
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08004207 if (ret)
4208 mlog_errno(ret);
4209 did_quota = 0;
4210
4211 ocfs2_journal_dirty(handle, dx_root_bh);
4212
4213out_commit:
4214 if (ret < 0 && did_quota)
Christoph Hellwig5dd40562010-03-03 09:05:00 -05004215 dquot_free_space_nodirty(dir,
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08004216 ocfs2_clusters_to_bytes(dir->i_sb, 1));
4217
4218 ocfs2_commit_trans(osb, handle);
4219
4220out:
4221 if (data_ac)
4222 ocfs2_free_alloc_context(data_ac);
4223
4224 if (dx_leaves) {
4225 for (i = 0; i < num_dx_leaves; i++)
4226 brelse(dx_leaves[i]);
4227 kfree(dx_leaves);
4228 }
4229 return ret;
4230}
4231
4232static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh)
4233{
4234 struct ocfs2_dx_root_block *dx_root;
4235 struct ocfs2_dx_entry_list *entry_list;
4236
4237 dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
4238 entry_list = &dx_root->dr_entries;
4239
4240 if (le16_to_cpu(entry_list->de_num_used) >=
4241 le16_to_cpu(entry_list->de_count))
4242 return -ENOSPC;
4243
4244 return 0;
4245}
4246
Mark Fashehe7c17e42009-01-29 18:17:46 -08004247static int ocfs2_prepare_dx_dir_for_insert(struct inode *dir,
4248 struct buffer_head *di_bh,
4249 const char *name,
4250 int namelen,
4251 struct ocfs2_dir_lookup_result *lookup)
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004252{
Mark Fashehe7c17e42009-01-29 18:17:46 -08004253 int ret, free_dx_root = 1;
4254 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004255 struct buffer_head *dx_root_bh = NULL;
Mark Fashehe7c17e42009-01-29 18:17:46 -08004256 struct buffer_head *leaf_bh = NULL;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004257 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
Mark Fashehe7c17e42009-01-29 18:17:46 -08004258 struct ocfs2_dx_root_block *dx_root;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004259
4260 ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
4261 if (ret) {
4262 mlog_errno(ret);
4263 goto out;
4264 }
4265
4266 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
Mark Fashehe3a93c22009-02-17 15:29:35 -08004267 if (le32_to_cpu(dx_root->dr_num_entries) == OCFS2_DX_ENTRIES_MAX) {
4268 ret = -ENOSPC;
4269 mlog_errno(ret);
4270 goto out;
4271 }
4272
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08004273 if (ocfs2_dx_root_inline(dx_root)) {
4274 ret = ocfs2_inline_dx_has_space(dx_root_bh);
4275
4276 if (ret == 0)
4277 goto search_el;
4278
4279 /*
4280 * We ran out of room in the root block. Expand it to
4281 * an extent, then allow ocfs2_find_dir_space_dx to do
4282 * the rest.
4283 */
4284 ret = ocfs2_expand_inline_dx_root(dir, dx_root_bh);
4285 if (ret) {
4286 mlog_errno(ret);
4287 goto out;
4288 }
4289 }
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004290
Mark Fashehe7c17e42009-01-29 18:17:46 -08004291 /*
4292 * Insert preparation for an indexed directory is split into two
4293 * steps. The call to find_dir_space_dx reserves room in the index for
4294 * an additional item. If we run out of space there, it's a real error
4295 * we can't continue on.
4296 */
4297 ret = ocfs2_find_dir_space_dx(osb, dir, di_bh, dx_root_bh, name,
4298 namelen, lookup);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004299 if (ret) {
4300 mlog_errno(ret);
4301 goto out;
4302 }
4303
Mark Fashehe7c17e42009-01-29 18:17:46 -08004304search_el:
4305 /*
4306 * Next, we need to find space in the unindexed tree. This call
4307 * searches using the free space linked list. If the unindexed tree
4308 * lacks sufficient space, we'll expand it below. The expansion code
4309 * is smart enough to add any new blocks to the free space list.
4310 */
4311 ret = ocfs2_search_dx_free_list(dir, dx_root_bh, namelen, lookup);
4312 if (ret && ret != -ENOSPC) {
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004313 mlog_errno(ret);
4314 goto out;
4315 }
4316
Mark Fashehe7c17e42009-01-29 18:17:46 -08004317 /* Do this up here - ocfs2_extend_dir might need the dx_root */
4318 lookup->dl_dx_root_bh = dx_root_bh;
4319 free_dx_root = 0;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004320
Mark Fashehe7c17e42009-01-29 18:17:46 -08004321 if (ret == -ENOSPC) {
4322 ret = ocfs2_extend_dir(osb, dir, di_bh, 1, lookup, &leaf_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004323
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004324 if (ret) {
Mark Fashehe7c17e42009-01-29 18:17:46 -08004325 mlog_errno(ret);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004326 goto out;
4327 }
4328
4329 /*
Mark Fashehe7c17e42009-01-29 18:17:46 -08004330 * We make the assumption here that new leaf blocks are added
4331 * to the front of our free list.
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004332 */
Mark Fashehe7c17e42009-01-29 18:17:46 -08004333 lookup->dl_prev_leaf_bh = NULL;
4334 lookup->dl_leaf_bh = leaf_bh;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004335 }
4336
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004337out:
Mark Fashehe7c17e42009-01-29 18:17:46 -08004338 if (free_dx_root)
4339 brelse(dx_root_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004340 return ret;
4341}
4342
Mark Fasheh4a12ca32008-11-12 15:43:34 -08004343/*
4344 * Get a directory ready for insert. Any directory allocation required
4345 * happens here. Success returns zero, and enough context in the dir
4346 * lookup result that ocfs2_add_entry() will be able complete the task
4347 * with minimal performance impact.
4348 */
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07004349int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
4350 struct inode *dir,
4351 struct buffer_head *parent_fe_bh,
4352 const char *name,
4353 int namelen,
Mark Fasheh4a12ca32008-11-12 15:43:34 -08004354 struct ocfs2_dir_lookup_result *lookup)
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07004355{
4356 int ret;
4357 unsigned int blocks_wanted = 1;
4358 struct buffer_head *bh = NULL;
4359
4360 mlog(0, "getting ready to insert namelen %d into dir %llu\n",
4361 namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno);
4362
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07004363 if (!namelen) {
4364 ret = -EINVAL;
4365 mlog_errno(ret);
4366 goto out;
4367 }
4368
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004369 /*
4370 * Do this up front to reduce confusion.
4371 *
4372 * The directory might start inline, then be turned into an
4373 * indexed one, in which case we'd need to hash deep inside
4374 * ocfs2_find_dir_space_id(). Since
4375 * ocfs2_prepare_dx_dir_for_insert() also needs this hash
4376 * done, there seems no point in spreading out the calls. We
4377 * can optimize away the case where the file system doesn't
4378 * support indexing.
4379 */
4380 if (ocfs2_supports_indexed_dirs(osb))
4381 ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo);
4382
4383 if (ocfs2_dir_indexed(dir)) {
Mark Fashehe7c17e42009-01-29 18:17:46 -08004384 ret = ocfs2_prepare_dx_dir_for_insert(dir, parent_fe_bh,
4385 name, namelen, lookup);
4386 if (ret)
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004387 mlog_errno(ret);
Mark Fashehe7c17e42009-01-29 18:17:46 -08004388 goto out;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004389 }
4390
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07004391 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
4392 ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name,
4393 namelen, &bh, &blocks_wanted);
4394 } else
4395 ret = ocfs2_find_dir_space_el(dir, name, namelen, &bh);
4396
4397 if (ret && ret != -ENOSPC) {
4398 mlog_errno(ret);
4399 goto out;
4400 }
4401
4402 if (ret == -ENOSPC) {
4403 /*
4404 * We have to expand the directory to add this name.
4405 */
4406 BUG_ON(bh);
4407
4408 ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004409 lookup, &bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07004410 if (ret) {
4411 if (ret != -ENOSPC)
4412 mlog_errno(ret);
4413 goto out;
4414 }
4415
4416 BUG_ON(!bh);
4417 }
4418
Mark Fasheh4a12ca32008-11-12 15:43:34 -08004419 lookup->dl_leaf_bh = bh;
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07004420 bh = NULL;
4421out:
Mark Fasheha81cb882008-10-07 14:25:16 -07004422 brelse(bh);
Mark Fasheh5b6a3a22007-09-13 16:33:54 -07004423 return ret;
4424}
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004425
4426static int ocfs2_dx_dir_remove_index(struct inode *dir,
4427 struct buffer_head *di_bh,
4428 struct buffer_head *dx_root_bh)
4429{
4430 int ret;
4431 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
4432 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
4433 struct ocfs2_dx_root_block *dx_root;
4434 struct inode *dx_alloc_inode = NULL;
4435 struct buffer_head *dx_alloc_bh = NULL;
4436 handle_t *handle;
4437 u64 blk;
4438 u16 bit;
4439 u64 bg_blkno;
4440
4441 dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
4442
4443 dx_alloc_inode = ocfs2_get_system_file_inode(osb,
4444 EXTENT_ALLOC_SYSTEM_INODE,
4445 le16_to_cpu(dx_root->dr_suballoc_slot));
4446 if (!dx_alloc_inode) {
4447 ret = -ENOMEM;
4448 mlog_errno(ret);
4449 goto out;
4450 }
4451 mutex_lock(&dx_alloc_inode->i_mutex);
4452
4453 ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1);
4454 if (ret) {
4455 mlog_errno(ret);
4456 goto out_mutex;
4457 }
4458
4459 handle = ocfs2_start_trans(osb, OCFS2_DX_ROOT_REMOVE_CREDITS);
4460 if (IS_ERR(handle)) {
4461 ret = PTR_ERR(handle);
4462 mlog_errno(ret);
4463 goto out_unlock;
4464 }
4465
Joel Becker0cf2f762009-02-12 16:41:25 -08004466 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004467 OCFS2_JOURNAL_ACCESS_WRITE);
4468 if (ret) {
4469 mlog_errno(ret);
4470 goto out_commit;
4471 }
4472
Tao Ma8ac33dc2010-12-15 16:30:00 +08004473 spin_lock(&OCFS2_I(dir)->ip_lock);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004474 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
4475 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
Tao Ma8ac33dc2010-12-15 16:30:00 +08004476 spin_unlock(&OCFS2_I(dir)->ip_lock);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004477 di->i_dx_root = cpu_to_le64(0ULL);
4478
4479 ocfs2_journal_dirty(handle, di_bh);
4480
4481 blk = le64_to_cpu(dx_root->dr_blkno);
4482 bit = le16_to_cpu(dx_root->dr_suballoc_bit);
Tao Ma74380c42010-03-22 14:20:18 +08004483 if (dx_root->dr_suballoc_loc)
4484 bg_blkno = le64_to_cpu(dx_root->dr_suballoc_loc);
4485 else
4486 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004487 ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh,
4488 bit, bg_blkno, 1);
4489 if (ret)
4490 mlog_errno(ret);
4491
4492out_commit:
4493 ocfs2_commit_trans(osb, handle);
4494
4495out_unlock:
4496 ocfs2_inode_unlock(dx_alloc_inode, 1);
4497
4498out_mutex:
4499 mutex_unlock(&dx_alloc_inode->i_mutex);
4500 brelse(dx_alloc_bh);
4501out:
4502 iput(dx_alloc_inode);
4503 return ret;
4504}
4505
4506int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
4507{
4508 int ret;
4509 unsigned int uninitialized_var(clen);
4510 u32 major_hash = UINT_MAX, p_cpos, uninitialized_var(cpos);
4511 u64 uninitialized_var(blkno);
4512 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
4513 struct buffer_head *dx_root_bh = NULL;
4514 struct ocfs2_dx_root_block *dx_root;
4515 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
4516 struct ocfs2_cached_dealloc_ctxt dealloc;
4517 struct ocfs2_extent_tree et;
4518
4519 ocfs2_init_dealloc_ctxt(&dealloc);
4520
4521 if (!ocfs2_dir_indexed(dir))
4522 return 0;
4523
4524 ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
4525 if (ret) {
4526 mlog_errno(ret);
4527 goto out;
4528 }
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08004529 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
4530
4531 if (ocfs2_dx_root_inline(dx_root))
4532 goto remove_index;
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004533
Joel Becker5e404e92009-02-13 03:54:22 -08004534 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004535
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004536 /* XXX: What if dr_clusters is too large? */
4537 while (le32_to_cpu(dx_root->dr_clusters)) {
4538 ret = ocfs2_dx_dir_lookup_rec(dir, &dx_root->dr_list,
4539 major_hash, &cpos, &blkno, &clen);
4540 if (ret) {
4541 mlog_errno(ret);
4542 goto out;
4543 }
4544
4545 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
4546
Tristan Ye78f94672010-05-11 17:54:42 +08004547 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
4548 &dealloc, 0);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004549 if (ret) {
4550 mlog_errno(ret);
4551 goto out;
4552 }
4553
4554 if (cpos == 0)
4555 break;
4556
4557 major_hash = cpos - 1;
4558 }
4559
Mark Fasheh4ed8a6b2008-11-24 17:02:08 -08004560remove_index:
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004561 ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh);
4562 if (ret) {
4563 mlog_errno(ret);
4564 goto out;
4565 }
4566
Joel Becker8cb471e2009-02-10 20:00:41 -08004567 ocfs2_remove_from_cache(INODE_CACHE(dir), dx_root_bh);
Mark Fasheh9b7895e2008-11-12 16:27:44 -08004568out:
4569 ocfs2_schedule_truncate_log_flush(osb, 1);
4570 ocfs2_run_deallocs(osb, &dealloc);
4571
4572 brelse(dx_root_bh);
4573 return ret;
4574}