Blame - fs/ocfs2/dir.c - kernel/msm

blob: c2f3fd93be5ce180967b8a1077a631fc184b1d38 [file] [log] [blame]

Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1	/* -*- mode: c; c-basic-offset: 8; -*-
				2	* vim: noexpandtab sw=8 ts=8 sts=0:
				3	*
				4	* dir.c
				5	*
				6	* Creates, reads, walks and deletes directory-nodes
				7	*
				8	* Copyright (C) 2002, 2004 Oracle. All rights reserved.
				9	*
				10	* Portions of this code from linux/fs/ext3/dir.c
				11	*
				12	* Copyright (C) 1992, 1993, 1994, 1995
				13	* Remy Card (card@masi.ibp.fr)
				14	* Laboratoire MASI - Institut Blaise pascal
				15	* Universite Pierre et Marie Curie (Paris VI)
				16	*
				17	* from
				18	*
				19	* linux/fs/minix/dir.c
				20	*
				21	* Copyright (C) 1991, 1992 Linus Torvalds
				22	*
				23	* This program is free software; you can redistribute it and/or
				24	* modify it under the terms of the GNU General Public
				25	* License as published by the Free Software Foundation; either
				26	* version 2 of the License, or (at your option) any later version.
				27	*
				28	* This program is distributed in the hope that it will be useful,
				29	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				30	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				31	* General Public License for more details.
				32	*
				33	* You should have received a copy of the GNU General Public
				34	* License along with this program; if not, write to the
				35	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				36	* Boston, MA 02111-1307, USA.
				37	*/
				38
				39	#include <linux/fs.h>
				40	#include <linux/types.h>
				41	#include <linux/slab.h>
				42	#include <linux/highmem.h>
				43
				44	#define MLOG_MASK_PREFIX ML_NAMEI
				45	#include <cluster/masklog.h>
				46
				47	#include "ocfs2.h"
				48
				49	#include "alloc.h"
				50	#include "dir.h"
				51	#include "dlmglue.h"
				52	#include "extent_map.h"
				53	#include "file.h"
				54	#include "inode.h"
				55	#include "journal.h"
				56	#include "namei.h"
				57	#include "suballoc.h"
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	58	#include "super.h"
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	59	#include "uptodate.h"
				60
				61	#include "buffer_head_io.h"
				62
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	63	#define NAMEI_RA_CHUNKS 2
				64	#define NAMEI_RA_BLOCKS 4
				65	#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
				66	#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
				67
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	68	static unsigned char ocfs2_filetype_table[] = {
				69	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
				70	};
				71
				72	static int ocfs2_extend_dir(struct ocfs2_super *osb,
				73	struct inode *dir,
				74	struct buffer_head *parent_fe_bh,
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	75	unsigned int blocks_wanted,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	76	struct buffer_head **new_de_bh);
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	77	static int ocfs2_do_extend_dir(struct super_block *sb,
				78	handle_t *handle,
				79	struct inode *dir,
				80	struct buffer_head *parent_fe_bh,
				81	struct ocfs2_alloc_context *data_ac,
				82	struct ocfs2_alloc_context *meta_ac,
				83	struct buffer_head **new_bh);
				84
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	85	/*
				86	* bh passed here can be an inode block or a dir data block, depending
				87	* on the inode inline data flag.
				88	*/
Mark Fasheh	5eae5b9	2007-09-10 17:50:51 -0700	[diff] [blame]	89	static int ocfs2_check_dir_entry(struct inode * dir,
				90	struct ocfs2_dir_entry * de,
				91	struct buffer_head * bh,
				92	unsigned long offset)
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	93	{
				94	const char *error_msg = NULL;
				95	const int rlen = le16_to_cpu(de->rec_len);
				96
				97	if (rlen < OCFS2_DIR_REC_LEN(1))
				98	error_msg = "rec_len is smaller than minimal";
				99	else if (rlen % 4 != 0)
				100	error_msg = "rec_len % 4 != 0";
				101	else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
				102	error_msg = "rec_len is too small for name_len";
				103	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
				104	error_msg = "directory entry across blocks";
				105
				106	if (error_msg != NULL)
				107	mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
				108	"offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
				109	(unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
				110	offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
				111	de->name_len);
				112	return error_msg == NULL ? 1 : 0;
				113	}
				114
				115	static inline int ocfs2_match(int len,
				116	const char * const name,
				117	struct ocfs2_dir_entry *de)
				118	{
				119	if (len != de->name_len)
				120	return 0;
				121	if (!de->inode)
				122	return 0;
				123	return !memcmp(name, de->name, len);
				124	}
				125
				126	/*
				127	* Returns 0 if not found, -1 on failure, and 1 on success
				128	*/
				129	static int inline ocfs2_search_dirblock(struct buffer_head *bh,
				130	struct inode *dir,
				131	const char *name, int namelen,
				132	unsigned long offset,
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	133	char *first_de,
				134	unsigned int bytes,
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	135	struct ocfs2_dir_entry **res_dir)
				136	{
				137	struct ocfs2_dir_entry *de;
				138	char dlimit, de_buf;
				139	int de_len;
				140	int ret = 0;
				141
				142	mlog_entry_void();
				143
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	144	de_buf = first_de;
				145	dlimit = de_buf + bytes;
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	146
				147	while (de_buf < dlimit) {
				148	/* this code is executed quadratically often */
				149	/* do minimal checking `by hand' */
				150
				151	de = (struct ocfs2_dir_entry *) de_buf;
				152
				153	if (de_buf + namelen <= dlimit &&
				154	ocfs2_match(namelen, name, de)) {
				155	/* found a match - just to be sure, do a full check */
				156	if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
				157	ret = -1;
				158	goto bail;
				159	}
				160	*res_dir = de;
				161	ret = 1;
				162	goto bail;
				163	}
				164
				165	/* prevent looping on a bad block */
				166	de_len = le16_to_cpu(de->rec_len);
				167	if (de_len <= 0) {
				168	ret = -1;
				169	goto bail;
				170	}
				171
				172	de_buf += de_len;
				173	offset += de_len;
				174	}
				175
				176	bail:
				177	mlog_exit(ret);
				178	return ret;
				179	}
				180
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	181	static struct buffer_head ocfs2_find_entry_id(const char name,
				182	int namelen,
				183	struct inode *dir,
				184	struct ocfs2_dir_entry **res_dir)
				185	{
				186	int ret, found;
				187	struct buffer_head *di_bh = NULL;
				188	struct ocfs2_dinode *di;
				189	struct ocfs2_inline_data *data;
				190
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	191	ret = ocfs2_read_inode_block(dir, &di_bh);
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	192	if (ret) {
				193	mlog_errno(ret);
				194	goto out;
				195	}
				196
				197	di = (struct ocfs2_dinode *)di_bh->b_data;
				198	data = &di->id2.i_data;
				199
				200	found = ocfs2_search_dirblock(di_bh, dir, name, namelen, 0,
				201	data->id_data, i_size_read(dir), res_dir);
				202	if (found == 1)
				203	return di_bh;
				204
				205	brelse(di_bh);
				206	out:
				207	return NULL;
				208	}
				209
/*
 * Placeholder validation hook for a freshly read directory block.
 *
 * Dirents are not checked here; the code that walks them validates each
 * entry in place. Always reports success (0).
 */
static int ocfs2_validate_dir_block(struct super_block *sb,
				    struct buffer_head *bh)
{
	int status = 0;

	return status;
}
				220
				221	/*
				222	* This function forces all errors to -EIO for consistency with its
				223	* predecessor, ocfs2_bread(). We haven't audited what returning the
				224	* real error codes would do to callers. We log the real codes with
				225	* mlog_errno() before we squash them.
				226	*/
				227	static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
				228	struct buffer_head **bh, int flags)
				229	{
				230	int rc = 0;
				231	struct buffer_head tmp = bh;
				232	u64 p_blkno;
				233
				234	if (((u64)v_block << inode->i_sb->s_blocksize_bits) >=
				235	i_size_read(inode)) {
				236	BUG_ON(!(flags & OCFS2_BH_READAHEAD));
				237	goto out;
				238	}
				239
				240	down_read(&OCFS2_I(inode)->ip_alloc_sem);
				241	rc = ocfs2_extent_map_get_blocks(inode, v_block, &p_blkno, NULL,
				242	NULL);
				243	up_read(&OCFS2_I(inode)->ip_alloc_sem);
				244	if (rc) {
				245	mlog_errno(rc);
				246	goto out;
				247	}
				248
				249	if (!p_blkno) {
				250	rc = -EIO;
				251	mlog(ML_ERROR,
				252	"Directory #%llu contains a hole at offset %llu\n",
				253	(unsigned long long)OCFS2_I(inode)->ip_blkno,
				254	(unsigned long long)v_block << inode->i_sb->s_blocksize_bits);
				255	goto out;
				256	}
				257
				258	rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags);
				259	if (rc) {
				260	mlog_errno(rc);
				261	goto out;
				262	}
				263
				264	if (!(flags & OCFS2_BH_READAHEAD)) {
				265	rc = ocfs2_validate_dir_block(inode->i_sb, tmp);
				266	if (rc) {
				267	brelse(tmp);
				268	goto out;
				269	}
				270	}
				271
				272	/* If ocfs2_read_blocks() got us a new bh, pass it up. */
				273	if (!*bh)
				274	*bh = tmp;
				275
				276	out:
				277	return rc ? -EIO : 0;
				278	}
				279
Adrian Bunk	0af4bd3	2007-10-24 18:23:27 +0200	[diff] [blame]	280	static struct buffer_head ocfs2_find_entry_el(const char name, int namelen,
				281	struct inode *dir,
				282	struct ocfs2_dir_entry **res_dir)
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	283	{
				284	struct super_block *sb;
				285	struct buffer_head *bh_use[NAMEI_RA_SIZE];
				286	struct buffer_head bh, ret = NULL;
				287	unsigned long start, block, b;
				288	int ra_max = 0; /* Number of bh's in the readahead
				289	buffer, bh_use[] */
				290	int ra_ptr = 0; /* Current index into readahead
				291	buffer */
				292	int num = 0;
				293	int nblocks, i, err;
				294
				295	mlog_entry_void();
				296
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	297	sb = dir->i_sb;
				298
				299	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
				300	start = OCFS2_I(dir)->ip_dir_start_lookup;
				301	if (start >= nblocks)
				302	start = 0;
				303	block = start;
				304
				305	restart:
				306	do {
				307	/*
				308	* We deal with the read-ahead logic here.
				309	*/
				310	if (ra_ptr >= ra_max) {
				311	/* Refill the readahead buffer */
				312	ra_ptr = 0;
				313	b = block;
				314	for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				315	/*
				316	* Terminate if we reach the end of the
				317	* directory and must wrap, or if our
				318	* search has finished at this block.
				319	*/
				320	if (b >= nblocks \|\| (num && block == start)) {
				321	bh_use[ra_max] = NULL;
				322	break;
				323	}
				324	num++;
				325
Joel Becker	a22305c	2008-11-13 14:49:17 -0800	[diff] [blame^]	326	bh = NULL;
				327	err = ocfs2_read_dir_block(dir, b++, &bh,
				328	OCFS2_BH_READAHEAD);
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	329	bh_use[ra_max] = bh;
				330	}
				331	}
				332	if ((bh = bh_use[ra_ptr++]) == NULL)
				333	goto next;
Joel Becker	a22305c	2008-11-13 14:49:17 -0800	[diff] [blame^]	334	if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
Joel Becker	5e0b3de	2008-10-09 17:20:33 -0700	[diff] [blame]	335	/* read error, skip block & hope for the best.
Joel Becker	a22305c	2008-11-13 14:49:17 -0800	[diff] [blame^]	336	* ocfs2_read_dir_block() has released the bh. */
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	337	ocfs2_error(dir->i_sb, "reading directory %llu, "
				338	"offset %lu\n",
				339	(unsigned long long)OCFS2_I(dir)->ip_blkno,
				340	block);
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	341	goto next;
				342	}
				343	i = ocfs2_search_dirblock(bh, dir, name, namelen,
				344	block << sb->s_blocksize_bits,
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	345	bh->b_data, sb->s_blocksize,
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	346	res_dir);
				347	if (i == 1) {
				348	OCFS2_I(dir)->ip_dir_start_lookup = block;
				349	ret = bh;
				350	goto cleanup_and_exit;
				351	} else {
				352	brelse(bh);
				353	if (i < 0)
				354	goto cleanup_and_exit;
				355	}
				356	next:
				357	if (++block >= nblocks)
				358	block = 0;
				359	} while (block != start);
				360
				361	/*
				362	* If the directory has grown while we were searching, then
				363	* search the last part of the directory before giving up.
				364	*/
				365	block = nblocks;
				366	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
				367	if (block < nblocks) {
				368	start = 0;
				369	goto restart;
				370	}
				371
				372	cleanup_and_exit:
				373	/* Clean up the read-ahead blocks */
				374	for (; ra_ptr < ra_max; ra_ptr++)
				375	brelse(bh_use[ra_ptr]);
				376
				377	mlog_exit_ptr(ret);
				378	return ret;
				379	}
				380
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	381	/*
				382	* Try to find an entry of the provided name within 'dir'.
				383	*
				384	* If nothing was found, NULL is returned. Otherwise, a buffer_head
				385	* and pointer to the dir entry are passed back.
				386	*
				387	* Caller can NOT assume anything about the contents of the
				388	* buffer_head - it is passed back only so that it can be passed into
				389	* any one of the manipulation functions (add entry, delete entry,
				390	* etc). As an example, bh in the extent directory case is a data
				391	* block, in the inline-data case it actually points to an inode.
				392	*/
				393	struct buffer_head ocfs2_find_entry(const char name, int namelen,
				394	struct inode *dir,
				395	struct ocfs2_dir_entry **res_dir)
				396	{
				397	*res_dir = NULL;
				398
				399	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
				400	return ocfs2_find_entry_id(name, namelen, dir, res_dir);
				401
				402	return ocfs2_find_entry_el(name, namelen, dir, res_dir);
				403	}
				404
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	405	/*
				406	* Update inode number and type of a previously found directory entry.
				407	*/
Mark Fasheh	38760e2	2007-09-11 17:21:56 -0700	[diff] [blame]	408	int ocfs2_update_entry(struct inode dir, handle_t handle,
				409	struct buffer_head de_bh, struct ocfs2_dir_entry de,
				410	struct inode *new_entry_inode)
				411	{
				412	int ret;
				413
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	414	/*
				415	* The same code works fine for both inline-data and extent
				416	* based directories, so no need to split this up.
				417	*/
				418
Mark Fasheh	38760e2	2007-09-11 17:21:56 -0700	[diff] [blame]	419	ret = ocfs2_journal_access(handle, dir, de_bh,
				420	OCFS2_JOURNAL_ACCESS_WRITE);
				421	if (ret) {
				422	mlog_errno(ret);
				423	goto out;
				424	}
				425
				426	de->inode = cpu_to_le64(OCFS2_I(new_entry_inode)->ip_blkno);
				427	ocfs2_set_de_type(de, new_entry_inode->i_mode);
				428
				429	ocfs2_journal_dirty(handle, de_bh);
				430
				431	out:
				432	return ret;
				433	}
				434
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	435	static int __ocfs2_delete_entry(handle_t handle, struct inode dir,
				436	struct ocfs2_dir_entry *de_del,
				437	struct buffer_head bh, char first_de,
				438	unsigned int bytes)
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	439	{
				440	struct ocfs2_dir_entry de, pde;
				441	int i, status = -ENOENT;
				442
				443	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
				444
				445	i = 0;
				446	pde = NULL;
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	447	de = (struct ocfs2_dir_entry *) first_de;
				448	while (i < bytes) {
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	449	if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
				450	status = -EIO;
				451	mlog_errno(status);
				452	goto bail;
				453	}
				454	if (de == de_del) {
				455	status = ocfs2_journal_access(handle, dir, bh,
				456	OCFS2_JOURNAL_ACCESS_WRITE);
				457	if (status < 0) {
				458	status = -EIO;
				459	mlog_errno(status);
				460	goto bail;
				461	}
				462	if (pde)
Marcin Slusarz	0dd3256	2008-02-13 00:06:18 +0100	[diff] [blame]	463	le16_add_cpu(&pde->rec_len,
				464	le16_to_cpu(de->rec_len));
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	465	else
				466	de->inode = 0;
				467	dir->i_version++;
				468	status = ocfs2_journal_dirty(handle, bh);
				469	goto bail;
				470	}
				471	i += le16_to_cpu(de->rec_len);
				472	pde = de;
				473	de = (struct ocfs2_dir_entry )((char )de + le16_to_cpu(de->rec_len));
				474	}
				475	bail:
				476	mlog_exit(status);
				477	return status;
				478	}
				479
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	480	static inline int ocfs2_delete_entry_id(handle_t *handle,
				481	struct inode *dir,
				482	struct ocfs2_dir_entry *de_del,
				483	struct buffer_head *bh)
				484	{
				485	int ret;
				486	struct buffer_head *di_bh = NULL;
				487	struct ocfs2_dinode *di;
				488	struct ocfs2_inline_data *data;
				489
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	490	ret = ocfs2_read_inode_block(dir, &di_bh);
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	491	if (ret) {
				492	mlog_errno(ret);
				493	goto out;
				494	}
				495
				496	di = (struct ocfs2_dinode *)di_bh->b_data;
				497	data = &di->id2.i_data;
				498
				499	ret = __ocfs2_delete_entry(handle, dir, de_del, bh, data->id_data,
				500	i_size_read(dir));
				501
				502	brelse(di_bh);
				503	out:
				504	return ret;
				505	}
				506
				507	static inline int ocfs2_delete_entry_el(handle_t *handle,
				508	struct inode *dir,
				509	struct ocfs2_dir_entry *de_del,
				510	struct buffer_head *bh)
				511	{
				512	return __ocfs2_delete_entry(handle, dir, de_del, bh, bh->b_data,
				513	bh->b_size);
				514	}
				515
				516	/*
				517	* ocfs2_delete_entry deletes a directory entry by merging it with the
				518	* previous entry
				519	*/
				520	int ocfs2_delete_entry(handle_t *handle,
				521	struct inode *dir,
				522	struct ocfs2_dir_entry *de_del,
				523	struct buffer_head *bh)
				524	{
				525	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
				526	return ocfs2_delete_entry_id(handle, dir, de_del, bh);
				527
				528	return ocfs2_delete_entry_el(handle, dir, de_del, bh);
				529	}
				530
Mark Fasheh	8553cf4	2007-09-13 16:29:01 -0700	[diff] [blame]	531	/*
				532	* Check whether 'de' has enough room to hold an entry of
				533	* 'new_rec_len' bytes.
				534	*/
				535	static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de,
				536	unsigned int new_rec_len)
				537	{
				538	unsigned int de_really_used;
				539
				540	/* Check whether this is an empty record with enough space */
				541	if (le64_to_cpu(de->inode) == 0 &&
				542	le16_to_cpu(de->rec_len) >= new_rec_len)
				543	return 1;
				544
				545	/*
				546	* Record might have free space at the end which we can
				547	* use.
				548	*/
				549	de_really_used = OCFS2_DIR_REC_LEN(de->name_len);
				550	if (le16_to_cpu(de->rec_len) >= (de_really_used + new_rec_len))
				551	return 1;
				552
				553	return 0;
				554	}
				555
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	556	/* we don't always have a dentry for what we want to add, so people
				557	* like orphan dir can call this instead.
				558	*
				559	* If you pass me insert_bh, I'll skip the search of the other dir
				560	* blocks and put the record in there.
				561	*/
				562	int __ocfs2_add_entry(handle_t *handle,
				563	struct inode *dir,
				564	const char *name, int namelen,
				565	struct inode *inode, u64 blkno,
				566	struct buffer_head *parent_fe_bh,
				567	struct buffer_head *insert_bh)
				568	{
				569	unsigned long offset;
				570	unsigned short rec_len;
				571	struct ocfs2_dir_entry de, de1;
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	572	struct ocfs2_dinode di = (struct ocfs2_dinode )parent_fe_bh->b_data;
				573	struct super_block *sb = dir->i_sb;
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	574	int retval, status;
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	575	unsigned int size = sb->s_blocksize;
				576	char *data_start = insert_bh->b_data;
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	577
				578	mlog_entry_void();
				579
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	580	if (!namelen)
				581	return -EINVAL;
				582
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	583	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				584	data_start = di->id2.i_data.id_data;
				585	size = i_size_read(dir);
				586
				587	BUG_ON(insert_bh != parent_fe_bh);
				588	}
				589
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	590	rec_len = OCFS2_DIR_REC_LEN(namelen);
				591	offset = 0;
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	592	de = (struct ocfs2_dir_entry *) data_start;
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	593	while (1) {
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	594	BUG_ON((char *)de >= (size + data_start));
				595
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	596	/* These checks should've already been passed by the
				597	* prepare function, but I guess we can leave them
				598	* here anyway. */
				599	if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
				600	retval = -ENOENT;
				601	goto bail;
				602	}
				603	if (ocfs2_match(namelen, name, de)) {
				604	retval = -EEXIST;
				605	goto bail;
				606	}
Mark Fasheh	8553cf4	2007-09-13 16:29:01 -0700	[diff] [blame]	607
				608	if (ocfs2_dirent_would_fit(de, rec_len)) {
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	609	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
				610	retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
				611	if (retval < 0) {
				612	mlog_errno(retval);
				613	goto bail;
				614	}
				615
				616	status = ocfs2_journal_access(handle, dir, insert_bh,
				617	OCFS2_JOURNAL_ACCESS_WRITE);
				618	/* By now the buffer is marked for journaling */
				619	offset += le16_to_cpu(de->rec_len);
				620	if (le64_to_cpu(de->inode)) {
				621	de1 = (struct ocfs2_dir_entry )((char ) de +
				622	OCFS2_DIR_REC_LEN(de->name_len));
				623	de1->rec_len =
				624	cpu_to_le16(le16_to_cpu(de->rec_len) -
				625	OCFS2_DIR_REC_LEN(de->name_len));
				626	de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
				627	de = de1;
				628	}
				629	de->file_type = OCFS2_FT_UNKNOWN;
				630	if (blkno) {
				631	de->inode = cpu_to_le64(blkno);
				632	ocfs2_set_de_type(de, inode->i_mode);
				633	} else
				634	de->inode = 0;
				635	de->name_len = namelen;
				636	memcpy(de->name, name, namelen);
				637
				638	dir->i_version++;
				639	status = ocfs2_journal_dirty(handle, insert_bh);
				640	retval = 0;
				641	goto bail;
				642	}
				643	offset += le16_to_cpu(de->rec_len);
				644	de = (struct ocfs2_dir_entry ) ((char ) de + le16_to_cpu(de->rec_len));
				645	}
				646
				647	/* when you think about it, the assert above should prevent us
				648	* from ever getting here. */
				649	retval = -ENOSPC;
				650	bail:
				651
				652	mlog_exit(retval);
				653	return retval;
				654	}
				655
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	656	static int ocfs2_dir_foreach_blk_id(struct inode *inode,
Mathieu Desnoyers	2b47c36	2007-10-16 23:27:21 -0700	[diff] [blame]	657	u64 *f_version,
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	658	loff_t f_pos, void priv,
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	659	filldir_t filldir, int *filldir_err)
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	660	{
				661	int ret, i, filldir_ret;
				662	unsigned long offset = *f_pos;
				663	struct buffer_head *di_bh = NULL;
				664	struct ocfs2_dinode *di;
				665	struct ocfs2_inline_data *data;
				666	struct ocfs2_dir_entry *de;
				667
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	668	ret = ocfs2_read_inode_block(inode, &di_bh);
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	669	if (ret) {
				670	mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
				671	(unsigned long long)OCFS2_I(inode)->ip_blkno);
				672	goto out;
				673	}
				674
				675	di = (struct ocfs2_dinode *)di_bh->b_data;
				676	data = &di->id2.i_data;
				677
				678	while (*f_pos < i_size_read(inode)) {
				679	revalidate:
				680	/* If the dir block has changed since the last call to
				681	* readdir(2), then we might be pointing to an invalid
				682	* dirent right now. Scan from the start of the block
				683	* to make sure. */
				684	if (*f_version != inode->i_version) {
				685	for (i = 0; i < i_size_read(inode) && i < offset; ) {
				686	de = (struct ocfs2_dir_entry *)
				687	(data->id_data + i);
				688	/* It's too expensive to do a full
				689	* dirent test each time round this
				690	* loop, but we do have to test at
				691	* least that it is non-zero. A
				692	* failure will be detected in the
				693	* dirent test below. */
				694	if (le16_to_cpu(de->rec_len) <
				695	OCFS2_DIR_REC_LEN(1))
				696	break;
				697	i += le16_to_cpu(de->rec_len);
				698	}
				699	*f_pos = offset = i;
				700	*f_version = inode->i_version;
				701	}
				702
				703	de = (struct ocfs2_dir_entry ) (data->id_data + f_pos);
				704	if (!ocfs2_check_dir_entry(inode, de, di_bh, *f_pos)) {
				705	/* On error, skip the f_pos to the end. */
				706	*f_pos = i_size_read(inode);
				707	goto out;
				708	}
				709	offset += le16_to_cpu(de->rec_len);
				710	if (le64_to_cpu(de->inode)) {
				711	/* We might block in the next section
				712	* if the data destination is
				713	* currently swapped out. So, use a
				714	* version stamp to detect whether or
				715	* not the directory has been modified
				716	* during the copy operation.
				717	*/
Mathieu Desnoyers	2b47c36	2007-10-16 23:27:21 -0700	[diff] [blame]	718	u64 version = *f_version;
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	719	unsigned char d_type = DT_UNKNOWN;
				720
				721	if (de->file_type < OCFS2_FT_MAX)
				722	d_type = ocfs2_filetype_table[de->file_type];
				723
				724	filldir_ret = filldir(priv, de->name,
				725	de->name_len,
				726	*f_pos,
				727	le64_to_cpu(de->inode),
				728	d_type);
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	729	if (filldir_ret) {
				730	if (filldir_err)
				731	*filldir_err = filldir_ret;
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	732	break;
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	733	}
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	734	if (version != *f_version)
				735	goto revalidate;
				736	}
				737	*f_pos += le16_to_cpu(de->rec_len);
				738	}
				739
				740	out:
				741	brelse(di_bh);
				742
				743	return 0;
				744	}
				745
				746	static int ocfs2_dir_foreach_blk_el(struct inode *inode,
Mathieu Desnoyers	2b47c36	2007-10-16 23:27:21 -0700	[diff] [blame]	747	u64 *f_version,
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	748	loff_t f_pos, void priv,
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	749	filldir_t filldir, int *filldir_err)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	750	{
				751	int error = 0;
Mark Fasheh	aa95887	2006-04-21 13:49:02 -0700	[diff] [blame]	752	unsigned long offset, blk, last_ra_blk = 0;
				753	int i, stored;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	754	struct buffer_head * bh, * tmp;
				755	struct ocfs2_dir_entry * de;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	756	struct super_block * sb = inode->i_sb;
Mark Fasheh	aa95887	2006-04-21 13:49:02 -0700	[diff] [blame]	757	unsigned int ra_sectors = 16;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	758
				759	stored = 0;
				760	bh = NULL;
				761
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	762	offset = (*f_pos) & (sb->s_blocksize - 1);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	763
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	764	while (!error && !stored && *f_pos < i_size_read(inode)) {
				765	blk = (*f_pos) >> sb->s_blocksize_bits;
Joel Becker	a22305c	2008-11-13 14:49:17 -0800	[diff] [blame^]	766	if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
				767	/* Skip the corrupt dirblock and keep trying */
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	768	*f_pos += sb->s_blocksize - offset;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	769	continue;
				770	}
				771
Mark Fasheh	aa95887	2006-04-21 13:49:02 -0700	[diff] [blame]	772	/* The idea here is to begin with 8k read-ahead and to stay
				773	* 4k ahead of our current position.
				774	*
				775	* TODO: Use the pagecache for this. We just need to
				776	* make sure it's cluster-safe... */
				777	if (!last_ra_blk
				778	\|\| (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
				779	for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	780	i > 0; i--) {
Joel Becker	a22305c	2008-11-13 14:49:17 -0800	[diff] [blame^]	781	tmp = NULL;
				782	if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
				783	OCFS2_BH_READAHEAD))
				784	brelse(tmp);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	785	}
Mark Fasheh	aa95887	2006-04-21 13:49:02 -0700	[diff] [blame]	786	last_ra_blk = blk;
				787	ra_sectors = 8;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	788	}
				789
				790	revalidate:
				791	/* If the dir block has changed since the last call to
				792	* readdir(2), then we might be pointing to an invalid
				793	* dirent right now. Scan from the start of the block
				794	* to make sure. */
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	795	if (*f_version != inode->i_version) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	796	for (i = 0; i < sb->s_blocksize && i < offset; ) {
				797	de = (struct ocfs2_dir_entry *) (bh->b_data + i);
				798	/* It's too expensive to do a full
				799	* dirent test each time round this
				800	* loop, but we do have to test at
				801	* least that it is non-zero. A
				802	* failure will be detected in the
				803	* dirent test below. */
				804	if (le16_to_cpu(de->rec_len) <
				805	OCFS2_DIR_REC_LEN(1))
				806	break;
				807	i += le16_to_cpu(de->rec_len);
				808	}
				809	offset = i;
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	810	f_pos = ((f_pos) & ~(sb->s_blocksize - 1))
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	811	\| offset;
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	812	*f_version = inode->i_version;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	813	}
				814
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	815	while (!error && *f_pos < i_size_read(inode)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	816	&& offset < sb->s_blocksize) {
				817	de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
				818	if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
				819	/* On error, skip the f_pos to the
				820	next block. */
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	821	f_pos = ((f_pos) \| (sb->s_blocksize - 1)) + 1;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	822	brelse(bh);
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	823	goto out;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	824	}
				825	offset += le16_to_cpu(de->rec_len);
				826	if (le64_to_cpu(de->inode)) {
				827	/* We might block in the next section
				828	* if the data destination is
				829	* currently swapped out. So, use a
				830	* version stamp to detect whether or
				831	* not the directory has been modified
				832	* during the copy operation.
				833	*/
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	834	unsigned long version = *f_version;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	835	unsigned char d_type = DT_UNKNOWN;
				836
				837	if (de->file_type < OCFS2_FT_MAX)
				838	d_type = ocfs2_filetype_table[de->file_type];
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	839	error = filldir(priv, de->name,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	840	de->name_len,
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	841	*f_pos,
Mark Fasheh	7e85367	2007-09-10 17:30:26 -0700	[diff] [blame]	842	le64_to_cpu(de->inode),
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	843	d_type);
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	844	if (error) {
				845	if (filldir_err)
				846	*filldir_err = error;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	847	break;
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	848	}
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	849	if (version != *f_version)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	850	goto revalidate;
				851	stored ++;
				852	}
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	853	*f_pos += le16_to_cpu(de->rec_len);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	854	}
				855	offset = 0;
				856	brelse(bh);
Joel Becker	a22305c	2008-11-13 14:49:17 -0800	[diff] [blame^]	857	bh = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	858	}
				859
				860	stored = 0;
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	861	out:
				862	return stored;
				863	}
				864
Mathieu Desnoyers	2b47c36	2007-10-16 23:27:21 -0700	[diff] [blame]	865	static int ocfs2_dir_foreach_blk(struct inode inode, u64 f_version,
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	866	loff_t f_pos, void priv, filldir_t filldir,
				867	int *filldir_err)
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	868	{
				869	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
				870	return ocfs2_dir_foreach_blk_id(inode, f_version, f_pos, priv,
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	871	filldir, filldir_err);
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	872
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	873	return ocfs2_dir_foreach_blk_el(inode, f_version, f_pos, priv, filldir,
				874	filldir_err);
Mark Fasheh	23193e5	2007-09-12 13:01:18 -0700	[diff] [blame]	875	}
				876
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	877	/*
Mark Fasheh	5eae5b9	2007-09-10 17:50:51 -0700	[diff] [blame]	878	* This is intended to be called from inside other kernel functions,
				879	* so we fake some arguments.
				880	*/
				881	int ocfs2_dir_foreach(struct inode inode, loff_t f_pos, void *priv,
				882	filldir_t filldir)
				883	{
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	884	int ret = 0, filldir_err = 0;
Mathieu Desnoyers	2b47c36	2007-10-16 23:27:21 -0700	[diff] [blame]	885	u64 version = inode->i_version;
Mark Fasheh	5eae5b9	2007-09-10 17:50:51 -0700	[diff] [blame]	886
				887	while (*f_pos < i_size_read(inode)) {
				888	ret = ocfs2_dir_foreach_blk(inode, &version, f_pos, priv,
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	889	filldir, &filldir_err);
				890	if (ret \|\| filldir_err)
Mark Fasheh	5eae5b9	2007-09-10 17:50:51 -0700	[diff] [blame]	891	break;
				892	}
				893
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	894	if (ret > 0)
				895	ret = -EIO;
				896
Mark Fasheh	5eae5b9	2007-09-10 17:50:51 -0700	[diff] [blame]	897	return 0;
				898	}
				899
				900	/*
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	901	* ocfs2_readdir()
				902	*
				903	*/
				904	int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
				905	{
				906	int error = 0;
				907	struct inode *inode = filp->f_path.dentry->d_inode;
				908	int lock_level = 0;
				909
				910	mlog_entry("dirino=%llu\n",
				911	(unsigned long long)OCFS2_I(inode)->ip_blkno);
				912
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	913	error = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	914	if (lock_level && error >= 0) {
				915	/* We release EX lock which used to update atime
				916	* and get PR lock again to reduce contention
				917	* on commonly accessed directories. */
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	918	ocfs2_inode_unlock(inode, 1);
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	919	lock_level = 0;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	920	error = ocfs2_inode_lock(inode, NULL, 0);
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	921	}
				922	if (error < 0) {
				923	if (error != -ENOENT)
				924	mlog_errno(error);
				925	/* we haven't got any yet, so propagate the error. */
				926	goto bail_nolock;
				927	}
				928
				929	error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos,
Mark Fasheh	e7b3401	2007-09-24 14:25:27 -0700	[diff] [blame]	930	dirent, filldir, NULL);
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	931
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	932	ocfs2_inode_unlock(inode, lock_level);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	933
Mark Fasheh	aa95887	2006-04-21 13:49:02 -0700	[diff] [blame]	934	bail_nolock:
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	935	mlog_exit(error);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	936
Mark Fasheh	b8bc5f4	2007-09-10 17:17:52 -0700	[diff] [blame]	937	return error;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	938	}
				939
				940	/*
Jes Sorensen	1b1dcc1	2006-01-09 15:59:24 -0800	[diff] [blame]	941	* NOTE: this should always be called with parent dir i_mutex taken.
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	942	*/
				943	int ocfs2_find_files_on_disk(const char *name,
				944	int namelen,
				945	u64 *blkno,
				946	struct inode *inode,
				947	struct buffer_head **dirent_bh,
				948	struct ocfs2_dir_entry **dirent)
				949	{
				950	int status = -ENOENT;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	951
Joel Becker	2b388c6	2006-05-10 18:28:59 -0700	[diff] [blame]	952	mlog_entry("(name=%.*s, blkno=%p, inode=%p, dirent_bh=%p, dirent=%p)\n",
				953	namelen, name, blkno, inode, dirent_bh, dirent);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	954
				955	*dirent_bh = ocfs2_find_entry(name, namelen, inode, dirent);
				956	if (!dirent_bh \|\| !dirent) {
				957	status = -ENOENT;
				958	goto leave;
				959	}
				960
				961	blkno = le64_to_cpu((dirent)->inode);
				962
				963	status = 0;
				964	leave:
				965	if (status < 0) {
				966	*dirent = NULL;
Mark Fasheh	a81cb88	2008-10-07 14:25:16 -0700	[diff] [blame]	967	brelse(*dirent_bh);
				968	*dirent_bh = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	969	}
				970
				971	mlog_exit(status);
				972	return status;
				973	}
				974
Mark Fasheh	be94d11	2007-09-11 15:22:06 -0700	[diff] [blame]	975	/*
				976	* Convenience function for callers which just want the block number
				977	* mapped to a name and don't require the full dirent info, etc.
				978	*/
				979	int ocfs2_lookup_ino_from_name(struct inode dir, const char name,
				980	int namelen, u64 *blkno)
				981	{
				982	int ret;
				983	struct buffer_head *bh = NULL;
				984	struct ocfs2_dir_entry *dirent = NULL;
				985
				986	ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &bh, &dirent);
				987	brelse(bh);
				988
				989	return ret;
				990	}
				991
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	992	/* Check for a name within a directory.
				993	*
				994	* Return 0 if the name does not exist
				995	* Return -EEXIST if the directory contains the name
				996	*
Jes Sorensen	1b1dcc1	2006-01-09 15:59:24 -0800	[diff] [blame]	997	* Callers should have i_mutex + a cluster lock on dir
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	998	*/
				999	int ocfs2_check_dir_for_entry(struct inode *dir,
				1000	const char *name,
				1001	int namelen)
				1002	{
				1003	int ret;
				1004	struct buffer_head *dirent_bh = NULL;
				1005	struct ocfs2_dir_entry *dirent = NULL;
				1006
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1007	mlog_entry("dir %llu, name '%.*s'\n",
				1008	(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1009
				1010	ret = -EEXIST;
				1011	dirent_bh = ocfs2_find_entry(name, namelen, dir, &dirent);
				1012	if (dirent_bh)
				1013	goto bail;
				1014
				1015	ret = 0;
				1016	bail:
Mark Fasheh	a81cb88	2008-10-07 14:25:16 -0700	[diff] [blame]	1017	brelse(dirent_bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1018
				1019	mlog_exit(ret);
				1020	return ret;
				1021	}
				1022
/*
 * State collected while ocfs2_empty_dir() walks a directory. Each flag is
 * set to 1 by ocfs2_empty_dir_filldir() when the matching entry is seen.
 */
struct ocfs2_empty_dir_priv {
	unsigned int seen_dot;		/* "." seen at position 0 */
	unsigned int seen_dot_dot;	/* ".." seen right after "." */
	unsigned int seen_other;	/* any other entry seen */
};
				1028	static int ocfs2_empty_dir_filldir(void priv, const char name, int name_len,
				1029	loff_t pos, u64 ino, unsigned type)
				1030	{
				1031	struct ocfs2_empty_dir_priv *p = priv;
				1032
				1033	/*
				1034	* Check the positions of "." and ".." records to be sure
				1035	* they're in the correct place.
				1036	*/
				1037	if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) {
				1038	p->seen_dot = 1;
				1039	return 0;
				1040	}
				1041
				1042	if (name_len == 2 && !strncmp("..", name, 2) &&
				1043	pos == OCFS2_DIR_REC_LEN(1)) {
				1044	p->seen_dot_dot = 1;
				1045	return 0;
				1046	}
				1047
				1048	p->seen_other = 1;
				1049	return 1;
				1050	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1051	/*
				1052	* routine to check that the specified directory is empty (for rmdir)
Mark Fasheh	0bfbbf6	2007-09-12 11:19:00 -0700	[diff] [blame]	1053	*
				1054	* Returns 1 if dir is empty, zero otherwise.
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1055	*/
				1056	int ocfs2_empty_dir(struct inode *inode)
				1057	{
Mark Fasheh	0bfbbf6	2007-09-12 11:19:00 -0700	[diff] [blame]	1058	int ret;
				1059	loff_t start = 0;
				1060	struct ocfs2_empty_dir_priv priv;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1061
Mark Fasheh	0bfbbf6	2007-09-12 11:19:00 -0700	[diff] [blame]	1062	memset(&priv, 0, sizeof(priv));
				1063
				1064	ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir);
				1065	if (ret)
				1066	mlog_errno(ret);
				1067
				1068	if (!priv.seen_dot \|\| !priv.seen_dot_dot) {
				1069	mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1070	(unsigned long long)OCFS2_I(inode)->ip_blkno);
Mark Fasheh	0bfbbf6	2007-09-12 11:19:00 -0700	[diff] [blame]	1071	/*
				1072	* XXX: Is it really safe to allow an unlink to continue?
				1073	*/
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1074	return 1;
				1075	}
				1076
Mark Fasheh	0bfbbf6	2007-09-12 11:19:00 -0700	[diff] [blame]	1077	return !priv.seen_other;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1078	}
				1079
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1080	static void ocfs2_fill_initial_dirents(struct inode *inode,
				1081	struct inode *parent,
				1082	char *start, unsigned int size)
				1083	{
				1084	struct ocfs2_dir_entry de = (struct ocfs2_dir_entry )start;
				1085
				1086	de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
				1087	de->name_len = 1;
				1088	de->rec_len =
				1089	cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
				1090	strcpy(de->name, ".");
				1091	ocfs2_set_de_type(de, S_IFDIR);
				1092
				1093	de = (struct ocfs2_dir_entry ) ((char )de + le16_to_cpu(de->rec_len));
				1094	de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
				1095	de->rec_len = cpu_to_le16(size - OCFS2_DIR_REC_LEN(1));
				1096	de->name_len = 2;
				1097	strcpy(de->name, "..");
				1098	ocfs2_set_de_type(de, S_IFDIR);
				1099	}
				1100
				1101	/*
				1102	* This works together with code in ocfs2_mknod_locked() which sets
				1103	* the inline-data flag and initializes the inline-data section.
				1104	*/
				1105	static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
				1106	handle_t *handle,
				1107	struct inode *parent,
				1108	struct inode *inode,
				1109	struct buffer_head *di_bh)
				1110	{
				1111	int ret;
				1112	struct ocfs2_dinode di = (struct ocfs2_dinode )di_bh->b_data;
				1113	struct ocfs2_inline_data *data = &di->id2.i_data;
				1114	unsigned int size = le16_to_cpu(data->id_count);
				1115
				1116	ret = ocfs2_journal_access(handle, inode, di_bh,
				1117	OCFS2_JOURNAL_ACCESS_WRITE);
				1118	if (ret) {
				1119	mlog_errno(ret);
				1120	goto out;
				1121	}
				1122
				1123	ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
				1124
				1125	ocfs2_journal_dirty(handle, di_bh);
				1126	if (ret) {
				1127	mlog_errno(ret);
				1128	goto out;
				1129	}
				1130
				1131	i_size_write(inode, size);
				1132	inode->i_nlink = 2;
				1133	inode->i_blocks = ocfs2_inode_sector_count(inode);
				1134
				1135	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
				1136	if (ret < 0)
				1137	mlog_errno(ret);
				1138
				1139	out:
				1140	return ret;
				1141	}
				1142
				1143	static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
				1144	handle_t *handle,
				1145	struct inode *parent,
				1146	struct inode *inode,
				1147	struct buffer_head *fe_bh,
				1148	struct ocfs2_alloc_context *data_ac)
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	1149	{
				1150	int status;
				1151	struct buffer_head *new_bh = NULL;
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	1152
				1153	mlog_entry_void();
				1154
				1155	status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
				1156	data_ac, NULL, &new_bh);
				1157	if (status < 0) {
				1158	mlog_errno(status);
				1159	goto bail;
				1160	}
				1161
				1162	ocfs2_set_new_buffer_uptodate(inode, new_bh);
				1163
				1164	status = ocfs2_journal_access(handle, inode, new_bh,
				1165	OCFS2_JOURNAL_ACCESS_CREATE);
				1166	if (status < 0) {
				1167	mlog_errno(status);
				1168	goto bail;
				1169	}
				1170	memset(new_bh->b_data, 0, osb->sb->s_blocksize);
				1171
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1172	ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data,
				1173	osb->sb->s_blocksize);
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	1174
				1175	status = ocfs2_journal_dirty(handle, new_bh);
				1176	if (status < 0) {
				1177	mlog_errno(status);
				1178	goto bail;
				1179	}
				1180
				1181	i_size_write(inode, inode->i_sb->s_blocksize);
				1182	inode->i_nlink = 2;
				1183	inode->i_blocks = ocfs2_inode_sector_count(inode);
				1184	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
				1185	if (status < 0) {
				1186	mlog_errno(status);
				1187	goto bail;
				1188	}
				1189
				1190	status = 0;
				1191	bail:
Mark Fasheh	a81cb88	2008-10-07 14:25:16 -0700	[diff] [blame]	1192	brelse(new_bh);
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	1193
				1194	mlog_exit(status);
				1195	return status;
				1196	}
				1197
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1198	int ocfs2_fill_new_dir(struct ocfs2_super *osb,
				1199	handle_t *handle,
				1200	struct inode *parent,
				1201	struct inode *inode,
				1202	struct buffer_head *fe_bh,
				1203	struct ocfs2_alloc_context *data_ac)
				1204	{
				1205	BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL);
				1206
				1207	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
				1208	return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh);
				1209
				1210	return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh,
				1211	data_ac);
				1212	}
				1213
				1214	static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
				1215	unsigned int new_size)
				1216	{
				1217	struct ocfs2_dir_entry *de;
				1218	struct ocfs2_dir_entry *prev_de;
				1219	char de_buf, limit;
				1220	unsigned int bytes = new_size - old_size;
				1221
				1222	limit = start + old_size;
				1223	de_buf = start;
				1224	de = (struct ocfs2_dir_entry *)de_buf;
				1225	do {
				1226	prev_de = de;
				1227	de_buf += le16_to_cpu(de->rec_len);
				1228	de = (struct ocfs2_dir_entry *)de_buf;
				1229	} while (de_buf < limit);
				1230
				1231	le16_add_cpu(&prev_de->rec_len, bytes);
				1232	}
				1233
				1234	/*
				1235	* We allocate enough clusters to fulfill "blocks_wanted", but set
				1236	* i_size to exactly one block. Ocfs2_extend_dir() will handle the
				1237	* rest automatically for us.
				1238	*
				1239	* *first_block_bh is a pointer to the 1st data block allocated to the
				1240	* directory.
				1241	*/
				1242	static int ocfs2_expand_inline_dir(struct inode dir, struct buffer_head di_bh,
				1243	unsigned int blocks_wanted,
				1244	struct buffer_head **first_block_bh)
				1245	{
				1246	int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS;
				1247	u32 alloc, bit_off, len;
				1248	struct super_block *sb = dir->i_sb;
				1249	u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits;
				1250	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
				1251	struct ocfs2_inode_info *oi = OCFS2_I(dir);
				1252	struct ocfs2_alloc_context *data_ac;
				1253	struct buffer_head *dirdata_bh = NULL;
				1254	struct ocfs2_dinode di = (struct ocfs2_dinode )di_bh->b_data;
				1255	handle_t *handle;
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	1256	struct ocfs2_extent_tree et;
				1257
Joel Becker	8d6220d	2008-08-22 12:46:09 -0700	[diff] [blame]	1258	ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1259
				1260	alloc = ocfs2_clusters_for_bytes(sb, bytes);
				1261
				1262	/*
				1263	* We should never need more than 2 clusters for this -
				1264	* maximum dirent size is far less than one block. In fact,
				1265	* the only time we'd need more than one cluster is if
				1266	* blocksize == clustersize and the dirent won't fit in the
				1267	* extra space that the expansion to a single block gives. As
				1268	* of today, that only happens on 4k/4k file systems.
				1269	*/
				1270	BUG_ON(alloc > 2);
				1271
				1272	ret = ocfs2_reserve_clusters(osb, alloc, &data_ac);
				1273	if (ret) {
				1274	mlog_errno(ret);
				1275	goto out;
				1276	}
				1277
				1278	down_write(&oi->ip_alloc_sem);
				1279
				1280	/*
Joe Perches	c78bad1	2008-02-03 17:33:42 +0200	[diff] [blame]	1281	* Prepare for worst case allocation scenario of two separate
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1282	* extents.
				1283	*/
				1284	if (alloc == 2)
				1285	credits += OCFS2_SUBALLOC_ALLOC;
				1286
				1287	handle = ocfs2_start_trans(osb, credits);
				1288	if (IS_ERR(handle)) {
				1289	ret = PTR_ERR(handle);
				1290	mlog_errno(ret);
				1291	goto out_sem;
				1292	}
				1293
				1294	/*
				1295	* Try to claim as many clusters as the bitmap can give though
				1296	* if we only get one now, that's enough to continue. The rest
				1297	* will be claimed after the conversion to extents.
				1298	*/
				1299	ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
				1300	if (ret) {
				1301	mlog_errno(ret);
				1302	goto out_commit;
				1303	}
				1304
				1305	/*
				1306	* Operations are carefully ordered so that we set up the new
				1307	* data block first. The conversion from inline data to
				1308	* extents follows.
				1309	*/
				1310	blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
				1311	dirdata_bh = sb_getblk(sb, blkno);
				1312	if (!dirdata_bh) {
				1313	ret = -EIO;
				1314	mlog_errno(ret);
				1315	goto out_commit;
				1316	}
				1317
				1318	ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
				1319
				1320	ret = ocfs2_journal_access(handle, dir, dirdata_bh,
				1321	OCFS2_JOURNAL_ACCESS_CREATE);
				1322	if (ret) {
				1323	mlog_errno(ret);
				1324	goto out_commit;
				1325	}
				1326
				1327	memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
				1328	memset(dirdata_bh->b_data + i_size_read(dir), 0,
				1329	sb->s_blocksize - i_size_read(dir));
				1330	ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir),
				1331	sb->s_blocksize);
				1332
				1333	ret = ocfs2_journal_dirty(handle, dirdata_bh);
				1334	if (ret) {
				1335	mlog_errno(ret);
				1336	goto out_commit;
				1337	}
				1338
				1339	/*
				1340	* Set extent, i_size, etc on the directory. After this, the
				1341	* inode should contain the same exact dirents as before and
				1342	* be fully accessible from system calls.
				1343	*
				1344	* We let the later dirent insert modify c/mtime - to the user
				1345	* the data hasn't changed.
				1346	*/
				1347	ret = ocfs2_journal_access(handle, dir, di_bh,
				1348	OCFS2_JOURNAL_ACCESS_CREATE);
				1349	if (ret) {
				1350	mlog_errno(ret);
				1351	goto out_commit;
				1352	}
				1353
				1354	spin_lock(&oi->ip_lock);
				1355	oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
				1356	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
				1357	spin_unlock(&oi->ip_lock);
				1358
				1359	ocfs2_dinode_new_extent_list(dir, di);
				1360
				1361	i_size_write(dir, sb->s_blocksize);
				1362	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
				1363
				1364	di->i_size = cpu_to_le64(sb->s_blocksize);
				1365	di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
				1366	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1367
				1368	/*
				1369	* This should never fail as our extent list is empty and all
				1370	* related blocks have been journaled already.
				1371	*/
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	1372	ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len,
				1373	0, NULL);
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1374	if (ret) {
				1375	mlog_errno(ret);
Tao Ma	83cab53	2008-08-21 14:14:27 +0800	[diff] [blame]	1376	goto out_commit;
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1377	}
				1378
Mark Fasheh	9780eb6	2008-08-05 11:32:46 -0700	[diff] [blame]	1379	/*
				1380	* Set i_blocks after the extent insert for the most up to
				1381	* date ip_clusters value.
				1382	*/
				1383	dir->i_blocks = ocfs2_inode_sector_count(dir);
				1384
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1385	ret = ocfs2_journal_dirty(handle, di_bh);
				1386	if (ret) {
				1387	mlog_errno(ret);
				1388	goto out_commit;
				1389	}
				1390
				1391	/*
				1392	* We asked for two clusters, but only got one in the 1st
				1393	* pass. Claim the 2nd cluster as a separate extent.
				1394	*/
				1395	if (alloc > len) {
				1396	ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
				1397	&len);
				1398	if (ret) {
				1399	mlog_errno(ret);
				1400	goto out_commit;
				1401	}
				1402	blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
				1403
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	1404	ret = ocfs2_insert_extent(osb, handle, dir, &et, 1,
				1405	blkno, len, 0, NULL);
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1406	if (ret) {
				1407	mlog_errno(ret);
Tao Ma	83cab53	2008-08-21 14:14:27 +0800	[diff] [blame]	1408	goto out_commit;
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1409	}
				1410	}
				1411
				1412	*first_block_bh = dirdata_bh;
				1413	dirdata_bh = NULL;
				1414
				1415	out_commit:
				1416	ocfs2_commit_trans(osb, handle);
				1417
				1418	out_sem:
				1419	up_write(&oi->ip_alloc_sem);
				1420
				1421	out:
				1422	if (data_ac)
				1423	ocfs2_free_alloc_context(data_ac);
				1424
				1425	brelse(dirdata_bh);
				1426
				1427	return ret;
				1428	}
				1429
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1430	/* returns a bh of the 1st new block in the allocation. */
Mark Fasheh	316f4b9	2007-09-07 18:21:26 -0700	[diff] [blame]	1431	static int ocfs2_do_extend_dir(struct super_block *sb,
				1432	handle_t *handle,
				1433	struct inode *dir,
				1434	struct buffer_head *parent_fe_bh,
				1435	struct ocfs2_alloc_context *data_ac,
				1436	struct ocfs2_alloc_context *meta_ac,
				1437	struct buffer_head **new_bh)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1438	{
				1439	int status;
				1440	int extend;
Mark Fasheh	8110b07	2007-03-22 16:53:23 -0700	[diff] [blame]	1441	u64 p_blkno, v_blkno;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1442
				1443	spin_lock(&OCFS2_I(dir)->ip_lock);
				1444	extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters));
				1445	spin_unlock(&OCFS2_I(dir)->ip_lock);
				1446
				1447	if (extend) {
Mark Fasheh	dcd0538	2007-01-16 11:32:23 -0800	[diff] [blame]	1448	u32 offset = OCFS2_I(dir)->ip_clusters;
				1449
Tao Ma	0eb8d47	2008-08-18 17:38:45 +0800	[diff] [blame]	1450	status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
				1451	1, 0, parent_fe_bh, handle,
				1452	data_ac, meta_ac, NULL);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1453	BUG_ON(status == -EAGAIN);
				1454	if (status < 0) {
				1455	mlog_errno(status);
				1456	goto bail;
				1457	}
				1458	}
				1459
Mark Fasheh	8110b07	2007-03-22 16:53:23 -0700	[diff] [blame]	1460	v_blkno = ocfs2_blocks_for_bytes(sb, i_size_read(dir));
				1461	status = ocfs2_extent_map_get_blocks(dir, v_blkno, &p_blkno, NULL, NULL);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1462	if (status < 0) {
				1463	mlog_errno(status);
				1464	goto bail;
				1465	}
				1466
				1467	*new_bh = sb_getblk(sb, p_blkno);
				1468	if (!*new_bh) {
				1469	status = -EIO;
				1470	mlog_errno(status);
				1471	goto bail;
				1472	}
				1473	status = 0;
				1474	bail:
				1475	mlog_exit(status);
				1476	return status;
				1477	}
				1478
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1479	/*
				1480	* Assumes you already have a cluster lock on the directory.
				1481	*
				1482	* 'blocks_wanted' is only used if we have an inline directory which
				1483	* is to be turned into an extent based one. The size of the dirent to
				1484	* insert might be larger than the space gained by growing to just one
				1485	* block, so we may have to grow the inode by two blocks in that case.
				1486	*/
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1487	static int ocfs2_extend_dir(struct ocfs2_super *osb,
				1488	struct inode *dir,
				1489	struct buffer_head *parent_fe_bh,
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1490	unsigned int blocks_wanted,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1491	struct buffer_head **new_de_bh)
				1492	{
				1493	int status = 0;
Joel Becker	ee19a77	2007-03-28 18:27:07 -0700	[diff] [blame]	1494	int credits, num_free_extents, drop_alloc_sem = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1495	loff_t dir_i_size;
				1496	struct ocfs2_dinode fe = (struct ocfs2_dinode ) parent_fe_bh->b_data;
Tao Ma	811f933	2008-08-18 17:38:43 +0800	[diff] [blame]	1497	struct ocfs2_extent_list *el = &fe->id2.i_list;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1498	struct ocfs2_alloc_context *data_ac = NULL;
				1499	struct ocfs2_alloc_context *meta_ac = NULL;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	1500	handle_t *handle = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1501	struct buffer_head *new_bh = NULL;
				1502	struct ocfs2_dir_entry * de;
				1503	struct super_block *sb = osb->sb;
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	1504	struct ocfs2_extent_tree et;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1505
				1506	mlog_entry_void();
				1507
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1508	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				1509	status = ocfs2_expand_inline_dir(dir, parent_fe_bh,
				1510	blocks_wanted, &new_bh);
				1511	if (status) {
				1512	mlog_errno(status);
				1513	goto bail;
				1514	}
				1515
				1516	if (blocks_wanted == 1) {
				1517	/*
				1518	* If the new dirent will fit inside the space
				1519	* created by pushing out to one block, then
				1520	* we can complete the operation
				1521	* here. Otherwise we have to expand i_size
				1522	* and format the 2nd block below.
				1523	*/
				1524	BUG_ON(new_bh == NULL);
				1525	goto bail_bh;
				1526	}
				1527
				1528	/*
				1529	* Get rid of 'new_bh' - we want to format the 2nd
				1530	* data block and return that instead.
				1531	*/
				1532	brelse(new_bh);
				1533	new_bh = NULL;
				1534
				1535	dir_i_size = i_size_read(dir);
				1536	credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
				1537	goto do_extend;
				1538	}
				1539
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1540	dir_i_size = i_size_read(dir);
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1541	mlog(0, "extending dir %llu (i_size = %lld)\n",
				1542	(unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1543
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1544	/* dir->i_size is always block aligned. */
				1545	spin_lock(&OCFS2_I(dir)->ip_lock);
				1546	if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
				1547	spin_unlock(&OCFS2_I(dir)->ip_lock);
Joel Becker	8d6220d	2008-08-22 12:46:09 -0700	[diff] [blame]	1548	ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh);
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	1549	num_free_extents = ocfs2_num_free_extents(osb, dir, &et);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1550	if (num_free_extents < 0) {
				1551	status = num_free_extents;
				1552	mlog_errno(status);
				1553	goto bail;
				1554	}
				1555
				1556	if (!num_free_extents) {
Tao Ma	811f933	2008-08-18 17:38:43 +0800	[diff] [blame]	1557	status = ocfs2_reserve_new_metadata(osb, el, &meta_ac);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1558	if (status < 0) {
				1559	if (status != -ENOSPC)
				1560	mlog_errno(status);
				1561	goto bail;
				1562	}
				1563	}
				1564
Mark Fasheh	da5cbf2	2006-10-06 18:34:35 -0700	[diff] [blame]	1565	status = ocfs2_reserve_clusters(osb, 1, &data_ac);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1566	if (status < 0) {
				1567	if (status != -ENOSPC)
				1568	mlog_errno(status);
				1569	goto bail;
				1570	}
				1571
Tao Ma	811f933	2008-08-18 17:38:43 +0800	[diff] [blame]	1572	credits = ocfs2_calc_extend_credits(sb, el, 1);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1573	} else {
				1574	spin_unlock(&OCFS2_I(dir)->ip_lock);
				1575	credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
				1576	}
				1577
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1578	do_extend:
Joel Becker	ee19a77	2007-03-28 18:27:07 -0700	[diff] [blame]	1579	down_write(&OCFS2_I(dir)->ip_alloc_sem);
				1580	drop_alloc_sem = 1;
				1581
Mark Fasheh	65eff9c	2006-10-09 17:26:22 -0700	[diff] [blame]	1582	handle = ocfs2_start_trans(osb, credits);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1583	if (IS_ERR(handle)) {
				1584	status = PTR_ERR(handle);
				1585	handle = NULL;
				1586	mlog_errno(status);
				1587	goto bail;
				1588	}
				1589
				1590	status = ocfs2_do_extend_dir(osb->sb, handle, dir, parent_fe_bh,
				1591	data_ac, meta_ac, &new_bh);
				1592	if (status < 0) {
				1593	mlog_errno(status);
				1594	goto bail;
				1595	}
				1596
				1597	ocfs2_set_new_buffer_uptodate(dir, new_bh);
				1598
				1599	status = ocfs2_journal_access(handle, dir, new_bh,
				1600	OCFS2_JOURNAL_ACCESS_CREATE);
				1601	if (status < 0) {
				1602	mlog_errno(status);
				1603	goto bail;
				1604	}
				1605	memset(new_bh->b_data, 0, sb->s_blocksize);
				1606	de = (struct ocfs2_dir_entry *) new_bh->b_data;
				1607	de->inode = 0;
				1608	de->rec_len = cpu_to_le16(sb->s_blocksize);
				1609	status = ocfs2_journal_dirty(handle, new_bh);
				1610	if (status < 0) {
				1611	mlog_errno(status);
				1612	goto bail;
				1613	}
				1614
				1615	dir_i_size += dir->i_sb->s_blocksize;
				1616	i_size_write(dir, dir_i_size);
Mark Fasheh	8110b07	2007-03-22 16:53:23 -0700	[diff] [blame]	1617	dir->i_blocks = ocfs2_inode_sector_count(dir);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1618	status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
				1619	if (status < 0) {
				1620	mlog_errno(status);
				1621	goto bail;
				1622	}
				1623
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1624	bail_bh:
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1625	*new_de_bh = new_bh;
				1626	get_bh(*new_de_bh);
				1627	bail:
Joel Becker	ee19a77	2007-03-28 18:27:07 -0700	[diff] [blame]	1628	if (drop_alloc_sem)
				1629	up_write(&OCFS2_I(dir)->ip_alloc_sem);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1630	if (handle)
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	1631	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1632
				1633	if (data_ac)
				1634	ocfs2_free_alloc_context(data_ac);
				1635	if (meta_ac)
				1636	ocfs2_free_alloc_context(meta_ac);
				1637
Mark Fasheh	a81cb88	2008-10-07 14:25:16 -0700	[diff] [blame]	1638	brelse(new_bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1639
				1640	mlog_exit(status);
				1641	return status;
				1642	}
				1643
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1644	static int ocfs2_find_dir_space_id(struct inode dir, struct buffer_head di_bh,
				1645	const char *name, int namelen,
				1646	struct buffer_head **ret_de_bh,
				1647	unsigned int *blocks_wanted)
				1648	{
				1649	int ret;
				1650	struct ocfs2_dinode di = (struct ocfs2_dinode )di_bh->b_data;
				1651	struct ocfs2_dir_entry de, last_de = NULL;
				1652	char de_buf, limit;
				1653	unsigned long offset = 0;
				1654	unsigned int rec_len, new_rec_len;
				1655
				1656	de_buf = di->id2.i_data.id_data;
				1657	limit = de_buf + i_size_read(dir);
				1658	rec_len = OCFS2_DIR_REC_LEN(namelen);
				1659
				1660	while (de_buf < limit) {
				1661	de = (struct ocfs2_dir_entry *)de_buf;
				1662
				1663	if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
				1664	ret = -ENOENT;
				1665	goto out;
				1666	}
				1667	if (ocfs2_match(namelen, name, de)) {
				1668	ret = -EEXIST;
				1669	goto out;
				1670	}
				1671	if (ocfs2_dirent_would_fit(de, rec_len)) {
				1672	/* Ok, we found a spot. Return this bh and let
				1673	* the caller actually fill it in. */
				1674	*ret_de_bh = di_bh;
				1675	get_bh(*ret_de_bh);
				1676	ret = 0;
				1677	goto out;
				1678	}
				1679
				1680	last_de = de;
				1681	de_buf += le16_to_cpu(de->rec_len);
				1682	offset += le16_to_cpu(de->rec_len);
				1683	}
				1684
				1685	/*
				1686	* We're going to require expansion of the directory - figure
				1687	* out how many blocks we'll need so that a place for the
				1688	* dirent can be found.
				1689	*/
				1690	*blocks_wanted = 1;
				1691	new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir));
				1692	if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
				1693	*blocks_wanted = 2;
				1694
				1695	ret = -ENOSPC;
				1696	out:
				1697	return ret;
				1698	}
				1699
				1700	static int ocfs2_find_dir_space_el(struct inode dir, const char name,
				1701	int namelen, struct buffer_head **ret_de_bh)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1702	{
				1703	unsigned long offset;
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1704	struct buffer_head *bh = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1705	unsigned short rec_len;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1706	struct ocfs2_dir_entry *de;
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1707	struct super_block *sb = dir->i_sb;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1708	int status;
				1709
Joel Becker	a22305c	2008-11-13 14:49:17 -0800	[diff] [blame^]	1710	status = ocfs2_read_dir_block(dir, 0, &bh, 0);
				1711	if (status) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1712	mlog_errno(status);
				1713	goto bail;
				1714	}
				1715
				1716	rec_len = OCFS2_DIR_REC_LEN(namelen);
				1717	offset = 0;
				1718	de = (struct ocfs2_dir_entry *) bh->b_data;
				1719	while (1) {
				1720	if ((char *)de >= sb->s_blocksize + bh->b_data) {
				1721	brelse(bh);
				1722	bh = NULL;
				1723
				1724	if (i_size_read(dir) <= offset) {
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1725	/*
				1726	* Caller will have to expand this
				1727	* directory.
				1728	*/
				1729	status = -ENOSPC;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1730	goto bail;
				1731	}
Joel Becker	a22305c	2008-11-13 14:49:17 -0800	[diff] [blame^]	1732	status = ocfs2_read_dir_block(dir,
				1733	offset >> sb->s_blocksize_bits,
				1734	&bh, 0);
				1735	if (status) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1736	mlog_errno(status);
				1737	goto bail;
				1738	}
				1739	/* move to next block */
				1740	de = (struct ocfs2_dir_entry *) bh->b_data;
				1741	}
				1742	if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
				1743	status = -ENOENT;
				1744	goto bail;
				1745	}
				1746	if (ocfs2_match(namelen, name, de)) {
				1747	status = -EEXIST;
				1748	goto bail;
				1749	}
Mark Fasheh	8553cf4	2007-09-13 16:29:01 -0700	[diff] [blame]	1750	if (ocfs2_dirent_would_fit(de, rec_len)) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1751	/* Ok, we found a spot. Return this bh and let
				1752	* the caller actually fill it in. */
				1753	*ret_de_bh = bh;
				1754	get_bh(*ret_de_bh);
				1755	status = 0;
				1756	goto bail;
				1757	}
				1758	offset += le16_to_cpu(de->rec_len);
				1759	de = (struct ocfs2_dir_entry )((char ) de + le16_to_cpu(de->rec_len));
				1760	}
				1761
				1762	status = 0;
				1763	bail:
Mark Fasheh	a81cb88	2008-10-07 14:25:16 -0700	[diff] [blame]	1764	brelse(bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1765
				1766	mlog_exit(status);
				1767	return status;
				1768	}
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1769
				1770	int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
				1771	struct inode *dir,
				1772	struct buffer_head *parent_fe_bh,
				1773	const char *name,
				1774	int namelen,
				1775	struct buffer_head **ret_de_bh)
				1776	{
				1777	int ret;
				1778	unsigned int blocks_wanted = 1;
				1779	struct buffer_head *bh = NULL;
				1780
				1781	mlog(0, "getting ready to insert namelen %d into dir %llu\n",
				1782	namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno);
				1783
				1784	*ret_de_bh = NULL;
				1785
				1786	if (!namelen) {
				1787	ret = -EINVAL;
				1788	mlog_errno(ret);
				1789	goto out;
				1790	}
				1791
				1792	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				1793	ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name,
				1794	namelen, &bh, &blocks_wanted);
				1795	} else
				1796	ret = ocfs2_find_dir_space_el(dir, name, namelen, &bh);
				1797
				1798	if (ret && ret != -ENOSPC) {
				1799	mlog_errno(ret);
				1800	goto out;
				1801	}
				1802
				1803	if (ret == -ENOSPC) {
				1804	/*
				1805	* We have to expand the directory to add this name.
				1806	*/
				1807	BUG_ON(bh);
				1808
				1809	ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted,
				1810	&bh);
				1811	if (ret) {
				1812	if (ret != -ENOSPC)
				1813	mlog_errno(ret);
				1814	goto out;
				1815	}
				1816
				1817	BUG_ON(!bh);
				1818	}
				1819
				1820	*ret_de_bh = bh;
				1821	bh = NULL;
				1822	out:
Mark Fasheh	a81cb88	2008-10-07 14:25:16 -0700	[diff] [blame]	1823	brelse(bh);
Mark Fasheh	5b6a3a2	2007-09-13 16:33:54 -0700	[diff] [blame]	1824	return ret;
				1825	}