Blame - fs/ocfs2/namei.c - kernel/msm

blob: 0673862c8bdd24178f438f58840a06eec014b970 [file] [log] [blame]

Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1	/* -- mode: c; c-basic-offset: 8; --
				2	* vim: noexpandtab sw=8 ts=8 sts=0:
				3	*
				4	* namei.c
				5	*
				6	* Create and rename file, directory, symlinks
				7	*
				8	* Copyright (C) 2002, 2004 Oracle. All rights reserved.
				9	*
				10	* Portions of this code from linux/fs/ext3/dir.c
				11	*
				12	* Copyright (C) 1992, 1993, 1994, 1995
				13	* Remy Card (card@masi.ibp.fr)
				14	* Laboratoire MASI - Institut Blaise pascal
				15	* Universite Pierre et Marie Curie (Paris VI)
				16	*
				17	* from
				18	*
				19	* linux/fs/minix/dir.c
				20	*
				21	* Copyright (C) 1991, 1992 Linux Torvalds
				22	*
				23	* This program is free software; you can redistribute it and/or
				24	* modify it under the terms of the GNU General Public
				25	* License as published by the Free Software Foundation; either
				26	* version 2 of the License, or (at your option) any later version.
				27	*
				28	* This program is distributed in the hope that it will be useful,
				29	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				30	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				31	* General Public License for more details.
				32	*
				33	* You should have received a copy of the GNU General Public
				34	* License along with this program; if not, write to the
				35	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				36	* Boston, MA 021110-1307, USA.
				37	*/
				38
				39	#include <linux/fs.h>
				40	#include <linux/types.h>
				41	#include <linux/slab.h>
				42	#include <linux/highmem.h>
				43
				44	#define MLOG_MASK_PREFIX ML_NAMEI
				45	#include <cluster/masklog.h>
				46
				47	#include "ocfs2.h"
				48
				49	#include "alloc.h"
				50	#include "dcache.h"
				51	#include "dir.h"
				52	#include "dlmglue.h"
				53	#include "extent_map.h"
				54	#include "file.h"
				55	#include "inode.h"
				56	#include "journal.h"
				57	#include "namei.h"
				58	#include "suballoc.h"
				59	#include "symlink.h"
				60	#include "sysfile.h"
				61	#include "uptodate.h"
				62	#include "vote.h"
				63
				64	#include "buffer_head_io.h"
				65
				66	#define NAMEI_RA_CHUNKS 2
				67	#define NAMEI_RA_BLOCKS 4
				68	#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
				69	#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
				70
				71	static int inline ocfs2_search_dirblock(struct buffer_head *bh,
				72	struct inode *dir,
				73	const char *name, int namelen,
				74	unsigned long offset,
				75	struct ocfs2_dir_entry **res_dir);
				76
				77	static int ocfs2_delete_entry(struct ocfs2_journal_handle *handle,
				78	struct inode *dir,
				79	struct ocfs2_dir_entry *de_del,
				80	struct buffer_head *bh);
				81
				82	static int __ocfs2_add_entry(struct ocfs2_journal_handle *handle,
				83	struct inode *dir,
				84	const char *name, int namelen,
				85	struct inode *inode, u64 blkno,
				86	struct buffer_head *parent_fe_bh,
				87	struct buffer_head *insert_bh);
				88
				89	static int ocfs2_mknod_locked(struct ocfs2_super *osb,
				90	struct inode *dir,
				91	struct dentry *dentry, int mode,
				92	dev_t dev,
				93	struct buffer_head **new_fe_bh,
				94	struct buffer_head *parent_fe_bh,
				95	struct ocfs2_journal_handle *handle,
				96	struct inode **ret_inode,
				97	struct ocfs2_alloc_context *inode_ac);
				98
				99	static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
				100	struct ocfs2_journal_handle *handle,
				101	struct inode *parent,
				102	struct inode *inode,
				103	struct buffer_head *fe_bh,
				104	struct ocfs2_alloc_context *data_ac);
				105
				106	static int ocfs2_double_lock(struct ocfs2_super *osb,
				107	struct ocfs2_journal_handle *handle,
				108	struct buffer_head **bh1,
				109	struct inode *inode1,
				110	struct buffer_head **bh2,
				111	struct inode *inode2);
				112
				113	static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
				114	struct ocfs2_journal_handle *handle,
				115	struct inode *inode,
				116	char *name,
				117	struct buffer_head **de_bh);
				118
				119	static int ocfs2_orphan_add(struct ocfs2_super *osb,
				120	struct ocfs2_journal_handle *handle,
				121	struct inode *inode,
				122	struct ocfs2_dinode *fe,
				123	char *name,
				124	struct buffer_head *de_bh);
				125
				126	static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
				127	struct ocfs2_journal_handle *handle,
				128	struct inode *inode,
				129	const char *symname);
				130
				131	static inline int ocfs2_add_entry(struct ocfs2_journal_handle *handle,
				132	struct dentry *dentry,
				133	struct inode *inode, u64 blkno,
				134	struct buffer_head *parent_fe_bh,
				135	struct buffer_head *insert_bh)
				136	{
				137	return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
				138	dentry->d_name.name, dentry->d_name.len,
				139	inode, blkno, parent_fe_bh, insert_bh);
				140	}
				141
				142	/* An orphan dir name is an 8 byte value, printed as a hex string */
				143	#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
				144
				145	static struct dentry ocfs2_lookup(struct inode dir, struct dentry *dentry,
				146	struct nameidata *nd)
				147	{
				148	int status;
				149	u64 blkno;
				150	struct buffer_head *dirent_bh = NULL;
				151	struct inode *inode = NULL;
				152	struct dentry *ret;
				153	struct ocfs2_dir_entry *dirent;
				154	struct ocfs2_inode_info *oi;
				155
				156	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
				157	dentry->d_name.len, dentry->d_name.name);
				158
				159	if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) {
				160	ret = ERR_PTR(-ENAMETOOLONG);
				161	goto bail;
				162	}
				163
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	164	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
				165	dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	166
				167	status = ocfs2_meta_lock(dir, NULL, NULL, 0);
				168	if (status < 0) {
				169	if (status != -ENOENT)
				170	mlog_errno(status);
				171	ret = ERR_PTR(status);
				172	goto bail;
				173	}
				174
				175	status = ocfs2_find_files_on_disk(dentry->d_name.name,
				176	dentry->d_name.len, &blkno,
				177	dir, &dirent_bh, &dirent);
				178	if (status < 0)
				179	goto bail_add;
				180
				181	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
				182	if (IS_ERR(inode)) {
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	183	mlog(ML_ERROR, "Unable to create inode %llu\n",
				184	(unsigned long long)blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	185	ret = ERR_PTR(-EACCES);
				186	goto bail_unlock;
				187	}
				188
				189	oi = OCFS2_I(inode);
				190	/* Clear any orphaned state... If we were able to look up the
				191	* inode from a directory, it certainly can't be orphaned. We
				192	* might have the bad state from a node which intended to
				193	* orphan this inode but crashed before it could commit the
				194	* unlink. */
				195	spin_lock(&oi->ip_lock);
				196	oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
				197	oi->ip_orphaned_slot = OCFS2_INVALID_SLOT;
				198	spin_unlock(&oi->ip_lock);
				199
				200	bail_add:
				201
				202	dentry->d_op = &ocfs2_dentry_ops;
				203	ret = d_splice_alias(inode, dentry);
				204
				205	bail_unlock:
				206	/* Don't drop the cluster lock until after the d_add --
				207	* unlink on another node will message us to remove that
				208	* dentry under this lock so otherwise we can race this with
				209	* the vote thread and have a stale dentry. */
				210	ocfs2_meta_unlock(dir, 0);
				211
				212	bail:
				213	if (dirent_bh)
				214	brelse(dirent_bh);
				215
				216	mlog_exit_ptr(ret);
				217
				218	return ret;
				219	}
				220
				221	static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
				222	struct ocfs2_journal_handle *handle,
				223	struct inode *parent,
				224	struct inode *inode,
				225	struct buffer_head *fe_bh,
				226	struct ocfs2_alloc_context *data_ac)
				227	{
				228	int status;
				229	struct buffer_head *new_bh = NULL;
				230	struct ocfs2_dir_entry *de = NULL;
				231
				232	mlog_entry_void();
				233
				234	status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
				235	data_ac, NULL, &new_bh);
				236	if (status < 0) {
				237	mlog_errno(status);
				238	goto bail;
				239	}
				240
				241	ocfs2_set_new_buffer_uptodate(inode, new_bh);
				242
				243	status = ocfs2_journal_access(handle, inode, new_bh,
				244	OCFS2_JOURNAL_ACCESS_CREATE);
				245	if (status < 0) {
				246	mlog_errno(status);
				247	goto bail;
				248	}
				249	memset(new_bh->b_data, 0, osb->sb->s_blocksize);
				250
				251	de = (struct ocfs2_dir_entry *) new_bh->b_data;
				252	de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
				253	de->name_len = 1;
				254	de->rec_len =
				255	cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
				256	strcpy(de->name, ".");
				257	ocfs2_set_de_type(de, S_IFDIR);
				258	de = (struct ocfs2_dir_entry ) ((char )de + le16_to_cpu(de->rec_len));
				259	de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
				260	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
				261	OCFS2_DIR_REC_LEN(1));
				262	de->name_len = 2;
				263	strcpy(de->name, "..");
				264	ocfs2_set_de_type(de, S_IFDIR);
				265
				266	status = ocfs2_journal_dirty(handle, new_bh);
				267	if (status < 0) {
				268	mlog_errno(status);
				269	goto bail;
				270	}
				271
				272	i_size_write(inode, inode->i_sb->s_blocksize);
				273	inode->i_nlink = 2;
				274	inode->i_blocks = ocfs2_align_bytes_to_sectors(inode->i_sb->s_blocksize);
				275	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
				276	if (status < 0) {
				277	mlog_errno(status);
				278	goto bail;
				279	}
				280
				281	status = 0;
				282	bail:
				283	if (new_bh)
				284	brelse(new_bh);
				285
				286	mlog_exit(status);
				287	return status;
				288	}
				289
				290	static int ocfs2_mknod(struct inode *dir,
				291	struct dentry *dentry,
				292	int mode,
				293	dev_t dev)
				294	{
				295	int status = 0;
				296	struct buffer_head *parent_fe_bh = NULL;
				297	struct ocfs2_journal_handle *handle = NULL;
				298	struct ocfs2_super *osb;
				299	struct ocfs2_dinode *dirfe;
				300	struct buffer_head *new_fe_bh = NULL;
				301	struct buffer_head *de_bh = NULL;
				302	struct inode *inode = NULL;
				303	struct ocfs2_alloc_context *inode_ac = NULL;
				304	struct ocfs2_alloc_context *data_ac = NULL;
				305
				306	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
				307	(unsigned long)dev, dentry->d_name.len,
				308	dentry->d_name.name);
				309
				310	/* get our super block */
				311	osb = OCFS2_SB(dir->i_sb);
				312
				313	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	314	mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
				315	(unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	316	status = -EMLINK;
				317	goto leave;
				318	}
				319
				320	handle = ocfs2_alloc_handle(osb);
				321	if (handle == NULL) {
				322	status = -ENOMEM;
				323	mlog_errno(status);
				324	goto leave;
				325	}
				326
				327	status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
				328	if (status < 0) {
				329	if (status != -ENOENT)
				330	mlog_errno(status);
				331	goto leave;
				332	}
				333
				334	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
				335	if (!dirfe->i_links_count) {
				336	/* can't make a file in a deleted directory. */
				337	status = -ENOENT;
				338	goto leave;
				339	}
				340
				341	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
				342	dentry->d_name.len);
				343	if (status)
				344	goto leave;
				345
				346	/* get a spot inside the dir. */
				347	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
				348	dentry->d_name.name,
				349	dentry->d_name.len, &de_bh);
				350	if (status < 0) {
				351	mlog_errno(status);
				352	goto leave;
				353	}
				354
				355	/* reserve an inode spot */
				356	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
				357	if (status < 0) {
				358	if (status != -ENOSPC)
				359	mlog_errno(status);
				360	goto leave;
				361	}
				362
				363	/* are we making a directory? If so, reserve a cluster for his
				364	* 1st extent. */
				365	if (S_ISDIR(mode)) {
				366	status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
				367	if (status < 0) {
				368	if (status != -ENOSPC)
				369	mlog_errno(status);
				370	goto leave;
				371	}
				372	}
				373
				374	handle = ocfs2_start_trans(osb, handle, OCFS2_MKNOD_CREDITS);
				375	if (IS_ERR(handle)) {
				376	status = PTR_ERR(handle);
				377	handle = NULL;
				378	mlog_errno(status);
				379	goto leave;
				380	}
				381
				382	/* do the real work now. */
				383	status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
				384	&new_fe_bh, parent_fe_bh, handle,
				385	&inode, inode_ac);
				386	if (status < 0) {
				387	mlog_errno(status);
				388	goto leave;
				389	}
				390
				391	if (S_ISDIR(mode)) {
				392	status = ocfs2_fill_new_dir(osb, handle, dir, inode,
				393	new_fe_bh, data_ac);
				394	if (status < 0) {
				395	mlog_errno(status);
				396	goto leave;
				397	}
				398
				399	status = ocfs2_journal_access(handle, dir, parent_fe_bh,
				400	OCFS2_JOURNAL_ACCESS_WRITE);
				401	if (status < 0) {
				402	mlog_errno(status);
				403	goto leave;
				404	}
				405	le16_add_cpu(&dirfe->i_links_count, 1);
				406	status = ocfs2_journal_dirty(handle, parent_fe_bh);
				407	if (status < 0) {
				408	mlog_errno(status);
				409	goto leave;
				410	}
				411	dir->i_nlink++;
				412	}
				413
				414	status = ocfs2_add_entry(handle, dentry, inode,
				415	OCFS2_I(inode)->ip_blkno, parent_fe_bh,
				416	de_bh);
				417	if (status < 0) {
				418	mlog_errno(status);
				419	goto leave;
				420	}
				421
				422	insert_inode_hash(inode);
				423	dentry->d_op = &ocfs2_dentry_ops;
				424	d_instantiate(dentry, inode);
				425	status = 0;
				426	leave:
				427	if (handle)
				428	ocfs2_commit_trans(handle);
				429
				430	if (status == -ENOSPC)
				431	mlog(0, "Disk is full\n");
				432
				433	if (new_fe_bh)
				434	brelse(new_fe_bh);
				435
				436	if (de_bh)
				437	brelse(de_bh);
				438
				439	if (parent_fe_bh)
				440	brelse(parent_fe_bh);
				441
				442	if ((status < 0) && inode)
				443	iput(inode);
				444
				445	if (inode_ac)
				446	ocfs2_free_alloc_context(inode_ac);
				447
				448	if (data_ac)
				449	ocfs2_free_alloc_context(data_ac);
				450
				451	mlog_exit(status);
				452
				453	return status;
				454	}
				455
				456	static int ocfs2_mknod_locked(struct ocfs2_super *osb,
				457	struct inode *dir,
				458	struct dentry *dentry, int mode,
				459	dev_t dev,
				460	struct buffer_head **new_fe_bh,
				461	struct buffer_head *parent_fe_bh,
				462	struct ocfs2_journal_handle *handle,
				463	struct inode **ret_inode,
				464	struct ocfs2_alloc_context *inode_ac)
				465	{
				466	int status = 0;
				467	struct ocfs2_dinode *fe = NULL;
				468	struct ocfs2_extent_list *fel;
				469	u64 fe_blkno = 0;
				470	u16 suballoc_bit;
				471	struct inode *inode = NULL;
				472
				473	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
				474	(unsigned long)dev, dentry->d_name.len,
				475	dentry->d_name.name);
				476
				477	*new_fe_bh = NULL;
				478	*ret_inode = NULL;
				479
				480	status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
				481	&fe_blkno);
				482	if (status < 0) {
				483	mlog_errno(status);
				484	goto leave;
				485	}
				486
				487	inode = new_inode(dir->i_sb);
				488	if (IS_ERR(inode)) {
				489	status = PTR_ERR(inode);
				490	mlog(ML_ERROR, "new_inode failed!\n");
				491	goto leave;
				492	}
				493
				494	/* populate as many fields early on as possible - many of
				495	* these are used by the support functions here and in
				496	* callers. */
				497	inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
				498	OCFS2_I(inode)->ip_blkno = fe_blkno;
				499	if (S_ISDIR(mode))
				500	inode->i_nlink = 2;
				501	else
				502	inode->i_nlink = 1;
				503	inode->i_mode = mode;
				504	spin_lock(&osb->osb_lock);
				505	inode->i_generation = osb->s_next_generation++;
				506	spin_unlock(&osb->osb_lock);
				507
				508	*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
				509	if (!*new_fe_bh) {
				510	status = -EIO;
				511	mlog_errno(status);
				512	goto leave;
				513	}
				514	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
				515
				516	status = ocfs2_journal_access(handle, inode, *new_fe_bh,
				517	OCFS2_JOURNAL_ACCESS_CREATE);
				518	if (status < 0) {
				519	mlog_errno(status);
				520	goto leave;
				521	}
				522
				523	fe = (struct ocfs2_dinode ) (new_fe_bh)->b_data;
				524	memset(fe, 0, osb->sb->s_blocksize);
				525
				526	fe->i_generation = cpu_to_le32(inode->i_generation);
				527	fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
				528	fe->i_blkno = cpu_to_le64(fe_blkno);
				529	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
				530	fe->i_suballoc_slot = cpu_to_le16(osb->slot_num);
				531	fe->i_uid = cpu_to_le32(current->fsuid);
				532	if (dir->i_mode & S_ISGID) {
				533	fe->i_gid = cpu_to_le32(dir->i_gid);
				534	if (S_ISDIR(mode))
				535	mode \|= S_ISGID;
				536	} else
				537	fe->i_gid = cpu_to_le32(current->fsgid);
				538	fe->i_mode = cpu_to_le16(mode);
				539	if (S_ISCHR(mode) \|\| S_ISBLK(mode))
				540	fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
				541
				542	fe->i_links_count = cpu_to_le16(inode->i_nlink);
				543
				544	fe->i_last_eb_blk = 0;
				545	strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
				546	le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL);
				547	fe->i_atime = fe->i_ctime = fe->i_mtime =
				548	cpu_to_le64(CURRENT_TIME.tv_sec);
				549	fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
				550	cpu_to_le32(CURRENT_TIME.tv_nsec);
				551	fe->i_dtime = 0;
				552
				553	fel = &fe->id2.i_list;
				554	fel->l_tree_depth = 0;
				555	fel->l_next_free_rec = 0;
				556	fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
				557
				558	status = ocfs2_journal_dirty(handle, *new_fe_bh);
				559	if (status < 0) {
				560	mlog_errno(status);
				561	goto leave;
				562	}
				563
				564	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
				565	mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	566	"i_blkno=%llu, i_ino=%lu\n",
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	567	(unsigned long long) (*new_fe_bh)->b_blocknr,
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	568	(unsigned long long)fe->i_blkno, inode->i_ino);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	569	BUG();
				570	}
				571
				572	ocfs2_inode_set_new(osb, inode);
				573	status = ocfs2_create_new_inode_locks(inode);
				574	if (status < 0)
				575	mlog_errno(status);
				576
				577	status = 0; /* error in ocfs2_create_new_inode_locks is not
				578	* critical */
				579
				580	*ret_inode = inode;
				581	leave:
				582	if (status < 0) {
				583	if (*new_fe_bh) {
				584	brelse(*new_fe_bh);
				585	*new_fe_bh = NULL;
				586	}
				587	if (inode)
				588	iput(inode);
				589	}
				590
				591	mlog_exit(status);
				592	return status;
				593	}
				594
				595	static int ocfs2_mkdir(struct inode *dir,
				596	struct dentry *dentry,
				597	int mode)
				598	{
				599	int ret;
				600
				601	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
				602	dentry->d_name.len, dentry->d_name.name);
				603	ret = ocfs2_mknod(dir, dentry, mode \| S_IFDIR, 0);
				604	mlog_exit(ret);
				605
				606	return ret;
				607	}
				608
				609	static int ocfs2_create(struct inode *dir,
				610	struct dentry *dentry,
				611	int mode,
				612	struct nameidata *nd)
				613	{
				614	int ret;
				615
				616	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
				617	dentry->d_name.len, dentry->d_name.name);
				618	ret = ocfs2_mknod(dir, dentry, mode \| S_IFREG, 0);
				619	mlog_exit(ret);
				620
				621	return ret;
				622	}
				623
				624	static int ocfs2_link(struct dentry *old_dentry,
				625	struct inode *dir,
				626	struct dentry *dentry)
				627	{
				628	struct ocfs2_journal_handle *handle = NULL;
				629	struct inode *inode = old_dentry->d_inode;
				630	int err;
				631	struct buffer_head *fe_bh = NULL;
				632	struct buffer_head *parent_fe_bh = NULL;
				633	struct buffer_head *de_bh = NULL;
				634	struct ocfs2_dinode *fe = NULL;
				635	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
				636
				637	mlog_entry("(inode=%lu, old='%.s' new='%.s')\n", inode->i_ino,
				638	old_dentry->d_name.len, old_dentry->d_name.name,
				639	dentry->d_name.len, dentry->d_name.name);
				640
				641	if (S_ISDIR(inode->i_mode)) {
				642	err = -EPERM;
				643	goto bail;
				644	}
				645
				646	if (inode->i_nlink >= OCFS2_LINK_MAX) {
				647	err = -EMLINK;
				648	goto bail;
				649	}
				650
				651	handle = ocfs2_alloc_handle(osb);
				652	if (handle == NULL) {
				653	err = -ENOMEM;
				654	goto bail;
				655	}
				656
				657	err = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
				658	if (err < 0) {
				659	if (err != -ENOENT)
				660	mlog_errno(err);
				661	goto bail;
				662	}
				663
				664	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
				665	dentry->d_name.len);
				666	if (err)
				667	goto bail;
				668
				669	err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
				670	dentry->d_name.name,
				671	dentry->d_name.len, &de_bh);
				672	if (err < 0) {
				673	mlog_errno(err);
				674	goto bail;
				675	}
				676
				677	err = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
				678	if (err < 0) {
				679	if (err != -ENOENT)
				680	mlog_errno(err);
				681	goto bail;
				682	}
				683
				684	fe = (struct ocfs2_dinode *) fe_bh->b_data;
				685	if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) {
				686	err = -EMLINK;
				687	goto bail;
				688	}
				689
				690	handle = ocfs2_start_trans(osb, handle, OCFS2_LINK_CREDITS);
				691	if (IS_ERR(handle)) {
				692	err = PTR_ERR(handle);
				693	handle = NULL;
				694	mlog_errno(err);
				695	goto bail;
				696	}
				697
				698	err = ocfs2_journal_access(handle, inode, fe_bh,
				699	OCFS2_JOURNAL_ACCESS_WRITE);
				700	if (err < 0) {
				701	mlog_errno(err);
				702	goto bail;
				703	}
				704
				705	inode->i_nlink++;
				706	inode->i_ctime = CURRENT_TIME;
				707	fe->i_links_count = cpu_to_le16(inode->i_nlink);
				708	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
				709	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
				710
				711	err = ocfs2_journal_dirty(handle, fe_bh);
				712	if (err < 0) {
				713	le16_add_cpu(&fe->i_links_count, -1);
				714	inode->i_nlink--;
				715	mlog_errno(err);
				716	goto bail;
				717	}
				718
				719	err = ocfs2_add_entry(handle, dentry, inode,
				720	OCFS2_I(inode)->ip_blkno,
				721	parent_fe_bh, de_bh);
				722	if (err) {
				723	le16_add_cpu(&fe->i_links_count, -1);
				724	inode->i_nlink--;
				725	mlog_errno(err);
				726	goto bail;
				727	}
				728
				729	atomic_inc(&inode->i_count);
				730	dentry->d_op = &ocfs2_dentry_ops;
				731	d_instantiate(dentry, inode);
				732	bail:
				733	if (handle)
				734	ocfs2_commit_trans(handle);
				735	if (de_bh)
				736	brelse(de_bh);
				737	if (fe_bh)
				738	brelse(fe_bh);
				739	if (parent_fe_bh)
				740	brelse(parent_fe_bh);
				741
				742	mlog_exit(err);
				743
				744	return err;
				745	}
				746
				747	static int ocfs2_unlink(struct inode *dir,
				748	struct dentry *dentry)
				749	{
				750	int status;
				751	unsigned int saved_nlink = 0;
				752	struct inode *inode = dentry->d_inode;
				753	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
				754	u64 blkno;
				755	struct ocfs2_dinode *fe = NULL;
				756	struct buffer_head *fe_bh = NULL;
				757	struct buffer_head *parent_node_bh = NULL;
				758	struct ocfs2_journal_handle *handle = NULL;
				759	struct ocfs2_dir_entry *dirent = NULL;
				760	struct buffer_head *dirent_bh = NULL;
				761	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
				762	struct buffer_head *orphan_entry_bh = NULL;
				763
				764	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
				765	dentry->d_name.len, dentry->d_name.name);
				766
				767	BUG_ON(dentry->d_parent->d_inode != dir);
				768
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	769	mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	770
				771	if (inode == osb->root_inode) {
				772	mlog(0, "Cannot delete the root directory\n");
				773	status = -EPERM;
				774	goto leave;
				775	}
				776
				777	handle = ocfs2_alloc_handle(osb);
				778	if (handle == NULL) {
				779	status = -ENOMEM;
				780	mlog_errno(status);
				781	goto leave;
				782	}
				783
				784	status = ocfs2_meta_lock(dir, handle, &parent_node_bh, 1);
				785	if (status < 0) {
				786	if (status != -ENOENT)
				787	mlog_errno(status);
				788	goto leave;
				789	}
				790
				791	status = ocfs2_find_files_on_disk(dentry->d_name.name,
				792	dentry->d_name.len, &blkno,
				793	dir, &dirent_bh, &dirent);
				794	if (status < 0) {
				795	if (status != -ENOENT)
				796	mlog_errno(status);
				797	goto leave;
				798	}
				799
				800	if (OCFS2_I(inode)->ip_blkno != blkno) {
				801	status = -ENOENT;
				802
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	803	mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n",
				804	(unsigned long long)OCFS2_I(inode)->ip_blkno,
				805	(unsigned long long)blkno, OCFS2_I(inode)->ip_flags);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	806	goto leave;
				807	}
				808
				809	status = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
				810	if (status < 0) {
				811	if (status != -ENOENT)
				812	mlog_errno(status);
				813	goto leave;
				814	}
				815
				816	if (S_ISDIR(inode->i_mode)) {
				817	if (!ocfs2_empty_dir(inode)) {
				818	status = -ENOTEMPTY;
				819	goto leave;
				820	} else if (inode->i_nlink != 2) {
				821	status = -ENOTEMPTY;
				822	goto leave;
				823	}
				824	}
				825
				826	/* There are still a few steps left until we can consider the
				827	* unlink to have succeeded. Save off nlink here before
				828	* modification so we can set it back in case we hit an issue
				829	* before commit. */
				830	saved_nlink = inode->i_nlink;
				831	if (S_ISDIR(inode->i_mode))
				832	inode->i_nlink = 0;
				833	else
				834	inode->i_nlink--;
				835
				836	status = ocfs2_request_unlink_vote(inode, dentry,
				837	(unsigned int) inode->i_nlink);
				838	if (status < 0) {
				839	/* This vote should succeed under all normal
				840	* circumstances. */
				841	mlog_errno(status);
				842	goto leave;
				843	}
				844
				845	if (!inode->i_nlink) {
				846	status = ocfs2_prepare_orphan_dir(osb, handle, inode,
				847	orphan_name,
				848	&orphan_entry_bh);
				849	if (status < 0) {
				850	mlog_errno(status);
				851	goto leave;
				852	}
				853	}
				854
				855	handle = ocfs2_start_trans(osb, handle, OCFS2_UNLINK_CREDITS);
				856	if (IS_ERR(handle)) {
				857	status = PTR_ERR(handle);
				858	handle = NULL;
				859	mlog_errno(status);
				860	goto leave;
				861	}
				862
				863	status = ocfs2_journal_access(handle, inode, fe_bh,
				864	OCFS2_JOURNAL_ACCESS_WRITE);
				865	if (status < 0) {
				866	mlog_errno(status);
				867	goto leave;
				868	}
				869
				870	fe = (struct ocfs2_dinode *) fe_bh->b_data;
				871
				872	if (!inode->i_nlink) {
				873	status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
				874	orphan_entry_bh);
				875	if (status < 0) {
				876	mlog_errno(status);
				877	goto leave;
				878	}
				879	}
				880
				881	/* delete the name from the parent dir */
				882	status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh);
				883	if (status < 0) {
				884	mlog_errno(status);
				885	goto leave;
				886	}
				887
				888	/* We can set nlink on the dinode now. clear the saved version
				889	* so that it doesn't get set later. */
				890	fe->i_links_count = cpu_to_le16(inode->i_nlink);
				891	saved_nlink = 0;
				892
				893	status = ocfs2_journal_dirty(handle, fe_bh);
				894	if (status < 0) {
				895	mlog_errno(status);
				896	goto leave;
				897	}
				898
				899	if (S_ISDIR(inode->i_mode)) {
				900	dir->i_nlink--;
				901	status = ocfs2_mark_inode_dirty(handle, dir,
				902	parent_node_bh);
				903	if (status < 0) {
				904	mlog_errno(status);
				905	dir->i_nlink++;
				906	}
				907	}
				908
				909	leave:
				910	if (status < 0 && saved_nlink)
				911	inode->i_nlink = saved_nlink;
				912
				913	if (handle)
				914	ocfs2_commit_trans(handle);
				915
				916	if (fe_bh)
				917	brelse(fe_bh);
				918
				919	if (dirent_bh)
				920	brelse(dirent_bh);
				921
				922	if (parent_node_bh)
				923	brelse(parent_node_bh);
				924
				925	if (orphan_entry_bh)
				926	brelse(orphan_entry_bh);
				927
				928	mlog_exit(status);
				929
				930	return status;
				931	}
				932
				933	/*
				934	* The only place this should be used is rename!
				935	* if they have the same id, then the 1st one is the only one locked.
				936	*/
				937	static int ocfs2_double_lock(struct ocfs2_super *osb,
				938	struct ocfs2_journal_handle *handle,
				939	struct buffer_head **bh1,
				940	struct inode *inode1,
				941	struct buffer_head **bh2,
				942	struct inode *inode2)
				943	{
				944	int status;
				945	struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
				946	struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
				947	struct buffer_head **tmpbh;
				948	struct inode *tmpinode;
				949
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	950	mlog_entry("(inode1 = %llu, inode2 = %llu)\n",
				951	(unsigned long long)oi1->ip_blkno,
				952	(unsigned long long)oi2->ip_blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	953
				954	BUG_ON(!handle);
				955
				956	if (*bh1)
				957	*bh1 = NULL;
				958	if (*bh2)
				959	*bh2 = NULL;
				960
				961	/* we always want to lock the one with the lower lockid first. */
				962	if (oi1->ip_blkno != oi2->ip_blkno) {
				963	if (oi1->ip_blkno < oi2->ip_blkno) {
				964	/* switch id1 and id2 around */
				965	mlog(0, "switching them around...\n");
				966	tmpbh = bh2;
				967	bh2 = bh1;
				968	bh1 = tmpbh;
				969
				970	tmpinode = inode2;
				971	inode2 = inode1;
				972	inode1 = tmpinode;
				973	}
				974	/* lock id2 */
				975	status = ocfs2_meta_lock(inode2, handle, bh2, 1);
				976	if (status < 0) {
				977	if (status != -ENOENT)
				978	mlog_errno(status);
				979	goto bail;
				980	}
				981	}
				982	/* lock id1 */
				983	status = ocfs2_meta_lock(inode1, handle, bh1, 1);
				984	if (status < 0) {
				985	if (status != -ENOENT)
				986	mlog_errno(status);
				987	goto bail;
				988	}
				989	bail:
				990	mlog_exit(status);
				991	return status;
				992	}
				993
				994	#define PARENT_INO(buffer) \
				995	((struct ocfs2_dir_entry *) \
				996	((char *)buffer + \
				997	le16_to_cpu(((struct ocfs2_dir_entry *)buffer)->rec_len)))->inode
				998
				999	static int ocfs2_rename(struct inode *old_dir,
				1000	struct dentry *old_dentry,
				1001	struct inode *new_dir,
				1002	struct dentry *new_dentry)
				1003	{
				1004	int status = 0, rename_lock = 0;
				1005	struct inode *old_inode = old_dentry->d_inode;
				1006	struct inode *new_inode = new_dentry->d_inode;
				1007	struct ocfs2_dinode *newfe = NULL;
				1008	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
				1009	struct buffer_head *orphan_entry_bh = NULL;
				1010	struct buffer_head *newfe_bh = NULL;
				1011	struct buffer_head *insert_entry_bh = NULL;
				1012	struct ocfs2_super *osb = NULL;
				1013	u64 newfe_blkno;
				1014	struct ocfs2_journal_handle *handle = NULL;
				1015	struct buffer_head *old_dir_bh = NULL;
				1016	struct buffer_head *new_dir_bh = NULL;
				1017	struct ocfs2_dir_entry old_de = NULL, new_de = NULL; // dirent for old_dentry
				1018	// and new_dentry
				1019	struct buffer_head new_de_bh = NULL, old_de_bh = NULL; // bhs for above
				1020	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
				1021	// this is the 1st dirent bh
				1022	nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
				1023	unsigned int links_count;
				1024
				1025	/* At some point it might be nice to break this function up a
				1026	* bit. */
				1027
				1028	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.s' to='%.s')\n",
				1029	old_dir, old_dentry, new_dir, new_dentry,
				1030	old_dentry->d_name.len, old_dentry->d_name.name,
				1031	new_dentry->d_name.len, new_dentry->d_name.name);
				1032
				1033	osb = OCFS2_SB(old_dir->i_sb);
				1034
				1035	if (new_inode) {
				1036	if (!igrab(new_inode))
				1037	BUG();
				1038	}
				1039
				1040	if (atomic_read(&old_dentry->d_count) > 2) {
				1041	shrink_dcache_parent(old_dentry);
				1042	if (atomic_read(&old_dentry->d_count) > 2) {
				1043	status = -EBUSY;
				1044	goto bail;
				1045	}
				1046	}
				1047
				1048	/* Assume a directory heirarchy thusly:
				1049	* a/b/c
				1050	* a/d
				1051	* a,b,c, and d are all directories.
				1052	*
				1053	* from cwd of 'a' on both nodes:
				1054	* node1: mv b/c d
				1055	* node2: mv d b/c
				1056	*
				1057	* And that's why, just like the VFS, we need a file system
				1058	* rename lock. */
				1059	if (old_dentry != new_dentry) {
				1060	status = ocfs2_rename_lock(osb);
				1061	if (status < 0) {
				1062	mlog_errno(status);
				1063	goto bail;
				1064	}
				1065	rename_lock = 1;
				1066	}
				1067
				1068	handle = ocfs2_alloc_handle(osb);
				1069	if (handle == NULL) {
				1070	status = -ENOMEM;
				1071	mlog_errno(status);
				1072	goto bail;
				1073	}
				1074
				1075	/* if old and new are the same, this'll just do one lock. */
				1076	status = ocfs2_double_lock(osb, handle,
				1077	&old_dir_bh, old_dir,
				1078	&new_dir_bh, new_dir);
				1079	if (status < 0) {
				1080	mlog_errno(status);
				1081	goto bail;
				1082	}
				1083
				1084	/* make sure both dirs have bhs
				1085	* get an extra ref on old_dir_bh if old==new */
				1086	if (!new_dir_bh) {
				1087	if (old_dir_bh) {
				1088	new_dir_bh = old_dir_bh;
				1089	get_bh(new_dir_bh);
				1090	} else {
				1091	mlog(ML_ERROR, "no old_dir_bh!\n");
				1092	status = -EIO;
				1093	goto bail;
				1094	}
				1095	}
				1096
				1097	if (S_ISDIR(old_inode->i_mode)) {
				1098	/* Directories actually require metadata updates to
				1099	* the directory info so we can't get away with not
				1100	* doing node locking on it. */
				1101	status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
				1102	if (status < 0) {
				1103	if (status != -ENOENT)
				1104	mlog_errno(status);
				1105	goto bail;
				1106	}
				1107
				1108	status = ocfs2_request_rename_vote(old_inode, old_dentry);
				1109	if (status < 0) {
				1110	mlog_errno(status);
				1111	goto bail;
				1112	}
				1113
				1114	status = -EIO;
				1115	old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
				1116	if (!old_inode_de_bh)
				1117	goto bail;
				1118
				1119	status = -EIO;
				1120	if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) !=
				1121	OCFS2_I(old_dir)->ip_blkno)
				1122	goto bail;
				1123	status = -EMLINK;
				1124	if (!new_inode && new_dir!=old_dir &&
				1125	new_dir->i_nlink >= OCFS2_LINK_MAX)
				1126	goto bail;
				1127	} else {
				1128	/* Ah, the simple case - we're a file so just send a
				1129	* message. */
				1130	status = ocfs2_request_rename_vote(old_inode, old_dentry);
				1131	if (status < 0) {
				1132	mlog_errno(status);
				1133	goto bail;
				1134	}
				1135	}
				1136
				1137	status = -ENOENT;
				1138	old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
				1139	old_dentry->d_name.len,
				1140	old_dir, &old_de);
				1141	if (!old_de_bh)
				1142	goto bail;
				1143
				1144	/*
				1145	* Check for inode number is _not_ due to possible IO errors.
				1146	* We might rmdir the source, keep it as pwd of some process
				1147	* and merrily kill the link to whatever was created under the
				1148	* same name. Goodbye sticky bit ;-<
				1149	*/
				1150	if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno)
				1151	goto bail;
				1152
				1153	/* check if the target already exists (in which case we need
				1154	* to delete it */
				1155	status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
				1156	new_dentry->d_name.len,
				1157	&newfe_blkno, new_dir, &new_de_bh,
				1158	&new_de);
				1159	/* The only error we allow here is -ENOENT because the new
				1160	* file not existing is perfectly valid. */
				1161	if ((status < 0) && (status != -ENOENT)) {
				1162	/* If we cannot find the file specified we should just */
				1163	/* return the error... */
				1164	mlog_errno(status);
				1165	goto bail;
				1166	}
				1167
				1168	if (!new_de && new_inode)
				1169	mlog(ML_ERROR, "inode %lu does not exist in it's parent "
				1170	"directory!", new_inode->i_ino);
				1171
				1172	/* In case we need to overwrite an existing file, we blow it
				1173	* away first */
				1174	if (new_de) {
				1175	/* VFS didn't think there existed an inode here, but
				1176	* someone else in the cluster must have raced our
				1177	* rename to create one. Today we error cleanly, in
				1178	* the future we should consider calling iget to build
				1179	* a new struct inode for this entry. */
				1180	if (!new_inode) {
				1181	status = -EACCES;
				1182
				1183	mlog(0, "We found an inode for name %.*s but VFS "
				1184	"didn't give us one.\n", new_dentry->d_name.len,
				1185	new_dentry->d_name.name);
				1186	goto bail;
				1187	}
				1188
				1189	if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
				1190	status = -EACCES;
				1191
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1192	mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n",
				1193	(unsigned long long)OCFS2_I(new_inode)->ip_blkno,
				1194	(unsigned long long)newfe_blkno,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1195	OCFS2_I(new_inode)->ip_flags);
				1196	goto bail;
				1197	}
				1198
				1199	status = ocfs2_meta_lock(new_inode, handle, &newfe_bh, 1);
				1200	if (status < 0) {
				1201	if (status != -ENOENT)
				1202	mlog_errno(status);
				1203	goto bail;
				1204	}
				1205
				1206	if (S_ISDIR(new_inode->i_mode))
				1207	links_count = 0;
				1208	else
				1209	links_count = (unsigned int) (new_inode->i_nlink - 1);
				1210
				1211	status = ocfs2_request_unlink_vote(new_inode, new_dentry,
				1212	links_count);
				1213	if (status < 0) {
				1214	mlog_errno(status);
				1215	goto bail;
				1216	}
				1217
				1218	newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
				1219
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1220	mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu "
				1221	"newfebh=%p bhblocknr=%llu\n", new_de,
				1222	(unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1223	(unsigned long long)newfe_bh->b_blocknr : 0ULL);
				1224
				1225	if (S_ISDIR(new_inode->i_mode) \|\| (new_inode->i_nlink == 1)) {
				1226	status = ocfs2_prepare_orphan_dir(osb, handle,
				1227	new_inode,
				1228	orphan_name,
				1229	&orphan_entry_bh);
				1230	if (status < 0) {
				1231	mlog_errno(status);
				1232	goto bail;
				1233	}
				1234	}
				1235	} else {
				1236	BUG_ON(new_dentry->d_parent->d_inode != new_dir);
				1237
				1238	status = ocfs2_check_dir_for_entry(new_dir,
				1239	new_dentry->d_name.name,
				1240	new_dentry->d_name.len);
				1241	if (status)
				1242	goto bail;
				1243
				1244	status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
				1245	new_dentry->d_name.name,
				1246	new_dentry->d_name.len,
				1247	&insert_entry_bh);
				1248	if (status < 0) {
				1249	mlog_errno(status);
				1250	goto bail;
				1251	}
				1252	}
				1253
				1254	handle = ocfs2_start_trans(osb, handle, OCFS2_RENAME_CREDITS);
				1255	if (IS_ERR(handle)) {
				1256	status = PTR_ERR(handle);
				1257	handle = NULL;
				1258	mlog_errno(status);
				1259	goto bail;
				1260	}
				1261
				1262	if (new_de) {
				1263	if (S_ISDIR(new_inode->i_mode)) {
				1264	if (!ocfs2_empty_dir(new_inode) \|\|
				1265	new_inode->i_nlink != 2) {
				1266	status = -ENOTEMPTY;
				1267	goto bail;
				1268	}
				1269	}
				1270	status = ocfs2_journal_access(handle, new_inode, newfe_bh,
				1271	OCFS2_JOURNAL_ACCESS_WRITE);
				1272	if (status < 0) {
				1273	mlog_errno(status);
				1274	goto bail;
				1275	}
				1276
				1277	if (S_ISDIR(new_inode->i_mode) \|\|
				1278	(newfe->i_links_count == cpu_to_le16(1))){
				1279	status = ocfs2_orphan_add(osb, handle, new_inode,
				1280	newfe, orphan_name,
				1281	orphan_entry_bh);
				1282	if (status < 0) {
				1283	mlog_errno(status);
				1284	goto bail;
				1285	}
				1286	}
				1287
				1288	/* change the dirent to point to the correct inode */
				1289	status = ocfs2_journal_access(handle, new_dir, new_de_bh,
				1290	OCFS2_JOURNAL_ACCESS_WRITE);
				1291	if (status < 0) {
				1292	mlog_errno(status);
				1293	goto bail;
				1294	}
				1295	new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno);
				1296	new_de->file_type = old_de->file_type;
				1297	new_dir->i_version++;
				1298	status = ocfs2_journal_dirty(handle, new_de_bh);
				1299	if (status < 0) {
				1300	mlog_errno(status);
				1301	goto bail;
				1302	}
				1303
				1304	if (S_ISDIR(new_inode->i_mode))
				1305	newfe->i_links_count = 0;
				1306	else
				1307	le16_add_cpu(&newfe->i_links_count, -1);
				1308
				1309	status = ocfs2_journal_dirty(handle, newfe_bh);
				1310	if (status < 0) {
				1311	mlog_errno(status);
				1312	goto bail;
				1313	}
				1314	} else {
				1315	/* if the name was not found in new_dir, add it now */
				1316	status = ocfs2_add_entry(handle, new_dentry, old_inode,
				1317	OCFS2_I(old_inode)->ip_blkno,
				1318	new_dir_bh, insert_entry_bh);
				1319	}
				1320
				1321	old_inode->i_ctime = CURRENT_TIME;
				1322	mark_inode_dirty(old_inode);
				1323
				1324	/* now that the name has been added to new_dir, remove the old name */
				1325	status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
				1326	if (status < 0) {
				1327	mlog_errno(status);
				1328	goto bail;
				1329	}
				1330
				1331	if (new_inode) {
				1332	new_inode->i_nlink--;
				1333	new_inode->i_ctime = CURRENT_TIME;
				1334	}
				1335	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
				1336	if (old_inode_de_bh) {
				1337	status = ocfs2_journal_access(handle, old_inode,
				1338	old_inode_de_bh,
				1339	OCFS2_JOURNAL_ACCESS_WRITE);
				1340	PARENT_INO(old_inode_de_bh->b_data) =
				1341	cpu_to_le64(OCFS2_I(new_dir)->ip_blkno);
				1342	status = ocfs2_journal_dirty(handle, old_inode_de_bh);
				1343	old_dir->i_nlink--;
				1344	if (new_inode) {
				1345	new_inode->i_nlink--;
				1346	} else {
				1347	new_dir->i_nlink++;
				1348	mark_inode_dirty(new_dir);
				1349	}
				1350	}
				1351	mark_inode_dirty(old_dir);
				1352	if (new_inode)
				1353	mark_inode_dirty(new_inode);
				1354
				1355	if (old_dir != new_dir)
				1356	if (new_dir_nlink != new_dir->i_nlink) {
				1357	if (!new_dir_bh) {
				1358	mlog(ML_ERROR, "need to change nlink for new "
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1359	"dir %llu from %d to %d but bh is NULL\n",
				1360	(unsigned long long)OCFS2_I(new_dir)->ip_blkno,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1361	(int)new_dir_nlink, new_dir->i_nlink);
				1362	} else {
				1363	struct ocfs2_dinode *fe;
				1364	status = ocfs2_journal_access(handle,
				1365	new_dir,
				1366	new_dir_bh,
				1367	OCFS2_JOURNAL_ACCESS_WRITE);
				1368	fe = (struct ocfs2_dinode *) new_dir_bh->b_data;
				1369	fe->i_links_count = cpu_to_le16(new_dir->i_nlink);
				1370	status = ocfs2_journal_dirty(handle, new_dir_bh);
				1371	}
				1372	}
				1373
				1374	if (old_dir_nlink != old_dir->i_nlink) {
				1375	if (!old_dir_bh) {
				1376	mlog(ML_ERROR, "need to change nlink for old dir "
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1377	"%llu from %d to %d but bh is NULL!\n",
				1378	(unsigned long long)OCFS2_I(old_dir)->ip_blkno,
				1379	(int)old_dir_nlink, old_dir->i_nlink);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1380	} else {
				1381	struct ocfs2_dinode *fe;
				1382	status = ocfs2_journal_access(handle, old_dir,
				1383	old_dir_bh,
				1384	OCFS2_JOURNAL_ACCESS_WRITE);
				1385	fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
				1386	fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
				1387	status = ocfs2_journal_dirty(handle, old_dir_bh);
				1388	}
				1389	}
				1390
				1391	status = 0;
				1392	bail:
				1393	if (rename_lock)
				1394	ocfs2_rename_unlock(osb);
				1395
				1396	if (handle)
				1397	ocfs2_commit_trans(handle);
				1398
				1399	if (new_inode)
				1400	sync_mapping_buffers(old_inode->i_mapping);
				1401
				1402	if (new_inode)
				1403	iput(new_inode);
				1404	if (newfe_bh)
				1405	brelse(newfe_bh);
				1406	if (old_dir_bh)
				1407	brelse(old_dir_bh);
				1408	if (new_dir_bh)
				1409	brelse(new_dir_bh);
				1410	if (new_de_bh)
				1411	brelse(new_de_bh);
				1412	if (old_de_bh)
				1413	brelse(old_de_bh);
				1414	if (old_inode_de_bh)
				1415	brelse(old_inode_de_bh);
				1416	if (orphan_entry_bh)
				1417	brelse(orphan_entry_bh);
				1418	if (insert_entry_bh)
				1419	brelse(insert_entry_bh);
				1420
				1421	mlog_exit(status);
				1422
				1423	return status;
				1424	}
				1425
				1426	/*
				1427	* we expect i_size = strlen(symname). Copy symname into the file
				1428	* data, including the null terminator.
				1429	*/
				1430	static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
				1431	struct ocfs2_journal_handle *handle,
				1432	struct inode *inode,
				1433	const char *symname)
				1434	{
				1435	struct buffer_head **bhs = NULL;
				1436	const char *c;
				1437	struct super_block *sb = osb->sb;
				1438	u64 p_blkno;
				1439	int p_blocks;
				1440	int virtual, blocks, status, i, bytes_left;
				1441
				1442	bytes_left = i_size_read(inode) + 1;
				1443	/* we can't trust i_blocks because we're actually going to
				1444	* write i_size + 1 bytes. */
				1445	blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
				1446
Andrew Morton	5515eff	2006-03-26 01:37:53 -0800	[diff] [blame]	1447	mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n",
				1448	(unsigned long long)inode->i_blocks,
				1449	i_size_read(inode), blocks);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1450
				1451	/* Sanity check -- make sure we're going to fit. */
				1452	if (bytes_left >
				1453	ocfs2_clusters_to_bytes(sb, OCFS2_I(inode)->ip_clusters)) {
				1454	status = -EIO;
				1455	mlog_errno(status);
				1456	goto bail;
				1457	}
				1458
				1459	bhs = kcalloc(blocks, sizeof(struct buffer_head *), GFP_KERNEL);
				1460	if (!bhs) {
				1461	status = -ENOMEM;
				1462	mlog_errno(status);
				1463	goto bail;
				1464	}
				1465
				1466	status = ocfs2_extent_map_get_blocks(inode, 0, 1, &p_blkno,
				1467	&p_blocks);
				1468	if (status < 0) {
				1469	mlog_errno(status);
				1470	goto bail;
				1471	}
				1472
				1473	/* links can never be larger than one cluster so we know this
				1474	* is all going to be contiguous, but do a sanity check
				1475	* anyway. */
				1476	if ((p_blocks << sb->s_blocksize_bits) < bytes_left) {
				1477	status = -EIO;
				1478	mlog_errno(status);
				1479	goto bail;
				1480	}
				1481
				1482	virtual = 0;
				1483	while(bytes_left > 0) {
				1484	c = &symname[virtual * sb->s_blocksize];
				1485
				1486	bhs[virtual] = sb_getblk(sb, p_blkno);
				1487	if (!bhs[virtual]) {
				1488	status = -ENOMEM;
				1489	mlog_errno(status);
				1490	goto bail;
				1491	}
				1492	ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]);
				1493
				1494	status = ocfs2_journal_access(handle, inode, bhs[virtual],
				1495	OCFS2_JOURNAL_ACCESS_CREATE);
				1496	if (status < 0) {
				1497	mlog_errno(status);
				1498	goto bail;
				1499	}
				1500
				1501	memset(bhs[virtual]->b_data, 0, sb->s_blocksize);
				1502
				1503	memcpy(bhs[virtual]->b_data, c,
				1504	(bytes_left > sb->s_blocksize) ? sb->s_blocksize :
				1505	bytes_left);
				1506
				1507	status = ocfs2_journal_dirty(handle, bhs[virtual]);
				1508	if (status < 0) {
				1509	mlog_errno(status);
				1510	goto bail;
				1511	}
				1512
				1513	virtual++;
				1514	p_blkno++;
				1515	bytes_left -= sb->s_blocksize;
				1516	}
				1517
				1518	status = 0;
				1519	bail:
				1520
				1521	if (bhs) {
				1522	for(i = 0; i < blocks; i++)
				1523	if (bhs[i])
				1524	brelse(bhs[i]);
				1525	kfree(bhs);
				1526	}
				1527
				1528	mlog_exit(status);
				1529	return status;
				1530	}
				1531
				1532	static int ocfs2_symlink(struct inode *dir,
				1533	struct dentry *dentry,
				1534	const char *symname)
				1535	{
				1536	int status, l, credits;
				1537	u64 newsize;
				1538	struct ocfs2_super *osb = NULL;
				1539	struct inode *inode = NULL;
				1540	struct super_block *sb;
				1541	struct buffer_head *new_fe_bh = NULL;
				1542	struct buffer_head *de_bh = NULL;
				1543	struct buffer_head *parent_fe_bh = NULL;
				1544	struct ocfs2_dinode *fe = NULL;
				1545	struct ocfs2_dinode *dirfe;
				1546	struct ocfs2_journal_handle *handle = NULL;
				1547	struct ocfs2_alloc_context *inode_ac = NULL;
				1548	struct ocfs2_alloc_context *data_ac = NULL;
				1549
				1550	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
				1551	dentry, symname, dentry->d_name.len, dentry->d_name.name);
				1552
				1553	sb = dir->i_sb;
				1554	osb = OCFS2_SB(sb);
				1555
				1556	l = strlen(symname) + 1;
				1557
				1558	credits = ocfs2_calc_symlink_credits(sb);
				1559
				1560	handle = ocfs2_alloc_handle(osb);
				1561	if (handle == NULL) {
				1562	status = -ENOMEM;
				1563	mlog_errno(status);
				1564	goto bail;
				1565	}
				1566
				1567	/* lock the parent directory */
				1568	status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
				1569	if (status < 0) {
				1570	if (status != -ENOENT)
				1571	mlog_errno(status);
				1572	goto bail;
				1573	}
				1574
				1575	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
				1576	if (!dirfe->i_links_count) {
				1577	/* can't make a file in a deleted directory. */
				1578	status = -ENOENT;
				1579	goto bail;
				1580	}
				1581
				1582	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
				1583	dentry->d_name.len);
				1584	if (status)
				1585	goto bail;
				1586
				1587	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
				1588	dentry->d_name.name,
				1589	dentry->d_name.len, &de_bh);
				1590	if (status < 0) {
				1591	mlog_errno(status);
				1592	goto bail;
				1593	}
				1594
				1595	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
				1596	if (status < 0) {
				1597	if (status != -ENOSPC)
				1598	mlog_errno(status);
				1599	goto bail;
				1600	}
				1601
				1602	/* don't reserve bitmap space for fast symlinks. */
				1603	if (l > ocfs2_fast_symlink_chars(sb)) {
				1604	status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
				1605	if (status < 0) {
				1606	if (status != -ENOSPC)
				1607	mlog_errno(status);
				1608	goto bail;
				1609	}
				1610	}
				1611
				1612	handle = ocfs2_start_trans(osb, handle, credits);
				1613	if (IS_ERR(handle)) {
				1614	status = PTR_ERR(handle);
				1615	handle = NULL;
				1616	mlog_errno(status);
				1617	goto bail;
				1618	}
				1619
				1620	status = ocfs2_mknod_locked(osb, dir, dentry,
				1621	S_IFLNK \| S_IRWXUGO, 0,
				1622	&new_fe_bh, parent_fe_bh, handle,
				1623	&inode, inode_ac);
				1624	if (status < 0) {
				1625	mlog_errno(status);
				1626	goto bail;
				1627	}
				1628
				1629	fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
				1630	inode->i_rdev = 0;
				1631	newsize = l - 1;
				1632	if (l > ocfs2_fast_symlink_chars(sb)) {
				1633	inode->i_op = &ocfs2_symlink_inode_operations;
				1634	status = ocfs2_do_extend_allocation(osb, inode, 1, new_fe_bh,
				1635	handle, data_ac, NULL,
				1636	NULL);
				1637	if (status < 0) {
				1638	if (status != -ENOSPC && status != -EINTR) {
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1639	mlog(ML_ERROR,
				1640	"Failed to extend file to %llu\n",
				1641	(unsigned long long)newsize);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1642	mlog_errno(status);
				1643	status = -ENOSPC;
				1644	}
				1645	goto bail;
				1646	}
				1647	i_size_write(inode, newsize);
				1648	inode->i_blocks = ocfs2_align_bytes_to_sectors(newsize);
				1649	} else {
				1650	inode->i_op = &ocfs2_fast_symlink_inode_operations;
				1651	memcpy((char *) fe->id2.i_symlink, symname, l);
				1652	i_size_write(inode, newsize);
				1653	inode->i_blocks = 0;
				1654	}
				1655
				1656	status = ocfs2_mark_inode_dirty(handle, inode, new_fe_bh);
				1657	if (status < 0) {
				1658	mlog_errno(status);
				1659	goto bail;
				1660	}
				1661
				1662	if (!ocfs2_inode_is_fast_symlink(inode)) {
				1663	status = ocfs2_create_symlink_data(osb, handle, inode,
				1664	symname);
				1665	if (status < 0) {
				1666	mlog_errno(status);
				1667	goto bail;
				1668	}
				1669	}
				1670
				1671	status = ocfs2_add_entry(handle, dentry, inode,
				1672	le64_to_cpu(fe->i_blkno), parent_fe_bh,
				1673	de_bh);
				1674	if (status < 0) {
				1675	mlog_errno(status);
				1676	goto bail;
				1677	}
				1678
				1679	insert_inode_hash(inode);
				1680	dentry->d_op = &ocfs2_dentry_ops;
				1681	d_instantiate(dentry, inode);
				1682	bail:
				1683	if (handle)
				1684	ocfs2_commit_trans(handle);
				1685	if (new_fe_bh)
				1686	brelse(new_fe_bh);
				1687	if (parent_fe_bh)
				1688	brelse(parent_fe_bh);
				1689	if (de_bh)
				1690	brelse(de_bh);
				1691	if (inode_ac)
				1692	ocfs2_free_alloc_context(inode_ac);
				1693	if (data_ac)
				1694	ocfs2_free_alloc_context(data_ac);
				1695	if ((status < 0) && inode)
				1696	iput(inode);
				1697
				1698	mlog_exit(status);
				1699
				1700	return status;
				1701	}
				1702
				1703	int ocfs2_check_dir_entry(struct inode * dir,
				1704	struct ocfs2_dir_entry * de,
				1705	struct buffer_head * bh,
				1706	unsigned long offset)
				1707	{
				1708	const char *error_msg = NULL;
				1709	const int rlen = le16_to_cpu(de->rec_len);
				1710
				1711	if (rlen < OCFS2_DIR_REC_LEN(1))
				1712	error_msg = "rec_len is smaller than minimal";
				1713	else if (rlen % 4 != 0)
				1714	error_msg = "rec_len % 4 != 0";
				1715	else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
				1716	error_msg = "rec_len is too small for name_len";
				1717	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
				1718	error_msg = "directory entry across blocks";
				1719
				1720	if (error_msg != NULL)
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	1721	mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
				1722	"offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
				1723	(unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
				1724	offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
				1725	de->name_len);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1726	return error_msg == NULL ? 1 : 0;
				1727	}
				1728
				1729	/* we don't always have a dentry for what we want to add, so people
				1730	* like orphan dir can call this instead.
				1731	*
				1732	* If you pass me insert_bh, I'll skip the search of the other dir
				1733	* blocks and put the record in there.
				1734	*/
				1735	static int __ocfs2_add_entry(struct ocfs2_journal_handle *handle,
				1736	struct inode *dir,
				1737	const char *name, int namelen,
				1738	struct inode *inode, u64 blkno,
				1739	struct buffer_head *parent_fe_bh,
				1740	struct buffer_head *insert_bh)
				1741	{
				1742	unsigned long offset;
				1743	unsigned short rec_len;
				1744	struct ocfs2_dir_entry de, de1;
				1745	struct super_block *sb;
				1746	int retval, status;
				1747
				1748	mlog_entry_void();
				1749
				1750	sb = dir->i_sb;
				1751
				1752	if (!namelen)
				1753	return -EINVAL;
				1754
				1755	rec_len = OCFS2_DIR_REC_LEN(namelen);
				1756	offset = 0;
				1757	de = (struct ocfs2_dir_entry *) insert_bh->b_data;
				1758	while (1) {
				1759	BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data);
				1760	/* These checks should've already been passed by the
				1761	* prepare function, but I guess we can leave them
				1762	* here anyway. */
				1763	if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
				1764	retval = -ENOENT;
				1765	goto bail;
				1766	}
				1767	if (ocfs2_match(namelen, name, de)) {
				1768	retval = -EEXIST;
				1769	goto bail;
				1770	}
				1771	if (((le64_to_cpu(de->inode) == 0) &&
				1772	(le16_to_cpu(de->rec_len) >= rec_len)) \|\|
				1773	(le16_to_cpu(de->rec_len) >=
				1774	(OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
				1775	status = ocfs2_journal_access(handle, dir, insert_bh,
				1776	OCFS2_JOURNAL_ACCESS_WRITE);
				1777	/* By now the buffer is marked for journaling */
				1778	offset += le16_to_cpu(de->rec_len);
				1779	if (le64_to_cpu(de->inode)) {
				1780	de1 = (struct ocfs2_dir_entry )((char ) de +
				1781	OCFS2_DIR_REC_LEN(de->name_len));
				1782	de1->rec_len =
				1783	cpu_to_le16(le16_to_cpu(de->rec_len) -
				1784	OCFS2_DIR_REC_LEN(de->name_len));
				1785	de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
				1786	de = de1;
				1787	}
				1788	de->file_type = OCFS2_FT_UNKNOWN;
				1789	if (blkno) {
				1790	de->inode = cpu_to_le64(blkno);
				1791	ocfs2_set_de_type(de, inode->i_mode);
				1792	} else
				1793	de->inode = 0;
				1794	de->name_len = namelen;
				1795	memcpy(de->name, name, namelen);
				1796
				1797	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
				1798	dir->i_version++;
				1799	status = ocfs2_journal_dirty(handle, insert_bh);
				1800	retval = 0;
				1801	goto bail;
				1802	}
				1803	offset += le16_to_cpu(de->rec_len);
				1804	de = (struct ocfs2_dir_entry ) ((char ) de + le16_to_cpu(de->rec_len));
				1805	}
				1806
				1807	/* when you think about it, the assert above should prevent us
				1808	* from ever getting here. */
				1809	retval = -ENOSPC;
				1810	bail:
				1811
				1812	mlog_exit(retval);
				1813	return retval;
				1814	}
				1815
				1816
				1817	/*
				1818	* ocfs2_delete_entry deletes a directory entry by merging it with the
				1819	* previous entry
				1820	*/
				1821	static int ocfs2_delete_entry(struct ocfs2_journal_handle *handle,
				1822	struct inode *dir,
				1823	struct ocfs2_dir_entry *de_del,
				1824	struct buffer_head *bh)
				1825	{
				1826	struct ocfs2_dir_entry de, pde;
				1827	int i, status = -ENOENT;
				1828
				1829	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
				1830
				1831	i = 0;
				1832	pde = NULL;
				1833	de = (struct ocfs2_dir_entry *) bh->b_data;
				1834	while (i < bh->b_size) {
				1835	if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
				1836	status = -EIO;
				1837	mlog_errno(status);
				1838	goto bail;
				1839	}
				1840	if (de == de_del) {
				1841	status = ocfs2_journal_access(handle, dir, bh,
				1842	OCFS2_JOURNAL_ACCESS_WRITE);
				1843	if (status < 0) {
				1844	status = -EIO;
				1845	mlog_errno(status);
				1846	goto bail;
				1847	}
				1848	if (pde)
				1849	pde->rec_len =
				1850	cpu_to_le16(le16_to_cpu(pde->rec_len) +
				1851	le16_to_cpu(de->rec_len));
				1852	else
				1853	de->inode = 0;
				1854	dir->i_version++;
				1855	status = ocfs2_journal_dirty(handle, bh);
				1856	goto bail;
				1857	}
				1858	i += le16_to_cpu(de->rec_len);
				1859	pde = de;
				1860	de = (struct ocfs2_dir_entry )((char )de + le16_to_cpu(de->rec_len));
				1861	}
				1862	bail:
				1863	mlog_exit(status);
				1864	return status;
				1865	}
				1866
				1867	/*
				1868	* Returns 0 if not found, -1 on failure, and 1 on success
				1869	*/
				1870	static int inline ocfs2_search_dirblock(struct buffer_head *bh,
				1871	struct inode *dir,
				1872	const char *name, int namelen,
				1873	unsigned long offset,
				1874	struct ocfs2_dir_entry **res_dir)
				1875	{
				1876	struct ocfs2_dir_entry *de;
				1877	char dlimit, de_buf;
				1878	int de_len;
				1879	int ret = 0;
				1880
				1881	mlog_entry_void();
				1882
				1883	de_buf = bh->b_data;
				1884	dlimit = de_buf + dir->i_sb->s_blocksize;
				1885
				1886	while (de_buf < dlimit) {
				1887	/* this code is executed quadratically often */
				1888	/* do minimal checking `by hand' */
				1889
				1890	de = (struct ocfs2_dir_entry *) de_buf;
				1891
				1892	if (de_buf + namelen <= dlimit &&
				1893	ocfs2_match(namelen, name, de)) {
				1894	/* found a match - just to be sure, do a full check */
				1895	if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
				1896	ret = -1;
				1897	goto bail;
				1898	}
				1899	*res_dir = de;
				1900	ret = 1;
				1901	goto bail;
				1902	}
				1903
				1904	/* prevent looping on a bad block */
				1905	de_len = le16_to_cpu(de->rec_len);
				1906	if (de_len <= 0) {
				1907	ret = -1;
				1908	goto bail;
				1909	}
				1910
				1911	de_buf += de_len;
				1912	offset += de_len;
				1913	}
				1914
				1915	bail:
				1916	mlog_exit(ret);
				1917	return ret;
				1918	}
				1919
				1920	struct buffer_head ocfs2_find_entry(const char name, int namelen,
				1921	struct inode *dir,
				1922	struct ocfs2_dir_entry **res_dir)
				1923	{
				1924	struct super_block *sb;
				1925	struct buffer_head *bh_use[NAMEI_RA_SIZE];
				1926	struct buffer_head bh, ret = NULL;
				1927	unsigned long start, block, b;
				1928	int ra_max = 0; /* Number of bh's in the readahead
				1929	buffer, bh_use[] */
				1930	int ra_ptr = 0; /* Current index into readahead
				1931	buffer */
				1932	int num = 0;
				1933	int nblocks, i, err;
				1934
				1935	mlog_entry_void();
				1936
				1937	*res_dir = NULL;
				1938	sb = dir->i_sb;
				1939
				1940	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
				1941	start = OCFS2_I(dir)->ip_dir_start_lookup;
				1942	if (start >= nblocks)
				1943	start = 0;
				1944	block = start;
				1945
				1946	restart:
				1947	do {
				1948	/*
				1949	* We deal with the read-ahead logic here.
				1950	*/
				1951	if (ra_ptr >= ra_max) {
				1952	/* Refill the readahead buffer */
				1953	ra_ptr = 0;
				1954	b = block;
				1955	for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				1956	/*
				1957	* Terminate if we reach the end of the
				1958	* directory and must wrap, or if our
				1959	* search has finished at this block.
				1960	*/
				1961	if (b >= nblocks \|\| (num && block == start)) {
				1962	bh_use[ra_max] = NULL;
				1963	break;
				1964	}
				1965	num++;
				1966
				1967	/* XXX: questionable readahead stuff here */
				1968	bh = ocfs2_bread(dir, b++, &err, 1);
				1969	bh_use[ra_max] = bh;
				1970	#if 0 // ???
				1971	if (bh)
				1972	ll_rw_block(READ, 1, &bh);
				1973	#endif
				1974	}
				1975	}
				1976	if ((bh = bh_use[ra_ptr++]) == NULL)
				1977	goto next;
				1978	wait_on_buffer(bh);
				1979	if (!buffer_uptodate(bh)) {
				1980	/* read error, skip block & hope for the best */
				1981	brelse(bh);
				1982	goto next;
				1983	}
				1984	i = ocfs2_search_dirblock(bh, dir, name, namelen,
				1985	block << sb->s_blocksize_bits,
				1986	res_dir);
				1987	if (i == 1) {
				1988	OCFS2_I(dir)->ip_dir_start_lookup = block;
				1989	ret = bh;
				1990	goto cleanup_and_exit;
				1991	} else {
				1992	brelse(bh);
				1993	if (i < 0)
				1994	goto cleanup_and_exit;
				1995	}
				1996	next:
				1997	if (++block >= nblocks)
				1998	block = 0;
				1999	} while (block != start);
				2000
				2001	/*
				2002	* If the directory has grown while we were searching, then
				2003	* search the last part of the directory before giving up.
				2004	*/
				2005	block = nblocks;
				2006	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
				2007	if (block < nblocks) {
				2008	start = 0;
				2009	goto restart;
				2010	}
				2011
				2012	cleanup_and_exit:
				2013	/* Clean up the read-ahead blocks */
				2014	for (; ra_ptr < ra_max; ra_ptr++)
				2015	brelse(bh_use[ra_ptr]);
				2016
				2017	mlog_exit_ptr(ret);
				2018	return ret;
				2019	}
				2020
				2021	static int ocfs2_blkno_stringify(u64 blkno, char *name)
				2022	{
				2023	int status, namelen;
				2024
				2025	mlog_entry_void();
				2026
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	2027	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
				2028	(long long)blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2029	if (namelen <= 0) {
				2030	if (namelen)
				2031	status = namelen;
				2032	else
				2033	status = -EINVAL;
				2034	mlog_errno(status);
				2035	goto bail;
				2036	}
				2037	if (namelen != OCFS2_ORPHAN_NAMELEN) {
				2038	status = -EINVAL;
				2039	mlog_errno(status);
				2040	goto bail;
				2041	}
				2042
				2043	mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name,
				2044	namelen);
				2045
				2046	status = 0;
				2047	bail:
				2048	mlog_exit(status);
				2049	return status;
				2050	}
				2051
				2052	static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
				2053	struct ocfs2_journal_handle *handle,
				2054	struct inode *inode,
				2055	char *name,
				2056	struct buffer_head **de_bh)
				2057	{
				2058	struct inode *orphan_dir_inode = NULL;
				2059	struct buffer_head *orphan_dir_bh = NULL;
				2060	int status = 0;
				2061
				2062	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
				2063	if (status < 0) {
				2064	mlog_errno(status);
				2065	goto leave;
				2066	}
				2067
				2068	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
				2069	ORPHAN_DIR_SYSTEM_INODE,
				2070	osb->slot_num);
				2071	if (!orphan_dir_inode) {
				2072	status = -ENOENT;
				2073	mlog_errno(status);
				2074	goto leave;
				2075	}
				2076
				2077	ocfs2_handle_add_inode(handle, orphan_dir_inode);
				2078	status = ocfs2_meta_lock(orphan_dir_inode, handle, &orphan_dir_bh, 1);
				2079	if (status < 0) {
				2080	mlog_errno(status);
				2081	goto leave;
				2082	}
				2083
				2084	status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
				2085	orphan_dir_bh, name,
				2086	OCFS2_ORPHAN_NAMELEN, de_bh);
				2087	if (status < 0) {
				2088	mlog_errno(status);
				2089	goto leave;
				2090	}
				2091
				2092	leave:
				2093	if (orphan_dir_inode)
				2094	iput(orphan_dir_inode);
				2095
				2096	if (orphan_dir_bh)
				2097	brelse(orphan_dir_bh);
				2098
				2099	mlog_exit(status);
				2100	return status;
				2101	}
				2102
				2103	static int ocfs2_orphan_add(struct ocfs2_super *osb,
				2104	struct ocfs2_journal_handle *handle,
				2105	struct inode *inode,
				2106	struct ocfs2_dinode *fe,
				2107	char *name,
				2108	struct buffer_head *de_bh)
				2109	{
				2110	struct inode *orphan_dir_inode = NULL;
				2111	struct buffer_head *orphan_dir_bh = NULL;
				2112	int status = 0;
				2113	struct ocfs2_dinode *orphan_fe;
				2114
				2115	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
				2116
				2117	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
				2118	ORPHAN_DIR_SYSTEM_INODE,
				2119	osb->slot_num);
				2120	if (!orphan_dir_inode) {
				2121	status = -ENOENT;
				2122	mlog_errno(status);
				2123	goto leave;
				2124	}
				2125
				2126	status = ocfs2_read_block(osb,
				2127	OCFS2_I(orphan_dir_inode)->ip_blkno,
				2128	&orphan_dir_bh, OCFS2_BH_CACHED,
				2129	orphan_dir_inode);
				2130	if (status < 0) {
				2131	mlog_errno(status);
				2132	goto leave;
				2133	}
				2134
				2135	status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh,
				2136	OCFS2_JOURNAL_ACCESS_WRITE);
				2137	if (status < 0) {
				2138	mlog_errno(status);
				2139	goto leave;
				2140	}
				2141
				2142	/* we're a cluster, and nlink can change on disk from
				2143	* underneath us... */
				2144	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
				2145	if (S_ISDIR(inode->i_mode))
				2146	le16_add_cpu(&orphan_fe->i_links_count, 1);
				2147	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
				2148
				2149	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
				2150	if (status < 0) {
				2151	mlog_errno(status);
				2152	goto leave;
				2153	}
				2154
				2155	status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
				2156	OCFS2_ORPHAN_NAMELEN, inode,
				2157	OCFS2_I(inode)->ip_blkno,
				2158	orphan_dir_bh, de_bh);
				2159	if (status < 0) {
				2160	mlog_errno(status);
				2161	goto leave;
				2162	}
				2163
				2164	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
				2165
				2166	/* Record which orphan dir our inode now resides
				2167	* in. delete_inode will use this to determine which orphan
				2168	* dir to lock. */
				2169	spin_lock(&OCFS2_I(inode)->ip_lock);
				2170	OCFS2_I(inode)->ip_orphaned_slot = osb->slot_num;
				2171	spin_unlock(&OCFS2_I(inode)->ip_lock);
				2172
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	2173	mlog(0, "Inode %llu orphaned in slot %d\n",
				2174	(unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2175
				2176	leave:
				2177	if (orphan_dir_inode)
				2178	iput(orphan_dir_inode);
				2179
				2180	if (orphan_dir_bh)
				2181	brelse(orphan_dir_bh);
				2182
				2183	mlog_exit(status);
				2184	return status;
				2185	}
				2186
				2187	/* unlike orphan_add, we expect the orphan dir to already be locked here. */
				2188	int ocfs2_orphan_del(struct ocfs2_super *osb,
				2189	struct ocfs2_journal_handle *handle,
				2190	struct inode *orphan_dir_inode,
				2191	struct inode *inode,
				2192	struct buffer_head *orphan_dir_bh)
				2193	{
				2194	char name[OCFS2_ORPHAN_NAMELEN + 1];
				2195	struct ocfs2_dinode *orphan_fe;
				2196	int status = 0;
				2197	struct buffer_head *target_de_bh = NULL;
				2198	struct ocfs2_dir_entry *target_de = NULL;
				2199
				2200	mlog_entry_void();
				2201
				2202	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
				2203	if (status < 0) {
				2204	mlog_errno(status);
				2205	goto leave;
				2206	}
				2207
Mark Fasheh	b069705	2006-03-03 10:24:33 -0800	[diff] [blame]	2208	mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n",
				2209	name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
				2210	OCFS2_ORPHAN_NAMELEN);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2211
				2212	/* find it's spot in the orphan directory */
				2213	target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
				2214	orphan_dir_inode, &target_de);
				2215	if (!target_de_bh) {
				2216	status = -ENOENT;
				2217	mlog_errno(status);
				2218	goto leave;
				2219	}
				2220
				2221	/* remove it from the orphan directory */
				2222	status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de,
				2223	target_de_bh);
				2224	if (status < 0) {
				2225	mlog_errno(status);
				2226	goto leave;
				2227	}
				2228
				2229	status = ocfs2_journal_access(handle,orphan_dir_inode, orphan_dir_bh,
				2230	OCFS2_JOURNAL_ACCESS_WRITE);
				2231	if (status < 0) {
				2232	mlog_errno(status);
				2233	goto leave;
				2234	}
				2235
				2236	/* do the i_nlink dance! :) */
				2237	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
				2238	if (S_ISDIR(inode->i_mode))
				2239	le16_add_cpu(&orphan_fe->i_links_count, -1);
				2240	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
				2241
				2242	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
				2243	if (status < 0) {
				2244	mlog_errno(status);
				2245	goto leave;
				2246	}
				2247
				2248	leave:
				2249	if (target_de_bh)
				2250	brelse(target_de_bh);
				2251
				2252	mlog_exit(status);
				2253	return status;
				2254	}
				2255
				2256	struct inode_operations ocfs2_dir_iops = {
				2257	.create = ocfs2_create,
				2258	.lookup = ocfs2_lookup,
				2259	.link = ocfs2_link,
				2260	.unlink = ocfs2_unlink,
				2261	.rmdir = ocfs2_unlink,
				2262	.symlink = ocfs2_symlink,
				2263	.mkdir = ocfs2_mkdir,
				2264	.mknod = ocfs2_mknod,
				2265	.rename = ocfs2_rename,
				2266	.setattr = ocfs2_setattr,
				2267	.getattr = ocfs2_getattr,
				2268	};