Blame - fs/ocfs2/namei.c - kernel/msm-4.9

blob: 274f61d0cda90854b981e86720c9038364238770 [file] [log] [blame]

Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1	/* -- mode: c; c-basic-offset: 8; --
				2	* vim: noexpandtab sw=8 ts=8 sts=0:
				3	*
				4	* namei.c
				5	*
				6	* Create and rename file, directory, symlinks
				7	*
				8	* Copyright (C) 2002, 2004 Oracle. All rights reserved.
				9	*
				10	* Portions of this code from linux/fs/ext3/dir.c
				11	*
				12	* Copyright (C) 1992, 1993, 1994, 1995
				13	* Remy Card (card@masi.ibp.fr)
				14	* Laboratoire MASI - Institut Blaise Pascal
				15	* Universite Pierre et Marie Curie (Paris VI)
				16	*
				17	* from
				18	*
				19	* linux/fs/minix/dir.c
				20	*
				21	* Copyright (C) 1991, 1992 Linus Torvalds
				22	*
				23	* This program is free software; you can redistribute it and/or
				24	* modify it under the terms of the GNU General Public
				25	* License as published by the Free Software Foundation; either
				26	* version 2 of the License, or (at your option) any later version.
				27	*
				28	* This program is distributed in the hope that it will be useful,
				29	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				30	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				31	* General Public License for more details.
				32	*
				33	* You should have received a copy of the GNU General Public
				34	* License along with this program; if not, write to the
				35	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				36	* Boston, MA 02111-1307, USA.
				37	*/
				38
				39	#include <linux/fs.h>
				40	#include <linux/types.h>
				41	#include <linux/slab.h>
				42	#include <linux/highmem.h>
				43
				44	#define MLOG_MASK_PREFIX ML_NAMEI
				45	#include <cluster/masklog.h>
				46
				47	#include "ocfs2.h"
				48
				49	#include "alloc.h"
				50	#include "dcache.h"
				51	#include "dir.h"
				52	#include "dlmglue.h"
				53	#include "extent_map.h"
				54	#include "file.h"
				55	#include "inode.h"
				56	#include "journal.h"
				57	#include "namei.h"
				58	#include "suballoc.h"
				59	#include "symlink.h"
				60	#include "sysfile.h"
				61	#include "uptodate.h"
				62	#include "vote.h"
				63
				64	#include "buffer_head_io.h"
				65
				66	#define NAMEI_RA_CHUNKS 2
				67	#define NAMEI_RA_BLOCKS 4
				68	#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
				69	#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
				70
				71	static int inline ocfs2_search_dirblock(struct buffer_head *bh,
				72	struct inode *dir,
				73	const char *name, int namelen,
				74	unsigned long offset,
				75	struct ocfs2_dir_entry **res_dir);
				76
				77	static int ocfs2_delete_entry(struct ocfs2_journal_handle *handle,
				78	struct inode *dir,
				79	struct ocfs2_dir_entry *de_del,
				80	struct buffer_head *bh);
				81
				82	static int __ocfs2_add_entry(struct ocfs2_journal_handle *handle,
				83	struct inode *dir,
				84	const char *name, int namelen,
				85	struct inode *inode, u64 blkno,
				86	struct buffer_head *parent_fe_bh,
				87	struct buffer_head *insert_bh);
				88
				89	static int ocfs2_mknod_locked(struct ocfs2_super *osb,
				90	struct inode *dir,
				91	struct dentry *dentry, int mode,
				92	dev_t dev,
				93	struct buffer_head **new_fe_bh,
				94	struct buffer_head *parent_fe_bh,
				95	struct ocfs2_journal_handle *handle,
				96	struct inode **ret_inode,
				97	struct ocfs2_alloc_context *inode_ac);
				98
				99	static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
				100	struct ocfs2_journal_handle *handle,
				101	struct inode *parent,
				102	struct inode *inode,
				103	struct buffer_head *fe_bh,
				104	struct ocfs2_alloc_context *data_ac);
				105
				106	static int ocfs2_double_lock(struct ocfs2_super *osb,
				107	struct ocfs2_journal_handle *handle,
				108	struct buffer_head **bh1,
				109	struct inode *inode1,
				110	struct buffer_head **bh2,
				111	struct inode *inode2);
				112
				113	static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
				114	struct ocfs2_journal_handle *handle,
				115	struct inode *inode,
				116	char *name,
				117	struct buffer_head **de_bh);
				118
				119	static int ocfs2_orphan_add(struct ocfs2_super *osb,
				120	struct ocfs2_journal_handle *handle,
				121	struct inode *inode,
				122	struct ocfs2_dinode *fe,
				123	char *name,
				124	struct buffer_head *de_bh);
				125
				126	static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
				127	struct ocfs2_journal_handle *handle,
				128	struct inode *inode,
				129	const char *symname);
				130
				131	static inline int ocfs2_add_entry(struct ocfs2_journal_handle *handle,
				132	struct dentry *dentry,
				133	struct inode *inode, u64 blkno,
				134	struct buffer_head *parent_fe_bh,
				135	struct buffer_head *insert_bh)
				136	{
				137	return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
				138	dentry->d_name.name, dentry->d_name.len,
				139	inode, blkno, parent_fe_bh, insert_bh);
				140	}
				141
				142	/* An orphan dir name is an 8 byte value, printed as a hex string */
				143	#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
				144
				145	static struct dentry ocfs2_lookup(struct inode dir, struct dentry *dentry,
				146	struct nameidata *nd)
				147	{
				148	int status;
				149	u64 blkno;
				150	struct buffer_head *dirent_bh = NULL;
				151	struct inode *inode = NULL;
				152	struct dentry *ret;
				153	struct ocfs2_dir_entry *dirent;
				154	struct ocfs2_inode_info *oi;
				155
				156	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
				157	dentry->d_name.len, dentry->d_name.name);
				158
				159	if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) {
				160	ret = ERR_PTR(-ENAMETOOLONG);
				161	goto bail;
				162	}
				163
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	164	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
				165	dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	166
				167	status = ocfs2_meta_lock(dir, NULL, NULL, 0);
				168	if (status < 0) {
				169	if (status != -ENOENT)
				170	mlog_errno(status);
				171	ret = ERR_PTR(status);
				172	goto bail;
				173	}
				174
				175	status = ocfs2_find_files_on_disk(dentry->d_name.name,
				176	dentry->d_name.len, &blkno,
				177	dir, &dirent_bh, &dirent);
				178	if (status < 0)
				179	goto bail_add;
				180
				181	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno);
				182	if (IS_ERR(inode)) {
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	183	mlog(ML_ERROR, "Unable to create inode %llu\n",
				184	(unsigned long long)blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	185	ret = ERR_PTR(-EACCES);
				186	goto bail_unlock;
				187	}
				188
				189	oi = OCFS2_I(inode);
				190	/* Clear any orphaned state... If we were able to look up the
				191	* inode from a directory, it certainly can't be orphaned. We
				192	* might have the bad state from a node which intended to
				193	* orphan this inode but crashed before it could commit the
				194	* unlink. */
				195	spin_lock(&oi->ip_lock);
				196	oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
				197	oi->ip_orphaned_slot = OCFS2_INVALID_SLOT;
				198	spin_unlock(&oi->ip_lock);
				199
				200	bail_add:
				201
				202	dentry->d_op = &ocfs2_dentry_ops;
				203	ret = d_splice_alias(inode, dentry);
				204
				205	bail_unlock:
				206	/* Don't drop the cluster lock until after the d_add --
				207	* unlink on another node will message us to remove that
				208	* dentry under this lock so otherwise we can race this with
				209	* the vote thread and have a stale dentry. */
				210	ocfs2_meta_unlock(dir, 0);
				211
				212	bail:
				213	if (dirent_bh)
				214	brelse(dirent_bh);
				215
				216	mlog_exit_ptr(ret);
				217
				218	return ret;
				219	}
				220
				221	static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
				222	struct ocfs2_journal_handle *handle,
				223	struct inode *parent,
				224	struct inode *inode,
				225	struct buffer_head *fe_bh,
				226	struct ocfs2_alloc_context *data_ac)
				227	{
				228	int status;
				229	struct buffer_head *new_bh = NULL;
				230	struct ocfs2_dir_entry *de = NULL;
				231
				232	mlog_entry_void();
				233
				234	status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
				235	data_ac, NULL, &new_bh);
				236	if (status < 0) {
				237	mlog_errno(status);
				238	goto bail;
				239	}
				240
				241	ocfs2_set_new_buffer_uptodate(inode, new_bh);
				242
				243	status = ocfs2_journal_access(handle, inode, new_bh,
				244	OCFS2_JOURNAL_ACCESS_CREATE);
				245	if (status < 0) {
				246	mlog_errno(status);
				247	goto bail;
				248	}
				249	memset(new_bh->b_data, 0, osb->sb->s_blocksize);
				250
				251	de = (struct ocfs2_dir_entry *) new_bh->b_data;
				252	de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
				253	de->name_len = 1;
				254	de->rec_len =
				255	cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
				256	strcpy(de->name, ".");
				257	ocfs2_set_de_type(de, S_IFDIR);
				258	de = (struct ocfs2_dir_entry ) ((char )de + le16_to_cpu(de->rec_len));
				259	de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
				260	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
				261	OCFS2_DIR_REC_LEN(1));
				262	de->name_len = 2;
				263	strcpy(de->name, "..");
				264	ocfs2_set_de_type(de, S_IFDIR);
				265
				266	status = ocfs2_journal_dirty(handle, new_bh);
				267	if (status < 0) {
				268	mlog_errno(status);
				269	goto bail;
				270	}
				271
				272	i_size_write(inode, inode->i_sb->s_blocksize);
				273	inode->i_nlink = 2;
				274	inode->i_blocks = ocfs2_align_bytes_to_sectors(inode->i_sb->s_blocksize);
				275	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
				276	if (status < 0) {
				277	mlog_errno(status);
				278	goto bail;
				279	}
				280
				281	status = 0;
				282	bail:
				283	if (new_bh)
				284	brelse(new_bh);
				285
				286	mlog_exit(status);
				287	return status;
				288	}
				289
				290	static int ocfs2_mknod(struct inode *dir,
				291	struct dentry *dentry,
				292	int mode,
				293	dev_t dev)
				294	{
				295	int status = 0;
				296	struct buffer_head *parent_fe_bh = NULL;
				297	struct ocfs2_journal_handle *handle = NULL;
				298	struct ocfs2_super *osb;
				299	struct ocfs2_dinode *dirfe;
				300	struct buffer_head *new_fe_bh = NULL;
				301	struct buffer_head *de_bh = NULL;
				302	struct inode *inode = NULL;
				303	struct ocfs2_alloc_context *inode_ac = NULL;
				304	struct ocfs2_alloc_context *data_ac = NULL;
				305
				306	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
				307	(unsigned long)dev, dentry->d_name.len,
				308	dentry->d_name.name);
				309
				310	/* get our super block */
				311	osb = OCFS2_SB(dir->i_sb);
				312
				313	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	314	mlog(ML_ERROR, "inode %llu has i_nlink of %u\n",
				315	(unsigned long long)OCFS2_I(dir)->ip_blkno, dir->i_nlink);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	316	status = -EMLINK;
				317	goto leave;
				318	}
				319
				320	handle = ocfs2_alloc_handle(osb);
				321	if (handle == NULL) {
				322	status = -ENOMEM;
				323	mlog_errno(status);
				324	goto leave;
				325	}
				326
				327	status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
				328	if (status < 0) {
				329	if (status != -ENOENT)
				330	mlog_errno(status);
				331	goto leave;
				332	}
				333
				334	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
				335	if (!dirfe->i_links_count) {
				336	/* can't make a file in a deleted directory. */
				337	status = -ENOENT;
				338	goto leave;
				339	}
				340
				341	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
				342	dentry->d_name.len);
				343	if (status)
				344	goto leave;
				345
				346	/* get a spot inside the dir. */
				347	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
				348	dentry->d_name.name,
				349	dentry->d_name.len, &de_bh);
				350	if (status < 0) {
				351	mlog_errno(status);
				352	goto leave;
				353	}
				354
				355	/* reserve an inode spot */
				356	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
				357	if (status < 0) {
				358	if (status != -ENOSPC)
				359	mlog_errno(status);
				360	goto leave;
				361	}
				362
				363	/* are we making a directory? If so, reserve a cluster for his
				364	* 1st extent. */
				365	if (S_ISDIR(mode)) {
				366	status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
				367	if (status < 0) {
				368	if (status != -ENOSPC)
				369	mlog_errno(status);
				370	goto leave;
				371	}
				372	}
				373
				374	handle = ocfs2_start_trans(osb, handle, OCFS2_MKNOD_CREDITS);
				375	if (IS_ERR(handle)) {
				376	status = PTR_ERR(handle);
				377	handle = NULL;
				378	mlog_errno(status);
				379	goto leave;
				380	}
				381
				382	/* do the real work now. */
				383	status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
				384	&new_fe_bh, parent_fe_bh, handle,
				385	&inode, inode_ac);
				386	if (status < 0) {
				387	mlog_errno(status);
				388	goto leave;
				389	}
				390
				391	if (S_ISDIR(mode)) {
				392	status = ocfs2_fill_new_dir(osb, handle, dir, inode,
				393	new_fe_bh, data_ac);
				394	if (status < 0) {
				395	mlog_errno(status);
				396	goto leave;
				397	}
				398
				399	status = ocfs2_journal_access(handle, dir, parent_fe_bh,
				400	OCFS2_JOURNAL_ACCESS_WRITE);
				401	if (status < 0) {
				402	mlog_errno(status);
				403	goto leave;
				404	}
				405	le16_add_cpu(&dirfe->i_links_count, 1);
				406	status = ocfs2_journal_dirty(handle, parent_fe_bh);
				407	if (status < 0) {
				408	mlog_errno(status);
				409	goto leave;
				410	}
				411	dir->i_nlink++;
				412	}
				413
				414	status = ocfs2_add_entry(handle, dentry, inode,
				415	OCFS2_I(inode)->ip_blkno, parent_fe_bh,
				416	de_bh);
				417	if (status < 0) {
				418	mlog_errno(status);
				419	goto leave;
				420	}
				421
				422	insert_inode_hash(inode);
				423	dentry->d_op = &ocfs2_dentry_ops;
				424	d_instantiate(dentry, inode);
				425	status = 0;
				426	leave:
				427	if (handle)
				428	ocfs2_commit_trans(handle);
				429
				430	if (status == -ENOSPC)
				431	mlog(0, "Disk is full\n");
				432
				433	if (new_fe_bh)
				434	brelse(new_fe_bh);
				435
				436	if (de_bh)
				437	brelse(de_bh);
				438
				439	if (parent_fe_bh)
				440	brelse(parent_fe_bh);
				441
				442	if ((status < 0) && inode)
				443	iput(inode);
				444
				445	if (inode_ac)
				446	ocfs2_free_alloc_context(inode_ac);
				447
				448	if (data_ac)
				449	ocfs2_free_alloc_context(data_ac);
				450
				451	mlog_exit(status);
				452
				453	return status;
				454	}
				455
				456	static int ocfs2_mknod_locked(struct ocfs2_super *osb,
				457	struct inode *dir,
				458	struct dentry *dentry, int mode,
				459	dev_t dev,
				460	struct buffer_head **new_fe_bh,
				461	struct buffer_head *parent_fe_bh,
				462	struct ocfs2_journal_handle *handle,
				463	struct inode **ret_inode,
				464	struct ocfs2_alloc_context *inode_ac)
				465	{
				466	int status = 0;
				467	struct ocfs2_dinode *fe = NULL;
				468	struct ocfs2_extent_list *fel;
				469	u64 fe_blkno = 0;
				470	u16 suballoc_bit;
				471	struct inode *inode = NULL;
				472
				473	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
				474	(unsigned long)dev, dentry->d_name.len,
				475	dentry->d_name.name);
				476
				477	*new_fe_bh = NULL;
				478	*ret_inode = NULL;
				479
				480	status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
				481	&fe_blkno);
				482	if (status < 0) {
				483	mlog_errno(status);
				484	goto leave;
				485	}
				486
				487	inode = new_inode(dir->i_sb);
				488	if (IS_ERR(inode)) {
				489	status = PTR_ERR(inode);
				490	mlog(ML_ERROR, "new_inode failed!\n");
				491	goto leave;
				492	}
				493
				494	/* populate as many fields early on as possible - many of
				495	* these are used by the support functions here and in
				496	* callers. */
				497	inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
				498	OCFS2_I(inode)->ip_blkno = fe_blkno;
				499	if (S_ISDIR(mode))
				500	inode->i_nlink = 2;
				501	else
				502	inode->i_nlink = 1;
				503	inode->i_mode = mode;
				504	spin_lock(&osb->osb_lock);
				505	inode->i_generation = osb->s_next_generation++;
				506	spin_unlock(&osb->osb_lock);
				507
				508	*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
				509	if (!*new_fe_bh) {
				510	status = -EIO;
				511	mlog_errno(status);
				512	goto leave;
				513	}
				514	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
				515
				516	status = ocfs2_journal_access(handle, inode, *new_fe_bh,
				517	OCFS2_JOURNAL_ACCESS_CREATE);
				518	if (status < 0) {
				519	mlog_errno(status);
				520	goto leave;
				521	}
				522
				523	fe = (struct ocfs2_dinode ) (new_fe_bh)->b_data;
				524	memset(fe, 0, osb->sb->s_blocksize);
				525
				526	fe->i_generation = cpu_to_le32(inode->i_generation);
				527	fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
				528	fe->i_blkno = cpu_to_le64(fe_blkno);
				529	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
				530	fe->i_suballoc_slot = cpu_to_le16(osb->slot_num);
				531	fe->i_uid = cpu_to_le32(current->fsuid);
				532	if (dir->i_mode & S_ISGID) {
				533	fe->i_gid = cpu_to_le32(dir->i_gid);
				534	if (S_ISDIR(mode))
				535	mode \|= S_ISGID;
				536	} else
				537	fe->i_gid = cpu_to_le32(current->fsgid);
				538	fe->i_mode = cpu_to_le16(mode);
				539	if (S_ISCHR(mode) \|\| S_ISBLK(mode))
				540	fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
				541
				542	fe->i_links_count = cpu_to_le16(inode->i_nlink);
				543
				544	fe->i_last_eb_blk = 0;
				545	strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
				546	le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL);
				547	fe->i_atime = fe->i_ctime = fe->i_mtime =
				548	cpu_to_le64(CURRENT_TIME.tv_sec);
				549	fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
				550	cpu_to_le32(CURRENT_TIME.tv_nsec);
				551	fe->i_dtime = 0;
				552
				553	fel = &fe->id2.i_list;
				554	fel->l_tree_depth = 0;
				555	fel->l_next_free_rec = 0;
				556	fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
				557
				558	status = ocfs2_journal_dirty(handle, *new_fe_bh);
				559	if (status < 0) {
				560	mlog_errno(status);
				561	goto leave;
				562	}
				563
				564	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
				565	mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	566	"i_blkno=%llu, i_ino=%lu\n",
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	567	(unsigned long long) (*new_fe_bh)->b_blocknr,
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	568	(unsigned long long)fe->i_blkno, inode->i_ino);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	569	BUG();
				570	}
				571
				572	ocfs2_inode_set_new(osb, inode);
				573	status = ocfs2_create_new_inode_locks(inode);
				574	if (status < 0)
				575	mlog_errno(status);
				576
				577	status = 0; /* error in ocfs2_create_new_inode_locks is not
				578	* critical */
				579
				580	*ret_inode = inode;
				581	leave:
				582	if (status < 0) {
				583	if (*new_fe_bh) {
				584	brelse(*new_fe_bh);
				585	*new_fe_bh = NULL;
				586	}
				587	if (inode)
				588	iput(inode);
				589	}
				590
				591	mlog_exit(status);
				592	return status;
				593	}
				594
				595	static int ocfs2_mkdir(struct inode *dir,
				596	struct dentry *dentry,
				597	int mode)
				598	{
				599	int ret;
				600
				601	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
				602	dentry->d_name.len, dentry->d_name.name);
				603	ret = ocfs2_mknod(dir, dentry, mode \| S_IFDIR, 0);
				604	mlog_exit(ret);
				605
				606	return ret;
				607	}
				608
				609	static int ocfs2_create(struct inode *dir,
				610	struct dentry *dentry,
				611	int mode,
				612	struct nameidata *nd)
				613	{
				614	int ret;
				615
				616	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
				617	dentry->d_name.len, dentry->d_name.name);
				618	ret = ocfs2_mknod(dir, dentry, mode \| S_IFREG, 0);
				619	mlog_exit(ret);
				620
				621	return ret;
				622	}
				623
				624	static int ocfs2_link(struct dentry *old_dentry,
				625	struct inode *dir,
				626	struct dentry *dentry)
				627	{
				628	struct ocfs2_journal_handle *handle = NULL;
				629	struct inode *inode = old_dentry->d_inode;
				630	int err;
				631	struct buffer_head *fe_bh = NULL;
				632	struct buffer_head *parent_fe_bh = NULL;
				633	struct buffer_head *de_bh = NULL;
				634	struct ocfs2_dinode *fe = NULL;
				635	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
				636
				637	mlog_entry("(inode=%lu, old='%.s' new='%.s')\n", inode->i_ino,
				638	old_dentry->d_name.len, old_dentry->d_name.name,
				639	dentry->d_name.len, dentry->d_name.name);
				640
				641	if (S_ISDIR(inode->i_mode)) {
				642	err = -EPERM;
				643	goto bail;
				644	}
				645
				646	if (inode->i_nlink >= OCFS2_LINK_MAX) {
				647	err = -EMLINK;
				648	goto bail;
				649	}
				650
				651	handle = ocfs2_alloc_handle(osb);
				652	if (handle == NULL) {
				653	err = -ENOMEM;
				654	goto bail;
				655	}
				656
				657	err = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
				658	if (err < 0) {
				659	if (err != -ENOENT)
				660	mlog_errno(err);
				661	goto bail;
				662	}
				663
				664	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
				665	dentry->d_name.len);
				666	if (err)
				667	goto bail;
				668
				669	err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
				670	dentry->d_name.name,
				671	dentry->d_name.len, &de_bh);
				672	if (err < 0) {
				673	mlog_errno(err);
				674	goto bail;
				675	}
				676
				677	err = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
				678	if (err < 0) {
				679	if (err != -ENOENT)
				680	mlog_errno(err);
				681	goto bail;
				682	}
				683
				684	fe = (struct ocfs2_dinode *) fe_bh->b_data;
				685	if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) {
				686	err = -EMLINK;
				687	goto bail;
				688	}
				689
				690	handle = ocfs2_start_trans(osb, handle, OCFS2_LINK_CREDITS);
				691	if (IS_ERR(handle)) {
				692	err = PTR_ERR(handle);
				693	handle = NULL;
				694	mlog_errno(err);
				695	goto bail;
				696	}
				697
				698	err = ocfs2_journal_access(handle, inode, fe_bh,
				699	OCFS2_JOURNAL_ACCESS_WRITE);
				700	if (err < 0) {
				701	mlog_errno(err);
				702	goto bail;
				703	}
				704
				705	inode->i_nlink++;
				706	inode->i_ctime = CURRENT_TIME;
				707	fe->i_links_count = cpu_to_le16(inode->i_nlink);
				708	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
				709	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
				710
				711	err = ocfs2_journal_dirty(handle, fe_bh);
				712	if (err < 0) {
				713	le16_add_cpu(&fe->i_links_count, -1);
				714	inode->i_nlink--;
				715	mlog_errno(err);
				716	goto bail;
				717	}
				718
				719	err = ocfs2_add_entry(handle, dentry, inode,
				720	OCFS2_I(inode)->ip_blkno,
				721	parent_fe_bh, de_bh);
				722	if (err) {
				723	le16_add_cpu(&fe->i_links_count, -1);
				724	inode->i_nlink--;
				725	mlog_errno(err);
				726	goto bail;
				727	}
				728
				729	atomic_inc(&inode->i_count);
				730	dentry->d_op = &ocfs2_dentry_ops;
				731	d_instantiate(dentry, inode);
				732	bail:
				733	if (handle)
				734	ocfs2_commit_trans(handle);
				735	if (de_bh)
				736	brelse(de_bh);
				737	if (fe_bh)
				738	brelse(fe_bh);
				739	if (parent_fe_bh)
				740	brelse(parent_fe_bh);
				741
				742	mlog_exit(err);
				743
				744	return err;
				745	}
				746
				747	static int ocfs2_unlink(struct inode *dir,
				748	struct dentry *dentry)
				749	{
				750	int status;
				751	unsigned int saved_nlink = 0;
				752	struct inode *inode = dentry->d_inode;
				753	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
				754	u64 blkno;
				755	struct ocfs2_dinode *fe = NULL;
				756	struct buffer_head *fe_bh = NULL;
				757	struct buffer_head *parent_node_bh = NULL;
				758	struct ocfs2_journal_handle *handle = NULL;
				759	struct ocfs2_dir_entry *dirent = NULL;
				760	struct buffer_head *dirent_bh = NULL;
				761	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
				762	struct buffer_head *orphan_entry_bh = NULL;
				763
				764	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
				765	dentry->d_name.len, dentry->d_name.name);
				766
				767	BUG_ON(dentry->d_parent->d_inode != dir);
				768
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	769	mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	770
				771	if (inode == osb->root_inode) {
				772	mlog(0, "Cannot delete the root directory\n");
				773	status = -EPERM;
				774	goto leave;
				775	}
				776
				777	handle = ocfs2_alloc_handle(osb);
				778	if (handle == NULL) {
				779	status = -ENOMEM;
				780	mlog_errno(status);
				781	goto leave;
				782	}
				783
				784	status = ocfs2_meta_lock(dir, handle, &parent_node_bh, 1);
				785	if (status < 0) {
				786	if (status != -ENOENT)
				787	mlog_errno(status);
				788	goto leave;
				789	}
				790
				791	status = ocfs2_find_files_on_disk(dentry->d_name.name,
				792	dentry->d_name.len, &blkno,
				793	dir, &dirent_bh, &dirent);
				794	if (status < 0) {
				795	if (status != -ENOENT)
				796	mlog_errno(status);
				797	goto leave;
				798	}
				799
				800	if (OCFS2_I(inode)->ip_blkno != blkno) {
				801	status = -ENOENT;
				802
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	803	mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n",
				804	(unsigned long long)OCFS2_I(inode)->ip_blkno,
				805	(unsigned long long)blkno, OCFS2_I(inode)->ip_flags);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	806	goto leave;
				807	}
				808
				809	status = ocfs2_meta_lock(inode, handle, &fe_bh, 1);
				810	if (status < 0) {
				811	if (status != -ENOENT)
				812	mlog_errno(status);
				813	goto leave;
				814	}
				815
				816	if (S_ISDIR(inode->i_mode)) {
				817	if (!ocfs2_empty_dir(inode)) {
				818	status = -ENOTEMPTY;
				819	goto leave;
				820	} else if (inode->i_nlink != 2) {
				821	status = -ENOTEMPTY;
				822	goto leave;
				823	}
				824	}
				825
				826	/* There are still a few steps left until we can consider the
				827	* unlink to have succeeded. Save off nlink here before
				828	* modification so we can set it back in case we hit an issue
				829	* before commit. */
				830	saved_nlink = inode->i_nlink;
				831	if (S_ISDIR(inode->i_mode))
				832	inode->i_nlink = 0;
				833	else
				834	inode->i_nlink--;
				835
				836	status = ocfs2_request_unlink_vote(inode, dentry,
				837	(unsigned int) inode->i_nlink);
				838	if (status < 0) {
				839	/* This vote should succeed under all normal
				840	* circumstances. */
				841	mlog_errno(status);
				842	goto leave;
				843	}
				844
				845	if (!inode->i_nlink) {
				846	status = ocfs2_prepare_orphan_dir(osb, handle, inode,
				847	orphan_name,
				848	&orphan_entry_bh);
				849	if (status < 0) {
				850	mlog_errno(status);
				851	goto leave;
				852	}
				853	}
				854
				855	handle = ocfs2_start_trans(osb, handle, OCFS2_UNLINK_CREDITS);
				856	if (IS_ERR(handle)) {
				857	status = PTR_ERR(handle);
				858	handle = NULL;
				859	mlog_errno(status);
				860	goto leave;
				861	}
				862
				863	status = ocfs2_journal_access(handle, inode, fe_bh,
				864	OCFS2_JOURNAL_ACCESS_WRITE);
				865	if (status < 0) {
				866	mlog_errno(status);
				867	goto leave;
				868	}
				869
				870	fe = (struct ocfs2_dinode *) fe_bh->b_data;
				871
				872	if (!inode->i_nlink) {
				873	status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
				874	orphan_entry_bh);
				875	if (status < 0) {
				876	mlog_errno(status);
				877	goto leave;
				878	}
				879	}
				880
				881	/* delete the name from the parent dir */
				882	status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh);
				883	if (status < 0) {
				884	mlog_errno(status);
				885	goto leave;
				886	}
				887
				888	/* We can set nlink on the dinode now. clear the saved version
				889	* so that it doesn't get set later. */
				890	fe->i_links_count = cpu_to_le16(inode->i_nlink);
				891	saved_nlink = 0;
				892
				893	status = ocfs2_journal_dirty(handle, fe_bh);
				894	if (status < 0) {
				895	mlog_errno(status);
				896	goto leave;
				897	}
				898
				899	if (S_ISDIR(inode->i_mode)) {
				900	dir->i_nlink--;
				901	status = ocfs2_mark_inode_dirty(handle, dir,
				902	parent_node_bh);
				903	if (status < 0) {
				904	mlog_errno(status);
				905	dir->i_nlink++;
				906	}
				907	}
				908
				909	leave:
				910	if (status < 0 && saved_nlink)
				911	inode->i_nlink = saved_nlink;
				912
				913	if (handle)
				914	ocfs2_commit_trans(handle);
				915
				916	if (fe_bh)
				917	brelse(fe_bh);
				918
				919	if (dirent_bh)
				920	brelse(dirent_bh);
				921
				922	if (parent_node_bh)
				923	brelse(parent_node_bh);
				924
				925	if (orphan_entry_bh)
				926	brelse(orphan_entry_bh);
				927
				928	mlog_exit(status);
				929
				930	return status;
				931	}
				932
				933	/*
				934	* The only place this should be used is rename!
				935	* if they have the same id, then the 1st one is the only one locked.
				936	*/
				937	static int ocfs2_double_lock(struct ocfs2_super *osb,
				938	struct ocfs2_journal_handle *handle,
				939	struct buffer_head **bh1,
				940	struct inode *inode1,
				941	struct buffer_head **bh2,
				942	struct inode *inode2)
				943	{
				944	int status;
				945	struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
				946	struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
				947	struct buffer_head **tmpbh;
				948	struct inode *tmpinode;
				949
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	950	mlog_entry("(inode1 = %llu, inode2 = %llu)\n",
				951	(unsigned long long)oi1->ip_blkno,
				952	(unsigned long long)oi2->ip_blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	953
				954	BUG_ON(!handle);
				955
				956	if (*bh1)
				957	*bh1 = NULL;
				958	if (*bh2)
				959	*bh2 = NULL;
				960
				961	/* we always want to lock the one with the lower lockid first. */
				962	if (oi1->ip_blkno != oi2->ip_blkno) {
				963	if (oi1->ip_blkno < oi2->ip_blkno) {
				964	/* switch id1 and id2 around */
				965	mlog(0, "switching them around...\n");
				966	tmpbh = bh2;
				967	bh2 = bh1;
				968	bh1 = tmpbh;
				969
				970	tmpinode = inode2;
				971	inode2 = inode1;
				972	inode1 = tmpinode;
				973	}
				974	/* lock id2 */
				975	status = ocfs2_meta_lock(inode2, handle, bh2, 1);
				976	if (status < 0) {
				977	if (status != -ENOENT)
				978	mlog_errno(status);
				979	goto bail;
				980	}
				981	}
				982	/* lock id1 */
				983	status = ocfs2_meta_lock(inode1, handle, bh1, 1);
				984	if (status < 0) {
				985	if (status != -ENOENT)
				986	mlog_errno(status);
				987	goto bail;
				988	}
				989	bail:
				990	mlog_exit(status);
				991	return status;
				992	}
				993
				994	#define PARENT_INO(buffer) \
				995	((struct ocfs2_dir_entry *) \
				996	((char *)buffer + \
				997	le16_to_cpu(((struct ocfs2_dir_entry *)buffer)->rec_len)))->inode
				998
				999	static int ocfs2_rename(struct inode *old_dir,
				1000	struct dentry *old_dentry,
				1001	struct inode *new_dir,
				1002	struct dentry *new_dentry)
				1003	{
				1004	int status = 0, rename_lock = 0;
				1005	struct inode *old_inode = old_dentry->d_inode;
				1006	struct inode *new_inode = new_dentry->d_inode;
				1007	struct ocfs2_dinode *newfe = NULL;
				1008	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
				1009	struct buffer_head *orphan_entry_bh = NULL;
				1010	struct buffer_head *newfe_bh = NULL;
				1011	struct buffer_head *insert_entry_bh = NULL;
				1012	struct ocfs2_super *osb = NULL;
				1013	u64 newfe_blkno;
				1014	struct ocfs2_journal_handle *handle = NULL;
				1015	struct buffer_head *old_dir_bh = NULL;
				1016	struct buffer_head *new_dir_bh = NULL;
				1017	struct ocfs2_dir_entry old_de = NULL, new_de = NULL; // dirent for old_dentry
				1018	// and new_dentry
				1019	struct buffer_head new_de_bh = NULL, old_de_bh = NULL; // bhs for above
				1020	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
				1021	// this is the 1st dirent bh
				1022	nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
				1023	unsigned int links_count;
				1024
				1025	/* At some point it might be nice to break this function up a
				1026	* bit. */
				1027
				1028	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.s' to='%.s')\n",
				1029	old_dir, old_dentry, new_dir, new_dentry,
				1030	old_dentry->d_name.len, old_dentry->d_name.name,
				1031	new_dentry->d_name.len, new_dentry->d_name.name);
				1032
				1033	osb = OCFS2_SB(old_dir->i_sb);
				1034
				1035	if (new_inode) {
				1036	if (!igrab(new_inode))
				1037	BUG();
				1038	}
				1039
				1040	if (atomic_read(&old_dentry->d_count) > 2) {
				1041	shrink_dcache_parent(old_dentry);
				1042	if (atomic_read(&old_dentry->d_count) > 2) {
				1043	status = -EBUSY;
				1044	goto bail;
				1045	}
				1046	}
				1047
				1048	/* Assume a directory heirarchy thusly:
				1049	* a/b/c
				1050	* a/d
				1051	* a,b,c, and d are all directories.
				1052	*
				1053	* from cwd of 'a' on both nodes:
				1054	* node1: mv b/c d
				1055	* node2: mv d b/c
				1056	*
				1057	* And that's why, just like the VFS, we need a file system
				1058	* rename lock. */
				1059	if (old_dentry != new_dentry) {
				1060	status = ocfs2_rename_lock(osb);
				1061	if (status < 0) {
				1062	mlog_errno(status);
				1063	goto bail;
				1064	}
				1065	rename_lock = 1;
				1066	}
				1067
				1068	handle = ocfs2_alloc_handle(osb);
				1069	if (handle == NULL) {
				1070	status = -ENOMEM;
				1071	mlog_errno(status);
				1072	goto bail;
				1073	}
				1074
				1075	/* if old and new are the same, this'll just do one lock. */
				1076	status = ocfs2_double_lock(osb, handle,
				1077	&old_dir_bh, old_dir,
				1078	&new_dir_bh, new_dir);
				1079	if (status < 0) {
				1080	mlog_errno(status);
				1081	goto bail;
				1082	}
				1083
				1084	/* make sure both dirs have bhs
				1085	* get an extra ref on old_dir_bh if old==new */
				1086	if (!new_dir_bh) {
				1087	if (old_dir_bh) {
				1088	new_dir_bh = old_dir_bh;
				1089	get_bh(new_dir_bh);
				1090	} else {
				1091	mlog(ML_ERROR, "no old_dir_bh!\n");
				1092	status = -EIO;
				1093	goto bail;
				1094	}
				1095	}
				1096
				1097	if (S_ISDIR(old_inode->i_mode)) {
				1098	/* Directories actually require metadata updates to
				1099	* the directory info so we can't get away with not
				1100	* doing node locking on it. */
				1101	status = ocfs2_meta_lock(old_inode, handle, NULL, 1);
				1102	if (status < 0) {
				1103	if (status != -ENOENT)
				1104	mlog_errno(status);
				1105	goto bail;
				1106	}
				1107
				1108	status = ocfs2_request_rename_vote(old_inode, old_dentry);
				1109	if (status < 0) {
				1110	mlog_errno(status);
				1111	goto bail;
				1112	}
				1113
				1114	status = -EIO;
				1115	old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
				1116	if (!old_inode_de_bh)
				1117	goto bail;
				1118
				1119	status = -EIO;
				1120	if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) !=
				1121	OCFS2_I(old_dir)->ip_blkno)
				1122	goto bail;
				1123	status = -EMLINK;
				1124	if (!new_inode && new_dir!=old_dir &&
				1125	new_dir->i_nlink >= OCFS2_LINK_MAX)
				1126	goto bail;
				1127	} else {
				1128	/* Ah, the simple case - we're a file so just send a
				1129	* message. */
				1130	status = ocfs2_request_rename_vote(old_inode, old_dentry);
				1131	if (status < 0) {
				1132	mlog_errno(status);
				1133	goto bail;
				1134	}
				1135	}
				1136
				1137	status = -ENOENT;
				1138	old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
				1139	old_dentry->d_name.len,
				1140	old_dir, &old_de);
				1141	if (!old_de_bh)
				1142	goto bail;
				1143
				1144	/*
				1145	* Check for inode number is _not_ due to possible IO errors.
				1146	* We might rmdir the source, keep it as pwd of some process
				1147	* and merrily kill the link to whatever was created under the
				1148	* same name. Goodbye sticky bit ;-<
				1149	*/
				1150	if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno)
				1151	goto bail;
				1152
				1153	/* check if the target already exists (in which case we need
				1154	* to delete it */
				1155	status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
				1156	new_dentry->d_name.len,
				1157	&newfe_blkno, new_dir, &new_de_bh,
				1158	&new_de);
				1159	/* The only error we allow here is -ENOENT because the new
				1160	* file not existing is perfectly valid. */
				1161	if ((status < 0) && (status != -ENOENT)) {
				1162	/* If we cannot find the file specified we should just */
				1163	/* return the error... */
				1164	mlog_errno(status);
				1165	goto bail;
				1166	}
				1167
				1168	if (!new_de && new_inode)
				1169	mlog(ML_ERROR, "inode %lu does not exist in it's parent "
				1170	"directory!", new_inode->i_ino);
				1171
				1172	/* In case we need to overwrite an existing file, we blow it
				1173	* away first */
				1174	if (new_de) {
				1175	/* VFS didn't think there existed an inode here, but
				1176	* someone else in the cluster must have raced our
				1177	* rename to create one. Today we error cleanly, in
				1178	* the future we should consider calling iget to build
				1179	* a new struct inode for this entry. */
				1180	if (!new_inode) {
				1181	status = -EACCES;
				1182
				1183	mlog(0, "We found an inode for name %.*s but VFS "
				1184	"didn't give us one.\n", new_dentry->d_name.len,
				1185	new_dentry->d_name.name);
				1186	goto bail;
				1187	}
				1188
				1189	if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
				1190	status = -EACCES;
				1191
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	1192	mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n",
				1193	(unsigned long long)OCFS2_I(new_inode)->ip_blkno,
				1194	(unsigned long long)newfe_blkno,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1195	OCFS2_I(new_inode)->ip_flags);
				1196	goto bail;
				1197	}
				1198
				1199	status = ocfs2_meta_lock(new_inode, handle, &newfe_bh, 1);
				1200	if (status < 0) {
				1201	if (status != -ENOENT)
				1202	mlog_errno(status);
				1203	goto bail;
				1204	}
				1205
				1206	if (S_ISDIR(new_inode->i_mode))
				1207	links_count = 0;
				1208	else
				1209	links_count = (unsigned int) (new_inode->i_nlink - 1);
				1210
				1211	status = ocfs2_request_unlink_vote(new_inode, new_dentry,
				1212	links_count);
				1213	if (status < 0) {
				1214	mlog_errno(status);
				1215	goto bail;
				1216	}
				1217
				1218	newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
				1219
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	1220	mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu "
				1221	"newfebh=%p bhblocknr=%llu\n", new_de,
				1222	(unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1223	(unsigned long long)newfe_bh->b_blocknr : 0ULL);
				1224
				1225	if (S_ISDIR(new_inode->i_mode) \|\| (new_inode->i_nlink == 1)) {
				1226	status = ocfs2_prepare_orphan_dir(osb, handle,
				1227	new_inode,
				1228	orphan_name,
				1229	&orphan_entry_bh);
				1230	if (status < 0) {
				1231	mlog_errno(status);
				1232	goto bail;
				1233	}
				1234	}
				1235	} else {
				1236	BUG_ON(new_dentry->d_parent->d_inode != new_dir);
				1237
				1238	status = ocfs2_check_dir_for_entry(new_dir,
				1239	new_dentry->d_name.name,
				1240	new_dentry->d_name.len);
				1241	if (status)
				1242	goto bail;
				1243
				1244	status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
				1245	new_dentry->d_name.name,
				1246	new_dentry->d_name.len,
				1247	&insert_entry_bh);
				1248	if (status < 0) {
				1249	mlog_errno(status);
				1250	goto bail;
				1251	}
				1252	}
				1253
				1254	handle = ocfs2_start_trans(osb, handle, OCFS2_RENAME_CREDITS);
				1255	if (IS_ERR(handle)) {
				1256	status = PTR_ERR(handle);
				1257	handle = NULL;
				1258	mlog_errno(status);
				1259	goto bail;
				1260	}
				1261
				1262	if (new_de) {
				1263	if (S_ISDIR(new_inode->i_mode)) {
				1264	if (!ocfs2_empty_dir(new_inode) \|\|
				1265	new_inode->i_nlink != 2) {
				1266	status = -ENOTEMPTY;
				1267	goto bail;
				1268	}
				1269	}
				1270	status = ocfs2_journal_access(handle, new_inode, newfe_bh,
				1271	OCFS2_JOURNAL_ACCESS_WRITE);
				1272	if (status < 0) {
				1273	mlog_errno(status);
				1274	goto bail;
				1275	}
				1276
				1277	if (S_ISDIR(new_inode->i_mode) \|\|
				1278	(newfe->i_links_count == cpu_to_le16(1))){
				1279	status = ocfs2_orphan_add(osb, handle, new_inode,
				1280	newfe, orphan_name,
				1281	orphan_entry_bh);
				1282	if (status < 0) {
				1283	mlog_errno(status);
				1284	goto bail;
				1285	}
				1286	}
				1287
				1288	/* change the dirent to point to the correct inode */
				1289	status = ocfs2_journal_access(handle, new_dir, new_de_bh,
				1290	OCFS2_JOURNAL_ACCESS_WRITE);
				1291	if (status < 0) {
				1292	mlog_errno(status);
				1293	goto bail;
				1294	}
				1295	new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno);
				1296	new_de->file_type = old_de->file_type;
				1297	new_dir->i_version++;
				1298	status = ocfs2_journal_dirty(handle, new_de_bh);
				1299	if (status < 0) {
				1300	mlog_errno(status);
				1301	goto bail;
				1302	}
				1303
				1304	if (S_ISDIR(new_inode->i_mode))
				1305	newfe->i_links_count = 0;
				1306	else
				1307	le16_add_cpu(&newfe->i_links_count, -1);
				1308
				1309	status = ocfs2_journal_dirty(handle, newfe_bh);
				1310	if (status < 0) {
				1311	mlog_errno(status);
				1312	goto bail;
				1313	}
				1314	} else {
				1315	/* if the name was not found in new_dir, add it now */
				1316	status = ocfs2_add_entry(handle, new_dentry, old_inode,
				1317	OCFS2_I(old_inode)->ip_blkno,
				1318	new_dir_bh, insert_entry_bh);
				1319	}
				1320
				1321	old_inode->i_ctime = CURRENT_TIME;
				1322	mark_inode_dirty(old_inode);
				1323
				1324	/* now that the name has been added to new_dir, remove the old name */
				1325	status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
				1326	if (status < 0) {
				1327	mlog_errno(status);
				1328	goto bail;
				1329	}
				1330
				1331	if (new_inode) {
				1332	new_inode->i_nlink--;
				1333	new_inode->i_ctime = CURRENT_TIME;
				1334	}
				1335	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
				1336	if (old_inode_de_bh) {
				1337	status = ocfs2_journal_access(handle, old_inode,
				1338	old_inode_de_bh,
				1339	OCFS2_JOURNAL_ACCESS_WRITE);
				1340	PARENT_INO(old_inode_de_bh->b_data) =
				1341	cpu_to_le64(OCFS2_I(new_dir)->ip_blkno);
				1342	status = ocfs2_journal_dirty(handle, old_inode_de_bh);
				1343	old_dir->i_nlink--;
				1344	if (new_inode) {
				1345	new_inode->i_nlink--;
				1346	} else {
				1347	new_dir->i_nlink++;
				1348	mark_inode_dirty(new_dir);
				1349	}
				1350	}
				1351	mark_inode_dirty(old_dir);
				1352	if (new_inode)
				1353	mark_inode_dirty(new_inode);
				1354
				1355	if (old_dir != new_dir)
				1356	if (new_dir_nlink != new_dir->i_nlink) {
				1357	if (!new_dir_bh) {
				1358	mlog(ML_ERROR, "need to change nlink for new "
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	1359	"dir %llu from %d to %d but bh is NULL\n",
				1360	(unsigned long long)OCFS2_I(new_dir)->ip_blkno,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1361	(int)new_dir_nlink, new_dir->i_nlink);
				1362	} else {
				1363	struct ocfs2_dinode *fe;
				1364	status = ocfs2_journal_access(handle,
				1365	new_dir,
				1366	new_dir_bh,
				1367	OCFS2_JOURNAL_ACCESS_WRITE);
				1368	fe = (struct ocfs2_dinode *) new_dir_bh->b_data;
				1369	fe->i_links_count = cpu_to_le16(new_dir->i_nlink);
				1370	status = ocfs2_journal_dirty(handle, new_dir_bh);
				1371	}
				1372	}
				1373
				1374	if (old_dir_nlink != old_dir->i_nlink) {
				1375	if (!old_dir_bh) {
				1376	mlog(ML_ERROR, "need to change nlink for old dir "
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	1377	"%llu from %d to %d but bh is NULL!\n",
				1378	(unsigned long long)OCFS2_I(old_dir)->ip_blkno,
				1379	(int)old_dir_nlink, old_dir->i_nlink);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1380	} else {
				1381	struct ocfs2_dinode *fe;
				1382	status = ocfs2_journal_access(handle, old_dir,
				1383	old_dir_bh,
				1384	OCFS2_JOURNAL_ACCESS_WRITE);
				1385	fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
				1386	fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
				1387	status = ocfs2_journal_dirty(handle, old_dir_bh);
				1388	}
				1389	}
				1390
				1391	status = 0;
				1392	bail:
				1393	if (rename_lock)
				1394	ocfs2_rename_unlock(osb);
				1395
				1396	if (handle)
				1397	ocfs2_commit_trans(handle);
				1398
				1399	if (new_inode)
				1400	sync_mapping_buffers(old_inode->i_mapping);
				1401
				1402	if (new_inode)
				1403	iput(new_inode);
				1404	if (newfe_bh)
				1405	brelse(newfe_bh);
				1406	if (old_dir_bh)
				1407	brelse(old_dir_bh);
				1408	if (new_dir_bh)
				1409	brelse(new_dir_bh);
				1410	if (new_de_bh)
				1411	brelse(new_de_bh);
				1412	if (old_de_bh)
				1413	brelse(old_de_bh);
				1414	if (old_inode_de_bh)
				1415	brelse(old_inode_de_bh);
				1416	if (orphan_entry_bh)
				1417	brelse(orphan_entry_bh);
				1418	if (insert_entry_bh)
				1419	brelse(insert_entry_bh);
				1420
				1421	mlog_exit(status);
				1422
				1423	return status;
				1424	}
				1425
				1426	/*
				1427	* we expect i_size = strlen(symname). Copy symname into the file
				1428	* data, including the null terminator.
				1429	*/
				1430	static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
				1431	struct ocfs2_journal_handle *handle,
				1432	struct inode *inode,
				1433	const char *symname)
				1434	{
				1435	struct buffer_head **bhs = NULL;
				1436	const char *c;
				1437	struct super_block *sb = osb->sb;
				1438	u64 p_blkno;
				1439	int p_blocks;
				1440	int virtual, blocks, status, i, bytes_left;
				1441
				1442	bytes_left = i_size_read(inode) + 1;
				1443	/* we can't trust i_blocks because we're actually going to
				1444	* write i_size + 1 bytes. */
				1445	blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
				1446
				1447	mlog_entry("i_blocks = %lu, i_size = %llu, blocks = %d\n",
				1448	inode->i_blocks, i_size_read(inode), blocks);
				1449
				1450	/* Sanity check -- make sure we're going to fit. */
				1451	if (bytes_left >
				1452	ocfs2_clusters_to_bytes(sb, OCFS2_I(inode)->ip_clusters)) {
				1453	status = -EIO;
				1454	mlog_errno(status);
				1455	goto bail;
				1456	}
				1457
				1458	bhs = kcalloc(blocks, sizeof(struct buffer_head *), GFP_KERNEL);
				1459	if (!bhs) {
				1460	status = -ENOMEM;
				1461	mlog_errno(status);
				1462	goto bail;
				1463	}
				1464
				1465	status = ocfs2_extent_map_get_blocks(inode, 0, 1, &p_blkno,
				1466	&p_blocks);
				1467	if (status < 0) {
				1468	mlog_errno(status);
				1469	goto bail;
				1470	}
				1471
				1472	/* links can never be larger than one cluster so we know this
				1473	* is all going to be contiguous, but do a sanity check
				1474	* anyway. */
				1475	if ((p_blocks << sb->s_blocksize_bits) < bytes_left) {
				1476	status = -EIO;
				1477	mlog_errno(status);
				1478	goto bail;
				1479	}
				1480
				1481	virtual = 0;
				1482	while(bytes_left > 0) {
				1483	c = &symname[virtual * sb->s_blocksize];
				1484
				1485	bhs[virtual] = sb_getblk(sb, p_blkno);
				1486	if (!bhs[virtual]) {
				1487	status = -ENOMEM;
				1488	mlog_errno(status);
				1489	goto bail;
				1490	}
				1491	ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]);
				1492
				1493	status = ocfs2_journal_access(handle, inode, bhs[virtual],
				1494	OCFS2_JOURNAL_ACCESS_CREATE);
				1495	if (status < 0) {
				1496	mlog_errno(status);
				1497	goto bail;
				1498	}
				1499
				1500	memset(bhs[virtual]->b_data, 0, sb->s_blocksize);
				1501
				1502	memcpy(bhs[virtual]->b_data, c,
				1503	(bytes_left > sb->s_blocksize) ? sb->s_blocksize :
				1504	bytes_left);
				1505
				1506	status = ocfs2_journal_dirty(handle, bhs[virtual]);
				1507	if (status < 0) {
				1508	mlog_errno(status);
				1509	goto bail;
				1510	}
				1511
				1512	virtual++;
				1513	p_blkno++;
				1514	bytes_left -= sb->s_blocksize;
				1515	}
				1516
				1517	status = 0;
				1518	bail:
				1519
				1520	if (bhs) {
				1521	for(i = 0; i < blocks; i++)
				1522	if (bhs[i])
				1523	brelse(bhs[i]);
				1524	kfree(bhs);
				1525	}
				1526
				1527	mlog_exit(status);
				1528	return status;
				1529	}
				1530
				1531	static int ocfs2_symlink(struct inode *dir,
				1532	struct dentry *dentry,
				1533	const char *symname)
				1534	{
				1535	int status, l, credits;
				1536	u64 newsize;
				1537	struct ocfs2_super *osb = NULL;
				1538	struct inode *inode = NULL;
				1539	struct super_block *sb;
				1540	struct buffer_head *new_fe_bh = NULL;
				1541	struct buffer_head *de_bh = NULL;
				1542	struct buffer_head *parent_fe_bh = NULL;
				1543	struct ocfs2_dinode *fe = NULL;
				1544	struct ocfs2_dinode *dirfe;
				1545	struct ocfs2_journal_handle *handle = NULL;
				1546	struct ocfs2_alloc_context *inode_ac = NULL;
				1547	struct ocfs2_alloc_context *data_ac = NULL;
				1548
				1549	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
				1550	dentry, symname, dentry->d_name.len, dentry->d_name.name);
				1551
				1552	sb = dir->i_sb;
				1553	osb = OCFS2_SB(sb);
				1554
				1555	l = strlen(symname) + 1;
				1556
				1557	credits = ocfs2_calc_symlink_credits(sb);
				1558
				1559	handle = ocfs2_alloc_handle(osb);
				1560	if (handle == NULL) {
				1561	status = -ENOMEM;
				1562	mlog_errno(status);
				1563	goto bail;
				1564	}
				1565
				1566	/* lock the parent directory */
				1567	status = ocfs2_meta_lock(dir, handle, &parent_fe_bh, 1);
				1568	if (status < 0) {
				1569	if (status != -ENOENT)
				1570	mlog_errno(status);
				1571	goto bail;
				1572	}
				1573
				1574	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
				1575	if (!dirfe->i_links_count) {
				1576	/* can't make a file in a deleted directory. */
				1577	status = -ENOENT;
				1578	goto bail;
				1579	}
				1580
				1581	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
				1582	dentry->d_name.len);
				1583	if (status)
				1584	goto bail;
				1585
				1586	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
				1587	dentry->d_name.name,
				1588	dentry->d_name.len, &de_bh);
				1589	if (status < 0) {
				1590	mlog_errno(status);
				1591	goto bail;
				1592	}
				1593
				1594	status = ocfs2_reserve_new_inode(osb, handle, &inode_ac);
				1595	if (status < 0) {
				1596	if (status != -ENOSPC)
				1597	mlog_errno(status);
				1598	goto bail;
				1599	}
				1600
				1601	/* don't reserve bitmap space for fast symlinks. */
				1602	if (l > ocfs2_fast_symlink_chars(sb)) {
				1603	status = ocfs2_reserve_clusters(osb, handle, 1, &data_ac);
				1604	if (status < 0) {
				1605	if (status != -ENOSPC)
				1606	mlog_errno(status);
				1607	goto bail;
				1608	}
				1609	}
				1610
				1611	handle = ocfs2_start_trans(osb, handle, credits);
				1612	if (IS_ERR(handle)) {
				1613	status = PTR_ERR(handle);
				1614	handle = NULL;
				1615	mlog_errno(status);
				1616	goto bail;
				1617	}
				1618
				1619	status = ocfs2_mknod_locked(osb, dir, dentry,
				1620	S_IFLNK \| S_IRWXUGO, 0,
				1621	&new_fe_bh, parent_fe_bh, handle,
				1622	&inode, inode_ac);
				1623	if (status < 0) {
				1624	mlog_errno(status);
				1625	goto bail;
				1626	}
				1627
				1628	fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
				1629	inode->i_rdev = 0;
				1630	newsize = l - 1;
				1631	if (l > ocfs2_fast_symlink_chars(sb)) {
				1632	inode->i_op = &ocfs2_symlink_inode_operations;
				1633	status = ocfs2_do_extend_allocation(osb, inode, 1, new_fe_bh,
				1634	handle, data_ac, NULL,
				1635	NULL);
				1636	if (status < 0) {
				1637	if (status != -ENOSPC && status != -EINTR) {
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	1638	mlog(ML_ERROR,
				1639	"Failed to extend file to %llu\n",
				1640	(unsigned long long)newsize);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1641	mlog_errno(status);
				1642	status = -ENOSPC;
				1643	}
				1644	goto bail;
				1645	}
				1646	i_size_write(inode, newsize);
				1647	inode->i_blocks = ocfs2_align_bytes_to_sectors(newsize);
				1648	} else {
				1649	inode->i_op = &ocfs2_fast_symlink_inode_operations;
				1650	memcpy((char *) fe->id2.i_symlink, symname, l);
				1651	i_size_write(inode, newsize);
				1652	inode->i_blocks = 0;
				1653	}
				1654
				1655	status = ocfs2_mark_inode_dirty(handle, inode, new_fe_bh);
				1656	if (status < 0) {
				1657	mlog_errno(status);
				1658	goto bail;
				1659	}
				1660
				1661	if (!ocfs2_inode_is_fast_symlink(inode)) {
				1662	status = ocfs2_create_symlink_data(osb, handle, inode,
				1663	symname);
				1664	if (status < 0) {
				1665	mlog_errno(status);
				1666	goto bail;
				1667	}
				1668	}
				1669
				1670	status = ocfs2_add_entry(handle, dentry, inode,
				1671	le64_to_cpu(fe->i_blkno), parent_fe_bh,
				1672	de_bh);
				1673	if (status < 0) {
				1674	mlog_errno(status);
				1675	goto bail;
				1676	}
				1677
				1678	insert_inode_hash(inode);
				1679	dentry->d_op = &ocfs2_dentry_ops;
				1680	d_instantiate(dentry, inode);
				1681	bail:
				1682	if (handle)
				1683	ocfs2_commit_trans(handle);
				1684	if (new_fe_bh)
				1685	brelse(new_fe_bh);
				1686	if (parent_fe_bh)
				1687	brelse(parent_fe_bh);
				1688	if (de_bh)
				1689	brelse(de_bh);
				1690	if (inode_ac)
				1691	ocfs2_free_alloc_context(inode_ac);
				1692	if (data_ac)
				1693	ocfs2_free_alloc_context(data_ac);
				1694	if ((status < 0) && inode)
				1695	iput(inode);
				1696
				1697	mlog_exit(status);
				1698
				1699	return status;
				1700	}
				1701
				1702	int ocfs2_check_dir_entry(struct inode * dir,
				1703	struct ocfs2_dir_entry * de,
				1704	struct buffer_head * bh,
				1705	unsigned long offset)
				1706	{
				1707	const char *error_msg = NULL;
				1708	const int rlen = le16_to_cpu(de->rec_len);
				1709
				1710	if (rlen < OCFS2_DIR_REC_LEN(1))
				1711	error_msg = "rec_len is smaller than minimal";
				1712	else if (rlen % 4 != 0)
				1713	error_msg = "rec_len % 4 != 0";
				1714	else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
				1715	error_msg = "rec_len is too small for name_len";
				1716	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
				1717	error_msg = "directory entry across blocks";
				1718
				1719	if (error_msg != NULL)
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	1720	mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
				1721	"offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
				1722	(unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
				1723	offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
				1724	de->name_len);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1725	return error_msg == NULL ? 1 : 0;
				1726	}
				1727
				1728	/* we don't always have a dentry for what we want to add, so people
				1729	* like orphan dir can call this instead.
				1730	*
				1731	* If you pass me insert_bh, I'll skip the search of the other dir
				1732	* blocks and put the record in there.
				1733	*/
				1734	static int __ocfs2_add_entry(struct ocfs2_journal_handle *handle,
				1735	struct inode *dir,
				1736	const char *name, int namelen,
				1737	struct inode *inode, u64 blkno,
				1738	struct buffer_head *parent_fe_bh,
				1739	struct buffer_head *insert_bh)
				1740	{
				1741	unsigned long offset;
				1742	unsigned short rec_len;
				1743	struct ocfs2_dir_entry de, de1;
				1744	struct super_block *sb;
				1745	int retval, status;
				1746
				1747	mlog_entry_void();
				1748
				1749	sb = dir->i_sb;
				1750
				1751	if (!namelen)
				1752	return -EINVAL;
				1753
				1754	rec_len = OCFS2_DIR_REC_LEN(namelen);
				1755	offset = 0;
				1756	de = (struct ocfs2_dir_entry *) insert_bh->b_data;
				1757	while (1) {
				1758	BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data);
				1759	/* These checks should've already been passed by the
				1760	* prepare function, but I guess we can leave them
				1761	* here anyway. */
				1762	if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
				1763	retval = -ENOENT;
				1764	goto bail;
				1765	}
				1766	if (ocfs2_match(namelen, name, de)) {
				1767	retval = -EEXIST;
				1768	goto bail;
				1769	}
				1770	if (((le64_to_cpu(de->inode) == 0) &&
				1771	(le16_to_cpu(de->rec_len) >= rec_len)) \|\|
				1772	(le16_to_cpu(de->rec_len) >=
				1773	(OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
				1774	status = ocfs2_journal_access(handle, dir, insert_bh,
				1775	OCFS2_JOURNAL_ACCESS_WRITE);
				1776	/* By now the buffer is marked for journaling */
				1777	offset += le16_to_cpu(de->rec_len);
				1778	if (le64_to_cpu(de->inode)) {
				1779	de1 = (struct ocfs2_dir_entry )((char ) de +
				1780	OCFS2_DIR_REC_LEN(de->name_len));
				1781	de1->rec_len =
				1782	cpu_to_le16(le16_to_cpu(de->rec_len) -
				1783	OCFS2_DIR_REC_LEN(de->name_len));
				1784	de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
				1785	de = de1;
				1786	}
				1787	de->file_type = OCFS2_FT_UNKNOWN;
				1788	if (blkno) {
				1789	de->inode = cpu_to_le64(blkno);
				1790	ocfs2_set_de_type(de, inode->i_mode);
				1791	} else
				1792	de->inode = 0;
				1793	de->name_len = namelen;
				1794	memcpy(de->name, name, namelen);
				1795
				1796	dir->i_mtime = dir->i_ctime = CURRENT_TIME;
				1797	dir->i_version++;
				1798	status = ocfs2_journal_dirty(handle, insert_bh);
				1799	retval = 0;
				1800	goto bail;
				1801	}
				1802	offset += le16_to_cpu(de->rec_len);
				1803	de = (struct ocfs2_dir_entry ) ((char ) de + le16_to_cpu(de->rec_len));
				1804	}
				1805
				1806	/* when you think about it, the assert above should prevent us
				1807	* from ever getting here. */
				1808	retval = -ENOSPC;
				1809	bail:
				1810
				1811	mlog_exit(retval);
				1812	return retval;
				1813	}
				1814
				1815
				1816	/*
				1817	* ocfs2_delete_entry deletes a directory entry by merging it with the
				1818	* previous entry
				1819	*/
				1820	static int ocfs2_delete_entry(struct ocfs2_journal_handle *handle,
				1821	struct inode *dir,
				1822	struct ocfs2_dir_entry *de_del,
				1823	struct buffer_head *bh)
				1824	{
				1825	struct ocfs2_dir_entry de, pde;
				1826	int i, status = -ENOENT;
				1827
				1828	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
				1829
				1830	i = 0;
				1831	pde = NULL;
				1832	de = (struct ocfs2_dir_entry *) bh->b_data;
				1833	while (i < bh->b_size) {
				1834	if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
				1835	status = -EIO;
				1836	mlog_errno(status);
				1837	goto bail;
				1838	}
				1839	if (de == de_del) {
				1840	status = ocfs2_journal_access(handle, dir, bh,
				1841	OCFS2_JOURNAL_ACCESS_WRITE);
				1842	if (status < 0) {
				1843	status = -EIO;
				1844	mlog_errno(status);
				1845	goto bail;
				1846	}
				1847	if (pde)
				1848	pde->rec_len =
				1849	cpu_to_le16(le16_to_cpu(pde->rec_len) +
				1850	le16_to_cpu(de->rec_len));
				1851	else
				1852	de->inode = 0;
				1853	dir->i_version++;
				1854	status = ocfs2_journal_dirty(handle, bh);
				1855	goto bail;
				1856	}
				1857	i += le16_to_cpu(de->rec_len);
				1858	pde = de;
				1859	de = (struct ocfs2_dir_entry )((char )de + le16_to_cpu(de->rec_len));
				1860	}
				1861	bail:
				1862	mlog_exit(status);
				1863	return status;
				1864	}
				1865
				1866	/*
				1867	* Returns 0 if not found, -1 on failure, and 1 on success
				1868	*/
				1869	static int inline ocfs2_search_dirblock(struct buffer_head *bh,
				1870	struct inode *dir,
				1871	const char *name, int namelen,
				1872	unsigned long offset,
				1873	struct ocfs2_dir_entry **res_dir)
				1874	{
				1875	struct ocfs2_dir_entry *de;
				1876	char dlimit, de_buf;
				1877	int de_len;
				1878	int ret = 0;
				1879
				1880	mlog_entry_void();
				1881
				1882	de_buf = bh->b_data;
				1883	dlimit = de_buf + dir->i_sb->s_blocksize;
				1884
				1885	while (de_buf < dlimit) {
				1886	/* this code is executed quadratically often */
				1887	/* do minimal checking `by hand' */
				1888
				1889	de = (struct ocfs2_dir_entry *) de_buf;
				1890
				1891	if (de_buf + namelen <= dlimit &&
				1892	ocfs2_match(namelen, name, de)) {
				1893	/* found a match - just to be sure, do a full check */
				1894	if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
				1895	ret = -1;
				1896	goto bail;
				1897	}
				1898	*res_dir = de;
				1899	ret = 1;
				1900	goto bail;
				1901	}
				1902
				1903	/* prevent looping on a bad block */
				1904	de_len = le16_to_cpu(de->rec_len);
				1905	if (de_len <= 0) {
				1906	ret = -1;
				1907	goto bail;
				1908	}
				1909
				1910	de_buf += de_len;
				1911	offset += de_len;
				1912	}
				1913
				1914	bail:
				1915	mlog_exit(ret);
				1916	return ret;
				1917	}
				1918
				1919	struct buffer_head ocfs2_find_entry(const char name, int namelen,
				1920	struct inode *dir,
				1921	struct ocfs2_dir_entry **res_dir)
				1922	{
				1923	struct super_block *sb;
				1924	struct buffer_head *bh_use[NAMEI_RA_SIZE];
				1925	struct buffer_head bh, ret = NULL;
				1926	unsigned long start, block, b;
				1927	int ra_max = 0; /* Number of bh's in the readahead
				1928	buffer, bh_use[] */
				1929	int ra_ptr = 0; /* Current index into readahead
				1930	buffer */
				1931	int num = 0;
				1932	int nblocks, i, err;
				1933
				1934	mlog_entry_void();
				1935
				1936	*res_dir = NULL;
				1937	sb = dir->i_sb;
				1938
				1939	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
				1940	start = OCFS2_I(dir)->ip_dir_start_lookup;
				1941	if (start >= nblocks)
				1942	start = 0;
				1943	block = start;
				1944
				1945	restart:
				1946	do {
				1947	/*
				1948	* We deal with the read-ahead logic here.
				1949	*/
				1950	if (ra_ptr >= ra_max) {
				1951	/* Refill the readahead buffer */
				1952	ra_ptr = 0;
				1953	b = block;
				1954	for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				1955	/*
				1956	* Terminate if we reach the end of the
				1957	* directory and must wrap, or if our
				1958	* search has finished at this block.
				1959	*/
				1960	if (b >= nblocks \|\| (num && block == start)) {
				1961	bh_use[ra_max] = NULL;
				1962	break;
				1963	}
				1964	num++;
				1965
				1966	/* XXX: questionable readahead stuff here */
				1967	bh = ocfs2_bread(dir, b++, &err, 1);
				1968	bh_use[ra_max] = bh;
				1969	#if 0 // ???
				1970	if (bh)
				1971	ll_rw_block(READ, 1, &bh);
				1972	#endif
				1973	}
				1974	}
				1975	if ((bh = bh_use[ra_ptr++]) == NULL)
				1976	goto next;
				1977	wait_on_buffer(bh);
				1978	if (!buffer_uptodate(bh)) {
				1979	/* read error, skip block & hope for the best */
				1980	brelse(bh);
				1981	goto next;
				1982	}
				1983	i = ocfs2_search_dirblock(bh, dir, name, namelen,
				1984	block << sb->s_blocksize_bits,
				1985	res_dir);
				1986	if (i == 1) {
				1987	OCFS2_I(dir)->ip_dir_start_lookup = block;
				1988	ret = bh;
				1989	goto cleanup_and_exit;
				1990	} else {
				1991	brelse(bh);
				1992	if (i < 0)
				1993	goto cleanup_and_exit;
				1994	}
				1995	next:
				1996	if (++block >= nblocks)
				1997	block = 0;
				1998	} while (block != start);
				1999
				2000	/*
				2001	* If the directory has grown while we were searching, then
				2002	* search the last part of the directory before giving up.
				2003	*/
				2004	block = nblocks;
				2005	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
				2006	if (block < nblocks) {
				2007	start = 0;
				2008	goto restart;
				2009	}
				2010
				2011	cleanup_and_exit:
				2012	/* Clean up the read-ahead blocks */
				2013	for (; ra_ptr < ra_max; ra_ptr++)
				2014	brelse(bh_use[ra_ptr]);
				2015
				2016	mlog_exit_ptr(ret);
				2017	return ret;
				2018	}
				2019
				2020	static int ocfs2_blkno_stringify(u64 blkno, char *name)
				2021	{
				2022	int status, namelen;
				2023
				2024	mlog_entry_void();
				2025
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	2026	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
				2027	(long long)blkno);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2028	if (namelen <= 0) {
				2029	if (namelen)
				2030	status = namelen;
				2031	else
				2032	status = -EINVAL;
				2033	mlog_errno(status);
				2034	goto bail;
				2035	}
				2036	if (namelen != OCFS2_ORPHAN_NAMELEN) {
				2037	status = -EINVAL;
				2038	mlog_errno(status);
				2039	goto bail;
				2040	}
				2041
				2042	mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name,
				2043	namelen);
				2044
				2045	status = 0;
				2046	bail:
				2047	mlog_exit(status);
				2048	return status;
				2049	}
				2050
				2051	static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
				2052	struct ocfs2_journal_handle *handle,
				2053	struct inode *inode,
				2054	char *name,
				2055	struct buffer_head **de_bh)
				2056	{
				2057	struct inode *orphan_dir_inode = NULL;
				2058	struct buffer_head *orphan_dir_bh = NULL;
				2059	int status = 0;
				2060
				2061	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
				2062	if (status < 0) {
				2063	mlog_errno(status);
				2064	goto leave;
				2065	}
				2066
				2067	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
				2068	ORPHAN_DIR_SYSTEM_INODE,
				2069	osb->slot_num);
				2070	if (!orphan_dir_inode) {
				2071	status = -ENOENT;
				2072	mlog_errno(status);
				2073	goto leave;
				2074	}
				2075
				2076	ocfs2_handle_add_inode(handle, orphan_dir_inode);
				2077	status = ocfs2_meta_lock(orphan_dir_inode, handle, &orphan_dir_bh, 1);
				2078	if (status < 0) {
				2079	mlog_errno(status);
				2080	goto leave;
				2081	}
				2082
				2083	status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
				2084	orphan_dir_bh, name,
				2085	OCFS2_ORPHAN_NAMELEN, de_bh);
				2086	if (status < 0) {
				2087	mlog_errno(status);
				2088	goto leave;
				2089	}
				2090
				2091	leave:
				2092	if (orphan_dir_inode)
				2093	iput(orphan_dir_inode);
				2094
				2095	if (orphan_dir_bh)
				2096	brelse(orphan_dir_bh);
				2097
				2098	mlog_exit(status);
				2099	return status;
				2100	}
				2101
				2102	static int ocfs2_orphan_add(struct ocfs2_super *osb,
				2103	struct ocfs2_journal_handle *handle,
				2104	struct inode *inode,
				2105	struct ocfs2_dinode *fe,
				2106	char *name,
				2107	struct buffer_head *de_bh)
				2108	{
				2109	struct inode *orphan_dir_inode = NULL;
				2110	struct buffer_head *orphan_dir_bh = NULL;
				2111	int status = 0;
				2112	struct ocfs2_dinode *orphan_fe;
				2113
				2114	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
				2115
				2116	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
				2117	ORPHAN_DIR_SYSTEM_INODE,
				2118	osb->slot_num);
				2119	if (!orphan_dir_inode) {
				2120	status = -ENOENT;
				2121	mlog_errno(status);
				2122	goto leave;
				2123	}
				2124
				2125	status = ocfs2_read_block(osb,
				2126	OCFS2_I(orphan_dir_inode)->ip_blkno,
				2127	&orphan_dir_bh, OCFS2_BH_CACHED,
				2128	orphan_dir_inode);
				2129	if (status < 0) {
				2130	mlog_errno(status);
				2131	goto leave;
				2132	}
				2133
				2134	status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh,
				2135	OCFS2_JOURNAL_ACCESS_WRITE);
				2136	if (status < 0) {
				2137	mlog_errno(status);
				2138	goto leave;
				2139	}
				2140
				2141	/* we're a cluster, and nlink can change on disk from
				2142	* underneath us... */
				2143	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
				2144	if (S_ISDIR(inode->i_mode))
				2145	le16_add_cpu(&orphan_fe->i_links_count, 1);
				2146	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
				2147
				2148	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
				2149	if (status < 0) {
				2150	mlog_errno(status);
				2151	goto leave;
				2152	}
				2153
				2154	status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
				2155	OCFS2_ORPHAN_NAMELEN, inode,
				2156	OCFS2_I(inode)->ip_blkno,
				2157	orphan_dir_bh, de_bh);
				2158	if (status < 0) {
				2159	mlog_errno(status);
				2160	goto leave;
				2161	}
				2162
				2163	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
				2164
				2165	/* Record which orphan dir our inode now resides
				2166	* in. delete_inode will use this to determine which orphan
				2167	* dir to lock. */
				2168	spin_lock(&OCFS2_I(inode)->ip_lock);
				2169	OCFS2_I(inode)->ip_orphaned_slot = osb->slot_num;
				2170	spin_unlock(&OCFS2_I(inode)->ip_lock);
				2171
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	2172	mlog(0, "Inode %llu orphaned in slot %d\n",
				2173	(unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2174
				2175	leave:
				2176	if (orphan_dir_inode)
				2177	iput(orphan_dir_inode);
				2178
				2179	if (orphan_dir_bh)
				2180	brelse(orphan_dir_bh);
				2181
				2182	mlog_exit(status);
				2183	return status;
				2184	}
				2185
				2186	/* unlike orphan_add, we expect the orphan dir to already be locked here. */
				2187	int ocfs2_orphan_del(struct ocfs2_super *osb,
				2188	struct ocfs2_journal_handle *handle,
				2189	struct inode *orphan_dir_inode,
				2190	struct inode *inode,
				2191	struct buffer_head *orphan_dir_bh)
				2192	{
				2193	char name[OCFS2_ORPHAN_NAMELEN + 1];
				2194	struct ocfs2_dinode *orphan_fe;
				2195	int status = 0;
				2196	struct buffer_head *target_de_bh = NULL;
				2197	struct ocfs2_dir_entry *target_de = NULL;
				2198
				2199	mlog_entry_void();
				2200
				2201	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
				2202	if (status < 0) {
				2203	mlog_errno(status);
				2204	goto leave;
				2205	}
				2206
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame^]	2207	mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n",
				2208	name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
				2209	OCFS2_ORPHAN_NAMELEN);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2210
				2211	/* find it's spot in the orphan directory */
				2212	target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
				2213	orphan_dir_inode, &target_de);
				2214	if (!target_de_bh) {
				2215	status = -ENOENT;
				2216	mlog_errno(status);
				2217	goto leave;
				2218	}
				2219
				2220	/* remove it from the orphan directory */
				2221	status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de,
				2222	target_de_bh);
				2223	if (status < 0) {
				2224	mlog_errno(status);
				2225	goto leave;
				2226	}
				2227
				2228	status = ocfs2_journal_access(handle,orphan_dir_inode, orphan_dir_bh,
				2229	OCFS2_JOURNAL_ACCESS_WRITE);
				2230	if (status < 0) {
				2231	mlog_errno(status);
				2232	goto leave;
				2233	}
				2234
				2235	/* do the i_nlink dance! :) */
				2236	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
				2237	if (S_ISDIR(inode->i_mode))
				2238	le16_add_cpu(&orphan_fe->i_links_count, -1);
				2239	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
				2240
				2241	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
				2242	if (status < 0) {
				2243	mlog_errno(status);
				2244	goto leave;
				2245	}
				2246
				2247	leave:
				2248	if (target_de_bh)
				2249	brelse(target_de_bh);
				2250
				2251	mlog_exit(status);
				2252	return status;
				2253	}
				2254
				2255	struct inode_operations ocfs2_dir_iops = {
				2256	.create = ocfs2_create,
				2257	.lookup = ocfs2_lookup,
				2258	.link = ocfs2_link,
				2259	.unlink = ocfs2_unlink,
				2260	.rmdir = ocfs2_unlink,
				2261	.symlink = ocfs2_symlink,
				2262	.mkdir = ocfs2_mkdir,
				2263	.mknod = ocfs2_mknod,
				2264	.rename = ocfs2_rename,
				2265	.setattr = ocfs2_setattr,
				2266	.getattr = ocfs2_getattr,
				2267	};