Blame - fs/ocfs2/file.c - kernel/msm-4.9

blob: 81296b4e364632dd5936f59d8adeab9832f2d2fd [file] [log] [blame]

Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1	/* -*- mode: c; c-basic-offset: 8; -*-
				2	* vim: noexpandtab sw=8 ts=8 sts=0:
				3	*
				4	* file.c
				5	*
				6	* File open, close, extend, truncate
				7	*
				8	* Copyright (C) 2002, 2004 Oracle. All rights reserved.
				9	*
				10	* This program is free software; you can redistribute it and/or
				11	* modify it under the terms of the GNU General Public
				12	* License as published by the Free Software Foundation; either
				13	* version 2 of the License, or (at your option) any later version.
				14	*
				15	* This program is distributed in the hope that it will be useful,
				16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				18	* General Public License for more details.
				19	*
				20	* You should have received a copy of the GNU General Public
				21	* License along with this program; if not, write to the
				22	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
				23	* Boston, MA 02111-1307, USA.
				24	*/
				25
Randy Dunlap	16f7e0f	2006-01-11 12:17:46 -0800	[diff] [blame]	26	#include <linux/capability.h>
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	27	#include <linux/fs.h>
				28	#include <linux/types.h>
				29	#include <linux/slab.h>
				30	#include <linux/highmem.h>
				31	#include <linux/pagemap.h>
				32	#include <linux/uio.h>
Mark Fasheh	e2057c5	2006-10-03 17:53:05 -0700	[diff] [blame]	33	#include <linux/sched.h>
Jens Axboe	d6b29d7	2007-06-04 09:59:47 +0200	[diff] [blame]	34	#include <linux/splice.h>
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	35	#include <linux/mount.h>
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	36	#include <linux/writeback.h>
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	37	#include <linux/falloc.h>
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	38	#include <linux/quotaops.h>
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	39
				40	#define MLOG_MASK_PREFIX ML_INODE
				41	#include <cluster/masklog.h>
				42
				43	#include "ocfs2.h"
				44
				45	#include "alloc.h"
				46	#include "aops.h"
				47	#include "dir.h"
				48	#include "dlmglue.h"
				49	#include "extent_map.h"
				50	#include "file.h"
				51	#include "sysfile.h"
				52	#include "inode.h"
Herbert Poetzl	ca4d147	2006-07-03 17:27:12 -0700	[diff] [blame]	53	#include "ioctl.h"
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	54	#include "journal.h"
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	55	#include "locks.h"
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	56	#include "mmap.h"
				57	#include "suballoc.h"
				58	#include "super.h"
Tiger Yang	cf1d6c7	2008-08-18 17:11:00 +0800	[diff] [blame]	59	#include "xattr.h"
Tiger Yang	23fc270	2008-11-14 11:17:18 +0800	[diff] [blame]	60	#include "acl.h"
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	61	#include "quota.h"
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	62	#include "refcounttree.h"
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	63
				64	#include "buffer_head_io.h"
				65
				66	static int ocfs2_sync_inode(struct inode *inode)
				67	{
				68	filemap_fdatawrite(inode->i_mapping);
				69	return sync_mapping_buffers(inode->i_mapping);
				70	}
				71
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	72	static int ocfs2_init_file_private(struct inode inode, struct file file)
				73	{
				74	struct ocfs2_file_private *fp;
				75
				76	fp = kzalloc(sizeof(struct ocfs2_file_private), GFP_KERNEL);
				77	if (!fp)
				78	return -ENOMEM;
				79
				80	fp->fp_file = file;
				81	mutex_init(&fp->fp_mutex);
				82	ocfs2_file_lock_res_init(&fp->fp_flock, fp);
				83	file->private_data = fp;
				84
				85	return 0;
				86	}
				87
				88	static void ocfs2_free_file_private(struct inode inode, struct file file)
				89	{
				90	struct ocfs2_file_private *fp = file->private_data;
				91	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				92
				93	if (fp) {
				94	ocfs2_simple_drop_lockres(osb, &fp->fp_flock);
				95	ocfs2_lock_res_free(&fp->fp_flock);
				96	kfree(fp);
				97	file->private_data = NULL;
				98	}
				99	}
				100
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	101	static int ocfs2_file_open(struct inode inode, struct file file)
				102	{
				103	int status;
				104	int mode = file->f_flags;
				105	struct ocfs2_inode_info *oi = OCFS2_I(inode);
				106
				107	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	108	file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	109
Christoph Hellwig	907f455	2010-03-03 09:05:06 -0500	[diff] [blame]	110	if (file->f_mode & FMODE_WRITE)
Christoph Hellwig	871a293	2010-03-03 09:05:07 -0500	[diff] [blame]	111	dquot_initialize(inode);
Christoph Hellwig	907f455	2010-03-03 09:05:06 -0500	[diff] [blame]	112
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	113	spin_lock(&oi->ip_lock);
				114
				115	/* Check that the inode hasn't been wiped from disk by another
				116	* node. If it hasn't then we're safe as long as we hold the
				117	* spin lock until our increment of open count. */
				118	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) {
				119	spin_unlock(&oi->ip_lock);
				120
				121	status = -ENOENT;
				122	goto leave;
				123	}
				124
				125	if (mode & O_DIRECT)
				126	oi->ip_flags \|= OCFS2_INODE_OPEN_DIRECT;
				127
				128	oi->ip_open_count++;
				129	spin_unlock(&oi->ip_lock);
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	130
				131	status = ocfs2_init_file_private(inode, file);
				132	if (status) {
				133	/*
				134	* We want to set open count back if we're failing the
				135	* open.
				136	*/
				137	spin_lock(&oi->ip_lock);
				138	oi->ip_open_count--;
				139	spin_unlock(&oi->ip_lock);
				140	}
				141
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	142	leave:
				143	mlog_exit(status);
				144	return status;
				145	}
				146
				147	static int ocfs2_file_release(struct inode inode, struct file file)
				148	{
				149	struct ocfs2_inode_info *oi = OCFS2_I(inode);
				150
				151	mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	152	file->f_path.dentry->d_name.len,
				153	file->f_path.dentry->d_name.name);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	154
				155	spin_lock(&oi->ip_lock);
				156	if (!--oi->ip_open_count)
				157	oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
				158	spin_unlock(&oi->ip_lock);
				159
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	160	ocfs2_free_file_private(inode, file);
				161
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	162	mlog_exit(0);
				163
				164	return 0;
				165	}
				166
/*
 * Open handler for directories: only attaches the per-file private data.
 * Returns whatever ocfs2_init_file_private() returns.
 */
static int ocfs2_dir_open(struct inode *inode, struct file *file)
{
	return ocfs2_init_file_private(inode, file);
}
				171
				172	static int ocfs2_dir_release(struct inode inode, struct file file)
				173	{
				174	ocfs2_free_file_private(inode, file);
				175	return 0;
				176	}
				177
Christoph Hellwig	7ea8085	2010-05-26 17:53:25 +0200	[diff] [blame]	178	static int ocfs2_sync_file(struct file *file, int datasync)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	179	{
				180	int err = 0;
				181	journal_t *journal;
Christoph Hellwig	7ea8085	2010-05-26 17:53:25 +0200	[diff] [blame]	182	struct dentry *dentry = file->f_path.dentry;
				183	struct inode *inode = file->f_mapping->host;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	184	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				185
				186	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
				187	dentry->d_name.len, dentry->d_name.name);
				188
				189	err = ocfs2_sync_inode(dentry->d_inode);
				190	if (err)
				191	goto bail;
				192
Hisashi Hifumi	e04cc15	2009-06-09 16:47:45 +0900	[diff] [blame]	193	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
				194	goto bail;
				195
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	196	journal = osb->journal->j_journal;
Joel Becker	2b4e30f	2008-09-03 20:03:41 -0700	[diff] [blame]	197	err = jbd2_journal_force_commit(journal);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	198
				199	bail:
				200	mlog_exit(err);
				201
				202	return (err < 0) ? -EIO : 0;
				203	}
				204
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	205	int ocfs2_should_update_atime(struct inode *inode,
				206	struct vfsmount *vfsmnt)
				207	{
				208	struct timespec now;
				209	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				210
				211	if (ocfs2_is_hard_readonly(osb) \|\| ocfs2_is_soft_readonly(osb))
				212	return 0;
				213
				214	if ((inode->i_flags & S_NOATIME) \|\|
				215	((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)))
				216	return 0;
				217
Mark Fasheh	6c2aad0	2006-12-19 15:25:52 -0800	[diff] [blame]	218	/*
				219	* We can be called with no vfsmnt structure - NFSD will
				220	* sometimes do this.
				221	*
				222	* Note that our action here is different than touch_atime() -
				223	* if we can't tell whether this is a noatime mount, then we
				224	* don't know whether to trust the value of s_atime_quantum.
				225	*/
				226	if (vfsmnt == NULL)
				227	return 0;
				228
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	229	if ((vfsmnt->mnt_flags & MNT_NOATIME) \|\|
				230	((vfsmnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)))
				231	return 0;
				232
Mark Fasheh	7e913c5	2006-12-13 00:34:35 -0800	[diff] [blame]	233	if (vfsmnt->mnt_flags & MNT_RELATIME) {
				234	if ((timespec_compare(&inode->i_atime, &inode->i_mtime) <= 0) \|\|
				235	(timespec_compare(&inode->i_atime, &inode->i_ctime) <= 0))
				236	return 1;
				237
				238	return 0;
				239	}
				240
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	241	now = CURRENT_TIME;
				242	if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
				243	return 0;
				244	else
				245	return 1;
				246	}
				247
				248	int ocfs2_update_inode_atime(struct inode *inode,
				249	struct buffer_head *bh)
				250	{
				251	int ret;
				252	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				253	handle_t *handle;
Mark Fasheh	c11e9fa	2007-07-20 11:24:53 -0700	[diff] [blame]	254	struct ocfs2_dinode di = (struct ocfs2_dinode ) bh->b_data;
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	255
				256	mlog_entry_void();
				257
				258	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
Jan Kara	fa38e92	2008-10-20 19:23:51 +0200	[diff] [blame]	259	if (IS_ERR(handle)) {
				260	ret = PTR_ERR(handle);
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	261	mlog_errno(ret);
				262	goto out;
				263	}
				264
Joel Becker	0cf2f76	2009-02-12 16:41:25 -0800	[diff] [blame]	265	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
Joel Becker	13723d0	2008-10-17 19:25:01 -0700	[diff] [blame]	266	OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh	c11e9fa	2007-07-20 11:24:53 -0700	[diff] [blame]	267	if (ret) {
				268	mlog_errno(ret);
				269	goto out_commit;
				270	}
				271
				272	/*
				273	* Don't use ocfs2_mark_inode_dirty() here as we don't always
				274	* have i_mutex to guard against concurrent changes to other
				275	* inode fields.
				276	*/
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	277	inode->i_atime = CURRENT_TIME;
Mark Fasheh	c11e9fa	2007-07-20 11:24:53 -0700	[diff] [blame]	278	di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
				279	di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
Joel Becker	ec20cec	2010-03-19 14:13:52 -0700	[diff] [blame]	280	ocfs2_journal_dirty(handle, bh);
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	281
Mark Fasheh	c11e9fa	2007-07-20 11:24:53 -0700	[diff] [blame]	282	out_commit:
Tiger Yang	7f1a37e	2006-11-15 15:48:42 +0800	[diff] [blame]	283	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
				284	out:
				285	mlog_exit(ret);
				286	return ret;
				287	}
				288
Adrian Bunk	6cb129f	2007-04-26 00:29:35 -0700	[diff] [blame]	289	static int ocfs2_set_inode_size(handle_t *handle,
				290	struct inode *inode,
				291	struct buffer_head *fe_bh,
				292	u64 new_i_size)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	293	{
				294	int status;
				295
				296	mlog_entry_void();
				297	i_size_write(inode, new_i_size);
Mark Fasheh	8110b07	2007-03-22 16:53:23 -0700	[diff] [blame]	298	inode->i_blocks = ocfs2_inode_sector_count(inode);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	299	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
				300
				301	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
				302	if (status < 0) {
				303	mlog_errno(status);
				304	goto bail;
				305	}
				306
				307	bail:
				308	mlog_exit(status);
				309	return status;
				310	}
				311
Jan Kara	9e33d69	2008-08-25 19:56:50 +0200	[diff] [blame]	312	int ocfs2_simple_size_update(struct inode *inode,
				313	struct buffer_head *di_bh,
				314	u64 new_i_size)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	315	{
				316	int ret;
				317	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	318	handle_t *handle = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	319
Mark Fasheh	65eff9c	2006-10-09 17:26:22 -0700	[diff] [blame]	320	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
Jan Kara	fa38e92	2008-10-20 19:23:51 +0200	[diff] [blame]	321	if (IS_ERR(handle)) {
				322	ret = PTR_ERR(handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	323	mlog_errno(ret);
				324	goto out;
				325	}
				326
				327	ret = ocfs2_set_inode_size(handle, inode, di_bh,
				328	new_i_size);
				329	if (ret < 0)
				330	mlog_errno(ret);
				331
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	332	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	333	out:
				334	return ret;
				335	}
				336
Tao Ma	37f8a2b	2009-08-26 09:47:28 +0800	[diff] [blame]	337	static int ocfs2_cow_file_pos(struct inode *inode,
				338	struct buffer_head *fe_bh,
				339	u64 offset)
				340	{
				341	int status;
				342	u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
				343	unsigned int num_clusters = 0;
				344	unsigned int ext_flags = 0;
				345
				346	/*
				347	* If the new offset is aligned to the range of the cluster, there is
				348	* no space for ocfs2_zero_range_for_truncate to fill, so no need to
				349	* CoW either.
				350	*/
				351	if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
				352	return 0;
				353
				354	status = ocfs2_get_clusters(inode, cpos, &phys,
				355	&num_clusters, &ext_flags);
				356	if (status) {
				357	mlog_errno(status);
				358	goto out;
				359	}
				360
				361	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
				362	goto out;
				363
				364	return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
				365
				366	out:
				367	return status;
				368	}
				369
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	370	static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
				371	struct inode *inode,
				372	struct buffer_head *fe_bh,
				373	u64 new_i_size)
				374	{
				375	int status;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	376	handle_t *handle;
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	377	struct ocfs2_dinode *di;
Mark Fasheh	35edec1	2007-07-06 14:41:18 -0700	[diff] [blame]	378	u64 cluster_bytes;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	379
				380	mlog_entry_void();
				381
Tao Ma	37f8a2b	2009-08-26 09:47:28 +0800	[diff] [blame]	382	/*
				383	* We need to CoW the cluster contains the offset if it is reflinked
				384	* since we will call ocfs2_zero_range_for_truncate later which will
				385	* write "0" from offset to the end of the cluster.
				386	*/
				387	status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
				388	if (status) {
				389	mlog_errno(status);
				390	return status;
				391	}
				392
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	393	/* TODO: This needs to actually orphan the inode in this
				394	* transaction. */
				395
Mark Fasheh	65eff9c	2006-10-09 17:26:22 -0700	[diff] [blame]	396	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	397	if (IS_ERR(handle)) {
				398	status = PTR_ERR(handle);
				399	mlog_errno(status);
				400	goto out;
				401	}
				402
Joel Becker	0cf2f76	2009-02-12 16:41:25 -0800	[diff] [blame]	403	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
Joel Becker	13723d0	2008-10-17 19:25:01 -0700	[diff] [blame]	404	OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	405	if (status < 0) {
				406	mlog_errno(status);
				407	goto out_commit;
				408	}
				409
				410	/*
				411	* Do this before setting i_size.
				412	*/
Mark Fasheh	35edec1	2007-07-06 14:41:18 -0700	[diff] [blame]	413	cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
				414	status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
				415	cluster_bytes);
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	416	if (status) {
				417	mlog_errno(status);
				418	goto out_commit;
				419	}
				420
				421	i_size_write(inode, new_i_size);
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	422	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
				423
				424	di = (struct ocfs2_dinode *) fe_bh->b_data;
				425	di->i_size = cpu_to_le64(new_i_size);
				426	di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
				427	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
				428
Joel Becker	ec20cec	2010-03-19 14:13:52 -0700	[diff] [blame]	429	ocfs2_journal_dirty(handle, fe_bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	430
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	431	out_commit:
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	432	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	433	out:
Mark Fasheh	60b1139	2007-02-16 11:46:50 -0800	[diff] [blame]	434
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	435	mlog_exit(status);
				436	return status;
				437	}
				438
				439	static int ocfs2_truncate_file(struct inode *inode,
				440	struct buffer_head *di_bh,
				441	u64 new_i_size)
				442	{
				443	int status = 0;
				444	struct ocfs2_dinode *fe = NULL;
				445	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	446
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	447	mlog_entry("(inode = %llu, new_i_size = %llu\n",
				448	(unsigned long long)OCFS2_I(inode)->ip_blkno,
				449	(unsigned long long)new_i_size);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	450
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	451	/* We trust di_bh because it comes from ocfs2_inode_lock(), which
				452	* already validated it */
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	453	fe = (struct ocfs2_dinode *) di_bh->b_data;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	454
				455	mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode),
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	456	"Inode %llu, inode i_size = %lld != di "
				457	"i_size = %llu, i_flags = 0x%x\n",
				458	(unsigned long long)OCFS2_I(inode)->ip_blkno,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	459	i_size_read(inode),
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	460	(unsigned long long)le64_to_cpu(fe->i_size),
				461	le32_to_cpu(fe->i_flags));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	462
				463	if (new_i_size > le64_to_cpu(fe->i_size)) {
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	464	mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n",
				465	(unsigned long long)le64_to_cpu(fe->i_size),
				466	(unsigned long long)new_i_size);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	467	status = -EINVAL;
				468	mlog_errno(status);
				469	goto bail;
				470	}
				471
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	472	mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n",
				473	(unsigned long long)le64_to_cpu(fe->i_blkno),
				474	(unsigned long long)le64_to_cpu(fe->i_size),
				475	(unsigned long long)new_i_size);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	476
				477	/* lets handle the simple truncate cases before doing any more
				478	* cluster locking. */
				479	if (new_i_size == le64_to_cpu(fe->i_size))
				480	goto bail;
				481
Mark Fasheh	2e89b2e	2007-05-09 13:40:18 -0700	[diff] [blame]	482	down_write(&OCFS2_I(inode)->ip_alloc_sem);
				483
Mark Fasheh	4fe370a	2009-12-07 13:15:40 -0800	[diff] [blame]	484	ocfs2_resv_discard(&osb->osb_la_resmap,
				485	&OCFS2_I(inode)->ip_la_data_resv);
				486
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	487	/*
				488	* The inode lock forced other nodes to sync and drop their
				489	* pages, which (correctly) happens even if we have a truncate
				490	* without allocation change - ocfs2 cluster sizes can be much
				491	* greater than page size, so we have to truncate them
				492	* anyway.
				493	*/
Mark Fasheh	2e89b2e	2007-05-09 13:40:18 -0700	[diff] [blame]	494	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
				495	truncate_inode_pages(inode->i_mapping, new_i_size);
				496
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	497	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				498	status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
Mark Fasheh	b1967d0	2007-11-20 11:56:39 -0800	[diff] [blame]	499	i_size_read(inode), 1);
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	500	if (status)
				501	mlog_errno(status);
				502
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	503	goto bail_unlock_sem;
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	504	}
				505
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	506	/* alright, we're going to need to do a full blown alloc size
				507	* change. Orphan the inode so that recovery can complete the
				508	* truncate if necessary. This does the task of marking
				509	* i_size. */
				510	status = ocfs2_orphan_for_truncate(osb, inode, di_bh, new_i_size);
				511	if (status < 0) {
				512	mlog_errno(status);
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	513	goto bail_unlock_sem;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	514	}
				515
Tristan Ye	78f9467	2010-05-11 17:54:42 +0800	[diff] [blame]	516	status = ocfs2_commit_truncate(osb, inode, di_bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	517	if (status < 0) {
				518	mlog_errno(status);
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	519	goto bail_unlock_sem;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	520	}
				521
				522	/* TODO: orphan dir cleanup here. */
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	523	bail_unlock_sem:
Mark Fasheh	2e89b2e	2007-05-09 13:40:18 -0700	[diff] [blame]	524	up_write(&OCFS2_I(inode)->ip_alloc_sem);
				525
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	526	bail:
Tao Ma	8b2c0db	2009-08-18 11:43:49 +0800	[diff] [blame]	527	if (!status && OCFS2_I(inode)->ip_clusters == 0)
				528	status = ocfs2_try_remove_refcount_tree(inode, di_bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	529
				530	mlog_exit(status);
				531	return status;
				532	}
				533
				534	/*
Tao Ma	0eb8d47	2008-08-18 17:38:45 +0800	[diff] [blame]	535	* extend file allocation only here.
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	536	* we'll update all the disk stuff, and oip->alloc_size
				537	*
				538	* expect stuff to be locked, a transaction started and enough data /
				539	* metadata reservations in the contexts.
				540	*
				541	* Will return -EAGAIN, and a reason if a restart is needed.
				542	* If passed in, *reason will always be set, even in error.
				543	*/
Tao Ma	0eb8d47	2008-08-18 17:38:45 +0800	[diff] [blame]	544	int ocfs2_add_inode_data(struct ocfs2_super *osb,
				545	struct inode *inode,
				546	u32 *logical_offset,
				547	u32 clusters_to_add,
				548	int mark_unwritten,
				549	struct buffer_head *fe_bh,
				550	handle_t *handle,
				551	struct ocfs2_alloc_context *data_ac,
				552	struct ocfs2_alloc_context *meta_ac,
				553	enum ocfs2_alloc_restarted *reason_ret)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	554	{
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	555	int ret;
				556	struct ocfs2_extent_tree et;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	557
Joel Becker	5e404e9	2009-02-13 03:54:22 -0800	[diff] [blame]	558	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
Joel Becker	cbee7e1	2009-02-13 03:34:15 -0800	[diff] [blame]	559	ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
				560	clusters_to_add, mark_unwritten,
				561	data_ac, meta_ac, reason_ret);
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	562
				563	return ret;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	564	}
				565
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	566	static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
				567	u32 clusters_to_add, int mark_unwritten)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	568	{
				569	int status = 0;
				570	int restart_func = 0;
Mark Fasheh	abf8b15	2007-01-17 13:07:24 -0800	[diff] [blame]	571	int credits;
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	572	u32 prev_clusters;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	573	struct buffer_head *bh = NULL;
				574	struct ocfs2_dinode *fe = NULL;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	575	handle_t *handle = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	576	struct ocfs2_alloc_context *data_ac = NULL;
				577	struct ocfs2_alloc_context *meta_ac = NULL;
				578	enum ocfs2_alloc_restarted why;
				579	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	580	struct ocfs2_extent_tree et;
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	581	int did_quota = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	582
				583	mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
				584
Mark Fasheh	dcd0538	2007-01-16 11:32:23 -0800	[diff] [blame]	585	/*
				586	* This function only exists for file systems which don't
				587	* support holes.
				588	*/
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	589	BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
Mark Fasheh	dcd0538	2007-01-16 11:32:23 -0800	[diff] [blame]	590
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	591	status = ocfs2_read_inode_block(inode, &bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	592	if (status < 0) {
				593	mlog_errno(status);
				594	goto leave;
				595	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	596	fe = (struct ocfs2_dinode *) bh->b_data;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	597
				598	restart_all:
				599	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
				600
Tao Ma	e7d4cb6	2008-08-18 17:38:44 +0800	[diff] [blame]	601	mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
				602	"clusters_to_add = %u\n",
				603	(unsigned long long)OCFS2_I(inode)->ip_blkno,
				604	(long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
				605	clusters_to_add);
Joel Becker	5e404e9	2009-02-13 03:54:22 -0800	[diff] [blame]	606	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
Joel Becker	f99b9b7	2008-08-20 19:36:33 -0700	[diff] [blame]	607	status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
				608	&data_ac, &meta_ac);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	609	if (status) {
				610	mlog_errno(status);
				611	goto leave;
				612	}
				613
Tao Ma	811f933	2008-08-18 17:38:43 +0800	[diff] [blame]	614	credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list,
				615	clusters_to_add);
Mark Fasheh	65eff9c	2006-10-09 17:26:22 -0700	[diff] [blame]	616	handle = ocfs2_start_trans(osb, credits);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	617	if (IS_ERR(handle)) {
				618	status = PTR_ERR(handle);
				619	handle = NULL;
				620	mlog_errno(status);
				621	goto leave;
				622	}
				623
				624	restarted_transaction:
Christoph Hellwig	5dd4056	2010-03-03 09:05:00 -0500	[diff] [blame]	625	status = dquot_alloc_space_nodirty(inode,
				626	ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
				627	if (status)
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	628	goto leave;
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	629	did_quota = 1;
				630
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	631	/* reserve a write to the file entry early on - that we if we
				632	* run out of credits in the allocation path, we can still
				633	* update i_size. */
Joel Becker	0cf2f76	2009-02-12 16:41:25 -0800	[diff] [blame]	634	status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
Joel Becker	13723d0	2008-10-17 19:25:01 -0700	[diff] [blame]	635	OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	636	if (status < 0) {
				637	mlog_errno(status);
				638	goto leave;
				639	}
				640
				641	prev_clusters = OCFS2_I(inode)->ip_clusters;
				642
Tao Ma	0eb8d47	2008-08-18 17:38:45 +0800	[diff] [blame]	643	status = ocfs2_add_inode_data(osb,
				644	inode,
				645	&logical_start,
				646	clusters_to_add,
				647	mark_unwritten,
				648	bh,
				649	handle,
				650	data_ac,
				651	meta_ac,
				652	&why);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	653	if ((status < 0) && (status != -EAGAIN)) {
				654	if (status != -ENOSPC)
				655	mlog_errno(status);
				656	goto leave;
				657	}
				658
Joel Becker	ec20cec	2010-03-19 14:13:52 -0700	[diff] [blame]	659	ocfs2_journal_dirty(handle, bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	660
				661	spin_lock(&OCFS2_I(inode)->ip_lock);
				662	clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
				663	spin_unlock(&OCFS2_I(inode)->ip_lock);
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	664	/* Release unused quota reservation */
Christoph Hellwig	5dd4056	2010-03-03 09:05:00 -0500	[diff] [blame]	665	dquot_free_space(inode,
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	666	ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
				667	did_quota = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	668
				669	if (why != RESTART_NONE && clusters_to_add) {
				670	if (why == RESTART_META) {
				671	mlog(0, "restarting function.\n");
				672	restart_func = 1;
Tao Ma	7968184	2010-04-16 13:59:25 +0800	[diff] [blame]	673	status = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	674	} else {
				675	BUG_ON(why != RESTART_TRANS);
				676
				677	mlog(0, "restarting transaction.\n");
				678	/* TODO: This can be more intelligent. */
				679	credits = ocfs2_calc_extend_credits(osb->sb,
Tao Ma	811f933	2008-08-18 17:38:43 +0800	[diff] [blame]	680	&fe->id2.i_list,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	681	clusters_to_add);
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	682	status = ocfs2_extend_trans(handle, credits);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	683	if (status < 0) {
				684	/* handle still has to be committed at
				685	* this point. */
				686	status = -ENOMEM;
				687	mlog_errno(status);
				688	goto leave;
				689	}
				690	goto restarted_transaction;
				691	}
				692	}
				693
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	694	mlog(0, "fe: i_clusters = %u, i_size=%llu\n",
Mark Fasheh	1ca1a11	2007-04-27 16:01:25 -0700	[diff] [blame]	695	le32_to_cpu(fe->i_clusters),
				696	(unsigned long long)le64_to_cpu(fe->i_size));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	697	mlog(0, "inode: ip_clusters=%u, i_size=%lld\n",
Jan Kara	634bf74	2007-12-19 15:25:42 +0100	[diff] [blame]	698	OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	699
				700	leave:
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	701	if (status < 0 && did_quota)
Christoph Hellwig	5dd4056	2010-03-03 09:05:00 -0500	[diff] [blame]	702	dquot_free_space(inode,
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	703	ocfs2_clusters_to_bytes(osb->sb, clusters_to_add));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	704	if (handle) {
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	705	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	706	handle = NULL;
				707	}
				708	if (data_ac) {
				709	ocfs2_free_alloc_context(data_ac);
				710	data_ac = NULL;
				711	}
				712	if (meta_ac) {
				713	ocfs2_free_alloc_context(meta_ac);
				714	meta_ac = NULL;
				715	}
				716	if ((!status) && restart_func) {
				717	restart_func = 0;
				718	goto restart_all;
				719	}
Mark Fasheh	a81cb88	2008-10-07 14:25:16 -0700	[diff] [blame]	720	brelse(bh);
				721	bh = NULL;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	722
				723	mlog_exit(status);
				724	return status;
				725	}
				726
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	727	/*
				728	* While a write will already be ordering the data, a truncate will not.
				729	* Thus, we need to explicitly order the zeroed pages.
				730	*/
				731	static handle_t ocfs2_zero_start_ordered_transaction(struct inode inode)
				732	{
				733	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				734	handle_t *handle = NULL;
				735	int ret = 0;
				736
				737	if (!ocfs2_should_order_data(inode))
				738	goto out;
				739
				740	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
				741	if (IS_ERR(handle)) {
				742	ret = -ENOMEM;
				743	mlog_errno(ret);
				744	goto out;
				745	}
				746
				747	ret = ocfs2_jbd2_file_inode(handle, inode);
				748	if (ret < 0)
				749	mlog_errno(ret);
				750
				751	out:
				752	if (ret) {
				753	if (!IS_ERR(handle))
				754	ocfs2_commit_trans(osb, handle);
				755	handle = ERR_PTR(ret);
				756	}
				757	return handle;
				758	}
				759
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	760	/* Some parts of this taken from generic_cont_expand, which turned out
				761	* to be too fragile to do exactly what we need without us having to
Nick Piggin	4e02ed4	2008-10-29 14:00:55 -0700	[diff] [blame]	762	* worry about recursive locking in ->write_begin() and ->write_end(). */
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	763	static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
				764	u64 abs_to)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	765	{
				766	struct address_space *mapping = inode->i_mapping;
				767	struct page *page;
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	768	unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	769	handle_t *handle = NULL;
Joel Becker	5453258	2010-07-16 13:32:33 -0700	[diff] [blame]	770	int ret = 0;
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	771	unsigned zero_from, zero_to, block_start, block_end;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	772
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	773	BUG_ON(abs_from >= abs_to);
				774	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
				775	BUG_ON(abs_from & (inode->i_blkbits - 1));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	776
				777	page = grab_cache_page(mapping, index);
				778	if (!page) {
				779	ret = -ENOMEM;
				780	mlog_errno(ret);
				781	goto out;
				782	}
				783
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	784	/* Get the offsets within the page that we want to zero */
				785	zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
				786	zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
				787	if (!zero_to)
				788	zero_to = PAGE_CACHE_SIZE;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	789
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	790	mlog(0,
				791	"abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
				792	(unsigned long long)abs_from, (unsigned long long)abs_to,
				793	index, zero_from, zero_to);
				794
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	795	/* We know that zero_from is block aligned */
				796	for (block_start = zero_from; block_start < zero_to;
				797	block_start = block_end) {
				798	block_end = block_start + (1 << inode->i_blkbits);
				799
				800	/*
				801	* block_start is block-aligned. Bump it by one to
				802	* force ocfs2_{prepare,commit}_write() to zero the
				803	* whole block.
				804	*/
				805	ret = ocfs2_prepare_write_nolock(inode, page,
				806	block_start + 1,
				807	block_start + 1);
				808	if (ret < 0) {
				809	mlog_errno(ret);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	810	goto out_unlock;
				811	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	812
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	813	if (!handle) {
				814	handle = ocfs2_zero_start_ordered_transaction(inode);
				815	if (IS_ERR(handle)) {
				816	ret = PTR_ERR(handle);
				817	handle = NULL;
				818	break;
				819	}
				820	}
				821
				822	/* must not update i_size! */
				823	ret = block_commit_write(page, block_start + 1,
				824	block_start + 1);
				825	if (ret < 0)
				826	mlog_errno(ret);
				827	else
				828	ret = 0;
				829	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	830
				831	if (handle)
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	832	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
Joel Becker	a4bfb4c	2010-07-06 14:36:06 -0700	[diff] [blame]	833
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	834	out_unlock:
				835	unlock_page(page);
				836	page_cache_release(page);
				837	out:
				838	return ret;
				839	}
				840
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	841	/*
				842	* Find the next range to zero. We do this in terms of bytes because
				843	* that's what ocfs2_zero_extend() wants, and it is dealing with the
				844	* pagecache. We may return multiple extents.
				845	*
				846	* zero_start and zero_end are ocfs2_zero_extend()s current idea of what
				847	* needs to be zeroed. range_start and range_end return the next zeroing
				848	* range. A subsequent call should pass the previous range_end as its
				849	* zero_start. If range_end is 0, there's nothing to do.
				850	*
				851	* Unwritten extents are skipped over. Refcounted extents are CoWd.
				852	*/
				853	static int ocfs2_zero_extend_get_range(struct inode *inode,
				854	struct buffer_head *di_bh,
				855	u64 zero_start, u64 zero_end,
				856	u64 range_start, u64 range_end)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	857	{
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	858	int rc = 0, needs_cow = 0;
				859	u32 p_cpos, zero_clusters = 0;
				860	u32 zero_cpos =
				861	zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
				862	u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
				863	unsigned int num_clusters = 0;
				864	unsigned int ext_flags = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	865
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	866	while (zero_cpos < last_cpos) {
				867	rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
				868	&num_clusters, &ext_flags);
				869	if (rc) {
				870	mlog_errno(rc);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	871	goto out;
				872	}
				873
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	874	if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
				875	zero_clusters = num_clusters;
				876	if (ext_flags & OCFS2_EXT_REFCOUNTED)
				877	needs_cow = 1;
				878	break;
				879	}
				880
				881	zero_cpos += num_clusters;
				882	}
				883	if (!zero_clusters) {
				884	*range_end = 0;
				885	goto out;
				886	}
				887
				888	while ((zero_cpos + zero_clusters) < last_cpos) {
				889	rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
				890	&p_cpos, &num_clusters,
				891	&ext_flags);
				892	if (rc) {
				893	mlog_errno(rc);
				894	goto out;
				895	}
				896
				897	if (!p_cpos \|\| (ext_flags & OCFS2_EXT_UNWRITTEN))
				898	break;
				899	if (ext_flags & OCFS2_EXT_REFCOUNTED)
				900	needs_cow = 1;
				901	zero_clusters += num_clusters;
				902	}
				903	if ((zero_cpos + zero_clusters) > last_cpos)
				904	zero_clusters = last_cpos - zero_cpos;
				905
				906	if (needs_cow) {
				907	rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
				908	UINT_MAX);
				909	if (rc) {
				910	mlog_errno(rc);
				911	goto out;
				912	}
				913	}
				914
				915	*range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
				916	*range_end = ocfs2_clusters_to_bytes(inode->i_sb,
				917	zero_cpos + zero_clusters);
				918
				919	out:
				920	return rc;
				921	}
				922
				923	/*
				924	* Zero one range returned from ocfs2_zero_extend_get_range(). The caller
				925	* has made sure that the entire range needs zeroing.
				926	*/
				927	static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
				928	u64 range_end)
				929	{
				930	int rc = 0;
				931	u64 next_pos;
				932	u64 zero_pos = range_start;
				933
				934	mlog(0, "range_start = %llu, range_end = %llu\n",
				935	(unsigned long long)range_start,
				936	(unsigned long long)range_end);
				937	BUG_ON(range_start >= range_end);
				938
				939	while (zero_pos < range_end) {
				940	next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
				941	if (next_pos > range_end)
				942	next_pos = range_end;
				943	rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
				944	if (rc < 0) {
				945	mlog_errno(rc);
				946	break;
				947	}
				948	zero_pos = next_pos;
Mark Fasheh	e2057c5	2006-10-03 17:53:05 -0700	[diff] [blame]	949
				950	/*
				951	* Very large extends have the potential to lock up
				952	* the cpu for extended periods of time.
				953	*/
				954	cond_resched();
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	955	}
				956
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	957	return rc;
				958	}
				959
				960	int ocfs2_zero_extend(struct inode inode, struct buffer_head di_bh,
				961	loff_t zero_to_size)
				962	{
				963	int ret = 0;
				964	u64 zero_start, range_start = 0, range_end = 0;
				965	struct super_block *sb = inode->i_sb;
				966
				967	zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
				968	mlog(0, "zero_start %llu for i_size %llu\n",
				969	(unsigned long long)zero_start,
				970	(unsigned long long)i_size_read(inode));
				971	while (zero_start < zero_to_size) {
				972	ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
				973	zero_to_size,
				974	&range_start,
				975	&range_end);
				976	if (ret) {
				977	mlog_errno(ret);
				978	break;
				979	}
				980	if (!range_end)
				981	break;
				982	/* Trim the ends */
				983	if (range_start < zero_start)
				984	range_start = zero_start;
				985	if (range_end > zero_to_size)
				986	range_end = zero_to_size;
				987
				988	ret = ocfs2_zero_extend_range(inode, range_start,
				989	range_end);
				990	if (ret) {
				991	mlog_errno(ret);
				992	break;
				993	}
				994	zero_start = range_end;
				995	}
				996
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	997	return ret;
				998	}
				999
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1000	int ocfs2_extend_no_holes(struct inode inode, struct buffer_head di_bh,
				1001	u64 new_i_size, u64 zero_to)
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1002	{
				1003	int ret;
				1004	u32 clusters_to_add;
				1005	struct ocfs2_inode_info *oi = OCFS2_I(inode);
				1006
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1007	/*
				1008	* Only quota files call this without a bh, and they can't be
				1009	* refcounted.
				1010	*/
				1011	BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
				1012	BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
				1013
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1014	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
				1015	if (clusters_to_add < oi->ip_clusters)
				1016	clusters_to_add = 0;
				1017	else
				1018	clusters_to_add -= oi->ip_clusters;
				1019
				1020	if (clusters_to_add) {
				1021	ret = __ocfs2_extend_allocation(inode, oi->ip_clusters,
				1022	clusters_to_add, 0);
				1023	if (ret) {
				1024	mlog_errno(ret);
				1025	goto out;
				1026	}
				1027	}
				1028
				1029	/*
				1030	* Call this even if we don't add any clusters to the tree. We
				1031	* still need to zero the area between the old i_size and the
				1032	* new i_size.
				1033	*/
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1034	ret = ocfs2_zero_extend(inode, di_bh, zero_to);
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1035	if (ret < 0)
				1036	mlog_errno(ret);
				1037
				1038	out:
				1039	return ret;
				1040	}
				1041
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1042	static int ocfs2_extend_file(struct inode *inode,
				1043	struct buffer_head *di_bh,
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1044	u64 new_i_size)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1045	{
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	1046	int ret = 0;
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1047	struct ocfs2_inode_info *oi = OCFS2_I(inode);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1048
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1049	BUG_ON(!di_bh);
Mark Fasheh	53013cb	2006-05-05 19:04:03 -0700	[diff] [blame]	1050
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1051	/* setattr sometimes calls us like this. */
				1052	if (new_i_size == 0)
				1053	goto out;
				1054
				1055	if (i_size_read(inode) == new_i_size)
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1056	goto out;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1057	BUG_ON(new_i_size < i_size_read(inode));
				1058
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1059	/*
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1060	* The alloc sem blocks people in read/write from reading our
				1061	* allocation until we're done changing it. We depend on
				1062	* i_mutex to block other extend/truncate calls while we're
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1063	* here. We even have to hold it for sparse files because there
				1064	* might be some tail zeroing.
Mark Fasheh	0effef7	2006-10-03 17:44:42 -0700	[diff] [blame]	1065	*/
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1066	down_write(&oi->ip_alloc_sem);
				1067
				1068	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				1069	/*
				1070	* We can optimize small extends by keeping the inodes
				1071	* inline data.
				1072	*/
				1073	if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
				1074	up_write(&oi->ip_alloc_sem);
				1075	goto out_update_size;
				1076	}
				1077
				1078	ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
				1079	if (ret) {
				1080	up_write(&oi->ip_alloc_sem);
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1081	mlog_errno(ret);
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	1082	goto out;
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1083	}
				1084	}
				1085
Joel Becker	5693486	2010-07-01 15:13:31 -0700	[diff] [blame]	1086	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
				1087	ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
				1088	else
				1089	ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
				1090	new_i_size);
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1091
				1092	up_write(&oi->ip_alloc_sem);
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1093
Mark Fasheh	0effef7	2006-10-03 17:44:42 -0700	[diff] [blame]	1094	if (ret < 0) {
				1095	mlog_errno(ret);
Mark Fasheh	c934a92	2007-10-18 15:23:46 -0700	[diff] [blame]	1096	goto out;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1097	}
				1098
Mark Fasheh	3a0782d	2007-01-17 12:53:31 -0800	[diff] [blame]	1099	out_update_size:
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1100	ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
				1101	if (ret < 0)
				1102	mlog_errno(ret);
Mark Fasheh	53013cb	2006-05-05 19:04:03 -0700	[diff] [blame]	1103
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1104	out:
				1105	return ret;
				1106	}
				1107
				1108	int ocfs2_setattr(struct dentry dentry, struct iattr attr)
				1109	{
				1110	int status = 0, size_change;
				1111	struct inode *inode = dentry->d_inode;
				1112	struct super_block *sb = inode->i_sb;
				1113	struct ocfs2_super *osb = OCFS2_SB(sb);
				1114	struct buffer_head *bh = NULL;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	1115	handle_t *handle = NULL;
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1116	struct dquot *transfer_to[MAXQUOTAS] = { };
Jan Kara	52a9ee2	2010-05-13 20:18:45 +0200	[diff] [blame]	1117	int qtype;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1118
				1119	mlog_entry("(0x%p, '%.*s')\n", dentry,
				1120	dentry->d_name.len, dentry->d_name.name);
				1121
Sunil Mushran	bc53580	2008-04-18 10:23:53 -0700	[diff] [blame]	1122	/* ensuring we don't even attempt to truncate a symlink */
				1123	if (S_ISLNK(inode->i_mode))
				1124	attr->ia_valid &= ~ATTR_SIZE;
				1125
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1126	if (attr->ia_valid & ATTR_MODE)
				1127	mlog(0, "mode change: %d\n", attr->ia_mode);
				1128	if (attr->ia_valid & ATTR_UID)
				1129	mlog(0, "uid change: %d\n", attr->ia_uid);
				1130	if (attr->ia_valid & ATTR_GID)
				1131	mlog(0, "gid change: %d\n", attr->ia_gid);
				1132	if (attr->ia_valid & ATTR_SIZE)
				1133	mlog(0, "size change...\n");
				1134	if (attr->ia_valid & (ATTR_ATIME \| ATTR_MTIME \| ATTR_CTIME))
				1135	mlog(0, "time change...\n");
				1136
				1137	#define OCFS2_VALID_ATTRS (ATTR_ATIME \| ATTR_MTIME \| ATTR_CTIME \| ATTR_SIZE \
				1138	\| ATTR_GID \| ATTR_UID \| ATTR_MODE)
				1139	if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) {
				1140	mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid);
				1141	return 0;
				1142	}
				1143
				1144	status = inode_change_ok(inode, attr);
				1145	if (status)
				1146	return status;
				1147
Dmitry Monakhov	1275562	2010-04-08 22:04:20 +0400	[diff] [blame]	1148	if (is_quota_modification(inode, attr))
				1149	dquot_initialize(inode);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1150	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
				1151	if (size_change) {
				1152	status = ocfs2_rw_lock(inode, 1);
				1153	if (status < 0) {
				1154	mlog_errno(status);
				1155	goto bail;
				1156	}
				1157	}
				1158
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1159	status = ocfs2_inode_lock(inode, &bh, 1);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1160	if (status < 0) {
				1161	if (status != -ENOENT)
				1162	mlog_errno(status);
				1163	goto bail_unlock_rw;
				1164	}
				1165
				1166	if (size_change && attr->ia_size != i_size_read(inode)) {
Wengang Wang	5051f76	2010-02-26 18:18:25 +0800	[diff] [blame]	1167	status = inode_newsize_ok(inode, attr->ia_size);
				1168	if (status)
Mark Fasheh	ce76fd3	2007-07-20 12:02:14 -0700	[diff] [blame]	1169	goto bail_unlock;
Mark Fasheh	ce76fd3	2007-07-20 12:02:14 -0700	[diff] [blame]	1170
Joel Becker	2b4e30f	2008-09-03 20:03:41 -0700	[diff] [blame]	1171	if (i_size_read(inode) > attr->ia_size) {
				1172	if (ocfs2_should_order_data(inode)) {
				1173	status = ocfs2_begin_ordered_truncate(inode,
				1174	attr->ia_size);
				1175	if (status)
				1176	goto bail_unlock;
				1177	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1178	status = ocfs2_truncate_file(inode, bh, attr->ia_size);
Joel Becker	2b4e30f	2008-09-03 20:03:41 -0700	[diff] [blame]	1179	} else
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	1180	status = ocfs2_extend_file(inode, bh, attr->ia_size);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1181	if (status < 0) {
				1182	if (status != -ENOSPC)
				1183	mlog_errno(status);
				1184	status = -ENOSPC;
				1185	goto bail_unlock;
				1186	}
				1187	}
				1188
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1189	if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) \|\|
				1190	(attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1191	/*
				1192	* Gather pointers to quota structures so that allocation /
				1193	* freeing of quota structures happens here and not inside
Christoph Hellwig	b43fa82	2010-03-03 09:05:03 -0500	[diff] [blame]	1194	* dquot_transfer() where we have problems with lock ordering
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1195	*/
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1196	if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
				1197	&& OCFS2_HAS_RO_COMPAT_FEATURE(sb,
				1198	OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1199	transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid,
				1200	USRQUOTA);
Jan Kara	52a9ee2	2010-05-13 20:18:45 +0200	[diff] [blame]	1201	if (!transfer_to[USRQUOTA]) {
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1202	status = -ESRCH;
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1203	goto bail_unlock;
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1204	}
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1205	}
				1206	if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
				1207	&& OCFS2_HAS_RO_COMPAT_FEATURE(sb,
				1208	OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1209	transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid,
				1210	GRPQUOTA);
Jan Kara	52a9ee2	2010-05-13 20:18:45 +0200	[diff] [blame]	1211	if (!transfer_to[GRPQUOTA]) {
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1212	status = -ESRCH;
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1213	goto bail_unlock;
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1214	}
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1215	}
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1216	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS +
				1217	2 * ocfs2_quota_trans_credits(sb));
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1218	if (IS_ERR(handle)) {
				1219	status = PTR_ERR(handle);
				1220	mlog_errno(status);
				1221	goto bail_unlock;
				1222	}
Jan Kara	52a9ee2	2010-05-13 20:18:45 +0200	[diff] [blame]	1223	status = __dquot_transfer(inode, transfer_to);
Jan Kara	a90714c	2008-10-09 19:38:40 +0200	[diff] [blame]	1224	if (status < 0)
				1225	goto bail_commit;
				1226	} else {
				1227	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
				1228	if (IS_ERR(handle)) {
				1229	status = PTR_ERR(handle);
				1230	mlog_errno(status);
				1231	goto bail_unlock;
				1232	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1233	}
				1234
Mark Fasheh	7307de8	2007-05-09 15:16:19 -0700	[diff] [blame]	1235	/*
Christoph Hellwig	2c27c65	2010-06-04 11:30:04 +0200	[diff] [blame]	1236	* This will intentionally not wind up calling truncate_setsize(),
Mark Fasheh	7307de8	2007-05-09 15:16:19 -0700	[diff] [blame]	1237	* since all the work for a size change has been done above.
				1238	* Otherwise, we could get into problems with truncate as
				1239	* ip_alloc_sem is used there to protect against i_size
				1240	* changes.
Christoph Hellwig	1025774	2010-06-04 11:30:02 +0200	[diff] [blame]	1241	*
				1242	* XXX: this means the conditional below can probably be removed.
Mark Fasheh	7307de8	2007-05-09 15:16:19 -0700	[diff] [blame]	1243	*/
Christoph Hellwig	1025774	2010-06-04 11:30:02 +0200	[diff] [blame]	1244	if ((attr->ia_valid & ATTR_SIZE) &&
				1245	attr->ia_size != i_size_read(inode)) {
				1246	status = vmtruncate(inode, attr->ia_size);
				1247	if (status) {
				1248	mlog_errno(status);
				1249	goto bail_commit;
				1250	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1251	}
				1252
Christoph Hellwig	1025774	2010-06-04 11:30:02 +0200	[diff] [blame]	1253	setattr_copy(inode, attr);
				1254	mark_inode_dirty(inode);
				1255
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1256	status = ocfs2_mark_inode_dirty(handle, inode, bh);
				1257	if (status < 0)
				1258	mlog_errno(status);
				1259
				1260	bail_commit:
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	1261	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1262	bail_unlock:
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1263	ocfs2_inode_unlock(inode, 1);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1264	bail_unlock_rw:
				1265	if (size_change)
				1266	ocfs2_rw_unlock(inode, 1);
				1267	bail:
Mark Fasheh	a81cb88	2008-10-07 14:25:16 -0700	[diff] [blame]	1268	brelse(bh);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1269
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1270	/* Release quota pointers in case we acquired them */
Jan Kara	52a9ee2	2010-05-13 20:18:45 +0200	[diff] [blame]	1271	for (qtype = 0; qtype < MAXQUOTAS; qtype++)
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1272	dqput(transfer_to[qtype]);
Jan Kara	65bac57	2009-06-02 14:24:01 +0200	[diff] [blame]	1273
Tiger Yang	060bc66	2008-11-14 11:17:29 +0800	[diff] [blame]	1274	if (!status && attr->ia_valid & ATTR_MODE) {
				1275	status = ocfs2_acl_chmod(inode);
				1276	if (status < 0)
				1277	mlog_errno(status);
				1278	}
				1279
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1280	mlog_exit(status);
				1281	return status;
				1282	}
				1283
				1284	int ocfs2_getattr(struct vfsmount *mnt,
				1285	struct dentry *dentry,
				1286	struct kstat *stat)
				1287	{
				1288	struct inode *inode = dentry->d_inode;
				1289	struct super_block *sb = dentry->d_inode->i_sb;
				1290	struct ocfs2_super *osb = sb->s_fs_info;
				1291	int err;
				1292
				1293	mlog_entry_void();
				1294
				1295	err = ocfs2_inode_revalidate(dentry);
				1296	if (err) {
				1297	if (err != -ENOENT)
				1298	mlog_errno(err);
				1299	goto bail;
				1300	}
				1301
				1302	generic_fillattr(inode, stat);
				1303
				1304	/* We set the blksize from the cluster size for performance */
				1305	stat->blksize = osb->s_clustersize;
				1306
				1307	bail:
				1308	mlog_exit(err);
				1309
				1310	return err;
				1311	}
				1312
Al Viro	e6305c4	2008-07-15 21:03:57 -0400	[diff] [blame]	1313	int ocfs2_permission(struct inode *inode, int mask)
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	1314	{
				1315	int ret;
				1316
				1317	mlog_entry_void();
				1318
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1319	ret = ocfs2_inode_lock(inode, NULL, 0);
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	1320	if (ret) {
Mark Fasheh	a9f5f70	2007-04-26 11:43:43 -0700	[diff] [blame]	1321	if (ret != -ENOENT)
				1322	mlog_errno(ret);
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	1323	goto out;
				1324	}
				1325
Tiger Yang	23fc270	2008-11-14 11:17:18 +0800	[diff] [blame]	1326	ret = generic_permission(inode, mask, ocfs2_check_acl);
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	1327
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1328	ocfs2_inode_unlock(inode, 0);
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	1329	out:
				1330	mlog_exit(ret);
				1331	return ret;
				1332	}
				1333
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1334	static int __ocfs2_write_remove_suid(struct inode *inode,
				1335	struct buffer_head *bh)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1336	{
				1337	int ret;
Mark Fasheh	1fabe14	2006-10-09 18:11:45 -0700	[diff] [blame]	1338	handle_t *handle;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1339	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1340	struct ocfs2_dinode *di;
				1341
Mark Fasheh	b0697053	2006-03-03 10:24:33 -0800	[diff] [blame]	1342	mlog_entry("(Inode %llu, mode 0%o)\n",
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1343	(unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1344
Mark Fasheh	65eff9c	2006-10-09 17:26:22 -0700	[diff] [blame]	1345	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
Jan Kara	fa38e92	2008-10-20 19:23:51 +0200	[diff] [blame]	1346	if (IS_ERR(handle)) {
				1347	ret = PTR_ERR(handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1348	mlog_errno(ret);
				1349	goto out;
				1350	}
				1351
Joel Becker	0cf2f76	2009-02-12 16:41:25 -0800	[diff] [blame]	1352	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
Joel Becker	13723d0	2008-10-17 19:25:01 -0700	[diff] [blame]	1353	OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1354	if (ret < 0) {
				1355	mlog_errno(ret);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1356	goto out_trans;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1357	}
				1358
				1359	inode->i_mode &= ~S_ISUID;
				1360	if ((inode->i_mode & S_ISGID) && (inode->i_mode & S_IXGRP))
				1361	inode->i_mode &= ~S_ISGID;
				1362
				1363	di = (struct ocfs2_dinode *) bh->b_data;
				1364	di->i_mode = cpu_to_le16(inode->i_mode);
				1365
Joel Becker	ec20cec	2010-03-19 14:13:52 -0700	[diff] [blame]	1366	ocfs2_journal_dirty(handle, bh);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1367
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1368	out_trans:
Mark Fasheh	02dc1af	2006-10-09 16:48:10 -0700	[diff] [blame]	1369	ocfs2_commit_trans(osb, handle);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	1370	out:
				1371	mlog_exit(ret);
				1372	return ret;
				1373	}
				1374
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	1375	/*
				1376	* Will look for holes and unwritten extents in the range starting at
				1377	* pos for count bytes (inclusive).
				1378	*/
				1379	static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
				1380	size_t count)
				1381	{
				1382	int ret = 0;
Mark Fasheh	49cb8d2	2007-03-09 16:21:46 -0800	[diff] [blame]	1383	unsigned int extent_flags;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	1384	u32 cpos, clusters, extent_len, phys_cpos;
				1385	struct super_block *sb = inode->i_sb;
				1386
				1387	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
				1388	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
				1389
				1390	while (clusters) {
Mark Fasheh	49cb8d2	2007-03-09 16:21:46 -0800	[diff] [blame]	1391	ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
				1392	&extent_flags);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	1393	if (ret < 0) {
				1394	mlog_errno(ret);
				1395	goto out;
				1396	}
				1397
Mark Fasheh	49cb8d2	2007-03-09 16:21:46 -0800	[diff] [blame]	1398	if (phys_cpos == 0 \|\| (extent_flags & OCFS2_EXT_UNWRITTEN)) {
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	1399	ret = 1;
				1400	break;
				1401	}
				1402
				1403	if (extent_len > clusters)
				1404	extent_len = clusters;
				1405
				1406	clusters -= extent_len;
				1407	cpos += extent_len;
				1408	}
				1409	out:
				1410	return ret;
				1411	}
				1412
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1413	static int ocfs2_write_remove_suid(struct inode *inode)
				1414	{
				1415	int ret;
				1416	struct buffer_head *bh = NULL;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1417
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	1418	ret = ocfs2_read_inode_block(inode, &bh);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1419	if (ret < 0) {
				1420	mlog_errno(ret);
				1421	goto out;
				1422	}
				1423
				1424	ret = __ocfs2_write_remove_suid(inode, bh);
				1425	out:
				1426	brelse(bh);
				1427	return ret;
				1428	}
				1429
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	1430	/*
				1431	* Allocate enough extents to cover the region starting at byte offset
				1432	* start for len bytes. Existing extents are skipped, any extents
				1433	* added are marked as "unwritten".
				1434	*/
				1435	static int ocfs2_allocate_unwritten_extents(struct inode *inode,
				1436	u64 start, u64 len)
				1437	{
				1438	int ret;
				1439	u32 cpos, phys_cpos, clusters, alloc_size;
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1440	u64 end = start + len;
				1441	struct buffer_head *di_bh = NULL;
				1442
				1443	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
Joel Becker	b657c95	2008-11-13 14:49:11 -0800	[diff] [blame]	1444	ret = ocfs2_read_inode_block(inode, &di_bh);
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1445	if (ret) {
				1446	mlog_errno(ret);
				1447	goto out;
				1448	}
				1449
				1450	/*
				1451	* Nothing to do if the requested reservation range
				1452	* fits within the inode.
				1453	*/
				1454	if (ocfs2_size_fits_inline_data(di_bh, end))
				1455	goto out;
				1456
				1457	ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
				1458	if (ret) {
				1459	mlog_errno(ret);
				1460	goto out;
				1461	}
				1462	}
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	1463
				1464	/*
				1465	* We consider both start and len to be inclusive.
				1466	*/
				1467	cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
				1468	clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
				1469	clusters -= cpos;
				1470
				1471	while (clusters) {
				1472	ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
				1473	&alloc_size, NULL);
				1474	if (ret) {
				1475	mlog_errno(ret);
				1476	goto out;
				1477	}
				1478
				1479	/*
				1480	* Hole or existing extent len can be arbitrary, so
				1481	* cap it to our own allocation request.
				1482	*/
				1483	if (alloc_size > clusters)
				1484	alloc_size = clusters;
				1485
				1486	if (phys_cpos) {
				1487	/*
				1488	* We already have an allocation at this
				1489	* region so we can safely skip it.
				1490	*/
				1491	goto next;
				1492	}
				1493
				1494	ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
				1495	if (ret) {
				1496	if (ret != -ENOSPC)
				1497	mlog_errno(ret);
				1498	goto out;
				1499	}
				1500
				1501	next:
				1502	cpos += alloc_size;
				1503	clusters -= alloc_size;
				1504	}
				1505
				1506	ret = 0;
				1507	out:
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1508
				1509	brelse(di_bh);
Mark Fasheh	2ae99a6	2007-03-09 16:43:28 -0800	[diff] [blame]	1510	return ret;
				1511	}
				1512
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1513	/*
				1514	* Truncate a byte range, avoiding pages within partial clusters. This
				1515	* preserves those pages for the zeroing code to write to.
				1516	*/
				1517	static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
				1518	u64 byte_len)
				1519	{
				1520	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1521	loff_t start, end;
				1522	struct address_space *mapping = inode->i_mapping;
				1523
				1524	start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start);
				1525	end = byte_start + byte_len;
				1526	end = end & ~(osb->s_clustersize - 1);
				1527
				1528	if (start < end) {
				1529	unmap_mapping_range(mapping, start, end - start, 0);
				1530	truncate_inode_pages_range(mapping, start, end - 1);
				1531	}
				1532	}
				1533
				1534	static int ocfs2_zero_partial_clusters(struct inode *inode,
				1535	u64 start, u64 len)
				1536	{
				1537	int ret = 0;
				1538	u64 tmpend, end = start + len;
				1539	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1540	unsigned int csize = osb->s_clustersize;
				1541	handle_t *handle;
				1542
				1543	/*
				1544	* The "start" and "end" values are NOT necessarily part of
				1545	* the range whose allocation is being deleted. Rather, this
				1546	* is what the user passed in with the request. We must zero
				1547	* partial clusters here. There's no need to worry about
				1548	* physical allocation - the zeroing code knows to skip holes.
				1549	*/
				1550	mlog(0, "byte start: %llu, end: %llu\n",
				1551	(unsigned long long)start, (unsigned long long)end);
				1552
				1553	/*
				1554	* If both edges are on a cluster boundary then there's no
				1555	* zeroing required as the region is part of the allocation to
				1556	* be truncated.
				1557	*/
				1558	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
				1559	goto out;
				1560
				1561	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
Jan Kara	fa38e92	2008-10-20 19:23:51 +0200	[diff] [blame]	1562	if (IS_ERR(handle)) {
				1563	ret = PTR_ERR(handle);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1564	mlog_errno(ret);
				1565	goto out;
				1566	}
				1567
				1568	/*
				1569	* We want to get the byte offset of the end of the 1st cluster.
				1570	*/
				1571	tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
				1572	if (tmpend > end)
				1573	tmpend = end;
				1574
				1575	mlog(0, "1st range: start: %llu, tmpend: %llu\n",
				1576	(unsigned long long)start, (unsigned long long)tmpend);
				1577
				1578	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
				1579	if (ret)
				1580	mlog_errno(ret);
				1581
				1582	if (tmpend < end) {
				1583	/*
				1584	* This may make start and end equal, but the zeroing
				1585	* code will skip any work in that case so there's no
				1586	* need to catch it up here.
				1587	*/
				1588	start = end & ~(osb->s_clustersize - 1);
				1589
				1590	mlog(0, "2nd range: start: %llu, end: %llu\n",
				1591	(unsigned long long)start, (unsigned long long)end);
				1592
				1593	ret = ocfs2_zero_range_for_truncate(inode, handle, start, end);
				1594	if (ret)
				1595	mlog_errno(ret);
				1596	}
				1597
				1598	ocfs2_commit_trans(osb, handle);
				1599	out:
				1600	return ret;
				1601	}
				1602
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1603	static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
				1604	{
				1605	int i;
				1606	struct ocfs2_extent_rec *rec = NULL;
				1607
				1608	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
				1609
				1610	rec = &el->l_recs[i];
				1611
				1612	if (le32_to_cpu(rec->e_cpos) < pos)
				1613	break;
				1614	}
				1615
				1616	return i;
				1617	}
				1618
				1619	/*
				1620	* Helper to calculate the punching pos and length in one run, we handle the
				1621	* following three cases in order:
				1622	*
				1623	* - remove the entire record
				1624	* - remove a partial record
				1625	* - no record needs to be removed (hole-punching completed)
				1626	*/
				1627	static void ocfs2_calc_trunc_pos(struct inode *inode,
				1628	struct ocfs2_extent_list *el,
				1629	struct ocfs2_extent_rec *rec,
				1630	u32 trunc_start, u32 *trunc_cpos,
				1631	u32 trunc_len, u32 trunc_end,
				1632	u64 blkno, int done)
				1633	{
				1634	int ret = 0;
				1635	u32 coff, range;
				1636
				1637	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
				1638
				1639	if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
				1640	*trunc_cpos = le32_to_cpu(rec->e_cpos);
				1641	/*
				1642	* Skip holes if any.
				1643	*/
				1644	if (range < *trunc_end)
				1645	*trunc_end = range;
				1646	trunc_len = trunc_end - le32_to_cpu(rec->e_cpos);
				1647	*blkno = le64_to_cpu(rec->e_blkno);
				1648	*trunc_end = le32_to_cpu(rec->e_cpos);
				1649	} else if (range > trunc_start) {
				1650	*trunc_cpos = trunc_start;
				1651	trunc_len = trunc_end - trunc_start;
				1652	coff = trunc_start - le32_to_cpu(rec->e_cpos);
				1653	*blkno = le64_to_cpu(rec->e_blkno) +
				1654	ocfs2_clusters_to_blocks(inode->i_sb, coff);
				1655	*trunc_end = trunc_start;
				1656	} else {
				1657	/*
				1658	* It may have two following possibilities:
				1659	*
				1660	* - last record has been removed
				1661	* - trunc_start was within a hole
				1662	*
				1663	* both two cases mean the completion of hole punching.
				1664	*/
				1665	ret = 1;
				1666	}
				1667
				1668	*done = ret;
				1669	}
				1670
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1671	static int ocfs2_remove_inode_range(struct inode *inode,
				1672	struct buffer_head *di_bh, u64 byte_start,
				1673	u64 byte_len)
				1674	{
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1675	int ret = 0, flags = 0, done = 0, i;
				1676	u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
				1677	u32 cluster_in_el;
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1678	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1679	struct ocfs2_cached_dealloc_ctxt dealloc;
Mark Fasheh	b1967d0	2007-11-20 11:56:39 -0800	[diff] [blame]	1680	struct address_space *mapping = inode->i_mapping;
Mark Fasheh	fecc011	2008-11-12 15:16:38 -0800	[diff] [blame]	1681	struct ocfs2_extent_tree et;
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1682	struct ocfs2_path *path = NULL;
				1683	struct ocfs2_extent_list *el = NULL;
				1684	struct ocfs2_extent_rec *rec = NULL;
Tristan Ye	e8aec06	2010-05-11 17:54:43 +0800	[diff] [blame]	1685	struct ocfs2_dinode di = (struct ocfs2_dinode )di_bh->b_data;
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1686	u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1687
Joel Becker	5e404e9	2009-02-13 03:54:22 -0800	[diff] [blame]	1688	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1689	ocfs2_init_dealloc_ctxt(&dealloc);
				1690
				1691	if (byte_len == 0)
				1692	return 0;
				1693
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1694	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				1695	ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
Mark Fasheh	b1967d0	2007-11-20 11:56:39 -0800	[diff] [blame]	1696	byte_start + byte_len, 0);
				1697	if (ret) {
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1698	mlog_errno(ret);
Mark Fasheh	b1967d0	2007-11-20 11:56:39 -0800	[diff] [blame]	1699	goto out;
				1700	}
				1701	/*
				1702	* There's no need to get fancy with the page cache
				1703	* truncate of an inline-data inode. We're talking
				1704	* about less than a page here, which will be cached
				1705	* in the dinode buffer anyway.
				1706	*/
				1707	unmap_mapping_range(mapping, 0, 0, 0);
				1708	truncate_inode_pages(mapping, 0);
				1709	goto out;
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	1710	}
				1711
Tristan Ye	e8aec06	2010-05-11 17:54:43 +0800	[diff] [blame]	1712	/*
				1713	* For reflinks, we may need to CoW 2 clusters which might be
				1714	* partially zero'd later, if hole's start and end offset were
				1715	* within one cluster(means is not exactly aligned to clustersize).
				1716	*/
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1717
Tristan Ye	e8aec06	2010-05-11 17:54:43 +0800	[diff] [blame]	1718	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
				1719
				1720	ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
				1721	if (ret) {
				1722	mlog_errno(ret);
				1723	goto out;
				1724	}
				1725
				1726	ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
				1727	if (ret) {
				1728	mlog_errno(ret);
				1729	goto out;
				1730	}
				1731	}
				1732
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1733	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1734	trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
				1735	cluster_in_el = trunc_end;
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1736
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1737	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1738	(unsigned long long)OCFS2_I(inode)->ip_blkno,
				1739	(unsigned long long)byte_start,
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1740	(unsigned long long)byte_len, trunc_start, trunc_end);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1741
				1742	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
				1743	if (ret) {
				1744	mlog_errno(ret);
				1745	goto out;
				1746	}
				1747
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1748	path = ocfs2_new_path_from_et(&et);
				1749	if (!path) {
				1750	ret = -ENOMEM;
				1751	mlog_errno(ret);
				1752	goto out;
				1753	}
				1754
				1755	while (trunc_end > trunc_start) {
				1756
				1757	ret = ocfs2_find_path(INODE_CACHE(inode), path,
				1758	cluster_in_el);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1759	if (ret) {
				1760	mlog_errno(ret);
				1761	goto out;
				1762	}
				1763
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1764	el = path_leaf_el(path);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1765
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1766	i = ocfs2_find_rec(el, trunc_end);
				1767	/*
				1768	* Need to go to previous extent block.
				1769	*/
				1770	if (i < 0) {
				1771	if (path->p_tree_depth == 0)
				1772	break;
				1773
				1774	ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
				1775	path,
				1776	&cluster_in_el);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1777	if (ret) {
				1778	mlog_errno(ret);
				1779	goto out;
				1780	}
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1781
				1782	/*
				1783	* We've reached the leftmost extent block,
				1784	* it's safe to leave.
				1785	*/
				1786	if (cluster_in_el == 0)
				1787	break;
				1788
				1789	/*
				1790	* The 'pos' searched for previous extent block is
				1791	* always one cluster less than actual trunc_end.
				1792	*/
				1793	trunc_end = cluster_in_el + 1;
				1794
				1795	ocfs2_reinit_path(path, 1);
				1796
				1797	continue;
				1798
				1799	} else
				1800	rec = &el->l_recs[i];
				1801
				1802	ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
				1803	&trunc_len, &trunc_end, &blkno, &done);
				1804	if (done)
				1805	break;
				1806
				1807	flags = rec->e_flags;
				1808	phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
				1809
				1810	ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
				1811	phys_cpos, trunc_len, flags,
				1812	&dealloc, refcount_loc);
				1813	if (ret < 0) {
				1814	mlog_errno(ret);
				1815	goto out;
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1816	}
				1817
Tristan Ye	c1631d4	2010-05-11 17:54:45 +0800	[diff] [blame]	1818	cluster_in_el = trunc_end;
				1819
				1820	ocfs2_reinit_path(path, 1);
Mark Fasheh	063c456	2007-07-03 13:34:11 -0700	[diff] [blame]	1821	}
				1822
				1823	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
				1824
				1825	out:
				1826	ocfs2_schedule_truncate_log_flush(osb, 1);
				1827	ocfs2_run_deallocs(osb, &dealloc);
				1828
				1829	return ret;
				1830	}
				1831
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1832	/*
				1833	* Parts of this function taken from xfs_change_file_space()
				1834	*/
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1835	static int __ocfs2_change_file_space(struct file file, struct inode inode,
				1836	loff_t f_pos, unsigned int cmd,
				1837	struct ocfs2_space_resv *sr,
				1838	int change_size)
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1839	{
				1840	int ret;
				1841	s64 llen;
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1842	loff_t size;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1843	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1844	struct buffer_head *di_bh = NULL;
				1845	handle_t *handle;
Mark Fasheh	a00cce3	2007-07-20 11:28:30 -0700	[diff] [blame]	1846	unsigned long long max_off = inode->i_sb->s_maxbytes;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1847
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1848	if (ocfs2_is_hard_readonly(osb) \|\| ocfs2_is_soft_readonly(osb))
				1849	return -EROFS;
				1850
				1851	mutex_lock(&inode->i_mutex);
				1852
				1853	/*
				1854	* This prevents concurrent writes on other nodes
				1855	*/
				1856	ret = ocfs2_rw_lock(inode, 1);
				1857	if (ret) {
				1858	mlog_errno(ret);
				1859	goto out;
				1860	}
				1861
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1862	ret = ocfs2_inode_lock(inode, &di_bh, 1);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1863	if (ret) {
				1864	mlog_errno(ret);
				1865	goto out_rw_unlock;
				1866	}
				1867
				1868	if (inode->i_flags & (S_IMMUTABLE\|S_APPEND)) {
				1869	ret = -EPERM;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1870	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1871	}
				1872
				1873	switch (sr->l_whence) {
				1874	case 0: /SEEK_SET/
				1875	break;
				1876	case 1: /SEEK_CUR/
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1877	sr->l_start += f_pos;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1878	break;
				1879	case 2: /SEEK_END/
				1880	sr->l_start += i_size_read(inode);
				1881	break;
				1882	default:
				1883	ret = -EINVAL;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1884	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1885	}
				1886	sr->l_whence = 0;
				1887
				1888	llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;
				1889
				1890	if (sr->l_start < 0
				1891	\|\| sr->l_start > max_off
				1892	\|\| (sr->l_start + llen) < 0
				1893	\|\| (sr->l_start + llen) > max_off) {
				1894	ret = -EINVAL;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1895	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1896	}
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1897	size = sr->l_start + sr->l_len;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1898
				1899	if (cmd == OCFS2_IOC_RESVSP \|\| cmd == OCFS2_IOC_RESVSP64) {
				1900	if (sr->l_len <= 0) {
				1901	ret = -EINVAL;
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1902	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1903	}
				1904	}
				1905
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1906	if (file && should_remove_suid(file->f_path.dentry)) {
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1907	ret = __ocfs2_write_remove_suid(inode, di_bh);
				1908	if (ret) {
				1909	mlog_errno(ret);
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1910	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1911	}
				1912	}
				1913
				1914	down_write(&OCFS2_I(inode)->ip_alloc_sem);
				1915	switch (cmd) {
				1916	case OCFS2_IOC_RESVSP:
				1917	case OCFS2_IOC_RESVSP64:
				1918	/*
				1919	* This takes unsigned offsets, but the signed ones we
				1920	* pass have been checked against overflow above.
				1921	*/
				1922	ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
				1923	sr->l_len);
				1924	break;
				1925	case OCFS2_IOC_UNRESVSP:
				1926	case OCFS2_IOC_UNRESVSP64:
				1927	ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
				1928	sr->l_len);
				1929	break;
				1930	default:
				1931	ret = -EINVAL;
				1932	}
				1933	up_write(&OCFS2_I(inode)->ip_alloc_sem);
				1934	if (ret) {
				1935	mlog_errno(ret);
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1936	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1937	}
				1938
				1939	/*
				1940	* We update c/mtime for these changes
				1941	*/
				1942	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
				1943	if (IS_ERR(handle)) {
				1944	ret = PTR_ERR(handle);
				1945	mlog_errno(ret);
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1946	goto out_inode_unlock;
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1947	}
				1948
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1949	if (change_size && i_size_read(inode) < size)
				1950	i_size_write(inode, size);
				1951
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1952	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
				1953	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
				1954	if (ret < 0)
				1955	mlog_errno(ret);
				1956
				1957	ocfs2_commit_trans(osb, handle);
				1958
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1959	out_inode_unlock:
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1960	brelse(di_bh);
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	1961	ocfs2_inode_unlock(inode, 1);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1962	out_rw_unlock:
				1963	ocfs2_rw_unlock(inode, 1);
				1964
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1965	out:
Julia Lawall	c259ae5	2008-07-21 09:59:15 +0200	[diff] [blame]	1966	mutex_unlock(&inode->i_mutex);
Mark Fasheh	b258010	2007-03-09 16:53:21 -0800	[diff] [blame]	1967	return ret;
				1968	}
				1969
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1970	int ocfs2_change_file_space(struct file *file, unsigned int cmd,
				1971	struct ocfs2_space_resv *sr)
				1972	{
				1973	struct inode *inode = file->f_path.dentry->d_inode;
Fernando Carrijo	c19a28e	2009-01-07 18:09:08 -0800	[diff] [blame]	1974	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	1975
				1976	if ((cmd == OCFS2_IOC_RESVSP \|\| cmd == OCFS2_IOC_RESVSP64) &&
				1977	!ocfs2_writes_unwritten_extents(osb))
				1978	return -ENOTTY;
				1979	else if ((cmd == OCFS2_IOC_UNRESVSP \|\| cmd == OCFS2_IOC_UNRESVSP64) &&
				1980	!ocfs2_sparse_alloc(osb))
				1981	return -ENOTTY;
				1982
				1983	if (!S_ISREG(inode->i_mode))
				1984	return -EINVAL;
				1985
				1986	if (!(file->f_mode & FMODE_WRITE))
				1987	return -EBADF;
				1988
				1989	return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
				1990	}
				1991
				1992	static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
				1993	loff_t len)
				1994	{
				1995	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
				1996	struct ocfs2_space_resv sr;
				1997	int change_size = 1;
				1998
				1999	if (!ocfs2_writes_unwritten_extents(osb))
				2000	return -EOPNOTSUPP;
				2001
				2002	if (S_ISDIR(inode->i_mode))
				2003	return -ENODEV;
				2004
				2005	if (mode & FALLOC_FL_KEEP_SIZE)
				2006	change_size = 0;
				2007
				2008	sr.l_whence = 0;
				2009	sr.l_start = (s64)offset;
				2010	sr.l_len = (s64)len;
				2011
				2012	return __ocfs2_change_file_space(NULL, inode, offset,
				2013	OCFS2_IOC_RESVSP64, &sr, change_size);
				2014	}
				2015
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2016	int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
				2017	size_t count)
				2018	{
				2019	int ret = 0;
				2020	unsigned int extent_flags;
				2021	u32 cpos, clusters, extent_len, phys_cpos;
				2022	struct super_block *sb = inode->i_sb;
				2023
				2024	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) \|\|
Tao Ma	2f48d59	2009-10-15 11:10:49 +0800	[diff] [blame]	2025	!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) \|\|
				2026	OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2027	return 0;
				2028
				2029	cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
				2030	clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
				2031
				2032	while (clusters) {
				2033	ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
				2034	&extent_flags);
				2035	if (ret < 0) {
				2036	mlog_errno(ret);
				2037	goto out;
				2038	}
				2039
				2040	if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) {
				2041	ret = 1;
				2042	break;
				2043	}
				2044
				2045	if (extent_len > clusters)
				2046	extent_len = clusters;
				2047
				2048	clusters -= extent_len;
				2049	cpos += extent_len;
				2050	}
				2051	out:
				2052	return ret;
				2053	}
				2054
				2055	static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
				2056	loff_t pos, size_t count,
				2057	int *meta_level)
				2058	{
				2059	int ret;
				2060	struct buffer_head *di_bh = NULL;
				2061	u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
				2062	u32 clusters =
				2063	ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
				2064
				2065	ret = ocfs2_inode_lock(inode, &di_bh, 1);
				2066	if (ret) {
				2067	mlog_errno(ret);
				2068	goto out;
				2069	}
				2070
				2071	*meta_level = 1;
				2072
Tao Ma	37f8a2b	2009-08-26 09:47:28 +0800	[diff] [blame]	2073	ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2074	if (ret)
				2075	mlog_errno(ret);
				2076	out:
				2077	brelse(di_bh);
				2078	return ret;
				2079	}
				2080
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2081	static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
				2082	loff_t *ppos,
				2083	size_t count,
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2084	int appending,
Tao Ma	86470e9	2009-12-03 21:55:05 +0800	[diff] [blame]	2085	int *direct_io,
				2086	int *has_refcount)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2087	{
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2088	int ret = 0, meta_level = 0;
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2089	struct inode *inode = dentry->d_inode;
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2090	loff_t saved_pos, end;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2091
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2092	/*
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2093	* We start with a read level meta lock and only jump to an ex
				2094	* if we need to make modifications here.
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2095	*/
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2096	for(;;) {
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	2097	ret = ocfs2_inode_lock(inode, NULL, meta_level);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2098	if (ret < 0) {
				2099	meta_level = -1;
				2100	mlog_errno(ret);
				2101	goto out;
				2102	}
				2103
				2104	/* Clear suid / sgid if necessary. We do this here
				2105	* instead of later in the write path because
				2106	* remove_suid() calls ->setattr without any hint that
				2107	* we may have already done our cluster locking. Since
				2108	* ocfs2_setattr() must take cluster locks to
				2109	* proceeed, this will lead us to recursively lock the
				2110	* inode. There's also the dinode i_size state which
				2111	* can be lost via setattr during extending writes (we
				2112	* set inode->i_size at the end of a write. */
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2113	if (should_remove_suid(dentry)) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2114	if (meta_level == 0) {
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	2115	ocfs2_inode_unlock(inode, meta_level);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2116	meta_level = 1;
				2117	continue;
				2118	}
				2119
				2120	ret = ocfs2_write_remove_suid(inode);
				2121	if (ret < 0) {
				2122	mlog_errno(ret);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2123	goto out_unlock;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2124	}
				2125	}
				2126
				2127	/* work on a copy of ppos until we're sure that we won't have
				2128	* to recalculate it due to relocking. */
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2129	if (appending) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2130	saved_pos = i_size_read(inode);
				2131	mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos);
				2132	} else {
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2133	saved_pos = *ppos;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2134	}
Mark Fasheh	3a0782d	2007-01-17 12:53:31 -0800	[diff] [blame]	2135
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2136	end = saved_pos + count;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2137
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2138	ret = ocfs2_check_range_for_refcount(inode, saved_pos, count);
				2139	if (ret == 1) {
				2140	ocfs2_inode_unlock(inode, meta_level);
				2141	meta_level = -1;
				2142
				2143	ret = ocfs2_prepare_inode_for_refcount(inode,
				2144	saved_pos,
				2145	count,
				2146	&meta_level);
Tao Ma	86470e9	2009-12-03 21:55:05 +0800	[diff] [blame]	2147	if (has_refcount)
				2148	*has_refcount = 1;
Wengang Wang	96a1cc7	2010-02-09 14:57:45 +0800	[diff] [blame]	2149	if (direct_io)
				2150	*direct_io = 0;
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2151	}
				2152
				2153	if (ret < 0) {
				2154	mlog_errno(ret);
				2155	goto out_unlock;
				2156	}
				2157
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2158	/*
				2159	* Skip the O_DIRECT checks if we don't need
				2160	* them.
				2161	*/
				2162	if (!direct_io \|\| !(*direct_io))
				2163	break;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2164
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2165	/*
Mark Fasheh	1afc32b	2007-09-07 14:46:51 -0700	[diff] [blame]	2166	* There's no sane way to do direct writes to an inode
				2167	* with inline data.
				2168	*/
				2169	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
				2170	*direct_io = 0;
				2171	break;
				2172	}
				2173
				2174	/*
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2175	* Allowing concurrent direct writes means
				2176	* i_size changes wouldn't be synchronized, so
				2177	* one node could wind up truncating another
				2178	* nodes writes.
				2179	*/
				2180	if (end > i_size_read(inode)) {
				2181	*direct_io = 0;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2182	break;
				2183	}
				2184
Mark Fasheh	3a0782d	2007-01-17 12:53:31 -0800	[diff] [blame]	2185	/*
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2186	* We don't fill holes during direct io, so
				2187	* check for them here. If any are found, the
				2188	* caller will have to retake some cluster
				2189	* locks and initiate the io as buffered.
Mark Fasheh	3a0782d	2007-01-17 12:53:31 -0800	[diff] [blame]	2190	*/
Mark Fasheh	65ed39d	2007-08-28 17:13:23 -0700	[diff] [blame]	2191	ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
				2192	if (ret == 1) {
				2193	*direct_io = 0;
				2194	ret = 0;
				2195	} else if (ret < 0)
				2196	mlog_errno(ret);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2197	break;
				2198	}
				2199
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2200	if (appending)
				2201	*ppos = saved_pos;
				2202
				2203	out_unlock:
Tao Ma	293b2f7	2009-08-25 08:02:48 +0800	[diff] [blame]	2204	if (meta_level >= 0)
				2205	ocfs2_inode_unlock(inode, meta_level);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2206
				2207	out:
				2208	return ret;
				2209	}
				2210
				2211	static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
				2212	const struct iovec *iov,
				2213	unsigned long nr_segs,
				2214	loff_t pos)
				2215	{
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2216	int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
Tao Ma	86470e9	2009-12-03 21:55:05 +0800	[diff] [blame]	2217	int can_do_direct, has_refcount = 0;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2218	ssize_t written = 0;
				2219	size_t ocount; /* original count */
				2220	size_t count; /* after file limit checks */
Mark Fasheh	9ea2d32	2007-10-18 14:14:45 -0700	[diff] [blame]	2221	loff_t old_size, *ppos = &iocb->ki_pos;
				2222	u32 old_clusters;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2223	struct file *file = iocb->ki_filp;
				2224	struct inode *inode = file->f_path.dentry->d_inode;
Mark Fasheh	9ea2d32	2007-10-18 14:14:45 -0700	[diff] [blame]	2225	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2226
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2227	mlog_entry("(0x%p, %u, '%.*s')\n", file,
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2228	(unsigned int)nr_segs,
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2229	file->f_path.dentry->d_name.len,
				2230	file->f_path.dentry->d_name.name);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2231
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2232	if (iocb->ki_left == 0)
				2233	return 0;
				2234
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2235	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
				2236
				2237	appending = file->f_flags & O_APPEND ? 1 : 0;
				2238	direct_io = file->f_flags & O_DIRECT ? 1 : 0;
				2239
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2240	mutex_lock(&inode->i_mutex);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2241
				2242	relock:
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2243	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2244	if (direct_io) {
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2245	down_read(&inode->i_alloc_sem);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2246	have_alloc_sem = 1;
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2247	}
				2248
				2249	/* concurrent O_DIRECT writes are allowed */
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2250	rw_level = !direct_io;
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2251	ret = ocfs2_rw_lock(inode, rw_level);
				2252	if (ret < 0) {
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2253	mlog_errno(ret);
				2254	goto out_sems;
				2255	}
				2256
				2257	can_do_direct = direct_io;
				2258	ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
				2259	iocb->ki_left, appending,
Tao Ma	86470e9	2009-12-03 21:55:05 +0800	[diff] [blame]	2260	&can_do_direct, &has_refcount);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2261	if (ret < 0) {
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2262	mlog_errno(ret);
				2263	goto out;
				2264	}
				2265
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2266	/*
				2267	* We can't complete the direct I/O as requested, fall back to
				2268	* buffered I/O.
				2269	*/
				2270	if (direct_io && !can_do_direct) {
				2271	ocfs2_rw_unlock(inode, rw_level);
				2272	up_read(&inode->i_alloc_sem);
				2273
				2274	have_alloc_sem = 0;
				2275	rw_level = -1;
				2276
				2277	direct_io = 0;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2278	goto relock;
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2279	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2280
Mark Fasheh	9ea2d32	2007-10-18 14:14:45 -0700	[diff] [blame]	2281	/*
				2282	* To later detect whether a journal commit for sync writes is
				2283	* necessary, we sample i_size, and cluster count here.
				2284	*/
				2285	old_size = i_size_read(inode);
				2286	old_clusters = OCFS2_I(inode)->ip_clusters;
				2287
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2288	/* communicate with ocfs2_dio_end_io */
Mark Fasheh	7cdfc3a	2007-04-16 17:28:51 -0700	[diff] [blame]	2289	ocfs2_iocb_set_rw_locked(iocb, rw_level);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2290
Li Dongyang	6b933c8	2010-04-17 17:49:10 +0800	[diff] [blame]	2291	ret = generic_segment_checks(iov, &nr_segs, &ocount,
				2292	VERIFY_READ);
				2293	if (ret)
				2294	goto out_dio;
				2295
				2296	count = ocount;
				2297	ret = generic_write_checks(file, ppos, &count,
				2298	S_ISBLK(inode->i_mode));
				2299	if (ret)
				2300	goto out_dio;
				2301
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2302	if (direct_io) {
				2303	written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
				2304	ppos, count, ocount);
				2305	if (written < 0) {
Dmitri Monakhov	c435400	2008-10-27 13:01:49 -0700	[diff] [blame]	2306	/*
				2307	* direct write may have instantiated a few
				2308	* blocks outside i_size. Trim these off again.
				2309	* Don't need i_size_read because we hold i_mutex.
npiggin@suse.de	15c6fd9	2010-05-27 01:05:34 +1000	[diff] [blame]	2310	*
Christoph Hellwig	2c27c65	2010-06-04 11:30:04 +0200	[diff] [blame]	2311	* XXX(truncate): this looks buggy because ocfs2 did not
npiggin@suse.de	15c6fd9	2010-05-27 01:05:34 +1000	[diff] [blame]	2312	* actually implement ->truncate. Take a look at
				2313	* the new truncate sequence and update this accordingly
Dmitri Monakhov	c435400	2008-10-27 13:01:49 -0700	[diff] [blame]	2314	*/
				2315	if (*ppos + count > inode->i_size)
Christoph Hellwig	2c27c65	2010-06-04 11:30:04 +0200	[diff] [blame]	2316	truncate_setsize(inode, inode->i_size);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2317	ret = written;
				2318	goto out_dio;
				2319	}
				2320	} else {
Li Dongyang	6b933c8	2010-04-17 17:49:10 +0800	[diff] [blame]	2321	current->backing_dev_info = file->f_mapping->backing_dev_info;
				2322	written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
				2323	ppos, count, 0);
				2324	current->backing_dev_info = NULL;
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2325	}
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2326
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2327	out_dio:
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2328	/* buffered aio wouldn't have proper lock coverage today */
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2329	BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2330
Tao Ma	60c4867	2010-02-03 09:56:04 +0800	[diff] [blame]	2331	if (((file->f_flags & O_DSYNC) && !direct_io) \|\| IS_SYNC(inode) \|\|
				2332	((file->f_flags & O_DIRECT) && has_refcount)) {
Jan Kara	918941a	2009-08-17 18:50:08 +0200	[diff] [blame]	2333	ret = filemap_fdatawrite_range(file->f_mapping, pos,
				2334	pos + count - 1);
				2335	if (ret < 0)
				2336	written = ret;
				2337
Coly Li	a03ab78	2010-03-26 05:15:12 +0800	[diff] [blame]	2338	if (!ret && ((old_size != i_size_read(inode)) \|\|
				2339	(old_clusters != OCFS2_I(inode)->ip_clusters) \|\|
				2340	has_refcount)) {
Joel Becker	2b4e30f	2008-09-03 20:03:41 -0700	[diff] [blame]	2341	ret = jbd2_journal_force_commit(osb->journal->j_journal);
Mark Fasheh	9ea2d32	2007-10-18 14:14:45 -0700	[diff] [blame]	2342	if (ret < 0)
				2343	written = ret;
				2344	}
Jan Kara	918941a	2009-08-17 18:50:08 +0200	[diff] [blame]	2345
				2346	if (!ret)
				2347	ret = filemap_fdatawait_range(file->f_mapping, pos,
				2348	pos + count - 1);
Mark Fasheh	9ea2d32	2007-10-18 14:14:45 -0700	[diff] [blame]	2349	}
				2350
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2351	/*
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2352	* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
				2353	* function pointer which is called when o_direct io completes so that
				2354	* it can unlock our rw lock. (it's the clustered equivalent of
				2355	* i_alloc_sem; protects truncate from racing with pending ios).
				2356	* Unfortunately there are error cases which call end_io and others
				2357	* that don't. so we don't have to unlock the rw_lock if either an
				2358	* async dio is going to do it in the future or an end_io after an
				2359	* error has already done it.
				2360	*/
Coly Li	66b116c	2010-02-25 14:57:13 +0800	[diff] [blame]	2361	if ((ret == -EIOCBQUEUED) \|\| (!ocfs2_iocb_is_rw_locked(iocb))) {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2362	rw_level = -1;
				2363	have_alloc_sem = 0;
				2364	}
				2365
				2366	out:
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2367	if (rw_level != -1)
				2368	ocfs2_rw_unlock(inode, rw_level);
				2369
				2370	out_sems:
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2371	if (have_alloc_sem)
				2372	up_read(&inode->i_alloc_sem);
Mark Fasheh	9517bac	2007-02-09 20:24:12 -0800	[diff] [blame]	2373
Jes Sorensen	1b1dcc1	2006-01-09 15:59:24 -0800	[diff] [blame]	2374	mutex_unlock(&inode->i_mutex);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2375
Wengang Wang	812e7a6	2009-07-10 13:26:04 +0800	[diff] [blame]	2376	if (written)
				2377	ret = written;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2378	mlog_exit(ret);
Wengang Wang	812e7a6	2009-07-10 13:26:04 +0800	[diff] [blame]	2379	return ret;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2380	}
				2381
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2382	static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
				2383	struct file *out,
				2384	struct splice_desc *sd)
				2385	{
				2386	int ret;
				2387
				2388	ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
Tao Ma	86470e9	2009-12-03 21:55:05 +0800	[diff] [blame]	2389	sd->total_len, 0, NULL, NULL);
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2390	if (ret < 0) {
				2391	mlog_errno(ret);
				2392	return ret;
				2393	}
				2394
				2395	return splice_from_pipe_feed(pipe, sd, pipe_to_file);
				2396	}
				2397
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2398	static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
				2399	struct file *out,
				2400	loff_t *ppos,
				2401	size_t len,
				2402	unsigned int flags)
				2403	{
				2404	int ret;
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2405	struct address_space *mapping = out->f_mapping;
				2406	struct inode *inode = mapping->host;
				2407	struct splice_desc sd = {
				2408	.total_len = len,
				2409	.flags = flags,
				2410	.pos = *ppos,
				2411	.u.file = out,
				2412	};
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2413
				2414	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe,
				2415	(unsigned int)len,
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	2416	out->f_path.dentry->d_name.len,
				2417	out->f_path.dentry->d_name.name);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2418
Miklos Szeredi	7bfac9e	2009-04-06 17:41:00 +0200	[diff] [blame]	2419	if (pipe->inode)
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2420	mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
				2421
				2422	splice_from_pipe_begin(&sd);
				2423	do {
				2424	ret = splice_from_pipe_next(pipe, &sd);
				2425	if (ret <= 0)
				2426	break;
				2427
				2428	mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
				2429	ret = ocfs2_rw_lock(inode, 1);
				2430	if (ret < 0)
				2431	mlog_errno(ret);
				2432	else {
				2433	ret = ocfs2_splice_to_file(pipe, out, &sd);
				2434	ocfs2_rw_unlock(inode, 1);
				2435	}
				2436	mutex_unlock(&inode->i_mutex);
				2437	} while (ret > 0);
				2438	splice_from_pipe_end(pipe, &sd);
				2439
Miklos Szeredi	7bfac9e	2009-04-06 17:41:00 +0200	[diff] [blame]	2440	if (pipe->inode)
				2441	mutex_unlock(&pipe->inode->i_mutex);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2442
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2443	if (sd.num_spliced)
				2444	ret = sd.num_spliced;
				2445
				2446	if (ret > 0) {
				2447	unsigned long nr_pages;
Jan Kara	d23c937	2009-08-18 18:24:31 +0200	[diff] [blame]	2448	int err;
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2449
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2450	nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
				2451
Jan Kara	d23c937	2009-08-18 18:24:31 +0200	[diff] [blame]	2452	err = generic_write_sync(out, *ppos, ret);
				2453	if (err)
				2454	ret = err;
				2455	else
				2456	*ppos += ret;
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2457
Miklos Szeredi	328eaab	2009-04-14 19:48:39 +0200	[diff] [blame]	2458	balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
				2459	}
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2460
				2461	mlog_exit(ret);
				2462	return ret;
				2463	}
				2464
				2465	static ssize_t ocfs2_file_splice_read(struct file *in,
				2466	loff_t *ppos,
				2467	struct pipe_inode_info *pipe,
				2468	size_t len,
				2469	unsigned int flags)
				2470	{
Tao Ma	1962f39	2009-06-19 15:36:52 +0800	[diff] [blame]	2471	int ret = 0, lock_level = 0;
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	2472	struct inode *inode = in->f_path.dentry->d_inode;
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2473
				2474	mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe,
				2475	(unsigned int)len,
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	2476	in->f_path.dentry->d_name.len,
				2477	in->f_path.dentry->d_name.name);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2478
				2479	/*
				2480	* See the comment in ocfs2_file_aio_read()
				2481	*/
Tao Ma	1962f39	2009-06-19 15:36:52 +0800	[diff] [blame]	2482	ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2483	if (ret < 0) {
				2484	mlog_errno(ret);
				2485	goto bail;
				2486	}
Tao Ma	1962f39	2009-06-19 15:36:52 +0800	[diff] [blame]	2487	ocfs2_inode_unlock(inode, lock_level);
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2488
				2489	ret = generic_file_splice_read(in, ppos, pipe, len, flags);
				2490
				2491	bail:
				2492	mlog_exit(ret);
				2493	return ret;
				2494	}
				2495
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2496	static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
Badari Pulavarty	027445c	2006-09-30 23:28:46 -0700	[diff] [blame]	2497	const struct iovec *iov,
				2498	unsigned long nr_segs,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2499	loff_t pos)
				2500	{
Tiger Yang	25899de	2006-11-15 15:49:02 +0800	[diff] [blame]	2501	int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2502	struct file *filp = iocb->ki_filp;
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	2503	struct inode *inode = filp->f_path.dentry->d_inode;
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2504
Badari Pulavarty	027445c	2006-09-30 23:28:46 -0700	[diff] [blame]	2505	mlog_entry("(0x%p, %u, '%.*s')\n", filp,
				2506	(unsigned int)nr_segs,
Josef Sipek	d28c917	2006-12-08 02:37:25 -0800	[diff] [blame]	2507	filp->f_path.dentry->d_name.len,
				2508	filp->f_path.dentry->d_name.name);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2509
				2510	if (!inode) {
				2511	ret = -EINVAL;
				2512	mlog_errno(ret);
				2513	goto bail;
				2514	}
				2515
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2516	/*
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2517	* buffered reads protect themselves in ->readpage(). O_DIRECT reads
				2518	* need locks to protect pending reads from racing with truncate.
				2519	*/
				2520	if (filp->f_flags & O_DIRECT) {
				2521	down_read(&inode->i_alloc_sem);
				2522	have_alloc_sem = 1;
				2523
				2524	ret = ocfs2_rw_lock(inode, 0);
				2525	if (ret < 0) {
				2526	mlog_errno(ret);
				2527	goto bail;
				2528	}
				2529	rw_level = 0;
				2530	/* communicate with ocfs2_dio_end_io */
Mark Fasheh	7cdfc3a	2007-04-16 17:28:51 -0700	[diff] [blame]	2531	ocfs2_iocb_set_rw_locked(iocb, rw_level);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2532	}
				2533
Mark Fasheh	c4374f8	2006-05-05 19:04:35 -0700	[diff] [blame]	2534	/*
				2535	* We're fine letting folks race truncates and extending
				2536	* writes with read across the cluster, just like they can
				2537	* locally. Hence no rw_lock during read.
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2538	*
Mark Fasheh	c4374f8	2006-05-05 19:04:35 -0700	[diff] [blame]	2539	* Take and drop the meta data lock to update inode fields
				2540	* like i_size. This allows the checks down below
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2541	* generic_file_aio_read() a chance of actually working.
Mark Fasheh	c4374f8	2006-05-05 19:04:35 -0700	[diff] [blame]	2542	*/
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	2543	ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
Mark Fasheh	c4374f8	2006-05-05 19:04:35 -0700	[diff] [blame]	2544	if (ret < 0) {
				2545	mlog_errno(ret);
				2546	goto bail;
				2547	}
Mark Fasheh	e63aecb6	2007-10-18 15:30:42 -0700	[diff] [blame]	2548	ocfs2_inode_unlock(inode, lock_level);
Mark Fasheh	c4374f8	2006-05-05 19:04:35 -0700	[diff] [blame]	2549
Badari Pulavarty	027445c	2006-09-30 23:28:46 -0700	[diff] [blame]	2550	ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos);
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2551	if (ret == -EINVAL)
Sunil Mushran	56753bd	2008-06-09 11:24:41 -0700	[diff] [blame]	2552	mlog(0, "generic_file_aio_read returned -EINVAL\n");
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2553
				2554	/* buffered aio wouldn't have proper lock coverage today */
				2555	BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT));
				2556
				2557	/* see ocfs2_file_aio_write */
				2558	if (ret == -EIOCBQUEUED \|\| !ocfs2_iocb_is_rw_locked(iocb)) {
				2559	rw_level = -1;
				2560	have_alloc_sem = 0;
				2561	}
				2562
				2563	bail:
				2564	if (have_alloc_sem)
				2565	up_read(&inode->i_alloc_sem);
Sunil Mushran	2bd6321	2010-01-25 16:57:38 -0800	[diff] [blame]	2566	if (rw_level != -1)
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2567	ocfs2_rw_unlock(inode, rw_level);
				2568	mlog_exit(ret);
				2569
				2570	return ret;
				2571	}
				2572
Arjan van de Ven	92e1d5b	2007-02-12 00:55:39 -0800	[diff] [blame]	2573	const struct inode_operations ocfs2_file_iops = {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2574	.setattr = ocfs2_setattr,
				2575	.getattr = ocfs2_getattr,
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	2576	.permission = ocfs2_permission,
Tiger Yang	cf1d6c7	2008-08-18 17:11:00 +0800	[diff] [blame]	2577	.setxattr = generic_setxattr,
				2578	.getxattr = generic_getxattr,
				2579	.listxattr = ocfs2_listxattr,
				2580	.removexattr = generic_removexattr,
Mark Fasheh	385820a	2007-07-19 00:14:38 -0700	[diff] [blame]	2581	.fallocate = ocfs2_fallocate,
Mark Fasheh	00dc417	2008-10-03 17:32:11 -0400	[diff] [blame]	2582	.fiemap = ocfs2_fiemap,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2583	};
				2584
Arjan van de Ven	92e1d5b	2007-02-12 00:55:39 -0800	[diff] [blame]	2585	const struct inode_operations ocfs2_special_file_iops = {
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2586	.setattr = ocfs2_setattr,
				2587	.getattr = ocfs2_getattr,
Tiger Yang	d38eb8d	2006-11-27 09:59:21 +0800	[diff] [blame]	2588	.permission = ocfs2_permission,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2589	};
				2590
Mark Fasheh	53da493	2008-07-21 14:29:16 -0700	[diff] [blame]	2591	/*
				2592	* Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with
				2593	* ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
				2594	*/
Arjan van de Ven	4b6f5d2	2006-03-28 01:56:42 -0800	[diff] [blame]	2595	const struct file_operations ocfs2_fops = {
Jan Kara	32c3c0e	2007-12-19 15:24:52 +0100	[diff] [blame]	2596	.llseek = generic_file_llseek,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2597	.read = do_sync_read,
				2598	.write = do_sync_write,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2599	.mmap = ocfs2_mmap,
				2600	.fsync = ocfs2_sync_file,
				2601	.release = ocfs2_file_release,
				2602	.open = ocfs2_file_open,
				2603	.aio_read = ocfs2_file_aio_read,
				2604	.aio_write = ocfs2_file_aio_write,
Andi Kleen	c9ec148	2008-01-27 03:17:17 +0100	[diff] [blame]	2605	.unlocked_ioctl = ocfs2_ioctl,
Mark Fasheh	586d232	2007-03-09 15:56:28 -0800	[diff] [blame]	2606	#ifdef CONFIG_COMPAT
				2607	.compat_ioctl = ocfs2_compat_ioctl,
				2608	#endif
Mark Fasheh	53da493	2008-07-21 14:29:16 -0700	[diff] [blame]	2609	.lock = ocfs2_lock,
				2610	.flock = ocfs2_flock,
				2611	.splice_read = ocfs2_file_splice_read,
				2612	.splice_write = ocfs2_file_splice_write,
				2613	};
				2614
				2615	const struct file_operations ocfs2_dops = {
				2616	.llseek = generic_file_llseek,
				2617	.read = generic_read_dir,
				2618	.readdir = ocfs2_readdir,
				2619	.fsync = ocfs2_sync_file,
				2620	.release = ocfs2_dir_release,
				2621	.open = ocfs2_dir_open,
				2622	.unlocked_ioctl = ocfs2_ioctl,
				2623	#ifdef CONFIG_COMPAT
				2624	.compat_ioctl = ocfs2_compat_ioctl,
				2625	#endif
				2626	.lock = ocfs2_lock,
				2627	.flock = ocfs2_flock,
				2628	};
				2629
				2630	/*
				2631	* POSIX-lockless variants of our file_operations.
				2632	*
				2633	* These will be used if the underlying cluster stack does not support
				2634	* posix file locking, if the user passes the "localflocks" mount
				2635	* option, or if we have a local-only fs.
				2636	*
				2637	* ocfs2_flock is in here because all stacks handle UNIX file locks,
				2638	* so we still want it in the case of no stack support for
				2639	* plocks. Internally, it will do the right thing when asked to ignore
				2640	* the cluster.
				2641	*/
				2642	const struct file_operations ocfs2_fops_no_plocks = {
				2643	.llseek = generic_file_llseek,
				2644	.read = do_sync_read,
				2645	.write = do_sync_write,
				2646	.mmap = ocfs2_mmap,
				2647	.fsync = ocfs2_sync_file,
				2648	.release = ocfs2_file_release,
				2649	.open = ocfs2_file_open,
				2650	.aio_read = ocfs2_file_aio_read,
				2651	.aio_write = ocfs2_file_aio_write,
				2652	.unlocked_ioctl = ocfs2_ioctl,
				2653	#ifdef CONFIG_COMPAT
				2654	.compat_ioctl = ocfs2_compat_ioctl,
				2655	#endif
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	2656	.flock = ocfs2_flock,
Tiger Yang	8659ac2	2006-10-17 18:29:52 -0700	[diff] [blame]	2657	.splice_read = ocfs2_file_splice_read,
				2658	.splice_write = ocfs2_file_splice_write,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2659	};
				2660
Mark Fasheh	53da493	2008-07-21 14:29:16 -0700	[diff] [blame]	2661	const struct file_operations ocfs2_dops_no_plocks = {
Jan Kara	32c3c0e	2007-12-19 15:24:52 +0100	[diff] [blame]	2662	.llseek = generic_file_llseek,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2663	.read = generic_read_dir,
				2664	.readdir = ocfs2_readdir,
				2665	.fsync = ocfs2_sync_file,
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	2666	.release = ocfs2_dir_release,
				2667	.open = ocfs2_dir_open,
Andi Kleen	c9ec148	2008-01-27 03:17:17 +0100	[diff] [blame]	2668	.unlocked_ioctl = ocfs2_ioctl,
Mark Fasheh	586d232	2007-03-09 15:56:28 -0800	[diff] [blame]	2669	#ifdef CONFIG_COMPAT
				2670	.compat_ioctl = ocfs2_compat_ioctl,
				2671	#endif
Mark Fasheh	53fc622	2007-12-20 16:49:04 -0800	[diff] [blame]	2672	.flock = ocfs2_flock,
Mark Fasheh	ccd979b	2005-12-15 14:31:24 -0800	[diff] [blame]	2673	};