Blame - fs/reiserfs/file.c - kernel/msm-4.19

blob: f6860e83521d2e66880c05ce3d59841f560b7914 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
				3	*/
				4
				5
				6	#include <linux/time.h>
				7	#include <linux/reiserfs_fs.h>
				8	#include <linux/reiserfs_acl.h>
				9	#include <linux/reiserfs_xattr.h>
				10	#include <linux/smp_lock.h>
				11	#include <asm/uaccess.h>
				12	#include <linux/pagemap.h>
				13	#include <linux/swap.h>
				14	#include <linux/writeback.h>
				15	#include <linux/blkdev.h>
				16	#include <linux/buffer_head.h>
				17	#include <linux/quotaops.h>
				18
				19	/*
				20	** We pack the tails of files on file close, not at the time they are written.
				21	** This implies an unnecessary copy of the tail and an unnecessary indirect item
				22	** insertion/balancing, for files that are written in one write.
				23	** It avoids unnecessary tail packings (balances) for files that are written in
				24	** multiple writes and are small enough to have tails.
				25	**
				26	** file_release is called by the VFS layer when the file is closed. If
				27	** this is the last open file descriptor, and the file
				28	** is small enough to have a tail, and the tail is currently in an
				29	** unformatted node, the tail is converted back into a direct item.
				30	**
				31	** We use reiserfs_truncate_file to pack the tail, since it already has
				32	** all the conditions coded.
				33	*/
				34	static int reiserfs_file_release (struct inode * inode, struct file * filp)
				35	{
				36
				37	struct reiserfs_transaction_handle th ;
				38	int err;
				39	int jbegin_failure = 0;
				40
				41	if (!S_ISREG (inode->i_mode))
				42	BUG ();
				43
				44	/* fast out for when nothing needs to be done */
				45	if ((atomic_read(&inode->i_count) > 1 \|\|
				46	!(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) \|\|
				47	!tail_has_to_be_packed(inode)) &&
				48	REISERFS_I(inode)->i_prealloc_count <= 0) {
				49	return 0;
				50	}
				51
				52	reiserfs_write_lock(inode->i_sb);
				53	down (&inode->i_sem);
				54	/* freeing preallocation only involves relogging blocks that
				55	* are already in the current transaction. preallocation gets
				56	* freed at the end of each transaction, so it is impossible for
				57	* us to log any additional blocks (including quota blocks)
				58	*/
				59	err = journal_begin(&th, inode->i_sb, 1);
				60	if (err) {
				61	/* uh oh, we can't allow the inode to go away while there
				62	* is still preallocation blocks pending. Try to join the
				63	* aborted transaction
				64	*/
				65	jbegin_failure = err;
				66	err = journal_join_abort(&th, inode->i_sb, 1);
				67
				68	if (err) {
				69	/* hmpf, our choices here aren't good. We can pin the inode
				70	* which will disallow unmount from every happening, we can
				71	* do nothing, which will corrupt random memory on unmount,
				72	* or we can forcibly remove the file from the preallocation
				73	* list, which will leak blocks on disk. Lets pin the inode
				74	* and let the admin know what is going on.
				75	*/
				76	igrab(inode);
				77	reiserfs_warning(inode->i_sb, "pinning inode %lu because the "
				78	"preallocation can't be freed");
				79	goto out;
				80	}
				81	}
				82	reiserfs_update_inode_transaction(inode) ;
				83
				84	#ifdef REISERFS_PREALLOCATE
				85	reiserfs_discard_prealloc (&th, inode);
				86	#endif
				87	err = journal_end(&th, inode->i_sb, 1);
				88
				89	/* copy back the error code from journal_begin */
				90	if (!err)
				91	err = jbegin_failure;
				92
				93	if (!err && atomic_read(&inode->i_count) <= 1 &&
				94	(REISERFS_I(inode)->i_flags & i_pack_on_close_mask) &&
				95	tail_has_to_be_packed (inode)) {
				96	/* if regular file is released by last holder and it has been
				97	appended (we append by unformatted node only) or its direct
				98	item(s) had to be converted, then it may have to be
				99	indirect2direct converted */
				100	err = reiserfs_truncate_file(inode, 0) ;
				101	}
				102	out:
				103	up (&inode->i_sem);
				104	reiserfs_write_unlock(inode->i_sb);
				105	return err;
				106	}
				107
				108	static void reiserfs_vfs_truncate_file(struct inode *inode) {
				109	reiserfs_truncate_file(inode, 1) ;
				110	}
				111
				112	/* Sync a reiserfs file. */
				113
				114	/*
				115	* FIXME: sync_mapping_buffers() never has anything to sync. Can
				116	* be removed...
				117	*/
				118
				119	static int reiserfs_sync_file(
				120	struct file * p_s_filp,
				121	struct dentry * p_s_dentry,
				122	int datasync
				123	) {
				124	struct inode * p_s_inode = p_s_dentry->d_inode;
				125	int n_err;
				126	int barrier_done;
				127
				128	if (!S_ISREG(p_s_inode->i_mode))
				129	BUG ();
				130	n_err = sync_mapping_buffers(p_s_inode->i_mapping) ;
				131	reiserfs_write_lock(p_s_inode->i_sb);
				132	barrier_done = reiserfs_commit_for_inode(p_s_inode);
				133	reiserfs_write_unlock(p_s_inode->i_sb);
				134	if (barrier_done != 1)
				135	blkdev_issue_flush(p_s_inode->i_sb->s_bdev, NULL);
				136	if (barrier_done < 0)
				137	return barrier_done;
				138	return ( n_err < 0 ) ? -EIO : 0;
				139	}
				140
				141	/* I really do not want to play with memory shortage right now, so
				142	to simplify the code, we are not going to write more than this much pages at
				143	a time. This still should considerably improve performance compared to 4k
				144	at a time case. This is 32 pages of 4k size. */
				145	#define REISERFS_WRITE_PAGES_AT_A_TIME (128 * 1024) / PAGE_CACHE_SIZE
				146
				147	/* Allocates blocks for a file to fulfil write request.
				148	Maps all unmapped but prepared pages from the list.
				149	Updates metadata with newly allocated blocknumbers as needed */
				150	static int reiserfs_allocate_blocks_for_region(
				151	struct reiserfs_transaction_handle *th,
				152	struct inode inode, / Inode we work with */
				153	loff_t pos, /* Writing position */
				154	int num_pages, /* number of pages write going
				155	to touch */
				156	int write_bytes, /* amount of bytes to write */
				157	struct page *prepared_pages, / array of
				158	prepared pages
				159	*/
				160	int blocks_to_allocate /* Amount of blocks we
				161	need to allocate to
				162	fit the data into file
				163	*/
				164	)
				165	{
				166	struct cpu_key key; // cpu key of item that we are going to deal with
				167	struct item_head *ih; // pointer to item head that we are going to deal with
				168	struct buffer_head *bh; // Buffer head that contains items that we are going to deal with
				169	__u32 * item; // pointer to item we are going to deal with
				170	INITIALIZE_PATH(path); // path to item, that we are going to deal with.
				171	b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored.
				172	reiserfs_blocknr_hint_t hint; // hint structure for block allocator.
				173	size_t res; // return value of various functions that we call.
				174	int curr_block; // current block used to keep track of unmapped blocks.
				175	int i; // loop counter
				176	int itempos; // position in item
				177	unsigned int from = (pos & (PAGE_CACHE_SIZE - 1)); // writing position in
				178	// first page
				179	unsigned int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1; /* last modified byte offset in last page */
				180	__u64 hole_size ; // amount of blocks for a file hole, if it needed to be created.
				181	int modifying_this_item = 0; // Flag for items traversal code to keep track
				182	// of the fact that we already prepared
				183	// current block for journal
				184	int will_prealloc = 0;
				185	RFALSE(!blocks_to_allocate, "green-9004: tried to allocate zero blocks?");
				186
				187	/* only preallocate if this is a small write */
				188	if (REISERFS_I(inode)->i_prealloc_count \|\|
				189	(!(write_bytes & (inode->i_sb->s_blocksize -1)) &&
				190	blocks_to_allocate <
				191	REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize))
				192	will_prealloc = REISERFS_SB(inode->i_sb)->s_alloc_options.preallocsize;
				193
				194	allocated_blocks = kmalloc((blocks_to_allocate + will_prealloc) *
				195	sizeof(b_blocknr_t), GFP_NOFS);
				196
				197	/* First we compose a key to point at the writing position, we want to do
				198	that outside of any locking region. */
				199	make_cpu_key (&key, inode, pos+1, TYPE_ANY, 3/key length/);
				200
				201	/* If we came here, it means we absolutely need to open a transaction,
				202	since we need to allocate some blocks */
				203	reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
				204	res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); // Wish I know if this number enough
				205	if (res)
				206	goto error_exit;
				207	reiserfs_update_inode_transaction(inode) ;
				208
				209	/* Look for the in-tree position of our write, need path for block allocator */
				210	res = search_for_position_by_key(inode->i_sb, &key, &path);
				211	if ( res == IO_ERROR ) {
				212	res = -EIO;
				213	goto error_exit;
				214	}
				215
				216	/* Allocate blocks */
				217	/* First fill in "hint" structure for block allocator */
				218	hint.th = th; // transaction handle.
				219	hint.path = &path; // Path, so that block allocator can determine packing locality or whatever it needs to determine.
				220	hint.inode = inode; // Inode is needed by block allocator too.
				221	hint.search_start = 0; // We have no hint on where to search free blocks for block allocator.
				222	hint.key = key.on_disk_key; // on disk key of file.
				223	hint.block = inode->i_blocks>>(inode->i_sb->s_blocksize_bits-9); // Number of disk blocks this file occupies already.
				224	hint.formatted_node = 0; // We are allocating blocks for unformatted node.
				225	hint.preallocate = will_prealloc;
				226
				227	/* Call block allocator to allocate blocks */
				228	res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate);
				229	if ( res != CARRY_ON ) {
				230	if ( res == NO_DISK_SPACE ) {
				231	/* We flush the transaction in case of no space. This way some
				232	blocks might become free */
				233	SB_JOURNAL(inode->i_sb)->j_must_wait = 1;
				234	res = restart_transaction(th, inode, &path);
				235	if (res)
				236	goto error_exit;
				237
				238	/* We might have scheduled, so search again */
				239	res = search_for_position_by_key(inode->i_sb, &key, &path);
				240	if ( res == IO_ERROR ) {
				241	res = -EIO;
				242	goto error_exit;
				243	}
				244
				245	/* update changed info for hint structure. */
				246	res = reiserfs_allocate_blocknrs(&hint, allocated_blocks, blocks_to_allocate, blocks_to_allocate);
				247	if ( res != CARRY_ON ) {
				248	res = -ENOSPC;
				249	pathrelse(&path);
				250	goto error_exit;
				251	}
				252	} else {
				253	res = -ENOSPC;
				254	pathrelse(&path);
				255	goto error_exit;
				256	}
				257	}
				258
				259	#ifdef __BIG_ENDIAN
				260	// Too bad, I have not found any way to convert a given region from
				261	// cpu format to little endian format
				262	{
				263	int i;
				264	for ( i = 0; i < blocks_to_allocate ; i++)
				265	allocated_blocks[i]=cpu_to_le32(allocated_blocks[i]);
				266	}
				267	#endif
				268
				269	/* Blocks allocating well might have scheduled and tree might have changed,
				270	let's search the tree again */
				271	/* find where in the tree our write should go */
				272	res = search_for_position_by_key(inode->i_sb, &key, &path);
				273	if ( res == IO_ERROR ) {
				274	res = -EIO;
				275	goto error_exit_free_blocks;
				276	}
				277
				278	bh = get_last_bh( &path ); // Get a bufferhead for last element in path.
				279	ih = get_ih( &path ); // Get a pointer to last item head in path.
				280	item = get_item( &path ); // Get a pointer to last item in path
				281
				282	/* Let's see what we have found */
				283	if ( res != POSITION_FOUND ) { /* position not found, this means that we
				284	might need to append file with holes
				285	first */
				286	// Since we are writing past the file's end, we need to find out if
				287	// there is a hole that needs to be inserted before our writing
				288	// position, and how many blocks it is going to cover (we need to
				289	// populate pointers to file blocks representing the hole with zeros)
				290
				291	{
				292	int item_offset = 1;
				293	/*
				294	* if ih is stat data, its offset is 0 and we don't want to
				295	* add 1 to pos in the hole_size calculation
				296	*/
				297	if (is_statdata_le_ih(ih))
				298	item_offset = 0;
				299	hole_size = (pos + item_offset -
				300	(le_key_k_offset( get_inode_item_key_version(inode),
				301	&(ih->ih_key)) +
				302	op_bytes_number(ih, inode->i_sb->s_blocksize))) >>
				303	inode->i_sb->s_blocksize_bits;
				304	}
				305
				306	if ( hole_size > 0 ) {
				307	int to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE ); // How much data to insert first time.
				308	/* area filled with zeroes, to supply as list of zero blocknumbers
				309	We allocate it outside of loop just in case loop would spin for
				310	several iterations. */
				311	char zeros = kmalloc(to_pasteUNFM_P_SIZE, GFP_ATOMIC); // We cannot insert more than MAX_ITEM_LEN bytes anyway.
				312	if ( !zeros ) {
				313	res = -ENOMEM;
				314	goto error_exit_free_blocks;
				315	}
				316	memset ( zeros, 0, to_paste*UNFM_P_SIZE);
				317	do {
				318	to_paste = min_t(__u64, hole_size, MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE );
				319	if ( is_indirect_le_ih(ih) ) {
				320	/* Ok, there is existing indirect item already. Need to append it */
				321	/* Calculate position past inserted item */
				322	make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3);
				323	res = reiserfs_paste_into_item( th, &path, &key, inode, (char )zeros, UNFM_P_SIZEto_paste);
				324	if ( res ) {
				325	kfree(zeros);
				326	goto error_exit_free_blocks;
				327	}
				328	} else if ( is_statdata_le_ih(ih) ) {
				329	/* No existing item, create it */
				330	/* item head for new item */
				331	struct item_head ins_ih;
				332
				333	/* create a key for our new item */
				334	make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3);
				335
				336	/* Create new item head for our new item */
				337	make_le_item_head (&ins_ih, &key, key.version, 1,
				338	TYPE_INDIRECT, to_paste*UNFM_P_SIZE,
				339	0 /* free space */);
				340
				341	/* Find where such item should live in the tree */
				342	res = search_item (inode->i_sb, &key, &path);
				343	if ( res != ITEM_NOT_FOUND ) {
				344	/* item should not exist, otherwise we have error */
				345	if ( res != -ENOSPC ) {
				346	reiserfs_warning (inode->i_sb,
				347	"green-9008: search_by_key (%K) returned %d",
				348	&key, res);
				349	}
				350	res = -EIO;
				351	kfree(zeros);
				352	goto error_exit_free_blocks;
				353	}
				354	res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)zeros);
				355	} else {
				356	reiserfs_panic(inode->i_sb, "green-9011: Unexpected key type %K\n", &key);
				357	}
				358	if ( res ) {
				359	kfree(zeros);
				360	goto error_exit_free_blocks;
				361	}
				362	/* Now we want to check if transaction is too full, and if it is
				363	we restart it. This will also free the path. */
				364	if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
				365	res = restart_transaction(th, inode, &path);
				366	if (res) {
				367	pathrelse (&path);
				368	kfree(zeros);
				369	goto error_exit;
				370	}
				371	}
				372
				373	/* Well, need to recalculate path and stuff */
				374	set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + (to_paste << inode->i_blkbits));
				375	res = search_for_position_by_key(inode->i_sb, &key, &path);
				376	if ( res == IO_ERROR ) {
				377	res = -EIO;
				378	kfree(zeros);
				379	goto error_exit_free_blocks;
				380	}
				381	bh=get_last_bh(&path);
				382	ih=get_ih(&path);
				383	item = get_item(&path);
				384	hole_size -= to_paste;
				385	} while ( hole_size );
				386	kfree(zeros);
				387	}
				388	}
				389
				390	// Go through existing indirect items first
				391	// replace all zeroes with blocknumbers from list
				392	// Note that if no corresponding item was found, by previous search,
				393	// it means there are no existing in-tree representation for file area
				394	// we are going to overwrite, so there is nothing to scan through for holes.
				395	for ( curr_block = 0, itempos = path.pos_in_item ; curr_block < blocks_to_allocate && res == POSITION_FOUND ; ) {
				396	retry:
				397
				398	if ( itempos >= ih_item_len(ih)/UNFM_P_SIZE ) {
				399	/* We run out of data in this indirect item, let's look for another
				400	one. */
				401	/* First if we are already modifying current item, log it */
				402	if ( modifying_this_item ) {
				403	journal_mark_dirty (th, inode->i_sb, bh);
				404	modifying_this_item = 0;
				405	}
				406	/* Then set the key to look for a new indirect item (offset of old
				407	item is added to old item length */
				408	set_cpu_key_k_offset( &key, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize));
				409	/* Search ofor position of new key in the tree. */
				410	res = search_for_position_by_key(inode->i_sb, &key, &path);
				411	if ( res == IO_ERROR) {
				412	res = -EIO;
				413	goto error_exit_free_blocks;
				414	}
				415	bh=get_last_bh(&path);
				416	ih=get_ih(&path);
				417	item = get_item(&path);
				418	itempos = path.pos_in_item;
				419	continue; // loop to check all kinds of conditions and so on.
				420	}
				421	/* Ok, we have correct position in item now, so let's see if it is
				422	representing file hole (blocknumber is zero) and fill it if needed */
				423	if ( !item[itempos] ) {
				424	/* Ok, a hole. Now we need to check if we already prepared this
				425	block to be journaled */
				426	while ( !modifying_this_item ) { // loop until succeed
				427	/* Well, this item is not journaled yet, so we must prepare
				428	it for journal first, before we can change it */
				429	struct item_head tmp_ih; // We copy item head of found item,
				430	// here to detect if fs changed under
				431	// us while we were preparing for
				432	// journal.
				433	int fs_gen; // We store fs generation here to find if someone
				434	// changes fs under our feet
				435
				436	copy_item_head (&tmp_ih, ih); // Remember itemhead
				437	fs_gen = get_generation (inode->i_sb); // remember fs generation
				438	reiserfs_prepare_for_journal(inode->i_sb, bh, 1); // Prepare a buffer within which indirect item is stored for changing.
				439	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
				440	// Sigh, fs was changed under us, we need to look for new
				441	// location of item we are working with
				442
				443	/* unmark prepaerd area as journaled and search for it's
				444	new position */
				445	reiserfs_restore_prepared_buffer(inode->i_sb, bh);
				446	res = search_for_position_by_key(inode->i_sb, &key, &path);
				447	if ( res == IO_ERROR) {
				448	res = -EIO;
				449	goto error_exit_free_blocks;
				450	}
				451	bh=get_last_bh(&path);
				452	ih=get_ih(&path);
				453	item = get_item(&path);
				454	itempos = path.pos_in_item;
				455	goto retry;
				456	}
				457	modifying_this_item = 1;
				458	}
				459	item[itempos] = allocated_blocks[curr_block]; // Assign new block
				460	curr_block++;
				461	}
				462	itempos++;
				463	}
				464
				465	if ( modifying_this_item ) { // We need to log last-accessed block, if it
				466	// was modified, but not logged yet.
				467	journal_mark_dirty (th, inode->i_sb, bh);
				468	}
				469
				470	if ( curr_block < blocks_to_allocate ) {
				471	// Oh, well need to append to indirect item, or to create indirect item
				472	// if there weren't any
				473	if ( is_indirect_le_ih(ih) ) {
				474	// Existing indirect item - append. First calculate key for append
				475	// position. We do not need to recalculate path as it should
				476	// already point to correct place.
				477	make_cpu_key( &key, inode, le_key_k_offset( get_inode_item_key_version(inode), &(ih->ih_key)) + op_bytes_number(ih, inode->i_sb->s_blocksize), TYPE_INDIRECT, 3);
				478	res = reiserfs_paste_into_item( th, &path, &key, inode, (char )(allocated_blocks+curr_block), UNFM_P_SIZE(blocks_to_allocate-curr_block));
				479	if ( res ) {
				480	goto error_exit_free_blocks;
				481	}
				482	} else if (is_statdata_le_ih(ih) ) {
				483	// Last found item was statdata. That means we need to create indirect item.
				484	struct item_head ins_ih; /* itemhead for new item */
				485
				486	/* create a key for our new item */
				487	make_cpu_key( &key, inode, 1, TYPE_INDIRECT, 3); // Position one,
				488	// because that's
				489	// where first
				490	// indirect item
				491	// begins
				492	/* Create new item head for our new item */
				493	make_le_item_head (&ins_ih, &key, key.version, 1, TYPE_INDIRECT,
				494	(blocks_to_allocate-curr_block)*UNFM_P_SIZE,
				495	0 /* free space */);
				496	/* Find where such item should live in the tree */
				497	res = search_item (inode->i_sb, &key, &path);
				498	if ( res != ITEM_NOT_FOUND ) {
				499	/* Well, if we have found such item already, or some error
				500	occured, we need to warn user and return error */
				501	if ( res != -ENOSPC ) {
				502	reiserfs_warning (inode->i_sb,
				503	"green-9009: search_by_key (%K) "
				504	"returned %d", &key, res);
				505	}
				506	res = -EIO;
				507	goto error_exit_free_blocks;
				508	}
				509	/* Insert item into the tree with the data as its body */
				510	res = reiserfs_insert_item( th, &path, &key, &ins_ih, inode, (char *)(allocated_blocks+curr_block));
				511	} else {
				512	reiserfs_panic(inode->i_sb, "green-9010: unexpected item type for key %K\n",&key);
				513	}
				514	}
				515
				516	// the caller is responsible for closing the transaction
				517	// unless we return an error, they are also responsible for logging
				518	// the inode.
				519	//
				520	pathrelse(&path);
				521	/*
				522	* cleanup prellocation from previous writes
				523	* if this is a partial block write
				524	*/
				525	if (write_bytes & (inode->i_sb->s_blocksize -1))
				526	reiserfs_discard_prealloc(th, inode);
				527	reiserfs_write_unlock(inode->i_sb);
				528
				529	// go through all the pages/buffers and map the buffers to newly allocated
				530	// blocks (so that system knows where to write these pages later).
				531	curr_block = 0;
				532	for ( i = 0; i < num_pages ; i++ ) {
				533	struct page *page=prepared_pages[i]; //current page
				534	struct buffer_head *head = page_buffers(page);// first buffer for a page
				535	int block_start, block_end; // in-page offsets for buffers.
				536
				537	if (!page_buffers(page))
				538	reiserfs_panic(inode->i_sb, "green-9005: No buffers for prepared page???");
				539
				540	/* For each buffer in page */
				541	for(bh = head, block_start = 0; bh != head \|\| !block_start;
				542	block_start=block_end, bh = bh->b_this_page) {
				543	if (!bh)
				544	reiserfs_panic(inode->i_sb, "green-9006: Allocated but absent buffer for a page?");
				545	block_end = block_start+inode->i_sb->s_blocksize;
				546	if (i == 0 && block_end <= from )
				547	/* if this buffer is before requested data to map, skip it */
				548	continue;
				549	if (i == num_pages - 1 && block_start >= to)
				550	/* If this buffer is after requested data to map, abort
				551	processing of current page */
				552	break;
				553
				554	if ( !buffer_mapped(bh) ) { // Ok, unmapped buffer, need to map it
				555	map_bh( bh, inode->i_sb, le32_to_cpu(allocated_blocks[curr_block]));
				556	curr_block++;
				557	set_buffer_new(bh);
				558	}
				559	}
				560	}
				561
				562	RFALSE( curr_block > blocks_to_allocate, "green-9007: Used too many blocks? weird");
				563
				564	kfree(allocated_blocks);
				565	return 0;
				566
				567	// Need to deal with transaction here.
				568	error_exit_free_blocks:
				569	pathrelse(&path);
				570	// free blocks
				571	for( i = 0; i < blocks_to_allocate; i++ )
				572	reiserfs_free_block(th, inode, le32_to_cpu(allocated_blocks[i]), 1);
				573
				574	error_exit:
				575	if (th->t_trans_id) {
				576	int err;
				577	// update any changes we made to blk count
				578	reiserfs_update_sd(th, inode);
				579	err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS);
				580	if (err)
				581	res = err;
				582	}
				583	reiserfs_write_unlock(inode->i_sb);
				584	kfree(allocated_blocks);
				585
				586	return res;
				587	}
				588
				589	/* Unlock pages prepared by reiserfs_prepare_file_region_for_write */
				590	static void reiserfs_unprepare_pages(struct page *prepared_pages, / list of locked pages */
				591	size_t num_pages /* amount of pages */) {
				592	int i; // loop counter
				593
				594	for (i=0; i < num_pages ; i++) {
				595	struct page *page = prepared_pages[i];
				596
				597	try_to_free_buffers(page);
				598	unlock_page(page);
				599	page_cache_release(page);
				600	}
				601	}
				602
				603	/* This function will copy data from userspace to specified pages within
				604	supplied byte range */
				605	static int reiserfs_copy_from_user_to_file_region(
				606	loff_t pos, /* In-file position */
				607	int num_pages, /* Number of pages affected */
				608	int write_bytes, /* Amount of bytes to write */
				609	struct page *prepared_pages, / pointer to
				610	array to
				611	prepared pages
				612	*/
				613	const char __user buf / Pointer to user-supplied
				614	data*/
				615	)
				616	{
				617	long page_fault=0; // status of copy_from_user.
				618	int i; // loop counter.
				619	int offset; // offset in page
				620
				621	for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) {
				622	size_t count = min_t(size_t,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page
				623	struct page *page=prepared_pages[i]; // Current page we process.
				624
				625	fault_in_pages_readable( buf, count);
				626
				627	/* Copy data from userspace to the current page */
				628	kmap(page);
				629	page_fault = __copy_from_user(page_address(page)+offset, buf, count); // Copy the data.
				630	/* Flush processor's dcache for this page */
				631	flush_dcache_page(page);
				632	kunmap(page);
				633	buf+=count;
				634	write_bytes-=count;
				635
				636	if (page_fault)
				637	break; // Was there a fault? abort.
				638	}
				639
				640	return page_fault?-EFAULT:0;
				641	}
				642
				643	/* taken fs/buffer.c:__block_commit_write */
				644	int reiserfs_commit_page(struct inode inode, struct page page,
				645	unsigned from, unsigned to)
				646	{
				647	unsigned block_start, block_end;
				648	int partial = 0;
				649	unsigned blocksize;
				650	struct buffer_head bh, head;
				651	unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT;
				652	int new;
				653	int logit = reiserfs_file_data_log(inode);
				654	struct super_block *s = inode->i_sb;
				655	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
				656	struct reiserfs_transaction_handle th;
				657	int ret = 0;
				658
				659	th.t_trans_id = 0;
				660	blocksize = 1 << inode->i_blkbits;
				661
				662	if (logit) {
				663	reiserfs_write_lock(s);
				664	ret = journal_begin(&th, s, bh_per_page + 1);
				665	if (ret)
				666	goto drop_write_lock;
				667	reiserfs_update_inode_transaction(inode);
				668	}
				669	for(bh = head = page_buffers(page), block_start = 0;
				670	bh != head \|\| !block_start;
				671	block_start=block_end, bh = bh->b_this_page)
				672	{
				673
				674	new = buffer_new(bh);
				675	clear_buffer_new(bh);
				676	block_end = block_start + blocksize;
				677	if (block_end <= from \|\| block_start >= to) {
				678	if (!buffer_uptodate(bh))
				679	partial = 1;
				680	} else {
				681	set_buffer_uptodate(bh);
				682	if (logit) {
				683	reiserfs_prepare_for_journal(s, bh, 1);
				684	journal_mark_dirty(&th, s, bh);
				685	} else if (!buffer_dirty(bh)) {
				686	mark_buffer_dirty(bh);
				687	/* do data=ordered on any page past the end
				688	* of file and any buffer marked BH_New.
				689	*/
				690	if (reiserfs_data_ordered(inode->i_sb) &&
				691	(new \|\| page->index >= i_size_index)) {
				692	reiserfs_add_ordered_list(inode, bh);
				693	}
				694	}
				695	}
				696	}
				697	if (logit) {
				698	ret = journal_end(&th, s, bh_per_page + 1);
				699	drop_write_lock:
				700	reiserfs_write_unlock(s);
				701	}
				702	/*
				703	* If this is a partial write which happened to make all buffers
				704	* uptodate then we can optimize away a bogus readpage() for
				705	* the next read(). Here we 'discover' whether the page went
				706	* uptodate as a result of this (potentially partial) write.
				707	*/
				708	if (!partial)
				709	SetPageUptodate(page);
				710	return ret;
				711	}
				712
				713
				714	/* Submit pages for write. This was separated from actual file copying
				715	because we might want to allocate block numbers in-between.
				716	This function assumes that caller will adjust file size to correct value. */
				717	static int reiserfs_submit_file_region_for_write(
				718	struct reiserfs_transaction_handle *th,
				719	struct inode *inode,
				720	loff_t pos, /* Writing position offset */
				721	size_t num_pages, /* Number of pages to write */
				722	size_t write_bytes, /* number of bytes to write */
				723	struct page *prepared_pages / list of pages */
				724	)
				725	{
				726	int status; // return status of block_commit_write.
				727	int retval = 0; // Return value we are going to return.
				728	int i; // loop counter
				729	int offset; // Writing offset in page.
				730	int orig_write_bytes = write_bytes;
				731	int sd_update = 0;
				732
				733	for ( i = 0, offset = (pos & (PAGE_CACHE_SIZE-1)); i < num_pages ; i++,offset=0) {
				734	int count = min_t(int,PAGE_CACHE_SIZE-offset,write_bytes); // How much of bytes to write to this page
				735	struct page *page=prepared_pages[i]; // Current page we process.
				736
				737	status = reiserfs_commit_page(inode, page, offset, offset+count);
				738	if ( status )
				739	retval = status; // To not overcomplicate matters We are going to
				740	// submit all the pages even if there was error.
				741	// we only remember error status to report it on
				742	// exit.
				743	write_bytes-=count;
				744	}
				745	/* now that we've gotten all the ordered buffers marked dirty,
				746	* we can safely update i_size and close any running transaction
				747	*/
				748	if ( pos + orig_write_bytes > inode->i_size) {
				749	inode->i_size = pos + orig_write_bytes; // Set new size
				750	/* If the file have grown so much that tail packing is no
				751	* longer possible, reset "need to pack" flag */
				752	if ( (have_large_tails (inode->i_sb) &&
				753	inode->i_size > i_block_size (inode)*4) \|\|
				754	(have_small_tails (inode->i_sb) &&
				755	inode->i_size > i_block_size(inode)) )
				756	REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ;
				757	else if ( (have_large_tails (inode->i_sb) &&
				758	inode->i_size < i_block_size (inode)*4) \|\|
				759	(have_small_tails (inode->i_sb) &&
				760	inode->i_size < i_block_size(inode)) )
				761	REISERFS_I(inode)->i_flags \|= i_pack_on_close_mask ;
				762
				763	if (th->t_trans_id) {
				764	reiserfs_write_lock(inode->i_sb);
				765	reiserfs_update_sd(th, inode); // And update on-disk metadata
				766	reiserfs_write_unlock(inode->i_sb);
				767	} else
				768	inode->i_sb->s_op->dirty_inode(inode);
				769
				770	sd_update = 1;
				771	}
				772	if (th->t_trans_id) {
				773	reiserfs_write_lock(inode->i_sb);
				774	if (!sd_update)
				775	reiserfs_update_sd(th, inode);
				776	status = journal_end(th, th->t_super, th->t_blocks_allocated);
				777	if (status)
				778	retval = status;
				779	reiserfs_write_unlock(inode->i_sb);
				780	}
				781	th->t_trans_id = 0;
				782
				783	/*
				784	* we have to unlock the pages after updating i_size, otherwise
				785	* we race with writepage
				786	*/
				787	for ( i = 0; i < num_pages ; i++) {
				788	struct page *page=prepared_pages[i];
				789	unlock_page(page);
				790	mark_page_accessed(page);
				791	page_cache_release(page);
				792	}
				793	return retval;
				794	}
				795
				796	/* Look if passed writing region is going to touch file's tail
				797	(if it is present). And if it is, convert the tail to unformatted node */
				798	static int reiserfs_check_for_tail_and_convert( struct inode inode, / inode to deal with */
				799	loff_t pos, /* Writing position */
				800	int write_bytes /* amount of bytes to write */
				801	)
				802	{
				803	INITIALIZE_PATH(path); // needed for search_for_position
				804	struct cpu_key key; // Key that would represent last touched writing byte.
				805	struct item_head *ih; // item header of found block;
				806	int res; // Return value of various functions we call.
				807	int cont_expand_offset; // We will put offset for generic_cont_expand here
				808	// This can be int just because tails are created
				809	// only for small files.
				810
				811	/* this embodies a dependency on a particular tail policy */
				812	if ( inode->i_size >= inode->i_sb->s_blocksize*4 ) {
				813	/* such a big files do not have tails, so we won't bother ourselves
				814	to look for tails, simply return */
				815	return 0;
				816	}
				817
				818	reiserfs_write_lock(inode->i_sb);
				819	/* find the item containing the last byte to be written, or if
				820	* writing past the end of the file then the last item of the
				821	* file (and then we check its type). */
				822	make_cpu_key (&key, inode, pos+write_bytes+1, TYPE_ANY, 3/key length/);
				823	res = search_for_position_by_key(inode->i_sb, &key, &path);
				824	if ( res == IO_ERROR ) {
				825	reiserfs_write_unlock(inode->i_sb);
				826	return -EIO;
				827	}
				828	ih = get_ih(&path);
				829	res = 0;
				830	if ( is_direct_le_ih(ih) ) {
				831	/* Ok, closest item is file tail (tails are stored in "direct"
				832	* items), so we need to unpack it. */
				833	/* To not overcomplicate matters, we just call generic_cont_expand
				834	which will in turn call other stuff and finally will boil down to
				835	reiserfs_get_block() that would do necessary conversion. */
				836	cont_expand_offset = le_key_k_offset(get_inode_item_key_version(inode), &(ih->ih_key));
				837	pathrelse(&path);
				838	res = generic_cont_expand( inode, cont_expand_offset);
				839	} else
				840	pathrelse(&path);
				841
				842	reiserfs_write_unlock(inode->i_sb);
				843	return res;
				844	}
				845
				846	/* This function locks pages starting from @pos for @inode.
				847	@num_pages pages are locked and stored in
				848	@prepared_pages array. Also buffers are allocated for these pages.
				849	First and last page of the region is read if it is overwritten only
				850	partially. If last page did not exist before write (file hole or file
				851	append), it is zeroed, then.
				852	Returns number of unallocated blocks that should be allocated to cover
				853	new file data.*/
				854	static int reiserfs_prepare_file_region_for_write(
				855	struct inode inode / Inode of the file */,
				856	loff_t pos, /* position in the file */
				857	size_t num_pages, /* number of pages to
				858	prepare */
				859	size_t write_bytes, /* Amount of bytes to be
				860	overwritten from
				861	@pos */
				862	struct page *prepared_pages / pointer to array
				863	where to store
				864	prepared pages */
				865	)
				866	{
				867	int res=0; // Return values of different functions we call.
				868	unsigned long index = pos >> PAGE_CACHE_SHIFT; // Offset in file in pages.
				869	int from = (pos & (PAGE_CACHE_SIZE - 1)); // Writing offset in first page
				870	int to = ((pos + write_bytes - 1) & (PAGE_CACHE_SIZE - 1)) + 1;
				871	/* offset of last modified byte in last
				872	page */
				873	struct address_space *mapping = inode->i_mapping; // Pages are mapped here.
				874	int i; // Simple counter
				875	int blocks = 0; /* Return value (blocks that should be allocated) */
				876	struct buffer_head bh, head; // Current bufferhead and first bufferhead
				877	// of a page.
				878	unsigned block_start, block_end; // Starting and ending offsets of current
				879	// buffer in the page.
				880	struct buffer_head wait[2], *wait_bh=wait; // Buffers for page, if
				881	// Page appeared to be not up
				882	// to date. Note how we have
				883	// at most 2 buffers, this is
				884	// because we at most may
				885	// partially overwrite two
				886	// buffers for one page. One at // the beginning of write area
				887	// and one at the end.
				888	// Everything inthe middle gets // overwritten totally.
				889
				890	struct cpu_key key; // cpu key of item that we are going to deal with
				891	struct item_head *ih = NULL; // pointer to item head that we are going to deal with
				892	struct buffer_head *itembuf=NULL; // Buffer head that contains items that we are going to deal with
				893	INITIALIZE_PATH(path); // path to item, that we are going to deal with.
				894	__u32 * item=NULL; // pointer to item we are going to deal with
				895	int item_pos=-1; /* Position in indirect item */
				896
				897
				898	if ( num_pages < 1 ) {
				899	reiserfs_warning (inode->i_sb,
				900	"green-9001: reiserfs_prepare_file_region_for_write "
				901	"called with zero number of pages to process");
				902	return -EFAULT;
				903	}
				904
				905	/* We have 2 loops for pages. In first loop we grab and lock the pages, so
				906	that nobody would touch these until we release the pages. Then
				907	we'd start to deal with mapping buffers to blocks. */
				908	for ( i = 0; i < num_pages; i++) {
				909	prepared_pages[i] = grab_cache_page(mapping, index + i); // locks the page
				910	if ( !prepared_pages[i]) {
				911	res = -ENOMEM;
				912	goto failed_page_grabbing;
				913	}
				914	if (!page_has_buffers(prepared_pages[i]))
				915	create_empty_buffers(prepared_pages[i], inode->i_sb->s_blocksize, 0);
				916	}
				917
				918	/* Let's count amount of blocks for a case where all the blocks
				919	overwritten are new (we will substract already allocated blocks later)*/
				920	if ( num_pages > 2 )
				921	/* These are full-overwritten pages so we count all the blocks in
				922	these pages are counted as needed to be allocated */
				923	blocks = (num_pages - 2) << (PAGE_CACHE_SHIFT - inode->i_blkbits);
				924
				925	/* count blocks needed for first page (possibly partially written) */
				926	blocks += ((PAGE_CACHE_SIZE - from) >> inode->i_blkbits) +
				927	!!(from & (inode->i_sb->s_blocksize-1)); /* roundup */
				928
				929	/* Now we account for last page. If last page == first page (we
				930	overwrite only one page), we substract all the blocks past the
				931	last writing position in a page out of already calculated number
				932	of blocks */
				933	blocks += ((num_pages > 1) << (PAGE_CACHE_SHIFT-inode->i_blkbits)) -
				934	((PAGE_CACHE_SIZE - to) >> inode->i_blkbits);
				935	/* Note how we do not roundup here since partial blocks still
				936	should be allocated */
				937
				938	/* Now if all the write area lies past the file end, no point in
				939	maping blocks, since there is none, so we just zero out remaining
				940	parts of first and last pages in write area (if needed) */
				941	if ( (pos & ~((loff_t)PAGE_CACHE_SIZE - 1)) > inode->i_size ) {
				942	if ( from != 0 ) {/* First page needs to be partially zeroed */
				943	char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
				944	memset(kaddr, 0, from);
				945	kunmap_atomic( kaddr, KM_USER0);
				946	}
				947	if ( to != PAGE_CACHE_SIZE ) { /* Last page needs to be partially zeroed */
				948	char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0);
				949	memset(kaddr+to, 0, PAGE_CACHE_SIZE - to);
				950	kunmap_atomic( kaddr, KM_USER0);
				951	}
				952
				953	/* Since all blocks are new - use already calculated value */
				954	return blocks;
				955	}
				956
				957	/* Well, since we write somewhere into the middle of a file, there is
				958	possibility we are writing over some already allocated blocks, so
				959	let's map these blocks and substract number of such blocks out of blocks
				960	we need to allocate (calculated above) */
				961	/* Mask write position to start on blocksize, we do it out of the
				962	loop for performance reasons */
				963	pos &= ~((loff_t) inode->i_sb->s_blocksize - 1);
				964	/* Set cpu key to the starting position in a file (on left block boundary)*/
				965	make_cpu_key (&key, inode, 1 + ((pos) & ~((loff_t) inode->i_sb->s_blocksize - 1)), TYPE_ANY, 3/key length/);
				966
				967	reiserfs_write_lock(inode->i_sb); // We need that for at least search_by_key()
				968	for ( i = 0; i < num_pages ; i++ ) {
				969
				970	head = page_buffers(prepared_pages[i]);
				971	/* For each buffer in the page */
				972	for(bh = head, block_start = 0; bh != head \|\| !block_start;
				973	block_start=block_end, bh = bh->b_this_page) {
				974	if (!bh)
				975	reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
				976	/* Find where this buffer ends */
				977	block_end = block_start+inode->i_sb->s_blocksize;
				978	if (i == 0 && block_end <= from )
				979	/* if this buffer is before requested data to map, skip it*/
				980	continue;
				981
				982	if (i == num_pages - 1 && block_start >= to) {
				983	/* If this buffer is after requested data to map, abort
				984	processing of current page */
				985	break;
				986	}
				987
				988	if ( buffer_mapped(bh) && bh->b_blocknr !=0 ) {
				989	/* This is optimisation for a case where buffer is mapped
				990	and have blocknumber assigned. In case significant amount
				991	of such buffers are present, we may avoid some amount
				992	of search_by_key calls.
				993	Probably it would be possible to move parts of this code
				994	out of BKL, but I afraid that would overcomplicate code
				995	without any noticeable benefit.
				996	*/
				997	item_pos++;
				998	/* Update the key */
				999	set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize);
				1000	blocks--; // Decrease the amount of blocks that need to be
				1001	// allocated
				1002	continue; // Go to the next buffer
				1003	}
				1004
				1005	if ( !itembuf \|\| /* if first iteration */
				1006	item_pos >= ih_item_len(ih)/UNFM_P_SIZE)
				1007	{ /* or if we progressed past the
				1008	current unformatted_item */
				1009	/* Try to find next item */
				1010	res = search_for_position_by_key(inode->i_sb, &key, &path);
				1011	/* Abort if no more items */
				1012	if ( res != POSITION_FOUND ) {
				1013	/* make sure later loops don't use this item */
				1014	itembuf = NULL;
				1015	item = NULL;
				1016	break;
				1017	}
				1018
				1019	/* Update information about current indirect item */
				1020	itembuf = get_last_bh( &path );
				1021	ih = get_ih( &path );
				1022	item = get_item( &path );
				1023	item_pos = path.pos_in_item;
				1024
				1025	RFALSE( !is_indirect_le_ih (ih), "green-9003: indirect item expected");
				1026	}
				1027
				1028	/* See if there is some block associated with the file
				1029	at that position, map the buffer to this block */
				1030	if ( get_block_num(item,item_pos) ) {
				1031	map_bh(bh, inode->i_sb, get_block_num(item,item_pos));
				1032	blocks--; // Decrease the amount of blocks that need to be
				1033	// allocated
				1034	}
				1035	item_pos++;
				1036	/* Update the key */
				1037	set_cpu_key_k_offset( &key, cpu_key_k_offset(&key) + inode->i_sb->s_blocksize);
				1038	}
				1039	}
				1040	pathrelse(&path); // Free the path
				1041	reiserfs_write_unlock(inode->i_sb);
				1042
				1043	/* Now zero out unmappend buffers for the first and last pages of
				1044	write area or issue read requests if page is mapped. */
				1045	/* First page, see if it is not uptodate */
				1046	if ( !PageUptodate(prepared_pages[0]) ) {
				1047	head = page_buffers(prepared_pages[0]);
				1048
				1049	/* For each buffer in page */
				1050	for(bh = head, block_start = 0; bh != head \|\| !block_start;
				1051	block_start=block_end, bh = bh->b_this_page) {
				1052
				1053	if (!bh)
				1054	reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
				1055	/* Find where this buffer ends */
				1056	block_end = block_start+inode->i_sb->s_blocksize;
				1057	if ( block_end <= from )
				1058	/* if this buffer is before requested data to map, skip it*/
				1059	continue;
				1060	if ( block_start < from ) { /* Aha, our partial buffer */
				1061	if ( buffer_mapped(bh) ) { /* If it is mapped, we need to
				1062	issue READ request for it to
				1063	not loose data */
				1064	ll_rw_block(READ, 1, &bh);
				1065	*wait_bh++=bh;
				1066	} else { /* Not mapped, zero it */
				1067	char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
				1068	memset(kaddr+block_start, 0, from-block_start);
				1069	kunmap_atomic( kaddr, KM_USER0);
				1070	set_buffer_uptodate(bh);
				1071	}
				1072	}
				1073	}
				1074	}
				1075
				1076	/* Last page, see if it is not uptodate, or if the last page is past the end of the file. */
				1077	if ( !PageUptodate(prepared_pages[num_pages-1]) \|\|
				1078	((pos+write_bytes)>>PAGE_CACHE_SHIFT) > (inode->i_size>>PAGE_CACHE_SHIFT) ) {
				1079	head = page_buffers(prepared_pages[num_pages-1]);
				1080
				1081	/* for each buffer in page */
				1082	for(bh = head, block_start = 0; bh != head \|\| !block_start;
				1083	block_start=block_end, bh = bh->b_this_page) {
				1084
				1085	if (!bh)
				1086	reiserfs_panic(inode->i_sb, "green-9002: Allocated but absent buffer for a page?");
				1087	/* Find where this buffer ends */
				1088	block_end = block_start+inode->i_sb->s_blocksize;
				1089	if ( block_start >= to )
				1090	/* if this buffer is after requested data to map, skip it*/
				1091	break;
				1092	if ( block_end > to ) { /* Aha, our partial buffer */
				1093	if ( buffer_mapped(bh) ) { /* If it is mapped, we need to
				1094	issue READ request for it to
				1095	not loose data */
				1096	ll_rw_block(READ, 1, &bh);
				1097	*wait_bh++=bh;
				1098	} else { /* Not mapped, zero it */
				1099	char *kaddr = kmap_atomic(prepared_pages[num_pages-1], KM_USER0);
				1100	memset(kaddr+to, 0, block_end-to);
				1101	kunmap_atomic( kaddr, KM_USER0);
				1102	set_buffer_uptodate(bh);
				1103	}
				1104	}
				1105	}
				1106	}
				1107
				1108	/* Wait for read requests we made to happen, if necessary */
				1109	while(wait_bh > wait) {
				1110	wait_on_buffer(*--wait_bh);
				1111	if (!buffer_uptodate(*wait_bh)) {
				1112	res = -EIO;
				1113	goto failed_read;
				1114	}
				1115	}
				1116
				1117	return blocks;
				1118	failed_page_grabbing:
				1119	num_pages = i;
				1120	failed_read:
				1121	reiserfs_unprepare_pages(prepared_pages, num_pages);
				1122	return res;
				1123	}
				1124
				1125	/* Write @count bytes at position @ppos in a file indicated by @file
				1126	from the buffer @buf.
				1127
				1128	generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
				1129	something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was
				1130	written for (ext2/3). This is for several reasons:
				1131
				1132	* It has no understanding of any filesystem specific optimizations.
				1133
				1134	* It enters the filesystem repeatedly for each page that is written.
				1135
				1136	* It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key
				1137	* operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time
				1138	* to reiserfs which allows for fewer tree traversals.
				1139
				1140	* Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks.
				1141
				1142	* Asking the block allocation code for blocks one at a time is slightly less efficient.
				1143
				1144	All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to
				1145	use it, but we were in a hurry to make code freeze, and so it couldn't be revised then. This new code should make
				1146	things right finally.
				1147
				1148	Future Features: providing search_by_key with hints.
				1149
				1150	*/
				1151	static ssize_t reiserfs_file_write( struct file file, / the file we are going to write into */
				1152	const char __user buf, / pointer to user supplied data
				1153	(in userspace) */
				1154	size_t count, /* amount of bytes to write */
				1155	loff_t ppos / pointer to position in file that we start writing at. Should be updated to
				1156	* new current position before returning. */ )
				1157	{
				1158	size_t already_written = 0; // Number of bytes already written to the file.
				1159	loff_t pos; // Current position in the file.
				1160	ssize_t res; // return value of various functions that we call.
				1161	int err = 0;
				1162	struct inode *inode = file->f_dentry->d_inode; // Inode of the file that we are writing to.
				1163	/* To simplify coding at this time, we store
				1164	locked pages in array for now */
				1165	struct page * prepared_pages[REISERFS_WRITE_PAGES_AT_A_TIME];
				1166	struct reiserfs_transaction_handle th;
				1167	th.t_trans_id = 0;
				1168
				1169	if ( file->f_flags & O_DIRECT) { // Direct IO needs treatment
				1170	ssize_t result, after_file_end = 0;
				1171	if ( (*ppos + count >= inode->i_size) \|\| (file->f_flags & O_APPEND) ) {
				1172	/* If we are appending a file, we need to put this savelink in here.
				1173	If we will crash while doing direct io, finish_unfinished will
				1174	cut the garbage from the file end. */
				1175	reiserfs_write_lock(inode->i_sb);
				1176	err = journal_begin(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT );
				1177	if (err) {
				1178	reiserfs_write_unlock (inode->i_sb);
				1179	return err;
				1180	}
				1181	reiserfs_update_inode_transaction(inode);
				1182	add_save_link (&th, inode, 1 /* Truncate */);
				1183	after_file_end = 1;
				1184	err = journal_end(&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT );
				1185	reiserfs_write_unlock(inode->i_sb);
				1186	if (err)
				1187	return err;
				1188	}
				1189	result = generic_file_write(file, buf, count, ppos);
				1190
				1191	if ( after_file_end ) { /* Now update i_size and remove the savelink */
				1192	struct reiserfs_transaction_handle th;
				1193	reiserfs_write_lock(inode->i_sb);
				1194	err = journal_begin(&th, inode->i_sb, 1);
				1195	if (err) {
				1196	reiserfs_write_unlock (inode->i_sb);
				1197	return err;
				1198	}
				1199	reiserfs_update_inode_transaction(inode);
				1200	reiserfs_update_sd(&th, inode);
				1201	err = journal_end(&th, inode->i_sb, 1);
				1202	if (err) {
				1203	reiserfs_write_unlock (inode->i_sb);
				1204	return err;
				1205	}
				1206	err = remove_save_link (inode, 1/* truncate */);
				1207	reiserfs_write_unlock(inode->i_sb);
				1208	if (err)
				1209	return err;
				1210	}
				1211
				1212	return result;
				1213	}
				1214
				1215	if ( unlikely((ssize_t) count < 0 ))
				1216	return -EINVAL;
				1217
				1218	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
				1219	return -EFAULT;
				1220
				1221	down(&inode->i_sem); // locks the entire file for just us
				1222
				1223	pos = *ppos;
				1224
				1225	/* Check if we can write to specified region of file, file
				1226	is not overly big and this kind of stuff. Adjust pos and
				1227	count, if needed */
				1228	res = generic_write_checks(file, &pos, &count, 0);
				1229	if (res)
				1230	goto out;
				1231
				1232	if ( count == 0 )
				1233	goto out;
				1234
				1235	res = remove_suid(file->f_dentry);
				1236	if (res)
				1237	goto out;
				1238
				1239	inode_update_time(inode, 1); /* Both mtime and ctime */
				1240
				1241	// Ok, we are done with all the checks.
				1242
				1243	// Now we should start real work
				1244
				1245	/* If we are going to write past the file's packed tail or if we are going
				1246	to overwrite part of the tail, we need that tail to be converted into
				1247	unformatted node */
				1248	res = reiserfs_check_for_tail_and_convert( inode, pos, count);
				1249	if (res)
				1250	goto out;
				1251
				1252	while ( count > 0) {
				1253	/* This is the main loop in which we running until some error occures
				1254	or until we write all of the data. */
				1255	size_t num_pages;/* amount of pages we are going to write this iteration */
				1256	size_t write_bytes; /* amount of bytes to write during this iteration */
				1257	size_t blocks_to_allocate; /* how much blocks we need to allocate for this iteration */
				1258
				1259	/* (pos & (PAGE_CACHE_SIZE-1)) is an idiom for offset into a page of pos*/
				1260	num_pages = !!((pos+count) & (PAGE_CACHE_SIZE - 1)) + /* round up partial
				1261	pages */
				1262	((count + (pos & (PAGE_CACHE_SIZE-1))) >> PAGE_CACHE_SHIFT);
				1263	/* convert size to amount of
				1264	pages */
				1265	reiserfs_write_lock(inode->i_sb);
				1266	if ( num_pages > REISERFS_WRITE_PAGES_AT_A_TIME
				1267	\|\| num_pages > reiserfs_can_fit_pages(inode->i_sb) ) {
				1268	/* If we were asked to write more data than we want to or if there
				1269	is not that much space, then we shorten amount of data to write
				1270	for this iteration. */
				1271	num_pages = min_t(size_t, REISERFS_WRITE_PAGES_AT_A_TIME, reiserfs_can_fit_pages(inode->i_sb));
				1272	/* Also we should not forget to set size in bytes accordingly */
				1273	write_bytes = (num_pages << PAGE_CACHE_SHIFT) -
				1274	(pos & (PAGE_CACHE_SIZE-1));
				1275	/* If position is not on the
				1276	start of the page, we need
				1277	to substract the offset
				1278	within page */
				1279	} else
				1280	write_bytes = count;
				1281
				1282	/* reserve the blocks to be allocated later, so that later on
				1283	we still have the space to write the blocks to */
				1284	reiserfs_claim_blocks_to_be_allocated(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits));
				1285	reiserfs_write_unlock(inode->i_sb);
				1286
Jan Kara	127144d	2005-05-01 08:59:07 -0700	[diff] [blame^]	1287	if ( !num_pages ) { /* If we do not have enough space even for a single page... */
				1288	if ( pos > inode->i_size+inode->i_sb->s_blocksize-(pos & (inode->i_sb->s_blocksize-1))) {
				1289	res = -ENOSPC;
				1290	break; // In case we are writing past the end of the last file block, break.
				1291	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1292	// Otherwise we are possibly overwriting the file, so
				1293	// let's set write size to be equal or less than blocksize.
				1294	// This way we get it correctly for file holes.
				1295	// But overwriting files on absolutelly full volumes would not
				1296	// be very efficient. Well, people are not supposed to fill
				1297	// 100% of disk space anyway.
				1298	write_bytes = min_t(size_t, count, inode->i_sb->s_blocksize - (pos & (inode->i_sb->s_blocksize - 1)));
				1299	num_pages = 1;
				1300	// No blocks were claimed before, so do it now.
				1301	reiserfs_claim_blocks_to_be_allocated(inode->i_sb, 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits));
				1302	}
				1303
				1304	/* Prepare for writing into the region, read in all the
				1305	partially overwritten pages, if needed. And lock the pages,
				1306	so that nobody else can access these until we are done.
				1307	We get number of actual blocks needed as a result.*/
				1308	blocks_to_allocate = reiserfs_prepare_file_region_for_write(inode, pos, num_pages, write_bytes, prepared_pages);
				1309	if ( blocks_to_allocate < 0 ) {
				1310	res = blocks_to_allocate;
				1311	reiserfs_release_claimed_blocks(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits));
				1312	break;
				1313	}
				1314
				1315	/* First we correct our estimate of how many blocks we need */
				1316	reiserfs_release_claimed_blocks(inode->i_sb, (num_pages << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits)) - blocks_to_allocate );
				1317
				1318	if ( blocks_to_allocate > 0) {/We only allocate blocks if we need to/
				1319	/* Fill in all the possible holes and append the file if needed */
				1320	res = reiserfs_allocate_blocks_for_region(&th, inode, pos, num_pages, write_bytes, prepared_pages, blocks_to_allocate);
				1321	}
				1322
				1323	/* well, we have allocated the blocks, so it is time to free
				1324	the reservation we made earlier. */
				1325	reiserfs_release_claimed_blocks(inode->i_sb, blocks_to_allocate);
				1326	if ( res ) {
				1327	reiserfs_unprepare_pages(prepared_pages, num_pages);
				1328	break;
				1329	}
				1330
				1331	/* NOTE that allocating blocks and filling blocks can be done in reverse order
				1332	and probably we would do that just to get rid of garbage in files after a
				1333	crash */
				1334
				1335	/* Copy data from user-supplied buffer to file's pages */
				1336	res = reiserfs_copy_from_user_to_file_region(pos, num_pages, write_bytes, prepared_pages, buf);
				1337	if ( res ) {
				1338	reiserfs_unprepare_pages(prepared_pages, num_pages);
				1339	break;
				1340	}
				1341
				1342	/* Send the pages to disk and unlock them. */
				1343	res = reiserfs_submit_file_region_for_write(&th, inode, pos, num_pages,
				1344	write_bytes,prepared_pages);
				1345	if ( res )
				1346	break;
				1347
				1348	already_written += write_bytes;
				1349	buf += write_bytes;
				1350	*ppos = pos += write_bytes;
				1351	count -= write_bytes;
				1352	balance_dirty_pages_ratelimited(inode->i_mapping);
				1353	}
				1354
				1355	/* this is only true on error */
				1356	if (th.t_trans_id) {
				1357	reiserfs_write_lock(inode->i_sb);
				1358	err = journal_end(&th, th.t_super, th.t_blocks_allocated);
				1359	reiserfs_write_unlock(inode->i_sb);
				1360	if (err) {
				1361	res = err;
				1362	goto out;
				1363	}
				1364	}
				1365
				1366	if ((file->f_flags & O_SYNC) \|\| IS_SYNC(inode))
				1367	res = generic_osync_inode(inode, file->f_mapping, OSYNC_METADATA\|OSYNC_DATA);
				1368
				1369	up(&inode->i_sem);
				1370	reiserfs_async_progress_wait(inode->i_sb);
				1371	return (already_written != 0)?already_written:res;
				1372
				1373	out:
				1374	up(&inode->i_sem); // unlock the file on exit.
				1375	return res;
				1376	}
				1377
				1378	static ssize_t reiserfs_aio_write(struct kiocb iocb, const char __user buf,
				1379	size_t count, loff_t pos)
				1380	{
				1381	return generic_file_aio_write(iocb, buf, count, pos);
				1382	}
				1383
				1384
				1385
				1386	struct file_operations reiserfs_file_operations = {
				1387	.read = generic_file_read,
				1388	.write = reiserfs_file_write,
				1389	.ioctl = reiserfs_ioctl,
				1390	.mmap = generic_file_mmap,
				1391	.release = reiserfs_file_release,
				1392	.fsync = reiserfs_sync_file,
				1393	.sendfile = generic_file_sendfile,
				1394	.aio_read = generic_file_aio_read,
				1395	.aio_write = reiserfs_aio_write,
				1396	};
				1397
				1398
				1399	struct inode_operations reiserfs_file_inode_operations = {
				1400	.truncate = reiserfs_vfs_truncate_file,
				1401	.setattr = reiserfs_setattr,
				1402	.setxattr = reiserfs_setxattr,
				1403	.getxattr = reiserfs_getxattr,
				1404	.listxattr = reiserfs_listxattr,
				1405	.removexattr = reiserfs_removexattr,
				1406	.permission = reiserfs_permission,
				1407	};
				1408
				1409