Blame - fs/reiserfs/inode.c - kernel/msm-5.4

blob: 5fdb9f97b99ec83f47c82c9db32b19cefea05655 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
				3	*/
				4
				5	#include <linux/config.h>
				6	#include <linux/time.h>
				7	#include <linux/fs.h>
				8	#include <linux/reiserfs_fs.h>
				9	#include <linux/reiserfs_acl.h>
				10	#include <linux/reiserfs_xattr.h>
				11	#include <linux/smp_lock.h>
				12	#include <linux/pagemap.h>
				13	#include <linux/highmem.h>
				14	#include <asm/uaccess.h>
				15	#include <asm/unaligned.h>
				16	#include <linux/buffer_head.h>
				17	#include <linux/mpage.h>
				18	#include <linux/writeback.h>
				19	#include <linux/quotaops.h>
				20
				21	extern int reiserfs_default_io_size; /* default io size devuned in super.c */
				22
				23	static int reiserfs_commit_write(struct file f, struct page page,
				24	unsigned from, unsigned to);
				25	static int reiserfs_prepare_write(struct file f, struct page page,
				26	unsigned from, unsigned to);
				27
				28	void reiserfs_delete_inode (struct inode * inode)
				29	{
				30	/* We need blocks for transaction + (user+group) quota update (possibly delete) */
				31	int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS;
				32	struct reiserfs_transaction_handle th ;
				33
				34	reiserfs_write_lock(inode->i_sb);
				35
				36	/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
				37	if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
				38	down (&inode->i_sem);
				39
				40	reiserfs_delete_xattrs (inode);
				41
				42	if (journal_begin(&th, inode->i_sb, jbegin_count)) {
				43	up (&inode->i_sem);
				44	goto out;
				45	}
				46	reiserfs_update_inode_transaction(inode) ;
				47
				48	if (reiserfs_delete_object (&th, inode)) {
				49	up (&inode->i_sem);
				50	goto out;
				51	}
				52
				53	/* Do quota update inside a transaction for journaled quotas. We must do that
				54	* after delete_object so that quota updates go into the same transaction as
				55	* stat data deletion */
				56	DQUOT_FREE_INODE(inode);
				57
				58	if (journal_end(&th, inode->i_sb, jbegin_count)) {
				59	up (&inode->i_sem);
				60	goto out;
				61	}
				62
				63	up (&inode->i_sem);
				64
				65	/* all items of file are deleted, so we can remove "save" link */
				66	remove_save_link (inode, 0/* not truncate /); / we can't do anything
				67	* about an error here */
				68	} else {
				69	/* no object items are in the tree */
				70	;
				71	}
				72	out:
				73	clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */
				74	inode->i_blocks = 0;
				75	reiserfs_write_unlock(inode->i_sb);
				76	}
				77
				78	static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid,
				79	loff_t offset, int type, int length )
				80	{
				81	key->version = version;
				82
				83	key->on_disk_key.k_dir_id = dirid;
				84	key->on_disk_key.k_objectid = objectid;
				85	set_cpu_key_k_offset (key, offset);
				86	set_cpu_key_k_type (key, type);
				87	key->key_length = length;
				88	}
				89
				90
				91	/* take base of inode_key (it comes from inode always) (dirid, objectid) and version from an inode, set
				92	offset and type of key */
				93	void make_cpu_key (struct cpu_key * key, struct inode * inode, loff_t offset,
				94	int type, int length )
				95	{
				96	_make_cpu_key (key, get_inode_item_key_version (inode), le32_to_cpu (INODE_PKEY (inode)->k_dir_id),
				97	le32_to_cpu (INODE_PKEY (inode)->k_objectid),
				98	offset, type, length);
				99	}
				100
				101
				102	//
				103	// when key is 0, do not set version and short key
				104	//
				105	inline void make_le_item_head (struct item_head * ih, const struct cpu_key * key,
				106	int version,
				107	loff_t offset, int type, int length,
				108	int entry_count/or ih_free_space/)
				109	{
				110	if (key) {
				111	ih->ih_key.k_dir_id = cpu_to_le32 (key->on_disk_key.k_dir_id);
				112	ih->ih_key.k_objectid = cpu_to_le32 (key->on_disk_key.k_objectid);
				113	}
				114	put_ih_version( ih, version );
				115	set_le_ih_k_offset (ih, offset);
				116	set_le_ih_k_type (ih, type);
				117	put_ih_item_len( ih, length );
				118	/* set_ih_free_space (ih, 0);*/
				119	// for directory items it is entry count, for directs and stat
				120	// datas - 0xffff, for indirects - 0
				121	put_ih_entry_count( ih, entry_count );
				122	}
				123
				124	//
				125	// FIXME: we might cache recently accessed indirect item
				126
				127	// Ugh. Not too eager for that....
				128	// I cut the code until such time as I see a convincing argument (benchmark).
				129	// I don't want a bloated inode struct..., and I don't like code complexity....
				130
				131	/* cutting the code is fine, since it really isn't in use yet and is easy
				132	** to add back in. But, Vladimir has a really good idea here. Think
				133	** about what happens for reading a file. For each page,
				134	** The VFS layer calls reiserfs_readpage, who searches the tree to find
				135	** an indirect item. This indirect item has X number of pointers, where
				136	** X is a big number if we've done the block allocation right. But,
				137	** we only use one or two of these pointers during each call to readpage,
				138	** needlessly re-searching the tree again later on.
				139	**
				140	** The size of the cache could be dynamic based on the size of the file.
				141	**
				142	** I'd also like to see us cache the location of the stat data item, since
				143	** we are needlessly re-searching for that frequently.
				144	**
				145	** --chris
				146	*/
				147
				148	/* If this page has a file tail in it, and
				149	** it was read in by get_block_create_0, the page data is valid,
				150	** but tail is still sitting in a direct item, and we can't write to
				151	** it. So, look through this page, and check all the mapped buffers
				152	** to make sure they have valid block numbers. Any that don't need
				153	** to be unmapped, so that block_prepare_write will correctly call
				154	** reiserfs_get_block to convert the tail into an unformatted node
				155	*/
				156	static inline void fix_tail_page_for_writing(struct page *page) {
				157	struct buffer_head head, next, *bh ;
				158
				159	if (page && page_has_buffers(page)) {
				160	head = page_buffers(page) ;
				161	bh = head ;
				162	do {
				163	next = bh->b_this_page ;
				164	if (buffer_mapped(bh) && bh->b_blocknr == 0) {
				165	reiserfs_unmap_buffer(bh) ;
				166	}
				167	bh = next ;
				168	} while (bh != head) ;
				169	}
				170	}
				171
				172	/* reiserfs_get_block does not need to allocate a block only if it has been
				173	done already or non-hole position has been found in the indirect item */
				174	static inline int allocation_needed (int retval, b_blocknr_t allocated,
				175	struct item_head * ih,
Al Viro	3e8962b	2005-05-01 08:59:18 -0700	[diff] [blame^]	176	__le32 * item, int pos_in_item)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	177	{
				178	if (allocated)
				179	return 0;
				180	if (retval == POSITION_FOUND && is_indirect_le_ih (ih) &&
				181	get_block_num(item, pos_in_item))
				182	return 0;
				183	return 1;
				184	}
				185
				186	static inline int indirect_item_found (int retval, struct item_head * ih)
				187	{
				188	return (retval == POSITION_FOUND) && is_indirect_le_ih (ih);
				189	}
				190
				191
				192	static inline void set_block_dev_mapped (struct buffer_head * bh,
				193	b_blocknr_t block, struct inode * inode)
				194	{
				195	map_bh(bh, inode->i_sb, block);
				196	}
				197
				198
				199	//
				200	// files which were created in the earlier version can not be longer,
				201	// than 2 gb
				202	//
				203	static int file_capable (struct inode * inode, long block)
				204	{
				205	if (get_inode_item_key_version (inode) != KEY_FORMAT_3_5 \|\| // it is new file.
				206	block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb
				207	return 1;
				208
				209	return 0;
				210	}
				211
				212	/static/ int restart_transaction(struct reiserfs_transaction_handle *th,
				213	struct inode inode, struct path path) {
				214	struct super_block *s = th->t_super ;
				215	int len = th->t_blocks_allocated ;
				216	int err;
				217
				218	BUG_ON (!th->t_trans_id);
				219	BUG_ON (!th->t_refcount);
				220
				221	/* we cannot restart while nested */
				222	if (th->t_refcount > 1) {
				223	return 0 ;
				224	}
				225	pathrelse(path) ;
				226	reiserfs_update_sd(th, inode) ;
				227	err = journal_end(th, s, len) ;
				228	if (!err) {
				229	err = journal_begin(th, s, JOURNAL_PER_BALANCE_CNT * 6) ;
				230	if (!err)
				231	reiserfs_update_inode_transaction(inode) ;
				232	}
				233	return err;
				234	}
				235
				236	// it is called by get_block when create == 0. Returns block number
				237	// for 'block'-th logical block of file. When it hits direct item it
				238	// returns 0 (being called from bmap) or read direct item into piece
				239	// of page (bh_result)
				240
				241	// Please improve the english/clarity in the comment above, as it is
				242	// hard to understand.
				243
				244	static int _get_block_create_0 (struct inode * inode, long block,
				245	struct buffer_head * bh_result,
				246	int args)
				247	{
				248	INITIALIZE_PATH (path);
				249	struct cpu_key key;
				250	struct buffer_head * bh;
				251	struct item_head * ih, tmp_ih;
				252	int fs_gen ;
				253	int blocknr;
				254	char * p = NULL;
				255	int chars;
				256	int ret ;
				257	int done = 0 ;
				258	unsigned long offset ;
				259
				260	// prepare the key to look for the 'block'-th block of file
				261	make_cpu_key (&key, inode,
				262	(loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3);
				263
				264	research:
				265	if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) {
				266	pathrelse (&path);
				267	if (p)
				268	kunmap(bh_result->b_page) ;
				269	// We do not return -ENOENT if there is a hole but page is uptodate, because it means
				270	// That there is some MMAPED data associated with it that is yet to be written to disk.
				271	if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) {
				272	return -ENOENT ;
				273	}
				274	return 0 ;
				275	}
				276
				277	//
				278	bh = get_last_bh (&path);
				279	ih = get_ih (&path);
				280	if (is_indirect_le_ih (ih)) {
Al Viro	3e8962b	2005-05-01 08:59:18 -0700	[diff] [blame^]	281	__le32 * ind_item = (__le32 *)B_I_PITEM (bh, ih);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	282
				283	/* FIXME: here we could cache indirect item or part of it in
				284	the inode to avoid search_by_key in case of subsequent
				285	access to file */
				286	blocknr = get_block_num(ind_item, path.pos_in_item) ;
				287	ret = 0 ;
				288	if (blocknr) {
				289	map_bh(bh_result, inode->i_sb, blocknr);
				290	if (path.pos_in_item == ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
				291	set_buffer_boundary(bh_result);
				292	}
				293	} else
				294	// We do not return -ENOENT if there is a hole but page is uptodate, because it means
				295	// That there is some MMAPED data associated with it that is yet to be written to disk.
				296	if ((args & GET_BLOCK_NO_HOLE) && !PageUptodate(bh_result->b_page) ) {
				297	ret = -ENOENT ;
				298	}
				299
				300	pathrelse (&path);
				301	if (p)
				302	kunmap(bh_result->b_page) ;
				303	return ret ;
				304	}
				305
				306	// requested data are in direct item(s)
				307	if (!(args & GET_BLOCK_READ_DIRECT)) {
				308	// we are called by bmap. FIXME: we can not map block of file
				309	// when it is stored in direct item(s)
				310	pathrelse (&path);
				311	if (p)
				312	kunmap(bh_result->b_page) ;
				313	return -ENOENT;
				314	}
				315
				316	/* if we've got a direct item, and the buffer or page was uptodate,
				317	** we don't want to pull data off disk again. skip to the
				318	** end, where we map the buffer and return
				319	*/
				320	if (buffer_uptodate(bh_result)) {
				321	goto finished ;
				322	} else
				323	/*
				324	** grab_tail_page can trigger calls to reiserfs_get_block on up to date
				325	** pages without any buffers. If the page is up to date, we don't want
				326	** read old data off disk. Set the up to date bit on the buffer instead
				327	** and jump to the end
				328	*/
				329	if (!bh_result->b_page \|\| PageUptodate(bh_result->b_page)) {
				330	set_buffer_uptodate(bh_result);
				331	goto finished ;
				332	}
				333
				334	// read file tail into part of page
				335	offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ;
				336	fs_gen = get_generation(inode->i_sb) ;
				337	copy_item_head (&tmp_ih, ih);
				338
				339	/* we only want to kmap if we are reading the tail into the page.
				340	** this is not the common case, so we don't kmap until we are
				341	** sure we need to. But, this means the item might move if
				342	** kmap schedules
				343	*/
				344	if (!p) {
				345	p = (char *)kmap(bh_result->b_page) ;
				346	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
				347	goto research;
				348	}
				349	}
				350	p += offset ;
				351	memset (p, 0, inode->i_sb->s_blocksize);
				352	do {
				353	if (!is_direct_le_ih (ih)) {
				354	BUG ();
				355	}
				356	/* make sure we don't read more bytes than actually exist in
				357	** the file. This can happen in odd cases where i_size isn't
				358	** correct, and when direct item padding results in a few
				359	** extra bytes at the end of the direct item
				360	*/
				361	if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
				362	break ;
				363	if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
				364	chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item;
				365	done = 1 ;
				366	} else {
				367	chars = ih_item_len(ih) - path.pos_in_item;
				368	}
				369	memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars);
				370
				371	if (done)
				372	break ;
				373
				374	p += chars;
				375
				376	if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1))
				377	// we done, if read direct item is not the last item of
				378	// node FIXME: we could try to check right delimiting key
				379	// to see whether direct item continues in the right
				380	// neighbor or rely on i_size
				381	break;
				382
				383	// update key to look for the next piece
				384	set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
				385	if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
				386	// we read something from tail, even if now we got IO_ERROR
				387	break;
				388	bh = get_last_bh (&path);
				389	ih = get_ih (&path);
				390	} while (1);
				391
				392	flush_dcache_page(bh_result->b_page) ;
				393	kunmap(bh_result->b_page) ;
				394
				395	finished:
				396	pathrelse (&path);
				397	/* this buffer has valid data, but isn't valid for io. mapping it to
				398	* block #0 tells the rest of reiserfs it just has a tail in it
				399	*/
				400	map_bh(bh_result, inode->i_sb, 0);
				401	set_buffer_uptodate (bh_result);
				402	return 0;
				403	}
				404
				405
				406	// this is called to create file map. So, _get_block_create_0 will not
				407	// read direct item
				408	static int reiserfs_bmap (struct inode * inode, sector_t block,
				409	struct buffer_head * bh_result, int create)
				410	{
				411	if (!file_capable (inode, block))
				412	return -EFBIG;
				413
				414	reiserfs_write_lock(inode->i_sb);
				415	/* do not read the direct item */
				416	_get_block_create_0 (inode, block, bh_result, 0) ;
				417	reiserfs_write_unlock(inode->i_sb);
				418	return 0;
				419	}
				420
				421	/* special version of get_block that is only used by grab_tail_page right
				422	** now. It is sent to block_prepare_write, and when you try to get a
				423	** block past the end of the file (or a block from a hole) it returns
				424	** -ENOENT instead of a valid buffer. block_prepare_write expects to
				425	** be able to do i/o on the buffers returned, unless an error value
				426	** is also returned.
				427	**
				428	** So, this allows block_prepare_write to be used for reading a single block
				429	** in a page. Where it does not produce a valid page for holes, or past the
				430	** end of the file. This turns out to be exactly what we need for reading
				431	** tails for conversion.
				432	**
				433	** The point of the wrapper is forcing a certain value for create, even
				434	** though the VFS layer is calling this function with create==1. If you
				435	** don't want to send create == GET_BLOCK_NO_HOLE to reiserfs_get_block,
				436	** don't use this function.
				437	*/
				438	static int reiserfs_get_block_create_0 (struct inode * inode, sector_t block,
				439	struct buffer_head * bh_result, int create) {
				440	return reiserfs_get_block(inode, block, bh_result, GET_BLOCK_NO_HOLE) ;
				441	}
				442
				443	/* This is special helper for reiserfs_get_block in case we are executing
				444	direct_IO request. */
				445	static int reiserfs_get_blocks_direct_io(struct inode *inode,
				446	sector_t iblock,
				447	unsigned long max_blocks,
				448	struct buffer_head *bh_result,
				449	int create)
				450	{
				451	int ret ;
				452
				453	bh_result->b_page = NULL;
				454
				455	/* We set the b_size before reiserfs_get_block call since it is
				456	referenced in convert_tail_for_hole() that may be called from
				457	reiserfs_get_block() */
				458	bh_result->b_size = (1 << inode->i_blkbits);
				459
				460	ret = reiserfs_get_block(inode, iblock, bh_result,
				461	create \| GET_BLOCK_NO_DANGLE) ;
				462	if (ret)
				463	goto out;
				464
				465	/* don't allow direct io onto tail pages */
				466	if (buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
				467	/* make sure future calls to the direct io funcs for this offset
				468	** in the file fail by unmapping the buffer
				469	*/
				470	clear_buffer_mapped(bh_result);
				471	ret = -EINVAL ;
				472	}
				473	/* Possible unpacked tail. Flush the data before pages have
				474	disappeared */
				475	if (REISERFS_I(inode)->i_flags & i_pack_on_close_mask) {
				476	int err;
				477	lock_kernel();
				478	err = reiserfs_commit_for_inode(inode);
				479	REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask;
				480	unlock_kernel();
				481	if (err < 0)
				482	ret = err;
				483	}
				484	out:
				485	return ret ;
				486	}
				487
				488
				489	/*
				490	** helper function for when reiserfs_get_block is called for a hole
				491	** but the file tail is still in a direct item
				492	** bh_result is the buffer head for the hole
				493	** tail_offset is the offset of the start of the tail in the file
				494	**
				495	** This calls prepare_write, which will start a new transaction
				496	** you should not be in a transaction, or have any paths held when you
				497	** call this.
				498	*/
				499	static int convert_tail_for_hole(struct inode *inode,
				500	struct buffer_head *bh_result,
				501	loff_t tail_offset) {
				502	unsigned long index ;
				503	unsigned long tail_end ;
				504	unsigned long tail_start ;
				505	struct page * tail_page ;
				506	struct page * hole_page = bh_result->b_page ;
				507	int retval = 0 ;
				508
				509	if ((tail_offset & (bh_result->b_size - 1)) != 1)
				510	return -EIO ;
				511
				512	/* always try to read until the end of the block */
				513	tail_start = tail_offset & (PAGE_CACHE_SIZE - 1) ;
				514	tail_end = (tail_start \| (bh_result->b_size - 1)) + 1 ;
				515
				516	index = tail_offset >> PAGE_CACHE_SHIFT ;
				517	/* hole_page can be zero in case of direct_io, we are sure
				518	that we cannot get here if we write with O_DIRECT into
				519	tail page */
				520	if (!hole_page \|\| index != hole_page->index) {
				521	tail_page = grab_cache_page(inode->i_mapping, index) ;
				522	retval = -ENOMEM;
				523	if (!tail_page) {
				524	goto out ;
				525	}
				526	} else {
				527	tail_page = hole_page ;
				528	}
				529
				530	/* we don't have to make sure the conversion did not happen while
				531	** we were locking the page because anyone that could convert
				532	** must first take i_sem.
				533	**
				534	** We must fix the tail page for writing because it might have buffers
				535	** that are mapped, but have a block number of 0. This indicates tail
				536	** data that has been read directly into the page, and block_prepare_write
				537	** won't trigger a get_block in this case.
				538	*/
				539	fix_tail_page_for_writing(tail_page) ;
				540	retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
				541	if (retval)
				542	goto unlock ;
				543
				544	/* tail conversion might change the data in the page */
				545	flush_dcache_page(tail_page) ;
				546
				547	retval = reiserfs_commit_write(NULL, tail_page, tail_start, tail_end) ;
				548
				549	unlock:
				550	if (tail_page != hole_page) {
				551	unlock_page(tail_page) ;
				552	page_cache_release(tail_page) ;
				553	}
				554	out:
				555	return retval ;
				556	}
				557
				558	static inline int _allocate_block(struct reiserfs_transaction_handle *th,
				559	long block,
				560	struct inode *inode,
				561	b_blocknr_t *allocated_block_nr,
				562	struct path * path,
				563	int flags) {
				564	BUG_ON (!th->t_trans_id);
				565
				566	#ifdef REISERFS_PREALLOCATE
				567	if (!(flags & GET_BLOCK_NO_ISEM)) {
				568	return reiserfs_new_unf_blocknrs2(th, inode, allocated_block_nr, path, block);
				569	}
				570	#endif
				571	return reiserfs_new_unf_blocknrs (th, inode, allocated_block_nr, path, block);
				572	}
				573
				574	int reiserfs_get_block (struct inode * inode, sector_t block,
				575	struct buffer_head * bh_result, int create)
				576	{
				577	int repeat, retval = 0;
				578	b_blocknr_t allocated_block_nr = 0;// b_blocknr_t is (unsigned) 32 bit int
				579	INITIALIZE_PATH(path);
				580	int pos_in_item;
				581	struct cpu_key key;
				582	struct buffer_head * bh, * unbh = NULL;
				583	struct item_head * ih, tmp_ih;
Al Viro	3e8962b	2005-05-01 08:59:18 -0700	[diff] [blame^]	584	__le32 * item;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	585	int done;
				586	int fs_gen;
				587	struct reiserfs_transaction_handle *th = NULL;
				588	/* space reserved in transaction batch:
				589	. 3 balancings in direct->indirect conversion
				590	. 1 block involved into reiserfs_update_sd()
				591	XXX in practically impossible worst case direct2indirect()
				592	can incur (much) more than 3 balancings.
				593	quota update for user, group */
				594	int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS;
				595	int version;
				596	int dangle = 1;
				597	loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
				598
				599	/* bad.... */
				600	reiserfs_write_lock(inode->i_sb);
				601	version = get_inode_item_key_version (inode);
				602
				603	if (block < 0) {
				604	reiserfs_write_unlock(inode->i_sb);
				605	return -EIO;
				606	}
				607
				608	if (!file_capable (inode, block)) {
				609	reiserfs_write_unlock(inode->i_sb);
				610	return -EFBIG;
				611	}
				612
				613	/* if !create, we aren't changing the FS, so we don't need to
				614	** log anything, so we don't need to start a transaction
				615	*/
				616	if (!(create & GET_BLOCK_CREATE)) {
				617	int ret ;
				618	/* find number of block-th logical block of the file */
				619	ret = _get_block_create_0 (inode, block, bh_result,
				620	create \| GET_BLOCK_READ_DIRECT) ;
				621	reiserfs_write_unlock(inode->i_sb);
				622	return ret;
				623	}
				624	/*
				625	* if we're already in a transaction, make sure to close
				626	* any new transactions we start in this func
				627	*/
				628	if ((create & GET_BLOCK_NO_DANGLE) \|\|
				629	reiserfs_transaction_running(inode->i_sb))
				630	dangle = 0;
				631
				632	/* If file is of such a size, that it might have a tail and tails are enabled
				633	** we should mark it as possibly needing tail packing on close
				634	*/
				635	if ( (have_large_tails (inode->i_sb) && inode->i_size < i_block_size (inode)*4) \|\|
				636	(have_small_tails (inode->i_sb) && inode->i_size < i_block_size(inode)) )
				637	REISERFS_I(inode)->i_flags \|= i_pack_on_close_mask ;
				638
				639	/* set the key of the first byte in the 'block'-th block of file */
				640	make_cpu_key (&key, inode, new_offset,
				641	TYPE_ANY, 3/key length/);
				642	if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
				643	start_trans:
				644	th = reiserfs_persistent_transaction(inode->i_sb, jbegin_count);
				645	if (!th) {
				646	retval = -ENOMEM;
				647	goto failure;
				648	}
				649	reiserfs_update_inode_transaction(inode) ;
				650	}
				651	research:
				652
				653	retval = search_for_position_by_key (inode->i_sb, &key, &path);
				654	if (retval == IO_ERROR) {
				655	retval = -EIO;
				656	goto failure;
				657	}
				658
				659	bh = get_last_bh (&path);
				660	ih = get_ih (&path);
				661	item = get_item (&path);
				662	pos_in_item = path.pos_in_item;
				663
				664	fs_gen = get_generation (inode->i_sb);
				665	copy_item_head (&tmp_ih, ih);
				666
				667	if (allocation_needed (retval, allocated_block_nr, ih, item, pos_in_item)) {
				668	/* we have to allocate block for the unformatted node */
				669	if (!th) {
				670	pathrelse(&path) ;
				671	goto start_trans;
				672	}
				673
				674	repeat = _allocate_block(th, block, inode, &allocated_block_nr, &path, create);
				675
				676	if (repeat == NO_DISK_SPACE \|\| repeat == QUOTA_EXCEEDED) {
				677	/* restart the transaction to give the journal a chance to free
				678	** some blocks. releases the path, so we have to go back to
				679	** research if we succeed on the second try
				680	*/
				681	SB_JOURNAL(inode->i_sb)->j_next_async_flush = 1;
				682	retval = restart_transaction(th, inode, &path) ;
				683	if (retval)
				684	goto failure;
				685	repeat = _allocate_block(th, block, inode, &allocated_block_nr, NULL, create);
				686
				687	if (repeat != NO_DISK_SPACE && repeat != QUOTA_EXCEEDED) {
				688	goto research ;
				689	}
				690	if (repeat == QUOTA_EXCEEDED)
				691	retval = -EDQUOT;
				692	else
				693	retval = -ENOSPC;
				694	goto failure;
				695	}
				696
				697	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
				698	goto research;
				699	}
				700	}
				701
				702	if (indirect_item_found (retval, ih)) {
				703	b_blocknr_t unfm_ptr;
				704	/* 'block'-th block is in the file already (there is
				705	corresponding cell in some indirect item). But it may be
				706	zero unformatted node pointer (hole) */
				707	unfm_ptr = get_block_num (item, pos_in_item);
				708	if (unfm_ptr == 0) {
				709	/* use allocated block to plug the hole */
				710	reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
				711	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
				712	reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
				713	goto research;
				714	}
				715	set_buffer_new(bh_result);
				716	if (buffer_dirty(bh_result) && reiserfs_data_ordered(inode->i_sb))
				717	reiserfs_add_ordered_list(inode, bh_result);
				718	put_block_num(item, pos_in_item, allocated_block_nr) ;
				719	unfm_ptr = allocated_block_nr;
				720	journal_mark_dirty (th, inode->i_sb, bh);
				721	reiserfs_update_sd(th, inode) ;
				722	}
				723	set_block_dev_mapped(bh_result, unfm_ptr, inode);
				724	pathrelse (&path);
				725	retval = 0;
				726	if (!dangle && th)
				727	retval = reiserfs_end_persistent_transaction(th);
				728
				729	reiserfs_write_unlock(inode->i_sb);
				730
				731	/* the item was found, so new blocks were not added to the file
				732	** there is no need to make sure the inode is updated with this
				733	** transaction
				734	*/
				735	return retval;
				736	}
				737
				738	if (!th) {
				739	pathrelse(&path) ;
				740	goto start_trans;
				741	}
				742
				743	/* desired position is not found or is in the direct item. We have
				744	to append file with holes up to 'block'-th block converting
				745	direct items to indirect one if necessary */
				746	done = 0;
				747	do {
				748	if (is_statdata_le_ih (ih)) {
Al Viro	3e8962b	2005-05-01 08:59:18 -0700	[diff] [blame^]	749	__le32 unp = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	750	struct cpu_key tmp_key;
				751
				752	/* indirect item has to be inserted */
				753	make_le_item_head (&tmp_ih, &key, version, 1, TYPE_INDIRECT,
				754	UNFM_P_SIZE, 0/* free_space */);
				755
				756	if (cpu_key_k_offset (&key) == 1) {
				757	/* we are going to add 'block'-th block to the file. Use
				758	allocated block for that */
				759	unp = cpu_to_le32 (allocated_block_nr);
				760	set_block_dev_mapped (bh_result, allocated_block_nr, inode);
				761	set_buffer_new(bh_result);
				762	done = 1;
				763	}
				764	tmp_key = key; // ;)
				765	set_cpu_key_k_offset (&tmp_key, 1);
				766	PATH_LAST_POSITION(&path) ++;
				767
				768	retval = reiserfs_insert_item (th, &path, &tmp_key, &tmp_ih, inode, (char *)&unp);
				769	if (retval) {
				770	reiserfs_free_block (th, inode, allocated_block_nr, 1);
				771	goto failure; // retval == -ENOSPC, -EDQUOT or -EIO or -EEXIST
				772	}
				773	//mark_tail_converted (inode);
				774	} else if (is_direct_le_ih (ih)) {
				775	/* direct item has to be converted */
				776	loff_t tail_offset;
				777
				778	tail_offset = ((le_ih_k_offset (ih) - 1) & ~(inode->i_sb->s_blocksize - 1)) + 1;
				779	if (tail_offset == cpu_key_k_offset (&key)) {
				780	/* direct item we just found fits into block we have
				781	to map. Convert it into unformatted node: use
				782	bh_result for the conversion */
				783	set_block_dev_mapped (bh_result, allocated_block_nr, inode);
				784	unbh = bh_result;
				785	done = 1;
				786	} else {
				787	/* we have to padd file tail stored in direct item(s)
				788	up to block size and convert it to unformatted
				789	node. FIXME: this should also get into page cache */
				790
				791	pathrelse(&path) ;
				792	/*
				793	* ugly, but we can only end the transaction if
				794	* we aren't nested
				795	*/
				796	BUG_ON (!th->t_refcount);
				797	if (th->t_refcount == 1) {
				798	retval = reiserfs_end_persistent_transaction(th);
				799	th = NULL;
				800	if (retval)
				801	goto failure;
				802	}
				803
				804	retval = convert_tail_for_hole(inode, bh_result, tail_offset) ;
				805	if (retval) {
				806	if ( retval != -ENOSPC )
				807	reiserfs_warning (inode->i_sb, "clm-6004: convert tail failed inode %lu, error %d", inode->i_ino, retval) ;
				808	if (allocated_block_nr) {
				809	/* the bitmap, the super, and the stat data == 3 */
				810	if (!th)
				811	th = reiserfs_persistent_transaction(inode->i_sb,3);
				812	if (th)
				813	reiserfs_free_block (th,inode,allocated_block_nr,1);
				814	}
				815	goto failure ;
				816	}
				817	goto research ;
				818	}
				819	retval = direct2indirect (th, inode, &path, unbh, tail_offset);
				820	if (retval) {
				821	reiserfs_unmap_buffer(unbh);
				822	reiserfs_free_block (th, inode, allocated_block_nr, 1);
				823	goto failure;
				824	}
				825	/* it is important the set_buffer_uptodate is done after
				826	** the direct2indirect. The buffer might contain valid
				827	** data newer than the data on disk (read by readpage, changed,
				828	** and then sent here by writepage). direct2indirect needs
				829	** to know if unbh was already up to date, so it can decide
				830	** if the data in unbh needs to be replaced with data from
				831	** the disk
				832	*/
				833	set_buffer_uptodate (unbh);
				834
				835	/* unbh->b_page == NULL in case of DIRECT_IO request, this means
				836	buffer will disappear shortly, so it should not be added to
				837	*/
				838	if ( unbh->b_page ) {
				839	/* we've converted the tail, so we must
				840	** flush unbh before the transaction commits
				841	*/
				842	reiserfs_add_tail_list(inode, unbh) ;
				843
				844	/* mark it dirty now to prevent commit_write from adding
				845	** this buffer to the inode's dirty buffer list
				846	*/
				847	/*
				848	* AKPM: changed __mark_buffer_dirty to mark_buffer_dirty().
				849	* It's still atomic, but it sets the page dirty too,
				850	* which makes it eligible for writeback at any time by the
				851	* VM (which was also the case with __mark_buffer_dirty())
				852	*/
				853	mark_buffer_dirty(unbh) ;
				854	}
				855	} else {
				856	/* append indirect item with holes if needed, when appending
				857	pointer to 'block'-th block use block, which is already
				858	allocated */
				859	struct cpu_key tmp_key;
				860	unp_t unf_single=0; // We use this in case we need to allocate only
				861	// one block which is a fastpath
				862	unp_t *un;
				863	__u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE;
				864	__u64 blocks_needed;
				865
				866	RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
				867	"vs-804: invalid position for append");
				868	/* indirect item has to be appended, set up key of that position */
				869	make_cpu_key (&tmp_key, inode,
				870	le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
				871	//pos_in_item * inode->i_sb->s_blocksize,
				872	TYPE_INDIRECT, 3);// key type is unimportant
				873
				874	blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits);
				875	RFALSE( blocks_needed < 0, "green-805: invalid offset");
				876
				877	if ( blocks_needed == 1 ) {
				878	un = &unf_single;
				879	} else {
				880	un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE,
				881	GFP_ATOMIC); // We need to avoid scheduling.
				882	if ( !un) {
				883	un = &unf_single;
				884	blocks_needed = 1;
				885	max_to_insert = 0;
				886	} else
				887	memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert));
				888	}
				889	if ( blocks_needed <= max_to_insert) {
				890	/* we are going to add target block to the file. Use allocated
				891	block for that */
				892	un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr);
				893	set_block_dev_mapped (bh_result, allocated_block_nr, inode);
				894	set_buffer_new(bh_result);
				895	done = 1;
				896	} else {
				897	/* paste hole to the indirect item */
				898	/* If kmalloc failed, max_to_insert becomes zero and it means we
				899	only have space for one block */
				900	blocks_needed=max_to_insert?max_to_insert:1;
				901	}
				902	retval = reiserfs_paste_into_item (th, &path, &tmp_key, inode, (char )un, UNFM_P_SIZE blocks_needed);
				903
				904	if (blocks_needed != 1)
				905	kfree(un);
				906
				907	if (retval) {
				908	reiserfs_free_block (th, inode, allocated_block_nr, 1);
				909	goto failure;
				910	}
				911	if (!done) {
				912	/* We need to mark new file size in case this function will be
				913	interrupted/aborted later on. And we may do this only for
				914	holes. */
				915	inode->i_size += inode->i_sb->s_blocksize * blocks_needed;
				916	}
				917	}
				918
				919	if (done == 1)
				920	break;
				921
				922	/* this loop could log more blocks than we had originally asked
				923	** for. So, we have to allow the transaction to end if it is
				924	** too big or too full. Update the inode so things are
				925	** consistent if we crash before the function returns
				926	**
				927	** release the path so that anybody waiting on the path before
				928	** ending their transaction will be able to continue.
				929	*/
				930	if (journal_transaction_should_end(th, th->t_blocks_allocated)) {
				931	retval = restart_transaction(th, inode, &path) ;
				932	if (retval)
				933	goto failure;
				934	}
				935	/* inserting indirect pointers for a hole can take a
				936	** long time. reschedule if needed
				937	*/
				938	cond_resched();
				939
				940	retval = search_for_position_by_key (inode->i_sb, &key, &path);
				941	if (retval == IO_ERROR) {
				942	retval = -EIO;
				943	goto failure;
				944	}
				945	if (retval == POSITION_FOUND) {
				946	reiserfs_warning (inode->i_sb, "vs-825: reiserfs_get_block: "
				947	"%K should not be found", &key);
				948	retval = -EEXIST;
				949	if (allocated_block_nr)
				950	reiserfs_free_block (th, inode, allocated_block_nr, 1);
				951	pathrelse(&path) ;
				952	goto failure;
				953	}
				954	bh = get_last_bh (&path);
				955	ih = get_ih (&path);
				956	item = get_item (&path);
				957	pos_in_item = path.pos_in_item;
				958	} while (1);
				959
				960
				961	retval = 0;
				962
				963	failure:
				964	if (th && (!dangle \|\| (retval && !th->t_trans_id))) {
				965	int err;
				966	if (th->t_trans_id)
				967	reiserfs_update_sd(th, inode);
				968	err = reiserfs_end_persistent_transaction(th);
				969	if (err)
				970	retval = err;
				971	}
				972
				973	reiserfs_write_unlock(inode->i_sb);
				974	reiserfs_check_path(&path) ;
				975	return retval;
				976	}
				977
				978	static int
				979	reiserfs_readpages(struct file file, struct address_space mapping,
				980	struct list_head *pages, unsigned nr_pages)
				981	{
				982	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
				983	}
				984
				985	/* Compute real number of used bytes by file
				986	* Following three functions can go away when we'll have enough space in stat item
				987	*/
				988	static int real_space_diff(struct inode *inode, int sd_size)
				989	{
				990	int bytes;
				991	loff_t blocksize = inode->i_sb->s_blocksize ;
				992
				993	if (S_ISLNK(inode->i_mode) \|\| S_ISDIR(inode->i_mode))
				994	return sd_size ;
				995
				996	/* End of file is also in full block with indirect reference, so round
				997	** up to the next block.
				998	**
				999	** there is just no way to know if the tail is actually packed
				1000	** on the file, so we have to assume it isn't. When we pack the
				1001	** tail, we add 4 bytes to pretend there really is an unformatted
				1002	** node pointer
				1003	*/
				1004	bytes = ((inode->i_size + (blocksize-1)) >> inode->i_sb->s_blocksize_bits) * UNFM_P_SIZE + sd_size;
				1005	return bytes ;
				1006	}
				1007
				1008	static inline loff_t to_real_used_space(struct inode *inode, ulong blocks,
				1009	int sd_size)
				1010	{
				1011	if (S_ISLNK(inode->i_mode) \|\| S_ISDIR(inode->i_mode)) {
				1012	return inode->i_size + (loff_t)(real_space_diff(inode, sd_size)) ;
				1013	}
				1014	return ((loff_t)real_space_diff(inode, sd_size)) + (((loff_t)blocks) << 9);
				1015	}
				1016
				1017	/* Compute number of blocks used by file in ReiserFS counting */
				1018	static inline ulong to_fake_used_blocks(struct inode *inode, int sd_size)
				1019	{
				1020	loff_t bytes = inode_get_bytes(inode) ;
				1021	loff_t real_space = real_space_diff(inode, sd_size) ;
				1022
				1023	/* keeps fsck and non-quota versions of reiserfs happy */
				1024	if (S_ISLNK(inode->i_mode) \|\| S_ISDIR(inode->i_mode)) {
				1025	bytes += (loff_t)511 ;
				1026	}
				1027
				1028	/* files from before the quota patch might i_blocks such that
				1029	** bytes < real_space. Deal with that here to prevent it from
				1030	** going negative.
				1031	*/
				1032	if (bytes < real_space)
				1033	return 0 ;
				1034	return (bytes - real_space) >> 9;
				1035	}
				1036
				1037	//
				1038	// BAD: new directories have stat data of new type and all other items
				1039	// of old type. Version stored in the inode says about body items, so
				1040	// in update_stat_data we can not rely on inode, but have to check
				1041	// item version directly
				1042	//
				1043
				1044	// called by read_locked_inode
				1045	static void init_inode (struct inode * inode, struct path * path)
				1046	{
				1047	struct buffer_head * bh;
				1048	struct item_head * ih;
				1049	__u32 rdev;
				1050	//int version = ITEM_VERSION_1;
				1051
				1052	bh = PATH_PLAST_BUFFER (path);
				1053	ih = PATH_PITEM_HEAD (path);
				1054
				1055
				1056	copy_key (INODE_PKEY (inode), &(ih->ih_key));
				1057	inode->i_blksize = reiserfs_default_io_size;
				1058
				1059	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list ));
				1060	REISERFS_I(inode)->i_flags = 0;
				1061	REISERFS_I(inode)->i_prealloc_block = 0;
				1062	REISERFS_I(inode)->i_prealloc_count = 0;
				1063	REISERFS_I(inode)->i_trans_id = 0;
				1064	REISERFS_I(inode)->i_jl = NULL;
				1065	REISERFS_I(inode)->i_acl_access = NULL;
				1066	REISERFS_I(inode)->i_acl_default = NULL;
				1067	init_rwsem (&REISERFS_I(inode)->xattr_sem);
				1068
				1069	if (stat_data_v1 (ih)) {
				1070	struct stat_data_v1 * sd = (struct stat_data_v1 *)B_I_PITEM (bh, ih);
				1071	unsigned long blocks;
				1072
				1073	set_inode_item_key_version (inode, KEY_FORMAT_3_5);
				1074	set_inode_sd_version (inode, STAT_DATA_V1);
				1075	inode->i_mode = sd_v1_mode(sd);
				1076	inode->i_nlink = sd_v1_nlink(sd);
				1077	inode->i_uid = sd_v1_uid(sd);
				1078	inode->i_gid = sd_v1_gid(sd);
				1079	inode->i_size = sd_v1_size(sd);
				1080	inode->i_atime.tv_sec = sd_v1_atime(sd);
				1081	inode->i_mtime.tv_sec = sd_v1_mtime(sd);
				1082	inode->i_ctime.tv_sec = sd_v1_ctime(sd);
				1083	inode->i_atime.tv_nsec = 0;
				1084	inode->i_ctime.tv_nsec = 0;
				1085	inode->i_mtime.tv_nsec = 0;
				1086
				1087	inode->i_blocks = sd_v1_blocks(sd);
				1088	inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
				1089	blocks = (inode->i_size + 511) >> 9;
				1090	blocks = _ROUND_UP (blocks, inode->i_sb->s_blocksize >> 9);
				1091	if (inode->i_blocks > blocks) {
				1092	// there was a bug in <=3.5.23 when i_blocks could take negative
				1093	// values. Starting from 3.5.17 this value could even be stored in
				1094	// stat data. For such files we set i_blocks based on file
				1095	// size. Just 2 notes: this can be wrong for sparce files. On-disk value will be
				1096	// only updated if file's inode will ever change
				1097	inode->i_blocks = blocks;
				1098	}
				1099
				1100	rdev = sd_v1_rdev(sd);
				1101	REISERFS_I(inode)->i_first_direct_byte = sd_v1_first_direct_byte(sd);
				1102	/* an early bug in the quota code can give us an odd number for the
				1103	** block count. This is incorrect, fix it here.
				1104	*/
				1105	if (inode->i_blocks & 1) {
				1106	inode->i_blocks++ ;
				1107	}
				1108	inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks,
				1109	SD_V1_SIZE));
				1110	/* nopack is initially zero for v1 objects. For v2 objects,
				1111	nopack is initialised from sd_attrs */
				1112	REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
				1113	} else {
				1114	// new stat data found, but object may have old items
				1115	// (directories and symlinks)
				1116	struct stat_data * sd = (struct stat_data *)B_I_PITEM (bh, ih);
				1117
				1118	inode->i_mode = sd_v2_mode(sd);
				1119	inode->i_nlink = sd_v2_nlink(sd);
				1120	inode->i_uid = sd_v2_uid(sd);
				1121	inode->i_size = sd_v2_size(sd);
				1122	inode->i_gid = sd_v2_gid(sd);
				1123	inode->i_mtime.tv_sec = sd_v2_mtime(sd);
				1124	inode->i_atime.tv_sec = sd_v2_atime(sd);
				1125	inode->i_ctime.tv_sec = sd_v2_ctime(sd);
				1126	inode->i_ctime.tv_nsec = 0;
				1127	inode->i_mtime.tv_nsec = 0;
				1128	inode->i_atime.tv_nsec = 0;
				1129	inode->i_blocks = sd_v2_blocks(sd);
				1130	rdev = sd_v2_rdev(sd);
				1131	if( S_ISCHR( inode -> i_mode ) \|\| S_ISBLK( inode -> i_mode ) )
				1132	inode->i_generation = le32_to_cpu (INODE_PKEY (inode)->k_dir_id);
				1133	else
				1134	inode->i_generation = sd_v2_generation(sd);
				1135
				1136	if (S_ISDIR (inode->i_mode) \|\| S_ISLNK (inode->i_mode))
				1137	set_inode_item_key_version (inode, KEY_FORMAT_3_5);
				1138	else
				1139	set_inode_item_key_version (inode, KEY_FORMAT_3_6);
				1140	REISERFS_I(inode)->i_first_direct_byte = 0;
				1141	set_inode_sd_version (inode, STAT_DATA_V2);
				1142	inode_set_bytes(inode, to_real_used_space(inode, inode->i_blocks,
				1143	SD_V2_SIZE));
				1144	/* read persistent inode attributes from sd and initalise
				1145	generic inode flags from them */
				1146	REISERFS_I(inode)->i_attrs = sd_v2_attrs( sd );
				1147	sd_attrs_to_i_attrs( sd_v2_attrs( sd ), inode );
				1148	}
				1149
				1150	pathrelse (path);
				1151	if (S_ISREG (inode->i_mode)) {
				1152	inode->i_op = &reiserfs_file_inode_operations;
				1153	inode->i_fop = &reiserfs_file_operations;
				1154	inode->i_mapping->a_ops = &reiserfs_address_space_operations ;
				1155	} else if (S_ISDIR (inode->i_mode)) {
				1156	inode->i_op = &reiserfs_dir_inode_operations;
				1157	inode->i_fop = &reiserfs_dir_operations;
				1158	} else if (S_ISLNK (inode->i_mode)) {
				1159	inode->i_op = &reiserfs_symlink_inode_operations;
				1160	inode->i_mapping->a_ops = &reiserfs_address_space_operations;
				1161	} else {
				1162	inode->i_blocks = 0;
				1163	inode->i_op = &reiserfs_special_inode_operations;
				1164	init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
				1165	}
				1166	}
				1167
				1168
				1169	// update new stat data with inode fields
				1170	static void inode2sd (void * sd, struct inode * inode, loff_t size)
				1171	{
				1172	struct stat_data * sd_v2 = (struct stat_data *)sd;
				1173	__u16 flags;
				1174
				1175	set_sd_v2_mode(sd_v2, inode->i_mode );
				1176	set_sd_v2_nlink(sd_v2, inode->i_nlink );
				1177	set_sd_v2_uid(sd_v2, inode->i_uid );
				1178	set_sd_v2_size(sd_v2, size );
				1179	set_sd_v2_gid(sd_v2, inode->i_gid );
				1180	set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec );
				1181	set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec );
				1182	set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec );
				1183	set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
				1184	if (S_ISCHR(inode->i_mode) \|\| S_ISBLK(inode->i_mode))
				1185	set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
				1186	else
				1187	set_sd_v2_generation(sd_v2, inode->i_generation);
				1188	flags = REISERFS_I(inode)->i_attrs;
				1189	i_attrs_to_sd_attrs( inode, &flags );
				1190	set_sd_v2_attrs( sd_v2, flags );
				1191	}
				1192
				1193
				1194	// used to copy inode's fields to old stat data
				1195	static void inode2sd_v1 (void * sd, struct inode * inode, loff_t size)
				1196	{
				1197	struct stat_data_v1 * sd_v1 = (struct stat_data_v1 *)sd;
				1198
				1199	set_sd_v1_mode(sd_v1, inode->i_mode );
				1200	set_sd_v1_uid(sd_v1, inode->i_uid );
				1201	set_sd_v1_gid(sd_v1, inode->i_gid );
				1202	set_sd_v1_nlink(sd_v1, inode->i_nlink );
				1203	set_sd_v1_size(sd_v1, size );
				1204	set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec );
				1205	set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec );
				1206	set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec );
				1207
				1208	if (S_ISCHR(inode->i_mode) \|\| S_ISBLK(inode->i_mode))
				1209	set_sd_v1_rdev(sd_v1, new_encode_dev(inode->i_rdev));
				1210	else
				1211	set_sd_v1_blocks(sd_v1, to_fake_used_blocks(inode, SD_V1_SIZE));
				1212
				1213	// Sigh. i_first_direct_byte is back
				1214	set_sd_v1_first_direct_byte(sd_v1, REISERFS_I(inode)->i_first_direct_byte);
				1215	}
				1216
				1217
				1218	/* NOTE, you must prepare the buffer head before sending it here,
				1219	** and then log it after the call
				1220	*/
				1221	static void update_stat_data (struct path * path, struct inode * inode,
				1222	loff_t size)
				1223	{
				1224	struct buffer_head * bh;
				1225	struct item_head * ih;
				1226
				1227	bh = PATH_PLAST_BUFFER (path);
				1228	ih = PATH_PITEM_HEAD (path);
				1229
				1230	if (!is_statdata_le_ih (ih))
				1231	reiserfs_panic (inode->i_sb, "vs-13065: update_stat_data: key %k, found item %h",
				1232	INODE_PKEY (inode), ih);
				1233
				1234	if (stat_data_v1 (ih)) {
				1235	// path points to old stat data
				1236	inode2sd_v1 (B_I_PITEM (bh, ih), inode, size);
				1237	} else {
				1238	inode2sd (B_I_PITEM (bh, ih), inode, size);
				1239	}
				1240
				1241	return;
				1242	}
				1243
				1244
				1245	void reiserfs_update_sd_size (struct reiserfs_transaction_handle *th,
				1246	struct inode * inode, loff_t size)
				1247	{
				1248	struct cpu_key key;
				1249	INITIALIZE_PATH(path);
				1250	struct buffer_head *bh ;
				1251	int fs_gen ;
				1252	struct item_head *ih, tmp_ih ;
				1253	int retval;
				1254
				1255	BUG_ON (!th->t_trans_id);
				1256
				1257	make_cpu_key (&key, inode, SD_OFFSET, TYPE_STAT_DATA, 3);//key type is unimportant
				1258
				1259	for(;;) {
				1260	int pos;
				1261	/* look for the object's stat data */
				1262	retval = search_item (inode->i_sb, &key, &path);
				1263	if (retval == IO_ERROR) {
				1264	reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: "
				1265	"i/o failure occurred trying to update %K stat data",
				1266	&key);
				1267	return;
				1268	}
				1269	if (retval == ITEM_NOT_FOUND) {
				1270	pos = PATH_LAST_POSITION (&path);
				1271	pathrelse(&path) ;
				1272	if (inode->i_nlink == 0) {
				1273	/reiserfs_warning (inode->i_sb, "vs-13050: reiserfs_update_sd: i_nlink == 0, stat data not found");/
				1274	return;
				1275	}
				1276	reiserfs_warning (inode->i_sb, "vs-13060: reiserfs_update_sd: "
				1277	"stat data of object %k (nlink == %d) not found (pos %d)",
				1278	INODE_PKEY (inode), inode->i_nlink, pos);
				1279	reiserfs_check_path(&path) ;
				1280	return;
				1281	}
				1282
				1283	/* sigh, prepare_for_journal might schedule. When it schedules the
				1284	** FS might change. We have to detect that, and loop back to the
				1285	** search if the stat data item has moved
				1286	*/
				1287	bh = get_last_bh(&path) ;
				1288	ih = get_ih(&path) ;
				1289	copy_item_head (&tmp_ih, ih);
				1290	fs_gen = get_generation (inode->i_sb);
				1291	reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
				1292	if (fs_changed (fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
				1293	reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
				1294	continue ; /* Stat_data item has been moved after scheduling. */
				1295	}
				1296	break;
				1297	}
				1298	update_stat_data (&path, inode, size);
				1299	journal_mark_dirty(th, th->t_super, bh) ;
				1300	pathrelse (&path);
				1301	return;
				1302	}
				1303
				1304	/* reiserfs_read_locked_inode is called to read the inode off disk, and it
				1305	** does a make_bad_inode when things go wrong. But, we need to make sure
				1306	** and clear the key in the private portion of the inode, otherwise a
				1307	** corresponding iput might try to delete whatever object the inode last
				1308	** represented.
				1309	*/
				1310	static void reiserfs_make_bad_inode(struct inode *inode) {
				1311	memset(INODE_PKEY(inode), 0, KEY_SIZE);
				1312	make_bad_inode(inode);
				1313	}
				1314
				1315	//
				1316	// initially this function was derived from minix or ext2's analog and
				1317	// evolved as the prototype did
				1318	//
				1319
				1320	int reiserfs_init_locked_inode (struct inode * inode, void *p)
				1321	{
				1322	struct reiserfs_iget_args args = (struct reiserfs_iget_args )p ;
				1323	inode->i_ino = args->objectid;
				1324	INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid);
				1325	return 0;
				1326	}
				1327
				1328	/* looks for stat data in the tree, and fills up the fields of in-core
				1329	inode stat data fields */
				1330	void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args *args)
				1331	{
				1332	INITIALIZE_PATH (path_to_sd);
				1333	struct cpu_key key;
				1334	unsigned long dirino;
				1335	int retval;
				1336
				1337	dirino = args->dirid ;
				1338
				1339	/* set version 1, version 2 could be used too, because stat data
				1340	key is the same in both versions */
				1341	key.version = KEY_FORMAT_3_5;
				1342	key.on_disk_key.k_dir_id = dirino;
				1343	key.on_disk_key.k_objectid = inode->i_ino;
				1344	key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET;
				1345	key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS;
				1346
				1347	/* look for the object's stat data */
				1348	retval = search_item (inode->i_sb, &key, &path_to_sd);
				1349	if (retval == IO_ERROR) {
				1350	reiserfs_warning (inode->i_sb, "vs-13070: reiserfs_read_locked_inode: "
				1351	"i/o failure occurred trying to find stat data of %K",
				1352	&key);
				1353	reiserfs_make_bad_inode(inode) ;
				1354	return;
				1355	}
				1356	if (retval != ITEM_FOUND) {
				1357	/* a stale NFS handle can trigger this without it being an error */
				1358	pathrelse (&path_to_sd);
				1359	reiserfs_make_bad_inode(inode) ;
				1360	inode->i_nlink = 0;
				1361	return;
				1362	}
				1363
				1364	init_inode (inode, &path_to_sd);
				1365
				1366	/* It is possible that knfsd is trying to access inode of a file
				1367	that is being removed from the disk by some other thread. As we
				1368	update sd on unlink all that is required is to check for nlink
				1369	here. This bug was first found by Sizif when debugging
				1370	SquidNG/Butterfly, forgotten, and found again after Philippe
				1371	Gramoulle <philippe.gramoulle@mmania.com> reproduced it.
				1372
				1373	More logical fix would require changes in fs/inode.c:iput() to
				1374	remove inode from hash-table _after_ fs cleaned disk stuff up and
				1375	in iget() to return NULL if I_FREEING inode is found in
				1376	hash-table. */
				1377	/* Currently there is one place where it's ok to meet inode with
				1378	nlink==0: processing of open-unlinked and half-truncated files
				1379	during mount (fs/reiserfs/super.c:finish_unfinished()). */
				1380	if( ( inode -> i_nlink == 0 ) &&
				1381	! REISERFS_SB(inode -> i_sb) -> s_is_unlinked_ok ) {
				1382	reiserfs_warning (inode->i_sb,
				1383	"vs-13075: reiserfs_read_locked_inode: "
				1384	"dead inode read from disk %K. "
				1385	"This is likely to be race with knfsd. Ignore",
				1386	&key );
				1387	reiserfs_make_bad_inode( inode );
				1388	}
				1389
				1390	reiserfs_check_path(&path_to_sd) ; /* init inode should be relsing */
				1391
				1392	}
				1393
				1394	/**
				1395	* reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked().
				1396	*
				1397	* @inode: inode from hash table to check
				1398	* @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args.
				1399	*
				1400	* This function is called by iget5_locked() to distinguish reiserfs inodes
				1401	* having the same inode numbers. Such inodes can only exist due to some
				1402	* error condition. One of them should be bad. Inodes with identical
				1403	* inode numbers (objectids) are distinguished by parent directory ids.
				1404	*
				1405	*/
				1406	int reiserfs_find_actor( struct inode inode, void opaque )
				1407	{
				1408	struct reiserfs_iget_args *args;
				1409
				1410	args = opaque;
				1411	/* args is already in CPU order */
				1412	return (inode->i_ino == args->objectid) &&
				1413	(le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid);
				1414	}
				1415
				1416	struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key)
				1417	{
				1418	struct inode * inode;
				1419	struct reiserfs_iget_args args ;
				1420
				1421	args.objectid = key->on_disk_key.k_objectid ;
				1422	args.dirid = key->on_disk_key.k_dir_id ;
				1423	inode = iget5_locked (s, key->on_disk_key.k_objectid,
				1424	reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args));
				1425	if (!inode)
				1426	return ERR_PTR(-ENOMEM) ;
				1427
				1428	if (inode->i_state & I_NEW) {
				1429	reiserfs_read_locked_inode(inode, &args);
				1430	unlock_new_inode(inode);
				1431	}
				1432
				1433	if (comp_short_keys (INODE_PKEY (inode), key) \|\| is_bad_inode (inode)) {
				1434	/* either due to i/o error or a stale NFS handle */
				1435	iput (inode);
				1436	inode = NULL;
				1437	}
				1438	return inode;
				1439	}
				1440
				1441	struct dentry reiserfs_get_dentry(struct super_block sb, void *vobjp)
				1442	{
				1443	__u32 *data = vobjp;
				1444	struct cpu_key key ;
				1445	struct dentry *result;
				1446	struct inode *inode;
				1447
				1448	key.on_disk_key.k_objectid = data[0] ;
				1449	key.on_disk_key.k_dir_id = data[1] ;
				1450	reiserfs_write_lock(sb);
				1451	inode = reiserfs_iget(sb, &key) ;
				1452	if (inode && !IS_ERR(inode) && data[2] != 0 &&
				1453	data[2] != inode->i_generation) {
				1454	iput(inode) ;
				1455	inode = NULL ;
				1456	}
				1457	reiserfs_write_unlock(sb);
				1458	if (!inode)
				1459	inode = ERR_PTR(-ESTALE);
				1460	if (IS_ERR(inode))
				1461	return ERR_PTR(PTR_ERR(inode));
				1462	result = d_alloc_anon(inode);
				1463	if (!result) {
				1464	iput(inode);
				1465	return ERR_PTR(-ENOMEM);
				1466	}
				1467	return result;
				1468	}
				1469
				1470	struct dentry reiserfs_decode_fh(struct super_block sb, __u32 *data,
				1471	int len, int fhtype,
				1472	int (acceptable)(void contect, struct dentry *de),
				1473	void *context) {
				1474	__u32 obj[3], parent[3];
				1475
				1476	/* fhtype happens to reflect the number of u32s encoded.
				1477	* due to a bug in earlier code, fhtype might indicate there
				1478	* are more u32s then actually fitted.
				1479	* so if fhtype seems to be more than len, reduce fhtype.
				1480	* Valid types are:
				1481	* 2 - objectid + dir_id - legacy support
				1482	* 3 - objectid + dir_id + generation
				1483	* 4 - objectid + dir_id + objectid and dirid of parent - legacy
				1484	* 5 - objectid + dir_id + generation + objectid and dirid of parent
				1485	* 6 - as above plus generation of directory
				1486	* 6 does not fit in NFSv2 handles
				1487	*/
				1488	if (fhtype > len) {
				1489	if (fhtype != 6 \|\| len != 5)
				1490	reiserfs_warning (sb, "nfsd/reiserfs, fhtype=%d, len=%d - odd",
				1491	fhtype, len);
				1492	fhtype = 5;
				1493	}
				1494
				1495	obj[0] = data[0];
				1496	obj[1] = data[1];
				1497	if (fhtype == 3 \|\| fhtype >= 5)
				1498	obj[2] = data[2];
				1499	else obj[2] = 0; /* generation number */
				1500
				1501	if (fhtype >= 4) {
				1502	parent[0] = data[fhtype>=5?3:2] ;
				1503	parent[1] = data[fhtype>=5?4:3] ;
				1504	if (fhtype == 6)
				1505	parent[2] = data[5];
				1506	else parent[2] = 0;
				1507	}
				1508	return sb->s_export_op->find_exported_dentry(sb, obj, fhtype < 4 ? NULL : parent,
				1509	acceptable, context);
				1510	}
				1511
				1512	int reiserfs_encode_fh(struct dentry dentry, __u32 data, int *lenp, int need_parent) {
				1513	struct inode *inode = dentry->d_inode ;
				1514	int maxlen = *lenp;
				1515
				1516	if (maxlen < 3)
				1517	return 255 ;
				1518
				1519	data[0] = inode->i_ino ;
				1520	data[1] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
				1521	data[2] = inode->i_generation ;
				1522	*lenp = 3 ;
				1523	/* no room for directory info? return what we've stored so far */
				1524	if (maxlen < 5 \|\| ! need_parent)
				1525	return 3 ;
				1526
				1527	spin_lock(&dentry->d_lock);
				1528	inode = dentry->d_parent->d_inode ;
				1529	data[3] = inode->i_ino ;
				1530	data[4] = le32_to_cpu(INODE_PKEY (inode)->k_dir_id) ;
				1531	*lenp = 5 ;
				1532	if (maxlen >= 6) {
				1533	data[5] = inode->i_generation ;
				1534	*lenp = 6 ;
				1535	}
				1536	spin_unlock(&dentry->d_lock);
				1537	return *lenp ;
				1538	}
				1539
				1540
				1541	/* looks for stat data, then copies fields to it, marks the buffer
				1542	containing stat data as dirty */
				1543	/* reiserfs inodes are never really dirty, since the dirty inode call
				1544	** always logs them. This call allows the VFS inode marking routines
				1545	** to properly mark inodes for datasync and such, but only actually
				1546	** does something when called for a synchronous update.
				1547	*/
				1548	int reiserfs_write_inode (struct inode * inode, int do_sync) {
				1549	struct reiserfs_transaction_handle th ;
				1550	int jbegin_count = 1 ;
				1551
				1552	if (inode->i_sb->s_flags & MS_RDONLY)
				1553	return -EROFS;
				1554	/* memory pressure can sometimes initiate write_inode calls with sync == 1,
				1555	** these cases are just when the system needs ram, not when the
				1556	** inode needs to reach disk for safety, and they can safely be
				1557	** ignored because the altered inode has already been logged.
				1558	*/
				1559	if (do_sync && !(current->flags & PF_MEMALLOC)) {
				1560	reiserfs_write_lock(inode->i_sb);
				1561	if (!journal_begin(&th, inode->i_sb, jbegin_count)) {
				1562	reiserfs_update_sd (&th, inode);
				1563	journal_end_sync(&th, inode->i_sb, jbegin_count) ;
				1564	}
				1565	reiserfs_write_unlock(inode->i_sb);
				1566	}
				1567	return 0;
				1568	}
				1569
				1570	/* stat data of new object is inserted already, this inserts the item
				1571	containing "." and ".." entries */
				1572	static int reiserfs_new_directory (struct reiserfs_transaction_handle *th,
				1573	struct inode *inode,
				1574	struct item_head * ih, struct path * path,
				1575	struct inode * dir)
				1576	{
				1577	struct super_block * sb = th->t_super;
				1578	char empty_dir [EMPTY_DIR_SIZE];
				1579	char * body = empty_dir;
				1580	struct cpu_key key;
				1581	int retval;
				1582
				1583	BUG_ON (!th->t_trans_id);
				1584
				1585	_make_cpu_key (&key, KEY_FORMAT_3_5, le32_to_cpu (ih->ih_key.k_dir_id),
				1586	le32_to_cpu (ih->ih_key.k_objectid), DOT_OFFSET, TYPE_DIRENTRY, 3/key length/);
				1587
				1588	/* compose item head for new item. Directories consist of items of
				1589	old type (ITEM_VERSION_1). Do not set key (second arg is 0), it
				1590	is done by reiserfs_new_inode */
				1591	if (old_format_only (sb)) {
				1592	make_le_item_head (ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE_V1, 2);
				1593
				1594	make_empty_dir_item_v1 (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
				1595	INODE_PKEY (dir)->k_dir_id,
				1596	INODE_PKEY (dir)->k_objectid );
				1597	} else {
				1598	make_le_item_head (ih, NULL, KEY_FORMAT_3_5, DOT_OFFSET, TYPE_DIRENTRY, EMPTY_DIR_SIZE, 2);
				1599
				1600	make_empty_dir_item (body, ih->ih_key.k_dir_id, ih->ih_key.k_objectid,
				1601	INODE_PKEY (dir)->k_dir_id,
				1602	INODE_PKEY (dir)->k_objectid );
				1603	}
				1604
				1605	/* look for place in the tree for new item */
				1606	retval = search_item (sb, &key, path);
				1607	if (retval == IO_ERROR) {
				1608	reiserfs_warning (sb, "vs-13080: reiserfs_new_directory: "
				1609	"i/o failure occurred creating new directory");
				1610	return -EIO;
				1611	}
				1612	if (retval == ITEM_FOUND) {
				1613	pathrelse (path);
				1614	reiserfs_warning (sb, "vs-13070: reiserfs_new_directory: "
				1615	"object with this key exists (%k)", &(ih->ih_key));
				1616	return -EEXIST;
				1617	}
				1618
				1619	/* insert item, that is empty directory item */
				1620	return reiserfs_insert_item (th, path, &key, ih, inode, body);
				1621	}
				1622
				1623
				1624	/* stat data of object has been inserted, this inserts the item
				1625	containing the body of symlink */
				1626	static int reiserfs_new_symlink (struct reiserfs_transaction_handle *th,
				1627	struct inode inode, / Inode of symlink */
				1628	struct item_head * ih,
				1629	struct path * path, const char * symname, int item_len)
				1630	{
				1631	struct super_block * sb = th->t_super;
				1632	struct cpu_key key;
				1633	int retval;
				1634
				1635	BUG_ON (!th->t_trans_id);
				1636
				1637	_make_cpu_key (&key, KEY_FORMAT_3_5,
				1638	le32_to_cpu (ih->ih_key.k_dir_id),
				1639	le32_to_cpu (ih->ih_key.k_objectid),
				1640	1, TYPE_DIRECT, 3/key length/);
				1641
				1642	make_le_item_head (ih, NULL, KEY_FORMAT_3_5, 1, TYPE_DIRECT, item_len, 0/free_space/);
				1643
				1644	/* look for place in the tree for new item */
				1645	retval = search_item (sb, &key, path);
				1646	if (retval == IO_ERROR) {
				1647	reiserfs_warning (sb, "vs-13080: reiserfs_new_symlinik: "
				1648	"i/o failure occurred creating new symlink");
				1649	return -EIO;
				1650	}
				1651	if (retval == ITEM_FOUND) {
				1652	pathrelse (path);
				1653	reiserfs_warning (sb, "vs-13080: reiserfs_new_symlink: "
				1654	"object with this key exists (%k)", &(ih->ih_key));
				1655	return -EEXIST;
				1656	}
				1657
				1658	/* insert item, that is body of symlink */
				1659	return reiserfs_insert_item (th, path, &key, ih, inode, symname);
				1660	}
				1661
				1662
				1663	/* inserts the stat data into the tree, and then calls
				1664	reiserfs_new_directory (to insert ".", ".." item if new object is
				1665	directory) or reiserfs_new_symlink (to insert symlink body if new
				1666	object is symlink) or nothing (if new object is regular file)
				1667
				1668	NOTE! uid and gid must already be set in the inode. If we return
				1669	non-zero due to an error, we have to drop the quota previously allocated
				1670	for the fresh inode. This can only be done outside a transaction, so
				1671	if we return non-zero, we also end the transaction.
					
					Returns 0 on success.  Error codes set in this function:
					-EDQUOT (inode quota allocation failed), -EPERM (dir is NULL or has
					no links), -ENOMEM (no unused objectid), -EIO / -EEXIST (stat data
					search failed or key already present), -EINVAL (uid/gid does not fit
					the old format), or whatever reiserfs_insert_item,
					reiserfs_new_directory, reiserfs_new_symlink or
					reiserfs_inherit_default_acl returned.
					
					On every error path i_nlink is set to 0, th->t_trans_id is cleared
					and the inode reference is dropped with iput().
					
					NOTE(review): in this listing "\|" / "\|\|" and "/name/" sequences
					(e.g. "3/key length/") look like extraction artifacts of "|" / "||"
					and of C comments respectively - confirm against a pristine copy. */
				1672	int reiserfs_new_inode (struct reiserfs_transaction_handle *th,
				1673	struct inode * dir, int mode,
				1674	const char * symname,
				1675	/* 0 for regular, EMPTY_DIR_SIZE for dirs,
				1676	strlen (symname) for symlinks)*/
				1677	loff_t i_size, struct dentry *dentry,
				1678	struct inode *inode)
				1679	{
				1680	struct super_block * sb;
				1681	INITIALIZE_PATH (path_to_key);
				1682	struct cpu_key key;
				1683	struct item_head ih;
				1684	struct stat_data sd;
				1685	int retval;
				1686	int err;
				1687
				1688	BUG_ON (!th->t_trans_id);
				1689
					/* quota was not charged, so skip DQUOT_FREE_INODE and go
					   straight to ending the transaction */
				1690	if (DQUOT_ALLOC_INODE(inode)) {
				1691	err = -EDQUOT;
				1692	goto out_end_trans;
				1693	}
				1694	if (!dir \|\| !dir->i_nlink) {
				1695	err = -EPERM;
				1696	goto out_bad_inode;
				1697	}
				1698
					/* sb is only assigned here; the error labels below use
					   th->t_super instead, so the earlier gotos do not read it */
				1699	sb = dir->i_sb;
				1700
				1701	/* item head of new item */
				1702	ih.ih_key.k_dir_id = reiserfs_choose_packing(dir);
				1703	ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th));
				1704	if (!ih.ih_key.k_objectid) {
				1705	err = -ENOMEM;
				1706	goto out_bad_inode ;
				1707	}
				1708	if (old_format_only (sb))
				1709	/* not a perfect generation count, as object ids can be reused, but
				1710	** this is as good as reiserfs can do right now.
				1711	** note that the private part of inode isn't filled in yet, we have
				1712	** to use the directory.
				1713	*/
				1714	inode->i_generation = le32_to_cpu (INODE_PKEY (dir)->k_objectid);
				1715	else
				1716	#if defined( USE_INODE_GENERATION_COUNTER )
				1717	inode->i_generation = le32_to_cpu(REISERFS_SB(sb)->s_rs->s_inode_generation);
				1718	#else
				1719	inode->i_generation = ++event;
				1720	#endif
				1721
				1722	/* fill stat data */
				1723	inode->i_nlink = (S_ISDIR (mode) ? 2 : 1);
				1724
				1725	/* uid and gid must already be set by the caller for quota init */
				1726
				1727	/* symlink cannot be immutable or append only, right? */
					/* NOTE(review): this tests inode->i_mode while the rest of
					   the function tests the mode argument - confirm both agree
					   at this point */
				1728	if( S_ISLNK( inode -> i_mode ) )
				1729	inode -> i_flags &= ~ ( S_IMMUTABLE \| S_APPEND );
				1730
				1731	inode->i_mtime = inode->i_atime = inode->i_ctime =
				1732	CURRENT_TIME_SEC;
				1733	inode->i_size = i_size;
				1734	inode->i_blocks = 0;
				1735	inode->i_bytes = 0;
				1736	REISERFS_I(inode)->i_first_direct_byte = S_ISLNK(mode) ? 1 :
				1737	U32_MAX/NO_BYTES_IN_DIRECT_ITEM/;
				1738
					/* reset the reiserfs-private part of the in-core inode */
				1739	INIT_LIST_HEAD(&(REISERFS_I(inode)->i_prealloc_list ));
				1740	REISERFS_I(inode)->i_flags = 0;
				1741	REISERFS_I(inode)->i_prealloc_block = 0;
				1742	REISERFS_I(inode)->i_prealloc_count = 0;
				1743	REISERFS_I(inode)->i_trans_id = 0;
				1744	REISERFS_I(inode)->i_jl = NULL;
					/* attributes are inherited from the parent directory,
					   filtered through REISERFS_INHERIT_MASK */
				1745	REISERFS_I(inode)->i_attrs =
				1746	REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK;
				1747	sd_attrs_to_i_attrs( REISERFS_I(inode) -> i_attrs, inode );
				1748	REISERFS_I(inode)->i_acl_access = NULL;
				1749	REISERFS_I(inode)->i_acl_default = NULL;
				1750	init_rwsem (&REISERFS_I(inode)->xattr_sem);
				1751
				1752	if (old_format_only (sb))
				1753	make_le_item_head (&ih, NULL, KEY_FORMAT_3_5, SD_OFFSET, TYPE_STAT_DATA, SD_V1_SIZE, MAX_US_INT);
				1754	else
				1755	make_le_item_head (&ih, NULL, KEY_FORMAT_3_6, SD_OFFSET, TYPE_STAT_DATA, SD_SIZE, MAX_US_INT);
				1756
				1757	/* key to search for correct place for new stat data */
				1758	_make_cpu_key (&key, KEY_FORMAT_3_6, le32_to_cpu (ih.ih_key.k_dir_id),
				1759	le32_to_cpu (ih.ih_key.k_objectid), SD_OFFSET, TYPE_STAT_DATA, 3/key length/);
				1760
				1761	/* find proper place for inserting of stat data */
				1762	retval = search_item (sb, &key, &path_to_key);
				1763	if (retval == IO_ERROR) {
				1764	err = -EIO;
				1765	goto out_bad_inode;
				1766	}
				1767	if (retval == ITEM_FOUND) {
				1768	pathrelse (&path_to_key);
				1769	err = -EEXIST;
				1770	goto out_bad_inode;
				1771	}
				1772	if (old_format_only (sb)) {
				1773	if (inode->i_uid & ~0xffff \|\| inode->i_gid & ~0xffff) {
				1774	pathrelse (&path_to_key);
				1775	/* i_uid or i_gid is too big to be stored in stat data v3.5 */
				1776	err = -EINVAL;
				1777	goto out_bad_inode;
				1778	}
				1779	inode2sd_v1 (&sd, inode, inode->i_size);
				1780	} else {
				1781	inode2sd (&sd, inode, inode->i_size);
				1782	}
				1783	// these do not go to on-disk stat data
				1784	inode->i_ino = le32_to_cpu (ih.ih_key.k_objectid);
				1785	inode->i_blksize = reiserfs_default_io_size;
				1786
				1787	// store in in-core inode the key of stat data and version all
				1788	// object items will have (directory items will have old offset
				1789	// format, other new objects will consist of new items)
				1790	memcpy (INODE_PKEY (inode), &(ih.ih_key), KEY_SIZE);
				1791	if (old_format_only (sb) \|\| S_ISDIR(mode) \|\| S_ISLNK(mode))
				1792	set_inode_item_key_version (inode, KEY_FORMAT_3_5);
				1793	else
				1794	set_inode_item_key_version (inode, KEY_FORMAT_3_6);
				1795	if (old_format_only (sb))
				1796	set_inode_sd_version (inode, STAT_DATA_V1);
				1797	else
				1798	set_inode_sd_version (inode, STAT_DATA_V2);
				1799
				1800	/* insert the stat data into the tree */
				1801	#ifdef DISPLACE_NEW_PACKING_LOCALITIES
				1802	if (REISERFS_I(dir)->new_packing_locality)
				1803	th->displace_new_blocks = 1;
				1804	#endif
				1805	retval = reiserfs_insert_item (th, &path_to_key, &key, &ih, inode, (char *)(&sd));
				1806	if (retval) {
				1807	err = retval;
				1808	reiserfs_check_path(&path_to_key) ;
				1809	goto out_bad_inode;
				1810	}
				1811
				1812	#ifdef DISPLACE_NEW_PACKING_LOCALITIES
				1813	if (!th->displace_new_blocks)
				1814	REISERFS_I(dir)->new_packing_locality = 0;
				1815	#endif
					/* retval is 0 here; it is only overwritten below for
					   directories and symlinks */
				1816	if (S_ISDIR(mode)) {
				1817	/* insert item with "." and ".." */
				1818	retval = reiserfs_new_directory (th, inode, &ih, &path_to_key, dir);
				1819	}
				1820
				1821	if (S_ISLNK(mode)) {
				1822	/* insert body of symlink */
				1823	if (!old_format_only (sb))
				1824	i_size = ROUND_UP(i_size);
				1825	retval = reiserfs_new_symlink (th, inode, &ih, &path_to_key, symname, i_size);
				1826	}
					/* from here on the stat data is in the tree: failures end
					   the transaction themselves and jump past the quota /
					   bad-inode handling to out_inserted_sd */
				1827	if (retval) {
				1828	err = retval;
				1829	reiserfs_check_path(&path_to_key) ;
				1830	journal_end(th, th->t_super, th->t_blocks_allocated);
				1831	goto out_inserted_sd;
				1832	}
				1833
				1834	/* XXX CHECK THIS */
				1835	if (reiserfs_posixacl (inode->i_sb)) {
				1836	retval = reiserfs_inherit_default_acl (dir, dentry, inode);
				1837	if (retval) {
				1838	err = retval;
				1839	reiserfs_check_path(&path_to_key) ;
				1840	journal_end(th, th->t_super, th->t_blocks_allocated);
				1841	goto out_inserted_sd;
				1842	}
				1843	} else if (inode->i_sb->s_flags & MS_POSIXACL) {
				1844	reiserfs_warning (inode->i_sb, "ACLs aren't enabled in the fs, "
				1845	"but vfs thinks they are!");
				1846	} else if (is_reiserfs_priv_object (dir)) {
				1847	reiserfs_mark_inode_private (inode);
				1848	}
				1849
				1850	insert_inode_hash (inode);
				1851	reiserfs_update_sd(th, inode);
				1852	reiserfs_check_path(&path_to_key) ;
				1853
				1854	return 0;
				1855
				1856	/* it looks like you can easily compress these two goto targets into
				1857	* one. Keeping it like this doesn't actually hurt anything, and they
				1858	* are place holders for what the quota code actually needs.
				1859	*/
					/* The three labels below fall through into each other:
					   out_bad_inode -> out_end_trans -> out_inserted_sd. */
				1860	out_bad_inode:
				1861	/* Invalidate the object, nothing was inserted yet */
				1862	INODE_PKEY(inode)->k_objectid = 0;
				1863
				1864	/* Quota change must be inside a transaction for journaling */
				1865	DQUOT_FREE_INODE(inode);
				1866
				1867	out_end_trans:
				1868	journal_end(th, th->t_super, th->t_blocks_allocated) ;
				1869	/* Drop can be outside and it needs more credits so it's better to have it outside */
				1870	DQUOT_DROP(inode);
				1871	inode->i_flags \|= S_NOQUOTA;
				1872	make_bad_inode(inode);
				1873
				1874	out_inserted_sd:
				1875	inode->i_nlink = 0;
				1876	th->t_trans_id = 0; /* so the caller can't use this handle later */
				1877	iput(inode);
				1878	return err;
				1879	}
				1880
				1881	/*
				1882	** finds the tail page in the page cache,
				1883	** reads the last block in.
				1884	**
				1885	** On success, page_result is set to a locked, pinned page, and bh_result
				1886	** is set to an up to date buffer for the last block in the file. returns 0.
				1887	**
				1888	** tail conversion is not done, so bh_result might not be valid for writing
				1889	** check buffer_mapped(bh_result) and bh_result->b_blocknr != 0 before
				1890	** trying to write the block.
				1891	**
				1892	** on failure, nonzero is returned, page_result and bh_result are untouched.
				1893	*/
				1894	static int grab_tail_page(struct inode *p_s_inode,
				1895	struct page **page_result,
				1896	struct buffer_head **bh_result) {
				1897
				1898	/* we want the page with the last byte in the file,
				1899	** not the page that will hold the next byte for appending
				1900	*/
				1901	unsigned long index = (p_s_inode->i_size-1) >> PAGE_CACHE_SHIFT ;
				1902	unsigned long pos = 0 ;
				1903	unsigned long start = 0 ;
				1904	unsigned long blocksize = p_s_inode->i_sb->s_blocksize ;
				1905	unsigned long offset = (p_s_inode->i_size) & (PAGE_CACHE_SIZE - 1) ;
				1906	struct buffer_head *bh ;
				1907	struct buffer_head *head ;
				1908	struct page * page ;
				1909	int error ;
				1910
				1911	/* we know that we are only called with inode->i_size > 0.
				1912	** we also know that a file tail can never be as big as a block
				1913	** If i_size % blocksize == 0, our file is currently block aligned
				1914	** and it won't need converting or zeroing after a truncate.
				1915	*/
				1916	if ((offset & (blocksize - 1)) == 0) {
				1917	return -ENOENT ;
				1918	}
				1919	page = grab_cache_page(p_s_inode->i_mapping, index) ;
				1920	error = -ENOMEM ;
				1921	if (!page) {
				1922	goto out ;
				1923	}
				1924	/* start within the page of the last block in the file */
				1925	start = (offset / blocksize) * blocksize ;
				1926
				1927	error = block_prepare_write(page, start, offset,
				1928	reiserfs_get_block_create_0) ;
				1929	if (error)
				1930	goto unlock ;
				1931
				1932	head = page_buffers(page) ;
				1933	bh = head;
				1934	do {
				1935	if (pos >= start) {
				1936	break ;
				1937	}
				1938	bh = bh->b_this_page ;
				1939	pos += blocksize ;
				1940	} while(bh != head) ;
				1941
				1942	if (!buffer_uptodate(bh)) {
				1943	/* note, this should never happen, prepare_write should
				1944	** be taking care of this for us. If the buffer isn't up to date,
				1945	** I've screwed up the code to find the buffer, or the code to
				1946	** call prepare_write
				1947	*/
				1948	reiserfs_warning (p_s_inode->i_sb,
				1949	"clm-6000: error reading block %lu on dev %s",
				1950	bh->b_blocknr,
				1951	reiserfs_bdevname (p_s_inode->i_sb)) ;
				1952	error = -EIO ;
				1953	goto unlock ;
				1954	}
				1955	*bh_result = bh ;
				1956	*page_result = page ;
				1957
				1958	out:
				1959	return error ;
				1960
				1961	unlock:
				1962	unlock_page(page) ;
				1963	page_cache_release(page) ;
				1964	return error ;
				1965	}
				1966
				1967	/*
				1968	** vfs version of truncate file. Must NOT be called with
				1969	** a transaction already started.
				1970	**
				1971	** some code taken from block_truncate_page
				1972	*/
				1973	int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) {
				1974	struct reiserfs_transaction_handle th ;
				1975	/* we want the offset for the first byte after the end of the file */
				1976	unsigned long offset = p_s_inode->i_size & (PAGE_CACHE_SIZE - 1) ;
				1977	unsigned blocksize = p_s_inode->i_sb->s_blocksize ;
				1978	unsigned length ;
				1979	struct page *page = NULL ;
				1980	int error ;
				1981	struct buffer_head *bh = NULL ;
				1982
				1983	reiserfs_write_lock(p_s_inode->i_sb);
				1984
				1985	if (p_s_inode->i_size > 0) {
				1986	if ((error = grab_tail_page(p_s_inode, &page, &bh))) {
				1987	// -ENOENT means we truncated past the end of the file,
				1988	// and get_block_create_0 could not find a block to read in,
				1989	// which is ok.
				1990	if (error != -ENOENT)
				1991	reiserfs_warning (p_s_inode->i_sb,
				1992	"clm-6001: grab_tail_page failed %d",
				1993	error);
				1994	page = NULL ;
				1995	bh = NULL ;
				1996	}
				1997	}
				1998
				1999	/* so, if page != NULL, we have a buffer head for the offset at
				2000	** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
				2001	** then we have an unformatted node. Otherwise, we have a direct item,
				2002	** and no zeroing is required on disk. We zero after the truncate,
				2003	** because the truncate might pack the item anyway
				2004	** (it will unmap bh if it packs).
				2005	*/
				2006	/* it is enough to reserve space in transaction for 2 balancings:
				2007	one for "save" link adding and another for the first
				2008	cut_from_item. 1 is for update_sd */
				2009	error = journal_begin (&th, p_s_inode->i_sb,
				2010	JOURNAL_PER_BALANCE_CNT * 2 + 1);
				2011	if (error)
				2012	goto out;
				2013	reiserfs_update_inode_transaction(p_s_inode) ;
				2014	if (update_timestamps)
				2015	/* we are doing real truncate: if the system crashes before the last
				2016	transaction of truncating gets committed - on reboot the file
				2017	either appears truncated properly or not truncated at all */
				2018	add_save_link (&th, p_s_inode, 1);
				2019	error = reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ;
				2020	if (error)
				2021	goto out;
				2022	error = journal_end (&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1);
				2023	if (error)
				2024	goto out;
				2025
				2026	if (update_timestamps) {
				2027	error = remove_save_link (p_s_inode, 1/* truncate */);
				2028	if (error)
				2029	goto out;
				2030	}
				2031
				2032	if (page) {
				2033	length = offset & (blocksize - 1) ;
				2034	/* if we are not on a block boundary */
				2035	if (length) {
				2036	char *kaddr;
				2037
				2038	length = blocksize - length ;
				2039	kaddr = kmap_atomic(page, KM_USER0) ;
				2040	memset(kaddr + offset, 0, length) ;
				2041	flush_dcache_page(page) ;
				2042	kunmap_atomic(kaddr, KM_USER0) ;
				2043	if (buffer_mapped(bh) && bh->b_blocknr != 0) {
				2044	mark_buffer_dirty(bh) ;
				2045	}
				2046	}
				2047	unlock_page(page) ;
				2048	page_cache_release(page) ;
				2049	}
				2050
				2051	reiserfs_write_unlock(p_s_inode->i_sb);
				2052	return 0;
				2053	out:
				2054	if (page) {
				2055	unlock_page (page);
				2056	page_cache_release (page);
				2057	}
				2058	reiserfs_write_unlock(p_s_inode->i_sb);
				2059	return error;
				2060	}
				2061
					/*
					 * Map one buffer of a page being written back to its place
					 * in the tree.
					 *
					 * The tree is searched for the first byte of 'block'.  Then:
					 *  - position not found, or an indirect item whose block
					 *    pointer is zero (a hole): fall back to reiserfs_get_block()
					 *    with GET_BLOCK_CREATE; if that still leaves the buffer
					 *    unmapped or mapped to block 0, the search is restarted;
					 *  - indirect item with a non-zero pointer: bh_result is
					 *    mapped to that block;
					 *  - direct item: the bytes are copied from the page into the
					 *    item inside a transaction and bh_result is mapped to
					 *    block 0, after which its dirty bit is cleared;
					 *  - any other item type: -EIO.
					 *
					 * Returns -EIO immediately if bh_result is not up to date,
					 * otherwise 0 or the error from the journal / get_block calls.
					 */
				2062	static int map_block_for_writepage(struct inode *inode,
				2063	struct buffer_head *bh_result,
				2064	unsigned long block) {
				2065	struct reiserfs_transaction_handle th ;
				2066	int fs_gen ;
				2067	struct item_head tmp_ih ;
				2068	struct item_head *ih ;
				2069	struct buffer_head *bh ;
Al Viro	3e8962b	2005-05-01 08:59:18 -0700	[diff] [blame^]	2070	__le32 *item ;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2071	struct cpu_key key ;
				2072	INITIALIZE_PATH(path) ;
				2073	int pos_in_item ;
				2074	int jbegin_count = JOURNAL_PER_BALANCE_CNT ;
					/* key offset of the first byte of the block (offsets are
					   written here as byte position + 1) */
				2075	loff_t byte_offset = (block << inode->i_sb->s_blocksize_bits) + 1 ;
				2076	int retval ;
				2077	int use_get_block = 0 ;
				2078	int bytes_copied = 0 ;
				2079	int copy_size ;
				2080	int trans_running = 0;
				2081
				2082	/* catch places below that try to log something without starting a trans */
				2083	th.t_trans_id = 0;
				2084
				2085	if (!buffer_uptodate(bh_result)) {
				2086	return -EIO;
				2087	}
				2088
					/* the page stays mapped across restarts; kunmap is done
					   once near the end of the function */
				2089	kmap(bh_result->b_page) ;
				2090	start_over:
				2091	reiserfs_write_lock(inode->i_sb);
				2092	make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;
				2093
				2094	research:
				2095	retval = search_for_position_by_key(inode->i_sb, &key, &path) ;
				2096	if (retval != POSITION_FOUND) {
				2097	use_get_block = 1;
				2098	goto out ;
				2099	}
				2100
				2101	bh = get_last_bh(&path) ;
				2102	ih = get_ih(&path) ;
				2103	item = get_item(&path) ;
				2104	pos_in_item = path.pos_in_item ;
				2105
				2106	/* we've found an unformatted node */
				2107	if (indirect_item_found(retval, ih)) {
				2108	if (bytes_copied > 0) {
				2109	reiserfs_warning (inode->i_sb, "clm-6002: bytes_copied %d",
				2110	bytes_copied) ;
				2111	}
				2112	if (!get_block_num(item, pos_in_item)) {
				2113	/* crap, we are writing to a hole */
				2114	use_get_block = 1;
				2115	goto out ;
				2116	}
				2117	set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode);
				2118	} else if (is_direct_le_ih(ih)) {
				2119	char *p ;
					/* p points at this block's data inside the kmapped page */
				2120	p = page_address(bh_result->b_page) ;
				2121	p += (byte_offset -1) & (PAGE_CACHE_SIZE - 1) ;
				2122	copy_size = ih_item_len(ih) - pos_in_item;
				2123
					/* remember the generation and item head so a tree change
					   while we may block can be detected below */
				2124	fs_gen = get_generation(inode->i_sb) ;
				2125	copy_item_head(&tmp_ih, ih) ;
				2126
				2127	if (!trans_running) {
				2128	/* vs-3050 is gone, no need to drop the path */
				2129	retval = journal_begin(&th, inode->i_sb, jbegin_count) ;
				2130	if (retval)
				2131	goto out;
				2132	reiserfs_update_inode_transaction(inode) ;
				2133	trans_running = 1;
					/* NOTE(review): restore_prepared_buffer is called
					   here before reiserfs_prepare_for_journal has run
					   on this pass - confirm this is intended */
				2134	if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
				2135	reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
				2136	goto research;
				2137	}
				2138	}
				2139
				2140	reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
				2141
				2142	if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
				2143	reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
				2144	goto research;
				2145	}
				2146
				2147	memcpy( B_I_PITEM(bh, ih) + pos_in_item, p + bytes_copied, copy_size) ;
				2148
				2149	journal_mark_dirty(&th, inode->i_sb, bh) ;
				2150	bytes_copied += copy_size ;
					/* block number 0 marks "data lives in a direct item" */
				2151	set_block_dev_mapped(bh_result, 0, inode);
				2152
				2153	/* are there still bytes left? */
				2154	if (bytes_copied < bh_result->b_size &&
				2155	(byte_offset + bytes_copied) < inode->i_size) {
				2156	set_cpu_key_k_offset(&key, cpu_key_k_offset(&key) + copy_size) ;
				2157	goto research ;
				2158	}
				2159	} else {
				2160	reiserfs_warning (inode->i_sb,
				2161	"clm-6003: bad item inode %lu, device %s",
				2162	inode->i_ino, reiserfs_bdevname (inode->i_sb)) ;
				2163	retval = -EIO ;
				2164	goto out ;
				2165	}
				2166	retval = 0 ;
				2167
				2168	out:
				2169	pathrelse(&path) ;
					/* an error from journal_end replaces any earlier retval */
				2170	if (trans_running) {
				2171	int err = journal_end(&th, inode->i_sb, jbegin_count) ;
				2172	if (err)
				2173	retval = err;
				2174	trans_running = 0;
				2175	}
				2176	reiserfs_write_unlock(inode->i_sb);
				2177
				2178	/* this is where we fill in holes in the file. */
				2179	if (use_get_block) {
				2180	retval = reiserfs_get_block(inode, block, bh_result,
				2181	GET_BLOCK_CREATE \| GET_BLOCK_NO_ISEM \|
				2182	GET_BLOCK_NO_DANGLE);
				2183	if (!retval) {
				2184	if (!buffer_mapped(bh_result) \|\| bh_result->b_blocknr == 0) {
				2185	/* get_block failed to find a mapped unformatted node. */
				2186	use_get_block = 0 ;
				2187	goto start_over ;
				2188	}
				2189	}
				2190	}
				2191	kunmap(bh_result->b_page) ;
				2192
				2193	if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
				2194	/* we've copied data from the page into the direct item, so the
				2195	* buffer in the page is now clean, mark it to reflect that.
				2196	*/
				2197	lock_buffer(bh_result);
				2198	clear_buffer_dirty(bh_result);
				2199	unlock_buffer(bh_result);
				2200	}
				2201	return retval ;
				2202	}
				2203
				2204	/*
				2205	* mason@suse.com: updated in 2.5.54 to follow the same general io
				2206	* start/recovery path as __block_write_full_page, along with special
				2207	* code to handle reiserfs tails.
					*
					* Outline of the steps below:
					*  1. make sure the page has buffers and zero the part of the
					*     last page that lies past i_size;
					*  2. map every buffer that needs it via map_block_for_writepage;
					*  3. if the page was marked PageChecked, log its mapped buffers
					*     in a transaction instead of submitting them;
					*  4. otherwise lock dirty buffers, mark them async-write and
					*     submit them;
					*  5. if nothing was submitted, end page writeback by hand.
					* On error the "fail" path still submits any mapped dirty
					* buffers and then returns the error through "done".
					*
					* NOTE(review): "page" and "wbc" are used with -> below and
					* "head"/"bh" are assigned pointers, so the "*" declarators
					* appear to have been lost from this listing - confirm against
					* a pristine copy.
				2208	*/
				2209	static int reiserfs_write_full_page(struct page page, struct writeback_control wbc) {
				2210	struct inode *inode = page->mapping->host ;
				2211	unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
				2212	int error = 0;
				2213	unsigned long block ;
				2214	struct buffer_head head, bh;
				2215	int partial = 0 ;
				2216	int nr = 0;
				2217	int checked = PageChecked(page);
				2218	struct reiserfs_transaction_handle th;
				2219	struct super_block *s = inode->i_sb;
				2220	int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
				2221	th.t_trans_id = 0;
				2222
				2223	/* The page dirty bit is cleared before writepage is called, which
				2224	* means we have to tell create_empty_buffers to make dirty buffers
				2225	* The page really should be up to date at this point, so tossing
				2226	* in the BH_Uptodate is just a sanity check.
				2227	*/
				2228	if (!page_has_buffers(page)) {
				2229	create_empty_buffers(page, s->s_blocksize,
				2230	(1 << BH_Dirty) \| (1 << BH_Uptodate));
				2231	}
				2232	head = page_buffers(page) ;
				2233
				2234	/* last page in the file, zero out any contents past the
				2235	** last byte in the file
				2236	*/
				2237	if (page->index >= end_index) {
				2238	char *kaddr;
				2239	unsigned last_offset;
				2240
				2241	last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
				2242	/* no file contents in this page */
				2243	if (page->index >= end_index + 1 \|\| !last_offset) {
				2244	unlock_page(page);
				2245	return 0;
				2246	}
				2247	kaddr = kmap_atomic(page, KM_USER0);
				2248	memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE-last_offset) ;
				2249	flush_dcache_page(page) ;
				2250	kunmap_atomic(kaddr, KM_USER0) ;
				2251	}
				2252	bh = head ;
					/* file block number of the first buffer on this page */
				2253	block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits) ;
				2254	/* first map all the buffers, logging any direct items we find */
				2255	do {
				2256	if ((checked \|\| buffer_dirty(bh)) && (!buffer_mapped(bh) \|\|
				2257	(buffer_mapped(bh) && bh->b_blocknr == 0))) {
				2258	/* not mapped yet, or it points to a direct item, search
				2259	* the btree for the mapping info, and log any direct
				2260	* items found
				2261	*/
				2262	if ((error = map_block_for_writepage(inode, bh, block))) {
				2263	goto fail ;
				2264	}
				2265	}
				2266	bh = bh->b_this_page;
				2267	block++;
				2268	} while(bh != head) ;
				2269
				2270	/*
				2271	* we start the transaction after map_block_for_writepage,
				2272	* because it can create holes in the file (an unbounded operation).
				2273	* starting it here, we can make a reliable estimate for how many
				2274	* blocks we're going to log
				2275	*/
				2276	if (checked) {
				2277	ClearPageChecked(page);
				2278	reiserfs_write_lock(s);
				2279	error = journal_begin(&th, s, bh_per_page + 1);
				2280	if (error) {
				2281	reiserfs_write_unlock(s);
				2282	goto fail;
				2283	}
				2284	reiserfs_update_inode_transaction(inode);
				2285	}
				2286	/* now go through and lock any dirty buffers on the page */
					/* bh == head again here; every buffer gets a reference
					   (get_bh) that the submit loop below drops with put_bh */
				2287	do {
				2288	get_bh(bh);
				2289	if (!buffer_mapped(bh))
				2290	continue;
				2291	if (buffer_mapped(bh) && bh->b_blocknr == 0)
				2292	continue;
				2293
					/* checked pages are written through the journal,
					   not submitted directly */
				2294	if (checked) {
				2295	reiserfs_prepare_for_journal(s, bh, 1);
				2296	journal_mark_dirty(&th, s, bh);
				2297	continue;
				2298	}
				2299	/* from this point on, we know the buffer is mapped to a
				2300	* real block and not a direct item
				2301	*/
					/* only try-lock when writeback is WB_SYNC_NONE and
					   nonblocking; a busy buffer re-dirties the page */
				2302	if (wbc->sync_mode != WB_SYNC_NONE \|\| !wbc->nonblocking) {
				2303	lock_buffer(bh);
				2304	} else {
				2305	if (test_set_buffer_locked(bh)) {
				2306	redirty_page_for_writepage(wbc, page);
				2307	continue;
				2308	}
				2309	}
				2310	if (test_clear_buffer_dirty(bh)) {
				2311	mark_buffer_async_write(bh);
				2312	} else {
				2313	unlock_buffer(bh);
				2314	}
				2315	} while((bh = bh->b_this_page) != head);
				2316
				2317	if (checked) {
				2318	error = journal_end(&th, s, bh_per_page + 1);
				2319	reiserfs_write_unlock(s);
				2320	if (error)
				2321	goto fail;
				2322	}
				2323	BUG_ON(PageWriteback(page));
				2324	set_page_writeback(page);
				2325	unlock_page(page);
				2326
				2327	/*
				2328	* since any buffer might be the only dirty buffer on the page,
				2329	* the first submit_bh can bring the page out of writeback.
				2330	* be careful with the buffers.
				2331	*/
				2332	do {
				2333	struct buffer_head *next = bh->b_this_page;
				2334	if (buffer_async_write(bh)) {
				2335	submit_bh(WRITE, bh);
				2336	nr++;
				2337	}
				2338	put_bh(bh);
				2339	bh = next;
				2340	} while(bh != head);
				2341
				2342	error = 0;
				2343	done:
				2344	if (nr == 0) {
				2345	/*
				2346	* if this page only had a direct item, it is very possible for
				2347	* no io to be required without there being an error. Or,
				2348	* someone else could have locked them and sent them down the
				2349	* pipe without locking the page
				2350	*/
				2351	bh = head ;
				2352	do {
				2353	if (!buffer_uptodate(bh)) {
				2354	partial = 1;
				2355	break;
				2356	}
				2357	bh = bh->b_this_page;
				2358	} while(bh != head);
				2359	if (!partial)
				2360	SetPageUptodate(page);
				2361	end_page_writeback(page);
				2362	}
				2363	return error;
				2364
				2365	fail:
				2366	/* catches various errors, we need to make sure any valid dirty blocks
				2367	* get to the media. The page is currently locked and not marked for
				2368	* writeback
				2369	*/
				2370	ClearPageUptodate(page);
				2371	bh = head;
				2372	do {
				2373	get_bh(bh);
				2374	if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
				2375	lock_buffer(bh);
				2376	mark_buffer_async_write(bh);
				2377	} else {
				2378	/*
				2379	* clear any dirty bits that might have come from getting
				2380	* attached to a dirty page
				2381	*/
				2382	clear_buffer_dirty(bh);
				2383	}
				2384	bh = bh->b_this_page;
				2385	} while(bh != head);
				2386	SetPageError(page);
				2387	BUG_ON(PageWriteback(page));
				2388	set_page_writeback(page);
				2389	unlock_page(page);
					/* submit what was marked above, then share the "done"
					   epilogue, which returns the saved error */
				2390	do {
				2391	struct buffer_head *next = bh->b_this_page;
				2392	if (buffer_async_write(bh)) {
				2393	clear_buffer_dirty(bh);
				2394	submit_bh(WRITE, bh);
				2395	nr++;
				2396	}
				2397	put_bh(bh);
				2398	bh = next;
				2399	} while(bh != head);
				2400	goto done;
				2401	}
				2402
				2403
				2404	static int reiserfs_readpage (struct file f, struct page page)
				2405	{
				2406	return block_read_full_page (page, reiserfs_get_block);
				2407	}
				2408
				2409
				2410	static int reiserfs_writepage (struct page * page, struct writeback_control *wbc)
				2411	{
				2412	struct inode *inode = page->mapping->host ;
				2413	reiserfs_wait_on_write_block(inode->i_sb) ;
				2414	return reiserfs_write_full_page(page, wbc) ;
				2415	}
				2416
				2417	static int reiserfs_prepare_write(struct file f, struct page page,
				2418	unsigned from, unsigned to) {
				2419	struct inode *inode = page->mapping->host ;
				2420	int ret;
				2421	int old_ref = 0;
				2422
				2423	reiserfs_wait_on_write_block(inode->i_sb) ;
				2424	fix_tail_page_for_writing(page) ;
				2425	if (reiserfs_transaction_running(inode->i_sb)) {
				2426	struct reiserfs_transaction_handle *th;
				2427	th = (struct reiserfs_transaction_handle *)current->journal_info;
				2428	BUG_ON (!th->t_refcount);
				2429	BUG_ON (!th->t_trans_id);
				2430	old_ref = th->t_refcount;
				2431	th->t_refcount++;
				2432	}
				2433
				2434	ret = block_prepare_write(page, from, to, reiserfs_get_block) ;
				2435	if (ret && reiserfs_transaction_running(inode->i_sb)) {
				2436	struct reiserfs_transaction_handle *th = current->journal_info;
				2437	/* this gets a little ugly. If reiserfs_get_block returned an
				2438	* error and left a transacstion running, we've got to close it,
				2439	* and we've got to free handle if it was a persistent transaction.
				2440	*
				2441	* But, if we had nested into an existing transaction, we need
				2442	* to just drop the ref count on the handle.
				2443	*
				2444	* If old_ref == 0, the transaction is from reiserfs_get_block,
				2445	* and it was a persistent trans. Otherwise, it was nested above.
				2446	*/
				2447	if (th->t_refcount > old_ref) {
				2448	if (old_ref)
				2449	th->t_refcount--;
				2450	else {
				2451	int err;
				2452	reiserfs_write_lock(inode->i_sb);
				2453	err = reiserfs_end_persistent_transaction(th);
				2454	reiserfs_write_unlock(inode->i_sb);
				2455	if (err)
				2456	ret = err;
				2457	}
				2458	}
				2459	}
				2460	return ret;
				2461
				2462	}
				2463
				2464
				2465	static sector_t reiserfs_aop_bmap(struct address_space *as, sector_t block) {
				2466	return generic_block_bmap(as, block, reiserfs_bmap) ;
				2467	}
				2468
				2469	static int reiserfs_commit_write(struct file f, struct page page,
				2470	unsigned from, unsigned to) {
				2471	struct inode *inode = page->mapping->host ;
				2472	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
				2473	int ret = 0;
				2474	int update_sd = 0;
				2475	struct reiserfs_transaction_handle *th = NULL;
				2476
				2477	reiserfs_wait_on_write_block(inode->i_sb) ;
				2478	if (reiserfs_transaction_running(inode->i_sb)) {
				2479	th = current->journal_info;
				2480	}
				2481	reiserfs_commit_page(inode, page, from, to);
				2482
				2483	/* generic_commit_write does this for us, but does not update the
				2484	** transaction tracking stuff when the size changes. So, we have
				2485	** to do the i_size updates here.
				2486	*/
				2487	if (pos > inode->i_size) {
				2488	struct reiserfs_transaction_handle myth ;
				2489	reiserfs_write_lock(inode->i_sb);
				2490	/* If the file have grown beyond the border where it
				2491	can have a tail, unmark it as needing a tail
				2492	packing */
				2493	if ( (have_large_tails (inode->i_sb) && inode->i_size > i_block_size (inode)*4) \|\|
				2494	(have_small_tails (inode->i_sb) && inode->i_size > i_block_size(inode)) )
				2495	REISERFS_I(inode)->i_flags &= ~i_pack_on_close_mask ;
				2496
				2497	ret = journal_begin(&myth, inode->i_sb, 1) ;
				2498	if (ret) {
				2499	reiserfs_write_unlock(inode->i_sb);
				2500	goto journal_error;
				2501	}
				2502	reiserfs_update_inode_transaction(inode) ;
				2503	inode->i_size = pos ;
				2504	reiserfs_update_sd(&myth, inode) ;
				2505	update_sd = 1;
				2506	ret = journal_end(&myth, inode->i_sb, 1) ;
				2507	reiserfs_write_unlock(inode->i_sb);
				2508	if (ret)
				2509	goto journal_error;
				2510	}
				2511	if (th) {
				2512	reiserfs_write_lock(inode->i_sb);
				2513	if (!update_sd)
				2514	reiserfs_update_sd(th, inode) ;
				2515	ret = reiserfs_end_persistent_transaction(th);
				2516	reiserfs_write_unlock(inode->i_sb);
				2517	if (ret)
				2518	goto out;
				2519	}
				2520
				2521	/* we test for O_SYNC here so we can commit the transaction
				2522	** for any packed tails the file might have had
				2523	*/
				2524	if (f && (f->f_flags & O_SYNC)) {
				2525	reiserfs_write_lock(inode->i_sb);
				2526	ret = reiserfs_commit_for_inode(inode) ;
				2527	reiserfs_write_unlock(inode->i_sb);
				2528	}
				2529	out:
				2530	return ret ;
				2531
				2532	journal_error:
				2533	if (th) {
				2534	reiserfs_write_lock(inode->i_sb);
				2535	if (!update_sd)
				2536	reiserfs_update_sd(th, inode) ;
				2537	ret = reiserfs_end_persistent_transaction(th);
				2538	reiserfs_write_unlock(inode->i_sb);
				2539	}
				2540
				2541	return ret;
				2542	}
				2543
				2544	void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode )
				2545	{
				2546	if( reiserfs_attrs( inode -> i_sb ) ) {
				2547	if( sd_attrs & REISERFS_SYNC_FL )
				2548	inode -> i_flags \|= S_SYNC;
				2549	else
				2550	inode -> i_flags &= ~S_SYNC;
				2551	if( sd_attrs & REISERFS_IMMUTABLE_FL )
				2552	inode -> i_flags \|= S_IMMUTABLE;
				2553	else
				2554	inode -> i_flags &= ~S_IMMUTABLE;
				2555	if( sd_attrs & REISERFS_APPEND_FL )
				2556	inode -> i_flags \|= S_APPEND;
				2557	else
				2558	inode -> i_flags &= ~S_APPEND;
				2559	if( sd_attrs & REISERFS_NOATIME_FL )
				2560	inode -> i_flags \|= S_NOATIME;
				2561	else
				2562	inode -> i_flags &= ~S_NOATIME;
				2563	if( sd_attrs & REISERFS_NOTAIL_FL )
				2564	REISERFS_I(inode)->i_flags \|= i_nopack_mask;
				2565	else
				2566	REISERFS_I(inode)->i_flags &= ~i_nopack_mask;
				2567	}
				2568	}
				2569
				2570	void i_attrs_to_sd_attrs( struct inode inode, __u16 sd_attrs )
				2571	{
				2572	if( reiserfs_attrs( inode -> i_sb ) ) {
				2573	if( inode -> i_flags & S_IMMUTABLE )
				2574	*sd_attrs \|= REISERFS_IMMUTABLE_FL;
				2575	else
				2576	*sd_attrs &= ~REISERFS_IMMUTABLE_FL;
				2577	if( inode -> i_flags & S_SYNC )
				2578	*sd_attrs \|= REISERFS_SYNC_FL;
				2579	else
				2580	*sd_attrs &= ~REISERFS_SYNC_FL;
				2581	if( inode -> i_flags & S_NOATIME )
				2582	*sd_attrs \|= REISERFS_NOATIME_FL;
				2583	else
				2584	*sd_attrs &= ~REISERFS_NOATIME_FL;
				2585	if( REISERFS_I(inode)->i_flags & i_nopack_mask )
				2586	*sd_attrs \|= REISERFS_NOTAIL_FL;
				2587	else
				2588	*sd_attrs &= ~REISERFS_NOTAIL_FL;
				2589	}
				2590	}
				2591
				2592	/* decide if this buffer needs to stay around for data logging or ordered
				2593	** write purposes
				2594	*/
				2595	static int invalidatepage_can_drop(struct inode inode, struct buffer_head bh)
				2596	{
				2597	int ret = 1 ;
				2598	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;
				2599
				2600	spin_lock(&j->j_dirty_buffers_lock) ;
				2601	if (!buffer_mapped(bh)) {
				2602	goto free_jh;
				2603	}
				2604	/* the page is locked, and the only places that log a data buffer
				2605	* also lock the page.
				2606	*/
				2607	if (reiserfs_file_data_log(inode)) {
				2608	/*
				2609	* very conservative, leave the buffer pinned if
				2610	* anyone might need it.
				2611	*/
				2612	if (buffer_journaled(bh) \|\| buffer_journal_dirty(bh)) {
				2613	ret = 0 ;
				2614	}
				2615	} else
				2616	if (buffer_dirty(bh) \|\| buffer_locked(bh)) {
				2617	struct reiserfs_journal_list *jl;
				2618	struct reiserfs_jh *jh = bh->b_private;
				2619
				2620	/* why is this safe?
				2621	* reiserfs_setattr updates i_size in the on disk
				2622	* stat data before allowing vmtruncate to be called.
				2623	*
				2624	* If buffer was put onto the ordered list for this
				2625	* transaction, we know for sure either this transaction
				2626	* or an older one already has updated i_size on disk,
				2627	* and this ordered data won't be referenced in the file
				2628	* if we crash.
				2629	*
				2630	* if the buffer was put onto the ordered list for an older
				2631	* transaction, we need to leave it around
				2632	*/
				2633	if (jh && (jl = jh->jl) && jl != SB_JOURNAL(inode->i_sb)->j_current_jl)
				2634	ret = 0;
				2635	}
				2636	free_jh:
				2637	if (ret && bh->b_private) {
				2638	reiserfs_free_jh(bh);
				2639	}
				2640	spin_unlock(&j->j_dirty_buffers_lock) ;
				2641	return ret ;
				2642	}
				2643
				2644	/* clm -- taken from fs/buffer.c:block_invalidate_page */
				2645	static int reiserfs_invalidatepage(struct page *page, unsigned long offset)
				2646	{
				2647	struct buffer_head head, bh, *next;
				2648	struct inode *inode = page->mapping->host;
				2649	unsigned int curr_off = 0;
				2650	int ret = 1;
				2651
				2652	BUG_ON(!PageLocked(page));
				2653
				2654	if (offset == 0)
				2655	ClearPageChecked(page);
				2656
				2657	if (!page_has_buffers(page))
				2658	goto out;
				2659
				2660	head = page_buffers(page);
				2661	bh = head;
				2662	do {
				2663	unsigned int next_off = curr_off + bh->b_size;
				2664	next = bh->b_this_page;
				2665
				2666	/*
				2667	* is this block fully invalidated?
				2668	*/
				2669	if (offset <= curr_off) {
				2670	if (invalidatepage_can_drop(inode, bh))
				2671	reiserfs_unmap_buffer(bh);
				2672	else
				2673	ret = 0;
				2674	}
				2675	curr_off = next_off;
				2676	bh = next;
				2677	} while (bh != head);
				2678
				2679	/*
				2680	* We release buffers only if the entire page is being invalidated.
				2681	* The get_block cached value has been unconditionally invalidated,
				2682	* so real IO is not possible anymore.
				2683	*/
				2684	if (!offset && ret)
				2685	ret = try_to_release_page(page, 0);
				2686	out:
				2687	return ret;
				2688	}
				2689
				2690	static int reiserfs_set_page_dirty(struct page *page) {
				2691	struct inode *inode = page->mapping->host;
				2692	if (reiserfs_file_data_log(inode)) {
				2693	SetPageChecked(page);
				2694	return __set_page_dirty_nobuffers(page);
				2695	}
				2696	return __set_page_dirty_buffers(page);
				2697	}
				2698
				2699	/*
				2700	* Returns 1 if the page's buffers were dropped. The page is locked.
				2701	*
				2702	* Takes j_dirty_buffers_lock to protect the b_assoc_buffers list_heads
				2703	* in the buffers at page_buffers(page).
				2704	*
				2705	* even in -o notail mode, we can't be sure an old mount without -o notail
				2706	* didn't create files with tails.
				2707	*/
				2708	static int reiserfs_releasepage(struct page *page, int unused_gfp_flags)
				2709	{
				2710	struct inode *inode = page->mapping->host ;
				2711	struct reiserfs_journal *j = SB_JOURNAL(inode->i_sb) ;
				2712	struct buffer_head *head ;
				2713	struct buffer_head *bh ;
				2714	int ret = 1 ;
				2715
				2716	WARN_ON(PageChecked(page));
				2717	spin_lock(&j->j_dirty_buffers_lock) ;
				2718	head = page_buffers(page) ;
				2719	bh = head ;
				2720	do {
				2721	if (bh->b_private) {
				2722	if (!buffer_dirty(bh) && !buffer_locked(bh)) {
				2723	reiserfs_free_jh(bh);
				2724	} else {
				2725	ret = 0 ;
				2726	break ;
				2727	}
				2728	}
				2729	bh = bh->b_this_page ;
				2730	} while (bh != head) ;
				2731	if (ret)
				2732	ret = try_to_free_buffers(page) ;
				2733	spin_unlock(&j->j_dirty_buffers_lock) ;
				2734	return ret ;
				2735	}
				2736
				2737	/* We thank Mingming Cao for helping us understand in great detail what
				2738	to do in this section of the code. */
				2739	static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
				2740	const struct iovec *iov, loff_t offset, unsigned long nr_segs)
				2741	{
				2742	struct file *file = iocb->ki_filp;
				2743	struct inode *inode = file->f_mapping->host;
				2744
				2745	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				2746	offset, nr_segs, reiserfs_get_blocks_direct_io, NULL);
				2747	}
				2748
				2749	int reiserfs_setattr(struct dentry dentry, struct iattr attr) {
				2750	struct inode *inode = dentry->d_inode ;
				2751	int error ;
				2752	unsigned int ia_valid = attr->ia_valid;
				2753	reiserfs_write_lock(inode->i_sb);
				2754	if (attr->ia_valid & ATTR_SIZE) {
				2755	/* version 2 items will be caught by the s_maxbytes check
				2756	** done for us in vmtruncate
				2757	*/
				2758	if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 &&
				2759	attr->ia_size > MAX_NON_LFS) {
				2760	error = -EFBIG ;
				2761	goto out;
				2762	}
				2763	/* fill in hole pointers in the expanding truncate case. */
				2764	if (attr->ia_size > inode->i_size) {
				2765	error = generic_cont_expand(inode, attr->ia_size) ;
				2766	if (REISERFS_I(inode)->i_prealloc_count > 0) {
				2767	int err;
				2768	struct reiserfs_transaction_handle th ;
				2769	/* we're changing at most 2 bitmaps, inode + super */
				2770	err = journal_begin(&th, inode->i_sb, 4) ;
				2771	if (!err) {
				2772	reiserfs_discard_prealloc (&th, inode);
				2773	err = journal_end(&th, inode->i_sb, 4) ;
				2774	}
				2775	if (err)
				2776	error = err;
				2777	}
				2778	if (error)
				2779	goto out;
				2780	}
				2781	}
				2782
				2783	if ((((attr->ia_valid & ATTR_UID) && (attr->ia_uid & ~0xffff)) \|\|
				2784	((attr->ia_valid & ATTR_GID) && (attr->ia_gid & ~0xffff))) &&
				2785	(get_inode_sd_version (inode) == STAT_DATA_V1)) {
				2786	/* stat data of format v3.5 has 16 bit uid and gid */
				2787	error = -EINVAL;
				2788	goto out;
				2789	}
				2790
				2791	error = inode_change_ok(inode, attr) ;
				2792	if (!error) {
				2793	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) \|\|
				2794	(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
				2795	error = reiserfs_chown_xattrs (inode, attr);
				2796
				2797	if (!error) {
				2798	struct reiserfs_transaction_handle th;
				2799
				2800	/* (user+group)(old+new) structure - we count quota info and , inode write (sb, inode) /
				2801	journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
				2802	error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
				2803	if (error) {
				2804	journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
				2805	goto out;
				2806	}
				2807	/* Update corresponding info in inode so that everything is in
				2808	* one transaction */
				2809	if (attr->ia_valid & ATTR_UID)
				2810	inode->i_uid = attr->ia_uid;
				2811	if (attr->ia_valid & ATTR_GID)
				2812	inode->i_gid = attr->ia_gid;
				2813	mark_inode_dirty(inode);
				2814	journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
				2815	}
				2816	}
				2817	if (!error)
				2818	error = inode_setattr(inode, attr) ;
				2819	}
				2820
				2821
				2822	if (!error && reiserfs_posixacl (inode->i_sb)) {
				2823	if (attr->ia_valid & ATTR_MODE)
				2824	error = reiserfs_acl_chmod (inode);
				2825	}
				2826
				2827	out:
				2828	reiserfs_write_unlock(inode->i_sb);
				2829	return error ;
				2830	}
				2831
				2832
				2833
				2834	struct address_space_operations reiserfs_address_space_operations = {
				2835	.writepage = reiserfs_writepage,
				2836	.readpage = reiserfs_readpage,
				2837	.readpages = reiserfs_readpages,
				2838	.releasepage = reiserfs_releasepage,
				2839	.invalidatepage = reiserfs_invalidatepage,
				2840	.sync_page = block_sync_page,
				2841	.prepare_write = reiserfs_prepare_write,
				2842	.commit_write = reiserfs_commit_write,
				2843	.bmap = reiserfs_aop_bmap,
				2844	.direct_IO = reiserfs_direct_IO,
				2845	.set_page_dirty = reiserfs_set_page_dirty,
				2846	} ;