Blame - fs/ntfs/aops.c - kernel/msm-4.9

blob: 2b4b8b9e8796f22d4dcabcc58c7061f68a71b433 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/**
				2	* aops.c - NTFS kernel address space operations and page cache handling.
				3	* Part of the Linux-NTFS project.
				4	*
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame^]	5	* Copyright (c) 2001-2005 Anton Altaparmakov
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	6	* Copyright (c) 2002 Richard Russon
				7	*
				8	* This program/include file is free software; you can redistribute it and/or
				9	* modify it under the terms of the GNU General Public License as published
				10	* by the Free Software Foundation; either version 2 of the License, or
				11	* (at your option) any later version.
				12	*
				13	* This program/include file is distributed in the hope that it will be
				14	* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
				15	* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				16	* GNU General Public License for more details.
				17	*
				18	* You should have received a copy of the GNU General Public License
				19	* along with this program (in the main directory of the Linux-NTFS
				20	* distribution in the file COPYING); if not, write to the Free Software
				21	* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				22	*/
				23
				24	#include <linux/errno.h>
				25	#include <linux/mm.h>
				26	#include <linux/pagemap.h>
				27	#include <linux/swap.h>
				28	#include <linux/buffer_head.h>
				29	#include <linux/writeback.h>
				30
				31	#include "aops.h"
				32	#include "attrib.h"
				33	#include "debug.h"
				34	#include "inode.h"
				35	#include "mft.h"
				36	#include "runlist.h"
				37	#include "types.h"
				38	#include "ntfs.h"
				39
				40	/**
				41	* ntfs_end_buffer_async_read - async io completion for reading attributes
				42	* @bh: buffer head on which io is completed
				43	* @uptodate: whether @bh is now uptodate or not
				44	*
				45	* Asynchronous I/O completion handler for reading pages belonging to the
				46	* attribute address space of an inode. The inodes can either be files or
				47	* directories or they can be fake inodes describing some attribute.
				48	*
				49	* If NInoMstProtected(), perform the post read mst fixups when all IO on the
				50	* page has been completed and mark the page uptodate or set the error bit on
				51	* the page. To determine the size of the records that need fixing up, we
				52	* cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
				53	* record size, and index_block_size_bits, to the log(base 2) of the ntfs
				54	* record size.
				55	*/
				56	static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
				57	{
				58	static DEFINE_SPINLOCK(page_uptodate_lock);
				59	unsigned long flags;
				60	struct buffer_head *tmp;
				61	struct page *page;
				62	ntfs_inode *ni;
				63	int page_uptodate = 1;
				64
				65	page = bh->b_page;
				66	ni = NTFS_I(page->mapping->host);
				67
				68	if (likely(uptodate)) {
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	69	s64 file_ofs, initialized_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	70
				71	set_buffer_uptodate(bh);
				72
				73	file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
				74	bh_offset(bh);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	75	read_lock_irqsave(&ni->size_lock, flags);
				76	initialized_size = ni->initialized_size;
				77	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	78	/* Check for the current buffer head overflowing. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	79	if (file_ofs + bh->b_size > initialized_size) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	80	char *addr;
				81	int ofs = 0;
				82
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	83	if (file_ofs < initialized_size)
				84	ofs = initialized_size - file_ofs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	85	addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
				86	memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
				87	flush_dcache_page(page);
				88	kunmap_atomic(addr, KM_BIO_SRC_IRQ);
				89	}
				90	} else {
				91	clear_buffer_uptodate(bh);
				92	ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
				93	(unsigned long long)bh->b_blocknr);
				94	SetPageError(page);
				95	}
				96	spin_lock_irqsave(&page_uptodate_lock, flags);
				97	clear_buffer_async_read(bh);
				98	unlock_buffer(bh);
				99	tmp = bh;
				100	do {
				101	if (!buffer_uptodate(tmp))
				102	page_uptodate = 0;
				103	if (buffer_async_read(tmp)) {
				104	if (likely(buffer_locked(tmp)))
				105	goto still_busy;
				106	/* Async buffers must be locked. */
				107	BUG();
				108	}
				109	tmp = tmp->b_this_page;
				110	} while (tmp != bh);
				111	spin_unlock_irqrestore(&page_uptodate_lock, flags);
				112	/*
				113	* If none of the buffers had errors then we can set the page uptodate,
				114	* but we first have to perform the post read mst fixups, if the
				115	* attribute is mst protected, i.e. if NInoMstProteced(ni) is true.
				116	* Note we ignore fixup errors as those are detected when
				117	* map_mft_record() is called which gives us per record granularity
				118	* rather than per page granularity.
				119	*/
				120	if (!NInoMstProtected(ni)) {
				121	if (likely(page_uptodate && !PageError(page)))
				122	SetPageUptodate(page);
				123	} else {
				124	char *addr;
				125	unsigned int i, recs;
				126	u32 rec_size;
				127
				128	rec_size = ni->itype.index.block_size;
				129	recs = PAGE_CACHE_SIZE / rec_size;
				130	/* Should have been verified before we got here... */
				131	BUG_ON(!recs);
				132	addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
				133	for (i = 0; i < recs; i++)
				134	post_read_mst_fixup((NTFS_RECORD*)(addr +
				135	i * rec_size), rec_size);
				136	flush_dcache_page(page);
				137	kunmap_atomic(addr, KM_BIO_SRC_IRQ);
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame^]	138	if (likely(page_uptodate && !PageError(page)))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	139	SetPageUptodate(page);
				140	}
				141	unlock_page(page);
				142	return;
				143	still_busy:
				144	spin_unlock_irqrestore(&page_uptodate_lock, flags);
				145	return;
				146	}
				147
				148	/**
				149	* ntfs_read_block - fill a @page of an address space with data
				150	* @page: page cache page to fill with data
				151	*
				152	* Fill the page @page of the address space belonging to the @page->host inode.
				153	* We read each buffer asynchronously and when all buffers are read in, our io
				154	* completion handler ntfs_end_buffer_read_async(), if required, automatically
				155	* applies the mst fixups to the page before finally marking it uptodate and
				156	* unlocking it.
				157	*
				158	* We only enforce allocated_size limit because i_size is checked for in
				159	* generic_file_read().
				160	*
				161	* Return 0 on success and -errno on error.
				162	*
				163	* Contains an adapted version of fs/buffer.c::block_read_full_page().
				164	*/
				165	static int ntfs_read_block(struct page *page)
				166	{
				167	VCN vcn;
				168	LCN lcn;
				169	ntfs_inode *ni;
				170	ntfs_volume *vol;
				171	runlist_element *rl;
				172	struct buffer_head bh, head, *arr[MAX_BUF_PER_PAGE];
				173	sector_t iblock, lblock, zblock;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	174	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	175	unsigned int blocksize, vcn_ofs;
				176	int i, nr;
				177	unsigned char blocksize_bits;
				178
				179	ni = NTFS_I(page->mapping->host);
				180	vol = ni->vol;
				181
				182	/* $MFT/$DATA must have its complete runlist in memory at all times. */
				183	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
				184
				185	blocksize_bits = VFS_I(ni)->i_blkbits;
				186	blocksize = 1 << blocksize_bits;
				187
				188	if (!page_has_buffers(page))
				189	create_empty_buffers(page, blocksize, 0);
				190	bh = head = page_buffers(page);
				191	if (unlikely(!bh)) {
				192	unlock_page(page);
				193	return -ENOMEM;
				194	}
				195
				196	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	197	read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	198	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
				199	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	200	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	201
				202	/* Loop through all the buffers in the page. */
				203	rl = NULL;
				204	nr = i = 0;
				205	do {
				206	u8 *kaddr;
				207
				208	if (unlikely(buffer_uptodate(bh)))
				209	continue;
				210	if (unlikely(buffer_mapped(bh))) {
				211	arr[nr++] = bh;
				212	continue;
				213	}
				214	bh->b_bdev = vol->sb->s_bdev;
				215	/* Is the block within the allowed limits? */
				216	if (iblock < lblock) {
				217	BOOL is_retry = FALSE;
				218
				219	/* Convert iblock into corresponding vcn and offset. */
				220	vcn = (VCN)iblock << blocksize_bits >>
				221	vol->cluster_size_bits;
				222	vcn_ofs = ((VCN)iblock << blocksize_bits) &
				223	vol->cluster_size_mask;
				224	if (!rl) {
				225	lock_retry_remap:
				226	down_read(&ni->runlist.lock);
				227	rl = ni->runlist.rl;
				228	}
				229	if (likely(rl != NULL)) {
				230	/* Seek to element containing target vcn. */
				231	while (rl->length && rl[1].vcn <= vcn)
				232	rl++;
				233	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				234	} else
				235	lcn = LCN_RL_NOT_MAPPED;
				236	/* Successful remap. */
				237	if (lcn >= 0) {
				238	/* Setup buffer head to correct block. */
				239	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				240	+ vcn_ofs) >> blocksize_bits;
				241	set_buffer_mapped(bh);
				242	/* Only read initialized data blocks. */
				243	if (iblock < zblock) {
				244	arr[nr++] = bh;
				245	continue;
				246	}
				247	/* Fully non-initialized data block, zero it. */
				248	goto handle_zblock;
				249	}
				250	/* It is a hole, need to zero it. */
				251	if (lcn == LCN_HOLE)
				252	goto handle_hole;
				253	/* If first try and runlist unmapped, map and retry. */
				254	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				255	int err;
				256	is_retry = TRUE;
				257	/*
				258	* Attempt to map runlist, dropping lock for
				259	* the duration.
				260	*/
				261	up_read(&ni->runlist.lock);
				262	err = ntfs_map_runlist(ni, vcn);
				263	if (likely(!err))
				264	goto lock_retry_remap;
				265	rl = NULL;
				266	lcn = err;
				267	}
				268	/* Hard error, zero out region. */
				269	bh->b_blocknr = -1;
				270	SetPageError(page);
				271	ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
				272	"attribute type 0x%x, vcn 0x%llx, "
				273	"offset 0x%x because its location on "
				274	"disk could not be determined%s "
				275	"(error code %lli).", ni->mft_no,
				276	ni->type, (unsigned long long)vcn,
				277	vcn_ofs, is_retry ? " even after "
				278	"retrying" : "", (long long)lcn);
				279	}
				280	/*
				281	* Either iblock was outside lblock limits or
				282	* ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
				283	* of the page and set the buffer uptodate.
				284	*/
				285	handle_hole:
				286	bh->b_blocknr = -1UL;
				287	clear_buffer_mapped(bh);
				288	handle_zblock:
				289	kaddr = kmap_atomic(page, KM_USER0);
				290	memset(kaddr + i * blocksize, 0, blocksize);
				291	flush_dcache_page(page);
				292	kunmap_atomic(kaddr, KM_USER0);
				293	set_buffer_uptodate(bh);
				294	} while (i++, iblock++, (bh = bh->b_this_page) != head);
				295
				296	/* Release the lock if we took it. */
				297	if (rl)
				298	up_read(&ni->runlist.lock);
				299
				300	/* Check we have at least one buffer ready for i/o. */
				301	if (nr) {
				302	struct buffer_head *tbh;
				303
				304	/* Lock the buffers. */
				305	for (i = 0; i < nr; i++) {
				306	tbh = arr[i];
				307	lock_buffer(tbh);
				308	tbh->b_end_io = ntfs_end_buffer_async_read;
				309	set_buffer_async_read(tbh);
				310	}
				311	/* Finally, start i/o on the buffers. */
				312	for (i = 0; i < nr; i++) {
				313	tbh = arr[i];
				314	if (likely(!buffer_uptodate(tbh)))
				315	submit_bh(READ, tbh);
				316	else
				317	ntfs_end_buffer_async_read(tbh, 1);
				318	}
				319	return 0;
				320	}
				321	/* No i/o was scheduled on any of the buffers. */
				322	if (likely(!PageError(page)))
				323	SetPageUptodate(page);
				324	else /* Signal synchronous i/o error. */
				325	nr = -EIO;
				326	unlock_page(page);
				327	return nr;
				328	}
				329
				330	/**
				331	* ntfs_readpage - fill a @page of a @file with data from the device
				332	* @file: open file to which the page @page belongs or NULL
				333	* @page: page cache page to fill with data
				334	*
				335	* For non-resident attributes, ntfs_readpage() fills the @page of the open
				336	* file @file by calling the ntfs version of the generic block_read_full_page()
				337	* function, ntfs_read_block(), which in turn creates and reads in the buffers
				338	* associated with the page asynchronously.
				339	*
				340	* For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
				341	* data from the mft record (which at this stage is most likely in memory) and
				342	* fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
				343	* even if the mft record is not cached at this point in time, we need to wait
				344	* for it to be read in before we can do the copy.
				345	*
				346	* Return 0 on success and -errno on error.
				347	*/
				348	static int ntfs_readpage(struct file file, struct page page)
				349	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	350	ntfs_inode ni, base_ni;
				351	u8 *kaddr;
				352	ntfs_attr_search_ctx *ctx;
				353	MFT_RECORD *mrec;
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame^]	354	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	355	u32 attr_len;
				356	int err = 0;
				357
				358	BUG_ON(!PageLocked(page));
				359	/*
				360	* This can potentially happen because we clear PageUptodate() during
				361	* ntfs_writepage() of MstProtected() attributes.
				362	*/
				363	if (PageUptodate(page)) {
				364	unlock_page(page);
				365	return 0;
				366	}
				367	ni = NTFS_I(page->mapping->host);
				368
				369	/* NInoNonResident() == NInoIndexAllocPresent() */
				370	if (NInoNonResident(ni)) {
				371	/*
				372	* Only unnamed $DATA attributes can be compressed or
				373	* encrypted.
				374	*/
				375	if (ni->type == AT_DATA && !ni->name_len) {
				376	/* If file is encrypted, deny access, just like NT4. */
				377	if (NInoEncrypted(ni)) {
				378	err = -EACCES;
				379	goto err_out;
				380	}
				381	/* Compressed data streams are handled in compress.c. */
				382	if (NInoCompressed(ni))
				383	return ntfs_read_compressed_block(page);
				384	}
				385	/* Normal data stream. */
				386	return ntfs_read_block(page);
				387	}
				388	/*
				389	* Attribute is resident, implying it is not compressed or encrypted.
				390	* This also means the attribute is smaller than an mft record and
				391	* hence smaller than a page, so can simply zero out any pages with
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame^]	392	* index above 0.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	393	*/
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame^]	394	if (unlikely(page->index > 0)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	395	kaddr = kmap_atomic(page, KM_USER0);
				396	memset(kaddr, 0, PAGE_CACHE_SIZE);
				397	flush_dcache_page(page);
				398	kunmap_atomic(kaddr, KM_USER0);
				399	goto done;
				400	}
				401	if (!NInoAttr(ni))
				402	base_ni = ni;
				403	else
				404	base_ni = ni->ext.base_ntfs_ino;
				405	/* Map, pin, and lock the mft record. */
				406	mrec = map_mft_record(base_ni);
				407	if (IS_ERR(mrec)) {
				408	err = PTR_ERR(mrec);
				409	goto err_out;
				410	}
				411	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
				412	if (unlikely(!ctx)) {
				413	err = -ENOMEM;
				414	goto unm_err_out;
				415	}
				416	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				417	CASE_SENSITIVE, 0, NULL, 0, ctx);
				418	if (unlikely(err))
				419	goto put_unm_err_out;
				420	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame^]	421	read_lock_irqsave(&ni->size_lock, flags);
				422	if (unlikely(attr_len > ni->initialized_size))
				423	attr_len = ni->initialized_size;
				424	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	425	kaddr = kmap_atomic(page, KM_USER0);
				426	/* Copy the data to the page. */
				427	memcpy(kaddr, (u8*)ctx->attr +
				428	le16_to_cpu(ctx->attr->data.resident.value_offset),
				429	attr_len);
				430	/* Zero the remainder of the page. */
				431	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				432	flush_dcache_page(page);
				433	kunmap_atomic(kaddr, KM_USER0);
				434	put_unm_err_out:
				435	ntfs_attr_put_search_ctx(ctx);
				436	unm_err_out:
				437	unmap_mft_record(base_ni);
				438	done:
				439	SetPageUptodate(page);
				440	err_out:
				441	unlock_page(page);
				442	return err;
				443	}
				444
				445	#ifdef NTFS_RW
				446
				447	/**
				448	* ntfs_write_block - write a @page to the backing store
				449	* @page: page cache page to write out
				450	* @wbc: writeback control structure
				451	*
				452	* This function is for writing pages belonging to non-resident, non-mst
				453	* protected attributes to their backing store.
				454	*
				455	* For a page with buffers, map and write the dirty buffers asynchronously
				456	* under page writeback. For a page without buffers, create buffers for the
				457	* page, then proceed as above.
				458	*
				459	* If a page doesn't have buffers the page dirty state is definitive. If a page
				460	* does have buffers, the page dirty state is just a hint, and the buffer dirty
				461	* state is definitive. (A hint which has rules: dirty buffers against a clean
				462	* page is illegal. Other combinations are legal and need to be handled. In
				463	* particular a dirty page containing clean buffers for example.)
				464	*
				465	* Return 0 on success and -errno on error.
				466	*
				467	* Based on ntfs_read_block() and __block_write_full_page().
				468	*/
				469	static int ntfs_write_block(struct page page, struct writeback_control wbc)
				470	{
				471	VCN vcn;
				472	LCN lcn;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	473	s64 initialized_size;
				474	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	475	sector_t block, dblock, iblock;
				476	struct inode *vi;
				477	ntfs_inode *ni;
				478	ntfs_volume *vol;
				479	runlist_element *rl;
				480	struct buffer_head bh, head;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	481	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	482	unsigned int blocksize, vcn_ofs;
				483	int err;
				484	BOOL need_end_writeback;
				485	unsigned char blocksize_bits;
				486
				487	vi = page->mapping->host;
				488	ni = NTFS_I(vi);
				489	vol = ni->vol;
				490
				491	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				492	"0x%lx.", ni->mft_no, ni->type, page->index);
				493
				494	BUG_ON(!NInoNonResident(ni));
				495	BUG_ON(NInoMstProtected(ni));
				496
				497	blocksize_bits = vi->i_blkbits;
				498	blocksize = 1 << blocksize_bits;
				499
				500	if (!page_has_buffers(page)) {
				501	BUG_ON(!PageUptodate(page));
				502	create_empty_buffers(page, blocksize,
				503	(1 << BH_Uptodate) \| (1 << BH_Dirty));
				504	}
				505	bh = head = page_buffers(page);
				506	if (unlikely(!bh)) {
				507	ntfs_warning(vol->sb, "Error allocating page buffers. "
				508	"Redirtying page so we try again later.");
				509	/*
				510	* Put the page back on mapping->dirty_pages, but leave its
				511	* buffer's dirty state as-is.
				512	*/
				513	redirty_page_for_writepage(wbc, page);
				514	unlock_page(page);
				515	return 0;
				516	}
				517
				518	/* NOTE: Different naming scheme to ntfs_read_block()! */
				519
				520	/* The first block in the page. */
				521	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				522
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	523	read_lock_irqsave(&ni->size_lock, flags);
				524	i_size = i_size_read(vi);
				525	initialized_size = ni->initialized_size;
				526	read_unlock_irqrestore(&ni->size_lock, flags);
				527
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	528	/* The first out of bounds block for the data size. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	529	dblock = (i_size + blocksize - 1) >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	530
				531	/* The last (fully or partially) initialized block. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	532	iblock = initialized_size >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	533
				534	/*
				535	* Be very careful. We have no exclusion from __set_page_dirty_buffers
				536	* here, and the (potentially unmapped) buffers may become dirty at
				537	* any time. If a buffer becomes dirty here after we've inspected it
				538	* then we just miss that fact, and the page stays dirty.
				539	*
				540	* Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
				541	* handle that here by just cleaning them.
				542	*/
				543
				544	/*
				545	* Loop through all the buffers in the page, mapping all the dirty
				546	* buffers to disk addresses and handling any aliases from the
				547	* underlying block device's mapping.
				548	*/
				549	rl = NULL;
				550	err = 0;
				551	do {
				552	BOOL is_retry = FALSE;
				553
				554	if (unlikely(block >= dblock)) {
				555	/*
				556	* Mapped buffers outside i_size will occur, because
				557	* this page can be outside i_size when there is a
				558	* truncate in progress. The contents of such buffers
				559	* were zeroed by ntfs_writepage().
				560	*
				561	* FIXME: What about the small race window where
				562	* ntfs_writepage() has not done any clearing because
				563	* the page was within i_size but before we get here,
				564	* vmtruncate() modifies i_size?
				565	*/
				566	clear_buffer_dirty(bh);
				567	set_buffer_uptodate(bh);
				568	continue;
				569	}
				570
				571	/* Clean buffers are not written out, so no need to map them. */
				572	if (!buffer_dirty(bh))
				573	continue;
				574
				575	/* Make sure we have enough initialized size. */
				576	if (unlikely((block >= iblock) &&
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	577	(initialized_size < i_size))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	578	/*
				579	* If this page is fully outside initialized size, zero
				580	* out all pages between the current initialized size
				581	* and the current page. Just use ntfs_readpage() to do
				582	* the zeroing transparently.
				583	*/
				584	if (block > iblock) {
				585	// TODO:
				586	// For each page do:
				587	// - read_cache_page()
				588	// Again for each page do:
				589	// - wait_on_page_locked()
				590	// - Check (PageUptodate(page) &&
				591	// !PageError(page))
				592	// Update initialized size in the attribute and
				593	// in the inode.
				594	// Again, for each page do:
				595	// __set_page_dirty_buffers();
				596	// page_cache_release()
				597	// We don't need to wait on the writes.
				598	// Update iblock.
				599	}
				600	/*
				601	* The current page straddles initialized size. Zero
				602	* all non-uptodate buffers and set them uptodate (and
				603	* dirty?). Note, there aren't any non-uptodate buffers
				604	* if the page is uptodate.
				605	* FIXME: For an uptodate page, the buffers may need to
				606	* be written out because they were not initialized on
				607	* disk before.
				608	*/
				609	if (!PageUptodate(page)) {
				610	// TODO:
				611	// Zero any non-uptodate buffers up to i_size.
				612	// Set them uptodate and dirty.
				613	}
				614	// TODO:
				615	// Update initialized size in the attribute and in the
				616	// inode (up to i_size).
				617	// Update iblock.
				618	// FIXME: This is inefficient. Try to batch the two
				619	// size changes to happen in one go.
				620	ntfs_error(vol->sb, "Writing beyond initialized size "
				621	"is not supported yet. Sorry.");
				622	err = -EOPNOTSUPP;
				623	break;
				624	// Do NOT set_buffer_new() BUT DO clear buffer range
				625	// outside write request range.
				626	// set_buffer_uptodate() on complete buffers as well as
				627	// set_buffer_dirty().
				628	}
				629
				630	/* No need to map buffers that are already mapped. */
				631	if (buffer_mapped(bh))
				632	continue;
				633
				634	/* Unmapped, dirty buffer. Need to map it. */
				635	bh->b_bdev = vol->sb->s_bdev;
				636
				637	/* Convert block into corresponding vcn and offset. */
				638	vcn = (VCN)block << blocksize_bits;
				639	vcn_ofs = vcn & vol->cluster_size_mask;
				640	vcn >>= vol->cluster_size_bits;
				641	if (!rl) {
				642	lock_retry_remap:
				643	down_read(&ni->runlist.lock);
				644	rl = ni->runlist.rl;
				645	}
				646	if (likely(rl != NULL)) {
				647	/* Seek to element containing target vcn. */
				648	while (rl->length && rl[1].vcn <= vcn)
				649	rl++;
				650	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				651	} else
				652	lcn = LCN_RL_NOT_MAPPED;
				653	/* Successful remap. */
				654	if (lcn >= 0) {
				655	/* Setup buffer head to point to correct block. */
				656	bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
				657	vcn_ofs) >> blocksize_bits;
				658	set_buffer_mapped(bh);
				659	continue;
				660	}
				661	/* It is a hole, need to instantiate it. */
				662	if (lcn == LCN_HOLE) {
				663	// TODO: Instantiate the hole.
				664	// clear_buffer_new(bh);
				665	// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
				666	ntfs_error(vol->sb, "Writing into sparse regions is "
				667	"not supported yet. Sorry.");
				668	err = -EOPNOTSUPP;
				669	break;
				670	}
				671	/* If first try and runlist unmapped, map and retry. */
				672	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				673	is_retry = TRUE;
				674	/*
				675	* Attempt to map runlist, dropping lock for
				676	* the duration.
				677	*/
				678	up_read(&ni->runlist.lock);
				679	err = ntfs_map_runlist(ni, vcn);
				680	if (likely(!err))
				681	goto lock_retry_remap;
				682	rl = NULL;
				683	lcn = err;
				684	}
				685	/* Failed to map the buffer, even after retrying. */
				686	bh->b_blocknr = -1;
				687	ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				688	"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				689	"because its location on disk could not be "
				690	"determined%s (error code %lli).", ni->mft_no,
				691	ni->type, (unsigned long long)vcn,
				692	vcn_ofs, is_retry ? " even after "
				693	"retrying" : "", (long long)lcn);
				694	if (!err)
				695	err = -EIO;
				696	break;
				697	} while (block++, (bh = bh->b_this_page) != head);
				698
				699	/* Release the lock if we took it. */
				700	if (rl)
				701	up_read(&ni->runlist.lock);
				702
				703	/* For the error case, need to reset bh to the beginning. */
				704	bh = head;
				705
				706	/* Just an optimization, so ->readpage() isn't called later. */
				707	if (unlikely(!PageUptodate(page))) {
				708	int uptodate = 1;
				709	do {
				710	if (!buffer_uptodate(bh)) {
				711	uptodate = 0;
				712	bh = head;
				713	break;
				714	}
				715	} while ((bh = bh->b_this_page) != head);
				716	if (uptodate)
				717	SetPageUptodate(page);
				718	}
				719
				720	/* Setup all mapped, dirty buffers for async write i/o. */
				721	do {
				722	get_bh(bh);
				723	if (buffer_mapped(bh) && buffer_dirty(bh)) {
				724	lock_buffer(bh);
				725	if (test_clear_buffer_dirty(bh)) {
				726	BUG_ON(!buffer_uptodate(bh));
				727	mark_buffer_async_write(bh);
				728	} else
				729	unlock_buffer(bh);
				730	} else if (unlikely(err)) {
				731	/*
				732	* For the error case. The buffer may have been set
				733	* dirty during attachment to a dirty page.
				734	*/
				735	if (err != -ENOMEM)
				736	clear_buffer_dirty(bh);
				737	}
				738	} while ((bh = bh->b_this_page) != head);
				739
				740	if (unlikely(err)) {
				741	// TODO: Remove the -EOPNOTSUPP check later on...
				742	if (unlikely(err == -EOPNOTSUPP))
				743	err = 0;
				744	else if (err == -ENOMEM) {
				745	ntfs_warning(vol->sb, "Error allocating memory. "
				746	"Redirtying page so we try again "
				747	"later.");
				748	/*
				749	* Put the page back on mapping->dirty_pages, but
				750	* leave its buffer's dirty state as-is.
				751	*/
				752	redirty_page_for_writepage(wbc, page);
				753	err = 0;
				754	} else
				755	SetPageError(page);
				756	}
				757
				758	BUG_ON(PageWriteback(page));
				759	set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
				760	unlock_page(page);
				761
				762	/*
				763	* Submit the prepared buffers for i/o. Note the page is unlocked,
				764	* and the async write i/o completion handler can end_page_writeback()
				765	* at any time after the first submit_bh(). So the buffers can then
				766	* disappear...
				767	*/
				768	need_end_writeback = TRUE;
				769	do {
				770	struct buffer_head *next = bh->b_this_page;
				771	if (buffer_async_write(bh)) {
				772	submit_bh(WRITE, bh);
				773	need_end_writeback = FALSE;
				774	}
				775	put_bh(bh);
				776	bh = next;
				777	} while (bh != head);
				778
				779	/* If no i/o was started, need to end_page_writeback(). */
				780	if (unlikely(need_end_writeback))
				781	end_page_writeback(page);
				782
				783	ntfs_debug("Done.");
				784	return err;
				785	}
				786
				787	/**
				788	* ntfs_write_mst_block - write a @page to the backing store
				789	* @page: page cache page to write out
				790	* @wbc: writeback control structure
				791	*
				792	* This function is for writing pages belonging to non-resident, mst protected
				793	* attributes to their backing store. The only supported attributes are index
				794	* allocation and $MFT/$DATA. Both directory inodes and index inodes are
				795	* supported for the index allocation case.
				796	*
				797	* The page must remain locked for the duration of the write because we apply
				798	* the mst fixups, write, and then undo the fixups, so if we were to unlock the
				799	* page before undoing the fixups, any other user of the page will see the
				800	* page contents as corrupt.
				801	*
				802	* We clear the page uptodate flag for the duration of the function to ensure
				803	* exclusion for the $MFT/$DATA case against someone mapping an mft record we
				804	* are about to apply the mst fixups to.
				805	*
				806	* Return 0 on success and -errno on error.
				807	*
				808	* Based on ntfs_write_block(), ntfs_mft_writepage(), and
				809	* write_mft_record_nolock().
				810	*/
				811	static int ntfs_write_mst_block(struct page *page,
				812	struct writeback_control *wbc)
				813	{
				814	sector_t block, dblock, rec_block;
				815	struct inode *vi = page->mapping->host;
				816	ntfs_inode *ni = NTFS_I(vi);
				817	ntfs_volume *vol = ni->vol;
				818	u8 *kaddr;
				819	unsigned char bh_size_bits = vi->i_blkbits;
				820	unsigned int bh_size = 1 << bh_size_bits;
				821	unsigned int rec_size = ni->itype.index.block_size;
				822	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
				823	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
				824	int max_bhs = PAGE_CACHE_SIZE / bh_size;
				825	struct buffer_head *bhs[max_bhs];
				826	runlist_element *rl;
				827	int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2;
				828	unsigned rec_size_bits;
				829	BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
				830
				831	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				832	"0x%lx.", vi->i_ino, ni->type, page->index);
				833	BUG_ON(!NInoNonResident(ni));
				834	BUG_ON(!NInoMstProtected(ni));
				835	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
				836	/*
				837	* NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
				838	* in its page cache were to be marked dirty. However this should
				839	* never happen with the current driver and considering we do not
				840	* handle this case here we do want to BUG(), at least for now.
				841	*/
				842	BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
				843	(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
				844	BUG_ON(!max_bhs);
				845
				846	/* Were we called for sync purposes? */
				847	sync = (wbc->sync_mode == WB_SYNC_ALL);
				848
				849	/* Make sure we have mapped buffers. */
				850	BUG_ON(!page_has_buffers(page));
				851	bh = head = page_buffers(page);
				852	BUG_ON(!bh);
				853
				854	rec_size_bits = ni->itype.index.block_size_bits;
				855	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
				856	bhs_per_rec = rec_size >> bh_size_bits;
				857	BUG_ON(!bhs_per_rec);
				858
				859	/* The first block in the page. */
				860	rec_block = block = (sector_t)page->index <<
				861	(PAGE_CACHE_SHIFT - bh_size_bits);
				862
				863	/* The first out of bounds block for the data size. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	864	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	865
				866	rl = NULL;
				867	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
				868	page_is_dirty = rec_is_dirty = FALSE;
				869	rec_start_bh = NULL;
				870	do {
				871	BOOL is_retry = FALSE;
				872
				873	if (likely(block < rec_block)) {
				874	if (unlikely(block >= dblock)) {
				875	clear_buffer_dirty(bh);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	876	set_buffer_uptodate(bh);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	877	continue;
				878	}
				879	/*
				880	* This block is not the first one in the record. We
				881	* ignore the buffer's dirty state because we could
				882	* have raced with a parallel mark_ntfs_record_dirty().
				883	*/
				884	if (!rec_is_dirty)
				885	continue;
				886	if (unlikely(err2)) {
				887	if (err2 != -ENOMEM)
				888	clear_buffer_dirty(bh);
				889	continue;
				890	}
				891	} else /* if (block == rec_block) */ {
				892	BUG_ON(block > rec_block);
				893	/* This block is the first one in the record. */
				894	rec_block += bhs_per_rec;
				895	err2 = 0;
				896	if (unlikely(block >= dblock)) {
				897	clear_buffer_dirty(bh);
				898	continue;
				899	}
				900	if (!buffer_dirty(bh)) {
				901	/* Clean records are not written out. */
				902	rec_is_dirty = FALSE;
				903	continue;
				904	}
				905	rec_is_dirty = TRUE;
				906	rec_start_bh = bh;
				907	}
				908	/* Need to map the buffer if it is not mapped already. */
				909	if (unlikely(!buffer_mapped(bh))) {
				910	VCN vcn;
				911	LCN lcn;
				912	unsigned int vcn_ofs;
				913
				914	/* Obtain the vcn and offset of the current block. */
				915	vcn = (VCN)block << bh_size_bits;
				916	vcn_ofs = vcn & vol->cluster_size_mask;
				917	vcn >>= vol->cluster_size_bits;
				918	if (!rl) {
				919	lock_retry_remap:
				920	down_read(&ni->runlist.lock);
				921	rl = ni->runlist.rl;
				922	}
				923	if (likely(rl != NULL)) {
				924	/* Seek to element containing target vcn. */
				925	while (rl->length && rl[1].vcn <= vcn)
				926	rl++;
				927	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				928	} else
				929	lcn = LCN_RL_NOT_MAPPED;
				930	/* Successful remap. */
				931	if (likely(lcn >= 0)) {
				932	/* Setup buffer head to correct block. */
				933	bh->b_blocknr = ((lcn <<
				934	vol->cluster_size_bits) +
				935	vcn_ofs) >> bh_size_bits;
				936	set_buffer_mapped(bh);
				937	} else {
				938	/*
				939	* Remap failed. Retry to map the runlist once
				940	* unless we are working on $MFT which always
				941	* has the whole of its runlist in memory.
				942	*/
				943	if (!is_mft && !is_retry &&
				944	lcn == LCN_RL_NOT_MAPPED) {
				945	is_retry = TRUE;
				946	/*
				947	* Attempt to map runlist, dropping
				948	* lock for the duration.
				949	*/
				950	up_read(&ni->runlist.lock);
				951	err2 = ntfs_map_runlist(ni, vcn);
				952	if (likely(!err2))
				953	goto lock_retry_remap;
				954	if (err2 == -ENOMEM)
				955	page_is_dirty = TRUE;
				956	lcn = err2;
					/*
					* The runlist lock was released above and has
					* not been retaken. Forget the runlist pointer
					* so that later buffers take the lock again
					* and the up_read() after the loop, which is
					* keyed on rl, is not run without the lock.
					*/
					rl = NULL;
				957	} else {
				958	err2 = -EIO;
					/*
					* rl is NULL here only if the lock was just
					* taken and the inode has no runlist. The
					* up_read() after the loop is skipped when rl
					* is NULL, so release the lock now.
					*/
					if (!rl)
					up_read(&ni->runlist.lock);
					}
				959	/* Hard error. Abort writing this record. */
				960	if (!err || err == -ENOMEM)
				961	err = err2;
				962	bh->b_blocknr = -1;
				963	ntfs_error(vol->sb, "Cannot write ntfs record "
				964	"0x%llx (inode 0x%lx, "
				965	"attribute type 0x%x) because "
				966	"its location on disk could "
				967	"not be determined (error "
				968	"code %lli).", (s64)block <<
				969	bh_size_bits >>
				970	vol->mft_record_size_bits,
				971	ni->mft_no, ni->type,
				972	(long long)lcn);
				973	/*
				974	* If this is not the first buffer, remove the
				975	* buffers in this record from the list of
				976	* buffers to write and clear their dirty bit
				977	* if not error -ENOMEM.
				978	*/
				979	if (rec_start_bh != bh) {
				980	while (bhs[--nr_bhs] != rec_start_bh)
				981	;
				982	if (err2 != -ENOMEM) {
				983	do {
				984	clear_buffer_dirty(
				985	rec_start_bh);
				986	} while ((rec_start_bh =
				987	rec_start_bh->
				988	b_this_page) !=
				989	bh);
				990	}
				991	}
				992	continue;
				993	}
				994	}
				995	BUG_ON(!buffer_uptodate(bh));
				996	BUG_ON(nr_bhs >= max_bhs);
				997	bhs[nr_bhs++] = bh;
				998	} while (block++, (bh = bh->b_this_page) != head);
				999	if (unlikely(rl))
				1000	up_read(&ni->runlist.lock);
				1001	/* If there were no dirty buffers, we are done. */
				1002	if (!nr_bhs)
				1003	goto done;
				1004	/* Map the page so we can access its contents. */
				1005	kaddr = kmap(page);
				1006	/* Clear the page uptodate flag whilst the mst fixups are applied. */
				1007	BUG_ON(!PageUptodate(page));
				1008	ClearPageUptodate(page);
				1009	for (i = 0; i < nr_bhs; i++) {
				1010	unsigned int ofs;
				1011
				1012	/* Skip buffers which are not at the beginning of records. */
				1013	if (i % bhs_per_rec)
				1014	continue;
				1015	tbh = bhs[i];
				1016	ofs = bh_offset(tbh);
				1017	if (is_mft) {
				1018	ntfs_inode *tni;
				1019	unsigned long mft_no;
				1020
				1021	/* Get the mft record number. */
				1022	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1023	>> rec_size_bits;
				1024	/* Check whether to write this mft record. */
				1025	tni = NULL;
				1026	if (!ntfs_may_write_mft_record(vol, mft_no,
				1027	(MFT_RECORD*)(kaddr + ofs), &tni)) {
				1028	/*
				1029	* The record should not be written. This
				1030	* means we need to redirty the page before
				1031	* returning.
				1032	*/
				1033	page_is_dirty = TRUE;
				1034	/*
				1035	* Remove the buffers in this mft record from
				1036	* the list of buffers to write.
				1037	*/
				1038	do {
				1039	bhs[i] = NULL;
				1040	} while (++i % bhs_per_rec);
				1041	continue;
				1042	}
				1043	/*
				1044	* The record should be written. If a locked ntfs
				1045	* inode was returned, add it to the array of locked
				1046	* ntfs inodes.
				1047	*/
				1048	if (tni)
				1049	locked_nis[nr_locked_nis++] = tni;
				1050	}
				1051	/* Apply the mst protection fixups. */
				1052	err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				1053	rec_size);
				1054	if (unlikely(err2)) {
				1055	if (!err || err == -ENOMEM)
				1056	err = -EIO;
				1057	ntfs_error(vol->sb, "Failed to apply mst fixups "
				1058	"(inode 0x%lx, attribute type 0x%x, "
				1059	"page index 0x%lx, page offset 0x%x)!"
				1060	" Unmount and run chkdsk.", vi->i_ino,
				1061	ni->type, page->index, ofs);
				1062	/*
				1063	* Mark all the buffers in this record clean as we do
				1064	* not want to write corrupt data to disk.
				1065	*/
				1066	do {
				1067	clear_buffer_dirty(bhs[i]);
				1068	bhs[i] = NULL;
				1069	} while (++i % bhs_per_rec);
				1070	continue;
				1071	}
				1072	nr_recs++;
				1073	}
				1074	/* If no records are to be written out, we are done. */
				1075	if (!nr_recs)
				1076	goto unm_done;
				1077	flush_dcache_page(page);
				1078	/* Lock buffers and start synchronous write i/o on them. */
				1079	for (i = 0; i < nr_bhs; i++) {
				1080	tbh = bhs[i];
				1081	if (!tbh)
				1082	continue;
				1083	if (unlikely(test_set_buffer_locked(tbh)))
				1084	BUG();
				1085	/* The buffer dirty state is now irrelevant, just clean it. */
				1086	clear_buffer_dirty(tbh);
				1087	BUG_ON(!buffer_uptodate(tbh));
				1088	BUG_ON(!buffer_mapped(tbh));
				1089	get_bh(tbh);
				1090	tbh->b_end_io = end_buffer_write_sync;
				1091	submit_bh(WRITE, tbh);
				1092	}
				1093	/* Synchronize the mft mirror now if not @sync. */
				1094	if (is_mft && !sync)
				1095	goto do_mirror;
				1096	do_wait:
				1097	/* Wait on i/o completion of buffers. */
				1098	for (i = 0; i < nr_bhs; i++) {
				1099	tbh = bhs[i];
				1100	if (!tbh)
				1101	continue;
				1102	wait_on_buffer(tbh);
				1103	if (unlikely(!buffer_uptodate(tbh))) {
				1104	ntfs_error(vol->sb, "I/O error while writing ntfs "
				1105	"record buffer (inode 0x%lx, "
				1106	"attribute type 0x%x, page index "
				1107	"0x%lx, page offset 0x%lx)! Unmount "
				1108	"and run chkdsk.", vi->i_ino, ni->type,
				1109	page->index, bh_offset(tbh));
				1110	if (!err || err == -ENOMEM)
				1111	err = -EIO;
				1112	/*
				1113	* Set the buffer uptodate so the page and buffer
				1114	* states do not become out of sync.
				1115	*/
				1116	set_buffer_uptodate(tbh);
				1117	}
				1118	}
				1119	/* If @sync, now synchronize the mft mirror. */
				1120	if (is_mft && sync) {
				1121	do_mirror:
				1122	for (i = 0; i < nr_bhs; i++) {
				1123	unsigned long mft_no;
				1124	unsigned int ofs;
				1125
				1126	/*
				1127	* Skip buffers which are not at the beginning of
				1128	* records.
				1129	*/
				1130	if (i % bhs_per_rec)
				1131	continue;
				1132	tbh = bhs[i];
				1133	/* Skip removed buffers (and hence records). */
				1134	if (!tbh)
				1135	continue;
				1136	ofs = bh_offset(tbh);
				1137	/* Get the mft record number. */
				1138	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1139	>> rec_size_bits;
				1140	if (mft_no < vol->mftmirr_size)
				1141	ntfs_sync_mft_mirror(vol, mft_no,
				1142	(MFT_RECORD*)(kaddr + ofs),
				1143	sync);
				1144	}
				1145	if (!sync)
				1146	goto do_wait;
				1147	}
				1148	/* Remove the mst protection fixups again. */
				1149	for (i = 0; i < nr_bhs; i++) {
				1150	if (!(i % bhs_per_rec)) {
				1151	tbh = bhs[i];
				1152	if (!tbh)
				1153	continue;
				1154	post_write_mst_fixup((NTFS_RECORD*)(kaddr +
				1155	bh_offset(tbh)));
				1156	}
				1157	}
				1158	flush_dcache_page(page);
				1159	unm_done:
				1160	/* Unlock any locked inodes. */
				1161	while (nr_locked_nis-- > 0) {
				1162	ntfs_inode *tni, *base_tni;
				1163
				1164	tni = locked_nis[nr_locked_nis];
				1165	/* Get the base inode. */
				1166	down(&tni->extent_lock);
				1167	if (tni->nr_extents >= 0)
				1168	base_tni = tni;
				1169	else {
				1170	base_tni = tni->ext.base_ntfs_ino;
				1171	BUG_ON(!base_tni);
				1172	}
				1173	up(&tni->extent_lock);
				1174	ntfs_debug("Unlocking %s inode 0x%lx.",
				1175	tni == base_tni ? "base" : "extent",
				1176	tni->mft_no);
				1177	up(&tni->mrec_lock);
				1178	atomic_dec(&tni->count);
				1179	iput(VFS_I(base_tni));
				1180	}
				1181	SetPageUptodate(page);
				1182	kunmap(page);
				1183	done:
				1184	if (unlikely(err && err != -ENOMEM)) {
				1185	/*
				1186	* Set page error if there is only one ntfs record in the page.
				1187	* Otherwise we would lose per-record granularity.
				1188	*/
				1189	if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
				1190	SetPageError(page);
				1191	NVolSetErrors(vol);
				1192	}
				1193	if (page_is_dirty) {
				1194	ntfs_debug("Page still contains one or more dirty ntfs "
				1195	"records. Redirtying the page starting at "
				1196	"record 0x%lx.", page->index <<
				1197	(PAGE_CACHE_SHIFT - rec_size_bits));
				1198	redirty_page_for_writepage(wbc, page);
				1199	unlock_page(page);
				1200	} else {
				1201	/*
				1202	* Keep the VM happy. This must be done otherwise the
				1203	* radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
				1204	* the page is clean.
				1205	*/
				1206	BUG_ON(PageWriteback(page));
				1207	set_page_writeback(page);
				1208	unlock_page(page);
				1209	end_page_writeback(page);
				1210	}
				1211	if (likely(!err))
				1212	ntfs_debug("Done.");
				1213	return err;
				1214	}
				1215
				1216	/**
				1217	* ntfs_writepage - write a @page to the backing store
				1218	* @page: page cache page to write out
				1219	* @wbc: writeback control structure
				1220	*
				1221	* This is called from the VM when it wants to have a dirty ntfs page cache
				1222	* page cleaned. The VM has already locked the page and marked it clean.
				1223	*
				1224	* For non-resident attributes, ntfs_writepage() writes the @page by calling
				1225	* the ntfs version of the generic block_write_full_page() function,
				1226	* ntfs_write_block(), which in turn if necessary creates and writes the
				1227	* buffers associated with the page asynchronously.
				1228	*
				1229	* For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
				1230	* the data to the mft record (which at this stage is most likely in memory).
				1231	* The mft record is then marked dirty and written out asynchronously via the
				1232	* vfs inode dirty code path for the inode the mft record belongs to or via the
				1233	* vm page dirty code path for the page the mft record is in.
				1234	*
				1235	* Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
				1236	*
				1237	* Return 0 on success and -errno on error.
				1238	*/
				1239	static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
				1240	{
				1241	loff_t i_size;
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1242	struct inode *vi = page->mapping->host;
				1243	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1244	char *kaddr;
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1245	ntfs_attr_search_ctx *ctx = NULL;
				1246	MFT_RECORD *m = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1247	u32 attr_len;
				1248	int err;
				1249
				1250	BUG_ON(!PageLocked(page));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1251	i_size = i_size_read(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1252	/* Is the page fully outside i_size? (truncate in progress) */
				1253	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
				1254	PAGE_CACHE_SHIFT)) {
				1255	/*
				1256	* The page may have dirty, unmapped buffers. Make them
				1257	* freeable here, so the page does not leak.
				1258	*/
				1259	block_invalidatepage(page, 0);
				1260	unlock_page(page);
				1261	ntfs_debug("Write outside i_size - truncated?");
				1262	return 0;
				1263	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1264	/* NInoNonResident() == NInoIndexAllocPresent() */
				1265	if (NInoNonResident(ni)) {
				1266	/*
				1267	* Only unnamed $DATA attributes can be compressed, encrypted,
				1268	* and/or sparse.
				1269	*/
				1270	if (ni->type == AT_DATA && !ni->name_len) {
				1271	/* If file is encrypted, deny access, just like NT4. */
				1272	if (NInoEncrypted(ni)) {
				1273	unlock_page(page);
				1274	ntfs_debug("Denying write access to encrypted "
				1275	"file.");
				1276	return -EACCES;
				1277	}
				1278	/* Compressed data streams are handled in compress.c. */
				1279	if (NInoCompressed(ni)) {
				1280	// TODO: Implement and replace this check with
				1281	// return ntfs_write_compressed_block(page);
				1282	unlock_page(page);
				1283	ntfs_error(vi->i_sb, "Writing to compressed "
				1284	"files is not supported yet. "
				1285	"Sorry.");
				1286	return -EOPNOTSUPP;
				1287	}
				1288	// TODO: Implement and remove this check.
				1289	if (NInoSparse(ni)) {
				1290	unlock_page(page);
				1291	ntfs_error(vi->i_sb, "Writing to sparse files "
				1292	"is not supported yet. Sorry.");
				1293	return -EOPNOTSUPP;
				1294	}
				1295	}
				1296	/* We have to zero every time due to mmap-at-end-of-file. */
				1297	if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
				1298	/* The page straddles i_size. */
				1299	unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
				1300	kaddr = kmap_atomic(page, KM_USER0);
				1301	memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
				1302	flush_dcache_page(page);
				1303	kunmap_atomic(kaddr, KM_USER0);
				1304	}
				1305	/* Handle mst protected attributes. */
				1306	if (NInoMstProtected(ni))
				1307	return ntfs_write_mst_block(page, wbc);
				1308	/* Normal data stream. */
				1309	return ntfs_write_block(page, wbc);
				1310	}
				1311	/*
				1312	* Attribute is resident, implying it is not compressed, encrypted,
				1313	* sparse, or mst protected. This also means the attribute is smaller
				1314	* than an mft record and hence smaller than a page, so can simply
				1315	* return error on any pages with index above 0.
				1316	*/
				1317	BUG_ON(page_has_buffers(page));
				1318	BUG_ON(!PageUptodate(page));
				1319	if (unlikely(page->index > 0)) {
				1320	ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
				1321	"Aborting write.", page->index);
				1322	BUG_ON(PageWriteback(page));
				1323	set_page_writeback(page);
				1324	unlock_page(page);
				1325	end_page_writeback(page);
				1326	return -EIO;
				1327	}
				1328	if (!NInoAttr(ni))
				1329	base_ni = ni;
				1330	else
				1331	base_ni = ni->ext.base_ntfs_ino;
				1332	/* Map, pin, and lock the mft record. */
				1333	m = map_mft_record(base_ni);
				1334	if (IS_ERR(m)) {
				1335	err = PTR_ERR(m);
				1336	m = NULL;
				1337	ctx = NULL;
				1338	goto err_out;
				1339	}
				1340	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1341	if (unlikely(!ctx)) {
				1342	err = -ENOMEM;
				1343	goto err_out;
				1344	}
				1345	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1346	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1347	if (unlikely(err))
				1348	goto err_out;
				1349	/*
				1350	* Keep the VM happy. This must be done otherwise the radix-tree tag
				1351	* PAGECACHE_TAG_DIRTY remains set even though the page is clean.
				1352	*/
				1353	BUG_ON(PageWriteback(page));
				1354	set_page_writeback(page);
				1355	unlock_page(page);
				1356
				1357	/*
				1358	* Here, we don't need to zero the out of bounds area every time because
				1359	* the below memcpy() already takes care of the mmap-at-end-of-file
				1360	* requirements. If the file is converted to a non-resident one, then
				1361	* the code path use is switched to the non-resident one where the
				1362	* zeroing happens on each ntfs_writepage() invocation.
				1363	*
				1364	* The above also applies nicely when i_size is decreased.
				1365	*
				1366	* When i_size is increased, the memory between the old and new i_size
				1367	* _must_ be zeroed (or overwritten with new data). Otherwise we will
				1368	* expose data to userspace/disk which should never have been exposed.
				1369	*
				1370	* FIXME: Ensure that i_size increases do the zeroing/overwriting and
				1371	* if we cannot guarantee that, then enable the zeroing below. If the
				1372	* zeroing below is enabled, we MUST move the unlock_page() from above
				1373	* to after the kunmap_atomic(), i.e. just before the
				1374	* end_page_writeback().
				1375	* UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
				1376	* increases for resident attributes so those are ok.
				1377	* TODO: ntfs_truncate(), others?
				1378	*/
				1379
				1380	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1381	i_size = i_size_read(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1382	if (unlikely(attr_len > i_size)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1383	attr_len = i_size;
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1384	ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1385	}
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1386	kaddr = kmap_atomic(page, KM_USER0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1387	/* Copy the data from the page to the mft record. */
				1388	memcpy((u8*)ctx->attr +
				1389	le16_to_cpu(ctx->attr->data.resident.value_offset),
				1390	kaddr, attr_len);
				1391	flush_dcache_mft_record_page(ctx->ntfs_ino);
				1392	/* Zero out of bounds area in the page cache page. */
				1393	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				1394	flush_dcache_page(page);
				1395	kunmap_atomic(kaddr, KM_USER0);
				1396
				1397	end_page_writeback(page);
				1398
				1399	/* Mark the mft record dirty, so it gets written back. */
				1400	mark_mft_record_dirty(ctx->ntfs_ino);
				1401	ntfs_attr_put_search_ctx(ctx);
				1402	unmap_mft_record(base_ni);
				1403	return 0;
				1404	err_out:
				1405	if (err == -ENOMEM) {
				1406	ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				1407	"page so we try again later.");
				1408	/*
				1409	* Put the page back on mapping->dirty_pages, but leave its
				1410	* buffers' dirty state as-is.
				1411	*/
				1412	redirty_page_for_writepage(wbc, page);
				1413	err = 0;
				1414	} else {
				1415	ntfs_error(vi->i_sb, "Resident attribute write failed with "
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1416	"error %i.", err);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1417	SetPageError(page);
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1418	NVolSetErrors(ni->vol);
				1419	make_bad_inode(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1420	}
				1421	unlock_page(page);
				1422	if (ctx)
				1423	ntfs_attr_put_search_ctx(ctx);
				1424	if (m)
				1425	unmap_mft_record(base_ni);
				1426	return err;
				1427	}
				1428
				1429	/**
				1430	* ntfs_prepare_nonresident_write -
				1431	*
				1432	*/
				1433	static int ntfs_prepare_nonresident_write(struct page *page,
				1434	unsigned from, unsigned to)
				1435	{
				1436	VCN vcn;
				1437	LCN lcn;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1438	s64 initialized_size;
				1439	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1440	sector_t block, ablock, iblock;
				1441	struct inode *vi;
				1442	ntfs_inode *ni;
				1443	ntfs_volume *vol;
				1444	runlist_element *rl;
				1445	struct buffer_head bh, head, wait[2], *wait_bh = wait;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1446	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1447	unsigned int vcn_ofs, block_start, block_end, blocksize;
				1448	int err;
				1449	BOOL is_retry;
				1450	unsigned char blocksize_bits;
				1451
				1452	vi = page->mapping->host;
				1453	ni = NTFS_I(vi);
				1454	vol = ni->vol;
				1455
				1456	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				1457	"0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
				1458	page->index, from, to);
				1459
				1460	BUG_ON(!NInoNonResident(ni));
				1461
				1462	blocksize_bits = vi->i_blkbits;
				1463	blocksize = 1 << blocksize_bits;
				1464
				1465	/*
				1466	* create_empty_buffers() will create uptodate/dirty buffers if the
				1467	* page is uptodate/dirty.
				1468	*/
				1469	if (!page_has_buffers(page))
				1470	create_empty_buffers(page, blocksize, 0);
				1471	bh = head = page_buffers(page);
				1472	if (unlikely(!bh))
				1473	return -ENOMEM;
				1474
				1475	/* The first block in the page. */
				1476	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				1477
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1478	read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1479	/*
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame^]	1480	* The first out of bounds block for the allocated size. No need to
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1481	* round up as allocated_size is in multiples of cluster size and the
				1482	* minimum cluster size is 512 bytes, which is equal to the smallest
				1483	* blocksize.
				1484	*/
				1485	ablock = ni->allocated_size >> blocksize_bits;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1486	i_size = i_size_read(vi);
				1487	initialized_size = ni->initialized_size;
				1488	read_unlock_irqrestore(&ni->size_lock, flags);
				1489
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1490	/* The last (fully or partially) initialized block. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1491	iblock = initialized_size >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1492
				1493	/* Loop through all the buffers in the page. */
				1494	block_start = 0;
				1495	rl = NULL;
				1496	err = 0;
				1497	do {
				1498	block_end = block_start + blocksize;
				1499	/*
				1500	* If buffer @bh is outside the write, just mark it uptodate
				1501	* if the page is uptodate and continue with the next buffer.
				1502	*/
				1503	if (block_end <= from \|\| block_start >= to) {
				1504	if (PageUptodate(page)) {
				1505	if (!buffer_uptodate(bh))
				1506	set_buffer_uptodate(bh);
				1507	}
				1508	continue;
				1509	}
				1510	/*
				1511	* @bh is at least partially being written to.
				1512	* Make sure it is not marked as new.
				1513	*/
				1514	//if (buffer_new(bh))
				1515	// clear_buffer_new(bh);
				1516
				1517	if (block >= ablock) {
				1518	// TODO: block is above allocated_size, need to
				1519	// allocate it. Best done in one go to accommodate not
				1520	// only block but all above blocks up to and including:
				1521	// ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
				1522	// - 1) >> blobksize_bits. Obviously will need to round
				1523	// up to next cluster boundary, too. This should be
				1524	// done with a helper function, so it can be reused.
				1525	ntfs_error(vol->sb, "Writing beyond allocated size "
				1526	"is not supported yet. Sorry.");
				1527	err = -EOPNOTSUPP;
				1528	goto err_out;
				1529	// Need to update ablock.
				1530	// Need to set_buffer_new() on all block bhs that are
				1531	// newly allocated.
				1532	}
				1533	/*
				1534	* Now we have enough allocated size to fulfill the whole
				1535	* request, i.e. block < ablock is true.
				1536	*/
				1537	if (unlikely((block >= iblock) &&
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1538	(initialized_size < i_size))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1539	/*
				1540	* If this page is fully outside initialized size, zero
				1541	* out all pages between the current initialized size
				1542	* and the current page. Just use ntfs_readpage() to do
				1543	* the zeroing transparently.
				1544	*/
				1545	if (block > iblock) {
				1546	// TODO:
				1547	// For each page do:
				1548	// - read_cache_page()
				1549	// Again for each page do:
				1550	// - wait_on_page_locked()
				1551	// - Check (PageUptodate(page) &&
				1552	// !PageError(page))
				1553	// Update initialized size in the attribute and
				1554	// in the inode.
				1555	// Again, for each page do:
				1556	// __set_page_dirty_buffers();
				1557	// page_cache_release()
				1558	// We don't need to wait on the writes.
				1559	// Update iblock.
				1560	}
				1561	/*
				1562	* The current page straddles initialized size. Zero
				1563	* all non-uptodate buffers and set them uptodate (and
				1564	* dirty?). Note, there aren't any non-uptodate buffers
				1565	* if the page is uptodate.
				1566	* FIXME: For an uptodate page, the buffers may need to
				1567	* be written out because they were not initialized on
				1568	* disk before.
				1569	*/
				1570	if (!PageUptodate(page)) {
				1571	// TODO:
				1572	// Zero any non-uptodate buffers up to i_size.
				1573	// Set them uptodate and dirty.
				1574	}
				1575	// TODO:
				1576	// Update initialized size in the attribute and in the
				1577	// inode (up to i_size).
				1578	// Update iblock.
				1579	// FIXME: This is inefficient. Try to batch the two
				1580	// size changes to happen in one go.
				1581	ntfs_error(vol->sb, "Writing beyond initialized size "
				1582	"is not supported yet. Sorry.");
				1583	err = -EOPNOTSUPP;
				1584	goto err_out;
				1585	// Do NOT set_buffer_new() BUT DO clear buffer range
				1586	// outside write request range.
				1587	// set_buffer_uptodate() on complete buffers as well as
				1588	// set_buffer_dirty().
				1589	}
				1590
				1591	/* Need to map unmapped buffers. */
				1592	if (!buffer_mapped(bh)) {
				1593	/* Unmapped buffer. Need to map it. */
				1594	bh->b_bdev = vol->sb->s_bdev;
				1595
				1596	/* Convert block into corresponding vcn and offset. */
				1597	vcn = (VCN)block << blocksize_bits >>
				1598	vol->cluster_size_bits;
				1599	vcn_ofs = ((VCN)block << blocksize_bits) &
				1600	vol->cluster_size_mask;
				1601
				1602	is_retry = FALSE;
				1603	if (!rl) {
				1604	lock_retry_remap:
				1605	down_read(&ni->runlist.lock);
				1606	rl = ni->runlist.rl;
				1607	}
				1608	if (likely(rl != NULL)) {
				1609	/* Seek to element containing target vcn. */
				1610	while (rl->length && rl[1].vcn <= vcn)
				1611	rl++;
				1612	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				1613	} else
				1614	lcn = LCN_RL_NOT_MAPPED;
				1615	if (unlikely(lcn < 0)) {
				1616	/*
				1617	* We extended the attribute allocation above.
				1618	* If we hit an ENOENT here it means that the
				1619	* allocation was insufficient which is a bug.
				1620	*/
				1621	BUG_ON(lcn == LCN_ENOENT);
				1622
				1623	/* It is a hole, need to instantiate it. */
				1624	if (lcn == LCN_HOLE) {
				1625	// TODO: Instantiate the hole.
				1626	// clear_buffer_new(bh);
				1627	// unmap_underlying_metadata(bh->b_bdev,
				1628	// bh->b_blocknr);
				1629	// For non-uptodate buffers, need to
				1630	// zero out the region outside the
				1631	// request in this bh or all bhs,
				1632	// depending on what we implemented
				1633	// above.
				1634	// Need to flush_dcache_page().
				1635	// Or could use set_buffer_new()
				1636	// instead?
				1637	ntfs_error(vol->sb, "Writing into "
				1638	"sparse regions is "
				1639	"not supported yet. "
				1640	"Sorry.");
				1641	err = -EOPNOTSUPP;
				1642	goto err_out;
				1643	} else if (!is_retry &&
				1644	lcn == LCN_RL_NOT_MAPPED) {
				1645	is_retry = TRUE;
				1646	/*
				1647	* Attempt to map runlist, dropping
				1648	* lock for the duration.
				1649	*/
				1650	up_read(&ni->runlist.lock);
				1651	err = ntfs_map_runlist(ni, vcn);
				1652	if (likely(!err))
				1653	goto lock_retry_remap;
				1654	rl = NULL;
				1655	lcn = err;
				1656	}
				1657	/*
				1658	* Failed to map the buffer, even after
				1659	* retrying.
				1660	*/
				1661	bh->b_blocknr = -1;
				1662	ntfs_error(vol->sb, "Failed to write to inode "
				1663	"0x%lx, attribute type 0x%x, "
				1664	"vcn 0x%llx, offset 0x%x "
				1665	"because its location on disk "
				1666	"could not be determined%s "
				1667	"(error code %lli).",
				1668	ni->mft_no, ni->type,
				1669	(unsigned long long)vcn,
				1670	vcn_ofs, is_retry ? " even "
				1671	"after retrying" : "",
				1672	(long long)lcn);
				1673	if (!err)
				1674	err = -EIO;
				1675	goto err_out;
				1676	}
				1677	/* We now have a successful remap, i.e. lcn >= 0. */
				1678
				1679	/* Setup buffer head to correct block. */
				1680	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				1681	+ vcn_ofs) >> blocksize_bits;
				1682	set_buffer_mapped(bh);
				1683
				1684	// FIXME: Something analogous to this is needed for
				1685	// each newly allocated block, i.e. BH_New.
				1686	// FIXME: Might need to take this out of the
				1687	// if (!buffer_mapped(bh)) {}, depending on how we
				1688	// implement things during the allocated_size and
				1689	// initialized_size extension code above.
				1690	if (buffer_new(bh)) {
				1691	clear_buffer_new(bh);
				1692	unmap_underlying_metadata(bh->b_bdev,
				1693	bh->b_blocknr);
				1694	if (PageUptodate(page)) {
				1695	set_buffer_uptodate(bh);
				1696	continue;
				1697	}
				1698	/*
				1699	* Page is _not_ uptodate, zero surrounding
				1700	* region. NOTE: This is how we decide if to
				1701	* zero or not!
				1702	*/
				1703	if (block_end > to \|\| block_start < from) {
				1704	void *kaddr;
				1705
				1706	kaddr = kmap_atomic(page, KM_USER0);
				1707	if (block_end > to)
				1708	memset(kaddr + to, 0,
				1709	block_end - to);
				1710	if (block_start < from)
				1711	memset(kaddr + block_start, 0,
				1712	from -
				1713	block_start);
				1714	flush_dcache_page(page);
				1715	kunmap_atomic(kaddr, KM_USER0);
				1716	}
				1717	continue;
				1718	}
				1719	}
				1720	/* @bh is mapped, set it uptodate if the page is uptodate. */
				1721	if (PageUptodate(page)) {
				1722	if (!buffer_uptodate(bh))
				1723	set_buffer_uptodate(bh);
				1724	continue;
				1725	}
				1726	/*
				1727	* The page is not uptodate. The buffer is mapped. If it is not
				1728	* uptodate, and it is only partially being written to, we need
				1729	* to read the buffer in before the write, i.e. right now.
				1730	*/
				1731	if (!buffer_uptodate(bh) &&
				1732	(block_start < from \|\| block_end > to)) {
				1733	ll_rw_block(READ, 1, &bh);
				1734	*wait_bh++ = bh;
				1735	}
				1736	} while (block++, block_start = block_end,
				1737	(bh = bh->b_this_page) != head);
				1738
				1739	/* Release the lock if we took it. */
				1740	if (rl) {
				1741	up_read(&ni->runlist.lock);
				1742	rl = NULL;
				1743	}
				1744
				1745	/* If we issued read requests, let them complete. */
				1746	while (wait_bh > wait) {
				1747	wait_on_buffer(*--wait_bh);
				1748	if (!buffer_uptodate(*wait_bh))
				1749	return -EIO;
				1750	}
				1751
				1752	ntfs_debug("Done.");
				1753	return 0;
				1754	err_out:
				1755	/*
				1756	* Zero out any newly allocated blocks to avoid exposing stale data.
				1757	* If BH_New is set, we know that the block was newly allocated in the
				1758	* above loop.
				1759	* FIXME: What about initialized_size increments? Have we done all the
				1760	* required zeroing above? If not this error handling is broken, and
				1761	* in particular the if (block_end <= from) check is completely bogus.
				1762	*/
				1763	bh = head;
				1764	block_start = 0;
				1765	is_retry = FALSE;
				1766	do {
				1767	block_end = block_start + blocksize;
				1768	if (block_end <= from)
				1769	continue;
				1770	if (block_start >= to)
				1771	break;
				1772	if (buffer_new(bh)) {
				1773	void *kaddr;
				1774
				1775	clear_buffer_new(bh);
				1776	kaddr = kmap_atomic(page, KM_USER0);
				1777	memset(kaddr + block_start, 0, bh->b_size);
				1778	kunmap_atomic(kaddr, KM_USER0);
				1779	set_buffer_uptodate(bh);
				1780	mark_buffer_dirty(bh);
				1781	is_retry = TRUE;
				1782	}
				1783	} while (block_start = block_end, (bh = bh->b_this_page) != head);
				1784	if (is_retry)
				1785	flush_dcache_page(page);
				1786	if (rl)
				1787	up_read(&ni->runlist.lock);
				1788	return err;
				1789	}
				1790
				1791	/**
				1792	* ntfs_prepare_write - prepare a page for receiving data
				1793	*
				1794	* This is called from generic_file_write() with i_sem held on the inode
				1795	* (@page->mapping->host). The @page is locked but not kmap()ped. The source
				1796	* data has not yet been copied into the @page.
				1797	*
				1798	* Need to extend the attribute/fill in holes if necessary, create blocks and
				1799	* make partially overwritten blocks uptodate,
				1800	*
				1801	* i_size is not to be modified yet.
				1802	*
				1803	* Return 0 on success or -errno on error.
				1804	*
				1805	* Should be using block_prepare_write() [support for sparse files] or
				1806	* cont_prepare_write() [no support for sparse files]. Cannot do that due to
				1807	* ntfs specifics but can look at them for implementation guidance.
				1808	*
				1809	* Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
				1810	* the first byte in the page that will be written to and @to is the first byte
				1811	* after the last byte that will be written to.
				1812	*/
				1813	static int ntfs_prepare_write(struct file file, struct page page,
				1814	unsigned from, unsigned to)
				1815	{
				1816	s64 new_size;
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1817	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1818	struct inode *vi = page->mapping->host;
				1819	ntfs_inode base_ni = NULL, ni = NTFS_I(vi);
				1820	ntfs_volume *vol = ni->vol;
				1821	ntfs_attr_search_ctx *ctx = NULL;
				1822	MFT_RECORD *m = NULL;
				1823	ATTR_RECORD *a;
				1824	u8 *kaddr;
				1825	u32 attr_len;
				1826	int err;
				1827
				1828	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				1829	"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
				1830	page->index, from, to);
				1831	BUG_ON(!PageLocked(page));
				1832	BUG_ON(from > PAGE_CACHE_SIZE);
				1833	BUG_ON(to > PAGE_CACHE_SIZE);
				1834	BUG_ON(from > to);
				1835	BUG_ON(NInoMstProtected(ni));
				1836	/*
				1837	* If a previous ntfs_truncate() failed, repeat it and abort if it
				1838	* fails again.
				1839	*/
				1840	if (unlikely(NInoTruncateFailed(ni))) {
				1841	down_write(&vi->i_alloc_sem);
				1842	err = ntfs_truncate(vi);
				1843	up_write(&vi->i_alloc_sem);
				1844	if (err \|\| NInoTruncateFailed(ni)) {
				1845	if (!err)
				1846	err = -EIO;
				1847	goto err_out;
				1848	}
				1849	}
				1850	/* If the attribute is not resident, deal with it elsewhere. */
				1851	if (NInoNonResident(ni)) {
				1852	/*
				1853	* Only unnamed $DATA attributes can be compressed, encrypted,
				1854	* and/or sparse.
				1855	*/
				1856	if (ni->type == AT_DATA && !ni->name_len) {
				1857	/* If file is encrypted, deny access, just like NT4. */
				1858	if (NInoEncrypted(ni)) {
				1859	ntfs_debug("Denying write access to encrypted "
				1860	"file.");
				1861	return -EACCES;
				1862	}
				1863	/* Compressed data streams are handled in compress.c. */
				1864	if (NInoCompressed(ni)) {
				1865	// TODO: Implement and replace this check with
				1866	// return ntfs_write_compressed_block(page);
				1867	ntfs_error(vi->i_sb, "Writing to compressed "
				1868	"files is not supported yet. "
				1869	"Sorry.");
				1870	return -EOPNOTSUPP;
				1871	}
				1872	// TODO: Implement and remove this check.
				1873	if (NInoSparse(ni)) {
				1874	ntfs_error(vi->i_sb, "Writing to sparse files "
				1875	"is not supported yet. Sorry.");
				1876	return -EOPNOTSUPP;
				1877	}
				1878	}
				1879	/* Normal data stream. */
				1880	return ntfs_prepare_nonresident_write(page, from, to);
				1881	}
				1882	/*
				1883	* Attribute is resident, implying it is not compressed, encrypted, or
				1884	* sparse.
				1885	*/
				1886	BUG_ON(page_has_buffers(page));
				1887	new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
				1888	/* If we do not need to resize the attribute allocation we are done. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1889	if (new_size <= i_size_read(vi))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1890	goto done;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1891	/* Map, pin, and lock the (base) mft record. */
				1892	if (!NInoAttr(ni))
				1893	base_ni = ni;
				1894	else
				1895	base_ni = ni->ext.base_ntfs_ino;
				1896	m = map_mft_record(base_ni);
				1897	if (IS_ERR(m)) {
				1898	err = PTR_ERR(m);
				1899	m = NULL;
				1900	ctx = NULL;
				1901	goto err_out;
				1902	}
				1903	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1904	if (unlikely(!ctx)) {
				1905	err = -ENOMEM;
				1906	goto err_out;
				1907	}
				1908	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1909	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1910	if (unlikely(err)) {
				1911	if (err == -ENOENT)
				1912	err = -EIO;
				1913	goto err_out;
				1914	}
				1915	m = ctx->mrec;
				1916	a = ctx->attr;
				1917	/* The total length of the attribute value. */
				1918	attr_len = le32_to_cpu(a->data.resident.value_length);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1919	/* Fix an eventual previous failure of ntfs_commit_write(). */
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1920	i_size = i_size_read(vi);
				1921	if (unlikely(attr_len > i_size)) {
				1922	attr_len = i_size;
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1923	a->data.resident.value_length = cpu_to_le32(attr_len);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1924	}
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1925	/* If we do not need to resize the attribute allocation we are done. */
				1926	if (new_size <= attr_len)
				1927	goto done_unm;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1928	/* Check if new size is allowed in $AttrDef. */
				1929	err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
				1930	if (unlikely(err)) {
				1931	if (err == -ERANGE) {
				1932	ntfs_error(vol->sb, "Write would cause the inode "
				1933	"0x%lx to exceed the maximum size for "
				1934	"its attribute type (0x%x). Aborting "
				1935	"write.", vi->i_ino,
				1936	le32_to_cpu(ni->type));
				1937	} else {
				1938	ntfs_error(vol->sb, "Inode 0x%lx has unknown "
				1939	"attribute type 0x%x. Aborting "
				1940	"write.", vi->i_ino,
				1941	le32_to_cpu(ni->type));
				1942	err = -EIO;
				1943	}
				1944	goto err_out2;
				1945	}
				1946	/*
				1947	* Extend the attribute record to be able to store the new attribute
				1948	* size.
				1949	*/
				1950	if (new_size >= vol->mft_record_size \|\| ntfs_attr_record_resize(m, a,
				1951	le16_to_cpu(a->data.resident.value_offset) +
				1952	new_size)) {
				1953	/* Not enough space in the mft record. */
				1954	ntfs_error(vol->sb, "Not enough space in the mft record for "
				1955	"the resized attribute value. This is not "
				1956	"supported yet. Aborting write.");
				1957	err = -EOPNOTSUPP;
				1958	goto err_out2;
				1959	}
				1960	/*
				1961	* We have enough space in the mft record to fit the write. This
				1962	* implies the attribute is smaller than the mft record and hence the
				1963	* attribute must be in a single page and hence page->index must be 0.
				1964	*/
				1965	BUG_ON(page->index);
				1966	/*
				1967	* If the beginning of the write is past the old size, enlarge the
				1968	* attribute value up to the beginning of the write and fill it with
				1969	* zeroes.
				1970	*/
				1971	if (from > attr_len) {
				1972	memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
				1973	attr_len, 0, from - attr_len);
				1974	a->data.resident.value_length = cpu_to_le32(from);
				1975	/* Zero the corresponding area in the page as well. */
				1976	if (PageUptodate(page)) {
				1977	kaddr = kmap_atomic(page, KM_USER0);
				1978	memset(kaddr + attr_len, 0, from - attr_len);
				1979	kunmap_atomic(kaddr, KM_USER0);
				1980	flush_dcache_page(page);
				1981	}
				1982	}
				1983	flush_dcache_mft_record_page(ctx->ntfs_ino);
				1984	mark_mft_record_dirty(ctx->ntfs_ino);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1985	done_unm:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1986	ntfs_attr_put_search_ctx(ctx);
				1987	unmap_mft_record(base_ni);
				1988	/*
				1989	* Because resident attributes are handled by memcpy() to/from the
				1990	* corresponding MFT record, and because this form of i/o is byte
				1991	* aligned rather than block aligned, there is no need to bring the
				1992	* page uptodate here as in the non-resident case where we need to
				1993	* bring the buffers straddled by the write uptodate before
				1994	* generic_file_write() does the copying from userspace.
				1995	*
				1996	* We thus defer the uptodate bringing of the page region outside the
				1997	* region written to to ntfs_commit_write(), which makes the code
				1998	* simpler and saves one atomic kmap which is good.
				1999	*/
				2000	done:
				2001	ntfs_debug("Done.");
				2002	return 0;
				2003	err_out:
				2004	if (err == -ENOMEM)
				2005	ntfs_warning(vi->i_sb, "Error allocating memory required to "
				2006	"prepare the write.");
				2007	else {
				2008	ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
				2009	"with error %i.", err);
				2010	NVolSetErrors(vol);
				2011	make_bad_inode(vi);
				2012	}
				2013	err_out2:
				2014	if (ctx)
				2015	ntfs_attr_put_search_ctx(ctx);
				2016	if (m)
				2017	unmap_mft_record(base_ni);
				2018	return err;
				2019	}
				2020
				2021	/**
				2022	* ntfs_commit_nonresident_write -
				2023	*
				2024	*/
				2025	static int ntfs_commit_nonresident_write(struct page *page,
				2026	unsigned from, unsigned to)
				2027	{
				2028	s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
				2029	struct inode *vi = page->mapping->host;
				2030	struct buffer_head bh, head;
				2031	unsigned int block_start, block_end, blocksize;
				2032	BOOL partial;
				2033
				2034	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				2035	"0x%lx, from = %u, to = %u.", vi->i_ino,
				2036	NTFS_I(vi)->type, page->index, from, to);
				2037	blocksize = 1 << vi->i_blkbits;
				2038
				2039	// FIXME: We need a whole slew of special cases in here for compressed
				2040	// files for example...
				2041	// For now, we know ntfs_prepare_write() would have failed so we can't
				2042	// get here in any of the cases which we have to special case, so we
				2043	// are just a ripped off, unrolled generic_commit_write().
				2044
				2045	bh = head = page_buffers(page);
				2046	block_start = 0;
				2047	partial = FALSE;
				2048	do {
				2049	block_end = block_start + blocksize;
				2050	if (block_end <= from \|\| block_start >= to) {
				2051	if (!buffer_uptodate(bh))
				2052	partial = TRUE;
				2053	} else {
				2054	set_buffer_uptodate(bh);
				2055	mark_buffer_dirty(bh);
				2056	}
				2057	} while (block_start = block_end, (bh = bh->b_this_page) != head);
				2058	/*
				2059	* If this is a partial write which happened to make all buffers
				2060	* uptodate then we can optimize away a bogus ->readpage() for the next
				2061	* read(). Here we 'discover' whether the page went uptodate as a
				2062	* result of this (potentially partial) write.
				2063	*/
				2064	if (!partial)
				2065	SetPageUptodate(page);
				2066	/*
				2067	* Not convinced about this at all. See disparity comment above. For
				2068	* now we know ntfs_prepare_write() would have failed in the write
				2069	* exceeds i_size case, so this will never trigger which is fine.
				2070	*/
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	2071	if (pos > i_size_read(vi)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2072	ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
				2073	"not supported yet. Sorry.");
				2074	return -EOPNOTSUPP;
				2075	// vi->i_size = pos;
				2076	// mark_inode_dirty(vi);
				2077	}
				2078	ntfs_debug("Done.");
				2079	return 0;
				2080	}
				2081
				2082	/**
				2083	* ntfs_commit_write - commit the received data
				2084	*
				2085	* This is called from generic_file_write() with i_sem held on the inode
				2086	* (@page->mapping->host). The @page is locked but not kmap()ped. The source
				2087	* data has already been copied into the @page. ntfs_prepare_write() has been
				2088	* called before the data copied and it returned success so we can take the
				2089	* results of various BUG checks and some error handling for granted.
				2090	*
				2091	* Need to mark modified blocks dirty so they get written out later when
				2092	* ntfs_writepage() is invoked by the VM.
				2093	*
				2094	* Return 0 on success or -errno on error.
				2095	*
				2096	* Should be using generic_commit_write(). This marks buffers uptodate and
				2097	* dirty, sets the page uptodate if all buffers in the page are uptodate, and
				2098	* updates i_size if the end of io is beyond i_size. In that case, it also
				2099	* marks the inode dirty.
				2100	*
				2101	* Cannot use generic_commit_write() due to ntfs specialities but can look at
				2102	* it for implementation guidance.
				2103	*
				2104	* If things have gone as outlined in ntfs_prepare_write(), then we do not
				2105	* need to do any page content modifications here at all, except in the write
				2106	* to resident attribute case, where we need to do the uptodate bringing here
				2107	* which we combine with the copying into the mft record which means we save
				2108	* one atomic kmap.
				2109	*/
				2110	static int ntfs_commit_write(struct file file, struct page page,
				2111	unsigned from, unsigned to)
				2112	{
				2113	struct inode *vi = page->mapping->host;
				2114	ntfs_inode base_ni, ni = NTFS_I(vi);
				2115	char kaddr, kattr;
				2116	ntfs_attr_search_ctx *ctx;
				2117	MFT_RECORD *m;
				2118	ATTR_RECORD *a;
				2119	u32 attr_len;
				2120	int err;
				2121
				2122	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				2123	"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
				2124	page->index, from, to);
				2125	/* If the attribute is not resident, deal with it elsewhere. */
				2126	if (NInoNonResident(ni)) {
				2127	/* Only unnamed $DATA attributes can be compressed/encrypted. */
				2128	if (ni->type == AT_DATA && !ni->name_len) {
				2129	/* Encrypted files need separate handling. */
				2130	if (NInoEncrypted(ni)) {
				2131	// We never get here at present!
				2132	BUG();
				2133	}
				2134	/* Compressed data streams are handled in compress.c. */
				2135	if (NInoCompressed(ni)) {
				2136	// TODO: Implement this!
				2137	// return ntfs_write_compressed_block(page);
				2138	// We never get here at present!
				2139	BUG();
				2140	}
				2141	}
				2142	/* Normal data stream. */
				2143	return ntfs_commit_nonresident_write(page, from, to);
				2144	}
				2145	/*
				2146	* Attribute is resident, implying it is not compressed, encrypted, or
				2147	* sparse.
				2148	*/
				2149	if (!NInoAttr(ni))
				2150	base_ni = ni;
				2151	else
				2152	base_ni = ni->ext.base_ntfs_ino;
				2153	/* Map, pin, and lock the mft record. */
				2154	m = map_mft_record(base_ni);
				2155	if (IS_ERR(m)) {
				2156	err = PTR_ERR(m);
				2157	m = NULL;
				2158	ctx = NULL;
				2159	goto err_out;
				2160	}
				2161	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				2162	if (unlikely(!ctx)) {
				2163	err = -ENOMEM;
				2164	goto err_out;
				2165	}
				2166	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				2167	CASE_SENSITIVE, 0, NULL, 0, ctx);
				2168	if (unlikely(err)) {
				2169	if (err == -ENOENT)
				2170	err = -EIO;
				2171	goto err_out;
				2172	}
				2173	a = ctx->attr;
				2174	/* The total length of the attribute value. */
				2175	attr_len = le32_to_cpu(a->data.resident.value_length);
				2176	BUG_ON(from > attr_len);
				2177	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
				2178	kaddr = kmap_atomic(page, KM_USER0);
				2179	/* Copy the received data from the page to the mft record. */
				2180	memcpy(kattr + from, kaddr + from, to - from);
				2181	/* Update the attribute length if necessary. */
				2182	if (to > attr_len) {
				2183	attr_len = to;
				2184	a->data.resident.value_length = cpu_to_le32(attr_len);
				2185	}
				2186	/*
				2187	* If the page is not uptodate, bring the out of bounds area(s)
				2188	* uptodate by copying data from the mft record to the page.
				2189	*/
				2190	if (!PageUptodate(page)) {
				2191	if (from > 0)
				2192	memcpy(kaddr, kattr, from);
				2193	if (to < attr_len)
				2194	memcpy(kaddr + to, kattr + to, attr_len - to);
				2195	/* Zero the region outside the end of the attribute value. */
				2196	if (attr_len < PAGE_CACHE_SIZE)
				2197	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				2198	/*
				2199	* The probability of not having done any of the above is
				2200	* extremely small, so we just flush unconditionally.
				2201	*/
				2202	flush_dcache_page(page);
				2203	SetPageUptodate(page);
				2204	}
				2205	kunmap_atomic(kaddr, KM_USER0);
				2206	/* Update i_size if necessary. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	2207	if (i_size_read(vi) < attr_len) {
				2208	unsigned long flags;
				2209
				2210	write_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2211	ni->allocated_size = ni->initialized_size = attr_len;
				2212	i_size_write(vi, attr_len);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	2213	write_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2214	}
				2215	/* Mark the mft record dirty, so it gets written back. */
				2216	flush_dcache_mft_record_page(ctx->ntfs_ino);
				2217	mark_mft_record_dirty(ctx->ntfs_ino);
				2218	ntfs_attr_put_search_ctx(ctx);
				2219	unmap_mft_record(base_ni);
				2220	ntfs_debug("Done.");
				2221	return 0;
				2222	err_out:
				2223	if (err == -ENOMEM) {
				2224	ntfs_warning(vi->i_sb, "Error allocating memory required to "
				2225	"commit the write.");
				2226	if (PageUptodate(page)) {
				2227	ntfs_warning(vi->i_sb, "Page is uptodate, setting "
				2228	"dirty so the write will be retried "
				2229	"later on by the VM.");
				2230	/*
				2231	* Put the page on mapping->dirty_pages, but leave its
				2232	* buffers' dirty state as-is.
				2233	*/
				2234	__set_page_dirty_nobuffers(page);
				2235	err = 0;
				2236	} else
				2237	ntfs_error(vi->i_sb, "Page is not uptodate. Written "
				2238	"data has been lost.");
				2239	} else {
				2240	ntfs_error(vi->i_sb, "Resident attribute commit write failed "
				2241	"with error %i.", err);
				2242	NVolSetErrors(ni->vol);
				2243	make_bad_inode(vi);
				2244	}
				2245	if (ctx)
				2246	ntfs_attr_put_search_ctx(ctx);
				2247	if (m)
				2248	unmap_mft_record(base_ni);
				2249	return err;
				2250	}
				2251
				2252	#endif /* NTFS_RW */
				2253
				2254	/**
				2255	* ntfs_aops - general address space operations for inodes and attributes
				2256	*/
				2257	struct address_space_operations ntfs_aops = {
				2258	.readpage = ntfs_readpage, /* Fill page with data. */
				2259	.sync_page = block_sync_page, /* Currently, just unplugs the
				2260	disk request queue. */
				2261	#ifdef NTFS_RW
				2262	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				2263	.prepare_write = ntfs_prepare_write, /* Prepare page and buffers
				2264	ready to receive data. */
				2265	.commit_write = ntfs_commit_write, /* Commit received data. */
				2266	#endif /* NTFS_RW */
				2267	};
				2268
				2269	/**
				2270	* ntfs_mst_aops - general address space operations for mst protecteed inodes
				2271	* and attributes
				2272	*/
				2273	struct address_space_operations ntfs_mst_aops = {
				2274	.readpage = ntfs_readpage, /* Fill page with data. */
				2275	.sync_page = block_sync_page, /* Currently, just unplugs the
				2276	disk request queue. */
				2277	#ifdef NTFS_RW
				2278	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				2279	.set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
				2280	without touching the buffers
				2281	belonging to the page. */
				2282	#endif /* NTFS_RW */
				2283	};
				2284
				2285	#ifdef NTFS_RW
				2286
				2287	/**
				2288	* mark_ntfs_record_dirty - mark an ntfs record dirty
				2289	* @page: page containing the ntfs record to mark dirty
				2290	* @ofs: byte offset within @page at which the ntfs record begins
				2291	*
				2292	* Set the buffers and the page in which the ntfs record is located dirty.
				2293	*
				2294	* The latter also marks the vfs inode the ntfs record belongs to dirty
				2295	* (I_DIRTY_PAGES only).
				2296	*
				2297	* If the page does not have buffers, we create them and set them uptodate.
				2298	* The page may not be locked which is why we need to handle the buffers under
				2299	* the mapping->private_lock. Once the buffers are marked dirty we no longer
				2300	* need the lock since try_to_free_buffers() does not free dirty buffers.
				2301	*/
				2302	void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
				2303	struct address_space *mapping = page->mapping;
				2304	ntfs_inode *ni = NTFS_I(mapping->host);
				2305	struct buffer_head bh, head, *buffers_to_free = NULL;
				2306	unsigned int end, bh_size, bh_ofs;
				2307
				2308	BUG_ON(!PageUptodate(page));
				2309	end = ofs + ni->itype.index.block_size;
				2310	bh_size = 1 << VFS_I(ni)->i_blkbits;
				2311	spin_lock(&mapping->private_lock);
				2312	if (unlikely(!page_has_buffers(page))) {
				2313	spin_unlock(&mapping->private_lock);
				2314	bh = head = alloc_page_buffers(page, bh_size, 1);
				2315	spin_lock(&mapping->private_lock);
				2316	if (likely(!page_has_buffers(page))) {
				2317	struct buffer_head *tail;
				2318
				2319	do {
				2320	set_buffer_uptodate(bh);
				2321	tail = bh;
				2322	bh = bh->b_this_page;
				2323	} while (bh);
				2324	tail->b_this_page = head;
				2325	attach_page_buffers(page, head);
				2326	} else
				2327	buffers_to_free = bh;
				2328	}
				2329	bh = head = page_buffers(page);
				2330	do {
				2331	bh_ofs = bh_offset(bh);
				2332	if (bh_ofs + bh_size <= ofs)
				2333	continue;
				2334	if (unlikely(bh_ofs >= end))
				2335	break;
				2336	set_buffer_dirty(bh);
				2337	} while ((bh = bh->b_this_page) != head);
				2338	spin_unlock(&mapping->private_lock);
				2339	__set_page_dirty_nobuffers(page);
				2340	if (unlikely(buffers_to_free)) {
				2341	do {
				2342	bh = buffers_to_free->b_this_page;
				2343	free_buffer_head(buffers_to_free);
				2344	buffers_to_free = bh;
				2345	} while (buffers_to_free);
				2346	}
				2347	}
				2348
				2349	#endif /* NTFS_RW */