Blame - fs/ntfs/aops.c - kernel/msm-5.4

blob: 6241c4cfbe2810c0cfc36b08fff070715c2b4488 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/**
				2	* aops.c - NTFS kernel address space operations and page cache handling.
				3	* Part of the Linux-NTFS project.
				4	*
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	5	* Copyright (c) 2001-2005 Anton Altaparmakov
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	6	* Copyright (c) 2002 Richard Russon
				7	*
				8	* This program/include file is free software; you can redistribute it and/or
				9	* modify it under the terms of the GNU General Public License as published
				10	* by the Free Software Foundation; either version 2 of the License, or
				11	* (at your option) any later version.
				12	*
				13	* This program/include file is distributed in the hope that it will be
				14	* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
				15	* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				16	* GNU General Public License for more details.
				17	*
				18	* You should have received a copy of the GNU General Public License
				19	* along with this program (in the main directory of the Linux-NTFS
				20	* distribution in the file COPYING); if not, write to the Free Software
				21	* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				22	*/
				23
				24	#include <linux/errno.h>
				25	#include <linux/mm.h>
				26	#include <linux/pagemap.h>
				27	#include <linux/swap.h>
				28	#include <linux/buffer_head.h>
				29	#include <linux/writeback.h>
				30
				31	#include "aops.h"
				32	#include "attrib.h"
				33	#include "debug.h"
				34	#include "inode.h"
				35	#include "mft.h"
				36	#include "runlist.h"
				37	#include "types.h"
				38	#include "ntfs.h"
				39
				40	/**
				41	* ntfs_end_buffer_async_read - async io completion for reading attributes
				42	* @bh: buffer head on which io is completed
				43	* @uptodate: whether @bh is now uptodate or not
				44	*
				45	* Asynchronous I/O completion handler for reading pages belonging to the
				46	* attribute address space of an inode. The inodes can either be files or
				47	* directories or they can be fake inodes describing some attribute.
				48	*
				49	* If NInoMstProtected(), perform the post read mst fixups when all IO on the
				50	* page has been completed and mark the page uptodate or set the error bit on
				51	* the page. To determine the size of the records that need fixing up, we
				52	* cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
				53	* record size, and index_block_size_bits, to the log(base 2) of the ntfs
				54	* record size.
				55	*/
				56	static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
				57	{
				58	static DEFINE_SPINLOCK(page_uptodate_lock);
				59	unsigned long flags;
				60	struct buffer_head *tmp;
				61	struct page *page;
				62	ntfs_inode *ni;
				63	int page_uptodate = 1;
				64
				65	page = bh->b_page;
				66	ni = NTFS_I(page->mapping->host);
				67
				68	if (likely(uptodate)) {
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	69	s64 file_ofs, initialized_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	70
				71	set_buffer_uptodate(bh);
				72
				73	file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
				74	bh_offset(bh);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	75	read_lock_irqsave(&ni->size_lock, flags);
				76	initialized_size = ni->initialized_size;
				77	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	78	/* Check for the current buffer head overflowing. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	79	if (file_ofs + bh->b_size > initialized_size) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	80	char *addr;
				81	int ofs = 0;
				82
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	83	if (file_ofs < initialized_size)
				84	ofs = initialized_size - file_ofs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	85	addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
				86	memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
				87	flush_dcache_page(page);
				88	kunmap_atomic(addr, KM_BIO_SRC_IRQ);
				89	}
				90	} else {
				91	clear_buffer_uptodate(bh);
				92	ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
				93	(unsigned long long)bh->b_blocknr);
				94	SetPageError(page);
				95	}
				96	spin_lock_irqsave(&page_uptodate_lock, flags);
				97	clear_buffer_async_read(bh);
				98	unlock_buffer(bh);
				99	tmp = bh;
				100	do {
				101	if (!buffer_uptodate(tmp))
				102	page_uptodate = 0;
				103	if (buffer_async_read(tmp)) {
				104	if (likely(buffer_locked(tmp)))
				105	goto still_busy;
				106	/* Async buffers must be locked. */
				107	BUG();
				108	}
				109	tmp = tmp->b_this_page;
				110	} while (tmp != bh);
				111	spin_unlock_irqrestore(&page_uptodate_lock, flags);
				112	/*
				113	* If none of the buffers had errors then we can set the page uptodate,
				114	* but we first have to perform the post read mst fixups, if the
				115	* attribute is mst protected, i.e. if NInoMstProteced(ni) is true.
				116	* Note we ignore fixup errors as those are detected when
				117	* map_mft_record() is called which gives us per record granularity
				118	* rather than per page granularity.
				119	*/
				120	if (!NInoMstProtected(ni)) {
				121	if (likely(page_uptodate && !PageError(page)))
				122	SetPageUptodate(page);
				123	} else {
				124	char *addr;
				125	unsigned int i, recs;
				126	u32 rec_size;
				127
				128	rec_size = ni->itype.index.block_size;
				129	recs = PAGE_CACHE_SIZE / rec_size;
				130	/* Should have been verified before we got here... */
				131	BUG_ON(!recs);
				132	addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
				133	for (i = 0; i < recs; i++)
				134	post_read_mst_fixup((NTFS_RECORD*)(addr +
				135	i * rec_size), rec_size);
				136	flush_dcache_page(page);
				137	kunmap_atomic(addr, KM_BIO_SRC_IRQ);
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	138	if (likely(page_uptodate && !PageError(page)))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	139	SetPageUptodate(page);
				140	}
				141	unlock_page(page);
				142	return;
				143	still_busy:
				144	spin_unlock_irqrestore(&page_uptodate_lock, flags);
				145	return;
				146	}
				147
				148	/**
				149	* ntfs_read_block - fill a @page of an address space with data
				150	* @page: page cache page to fill with data
				151	*
				152	* Fill the page @page of the address space belonging to the @page->host inode.
				153	* We read each buffer asynchronously and when all buffers are read in, our io
				154	* completion handler ntfs_end_buffer_read_async(), if required, automatically
				155	* applies the mst fixups to the page before finally marking it uptodate and
				156	* unlocking it.
				157	*
				158	* We only enforce allocated_size limit because i_size is checked for in
				159	* generic_file_read().
				160	*
				161	* Return 0 on success and -errno on error.
				162	*
				163	* Contains an adapted version of fs/buffer.c::block_read_full_page().
				164	*/
				165	static int ntfs_read_block(struct page *page)
				166	{
				167	VCN vcn;
				168	LCN lcn;
				169	ntfs_inode *ni;
				170	ntfs_volume *vol;
				171	runlist_element *rl;
				172	struct buffer_head bh, head, *arr[MAX_BUF_PER_PAGE];
				173	sector_t iblock, lblock, zblock;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	174	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	175	unsigned int blocksize, vcn_ofs;
				176	int i, nr;
				177	unsigned char blocksize_bits;
				178
				179	ni = NTFS_I(page->mapping->host);
				180	vol = ni->vol;
				181
				182	/* $MFT/$DATA must have its complete runlist in memory at all times. */
				183	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
				184
				185	blocksize_bits = VFS_I(ni)->i_blkbits;
				186	blocksize = 1 << blocksize_bits;
				187
				188	if (!page_has_buffers(page))
				189	create_empty_buffers(page, blocksize, 0);
				190	bh = head = page_buffers(page);
				191	if (unlikely(!bh)) {
				192	unlock_page(page);
				193	return -ENOMEM;
				194	}
				195
				196	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	197	read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	198	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
				199	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	200	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	201
				202	/* Loop through all the buffers in the page. */
				203	rl = NULL;
				204	nr = i = 0;
				205	do {
				206	u8 *kaddr;
				207
				208	if (unlikely(buffer_uptodate(bh)))
				209	continue;
				210	if (unlikely(buffer_mapped(bh))) {
				211	arr[nr++] = bh;
				212	continue;
				213	}
				214	bh->b_bdev = vol->sb->s_bdev;
				215	/* Is the block within the allowed limits? */
				216	if (iblock < lblock) {
				217	BOOL is_retry = FALSE;
				218
				219	/* Convert iblock into corresponding vcn and offset. */
				220	vcn = (VCN)iblock << blocksize_bits >>
				221	vol->cluster_size_bits;
				222	vcn_ofs = ((VCN)iblock << blocksize_bits) &
				223	vol->cluster_size_mask;
				224	if (!rl) {
				225	lock_retry_remap:
				226	down_read(&ni->runlist.lock);
				227	rl = ni->runlist.rl;
				228	}
				229	if (likely(rl != NULL)) {
				230	/* Seek to element containing target vcn. */
				231	while (rl->length && rl[1].vcn <= vcn)
				232	rl++;
				233	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				234	} else
				235	lcn = LCN_RL_NOT_MAPPED;
				236	/* Successful remap. */
				237	if (lcn >= 0) {
				238	/* Setup buffer head to correct block. */
				239	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				240	+ vcn_ofs) >> blocksize_bits;
				241	set_buffer_mapped(bh);
				242	/* Only read initialized data blocks. */
				243	if (iblock < zblock) {
				244	arr[nr++] = bh;
				245	continue;
				246	}
				247	/* Fully non-initialized data block, zero it. */
				248	goto handle_zblock;
				249	}
				250	/* It is a hole, need to zero it. */
				251	if (lcn == LCN_HOLE)
				252	goto handle_hole;
				253	/* If first try and runlist unmapped, map and retry. */
				254	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				255	int err;
				256	is_retry = TRUE;
				257	/*
				258	* Attempt to map runlist, dropping lock for
				259	* the duration.
				260	*/
				261	up_read(&ni->runlist.lock);
				262	err = ntfs_map_runlist(ni, vcn);
				263	if (likely(!err))
				264	goto lock_retry_remap;
				265	rl = NULL;
				266	lcn = err;
				267	}
				268	/* Hard error, zero out region. */
				269	bh->b_blocknr = -1;
				270	SetPageError(page);
				271	ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
				272	"attribute type 0x%x, vcn 0x%llx, "
				273	"offset 0x%x because its location on "
				274	"disk could not be determined%s "
				275	"(error code %lli).", ni->mft_no,
				276	ni->type, (unsigned long long)vcn,
				277	vcn_ofs, is_retry ? " even after "
				278	"retrying" : "", (long long)lcn);
				279	}
				280	/*
				281	* Either iblock was outside lblock limits or
				282	* ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
				283	* of the page and set the buffer uptodate.
				284	*/
				285	handle_hole:
				286	bh->b_blocknr = -1UL;
				287	clear_buffer_mapped(bh);
				288	handle_zblock:
				289	kaddr = kmap_atomic(page, KM_USER0);
				290	memset(kaddr + i * blocksize, 0, blocksize);
				291	flush_dcache_page(page);
				292	kunmap_atomic(kaddr, KM_USER0);
				293	set_buffer_uptodate(bh);
				294	} while (i++, iblock++, (bh = bh->b_this_page) != head);
				295
				296	/* Release the lock if we took it. */
				297	if (rl)
				298	up_read(&ni->runlist.lock);
				299
				300	/* Check we have at least one buffer ready for i/o. */
				301	if (nr) {
				302	struct buffer_head *tbh;
				303
				304	/* Lock the buffers. */
				305	for (i = 0; i < nr; i++) {
				306	tbh = arr[i];
				307	lock_buffer(tbh);
				308	tbh->b_end_io = ntfs_end_buffer_async_read;
				309	set_buffer_async_read(tbh);
				310	}
				311	/* Finally, start i/o on the buffers. */
				312	for (i = 0; i < nr; i++) {
				313	tbh = arr[i];
				314	if (likely(!buffer_uptodate(tbh)))
				315	submit_bh(READ, tbh);
				316	else
				317	ntfs_end_buffer_async_read(tbh, 1);
				318	}
				319	return 0;
				320	}
				321	/* No i/o was scheduled on any of the buffers. */
				322	if (likely(!PageError(page)))
				323	SetPageUptodate(page);
				324	else /* Signal synchronous i/o error. */
				325	nr = -EIO;
				326	unlock_page(page);
				327	return nr;
				328	}
				329
				330	/**
				331	* ntfs_readpage - fill a @page of a @file with data from the device
				332	* @file: open file to which the page @page belongs or NULL
				333	* @page: page cache page to fill with data
				334	*
				335	* For non-resident attributes, ntfs_readpage() fills the @page of the open
				336	* file @file by calling the ntfs version of the generic block_read_full_page()
				337	* function, ntfs_read_block(), which in turn creates and reads in the buffers
				338	* associated with the page asynchronously.
				339	*
				340	* For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
				341	* data from the mft record (which at this stage is most likely in memory) and
				342	* fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
				343	* even if the mft record is not cached at this point in time, we need to wait
				344	* for it to be read in before we can do the copy.
				345	*
				346	* Return 0 on success and -errno on error.
				347	*/
				348	static int ntfs_readpage(struct file file, struct page page)
				349	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	350	ntfs_inode ni, base_ni;
				351	u8 *kaddr;
				352	ntfs_attr_search_ctx *ctx;
				353	MFT_RECORD *mrec;
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	354	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	355	u32 attr_len;
				356	int err = 0;
				357
Anton Altaparmakov	905685f	2005-03-10 11:06:19 +0000	[diff] [blame^]	358	retry_readpage:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	359	BUG_ON(!PageLocked(page));
				360	/*
				361	* This can potentially happen because we clear PageUptodate() during
				362	* ntfs_writepage() of MstProtected() attributes.
				363	*/
				364	if (PageUptodate(page)) {
				365	unlock_page(page);
				366	return 0;
				367	}
				368	ni = NTFS_I(page->mapping->host);
				369
				370	/* NInoNonResident() == NInoIndexAllocPresent() */
				371	if (NInoNonResident(ni)) {
				372	/*
				373	* Only unnamed $DATA attributes can be compressed or
				374	* encrypted.
				375	*/
				376	if (ni->type == AT_DATA && !ni->name_len) {
				377	/* If file is encrypted, deny access, just like NT4. */
				378	if (NInoEncrypted(ni)) {
				379	err = -EACCES;
				380	goto err_out;
				381	}
				382	/* Compressed data streams are handled in compress.c. */
				383	if (NInoCompressed(ni))
				384	return ntfs_read_compressed_block(page);
				385	}
				386	/* Normal data stream. */
				387	return ntfs_read_block(page);
				388	}
				389	/*
				390	* Attribute is resident, implying it is not compressed or encrypted.
				391	* This also means the attribute is smaller than an mft record and
				392	* hence smaller than a page, so can simply zero out any pages with
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	393	* index above 0.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	394	*/
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	395	if (unlikely(page->index > 0)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	396	kaddr = kmap_atomic(page, KM_USER0);
				397	memset(kaddr, 0, PAGE_CACHE_SIZE);
				398	flush_dcache_page(page);
				399	kunmap_atomic(kaddr, KM_USER0);
				400	goto done;
				401	}
				402	if (!NInoAttr(ni))
				403	base_ni = ni;
				404	else
				405	base_ni = ni->ext.base_ntfs_ino;
				406	/* Map, pin, and lock the mft record. */
				407	mrec = map_mft_record(base_ni);
				408	if (IS_ERR(mrec)) {
				409	err = PTR_ERR(mrec);
				410	goto err_out;
				411	}
Anton Altaparmakov	905685f	2005-03-10 11:06:19 +0000	[diff] [blame^]	412	/*
				413	* If a parallel write made the attribute non-resident, drop the mft
				414	* record and retry the readpage.
				415	*/
				416	if (unlikely(NInoNonResident(ni))) {
				417	unmap_mft_record(base_ni);
				418	goto retry_readpage;
				419	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	420	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
				421	if (unlikely(!ctx)) {
				422	err = -ENOMEM;
				423	goto unm_err_out;
				424	}
				425	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				426	CASE_SENSITIVE, 0, NULL, 0, ctx);
				427	if (unlikely(err))
				428	goto put_unm_err_out;
				429	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	430	read_lock_irqsave(&ni->size_lock, flags);
				431	if (unlikely(attr_len > ni->initialized_size))
				432	attr_len = ni->initialized_size;
				433	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	434	kaddr = kmap_atomic(page, KM_USER0);
				435	/* Copy the data to the page. */
				436	memcpy(kaddr, (u8*)ctx->attr +
				437	le16_to_cpu(ctx->attr->data.resident.value_offset),
				438	attr_len);
				439	/* Zero the remainder of the page. */
				440	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				441	flush_dcache_page(page);
				442	kunmap_atomic(kaddr, KM_USER0);
				443	put_unm_err_out:
				444	ntfs_attr_put_search_ctx(ctx);
				445	unm_err_out:
				446	unmap_mft_record(base_ni);
				447	done:
				448	SetPageUptodate(page);
				449	err_out:
				450	unlock_page(page);
				451	return err;
				452	}
				453
				454	#ifdef NTFS_RW
				455
				456	/**
				457	* ntfs_write_block - write a @page to the backing store
				458	* @page: page cache page to write out
				459	* @wbc: writeback control structure
				460	*
				461	* This function is for writing pages belonging to non-resident, non-mst
				462	* protected attributes to their backing store.
				463	*
				464	* For a page with buffers, map and write the dirty buffers asynchronously
				465	* under page writeback. For a page without buffers, create buffers for the
				466	* page, then proceed as above.
				467	*
				468	* If a page doesn't have buffers the page dirty state is definitive. If a page
				469	* does have buffers, the page dirty state is just a hint, and the buffer dirty
				470	* state is definitive. (A hint which has rules: dirty buffers against a clean
				471	* page is illegal. Other combinations are legal and need to be handled. In
				472	* particular a dirty page containing clean buffers for example.)
				473	*
				474	* Return 0 on success and -errno on error.
				475	*
				476	* Based on ntfs_read_block() and __block_write_full_page().
				477	*/
				478	static int ntfs_write_block(struct page page, struct writeback_control wbc)
				479	{
				480	VCN vcn;
				481	LCN lcn;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	482	s64 initialized_size;
				483	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	484	sector_t block, dblock, iblock;
				485	struct inode *vi;
				486	ntfs_inode *ni;
				487	ntfs_volume *vol;
				488	runlist_element *rl;
				489	struct buffer_head bh, head;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	490	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	491	unsigned int blocksize, vcn_ofs;
				492	int err;
				493	BOOL need_end_writeback;
				494	unsigned char blocksize_bits;
				495
				496	vi = page->mapping->host;
				497	ni = NTFS_I(vi);
				498	vol = ni->vol;
				499
				500	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				501	"0x%lx.", ni->mft_no, ni->type, page->index);
				502
				503	BUG_ON(!NInoNonResident(ni));
				504	BUG_ON(NInoMstProtected(ni));
				505
				506	blocksize_bits = vi->i_blkbits;
				507	blocksize = 1 << blocksize_bits;
				508
				509	if (!page_has_buffers(page)) {
				510	BUG_ON(!PageUptodate(page));
				511	create_empty_buffers(page, blocksize,
				512	(1 << BH_Uptodate) \| (1 << BH_Dirty));
				513	}
				514	bh = head = page_buffers(page);
				515	if (unlikely(!bh)) {
				516	ntfs_warning(vol->sb, "Error allocating page buffers. "
				517	"Redirtying page so we try again later.");
				518	/*
				519	* Put the page back on mapping->dirty_pages, but leave its
				520	* buffer's dirty state as-is.
				521	*/
				522	redirty_page_for_writepage(wbc, page);
				523	unlock_page(page);
				524	return 0;
				525	}
				526
				527	/* NOTE: Different naming scheme to ntfs_read_block()! */
				528
				529	/* The first block in the page. */
				530	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				531
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	532	read_lock_irqsave(&ni->size_lock, flags);
				533	i_size = i_size_read(vi);
				534	initialized_size = ni->initialized_size;
				535	read_unlock_irqrestore(&ni->size_lock, flags);
				536
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	537	/* The first out of bounds block for the data size. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	538	dblock = (i_size + blocksize - 1) >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	539
				540	/* The last (fully or partially) initialized block. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	541	iblock = initialized_size >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	542
				543	/*
				544	* Be very careful. We have no exclusion from __set_page_dirty_buffers
				545	* here, and the (potentially unmapped) buffers may become dirty at
				546	* any time. If a buffer becomes dirty here after we've inspected it
				547	* then we just miss that fact, and the page stays dirty.
				548	*
				549	* Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
				550	* handle that here by just cleaning them.
				551	*/
				552
				553	/*
				554	* Loop through all the buffers in the page, mapping all the dirty
				555	* buffers to disk addresses and handling any aliases from the
				556	* underlying block device's mapping.
				557	*/
				558	rl = NULL;
				559	err = 0;
				560	do {
				561	BOOL is_retry = FALSE;
				562
				563	if (unlikely(block >= dblock)) {
				564	/*
				565	* Mapped buffers outside i_size will occur, because
				566	* this page can be outside i_size when there is a
				567	* truncate in progress. The contents of such buffers
				568	* were zeroed by ntfs_writepage().
				569	*
				570	* FIXME: What about the small race window where
				571	* ntfs_writepage() has not done any clearing because
				572	* the page was within i_size but before we get here,
				573	* vmtruncate() modifies i_size?
				574	*/
				575	clear_buffer_dirty(bh);
				576	set_buffer_uptodate(bh);
				577	continue;
				578	}
				579
				580	/* Clean buffers are not written out, so no need to map them. */
				581	if (!buffer_dirty(bh))
				582	continue;
				583
				584	/* Make sure we have enough initialized size. */
				585	if (unlikely((block >= iblock) &&
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	586	(initialized_size < i_size))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	587	/*
				588	* If this page is fully outside initialized size, zero
				589	* out all pages between the current initialized size
				590	* and the current page. Just use ntfs_readpage() to do
				591	* the zeroing transparently.
				592	*/
				593	if (block > iblock) {
				594	// TODO:
				595	// For each page do:
				596	// - read_cache_page()
				597	// Again for each page do:
				598	// - wait_on_page_locked()
				599	// - Check (PageUptodate(page) &&
				600	// !PageError(page))
				601	// Update initialized size in the attribute and
				602	// in the inode.
				603	// Again, for each page do:
				604	// __set_page_dirty_buffers();
				605	// page_cache_release()
				606	// We don't need to wait on the writes.
				607	// Update iblock.
				608	}
				609	/*
				610	* The current page straddles initialized size. Zero
				611	* all non-uptodate buffers and set them uptodate (and
				612	* dirty?). Note, there aren't any non-uptodate buffers
				613	* if the page is uptodate.
				614	* FIXME: For an uptodate page, the buffers may need to
				615	* be written out because they were not initialized on
				616	* disk before.
				617	*/
				618	if (!PageUptodate(page)) {
				619	// TODO:
				620	// Zero any non-uptodate buffers up to i_size.
				621	// Set them uptodate and dirty.
				622	}
				623	// TODO:
				624	// Update initialized size in the attribute and in the
				625	// inode (up to i_size).
				626	// Update iblock.
				627	// FIXME: This is inefficient. Try to batch the two
				628	// size changes to happen in one go.
				629	ntfs_error(vol->sb, "Writing beyond initialized size "
				630	"is not supported yet. Sorry.");
				631	err = -EOPNOTSUPP;
				632	break;
				633	// Do NOT set_buffer_new() BUT DO clear buffer range
				634	// outside write request range.
				635	// set_buffer_uptodate() on complete buffers as well as
				636	// set_buffer_dirty().
				637	}
				638
				639	/* No need to map buffers that are already mapped. */
				640	if (buffer_mapped(bh))
				641	continue;
				642
				643	/* Unmapped, dirty buffer. Need to map it. */
				644	bh->b_bdev = vol->sb->s_bdev;
				645
				646	/* Convert block into corresponding vcn and offset. */
				647	vcn = (VCN)block << blocksize_bits;
				648	vcn_ofs = vcn & vol->cluster_size_mask;
				649	vcn >>= vol->cluster_size_bits;
				650	if (!rl) {
				651	lock_retry_remap:
				652	down_read(&ni->runlist.lock);
				653	rl = ni->runlist.rl;
				654	}
				655	if (likely(rl != NULL)) {
				656	/* Seek to element containing target vcn. */
				657	while (rl->length && rl[1].vcn <= vcn)
				658	rl++;
				659	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				660	} else
				661	lcn = LCN_RL_NOT_MAPPED;
				662	/* Successful remap. */
				663	if (lcn >= 0) {
				664	/* Setup buffer head to point to correct block. */
				665	bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
				666	vcn_ofs) >> blocksize_bits;
				667	set_buffer_mapped(bh);
				668	continue;
				669	}
				670	/* It is a hole, need to instantiate it. */
				671	if (lcn == LCN_HOLE) {
				672	// TODO: Instantiate the hole.
				673	// clear_buffer_new(bh);
				674	// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
				675	ntfs_error(vol->sb, "Writing into sparse regions is "
				676	"not supported yet. Sorry.");
				677	err = -EOPNOTSUPP;
				678	break;
				679	}
				680	/* If first try and runlist unmapped, map and retry. */
				681	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				682	is_retry = TRUE;
				683	/*
				684	* Attempt to map runlist, dropping lock for
				685	* the duration.
				686	*/
				687	up_read(&ni->runlist.lock);
				688	err = ntfs_map_runlist(ni, vcn);
				689	if (likely(!err))
				690	goto lock_retry_remap;
				691	rl = NULL;
				692	lcn = err;
				693	}
				694	/* Failed to map the buffer, even after retrying. */
				695	bh->b_blocknr = -1;
				696	ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				697	"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				698	"because its location on disk could not be "
				699	"determined%s (error code %lli).", ni->mft_no,
				700	ni->type, (unsigned long long)vcn,
				701	vcn_ofs, is_retry ? " even after "
				702	"retrying" : "", (long long)lcn);
				703	if (!err)
				704	err = -EIO;
				705	break;
				706	} while (block++, (bh = bh->b_this_page) != head);
				707
				708	/* Release the lock if we took it. */
				709	if (rl)
				710	up_read(&ni->runlist.lock);
				711
				712	/* For the error case, need to reset bh to the beginning. */
				713	bh = head;
				714
				715	/* Just an optimization, so ->readpage() isn't called later. */
				716	if (unlikely(!PageUptodate(page))) {
				717	int uptodate = 1;
				718	do {
				719	if (!buffer_uptodate(bh)) {
				720	uptodate = 0;
				721	bh = head;
				722	break;
				723	}
				724	} while ((bh = bh->b_this_page) != head);
				725	if (uptodate)
				726	SetPageUptodate(page);
				727	}
				728
				729	/* Setup all mapped, dirty buffers for async write i/o. */
				730	do {
				731	get_bh(bh);
				732	if (buffer_mapped(bh) && buffer_dirty(bh)) {
				733	lock_buffer(bh);
				734	if (test_clear_buffer_dirty(bh)) {
				735	BUG_ON(!buffer_uptodate(bh));
				736	mark_buffer_async_write(bh);
				737	} else
				738	unlock_buffer(bh);
				739	} else if (unlikely(err)) {
				740	/*
				741	* For the error case. The buffer may have been set
				742	* dirty during attachment to a dirty page.
				743	*/
				744	if (err != -ENOMEM)
				745	clear_buffer_dirty(bh);
				746	}
				747	} while ((bh = bh->b_this_page) != head);
				748
				749	if (unlikely(err)) {
				750	// TODO: Remove the -EOPNOTSUPP check later on...
				751	if (unlikely(err == -EOPNOTSUPP))
				752	err = 0;
				753	else if (err == -ENOMEM) {
				754	ntfs_warning(vol->sb, "Error allocating memory. "
				755	"Redirtying page so we try again "
				756	"later.");
				757	/*
				758	* Put the page back on mapping->dirty_pages, but
				759	* leave its buffer's dirty state as-is.
				760	*/
				761	redirty_page_for_writepage(wbc, page);
				762	err = 0;
				763	} else
				764	SetPageError(page);
				765	}
				766
				767	BUG_ON(PageWriteback(page));
				768	set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
				769	unlock_page(page);
				770
				771	/*
				772	* Submit the prepared buffers for i/o. Note the page is unlocked,
				773	* and the async write i/o completion handler can end_page_writeback()
				774	* at any time after the first submit_bh(). So the buffers can then
				775	* disappear...
				776	*/
				777	need_end_writeback = TRUE;
				778	do {
				779	struct buffer_head *next = bh->b_this_page;
				780	if (buffer_async_write(bh)) {
				781	submit_bh(WRITE, bh);
				782	need_end_writeback = FALSE;
				783	}
				784	put_bh(bh);
				785	bh = next;
				786	} while (bh != head);
				787
				788	/* If no i/o was started, need to end_page_writeback(). */
				789	if (unlikely(need_end_writeback))
				790	end_page_writeback(page);
				791
				792	ntfs_debug("Done.");
				793	return err;
				794	}
				795
				796	/**
				797	* ntfs_write_mst_block - write a @page to the backing store
				798	* @page: page cache page to write out
				799	* @wbc: writeback control structure
				800	*
				801	* This function is for writing pages belonging to non-resident, mst protected
				802	* attributes to their backing store. The only supported attributes are index
				803	* allocation and $MFT/$DATA. Both directory inodes and index inodes are
				804	* supported for the index allocation case.
				805	*
				806	* The page must remain locked for the duration of the write because we apply
				807	* the mst fixups, write, and then undo the fixups, so if we were to unlock the
				808	* page before undoing the fixups, any other user of the page will see the
				809	* page contents as corrupt.
				810	*
				811	* We clear the page uptodate flag for the duration of the function to ensure
				812	* exclusion for the $MFT/$DATA case against someone mapping an mft record we
				813	* are about to apply the mst fixups to.
				814	*
				815	* Return 0 on success and -errno on error.
				816	*
				817	* Based on ntfs_write_block(), ntfs_mft_writepage(), and
				818	* write_mft_record_nolock().
				819	*/
				820	static int ntfs_write_mst_block(struct page *page,
				821	struct writeback_control *wbc)
				822	{
				823	sector_t block, dblock, rec_block;
				824	struct inode *vi = page->mapping->host;
				825	ntfs_inode *ni = NTFS_I(vi);
				826	ntfs_volume *vol = ni->vol;
				827	u8 *kaddr;
				828	unsigned char bh_size_bits = vi->i_blkbits;
				829	unsigned int bh_size = 1 << bh_size_bits;
				830	unsigned int rec_size = ni->itype.index.block_size;
				831	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
				832	struct buffer_head bh, head, tbh, rec_start_bh;
				833	int max_bhs = PAGE_CACHE_SIZE / bh_size;
				834	struct buffer_head *bhs[max_bhs];
				835	runlist_element *rl;
				836	int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2;
				837	unsigned rec_size_bits;
				838	BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
				839
				840	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				841	"0x%lx.", vi->i_ino, ni->type, page->index);
				842	BUG_ON(!NInoNonResident(ni));
				843	BUG_ON(!NInoMstProtected(ni));
				844	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
				845	/*
				846	* NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
				847	* in its page cache were to be marked dirty. However this should
				848	* never happen with the current driver and considering we do not
				849	* handle this case here we do want to BUG(), at least for now.
				850	*/
				851	BUG_ON(!(is_mft \|\| S_ISDIR(vi->i_mode) \|\|
				852	(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
				853	BUG_ON(!max_bhs);
				854
				855	/* Were we called for sync purposes? */
				856	sync = (wbc->sync_mode == WB_SYNC_ALL);
				857
				858	/* Make sure we have mapped buffers. */
				859	BUG_ON(!page_has_buffers(page));
				860	bh = head = page_buffers(page);
				861	BUG_ON(!bh);
				862
				863	rec_size_bits = ni->itype.index.block_size_bits;
				864	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
				865	bhs_per_rec = rec_size >> bh_size_bits;
				866	BUG_ON(!bhs_per_rec);
				867
				868	/* The first block in the page. */
				869	rec_block = block = (sector_t)page->index <<
				870	(PAGE_CACHE_SHIFT - bh_size_bits);
				871
				872	/* The first out of bounds block for the data size. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	873	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	874
				875	rl = NULL;
				876	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
				877	page_is_dirty = rec_is_dirty = FALSE;
				878	rec_start_bh = NULL;
				879	do {
				880	BOOL is_retry = FALSE;
				881
				882	if (likely(block < rec_block)) {
				883	if (unlikely(block >= dblock)) {
				884	clear_buffer_dirty(bh);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	885	set_buffer_uptodate(bh);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	886	continue;
				887	}
				888	/*
				889	* This block is not the first one in the record. We
				890	* ignore the buffer's dirty state because we could
				891	* have raced with a parallel mark_ntfs_record_dirty().
				892	*/
				893	if (!rec_is_dirty)
				894	continue;
				895	if (unlikely(err2)) {
				896	if (err2 != -ENOMEM)
				897	clear_buffer_dirty(bh);
				898	continue;
				899	}
				900	} else /* if (block == rec_block) */ {
				901	BUG_ON(block > rec_block);
				902	/* This block is the first one in the record. */
				903	rec_block += bhs_per_rec;
				904	err2 = 0;
				905	if (unlikely(block >= dblock)) {
				906	clear_buffer_dirty(bh);
				907	continue;
				908	}
				909	if (!buffer_dirty(bh)) {
				910	/* Clean records are not written out. */
				911	rec_is_dirty = FALSE;
				912	continue;
				913	}
				914	rec_is_dirty = TRUE;
				915	rec_start_bh = bh;
				916	}
				917	/* Need to map the buffer if it is not mapped already. */
				918	if (unlikely(!buffer_mapped(bh))) {
				919	VCN vcn;
				920	LCN lcn;
				921	unsigned int vcn_ofs;
				922
				923	/* Obtain the vcn and offset of the current block. */
				924	vcn = (VCN)block << bh_size_bits;
				925	vcn_ofs = vcn & vol->cluster_size_mask;
				926	vcn >>= vol->cluster_size_bits;
				927	if (!rl) {
				928	lock_retry_remap:
				929	down_read(&ni->runlist.lock);
				930	rl = ni->runlist.rl;
				931	}
				932	if (likely(rl != NULL)) {
				933	/* Seek to element containing target vcn. */
				934	while (rl->length && rl[1].vcn <= vcn)
				935	rl++;
				936	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				937	} else
				938	lcn = LCN_RL_NOT_MAPPED;
				939	/* Successful remap. */
				940	if (likely(lcn >= 0)) {
				941	/* Setup buffer head to correct block. */
				942	bh->b_blocknr = ((lcn <<
				943	vol->cluster_size_bits) +
				944	vcn_ofs) >> bh_size_bits;
				945	set_buffer_mapped(bh);
				946	} else {
				947	/*
				948	* Remap failed. Retry to map the runlist once
				949	* unless we are working on $MFT which always
				950	* has the whole of its runlist in memory.
				951	*/
				952	if (!is_mft && !is_retry &&
				953	lcn == LCN_RL_NOT_MAPPED) {
				954	is_retry = TRUE;
				955	/*
				956	* Attempt to map runlist, dropping
				957	* lock for the duration.
				958	*/
				959	up_read(&ni->runlist.lock);
				960	err2 = ntfs_map_runlist(ni, vcn);
				961	if (likely(!err2))
				962	goto lock_retry_remap;
				963	if (err2 == -ENOMEM)
				964	page_is_dirty = TRUE;
				965	lcn = err2;
				966	} else
				967	err2 = -EIO;
				968	/* Hard error. Abort writing this record. */
				969	if (!err \|\| err == -ENOMEM)
				970	err = err2;
				971	bh->b_blocknr = -1;
				972	ntfs_error(vol->sb, "Cannot write ntfs record "
				973	"0x%llx (inode 0x%lx, "
				974	"attribute type 0x%x) because "
				975	"its location on disk could "
				976	"not be determined (error "
Randy Dunlap	8907547d	2005-03-03 11:19:53 +0000	[diff] [blame]	977	"code %lli).",
				978	(long long)block <<
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	979	bh_size_bits >>
				980	vol->mft_record_size_bits,
				981	ni->mft_no, ni->type,
				982	(long long)lcn);
				983	/*
				984	* If this is not the first buffer, remove the
				985	* buffers in this record from the list of
				986	* buffers to write and clear their dirty bit
				987	* if not error -ENOMEM.
				988	*/
				989	if (rec_start_bh != bh) {
				990	while (bhs[--nr_bhs] != rec_start_bh)
				991	;
				992	if (err2 != -ENOMEM) {
				993	do {
				994	clear_buffer_dirty(
				995	rec_start_bh);
				996	} while ((rec_start_bh =
				997	rec_start_bh->
				998	b_this_page) !=
				999	bh);
				1000	}
				1001	}
				1002	continue;
				1003	}
				1004	}
				1005	BUG_ON(!buffer_uptodate(bh));
				1006	BUG_ON(nr_bhs >= max_bhs);
				1007	bhs[nr_bhs++] = bh;
				1008	} while (block++, (bh = bh->b_this_page) != head);
				1009	if (unlikely(rl))
				1010	up_read(&ni->runlist.lock);
				1011	/* If there were no dirty buffers, we are done. */
				1012	if (!nr_bhs)
				1013	goto done;
				1014	/* Map the page so we can access its contents. */
				1015	kaddr = kmap(page);
				1016	/* Clear the page uptodate flag whilst the mst fixups are applied. */
				1017	BUG_ON(!PageUptodate(page));
				1018	ClearPageUptodate(page);
				1019	for (i = 0; i < nr_bhs; i++) {
				1020	unsigned int ofs;
				1021
				1022	/* Skip buffers which are not at the beginning of records. */
				1023	if (i % bhs_per_rec)
				1024	continue;
				1025	tbh = bhs[i];
				1026	ofs = bh_offset(tbh);
				1027	if (is_mft) {
				1028	ntfs_inode *tni;
				1029	unsigned long mft_no;
				1030
				1031	/* Get the mft record number. */
				1032	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1033	>> rec_size_bits;
				1034	/* Check whether to write this mft record. */
				1035	tni = NULL;
				1036	if (!ntfs_may_write_mft_record(vol, mft_no,
				1037	(MFT_RECORD*)(kaddr + ofs), &tni)) {
				1038	/*
				1039	* The record should not be written. This
				1040	* means we need to redirty the page before
				1041	* returning.
				1042	*/
				1043	page_is_dirty = TRUE;
				1044	/*
				1045	* Remove the buffers in this mft record from
				1046	* the list of buffers to write.
				1047	*/
				1048	do {
				1049	bhs[i] = NULL;
				1050	} while (++i % bhs_per_rec);
				1051	continue;
				1052	}
				1053	/*
				1054	* The record should be written. If a locked ntfs
				1055	* inode was returned, add it to the array of locked
				1056	* ntfs inodes.
				1057	*/
				1058	if (tni)
				1059	locked_nis[nr_locked_nis++] = tni;
				1060	}
				1061	/* Apply the mst protection fixups. */
				1062	err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				1063	rec_size);
				1064	if (unlikely(err2)) {
				1065	if (!err \|\| err == -ENOMEM)
				1066	err = -EIO;
				1067	ntfs_error(vol->sb, "Failed to apply mst fixups "
				1068	"(inode 0x%lx, attribute type 0x%x, "
				1069	"page index 0x%lx, page offset 0x%x)!"
				1070	" Unmount and run chkdsk.", vi->i_ino,
				1071	ni->type, page->index, ofs);
				1072	/*
				1073	* Mark all the buffers in this record clean as we do
				1074	* not want to write corrupt data to disk.
				1075	*/
				1076	do {
				1077	clear_buffer_dirty(bhs[i]);
				1078	bhs[i] = NULL;
				1079	} while (++i % bhs_per_rec);
				1080	continue;
				1081	}
				1082	nr_recs++;
				1083	}
				1084	/* If no records are to be written out, we are done. */
				1085	if (!nr_recs)
				1086	goto unm_done;
				1087	flush_dcache_page(page);
				1088	/* Lock buffers and start synchronous write i/o on them. */
				1089	for (i = 0; i < nr_bhs; i++) {
				1090	tbh = bhs[i];
				1091	if (!tbh)
				1092	continue;
				1093	if (unlikely(test_set_buffer_locked(tbh)))
				1094	BUG();
				1095	/* The buffer dirty state is now irrelevant, just clean it. */
				1096	clear_buffer_dirty(tbh);
				1097	BUG_ON(!buffer_uptodate(tbh));
				1098	BUG_ON(!buffer_mapped(tbh));
				1099	get_bh(tbh);
				1100	tbh->b_end_io = end_buffer_write_sync;
				1101	submit_bh(WRITE, tbh);
				1102	}
				1103	/* Synchronize the mft mirror now if not @sync. */
				1104	if (is_mft && !sync)
				1105	goto do_mirror;
				1106	do_wait:
				1107	/* Wait on i/o completion of buffers. */
				1108	for (i = 0; i < nr_bhs; i++) {
				1109	tbh = bhs[i];
				1110	if (!tbh)
				1111	continue;
				1112	wait_on_buffer(tbh);
				1113	if (unlikely(!buffer_uptodate(tbh))) {
				1114	ntfs_error(vol->sb, "I/O error while writing ntfs "
				1115	"record buffer (inode 0x%lx, "
				1116	"attribute type 0x%x, page index "
				1117	"0x%lx, page offset 0x%lx)! Unmount "
				1118	"and run chkdsk.", vi->i_ino, ni->type,
				1119	page->index, bh_offset(tbh));
				1120	if (!err \|\| err == -ENOMEM)
				1121	err = -EIO;
				1122	/*
				1123	* Set the buffer uptodate so the page and buffer
				1124	* states do not become out of sync.
				1125	*/
				1126	set_buffer_uptodate(tbh);
				1127	}
				1128	}
				1129	/* If @sync, now synchronize the mft mirror. */
				1130	if (is_mft && sync) {
				1131	do_mirror:
				1132	for (i = 0; i < nr_bhs; i++) {
				1133	unsigned long mft_no;
				1134	unsigned int ofs;
				1135
				1136	/*
				1137	* Skip buffers which are not at the beginning of
				1138	* records.
				1139	*/
				1140	if (i % bhs_per_rec)
				1141	continue;
				1142	tbh = bhs[i];
				1143	/* Skip removed buffers (and hence records). */
				1144	if (!tbh)
				1145	continue;
				1146	ofs = bh_offset(tbh);
				1147	/* Get the mft record number. */
				1148	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1149	>> rec_size_bits;
				1150	if (mft_no < vol->mftmirr_size)
				1151	ntfs_sync_mft_mirror(vol, mft_no,
				1152	(MFT_RECORD*)(kaddr + ofs),
				1153	sync);
				1154	}
				1155	if (!sync)
				1156	goto do_wait;
				1157	}
				1158	/* Remove the mst protection fixups again. */
				1159	for (i = 0; i < nr_bhs; i++) {
				1160	if (!(i % bhs_per_rec)) {
				1161	tbh = bhs[i];
				1162	if (!tbh)
				1163	continue;
				1164	post_write_mst_fixup((NTFS_RECORD*)(kaddr +
				1165	bh_offset(tbh)));
				1166	}
				1167	}
				1168	flush_dcache_page(page);
				1169	unm_done:
				1170	/* Unlock any locked inodes. */
				1171	while (nr_locked_nis-- > 0) {
				1172	ntfs_inode tni, base_tni;
				1173
				1174	tni = locked_nis[nr_locked_nis];
				1175	/* Get the base inode. */
				1176	down(&tni->extent_lock);
				1177	if (tni->nr_extents >= 0)
				1178	base_tni = tni;
				1179	else {
				1180	base_tni = tni->ext.base_ntfs_ino;
				1181	BUG_ON(!base_tni);
				1182	}
				1183	up(&tni->extent_lock);
				1184	ntfs_debug("Unlocking %s inode 0x%lx.",
				1185	tni == base_tni ? "base" : "extent",
				1186	tni->mft_no);
				1187	up(&tni->mrec_lock);
				1188	atomic_dec(&tni->count);
				1189	iput(VFS_I(base_tni));
				1190	}
				1191	SetPageUptodate(page);
				1192	kunmap(page);
				1193	done:
				1194	if (unlikely(err && err != -ENOMEM)) {
				1195	/*
				1196	* Set page error if there is only one ntfs record in the page.
				1197	* Otherwise we would loose per-record granularity.
				1198	*/
				1199	if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
				1200	SetPageError(page);
				1201	NVolSetErrors(vol);
				1202	}
				1203	if (page_is_dirty) {
				1204	ntfs_debug("Page still contains one or more dirty ntfs "
				1205	"records. Redirtying the page starting at "
				1206	"record 0x%lx.", page->index <<
				1207	(PAGE_CACHE_SHIFT - rec_size_bits));
				1208	redirty_page_for_writepage(wbc, page);
				1209	unlock_page(page);
				1210	} else {
				1211	/*
				1212	* Keep the VM happy. This must be done otherwise the
				1213	* radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
				1214	* the page is clean.
				1215	*/
				1216	BUG_ON(PageWriteback(page));
				1217	set_page_writeback(page);
				1218	unlock_page(page);
				1219	end_page_writeback(page);
				1220	}
				1221	if (likely(!err))
				1222	ntfs_debug("Done.");
				1223	return err;
				1224	}
				1225
				1226	/**
				1227	* ntfs_writepage - write a @page to the backing store
				1228	* @page: page cache page to write out
				1229	* @wbc: writeback control structure
				1230	*
				1231	* This is called from the VM when it wants to have a dirty ntfs page cache
				1232	* page cleaned. The VM has already locked the page and marked it clean.
				1233	*
				1234	* For non-resident attributes, ntfs_writepage() writes the @page by calling
				1235	* the ntfs version of the generic block_write_full_page() function,
				1236	* ntfs_write_block(), which in turn if necessary creates and writes the
				1237	* buffers associated with the page asynchronously.
				1238	*
				1239	* For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
				1240	* the data to the mft record (which at this stage is most likely in memory).
				1241	* The mft record is then marked dirty and written out asynchronously via the
				1242	* vfs inode dirty code path for the inode the mft record belongs to or via the
				1243	* vm page dirty code path for the page the mft record is in.
				1244	*
				1245	* Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
				1246	*
				1247	* Return 0 on success and -errno on error.
				1248	*/
				1249	static int ntfs_writepage(struct page page, struct writeback_control wbc)
				1250	{
				1251	loff_t i_size;
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1252	struct inode *vi = page->mapping->host;
				1253	ntfs_inode base_ni = NULL, ni = NTFS_I(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1254	char *kaddr;
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1255	ntfs_attr_search_ctx *ctx = NULL;
				1256	MFT_RECORD *m = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1257	u32 attr_len;
				1258	int err;
				1259
Anton Altaparmakov	905685f	2005-03-10 11:06:19 +0000	[diff] [blame^]	1260	retry_writepage:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1261	BUG_ON(!PageLocked(page));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1262	i_size = i_size_read(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1263	/* Is the page fully outside i_size? (truncate in progress) */
				1264	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
				1265	PAGE_CACHE_SHIFT)) {
				1266	/*
				1267	* The page may have dirty, unmapped buffers. Make them
				1268	* freeable here, so the page does not leak.
				1269	*/
				1270	block_invalidatepage(page, 0);
				1271	unlock_page(page);
				1272	ntfs_debug("Write outside i_size - truncated?");
				1273	return 0;
				1274	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1275	/* NInoNonResident() == NInoIndexAllocPresent() */
				1276	if (NInoNonResident(ni)) {
				1277	/*
				1278	* Only unnamed $DATA attributes can be compressed, encrypted,
				1279	* and/or sparse.
				1280	*/
				1281	if (ni->type == AT_DATA && !ni->name_len) {
				1282	/* If file is encrypted, deny access, just like NT4. */
				1283	if (NInoEncrypted(ni)) {
				1284	unlock_page(page);
				1285	ntfs_debug("Denying write access to encrypted "
				1286	"file.");
				1287	return -EACCES;
				1288	}
				1289	/* Compressed data streams are handled in compress.c. */
				1290	if (NInoCompressed(ni)) {
				1291	// TODO: Implement and replace this check with
				1292	// return ntfs_write_compressed_block(page);
				1293	unlock_page(page);
				1294	ntfs_error(vi->i_sb, "Writing to compressed "
				1295	"files is not supported yet. "
				1296	"Sorry.");
				1297	return -EOPNOTSUPP;
				1298	}
				1299	// TODO: Implement and remove this check.
				1300	if (NInoSparse(ni)) {
				1301	unlock_page(page);
				1302	ntfs_error(vi->i_sb, "Writing to sparse files "
				1303	"is not supported yet. Sorry.");
				1304	return -EOPNOTSUPP;
				1305	}
				1306	}
				1307	/* We have to zero every time due to mmap-at-end-of-file. */
				1308	if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
				1309	/* The page straddles i_size. */
				1310	unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
				1311	kaddr = kmap_atomic(page, KM_USER0);
				1312	memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
				1313	flush_dcache_page(page);
				1314	kunmap_atomic(kaddr, KM_USER0);
				1315	}
				1316	/* Handle mst protected attributes. */
				1317	if (NInoMstProtected(ni))
				1318	return ntfs_write_mst_block(page, wbc);
				1319	/* Normal data stream. */
				1320	return ntfs_write_block(page, wbc);
				1321	}
				1322	/*
				1323	* Attribute is resident, implying it is not compressed, encrypted,
				1324	* sparse, or mst protected. This also means the attribute is smaller
				1325	* than an mft record and hence smaller than a page, so can simply
				1326	* return error on any pages with index above 0.
				1327	*/
				1328	BUG_ON(page_has_buffers(page));
				1329	BUG_ON(!PageUptodate(page));
				1330	if (unlikely(page->index > 0)) {
				1331	ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
				1332	"Aborting write.", page->index);
				1333	BUG_ON(PageWriteback(page));
				1334	set_page_writeback(page);
				1335	unlock_page(page);
				1336	end_page_writeback(page);
				1337	return -EIO;
				1338	}
				1339	if (!NInoAttr(ni))
				1340	base_ni = ni;
				1341	else
				1342	base_ni = ni->ext.base_ntfs_ino;
				1343	/* Map, pin, and lock the mft record. */
				1344	m = map_mft_record(base_ni);
				1345	if (IS_ERR(m)) {
				1346	err = PTR_ERR(m);
				1347	m = NULL;
				1348	ctx = NULL;
				1349	goto err_out;
				1350	}
Anton Altaparmakov	905685f	2005-03-10 11:06:19 +0000	[diff] [blame^]	1351	/*
				1352	* If a parallel write made the attribute non-resident, drop the mft
				1353	* record and retry the writepage.
				1354	*/
				1355	if (unlikely(NInoNonResident(ni))) {
				1356	unmap_mft_record(base_ni);
				1357	goto retry_writepage;
				1358	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1359	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1360	if (unlikely(!ctx)) {
				1361	err = -ENOMEM;
				1362	goto err_out;
				1363	}
				1364	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1365	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1366	if (unlikely(err))
				1367	goto err_out;
				1368	/*
				1369	* Keep the VM happy. This must be done otherwise the radix-tree tag
				1370	* PAGECACHE_TAG_DIRTY remains set even though the page is clean.
				1371	*/
				1372	BUG_ON(PageWriteback(page));
				1373	set_page_writeback(page);
				1374	unlock_page(page);
				1375
				1376	/*
				1377	* Here, we don't need to zero the out of bounds area everytime because
				1378	* the below memcpy() already takes care of the mmap-at-end-of-file
				1379	* requirements. If the file is converted to a non-resident one, then
				1380	* the code path use is switched to the non-resident one where the
				1381	* zeroing happens on each ntfs_writepage() invocation.
				1382	*
				1383	* The above also applies nicely when i_size is decreased.
				1384	*
				1385	* When i_size is increased, the memory between the old and new i_size
				1386	* _must_ be zeroed (or overwritten with new data). Otherwise we will
				1387	* expose data to userspace/disk which should never have been exposed.
				1388	*
				1389	* FIXME: Ensure that i_size increases do the zeroing/overwriting and
				1390	* if we cannot guarantee that, then enable the zeroing below. If the
				1391	* zeroing below is enabled, we MUST move the unlock_page() from above
				1392	* to after the kunmap_atomic(), i.e. just before the
				1393	* end_page_writeback().
				1394	* UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
				1395	* increases for resident attributes so those are ok.
				1396	* TODO: ntfs_truncate(), others?
				1397	*/
				1398
				1399	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1400	i_size = i_size_read(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1401	if (unlikely(attr_len > i_size)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1402	attr_len = i_size;
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1403	ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1404	}
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1405	kaddr = kmap_atomic(page, KM_USER0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1406	/* Copy the data from the page to the mft record. */
				1407	memcpy((u8*)ctx->attr +
				1408	le16_to_cpu(ctx->attr->data.resident.value_offset),
				1409	kaddr, attr_len);
				1410	flush_dcache_mft_record_page(ctx->ntfs_ino);
				1411	/* Zero out of bounds area in the page cache page. */
				1412	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				1413	flush_dcache_page(page);
				1414	kunmap_atomic(kaddr, KM_USER0);
				1415
				1416	end_page_writeback(page);
				1417
				1418	/* Mark the mft record dirty, so it gets written back. */
				1419	mark_mft_record_dirty(ctx->ntfs_ino);
				1420	ntfs_attr_put_search_ctx(ctx);
				1421	unmap_mft_record(base_ni);
				1422	return 0;
				1423	err_out:
				1424	if (err == -ENOMEM) {
				1425	ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				1426	"page so we try again later.");
				1427	/*
				1428	* Put the page back on mapping->dirty_pages, but leave its
				1429	* buffers' dirty state as-is.
				1430	*/
				1431	redirty_page_for_writepage(wbc, page);
				1432	err = 0;
				1433	} else {
				1434	ntfs_error(vi->i_sb, "Resident attribute write failed with "
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1435	"error %i.", err);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1436	SetPageError(page);
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1437	NVolSetErrors(ni->vol);
				1438	make_bad_inode(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1439	}
				1440	unlock_page(page);
				1441	if (ctx)
				1442	ntfs_attr_put_search_ctx(ctx);
				1443	if (m)
				1444	unmap_mft_record(base_ni);
				1445	return err;
				1446	}
				1447
				1448	/**
				1449	* ntfs_prepare_nonresident_write -
				1450	*
				1451	*/
				1452	static int ntfs_prepare_nonresident_write(struct page *page,
				1453	unsigned from, unsigned to)
				1454	{
				1455	VCN vcn;
				1456	LCN lcn;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1457	s64 initialized_size;
				1458	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1459	sector_t block, ablock, iblock;
				1460	struct inode *vi;
				1461	ntfs_inode *ni;
				1462	ntfs_volume *vol;
				1463	runlist_element *rl;
				1464	struct buffer_head bh, head, wait[2], *wait_bh = wait;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1465	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1466	unsigned int vcn_ofs, block_start, block_end, blocksize;
				1467	int err;
				1468	BOOL is_retry;
				1469	unsigned char blocksize_bits;
				1470
				1471	vi = page->mapping->host;
				1472	ni = NTFS_I(vi);
				1473	vol = ni->vol;
				1474
				1475	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				1476	"0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
				1477	page->index, from, to);
				1478
				1479	BUG_ON(!NInoNonResident(ni));
				1480
				1481	blocksize_bits = vi->i_blkbits;
				1482	blocksize = 1 << blocksize_bits;
				1483
				1484	/*
				1485	* create_empty_buffers() will create uptodate/dirty buffers if the
				1486	* page is uptodate/dirty.
				1487	*/
				1488	if (!page_has_buffers(page))
				1489	create_empty_buffers(page, blocksize, 0);
				1490	bh = head = page_buffers(page);
				1491	if (unlikely(!bh))
				1492	return -ENOMEM;
				1493
				1494	/* The first block in the page. */
				1495	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				1496
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1497	read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1498	/*
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	1499	* The first out of bounds block for the allocated size. No need to
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1500	* round up as allocated_size is in multiples of cluster size and the
				1501	* minimum cluster size is 512 bytes, which is equal to the smallest
				1502	* blocksize.
				1503	*/
				1504	ablock = ni->allocated_size >> blocksize_bits;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1505	i_size = i_size_read(vi);
				1506	initialized_size = ni->initialized_size;
				1507	read_unlock_irqrestore(&ni->size_lock, flags);
				1508
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1509	/* The last (fully or partially) initialized block. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1510	iblock = initialized_size >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1511
				1512	/* Loop through all the buffers in the page. */
				1513	block_start = 0;
				1514	rl = NULL;
				1515	err = 0;
				1516	do {
				1517	block_end = block_start + blocksize;
				1518	/*
				1519	* If buffer @bh is outside the write, just mark it uptodate
				1520	* if the page is uptodate and continue with the next buffer.
				1521	*/
				1522	if (block_end <= from \|\| block_start >= to) {
				1523	if (PageUptodate(page)) {
				1524	if (!buffer_uptodate(bh))
				1525	set_buffer_uptodate(bh);
				1526	}
				1527	continue;
				1528	}
				1529	/*
				1530	* @bh is at least partially being written to.
				1531	* Make sure it is not marked as new.
				1532	*/
				1533	//if (buffer_new(bh))
				1534	// clear_buffer_new(bh);
				1535
				1536	if (block >= ablock) {
				1537	// TODO: block is above allocated_size, need to
				1538	// allocate it. Best done in one go to accommodate not
				1539	// only block but all above blocks up to and including:
				1540	// ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
				1541	// - 1) >> blobksize_bits. Obviously will need to round
				1542	// up to next cluster boundary, too. This should be
				1543	// done with a helper function, so it can be reused.
				1544	ntfs_error(vol->sb, "Writing beyond allocated size "
				1545	"is not supported yet. Sorry.");
				1546	err = -EOPNOTSUPP;
				1547	goto err_out;
				1548	// Need to update ablock.
				1549	// Need to set_buffer_new() on all block bhs that are
				1550	// newly allocated.
				1551	}
				1552	/*
				1553	* Now we have enough allocated size to fulfill the whole
				1554	* request, i.e. block < ablock is true.
				1555	*/
				1556	if (unlikely((block >= iblock) &&
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1557	(initialized_size < i_size))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1558	/*
				1559	* If this page is fully outside initialized size, zero
				1560	* out all pages between the current initialized size
				1561	* and the current page. Just use ntfs_readpage() to do
				1562	* the zeroing transparently.
				1563	*/
				1564	if (block > iblock) {
				1565	// TODO:
				1566	// For each page do:
				1567	// - read_cache_page()
				1568	// Again for each page do:
				1569	// - wait_on_page_locked()
				1570	// - Check (PageUptodate(page) &&
				1571	// !PageError(page))
				1572	// Update initialized size in the attribute and
				1573	// in the inode.
				1574	// Again, for each page do:
				1575	// __set_page_dirty_buffers();
				1576	// page_cache_release()
				1577	// We don't need to wait on the writes.
				1578	// Update iblock.
				1579	}
				1580	/*
				1581	* The current page straddles initialized size. Zero
				1582	* all non-uptodate buffers and set them uptodate (and
				1583	* dirty?). Note, there aren't any non-uptodate buffers
				1584	* if the page is uptodate.
				1585	* FIXME: For an uptodate page, the buffers may need to
				1586	* be written out because they were not initialized on
				1587	* disk before.
				1588	*/
				1589	if (!PageUptodate(page)) {
				1590	// TODO:
				1591	// Zero any non-uptodate buffers up to i_size.
				1592	// Set them uptodate and dirty.
				1593	}
				1594	// TODO:
				1595	// Update initialized size in the attribute and in the
				1596	// inode (up to i_size).
				1597	// Update iblock.
				1598	// FIXME: This is inefficient. Try to batch the two
				1599	// size changes to happen in one go.
				1600	ntfs_error(vol->sb, "Writing beyond initialized size "
				1601	"is not supported yet. Sorry.");
				1602	err = -EOPNOTSUPP;
				1603	goto err_out;
				1604	// Do NOT set_buffer_new() BUT DO clear buffer range
				1605	// outside write request range.
				1606	// set_buffer_uptodate() on complete buffers as well as
				1607	// set_buffer_dirty().
				1608	}
				1609
				1610	/* Need to map unmapped buffers. */
				1611	if (!buffer_mapped(bh)) {
				1612	/* Unmapped buffer. Need to map it. */
				1613	bh->b_bdev = vol->sb->s_bdev;
				1614
				1615	/* Convert block into corresponding vcn and offset. */
				1616	vcn = (VCN)block << blocksize_bits >>
				1617	vol->cluster_size_bits;
				1618	vcn_ofs = ((VCN)block << blocksize_bits) &
				1619	vol->cluster_size_mask;
				1620
				1621	is_retry = FALSE;
				1622	if (!rl) {
				1623	lock_retry_remap:
				1624	down_read(&ni->runlist.lock);
				1625	rl = ni->runlist.rl;
				1626	}
				1627	if (likely(rl != NULL)) {
				1628	/* Seek to element containing target vcn. */
				1629	while (rl->length && rl[1].vcn <= vcn)
				1630	rl++;
				1631	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				1632	} else
				1633	lcn = LCN_RL_NOT_MAPPED;
				1634	if (unlikely(lcn < 0)) {
				1635	/*
				1636	* We extended the attribute allocation above.
				1637	* If we hit an ENOENT here it means that the
				1638	* allocation was insufficient which is a bug.
				1639	*/
				1640	BUG_ON(lcn == LCN_ENOENT);
				1641
				1642	/* It is a hole, need to instantiate it. */
				1643	if (lcn == LCN_HOLE) {
				1644	// TODO: Instantiate the hole.
				1645	// clear_buffer_new(bh);
				1646	// unmap_underlying_metadata(bh->b_bdev,
				1647	// bh->b_blocknr);
				1648	// For non-uptodate buffers, need to
				1649	// zero out the region outside the
				1650	// request in this bh or all bhs,
				1651	// depending on what we implemented
				1652	// above.
				1653	// Need to flush_dcache_page().
				1654	// Or could use set_buffer_new()
				1655	// instead?
				1656	ntfs_error(vol->sb, "Writing into "
				1657	"sparse regions is "
				1658	"not supported yet. "
				1659	"Sorry.");
				1660	err = -EOPNOTSUPP;
				1661	goto err_out;
				1662	} else if (!is_retry &&
				1663	lcn == LCN_RL_NOT_MAPPED) {
				1664	is_retry = TRUE;
				1665	/*
				1666	* Attempt to map runlist, dropping
				1667	* lock for the duration.
				1668	*/
				1669	up_read(&ni->runlist.lock);
				1670	err = ntfs_map_runlist(ni, vcn);
				1671	if (likely(!err))
				1672	goto lock_retry_remap;
				1673	rl = NULL;
				1674	lcn = err;
				1675	}
				1676	/*
				1677	* Failed to map the buffer, even after
				1678	* retrying.
				1679	*/
				1680	bh->b_blocknr = -1;
				1681	ntfs_error(vol->sb, "Failed to write to inode "
				1682	"0x%lx, attribute type 0x%x, "
				1683	"vcn 0x%llx, offset 0x%x "
				1684	"because its location on disk "
				1685	"could not be determined%s "
				1686	"(error code %lli).",
				1687	ni->mft_no, ni->type,
				1688	(unsigned long long)vcn,
				1689	vcn_ofs, is_retry ? " even "
				1690	"after retrying" : "",
				1691	(long long)lcn);
				1692	if (!err)
				1693	err = -EIO;
				1694	goto err_out;
				1695	}
				1696	/* We now have a successful remap, i.e. lcn >= 0. */
				1697
				1698	/* Setup buffer head to correct block. */
				1699	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				1700	+ vcn_ofs) >> blocksize_bits;
				1701	set_buffer_mapped(bh);
				1702
				1703	// FIXME: Something analogous to this is needed for
				1704	// each newly allocated block, i.e. BH_New.
				1705	// FIXME: Might need to take this out of the
				1706	// if (!buffer_mapped(bh)) {}, depending on how we
				1707	// implement things during the allocated_size and
				1708	// initialized_size extension code above.
				1709	if (buffer_new(bh)) {
				1710	clear_buffer_new(bh);
				1711	unmap_underlying_metadata(bh->b_bdev,
				1712	bh->b_blocknr);
				1713	if (PageUptodate(page)) {
				1714	set_buffer_uptodate(bh);
				1715	continue;
				1716	}
				1717	/*
				1718	* Page is _not_ uptodate, zero surrounding
				1719	* region. NOTE: This is how we decide if to
				1720	* zero or not!
				1721	*/
				1722	if (block_end > to \|\| block_start < from) {
				1723	void *kaddr;
				1724
				1725	kaddr = kmap_atomic(page, KM_USER0);
				1726	if (block_end > to)
				1727	memset(kaddr + to, 0,
				1728	block_end - to);
				1729	if (block_start < from)
				1730	memset(kaddr + block_start, 0,
				1731	from -
				1732	block_start);
				1733	flush_dcache_page(page);
				1734	kunmap_atomic(kaddr, KM_USER0);
				1735	}
				1736	continue;
				1737	}
				1738	}
				1739	/* @bh is mapped, set it uptodate if the page is uptodate. */
				1740	if (PageUptodate(page)) {
				1741	if (!buffer_uptodate(bh))
				1742	set_buffer_uptodate(bh);
				1743	continue;
				1744	}
				1745	/*
				1746	* The page is not uptodate. The buffer is mapped. If it is not
				1747	* uptodate, and it is only partially being written to, we need
				1748	* to read the buffer in before the write, i.e. right now.
				1749	*/
				1750	if (!buffer_uptodate(bh) &&
				1751	(block_start < from \|\| block_end > to)) {
				1752	ll_rw_block(READ, 1, &bh);
				1753	*wait_bh++ = bh;
				1754	}
				1755	} while (block++, block_start = block_end,
				1756	(bh = bh->b_this_page) != head);
				1757
				1758	/* Release the lock if we took it. */
				1759	if (rl) {
				1760	up_read(&ni->runlist.lock);
				1761	rl = NULL;
				1762	}
				1763
				1764	/* If we issued read requests, let them complete. */
				1765	while (wait_bh > wait) {
				1766	wait_on_buffer(*--wait_bh);
				1767	if (!buffer_uptodate(*wait_bh))
				1768	return -EIO;
				1769	}
				1770
				1771	ntfs_debug("Done.");
				1772	return 0;
				1773	err_out:
				1774	/*
				1775	* Zero out any newly allocated blocks to avoid exposing stale data.
				1776	* If BH_New is set, we know that the block was newly allocated in the
				1777	* above loop.
				1778	* FIXME: What about initialized_size increments? Have we done all the
				1779	* required zeroing above? If not this error handling is broken, and
				1780	* in particular the if (block_end <= from) check is completely bogus.
				1781	*/
				1782	bh = head;
				1783	block_start = 0;
				1784	is_retry = FALSE;
				1785	do {
				1786	block_end = block_start + blocksize;
				1787	if (block_end <= from)
				1788	continue;
				1789	if (block_start >= to)
				1790	break;
				1791	if (buffer_new(bh)) {
				1792	void *kaddr;
				1793
				1794	clear_buffer_new(bh);
				1795	kaddr = kmap_atomic(page, KM_USER0);
				1796	memset(kaddr + block_start, 0, bh->b_size);
				1797	kunmap_atomic(kaddr, KM_USER0);
				1798	set_buffer_uptodate(bh);
				1799	mark_buffer_dirty(bh);
				1800	is_retry = TRUE;
				1801	}
				1802	} while (block_start = block_end, (bh = bh->b_this_page) != head);
				1803	if (is_retry)
				1804	flush_dcache_page(page);
				1805	if (rl)
				1806	up_read(&ni->runlist.lock);
				1807	return err;
				1808	}
				1809
				1810	/**
				1811	* ntfs_prepare_write - prepare a page for receiving data
				1812	*
				1813	* This is called from generic_file_write() with i_sem held on the inode
				1814	* (@page->mapping->host). The @page is locked but not kmap()ped. The source
				1815	* data has not yet been copied into the @page.
				1816	*
				1817	* Need to extend the attribute/fill in holes if necessary, create blocks and
				1818	* make partially overwritten blocks uptodate,
				1819	*
				1820	* i_size is not to be modified yet.
				1821	*
				1822	* Return 0 on success or -errno on error.
				1823	*
				1824	* Should be using block_prepare_write() [support for sparse files] or
				1825	* cont_prepare_write() [no support for sparse files]. Cannot do that due to
				1826	* ntfs specifics but can look at them for implementation guidance.
				1827	*
				1828	* Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
				1829	* the first byte in the page that will be written to and @to is the first byte
				1830	* after the last byte that will be written to.
				1831	*/
				1832	static int ntfs_prepare_write(struct file file, struct page page,
				1833	unsigned from, unsigned to)
				1834	{
				1835	s64 new_size;
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1836	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1837	struct inode *vi = page->mapping->host;
				1838	ntfs_inode base_ni = NULL, ni = NTFS_I(vi);
				1839	ntfs_volume *vol = ni->vol;
				1840	ntfs_attr_search_ctx *ctx = NULL;
				1841	MFT_RECORD *m = NULL;
				1842	ATTR_RECORD *a;
				1843	u8 *kaddr;
				1844	u32 attr_len;
				1845	int err;
				1846
				1847	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				1848	"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
				1849	page->index, from, to);
				1850	BUG_ON(!PageLocked(page));
				1851	BUG_ON(from > PAGE_CACHE_SIZE);
				1852	BUG_ON(to > PAGE_CACHE_SIZE);
				1853	BUG_ON(from > to);
				1854	BUG_ON(NInoMstProtected(ni));
				1855	/*
				1856	* If a previous ntfs_truncate() failed, repeat it and abort if it
				1857	* fails again.
				1858	*/
				1859	if (unlikely(NInoTruncateFailed(ni))) {
				1860	down_write(&vi->i_alloc_sem);
				1861	err = ntfs_truncate(vi);
				1862	up_write(&vi->i_alloc_sem);
				1863	if (err \|\| NInoTruncateFailed(ni)) {
				1864	if (!err)
				1865	err = -EIO;
				1866	goto err_out;
				1867	}
				1868	}
				1869	/* If the attribute is not resident, deal with it elsewhere. */
				1870	if (NInoNonResident(ni)) {
				1871	/*
				1872	* Only unnamed $DATA attributes can be compressed, encrypted,
				1873	* and/or sparse.
				1874	*/
				1875	if (ni->type == AT_DATA && !ni->name_len) {
				1876	/* If file is encrypted, deny access, just like NT4. */
				1877	if (NInoEncrypted(ni)) {
				1878	ntfs_debug("Denying write access to encrypted "
				1879	"file.");
				1880	return -EACCES;
				1881	}
				1882	/* Compressed data streams are handled in compress.c. */
				1883	if (NInoCompressed(ni)) {
				1884	// TODO: Implement and replace this check with
				1885	// return ntfs_write_compressed_block(page);
				1886	ntfs_error(vi->i_sb, "Writing to compressed "
				1887	"files is not supported yet. "
				1888	"Sorry.");
				1889	return -EOPNOTSUPP;
				1890	}
				1891	// TODO: Implement and remove this check.
				1892	if (NInoSparse(ni)) {
				1893	ntfs_error(vi->i_sb, "Writing to sparse files "
				1894	"is not supported yet. Sorry.");
				1895	return -EOPNOTSUPP;
				1896	}
				1897	}
				1898	/* Normal data stream. */
				1899	return ntfs_prepare_nonresident_write(page, from, to);
				1900	}
				1901	/*
				1902	* Attribute is resident, implying it is not compressed, encrypted, or
				1903	* sparse.
				1904	*/
				1905	BUG_ON(page_has_buffers(page));
				1906	new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
				1907	/* If we do not need to resize the attribute allocation we are done. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1908	if (new_size <= i_size_read(vi))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1909	goto done;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1910	/* Map, pin, and lock the (base) mft record. */
				1911	if (!NInoAttr(ni))
				1912	base_ni = ni;
				1913	else
				1914	base_ni = ni->ext.base_ntfs_ino;
				1915	m = map_mft_record(base_ni);
				1916	if (IS_ERR(m)) {
				1917	err = PTR_ERR(m);
				1918	m = NULL;
				1919	ctx = NULL;
				1920	goto err_out;
				1921	}
				1922	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1923	if (unlikely(!ctx)) {
				1924	err = -ENOMEM;
				1925	goto err_out;
				1926	}
				1927	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1928	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1929	if (unlikely(err)) {
				1930	if (err == -ENOENT)
				1931	err = -EIO;
				1932	goto err_out;
				1933	}
				1934	m = ctx->mrec;
				1935	a = ctx->attr;
				1936	/* The total length of the attribute value. */
				1937	attr_len = le32_to_cpu(a->data.resident.value_length);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1938	/* Fix an eventual previous failure of ntfs_commit_write(). */
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1939	i_size = i_size_read(vi);
				1940	if (unlikely(attr_len > i_size)) {
				1941	attr_len = i_size;
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1942	a->data.resident.value_length = cpu_to_le32(attr_len);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1943	}
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1944	/* If we do not need to resize the attribute allocation we are done. */
				1945	if (new_size <= attr_len)
				1946	goto done_unm;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1947	/* Check if new size is allowed in $AttrDef. */
				1948	err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
				1949	if (unlikely(err)) {
				1950	if (err == -ERANGE) {
				1951	ntfs_error(vol->sb, "Write would cause the inode "
				1952	"0x%lx to exceed the maximum size for "
				1953	"its attribute type (0x%x). Aborting "
				1954	"write.", vi->i_ino,
				1955	le32_to_cpu(ni->type));
				1956	} else {
				1957	ntfs_error(vol->sb, "Inode 0x%lx has unknown "
				1958	"attribute type 0x%x. Aborting "
				1959	"write.", vi->i_ino,
				1960	le32_to_cpu(ni->type));
				1961	err = -EIO;
				1962	}
				1963	goto err_out2;
				1964	}
				1965	/*
				1966	* Extend the attribute record to be able to store the new attribute
				1967	* size.
				1968	*/
				1969	if (new_size >= vol->mft_record_size \|\| ntfs_attr_record_resize(m, a,
				1970	le16_to_cpu(a->data.resident.value_offset) +
				1971	new_size)) {
				1972	/* Not enough space in the mft record. */
				1973	ntfs_error(vol->sb, "Not enough space in the mft record for "
				1974	"the resized attribute value. This is not "
				1975	"supported yet. Aborting write.");
				1976	err = -EOPNOTSUPP;
				1977	goto err_out2;
				1978	}
				1979	/*
				1980	* We have enough space in the mft record to fit the write. This
				1981	* implies the attribute is smaller than the mft record and hence the
				1982	* attribute must be in a single page and hence page->index must be 0.
				1983	*/
				1984	BUG_ON(page->index);
				1985	/*
				1986	* If the beginning of the write is past the old size, enlarge the
				1987	* attribute value up to the beginning of the write and fill it with
				1988	* zeroes.
				1989	*/
				1990	if (from > attr_len) {
				1991	memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
				1992	attr_len, 0, from - attr_len);
				1993	a->data.resident.value_length = cpu_to_le32(from);
				1994	/* Zero the corresponding area in the page as well. */
				1995	if (PageUptodate(page)) {
				1996	kaddr = kmap_atomic(page, KM_USER0);
				1997	memset(kaddr + attr_len, 0, from - attr_len);
				1998	kunmap_atomic(kaddr, KM_USER0);
				1999	flush_dcache_page(page);
				2000	}
				2001	}
				2002	flush_dcache_mft_record_page(ctx->ntfs_ino);
				2003	mark_mft_record_dirty(ctx->ntfs_ino);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	2004	done_unm:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2005	ntfs_attr_put_search_ctx(ctx);
				2006	unmap_mft_record(base_ni);
				2007	/*
				2008	* Because resident attributes are handled by memcpy() to/from the
				2009	* corresponding MFT record, and because this form of i/o is byte
				2010	* aligned rather than block aligned, there is no need to bring the
				2011	* page uptodate here as in the non-resident case where we need to
				2012	* bring the buffers straddled by the write uptodate before
				2013	* generic_file_write() does the copying from userspace.
				2014	*
				2015	* We thus defer the uptodate bringing of the page region outside the
				2016	* region written to to ntfs_commit_write(), which makes the code
				2017	* simpler and saves one atomic kmap which is good.
				2018	*/
				2019	done:
				2020	ntfs_debug("Done.");
				2021	return 0;
				2022	err_out:
				2023	if (err == -ENOMEM)
				2024	ntfs_warning(vi->i_sb, "Error allocating memory required to "
				2025	"prepare the write.");
				2026	else {
				2027	ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
				2028	"with error %i.", err);
				2029	NVolSetErrors(vol);
				2030	make_bad_inode(vi);
				2031	}
				2032	err_out2:
				2033	if (ctx)
				2034	ntfs_attr_put_search_ctx(ctx);
				2035	if (m)
				2036	unmap_mft_record(base_ni);
				2037	return err;
				2038	}
				2039
				2040	/**
				2041	* ntfs_commit_nonresident_write -
				2042	*
				2043	*/
				2044	static int ntfs_commit_nonresident_write(struct page *page,
				2045	unsigned from, unsigned to)
				2046	{
				2047	s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
				2048	struct inode *vi = page->mapping->host;
				2049	struct buffer_head bh, head;
				2050	unsigned int block_start, block_end, blocksize;
				2051	BOOL partial;
				2052
				2053	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				2054	"0x%lx, from = %u, to = %u.", vi->i_ino,
				2055	NTFS_I(vi)->type, page->index, from, to);
				2056	blocksize = 1 << vi->i_blkbits;
				2057
				2058	// FIXME: We need a whole slew of special cases in here for compressed
				2059	// files for example...
				2060	// For now, we know ntfs_prepare_write() would have failed so we can't
				2061	// get here in any of the cases which we have to special case, so we
				2062	// are just a ripped off, unrolled generic_commit_write().
				2063
				2064	bh = head = page_buffers(page);
				2065	block_start = 0;
				2066	partial = FALSE;
				2067	do {
				2068	block_end = block_start + blocksize;
				2069	if (block_end <= from \|\| block_start >= to) {
				2070	if (!buffer_uptodate(bh))
				2071	partial = TRUE;
				2072	} else {
				2073	set_buffer_uptodate(bh);
				2074	mark_buffer_dirty(bh);
				2075	}
				2076	} while (block_start = block_end, (bh = bh->b_this_page) != head);
				2077	/*
				2078	* If this is a partial write which happened to make all buffers
				2079	* uptodate then we can optimize away a bogus ->readpage() for the next
				2080	* read(). Here we 'discover' whether the page went uptodate as a
				2081	* result of this (potentially partial) write.
				2082	*/
				2083	if (!partial)
				2084	SetPageUptodate(page);
				2085	/*
				2086	* Not convinced about this at all. See disparity comment above. For
				2087	* now we know ntfs_prepare_write() would have failed in the write
				2088	* exceeds i_size case, so this will never trigger which is fine.
				2089	*/
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	2090	if (pos > i_size_read(vi)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2091	ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
				2092	"not supported yet. Sorry.");
				2093	return -EOPNOTSUPP;
				2094	// vi->i_size = pos;
				2095	// mark_inode_dirty(vi);
				2096	}
				2097	ntfs_debug("Done.");
				2098	return 0;
				2099	}
				2100
				2101	/**
				2102	* ntfs_commit_write - commit the received data
				2103	*
				2104	* This is called from generic_file_write() with i_sem held on the inode
				2105	* (@page->mapping->host). The @page is locked but not kmap()ped. The source
				2106	* data has already been copied into the @page. ntfs_prepare_write() has been
				2107	* called before the data copied and it returned success so we can take the
				2108	* results of various BUG checks and some error handling for granted.
				2109	*
				2110	* Need to mark modified blocks dirty so they get written out later when
				2111	* ntfs_writepage() is invoked by the VM.
				2112	*
				2113	* Return 0 on success or -errno on error.
				2114	*
				2115	* Should be using generic_commit_write(). This marks buffers uptodate and
				2116	* dirty, sets the page uptodate if all buffers in the page are uptodate, and
				2117	* updates i_size if the end of io is beyond i_size. In that case, it also
				2118	* marks the inode dirty.
				2119	*
				2120	* Cannot use generic_commit_write() due to ntfs specialities but can look at
				2121	* it for implementation guidance.
				2122	*
				2123	* If things have gone as outlined in ntfs_prepare_write(), then we do not
				2124	* need to do any page content modifications here at all, except in the write
				2125	* to resident attribute case, where we need to do the uptodate bringing here
				2126	* which we combine with the copying into the mft record which means we save
				2127	* one atomic kmap.
				2128	*/
				2129	static int ntfs_commit_write(struct file file, struct page page,
				2130	unsigned from, unsigned to)
				2131	{
				2132	struct inode *vi = page->mapping->host;
				2133	ntfs_inode base_ni, ni = NTFS_I(vi);
				2134	char kaddr, kattr;
				2135	ntfs_attr_search_ctx *ctx;
				2136	MFT_RECORD *m;
				2137	ATTR_RECORD *a;
				2138	u32 attr_len;
				2139	int err;
				2140
				2141	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				2142	"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
				2143	page->index, from, to);
				2144	/* If the attribute is not resident, deal with it elsewhere. */
				2145	if (NInoNonResident(ni)) {
				2146	/* Only unnamed $DATA attributes can be compressed/encrypted. */
				2147	if (ni->type == AT_DATA && !ni->name_len) {
				2148	/* Encrypted files need separate handling. */
				2149	if (NInoEncrypted(ni)) {
				2150	// We never get here at present!
				2151	BUG();
				2152	}
				2153	/* Compressed data streams are handled in compress.c. */
				2154	if (NInoCompressed(ni)) {
				2155	// TODO: Implement this!
				2156	// return ntfs_write_compressed_block(page);
				2157	// We never get here at present!
				2158	BUG();
				2159	}
				2160	}
				2161	/* Normal data stream. */
				2162	return ntfs_commit_nonresident_write(page, from, to);
				2163	}
				2164	/*
				2165	* Attribute is resident, implying it is not compressed, encrypted, or
				2166	* sparse.
				2167	*/
				2168	if (!NInoAttr(ni))
				2169	base_ni = ni;
				2170	else
				2171	base_ni = ni->ext.base_ntfs_ino;
				2172	/* Map, pin, and lock the mft record. */
				2173	m = map_mft_record(base_ni);
				2174	if (IS_ERR(m)) {
				2175	err = PTR_ERR(m);
				2176	m = NULL;
				2177	ctx = NULL;
				2178	goto err_out;
				2179	}
				2180	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				2181	if (unlikely(!ctx)) {
				2182	err = -ENOMEM;
				2183	goto err_out;
				2184	}
				2185	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				2186	CASE_SENSITIVE, 0, NULL, 0, ctx);
				2187	if (unlikely(err)) {
				2188	if (err == -ENOENT)
				2189	err = -EIO;
				2190	goto err_out;
				2191	}
				2192	a = ctx->attr;
				2193	/* The total length of the attribute value. */
				2194	attr_len = le32_to_cpu(a->data.resident.value_length);
				2195	BUG_ON(from > attr_len);
				2196	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
				2197	kaddr = kmap_atomic(page, KM_USER0);
				2198	/* Copy the received data from the page to the mft record. */
				2199	memcpy(kattr + from, kaddr + from, to - from);
				2200	/* Update the attribute length if necessary. */
				2201	if (to > attr_len) {
				2202	attr_len = to;
				2203	a->data.resident.value_length = cpu_to_le32(attr_len);
				2204	}
				2205	/*
				2206	* If the page is not uptodate, bring the out of bounds area(s)
				2207	* uptodate by copying data from the mft record to the page.
				2208	*/
				2209	if (!PageUptodate(page)) {
				2210	if (from > 0)
				2211	memcpy(kaddr, kattr, from);
				2212	if (to < attr_len)
				2213	memcpy(kaddr + to, kattr + to, attr_len - to);
				2214	/* Zero the region outside the end of the attribute value. */
				2215	if (attr_len < PAGE_CACHE_SIZE)
				2216	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				2217	/*
				2218	* The probability of not having done any of the above is
				2219	* extremely small, so we just flush unconditionally.
				2220	*/
				2221	flush_dcache_page(page);
				2222	SetPageUptodate(page);
				2223	}
				2224	kunmap_atomic(kaddr, KM_USER0);
				2225	/* Update i_size if necessary. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	2226	if (i_size_read(vi) < attr_len) {
				2227	unsigned long flags;
				2228
				2229	write_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2230	ni->allocated_size = ni->initialized_size = attr_len;
				2231	i_size_write(vi, attr_len);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	2232	write_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2233	}
				2234	/* Mark the mft record dirty, so it gets written back. */
				2235	flush_dcache_mft_record_page(ctx->ntfs_ino);
				2236	mark_mft_record_dirty(ctx->ntfs_ino);
				2237	ntfs_attr_put_search_ctx(ctx);
				2238	unmap_mft_record(base_ni);
				2239	ntfs_debug("Done.");
				2240	return 0;
				2241	err_out:
				2242	if (err == -ENOMEM) {
				2243	ntfs_warning(vi->i_sb, "Error allocating memory required to "
				2244	"commit the write.");
				2245	if (PageUptodate(page)) {
				2246	ntfs_warning(vi->i_sb, "Page is uptodate, setting "
				2247	"dirty so the write will be retried "
				2248	"later on by the VM.");
				2249	/*
				2250	* Put the page on mapping->dirty_pages, but leave its
				2251	* buffers' dirty state as-is.
				2252	*/
				2253	__set_page_dirty_nobuffers(page);
				2254	err = 0;
				2255	} else
				2256	ntfs_error(vi->i_sb, "Page is not uptodate. Written "
				2257	"data has been lost.");
				2258	} else {
				2259	ntfs_error(vi->i_sb, "Resident attribute commit write failed "
				2260	"with error %i.", err);
				2261	NVolSetErrors(ni->vol);
				2262	make_bad_inode(vi);
				2263	}
				2264	if (ctx)
				2265	ntfs_attr_put_search_ctx(ctx);
				2266	if (m)
				2267	unmap_mft_record(base_ni);
				2268	return err;
				2269	}
				2270
				2271	#endif /* NTFS_RW */
				2272
				2273	/**
				2274	* ntfs_aops - general address space operations for inodes and attributes
				2275	*/
				2276	struct address_space_operations ntfs_aops = {
				2277	.readpage = ntfs_readpage, /* Fill page with data. */
				2278	.sync_page = block_sync_page, /* Currently, just unplugs the
				2279	disk request queue. */
				2280	#ifdef NTFS_RW
				2281	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				2282	.prepare_write = ntfs_prepare_write, /* Prepare page and buffers
				2283	ready to receive data. */
				2284	.commit_write = ntfs_commit_write, /* Commit received data. */
				2285	#endif /* NTFS_RW */
				2286	};
				2287
				2288	/**
				2289	* ntfs_mst_aops - general address space operations for mst protecteed inodes
				2290	* and attributes
				2291	*/
				2292	struct address_space_operations ntfs_mst_aops = {
				2293	.readpage = ntfs_readpage, /* Fill page with data. */
				2294	.sync_page = block_sync_page, /* Currently, just unplugs the
				2295	disk request queue. */
				2296	#ifdef NTFS_RW
				2297	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				2298	.set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
				2299	without touching the buffers
				2300	belonging to the page. */
				2301	#endif /* NTFS_RW */
				2302	};
				2303
				2304	#ifdef NTFS_RW
				2305
				2306	/**
				2307	* mark_ntfs_record_dirty - mark an ntfs record dirty
				2308	* @page: page containing the ntfs record to mark dirty
				2309	* @ofs: byte offset within @page at which the ntfs record begins
				2310	*
				2311	* Set the buffers and the page in which the ntfs record is located dirty.
				2312	*
				2313	* The latter also marks the vfs inode the ntfs record belongs to dirty
				2314	* (I_DIRTY_PAGES only).
				2315	*
				2316	* If the page does not have buffers, we create them and set them uptodate.
				2317	* The page may not be locked which is why we need to handle the buffers under
				2318	* the mapping->private_lock. Once the buffers are marked dirty we no longer
				2319	* need the lock since try_to_free_buffers() does not free dirty buffers.
				2320	*/
				2321	void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
				2322	struct address_space *mapping = page->mapping;
				2323	ntfs_inode *ni = NTFS_I(mapping->host);
				2324	struct buffer_head bh, head, *buffers_to_free = NULL;
				2325	unsigned int end, bh_size, bh_ofs;
				2326
				2327	BUG_ON(!PageUptodate(page));
				2328	end = ofs + ni->itype.index.block_size;
				2329	bh_size = 1 << VFS_I(ni)->i_blkbits;
				2330	spin_lock(&mapping->private_lock);
				2331	if (unlikely(!page_has_buffers(page))) {
				2332	spin_unlock(&mapping->private_lock);
				2333	bh = head = alloc_page_buffers(page, bh_size, 1);
				2334	spin_lock(&mapping->private_lock);
				2335	if (likely(!page_has_buffers(page))) {
				2336	struct buffer_head *tail;
				2337
				2338	do {
				2339	set_buffer_uptodate(bh);
				2340	tail = bh;
				2341	bh = bh->b_this_page;
				2342	} while (bh);
				2343	tail->b_this_page = head;
				2344	attach_page_buffers(page, head);
				2345	} else
				2346	buffers_to_free = bh;
				2347	}
				2348	bh = head = page_buffers(page);
				2349	do {
				2350	bh_ofs = bh_offset(bh);
				2351	if (bh_ofs + bh_size <= ofs)
				2352	continue;
				2353	if (unlikely(bh_ofs >= end))
				2354	break;
				2355	set_buffer_dirty(bh);
				2356	} while ((bh = bh->b_this_page) != head);
				2357	spin_unlock(&mapping->private_lock);
				2358	__set_page_dirty_nobuffers(page);
				2359	if (unlikely(buffers_to_free)) {
				2360	do {
				2361	bh = buffers_to_free->b_this_page;
				2362	free_buffer_head(buffers_to_free);
				2363	buffers_to_free = bh;
				2364	} while (buffers_to_free);
				2365	}
				2366	}
				2367
				2368	#endif /* NTFS_RW */