Blame - fs/ntfs/aops.c - kernel/msm-4.9

blob: f3ad36d8b8c97b220b700914ecab9401c552c151 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/**
				2	* aops.c - NTFS kernel address space operations and page cache handling.
				3	* Part of the Linux-NTFS project.
				4	*
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	5	* Copyright (c) 2001-2005 Anton Altaparmakov
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	6	* Copyright (c) 2002 Richard Russon
				7	*
				8	* This program/include file is free software; you can redistribute it and/or
				9	* modify it under the terms of the GNU General Public License as published
				10	* by the Free Software Foundation; either version 2 of the License, or
				11	* (at your option) any later version.
				12	*
				13	* This program/include file is distributed in the hope that it will be
				14	* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
				15	* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				16	* GNU General Public License for more details.
				17	*
				18	* You should have received a copy of the GNU General Public License
				19	* along with this program (in the main directory of the Linux-NTFS
				20	* distribution in the file COPYING); if not, write to the Free Software
				21	* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				22	*/
				23
				24	#include <linux/errno.h>
				25	#include <linux/mm.h>
				26	#include <linux/pagemap.h>
				27	#include <linux/swap.h>
				28	#include <linux/buffer_head.h>
				29	#include <linux/writeback.h>
				30
				31	#include "aops.h"
				32	#include "attrib.h"
				33	#include "debug.h"
				34	#include "inode.h"
				35	#include "mft.h"
				36	#include "runlist.h"
				37	#include "types.h"
				38	#include "ntfs.h"
				39
				40	/**
				41	* ntfs_end_buffer_async_read - async io completion for reading attributes
				42	* @bh: buffer head on which io is completed
				43	* @uptodate: whether @bh is now uptodate or not
				44	*
				45	* Asynchronous I/O completion handler for reading pages belonging to the
				46	* attribute address space of an inode. The inodes can either be files or
				47	* directories or they can be fake inodes describing some attribute.
				48	*
				49	* If NInoMstProtected(), perform the post read mst fixups when all IO on the
				50	* page has been completed and mark the page uptodate or set the error bit on
				51	* the page. To determine the size of the records that need fixing up, we
				52	* cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
				53	* record size, and index_block_size_bits, to the log(base 2) of the ntfs
				54	* record size.
				55	*/
				56	static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
				57	{
				58	static DEFINE_SPINLOCK(page_uptodate_lock);
				59	unsigned long flags;
				60	struct buffer_head *tmp;
				61	struct page *page;
				62	ntfs_inode *ni;
				63	int page_uptodate = 1;
				64
				65	page = bh->b_page;
				66	ni = NTFS_I(page->mapping->host);
				67
				68	if (likely(uptodate)) {
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	69	s64 file_ofs, initialized_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	70
				71	set_buffer_uptodate(bh);
				72
				73	file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
				74	bh_offset(bh);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	75	read_lock_irqsave(&ni->size_lock, flags);
				76	initialized_size = ni->initialized_size;
				77	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	78	/* Check for the current buffer head overflowing. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	79	if (file_ofs + bh->b_size > initialized_size) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	80	char *addr;
				81	int ofs = 0;
				82
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	83	if (file_ofs < initialized_size)
				84	ofs = initialized_size - file_ofs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	85	addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
				86	memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
				87	flush_dcache_page(page);
				88	kunmap_atomic(addr, KM_BIO_SRC_IRQ);
				89	}
				90	} else {
				91	clear_buffer_uptodate(bh);
				92	ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
				93	(unsigned long long)bh->b_blocknr);
				94	SetPageError(page);
				95	}
				96	spin_lock_irqsave(&page_uptodate_lock, flags);
				97	clear_buffer_async_read(bh);
				98	unlock_buffer(bh);
				99	tmp = bh;
				100	do {
				101	if (!buffer_uptodate(tmp))
				102	page_uptodate = 0;
				103	if (buffer_async_read(tmp)) {
				104	if (likely(buffer_locked(tmp)))
				105	goto still_busy;
				106	/* Async buffers must be locked. */
				107	BUG();
				108	}
				109	tmp = tmp->b_this_page;
				110	} while (tmp != bh);
				111	spin_unlock_irqrestore(&page_uptodate_lock, flags);
				112	/*
				113	* If none of the buffers had errors then we can set the page uptodate,
				114	* but we first have to perform the post read mst fixups, if the
				115	* attribute is mst protected, i.e. if NInoMstProteced(ni) is true.
				116	* Note we ignore fixup errors as those are detected when
				117	* map_mft_record() is called which gives us per record granularity
				118	* rather than per page granularity.
				119	*/
				120	if (!NInoMstProtected(ni)) {
				121	if (likely(page_uptodate && !PageError(page)))
				122	SetPageUptodate(page);
				123	} else {
				124	char *addr;
				125	unsigned int i, recs;
				126	u32 rec_size;
				127
				128	rec_size = ni->itype.index.block_size;
				129	recs = PAGE_CACHE_SIZE / rec_size;
				130	/* Should have been verified before we got here... */
				131	BUG_ON(!recs);
				132	addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
				133	for (i = 0; i < recs; i++)
				134	post_read_mst_fixup((NTFS_RECORD*)(addr +
				135	i * rec_size), rec_size);
				136	flush_dcache_page(page);
				137	kunmap_atomic(addr, KM_BIO_SRC_IRQ);
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	138	if (likely(page_uptodate && !PageError(page)))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	139	SetPageUptodate(page);
				140	}
				141	unlock_page(page);
				142	return;
				143	still_busy:
				144	spin_unlock_irqrestore(&page_uptodate_lock, flags);
				145	return;
				146	}
				147
				148	/**
				149	* ntfs_read_block - fill a @page of an address space with data
				150	* @page: page cache page to fill with data
				151	*
				152	* Fill the page @page of the address space belonging to the @page->host inode.
				153	* We read each buffer asynchronously and when all buffers are read in, our io
				154	* completion handler ntfs_end_buffer_read_async(), if required, automatically
				155	* applies the mst fixups to the page before finally marking it uptodate and
				156	* unlocking it.
				157	*
				158	* We only enforce allocated_size limit because i_size is checked for in
				159	* generic_file_read().
				160	*
				161	* Return 0 on success and -errno on error.
				162	*
				163	* Contains an adapted version of fs/buffer.c::block_read_full_page().
				164	*/
				165	static int ntfs_read_block(struct page *page)
				166	{
				167	VCN vcn;
				168	LCN lcn;
				169	ntfs_inode *ni;
				170	ntfs_volume *vol;
				171	runlist_element *rl;
				172	struct buffer_head bh, head, *arr[MAX_BUF_PER_PAGE];
				173	sector_t iblock, lblock, zblock;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	174	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	175	unsigned int blocksize, vcn_ofs;
				176	int i, nr;
				177	unsigned char blocksize_bits;
				178
				179	ni = NTFS_I(page->mapping->host);
				180	vol = ni->vol;
				181
				182	/* $MFT/$DATA must have its complete runlist in memory at all times. */
				183	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
				184
				185	blocksize_bits = VFS_I(ni)->i_blkbits;
				186	blocksize = 1 << blocksize_bits;
				187
				188	if (!page_has_buffers(page))
				189	create_empty_buffers(page, blocksize, 0);
				190	bh = head = page_buffers(page);
				191	if (unlikely(!bh)) {
				192	unlock_page(page);
				193	return -ENOMEM;
				194	}
				195
				196	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	197	read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	198	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
				199	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	200	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	201
				202	/* Loop through all the buffers in the page. */
				203	rl = NULL;
				204	nr = i = 0;
				205	do {
				206	u8 *kaddr;
				207
				208	if (unlikely(buffer_uptodate(bh)))
				209	continue;
				210	if (unlikely(buffer_mapped(bh))) {
				211	arr[nr++] = bh;
				212	continue;
				213	}
				214	bh->b_bdev = vol->sb->s_bdev;
				215	/* Is the block within the allowed limits? */
				216	if (iblock < lblock) {
				217	BOOL is_retry = FALSE;
				218
				219	/* Convert iblock into corresponding vcn and offset. */
				220	vcn = (VCN)iblock << blocksize_bits >>
				221	vol->cluster_size_bits;
				222	vcn_ofs = ((VCN)iblock << blocksize_bits) &
				223	vol->cluster_size_mask;
				224	if (!rl) {
				225	lock_retry_remap:
				226	down_read(&ni->runlist.lock);
				227	rl = ni->runlist.rl;
				228	}
				229	if (likely(rl != NULL)) {
				230	/* Seek to element containing target vcn. */
				231	while (rl->length && rl[1].vcn <= vcn)
				232	rl++;
				233	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				234	} else
				235	lcn = LCN_RL_NOT_MAPPED;
				236	/* Successful remap. */
				237	if (lcn >= 0) {
				238	/* Setup buffer head to correct block. */
				239	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				240	+ vcn_ofs) >> blocksize_bits;
				241	set_buffer_mapped(bh);
				242	/* Only read initialized data blocks. */
				243	if (iblock < zblock) {
				244	arr[nr++] = bh;
				245	continue;
				246	}
				247	/* Fully non-initialized data block, zero it. */
				248	goto handle_zblock;
				249	}
				250	/* It is a hole, need to zero it. */
				251	if (lcn == LCN_HOLE)
				252	goto handle_hole;
				253	/* If first try and runlist unmapped, map and retry. */
				254	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				255	int err;
				256	is_retry = TRUE;
				257	/*
				258	* Attempt to map runlist, dropping lock for
				259	* the duration.
				260	*/
				261	up_read(&ni->runlist.lock);
				262	err = ntfs_map_runlist(ni, vcn);
				263	if (likely(!err))
				264	goto lock_retry_remap;
				265	rl = NULL;
				266	lcn = err;
Anton Altaparmakov	9f993fe	2005-06-25 16:15:36 +0100	[diff] [blame]	267	} else if (!rl)
				268	up_read(&ni->runlist.lock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	269	/* Hard error, zero out region. */
				270	bh->b_blocknr = -1;
				271	SetPageError(page);
				272	ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
				273	"attribute type 0x%x, vcn 0x%llx, "
				274	"offset 0x%x because its location on "
				275	"disk could not be determined%s "
				276	"(error code %lli).", ni->mft_no,
				277	ni->type, (unsigned long long)vcn,
				278	vcn_ofs, is_retry ? " even after "
				279	"retrying" : "", (long long)lcn);
				280	}
				281	/*
				282	* Either iblock was outside lblock limits or
				283	* ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
				284	* of the page and set the buffer uptodate.
				285	*/
				286	handle_hole:
				287	bh->b_blocknr = -1UL;
				288	clear_buffer_mapped(bh);
				289	handle_zblock:
				290	kaddr = kmap_atomic(page, KM_USER0);
				291	memset(kaddr + i * blocksize, 0, blocksize);
				292	flush_dcache_page(page);
				293	kunmap_atomic(kaddr, KM_USER0);
				294	set_buffer_uptodate(bh);
				295	} while (i++, iblock++, (bh = bh->b_this_page) != head);
				296
				297	/* Release the lock if we took it. */
				298	if (rl)
				299	up_read(&ni->runlist.lock);
				300
				301	/* Check we have at least one buffer ready for i/o. */
				302	if (nr) {
				303	struct buffer_head *tbh;
				304
				305	/* Lock the buffers. */
				306	for (i = 0; i < nr; i++) {
				307	tbh = arr[i];
				308	lock_buffer(tbh);
				309	tbh->b_end_io = ntfs_end_buffer_async_read;
				310	set_buffer_async_read(tbh);
				311	}
				312	/* Finally, start i/o on the buffers. */
				313	for (i = 0; i < nr; i++) {
				314	tbh = arr[i];
				315	if (likely(!buffer_uptodate(tbh)))
				316	submit_bh(READ, tbh);
				317	else
				318	ntfs_end_buffer_async_read(tbh, 1);
				319	}
				320	return 0;
				321	}
				322	/* No i/o was scheduled on any of the buffers. */
				323	if (likely(!PageError(page)))
				324	SetPageUptodate(page);
				325	else /* Signal synchronous i/o error. */
				326	nr = -EIO;
				327	unlock_page(page);
				328	return nr;
				329	}
				330
				331	/**
				332	* ntfs_readpage - fill a @page of a @file with data from the device
				333	* @file: open file to which the page @page belongs or NULL
				334	* @page: page cache page to fill with data
				335	*
				336	* For non-resident attributes, ntfs_readpage() fills the @page of the open
				337	* file @file by calling the ntfs version of the generic block_read_full_page()
				338	* function, ntfs_read_block(), which in turn creates and reads in the buffers
				339	* associated with the page asynchronously.
				340	*
				341	* For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
				342	* data from the mft record (which at this stage is most likely in memory) and
				343	* fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
				344	* even if the mft record is not cached at this point in time, we need to wait
				345	* for it to be read in before we can do the copy.
				346	*
				347	* Return 0 on success and -errno on error.
				348	*/
				349	static int ntfs_readpage(struct file file, struct page page)
				350	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	351	ntfs_inode ni, base_ni;
				352	u8 *kaddr;
				353	ntfs_attr_search_ctx *ctx;
				354	MFT_RECORD *mrec;
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	355	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	356	u32 attr_len;
				357	int err = 0;
				358
Anton Altaparmakov	905685f	2005-03-10 11:06:19 +0000	[diff] [blame]	359	retry_readpage:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	360	BUG_ON(!PageLocked(page));
				361	/*
				362	* This can potentially happen because we clear PageUptodate() during
				363	* ntfs_writepage() of MstProtected() attributes.
				364	*/
				365	if (PageUptodate(page)) {
				366	unlock_page(page);
				367	return 0;
				368	}
				369	ni = NTFS_I(page->mapping->host);
				370
				371	/* NInoNonResident() == NInoIndexAllocPresent() */
				372	if (NInoNonResident(ni)) {
				373	/*
				374	* Only unnamed $DATA attributes can be compressed or
				375	* encrypted.
				376	*/
				377	if (ni->type == AT_DATA && !ni->name_len) {
				378	/* If file is encrypted, deny access, just like NT4. */
				379	if (NInoEncrypted(ni)) {
				380	err = -EACCES;
				381	goto err_out;
				382	}
				383	/* Compressed data streams are handled in compress.c. */
				384	if (NInoCompressed(ni))
				385	return ntfs_read_compressed_block(page);
				386	}
				387	/* Normal data stream. */
				388	return ntfs_read_block(page);
				389	}
				390	/*
				391	* Attribute is resident, implying it is not compressed or encrypted.
				392	* This also means the attribute is smaller than an mft record and
				393	* hence smaller than a page, so can simply zero out any pages with
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	394	* index above 0.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	395	*/
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	396	if (unlikely(page->index > 0)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	397	kaddr = kmap_atomic(page, KM_USER0);
				398	memset(kaddr, 0, PAGE_CACHE_SIZE);
				399	flush_dcache_page(page);
				400	kunmap_atomic(kaddr, KM_USER0);
				401	goto done;
				402	}
				403	if (!NInoAttr(ni))
				404	base_ni = ni;
				405	else
				406	base_ni = ni->ext.base_ntfs_ino;
				407	/* Map, pin, and lock the mft record. */
				408	mrec = map_mft_record(base_ni);
				409	if (IS_ERR(mrec)) {
				410	err = PTR_ERR(mrec);
				411	goto err_out;
				412	}
Anton Altaparmakov	905685f	2005-03-10 11:06:19 +0000	[diff] [blame]	413	/*
				414	* If a parallel write made the attribute non-resident, drop the mft
				415	* record and retry the readpage.
				416	*/
				417	if (unlikely(NInoNonResident(ni))) {
				418	unmap_mft_record(base_ni);
				419	goto retry_readpage;
				420	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	421	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
				422	if (unlikely(!ctx)) {
				423	err = -ENOMEM;
				424	goto unm_err_out;
				425	}
				426	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				427	CASE_SENSITIVE, 0, NULL, 0, ctx);
				428	if (unlikely(err))
				429	goto put_unm_err_out;
				430	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	431	read_lock_irqsave(&ni->size_lock, flags);
				432	if (unlikely(attr_len > ni->initialized_size))
				433	attr_len = ni->initialized_size;
				434	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	435	kaddr = kmap_atomic(page, KM_USER0);
				436	/* Copy the data to the page. */
				437	memcpy(kaddr, (u8*)ctx->attr +
				438	le16_to_cpu(ctx->attr->data.resident.value_offset),
				439	attr_len);
				440	/* Zero the remainder of the page. */
				441	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				442	flush_dcache_page(page);
				443	kunmap_atomic(kaddr, KM_USER0);
				444	put_unm_err_out:
				445	ntfs_attr_put_search_ctx(ctx);
				446	unm_err_out:
				447	unmap_mft_record(base_ni);
				448	done:
				449	SetPageUptodate(page);
				450	err_out:
				451	unlock_page(page);
				452	return err;
				453	}
				454
				455	#ifdef NTFS_RW
				456
				457	/**
				458	* ntfs_write_block - write a @page to the backing store
				459	* @page: page cache page to write out
				460	* @wbc: writeback control structure
				461	*
				462	* This function is for writing pages belonging to non-resident, non-mst
				463	* protected attributes to their backing store.
				464	*
				465	* For a page with buffers, map and write the dirty buffers asynchronously
				466	* under page writeback. For a page without buffers, create buffers for the
				467	* page, then proceed as above.
				468	*
				469	* If a page doesn't have buffers the page dirty state is definitive. If a page
				470	* does have buffers, the page dirty state is just a hint, and the buffer dirty
				471	* state is definitive. (A hint which has rules: dirty buffers against a clean
				472	* page is illegal. Other combinations are legal and need to be handled. In
				473	* particular a dirty page containing clean buffers for example.)
				474	*
				475	* Return 0 on success and -errno on error.
				476	*
				477	* Based on ntfs_read_block() and __block_write_full_page().
				478	*/
				479	static int ntfs_write_block(struct page page, struct writeback_control wbc)
				480	{
				481	VCN vcn;
				482	LCN lcn;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	483	s64 initialized_size;
				484	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	485	sector_t block, dblock, iblock;
				486	struct inode *vi;
				487	ntfs_inode *ni;
				488	ntfs_volume *vol;
				489	runlist_element *rl;
				490	struct buffer_head bh, head;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	491	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	492	unsigned int blocksize, vcn_ofs;
				493	int err;
				494	BOOL need_end_writeback;
				495	unsigned char blocksize_bits;
				496
				497	vi = page->mapping->host;
				498	ni = NTFS_I(vi);
				499	vol = ni->vol;
				500
				501	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				502	"0x%lx.", ni->mft_no, ni->type, page->index);
				503
				504	BUG_ON(!NInoNonResident(ni));
				505	BUG_ON(NInoMstProtected(ni));
				506
				507	blocksize_bits = vi->i_blkbits;
				508	blocksize = 1 << blocksize_bits;
				509
				510	if (!page_has_buffers(page)) {
				511	BUG_ON(!PageUptodate(page));
				512	create_empty_buffers(page, blocksize,
				513	(1 << BH_Uptodate) \| (1 << BH_Dirty));
				514	}
				515	bh = head = page_buffers(page);
				516	if (unlikely(!bh)) {
				517	ntfs_warning(vol->sb, "Error allocating page buffers. "
				518	"Redirtying page so we try again later.");
				519	/*
				520	* Put the page back on mapping->dirty_pages, but leave its
				521	* buffer's dirty state as-is.
				522	*/
				523	redirty_page_for_writepage(wbc, page);
				524	unlock_page(page);
				525	return 0;
				526	}
				527
				528	/* NOTE: Different naming scheme to ntfs_read_block()! */
				529
				530	/* The first block in the page. */
				531	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				532
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	533	read_lock_irqsave(&ni->size_lock, flags);
				534	i_size = i_size_read(vi);
				535	initialized_size = ni->initialized_size;
				536	read_unlock_irqrestore(&ni->size_lock, flags);
				537
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	538	/* The first out of bounds block for the data size. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	539	dblock = (i_size + blocksize - 1) >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	540
				541	/* The last (fully or partially) initialized block. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	542	iblock = initialized_size >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	543
				544	/*
				545	* Be very careful. We have no exclusion from __set_page_dirty_buffers
				546	* here, and the (potentially unmapped) buffers may become dirty at
				547	* any time. If a buffer becomes dirty here after we've inspected it
				548	* then we just miss that fact, and the page stays dirty.
				549	*
				550	* Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
				551	* handle that here by just cleaning them.
				552	*/
				553
				554	/*
				555	* Loop through all the buffers in the page, mapping all the dirty
				556	* buffers to disk addresses and handling any aliases from the
				557	* underlying block device's mapping.
				558	*/
				559	rl = NULL;
				560	err = 0;
				561	do {
				562	BOOL is_retry = FALSE;
				563
				564	if (unlikely(block >= dblock)) {
				565	/*
				566	* Mapped buffers outside i_size will occur, because
				567	* this page can be outside i_size when there is a
				568	* truncate in progress. The contents of such buffers
				569	* were zeroed by ntfs_writepage().
				570	*
				571	* FIXME: What about the small race window where
				572	* ntfs_writepage() has not done any clearing because
				573	* the page was within i_size but before we get here,
				574	* vmtruncate() modifies i_size?
				575	*/
				576	clear_buffer_dirty(bh);
				577	set_buffer_uptodate(bh);
				578	continue;
				579	}
				580
				581	/* Clean buffers are not written out, so no need to map them. */
				582	if (!buffer_dirty(bh))
				583	continue;
				584
				585	/* Make sure we have enough initialized size. */
				586	if (unlikely((block >= iblock) &&
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	587	(initialized_size < i_size))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	588	/*
				589	* If this page is fully outside initialized size, zero
				590	* out all pages between the current initialized size
				591	* and the current page. Just use ntfs_readpage() to do
				592	* the zeroing transparently.
				593	*/
				594	if (block > iblock) {
				595	// TODO:
				596	// For each page do:
				597	// - read_cache_page()
				598	// Again for each page do:
				599	// - wait_on_page_locked()
				600	// - Check (PageUptodate(page) &&
				601	// !PageError(page))
				602	// Update initialized size in the attribute and
				603	// in the inode.
				604	// Again, for each page do:
				605	// __set_page_dirty_buffers();
				606	// page_cache_release()
				607	// We don't need to wait on the writes.
				608	// Update iblock.
				609	}
				610	/*
				611	* The current page straddles initialized size. Zero
				612	* all non-uptodate buffers and set them uptodate (and
				613	* dirty?). Note, there aren't any non-uptodate buffers
				614	* if the page is uptodate.
				615	* FIXME: For an uptodate page, the buffers may need to
				616	* be written out because they were not initialized on
				617	* disk before.
				618	*/
				619	if (!PageUptodate(page)) {
				620	// TODO:
				621	// Zero any non-uptodate buffers up to i_size.
				622	// Set them uptodate and dirty.
				623	}
				624	// TODO:
				625	// Update initialized size in the attribute and in the
				626	// inode (up to i_size).
				627	// Update iblock.
				628	// FIXME: This is inefficient. Try to batch the two
				629	// size changes to happen in one go.
				630	ntfs_error(vol->sb, "Writing beyond initialized size "
				631	"is not supported yet. Sorry.");
				632	err = -EOPNOTSUPP;
				633	break;
				634	// Do NOT set_buffer_new() BUT DO clear buffer range
				635	// outside write request range.
				636	// set_buffer_uptodate() on complete buffers as well as
				637	// set_buffer_dirty().
				638	}
				639
				640	/* No need to map buffers that are already mapped. */
				641	if (buffer_mapped(bh))
				642	continue;
				643
				644	/* Unmapped, dirty buffer. Need to map it. */
				645	bh->b_bdev = vol->sb->s_bdev;
				646
				647	/* Convert block into corresponding vcn and offset. */
				648	vcn = (VCN)block << blocksize_bits;
				649	vcn_ofs = vcn & vol->cluster_size_mask;
				650	vcn >>= vol->cluster_size_bits;
				651	if (!rl) {
				652	lock_retry_remap:
				653	down_read(&ni->runlist.lock);
				654	rl = ni->runlist.rl;
				655	}
				656	if (likely(rl != NULL)) {
				657	/* Seek to element containing target vcn. */
				658	while (rl->length && rl[1].vcn <= vcn)
				659	rl++;
				660	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				661	} else
				662	lcn = LCN_RL_NOT_MAPPED;
				663	/* Successful remap. */
				664	if (lcn >= 0) {
				665	/* Setup buffer head to point to correct block. */
				666	bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
				667	vcn_ofs) >> blocksize_bits;
				668	set_buffer_mapped(bh);
				669	continue;
				670	}
				671	/* It is a hole, need to instantiate it. */
				672	if (lcn == LCN_HOLE) {
Anton Altaparmakov	8dcdeba	2005-09-08 21:25:48 +0100	[diff] [blame^]	673	u8 *kaddr;
				674	unsigned long bpos, bend;
				675
				676	/* Check if the buffer is zero. */
				677	kaddr = kmap_atomic(page, KM_USER0);
				678	bpos = (unsigned long *)(kaddr + bh_offset(bh));
				679	bend = (unsigned long )((u8)bpos + blocksize);
				680	do {
				681	if (unlikely(*bpos))
				682	break;
				683	} while (likely(++bpos < bend));
				684	kunmap_atomic(kaddr, KM_USER0);
				685	if (bpos == bend) {
				686	/*
				687	* Buffer is zero and sparse, no need to write
				688	* it.
				689	*/
				690	bh->b_blocknr = -1;
				691	clear_buffer_dirty(bh);
				692	continue;
				693	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	694	// TODO: Instantiate the hole.
				695	// clear_buffer_new(bh);
				696	// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
				697	ntfs_error(vol->sb, "Writing into sparse regions is "
				698	"not supported yet. Sorry.");
				699	err = -EOPNOTSUPP;
				700	break;
				701	}
				702	/* If first try and runlist unmapped, map and retry. */
				703	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				704	is_retry = TRUE;
				705	/*
				706	* Attempt to map runlist, dropping lock for
				707	* the duration.
				708	*/
				709	up_read(&ni->runlist.lock);
				710	err = ntfs_map_runlist(ni, vcn);
				711	if (likely(!err))
				712	goto lock_retry_remap;
				713	rl = NULL;
				714	lcn = err;
Anton Altaparmakov	9f993fe	2005-06-25 16:15:36 +0100	[diff] [blame]	715	} else if (!rl)
				716	up_read(&ni->runlist.lock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	717	/* Failed to map the buffer, even after retrying. */
				718	bh->b_blocknr = -1;
				719	ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				720	"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				721	"because its location on disk could not be "
				722	"determined%s (error code %lli).", ni->mft_no,
				723	ni->type, (unsigned long long)vcn,
				724	vcn_ofs, is_retry ? " even after "
				725	"retrying" : "", (long long)lcn);
				726	if (!err)
				727	err = -EIO;
				728	break;
				729	} while (block++, (bh = bh->b_this_page) != head);
				730
				731	/* Release the lock if we took it. */
				732	if (rl)
				733	up_read(&ni->runlist.lock);
				734
				735	/* For the error case, need to reset bh to the beginning. */
				736	bh = head;
				737
				738	/* Just an optimization, so ->readpage() isn't called later. */
				739	if (unlikely(!PageUptodate(page))) {
				740	int uptodate = 1;
				741	do {
				742	if (!buffer_uptodate(bh)) {
				743	uptodate = 0;
				744	bh = head;
				745	break;
				746	}
				747	} while ((bh = bh->b_this_page) != head);
				748	if (uptodate)
				749	SetPageUptodate(page);
				750	}
				751
				752	/* Setup all mapped, dirty buffers for async write i/o. */
				753	do {
				754	get_bh(bh);
				755	if (buffer_mapped(bh) && buffer_dirty(bh)) {
				756	lock_buffer(bh);
				757	if (test_clear_buffer_dirty(bh)) {
				758	BUG_ON(!buffer_uptodate(bh));
				759	mark_buffer_async_write(bh);
				760	} else
				761	unlock_buffer(bh);
				762	} else if (unlikely(err)) {
				763	/*
				764	* For the error case. The buffer may have been set
				765	* dirty during attachment to a dirty page.
				766	*/
				767	if (err != -ENOMEM)
				768	clear_buffer_dirty(bh);
				769	}
				770	} while ((bh = bh->b_this_page) != head);
				771
				772	if (unlikely(err)) {
				773	// TODO: Remove the -EOPNOTSUPP check later on...
				774	if (unlikely(err == -EOPNOTSUPP))
				775	err = 0;
				776	else if (err == -ENOMEM) {
				777	ntfs_warning(vol->sb, "Error allocating memory. "
				778	"Redirtying page so we try again "
				779	"later.");
				780	/*
				781	* Put the page back on mapping->dirty_pages, but
				782	* leave its buffer's dirty state as-is.
				783	*/
				784	redirty_page_for_writepage(wbc, page);
				785	err = 0;
				786	} else
				787	SetPageError(page);
				788	}
				789
				790	BUG_ON(PageWriteback(page));
				791	set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
				792	unlock_page(page);
				793
				794	/*
				795	* Submit the prepared buffers for i/o. Note the page is unlocked,
				796	* and the async write i/o completion handler can end_page_writeback()
				797	* at any time after the first submit_bh(). So the buffers can then
				798	* disappear...
				799	*/
				800	need_end_writeback = TRUE;
				801	do {
				802	struct buffer_head *next = bh->b_this_page;
				803	if (buffer_async_write(bh)) {
				804	submit_bh(WRITE, bh);
				805	need_end_writeback = FALSE;
				806	}
				807	put_bh(bh);
				808	bh = next;
				809	} while (bh != head);
				810
				811	/* If no i/o was started, need to end_page_writeback(). */
				812	if (unlikely(need_end_writeback))
				813	end_page_writeback(page);
				814
				815	ntfs_debug("Done.");
				816	return err;
				817	}
				818
				819	/**
				820	* ntfs_write_mst_block - write a @page to the backing store
				821	* @page: page cache page to write out
				822	* @wbc: writeback control structure
				823	*
				824	* This function is for writing pages belonging to non-resident, mst protected
				825	* attributes to their backing store. The only supported attributes are index
				826	* allocation and $MFT/$DATA. Both directory inodes and index inodes are
				827	* supported for the index allocation case.
				828	*
				829	* The page must remain locked for the duration of the write because we apply
				830	* the mst fixups, write, and then undo the fixups, so if we were to unlock the
				831	* page before undoing the fixups, any other user of the page will see the
				832	* page contents as corrupt.
				833	*
				834	* We clear the page uptodate flag for the duration of the function to ensure
				835	* exclusion for the $MFT/$DATA case against someone mapping an mft record we
				836	* are about to apply the mst fixups to.
				837	*
				838	* Return 0 on success and -errno on error.
				839	*
				840	* Based on ntfs_write_block(), ntfs_mft_writepage(), and
				841	* write_mft_record_nolock().
				842	*/
				843	static int ntfs_write_mst_block(struct page *page,
				844	struct writeback_control *wbc)
				845	{
				846	sector_t block, dblock, rec_block;
				847	struct inode *vi = page->mapping->host;
				848	ntfs_inode *ni = NTFS_I(vi);
				849	ntfs_volume *vol = ni->vol;
				850	u8 *kaddr;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	851	unsigned int rec_size = ni->itype.index.block_size;
				852	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
				853	struct buffer_head bh, head, tbh, rec_start_bh;
Anton Altaparmakov	d53ee32	2005-04-06 16:11:20 +0100	[diff] [blame]	854	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	855	runlist_element *rl;
Anton Altaparmakov	d53ee32	2005-04-06 16:11:20 +0100	[diff] [blame]	856	int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
				857	unsigned bh_size, rec_size_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	858	BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
Anton Altaparmakov	d53ee32	2005-04-06 16:11:20 +0100	[diff] [blame]	859	unsigned char bh_size_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	860
				861	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				862	"0x%lx.", vi->i_ino, ni->type, page->index);
				863	BUG_ON(!NInoNonResident(ni));
				864	BUG_ON(!NInoMstProtected(ni));
				865	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
				866	/*
				867	* NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
				868	* in its page cache were to be marked dirty. However this should
				869	* never happen with the current driver and considering we do not
				870	* handle this case here we do want to BUG(), at least for now.
				871	*/
				872	BUG_ON(!(is_mft \|\| S_ISDIR(vi->i_mode) \|\|
				873	(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
Anton Altaparmakov	d53ee32	2005-04-06 16:11:20 +0100	[diff] [blame]	874	bh_size_bits = vi->i_blkbits;
				875	bh_size = 1 << bh_size_bits;
				876	max_bhs = PAGE_CACHE_SIZE / bh_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	877	BUG_ON(!max_bhs);
Anton Altaparmakov	d53ee32	2005-04-06 16:11:20 +0100	[diff] [blame]	878	BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	879
				880	/* Were we called for sync purposes? */
				881	sync = (wbc->sync_mode == WB_SYNC_ALL);
				882
				883	/* Make sure we have mapped buffers. */
				884	BUG_ON(!page_has_buffers(page));
				885	bh = head = page_buffers(page);
				886	BUG_ON(!bh);
				887
				888	rec_size_bits = ni->itype.index.block_size_bits;
				889	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
				890	bhs_per_rec = rec_size >> bh_size_bits;
				891	BUG_ON(!bhs_per_rec);
				892
				893	/* The first block in the page. */
				894	rec_block = block = (sector_t)page->index <<
				895	(PAGE_CACHE_SHIFT - bh_size_bits);
				896
				897	/* The first out of bounds block for the data size. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	898	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	899
				900	rl = NULL;
				901	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
				902	page_is_dirty = rec_is_dirty = FALSE;
				903	rec_start_bh = NULL;
				904	do {
				905	BOOL is_retry = FALSE;
				906
				907	if (likely(block < rec_block)) {
				908	if (unlikely(block >= dblock)) {
				909	clear_buffer_dirty(bh);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	910	set_buffer_uptodate(bh);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	911	continue;
				912	}
				913	/*
				914	* This block is not the first one in the record. We
				915	* ignore the buffer's dirty state because we could
				916	* have raced with a parallel mark_ntfs_record_dirty().
				917	*/
				918	if (!rec_is_dirty)
				919	continue;
				920	if (unlikely(err2)) {
				921	if (err2 != -ENOMEM)
				922	clear_buffer_dirty(bh);
				923	continue;
				924	}
				925	} else /* if (block == rec_block) */ {
				926	BUG_ON(block > rec_block);
				927	/* This block is the first one in the record. */
				928	rec_block += bhs_per_rec;
				929	err2 = 0;
				930	if (unlikely(block >= dblock)) {
				931	clear_buffer_dirty(bh);
				932	continue;
				933	}
				934	if (!buffer_dirty(bh)) {
				935	/* Clean records are not written out. */
				936	rec_is_dirty = FALSE;
				937	continue;
				938	}
				939	rec_is_dirty = TRUE;
				940	rec_start_bh = bh;
				941	}
				942	/* Need to map the buffer if it is not mapped already. */
				943	if (unlikely(!buffer_mapped(bh))) {
				944	VCN vcn;
				945	LCN lcn;
				946	unsigned int vcn_ofs;
				947
Anton Altaparmakov	481d037	2005-08-16 19:42:56 +0100	[diff] [blame]	948	bh->b_bdev = vol->sb->s_bdev;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	949	/* Obtain the vcn and offset of the current block. */
				950	vcn = (VCN)block << bh_size_bits;
				951	vcn_ofs = vcn & vol->cluster_size_mask;
				952	vcn >>= vol->cluster_size_bits;
				953	if (!rl) {
				954	lock_retry_remap:
				955	down_read(&ni->runlist.lock);
				956	rl = ni->runlist.rl;
				957	}
				958	if (likely(rl != NULL)) {
				959	/* Seek to element containing target vcn. */
				960	while (rl->length && rl[1].vcn <= vcn)
				961	rl++;
				962	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				963	} else
				964	lcn = LCN_RL_NOT_MAPPED;
				965	/* Successful remap. */
				966	if (likely(lcn >= 0)) {
				967	/* Setup buffer head to correct block. */
				968	bh->b_blocknr = ((lcn <<
				969	vol->cluster_size_bits) +
				970	vcn_ofs) >> bh_size_bits;
				971	set_buffer_mapped(bh);
				972	} else {
				973	/*
				974	* Remap failed. Retry to map the runlist once
				975	* unless we are working on $MFT which always
				976	* has the whole of its runlist in memory.
				977	*/
				978	if (!is_mft && !is_retry &&
				979	lcn == LCN_RL_NOT_MAPPED) {
				980	is_retry = TRUE;
				981	/*
				982	* Attempt to map runlist, dropping
				983	* lock for the duration.
				984	*/
				985	up_read(&ni->runlist.lock);
				986	err2 = ntfs_map_runlist(ni, vcn);
				987	if (likely(!err2))
				988	goto lock_retry_remap;
				989	if (err2 == -ENOMEM)
				990	page_is_dirty = TRUE;
				991	lcn = err2;
Anton Altaparmakov	9f993fe	2005-06-25 16:15:36 +0100	[diff] [blame]	992	} else {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	993	err2 = -EIO;
Anton Altaparmakov	9f993fe	2005-06-25 16:15:36 +0100	[diff] [blame]	994	if (!rl)
				995	up_read(&ni->runlist.lock);
				996	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	997	/* Hard error. Abort writing this record. */
				998	if (!err \|\| err == -ENOMEM)
				999	err = err2;
				1000	bh->b_blocknr = -1;
				1001	ntfs_error(vol->sb, "Cannot write ntfs record "
				1002	"0x%llx (inode 0x%lx, "
				1003	"attribute type 0x%x) because "
				1004	"its location on disk could "
				1005	"not be determined (error "
Randy Dunlap	8907547	2005-03-03 11:19:53 +0000	[diff] [blame]	1006	"code %lli).",
				1007	(long long)block <<
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1008	bh_size_bits >>
				1009	vol->mft_record_size_bits,
				1010	ni->mft_no, ni->type,
				1011	(long long)lcn);
				1012	/*
				1013	* If this is not the first buffer, remove the
				1014	* buffers in this record from the list of
				1015	* buffers to write and clear their dirty bit
				1016	* if not error -ENOMEM.
				1017	*/
				1018	if (rec_start_bh != bh) {
				1019	while (bhs[--nr_bhs] != rec_start_bh)
				1020	;
				1021	if (err2 != -ENOMEM) {
				1022	do {
				1023	clear_buffer_dirty(
				1024	rec_start_bh);
				1025	} while ((rec_start_bh =
				1026	rec_start_bh->
				1027	b_this_page) !=
				1028	bh);
				1029	}
				1030	}
				1031	continue;
				1032	}
				1033	}
				1034	BUG_ON(!buffer_uptodate(bh));
				1035	BUG_ON(nr_bhs >= max_bhs);
				1036	bhs[nr_bhs++] = bh;
				1037	} while (block++, (bh = bh->b_this_page) != head);
				1038	if (unlikely(rl))
				1039	up_read(&ni->runlist.lock);
				1040	/* If there were no dirty buffers, we are done. */
				1041	if (!nr_bhs)
				1042	goto done;
				1043	/* Map the page so we can access its contents. */
				1044	kaddr = kmap(page);
				1045	/* Clear the page uptodate flag whilst the mst fixups are applied. */
				1046	BUG_ON(!PageUptodate(page));
				1047	ClearPageUptodate(page);
				1048	for (i = 0; i < nr_bhs; i++) {
				1049	unsigned int ofs;
				1050
				1051	/* Skip buffers which are not at the beginning of records. */
				1052	if (i % bhs_per_rec)
				1053	continue;
				1054	tbh = bhs[i];
				1055	ofs = bh_offset(tbh);
				1056	if (is_mft) {
				1057	ntfs_inode *tni;
				1058	unsigned long mft_no;
				1059
				1060	/* Get the mft record number. */
				1061	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1062	>> rec_size_bits;
				1063	/* Check whether to write this mft record. */
				1064	tni = NULL;
				1065	if (!ntfs_may_write_mft_record(vol, mft_no,
				1066	(MFT_RECORD*)(kaddr + ofs), &tni)) {
				1067	/*
				1068	* The record should not be written. This
				1069	* means we need to redirty the page before
				1070	* returning.
				1071	*/
				1072	page_is_dirty = TRUE;
				1073	/*
				1074	* Remove the buffers in this mft record from
				1075	* the list of buffers to write.
				1076	*/
				1077	do {
				1078	bhs[i] = NULL;
				1079	} while (++i % bhs_per_rec);
				1080	continue;
				1081	}
				1082	/*
				1083	* The record should be written. If a locked ntfs
				1084	* inode was returned, add it to the array of locked
				1085	* ntfs inodes.
				1086	*/
				1087	if (tni)
				1088	locked_nis[nr_locked_nis++] = tni;
				1089	}
				1090	/* Apply the mst protection fixups. */
				1091	err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				1092	rec_size);
				1093	if (unlikely(err2)) {
				1094	if (!err \|\| err == -ENOMEM)
				1095	err = -EIO;
				1096	ntfs_error(vol->sb, "Failed to apply mst fixups "
				1097	"(inode 0x%lx, attribute type 0x%x, "
				1098	"page index 0x%lx, page offset 0x%x)!"
				1099	" Unmount and run chkdsk.", vi->i_ino,
				1100	ni->type, page->index, ofs);
				1101	/*
				1102	* Mark all the buffers in this record clean as we do
				1103	* not want to write corrupt data to disk.
				1104	*/
				1105	do {
				1106	clear_buffer_dirty(bhs[i]);
				1107	bhs[i] = NULL;
				1108	} while (++i % bhs_per_rec);
				1109	continue;
				1110	}
				1111	nr_recs++;
				1112	}
				1113	/* If no records are to be written out, we are done. */
				1114	if (!nr_recs)
				1115	goto unm_done;
				1116	flush_dcache_page(page);
				1117	/* Lock buffers and start synchronous write i/o on them. */
				1118	for (i = 0; i < nr_bhs; i++) {
				1119	tbh = bhs[i];
				1120	if (!tbh)
				1121	continue;
				1122	if (unlikely(test_set_buffer_locked(tbh)))
				1123	BUG();
				1124	/* The buffer dirty state is now irrelevant, just clean it. */
				1125	clear_buffer_dirty(tbh);
				1126	BUG_ON(!buffer_uptodate(tbh));
				1127	BUG_ON(!buffer_mapped(tbh));
				1128	get_bh(tbh);
				1129	tbh->b_end_io = end_buffer_write_sync;
				1130	submit_bh(WRITE, tbh);
				1131	}
				1132	/* Synchronize the mft mirror now if not @sync. */
				1133	if (is_mft && !sync)
				1134	goto do_mirror;
				1135	do_wait:
				1136	/* Wait on i/o completion of buffers. */
				1137	for (i = 0; i < nr_bhs; i++) {
				1138	tbh = bhs[i];
				1139	if (!tbh)
				1140	continue;
				1141	wait_on_buffer(tbh);
				1142	if (unlikely(!buffer_uptodate(tbh))) {
				1143	ntfs_error(vol->sb, "I/O error while writing ntfs "
				1144	"record buffer (inode 0x%lx, "
				1145	"attribute type 0x%x, page index "
				1146	"0x%lx, page offset 0x%lx)! Unmount "
				1147	"and run chkdsk.", vi->i_ino, ni->type,
				1148	page->index, bh_offset(tbh));
				1149	if (!err \|\| err == -ENOMEM)
				1150	err = -EIO;
				1151	/*
				1152	* Set the buffer uptodate so the page and buffer
				1153	* states do not become out of sync.
				1154	*/
				1155	set_buffer_uptodate(tbh);
				1156	}
				1157	}
				1158	/* If @sync, now synchronize the mft mirror. */
				1159	if (is_mft && sync) {
				1160	do_mirror:
				1161	for (i = 0; i < nr_bhs; i++) {
				1162	unsigned long mft_no;
				1163	unsigned int ofs;
				1164
				1165	/*
				1166	* Skip buffers which are not at the beginning of
				1167	* records.
				1168	*/
				1169	if (i % bhs_per_rec)
				1170	continue;
				1171	tbh = bhs[i];
				1172	/* Skip removed buffers (and hence records). */
				1173	if (!tbh)
				1174	continue;
				1175	ofs = bh_offset(tbh);
				1176	/* Get the mft record number. */
				1177	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1178	>> rec_size_bits;
				1179	if (mft_no < vol->mftmirr_size)
				1180	ntfs_sync_mft_mirror(vol, mft_no,
				1181	(MFT_RECORD*)(kaddr + ofs),
				1182	sync);
				1183	}
				1184	if (!sync)
				1185	goto do_wait;
				1186	}
				1187	/* Remove the mst protection fixups again. */
				1188	for (i = 0; i < nr_bhs; i++) {
				1189	if (!(i % bhs_per_rec)) {
				1190	tbh = bhs[i];
				1191	if (!tbh)
				1192	continue;
				1193	post_write_mst_fixup((NTFS_RECORD*)(kaddr +
				1194	bh_offset(tbh)));
				1195	}
				1196	}
				1197	flush_dcache_page(page);
				1198	unm_done:
				1199	/* Unlock any locked inodes. */
				1200	while (nr_locked_nis-- > 0) {
				1201	ntfs_inode tni, base_tni;
				1202
				1203	tni = locked_nis[nr_locked_nis];
				1204	/* Get the base inode. */
				1205	down(&tni->extent_lock);
				1206	if (tni->nr_extents >= 0)
				1207	base_tni = tni;
				1208	else {
				1209	base_tni = tni->ext.base_ntfs_ino;
				1210	BUG_ON(!base_tni);
				1211	}
				1212	up(&tni->extent_lock);
				1213	ntfs_debug("Unlocking %s inode 0x%lx.",
				1214	tni == base_tni ? "base" : "extent",
				1215	tni->mft_no);
				1216	up(&tni->mrec_lock);
				1217	atomic_dec(&tni->count);
				1218	iput(VFS_I(base_tni));
				1219	}
				1220	SetPageUptodate(page);
				1221	kunmap(page);
				1222	done:
				1223	if (unlikely(err && err != -ENOMEM)) {
				1224	/*
				1225	* Set page error if there is only one ntfs record in the page.
				1226	* Otherwise we would loose per-record granularity.
				1227	*/
				1228	if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
				1229	SetPageError(page);
				1230	NVolSetErrors(vol);
				1231	}
				1232	if (page_is_dirty) {
				1233	ntfs_debug("Page still contains one or more dirty ntfs "
				1234	"records. Redirtying the page starting at "
				1235	"record 0x%lx.", page->index <<
				1236	(PAGE_CACHE_SHIFT - rec_size_bits));
				1237	redirty_page_for_writepage(wbc, page);
				1238	unlock_page(page);
				1239	} else {
				1240	/*
				1241	* Keep the VM happy. This must be done otherwise the
				1242	* radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
				1243	* the page is clean.
				1244	*/
				1245	BUG_ON(PageWriteback(page));
				1246	set_page_writeback(page);
				1247	unlock_page(page);
				1248	end_page_writeback(page);
				1249	}
				1250	if (likely(!err))
				1251	ntfs_debug("Done.");
				1252	return err;
				1253	}
				1254
				1255	/**
				1256	* ntfs_writepage - write a @page to the backing store
				1257	* @page: page cache page to write out
				1258	* @wbc: writeback control structure
				1259	*
				1260	* This is called from the VM when it wants to have a dirty ntfs page cache
				1261	* page cleaned. The VM has already locked the page and marked it clean.
				1262	*
				1263	* For non-resident attributes, ntfs_writepage() writes the @page by calling
				1264	* the ntfs version of the generic block_write_full_page() function,
				1265	* ntfs_write_block(), which in turn if necessary creates and writes the
				1266	* buffers associated with the page asynchronously.
				1267	*
				1268	* For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
				1269	* the data to the mft record (which at this stage is most likely in memory).
				1270	* The mft record is then marked dirty and written out asynchronously via the
				1271	* vfs inode dirty code path for the inode the mft record belongs to or via the
				1272	* vm page dirty code path for the page the mft record is in.
				1273	*
				1274	* Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
				1275	*
				1276	* Return 0 on success and -errno on error.
				1277	*/
				1278	static int ntfs_writepage(struct page page, struct writeback_control wbc)
				1279	{
				1280	loff_t i_size;
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1281	struct inode *vi = page->mapping->host;
				1282	ntfs_inode base_ni = NULL, ni = NTFS_I(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1283	char *kaddr;
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1284	ntfs_attr_search_ctx *ctx = NULL;
				1285	MFT_RECORD *m = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1286	u32 attr_len;
				1287	int err;
				1288
Anton Altaparmakov	905685f	2005-03-10 11:06:19 +0000	[diff] [blame]	1289	retry_writepage:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1290	BUG_ON(!PageLocked(page));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1291	i_size = i_size_read(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1292	/* Is the page fully outside i_size? (truncate in progress) */
				1293	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
				1294	PAGE_CACHE_SHIFT)) {
				1295	/*
				1296	* The page may have dirty, unmapped buffers. Make them
				1297	* freeable here, so the page does not leak.
				1298	*/
				1299	block_invalidatepage(page, 0);
				1300	unlock_page(page);
				1301	ntfs_debug("Write outside i_size - truncated?");
				1302	return 0;
				1303	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1304	/* NInoNonResident() == NInoIndexAllocPresent() */
				1305	if (NInoNonResident(ni)) {
				1306	/*
				1307	* Only unnamed $DATA attributes can be compressed, encrypted,
				1308	* and/or sparse.
				1309	*/
				1310	if (ni->type == AT_DATA && !ni->name_len) {
				1311	/* If file is encrypted, deny access, just like NT4. */
				1312	if (NInoEncrypted(ni)) {
				1313	unlock_page(page);
				1314	ntfs_debug("Denying write access to encrypted "
				1315	"file.");
				1316	return -EACCES;
				1317	}
				1318	/* Compressed data streams are handled in compress.c. */
				1319	if (NInoCompressed(ni)) {
				1320	// TODO: Implement and replace this check with
				1321	// return ntfs_write_compressed_block(page);
				1322	unlock_page(page);
				1323	ntfs_error(vi->i_sb, "Writing to compressed "
				1324	"files is not supported yet. "
				1325	"Sorry.");
				1326	return -EOPNOTSUPP;
				1327	}
				1328	// TODO: Implement and remove this check.
				1329	if (NInoSparse(ni)) {
				1330	unlock_page(page);
				1331	ntfs_error(vi->i_sb, "Writing to sparse files "
				1332	"is not supported yet. Sorry.");
				1333	return -EOPNOTSUPP;
				1334	}
				1335	}
				1336	/* We have to zero every time due to mmap-at-end-of-file. */
				1337	if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
				1338	/* The page straddles i_size. */
				1339	unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
				1340	kaddr = kmap_atomic(page, KM_USER0);
				1341	memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
				1342	flush_dcache_page(page);
				1343	kunmap_atomic(kaddr, KM_USER0);
				1344	}
				1345	/* Handle mst protected attributes. */
				1346	if (NInoMstProtected(ni))
				1347	return ntfs_write_mst_block(page, wbc);
				1348	/* Normal data stream. */
				1349	return ntfs_write_block(page, wbc);
				1350	}
				1351	/*
				1352	* Attribute is resident, implying it is not compressed, encrypted,
				1353	* sparse, or mst protected. This also means the attribute is smaller
				1354	* than an mft record and hence smaller than a page, so can simply
				1355	* return error on any pages with index above 0.
				1356	*/
				1357	BUG_ON(page_has_buffers(page));
				1358	BUG_ON(!PageUptodate(page));
				1359	if (unlikely(page->index > 0)) {
				1360	ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
				1361	"Aborting write.", page->index);
				1362	BUG_ON(PageWriteback(page));
				1363	set_page_writeback(page);
				1364	unlock_page(page);
				1365	end_page_writeback(page);
				1366	return -EIO;
				1367	}
				1368	if (!NInoAttr(ni))
				1369	base_ni = ni;
				1370	else
				1371	base_ni = ni->ext.base_ntfs_ino;
				1372	/* Map, pin, and lock the mft record. */
				1373	m = map_mft_record(base_ni);
				1374	if (IS_ERR(m)) {
				1375	err = PTR_ERR(m);
				1376	m = NULL;
				1377	ctx = NULL;
				1378	goto err_out;
				1379	}
Anton Altaparmakov	905685f	2005-03-10 11:06:19 +0000	[diff] [blame]	1380	/*
				1381	* If a parallel write made the attribute non-resident, drop the mft
				1382	* record and retry the writepage.
				1383	*/
				1384	if (unlikely(NInoNonResident(ni))) {
				1385	unmap_mft_record(base_ni);
				1386	goto retry_writepage;
				1387	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1388	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1389	if (unlikely(!ctx)) {
				1390	err = -ENOMEM;
				1391	goto err_out;
				1392	}
				1393	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1394	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1395	if (unlikely(err))
				1396	goto err_out;
				1397	/*
				1398	* Keep the VM happy. This must be done otherwise the radix-tree tag
				1399	* PAGECACHE_TAG_DIRTY remains set even though the page is clean.
				1400	*/
				1401	BUG_ON(PageWriteback(page));
				1402	set_page_writeback(page);
				1403	unlock_page(page);
				1404
				1405	/*
				1406	* Here, we don't need to zero the out of bounds area everytime because
				1407	* the below memcpy() already takes care of the mmap-at-end-of-file
				1408	* requirements. If the file is converted to a non-resident one, then
				1409	* the code path use is switched to the non-resident one where the
				1410	* zeroing happens on each ntfs_writepage() invocation.
				1411	*
				1412	* The above also applies nicely when i_size is decreased.
				1413	*
				1414	* When i_size is increased, the memory between the old and new i_size
				1415	* _must_ be zeroed (or overwritten with new data). Otherwise we will
				1416	* expose data to userspace/disk which should never have been exposed.
				1417	*
				1418	* FIXME: Ensure that i_size increases do the zeroing/overwriting and
				1419	* if we cannot guarantee that, then enable the zeroing below. If the
				1420	* zeroing below is enabled, we MUST move the unlock_page() from above
				1421	* to after the kunmap_atomic(), i.e. just before the
				1422	* end_page_writeback().
				1423	* UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
				1424	* increases for resident attributes so those are ok.
				1425	* TODO: ntfs_truncate(), others?
				1426	*/
				1427
				1428	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1429	i_size = i_size_read(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1430	if (unlikely(attr_len > i_size)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1431	attr_len = i_size;
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1432	ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1433	}
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1434	kaddr = kmap_atomic(page, KM_USER0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1435	/* Copy the data from the page to the mft record. */
				1436	memcpy((u8*)ctx->attr +
				1437	le16_to_cpu(ctx->attr->data.resident.value_offset),
				1438	kaddr, attr_len);
				1439	flush_dcache_mft_record_page(ctx->ntfs_ino);
				1440	/* Zero out of bounds area in the page cache page. */
				1441	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				1442	flush_dcache_page(page);
				1443	kunmap_atomic(kaddr, KM_USER0);
				1444
				1445	end_page_writeback(page);
				1446
				1447	/* Mark the mft record dirty, so it gets written back. */
				1448	mark_mft_record_dirty(ctx->ntfs_ino);
				1449	ntfs_attr_put_search_ctx(ctx);
				1450	unmap_mft_record(base_ni);
				1451	return 0;
				1452	err_out:
				1453	if (err == -ENOMEM) {
				1454	ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				1455	"page so we try again later.");
				1456	/*
				1457	* Put the page back on mapping->dirty_pages, but leave its
				1458	* buffers' dirty state as-is.
				1459	*/
				1460	redirty_page_for_writepage(wbc, page);
				1461	err = 0;
				1462	} else {
				1463	ntfs_error(vi->i_sb, "Resident attribute write failed with "
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1464	"error %i.", err);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1465	SetPageError(page);
Anton Altaparmakov	149f0c5	2005-01-12 13:52:30 +0000	[diff] [blame]	1466	NVolSetErrors(ni->vol);
				1467	make_bad_inode(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1468	}
				1469	unlock_page(page);
				1470	if (ctx)
				1471	ntfs_attr_put_search_ctx(ctx);
				1472	if (m)
				1473	unmap_mft_record(base_ni);
				1474	return err;
				1475	}
				1476
				1477	/**
				1478	* ntfs_prepare_nonresident_write -
				1479	*
				1480	*/
				1481	static int ntfs_prepare_nonresident_write(struct page *page,
				1482	unsigned from, unsigned to)
				1483	{
				1484	VCN vcn;
				1485	LCN lcn;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1486	s64 initialized_size;
				1487	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1488	sector_t block, ablock, iblock;
				1489	struct inode *vi;
				1490	ntfs_inode *ni;
				1491	ntfs_volume *vol;
				1492	runlist_element *rl;
				1493	struct buffer_head bh, head, wait[2], *wait_bh = wait;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1494	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1495	unsigned int vcn_ofs, block_start, block_end, blocksize;
				1496	int err;
				1497	BOOL is_retry;
				1498	unsigned char blocksize_bits;
				1499
				1500	vi = page->mapping->host;
				1501	ni = NTFS_I(vi);
				1502	vol = ni->vol;
				1503
				1504	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				1505	"0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
				1506	page->index, from, to);
				1507
				1508	BUG_ON(!NInoNonResident(ni));
				1509
				1510	blocksize_bits = vi->i_blkbits;
				1511	blocksize = 1 << blocksize_bits;
				1512
				1513	/*
				1514	* create_empty_buffers() will create uptodate/dirty buffers if the
				1515	* page is uptodate/dirty.
				1516	*/
				1517	if (!page_has_buffers(page))
				1518	create_empty_buffers(page, blocksize, 0);
				1519	bh = head = page_buffers(page);
				1520	if (unlikely(!bh))
				1521	return -ENOMEM;
				1522
				1523	/* The first block in the page. */
				1524	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				1525
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1526	read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1527	/*
Anton Altaparmakov	b6ad6c5	2005-02-15 10:08:43 +0000	[diff] [blame]	1528	* The first out of bounds block for the allocated size. No need to
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1529	* round up as allocated_size is in multiples of cluster size and the
				1530	* minimum cluster size is 512 bytes, which is equal to the smallest
				1531	* blocksize.
				1532	*/
				1533	ablock = ni->allocated_size >> blocksize_bits;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1534	i_size = i_size_read(vi);
				1535	initialized_size = ni->initialized_size;
				1536	read_unlock_irqrestore(&ni->size_lock, flags);
				1537
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1538	/* The last (fully or partially) initialized block. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1539	iblock = initialized_size >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1540
				1541	/* Loop through all the buffers in the page. */
				1542	block_start = 0;
				1543	rl = NULL;
				1544	err = 0;
				1545	do {
				1546	block_end = block_start + blocksize;
				1547	/*
				1548	* If buffer @bh is outside the write, just mark it uptodate
				1549	* if the page is uptodate and continue with the next buffer.
				1550	*/
				1551	if (block_end <= from \|\| block_start >= to) {
				1552	if (PageUptodate(page)) {
				1553	if (!buffer_uptodate(bh))
				1554	set_buffer_uptodate(bh);
				1555	}
				1556	continue;
				1557	}
				1558	/*
				1559	* @bh is at least partially being written to.
				1560	* Make sure it is not marked as new.
				1561	*/
				1562	//if (buffer_new(bh))
				1563	// clear_buffer_new(bh);
				1564
				1565	if (block >= ablock) {
				1566	// TODO: block is above allocated_size, need to
				1567	// allocate it. Best done in one go to accommodate not
				1568	// only block but all above blocks up to and including:
				1569	// ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
				1570	// - 1) >> blocksize_bits. Obviously will need to round
				1571	// up to next cluster boundary, too. This should be
				1572	// done with a helper function, so it can be reused.
				1573	ntfs_error(vol->sb, "Writing beyond allocated size "
				1574	"is not supported yet. Sorry.");
				1575	err = -EOPNOTSUPP;
				1576	goto err_out;
				1577	// Need to update ablock.
				1578	// Need to set_buffer_new() on all block bhs that are
				1579	// newly allocated.
				1580	}
				1581	/*
				1582	* Now we have enough allocated size to fulfill the whole
				1583	* request, i.e. block < ablock is true.
				1584	*/
				1585	if (unlikely((block >= iblock) &&
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1586	(initialized_size < i_size))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1587	/*
				1588	* If this page is fully outside initialized size, zero
				1589	* out all pages between the current initialized size
				1590	* and the current page. Just use ntfs_readpage() to do
				1591	* the zeroing transparently.
				1592	*/
				1593	if (block > iblock) {
				1594	// TODO:
				1595	// For each page do:
				1596	// - read_cache_page()
				1597	// Again for each page do:
				1598	// - wait_on_page_locked()
				1599	// - Check (PageUptodate(page) &&
				1600	// !PageError(page))
				1601	// Update initialized size in the attribute and
				1602	// in the inode.
				1603	// Again, for each page do:
				1604	// __set_page_dirty_buffers();
				1605	// page_cache_release()
				1606	// We don't need to wait on the writes.
				1607	// Update iblock.
				1608	}
				1609	/*
				1610	* The current page straddles initialized size. Zero
				1611	* all non-uptodate buffers and set them uptodate (and
				1612	* dirty?). Note, there aren't any non-uptodate buffers
				1613	* if the page is uptodate.
				1614	* FIXME: For an uptodate page, the buffers may need to
				1615	* be written out because they were not initialized on
				1616	* disk before.
				1617	*/
				1618	if (!PageUptodate(page)) {
				1619	// TODO:
				1620	// Zero any non-uptodate buffers up to i_size.
				1621	// Set them uptodate and dirty.
				1622	}
				1623	// TODO:
				1624	// Update initialized size in the attribute and in the
				1625	// inode (up to i_size).
				1626	// Update iblock.
				1627	// FIXME: This is inefficient. Try to batch the two
				1628	// size changes to happen in one go.
				1629	ntfs_error(vol->sb, "Writing beyond initialized size "
				1630	"is not supported yet. Sorry.");
				1631	err = -EOPNOTSUPP;
				1632	goto err_out;
				1633	// Do NOT set_buffer_new() BUT DO clear buffer range
				1634	// outside write request range.
				1635	// set_buffer_uptodate() on complete buffers as well as
				1636	// set_buffer_dirty().
				1637	}
				1638
				1639	/* Need to map unmapped buffers. */
				1640	if (!buffer_mapped(bh)) {
				1641	/* Unmapped buffer. Need to map it. */
				1642	bh->b_bdev = vol->sb->s_bdev;
				1643
				1644	/* Convert block into corresponding vcn and offset. */
				1645	vcn = (VCN)block << blocksize_bits >>
				1646	vol->cluster_size_bits;
				1647	vcn_ofs = ((VCN)block << blocksize_bits) &
				1648	vol->cluster_size_mask;
				1649
				1650	is_retry = FALSE;
				1651	if (!rl) {
				1652	lock_retry_remap:
				1653	down_read(&ni->runlist.lock);
				1654	rl = ni->runlist.rl;
				1655	}
				1656	if (likely(rl != NULL)) {
				1657	/* Seek to element containing target vcn. */
				1658	while (rl->length && rl[1].vcn <= vcn)
				1659	rl++;
				1660	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				1661	} else
				1662	lcn = LCN_RL_NOT_MAPPED;
				1663	if (unlikely(lcn < 0)) {
				1664	/*
				1665	* We extended the attribute allocation above.
				1666	* If we hit an ENOENT here it means that the
				1667	* allocation was insufficient which is a bug.
				1668	*/
				1669	BUG_ON(lcn == LCN_ENOENT);
				1670
				1671	/* It is a hole, need to instantiate it. */
				1672	if (lcn == LCN_HOLE) {
				1673	// TODO: Instantiate the hole.
				1674	// clear_buffer_new(bh);
				1675	// unmap_underlying_metadata(bh->b_bdev,
				1676	// bh->b_blocknr);
				1677	// For non-uptodate buffers, need to
				1678	// zero out the region outside the
				1679	// request in this bh or all bhs,
				1680	// depending on what we implemented
				1681	// above.
				1682	// Need to flush_dcache_page().
				1683	// Or could use set_buffer_new()
				1684	// instead?
				1685	ntfs_error(vol->sb, "Writing into "
				1686	"sparse regions is "
				1687	"not supported yet. "
				1688	"Sorry.");
				1689	err = -EOPNOTSUPP;
Anton Altaparmakov	9f993fe	2005-06-25 16:15:36 +0100	[diff] [blame]	1690	if (!rl)
				1691	up_read(&ni->runlist.lock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1692	goto err_out;
				1693	} else if (!is_retry &&
				1694	lcn == LCN_RL_NOT_MAPPED) {
				1695	is_retry = TRUE;
				1696	/*
				1697	* Attempt to map runlist, dropping
				1698	* lock for the duration.
				1699	*/
				1700	up_read(&ni->runlist.lock);
				1701	err = ntfs_map_runlist(ni, vcn);
				1702	if (likely(!err))
				1703	goto lock_retry_remap;
				1704	rl = NULL;
				1705	lcn = err;
Anton Altaparmakov	9f993fe	2005-06-25 16:15:36 +0100	[diff] [blame]	1706	} else if (!rl)
				1707	up_read(&ni->runlist.lock);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1708	/*
				1709	* Failed to map the buffer, even after
				1710	* retrying.
				1711	*/
				1712	bh->b_blocknr = -1;
				1713	ntfs_error(vol->sb, "Failed to write to inode "
				1714	"0x%lx, attribute type 0x%x, "
				1715	"vcn 0x%llx, offset 0x%x "
				1716	"because its location on disk "
				1717	"could not be determined%s "
				1718	"(error code %lli).",
				1719	ni->mft_no, ni->type,
				1720	(unsigned long long)vcn,
				1721	vcn_ofs, is_retry ? " even "
				1722	"after retrying" : "",
				1723	(long long)lcn);
				1724	if (!err)
				1725	err = -EIO;
				1726	goto err_out;
				1727	}
				1728	/* We now have a successful remap, i.e. lcn >= 0. */
				1729
				1730	/* Setup buffer head to correct block. */
				1731	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				1732	+ vcn_ofs) >> blocksize_bits;
				1733	set_buffer_mapped(bh);
				1734
				1735	// FIXME: Something analogous to this is needed for
				1736	// each newly allocated block, i.e. BH_New.
				1737	// FIXME: Might need to take this out of the
				1738	// if (!buffer_mapped(bh)) {}, depending on how we
				1739	// implement things during the allocated_size and
				1740	// initialized_size extension code above.
				1741	if (buffer_new(bh)) {
				1742	clear_buffer_new(bh);
				1743	unmap_underlying_metadata(bh->b_bdev,
				1744	bh->b_blocknr);
				1745	if (PageUptodate(page)) {
				1746	set_buffer_uptodate(bh);
				1747	continue;
				1748	}
				1749	/*
				1750	* Page is _not_ uptodate, zero surrounding
				1751	* region. NOTE: This is how we decide if to
				1752	* zero or not!
				1753	*/
				1754	if (block_end > to \|\| block_start < from) {
				1755	void *kaddr;
				1756
				1757	kaddr = kmap_atomic(page, KM_USER0);
				1758	if (block_end > to)
				1759	memset(kaddr + to, 0,
				1760	block_end - to);
				1761	if (block_start < from)
				1762	memset(kaddr + block_start, 0,
				1763	from -
				1764	block_start);
				1765	flush_dcache_page(page);
				1766	kunmap_atomic(kaddr, KM_USER0);
				1767	}
				1768	continue;
				1769	}
				1770	}
				1771	/* @bh is mapped, set it uptodate if the page is uptodate. */
				1772	if (PageUptodate(page)) {
				1773	if (!buffer_uptodate(bh))
				1774	set_buffer_uptodate(bh);
				1775	continue;
				1776	}
				1777	/*
				1778	* The page is not uptodate. The buffer is mapped. If it is not
				1779	* uptodate, and it is only partially being written to, we need
				1780	* to read the buffer in before the write, i.e. right now.
				1781	*/
				1782	if (!buffer_uptodate(bh) &&
				1783	(block_start < from \|\| block_end > to)) {
				1784	ll_rw_block(READ, 1, &bh);
				1785	*wait_bh++ = bh;
				1786	}
				1787	} while (block++, block_start = block_end,
				1788	(bh = bh->b_this_page) != head);
				1789
				1790	/* Release the lock if we took it. */
				1791	if (rl) {
				1792	up_read(&ni->runlist.lock);
				1793	rl = NULL;
				1794	}
				1795
				1796	/* If we issued read requests, let them complete. */
				1797	while (wait_bh > wait) {
				1798	wait_on_buffer(*--wait_bh);
				1799	if (!buffer_uptodate(*wait_bh))
				1800	return -EIO;
				1801	}
				1802
				1803	ntfs_debug("Done.");
				1804	return 0;
				1805	err_out:
				1806	/*
				1807	* Zero out any newly allocated blocks to avoid exposing stale data.
				1808	* If BH_New is set, we know that the block was newly allocated in the
				1809	* above loop.
				1810	* FIXME: What about initialized_size increments? Have we done all the
				1811	* required zeroing above? If not this error handling is broken, and
				1812	* in particular the if (block_end <= from) check is completely bogus.
				1813	*/
				1814	bh = head;
				1815	block_start = 0;
				1816	is_retry = FALSE;
				1817	do {
				1818	block_end = block_start + blocksize;
				1819	if (block_end <= from)
				1820	continue;
				1821	if (block_start >= to)
				1822	break;
				1823	if (buffer_new(bh)) {
				1824	void *kaddr;
				1825
				1826	clear_buffer_new(bh);
				1827	kaddr = kmap_atomic(page, KM_USER0);
				1828	memset(kaddr + block_start, 0, bh->b_size);
				1829	kunmap_atomic(kaddr, KM_USER0);
				1830	set_buffer_uptodate(bh);
				1831	mark_buffer_dirty(bh);
				1832	is_retry = TRUE;
				1833	}
				1834	} while (block_start = block_end, (bh = bh->b_this_page) != head);
				1835	if (is_retry)
				1836	flush_dcache_page(page);
				1837	if (rl)
				1838	up_read(&ni->runlist.lock);
				1839	return err;
				1840	}
				1841
				1842	/**
				1843	* ntfs_prepare_write - prepare a page for receiving data
				1844	*
				1845	* This is called from generic_file_write() with i_sem held on the inode
				1846	* (@page->mapping->host). The @page is locked but not kmap()ped. The source
				1847	* data has not yet been copied into the @page.
				1848	*
				1849	* Need to extend the attribute/fill in holes if necessary, create blocks and
				1850	* make partially overwritten blocks uptodate,
				1851	*
				1852	* i_size is not to be modified yet.
				1853	*
				1854	* Return 0 on success or -errno on error.
				1855	*
				1856	* Should be using block_prepare_write() [support for sparse files] or
				1857	* cont_prepare_write() [no support for sparse files]. Cannot do that due to
				1858	* ntfs specifics but can look at them for implementation guidance.
				1859	*
				1860	* Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
				1861	* the first byte in the page that will be written to and @to is the first byte
				1862	* after the last byte that will be written to.
				1863	*/
				1864	static int ntfs_prepare_write(struct file file, struct page page,
				1865	unsigned from, unsigned to)
				1866	{
				1867	s64 new_size;
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1868	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1869	struct inode *vi = page->mapping->host;
				1870	ntfs_inode base_ni = NULL, ni = NTFS_I(vi);
				1871	ntfs_volume *vol = ni->vol;
				1872	ntfs_attr_search_ctx *ctx = NULL;
				1873	MFT_RECORD *m = NULL;
				1874	ATTR_RECORD *a;
				1875	u8 *kaddr;
				1876	u32 attr_len;
				1877	int err;
				1878
				1879	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				1880	"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
				1881	page->index, from, to);
				1882	BUG_ON(!PageLocked(page));
				1883	BUG_ON(from > PAGE_CACHE_SIZE);
				1884	BUG_ON(to > PAGE_CACHE_SIZE);
				1885	BUG_ON(from > to);
				1886	BUG_ON(NInoMstProtected(ni));
				1887	/*
				1888	* If a previous ntfs_truncate() failed, repeat it and abort if it
				1889	* fails again.
				1890	*/
				1891	if (unlikely(NInoTruncateFailed(ni))) {
				1892	down_write(&vi->i_alloc_sem);
				1893	err = ntfs_truncate(vi);
				1894	up_write(&vi->i_alloc_sem);
				1895	if (err \|\| NInoTruncateFailed(ni)) {
				1896	if (!err)
				1897	err = -EIO;
				1898	goto err_out;
				1899	}
				1900	}
				1901	/* If the attribute is not resident, deal with it elsewhere. */
				1902	if (NInoNonResident(ni)) {
				1903	/*
				1904	* Only unnamed $DATA attributes can be compressed, encrypted,
				1905	* and/or sparse.
				1906	*/
				1907	if (ni->type == AT_DATA && !ni->name_len) {
				1908	/* If file is encrypted, deny access, just like NT4. */
				1909	if (NInoEncrypted(ni)) {
				1910	ntfs_debug("Denying write access to encrypted "
				1911	"file.");
				1912	return -EACCES;
				1913	}
				1914	/* Compressed data streams are handled in compress.c. */
				1915	if (NInoCompressed(ni)) {
				1916	// TODO: Implement and replace this check with
				1917	// return ntfs_write_compressed_block(page);
				1918	ntfs_error(vi->i_sb, "Writing to compressed "
				1919	"files is not supported yet. "
				1920	"Sorry.");
				1921	return -EOPNOTSUPP;
				1922	}
				1923	// TODO: Implement and remove this check.
				1924	if (NInoSparse(ni)) {
				1925	ntfs_error(vi->i_sb, "Writing to sparse files "
				1926	"is not supported yet. Sorry.");
				1927	return -EOPNOTSUPP;
				1928	}
				1929	}
				1930	/* Normal data stream. */
				1931	return ntfs_prepare_nonresident_write(page, from, to);
				1932	}
				1933	/*
				1934	* Attribute is resident, implying it is not compressed, encrypted, or
				1935	* sparse.
				1936	*/
				1937	BUG_ON(page_has_buffers(page));
				1938	new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
				1939	/* If we do not need to resize the attribute allocation we are done. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	1940	if (new_size <= i_size_read(vi))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1941	goto done;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1942	/* Map, pin, and lock the (base) mft record. */
				1943	if (!NInoAttr(ni))
				1944	base_ni = ni;
				1945	else
				1946	base_ni = ni->ext.base_ntfs_ino;
				1947	m = map_mft_record(base_ni);
				1948	if (IS_ERR(m)) {
				1949	err = PTR_ERR(m);
				1950	m = NULL;
				1951	ctx = NULL;
				1952	goto err_out;
				1953	}
				1954	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1955	if (unlikely(!ctx)) {
				1956	err = -ENOMEM;
				1957	goto err_out;
				1958	}
				1959	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1960	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1961	if (unlikely(err)) {
				1962	if (err == -ENOENT)
				1963	err = -EIO;
				1964	goto err_out;
				1965	}
				1966	m = ctx->mrec;
				1967	a = ctx->attr;
				1968	/* The total length of the attribute value. */
				1969	attr_len = le32_to_cpu(a->data.resident.value_length);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1970	/* Fix an eventual previous failure of ntfs_commit_write(). */
Anton Altaparmakov	f40661b	2005-01-13 16:03:38 +0000	[diff] [blame]	1971	i_size = i_size_read(vi);
				1972	if (unlikely(attr_len > i_size)) {
				1973	attr_len = i_size;
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1974	a->data.resident.value_length = cpu_to_le32(attr_len);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1975	}
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	1976	/* If we do not need to resize the attribute allocation we are done. */
				1977	if (new_size <= attr_len)
				1978	goto done_unm;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1979	/* Check if new size is allowed in $AttrDef. */
				1980	err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
				1981	if (unlikely(err)) {
				1982	if (err == -ERANGE) {
				1983	ntfs_error(vol->sb, "Write would cause the inode "
				1984	"0x%lx to exceed the maximum size for "
				1985	"its attribute type (0x%x). Aborting "
				1986	"write.", vi->i_ino,
				1987	le32_to_cpu(ni->type));
				1988	} else {
				1989	ntfs_error(vol->sb, "Inode 0x%lx has unknown "
				1990	"attribute type 0x%x. Aborting "
				1991	"write.", vi->i_ino,
				1992	le32_to_cpu(ni->type));
				1993	err = -EIO;
				1994	}
				1995	goto err_out2;
				1996	}
				1997	/*
				1998	* Extend the attribute record to be able to store the new attribute
				1999	* size.
				2000	*/
				2001	if (new_size >= vol->mft_record_size \|\| ntfs_attr_record_resize(m, a,
				2002	le16_to_cpu(a->data.resident.value_offset) +
				2003	new_size)) {
				2004	/* Not enough space in the mft record. */
				2005	ntfs_error(vol->sb, "Not enough space in the mft record for "
				2006	"the resized attribute value. This is not "
				2007	"supported yet. Aborting write.");
				2008	err = -EOPNOTSUPP;
				2009	goto err_out2;
				2010	}
				2011	/*
				2012	* We have enough space in the mft record to fit the write. This
				2013	* implies the attribute is smaller than the mft record and hence the
				2014	* attribute must be in a single page and hence page->index must be 0.
				2015	*/
				2016	BUG_ON(page->index);
				2017	/*
				2018	* If the beginning of the write is past the old size, enlarge the
				2019	* attribute value up to the beginning of the write and fill it with
				2020	* zeroes.
				2021	*/
				2022	if (from > attr_len) {
				2023	memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
				2024	attr_len, 0, from - attr_len);
				2025	a->data.resident.value_length = cpu_to_le32(from);
				2026	/* Zero the corresponding area in the page as well. */
				2027	if (PageUptodate(page)) {
				2028	kaddr = kmap_atomic(page, KM_USER0);
				2029	memset(kaddr + attr_len, 0, from - attr_len);
				2030	kunmap_atomic(kaddr, KM_USER0);
				2031	flush_dcache_page(page);
				2032	}
				2033	}
				2034	flush_dcache_mft_record_page(ctx->ntfs_ino);
				2035	mark_mft_record_dirty(ctx->ntfs_ino);
Anton Altaparmakov	946929d	2005-01-13 15:26:29 +0000	[diff] [blame]	2036	done_unm:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2037	ntfs_attr_put_search_ctx(ctx);
				2038	unmap_mft_record(base_ni);
				2039	/*
				2040	* Because resident attributes are handled by memcpy() to/from the
				2041	* corresponding MFT record, and because this form of i/o is byte
				2042	* aligned rather than block aligned, there is no need to bring the
				2043	* page uptodate here as in the non-resident case where we need to
				2044	* bring the buffers straddled by the write uptodate before
				2045	* generic_file_write() does the copying from userspace.
				2046	*
				2047	* We thus defer the uptodate bringing of the page region outside the
				2048	* region written to to ntfs_commit_write(), which makes the code
				2049	* simpler and saves one atomic kmap which is good.
				2050	*/
				2051	done:
				2052	ntfs_debug("Done.");
				2053	return 0;
				2054	err_out:
				2055	if (err == -ENOMEM)
				2056	ntfs_warning(vi->i_sb, "Error allocating memory required to "
				2057	"prepare the write.");
				2058	else {
				2059	ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
				2060	"with error %i.", err);
				2061	NVolSetErrors(vol);
				2062	make_bad_inode(vi);
				2063	}
				2064	err_out2:
				2065	if (ctx)
				2066	ntfs_attr_put_search_ctx(ctx);
				2067	if (m)
				2068	unmap_mft_record(base_ni);
				2069	return err;
				2070	}
				2071
				2072	/**
				2073	* ntfs_commit_nonresident_write -
				2074	*
				2075	*/
				2076	static int ntfs_commit_nonresident_write(struct page *page,
				2077	unsigned from, unsigned to)
				2078	{
				2079	s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
				2080	struct inode *vi = page->mapping->host;
				2081	struct buffer_head bh, head;
				2082	unsigned int block_start, block_end, blocksize;
				2083	BOOL partial;
				2084
				2085	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				2086	"0x%lx, from = %u, to = %u.", vi->i_ino,
				2087	NTFS_I(vi)->type, page->index, from, to);
				2088	blocksize = 1 << vi->i_blkbits;
				2089
				2090	// FIXME: We need a whole slew of special cases in here for compressed
				2091	// files for example...
				2092	// For now, we know ntfs_prepare_write() would have failed so we can't
				2093	// get here in any of the cases which we have to special case, so we
				2094	// are just a ripped off, unrolled generic_commit_write().
				2095
				2096	bh = head = page_buffers(page);
				2097	block_start = 0;
				2098	partial = FALSE;
				2099	do {
				2100	block_end = block_start + blocksize;
				2101	if (block_end <= from \|\| block_start >= to) {
				2102	if (!buffer_uptodate(bh))
				2103	partial = TRUE;
				2104	} else {
				2105	set_buffer_uptodate(bh);
				2106	mark_buffer_dirty(bh);
				2107	}
				2108	} while (block_start = block_end, (bh = bh->b_this_page) != head);
				2109	/*
				2110	* If this is a partial write which happened to make all buffers
				2111	* uptodate then we can optimize away a bogus ->readpage() for the next
				2112	* read(). Here we 'discover' whether the page went uptodate as a
				2113	* result of this (potentially partial) write.
				2114	*/
				2115	if (!partial)
				2116	SetPageUptodate(page);
				2117	/*
				2118	* Not convinced about this at all. See disparity comment above. For
				2119	* now we know ntfs_prepare_write() would have failed in the write
				2120	* exceeds i_size case, so this will never trigger which is fine.
				2121	*/
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	2122	if (pos > i_size_read(vi)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2123	ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
				2124	"not supported yet. Sorry.");
				2125	return -EOPNOTSUPP;
				2126	// vi->i_size = pos;
				2127	// mark_inode_dirty(vi);
				2128	}
				2129	ntfs_debug("Done.");
				2130	return 0;
				2131	}
				2132
				2133	/**
				2134	* ntfs_commit_write - commit the received data
				2135	*
				2136	* This is called from generic_file_write() with i_sem held on the inode
				2137	* (@page->mapping->host). The @page is locked but not kmap()ped. The source
				2138	* data has already been copied into the @page. ntfs_prepare_write() has been
				2139	* called before the data copied and it returned success so we can take the
				2140	* results of various BUG checks and some error handling for granted.
				2141	*
				2142	* Need to mark modified blocks dirty so they get written out later when
				2143	* ntfs_writepage() is invoked by the VM.
				2144	*
				2145	* Return 0 on success or -errno on error.
				2146	*
				2147	* Should be using generic_commit_write(). This marks buffers uptodate and
				2148	* dirty, sets the page uptodate if all buffers in the page are uptodate, and
				2149	* updates i_size if the end of io is beyond i_size. In that case, it also
				2150	* marks the inode dirty.
				2151	*
				2152	* Cannot use generic_commit_write() due to ntfs specialities but can look at
				2153	* it for implementation guidance.
				2154	*
				2155	* If things have gone as outlined in ntfs_prepare_write(), then we do not
				2156	* need to do any page content modifications here at all, except in the write
				2157	* to resident attribute case, where we need to do the uptodate bringing here
				2158	* which we combine with the copying into the mft record which means we save
				2159	* one atomic kmap.
				2160	*/
				2161	static int ntfs_commit_write(struct file file, struct page page,
				2162	unsigned from, unsigned to)
				2163	{
				2164	struct inode *vi = page->mapping->host;
				2165	ntfs_inode base_ni, ni = NTFS_I(vi);
				2166	char kaddr, kattr;
				2167	ntfs_attr_search_ctx *ctx;
				2168	MFT_RECORD *m;
				2169	ATTR_RECORD *a;
				2170	u32 attr_len;
				2171	int err;
				2172
				2173	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				2174	"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
				2175	page->index, from, to);
				2176	/* If the attribute is not resident, deal with it elsewhere. */
				2177	if (NInoNonResident(ni)) {
				2178	/* Only unnamed $DATA attributes can be compressed/encrypted. */
				2179	if (ni->type == AT_DATA && !ni->name_len) {
				2180	/* Encrypted files need separate handling. */
				2181	if (NInoEncrypted(ni)) {
				2182	// We never get here at present!
				2183	BUG();
				2184	}
				2185	/* Compressed data streams are handled in compress.c. */
				2186	if (NInoCompressed(ni)) {
				2187	// TODO: Implement this!
				2188	// return ntfs_write_compressed_block(page);
				2189	// We never get here at present!
				2190	BUG();
				2191	}
				2192	}
				2193	/* Normal data stream. */
				2194	return ntfs_commit_nonresident_write(page, from, to);
				2195	}
				2196	/*
				2197	* Attribute is resident, implying it is not compressed, encrypted, or
				2198	* sparse.
				2199	*/
				2200	if (!NInoAttr(ni))
				2201	base_ni = ni;
				2202	else
				2203	base_ni = ni->ext.base_ntfs_ino;
				2204	/* Map, pin, and lock the mft record. */
				2205	m = map_mft_record(base_ni);
				2206	if (IS_ERR(m)) {
				2207	err = PTR_ERR(m);
				2208	m = NULL;
				2209	ctx = NULL;
				2210	goto err_out;
				2211	}
				2212	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				2213	if (unlikely(!ctx)) {
				2214	err = -ENOMEM;
				2215	goto err_out;
				2216	}
				2217	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				2218	CASE_SENSITIVE, 0, NULL, 0, ctx);
				2219	if (unlikely(err)) {
				2220	if (err == -ENOENT)
				2221	err = -EIO;
				2222	goto err_out;
				2223	}
				2224	a = ctx->attr;
				2225	/* The total length of the attribute value. */
				2226	attr_len = le32_to_cpu(a->data.resident.value_length);
				2227	BUG_ON(from > attr_len);
				2228	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
				2229	kaddr = kmap_atomic(page, KM_USER0);
				2230	/* Copy the received data from the page to the mft record. */
				2231	memcpy(kattr + from, kaddr + from, to - from);
				2232	/* Update the attribute length if necessary. */
				2233	if (to > attr_len) {
				2234	attr_len = to;
				2235	a->data.resident.value_length = cpu_to_le32(attr_len);
				2236	}
				2237	/*
				2238	* If the page is not uptodate, bring the out of bounds area(s)
				2239	* uptodate by copying data from the mft record to the page.
				2240	*/
				2241	if (!PageUptodate(page)) {
				2242	if (from > 0)
				2243	memcpy(kaddr, kattr, from);
				2244	if (to < attr_len)
				2245	memcpy(kaddr + to, kattr + to, attr_len - to);
				2246	/* Zero the region outside the end of the attribute value. */
				2247	if (attr_len < PAGE_CACHE_SIZE)
				2248	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				2249	/*
				2250	* The probability of not having done any of the above is
				2251	* extremely small, so we just flush unconditionally.
				2252	*/
				2253	flush_dcache_page(page);
				2254	SetPageUptodate(page);
				2255	}
				2256	kunmap_atomic(kaddr, KM_USER0);
				2257	/* Update i_size if necessary. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	2258	if (i_size_read(vi) < attr_len) {
				2259	unsigned long flags;
				2260
				2261	write_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2262	ni->allocated_size = ni->initialized_size = attr_len;
				2263	i_size_write(vi, attr_len);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame]	2264	write_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2265	}
				2266	/* Mark the mft record dirty, so it gets written back. */
				2267	flush_dcache_mft_record_page(ctx->ntfs_ino);
				2268	mark_mft_record_dirty(ctx->ntfs_ino);
				2269	ntfs_attr_put_search_ctx(ctx);
				2270	unmap_mft_record(base_ni);
				2271	ntfs_debug("Done.");
				2272	return 0;
				2273	err_out:
				2274	if (err == -ENOMEM) {
				2275	ntfs_warning(vi->i_sb, "Error allocating memory required to "
				2276	"commit the write.");
				2277	if (PageUptodate(page)) {
				2278	ntfs_warning(vi->i_sb, "Page is uptodate, setting "
				2279	"dirty so the write will be retried "
				2280	"later on by the VM.");
				2281	/*
				2282	* Put the page on mapping->dirty_pages, but leave its
				2283	* buffers' dirty state as-is.
				2284	*/
				2285	__set_page_dirty_nobuffers(page);
				2286	err = 0;
				2287	} else
				2288	ntfs_error(vi->i_sb, "Page is not uptodate. Written "
				2289	"data has been lost.");
				2290	} else {
				2291	ntfs_error(vi->i_sb, "Resident attribute commit write failed "
				2292	"with error %i.", err);
				2293	NVolSetErrors(ni->vol);
				2294	make_bad_inode(vi);
				2295	}
				2296	if (ctx)
				2297	ntfs_attr_put_search_ctx(ctx);
				2298	if (m)
				2299	unmap_mft_record(base_ni);
				2300	return err;
				2301	}
				2302
				2303	#endif /* NTFS_RW */
				2304
				2305	/**
				2306	* ntfs_aops - general address space operations for inodes and attributes
				2307	*/
				2308	struct address_space_operations ntfs_aops = {
				2309	.readpage = ntfs_readpage, /* Fill page with data. */
				2310	.sync_page = block_sync_page, /* Currently, just unplugs the
				2311	disk request queue. */
				2312	#ifdef NTFS_RW
				2313	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				2314	.prepare_write = ntfs_prepare_write, /* Prepare page and buffers
				2315	ready to receive data. */
				2316	.commit_write = ntfs_commit_write, /* Commit received data. */
				2317	#endif /* NTFS_RW */
				2318	};
				2319
				2320	/**
				2321	* ntfs_mst_aops - general address space operations for mst protecteed inodes
				2322	* and attributes
				2323	*/
				2324	struct address_space_operations ntfs_mst_aops = {
				2325	.readpage = ntfs_readpage, /* Fill page with data. */
				2326	.sync_page = block_sync_page, /* Currently, just unplugs the
				2327	disk request queue. */
				2328	#ifdef NTFS_RW
				2329	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				2330	.set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
				2331	without touching the buffers
				2332	belonging to the page. */
				2333	#endif /* NTFS_RW */
				2334	};
				2335
				2336	#ifdef NTFS_RW
				2337
				2338	/**
				2339	* mark_ntfs_record_dirty - mark an ntfs record dirty
				2340	* @page: page containing the ntfs record to mark dirty
				2341	* @ofs: byte offset within @page at which the ntfs record begins
				2342	*
				2343	* Set the buffers and the page in which the ntfs record is located dirty.
				2344	*
				2345	* The latter also marks the vfs inode the ntfs record belongs to dirty
				2346	* (I_DIRTY_PAGES only).
				2347	*
				2348	* If the page does not have buffers, we create them and set them uptodate.
				2349	* The page may not be locked which is why we need to handle the buffers under
				2350	* the mapping->private_lock. Once the buffers are marked dirty we no longer
				2351	* need the lock since try_to_free_buffers() does not free dirty buffers.
				2352	*/
				2353	void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
				2354	struct address_space *mapping = page->mapping;
				2355	ntfs_inode *ni = NTFS_I(mapping->host);
				2356	struct buffer_head bh, head, *buffers_to_free = NULL;
				2357	unsigned int end, bh_size, bh_ofs;
				2358
				2359	BUG_ON(!PageUptodate(page));
				2360	end = ofs + ni->itype.index.block_size;
				2361	bh_size = 1 << VFS_I(ni)->i_blkbits;
				2362	spin_lock(&mapping->private_lock);
				2363	if (unlikely(!page_has_buffers(page))) {
				2364	spin_unlock(&mapping->private_lock);
				2365	bh = head = alloc_page_buffers(page, bh_size, 1);
				2366	spin_lock(&mapping->private_lock);
				2367	if (likely(!page_has_buffers(page))) {
				2368	struct buffer_head *tail;
				2369
				2370	do {
				2371	set_buffer_uptodate(bh);
				2372	tail = bh;
				2373	bh = bh->b_this_page;
				2374	} while (bh);
				2375	tail->b_this_page = head;
				2376	attach_page_buffers(page, head);
				2377	} else
				2378	buffers_to_free = bh;
				2379	}
				2380	bh = head = page_buffers(page);
				2381	do {
				2382	bh_ofs = bh_offset(bh);
				2383	if (bh_ofs + bh_size <= ofs)
				2384	continue;
				2385	if (unlikely(bh_ofs >= end))
				2386	break;
				2387	set_buffer_dirty(bh);
				2388	} while ((bh = bh->b_this_page) != head);
				2389	spin_unlock(&mapping->private_lock);
				2390	__set_page_dirty_nobuffers(page);
				2391	if (unlikely(buffers_to_free)) {
				2392	do {
				2393	bh = buffers_to_free->b_this_page;
				2394	free_buffer_head(buffers_to_free);
				2395	buffers_to_free = bh;
				2396	} while (buffers_to_free);
				2397	}
				2398	}
				2399
				2400	#endif /* NTFS_RW */