Blame - fs/ntfs/aops.c - kernel/msm-4.9

blob: 45d56e41ed987f621893d38b03cceca7dbb76752 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/**
				2	* aops.c - NTFS kernel address space operations and page cache handling.
				3	* Part of the Linux-NTFS project.
				4	*
				5	* Copyright (c) 2001-2004 Anton Altaparmakov
				6	* Copyright (c) 2002 Richard Russon
				7	*
				8	* This program/include file is free software; you can redistribute it and/or
				9	* modify it under the terms of the GNU General Public License as published
				10	* by the Free Software Foundation; either version 2 of the License, or
				11	* (at your option) any later version.
				12	*
				13	* This program/include file is distributed in the hope that it will be
				14	* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
				15	* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				16	* GNU General Public License for more details.
				17	*
				18	* You should have received a copy of the GNU General Public License
				19	* along with this program (in the main directory of the Linux-NTFS
				20	* distribution in the file COPYING); if not, write to the Free Software
				21	* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				22	*/
				23
				24	#include <linux/errno.h>
				25	#include <linux/mm.h>
				26	#include <linux/pagemap.h>
				27	#include <linux/swap.h>
				28	#include <linux/buffer_head.h>
				29	#include <linux/writeback.h>
				30
				31	#include "aops.h"
				32	#include "attrib.h"
				33	#include "debug.h"
				34	#include "inode.h"
				35	#include "mft.h"
				36	#include "runlist.h"
				37	#include "types.h"
				38	#include "ntfs.h"
				39
				40	/**
				41	* ntfs_end_buffer_async_read - async io completion for reading attributes
				42	* @bh: buffer head on which io is completed
				43	* @uptodate: whether @bh is now uptodate or not
				44	*
				45	* Asynchronous I/O completion handler for reading pages belonging to the
				46	* attribute address space of an inode. The inodes can either be files or
				47	* directories or they can be fake inodes describing some attribute.
				48	*
				49	* If NInoMstProtected(), perform the post read mst fixups when all IO on the
				50	* page has been completed and mark the page uptodate or set the error bit on
				51	* the page. To determine the size of the records that need fixing up, we
				52	* cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
				53	* record size, and index_block_size_bits, to the log(base 2) of the ntfs
				54	* record size.
				55	*/
				56	static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
				57	{
				58	static DEFINE_SPINLOCK(page_uptodate_lock);
				59	unsigned long flags;
				60	struct buffer_head *tmp;
				61	struct page *page;
				62	ntfs_inode *ni;
				63	int page_uptodate = 1;
				64
				65	page = bh->b_page;
				66	ni = NTFS_I(page->mapping->host);
				67
				68	if (likely(uptodate)) {
				69	s64 file_ofs;
				70
				71	set_buffer_uptodate(bh);
				72
				73	file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
				74	bh_offset(bh);
				75	/* Check for the current buffer head overflowing. */
				76	if (file_ofs + bh->b_size > ni->initialized_size) {
				77	char *addr;
				78	int ofs = 0;
				79
				80	if (file_ofs < ni->initialized_size)
				81	ofs = ni->initialized_size - file_ofs;
				82	addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
				83	memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
				84	flush_dcache_page(page);
				85	kunmap_atomic(addr, KM_BIO_SRC_IRQ);
				86	}
				87	} else {
				88	clear_buffer_uptodate(bh);
				89	ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
				90	(unsigned long long)bh->b_blocknr);
				91	SetPageError(page);
				92	}
				93	spin_lock_irqsave(&page_uptodate_lock, flags);
				94	clear_buffer_async_read(bh);
				95	unlock_buffer(bh);
				96	tmp = bh;
				97	do {
				98	if (!buffer_uptodate(tmp))
				99	page_uptodate = 0;
				100	if (buffer_async_read(tmp)) {
				101	if (likely(buffer_locked(tmp)))
				102	goto still_busy;
				103	/* Async buffers must be locked. */
				104	BUG();
				105	}
				106	tmp = tmp->b_this_page;
				107	} while (tmp != bh);
				108	spin_unlock_irqrestore(&page_uptodate_lock, flags);
				109	/*
				110	* If none of the buffers had errors then we can set the page uptodate,
				111	* but we first have to perform the post read mst fixups, if the
				112	* attribute is mst protected, i.e. if NInoMstProteced(ni) is true.
				113	* Note we ignore fixup errors as those are detected when
				114	* map_mft_record() is called which gives us per record granularity
				115	* rather than per page granularity.
				116	*/
				117	if (!NInoMstProtected(ni)) {
				118	if (likely(page_uptodate && !PageError(page)))
				119	SetPageUptodate(page);
				120	} else {
				121	char *addr;
				122	unsigned int i, recs;
				123	u32 rec_size;
				124
				125	rec_size = ni->itype.index.block_size;
				126	recs = PAGE_CACHE_SIZE / rec_size;
				127	/* Should have been verified before we got here... */
				128	BUG_ON(!recs);
				129	addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
				130	for (i = 0; i < recs; i++)
				131	post_read_mst_fixup((NTFS_RECORD*)(addr +
				132	i * rec_size), rec_size);
				133	flush_dcache_page(page);
				134	kunmap_atomic(addr, KM_BIO_SRC_IRQ);
				135	if (likely(!PageError(page) && page_uptodate))
				136	SetPageUptodate(page);
				137	}
				138	unlock_page(page);
				139	return;
				140	still_busy:
				141	spin_unlock_irqrestore(&page_uptodate_lock, flags);
				142	return;
				143	}
				144
				145	/**
				146	* ntfs_read_block - fill a @page of an address space with data
				147	* @page: page cache page to fill with data
				148	*
				149	* Fill the page @page of the address space belonging to the @page->host inode.
				150	* We read each buffer asynchronously and when all buffers are read in, our io
				151	* completion handler ntfs_end_buffer_read_async(), if required, automatically
				152	* applies the mst fixups to the page before finally marking it uptodate and
				153	* unlocking it.
				154	*
				155	* We only enforce allocated_size limit because i_size is checked for in
				156	* generic_file_read().
				157	*
				158	* Return 0 on success and -errno on error.
				159	*
				160	* Contains an adapted version of fs/buffer.c::block_read_full_page().
				161	*/
				162	static int ntfs_read_block(struct page *page)
				163	{
				164	VCN vcn;
				165	LCN lcn;
				166	ntfs_inode *ni;
				167	ntfs_volume *vol;
				168	runlist_element *rl;
				169	struct buffer_head bh, head, *arr[MAX_BUF_PER_PAGE];
				170	sector_t iblock, lblock, zblock;
				171	unsigned int blocksize, vcn_ofs;
				172	int i, nr;
				173	unsigned char blocksize_bits;
				174
				175	ni = NTFS_I(page->mapping->host);
				176	vol = ni->vol;
				177
				178	/* $MFT/$DATA must have its complete runlist in memory at all times. */
				179	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
				180
				181	blocksize_bits = VFS_I(ni)->i_blkbits;
				182	blocksize = 1 << blocksize_bits;
				183
				184	if (!page_has_buffers(page))
				185	create_empty_buffers(page, blocksize, 0);
				186	bh = head = page_buffers(page);
				187	if (unlikely(!bh)) {
				188	unlock_page(page);
				189	return -ENOMEM;
				190	}
				191
				192	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				193	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
				194	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
				195
				196	/* Loop through all the buffers in the page. */
				197	rl = NULL;
				198	nr = i = 0;
				199	do {
				200	u8 *kaddr;
				201
				202	if (unlikely(buffer_uptodate(bh)))
				203	continue;
				204	if (unlikely(buffer_mapped(bh))) {
				205	arr[nr++] = bh;
				206	continue;
				207	}
				208	bh->b_bdev = vol->sb->s_bdev;
				209	/* Is the block within the allowed limits? */
				210	if (iblock < lblock) {
				211	BOOL is_retry = FALSE;
				212
				213	/* Convert iblock into corresponding vcn and offset. */
				214	vcn = (VCN)iblock << blocksize_bits >>
				215	vol->cluster_size_bits;
				216	vcn_ofs = ((VCN)iblock << blocksize_bits) &
				217	vol->cluster_size_mask;
				218	if (!rl) {
				219	lock_retry_remap:
				220	down_read(&ni->runlist.lock);
				221	rl = ni->runlist.rl;
				222	}
				223	if (likely(rl != NULL)) {
				224	/* Seek to element containing target vcn. */
				225	while (rl->length && rl[1].vcn <= vcn)
				226	rl++;
				227	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				228	} else
				229	lcn = LCN_RL_NOT_MAPPED;
				230	/* Successful remap. */
				231	if (lcn >= 0) {
				232	/* Setup buffer head to correct block. */
				233	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				234	+ vcn_ofs) >> blocksize_bits;
				235	set_buffer_mapped(bh);
				236	/* Only read initialized data blocks. */
				237	if (iblock < zblock) {
				238	arr[nr++] = bh;
				239	continue;
				240	}
				241	/* Fully non-initialized data block, zero it. */
				242	goto handle_zblock;
				243	}
				244	/* It is a hole, need to zero it. */
				245	if (lcn == LCN_HOLE)
				246	goto handle_hole;
				247	/* If first try and runlist unmapped, map and retry. */
				248	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				249	int err;
				250	is_retry = TRUE;
				251	/*
				252	* Attempt to map runlist, dropping lock for
				253	* the duration.
				254	*/
				255	up_read(&ni->runlist.lock);
				256	err = ntfs_map_runlist(ni, vcn);
				257	if (likely(!err))
				258	goto lock_retry_remap;
				259	rl = NULL;
				260	lcn = err;
				261	}
				262	/* Hard error, zero out region. */
				263	bh->b_blocknr = -1;
				264	SetPageError(page);
				265	ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
				266	"attribute type 0x%x, vcn 0x%llx, "
				267	"offset 0x%x because its location on "
				268	"disk could not be determined%s "
				269	"(error code %lli).", ni->mft_no,
				270	ni->type, (unsigned long long)vcn,
				271	vcn_ofs, is_retry ? " even after "
				272	"retrying" : "", (long long)lcn);
				273	}
				274	/*
				275	* Either iblock was outside lblock limits or
				276	* ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
				277	* of the page and set the buffer uptodate.
				278	*/
				279	handle_hole:
				280	bh->b_blocknr = -1UL;
				281	clear_buffer_mapped(bh);
				282	handle_zblock:
				283	kaddr = kmap_atomic(page, KM_USER0);
				284	memset(kaddr + i * blocksize, 0, blocksize);
				285	flush_dcache_page(page);
				286	kunmap_atomic(kaddr, KM_USER0);
				287	set_buffer_uptodate(bh);
				288	} while (i++, iblock++, (bh = bh->b_this_page) != head);
				289
				290	/* Release the lock if we took it. */
				291	if (rl)
				292	up_read(&ni->runlist.lock);
				293
				294	/* Check we have at least one buffer ready for i/o. */
				295	if (nr) {
				296	struct buffer_head *tbh;
				297
				298	/* Lock the buffers. */
				299	for (i = 0; i < nr; i++) {
				300	tbh = arr[i];
				301	lock_buffer(tbh);
				302	tbh->b_end_io = ntfs_end_buffer_async_read;
				303	set_buffer_async_read(tbh);
				304	}
				305	/* Finally, start i/o on the buffers. */
				306	for (i = 0; i < nr; i++) {
				307	tbh = arr[i];
				308	if (likely(!buffer_uptodate(tbh)))
				309	submit_bh(READ, tbh);
				310	else
				311	ntfs_end_buffer_async_read(tbh, 1);
				312	}
				313	return 0;
				314	}
				315	/* No i/o was scheduled on any of the buffers. */
				316	if (likely(!PageError(page)))
				317	SetPageUptodate(page);
				318	else /* Signal synchronous i/o error. */
				319	nr = -EIO;
				320	unlock_page(page);
				321	return nr;
				322	}
				323
				324	/**
				325	* ntfs_readpage - fill a @page of a @file with data from the device
				326	* @file: open file to which the page @page belongs or NULL
				327	* @page: page cache page to fill with data
				328	*
				329	* For non-resident attributes, ntfs_readpage() fills the @page of the open
				330	* file @file by calling the ntfs version of the generic block_read_full_page()
				331	* function, ntfs_read_block(), which in turn creates and reads in the buffers
				332	* associated with the page asynchronously.
				333	*
				334	* For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
				335	* data from the mft record (which at this stage is most likely in memory) and
				336	* fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
				337	* even if the mft record is not cached at this point in time, we need to wait
				338	* for it to be read in before we can do the copy.
				339	*
				340	* Return 0 on success and -errno on error.
				341	*/
				342	static int ntfs_readpage(struct file file, struct page page)
				343	{
				344	loff_t i_size;
				345	ntfs_inode ni, base_ni;
				346	u8 *kaddr;
				347	ntfs_attr_search_ctx *ctx;
				348	MFT_RECORD *mrec;
				349	u32 attr_len;
				350	int err = 0;
				351
				352	BUG_ON(!PageLocked(page));
				353	/*
				354	* This can potentially happen because we clear PageUptodate() during
				355	* ntfs_writepage() of MstProtected() attributes.
				356	*/
				357	if (PageUptodate(page)) {
				358	unlock_page(page);
				359	return 0;
				360	}
				361	ni = NTFS_I(page->mapping->host);
				362
				363	/* NInoNonResident() == NInoIndexAllocPresent() */
				364	if (NInoNonResident(ni)) {
				365	/*
				366	* Only unnamed $DATA attributes can be compressed or
				367	* encrypted.
				368	*/
				369	if (ni->type == AT_DATA && !ni->name_len) {
				370	/* If file is encrypted, deny access, just like NT4. */
				371	if (NInoEncrypted(ni)) {
				372	err = -EACCES;
				373	goto err_out;
				374	}
				375	/* Compressed data streams are handled in compress.c. */
				376	if (NInoCompressed(ni))
				377	return ntfs_read_compressed_block(page);
				378	}
				379	/* Normal data stream. */
				380	return ntfs_read_block(page);
				381	}
				382	/*
				383	* Attribute is resident, implying it is not compressed or encrypted.
				384	* This also means the attribute is smaller than an mft record and
				385	* hence smaller than a page, so can simply zero out any pages with
				386	* index above 0. We can also do this if the file size is 0.
				387	*/
				388	if (unlikely(page->index > 0 \|\| !i_size_read(VFS_I(ni)))) {
				389	kaddr = kmap_atomic(page, KM_USER0);
				390	memset(kaddr, 0, PAGE_CACHE_SIZE);
				391	flush_dcache_page(page);
				392	kunmap_atomic(kaddr, KM_USER0);
				393	goto done;
				394	}
				395	if (!NInoAttr(ni))
				396	base_ni = ni;
				397	else
				398	base_ni = ni->ext.base_ntfs_ino;
				399	/* Map, pin, and lock the mft record. */
				400	mrec = map_mft_record(base_ni);
				401	if (IS_ERR(mrec)) {
				402	err = PTR_ERR(mrec);
				403	goto err_out;
				404	}
				405	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
				406	if (unlikely(!ctx)) {
				407	err = -ENOMEM;
				408	goto unm_err_out;
				409	}
				410	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				411	CASE_SENSITIVE, 0, NULL, 0, ctx);
				412	if (unlikely(err))
				413	goto put_unm_err_out;
				414	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
				415	i_size = i_size_read(VFS_I(ni));
				416	if (unlikely(attr_len > i_size))
				417	attr_len = i_size;
				418	kaddr = kmap_atomic(page, KM_USER0);
				419	/* Copy the data to the page. */
				420	memcpy(kaddr, (u8*)ctx->attr +
				421	le16_to_cpu(ctx->attr->data.resident.value_offset),
				422	attr_len);
				423	/* Zero the remainder of the page. */
				424	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				425	flush_dcache_page(page);
				426	kunmap_atomic(kaddr, KM_USER0);
				427	put_unm_err_out:
				428	ntfs_attr_put_search_ctx(ctx);
				429	unm_err_out:
				430	unmap_mft_record(base_ni);
				431	done:
				432	SetPageUptodate(page);
				433	err_out:
				434	unlock_page(page);
				435	return err;
				436	}
				437
				438	#ifdef NTFS_RW
				439
				440	/**
				441	* ntfs_write_block - write a @page to the backing store
				442	* @page: page cache page to write out
				443	* @wbc: writeback control structure
				444	*
				445	* This function is for writing pages belonging to non-resident, non-mst
				446	* protected attributes to their backing store.
				447	*
				448	* For a page with buffers, map and write the dirty buffers asynchronously
				449	* under page writeback. For a page without buffers, create buffers for the
				450	* page, then proceed as above.
				451	*
				452	* If a page doesn't have buffers the page dirty state is definitive. If a page
				453	* does have buffers, the page dirty state is just a hint, and the buffer dirty
				454	* state is definitive. (A hint which has rules: dirty buffers against a clean
				455	* page is illegal. Other combinations are legal and need to be handled. In
				456	* particular a dirty page containing clean buffers for example.)
				457	*
				458	* Return 0 on success and -errno on error.
				459	*
				460	* Based on ntfs_read_block() and __block_write_full_page().
				461	*/
				462	static int ntfs_write_block(struct page page, struct writeback_control wbc)
				463	{
				464	VCN vcn;
				465	LCN lcn;
				466	sector_t block, dblock, iblock;
				467	struct inode *vi;
				468	ntfs_inode *ni;
				469	ntfs_volume *vol;
				470	runlist_element *rl;
				471	struct buffer_head bh, head;
				472	unsigned int blocksize, vcn_ofs;
				473	int err;
				474	BOOL need_end_writeback;
				475	unsigned char blocksize_bits;
				476
				477	vi = page->mapping->host;
				478	ni = NTFS_I(vi);
				479	vol = ni->vol;
				480
				481	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				482	"0x%lx.", ni->mft_no, ni->type, page->index);
				483
				484	BUG_ON(!NInoNonResident(ni));
				485	BUG_ON(NInoMstProtected(ni));
				486
				487	blocksize_bits = vi->i_blkbits;
				488	blocksize = 1 << blocksize_bits;
				489
				490	if (!page_has_buffers(page)) {
				491	BUG_ON(!PageUptodate(page));
				492	create_empty_buffers(page, blocksize,
				493	(1 << BH_Uptodate) \| (1 << BH_Dirty));
				494	}
				495	bh = head = page_buffers(page);
				496	if (unlikely(!bh)) {
				497	ntfs_warning(vol->sb, "Error allocating page buffers. "
				498	"Redirtying page so we try again later.");
				499	/*
				500	* Put the page back on mapping->dirty_pages, but leave its
				501	* buffer's dirty state as-is.
				502	*/
				503	redirty_page_for_writepage(wbc, page);
				504	unlock_page(page);
				505	return 0;
				506	}
				507
				508	/* NOTE: Different naming scheme to ntfs_read_block()! */
				509
				510	/* The first block in the page. */
				511	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				512
				513	/* The first out of bounds block for the data size. */
				514	dblock = (vi->i_size + blocksize - 1) >> blocksize_bits;
				515
				516	/* The last (fully or partially) initialized block. */
				517	iblock = ni->initialized_size >> blocksize_bits;
				518
				519	/*
				520	* Be very careful. We have no exclusion from __set_page_dirty_buffers
				521	* here, and the (potentially unmapped) buffers may become dirty at
				522	* any time. If a buffer becomes dirty here after we've inspected it
				523	* then we just miss that fact, and the page stays dirty.
				524	*
				525	* Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
				526	* handle that here by just cleaning them.
				527	*/
				528
				529	/*
				530	* Loop through all the buffers in the page, mapping all the dirty
				531	* buffers to disk addresses and handling any aliases from the
				532	* underlying block device's mapping.
				533	*/
				534	rl = NULL;
				535	err = 0;
				536	do {
				537	BOOL is_retry = FALSE;
				538
				539	if (unlikely(block >= dblock)) {
				540	/*
				541	* Mapped buffers outside i_size will occur, because
				542	* this page can be outside i_size when there is a
				543	* truncate in progress. The contents of such buffers
				544	* were zeroed by ntfs_writepage().
				545	*
				546	* FIXME: What about the small race window where
				547	* ntfs_writepage() has not done any clearing because
				548	* the page was within i_size but before we get here,
				549	* vmtruncate() modifies i_size?
				550	*/
				551	clear_buffer_dirty(bh);
				552	set_buffer_uptodate(bh);
				553	continue;
				554	}
				555
				556	/* Clean buffers are not written out, so no need to map them. */
				557	if (!buffer_dirty(bh))
				558	continue;
				559
				560	/* Make sure we have enough initialized size. */
				561	if (unlikely((block >= iblock) &&
				562	(ni->initialized_size < vi->i_size))) {
				563	/*
				564	* If this page is fully outside initialized size, zero
				565	* out all pages between the current initialized size
				566	* and the current page. Just use ntfs_readpage() to do
				567	* the zeroing transparently.
				568	*/
				569	if (block > iblock) {
				570	// TODO:
				571	// For each page do:
				572	// - read_cache_page()
				573	// Again for each page do:
				574	// - wait_on_page_locked()
				575	// - Check (PageUptodate(page) &&
				576	// !PageError(page))
				577	// Update initialized size in the attribute and
				578	// in the inode.
				579	// Again, for each page do:
				580	// __set_page_dirty_buffers();
				581	// page_cache_release()
				582	// We don't need to wait on the writes.
				583	// Update iblock.
				584	}
				585	/*
				586	* The current page straddles initialized size. Zero
				587	* all non-uptodate buffers and set them uptodate (and
				588	* dirty?). Note, there aren't any non-uptodate buffers
				589	* if the page is uptodate.
				590	* FIXME: For an uptodate page, the buffers may need to
				591	* be written out because they were not initialized on
				592	* disk before.
				593	*/
				594	if (!PageUptodate(page)) {
				595	// TODO:
				596	// Zero any non-uptodate buffers up to i_size.
				597	// Set them uptodate and dirty.
				598	}
				599	// TODO:
				600	// Update initialized size in the attribute and in the
				601	// inode (up to i_size).
				602	// Update iblock.
				603	// FIXME: This is inefficient. Try to batch the two
				604	// size changes to happen in one go.
				605	ntfs_error(vol->sb, "Writing beyond initialized size "
				606	"is not supported yet. Sorry.");
				607	err = -EOPNOTSUPP;
				608	break;
				609	// Do NOT set_buffer_new() BUT DO clear buffer range
				610	// outside write request range.
				611	// set_buffer_uptodate() on complete buffers as well as
				612	// set_buffer_dirty().
				613	}
				614
				615	/* No need to map buffers that are already mapped. */
				616	if (buffer_mapped(bh))
				617	continue;
				618
				619	/* Unmapped, dirty buffer. Need to map it. */
				620	bh->b_bdev = vol->sb->s_bdev;
				621
				622	/* Convert block into corresponding vcn and offset. */
				623	vcn = (VCN)block << blocksize_bits;
				624	vcn_ofs = vcn & vol->cluster_size_mask;
				625	vcn >>= vol->cluster_size_bits;
				626	if (!rl) {
				627	lock_retry_remap:
				628	down_read(&ni->runlist.lock);
				629	rl = ni->runlist.rl;
				630	}
				631	if (likely(rl != NULL)) {
				632	/* Seek to element containing target vcn. */
				633	while (rl->length && rl[1].vcn <= vcn)
				634	rl++;
				635	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				636	} else
				637	lcn = LCN_RL_NOT_MAPPED;
				638	/* Successful remap. */
				639	if (lcn >= 0) {
				640	/* Setup buffer head to point to correct block. */
				641	bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
				642	vcn_ofs) >> blocksize_bits;
				643	set_buffer_mapped(bh);
				644	continue;
				645	}
				646	/* It is a hole, need to instantiate it. */
				647	if (lcn == LCN_HOLE) {
				648	// TODO: Instantiate the hole.
				649	// clear_buffer_new(bh);
				650	// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
				651	ntfs_error(vol->sb, "Writing into sparse regions is "
				652	"not supported yet. Sorry.");
				653	err = -EOPNOTSUPP;
				654	break;
				655	}
				656	/* If first try and runlist unmapped, map and retry. */
				657	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				658	is_retry = TRUE;
				659	/*
				660	* Attempt to map runlist, dropping lock for
				661	* the duration.
				662	*/
				663	up_read(&ni->runlist.lock);
				664	err = ntfs_map_runlist(ni, vcn);
				665	if (likely(!err))
				666	goto lock_retry_remap;
				667	rl = NULL;
				668	lcn = err;
				669	}
				670	/* Failed to map the buffer, even after retrying. */
				671	bh->b_blocknr = -1;
				672	ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				673	"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				674	"because its location on disk could not be "
				675	"determined%s (error code %lli).", ni->mft_no,
				676	ni->type, (unsigned long long)vcn,
				677	vcn_ofs, is_retry ? " even after "
				678	"retrying" : "", (long long)lcn);
				679	if (!err)
				680	err = -EIO;
				681	break;
				682	} while (block++, (bh = bh->b_this_page) != head);
				683
				684	/* Release the lock if we took it. */
				685	if (rl)
				686	up_read(&ni->runlist.lock);
				687
				688	/* For the error case, need to reset bh to the beginning. */
				689	bh = head;
				690
				691	/* Just an optimization, so ->readpage() isn't called later. */
				692	if (unlikely(!PageUptodate(page))) {
				693	int uptodate = 1;
				694	do {
				695	if (!buffer_uptodate(bh)) {
				696	uptodate = 0;
				697	bh = head;
				698	break;
				699	}
				700	} while ((bh = bh->b_this_page) != head);
				701	if (uptodate)
				702	SetPageUptodate(page);
				703	}
				704
				705	/* Setup all mapped, dirty buffers for async write i/o. */
				706	do {
				707	get_bh(bh);
				708	if (buffer_mapped(bh) && buffer_dirty(bh)) {
				709	lock_buffer(bh);
				710	if (test_clear_buffer_dirty(bh)) {
				711	BUG_ON(!buffer_uptodate(bh));
				712	mark_buffer_async_write(bh);
				713	} else
				714	unlock_buffer(bh);
				715	} else if (unlikely(err)) {
				716	/*
				717	* For the error case. The buffer may have been set
				718	* dirty during attachment to a dirty page.
				719	*/
				720	if (err != -ENOMEM)
				721	clear_buffer_dirty(bh);
				722	}
				723	} while ((bh = bh->b_this_page) != head);
				724
				725	if (unlikely(err)) {
				726	// TODO: Remove the -EOPNOTSUPP check later on...
				727	if (unlikely(err == -EOPNOTSUPP))
				728	err = 0;
				729	else if (err == -ENOMEM) {
				730	ntfs_warning(vol->sb, "Error allocating memory. "
				731	"Redirtying page so we try again "
				732	"later.");
				733	/*
				734	* Put the page back on mapping->dirty_pages, but
				735	* leave its buffer's dirty state as-is.
				736	*/
				737	redirty_page_for_writepage(wbc, page);
				738	err = 0;
				739	} else
				740	SetPageError(page);
				741	}
				742
				743	BUG_ON(PageWriteback(page));
				744	set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
				745	unlock_page(page);
				746
				747	/*
				748	* Submit the prepared buffers for i/o. Note the page is unlocked,
				749	* and the async write i/o completion handler can end_page_writeback()
				750	* at any time after the first submit_bh(). So the buffers can then
				751	* disappear...
				752	*/
				753	need_end_writeback = TRUE;
				754	do {
				755	struct buffer_head *next = bh->b_this_page;
				756	if (buffer_async_write(bh)) {
				757	submit_bh(WRITE, bh);
				758	need_end_writeback = FALSE;
				759	}
				760	put_bh(bh);
				761	bh = next;
				762	} while (bh != head);
				763
				764	/* If no i/o was started, need to end_page_writeback(). */
				765	if (unlikely(need_end_writeback))
				766	end_page_writeback(page);
				767
				768	ntfs_debug("Done.");
				769	return err;
				770	}
				771
				772	/**
				773	* ntfs_write_mst_block - write a @page to the backing store
				774	* @page: page cache page to write out
				775	* @wbc: writeback control structure
				776	*
				777	* This function is for writing pages belonging to non-resident, mst protected
				778	* attributes to their backing store. The only supported attributes are index
				779	* allocation and $MFT/$DATA. Both directory inodes and index inodes are
				780	* supported for the index allocation case.
				781	*
				782	* The page must remain locked for the duration of the write because we apply
				783	* the mst fixups, write, and then undo the fixups, so if we were to unlock the
				784	* page before undoing the fixups, any other user of the page will see the
				785	* page contents as corrupt.
				786	*
				787	* We clear the page uptodate flag for the duration of the function to ensure
				788	* exclusion for the $MFT/$DATA case against someone mapping an mft record we
				789	* are about to apply the mst fixups to.
				790	*
				791	* Return 0 on success and -errno on error.
				792	*
				793	* Based on ntfs_write_block(), ntfs_mft_writepage(), and
				794	* write_mft_record_nolock().
				795	*/
				796	static int ntfs_write_mst_block(struct page *page,
				797	struct writeback_control *wbc)
				798	{
				799	sector_t block, dblock, rec_block;
				800	struct inode *vi = page->mapping->host;
				801	ntfs_inode *ni = NTFS_I(vi);
				802	ntfs_volume *vol = ni->vol;
				803	u8 *kaddr;
				804	unsigned char bh_size_bits = vi->i_blkbits;
				805	unsigned int bh_size = 1 << bh_size_bits;
				806	unsigned int rec_size = ni->itype.index.block_size;
				807	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
				808	struct buffer_head bh, head, tbh, rec_start_bh;
				809	int max_bhs = PAGE_CACHE_SIZE / bh_size;
				810	struct buffer_head *bhs[max_bhs];
				811	runlist_element *rl;
				812	int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2;
				813	unsigned rec_size_bits;
				814	BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
				815
				816	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				817	"0x%lx.", vi->i_ino, ni->type, page->index);
				818	BUG_ON(!NInoNonResident(ni));
				819	BUG_ON(!NInoMstProtected(ni));
				820	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
				821	/*
				822	* NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
				823	* in its page cache were to be marked dirty. However this should
				824	* never happen with the current driver and considering we do not
				825	* handle this case here we do want to BUG(), at least for now.
				826	*/
				827	BUG_ON(!(is_mft \|\| S_ISDIR(vi->i_mode) \|\|
				828	(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
				829	BUG_ON(!max_bhs);
				830
				831	/* Were we called for sync purposes? */
				832	sync = (wbc->sync_mode == WB_SYNC_ALL);
				833
				834	/* Make sure we have mapped buffers. */
				835	BUG_ON(!page_has_buffers(page));
				836	bh = head = page_buffers(page);
				837	BUG_ON(!bh);
				838
				839	rec_size_bits = ni->itype.index.block_size_bits;
				840	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
				841	bhs_per_rec = rec_size >> bh_size_bits;
				842	BUG_ON(!bhs_per_rec);
				843
				844	/* The first block in the page. */
				845	rec_block = block = (sector_t)page->index <<
				846	(PAGE_CACHE_SHIFT - bh_size_bits);
				847
				848	/* The first out of bounds block for the data size. */
				849	dblock = (vi->i_size + bh_size - 1) >> bh_size_bits;
				850
				851	rl = NULL;
				852	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
				853	page_is_dirty = rec_is_dirty = FALSE;
				854	rec_start_bh = NULL;
				855	do {
				856	BOOL is_retry = FALSE;
				857
				858	if (likely(block < rec_block)) {
				859	if (unlikely(block >= dblock)) {
				860	clear_buffer_dirty(bh);
				861	continue;
				862	}
				863	/*
				864	* This block is not the first one in the record. We
				865	* ignore the buffer's dirty state because we could
				866	* have raced with a parallel mark_ntfs_record_dirty().
				867	*/
				868	if (!rec_is_dirty)
				869	continue;
				870	if (unlikely(err2)) {
				871	if (err2 != -ENOMEM)
				872	clear_buffer_dirty(bh);
				873	continue;
				874	}
				875	} else /* if (block == rec_block) */ {
				876	BUG_ON(block > rec_block);
				877	/* This block is the first one in the record. */
				878	rec_block += bhs_per_rec;
				879	err2 = 0;
				880	if (unlikely(block >= dblock)) {
				881	clear_buffer_dirty(bh);
				882	continue;
				883	}
				884	if (!buffer_dirty(bh)) {
				885	/* Clean records are not written out. */
				886	rec_is_dirty = FALSE;
				887	continue;
				888	}
				889	rec_is_dirty = TRUE;
				890	rec_start_bh = bh;
				891	}
				892	/* Need to map the buffer if it is not mapped already. */
				893	if (unlikely(!buffer_mapped(bh))) {
				894	VCN vcn;
				895	LCN lcn;
				896	unsigned int vcn_ofs;
				897
				898	/* Obtain the vcn and offset of the current block. */
				899	vcn = (VCN)block << bh_size_bits;
				900	vcn_ofs = vcn & vol->cluster_size_mask;
				901	vcn >>= vol->cluster_size_bits;
				902	if (!rl) {
				903	lock_retry_remap:
				904	down_read(&ni->runlist.lock);
				905	rl = ni->runlist.rl;
				906	}
				907	if (likely(rl != NULL)) {
				908	/* Seek to element containing target vcn. */
				909	while (rl->length && rl[1].vcn <= vcn)
				910	rl++;
				911	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				912	} else
				913	lcn = LCN_RL_NOT_MAPPED;
				914	/* Successful remap. */
				915	if (likely(lcn >= 0)) {
				916	/* Setup buffer head to correct block. */
				917	bh->b_blocknr = ((lcn <<
				918	vol->cluster_size_bits) +
				919	vcn_ofs) >> bh_size_bits;
				920	set_buffer_mapped(bh);
				921	} else {
				922	/*
				923	* Remap failed. Retry to map the runlist once
				924	* unless we are working on $MFT which always
				925	* has the whole of its runlist in memory.
				926	*/
				927	if (!is_mft && !is_retry &&
				928	lcn == LCN_RL_NOT_MAPPED) {
				929	is_retry = TRUE;
				930	/*
				931	* Attempt to map runlist, dropping
				932	* lock for the duration.
				933	*/
				934	up_read(&ni->runlist.lock);
				935	err2 = ntfs_map_runlist(ni, vcn);
				936	if (likely(!err2))
				937	goto lock_retry_remap;
				938	if (err2 == -ENOMEM)
				939	page_is_dirty = TRUE;
				940	lcn = err2;
				941	} else
				942	err2 = -EIO;
				943	/* Hard error. Abort writing this record. */
				944	if (!err \|\| err == -ENOMEM)
				945	err = err2;
				946	bh->b_blocknr = -1;
				947	ntfs_error(vol->sb, "Cannot write ntfs record "
				948	"0x%llx (inode 0x%lx, "
				949	"attribute type 0x%x) because "
				950	"its location on disk could "
				951	"not be determined (error "
				952	"code %lli).", (s64)block <<
				953	bh_size_bits >>
				954	vol->mft_record_size_bits,
				955	ni->mft_no, ni->type,
				956	(long long)lcn);
				957	/*
				958	* If this is not the first buffer, remove the
				959	* buffers in this record from the list of
				960	* buffers to write and clear their dirty bit
				961	* if not error -ENOMEM.
				962	*/
				963	if (rec_start_bh != bh) {
				964	while (bhs[--nr_bhs] != rec_start_bh)
				965	;
				966	if (err2 != -ENOMEM) {
				967	do {
				968	clear_buffer_dirty(
				969	rec_start_bh);
				970	} while ((rec_start_bh =
				971	rec_start_bh->
				972	b_this_page) !=
				973	bh);
				974	}
				975	}
				976	continue;
				977	}
				978	}
				979	BUG_ON(!buffer_uptodate(bh));
				980	BUG_ON(nr_bhs >= max_bhs);
				981	bhs[nr_bhs++] = bh;
				982	} while (block++, (bh = bh->b_this_page) != head);
				983	if (unlikely(rl))
				984	up_read(&ni->runlist.lock);
				985	/* If there were no dirty buffers, we are done. */
				986	if (!nr_bhs)
				987	goto done;
				988	/* Map the page so we can access its contents. */
				989	kaddr = kmap(page);
				990	/* Clear the page uptodate flag whilst the mst fixups are applied. */
				991	BUG_ON(!PageUptodate(page));
				992	ClearPageUptodate(page);
				993	for (i = 0; i < nr_bhs; i++) {
				994	unsigned int ofs;
				995
				996	/* Skip buffers which are not at the beginning of records. */
				997	if (i % bhs_per_rec)
				998	continue;
				999	tbh = bhs[i];
				1000	ofs = bh_offset(tbh);
				1001	if (is_mft) {
				1002	ntfs_inode *tni;
				1003	unsigned long mft_no;
				1004
				1005	/* Get the mft record number. */
				1006	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1007	>> rec_size_bits;
				1008	/* Check whether to write this mft record. */
				1009	tni = NULL;
				1010	if (!ntfs_may_write_mft_record(vol, mft_no,
				1011	(MFT_RECORD*)(kaddr + ofs), &tni)) {
				1012	/*
				1013	* The record should not be written. This
				1014	* means we need to redirty the page before
				1015	* returning.
				1016	*/
				1017	page_is_dirty = TRUE;
				1018	/*
				1019	* Remove the buffers in this mft record from
				1020	* the list of buffers to write.
				1021	*/
				1022	do {
				1023	bhs[i] = NULL;
				1024	} while (++i % bhs_per_rec);
				1025	continue;
				1026	}
				1027	/*
				1028	* The record should be written. If a locked ntfs
				1029	* inode was returned, add it to the array of locked
				1030	* ntfs inodes.
				1031	*/
				1032	if (tni)
				1033	locked_nis[nr_locked_nis++] = tni;
				1034	}
				1035	/* Apply the mst protection fixups. */
				1036	err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				1037	rec_size);
				1038	if (unlikely(err2)) {
				1039	if (!err \|\| err == -ENOMEM)
				1040	err = -EIO;
				1041	ntfs_error(vol->sb, "Failed to apply mst fixups "
				1042	"(inode 0x%lx, attribute type 0x%x, "
				1043	"page index 0x%lx, page offset 0x%x)!"
				1044	" Unmount and run chkdsk.", vi->i_ino,
				1045	ni->type, page->index, ofs);
				1046	/*
				1047	* Mark all the buffers in this record clean as we do
				1048	* not want to write corrupt data to disk.
				1049	*/
				1050	do {
				1051	clear_buffer_dirty(bhs[i]);
				1052	bhs[i] = NULL;
				1053	} while (++i % bhs_per_rec);
				1054	continue;
				1055	}
				1056	nr_recs++;
				1057	}
				1058	/* If no records are to be written out, we are done. */
				1059	if (!nr_recs)
				1060	goto unm_done;
				1061	flush_dcache_page(page);
				1062	/* Lock buffers and start synchronous write i/o on them. */
				1063	for (i = 0; i < nr_bhs; i++) {
				1064	tbh = bhs[i];
				1065	if (!tbh)
				1066	continue;
				1067	if (unlikely(test_set_buffer_locked(tbh)))
				1068	BUG();
				1069	/* The buffer dirty state is now irrelevant, just clean it. */
				1070	clear_buffer_dirty(tbh);
				1071	BUG_ON(!buffer_uptodate(tbh));
				1072	BUG_ON(!buffer_mapped(tbh));
				1073	get_bh(tbh);
				1074	tbh->b_end_io = end_buffer_write_sync;
				1075	submit_bh(WRITE, tbh);
				1076	}
				1077	/* Synchronize the mft mirror now if not @sync. */
				1078	if (is_mft && !sync)
				1079	goto do_mirror;
				1080	do_wait:
				1081	/* Wait on i/o completion of buffers. */
				1082	for (i = 0; i < nr_bhs; i++) {
				1083	tbh = bhs[i];
				1084	if (!tbh)
				1085	continue;
				1086	wait_on_buffer(tbh);
				1087	if (unlikely(!buffer_uptodate(tbh))) {
				1088	ntfs_error(vol->sb, "I/O error while writing ntfs "
				1089	"record buffer (inode 0x%lx, "
				1090	"attribute type 0x%x, page index "
				1091	"0x%lx, page offset 0x%lx)! Unmount "
				1092	"and run chkdsk.", vi->i_ino, ni->type,
				1093	page->index, bh_offset(tbh));
				1094	if (!err \|\| err == -ENOMEM)
				1095	err = -EIO;
				1096	/*
				1097	* Set the buffer uptodate so the page and buffer
				1098	* states do not become out of sync.
				1099	*/
				1100	set_buffer_uptodate(tbh);
				1101	}
				1102	}
				1103	/* If @sync, now synchronize the mft mirror. */
				1104	if (is_mft && sync) {
				1105	do_mirror:
				1106	for (i = 0; i < nr_bhs; i++) {
				1107	unsigned long mft_no;
				1108	unsigned int ofs;
				1109
				1110	/*
				1111	* Skip buffers which are not at the beginning of
				1112	* records.
				1113	*/
				1114	if (i % bhs_per_rec)
				1115	continue;
				1116	tbh = bhs[i];
				1117	/* Skip removed buffers (and hence records). */
				1118	if (!tbh)
				1119	continue;
				1120	ofs = bh_offset(tbh);
				1121	/* Get the mft record number. */
				1122	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1123	>> rec_size_bits;
				1124	if (mft_no < vol->mftmirr_size)
				1125	ntfs_sync_mft_mirror(vol, mft_no,
				1126	(MFT_RECORD*)(kaddr + ofs),
				1127	sync);
				1128	}
				1129	if (!sync)
				1130	goto do_wait;
				1131	}
				1132	/* Remove the mst protection fixups again. */
				1133	for (i = 0; i < nr_bhs; i++) {
				1134	if (!(i % bhs_per_rec)) {
				1135	tbh = bhs[i];
				1136	if (!tbh)
				1137	continue;
				1138	post_write_mst_fixup((NTFS_RECORD*)(kaddr +
				1139	bh_offset(tbh)));
				1140	}
				1141	}
				1142	flush_dcache_page(page);
				1143	unm_done:
				1144	/* Unlock any locked inodes. */
				1145	while (nr_locked_nis-- > 0) {
				1146	ntfs_inode tni, base_tni;
				1147
				1148	tni = locked_nis[nr_locked_nis];
				1149	/* Get the base inode. */
				1150	down(&tni->extent_lock);
				1151	if (tni->nr_extents >= 0)
				1152	base_tni = tni;
				1153	else {
				1154	base_tni = tni->ext.base_ntfs_ino;
				1155	BUG_ON(!base_tni);
				1156	}
				1157	up(&tni->extent_lock);
				1158	ntfs_debug("Unlocking %s inode 0x%lx.",
				1159	tni == base_tni ? "base" : "extent",
				1160	tni->mft_no);
				1161	up(&tni->mrec_lock);
				1162	atomic_dec(&tni->count);
				1163	iput(VFS_I(base_tni));
				1164	}
				1165	SetPageUptodate(page);
				1166	kunmap(page);
				1167	done:
				1168	if (unlikely(err && err != -ENOMEM)) {
				1169	/*
				1170	* Set page error if there is only one ntfs record in the page.
				1171	* Otherwise we would lose per-record granularity.
				1172	*/
				1173	if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
				1174	SetPageError(page);
				1175	NVolSetErrors(vol);
				1176	}
				1177	if (page_is_dirty) {
				1178	ntfs_debug("Page still contains one or more dirty ntfs "
				1179	"records. Redirtying the page starting at "
				1180	"record 0x%lx.", page->index <<
				1181	(PAGE_CACHE_SHIFT - rec_size_bits));
				1182	redirty_page_for_writepage(wbc, page);
				1183	unlock_page(page);
				1184	} else {
				1185	/*
				1186	* Keep the VM happy. This must be done otherwise the
				1187	* radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
				1188	* the page is clean.
				1189	*/
				1190	BUG_ON(PageWriteback(page));
				1191	set_page_writeback(page);
				1192	unlock_page(page);
				1193	end_page_writeback(page);
				1194	}
				1195	if (likely(!err))
				1196	ntfs_debug("Done.");
				1197	return err;
				1198	}
				1199
				1200	/**
				1201	* ntfs_writepage - write a @page to the backing store
				1202	* @page: page cache page to write out
				1203	* @wbc: writeback control structure
				1204	*
				1205	* This is called from the VM when it wants to have a dirty ntfs page cache
				1206	* page cleaned. The VM has already locked the page and marked it clean.
				1207	*
				1208	* For non-resident attributes, ntfs_writepage() writes the @page by calling
				1209	* the ntfs version of the generic block_write_full_page() function,
				1210	* ntfs_write_block(), which in turn if necessary creates and writes the
				1211	* buffers associated with the page asynchronously.
				1212	*
				1213	* For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
				1214	* the data to the mft record (which at this stage is most likely in memory).
				1215	* The mft record is then marked dirty and written out asynchronously via the
				1216	* vfs inode dirty code path for the inode the mft record belongs to or via the
				1217	* vm page dirty code path for the page the mft record is in.
				1218	*
				1219	* Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
				1220	*
				1221	* Return 0 on success and -errno on error.
				1222	*/
				1223	static int ntfs_writepage(struct page page, struct writeback_control wbc)
				1224	{
				1225	loff_t i_size;
				1226	struct inode *vi;
				1227	ntfs_inode ni, base_ni;
				1228	char *kaddr;
				1229	ntfs_attr_search_ctx *ctx;
				1230	MFT_RECORD *m;
				1231	u32 attr_len;
				1232	int err;
				1233
				1234	BUG_ON(!PageLocked(page));
				1235
				1236	vi = page->mapping->host;
				1237	i_size = i_size_read(vi);
				1238
				1239	/* Is the page fully outside i_size? (truncate in progress) */
				1240	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
				1241	PAGE_CACHE_SHIFT)) {
				1242	/*
				1243	* The page may have dirty, unmapped buffers. Make them
				1244	* freeable here, so the page does not leak.
				1245	*/
				1246	block_invalidatepage(page, 0);
				1247	unlock_page(page);
				1248	ntfs_debug("Write outside i_size - truncated?");
				1249	return 0;
				1250	}
				1251	ni = NTFS_I(vi);
				1252
				1253	/* NInoNonResident() == NInoIndexAllocPresent() */
				1254	if (NInoNonResident(ni)) {
				1255	/*
				1256	* Only unnamed $DATA attributes can be compressed, encrypted,
				1257	* and/or sparse.
				1258	*/
				1259	if (ni->type == AT_DATA && !ni->name_len) {
				1260	/* If file is encrypted, deny access, just like NT4. */
				1261	if (NInoEncrypted(ni)) {
				1262	unlock_page(page);
				1263	ntfs_debug("Denying write access to encrypted "
				1264	"file.");
				1265	return -EACCES;
				1266	}
				1267	/* Compressed data streams are handled in compress.c. */
				1268	if (NInoCompressed(ni)) {
				1269	// TODO: Implement and replace this check with
				1270	// return ntfs_write_compressed_block(page);
				1271	unlock_page(page);
				1272	ntfs_error(vi->i_sb, "Writing to compressed "
				1273	"files is not supported yet. "
				1274	"Sorry.");
				1275	return -EOPNOTSUPP;
				1276	}
				1277	// TODO: Implement and remove this check.
				1278	if (NInoSparse(ni)) {
				1279	unlock_page(page);
				1280	ntfs_error(vi->i_sb, "Writing to sparse files "
				1281	"is not supported yet. Sorry.");
				1282	return -EOPNOTSUPP;
				1283	}
				1284	}
				1285	/* We have to zero every time due to mmap-at-end-of-file. */
				1286	if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
				1287	/* The page straddles i_size. */
				1288	unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
				1289	kaddr = kmap_atomic(page, KM_USER0);
				1290	memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
				1291	flush_dcache_page(page);
				1292	kunmap_atomic(kaddr, KM_USER0);
				1293	}
				1294	/* Handle mst protected attributes. */
				1295	if (NInoMstProtected(ni))
				1296	return ntfs_write_mst_block(page, wbc);
				1297	/* Normal data stream. */
				1298	return ntfs_write_block(page, wbc);
				1299	}
				1300	/*
				1301	* Attribute is resident, implying it is not compressed, encrypted,
				1302	* sparse, or mst protected. This also means the attribute is smaller
				1303	* than an mft record and hence smaller than a page, so can simply
				1304	* return error on any pages with index above 0.
				1305	*/
				1306	BUG_ON(page_has_buffers(page));
				1307	BUG_ON(!PageUptodate(page));
				1308	if (unlikely(page->index > 0)) {
				1309	ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
				1310	"Aborting write.", page->index);
				1311	BUG_ON(PageWriteback(page));
				1312	set_page_writeback(page);
				1313	unlock_page(page);
				1314	end_page_writeback(page);
				1315	return -EIO;
				1316	}
				1317	if (!NInoAttr(ni))
				1318	base_ni = ni;
				1319	else
				1320	base_ni = ni->ext.base_ntfs_ino;
				1321	/* Map, pin, and lock the mft record. */
				1322	m = map_mft_record(base_ni);
				1323	if (IS_ERR(m)) {
				1324	err = PTR_ERR(m);
				1325	m = NULL;
				1326	ctx = NULL;
				1327	goto err_out;
				1328	}
				1329	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1330	if (unlikely(!ctx)) {
				1331	err = -ENOMEM;
				1332	goto err_out;
				1333	}
				1334	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1335	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1336	if (unlikely(err))
				1337	goto err_out;
				1338	/*
				1339	* Keep the VM happy. This must be done otherwise the radix-tree tag
				1340	* PAGECACHE_TAG_DIRTY remains set even though the page is clean.
				1341	*/
				1342	BUG_ON(PageWriteback(page));
				1343	set_page_writeback(page);
				1344	unlock_page(page);
				1345
				1346	/*
				1347	* Here, we don't need to zero the out of bounds area everytime because
				1348	* the below memcpy() already takes care of the mmap-at-end-of-file
				1349	* requirements. If the file is converted to a non-resident one, then
				1350	* the code path use is switched to the non-resident one where the
				1351	* zeroing happens on each ntfs_writepage() invocation.
				1352	*
				1353	* The above also applies nicely when i_size is decreased.
				1354	*
				1355	* When i_size is increased, the memory between the old and new i_size
				1356	* _must_ be zeroed (or overwritten with new data). Otherwise we will
				1357	* expose data to userspace/disk which should never have been exposed.
				1358	*
				1359	* FIXME: Ensure that i_size increases do the zeroing/overwriting and
				1360	* if we cannot guarantee that, then enable the zeroing below. If the
				1361	* zeroing below is enabled, we MUST move the unlock_page() from above
				1362	* to after the kunmap_atomic(), i.e. just before the
				1363	* end_page_writeback().
				1364	* UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
				1365	* increases for resident attributes so those are ok.
				1366	* TODO: ntfs_truncate(), others?
				1367	*/
				1368
				1369	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
				1370	i_size = i_size_read(VFS_I(ni));
				1371	kaddr = kmap_atomic(page, KM_USER0);
				1372	if (unlikely(attr_len > i_size)) {
				1373	/* Zero out of bounds area in the mft record. */
				1374	memset((u8*)ctx->attr + le16_to_cpu(
				1375	ctx->attr->data.resident.value_offset) +
				1376	i_size, 0, attr_len - i_size);
				1377	attr_len = i_size;
				1378	}
				1379	/* Copy the data from the page to the mft record. */
				1380	memcpy((u8*)ctx->attr +
				1381	le16_to_cpu(ctx->attr->data.resident.value_offset),
				1382	kaddr, attr_len);
				1383	flush_dcache_mft_record_page(ctx->ntfs_ino);
				1384	/* Zero out of bounds area in the page cache page. */
				1385	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				1386	flush_dcache_page(page);
				1387	kunmap_atomic(kaddr, KM_USER0);
				1388
				1389	end_page_writeback(page);
				1390
				1391	/* Mark the mft record dirty, so it gets written back. */
				1392	mark_mft_record_dirty(ctx->ntfs_ino);
				1393	ntfs_attr_put_search_ctx(ctx);
				1394	unmap_mft_record(base_ni);
				1395	return 0;
				1396	err_out:
				1397	if (err == -ENOMEM) {
				1398	ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				1399	"page so we try again later.");
				1400	/*
				1401	* Put the page back on mapping->dirty_pages, but leave its
				1402	* buffers' dirty state as-is.
				1403	*/
				1404	redirty_page_for_writepage(wbc, page);
				1405	err = 0;
				1406	} else {
				1407	ntfs_error(vi->i_sb, "Resident attribute write failed with "
				1408	"error %i. Setting page error flag.", err);
				1409	SetPageError(page);
				1410	}
				1411	unlock_page(page);
				1412	if (ctx)
				1413	ntfs_attr_put_search_ctx(ctx);
				1414	if (m)
				1415	unmap_mft_record(base_ni);
				1416	return err;
				1417	}
				1418
				1419	/**
				1420	* ntfs_prepare_nonresident_write -
				1421	*
				1422	*/
				1423	static int ntfs_prepare_nonresident_write(struct page *page,
				1424	unsigned from, unsigned to)
				1425	{
				1426	VCN vcn;
				1427	LCN lcn;
				1428	sector_t block, ablock, iblock;
				1429	struct inode *vi;
				1430	ntfs_inode *ni;
				1431	ntfs_volume *vol;
				1432	runlist_element *rl;
				1433	struct buffer_head bh, head, wait[2], *wait_bh = wait;
				1434	unsigned int vcn_ofs, block_start, block_end, blocksize;
				1435	int err;
				1436	BOOL is_retry;
				1437	unsigned char blocksize_bits;
				1438
				1439	vi = page->mapping->host;
				1440	ni = NTFS_I(vi);
				1441	vol = ni->vol;
				1442
				1443	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				1444	"0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
				1445	page->index, from, to);
				1446
				1447	BUG_ON(!NInoNonResident(ni));
				1448
				1449	blocksize_bits = vi->i_blkbits;
				1450	blocksize = 1 << blocksize_bits;
				1451
				1452	/*
				1453	* create_empty_buffers() will create uptodate/dirty buffers if the
				1454	* page is uptodate/dirty.
				1455	*/
				1456	if (!page_has_buffers(page))
				1457	create_empty_buffers(page, blocksize, 0);
				1458	bh = head = page_buffers(page);
				1459	if (unlikely(!bh))
				1460	return -ENOMEM;
				1461
				1462	/* The first block in the page. */
				1463	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				1464
				1465	/*
				1466	* The first out of bounds block for the allocated size. No need to
				1467	* round up as allocated_size is in multiples of cluster size and the
				1468	* minimum cluster size is 512 bytes, which is equal to the smallest
				1469	* blocksize.
				1470	*/
				1471	ablock = ni->allocated_size >> blocksize_bits;
				1472
				1473	/* The last (fully or partially) initialized block. */
				1474	iblock = ni->initialized_size >> blocksize_bits;
				1475
				1476	/* Loop through all the buffers in the page. */
				1477	block_start = 0;
				1478	rl = NULL;
				1479	err = 0;
				1480	do {
				1481	block_end = block_start + blocksize;
				1482	/*
				1483	* If buffer @bh is outside the write, just mark it uptodate
				1484	* if the page is uptodate and continue with the next buffer.
				1485	*/
				1486	if (block_end <= from \|\| block_start >= to) {
				1487	if (PageUptodate(page)) {
				1488	if (!buffer_uptodate(bh))
				1489	set_buffer_uptodate(bh);
				1490	}
				1491	continue;
				1492	}
				1493	/*
				1494	* @bh is at least partially being written to.
				1495	* Make sure it is not marked as new.
				1496	*/
				1497	//if (buffer_new(bh))
				1498	// clear_buffer_new(bh);
				1499
				1500	if (block >= ablock) {
				1501	// TODO: block is above allocated_size, need to
				1502	// allocate it. Best done in one go to accommodate not
				1503	// only block but all above blocks up to and including:
				1504	// ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
				1505	// - 1) >> blobksize_bits. Obviously will need to round
				1506	// up to next cluster boundary, too. This should be
				1507	// done with a helper function, so it can be reused.
				1508	ntfs_error(vol->sb, "Writing beyond allocated size "
				1509	"is not supported yet. Sorry.");
				1510	err = -EOPNOTSUPP;
				1511	goto err_out;
				1512	// Need to update ablock.
				1513	// Need to set_buffer_new() on all block bhs that are
				1514	// newly allocated.
				1515	}
				1516	/*
				1517	* Now we have enough allocated size to fulfill the whole
				1518	* request, i.e. block < ablock is true.
				1519	*/
				1520	if (unlikely((block >= iblock) &&
				1521	(ni->initialized_size < vi->i_size))) {
				1522	/*
				1523	* If this page is fully outside initialized size, zero
				1524	* out all pages between the current initialized size
				1525	* and the current page. Just use ntfs_readpage() to do
				1526	* the zeroing transparently.
				1527	*/
				1528	if (block > iblock) {
				1529	// TODO:
				1530	// For each page do:
				1531	// - read_cache_page()
				1532	// Again for each page do:
				1533	// - wait_on_page_locked()
				1534	// - Check (PageUptodate(page) &&
				1535	// !PageError(page))
				1536	// Update initialized size in the attribute and
				1537	// in the inode.
				1538	// Again, for each page do:
				1539	// __set_page_dirty_buffers();
				1540	// page_cache_release()
				1541	// We don't need to wait on the writes.
				1542	// Update iblock.
				1543	}
				1544	/*
				1545	* The current page straddles initialized size. Zero
				1546	* all non-uptodate buffers and set them uptodate (and
				1547	* dirty?). Note, there aren't any non-uptodate buffers
				1548	* if the page is uptodate.
				1549	* FIXME: For an uptodate page, the buffers may need to
				1550	* be written out because they were not initialized on
				1551	* disk before.
				1552	*/
				1553	if (!PageUptodate(page)) {
				1554	// TODO:
				1555	// Zero any non-uptodate buffers up to i_size.
				1556	// Set them uptodate and dirty.
				1557	}
				1558	// TODO:
				1559	// Update initialized size in the attribute and in the
				1560	// inode (up to i_size).
				1561	// Update iblock.
				1562	// FIXME: This is inefficient. Try to batch the two
				1563	// size changes to happen in one go.
				1564	ntfs_error(vol->sb, "Writing beyond initialized size "
				1565	"is not supported yet. Sorry.");
				1566	err = -EOPNOTSUPP;
				1567	goto err_out;
				1568	// Do NOT set_buffer_new() BUT DO clear buffer range
				1569	// outside write request range.
				1570	// set_buffer_uptodate() on complete buffers as well as
				1571	// set_buffer_dirty().
				1572	}
				1573
				1574	/* Need to map unmapped buffers. */
				1575	if (!buffer_mapped(bh)) {
				1576	/* Unmapped buffer. Need to map it. */
				1577	bh->b_bdev = vol->sb->s_bdev;
				1578
				1579	/* Convert block into corresponding vcn and offset. */
				1580	vcn = (VCN)block << blocksize_bits >>
				1581	vol->cluster_size_bits;
				1582	vcn_ofs = ((VCN)block << blocksize_bits) &
				1583	vol->cluster_size_mask;
				1584
				1585	is_retry = FALSE;
				1586	if (!rl) {
				1587	lock_retry_remap:
				1588	down_read(&ni->runlist.lock);
				1589	rl = ni->runlist.rl;
				1590	}
				1591	if (likely(rl != NULL)) {
				1592	/* Seek to element containing target vcn. */
				1593	while (rl->length && rl[1].vcn <= vcn)
				1594	rl++;
				1595	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				1596	} else
				1597	lcn = LCN_RL_NOT_MAPPED;
				1598	if (unlikely(lcn < 0)) {
				1599	/*
				1600	* We extended the attribute allocation above.
				1601	* If we hit an ENOENT here it means that the
				1602	* allocation was insufficient which is a bug.
				1603	*/
				1604	BUG_ON(lcn == LCN_ENOENT);
				1605
				1606	/* It is a hole, need to instantiate it. */
				1607	if (lcn == LCN_HOLE) {
				1608	// TODO: Instantiate the hole.
				1609	// clear_buffer_new(bh);
				1610	// unmap_underlying_metadata(bh->b_bdev,
				1611	// bh->b_blocknr);
				1612	// For non-uptodate buffers, need to
				1613	// zero out the region outside the
				1614	// request in this bh or all bhs,
				1615	// depending on what we implemented
				1616	// above.
				1617	// Need to flush_dcache_page().
				1618	// Or could use set_buffer_new()
				1619	// instead?
				1620	ntfs_error(vol->sb, "Writing into "
				1621	"sparse regions is "
				1622	"not supported yet. "
				1623	"Sorry.");
				1624	err = -EOPNOTSUPP;
				1625	goto err_out;
				1626	} else if (!is_retry &&
				1627	lcn == LCN_RL_NOT_MAPPED) {
				1628	is_retry = TRUE;
				1629	/*
				1630	* Attempt to map runlist, dropping
				1631	* lock for the duration.
				1632	*/
				1633	up_read(&ni->runlist.lock);
				1634	err = ntfs_map_runlist(ni, vcn);
				1635	if (likely(!err))
				1636	goto lock_retry_remap;
				1637	rl = NULL;
				1638	lcn = err;
				1639	}
				1640	/*
				1641	* Failed to map the buffer, even after
				1642	* retrying.
				1643	*/
				1644	bh->b_blocknr = -1;
				1645	ntfs_error(vol->sb, "Failed to write to inode "
				1646	"0x%lx, attribute type 0x%x, "
				1647	"vcn 0x%llx, offset 0x%x "
				1648	"because its location on disk "
				1649	"could not be determined%s "
				1650	"(error code %lli).",
				1651	ni->mft_no, ni->type,
				1652	(unsigned long long)vcn,
				1653	vcn_ofs, is_retry ? " even "
				1654	"after retrying" : "",
				1655	(long long)lcn);
				1656	if (!err)
				1657	err = -EIO;
				1658	goto err_out;
				1659	}
				1660	/* We now have a successful remap, i.e. lcn >= 0. */
				1661
				1662	/* Setup buffer head to correct block. */
				1663	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				1664	+ vcn_ofs) >> blocksize_bits;
				1665	set_buffer_mapped(bh);
				1666
				1667	// FIXME: Something analogous to this is needed for
				1668	// each newly allocated block, i.e. BH_New.
				1669	// FIXME: Might need to take this out of the
				1670	// if (!buffer_mapped(bh)) {}, depending on how we
				1671	// implement things during the allocated_size and
				1672	// initialized_size extension code above.
				1673	if (buffer_new(bh)) {
				1674	clear_buffer_new(bh);
				1675	unmap_underlying_metadata(bh->b_bdev,
				1676	bh->b_blocknr);
				1677	if (PageUptodate(page)) {
				1678	set_buffer_uptodate(bh);
				1679	continue;
				1680	}
				1681	/*
				1682	* Page is _not_ uptodate, zero surrounding
				1683	* region. NOTE: This is how we decide if to
				1684	* zero or not!
				1685	*/
				1686	if (block_end > to \|\| block_start < from) {
				1687	void *kaddr;
				1688
				1689	kaddr = kmap_atomic(page, KM_USER0);
				1690	if (block_end > to)
				1691	memset(kaddr + to, 0,
				1692	block_end - to);
				1693	if (block_start < from)
				1694	memset(kaddr + block_start, 0,
				1695	from -
				1696	block_start);
				1697	flush_dcache_page(page);
				1698	kunmap_atomic(kaddr, KM_USER0);
				1699	}
				1700	continue;
				1701	}
				1702	}
				1703	/* @bh is mapped, set it uptodate if the page is uptodate. */
				1704	if (PageUptodate(page)) {
				1705	if (!buffer_uptodate(bh))
				1706	set_buffer_uptodate(bh);
				1707	continue;
				1708	}
				1709	/*
				1710	* The page is not uptodate. The buffer is mapped. If it is not
				1711	* uptodate, and it is only partially being written to, we need
				1712	* to read the buffer in before the write, i.e. right now.
				1713	*/
				1714	if (!buffer_uptodate(bh) &&
				1715	(block_start < from \|\| block_end > to)) {
				1716	ll_rw_block(READ, 1, &bh);
				1717	*wait_bh++ = bh;
				1718	}
				1719	} while (block++, block_start = block_end,
				1720	(bh = bh->b_this_page) != head);
				1721
				1722	/* Release the lock if we took it. */
				1723	if (rl) {
				1724	up_read(&ni->runlist.lock);
				1725	rl = NULL;
				1726	}
				1727
				1728	/* If we issued read requests, let them complete. */
				1729	while (wait_bh > wait) {
				1730	wait_on_buffer(*--wait_bh);
				1731	if (!buffer_uptodate(*wait_bh))
				1732	return -EIO;
				1733	}
				1734
				1735	ntfs_debug("Done.");
				1736	return 0;
				1737	err_out:
				1738	/*
				1739	* Zero out any newly allocated blocks to avoid exposing stale data.
				1740	* If BH_New is set, we know that the block was newly allocated in the
				1741	* above loop.
				1742	* FIXME: What about initialized_size increments? Have we done all the
				1743	* required zeroing above? If not this error handling is broken, and
				1744	* in particular the if (block_end <= from) check is completely bogus.
				1745	*/
				1746	bh = head;
				1747	block_start = 0;
				1748	is_retry = FALSE;
				1749	do {
				1750	block_end = block_start + blocksize;
				1751	if (block_end <= from)
				1752	continue;
				1753	if (block_start >= to)
				1754	break;
				1755	if (buffer_new(bh)) {
				1756	void *kaddr;
				1757
				1758	clear_buffer_new(bh);
				1759	kaddr = kmap_atomic(page, KM_USER0);
				1760	memset(kaddr + block_start, 0, bh->b_size);
				1761	kunmap_atomic(kaddr, KM_USER0);
				1762	set_buffer_uptodate(bh);
				1763	mark_buffer_dirty(bh);
				1764	is_retry = TRUE;
				1765	}
				1766	} while (block_start = block_end, (bh = bh->b_this_page) != head);
				1767	if (is_retry)
				1768	flush_dcache_page(page);
				1769	if (rl)
				1770	up_read(&ni->runlist.lock);
				1771	return err;
				1772	}
				1773
				1774	/**
				1775	* ntfs_prepare_write - prepare a page for receiving data
				1776	*
				1777	* This is called from generic_file_write() with i_sem held on the inode
				1778	* (@page->mapping->host). The @page is locked but not kmap()ped. The source
				1779	* data has not yet been copied into the @page.
				1780	*
				1781	* Need to extend the attribute/fill in holes if necessary, create blocks and
				1782	* make partially overwritten blocks uptodate.
				1783	*
				1784	* i_size is not to be modified yet.
				1785	*
				1786	* Return 0 on success or -errno on error.
				1787	*
				1788	* Should be using block_prepare_write() [support for sparse files] or
				1789	* cont_prepare_write() [no support for sparse files]. Cannot do that due to
				1790	* ntfs specifics but can look at them for implementation guidance.
				1791	*
				1792	* Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
				1793	* the first byte in the page that will be written to and @to is the first byte
				1794	* after the last byte that will be written to.
				1795	*/
				1796	static int ntfs_prepare_write(struct file file, struct page page,
				1797	unsigned from, unsigned to)
				1798	{
				1799	s64 new_size;
				1800	struct inode *vi = page->mapping->host;
				1801	ntfs_inode base_ni = NULL, ni = NTFS_I(vi);
				1802	ntfs_volume *vol = ni->vol;
				1803	ntfs_attr_search_ctx *ctx = NULL;
				1804	MFT_RECORD *m = NULL;
				1805	ATTR_RECORD *a;
				1806	u8 *kaddr;
				1807	u32 attr_len;
				1808	int err;
				1809
				1810	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				1811	"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
				1812	page->index, from, to);
				1813	BUG_ON(!PageLocked(page));
				1814	BUG_ON(from > PAGE_CACHE_SIZE);
				1815	BUG_ON(to > PAGE_CACHE_SIZE);
				1816	BUG_ON(from > to);
				1817	BUG_ON(NInoMstProtected(ni));
				1818	/*
				1819	* If a previous ntfs_truncate() failed, repeat it and abort if it
				1820	* fails again.
				1821	*/
				1822	if (unlikely(NInoTruncateFailed(ni))) {
				1823	down_write(&vi->i_alloc_sem);
				1824	err = ntfs_truncate(vi);
				1825	up_write(&vi->i_alloc_sem);
				1826	if (err \|\| NInoTruncateFailed(ni)) {
				1827	if (!err)
				1828	err = -EIO;
				1829	goto err_out;
				1830	}
				1831	}
				1832	/* If the attribute is not resident, deal with it elsewhere. */
				1833	if (NInoNonResident(ni)) {
				1834	/*
				1835	* Only unnamed $DATA attributes can be compressed, encrypted,
				1836	* and/or sparse.
				1837	*/
				1838	if (ni->type == AT_DATA && !ni->name_len) {
				1839	/* If file is encrypted, deny access, just like NT4. */
				1840	if (NInoEncrypted(ni)) {
				1841	ntfs_debug("Denying write access to encrypted "
				1842	"file.");
				1843	return -EACCES;
				1844	}
				1845	/* Compressed data streams are handled in compress.c. */
				1846	if (NInoCompressed(ni)) {
				1847	// TODO: Implement and replace this check with
				1848	// return ntfs_write_compressed_block(page);
				1849	ntfs_error(vi->i_sb, "Writing to compressed "
				1850	"files is not supported yet. "
				1851	"Sorry.");
				1852	return -EOPNOTSUPP;
				1853	}
				1854	// TODO: Implement and remove this check.
				1855	if (NInoSparse(ni)) {
				1856	ntfs_error(vi->i_sb, "Writing to sparse files "
				1857	"is not supported yet. Sorry.");
				1858	return -EOPNOTSUPP;
				1859	}
				1860	}
				1861	/* Normal data stream. */
				1862	return ntfs_prepare_nonresident_write(page, from, to);
				1863	}
				1864	/*
				1865	* Attribute is resident, implying it is not compressed, encrypted, or
				1866	* sparse.
				1867	*/
				1868	BUG_ON(page_has_buffers(page));
				1869	new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
				1870	/* If we do not need to resize the attribute allocation we are done. */
				1871	if (new_size <= vi->i_size)
				1872	goto done;
				1873
				1874	// FIXME: We abort for now as this code is not safe.
				1875	ntfs_error(vi->i_sb, "Changing the file size is not supported yet. "
				1876	"Sorry.");
				1877	return -EOPNOTSUPP;
				1878
				1879	/* Map, pin, and lock the (base) mft record. */
				1880	if (!NInoAttr(ni))
				1881	base_ni = ni;
				1882	else
				1883	base_ni = ni->ext.base_ntfs_ino;
				1884	m = map_mft_record(base_ni);
				1885	if (IS_ERR(m)) {
				1886	err = PTR_ERR(m);
				1887	m = NULL;
				1888	ctx = NULL;
				1889	goto err_out;
				1890	}
				1891	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1892	if (unlikely(!ctx)) {
				1893	err = -ENOMEM;
				1894	goto err_out;
				1895	}
				1896	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1897	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1898	if (unlikely(err)) {
				1899	if (err == -ENOENT)
				1900	err = -EIO;
				1901	goto err_out;
				1902	}
				1903	m = ctx->mrec;
				1904	a = ctx->attr;
				1905	/* The total length of the attribute value. */
				1906	attr_len = le32_to_cpu(a->data.resident.value_length);
				1907	BUG_ON(vi->i_size != attr_len);
				1908	/* Check if new size is allowed in $AttrDef. */
				1909	err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
				1910	if (unlikely(err)) {
				1911	if (err == -ERANGE) {
				1912	ntfs_error(vol->sb, "Write would cause the inode "
				1913	"0x%lx to exceed the maximum size for "
				1914	"its attribute type (0x%x). Aborting "
				1915	"write.", vi->i_ino,
				1916	le32_to_cpu(ni->type));
				1917	} else {
				1918	ntfs_error(vol->sb, "Inode 0x%lx has unknown "
				1919	"attribute type 0x%x. Aborting "
				1920	"write.", vi->i_ino,
				1921	le32_to_cpu(ni->type));
				1922	err = -EIO;
				1923	}
				1924	goto err_out2;
				1925	}
				1926	/*
				1927	* Extend the attribute record to be able to store the new attribute
				1928	* size.
				1929	*/
				1930	if (new_size >= vol->mft_record_size \|\| ntfs_attr_record_resize(m, a,
				1931	le16_to_cpu(a->data.resident.value_offset) +
				1932	new_size)) {
				1933	/* Not enough space in the mft record. */
				1934	ntfs_error(vol->sb, "Not enough space in the mft record for "
				1935	"the resized attribute value. This is not "
				1936	"supported yet. Aborting write.");
				1937	err = -EOPNOTSUPP;
				1938	goto err_out2;
				1939	}
				1940	/*
				1941	* We have enough space in the mft record to fit the write. This
				1942	* implies the attribute is smaller than the mft record and hence the
				1943	* attribute must be in a single page and hence page->index must be 0.
				1944	*/
				1945	BUG_ON(page->index);
				1946	/*
				1947	* If the beginning of the write is past the old size, enlarge the
				1948	* attribute value up to the beginning of the write and fill it with
				1949	* zeroes.
				1950	*/
				1951	if (from > attr_len) {
				1952	memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
				1953	attr_len, 0, from - attr_len);
				1954	a->data.resident.value_length = cpu_to_le32(from);
				1955	/* Zero the corresponding area in the page as well. */
				1956	if (PageUptodate(page)) {
				1957	kaddr = kmap_atomic(page, KM_USER0);
				1958	memset(kaddr + attr_len, 0, from - attr_len);
				1959	kunmap_atomic(kaddr, KM_USER0);
				1960	flush_dcache_page(page);
				1961	}
				1962	}
				1963	flush_dcache_mft_record_page(ctx->ntfs_ino);
				1964	mark_mft_record_dirty(ctx->ntfs_ino);
				1965	ntfs_attr_put_search_ctx(ctx);
				1966	unmap_mft_record(base_ni);
				1967	/*
				1968	* Because resident attributes are handled by memcpy() to/from the
				1969	* corresponding MFT record, and because this form of i/o is byte
				1970	* aligned rather than block aligned, there is no need to bring the
				1971	* page uptodate here as in the non-resident case where we need to
				1972	* bring the buffers straddled by the write uptodate before
				1973	* generic_file_write() does the copying from userspace.
				1974	*
				1975	* We thus defer the uptodate bringing of the page region outside the
				1976	* region written to to ntfs_commit_write(), which makes the code
				1977	* simpler and saves one atomic kmap which is good.
				1978	*/
				1979	done:
				1980	ntfs_debug("Done.");
				1981	return 0;
				1982	err_out:
				1983	if (err == -ENOMEM)
				1984	ntfs_warning(vi->i_sb, "Error allocating memory required to "
				1985	"prepare the write.");
				1986	else {
				1987	ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
				1988	"with error %i.", err);
				1989	NVolSetErrors(vol);
				1990	make_bad_inode(vi);
				1991	}
				1992	err_out2:
				1993	if (ctx)
				1994	ntfs_attr_put_search_ctx(ctx);
				1995	if (m)
				1996	unmap_mft_record(base_ni);
				1997	return err;
				1998	}
				1999
				2000	/**
				2001	* ntfs_commit_nonresident_write -
				2002	*
				2003	*/
				2004	static int ntfs_commit_nonresident_write(struct page *page,
				2005	unsigned from, unsigned to)
				2006	{
				2007	s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
				2008	struct inode *vi = page->mapping->host;
				2009	struct buffer_head bh, head;
				2010	unsigned int block_start, block_end, blocksize;
				2011	BOOL partial;
				2012
				2013	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				2014	"0x%lx, from = %u, to = %u.", vi->i_ino,
				2015	NTFS_I(vi)->type, page->index, from, to);
				2016	blocksize = 1 << vi->i_blkbits;
				2017
				2018	// FIXME: We need a whole slew of special cases in here for compressed
				2019	// files for example...
				2020	// For now, we know ntfs_prepare_write() would have failed so we can't
				2021	// get here in any of the cases which we have to special case, so we
				2022	// are just a ripped off, unrolled generic_commit_write().
				2023
				2024	bh = head = page_buffers(page);
				2025	block_start = 0;
				2026	partial = FALSE;
				2027	do {
				2028	block_end = block_start + blocksize;
				2029	if (block_end <= from \|\| block_start >= to) {
				2030	if (!buffer_uptodate(bh))
				2031	partial = TRUE;
				2032	} else {
				2033	set_buffer_uptodate(bh);
				2034	mark_buffer_dirty(bh);
				2035	}
				2036	} while (block_start = block_end, (bh = bh->b_this_page) != head);
				2037	/*
				2038	* If this is a partial write which happened to make all buffers
				2039	* uptodate then we can optimize away a bogus ->readpage() for the next
				2040	* read(). Here we 'discover' whether the page went uptodate as a
				2041	* result of this (potentially partial) write.
				2042	*/
				2043	if (!partial)
				2044	SetPageUptodate(page);
				2045	/*
				2046	* Not convinced about this at all. See disparity comment above. For
				2047	* now we know ntfs_prepare_write() would have failed in the write
				2048	* exceeds i_size case, so this will never trigger which is fine.
				2049	*/
				2050	if (pos > vi->i_size) {
				2051	ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
				2052	"not supported yet. Sorry.");
				2053	return -EOPNOTSUPP;
				2054	// vi->i_size = pos;
				2055	// mark_inode_dirty(vi);
				2056	}
				2057	ntfs_debug("Done.");
				2058	return 0;
				2059	}
				2060
				2061	/**
				2062	* ntfs_commit_write - commit the received data
				2063	*
				2064	* This is called from generic_file_write() with i_sem held on the inode
				2065	* (@page->mapping->host). The @page is locked but not kmap()ped. The source
				2066	* data has already been copied into the @page. ntfs_prepare_write() has been
				2067	* called before the data copied and it returned success so we can take the
				2068	* results of various BUG checks and some error handling for granted.
				2069	*
				2070	* Need to mark modified blocks dirty so they get written out later when
				2071	* ntfs_writepage() is invoked by the VM.
				2072	*
				2073	* Return 0 on success or -errno on error.
				2074	*
				2075	* Should be using generic_commit_write(). This marks buffers uptodate and
				2076	* dirty, sets the page uptodate if all buffers in the page are uptodate, and
				2077	* updates i_size if the end of io is beyond i_size. In that case, it also
				2078	* marks the inode dirty.
				2079	*
				2080	* Cannot use generic_commit_write() due to ntfs specialities but can look at
				2081	* it for implementation guidance.
				2082	*
				2083	* If things have gone as outlined in ntfs_prepare_write(), then we do not
				2084	* need to do any page content modifications here at all, except in the write
				2085	* to resident attribute case, where we need to do the uptodate bringing here
				2086	* which we combine with the copying into the mft record which means we save
				2087	* one atomic kmap.
				2088	*/
				2089	static int ntfs_commit_write(struct file file, struct page page,
				2090	unsigned from, unsigned to)
				2091	{
				2092	struct inode *vi = page->mapping->host;
				2093	ntfs_inode base_ni, ni = NTFS_I(vi);
				2094	char kaddr, kattr;
				2095	ntfs_attr_search_ctx *ctx;
				2096	MFT_RECORD *m;
				2097	ATTR_RECORD *a;
				2098	u32 attr_len;
				2099	int err;
				2100
				2101	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				2102	"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
				2103	page->index, from, to);
				2104	/* If the attribute is not resident, deal with it elsewhere. */
				2105	if (NInoNonResident(ni)) {
				2106	/* Only unnamed $DATA attributes can be compressed/encrypted. */
				2107	if (ni->type == AT_DATA && !ni->name_len) {
				2108	/* Encrypted files need separate handling. */
				2109	if (NInoEncrypted(ni)) {
				2110	// We never get here at present!
				2111	BUG();
				2112	}
				2113	/* Compressed data streams are handled in compress.c. */
				2114	if (NInoCompressed(ni)) {
				2115	// TODO: Implement this!
				2116	// return ntfs_write_compressed_block(page);
				2117	// We never get here at present!
				2118	BUG();
				2119	}
				2120	}
				2121	/* Normal data stream. */
				2122	return ntfs_commit_nonresident_write(page, from, to);
				2123	}
				2124	/*
				2125	* Attribute is resident, implying it is not compressed, encrypted, or
				2126	* sparse.
				2127	*/
				2128	if (!NInoAttr(ni))
				2129	base_ni = ni;
				2130	else
				2131	base_ni = ni->ext.base_ntfs_ino;
				2132	/* Map, pin, and lock the mft record. */
				2133	m = map_mft_record(base_ni);
				2134	if (IS_ERR(m)) {
				2135	err = PTR_ERR(m);
				2136	m = NULL;
				2137	ctx = NULL;
				2138	goto err_out;
				2139	}
				2140	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				2141	if (unlikely(!ctx)) {
				2142	err = -ENOMEM;
				2143	goto err_out;
				2144	}
				2145	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				2146	CASE_SENSITIVE, 0, NULL, 0, ctx);
				2147	if (unlikely(err)) {
				2148	if (err == -ENOENT)
				2149	err = -EIO;
				2150	goto err_out;
				2151	}
				2152	a = ctx->attr;
				2153	/* The total length of the attribute value. */
				2154	attr_len = le32_to_cpu(a->data.resident.value_length);
				2155	BUG_ON(from > attr_len);
				2156	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
				2157	kaddr = kmap_atomic(page, KM_USER0);
				2158	/* Copy the received data from the page to the mft record. */
				2159	memcpy(kattr + from, kaddr + from, to - from);
				2160	/* Update the attribute length if necessary. */
				2161	if (to > attr_len) {
				2162	attr_len = to;
				2163	a->data.resident.value_length = cpu_to_le32(attr_len);
				2164	}
				2165	/*
				2166	* If the page is not uptodate, bring the out of bounds area(s)
				2167	* uptodate by copying data from the mft record to the page.
				2168	*/
				2169	if (!PageUptodate(page)) {
				2170	if (from > 0)
				2171	memcpy(kaddr, kattr, from);
				2172	if (to < attr_len)
				2173	memcpy(kaddr + to, kattr + to, attr_len - to);
				2174	/* Zero the region outside the end of the attribute value. */
				2175	if (attr_len < PAGE_CACHE_SIZE)
				2176	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				2177	/*
				2178	* The probability of not having done any of the above is
				2179	* extremely small, so we just flush unconditionally.
				2180	*/
				2181	flush_dcache_page(page);
				2182	SetPageUptodate(page);
				2183	}
				2184	kunmap_atomic(kaddr, KM_USER0);
				2185	/* Update i_size if necessary. */
				2186	if (vi->i_size < attr_len) {
				2187	ni->allocated_size = ni->initialized_size = attr_len;
				2188	i_size_write(vi, attr_len);
				2189	}
				2190	/* Mark the mft record dirty, so it gets written back. */
				2191	flush_dcache_mft_record_page(ctx->ntfs_ino);
				2192	mark_mft_record_dirty(ctx->ntfs_ino);
				2193	ntfs_attr_put_search_ctx(ctx);
				2194	unmap_mft_record(base_ni);
				2195	ntfs_debug("Done.");
				2196	return 0;
				2197	err_out:
				2198	if (err == -ENOMEM) {
				2199	ntfs_warning(vi->i_sb, "Error allocating memory required to "
				2200	"commit the write.");
				2201	if (PageUptodate(page)) {
				2202	ntfs_warning(vi->i_sb, "Page is uptodate, setting "
				2203	"dirty so the write will be retried "
				2204	"later on by the VM.");
				2205	/*
				2206	* Put the page on mapping->dirty_pages, but leave its
				2207	* buffers' dirty state as-is.
				2208	*/
				2209	__set_page_dirty_nobuffers(page);
				2210	err = 0;
				2211	} else
				2212	ntfs_error(vi->i_sb, "Page is not uptodate. Written "
				2213	"data has been lost.");
				2214	} else {
				2215	ntfs_error(vi->i_sb, "Resident attribute commit write failed "
				2216	"with error %i.", err);
				2217	NVolSetErrors(ni->vol);
				2218	make_bad_inode(vi);
				2219	}
				2220	if (ctx)
				2221	ntfs_attr_put_search_ctx(ctx);
				2222	if (m)
				2223	unmap_mft_record(base_ni);
				2224	return err;
				2225	}
				2226
				2227	#endif /* NTFS_RW */
				2228
				2229	/**
				2230	* ntfs_aops - general address space operations for inodes and attributes
				2231	*/
				2232	struct address_space_operations ntfs_aops = {
				2233	.readpage = ntfs_readpage, /* Fill page with data. */
				2234	.sync_page = block_sync_page, /* Currently, just unplugs the
				2235	disk request queue. */
				2236	#ifdef NTFS_RW
				2237	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				2238	.prepare_write = ntfs_prepare_write, /* Prepare page and buffers
				2239	ready to receive data. */
				2240	.commit_write = ntfs_commit_write, /* Commit received data. */
				2241	#endif /* NTFS_RW */
				2242	};
				2243
				2244	/**
				2245	* ntfs_mst_aops - general address space operations for mst protecteed inodes
				2246	* and attributes
				2247	*/
				2248	struct address_space_operations ntfs_mst_aops = {
				2249	.readpage = ntfs_readpage, /* Fill page with data. */
				2250	.sync_page = block_sync_page, /* Currently, just unplugs the
				2251	disk request queue. */
				2252	#ifdef NTFS_RW
				2253	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				2254	.set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
				2255	without touching the buffers
				2256	belonging to the page. */
				2257	#endif /* NTFS_RW */
				2258	};
				2259
				2260	#ifdef NTFS_RW
				2261
				2262	/**
				2263	* mark_ntfs_record_dirty - mark an ntfs record dirty
				2264	* @page: page containing the ntfs record to mark dirty
				2265	* @ofs: byte offset within @page at which the ntfs record begins
				2266	*
				2267	* Set the buffers and the page in which the ntfs record is located dirty.
				2268	*
				2269	* The latter also marks the vfs inode the ntfs record belongs to dirty
				2270	* (I_DIRTY_PAGES only).
				2271	*
				2272	* If the page does not have buffers, we create them and set them uptodate.
				2273	* The page may not be locked which is why we need to handle the buffers under
				2274	* the mapping->private_lock. Once the buffers are marked dirty we no longer
				2275	* need the lock since try_to_free_buffers() does not free dirty buffers.
				2276	*/
				2277	void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
				2278	struct address_space *mapping = page->mapping;
				2279	ntfs_inode *ni = NTFS_I(mapping->host);
				2280	struct buffer_head bh, head, *buffers_to_free = NULL;
				2281	unsigned int end, bh_size, bh_ofs;
				2282
				2283	BUG_ON(!PageUptodate(page));
				2284	end = ofs + ni->itype.index.block_size;
				2285	bh_size = 1 << VFS_I(ni)->i_blkbits;
				2286	spin_lock(&mapping->private_lock);
				2287	if (unlikely(!page_has_buffers(page))) {
				2288	spin_unlock(&mapping->private_lock);
				2289	bh = head = alloc_page_buffers(page, bh_size, 1);
				2290	spin_lock(&mapping->private_lock);
				2291	if (likely(!page_has_buffers(page))) {
				2292	struct buffer_head *tail;
				2293
				2294	do {
				2295	set_buffer_uptodate(bh);
				2296	tail = bh;
				2297	bh = bh->b_this_page;
				2298	} while (bh);
				2299	tail->b_this_page = head;
				2300	attach_page_buffers(page, head);
				2301	} else
				2302	buffers_to_free = bh;
				2303	}
				2304	bh = head = page_buffers(page);
				2305	do {
				2306	bh_ofs = bh_offset(bh);
				2307	if (bh_ofs + bh_size <= ofs)
				2308	continue;
				2309	if (unlikely(bh_ofs >= end))
				2310	break;
				2311	set_buffer_dirty(bh);
				2312	} while ((bh = bh->b_this_page) != head);
				2313	spin_unlock(&mapping->private_lock);
				2314	__set_page_dirty_nobuffers(page);
				2315	if (unlikely(buffers_to_free)) {
				2316	do {
				2317	bh = buffers_to_free->b_this_page;
				2318	free_buffer_head(buffers_to_free);
				2319	buffers_to_free = bh;
				2320	} while (buffers_to_free);
				2321	}
				2322	}
				2323
				2324	#endif /* NTFS_RW */