Blame - fs/ntfs/aops.c - kernel/msm-4.9

blob: a53212793809c9146d508cb6be849a71ab32173e [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/**
				2	* aops.c - NTFS kernel address space operations and page cache handling.
				3	* Part of the Linux-NTFS project.
				4	*
				5	* Copyright (c) 2001-2004 Anton Altaparmakov
				6	* Copyright (c) 2002 Richard Russon
				7	*
				8	* This program/include file is free software; you can redistribute it and/or
				9	* modify it under the terms of the GNU General Public License as published
				10	* by the Free Software Foundation; either version 2 of the License, or
				11	* (at your option) any later version.
				12	*
				13	* This program/include file is distributed in the hope that it will be
				14	* useful, but WITHOUT ANY WARRANTY; without even the implied warranty
				15	* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				16	* GNU General Public License for more details.
				17	*
				18	* You should have received a copy of the GNU General Public License
				19	* along with this program (in the main directory of the Linux-NTFS
				20	* distribution in the file COPYING); if not, write to the Free Software
				21	* Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				22	*/
				23
				24	#include <linux/errno.h>
				25	#include <linux/mm.h>
				26	#include <linux/pagemap.h>
				27	#include <linux/swap.h>
				28	#include <linux/buffer_head.h>
				29	#include <linux/writeback.h>
				30
				31	#include "aops.h"
				32	#include "attrib.h"
				33	#include "debug.h"
				34	#include "inode.h"
				35	#include "mft.h"
				36	#include "runlist.h"
				37	#include "types.h"
				38	#include "ntfs.h"
				39
				40	/**
				41	* ntfs_end_buffer_async_read - async io completion for reading attributes
				42	* @bh: buffer head on which io is completed
				43	* @uptodate: whether @bh is now uptodate or not
				44	*
				45	* Asynchronous I/O completion handler for reading pages belonging to the
				46	* attribute address space of an inode. The inodes can either be files or
				47	* directories or they can be fake inodes describing some attribute.
				48	*
				49	* If NInoMstProtected(), perform the post read mst fixups when all IO on the
				50	* page has been completed and mark the page uptodate or set the error bit on
				51	* the page. To determine the size of the records that need fixing up, we
				52	* cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
				53	* record size, and index_block_size_bits, to the log(base 2) of the ntfs
				54	* record size.
				55	*/
				56	static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
				57	{
				58	static DEFINE_SPINLOCK(page_uptodate_lock);
				59	unsigned long flags;
				60	struct buffer_head *tmp;
				61	struct page *page;
				62	ntfs_inode *ni;
				63	int page_uptodate = 1;
				64
				65	page = bh->b_page;
				66	ni = NTFS_I(page->mapping->host);
				67
				68	if (likely(uptodate)) {
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	69	s64 file_ofs, initialized_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	70
				71	set_buffer_uptodate(bh);
				72
				73	file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
				74	bh_offset(bh);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	75	read_lock_irqsave(&ni->size_lock, flags);
				76	initialized_size = ni->initialized_size;
				77	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	78	/* Check for the current buffer head overflowing. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	79	if (file_ofs + bh->b_size > initialized_size) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	80	char *addr;
				81	int ofs = 0;
				82
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	83	if (file_ofs < initialized_size)
				84	ofs = initialized_size - file_ofs;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	85	addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
				86	memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
				87	flush_dcache_page(page);
				88	kunmap_atomic(addr, KM_BIO_SRC_IRQ);
				89	}
				90	} else {
				91	clear_buffer_uptodate(bh);
				92	ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
				93	(unsigned long long)bh->b_blocknr);
				94	SetPageError(page);
				95	}
				96	spin_lock_irqsave(&page_uptodate_lock, flags);
				97	clear_buffer_async_read(bh);
				98	unlock_buffer(bh);
				99	tmp = bh;
				100	do {
				101	if (!buffer_uptodate(tmp))
				102	page_uptodate = 0;
				103	if (buffer_async_read(tmp)) {
				104	if (likely(buffer_locked(tmp)))
				105	goto still_busy;
				106	/* Async buffers must be locked. */
				107	BUG();
				108	}
				109	tmp = tmp->b_this_page;
				110	} while (tmp != bh);
				111	spin_unlock_irqrestore(&page_uptodate_lock, flags);
				112	/*
				113	* If none of the buffers had errors then we can set the page uptodate,
				114	* but we first have to perform the post read mst fixups, if the
				115	* attribute is mst protected, i.e. if NInoMstProtected(ni) is true.
				116	* Note we ignore fixup errors as those are detected when
				117	* map_mft_record() is called which gives us per record granularity
				118	* rather than per page granularity.
				119	*/
				120	if (!NInoMstProtected(ni)) {
				121	if (likely(page_uptodate && !PageError(page)))
				122	SetPageUptodate(page);
				123	} else {
				124	char *addr;
				125	unsigned int i, recs;
				126	u32 rec_size;
				127
				128	rec_size = ni->itype.index.block_size;
				129	recs = PAGE_CACHE_SIZE / rec_size;
				130	/* Should have been verified before we got here... */
				131	BUG_ON(!recs);
				132	addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
				133	for (i = 0; i < recs; i++)
				134	post_read_mst_fixup((NTFS_RECORD*)(addr +
				135	i * rec_size), rec_size);
				136	flush_dcache_page(page);
				137	kunmap_atomic(addr, KM_BIO_SRC_IRQ);
				138	if (likely(!PageError(page) && page_uptodate))
				139	SetPageUptodate(page);
				140	}
				141	unlock_page(page);
				142	return;
				143	still_busy:
				144	spin_unlock_irqrestore(&page_uptodate_lock, flags);
				145	return;
				146	}
				147
				148	/**
				149	* ntfs_read_block - fill a @page of an address space with data
				150	* @page: page cache page to fill with data
				151	*
				152	* Fill the page @page of the address space belonging to the @page->mapping->host inode.
				153	* We read each buffer asynchronously and when all buffers are read in, our io
				154	* completion handler ntfs_end_buffer_async_read(), if required, automatically
				155	* applies the mst fixups to the page before finally marking it uptodate and
				156	* unlocking it.
				157	*
				158	* We only enforce allocated_size limit because i_size is checked for in
				159	* generic_file_read().
				160	*
				161	* Return 0 on success and -errno on error.
				162	*
				163	* Contains an adapted version of fs/buffer.c::block_read_full_page().
				164	*/
				165	static int ntfs_read_block(struct page *page)
				166	{
				167	VCN vcn;
				168	LCN lcn;
				169	ntfs_inode *ni;
				170	ntfs_volume *vol;
				171	runlist_element *rl;
				172	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
				173	sector_t iblock, lblock, zblock;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	174	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	175	unsigned int blocksize, vcn_ofs;
				176	int i, nr;
				177	unsigned char blocksize_bits;
				178
				179	ni = NTFS_I(page->mapping->host);
				180	vol = ni->vol;
				181
				182	/* $MFT/$DATA must have its complete runlist in memory at all times. */
				183	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));
				184
				185	blocksize_bits = VFS_I(ni)->i_blkbits;
				186	blocksize = 1 << blocksize_bits;
				187
				188	if (!page_has_buffers(page))
				189	create_empty_buffers(page, blocksize, 0);
				190	bh = head = page_buffers(page);
				191	if (unlikely(!bh)) {
				192	unlock_page(page);
				193	return -ENOMEM;
				194	}
				195
				196	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	197	read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	198	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
				199	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	200	read_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	201
				202	/* Loop through all the buffers in the page. */
				203	rl = NULL;
				204	nr = i = 0;
				205	do {
				206	u8 *kaddr;
				207
				208	if (unlikely(buffer_uptodate(bh)))
				209	continue;
				210	if (unlikely(buffer_mapped(bh))) {
				211	arr[nr++] = bh;
				212	continue;
				213	}
				214	bh->b_bdev = vol->sb->s_bdev;
				215	/* Is the block within the allowed limits? */
				216	if (iblock < lblock) {
				217	BOOL is_retry = FALSE;
				218
				219	/* Convert iblock into corresponding vcn and offset. */
				220	vcn = (VCN)iblock << blocksize_bits >>
				221	vol->cluster_size_bits;
				222	vcn_ofs = ((VCN)iblock << blocksize_bits) &
				223	vol->cluster_size_mask;
				224	if (!rl) {
				225	lock_retry_remap:
				226	down_read(&ni->runlist.lock);
				227	rl = ni->runlist.rl;
				228	}
				229	if (likely(rl != NULL)) {
				230	/* Seek to element containing target vcn. */
				231	while (rl->length && rl[1].vcn <= vcn)
				232	rl++;
				233	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				234	} else
				235	lcn = LCN_RL_NOT_MAPPED;
				236	/* Successful remap. */
				237	if (lcn >= 0) {
				238	/* Setup buffer head to correct block. */
				239	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				240	+ vcn_ofs) >> blocksize_bits;
				241	set_buffer_mapped(bh);
				242	/* Only read initialized data blocks. */
				243	if (iblock < zblock) {
				244	arr[nr++] = bh;
				245	continue;
				246	}
				247	/* Fully non-initialized data block, zero it. */
				248	goto handle_zblock;
				249	}
				250	/* It is a hole, need to zero it. */
				251	if (lcn == LCN_HOLE)
				252	goto handle_hole;
				253	/* If first try and runlist unmapped, map and retry. */
				254	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				255	int err;
				256	is_retry = TRUE;
				257	/*
				258	* Attempt to map runlist, dropping lock for
				259	* the duration.
				260	*/
				261	up_read(&ni->runlist.lock);
				262	err = ntfs_map_runlist(ni, vcn);
				263	if (likely(!err))
				264	goto lock_retry_remap;
				265	rl = NULL;
				266	lcn = err;
				267	}
				268	/* Hard error, zero out region. */
				269	bh->b_blocknr = -1;
				270	SetPageError(page);
				271	ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
				272	"attribute type 0x%x, vcn 0x%llx, "
				273	"offset 0x%x because its location on "
				274	"disk could not be determined%s "
				275	"(error code %lli).", ni->mft_no,
				276	ni->type, (unsigned long long)vcn,
				277	vcn_ofs, is_retry ? " even after "
				278	"retrying" : "", (long long)lcn);
				279	}
				280	/*
				281	* Either iblock was outside lblock limits or
				282	* ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
				283	* of the page and set the buffer uptodate.
				284	*/
				285	handle_hole:
				286	bh->b_blocknr = -1UL;
				287	clear_buffer_mapped(bh);
				288	handle_zblock:
				289	kaddr = kmap_atomic(page, KM_USER0);
				290	memset(kaddr + i * blocksize, 0, blocksize);
				291	flush_dcache_page(page);
				292	kunmap_atomic(kaddr, KM_USER0);
				293	set_buffer_uptodate(bh);
				294	} while (i++, iblock++, (bh = bh->b_this_page) != head);
				295
				296	/* Release the lock if we took it. */
				297	if (rl)
				298	up_read(&ni->runlist.lock);
				299
				300	/* Check we have at least one buffer ready for i/o. */
				301	if (nr) {
				302	struct buffer_head *tbh;
				303
				304	/* Lock the buffers. */
				305	for (i = 0; i < nr; i++) {
				306	tbh = arr[i];
				307	lock_buffer(tbh);
				308	tbh->b_end_io = ntfs_end_buffer_async_read;
				309	set_buffer_async_read(tbh);
				310	}
				311	/* Finally, start i/o on the buffers. */
				312	for (i = 0; i < nr; i++) {
				313	tbh = arr[i];
				314	if (likely(!buffer_uptodate(tbh)))
				315	submit_bh(READ, tbh);
				316	else
				317	ntfs_end_buffer_async_read(tbh, 1);
				318	}
				319	return 0;
				320	}
				321	/* No i/o was scheduled on any of the buffers. */
				322	if (likely(!PageError(page)))
				323	SetPageUptodate(page);
				324	else /* Signal synchronous i/o error. */
				325	nr = -EIO;
				326	unlock_page(page);
				327	return nr;
				328	}
				329
				330	/**
				331	* ntfs_readpage - fill a @page of a @file with data from the device
				332	* @file: open file to which the page @page belongs or NULL
				333	* @page: page cache page to fill with data
				334	*
				335	* For non-resident attributes, ntfs_readpage() fills the @page of the open
				336	* file @file by calling the ntfs version of the generic block_read_full_page()
				337	* function, ntfs_read_block(), which in turn creates and reads in the buffers
				338	* associated with the page asynchronously.
				339	*
				340	* For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
				341	* data from the mft record (which at this stage is most likely in memory) and
				342	* fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
				343	* even if the mft record is not cached at this point in time, we need to wait
				344	* for it to be read in before we can do the copy.
				345	*
				346	* Return 0 on success and -errno on error.
				347	*/
				348	static int ntfs_readpage(struct file *file, struct page *page)
				349	{
				350	loff_t i_size;
				351	ntfs_inode *ni, *base_ni;
				352	u8 *kaddr;
				353	ntfs_attr_search_ctx *ctx;
				354	MFT_RECORD *mrec;
				355	u32 attr_len;
				356	int err = 0;
				357
				358	BUG_ON(!PageLocked(page));
				359	/*
				360	* This can potentially happen because we clear PageUptodate() during
				361	* ntfs_writepage() of MstProtected() attributes.
				362	*/
				363	if (PageUptodate(page)) {
				364	unlock_page(page);
				365	return 0;
				366	}
				367	ni = NTFS_I(page->mapping->host);
				368
				369	/* NInoNonResident() == NInoIndexAllocPresent() */
				370	if (NInoNonResident(ni)) {
				371	/*
				372	* Only unnamed $DATA attributes can be compressed or
				373	* encrypted.
				374	*/
				375	if (ni->type == AT_DATA && !ni->name_len) {
				376	/* If file is encrypted, deny access, just like NT4. */
				377	if (NInoEncrypted(ni)) {
				378	err = -EACCES;
				379	goto err_out;
				380	}
				381	/* Compressed data streams are handled in compress.c. */
				382	if (NInoCompressed(ni))
				383	return ntfs_read_compressed_block(page);
				384	}
				385	/* Normal data stream. */
				386	return ntfs_read_block(page);
				387	}
				388	/*
				389	* Attribute is resident, implying it is not compressed or encrypted.
				390	* This also means the attribute is smaller than an mft record and
				391	* hence smaller than a page, so can simply zero out any pages with
				392	* index above 0. We can also do this if the file size is 0.
				393	*/
				394	if (unlikely(page->index > 0 || !i_size_read(VFS_I(ni)))) {
				395	kaddr = kmap_atomic(page, KM_USER0);
				396	memset(kaddr, 0, PAGE_CACHE_SIZE);
				397	flush_dcache_page(page);
				398	kunmap_atomic(kaddr, KM_USER0);
				399	goto done;
				400	}
				401	if (!NInoAttr(ni))
				402	base_ni = ni;
				403	else
				404	base_ni = ni->ext.base_ntfs_ino;
				405	/* Map, pin, and lock the mft record. */
				406	mrec = map_mft_record(base_ni);
				407	if (IS_ERR(mrec)) {
				408	err = PTR_ERR(mrec);
				409	goto err_out;
				410	}
				411	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
				412	if (unlikely(!ctx)) {
				413	err = -ENOMEM;
				414	goto unm_err_out;
				415	}
				416	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				417	CASE_SENSITIVE, 0, NULL, 0, ctx);
				418	if (unlikely(err))
				419	goto put_unm_err_out;
				420	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
				421	i_size = i_size_read(VFS_I(ni));
				422	if (unlikely(attr_len > i_size))
				423	attr_len = i_size;
				424	kaddr = kmap_atomic(page, KM_USER0);
				425	/* Copy the data to the page. */
				426	memcpy(kaddr, (u8*)ctx->attr +
				427	le16_to_cpu(ctx->attr->data.resident.value_offset),
				428	attr_len);
				429	/* Zero the remainder of the page. */
				430	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				431	flush_dcache_page(page);
				432	kunmap_atomic(kaddr, KM_USER0);
				433	put_unm_err_out:
				434	ntfs_attr_put_search_ctx(ctx);
				435	unm_err_out:
				436	unmap_mft_record(base_ni);
				437	done:
				438	SetPageUptodate(page);
				439	err_out:
				440	unlock_page(page);
				441	return err;
				442	}
				443
				444	#ifdef NTFS_RW
				445
				446	/**
				447	* ntfs_write_block - write a @page to the backing store
				448	* @page: page cache page to write out
				449	* @wbc: writeback control structure
				450	*
				451	* This function is for writing pages belonging to non-resident, non-mst
				452	* protected attributes to their backing store.
				453	*
				454	* For a page with buffers, map and write the dirty buffers asynchronously
				455	* under page writeback. For a page without buffers, create buffers for the
				456	* page, then proceed as above.
				457	*
				458	* If a page doesn't have buffers the page dirty state is definitive. If a page
				459	* does have buffers, the page dirty state is just a hint, and the buffer dirty
				460	* state is definitive. (A hint which has rules: dirty buffers against a clean
				461	* page is illegal. Other combinations are legal and need to be handled. In
				462	* particular a dirty page containing clean buffers for example.)
				463	*
				464	* Return 0 on success and -errno on error.
				465	*
				466	* Based on ntfs_read_block() and __block_write_full_page().
				467	*/
				468	static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
				469	{
				470	VCN vcn;
				471	LCN lcn;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	472	s64 initialized_size;
				473	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	474	sector_t block, dblock, iblock;
				475	struct inode *vi;
				476	ntfs_inode *ni;
				477	ntfs_volume *vol;
				478	runlist_element *rl;
				479	struct buffer_head *bh, *head;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	480	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	481	unsigned int blocksize, vcn_ofs;
				482	int err;
				483	BOOL need_end_writeback;
				484	unsigned char blocksize_bits;
				485
				486	vi = page->mapping->host;
				487	ni = NTFS_I(vi);
				488	vol = ni->vol;
				489
				490	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				491	"0x%lx.", ni->mft_no, ni->type, page->index);
				492
				493	BUG_ON(!NInoNonResident(ni));
				494	BUG_ON(NInoMstProtected(ni));
				495
				496	blocksize_bits = vi->i_blkbits;
				497	blocksize = 1 << blocksize_bits;
				498
				499	if (!page_has_buffers(page)) {
				500	BUG_ON(!PageUptodate(page));
				501	create_empty_buffers(page, blocksize,
				502	(1 << BH_Uptodate) | (1 << BH_Dirty));
				503	}
				504	bh = head = page_buffers(page);
				505	if (unlikely(!bh)) {
				506	ntfs_warning(vol->sb, "Error allocating page buffers. "
				507	"Redirtying page so we try again later.");
				508	/*
				509	* Put the page back on mapping->dirty_pages, but leave its
				510	* buffer's dirty state as-is.
				511	*/
				512	redirty_page_for_writepage(wbc, page);
				513	unlock_page(page);
				514	return 0;
				515	}
				516
				517	/* NOTE: Different naming scheme to ntfs_read_block()! */
				518
				519	/* The first block in the page. */
				520	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				521
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	522	read_lock_irqsave(&ni->size_lock, flags);
				523	i_size = i_size_read(vi);
				524	initialized_size = ni->initialized_size;
				525	read_unlock_irqrestore(&ni->size_lock, flags);
				526
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	527	/* The first out of bounds block for the data size. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	528	dblock = (i_size + blocksize - 1) >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	529
				530	/* The last (fully or partially) initialized block. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	531	iblock = initialized_size >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	532
				533	/*
				534	* Be very careful. We have no exclusion from __set_page_dirty_buffers
				535	* here, and the (potentially unmapped) buffers may become dirty at
				536	* any time. If a buffer becomes dirty here after we've inspected it
				537	* then we just miss that fact, and the page stays dirty.
				538	*
				539	* Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
				540	* handle that here by just cleaning them.
				541	*/
				542
				543	/*
				544	* Loop through all the buffers in the page, mapping all the dirty
				545	* buffers to disk addresses and handling any aliases from the
				546	* underlying block device's mapping.
				547	*/
				548	rl = NULL;
				549	err = 0;
				550	do {
				551	BOOL is_retry = FALSE;
				552
				553	if (unlikely(block >= dblock)) {
				554	/*
				555	* Mapped buffers outside i_size will occur, because
				556	* this page can be outside i_size when there is a
				557	* truncate in progress. The contents of such buffers
				558	* were zeroed by ntfs_writepage().
				559	*
				560	* FIXME: What about the small race window where
				561	* ntfs_writepage() has not done any clearing because
				562	* the page was within i_size but before we get here,
				563	* vmtruncate() modifies i_size?
				564	*/
				565	clear_buffer_dirty(bh);
				566	set_buffer_uptodate(bh);
				567	continue;
				568	}
				569
				570	/* Clean buffers are not written out, so no need to map them. */
				571	if (!buffer_dirty(bh))
				572	continue;
				573
				574	/* Make sure we have enough initialized size. */
				575	if (unlikely((block >= iblock) &&
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	576	(initialized_size < i_size))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	577	/*
				578	* If this page is fully outside initialized size, zero
				579	* out all pages between the current initialized size
				580	* and the current page. Just use ntfs_readpage() to do
				581	* the zeroing transparently.
				582	*/
				583	if (block > iblock) {
				584	// TODO:
				585	// For each page do:
				586	// - read_cache_page()
				587	// Again for each page do:
				588	// - wait_on_page_locked()
				589	// - Check (PageUptodate(page) &&
				590	// !PageError(page))
				591	// Update initialized size in the attribute and
				592	// in the inode.
				593	// Again, for each page do:
				594	// __set_page_dirty_buffers();
				595	// page_cache_release()
				596	// We don't need to wait on the writes.
				597	// Update iblock.
				598	}
				599	/*
				600	* The current page straddles initialized size. Zero
				601	* all non-uptodate buffers and set them uptodate (and
				602	* dirty?). Note, there aren't any non-uptodate buffers
				603	* if the page is uptodate.
				604	* FIXME: For an uptodate page, the buffers may need to
				605	* be written out because they were not initialized on
				606	* disk before.
				607	*/
				608	if (!PageUptodate(page)) {
				609	// TODO:
				610	// Zero any non-uptodate buffers up to i_size.
				611	// Set them uptodate and dirty.
				612	}
				613	// TODO:
				614	// Update initialized size in the attribute and in the
				615	// inode (up to i_size).
				616	// Update iblock.
				617	// FIXME: This is inefficient. Try to batch the two
				618	// size changes to happen in one go.
				619	ntfs_error(vol->sb, "Writing beyond initialized size "
				620	"is not supported yet. Sorry.");
				621	err = -EOPNOTSUPP;
				622	break;
				623	// Do NOT set_buffer_new() BUT DO clear buffer range
				624	// outside write request range.
				625	// set_buffer_uptodate() on complete buffers as well as
				626	// set_buffer_dirty().
				627	}
				628
				629	/* No need to map buffers that are already mapped. */
				630	if (buffer_mapped(bh))
				631	continue;
				632
				633	/* Unmapped, dirty buffer. Need to map it. */
				634	bh->b_bdev = vol->sb->s_bdev;
				635
				636	/* Convert block into corresponding vcn and offset. */
				637	vcn = (VCN)block << blocksize_bits;
				638	vcn_ofs = vcn & vol->cluster_size_mask;
				639	vcn >>= vol->cluster_size_bits;
				640	if (!rl) {
				641	lock_retry_remap:
				642	down_read(&ni->runlist.lock);
				643	rl = ni->runlist.rl;
				644	}
				645	if (likely(rl != NULL)) {
				646	/* Seek to element containing target vcn. */
				647	while (rl->length && rl[1].vcn <= vcn)
				648	rl++;
				649	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				650	} else
				651	lcn = LCN_RL_NOT_MAPPED;
				652	/* Successful remap. */
				653	if (lcn >= 0) {
				654	/* Setup buffer head to point to correct block. */
				655	bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
				656	vcn_ofs) >> blocksize_bits;
				657	set_buffer_mapped(bh);
				658	continue;
				659	}
				660	/* It is a hole, need to instantiate it. */
				661	if (lcn == LCN_HOLE) {
				662	// TODO: Instantiate the hole.
				663	// clear_buffer_new(bh);
				664	// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
				665	ntfs_error(vol->sb, "Writing into sparse regions is "
				666	"not supported yet. Sorry.");
				667	err = -EOPNOTSUPP;
				668	break;
				669	}
				670	/* If first try and runlist unmapped, map and retry. */
				671	if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				672	is_retry = TRUE;
				673	/*
				674	* Attempt to map runlist, dropping lock for
				675	* the duration.
				676	*/
				677	up_read(&ni->runlist.lock);
				678	err = ntfs_map_runlist(ni, vcn);
				679	if (likely(!err))
				680	goto lock_retry_remap;
				681	rl = NULL;
				682	lcn = err;
				683	}
				684	/* Failed to map the buffer, even after retrying. */
				685	bh->b_blocknr = -1;
				686	ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				687	"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				688	"because its location on disk could not be "
				689	"determined%s (error code %lli).", ni->mft_no,
				690	ni->type, (unsigned long long)vcn,
				691	vcn_ofs, is_retry ? " even after "
				692	"retrying" : "", (long long)lcn);
				693	if (!err)
				694	err = -EIO;
				695	break;
				696	} while (block++, (bh = bh->b_this_page) != head);
				697
				698	/* Release the lock if we took it. */
				699	if (rl)
				700	up_read(&ni->runlist.lock);
				701
				702	/* For the error case, need to reset bh to the beginning. */
				703	bh = head;
				704
				705	/* Just an optimization, so ->readpage() isn't called later. */
				706	if (unlikely(!PageUptodate(page))) {
				707	int uptodate = 1;
				708	do {
				709	if (!buffer_uptodate(bh)) {
				710	uptodate = 0;
				711	bh = head;
				712	break;
				713	}
				714	} while ((bh = bh->b_this_page) != head);
				715	if (uptodate)
				716	SetPageUptodate(page);
				717	}
				718
				719	/* Setup all mapped, dirty buffers for async write i/o. */
				720	do {
				721	get_bh(bh);
				722	if (buffer_mapped(bh) && buffer_dirty(bh)) {
				723	lock_buffer(bh);
				724	if (test_clear_buffer_dirty(bh)) {
				725	BUG_ON(!buffer_uptodate(bh));
				726	mark_buffer_async_write(bh);
				727	} else
				728	unlock_buffer(bh);
				729	} else if (unlikely(err)) {
				730	/*
				731	* For the error case. The buffer may have been set
				732	* dirty during attachment to a dirty page.
				733	*/
				734	if (err != -ENOMEM)
				735	clear_buffer_dirty(bh);
				736	}
				737	} while ((bh = bh->b_this_page) != head);
				738
				739	if (unlikely(err)) {
				740	// TODO: Remove the -EOPNOTSUPP check later on...
				741	if (unlikely(err == -EOPNOTSUPP))
				742	err = 0;
				743	else if (err == -ENOMEM) {
				744	ntfs_warning(vol->sb, "Error allocating memory. "
				745	"Redirtying page so we try again "
				746	"later.");
				747	/*
				748	* Put the page back on mapping->dirty_pages, but
				749	* leave its buffer's dirty state as-is.
				750	*/
				751	redirty_page_for_writepage(wbc, page);
				752	err = 0;
				753	} else
				754	SetPageError(page);
				755	}
				756
				757	BUG_ON(PageWriteback(page));
				758	set_page_writeback(page); /* Keeps try_to_free_buffers() away. */
				759	unlock_page(page);
				760
				761	/*
				762	* Submit the prepared buffers for i/o. Note the page is unlocked,
				763	* and the async write i/o completion handler can end_page_writeback()
				764	* at any time after the first submit_bh(). So the buffers can then
				765	* disappear...
				766	*/
				767	need_end_writeback = TRUE;
				768	do {
				769	struct buffer_head *next = bh->b_this_page;
				770	if (buffer_async_write(bh)) {
				771	submit_bh(WRITE, bh);
				772	need_end_writeback = FALSE;
				773	}
				774	put_bh(bh);
				775	bh = next;
				776	} while (bh != head);
				777
				778	/* If no i/o was started, need to end_page_writeback(). */
				779	if (unlikely(need_end_writeback))
				780	end_page_writeback(page);
				781
				782	ntfs_debug("Done.");
				783	return err;
				784	}
				785
				786	/**
				787	* ntfs_write_mst_block - write a @page to the backing store
				788	* @page: page cache page to write out
				789	* @wbc: writeback control structure
				790	*
				791	* This function is for writing pages belonging to non-resident, mst protected
				792	* attributes to their backing store. The only supported attributes are index
				793	* allocation and $MFT/$DATA. Both directory inodes and index inodes are
				794	* supported for the index allocation case.
				795	*
				796	* The page must remain locked for the duration of the write because we apply
				797	* the mst fixups, write, and then undo the fixups, so if we were to unlock the
				798	* page before undoing the fixups, any other user of the page will see the
				799	* page contents as corrupt.
				800	*
				801	* We clear the page uptodate flag for the duration of the function to ensure
				802	* exclusion for the $MFT/$DATA case against someone mapping an mft record we
				803	* are about to apply the mst fixups to.
				804	*
				805	* Return 0 on success and -errno on error.
				806	*
				807	* Based on ntfs_write_block(), ntfs_mft_writepage(), and
				808	* write_mft_record_nolock().
				809	*/
				810	static int ntfs_write_mst_block(struct page *page,
				811	struct writeback_control *wbc)
				812	{
				813	sector_t block, dblock, rec_block;
				814	struct inode *vi = page->mapping->host;
				815	ntfs_inode *ni = NTFS_I(vi);
				816	ntfs_volume *vol = ni->vol;
				817	u8 *kaddr;
				818	unsigned char bh_size_bits = vi->i_blkbits;
				819	unsigned int bh_size = 1 << bh_size_bits;
				820	unsigned int rec_size = ni->itype.index.block_size;
				821	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
				822	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
				823	int max_bhs = PAGE_CACHE_SIZE / bh_size;
				824	struct buffer_head *bhs[max_bhs];
				825	runlist_element *rl;
				826	int i, nr_locked_nis, nr_recs, nr_bhs, bhs_per_rec, err, err2;
				827	unsigned rec_size_bits;
				828	BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
				829
				830	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				831	"0x%lx.", vi->i_ino, ni->type, page->index);
				832	BUG_ON(!NInoNonResident(ni));
				833	BUG_ON(!NInoMstProtected(ni));
				834	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
				835	/*
				836	* NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
				837	* in its page cache were to be marked dirty. However this should
				838	* never happen with the current driver and considering we do not
				839	* handle this case here we do want to BUG(), at least for now.
				840	*/
				841	BUG_ON(!(is_mft \|\| S_ISDIR(vi->i_mode) \|\|
				842	(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
				843	BUG_ON(!max_bhs);
				844
				845	/* Were we called for sync purposes? */
				846	sync = (wbc->sync_mode == WB_SYNC_ALL);
				847
				848	/* Make sure we have mapped buffers. */
				849	BUG_ON(!page_has_buffers(page));
				850	bh = head = page_buffers(page);
				851	BUG_ON(!bh);
				852
				853	rec_size_bits = ni->itype.index.block_size_bits;
				854	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
				855	bhs_per_rec = rec_size >> bh_size_bits;
				856	BUG_ON(!bhs_per_rec);
				857
				858	/* The first block in the page. */
				859	rec_block = block = (sector_t)page->index <<
				860	(PAGE_CACHE_SHIFT - bh_size_bits);
				861
				862	/* The first out of bounds block for the data size. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	863	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	864
				865	rl = NULL;
				866	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
				867	page_is_dirty = rec_is_dirty = FALSE;
				868	rec_start_bh = NULL;
				869	do {
				870	BOOL is_retry = FALSE;
				871
				872	if (likely(block < rec_block)) {
				873	if (unlikely(block >= dblock)) {
				874	clear_buffer_dirty(bh);
				875	continue;
				876	}
				877	/*
				878	* This block is not the first one in the record. We
				879	* ignore the buffer's dirty state because we could
				880	* have raced with a parallel mark_ntfs_record_dirty().
				881	*/
				882	if (!rec_is_dirty)
				883	continue;
				884	if (unlikely(err2)) {
				885	if (err2 != -ENOMEM)
				886	clear_buffer_dirty(bh);
				887	continue;
				888	}
				889	} else /* if (block == rec_block) */ {
				890	BUG_ON(block > rec_block);
				891	/* This block is the first one in the record. */
				892	rec_block += bhs_per_rec;
				893	err2 = 0;
				894	if (unlikely(block >= dblock)) {
				895	clear_buffer_dirty(bh);
				896	continue;
				897	}
				898	if (!buffer_dirty(bh)) {
				899	/* Clean records are not written out. */
				900	rec_is_dirty = FALSE;
				901	continue;
				902	}
				903	rec_is_dirty = TRUE;
				904	rec_start_bh = bh;
				905	}
				906	/* Need to map the buffer if it is not mapped already. */
				907	if (unlikely(!buffer_mapped(bh))) {
				908	VCN vcn;
				909	LCN lcn;
				910	unsigned int vcn_ofs;
				911
				912	/* Obtain the vcn and offset of the current block. */
				913	vcn = (VCN)block << bh_size_bits;
				914	vcn_ofs = vcn & vol->cluster_size_mask;
				915	vcn >>= vol->cluster_size_bits;
				916	if (!rl) {
				917	lock_retry_remap:
				918	down_read(&ni->runlist.lock);
				919	rl = ni->runlist.rl;
				920	}
				921	if (likely(rl != NULL)) {
				922	/* Seek to element containing target vcn. */
				923	while (rl->length && rl[1].vcn <= vcn)
				924	rl++;
				925	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				926	} else
				927	lcn = LCN_RL_NOT_MAPPED;
				928	/* Successful remap. */
				929	if (likely(lcn >= 0)) {
				930	/* Setup buffer head to correct block. */
				931	bh->b_blocknr = ((lcn <<
				932	vol->cluster_size_bits) +
				933	vcn_ofs) >> bh_size_bits;
				934	set_buffer_mapped(bh);
				935	} else {
				936	/*
				937	* Remap failed. Retry to map the runlist once
				938	* unless we are working on $MFT which always
				939	* has the whole of its runlist in memory.
				940	*/
				941	if (!is_mft && !is_retry &&
				942	lcn == LCN_RL_NOT_MAPPED) {
				943	is_retry = TRUE;
				944	/*
				945	* Attempt to map runlist, dropping
				946	* lock for the duration.
				947	*/
				948	up_read(&ni->runlist.lock);
				949	err2 = ntfs_map_runlist(ni, vcn);
				950	if (likely(!err2))
				951	goto lock_retry_remap;
				952	if (err2 == -ENOMEM)
				953	page_is_dirty = TRUE;
				954	lcn = err2;
				955	} else
				956	err2 = -EIO;
				957	/* Hard error. Abort writing this record. */
				958	if (!err \|\| err == -ENOMEM)
				959	err = err2;
				960	bh->b_blocknr = -1;
				961	ntfs_error(vol->sb, "Cannot write ntfs record "
				962	"0x%llx (inode 0x%lx, "
				963	"attribute type 0x%x) because "
				964	"its location on disk could "
				965	"not be determined (error "
				966	"code %lli).", (s64)block <<
				967	bh_size_bits >>
				968	vol->mft_record_size_bits,
				969	ni->mft_no, ni->type,
				970	(long long)lcn);
				971	/*
				972	* If this is not the first buffer, remove the
				973	* buffers in this record from the list of
				974	* buffers to write and clear their dirty bit
				975	* if not error -ENOMEM.
				976	*/
				977	if (rec_start_bh != bh) {
				978	while (bhs[--nr_bhs] != rec_start_bh)
				979	;
				980	if (err2 != -ENOMEM) {
				981	do {
				982	clear_buffer_dirty(
				983	rec_start_bh);
				984	} while ((rec_start_bh =
				985	rec_start_bh->
				986	b_this_page) !=
				987	bh);
				988	}
				989	}
				990	continue;
				991	}
				992	}
				993	BUG_ON(!buffer_uptodate(bh));
				994	BUG_ON(nr_bhs >= max_bhs);
				995	bhs[nr_bhs++] = bh;
				996	} while (block++, (bh = bh->b_this_page) != head);
				997	if (unlikely(rl))
				998	up_read(&ni->runlist.lock);
				999	/* If there were no dirty buffers, we are done. */
				1000	if (!nr_bhs)
				1001	goto done;
				1002	/* Map the page so we can access its contents. */
				1003	kaddr = kmap(page);
				1004	/* Clear the page uptodate flag whilst the mst fixups are applied. */
				1005	BUG_ON(!PageUptodate(page));
				1006	ClearPageUptodate(page);
				1007	for (i = 0; i < nr_bhs; i++) {
				1008	unsigned int ofs;
				1009
				1010	/* Skip buffers which are not at the beginning of records. */
				1011	if (i % bhs_per_rec)
				1012	continue;
				1013	tbh = bhs[i];
				1014	ofs = bh_offset(tbh);
				1015	if (is_mft) {
				1016	ntfs_inode *tni;
				1017	unsigned long mft_no;
				1018
				1019	/* Get the mft record number. */
				1020	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1021	>> rec_size_bits;
				1022	/* Check whether to write this mft record. */
				1023	tni = NULL;
				1024	if (!ntfs_may_write_mft_record(vol, mft_no,
				1025	(MFT_RECORD*)(kaddr + ofs), &tni)) {
				1026	/*
				1027	* The record should not be written. This
				1028	* means we need to redirty the page before
				1029	* returning.
				1030	*/
				1031	page_is_dirty = TRUE;
				1032	/*
				1033	* Remove the buffers in this mft record from
				1034	* the list of buffers to write.
				1035	*/
				1036	do {
				1037	bhs[i] = NULL;
				1038	} while (++i % bhs_per_rec);
				1039	continue;
				1040	}
				1041	/*
				1042	* The record should be written. If a locked ntfs
				1043	* inode was returned, add it to the array of locked
				1044	* ntfs inodes.
				1045	*/
				1046	if (tni)
				1047	locked_nis[nr_locked_nis++] = tni;
				1048	}
				1049	/* Apply the mst protection fixups. */
				1050	err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				1051	rec_size);
				1052	if (unlikely(err2)) {
				1053	if (!err \|\| err == -ENOMEM)
				1054	err = -EIO;
				1055	ntfs_error(vol->sb, "Failed to apply mst fixups "
				1056	"(inode 0x%lx, attribute type 0x%x, "
				1057	"page index 0x%lx, page offset 0x%x)!"
				1058	" Unmount and run chkdsk.", vi->i_ino,
				1059	ni->type, page->index, ofs);
				1060	/*
				1061	* Mark all the buffers in this record clean as we do
				1062	* not want to write corrupt data to disk.
				1063	*/
				1064	do {
				1065	clear_buffer_dirty(bhs[i]);
				1066	bhs[i] = NULL;
				1067	} while (++i % bhs_per_rec);
				1068	continue;
				1069	}
				1070	nr_recs++;
				1071	}
				1072	/* If no records are to be written out, we are done. */
				1073	if (!nr_recs)
				1074	goto unm_done;
				1075	flush_dcache_page(page);
				1076	/* Lock buffers and start synchronous write i/o on them. */
				1077	for (i = 0; i < nr_bhs; i++) {
				1078	tbh = bhs[i];
				1079	if (!tbh)
				1080	continue;
				1081	if (unlikely(test_set_buffer_locked(tbh)))
				1082	BUG();
				1083	/* The buffer dirty state is now irrelevant, just clean it. */
				1084	clear_buffer_dirty(tbh);
				1085	BUG_ON(!buffer_uptodate(tbh));
				1086	BUG_ON(!buffer_mapped(tbh));
				1087	get_bh(tbh);
				1088	tbh->b_end_io = end_buffer_write_sync;
				1089	submit_bh(WRITE, tbh);
				1090	}
				1091	/* Synchronize the mft mirror now if not @sync. */
				1092	if (is_mft && !sync)
				1093	goto do_mirror;
				1094	do_wait:
				1095	/* Wait on i/o completion of buffers. */
				1096	for (i = 0; i < nr_bhs; i++) {
				1097	tbh = bhs[i];
				1098	if (!tbh)
				1099	continue;
				1100	wait_on_buffer(tbh);
				1101	if (unlikely(!buffer_uptodate(tbh))) {
				1102	ntfs_error(vol->sb, "I/O error while writing ntfs "
				1103	"record buffer (inode 0x%lx, "
				1104	"attribute type 0x%x, page index "
				1105	"0x%lx, page offset 0x%lx)! Unmount "
				1106	"and run chkdsk.", vi->i_ino, ni->type,
				1107	page->index, bh_offset(tbh));
				1108	if (!err \|\| err == -ENOMEM)
				1109	err = -EIO;
				1110	/*
				1111	* Set the buffer uptodate so the page and buffer
				1112	* states do not become out of sync.
				1113	*/
				1114	set_buffer_uptodate(tbh);
				1115	}
				1116	}
				1117	/* If @sync, now synchronize the mft mirror. */
				1118	if (is_mft && sync) {
				1119	do_mirror:
				1120	for (i = 0; i < nr_bhs; i++) {
				1121	unsigned long mft_no;
				1122	unsigned int ofs;
				1123
				1124	/*
				1125	* Skip buffers which are not at the beginning of
				1126	* records.
				1127	*/
				1128	if (i % bhs_per_rec)
				1129	continue;
				1130	tbh = bhs[i];
				1131	/* Skip removed buffers (and hence records). */
				1132	if (!tbh)
				1133	continue;
				1134	ofs = bh_offset(tbh);
				1135	/* Get the mft record number. */
				1136	mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
				1137	>> rec_size_bits;
				1138	if (mft_no < vol->mftmirr_size)
				1139	ntfs_sync_mft_mirror(vol, mft_no,
				1140	(MFT_RECORD*)(kaddr + ofs),
				1141	sync);
				1142	}
				1143	if (!sync)
				1144	goto do_wait;
				1145	}
				1146	/* Remove the mst protection fixups again. */
				1147	for (i = 0; i < nr_bhs; i++) {
				1148	if (!(i % bhs_per_rec)) {
				1149	tbh = bhs[i];
				1150	if (!tbh)
				1151	continue;
				1152	post_write_mst_fixup((NTFS_RECORD*)(kaddr +
				1153	bh_offset(tbh)));
				1154	}
				1155	}
				1156	flush_dcache_page(page);
				1157	unm_done:
				1158	/* Unlock any locked inodes. */
				1159	while (nr_locked_nis-- > 0) {
				1160	ntfs_inode tni, base_tni;
				1161
				1162	tni = locked_nis[nr_locked_nis];
				1163	/* Get the base inode. */
				1164	down(&tni->extent_lock);
				1165	if (tni->nr_extents >= 0)
				1166	base_tni = tni;
				1167	else {
				1168	base_tni = tni->ext.base_ntfs_ino;
				1169	BUG_ON(!base_tni);
				1170	}
				1171	up(&tni->extent_lock);
				1172	ntfs_debug("Unlocking %s inode 0x%lx.",
				1173	tni == base_tni ? "base" : "extent",
				1174	tni->mft_no);
				1175	up(&tni->mrec_lock);
				1176	atomic_dec(&tni->count);
				1177	iput(VFS_I(base_tni));
				1178	}
				1179	SetPageUptodate(page);
				1180	kunmap(page);
				1181	done:
				1182	if (unlikely(err && err != -ENOMEM)) {
				1183	/*
				1184	* Set page error if there is only one ntfs record in the page.
				1185	* Otherwise we would loose per-record granularity.
				1186	*/
				1187	if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
				1188	SetPageError(page);
				1189	NVolSetErrors(vol);
				1190	}
				1191	if (page_is_dirty) {
				1192	ntfs_debug("Page still contains one or more dirty ntfs "
				1193	"records. Redirtying the page starting at "
				1194	"record 0x%lx.", page->index <<
				1195	(PAGE_CACHE_SHIFT - rec_size_bits));
				1196	redirty_page_for_writepage(wbc, page);
				1197	unlock_page(page);
				1198	} else {
				1199	/*
				1200	* Keep the VM happy. This must be done otherwise the
				1201	* radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
				1202	* the page is clean.
				1203	*/
				1204	BUG_ON(PageWriteback(page));
				1205	set_page_writeback(page);
				1206	unlock_page(page);
				1207	end_page_writeback(page);
				1208	}
				1209	if (likely(!err))
				1210	ntfs_debug("Done.");
				1211	return err;
				1212	}
				1213
				1214	/**
				1215	* ntfs_writepage - write a @page to the backing store
				1216	* @page: page cache page to write out
				1217	* @wbc: writeback control structure
				1218	*
				1219	* This is called from the VM when it wants to have a dirty ntfs page cache
				1220	* page cleaned. The VM has already locked the page and marked it clean.
				1221	*
				1222	* For non-resident attributes, ntfs_writepage() writes the @page by calling
				1223	* the ntfs version of the generic block_write_full_page() function,
				1224	* ntfs_write_block(), which in turn if necessary creates and writes the
				1225	* buffers associated with the page asynchronously.
				1226	*
				1227	* For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
				1228	* the data to the mft record (which at this stage is most likely in memory).
				1229	* The mft record is then marked dirty and written out asynchronously via the
				1230	* vfs inode dirty code path for the inode the mft record belongs to or via the
				1231	* vm page dirty code path for the page the mft record is in.
				1232	*
				1233	* Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
				1234	*
				1235	* Return 0 on success and -errno on error.
				1236	*/
				1237	static int ntfs_writepage(struct page page, struct writeback_control wbc)
				1238	{
				1239	loff_t i_size;
				1240	struct inode *vi;
				1241	ntfs_inode ni, base_ni;
				1242	char *kaddr;
				1243	ntfs_attr_search_ctx *ctx;
				1244	MFT_RECORD *m;
				1245	u32 attr_len;
				1246	int err;
				1247
				1248	BUG_ON(!PageLocked(page));
				1249
				1250	vi = page->mapping->host;
				1251	i_size = i_size_read(vi);
				1252
				1253	/* Is the page fully outside i_size? (truncate in progress) */
				1254	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
				1255	PAGE_CACHE_SHIFT)) {
				1256	/*
				1257	* The page may have dirty, unmapped buffers. Make them
				1258	* freeable here, so the page does not leak.
				1259	*/
				1260	block_invalidatepage(page, 0);
				1261	unlock_page(page);
				1262	ntfs_debug("Write outside i_size - truncated?");
				1263	return 0;
				1264	}
				1265	ni = NTFS_I(vi);
				1266
				1267	/* NInoNonResident() == NInoIndexAllocPresent() */
				1268	if (NInoNonResident(ni)) {
				1269	/*
				1270	* Only unnamed $DATA attributes can be compressed, encrypted,
				1271	* and/or sparse.
				1272	*/
				1273	if (ni->type == AT_DATA && !ni->name_len) {
				1274	/* If file is encrypted, deny access, just like NT4. */
				1275	if (NInoEncrypted(ni)) {
				1276	unlock_page(page);
				1277	ntfs_debug("Denying write access to encrypted "
				1278	"file.");
				1279	return -EACCES;
				1280	}
				1281	/* Compressed data streams are handled in compress.c. */
				1282	if (NInoCompressed(ni)) {
				1283	// TODO: Implement and replace this check with
				1284	// return ntfs_write_compressed_block(page);
				1285	unlock_page(page);
				1286	ntfs_error(vi->i_sb, "Writing to compressed "
				1287	"files is not supported yet. "
				1288	"Sorry.");
				1289	return -EOPNOTSUPP;
				1290	}
				1291	// TODO: Implement and remove this check.
				1292	if (NInoSparse(ni)) {
				1293	unlock_page(page);
				1294	ntfs_error(vi->i_sb, "Writing to sparse files "
				1295	"is not supported yet. Sorry.");
				1296	return -EOPNOTSUPP;
				1297	}
				1298	}
				1299	/* We have to zero every time due to mmap-at-end-of-file. */
				1300	if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
				1301	/* The page straddles i_size. */
				1302	unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
				1303	kaddr = kmap_atomic(page, KM_USER0);
				1304	memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
				1305	flush_dcache_page(page);
				1306	kunmap_atomic(kaddr, KM_USER0);
				1307	}
				1308	/* Handle mst protected attributes. */
				1309	if (NInoMstProtected(ni))
				1310	return ntfs_write_mst_block(page, wbc);
				1311	/* Normal data stream. */
				1312	return ntfs_write_block(page, wbc);
				1313	}
				1314	/*
				1315	* Attribute is resident, implying it is not compressed, encrypted,
				1316	* sparse, or mst protected. This also means the attribute is smaller
				1317	* than an mft record and hence smaller than a page, so can simply
				1318	* return error on any pages with index above 0.
				1319	*/
				1320	BUG_ON(page_has_buffers(page));
				1321	BUG_ON(!PageUptodate(page));
				1322	if (unlikely(page->index > 0)) {
				1323	ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
				1324	"Aborting write.", page->index);
				1325	BUG_ON(PageWriteback(page));
				1326	set_page_writeback(page);
				1327	unlock_page(page);
				1328	end_page_writeback(page);
				1329	return -EIO;
				1330	}
				1331	if (!NInoAttr(ni))
				1332	base_ni = ni;
				1333	else
				1334	base_ni = ni->ext.base_ntfs_ino;
				1335	/* Map, pin, and lock the mft record. */
				1336	m = map_mft_record(base_ni);
				1337	if (IS_ERR(m)) {
				1338	err = PTR_ERR(m);
				1339	m = NULL;
				1340	ctx = NULL;
				1341	goto err_out;
				1342	}
				1343	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1344	if (unlikely(!ctx)) {
				1345	err = -ENOMEM;
				1346	goto err_out;
				1347	}
				1348	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1349	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1350	if (unlikely(err))
				1351	goto err_out;
				1352	/*
				1353	* Keep the VM happy. This must be done otherwise the radix-tree tag
				1354	* PAGECACHE_TAG_DIRTY remains set even though the page is clean.
				1355	*/
				1356	BUG_ON(PageWriteback(page));
				1357	set_page_writeback(page);
				1358	unlock_page(page);
				1359
				1360	/*
				1361	* Here, we don't need to zero the out of bounds area everytime because
				1362	* the below memcpy() already takes care of the mmap-at-end-of-file
				1363	* requirements. If the file is converted to a non-resident one, then
				1364	* the code path use is switched to the non-resident one where the
				1365	* zeroing happens on each ntfs_writepage() invocation.
				1366	*
				1367	* The above also applies nicely when i_size is decreased.
				1368	*
				1369	* When i_size is increased, the memory between the old and new i_size
				1370	* _must_ be zeroed (or overwritten with new data). Otherwise we will
				1371	* expose data to userspace/disk which should never have been exposed.
				1372	*
				1373	* FIXME: Ensure that i_size increases do the zeroing/overwriting and
				1374	* if we cannot guarantee that, then enable the zeroing below. If the
				1375	* zeroing below is enabled, we MUST move the unlock_page() from above
				1376	* to after the kunmap_atomic(), i.e. just before the
				1377	* end_page_writeback().
				1378	* UPDATE: ntfs_prepare/commit_write() do the zeroing on i_size
				1379	* increases for resident attributes so those are ok.
				1380	* TODO: ntfs_truncate(), others?
				1381	*/
				1382
				1383	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	1384	i_size = i_size_read(vi);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1385	kaddr = kmap_atomic(page, KM_USER0);
				1386	if (unlikely(attr_len > i_size)) {
				1387	/* Zero out of bounds area in the mft record. */
				1388	memset((u8*)ctx->attr + le16_to_cpu(
				1389	ctx->attr->data.resident.value_offset) +
				1390	i_size, 0, attr_len - i_size);
				1391	attr_len = i_size;
				1392	}
				1393	/* Copy the data from the page to the mft record. */
				1394	memcpy((u8*)ctx->attr +
				1395	le16_to_cpu(ctx->attr->data.resident.value_offset),
				1396	kaddr, attr_len);
				1397	flush_dcache_mft_record_page(ctx->ntfs_ino);
				1398	/* Zero out of bounds area in the page cache page. */
				1399	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				1400	flush_dcache_page(page);
				1401	kunmap_atomic(kaddr, KM_USER0);
				1402
				1403	end_page_writeback(page);
				1404
				1405	/* Mark the mft record dirty, so it gets written back. */
				1406	mark_mft_record_dirty(ctx->ntfs_ino);
				1407	ntfs_attr_put_search_ctx(ctx);
				1408	unmap_mft_record(base_ni);
				1409	return 0;
				1410	err_out:
				1411	if (err == -ENOMEM) {
				1412	ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				1413	"page so we try again later.");
				1414	/*
				1415	* Put the page back on mapping->dirty_pages, but leave its
				1416	* buffers' dirty state as-is.
				1417	*/
				1418	redirty_page_for_writepage(wbc, page);
				1419	err = 0;
				1420	} else {
				1421	ntfs_error(vi->i_sb, "Resident attribute write failed with "
				1422	"error %i. Setting page error flag.", err);
				1423	SetPageError(page);
				1424	}
				1425	unlock_page(page);
				1426	if (ctx)
				1427	ntfs_attr_put_search_ctx(ctx);
				1428	if (m)
				1429	unmap_mft_record(base_ni);
				1430	return err;
				1431	}
				1432
				1433	/**
				1434	* ntfs_prepare_nonresident_write -
				1435	*
				1436	*/
				1437	static int ntfs_prepare_nonresident_write(struct page *page,
				1438	unsigned from, unsigned to)
				1439	{
				1440	VCN vcn;
				1441	LCN lcn;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	1442	s64 initialized_size;
				1443	loff_t i_size;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1444	sector_t block, ablock, iblock;
				1445	struct inode *vi;
				1446	ntfs_inode *ni;
				1447	ntfs_volume *vol;
				1448	runlist_element *rl;
				1449	struct buffer_head bh, head, wait[2], *wait_bh = wait;
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	1450	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1451	unsigned int vcn_ofs, block_start, block_end, blocksize;
				1452	int err;
				1453	BOOL is_retry;
				1454	unsigned char blocksize_bits;
				1455
				1456	vi = page->mapping->host;
				1457	ni = NTFS_I(vi);
				1458	vol = ni->vol;
				1459
				1460	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				1461	"0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
				1462	page->index, from, to);
				1463
				1464	BUG_ON(!NInoNonResident(ni));
				1465
				1466	blocksize_bits = vi->i_blkbits;
				1467	blocksize = 1 << blocksize_bits;
				1468
				1469	/*
				1470	* create_empty_buffers() will create uptodate/dirty buffers if the
				1471	* page is uptodate/dirty.
				1472	*/
				1473	if (!page_has_buffers(page))
				1474	create_empty_buffers(page, blocksize, 0);
				1475	bh = head = page_buffers(page);
				1476	if (unlikely(!bh))
				1477	return -ENOMEM;
				1478
				1479	/* The first block in the page. */
				1480	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
				1481
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	1482	read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1483	/*
				1484	* The first out of bounds block for the allocated size. No need to
				1485	* round up as allocated_size is in multiples of cluster size and the
				1486	* minimum cluster size is 512 bytes, which is equal to the smallest
				1487	* blocksize.
				1488	*/
				1489	ablock = ni->allocated_size >> blocksize_bits;
				1490
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	1491	i_size = i_size_read(vi);
				1492	initialized_size = ni->initialized_size;
				1493	read_unlock_irqrestore(&ni->size_lock, flags);
				1494
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1495	/* The last (fully or partially) initialized block. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	1496	iblock = initialized_size >> blocksize_bits;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1497
				1498	/* Loop through all the buffers in the page. */
				1499	block_start = 0;
				1500	rl = NULL;
				1501	err = 0;
				1502	do {
				1503	block_end = block_start + blocksize;
				1504	/*
				1505	* If buffer @bh is outside the write, just mark it uptodate
				1506	* if the page is uptodate and continue with the next buffer.
				1507	*/
				1508	if (block_end <= from \|\| block_start >= to) {
				1509	if (PageUptodate(page)) {
				1510	if (!buffer_uptodate(bh))
				1511	set_buffer_uptodate(bh);
				1512	}
				1513	continue;
				1514	}
				1515	/*
				1516	* @bh is at least partially being written to.
				1517	* Make sure it is not marked as new.
				1518	*/
				1519	//if (buffer_new(bh))
				1520	// clear_buffer_new(bh);
				1521
				1522	if (block >= ablock) {
				1523	// TODO: block is above allocated_size, need to
				1524	// allocate it. Best done in one go to accommodate not
				1525	// only block but all above blocks up to and including:
				1526	// ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
				1527	// - 1) >> blobksize_bits. Obviously will need to round
				1528	// up to next cluster boundary, too. This should be
				1529	// done with a helper function, so it can be reused.
				1530	ntfs_error(vol->sb, "Writing beyond allocated size "
				1531	"is not supported yet. Sorry.");
				1532	err = -EOPNOTSUPP;
				1533	goto err_out;
				1534	// Need to update ablock.
				1535	// Need to set_buffer_new() on all block bhs that are
				1536	// newly allocated.
				1537	}
				1538	/*
				1539	* Now we have enough allocated size to fulfill the whole
				1540	* request, i.e. block < ablock is true.
				1541	*/
				1542	if (unlikely((block >= iblock) &&
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	1543	(initialized_size < i_size))) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1544	/*
				1545	* If this page is fully outside initialized size, zero
				1546	* out all pages between the current initialized size
				1547	* and the current page. Just use ntfs_readpage() to do
				1548	* the zeroing transparently.
				1549	*/
				1550	if (block > iblock) {
				1551	// TODO:
				1552	// For each page do:
				1553	// - read_cache_page()
				1554	// Again for each page do:
				1555	// - wait_on_page_locked()
				1556	// - Check (PageUptodate(page) &&
				1557	// !PageError(page))
				1558	// Update initialized size in the attribute and
				1559	// in the inode.
				1560	// Again, for each page do:
				1561	// __set_page_dirty_buffers();
				1562	// page_cache_release()
				1563	// We don't need to wait on the writes.
				1564	// Update iblock.
				1565	}
				1566	/*
				1567	* The current page straddles initialized size. Zero
				1568	* all non-uptodate buffers and set them uptodate (and
				1569	* dirty?). Note, there aren't any non-uptodate buffers
				1570	* if the page is uptodate.
				1571	* FIXME: For an uptodate page, the buffers may need to
				1572	* be written out because they were not initialized on
				1573	* disk before.
				1574	*/
				1575	if (!PageUptodate(page)) {
				1576	// TODO:
				1577	// Zero any non-uptodate buffers up to i_size.
				1578	// Set them uptodate and dirty.
				1579	}
				1580	// TODO:
				1581	// Update initialized size in the attribute and in the
				1582	// inode (up to i_size).
				1583	// Update iblock.
				1584	// FIXME: This is inefficient. Try to batch the two
				1585	// size changes to happen in one go.
				1586	ntfs_error(vol->sb, "Writing beyond initialized size "
				1587	"is not supported yet. Sorry.");
				1588	err = -EOPNOTSUPP;
				1589	goto err_out;
				1590	// Do NOT set_buffer_new() BUT DO clear buffer range
				1591	// outside write request range.
				1592	// set_buffer_uptodate() on complete buffers as well as
				1593	// set_buffer_dirty().
				1594	}
				1595
				1596	/* Need to map unmapped buffers. */
				1597	if (!buffer_mapped(bh)) {
				1598	/* Unmapped buffer. Need to map it. */
				1599	bh->b_bdev = vol->sb->s_bdev;
				1600
				1601	/* Convert block into corresponding vcn and offset. */
				1602	vcn = (VCN)block << blocksize_bits >>
				1603	vol->cluster_size_bits;
				1604	vcn_ofs = ((VCN)block << blocksize_bits) &
				1605	vol->cluster_size_mask;
				1606
				1607	is_retry = FALSE;
				1608	if (!rl) {
				1609	lock_retry_remap:
				1610	down_read(&ni->runlist.lock);
				1611	rl = ni->runlist.rl;
				1612	}
				1613	if (likely(rl != NULL)) {
				1614	/* Seek to element containing target vcn. */
				1615	while (rl->length && rl[1].vcn <= vcn)
				1616	rl++;
				1617	lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
				1618	} else
				1619	lcn = LCN_RL_NOT_MAPPED;
				1620	if (unlikely(lcn < 0)) {
				1621	/*
				1622	* We extended the attribute allocation above.
				1623	* If we hit an ENOENT here it means that the
				1624	* allocation was insufficient which is a bug.
				1625	*/
				1626	BUG_ON(lcn == LCN_ENOENT);
				1627
				1628	/* It is a hole, need to instantiate it. */
				1629	if (lcn == LCN_HOLE) {
				1630	// TODO: Instantiate the hole.
				1631	// clear_buffer_new(bh);
				1632	// unmap_underlying_metadata(bh->b_bdev,
				1633	// bh->b_blocknr);
				1634	// For non-uptodate buffers, need to
				1635	// zero out the region outside the
				1636	// request in this bh or all bhs,
				1637	// depending on what we implemented
				1638	// above.
				1639	// Need to flush_dcache_page().
				1640	// Or could use set_buffer_new()
				1641	// instead?
				1642	ntfs_error(vol->sb, "Writing into "
				1643	"sparse regions is "
				1644	"not supported yet. "
				1645	"Sorry.");
				1646	err = -EOPNOTSUPP;
				1647	goto err_out;
				1648	} else if (!is_retry &&
				1649	lcn == LCN_RL_NOT_MAPPED) {
				1650	is_retry = TRUE;
				1651	/*
				1652	* Attempt to map runlist, dropping
				1653	* lock for the duration.
				1654	*/
				1655	up_read(&ni->runlist.lock);
				1656	err = ntfs_map_runlist(ni, vcn);
				1657	if (likely(!err))
				1658	goto lock_retry_remap;
				1659	rl = NULL;
				1660	lcn = err;
				1661	}
				1662	/*
				1663	* Failed to map the buffer, even after
				1664	* retrying.
				1665	*/
				1666	bh->b_blocknr = -1;
				1667	ntfs_error(vol->sb, "Failed to write to inode "
				1668	"0x%lx, attribute type 0x%x, "
				1669	"vcn 0x%llx, offset 0x%x "
				1670	"because its location on disk "
				1671	"could not be determined%s "
				1672	"(error code %lli).",
				1673	ni->mft_no, ni->type,
				1674	(unsigned long long)vcn,
				1675	vcn_ofs, is_retry ? " even "
				1676	"after retrying" : "",
				1677	(long long)lcn);
				1678	if (!err)
				1679	err = -EIO;
				1680	goto err_out;
				1681	}
				1682	/* We now have a successful remap, i.e. lcn >= 0. */
				1683
				1684	/* Setup buffer head to correct block. */
				1685	bh->b_blocknr = ((lcn << vol->cluster_size_bits)
				1686	+ vcn_ofs) >> blocksize_bits;
				1687	set_buffer_mapped(bh);
				1688
				1689	// FIXME: Something analogous to this is needed for
				1690	// each newly allocated block, i.e. BH_New.
				1691	// FIXME: Might need to take this out of the
				1692	// if (!buffer_mapped(bh)) {}, depending on how we
				1693	// implement things during the allocated_size and
				1694	// initialized_size extension code above.
				1695	if (buffer_new(bh)) {
				1696	clear_buffer_new(bh);
				1697	unmap_underlying_metadata(bh->b_bdev,
				1698	bh->b_blocknr);
				1699	if (PageUptodate(page)) {
				1700	set_buffer_uptodate(bh);
				1701	continue;
				1702	}
				1703	/*
				1704	* Page is _not_ uptodate, zero surrounding
				1705	* region. NOTE: This is how we decide if to
				1706	* zero or not!
				1707	*/
				1708	if (block_end > to \|\| block_start < from) {
				1709	void *kaddr;
				1710
				1711	kaddr = kmap_atomic(page, KM_USER0);
				1712	if (block_end > to)
				1713	memset(kaddr + to, 0,
				1714	block_end - to);
				1715	if (block_start < from)
				1716	memset(kaddr + block_start, 0,
				1717	from -
				1718	block_start);
				1719	flush_dcache_page(page);
				1720	kunmap_atomic(kaddr, KM_USER0);
				1721	}
				1722	continue;
				1723	}
				1724	}
				1725	/* @bh is mapped, set it uptodate if the page is uptodate. */
				1726	if (PageUptodate(page)) {
				1727	if (!buffer_uptodate(bh))
				1728	set_buffer_uptodate(bh);
				1729	continue;
				1730	}
				1731	/*
				1732	* The page is not uptodate. The buffer is mapped. If it is not
				1733	* uptodate, and it is only partially being written to, we need
				1734	* to read the buffer in before the write, i.e. right now.
				1735	*/
				1736	if (!buffer_uptodate(bh) &&
				1737	(block_start < from \|\| block_end > to)) {
				1738	ll_rw_block(READ, 1, &bh);
				1739	*wait_bh++ = bh;
				1740	}
				1741	} while (block++, block_start = block_end,
				1742	(bh = bh->b_this_page) != head);
				1743
				1744	/* Release the lock if we took it. */
				1745	if (rl) {
				1746	up_read(&ni->runlist.lock);
				1747	rl = NULL;
				1748	}
				1749
				1750	/* If we issued read requests, let them complete. */
				1751	while (wait_bh > wait) {
				1752	wait_on_buffer(*--wait_bh);
				1753	if (!buffer_uptodate(*wait_bh))
				1754	return -EIO;
				1755	}
				1756
				1757	ntfs_debug("Done.");
				1758	return 0;
				1759	err_out:
				1760	/*
				1761	* Zero out any newly allocated blocks to avoid exposing stale data.
				1762	* If BH_New is set, we know that the block was newly allocated in the
				1763	* above loop.
				1764	* FIXME: What about initialized_size increments? Have we done all the
				1765	* required zeroing above? If not this error handling is broken, and
				1766	* in particular the if (block_end <= from) check is completely bogus.
				1767	*/
				1768	bh = head;
				1769	block_start = 0;
				1770	is_retry = FALSE;
				1771	do {
				1772	block_end = block_start + blocksize;
				1773	if (block_end <= from)
				1774	continue;
				1775	if (block_start >= to)
				1776	break;
				1777	if (buffer_new(bh)) {
				1778	void *kaddr;
				1779
				1780	clear_buffer_new(bh);
				1781	kaddr = kmap_atomic(page, KM_USER0);
				1782	memset(kaddr + block_start, 0, bh->b_size);
				1783	kunmap_atomic(kaddr, KM_USER0);
				1784	set_buffer_uptodate(bh);
				1785	mark_buffer_dirty(bh);
				1786	is_retry = TRUE;
				1787	}
				1788	} while (block_start = block_end, (bh = bh->b_this_page) != head);
				1789	if (is_retry)
				1790	flush_dcache_page(page);
				1791	if (rl)
				1792	up_read(&ni->runlist.lock);
				1793	return err;
				1794	}
				1795
				1796	/**
				1797	* ntfs_prepare_write - prepare a page for receiving data
				1798	*
				1799	* This is called from generic_file_write() with i_sem held on the inode
				1800	* (@page->mapping->host). The @page is locked but not kmap()ped. The source
				1801	* data has not yet been copied into the @page.
				1802	*
				1803	* Need to extend the attribute/fill in holes if necessary, create blocks and
				1804	* make partially overwritten blocks uptodate,
				1805	*
				1806	* i_size is not to be modified yet.
				1807	*
				1808	* Return 0 on success or -errno on error.
				1809	*
				1810	* Should be using block_prepare_write() [support for sparse files] or
				1811	* cont_prepare_write() [no support for sparse files]. Cannot do that due to
				1812	* ntfs specifics but can look at them for implementation guidance.
				1813	*
				1814	* Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
				1815	* the first byte in the page that will be written to and @to is the first byte
				1816	* after the last byte that will be written to.
				1817	*/
				1818	static int ntfs_prepare_write(struct file file, struct page page,
				1819	unsigned from, unsigned to)
				1820	{
				1821	s64 new_size;
				1822	struct inode *vi = page->mapping->host;
				1823	ntfs_inode base_ni = NULL, ni = NTFS_I(vi);
				1824	ntfs_volume *vol = ni->vol;
				1825	ntfs_attr_search_ctx *ctx = NULL;
				1826	MFT_RECORD *m = NULL;
				1827	ATTR_RECORD *a;
				1828	u8 *kaddr;
				1829	u32 attr_len;
				1830	int err;
				1831
				1832	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				1833	"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
				1834	page->index, from, to);
				1835	BUG_ON(!PageLocked(page));
				1836	BUG_ON(from > PAGE_CACHE_SIZE);
				1837	BUG_ON(to > PAGE_CACHE_SIZE);
				1838	BUG_ON(from > to);
				1839	BUG_ON(NInoMstProtected(ni));
				1840	/*
				1841	* If a previous ntfs_truncate() failed, repeat it and abort if it
				1842	* fails again.
				1843	*/
				1844	if (unlikely(NInoTruncateFailed(ni))) {
				1845	down_write(&vi->i_alloc_sem);
				1846	err = ntfs_truncate(vi);
				1847	up_write(&vi->i_alloc_sem);
				1848	if (err \|\| NInoTruncateFailed(ni)) {
				1849	if (!err)
				1850	err = -EIO;
				1851	goto err_out;
				1852	}
				1853	}
				1854	/* If the attribute is not resident, deal with it elsewhere. */
				1855	if (NInoNonResident(ni)) {
				1856	/*
				1857	* Only unnamed $DATA attributes can be compressed, encrypted,
				1858	* and/or sparse.
				1859	*/
				1860	if (ni->type == AT_DATA && !ni->name_len) {
				1861	/* If file is encrypted, deny access, just like NT4. */
				1862	if (NInoEncrypted(ni)) {
				1863	ntfs_debug("Denying write access to encrypted "
				1864	"file.");
				1865	return -EACCES;
				1866	}
				1867	/* Compressed data streams are handled in compress.c. */
				1868	if (NInoCompressed(ni)) {
				1869	// TODO: Implement and replace this check with
				1870	// return ntfs_write_compressed_block(page);
				1871	ntfs_error(vi->i_sb, "Writing to compressed "
				1872	"files is not supported yet. "
				1873	"Sorry.");
				1874	return -EOPNOTSUPP;
				1875	}
				1876	// TODO: Implement and remove this check.
				1877	if (NInoSparse(ni)) {
				1878	ntfs_error(vi->i_sb, "Writing to sparse files "
				1879	"is not supported yet. Sorry.");
				1880	return -EOPNOTSUPP;
				1881	}
				1882	}
				1883	/* Normal data stream. */
				1884	return ntfs_prepare_nonresident_write(page, from, to);
				1885	}
				1886	/*
				1887	* Attribute is resident, implying it is not compressed, encrypted, or
				1888	* sparse.
				1889	*/
				1890	BUG_ON(page_has_buffers(page));
				1891	new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
				1892	/* If we do not need to resize the attribute allocation we are done. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	1893	if (new_size <= i_size_read(vi))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1894	goto done;
				1895
				1896	// FIXME: We abort for now as this code is not safe.
				1897	ntfs_error(vi->i_sb, "Changing the file size is not supported yet. "
				1898	"Sorry.");
				1899	return -EOPNOTSUPP;
				1900
				1901	/* Map, pin, and lock the (base) mft record. */
				1902	if (!NInoAttr(ni))
				1903	base_ni = ni;
				1904	else
				1905	base_ni = ni->ext.base_ntfs_ino;
				1906	m = map_mft_record(base_ni);
				1907	if (IS_ERR(m)) {
				1908	err = PTR_ERR(m);
				1909	m = NULL;
				1910	ctx = NULL;
				1911	goto err_out;
				1912	}
				1913	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				1914	if (unlikely(!ctx)) {
				1915	err = -ENOMEM;
				1916	goto err_out;
				1917	}
				1918	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				1919	CASE_SENSITIVE, 0, NULL, 0, ctx);
				1920	if (unlikely(err)) {
				1921	if (err == -ENOENT)
				1922	err = -EIO;
				1923	goto err_out;
				1924	}
				1925	m = ctx->mrec;
				1926	a = ctx->attr;
				1927	/* The total length of the attribute value. */
				1928	attr_len = le32_to_cpu(a->data.resident.value_length);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	1929	BUG_ON(i_size_read(vi) != attr_len);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1930	/* Check if new size is allowed in $AttrDef. */
				1931	err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
				1932	if (unlikely(err)) {
				1933	if (err == -ERANGE) {
				1934	ntfs_error(vol->sb, "Write would cause the inode "
				1935	"0x%lx to exceed the maximum size for "
				1936	"its attribute type (0x%x). Aborting "
				1937	"write.", vi->i_ino,
				1938	le32_to_cpu(ni->type));
				1939	} else {
				1940	ntfs_error(vol->sb, "Inode 0x%lx has unknown "
				1941	"attribute type 0x%x. Aborting "
				1942	"write.", vi->i_ino,
				1943	le32_to_cpu(ni->type));
				1944	err = -EIO;
				1945	}
				1946	goto err_out2;
				1947	}
				1948	/*
				1949	* Extend the attribute record to be able to store the new attribute
				1950	* size.
				1951	*/
				1952	if (new_size >= vol->mft_record_size \|\| ntfs_attr_record_resize(m, a,
				1953	le16_to_cpu(a->data.resident.value_offset) +
				1954	new_size)) {
				1955	/* Not enough space in the mft record. */
				1956	ntfs_error(vol->sb, "Not enough space in the mft record for "
				1957	"the resized attribute value. This is not "
				1958	"supported yet. Aborting write.");
				1959	err = -EOPNOTSUPP;
				1960	goto err_out2;
				1961	}
				1962	/*
				1963	* We have enough space in the mft record to fit the write. This
				1964	* implies the attribute is smaller than the mft record and hence the
				1965	* attribute must be in a single page and hence page->index must be 0.
				1966	*/
				1967	BUG_ON(page->index);
				1968	/*
				1969	* If the beginning of the write is past the old size, enlarge the
				1970	* attribute value up to the beginning of the write and fill it with
				1971	* zeroes.
				1972	*/
				1973	if (from > attr_len) {
				1974	memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
				1975	attr_len, 0, from - attr_len);
				1976	a->data.resident.value_length = cpu_to_le32(from);
				1977	/* Zero the corresponding area in the page as well. */
				1978	if (PageUptodate(page)) {
				1979	kaddr = kmap_atomic(page, KM_USER0);
				1980	memset(kaddr + attr_len, 0, from - attr_len);
				1981	kunmap_atomic(kaddr, KM_USER0);
				1982	flush_dcache_page(page);
				1983	}
				1984	}
				1985	flush_dcache_mft_record_page(ctx->ntfs_ino);
				1986	mark_mft_record_dirty(ctx->ntfs_ino);
				1987	ntfs_attr_put_search_ctx(ctx);
				1988	unmap_mft_record(base_ni);
				1989	/*
				1990	* Because resident attributes are handled by memcpy() to/from the
				1991	* corresponding MFT record, and because this form of i/o is byte
				1992	* aligned rather than block aligned, there is no need to bring the
				1993	* page uptodate here as in the non-resident case where we need to
				1994	* bring the buffers straddled by the write uptodate before
				1995	* generic_file_write() does the copying from userspace.
				1996	*
				1997	* We thus defer the uptodate bringing of the page region outside the
				1998	* region written to to ntfs_commit_write(), which makes the code
				1999	* simpler and saves one atomic kmap which is good.
				2000	*/
				2001	done:
				2002	ntfs_debug("Done.");
				2003	return 0;
				2004	err_out:
				2005	if (err == -ENOMEM)
				2006	ntfs_warning(vi->i_sb, "Error allocating memory required to "
				2007	"prepare the write.");
				2008	else {
				2009	ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
				2010	"with error %i.", err);
				2011	NVolSetErrors(vol);
				2012	make_bad_inode(vi);
				2013	}
				2014	err_out2:
				2015	if (ctx)
				2016	ntfs_attr_put_search_ctx(ctx);
				2017	if (m)
				2018	unmap_mft_record(base_ni);
				2019	return err;
				2020	}
				2021
				2022	/**
				2023	* ntfs_commit_nonresident_write -
				2024	*
				2025	*/
				2026	static int ntfs_commit_nonresident_write(struct page *page,
				2027	unsigned from, unsigned to)
				2028	{
				2029	s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
				2030	struct inode *vi = page->mapping->host;
				2031	struct buffer_head bh, head;
				2032	unsigned int block_start, block_end, blocksize;
				2033	BOOL partial;
				2034
				2035	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				2036	"0x%lx, from = %u, to = %u.", vi->i_ino,
				2037	NTFS_I(vi)->type, page->index, from, to);
				2038	blocksize = 1 << vi->i_blkbits;
				2039
				2040	// FIXME: We need a whole slew of special cases in here for compressed
				2041	// files for example...
				2042	// For now, we know ntfs_prepare_write() would have failed so we can't
				2043	// get here in any of the cases which we have to special case, so we
				2044	// are just a ripped off, unrolled generic_commit_write().
				2045
				2046	bh = head = page_buffers(page);
				2047	block_start = 0;
				2048	partial = FALSE;
				2049	do {
				2050	block_end = block_start + blocksize;
				2051	if (block_end <= from \|\| block_start >= to) {
				2052	if (!buffer_uptodate(bh))
				2053	partial = TRUE;
				2054	} else {
				2055	set_buffer_uptodate(bh);
				2056	mark_buffer_dirty(bh);
				2057	}
				2058	} while (block_start = block_end, (bh = bh->b_this_page) != head);
				2059	/*
				2060	* If this is a partial write which happened to make all buffers
				2061	* uptodate then we can optimize away a bogus ->readpage() for the next
				2062	* read(). Here we 'discover' whether the page went uptodate as a
				2063	* result of this (potentially partial) write.
				2064	*/
				2065	if (!partial)
				2066	SetPageUptodate(page);
				2067	/*
				2068	* Not convinced about this at all. See disparity comment above. For
				2069	* now we know ntfs_prepare_write() would have failed in the write
				2070	* exceeds i_size case, so this will never trigger which is fine.
				2071	*/
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	2072	if (pos > i_size_read(vi)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2073	ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
				2074	"not supported yet. Sorry.");
				2075	return -EOPNOTSUPP;
				2076	// vi->i_size = pos;
				2077	// mark_inode_dirty(vi);
				2078	}
				2079	ntfs_debug("Done.");
				2080	return 0;
				2081	}
				2082
				2083	/**
				2084	* ntfs_commit_write - commit the received data
				2085	*
				2086	* This is called from generic_file_write() with i_sem held on the inode
				2087	* (@page->mapping->host). The @page is locked but not kmap()ped. The source
				2088	* data has already been copied into the @page. ntfs_prepare_write() has been
				2089	* called before the data copied and it returned success so we can take the
				2090	* results of various BUG checks and some error handling for granted.
				2091	*
				2092	* Need to mark modified blocks dirty so they get written out later when
				2093	* ntfs_writepage() is invoked by the VM.
				2094	*
				2095	* Return 0 on success or -errno on error.
				2096	*
				2097	* Should be using generic_commit_write(). This marks buffers uptodate and
				2098	* dirty, sets the page uptodate if all buffers in the page are uptodate, and
				2099	* updates i_size if the end of io is beyond i_size. In that case, it also
				2100	* marks the inode dirty.
				2101	*
				2102	* Cannot use generic_commit_write() due to ntfs specialities but can look at
				2103	* it for implementation guidance.
				2104	*
				2105	* If things have gone as outlined in ntfs_prepare_write(), then we do not
				2106	* need to do any page content modifications here at all, except in the write
				2107	* to resident attribute case, where we need to do the uptodate bringing here
				2108	* which we combine with the copying into the mft record which means we save
				2109	* one atomic kmap.
				2110	*/
				2111	static int ntfs_commit_write(struct file file, struct page page,
				2112	unsigned from, unsigned to)
				2113	{
				2114	struct inode *vi = page->mapping->host;
				2115	ntfs_inode base_ni, ni = NTFS_I(vi);
				2116	char kaddr, kattr;
				2117	ntfs_attr_search_ctx *ctx;
				2118	MFT_RECORD *m;
				2119	ATTR_RECORD *a;
				2120	u32 attr_len;
				2121	int err;
				2122
				2123	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
				2124	"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
				2125	page->index, from, to);
				2126	/* If the attribute is not resident, deal with it elsewhere. */
				2127	if (NInoNonResident(ni)) {
				2128	/* Only unnamed $DATA attributes can be compressed/encrypted. */
				2129	if (ni->type == AT_DATA && !ni->name_len) {
				2130	/* Encrypted files need separate handling. */
				2131	if (NInoEncrypted(ni)) {
				2132	// We never get here at present!
				2133	BUG();
				2134	}
				2135	/* Compressed data streams are handled in compress.c. */
				2136	if (NInoCompressed(ni)) {
				2137	// TODO: Implement this!
				2138	// return ntfs_write_compressed_block(page);
				2139	// We never get here at present!
				2140	BUG();
				2141	}
				2142	}
				2143	/* Normal data stream. */
				2144	return ntfs_commit_nonresident_write(page, from, to);
				2145	}
				2146	/*
				2147	* Attribute is resident, implying it is not compressed, encrypted, or
				2148	* sparse.
				2149	*/
				2150	if (!NInoAttr(ni))
				2151	base_ni = ni;
				2152	else
				2153	base_ni = ni->ext.base_ntfs_ino;
				2154	/* Map, pin, and lock the mft record. */
				2155	m = map_mft_record(base_ni);
				2156	if (IS_ERR(m)) {
				2157	err = PTR_ERR(m);
				2158	m = NULL;
				2159	ctx = NULL;
				2160	goto err_out;
				2161	}
				2162	ctx = ntfs_attr_get_search_ctx(base_ni, m);
				2163	if (unlikely(!ctx)) {
				2164	err = -ENOMEM;
				2165	goto err_out;
				2166	}
				2167	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				2168	CASE_SENSITIVE, 0, NULL, 0, ctx);
				2169	if (unlikely(err)) {
				2170	if (err == -ENOENT)
				2171	err = -EIO;
				2172	goto err_out;
				2173	}
				2174	a = ctx->attr;
				2175	/* The total length of the attribute value. */
				2176	attr_len = le32_to_cpu(a->data.resident.value_length);
				2177	BUG_ON(from > attr_len);
				2178	kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
				2179	kaddr = kmap_atomic(page, KM_USER0);
				2180	/* Copy the received data from the page to the mft record. */
				2181	memcpy(kattr + from, kaddr + from, to - from);
				2182	/* Update the attribute length if necessary. */
				2183	if (to > attr_len) {
				2184	attr_len = to;
				2185	a->data.resident.value_length = cpu_to_le32(attr_len);
				2186	}
				2187	/*
				2188	* If the page is not uptodate, bring the out of bounds area(s)
				2189	* uptodate by copying data from the mft record to the page.
				2190	*/
				2191	if (!PageUptodate(page)) {
				2192	if (from > 0)
				2193	memcpy(kaddr, kattr, from);
				2194	if (to < attr_len)
				2195	memcpy(kaddr + to, kattr + to, attr_len - to);
				2196	/* Zero the region outside the end of the attribute value. */
				2197	if (attr_len < PAGE_CACHE_SIZE)
				2198	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
				2199	/*
				2200	* The probability of not having done any of the above is
				2201	* extremely small, so we just flush unconditionally.
				2202	*/
				2203	flush_dcache_page(page);
				2204	SetPageUptodate(page);
				2205	}
				2206	kunmap_atomic(kaddr, KM_USER0);
				2207	/* Update i_size if necessary. */
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	2208	if (i_size_read(vi) < attr_len) {
				2209	unsigned long flags;
				2210
				2211	write_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2212	ni->allocated_size = ni->initialized_size = attr_len;
				2213	i_size_write(vi, attr_len);
Anton Altaparmakov	07a4e2d	2005-01-12 13:08:26 +0000	[diff] [blame^]	2214	write_unlock_irqrestore(&ni->size_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2215	}
				2216	/* Mark the mft record dirty, so it gets written back. */
				2217	flush_dcache_mft_record_page(ctx->ntfs_ino);
				2218	mark_mft_record_dirty(ctx->ntfs_ino);
				2219	ntfs_attr_put_search_ctx(ctx);
				2220	unmap_mft_record(base_ni);
				2221	ntfs_debug("Done.");
				2222	return 0;
				2223	err_out:
				2224	if (err == -ENOMEM) {
				2225	ntfs_warning(vi->i_sb, "Error allocating memory required to "
				2226	"commit the write.");
				2227	if (PageUptodate(page)) {
				2228	ntfs_warning(vi->i_sb, "Page is uptodate, setting "
				2229	"dirty so the write will be retried "
				2230	"later on by the VM.");
				2231	/*
				2232	* Put the page on mapping->dirty_pages, but leave its
				2233	* buffers' dirty state as-is.
				2234	*/
				2235	__set_page_dirty_nobuffers(page);
				2236	err = 0;
				2237	} else
				2238	ntfs_error(vi->i_sb, "Page is not uptodate. Written "
				2239	"data has been lost.");
				2240	} else {
				2241	ntfs_error(vi->i_sb, "Resident attribute commit write failed "
				2242	"with error %i.", err);
				2243	NVolSetErrors(ni->vol);
				2244	make_bad_inode(vi);
				2245	}
				2246	if (ctx)
				2247	ntfs_attr_put_search_ctx(ctx);
				2248	if (m)
				2249	unmap_mft_record(base_ni);
				2250	return err;
				2251	}
				2252
				2253	#endif /* NTFS_RW */
				2254
				2255	/**
				2256	* ntfs_aops - general address space operations for inodes and attributes
				2257	*/
				2258	struct address_space_operations ntfs_aops = {
				2259	.readpage = ntfs_readpage, /* Fill page with data. */
				2260	.sync_page = block_sync_page, /* Currently, just unplugs the
				2261	disk request queue. */
				2262	#ifdef NTFS_RW
				2263	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				2264	.prepare_write = ntfs_prepare_write, /* Prepare page and buffers
				2265	ready to receive data. */
				2266	.commit_write = ntfs_commit_write, /* Commit received data. */
				2267	#endif /* NTFS_RW */
				2268	};
				2269
				2270	/**
				2271	* ntfs_mst_aops - general address space operations for mst protecteed inodes
				2272	* and attributes
				2273	*/
				2274	struct address_space_operations ntfs_mst_aops = {
				2275	.readpage = ntfs_readpage, /* Fill page with data. */
				2276	.sync_page = block_sync_page, /* Currently, just unplugs the
				2277	disk request queue. */
				2278	#ifdef NTFS_RW
				2279	.writepage = ntfs_writepage, /* Write dirty page to disk. */
				2280	.set_page_dirty = __set_page_dirty_nobuffers, /* Set the page dirty
				2281	without touching the buffers
				2282	belonging to the page. */
				2283	#endif /* NTFS_RW */
				2284	};
				2285
				2286	#ifdef NTFS_RW
				2287
				2288	/**
				2289	* mark_ntfs_record_dirty - mark an ntfs record dirty
				2290	* @page: page containing the ntfs record to mark dirty
				2291	* @ofs: byte offset within @page at which the ntfs record begins
				2292	*
				2293	* Set the buffers and the page in which the ntfs record is located dirty.
				2294	*
				2295	* The latter also marks the vfs inode the ntfs record belongs to dirty
				2296	* (I_DIRTY_PAGES only).
				2297	*
				2298	* If the page does not have buffers, we create them and set them uptodate.
				2299	* The page may not be locked which is why we need to handle the buffers under
				2300	* the mapping->private_lock. Once the buffers are marked dirty we no longer
				2301	* need the lock since try_to_free_buffers() does not free dirty buffers.
				2302	*/
				2303	void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) {
				2304	struct address_space *mapping = page->mapping;
				2305	ntfs_inode *ni = NTFS_I(mapping->host);
				2306	struct buffer_head bh, head, *buffers_to_free = NULL;
				2307	unsigned int end, bh_size, bh_ofs;
				2308
				2309	BUG_ON(!PageUptodate(page));
				2310	end = ofs + ni->itype.index.block_size;
				2311	bh_size = 1 << VFS_I(ni)->i_blkbits;
				2312	spin_lock(&mapping->private_lock);
				2313	if (unlikely(!page_has_buffers(page))) {
				2314	spin_unlock(&mapping->private_lock);
				2315	bh = head = alloc_page_buffers(page, bh_size, 1);
				2316	spin_lock(&mapping->private_lock);
				2317	if (likely(!page_has_buffers(page))) {
				2318	struct buffer_head *tail;
				2319
				2320	do {
				2321	set_buffer_uptodate(bh);
				2322	tail = bh;
				2323	bh = bh->b_this_page;
				2324	} while (bh);
				2325	tail->b_this_page = head;
				2326	attach_page_buffers(page, head);
				2327	} else
				2328	buffers_to_free = bh;
				2329	}
				2330	bh = head = page_buffers(page);
				2331	do {
				2332	bh_ofs = bh_offset(bh);
				2333	if (bh_ofs + bh_size <= ofs)
				2334	continue;
				2335	if (unlikely(bh_ofs >= end))
				2336	break;
				2337	set_buffer_dirty(bh);
				2338	} while ((bh = bh->b_this_page) != head);
				2339	spin_unlock(&mapping->private_lock);
				2340	__set_page_dirty_nobuffers(page);
				2341	if (unlikely(buffers_to_free)) {
				2342	do {
				2343	bh = buffers_to_free->b_this_page;
				2344	free_buffer_head(buffers_to_free);
				2345	buffers_to_free = bh;
				2346	} while (buffers_to_free);
				2347	}
				2348	}
				2349
				2350	#endif /* NTFS_RW */