/**
 * aops.c - NTFS kernel address space operations and page cache handling.
 * Part of the Linux-NTFS project.
 *
 * Copyright (c) 2001-2005 Anton Altaparmakov
 * Copyright (c) 2002 Richard Russon
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the Linux-NTFS
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>

#include "aops.h"
#include "attrib.h"
#include "debug.h"
#include "inode.h"
#include "mft.h"
#include "runlist.h"
#include "types.h"
#include "ntfs.h"

/**
 * ntfs_end_buffer_async_read - async io completion for reading attributes
 * @bh:		buffer head on which io is completed
 * @uptodate:	whether @bh is now uptodate or not
 *
 * Asynchronous I/O completion handler for reading pages belonging to the
 * attribute address space of an inode. The inodes can either be files or
 * directories or they can be fake inodes describing some attribute.
 *
 * If NInoMstProtected(), perform the post read mst fixups when all IO on the
 * page has been completed and mark the page uptodate or set the error bit on
 * the page. To determine the size of the records that need fixing up, we
 * cheat a little bit by setting the index_block_size in ntfs_inode to the ntfs
 * record size, and index_block_size_bits, to the log(base 2) of the ntfs
 * record size.
 */
static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	static DEFINE_SPINLOCK(page_uptodate_lock);
	unsigned long flags;
	struct buffer_head *tmp;
	struct page *page;
	ntfs_inode *ni;
	int page_uptodate = 1;

	page = bh->b_page;
	ni = NTFS_I(page->mapping->host);

	if (likely(uptodate)) {
		s64 file_ofs, initialized_size;

		set_buffer_uptodate(bh);

		file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
				bh_offset(bh);
		read_lock_irqsave(&ni->size_lock, flags);
		initialized_size = ni->initialized_size;
		read_unlock_irqrestore(&ni->size_lock, flags);
		/* Check for the current buffer head overflowing. */
		if (file_ofs + bh->b_size > initialized_size) {
			char *addr;
			int ofs = 0;

			if (file_ofs < initialized_size)
				ofs = initialized_size - file_ofs;
			addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
			memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
			flush_dcache_page(page);
			kunmap_atomic(addr, KM_BIO_SRC_IRQ);
		}
	} else {
		clear_buffer_uptodate(bh);
		ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
				(unsigned long long)bh->b_blocknr);
		SetPageError(page);
	}
	spin_lock_irqsave(&page_uptodate_lock, flags);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			if (likely(buffer_locked(tmp)))
				goto still_busy;
			/* Async buffers must be locked. */
			BUG();
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	/*
	 * If none of the buffers had errors then we can set the page uptodate,
	 * but we first have to perform the post read mst fixups, if the
	 * attribute is mst protected, i.e. if NInoMstProtected(ni) is true.
	 * Note we ignore fixup errors as those are detected when
	 * map_mft_record() is called which gives us per record granularity
	 * rather than per page granularity.
	 */
	if (!NInoMstProtected(ni)) {
		if (likely(page_uptodate && !PageError(page)))
			SetPageUptodate(page);
	} else {
		char *addr;
		unsigned int i, recs;
		u32 rec_size;

		rec_size = ni->itype.index.block_size;
		recs = PAGE_CACHE_SIZE / rec_size;
		/* Should have been verified before we got here... */
		BUG_ON(!recs);
		addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
		for (i = 0; i < recs; i++)
			post_read_mst_fixup((NTFS_RECORD*)(addr +
					i * rec_size), rec_size);
		flush_dcache_page(page);
		kunmap_atomic(addr, KM_BIO_SRC_IRQ);
		if (likely(page_uptodate && !PageError(page)))
			SetPageUptodate(page);
	}
	unlock_page(page);
	return;
still_busy:
	spin_unlock_irqrestore(&page_uptodate_lock, flags);
	return;
}

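/*
 * Illustrative sketch, not part of the original driver: with the common
 * PAGE_CACHE_SIZE of 4096 bytes and 1024-byte ntfs records, the completion
 * handler above applies post_read_mst_fixup() to recs = 4096 / 1024 = 4
 * records at page offsets 0x000, 0x400, 0x800, and 0xc00. The zeroing past
 * the initialized size works on file offsets computed as
 *
 *	file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + bh_offset(bh)
 *
 * so a 512-byte buffer at offset 0x600 of page index 3 covers file offsets
 * 0x3600-0x37ff; if ni->initialized_size is 0x3700, ofs becomes 0x100 and
 * only the last 0x100 bytes of the buffer are zeroed.
 */
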
/**
 * ntfs_read_block - fill a @page of an address space with data
 * @page:	page cache page to fill with data
 *
 * Fill the page @page of the address space belonging to the @page->host inode.
 * We read each buffer asynchronously and when all buffers are read in, our io
 * completion handler ntfs_end_buffer_async_read(), if required, automatically
 * applies the mst fixups to the page before finally marking it uptodate and
 * unlocking it.
 *
 * We only enforce the allocated_size limit because i_size is checked for in
 * generic_file_read().
 *
 * Return 0 on success and -errno on error.
 *
 * Contains an adapted version of fs/buffer.c::block_read_full_page().
 */
static int ntfs_read_block(struct page *page)
{
	VCN vcn;
	LCN lcn;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
	sector_t iblock, lblock, zblock;
	unsigned long flags;
	unsigned int blocksize, vcn_ofs;
	int i, nr;
	unsigned char blocksize_bits;

	ni = NTFS_I(page->mapping->host);
	vol = ni->vol;

	/* $MFT/$DATA must have its complete runlist in memory at all times. */
	BUG_ON(!ni->runlist.rl && !ni->mft_no && !NInoAttr(ni));

	blocksize_bits = VFS_I(ni)->i_blkbits;
	blocksize = 1 << blocksize_bits;

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);
	bh = head = page_buffers(page);
	if (unlikely(!bh)) {
		unlock_page(page);
		return -ENOMEM;
	}

	iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
	read_lock_irqsave(&ni->size_lock, flags);
	lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
	zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
	read_unlock_irqrestore(&ni->size_lock, flags);

	/* Loop through all the buffers in the page. */
	rl = NULL;
	nr = i = 0;
	do {
		u8 *kaddr;
		int err;

		if (unlikely(buffer_uptodate(bh)))
			continue;
		if (unlikely(buffer_mapped(bh))) {
			arr[nr++] = bh;
			continue;
		}
		err = 0;
		bh->b_bdev = vol->sb->s_bdev;
		/* Is the block within the allowed limits? */
		if (iblock < lblock) {
			BOOL is_retry = FALSE;

			/* Convert iblock into corresponding vcn and offset. */
			vcn = (VCN)iblock << blocksize_bits >>
					vol->cluster_size_bits;
			vcn_ofs = ((VCN)iblock << blocksize_bits) &
					vol->cluster_size_mask;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (lcn >= 0) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn << vol->cluster_size_bits)
						+ vcn_ofs) >> blocksize_bits;
				set_buffer_mapped(bh);
				/* Only read initialized data blocks. */
				if (iblock < zblock) {
					arr[nr++] = bh;
					continue;
				}
				/* Fully non-initialized data block, zero it. */
				goto handle_zblock;
			}
			/* It is a hole, need to zero it. */
			if (lcn == LCN_HOLE)
				goto handle_hole;
			/* If first try and runlist unmapped, map and retry. */
			if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
				is_retry = TRUE;
				/*
				 * Attempt to map runlist, dropping lock for
				 * the duration.
				 */
				up_read(&ni->runlist.lock);
				err = ntfs_map_runlist(ni, vcn);
				if (likely(!err))
					goto lock_retry_remap;
				rl = NULL;
			} else if (!rl)
				up_read(&ni->runlist.lock);
			/*
			 * If buffer is outside the runlist, treat it as a
			 * hole. This can happen due to concurrent truncate
			 * for example.
			 */
			if (err == -ENOENT || lcn == LCN_ENOENT) {
				err = 0;
				goto handle_hole;
			}
			/* Hard error, zero out region. */
			if (!err)
				err = -EIO;
			bh->b_blocknr = -1;
			SetPageError(page);
			ntfs_error(vol->sb, "Failed to read from inode 0x%lx, "
					"attribute type 0x%x, vcn 0x%llx, "
					"offset 0x%x because its location on "
					"disk could not be determined%s "
					"(error code %i).", ni->mft_no,
					ni->type, (unsigned long long)vcn,
					vcn_ofs, is_retry ? " even after "
					"retrying" : "", err);
		}
		/*
		 * Either iblock was outside lblock limits or
		 * ntfs_rl_vcn_to_lcn() returned error. Just zero that portion
		 * of the page and set the buffer uptodate.
		 */
handle_hole:
		bh->b_blocknr = -1UL;
		clear_buffer_mapped(bh);
handle_zblock:
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr + i * blocksize, 0, blocksize);
		kunmap_atomic(kaddr, KM_USER0);
		flush_dcache_page(page);
		if (likely(!err))
			set_buffer_uptodate(bh);
	} while (i++, iblock++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* Check we have at least one buffer ready for i/o. */
	if (nr) {
		struct buffer_head *tbh;

		/* Lock the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			lock_buffer(tbh);
			tbh->b_end_io = ntfs_end_buffer_async_read;
			set_buffer_async_read(tbh);
		}
		/* Finally, start i/o on the buffers. */
		for (i = 0; i < nr; i++) {
			tbh = arr[i];
			if (likely(!buffer_uptodate(tbh)))
				submit_bh(READ, tbh);
			else
				ntfs_end_buffer_async_read(tbh, 1);
		}
		return 0;
	}
	/* No i/o was scheduled on any of the buffers. */
	if (likely(!PageError(page)))
		SetPageUptodate(page);
	else /* Signal synchronous i/o error. */
		nr = -EIO;
	unlock_page(page);
	return nr;
}

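/*
 * Illustrative sketch, not part of the original driver: the iblock to
 * device block conversion above works in two steps. With 512-byte blocks
 * (blocksize_bits = 9) and 4096-byte clusters (cluster_size_bits = 12),
 * logical block 21 of the attribute lies at byte offset 21 << 9 = 0x2a00,
 * giving vcn = 0x2a00 >> 12 = 2 and vcn_ofs = 0x2a00 & 0xfff = 0xa00. If
 * the runlist maps vcn 2 to, say, lcn 100, the buffer is mapped to device
 * block ((100 << 12) + 0xa00) >> 9 = 805.
 */
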
/**
 * ntfs_readpage - fill a @page of a @file with data from the device
 * @file:	open file to which the page @page belongs or NULL
 * @page:	page cache page to fill with data
 *
 * For non-resident attributes, ntfs_readpage() fills the @page of the open
 * file @file by calling the ntfs version of the generic block_read_full_page()
 * function, ntfs_read_block(), which in turn creates and reads in the buffers
 * associated with the page asynchronously.
 *
 * For resident attributes, OTOH, ntfs_readpage() fills @page by copying the
 * data from the mft record (which at this stage is most likely in memory) and
 * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as
 * even if the mft record is not cached at this point in time, we need to wait
 * for it to be read in before we can do the copy.
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_readpage(struct file *file, struct page *page)
{
	ntfs_inode *ni, *base_ni;
	u8 *kaddr;
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *mrec;
	unsigned long flags;
	u32 attr_len;
	int err = 0;

retry_readpage:
	BUG_ON(!PageLocked(page));
	/*
	 * This can potentially happen because we clear PageUptodate() during
	 * ntfs_writepage() of MstProtected() attributes.
	 */
	if (PageUptodate(page)) {
		unlock_page(page);
		return 0;
	}
	ni = NTFS_I(page->mapping->host);
	/*
	 * Only $DATA attributes can be encrypted and only unnamed $DATA
	 * attributes can be compressed. Index root can have the flags set but
	 * this means to create compressed/encrypted files, not that the
	 * attribute is compressed/encrypted.
	 */
	if (ni->type != AT_INDEX_ROOT) {
		/* If attribute is encrypted, deny access, just like NT4. */
		if (NInoEncrypted(ni)) {
			BUG_ON(ni->type != AT_DATA);
			err = -EACCES;
			goto err_out;
		}
		/* Compressed data streams are handled in compress.c. */
		if (NInoNonResident(ni) && NInoCompressed(ni)) {
			BUG_ON(ni->type != AT_DATA);
			BUG_ON(ni->name_len);
			return ntfs_read_compressed_block(page);
		}
	}
	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/* Normal, non-resident data stream. */
		return ntfs_read_block(page);
	}
	/*
	 * Attribute is resident, implying it is not compressed or encrypted.
	 * This also means the attribute is smaller than an mft record and
	 * hence smaller than a page, so can simply zero out any pages with
	 * index above 0. Note the attribute can actually be marked compressed
	 * but if it is resident the actual data is not compressed so we are
	 * ok to ignore the compressed flag here.
	 */
	if (unlikely(page->index > 0)) {
		kaddr = kmap_atomic(page, KM_USER0);
		memset(kaddr, 0, PAGE_CACHE_SIZE);
		flush_dcache_page(page);
		kunmap_atomic(kaddr, KM_USER0);
		goto done;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	mrec = map_mft_record(base_ni);
	if (IS_ERR(mrec)) {
		err = PTR_ERR(mrec);
		goto err_out;
	}
	/*
	 * If a parallel write made the attribute non-resident, drop the mft
	 * record and retry the readpage.
	 */
	if (unlikely(NInoNonResident(ni))) {
		unmap_mft_record(base_ni);
		goto retry_readpage;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, mrec);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto unm_err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto put_unm_err_out;
	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	read_lock_irqsave(&ni->size_lock, flags);
	if (unlikely(attr_len > ni->initialized_size))
		attr_len = ni->initialized_size;
	read_unlock_irqrestore(&ni->size_lock, flags);
	kaddr = kmap_atomic(page, KM_USER0);
	/* Copy the data to the page. */
	memcpy(kaddr, (u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			attr_len);
	/* Zero the remainder of the page. */
	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);
put_unm_err_out:
	ntfs_attr_put_search_ctx(ctx);
unm_err_out:
	unmap_mft_record(base_ni);
done:
	SetPageUptodate(page);
err_out:
	unlock_page(page);
	return err;
}

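/*
 * Illustrative sketch, not part of the original driver: for the resident
 * case above, the whole attribute value lives inside the mft record at
 *
 *	(u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset)
 *
 * and is value_length bytes long. A 200-byte resident file on a system
 * with 4096-byte pages thus results in a memcpy() of 200 bytes into page 0
 * followed by a memset() of the remaining 3896 bytes, while every page
 * with index above 0 is simply zeroed in full.
 */
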
#ifdef NTFS_RW

/**
 * ntfs_write_block - write a @page to the backing store
 * @page:	page cache page to write out
 * @wbc:	writeback control structure
 *
 * This function is for writing pages belonging to non-resident, non-mst
 * protected attributes to their backing store.
 *
 * For a page with buffers, map and write the dirty buffers asynchronously
 * under page writeback. For a page without buffers, create buffers for the
 * page, then proceed as above.
 *
 * If a page doesn't have buffers the page dirty state is definitive. If a
 * page does have buffers, the page dirty state is just a hint, and the buffer
 * dirty state is definitive. (A hint which has rules: dirty buffers against
 * a clean page are illegal; other combinations are legal and need to be
 * handled, in particular a dirty page containing clean buffers.)
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_read_block() and __block_write_full_page().
 */
static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
{
	VCN vcn;
	LCN lcn;
	s64 initialized_size;
	loff_t i_size;
	sector_t block, dblock, iblock;
	struct inode *vi;
	ntfs_inode *ni;
	ntfs_volume *vol;
	runlist_element *rl;
	struct buffer_head *bh, *head;
	unsigned long flags;
	unsigned int blocksize, vcn_ofs;
	int err;
	BOOL need_end_writeback;
	unsigned char blocksize_bits;

	vi = page->mapping->host;
	ni = NTFS_I(vi);
	vol = ni->vol;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", ni->mft_no, ni->type, page->index);

	BUG_ON(!NInoNonResident(ni));
	BUG_ON(NInoMstProtected(ni));

	blocksize_bits = vi->i_blkbits;
	blocksize = 1 << blocksize_bits;

	if (!page_has_buffers(page)) {
		BUG_ON(!PageUptodate(page));
		create_empty_buffers(page, blocksize,
				(1 << BH_Uptodate) | (1 << BH_Dirty));
	}
	bh = head = page_buffers(page);
	if (unlikely(!bh)) {
		ntfs_warning(vol->sb, "Error allocating page buffers. "
				"Redirtying page so we try again later.");
		/*
		 * Put the page back on mapping->dirty_pages, but leave its
		 * buffers' dirty state as-is.
		 */
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
		return 0;
	}

	/* NOTE: Different naming scheme to ntfs_read_block()! */

	/* The first block in the page. */
	block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);

	read_lock_irqsave(&ni->size_lock, flags);
	i_size = i_size_read(vi);
	initialized_size = ni->initialized_size;
	read_unlock_irqrestore(&ni->size_lock, flags);

	/* The first out of bounds block for the data size. */
	dblock = (i_size + blocksize - 1) >> blocksize_bits;

	/* The last (fully or partially) initialized block. */
	iblock = initialized_size >> blocksize_bits;

	/*
	 * Be very careful. We have no exclusion from __set_page_dirty_buffers
	 * here, and the (potentially unmapped) buffers may become dirty at
	 * any time. If a buffer becomes dirty here after we've inspected it
	 * then we just miss that fact, and the page stays dirty.
	 *
	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
	 * handle that here by just cleaning them.
	 */

	/*
	 * Loop through all the buffers in the page, mapping all the dirty
	 * buffers to disk addresses and handling any aliases from the
	 * underlying block device's mapping.
	 */
	rl = NULL;
	err = 0;
	do {
		BOOL is_retry = FALSE;

		if (unlikely(block >= dblock)) {
			/*
			 * Mapped buffers outside i_size will occur, because
			 * this page can be outside i_size when there is a
			 * truncate in progress. The contents of such buffers
			 * were zeroed by ntfs_writepage().
			 *
			 * FIXME: What about the small race window where
			 * ntfs_writepage() has not done any clearing because
			 * the page was within i_size but before we get here,
			 * vmtruncate() modifies i_size?
			 */
			clear_buffer_dirty(bh);
			set_buffer_uptodate(bh);
			continue;
		}

		/* Clean buffers are not written out, so no need to map them. */
		if (!buffer_dirty(bh))
			continue;

		/* Make sure we have enough initialized size. */
		if (unlikely((block >= iblock) &&
				(initialized_size < i_size))) {
			/*
			 * If this page is fully outside initialized size, zero
			 * out all pages between the current initialized size
			 * and the current page. Just use ntfs_readpage() to do
			 * the zeroing transparently.
			 */
			if (block > iblock) {
				// TODO:
				// For each page do:
				// - read_cache_page()
				// Again for each page do:
				// - wait_on_page_locked()
				// - Check (PageUptodate(page) &&
				//			!PageError(page))
				// Update initialized size in the attribute and
				// in the inode.
				// Again, for each page do:
				//	__set_page_dirty_buffers();
				// page_cache_release()
				// We don't need to wait on the writes.
				// Update iblock.
			}
			/*
			 * The current page straddles initialized size. Zero
			 * all non-uptodate buffers and set them uptodate (and
			 * dirty?). Note, there aren't any non-uptodate buffers
			 * if the page is uptodate.
			 * FIXME: For an uptodate page, the buffers may need to
			 * be written out because they were not initialized on
			 * disk before.
			 */
			if (!PageUptodate(page)) {
				// TODO:
				// Zero any non-uptodate buffers up to i_size.
				// Set them uptodate and dirty.
			}
			// TODO:
			// Update initialized size in the attribute and in the
			// inode (up to i_size).
			// Update iblock.
			// FIXME: This is inefficient. Try to batch the two
			// size changes to happen in one go.
			ntfs_error(vol->sb, "Writing beyond initialized size "
					"is not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
			// Do NOT set_buffer_new() BUT DO clear buffer range
			// outside write request range.
			// set_buffer_uptodate() on complete buffers as well as
			// set_buffer_dirty().
		}

		/* No need to map buffers that are already mapped. */
		if (buffer_mapped(bh))
			continue;

		/* Unmapped, dirty buffer. Need to map it. */
		bh->b_bdev = vol->sb->s_bdev;

		/* Convert block into corresponding vcn and offset. */
		vcn = (VCN)block << blocksize_bits;
		vcn_ofs = vcn & vol->cluster_size_mask;
		vcn >>= vol->cluster_size_bits;
		if (!rl) {
lock_retry_remap:
			down_read(&ni->runlist.lock);
			rl = ni->runlist.rl;
		}
		if (likely(rl != NULL)) {
			/* Seek to element containing target vcn. */
			while (rl->length && rl[1].vcn <= vcn)
				rl++;
			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
		} else
			lcn = LCN_RL_NOT_MAPPED;
		/* Successful remap. */
		if (lcn >= 0) {
			/* Setup buffer head to point to correct block. */
			bh->b_blocknr = ((lcn << vol->cluster_size_bits) +
					vcn_ofs) >> blocksize_bits;
			set_buffer_mapped(bh);
			continue;
		}
		/* It is a hole, need to instantiate it. */
		if (lcn == LCN_HOLE) {
			u8 *kaddr;
			unsigned long *bpos, *bend;

			/* Check if the buffer is zero. */
			kaddr = kmap_atomic(page, KM_USER0);
			bpos = (unsigned long *)(kaddr + bh_offset(bh));
			bend = (unsigned long *)((u8*)bpos + blocksize);
			do {
				if (unlikely(*bpos))
					break;
			} while (likely(++bpos < bend));
			kunmap_atomic(kaddr, KM_USER0);
			if (bpos == bend) {
				/*
				 * Buffer is zero and sparse, no need to write
				 * it.
				 */
				bh->b_blocknr = -1;
				clear_buffer_dirty(bh);
				continue;
			}
			// TODO: Instantiate the hole.
			// clear_buffer_new(bh);
			// unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
			ntfs_error(vol->sb, "Writing into sparse regions is "
					"not supported yet. Sorry.");
			err = -EOPNOTSUPP;
			break;
		}
		/* If first try and runlist unmapped, map and retry. */
		if (!is_retry && lcn == LCN_RL_NOT_MAPPED) {
			is_retry = TRUE;
			/*
			 * Attempt to map runlist, dropping lock for
			 * the duration.
			 */
			up_read(&ni->runlist.lock);
			err = ntfs_map_runlist(ni, vcn);
			if (likely(!err))
				goto lock_retry_remap;
			rl = NULL;
		} else if (!rl)
			up_read(&ni->runlist.lock);
		/*
		 * If buffer is outside the runlist, truncate has cut it out
		 * of the runlist. Just clean and clear the buffer and set it
		 * uptodate so it can get discarded by the VM.
		 */
		if (err == -ENOENT || lcn == LCN_ENOENT) {
			u8 *kaddr;

			bh->b_blocknr = -1;
			clear_buffer_dirty(bh);
			kaddr = kmap_atomic(page, KM_USER0);
			memset(kaddr + bh_offset(bh), 0, blocksize);
			kunmap_atomic(kaddr, KM_USER0);
			flush_dcache_page(page);
			set_buffer_uptodate(bh);
			err = 0;
			continue;
		}
		/* Failed to map the buffer, even after retrying. */
		if (!err)
			err = -EIO;
		bh->b_blocknr = -1;
		ntfs_error(vol->sb, "Failed to write to inode 0x%lx, "
				"attribute type 0x%x, vcn 0x%llx, offset 0x%x "
				"because its location on disk could not be "
				"determined%s (error code %i).", ni->mft_no,
				ni->type, (unsigned long long)vcn,
				vcn_ofs, is_retry ? " even after "
				"retrying" : "", err);
		break;
	} while (block++, (bh = bh->b_this_page) != head);

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* For the error case, need to reset bh to the beginning. */
	bh = head;

	/* Just an optimization, so ->readpage() is not called later. */
	if (unlikely(!PageUptodate(page))) {
		int uptodate = 1;
		do {
			if (!buffer_uptodate(bh)) {
				uptodate = 0;
				bh = head;
				break;
			}
		} while ((bh = bh->b_this_page) != head);
		if (uptodate)
			SetPageUptodate(page);
	}

	/* Setup all mapped, dirty buffers for async write i/o. */
	do {
		if (buffer_mapped(bh) && buffer_dirty(bh)) {
			lock_buffer(bh);
			if (test_clear_buffer_dirty(bh)) {
				BUG_ON(!buffer_uptodate(bh));
				mark_buffer_async_write(bh);
			} else
				unlock_buffer(bh);
		} else if (unlikely(err)) {
			/*
			 * For the error case. The buffer may have been set
			 * dirty during attachment to a dirty page.
			 */
			if (err != -ENOMEM)
				clear_buffer_dirty(bh);
		}
	} while ((bh = bh->b_this_page) != head);

	if (unlikely(err)) {
		// TODO: Remove the -EOPNOTSUPP check later on...
		if (unlikely(err == -EOPNOTSUPP))
			err = 0;
		else if (err == -ENOMEM) {
			ntfs_warning(vol->sb, "Error allocating memory. "
					"Redirtying page so we try again "
					"later.");
			/*
			 * Put the page back on mapping->dirty_pages, but
			 * leave its buffers' dirty state as-is.
			 */
			redirty_page_for_writepage(wbc, page);
			err = 0;
		} else
			SetPageError(page);
	}

	BUG_ON(PageWriteback(page));
	set_page_writeback(page);	/* Keeps try_to_free_buffers() away. */

	/* Submit the prepared buffers for i/o. */
	need_end_writeback = TRUE;
	do {
		struct buffer_head *next = bh->b_this_page;
		if (buffer_async_write(bh)) {
			submit_bh(WRITE, bh);
			need_end_writeback = FALSE;
		}
		bh = next;
	} while (bh != head);
	unlock_page(page);

	/* If no i/o was started, need to end_page_writeback(). */
	if (unlikely(need_end_writeback))
		end_page_writeback(page);

	ntfs_debug("Done.");
	return err;
}

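/*
 * Illustrative sketch, not part of the original driver: the LCN_HOLE check
 * above scans the buffer one unsigned long at a time, so on a 64-bit
 * machine a 512-byte buffer costs at most 64 loads:
 *
 *	bpos = (unsigned long *)(kaddr + bh_offset(bh));
 *	bend = (unsigned long *)((u8*)bpos + blocksize);
 *	while (bpos < bend && !*bpos)
 *		bpos++;
 *
 * Reaching bpos == bend means the buffer is all zeroes, so the hole is
 * left sparse and the write is skipped; only a buffer containing at least
 * one non-zero word forces the (not yet implemented) hole instantiation
 * path.
 */
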
/**
 * ntfs_write_mst_block - write a @page to the backing store
 * @page:	page cache page to write out
 * @wbc:	writeback control structure
 *
 * This function is for writing pages belonging to non-resident, mst protected
 * attributes to their backing store. The only supported attributes are index
 * allocation and $MFT/$DATA. Both directory inodes and index inodes are
 * supported for the index allocation case.
 *
 * The page must remain locked for the duration of the write because we apply
 * the mst fixups, write, and then undo the fixups, so if we were to unlock the
 * page before undoing the fixups, any other user of the page will see the
 * page contents as corrupt.
 *
 * We clear the page uptodate flag for the duration of the function to ensure
 * exclusion for the $MFT/$DATA case against someone mapping an mft record we
 * are about to apply the mst fixups to.
 *
 * Return 0 on success and -errno on error.
 *
 * Based on ntfs_write_block(), ntfs_mft_writepage(), and
 * write_mft_record_nolock().
 */
static int ntfs_write_mst_block(struct page *page,
		struct writeback_control *wbc)
{
	sector_t block, dblock, rec_block;
	struct inode *vi = page->mapping->host;
	ntfs_inode *ni = NTFS_I(vi);
	ntfs_volume *vol = ni->vol;
	u8 *kaddr;
	unsigned int rec_size = ni->itype.index.block_size;
	ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
	struct buffer_head *bh, *head, *tbh, *rec_start_bh;
	struct buffer_head *bhs[MAX_BUF_PER_PAGE];
	runlist_element *rl;
	int i, nr_locked_nis, nr_recs, nr_bhs, max_bhs, bhs_per_rec, err, err2;
	unsigned bh_size, rec_size_bits;
	BOOL sync, is_mft, page_is_dirty, rec_is_dirty;
	unsigned char bh_size_bits;

	ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
			"0x%lx.", vi->i_ino, ni->type, page->index);
	BUG_ON(!NInoNonResident(ni));
	BUG_ON(!NInoMstProtected(ni));
	is_mft = (S_ISREG(vi->i_mode) && !vi->i_ino);
	/*
	 * NOTE: ntfs_write_mst_block() would be called for $MFTMirr if a page
	 * in its page cache were to be marked dirty. However this should
	 * never happen with the current driver and considering we do not
	 * handle this case here we do want to BUG(), at least for now.
	 */
	BUG_ON(!(is_mft || S_ISDIR(vi->i_mode) ||
			(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
	bh_size_bits = vi->i_blkbits;
	bh_size = 1 << bh_size_bits;
	max_bhs = PAGE_CACHE_SIZE / bh_size;
	BUG_ON(!max_bhs);
	BUG_ON(max_bhs > MAX_BUF_PER_PAGE);

	/* Were we called for sync purposes? */
	sync = (wbc->sync_mode == WB_SYNC_ALL);

	/* Make sure we have mapped buffers. */
	BUG_ON(!page_has_buffers(page));
	bh = head = page_buffers(page);
	BUG_ON(!bh);

	rec_size_bits = ni->itype.index.block_size_bits;
	BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
	bhs_per_rec = rec_size >> bh_size_bits;
	BUG_ON(!bhs_per_rec);

	/* The first block in the page. */
	rec_block = block = (sector_t)page->index <<
			(PAGE_CACHE_SHIFT - bh_size_bits);

	/* The first out of bounds block for the data size. */
	dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;

	rl = NULL;
	err = err2 = nr_bhs = nr_recs = nr_locked_nis = 0;
	page_is_dirty = rec_is_dirty = FALSE;
	rec_start_bh = NULL;
	do {
		BOOL is_retry = FALSE;

		if (likely(block < rec_block)) {
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				set_buffer_uptodate(bh);
				continue;
			}
			/*
			 * This block is not the first one in the record. We
			 * ignore the buffer's dirty state because we could
			 * have raced with a parallel mark_ntfs_record_dirty().
			 */
			if (!rec_is_dirty)
				continue;
			if (unlikely(err2)) {
				if (err2 != -ENOMEM)
					clear_buffer_dirty(bh);
				continue;
			}
		} else /* if (block == rec_block) */ {
			BUG_ON(block > rec_block);
			/* This block is the first one in the record. */
			rec_block += bhs_per_rec;
			err2 = 0;
			if (unlikely(block >= dblock)) {
				clear_buffer_dirty(bh);
				continue;
			}
			if (!buffer_dirty(bh)) {
				/* Clean records are not written out. */
				rec_is_dirty = FALSE;
				continue;
			}
			rec_is_dirty = TRUE;
			rec_start_bh = bh;
		}
		/* Need to map the buffer if it is not mapped already. */
		if (unlikely(!buffer_mapped(bh))) {
			VCN vcn;
			LCN lcn;
			unsigned int vcn_ofs;

			bh->b_bdev = vol->sb->s_bdev;
			/* Obtain the vcn and offset of the current block. */
			vcn = (VCN)block << bh_size_bits;
			vcn_ofs = vcn & vol->cluster_size_mask;
			vcn >>= vol->cluster_size_bits;
			if (!rl) {
lock_retry_remap:
				down_read(&ni->runlist.lock);
				rl = ni->runlist.rl;
			}
			if (likely(rl != NULL)) {
				/* Seek to element containing target vcn. */
				while (rl->length && rl[1].vcn <= vcn)
					rl++;
				lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
			} else
				lcn = LCN_RL_NOT_MAPPED;
			/* Successful remap. */
			if (likely(lcn >= 0)) {
				/* Setup buffer head to correct block. */
				bh->b_blocknr = ((lcn <<
						vol->cluster_size_bits) +
						vcn_ofs) >> bh_size_bits;
				set_buffer_mapped(bh);
			} else {
				/*
				 * Remap failed. Retry to map the runlist once
				 * unless we are working on $MFT which always
				 * has the whole of its runlist in memory.
				 */
				if (!is_mft && !is_retry &&
						lcn == LCN_RL_NOT_MAPPED) {
					is_retry = TRUE;
					/*
					 * Attempt to map runlist, dropping
					 * lock for the duration.
					 */
					up_read(&ni->runlist.lock);
					err2 = ntfs_map_runlist(ni, vcn);
					if (likely(!err2))
						goto lock_retry_remap;
					if (err2 == -ENOMEM)
						page_is_dirty = TRUE;
					lcn = err2;
				} else {
					err2 = -EIO;
					if (!rl)
						up_read(&ni->runlist.lock);
				}
				/* Hard error. Abort writing this record. */
				if (!err || err == -ENOMEM)
					err = err2;
				bh->b_blocknr = -1;
				ntfs_error(vol->sb, "Cannot write ntfs record "
						"0x%llx (inode 0x%lx, "
						"attribute type 0x%x) because "
						"its location on disk could "
						"not be determined (error "
						"code %lli).",
						(long long)block <<
						bh_size_bits >>
						vol->mft_record_size_bits,
						ni->mft_no, ni->type,
						(long long)lcn);
				/*
				 * If this is not the first buffer, remove the
				 * buffers in this record from the list of
				 * buffers to write and clear their dirty bit
				 * if not error -ENOMEM.
				 */
				if (rec_start_bh != bh) {
					while (bhs[--nr_bhs] != rec_start_bh)
						;
					if (err2 != -ENOMEM) {
						do {
							clear_buffer_dirty(
								rec_start_bh);
						} while ((rec_start_bh =
								rec_start_bh->
								b_this_page) !=
								bh);
					}
				}
				continue;
			}
		}
		BUG_ON(!buffer_uptodate(bh));
		BUG_ON(nr_bhs >= max_bhs);
		bhs[nr_bhs++] = bh;
	} while (block++, (bh = bh->b_this_page) != head);
	if (unlikely(rl))
		up_read(&ni->runlist.lock);
	/* If there were no dirty buffers, we are done. */
	if (!nr_bhs)
		goto done;
	/* Map the page so we can access its contents. */
	kaddr = kmap(page);
	/* Clear the page uptodate flag whilst the mst fixups are applied. */
	BUG_ON(!PageUptodate(page));
	ClearPageUptodate(page);
	for (i = 0; i < nr_bhs; i++) {
		unsigned int ofs;

		/* Skip buffers which are not at the beginning of records. */
		if (i % bhs_per_rec)
			continue;
		tbh = bhs[i];
		ofs = bh_offset(tbh);
		if (is_mft) {
			ntfs_inode *tni;
			unsigned long mft_no;

			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
					>> rec_size_bits;
			/* Check whether to write this mft record. */
			tni = NULL;
			if (!ntfs_may_write_mft_record(vol, mft_no,
					(MFT_RECORD*)(kaddr + ofs), &tni)) {
				/*
				 * The record should not be written. This
				 * means we need to redirty the page before
				 * returning.
				 */
				page_is_dirty = TRUE;
				/*
				 * Remove the buffers in this mft record from
				 * the list of buffers to write.
				 */
				do {
					bhs[i] = NULL;
				} while (++i % bhs_per_rec);
				continue;
			}
			/*
			 * The record should be written. If a locked ntfs
			 * inode was returned, add it to the array of locked
			 * ntfs inodes.
			 */
			if (tni)
				locked_nis[nr_locked_nis++] = tni;
		}
		/* Apply the mst protection fixups. */
		err2 = pre_write_mst_fixup((NTFS_RECORD*)(kaddr + ofs),
				rec_size);
		if (unlikely(err2)) {
			if (!err || err == -ENOMEM)
				err = -EIO;
			ntfs_error(vol->sb, "Failed to apply mst fixups "
					"(inode 0x%lx, attribute type 0x%x, "
					"page index 0x%lx, page offset 0x%x)!"
					" Unmount and run chkdsk.", vi->i_ino,
					ni->type, page->index, ofs);
			/*
			 * Mark all the buffers in this record clean as we do
			 * not want to write corrupt data to disk.
			 */
			do {
				clear_buffer_dirty(bhs[i]);
				bhs[i] = NULL;
			} while (++i % bhs_per_rec);
			continue;
		}
		nr_recs++;
	}
	/* If no records are to be written out, we are done. */
	if (!nr_recs)
		goto unm_done;
	flush_dcache_page(page);
	/* Lock buffers and start synchronous write i/o on them. */
	for (i = 0; i < nr_bhs; i++) {
		tbh = bhs[i];
		if (!tbh)
			continue;
		if (unlikely(test_set_buffer_locked(tbh)))
			BUG();
		/* The buffer dirty state is now irrelevant, just clean it. */
		clear_buffer_dirty(tbh);
		BUG_ON(!buffer_uptodate(tbh));
		BUG_ON(!buffer_mapped(tbh));
		get_bh(tbh);
		tbh->b_end_io = end_buffer_write_sync;
		submit_bh(WRITE, tbh);
	}
	/* Synchronize the mft mirror now if not @sync. */
	if (is_mft && !sync)
		goto do_mirror;
do_wait:
	/* Wait on i/o completion of buffers. */
	for (i = 0; i < nr_bhs; i++) {
		tbh = bhs[i];
		if (!tbh)
			continue;
		wait_on_buffer(tbh);
		if (unlikely(!buffer_uptodate(tbh))) {
			ntfs_error(vol->sb, "I/O error while writing ntfs "
					"record buffer (inode 0x%lx, "
					"attribute type 0x%x, page index "
					"0x%lx, page offset 0x%lx)! Unmount "
					"and run chkdsk.", vi->i_ino, ni->type,
					page->index, bh_offset(tbh));
			if (!err || err == -ENOMEM)
				err = -EIO;
			/*
			 * Set the buffer uptodate so the page and buffer
			 * states do not become out of sync.
			 */
			set_buffer_uptodate(tbh);
		}
	}
	/* If @sync, now synchronize the mft mirror. */
	if (is_mft && sync) {
do_mirror:
		for (i = 0; i < nr_bhs; i++) {
			unsigned long mft_no;
			unsigned int ofs;

			/*
			 * Skip buffers which are not at the beginning of
			 * records.
			 */
			if (i % bhs_per_rec)
				continue;
			tbh = bhs[i];
			/* Skip removed buffers (and hence records). */
			if (!tbh)
				continue;
			ofs = bh_offset(tbh);
			/* Get the mft record number. */
			mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
					>> rec_size_bits;
			if (mft_no < vol->mftmirr_size)
				ntfs_sync_mft_mirror(vol, mft_no,
						(MFT_RECORD*)(kaddr + ofs),
						sync);
		}
		if (!sync)
			goto do_wait;
	}
	/* Remove the mst protection fixups again. */
	for (i = 0; i < nr_bhs; i++) {
		if (!(i % bhs_per_rec)) {
			tbh = bhs[i];
			if (!tbh)
				continue;
			post_write_mst_fixup((NTFS_RECORD*)(kaddr +
					bh_offset(tbh)));
		}
	}
	flush_dcache_page(page);
unm_done:
	/* Unlock any locked inodes. */
	while (nr_locked_nis-- > 0) {
		ntfs_inode *tni, *base_tni;

		tni = locked_nis[nr_locked_nis];
		/* Get the base inode. */
		down(&tni->extent_lock);
		if (tni->nr_extents >= 0)
			base_tni = tni;
		else {
			base_tni = tni->ext.base_ntfs_ino;
			BUG_ON(!base_tni);
		}
		up(&tni->extent_lock);
		ntfs_debug("Unlocking %s inode 0x%lx.",
				tni == base_tni ? "base" : "extent",
				tni->mft_no);
		up(&tni->mrec_lock);
		atomic_dec(&tni->count);
		iput(VFS_I(base_tni));
	}
	SetPageUptodate(page);
	kunmap(page);
done:
	if (unlikely(err && err != -ENOMEM)) {
		/*
		 * Set page error if there is only one ntfs record in the page.
		 * Otherwise we would lose per-record granularity.
		 */
		if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
			SetPageError(page);
		NVolSetErrors(vol);
	}
	if (page_is_dirty) {
		ntfs_debug("Page still contains one or more dirty ntfs "
				"records. Redirtying the page starting at "
				"record 0x%lx.", page->index <<
				(PAGE_CACHE_SHIFT - rec_size_bits));
		redirty_page_for_writepage(wbc, page);
		unlock_page(page);
	} else {
		/*
		 * Keep the VM happy. This must be done otherwise the
		 * radix-tree tag PAGECACHE_TAG_DIRTY remains set even though
		 * the page is clean.
		 */
		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
	}
	if (likely(!err))
		ntfs_debug("Done.");
	return err;
}

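/*
 * Illustrative sketch, not part of the original driver: the bookkeeping
 * above hinges on bhs_per_rec = rec_size >> bh_size_bits. For 1024-byte
 * mft records and 512-byte buffer heads, bhs_per_rec = 2, so a 4096-byte
 * page carries 4 records backed by 8 buffer heads, and the test
 * "i % bhs_per_rec" picks out buffers 0, 2, 4, and 6 as the record starts
 * at which the mst fixups are applied and the mft record numbers computed.
 */
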
/**
 * ntfs_writepage - write a @page to the backing store
 * @page:	page cache page to write out
 * @wbc:	writeback control structure
 *
 * This is called from the VM when it wants to have a dirty ntfs page cache
 * page cleaned. The VM has already locked the page and marked it clean.
 *
 * For non-resident attributes, ntfs_writepage() writes the @page by calling
 * the ntfs version of the generic block_write_full_page() function,
 * ntfs_write_block(), which in turn if necessary creates and writes the
 * buffers associated with the page asynchronously.
 *
 * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying
 * the data to the mft record (which at this stage is most likely in memory).
 * The mft record is then marked dirty and written out asynchronously via the
 * vfs inode dirty code path for the inode the mft record belongs to or via the
 * vm page dirty code path for the page the mft record is in.
 *
 * Based on ntfs_readpage() and fs/buffer.c::block_write_full_page().
 *
 * Return 0 on success and -errno on error.
 */
static int ntfs_writepage(struct page *page, struct writeback_control *wbc)
{
	loff_t i_size;
	struct inode *vi = page->mapping->host;
	ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
	char *kaddr;
	ntfs_attr_search_ctx *ctx = NULL;
	MFT_RECORD *m = NULL;
	u32 attr_len;
	int err;

retry_writepage:
	BUG_ON(!PageLocked(page));
	i_size = i_size_read(vi);
	/* Is the page fully outside i_size? (truncate in progress) */
	if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
			PAGE_CACHE_SHIFT)) {
		/*
		 * The page may have dirty, unmapped buffers. Make them
		 * freeable here, so the page does not leak.
		 */
		block_invalidatepage(page, 0);
		unlock_page(page);
		ntfs_debug("Write outside i_size - truncated?");
		return 0;
	}
	/*
	 * Only $DATA attributes can be encrypted and only unnamed $DATA
	 * attributes can be compressed. Index root can have the flags set but
	 * this means to create compressed/encrypted files, not that the
	 * attribute is compressed/encrypted.
	 */
	if (ni->type != AT_INDEX_ROOT) {
		/* If file is encrypted, deny access, just like NT4. */
		if (NInoEncrypted(ni)) {
			unlock_page(page);
			BUG_ON(ni->type != AT_DATA);
			ntfs_debug("Denying write access to encrypted "
					"file.");
			return -EACCES;
		}
		/* Compressed data streams are handled in compress.c. */
		if (NInoNonResident(ni) && NInoCompressed(ni)) {
			BUG_ON(ni->type != AT_DATA);
			BUG_ON(ni->name_len);
			// TODO: Implement and replace this with
			// return ntfs_write_compressed_block(page);
			unlock_page(page);
			ntfs_error(vi->i_sb, "Writing to compressed files is "
					"not supported yet. Sorry.");
			return -EOPNOTSUPP;
		}
		// TODO: Implement and remove this check.
		if (NInoNonResident(ni) && NInoSparse(ni)) {
			unlock_page(page);
			ntfs_error(vi->i_sb, "Writing to sparse files is not "
					"supported yet. Sorry.");
			return -EOPNOTSUPP;
		}
	}
	/* NInoNonResident() == NInoIndexAllocPresent() */
	if (NInoNonResident(ni)) {
		/* We have to zero every time due to mmap-at-end-of-file. */
		if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
			/* The page straddles i_size. */
			unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
			kaddr = kmap_atomic(page, KM_USER0);
			memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
		}
		/* Handle mst protected attributes. */
		if (NInoMstProtected(ni))
			return ntfs_write_mst_block(page, wbc);
		/* Normal, non-resident data stream. */
		return ntfs_write_block(page, wbc);
	}
	/*
	 * Attribute is resident, implying it is not compressed, encrypted, or
	 * mst protected. This also means the attribute is smaller than an mft
	 * record and hence smaller than a page, so can simply return error on
	 * any pages with index above 0. Note the attribute can actually be
	 * marked compressed but if it is resident the actual data is not
	 * compressed so we are ok to ignore the compressed flag here.
	 */
	BUG_ON(page_has_buffers(page));
	BUG_ON(!PageUptodate(page));
	if (unlikely(page->index > 0)) {
		ntfs_error(vi->i_sb, "BUG()! page->index (0x%lx) > 0. "
				"Aborting write.", page->index);
		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
		end_page_writeback(page);
		return -EIO;
	}
	if (!NInoAttr(ni))
		base_ni = ni;
	else
		base_ni = ni->ext.base_ntfs_ino;
	/* Map, pin, and lock the mft record. */
	m = map_mft_record(base_ni);
	if (IS_ERR(m)) {
		err = PTR_ERR(m);
		m = NULL;
		ctx = NULL;
		goto err_out;
	}
	/*
	 * If a parallel write made the attribute non-resident, drop the mft
	 * record and retry the writepage.
	 */
	if (unlikely(NInoNonResident(ni))) {
		unmap_mft_record(base_ni);
		goto retry_writepage;
	}
	ctx = ntfs_attr_get_search_ctx(base_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
			CASE_SENSITIVE, 0, NULL, 0, ctx);
	if (unlikely(err))
		goto err_out;
	/*
	 * Keep the VM happy. This must be done otherwise the radix-tree tag
	 * PAGECACHE_TAG_DIRTY remains set even though the page is clean.
	 */
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);
	unlock_page(page);
	/*
	 * Here, we do not need to zero the out of bounds area every time
	 * because the below memcpy() already takes care of the
	 * mmap-at-end-of-file requirements. If the file is converted to a
	 * non-resident one, then the code path used is switched to the
	 * non-resident one where the zeroing happens on each ntfs_writepage()
	 * invocation.
	 */
	attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
	i_size = i_size_read(vi);
	if (unlikely(attr_len > i_size)) {
		attr_len = i_size;
		ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
	}
	kaddr = kmap_atomic(page, KM_USER0);
	/* Copy the data from the page to the mft record. */
	memcpy((u8*)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset),
			kaddr, attr_len);
	flush_dcache_mft_record_page(ctx->ntfs_ino);
	/* Zero out of bounds area in the page cache page. */
	memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
	flush_dcache_page(page);
	kunmap_atomic(kaddr, KM_USER0);

	end_page_writeback(page);

	/* Mark the mft record dirty, so it gets written back. */
	mark_mft_record_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(base_ni);
	return 0;
err_out:
	if (err == -ENOMEM) {
		ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying "
				"page so we try again later.");
		/*
		 * Put the page back on mapping->dirty_pages, but leave its
		 * buffers' dirty state as-is.
		 */
		redirty_page_for_writepage(wbc, page);
		err = 0;
	} else {
		ntfs_error(vi->i_sb, "Resident attribute write failed with "
				"error %i.", err);
		SetPageError(page);
		NVolSetErrors(ni->vol);
		make_bad_inode(vi);
	}
	unlock_page(page);
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(base_ni);
	return err;
}

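/*
 * Illustrative sketch, not part of the original driver: the i_size
 * straddle check in ntfs_writepage() zeroes the tail of the last page of
 * a non-resident attribute before writing it out. With 4096-byte pages
 * and i_size = 0x2300, page index 2 satisfies
 * index >= (i_size >> PAGE_CACHE_SHIFT), ofs = i_size & ~PAGE_CACHE_MASK
 * = 0x300, and bytes 0x300-0xfff of the page are cleared, which stops data
 * scribbled past EOF through mmap() from reaching the disk.
 */
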
/**
 * ntfs_prepare_nonresident_write - prepare a page of a non-resident attribute
 * @page:	page cache page to prepare
 * @from:	offset in the page at which the write starts
 * @to:		offset in the page at which the write ends
 *
 * Map and, where necessary, read in the buffers of @page covering the byte
 * range @from to @to in preparation for a write into that range.
 */
1500static int ntfs_prepare_nonresident_write(struct page *page,
1501 unsigned from, unsigned to)
1502{
1503 VCN vcn;
1504 LCN lcn;
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001505 s64 initialized_size;
1506 loff_t i_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 sector_t block, ablock, iblock;
1508 struct inode *vi;
1509 ntfs_inode *ni;
1510 ntfs_volume *vol;
1511 runlist_element *rl;
1512 struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001513 unsigned long flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 unsigned int vcn_ofs, block_start, block_end, blocksize;
1515 int err;
1516 BOOL is_retry;
1517 unsigned char blocksize_bits;
1518
1519 vi = page->mapping->host;
1520 ni = NTFS_I(vi);
1521 vol = ni->vol;
1522
1523 ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
1524 "0x%lx, from = %u, to = %u.", ni->mft_no, ni->type,
1525 page->index, from, to);
1526
1527 BUG_ON(!NInoNonResident(ni));
1528
1529 blocksize_bits = vi->i_blkbits;
1530 blocksize = 1 << blocksize_bits;
1531
1532 /*
1533 * create_empty_buffers() will create uptodate/dirty buffers if the
1534 * page is uptodate/dirty.
1535 */
1536 if (!page_has_buffers(page))
1537 create_empty_buffers(page, blocksize, 0);
1538 bh = head = page_buffers(page);
1539 if (unlikely(!bh))
1540 return -ENOMEM;
1541
1542 /* The first block in the page. */
1543 block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
1544
Anton Altaparmakov07a4e2d2005-01-12 13:08:26 +00001545 read_lock_irqsave(&ni->size_lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001546 /*
Anton Altaparmakovb6ad6c52005-02-15 10:08:43 +00001547 * The first out of bounds block for the allocated size. No need to
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548 * round up as allocated_size is in multiples of cluster size and the
1549 * minimum cluster size is 512 bytes, which is equal to the smallest
1550 * blocksize.
         */
        ablock = ni->allocated_size >> blocksize_bits;
        i_size = i_size_read(vi);
        initialized_size = ni->initialized_size;
        read_unlock_irqrestore(&ni->size_lock, flags);

        /* The last (fully or partially) initialized block. */
        iblock = initialized_size >> blocksize_bits;

        /* Loop through all the buffers in the page. */
        block_start = 0;
        rl = NULL;
        err = 0;
        do {
                block_end = block_start + blocksize;
                /*
                 * If buffer @bh is outside the write, just mark it uptodate
                 * if the page is uptodate and continue with the next buffer.
                 */
                if (block_end <= from || block_start >= to) {
                        if (PageUptodate(page)) {
                                if (!buffer_uptodate(bh))
                                        set_buffer_uptodate(bh);
                        }
                        continue;
                }
                /*
                 * @bh is at least partially being written to.
                 * Make sure it is not marked as new.
                 */
                //if (buffer_new(bh))
                //      clear_buffer_new(bh);

                if (block >= ablock) {
                        // TODO: block is above allocated_size, need to
                        // allocate it. Best done in one go to accommodate not
                        // only block but all above blocks up to and including:
                        // ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
                        // - 1) >> blocksize_bits. Obviously will need to round
                        // up to next cluster boundary, too. This should be
                        // done with a helper function, so it can be reused
                        // (see the illustrative sketch after this function).
                        ntfs_error(vol->sb, "Writing beyond allocated size "
                                        "is not supported yet. Sorry.");
                        err = -EOPNOTSUPP;
                        goto err_out;
                        // Need to update ablock.
                        // Need to set_buffer_new() on all block bhs that are
                        // newly allocated.
                }
                /*
                 * Now we have enough allocated size to fulfill the whole
                 * request, i.e. block < ablock is true.
                 */
                if (unlikely((block >= iblock) &&
                                (initialized_size < i_size))) {
                        /*
                         * If this page is fully outside initialized size, zero
                         * out all pages between the current initialized size
                         * and the current page. Just use ntfs_readpage() to do
                         * the zeroing transparently.
                         */
                        if (block > iblock) {
                                // TODO:
                                // For each page do:
                                // - read_cache_page()
                                // Again for each page do:
                                // - wait_on_page_locked()
                                // - Check (PageUptodate(page) &&
                                //   !PageError(page))
                                // Update initialized size in the attribute and
                                // in the inode.
                                // Again, for each page do:
                                // __set_page_dirty_buffers();
                                // page_cache_release()
                                // We don't need to wait on the writes.
                                // Update iblock.
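#if 0
                                /*
                                 * Untested sketch of the TODO list above,
                                 * using only names from the surrounding code:
                                 * read each page between the old initialized
                                 * size and this page through the page cache,
                                 * letting ntfs_readpage() do the zeroing
                                 * transparently, then dirty it.  Error
                                 * handling and the initialized size update
                                 * are omitted.
                                 */
                                struct address_space *mapping = page->mapping;
                                pgoff_t idx = initialized_size >>
                                                PAGE_CACHE_SHIFT;

                                for (; idx < page->index; idx++) {
                                        struct page *zpage = read_cache_page(
                                                        mapping, idx,
                                                        (filler_t*)mapping->
                                                        a_ops->readpage, NULL);

                                        if (IS_ERR(zpage))
                                                break;
                                        wait_on_page_locked(zpage);
                                        if (PageUptodate(zpage) &&
                                                        !PageError(zpage))
                                                __set_page_dirty_buffers(
                                                                zpage);
                                        page_cache_release(zpage);
                                }
#endif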
                        }
                        /*
                         * The current page straddles initialized size. Zero
                         * all non-uptodate buffers and set them uptodate (and
                         * dirty?). Note, there aren't any non-uptodate buffers
                         * if the page is uptodate.
                         * FIXME: For an uptodate page, the buffers may need to
                         * be written out because they were not initialized on
                         * disk before.
                         */
                        if (!PageUptodate(page)) {
                                // TODO:
                                // Zero any non-uptodate buffers up to i_size.
                                // Set them uptodate and dirty.
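#if 0
                                /*
                                 * Untested sketch of the zeroing described in
                                 * the TODO above: zero every buffer in the
                                 * page that is not yet uptodate and mark it
                                 * uptodate and dirty.  Limiting the zeroing
                                 * to the region below i_size is omitted.
                                 */
                                struct buffer_head *zbh = head;
                                u8 *zaddr = kmap_atomic(page, KM_USER0);

                                do {
                                        if (buffer_uptodate(zbh))
                                                continue;
                                        memset(zaddr + bh_offset(zbh), 0,
                                                        zbh->b_size);
                                        set_buffer_uptodate(zbh);
                                        mark_buffer_dirty(zbh);
                                } while ((zbh = zbh->b_this_page) != head);
                                kunmap_atomic(zaddr, KM_USER0);
                                flush_dcache_page(page);
#endif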
                        }
                        // TODO:
                        // Update initialized size in the attribute and in the
                        // inode (up to i_size).
                        // Update iblock.
                        // FIXME: This is inefficient. Try to batch the two
                        // size changes to happen in one go.
                        ntfs_error(vol->sb, "Writing beyond initialized size "
                                        "is not supported yet. Sorry.");
                        err = -EOPNOTSUPP;
                        goto err_out;
                        // Do NOT set_buffer_new() BUT DO clear buffer range
                        // outside write request range.
                        // set_buffer_uptodate() on complete buffers as well as
                        // set_buffer_dirty().
                }

                /* Need to map unmapped buffers. */
                if (!buffer_mapped(bh)) {
                        /* Unmapped buffer. Need to map it. */
                        bh->b_bdev = vol->sb->s_bdev;

                        /* Convert block into corresponding vcn and offset. */
                        vcn = (VCN)block << blocksize_bits >>
                                        vol->cluster_size_bits;
                        vcn_ofs = ((VCN)block << blocksize_bits) &
                                        vol->cluster_size_mask;
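                        /*
                         * E.g. with 4kiB clusters (cluster_size_bits = 12,
                         * cluster_size_mask = 0xfff) and 512-byte blocks,
                         * block 9 yields vcn = 0x1200 >> 12 = 1 and
                         * vcn_ofs = 0x1200 & 0xfff = 0x200.
                         */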

                        is_retry = FALSE;
                        if (!rl) {
lock_retry_remap:
                                down_read(&ni->runlist.lock);
                                rl = ni->runlist.rl;
                        }
                        if (likely(rl != NULL)) {
                                /* Seek to element containing target vcn. */
                                while (rl->length && rl[1].vcn <= vcn)
                                        rl++;
                                lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
                        } else
                                lcn = LCN_RL_NOT_MAPPED;
                        if (unlikely(lcn < 0)) {
                                /*
                                 * We extended the attribute allocation above.
                                 * If we hit an ENOENT here it means that the
                                 * allocation was insufficient which is a bug.
                                 */
                                BUG_ON(lcn == LCN_ENOENT);

                                /* It is a hole, need to instantiate it. */
                                if (lcn == LCN_HOLE) {
                                        // TODO: Instantiate the hole.
                                        // clear_buffer_new(bh);
                                        // unmap_underlying_metadata(bh->b_bdev,
                                        //      bh->b_blocknr);
                                        // For non-uptodate buffers, need to
                                        // zero out the region outside the
                                        // request in this bh or all bhs,
                                        // depending on what we implemented
                                        // above.
                                        // Need to flush_dcache_page().
                                        // Or could use set_buffer_new()
                                        // instead?
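#if 0
                                        /*
                                         * Untested sketch of the buffer
                                         * bookkeeping the TODO above asks
                                         * for, assuming a cluster has been
                                         * allocated for the hole and its lcn
                                         * placed in the hypothetical variable
                                         * @new_lcn (the allocation itself and
                                         * the zeroing are not shown):
                                         */
                                        bh->b_blocknr = ((new_lcn <<
                                                        vol->cluster_size_bits)
                                                        + vcn_ofs) >>
                                                        blocksize_bits;
                                        set_buffer_mapped(bh);
                                        clear_buffer_new(bh);
                                        unmap_underlying_metadata(bh->b_bdev,
                                                        bh->b_blocknr);
                                        flush_dcache_page(page);
#endif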
                                        ntfs_error(vol->sb, "Writing into "
                                                        "sparse regions is "
                                                        "not supported yet. "
                                                        "Sorry.");
                                        err = -EOPNOTSUPP;
                                        if (!rl)
                                                up_read(&ni->runlist.lock);
                                        goto err_out;
                                } else if (!is_retry &&
                                                lcn == LCN_RL_NOT_MAPPED) {
                                        is_retry = TRUE;
                                        /*
                                         * Attempt to map runlist, dropping
                                         * lock for the duration.
                                         */
                                        up_read(&ni->runlist.lock);
                                        err = ntfs_map_runlist(ni, vcn);
                                        if (likely(!err))
                                                goto lock_retry_remap;
                                        rl = NULL;
                                        lcn = err;
                                } else if (!rl)
                                        up_read(&ni->runlist.lock);
                                /*
                                 * Failed to map the buffer, even after
                                 * retrying.
                                 */
                                bh->b_blocknr = -1;
                                ntfs_error(vol->sb, "Failed to write to inode "
                                                "0x%lx, attribute type 0x%x, "
                                                "vcn 0x%llx, offset 0x%x "
                                                "because its location on disk "
                                                "could not be determined%s "
                                                "(error code %lli).",
                                                ni->mft_no, ni->type,
                                                (unsigned long long)vcn,
                                                vcn_ofs, is_retry ? " even "
                                                "after retrying" : "",
                                                (long long)lcn);
                                if (!err)
                                        err = -EIO;
                                goto err_out;
                        }
                        /* We now have a successful remap, i.e. lcn >= 0. */

                        /* Setup buffer head to correct block. */
                        bh->b_blocknr = ((lcn << vol->cluster_size_bits)
                                        + vcn_ofs) >> blocksize_bits;
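                        /*
                         * E.g. continuing the example above, lcn = 2 with
                         * 4kiB clusters and 512-byte blocks gives
                         * b_blocknr = (0x2000 + 0x200) >> 9 = 0x11.
                         */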
                        set_buffer_mapped(bh);

                        // FIXME: Something analogous to this is needed for
                        // each newly allocated block, i.e. BH_New.
                        // FIXME: Might need to take this out of the
                        // if (!buffer_mapped(bh)) {}, depending on how we
                        // implement things during the allocated_size and
                        // initialized_size extension code above.
                        if (buffer_new(bh)) {
                                clear_buffer_new(bh);
                                unmap_underlying_metadata(bh->b_bdev,
                                                bh->b_blocknr);
                                if (PageUptodate(page)) {
                                        set_buffer_uptodate(bh);
                                        continue;
                                }
                                /*
                                 * Page is _not_ uptodate, zero surrounding
                                 * region. NOTE: This is how we decide if to
                                 * zero or not!
                                 */
                                if (block_end > to || block_start < from) {
                                        void *kaddr;

                                        kaddr = kmap_atomic(page, KM_USER0);
                                        if (block_end > to)
                                                memset(kaddr + to, 0,
                                                                block_end - to);
                                        if (block_start < from)
                                                memset(kaddr + block_start, 0,
                                                                from -
                                                                block_start);
                                        flush_dcache_page(page);
                                        kunmap_atomic(kaddr, KM_USER0);
                                }
                                continue;
                        }
                }
                /* @bh is mapped, set it uptodate if the page is uptodate. */
                if (PageUptodate(page)) {
                        if (!buffer_uptodate(bh))
                                set_buffer_uptodate(bh);
                        continue;
                }
                /*
                 * The page is not uptodate. The buffer is mapped. If it is not
                 * uptodate, and it is only partially being written to, we need
                 * to read the buffer in before the write, i.e. right now.
                 */
                if (!buffer_uptodate(bh) &&
                                (block_start < from || block_end > to)) {
                        ll_rw_block(READ, 1, &bh);
                        *wait_bh++ = bh;
                }
        } while (block++, block_start = block_end,
                        (bh = bh->b_this_page) != head);

        /* Release the lock if we took it. */
        if (rl) {
                up_read(&ni->runlist.lock);
                rl = NULL;
        }

        /* If we issued read requests, let them complete. */
        while (wait_bh > wait) {
                wait_on_buffer(*--wait_bh);
                if (!buffer_uptodate(*wait_bh))
                        return -EIO;
        }

        ntfs_debug("Done.");
        return 0;
err_out:
        /*
         * Zero out any newly allocated blocks to avoid exposing stale data.
         * If BH_New is set, we know that the block was newly allocated in the
         * above loop.
         * FIXME: What about initialized_size increments? Have we done all the
         * required zeroing above? If not this error handling is broken, and
         * in particular the if (block_end <= from) check is completely bogus.
         */
        bh = head;
        block_start = 0;
        is_retry = FALSE;
        do {
                block_end = block_start + blocksize;
                if (block_end <= from)
                        continue;
                if (block_start >= to)
                        break;
                if (buffer_new(bh)) {
                        void *kaddr;

                        clear_buffer_new(bh);
                        kaddr = kmap_atomic(page, KM_USER0);
                        memset(kaddr + block_start, 0, bh->b_size);
                        kunmap_atomic(kaddr, KM_USER0);
                        set_buffer_uptodate(bh);
                        mark_buffer_dirty(bh);
                        is_retry = TRUE;
                }
        } while (block_start = block_end, (bh = bh->b_this_page) != head);
        if (is_retry)
                flush_dcache_page(page);
        if (rl)
                up_read(&ni->runlist.lock);
        return err;
}
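
/*
 * Illustrative, untested sketch of the allocation helper suggested in the
 * TODO inside ntfs_prepare_nonresident_write() above: compute the first
 * block after the end of the write, rounded up to the next cluster boundary,
 * so that all blocks below it can be allocated in one go.  The function name
 * is hypothetical, nothing calls it, and it is compiled out.
 */
#if 0
static sector_t ntfs_blocks_to_allocate(ntfs_volume *vol, struct page *page,
                unsigned to, unsigned char blocksize_bits)
{
        s64 end_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + to;

        /* Round up to the next cluster boundary. */
        end_ofs = (end_ofs + vol->cluster_size - 1) &
                        ~(s64)vol->cluster_size_mask;
        /* The first block that does not need allocating. */
        return end_ofs >> blocksize_bits;
}
#endif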

/**
 * ntfs_prepare_write - prepare a page for receiving data
 *
 * This is called from generic_file_write() with i_sem held on the inode
 * (@page->mapping->host). The @page is locked but not kmap()ped. The source
 * data has not yet been copied into the @page.
 *
 * Need to extend the attribute/fill in holes if necessary, create blocks and
 * make partially overwritten blocks uptodate.
 *
 * i_size is not to be modified yet.
 *
 * Return 0 on success or -errno on error.
 *
 * Should be using block_prepare_write() [support for sparse files] or
 * cont_prepare_write() [no support for sparse files]. Cannot do that due to
 * ntfs specifics but can look at them for implementation guidance.
 *
 * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
 * the first byte in the page that will be written to and @to is the first byte
 * after the last byte that will be written to.
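 * For example, a write covering bytes 0x100 to 0x2ff of the first page of the
 * attribute has @from = 0x100 and @to = 0x300.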
 */
static int ntfs_prepare_write(struct file *file, struct page *page,
                unsigned from, unsigned to)
{
        s64 new_size;
        loff_t i_size;
        struct inode *vi = page->mapping->host;
        ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi);
        ntfs_volume *vol = ni->vol;
        ntfs_attr_search_ctx *ctx = NULL;
        MFT_RECORD *m = NULL;
        ATTR_RECORD *a;
        u8 *kaddr;
        u32 attr_len;
        int err;

        ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
                        "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
                        page->index, from, to);
        BUG_ON(!PageLocked(page));
        BUG_ON(from > PAGE_CACHE_SIZE);
        BUG_ON(to > PAGE_CACHE_SIZE);
        BUG_ON(from > to);
        BUG_ON(NInoMstProtected(ni));
        /*
         * If a previous ntfs_truncate() failed, repeat it and abort if it
         * fails again.
         */
        if (unlikely(NInoTruncateFailed(ni))) {
                down_write(&vi->i_alloc_sem);
                err = ntfs_truncate(vi);
                up_write(&vi->i_alloc_sem);
                if (err || NInoTruncateFailed(ni)) {
                        if (!err)
                                err = -EIO;
                        goto err_out;
                }
        }
        /* If the attribute is not resident, deal with it elsewhere. */
        if (NInoNonResident(ni)) {
                /*
                 * Only unnamed $DATA attributes can be compressed, encrypted,
                 * and/or sparse.
                 */
                if (ni->type == AT_DATA && !ni->name_len) {
                        /* If file is encrypted, deny access, just like NT4. */
                        if (NInoEncrypted(ni)) {
                                ntfs_debug("Denying write access to encrypted "
                                                "file.");
                                return -EACCES;
                        }
                        /* Compressed data streams are handled in compress.c. */
                        if (NInoCompressed(ni)) {
                                // TODO: Implement and replace this check with
                                // return ntfs_write_compressed_block(page);
                                ntfs_error(vi->i_sb, "Writing to compressed "
                                                "files is not supported yet. "
                                                "Sorry.");
                                return -EOPNOTSUPP;
                        }
                        // TODO: Implement and remove this check.
                        if (NInoSparse(ni)) {
                                ntfs_error(vi->i_sb, "Writing to sparse files "
                                                "is not supported yet. Sorry.");
                                return -EOPNOTSUPP;
                        }
                }
                /* Normal data stream. */
                return ntfs_prepare_nonresident_write(page, from, to);
        }
        /*
         * Attribute is resident, implying it is not compressed, encrypted, or
         * sparse.
         */
        BUG_ON(page_has_buffers(page));
        new_size = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
        /* If we do not need to resize the attribute allocation we are done. */
        if (new_size <= i_size_read(vi))
                goto done;
        /* Map, pin, and lock the (base) mft record. */
        if (!NInoAttr(ni))
                base_ni = ni;
        else
                base_ni = ni->ext.base_ntfs_ino;
        m = map_mft_record(base_ni);
        if (IS_ERR(m)) {
                err = PTR_ERR(m);
                m = NULL;
                ctx = NULL;
                goto err_out;
        }
        ctx = ntfs_attr_get_search_ctx(base_ni, m);
        if (unlikely(!ctx)) {
                err = -ENOMEM;
                goto err_out;
        }
        err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
                        CASE_SENSITIVE, 0, NULL, 0, ctx);
        if (unlikely(err)) {
                if (err == -ENOENT)
                        err = -EIO;
                goto err_out;
        }
        m = ctx->mrec;
        a = ctx->attr;
        /* The total length of the attribute value. */
        attr_len = le32_to_cpu(a->data.resident.value_length);
        /* Fix a possible earlier failure of ntfs_commit_write(). */
        i_size = i_size_read(vi);
        if (unlikely(attr_len > i_size)) {
                attr_len = i_size;
                a->data.resident.value_length = cpu_to_le32(attr_len);
        }
        /* If we do not need to resize the attribute allocation we are done. */
        if (new_size <= attr_len)
                goto done_unm;
        /* Check if new size is allowed in $AttrDef. */
        err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
        if (unlikely(err)) {
                if (err == -ERANGE) {
                        ntfs_error(vol->sb, "Write would cause the inode "
                                        "0x%lx to exceed the maximum size for "
                                        "its attribute type (0x%x). Aborting "
                                        "write.", vi->i_ino,
                                        le32_to_cpu(ni->type));
                } else {
                        ntfs_error(vol->sb, "Inode 0x%lx has unknown "
                                        "attribute type 0x%x. Aborting "
                                        "write.", vi->i_ino,
                                        le32_to_cpu(ni->type));
                        err = -EIO;
                }
                goto err_out2;
        }
        /*
         * Extend the attribute record to be able to store the new attribute
         * size.
         */
        if (new_size >= vol->mft_record_size || ntfs_attr_record_resize(m, a,
                        le16_to_cpu(a->data.resident.value_offset) +
                        new_size)) {
                /* Not enough space in the mft record. */
                ntfs_error(vol->sb, "Not enough space in the mft record for "
                                "the resized attribute value. This is not "
                                "supported yet. Aborting write.");
                err = -EOPNOTSUPP;
                goto err_out2;
        }
        /*
         * We have enough space in the mft record to fit the write. This
         * implies the attribute is smaller than the mft record and hence the
         * attribute must be in a single page and hence page->index must be 0.
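         * (new_size < vol->mft_record_size was checked above, and an mft
         * record must fit inside a single page for map_mft_record() to work,
         * so the write cannot extend past the first page.)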
         */
        BUG_ON(page->index);
        /*
         * If the beginning of the write is past the old size, enlarge the
         * attribute value up to the beginning of the write and fill it with
         * zeroes.
         */
        if (from > attr_len) {
                memset((u8*)a + le16_to_cpu(a->data.resident.value_offset) +
                                attr_len, 0, from - attr_len);
                a->data.resident.value_length = cpu_to_le32(from);
                /* Zero the corresponding area in the page as well. */
                if (PageUptodate(page)) {
                        kaddr = kmap_atomic(page, KM_USER0);
                        memset(kaddr + attr_len, 0, from - attr_len);
                        kunmap_atomic(kaddr, KM_USER0);
                        flush_dcache_page(page);
                }
        }
        flush_dcache_mft_record_page(ctx->ntfs_ino);
        mark_mft_record_dirty(ctx->ntfs_ino);
done_unm:
        ntfs_attr_put_search_ctx(ctx);
        unmap_mft_record(base_ni);
        /*
         * Because resident attributes are handled by memcpy() to/from the
         * corresponding MFT record, and because this form of i/o is byte
         * aligned rather than block aligned, there is no need to bring the
         * page uptodate here as in the non-resident case where we need to
         * bring the buffers straddled by the write uptodate before
         * generic_file_write() does the copying from userspace.
         *
         * We thus defer bringing the page region outside the written-to
         * region uptodate until ntfs_commit_write(), which makes the code
         * simpler and saves one atomic kmap which is good.
         */
done:
        ntfs_debug("Done.");
        return 0;
err_out:
        if (err == -ENOMEM)
                ntfs_warning(vi->i_sb, "Error allocating memory required to "
                                "prepare the write.");
        else {
                ntfs_error(vi->i_sb, "Resident attribute prepare write failed "
                                "with error %i.", err);
                NVolSetErrors(vol);
                make_bad_inode(vi);
        }
err_out2:
        if (ctx)
                ntfs_attr_put_search_ctx(ctx);
        if (m)
                unmap_mft_record(base_ni);
        return err;
}

/**
 * ntfs_commit_nonresident_write - commit a write to a non-resident attribute
 * @page: page containing the data that was written
 * @from: first byte in @page that was written to
 * @to: first byte after the last byte in @page that was written to
 *
 * Return 0 on success or -errno on error.
 */
static int ntfs_commit_nonresident_write(struct page *page,
                unsigned from, unsigned to)
{
        s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
        struct inode *vi = page->mapping->host;
        struct buffer_head *bh, *head;
        unsigned int block_start, block_end, blocksize;
        BOOL partial;

        ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
                        "0x%lx, from = %u, to = %u.", vi->i_ino,
                        NTFS_I(vi)->type, page->index, from, to);
        blocksize = 1 << vi->i_blkbits;

        // FIXME: We need a whole slew of special cases in here for compressed
        // files, for example...
        // For now, we know ntfs_prepare_write() would have failed in any of
        // the cases that we would need to special case, so we cannot get here
        // in those cases; this is just a ripped-off, unrolled
        // generic_commit_write().

        bh = head = page_buffers(page);
        block_start = 0;
        partial = FALSE;
        do {
                block_end = block_start + blocksize;
                if (block_end <= from || block_start >= to) {
                        if (!buffer_uptodate(bh))
                                partial = TRUE;
                } else {
                        set_buffer_uptodate(bh);
                        mark_buffer_dirty(bh);
                }
        } while (block_start = block_end, (bh = bh->b_this_page) != head);
        /*
         * If this is a partial write which happened to make all buffers
         * uptodate then we can optimize away a bogus ->readpage() for the next
         * read(). Here we 'discover' whether the page went uptodate as a
         * result of this (potentially partial) write.
         */
        if (!partial)
                SetPageUptodate(page);
        /*
         * Not convinced about this at all. See disparity comment above. For
         * now we know ntfs_prepare_write() would have failed in the write
         * exceeds i_size case, so this will never trigger which is fine.
         */
        if (pos > i_size_read(vi)) {
                ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
                                "not supported yet. Sorry.");
                return -EOPNOTSUPP;
                // vi->i_size = pos;
                // mark_inode_dirty(vi);
        }
        ntfs_debug("Done.");
        return 0;
}

/**
 * ntfs_commit_write - commit the received data
 *
 * This is called from generic_file_write() with i_sem held on the inode
 * (@page->mapping->host). The @page is locked but not kmap()ped. The source
 * data has already been copied into the @page.  ntfs_prepare_write() has
 * been called before the data was copied and it returned success, so we can
 * take the results of various BUG checks and some error handling for granted.
 *
 * Need to mark modified blocks dirty so they get written out later when
 * ntfs_writepage() is invoked by the VM.
 *
 * Return 0 on success or -errno on error.
 *
 * Should be using generic_commit_write(). This marks buffers uptodate and
 * dirty, sets the page uptodate if all buffers in the page are uptodate, and
 * updates i_size if the end of io is beyond i_size. In that case, it also
 * marks the inode dirty.
 *
 * Cannot use generic_commit_write() due to ntfs specialities but can look at
 * it for implementation guidance.
 *
 * If things have gone as outlined in ntfs_prepare_write(), then we do not
 * need to do any page content modifications here at all, except in the write
 * to resident attribute case, where we need to do the uptodate bringing here
 * which we combine with the copying into the mft record which means we save
 * one atomic kmap.
 */
static int ntfs_commit_write(struct file *file, struct page *page,
                unsigned from, unsigned to)
{
        struct inode *vi = page->mapping->host;
        ntfs_inode *base_ni, *ni = NTFS_I(vi);
        char *kaddr, *kattr;
        ntfs_attr_search_ctx *ctx;
        MFT_RECORD *m;
        ATTR_RECORD *a;
        u32 attr_len;
        int err;

        ntfs_debug("Entering for inode 0x%lx, attribute type 0x%x, page index "
                        "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
                        page->index, from, to);
        /* If the attribute is not resident, deal with it elsewhere. */
        if (NInoNonResident(ni)) {
                /* Only unnamed $DATA attributes can be compressed/encrypted. */
                if (ni->type == AT_DATA && !ni->name_len) {
                        /* Encrypted files need separate handling. */
                        if (NInoEncrypted(ni)) {
                                // We never get here at present!
                                BUG();
                        }
                        /* Compressed data streams are handled in compress.c. */
                        if (NInoCompressed(ni)) {
                                // TODO: Implement this!
                                // return ntfs_write_compressed_block(page);
                                // We never get here at present!
                                BUG();
                        }
                }
                /* Normal data stream. */
                return ntfs_commit_nonresident_write(page, from, to);
        }
        /*
         * Attribute is resident, implying it is not compressed, encrypted, or
         * sparse.
         */
        if (!NInoAttr(ni))
                base_ni = ni;
        else
                base_ni = ni->ext.base_ntfs_ino;
        /* Map, pin, and lock the mft record. */
        m = map_mft_record(base_ni);
        if (IS_ERR(m)) {
                err = PTR_ERR(m);
                m = NULL;
                ctx = NULL;
                goto err_out;
        }
        ctx = ntfs_attr_get_search_ctx(base_ni, m);
        if (unlikely(!ctx)) {
                err = -ENOMEM;
                goto err_out;
        }
        err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
                        CASE_SENSITIVE, 0, NULL, 0, ctx);
        if (unlikely(err)) {
                if (err == -ENOENT)
                        err = -EIO;
                goto err_out;
        }
        a = ctx->attr;
        /* The total length of the attribute value. */
        attr_len = le32_to_cpu(a->data.resident.value_length);
        BUG_ON(from > attr_len);
        kattr = (u8*)a + le16_to_cpu(a->data.resident.value_offset);
        kaddr = kmap_atomic(page, KM_USER0);
        /* Copy the received data from the page to the mft record. */
        memcpy(kattr + from, kaddr + from, to - from);
        /* Update the attribute length if necessary. */
        if (to > attr_len) {
                attr_len = to;
                a->data.resident.value_length = cpu_to_le32(attr_len);
        }
        /*
         * If the page is not uptodate, bring the out of bounds area(s)
         * uptodate by copying data from the mft record to the page.
         */
        if (!PageUptodate(page)) {
                if (from > 0)
                        memcpy(kaddr, kattr, from);
                if (to < attr_len)
                        memcpy(kaddr + to, kattr + to, attr_len - to);
                /* Zero the region outside the end of the attribute value. */
                if (attr_len < PAGE_CACHE_SIZE)
                        memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
                /*
                 * The probability of not having done any of the above is
                 * extremely small, so we just flush unconditionally.
                 */
                flush_dcache_page(page);
                SetPageUptodate(page);
        }
        kunmap_atomic(kaddr, KM_USER0);
        /* Update i_size if necessary. */
        if (i_size_read(vi) < attr_len) {
                unsigned long flags;

                write_lock_irqsave(&ni->size_lock, flags);
                ni->allocated_size = ni->initialized_size = attr_len;
                i_size_write(vi, attr_len);
                write_unlock_irqrestore(&ni->size_lock, flags);
        }
        /* Mark the mft record dirty, so it gets written back. */
        flush_dcache_mft_record_page(ctx->ntfs_ino);
        mark_mft_record_dirty(ctx->ntfs_ino);
        ntfs_attr_put_search_ctx(ctx);
        unmap_mft_record(base_ni);
        ntfs_debug("Done.");
        return 0;
err_out:
        if (err == -ENOMEM) {
                ntfs_warning(vi->i_sb, "Error allocating memory required to "
                                "commit the write.");
                if (PageUptodate(page)) {
                        ntfs_warning(vi->i_sb, "Page is uptodate, setting "
                                        "dirty so the write will be retried "
                                        "later on by the VM.");
                        /*
                         * Put the page on mapping->dirty_pages, but leave its
                         * buffers' dirty state as-is.
                         */
                        __set_page_dirty_nobuffers(page);
                        err = 0;
                } else
                        ntfs_error(vi->i_sb, "Page is not uptodate. Written "
                                        "data has been lost.");
        } else {
                ntfs_error(vi->i_sb, "Resident attribute commit write failed "
                                "with error %i.", err);
                NVolSetErrors(ni->vol);
                make_bad_inode(vi);
        }
        if (ctx)
                ntfs_attr_put_search_ctx(ctx);
        if (m)
                unmap_mft_record(base_ni);
        return err;
}

#endif /* NTFS_RW */

/**
 * ntfs_aops - general address space operations for inodes and attributes
 */
struct address_space_operations ntfs_aops = {
        .readpage       = ntfs_readpage,        /* Fill page with data. */
        .sync_page      = block_sync_page,      /* Currently, just unplugs the
                                                   disk request queue. */
#ifdef NTFS_RW
        .writepage      = ntfs_writepage,       /* Write dirty page to disk. */
        .prepare_write  = ntfs_prepare_write,   /* Prepare page and buffers
                                                   ready to receive data. */
        .commit_write   = ntfs_commit_write,    /* Commit received data. */
#endif /* NTFS_RW */
};

/**
 * ntfs_mst_aops - general address space operations for mst protected inodes
 * and attributes
 */
struct address_space_operations ntfs_mst_aops = {
        .readpage       = ntfs_readpage,        /* Fill page with data. */
        .sync_page      = block_sync_page,      /* Currently, just unplugs the
                                                   disk request queue. */
#ifdef NTFS_RW
        .writepage      = ntfs_writepage,       /* Write dirty page to disk. */
        .set_page_dirty = __set_page_dirty_nobuffers,   /* Set the page dirty
                                                   without touching the buffers
                                                   belonging to the page. */
#endif /* NTFS_RW */
};

#ifdef NTFS_RW

/**
 * mark_ntfs_record_dirty - mark an ntfs record dirty
 * @page: page containing the ntfs record to mark dirty
 * @ofs: byte offset within @page at which the ntfs record begins
 *
 * Set the buffers and the page in which the ntfs record is located dirty.
 *
 * The latter also marks the vfs inode the ntfs record belongs to dirty
 * (I_DIRTY_PAGES only).
 *
 * If the page does not have buffers, we create them and set them uptodate.
 * The page may not be locked which is why we need to handle the buffers under
 * the mapping->private_lock. Once the buffers are marked dirty we no longer
 * need the lock since try_to_free_buffers() does not free dirty buffers.
 */
void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs)
{
        struct address_space *mapping = page->mapping;
        ntfs_inode *ni = NTFS_I(mapping->host);
        struct buffer_head *bh, *head, *buffers_to_free = NULL;
        unsigned int end, bh_size, bh_ofs;

        BUG_ON(!PageUptodate(page));
        end = ofs + ni->itype.index.block_size;
        bh_size = 1 << VFS_I(ni)->i_blkbits;
        spin_lock(&mapping->private_lock);
        if (unlikely(!page_has_buffers(page))) {
                spin_unlock(&mapping->private_lock);
                bh = head = alloc_page_buffers(page, bh_size, 1);
                spin_lock(&mapping->private_lock);
                if (likely(!page_has_buffers(page))) {
                        struct buffer_head *tail;

                        do {
                                set_buffer_uptodate(bh);
                                tail = bh;
                                bh = bh->b_this_page;
                        } while (bh);
                        tail->b_this_page = head;
                        attach_page_buffers(page, head);
                } else
                        buffers_to_free = bh;
        }
        bh = head = page_buffers(page);
        do {
                bh_ofs = bh_offset(bh);
                if (bh_ofs + bh_size <= ofs)
                        continue;
                if (unlikely(bh_ofs >= end))
                        break;
                set_buffer_dirty(bh);
        } while ((bh = bh->b_this_page) != head);
        spin_unlock(&mapping->private_lock);
        __set_page_dirty_nobuffers(page);
        if (unlikely(buffers_to_free)) {
                do {
                        bh = buffers_to_free->b_this_page;
                        free_buffer_head(buffers_to_free);
                        buffers_to_free = bh;
                } while (buffers_to_free);
        }
}
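
/*
 * Illustrative, hypothetical usage: a caller that has modified an mst
 * protected record inside an uptodate page would typically do
 *
 *      flush_dcache_page(page);
 *      mark_ntfs_record_dirty(page, ofs);
 *
 * with @page and @ofs identifying the record.  The page need not be locked,
 * but it must be uptodate (see the BUG_ON() above).
 */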

#endif /* NTFS_RW */