Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (43 commits)
ext4: fix a BUG in mb_mark_used during trim.
ext4: unused variables cleanup in fs/ext4/extents.c
ext4: remove redundant set_buffer_mapped() in ext4_da_get_block_prep()
ext4: add more tracepoints and use dev_t in the trace buffer
ext4: don't kfree uninitialized s_group_info members
ext4: add missing space in printk's in __ext4_grp_locked_error()
ext4: add FITRIM to compat_ioctl.
ext4: handle errors in ext4_clear_blocks()
ext4: unify the ext4_handle_release_buffer() api
ext4: handle errors in ext4_rename
jbd2: add COW fields to struct jbd2_journal_handle
jbd2: add the b_cow_tid field to journal_head struct
ext4: Initialize fsync transaction ids in ext4_new_inode()
ext4: Use single thread to perform DIO unwritten convertion
ext4: optimize ext4_bio_write_page() when no extent conversion is needed
ext4: skip orphan cleanup if fs has unknown ROCOMPAT features
ext4: use the nblocks arg to ext4_truncate_restart_trans()
ext4: fix missing iput of root inode for some mount error paths
ext4: make FIEMAP and delayed allocation play well together
ext4: suppress verbose debugging information if malloc-debug is off
...
Fi up conflicts in fs/ext4/super.c due to workqueue changes
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
index 5fb7099..f22ac08 100644
--- a/Documentation/ABI/testing/sysfs-fs-ext4
+++ b/Documentation/ABI/testing/sysfs-fs-ext4
@@ -48,7 +48,7 @@
will have its blocks allocated out of its own unique
preallocation pool.
-What: /sys/fs/ext4/<disk>/inode_readahead
+What: /sys/fs/ext4/<disk>/inode_readahead_blks
Date: March 2008
Contact: "Theodore Ts'o" <tytso@mit.edu>
Description:
@@ -85,7 +85,14 @@
Contact: "Theodore Ts'o" <tytso@mit.edu>
Description:
Tuning parameter which (if non-zero) controls the goal
- inode used by the inode allocator in p0reference to
- all other allocation hueristics. This is intended for
+ inode used by the inode allocator in preference to
+ all other allocation heuristics. This is intended for
debugging use only, and should be 0 on production
systems.
+
+What: /sys/fs/ext4/<disk>/max_writeback_mb_bump
+Date: September 2009
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ The maximum number of megabytes the writeback code will
+ try to write out before move on to another inode.
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 6ab9442..6b05046 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -367,12 +367,47 @@
minimizes the impact on the systme performance
while file system's inode table is being initialized.
-discard Controls whether ext4 should issue discard/TRIM
+discard Controls whether ext4 should issue discard/TRIM
nodiscard(*) commands to the underlying block device when
blocks are freed. This is useful for SSD devices
and sparse/thinly-provisioned LUNs, but it is off
by default until sufficient testing has been done.
+nouid32 Disables 32-bit UIDs and GIDs. This is for
+ interoperability with older kernels which only
+ store and expect 16-bit values.
+
+resize Allows to resize filesystem to the end of the last
+ existing block group, further resize has to be done
+ with resize2fs either online, or offline. It can be
+ used only with conjunction with remount.
+
+block_validity This options allows to enables/disables the in-kernel
+noblock_validity facility for tracking filesystem metadata blocks
+ within internal data structures. This allows multi-
+ block allocator and other routines to quickly locate
+ extents which might overlap with filesystem metadata
+ blocks. This option is intended for debugging
+ purposes and since it negatively affects the
+ performance, it is off by default.
+
+dioread_lock Controls whether or not ext4 should use the DIO read
+dioread_nolock locking. If the dioread_nolock option is specified
+ ext4 will allocate uninitialized extent before buffer
+ write and convert the extent to initialized after IO
+ completes. This approach allows ext4 code to avoid
+ using inode mutex, which improves scalability on high
+ speed storages. However this does not work with nobh
+ option and the mount will fail. Nor does it work with
+ data journaling and dioread_nolock option will be
+ ignored with kernel warning. Note that dioread_nolock
+ code path is only used for extent-based files.
+ Because of the restrictions this options comprises
+ it is off by default (e.g. dioread_lock).
+
+i_version Enable 64-bit inode version support. This option is
+ off by default.
+
Data Mode
=========
There are 3 different data modes:
@@ -400,6 +435,176 @@
outperforms all others modes. Currently ext4 does not have delayed
allocation support if this data journalling mode is selected.
+/proc entries
+=============
+
+Information about mounted ext4 file systems can be found in
+/proc/fs/ext4. Each mounted filesystem will have a directory in
+/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or
+/proc/fs/ext4/dm-0). The files in each per-device directory are shown
+in table below.
+
+Files in /proc/fs/ext4/<devname>
+..............................................................................
+ File Content
+ mb_groups details of multiblock allocator buddy cache of free blocks
+..............................................................................
+
+/sys entries
+============
+
+Information about mounted ext4 file systems can be found in
+/sys/fs/ext4. Each mounted filesystem will have a directory in
+/sys/fs/ext4 based on its device name (i.e., /sys/fs/ext4/hdc or
+/sys/fs/ext4/dm-0). The files in each per-device directory are shown
+in table below.
+
+Files in /sys/fs/ext4/<devname>
+(see also Documentation/ABI/testing/sysfs-fs-ext4)
+..............................................................................
+ File Content
+
+ delayed_allocation_blocks This file is read-only and shows the number of
+ blocks that are dirty in the page cache, but
+ which do not have their location in the
+ filesystem allocated yet.
+
+ inode_goal Tuning parameter which (if non-zero) controls
+ the goal inode used by the inode allocator in
+ preference to all other allocation heuristics.
+ This is intended for debugging use only, and
+ should be 0 on production systems.
+
+ inode_readahead_blks Tuning parameter which controls the maximum
+ number of inode table blocks that ext4's inode
+ table readahead algorithm will pre-read into
+ the buffer cache
+
+ lifetime_write_kbytes This file is read-only and shows the number of
+ kilobytes of data that have been written to this
+ filesystem since it was created.
+
+ max_writeback_mb_bump The maximum number of megabytes the writeback
+ code will try to write out before move on to
+ another inode.
+
+ mb_group_prealloc The multiblock allocator will round up allocation
+ requests to a multiple of this tuning parameter if
+ the stripe size is not set in the ext4 superblock
+
+ mb_max_to_scan The maximum number of extents the multiblock
+ allocator will search to find the best extent
+
+ mb_min_to_scan The minimum number of extents the multiblock
+ allocator will search to find the best extent
+
+ mb_order2_req Tuning parameter which controls the minimum size
+ for requests (as a power of 2) where the buddy
+ cache is used
+
+ mb_stats Controls whether the multiblock allocator should
+ collect statistics, which are shown during the
+ unmount. 1 means to collect statistics, 0 means
+ not to collect statistics
+
+ mb_stream_req Files which have fewer blocks than this tunable
+ parameter will have their blocks allocated out
+ of a block group specific preallocation pool, so
+ that small files are packed closely together.
+ Each large file will have its blocks allocated
+ out of its own unique preallocation pool.
+
+ session_write_kbytes This file is read-only and shows the number of
+ kilobytes of data that have been written to this
+ filesystem since it was mounted.
+..............................................................................
+
+Ioctls
+======
+
+There is some Ext4 specific functionality which can be accessed by applications
+through the system call interfaces. The list of all Ext4 specific ioctls are
+shown in the table below.
+
+Table of Ext4 specific ioctls
+..............................................................................
+ Ioctl Description
+ EXT4_IOC_GETFLAGS Get additional attributes associated with inode.
+ The ioctl argument is an integer bitfield, with
+ bit values described in ext4.h. This ioctl is an
+ alias for FS_IOC_GETFLAGS.
+
+ EXT4_IOC_SETFLAGS Set additional attributes associated with inode.
+ The ioctl argument is an integer bitfield, with
+ bit values described in ext4.h. This ioctl is an
+ alias for FS_IOC_SETFLAGS.
+
+ EXT4_IOC_GETVERSION
+ EXT4_IOC_GETVERSION_OLD
+ Get the inode i_generation number stored for
+ each inode. The i_generation number is normally
+ changed only when new inode is created and it is
+ particularly useful for network filesystems. The
+ '_OLD' version of this ioctl is an alias for
+ FS_IOC_GETVERSION.
+
+ EXT4_IOC_SETVERSION
+ EXT4_IOC_SETVERSION_OLD
+ Set the inode i_generation number stored for
+ each inode. The '_OLD' version of this ioctl
+ is an alias for FS_IOC_SETVERSION.
+
+ EXT4_IOC_GROUP_EXTEND This ioctl has the same purpose as the resize
+ mount option. It allows to resize filesystem
+ to the end of the last existing block group,
+ further resize has to be done with resize2fs,
+ either online, or offline. The argument points
+ to the unsigned logn number representing the
+ filesystem new block count.
+
+ EXT4_IOC_MOVE_EXT Move the block extents from orig_fd (the one
+ this ioctl is pointing to) to the donor_fd (the
+ one specified in move_extent structure passed
+ as an argument to this ioctl). Then, exchange
+ inode metadata between orig_fd and donor_fd.
+ This is especially useful for online
+ defragmentation, because the allocator has the
+ opportunity to allocate moved blocks better,
+ ideally into one contiguous extent.
+
+ EXT4_IOC_GROUP_ADD Add a new group descriptor to an existing or
+ new group descriptor block. The new group
+ descriptor is described by ext4_new_group_input
+ structure, which is passed as an argument to
+ this ioctl. This is especially useful in
+ conjunction with EXT4_IOC_GROUP_EXTEND,
+ which allows online resize of the filesystem
+ to the end of the last existing block group.
+ Those two ioctls combined is used in userspace
+ online resize tool (e.g. resize2fs).
+
+ EXT4_IOC_MIGRATE This ioctl operates on the filesystem itself.
+ It converts (migrates) ext3 indirect block mapped
+ inode to ext4 extent mapped inode by walking
+ through indirect block mapping of the original
+ inode and converting contiguous block ranges
+ into ext4 extents of the temporary inode. Then,
+ inodes are swapped. This ioctl might help, when
+ migrating from ext3 to ext4 filesystem, however
+ suggestion is to create fresh ext4 filesystem
+ and copy data from the backup. Note, that
+ filesystem has to support extents for this ioctl
+ to work.
+
+ EXT4_IOC_ALLOC_DA_BLKS Force all of the delay allocated blocks to be
+ allocated to preserve application-expected ext3
+ behaviour. Note that this will also start
+ triggering a write of the data blocks, but this
+ behaviour may change in the future as it is
+ not necessary and has been done this way only
+ for sake of simplicity.
+..............................................................................
+
References
==========
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index adf96b8..97b970e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -21,6 +21,8 @@
#include "ext4_jbd2.h"
#include "mballoc.h"
+#include <trace/events/ext4.h>
+
/*
* balloc.c contains the blocks allocation and deallocation routines
*/
@@ -342,6 +344,7 @@
* We do it here so the bitmap uptodate bit
* get set with buffer lock held.
*/
+ trace_ext4_read_block_bitmap_load(sb, block_group);
set_bitmap_uptodate(bh);
if (bh_submit_read(bh) < 0) {
put_bh(bh);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index d8b992e..e25e99b 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -202,13 +202,6 @@
return 1;
}
-static inline void ext4_journal_release_buffer(handle_t *handle,
- struct buffer_head *bh)
-{
- if (ext4_handle_valid(handle))
- jbd2_journal_release_buffer(handle, bh);
-}
-
static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
{
return ext4_journal_start_sb(inode->i_sb, nblocks);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7516fb9..dd2cb50 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,6 +44,8 @@
#include "ext4_jbd2.h"
#include "ext4_extents.h"
+#include <trace/events/ext4.h>
+
static int ext4_ext_truncate_extend_restart(handle_t *handle,
struct inode *inode,
int needed)
@@ -664,6 +666,8 @@
if (unlikely(!bh))
goto err;
if (!bh_uptodate_or_lock(bh)) {
+ trace_ext4_ext_load_extent(inode, block,
+ path[ppos].p_block);
if (bh_submit_read(bh) < 0) {
put_bh(bh);
goto err;
@@ -1034,7 +1038,7 @@
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
- ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+ ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
EXT4_FREE_BLOCKS_METADATA);
}
}
@@ -2059,7 +2063,7 @@
if (err)
return err;
ext_debug("index is empty, remove it, free block %llu\n", leaf);
- ext4_free_blocks(handle, inode, 0, leaf, 1,
+ ext4_free_blocks(handle, inode, NULL, leaf, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
return err;
}
@@ -2156,7 +2160,7 @@
num = le32_to_cpu(ex->ee_block) + ee_len - from;
start = ext4_ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, start);
- ext4_free_blocks(handle, inode, 0, start, num, flags);
+ ext4_free_blocks(handle, inode, NULL, start, num, flags);
} else if (from == le32_to_cpu(ex->ee_block)
&& to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -3108,14 +3112,13 @@
{
int i, depth;
struct ext4_extent_header *eh;
- struct ext4_extent *ex, *last_ex;
+ struct ext4_extent *last_ex;
if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
return 0;
depth = ext_depth(inode);
eh = path[depth].p_hdr;
- ex = path[depth].p_ext;
if (unlikely(!eh->eh_entries)) {
EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
@@ -3295,9 +3298,8 @@
struct ext4_map_blocks *map, int flags)
{
struct ext4_ext_path *path = NULL;
- struct ext4_extent_header *eh;
struct ext4_extent newex, *ex;
- ext4_fsblk_t newblock;
+ ext4_fsblk_t newblock = 0;
int err = 0, depth, ret;
unsigned int allocated = 0;
struct ext4_allocation_request ar;
@@ -3305,6 +3307,7 @@
ext_debug("blocks %u/%u requested for inode %lu\n",
map->m_lblk, map->m_len, inode->i_ino);
+ trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
/* check in cache */
if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
@@ -3352,7 +3355,6 @@
err = -EIO;
goto out2;
}
- eh = path[depth].p_hdr;
ex = path[depth].p_ext;
if (ex) {
@@ -3485,7 +3487,7 @@
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
+ ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
ext4_ext_get_actual_len(&newex), 0);
goto out2;
}
@@ -3525,6 +3527,8 @@
ext4_ext_drop_refs(path);
kfree(path);
}
+ trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+ newblock, map->m_len, err ? err : allocated);
return err ? err : allocated;
}
@@ -3658,6 +3662,7 @@
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return -EOPNOTSUPP;
+ trace_ext4_fallocate_enter(inode, offset, len, mode);
map.m_lblk = offset >> blkbits;
/*
* We can't just convert len to max_blocks because
@@ -3673,6 +3678,7 @@
ret = inode_newsize_ok(inode, (len + offset));
if (ret) {
mutex_unlock(&inode->i_mutex);
+ trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
return ret;
}
retry:
@@ -3717,6 +3723,8 @@
goto retry;
}
mutex_unlock(&inode->i_mutex);
+ trace_ext4_fallocate_exit(inode, offset, max_blocks,
+ ret > 0 ? ret2 : ret);
return ret > 0 ? ret2 : ret;
}
@@ -3775,6 +3783,7 @@
}
return ret > 0 ? ret2 : ret;
}
+
/*
* Callback function called for each extent to gather FIEMAP information.
*/
@@ -3782,38 +3791,162 @@
struct ext4_ext_cache *newex, struct ext4_extent *ex,
void *data)
{
- struct fiemap_extent_info *fieinfo = data;
- unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
__u64 logical;
__u64 physical;
__u64 length;
+ loff_t size;
__u32 flags = 0;
- int error;
+ int ret = 0;
+ struct fiemap_extent_info *fieinfo = data;
+ unsigned char blksize_bits;
- logical = (__u64)newex->ec_block << blksize_bits;
+ blksize_bits = inode->i_sb->s_blocksize_bits;
+ logical = (__u64)newex->ec_block << blksize_bits;
if (newex->ec_start == 0) {
- pgoff_t offset;
- struct page *page;
+ /*
+ * No extent in extent-tree contains block @newex->ec_start,
+ * then the block may stay in 1)a hole or 2)delayed-extent.
+ *
+ * Holes or delayed-extents are processed as follows.
+ * 1. lookup dirty pages with specified range in pagecache.
+ * If no page is got, then there is no delayed-extent and
+ * return with EXT_CONTINUE.
+ * 2. find the 1st mapped buffer,
+ * 3. check if the mapped buffer is both in the request range
+ * and a delayed buffer. If not, there is no delayed-extent,
+ * then return.
+ * 4. a delayed-extent is found, the extent will be collected.
+ */
+ ext4_lblk_t end = 0;
+ pgoff_t last_offset;
+ pgoff_t offset;
+ pgoff_t index;
+ struct page **pages = NULL;
struct buffer_head *bh = NULL;
+ struct buffer_head *head = NULL;
+ unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
+
+ pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (pages == NULL)
+ return -ENOMEM;
offset = logical >> PAGE_SHIFT;
- page = find_get_page(inode->i_mapping, offset);
- if (!page || !page_has_buffers(page))
- return EXT_CONTINUE;
+repeat:
+ last_offset = offset;
+ head = NULL;
+ ret = find_get_pages_tag(inode->i_mapping, &offset,
+ PAGECACHE_TAG_DIRTY, nr_pages, pages);
- bh = page_buffers(page);
+ if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+ /* First time, try to find a mapped buffer. */
+ if (ret == 0) {
+out:
+ for (index = 0; index < ret; index++)
+ page_cache_release(pages[index]);
+ /* just a hole. */
+ kfree(pages);
+ return EXT_CONTINUE;
+ }
- if (!bh)
- return EXT_CONTINUE;
+ /* Try to find the 1st mapped buffer. */
+ end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
+ blksize_bits;
+ if (!page_has_buffers(pages[0]))
+ goto out;
+ head = page_buffers(pages[0]);
+ if (!head)
+ goto out;
- if (buffer_delay(bh)) {
- flags |= FIEMAP_EXTENT_DELALLOC;
- page_cache_release(page);
+ bh = head;
+ do {
+ if (buffer_mapped(bh)) {
+ /* get the 1st mapped buffer. */
+ if (end > newex->ec_block +
+ newex->ec_len)
+ /* The buffer is out of
+ * the request range.
+ */
+ goto out;
+ goto found_mapped_buffer;
+ }
+ bh = bh->b_this_page;
+ end++;
+ } while (bh != head);
+
+ /* No mapped buffer found. */
+ goto out;
} else {
- page_cache_release(page);
- return EXT_CONTINUE;
+ /*Find contiguous delayed buffers. */
+ if (ret > 0 && pages[0]->index == last_offset)
+ head = page_buffers(pages[0]);
+ bh = head;
}
+
+found_mapped_buffer:
+ if (bh != NULL && buffer_delay(bh)) {
+ /* 1st or contiguous delayed buffer found. */
+ if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+ /*
+ * 1st delayed buffer found, record
+ * the start of extent.
+ */
+ flags |= FIEMAP_EXTENT_DELALLOC;
+ newex->ec_block = end;
+ logical = (__u64)end << blksize_bits;
+ }
+ /* Find contiguous delayed buffers. */
+ do {
+ if (!buffer_delay(bh))
+ goto found_delayed_extent;
+ bh = bh->b_this_page;
+ end++;
+ } while (bh != head);
+
+ for (index = 1; index < ret; index++) {
+ if (!page_has_buffers(pages[index])) {
+ bh = NULL;
+ break;
+ }
+ head = page_buffers(pages[index]);
+ if (!head) {
+ bh = NULL;
+ break;
+ }
+ if (pages[index]->index !=
+ pages[0]->index + index) {
+ /* Blocks are not contiguous. */
+ bh = NULL;
+ break;
+ }
+ bh = head;
+ do {
+ if (!buffer_delay(bh))
+ /* Delayed-extent ends. */
+ goto found_delayed_extent;
+ bh = bh->b_this_page;
+ end++;
+ } while (bh != head);
+ }
+ } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
+ /* a hole found. */
+ goto out;
+
+found_delayed_extent:
+ newex->ec_len = min(end - newex->ec_block,
+ (ext4_lblk_t)EXT_INIT_MAX_LEN);
+ if (ret == nr_pages && bh != NULL &&
+ newex->ec_len < EXT_INIT_MAX_LEN &&
+ buffer_delay(bh)) {
+ /* Have not collected an extent and continue. */
+ for (index = 0; index < ret; index++)
+ page_cache_release(pages[index]);
+ goto repeat;
+ }
+
+ for (index = 0; index < ret; index++)
+ page_cache_release(pages[index]);
+ kfree(pages);
}
physical = (__u64)newex->ec_start << blksize_bits;
@@ -3822,32 +3955,16 @@
if (ex && ext4_ext_is_uninitialized(ex))
flags |= FIEMAP_EXTENT_UNWRITTEN;
- /*
- * If this extent reaches EXT_MAX_BLOCK, it must be last.
- *
- * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
- * this also indicates no more allocated blocks.
- *
- * XXX this might miss a single-block extent at EXT_MAX_BLOCK
- */
- if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
- newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
- loff_t size = i_size_read(inode);
- loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
-
+ size = i_size_read(inode);
+ if (logical + length >= size)
flags |= FIEMAP_EXTENT_LAST;
- if ((flags & FIEMAP_EXTENT_DELALLOC) &&
- logical+length > size)
- length = (size - logical + bs - 1) & ~(bs-1);
- }
- error = fiemap_fill_next_extent(fieinfo, logical, physical,
+ ret = fiemap_fill_next_extent(fieinfo, logical, physical,
length, flags);
- if (error < 0)
- return error;
- if (error == 1)
+ if (ret < 0)
+ return ret;
+ if (ret == 1)
return EXT_BREAK;
-
return EXT_CONTINUE;
}
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 7829b28..7f74019 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -164,20 +164,20 @@
J_ASSERT(ext4_journal_current_handle() == NULL);
- trace_ext4_sync_file(file, datasync);
+ trace_ext4_sync_file_enter(file, datasync);
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
ret = ext4_flush_completed_IO(inode);
if (ret < 0)
- return ret;
+ goto out;
if (!journal) {
ret = generic_file_fsync(file, datasync);
if (!ret && !list_empty(&inode->i_dentry))
ext4_sync_parent(inode);
- return ret;
+ goto out;
}
/*
@@ -194,8 +194,10 @@
* (they were dirtied by commit). But that's OK - the blocks are
* safe in-journal, which is all fsync() needs to ensure.
*/
- if (ext4_should_journal_data(inode))
- return ext4_force_commit(inode->i_sb);
+ if (ext4_should_journal_data(inode)) {
+ ret = ext4_force_commit(inode->i_sb);
+ goto out;
+ }
commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
if (jbd2_log_start_commit(journal, commit_tid)) {
@@ -215,5 +217,7 @@
ret = jbd2_log_wait_commit(journal, commit_tid);
} else if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ out:
+ trace_ext4_sync_file_exit(inode, ret);
return ret;
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 78b79e1..21bb2f6 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -152,6 +152,7 @@
* We do it here so the bitmap uptodate bit
* get set with buffer lock held.
*/
+ trace_ext4_load_inode_bitmap(sb, block_group);
set_bitmap_uptodate(bh);
if (bh_submit_read(bh) < 0) {
put_bh(bh);
@@ -649,7 +650,7 @@
*group = parent_group + flex_size;
if (*group > ngroups)
*group = 0;
- return find_group_orlov(sb, parent, group, mode, 0);
+ return find_group_orlov(sb, parent, group, mode, NULL);
}
/*
@@ -1054,6 +1055,11 @@
}
}
+ if (ext4_handle_valid(handle)) {
+ ei->i_sync_tid = handle->h_transaction->t_tid;
+ ei->i_datasync_tid = handle->h_transaction->t_tid;
+ }
+
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
ext4_std_error(sb, err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9297ad4..1a86282 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -173,7 +173,7 @@
BUG_ON(EXT4_JOURNAL(inode) == NULL);
jbd_debug(2, "restarting handle %p\n", handle);
up_write(&EXT4_I(inode)->i_data_sem);
- ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
+ ret = ext4_journal_restart(handle, nblocks);
down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode);
@@ -720,7 +720,7 @@
return ret;
failed_out:
for (i = 0; i < index; i++)
- ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+ ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
return ret;
}
@@ -823,20 +823,20 @@
return err;
failed:
/* Allocation failed, free what we already allocated */
- ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
+ ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
for (i = 1; i <= n ; i++) {
/*
* branch[i].bh is newly allocated, so there is no
* need to revoke the block, which is why we don't
* need to set EXT4_FREE_BLOCKS_METADATA.
*/
- ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+ ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
EXT4_FREE_BLOCKS_FORGET);
}
for (i = n+1; i < indirect_blks; i++)
- ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+ ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
- ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
+ ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
return err;
}
@@ -924,7 +924,7 @@
ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
EXT4_FREE_BLOCKS_FORGET);
}
- ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+ ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
blks, 0);
return err;
@@ -973,6 +973,7 @@
int count = 0;
ext4_fsblk_t first_block = 0;
+ trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@ -1058,6 +1059,8 @@
partial--;
}
out:
+ trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+ map->m_pblk, map->m_len, err);
return err;
}
@@ -2060,7 +2063,7 @@
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
- int commit_write = 0, redirty_page = 0;
+ int commit_write = 0, skip_page = 0;
struct page *page = pvec.pages[i];
index = page->index;
@@ -2086,14 +2089,12 @@
* If the page does not have buffers (for
* whatever reason), try to create them using
* __block_write_begin. If this fails,
- * redirty the page and move on.
+ * skip the page and move on.
*/
if (!page_has_buffers(page)) {
if (__block_write_begin(page, 0, len,
noalloc_get_block_write)) {
- redirty_page:
- redirty_page_for_writepage(mpd->wbc,
- page);
+ skip_page:
unlock_page(page);
continue;
}
@@ -2104,7 +2105,7 @@
block_start = 0;
do {
if (!bh)
- goto redirty_page;
+ goto skip_page;
if (map && (cur_logical >= map->m_lblk) &&
(cur_logical <= (map->m_lblk +
(map->m_len - 1)))) {
@@ -2120,22 +2121,23 @@
clear_buffer_unwritten(bh);
}
- /* redirty page if block allocation undone */
+ /* skip page if block allocation undone */
if (buffer_delay(bh) || buffer_unwritten(bh))
- redirty_page = 1;
+ skip_page = 1;
bh = bh->b_this_page;
block_start += bh->b_size;
cur_logical++;
pblock++;
} while (bh != page_bufs);
- if (redirty_page)
- goto redirty_page;
+ if (skip_page)
+ goto skip_page;
if (commit_write)
/* mark the buffer_heads as dirty & uptodate */
block_commit_write(page, 0, len);
+ clear_page_dirty_for_io(page);
/*
* Delalloc doesn't support data journalling,
* but eventually maybe we'll lift this
@@ -2165,8 +2167,7 @@
return ret;
}
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
- sector_t logical, long blk_cnt)
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
{
int nr_pages, i;
pgoff_t index, end;
@@ -2174,9 +2175,8 @@
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
- index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- end = (logical + blk_cnt - 1) >>
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ index = mpd->first_page;
+ end = mpd->next_page - 1;
while (index <= end) {
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
if (nr_pages == 0)
@@ -2279,9 +2279,8 @@
err = blks;
/*
* If get block returns EAGAIN or ENOSPC and there
- * appears to be free blocks we will call
- * ext4_writepage() for all of the pages which will
- * just redirty the pages.
+ * appears to be free blocks we will just let
+ * mpage_da_submit_io() unlock all of the pages.
*/
if (err == -EAGAIN)
goto submit_io;
@@ -2312,8 +2311,10 @@
ext4_print_free_blocks(mpd->inode);
}
/* invalidate all the pages */
- ext4_da_block_invalidatepages(mpd, next,
- mpd->b_size >> mpd->inode->i_blkbits);
+ ext4_da_block_invalidatepages(mpd);
+
+ /* Mark this page range as having been completed */
+ mpd->io_done = 1;
return;
}
BUG_ON(blks == 0);
@@ -2438,102 +2439,6 @@
}
/*
- * __mpage_da_writepage - finds extent of pages and blocks
- *
- * @page: page to consider
- * @wbc: not used, we just follow rules
- * @data: context
- *
- * The function finds extents of pages and scan them for all blocks.
- */
-static int __mpage_da_writepage(struct page *page,
- struct writeback_control *wbc,
- struct mpage_da_data *mpd)
-{
- struct inode *inode = mpd->inode;
- struct buffer_head *bh, *head;
- sector_t logical;
-
- /*
- * Can we merge this page to current extent?
- */
- if (mpd->next_page != page->index) {
- /*
- * Nope, we can't. So, we map non-allocated blocks
- * and start IO on them
- */
- if (mpd->next_page != mpd->first_page) {
- mpage_da_map_and_submit(mpd);
- /*
- * skip rest of the page in the page_vec
- */
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return MPAGE_DA_EXTENT_TAIL;
- }
-
- /*
- * Start next extent of pages ...
- */
- mpd->first_page = page->index;
-
- /*
- * ... and blocks
- */
- mpd->b_size = 0;
- mpd->b_state = 0;
- mpd->b_blocknr = 0;
- }
-
- mpd->next_page = page->index + 1;
- logical = (sector_t) page->index <<
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
- if (!page_has_buffers(page)) {
- mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
- (1 << BH_Dirty) | (1 << BH_Uptodate));
- if (mpd->io_done)
- return MPAGE_DA_EXTENT_TAIL;
- } else {
- /*
- * Page with regular buffer heads, just add all dirty ones
- */
- head = page_buffers(page);
- bh = head;
- do {
- BUG_ON(buffer_locked(bh));
- /*
- * We need to try to allocate
- * unmapped blocks in the same page.
- * Otherwise we won't make progress
- * with the page in ext4_writepage
- */
- if (ext4_bh_delay_or_unwritten(NULL, bh)) {
- mpage_add_bh_to_extent(mpd, logical,
- bh->b_size,
- bh->b_state);
- if (mpd->io_done)
- return MPAGE_DA_EXTENT_TAIL;
- } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
- /*
- * mapped dirty buffer. We need to update
- * the b_state because we look at
- * b_state in mpage_da_map_blocks. We don't
- * update b_size because if we find an
- * unmapped buffer_head later we need to
- * use the b_state flag of that buffer_head.
- */
- if (mpd->b_size == 0)
- mpd->b_state = bh->b_state & BH_FLAGS;
- }
- logical++;
- } while ((bh = bh->b_this_page) != head);
- }
-
- return 0;
-}
-
-/*
* This is a special get_blocks_t callback which is used by
* ext4_da_write_begin(). It will either return mapped block or
* reserve space for a single block.
@@ -2597,7 +2502,6 @@
* for partial write.
*/
set_buffer_new(bh);
- set_buffer_mapped(bh);
}
return 0;
}
@@ -2811,27 +2715,27 @@
/*
* write_cache_pages_da - walk the list of dirty pages of the given
- * address space and call the callback function (which usually writes
- * the pages).
- *
- * This is a forked version of write_cache_pages(). Differences:
- * Range cyclic is ignored.
- * no_nrwrite_index_update is always presumed true
+ * address space and accumulate pages that need writing, and call
+ * mpage_da_map_and_submit to map a single contiguous memory region
+ * and then write them.
*/
static int write_cache_pages_da(struct address_space *mapping,
struct writeback_control *wbc,
struct mpage_da_data *mpd,
pgoff_t *done_index)
{
- int ret = 0;
- int done = 0;
- struct pagevec pvec;
- unsigned nr_pages;
- pgoff_t index;
- pgoff_t end; /* Inclusive */
- long nr_to_write = wbc->nr_to_write;
- int tag;
+ struct buffer_head *bh, *head;
+ struct inode *inode = mapping->host;
+ struct pagevec pvec;
+ unsigned int nr_pages;
+ sector_t logical;
+ pgoff_t index, end;
+ long nr_to_write = wbc->nr_to_write;
+ int i, tag, ret = 0;
+ memset(mpd, 0, sizeof(struct mpage_da_data));
+ mpd->wbc = wbc;
+ mpd->inode = inode;
pagevec_init(&pvec, 0);
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
@@ -2842,13 +2746,11 @@
tag = PAGECACHE_TAG_DIRTY;
*done_index = index;
- while (!done && (index <= end)) {
- int i;
-
+ while (index <= end) {
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
- break;
+ return 0;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
@@ -2860,60 +2762,100 @@
* mapping. However, page->index will not change
* because we have a reference on the page.
*/
- if (page->index > end) {
- done = 1;
- break;
- }
+ if (page->index > end)
+ goto out;
*done_index = page->index + 1;
+ /*
+ * If we can't merge this page, and we have
+ * accumulated an contiguous region, write it
+ */
+ if ((mpd->next_page != page->index) &&
+ (mpd->next_page != mpd->first_page)) {
+ mpage_da_map_and_submit(mpd);
+ goto ret_extent_tail;
+ }
+
lock_page(page);
/*
- * Page truncated or invalidated. We can freely skip it
- * then, even for data integrity operations: the page
- * has disappeared concurrently, so there could be no
- * real expectation of this data interity operation
- * even if there is now a new, dirty page at the same
- * pagecache address.
+ * If the page is no longer dirty, or its
+ * mapping no longer corresponds to inode we
+ * are writing (which means it has been
+ * truncated or invalidated), or the page is
+ * already under writeback and we are not
+ * doing a data integrity writeback, skip the page
*/
- if (unlikely(page->mapping != mapping)) {
-continue_unlock:
+ if (!PageDirty(page) ||
+ (PageWriteback(page) &&
+ (wbc->sync_mode == WB_SYNC_NONE)) ||
+ unlikely(page->mapping != mapping)) {
unlock_page(page);
continue;
}
- if (!PageDirty(page)) {
- /* someone wrote it for us */
- goto continue_unlock;
- }
-
- if (PageWriteback(page)) {
- if (wbc->sync_mode != WB_SYNC_NONE)
- wait_on_page_writeback(page);
- else
- goto continue_unlock;
- }
+ if (PageWriteback(page))
+ wait_on_page_writeback(page);
BUG_ON(PageWriteback(page));
- if (!clear_page_dirty_for_io(page))
- goto continue_unlock;
- ret = __mpage_da_writepage(page, wbc, mpd);
- if (unlikely(ret)) {
- if (ret == AOP_WRITEPAGE_ACTIVATE) {
- unlock_page(page);
- ret = 0;
- } else {
- done = 1;
- break;
- }
+ if (mpd->next_page != page->index)
+ mpd->first_page = page->index;
+ mpd->next_page = page->index + 1;
+ logical = (sector_t) page->index <<
+ (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+ if (!page_has_buffers(page)) {
+ mpage_add_bh_to_extent(mpd, logical,
+ PAGE_CACHE_SIZE,
+ (1 << BH_Dirty) | (1 << BH_Uptodate));
+ if (mpd->io_done)
+ goto ret_extent_tail;
+ } else {
+ /*
+ * Page with regular buffer heads,
+ * just add all dirty ones
+ */
+ head = page_buffers(page);
+ bh = head;
+ do {
+ BUG_ON(buffer_locked(bh));
+ /*
+ * We need to try to allocate
+ * unmapped blocks in the same page.
+ * Otherwise we won't make progress
+ * with the page in ext4_writepage
+ */
+ if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+ mpage_add_bh_to_extent(mpd, logical,
+ bh->b_size,
+ bh->b_state);
+ if (mpd->io_done)
+ goto ret_extent_tail;
+ } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+ /*
+ * mapped dirty buffer. We need
+ * to update the b_state
+ * because we look at b_state
+ * in mpage_da_map_blocks. We
+ * don't update b_size because
+ * if we find an unmapped
+ * buffer_head later we need to
+ * use the b_state flag of that
+ * buffer_head.
+ */
+ if (mpd->b_size == 0)
+ mpd->b_state = bh->b_state & BH_FLAGS;
+ }
+ logical++;
+ } while ((bh = bh->b_this_page) != head);
}
if (nr_to_write > 0) {
nr_to_write--;
if (nr_to_write == 0 &&
- wbc->sync_mode == WB_SYNC_NONE) {
+ wbc->sync_mode == WB_SYNC_NONE)
/*
* We stop writing back only if we are
* not doing integrity sync. In case of
@@ -2924,14 +2866,18 @@
* pages, but have not synced all of the
* old dirty pages.
*/
- done = 1;
- break;
- }
+ goto out;
}
}
pagevec_release(&pvec);
cond_resched();
}
+ return 0;
+ret_extent_tail:
+ ret = MPAGE_DA_EXTENT_TAIL;
+out:
+ pagevec_release(&pvec);
+ cond_resched();
return ret;
}
@@ -2945,7 +2891,6 @@
struct mpage_da_data mpd;
struct inode *inode = mapping->host;
int pages_written = 0;
- long pages_skipped;
unsigned int max_pages;
int range_cyclic, cycled = 1, io_done = 0;
int needed_blocks, ret = 0;
@@ -3028,11 +2973,6 @@
wbc->nr_to_write = desired_nr_to_write;
}
- mpd.wbc = wbc;
- mpd.inode = mapping->host;
-
- pages_skipped = wbc->pages_skipped;
-
retry:
if (wbc->sync_mode == WB_SYNC_ALL)
tag_pages_for_writeback(mapping, index, end);
@@ -3059,22 +2999,10 @@
}
/*
- * Now call __mpage_da_writepage to find the next
+ * Now call write_cache_pages_da() to find the next
* contiguous region of logical blocks that need
- * blocks to be allocated by ext4. We don't actually
- * submit the blocks for I/O here, even though
- * write_cache_pages thinks it will, and will set the
- * pages as clean for write before calling
- * __mpage_da_writepage().
+ * blocks to be allocated by ext4 and submit them.
*/
- mpd.b_size = 0;
- mpd.b_state = 0;
- mpd.b_blocknr = 0;
- mpd.first_page = 0;
- mpd.next_page = 0;
- mpd.io_done = 0;
- mpd.pages_written = 0;
- mpd.retval = 0;
ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
/*
* If we have a contiguous extent of pages and we
@@ -3096,7 +3024,6 @@
* and try again
*/
jbd2_journal_force_commit_nested(sbi->s_journal);
- wbc->pages_skipped = pages_skipped;
ret = 0;
} else if (ret == MPAGE_DA_EXTENT_TAIL) {
/*
@@ -3104,7 +3031,6 @@
* rest of the pages
*/
pages_written += mpd.pages_written;
- wbc->pages_skipped = pages_skipped;
ret = 0;
io_done = 1;
} else if (wbc->nr_to_write)
@@ -3122,11 +3048,6 @@
wbc->range_end = mapping->writeback_index - 1;
goto retry;
}
- if (pages_skipped != wbc->pages_skipped)
- ext4_msg(inode->i_sb, KERN_CRIT,
- "This should not happen leaving %s "
- "with nr_to_write = %ld ret = %d",
- __func__, wbc->nr_to_write, ret);
/* Update index */
wbc->range_cyclic = range_cyclic;
@@ -3460,6 +3381,7 @@
static int ext4_readpage(struct file *file, struct page *page)
{
+ trace_ext4_readpage(page);
return mpage_readpage(page, ext4_get_block);
}
@@ -3494,6 +3416,8 @@
{
journal_t *journal = EXT4_JOURNAL(page->mapping->host);
+ trace_ext4_invalidatepage(page, offset);
+
/*
* free any io_end structure allocated for buffers to be discarded
*/
@@ -3515,6 +3439,8 @@
{
journal_t *journal = EXT4_JOURNAL(page->mapping->host);
+ trace_ext4_releasepage(page);
+
WARN_ON(PageChecked(page));
if (!page_has_buffers(page))
return 0;
@@ -3873,11 +3799,16 @@
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
+ trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
-
- return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+ ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+ else
+ ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+ trace_ext4_direct_IO_exit(inode, offset,
+ iov_length(iov, nr_segs), rw, ret);
+ return ret;
}
/*
@@ -4173,6 +4104,9 @@
*
* We release `count' blocks on disk, but (last - first) may be greater
* than `count' because there can be holes in there.
+ *
+ * Return 0 on success, 1 on invalid block range
+ * and < 0 on fatal error.
*/
static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh,
@@ -4199,33 +4133,32 @@
if (bh) {
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
err = ext4_handle_dirty_metadata(handle, inode, bh);
- if (unlikely(err)) {
- ext4_std_error(inode->i_sb, err);
- return 1;
- }
+ if (unlikely(err))
+ goto out_err;
}
err = ext4_mark_inode_dirty(handle, inode);
- if (unlikely(err)) {
- ext4_std_error(inode->i_sb, err);
- return 1;
- }
+ if (unlikely(err))
+ goto out_err;
err = ext4_truncate_restart_trans(handle, inode,
blocks_for_truncate(inode));
- if (unlikely(err)) {
- ext4_std_error(inode->i_sb, err);
- return 1;
- }
+ if (unlikely(err))
+ goto out_err;
if (bh) {
BUFFER_TRACE(bh, "retaking write access");
- ext4_journal_get_write_access(handle, bh);
+ err = ext4_journal_get_write_access(handle, bh);
+ if (unlikely(err))
+ goto out_err;
}
}
for (p = first; p < last; p++)
*p = 0;
- ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
+ ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
return 0;
+out_err:
+ ext4_std_error(inode->i_sb, err);
+ return err;
}
/**
@@ -4259,7 +4192,7 @@
ext4_fsblk_t nr; /* Current block # */
__le32 *p; /* Pointer into inode/ind
for current block */
- int err;
+ int err = 0;
if (this_bh) { /* For indirect block */
BUFFER_TRACE(this_bh, "get_write_access");
@@ -4281,9 +4214,10 @@
} else if (nr == block_to_free + count) {
count++;
} else {
- if (ext4_clear_blocks(handle, inode, this_bh,
- block_to_free, count,
- block_to_free_p, p))
+ err = ext4_clear_blocks(handle, inode, this_bh,
+ block_to_free, count,
+ block_to_free_p, p);
+ if (err)
break;
block_to_free = nr;
block_to_free_p = p;
@@ -4292,9 +4226,12 @@
}
}
- if (count > 0)
- ext4_clear_blocks(handle, inode, this_bh, block_to_free,
- count, block_to_free_p, p);
+ if (!err && count > 0)
+ err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
+ count, block_to_free_p, p);
+ if (err < 0)
+ /* fatal error */
+ return;
if (this_bh) {
BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
@@ -4412,7 +4349,7 @@
* transaction where the data blocks are
* actually freed.
*/
- ext4_free_blocks(handle, inode, 0, nr, 1,
+ ext4_free_blocks(handle, inode, NULL, nr, 1,
EXT4_FREE_BLOCKS_METADATA|
EXT4_FREE_BLOCKS_FORGET);
@@ -4496,6 +4433,8 @@
ext4_lblk_t last_block;
unsigned blocksize = inode->i_sb->s_blocksize;
+ trace_ext4_truncate_enter(inode);
+
if (!ext4_can_truncate(inode))
return;
@@ -4506,6 +4445,7 @@
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
ext4_ext_truncate(inode);
+ trace_ext4_truncate_exit(inode);
return;
}
@@ -4635,6 +4575,7 @@
ext4_orphan_del(handle, inode);
ext4_journal_stop(handle);
+ trace_ext4_truncate_exit(inode);
}
/*
@@ -4766,6 +4707,7 @@
* has in-inode xattrs, or we don't have this inode in memory.
* Read the block from disk.
*/
+ trace_ext4_load_inode(inode);
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ_META, bh);
@@ -4871,7 +4813,7 @@
return inode;
ei = EXT4_I(inode);
- iloc.bh = 0;
+ iloc.bh = NULL;
ret = __ext4_get_inode_loc(inode, &iloc, 0);
if (ret < 0)
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a84faa1..808c554 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -334,16 +334,22 @@
case FITRIM:
{
struct super_block *sb = inode->i_sb;
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
if (copy_from_user(&range, (struct fstrim_range *)arg,
sizeof(range)))
return -EFAULT;
+ range.minlen = max((unsigned int)range.minlen,
+ q->limits.discard_granularity);
ret = ext4_trim_fs(sb, &range);
if (ret < 0)
return ret;
@@ -421,6 +427,7 @@
return err;
}
case EXT4_IOC_MOVE_EXT:
+ case FITRIM:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d1fe09a..a5837a8 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -432,9 +432,10 @@
}
/* at order 0 we see each particular block */
- *max = 1 << (e4b->bd_blkbits + 3);
- if (order == 0)
+ if (order == 0) {
+ *max = 1 << (e4b->bd_blkbits + 3);
return EXT4_MB_BITMAP(e4b);
+ }
bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order];
*max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order];
@@ -616,7 +617,6 @@
MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments);
grp = ext4_get_group_info(sb, e4b->bd_group);
- buddy = mb_find_buddy(e4b, 0, &max);
list_for_each(cur, &grp->bb_prealloc_list) {
ext4_group_t groupnr;
struct ext4_prealloc_space *pa;
@@ -635,7 +635,12 @@
#define mb_check_buddy(e4b)
#endif
-/* FIXME!! need more doc */
+/*
+ * Divide blocks started from @first with length @len into
+ * smaller chunks with power of 2 blocks.
+ * Clear the bits in bitmap which the blocks of the chunk(s) covered,
+ * then increase bb_counters[] for corresponded chunk size.
+ */
static void ext4_mb_mark_free_simple(struct super_block *sb,
void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
struct ext4_group_info *grp)
@@ -2381,7 +2386,7 @@
/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
* kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
* So a two level scheme suffices for now. */
- sbi->s_group_info = kmalloc(array_size, GFP_KERNEL);
+ sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
if (sbi->s_group_info == NULL) {
printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
return -ENOMEM;
@@ -3208,7 +3213,7 @@
cur_distance = abs(goal_block - cpa->pa_pstart);
new_distance = abs(goal_block - pa->pa_pstart);
- if (cur_distance < new_distance)
+ if (cur_distance <= new_distance)
return cpa;
/* drop the previous reference */
@@ -3907,7 +3912,8 @@
struct super_block *sb = ac->ac_sb;
ext4_group_t ngroups, i;
- if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
+ if (!mb_enable_debug ||
+ (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
return;
printk(KERN_ERR "EXT4-fs: Can't allocate:"
@@ -4753,7 +4759,8 @@
* bitmap. Then issue a TRIM command on this extent and free the extent in
* the group buddy bitmap. This is done until whole group is scanned.
*/
-ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
+static ext4_grpblk_t
+ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
{
void *bitmap;
@@ -4863,10 +4870,15 @@
break;
}
- if (len >= EXT4_BLOCKS_PER_GROUP(sb))
- len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
- else
+ /*
+ * For all the groups except the last one, last block will
+ * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to
+ * change it for the last group in which case start +
+ * len < EXT4_BLOCKS_PER_GROUP(sb).
+ */
+ if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb))
last_block = first_block + len;
+ len -= last_block - first_block;
if (e4b.bd_info->bb_free >= minlen) {
cnt = ext4_trim_all_free(sb, &e4b, first_block,
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index b619322..22bd4d7 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -169,7 +169,7 @@
/* original request */
struct ext4_free_extent ac_o_ex;
- /* goal request (after normalization) */
+ /* goal request (normalized ac_o_ex) */
struct ext4_free_extent ac_g_ex;
/* the best found extent */
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index b0a126f..d1bafa5 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -263,7 +263,7 @@
for (i = 0; i < max_entries; i++) {
if (tmp_idata[i]) {
extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, 0,
+ ext4_free_blocks(handle, inode, NULL,
le32_to_cpu(tmp_idata[i]), 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
@@ -271,7 +271,7 @@
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+ ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
return 0;
@@ -302,7 +302,7 @@
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
+ ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
return 0;
@@ -315,7 +315,7 @@
/* ei->i_data[EXT4_IND_BLOCK] */
if (i_data[0]) {
extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, 0,
+ ext4_free_blocks(handle, inode, NULL,
le32_to_cpu(i_data[0]), 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
@@ -428,7 +428,7 @@
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
- ext4_free_blocks(handle, inode, 0, block, 1,
+ ext4_free_blocks(handle, inode, NULL, block, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
return retval;
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e781b7e..67fd0b0 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -40,6 +40,7 @@
#include "xattr.h"
#include "acl.h"
+#include <trace/events/ext4.h>
/*
* define how far ahead to read directories while searching them.
*/
@@ -2183,6 +2184,7 @@
struct ext4_dir_entry_2 *de;
handle_t *handle;
+ trace_ext4_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
dquot_initialize(dir);
@@ -2228,6 +2230,7 @@
end_unlink:
ext4_journal_stop(handle);
brelse(bh);
+ trace_ext4_unlink_exit(dentry, retval);
return retval;
}
@@ -2402,6 +2405,10 @@
if (!new_inode && new_dir != old_dir &&
EXT4_DIR_LINK_MAX(new_dir))
goto end_rename;
+ BUFFER_TRACE(dir_bh, "get_write_access");
+ retval = ext4_journal_get_write_access(handle, dir_bh);
+ if (retval)
+ goto end_rename;
}
if (!new_bh) {
retval = ext4_add_entry(handle, new_dentry, old_inode);
@@ -2409,7 +2416,9 @@
goto end_rename;
} else {
BUFFER_TRACE(new_bh, "get write access");
- ext4_journal_get_write_access(handle, new_bh);
+ retval = ext4_journal_get_write_access(handle, new_bh);
+ if (retval)
+ goto end_rename;
new_de->inode = cpu_to_le32(old_inode->i_ino);
if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
EXT4_FEATURE_INCOMPAT_FILETYPE))
@@ -2470,8 +2479,6 @@
old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
ext4_update_dx_flag(old_dir);
if (dir_bh) {
- BUFFER_TRACE(dir_bh, "get_write_access");
- ext4_journal_get_write_access(handle, dir_bh);
PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
cpu_to_le32(new_dir->i_ino);
BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index e2cd90e..b6dbd05 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -259,6 +259,11 @@
bi_sector >> (inode->i_blkbits - 9));
}
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+ ext4_free_io_end(io_end);
+ return;
+ }
+
/* Add the io_end to per-inode completed io list*/
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
@@ -279,9 +284,9 @@
BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
bio_put(io->io_bio);
}
- io->io_bio = 0;
+ io->io_bio = NULL;
io->io_op = 0;
- io->io_end = 0;
+ io->io_end = NULL;
}
static int io_submit_init(struct ext4_io_submit *io,
@@ -380,8 +385,6 @@
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
- set_page_writeback(page);
- ClearPageError(page);
io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
if (!io_page) {
@@ -392,6 +395,8 @@
io_page->p_page = page;
atomic_set(&io_page->p_count, 1);
get_page(page);
+ set_page_writeback(page);
+ ClearPageError(page);
for (bh = head = page_buffers(page), block_start = 0;
bh != head || !block_start;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3ecc6e4..80bbc9c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -230,7 +230,7 @@
}
/* Zero out all of the reserved backup group descriptor table blocks */
- ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+ ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
block, sbi->s_itb_per_group);
err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
GFP_NOFS);
@@ -248,7 +248,7 @@
/* Zero out all of the inode table blocks */
block = input->inode_table;
- ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+ ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
block, sbi->s_itb_per_group);
err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
if (err)
@@ -499,12 +499,12 @@
return err;
exit_inode:
- /* ext4_journal_release_buffer(handle, iloc.bh); */
+ /* ext4_handle_release_buffer(handle, iloc.bh); */
brelse(iloc.bh);
exit_dindj:
- /* ext4_journal_release_buffer(handle, dind); */
+ /* ext4_handle_release_buffer(handle, dind); */
exit_sbh:
- /* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
+ /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */
exit_dind:
brelse(dind);
exit_bh:
@@ -586,7 +586,7 @@
/*
int j;
for (j = 0; j < i; j++)
- ext4_journal_release_buffer(handle, primary[j]);
+ ext4_handle_release_buffer(handle, primary[j]);
*/
goto exit_bh;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 203f9e4..22546ad 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -54,9 +54,9 @@
static struct proc_dir_entry *ext4_proc_root;
static struct kset *ext4_kset;
-struct ext4_lazy_init *ext4_li_info;
-struct mutex ext4_li_mtx;
-struct ext4_features *ext4_feat;
+static struct ext4_lazy_init *ext4_li_info;
+static struct mutex ext4_li_mtx;
+static struct ext4_features *ext4_feat;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
@@ -75,6 +75,7 @@
static int ext4_freeze(struct super_block *sb);
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data);
+static int ext4_feature_set_ok(struct super_block *sb, int readonly);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
@@ -594,7 +595,7 @@
vaf.fmt = fmt;
vaf.va = &args;
- printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
+ printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
sb->s_id, function, line, grp);
if (ino)
printk(KERN_CONT "inode %lu: ", ino);
@@ -997,13 +998,10 @@
if (test_opt(sb, OLDALLOC))
seq_puts(seq, ",oldalloc");
#ifdef CONFIG_EXT4_FS_XATTR
- if (test_opt(sb, XATTR_USER) &&
- !(def_mount_opts & EXT4_DEFM_XATTR_USER))
+ if (test_opt(sb, XATTR_USER))
seq_puts(seq, ",user_xattr");
- if (!test_opt(sb, XATTR_USER) &&
- (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
+ if (!test_opt(sb, XATTR_USER))
seq_puts(seq, ",nouser_xattr");
- }
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
@@ -1041,8 +1039,8 @@
!(def_mount_opts & EXT4_DEFM_NODELALLOC))
seq_puts(seq, ",nodelalloc");
- if (test_opt(sb, MBLK_IO_SUBMIT))
- seq_puts(seq, ",mblk_io_submit");
+ if (!test_opt(sb, MBLK_IO_SUBMIT))
+ seq_puts(seq, ",nomblk_io_submit");
if (sbi->s_stripe)
seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
/*
@@ -1451,7 +1449,7 @@
* Initialize args struct so we know whether arg was
* found; some options take optional arguments.
*/
- args[0].to = args[0].from = 0;
+ args[0].to = args[0].from = NULL;
token = match_token(p, tokens, args);
switch (token) {
case Opt_bsd_df:
@@ -1771,7 +1769,7 @@
return 0;
if (option < 0 || option > (1 << 30))
return 0;
- if (!is_power_of_2(option)) {
+ if (option && !is_power_of_2(option)) {
ext4_msg(sb, KERN_ERR,
"EXT4-fs: inode_readahead_blks"
" must be a power of 2");
@@ -2120,6 +2118,13 @@
return;
}
+ /* Check if feature set would not allow a r/w mount */
+ if (!ext4_feature_set_ok(sb, 0)) {
+ ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
+ "unknown ROCOMPAT features");
+ return;
+ }
+
if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
if (es->s_last_orphan)
jbd_debug(1, "Errors on filesystem, "
@@ -2412,7 +2417,7 @@
if (parse_strtoul(buf, 0x40000000, &t))
return -EINVAL;
- if (!is_power_of_2(t))
+ if (t && !is_power_of_2(t))
return -EINVAL;
sbi->s_inode_readahead_blks = t;
@@ -3095,14 +3100,14 @@
}
if (def_mount_opts & EXT4_DEFM_UID16)
set_opt(sb, NO_UID32);
+ /* xattr user namespace & acls are now defaulted on */
#ifdef CONFIG_EXT4_FS_XATTR
- if (def_mount_opts & EXT4_DEFM_XATTR_USER)
- set_opt(sb, XATTR_USER);
+ set_opt(sb, XATTR_USER);
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
- if (def_mount_opts & EXT4_DEFM_ACL)
- set_opt(sb, POSIX_ACL);
+ set_opt(sb, POSIX_ACL);
#endif
+ set_opt(sb, MBLK_IO_SUBMIT);
if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
set_opt(sb, JOURNAL_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3516,7 +3521,7 @@
* concurrency isn't really necessary. Limit it to 1.
*/
EXT4_SB(sb)->dio_unwritten_wq =
- alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
+ alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
if (!EXT4_SB(sb)->dio_unwritten_wq) {
printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
goto failed_mount_wq;
@@ -3531,17 +3536,16 @@
if (IS_ERR(root)) {
ext4_msg(sb, KERN_ERR, "get root inode failed");
ret = PTR_ERR(root);
+ root = NULL;
goto failed_mount4;
}
if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
- iput(root);
ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
goto failed_mount4;
}
sb->s_root = d_alloc_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
- iput(root);
ret = -ENOMEM;
goto failed_mount4;
}
@@ -3657,6 +3661,8 @@
goto failed_mount;
failed_mount4:
+ iput(root);
+ sb->s_root = NULL;
ext4_msg(sb, KERN_ERR, "mount failed");
destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq);
failed_mount_wq:
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fc32176..b545ca1 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -735,7 +735,7 @@
int offset = (char *)s->here - bs->bh->b_data;
unlock_buffer(bs->bh);
- jbd2_journal_release_buffer(handle, bs->bh);
+ ext4_handle_release_buffer(handle, bs->bh);
if (ce) {
mb_cache_entry_release(ce);
ce = NULL;
@@ -833,7 +833,7 @@
new_bh = sb_getblk(sb, block);
if (!new_bh) {
getblk_failed:
- ext4_free_blocks(handle, inode, 0, block, 1,
+ ext4_free_blocks(handle, inode, NULL, block, 1,
EXT4_FREE_BLOCKS_METADATA);
error = -EIO;
goto cleanup;
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 27e79c2..a32dcae 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -432,13 +432,35 @@
int h_err;
/* Flags [no locking] */
- unsigned int h_sync: 1; /* sync-on-close */
- unsigned int h_jdata: 1; /* force data journaling */
- unsigned int h_aborted: 1; /* fatal error on handle */
+ unsigned int h_sync:1; /* sync-on-close */
+ unsigned int h_jdata:1; /* force data journaling */
+ unsigned int h_aborted:1; /* fatal error on handle */
+ unsigned int h_cowing:1; /* COWing block to snapshot */
+
+ /* Number of buffers requested by user:
+ * (before adding the COW credits factor) */
+ unsigned int h_base_credits:14;
+
+ /* Number of buffers the user is allowed to dirty:
+ * (counts only buffers dirtied when !h_cowing) */
+ unsigned int h_user_credits:14;
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map h_lockdep_map;
#endif
+
+#ifdef CONFIG_JBD2_DEBUG
+ /* COW debugging counters: */
+ unsigned int h_cow_moved; /* blocks moved to snapshot */
+ unsigned int h_cow_copied; /* blocks copied to snapshot */
+ unsigned int h_cow_ok_jh; /* blocks already COWed during current
+ transaction */
+ unsigned int h_cow_ok_bitmap; /* blocks not set in COW bitmap */
+ unsigned int h_cow_ok_mapped;/* blocks already mapped in snapshot */
+ unsigned int h_cow_bitmaps; /* COW bitmaps created */
+ unsigned int h_cow_excluded; /* blocks set in exclude bitmap */
+#endif
};
diff --git a/include/linux/journal-head.h b/include/linux/journal-head.h
index 525aac3..44e95d0 100644
--- a/include/linux/journal-head.h
+++ b/include/linux/journal-head.h
@@ -41,6 +41,13 @@
unsigned b_modified;
/*
+ * This feild tracks the last transaction id in which this buffer
+ * has been cowed
+ * [jbd_lock_bh_state()]
+ */
+ unsigned b_cow_tid;
+
+ /*
* Copy of the buffer data frozen for writing to the log.
* [jbd_lock_bh_state()]
*/
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index e5e345f..e09592d 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -21,8 +21,7 @@
TP_ARGS(inode),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( uid_t, uid )
@@ -31,8 +30,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->uid = inode->i_uid;
@@ -41,9 +39,9 @@
),
TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu",
- __entry->dev_major, __entry->dev_minor,
- (unsigned long) __entry->ino, __entry->mode,
- __entry->uid, __entry->gid,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->mode, __entry->uid, __entry->gid,
(unsigned long long) __entry->blocks)
);
@@ -53,21 +51,19 @@
TP_ARGS(dir, mode),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(dir->i_sb->s_dev);
- __entry->dev_minor = MINOR(dir->i_sb->s_dev);
+ __entry->dev = dir->i_sb->s_dev;
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
TP_printk("dev %d,%d dir %lu mode 0%o",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->dir, __entry->mode)
);
@@ -77,23 +73,21 @@
TP_ARGS(inode, dir, mode),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long) __entry->dir, __entry->mode)
);
@@ -104,21 +98,19 @@
TP_ARGS(inode),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( int, nlink )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->nlink = inode->i_nlink;
),
TP_printk("dev %d,%d ino %lu nlink %d",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->nlink)
);
@@ -128,21 +120,19 @@
TP_ARGS(inode, drop),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( int, drop )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->drop = drop;
),
TP_printk("dev %d,%d ino %lu drop %d",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->drop)
);
@@ -152,21 +142,19 @@
TP_ARGS(inode, IP),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field(unsigned long, ip )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->ip = IP;
),
TP_printk("dev %d,%d ino %lu caller %pF",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, (void *)__entry->ip)
);
@@ -176,21 +164,19 @@
TP_ARGS(inode, new_size),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, new_size )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->new_size = new_size;
),
TP_printk("dev %d,%d ino %lu new_size %lld",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(long long) __entry->new_size)
);
@@ -203,8 +189,7 @@
TP_ARGS(inode, pos, len, flags),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
@@ -212,8 +197,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
@@ -221,7 +205,7 @@
),
TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->pos, __entry->len, __entry->flags)
);
@@ -249,8 +233,7 @@
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
@@ -258,8 +241,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
@@ -267,9 +249,9 @@
),
TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
- __entry->dev_major, __entry->dev_minor,
- (unsigned long) __entry->ino, __entry->pos,
- __entry->len, __entry->copied)
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->pos, __entry->len, __entry->copied)
);
DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
@@ -310,22 +292,20 @@
TP_ARGS(inode, page),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->index = page->index;
),
TP_printk("dev %d,%d ino %lu page_index %lu",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->index)
);
@@ -335,43 +315,39 @@
TP_ARGS(inode, wbc),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( long, nr_to_write )
__field( long, pages_skipped )
__field( loff_t, range_start )
__field( loff_t, range_end )
+ __field( int, sync_mode )
__field( char, for_kupdate )
- __field( char, for_reclaim )
__field( char, range_cyclic )
__field( pgoff_t, writeback_index )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
__entry->range_start = wbc->range_start;
__entry->range_end = wbc->range_end;
+ __entry->sync_mode = wbc->sync_mode;
__entry->for_kupdate = wbc->for_kupdate;
- __entry->for_reclaim = wbc->for_reclaim;
__entry->range_cyclic = wbc->range_cyclic;
__entry->writeback_index = inode->i_mapping->writeback_index;
),
TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
- "range_start %llu range_end %llu "
- "for_kupdate %d for_reclaim %d "
- "range_cyclic %d writeback_index %lu",
- __entry->dev_major, __entry->dev_minor,
+ "range_start %llu range_end %llu sync_mode %d"
+ "for_kupdate %d range_cyclic %d writeback_index %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->nr_to_write,
__entry->pages_skipped, __entry->range_start,
- __entry->range_end,
- __entry->for_kupdate, __entry->for_reclaim,
- __entry->range_cyclic,
+ __entry->range_end, __entry->sync_mode,
+ __entry->for_kupdate, __entry->range_cyclic,
(unsigned long) __entry->writeback_index)
);
@@ -381,8 +357,7 @@
TP_ARGS(inode, mpd),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, b_blocknr )
__field( __u32, b_size )
@@ -390,11 +365,11 @@
__field( unsigned long, first_page )
__field( int, io_done )
__field( int, pages_written )
+ __field( int, sync_mode )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->b_blocknr = mpd->b_blocknr;
__entry->b_size = mpd->b_size;
@@ -402,14 +377,18 @@
__entry->first_page = mpd->first_page;
__entry->io_done = mpd->io_done;
__entry->pages_written = mpd->pages_written;
+ __entry->sync_mode = mpd->wbc->sync_mode;
),
- TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d",
- __entry->dev_major, __entry->dev_minor,
+ TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x "
+ "first_page %lu io_done %d pages_written %d sync_mode %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->b_blocknr, __entry->b_size,
__entry->b_state, __entry->first_page,
- __entry->io_done, __entry->pages_written)
+ __entry->io_done, __entry->pages_written,
+ __entry->sync_mode
+ )
);
TRACE_EVENT(ext4_da_writepages_result,
@@ -419,35 +398,100 @@
TP_ARGS(inode, wbc, ret, pages_written),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( int, ret )
__field( int, pages_written )
__field( long, pages_skipped )
+ __field( int, sync_mode )
__field( char, more_io )
__field( pgoff_t, writeback_index )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->ret = ret;
__entry->pages_written = pages_written;
__entry->pages_skipped = wbc->pages_skipped;
+ __entry->sync_mode = wbc->sync_mode;
__entry->more_io = wbc->more_io;
__entry->writeback_index = inode->i_mapping->writeback_index;
),
- TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu",
- __entry->dev_major, __entry->dev_minor,
+ TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld "
+ " more_io %d sync_mode %d writeback_index %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->ret,
__entry->pages_written, __entry->pages_skipped,
- __entry->more_io,
+ __entry->more_io, __entry->sync_mode,
(unsigned long) __entry->writeback_index)
);
+DECLARE_EVENT_CLASS(ext4__page_op,
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page),
+
+ TP_STRUCT__entry(
+ __field( pgoff_t, index )
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+
+ ),
+
+ TP_fast_assign(
+ __entry->index = page->index;
+ __entry->ino = page->mapping->host->i_ino;
+ __entry->dev = page->mapping->host->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %lu page_index %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->index)
+);
+
+DEFINE_EVENT(ext4__page_op, ext4_readpage,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page)
+);
+
+DEFINE_EVENT(ext4__page_op, ext4_releasepage,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page)
+);
+
+TRACE_EVENT(ext4_invalidatepage,
+ TP_PROTO(struct page *page, unsigned long offset),
+
+ TP_ARGS(page, offset),
+
+ TP_STRUCT__entry(
+ __field( pgoff_t, index )
+ __field( unsigned long, offset )
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+
+ ),
+
+ TP_fast_assign(
+ __entry->index = page->index;
+ __entry->offset = offset;
+ __entry->ino = page->mapping->host->i_ino;
+ __entry->dev = page->mapping->host->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->index, __entry->offset)
+);
+
TRACE_EVENT(ext4_discard_blocks,
TP_PROTO(struct super_block *sb, unsigned long long blk,
unsigned long long count),
@@ -455,22 +499,20 @@
TP_ARGS(sb, blk, count),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( __u64, blk )
__field( __u64, count )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(sb->s_dev);
- __entry->dev_minor = MINOR(sb->s_dev);
+ __entry->dev = sb->s_dev;
__entry->blk = blk;
__entry->count = count;
),
TP_printk("dev %d,%d blk %llu count %llu",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->blk, __entry->count)
);
@@ -481,8 +523,7 @@
TP_ARGS(ac, pa),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
@@ -491,8 +532,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(ac->ac_sb->s_dev);
- __entry->dev_minor = MINOR(ac->ac_sb->s_dev);
+ __entry->dev = ac->ac_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
@@ -500,9 +540,9 @@
),
TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu",
- __entry->dev_major, __entry->dev_minor,
- (unsigned long) __entry->ino, __entry->pa_pstart,
- __entry->pa_len, __entry->pa_lstart)
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
);
DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa,
@@ -530,8 +570,7 @@
TP_ARGS(sb, inode, pa, block, count),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, block )
__field( __u32, count )
@@ -539,16 +578,16 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(sb->s_dev);
- __entry->dev_minor = MINOR(sb->s_dev);
+ __entry->dev = sb->s_dev;
__entry->ino = inode->i_ino;
__entry->block = block;
__entry->count = count;
),
TP_printk("dev %d,%d ino %lu block %llu count %u",
- __entry->dev_major, __entry->dev_minor,
- (unsigned long) __entry->ino, __entry->block, __entry->count)
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->block, __entry->count)
);
TRACE_EVENT(ext4_mb_release_group_pa,
@@ -558,22 +597,20 @@
TP_ARGS(sb, pa),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(sb->s_dev);
- __entry->dev_minor = MINOR(sb->s_dev);
+ __entry->dev = sb->s_dev;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
),
TP_printk("dev %d,%d pstart %llu len %u",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->pa_pstart, __entry->pa_len)
);
@@ -583,20 +620,18 @@
TP_ARGS(inode),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
),
TP_printk("dev %d,%d ino %lu",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino)
);
@@ -606,20 +641,19 @@
TP_ARGS(sb, needed),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( int, needed )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(sb->s_dev);
- __entry->dev_minor = MINOR(sb->s_dev);
+ __entry->dev = sb->s_dev;
__entry->needed = needed;
),
TP_printk("dev %d,%d needed %d",
- __entry->dev_major, __entry->dev_minor, __entry->needed)
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->needed)
);
TRACE_EVENT(ext4_request_blocks,
@@ -628,8 +662,7 @@
TP_ARGS(ar),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned int, flags )
__field( unsigned int, len )
@@ -642,8 +675,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
+ __entry->dev = ar->inode->i_sb->s_dev;
__entry->ino = ar->inode->i_ino;
__entry->flags = ar->flags;
__entry->len = ar->len;
@@ -655,8 +687,9 @@
__entry->pright = ar->pright;
),
- TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
- __entry->dev_major, __entry->dev_minor,
+ TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu "
+ "lleft %llu lright %llu pleft %llu pright %llu ",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->flags, __entry->len,
(unsigned long long) __entry->logical,
@@ -673,8 +706,7 @@
TP_ARGS(ar, block),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, block )
__field( unsigned int, flags )
@@ -688,8 +720,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
+ __entry->dev = ar->inode->i_sb->s_dev;
__entry->ino = ar->inode->i_ino;
__entry->block = block;
__entry->flags = ar->flags;
@@ -702,10 +733,11 @@
__entry->pright = ar->pright;
),
- TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
- __entry->dev_major, __entry->dev_minor,
- (unsigned long) __entry->ino, __entry->flags,
- __entry->len, __entry->block,
+ TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu "
+ "goal %llu lleft %llu lright %llu pleft %llu pright %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->flags, __entry->len, __entry->block,
(unsigned long long) __entry->logical,
(unsigned long long) __entry->goal,
(unsigned long long) __entry->lleft,
@@ -721,8 +753,7 @@
TP_ARGS(inode, block, count, flags),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, block )
@@ -731,8 +762,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->block = block;
@@ -741,20 +771,19 @@
),
TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->mode, __entry->block, __entry->count,
__entry->flags)
);
-TRACE_EVENT(ext4_sync_file,
+TRACE_EVENT(ext4_sync_file_enter,
TP_PROTO(struct file *file, int datasync),
TP_ARGS(file, datasync),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( ino_t, parent )
__field( int, datasync )
@@ -763,39 +792,60 @@
TP_fast_assign(
struct dentry *dentry = file->f_path.dentry;
- __entry->dev_major = MAJOR(dentry->d_inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(dentry->d_inode->i_sb->s_dev);
+ __entry->dev = dentry->d_inode->i_sb->s_dev;
__entry->ino = dentry->d_inode->i_ino;
__entry->datasync = datasync;
__entry->parent = dentry->d_parent->d_inode->i_ino;
),
TP_printk("dev %d,%d ino %ld parent %ld datasync %d ",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
(unsigned long) __entry->parent, __entry->datasync)
);
+TRACE_EVENT(ext4_sync_file_exit,
+ TP_PROTO(struct inode *inode, int ret),
+
+ TP_ARGS(inode, ret),
+
+ TP_STRUCT__entry(
+ __field( int, ret )
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ ),
+
+ TP_fast_assign(
+ __entry->ret = ret;
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %ld ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->ret)
+);
+
TRACE_EVENT(ext4_sync_fs,
TP_PROTO(struct super_block *sb, int wait),
TP_ARGS(sb, wait),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( int, wait )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(sb->s_dev);
- __entry->dev_minor = MINOR(sb->s_dev);
+ __entry->dev = sb->s_dev;
__entry->wait = wait;
),
- TP_printk("dev %d,%d wait %d", __entry->dev_major,
- __entry->dev_minor, __entry->wait)
+ TP_printk("dev %d,%d wait %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->wait)
);
TRACE_EVENT(ext4_alloc_da_blocks,
@@ -804,23 +854,21 @@
TP_ARGS(inode),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( unsigned int, data_blocks )
__field( unsigned int, meta_blocks )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
),
TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->data_blocks, __entry->meta_blocks)
);
@@ -831,8 +879,7 @@
TP_ARGS(ac),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( __u16, found )
__field( __u16, groups )
@@ -855,8 +902,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev);
+ __entry->dev = ac->ac_inode->i_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->found = ac->ac_found;
__entry->flags = ac->ac_flags;
@@ -881,7 +927,7 @@
TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
"result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x "
"tail %u broken %u",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->orig_group, __entry->orig_start,
__entry->orig_len, __entry->orig_logical,
@@ -900,8 +946,7 @@
TP_ARGS(ac),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( __u32, orig_logical )
__field( int, orig_start )
@@ -914,8 +959,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev);
+ __entry->dev = ac->ac_inode->i_sb->s_dev;
__entry->ino = ac->ac_inode->i_ino;
__entry->orig_logical = ac->ac_o_ex.fe_logical;
__entry->orig_start = ac->ac_o_ex.fe_start;
@@ -928,7 +972,7 @@
),
TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->orig_group, __entry->orig_start,
__entry->orig_len, __entry->orig_logical,
@@ -946,8 +990,7 @@
TP_ARGS(sb, inode, group, start, len),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( int, result_start )
__field( __u32, result_group )
@@ -955,8 +998,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(sb->s_dev);
- __entry->dev_minor = MINOR(sb->s_dev);
+ __entry->dev = sb->s_dev;
__entry->ino = inode ? inode->i_ino : 0;
__entry->result_start = start;
__entry->result_group = group;
@@ -964,7 +1006,7 @@
),
TP_printk("dev %d,%d inode %lu extent %u/%d/%u ",
- __entry->dev_major, __entry->dev_minor,
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->result_group, __entry->result_start,
__entry->result_len)
@@ -998,8 +1040,7 @@
TP_ARGS(inode, is_metadata, block),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( int, is_metadata )
@@ -1007,8 +1048,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->is_metadata = is_metadata;
@@ -1016,9 +1056,9 @@
),
TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu",
- __entry->dev_major, __entry->dev_minor,
- (unsigned long) __entry->ino, __entry->mode,
- __entry->is_metadata, __entry->block)
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->mode, __entry->is_metadata, __entry->block)
);
TRACE_EVENT(ext4_da_update_reserve_space,
@@ -1027,8 +1067,7 @@
TP_ARGS(inode, used_blocks),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
@@ -1039,8 +1078,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
@@ -1050,10 +1088,12 @@
__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
),
- TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
- __entry->dev_major, __entry->dev_minor,
- (unsigned long) __entry->ino, __entry->mode,
- (unsigned long long) __entry->i_blocks,
+ TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d "
+ "reserved_data_blocks %d reserved_meta_blocks %d "
+ "allocated_meta_blocks %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->mode, (unsigned long long) __entry->i_blocks,
__entry->used_blocks, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
);
@@ -1064,8 +1104,7 @@
TP_ARGS(inode, md_needed),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
@@ -1075,8 +1114,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
@@ -1085,8 +1123,9 @@
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
),
- TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d",
- __entry->dev_major, __entry->dev_minor,
+ TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d "
+ "reserved_data_blocks %d reserved_meta_blocks %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->mode, (unsigned long long) __entry->i_blocks,
__entry->md_needed, __entry->reserved_data_blocks,
@@ -1099,8 +1138,7 @@
TP_ARGS(inode, freed_blocks),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
@@ -1111,8 +1149,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
@@ -1122,8 +1159,10 @@
__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
),
- TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
- __entry->dev_major, __entry->dev_minor,
+ TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d "
+ "reserved_data_blocks %d reserved_meta_blocks %d "
+ "allocated_meta_blocks %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->mode, (unsigned long long) __entry->i_blocks,
__entry->freed_blocks, __entry->reserved_data_blocks,
@@ -1136,20 +1175,19 @@
TP_ARGS(sb, group),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( __u32, group )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(sb->s_dev);
- __entry->dev_minor = MINOR(sb->s_dev);
+ __entry->dev = sb->s_dev;
__entry->group = group;
),
TP_printk("dev %d,%d group %u",
- __entry->dev_major, __entry->dev_minor, __entry->group)
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->group)
);
DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load,
@@ -1166,6 +1204,349 @@
TP_ARGS(sb, group)
);
+DEFINE_EVENT(ext4__bitmap_load, ext4_read_block_bitmap_load,
+
+ TP_PROTO(struct super_block *sb, unsigned long group),
+
+ TP_ARGS(sb, group)
+);
+
+DEFINE_EVENT(ext4__bitmap_load, ext4_load_inode_bitmap,
+
+ TP_PROTO(struct super_block *sb, unsigned long group),
+
+ TP_ARGS(sb, group)
+);
+
+TRACE_EVENT(ext4_direct_IO_enter,
+ TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw),
+
+ TP_ARGS(inode, offset, len, rw),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( loff_t, pos )
+ __field( unsigned long, len )
+ __field( int, rw )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->pos = offset;
+ __entry->len = len;
+ __entry->rw = rw;
+ ),
+
+ TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long long) __entry->pos, __entry->len, __entry->rw)
+);
+
+TRACE_EVENT(ext4_direct_IO_exit,
+ TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw, int ret),
+
+ TP_ARGS(inode, offset, len, rw, ret),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( loff_t, pos )
+ __field( unsigned long, len )
+ __field( int, rw )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->pos = offset;
+ __entry->len = len;
+ __entry->rw = rw;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long long) __entry->pos, __entry->len,
+ __entry->rw, __entry->ret)
+);
+
+TRACE_EVENT(ext4_fallocate_enter,
+ TP_PROTO(struct inode *inode, loff_t offset, loff_t len, int mode),
+
+ TP_ARGS(inode, offset, len, mode),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( loff_t, pos )
+ __field( loff_t, len )
+ __field( int, mode )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->pos = offset;
+ __entry->len = len;
+ __entry->mode = mode;
+ ),
+
+ TP_printk("dev %d,%d ino %ld pos %llu len %llu mode %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long long) __entry->pos,
+ (unsigned long long) __entry->len, __entry->mode)
+);
+
+TRACE_EVENT(ext4_fallocate_exit,
+ TP_PROTO(struct inode *inode, loff_t offset, unsigned int max_blocks, int ret),
+
+ TP_ARGS(inode, offset, max_blocks, ret),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( loff_t, pos )
+ __field( unsigned, blocks )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->pos = offset;
+ __entry->blocks = max_blocks;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev %d,%d ino %ld pos %llu blocks %d ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned long long) __entry->pos, __entry->blocks,
+ __entry->ret)
+);
+
+TRACE_EVENT(ext4_unlink_enter,
+ TP_PROTO(struct inode *parent, struct dentry *dentry),
+
+ TP_ARGS(parent, dentry),
+
+ TP_STRUCT__entry(
+ __field( ino_t, parent )
+ __field( ino_t, ino )
+ __field( loff_t, size )
+ __field( dev_t, dev )
+ ),
+
+ TP_fast_assign(
+ __entry->parent = parent->i_ino;
+ __entry->ino = dentry->d_inode->i_ino;
+ __entry->size = dentry->d_inode->i_size;
+ __entry->dev = dentry->d_inode->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %ld size %lld parent %ld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino, __entry->size,
+ (unsigned long) __entry->parent)
+);
+
+TRACE_EVENT(ext4_unlink_exit,
+ TP_PROTO(struct dentry *dentry, int ret),
+
+ TP_ARGS(dentry, ret),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = dentry->d_inode->i_ino;
+ __entry->dev = dentry->d_inode->i_sb->s_dev;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev %d,%d ino %ld ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ __entry->ret)
+);
+
+DECLARE_EVENT_CLASS(ext4__truncate,
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( blkcnt_t, blocks )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->blocks = inode->i_blocks;
+ ),
+
+ TP_printk("dev %d,%d ino %lu blocks %lu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino, (unsigned long) __entry->blocks)
+);
+
+DEFINE_EVENT(ext4__truncate, ext4_truncate_enter,
+
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
+DEFINE_EVENT(ext4__truncate, ext4_truncate_exit,
+
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode)
+);
+
+DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
+ TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+ unsigned len, unsigned flags),
+
+ TP_ARGS(inode, lblk, len, flags),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( ext4_lblk_t, lblk )
+ __field( unsigned, len )
+ __field( unsigned, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->lblk = lblk;
+ __entry->len = len;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev %d,%d ino %lu lblk %u len %u flags %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned) __entry->lblk, __entry->len, __entry->flags)
+);
+
+DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter,
+ TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+ unsigned len, unsigned flags),
+
+ TP_ARGS(inode, lblk, len, flags)
+);
+
+DEFINE_EVENT(ext4__map_blocks_enter, ext4_ind_map_blocks_enter,
+ TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+ unsigned len, unsigned flags),
+
+ TP_ARGS(inode, lblk, len, flags)
+);
+
+DECLARE_EVENT_CLASS(ext4__map_blocks_exit,
+ TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, unsigned len, int ret),
+
+ TP_ARGS(inode, lblk, pblk, len, ret),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( ext4_lblk_t, lblk )
+ __field( ext4_fsblk_t, pblk )
+ __field( unsigned, len )
+ __field( int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->lblk = lblk;
+ __entry->pblk = pblk;
+ __entry->len = len;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u ret %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned) __entry->lblk, (unsigned long long) __entry->pblk,
+ __entry->len, __entry->ret)
+);
+
+DEFINE_EVENT(ext4__map_blocks_exit, ext4_ext_map_blocks_exit,
+ TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, unsigned len, int ret),
+
+ TP_ARGS(inode, lblk, pblk, len, ret)
+);
+
+DEFINE_EVENT(ext4__map_blocks_exit, ext4_ind_map_blocks_exit,
+ TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
+ ext4_fsblk_t pblk, unsigned len, int ret),
+
+ TP_ARGS(inode, lblk, pblk, len, ret)
+);
+
+TRACE_EVENT(ext4_ext_load_extent,
+ TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk),
+
+ TP_ARGS(inode, lblk, pblk),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ __field( ext4_lblk_t, lblk )
+ __field( ext4_fsblk_t, pblk )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ __entry->lblk = lblk;
+ __entry->pblk = pblk;
+ ),
+
+ TP_printk("dev %d,%d ino %lu lblk %u pblk %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino,
+ (unsigned) __entry->lblk, (unsigned long long) __entry->pblk)
+);
+
+TRACE_EVENT(ext4_load_inode,
+ TP_PROTO(struct inode *inode),
+
+ TP_ARGS(inode),
+
+ TP_STRUCT__entry(
+ __field( ino_t, ino )
+ __field( dev_t, dev )
+ ),
+
+ TP_fast_assign(
+ __entry->ino = inode->i_ino;
+ __entry->dev = inode->i_sb->s_dev;
+ ),
+
+ TP_printk("dev %d,%d ino %ld",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long) __entry->ino)
+);
+
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 7447ea9..bf16545 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -17,19 +17,17 @@
TP_ARGS(journal, result),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( int, result )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
- __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
+ __entry->dev = journal->j_fs_dev->bd_dev;
__entry->result = result;
),
- TP_printk("dev %d,%d result %d",
- __entry->dev_major, __entry->dev_minor, __entry->result)
+ TP_printk("dev %s result %d",
+ jbd2_dev_to_name(__entry->dev), __entry->result)
);
DECLARE_EVENT_CLASS(jbd2_commit,
@@ -39,22 +37,20 @@
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
- __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
+ __entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
),
- TP_printk("dev %d,%d transaction %d sync %d",
- __entry->dev_major, __entry->dev_minor,
- __entry->transaction, __entry->sync_commit)
+ TP_printk("dev %s transaction %d sync %d",
+ jbd2_dev_to_name(__entry->dev), __entry->transaction,
+ __entry->sync_commit)
);
DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
@@ -91,24 +87,22 @@
TP_ARGS(journal, commit_transaction),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( char, sync_commit )
__field( int, transaction )
__field( int, head )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
- __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
+ __entry->dev = journal->j_fs_dev->bd_dev;
__entry->sync_commit = commit_transaction->t_synchronous_commit;
__entry->transaction = commit_transaction->t_tid;
__entry->head = journal->j_tail_sequence;
),
- TP_printk("dev %d,%d transaction %d sync %d head %d",
- __entry->dev_major, __entry->dev_minor,
- __entry->transaction, __entry->sync_commit, __entry->head)
+ TP_printk("dev %s transaction %d sync %d head %d",
+ jbd2_dev_to_name(__entry->dev), __entry->transaction,
+ __entry->sync_commit, __entry->head)
);
TRACE_EVENT(jbd2_submit_inode_data,
@@ -117,20 +111,17 @@
TP_ARGS(inode),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( ino_t, ino )
),
TP_fast_assign(
- __entry->dev_major = MAJOR(inode->i_sb->s_dev);
- __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+ __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
),
- TP_printk("dev %d,%d ino %lu",
- __entry->dev_major, __entry->dev_minor,
- (unsigned long) __entry->ino)
+ TP_printk("dev %s ino %lu",
+ jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
);
TRACE_EVENT(jbd2_run_stats,
@@ -140,8 +131,7 @@
TP_ARGS(dev, tid, stats),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( unsigned long, tid )
__field( unsigned long, wait )
__field( unsigned long, running )
@@ -154,8 +144,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(dev);
- __entry->dev_minor = MINOR(dev);
+ __entry->dev = dev;
__entry->tid = tid;
__entry->wait = stats->rs_wait;
__entry->running = stats->rs_running;
@@ -167,9 +156,9 @@
__entry->blocks_logged = stats->rs_blocks_logged;
),
- TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u "
+ TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u "
"logging %u handle_count %u blocks %u blocks_logged %u",
- __entry->dev_major, __entry->dev_minor, __entry->tid,
+ jbd2_dev_to_name(__entry->dev), __entry->tid,
jiffies_to_msecs(__entry->wait),
jiffies_to_msecs(__entry->running),
jiffies_to_msecs(__entry->locked),
@@ -186,8 +175,7 @@
TP_ARGS(dev, tid, stats),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( unsigned long, tid )
__field( unsigned long, chp_time )
__field( __u32, forced_to_close )
@@ -196,8 +184,7 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(dev);
- __entry->dev_minor = MINOR(dev);
+ __entry->dev = dev;
__entry->tid = tid;
__entry->chp_time = stats->cs_chp_time;
__entry->forced_to_close= stats->cs_forced_to_close;
@@ -205,9 +192,9 @@
__entry->dropped = stats->cs_dropped;
),
- TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u "
+ TP_printk("dev %s tid %lu chp_time %u forced_to_close %u "
"written %u dropped %u",
- __entry->dev_major, __entry->dev_minor, __entry->tid,
+ jbd2_dev_to_name(__entry->dev), __entry->tid,
jiffies_to_msecs(__entry->chp_time),
__entry->forced_to_close, __entry->written, __entry->dropped)
);
@@ -220,8 +207,7 @@
TP_ARGS(journal, first_tid, block_nr, freed),
TP_STRUCT__entry(
- __field( int, dev_major )
- __field( int, dev_minor )
+ __field( dev_t, dev )
__field( tid_t, tail_sequence )
__field( tid_t, first_tid )
__field(unsigned long, block_nr )
@@ -229,18 +215,16 @@
),
TP_fast_assign(
- __entry->dev_major = MAJOR(journal->j_fs_dev->bd_dev);
- __entry->dev_minor = MINOR(journal->j_fs_dev->bd_dev);
+ __entry->dev = journal->j_fs_dev->bd_dev;
__entry->tail_sequence = journal->j_tail_sequence;
__entry->first_tid = first_tid;
__entry->block_nr = block_nr;
__entry->freed = freed;
),
- TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
- __entry->dev_major, __entry->dev_minor,
- __entry->tail_sequence, __entry->first_tid,
- __entry->block_nr, __entry->freed)
+ TP_printk("dev %s from %u to %u offset %lu freed %lu",
+ jbd2_dev_to_name(__entry->dev), __entry->tail_sequence,
+ __entry->first_tid, __entry->block_nr, __entry->freed)
);
#endif /* _TRACE_JBD2_H */