Merge branch 'xfs-bug-fixes-for-3.15-3' into for-next
diff --git a/MAINTAINERS b/MAINTAINERS
index b2cf5cf..e75188f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9715,7 +9715,6 @@
XFS FILESYSTEM
P: Silicon Graphics Inc
M: Dave Chinner <david@fromorbit.com>
-M: Ben Myers <bpm@sgi.com>
M: xfs@oss.sgi.com
L: xfs@oss.sgi.com
W: http://oss.sgi.com/projects/xfs
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 160a548..a701752 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1194,13 +1194,19 @@
}
/*
- * For file extending writes updating i_size before data
- * writeouts complete can expose uninitialized blocks. So
- * even for AIO, we need to wait for i/o to complete before
- * returning in this case.
+ * For file extending writes updating i_size before data writeouts
+ * complete can expose uninitialized blocks in dumb filesystems.
+ * In that case we need to wait for I/O completion even if asked
+ * for an asynchronous write.
*/
- dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
- (end > i_size_read(inode)));
+ if (is_sync_kiocb(iocb))
+ dio->is_async = false;
+ else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
+ (rw & WRITE) && end > i_size_read(inode))
+ dio->is_async = false;
+ else
+ dio->is_async = true;
+
dio->inode = inode;
dio->rw = rw;
diff --git a/fs/open.c b/fs/open.c
index 4b3e1ed..c4465b2f8 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,7 +231,13 @@
return -EINVAL;
/* Return error if mode is not supported */
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+ return -EOPNOTSUPP;
+
+ /* Punch hole and zero range are mutually exclusive */
+ if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
+ (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;
/* Punch hole must have keep size set */
@@ -239,11 +245,20 @@
!(mode & FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
+ /* Collapse range should only be used exclusively. */
+ if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
+ (mode & ~FALLOC_FL_COLLAPSE_RANGE))
+ return -EINVAL;
+
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- /* It's not possible punch hole on append only file */
- if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
+ /*
+ * It's not possible to punch hole or perform collapse range
+ * on append only file
+ */
+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
+ && IS_APPEND(inode))
return -EPERM;
if (IS_IMMUTABLE(inode))
@@ -271,6 +286,14 @@
if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
return -EFBIG;
+ /*
+ * There is no need to overlap collapse range with EOF, in which case
+ * it is effectively a truncate operation
+ */
+ if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
+ (offset + len >= i_size_read(inode)))
+ return -EINVAL;
+
if (!file->f_op->fallocate)
return -EOPNOTSUPP;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 0ecec18..6888ad8 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -281,7 +281,7 @@
if (!acl)
goto set_acl;
- error = -EINVAL;
+ error = -E2BIG;
if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
return error;
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 3fc1098..0fdd410 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -89,6 +89,8 @@
/* structure must be padded to 64 bit alignment */
} xfs_agf_t;
+#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc)
+
#define XFS_AGF_MAGICNUM 0x00000001
#define XFS_AGF_VERSIONNUM 0x00000002
#define XFS_AGF_SEQNO 0x00000004
@@ -167,6 +169,8 @@
/* structure must be padded to 64 bit alignment */
} xfs_agi_t;
+#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
+
#define XFS_AGI_MAGICNUM 0x00000001
#define XFS_AGI_VERSIONNUM 0x00000002
#define XFS_AGI_SEQNO 0x00000004
@@ -222,6 +226,8 @@
__be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
} xfs_agfl_t;
+#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
+
/*
* tags for inode radix tree
*/
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9eab2df..c1cf6a3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -474,7 +474,6 @@
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- int agfl_ok = 1;
/*
* There is no verification of non-crc AGFLs because mkfs does not
@@ -485,15 +484,13 @@
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
- agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agfl, agfl_crc));
-
- agfl_ok = agfl_ok && xfs_agfl_verify(bp);
-
- if (!agfl_ok) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_agfl_verify(bp))
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -508,16 +505,15 @@
return;
if (!xfs_agfl_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
if (bip)
XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agfl, agfl_crc));
+ xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}
const struct xfs_buf_ops xfs_agfl_buf_ops = {
@@ -2238,19 +2234,17 @@
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- int agf_ok = 1;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agf, agf_crc));
-
- agf_ok = agf_ok && xfs_agf_verify(mp, bp);
-
- if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
- XFS_RANDOM_ALLOC_READ_AGF))) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
+ XFS_ERRTAG_ALLOC_READ_AGF,
+ XFS_RANDOM_ALLOC_READ_AGF))
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -2261,8 +2255,8 @@
struct xfs_buf_log_item *bip = bp->b_fspriv;
if (!xfs_agf_verify(mp, bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -2272,8 +2266,7 @@
if (bip)
XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agf, agf_crc));
+ xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
}
const struct xfs_buf_ops xfs_agf_buf_ops = {
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 1308542..cc1eadc 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -355,12 +355,14 @@
xfs_allocbt_read_verify(
struct xfs_buf *bp)
{
- if (!(xfs_btree_sblock_verify_crc(bp) &&
- xfs_allocbt_verify(bp))) {
- trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
+ if (!xfs_btree_sblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_allocbt_verify(bp))
xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+ if (bp->b_error) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_verifier_error(bp);
}
}
@@ -370,9 +372,9 @@
{
if (!xfs_allocbt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
}
xfs_btree_sblock_calc_crc(bp);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index dba4885..75df77d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1460,7 +1460,8 @@
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs,
xfs_get_blocks_direct,
- xfs_end_io_direct_write, NULL, 0);
+ xfs_end_io_direct_write, NULL,
+ DIO_ASYNC_EXTEND);
if (ret != -EIOCBQUEUED && iocb->private)
goto out_destroy_ioend;
} else {
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 7b126f4..fe9587f 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -213,8 +213,8 @@
struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
if (!xfs_attr3_leaf_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -224,7 +224,7 @@
if (bip)
hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF);
}
/*
@@ -239,13 +239,14 @@
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_ATTR3_LEAF_CRC_OFF)) ||
- !xfs_attr3_leaf_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_attr3_leaf_verify(bp))
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 5549d69..6e37823 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -125,7 +125,6 @@
struct xfs_mount *mp = bp->b_target->bt_mount;
char *ptr;
int len;
- bool corrupt = false;
xfs_daddr_t bno;
/* no verification of non-crc buffers */
@@ -140,11 +139,11 @@
while (len > 0) {
if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
XFS_ATTR3_RMT_CRC_OFF)) {
- corrupt = true;
+ xfs_buf_ioerror(bp, EFSBADCRC);
break;
}
if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
- corrupt = true;
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
break;
}
len -= XFS_LBSIZE(mp);
@@ -152,10 +151,9 @@
bno += mp->m_bsize;
}
- if (corrupt) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
- } else
+ if (bp->b_error)
+ xfs_verifier_error(bp);
+ else
ASSERT(len == 0);
}
@@ -180,9 +178,8 @@
while (len > 0) {
if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
- XFS_CORRUPTION_ERROR(__func__,
- XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
if (bip) {
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 152543c..5b6092e 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5378,3 +5378,196 @@
}
return error;
}
+
+/*
+ * Shift extent records to the left to cover a hole.
+ *
+ * The maximum number of extents to be shifted in a single operation
+ * is @num_exts, and @current_ext keeps track of the current extent
+ * index we have shifted. @offset_shift_fsb is the length by which each
+ * extent is shifted. If there is no hole to shift the extents
+ * into, this will be considered invalid operation and we abort immediately.
+ */
+int
+xfs_bmap_shift_extents(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ int *done,
+ xfs_fileoff_t start_fsb,
+ xfs_fileoff_t offset_shift_fsb,
+ xfs_extnum_t *current_ext,
+ xfs_fsblock_t *firstblock,
+ struct xfs_bmap_free *flist,
+ int num_exts)
+{
+ struct xfs_btree_cur *cur;
+ struct xfs_bmbt_rec_host *gotp;
+ struct xfs_bmbt_irec got;
+ struct xfs_bmbt_irec left;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp;
+ xfs_extnum_t nexts = 0;
+ xfs_fileoff_t startoff;
+ int error = 0;
+ int i;
+ int whichfork = XFS_DATA_FORK;
+ int logflags;
+ xfs_filblks_t blockcount = 0;
+
+ if (unlikely(XFS_TEST_ERROR(
+ (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+ mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ XFS_ERROR_REPORT("xfs_bmap_shift_extents",
+ XFS_ERRLEVEL_LOW, mp);
+ return XFS_ERROR(EFSCORRUPTED);
+ }
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ ASSERT(current_ext != NULL);
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ /* Read in all the extents */
+ error = xfs_iread_extents(tp, ip, whichfork);
+ if (error)
+ return error;
+ }
+
+ /*
+ * If *current_ext is 0, we would need to lookup the extent
+ * from where we would start shifting and store it in gotp.
+ */
+ if (!*current_ext) {
+ gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
+ /*
+ * gotp can be null in 2 cases: 1) if there are no extents
+ * or 2) start_fsb lies in a hole beyond which there are
+ * no extents. Either way, we are done.
+ */
+ if (!gotp) {
+ *done = 1;
+ return 0;
+ }
+ }
+
+ /* We are going to change core inode */
+ logflags = XFS_ILOG_CORE;
+
+ if (ifp->if_flags & XFS_IFBROOT) {
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+ cur->bc_private.b.firstblock = *firstblock;
+ cur->bc_private.b.flist = flist;
+ cur->bc_private.b.flags = 0;
+ } else {
+ cur = NULL;
+ logflags |= XFS_ILOG_DEXT;
+ }
+
+ while (nexts++ < num_exts &&
+ *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) {
+
+ gotp = xfs_iext_get_ext(ifp, *current_ext);
+ xfs_bmbt_get_all(gotp, &got);
+ startoff = got.br_startoff - offset_shift_fsb;
+
+ /*
+ * Before shifting extent into hole, make sure that the hole
+ * is large enough to accomodate the shift.
+ */
+ if (*current_ext) {
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
+ *current_ext - 1), &left);
+
+ if (startoff < left.br_startoff + left.br_blockcount)
+ error = XFS_ERROR(EINVAL);
+ } else if (offset_shift_fsb > got.br_startoff) {
+ /*
+ * When first extent is shifted, offset_shift_fsb
+ * should be less than the stating offset of
+ * the first extent.
+ */
+ error = XFS_ERROR(EINVAL);
+ }
+
+ if (error)
+ goto del_cursor;
+
+ if (cur) {
+ error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+ got.br_startblock,
+ got.br_blockcount,
+ &i);
+ if (error)
+ goto del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ }
+
+ /* Check if we can merge 2 adjacent extents */
+ if (*current_ext &&
+ left.br_startoff + left.br_blockcount == startoff &&
+ left.br_startblock + left.br_blockcount ==
+ got.br_startblock &&
+ left.br_state == got.br_state &&
+ left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
+ blockcount = left.br_blockcount +
+ got.br_blockcount;
+ xfs_iext_remove(ip, *current_ext, 1, 0);
+ if (cur) {
+ error = xfs_btree_delete(cur, &i);
+ if (error)
+ goto del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ }
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
+ gotp = xfs_iext_get_ext(ifp, --*current_ext);
+ xfs_bmbt_get_all(gotp, &got);
+
+ /* Make cursor point to the extent we will update */
+ if (cur) {
+ error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+ got.br_startblock,
+ got.br_blockcount,
+ &i);
+ if (error)
+ goto del_cursor;
+ XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
+ }
+
+ xfs_bmbt_set_blockcount(gotp, blockcount);
+ got.br_blockcount = blockcount;
+ } else {
+ /* We have to update the startoff */
+ xfs_bmbt_set_startoff(gotp, startoff);
+ got.br_startoff = startoff;
+ }
+
+ if (cur) {
+ error = xfs_bmbt_update(cur, got.br_startoff,
+ got.br_startblock,
+ got.br_blockcount,
+ got.br_state);
+ if (error)
+ goto del_cursor;
+ }
+
+ (*current_ext)++;
+ }
+
+ /* Check if we are done */
+ if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork))
+ *done = 1;
+
+del_cursor:
+ if (cur)
+ xfs_btree_del_cursor(cur,
+ error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+
+ xfs_trans_log_inode(tp, ip, logflags);
+
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 33b41f3..f84bd7a 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -127,6 +127,16 @@
{ BMAP_RIGHT_FILLING, "RF" }, \
{ BMAP_ATTRFORK, "ATTR" }
+
+/*
+ * This macro is used to determine how many extents will be shifted
+ * in one write transaction. We could require two splits,
+ * an extent move on the first and an extent merge on the second,
+ * So it is proper that one extent is shifted inside write transaction
+ * at a time.
+ */
+#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
+
#ifdef DEBUG
void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
int whichfork, unsigned long caller_ip);
@@ -169,5 +179,10 @@
int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
+int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+ int *done, xfs_fileoff_t start_fsb,
+ xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
+ xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
+ int num_exts);
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 706bc3f..818d546 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -780,12 +780,14 @@
xfs_bmbt_read_verify(
struct xfs_buf *bp)
{
- if (!(xfs_btree_lblock_verify_crc(bp) &&
- xfs_bmbt_verify(bp))) {
- trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
+ if (!xfs_btree_lblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_bmbt_verify(bp))
xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+ if (bp->b_error) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_verifier_error(bp);
}
}
@@ -794,11 +796,9 @@
struct xfs_buf *bp)
{
if (!xfs_bmbt_verify(bp)) {
- xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
xfs_btree_lblock_calc_crc(bp);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f264616..01f6a64 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1349,7 +1349,6 @@
* the freeing of the space succeeds at ENOSPC.
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
- tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
/*
@@ -1468,6 +1467,102 @@
}
/*
+ * xfs_collapse_file_space()
+ * This routine frees disk space and shift extent for the given file.
+ * The first thing we do is to free data blocks in the specified range
+ * by calling xfs_free_file_space(). It would also sync dirty data
+ * and invalidate page cache over the region on which collapse range
+ * is working. And Shift extent records to the left to cover a hole.
+ * RETURNS:
+ * 0 on success
+ * errno on error
+ *
+ */
+int
+xfs_collapse_file_space(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t len)
+{
+ int done = 0;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error;
+ xfs_extnum_t current_ext = 0;
+ struct xfs_bmap_free free_list;
+ xfs_fsblock_t first_block;
+ int committed;
+ xfs_fileoff_t start_fsb;
+ xfs_fileoff_t shift_fsb;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+
+ trace_xfs_collapse_file_space(ip);
+
+ start_fsb = XFS_B_TO_FSB(mp, offset + len);
+ shift_fsb = XFS_B_TO_FSB(mp, len);
+
+ error = xfs_free_file_space(ip, offset, len);
+ if (error)
+ return error;
+
+ while (!error && !done) {
+ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+ tp->t_flags |= XFS_TRANS_RESERVE;
+ /*
+ * We would need to reserve permanent block for transaction.
+ * This will come into picture when after shifting extent into
+ * hole we found that adjacent extents can be merged which
+ * may lead to freeing of a block during record update.
+ */
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+ if (error) {
+ ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+ xfs_trans_cancel(tp, 0);
+ break;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+ ip->i_gdquot, ip->i_pdquot,
+ XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto out;
+
+ xfs_trans_ijoin(tp, ip, 0);
+
+ xfs_bmap_init(&free_list, &first_block);
+
+ /*
+ * We are using the write transaction in which max 2 bmbt
+ * updates are allowed
+ */
+ error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
+ shift_fsb, ¤t_ext,
+ &first_block, &free_list,
+ XFS_BMAP_MAX_SHIFT_EXTENTS);
+ if (error)
+ goto out;
+
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
+ if (error)
+ goto out;
+
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ }
+
+ return error;
+
+out:
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+}
+
+/*
* We need to check that the format of the data fork in the temporary inode is
* valid for the target inode before doing the swap. This is not a problem with
* attr1 because of the fixed fork offset, but attr2 has a dynamically sized
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 900747b..935ed2b2 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -99,6 +99,8 @@
xfs_off_t len);
int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
+int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
+ xfs_off_t len);
/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 9adaae4..e80d59f 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -234,8 +234,7 @@
return;
if (bip)
block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_BTREE_LBLOCK_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
}
bool
@@ -243,8 +242,8 @@
struct xfs_buf *bp)
{
if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
- return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_BTREE_LBLOCK_CRC_OFF);
+ return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+
return true;
}
@@ -267,8 +266,7 @@
return;
if (bip)
block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_BTREE_SBLOCK_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}
bool
@@ -276,8 +274,8 @@
struct xfs_buf *bp)
{
if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
- return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_BTREE_SBLOCK_CRC_OFF);
+ return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+
return true;
}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 9953395..b8a3abf 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -369,6 +369,20 @@
xfs_buf_rele(bp);
}
+static inline int
+xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
+{
+ return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+ cksum_offset);
+}
+
+static inline void
+xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
+{
+ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
+ cksum_offset);
+}
+
/*
* Handling of buftargs.
*/
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 3314911..8752821 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -796,20 +796,6 @@
bip->bli_formats[i].blf_map_size = map_size;
}
-#ifdef XFS_TRANS_DEBUG
- /*
- * Allocate the arrays for tracking what needs to be logged
- * and what our callers request to be logged. bli_orig
- * holds a copy of the original, clean buffer for comparison
- * against, and bli_logged keeps a 1 bit flag per byte in
- * the buffer to indicate which bytes the callers have asked
- * to have logged.
- */
- bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
- memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
- bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
-#endif
-
/*
* Put the buf item into the list of items attached to the
* buffer at the front.
@@ -957,11 +943,6 @@
xfs_buf_item_free(
xfs_buf_log_item_t *bip)
{
-#ifdef XFS_TRANS_DEBUG
- kmem_free(bip->bli_orig);
- kmem_free(bip->bli_logged);
-#endif /* XFS_TRANS_DEBUG */
-
xfs_buf_item_free_format(bip);
kmem_zone_free(xfs_buf_item_zone, bip);
}
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index e69d57b..6cc5f67 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -185,8 +185,8 @@
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
if (!xfs_da3_node_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -196,7 +196,7 @@
if (bip)
hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
}
/*
@@ -209,18 +209,20 @@
xfs_da3_node_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_da_blkinfo *info = bp->b_addr;
switch (be16_to_cpu(info->magic)) {
case XFS_DA3_NODE_MAGIC:
- if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_DA3_NODE_CRC_OFF))
+ if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
+ xfs_buf_ioerror(bp, EFSBADCRC);
break;
+ }
/* fall through */
case XFS_DA_NODE_MAGIC:
- if (!xfs_da3_node_verify(bp))
+ if (!xfs_da3_node_verify(bp)) {
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
break;
+ }
return;
case XFS_ATTR_LEAF_MAGIC:
case XFS_ATTR3_LEAF_MAGIC:
@@ -237,8 +239,7 @@
}
/* corrupt block */
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
- xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
}
const struct xfs_buf_ops xfs_da3_node_buf_ops = {
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index e5869b5..623bbe8 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -89,6 +89,8 @@
/* structure must be padded to 64 bit alignment */
} xfs_dinode_t;
+#define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc)
+
#define DI_MAX_FLUSH 0xffff
/*
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index ce16ef0..fda4625 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -180,16 +180,23 @@
xfs_inode_t *dp,
xfs_inode_t *pdp)
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int error;
- memset((char *)&args, 0, sizeof(args));
- args.dp = dp;
- args.trans = tp;
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
+ error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
+ if (error)
return error;
- return xfs_dir2_sf_create(&args, pdp->i_ino);
+
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->dp = dp;
+ args->trans = tp;
+ error = xfs_dir2_sf_create(args, pdp->i_ino);
+ kmem_free(args);
+ return error;
}
/*
@@ -205,41 +212,56 @@
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+ rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+ if (rval)
return rval;
XFS_STATS_INC(xs_dir_create);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = inum;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_addname(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_addname(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_addname(&args);
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = inum;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_addname(args);
else
- rval = xfs_dir2_node_addname(&args);
+ rval = xfs_dir2_node_addname(args);
+
+out_free:
+ kmem_free(args);
return rval;
}
@@ -282,46 +304,66 @@
xfs_ino_t *inum, /* out: inode number */
struct xfs_name *ci_name) /* out: actual name if CI match */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_lookup);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.dp = dp;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_OKNOENT;
+ /*
+ * We need to use KM_NOFS here so that lockdep will not throw false
+ * positive deadlock warnings on a non-transactional lookup path. It is
+ * safe to recurse into inode recalim in that case, but lockdep can't
+ * easily be taught about it. Hence KM_NOFS avoids having to add more
+ * lockdep Doing this avoids having to add a bunch of lockdep class
+ * annotations into the reclaim path for the ilock.
+ */
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->dp = dp;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_OKNOENT;
if (ci_name)
- args.op_flags |= XFS_DA_OP_CILOOKUP;
+ args->op_flags |= XFS_DA_OP_CILOOKUP;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_lookup(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_lookup(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_lookup(&args);
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_lookup(args);
+ goto out_check_rval;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_lookup(args);
+ goto out_check_rval;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_lookup(args);
else
- rval = xfs_dir2_node_lookup(&args);
+ rval = xfs_dir2_node_lookup(args);
+
+out_check_rval:
if (rval == EEXIST)
rval = 0;
if (!rval) {
- *inum = args.inumber;
+ *inum = args->inumber;
if (ci_name) {
- ci_name->name = args.value;
- ci_name->len = args.valuelen;
+ ci_name->name = args->value;
+ ci_name->len = args->valuelen;
}
}
+out_free:
+ kmem_free(args);
return rval;
}
@@ -338,38 +380,51 @@
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_remove);
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = ino;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_removename(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_removename(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_removename(&args);
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = ino;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_removename(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_removename(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_removename(args);
else
- rval = xfs_dir2_node_removename(&args);
+ rval = xfs_dir2_node_removename(args);
+out_free:
+ kmem_free(args);
return rval;
}
@@ -386,40 +441,54 @@
xfs_bmap_free_t *flist, /* bmap's freeblock list */
xfs_extlen_t total) /* bmap's total block count */
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
ASSERT(S_ISDIR(dp->i_d.di_mode));
- if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
+ rval = xfs_dir_ino_validate(tp->t_mountp, inum);
+ if (rval)
return rval;
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.inumber = inum;
- args.dp = dp;
- args.firstblock = first;
- args.flist = flist;
- args.total = total;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_replace(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_replace(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_replace(&args);
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->inumber = inum;
+ args->dp = dp;
+ args->firstblock = first;
+ args->flist = flist;
+ args->total = total;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_replace(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_replace(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_replace(args);
else
- rval = xfs_dir2_node_replace(&args);
+ rval = xfs_dir2_node_replace(args);
+out_free:
+ kmem_free(args);
return rval;
}
@@ -434,7 +503,7 @@
struct xfs_name *name, /* name of entry to add */
uint resblks)
{
- xfs_da_args_t args;
+ struct xfs_da_args *args;
int rval;
int v; /* type-checking value */
@@ -443,29 +512,42 @@
ASSERT(S_ISDIR(dp->i_d.di_mode));
- memset(&args, 0, sizeof(xfs_da_args_t));
- args.name = name->name;
- args.namelen = name->len;
- args.filetype = name->type;
- args.hashval = dp->i_mount->m_dirnameops->hashname(name);
- args.dp = dp;
- args.whichfork = XFS_DATA_FORK;
- args.trans = tp;
- args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
+ args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ if (!args)
+ return ENOMEM;
+
+ args->name = name->name;
+ args->namelen = name->len;
+ args->filetype = name->type;
+ args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->dp = dp;
+ args->whichfork = XFS_DATA_FORK;
+ args->trans = tp;
+ args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
XFS_DA_OP_OKNOENT;
- if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
- rval = xfs_dir2_sf_addname(&args);
- else if ((rval = xfs_dir2_isblock(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_block_addname(&args);
- else if ((rval = xfs_dir2_isleaf(tp, dp, &v)))
- return rval;
- else if (v)
- rval = xfs_dir2_leaf_addname(&args);
+ if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
+ rval = xfs_dir2_sf_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isblock(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v) {
+ rval = xfs_dir2_block_addname(args);
+ goto out_free;
+ }
+
+ rval = xfs_dir2_isleaf(tp, dp, &v);
+ if (rval)
+ goto out_free;
+ if (v)
+ rval = xfs_dir2_leaf_addname(args);
else
- rval = xfs_dir2_node_addname(&args);
+ rval = xfs_dir2_node_addname(args);
+out_free:
+ kmem_free(args);
return rval;
}
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 90cdbf4..4f6a38c 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -89,13 +89,14 @@
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_DIR3_DATA_CRC_OFF)) ||
- !xfs_dir3_block_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_dir3_block_verify(bp))
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -107,8 +108,8 @@
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_block_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -118,7 +119,7 @@
if (bip)
hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
}
const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 70acff4..afa4ad5 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -241,7 +241,6 @@
xfs_dir3_data_reada_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_dir2_data_hdr *hdr = bp->b_addr;
switch (hdr->magic) {
@@ -255,8 +254,8 @@
xfs_dir3_data_verify(bp);
return;
default:
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
break;
}
}
@@ -267,13 +266,14 @@
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_DIR3_DATA_CRC_OFF)) ||
- !xfs_dir3_data_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_dir3_data_verify(bp))
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -285,8 +285,8 @@
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_data_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -296,7 +296,7 @@
if (bip)
hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
}
const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ae47ec6..d36e97d 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -179,13 +179,14 @@
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_DIR3_LEAF_CRC_OFF)) ||
- !xfs_dir3_leaf_verify(bp, magic)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_dir3_leaf_verify(bp, magic))
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -198,8 +199,8 @@
struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_leaf_verify(bp, magic)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -209,7 +210,7 @@
if (bip)
hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
}
static void
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 48c7d18..cb434d7 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -115,13 +115,14 @@
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if ((xfs_sb_version_hascrc(&mp->m_sb) &&
- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- XFS_DIR3_FREE_CRC_OFF)) ||
- !xfs_dir3_free_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_dir3_free_verify(bp))
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -133,8 +134,8 @@
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_dir3_free_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -144,7 +145,7 @@
if (bip)
hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF);
+ xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF);
}
const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 7aeb4c8..868b19f 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -615,7 +615,7 @@
if (flags & XFS_QMOPT_DQALLOC) {
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm,
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,
XFS_QM_DQALLOC_SPACE_RES(mp), 0);
if (error)
goto error1;
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c
index d401457..610da81 100644
--- a/fs/xfs/xfs_dquot_buf.c
+++ b/fs/xfs/xfs_dquot_buf.c
@@ -257,10 +257,13 @@
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ if (!xfs_dquot_buf_verify_crc(mp, bp))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_dquot_buf_verify(mp, bp))
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
/*
@@ -275,8 +278,8 @@
struct xfs_mount *mp = bp->b_target->bt_mount;
if (!xfs_dquot_buf_verify(mp, bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 9995b80..edac5b0 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -156,7 +156,7 @@
{
if (level <= xfs_error_level) {
xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
- "Internal error %s at line %d of file %s. Caller 0x%p",
+ "Internal error %s at line %d of file %s. Caller %pF",
tag, linenum, filename, ra);
xfs_stack_trace();
@@ -178,3 +178,28 @@
xfs_error_report(tag, level, mp, filename, linenum, ra);
xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
}
+
+/*
+ * Warnings specifically for verifier errors. Differentiate CRC vs. invalid
+ * values, and omit the stack trace unless the error level is tuned high.
+ */
+void
+xfs_verifier_error(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
+ bp->b_error == EFSBADCRC ? "CRC error" : "corruption",
+ __return_address, bp->b_bn);
+
+ xfs_alert(mp, "Unmount and run xfs_repair");
+
+ if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
+ xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:");
+ xfs_hex_dump(xfs_buf_offset(bp, 0), 64);
+ }
+
+ if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
+ xfs_stack_trace();
+}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 079a367..c1c57d4 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -34,6 +34,7 @@
extern void xfs_corruption_error(const char *tag, int level,
struct xfs_mount *mp, void *p, const char *filename,
int linenum, inst_t *ra);
+extern void xfs_verifier_error(struct xfs_buf *bp);
#define XFS_ERROR_REPORT(e, lvl, mp) \
xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 2e7989e..8fb97a6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -823,7 +823,8 @@
if (!S_ISREG(inode->i_mode))
return -EINVAL;
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;
xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -831,6 +832,20 @@
error = xfs_free_file_space(ip, offset, len);
if (error)
goto out_unlock;
+ } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+ unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+
+ if (offset & blksize_mask || len & blksize_mask) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+
+ ASSERT(offset + len < i_size_read(inode));
+ new_size = i_size_read(inode) - len;
+
+ error = xfs_collapse_file_space(ip, offset, len);
+ if (error)
+ goto out_unlock;
} else {
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
@@ -840,8 +855,11 @@
goto out_unlock;
}
- error = xfs_alloc_file_space(ip, offset, len,
- XFS_BMAPI_PREALLOC);
+ if (mode & FALLOC_FL_ZERO_RANGE)
+ error = xfs_zero_file_space(ip, offset, len);
+ else
+ error = xfs_alloc_file_space(ip, offset, len,
+ XFS_BMAPI_PREALLOC);
if (error)
goto out_unlock;
}
@@ -859,7 +877,7 @@
if (ip->i_d.di_mode & S_IXGRP)
ip->i_d.di_mode &= ~S_ISGID;
- if (!(mode & FALLOC_FL_PUNCH_HOLE))
+ if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h
index b6ab5a3..9898f31 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/xfs_format.h
@@ -145,6 +145,8 @@
__be64 sl_lsn;
};
+#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc)
+
/*
* The maximum pathlen is 1024 bytes. Since the minimum file system
* blocksize is 512 bytes, we can get a max of 3 extents back from
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 283a76d..8f711db 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1580,18 +1580,17 @@
struct xfs_buf *bp)
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- int agi_ok = 1;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agi, agi_crc));
- agi_ok = agi_ok && xfs_agi_verify(bp);
-
- if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
- XFS_RANDOM_IALLOC_READ_AGI))) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
+ XFS_ERRTAG_IALLOC_READ_AGI,
+ XFS_RANDOM_IALLOC_READ_AGI))
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -1602,8 +1601,8 @@
struct xfs_buf_log_item *bip = bp->b_fspriv;
if (!xfs_agi_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -1612,8 +1611,7 @@
if (bip)
XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_agi, agi_crc));
+ xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
}
const struct xfs_buf_ops xfs_agi_buf_ops = {
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index c8fa5bb..7e309b1 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -243,12 +243,14 @@
xfs_inobt_read_verify(
struct xfs_buf *bp)
{
- if (!(xfs_btree_sblock_verify_crc(bp) &&
- xfs_inobt_verify(bp))) {
- trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
+ if (!xfs_btree_sblock_verify_crc(bp))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_inobt_verify(bp))
xfs_buf_ioerror(bp, EFSCORRUPTED);
+
+ if (bp->b_error) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_verifier_error(bp);
}
}
@@ -258,9 +260,9 @@
{
if (!xfs_inobt_verify(bp)) {
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- bp->b_target->bt_mount, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
+ return;
}
xfs_btree_sblock_calc_crc(bp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3a137e9..5e7a38f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -42,7 +42,6 @@
#include "xfs_bmap_util.h"
#include "xfs_error.h"
#include "xfs_quota.h"
-#include "xfs_dinode.h"
#include "xfs_filestream.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
@@ -62,6 +61,8 @@
STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
+STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *);
+
/*
* helper function to extract extent size hint from inode
*/
@@ -1115,7 +1116,7 @@
{
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- ASSERT(ip->i_d.di_nlink > 0);
+ ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
ip->i_d.di_nlink++;
inc_nlink(VFS_I(ip));
if ((ip->i_d.di_version == 1) &&
@@ -1165,10 +1166,7 @@
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
- prid = xfs_get_projid(dp);
- else
- prid = XFS_PROJID_DEFAULT;
+ prid = xfs_get_initial_prid(dp);
/*
* Make sure that we have allocated dquot(s) on disk.
@@ -1333,6 +1331,113 @@
}
int
+xfs_create_tmpfile(
+ struct xfs_inode *dp,
+ struct dentry *dentry,
+ umode_t mode)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_inode *ip = NULL;
+ struct xfs_trans *tp = NULL;
+ int error;
+ uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
+ prid_t prid;
+ struct xfs_dquot *udqp = NULL;
+ struct xfs_dquot *gdqp = NULL;
+ struct xfs_dquot *pdqp = NULL;
+ struct xfs_trans_res *tres;
+ uint resblks;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ prid = xfs_get_initial_prid(dp);
+
+ /*
+ * Make sure that we have allocated dquot(s) on disk.
+ */
+ error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
+ xfs_kgid_to_gid(current_fsgid()), prid,
+ XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+ &udqp, &gdqp, &pdqp);
+ if (error)
+ return error;
+
+ resblks = XFS_IALLOC_SPACE_RES(mp);
+ tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
+
+ tres = &M_RES(mp)->tr_create_tmpfile;
+ error = xfs_trans_reserve(tp, tres, resblks, 0);
+ if (error == ENOSPC) {
+ /* No space at all so try a "no-allocation" reservation */
+ resblks = 0;
+ error = xfs_trans_reserve(tp, tres, 0, 0);
+ }
+ if (error) {
+ cancel_flags = 0;
+ goto out_trans_cancel;
+ }
+
+ error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
+ pdqp, resblks, 1, 0);
+ if (error)
+ goto out_trans_cancel;
+
+ error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
+ prid, resblks > 0, &ip, NULL);
+ if (error) {
+ if (error == ENOSPC)
+ goto out_trans_cancel;
+ goto out_trans_abort;
+ }
+
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(tp);
+
+ /*
+ * Attach the dquot(s) to the inodes and modify them incore.
+ * These ids of the inode couldn't have changed since the new
+ * inode has been locked ever since it was created.
+ */
+ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
+
+ ip->i_d.di_nlink--;
+ d_tmpfile(dentry, VFS_I(ip));
+ error = xfs_iunlink(tp, ip);
+ if (error)
+ goto out_trans_abort;
+
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ if (error)
+ goto out_release_inode;
+
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+ xfs_qm_dqrele(pdqp);
+
+ return 0;
+
+ out_trans_abort:
+ cancel_flags |= XFS_TRANS_ABORT;
+ out_trans_cancel:
+ xfs_trans_cancel(tp, cancel_flags);
+ out_release_inode:
+ /*
+ * Wait until after the current transaction is aborted to
+ * release the inode. This prevents recursive transactions
+ * and deadlocks from xfs_inactive.
+ */
+ if (ip)
+ IRELE(ip);
+
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+ xfs_qm_dqrele(pdqp);
+
+ return error;
+}
+
+int
xfs_link(
xfs_inode_t *tdp,
xfs_inode_t *sip,
@@ -1397,6 +1502,12 @@
xfs_bmap_init(&free_list, &first_block);
+ if (sip->i_d.di_nlink == 0) {
+ error = xfs_iunlink_remove(tp, sip);
+ if (error)
+ goto abort_return;
+ }
+
error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
&first_block, &free_list, resblks);
if (error)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 65e2350..396cc1f 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -20,6 +20,7 @@
#include "xfs_inode_buf.h"
#include "xfs_inode_fork.h"
+#include "xfs_dinode.h"
/*
* Kernel only inode definitions
@@ -192,6 +193,15 @@
ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
}
+static inline prid_t
+xfs_get_initial_prid(struct xfs_inode *dp)
+{
+ if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
+ return xfs_get_projid(dp);
+
+ return XFS_PROJID_DEFAULT;
+}
+
/*
* In-core inode flags.
*/
@@ -323,6 +333,8 @@
struct xfs_inode **ipp, struct xfs_name *ci_name);
int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
+int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
+ umode_t mode);
int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
struct xfs_inode *ip);
int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c
index 4fc9f39..24e9939 100644
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/xfs_inode_buf.c
@@ -102,8 +102,7 @@
}
xfs_buf_ioerror(bp, EFSCORRUPTED);
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
- mp, dip);
+ xfs_verifier_error(bp);
#ifdef DEBUG
xfs_alert(mp,
"bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -306,7 +305,7 @@
if (!xfs_sb_version_hascrc(&mp->m_sb))
return false;
if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
- offsetof(struct xfs_dinode, di_crc)))
+ XFS_DINODE_CRC_OFF))
return false;
if (be64_to_cpu(dip->di_ino) != ip->i_ino)
return false;
@@ -327,7 +326,7 @@
ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
- offsetof(struct xfs_dinode, di_crc));
+ XFS_DINODE_CRC_OFF);
dip->di_crc = xfs_end_cksum(crc);
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 22d1cbe..3b80eba 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -128,7 +128,6 @@
xfs_fsblock_t firstfsb;
xfs_extlen_t extsz, temp;
int nimaps;
- int bmapi_flag;
int quota_flag;
int rt;
xfs_trans_t *tp;
@@ -200,18 +199,15 @@
xfs_trans_ijoin(tp, ip, 0);
- bmapi_flag = 0;
- if (offset < XFS_ISIZE(ip) || extsz)
- bmapi_flag |= XFS_BMAPI_PREALLOC;
-
/*
* From this point onwards we overwrite the imap pointer that the
* caller gave to us.
*/
xfs_bmap_init(&free_list, &firstfsb);
nimaps = 1;
- error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag,
- &firstfsb, 0, imap, &nimaps, &free_list);
+ error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
+ XFS_BMAPI_PREALLOC, &firstfsb, 0,
+ imap, &nimaps, &free_list);
if (error)
goto out_bmap_cancel;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f35d5c9..89b07e4 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -39,6 +39,7 @@
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
#include "xfs_dinode.h"
+#include "xfs_trans_space.h"
#include <linux/capability.h>
#include <linux/xattr.h>
@@ -48,6 +49,18 @@
#include <linux/fiemap.h>
#include <linux/slab.h>
+/*
+ * Directories have different lock order w.r.t. mmap_sem compared to regular
+ * files. This is due to readdir potentially triggering page faults on a user
+ * buffer inside filldir(), and this happens with the ilock on the directory
+ * held. For regular files, the lock order is the other way around - the
+ * mmap_sem is taken during the page fault, and then we lock the ilock to do
+ * block mapping. Hence we need a different class for the directory ilock so
+ * that lockdep can tell them apart.
+ */
+static struct lock_class_key xfs_nondir_ilock_class;
+static struct lock_class_key xfs_dir_ilock_class;
+
static int
xfs_initxattrs(
struct inode *inode,
@@ -705,7 +718,6 @@
{
struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
- int mask = iattr->ia_valid;
xfs_off_t oldsize, newsize;
struct xfs_trans *tp;
int error;
@@ -726,8 +738,8 @@
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(S_ISREG(ip->i_d.di_mode));
- ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
- ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
+ ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
+ ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
oldsize = inode->i_size;
newsize = iattr->ia_size;
@@ -736,7 +748,7 @@
* Short circuit the truncate case for zero length files.
*/
if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
- if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
+ if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME)))
return 0;
/*
@@ -824,10 +836,11 @@
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
- if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+ if (newsize != oldsize &&
+ !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
iattr->ia_ctime = iattr->ia_mtime =
current_fs_time(inode->i_sb);
- mask |= ATTR_CTIME | ATTR_MTIME;
+ iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
}
/*
@@ -863,9 +876,9 @@
xfs_inode_clear_eofblocks_tag(ip);
}
- if (mask & ATTR_MODE)
+ if (iattr->ia_valid & ATTR_MODE)
xfs_setattr_mode(ip, iattr);
- if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+ if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
xfs_setattr_time(ip, iattr);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -1034,6 +1047,19 @@
return 0;
}
+STATIC int
+xfs_vn_tmpfile(
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode)
+{
+ int error;
+
+ error = xfs_create_tmpfile(XFS_I(dir), dentry, mode);
+
+ return -error;
+}
+
static const struct inode_operations xfs_inode_operations = {
.get_acl = xfs_get_acl,
.set_acl = xfs_set_acl,
@@ -1072,6 +1098,7 @@
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
.update_time = xfs_vn_update_time,
+ .tmpfile = xfs_vn_tmpfile,
};
static const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -1099,6 +1126,7 @@
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
.update_time = xfs_vn_update_time,
+ .tmpfile = xfs_vn_tmpfile,
};
static const struct inode_operations xfs_symlink_inode_operations = {
@@ -1191,6 +1219,7 @@
xfs_diflags_to_iflags(inode, ip);
ip->d_ops = ip->i_mount->m_nondir_inode_ops;
+ lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_op = &xfs_inode_operations;
@@ -1198,6 +1227,7 @@
inode->i_mapping->a_ops = &xfs_address_space_operations;
break;
case S_IFDIR:
+ lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
inode->i_op = &xfs_dir_ci_inode_operations;
else
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index f9bb590..825249d 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -119,6 +119,7 @@
#include "xfs_iops.h"
#include "xfs_aops.h"
#include "xfs_super.h"
+#include "xfs_cksum.h"
#include "xfs_buf.h"
#include "xfs_message.h"
@@ -178,6 +179,7 @@
#define ENOATTR ENODATA /* Attribute not found */
#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
+#define EFSBADCRC EBADMSG /* Bad CRC detected */
#define SYNCHRONIZE() barrier()
#define __return_address __builtin_return_address(0)
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index b0f4ef7..2c40044 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -175,7 +175,7 @@
struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
void xfs_log_ticket_put(struct xlog_ticket *ticket);
-int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
+void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_lsn_t *commit_lsn, int flags);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index cdebd83..7e54553 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -205,16 +205,25 @@
/*
* We 64-bit align the length of each iovec so that the start
* of the next one is naturally aligned. We'll need to
- * account for that slack space here.
+ * account for that slack space here. Then round nbytes up
+ * to 64-bit alignment so that the initial buffer alignment is
+ * easy to calculate and verify.
*/
nbytes += niovecs * sizeof(uint64_t);
+ nbytes = round_up(nbytes, sizeof(uint64_t));
/* grab the old item if it exists for reservation accounting */
old_lv = lip->li_lv;
- /* calc buffer size */
- buf_size = sizeof(struct xfs_log_vec) + nbytes +
- niovecs * sizeof(struct xfs_log_iovec);
+ /*
+ * The data buffer needs to start 64-bit aligned, so round up
+ * that space to ensure we can align it appropriately and not
+ * overrun the buffer.
+ */
+ buf_size = nbytes +
+ round_up((sizeof(struct xfs_log_vec) +
+ niovecs * sizeof(struct xfs_log_iovec)),
+ sizeof(uint64_t));
/* compare to existing item size */
if (lip->li_lv && buf_size <= lip->li_lv->lv_size) {
@@ -251,6 +260,8 @@
/* The allocated data region lies beyond the iovec region */
lv->lv_buf_len = 0;
lv->lv_buf = (char *)lv + buf_size - nbytes;
+ ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
+
lip->li_ops->iop_format(lip, lv);
insert:
ASSERT(lv->lv_buf_len <= nbytes);
@@ -488,13 +499,6 @@
cil->xc_ctx = new_ctx;
/*
- * mirror the new sequence into the cil structure so that we can do
- * unlocked checks against the current sequence in log forces without
- * risking deferencing a freed context pointer.
- */
- cil->xc_current_sequence = new_ctx->sequence;
-
- /*
* The switch is now done, so we can drop the context lock and move out
* of a shared context. We can't just go straight to the commit record,
* though - we need to synchronise with previous and future commits so
@@ -512,8 +516,15 @@
* Hence we need to add this context to the committing context list so
* that higher sequences will wait for us to write out a commit record
* before they do.
+ *
+ * xfs_log_force_lsn requires us to mirror the new sequence into the cil
+ * structure atomically with the addition of this sequence to the
+ * committing list. This also ensures that we can do unlocked checks
+ * against the current sequence in log forces without risking
+ * deferencing a freed context pointer.
*/
spin_lock(&cil->xc_push_lock);
+ cil->xc_current_sequence = new_ctx->sequence;
list_add(&ctx->committing, &cil->xc_committing);
spin_unlock(&cil->xc_push_lock);
up_write(&cil->xc_ctx_lock);
@@ -651,8 +662,14 @@
}
+/*
+ * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
+ * number that is passed. When it returns, the work will be queued for
+ * @push_seq, but it won't be completed. The caller is expected to do any
+ * waiting for push_seq to complete if it is required.
+ */
static void
-xlog_cil_push_foreground(
+xlog_cil_push_now(
struct xlog *log,
xfs_lsn_t push_seq)
{
@@ -677,10 +694,8 @@
}
cil->xc_push_seq = push_seq;
+ queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
spin_unlock(&cil->xc_push_lock);
-
- /* do the push now */
- xlog_cil_push(log);
}
bool
@@ -710,7 +725,7 @@
* background commit, returns without it held once background commits are
* allowed again.
*/
-int
+void
xfs_log_commit_cil(
struct xfs_mount *mp,
struct xfs_trans *tp,
@@ -756,7 +771,6 @@
xlog_cil_push_background(log);
up_read(&cil->xc_ctx_lock);
- return 0;
}
/*
@@ -785,7 +799,8 @@
* xlog_cil_push() handles racing pushes for the same sequence,
* so no need to deal with it here.
*/
- xlog_cil_push_foreground(log, sequence);
+restart:
+ xlog_cil_push_now(log, sequence);
/*
* See if we can find a previous sequence still committing.
@@ -793,7 +808,6 @@
* before allowing the force of push_seq to go ahead. Hence block
* on commits for those as well.
*/
-restart:
spin_lock(&cil->xc_push_lock);
list_for_each_entry(ctx, &cil->xc_committing, committing) {
if (ctx->sequence > sequence)
@@ -811,6 +825,28 @@
/* found it! */
commit_lsn = ctx->commit_lsn;
}
+
+ /*
+ * The call to xlog_cil_push_now() executes the push in the background.
+ * Hence by the time we have got here it our sequence may not have been
+ * pushed yet. This is true if the current sequence still matches the
+ * push sequence after the above wait loop and the CIL still contains
+ * dirty objects.
+ *
+ * When the push occurs, it will empty the CIL and
+ * atomically increment the currect sequence past the push sequence and
+ * move it into the committing list. Of course, if the CIL is clean at
+ * the time of the push, it won't have pushed the CIL at all, so in that
+ * case we should try the push for this sequence again from the start
+ * just in case.
+ */
+
+ if (sequence == cil->xc_current_sequence &&
+ !list_empty(&cil->xc_cil)) {
+ spin_unlock(&cil->xc_push_lock);
+ goto restart;
+ }
+
spin_unlock(&cil->xc_push_lock);
return commit_lsn;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 5c670f5..993cb19 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -282,22 +282,29 @@
struct xfs_sb *sbp = &mp->m_sb;
int error;
int loud = !(flags & XFS_MFSI_QUIET);
+ const struct xfs_buf_ops *buf_ops;
ASSERT(mp->m_sb_bp == NULL);
ASSERT(mp->m_ddev_targp != NULL);
/*
+ * For the initial read, we must guess at the sector
+ * size based on the block device. It's enough to
+ * get the sb_sectsize out of the superblock and
+ * then reread with the proper length.
+ * We don't verify it yet, because it may not be complete.
+ */
+ sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
+ buf_ops = NULL;
+
+ /*
* Allocate a (locked) buffer to hold the superblock.
* This will be kept around at all times to optimize
* access to the superblock.
*/
- sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
-
reread:
bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR,
- BTOBB(sector_size), 0,
- loud ? &xfs_sb_buf_ops
- : &xfs_sb_quiet_buf_ops);
+ BTOBB(sector_size), 0, buf_ops);
if (!bp) {
if (loud)
xfs_warn(mp, "SB buffer read failed");
@@ -331,12 +338,13 @@
}
/*
- * If device sector size is smaller than the superblock size,
- * re-read the superblock so the buffer is correctly sized.
+ * Re-read the superblock so the buffer is correctly sized,
+ * and properly verified.
*/
- if (sector_size < sbp->sb_sectsize) {
+ if (buf_ops == NULL) {
xfs_buf_relse(bp);
sector_size = sbp->sb_sectsize;
+ buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;
goto reread;
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a6a76b2..ec5ca65 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -842,7 +842,7 @@
/*
* Reserve space & log for one extent added to the file.
*/
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata,
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc,
resblks, 0);
if (error)
goto error_cancel;
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index b7c9aea..0c0e41b 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -288,6 +288,7 @@
sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
+ sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
(sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
@@ -295,8 +296,7 @@
sbp->sb_dblocks == 0 ||
sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) ||
sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
- XFS_CORRUPTION_ERROR("SB sanity check failed",
- XFS_ERRLEVEL_LOW, mp, sbp);
+ xfs_notice(mp, "SB sanity check failed");
return XFS_ERROR(EFSCORRUPTED);
}
@@ -611,12 +611,11 @@
XFS_SB_VERSION_5) ||
dsb->sb_crc != 0)) {
- if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize),
- offsetof(struct xfs_sb, sb_crc))) {
+ if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
/* Only fail bad secondaries on a known V5 filesystem */
- if (bp->b_bn != XFS_SB_DADDR &&
+ if (bp->b_bn == XFS_SB_DADDR ||
xfs_sb_version_hascrc(&mp->m_sb)) {
- error = EFSCORRUPTED;
+ error = EFSBADCRC;
goto out_error;
}
}
@@ -625,10 +624,9 @@
out_error:
if (error) {
- if (error != EWRONGFS)
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- mp, bp->b_addr);
xfs_buf_ioerror(bp, error);
+ if (error == EFSCORRUPTED || error == EFSBADCRC)
+ xfs_verifier_error(bp);
}
}
@@ -644,7 +642,6 @@
{
struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
-
if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {
/* XFS filesystem, verify noisily! */
xfs_sb_read_verify(bp);
@@ -664,9 +661,8 @@
error = xfs_sb_verify(bp, false);
if (error) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- mp, bp->b_addr);
xfs_buf_ioerror(bp, error);
+ xfs_verifier_error(bp);
return;
}
@@ -676,8 +672,7 @@
if (bip)
XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_sb, sb_crc));
+ xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
}
const struct xfs_buf_ops xfs_sb_buf_ops = {
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 35061d4..f7b2fe7 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -182,6 +182,8 @@
/* must be padded to 64 bit alignment */
} xfs_sb_t;
+#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc)
+
/*
* Superblock - on disk version. Must match the in core version above.
* Must be padded to 64 bit alignment.
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h
index 8c5035a1..4484e51 100644
--- a/fs/xfs/xfs_shared.h
+++ b/fs/xfs/xfs_shared.h
@@ -104,7 +104,8 @@
#define XFS_TRANS_SB_COUNT 41
#define XFS_TRANS_CHECKPOINT 42
#define XFS_TRANS_ICREATE 43
-#define XFS_TRANS_TYPE_MAX 43
+#define XFS_TRANS_CREATE_TMPFILE 44
+#define XFS_TRANS_TYPE_MAX 44
/* new transaction types need to be reflected in xfs_logprint(8) */
#define XFS_TRANS_TYPES \
@@ -112,6 +113,7 @@
{ XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
{ XFS_TRANS_INACTIVE, "INACTIVE" }, \
{ XFS_TRANS_CREATE, "CREATE" }, \
+ { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \
{ XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
{ XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
{ XFS_TRANS_REMOVE, "REMOVE" }, \
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 5fda189..52979aa 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -212,10 +212,7 @@
return XFS_ERROR(ENAMETOOLONG);
udqp = gdqp = NULL;
- if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
- prid = xfs_get_projid(dp);
- else
- prid = XFS_PROJID_DEFAULT;
+ prid = xfs_get_initial_prid(dp);
/*
* Make sure that we have allocated dquot(s) on disk.
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c
index bf59a2b..9b32052 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/xfs_symlink_remote.c
@@ -133,12 +133,13 @@
if (!xfs_sb_version_hascrc(&mp->m_sb))
return;
- if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_dsymlink_hdr, sl_crc)) ||
- !xfs_symlink_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
+ xfs_buf_ioerror(bp, EFSBADCRC);
+ else if (!xfs_symlink_verify(bp))
xfs_buf_ioerror(bp, EFSCORRUPTED);
- }
+
+ if (bp->b_error)
+ xfs_verifier_error(bp);
}
static void
@@ -153,8 +154,8 @@
return;
if (!xfs_symlink_verify(bp)) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
xfs_buf_ioerror(bp, EFSCORRUPTED);
+ xfs_verifier_error(bp);
return;
}
@@ -162,8 +163,7 @@
struct xfs_dsymlink_hdr *dsl = bp->b_addr;
dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
}
- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
- offsetof(struct xfs_dsymlink_hdr, sl_crc));
+ xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);
}
const struct xfs_buf_ops xfs_symlink_buf_ops = {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 425dfa4..a4ae41c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -603,6 +603,7 @@
DEFINE_INODE_EVENT(xfs_inactive_symlink);
DEFINE_INODE_EVENT(xfs_alloc_file_space);
DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_collapse_file_space);
DEFINE_INODE_EVENT(xfs_readdir);
#ifdef CONFIG_XFS_POSIX_ACL
DEFINE_INODE_EVENT(xfs_get_acl);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c812c5c0..54a5732 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -887,12 +887,7 @@
xfs_trans_apply_sb_deltas(tp);
xfs_trans_apply_dquot_deltas(tp);
- error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
- if (error == ENOMEM) {
- xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
- error = XFS_ERROR(EIO);
- goto out_unreserve;
- }
+ xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_trans_free(tp);
@@ -902,10 +897,7 @@
* log out now and wait for it.
*/
if (sync) {
- if (!error) {
- error = _xfs_log_force_lsn(mp, commit_lsn,
- XFS_LOG_SYNC, NULL);
- }
+ error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
XFS_STATS_INC(xs_trans_sync);
} else {
XFS_STATS_INC(xs_trans_async);
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index c486e4b..ae36816 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -212,6 +212,19 @@
}
/*
+ * For removing an inode from unlinked list at first, we can modify:
+ * the agi hash list and counters: sector size
+ * the on disk inode before ours in the agi hash list: inode cluster size
+ */
+STATIC uint
+xfs_calc_iunlink_remove_reservation(
+ struct xfs_mount *mp)
+{
+ return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+ max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+}
+
+/*
* For creating a link to an inode:
* the parent directory inode: inode size
* the linked inode: inode size
@@ -228,6 +241,7 @@
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
+ xfs_calc_iunlink_remove_reservation(mp) +
MAX((xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
@@ -237,6 +251,18 @@
}
/*
+ * For adding an inode to unlinked list we can modify:
+ * the agi hash list: sector size
+ * the unlinked inode: inode size
+ */
+STATIC uint
+xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
+{
+ return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+ xfs_calc_inode_res(mp, 1);
+}
+
+/*
* For removing a directory entry we can modify:
* the parent directory inode: inode size
* the removed inode: inode size
@@ -253,10 +279,11 @@
struct xfs_mount *mp)
{
return XFS_DQUOT_LOGRES(mp) +
- MAX((xfs_calc_inode_res(mp, 2) +
+ xfs_calc_iunlink_add_reservation(mp) +
+ MAX((xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
XFS_FSB_TO_B(mp, 1))));
}
@@ -351,6 +378,20 @@
}
+STATIC uint
+xfs_calc_create_tmpfile_reservation(
+ struct xfs_mount *mp)
+{
+ uint res = XFS_DQUOT_LOGRES(mp);
+
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ res += xfs_calc_icreate_resv_alloc(mp);
+ else
+ res += xfs_calc_create_resv_alloc(mp);
+
+ return res + xfs_calc_iunlink_add_reservation(mp);
+}
+
/*
* Making a new directory is the same as creating a new file.
*/
@@ -391,9 +432,9 @@
{
return XFS_DQUOT_LOGRES(mp) +
xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
- max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) +
+ xfs_calc_iunlink_remove_reservation(mp) +
xfs_calc_buf_res(1, 0) +
xfs_calc_buf_res(2 + mp->m_ialloc_blks +
mp->m_in_maxlevels, 0) +
@@ -652,15 +693,14 @@
/*
* Allocating quota on disk if needed.
- * the write transaction log space: M_RES(mp)->tr_write.tr_logres
+ * the write transaction log space for quota file extent allocation
* the unit of quota allocation: one system block size
*/
STATIC uint
xfs_calc_qm_dqalloc_reservation(
struct xfs_mount *mp)
{
- ASSERT(M_RES(mp)->tr_write.tr_logres);
- return M_RES(mp)->tr_write.tr_logres +
+ return xfs_calc_write_reservation(mp) +
xfs_calc_buf_res(1,
XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
}
@@ -737,6 +777,11 @@
resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+ resp->tr_create_tmpfile.tr_logres =
+ xfs_calc_create_tmpfile_reservation(mp);
+ resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
+ resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -792,7 +837,6 @@
/* The following transaction are logged in logical format */
resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
- resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp);
resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/xfs_trans_resv.h
index de7de9a..1097d14 100644
--- a/fs/xfs/xfs_trans_resv.h
+++ b/fs/xfs/xfs_trans_resv.h
@@ -38,11 +38,11 @@
struct xfs_trans_res tr_remove; /* unlink trans */
struct xfs_trans_res tr_symlink; /* symlink trans */
struct xfs_trans_res tr_create; /* create trans */
+ struct xfs_trans_res tr_create_tmpfile; /* create O_TMPFILE trans */
struct xfs_trans_res tr_mkdir; /* mkdir trans */
struct xfs_trans_res tr_ifree; /* inode free trans */
struct xfs_trans_res tr_ichange; /* inode update trans */
struct xfs_trans_res tr_growdata; /* fs data section grow trans */
- struct xfs_trans_res tr_swrite; /* sync write inode trans */
struct xfs_trans_res tr_addafork; /* add inode attr fork trans */
struct xfs_trans_res tr_writeid; /* write setuid/setgid file */
struct xfs_trans_res tr_attrinval; /* attr fork buffer
@@ -100,6 +100,7 @@
#define XFS_ITRUNCATE_LOG_COUNT 2
#define XFS_INACTIVE_LOG_COUNT 2
#define XFS_CREATE_LOG_COUNT 2
+#define XFS_CREATE_TMPFILE_LOG_COUNT 2
#define XFS_MKDIR_LOG_COUNT 3
#define XFS_SYMLINK_LOG_COUNT 3
#define XFS_REMOVE_LOG_COUNT 2
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 09f553c..f7faefc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2527,6 +2527,9 @@
/* filesystem does not support filling holes */
DIO_SKIP_HOLES = 0x02,
+
+ /* filesystem can handle aio writes beyond i_size */
+ DIO_ASYNC_EXTEND = 0x04,
};
void dio_end_io(struct bio *bio, int error);
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 990c4cc..d1197ae 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -5,5 +5,40 @@
#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
#define FALLOC_FL_NO_HIDE_STALE 0x04 /* reserved codepoint */
+/*
+ * FALLOC_FL_COLLAPSE_RANGE is used to remove a range of a file
+ * without leaving a hole in the file. The contents of the file beyond
+ * the range being removed is appended to the start offset of the range
+ * being removed (i.e. the hole that was punched is "collapsed"),
+ * resulting in a file layout that looks like the range that was
+ * removed never existed. As such collapsing a range of a file changes
+ * the size of the file, reducing it by the same length of the range
+ * that has been removed by the operation.
+ *
+ * Different filesystems may implement different limitations on the
+ * granularity of the operation. Most will limit operations to
+ * filesystem block size boundaries, but this boundary may be larger or
+ * smaller depending on the filesystem and/or the configuration of the
+ * filesystem or file.
+ *
+ * Attempting to collapse a range that crosses the end of the file is
+ * considered an illegal operation - just use ftruncate(2) if you need
+ * to collapse a range that crosses EOF.
+ */
+#define FALLOC_FL_COLLAPSE_RANGE 0x08
+
+/*
+ * FALLOC_FL_ZERO_RANGE is used to convert a range of file to zeros preferably
+ * without issuing data IO. Blocks should be preallocated for the regions that
+ * span holes in the file, and the entire range is preferable converted to
+ * unwritten extents - even though file system may choose to zero out the
+ * extent or do whatever which will result in reading zeros from the range
+ * while the range remains allocated for the file.
+ *
+ * This can be also used to preallocate blocks past EOF in the same way as
+ * with fallocate. Flag FALLOC_FL_KEEP_SIZE should cause the inode
+ * size to remain the same.
+ */
+#define FALLOC_FL_ZERO_RANGE 0x10
#endif /* _UAPI_FALLOC_H_ */