xfs: log refcount intent items
Provide a mechanism for higher levels to create CUI/CUD items and submit
them to the log, plus a stub function to deal with recovered CUI items.
These parts will be connected to the refcountbt in a later patch.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index d6429fd..6a9ea9e 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -113,6 +113,7 @@
xfs_trans_buf.o \
xfs_trans_extfree.o \
xfs_trans_inode.o \
+ xfs_trans_refcount.o \
xfs_trans_rmap.o \
# optional features
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 4dc335a..67b1c13 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -27,4 +27,18 @@
extern int xfs_refcount_get_rec(struct xfs_btree_cur *cur,
struct xfs_refcount_irec *irec, int *stat);
+enum xfs_refcount_intent_type { /* kinds of refcount update; xfs_cui_recover() matches these against pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK */
+ XFS_REFCOUNT_INCREASE = 1, /* numbering starts at 1, so 0 is not a valid type */
+ XFS_REFCOUNT_DECREASE,
+ XFS_REFCOUNT_ALLOC_COW,
+ XFS_REFCOUNT_FREE_COW,
+};
+
+struct xfs_refcount_intent { /* one refcount update request; consumers are not visible in this patch */
+ struct list_head ri_list; /* list linkage; the owning list is not shown here */
+ enum xfs_refcount_intent_type ri_type; /* which kind of update this is */
+ xfs_fsblock_t ri_startblock; /* presumably the first block of the extent - TODO confirm */
+ xfs_extlen_t ri_blockcount; /* presumably the extent length in blocks - TODO confirm */
+};
+
#endif /* __XFS_REFCOUNT_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 846483d..7def672 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -45,6 +45,7 @@
#include "xfs_dir2.h"
#include "xfs_rmap_item.h"
#include "xfs_buf_item.h"
+#include "xfs_refcount_item.h"
#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
@@ -1924,6 +1925,8 @@
case XFS_LI_EFI:
case XFS_LI_RUI:
case XFS_LI_RUD:
+ case XFS_LI_CUI:
+ case XFS_LI_CUD:
trace_xfs_log_recover_item_reorder_tail(log,
trans, item, pass);
list_move_tail(&item->ri_list, &inode_list);
@@ -3547,6 +3550,123 @@
}
/*
+ * Copy a CUI format buffer from the given log iovec into dst_cui_fmt.
+ * Returns 0 on success, or -EFSCORRUPTED when buf->i_len does not match the
+ * size implied by cui_nextents. CUI/CUD items need no alignment handling.
+ */
+static int
+xfs_cui_copy_format(
+ struct xfs_log_iovec *buf,
+ struct xfs_cui_log_format *dst_cui_fmt)
+{
+ struct xfs_cui_log_format *src_cui_fmt;
+ uint len;
+
+ src_cui_fmt = buf->i_addr;
+ len = xfs_cui_log_format_sizeof(src_cui_fmt->cui_nextents); /* size for the logged extent count */
+
+ if (buf->i_len == len) { /* only an exact size match is accepted */
+ memcpy(dst_cui_fmt, src_cui_fmt, len); /* no bounds check here: dst must have room for len bytes */
+ return 0;
+ }
+ return -EFSCORRUPTED;
+}
+
+/*
+ * This routine is called to create an in-core extent refcount update
+ * item from the CUI format structure which was logged on disk. It
+ * allocates an in-core CUI sized by cui_nextents, copies the logged format
+ * into it, and inserts the CUI into the AIL at the given LSN. Returns 0, or
+ * the xfs_cui_copy_format() error after freeing the new CUI.
+ */
+STATIC int
+xlog_recover_cui_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ int error;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_cui_log_item *cuip;
+ struct xfs_cui_log_format *cui_formatp;
+
+ cui_formatp = item->ri_buf[0].i_addr;
+
+ cuip = xfs_cui_init(mp, cui_formatp->cui_nextents); /* extent count comes straight from the logged format */
+ error = xfs_cui_copy_format(&item->ri_buf[0], &cuip->cui_format);
+ if (error) {
+ xfs_cui_item_free(cuip); /* not in the AIL yet, so free it directly */
+ return error;
+ }
+ atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
+
+ spin_lock(&log->l_ailp->xa_lock); /* no matching unlock in this function; see the note below */
+ /*
+ * The CUI has two references. One for the CUD and one for CUI to ensure
+ * it makes it into the AIL. Insert the CUI into the AIL directly and
+ * drop the CUI reference. Note that xfs_trans_ail_update() drops the
+ * AIL lock.
+ */
+ xfs_trans_ail_update(log->l_ailp, &cuip->cui_item, lsn);
+ xfs_cui_release(cuip);
+ return 0;
+}
+
+
+/*
+ * This routine is called when a CUD format structure is found in a committed
+ * transaction in the log. It cancels the corresponding CUI if that is still
+ * in the AIL: the AIL is searched for a CUI whose id equals cud_cui_id, and on
+ * a match the CUD reference is dropped, which removes the CUI from the AIL and
+ * frees it. Returns -EFSCORRUPTED for a wrong-sized CUD, 0 otherwise.
+ */
+STATIC int
+xlog_recover_cud_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ struct xfs_cud_log_format *cud_formatp;
+ struct xfs_cui_log_item *cuip = NULL;
+ struct xfs_log_item *lip;
+ __uint64_t cui_id;
+ struct xfs_ail_cursor cur;
+ struct xfs_ail *ailp = log->l_ailp;
+
+ cud_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) /* the size must match exactly */
+ return -EFSCORRUPTED;
+ cui_id = cud_formatp->cud_cui_id;
+
+ /*
+ * Search for the CUI with the id in the CUD format structure in the
+ * AIL.
+ */
+ spin_lock(&ailp->xa_lock);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+ while (lip != NULL) {
+ if (lip->li_type == XFS_LI_CUI) {
+ cuip = (struct xfs_cui_log_item *)lip;
+ if (cuip->cui_format.cui_id == cui_id) {
+ /*
+ * Drop the CUD reference to the CUI. This
+ * removes the CUI from the AIL and frees it.
+ */
+ spin_unlock(&ailp->xa_lock); /* the release runs without the AIL lock held */
+ xfs_cui_release(cuip);
+ spin_lock(&ailp->xa_lock); /* retaken for the cursor teardown below */
+ break; /* stop at the first matching CUI */
+ }
+ }
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
+ }
+
+ xfs_trans_ail_cursor_done(&cur);
+ spin_unlock(&ailp->xa_lock);
+
+ return 0; /* also returned when no matching CUI was found */
+}
+
+/*
* This routine is called when an inode create format structure is found in a
* committed transaction in the log. It's purpose is to initialise the inodes
* being allocated on disk. This requires us to get inode cluster buffers that
@@ -3773,6 +3893,8 @@
case XFS_LI_QUOTAOFF:
case XFS_LI_RUI:
case XFS_LI_RUD:
+ case XFS_LI_CUI:
+ case XFS_LI_CUD:
default:
break;
}
@@ -3798,6 +3920,8 @@
case XFS_LI_ICREATE:
case XFS_LI_RUI:
case XFS_LI_RUD:
+ case XFS_LI_CUI:
+ case XFS_LI_CUD:
/* nothing to do in pass 1 */
return 0;
default:
@@ -3832,6 +3956,10 @@
return xlog_recover_rui_pass2(log, item, trans->r_lsn);
case XFS_LI_RUD:
return xlog_recover_rud_pass2(log, item);
+ case XFS_LI_CUI:
+ return xlog_recover_cui_pass2(log, item, trans->r_lsn);
+ case XFS_LI_CUD:
+ return xlog_recover_cud_pass2(log, item);
case XFS_LI_DQUOT:
return xlog_recover_dquot_pass2(log, buffer_list, item,
trans->r_lsn);
@@ -4419,12 +4547,53 @@
spin_lock(&ailp->xa_lock);
}
+/* Recover the CUI if necessary; drops ailp->xa_lock around xfs_cui_recover(). */
+STATIC int
+xlog_recover_process_cui(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_cui_log_item *cuip;
+ int error;
+
+ /*
+ * Skip CUIs that we've already processed.
+ */
+ cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
+ if (test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags))
+ return 0; /* already recovered: nothing to do, lock left untouched */
+
+ spin_unlock(&ailp->xa_lock); /* unlocked first, so the caller is expected to hold xa_lock */
+ error = xfs_cui_recover(mp, cuip);
+ spin_lock(&ailp->xa_lock); /* retaken before returning */
+
+ return error; /* whatever xfs_cui_recover() reported */
+}
+
+/* Release the CUI since we're cancelling everything (AIL lock dropped meanwhile). */
+STATIC void
+xlog_recover_cancel_cui(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_cui_log_item *cuip;
+
+ cuip = container_of(lip, struct xfs_cui_log_item, cui_item);
+
+ spin_unlock(&ailp->xa_lock); /* unlocked first, so the caller is expected to hold xa_lock */
+ xfs_cui_release(cuip);
+ spin_lock(&ailp->xa_lock); /* retaken before returning */
+}
+
/* Is this log item a deferred action intent? */
static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
{
switch (lip->li_type) {
case XFS_LI_EFI:
case XFS_LI_RUI:
+ case XFS_LI_CUI:
return true;
default:
return false;
@@ -4488,6 +4657,9 @@
case XFS_LI_RUI:
error = xlog_recover_process_rui(log->l_mp, ailp, lip);
break;
+ case XFS_LI_CUI:
+ error = xlog_recover_process_cui(log->l_mp, ailp, lip);
+ break;
}
if (error)
goto out;
@@ -4535,6 +4707,9 @@
case XFS_LI_RUI:
xlog_recover_cancel_rui(log->l_mp, ailp, lip);
break;
+ case XFS_LI_CUI:
+ xlog_recover_cancel_cui(log->l_mp, ailp, lip);
+ break;
}
lip = xfs_trans_ail_cursor_next(ailp, &cur);
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index f9ad055..599a8d2 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -22,12 +22,15 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
#include "xfs_mount.h"
+#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_refcount_item.h"
#include "xfs_log.h"
+#include "xfs_refcount.h"
kmem_zone_t *xfs_cui_zone;
@@ -381,3 +384,60 @@
return cudp;
}
+
+/*
+ * Process a refcount update intent item that was recovered from the log.
+ * For now this only validates the logged extents and then releases the CUI.
+ */
+int
+xfs_cui_recover(
+ struct xfs_mount *mp,
+ struct xfs_cui_log_item *cuip)
+{
+ int i;
+ int error = 0; /* never reassigned here, so the success path returns 0 */
+ struct xfs_phys_extent *refc;
+ xfs_fsblock_t startblock_fsb;
+ bool op_ok;
+
+ ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags));
+
+ /*
+ * First check the validity of the extents described by the
+ * CUI. If any are bad, then assume that all are bad and
+ * just toss the CUI.
+ */
+ for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
+ refc = &cuip->cui_format.cui_extents[i];
+ startblock_fsb = XFS_BB_TO_FSB(mp,
+ XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
+ switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { /* the masked bits carry the operation type */
+ case XFS_REFCOUNT_INCREASE:
+ case XFS_REFCOUNT_DECREASE:
+ case XFS_REFCOUNT_ALLOC_COW:
+ case XFS_REFCOUNT_FREE_COW:
+ op_ok = true;
+ break;
+ default:
+ op_ok = false; /* unknown operation type */
+ break;
+ }
+ if (!op_ok || startblock_fsb == 0 ||
+ refc->pe_len == 0 ||
+ startblock_fsb >= mp->m_sb.sb_dblocks ||
+ refc->pe_len >= mp->m_sb.sb_agblocks ||
+ (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) { /* any flag bit outside the known set is rejected */
+ /*
+ * This will pull the CUI from the AIL and
+ * free the memory associated with it.
+ */
+ set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
+ xfs_cui_release(cuip);
+ return -EIO; /* one bad extent rejects the whole CUI */
+ }
+ }
+
+ set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); /* mark it so recovery skips this CUI from now on */
+ xfs_cui_release(cuip);
+ return error;
+}
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index 34b6f7a..5b74ddd 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -96,5 +96,6 @@
struct xfs_cui_log_item *);
void xfs_cui_item_free(struct xfs_cui_log_item *);
void xfs_cui_release(struct xfs_cui_log_item *);
+int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip);
#endif /* __XFS_REFCOUNT_ITEM_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c7b9853..e306e83 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -2932,6 +2932,39 @@
DEFINE_AG_EXTENT_EVENT(xfs_refcount_find_shared_result);
DEFINE_AG_ERROR_EVENT(xfs_refcount_find_shared_error);
+TRACE_EVENT(xfs_refcount_finish_one_leftover, /* NOTE(review): presumably fired when an update leaves part of an extent unprocessed - confirm at the call site */
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ int type, xfs_agblock_t agbno, xfs_extlen_t len,
+ xfs_agblock_t new_agbno, xfs_extlen_t new_len),
+ TP_ARGS(mp, agno, type, agbno, len, new_agbno, new_len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(int, type)
+ __field(xfs_agblock_t, agbno)
+ __field(xfs_extlen_t, len)
+ __field(xfs_agblock_t, new_agbno)
+ __field(xfs_extlen_t, new_len)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev; /* device taken from the mount's superblock */
+ __entry->agno = agno;
+ __entry->type = type;
+ __entry->agbno = agbno;
+ __entry->len = len;
+ __entry->new_agbno = new_agbno;
+ __entry->new_len = new_len;
+ ),
+ TP_printk("dev %d:%d type %d agno %u agbno %u len %u new_agbno %u new_len %u", /* type is printed before agno, unlike the argument order */
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->type,
+ __entry->agno,
+ __entry->agbno,
+ __entry->len,
+ __entry->new_agbno,
+ __entry->new_len)
+);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index e2bf86a..fe69e20 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -36,6 +36,7 @@
struct xfs_rud_log_item;
struct xfs_rui_log_item;
struct xfs_btree_cur;
+struct xfs_cui_log_item;
typedef struct xfs_log_item {
struct list_head li_ail; /* AIL pointers */
@@ -248,4 +249,14 @@
xfs_fsblock_t startblock, xfs_filblks_t blockcount,
xfs_exntst_t state, struct xfs_btree_cur **pcur);
+/* refcount updates */
+enum xfs_refcount_intent_type;
+
+struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp,
+ struct xfs_cui_log_item *cuip);
+int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp,
+ struct xfs_cud_log_item *cudp,
+ enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
+ xfs_extlen_t blockcount, struct xfs_btree_cur **pcur);
+
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c
new file mode 100644
index 0000000..b18d548
--- /dev/null
+++ b/fs/xfs/xfs_trans_refcount.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2016 Oracle. All Rights Reserved.
+ *
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_refcount_item.h"
+#include "xfs_alloc.h"
+#include "xfs_refcount.h"
+
+/*
+ * Allocate a "refcount update done" (CUD) log item for the given CUI,
+ * add it to transaction tp, and return it.
+ */
+struct xfs_cud_log_item *
+xfs_trans_get_cud(
+ struct xfs_trans *tp,
+ struct xfs_cui_log_item *cuip)
+{
+ struct xfs_cud_log_item *cudp;
+
+ cudp = xfs_cud_init(tp->t_mountp, cuip); /* the CUD is created against the CUI it completes */
+ xfs_trans_add_item(tp, &cudp->cud_item); /* attach the new item to the transaction */
+ return cudp;
+}
+
+/*
+ * Finish a refcount update and log it to the CUD. Note that the
+ * transaction is marked dirty regardless of whether the refcount
+ * update succeeds or fails to support the CUI/CUD lifecycle rules.
+ */
+int
+xfs_trans_log_finish_refcount_update(
+ struct xfs_trans *tp,
+ struct xfs_cud_log_item *cudp,
+ enum xfs_refcount_intent_type type, /* unused by this stub */
+ xfs_fsblock_t startblock, /* unused by this stub */
+ xfs_extlen_t blockcount, /* unused by this stub */
+ struct xfs_btree_cur **pcur) /* unused by this stub */
+{
+ int error;
+
+ /* XXX: leave this empty for now */
+ error = -EFSCORRUPTED; /* placeholder: the stub always reports failure */
+
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the CUI and frees the CUD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ cudp->cud_item.li_desc->lid_flags |= XFS_LID_DIRTY; /* the CUD's own descriptor is marked dirty too */
+
+ return error;
+}