summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_exchmaps_item.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_exchmaps_item.c')
-rw-r--r--fs/xfs/xfs_exchmaps_item.c614
1 files changed, 614 insertions, 0 deletions
diff --git a/fs/xfs/xfs_exchmaps_item.c b/fs/xfs/xfs_exchmaps_item.c
new file mode 100644
index 000000000000..264a121c5e16
--- /dev/null
+++ b/fs/xfs/xfs_exchmaps_item.c
@@ -0,0 +1,614 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_shared.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_exchmaps_item.h"
+#include "xfs_exchmaps.h"
+#include "xfs_log.h"
+#include "xfs_bmap.h"
+#include "xfs_icache.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+#include "xfs_exchrange.h"
+#include "xfs_trace.h"
+
+struct kmem_cache *xfs_xmi_cache;
+struct kmem_cache *xfs_xmd_cache;
+
+static const struct xfs_item_ops xfs_xmi_item_ops;
+
+static inline struct xfs_xmi_log_item *XMI_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_xmi_log_item, xmi_item);
+}
+
+STATIC void
+xfs_xmi_item_free(
+ struct xfs_xmi_log_item *xmi_lip)
+{
+ kvfree(xmi_lip->xmi_item.li_lv_shadow);
+ kmem_cache_free(xfs_xmi_cache, xmi_lip);
+}
+
+/*
+ * Freeing the XMI requires that we remove it from the AIL if it has already
+ * been placed there. However, the XMI may not yet have been placed in the AIL
+ * when called by xfs_xmi_release() from XMD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the reference
+ * count to ensure only the last caller frees the XMI.
+ */
+STATIC void
+xfs_xmi_release(
+ struct xfs_xmi_log_item *xmi_lip)
+{
+ ASSERT(atomic_read(&xmi_lip->xmi_refcount) > 0);
+ if (atomic_dec_and_test(&xmi_lip->xmi_refcount)) {
+ xfs_trans_ail_delete(&xmi_lip->xmi_item, 0);
+ xfs_xmi_item_free(xmi_lip);
+ }
+}
+
+
+STATIC void
+xfs_xmi_item_size(
+ struct xfs_log_item *lip,
+ int *nvecs,
+ int *nbytes)
+{
+ *nvecs += 1;
+ *nbytes += sizeof(struct xfs_xmi_log_format);
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the given xmi log
+ * item. We use only 1 iovec, and we point that at the xmi_log_format structure
+ * embedded in the xmi item.
+ */
+STATIC void
+xfs_xmi_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_vec *lv)
+{
+ struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip);
+ struct xfs_log_iovec *vecp = NULL;
+
+ xmi_lip->xmi_format.xmi_type = XFS_LI_XMI;
+ xmi_lip->xmi_format.xmi_size = 1;
+
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMI_FORMAT,
+ &xmi_lip->xmi_format,
+ sizeof(struct xfs_xmi_log_format));
+}
+
+/*
+ * The unpin operation is the last place an XMI is manipulated in the log. It
+ * is either inserted in the AIL or aborted in the event of a log I/O error. In
+ * either case, the XMI transaction has been successfully committed to make it
+ * this far. Therefore, we expect whoever committed the XMI to either construct
+ * and commit the XMD or drop the XMD's reference in the event of error. Simply
+ * drop the log's XMI reference now that the log is done with it.
+ */
+STATIC void
+xfs_xmi_item_unpin(
+ struct xfs_log_item *lip,
+ int remove)
+{
+ struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip);
+
+ xfs_xmi_release(xmi_lip);
+}
+
+/*
+ * The XMI has been either committed or aborted if the transaction has been
+ * cancelled. If the transaction was cancelled, an XMD isn't going to be
+ * constructed and thus we free the XMI here directly.
+ */
+STATIC void
+xfs_xmi_item_release(
+ struct xfs_log_item *lip)
+{
+ xfs_xmi_release(XMI_ITEM(lip));
+}
+
+/* Allocate and initialize an xmi item. */
+STATIC struct xfs_xmi_log_item *
+xfs_xmi_init(
+ struct xfs_mount *mp)
+
+{
+ struct xfs_xmi_log_item *xmi_lip;
+
+ xmi_lip = kmem_cache_zalloc(xfs_xmi_cache, GFP_KERNEL | __GFP_NOFAIL);
+
+ xfs_log_item_init(mp, &xmi_lip->xmi_item, XFS_LI_XMI, &xfs_xmi_item_ops);
+ xmi_lip->xmi_format.xmi_id = (uintptr_t)(void *)xmi_lip;
+ atomic_set(&xmi_lip->xmi_refcount, 2);
+
+ return xmi_lip;
+}
+
+static inline struct xfs_xmd_log_item *XMD_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_xmd_log_item, xmd_item);
+}
+
+STATIC void
+xfs_xmd_item_size(
+ struct xfs_log_item *lip,
+ int *nvecs,
+ int *nbytes)
+{
+ *nvecs += 1;
+ *nbytes += sizeof(struct xfs_xmd_log_format);
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the given xmd log
+ * item. We use only 1 iovec, and we point that at the xmd_log_format structure
+ * embedded in the xmd item.
+ */
+STATIC void
+xfs_xmd_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_vec *lv)
+{
+ struct xfs_xmd_log_item *xmd_lip = XMD_ITEM(lip);
+ struct xfs_log_iovec *vecp = NULL;
+
+ xmd_lip->xmd_format.xmd_type = XFS_LI_XMD;
+ xmd_lip->xmd_format.xmd_size = 1;
+
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_XMD_FORMAT, &xmd_lip->xmd_format,
+ sizeof(struct xfs_xmd_log_format));
+}
+
+/*
+ * The XMD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the XMI and free the
+ * XMD.
+ */
+STATIC void
+xfs_xmd_item_release(
+ struct xfs_log_item *lip)
+{
+ struct xfs_xmd_log_item *xmd_lip = XMD_ITEM(lip);
+
+ xfs_xmi_release(xmd_lip->xmd_intent_log_item);
+ kvfree(xmd_lip->xmd_item.li_lv_shadow);
+ kmem_cache_free(xfs_xmd_cache, xmd_lip);
+}
+
+static struct xfs_log_item *
+xfs_xmd_item_intent(
+ struct xfs_log_item *lip)
+{
+ return &XMD_ITEM(lip)->xmd_intent_log_item->xmi_item;
+}
+
+static const struct xfs_item_ops xfs_xmd_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED |
+ XFS_ITEM_INTENT_DONE,
+ .iop_size = xfs_xmd_item_size,
+ .iop_format = xfs_xmd_item_format,
+ .iop_release = xfs_xmd_item_release,
+ .iop_intent = xfs_xmd_item_intent,
+};
+
+/* Log file mapping exchange information in the intent item. */
+STATIC struct xfs_log_item *
+xfs_exchmaps_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ struct xfs_xmi_log_item *xmi_lip;
+ struct xfs_exchmaps_intent *xmi;
+ struct xfs_xmi_log_format *xlf;
+
+ ASSERT(count == 1);
+
+ xmi = list_first_entry_or_null(items, struct xfs_exchmaps_intent,
+ xmi_list);
+
+ xmi_lip = xfs_xmi_init(tp->t_mountp);
+ xlf = &xmi_lip->xmi_format;
+
+ xlf->xmi_inode1 = xmi->xmi_ip1->i_ino;
+ xlf->xmi_igen1 = VFS_I(xmi->xmi_ip1)->i_generation;
+ xlf->xmi_inode2 = xmi->xmi_ip2->i_ino;
+ xlf->xmi_igen2 = VFS_I(xmi->xmi_ip2)->i_generation;
+ xlf->xmi_startoff1 = xmi->xmi_startoff1;
+ xlf->xmi_startoff2 = xmi->xmi_startoff2;
+ xlf->xmi_blockcount = xmi->xmi_blockcount;
+ xlf->xmi_isize1 = xmi->xmi_isize1;
+ xlf->xmi_isize2 = xmi->xmi_isize2;
+ xlf->xmi_flags = xmi->xmi_flags & XFS_EXCHMAPS_LOGGED_FLAGS;
+
+ return &xmi_lip->xmi_item;
+}
+
+STATIC struct xfs_log_item *
+xfs_exchmaps_create_done(
+ struct xfs_trans *tp,
+ struct xfs_log_item *intent,
+ unsigned int count)
+{
+ struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(intent);
+ struct xfs_xmd_log_item *xmd_lip;
+
+ xmd_lip = kmem_cache_zalloc(xfs_xmd_cache, GFP_KERNEL | __GFP_NOFAIL);
+ xfs_log_item_init(tp->t_mountp, &xmd_lip->xmd_item, XFS_LI_XMD,
+ &xfs_xmd_item_ops);
+ xmd_lip->xmd_intent_log_item = xmi_lip;
+ xmd_lip->xmd_format.xmd_xmi_id = xmi_lip->xmi_format.xmi_id;
+
+ return &xmd_lip->xmd_item;
+}
+
+/* Add this deferred XMI to the transaction. */
+void
+xfs_exchmaps_defer_add(
+ struct xfs_trans *tp,
+ struct xfs_exchmaps_intent *xmi)
+{
+ trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
+
+ xfs_defer_add(tp, &xmi->xmi_list, &xfs_exchmaps_defer_type);
+}
+
+static inline struct xfs_exchmaps_intent *xmi_entry(const struct list_head *e)
+{
+ return list_entry(e, struct xfs_exchmaps_intent, xmi_list);
+}
+
+/* Cancel a deferred file mapping exchange. */
+STATIC void
+xfs_exchmaps_cancel_item(
+ struct list_head *item)
+{
+ struct xfs_exchmaps_intent *xmi = xmi_entry(item);
+
+ kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
+}
+
+/* Process a deferred file mapping exchange. */
+STATIC int
+xfs_exchmaps_finish_item(
+ struct xfs_trans *tp,
+ struct xfs_log_item *done,
+ struct list_head *item,
+ struct xfs_btree_cur **state)
+{
+ struct xfs_exchmaps_intent *xmi = xmi_entry(item);
+ int error;
+
+ /*
+ * Exchange one more mappings between two files. If there's still more
+ * work to do, we want to requeue ourselves after all other pending
+ * deferred operations have finished. This includes all of the dfops
+ * that we queued directly as well as any new ones created in the
+ * process of finishing the others. Doing so prevents us from queuing
+ * a large number of XMI log items in kernel memory, which in turn
+ * prevents us from pinning the tail of the log (while logging those
+ * new XMI items) until the first XMI items can be processed.
+ */
+ error = xfs_exchmaps_finish_one(tp, xmi);
+ if (error != -EAGAIN)
+ xfs_exchmaps_cancel_item(item);
+ return error;
+}
+
+/* Abort all pending XMIs. */
+STATIC void
+xfs_exchmaps_abort_intent(
+ struct xfs_log_item *intent)
+{
+ xfs_xmi_release(XMI_ITEM(intent));
+}
+
+/* Is this recovered XMI ok? */
+static inline bool
+xfs_xmi_validate(
+ struct xfs_mount *mp,
+ struct xfs_xmi_log_item *xmi_lip)
+{
+ struct xfs_xmi_log_format *xlf = &xmi_lip->xmi_format;
+
+ if (!xfs_has_exchange_range(mp))
+ return false;
+
+ if (xmi_lip->xmi_format.__pad != 0)
+ return false;
+
+ if (xlf->xmi_flags & ~XFS_EXCHMAPS_LOGGED_FLAGS)
+ return false;
+
+ if (!xfs_verify_ino(mp, xlf->xmi_inode1) ||
+ !xfs_verify_ino(mp, xlf->xmi_inode2))
+ return false;
+
+ if (!xfs_verify_fileext(mp, xlf->xmi_startoff1, xlf->xmi_blockcount))
+ return false;
+
+ return xfs_verify_fileext(mp, xlf->xmi_startoff2, xlf->xmi_blockcount);
+}
+
+/*
+ * Use the recovered log state to create a new request, estimate resource
+ * requirements, and create a new incore intent state.
+ */
+STATIC struct xfs_exchmaps_intent *
+xfs_xmi_item_recover_intent(
+ struct xfs_mount *mp,
+ struct xfs_defer_pending *dfp,
+ const struct xfs_xmi_log_format *xlf,
+ struct xfs_exchmaps_req *req,
+ struct xfs_inode **ipp1,
+ struct xfs_inode **ipp2)
+{
+ struct xfs_inode *ip1, *ip2;
+ struct xfs_exchmaps_intent *xmi;
+ int error;
+
+ /*
+ * Grab both inodes and set IRECOVERY to prevent trimming of post-eof
+ * mappings and freeing of unlinked inodes until we're totally done
+ * processing files. The ondisk format of this new log item contains
+ * file handle information, which is why recovery for other items do
+ * not check the inode generation number.
+ */
+ error = xlog_recover_iget_handle(mp, xlf->xmi_inode1, xlf->xmi_igen1,
+ &ip1);
+ if (error) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, xlf,
+ sizeof(*xlf));
+ return ERR_PTR(error);
+ }
+
+ error = xlog_recover_iget_handle(mp, xlf->xmi_inode2, xlf->xmi_igen2,
+ &ip2);
+ if (error) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, xlf,
+ sizeof(*xlf));
+ goto err_rele1;
+ }
+
+ req->ip1 = ip1;
+ req->ip2 = ip2;
+ req->startoff1 = xlf->xmi_startoff1;
+ req->startoff2 = xlf->xmi_startoff2;
+ req->blockcount = xlf->xmi_blockcount;
+ req->flags = xlf->xmi_flags & XFS_EXCHMAPS_PARAMS;
+
+ xfs_exchrange_ilock(NULL, ip1, ip2);
+ error = xfs_exchmaps_estimate(req);
+ xfs_exchrange_iunlock(ip1, ip2);
+ if (error)
+ goto err_rele2;
+
+ *ipp1 = ip1;
+ *ipp2 = ip2;
+ xmi = xfs_exchmaps_init_intent(req);
+ xfs_defer_add_item(dfp, &xmi->xmi_list);
+ return xmi;
+
+err_rele2:
+ xfs_irele(ip2);
+err_rele1:
+ xfs_irele(ip1);
+ req->ip2 = req->ip1 = NULL;
+ return ERR_PTR(error);
+}
+
+/* Process a file mapping exchange item that was recovered from the log. */
+STATIC int
+xfs_exchmaps_recover_work(
+ struct xfs_defer_pending *dfp,
+ struct list_head *capture_list)
+{
+ struct xfs_exchmaps_req req = { .flags = 0 };
+ struct xfs_trans_res resv;
+ struct xfs_exchmaps_intent *xmi;
+ struct xfs_log_item *lip = dfp->dfp_intent;
+ struct xfs_xmi_log_item *xmi_lip = XMI_ITEM(lip);
+ struct xfs_mount *mp = lip->li_log->l_mp;
+ struct xfs_trans *tp;
+ struct xfs_inode *ip1, *ip2;
+ int error = 0;
+
+ if (!xfs_xmi_validate(mp, xmi_lip)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &xmi_lip->xmi_format,
+ sizeof(xmi_lip->xmi_format));
+ return -EFSCORRUPTED;
+ }
+
+ xmi = xfs_xmi_item_recover_intent(mp, dfp, &xmi_lip->xmi_format, &req,
+ &ip1, &ip2);
+ if (IS_ERR(xmi))
+ return PTR_ERR(xmi);
+
+ trace_xfs_exchmaps_recover(mp, xmi);
+
+ resv = xlog_recover_resv(&M_RES(mp)->tr_write);
+ error = xfs_trans_alloc(mp, &resv, req.resblks, 0, 0, &tp);
+ if (error)
+ goto err_rele;
+
+ xfs_exchrange_ilock(tp, ip1, ip2);
+
+ xfs_exchmaps_ensure_reflink(tp, xmi);
+ xfs_exchmaps_upgrade_extent_counts(tp, xmi);
+ error = xlog_recover_finish_intent(tp, dfp);
+ if (error == -EFSCORRUPTED)
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ &xmi_lip->xmi_format,
+ sizeof(xmi_lip->xmi_format));
+ if (error)
+ goto err_cancel;
+
+ /*
+ * Commit transaction, which frees the transaction and saves the inodes
+ * for later replay activities.
+ */
+ error = xfs_defer_ops_capture_and_commit(tp, capture_list);
+ goto err_unlock;
+
+err_cancel:
+ xfs_trans_cancel(tp);
+err_unlock:
+ xfs_exchrange_iunlock(ip1, ip2);
+err_rele:
+ xfs_irele(ip2);
+ xfs_irele(ip1);
+ return error;
+}
+
+/* Relog an intent item to push the log tail forward. */
+static struct xfs_log_item *
+xfs_exchmaps_relog_intent(
+ struct xfs_trans *tp,
+ struct xfs_log_item *intent,
+ struct xfs_log_item *done_item)
+{
+ struct xfs_xmi_log_item *xmi_lip;
+ struct xfs_xmi_log_format *old_xlf, *new_xlf;
+
+ old_xlf = &XMI_ITEM(intent)->xmi_format;
+
+ xmi_lip = xfs_xmi_init(tp->t_mountp);
+ new_xlf = &xmi_lip->xmi_format;
+
+ new_xlf->xmi_inode1 = old_xlf->xmi_inode1;
+ new_xlf->xmi_inode2 = old_xlf->xmi_inode2;
+ new_xlf->xmi_igen1 = old_xlf->xmi_igen1;
+ new_xlf->xmi_igen2 = old_xlf->xmi_igen2;
+ new_xlf->xmi_startoff1 = old_xlf->xmi_startoff1;
+ new_xlf->xmi_startoff2 = old_xlf->xmi_startoff2;
+ new_xlf->xmi_blockcount = old_xlf->xmi_blockcount;
+ new_xlf->xmi_flags = old_xlf->xmi_flags;
+ new_xlf->xmi_isize1 = old_xlf->xmi_isize1;
+ new_xlf->xmi_isize2 = old_xlf->xmi_isize2;
+
+ return &xmi_lip->xmi_item;
+}
+
+const struct xfs_defer_op_type xfs_exchmaps_defer_type = {
+ .name = "exchmaps",
+ .max_items = 1,
+ .create_intent = xfs_exchmaps_create_intent,
+ .abort_intent = xfs_exchmaps_abort_intent,
+ .create_done = xfs_exchmaps_create_done,
+ .finish_item = xfs_exchmaps_finish_item,
+ .cancel_item = xfs_exchmaps_cancel_item,
+ .recover_work = xfs_exchmaps_recover_work,
+ .relog_intent = xfs_exchmaps_relog_intent,
+};
+
+STATIC bool
+xfs_xmi_item_match(
+ struct xfs_log_item *lip,
+ uint64_t intent_id)
+{
+ return XMI_ITEM(lip)->xmi_format.xmi_id == intent_id;
+}
+
+static const struct xfs_item_ops xfs_xmi_item_ops = {
+ .flags = XFS_ITEM_INTENT,
+ .iop_size = xfs_xmi_item_size,
+ .iop_format = xfs_xmi_item_format,
+ .iop_unpin = xfs_xmi_item_unpin,
+ .iop_release = xfs_xmi_item_release,
+ .iop_match = xfs_xmi_item_match,
+};
+
+/*
+ * This routine is called to create an in-core file mapping exchange item from
+ * the xmi format structure which was logged on disk. It allocates an in-core
+ * xmi, copies the exchange information from the format structure into it, and
+ * adds the xmi to the AIL with the given LSN.
+ */
+STATIC int
+xlog_recover_xmi_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_xmi_log_item *xmi_lip;
+ struct xfs_xmi_log_format *xmi_formatp;
+ size_t len;
+
+ len = sizeof(struct xfs_xmi_log_format);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ return -EFSCORRUPTED;
+ }
+
+ xmi_formatp = item->ri_buf[0].i_addr;
+ if (xmi_formatp->__pad != 0) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ return -EFSCORRUPTED;
+ }
+
+ xmi_lip = xfs_xmi_init(mp);
+ memcpy(&xmi_lip->xmi_format, xmi_formatp, len);
+
+ xlog_recover_intent_item(log, &xmi_lip->xmi_item, lsn,
+ &xfs_exchmaps_defer_type);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_xmi_item_ops = {
+ .item_type = XFS_LI_XMI,
+ .commit_pass2 = xlog_recover_xmi_commit_pass2,
+};
+
+/*
+ * This routine is called when an XMD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding XMI if it
+ * was still in the log. To do this it searches the AIL for the XMI with an id
+ * equal to that in the XMD format structure. If we find it we drop the XMD
+ * reference, which removes the XMI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_xmd_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_xmd_log_format *xmd_formatp;
+
+ xmd_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_xmd_log_format)) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ return -EFSCORRUPTED;
+ }
+
+ xlog_recover_release_intent(log, XFS_LI_XMI, xmd_formatp->xmd_xmi_id);
+ return 0;
+}
+
+const struct xlog_recover_item_ops xlog_xmd_item_ops = {
+ .item_type = XFS_LI_XMD,
+ .commit_pass2 = xlog_recover_xmd_commit_pass2,
+};