@@ -3932,6 +3932,20 @@ xfs_bmap_btalloc(
ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
XFS_TRANS_DQ_BCOUNT,
(long) args.len);
+
+ /*
+ * If we are going to update the rmap btree, join the AGF buffer
+ * used for this allocation to the defer ops processing so that it
+ * stays locked until all the required btree updates have been
+ * processed.
+ */
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+ ASSERT(args.agbp);
+ error = xfs_defer_join_buf(ap->tp, ap->dfops,
+ args.agbp);
+ if (error)
+ return error;
+ }
} else {
ap->blkno = NULLFSBLOCK;
ap->length = 0;
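For context, the pattern this hunk introduces can be reused by any allocation that queues deferred rmap updates. A minimal caller sketch, assuming the standard xfs_alloc_read_agf() interface and hypothetical mp/tp/dfops/agno locals (not code from this patch):

	struct xfs_buf		*agbp;
	int			error;

	/* read and lock the AGF for this AG */
	error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
	if (error)
		return error;

	/* keep the AGF locked until the deferred rmap updates are done */
	error = xfs_defer_join_buf(tp, dfops, agbp);
	if (error)
		return error;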
@@ -271,6 +271,13 @@ xfs_defer_trans_roll(
xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
}
+ /* Rejoin the held buffers to the new transaction and hold them again. */
+ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+ ASSERT(dop->dop_bufs[i]->b_transp == NULL);
+ xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
+ xfs_trans_bhold(*tp, dop->dop_bufs[i]);
+ }
+
return error;
}
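In isolation, the hold-across-roll pattern this loop implements looks like the following sketch (assuming this tree's single-argument xfs_trans_roll() prototype; tp/bp/error are hypothetical locals):

	/* keep bp locked when the old transaction commits */
	xfs_trans_bhold(tp, bp);

	/* commit the old transaction and start a new one */
	error = xfs_trans_roll(&tp);
	if (error)
		return error;

	/* attach bp to the new transaction, then hold it for the next roll */
	xfs_trans_bjoin(tp, bp);
	xfs_trans_bhold(tp, bp);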
@@ -297,7 +304,7 @@ xfs_defer_join(
for (i = 0; i < XFS_DEFER_OPS_NR_INODES; i++) {
if (dop->dop_inodes[i] == ip)
return 0;
- else if (dop->dop_inodes[i] == NULL) {
+ if (!dop->dop_inodes[i]) {
dop->dop_inodes[i] = ip;
return 0;
}
@@ -307,6 +314,60 @@ xfs_defer_join(
}
/*
+ * Add this buffer to the deferred ops. If this is a buffer we have not
+ * already joined, we need to ensure it is held locked across transaction
+ * commits. This means each joined buffer remains locked until the final
+ * deferred op commits and we clean up the xfs_defer_ops structure, which
+ * guarantees atomic access to the buffers and their state while we
+ * perform multiple operations on them through deferred ops processing.
+ */
+int
+xfs_defer_join_buf(
+ struct xfs_trans *tp,
+ struct xfs_defer_ops *dop,
+ struct xfs_buf *bp)
+{
+ int i;
+
+ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
+ if (dop->dop_bufs[i] == bp)
+ return 0;
+ if (!dop->dop_bufs[i]) {
+ xfs_trans_bhold(tp, bp);
+ dop->dop_bufs[i] = bp;
+ return 0;
+ }
+ }
+
+ return -EFSCORRUPTED;
+}
+
+/*
+ * When we are done with the defer processing, the buffers we hold
+ * will still be locked. We need to release our hold on them now.
+ *
+ * We can be called from a context that doesn't pass a transaction even
+ * though the buffers are still attached to one. If that happens, pull
+ * the transaction from the buffer. If the buffer is not part of any
+ * transaction, b_transp will be NULL and there is no hold for us to
+ * release; the buffer will be unlocked through its normal release path.
+ */
+static void
+xfs_defer_brelse(
+ struct xfs_trans *tp,
+ struct xfs_defer_ops *dop)
+{
+ int i;
+
+ for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
+ struct xfs_trans *btp = tp;
+
+ if (!btp)
+ btp = dop->dop_bufs[i]->b_transp;
+ if (!btp)
+ continue;
+ xfs_trans_bhold_release(btp, dop->dop_bufs[i]);
+ }
+}
+
+/*
* Finish all the pending work. This involves logging intent items for
* any work items that wandered in since the last transaction roll (if
* one has even happened), rolling the transaction, and finishing the
@@ -402,6 +463,7 @@ xfs_defer_finish(
}
out:
+ xfs_defer_brelse(*tp, dop);
if (error)
trace_xfs_defer_finish_error((*tp)->t_mountp, dop, error);
else
@@ -449,6 +511,7 @@ xfs_defer_cancel(
ASSERT(dfp->dfp_count == 0);
kmem_free(dfp);
}
+ xfs_defer_brelse(NULL, dop);
}
/* Add an item for later deferred processing. */
@@ -502,9 +565,7 @@ xfs_defer_init(
struct xfs_defer_ops *dop,
xfs_fsblock_t *fbp)
{
- dop->dop_committed = false;
- dop->dop_low = false;
- memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
+ memset(dop, 0, sizeof(*dop));
*fbp = NULLFSBLOCK;
INIT_LIST_HEAD(&dop->dop_intake);
INIT_LIST_HEAD(&dop->dop_pending);
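Tying the lifecycle together, a hypothetical caller of the new interface would look roughly like this (assuming the two-argument xfs_defer_finish() prototype of this tree; tp and agbp are stand-in locals):

	struct xfs_defer_ops	dfops;
	xfs_fsblock_t		firstfsb;
	int			error;

	xfs_defer_init(&dfops, &firstfsb);

	/* ... queue deferred work, then pin the buffer it needs ... */
	error = xfs_defer_join_buf(tp, &dfops, agbp);
	if (error)
		goto out_cancel;

	/* rolls the transaction; held buffers are rejoined on every roll */
	error = xfs_defer_finish(&tp, &dfops);
	if (error)
		goto out_cancel;
	return 0;

out_cancel:
	/* drops the buffer holds via xfs_defer_brelse(NULL, ...) */
	xfs_defer_cancel(&dfops);
	return error;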
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
};
#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
+#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
struct xfs_defer_ops {
bool dop_committed; /* did any trans commit? */
@@ -68,6 +69,9 @@ struct xfs_defer_ops {
/* relog these inodes with each roll */
struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES];
+
+ /* hold these buffers with each roll */
+ struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
};
void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -78,6 +82,8 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
int xfs_defer_join(struct xfs_defer_ops *dop, struct xfs_inode *ip);
+int xfs_defer_join_buf(struct xfs_trans *tp, struct xfs_defer_ops *dop,
+ struct xfs_buf *bp);
/* Description of a deferred type. */
struct xfs_defer_op_type {
@@ -66,6 +66,12 @@ xfs_trans_buf_item_match(
* Add the locked buffer to the transaction.
*
* The buffer must be locked, and it cannot be associated with any
+ * transaction other than the one we pass in. This "join" recursion is a
+ * result of needing to hold buffers locked across multiple transactions
+ * during defer ops processing. To hold a buffer across a transaction roll
+ * we must first join it to the new transaction, so when the buffer is
+ * joined again in the normal course of that transaction we get called a
+ * second time. Hence just do nothing if bp->b_transp already matches the incoming
* transaction.
*
* If the buffer does not yet have a buf log item associated with it,
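To make the double-join concrete, here is the hypothetical event sequence the comment above describes (same tp throughout; agbp is a buffer held via the defer ops):

	/* defer roll: rejoin the held buffer to the new transaction */
	xfs_trans_bjoin(tp, agbp);	/* sets agbp->b_transp = tp */
	xfs_trans_bhold(tp, agbp);

	/* later, a deferred op joins the same buffer in the normal course
	 * of the transaction; with the check below this is now a no-op
	 * rather than tripping the old b_transp == NULL assert */
	xfs_trans_bjoin(tp, agbp);	/* b_transp == tp, returns early */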
@@ -79,7 +85,11 @@ _xfs_trans_bjoin(
{
struct xfs_buf_log_item *bip;
- ASSERT(bp->b_transp == NULL);
+ ASSERT(!bp->b_transp || bp->b_transp == tp);
+
+ /* already joined to this transaction from defer ops? */
+ if (bp->b_transp == tp)
+ return;
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If