diff mbox series

[v26,01/12] xfs: Fix double unlock in defer capture code

Message ID 20220124052708.580016-2-allison.henderson@oracle.com (mailing list archive)
State Superseded
Headers show
Series xfs: Log Attribute Replay | expand

Commit Message

Allison Henderson Jan. 24, 2022, 5:26 a.m. UTC
The new deferred attr patch set uncovered a double unlock in the
recent port of the defer ops capture and continue code.  During log
recovery, we're allowed to hold buffers to a transaction that's being
used to replay an intent item.  When we capture the resources as part
of scheduling a continuation of an intent chain, we call xfs_buf_hold
to retain our reference to the buffer beyond the transaction commit,
but we do /not/ call xfs_trans_bhold to maintain the buffer lock.
This means that xfs_defer_ops_continue needs to relock the buffers
before xfs_defer_restore_resources joins then tothe new transaction.

Additionally, the buffers should not be passed back via the dres
structure since they need to remain locked unlike the inodes.  So
simply set dr_bufs to zero after populating the dres structure.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Allison Henderson <allison.henderson@oracle.com>
---
 fs/xfs/libxfs/xfs_defer.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

Comments

Chandan Babu R Jan. 27, 2022, 5:38 a.m. UTC | #1
On 24 Jan 2022 at 10:56, Allison Henderson wrote:
> The new deferred attr patch set uncovered a double unlock in the
> recent port of the defer ops capture and continue code.  During log
> recovery, we're allowed to hold buffers to a transaction that's being
> used to replay an intent item.  When we capture the resources as part
> of scheduling a continuation of an intent chain, we call xfs_buf_hold
> to retain our reference to the buffer beyond the transaction commit,
> but we do /not/ call xfs_trans_bhold to maintain the buffer lock.

As part of recovering an intent item, xfs_defer_ops_capture_and_commit()
invokes xfs_defer_save_resources(). Here we save/capture those xfs_bufs which
have XFS_BLI_HOLD flag set. AFAICT, these xfs_bufs are already locked. When
the transaction is committed to the CIL, iop_committing()
(i.e. xfs_buf_item_committing()) routine is invoked. Here we refrain from
unlocking an xfs_buf if XFS_BLI_HOLD flag is set. Hence the xfs_buf continues
to be in locked state.

Later, When processing the captured list (via xlog_finish_defer_ops()),
wouldn't locking the same xfs_buf by xfs_defer_ops_continue() cause a
deadlock?

> This means that xfs_defer_ops_continue needs to relock the buffers
> before xfs_defer_restore_resources joins then tothe new transaction.
>
> Additionally, the buffers should not be passed back via the dres
> structure since they need to remain locked unlike the inodes.  So
> simply set dr_bufs to zero after populating the dres structure.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> Signed-off-by: Allison Henderson <allison.henderson@oracle.com>
> ---
>  fs/xfs/libxfs/xfs_defer.c | 11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
> index 0805ade2d300..6dac8d6b8c21 100644
> --- a/fs/xfs/libxfs/xfs_defer.c
> +++ b/fs/xfs/libxfs/xfs_defer.c
> @@ -22,6 +22,7 @@
>  #include "xfs_refcount.h"
>  #include "xfs_bmap.h"
>  #include "xfs_alloc.h"
> +#include "xfs_buf.h"
>  
>  static struct kmem_cache	*xfs_defer_pending_cache;
>  
> @@ -774,17 +775,25 @@ xfs_defer_ops_continue(
>  	struct xfs_trans		*tp,
>  	struct xfs_defer_resources	*dres)
>  {
> +	unsigned int			i;
> +
>  	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
>  	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
>  
> -	/* Lock and join the captured inode to the new transaction. */
> +	/* Lock the captured resources to the new transaction. */
>  	if (dfc->dfc_held.dr_inos == 2)
>  		xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL,
>  				    dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL);
>  	else if (dfc->dfc_held.dr_inos == 1)
>  		xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL);
> +
> +	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
> +		xfs_buf_lock(dfc->dfc_held.dr_bp[i]);
> +
> +	/* Join the captured resources to the new transaction. */
>  	xfs_defer_restore_resources(tp, &dfc->dfc_held);
>  	memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources));
> +	dres->dr_bufs = 0;
>  
>  	/* Move captured dfops chain and state to the transaction. */
>  	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
Allison Henderson Jan. 27, 2022, 10:54 p.m. UTC | #2
On 1/26/22 10:38 PM, Chandan Babu R wrote:
> On 24 Jan 2022 at 10:56, Allison Henderson wrote:
>> The new deferred attr patch set uncovered a double unlock in the
>> recent port of the defer ops capture and continue code.  During log
>> recovery, we're allowed to hold buffers to a transaction that's being
>> used to replay an intent item.  When we capture the resources as part
>> of scheduling a continuation of an intent chain, we call xfs_buf_hold
>> to retain our reference to the buffer beyond the transaction commit,
>> but we do /not/ call xfs_trans_bhold to maintain the buffer lock.
> 
> As part of recovering an intent item, xfs_defer_ops_capture_and_commit()
> invokes xfs_defer_save_resources(). Here we save/capture those xfs_bufs which
> have XFS_BLI_HOLD flag set. AFAICT, these xfs_bufs are already locked. When
> the transaction is committed to the CIL, iop_committing()
> (i.e. xfs_buf_item_committing()) routine is invoked. Here we refrain from
> unlocking an xfs_buf if XFS_BLI_HOLD flag is set. Hence the xfs_buf continues
> to be in locked state.
> 
> Later, When processing the captured list (via xlog_finish_defer_ops()),
> wouldn't locking the same xfs_buf by xfs_defer_ops_continue() cause a
> deadlock?

Well, currently the attr code may take the lock at some point during the 
operation and then lets it go later when it no longer needs it.  So that 
is where the corresponding unlock comes from.  Ideally, the delay replay 
and the log replay should behave the same so that the underlying 
operation doesn't need to know about it, or do anything different.  I 
think the attr operation is the first to use this lock hold over during 
a journal replay though, so I suspect there just wasn't very much 
testing to exercise it when the defer capture port went in.  It comes up 
pretty quickly with the new log attribute replay test though.

If it helps to see it, it's easy to reproduce:
Build/install both kernel and user space branches, as well as the test cases
https://github.com/allisonhenderson/xfs_work/tree/delayed_attrs_v26_extended
https://github.com/allisonhenderson/xfs_work/tree/delayed_attrs_xfsprogs_v26_extended
https://github.com/allisonhenderson/xfs_work/tree/pptr_xfstestsv5

Turn on the log attr replay feature:
echo 1 > /sys/fs/xfs/debug/larp

Run new journal replay test
./check xfs/542

Test should pass as it is.  Reverse apply this patch to see the bug.

Hope this helps!
Allison

> 
>> This means that xfs_defer_ops_continue needs to relock the buffers
>> before xfs_defer_restore_resources joins then tothe new transaction.
>>
>> Additionally, the buffers should not be passed back via the dres
>> structure since they need to remain locked unlike the inodes.  So
>> simply set dr_bufs to zero after populating the dres structure.
>>
>> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
>> Signed-off-by: Allison Henderson <allison.henderson@oracle.com>
>> ---
>>   fs/xfs/libxfs/xfs_defer.c | 11 ++++++++++-
>>   1 file changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
>> index 0805ade2d300..6dac8d6b8c21 100644
>> --- a/fs/xfs/libxfs/xfs_defer.c
>> +++ b/fs/xfs/libxfs/xfs_defer.c
>> @@ -22,6 +22,7 @@
>>   #include "xfs_refcount.h"
>>   #include "xfs_bmap.h"
>>   #include "xfs_alloc.h"
>> +#include "xfs_buf.h"
>>   
>>   static struct kmem_cache	*xfs_defer_pending_cache;
>>   
>> @@ -774,17 +775,25 @@ xfs_defer_ops_continue(
>>   	struct xfs_trans		*tp,
>>   	struct xfs_defer_resources	*dres)
>>   {
>> +	unsigned int			i;
>> +
>>   	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
>>   	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
>>   
>> -	/* Lock and join the captured inode to the new transaction. */
>> +	/* Lock the captured resources to the new transaction. */
>>   	if (dfc->dfc_held.dr_inos == 2)
>>   		xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL,
>>   				    dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL);
>>   	else if (dfc->dfc_held.dr_inos == 1)
>>   		xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL);
>> +
>> +	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
>> +		xfs_buf_lock(dfc->dfc_held.dr_bp[i]);
>> +
>> +	/* Join the captured resources to the new transaction. */
>>   	xfs_defer_restore_resources(tp, &dfc->dfc_held);
>>   	memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources));
>> +	dres->dr_bufs = 0;
>>   
>>   	/* Move captured dfops chain and state to the transaction. */
>>   	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
> 
>
diff mbox series

Patch

diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 0805ade2d300..6dac8d6b8c21 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -22,6 +22,7 @@ 
 #include "xfs_refcount.h"
 #include "xfs_bmap.h"
 #include "xfs_alloc.h"
+#include "xfs_buf.h"
 
 static struct kmem_cache	*xfs_defer_pending_cache;
 
@@ -774,17 +775,25 @@  xfs_defer_ops_continue(
 	struct xfs_trans		*tp,
 	struct xfs_defer_resources	*dres)
 {
+	unsigned int			i;
+
 	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
 	ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
 
-	/* Lock and join the captured inode to the new transaction. */
+	/* Lock the captured resources to the new transaction. */
 	if (dfc->dfc_held.dr_inos == 2)
 		xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL,
 				    dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL);
 	else if (dfc->dfc_held.dr_inos == 1)
 		xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL);
+
+	for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
+		xfs_buf_lock(dfc->dfc_held.dr_bp[i]);
+
+	/* Join the captured resources to the new transaction. */
 	xfs_defer_restore_resources(tp, &dfc->dfc_held);
 	memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources));
+	dres->dr_bufs = 0;
 
 	/* Move captured dfops chain and state to the transaction. */
 	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);