diff mbox series

[PATCHC,RFC,V2,1/1] ocfs2: fix a deadlock when commit trans

Message ID 1642682627-12041-1-git-send-email-gautham.ananthakrishna@oracle.com (mailing list archive)
State New, archived
Headers show
Series [PATCHC,RFC,V2,1/1] ocfs2: fix a deadlock when commit trans | expand

Commit Message

Gautham Ananthakrishna Jan. 20, 2022, 12:43 p.m. UTC
commit 6f1b228529ae49b0f85ab89bcdb6c365df401558 caused a deadlock
which was uncovered by our internal tests. The deadlock is as foollows:

Task 1:
A1) jbd2_journal_commit_transaction()
A2)         spin_lock(&jh->b_state_lock);
A3)         __jbd2_journal_remove_checkpoint()
A4)                         jbd2_journal_put_journal_head()
A5)                                         jbd_lock_bh_journal_head()

Task 2:
B1) ocfs2_test_bg_bit_allocatable()
B2)         jbd_lock_bh_journal_head()
B3)         spin_lock(&jh->b_state_lock);

A1->A2->A3->A4->B1->B2->B3(blocked)->A5(blocked)

Now cause process 2 has the jbd lock, it doesn’t let process 1 to continue after A5.
Process 2 now is waiting for b_state_lock to be released by process 1.

This patch resolves the deadlock.

Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
---
 fs/ocfs2/suballoc.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

Comments

Joseph Qi Jan. 21, 2022, 1:43 a.m. UTC | #1
Seems you still sent a patch that couldn't build.
Have you checked my previous comments and tried my suggested way?

Thanks,
Joseph

On 1/20/22 8:43 PM, Gautham Ananthakrishna wrote:
> commit 6f1b228529ae49b0f85ab89bcdb6c365df401558 caused a deadlock
> which was uncovered by our internal tests. The deadlock is as foollows:
> 
> Task 1:
> A1) jbd2_journal_commit_transaction()
> A2)         spin_lock(&jh->b_state_lock);
> A3)         __jbd2_journal_remove_checkpoint()
> A4)                         jbd2_journal_put_journal_head()
> A5)                                         jbd_lock_bh_journal_head()
> 
> Task 2:
> B1) ocfs2_test_bg_bit_allocatable()
> B2)         jbd_lock_bh_journal_head()
> B3)         spin_lock(&jh->b_state_lock);
> 
> A1->A2->A3->A4->B1->B2->B3(blocked)->A5(blocked)
> 
> Now cause process 2 has the jbd lock, it doesn’t let process 1 to continue after A5.
> Process 2 now is waiting for b_state_lock to be released by process 1.
> 
> This patch resolves the deadlock.
> 
> Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com>
> ---
>  fs/ocfs2/suballoc.c | 7 +++----
>  1 file changed, 3 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> index 481017e..a618970 100644
> --- a/fs/ocfs2/suballoc.c
> +++ b/fs/ocfs2/suballoc.c
> @@ -1259,9 +1259,8 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
>  	if (!buffer_jbd(bg_bh))
>  		return 1;
>  
> -	jbd_lock_bh_journal_head(bg_bh);
> -	if (buffer_jbd(bg_bh)) {
> -		jh = bh2jh(bg_bh);
> +	jh = jbd2_journal_grab_journal_head(bg_bh);
> +	if (jh) {
>  		spin_lock(&jh->b_state_lock);
>  		bg = (struct ocfs2_group_desc *) jh->b_committed_data;
>  		if (bg)
> @@ -1269,8 +1268,8 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
>  		else
>  			ret = 1;
>  		spin_unlock(&jh->b_state_lock);
> +		jbd2_journal_put_journal_head(bg_bh);
>  	}
> -	jbd_unlock_bh_journal_head(bg_bh);
>  
>  	return ret;
>  }
diff mbox series

Patch

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 481017e..a618970 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1259,9 +1259,8 @@  static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 	if (!buffer_jbd(bg_bh))
 		return 1;
 
-	jbd_lock_bh_journal_head(bg_bh);
-	if (buffer_jbd(bg_bh)) {
-		jh = bh2jh(bg_bh);
+	jh = jbd2_journal_grab_journal_head(bg_bh);
+	if (jh) {
 		spin_lock(&jh->b_state_lock);
 		bg = (struct ocfs2_group_desc *) jh->b_committed_data;
 		if (bg)
@@ -1269,8 +1268,8 @@  static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 		else
 			ret = 1;
 		spin_unlock(&jh->b_state_lock);
+		jbd2_journal_put_journal_head(bg_bh);
 	}
-	jbd_unlock_bh_journal_head(bg_bh);
 
 	return ret;
 }