Message ID | 1642673242-1522-1-git-send-email-gautham.ananthakrishna@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [RFC,V1,1/1] ocfs2: fix a deadlock caused by commit 6f1b228529ae49b0 | expand |
Suggest change the patch title to: ocfs2: fix a deadlock when commit trans On 1/20/22 6:07 PM, Gautham Ananthakrishna via Ocfs2-devel wrote: > commit 6f1b228529ae49b0f85ab89bcdb6c365df401558 caused a deadlock > which was uncovered by our internal tests. The deadlock is as foollows: > > Task 1: > A1) jbd2_journal_commit_transaction() > A2) spin_lock(&jh->b_state_lock); > A3) __jbd2_journal_remove_checkpoint() > A4) jbd2_journal_put_journal_head() > A5) jbd_lock_bh_journal_head() > > Task 2: > B1) ocfs2_test_bg_bit_allocatable() > B2) jbd_lock_bh_journal_head() > B3) spin_lock(&jh->b_state_lock); > > A1->A2->A3->A4->B1->B2->B3(blocked)->A5(blocked) > > Now cause process 2 has the jbd lock, it doesn’t let process 1 to continue after A5. > Process 2 now is waiting for b_state_lock to be released by process 1. > > This patch resolves the deadlock. > Missing a fixes tag here, Fixes: 6f1b228529ae ("ocfs2: fix race between searching chunks and release journal_head from buffer_head") Cc: <stable@vger.kernel.org> > Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com> > --- > fs/ocfs2/suballoc.c | 7 +++---- > 1 file changed, 3 insertions(+), 4 deletions(-) > > diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c > index 481017e..a618970 100644 > --- a/fs/ocfs2/suballoc.c > +++ b/fs/ocfs2/suballoc.c > @@ -1259,9 +1259,8 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, > if (!buffer_jbd(bg_bh)) > return 1; > > - jbd_lock_bh_journal_head(bg_bh); > - if (buffer_jbd(bg_bh)) { > - jh = bh2jh(bg_bh); > + jh = jbd2_journal_grab_journal_head(bg_bh); > + if (jh) { > spin_lock(&jh->b_state_lock); > bg = (struct ocfs2_group_desc *) jh->b_committed_data; > if (bg) > @@ -1269,8 +1268,8 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, > else > ret = 1; > spin_unlock(&jh->b_state_lock); > + jbd2_journal_put_journal_head(bg_bh); Seems a mistake here, it's 'jh' not 'bh'. > } > - jbd_unlock_bh_journal_head(bg_bh); > > return ret; > } And symbols jbd2_journal_[grab|put]_journal_head are not exported. Do you test your patch? How about the following way? diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0b86a4365b66..e9f0c72f6664 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2972,6 +2972,7 @@ struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) jbd_unlock_bh_journal_head(bh); return jh; } +EXPORT_SYMBOL(jbd2_journal_grab_journal_head); static void __journal_remove_journal_head(struct buffer_head *bh) { @@ -3024,6 +3025,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) jbd_unlock_bh_journal_head(bh); } } +EXPORT_SYMBOL(jbd2_journal_put_journal_head); /* * Initialize jbd inode head diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 481017e1dac5..7e1dd2299578 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1251,7 +1251,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, { struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct journal_head *jh; - int ret = 1; + int ret; if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) return 0; @@ -1259,18 +1259,18 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, if (!buffer_jbd(bg_bh)) return 1; - jbd_lock_bh_journal_head(bg_bh); - if (buffer_jbd(bg_bh)) { - jh = bh2jh(bg_bh); - spin_lock(&jh->b_state_lock); - bg = (struct ocfs2_group_desc *) jh->b_committed_data; - if (bg) - ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); - else - ret = 1; - spin_unlock(&jh->b_state_lock); - } - jbd_unlock_bh_journal_head(bg_bh); + jh = jbd2_journal_grab_journal_head(bg_bh); + if (!jh) + return 1; + + spin_lock(&jh->b_state_lock); + bg = (struct ocfs2_group_desc *) jh->b_committed_data; + if (bg) + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + else + ret = 1; + spin_unlock(&jh->b_state_lock); + jbd2_journal_put_journal_head(jh); return ret; }
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 481017e..a618970 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1259,9 +1259,8 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, if (!buffer_jbd(bg_bh)) return 1; - jbd_lock_bh_journal_head(bg_bh); - if (buffer_jbd(bg_bh)) { - jh = bh2jh(bg_bh); + jh = jbd2_journal_grab_journal_head(bg_bh); + if (jh) { spin_lock(&jh->b_state_lock); bg = (struct ocfs2_group_desc *) jh->b_committed_data; if (bg) @@ -1269,8 +1268,8 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, else ret = 1; spin_unlock(&jh->b_state_lock); + jbd2_journal_put_journal_head(bg_bh); } - jbd_unlock_bh_journal_head(bg_bh); return ret; }
commit 6f1b228529ae49b0f85ab89bcdb6c365df401558 caused a deadlock which was uncovered by our internal tests. The deadlock is as foollows: Task 1: A1) jbd2_journal_commit_transaction() A2) spin_lock(&jh->b_state_lock); A3) __jbd2_journal_remove_checkpoint() A4) jbd2_journal_put_journal_head() A5) jbd_lock_bh_journal_head() Task 2: B1) ocfs2_test_bg_bit_allocatable() B2) jbd_lock_bh_journal_head() B3) spin_lock(&jh->b_state_lock); A1->A2->A3->A4->B1->B2->B3(blocked)->A5(blocked) Now cause process 2 has the jbd lock, it doesn’t let process 1 to continue after A5. Process 2 now is waiting for b_state_lock to be released by process 1. This patch resolves the deadlock. Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna@oracle.com> --- fs/ocfs2/suballoc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)