Message ID | 20230628010756.70649-3-yukuai1@huaweicloud.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | md/raid5-cache: fix a deadlock in r5l_exit_log() | expand |
On Wed, Jun 28, 2023 at 9:08 AM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > From: Yu Kuai <yukuai3@huawei.com> > > r5l_reclaim_thread() already check that 'conf->log' is not NULL in the > beginning, however, r5c_do_reclaim() and r5l_do_reclaim() will > dereference 'conf->log' again, which will cause null-ptr-deref if > 'conf->log' is set to NULL from r5l_exit_log() concurrently. r5l_exit_log() will wait until reclaim_thread() finishes, and then set conf->log to NULL. So this is not a problem, no? Thanks, Song > > Fix this problem by don't dereference 'conf->log' again in > r5c_do_reclaim() and r5c_do_reclaim(). > > Fixes: a39f7afde358 ("md/r5cache: write-out phase and reclaim support") > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > drivers/md/raid5-cache.c | 20 ++++++++------------ > 1 file changed, 8 insertions(+), 12 deletions(-) > > diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c > index 083288e36949..ba6fc146d265 100644 > --- a/drivers/md/raid5-cache.c > +++ b/drivers/md/raid5-cache.c > @@ -1148,10 +1148,9 @@ static void r5l_run_no_space_stripes(struct r5l_log *log) > * for write through mode, returns log->next_checkpoint > * for write back, returns log_start of first sh in stripe_in_journal_list > */ > -static sector_t r5c_calculate_new_cp(struct r5conf *conf) > +static sector_t r5c_calculate_new_cp(struct r5l_log *log) > { > struct stripe_head *sh; > - struct r5l_log *log = conf->log; > sector_t new_cp; > unsigned long flags; > > @@ -1159,12 +1158,12 @@ static sector_t r5c_calculate_new_cp(struct r5conf *conf) > return log->next_checkpoint; > > spin_lock_irqsave(&log->stripe_in_journal_lock, flags); > - if (list_empty(&conf->log->stripe_in_journal_list)) { > + if (list_empty(&log->stripe_in_journal_list)) { > /* all stripes flushed */ > spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags); > return log->next_checkpoint; > } > - sh = list_first_entry(&conf->log->stripe_in_journal_list, > + sh = list_first_entry(&log->stripe_in_journal_list, > struct stripe_head, r5c); > new_cp = sh->log_start; > spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags); > @@ -1173,10 +1172,8 @@ static sector_t r5c_calculate_new_cp(struct r5conf *conf) > > static sector_t r5l_reclaimable_space(struct r5l_log *log) > { > - struct r5conf *conf = log->rdev->mddev->private; > - > return r5l_ring_distance(log, log->last_checkpoint, > - r5c_calculate_new_cp(conf)); > + r5c_calculate_new_cp(log)); > } > > static void r5l_run_no_mem_stripe(struct r5l_log *log) > @@ -1419,9 +1416,9 @@ void r5c_flush_cache(struct r5conf *conf, int num) > } > } > > -static void r5c_do_reclaim(struct r5conf *conf) > +static void r5c_do_reclaim(struct r5l_log *log) > { > - struct r5l_log *log = conf->log; > + struct r5conf *conf = log->rdev->mddev->private; > struct stripe_head *sh; > int count = 0; > unsigned long flags; > @@ -1496,7 +1493,6 @@ static void r5c_do_reclaim(struct r5conf *conf) > > static void r5l_do_reclaim(struct r5l_log *log) > { > - struct r5conf *conf = log->rdev->mddev->private; > sector_t reclaim_target = xchg(&log->reclaim_target, 0); > sector_t reclaimable; > sector_t next_checkpoint; > @@ -1525,7 +1521,7 @@ static void r5l_do_reclaim(struct r5l_log *log) > log->io_list_lock); > } > > - next_checkpoint = r5c_calculate_new_cp(conf); > + next_checkpoint = r5c_calculate_new_cp(log); > spin_unlock_irq(&log->io_list_lock); > > if (reclaimable == 0 || !write_super) > @@ -1554,7 +1550,7 @@ static void r5l_reclaim_thread(struct md_thread *thread) > > if (!log) > return; > - r5c_do_reclaim(conf); > + r5c_do_reclaim(log); > r5l_do_reclaim(log); > } > > -- > 2.39.2 >
Hi, 在 2023/07/07 16:52, Song Liu 写道: > On Wed, Jun 28, 2023 at 9:08 AM Yu Kuai <yukuai1@huaweicloud.com> wrote: >> >> From: Yu Kuai <yukuai3@huawei.com> >> >> r5l_reclaim_thread() already check that 'conf->log' is not NULL in the >> beginning, however, r5c_do_reclaim() and r5l_do_reclaim() will >> dereference 'conf->log' again, which will cause null-ptr-deref if >> 'conf->log' is set to NULL from r5l_exit_log() concurrently. > > r5l_exit_log() will wait until reclaim_thread() finishes, and then set > conf->log to NULL. So this is not a problem, no? Patch one just revert this, wait until reclaim_thread() then set conf->log to NULL will cause deadlock, as I sescribled in patch 0. Thanks, Kuai > > Thanks, > Song > >> >> Fix this problem by don't dereference 'conf->log' again in >> r5c_do_reclaim() and r5c_do_reclaim(). >> >> Fixes: a39f7afde358 ("md/r5cache: write-out phase and reclaim support") >> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >> --- >> drivers/md/raid5-cache.c | 20 ++++++++------------ >> 1 file changed, 8 insertions(+), 12 deletions(-) >> >> diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c >> index 083288e36949..ba6fc146d265 100644 >> --- a/drivers/md/raid5-cache.c >> +++ b/drivers/md/raid5-cache.c >> @@ -1148,10 +1148,9 @@ static void r5l_run_no_space_stripes(struct r5l_log *log) >> * for write through mode, returns log->next_checkpoint >> * for write back, returns log_start of first sh in stripe_in_journal_list >> */ >> -static sector_t r5c_calculate_new_cp(struct r5conf *conf) >> +static sector_t r5c_calculate_new_cp(struct r5l_log *log) >> { >> struct stripe_head *sh; >> - struct r5l_log *log = conf->log; >> sector_t new_cp; >> unsigned long flags; >> >> @@ -1159,12 +1158,12 @@ static sector_t r5c_calculate_new_cp(struct r5conf *conf) >> return log->next_checkpoint; >> >> spin_lock_irqsave(&log->stripe_in_journal_lock, flags); >> - if (list_empty(&conf->log->stripe_in_journal_list)) { >> + if (list_empty(&log->stripe_in_journal_list)) { >> /* all stripes flushed */ >> spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags); >> return log->next_checkpoint; >> } >> - sh = list_first_entry(&conf->log->stripe_in_journal_list, >> + sh = list_first_entry(&log->stripe_in_journal_list, >> struct stripe_head, r5c); >> new_cp = sh->log_start; >> spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags); >> @@ -1173,10 +1172,8 @@ static sector_t r5c_calculate_new_cp(struct r5conf *conf) >> >> static sector_t r5l_reclaimable_space(struct r5l_log *log) >> { >> - struct r5conf *conf = log->rdev->mddev->private; >> - >> return r5l_ring_distance(log, log->last_checkpoint, >> - r5c_calculate_new_cp(conf)); >> + r5c_calculate_new_cp(log)); >> } >> >> static void r5l_run_no_mem_stripe(struct r5l_log *log) >> @@ -1419,9 +1416,9 @@ void r5c_flush_cache(struct r5conf *conf, int num) >> } >> } >> >> -static void r5c_do_reclaim(struct r5conf *conf) >> +static void r5c_do_reclaim(struct r5l_log *log) >> { >> - struct r5l_log *log = conf->log; >> + struct r5conf *conf = log->rdev->mddev->private; >> struct stripe_head *sh; >> int count = 0; >> unsigned long flags; >> @@ -1496,7 +1493,6 @@ static void r5c_do_reclaim(struct r5conf *conf) >> >> static void r5l_do_reclaim(struct r5l_log *log) >> { >> - struct r5conf *conf = log->rdev->mddev->private; >> sector_t reclaim_target = xchg(&log->reclaim_target, 0); >> sector_t reclaimable; >> sector_t next_checkpoint; >> @@ -1525,7 +1521,7 @@ static void r5l_do_reclaim(struct r5l_log *log) >> log->io_list_lock); >> } >> >> - next_checkpoint = r5c_calculate_new_cp(conf); >> + next_checkpoint = r5c_calculate_new_cp(log); >> spin_unlock_irq(&log->io_list_lock); >> >> if (reclaimable == 0 || !write_super) >> @@ -1554,7 +1550,7 @@ static void r5l_reclaim_thread(struct md_thread *thread) >> >> if (!log) >> return; >> - r5c_do_reclaim(conf); >> + r5c_do_reclaim(log); >> r5l_do_reclaim(log); >> } >> >> -- >> 2.39.2 >> > . >
Hi, 在 2023/07/07 17:06, Yu Kuai 写道: > Hi, > > 在 2023/07/07 16:52, Song Liu 写道: >> On Wed, Jun 28, 2023 at 9:08 AM Yu Kuai <yukuai1@huaweicloud.com> wrote: >>> >>> From: Yu Kuai <yukuai3@huawei.com> >>> >>> r5l_reclaim_thread() already check that 'conf->log' is not NULL in the >>> beginning, however, r5c_do_reclaim() and r5l_do_reclaim() will >>> dereference 'conf->log' again, which will cause null-ptr-deref if >>> 'conf->log' is set to NULL from r5l_exit_log() concurrently. >> >> r5l_exit_log() will wait until reclaim_thread() finishes, and then set >> conf->log to NULL. So this is not a problem, no? Perhaps you means this order? r5l_exit_log flush_work(&log->disable_writeback_work) conf->log = NULL md_unregister_thread(&log->reclaim_thread) I think this is better indeed. Thanks, Kuai > > Patch one just revert this, wait until reclaim_thread() then set > conf->log to NULL will cause deadlock, as I sescribled in patch 0. > > Thanks, > Kuai >> >> Thanks, >> Song >> >>> >>> Fix this problem by don't dereference 'conf->log' again in >>> r5c_do_reclaim() and r5c_do_reclaim(). >>> >>> Fixes: a39f7afde358 ("md/r5cache: write-out phase and reclaim support") >>> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >>> --- >>> drivers/md/raid5-cache.c | 20 ++++++++------------ >>> 1 file changed, 8 insertions(+), 12 deletions(-) >>> >>> diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c >>> index 083288e36949..ba6fc146d265 100644 >>> --- a/drivers/md/raid5-cache.c >>> +++ b/drivers/md/raid5-cache.c >>> @@ -1148,10 +1148,9 @@ static void r5l_run_no_space_stripes(struct >>> r5l_log *log) >>> * for write through mode, returns log->next_checkpoint >>> * for write back, returns log_start of first sh in >>> stripe_in_journal_list >>> */ >>> -static sector_t r5c_calculate_new_cp(struct r5conf *conf) >>> +static sector_t r5c_calculate_new_cp(struct r5l_log *log) >>> { >>> struct stripe_head *sh; >>> - struct r5l_log *log = conf->log; >>> sector_t new_cp; >>> unsigned long flags; >>> >>> @@ -1159,12 +1158,12 @@ static sector_t r5c_calculate_new_cp(struct >>> r5conf *conf) >>> return log->next_checkpoint; >>> >>> spin_lock_irqsave(&log->stripe_in_journal_lock, flags); >>> - if (list_empty(&conf->log->stripe_in_journal_list)) { >>> + if (list_empty(&log->stripe_in_journal_list)) { >>> /* all stripes flushed */ >>> spin_unlock_irqrestore(&log->stripe_in_journal_lock, >>> flags); >>> return log->next_checkpoint; >>> } >>> - sh = list_first_entry(&conf->log->stripe_in_journal_list, >>> + sh = list_first_entry(&log->stripe_in_journal_list, >>> struct stripe_head, r5c); >>> new_cp = sh->log_start; >>> spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags); >>> @@ -1173,10 +1172,8 @@ static sector_t r5c_calculate_new_cp(struct >>> r5conf *conf) >>> >>> static sector_t r5l_reclaimable_space(struct r5l_log *log) >>> { >>> - struct r5conf *conf = log->rdev->mddev->private; >>> - >>> return r5l_ring_distance(log, log->last_checkpoint, >>> - r5c_calculate_new_cp(conf)); >>> + r5c_calculate_new_cp(log)); >>> } >>> >>> static void r5l_run_no_mem_stripe(struct r5l_log *log) >>> @@ -1419,9 +1416,9 @@ void r5c_flush_cache(struct r5conf *conf, int num) >>> } >>> } >>> >>> -static void r5c_do_reclaim(struct r5conf *conf) >>> +static void r5c_do_reclaim(struct r5l_log *log) >>> { >>> - struct r5l_log *log = conf->log; >>> + struct r5conf *conf = log->rdev->mddev->private; >>> struct stripe_head *sh; >>> int count = 0; >>> unsigned long flags; >>> @@ -1496,7 +1493,6 @@ static void r5c_do_reclaim(struct r5conf *conf) >>> >>> static void r5l_do_reclaim(struct r5l_log *log) >>> { >>> - struct r5conf *conf = log->rdev->mddev->private; >>> sector_t reclaim_target = xchg(&log->reclaim_target, 0); >>> sector_t reclaimable; >>> sector_t next_checkpoint; >>> @@ -1525,7 +1521,7 @@ static void r5l_do_reclaim(struct r5l_log *log) >>> log->io_list_lock); >>> } >>> >>> - next_checkpoint = r5c_calculate_new_cp(conf); >>> + next_checkpoint = r5c_calculate_new_cp(log); >>> spin_unlock_irq(&log->io_list_lock); >>> >>> if (reclaimable == 0 || !write_super) >>> @@ -1554,7 +1550,7 @@ static void r5l_reclaim_thread(struct md_thread >>> *thread) >>> >>> if (!log) >>> return; >>> - r5c_do_reclaim(conf); >>> + r5c_do_reclaim(log); >>> r5l_do_reclaim(log); >>> } >>> >>> -- >>> 2.39.2 >>> >> . >> > > . >
Hi, 在 2023/07/07 17:16, Yu Kuai 写道: > Perhaps you means this order? > > r5l_exit_log > flush_work(&log->disable_writeback_work) > conf->log = NULL > md_unregister_thread(&log->reclaim_thread) > > I think this is better indeed. Never mind, this is wrong, I got confused... Please ignore this and take a look at my original fix. Thanks, Kuai
On Fri, Jul 7, 2023 at 5:19 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > Hi, > > 在 2023/07/07 17:16, Yu Kuai 写道: > > Perhaps you means this order? > > > > r5l_exit_log > > flush_work(&log->disable_writeback_work) > > conf->log = NULL > > md_unregister_thread(&log->reclaim_thread) > > > > I think this is better indeed. > Never mind, this is wrong, I got confused... > > Please ignore this and take a look at my original fix. How about r5l_exit_log md_unregister_thread(&log->reclaim_thread) conf->log = NULL flush_work(&log->disable_writeback_work) ? Thanks, Song
Hi, 在 2023/07/07 17:36, Song Liu 写道: > On Fri, Jul 7, 2023 at 5:19 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: >> >> Hi, >> >> 在 2023/07/07 17:16, Yu Kuai 写道: >>> Perhaps you means this order? >>> >>> r5l_exit_log >>> flush_work(&log->disable_writeback_work) >>> conf->log = NULL >>> md_unregister_thread(&log->reclaim_thread) >>> >>> I think this is better indeed. >> Never mind, this is wrong, I got confused... >> >> Please ignore this and take a look at my original fix. > > How about > > r5l_exit_log > md_unregister_thread(&log->reclaim_thread) > conf->log = NULL > flush_work(&log->disable_writeback_work) > > ? This looks correct, expect that wake_up() should be moved together. I'll send a v2. Thanks, Kuai > > Thanks, > Song > . >
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 083288e36949..ba6fc146d265 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1148,10 +1148,9 @@ static void r5l_run_no_space_stripes(struct r5l_log *log) * for write through mode, returns log->next_checkpoint * for write back, returns log_start of first sh in stripe_in_journal_list */ -static sector_t r5c_calculate_new_cp(struct r5conf *conf) +static sector_t r5c_calculate_new_cp(struct r5l_log *log) { struct stripe_head *sh; - struct r5l_log *log = conf->log; sector_t new_cp; unsigned long flags; @@ -1159,12 +1158,12 @@ static sector_t r5c_calculate_new_cp(struct r5conf *conf) return log->next_checkpoint; spin_lock_irqsave(&log->stripe_in_journal_lock, flags); - if (list_empty(&conf->log->stripe_in_journal_list)) { + if (list_empty(&log->stripe_in_journal_list)) { /* all stripes flushed */ spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags); return log->next_checkpoint; } - sh = list_first_entry(&conf->log->stripe_in_journal_list, + sh = list_first_entry(&log->stripe_in_journal_list, struct stripe_head, r5c); new_cp = sh->log_start; spin_unlock_irqrestore(&log->stripe_in_journal_lock, flags); @@ -1173,10 +1172,8 @@ static sector_t r5c_calculate_new_cp(struct r5conf *conf) static sector_t r5l_reclaimable_space(struct r5l_log *log) { - struct r5conf *conf = log->rdev->mddev->private; - return r5l_ring_distance(log, log->last_checkpoint, - r5c_calculate_new_cp(conf)); + r5c_calculate_new_cp(log)); } static void r5l_run_no_mem_stripe(struct r5l_log *log) @@ -1419,9 +1416,9 @@ void r5c_flush_cache(struct r5conf *conf, int num) } } -static void r5c_do_reclaim(struct r5conf *conf) +static void r5c_do_reclaim(struct r5l_log *log) { - struct r5l_log *log = conf->log; + struct r5conf *conf = log->rdev->mddev->private; struct stripe_head *sh; int count = 0; unsigned long flags; @@ -1496,7 +1493,6 @@ static void r5c_do_reclaim(struct r5conf *conf) static void r5l_do_reclaim(struct r5l_log *log) { - struct r5conf *conf = log->rdev->mddev->private; sector_t reclaim_target = xchg(&log->reclaim_target, 0); sector_t reclaimable; sector_t next_checkpoint; @@ -1525,7 +1521,7 @@ static void r5l_do_reclaim(struct r5l_log *log) log->io_list_lock); } - next_checkpoint = r5c_calculate_new_cp(conf); + next_checkpoint = r5c_calculate_new_cp(log); spin_unlock_irq(&log->io_list_lock); if (reclaimable == 0 || !write_super) @@ -1554,7 +1550,7 @@ static void r5l_reclaim_thread(struct md_thread *thread) if (!log) return; - r5c_do_reclaim(conf); + r5c_do_reclaim(log); r5l_do_reclaim(log); }