Message ID | 20220718063410.338626-10-hch@lst.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/10] md: fix mddev->kobj lifetime | expand |
On 7/18/22 08:34, Christoph Hellwig wrote: > This ensures device names don't get prematurely reused. Instead add a > deleted flag to skip already deleted devices in mddev_get and other > places that only want to see live mddevs. > > Reported-by; Logan Gunthorpe <logang@deltatee.com> > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > drivers/md/md.c | 56 +++++++++++++++++++++++++++++++++---------------- > drivers/md/md.h | 1 + > 2 files changed, 39 insertions(+), 18 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 805f2b4ed9c0d..08cf21ad4c2d7 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -625,6 +625,10 @@ EXPORT_SYMBOL(md_flush_request); > > static inline struct mddev *mddev_get(struct mddev *mddev) > { > + lockdep_assert_held(&all_mddevs_lock); > + > + if (mddev->deleted) > + return NULL; > atomic_inc(&mddev->active); > return mddev; > } Why can't we use 'atomic_inc_unless_zero' here and do away with the additional 'deleted' flag? > @@ -639,7 +643,7 @@ static void mddev_put(struct mddev *mddev) > mddev->ctime == 0 && !mddev->hold_active) { > /* Array is not configured at all, and not held active, > * so destroy it */ > - list_del_init(&mddev->all_mddevs); > + mddev->deleted = true; > > /* > * Call queue_work inside the spinlock so that > @@ -719,8 +723,8 @@ static struct mddev *mddev_find(dev_t unit) > > spin_lock(&all_mddevs_lock); > mddev = mddev_find_locked(unit); > - if (mddev) > - mddev_get(mddev); > + if (mddev && !mddev_get(mddev)) > + mddev = NULL; > spin_unlock(&all_mddevs_lock); > > return mddev; > @@ -3338,6 +3342,8 @@ static bool md_rdev_overlaps(struct md_rdev *rdev) > > spin_lock(&all_mddevs_lock); > list_for_each_entry(mddev, &all_mddevs, all_mddevs) { > + if (mddev->deleted) > + continue; Any particular reason why you can't use the 'mddev_get()' / 'mddev_put()' sequence here? After all, I don't think that 'mddev' should vanish while being in this loop, no? > rdev_for_each(rdev2, mddev) { > if (rdev != rdev2 && rdev->bdev == rdev2->bdev && > md_rdevs_overlap(rdev, rdev2)) { > @@ -5525,11 +5531,10 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) > if (!entry->show) > return -EIO; > spin_lock(&all_mddevs_lock); > - if (list_empty(&mddev->all_mddevs)) { > + if (!mddev_get(mddev)) { > spin_unlock(&all_mddevs_lock); > return -EBUSY; > } > - mddev_get(mddev); > spin_unlock(&all_mddevs_lock); > > rv = entry->show(mddev, page); > @@ -5550,11 +5555,10 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, > if (!capable(CAP_SYS_ADMIN)) > return -EACCES; > spin_lock(&all_mddevs_lock); > - if (list_empty(&mddev->all_mddevs)) { > + if (!mddev_get(mddev)) { > spin_unlock(&all_mddevs_lock); > return -EBUSY; > } > - mddev_get(mddev); > spin_unlock(&all_mddevs_lock); > rv = entry->store(mddev, page, length); > mddev_put(mddev); > @@ -7851,7 +7855,7 @@ static void md_free_disk(struct gendisk *disk) > bioset_exit(&mddev->bio_set); > bioset_exit(&mddev->sync_set); > > - kfree(mddev); > + mddev_free(mddev); > } > > const struct block_device_operations md_fops = > @@ -8173,6 +8177,8 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos) > if (!l--) { > mddev = list_entry(tmp, struct mddev, all_mddevs); > mddev_get(mddev); > + if (!mddev_get(mddev)) > + continue; > spin_unlock(&all_mddevs_lock); > return mddev; > } > @@ -8186,25 +8192,35 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos) > { > struct list_head *tmp; > struct mddev *next_mddev, *mddev = v; > + struct mddev *to_put = NULL; > > ++*pos; > if (v == (void*)2) > return NULL; > > spin_lock(&all_mddevs_lock); > - if (v == (void*)1) > + if (v == (void*)1) { > tmp = all_mddevs.next; > - else > + } else { > + to_put = mddev; > tmp = mddev->all_mddevs.next; > - if (tmp != &all_mddevs) > - next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs)); > - else { > - next_mddev = (void*)2; > - *pos = 0x10000; > } > + > + for (;;) { > + if (tmp == &all_mddevs) { > + next_mddev = (void*)2; > + *pos = 0x10000; > + break; > + } > + next_mddev = list_entry(tmp, struct mddev, all_mddevs); > + if (mddev_get(next_mddev)) > + break; > + mddev = next_mddev; > + tmp = mddev->all_mddevs.next; > + }; > spin_unlock(&all_mddevs_lock); > > - if (v != (void*)1) > + if (to_put) > mddev_put(mddev); > return next_mddev; > > @@ -8768,6 +8784,8 @@ void md_do_sync(struct md_thread *thread) > goto skip; > spin_lock(&all_mddevs_lock); > list_for_each_entry(mddev2, &all_mddevs, all_mddevs) { > + if (mddev2->deleted) > + continue; > if (mddev2 == mddev) > continue; > if (!mddev->parallel_resync > @@ -9570,7 +9588,8 @@ static int md_notify_reboot(struct notifier_block *this, > > spin_lock(&all_mddevs_lock); > list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) { > - mddev_get(mddev); > + if (!mddev_get(mddev)) > + continue; > spin_unlock(&all_mddevs_lock); > if (mddev_trylock(mddev)) { > if (mddev->pers) > @@ -9925,7 +9944,8 @@ static __exit void md_exit(void) > > spin_lock(&all_mddevs_lock); > list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) { > - mddev_get(mddev); > + if (!mddev_get(mddev)) > + continue; > spin_unlock(&all_mddevs_lock); > export_array(mddev); > mddev->ctime = 0; > diff --git a/drivers/md/md.h b/drivers/md/md.h > index 1a85dbe78a71c..bc870e1f1e8c2 100644 > --- a/drivers/md/md.h > +++ b/drivers/md/md.h > @@ -503,6 +503,7 @@ struct mddev { > > atomic_t max_corr_read_errors; /* max read retries */ > struct list_head all_mddevs; > + bool deleted; > > const struct attribute_group *to_remove; > Cheers, Hannes
On Mon, Jul 18, 2022 at 12:17 AM Hannes Reinecke <hare@suse.de> wrote: > > On 7/18/22 08:34, Christoph Hellwig wrote: > > This ensures device names don't get prematurely reused. Instead add a > > deleted flag to skip already deleted devices in mddev_get and other > > places that only want to see live mddevs. > > > > Reported-by; Logan Gunthorpe <logang@deltatee.com> > > Signed-off-by: Christoph Hellwig <hch@lst.de> > > --- > > drivers/md/md.c | 56 +++++++++++++++++++++++++++++++++---------------- > > drivers/md/md.h | 1 + > > 2 files changed, 39 insertions(+), 18 deletions(-) > > > > diff --git a/drivers/md/md.c b/drivers/md/md.c > > index 805f2b4ed9c0d..08cf21ad4c2d7 100644 > > --- a/drivers/md/md.c > > +++ b/drivers/md/md.c > > @@ -625,6 +625,10 @@ EXPORT_SYMBOL(md_flush_request); > > > > static inline struct mddev *mddev_get(struct mddev *mddev) > > { > > + lockdep_assert_held(&all_mddevs_lock); > > + > > + if (mddev->deleted) > > + return NULL; > > atomic_inc(&mddev->active); > > return mddev; > > } > > Why can't we use 'atomic_inc_unless_zero' here and do away with the > additional 'deleted' flag? Thanks Christoph for the set. And thanks Hannes for the quick review! I think atomic_inc_unless_zero() should work here. Alternatively, we can also grab a big from mddev->flags as MD_DELETED. Thanks, Song [...]
On Mon, Jul 18, 2022 at 11:20:39AM -0700, Song Liu wrote: > > > static inline struct mddev *mddev_get(struct mddev *mddev) > > > { > > > + lockdep_assert_held(&all_mddevs_lock); > > > + > > > + if (mddev->deleted) > > > + return NULL; > > > atomic_inc(&mddev->active); > > > return mddev; > > > } > > > > Why can't we use 'atomic_inc_unless_zero' here and do away with the > > additional 'deleted' flag? > I think atomic_inc_unless_zero() should work here. Alternatively, we can > also grab a big from mddev->flags as MD_DELETED. s/big/bit/ ? Yes, this could use mddev->flags. I don't think atomic_inc_unless_zero would work, as an array can have a refcount of 0 but we still allow to take new references to it, the deletion only happens if the other conditions in mddev_put are met. Do you want me to repin the series using a flag?
On Mon, Jul 18, 2022 at 10:07 PM Christoph Hellwig <hch@lst.de> wrote: > > On Mon, Jul 18, 2022 at 11:20:39AM -0700, Song Liu wrote: > > > > static inline struct mddev *mddev_get(struct mddev *mddev) > > > > { > > > > + lockdep_assert_held(&all_mddevs_lock); > > > > + > > > > + if (mddev->deleted) > > > > + return NULL; > > > > atomic_inc(&mddev->active); > > > > return mddev; > > > > } > > > > > > Why can't we use 'atomic_inc_unless_zero' here and do away with the > > > additional 'deleted' flag? > > > I think atomic_inc_unless_zero() should work here. Alternatively, we can > > also grab a big from mddev->flags as MD_DELETED. > > s/big/bit/ ? yeah, bit... > > Yes, this could use mddev->flags. I don't think atomic_inc_unless_zero > would work, as an array can have a refcount of 0 but we still allow > to take new references to it, the deletion only happens if the other > conditions in mddev_put are met. > > Do you want me to repin the series using a flag? Respin the series sounds good. Please also address Hannes' feedback on 8/10. Thanks, Song
On 7/19/22 07:06, Christoph Hellwig wrote: > On Mon, Jul 18, 2022 at 11:20:39AM -0700, Song Liu wrote: >>>> static inline struct mddev *mddev_get(struct mddev *mddev) >>>> { >>>> + lockdep_assert_held(&all_mddevs_lock); >>>> + >>>> + if (mddev->deleted) >>>> + return NULL; >>>> atomic_inc(&mddev->active); >>>> return mddev; >>>> } >>> >>> Why can't we use 'atomic_inc_unless_zero' here and do away with the >>> additional 'deleted' flag? > >> I think atomic_inc_unless_zero() should work here. Alternatively, we can >> also grab a big from mddev->flags as MD_DELETED. > > s/big/bit/ ? > > Yes, this could use mddev->flags. I don't think atomic_inc_unless_zero > would work, as an array can have a refcount of 0 but we still allow > to take new references to it, the deletion only happens if the other > conditions in mddev_put are met. > > Do you want me to repin the series using a flag? I would vote for it. Cheers, Hannes
On Mon, Jul 18, 2022 at 09:17:42AM +0200, Hannes Reinecke wrote: > Why can't we use 'atomic_inc_unless_zero' here and do away with the > additional 'deleted' flag? See my previous reply. >> @@ -3338,6 +3342,8 @@ static bool md_rdev_overlaps(struct md_rdev *rdev) >> spin_lock(&all_mddevs_lock); >> list_for_each_entry(mddev, &all_mddevs, all_mddevs) { >> + if (mddev->deleted) >> + continue; > > Any particular reason why you can't use the 'mddev_get()' / 'mddev_put()' > sequence here? Mostly because it would be pretty mess. mdev_put takes all_mddevs_lock, so we'd have to add an unlock/relock cycle for each loop iteration. > After all, I don't think that 'mddev' should vanish while being in this > loop, no? It won't, at least without the call to mddev_put. Once mddev_put is in the game things aren't that easy, and I suspect the exising and new code might have bugs in that area in the reboot notifier and md_exit for extreme corner cases.
On 7/19/22 09:14, Christoph Hellwig wrote: > On Mon, Jul 18, 2022 at 09:17:42AM +0200, Hannes Reinecke wrote: >> Why can't we use 'atomic_inc_unless_zero' here and do away with the >> additional 'deleted' flag? > > See my previous reply. > >>> @@ -3338,6 +3342,8 @@ static bool md_rdev_overlaps(struct md_rdev *rdev) >>> spin_lock(&all_mddevs_lock); >>> list_for_each_entry(mddev, &all_mddevs, all_mddevs) { >>> + if (mddev->deleted) >>> + continue; >> >> Any particular reason why you can't use the 'mddev_get()' / 'mddev_put()' >> sequence here? > > Mostly because it would be pretty mess. mdev_put takes all_mddevs_lock, > so we'd have to add an unlock/relock cycle for each loop iteration. > >> After all, I don't think that 'mddev' should vanish while being in this >> loop, no? > > It won't, at least without the call to mddev_put. Once mddev_put is > in the game things aren't that easy, and I suspect the exising and > new code might have bugs in that area in the reboot notifier and > md_exit for extreme corner cases. And again, MD mess. But thanks for start clearing it up. Cheers, Hannes
diff --git a/drivers/md/md.c b/drivers/md/md.c index 805f2b4ed9c0d..08cf21ad4c2d7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -625,6 +625,10 @@ EXPORT_SYMBOL(md_flush_request); static inline struct mddev *mddev_get(struct mddev *mddev) { + lockdep_assert_held(&all_mddevs_lock); + + if (mddev->deleted) + return NULL; atomic_inc(&mddev->active); return mddev; } @@ -639,7 +643,7 @@ static void mddev_put(struct mddev *mddev) mddev->ctime == 0 && !mddev->hold_active) { /* Array is not configured at all, and not held active, * so destroy it */ - list_del_init(&mddev->all_mddevs); + mddev->deleted = true; /* * Call queue_work inside the spinlock so that @@ -719,8 +723,8 @@ static struct mddev *mddev_find(dev_t unit) spin_lock(&all_mddevs_lock); mddev = mddev_find_locked(unit); - if (mddev) - mddev_get(mddev); + if (mddev && !mddev_get(mddev)) + mddev = NULL; spin_unlock(&all_mddevs_lock); return mddev; @@ -3338,6 +3342,8 @@ static bool md_rdev_overlaps(struct md_rdev *rdev) spin_lock(&all_mddevs_lock); list_for_each_entry(mddev, &all_mddevs, all_mddevs) { + if (mddev->deleted) + continue; rdev_for_each(rdev2, mddev) { if (rdev != rdev2 && rdev->bdev == rdev2->bdev && md_rdevs_overlap(rdev, rdev2)) { @@ -5525,11 +5531,10 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) if (!entry->show) return -EIO; spin_lock(&all_mddevs_lock); - if (list_empty(&mddev->all_mddevs)) { + if (!mddev_get(mddev)) { spin_unlock(&all_mddevs_lock); return -EBUSY; } - mddev_get(mddev); spin_unlock(&all_mddevs_lock); rv = entry->show(mddev, page); @@ -5550,11 +5555,10 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, if (!capable(CAP_SYS_ADMIN)) return -EACCES; spin_lock(&all_mddevs_lock); - if (list_empty(&mddev->all_mddevs)) { + if (!mddev_get(mddev)) { spin_unlock(&all_mddevs_lock); return -EBUSY; } - mddev_get(mddev); spin_unlock(&all_mddevs_lock); rv = entry->store(mddev, page, length); mddev_put(mddev); @@ -7851,7 +7855,7 @@ static void md_free_disk(struct gendisk *disk) bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); - kfree(mddev); + mddev_free(mddev); } const struct block_device_operations md_fops = @@ -8173,6 +8177,8 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos) if (!l--) { mddev = list_entry(tmp, struct mddev, all_mddevs); mddev_get(mddev); + if (!mddev_get(mddev)) + continue; spin_unlock(&all_mddevs_lock); return mddev; } @@ -8186,25 +8192,35 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct list_head *tmp; struct mddev *next_mddev, *mddev = v; + struct mddev *to_put = NULL; ++*pos; if (v == (void*)2) return NULL; spin_lock(&all_mddevs_lock); - if (v == (void*)1) + if (v == (void*)1) { tmp = all_mddevs.next; - else + } else { + to_put = mddev; tmp = mddev->all_mddevs.next; - if (tmp != &all_mddevs) - next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs)); - else { - next_mddev = (void*)2; - *pos = 0x10000; } + + for (;;) { + if (tmp == &all_mddevs) { + next_mddev = (void*)2; + *pos = 0x10000; + break; + } + next_mddev = list_entry(tmp, struct mddev, all_mddevs); + if (mddev_get(next_mddev)) + break; + mddev = next_mddev; + tmp = mddev->all_mddevs.next; + }; spin_unlock(&all_mddevs_lock); - if (v != (void*)1) + if (to_put) mddev_put(mddev); return next_mddev; @@ -8768,6 +8784,8 @@ void md_do_sync(struct md_thread *thread) goto skip; spin_lock(&all_mddevs_lock); list_for_each_entry(mddev2, &all_mddevs, all_mddevs) { + if (mddev2->deleted) + continue; if (mddev2 == mddev) continue; if (!mddev->parallel_resync @@ -9570,7 +9588,8 @@ static int md_notify_reboot(struct notifier_block *this, spin_lock(&all_mddevs_lock); list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) { - mddev_get(mddev); + if (!mddev_get(mddev)) + continue; spin_unlock(&all_mddevs_lock); if (mddev_trylock(mddev)) { if (mddev->pers) @@ -9925,7 +9944,8 @@ static __exit void md_exit(void) spin_lock(&all_mddevs_lock); list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) { - mddev_get(mddev); + if (!mddev_get(mddev)) + continue; spin_unlock(&all_mddevs_lock); export_array(mddev); mddev->ctime = 0; diff --git a/drivers/md/md.h b/drivers/md/md.h index 1a85dbe78a71c..bc870e1f1e8c2 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -503,6 +503,7 @@ struct mddev { atomic_t max_corr_read_errors; /* max read retries */ struct list_head all_mddevs; + bool deleted; const struct attribute_group *to_remove;
This ensures device names don't get prematurely reused. Instead add a deleted flag to skip already deleted devices in mddev_get and other places that only want to see live mddevs. Reported-by; Logan Gunthorpe <logang@deltatee.com> Signed-off-by: Christoph Hellwig <hch@lst.de> --- drivers/md/md.c | 56 +++++++++++++++++++++++++++++++++---------------- drivers/md/md.h | 1 + 2 files changed, 39 insertions(+), 18 deletions(-)