Message ID | 20201126130422.92945-21-hch@lst.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/44] blk-cgroup: fix a hd_struct leak in blkcg_fill_root_iostats | expand |
On Thu 26-11-20 14:03:58, Christoph Hellwig wrote: > Move more code that is only run on the outer open but not the open of > the underlying whole device when opening a partition into blkdev_get, > which leads to a much easier to follow structure. > > This allows to simplify the disk and module refcounting so that one > reference is held for each open, similar to what we do with normal > file operations. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > Acked-by: Tejun Heo <tj@kernel.org> The patch looks good to me. You can add: Reviewed-by: Jan Kara <jack@suse.cz> Honza > --- > fs/block_dev.c | 185 +++++++++++++++++++++++-------------------------- > 1 file changed, 86 insertions(+), 99 deletions(-) > > diff --git a/fs/block_dev.c b/fs/block_dev.c > index 41c50cfba864e2..86a61a2141f642 100644 > --- a/fs/block_dev.c > +++ b/fs/block_dev.c > @@ -1403,46 +1403,12 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed); > * mutex_lock(part->bd_mutex) > * mutex_lock_nested(whole->bd_mutex, 1) > */ > - > -static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, > - int for_part) > +static int __blkdev_get(struct block_device *bdev, struct gendisk *disk, > + int partno, fmode_t mode) > { > - struct block_device *whole = NULL, *claiming = NULL; > - struct gendisk *disk; > int ret; > - int partno; > - bool first_open = false, unblock_events = true, need_restart; > - > - restart: > - need_restart = false; > - ret = -ENXIO; > - disk = bdev_get_gendisk(bdev, &partno); > - if (!disk) > - goto out; > - > - if (partno) { > - whole = bdget_disk(disk, 0); > - if (!whole) { > - ret = -ENOMEM; > - goto out_put_disk; > - } > - } > > - if (!for_part && (mode & FMODE_EXCL)) { > - WARN_ON_ONCE(!holder); > - if (whole) > - claiming = whole; > - else > - claiming = bdev; > - ret = bd_prepare_to_claim(bdev, claiming, holder); > - if (ret) > - goto out_put_whole; > - } > - > - disk_block_events(disk); > - mutex_lock_nested(&bdev->bd_mutex, for_part); > if (!bdev->bd_openers) { > - first_open = true; > bdev->bd_disk = disk; > bdev->bd_contains = bdev; > bdev->bd_partno = partno; > @@ -1454,15 +1420,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, > goto out_clear; > > ret = 0; > - if (disk->fops->open) { > + if (disk->fops->open) > ret = disk->fops->open(bdev, mode); > - /* > - * If we lost a race with 'disk' being deleted, > - * try again. See md.c > - */ > - if (ret == -ERESTARTSYS) > - need_restart = true; > - } > > if (!ret) { > bd_set_nr_sectors(bdev, get_capacity(disk)); > @@ -1482,14 +1441,23 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, > if (ret) > goto out_clear; > } else { > - BUG_ON(for_part); > - ret = __blkdev_get(whole, mode, NULL, 1); > - if (ret) > + struct block_device *whole = bdget_disk(disk, 0); > + > + mutex_lock_nested(&whole->bd_mutex, 1); > + ret = __blkdev_get(whole, disk, 0, mode); > + if (ret) { > + mutex_unlock(&whole->bd_mutex); > + bdput(whole); > goto out_clear; > - bdev->bd_contains = bdgrab(whole); > + } > + whole->bd_part_count++; > + mutex_unlock(&whole->bd_mutex); > + > + bdev->bd_contains = whole; > bdev->bd_part = disk_get_part(disk, partno); > if (!(disk->flags & GENHD_FL_UP) || > !bdev->bd_part || !bdev->bd_part->nr_sects) { > + __blkdev_put(whole, mode, 1); > ret = -ENXIO; > goto out_clear; > } > @@ -1509,58 +1477,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, > (!ret || ret == -ENOMEDIUM)) > bdev_disk_changed(bdev, ret == -ENOMEDIUM); > if (ret) > - goto out_unlock_bdev; > + return ret; > } > } > bdev->bd_openers++; > - if (for_part) > - bdev->bd_part_count++; > - if (claiming) > - bd_finish_claiming(bdev, claiming, holder); > - > - /* > - * Block event polling for write claims if requested. Any write holder > - * makes the write_holder state stick until all are released. This is > - * good enough and tracking individual writeable reference is too > - * fragile given the way @mode is used in blkdev_get/put(). > - */ > - if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder && > - (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { > - bdev->bd_write_holder = true; > - unblock_events = false; > - } > - mutex_unlock(&bdev->bd_mutex); > - > - if (unblock_events) > - disk_unblock_events(disk); > - > - /* only one opener holds refs to the module and disk */ > - if (!first_open) > - put_disk_and_module(disk); > - if (whole) > - bdput(whole); > return 0; > > out_clear: > disk_put_part(bdev->bd_part); > bdev->bd_disk = NULL; > bdev->bd_part = NULL; > - if (bdev != bdev->bd_contains) > - __blkdev_put(bdev->bd_contains, mode, 1); > bdev->bd_contains = NULL; > - out_unlock_bdev: > - if (claiming) > - bd_abort_claiming(bdev, claiming, holder); > - mutex_unlock(&bdev->bd_mutex); > - disk_unblock_events(disk); > - out_put_whole: > - if (whole) > - bdput(whole); > - out_put_disk: > - put_disk_and_module(disk); > - if (need_restart) > - goto restart; > - out: > return ret; > } > > @@ -1585,7 +1512,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, > */ > static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) > { > - int ret, perm = 0; > + struct block_device *claiming; > + bool unblock_events = true; > + struct gendisk *disk; > + int perm = 0; > + int partno; > + int ret; > > if (mode & FMODE_READ) > perm |= MAY_READ; > @@ -1595,13 +1527,67 @@ static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) > if (ret) > goto bdput; > > - ret =__blkdev_get(bdev, mode, holder, 0); > - if (ret) > + /* > + * If we lost a race with 'disk' being deleted, try again. See md.c. > + */ > +retry: > + ret = -ENXIO; > + disk = bdev_get_gendisk(bdev, &partno); > + if (!disk) > goto bdput; > - return 0; > > + if (mode & FMODE_EXCL) { > + WARN_ON_ONCE(!holder); > + > + ret = -ENOMEM; > + claiming = bdget_disk(disk, 0); > + if (!claiming) > + goto put_disk; > + ret = bd_prepare_to_claim(bdev, claiming, holder); > + if (ret) > + goto put_claiming; > + } > + > + disk_block_events(disk); > + > + mutex_lock(&bdev->bd_mutex); > + ret =__blkdev_get(bdev, disk, partno, mode); > + if (!(mode & FMODE_EXCL)) { > + ; /* nothing to do here */ > + } else if (ret) { > + bd_abort_claiming(bdev, claiming, holder); > + } else { > + bd_finish_claiming(bdev, claiming, holder); > + > + /* > + * Block event polling for write claims if requested. Any write > + * holder makes the write_holder state stick until all are > + * released. This is good enough and tracking individual > + * writeable reference is too fragile given the way @mode is > + * used in blkdev_get/put(). > + */ > + if ((mode & FMODE_WRITE) && !bdev->bd_write_holder && > + (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { > + bdev->bd_write_holder = true; > + unblock_events = false; > + } > + } > + mutex_unlock(&bdev->bd_mutex); > + > + if (unblock_events) > + disk_unblock_events(disk); > + > +put_claiming: > + if (mode & FMODE_EXCL) > + bdput(claiming); > +put_disk: > + if (ret) > + put_disk_and_module(disk); > + if (ret == -ERESTARTSYS) > + goto retry; > bdput: > - bdput(bdev); > + if (ret) > + bdput(bdev); > return ret; > } > > @@ -1749,8 +1735,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) > if (bdev_is_partition(bdev)) > victim = bdev->bd_contains; > bdev->bd_contains = NULL; > - > - put_disk_and_module(disk); > } else { > if (!bdev_is_partition(bdev) && disk->fops->release) > disk->fops->release(disk, mode); > @@ -1763,6 +1747,8 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) > > void blkdev_put(struct block_device *bdev, fmode_t mode) > { > + struct gendisk *disk = bdev->bd_disk; > + > mutex_lock(&bdev->bd_mutex); > > if (mode & FMODE_EXCL) { > @@ -1791,7 +1777,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) > * unblock evpoll if it was a write holder. > */ > if (bdev_free && bdev->bd_write_holder) { > - disk_unblock_events(bdev->bd_disk); > + disk_unblock_events(disk); > bdev->bd_write_holder = false; > } > } > @@ -1801,11 +1787,12 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) > * event. This is to ensure detection of media removal commanded > * from userland - e.g. eject(1). > */ > - disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE); > + disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE); > > mutex_unlock(&bdev->bd_mutex); > > __blkdev_put(bdev, mode, 0); > + put_disk_and_module(disk); > } > EXPORT_SYMBOL(blkdev_put); > > -- > 2.29.2 >
diff --git a/fs/block_dev.c b/fs/block_dev.c index 41c50cfba864e2..86a61a2141f642 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1403,46 +1403,12 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed); * mutex_lock(part->bd_mutex) * mutex_lock_nested(whole->bd_mutex, 1) */ - -static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, - int for_part) +static int __blkdev_get(struct block_device *bdev, struct gendisk *disk, + int partno, fmode_t mode) { - struct block_device *whole = NULL, *claiming = NULL; - struct gendisk *disk; int ret; - int partno; - bool first_open = false, unblock_events = true, need_restart; - - restart: - need_restart = false; - ret = -ENXIO; - disk = bdev_get_gendisk(bdev, &partno); - if (!disk) - goto out; - - if (partno) { - whole = bdget_disk(disk, 0); - if (!whole) { - ret = -ENOMEM; - goto out_put_disk; - } - } - if (!for_part && (mode & FMODE_EXCL)) { - WARN_ON_ONCE(!holder); - if (whole) - claiming = whole; - else - claiming = bdev; - ret = bd_prepare_to_claim(bdev, claiming, holder); - if (ret) - goto out_put_whole; - } - - disk_block_events(disk); - mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { - first_open = true; bdev->bd_disk = disk; bdev->bd_contains = bdev; bdev->bd_partno = partno; @@ -1454,15 +1420,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, goto out_clear; ret = 0; - if (disk->fops->open) { + if (disk->fops->open) ret = disk->fops->open(bdev, mode); - /* - * If we lost a race with 'disk' being deleted, - * try again. See md.c - */ - if (ret == -ERESTARTSYS) - need_restart = true; - } if (!ret) { bd_set_nr_sectors(bdev, get_capacity(disk)); @@ -1482,14 +1441,23 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, if (ret) goto out_clear; } else { - BUG_ON(for_part); - ret = __blkdev_get(whole, mode, NULL, 1); - if (ret) + struct block_device *whole = bdget_disk(disk, 0); + + mutex_lock_nested(&whole->bd_mutex, 1); + ret = __blkdev_get(whole, disk, 0, mode); + if (ret) { + mutex_unlock(&whole->bd_mutex); + bdput(whole); goto out_clear; - bdev->bd_contains = bdgrab(whole); + } + whole->bd_part_count++; + mutex_unlock(&whole->bd_mutex); + + bdev->bd_contains = whole; bdev->bd_part = disk_get_part(disk, partno); if (!(disk->flags & GENHD_FL_UP) || !bdev->bd_part || !bdev->bd_part->nr_sects) { + __blkdev_put(whole, mode, 1); ret = -ENXIO; goto out_clear; } @@ -1509,58 +1477,17 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, (!ret || ret == -ENOMEDIUM)) bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) - goto out_unlock_bdev; + return ret; } } bdev->bd_openers++; - if (for_part) - bdev->bd_part_count++; - if (claiming) - bd_finish_claiming(bdev, claiming, holder); - - /* - * Block event polling for write claims if requested. Any write holder - * makes the write_holder state stick until all are released. This is - * good enough and tracking individual writeable reference is too - * fragile given the way @mode is used in blkdev_get/put(). - */ - if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder && - (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { - bdev->bd_write_holder = true; - unblock_events = false; - } - mutex_unlock(&bdev->bd_mutex); - - if (unblock_events) - disk_unblock_events(disk); - - /* only one opener holds refs to the module and disk */ - if (!first_open) - put_disk_and_module(disk); - if (whole) - bdput(whole); return 0; out_clear: disk_put_part(bdev->bd_part); bdev->bd_disk = NULL; bdev->bd_part = NULL; - if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; - out_unlock_bdev: - if (claiming) - bd_abort_claiming(bdev, claiming, holder); - mutex_unlock(&bdev->bd_mutex); - disk_unblock_events(disk); - out_put_whole: - if (whole) - bdput(whole); - out_put_disk: - put_disk_and_module(disk); - if (need_restart) - goto restart; - out: return ret; } @@ -1585,7 +1512,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, */ static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) { - int ret, perm = 0; + struct block_device *claiming; + bool unblock_events = true; + struct gendisk *disk; + int perm = 0; + int partno; + int ret; if (mode & FMODE_READ) perm |= MAY_READ; @@ -1595,13 +1527,67 @@ static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) if (ret) goto bdput; - ret =__blkdev_get(bdev, mode, holder, 0); - if (ret) + /* + * If we lost a race with 'disk' being deleted, try again. See md.c. + */ +retry: + ret = -ENXIO; + disk = bdev_get_gendisk(bdev, &partno); + if (!disk) goto bdput; - return 0; + if (mode & FMODE_EXCL) { + WARN_ON_ONCE(!holder); + + ret = -ENOMEM; + claiming = bdget_disk(disk, 0); + if (!claiming) + goto put_disk; + ret = bd_prepare_to_claim(bdev, claiming, holder); + if (ret) + goto put_claiming; + } + + disk_block_events(disk); + + mutex_lock(&bdev->bd_mutex); + ret =__blkdev_get(bdev, disk, partno, mode); + if (!(mode & FMODE_EXCL)) { + ; /* nothing to do here */ + } else if (ret) { + bd_abort_claiming(bdev, claiming, holder); + } else { + bd_finish_claiming(bdev, claiming, holder); + + /* + * Block event polling for write claims if requested. Any write + * holder makes the write_holder state stick until all are + * released. This is good enough and tracking individual + * writeable reference is too fragile given the way @mode is + * used in blkdev_get/put(). + */ + if ((mode & FMODE_WRITE) && !bdev->bd_write_holder && + (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { + bdev->bd_write_holder = true; + unblock_events = false; + } + } + mutex_unlock(&bdev->bd_mutex); + + if (unblock_events) + disk_unblock_events(disk); + +put_claiming: + if (mode & FMODE_EXCL) + bdput(claiming); +put_disk: + if (ret) + put_disk_and_module(disk); + if (ret == -ERESTARTSYS) + goto retry; bdput: - bdput(bdev); + if (ret) + bdput(bdev); return ret; } @@ -1749,8 +1735,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) if (bdev_is_partition(bdev)) victim = bdev->bd_contains; bdev->bd_contains = NULL; - - put_disk_and_module(disk); } else { if (!bdev_is_partition(bdev) && disk->fops->release) disk->fops->release(disk, mode); @@ -1763,6 +1747,8 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) void blkdev_put(struct block_device *bdev, fmode_t mode) { + struct gendisk *disk = bdev->bd_disk; + mutex_lock(&bdev->bd_mutex); if (mode & FMODE_EXCL) { @@ -1791,7 +1777,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * unblock evpoll if it was a write holder. */ if (bdev_free && bdev->bd_write_holder) { - disk_unblock_events(bdev->bd_disk); + disk_unblock_events(disk); bdev->bd_write_holder = false; } } @@ -1801,11 +1787,12 @@ void blkdev_put(struct block_device *bdev, fmode_t mode) * event. This is to ensure detection of media removal commanded * from userland - e.g. eject(1). */ - disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE); + disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE); mutex_unlock(&bdev->bd_mutex); __blkdev_put(bdev, mode, 0); + put_disk_and_module(disk); } EXPORT_SYMBOL(blkdev_put);