diff mbox series

[37/45] block: allocate struct hd_struct as part of struct bdev_inode

Message ID 20201128161510.347752-38-hch@lst.de (mailing list archive)
State New
Headers show
Series [01/45] blk-cgroup: fix a hd_struct leak in blkcg_fill_root_iostats | expand

Commit Message

Christoph Hellwig Nov. 28, 2020, 4:15 p.m. UTC
Allocate hd_struct together with struct block_device to pre-load
the lifetime rule changes in preparation of merging the two structures.

Note that part0 was previously embedded into struct gendisk, but is
a separate allocation now, and already points to the block_device instead
of the hd_struct.  The lifetime of struct gendisk is still controlled by
the struct device embedded in the part0 hd_struct.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c                   | 16 ++++---
 block/blk-flush.c                  |  2 +-
 block/blk-merge.c                  |  2 -
 block/blk.h                        | 21 ----------
 block/genhd.c                      | 50 +++++++++-------------
 block/partitions/core.c            | 67 +++---------------------------
 drivers/block/drbd/drbd_receiver.c |  2 +-
 drivers/block/drbd/drbd_worker.c   |  3 +-
 drivers/block/zram/zram_drv.c      |  2 +-
 drivers/md/dm.c                    |  4 +-
 drivers/md/md.c                    |  2 +-
 fs/block_dev.c                     | 39 ++++++-----------
 include/linux/blk_types.h          |  2 +-
 include/linux/genhd.h              | 14 +++----
 include/linux/part_stat.h          |  4 +-
 15 files changed, 61 insertions(+), 169 deletions(-)

Comments

Hannes Reinecke Nov. 30, 2020, 7:46 a.m. UTC | #1
On 11/28/20 5:15 PM, Christoph Hellwig wrote:
> Allocate hd_struct together with struct block_device to pre-load
> the lifetime rule changes in preparation of merging the two structures.
> 
> Note that part0 was previously embedded into struct gendisk, but is
> a separate allocation now, and already points to the block_device instead
> of the hd_struct.  The lifetime of struct gendisk is still controlled by
> the struct device embedded in the part0 hd_struct.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   block/blk-core.c                   | 16 ++++---
>   block/blk-flush.c                  |  2 +-
>   block/blk-merge.c                  |  2 -
>   block/blk.h                        | 21 ----------
>   block/genhd.c                      | 50 +++++++++-------------
>   block/partitions/core.c            | 67 +++---------------------------
>   drivers/block/drbd/drbd_receiver.c |  2 +-
>   drivers/block/drbd/drbd_worker.c   |  3 +-
>   drivers/block/zram/zram_drv.c      |  2 +-
>   drivers/md/dm.c                    |  4 +-
>   drivers/md/md.c                    |  2 +-
>   fs/block_dev.c                     | 39 ++++++-----------
>   include/linux/blk_types.h          |  2 +-
>   include/linux/genhd.h              | 14 +++----
>   include/linux/part_stat.h          |  4 +-
>   15 files changed, 61 insertions(+), 169 deletions(-)
> 
Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
Jan Kara Nov. 30, 2020, 9:53 a.m. UTC | #2
On Sat 28-11-20 17:15:02, Christoph Hellwig wrote:
> Allocate hd_struct together with struct block_device to pre-load
> the lifetime rule changes in preparation of merging the two structures.
> 
> Note that part0 was previously embedded into struct gendisk, but is
> a separate allocation now, and already points to the block_device instead
> of the hd_struct.  The lifetime of struct gendisk is still controlled by
> the struct device embedded in the part0 hd_struct.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks good to me. You can add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  block/blk-core.c                   | 16 ++++---
>  block/blk-flush.c                  |  2 +-
>  block/blk-merge.c                  |  2 -
>  block/blk.h                        | 21 ----------
>  block/genhd.c                      | 50 +++++++++-------------
>  block/partitions/core.c            | 67 +++---------------------------
>  drivers/block/drbd/drbd_receiver.c |  2 +-
>  drivers/block/drbd/drbd_worker.c   |  3 +-
>  drivers/block/zram/zram_drv.c      |  2 +-
>  drivers/md/dm.c                    |  4 +-
>  drivers/md/md.c                    |  2 +-
>  fs/block_dev.c                     | 39 ++++++-----------
>  include/linux/blk_types.h          |  2 +-
>  include/linux/genhd.h              | 14 +++----
>  include/linux/part_stat.h          |  4 +-
>  15 files changed, 61 insertions(+), 169 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index d64ffcb6f9ae5d..9ea70275fc1cfe 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -714,7 +714,8 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
>  
>  static noinline int should_fail_bio(struct bio *bio)
>  {
> -	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
> +	if (should_fail_request(bio->bi_disk->part0->bd_part,
> +			bio->bi_iter.bi_size))
>  		return -EIO;
>  	return 0;
>  }
> @@ -831,7 +832,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
>  		if (unlikely(blk_partition_remap(bio)))
>  			goto end_io;
>  	} else {
> -		if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
> +		if (unlikely(bio_check_ro(bio, bio->bi_disk->part0->bd_part)))
>  			goto end_io;
>  		if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
>  			goto end_io;
> @@ -1203,7 +1204,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
>  		return ret;
>  
>  	if (rq->rq_disk &&
> -	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
> +	    should_fail_request(rq->rq_disk->part0->bd_part, blk_rq_bytes(rq)))
>  		return BLK_STS_IOERR;
>  
>  	if (blk_crypto_insert_cloned_request(rq))
> @@ -1272,7 +1273,7 @@ static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
>  			__part_stat_add(part, io_ticks, end ? now - stamp : 1);
>  	}
>  	if (part->partno) {
> -		part = &part_to_disk(part)->part0;
> +		part = part_to_disk(part)->part0->bd_part;
>  		goto again;
>  	}
>  }
> @@ -1309,8 +1310,6 @@ void blk_account_io_done(struct request *req, u64 now)
>  		part_stat_inc(part, ios[sgrp]);
>  		part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
>  		part_stat_unlock();
> -
> -		hd_struct_put(part);
>  	}
>  }
>  
> @@ -1354,7 +1353,7 @@ EXPORT_SYMBOL_GPL(part_start_io_acct);
>  unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
>  				 unsigned int op)
>  {
> -	return __part_start_io_acct(&disk->part0, sectors, op);
> +	return __part_start_io_acct(disk->part0->bd_part, sectors, op);
>  }
>  EXPORT_SYMBOL(disk_start_io_acct);
>  
> @@ -1376,14 +1375,13 @@ void part_end_io_acct(struct hd_struct *part, struct bio *bio,
>  		      unsigned long start_time)
>  {
>  	__part_end_io_acct(part, bio_op(bio), start_time);
> -	hd_struct_put(part);
>  }
>  EXPORT_SYMBOL_GPL(part_end_io_acct);
>  
>  void disk_end_io_acct(struct gendisk *disk, unsigned int op,
>  		      unsigned long start_time)
>  {
> -	__part_end_io_acct(&disk->part0, op, start_time);
> +	__part_end_io_acct(disk->part0->bd_part, op, start_time);
>  }
>  EXPORT_SYMBOL(disk_end_io_acct);
>  
> diff --git a/block/blk-flush.c b/block/blk-flush.c
> index e32958f0b68750..fcd0a60574dff8 100644
> --- a/block/blk-flush.c
> +++ b/block/blk-flush.c
> @@ -139,7 +139,7 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front)
>  
>  static void blk_account_io_flush(struct request *rq)
>  {
> -	struct hd_struct *part = &rq->rq_disk->part0;
> +	struct hd_struct *part = rq->rq_disk->part0->bd_part;
>  
>  	part_stat_lock();
>  	part_stat_inc(part, ios[STAT_FLUSH]);
> diff --git a/block/blk-merge.c b/block/blk-merge.c
> index bcf5e458060337..cb351ab9b77dbd 100644
> --- a/block/blk-merge.c
> +++ b/block/blk-merge.c
> @@ -683,8 +683,6 @@ static void blk_account_io_merge_request(struct request *req)
>  		part_stat_lock();
>  		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
>  		part_stat_unlock();
> -
> -		hd_struct_put(req->part);
>  	}
>  }
>  
> diff --git a/block/blk.h b/block/blk.h
> index 0bd4b58bcbaf77..32ac41f7557fcc 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -363,27 +363,6 @@ int bdev_del_partition(struct block_device *bdev, int partno);
>  int bdev_resize_partition(struct block_device *bdev, int partno,
>  		sector_t start, sector_t length);
>  int disk_expand_part_tbl(struct gendisk *disk, int target);
> -int hd_ref_init(struct hd_struct *part);
> -
> -/* no need to get/put refcount of part0 */
> -static inline int hd_struct_try_get(struct hd_struct *part)
> -{
> -	if (part->partno)
> -		return percpu_ref_tryget_live(&part->ref);
> -	return 1;
> -}
> -
> -static inline void hd_struct_put(struct hd_struct *part)
> -{
> -	if (part->partno)
> -		percpu_ref_put(&part->ref);
> -}
> -
> -static inline void hd_free_part(struct hd_struct *part)
> -{
> -	bdput(part->bdev);
> -	percpu_ref_exit(&part->ref);
> -}
>  
>  int bio_add_hw_page(struct request_queue *q, struct bio *bio,
>  		struct page *page, unsigned int len, unsigned int offset,
> diff --git a/block/genhd.c b/block/genhd.c
> index ae312ccc6dd7c0..eaa5d65ae826b8 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -42,7 +42,7 @@ static void disk_release_events(struct gendisk *disk);
>  
>  void set_capacity(struct gendisk *disk, sector_t sectors)
>  {
> -	struct block_device *bdev = disk->part0.bdev;
> +	struct block_device *bdev = disk->part0;
>  
>  	spin_lock(&bdev->bd_size_lock);
>  	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
> @@ -310,9 +310,7 @@ static inline int sector_in_part(struct hd_struct *part, sector_t sector)
>   * primarily used for stats accounting.
>   *
>   * CONTEXT:
> - * RCU read locked.  The returned partition pointer is always valid
> - * because its refcount is grabbed except for part0, which lifetime
> - * is same with the disk.
> + * RCU read locked.
>   *
>   * RETURNS:
>   * Found partition on success, part0 is returned if no partition matches
> @@ -328,26 +326,19 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
>  	ptbl = rcu_dereference(disk->part_tbl);
>  
>  	part = rcu_dereference(ptbl->last_lookup);
> -	if (part && sector_in_part(part, sector) && hd_struct_try_get(part))
> +	if (part && sector_in_part(part, sector))
>  		goto out_unlock;
>  
>  	for (i = 1; i < ptbl->len; i++) {
>  		part = rcu_dereference(ptbl->part[i]);
>  
>  		if (part && sector_in_part(part, sector)) {
> -			/*
> -			 * only live partition can be cached for lookup,
> -			 * so use-after-free on cached & deleting partition
> -			 * can be avoided
> -			 */
> -			if (!hd_struct_try_get(part))
> -				break;
>  			rcu_assign_pointer(ptbl->last_lookup, part);
>  			goto out_unlock;
>  		}
>  	}
>  
> -	part = &disk->part0;
> +	part = disk->part0->bd_part;
>  out_unlock:
>  	rcu_read_unlock();
>  	return part;
> @@ -673,8 +664,8 @@ static void register_disk(struct device *parent, struct gendisk *disk,
>  	 */
>  	pm_runtime_set_memalloc_noio(ddev, true);
>  
> -	disk->part0.bdev->bd_holder_dir =
> -			kobject_create_and_add("holders", &ddev->kobj);
> +	disk->part0->bd_holder_dir =
> +		kobject_create_and_add("holders", &ddev->kobj);
>  	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
>  
>  	if (disk->flags & GENHD_FL_HIDDEN) {
> @@ -740,7 +731,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  
>  	disk->flags |= GENHD_FL_UP;
>  
> -	retval = blk_alloc_devt(&disk->part0, &devt);
> +	retval = blk_alloc_devt(disk->part0->bd_part, &devt);
>  	if (retval) {
>  		WARN_ON(1);
>  		return;
> @@ -767,7 +758,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
>  		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
>  		WARN_ON(ret);
>  		bdi_set_owner(bdi, dev);
> -		bdev_add(disk->part0.bdev, devt);
> +		bdev_add(disk->part0, devt);
>  	}
>  	register_disk(parent, disk, groups);
>  	if (register_queue)
> @@ -880,11 +871,11 @@ void del_gendisk(struct gendisk *disk)
>  
>  	blk_unregister_queue(disk);
>  
> -	kobject_put(disk->part0.bdev->bd_holder_dir);
> +	kobject_put(disk->part0->bd_holder_dir);
>  	kobject_put(disk->slave_dir);
>  
> -	part_stat_set_all(&disk->part0, 0);
> -	disk->part0.bdev->bd_stamp = 0;
> +	part_stat_set_all(disk->part0->bd_part, 0);
> +	disk->part0->bd_stamp = 0;
>  	if (!sysfs_deprecated)
>  		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
>  	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
> @@ -997,7 +988,7 @@ void __init printk_all_partitions(void)
>  		 */
>  		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
>  		while ((part = disk_part_iter_next(&piter))) {
> -			bool is_part0 = part == &disk->part0;
> +			bool is_part0 = part == disk->part0->bd_part;
>  
>  			printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
>  			       bdevt_str(part_devt(part), devt_buf),
> @@ -1452,7 +1443,7 @@ static void disk_release(struct device *dev)
>  	disk_release_events(disk);
>  	kfree(disk->random);
>  	disk_replace_part_tbl(disk, NULL);
> -	hd_free_part(&disk->part0);
> +	bdput(disk->part0);
>  	if (disk->queue)
>  		blk_put_queue(disk->queue);
>  	kfree(disk);
> @@ -1618,8 +1609,8 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	if (!disk)
>  		return NULL;
>  
> -	disk->part0.bdev = bdev_alloc(disk, 0);
> -	if (!disk->part0.bdev)
> +	disk->part0 = bdev_alloc(disk, 0);
> +	if (!disk->part0)
>  		goto out_free_disk;
>  
>  	disk->node_id = node_id;
> @@ -1627,10 +1618,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  		goto out_bdput;
>  
>  	ptbl = rcu_dereference_protected(disk->part_tbl, 1);
> -	rcu_assign_pointer(ptbl->part[0], &disk->part0);
> -
> -	if (hd_ref_init(&disk->part0))
> -		goto out_bdput;
> +	rcu_assign_pointer(ptbl->part[0], disk->part0->bd_part);
>  
>  	disk->minors = minors;
>  	rand_initialize_disk(disk);
> @@ -1640,7 +1628,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
>  	return disk;
>  
>  out_bdput:
> -	bdput(disk->part0.bdev);
> +	bdput(disk->part0);
>  out_free_disk:
>  	kfree(disk);
>  	return NULL;
> @@ -1679,9 +1667,9 @@ void set_disk_ro(struct gendisk *disk, int flag)
>  	struct disk_part_iter piter;
>  	struct hd_struct *part;
>  
> -	if (disk->part0.bdev->bd_read_only != flag) {
> +	if (disk->part0->bd_read_only != flag) {
>  		set_disk_ro_uevent(disk, flag);
> -		disk->part0.bdev->bd_read_only = flag;
> +		disk->part0->bd_read_only = flag;
>  	}
>  
>  	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
> diff --git a/block/partitions/core.c b/block/partitions/core.c
> index 060c1be13cd8da..6d1fca193cbd6f 100644
> --- a/block/partitions/core.c
> +++ b/block/partitions/core.c
> @@ -265,9 +265,9 @@ static const struct attribute_group *part_attr_groups[] = {
>  static void part_release(struct device *dev)
>  {
>  	struct hd_struct *p = dev_to_part(dev);
> +
>  	blk_free_devt(dev->devt);
> -	hd_free_part(p);
> -	kfree(p);
> +	bdput(p->bdev);
>  }
>  
>  static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
> @@ -288,46 +288,6 @@ struct device_type part_type = {
>  	.uevent		= part_uevent,
>  };
>  
> -static void hd_struct_free_work(struct work_struct *work)
> -{
> -	struct hd_struct *part =
> -		container_of(to_rcu_work(work), struct hd_struct, rcu_work);
> -	struct gendisk *disk = part_to_disk(part);
> -
> -	/*
> -	 * Release the disk reference acquired in delete_partition here.
> -	 * We can't release it in hd_struct_free because the final put_device
> -	 * needs process context and thus can't be run directly from a
> -	 * percpu_ref ->release handler.
> -	 */
> -	put_device(disk_to_dev(disk));
> -
> -	part->bdev->bd_start_sect = 0;
> -	bdev_set_nr_sectors(part->bdev, 0);
> -	part_stat_set_all(part, 0);
> -	put_device(part_to_dev(part));
> -}
> -
> -static void hd_struct_free(struct percpu_ref *ref)
> -{
> -	struct hd_struct *part = container_of(ref, struct hd_struct, ref);
> -	struct gendisk *disk = part_to_disk(part);
> -	struct disk_part_tbl *ptbl =
> -		rcu_dereference_protected(disk->part_tbl, 1);
> -
> -	rcu_assign_pointer(ptbl->last_lookup, NULL);
> -
> -	INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work);
> -	queue_rcu_work(system_wq, &part->rcu_work);
> -}
> -
> -int hd_ref_init(struct hd_struct *part)
> -{
> -	if (percpu_ref_init(&part->ref, hd_struct_free, 0, GFP_KERNEL))
> -		return -ENOMEM;
> -	return 0;
> -}
> -
>  /*
>   * Must be called either with bd_mutex held, before a disk can be opened or
>   * after all disk users are gone.
> @@ -342,8 +302,8 @@ void delete_partition(struct hd_struct *part)
>  	 * ->part_tbl is referenced in this part's release handler, so
>  	 *  we have to hold the disk device
>  	 */
> -	get_device(disk_to_dev(disk));
>  	rcu_assign_pointer(ptbl->part[part->partno], NULL);
> +	rcu_assign_pointer(ptbl->last_lookup, NULL);
>  	kobject_put(part->bdev->bd_holder_dir);
>  	device_del(part_to_dev(part));
>  
> @@ -353,7 +313,7 @@ void delete_partition(struct hd_struct *part)
>  	 */
>  	remove_inode_hash(part->bdev->bd_inode);
>  
> -	percpu_ref_kill(&part->ref);
> +	put_device(part_to_dev(part));
>  }
>  
>  static ssize_t whole_disk_show(struct device *dev,
> @@ -406,15 +366,11 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  	if (ptbl->part[partno])
>  		return ERR_PTR(-EBUSY);
>  
> -	p = kzalloc(sizeof(*p), GFP_KERNEL);
> -	if (!p)
> -		return ERR_PTR(-EBUSY);
> -
>  	bdev = bdev_alloc(disk, partno);
>  	if (!bdev)
> -		goto out_free;
> -	p->bdev = bdev;
> +		return ERR_PTR(-ENOMEM);
>  
> +	p = bdev->bd_part;
>  	pdev = part_to_dev(p);
>  
>  	bdev->bd_start_sect = start;
> @@ -463,13 +419,6 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  			goto out_del;
>  	}
>  
> -	err = hd_ref_init(p);
> -	if (err) {
> -		if (flags & ADDPART_FLAG_WHOLEDISK)
> -			goto out_remove_file;
> -		goto out_del;
> -	}
> -
>  	/* everything is up and running, commence */
>  	bdev_add(bdev, devt);
>  	rcu_assign_pointer(ptbl->part[partno], p);
> @@ -481,11 +430,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno,
>  
>  out_bdput:
>  	bdput(bdev);
> -out_free:
> -	kfree(p);
>  	return ERR_PTR(err);
> -out_remove_file:
> -	device_remove_file(pdev, &dev_attr_whole_disk);
>  out_del:
>  	kobject_put(bdev->bd_holder_dir);
>  	device_del(pdev);
> diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
> index dc333dbe523281..9e5c2fdfda3629 100644
> --- a/drivers/block/drbd/drbd_receiver.c
> +++ b/drivers/block/drbd/drbd_receiver.c
> @@ -2802,7 +2802,7 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
>  	if (c_min_rate == 0)
>  		return false;
>  
> -	curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
> +	curr_events = (int)part_stat_read_accum(disk->part0->bd_part, sectors) -
>  			atomic_read(&device->rs_sect_ev);
>  
>  	if (atomic_read(&device->ap_actlog_cnt)
> diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
> index ba56f3f05312f0..343f56b86bb766 100644
> --- a/drivers/block/drbd/drbd_worker.c
> +++ b/drivers/block/drbd/drbd_worker.c
> @@ -1678,7 +1678,8 @@ void drbd_rs_controller_reset(struct drbd_device *device)
>  	atomic_set(&device->rs_sect_in, 0);
>  	atomic_set(&device->rs_sect_ev, 0);
>  	device->rs_in_flight = 0;
> -	device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors);
> +	device->rs_last_events =
> +		(int)part_stat_read_accum(disk->part0->bd_part, sectors);
>  
>  	/* Updating the RCU protected object in place is necessary since
>  	   this function gets called from atomic context.
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index b5f68951c9d280..6d84876a9cd0ae 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -1687,7 +1687,7 @@ static void zram_reset_device(struct zram *zram)
>  	zram->disksize = 0;
>  
>  	set_capacity_and_notify(zram->disk, 0);
> -	part_stat_set_all(&zram->disk->part0, 0);
> +	part_stat_set_all(zram->disk->part0->bd_part, 0);
>  
>  	up_write(&zram->init_lock);
>  	/* I/O operation under all of CPU are done so let's free */
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 48051db006f30c..1b2db4d530ea71 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -1607,7 +1607,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
>  				 * (by eliminating DM's splitting and just using bio_split)
>  				 */
>  				part_stat_lock();
> -				__dm_part_stat_sub(&dm_disk(md)->part0,
> +				__dm_part_stat_sub(dm_disk(md)->part0->bd_part,
>  						   sectors[op_stat_group(bio_op(bio))], ci.sector_count);
>  				part_stat_unlock();
>  
> @@ -2242,7 +2242,7 @@ EXPORT_SYMBOL_GPL(dm_put);
>  static bool md_in_flight_bios(struct mapped_device *md)
>  {
>  	int cpu;
> -	struct hd_struct *part = &dm_disk(md)->part0;
> +	struct hd_struct *part = dm_disk(md)->part0->bd_part;
>  	long sum = 0;
>  
>  	for_each_possible_cpu(cpu) {
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 7ce6047c856ea2..3696c2d77a4dd7 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -8441,7 +8441,7 @@ static int is_mddev_idle(struct mddev *mddev, int init)
>  	rcu_read_lock();
>  	rdev_for_each_rcu(rdev, mddev) {
>  		struct gendisk *disk = rdev->bdev->bd_disk;
> -		curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
> +		curr_events = (int)part_stat_read_accum(disk->part0->bd_part, sectors) -
>  			      atomic_read(&disk->sync_io);
>  		/* sync IO will cause sync_io to increase before the disk_stats
>  		 * as sync_io is counted when a request starts, and
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index 381c22426f435b..61cf33b6284feb 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -39,6 +39,7 @@
>  
>  struct bdev_inode {
>  	struct block_device bdev;
> +	struct hd_struct hd;
>  	struct inode vfs_inode;
>  };
>  
> @@ -886,6 +887,9 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
>  		iput(inode);
>  		return NULL;
>  	}
> +	bdev->bd_part = &BDEV_I(inode)->hd;
> +	memset(bdev->bd_part, 0, sizeof(*bdev->bd_part));
> +	bdev->bd_part->bdev = bdev;
>  	return bdev;
>  }
>  
> @@ -1280,15 +1284,10 @@ EXPORT_SYMBOL_GPL(bdev_disk_changed);
>  static int __blkdev_get(struct block_device *bdev, fmode_t mode)
>  {
>  	struct gendisk *disk = bdev->bd_disk;
> -	int ret;
> +	int ret = 0;
>  
>  	if (!bdev->bd_openers) {
>  		if (!bdev_is_partition(bdev)) {
> -			ret = -ENXIO;
> -			bdev->bd_part = disk_get_part(disk, 0);
> -			if (!bdev->bd_part)
> -				goto out_clear;
> -
>  			ret = 0;
>  			if (disk->fops->open)
>  				ret = disk->fops->open(bdev, mode);
> @@ -1307,7 +1306,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
>  				bdev_disk_changed(bdev, ret == -ENOMEDIUM);
>  
>  			if (ret)
> -				goto out_clear;
> +				return ret;
>  		} else {
>  			struct block_device *whole = bdget_disk(disk, 0);
>  
> @@ -1316,18 +1315,16 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
>  			if (ret) {
>  				mutex_unlock(&whole->bd_mutex);
>  				bdput(whole);
> -				goto out_clear;
> +				return ret;
>  			}
>  			whole->bd_part_count++;
>  			mutex_unlock(&whole->bd_mutex);
>  
> -			bdev->bd_part = disk_get_part(disk, bdev->bd_partno);
>  			if (!(disk->flags & GENHD_FL_UP) ||
> -			    !bdev->bd_part || !bdev_nr_sectors(bdev)) {
> +			    !bdev_nr_sectors(bdev)) {
>  				__blkdev_put(whole, mode, 1);
>  				bdput(whole);
> -				ret = -ENXIO;
> -				goto out_clear;
> +				return -ENXIO;
>  			}
>  			set_init_blocksize(bdev);
>  		}
> @@ -1336,7 +1333,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
>  			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
>  	} else {
>  		if (!bdev_is_partition(bdev)) {
> -			ret = 0;
>  			if (bdev->bd_disk->fops->open)
>  				ret = bdev->bd_disk->fops->open(bdev, mode);
>  			/* the same as first opener case, read comment there */
> @@ -1349,11 +1345,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode)
>  	}
>  	bdev->bd_openers++;
>  	return 0;
> -
> - out_clear:
> -	disk_put_part(bdev->bd_part);
> -	bdev->bd_part = NULL;
> -	return ret;
>  }
>  
>  struct block_device *blkdev_get_no_open(dev_t dev)
> @@ -1580,18 +1571,12 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
>  		sync_blockdev(bdev);
>  		kill_bdev(bdev);
>  		bdev_write_inode(bdev);
> -
> -		if (!bdev_is_partition(bdev) && disk->fops->release)
> -			disk->fops->release(disk, mode);
> -
> -		disk_put_part(bdev->bd_part);
> -		bdev->bd_part = NULL;
>  		if (bdev_is_partition(bdev))
>  			victim = bdev_whole(bdev);
> -	} else {
> -		if (!bdev_is_partition(bdev) && disk->fops->release)
> -			disk->fops->release(disk, mode);
>  	}
> +
> +	if (!bdev_is_partition(bdev) && disk->fops->release)
> +		disk->fops->release(disk, mode);
>  	mutex_unlock(&bdev->bd_mutex);
>  	if (victim) {
>  		__blkdev_put(victim, mode, 1);
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 758cf71c9aa2a6..6edea5c1625909 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -59,7 +59,7 @@ struct block_device {
>  } __randomize_layout;
>  
>  #define bdev_whole(_bdev) \
> -	((_bdev)->bd_disk->part0.bdev)
> +	((_bdev)->bd_disk->part0)
>  
>  #define bdev_kobj(_bdev) \
>  	(&part_to_dev((_bdev)->bd_part)->kobj)
> diff --git a/include/linux/genhd.h b/include/linux/genhd.h
> index dcbf9ef7610ea6..df7319da013ccf 100644
> --- a/include/linux/genhd.h
> +++ b/include/linux/genhd.h
> @@ -19,11 +19,12 @@
>  #include <linux/blk_types.h>
>  #include <asm/local.h>
>  
> -#define dev_to_disk(device)	container_of((device), struct gendisk, part0.__dev)
>  #define dev_to_part(device)	container_of((device), struct hd_struct, __dev)
> -#define disk_to_dev(disk)	(&(disk)->part0.__dev)
>  #define part_to_dev(part)	(&((part)->__dev))
>  
> +#define dev_to_disk(device)	(dev_to_part(device)->bdev->bd_disk)
> +#define disk_to_dev(disk)	(part_to_dev((disk)->part0->bd_part))
> +
>  extern const struct device_type disk_type;
>  extern struct device_type part_type;
>  extern struct class block_class;
> @@ -51,12 +52,9 @@ struct partition_meta_info {
>  };
>  
>  struct hd_struct {
> -	struct percpu_ref ref;
> -
>  	struct block_device *bdev;
>  	struct device __dev;
>  	int partno;
> -	struct rcu_work rcu_work;
>  };
>  
>  /**
> @@ -168,7 +166,7 @@ struct gendisk {
>  	 * helpers.
>  	 */
>  	struct disk_part_tbl __rcu *part_tbl;
> -	struct hd_struct part0;
> +	struct block_device *part0;
>  
>  	const struct block_device_operations *fops;
>  	struct request_queue *queue;
> @@ -278,7 +276,7 @@ extern void set_disk_ro(struct gendisk *disk, int flag);
>  
>  static inline int get_disk_ro(struct gendisk *disk)
>  {
> -	return disk->part0.bdev->bd_read_only;
> +	return disk->part0->bd_read_only;
>  }
>  
>  extern void disk_block_events(struct gendisk *disk);
> @@ -302,7 +300,7 @@ static inline sector_t bdev_nr_sectors(struct block_device *bdev)
>  
>  static inline sector_t get_capacity(struct gendisk *disk)
>  {
> -	return bdev_nr_sectors(disk->part0.bdev);
> +	return bdev_nr_sectors(disk->part0);
>  }
>  
>  int bdev_disk_changed(struct block_device *bdev, bool invalidate);
> diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
> index 87ad60106e1db0..680de036691ef9 100644
> --- a/include/linux/part_stat.h
> +++ b/include/linux/part_stat.h
> @@ -59,8 +59,8 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
>  #define part_stat_add(part, field, addnd)	do {			\
>  	__part_stat_add((part), field, addnd);				\
>  	if ((part)->partno)						\
> -		__part_stat_add(&part_to_disk((part))->part0,		\
> -				field, addnd);				\
> +		__part_stat_add(part_to_disk((part))->part0->bd_part,	\
> +			field, addnd); \
>  } while (0)
>  
>  #define part_stat_dec(part, field)					\
> -- 
> 2.29.2
>
diff mbox series

Patch

diff --git a/block/blk-core.c b/block/blk-core.c
index d64ffcb6f9ae5d..9ea70275fc1cfe 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -714,7 +714,8 @@  static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 
 static noinline int should_fail_bio(struct bio *bio)
 {
-	if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size))
+	if (should_fail_request(bio->bi_disk->part0->bd_part,
+			bio->bi_iter.bi_size))
 		return -EIO;
 	return 0;
 }
@@ -831,7 +832,7 @@  static noinline_for_stack bool submit_bio_checks(struct bio *bio)
 		if (unlikely(blk_partition_remap(bio)))
 			goto end_io;
 	} else {
-		if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0)))
+		if (unlikely(bio_check_ro(bio, bio->bi_disk->part0->bd_part)))
 			goto end_io;
 		if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk))))
 			goto end_io;
@@ -1203,7 +1204,7 @@  blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
 		return ret;
 
 	if (rq->rq_disk &&
-	    should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq)))
+	    should_fail_request(rq->rq_disk->part0->bd_part, blk_rq_bytes(rq)))
 		return BLK_STS_IOERR;
 
 	if (blk_crypto_insert_cloned_request(rq))
@@ -1272,7 +1273,7 @@  static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
 			__part_stat_add(part, io_ticks, end ? now - stamp : 1);
 	}
 	if (part->partno) {
-		part = &part_to_disk(part)->part0;
+		part = part_to_disk(part)->part0->bd_part;
 		goto again;
 	}
 }
@@ -1309,8 +1310,6 @@  void blk_account_io_done(struct request *req, u64 now)
 		part_stat_inc(part, ios[sgrp]);
 		part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
 		part_stat_unlock();
-
-		hd_struct_put(part);
 	}
 }
 
@@ -1354,7 +1353,7 @@  EXPORT_SYMBOL_GPL(part_start_io_acct);
 unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
 				 unsigned int op)
 {
-	return __part_start_io_acct(&disk->part0, sectors, op);
+	return __part_start_io_acct(disk->part0->bd_part, sectors, op);
 }
 EXPORT_SYMBOL(disk_start_io_acct);
 
@@ -1376,14 +1375,13 @@  void part_end_io_acct(struct hd_struct *part, struct bio *bio,
 		      unsigned long start_time)
 {
 	__part_end_io_acct(part, bio_op(bio), start_time);
-	hd_struct_put(part);
 }
 EXPORT_SYMBOL_GPL(part_end_io_acct);
 
 void disk_end_io_acct(struct gendisk *disk, unsigned int op,
 		      unsigned long start_time)
 {
-	__part_end_io_acct(&disk->part0, op, start_time);
+	__part_end_io_acct(disk->part0->bd_part, op, start_time);
 }
 EXPORT_SYMBOL(disk_end_io_acct);
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index e32958f0b68750..fcd0a60574dff8 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -139,7 +139,7 @@  static void blk_flush_queue_rq(struct request *rq, bool add_front)
 
 static void blk_account_io_flush(struct request *rq)
 {
-	struct hd_struct *part = &rq->rq_disk->part0;
+	struct hd_struct *part = rq->rq_disk->part0->bd_part;
 
 	part_stat_lock();
 	part_stat_inc(part, ios[STAT_FLUSH]);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index bcf5e458060337..cb351ab9b77dbd 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -683,8 +683,6 @@  static void blk_account_io_merge_request(struct request *req)
 		part_stat_lock();
 		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
 		part_stat_unlock();
-
-		hd_struct_put(req->part);
 	}
 }
 
diff --git a/block/blk.h b/block/blk.h
index 0bd4b58bcbaf77..32ac41f7557fcc 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -363,27 +363,6 @@  int bdev_del_partition(struct block_device *bdev, int partno);
 int bdev_resize_partition(struct block_device *bdev, int partno,
 		sector_t start, sector_t length);
 int disk_expand_part_tbl(struct gendisk *disk, int target);
-int hd_ref_init(struct hd_struct *part);
-
-/* no need to get/put refcount of part0 */
-static inline int hd_struct_try_get(struct hd_struct *part)
-{
-	if (part->partno)
-		return percpu_ref_tryget_live(&part->ref);
-	return 1;
-}
-
-static inline void hd_struct_put(struct hd_struct *part)
-{
-	if (part->partno)
-		percpu_ref_put(&part->ref);
-}
-
-static inline void hd_free_part(struct hd_struct *part)
-{
-	bdput(part->bdev);
-	percpu_ref_exit(&part->ref);
-}
 
 int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 		struct page *page, unsigned int len, unsigned int offset,
diff --git a/block/genhd.c b/block/genhd.c
index ae312ccc6dd7c0..eaa5d65ae826b8 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -42,7 +42,7 @@  static void disk_release_events(struct gendisk *disk);
 
 void set_capacity(struct gendisk *disk, sector_t sectors)
 {
-	struct block_device *bdev = disk->part0.bdev;
+	struct block_device *bdev = disk->part0;
 
 	spin_lock(&bdev->bd_size_lock);
 	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
@@ -310,9 +310,7 @@  static inline int sector_in_part(struct hd_struct *part, sector_t sector)
  * primarily used for stats accounting.
  *
  * CONTEXT:
- * RCU read locked.  The returned partition pointer is always valid
- * because its refcount is grabbed except for part0, which lifetime
- * is same with the disk.
+ * RCU read locked.
  *
  * RETURNS:
  * Found partition on success, part0 is returned if no partition matches
@@ -328,26 +326,19 @@  struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 	ptbl = rcu_dereference(disk->part_tbl);
 
 	part = rcu_dereference(ptbl->last_lookup);
-	if (part && sector_in_part(part, sector) && hd_struct_try_get(part))
+	if (part && sector_in_part(part, sector))
 		goto out_unlock;
 
 	for (i = 1; i < ptbl->len; i++) {
 		part = rcu_dereference(ptbl->part[i]);
 
 		if (part && sector_in_part(part, sector)) {
-			/*
-			 * only live partition can be cached for lookup,
-			 * so use-after-free on cached & deleting partition
-			 * can be avoided
-			 */
-			if (!hd_struct_try_get(part))
-				break;
 			rcu_assign_pointer(ptbl->last_lookup, part);
 			goto out_unlock;
 		}
 	}
 
-	part = &disk->part0;
+	part = disk->part0->bd_part;
 out_unlock:
 	rcu_read_unlock();
 	return part;
@@ -673,8 +664,8 @@  static void register_disk(struct device *parent, struct gendisk *disk,
 	 */
 	pm_runtime_set_memalloc_noio(ddev, true);
 
-	disk->part0.bdev->bd_holder_dir =
-			kobject_create_and_add("holders", &ddev->kobj);
+	disk->part0->bd_holder_dir =
+		kobject_create_and_add("holders", &ddev->kobj);
 	disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
 
 	if (disk->flags & GENHD_FL_HIDDEN) {
@@ -740,7 +731,7 @@  static void __device_add_disk(struct device *parent, struct gendisk *disk,
 
 	disk->flags |= GENHD_FL_UP;
 
-	retval = blk_alloc_devt(&disk->part0, &devt);
+	retval = blk_alloc_devt(disk->part0->bd_part, &devt);
 	if (retval) {
 		WARN_ON(1);
 		return;
@@ -767,7 +758,7 @@  static void __device_add_disk(struct device *parent, struct gendisk *disk,
 		ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt));
 		WARN_ON(ret);
 		bdi_set_owner(bdi, dev);
-		bdev_add(disk->part0.bdev, devt);
+		bdev_add(disk->part0, devt);
 	}
 	register_disk(parent, disk, groups);
 	if (register_queue)
@@ -880,11 +871,11 @@  void del_gendisk(struct gendisk *disk)
 
 	blk_unregister_queue(disk);
 
-	kobject_put(disk->part0.bdev->bd_holder_dir);
+	kobject_put(disk->part0->bd_holder_dir);
 	kobject_put(disk->slave_dir);
 
-	part_stat_set_all(&disk->part0, 0);
-	disk->part0.bdev->bd_stamp = 0;
+	part_stat_set_all(disk->part0->bd_part, 0);
+	disk->part0->bd_stamp = 0;
 	if (!sysfs_deprecated)
 		sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
 	pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
@@ -997,7 +988,7 @@  void __init printk_all_partitions(void)
 		 */
 		disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
 		while ((part = disk_part_iter_next(&piter))) {
-			bool is_part0 = part == &disk->part0;
+			bool is_part0 = part == disk->part0->bd_part;
 
 			printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
 			       bdevt_str(part_devt(part), devt_buf),
@@ -1452,7 +1443,7 @@  static void disk_release(struct device *dev)
 	disk_release_events(disk);
 	kfree(disk->random);
 	disk_replace_part_tbl(disk, NULL);
-	hd_free_part(&disk->part0);
+	bdput(disk->part0);
 	if (disk->queue)
 		blk_put_queue(disk->queue);
 	kfree(disk);
@@ -1618,8 +1609,8 @@  struct gendisk *__alloc_disk_node(int minors, int node_id)
 	if (!disk)
 		return NULL;
 
-	disk->part0.bdev = bdev_alloc(disk, 0);
-	if (!disk->part0.bdev)
+	disk->part0 = bdev_alloc(disk, 0);
+	if (!disk->part0)
 		goto out_free_disk;
 
 	disk->node_id = node_id;
@@ -1627,10 +1618,7 @@  struct gendisk *__alloc_disk_node(int minors, int node_id)
 		goto out_bdput;
 
 	ptbl = rcu_dereference_protected(disk->part_tbl, 1);
-	rcu_assign_pointer(ptbl->part[0], &disk->part0);
-
-	if (hd_ref_init(&disk->part0))
-		goto out_bdput;
+	rcu_assign_pointer(ptbl->part[0], disk->part0->bd_part);
 
 	disk->minors = minors;
 	rand_initialize_disk(disk);
@@ -1640,7 +1628,7 @@  struct gendisk *__alloc_disk_node(int minors, int node_id)
 	return disk;
 
 out_bdput:
-	bdput(disk->part0.bdev);
+	bdput(disk->part0);
 out_free_disk:
 	kfree(disk);
 	return NULL;
@@ -1679,9 +1667,9 @@  void set_disk_ro(struct gendisk *disk, int flag)
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 
-	if (disk->part0.bdev->bd_read_only != flag) {
+	if (disk->part0->bd_read_only != flag) {
 		set_disk_ro_uevent(disk, flag);
-		disk->part0.bdev->bd_read_only = flag;
+		disk->part0->bd_read_only = flag;
 	}
 
 	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 060c1be13cd8da..6d1fca193cbd6f 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -265,9 +265,9 @@  static const struct attribute_group *part_attr_groups[] = {
 static void part_release(struct device *dev)
 {
 	struct hd_struct *p = dev_to_part(dev);
+
 	blk_free_devt(dev->devt);
-	hd_free_part(p);
-	kfree(p);
+	bdput(p->bdev);
 }
 
 static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
@@ -288,46 +288,6 @@  struct device_type part_type = {
 	.uevent		= part_uevent,
 };
 
-static void hd_struct_free_work(struct work_struct *work)
-{
-	struct hd_struct *part =
-		container_of(to_rcu_work(work), struct hd_struct, rcu_work);
-	struct gendisk *disk = part_to_disk(part);
-
-	/*
-	 * Release the disk reference acquired in delete_partition here.
-	 * We can't release it in hd_struct_free because the final put_device
-	 * needs process context and thus can't be run directly from a
-	 * percpu_ref ->release handler.
-	 */
-	put_device(disk_to_dev(disk));
-
-	part->bdev->bd_start_sect = 0;
-	bdev_set_nr_sectors(part->bdev, 0);
-	part_stat_set_all(part, 0);
-	put_device(part_to_dev(part));
-}
-
-static void hd_struct_free(struct percpu_ref *ref)
-{
-	struct hd_struct *part = container_of(ref, struct hd_struct, ref);
-	struct gendisk *disk = part_to_disk(part);
-	struct disk_part_tbl *ptbl =
-		rcu_dereference_protected(disk->part_tbl, 1);
-
-	rcu_assign_pointer(ptbl->last_lookup, NULL);
-
-	INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work);
-	queue_rcu_work(system_wq, &part->rcu_work);
-}
-
-int hd_ref_init(struct hd_struct *part)
-{
-	if (percpu_ref_init(&part->ref, hd_struct_free, 0, GFP_KERNEL))
-		return -ENOMEM;
-	return 0;
-}
-
 /*
  * Must be called either with bd_mutex held, before a disk can be opened or
  * after all disk users are gone.
@@ -342,8 +302,8 @@  void delete_partition(struct hd_struct *part)
 	 * ->part_tbl is referenced in this part's release handler, so
 	 *  we have to hold the disk device
 	 */
-	get_device(disk_to_dev(disk));
 	rcu_assign_pointer(ptbl->part[part->partno], NULL);
+	rcu_assign_pointer(ptbl->last_lookup, NULL);
 	kobject_put(part->bdev->bd_holder_dir);
 	device_del(part_to_dev(part));
 
@@ -353,7 +313,7 @@  void delete_partition(struct hd_struct *part)
 	 */
 	remove_inode_hash(part->bdev->bd_inode);
 
-	percpu_ref_kill(&part->ref);
+	put_device(part_to_dev(part));
 }
 
 static ssize_t whole_disk_show(struct device *dev,
@@ -406,15 +366,11 @@  static struct hd_struct *add_partition(struct gendisk *disk, int partno,
 	if (ptbl->part[partno])
 		return ERR_PTR(-EBUSY);
 
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
-	if (!p)
-		return ERR_PTR(-EBUSY);
-
 	bdev = bdev_alloc(disk, partno);
 	if (!bdev)
-		goto out_free;
-	p->bdev = bdev;
+		return ERR_PTR(-ENOMEM);
 
+	p = bdev->bd_part;
 	pdev = part_to_dev(p);
 
 	bdev->bd_start_sect = start;
@@ -463,13 +419,6 @@  static struct hd_struct *add_partition(struct gendisk *disk, int partno,
 			goto out_del;
 	}
 
-	err = hd_ref_init(p);
-	if (err) {
-		if (flags & ADDPART_FLAG_WHOLEDISK)
-			goto out_remove_file;
-		goto out_del;
-	}
-
 	/* everything is up and running, commence */
 	bdev_add(bdev, devt);
 	rcu_assign_pointer(ptbl->part[partno], p);
@@ -481,11 +430,7 @@  static struct hd_struct *add_partition(struct gendisk *disk, int partno,
 
 out_bdput:
 	bdput(bdev);
-out_free:
-	kfree(p);
 	return ERR_PTR(err);
-out_remove_file:
-	device_remove_file(pdev, &dev_attr_whole_disk);
 out_del:
 	kobject_put(bdev->bd_holder_dir);
 	device_del(pdev);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index dc333dbe523281..9e5c2fdfda3629 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2802,7 +2802,7 @@  bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
 	if (c_min_rate == 0)
 		return false;
 
-	curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
+	curr_events = (int)part_stat_read_accum(disk->part0->bd_part, sectors) -
 			atomic_read(&device->rs_sect_ev);
 
 	if (atomic_read(&device->ap_actlog_cnt)
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index ba56f3f05312f0..343f56b86bb766 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1678,7 +1678,8 @@  void drbd_rs_controller_reset(struct drbd_device *device)
 	atomic_set(&device->rs_sect_in, 0);
 	atomic_set(&device->rs_sect_ev, 0);
 	device->rs_in_flight = 0;
-	device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors);
+	device->rs_last_events =
+		(int)part_stat_read_accum(disk->part0->bd_part, sectors);
 
 	/* Updating the RCU protected object in place is necessary since
 	   this function gets called from atomic context.
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index b5f68951c9d280..6d84876a9cd0ae 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1687,7 +1687,7 @@  static void zram_reset_device(struct zram *zram)
 	zram->disksize = 0;
 
 	set_capacity_and_notify(zram->disk, 0);
-	part_stat_set_all(&zram->disk->part0, 0);
+	part_stat_set_all(zram->disk->part0->bd_part, 0);
 
 	up_write(&zram->init_lock);
 	/* I/O operation under all of CPU are done so let's free */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 48051db006f30c..1b2db4d530ea71 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1607,7 +1607,7 @@  static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 				 * (by eliminating DM's splitting and just using bio_split)
 				 */
 				part_stat_lock();
-				__dm_part_stat_sub(&dm_disk(md)->part0,
+				__dm_part_stat_sub(dm_disk(md)->part0->bd_part,
 						   sectors[op_stat_group(bio_op(bio))], ci.sector_count);
 				part_stat_unlock();
 
@@ -2242,7 +2242,7 @@  EXPORT_SYMBOL_GPL(dm_put);
 static bool md_in_flight_bios(struct mapped_device *md)
 {
 	int cpu;
-	struct hd_struct *part = &dm_disk(md)->part0;
+	struct hd_struct *part = dm_disk(md)->part0->bd_part;
 	long sum = 0;
 
 	for_each_possible_cpu(cpu) {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7ce6047c856ea2..3696c2d77a4dd7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8441,7 +8441,7 @@  static int is_mddev_idle(struct mddev *mddev, int init)
 	rcu_read_lock();
 	rdev_for_each_rcu(rdev, mddev) {
 		struct gendisk *disk = rdev->bdev->bd_disk;
-		curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
+		curr_events = (int)part_stat_read_accum(disk->part0->bd_part, sectors) -
 			      atomic_read(&disk->sync_io);
 		/* sync IO will cause sync_io to increase before the disk_stats
 		 * as sync_io is counted when a request starts, and
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 381c22426f435b..61cf33b6284feb 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -39,6 +39,7 @@ 
 
 struct bdev_inode {
 	struct block_device bdev;
+	struct hd_struct hd;
 	struct inode vfs_inode;
 };
 
@@ -886,6 +887,9 @@  struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 		iput(inode);
 		return NULL;
 	}
+	bdev->bd_part = &BDEV_I(inode)->hd;
+	memset(bdev->bd_part, 0, sizeof(*bdev->bd_part));
+	bdev->bd_part->bdev = bdev;
 	return bdev;
 }
 
@@ -1280,15 +1284,10 @@  EXPORT_SYMBOL_GPL(bdev_disk_changed);
 static int __blkdev_get(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
-	int ret;
+	int ret = 0;
 
 	if (!bdev->bd_openers) {
 		if (!bdev_is_partition(bdev)) {
-			ret = -ENXIO;
-			bdev->bd_part = disk_get_part(disk, 0);
-			if (!bdev->bd_part)
-				goto out_clear;
-
 			ret = 0;
 			if (disk->fops->open)
 				ret = disk->fops->open(bdev, mode);
@@ -1307,7 +1306,7 @@  static int __blkdev_get(struct block_device *bdev, fmode_t mode)
 				bdev_disk_changed(bdev, ret == -ENOMEDIUM);
 
 			if (ret)
-				goto out_clear;
+				return ret;
 		} else {
 			struct block_device *whole = bdget_disk(disk, 0);
 
@@ -1316,18 +1315,16 @@  static int __blkdev_get(struct block_device *bdev, fmode_t mode)
 			if (ret) {
 				mutex_unlock(&whole->bd_mutex);
 				bdput(whole);
-				goto out_clear;
+				return ret;
 			}
 			whole->bd_part_count++;
 			mutex_unlock(&whole->bd_mutex);
 
-			bdev->bd_part = disk_get_part(disk, bdev->bd_partno);
 			if (!(disk->flags & GENHD_FL_UP) ||
-			    !bdev->bd_part || !bdev_nr_sectors(bdev)) {
+			    !bdev_nr_sectors(bdev)) {
 				__blkdev_put(whole, mode, 1);
 				bdput(whole);
-				ret = -ENXIO;
-				goto out_clear;
+				return -ENXIO;
 			}
 			set_init_blocksize(bdev);
 		}
@@ -1336,7 +1333,6 @@  static int __blkdev_get(struct block_device *bdev, fmode_t mode)
 			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
 	} else {
 		if (!bdev_is_partition(bdev)) {
-			ret = 0;
 			if (bdev->bd_disk->fops->open)
 				ret = bdev->bd_disk->fops->open(bdev, mode);
 			/* the same as first opener case, read comment there */
@@ -1349,11 +1345,6 @@  static int __blkdev_get(struct block_device *bdev, fmode_t mode)
 	}
 	bdev->bd_openers++;
 	return 0;
-
- out_clear:
-	disk_put_part(bdev->bd_part);
-	bdev->bd_part = NULL;
-	return ret;
 }
 
 struct block_device *blkdev_get_no_open(dev_t dev)
@@ -1580,18 +1571,12 @@  static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		sync_blockdev(bdev);
 		kill_bdev(bdev);
 		bdev_write_inode(bdev);
-
-		if (!bdev_is_partition(bdev) && disk->fops->release)
-			disk->fops->release(disk, mode);
-
-		disk_put_part(bdev->bd_part);
-		bdev->bd_part = NULL;
 		if (bdev_is_partition(bdev))
 			victim = bdev_whole(bdev);
-	} else {
-		if (!bdev_is_partition(bdev) && disk->fops->release)
-			disk->fops->release(disk, mode);
 	}
+
+	if (!bdev_is_partition(bdev) && disk->fops->release)
+		disk->fops->release(disk, mode);
 	mutex_unlock(&bdev->bd_mutex);
 	if (victim) {
 		__blkdev_put(victim, mode, 1);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 758cf71c9aa2a6..6edea5c1625909 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -59,7 +59,7 @@  struct block_device {
 } __randomize_layout;
 
 #define bdev_whole(_bdev) \
-	((_bdev)->bd_disk->part0.bdev)
+	((_bdev)->bd_disk->part0)
 
 #define bdev_kobj(_bdev) \
 	(&part_to_dev((_bdev)->bd_part)->kobj)
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index dcbf9ef7610ea6..df7319da013ccf 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -19,11 +19,12 @@ 
 #include <linux/blk_types.h>
 #include <asm/local.h>
 
-#define dev_to_disk(device)	container_of((device), struct gendisk, part0.__dev)
 #define dev_to_part(device)	container_of((device), struct hd_struct, __dev)
-#define disk_to_dev(disk)	(&(disk)->part0.__dev)
 #define part_to_dev(part)	(&((part)->__dev))
 
+#define dev_to_disk(device)	(dev_to_part(device)->bdev->bd_disk)
+#define disk_to_dev(disk)	(part_to_dev((disk)->part0->bd_part))
+
 extern const struct device_type disk_type;
 extern struct device_type part_type;
 extern struct class block_class;
@@ -51,12 +52,9 @@  struct partition_meta_info {
 };
 
 struct hd_struct {
-	struct percpu_ref ref;
-
 	struct block_device *bdev;
 	struct device __dev;
 	int partno;
-	struct rcu_work rcu_work;
 };
 
 /**
@@ -168,7 +166,7 @@  struct gendisk {
 	 * helpers.
 	 */
 	struct disk_part_tbl __rcu *part_tbl;
-	struct hd_struct part0;
+	struct block_device *part0;
 
 	const struct block_device_operations *fops;
 	struct request_queue *queue;
@@ -278,7 +276,7 @@  extern void set_disk_ro(struct gendisk *disk, int flag);
 
 static inline int get_disk_ro(struct gendisk *disk)
 {
-	return disk->part0.bdev->bd_read_only;
+	return disk->part0->bd_read_only;
 }
 
 extern void disk_block_events(struct gendisk *disk);
@@ -302,7 +300,7 @@  static inline sector_t bdev_nr_sectors(struct block_device *bdev)
 
 static inline sector_t get_capacity(struct gendisk *disk)
 {
-	return bdev_nr_sectors(disk->part0.bdev);
+	return bdev_nr_sectors(disk->part0);
 }
 
 int bdev_disk_changed(struct block_device *bdev, bool invalidate);
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index 87ad60106e1db0..680de036691ef9 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -59,8 +59,8 @@  static inline void part_stat_set_all(struct hd_struct *part, int value)
 #define part_stat_add(part, field, addnd)	do {			\
 	__part_stat_add((part), field, addnd);				\
 	if ((part)->partno)						\
-		__part_stat_add(&part_to_disk((part))->part0,		\
-				field, addnd);				\
+		__part_stat_add(part_to_disk((part))->part0->bd_part,	\
+			field, addnd); \
 } while (0)
 
 #define part_stat_dec(part, field)					\