diff mbox series

[v3,2/3] block: introduce new field bd_flags in block_device

Message ID 20231122103103.1104589-3-yukuai1@huaweicloud.com (mailing list archive)
State New, archived
Headers show
Series block: warn once for each partition in bio_check_ro() | expand

Commit Message

Yu Kuai Nov. 22, 2023, 10:31 a.m. UTC
From: Yu Kuai <yukuai3@huawei.com>

There are multiple switches in struct block_device, use separate bool
fields for them is not gracefully. Add a new field bd_flags and replace
swithes to a bit, there are no functional changes, and prepare to add
a new switch in the next patch. In order to keep bd_flags in the first
cacheline and prevent layout to be affected, define it as u16.

Also add new helpers to set/clear/test each bit like 'bio->bi_flags'.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/bdev.c              | 15 ++++++++-------
 block/blk-core.c          |  7 ++++---
 block/genhd.c             | 15 +++++++++++----
 block/ioctl.c             |  6 +++++-
 include/linux/blk_types.h | 27 +++++++++++++++++++++------
 include/linux/blkdev.h    |  5 +++--
 6 files changed, 52 insertions(+), 23 deletions(-)

Comments

Ming Lei Nov. 22, 2023, 3:30 a.m. UTC | #1
On Wed, Nov 22, 2023 at 06:31:02PM +0800, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> There are multiple switches in struct block_device, use separate bool
> fields for them is not gracefully. Add a new field bd_flags and replace
> swithes to a bit, there are no functional changes, and prepare to add
> a new switch in the next patch. In order to keep bd_flags in the first
> cacheline and prevent layout to be affected, define it as u16.
> 
> Also add new helpers to set/clear/test each bit like 'bio->bi_flags'.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  block/bdev.c              | 15 ++++++++-------
>  block/blk-core.c          |  7 ++++---
>  block/genhd.c             | 15 +++++++++++----
>  block/ioctl.c             |  6 +++++-
>  include/linux/blk_types.h | 27 +++++++++++++++++++++------
>  include/linux/blkdev.h    |  5 +++--
>  6 files changed, 52 insertions(+), 23 deletions(-)
> 
> diff --git a/block/bdev.c b/block/bdev.c
> index e4cfb7adb645..10f524a7416c 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -402,10 +402,10 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
>  	bdev->bd_partno = partno;
>  	bdev->bd_inode = inode;
>  	bdev->bd_queue = disk->queue;
> -	if (partno)
> -		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
> +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
> +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>  	else
> -		bdev->bd_has_submit_bio = false;
> +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>  	bdev->bd_stats = alloc_percpu(struct disk_stats);
>  	if (!bdev->bd_stats) {
>  		iput(inode);
> @@ -606,7 +606,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>  		bdev->bd_holder = NULL;
>  		bdev->bd_holder_ops = NULL;
>  		mutex_unlock(&bdev->bd_holder_lock);
> -		if (bdev->bd_write_holder)
> +		if (bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER))
>  			unblock = true;
>  	}
>  	if (!whole->bd_holders)
> @@ -619,7 +619,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>  	 */
>  	if (unblock) {
>  		disk_unblock_events(bdev->bd_disk);
> -		bdev->bd_write_holder = false;
> +		bdev_clear_flag(bdev, BD_FLAG_WRITE_HOLDER);
>  	}
>  }
>  
> @@ -805,9 +805,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
>  		 * writeable reference is too fragile given the way @mode is
>  		 * used in blkdev_get/put().
>  		 */
> -		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
> +		if ((mode & BLK_OPEN_WRITE) &&
> +		    !bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER) &&
>  		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
> -			bdev->bd_write_holder = true;
> +			bdev_set_flag(bdev, BD_FLAG_WRITE_HOLDER);
>  			unblock_events = false;
>  		}
>  	}
> diff --git a/block/blk-core.c b/block/blk-core.c
> index fdf25b8d6e78..f9f8b12ba626 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -482,7 +482,8 @@ __setup("fail_make_request=", setup_fail_make_request);
>  
>  bool should_fail_request(struct block_device *part, unsigned int bytes)
>  {
> -	return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
> +	return bdev_flagged(part, BD_FLAG_MAKE_IT_FAIL) &&
> +		should_fail(&fail_make_request, bytes);
>  }
>  
>  static int __init fail_make_request_debugfs(void)
> @@ -595,7 +596,7 @@ static void __submit_bio(struct bio *bio)
>  	if (unlikely(!blk_crypto_bio_prep(&bio)))
>  		return;
>  
> -	if (!bio->bi_bdev->bd_has_submit_bio) {
> +	if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO)) {
>  		blk_mq_submit_bio(bio);
>  	} else if (likely(bio_queue_enter(bio) == 0)) {
>  		struct gendisk *disk = bio->bi_bdev->bd_disk;
> @@ -703,7 +704,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
>  	 */
>  	if (current->bio_list)
>  		bio_list_add(&current->bio_list[0], bio);
> -	else if (!bio->bi_bdev->bd_has_submit_bio)
> +	else if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO))
>  		__submit_bio_noacct_mq(bio);
>  	else
>  		__submit_bio_noacct(bio);
> diff --git a/block/genhd.c b/block/genhd.c
> index c9d06f72c587..57f96c0c8da0 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -413,7 +413,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
>  	elevator_init_mq(disk->queue);
>  
>  	/* Mark bdev as having a submit_bio, if needed */
> -	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
> +	if (disk->fops->submit_bio)
> +		bdev_set_flag(disk->part0, BD_FLAG_HAS_SUBMIT_BIO);
>  
>  	/*
>  	 * If the driver provides an explicit major number it also must provide
> @@ -1062,7 +1063,8 @@ static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
>  ssize_t part_fail_show(struct device *dev,
>  		       struct device_attribute *attr, char *buf)
>  {
> -	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
> +	return sprintf(buf, "%d\n",
> +		       bdev_flagged(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL));
>  }
>  
>  ssize_t part_fail_store(struct device *dev,
> @@ -1071,8 +1073,13 @@ ssize_t part_fail_store(struct device *dev,
>  {
>  	int i;
>  
> -	if (count > 0 && sscanf(buf, "%d", &i) > 0)
> -		dev_to_bdev(dev)->bd_make_it_fail = i;
> +	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
> +		if (!i)
> +			bdev_clear_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
> +		else
> +			bdev_set_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
> +
> +	}
>  
>  	return count;
>  }
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 4160f4e6bd5b..a64440f4c96b 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -394,7 +394,11 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd,
>  		if (ret)
>  			return ret;
>  	}
> -	bdev->bd_read_only = n;
> +
> +	if (!n)
> +		bdev_clear_flag(bdev, BD_FLAG_READ_ONLY);
> +	else
> +		bdev_set_flag(bdev, BD_FLAG_READ_ONLY);
>  	return 0;
>  }
>  
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index f7d40692dd94..de652045111b 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -37,6 +37,11 @@ struct bio_crypt_ctx;
>  #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
>  #define SECTOR_MASK		(PAGE_SECTORS - 1)
>  
> +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
> +#define BD_FLAG_WRITE_HOLDER	1
> +#define BD_FLAG_HAS_SUBMIT_BIO	2
> +#define BD_FLAG_MAKE_IT_FAIL	3
> +
>  struct block_device {
>  	sector_t		bd_start_sect;
>  	sector_t		bd_nr_sectors;
> @@ -44,10 +49,8 @@ struct block_device {
>  	struct request_queue *	bd_queue;
>  	struct disk_stats __percpu *bd_stats;
>  	unsigned long		bd_stamp;
> -	bool			bd_read_only;	/* read-only policy */
> +	unsigned short		bd_flags;
>  	u8			bd_partno;
> -	bool			bd_write_holder;
> -	bool			bd_has_submit_bio;
>  	dev_t			bd_dev;
>  	struct inode		*bd_inode;	/* will die */
>  
> @@ -67,9 +70,6 @@ struct block_device {
>  	struct super_block	*bd_fsfreeze_sb;
>  
>  	struct partition_meta_info *bd_meta_info;
> -#ifdef CONFIG_FAIL_MAKE_REQUEST
> -	bool			bd_make_it_fail;
> -#endif
>  	/*
>  	 * keep this out-of-line as it's both big and not needed in the fast
>  	 * path
> @@ -86,6 +86,21 @@ struct block_device {
>  #define bdev_kobj(_bdev) \
>  	(&((_bdev)->bd_device.kobj))
>  
> +static inline bool bdev_flagged(struct block_device *bdev, unsigned int bit)
> +{
> +	return bdev->bd_flags & (1U << bit);
> +}
> +
> +static inline void bdev_set_flag(struct block_device *bdev, unsigned int bit)
> +{
> +	bdev->bd_flags |= (1U << bit);
> +}
> +
> +static inline void bdev_clear_flag(struct block_device *bdev, unsigned int bit)
> +{
> +	bdev->bd_flags &= ~(1U << bit);
> +}

'U' becomes incorrect after .bd_flags is changed to 'unsigned short'.


Thanks,
Ming
Michael Kelley Nov. 22, 2023, 3:52 a.m. UTC | #2
From: Yu Kuai <yukuai1@huaweicloud.com> Sent: Wednesday, November 22, 2023 2:31 AM
> 
> There are multiple switches in struct block_device, use separate bool
> fields for them is not gracefully. Add a new field bd_flags and replace
> swithes to a bit, there are no functional changes, and prepare to add
> a new switch in the next patch. In order to keep bd_flags in the first
> cacheline and prevent layout to be affected, define it as u16.
> 
> Also add new helpers to set/clear/test each bit like 'bio->bi_flags'.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  block/bdev.c              | 15 ++++++++-------
>  block/blk-core.c          |  7 ++++---
>  block/genhd.c             | 15 +++++++++++----
>  block/ioctl.c             |  6 +++++-
>  include/linux/blk_types.h | 27 +++++++++++++++++++++------
>  include/linux/blkdev.h    |  5 +++--
>  6 files changed, 52 insertions(+), 23 deletions(-)
> 
> diff --git a/block/bdev.c b/block/bdev.c
> index e4cfb7adb645..10f524a7416c 100644
> --- a/block/bdev.c
> +++ b/block/bdev.c
> @@ -402,10 +402,10 @@ struct block_device *bdev_alloc(struct gendisk
> *disk, u8 partno)
>  	bdev->bd_partno = partno;
>  	bdev->bd_inode = inode;
>  	bdev->bd_queue = disk->queue;
> -	if (partno)
> -		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
> +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
> +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>  	else
> -		bdev->bd_has_submit_bio = false;
> +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>  	bdev->bd_stats = alloc_percpu(struct disk_stats);
>  	if (!bdev->bd_stats) {
>  		iput(inode);
> @@ -606,7 +606,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>  		bdev->bd_holder = NULL;
>  		bdev->bd_holder_ops = NULL;
>  		mutex_unlock(&bdev->bd_holder_lock);
> -		if (bdev->bd_write_holder)
> +		if (bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER))
>  			unblock = true;
>  	}
>  	if (!whole->bd_holders)
> @@ -619,7 +619,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>  	 */
>  	if (unblock) {
>  		disk_unblock_events(bdev->bd_disk);
> -		bdev->bd_write_holder = false;
> +		bdev_clear_flag(bdev, BD_FLAG_WRITE_HOLDER);
>  	}
>  }
> 
> @@ -805,9 +805,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev,
> blk_mode_t mode, void *holder,
>  		 * writeable reference is too fragile given the way @mode is
>  		 * used in blkdev_get/put().
>  		 */
> -		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
> +		if ((mode & BLK_OPEN_WRITE) &&
> +		    !bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER) &&
>  		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
> -			bdev->bd_write_holder = true;
> +			bdev_set_flag(bdev, BD_FLAG_WRITE_HOLDER);
>  			unblock_events = false;
>  		}
>  	}
> diff --git a/block/blk-core.c b/block/blk-core.c
> index fdf25b8d6e78..f9f8b12ba626 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -482,7 +482,8 @@ __setup("fail_make_request=",
> setup_fail_make_request);
> 
>  bool should_fail_request(struct block_device *part, unsigned int bytes)
>  {
> -	return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
> +	return bdev_flagged(part, BD_FLAG_MAKE_IT_FAIL) &&
> +		should_fail(&fail_make_request, bytes);
>  }
> 
>  static int __init fail_make_request_debugfs(void)
> @@ -595,7 +596,7 @@ static void __submit_bio(struct bio *bio)
>  	if (unlikely(!blk_crypto_bio_prep(&bio)))
>  		return;
> 
> -	if (!bio->bi_bdev->bd_has_submit_bio) {
> +	if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO)) {
>  		blk_mq_submit_bio(bio);
>  	} else if (likely(bio_queue_enter(bio) == 0)) {
>  		struct gendisk *disk = bio->bi_bdev->bd_disk;
> @@ -703,7 +704,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
>  	 */
>  	if (current->bio_list)
>  		bio_list_add(&current->bio_list[0], bio);
> -	else if (!bio->bi_bdev->bd_has_submit_bio)
> +	else if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO))
>  		__submit_bio_noacct_mq(bio);
>  	else
>  		__submit_bio_noacct(bio);
> diff --git a/block/genhd.c b/block/genhd.c
> index c9d06f72c587..57f96c0c8da0 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -413,7 +413,8 @@ int __must_check device_add_disk(struct device
> *parent, struct gendisk *disk,
>  	elevator_init_mq(disk->queue);
> 
>  	/* Mark bdev as having a submit_bio, if needed */
> -	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
> +	if (disk->fops->submit_bio)
> +		bdev_set_flag(disk->part0, BD_FLAG_HAS_SUBMIT_BIO);
> 
>  	/*
>  	 * If the driver provides an explicit major number it also must provide
> @@ -1062,7 +1063,8 @@ static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
>  ssize_t part_fail_show(struct device *dev,
>  		       struct device_attribute *attr, char *buf)
>  {
> -	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
> +	return sprintf(buf, "%d\n",
> +		       bdev_flagged(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL));
>  }
> 
>  ssize_t part_fail_store(struct device *dev,
> @@ -1071,8 +1073,13 @@ ssize_t part_fail_store(struct device *dev,
>  {
>  	int i;
> 
> -	if (count > 0 && sscanf(buf, "%d", &i) > 0)
> -		dev_to_bdev(dev)->bd_make_it_fail = i;
> +	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
> +		if (!i)
> +			bdev_clear_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
> +		else
> +			bdev_set_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
> +
> +	}
> 
>  	return count;
>  }
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 4160f4e6bd5b..a64440f4c96b 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -394,7 +394,11 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd,
>  		if (ret)
>  			return ret;
>  	}
> -	bdev->bd_read_only = n;
> +
> +	if (!n)
> +		bdev_clear_flag(bdev, BD_FLAG_READ_ONLY);
> +	else
> +		bdev_set_flag(bdev, BD_FLAG_READ_ONLY);
>  	return 0;
>  }
> 
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index f7d40692dd94..de652045111b 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -37,6 +37,11 @@ struct bio_crypt_ctx;
>  #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
>  #define SECTOR_MASK		(PAGE_SECTORS - 1)
> 
> +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
> +#define BD_FLAG_WRITE_HOLDER	1
> +#define BD_FLAG_HAS_SUBMIT_BIO	2
> +#define BD_FLAG_MAKE_IT_FAIL	3
> +
>  struct block_device {
>  	sector_t		bd_start_sect;
>  	sector_t		bd_nr_sectors;
> @@ -44,10 +49,8 @@ struct block_device {
>  	struct request_queue *	bd_queue;
>  	struct disk_stats __percpu *bd_stats;
>  	unsigned long		bd_stamp;
> -	bool			bd_read_only;	/* read-only policy */
> +	unsigned short		bd_flags;
>  	u8			bd_partno;
> -	bool			bd_write_holder;
> -	bool			bd_has_submit_bio;
>  	dev_t			bd_dev;
>  	struct inode		*bd_inode;	/* will die */
> 
> @@ -67,9 +70,6 @@ struct block_device {
>  	struct super_block	*bd_fsfreeze_sb;
> 
>  	struct partition_meta_info *bd_meta_info;
> -#ifdef CONFIG_FAIL_MAKE_REQUEST
> -	bool			bd_make_it_fail;
> -#endif
>  	/*
>  	 * keep this out-of-line as it's both big and not needed in the fast
>  	 * path
> @@ -86,6 +86,21 @@ struct block_device {
>  #define bdev_kobj(_bdev) \
>  	(&((_bdev)->bd_device.kobj))
> 
> +static inline bool bdev_flagged(struct block_device *bdev, unsigned int bit)
> +{
> +	return bdev->bd_flags & (1U << bit);
> +}
> +
> +static inline void bdev_set_flag(struct block_device *bdev, unsigned int bit)
> +{
> +	bdev->bd_flags |= (1U << bit);
> +}
> +
> +static inline void bdev_clear_flag(struct block_device *bdev, unsigned int bit)
> +{
> +	bdev->bd_flags &= ~(1U << bit);
> +}

It seems like there's atomicity problem with this approach.  In the above
code, setting and clearing a bd_flag is *not* atomic with respect to
setting/clearing some other bd_flag.  For example, setting/clearing
BD_FLAG_MAKE_IT_FAIL via the /sys interface could race with
setting/clearing BD_FLAG_READ_ONLY via an ioctl, and one of the
two changes could be lost.  This problem wouldn't happen when
each flag was a separate field.  Admittedly, the likelihood of a
problem with BD_FLAG_MAKE_IT_FAIL is extremely low, but
conceptually the lack of atomicity is still wrong.  There might
be a similar problem with BD_FLAG_WRITE_HOLDER, but I
didn't investigate thoroughly.

Michael

> +
>  /*
>   * Block error status values.  See block/blk-core:blk_errors for the details.
>   * Alpha cannot write a byte atomically, so we need to use 32-bit value.
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 51fa7ffdee83..fc1d55ef5107 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -741,13 +741,14 @@ void disk_uevent(struct gendisk *disk, enum
> kobject_action action);
> 
>  static inline int get_disk_ro(struct gendisk *disk)
>  {
> -	return disk->part0->bd_read_only ||
> +	return bdev_flagged(disk->part0, BD_FLAG_READ_ONLY) ||
>  		test_bit(GD_READ_ONLY, &disk->state);
>  }
> 
>  static inline int bdev_read_only(struct block_device *bdev)
>  {
> -	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
> +	return bdev_flagged(bdev, BD_FLAG_READ_ONLY) ||
> +		get_disk_ro(bdev->bd_disk);
>  }
> 
>  bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
> --
> 2.39.2
Yu Kuai Nov. 22, 2023, 6:15 a.m. UTC | #3
Hi,

在 2023/11/22 11:30, Ming Lei 写道:
> On Wed, Nov 22, 2023 at 06:31:02PM +0800, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> There are multiple switches in struct block_device, use separate bool
>> fields for them is not gracefully. Add a new field bd_flags and replace
>> swithes to a bit, there are no functional changes, and prepare to add
>> a new switch in the next patch. In order to keep bd_flags in the first
>> cacheline and prevent layout to be affected, define it as u16.
>>
>> Also add new helpers to set/clear/test each bit like 'bio->bi_flags'.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   block/bdev.c              | 15 ++++++++-------
>>   block/blk-core.c          |  7 ++++---
>>   block/genhd.c             | 15 +++++++++++----
>>   block/ioctl.c             |  6 +++++-
>>   include/linux/blk_types.h | 27 +++++++++++++++++++++------
>>   include/linux/blkdev.h    |  5 +++--
>>   6 files changed, 52 insertions(+), 23 deletions(-)
>>
>> diff --git a/block/bdev.c b/block/bdev.c
>> index e4cfb7adb645..10f524a7416c 100644
>> --- a/block/bdev.c
>> +++ b/block/bdev.c
>> @@ -402,10 +402,10 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
>>   	bdev->bd_partno = partno;
>>   	bdev->bd_inode = inode;
>>   	bdev->bd_queue = disk->queue;
>> -	if (partno)
>> -		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
>> +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
>> +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>>   	else
>> -		bdev->bd_has_submit_bio = false;
>> +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>>   	bdev->bd_stats = alloc_percpu(struct disk_stats);
>>   	if (!bdev->bd_stats) {
>>   		iput(inode);
>> @@ -606,7 +606,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>>   		bdev->bd_holder = NULL;
>>   		bdev->bd_holder_ops = NULL;
>>   		mutex_unlock(&bdev->bd_holder_lock);
>> -		if (bdev->bd_write_holder)
>> +		if (bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER))
>>   			unblock = true;
>>   	}
>>   	if (!whole->bd_holders)
>> @@ -619,7 +619,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>>   	 */
>>   	if (unblock) {
>>   		disk_unblock_events(bdev->bd_disk);
>> -		bdev->bd_write_holder = false;
>> +		bdev_clear_flag(bdev, BD_FLAG_WRITE_HOLDER);
>>   	}
>>   }
>>   
>> @@ -805,9 +805,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
>>   		 * writeable reference is too fragile given the way @mode is
>>   		 * used in blkdev_get/put().
>>   		 */
>> -		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
>> +		if ((mode & BLK_OPEN_WRITE) &&
>> +		    !bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER) &&
>>   		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
>> -			bdev->bd_write_holder = true;
>> +			bdev_set_flag(bdev, BD_FLAG_WRITE_HOLDER);
>>   			unblock_events = false;
>>   		}
>>   	}
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index fdf25b8d6e78..f9f8b12ba626 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -482,7 +482,8 @@ __setup("fail_make_request=", setup_fail_make_request);
>>   
>>   bool should_fail_request(struct block_device *part, unsigned int bytes)
>>   {
>> -	return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
>> +	return bdev_flagged(part, BD_FLAG_MAKE_IT_FAIL) &&
>> +		should_fail(&fail_make_request, bytes);
>>   }
>>   
>>   static int __init fail_make_request_debugfs(void)
>> @@ -595,7 +596,7 @@ static void __submit_bio(struct bio *bio)
>>   	if (unlikely(!blk_crypto_bio_prep(&bio)))
>>   		return;
>>   
>> -	if (!bio->bi_bdev->bd_has_submit_bio) {
>> +	if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO)) {
>>   		blk_mq_submit_bio(bio);
>>   	} else if (likely(bio_queue_enter(bio) == 0)) {
>>   		struct gendisk *disk = bio->bi_bdev->bd_disk;
>> @@ -703,7 +704,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
>>   	 */
>>   	if (current->bio_list)
>>   		bio_list_add(&current->bio_list[0], bio);
>> -	else if (!bio->bi_bdev->bd_has_submit_bio)
>> +	else if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO))
>>   		__submit_bio_noacct_mq(bio);
>>   	else
>>   		__submit_bio_noacct(bio);
>> diff --git a/block/genhd.c b/block/genhd.c
>> index c9d06f72c587..57f96c0c8da0 100644
>> --- a/block/genhd.c
>> +++ b/block/genhd.c
>> @@ -413,7 +413,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
>>   	elevator_init_mq(disk->queue);
>>   
>>   	/* Mark bdev as having a submit_bio, if needed */
>> -	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
>> +	if (disk->fops->submit_bio)
>> +		bdev_set_flag(disk->part0, BD_FLAG_HAS_SUBMIT_BIO);
>>   
>>   	/*
>>   	 * If the driver provides an explicit major number it also must provide
>> @@ -1062,7 +1063,8 @@ static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
>>   ssize_t part_fail_show(struct device *dev,
>>   		       struct device_attribute *attr, char *buf)
>>   {
>> -	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
>> +	return sprintf(buf, "%d\n",
>> +		       bdev_flagged(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL));
>>   }
>>   
>>   ssize_t part_fail_store(struct device *dev,
>> @@ -1071,8 +1073,13 @@ ssize_t part_fail_store(struct device *dev,
>>   {
>>   	int i;
>>   
>> -	if (count > 0 && sscanf(buf, "%d", &i) > 0)
>> -		dev_to_bdev(dev)->bd_make_it_fail = i;
>> +	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
>> +		if (!i)
>> +			bdev_clear_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
>> +		else
>> +			bdev_set_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
>> +
>> +	}
>>   
>>   	return count;
>>   }
>> diff --git a/block/ioctl.c b/block/ioctl.c
>> index 4160f4e6bd5b..a64440f4c96b 100644
>> --- a/block/ioctl.c
>> +++ b/block/ioctl.c
>> @@ -394,7 +394,11 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd,
>>   		if (ret)
>>   			return ret;
>>   	}
>> -	bdev->bd_read_only = n;
>> +
>> +	if (!n)
>> +		bdev_clear_flag(bdev, BD_FLAG_READ_ONLY);
>> +	else
>> +		bdev_set_flag(bdev, BD_FLAG_READ_ONLY);
>>   	return 0;
>>   }
>>   
>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>> index f7d40692dd94..de652045111b 100644
>> --- a/include/linux/blk_types.h
>> +++ b/include/linux/blk_types.h
>> @@ -37,6 +37,11 @@ struct bio_crypt_ctx;
>>   #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
>>   #define SECTOR_MASK		(PAGE_SECTORS - 1)
>>   
>> +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
>> +#define BD_FLAG_WRITE_HOLDER	1
>> +#define BD_FLAG_HAS_SUBMIT_BIO	2
>> +#define BD_FLAG_MAKE_IT_FAIL	3
>> +
>>   struct block_device {
>>   	sector_t		bd_start_sect;
>>   	sector_t		bd_nr_sectors;
>> @@ -44,10 +49,8 @@ struct block_device {
>>   	struct request_queue *	bd_queue;
>>   	struct disk_stats __percpu *bd_stats;
>>   	unsigned long		bd_stamp;
>> -	bool			bd_read_only;	/* read-only policy */
>> +	unsigned short		bd_flags;
>>   	u8			bd_partno;
>> -	bool			bd_write_holder;
>> -	bool			bd_has_submit_bio;
>>   	dev_t			bd_dev;
>>   	struct inode		*bd_inode;	/* will die */
>>   
>> @@ -67,9 +70,6 @@ struct block_device {
>>   	struct super_block	*bd_fsfreeze_sb;
>>   
>>   	struct partition_meta_info *bd_meta_info;
>> -#ifdef CONFIG_FAIL_MAKE_REQUEST
>> -	bool			bd_make_it_fail;
>> -#endif
>>   	/*
>>   	 * keep this out-of-line as it's both big and not needed in the fast
>>   	 * path
>> @@ -86,6 +86,21 @@ struct block_device {
>>   #define bdev_kobj(_bdev) \
>>   	(&((_bdev)->bd_device.kobj))
>>   
>> +static inline bool bdev_flagged(struct block_device *bdev, unsigned int bit)
>> +{
>> +	return bdev->bd_flags & (1U << bit);
>> +}
>> +
>> +static inline void bdev_set_flag(struct block_device *bdev, unsigned int bit)
>> +{
>> +	bdev->bd_flags |= (1U << bit);
>> +}
>> +
>> +static inline void bdev_clear_flag(struct block_device *bdev, unsigned int bit)
>> +{
>> +	bdev->bd_flags &= ~(1U << bit);
>> +}
> 
> 'U' becomes incorrect after .bd_flags is changed to 'unsigned short'.

Currently, bi_flags is unsigned short, and this is the same as
bio_set/clear_flag(), will this behaviour cause any problems?

Thanks,
Kuai

> 
> 
> Thanks,
> Ming
> 
> .
>
Yu Kuai Nov. 22, 2023, 7:06 a.m. UTC | #4
Hi,

在 2023/11/22 11:52, Michael Kelley 写道:
> From: Yu Kuai <yukuai1@huaweicloud.com> Sent: Wednesday, November 22, 2023 2:31 AM
>>
>> There are multiple switches in struct block_device, use separate bool
>> fields for them is not gracefully. Add a new field bd_flags and replace
>> swithes to a bit, there are no functional changes, and prepare to add
>> a new switch in the next patch. In order to keep bd_flags in the first
>> cacheline and prevent layout to be affected, define it as u16.
>>
>> Also add new helpers to set/clear/test each bit like 'bio->bi_flags'.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   block/bdev.c              | 15 ++++++++-------
>>   block/blk-core.c          |  7 ++++---
>>   block/genhd.c             | 15 +++++++++++----
>>   block/ioctl.c             |  6 +++++-
>>   include/linux/blk_types.h | 27 +++++++++++++++++++++------
>>   include/linux/blkdev.h    |  5 +++--
>>   6 files changed, 52 insertions(+), 23 deletions(-)
>>
>> diff --git a/block/bdev.c b/block/bdev.c
>> index e4cfb7adb645..10f524a7416c 100644
>> --- a/block/bdev.c
>> +++ b/block/bdev.c
>> @@ -402,10 +402,10 @@ struct block_device *bdev_alloc(struct gendisk
>> *disk, u8 partno)
>>   	bdev->bd_partno = partno;
>>   	bdev->bd_inode = inode;
>>   	bdev->bd_queue = disk->queue;
>> -	if (partno)
>> -		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
>> +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
>> +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>>   	else
>> -		bdev->bd_has_submit_bio = false;
>> +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>>   	bdev->bd_stats = alloc_percpu(struct disk_stats);
>>   	if (!bdev->bd_stats) {
>>   		iput(inode);
>> @@ -606,7 +606,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>>   		bdev->bd_holder = NULL;
>>   		bdev->bd_holder_ops = NULL;
>>   		mutex_unlock(&bdev->bd_holder_lock);
>> -		if (bdev->bd_write_holder)
>> +		if (bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER))
>>   			unblock = true;
>>   	}
>>   	if (!whole->bd_holders)
>> @@ -619,7 +619,7 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
>>   	 */
>>   	if (unblock) {
>>   		disk_unblock_events(bdev->bd_disk);
>> -		bdev->bd_write_holder = false;
>> +		bdev_clear_flag(bdev, BD_FLAG_WRITE_HOLDER);
>>   	}
>>   }
>>
>> @@ -805,9 +805,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev,
>> blk_mode_t mode, void *holder,
>>   		 * writeable reference is too fragile given the way @mode is
>>   		 * used in blkdev_get/put().
>>   		 */
>> -		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
>> +		if ((mode & BLK_OPEN_WRITE) &&
>> +		    !bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER) &&
>>   		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
>> -			bdev->bd_write_holder = true;
>> +			bdev_set_flag(bdev, BD_FLAG_WRITE_HOLDER);
>>   			unblock_events = false;
>>   		}
>>   	}
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index fdf25b8d6e78..f9f8b12ba626 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -482,7 +482,8 @@ __setup("fail_make_request=",
>> setup_fail_make_request);
>>
>>   bool should_fail_request(struct block_device *part, unsigned int bytes)
>>   {
>> -	return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
>> +	return bdev_flagged(part, BD_FLAG_MAKE_IT_FAIL) &&
>> +		should_fail(&fail_make_request, bytes);
>>   }
>>
>>   static int __init fail_make_request_debugfs(void)
>> @@ -595,7 +596,7 @@ static void __submit_bio(struct bio *bio)
>>   	if (unlikely(!blk_crypto_bio_prep(&bio)))
>>   		return;
>>
>> -	if (!bio->bi_bdev->bd_has_submit_bio) {
>> +	if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO)) {
>>   		blk_mq_submit_bio(bio);
>>   	} else if (likely(bio_queue_enter(bio) == 0)) {
>>   		struct gendisk *disk = bio->bi_bdev->bd_disk;
>> @@ -703,7 +704,7 @@ void submit_bio_noacct_nocheck(struct bio *bio)
>>   	 */
>>   	if (current->bio_list)
>>   		bio_list_add(&current->bio_list[0], bio);
>> -	else if (!bio->bi_bdev->bd_has_submit_bio)
>> +	else if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO))
>>   		__submit_bio_noacct_mq(bio);
>>   	else
>>   		__submit_bio_noacct(bio);
>> diff --git a/block/genhd.c b/block/genhd.c
>> index c9d06f72c587..57f96c0c8da0 100644
>> --- a/block/genhd.c
>> +++ b/block/genhd.c
>> @@ -413,7 +413,8 @@ int __must_check device_add_disk(struct device
>> *parent, struct gendisk *disk,
>>   	elevator_init_mq(disk->queue);
>>
>>   	/* Mark bdev as having a submit_bio, if needed */
>> -	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
>> +	if (disk->fops->submit_bio)
>> +		bdev_set_flag(disk->part0, BD_FLAG_HAS_SUBMIT_BIO);
>>
>>   	/*
>>   	 * If the driver provides an explicit major number it also must provide
>> @@ -1062,7 +1063,8 @@ static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
>>   ssize_t part_fail_show(struct device *dev,
>>   		       struct device_attribute *attr, char *buf)
>>   {
>> -	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
>> +	return sprintf(buf, "%d\n",
>> +		       bdev_flagged(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL));
>>   }
>>
>>   ssize_t part_fail_store(struct device *dev,
>> @@ -1071,8 +1073,13 @@ ssize_t part_fail_store(struct device *dev,
>>   {
>>   	int i;
>>
>> -	if (count > 0 && sscanf(buf, "%d", &i) > 0)
>> -		dev_to_bdev(dev)->bd_make_it_fail = i;
>> +	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
>> +		if (!i)
>> +			bdev_clear_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
>> +		else
>> +			bdev_set_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
>> +
>> +	}
>>
>>   	return count;
>>   }
>> diff --git a/block/ioctl.c b/block/ioctl.c
>> index 4160f4e6bd5b..a64440f4c96b 100644
>> --- a/block/ioctl.c
>> +++ b/block/ioctl.c
>> @@ -394,7 +394,11 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd,
>>   		if (ret)
>>   			return ret;
>>   	}
>> -	bdev->bd_read_only = n;
>> +
>> +	if (!n)
>> +		bdev_clear_flag(bdev, BD_FLAG_READ_ONLY);
>> +	else
>> +		bdev_set_flag(bdev, BD_FLAG_READ_ONLY);
>>   	return 0;
>>   }
>>
>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>> index f7d40692dd94..de652045111b 100644
>> --- a/include/linux/blk_types.h
>> +++ b/include/linux/blk_types.h
>> @@ -37,6 +37,11 @@ struct bio_crypt_ctx;
>>   #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
>>   #define SECTOR_MASK		(PAGE_SECTORS - 1)
>>
>> +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
>> +#define BD_FLAG_WRITE_HOLDER	1
>> +#define BD_FLAG_HAS_SUBMIT_BIO	2
>> +#define BD_FLAG_MAKE_IT_FAIL	3
>> +
>>   struct block_device {
>>   	sector_t		bd_start_sect;
>>   	sector_t		bd_nr_sectors;
>> @@ -44,10 +49,8 @@ struct block_device {
>>   	struct request_queue *	bd_queue;
>>   	struct disk_stats __percpu *bd_stats;
>>   	unsigned long		bd_stamp;
>> -	bool			bd_read_only;	/* read-only policy */
>> +	unsigned short		bd_flags;
>>   	u8			bd_partno;
>> -	bool			bd_write_holder;
>> -	bool			bd_has_submit_bio;
>>   	dev_t			bd_dev;
>>   	struct inode		*bd_inode;	/* will die */
>>
>> @@ -67,9 +70,6 @@ struct block_device {
>>   	struct super_block	*bd_fsfreeze_sb;
>>
>>   	struct partition_meta_info *bd_meta_info;
>> -#ifdef CONFIG_FAIL_MAKE_REQUEST
>> -	bool			bd_make_it_fail;
>> -#endif
>>   	/*
>>   	 * keep this out-of-line as it's both big and not needed in the fast
>>   	 * path
>> @@ -86,6 +86,21 @@ struct block_device {
>>   #define bdev_kobj(_bdev) \
>>   	(&((_bdev)->bd_device.kobj))
>>
>> +static inline bool bdev_flagged(struct block_device *bdev, unsigned int bit)
>> +{
>> +	return bdev->bd_flags & (1U << bit);
>> +}
>> +
>> +static inline void bdev_set_flag(struct block_device *bdev, unsigned int bit)
>> +{
>> +	bdev->bd_flags |= (1U << bit);
>> +}
>> +
>> +static inline void bdev_clear_flag(struct block_device *bdev, unsigned int bit)
>> +{
>> +	bdev->bd_flags &= ~(1U << bit);
>> +}
> 
> It seems like there's atomicity problem with this approach.  In the above
> code, setting and clearing a bd_flag is *not* atomic with respect to
> setting/clearing some other bd_flag.  For example, setting/clearing
> BD_FLAG_MAKE_IT_FAIL via the /sys interface could race with
> setting/clearing BD_FLAG_READ_ONLY via an ioctl, and one of the
> two changes could be lost.  This problem wouldn't happen when
> each flag was a separate field.  Admittedly, the likelihood of a
> problem with BD_FLAG_MAKE_IT_FAIL is extremely low, but
> conceptually the lack of atomicity is still wrong.  There might
> be a similar problem with BD_FLAG_WRITE_HOLDER, but I
> didn't investigate thoroughly.

Thanks for the explanation, however, currently bio->bi_opf,
bio->bi_flags and req->rq_flags, and I can't understand now how do they
prevent to change bit concurrently? I'm probably missing something...

Thanks,
Kuai

> 
> Michael
> 
>> +
>>   /*
>>    * Block error status values.  See block/blk-core:blk_errors for the details.
>>    * Alpha cannot write a byte atomically, so we need to use 32-bit value.
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 51fa7ffdee83..fc1d55ef5107 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -741,13 +741,14 @@ void disk_uevent(struct gendisk *disk, enum
>> kobject_action action);
>>
>>   static inline int get_disk_ro(struct gendisk *disk)
>>   {
>> -	return disk->part0->bd_read_only ||
>> +	return bdev_flagged(disk->part0, BD_FLAG_READ_ONLY) ||
>>   		test_bit(GD_READ_ONLY, &disk->state);
>>   }
>>
>>   static inline int bdev_read_only(struct block_device *bdev)
>>   {
>> -	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
>> +	return bdev_flagged(bdev, BD_FLAG_READ_ONLY) ||
>> +		get_disk_ro(bdev->bd_disk);
>>   }
>>
>>   bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
>> --
>> 2.39.2
> 
> .
>
Christoph Hellwig Nov. 22, 2023, 7:28 a.m. UTC | #5
> +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
> +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
>  	else
> +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);

While the block layer has a bit of history of using wrappers for
testing, setting and clearing flags, I have to say I always find them
rather confusing when reading the code.

> +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */

I know this is copied from the existing field, but can you expand
it a bit?

> +#define BD_FLAG_WRITE_HOLDER	1
> +#define BD_FLAG_HAS_SUBMIT_BIO	2
> +#define BD_FLAG_MAKE_IT_FAIL	3

And also write comments for these. 

> +
>  struct block_device {
>  	sector_t		bd_start_sect;
>  	sector_t		bd_nr_sectors;
> @@ -44,10 +49,8 @@ struct block_device {
>  	struct request_queue *	bd_queue;
>  	struct disk_stats __percpu *bd_stats;
>  	unsigned long		bd_stamp;
> -	bool			bd_read_only;	/* read-only policy */
> +	unsigned short		bd_flags;

I suspect you really need an unsigned long and atomic bit ops here.
Even a lock would probably not work on alpha as it could affect
the other fields in the same 32-bit alignment.
Ming Lei Nov. 22, 2023, 7:45 a.m. UTC | #6
On Tue, Nov 21, 2023 at 11:28:56PM -0800, Christoph Hellwig wrote:
> > +	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
> > +		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
> >  	else
> > +		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
> 
> While the block layer has a bit of history of using wrappers for
> testing, setting and clearing flags, I have to say I always find them
> rather confusing when reading the code.
> 
> > +#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
> 
> I know this is copied from the existing field, but can you expand
> it a bit?
> 
> > +#define BD_FLAG_WRITE_HOLDER	1
> > +#define BD_FLAG_HAS_SUBMIT_BIO	2
> > +#define BD_FLAG_MAKE_IT_FAIL	3
> 
> And also write comments for these. 
> 
> > +
> >  struct block_device {
> >  	sector_t		bd_start_sect;
> >  	sector_t		bd_nr_sectors;
> > @@ -44,10 +49,8 @@ struct block_device {
> >  	struct request_queue *	bd_queue;
> >  	struct disk_stats __percpu *bd_stats;
> >  	unsigned long		bd_stamp;
> > -	bool			bd_read_only;	/* read-only policy */
> > +	unsigned short		bd_flags;
> 
> I suspect you really need an unsigned long and atomic bit ops here.
> Even a lock would probably not work on alpha as it could affect
> the other fields in the same 32-bit alignment.
 
All the existed 'bool' flags are not atomic RW, so I think it isn't
necessary to define 'bd_flags' as 'unsigned long' for replacing them.

Thanks, 
Ming
Christoph Hellwig Nov. 22, 2023, 7:53 a.m. UTC | #7
On Wed, Nov 22, 2023 at 03:45:24PM +0800, Ming Lei wrote:
> All the existed 'bool' flags are not atomic RW, so I think it isn't
> necessary to define 'bd_flags' as 'unsigned long' for replacing them.

So because the old code wasn't correct we'll never bother?  The new
flag and the new placement certainly make this more critical as well.
Ming Lei Nov. 22, 2023, 8:19 a.m. UTC | #8
On Tue, Nov 21, 2023 at 11:53:17PM -0800, Christoph Hellwig wrote:
> On Wed, Nov 22, 2023 at 03:45:24PM +0800, Ming Lei wrote:
> > All the existed 'bool' flags are not atomic RW, so I think it isn't
> > necessary to define 'bd_flags' as 'unsigned long' for replacing them.
> 
> So because the old code wasn't correct we'll never bother?  The new
> flag and the new placement certainly make this more critical as well.

Can you explain why the old code was wrong?

1) ->bd_read_only and ->bd_make_it_fail

- set from userspace interface(ioctl or sysfs)
- check in IO code path

so changing it into atomic bit doesn't make difference from user
viewpoint.

2) ->bd_write_holder

disk->open_mutex is held for read & write this flag

3) ->bd_has_submit_bio

This flag is setup as oneshot before adding disk, and check in FS io code
path.

Of course, defining it as 'unsigned long' can extend its future usage, but
it depends on the atomic requirement of each flag itself.


Thanks, 
Ming
Christoph Hellwig Nov. 22, 2023, 12:47 p.m. UTC | #9
On Wed, Nov 22, 2023 at 04:19:40PM +0800, Ming Lei wrote:
> On Tue, Nov 21, 2023 at 11:53:17PM -0800, Christoph Hellwig wrote:
> > On Wed, Nov 22, 2023 at 03:45:24PM +0800, Ming Lei wrote:
> > > All the existed 'bool' flags are not atomic RW, so I think it isn't
> > > necessary to define 'bd_flags' as 'unsigned long' for replacing them.
> > 
> > So because the old code wasn't correct we'll never bother?  The new
> > flag and the new placement certainly make this more critical as well.
> 
> Can you explain why the old code was wrong?
> 
> 1) ->bd_read_only and ->bd_make_it_fail
> 
> - set from userspace interface(ioctl or sysfs)
> - check in IO code path
> 
> so changing it into atomic bit doesn't make difference from user
> viewpoint.

> 
> 2) ->bd_write_holder
> 
> disk->open_mutex is held for read & write this flag
> 
> 3) ->bd_has_submit_bio
> 
> This flag is setup as oneshot before adding disk, and check in FS io code
> path.

On architectures that can't do byte-level atomics all three can corrupt
each other, and even worse bd_partno.  Granted that is only alpha these
days IIRC, but it's still buggy.
Ming Lei Nov. 23, 2023, 2:19 a.m. UTC | #10
On Wed, Nov 22, 2023 at 04:47:51AM -0800, Christoph Hellwig wrote:
> On Wed, Nov 22, 2023 at 04:19:40PM +0800, Ming Lei wrote:
> > On Tue, Nov 21, 2023 at 11:53:17PM -0800, Christoph Hellwig wrote:
> > > On Wed, Nov 22, 2023 at 03:45:24PM +0800, Ming Lei wrote:
> > > > All the existed 'bool' flags are not atomic RW, so I think it isn't
> > > > necessary to define 'bd_flags' as 'unsigned long' for replacing them.
> > > 
> > > So because the old code wasn't correct we'll never bother?  The new
> > > flag and the new placement certainly make this more critical as well.
> > 
> > Can you explain why the old code was wrong?
> > 
> > 1) ->bd_read_only and ->bd_make_it_fail
> > 
> > - set from userspace interface(ioctl or sysfs)
> > - check in IO code path
> > 
> > so changing it into atomic bit doesn't make difference from user
> > viewpoint.
> 
> > 
> > 2) ->bd_write_holder
> > 
> > disk->open_mutex is held for read & write this flag
> > 
> > 3) ->bd_has_submit_bio
> > 
> > This flag is setup as oneshot before adding disk, and check in FS io code
> > path.
> 
> On architectures that can't do byte-level atomics all three can corrupt
> each other

Yeah, C/C++ doesn't provide such guarantee, but many modern ARCHs [1]
guarantees that RW on naturally aligned type is atomic.

I verified the point on x86/arm64/ppc64le by the following code, and
all three STOREs are done in single instruction.

	struct data {
		int b;
		char a;
		char a2;
		char a3;
		char a4;
	} __attribute__((aligned(8)));
	
	void atomic_test()
	{
		struct data d;
	
		d.b = 1;
		d.a = 2;
		d.a3 = 3;
	
		printf("%d %d %d\n", d.b, d.a, d.a3);
	}

[1] https://preshing.com/20130618/atomic-vs-non-atomic-operations/

> and even worse bd_partno.  Granted that is only alpha these
> days IIRC, but it's still buggy.

bd_has_submit_bio and bd_partno can be thought as read only, and the
two can be corrupted?

bd_dev may have similar trouble with bd_partno for ARCHs which don't
provide atomic RW on naturally aligned int.


Thanks,
Ming
diff mbox series

Patch

diff --git a/block/bdev.c b/block/bdev.c
index e4cfb7adb645..10f524a7416c 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -402,10 +402,10 @@  struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	bdev->bd_partno = partno;
 	bdev->bd_inode = inode;
 	bdev->bd_queue = disk->queue;
-	if (partno)
-		bdev->bd_has_submit_bio = disk->part0->bd_has_submit_bio;
+	if (partno && bdev_flagged(disk->part0, BD_FLAG_HAS_SUBMIT_BIO))
+		bdev_set_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
 	else
-		bdev->bd_has_submit_bio = false;
+		bdev_clear_flag(bdev, BD_FLAG_HAS_SUBMIT_BIO);
 	bdev->bd_stats = alloc_percpu(struct disk_stats);
 	if (!bdev->bd_stats) {
 		iput(inode);
@@ -606,7 +606,7 @@  static void bd_end_claim(struct block_device *bdev, void *holder)
 		bdev->bd_holder = NULL;
 		bdev->bd_holder_ops = NULL;
 		mutex_unlock(&bdev->bd_holder_lock);
-		if (bdev->bd_write_holder)
+		if (bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER))
 			unblock = true;
 	}
 	if (!whole->bd_holders)
@@ -619,7 +619,7 @@  static void bd_end_claim(struct block_device *bdev, void *holder)
 	 */
 	if (unblock) {
 		disk_unblock_events(bdev->bd_disk);
-		bdev->bd_write_holder = false;
+		bdev_clear_flag(bdev, BD_FLAG_WRITE_HOLDER);
 	}
 }
 
@@ -805,9 +805,10 @@  struct block_device *blkdev_get_by_dev(dev_t dev, blk_mode_t mode, void *holder,
 		 * writeable reference is too fragile given the way @mode is
 		 * used in blkdev_get/put().
 		 */
-		if ((mode & BLK_OPEN_WRITE) && !bdev->bd_write_holder &&
+		if ((mode & BLK_OPEN_WRITE) &&
+		    !bdev_flagged(bdev, BD_FLAG_WRITE_HOLDER) &&
 		    (disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
-			bdev->bd_write_holder = true;
+			bdev_set_flag(bdev, BD_FLAG_WRITE_HOLDER);
 			unblock_events = false;
 		}
 	}
diff --git a/block/blk-core.c b/block/blk-core.c
index fdf25b8d6e78..f9f8b12ba626 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -482,7 +482,8 @@  __setup("fail_make_request=", setup_fail_make_request);
 
 bool should_fail_request(struct block_device *part, unsigned int bytes)
 {
-	return part->bd_make_it_fail && should_fail(&fail_make_request, bytes);
+	return bdev_flagged(part, BD_FLAG_MAKE_IT_FAIL) &&
+		should_fail(&fail_make_request, bytes);
 }
 
 static int __init fail_make_request_debugfs(void)
@@ -595,7 +596,7 @@  static void __submit_bio(struct bio *bio)
 	if (unlikely(!blk_crypto_bio_prep(&bio)))
 		return;
 
-	if (!bio->bi_bdev->bd_has_submit_bio) {
+	if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO)) {
 		blk_mq_submit_bio(bio);
 	} else if (likely(bio_queue_enter(bio) == 0)) {
 		struct gendisk *disk = bio->bi_bdev->bd_disk;
@@ -703,7 +704,7 @@  void submit_bio_noacct_nocheck(struct bio *bio)
 	 */
 	if (current->bio_list)
 		bio_list_add(&current->bio_list[0], bio);
-	else if (!bio->bi_bdev->bd_has_submit_bio)
+	else if (!bdev_flagged(bio->bi_bdev, BD_FLAG_HAS_SUBMIT_BIO))
 		__submit_bio_noacct_mq(bio);
 	else
 		__submit_bio_noacct(bio);
diff --git a/block/genhd.c b/block/genhd.c
index c9d06f72c587..57f96c0c8da0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -413,7 +413,8 @@  int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 	elevator_init_mq(disk->queue);
 
 	/* Mark bdev as having a submit_bio, if needed */
-	disk->part0->bd_has_submit_bio = disk->fops->submit_bio != NULL;
+	if (disk->fops->submit_bio)
+		bdev_set_flag(disk->part0, BD_FLAG_HAS_SUBMIT_BIO);
 
 	/*
 	 * If the driver provides an explicit major number it also must provide
@@ -1062,7 +1063,8 @@  static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
 ssize_t part_fail_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
+	return sprintf(buf, "%d\n",
+		       bdev_flagged(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL));
 }
 
 ssize_t part_fail_store(struct device *dev,
@@ -1071,8 +1073,13 @@  ssize_t part_fail_store(struct device *dev,
 {
 	int i;
 
-	if (count > 0 && sscanf(buf, "%d", &i) > 0)
-		dev_to_bdev(dev)->bd_make_it_fail = i;
+	if (count > 0 && sscanf(buf, "%d", &i) > 0) {
+		if (!i)
+			bdev_clear_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
+		else
+			bdev_set_flag(dev_to_bdev(dev), BD_FLAG_MAKE_IT_FAIL);
+
+	}
 
 	return count;
 }
diff --git a/block/ioctl.c b/block/ioctl.c
index 4160f4e6bd5b..a64440f4c96b 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -394,7 +394,11 @@  static int blkdev_roset(struct block_device *bdev, unsigned cmd,
 		if (ret)
 			return ret;
 	}
-	bdev->bd_read_only = n;
+
+	if (!n)
+		bdev_clear_flag(bdev, BD_FLAG_READ_ONLY);
+	else
+		bdev_set_flag(bdev, BD_FLAG_READ_ONLY);
 	return 0;
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index f7d40692dd94..de652045111b 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -37,6 +37,11 @@  struct bio_crypt_ctx;
 #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
 #define SECTOR_MASK		(PAGE_SECTORS - 1)
 
+#define BD_FLAG_READ_ONLY	0 /* read-only-policy */
+#define BD_FLAG_WRITE_HOLDER	1
+#define BD_FLAG_HAS_SUBMIT_BIO	2
+#define BD_FLAG_MAKE_IT_FAIL	3
+
 struct block_device {
 	sector_t		bd_start_sect;
 	sector_t		bd_nr_sectors;
@@ -44,10 +49,8 @@  struct block_device {
 	struct request_queue *	bd_queue;
 	struct disk_stats __percpu *bd_stats;
 	unsigned long		bd_stamp;
-	bool			bd_read_only;	/* read-only policy */
+	unsigned short		bd_flags;
 	u8			bd_partno;
-	bool			bd_write_holder;
-	bool			bd_has_submit_bio;
 	dev_t			bd_dev;
 	struct inode		*bd_inode;	/* will die */
 
@@ -67,9 +70,6 @@  struct block_device {
 	struct super_block	*bd_fsfreeze_sb;
 
 	struct partition_meta_info *bd_meta_info;
-#ifdef CONFIG_FAIL_MAKE_REQUEST
-	bool			bd_make_it_fail;
-#endif
 	/*
 	 * keep this out-of-line as it's both big and not needed in the fast
 	 * path
@@ -86,6 +86,21 @@  struct block_device {
 #define bdev_kobj(_bdev) \
 	(&((_bdev)->bd_device.kobj))
 
+static inline bool bdev_flagged(struct block_device *bdev, unsigned int bit)
+{
+	return bdev->bd_flags & (1U << bit);
+}
+
+static inline void bdev_set_flag(struct block_device *bdev, unsigned int bit)
+{
+	bdev->bd_flags |= (1U << bit);
+}
+
+static inline void bdev_clear_flag(struct block_device *bdev, unsigned int bit)
+{
+	bdev->bd_flags &= ~(1U << bit);
+}
+
 /*
  * Block error status values.  See block/blk-core:blk_errors for the details.
  * Alpha cannot write a byte atomically, so we need to use 32-bit value.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 51fa7ffdee83..fc1d55ef5107 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -741,13 +741,14 @@  void disk_uevent(struct gendisk *disk, enum kobject_action action);
 
 static inline int get_disk_ro(struct gendisk *disk)
 {
-	return disk->part0->bd_read_only ||
+	return bdev_flagged(disk->part0, BD_FLAG_READ_ONLY) ||
 		test_bit(GD_READ_ONLY, &disk->state);
 }
 
 static inline int bdev_read_only(struct block_device *bdev)
 {
-	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
+	return bdev_flagged(bdev, BD_FLAG_READ_ONLY) ||
+		get_disk_ro(bdev->bd_disk);
 }
 
 bool set_capacity_and_notify(struct gendisk *disk, sector_t size);