diff mbox series

[v2,8/8] null_blk: Support configuring the maximum segment size

Message ID 20221123205740.463185-9-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series Add support for segments smaller than one page | expand

Commit Message

Bart Van Assche Nov. 23, 2022, 8:57 p.m. UTC
Add support for configuring the maximum segment size.

Add support for segments smaller than the page size.

This patch enables testing segments smaller than the page size with a
driver that does not call blk_rq_map_sg().

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Cc: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 drivers/block/null_blk/main.c     | 20 +++++++++++++++++---
 drivers/block/null_blk/null_blk.h |  1 +
 2 files changed, 18 insertions(+), 3 deletions(-)

Comments

Damien Le Moal Nov. 24, 2022, 1:40 a.m. UTC | #1
On 11/24/22 05:57, Bart Van Assche wrote:
> Add support for configuring the maximum segment size.
> 
> Add support for segments smaller than the page size.
> 
> This patch enables testing segments smaller than the page size with a
> driver that does not call blk_rq_map_sg().
> 
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ming Lei <ming.lei@redhat.com>
> Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> Cc: Chaitanya Kulkarni <kch@nvidia.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  drivers/block/null_blk/main.c     | 20 +++++++++++++++++---
>  drivers/block/null_blk/null_blk.h |  1 +
>  2 files changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
> index 1f154f92f4c2..bc811ab52c4a 100644
> --- a/drivers/block/null_blk/main.c
> +++ b/drivers/block/null_blk/main.c
> @@ -157,6 +157,10 @@ static int g_max_sectors;
>  module_param_named(max_sectors, g_max_sectors, int, 0444);
>  MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)");
>  
> +static unsigned int g_max_segment_size = 1UL << 31;

1UL is unsigned long but this var is unsigned int. Why not simply use
UINT_MAX here? Do you prefer the 2GB value? If yes, then maybe at least
change that to "1U << 31", no?

> +module_param_named(max_segment_size, g_max_segment_size, int, 0444);
> +MODULE_PARM_DESC(max_segment_size, "Maximum size of a segment in bytes");
> +
>  static unsigned int nr_devices = 1;
>  module_param(nr_devices, uint, 0444);
>  MODULE_PARM_DESC(nr_devices, "Number of devices to register");
> @@ -409,6 +413,7 @@ NULLB_DEVICE_ATTR(home_node, uint, NULL);
>  NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
>  NULLB_DEVICE_ATTR(blocksize, uint, NULL);
>  NULLB_DEVICE_ATTR(max_sectors, uint, NULL);
> +NULLB_DEVICE_ATTR(max_segment_size, uint, NULL);
>  NULLB_DEVICE_ATTR(irqmode, uint, NULL);
>  NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
>  NULLB_DEVICE_ATTR(index, uint, NULL);
> @@ -532,6 +537,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
>  	&nullb_device_attr_queue_mode,
>  	&nullb_device_attr_blocksize,
>  	&nullb_device_attr_max_sectors,
> +	&nullb_device_attr_max_segment_size,
>  	&nullb_device_attr_irqmode,
>  	&nullb_device_attr_hw_queue_depth,
>  	&nullb_device_attr_index,
> @@ -610,7 +616,8 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page)
>  	return snprintf(page, PAGE_SIZE,
>  			"badblocks,blocking,blocksize,cache_size,"
>  			"completion_nsec,discard,home_node,hw_queue_depth,"
> -			"irqmode,max_sectors,mbps,memory_backed,no_sched,"
> +			"irqmode,max_sectors,max_segment_size,mbps,"
> +			"memory_backed,no_sched,"
>  			"poll_queues,power,queue_mode,shared_tag_bitmap,size,"
>  			"submit_queues,use_per_node_hctx,virt_boundary,zoned,"
>  			"zone_capacity,zone_max_active,zone_max_open,"
> @@ -673,6 +680,7 @@ static struct nullb_device *null_alloc_dev(void)
>  	dev->queue_mode = g_queue_mode;
>  	dev->blocksize = g_bs;
>  	dev->max_sectors = g_max_sectors;
> +	dev->max_segment_size = g_max_segment_size;
>  	dev->irqmode = g_irqmode;
>  	dev->hw_queue_depth = g_hw_queue_depth;
>  	dev->blocking = g_blocking;
> @@ -1214,6 +1222,8 @@ static int null_transfer(struct nullb *nullb, struct page *page,
>  	unsigned int valid_len = len;
>  	int err = 0;
>  
> +	WARN_ONCE(len > dev->max_segment_size, "%u > %u\n", len,
> +		  dev->max_segment_size);
>  	if (!is_write) {
>  		if (dev->zoned)
>  			valid_len = null_zone_valid_read_len(nullb,
> @@ -1249,7 +1259,8 @@ static int null_handle_rq(struct nullb_cmd *cmd)
>  
>  	spin_lock_irq(&nullb->lock);
>  	rq_for_each_segment(bvec, rq, iter) {
> -		len = bvec.bv_len;
> +		len = min(bvec.bv_len, nullb->dev->max_segment_size);
> +		bvec.bv_len = len;
>  		err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
>  				     op_is_write(req_op(rq)), sector,
>  				     rq->cmd_flags & REQ_FUA);
> @@ -1276,7 +1287,8 @@ static int null_handle_bio(struct nullb_cmd *cmd)
>  
>  	spin_lock_irq(&nullb->lock);
>  	bio_for_each_segment(bvec, bio, iter) {
> -		len = bvec.bv_len;
> +		len = min(bvec.bv_len, nullb->dev->max_segment_size);
> +		bvec.bv_len = len;
>  		err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
>  				     op_is_write(bio_op(bio)), sector,
>  				     bio->bi_opf & REQ_FUA);
> @@ -2088,6 +2100,7 @@ static int null_add_dev(struct nullb_device *dev)
>  	nullb->q->queuedata = nullb;
>  	blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
>  	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
> +	blk_queue_flag_set(QUEUE_FLAG_SUB_PAGE_SEGMENTS, nullb->q);
>  
>  	mutex_lock(&lock);
>  	rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
> @@ -2106,6 +2119,7 @@ static int null_add_dev(struct nullb_device *dev)
>  	dev->max_sectors = min_t(unsigned int, dev->max_sectors,
>  				 BLK_DEF_MAX_SECTORS);
>  	blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);
> +	blk_queue_max_segment_size(nullb->q, dev->max_segment_size);

Should we keep the ability to use the kernel default value as the default
here ?
E.g.

	if (dev->max_segment_size)
		blk_queue_max_segment_size(nullb->q,
				dev->max_segment_size);

If yes, then g_max_segment_size initial value should be 0, meaning "kernel
default".

>  
>  	if (dev->virt_boundary)
>  		blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1);
> diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
> index 94ff68052b1e..6784ee9f5fda 100644
> --- a/drivers/block/null_blk/null_blk.h
> +++ b/drivers/block/null_blk/null_blk.h
> @@ -102,6 +102,7 @@ struct nullb_device {
>  	unsigned int queue_mode; /* block interface */
>  	unsigned int blocksize; /* block size */
>  	unsigned int max_sectors; /* Max sectors per command */
> +	unsigned int max_segment_size; /* Max size of a single DMA segment. */
>  	unsigned int irqmode; /* IRQ completion handler */
>  	unsigned int hw_queue_depth; /* queue depth */
>  	unsigned int index; /* index of the disk, only valid with a disk */
Bart Van Assche Nov. 30, 2022, 10:29 p.m. UTC | #2
On 11/23/22 17:40, Damien Le Moal wrote:
> On 11/24/22 05:57, Bart Van Assche wrote:
>> +static unsigned int g_max_segment_size = 1UL << 31;
> 
> 1UL is unsigned long but this var is unsigned int. Why not simply use
> UINT_MAX here? Do you prefer the 2GB value? If yes, then maybe at least
> change that to "1U << 31", no?
> 
> [ ... ]
>> @@ -2106,6 +2119,7 @@ static int null_add_dev(struct nullb_device *dev)
>>   	dev->max_sectors = min_t(unsigned int, dev->max_sectors,
>>   				 BLK_DEF_MAX_SECTORS);
>>   	blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);
>> +	blk_queue_max_segment_size(nullb->q, dev->max_segment_size);
> 
> Should we keep the ability to use the kernel default value as the default
> here ?
> E.g.
> 
> 	if (dev->max_segment_size)
> 		blk_queue_max_segment_size(nullb->q,
> 				dev->max_segment_size);
> 
> If yes, then g_max_segment_size initial value should be 0, meaning "kernel
> default".

Hi Damien,

How about changing the default value for g_max_segment_size from
1UL << 31 to BLK_MAX_SEGMENT_SIZE? That will simplify the code and
also prevent this patch from changing the behavior of the null_blk
driver if g_max_segment_size is not modified by the user.

Thanks,

Bart.
Damien Le Moal Dec. 1, 2022, 12:39 a.m. UTC | #3
On 12/1/22 07:29, Bart Van Assche wrote:
> On 11/23/22 17:40, Damien Le Moal wrote:
>> On 11/24/22 05:57, Bart Van Assche wrote:
>>> +static unsigned int g_max_segment_size = 1UL << 31;
>>
>> 1UL is unsigned long but this var is unsigned int. Why not simply use
>> UINT_MAX here? Do you prefer the 2GB value? If yes, then maybe at least
>> change that to "1U << 31", no?
>>
>> [ ... ]
>>> @@ -2106,6 +2119,7 @@ static int null_add_dev(struct nullb_device *dev)
>>>   	dev->max_sectors = min_t(unsigned int, dev->max_sectors,
>>>   				 BLK_DEF_MAX_SECTORS);
>>>   	blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);
>>> +	blk_queue_max_segment_size(nullb->q, dev->max_segment_size);
>>
>> Should we keep the ability to use the kernel default value as the default
>> here ?
>> E.g.
>>
>> 	if (dev->max_segment_size)
>> 		blk_queue_max_segment_size(nullb->q,
>> 				dev->max_segment_size);
>>
>> If yes, then g_max_segment_size initial value should be 0, meaning "kernel
>> default".
> 
> Hi Damien,
> 
> How about changing the default value for g_max_segment_size from
> 1UL << 31 to BLK_MAX_SEGMENT_SIZE? That will simplify the code and
> also prevent this patch from changing the behavior of the null_blk
> driver if g_max_segment_size is not modified by the user.

Sounds good to me.

> 
> Thanks,
> 
> Bart.
>
diff mbox series

Patch

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 1f154f92f4c2..bc811ab52c4a 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -157,6 +157,10 @@  static int g_max_sectors;
 module_param_named(max_sectors, g_max_sectors, int, 0444);
 MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)");
 
+static unsigned int g_max_segment_size = 1UL << 31;
+module_param_named(max_segment_size, g_max_segment_size, int, 0444);
+MODULE_PARM_DESC(max_segment_size, "Maximum size of a segment in bytes");
+
 static unsigned int nr_devices = 1;
 module_param(nr_devices, uint, 0444);
 MODULE_PARM_DESC(nr_devices, "Number of devices to register");
@@ -409,6 +413,7 @@  NULLB_DEVICE_ATTR(home_node, uint, NULL);
 NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
 NULLB_DEVICE_ATTR(blocksize, uint, NULL);
 NULLB_DEVICE_ATTR(max_sectors, uint, NULL);
+NULLB_DEVICE_ATTR(max_segment_size, uint, NULL);
 NULLB_DEVICE_ATTR(irqmode, uint, NULL);
 NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
 NULLB_DEVICE_ATTR(index, uint, NULL);
@@ -532,6 +537,7 @@  static struct configfs_attribute *nullb_device_attrs[] = {
 	&nullb_device_attr_queue_mode,
 	&nullb_device_attr_blocksize,
 	&nullb_device_attr_max_sectors,
+	&nullb_device_attr_max_segment_size,
 	&nullb_device_attr_irqmode,
 	&nullb_device_attr_hw_queue_depth,
 	&nullb_device_attr_index,
@@ -610,7 +616,8 @@  static ssize_t memb_group_features_show(struct config_item *item, char *page)
 	return snprintf(page, PAGE_SIZE,
 			"badblocks,blocking,blocksize,cache_size,"
 			"completion_nsec,discard,home_node,hw_queue_depth,"
-			"irqmode,max_sectors,mbps,memory_backed,no_sched,"
+			"irqmode,max_sectors,max_segment_size,mbps,"
+			"memory_backed,no_sched,"
 			"poll_queues,power,queue_mode,shared_tag_bitmap,size,"
 			"submit_queues,use_per_node_hctx,virt_boundary,zoned,"
 			"zone_capacity,zone_max_active,zone_max_open,"
@@ -673,6 +680,7 @@  static struct nullb_device *null_alloc_dev(void)
 	dev->queue_mode = g_queue_mode;
 	dev->blocksize = g_bs;
 	dev->max_sectors = g_max_sectors;
+	dev->max_segment_size = g_max_segment_size;
 	dev->irqmode = g_irqmode;
 	dev->hw_queue_depth = g_hw_queue_depth;
 	dev->blocking = g_blocking;
@@ -1214,6 +1222,8 @@  static int null_transfer(struct nullb *nullb, struct page *page,
 	unsigned int valid_len = len;
 	int err = 0;
 
+	WARN_ONCE(len > dev->max_segment_size, "%u > %u\n", len,
+		  dev->max_segment_size);
 	if (!is_write) {
 		if (dev->zoned)
 			valid_len = null_zone_valid_read_len(nullb,
@@ -1249,7 +1259,8 @@  static int null_handle_rq(struct nullb_cmd *cmd)
 
 	spin_lock_irq(&nullb->lock);
 	rq_for_each_segment(bvec, rq, iter) {
-		len = bvec.bv_len;
+		len = min(bvec.bv_len, nullb->dev->max_segment_size);
+		bvec.bv_len = len;
 		err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
 				     op_is_write(req_op(rq)), sector,
 				     rq->cmd_flags & REQ_FUA);
@@ -1276,7 +1287,8 @@  static int null_handle_bio(struct nullb_cmd *cmd)
 
 	spin_lock_irq(&nullb->lock);
 	bio_for_each_segment(bvec, bio, iter) {
-		len = bvec.bv_len;
+		len = min(bvec.bv_len, nullb->dev->max_segment_size);
+		bvec.bv_len = len;
 		err = null_transfer(nullb, bvec.bv_page, len, bvec.bv_offset,
 				     op_is_write(bio_op(bio)), sector,
 				     bio->bi_opf & REQ_FUA);
@@ -2088,6 +2100,7 @@  static int null_add_dev(struct nullb_device *dev)
 	nullb->q->queuedata = nullb;
 	blk_queue_flag_set(QUEUE_FLAG_NONROT, nullb->q);
 	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
+	blk_queue_flag_set(QUEUE_FLAG_SUB_PAGE_SEGMENTS, nullb->q);
 
 	mutex_lock(&lock);
 	rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
@@ -2106,6 +2119,7 @@  static int null_add_dev(struct nullb_device *dev)
 	dev->max_sectors = min_t(unsigned int, dev->max_sectors,
 				 BLK_DEF_MAX_SECTORS);
 	blk_queue_max_hw_sectors(nullb->q, dev->max_sectors);
+	blk_queue_max_segment_size(nullb->q, dev->max_segment_size);
 
 	if (dev->virt_boundary)
 		blk_queue_virt_boundary(nullb->q, PAGE_SIZE - 1);
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 94ff68052b1e..6784ee9f5fda 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -102,6 +102,7 @@  struct nullb_device {
 	unsigned int queue_mode; /* block interface */
 	unsigned int blocksize; /* block size */
 	unsigned int max_sectors; /* Max sectors per command */
+	unsigned int max_segment_size; /* Max size of a single DMA segment. */
 	unsigned int irqmode; /* IRQ completion handler */
 	unsigned int hw_queue_depth; /* queue depth */
 	unsigned int index; /* index of the disk, only valid with a disk */