[V2] block: loop: set discard granularity and alignment for block device backed loop
diff mbox series

Message ID 20200805035059.1989050-1-ming.lei@redhat.com
State New
Headers show
Series
  • [V2] block: loop: set discard granularity and alignment for block device backed loop
Related show

Commit Message

Ming Lei Aug. 5, 2020, 3:50 a.m. UTC
In case of a block device backend, if the backend supports write zeroes, the
loop device will set the queue flag QUEUE_FLAG_DISCARD. However,
limits.discard_granularity isn't set up, which is wrong;
see the following description in Documentation/ABI/testing/sysfs-block:

	A discard_granularity of 0 means that the device does not support
	discard functionality.

Especially 9b15d109a6b2 ("block: improve discard bio alignment in
__blkdev_issue_discard()") starts to take q->limits.discard_granularity
for computing max discard sectors. And zero discard granularity may cause
kernel oops, or fail discard request even though the loop queue claims
discard support via QUEUE_FLAG_DISCARD.

Fix the issue by setting up the discard granularity and alignment.

Fixes: c52abf563049 ("loop: Better discard support for block devices")
Cc: Coly Li <colyli@suse.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Xiao Ni <xni@redhat.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Evan Green <evgreen@chromium.org>
Cc: Gwendal Grignou <gwendal@chromium.org>
Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Cc: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
V2:
	- mirror backing queue's discard_granularity to loop queue
	- set discard limit parameters explicitly when QUEUE_FLAG_DISCARD is
	set

 drivers/block/loop.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

Comments

Coly Li Aug. 5, 2020, 4:39 a.m. UTC | #1
On 2020/8/5 11:50, Ming Lei wrote:
> In case of block device backend, if the backend supports write zeros, the
> loop device will set queue flag of QUEUE_FLAG_DISCARD. However,
> limits.discard_granularity isn't setup, and this way is wrong,
> see the following description in Documentation/ABI/testing/sysfs-block:
> 
> 	A discard_granularity of 0 means that the device does not support
> 	discard functionality.
> 
> Especially 9b15d109a6b2 ("block: improve discard bio alignment in
> __blkdev_issue_discard()") starts to take q->limits.discard_granularity
> for computing max discard sectors. And zero discard granularity may cause
> kernel oops, or fail discard request even though the loop queue claims
> discard support via QUEUE_FLAG_DISCARD.
> 
> Fix the issue by setup discard granularity and alignment.
> 
> Fixes: c52abf563049 ("loop: Better discard support for block devices")
> Cc: Coly Li <colyli@suse.de>
> Cc: Hannes Reinecke <hare@suse.com>
> Cc: Xiao Ni <xni@redhat.com>
> Cc: Martin K. Petersen <martin.petersen@oracle.com>
> Cc: Evan Green <evgreen@chromium.org>
> Cc: Gwendal Grignou <gwendal@chromium.org>
> Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
> Cc: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
> V2:
> 	- mirror backing queue's discard_granularity to loop queue
> 	- set discard limit parameters explicitly when QUEUE_FLAG_DISCARD is
> 	set
> 
>  drivers/block/loop.c | 33 ++++++++++++++++++---------------
>  1 file changed, 18 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index d18160146226..661c0814d63c 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -878,6 +878,7 @@ static void loop_config_discard(struct loop_device *lo)
>  	struct file *file = lo->lo_backing_file;
>  	struct inode *inode = file->f_mapping->host;
>  	struct request_queue *q = lo->lo_queue;
> +	u32 granularity, max_discard_sectors;
>  
>  	/*
>  	 * If the backing device is a block device, mirror its zeroing
> @@ -890,11 +891,10 @@ static void loop_config_discard(struct loop_device *lo)
>  		struct request_queue *backingq;
>  
>  		backingq = bdev_get_queue(inode->i_bdev);
> -		blk_queue_max_discard_sectors(q,
> -			backingq->limits.max_write_zeroes_sectors);
>  
> -		blk_queue_max_write_zeroes_sectors(q,
> -			backingq->limits.max_write_zeroes_sectors);
> +		max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
> +		granularity = backingq->limits.discard_granularity ?:
> +			queue_physical_block_size(backingq);

I assume logical_block_size >= physical_block_size, maybe
queue_logical_block_size(backing) is better ?

I am not sure, just because I see nvme host driver and virtio block
driver use the logical block size, and scsi sd driver uses
max(physical_block_size, unmap_granularity * logical_block_size).


>  
>  	/*
>  	 * We use punch hole to reclaim the free space used by the
> @@ -903,23 +903,26 @@ static void loop_config_discard(struct loop_device *lo)
>  	 * useful information.
>  	 */
>  	} else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) {
> -		q->limits.discard_granularity = 0;
> -		q->limits.discard_alignment = 0;
> -		blk_queue_max_discard_sectors(q, 0);
> -		blk_queue_max_write_zeroes_sectors(q, 0);
> +		max_discard_sectors = 0;
> +		granularity = 0;
>  
>  	} else {
> -		q->limits.discard_granularity = inode->i_sb->s_blocksize;
> -		q->limits.discard_alignment = 0;
> -
> -		blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
> -		blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
> +		max_discard_sectors = UINT_MAX >> 9;
> +		granularity = inode->i_sb->s_blocksize;
>  	}
>  
> -	if (q->limits.max_write_zeroes_sectors)
> +	if (max_discard_sectors) {
> +		q->limits.discard_granularity = granularity;
> +		blk_queue_max_discard_sectors(q, max_discard_sectors);
> +		blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
>  		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> -	else
> +	} else {
> +		q->limits.discard_granularity = 0;
> +		blk_queue_max_discard_sectors(q, 0);
> +		blk_queue_max_write_zeroes_sectors(q, 0);
>  		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
> +	}
> +	q->limits.discard_alignment = 0;
>  }
>  
>  static void loop_unprepare_queue(struct loop_device *lo)
> 

Overall the patch is good to me.

Acked-by: Coly Li <colyli@suse.de>

Thanks.

Coly Li
Ming Lei Aug. 5, 2020, 5:28 a.m. UTC | #2
On Wed, Aug 05, 2020 at 12:39:50PM +0800, Coly Li wrote:
> On 2020/8/5 11:50, Ming Lei wrote:
> > In case of block device backend, if the backend supports write zeros, the
> > loop device will set queue flag of QUEUE_FLAG_DISCARD. However,
> > limits.discard_granularity isn't setup, and this way is wrong,
> > see the following description in Documentation/ABI/testing/sysfs-block:
> > 
> > 	A discard_granularity of 0 means that the device does not support
> > 	discard functionality.
> > 
> > Especially 9b15d109a6b2 ("block: improve discard bio alignment in
> > __blkdev_issue_discard()") starts to take q->limits.discard_granularity
> > for computing max discard sectors. And zero discard granularity may cause
> > kernel oops, or fail discard request even though the loop queue claims
> > discard support via QUEUE_FLAG_DISCARD.
> > 
> > Fix the issue by setup discard granularity and alignment.
> > 
> > Fixes: c52abf563049 ("loop: Better discard support for block devices")
> > Cc: Coly Li <colyli@suse.de>
> > Cc: Hannes Reinecke <hare@suse.com>
> > Cc: Xiao Ni <xni@redhat.com>
> > Cc: Martin K. Petersen <martin.petersen@oracle.com>
> > Cc: Evan Green <evgreen@chromium.org>
> > Cc: Gwendal Grignou <gwendal@chromium.org>
> > Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
> > Cc: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
> > Cc: Christoph Hellwig <hch@lst.de>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> > V2:
> > 	- mirror backing queue's discard_granularity to loop queue
> > 	- set discard limit parameters explicitly when QUEUE_FLAG_DISCARD is
> > 	set
> > 
> >  drivers/block/loop.c | 33 ++++++++++++++++++---------------
> >  1 file changed, 18 insertions(+), 15 deletions(-)
> > 
> > diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> > index d18160146226..661c0814d63c 100644
> > --- a/drivers/block/loop.c
> > +++ b/drivers/block/loop.c
> > @@ -878,6 +878,7 @@ static void loop_config_discard(struct loop_device *lo)
> >  	struct file *file = lo->lo_backing_file;
> >  	struct inode *inode = file->f_mapping->host;
> >  	struct request_queue *q = lo->lo_queue;
> > +	u32 granularity, max_discard_sectors;
> >  
> >  	/*
> >  	 * If the backing device is a block device, mirror its zeroing
> > @@ -890,11 +891,10 @@ static void loop_config_discard(struct loop_device *lo)
> >  		struct request_queue *backingq;
> >  
> >  		backingq = bdev_get_queue(inode->i_bdev);
> > -		blk_queue_max_discard_sectors(q,
> > -			backingq->limits.max_write_zeroes_sectors);
> >  
> > -		blk_queue_max_write_zeroes_sectors(q,
> > -			backingq->limits.max_write_zeroes_sectors);
> > +		max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
> > +		granularity = backingq->limits.discard_granularity ?:
> > +			queue_physical_block_size(backingq);
> 
> I assume logical_block_size >= physical_block_size, maybe
> queue_logical_block_size(backing) is better ?

logical_block_size is <= physical_block_size, and it is set as physical
block size by following Documentation/ABI/testing/sysfs-block:

What:       /sys/block/<disk>/queue/discard_granularity
Date:       May 2011
Contact:    Martin K. Petersen <martin.petersen@oracle.com>
Description:
        Devices that support discard functionality may
        internally allocate space using units that are bigger
        than the logical block size. The discard_granularity
        parameter indicates the size of the internal allocation
        unit in bytes if reported by the device. Otherwise the
        discard_granularity will be set to match the device's
        physical block size. A discard_granularity of 0 means
        that the device does not support discard functionality.

> 
> I am not sure, just because I see nvme host driver and virtio block
> driver use the logical block size, and scsi sd driver uses
> max(physical_block_size, unmap_granularity * logical_block_size).
> 
> 
> >  
> >  	/*
> >  	 * We use punch hole to reclaim the free space used by the
> > @@ -903,23 +903,26 @@ static void loop_config_discard(struct loop_device *lo)
> >  	 * useful information.
> >  	 */
> >  	} else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) {
> > -		q->limits.discard_granularity = 0;
> > -		q->limits.discard_alignment = 0;
> > -		blk_queue_max_discard_sectors(q, 0);
> > -		blk_queue_max_write_zeroes_sectors(q, 0);
> > +		max_discard_sectors = 0;
> > +		granularity = 0;
> >  
> >  	} else {
> > -		q->limits.discard_granularity = inode->i_sb->s_blocksize;
> > -		q->limits.discard_alignment = 0;
> > -
> > -		blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
> > -		blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
> > +		max_discard_sectors = UINT_MAX >> 9;
> > +		granularity = inode->i_sb->s_blocksize;
> >  	}
> >  
> > -	if (q->limits.max_write_zeroes_sectors)
> > +	if (max_discard_sectors) {
> > +		q->limits.discard_granularity = granularity;
> > +		blk_queue_max_discard_sectors(q, max_discard_sectors);
> > +		blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
> >  		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
> > -	else
> > +	} else {
> > +		q->limits.discard_granularity = 0;
> > +		blk_queue_max_discard_sectors(q, 0);
> > +		blk_queue_max_write_zeroes_sectors(q, 0);
> >  		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
> > +	}
> > +	q->limits.discard_alignment = 0;
> >  }
> >  
> >  static void loop_unprepare_queue(struct loop_device *lo)
> > 
> 
> Overall the patch is good to me.
> 
> Acked-by: Coly Li <colyli@suse.de>

Thanks!
Coly Li Aug. 5, 2020, 5:32 a.m. UTC | #3
On 2020/8/5 13:28, Ming Lei wrote:
> On Wed, Aug 05, 2020 at 12:39:50PM +0800, Coly Li wrote:
>> On 2020/8/5 11:50, Ming Lei wrote:
>>> In case of block device backend, if the backend supports write zeros, the
>>> loop device will set queue flag of QUEUE_FLAG_DISCARD. However,
>>> limits.discard_granularity isn't setup, and this way is wrong,
>>> see the following description in Documentation/ABI/testing/sysfs-block:
>>>
>>> 	A discard_granularity of 0 means that the device does not support
>>> 	discard functionality.
>>>
>>> Especially 9b15d109a6b2 ("block: improve discard bio alignment in
>>> __blkdev_issue_discard()") starts to take q->limits.discard_granularity
>>> for computing max discard sectors. And zero discard granularity may cause
>>> kernel oops, or fail discard request even though the loop queue claims
>>> discard support via QUEUE_FLAG_DISCARD.
>>>
>>> Fix the issue by setup discard granularity and alignment.
>>>
>>> Fixes: c52abf563049 ("loop: Better discard support for block devices")
>>> Cc: Coly Li <colyli@suse.de>
>>> Cc: Hannes Reinecke <hare@suse.com>
>>> Cc: Xiao Ni <xni@redhat.com>
>>> Cc: Martin K. Petersen <martin.petersen@oracle.com>
>>> Cc: Evan Green <evgreen@chromium.org>
>>> Cc: Gwendal Grignou <gwendal@chromium.org>
>>> Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
>>> Cc: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
>>> Cc: Christoph Hellwig <hch@lst.de>
>>> Signed-off-by: Ming Lei <ming.lei@redhat.com>
>>> ---
>>> V2:
>>> 	- mirror backing queue's discard_granularity to loop queue
>>> 	- set discard limit parameters explicitly when QUEUE_FLAG_DISCARD is
>>> 	set
>>>
>>>  drivers/block/loop.c | 33 ++++++++++++++++++---------------
>>>  1 file changed, 18 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
>>> index d18160146226..661c0814d63c 100644
>>> --- a/drivers/block/loop.c
>>> +++ b/drivers/block/loop.c
>>> @@ -878,6 +878,7 @@ static void loop_config_discard(struct loop_device *lo)
>>>  	struct file *file = lo->lo_backing_file;
>>>  	struct inode *inode = file->f_mapping->host;
>>>  	struct request_queue *q = lo->lo_queue;
>>> +	u32 granularity, max_discard_sectors;
>>>  
>>>  	/*
>>>  	 * If the backing device is a block device, mirror its zeroing
>>> @@ -890,11 +891,10 @@ static void loop_config_discard(struct loop_device *lo)
>>>  		struct request_queue *backingq;
>>>  
>>>  		backingq = bdev_get_queue(inode->i_bdev);
>>> -		blk_queue_max_discard_sectors(q,
>>> -			backingq->limits.max_write_zeroes_sectors);
>>>  
>>> -		blk_queue_max_write_zeroes_sectors(q,
>>> -			backingq->limits.max_write_zeroes_sectors);
>>> +		max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
>>> +		granularity = backingq->limits.discard_granularity ?:
>>> +			queue_physical_block_size(backingq);
>>
>> I assume logical_block_size >= physical_block_size, maybe
>> queue_logical_block_size(backing) is better ?
> 
> logical_block_size is <= physical_block_size, and it is set as physical
> block size by following Documentation/ABI/testing/sysfs-block:
> 
> What:       /sys/block/<disk>/queue/discard_granularity
> Date:       May 2011
> Contact:    Martin K. Petersen <martin.petersen@oracle.com>
> Description:
>         Devices that support discard functionality may
>         internally allocate space using units that are bigger
>         than the logical block size. The discard_granularity
>         parameter indicates the size of the internal allocation
>         unit in bytes if reported by the device. Otherwise the
>         discard_granularity will be set to match the device's
>         physical block size. A discard_granularity of 0 means
>         that the device does not support discard functionality.
> 

Thanks for the hint :-)

Coly Li

Patch
diff mbox series

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d18160146226..661c0814d63c 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -878,6 +878,7 @@  static void loop_config_discard(struct loop_device *lo)
 	struct file *file = lo->lo_backing_file;
 	struct inode *inode = file->f_mapping->host;
 	struct request_queue *q = lo->lo_queue;
+	u32 granularity, max_discard_sectors;
 
 	/*
 	 * If the backing device is a block device, mirror its zeroing
@@ -890,11 +891,10 @@  static void loop_config_discard(struct loop_device *lo)
 		struct request_queue *backingq;
 
 		backingq = bdev_get_queue(inode->i_bdev);
-		blk_queue_max_discard_sectors(q,
-			backingq->limits.max_write_zeroes_sectors);
 
-		blk_queue_max_write_zeroes_sectors(q,
-			backingq->limits.max_write_zeroes_sectors);
+		max_discard_sectors = backingq->limits.max_write_zeroes_sectors;
+		granularity = backingq->limits.discard_granularity ?:
+			queue_physical_block_size(backingq);
 
 	/*
 	 * We use punch hole to reclaim the free space used by the
@@ -903,23 +903,26 @@  static void loop_config_discard(struct loop_device *lo)
 	 * useful information.
 	 */
 	} else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) {
-		q->limits.discard_granularity = 0;
-		q->limits.discard_alignment = 0;
-		blk_queue_max_discard_sectors(q, 0);
-		blk_queue_max_write_zeroes_sectors(q, 0);
+		max_discard_sectors = 0;
+		granularity = 0;
 
 	} else {
-		q->limits.discard_granularity = inode->i_sb->s_blocksize;
-		q->limits.discard_alignment = 0;
-
-		blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
-		blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
+		max_discard_sectors = UINT_MAX >> 9;
+		granularity = inode->i_sb->s_blocksize;
 	}
 
-	if (q->limits.max_write_zeroes_sectors)
+	if (max_discard_sectors) {
+		q->limits.discard_granularity = granularity;
+		blk_queue_max_discard_sectors(q, max_discard_sectors);
+		blk_queue_max_write_zeroes_sectors(q, max_discard_sectors);
 		blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
-	else
+	} else {
+		q->limits.discard_granularity = 0;
+		blk_queue_max_discard_sectors(q, 0);
+		blk_queue_max_write_zeroes_sectors(q, 0);
 		blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
+	}
+	q->limits.discard_alignment = 0;
 }
 
 static void loop_unprepare_queue(struct loop_device *lo)