diff mbox series

[v4,4/7] block: introduce write-hint to stream-id conversion

Message ID 1555523406-2380-5-git-send-email-joshi.k@samsung.com (mailing list archive)
State New, archived
Headers show
Series Extend write-hint/stream infrastructure | expand

Commit Message

Kanchan Joshi April 17, 2019, 5:50 p.m. UTC
This patch moves write-hint-to-stream-id conversion in block-layer.
Earlier this was done by driver (nvme). Current conversion is of the
form "streamid = write-hint - 1", for both user and kernel streams.
Conversion takes stream limit (maintained in request queue) into
account. Write-hints beyond the exposed limit turn to 0.
A new field 'streamid' has been added in request. While 'write-hint'
field continues to exist. It keeps original value passed from upper
layer, and used during merging checks.

Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
---
 block/blk-core.c       | 20 ++++++++++++++++++++
 include/linux/blkdev.h |  1 +
 2 files changed, 21 insertions(+)

Comments

Jens Axboe April 17, 2019, 5:57 p.m. UTC | #1
On 4/17/19 11:50 AM, Kanchan Joshi wrote:
> This patch moves write-hint-to-stream-id conversion in block-layer.
> Earlier this was done by driver (nvme). Current conversion is of the
> form "streamid = write-hint - 1", for both user and kernel streams.
> Conversion takes stream limit (maintained in request queue) into
> account. Write-hints beyond the exposed limit turn to 0.
> A new field 'streamid' has been added in request. While 'write-hint'
> field continues to exist. It keeps original value passed from upper
> layer, and used during merging checks.

Why not just use the bio write hint? We already disallow merging of
dissimilar write hints, so req->bio->bi_write_hint is known to be
identical with the rest of the bio's in that chain.
Jan Kara April 18, 2019, 2:06 p.m. UTC | #2
On Wed 17-04-19 23:20:03, Kanchan Joshi wrote:
> This patch moves write-hint-to-stream-id conversion in block-layer.
> Earlier this was done by driver (nvme). Current conversion is of the
> form "streamid = write-hint - 1", for both user and kernel streams.
> Conversion takes stream limit (maintained in request queue) into
> account. Write-hints beyond the exposed limit turn to 0.
> A new field 'streamid' has been added in request. While 'write-hint'
> field continues to exist. It keeps original value passed from upper
> layer, and used during merging checks.
> 
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> ---
>  block/blk-core.c       | 20 ++++++++++++++++++++
>  include/linux/blkdev.h |  1 +
>  2 files changed, 21 insertions(+)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index a55389b..712e6b7 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -730,6 +730,25 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
>  	return false;
>  }
>  
> +enum rw_hint blk_write_hint_to_streamid(struct request *req,
> +				        struct bio *bio)
> +{
> +	enum rw_hint streamid, nr_streams;
> +	struct request_queue *q = req->q;
> +	nr_streams = q->limits.nr_streams;
> +
> +	streamid = bio->bi_write_hint;
> +	if (!nr_streams || streamid == WRITE_LIFE_NOT_SET ||
> +	    streamid == WRITE_LIFE_NONE)
> +		streamid = 0;
> +	else {
> +		--streamid;
> +		if(streamid > nr_streams)
> +			streamid = 0;
> +	}
> +	return streamid;
> +}
> +

Someone told me that stream ids are potentially persistent on the storage
so it isn't great to change the id for the same thing e.g. if we add
another user hint. So maybe we should allocate kernel hints from top as
Dave Chinner suggested? I.e., something like the following mapping function:

	if (streamid <= BLK_MAX_USER_HINTS) {
		streamid--;
		if (streamid > nr_streams)
			streamid = 0;
	} else {
		/* Kernel hints get allocated from top */
		streamid -= WRITE_LIFE_KERN_MIN;
		if (streamid > nr_streams - BLK_MAX_USER_HINTS)
			streamid = 0;
		else
			streamid = nr_streams - streamid - 1;
	}

what do you think?

								Honza
Andreas Dilger April 18, 2019, 6:58 p.m. UTC | #3
On Apr 18, 2019, at 8:06 AM, Jan Kara <jack@suse.cz> wrote:
> 
> On Wed 17-04-19 23:20:03, Kanchan Joshi wrote:
>> This patch moves write-hint-to-stream-id conversion in block-layer.
>> Earlier this was done by driver (nvme). Current conversion is of the
>> form "streamid = write-hint - 1", for both user and kernel streams.
>> Conversion takes stream limit (maintained in request queue) into
>> account. Write-hints beyond the exposed limit turn to 0.
>> A new field 'streamid' has been added in request. While 'write-hint'
>> field continues to exist. It keeps original value passed from upper
>> layer, and used during merging checks.
>> 
>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>> ---
>> block/blk-core.c       | 20 ++++++++++++++++++++
>> include/linux/blkdev.h |  1 +
>> 2 files changed, 21 insertions(+)
>> 
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index a55389b..712e6b7 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -730,6 +730,25 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
>> 	return false;
>> }
>> 
>> +enum rw_hint blk_write_hint_to_streamid(struct request *req,
>> +				        struct bio *bio)
>> +{
>> +	enum rw_hint streamid, nr_streams;
>> +	struct request_queue *q = req->q;
>> +	nr_streams = q->limits.nr_streams;
>> +
>> +	streamid = bio->bi_write_hint;
>> +	if (!nr_streams || streamid == WRITE_LIFE_NOT_SET ||
>> +	    streamid == WRITE_LIFE_NONE)
>> +		streamid = 0;
>> +	else {
>> +		--streamid;
>> +		if(streamid > nr_streams)
>> +			streamid = 0;
>> +	}
>> +	return streamid;
>> +}
>> +
> 
> Someone told me that stream ids are potentially persistent on the storage
> so it isn't great to change the id for the same thing e.g. if we add
> another user hint. So maybe we should allocate kernel hints from top as
> Dave Chinner suggested? I.e., something like the following mapping function:
> 
> 	if (streamid <= BLK_MAX_USER_HINTS) {
> 		streamid--;
> 		if (streamid > nr_streams)
> 			streamid = 0;
> 	} else {
> 		/* Kernel hints get allocated from top */
> 		streamid -= WRITE_LIFE_KERN_MIN;
> 		if (streamid > nr_streams - BLK_MAX_USER_HINTS)
> 			streamid = 0;
> 		else
> 			streamid = nr_streams - streamid - 1;
> 	}
> 
> what do you think?

Dave has expressed this sentiment several times, and I agree.  We don't
want the filesystem hint values/mapping to change over time, or it will
conflict with data that was written with the previous hints (e.g. if
data was written with a "short lifetime" hint suddenly changes to be
grouped with a "long lifetime" hint).  This is easily avoided with some
simple changes now, but harder to fix after this patch has landed.

Cheers, Andreas
Kanchan Joshi April 22, 2019, 1:33 p.m. UTC | #4
> Someone told me that stream ids are potentially persistent on the 
> storage so it isn't great to change the id for the same thing e.g. if 
> we add another user hint. So maybe we should allocate kernel hints 
> from top as Dave Chinner suggested? I.e., something like the following
mapping function:

This function is good. Thank you for sharing. 

-----Original Message-----
From: Andreas Dilger [mailto:adilger@dilger.ca] 
Sent: Friday, April 19, 2019 12:28 AM
To: Jan Kara <jack@suse.cz>
Cc: Kanchan Joshi <joshi.k@samsung.com>; open list
<linux-kernel@vger.kernel.org>; linux-block <linux-block@vger.kernel.org>;
linux-nvme@lists.infradead.org; linux-fsdevel
<linux-fsdevel@vger.kernel.org>; linux-ext4@vger.kernel.org;
prakash.v@samsung.com
Subject: Re: [PATCH v4 4/7] block: introduce write-hint to stream-id
conversion

On Apr 18, 2019, at 8:06 AM, Jan Kara <jack@suse.cz> wrote:
> 
> On Wed 17-04-19 23:20:03, Kanchan Joshi wrote:
>> This patch moves write-hint-to-stream-id conversion in block-layer.
>> Earlier this was done by driver (nvme). Current conversion is of the 
>> form "streamid = write-hint - 1", for both user and kernel streams.
>> Conversion takes stream limit (maintained in request queue) into 
>> account. Write-hints beyond the exposed limit turn to 0.
>> A new field 'streamid' has been added in request. While 'write-hint'
>> field continues to exist. It keeps original value passed from upper 
>> layer, and used during merging checks.
>> 
>> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>> ---
>> block/blk-core.c       | 20 ++++++++++++++++++++
>> include/linux/blkdev.h |  1 +
>> 2 files changed, 21 insertions(+)
>> 
>> diff --git a/block/blk-core.c b/block/blk-core.c index 
>> a55389b..712e6b7 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -730,6 +730,25 @@ bool blk_attempt_plug_merge(struct request_queue *q,
struct bio *bio,
>> 	return false;
>> }
>> 
>> +enum rw_hint blk_write_hint_to_streamid(struct request *req,
>> +				        struct bio *bio)
>> +{
>> +	enum rw_hint streamid, nr_streams;
>> +	struct request_queue *q = req->q;
>> +	nr_streams = q->limits.nr_streams;
>> +
>> +	streamid = bio->bi_write_hint;
>> +	if (!nr_streams || streamid == WRITE_LIFE_NOT_SET ||
>> +	    streamid == WRITE_LIFE_NONE)
>> +		streamid = 0;
>> +	else {
>> +		--streamid;
>> +		if(streamid > nr_streams)
>> +			streamid = 0;
>> +	}
>> +	return streamid;
>> +}
>> +
> 
> Someone told me that stream ids are potentially persistent on the 
> storage so it isn't great to change the id for the same thing e.g. if 
> we add another user hint. So maybe we should allocate kernel hints 
> from top as Dave Chinner suggested? I.e., something like the following
mapping function:
> 
> 	if (streamid <= BLK_MAX_USER_HINTS) {
> 		streamid--;
> 		if (streamid > nr_streams)
> 			streamid = 0;
> 	} else {
> 		/* Kernel hints get allocated from top */
> 		streamid -= WRITE_LIFE_KERN_MIN;
> 		if (streamid > nr_streams - BLK_MAX_USER_HINTS)
> 			streamid = 0;
> 		else
> 			streamid = nr_streams - streamid - 1;
> 	}
> 
> what do you think?

Dave has expressed this sentiment several times, and I agree.  We don't want
the filesystem hint values/mapping to change over time, or it will conflict
with data that was written with the previous hints (e.g. if data was written
with a "short lifetime" hint suddenly changes to be grouped with a "long
lifetime" hint).  This is easily avoided with some simple changes now, but
harder to fix after this patch has landed.

Cheers, Andreas
Kanchan Joshi April 22, 2019, 1:36 p.m. UTC | #5
> Why not just use the bio write hint? We already disallow merging of dissimilar write hints, so req->bio->bi_write_hint is known to be identical with the rest of the bio's in that chain.

Yes, that is better. Thanks for suggesting it.

-----Original Message-----
From: Jens Axboe [mailto:axboe@kernel.dk] 
Sent: Wednesday, April 17, 2019 11:28 PM
To: Kanchan Joshi <joshi.k@samsung.com>; linux-kernel@vger.kernel.org; linux-block@vger.kernel.org; linux-nvme@lists.infradead.org; linux-fsdevel@vger.kernel.org; linux-ext4@vger.kernel.org
Cc: prakash.v@samsung.com
Subject: Re: [PATCH v4 4/7] block: introduce write-hint to stream-id conversion

On 4/17/19 11:50 AM, Kanchan Joshi wrote:
> This patch moves write-hint-to-stream-id conversion in block-layer.
> Earlier this was done by driver (nvme). Current conversion is of the 
> form "streamid = write-hint - 1", for both user and kernel streams.
> Conversion takes stream limit (maintained in request queue) into 
> account. Write-hints beyond the exposed limit turn to 0.
> A new field 'streamid' has been added in request. While 'write-hint'
> field continues to exist. It keeps original value passed from upper 
> layer, and used during merging checks.

Why not just use the bio write hint? We already disallow merging of dissimilar write hints, so req->bio->bi_write_hint is known to be identical with the rest of the bio's in that chain.

--
Jens Axboe
diff mbox series

Patch

diff --git a/block/blk-core.c b/block/blk-core.c
index a55389b..712e6b7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -730,6 +730,25 @@  bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 	return false;
 }
 
+enum rw_hint blk_write_hint_to_streamid(struct request *req,
+				        struct bio *bio)
+{
+	enum rw_hint streamid, nr_streams;
+	struct request_queue *q = req->q;
+	nr_streams = q->limits.nr_streams;
+
+	streamid = bio->bi_write_hint;
+	if (!nr_streams || streamid == WRITE_LIFE_NOT_SET ||
+	    streamid == WRITE_LIFE_NONE)
+		streamid = 0;
+	else {
+		--streamid;
+		if(streamid > nr_streams)
+			streamid = 0;
+	}
+	return streamid;
+}
+
 void blk_init_request_from_bio(struct request *req, struct bio *bio)
 {
 	if (bio->bi_opf & REQ_RAHEAD)
@@ -738,6 +757,7 @@  void blk_init_request_from_bio(struct request *req, struct bio *bio)
 	req->__sector = bio->bi_iter.bi_sector;
 	req->ioprio = bio_prio(bio);
 	req->write_hint = bio->bi_write_hint;
+	req->streamid = blk_write_hint_to_streamid(req, bio);
 	blk_rq_bio_prep(req->q, req, bio);
 }
 EXPORT_SYMBOL_GPL(blk_init_request_from_bio);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index eb6eb60..7cd1c2d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -217,6 +217,7 @@  struct request {
 #endif
 
 	unsigned short write_hint;
+	unsigned short streamid;
 	unsigned short ioprio;
 
 	unsigned int extra_len;	/* length of alignment and padding */