
[v8,9/9] null_blk: add support for copy offload

Message ID 20230327084103.21601-10-anuj20.g@samsung.com
State New, archived
Series [v8,1/9] block: Introduce queue limits for copy-offload support

Commit Message

Anuj Gupta March 27, 2023, 8:40 a.m. UTC
From: Nitesh Shetty <nj.shetty@samsung.com>

Implementation is based on the existing read and write infrastructure.

Suggested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
---
 drivers/block/null_blk/main.c     | 94 +++++++++++++++++++++++++++++++
 drivers/block/null_blk/null_blk.h |  7 +++
 2 files changed, 101 insertions(+)
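
In brief, the offload is a two-phase token handshake; a condensed view of
the flow implemented by the diff below (editorial summary of the patch,
not an addition to it):

	/*
	 * Phase 1 (REQ_COPY read): no data moves yet.
	 * nullb_setup_copy_read() fills the token page attached to the bio:
	 *     token->subsys    = "nullb"      (ownership check)
	 *     token->nullb     = nullb        (same-device check)
	 *     token->sector_in = source sector
	 *     token->sectors   = length in sectors
	 *
	 * Phase 2 (REQ_COPY write): nullb_setup_copy_write() validates the
	 * token, then copies token->sectors worth of data from sector_in
	 * to the write bio's target sector, one device block at a time,
	 * under nullb->lock.
	 */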

Comments

Damien Le Moal March 29, 2023, 9:04 a.m. UTC | #1
On 3/27/23 17:40, Anuj Gupta wrote:
> From: Nitesh Shetty <nj.shetty@samsung.com>
> 
> Implementation is based on the existing read and write infrastructure.
> 
> Suggested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
> ---
>  drivers/block/null_blk/main.c     | 94 +++++++++++++++++++++++++++++++
>  drivers/block/null_blk/null_blk.h |  7 +++
>  2 files changed, 101 insertions(+)
> 
> diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
> index 9e6b032c8ecc..84c5fbcd67a5 100644
> --- a/drivers/block/null_blk/main.c
> +++ b/drivers/block/null_blk/main.c
> @@ -1257,6 +1257,81 @@ static int null_transfer(struct nullb *nullb, struct page *page,
>  	return err;
>  }
>  
> +static inline int nullb_setup_copy_read(struct nullb *nullb,
> +		struct bio *bio)
> +{
> +	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
> +
> +	memcpy(token->subsys, "nullb", 5);
> +	token->sector_in = bio->bi_iter.bi_sector;
> +	token->nullb = nullb;
> +	token->sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> +
> +	return 0;
> +}
> +
> +static inline int nullb_setup_copy_write(struct nullb *nullb,
> +		struct bio *bio, bool is_fua)
> +{
> +	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
> +	sector_t sector_in, sector_out;
> +	void *in, *out;
> +	size_t rem, temp;
> +	unsigned long offset_in, offset_out;
> +	struct nullb_page *t_page_in, *t_page_out;
> +	int ret = -EIO;
> +
> +	if (unlikely(memcmp(token->subsys, "nullb", 5)))
> +		return -EOPNOTSUPP;
> +	if (unlikely(token->nullb != nullb))
> +		return -EOPNOTSUPP;
> +	if (WARN_ON(token->sectors != bio->bi_iter.bi_size >> SECTOR_SHIFT))
> +		return -EOPNOTSUPP;

EOPNOTSUPP is strange. These are EINVAL, no?

> +
> +	sector_in = token->sector_in;
> +	sector_out = bio->bi_iter.bi_sector;
> +	rem = token->sectors << SECTOR_SHIFT;
> +
> +	spin_lock_irq(&nullb->lock);
> +	while (rem > 0) {
> +		temp = min_t(size_t, nullb->dev->blocksize, rem);
> +		offset_in = (sector_in & SECTOR_MASK) << SECTOR_SHIFT;
> +		offset_out = (sector_out & SECTOR_MASK) << SECTOR_SHIFT;
> +
> +		if (null_cache_active(nullb) && !is_fua)
> +			null_make_cache_space(nullb, PAGE_SIZE);
> +
> +		t_page_in = null_lookup_page(nullb, sector_in, false,
> +			!null_cache_active(nullb));
> +		if (!t_page_in)
> +			goto err;
> +		t_page_out = null_insert_page(nullb, sector_out,
> +			!null_cache_active(nullb) || is_fua);
> +		if (!t_page_out)
> +			goto err;
> +
> +		in = kmap_local_page(t_page_in->page);
> +		out = kmap_local_page(t_page_out->page);
> +
> +		memcpy(out + offset_out, in + offset_in, temp);
> +		kunmap_local(out);
> +		kunmap_local(in);
> +		__set_bit(sector_out & SECTOR_MASK, t_page_out->bitmap);
> +
> +		if (is_fua)
> +			null_free_sector(nullb, sector_out, true);
> +
> +		rem -= temp;
> +		sector_in += temp >> SECTOR_SHIFT;
> +		sector_out += temp >> SECTOR_SHIFT;
> +	}
> +
> +	ret = 0;
> +err:
> +	spin_unlock_irq(&nullb->lock);
> +	return ret;
> +}
> +
>  static int null_handle_rq(struct nullb_cmd *cmd)
>  {
>  	struct request *rq = cmd->rq;
> @@ -1267,6 +1342,14 @@ static int null_handle_rq(struct nullb_cmd *cmd)
>  	struct req_iterator iter;
>  	struct bio_vec bvec;
>  
> +	if (rq->cmd_flags & REQ_COPY) {
> +		if (op_is_write(req_op(rq)))
> +			return nullb_setup_copy_write(nullb, rq->bio,
> +						rq->cmd_flags & REQ_FUA);
> +		else

No need for this else.
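
That is, since the write branch returns, the read call can simply fall
through; a sketch of the suggested shape:

	if (rq->cmd_flags & REQ_COPY) {
		if (op_is_write(req_op(rq)))
			return nullb_setup_copy_write(nullb, rq->bio,
						rq->cmd_flags & REQ_FUA);
		return nullb_setup_copy_read(nullb, rq->bio);
	}

(The same applies to the bio path in null_handle_bio() below.)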

> +			return nullb_setup_copy_read(nullb, rq->bio);
> +	}
> +
>  	spin_lock_irq(&nullb->lock);
>  	rq_for_each_segment(bvec, rq, iter) {
>  		len = bvec.bv_len;
> @@ -1294,6 +1377,14 @@ static int null_handle_bio(struct nullb_cmd *cmd)
>  	struct bio_vec bvec;
>  	struct bvec_iter iter;
>  
> +	if (bio->bi_opf & REQ_COPY) {
> +		if (op_is_write(bio_op(bio)))
> +			return nullb_setup_copy_write(nullb, bio,
> +							bio->bi_opf & REQ_FUA);
> +		else

No need for this else.

> +			return nullb_setup_copy_read(nullb, bio);
> +	}
> +
>  	spin_lock_irq(&nullb->lock);
>  	bio_for_each_segment(bvec, bio, iter) {
>  		len = bvec.bv_len;
> @@ -2146,6 +2237,9 @@ static int null_add_dev(struct nullb_device *dev)
>  	list_add_tail(&nullb->list, &nullb_list);
>  	mutex_unlock(&lock);
>  
> +	blk_queue_max_copy_sectors_hw(nullb->disk->queue, 1024);
> +	blk_queue_flag_set(QUEUE_FLAG_COPY, nullb->disk->queue);

This should NOT be unconditionally enabled with a magic value of 1K
sectors. The max copy sectors limit needs to be set through a configfs
attribute, so that copy offload support can be enabled and disabled and
both the block layer emulation and the native device support can be
exercised.
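
One possible shape, following the existing NULLB_DEVICE_ATTR() pattern in
main.c (a sketch only: the attribute name copy_max_bytes, the new
struct nullb_device field, and the nullb_device_attrs[] registration are
illustrative, not taken from this patch):

	/* new field in struct nullb_device: unsigned long copy_max_bytes; */
	NULLB_DEVICE_ATTR(copy_max_bytes, ulong, NULL);
	/* ...and add &nullb_device_attr_copy_max_bytes to nullb_device_attrs[] */

	/* in null_add_dev(), replace the unconditional setup with: */
	if (dev->copy_max_bytes) {
		blk_queue_max_copy_sectors_hw(nullb->disk->queue,
				dev->copy_max_bytes >> SECTOR_SHIFT);
		blk_queue_flag_set(QUEUE_FLAG_COPY, nullb->disk->queue);
	}

That way copy offload stays off by default and can be enabled per device
via /sys/kernel/config/nullb/<dev>/copy_max_bytes before the device is
powered on.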

> +
>  	pr_info("disk %s created\n", nullb->disk_name);
>  
>  	return 0;
> diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
> index eb5972c50be8..94e524e7306a 100644
> --- a/drivers/block/null_blk/null_blk.h
> +++ b/drivers/block/null_blk/null_blk.h
> @@ -67,6 +67,13 @@ enum {
>  	NULL_Q_MQ	= 2,
>  };
>  
> +struct nullb_copy_token {
> +	char subsys[5];
> +	struct nullb *nullb;
> +	u64 sector_in;
> +	u64 sectors;
> +};
> +
>  struct nullb_device {
>  	struct nullb *nullb;
>  	struct config_item item;
Nitesh Shetty March 29, 2023, 12:22 p.m. UTC | #2
On Wed, Mar 29, 2023 at 06:04:49PM +0900, Damien Le Moal wrote:
> On 3/27/23 17:40, Anuj Gupta wrote:
> > From: Nitesh Shetty <nj.shetty@samsung.com>
> > 
> > Implementation is based on the existing read and write infrastructure.
> > 
> > Suggested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
> > ---
> >  drivers/block/null_blk/main.c     | 94 +++++++++++++++++++++++++++++++
> >  drivers/block/null_blk/null_blk.h |  7 +++
> >  2 files changed, 101 insertions(+)
> > 
> > diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
> > index 9e6b032c8ecc..84c5fbcd67a5 100644
> > --- a/drivers/block/null_blk/main.c
> > +++ b/drivers/block/null_blk/main.c
> > @@ -1257,6 +1257,81 @@ static int null_transfer(struct nullb *nullb, struct page *page,
> >  	return err;
> >  }
> >  
> > +static inline int nullb_setup_copy_read(struct nullb *nullb,
> > +		struct bio *bio)
> > +{
> > +	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
> > +
> > +	memcpy(token->subsys, "nullb", 5);
> > +	token->sector_in = bio->bi_iter.bi_sector;
> > +	token->nullb = nullb;
> > +	token->sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline int nullb_setup_copy_write(struct nullb *nullb,
> > +		struct bio *bio, bool is_fua)
> > +{
> > +	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
> > +	sector_t sector_in, sector_out;
> > +	void *in, *out;
> > +	size_t rem, temp;
> > +	unsigned long offset_in, offset_out;
> > +	struct nullb_page *t_page_in, *t_page_out;
> > +	int ret = -EIO;
> > +
> > +	if (unlikely(memcmp(token->subsys, "nullb", 5)))
> > +		return -EOPNOTSUPP;
> > +	if (unlikely(token->nullb != nullb))
> > +		return -EOPNOTSUPP;
> > +	if (WARN_ON(token->sectors != bio->bi_iter.bi_size >> SECTOR_SHIFT))
> > +		return -EOPNOTSUPP;
> 
> EOPNOTSUPP is strange. These are EINVAL, no?
> 
Acked, will update in the next revision.

> > +
> > +	sector_in = token->sector_in;
> > +	sector_out = bio->bi_iter.bi_sector;
> > +	rem = token->sectors << SECTOR_SHIFT;
> > +
> > +	spin_lock_irq(&nullb->lock);
> > +	while (rem > 0) {
> > +		temp = min_t(size_t, nullb->dev->blocksize, rem);
> > +		offset_in = (sector_in & SECTOR_MASK) << SECTOR_SHIFT;
> > +		offset_out = (sector_out & SECTOR_MASK) << SECTOR_SHIFT;
> > +
> > +		if (null_cache_active(nullb) && !is_fua)
> > +			null_make_cache_space(nullb, PAGE_SIZE);
> > +
> > +		t_page_in = null_lookup_page(nullb, sector_in, false,
> > +			!null_cache_active(nullb));
> > +		if (!t_page_in)
> > +			goto err;
> > +		t_page_out = null_insert_page(nullb, sector_out,
> > +			!null_cache_active(nullb) || is_fua);
> > +		if (!t_page_out)
> > +			goto err;
> > +
> > +		in = kmap_local_page(t_page_in->page);
> > +		out = kmap_local_page(t_page_out->page);
> > +
> > +		memcpy(out + offset_out, in + offset_in, temp);
> > +		kunmap_local(out);
> > +		kunmap_local(in);
> > +		__set_bit(sector_out & SECTOR_MASK, t_page_out->bitmap);
> > +
> > +		if (is_fua)
> > +			null_free_sector(nullb, sector_out, true);
> > +
> > +		rem -= temp;
> > +		sector_in += temp >> SECTOR_SHIFT;
> > +		sector_out += temp >> SECTOR_SHIFT;
> > +	}
> > +
> > +	ret = 0;
> > +err:
> > +	spin_unlock_irq(&nullb->lock);
> > +	return ret;
> > +}
> > +
> >  static int null_handle_rq(struct nullb_cmd *cmd)
> >  {
> >  	struct request *rq = cmd->rq;
> > @@ -1267,6 +1342,14 @@ static int null_handle_rq(struct nullb_cmd *cmd)
> >  	struct req_iterator iter;
> >  	struct bio_vec bvec;
> >  
> > +	if (rq->cmd_flags & REQ_COPY) {
> > +		if (op_is_write(req_op(rq)))
> > +			return nullb_setup_copy_write(nullb, rq->bio,
> > +						rq->cmd_flags & REQ_FUA);
> > +		else
> 
> No need for this else.
> 

Acked.

> > +			return nullb_setup_copy_read(nullb, rq->bio);
> > +	}
> > +
> >  	spin_lock_irq(&nullb->lock);
> >  	rq_for_each_segment(bvec, rq, iter) {
> >  		len = bvec.bv_len;
> > @@ -1294,6 +1377,14 @@ static int null_handle_bio(struct nullb_cmd *cmd)
> >  	struct bio_vec bvec;
> >  	struct bvec_iter iter;
> >  
> > +	if (bio->bi_opf & REQ_COPY) {
> > +		if (op_is_write(bio_op(bio)))
> > +			return nullb_setup_copy_write(nullb, bio,
> > +							bio->bi_opf & REQ_FUA);
> > +		else
> 
> No need for this else.
> 

Acked.

> > +			return nullb_setup_copy_read(nullb, bio);
> > +	}
> > +
> >  	spin_lock_irq(&nullb->lock);
> >  	bio_for_each_segment(bvec, bio, iter) {
> >  		len = bvec.bv_len;
> > @@ -2146,6 +2237,9 @@ static int null_add_dev(struct nullb_device *dev)
> >  	list_add_tail(&nullb->list, &nullb_list);
> >  	mutex_unlock(&lock);
> >  
> > +	blk_queue_max_copy_sectors_hw(nullb->disk->queue, 1024);
> > +	blk_queue_flag_set(QUEUE_FLAG_COPY, nullb->disk->queue);
> 
> This should NOT be unconditionally enabled with a magic value of 1K
> sectors. The max copy sectors limit needs to be set through a configfs
> attribute, so that copy offload support can be enabled and disabled and
> both the block layer emulation and the native device support can be
> exercised.
> 

Acked.

> > +
> >  	pr_info("disk %s created\n", nullb->disk_name);
> >  
> >  	return 0;
> > diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
> > index eb5972c50be8..94e524e7306a 100644
> > --- a/drivers/block/null_blk/null_blk.h
> > +++ b/drivers/block/null_blk/null_blk.h
> > @@ -67,6 +67,13 @@ enum {
> >  	NULL_Q_MQ	= 2,
> >  };
> >  
> > +struct nullb_copy_token {
> > +	char subsys[5];
> > +	struct nullb *nullb;
> > +	u64 sector_in;
> > +	u64 sectors;
> > +};
> > +
> >  struct nullb_device {
> >  	struct nullb *nullb;
> >  	struct config_item item;
> 
> -- 
> Damien Le Moal
> Western Digital Research
> 
> 
Thank you,
Nitesh Shetty

Patch

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 9e6b032c8ecc..84c5fbcd67a5 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1257,6 +1257,81 @@ static int null_transfer(struct nullb *nullb, struct page *page,
 	return err;
 }
 
+static inline int nullb_setup_copy_read(struct nullb *nullb,
+		struct bio *bio)
+{
+	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
+
+	memcpy(token->subsys, "nullb", 5);
+	token->sector_in = bio->bi_iter.bi_sector;
+	token->nullb = nullb;
+	token->sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+
+	return 0;
+}
+
+static inline int nullb_setup_copy_write(struct nullb *nullb,
+		struct bio *bio, bool is_fua)
+{
+	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
+	sector_t sector_in, sector_out;
+	void *in, *out;
+	size_t rem, temp;
+	unsigned long offset_in, offset_out;
+	struct nullb_page *t_page_in, *t_page_out;
+	int ret = -EIO;
+
+	if (unlikely(memcmp(token->subsys, "nullb", 5)))
+		return -EOPNOTSUPP;
+	if (unlikely(token->nullb != nullb))
+		return -EOPNOTSUPP;
+	if (WARN_ON(token->sectors != bio->bi_iter.bi_size >> SECTOR_SHIFT))
+		return -EOPNOTSUPP;
+
+	sector_in = token->sector_in;
+	sector_out = bio->bi_iter.bi_sector;
+	rem = token->sectors << SECTOR_SHIFT;
+
+	spin_lock_irq(&nullb->lock);
+	while (rem > 0) {
+		temp = min_t(size_t, nullb->dev->blocksize, rem);
+		offset_in = (sector_in & SECTOR_MASK) << SECTOR_SHIFT;
+		offset_out = (sector_out & SECTOR_MASK) << SECTOR_SHIFT;
+
+		if (null_cache_active(nullb) && !is_fua)
+			null_make_cache_space(nullb, PAGE_SIZE);
+
+		t_page_in = null_lookup_page(nullb, sector_in, false,
+			!null_cache_active(nullb));
+		if (!t_page_in)
+			goto err;
+		t_page_out = null_insert_page(nullb, sector_out,
+			!null_cache_active(nullb) || is_fua);
+		if (!t_page_out)
+			goto err;
+
+		in = kmap_local_page(t_page_in->page);
+		out = kmap_local_page(t_page_out->page);
+
+		memcpy(out + offset_out, in + offset_in, temp);
+		kunmap_local(out);
+		kunmap_local(in);
+		__set_bit(sector_out & SECTOR_MASK, t_page_out->bitmap);
+
+		if (is_fua)
+			null_free_sector(nullb, sector_out, true);
+
+		rem -= temp;
+		sector_in += temp >> SECTOR_SHIFT;
+		sector_out += temp >> SECTOR_SHIFT;
+	}
+
+	ret = 0;
+err:
+	spin_unlock_irq(&nullb->lock);
+	return ret;
+}
+
 static int null_handle_rq(struct nullb_cmd *cmd)
 {
 	struct request *rq = cmd->rq;
@@ -1267,6 +1342,14 @@ static int null_handle_rq(struct nullb_cmd *cmd)
 	struct req_iterator iter;
 	struct bio_vec bvec;
 
+	if (rq->cmd_flags & REQ_COPY) {
+		if (op_is_write(req_op(rq)))
+			return nullb_setup_copy_write(nullb, rq->bio,
+						rq->cmd_flags & REQ_FUA);
+		else
+			return nullb_setup_copy_read(nullb, rq->bio);
+	}
+
 	spin_lock_irq(&nullb->lock);
 	rq_for_each_segment(bvec, rq, iter) {
 		len = bvec.bv_len;
@@ -1294,6 +1377,14 @@ static int null_handle_bio(struct nullb_cmd *cmd)
 	struct bio_vec bvec;
 	struct bvec_iter iter;
 
+	if (bio->bi_opf & REQ_COPY) {
+		if (op_is_write(bio_op(bio)))
+			return nullb_setup_copy_write(nullb, bio,
+							bio->bi_opf & REQ_FUA);
+		else
+			return nullb_setup_copy_read(nullb, bio);
+	}
+
 	spin_lock_irq(&nullb->lock);
 	bio_for_each_segment(bvec, bio, iter) {
 		len = bvec.bv_len;
@@ -2146,6 +2237,9 @@ static int null_add_dev(struct nullb_device *dev)
 	list_add_tail(&nullb->list, &nullb_list);
 	mutex_unlock(&lock);
 
+	blk_queue_max_copy_sectors_hw(nullb->disk->queue, 1024);
+	blk_queue_flag_set(QUEUE_FLAG_COPY, nullb->disk->queue);
+
 	pr_info("disk %s created\n", nullb->disk_name);
 
 	return 0;
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index eb5972c50be8..94e524e7306a 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -67,6 +67,13 @@ enum {
 	NULL_Q_MQ	= 2,
 };
 
+struct nullb_copy_token {
+	char subsys[5];
+	struct nullb *nullb;
+	u64 sector_in;
+	u64 sectors;
+};
+
 struct nullb_device {
 	struct nullb *nullb;
 	struct config_item item;