Message ID | 20230327084103.21601-10-anuj20.g@samsung.com (mailing list archive)
---|---
State | New, archived
Series | [v8,1/9] block: Introduce queue limits for copy-offload support
On 3/27/23 17:40, Anuj Gupta wrote:
> From: Nitesh Shetty <nj.shetty@samsung.com>
>
> Implementation is based on existing read and write infrastructure.
>
> Suggested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
> ---
>  drivers/block/null_blk/main.c     | 94 +++++++++++++++++++++++++++++++
>  drivers/block/null_blk/null_blk.h |  7 +++
>  2 files changed, 101 insertions(+)
>
> diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
> index 9e6b032c8ecc..84c5fbcd67a5 100644
> --- a/drivers/block/null_blk/main.c
> +++ b/drivers/block/null_blk/main.c
> @@ -1257,6 +1257,81 @@ static int null_transfer(struct nullb *nullb, struct page *page,
>  	return err;
>  }
>
> +static inline int nullb_setup_copy_read(struct nullb *nullb,
> +		struct bio *bio)
> +{
> +	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
> +
> +	memcpy(token->subsys, "nullb", 5);
> +	token->sector_in = bio->bi_iter.bi_sector;
> +	token->nullb = nullb;
> +	token->sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> +
> +	return 0;
> +}
> +
> +static inline int nullb_setup_copy_write(struct nullb *nullb,
> +		struct bio *bio, bool is_fua)
> +{
> +	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
> +	sector_t sector_in, sector_out;
> +	void *in, *out;
> +	size_t rem, temp;
> +	unsigned long offset_in, offset_out;
> +	struct nullb_page *t_page_in, *t_page_out;
> +	int ret = -EIO;
> +
> +	if (unlikely(memcmp(token->subsys, "nullb", 5)))
> +		return -EOPNOTSUPP;
> +	if (unlikely(token->nullb != nullb))
> +		return -EOPNOTSUPP;
> +	if (WARN_ON(token->sectors != bio->bi_iter.bi_size >> SECTOR_SHIFT))
> +		return -EOPNOTSUPP;

EOPNOTSUPP is strange. These are EINVAL, no?

> +
> +	sector_in = token->sector_in;
> +	sector_out = bio->bi_iter.bi_sector;
> +	rem = token->sectors << SECTOR_SHIFT;
> +
> +	spin_lock_irq(&nullb->lock);
> +	while (rem > 0) {
> +		temp = min_t(size_t, nullb->dev->blocksize, rem);
> +		offset_in = (sector_in & SECTOR_MASK) << SECTOR_SHIFT;
> +		offset_out = (sector_out & SECTOR_MASK) << SECTOR_SHIFT;
> +
> +		if (null_cache_active(nullb) && !is_fua)
> +			null_make_cache_space(nullb, PAGE_SIZE);
> +
> +		t_page_in = null_lookup_page(nullb, sector_in, false,
> +			!null_cache_active(nullb));
> +		if (!t_page_in)
> +			goto err;
> +		t_page_out = null_insert_page(nullb, sector_out,
> +			!null_cache_active(nullb) || is_fua);
> +		if (!t_page_out)
> +			goto err;
> +
> +		in = kmap_local_page(t_page_in->page);
> +		out = kmap_local_page(t_page_out->page);
> +
> +		memcpy(out + offset_out, in + offset_in, temp);
> +		kunmap_local(out);
> +		kunmap_local(in);
> +		__set_bit(sector_out & SECTOR_MASK, t_page_out->bitmap);
> +
> +		if (is_fua)
> +			null_free_sector(nullb, sector_out, true);
> +
> +		rem -= temp;
> +		sector_in += temp >> SECTOR_SHIFT;
> +		sector_out += temp >> SECTOR_SHIFT;
> +	}
> +
> +	ret = 0;
> +err:
> +	spin_unlock_irq(&nullb->lock);
> +	return ret;
> +}
> +
>  static int null_handle_rq(struct nullb_cmd *cmd)
>  {
>  	struct request *rq = cmd->rq;
> @@ -1267,6 +1342,14 @@ static int null_handle_rq(struct nullb_cmd *cmd)
>  	struct req_iterator iter;
>  	struct bio_vec bvec;
>
> +	if (rq->cmd_flags & REQ_COPY) {
> +		if (op_is_write(req_op(rq)))
> +			return nullb_setup_copy_write(nullb, rq->bio,
> +					rq->cmd_flags & REQ_FUA);
> +		else

No need for this else.

> +			return nullb_setup_copy_read(nullb, rq->bio);
> +	}
> +
>  	spin_lock_irq(&nullb->lock);
>  	rq_for_each_segment(bvec, rq, iter) {
>  		len = bvec.bv_len;
> @@ -1294,6 +1377,14 @@ static int null_handle_bio(struct nullb_cmd *cmd)
>  	struct bio_vec bvec;
>  	struct bvec_iter iter;
>
> +	if (bio->bi_opf & REQ_COPY) {
> +		if (op_is_write(bio_op(bio)))
> +			return nullb_setup_copy_write(nullb, bio,
> +					bio->bi_opf & REQ_FUA);
> +		else

No need for this else.

> +			return nullb_setup_copy_read(nullb, bio);
> +	}
> +
>  	spin_lock_irq(&nullb->lock);
>  	bio_for_each_segment(bvec, bio, iter) {
>  		len = bvec.bv_len;
> @@ -2146,6 +2237,9 @@ static int null_add_dev(struct nullb_device *dev)
>  	list_add_tail(&nullb->list, &nullb_list);
>  	mutex_unlock(&lock);
>
> +	blk_queue_max_copy_sectors_hw(nullb->disk->queue, 1024);
> +	blk_queue_flag_set(QUEUE_FLAG_COPY, nullb->disk->queue);

This should NOT be unconditionally enabled with a magic value of 1K sectors.
The max copy sectors needs to be set with a configfs attribute so that we can
enable/disable the copy offload support, to be able to exercise both block
layer emulation and native device support.

> +
>  	pr_info("disk %s created\n", nullb->disk_name);
>
>  	return 0;
> diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
> index eb5972c50be8..94e524e7306a 100644
> --- a/drivers/block/null_blk/null_blk.h
> +++ b/drivers/block/null_blk/null_blk.h
> @@ -67,6 +67,13 @@ enum {
> 	NULL_Q_MQ = 2,
>  };
>
> +struct nullb_copy_token {
> +	char subsys[5];
> +	struct nullb *nullb;
> +	u64 sector_in;
> +	u64 sectors;
> +};
> +
>  struct nullb_device {
> 	struct nullb *nullb;
> 	struct config_item item;

--
Damien Le Moal
Western Digital Research
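Taken together, the two smaller comments above suggest a shape like the
following (a sketch of the reviewer's suggestions, not code from any posted
revision). The token sanity checks reject malformed input with -EINVAL:

	/* A malformed token is invalid input, not an unsupported
	 * operation, hence -EINVAL per the review comment. */
	if (unlikely(memcmp(token->subsys, "nullb", 5)))
		return -EINVAL;
	if (unlikely(token->nullb != nullb))
		return -EINVAL;
	if (WARN_ON(token->sectors != bio->bi_iter.bi_size >> SECTOR_SHIFT))
		return -EINVAL;

and in the two dispatch paths the else after a return can simply be dropped:

	if (rq->cmd_flags & REQ_COPY) {
		if (op_is_write(req_op(rq)))
			return nullb_setup_copy_write(nullb, rq->bio,
					rq->cmd_flags & REQ_FUA);
		/* The write branch returned, so no else is needed. */
		return nullb_setup_copy_read(nullb, rq->bio);
	}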
On Wed, Mar 29, 2023 at 06:04:49PM +0900, Damien Le Moal wrote:
> On 3/27/23 17:40, Anuj Gupta wrote:
> > From: Nitesh Shetty <nj.shetty@samsung.com>
> >
> > Implementation is based on existing read and write infrastructure.
> >
> > Suggested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
> > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> > Signed-off-by: Nitesh Shetty <nj.shetty@samsung.com>
> > Signed-off-by: Vincent Fu <vincent.fu@samsung.com>
> > ---
> >  drivers/block/null_blk/main.c     | 94 +++++++++++++++++++++++++++++++
> >  drivers/block/null_blk/null_blk.h |  7 +++
> >  2 files changed, 101 insertions(+)
> >
> > diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
> > index 9e6b032c8ecc..84c5fbcd67a5 100644
> > --- a/drivers/block/null_blk/main.c
> > +++ b/drivers/block/null_blk/main.c
> > @@ -1257,6 +1257,81 @@ static int null_transfer(struct nullb *nullb, struct page *page,
> >  	return err;
> >  }
> >
> > +static inline int nullb_setup_copy_read(struct nullb *nullb,
> > +		struct bio *bio)
> > +{
> > +	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
> > +
> > +	memcpy(token->subsys, "nullb", 5);
> > +	token->sector_in = bio->bi_iter.bi_sector;
> > +	token->nullb = nullb;
> > +	token->sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline int nullb_setup_copy_write(struct nullb *nullb,
> > +		struct bio *bio, bool is_fua)
> > +{
> > +	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
> > +	sector_t sector_in, sector_out;
> > +	void *in, *out;
> > +	size_t rem, temp;
> > +	unsigned long offset_in, offset_out;
> > +	struct nullb_page *t_page_in, *t_page_out;
> > +	int ret = -EIO;
> > +
> > +	if (unlikely(memcmp(token->subsys, "nullb", 5)))
> > +		return -EOPNOTSUPP;
> > +	if (unlikely(token->nullb != nullb))
> > +		return -EOPNOTSUPP;
> > +	if (WARN_ON(token->sectors != bio->bi_iter.bi_size >> SECTOR_SHIFT))
> > +		return -EOPNOTSUPP;
>
> EOPNOTSUPP is strange. These are EINVAL, no?

acked, will update in next revision.

> > +
> > +	sector_in = token->sector_in;
> > +	sector_out = bio->bi_iter.bi_sector;
> > +	rem = token->sectors << SECTOR_SHIFT;
> > +
> > +	spin_lock_irq(&nullb->lock);
> > +	while (rem > 0) {
> > +		temp = min_t(size_t, nullb->dev->blocksize, rem);
> > +		offset_in = (sector_in & SECTOR_MASK) << SECTOR_SHIFT;
> > +		offset_out = (sector_out & SECTOR_MASK) << SECTOR_SHIFT;
> > +
> > +		if (null_cache_active(nullb) && !is_fua)
> > +			null_make_cache_space(nullb, PAGE_SIZE);
> > +
> > +		t_page_in = null_lookup_page(nullb, sector_in, false,
> > +			!null_cache_active(nullb));
> > +		if (!t_page_in)
> > +			goto err;
> > +		t_page_out = null_insert_page(nullb, sector_out,
> > +			!null_cache_active(nullb) || is_fua);
> > +		if (!t_page_out)
> > +			goto err;
> > +
> > +		in = kmap_local_page(t_page_in->page);
> > +		out = kmap_local_page(t_page_out->page);
> > +
> > +		memcpy(out + offset_out, in + offset_in, temp);
> > +		kunmap_local(out);
> > +		kunmap_local(in);
> > +		__set_bit(sector_out & SECTOR_MASK, t_page_out->bitmap);
> > +
> > +		if (is_fua)
> > +			null_free_sector(nullb, sector_out, true);
> > +
> > +		rem -= temp;
> > +		sector_in += temp >> SECTOR_SHIFT;
> > +		sector_out += temp >> SECTOR_SHIFT;
> > +	}
> > +
> > +	ret = 0;
> > +err:
> > +	spin_unlock_irq(&nullb->lock);
> > +	return ret;
> > +}
> > +
> >  static int null_handle_rq(struct nullb_cmd *cmd)
> >  {
> >  	struct request *rq = cmd->rq;
> > @@ -1267,6 +1342,14 @@ static int null_handle_rq(struct nullb_cmd *cmd)
> >  	struct req_iterator iter;
> >  	struct bio_vec bvec;
> >
> > +	if (rq->cmd_flags & REQ_COPY) {
> > +		if (op_is_write(req_op(rq)))
> > +			return nullb_setup_copy_write(nullb, rq->bio,
> > +					rq->cmd_flags & REQ_FUA);
> > +		else
>
> No need for this else.

acked

> > +			return nullb_setup_copy_read(nullb, rq->bio);
> > +	}
> > +
> >  	spin_lock_irq(&nullb->lock);
> >  	rq_for_each_segment(bvec, rq, iter) {
> >  		len = bvec.bv_len;
> > @@ -1294,6 +1377,14 @@ static int null_handle_bio(struct nullb_cmd *cmd)
> >  	struct bio_vec bvec;
> >  	struct bvec_iter iter;
> >
> > +	if (bio->bi_opf & REQ_COPY) {
> > +		if (op_is_write(bio_op(bio)))
> > +			return nullb_setup_copy_write(nullb, bio,
> > +					bio->bi_opf & REQ_FUA);
> > +		else
>
> No need for this else.

acked

> > +			return nullb_setup_copy_read(nullb, bio);
> > +	}
> > +
> >  	spin_lock_irq(&nullb->lock);
> >  	bio_for_each_segment(bvec, bio, iter) {
> >  		len = bvec.bv_len;
> > @@ -2146,6 +2237,9 @@ static int null_add_dev(struct nullb_device *dev)
> >  	list_add_tail(&nullb->list, &nullb_list);
> >  	mutex_unlock(&lock);
> >
> > +	blk_queue_max_copy_sectors_hw(nullb->disk->queue, 1024);
> > +	blk_queue_flag_set(QUEUE_FLAG_COPY, nullb->disk->queue);
>
> This should NOT be unconditionally enabled with a magic value of 1K sectors.
> The max copy sectors needs to be set with a configfs attribute so that we can
> enable/disable the copy offload support, to be able to exercise both block
> layer emulation and native device support.

acked

> > +
> >  	pr_info("disk %s created\n", nullb->disk_name);
> >
> >  	return 0;
> > diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
> > index eb5972c50be8..94e524e7306a 100644
> > --- a/drivers/block/null_blk/null_blk.h
> > +++ b/drivers/block/null_blk/null_blk.h
> > @@ -67,6 +67,13 @@ enum {
> > 	NULL_Q_MQ = 2,
> >  };
> >
> > +struct nullb_copy_token {
> > +	char subsys[5];
> > +	struct nullb *nullb;
> > +	u64 sector_in;
> > +	u64 sectors;
> > +};
> > +
> >  struct nullb_device {
> > 	struct nullb *nullb;
> > 	struct config_item item;
>
> --
> Damien Le Moal
> Western Digital Research

Thank you,
Nitesh Shetty
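The acked configfs change could look something like the sketch below, built
on null_blk's existing NULLB_DEVICE_ATTR configfs machinery. The attribute
name copy_max_bytes and the struct field are illustrative assumptions here,
not names taken from any posted revision:

	/* null_blk.h: hypothetical per-device knob in struct nullb_device;
	 * 0 (the default) leaves copy offload disabled. */
	unsigned long copy_max_bytes;

	/* main.c: define the configfs attribute next to the existing ones
	 * (it must also be listed in the nullb_device_attrs[] array). */
	NULLB_DEVICE_ATTR(copy_max_bytes, ulong, NULL);

	/* main.c, null_add_dev(): enable copy offload only when the user
	 * configured a limit, instead of hardcoding 1024 sectors. */
	if (dev->copy_max_bytes) {
		blk_queue_max_copy_sectors_hw(nullb->disk->queue,
				dev->copy_max_bytes >> SECTOR_SHIFT);
		blk_queue_flag_set(QUEUE_FLAG_COPY, nullb->disk->queue);
	}

With such a knob, copy offload can be toggled per device, so tests can
exercise both the block layer's copy emulation (knob left at 0) and the
driver's native path.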
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 9e6b032c8ecc..84c5fbcd67a5 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1257,6 +1257,81 @@ static int null_transfer(struct nullb *nullb, struct page *page,
 	return err;
 }
 
+static inline int nullb_setup_copy_read(struct nullb *nullb,
+		struct bio *bio)
+{
+	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
+
+	memcpy(token->subsys, "nullb", 5);
+	token->sector_in = bio->bi_iter.bi_sector;
+	token->nullb = nullb;
+	token->sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+
+	return 0;
+}
+
+static inline int nullb_setup_copy_write(struct nullb *nullb,
+		struct bio *bio, bool is_fua)
+{
+	struct nullb_copy_token *token = bvec_kmap_local(&bio->bi_io_vec[0]);
+	sector_t sector_in, sector_out;
+	void *in, *out;
+	size_t rem, temp;
+	unsigned long offset_in, offset_out;
+	struct nullb_page *t_page_in, *t_page_out;
+	int ret = -EIO;
+
+	if (unlikely(memcmp(token->subsys, "nullb", 5)))
+		return -EOPNOTSUPP;
+	if (unlikely(token->nullb != nullb))
+		return -EOPNOTSUPP;
+	if (WARN_ON(token->sectors != bio->bi_iter.bi_size >> SECTOR_SHIFT))
+		return -EOPNOTSUPP;
+
+	sector_in = token->sector_in;
+	sector_out = bio->bi_iter.bi_sector;
+	rem = token->sectors << SECTOR_SHIFT;
+
+	spin_lock_irq(&nullb->lock);
+	while (rem > 0) {
+		temp = min_t(size_t, nullb->dev->blocksize, rem);
+		offset_in = (sector_in & SECTOR_MASK) << SECTOR_SHIFT;
+		offset_out = (sector_out & SECTOR_MASK) << SECTOR_SHIFT;
+
+		if (null_cache_active(nullb) && !is_fua)
+			null_make_cache_space(nullb, PAGE_SIZE);
+
+		t_page_in = null_lookup_page(nullb, sector_in, false,
+			!null_cache_active(nullb));
+		if (!t_page_in)
+			goto err;
+		t_page_out = null_insert_page(nullb, sector_out,
+			!null_cache_active(nullb) || is_fua);
+		if (!t_page_out)
+			goto err;
+
+		in = kmap_local_page(t_page_in->page);
+		out = kmap_local_page(t_page_out->page);
+
+		memcpy(out + offset_out, in + offset_in, temp);
+		kunmap_local(out);
+		kunmap_local(in);
+		__set_bit(sector_out & SECTOR_MASK, t_page_out->bitmap);
+
+		if (is_fua)
+			null_free_sector(nullb, sector_out, true);
+
+		rem -= temp;
+		sector_in += temp >> SECTOR_SHIFT;
+		sector_out += temp >> SECTOR_SHIFT;
+	}
+
+	ret = 0;
+err:
+	spin_unlock_irq(&nullb->lock);
+	return ret;
+}
+
 static int null_handle_rq(struct nullb_cmd *cmd)
 {
 	struct request *rq = cmd->rq;
@@ -1267,6 +1342,14 @@ static int null_handle_rq(struct nullb_cmd *cmd)
 	struct req_iterator iter;
 	struct bio_vec bvec;
 
+	if (rq->cmd_flags & REQ_COPY) {
+		if (op_is_write(req_op(rq)))
+			return nullb_setup_copy_write(nullb, rq->bio,
+					rq->cmd_flags & REQ_FUA);
+		else
+			return nullb_setup_copy_read(nullb, rq->bio);
+	}
+
 	spin_lock_irq(&nullb->lock);
 	rq_for_each_segment(bvec, rq, iter) {
 		len = bvec.bv_len;
@@ -1294,6 +1377,14 @@ static int null_handle_bio(struct nullb_cmd *cmd)
 	struct bio_vec bvec;
 	struct bvec_iter iter;
 
+	if (bio->bi_opf & REQ_COPY) {
+		if (op_is_write(bio_op(bio)))
+			return nullb_setup_copy_write(nullb, bio,
+					bio->bi_opf & REQ_FUA);
+		else
+			return nullb_setup_copy_read(nullb, bio);
+	}
+
 	spin_lock_irq(&nullb->lock);
 	bio_for_each_segment(bvec, bio, iter) {
 		len = bvec.bv_len;
@@ -2146,6 +2237,9 @@ static int null_add_dev(struct nullb_device *dev)
 	list_add_tail(&nullb->list, &nullb_list);
 	mutex_unlock(&lock);
 
+	blk_queue_max_copy_sectors_hw(nullb->disk->queue, 1024);
+	blk_queue_flag_set(QUEUE_FLAG_COPY, nullb->disk->queue);
+
 	pr_info("disk %s created\n", nullb->disk_name);
 
 	return 0;
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index eb5972c50be8..94e524e7306a 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -67,6 +67,13 @@ enum {
 	NULL_Q_MQ = 2,
 };
 
+struct nullb_copy_token {
+	char subsys[5];
+	struct nullb *nullb;
+	u64 sector_in;
+	u64 sectors;
+};
+
 struct nullb_device {
 	struct nullb *nullb;
 	struct config_item item;
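As an aside for readers tracing the copy loop above: null_blk backs the
device with PAGE_SIZE pages, and the (sector & SECTOR_MASK) << SECTOR_SHIFT
expressions compute a sector's byte offset inside its backing page. A
standalone userspace illustration, assuming 4 KiB pages and 512-byte sectors
(NULLB_PAGE_SIZE stands in for the kernel's PAGE_SIZE, and SECTOR_MASK
mirrors null_blk's PAGE_SECTORS - 1 definition):

	#include <stdio.h>

	#define SECTOR_SHIFT	9	/* 512-byte sectors */
	#define NULLB_PAGE_SIZE	4096	/* stands in for the kernel's PAGE_SIZE */
	#define SECTOR_MASK	((NULLB_PAGE_SIZE >> SECTOR_SHIFT) - 1)	/* 0x7 */

	int main(void)
	{
		unsigned long long sector = 1027;	/* arbitrary example LBA */
		unsigned long long offset = (sector & SECTOR_MASK) << SECTOR_SHIFT;

		/* sector 1027 is sector 3 of backing page 128: 3 * 512 = 1536 */
		printf("sector %llu -> in-page byte offset %llu\n", sector, offset);
		return 0;
	}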