
[v6,4/5] dm-thin: Add REQ_OP_PROVISION support

Message ID 20230506062909.74601-5-sarthakkukreti@chromium.org (mailing list archive)
State Mainlined, archived
Series [v6,1/5] block: Don't invalidate pagecache for invalid falloc modes

Commit Message

Sarthak Kukreti May 6, 2023, 6:29 a.m. UTC
dm-thinpool uses the provision request to provision
blocks for a dm-thin device. dm-thinpool currently does not
pass through REQ_OP_PROVISION to underlying devices.

For shared blocks, provision requests will break sharing and copy the
contents of the entire block. Additionally, if 'skip_block_zeroing'
is not set, dm-thin will opt to zero out the entire range as a part
of provisioning.

Signed-off-by: Sarthak Kukreti <sarthakkukreti@chromium.org>
---
 drivers/md/dm-thin.c | 70 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 66 insertions(+), 4 deletions(-)
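
For context, a minimal userspace sketch of how a provision request could reach a dm-thin device, assuming (as the rest of this series implies) that fallocate() on a block device is what generates REQ_OP_PROVISION; the device path, size and the behaviour described in the comments are assumptions for illustration, not part of this patch:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical dm-thin device; requires the full series applied. */
	int fd = open("/dev/mapper/thin-vol", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * mode 0 asks for the byte range to be allocated; on a thin device
	 * this is expected to provision the pool blocks backing the first
	 * 1 MiB, breaking sharing for any shared blocks as described in the
	 * commit message above.
	 */
	if (fallocate(fd, 0, 0, 1 << 20) < 0)
		perror("fallocate");

	close(fd);
	return 0;
}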

Comments

Mike Snitzer May 9, 2023, 4:58 p.m. UTC | #1
On Sat, May 06 2023 at  2:29P -0400,
Sarthak Kukreti <sarthakkukreti@chromium.org> wrote:

> dm-thinpool uses the provision request to provision
> blocks for a dm-thin device. dm-thinpool currently does not
> pass through REQ_OP_PROVISION to underlying devices.
> 
> For shared blocks, provision requests will break sharing and copy the
> contents of the entire block. Additionally, if 'skip_block_zeroing'
> is not set, dm-thin will opt to zero out the entire range as a part
> of provisioning.
> 
> Signed-off-by: Sarthak Kukreti <sarthakkukreti@chromium.org>
> ---
>  drivers/md/dm-thin.c | 70 +++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 66 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> index 2b13c949bd72..3f94f53ac956 100644
> --- a/drivers/md/dm-thin.c
> +++ b/drivers/md/dm-thin.c
> @@ -274,6 +274,7 @@ struct pool {
>  
>  	process_bio_fn process_bio;
>  	process_bio_fn process_discard;
> +	process_bio_fn process_provision;
>  
>  	process_cell_fn process_cell;
>  	process_cell_fn process_discard_cell;
> @@ -913,7 +914,8 @@ static void __inc_remap_and_issue_cell(void *context,
>  	struct bio *bio;
>  
>  	while ((bio = bio_list_pop(&cell->bios))) {
> -		if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
> +		if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD ||
> +		    bio_op(bio) == REQ_OP_PROVISION)
>  			bio_list_add(&info->defer_bios, bio);
>  		else {
>  			inc_all_io_entry(info->tc->pool, bio);
> @@ -1245,8 +1247,8 @@ static int io_overlaps_block(struct pool *pool, struct bio *bio)
>  
>  static int io_overwrites_block(struct pool *pool, struct bio *bio)
>  {
> -	return (bio_data_dir(bio) == WRITE) &&
> -		io_overlaps_block(pool, bio);
> +	return (bio_data_dir(bio) == WRITE) && io_overlaps_block(pool, bio) &&
> +	       bio_op(bio) != REQ_OP_PROVISION;
>  }
>  
>  static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
> @@ -1953,6 +1955,51 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
>  	}
>  }
>  
> +static void process_provision_bio(struct thin_c *tc, struct bio *bio)
> +{
> +	int r;
> +	struct pool *pool = tc->pool;
> +	dm_block_t block = get_bio_block(tc, bio);
> +	struct dm_bio_prison_cell *cell;
> +	struct dm_cell_key key;
> +	struct dm_thin_lookup_result lookup_result;
> +
> +	/*
> +	 * If cell is already occupied, then the block is already
> +	 * being provisioned so we have nothing further to do here.
> +	 */
> +	build_virtual_key(tc->td, block, &key);
> +	if (bio_detain(pool, &key, bio, &cell))
> +		return;
> +
> +	if (tc->requeue_mode) {
> +		cell_requeue(pool, cell);
> +		return;
> +	}
> +
> +	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
> +	switch (r) {
> +	case 0:
> +		if (lookup_result.shared) {
> +			process_shared_bio(tc, bio, block, &lookup_result, cell);
> +		} else {
> +			bio_endio(bio);
> +			cell_defer_no_holder(tc, cell);
> +		}
> +		break;
> +	case -ENODATA:
> +		provision_block(tc, bio, block, cell);
> +		break;
> +
> +	default:
> +		DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
> +			    __func__, r);
> +		cell_defer_no_holder(tc, cell);
> +		bio_io_error(bio);
> +		break;
> +	}
> +}
> +
>  static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
>  {
>  	int r;
> @@ -2228,6 +2275,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
>  
>  		if (bio_op(bio) == REQ_OP_DISCARD)
>  			pool->process_discard(tc, bio);
> +		else if (bio_op(bio) == REQ_OP_PROVISION)
> +			pool->process_provision(tc, bio);
>  		else
>  			pool->process_bio(tc, bio);
>  
> @@ -2579,6 +2628,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
>  		dm_pool_metadata_read_only(pool->pmd);
>  		pool->process_bio = process_bio_fail;
>  		pool->process_discard = process_bio_fail;
> +		pool->process_provision = process_bio_fail;
>  		pool->process_cell = process_cell_fail;
>  		pool->process_discard_cell = process_cell_fail;
>  		pool->process_prepared_mapping = process_prepared_mapping_fail;
> @@ -2592,6 +2642,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
>  		dm_pool_metadata_read_only(pool->pmd);
>  		pool->process_bio = process_bio_read_only;
>  		pool->process_discard = process_bio_success;
> +		pool->process_provision = process_bio_fail;
>  		pool->process_cell = process_cell_read_only;
>  		pool->process_discard_cell = process_cell_success;
>  		pool->process_prepared_mapping = process_prepared_mapping_fail;
> @@ -2612,6 +2663,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
>  		pool->out_of_data_space = true;
>  		pool->process_bio = process_bio_read_only;
>  		pool->process_discard = process_discard_bio;
> +		pool->process_provision = process_bio_fail;
>  		pool->process_cell = process_cell_read_only;
>  		pool->process_prepared_mapping = process_prepared_mapping;
>  		set_discard_callbacks(pool);
> @@ -2628,6 +2680,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
>  		dm_pool_metadata_read_write(pool->pmd);
>  		pool->process_bio = process_bio;
>  		pool->process_discard = process_discard_bio;
> +		pool->process_provision = process_provision_bio;
>  		pool->process_cell = process_cell;
>  		pool->process_prepared_mapping = process_prepared_mapping;
>  		set_discard_callbacks(pool);
> @@ -2749,7 +2802,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
>  		return DM_MAPIO_SUBMITTED;
>  	}
>  
> -	if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
> +	if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD ||
> +	    bio_op(bio) == REQ_OP_PROVISION) {
>  		thin_defer_bio_with_throttle(tc, bio);
>  		return DM_MAPIO_SUBMITTED;
>  	}
> @@ -3396,6 +3450,9 @@ static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  	pt->adjusted_pf = pt->requested_pf = pf;
>  	ti->num_flush_bios = 1;
>  	ti->limit_swap_bios = true;
> +	ti->num_provision_bios = 1;
> +	ti->provision_supported = true;
> +	ti->max_provision_granularity = true;
>  
>  	/*
>  	 * Only need to enable discards if the pool should pass
> @@ -4114,6 +4171,8 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
>  	 * The pool uses the same discard limits as the underlying data
>  	 * device.  DM core has already set this up.
>  	 */
> +
> +	limits->max_provision_sectors = pool->sectors_per_block;
>  }
>  
>  static struct target_type pool_target = {
> @@ -4288,6 +4347,9 @@ static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  		ti->max_discard_granularity = true;
>  	}
>  
> +	ti->num_provision_bios = 1;
> +	ti->provision_supported = true;
> +

We need this in thin_ctr: ti->max_provision_granularity = true;

It's needed more in the thin target than in thin-pool; otherwise provision bios
issued to thin devices won't be split appropriately.  But I do think
it's fine to set it in both thin_ctr and pool_ctr.
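
As a rough standalone illustration of the splitting being asked for here (this is not the series' dm core code; the helper name and signature are made up), the granularity flag is what clamps a provision bio so it never crosses a thin-pool block boundary:

#include <stdio.h>

typedef unsigned long long sector_t;

/*
 * Hypothetical helper mirroring how dm core clamps discard bios by
 * granularity: with max_provision_granularity set, a provision bio is cut
 * at the next pool block boundary so each split maps to exactly one block.
 */
static sector_t max_provision_io_len(sector_t sector, sector_t len,
				     sector_t sectors_per_block,
				     int max_provision_granularity)
{
	sector_t to_boundary;

	if (!max_provision_granularity)
		return len;	/* no hint: the whole bio goes down unsplit */

	to_boundary = sectors_per_block - (sector % sectors_per_block);
	return len < to_boundary ? len : to_boundary;
}

int main(void)
{
	/* 64KiB pool blocks (128 sectors); provision 1024 sectors from sector 100. */
	printf("first split: %llu sectors\n",
	       max_provision_io_len(100, 1024, 128, 1));	/* -> 28 */
	printf("without the flag: %llu sectors\n",
	       max_provision_io_len(100, 1024, 128, 0));	/* -> 1024 */
	return 0;
}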

Otherwise, looks good.

Thanks,
Mike
Sarthak Kukreti May 11, 2023, 8:03 p.m. UTC | #2
On Tue, May 9, 2023 at 9:58 AM Mike Snitzer <snitzer@kernel.org> wrote:
>
> On Sat, May 06 2023 at  2:29P -0400,
> Sarthak Kukreti <sarthakkukreti@chromium.org> wrote:
>
> > dm-thinpool uses the provision request to provision
> > blocks for a dm-thin device. dm-thinpool currently does not
> > pass through REQ_OP_PROVISION to underlying devices.
> >
> > For shared blocks, provision requests will break sharing and copy the
> > contents of the entire block. Additionally, if 'skip_block_zeroing'
> > is not set, dm-thin will opt to zero out the entire range as a part
> > of provisioning.
> >
> > Signed-off-by: Sarthak Kukreti <sarthakkukreti@chromium.org>
> > ---
> >  drivers/md/dm-thin.c | 70 +++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 66 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> > index 2b13c949bd72..3f94f53ac956 100644
> > --- a/drivers/md/dm-thin.c
> > +++ b/drivers/md/dm-thin.c
> > @@ -274,6 +274,7 @@ struct pool {
> >
> >       process_bio_fn process_bio;
> >       process_bio_fn process_discard;
> > +     process_bio_fn process_provision;
> >
> >       process_cell_fn process_cell;
> >       process_cell_fn process_discard_cell;
> > @@ -913,7 +914,8 @@ static void __inc_remap_and_issue_cell(void *context,
> >       struct bio *bio;
> >
> >       while ((bio = bio_list_pop(&cell->bios))) {
> > -             if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
> > +             if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD ||
> > +                 bio_op(bio) == REQ_OP_PROVISION)
> >                       bio_list_add(&info->defer_bios, bio);
> >               else {
> >                       inc_all_io_entry(info->tc->pool, bio);
> > @@ -1245,8 +1247,8 @@ static int io_overlaps_block(struct pool *pool, struct bio *bio)
> >
> >  static int io_overwrites_block(struct pool *pool, struct bio *bio)
> >  {
> > -     return (bio_data_dir(bio) == WRITE) &&
> > -             io_overlaps_block(pool, bio);
> > +     return (bio_data_dir(bio) == WRITE) && io_overlaps_block(pool, bio) &&
> > +            bio_op(bio) != REQ_OP_PROVISION;
> >  }
> >
> >  static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
> > @@ -1953,6 +1955,51 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
> >       }
> >  }
> >
> > +static void process_provision_bio(struct thin_c *tc, struct bio *bio)
> > +{
> > +     int r;
> > +     struct pool *pool = tc->pool;
> > +     dm_block_t block = get_bio_block(tc, bio);
> > +     struct dm_bio_prison_cell *cell;
> > +     struct dm_cell_key key;
> > +     struct dm_thin_lookup_result lookup_result;
> > +
> > +     /*
> > +      * If cell is already occupied, then the block is already
> > +      * being provisioned so we have nothing further to do here.
> > +      */
> > +     build_virtual_key(tc->td, block, &key);
> > +     if (bio_detain(pool, &key, bio, &cell))
> > +             return;
> > +
> > +     if (tc->requeue_mode) {
> > +             cell_requeue(pool, cell);
> > +             return;
> > +     }
> > +
> > +     r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
> > +     switch (r) {
> > +     case 0:
> > +             if (lookup_result.shared) {
> > +                     process_shared_bio(tc, bio, block, &lookup_result, cell);
> > +             } else {
> > +                     bio_endio(bio);
> > +                     cell_defer_no_holder(tc, cell);
> > +             }
> > +             break;
> > +     case -ENODATA:
> > +             provision_block(tc, bio, block, cell);
> > +             break;
> > +
> > +     default:
> > +             DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
> > +                         __func__, r);
> > +             cell_defer_no_holder(tc, cell);
> > +             bio_io_error(bio);
> > +             break;
> > +     }
> > +}
> > +
> >  static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
> >  {
> >       int r;
> > @@ -2228,6 +2275,8 @@ static void process_thin_deferred_bios(struct thin_c *tc)
> >
> >               if (bio_op(bio) == REQ_OP_DISCARD)
> >                       pool->process_discard(tc, bio);
> > +             else if (bio_op(bio) == REQ_OP_PROVISION)
> > +                     pool->process_provision(tc, bio);
> >               else
> >                       pool->process_bio(tc, bio);
> >
> > @@ -2579,6 +2628,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
> >               dm_pool_metadata_read_only(pool->pmd);
> >               pool->process_bio = process_bio_fail;
> >               pool->process_discard = process_bio_fail;
> > +             pool->process_provision = process_bio_fail;
> >               pool->process_cell = process_cell_fail;
> >               pool->process_discard_cell = process_cell_fail;
> >               pool->process_prepared_mapping = process_prepared_mapping_fail;
> > @@ -2592,6 +2642,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
> >               dm_pool_metadata_read_only(pool->pmd);
> >               pool->process_bio = process_bio_read_only;
> >               pool->process_discard = process_bio_success;
> > +             pool->process_provision = process_bio_fail;
> >               pool->process_cell = process_cell_read_only;
> >               pool->process_discard_cell = process_cell_success;
> >               pool->process_prepared_mapping = process_prepared_mapping_fail;
> > @@ -2612,6 +2663,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
> >               pool->out_of_data_space = true;
> >               pool->process_bio = process_bio_read_only;
> >               pool->process_discard = process_discard_bio;
> > +             pool->process_provision = process_bio_fail;
> >               pool->process_cell = process_cell_read_only;
> >               pool->process_prepared_mapping = process_prepared_mapping;
> >               set_discard_callbacks(pool);
> > @@ -2628,6 +2680,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
> >               dm_pool_metadata_read_write(pool->pmd);
> >               pool->process_bio = process_bio;
> >               pool->process_discard = process_discard_bio;
> > +             pool->process_provision = process_provision_bio;
> >               pool->process_cell = process_cell;
> >               pool->process_prepared_mapping = process_prepared_mapping;
> >               set_discard_callbacks(pool);
> > @@ -2749,7 +2802,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
> >               return DM_MAPIO_SUBMITTED;
> >       }
> >
> > -     if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
> > +     if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD ||
> > +         bio_op(bio) == REQ_OP_PROVISION) {
> >               thin_defer_bio_with_throttle(tc, bio);
> >               return DM_MAPIO_SUBMITTED;
> >       }
> > @@ -3396,6 +3450,9 @@ static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> >       pt->adjusted_pf = pt->requested_pf = pf;
> >       ti->num_flush_bios = 1;
> >       ti->limit_swap_bios = true;
> > +     ti->num_provision_bios = 1;
> > +     ti->provision_supported = true;
> > +     ti->max_provision_granularity = true;
> >
> >       /*
> >        * Only need to enable discards if the pool should pass
> > @@ -4114,6 +4171,8 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
> >        * The pool uses the same discard limits as the underlying data
> >        * device.  DM core has already set this up.
> >        */
> > +
> > +     limits->max_provision_sectors = pool->sectors_per_block;
> >  }
> >
> >  static struct target_type pool_target = {
> > @@ -4288,6 +4347,9 @@ static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> >               ti->max_discard_granularity = true;
> >       }
> >
> > +     ti->num_provision_bios = 1;
> > +     ti->provision_supported = true;
> > +
>
> We need this in thin_ctr: ti->max_provision_granularity = true;
>
> More needed in the thin target than thin-pool; otherwise provision bio
> issued to thin devices won't be split appropriately.  But I do think
> its fine to set in both thin_ctr and pool_ctr.
>
> Otherwise, looks good.
>
Thanks! I'll add it to the next iteration (in addition to any other
feedback that's added to v6).

Given that this series covers multiple subsystems, would there be a
preferred way of queueing this for merge?

Best
Sarthak

> Thanks,
> Mike
Mike Snitzer May 12, 2023, 2:34 p.m. UTC | #3
On Thu, May 11 2023 at  4:03P -0400,
Sarthak Kukreti <sarthakkukreti@chromium.org> wrote:

> On Tue, May 9, 2023 at 9:58 AM Mike Snitzer <snitzer@kernel.org> wrote:
> >
> > On Sat, May 06 2023 at  2:29P -0400,
> > Sarthak Kukreti <sarthakkukreti@chromium.org> wrote:
> >
> > > dm-thinpool uses the provision request to provision
> > > blocks for a dm-thin device. dm-thinpool currently does not
> > > pass through REQ_OP_PROVISION to underlying devices.
> > >
> > > For shared blocks, provision requests will break sharing and copy the
> > > contents of the entire block. Additionally, if 'skip_block_zeroing'
> > > is not set, dm-thin will opt to zero out the entire range as a part
> > > of provisioning.
> > >
> > > Signed-off-by: Sarthak Kukreti <sarthakkukreti@chromium.org>
> > > ---
> > >  drivers/md/dm-thin.c | 70 +++++++++++++++++++++++++++++++++++++++++---
> > >  1 file changed, 66 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> > > index 2b13c949bd72..3f94f53ac956 100644
> > > --- a/drivers/md/dm-thin.c
> > > +++ b/drivers/md/dm-thin.c
> > > @@ -4288,6 +4347,9 @@ static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
> > >               ti->max_discard_granularity = true;
> > >       }
> > >
> > > +     ti->num_provision_bios = 1;
> > > +     ti->provision_supported = true;
> > > +
> >
> > We need this in thin_ctr: ti->max_provision_granularity = true;
> >
> > More needed in the thin target than thin-pool; otherwise provision bio
> > issued to thin devices won't be split appropriately.  But I do think
> > its fine to set in both thin_ctr and pool_ctr.
> >
> > Otherwise, looks good.
> >
> Thanks! I'll add it to the next iteration (in addition to any other
> feedback that's added to v6).

OK. I'll begin basing dm-thinp's WRITE_ZEROES support on top of this
series.
 
> Given that this series covers multiple subsystems, would there be a
> preferred way of queueing this for merge?

I think it'd be OK for Jens to pick this series up and I'll rebase
my corresponding DM tree once he does.

In addition to Jens; Brian, Darrick and/or others: any chance you
could review the block core changes in this series to ensure you're
cool with them?

Would be nice to get Sarthak some review feedback so that his v7 can
hopefully be the final revision.

Thanks,
Mike
Mike Snitzer May 12, 2023, 5:32 p.m. UTC | #4
On Sat, May 06 2023 at  2:29P -0400,
Sarthak Kukreti <sarthakkukreti@chromium.org> wrote:

> dm-thinpool uses the provision request to provision
> blocks for a dm-thin device. dm-thinpool currently does not
> pass through REQ_OP_PROVISION to underlying devices.
> 
> For shared blocks, provision requests will break sharing and copy the
> contents of the entire block. Additionally, if 'skip_block_zeroing'
> is not set, dm-thin will opt to zero out the entire range as a part
> of provisioning.
> 
> Signed-off-by: Sarthak Kukreti <sarthakkukreti@chromium.org>
> ---
>  drivers/md/dm-thin.c | 70 +++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 66 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> index 2b13c949bd72..3f94f53ac956 100644
> --- a/drivers/md/dm-thin.c
> +++ b/drivers/md/dm-thin.c
...
> @@ -4114,6 +4171,8 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
>  	 * The pool uses the same discard limits as the underlying data
>  	 * device.  DM core has already set this up.
>  	 */
> +
> +	limits->max_provision_sectors = pool->sectors_per_block;

Just noticed that setting limits->max_provision_sectors needs to move
above the pool_io_hints code that sets up discards -- otherwise the early
return from if (!pt->adjusted_pf.discard_enabled) will cause the
max_provision_sectors setting to be skipped.

Here is a roll-up of the fixes that need to be folded into this patch:

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 3f94f53ac956..90c8e36cb327 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4151,6 +4151,8 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 		blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
 	}
 
+	limits->max_provision_sectors = pool->sectors_per_block;
+
 	/*
 	 * pt->adjusted_pf is a staging area for the actual features to use.
 	 * They get transferred to the live pool in bind_control_target()
@@ -4171,8 +4173,6 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	 * The pool uses the same discard limits as the underlying data
 	 * device.  DM core has already set this up.
 	 */
-
-	limits->max_provision_sectors = pool->sectors_per_block;
 }
 
 static struct target_type pool_target = {
@@ -4349,6 +4349,7 @@ static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	ti->num_provision_bios = 1;
 	ti->provision_supported = true;
+	ti->max_provision_granularity = true;
 
 	mutex_unlock(&dm_thin_pool_table.mutex);
Sarthak Kukreti May 15, 2023, 9:19 p.m. UTC | #5
On Fri, May 12, 2023 at 10:32 AM Mike Snitzer <snitzer@kernel.org> wrote:
>
> On Sat, May 06 2023 at  2:29P -0400,
> Sarthak Kukreti <sarthakkukreti@chromium.org> wrote:
>
> > dm-thinpool uses the provision request to provision
> > blocks for a dm-thin device. dm-thinpool currently does not
> > pass through REQ_OP_PROVISION to underlying devices.
> >
> > For shared blocks, provision requests will break sharing and copy the
> > contents of the entire block. Additionally, if 'skip_block_zeroing'
> > is not set, dm-thin will opt to zero out the entire range as a part
> > of provisioning.
> >
> > Signed-off-by: Sarthak Kukreti <sarthakkukreti@chromium.org>
> > ---
> >  drivers/md/dm-thin.c | 70 +++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 66 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> > index 2b13c949bd72..3f94f53ac956 100644
> > --- a/drivers/md/dm-thin.c
> > +++ b/drivers/md/dm-thin.c
> ...
> > @@ -4114,6 +4171,8 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
> >        * The pool uses the same discard limits as the underlying data
> >        * device.  DM core has already set this up.
> >        */
> > +
> > +     limits->max_provision_sectors = pool->sectors_per_block;
>
> Just noticed that setting limits->max_provision_sectors needs to move
> above pool_io_hints code that sets up discards -- otherwise the early
> return from if (!pt->adjusted_pf.discard_enabled) will cause setting
> max_provision_sectors to be skipped.
>
> Here is a roll up of the fixes that need to be folded into this patch:
>
Ah right, thanks for pointing that out! I'll fold this into v7.

Best
Sarthak

> diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> index 3f94f53ac956..90c8e36cb327 100644
> --- a/drivers/md/dm-thin.c
> +++ b/drivers/md/dm-thin.c
> @@ -4151,6 +4151,8 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
>                 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
>         }
>
> +       limits->max_provision_sectors = pool->sectors_per_block;
> +
>         /*
>          * pt->adjusted_pf is a staging area for the actual features to use.
>          * They get transferred to the live pool in bind_control_target()
> @@ -4171,8 +4173,6 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
>          * The pool uses the same discard limits as the underlying data
>          * device.  DM core has already set this up.
>          */
> -
> -       limits->max_provision_sectors = pool->sectors_per_block;
>  }
>
>  static struct target_type pool_target = {
> @@ -4349,6 +4349,7 @@ static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>
>         ti->num_provision_bios = 1;
>         ti->provision_supported = true;
> +       ti->max_provision_granularity = true;
>
>         mutex_unlock(&dm_thin_pool_table.mutex);
>

Patch

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 2b13c949bd72..3f94f53ac956 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -274,6 +274,7 @@  struct pool {
 
 	process_bio_fn process_bio;
 	process_bio_fn process_discard;
+	process_bio_fn process_provision;
 
 	process_cell_fn process_cell;
 	process_cell_fn process_discard_cell;
@@ -913,7 +914,8 @@  static void __inc_remap_and_issue_cell(void *context,
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(&cell->bios))) {
-		if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD)
+		if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD ||
+		    bio_op(bio) == REQ_OP_PROVISION)
 			bio_list_add(&info->defer_bios, bio);
 		else {
 			inc_all_io_entry(info->tc->pool, bio);
@@ -1245,8 +1247,8 @@  static int io_overlaps_block(struct pool *pool, struct bio *bio)
 
 static int io_overwrites_block(struct pool *pool, struct bio *bio)
 {
-	return (bio_data_dir(bio) == WRITE) &&
-		io_overlaps_block(pool, bio);
+	return (bio_data_dir(bio) == WRITE) && io_overlaps_block(pool, bio) &&
+	       bio_op(bio) != REQ_OP_PROVISION;
 }
 
 static void save_and_set_endio(struct bio *bio, bio_end_io_t **save,
@@ -1953,6 +1955,51 @@  static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block
 	}
 }
 
+static void process_provision_bio(struct thin_c *tc, struct bio *bio)
+{
+	int r;
+	struct pool *pool = tc->pool;
+	dm_block_t block = get_bio_block(tc, bio);
+	struct dm_bio_prison_cell *cell;
+	struct dm_cell_key key;
+	struct dm_thin_lookup_result lookup_result;
+
+	/*
+	 * If cell is already occupied, then the block is already
+	 * being provisioned so we have nothing further to do here.
+	 */
+	build_virtual_key(tc->td, block, &key);
+	if (bio_detain(pool, &key, bio, &cell))
+		return;
+
+	if (tc->requeue_mode) {
+		cell_requeue(pool, cell);
+		return;
+	}
+
+	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
+	switch (r) {
+	case 0:
+		if (lookup_result.shared) {
+			process_shared_bio(tc, bio, block, &lookup_result, cell);
+		} else {
+			bio_endio(bio);
+			cell_defer_no_holder(tc, cell);
+		}
+		break;
+	case -ENODATA:
+		provision_block(tc, bio, block, cell);
+		break;
+
+	default:
+		DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d",
+			    __func__, r);
+		cell_defer_no_holder(tc, cell);
+		bio_io_error(bio);
+		break;
+	}
+}
+
 static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
 {
 	int r;
@@ -2228,6 +2275,8 @@  static void process_thin_deferred_bios(struct thin_c *tc)
 
 		if (bio_op(bio) == REQ_OP_DISCARD)
 			pool->process_discard(tc, bio);
+		else if (bio_op(bio) == REQ_OP_PROVISION)
+			pool->process_provision(tc, bio);
 		else
 			pool->process_bio(tc, bio);
 
@@ -2579,6 +2628,7 @@  static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		dm_pool_metadata_read_only(pool->pmd);
 		pool->process_bio = process_bio_fail;
 		pool->process_discard = process_bio_fail;
+		pool->process_provision = process_bio_fail;
 		pool->process_cell = process_cell_fail;
 		pool->process_discard_cell = process_cell_fail;
 		pool->process_prepared_mapping = process_prepared_mapping_fail;
@@ -2592,6 +2642,7 @@  static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		dm_pool_metadata_read_only(pool->pmd);
 		pool->process_bio = process_bio_read_only;
 		pool->process_discard = process_bio_success;
+		pool->process_provision = process_bio_fail;
 		pool->process_cell = process_cell_read_only;
 		pool->process_discard_cell = process_cell_success;
 		pool->process_prepared_mapping = process_prepared_mapping_fail;
@@ -2612,6 +2663,7 @@  static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		pool->out_of_data_space = true;
 		pool->process_bio = process_bio_read_only;
 		pool->process_discard = process_discard_bio;
+		pool->process_provision = process_bio_fail;
 		pool->process_cell = process_cell_read_only;
 		pool->process_prepared_mapping = process_prepared_mapping;
 		set_discard_callbacks(pool);
@@ -2628,6 +2680,7 @@  static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
 		dm_pool_metadata_read_write(pool->pmd);
 		pool->process_bio = process_bio;
 		pool->process_discard = process_discard_bio;
+		pool->process_provision = process_provision_bio;
 		pool->process_cell = process_cell;
 		pool->process_prepared_mapping = process_prepared_mapping;
 		set_discard_callbacks(pool);
@@ -2749,7 +2802,8 @@  static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 		return DM_MAPIO_SUBMITTED;
 	}
 
-	if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD) {
+	if (op_is_flush(bio->bi_opf) || bio_op(bio) == REQ_OP_DISCARD ||
+	    bio_op(bio) == REQ_OP_PROVISION) {
 		thin_defer_bio_with_throttle(tc, bio);
 		return DM_MAPIO_SUBMITTED;
 	}
@@ -3396,6 +3450,9 @@  static int pool_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	pt->adjusted_pf = pt->requested_pf = pf;
 	ti->num_flush_bios = 1;
 	ti->limit_swap_bios = true;
+	ti->num_provision_bios = 1;
+	ti->provision_supported = true;
+	ti->max_provision_granularity = true;
 
 	/*
 	 * Only need to enable discards if the pool should pass
@@ -4114,6 +4171,8 @@  static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	 * The pool uses the same discard limits as the underlying data
 	 * device.  DM core has already set this up.
 	 */
+
+	limits->max_provision_sectors = pool->sectors_per_block;
 }
 
 static struct target_type pool_target = {
@@ -4288,6 +4347,9 @@  static int thin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		ti->max_discard_granularity = true;
 	}
 
+	ti->num_provision_bios = 1;
+	ti->provision_supported = true;
+
 	mutex_unlock(&dm_thin_pool_table.mutex);
 
 	spin_lock_irq(&tc->pool->lock);