diff mbox series

[RFC,8/9] dm thin: add llseek(SEEK_HOLE/SEEK_DATA) support

Message ID 20240328203910.2370087-9-stefanha@redhat.com (mailing list archive)
State Deferred, archived
Delegated to: Mike Snitzer
Headers show
Series block: add llseek(SEEK_HOLE/SEEK_DATA) support | expand

Commit Message

Stefan Hajnoczi March 28, 2024, 8:39 p.m. UTC
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
Open issues:
- Locking?
- thin_seek_hole_data() does not run as a bio or request. This patch
  assumes dm_thin_find_mapped_range() synchronously performs I/O if
  metadata needs to be loaded from disk. Is that a valid assumption?
---
 drivers/md/dm-thin.c | 77 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

Comments

Eric Blake March 29, 2024, 1:31 a.m. UTC | #1
On Thu, Mar 28, 2024 at 04:39:09PM -0400, Stefan Hajnoczi wrote:
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> Open issues:
> - Locking?
> - thin_seek_hole_data() does not run as a bio or request. This patch
>   assumes dm_thin_find_mapped_range() synchronously performs I/O if
>   metadata needs to be loaded from disk. Is that a valid assumption?
> ---
>  drivers/md/dm-thin.c | 77 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 77 insertions(+)
> 
> diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> index 4793ad2aa1f7e..3c5dc4f0fe8a3 100644
> --- a/drivers/md/dm-thin.c
> +++ b/drivers/md/dm-thin.c
> @@ -4501,6 +4501,82 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
>  	}
>  }
>  
> +static dm_block_t loff_to_block(struct pool *pool, loff_t offset)
> +{
> +	sector_t offset_sectors = offset >> SECTOR_SHIFT;
> +	dm_block_t ret;
> +
> +	if (block_size_is_power_of_two(pool))
> +		ret = offset_sectors >> pool->sectors_per_block_shift;
> +	else {
> +		ret = offset_sectors;
> +		(void) sector_div(ret, pool->sectors_per_block);
> +	}
> +
> +	return ret;
> +}
> +
> +static loff_t block_to_loff(struct pool *pool, dm_block_t block)
> +{
> +	return block_to_sectors(pool, block) << SECTOR_SHIFT;
> +}
> +
> +static loff_t thin_seek_hole_data(struct dm_target *ti, loff_t offset,
> +		int whence)
> +{
> +	struct thin_c *tc = ti->private;
> +	struct dm_thin_device *td = tc->td;
> +	struct pool *pool = tc->pool;
> +	dm_block_t begin;
> +	dm_block_t end;
> +	dm_block_t mapped_begin;
> +	dm_block_t mapped_end;
> +	dm_block_t pool_begin;
> +	bool maybe_shared;
> +	int ret;
> +
> +	/* TODO locking? */
> +
> +	if (block_size_is_power_of_two(pool))
> +		end = ti->len >> pool->sectors_per_block_shift;
> +	else {
> +		end = ti->len;
> +		(void) sector_div(end, pool->sectors_per_block);
> +	}
> +
> +	offset -= ti->begin << SECTOR_SHIFT;
> +
> +	while (true) {
> +		begin = loff_to_block(pool, offset);
> +		ret = dm_thin_find_mapped_range(td, begin, end,
> +						&mapped_begin, &mapped_end,
> +						&pool_begin, &maybe_shared);
> +		if (ret == -ENODATA) {
> +			if (whence == SEEK_DATA)
> +				return -ENXIO;
> +			break;
> +		} else if (ret < 0) {
> +			/* TODO handle EWOULDBLOCK? */
> +			return -ENXIO;

This should probably be -EIO, not -ENXIO.

> +		}
> +
> +		/* SEEK_DATA finishes here... */
> +		if (whence == SEEK_DATA) {
> +			if (mapped_begin != begin)
> +				offset = block_to_loff(pool, mapped_begin);
> +			break;
> +		}
> +
> +		/* ...while SEEK_HOLE may need to look further */
> +		if (mapped_begin != begin)
> +			break; /* offset is in a hole */
> +
> +		offset = block_to_loff(pool, mapped_end);
> +	}
> +
> +	return offset + (ti->begin << SECTOR_SHIFT);

It's hard to follow, but I'm fairly certain that if whence ==
SEEK_HOLE, you end up returning ti->begin + ti->len instead of -ENXIO
if the range from begin to end is fully mapped; which is inconsistent
with the semantics you have in 4/9 (although in 6/9 I argue that
having all of the dm callbacks return ti->begin + ti->len instead of
-ENXIO might make logic easier for iterating through consecutive ti,
and then convert to -ENXIO only in the caller).

> +}
> +
>  static struct target_type thin_target = {
>  	.name = "thin",
>  	.version = {1, 23, 0},
> @@ -4515,6 +4591,7 @@ static struct target_type thin_target = {
>  	.status = thin_status,
>  	.iterate_devices = thin_iterate_devices,
>  	.io_hints = thin_io_hints,
> +	.seek_hole_data = thin_seek_hole_data,
>  };
>  
>  /*----------------------------------------------------------------*/
> -- 
> 2.44.0
>
Stefan Hajnoczi April 3, 2024, 3:03 p.m. UTC | #2
On Thu, Mar 28, 2024 at 08:31:21PM -0500, Eric Blake wrote:
> On Thu, Mar 28, 2024 at 04:39:09PM -0400, Stefan Hajnoczi wrote:
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > Open issues:
> > - Locking?
> > - thin_seek_hole_data() does not run as a bio or request. This patch
> >   assumes dm_thin_find_mapped_range() synchronously performs I/O if
> >   metadata needs to be loaded from disk. Is that a valid assumption?
> > ---
> >  drivers/md/dm-thin.c | 77 ++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 77 insertions(+)
> > 
> > diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
> > index 4793ad2aa1f7e..3c5dc4f0fe8a3 100644
> > --- a/drivers/md/dm-thin.c
> > +++ b/drivers/md/dm-thin.c
> > @@ -4501,6 +4501,82 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
> >  	}
> >  }
> >  
> > +static dm_block_t loff_to_block(struct pool *pool, loff_t offset)
> > +{
> > +	sector_t offset_sectors = offset >> SECTOR_SHIFT;
> > +	dm_block_t ret;
> > +
> > +	if (block_size_is_power_of_two(pool))
> > +		ret = offset_sectors >> pool->sectors_per_block_shift;
> > +	else {
> > +		ret = offset_sectors;
> > +		(void) sector_div(ret, pool->sectors_per_block);
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static loff_t block_to_loff(struct pool *pool, dm_block_t block)
> > +{
> > +	return block_to_sectors(pool, block) << SECTOR_SHIFT;
> > +}
> > +
> > +static loff_t thin_seek_hole_data(struct dm_target *ti, loff_t offset,
> > +		int whence)
> > +{
> > +	struct thin_c *tc = ti->private;
> > +	struct dm_thin_device *td = tc->td;
> > +	struct pool *pool = tc->pool;
> > +	dm_block_t begin;
> > +	dm_block_t end;
> > +	dm_block_t mapped_begin;
> > +	dm_block_t mapped_end;
> > +	dm_block_t pool_begin;
> > +	bool maybe_shared;
> > +	int ret;
> > +
> > +	/* TODO locking? */
> > +
> > +	if (block_size_is_power_of_two(pool))
> > +		end = ti->len >> pool->sectors_per_block_shift;
> > +	else {
> > +		end = ti->len;
> > +		(void) sector_div(end, pool->sectors_per_block);
> > +	}
> > +
> > +	offset -= ti->begin << SECTOR_SHIFT;
> > +
> > +	while (true) {
> > +		begin = loff_to_block(pool, offset);
> > +		ret = dm_thin_find_mapped_range(td, begin, end,
> > +						&mapped_begin, &mapped_end,
> > +						&pool_begin, &maybe_shared);
> > +		if (ret == -ENODATA) {
> > +			if (whence == SEEK_DATA)
> > +				return -ENXIO;
> > +			break;
> > +		} else if (ret < 0) {
> > +			/* TODO handle EWOULDBLOCK? */
> > +			return -ENXIO;
> 
> This should probably be -EIO, not -ENXIO.

Yes. XFS also returns -EIO, so I guess it's okay to do so.

I still need to get to the bottom of whether calling
dm_thin_find_mapped_range() is sane here and what to do when/if it
returns EWOULDBLOCK.

> > +		}
> > +
> > +		/* SEEK_DATA finishes here... */
> > +		if (whence == SEEK_DATA) {
> > +			if (mapped_begin != begin)
> > +				offset = block_to_loff(pool, mapped_begin);
> > +			break;
> > +		}
> > +
> > +		/* ...while SEEK_HOLE may need to look further */
> > +		if (mapped_begin != begin)
> > +			break; /* offset is in a hole */
> > +
> > +		offset = block_to_loff(pool, mapped_end);
> > +	}
> > +
> > +	return offset + (ti->begin << SECTOR_SHIFT);
> 
> It's hard to follow, but I'm fairly certain that if whence ==
> SEEK_HOLE, you end up returning ti->begin + ti->len instead of -ENXIO
> if the range from begin to end is fully mapped; which is inconsistent
> with the semantics you have in 4/9 (although in 6/9 I argue that
> having all of the dm callbacks return ti->begin + ti->len instead of
> -ENXIO might make logic easier for iterating through consecutive ti,
> and then convert to -ENXIO only in the caller).

Returning (ti->begin + ti->len) << SECTOR_SHIFT for SEEK_HOLE when there
is data at the end of the target is intentional. This matches the
semantics of lseek().

I agree there is adjustment necessary in dm.c, but I want to seek the
semantics of all lseek() functions identical to avoid confusion.

Stefan
diff mbox series

Patch

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 4793ad2aa1f7e..3c5dc4f0fe8a3 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4501,6 +4501,82 @@  static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 	}
 }
 
+static dm_block_t loff_to_block(struct pool *pool, loff_t offset)
+{
+	sector_t offset_sectors = offset >> SECTOR_SHIFT;
+	dm_block_t ret;
+
+	if (block_size_is_power_of_two(pool))
+		ret = offset_sectors >> pool->sectors_per_block_shift;
+	else {
+		ret = offset_sectors;
+		(void) sector_div(ret, pool->sectors_per_block);
+	}
+
+	return ret;
+}
+
+static loff_t block_to_loff(struct pool *pool, dm_block_t block)
+{
+	return block_to_sectors(pool, block) << SECTOR_SHIFT;
+}
+
+static loff_t thin_seek_hole_data(struct dm_target *ti, loff_t offset,
+		int whence)
+{
+	struct thin_c *tc = ti->private;
+	struct dm_thin_device *td = tc->td;
+	struct pool *pool = tc->pool;
+	dm_block_t begin;
+	dm_block_t end;
+	dm_block_t mapped_begin;
+	dm_block_t mapped_end;
+	dm_block_t pool_begin;
+	bool maybe_shared;
+	int ret;
+
+	/* TODO locking? */
+
+	if (block_size_is_power_of_two(pool))
+		end = ti->len >> pool->sectors_per_block_shift;
+	else {
+		end = ti->len;
+		(void) sector_div(end, pool->sectors_per_block);
+	}
+
+	offset -= ti->begin << SECTOR_SHIFT;
+
+	while (true) {
+		begin = loff_to_block(pool, offset);
+		ret = dm_thin_find_mapped_range(td, begin, end,
+						&mapped_begin, &mapped_end,
+						&pool_begin, &maybe_shared);
+		if (ret == -ENODATA) {
+			if (whence == SEEK_DATA)
+				return -ENXIO;
+			break;
+		} else if (ret < 0) {
+			/* TODO handle EWOULDBLOCK? */
+			return -ENXIO;
+		}
+
+		/* SEEK_DATA finishes here... */
+		if (whence == SEEK_DATA) {
+			if (mapped_begin != begin)
+				offset = block_to_loff(pool, mapped_begin);
+			break;
+		}
+
+		/* ...while SEEK_HOLE may need to look further */
+		if (mapped_begin != begin)
+			break; /* offset is in a hole */
+
+		offset = block_to_loff(pool, mapped_end);
+	}
+
+	return offset + (ti->begin << SECTOR_SHIFT);
+}
+
 static struct target_type thin_target = {
 	.name = "thin",
 	.version = {1, 23, 0},
@@ -4515,6 +4591,7 @@  static struct target_type thin_target = {
 	.status = thin_status,
 	.iterate_devices = thin_iterate_devices,
 	.io_hints = thin_io_hints,
+	.seek_hole_data = thin_seek_hole_data,
 };
 
 /*----------------------------------------------------------------*/