diff mbox series

[6/7] block/rbd: add write zeroes support

Message ID 20201227164236.10143-7-pl@kamp.de (mailing list archive)
State New, archived
Headers show
Series block/rbd: migrate to coroutines and add write zeroes support | expand

Commit Message

Peter Lieven Dec. 27, 2020, 4:42 p.m. UTC
Signed-off-by: Peter Lieven <pl@kamp.de>
---
 block/rbd.c | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

Comments

Jason Dillaman Jan. 14, 2021, 7:19 p.m. UTC | #1
On Sun, Dec 27, 2020 at 11:42 AM Peter Lieven <pl@kamp.de> wrote:
>
> Signed-off-by: Peter Lieven <pl@kamp.de>
> ---
>  block/rbd.c | 31 ++++++++++++++++++++++++++++++-
>  1 file changed, 30 insertions(+), 1 deletion(-)
>
> diff --git a/block/rbd.c b/block/rbd.c
> index 2d77d0007f..27b4404adf 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -63,7 +63,8 @@ typedef enum {
>      RBD_AIO_READ,
>      RBD_AIO_WRITE,
>      RBD_AIO_DISCARD,
> -    RBD_AIO_FLUSH
> +    RBD_AIO_FLUSH,
> +    RBD_AIO_WRITE_ZEROES
>  } RBDAIOCmd;
>
>  typedef struct BDRVRBDState {
> @@ -221,8 +222,12 @@ done:
>
>  static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
>  {
> +    BDRVRBDState *s = bs->opaque;
>      /* XXX Does RBD support AIO on less than 512-byte alignment? */
>      bs->bl.request_alignment = 512;
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> +    bs->bl.pwrite_zeroes_alignment = s->object_size;

The optimal alignment is 512 bytes -- but it can safely work just fine
down to 1 byte alignment since it will pad the request with additional
writes if needed.

> +#endif
>  }
>
>
> @@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
>      }
>
>      s->aio_context = bdrv_get_aio_context(bs);
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> +    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
> +#endif
>
>      /* When extending regular files, we get zeros from the OS */
>      bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
> @@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
>      case RBD_AIO_FLUSH:
>          r = rbd_aio_flush(s->image, c);
>          break;
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> +    case RBD_AIO_WRITE_ZEROES:
> +        r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0);
> +        break;
> +#endif
>      default:
>          r = -EINVAL;
>      }
> @@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
>      return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
>  }
>
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> +static int
> +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
> +                                      int count, BdrvRequestFlags flags)
> +{
> +    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
> +        return -ENOTSUP;
> +    }

There is a "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" flag that you can
use to optionally disable unmap.


> +    return qemu_rbd_start_co(bs, offset, count, NULL, flags,
> +                             RBD_AIO_WRITE_ZEROES);
> +}
> +#endif
> +
>  static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
>  {
>      BDRVRBDState *s = bs->opaque;
> @@ -1108,6 +1134,9 @@ static BlockDriver bdrv_rbd = {
>      .bdrv_co_pwritev        = qemu_rbd_co_pwritev,
>      .bdrv_co_flush_to_disk  = qemu_rbd_co_flush,
>      .bdrv_co_pdiscard       = qemu_rbd_co_pdiscard,
> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> +    .bdrv_co_pwrite_zeroes  = qemu_rbd_co_pwrite_zeroes,
> +#endif
>
>      .bdrv_snapshot_create   = qemu_rbd_snap_create,
>      .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
> --
> 2.17.1
>
>


--
Jason
Peter Lieven Jan. 14, 2021, 7:41 p.m. UTC | #2
Am 14.01.21 um 20:19 schrieb Jason Dillaman:
> On Sun, Dec 27, 2020 at 11:42 AM Peter Lieven <pl@kamp.de> wrote:
>> Signed-off-by: Peter Lieven <pl@kamp.de>
>> ---
>>  block/rbd.c | 31 ++++++++++++++++++++++++++++++-
>>  1 file changed, 30 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/rbd.c b/block/rbd.c
>> index 2d77d0007f..27b4404adf 100644
>> --- a/block/rbd.c
>> +++ b/block/rbd.c
>> @@ -63,7 +63,8 @@ typedef enum {
>>      RBD_AIO_READ,
>>      RBD_AIO_WRITE,
>>      RBD_AIO_DISCARD,
>> -    RBD_AIO_FLUSH
>> +    RBD_AIO_FLUSH,
>> +    RBD_AIO_WRITE_ZEROES
>>  } RBDAIOCmd;
>>
>>  typedef struct BDRVRBDState {
>> @@ -221,8 +222,12 @@ done:
>>
>>  static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
>>  {
>> +    BDRVRBDState *s = bs->opaque;
>>      /* XXX Does RBD support AIO on less than 512-byte alignment? */
>>      bs->bl.request_alignment = 512;
>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>> +    bs->bl.pwrite_zeroes_alignment = s->object_size;
> The optimal alignment is 512 bytes -- but it can safely work just fine
> down to 1 byte alignment since it will pad the request with additional
> writes if needed.


Okay and this will likely be faster than having Qemu doing that request split, right?

If we drop the alignment hint Qemu will pass the original request.


>
>> +#endif
>>  }
>>
>>
>> @@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
>>      }
>>
>>      s->aio_context = bdrv_get_aio_context(bs);
>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>> +    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
>> +#endif
>>
>>      /* When extending regular files, we get zeros from the OS */
>>      bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
>> @@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
>>      case RBD_AIO_FLUSH:
>>          r = rbd_aio_flush(s->image, c);
>>          break;
>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>> +    case RBD_AIO_WRITE_ZEROES:
>> +        r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0);
>> +        break;
>> +#endif
>>      default:
>>          r = -EINVAL;
>>      }
>> @@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
>>      return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
>>  }
>>
>> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
>> +static int
>> +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
>> +                                      int count, BdrvRequestFlags flags)
>> +{
>> +    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
>> +        return -ENOTSUP;
>> +    }
> There is a "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" flag that you can
> use to optionally disable unmap.


I have seen that. If you want I can support for this, too. But afaik this

is only available since Octopus release?


Peter
Jason Dillaman Jan. 15, 2021, 3:09 p.m. UTC | #3
On Thu, Jan 14, 2021 at 2:41 PM Peter Lieven <pl@kamp.de> wrote:
>
> Am 14.01.21 um 20:19 schrieb Jason Dillaman:
> > On Sun, Dec 27, 2020 at 11:42 AM Peter Lieven <pl@kamp.de> wrote:
> >> Signed-off-by: Peter Lieven <pl@kamp.de>
> >> ---
> >>  block/rbd.c | 31 ++++++++++++++++++++++++++++++-
> >>  1 file changed, 30 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/block/rbd.c b/block/rbd.c
> >> index 2d77d0007f..27b4404adf 100644
> >> --- a/block/rbd.c
> >> +++ b/block/rbd.c
> >> @@ -63,7 +63,8 @@ typedef enum {
> >>      RBD_AIO_READ,
> >>      RBD_AIO_WRITE,
> >>      RBD_AIO_DISCARD,
> >> -    RBD_AIO_FLUSH
> >> +    RBD_AIO_FLUSH,
> >> +    RBD_AIO_WRITE_ZEROES
> >>  } RBDAIOCmd;
> >>
> >>  typedef struct BDRVRBDState {
> >> @@ -221,8 +222,12 @@ done:
> >>
> >>  static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
> >>  {
> >> +    BDRVRBDState *s = bs->opaque;
> >>      /* XXX Does RBD support AIO on less than 512-byte alignment? */
> >>      bs->bl.request_alignment = 512;
> >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> >> +    bs->bl.pwrite_zeroes_alignment = s->object_size;
> > The optimal alignment is 512 bytes -- but it can safely work just fine
> > down to 1 byte alignment since it will pad the request with additional
> > writes if needed.
>
>
> Okay and this will likely be faster than having Qemu doing that request split, right?
>
> If we drop the alignment hint Qemu will pass the original request.
>
>
> >
> >> +#endif
> >>  }
> >>
> >>
> >> @@ -695,6 +700,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
> >>      }
> >>
> >>      s->aio_context = bdrv_get_aio_context(bs);
> >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> >> +    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
> >> +#endif
> >>
> >>      /* When extending regular files, we get zeros from the OS */
> >>      bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
> >> @@ -808,6 +816,11 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
> >>      case RBD_AIO_FLUSH:
> >>          r = rbd_aio_flush(s->image, c);
> >>          break;
> >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> >> +    case RBD_AIO_WRITE_ZEROES:
> >> +        r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0);
> >> +        break;
> >> +#endif
> >>      default:
> >>          r = -EINVAL;
> >>      }
> >> @@ -880,6 +893,19 @@ static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
> >>      return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
> >>  }
> >>
> >> +#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
> >> +static int
> >> +coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
> >> +                                      int count, BdrvRequestFlags flags)
> >> +{
> >> +    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
> >> +        return -ENOTSUP;
> >> +    }
> > There is a "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION" flag that you can
> > use to optionally disable unmap.
>
>
> I have seen that. If you want I can support for this, too. But afaik this
>
> is only available since Octopus release?

True -- I didn't backport that support to Nautilus since it was a new
feature vs the bug-fix that the write-zeroes API was introduced to
fix.

>
> Peter
>
>
diff mbox series

Patch

diff --git a/block/rbd.c b/block/rbd.c
index 2d77d0007f..27b4404adf 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -63,7 +63,8 @@  typedef enum {
     RBD_AIO_READ,
     RBD_AIO_WRITE,
     RBD_AIO_DISCARD,
-    RBD_AIO_FLUSH
+    RBD_AIO_FLUSH,
+    RBD_AIO_WRITE_ZEROES
 } RBDAIOCmd;
 
 typedef struct BDRVRBDState {
@@ -221,8 +222,12 @@  done:
 
 static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
+    BDRVRBDState *s = bs->opaque;
     /* XXX Does RBD support AIO on less than 512-byte alignment? */
     bs->bl.request_alignment = 512;
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+    bs->bl.pwrite_zeroes_alignment = s->object_size;
+#endif
 }
 
 
@@ -695,6 +700,9 @@  static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     s->aio_context = bdrv_get_aio_context(bs);
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
+#endif
 
     /* When extending regular files, we get zeros from the OS */
     bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
@@ -808,6 +816,11 @@  static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs,
     case RBD_AIO_FLUSH:
         r = rbd_aio_flush(s->image, c);
         break;
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+    case RBD_AIO_WRITE_ZEROES:
+        r = rbd_aio_write_zeroes(s->image, offset, bytes, c, 0, 0);
+        break;
+#endif
     default:
         r = -EINVAL;
     }
@@ -880,6 +893,19 @@  static int coroutine_fn qemu_rbd_co_pdiscard(BlockDriverState *bs,
     return qemu_rbd_start_co(bs, offset, count, NULL, 0, RBD_AIO_DISCARD);
 }
 
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+static int
+coroutine_fn qemu_rbd_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+                                      int count, BdrvRequestFlags flags)
+{
+    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
+        return -ENOTSUP;
+    }
+    return qemu_rbd_start_co(bs, offset, count, NULL, flags,
+                             RBD_AIO_WRITE_ZEROES);
+}
+#endif
+
 static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     BDRVRBDState *s = bs->opaque;
@@ -1108,6 +1134,9 @@  static BlockDriver bdrv_rbd = {
     .bdrv_co_pwritev        = qemu_rbd_co_pwritev,
     .bdrv_co_flush_to_disk  = qemu_rbd_co_flush,
     .bdrv_co_pdiscard       = qemu_rbd_co_pdiscard,
+#ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
+    .bdrv_co_pwrite_zeroes  = qemu_rbd_co_pwrite_zeroes,
+#endif
 
     .bdrv_snapshot_create   = qemu_rbd_snap_create,
     .bdrv_snapshot_delete   = qemu_rbd_snap_remove,