diff mbox series

[v2,2/2] block/file-posix.c: Use pwritev2() with RWF_DSYNC for FUA - update

Message ID 20250403081633.158591-3-prantoran@gmail.com (mailing list archive)
State New
Headers show
Series block/file-posix.c: Use pwritev2() with RWF_DSYNC for FUA - update | expand

Commit Message

Pinku Deb Nath April 3, 2025, 8:16 a.m. UTC
The testing with "-t writeback" works for turning on enable_write_cache.
I renamed the function to qemu_pwritev_fua() and fixed any typos.

I moved the handle_aiocb_flush() into the qemu_pwritev_fua() and
removed from the previously todo seciont. Initially I thought
of only passing aiocb, but then I was not sure whethe I could
derive buf from aiocb, so I added arguments for iovec and iovcnt
into qemu_pwritev_fua().

For handling buf in handle_aiocb_rw_linear(), I created iovec
and passed its reference. I assumed that there will be only one
buffer/iovec, so I passed 1 for iovcnt.

Signed-off-by: Pinku Deb Nath <prantoran@gmail.com>
---
 block/file-posix.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

Comments

Stefan Hajnoczi April 3, 2025, 3:58 p.m. UTC | #1
On Thu, Apr 03, 2025 at 01:16:33AM -0700, Pinku Deb Nath wrote:
> The testing with "-t writeback" works for turning on enable_write_cache.
> I renamed the function to qemu_pwritev_fua() and fixed any typos.
> 
> I moved the handle_aiocb_flush() into the qemu_pwritev_fua() and
> removed from the previously todo seciont. Initially I thought
> of only passing aiocb, but then I was not sure whethe I could
> derive buf from aiocb, so I added arguments for iovec and iovcnt
> into qemu_pwritev_fua().
> 
> For handling buf in handle_aiocb_rw_linear(), I created iovec
> and passed its reference. I assumed that there will be only one
> buffer/iovec, so I passed 1 for iovcnt.
> 
> Signed-off-by: Pinku Deb Nath <prantoran@gmail.com>
> ---
>  block/file-posix.c | 38 +++++++++++++++++++++-----------------
>  1 file changed, 21 insertions(+), 17 deletions(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 34de816eab..4fffd49318 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1676,12 +1676,24 @@ qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
>  }
>  
>  static ssize_t
> -qemu_pwrite_fua(int fd, const struct iovec *iov, int nr_iov, off_t offset)
> +qemu_pwritev_fua(const RawPosixAIOData *aiocb, struct iovec *iov, int iovcnt)
>  {
>  #ifdef RWF_DSYNC
> -    return pwritev2(fd, iov, nr_iov, offset, RWF_DSYNC);
> +    return pwritev2(aiocb->aio_fildes,
> +                    iov,
> +                    iovcnt,
> +                    aiocb->aio_offset,
> +                    RWF_DSYNC);
>  #else
> -    return pwritev2(fd, iov, nr_iov, offset, 0);
> +    ssize_t len = pwritev2(aiocb->aio_fildes,
> +                        iov,
> +                        iovcnt,
> +                        aiocb->aio_offset,
> +                        0);

On a non-Linux host pwritev2(2) will not exist. Please take a look at
how qemu_preadv() is integrated (including the !CONFIG_PREADV case) and
decide on a solution that works on non-Linux hosts.

> +    if (len == 0) {
> +        len = handle_aiocb_flush(aiocb);
> +    }
> +    return len;
>  #endif
>  }
>  
> @@ -1710,10 +1722,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
>      len = RETRY_ON_EINTR(
>          (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) ?
>              (aiocb->flags &  BDRV_REQ_FUA) ?
> -                qemu_pwrite_fua(aiocb->aio_fildes,
> -                                aiocb->io.iov,
> -                                aiocb->io.niov,
> -                                aiocb->aio_offset) :
> +                qemu_pwritev_fua(aiocb, aiocb->io.iov, aiocb->io.niov) :
>                  qemu_pwritev(aiocb->aio_fildes,
>                              aiocb->io.iov,
>                              aiocb->io.niov,
> @@ -1744,10 +1753,11 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
>      while (offset < aiocb->aio_nbytes) {
>          if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
>              if (aiocb->flags & BDRV_REQ_FUA) {
> -                len = qemu_pwrite_fua(aiocb->aio_fildes,
> -                                    aiocb->io.iov,
> -                                    aiocb->io.niov,
> -                                    aiocb->aio_offset);
> +                struct iovec iov = {
> +                    .iov_base = buf,
> +                    .iov_len = aiocb->aio_nbytes - offset,
> +                };
> +                len = qemu_pwritev_fua(aiocb, &iov, 1);

The else branch takes offset into account. Here aiocb is passed in
assuming it's the first iteration of the while (offset <
aiocb->aio_nbytes) loop. On subsequent iterations the wrong values will
be used because offset has changed.

Perhaps it's easier to pass in the individual parameters (fd, offset,
etc) instead of passing in aiocb.

>              } else {
>                  len = pwrite(aiocb->aio_fildes,
>                              (const char *)buf + offset,
> @@ -2567,12 +2577,6 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
>  
>      assert(qiov->size == bytes);
>      ret = raw_thread_pool_submit(handle_aiocb_rw, &acb);
> -#ifndef RWD_DSYNC
> -    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
> -        /* TODO Use pwritev2() instead if it's available */
> -        ret = raw_co_flush_to_disk(bs);
> -    }
> -#endif
>      goto out; /* Avoid the compiler err of unused label */
>  
>  out:
> -- 
> 2.43.0
>
diff mbox series

Patch

diff --git a/block/file-posix.c b/block/file-posix.c
index 34de816eab..4fffd49318 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1676,12 +1676,24 @@  qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
 }
 
 static ssize_t
-qemu_pwrite_fua(int fd, const struct iovec *iov, int nr_iov, off_t offset)
+qemu_pwritev_fua(const RawPosixAIOData *aiocb, struct iovec *iov, int iovcnt)
 {
 #ifdef RWF_DSYNC
-    return pwritev2(fd, iov, nr_iov, offset, RWF_DSYNC);
+    return pwritev2(aiocb->aio_fildes,
+                    iov,
+                    iovcnt,
+                    aiocb->aio_offset,
+                    RWF_DSYNC);
 #else
-    return pwritev2(fd, iov, nr_iov, offset, 0);
+    ssize_t len = pwritev2(aiocb->aio_fildes,
+                        iov,
+                        iovcnt,
+                        aiocb->aio_offset,
+                        0);
+    if (len == 0) {
+        len = handle_aiocb_flush(aiocb);
+    }
+    return len;
 #endif
 }
 
@@ -1710,10 +1722,7 @@  static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
     len = RETRY_ON_EINTR(
         (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) ?
             (aiocb->flags &  BDRV_REQ_FUA) ?
-                qemu_pwrite_fua(aiocb->aio_fildes,
-                                aiocb->io.iov,
-                                aiocb->io.niov,
-                                aiocb->aio_offset) :
+                qemu_pwritev_fua(aiocb, aiocb->io.iov, aiocb->io.niov) :
                 qemu_pwritev(aiocb->aio_fildes,
                             aiocb->io.iov,
                             aiocb->io.niov,
@@ -1744,10 +1753,11 @@  static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
     while (offset < aiocb->aio_nbytes) {
         if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) {
             if (aiocb->flags & BDRV_REQ_FUA) {
-                len = qemu_pwrite_fua(aiocb->aio_fildes,
-                                    aiocb->io.iov,
-                                    aiocb->io.niov,
-                                    aiocb->aio_offset);
+                struct iovec iov = {
+                    .iov_base = buf,
+                    .iov_len = aiocb->aio_nbytes - offset,
+                };
+                len = qemu_pwritev_fua(aiocb, &iov, 1);
             } else {
                 len = pwrite(aiocb->aio_fildes,
                             (const char *)buf + offset,
@@ -2567,12 +2577,6 @@  static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
 
     assert(qiov->size == bytes);
     ret = raw_thread_pool_submit(handle_aiocb_rw, &acb);
-#ifndef RWD_DSYNC
-    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
-        /* TODO Use pwritev2() instead if it's available */
-        ret = raw_co_flush_to_disk(bs);
-    }
-#endif
     goto out; /* Avoid the compiler err of unused label */
 
 out: