Message ID | 20250403081633.158591-3-prantoran@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | block/file-posix.c: Use pwritev2() with RWF_DSYNC for FUA - update | expand |
On Thu, Apr 03, 2025 at 01:16:33AM -0700, Pinku Deb Nath wrote: > The testing with "-t writeback" works for turning on enable_write_cache. > I renamed the function to qemu_pwritev_fua() and fixed any typos. > > I moved the handle_aiocb_flush() into the qemu_pwritev_fua() and > removed from the previously todo seciont. Initially I thought > of only passing aiocb, but then I was not sure whethe I could > derive buf from aiocb, so I added arguments for iovec and iovcnt > into qemu_pwritev_fua(). > > For handling buf in handle_aiocb_rw_linear(), I created iovec > and passed its reference. I assumed that there will be only one > buffer/iovec, so I passed 1 for iovcnt. > > Signed-off-by: Pinku Deb Nath <prantoran@gmail.com> > --- > block/file-posix.c | 38 +++++++++++++++++++++----------------- > 1 file changed, 21 insertions(+), 17 deletions(-) > > diff --git a/block/file-posix.c b/block/file-posix.c > index 34de816eab..4fffd49318 100644 > --- a/block/file-posix.c > +++ b/block/file-posix.c > @@ -1676,12 +1676,24 @@ qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) > } > > static ssize_t > -qemu_pwrite_fua(int fd, const struct iovec *iov, int nr_iov, off_t offset) > +qemu_pwritev_fua(const RawPosixAIOData *aiocb, struct iovec *iov, int iovcnt) > { > #ifdef RWF_DSYNC > - return pwritev2(fd, iov, nr_iov, offset, RWF_DSYNC); > + return pwritev2(aiocb->aio_fildes, > + iov, > + iovcnt, > + aiocb->aio_offset, > + RWF_DSYNC); > #else > - return pwritev2(fd, iov, nr_iov, offset, 0); > + ssize_t len = pwritev2(aiocb->aio_fildes, > + iov, > + iovcnt, > + aiocb->aio_offset, > + 0); On a non-Linux host pwritev2(2) will not exist. Please take a look at how qemu_preadv() is integrated (including the !CONFIG_PREADV case) and decide on a solution that works on non-Linux hosts. > + if (len == 0) { > + len = handle_aiocb_flush(aiocb); > + } > + return len; > #endif > } > > @@ -1710,10 +1722,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb) > len = RETRY_ON_EINTR( > (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) ? > (aiocb->flags & BDRV_REQ_FUA) ? > - qemu_pwrite_fua(aiocb->aio_fildes, > - aiocb->io.iov, > - aiocb->io.niov, > - aiocb->aio_offset) : > + qemu_pwritev_fua(aiocb, aiocb->io.iov, aiocb->io.niov) : > qemu_pwritev(aiocb->aio_fildes, > aiocb->io.iov, > aiocb->io.niov, > @@ -1744,10 +1753,11 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf) > while (offset < aiocb->aio_nbytes) { > if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) { > if (aiocb->flags & BDRV_REQ_FUA) { > - len = qemu_pwrite_fua(aiocb->aio_fildes, > - aiocb->io.iov, > - aiocb->io.niov, > - aiocb->aio_offset); > + struct iovec iov = { > + .iov_base = buf, > + .iov_len = aiocb->aio_nbytes - offset, > + }; > + len = qemu_pwritev_fua(aiocb, &iov, 1); The else branch takes offset into account. Here aiocb is passed in assuming it's the first iteration of the while (offset < aiocb->aio_nbytes) loop. On subsequent iterations the wrong values will be used because offset has changed. Perhaps it's easier to pass in the individual parameters (fd, offset, etc) instead of passing in aiocb. > } else { > len = pwrite(aiocb->aio_fildes, > (const char *)buf + offset, > @@ -2567,12 +2577,6 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, > > assert(qiov->size == bytes); > ret = raw_thread_pool_submit(handle_aiocb_rw, &acb); > -#ifndef RWD_DSYNC > - if (ret == 0 && (flags & BDRV_REQ_FUA)) { > - /* TODO Use pwritev2() instead if it's available */ > - ret = raw_co_flush_to_disk(bs); > - } > -#endif > goto out; /* Avoid the compiler err of unused label */ > > out: > -- > 2.43.0 >
diff --git a/block/file-posix.c b/block/file-posix.c index 34de816eab..4fffd49318 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -1676,12 +1676,24 @@ qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset) } static ssize_t -qemu_pwrite_fua(int fd, const struct iovec *iov, int nr_iov, off_t offset) +qemu_pwritev_fua(const RawPosixAIOData *aiocb, struct iovec *iov, int iovcnt) { #ifdef RWF_DSYNC - return pwritev2(fd, iov, nr_iov, offset, RWF_DSYNC); + return pwritev2(aiocb->aio_fildes, + iov, + iovcnt, + aiocb->aio_offset, + RWF_DSYNC); #else - return pwritev2(fd, iov, nr_iov, offset, 0); + ssize_t len = pwritev2(aiocb->aio_fildes, + iov, + iovcnt, + aiocb->aio_offset, + 0); + if (len == 0) { + len = handle_aiocb_flush(aiocb); + } + return len; #endif } @@ -1710,10 +1722,7 @@ static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb) len = RETRY_ON_EINTR( (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) ? (aiocb->flags & BDRV_REQ_FUA) ? - qemu_pwrite_fua(aiocb->aio_fildes, - aiocb->io.iov, - aiocb->io.niov, - aiocb->aio_offset) : + qemu_pwritev_fua(aiocb, aiocb->io.iov, aiocb->io.niov) : qemu_pwritev(aiocb->aio_fildes, aiocb->io.iov, aiocb->io.niov, @@ -1744,10 +1753,11 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf) while (offset < aiocb->aio_nbytes) { if (aiocb->aio_type & (QEMU_AIO_WRITE | QEMU_AIO_ZONE_APPEND)) { if (aiocb->flags & BDRV_REQ_FUA) { - len = qemu_pwrite_fua(aiocb->aio_fildes, - aiocb->io.iov, - aiocb->io.niov, - aiocb->aio_offset); + struct iovec iov = { + .iov_base = buf, + .iov_len = aiocb->aio_nbytes - offset, + }; + len = qemu_pwritev_fua(aiocb, &iov, 1); } else { len = pwrite(aiocb->aio_fildes, (const char *)buf + offset, @@ -2567,12 +2577,6 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr, assert(qiov->size == bytes); ret = raw_thread_pool_submit(handle_aiocb_rw, &acb); -#ifndef RWD_DSYNC - if (ret == 0 && (flags & BDRV_REQ_FUA)) { - /* TODO Use pwritev2() instead if it's available */ - ret = raw_co_flush_to_disk(bs); - } -#endif goto out; /* Avoid the compiler err of unused label */ out:
The testing with "-t writeback" works for turning on enable_write_cache. I renamed the function to qemu_pwritev_fua() and fixed any typos. I moved the handle_aiocb_flush() into the qemu_pwritev_fua() and removed from the previously todo seciont. Initially I thought of only passing aiocb, but then I was not sure whethe I could derive buf from aiocb, so I added arguments for iovec and iovcnt into qemu_pwritev_fua(). For handling buf in handle_aiocb_rw_linear(), I created iovec and passed its reference. I assumed that there will be only one buffer/iovec, so I passed 1 for iovcnt. Signed-off-by: Pinku Deb Nath <prantoran@gmail.com> --- block/file-posix.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-)