@@ -17,6 +17,7 @@
#define QEMU_RAW_AIO_H
#include "block/aio.h"
+#include "block/block-common.h"
#include "qemu/iov.h"
/* AIO request types */
@@ -58,9 +59,11 @@ void laio_cleanup(LinuxAioState *s);
/* laio_co_submit: submit I/O requests in the thread's current AioContext. */
int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
- int type, uint64_t dev_max_batch);
+ int type, BdrvRequestFlags flags,
+ uint64_t dev_max_batch);
bool laio_has_fdsync(int);
+bool laio_has_fua(void);
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
#endif
@@ -71,7 +74,8 @@ void luring_cleanup(LuringState *s);
/* luring_co_submit: submit I/O requests in the thread's current AioContext. */
int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
- QEMUIOVector *qiov, int type);
+ QEMUIOVector *qiov, int type,
+ BdrvRequestFlags flags);
void luring_detach_aio_context(LuringState *s, AioContext *old_context);
void luring_attach_aio_context(LuringState *s, AioContext *new_context);
#endif
@@ -194,6 +194,7 @@ static int fd_open(BlockDriverState *bs)
}
static int64_t raw_getlength(BlockDriverState *bs);
+static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs);
typedef struct RawPosixAIOData {
BlockDriverState *bs;
@@ -804,6 +805,10 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
#endif
s->needs_alignment = raw_needs_alignment(bs);
+ if (!s->use_linux_aio || laio_has_fua()) {
+ bs->supported_write_flags = BDRV_REQ_FUA;
+ }
+
bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
if (S_ISREG(st.st_mode)) {
/* When extending regular files, we get zeros from the OS */
@@ -2477,7 +2482,8 @@ static inline bool raw_check_linux_aio(BDRVRawState *s)
#endif
static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
- uint64_t bytes, QEMUIOVector *qiov, int type)
+ uint64_t bytes, QEMUIOVector *qiov, int type,
+ int flags)
{
BDRVRawState *s = bs->opaque;
RawPosixAIOData acb;
@@ -2508,13 +2514,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
#ifdef CONFIG_LINUX_IO_URING
} else if (raw_check_linux_io_uring(s)) {
assert(qiov->size == bytes);
- ret = luring_co_submit(bs, s->fd, offset, qiov, type);
+ ret = luring_co_submit(bs, s->fd, offset, qiov, type, flags);
goto out;
#endif
#ifdef CONFIG_LINUX_AIO
} else if (raw_check_linux_aio(s)) {
assert(qiov->size == bytes);
- ret = laio_co_submit(s->fd, offset, qiov, type,
+ ret = laio_co_submit(s->fd, offset, qiov, type, flags,
s->aio_max_batch);
goto out;
#endif
@@ -2534,6 +2540,10 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
assert(qiov->size == bytes);
ret = raw_thread_pool_submit(handle_aiocb_rw, &acb);
+ if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+ /* TODO Use pwritev2() instead if it's available */
+ ret = raw_co_flush_to_disk(bs);
+ }
goto out; /* Avoid the compiler err of unused label */
out:
@@ -2571,14 +2581,14 @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
- return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
+ return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags);
}
static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
- return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
+ return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags);
}
static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
@@ -2600,12 +2610,12 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
#ifdef CONFIG_LINUX_IO_URING
if (raw_check_linux_io_uring(s)) {
- return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
+ return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
}
#endif
#ifdef CONFIG_LINUX_AIO
if (s->has_laio_fdsync && raw_check_linux_aio(s)) {
- return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
+ return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0, 0);
}
#endif
return raw_thread_pool_submit(handle_aiocb_flush, &acb);
@@ -3540,7 +3550,7 @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
}
trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
- return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+ return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND, 0);
}
#endif
@@ -335,15 +335,17 @@ static void luring_deferred_fn(void *opaque)
*
*/
static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
- uint64_t offset, int type)
+ uint64_t offset, int type, BdrvRequestFlags flags)
{
int ret;
struct io_uring_sqe *sqes = &luringcb->sqeq;
+ int luring_flags;
switch (type) {
case QEMU_AIO_WRITE:
- io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
- luringcb->qiov->niov, offset);
+ luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+ io_uring_prep_writev2(sqes, fd, luringcb->qiov->iov,
+ luringcb->qiov->niov, offset, luring_flags);
break;
case QEMU_AIO_ZONE_APPEND:
io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
@@ -380,7 +382,8 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
}
int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
- QEMUIOVector *qiov, int type)
+ QEMUIOVector *qiov, int type,
+ BdrvRequestFlags flags)
{
int ret;
AioContext *ctx = qemu_get_current_aio_context();
@@ -393,7 +396,7 @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
};
trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0,
type);
- ret = luring_do_submit(fd, &luringcb, s, offset, type);
+ ret = luring_do_submit(fd, &luringcb, s, offset, type, flags);
if (ret < 0) {
return ret;
@@ -368,15 +368,23 @@ static void laio_deferred_fn(void *opaque)
}
static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
- int type, uint64_t dev_max_batch)
+ int type, BdrvRequestFlags flags,
+ uint64_t dev_max_batch)
{
LinuxAioState *s = laiocb->ctx;
struct iocb *iocbs = &laiocb->iocb;
QEMUIOVector *qiov = laiocb->qiov;
+ int laio_flags;
switch (type) {
case QEMU_AIO_WRITE:
+#ifdef HAVE_IO_PREP_PWRITEV2
+ laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+ io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
+#else
+ assert(flags == 0);
io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
+#endif
break;
case QEMU_AIO_ZONE_APPEND:
io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
@@ -409,7 +417,8 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
}
int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
- int type, uint64_t dev_max_batch)
+ int type, BdrvRequestFlags flags,
+ uint64_t dev_max_batch)
{
int ret;
AioContext *ctx = qemu_get_current_aio_context();
@@ -422,7 +431,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
.qiov = qiov,
};
- ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch);
+ ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
if (ret < 0) {
return ret;
}
@@ -505,3 +514,12 @@ bool laio_has_fdsync(int fd)
io_destroy(ctx);
return (ret == -EINVAL) ? false : true;
}
+
+bool laio_has_fua(void)
+{
+#ifdef HAVE_IO_PREP_PWRITEV2
+ return true;
+#else
+ return false;
+#endif
+}
@@ -2727,6 +2727,10 @@ config_host_data.set('HAVE_OPTRESET',
cc.has_header_symbol('getopt.h', 'optreset'))
config_host_data.set('HAVE_IPPROTO_MPTCP',
cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP'))
+if libaio.found()
+ config_host_data.set('HAVE_IO_PREP_PWRITEV2',
+ cc.has_header_symbol('libaio.h', 'io_prep_pwritev2'))
+endif
# has_member
config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',