From patchwork Wed May 29 16:23:19 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: John Levon X-Patchwork-Id: 13679245 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from lists.gnu.org (lists.gnu.org [209.51.188.17]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 880FBC25B75 for ; Wed, 29 May 2024 16:31:10 +0000 (UTC) Received: from localhost ([::1] helo=lists1p.gnu.org) by lists.gnu.org with esmtp (Exim 4.90_1) (envelope-from ) id 1sCM7U-0001ud-Oc; Wed, 29 May 2024 12:25:44 -0400 Received: from eggs.gnu.org ([2001:470:142:3::10]) by lists.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1sCM7A-0001Ku-IX for qemu-devel@nongnu.org; Wed, 29 May 2024 12:25:25 -0400 Received: from ssh.movementarian.org ([139.162.205.133] helo=movementarian.org) by eggs.gnu.org with esmtps (TLS1.2:ECDHE_RSA_AES_256_GCM_SHA384:256) (Exim 4.90_1) (envelope-from ) id 1sCM74-0006Nr-GT for qemu-devel@nongnu.org; Wed, 29 May 2024 12:25:24 -0400 Received: from movement by movementarian.org with local (Exim 4.95) (envelope-from ) id 1sCM6t-006CRU-Ct; Wed, 29 May 2024 17:25:07 +0100 From: John Levon To: qemu-devel@nongnu.org Cc: alex.williamson@redhat.com, clg@redhat.com, jag.raman@oracle.com, thanos.makatos@nutanix.com, John Johnson , Elena Ufimtseva , John Levon Subject: [PATCH 26/26] vfio-user: add coalesced posted writes Date: Wed, 29 May 2024 17:23:19 +0100 Message-Id: <20240529162319.1476680-27-levon@movementarian.org> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20240529162319.1476680-1-levon@movementarian.org> References: <20240529162319.1476680-1-levon@movementarian.org> MIME-Version: 1.0 Received-SPF: pass client-ip=139.162.205.133; envelope-from=movement@movementarian.org; helo=movementarian.org X-Spam_score_int: -18 X-Spam_score: -1.9 X-Spam_bar: - X-Spam_report: (-1.9 / 5.0 requ) BAYES_00=-1.9, SPF_HELO_PASS=-0.001, SPF_PASS=-0.001, T_SCC_BODY_TEXT_LINE=-0.01 autolearn=ham autolearn_force=no X-Spam_action: no action X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org Sender: qemu-devel-bounces+qemu-devel=archiver.kernel.org@nongnu.org From: Jagannathan Raman Add new message to send multiple writes to server. Prevents the outgoing queue from overflowing when a long latency operation is followed by a series of posted writes. Originally-by: John Johnson Signed-off-by: Elena Ufimtseva Signed-off-by: Jagannathan Raman Signed-off-by: John Levon --- hw/vfio/trace-events | 1 + hw/vfio/user-protocol.h | 21 +++++++ hw/vfio/user.c | 130 +++++++++++++++++++++++++++++++++++++++- hw/vfio/user.h | 7 +++ 4 files changed, 157 insertions(+), 2 deletions(-) diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index 387751bd7f..2e6f0e6240 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -188,6 +188,7 @@ vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) " index vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " region %d offset 0x%"PRIx64" count %d" vfio_user_get_irq_info(uint32_t index, uint32_t flags, uint32_t count) " index %d flags 0x%x count %d" vfio_user_set_irqs(uint32_t index, uint32_t start, uint32_t count, uint32_t flags) " index %d start %d count %d flags 0x%x" +vfio_user_wrmulti(const char *s, uint64_t wr_cnt) " %s count 0x%"PRIx64 # user-container.c vfio_user_dma_map(uint64_t iova, uint64_t size, uint64_t off, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" off 0x%"PRIx64" flags 0x%x async_ops %d" diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h index 607e0f4b7f..22e3265f58 100644 --- a/hw/vfio/user-protocol.h +++ b/hw/vfio/user-protocol.h @@ -42,6 +42,7 @@ enum vfio_user_command { VFIO_USER_DMA_WRITE = 12, VFIO_USER_DEVICE_RESET = 13, VFIO_USER_DIRTY_PAGES = 14, + VFIO_USER_REGION_WRITE_MULTI = 15, VFIO_USER_MAX, }; @@ -75,6 +76,7 @@ typedef struct { #define VFIO_USER_CAP_PGSIZES "pgsizes" #define VFIO_USER_CAP_MAP_MAX "max_dma_maps" #define VFIO_USER_CAP_MIGR "migration" +#define VFIO_USER_CAP_MULTI "write_multiple" /* "migration" members */ #define VFIO_USER_CAP_PGSIZE "pgsize" @@ -221,4 +223,23 @@ typedef struct { char data[]; } VFIOUserBitmap; +/* + * VFIO_USER_REGION_WRITE_MULTI + */ +#define VFIO_USER_MULTI_DATA 8 +#define VFIO_USER_MULTI_MAX 200 + +typedef struct { + uint64_t offset; + uint32_t region; + uint32_t count; + char data[VFIO_USER_MULTI_DATA]; +} VFIOUserWROne; + +typedef struct { + VFIOUserHdr hdr; + uint64_t wr_cnt; + VFIOUserWROne wrs[VFIO_USER_MULTI_MAX]; +} VFIOUserWRMulti; + #endif /* VFIO_USER_PROTOCOL_H */ diff --git a/hw/vfio/user.c b/hw/vfio/user.c index f13381f310..b866c84b4c 100644 --- a/hw/vfio/user.c +++ b/hw/vfio/user.c @@ -61,6 +61,7 @@ static void vfio_user_request(void *opaque); static int vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg); static void vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr, VFIOUserFDs *fds); +static void vfio_user_flush_multi(VFIOUserProxy *proxy); static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err) { @@ -465,6 +466,11 @@ static void vfio_user_send(void *opaque) } qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, vfio_user_recv, NULL, NULL, proxy); + + /* queue empty - send any pending multi write msgs */ + if (proxy->wr_multi != NULL) { + vfio_user_flush_multi(proxy); + } } } @@ -485,6 +491,7 @@ static int vfio_user_send_one(VFIOUserProxy *proxy) } QTAILQ_REMOVE(&proxy->outgoing, msg, next); + proxy->num_outgoing--; if (msg->type == VFIO_MSG_ASYNC) { vfio_user_recycle(proxy, msg); } else { @@ -592,11 +599,18 @@ static int vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg) { int ret; + /* older coalesced writes go first */ + if (proxy->wr_multi != NULL && + ((msg->hdr->flags & VFIO_USER_TYPE) == VFIO_USER_REQUEST)) { + vfio_user_flush_multi(proxy); + } + /* * Unsent outgoing msgs - add to tail */ if (!QTAILQ_EMPTY(&proxy->outgoing)) { QTAILQ_INSERT_TAIL(&proxy->outgoing, msg, next); + proxy->num_outgoing++; return 0; } @@ -610,6 +624,7 @@ static int vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg) } if (ret == QIO_CHANNEL_ERR_BLOCK) { QTAILQ_INSERT_HEAD(&proxy->outgoing, msg, next); + proxy->num_outgoing = 1; qio_channel_set_aio_fd_handler(proxy->ioc, proxy->ctx, vfio_user_recv, proxy->ctx, vfio_user_send, proxy); @@ -1149,12 +1164,27 @@ static bool check_migr(VFIOUserProxy *proxy, QObject *qobj, Error **errp) return caps_parse(proxy, qdict, caps_migr, errp); } +static bool check_multi(VFIOUserProxy *proxy, QObject *qobj, Error **errp) +{ + QBool *qb = qobject_to(QBool, qobj); + + if (qb == NULL) { + error_setg(errp, "malformed %s", VFIO_USER_CAP_MULTI); + return false; + } + if (qbool_get_bool(qb)) { + proxy->flags |= VFIO_PROXY_USE_MULTI; + } + return true; +} + static struct cap_entry caps_cap[] = { { VFIO_USER_CAP_MAX_FDS, check_max_fds }, { VFIO_USER_CAP_MAX_XFER, check_max_xfer }, { VFIO_USER_CAP_PGSIZES, check_pgsizes }, { VFIO_USER_CAP_MAP_MAX, check_max_dma }, { VFIO_USER_CAP_MIGR, check_migr }, + { VFIO_USER_CAP_MULTI, check_multi }, { NULL } }; @@ -1213,6 +1243,7 @@ static GString *caps_json(void) qdict_put_int(capdict, VFIO_USER_CAP_MAX_XFER, VFIO_USER_DEF_MAX_XFER); qdict_put_int(capdict, VFIO_USER_CAP_PGSIZES, VFIO_USER_DEF_PGSIZE); qdict_put_int(capdict, VFIO_USER_CAP_MAP_MAX, VFIO_USER_DEF_MAP_MAX); + qdict_put_bool(capdict, VFIO_USER_CAP_MULTI, true); qdict_put_obj(dict, VFIO_USER_CAP, QOBJECT(capdict)); @@ -1481,19 +1512,114 @@ static int vfio_user_region_read(VFIOUserProxy *proxy, uint8_t index, return msgp->count; } +static void vfio_user_flush_multi(VFIOUserProxy *proxy) +{ + VFIOUserMsg *msg; + VFIOUserWRMulti *wm = proxy->wr_multi; + int ret; + + proxy->wr_multi = NULL; + + /* adjust size for actual # of writes */ + wm->hdr.size -= (VFIO_USER_MULTI_MAX - wm->wr_cnt) * sizeof(VFIOUserWROne); + + msg = vfio_user_getmsg(proxy, &wm->hdr, NULL); + msg->id = wm->hdr.id; + msg->rsize = 0; + msg->type = VFIO_MSG_ASYNC; + trace_vfio_user_wrmulti("flush", wm->wr_cnt); + + ret = vfio_user_send_queued(proxy, msg); + if (ret < 0) { + vfio_user_recycle(proxy, msg); + } +} + +static void vfio_user_create_multi(VFIOUserProxy *proxy) +{ + VFIOUserWRMulti *wm; + + wm = g_malloc0(sizeof(*wm)); + vfio_user_request_msg(&wm->hdr, VFIO_USER_REGION_WRITE_MULTI, + sizeof(*wm), VFIO_USER_NO_REPLY); + proxy->wr_multi = wm; +} + +static void vfio_user_add_multi(VFIOUserProxy *proxy, uint8_t index, + off_t offset, uint32_t count, void *data) +{ + VFIOUserWRMulti *wm = proxy->wr_multi; + VFIOUserWROne *w1 = &wm->wrs[wm->wr_cnt]; + + w1->offset = offset; + w1->region = index; + w1->count = count; + memcpy(&w1->data, data, count); + + wm->wr_cnt++; + trace_vfio_user_wrmulti("add", wm->wr_cnt); + if (wm->wr_cnt == VFIO_USER_MULTI_MAX || + proxy->num_outgoing < VFIO_USER_OUT_LOW) { + vfio_user_flush_multi(proxy); + } +} + static int vfio_user_region_write(VFIOUserProxy *proxy, uint8_t index, off_t offset, uint32_t count, void *data, bool post) { VFIOUserRegionRW *msgp = NULL; - int flags = post ? VFIO_USER_NO_REPLY : 0; + int flags; int size = sizeof(*msgp) + count; + bool can_multi; int ret; if (count > proxy->max_xfer_size) { return -EINVAL; } + if (proxy->flags & VFIO_PROXY_NO_POST) { + post = false; + } + + /* write eligible to be in a WRITE_MULTI msg ? */ + can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post && + count <= VFIO_USER_MULTI_DATA; + + /* + * This should be a rare case, so first check without the lock, + * if we're wrong, vfio_send_queued() will flush any posted writes + * we missed here + */ + if (proxy->wr_multi != NULL || + (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) { + + /* + * re-check with lock + * + * if already building a WRITE_MULTI msg, + * add this one if possible else flush pending before + * sending the current one + * + * else if outgoing queue is over the highwater, + * start a new WRITE_MULTI message + */ + WITH_QEMU_LOCK_GUARD(&proxy->lock) { + if (proxy->wr_multi != NULL) { + if (can_multi) { + vfio_user_add_multi(proxy, index, offset, count, data); + return count; + } + vfio_user_flush_multi(proxy); + } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) { + vfio_user_create_multi(proxy); + vfio_user_add_multi(proxy, index, offset, count, data); + return count; + } + } + } + + flags = post ? VFIO_USER_NO_REPLY : 0; msgp = g_malloc0(size); vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags); msgp->offset = offset; @@ -1503,7 +1629,7 @@ static int vfio_user_region_write(VFIOUserProxy *proxy, uint8_t index, trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count); /* async send will free msg after it's sent */ - if (post && !(proxy->flags & VFIO_PROXY_NO_POST)) { + if (post) { vfio_user_send_async(proxy, &msgp->hdr, NULL); return count; } diff --git a/hw/vfio/user.h b/hw/vfio/user.h index ff2aa005eb..dc4d41cc0e 100644 --- a/hw/vfio/user.h +++ b/hw/vfio/user.h @@ -90,6 +90,8 @@ typedef struct VFIOUserProxy { VFIOUserMsg *last_nowait; VFIOUserMsg *part_recv; size_t recv_left; + VFIOUserWRMulti *wr_multi; + int num_outgoing; enum proxy_state state; } VFIOUserProxy; @@ -98,6 +100,11 @@ typedef struct VFIOUserProxy { #define VFIO_PROXY_NO_MMAP 0x2 #define VFIO_PROXY_FORCE_QUEUED 0x4 #define VFIO_PROXY_NO_POST 0x8 +#define VFIO_PROXY_USE_MULTI 0x10 + +/* coalescing high and low water marks for VFIOProxy num_outgoing */ +#define VFIO_USER_OUT_HIGH 1024 +#define VFIO_USER_OUT_LOW 128 typedef struct VFIODevice VFIODevice;