From patchwork Mon Feb 3 15:45:17 2025
X-Patchwork-Submitter: Keith Busch
X-Patchwork-Id: 13957744
From: Keith Busch
CC: Keith Busch
Subject: [PATCH 6/6] io_uring: cache nodes and mapped buffers
Date: Mon, 3 Feb 2025 07:45:17 -0800
Message-ID: <20250203154517.937623-7-kbusch@meta.com>
X-Mailer: git-send-email 2.43.5
In-Reply-To: <20250203154517.937623-1-kbusch@meta.com>
References: <20250203154517.937623-1-kbusch@meta.com>
X-Mailing-List: linux-block@vger.kernel.org

From: Keith Busch

Frequent alloc/free cycles on these are pretty costly. Use an io cache to
reuse these buffers more efficiently.

Signed-off-by: Keith Busch
---
 include/linux/io_uring_types.h |  16 ++---
 io_uring/filetable.c           |   2 +-
 io_uring/rsrc.c                | 108 ++++++++++++++++++++++++---------
 io_uring/rsrc.h                |   2 +-
 4 files changed, 92 insertions(+), 36 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index aa661ebfd6568..c0e0c1f92e5b1 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -67,8 +67,17 @@ struct io_file_table {
 	unsigned int alloc_hint;
 };
 
+struct io_alloc_cache {
+	void **entries;
+	unsigned int nr_cached;
+	unsigned int max_cached;
+	size_t elem_size;
+};
+
 struct io_buf_table {
 	struct io_rsrc_data data;
+	struct io_alloc_cache node_cache;
+	struct io_alloc_cache imu_cache;
 };
 
 struct io_hash_bucket {
@@ -222,13 +231,6 @@ struct io_submit_state {
 	struct blk_plug plug;
 };
 
-struct io_alloc_cache {
-	void **entries;
-	unsigned int nr_cached;
-	unsigned int max_cached;
-	size_t elem_size;
-};
-
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
index dd8eeec97acf6..a21660e3145ab 100644
--- a/io_uring/filetable.c
+++ b/io_uring/filetable.c
@@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 	if (slot_index >= ctx->file_table.data.nr)
 		return -EINVAL;
 
-	node = io_rsrc_node_alloc(IORING_RSRC_FILE);
+	node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
 	if (!node)
 		return -ENOMEM;
 
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 864c2eabf8efd..5434b0d992d62 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -117,23 +117,39 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
 			unpin_user_page(imu->bvec[i].bv_page);
 		if (imu->acct_pages)
 			io_unaccount_mem(ctx, imu->acct_pages);
-		kvfree(imu);
+		if (struct_size(imu, bvec, imu->nr_bvecs) >
+				ctx->buf_table.imu_cache.elem_size ||
+		    !io_alloc_cache_put(&ctx->buf_table.imu_cache, imu))
+			kvfree(imu);
 	}
 }
 
-struct io_rsrc_node *io_rsrc_node_alloc(int type)
+struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type)
 {
 	struct io_rsrc_node *node;
 
-	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (type == IORING_RSRC_FILE)
+		node = kmalloc(sizeof(*node), GFP_KERNEL);
+	else
+		node = io_cache_alloc(&ctx->buf_table.node_cache, GFP_KERNEL, NULL);
 	if (node) {
 		node->type = type;
 		node->refs = 1;
+		node->tag = 0;
+		node->file_ptr = 0;
 	}
 	return node;
 }
 
-__cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data)
+static __cold void __io_rsrc_data_free(struct io_rsrc_data *data)
+{
+	kvfree(data->nodes);
+	data->nodes = NULL;
+	data->nr = 0;
+}
+
+__cold void io_rsrc_data_free(struct io_ring_ctx *ctx,
+			      struct io_rsrc_data *data)
 {
 	if (!data->nr)
 		return;
@@ -141,9 +157,7 @@ __cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data)
 		if (data->nodes[data->nr])
 			io_put_rsrc_node(ctx, data->nodes[data->nr]);
 	}
-	kvfree(data->nodes);
-	data->nodes = NULL;
-	data->nr = 0;
+	__io_rsrc_data_free(data);
 }
 
 __cold int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr)
@@ -157,6 +171,31 @@ __cold int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr)
 	return -ENOMEM;
 }
 
+static __cold int io_rsrc_buffer_alloc(struct io_buf_table *table, unsigned nr)
+{
+	int ret;
+
+	ret = io_rsrc_data_alloc(&table->data, nr);
+	if (ret)
+		return ret;
+
+	ret = io_alloc_cache_init(&table->node_cache, nr,
+				  sizeof(struct io_rsrc_node));
+	if (ret)
+		goto out_1;
+
+	ret = io_alloc_cache_init(&table->imu_cache, nr, 512);
+	if (ret)
+		goto out_2;
+
+	return 0;
+out_2:
+	io_alloc_cache_free(&table->node_cache, kfree);
+out_1:
+	__io_rsrc_data_free(&table->data);
+	return ret;
+}
+
 static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 				 struct io_uring_rsrc_update2 *up,
 				 unsigned nr_args)
@@ -206,7 +245,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 			err = -EBADF;
 			break;
 		}
-		node = io_rsrc_node_alloc(IORING_RSRC_FILE);
+		node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
 		if (!node) {
 			err = -ENOMEM;
 			fput(file);
@@ -466,6 +505,8 @@ void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
 	case IORING_RSRC_KBUF:
 		if (node->buf)
 			io_buffer_unmap(ctx, node);
+		if (io_alloc_cache_put(&ctx->buf_table.node_cache, node))
+			return;
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -534,7 +575,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 			goto fail;
 		}
 		ret = -ENOMEM;
-		node = io_rsrc_node_alloc(IORING_RSRC_FILE);
+		node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
 		if (!node) {
 			fput(file);
 			goto fail;
@@ -554,11 +595,19 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	return ret;
 }
 
+static void io_rsrc_buffer_free(struct io_ring_ctx *ctx,
+				struct io_buf_table *table)
+{
+	io_rsrc_data_free(ctx, &table->data);
+	io_alloc_cache_free(&table->node_cache, kfree);
+	io_alloc_cache_free(&table->imu_cache, kfree);
+}
+
 int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
 {
 	if (!ctx->buf_table.data.nr)
 		return -ENXIO;
-	io_rsrc_data_free(ctx, &ctx->buf_table.data);
+	io_rsrc_buffer_free(ctx, &ctx->buf_table);
 	return 0;
 }
 
@@ -739,7 +788,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 	if (!iov->iov_base)
 		return NULL;
 
-	node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
+	node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
 	if (!node)
 		return ERR_PTR(-ENOMEM);
 	node->buf = NULL;
@@ -759,7 +808,10 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
 		coalesced = io_coalesce_buffer(&pages, &nr_pages, &data);
 	}
 
-	imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
+	if (struct_size(imu, bvec, nr_pages) > ctx->buf_table.imu_cache.elem_size)
+		imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
+	else
+		imu = io_cache_alloc(&ctx->buf_table.imu_cache, GFP_KERNEL, NULL);
 	if (!imu)
 		goto done;
 
@@ -805,9 +857,9 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 			    unsigned int nr_args, u64 __user *tags)
 {
 	struct page *last_hpage = NULL;
-	struct io_rsrc_data data;
 	struct iovec fast_iov, *iov = &fast_iov;
 	const struct iovec __user *uvec;
+	struct io_buf_table table;
 	int i, ret;
 
 	BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
@@ -816,13 +868,14 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 		return -EBUSY;
 	if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
 		return -EINVAL;
-	ret = io_rsrc_data_alloc(&data, nr_args);
+	ret = io_rsrc_buffer_alloc(&table, nr_args);
 	if (ret)
 		return ret;
 
 	if (!arg)
 		memset(iov, 0, sizeof(*iov));
+	ctx->buf_table = table;
 
 	for (i = 0; i < nr_args; i++) {
 		struct io_rsrc_node *node;
 		u64 tag = 0;
@@ -862,10 +915,8 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 			}
 			node->tag = tag;
 		}
-		data.nodes[i] = node;
+		table.data.nodes[i] = node;
 	}
-
-	ctx->buf_table.data = data;
 	if (ret)
 		io_sqe_buffers_unregister(ctx);
 	return ret;
@@ -878,11 +929,14 @@ static struct io_rsrc_node *io_buffer_alloc_node(struct io_ring_ctx *ctx,
 	struct io_mapped_ubuf *imu;
 	struct io_rsrc_node *node;
 
-	node = io_rsrc_node_alloc(IORING_RSRC_KBUF);
+	node = io_rsrc_node_alloc(ctx, IORING_RSRC_KBUF);
 	if (!node)
 		return NULL;
 
-	imu = kvmalloc(struct_size(imu, bvec, nr_bvecs), GFP_KERNEL);
+	if (struct_size(imu, bvec, nr_bvecs) > ctx->buf_table.imu_cache.elem_size)
+		imu = kvmalloc(struct_size(imu, bvec, nr_bvecs), GFP_KERNEL);
+	else
+		imu = io_cache_alloc(&ctx->buf_table.imu_cache, GFP_KERNEL, NULL);
 	if (!imu) {
 		io_put_rsrc_node(ctx, node);
 		return NULL;
@@ -1036,7 +1090,7 @@ static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2)
 static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
 			    struct io_uring_clone_buffers *arg)
 {
-	struct io_rsrc_data data;
+	struct io_buf_table table;
 	int i, ret, off, nr;
 	unsigned int nbufs;
 
@@ -1067,7 +1121,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	if (check_add_overflow(arg->nr, arg->dst_off, &nbufs))
 		return -EOVERFLOW;
 
-	ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.data.nr));
+	ret = io_rsrc_buffer_alloc(&table, max(nbufs, ctx->buf_table.data.nr));
 	if (ret)
 		return ret;
 
@@ -1076,7 +1130,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 		struct io_rsrc_node *src_node = ctx->buf_table.data.nodes[i];
 
 		if (src_node) {
-			data.nodes[i] = src_node;
+			table.data.nodes[i] = src_node;
 			src_node->refs++;
 		}
 	}
@@ -1106,7 +1160,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 		if (!src_node) {
 			dst_node = NULL;
 		} else {
-			dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
+			dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
 			if (!dst_node) {
 				ret = -ENOMEM;
 				goto out_free;
@@ -1115,12 +1169,12 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 			refcount_inc(&src_node->buf->refs);
 			dst_node->buf = src_node->buf;
 		}
-		data.nodes[off++] = dst_node;
+		table.data.nodes[off++] = dst_node;
 		i++;
 	}
 
 	/*
-	 * If asked for replace, put the old table. data->nodes[] holds both
+	 * If asked for replace, put the old table. table.data->nodes[] holds both
 	 * old and new nodes at this point.
 	 */
 	if (arg->flags & IORING_REGISTER_DST_REPLACE)
@@ -1133,10 +1187,10 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
 	 * entry).
 	 */
 	WARN_ON_ONCE(ctx->buf_table.data.nr);
 
-	ctx->buf_table.data = data;
+	ctx->buf_table = table;
 	return 0;
 out_free:
-	io_rsrc_data_free(ctx, &data);
+	io_rsrc_buffer_free(ctx, &table);
 	return ret;
 }
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index d1d90d9cd2b43..759ac373b0dc6 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -46,7 +46,7 @@ struct io_imu_folio_data {
 	unsigned int nr_folios;
 };
 
-struct io_rsrc_node *io_rsrc_node_alloc(int type);
+struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type);
 void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node);
 void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data);
 int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);
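
For readers outside the io_uring tree, the reuse pattern applied above is: keep a
small per-ring array of recycled objects, pop from it on allocation, push back on
free, and fall back to the regular allocator whenever the cache is empty or full,
or the object is larger than the cached element size (see the struct_size() checks
against imu_cache.elem_size in the diff). Below is a minimal, self-contained
userspace model of that idea; the obj_cache_* names are hypothetical and this is
not the kernel's io_alloc_cache API, just a sketch of the same pattern. The
512-byte element size mirrors the value passed to io_alloc_cache_init() for
imu_cache.

/*
 * Minimal userspace model of the alloc-cache pattern used by this patch.
 * Hypothetical helper names (obj_cache_*); not the kernel io_alloc_cache API.
 */
#include <stdio.h>
#include <stdlib.h>

struct obj_cache {
	void **entries;          /* recycled objects, LIFO */
	unsigned int nr_cached;  /* how many entries are currently stashed */
	unsigned int max_cached; /* capacity of entries[] */
	size_t elem_size;        /* size of each cached object */
};

static int obj_cache_init(struct obj_cache *c, unsigned int max, size_t elem_size)
{
	c->entries = calloc(max, sizeof(void *));
	if (!c->entries)
		return -1;
	c->nr_cached = 0;
	c->max_cached = max;
	c->elem_size = elem_size;
	return 0;
}

/* Pop a recycled object if one is available, otherwise allocate a fresh one. */
static void *obj_cache_alloc(struct obj_cache *c, size_t size)
{
	if (size > c->elem_size)
		return malloc(size);    /* too big to cache: plain allocation */
	if (c->nr_cached)
		return c->entries[--c->nr_cached];
	return malloc(c->elem_size);
}

/* Stash the object for reuse; returns 0 if it was freed instead. */
static int obj_cache_put(struct obj_cache *c, void *obj, size_t size)
{
	if (size > c->elem_size || c->nr_cached >= c->max_cached) {
		free(obj);
		return 0;
	}
	c->entries[c->nr_cached++] = obj;
	return 1;
}

static void obj_cache_destroy(struct obj_cache *c)
{
	while (c->nr_cached)
		free(c->entries[--c->nr_cached]);
	free(c->entries);
}

int main(void)
{
	struct obj_cache cache;

	if (obj_cache_init(&cache, 16, 512))
		return 1;

	/* Small object: served from, and returned to, the cache. */
	void *a = obj_cache_alloc(&cache, 128);
	printf("small object cached on put: %d\n", obj_cache_put(&cache, a, 128));

	/* Oversized object: bypasses the cache entirely, like a large imu. */
	void *b = obj_cache_alloc(&cache, 4096);
	printf("large object cached on put: %d\n", obj_cache_put(&cache, b, 4096));

	obj_cache_destroy(&cache);
	return 0;
}

io_buffer_unmap() and io_free_rsrc_node() in the diff mirror obj_cache_put(): an
object is only freed outright when it does not fit the cached element size or the
cache is already full.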