From patchwork Sun Apr 30 09:35:23 2023
X-Patchwork-Submitter: Pavel Begunkov
X-Patchwork-Id: 13227168
From: Pavel Begunkov
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, ming.lei@redhat.com
Subject: [RFC 1/7] io_uring: add io_mapped_ubuf caches
Date: Sun, 30 Apr 2023 10:35:23 +0100
Message-Id: <72f0d52521c35c1aee2d1327a3c1cd4ccb0bc6c3.1682701588.git.asml.silence@gmail.com>
X-Mailing-List: io-uring@vger.kernel.org

We'll be allocating lots of io_mapped_ubuf structures shortly, so add an
allocation cache for them.

Signed-off-by: Pavel Begunkov
---
 include/linux/io_uring_types.h |  2 ++
 io_uring/io_uring.c            |  9 ++++++++
 io_uring/rsrc.c                | 39 ++++++++++++++++++++++++++++++++--
 io_uring/rsrc.h                |  6 +++++-
 4 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 1b2a20a42413..3d103a00264c 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -336,6 +336,8 @@ struct io_ring_ctx {
 	struct wait_queue_head		rsrc_quiesce_wq;
 	unsigned			rsrc_quiesce;
 
+	struct io_alloc_cache		reg_buf_cache;
+
 	struct list_head		io_buffers_pages;
 
 #if defined(CONFIG_UNIX)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3d43df8f1e4e..fdd62dbfd0ba 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -312,6 +312,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
 	io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
 			    sizeof(struct io_rsrc_node));
+	io_alloc_cache_init(&ctx->reg_buf_cache, IO_NODE_ALLOC_CACHE_MAX,
+			    sizeof(struct io_async_msghdr));
 	io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX,
 			    sizeof(struct async_poll));
 	io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX,
@@ -2827,6 +2829,11 @@ static void io_rsrc_node_cache_free(struct io_cache_entry *entry)
 	kfree(container_of(entry, struct io_rsrc_node, cache));
 }
 
+static void io_reg_buf_cache_free(struct io_cache_entry *entry)
+{
+	kvfree(container_of(entry, struct io_mapped_ubuf, cache));
+}
+
 static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
 	io_sq_thread_finish(ctx);
@@ -2865,6 +2872,8 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
 
 	io_alloc_cache_free(&ctx->rsrc_node_cache, io_rsrc_node_cache_free);
+	io_alloc_cache_free(&ctx->reg_buf_cache, io_reg_buf_cache_free);
+
 	if (ctx->mm_account) {
 		mmdrop(ctx->mm_account);
 		ctx->mm_account = NULL;
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index ddee7adb4006..fef94f8d788d 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -33,6 +33,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 #define IORING_MAX_FIXED_FILES	(1U << 20)
 #define IORING_MAX_REG_BUFFERS	(1U << 14)
 
+#define IO_BUF_CACHE_MAX_BVECS	64
+
 int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
 {
 	unsigned long page_limit, cur_pages, new_pages;
@@ -78,6 +80,39 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
 	return 0;
 }
 
+static void io_put_reg_buf(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
+{
+	lockdep_assert_held(&ctx->uring_lock);
+
+	if ((imu->max_bvecs != IO_BUF_CACHE_MAX_BVECS) ||
+	    !io_alloc_cache_put(&ctx->reg_buf_cache, &imu->cache))
+		kvfree(imu);
+}
+
+static struct io_mapped_ubuf *io_alloc_reg_buf(struct io_ring_ctx *ctx,
+					       int nr_bvecs)
+{
+	struct io_cache_entry *entry;
+	struct io_mapped_ubuf *imu;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	if (nr_bvecs > IO_BUF_CACHE_MAX_BVECS) {
+do_alloc:
+		imu = kvmalloc(struct_size(imu, bvec, nr_bvecs), GFP_KERNEL);
+		if (!imu)
+			return NULL;
+	} else {
+		nr_bvecs = IO_BUF_CACHE_MAX_BVECS;
+		entry = io_alloc_cache_get(&ctx->reg_buf_cache);
+		if (!entry)
+			goto do_alloc;
+		imu = container_of(entry, struct io_mapped_ubuf, cache);
+	}
+	imu->max_bvecs = nr_bvecs;
+	return imu;
+}
+
 static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
 		       void __user *arg, unsigned index)
 {
@@ -137,7 +172,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
 			unpin_user_page(imu->bvec[i].bv_page);
 		if (imu->acct_pages)
 			io_unaccount_mem(ctx, imu->acct_pages);
-		kvfree(imu);
+		io_put_reg_buf(ctx, imu);
 	}
 	*slot = NULL;
 }
@@ -1134,7 +1169,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 		}
 	}
 
-	imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
+	imu = io_alloc_reg_buf(ctx, nr_pages);
 	if (!imu)
 		goto done;
 
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 0a8a95e9b99e..f34de451a79a 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -50,9 +50,13 @@ struct io_rsrc_node {
 };
 
 struct io_mapped_ubuf {
-	u64		ubuf;
+	union {
+		struct io_cache_entry	cache;
+		u64			ubuf;
+	};
 	u64		ubuf_end;
 	unsigned int	nr_bvecs;
+	unsigned int	max_bvecs;
 	unsigned long	acct_pages;
 	struct bio_vec	bvec[];
 };
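For illustration only, a minimal sketch (not part of the patch) of how a
caller holding uring_lock is expected to use the new cache:
io_alloc_reg_buf() either recycles an entry from reg_buf_cache or falls
back to kvmalloc(), and io_put_reg_buf() only caches buffers sized to
IO_BUF_CACHE_MAX_BVECS, freeing everything else.

	/* hypothetical caller, ctx->uring_lock already held */
	static int example_use_cached_imu(struct io_ring_ctx *ctx, int nr_bvecs)
	{
		struct io_mapped_ubuf *imu;

		imu = io_alloc_reg_buf(ctx, nr_bvecs);	/* cache hit or kvmalloc() */
		if (!imu)
			return -ENOMEM;

		/* ... fill imu->bvec[], set imu->nr_bvecs, use the buffer ... */

		io_put_reg_buf(ctx, imu);		/* recycled or kvfree()d */
		return 0;
	}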
From patchwork Sun Apr 30 09:35:24 2023
X-Patchwork-Submitter: Pavel Begunkov
X-Patchwork-Id: 13227170
From: Pavel Begunkov
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, ming.lei@redhat.com
Subject: [RFC 2/7] io_uring: add reg-buffer data directions
Date: Sun, 30 Apr 2023 10:35:24 +0100
Message-Id: <01cf42c097ca12984b6dbe01407319b05b123824.1682701588.git.asml.silence@gmail.com>
X-Mailing-List: io-uring@vger.kernel.org

There will be buffers that only allow reading from or writing to them, so
add a data direction mask and check it when importing a buffer.

Signed-off-by: Pavel Begunkov
---
 io_uring/rsrc.c | 5 +++++
 io_uring/rsrc.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index fef94f8d788d..b6305ae3538c 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1185,6 +1185,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	imu->ubuf = (unsigned long) iov->iov_base;
 	imu->ubuf_end = imu->ubuf + iov->iov_len;
 	imu->nr_bvecs = nr_pages;
+	imu->dir_mask = (1U << ITER_SOURCE) | (1U << ITER_DEST);
 	*pimu = imu;
 	ret = 0;
 
@@ -1274,6 +1275,8 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
 	u64 buf_end;
 	size_t offset;
 
+	BUILD_BUG_ON((1U << ITER_SOURCE) & (1U << ITER_DEST));
+
 	if (WARN_ON_ONCE(!imu))
 		return -EFAULT;
 	if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))
@@ -1281,6 +1284,8 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
 	/* not inside the mapped region */
 	if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end))
 		return -EFAULT;
+	if (unlikely(!((1U << ddir) & imu->dir_mask)))
+		return -EFAULT;
 
 	/*
 	 * Might not be a start of buffer, set size appropriately
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index f34de451a79a..10daa25d9194 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -57,6 +57,7 @@ struct io_mapped_ubuf {
 	u64		ubuf_end;
 	unsigned int	nr_bvecs;
 	unsigned int	max_bvecs;
+	unsigned int	dir_mask;
 	unsigned long	acct_pages;
 	struct bio_vec	bvec[];
 };
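As a purely illustrative sketch of the semantics of the new mask: each bit
corresponds to an iter data direction, and an import whose direction is not
in the mask is rejected with -EFAULT. The helper below is hypothetical and
only restates the check from io_import_fixed().

	/* hypothetical helper: may this buffer be used in direction ddir? */
	static bool example_dir_allowed(const struct io_mapped_ubuf *imu, int ddir)
	{
		/*
		 * ddir is ITER_SOURCE when data is read out of the buffer
		 * (write/send style requests) and ITER_DEST when data is
		 * written into it (read/recv style requests).
		 */
		return (1U << ddir) & imu->dir_mask;
	}

Buffers registered from userspace keep allowing both directions; a buffer
filled from a file later in this series is marked source-only.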
From patchwork Sun Apr 30 09:35:25 2023
X-Patchwork-Submitter: Pavel Begunkov
X-Patchwork-Id: 13227169
From: Pavel Begunkov
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, ming.lei@redhat.com
Subject: [RFC 3/7] io_uring: fail loop_rw_iter with pure bvec bufs
Date: Sun, 30 Apr 2023 10:35:25 +0100
X-Mailing-List: io-uring@vger.kernel.org

There will be registered buffers that have never had a userspace mapping,
and to use them the file has to work with iterators. Fail loop_rw_iter()
if it encounters such a buffer.
Signed-off-by: Pavel Begunkov
---
 io_uring/rw.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/io_uring/rw.c b/io_uring/rw.c
index 6c7d2654770e..b2ad99e0e304 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -428,11 +428,18 @@ static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
  */
 static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter)
 {
+	struct io_kiocb *req = cmd_to_io_kiocb(rw);
 	struct kiocb *kiocb = &rw->kiocb;
 	struct file *file = kiocb->ki_filp;
 	ssize_t ret = 0;
 	loff_t *ppos;
 
+	if (req->opcode == IORING_OP_READ_FIXED ||
+	    req->opcode == IORING_OP_WRITE_FIXED) {
+		if (!req->imu->ubuf)
+			return -EFAULT;
+	}
+
 	/*
 	 * Don't support polled IO through this interface, and we can't
 	 * support non-blocking either. For the latter, this just causes
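A hedged note on how such buffers are detected: loop_rw_iter() is the
fallback for files without ->read_iter()/->write_iter() and issues plain
->read()/->write() calls, which need a userspace address. A buffer obtained
from a file later in this series is installed with imu->ubuf == 0, so the
check added above amounts to:

	/* annotated restatement of the check, names as in the patch */
	if (req->opcode == IORING_OP_READ_FIXED ||
	    req->opcode == IORING_OP_WRITE_FIXED) {
		if (!req->imu->ubuf)	/* bvec-only buffer, no user VA to pass */
			return -EFAULT;	/* the non-iterator path cannot use it */
	}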
From patchwork Sun Apr 30 09:35:26 2023
X-Patchwork-Submitter: Pavel Begunkov
X-Patchwork-Id: 13227171
From: Pavel Begunkov
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, ming.lei@redhat.com
Subject: [RFC 4/7] io_uring/rsrc: introduce struct iou_buf_desc
Date: Sun, 30 Apr 2023 10:35:26 +0100
X-Mailing-List: io-uring@vger.kernel.org

Add struct iou_buf_desc, which will be used for the new get_buf operation.
It'll be handed to a file via a new file operation to be filled. The
content should eventually end up in struct io_mapped_ubuf, so to avoid
extra copies place the descriptor directly inside struct io_mapped_ubuf.

Signed-off-by: Pavel Begunkov
---
 include/linux/io_uring.h |  6 ++++++
 io_uring/rsrc.c          | 13 +++++++------
 io_uring/rsrc.h          | 11 +++++------
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 35b9328ca335..fddb5d52b776 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -22,6 +22,12 @@ enum io_uring_cmd_flags {
 	IO_URING_F_IOPOLL	= (1 << 10),
 };
 
+struct iou_buf_desc {
+	unsigned		nr_bvecs;
+	unsigned		max_bvecs;
+	struct bio_vec		*bvec;
+};
+
 struct io_uring_cmd {
 	struct file	*file;
 	const void	*cmd;
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index b6305ae3538c..0edcebb6b5cb 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -84,7 +84,7 @@ static void io_put_reg_buf(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
 {
 	lockdep_assert_held(&ctx->uring_lock);
 
-	if ((imu->max_bvecs != IO_BUF_CACHE_MAX_BVECS) ||
+	if ((imu->desc.max_bvecs != IO_BUF_CACHE_MAX_BVECS) ||
 	    !io_alloc_cache_put(&ctx->reg_buf_cache, &imu->cache))
 		kvfree(imu);
 }
@@ -109,7 +109,8 @@ static struct io_mapped_ubuf *io_alloc_reg_buf(struct io_ring_ctx *ctx,
 			goto do_alloc;
 		imu = container_of(entry, struct io_mapped_ubuf, cache);
 	}
-	imu->max_bvecs = nr_bvecs;
+	imu->desc.bvec = imu->bvec;
+	imu->desc.max_bvecs = nr_bvecs;
 	return imu;
 }
 
@@ -168,7 +169,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
 	unsigned int i;
 
 	if (imu != ctx->dummy_ubuf) {
-		for (i = 0; i < imu->nr_bvecs; i++)
+		for (i = 0; i < imu->desc.nr_bvecs; i++)
 			unpin_user_page(imu->bvec[i].bv_page);
 		if (imu->acct_pages)
 			io_unaccount_mem(ctx, imu->acct_pages);
@@ -1020,7 +1021,7 @@ static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
 	for (i = 0; i < ctx->nr_user_bufs; i++) {
 		struct io_mapped_ubuf *imu = ctx->user_bufs[i];
 
-		for (j = 0; j < imu->nr_bvecs; j++) {
+		for (j = 0; j < imu->desc.nr_bvecs; j++) {
 			if (!PageCompound(imu->bvec[j].bv_page))
 				continue;
 			if (compound_head(imu->bvec[j].bv_page) == hpage)
@@ -1184,7 +1185,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 	/* store original address for later verification */
 	imu->ubuf = (unsigned long) iov->iov_base;
 	imu->ubuf_end = imu->ubuf + iov->iov_len;
-	imu->nr_bvecs = nr_pages;
+	imu->desc.nr_bvecs = nr_pages;
 	imu->dir_mask = (1U << ITER_SOURCE) | (1U << ITER_DEST);
 	*pimu = imu;
 	ret = 0;
@@ -1292,7 +1293,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
 	 * and advance us to the beginning.
 	 */
 	offset = buf_addr - imu->ubuf;
-	iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len);
+	iov_iter_bvec(iter, ddir, imu->bvec, imu->desc.nr_bvecs, offset + len);
 
 	if (offset) {
 		/*
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 10daa25d9194..9ac10b3d25ac 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -54,12 +54,11 @@ struct io_mapped_ubuf {
 	union {
 		struct io_cache_entry	cache;
 		u64			ubuf;
 	};
-	u64		ubuf_end;
-	unsigned int	nr_bvecs;
-	unsigned int	max_bvecs;
-	unsigned int	dir_mask;
-	unsigned long	acct_pages;
-	struct bio_vec	bvec[];
+	u64			ubuf_end;
+	struct iou_buf_desc	desc;
+	unsigned int		dir_mask;
+	unsigned long		acct_pages;
+	struct bio_vec		bvec[];
 };
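To make the intended use of the descriptor concrete, here is a hypothetical
provider-side sketch (names are made up, nothing below is part of the
patch): a file implementation receives a descriptor whose bvec points at
max_bvecs preallocated entries and fills them with the pages backing the
requested range.

	/* illustrative only: fill a descriptor from an array of pages */
	static int example_fill_desc(struct iou_buf_desc *desc,
				     struct page **pages, int nr, size_t len)
	{
		int i;

		if (nr > desc->max_bvecs)
			return -E2BIG;

		for (i = 0; i < nr; i++) {
			unsigned int seg = min_t(size_t, len, PAGE_SIZE);

			bvec_set_page(&desc->bvec[i], pages[i], seg, 0);
			len -= seg;
		}
		desc->nr_bvecs = nr;
		return 0;
	}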
From patchwork Sun Apr 30 09:35:27 2023
X-Patchwork-Submitter: Pavel Begunkov
X-Patchwork-Id: 13227172
From: Pavel Begunkov
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, ming.lei@redhat.com
Subject: [RFC 5/7] io_uring/rsrc: add buffer release callbacks
Date: Sun, 30 Apr 2023 10:35:27 +0100
X-Mailing-List: io-uring@vger.kernel.org

Other buffer types may need a different way of being released. For
example, we may want to put pages into some kind of cache or bulk-put
them. Add a release callback.

Signed-off-by: Pavel Begunkov
---
 include/linux/io_uring.h |  2 ++
 io_uring/rsrc.c          | 14 ++++++++++----
 io_uring/rsrc.h          |  5 +++++
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index fddb5d52b776..e0e7df5beefc 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -26,6 +26,8 @@ struct iou_buf_desc {
 	unsigned	nr_bvecs;
 	unsigned	max_bvecs;
 	struct bio_vec	*bvec;
+	void		(*release)(struct iou_buf_desc *);
+	void		*private;
 };
 
 struct io_uring_cmd {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 0edcebb6b5cb..3799470fd45e 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -111,6 +111,8 @@ static struct io_mapped_ubuf *io_alloc_reg_buf(struct io_ring_ctx *ctx,
 	}
 	imu->desc.bvec = imu->bvec;
 	imu->desc.max_bvecs = nr_bvecs;
+	imu->desc.private = NULL;
+	imu->desc.release = NULL;
 	return imu;
 }
 
@@ -169,10 +171,14 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slo
 	unsigned int i;
 
 	if (imu != ctx->dummy_ubuf) {
-		for (i = 0; i < imu->desc.nr_bvecs; i++)
-			unpin_user_page(imu->bvec[i].bv_page);
-		if (imu->acct_pages)
-			io_unaccount_mem(ctx, imu->acct_pages);
+		if (imu->desc.release) {
+			io_reg_buf_release(imu);
+		} else {
+			for (i = 0; i < imu->desc.nr_bvecs; i++)
+				unpin_user_page(imu->bvec[i].bv_page);
+			if (imu->acct_pages)
+				io_unaccount_mem(ctx, imu->acct_pages);
+		}
 		io_put_reg_buf(ctx, imu);
 	}
 	*slot = NULL;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 9ac10b3d25ac..29ce9a8a2277 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -169,4 +169,9 @@ static inline void __io_unaccount_mem(struct user_struct *user,
 	atomic_long_sub(nr_pages, &user->locked_vm);
 }
 
+static inline void io_reg_buf_release(struct io_mapped_ubuf *imu)
+{
+	imu->desc.release(&imu->desc);
+}
+
 #endif
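A hedged, provider-side sketch of how the new hook is meant to be used (all
names below are invented for illustration): the code that fills the
descriptor stashes its own state in ->private and points ->release at a
routine that returns the pages once io_uring is done with the buffer.

	struct example_state {
		struct page	**pages;
		int		nr_pages;
	};

	static void example_release(struct iou_buf_desc *desc)
	{
		struct example_state *st = desc->private;
		int i;

		for (i = 0; i < st->nr_pages; i++)
			put_page(st->pages[i]);	/* or return them to a pool */
		kfree(st->pages);
		kfree(st);
	}

	static void example_install_release(struct iou_buf_desc *desc,
					    struct example_state *st)
	{
		desc->private = st;
		desc->release = example_release;
	}

io_buffer_unmap() then calls ->release() through io_reg_buf_release()
instead of unpinning user pages.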
From patchwork Sun Apr 30 09:35:28 2023
X-Patchwork-Submitter: Pavel Begunkov
X-Patchwork-Id: 13227173
From: Pavel Begunkov
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, ming.lei@redhat.com
Subject: [RFC 6/7] io_uring/rsrc: introduce helper installing one buffer
Date: Sun, 30 Apr 2023 10:35:28 +0100
X-Mailing-List: io-uring@vger.kernel.org

Add a new helper, io_install_buffer(), which will later be used by
operations that want to install buffers into the registered buffer table.

Signed-off-by: Pavel Begunkov
---
 io_uring/rsrc.c | 15 +++++++++++++++
 io_uring/rsrc.h |  3 +++
 2 files changed, 18 insertions(+)
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 3799470fd45e..db4286b42dce 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -517,6 +517,21 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
 	return done ? done : err;
 }
 
+int io_install_buffer(struct io_ring_ctx *ctx,
+		      struct io_mapped_ubuf *imu,
+		      unsigned i)
+{
+	if (unlikely(i >= ctx->nr_user_bufs))
+		return -EFAULT;
+
+	i = array_index_nospec(i, ctx->nr_user_bufs);
+	if (unlikely(ctx->user_bufs[i] != ctx->dummy_ubuf))
+		return -EINVAL;
+
+	ctx->user_bufs[i] = imu;
+	return 0;
+}
+
 static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 				     struct io_uring_rsrc_update2 *up,
 				     unsigned nr_args)
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 29ce9a8a2277..aba95bdd060e 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -75,6 +75,9 @@ void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
 int io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
 int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
 			    unsigned int nr_args, u64 __user *tags);
+int io_install_buffer(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu,
+		      unsigned i);
+
 void __io_sqe_files_unregister(struct io_ring_ctx *ctx);
 int io_sqe_files_unregister(struct io_ring_ctx *ctx);
 int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
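The intended calling convention, shown as a small sketch (patch 7 below
uses exactly this pattern): the target slot must still hold the dummy
buffer, and the table update happens under the ring's submit lock.

	static int example_install(struct io_ring_ctx *ctx,
				   struct io_mapped_ubuf *imu,
				   unsigned int buf_index,
				   unsigned int issue_flags)
	{
		int err;

		io_ring_submit_lock(ctx, issue_flags);
		err = io_install_buffer(ctx, imu, buf_index);
		io_ring_submit_unlock(ctx, issue_flags);

		/* -EFAULT: index out of range, -EINVAL: slot already occupied */
		return err;
	}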
From patchwork Sun Apr 30 09:35:29 2023
X-Patchwork-Submitter: Pavel Begunkov
X-Patchwork-Id: 13227174
From: Pavel Begunkov
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, ming.lei@redhat.com
Subject: [RFC 7/7] io_uring,fs: introduce IORING_OP_GET_BUF
Date: Sun, 30 Apr 2023 10:35:29 +0100
X-Mailing-List: io-uring@vger.kernel.org

There are several problems with splice requests, aka IORING_OP_SPLICE:

1) They are always executed by a worker thread, which is a slow path,
   as we don't have any reliable way to execute them NOWAIT.
2) They can't easily poll for data, as there are two files they operate
   on. We would either need to track which file to poll or poll both of
   them; in either case it would be a mess and add a lot of overhead.
3) They have to have pipes in the middle, which adds overhead and is not
   great from the uapi design perspective for io_uring requests.
4) We want to operate on spliced data as on a normal buffer, i.e.
   write / send / etc. the data as usual while it stays zerocopy.

These issues can be partially worked around, but the root cause is that
the design of IORING_OP_SPLICE is suboptimal for io_uring.

Introduce a new request type called IORING_OP_GET_BUF, inspired by
splice(2) as well as other proposals like fused requests. The main idea
is to use io_uring's registered buffers as the middle man instead of
pipes. Once a buffer is fetched / spliced from a file using a new fops
callback ->iou_get_buf, it's installed as a registered buffer and can be
used by all operations supporting the feature. Once userspace releases
the buffer, io_uring will wait for all requests using the buffer to
complete and then use a file-provided ->release() callback to return the
buffer back. It operates on the level of the entire buffer instead of
individual pages as splice(2) does.

As was noted in the fused cmd work this idea came from, this approach
should be more flexible and efficient, and it also leaves room for
further optimisations like custom caching or avoiding page refcounting
altogether.
Signed-off-by: Pavel Begunkov
---
 include/linux/fs.h            |  2 +
 include/linux/io_uring.h      | 11 +++++
 include/uapi/linux/io_uring.h |  1 +
 io_uring/opdef.c              | 11 +++++
 io_uring/rsrc.c               |  2 +-
 io_uring/rsrc.h               |  2 +
 io_uring/splice.c             | 90 +++++++++++++++++++++++++++++++++++
 io_uring/splice.h             |  4 ++
 8 files changed, 122 insertions(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 475d88640d3d..a2528a39571f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1753,6 +1753,7 @@ struct dir_context {
 
 struct iov_iter;
 struct io_uring_cmd;
+struct iou_get_buf_info;
 
 struct file_operations {
 	struct module *owner;
@@ -1798,6 +1799,7 @@ struct file_operations {
 	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
 	int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
 				unsigned int poll_flags);
+	int (*iou_get_buf)(struct file *file, struct iou_get_buf_info *);
 } __randomize_layout;
 
 struct inode_operations {
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index e0e7df5beefc..9564db555bab 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -30,6 +30,17 @@ struct iou_buf_desc {
 	void		*private;
 };
 
+enum {
+	IOU_GET_BUF_F_NOWAIT	= 1,
+};
+
+struct iou_get_buf_info {
+	loff_t			off;
+	size_t			len;
+	unsigned		flags;
+	struct iou_buf_desc	*desc;
+};
+
 struct io_uring_cmd {
 	struct file	*file;
 	const void	*cmd;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 0716cb17e436..b244215d03ad 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -223,6 +223,7 @@ enum io_uring_op {
 	IORING_OP_URING_CMD,
 	IORING_OP_SEND_ZC,
 	IORING_OP_SENDMSG_ZC,
+	IORING_OP_GET_BUF,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index cca7c5b55208..d3b7144c685a 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -428,6 +428,13 @@ const struct io_issue_def io_issue_defs[] = {
 		.prep			= io_eopnotsupp_prep,
 #endif
 	},
+	[IORING_OP_GET_BUF] = {
+		.needs_file		= 1,
+		.unbound_nonreg_file	= 1,
+		.pollin			= 1,
+		.prep			= io_get_buf_prep,
+		.issue			= io_get_buf,
+	},
 };
 
 
@@ -648,6 +655,10 @@ const struct io_cold_def io_cold_defs[] = {
 		.fail			= io_sendrecv_fail,
 #endif
 	},
+	[IORING_OP_GET_BUF] = {
+		.name			= "IORING_OP_GET_BUF",
+		.cleanup		= io_get_buf_cleanup,
+	},
 };
 
 const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index db4286b42dce..bdcd417bca87 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -89,7 +89,7 @@ static void io_put_reg_buf(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
 		kvfree(imu);
 }
 
-static struct io_mapped_ubuf *io_alloc_reg_buf(struct io_ring_ctx *ctx,
+struct io_mapped_ubuf *io_alloc_reg_buf(struct io_ring_ctx *ctx,
 					int nr_bvecs)
 {
 	struct io_cache_entry *entry;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index aba95bdd060e..6aaf7acb60c5 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -177,4 +177,6 @@ static inline void io_reg_buf_release(struct io_mapped_ubuf *imu)
 	imu->desc.release(&imu->desc);
 }
 
+struct io_mapped_ubuf *io_alloc_reg_buf(struct io_ring_ctx *ctx, int nr_bvecs);
+
 #endif
diff --git a/io_uring/splice.c b/io_uring/splice.c
index 2a4bbb719531..3d50334caec5 100644
--- a/io_uring/splice.c
+++ b/io_uring/splice.c
@@ -13,6 +13,7 @@
 
 #include "io_uring.h"
 #include "splice.h"
+#include "rsrc.h"
 
 struct io_splice {
 	struct file *file_out;
@@ -119,3 +120,92 @@ int io_splice(struct io_kiocb *req, unsigned int issue_flags)
 	io_req_set_res(req, ret, 0);
 	return IOU_OK;
 }
+
+struct io_get_buf {
+	struct file			*file;
+	struct io_mapped_ubuf		*imu;
+	int				max_pages;
+	loff_t				off;
+	u64				len;
+};
+
+void io_get_buf_cleanup(struct io_kiocb *req)
+{
+	struct io_get_buf *gb = io_kiocb_to_cmd(req, struct io_get_buf);
+	struct io_mapped_ubuf *imu = gb->imu;
+
+	if (!imu)
+		return;
+	if (imu->desc.nr_bvecs && !WARN_ON_ONCE(!imu->desc.release))
+		io_reg_buf_release(imu);
+
+	kvfree(imu);
+	gb->imu = NULL;
+}
+
+int io_get_buf_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_get_buf *gb = io_kiocb_to_cmd(req, struct io_get_buf);
+	struct io_mapped_ubuf *imu;
+	int nr_pages;
+
+	if (unlikely(sqe->splice_flags || sqe->splice_fd_in || sqe->ioprio ||
+		     sqe->addr || sqe->addr3))
+		return -EINVAL;
+
+	req->buf_index = READ_ONCE(sqe->buf_index);
+	gb->len = READ_ONCE(sqe->len);
+	gb->off = READ_ONCE(sqe->off);
+	nr_pages = (gb->len >> PAGE_SHIFT) + 2;
+	gb->max_pages = nr_pages;
+
+	gb->imu = imu = io_alloc_reg_buf(req->ctx, nr_pages);
+	if (!imu)
+		return -ENOMEM;
+	imu->desc.nr_bvecs = 0;
+	req->flags |= REQ_F_NEED_CLEANUP;
+	return 0;
+}
+
+int io_get_buf(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_get_buf *gb = io_kiocb_to_cmd(req, struct io_get_buf);
+	struct io_mapped_ubuf *imu = gb->imu;
+	struct iou_get_buf_info bi;
+	int ret, err;
+
+	bi.off = gb->off;
+	bi.len = gb->len;
+	bi.flags = (issue_flags & IO_URING_F_NONBLOCK) ? IOU_GET_BUF_F_NOWAIT : 0;
+	bi.desc = &imu->desc;
+
+	if (!gb->file->f_op->iou_get_buf)
+		return -ENOTSUPP;
+	ret = gb->file->f_op->iou_get_buf(gb->file, &bi);
+	if (ret < 0) {
+		if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
+			return -EAGAIN;
+		goto done;
+	}
+
+	imu->ubuf = 0;
+	imu->ubuf_end = ret;
+	imu->dir_mask = 1U << ITER_SOURCE;
+	imu->acct_pages = 0;
+
+	io_ring_submit_lock(req->ctx, issue_flags);
+	err = io_install_buffer(req->ctx, imu, req->buf_index);
+	io_ring_submit_unlock(req->ctx, issue_flags);
+	if (unlikely(err)) {
+		ret = err;
+		goto done;
+	}
+
+	gb->imu = NULL;
+	req->flags &= ~REQ_F_NEED_CLEANUP;
+done:
+	if (ret != gb->len)
+		req_set_fail(req);
+	io_req_set_res(req, ret, 0);
+	return IOU_OK;
+}
diff --git a/io_uring/splice.h b/io_uring/splice.h
index 542f94168ad3..2b923fc2bbf1 100644
--- a/io_uring/splice.h
+++ b/io_uring/splice.h
@@ -5,3 +5,7 @@ int io_tee(struct io_kiocb *req, unsigned int issue_flags);
 
 int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_splice(struct io_kiocb *req, unsigned int issue_flags);
+
+int io_get_buf_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_get_buf(struct io_kiocb *req, unsigned int issue_flags);
+void io_get_buf_cleanup(struct io_kiocb *req);
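For the whole series, a userspace sketch of the intended flow (not part of
the patches): pull data from src_fd into registered-buffer slot 0 with
IORING_OP_GET_BUF, wait for it, then write it out with a fixed-buffer
request. liburing has no prep helper for the RFC opcode and the opcode is
only defined with this series applied, so that SQE is filled by hand; the
program assumes a sparse buffer table was registered up front with
io_uring_register_buffers_sparse(), and error handling is omitted.

	#include <liburing.h>

	static void get_buf_then_write(struct io_uring *ring, int src_fd,
				       int dst_fd, unsigned int nbytes)
	{
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		int got;

		sqe = io_uring_get_sqe(ring);
		io_uring_prep_rw(IORING_OP_GET_BUF, sqe, src_fd, NULL, nbytes, 0);
		sqe->buf_index = 0;		/* install into registered slot 0 */

		io_uring_submit_and_wait(ring, 1);
		io_uring_peek_cqe(ring, &cqe);
		got = cqe->res;			/* bytes actually obtained */
		io_uring_cqe_seen(ring, cqe);

		/* the spliced pages now back slot 0; its address range starts at 0 */
		sqe = io_uring_get_sqe(ring);
		io_uring_prep_write_fixed(sqe, dst_fd, NULL, got, 0, 0);
		io_uring_submit(ring);
	}

Since the installed buffer is marked source-only (dir_mask has only
ITER_SOURCE set), it can feed write/send style requests but not
read/recv style ones; releasing the slot goes through the usual registered
buffer update path, which ends up in the file's ->release() callback.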