From patchwork Tue Jul 11 20:20:39 2023
From: "Matthew Wilcox (Oracle)"
To: linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)", Arjun Roy, Eric Dumazet, Suren Baghdasaryan, linux-fsdevel@vger.kernel.org, Punit Agrawal
Subject: [PATCH v2 1/9] Revert "tcp: Use per-vma locking for receive zerocopy"
Date: Tue, 11 Jul 2023 21:20:39 +0100
Message-Id: <20230711202047.3818697-2-willy@infradead.org>
In-Reply-To: <20230711202047.3818697-1-willy@infradead.org>

This reverts commit 7a7f094635349a7d0314364ad50bdeb770b6df4f.
Reviewed-by: Suren Baghdasaryan
---
 MAINTAINERS            |  1 -
 include/linux/net_mm.h | 17 ----------------
 include/net/tcp.h      |  1 -
 mm/memory.c            |  7 +++----
 net/ipv4/tcp.c         | 45 ++++++++----------------------------
 5 files changed, 11 insertions(+), 60 deletions(-)
 delete mode 100644 include/linux/net_mm.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 18cd0ce2c7d2..00047800cff1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14816,7 +14816,6 @@ NETWORKING [TCP]
 M:	Eric Dumazet
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	include/linux/net_mm.h
 F:	include/linux/tcp.h
 F:	include/net/tcp.h
 F:	include/trace/events/tcp.h
diff --git a/include/linux/net_mm.h b/include/linux/net_mm.h
deleted file mode 100644
index b298998bd5a0..000000000000
--- a/include/linux/net_mm.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifdef CONFIG_MMU
-
-#ifdef CONFIG_INET
-extern const struct vm_operations_struct tcp_vm_ops;
-static inline bool vma_is_tcp(const struct vm_area_struct *vma)
-{
-	return vma->vm_ops == &tcp_vm_ops;
-}
-#else
-static inline bool vma_is_tcp(const struct vm_area_struct *vma)
-{
-	return false;
-}
-#endif /* CONFIG_INET*/
-
-#endif /* CONFIG_MMU */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 226bce6d1e8c..95e4507febed 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -45,7 +45,6 @@
 #include
 #include
 #include
-#include <linux/net_mm.h>
 
 extern struct inet_hashinfo tcp_hashinfo;
diff --git a/mm/memory.c b/mm/memory.c
index 0a265ac6246e..2c7967632866 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -77,7 +77,6 @@
 #include
 #include
 #include
-#include <linux/net_mm.h>
 
 #include
 
@@ -5419,12 +5418,12 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
 	if (!vma)
 		goto inval;
 
-	/* Only anonymous and tcp vmas are supported for now */
-	if (!vma_is_anonymous(vma) && !vma_is_tcp(vma))
+	/* Only anonymous vmas are supported for now */
+	if (!vma_is_anonymous(vma))
 		goto inval;
 
 	/* find_mergeable_anon_vma uses adjacent vmas which are not locked */
-	if (!vma->anon_vma && !vma_is_tcp(vma))
+	if (!vma->anon_vma)
 		goto inval;
 
 	if (!vma_start_read(vma))
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e03e08745308..1542de3f66f7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1739,7 +1739,7 @@ void tcp_update_recv_tstamps(struct sk_buff *skb,
 }
 
 #ifdef CONFIG_MMU
-const struct vm_operations_struct tcp_vm_ops = {
+static const struct vm_operations_struct tcp_vm_ops = {
 };
 
 int tcp_mmap(struct file *file, struct socket *sock,
@@ -2038,34 +2038,6 @@ static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
 	}
 }
 
-static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm,
-					   unsigned long address,
-					   bool *mmap_locked)
-{
-	struct vm_area_struct *vma = NULL;
-
-#ifdef CONFIG_PER_VMA_LOCK
-	vma = lock_vma_under_rcu(mm, address);
-#endif
-	if (vma) {
-		if (!vma_is_tcp(vma)) {
-			vma_end_read(vma);
-			return NULL;
-		}
-		*mmap_locked = false;
-		return vma;
-	}
-
-	mmap_read_lock(mm);
-	vma = vma_lookup(mm, address);
-	if (!vma || !vma_is_tcp(vma)) {
-		mmap_read_unlock(mm);
-		return NULL;
-	}
-	*mmap_locked = true;
-	return vma;
-}
-
 #define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32
 
 static int tcp_zerocopy_receive(struct sock *sk,
 				struct tcp_zerocopy_receive *zc,
@@ -2083,7 +2055,6 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	u32 seq = tp->copied_seq;
 	u32 total_bytes_to_map;
 	int inq = tcp_inq(sk);
-	bool mmap_locked;
 	int ret;
 
 	zc->copybuf_len = 0;
@@ -2108,10 +2079,13 @@ static int tcp_zerocopy_receive(struct sock *sk,
 		return 0;
 	}
 
-	vma = find_tcp_vma(current->mm, address, &mmap_locked);
-	if (!vma)
-		return -EINVAL;
+	mmap_read_lock(current->mm);
+	vma = vma_lookup(current->mm, address);
+	if (!vma || vma->vm_ops != &tcp_vm_ops) {
+		mmap_read_unlock(current->mm);
+		return -EINVAL;
+	}
 	vma_len = min_t(unsigned long, zc->length, vma->vm_end - address);
 	avail_len = min_t(u32, vma_len, inq);
 	total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
@@ -2185,10 +2159,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
 					      zc, total_bytes_to_map);
 	}
 out:
-	if (mmap_locked)
-		mmap_read_unlock(current->mm);
-	else
-		vma_end_read(vma);
+	mmap_read_unlock(current->mm);
 	/* Try to copy straggler data. */
 	if (!ret)
 		copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss);

From patchwork Tue Jul 11 20:20:40 2023
From: "Matthew Wilcox (Oracle)"
To: linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)", Arjun Roy, Eric Dumazet, Suren Baghdasaryan, linux-fsdevel@vger.kernel.org, Punit Agrawal
Subject: [PATCH v2 2/9] mm: Allow per-VMA locks on file-backed VMAs
Date: Tue, 11 Jul 2023 21:20:40 +0100
Message-Id: <20230711202047.3818697-3-willy@infradead.org>
In-Reply-To: <20230711202047.3818697-1-willy@infradead.org>

The fault path will immediately fail in handle_mm_fault(), so this
is the minimal step which allows the per-VMA lock to be taken on
file-backed VMAs.  There may be a small performance reduction as a
little unnecessary work will be done on each page fault.  See later
patches for the improvement.
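An immediate failure is safe here because every caller of the per-VMA
path already falls back to the mmap_lock when it sees VM_FAULT_RETRY.
A condensed sketch of that caller-side pattern (the concrete arch
versions are visible in patch 8/9; error paths are omitted here):

	vma = lock_vma_under_rcu(mm, address);
	if (vma) {
		fault = handle_mm_fault(vma, address,
					flags | FAULT_FLAG_VMA_LOCK, regs);
		if (!(fault & VM_FAULT_RETRY))
			goto done;	/* handled under the VMA lock */
		/* the VMA lock has already been dropped for us */
	}
	vma = lock_mm_and_find_vma(mm, address, regs);	/* slow path */

So after this patch a file-backed fault taken under the VMA lock costs
one extra lookup and retry; the rest of the series removes that cost.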
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Suren Baghdasaryan
---
 mm/memory.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 2c7967632866..f2dcc695f54e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5247,6 +5247,11 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		goto out;
 	}
 
+	if ((flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vma)) {
+		vma_end_read(vma);
+		return VM_FAULT_RETRY;
+	}
+
 	/*
 	 * Enable the memcg OOM handling for faults triggered in user
 	 * space.  Kernel faults are handled more gracefully.
@@ -5418,12 +5423,8 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
 	if (!vma)
 		goto inval;
 
-	/* Only anonymous vmas are supported for now */
-	if (!vma_is_anonymous(vma))
-		goto inval;
-
 	/* find_mergeable_anon_vma uses adjacent vmas which are not locked */
-	if (!vma->anon_vma)
+	if (vma_is_anonymous(vma) && !vma->anon_vma)
 		goto inval;
 
 	if (!vma_start_read(vma))

From patchwork Tue Jul 11 20:20:41 2023
From: "Matthew Wilcox (Oracle)"
To: linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)", Arjun Roy, Eric Dumazet, Suren Baghdasaryan, linux-fsdevel@vger.kernel.org, Punit Agrawal
Subject: [PATCH v2 3/9] mm: Move FAULT_FLAG_VMA_LOCK check from handle_mm_fault()
Date: Tue, 11 Jul 2023 21:20:41 +0100
Message-Id: <20230711202047.3818697-4-willy@infradead.org>
In-Reply-To: <20230711202047.3818697-1-willy@infradead.org>

Handle a little more of the page fault path outside the mmap sem.
The hugetlb path doesn't need to check whether the VMA is anonymous;
the VM_HUGETLB flag is only set on hugetlbfs VMAs.  There should be no
performance change from the previous commit; this is simply a step to
ease bisection of any problems.
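For orientation, the reason the check has to be duplicated is the
dispatch inside handle_mm_fault(), which looks roughly like this
(a condensed sketch, not the literal kernel code):

	if (unlikely(is_vm_hugetlb_page(vma)))
		ret = hugetlb_fault(vma->vm_mm, vma, address, flags);
	else
		ret = __handle_mm_fault(vma, address, flags);

Hugetlb VMAs never reach __handle_mm_fault(), which is why
hugetlb_fault() gets its own early bail-out below instead of an
anonymity check.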
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Suren Baghdasaryan
---
 mm/hugetlb.c |  6 ++++++
 mm/memory.c  | 18 +++++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e4a28ce0667f..109e1ff92bc8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6063,6 +6063,12 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	int need_wait_lock = 0;
 	unsigned long haddr = address & huge_page_mask(h);
 
+	/* TODO: Handle faults under the VMA lock */
+	if (flags & FAULT_FLAG_VMA_LOCK) {
+		vma_end_read(vma);
+		return VM_FAULT_RETRY;
+	}
+
 	/*
 	 * Serialize hugepage allocation and instantiation, so that we don't
 	 * get spurious allocation failures if two CPUs race to instantiate
diff --git a/mm/memory.c b/mm/memory.c
index f2dcc695f54e..6eda5c5f2069 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4998,10 +4998,10 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 }
 
 /*
- * By the time we get here, we already hold the mm semaphore
- *
- * The mmap_lock may have been released depending on flags and our
- * return value.  See filemap_fault() and __folio_lock_or_retry().
+ * On entry, we hold either the VMA lock or the mmap_lock
+ * (FAULT_FLAG_VMA_LOCK tells you which).  If VM_FAULT_RETRY is set in
+ * the result, the mmap_lock is not held on exit.  See filemap_fault()
+ * and __folio_lock_or_retry().
  */
 static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags)
@@ -5020,6 +5020,11 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 	p4d_t *p4d;
 	vm_fault_t ret;
 
+	if ((flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vma)) {
+		vma_end_read(vma);
+		return VM_FAULT_RETRY;
+	}
+
 	pgd = pgd_offset(mm, address);
 	p4d = p4d_alloc(mm, pgd, address);
 	if (!p4d)
@@ -5247,11 +5252,6 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 		goto out;
 	}
 
-	if ((flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vma)) {
-		vma_end_read(vma);
-		return VM_FAULT_RETRY;
-	}
-
 	/*
 	 * Enable the memcg OOM handling for faults triggered in user
 	 * space.  Kernel faults are handled more gracefully.
From patchwork Tue Jul 11 20:20:42 2023
From: "Matthew Wilcox (Oracle)"
To: linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)", Arjun Roy, Eric Dumazet, Suren Baghdasaryan, linux-fsdevel@vger.kernel.org, Punit Agrawal
Subject: [PATCH v2 4/9] mm: Move FAULT_FLAG_VMA_LOCK check into handle_pte_fault()
Date: Tue, 11 Jul 2023 21:20:42 +0100
Message-Id: <20230711202047.3818697-5-willy@infradead.org>
In-Reply-To: <20230711202047.3818697-1-willy@infradead.org>

Push the check down from __handle_mm_fault().  There's a mild upside to
this patch in that we'll allocate the page tables while under the VMA
lock rather than the mmap lock, reducing the hold time on the mmap lock,
since the retry will find the page tables already populated.
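Concretely, the work that now happens under the VMA lock is the
page-table walk at the top of __handle_mm_fault(); the retry under the
mmap_lock then finds these levels already allocated.  A sketch of that
walk (error handling elided; the first two lines are visible in the
diff below):

	pgd = pgd_offset(mm, address);
	p4d = p4d_alloc(mm, pgd, address);	/* may allocate */
	pud = pud_alloc(mm, p4d, address);	/* may allocate */
	pmd = pmd_alloc(mm, pud, address);	/* may allocate */
	/* ... only then is handle_pte_fault() reached */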
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Suren Baghdasaryan
---
 mm/memory.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 6eda5c5f2069..52f7fdd78380 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4924,6 +4924,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
 	pte_t entry;
 
+	if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
+		vma_end_read(vmf->vma);
+		return VM_FAULT_RETRY;
+	}
+
 	if (unlikely(pmd_none(*vmf->pmd))) {
 		/*
 		 * Leave __pte_alloc() until later: because vm_ops->fault may
@@ -5020,11 +5025,6 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 	p4d_t *p4d;
 	vm_fault_t ret;
 
-	if ((flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vma)) {
-		vma_end_read(vma);
-		return VM_FAULT_RETRY;
-	}
-
 	pgd = pgd_offset(mm, address);
 	p4d = p4d_alloc(mm, pgd, address);
 	if (!p4d)

From patchwork Tue Jul 11 20:20:43 2023
From: "Matthew Wilcox (Oracle)"
To: linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)", Arjun Roy, Eric Dumazet, Suren Baghdasaryan, linux-fsdevel@vger.kernel.org, Punit Agrawal
Subject: [PATCH v2 5/9] mm: Move FAULT_FLAG_VMA_LOCK check down in handle_pte_fault()
Date: Tue, 11 Jul 2023 21:20:43 +0100
Message-Id: <20230711202047.3818697-6-willy@infradead.org>
In-Reply-To: <20230711202047.3818697-1-willy@infradead.org>

Call do_pte_missing() under the VMA lock ... then immediately retry
in do_fault().
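The resulting order of checks in handle_pte_fault(), condensed from the
diff below (control flow only):

	if (!vmf->pte)
		return do_pte_missing(vmf);	/* now runs under the VMA
						 * lock; for file VMAs,
						 * do_fault() still retries */

	if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
		vma_end_read(vmf->vma);		/* swap and present-pte paths
						 * still punt to the mmap_lock */
		return VM_FAULT_RETRY;
	}

	if (!pte_present(vmf->orig_pte))
		return do_swap_page(vmf);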
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Suren Baghdasaryan
---
 mm/memory.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 52f7fdd78380..88cf9860f17e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4661,6 +4661,11 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
 	struct mm_struct *vm_mm = vma->vm_mm;
 	vm_fault_t ret;
 
+	if (vmf->flags & FAULT_FLAG_VMA_LOCK){
+		vma_end_read(vma);
+		return VM_FAULT_RETRY;
+	}
+
 	/*
 	 * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND
 	 */
@@ -4924,11 +4929,6 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
 	pte_t entry;
 
-	if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
-		vma_end_read(vmf->vma);
-		return VM_FAULT_RETRY;
-	}
-
 	if (unlikely(pmd_none(*vmf->pmd))) {
 		/*
 		 * Leave __pte_alloc() until later: because vm_ops->fault may
@@ -4961,6 +4961,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	if (!vmf->pte)
 		return do_pte_missing(vmf);
 
+	if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
+		vma_end_read(vmf->vma);
+		return VM_FAULT_RETRY;
+	}
+
 	if (!pte_present(vmf->orig_pte))
 		return do_swap_page(vmf);

From patchwork Tue Jul 11 20:20:44 2023
From: "Matthew Wilcox (Oracle)"
To: linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)", Arjun Roy, Eric Dumazet, Suren Baghdasaryan, linux-fsdevel@vger.kernel.org, Punit Agrawal
Subject: [PATCH v2 6/9] mm: Move the FAULT_FLAG_VMA_LOCK check down from do_fault()
Date: Tue, 11 Jul 2023 21:20:44 +0100
Message-Id: <20230711202047.3818697-7-willy@infradead.org>
In-Reply-To: <20230711202047.3818697-1-willy@infradead.org>

Perform the check at the start of do_read_fault(), do_cow_fault()
and do_shared_fault() instead.  Should be no performance change from
the last commit.

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Suren Baghdasaryan
---
 mm/memory.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 88cf9860f17e..709bffee8aa2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4547,6 +4547,11 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf)
 	vm_fault_t ret = 0;
 	struct folio *folio;
 
+	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+		vma_end_read(vmf->vma);
+		return VM_FAULT_RETRY;
+	}
+
 	/*
 	 * Let's call ->map_pages() first and use ->fault() as fallback
 	 * if page by the offset is not ready to be mapped (cold cache or
@@ -4575,6 +4580,11 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf)
 	struct vm_area_struct *vma = vmf->vma;
 	vm_fault_t ret;
 
+	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+		vma_end_read(vma);
+		return VM_FAULT_RETRY;
+	}
+
 	if (unlikely(anon_vma_prepare(vma)))
 		return VM_FAULT_OOM;
 
@@ -4615,6 +4625,11 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
 	vm_fault_t ret, tmp;
 	struct folio *folio;
 
+	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+		vma_end_read(vma);
+		return VM_FAULT_RETRY;
+	}
+
 	ret = __do_fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;
@@ -4661,11 +4676,6 @@ static vm_fault_t do_fault(struct vm_fault *vmf)
 	struct mm_struct *vm_mm = vma->vm_mm;
 	vm_fault_t ret;
 
-	if (vmf->flags & FAULT_FLAG_VMA_LOCK){
-		vma_end_read(vma);
-		return VM_FAULT_RETRY;
-	}
-
 	/*
 	 * The VMA was not fully populated on mmap() or missing VM_DONTEXPAND
 	 */

From patchwork Tue Jul 11 20:20:45 2023
From: "Matthew Wilcox (Oracle)"
To: linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)", Arjun Roy, Eric Dumazet, Suren Baghdasaryan, linux-fsdevel@vger.kernel.org, Punit Agrawal
Subject: [PATCH v2 7/9] mm: Run the fault-around code under the VMA lock
Date: Tue, 11 Jul 2023 21:20:45 +0100
Message-Id: <20230711202047.3818697-8-willy@infradead.org>
In-Reply-To: <20230711202047.3818697-1-willy@infradead.org>

The map_pages fs method should be safe to run under the VMA lock instead
of the mmap lock.  This should have a measurable reduction in contention
on the mmap lock.

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Suren Baghdasaryan
---
 mm/memory.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 709bffee8aa2..0a4e363b0605 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4547,11 +4547,6 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf)
 	vm_fault_t ret = 0;
 	struct folio *folio;
 
-	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
-		vma_end_read(vmf->vma);
-		return VM_FAULT_RETRY;
-	}
-
 	/*
 	 * Let's call ->map_pages() first and use ->fault() as fallback
 	 * if page by the offset is not ready to be mapped (cold cache or
@@ -4563,6 +4558,11 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf)
 			return ret;
 	}
 
+	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+		vma_end_read(vmf->vma);
+		return VM_FAULT_RETRY;
+	}
+
 	ret = __do_fault(vmf);
 	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
 		return ret;

From patchwork Tue Jul 11 20:20:46 2023
"Matthew Wilcox (Oracle)" To: linux-mm@kvack.org Cc: "Matthew Wilcox (Oracle)" , Arjun Roy , Eric Dumazet , Suren Baghdasaryan , linux-fsdevel@vger.kernel.org, Punit Agrawal Subject: [PATCH v2 8/9] mm: Remove CONFIG_PER_VMA_LOCK ifdefs Date: Tue, 11 Jul 2023 21:20:46 +0100 Message-Id: <20230711202047.3818697-9-willy@infradead.org> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230711202047.3818697-1-willy@infradead.org> References: <20230711202047.3818697-1-willy@infradead.org> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Provide lock_vma_under_rcu() when CONFIG_PER_VMA_LOCK is not defined to eliminate ifdefs in the users. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Suren Baghdasaryan --- arch/arm64/mm/fault.c | 2 -- arch/powerpc/mm/fault.c | 4 ---- arch/riscv/mm/fault.c | 4 ---- arch/s390/mm/fault.c | 2 -- arch/x86/mm/fault.c | 4 ---- include/linux/mm.h | 6 ++++++ 6 files changed, 6 insertions(+), 16 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index b8c80f7b8a5f..2e5d1e238af9 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -587,7 +587,6 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); -#ifdef CONFIG_PER_VMA_LOCK if (!(mm_flags & FAULT_FLAG_USER)) goto lock_mmap; @@ -616,7 +615,6 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr, return 0; } lock_mmap: -#endif /* CONFIG_PER_VMA_LOCK */ retry: vma = lock_mm_and_find_vma(mm, addr, regs); diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 82954d0e6906..b1723094d464 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -469,7 +469,6 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, if (is_exec) flags |= FAULT_FLAG_INSTRUCTION; -#ifdef CONFIG_PER_VMA_LOCK if (!(flags & FAULT_FLAG_USER)) goto lock_mmap; @@ -502,7 +501,6 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address, return user_mode(regs) ? 0 : SIGBUS; lock_mmap: -#endif /* CONFIG_PER_VMA_LOCK */ /* When running in the kernel we expect faults to occur only to * addresses in user space. 
 	 * addresses in user space.  All other faults represent errors in the
@@ -552,9 +550,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 
 	mmap_read_unlock(current->mm);
 
-#ifdef CONFIG_PER_VMA_LOCK
 done:
-#endif
 	if (unlikely(fault & VM_FAULT_ERROR))
 		return mm_fault_error(regs, address, fault);
 
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 6ea2cce4cc17..046732fcb48c 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -283,7 +283,6 @@ void handle_page_fault(struct pt_regs *regs)
 		flags |= FAULT_FLAG_WRITE;
 	else if (cause == EXC_INST_PAGE_FAULT)
 		flags |= FAULT_FLAG_INSTRUCTION;
-#ifdef CONFIG_PER_VMA_LOCK
 	if (!(flags & FAULT_FLAG_USER))
 		goto lock_mmap;
 
@@ -311,7 +310,6 @@ void handle_page_fault(struct pt_regs *regs)
 		return;
 	}
 lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
 
 retry:
 	vma = lock_mm_and_find_vma(mm, addr, regs);
@@ -368,9 +366,7 @@ void handle_page_fault(struct pt_regs *regs)
 
 	mmap_read_unlock(mm);
 
-#ifdef CONFIG_PER_VMA_LOCK
 done:
-#endif
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		tsk->thread.bad_cause = cause;
 		mm_fault_error(regs, addr, fault);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 40a71063949b..ac8351f172bb 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -407,7 +407,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 		access = VM_WRITE;
 	if (access == VM_WRITE)
 		flags |= FAULT_FLAG_WRITE;
-#ifdef CONFIG_PER_VMA_LOCK
 	if (!(flags & FAULT_FLAG_USER))
 		goto lock_mmap;
 	vma = lock_vma_under_rcu(mm, address);
@@ -431,7 +430,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 		goto out;
 	}
 lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
 	mmap_read_lock(mm);
 	gmap = NULL;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b0f7add07aa5..ab778eac1952 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1350,7 +1350,6 @@ void do_user_addr_fault(struct pt_regs *regs,
 	}
 #endif
 
-#ifdef CONFIG_PER_VMA_LOCK
 	if (!(flags & FAULT_FLAG_USER))
 		goto lock_mmap;
 
@@ -1381,7 +1380,6 @@ void do_user_addr_fault(struct pt_regs *regs,
 		return;
 	}
 lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
 
 retry:
 	vma = lock_mm_and_find_vma(mm, address, regs);
@@ -1441,9 +1439,7 @@ void do_user_addr_fault(struct pt_regs *regs,
 	}
 
 	mmap_read_unlock(mm);
 
-#ifdef CONFIG_PER_VMA_LOCK
 done:
-#endif
 	if (likely(!(fault & VM_FAULT_ERROR)))
 		return;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 46c442855df7..3c923a4bf213 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -813,6 +813,12 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
 	mmap_assert_locked(vmf->vma->vm_mm);
 }
 
+static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
+		unsigned long address)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_PER_VMA_LOCK */
 
 /*

From patchwork Tue Jul 11 20:20:47 2023
From: "Matthew Wilcox (Oracle)"
To: linux-mm@kvack.org
Cc: Arjun Roy, Eric Dumazet, Suren Baghdasaryan, linux-fsdevel@vger.kernel.org, Punit Agrawal, "David S. Miller", Matthew Wilcox
Subject: [PATCH v2 9/9] tcp: Use per-vma locking for receive zerocopy
Date: Tue, 11 Jul 2023 21:20:47 +0100
Message-Id: <20230711202047.3818697-10-willy@infradead.org>
In-Reply-To: <20230711202047.3818697-1-willy@infradead.org>

From: Arjun Roy

Per-VMA locking allows us to lock a struct vm_area_struct without
taking the process-wide mmap lock in read mode.

Consider a process workload where the mmap lock is taken constantly in
write mode.  In this scenario, all zerocopy receives are periodically
blocked during that period of time - though in principle, the memory
ranges being used by TCP are not touched by the operations that need
the mmap write lock.  This results in performance degradation.

Now consider another workload where the mmap lock is never taken in
write mode, but there are many TCP connections using receive zerocopy
that are concurrently receiving.  These connections all take the mmap
lock in read mode, but this does induce a lot of contention and atomic
ops for this process-wide lock.  This results in additional CPU
overhead caused by contending on the cache line for this lock.

However, with per-vma locking, both of these problems can be avoided.

As a test, I ran an RPC-style request/response workload with 4KB
payloads and receive zerocopy enabled, with 100 simultaneous TCP
connections.  I measured perf cycles within the
find_tcp_vma/mmap_read_lock/mmap_read_unlock codepath, with and
without per-vma locking enabled.

When using process-wide mmap semaphore read locking, about 1% of
measured perf cycles were within this path.  With per-VMA locking, this
value dropped to about 0.45%.
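For readers who have not used receive zerocopy: userspace mmap()s the
TCP socket to reserve a VMA, then calls getsockopt(TCP_ZEROCOPY_RECEIVE)
to have received pages mapped into it.  tcp_zerocopy_receive() below is
the kernel side of that getsockopt, and the vma lookup it performs is
what this patch moves under the per-VMA lock.  A minimal sketch of the
userspace side, loosely following the kernel's tcp_mmap selftest (error
handling and the copybuf fallback are omitted; only the minimal fields
are used here, see linux/tcp.h for the full struct):

	#include <linux/tcp.h>
	#include <netinet/in.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <sys/socket.h>

	static void zerocopy_read_once(int fd, size_t chunk)
	{
		/* Reserve the VMA that tcp_zerocopy_receive() looks up. */
		void *addr = mmap(NULL, chunk, PROT_READ, MAP_SHARED, fd, 0);
		struct tcp_zerocopy_receive zc;
		socklen_t zc_len = sizeof(zc);

		memset(&zc, 0, sizeof(zc));
		zc.address = (__u64)(unsigned long)addr;
		zc.length = chunk;

		/* The kernel remaps received pages into [addr, addr + length). */
		if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
			       &zc, &zc_len) == 0) {
			/* zc.length bytes are now readable at addr;
			 * zc.recv_skip_hint bytes must be read with recv(). */
		}
		munmap(addr, chunk);	/* return the pages */
	}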
Signed-off-by: Arjun Roy
Reviewed-by: Eric Dumazet
Signed-off-by: David S. Miller
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Suren Baghdasaryan
---
 net/ipv4/tcp.c | 39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1542de3f66f7..7118ec6cf886 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2038,6 +2038,30 @@ static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
 	}
 }
 
+static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm,
+					   unsigned long address, bool *mmap_locked)
+{
+	struct vm_area_struct *vma = lock_vma_under_rcu(mm, address);
+
+	if (vma) {
+		if (vma->vm_ops != &tcp_vm_ops) {
+			vma_end_read(vma);
+			return NULL;
+		}
+		*mmap_locked = false;
+		return vma;
+	}
+
+	mmap_read_lock(mm);
+	vma = vma_lookup(mm, address);
+	if (!vma || vma->vm_ops != &tcp_vm_ops) {
+		mmap_read_unlock(mm);
+		return NULL;
+	}
+	*mmap_locked = true;
+	return vma;
+}
+
 #define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32
 
 static int tcp_zerocopy_receive(struct sock *sk,
 				struct tcp_zerocopy_receive *zc,
@@ -2055,6 +2079,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
 	u32 seq = tp->copied_seq;
 	u32 total_bytes_to_map;
 	int inq = tcp_inq(sk);
+	bool mmap_locked;
 	int ret;
 
 	zc->copybuf_len = 0;
@@ -2079,13 +2104,10 @@ static int tcp_zerocopy_receive(struct sock *sk,
 		return 0;
 	}
 
-	mmap_read_lock(current->mm);
-
-	vma = vma_lookup(current->mm, address);
-	if (!vma || vma->vm_ops != &tcp_vm_ops) {
-		mmap_read_unlock(current->mm);
+	vma = find_tcp_vma(current->mm, address, &mmap_locked);
+	if (!vma)
 		return -EINVAL;
-	}
+
 	vma_len = min_t(unsigned long, zc->length, vma->vm_end - address);
 	avail_len = min_t(u32, vma_len, inq);
 	total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
@@ -2159,7 +2181,10 @@ static int tcp_zerocopy_receive(struct sock *sk,
 					      zc, total_bytes_to_map);
 	}
 out:
-	mmap_read_unlock(current->mm);
+	if (mmap_locked)
+		mmap_read_unlock(current->mm);
+	else
+		vma_end_read(vma);
 	/* Try to copy straggler data. */
 	if (!ret)
 		copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss);