From patchwork Sun Mar 19 07:09:30 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Lorenzo Stoakes X-Patchwork-Id: 13180235 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id A61A4C761A6 for ; Sun, 19 Mar 2023 07:09:46 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230089AbjCSHJo (ORCPT ); Sun, 19 Mar 2023 03:09:44 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:42502 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229991AbjCSHJm (ORCPT ); Sun, 19 Mar 2023 03:09:42 -0400 Received: from mail-ed1-x536.google.com (mail-ed1-x536.google.com [IPv6:2a00:1450:4864:20::536]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 3CD3C26B7; Sun, 19 Mar 2023 00:09:41 -0700 (PDT) Received: by mail-ed1-x536.google.com with SMTP id cy23so35354911edb.12; Sun, 19 Mar 2023 00:09:41 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; t=1679209779; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=csP2U+h4A4N+JyUsV24C6SnnqlSeb33JonwK9o1duFk=; b=E7BbUYx2VyQn5O1pHGy3AFX0ymfBCLHPKP5Y3xZwRpm/H/ORpIqsSyumlzF6fxFTMN SY1a+7FlryqU0IeYC7yS4qjOhenHYnANyIqz2mKvafPCfKBU2PnV33pqBXOs2JOCZIIL +KD1JgRFeZhX8S0bSCi1NovBHbT/CTaf2CcMG2IL1dgmBzSHi8Oe8r29QIyjxDH4DEip rbMrF2R8NWa5WZHFzuhkQhRmOu8CnVhC9Gs61cOqvXaFBehc1LqJzAofSWFI+eSCjmF0 xIHOYl+VudPK5mSG4x7D1PTE/K931ydfuzIGMidUKyKYP9nmi0qrAlxd0Mlo3UBh7+95 CcfA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; t=1679209779; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=csP2U+h4A4N+JyUsV24C6SnnqlSeb33JonwK9o1duFk=; b=m+Acu4itpffUoA+fMJ/KXWpeqYU6cM7R1O34brAFzOaJuDdLs7mz4knfiAycRz4cc1 vjYkMmaAnF3XiSq4DmbxcqtTal74h8jwdQkCd7RUjK1qoSwcgKUlCH5UqsJGmlohB9V1 ST8X+T8C6bb1cPERTjnDwMeGzVqkkITCY4O2HIMccY4AVbKjMOKZaM3owgN2Wab5qd8M 3aHis2u816QxIj0liT6T8/S6tX16wgqWrhniPeGNq1YYJB5XiAqFUw61ZvJrNlfgEEKR q22SLwh5QZP7sz4Icf1+nGZX0MfhKRTC3Y8VWR+3pKOsm3yWAomD7LfLLADzZPNzFApC W6gQ== X-Gm-Message-State: AO0yUKXSPzL7r/+WSpbFl21jRTVZUTijQ4sglP2whDJw3JAuQqhIX9cT 4MAsmki53uJJ5tZF2pifJyQ= X-Google-Smtp-Source: AK7set+W4lasYomB07kPXcM5wE+qUni1s4ui3d4L4qeGtG0Gv+O2Px0+D0Q0CSaktIqhgOZmF0Xktw== X-Received: by 2002:aa7:d744:0:b0:4fd:2b05:aa2 with SMTP id a4-20020aa7d744000000b004fd2b050aa2mr9076089eds.42.1679209779674; Sun, 19 Mar 2023 00:09:39 -0700 (PDT) Received: from localhost.localdomain ([2a00:23ee:1938:1bcd:c6e1:42ba:ae87:772e]) by smtp.googlemail.com with ESMTPSA id u8-20020a170906b10800b008c9b44b7851sm2943920ejy.182.2023.03.19.00.09.38 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sun, 19 Mar 2023 00:09:38 -0700 (PDT) From: Lorenzo Stoakes To: linux-mm@kvack.org, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Andrew Morton Cc: Baoquan He , Uladzislau Rezki , Matthew Wilcox , David Hildenbrand , Liu Shixin , Jiri Olsa , Lorenzo Stoakes Subject: [PATCH v2 1/4] fs/proc/kcore: Avoid bounce buffer for ktext data Date: Sun, 19 Mar 2023 07:09:30 +0000 Message-Id: <2ed992d6604965fd9eea05fed4473ddf54540989.1679209395.git.lstoakes@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Commit df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") introduced the use of a bounce buffer to retrieve kernel text data for /proc/kcore in order to avoid failures arising from hardened user copies enabled by CONFIG_HARDENED_USERCOPY in check_kernel_text_object(). We can avoid doing this if instead of copy_to_user() we use _copy_to_user() which bypasses the hardening check. This is more efficient than using a bounce buffer and simplifies the code. We do so as part an overall effort to eliminate bounce buffer usage in the function with an eye to converting it an iterator read. Signed-off-by: Lorenzo Stoakes Reviewed-by: David Hildenbrand --- fs/proc/kcore.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 71157ee35c1a..556f310d6aa4 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -541,19 +541,12 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) case KCORE_VMEMMAP: case KCORE_TEXT: /* - * Using bounce buffer to bypass the - * hardened user copy kernel text checks. + * We use _copy_to_user() to bypass usermode hardening + * which would otherwise prevent this operation. */ - if (copy_from_kernel_nofault(buf, (void *)start, tsz)) { - if (clear_user(buffer, tsz)) { - ret = -EFAULT; - goto out; - } - } else { - if (copy_to_user(buffer, buf, tsz)) { - ret = -EFAULT; - goto out; - } + if (_copy_to_user(buffer, (char *)start, tsz)) { + ret = -EFAULT; + goto out; } break; default: From patchwork Sun Mar 19 07:09:31 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Lorenzo Stoakes X-Patchwork-Id: 13180236 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 28E48C761AF for ; Sun, 19 Mar 2023 07:09:48 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230107AbjCSHJr (ORCPT ); Sun, 19 Mar 2023 03:09:47 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:42550 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230091AbjCSHJo (ORCPT ); Sun, 19 Mar 2023 03:09:44 -0400 Received: from mail-ed1-x52e.google.com (mail-ed1-x52e.google.com [IPv6:2a00:1450:4864:20::52e]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 37BA011E95; Sun, 19 Mar 2023 00:09:43 -0700 (PDT) Received: by mail-ed1-x52e.google.com with SMTP id x3so35381608edb.10; Sun, 19 Mar 2023 00:09:43 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; t=1679209781; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=V8uXWzHaUDgnMR3zVzK4ll6XJwTMNIlCa4zu/w/8SY0=; b=kD77EjSANPv5buSsqXEhflSmgfuAbw+3r1lZTfUH2Si4lAQ5F0kzSrIlBMvWxHyGM5 WrJBoxIFgqN77KaZWfx9jZJpTTd1XtWADAQ8thPxb/RoqM3tMAU4bhfeOHYTD+lZ69tS aeuBbNWwY1xB3ARatXkNcEY4ZMYWbQ+oxyk05M6ElH2m8XUnXKVC90AhPutyGx/glQkO /gnQf39KUikvmDGyoWmpprZoF+1tCRrknofidgyJ79vExjBPNoHY7nIJWHtw+0Z8Dcuk tL4FqjTHjMeftBEJY03uoDecD4TzfQTt7hca9Hln0v9q0JxjgJpoZpLhl/yE4u0zeTt8 0TCA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; t=1679209781; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=V8uXWzHaUDgnMR3zVzK4ll6XJwTMNIlCa4zu/w/8SY0=; b=nT7H7Cdfmmrpoli6i+QQeB8TNNKTOTeveLrT9C0QZXHxniYq9KblWJ0gQkjIgBeRBM vsqMDZZYAUsYtUqv+HPlMjUb9x1OaoEXZttGc7kgTdGHnumZiE90m3XydSbah5Juarw1 MCnaP6wKC8yJbtipQFJZseioG/YZ5vx5D8gu0NUllcsKGgyyyPt/OiyqW65trVtv1MzU WboDpKW4Hlp5PCzghT51kOz3AWHA/Eq5l8hwbNi6kHcCz8byCey43KoSOPMO/J3nQnlO ihzuhPQL75HavwbBBBmd34yODwnXFEOB9P+P76jLXIqj1eIPTGvZNSImm5DWprtPRoGV Qnsg== X-Gm-Message-State: AO0yUKXZ/ldvijswrPjzWPjXoqiV0V6d4vAUm9wjPAtWfXCTY5G/B0ow awBAytFzQ5W9UmzlFAZBNcpoz/unAuk= X-Google-Smtp-Source: AK7set9mnjoY0F9bvHw0JKzlTE3bCjtuxWNF0T2RwMxpa2FJSHNLFnLdQB7KeDSZJqZzpZJ9Mxvzog== X-Received: by 2002:a17:906:6886:b0:930:9cec:2fb9 with SMTP id n6-20020a170906688600b009309cec2fb9mr5024214ejr.77.1679209781486; Sun, 19 Mar 2023 00:09:41 -0700 (PDT) Received: from localhost.localdomain ([2a00:23ee:1938:1bcd:c6e1:42ba:ae87:772e]) by smtp.googlemail.com with ESMTPSA id u8-20020a170906b10800b008c9b44b7851sm2943920ejy.182.2023.03.19.00.09.39 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sun, 19 Mar 2023 00:09:40 -0700 (PDT) From: Lorenzo Stoakes To: linux-mm@kvack.org, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Andrew Morton Cc: Baoquan He , Uladzislau Rezki , Matthew Wilcox , David Hildenbrand , Liu Shixin , Jiri Olsa , Lorenzo Stoakes Subject: [PATCH v2 2/4] mm: vmalloc: use rwsem, mutex for vmap_area_lock and vmap_block->lock Date: Sun, 19 Mar 2023 07:09:31 +0000 Message-Id: <6c7f1ac0aeb55faaa46a09108d3999e4595870d9.1679209395.git.lstoakes@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org vmalloc() is, by design, not permitted to be used in atomic context and already contains components which may sleep, so avoiding spin locks is not a problem from the perspective of atomic context. The global vmap_area_lock is held when the red/black tree rooted in vmap_are_root is accessed and thus is rather long-held and under potentially high contention. It is likely to be under contention for reads rather than write, so replace it with a rwsem. Each individual vmap_block->lock is likely to be held for less time but under low contention, so a mutex is not an outrageous choice here. A subset of test_vmalloc.sh performance results:- fix_size_alloc_test 0.40% full_fit_alloc_test 2.08% long_busy_list_alloc_test 0.34% random_size_alloc_test -0.25% random_size_align_alloc_test 0.06% ... all tests cycles 0.2% This represents a tiny reduction in performance that sits barely above noise. The reason for making this change is to build a basis for vread() to be usable asynchronously, this eliminating the need for a bounce buffer when copying data to userland in read_kcore() and allowing that to be converted to an iterator form. Signed-off-by: Lorenzo Stoakes Signed-off-by: Uladzislau Rezki (Sony) --- mm/vmalloc.c | 77 +++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 978194dc2bb8..c24b27664a97 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -725,7 +726,7 @@ EXPORT_SYMBOL(vmalloc_to_pfn); #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0 -static DEFINE_SPINLOCK(vmap_area_lock); +static DECLARE_RWSEM(vmap_area_lock); static DEFINE_SPINLOCK(free_vmap_area_lock); /* Export for kexec only */ LIST_HEAD(vmap_area_list); @@ -1537,9 +1538,9 @@ static void free_vmap_area(struct vmap_area *va) /* * Remove from the busy tree/list. */ - spin_lock(&vmap_area_lock); + down_write(&vmap_area_lock); unlink_va(va, &vmap_area_root); - spin_unlock(&vmap_area_lock); + up_write(&vmap_area_lock); /* * Insert/Merge it back to the free tree/list. @@ -1627,9 +1628,9 @@ static struct vmap_area *alloc_vmap_area(unsigned long size, va->vm = NULL; va->flags = va_flags; - spin_lock(&vmap_area_lock); + down_write(&vmap_area_lock); insert_vmap_area(va, &vmap_area_root, &vmap_area_list); - spin_unlock(&vmap_area_lock); + up_write(&vmap_area_lock); BUG_ON(!IS_ALIGNED(va->va_start, align)); BUG_ON(va->va_start < vstart); @@ -1854,9 +1855,9 @@ struct vmap_area *find_vmap_area(unsigned long addr) { struct vmap_area *va; - spin_lock(&vmap_area_lock); + down_read(&vmap_area_lock); va = __find_vmap_area(addr, &vmap_area_root); - spin_unlock(&vmap_area_lock); + up_read(&vmap_area_lock); return va; } @@ -1865,11 +1866,11 @@ static struct vmap_area *find_unlink_vmap_area(unsigned long addr) { struct vmap_area *va; - spin_lock(&vmap_area_lock); + down_write(&vmap_area_lock); va = __find_vmap_area(addr, &vmap_area_root); if (va) unlink_va(va, &vmap_area_root); - spin_unlock(&vmap_area_lock); + up_write(&vmap_area_lock); return va; } @@ -1914,7 +1915,7 @@ struct vmap_block_queue { }; struct vmap_block { - spinlock_t lock; + struct mutex lock; struct vmap_area *va; unsigned long free, dirty; DECLARE_BITMAP(used_map, VMAP_BBMAP_BITS); @@ -1991,7 +1992,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) } vaddr = vmap_block_vaddr(va->va_start, 0); - spin_lock_init(&vb->lock); + mutex_init(&vb->lock); vb->va = va; /* At least something should be left free */ BUG_ON(VMAP_BBMAP_BITS <= (1UL << order)); @@ -2026,9 +2027,9 @@ static void free_vmap_block(struct vmap_block *vb) tmp = xa_erase(&vmap_blocks, addr_to_vb_idx(vb->va->va_start)); BUG_ON(tmp != vb); - spin_lock(&vmap_area_lock); + down_write(&vmap_area_lock); unlink_va(vb->va, &vmap_area_root); - spin_unlock(&vmap_area_lock); + up_write(&vmap_area_lock); free_vmap_area_noflush(vb->va); kfree_rcu(vb, rcu_head); @@ -2047,7 +2048,7 @@ static void purge_fragmented_blocks(int cpu) if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS)) continue; - spin_lock(&vb->lock); + mutex_lock(&vb->lock); if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { vb->free = 0; /* prevent further allocs after releasing lock */ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ @@ -2056,10 +2057,10 @@ static void purge_fragmented_blocks(int cpu) spin_lock(&vbq->lock); list_del_rcu(&vb->free_list); spin_unlock(&vbq->lock); - spin_unlock(&vb->lock); + mutex_unlock(&vb->lock); list_add_tail(&vb->purge, &purge); } else - spin_unlock(&vb->lock); + mutex_unlock(&vb->lock); } rcu_read_unlock(); @@ -2101,9 +2102,9 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) list_for_each_entry_rcu(vb, &vbq->free, free_list) { unsigned long pages_off; - spin_lock(&vb->lock); + mutex_lock(&vb->lock); if (vb->free < (1UL << order)) { - spin_unlock(&vb->lock); + mutex_unlock(&vb->lock); continue; } @@ -2117,7 +2118,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) spin_unlock(&vbq->lock); } - spin_unlock(&vb->lock); + mutex_unlock(&vb->lock); break; } @@ -2144,16 +2145,16 @@ static void vb_free(unsigned long addr, unsigned long size) order = get_order(size); offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT; vb = xa_load(&vmap_blocks, addr_to_vb_idx(addr)); - spin_lock(&vb->lock); + mutex_lock(&vb->lock); bitmap_clear(vb->used_map, offset, (1UL << order)); - spin_unlock(&vb->lock); + mutex_unlock(&vb->lock); vunmap_range_noflush(addr, addr + size); if (debug_pagealloc_enabled_static()) flush_tlb_kernel_range(addr, addr + size); - spin_lock(&vb->lock); + mutex_lock(&vb->lock); /* Expand dirty range */ vb->dirty_min = min(vb->dirty_min, offset); @@ -2162,10 +2163,10 @@ static void vb_free(unsigned long addr, unsigned long size) vb->dirty += 1UL << order; if (vb->dirty == VMAP_BBMAP_BITS) { BUG_ON(vb->free); - spin_unlock(&vb->lock); + mutex_unlock(&vb->lock); free_vmap_block(vb); } else - spin_unlock(&vb->lock); + mutex_unlock(&vb->lock); } static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) @@ -2183,7 +2184,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) rcu_read_lock(); list_for_each_entry_rcu(vb, &vbq->free, free_list) { - spin_lock(&vb->lock); + mutex_lock(&vb->lock); if (vb->dirty && vb->dirty != VMAP_BBMAP_BITS) { unsigned long va_start = vb->va->va_start; unsigned long s, e; @@ -2196,7 +2197,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) flush = 1; } - spin_unlock(&vb->lock); + mutex_unlock(&vb->lock); } rcu_read_unlock(); } @@ -2451,9 +2452,9 @@ static inline void setup_vmalloc_vm_locked(struct vm_struct *vm, static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) { - spin_lock(&vmap_area_lock); + down_write(&vmap_area_lock); setup_vmalloc_vm_locked(vm, va, flags, caller); - spin_unlock(&vmap_area_lock); + up_write(&vmap_area_lock); } static void clear_vm_uninitialized_flag(struct vm_struct *vm) @@ -3507,9 +3508,9 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags if (!vb) goto finished; - spin_lock(&vb->lock); + mutex_lock(&vb->lock); if (bitmap_empty(vb->used_map, VMAP_BBMAP_BITS)) { - spin_unlock(&vb->lock); + mutex_unlock(&vb->lock); goto finished; } for_each_set_bitrange(rs, re, vb->used_map, VMAP_BBMAP_BITS) { @@ -3536,7 +3537,7 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags count -= n; } unlock: - spin_unlock(&vb->lock); + mutex_unlock(&vb->lock); finished: /* zero-fill the left dirty or free regions */ @@ -3576,13 +3577,15 @@ long vread(char *buf, char *addr, unsigned long count) unsigned long buflen = count; unsigned long n, size, flags; + might_sleep(); + addr = kasan_reset_tag(addr); /* Don't allow overflow */ if ((unsigned long) addr + count < count) count = -(unsigned long) addr; - spin_lock(&vmap_area_lock); + down_read(&vmap_area_lock); va = find_vmap_area_exceed_addr((unsigned long)addr); if (!va) goto finished; @@ -3639,7 +3642,7 @@ long vread(char *buf, char *addr, unsigned long count) count -= n; } finished: - spin_unlock(&vmap_area_lock); + up_read(&vmap_area_lock); if (buf == buf_start) return 0; @@ -3980,14 +3983,14 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, } /* insert all vm's */ - spin_lock(&vmap_area_lock); + down_write(&vmap_area_lock); for (area = 0; area < nr_vms; area++) { insert_vmap_area(vas[area], &vmap_area_root, &vmap_area_list); setup_vmalloc_vm_locked(vms[area], vas[area], VM_ALLOC, pcpu_get_vm_areas); } - spin_unlock(&vmap_area_lock); + up_write(&vmap_area_lock); /* * Mark allocated areas as accessible. Do it now as a best-effort @@ -4114,7 +4117,7 @@ static void *s_start(struct seq_file *m, loff_t *pos) __acquires(&vmap_area_lock) { mutex_lock(&vmap_purge_lock); - spin_lock(&vmap_area_lock); + down_read(&vmap_area_lock); return seq_list_start(&vmap_area_list, *pos); } @@ -4128,7 +4131,7 @@ static void s_stop(struct seq_file *m, void *p) __releases(&vmap_area_lock) __releases(&vmap_purge_lock) { - spin_unlock(&vmap_area_lock); + up_read(&vmap_area_lock); mutex_unlock(&vmap_purge_lock); } From patchwork Sun Mar 19 07:09:32 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Lorenzo Stoakes X-Patchwork-Id: 13180237 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id E353BC6FD1F for ; Sun, 19 Mar 2023 07:09:51 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230025AbjCSHJt (ORCPT ); Sun, 19 Mar 2023 03:09:49 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:42580 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229635AbjCSHJq (ORCPT ); Sun, 19 Mar 2023 03:09:46 -0400 Received: from mail-ed1-x533.google.com (mail-ed1-x533.google.com [IPv6:2a00:1450:4864:20::533]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D788293E4; Sun, 19 Mar 2023 00:09:44 -0700 (PDT) Received: by mail-ed1-x533.google.com with SMTP id x3so35381717edb.10; Sun, 19 Mar 2023 00:09:44 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; t=1679209783; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=ML/fjqBQiFdniaLp2U4iIoVxIHX7dLvD1UO1EbWAfgg=; b=Z5A4SYAMeoS3RR8Z4VWJcLUiygTK9MPz2hpEcGzak+h9lDhV9fLONDhT9zbE2C+yNg 8rv7BvOpHO0Sx3zejcitLQz6k0a4MVQIxsp9pkabAiX3qbecdxCSYy1n/OTf5RrL2ibo imKjO+t9688AnjvhOoFAdoaDlUwgG7oddRgRcNPVlJEtwMAZHJUFCIouxn9LGFRg5sPp 3specuIgov4Tk2AoSaONSPqii/LRvwchK3NxsaP4ID8Jpud36J7LBJY7/MyOYZ+Yryl/ eLLC8ao/emzfoxD23mCr58ZoewzZOJcncj0ZpZrlVsnuTdhu6B12vmmKBw5twY17tH8n 24xg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; t=1679209783; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=ML/fjqBQiFdniaLp2U4iIoVxIHX7dLvD1UO1EbWAfgg=; b=KPxyjybsdBCpGkkCm982QT6TnBQWKfv+VNpCzBL0Dt12+4wBXkKS0sB0ukrTtbeL2m W/kh+8mvzTyeoRRN6SEli/RSE38lNcRL011B8qkIrVltobRZW8TLiGpfdVTzDMlEYmM8 8/lKyzYCzcqF39NI0JGB1g2JmptrbVgx8RBqSUYgwMt5ZoA1gNf6houQX88/w3a/Mo+M 6BCta2vP7tXxRmfltVQtz/xK08LCuIxYh9q9uw00yBK08b/SZ5QyfgIpQkw6FPOG1Tw1 oWkInZPfLNQKBgAX4L4F40lB7/0wqrBEbt1Inb4ZwpXQ4+Dxk9hUst5y52khFK8delgN qWRw== X-Gm-Message-State: AO0yUKWgJ+u8OmYKXDojAp7vSYFuuH97Ctx/So7j70ybiGsC3/0c9Ygp 6+NhTPeuX2r04fJTa61TyAI= X-Google-Smtp-Source: AK7set+MDLQIHRjSLb5brGnPCVCIAh7+aXOeit8OMTHh4Z0ogtj8/iZA2kquj6wEYzeRrXr2kOJBAQ== X-Received: by 2002:a17:907:3e0e:b0:933:3da2:436 with SMTP id hp14-20020a1709073e0e00b009333da20436mr4510936ejc.54.1679209783409; Sun, 19 Mar 2023 00:09:43 -0700 (PDT) Received: from localhost.localdomain ([2a00:23ee:1938:1bcd:c6e1:42ba:ae87:772e]) by smtp.googlemail.com with ESMTPSA id u8-20020a170906b10800b008c9b44b7851sm2943920ejy.182.2023.03.19.00.09.41 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sun, 19 Mar 2023 00:09:42 -0700 (PDT) From: Lorenzo Stoakes To: linux-mm@kvack.org, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Andrew Morton Cc: Baoquan He , Uladzislau Rezki , Matthew Wilcox , David Hildenbrand , Liu Shixin , Jiri Olsa , Lorenzo Stoakes Subject: [PATCH v2 3/4] fs/proc/kcore: convert read_kcore() to read_kcore_iter() Date: Sun, 19 Mar 2023 07:09:32 +0000 Message-Id: <32f8fad50500d0cd0927a66638c5890533725d30.1679209395.git.lstoakes@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Now we have eliminated spinlocks from the vread() case, convert read_kcore() to read_kcore_iter(). For the time being we still use a bounce buffer for vread(), however in the next patch we will convert this to interact directly with the iterator and eliminate the bounce buffer altogether. Signed-off-by: Lorenzo Stoakes --- fs/proc/kcore.c | 58 ++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 556f310d6aa4..25e0eeb8d498 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include @@ -308,9 +308,12 @@ static void append_kcore_note(char *notes, size_t *i, const char *name, } static ssize_t -read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) +read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) { + struct file *file = iocb->ki_filp; char *buf = file->private_data; + loff_t *ppos = &iocb->ki_pos; + size_t phdrs_offset, notes_offset, data_offset; size_t page_offline_frozen = 1; size_t phdrs_len, notes_len; @@ -318,6 +321,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) size_t tsz; int nphdr; unsigned long start; + size_t buflen = iov_iter_count(iter); size_t orig_buflen = buflen; int ret = 0; @@ -333,7 +337,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) notes_offset = phdrs_offset + phdrs_len; /* ELF file header. */ - if (buflen && *fpos < sizeof(struct elfhdr)) { + if (buflen && *ppos < sizeof(struct elfhdr)) { struct elfhdr ehdr = { .e_ident = { [EI_MAG0] = ELFMAG0, @@ -355,19 +359,18 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) .e_phnum = nphdr, }; - tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos); - if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) { + tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *ppos); + if (copy_to_iter((char *)&ehdr + *ppos, tsz, iter) != tsz) { ret = -EFAULT; goto out; } - buffer += tsz; buflen -= tsz; - *fpos += tsz; + *ppos += tsz; } /* ELF program headers. */ - if (buflen && *fpos < phdrs_offset + phdrs_len) { + if (buflen && *ppos < phdrs_offset + phdrs_len) { struct elf_phdr *phdrs, *phdr; phdrs = kzalloc(phdrs_len, GFP_KERNEL); @@ -397,22 +400,21 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) phdr++; } - tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos); - if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset, - tsz)) { + tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *ppos); + if (copy_to_iter((char *)phdrs + *ppos - phdrs_offset, tsz, + iter) != tsz) { kfree(phdrs); ret = -EFAULT; goto out; } kfree(phdrs); - buffer += tsz; buflen -= tsz; - *fpos += tsz; + *ppos += tsz; } /* ELF note segment. */ - if (buflen && *fpos < notes_offset + notes_len) { + if (buflen && *ppos < notes_offset + notes_len) { struct elf_prstatus prstatus = {}; struct elf_prpsinfo prpsinfo = { .pr_sname = 'R', @@ -447,24 +449,23 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) vmcoreinfo_data, min(vmcoreinfo_size, notes_len - i)); - tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos); - if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) { + tsz = min_t(size_t, buflen, notes_offset + notes_len - *ppos); + if (copy_to_iter(notes + *ppos - notes_offset, tsz, iter) != tsz) { kfree(notes); ret = -EFAULT; goto out; } kfree(notes); - buffer += tsz; buflen -= tsz; - *fpos += tsz; + *ppos += tsz; } /* * Check to see if our file offset matches with any of * the addresses in the elf_phdr on our list. */ - start = kc_offset_to_vaddr(*fpos - data_offset); + start = kc_offset_to_vaddr(*ppos - data_offset); if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen) tsz = buflen; @@ -497,7 +498,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) } if (!m) { - if (clear_user(buffer, tsz)) { + if (iov_iter_zero(tsz, iter) != tsz) { ret = -EFAULT; goto out; } @@ -508,14 +509,14 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) case KCORE_VMALLOC: vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ - if (copy_to_user(buffer, buf, tsz)) { + if (copy_to_iter(buf, tsz, iter) != tsz) { ret = -EFAULT; goto out; } break; case KCORE_USER: /* User page is handled prior to normal kernel page: */ - if (copy_to_user(buffer, (char *)start, tsz)) { + if (copy_to_iter((char *)start, tsz, iter) != tsz) { ret = -EFAULT; goto out; } @@ -531,7 +532,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) */ if (!page || PageOffline(page) || is_page_hwpoison(page) || !pfn_is_ram(pfn)) { - if (clear_user(buffer, tsz)) { + if (iov_iter_zero(tsz, iter) != tsz) { ret = -EFAULT; goto out; } @@ -541,25 +542,24 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) case KCORE_VMEMMAP: case KCORE_TEXT: /* - * We use _copy_to_user() to bypass usermode hardening + * We use _copy_to_iter() to bypass usermode hardening * which would otherwise prevent this operation. */ - if (_copy_to_user(buffer, (char *)start, tsz)) { + if (_copy_to_iter((char *)start, tsz, iter) != tsz) { ret = -EFAULT; goto out; } break; default: pr_warn_once("Unhandled KCORE type: %d\n", m->type); - if (clear_user(buffer, tsz)) { + if (iov_iter_zero(tsz, iter) != tsz) { ret = -EFAULT; goto out; } } skip: buflen -= tsz; - *fpos += tsz; - buffer += tsz; + *ppos += tsz; start += tsz; tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen); } @@ -603,7 +603,7 @@ static int release_kcore(struct inode *inode, struct file *file) } static const struct proc_ops kcore_proc_ops = { - .proc_read = read_kcore, + .proc_read_iter = read_kcore_iter, .proc_open = open_kcore, .proc_release = release_kcore, .proc_lseek = default_llseek, From patchwork Sun Mar 19 07:09:33 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Lorenzo Stoakes X-Patchwork-Id: 13180238 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id EA874C6FD1F for ; Sun, 19 Mar 2023 07:09:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230152AbjCSHJy (ORCPT ); Sun, 19 Mar 2023 03:09:54 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:42680 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230126AbjCSHJs (ORCPT ); Sun, 19 Mar 2023 03:09:48 -0400 Received: from mail-ed1-x52a.google.com (mail-ed1-x52a.google.com [IPv6:2a00:1450:4864:20::52a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0218820574; Sun, 19 Mar 2023 00:09:45 -0700 (PDT) Received: by mail-ed1-x52a.google.com with SMTP id o12so35399557edb.9; Sun, 19 Mar 2023 00:09:45 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; t=1679209785; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=Ay2I6E7X+s9rgNiMvSqEAwkwcRQ0HQ/eFKvjtZ5gGaM=; b=qg4JfEok2IbeOBbE15vPGb6fxFMwm3osYe4121wl6/wuw3nUKAZTo9eCXEHKEAucps Z3g1QJDJngKzPz7zzLGyFr73SQob7YMOhEAq0N69oguhcAsph5uBh3qo1oul4LtpT3ez UTUQeDn6ShR1299B7Nc3LXa3OOe4b+t+F7SP5So37OcHxQYUOcpRXbyZZ/R7DQicuK5S ze/qeriWyym5iwBvoz9CcNZBjzhaq+dlH9N6msil+nI+DUhYvRQqaW7T2AgmhvFiB5VN 1mKeCZ3GDWhP3w8nHLrKwQElB259N50qABO2XV4NJqOggvgz47KhxPD0BiopioEU1sTB /Uqw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; t=1679209785; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=Ay2I6E7X+s9rgNiMvSqEAwkwcRQ0HQ/eFKvjtZ5gGaM=; b=OZ56LEts+l9WHqdQYVn5GtU62FuunrtstqbuVyZKrluzbOLnsuNBMINy1Ut1q8Q6Gr WVT+pxBkLKvJV3E/5w7ww//9fiUxQT/498cn+cLOhf7yV47WM4GdUXt5wQkFmclEuRIy 8BVCuAOHc0RcFZrzE+zdhRt6R1zmg9t+WGKL00Gh+GrrEppDwUEu4e1MmH1WAOFfrvWx r6JlQ5N3/bdGx74rHEf7jE3bki4t+zTCfxQh/sIyaS9YFdvscyJKbkXW0F5WHyzZhYCF URlqCIHiJlQJcf7oTQOT9lny6tx9Xk36IRgv2b/mX1Aju+Ji/prgc0AFEDEBq3wNMLNi b9iQ== X-Gm-Message-State: AO0yUKUvMDNfWakkKwjz4zacqRt57NHsXChL5W2qYD66uV/t+JcKLppx UcdPtZQgJmt9YVoGstlyqek= X-Google-Smtp-Source: AK7set+FByhR3QDaaOdxDdO9lHVIRn8pNe8XzlgLkLTytG+TSg8g/xKLS2t/Br17U/XJKeK5a1dHCg== X-Received: by 2002:a17:906:69c9:b0:870:b950:18d4 with SMTP id g9-20020a17090669c900b00870b95018d4mr5272766ejs.5.1679209785207; Sun, 19 Mar 2023 00:09:45 -0700 (PDT) Received: from localhost.localdomain ([2a00:23ee:1938:1bcd:c6e1:42ba:ae87:772e]) by smtp.googlemail.com with ESMTPSA id u8-20020a170906b10800b008c9b44b7851sm2943920ejy.182.2023.03.19.00.09.43 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Sun, 19 Mar 2023 00:09:44 -0700 (PDT) From: Lorenzo Stoakes To: linux-mm@kvack.org, linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, Andrew Morton Cc: Baoquan He , Uladzislau Rezki , Matthew Wilcox , David Hildenbrand , Liu Shixin , Jiri Olsa , Lorenzo Stoakes Subject: [PATCH v2 4/4] mm: vmalloc: convert vread() to vread_iter() Date: Sun, 19 Mar 2023 07:09:33 +0000 Message-Id: <7f9dad4deade9639cf7af7a8b01143bca882ff02.1679209395.git.lstoakes@gmail.com> X-Mailer: git-send-email 2.39.2 In-Reply-To: References: MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org Having previously laid the foundation for converting vread() to an iterator function, pull the trigger and do so. This patch attempts to provide minimal refactoring and to reflect the existing logic as best we can, with the exception of aligned_vread_iter() which drops the use of the deprecated kmap_atomic() in favour of kmap_local_page(). All existing logic to zero portions of memory not read remain and there should be no functional difference other than a performance improvement in /proc/kcore access to vmalloc regions. Now we have discarded with the need for a bounce buffer at all in read_kcore_iter(), we dispense with the one allocated there altogether. Signed-off-by: Lorenzo Stoakes --- fs/proc/kcore.c | 21 +-------- include/linux/vmalloc.h | 3 +- mm/nommu.c | 10 ++-- mm/vmalloc.c | 101 +++++++++++++++++++++------------------- 4 files changed, 62 insertions(+), 73 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 25e0eeb8d498..a0ed3ca35cce 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -307,13 +307,9 @@ static void append_kcore_note(char *notes, size_t *i, const char *name, *i = ALIGN(*i + descsz, 4); } -static ssize_t -read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) +static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) { - struct file *file = iocb->ki_filp; - char *buf = file->private_data; loff_t *ppos = &iocb->ki_pos; - size_t phdrs_offset, notes_offset, data_offset; size_t page_offline_frozen = 1; size_t phdrs_len, notes_len; @@ -507,9 +503,7 @@ read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) switch (m->type) { case KCORE_VMALLOC: - vread(buf, (char *)start, tsz); - /* we have to zero-fill user buffer even if no read */ - if (copy_to_iter(buf, tsz, iter) != tsz) { + if (vread_iter(iter, (char *)start, tsz) != tsz) { ret = -EFAULT; goto out; } @@ -582,10 +576,6 @@ static int open_kcore(struct inode *inode, struct file *filp) if (ret) return ret; - filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!filp->private_data) - return -ENOMEM; - if (kcore_need_update) kcore_update_ram(); if (i_size_read(inode) != proc_root_kcore->size) { @@ -596,16 +586,9 @@ static int open_kcore(struct inode *inode, struct file *filp) return 0; } -static int release_kcore(struct inode *inode, struct file *file) -{ - kfree(file->private_data); - return 0; -} - static const struct proc_ops kcore_proc_ops = { .proc_read_iter = read_kcore_iter, .proc_open = open_kcore, - .proc_release = release_kcore, .proc_lseek = default_llseek, }; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 69250efa03d1..6beb2ace6a7a 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -9,6 +9,7 @@ #include /* pgprot_t */ #include #include +#include #include @@ -251,7 +252,7 @@ static inline void set_vm_flush_reset_perms(void *addr) #endif /* for /proc/kcore */ -extern long vread(char *buf, char *addr, unsigned long count); +extern long vread_iter(struct iov_iter *iter, char *addr, size_t count); /* * Internals. Don't use.. diff --git a/mm/nommu.c b/mm/nommu.c index 57ba243c6a37..e0fcd948096e 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -198,14 +199,13 @@ unsigned long vmalloc_to_pfn(const void *addr) } EXPORT_SYMBOL(vmalloc_to_pfn); -long vread(char *buf, char *addr, unsigned long count) +long vread_iter(struct iov_iter *iter, char *addr, size_t count) { /* Don't allow overflow */ - if ((unsigned long) buf + count < count) - count = -(unsigned long) buf; + if ((unsigned long) addr + count < count) + count = -(unsigned long) addr; - memcpy(buf, addr, count); - return count; + return copy_to_iter(addr, count, iter); } /* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index c24b27664a97..f19509a6eef4 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -3446,20 +3445,20 @@ EXPORT_SYMBOL(vmalloc_32_user); * small helper routine , copy contents to buf from addr. * If the page is not present, fill zero. */ - -static int aligned_vread(char *buf, char *addr, unsigned long count) +static void aligned_vread_iter(struct iov_iter *iter, + char *addr, size_t count) { - struct page *p; - int copied = 0; + struct page *page; - while (count) { + while (count > 0) { unsigned long offset, length; + size_t copied = 0; offset = offset_in_page(addr); length = PAGE_SIZE - offset; if (length > count) length = count; - p = vmalloc_to_page(addr); + page = vmalloc_to_page(addr); /* * To do safe access to this _mapped_ area, we need * lock. But adding lock here means that we need to add @@ -3467,23 +3466,24 @@ static int aligned_vread(char *buf, char *addr, unsigned long count) * interface, rarely used. Instead of that, we'll use * kmap() and get small overhead in this access function. */ - if (p) { + if (page) { /* We can expect USER0 is not used -- see vread() */ - void *map = kmap_atomic(p); - memcpy(buf, map + offset, length); - kunmap_atomic(map); - } else - memset(buf, 0, length); + void *map = kmap_local_page(page); + + copied = copy_to_iter(map + offset, length, iter); + kunmap_local(map); + } + + if (copied < length) + iov_iter_zero(length - copied, iter); addr += length; - buf += length; - copied += length; count -= length; } - return copied; } -static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags) +static void vmap_ram_vread_iter(struct iov_iter *iter, char *addr, int count, + unsigned long flags) { char *start; struct vmap_block *vb; @@ -3496,7 +3496,7 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags * handle it here. */ if (!(flags & VMAP_BLOCK)) { - aligned_vread(buf, addr, count); + aligned_vread_iter(iter, addr, count); return; } @@ -3517,22 +3517,24 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags if (!count) break; start = vmap_block_vaddr(vb->va->va_start, rs); - while (addr < start) { + + if (addr < start) { + size_t to_zero = min_t(size_t, start - addr, count); + + iov_iter_zero(to_zero, iter); + addr += to_zero; + count -= (int)to_zero; if (count == 0) goto unlock; - *buf = '\0'; - buf++; - addr++; - count--; } + /*it could start reading from the middle of used region*/ offset = offset_in_page(addr); n = ((re - rs + 1) << PAGE_SHIFT) - offset; if (n > count) n = count; - aligned_vread(buf, start+offset, n); + aligned_vread_iter(iter, start + offset, n); - buf += n; addr += n; count -= n; } @@ -3541,15 +3543,15 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags finished: /* zero-fill the left dirty or free regions */ - if (count) - memset(buf, 0, count); + if (count > 0) + iov_iter_zero(count, iter); } /** - * vread() - read vmalloc area in a safe way. - * @buf: buffer for reading data - * @addr: vm address. - * @count: number of bytes to be read. + * vread_iter() - read vmalloc area in a safe way to an iterator. + * @iter: the iterator to which data should be written. + * @addr: vm address. + * @count: number of bytes to be read. * * This function checks that addr is a valid vmalloc'ed area, and * copy data from that area to a given buffer. If the given memory range @@ -3569,13 +3571,13 @@ static void vmap_ram_vread(char *buf, char *addr, int count, unsigned long flags * (same number as @count) or %0 if [addr...addr+count) doesn't * include any intersection with valid vmalloc area */ -long vread(char *buf, char *addr, unsigned long count) +long vread_iter(struct iov_iter *iter, char *addr, size_t count) { struct vmap_area *va; struct vm_struct *vm; - char *vaddr, *buf_start = buf; - unsigned long buflen = count; - unsigned long n, size, flags; + char *vaddr; + size_t buflen = count; + size_t n, size, flags; might_sleep(); @@ -3595,7 +3597,7 @@ long vread(char *buf, char *addr, unsigned long count) goto finished; list_for_each_entry_from(va, &vmap_area_list, list) { - if (!count) + if (count == 0) break; vm = va->vm; @@ -3619,36 +3621,39 @@ long vread(char *buf, char *addr, unsigned long count) if (addr >= vaddr + size) continue; - while (addr < vaddr) { + + if (addr < vaddr) { + size_t to_zero = min_t(size_t, vaddr - addr, count); + + iov_iter_zero(to_zero, iter); + addr += to_zero; + count -= to_zero; if (count == 0) goto finished; - *buf = '\0'; - buf++; - addr++; - count--; } + n = vaddr + size - addr; if (n > count) n = count; if (flags & VMAP_RAM) - vmap_ram_vread(buf, addr, n, flags); + vmap_ram_vread_iter(iter, addr, n, flags); else if (!(vm->flags & VM_IOREMAP)) - aligned_vread(buf, addr, n); + aligned_vread_iter(iter, addr, n); else /* IOREMAP area is treated as memory hole */ - memset(buf, 0, n); - buf += n; + iov_iter_zero(n, iter); + addr += n; count -= n; } finished: up_read(&vmap_area_lock); - if (buf == buf_start) + if (count == buflen) return 0; /* zero-fill memory holes */ - if (buf != buf_start + buflen) - memset(buf, 0, buflen - (buf - buf_start)); + if (count > 0) + iov_iter_zero(count, iter); return buflen; }