From patchwork Mon May 16 05:40:47 2022
From: Song Liu
Subject: [PATCH bpf-next 1/5] bpf: fill new bpf_prog_pack with illegal instructions
Date: Sun, 15 May 2022 22:40:47 -0700
Message-ID: <20220516054051.114490-2-song@kernel.org>
In-Reply-To: <20220516054051.114490-1-song@kernel.org>

bpf_prog_pack enables sharing huge pages among multiple BPF programs.
These pages are marked as executable before the JIT engine fills them
with BPF programs. To make these pages safe, fill the whole
bpf_prog_pack with illegal instructions before making it executable.
Fixes: 57631054fae6 ("bpf: Introduce bpf_prog_pack allocator")
Fixes: 33c9805860e5 ("bpf: Introduce bpf_jit_binary_pack_[alloc|finalize|free]")
Signed-off-by: Song Liu
---
 kernel/bpf/core.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9cc91f0f3115..2d0c9d4696ad 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -873,7 +873,7 @@ static size_t select_bpf_prog_pack_size(void)
 	return size;
 }
 
-static struct bpf_prog_pack *alloc_new_pack(void)
+static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns)
 {
 	struct bpf_prog_pack *pack;
 
@@ -886,6 +886,7 @@ static struct bpf_prog_pack *alloc_new_pack(void)
 		kfree(pack);
 		return NULL;
 	}
+	bpf_fill_ill_insns(pack->ptr, bpf_prog_pack_size);
 	bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE);
 	list_add_tail(&pack->list, &pack_list);
 
@@ -895,7 +896,7 @@ static struct bpf_prog_pack *alloc_new_pack(void)
 	return pack;
 }
 
-static void *bpf_prog_pack_alloc(u32 size)
+static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
 {
 	unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size);
 	struct bpf_prog_pack *pack;
@@ -910,6 +911,7 @@ static void *bpf_prog_pack_alloc(u32 size)
 		size = round_up(size, PAGE_SIZE);
 		ptr = module_alloc(size);
 		if (ptr) {
+			bpf_fill_ill_insns(ptr, size);
 			set_vm_flush_reset_perms(ptr);
 			set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
 			set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
@@ -923,7 +925,7 @@ static void *bpf_prog_pack_alloc(u32 size)
 			goto found_free_area;
 	}
 
-	pack = alloc_new_pack();
+	pack = alloc_new_pack(bpf_fill_ill_insns);
 	if (!pack)
 		goto out;
 
@@ -1102,7 +1104,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
 	if (bpf_jit_charge_modmem(size))
 		return NULL;
 
-	ro_header = bpf_prog_pack_alloc(size);
+	ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
 	if (!ro_header) {
 		bpf_jit_uncharge_modmem(size);
 		return NULL;
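
For illustration, here is a minimal user-space sketch of the ordering this
patch enforces: poison first, then flip the mapping to read-only + executable.
This is not kernel code; mmap()/mprotect() stand in for module_alloc() and
set_memory_ro()/set_memory_x(), and alloc_new_pack_sketch() is a hypothetical
stand-in for alloc_new_pack():

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);

/* mirrors the x86 jit_fill_hole(): 0xcc is the int3 (trap) opcode */
static void fill_ill_insns(void *area, unsigned int size)
{
        memset(area, 0xcc, size);
}

static void *alloc_new_pack_sketch(size_t pack_size, bpf_jit_fill_hole_t fill)
{
        void *ptr = mmap(NULL, pack_size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (ptr == MAP_FAILED)
                return NULL;

        fill(ptr, pack_size);                            /* 1. fill with trap bytes   */
        mprotect(ptr, pack_size, PROT_READ | PROT_EXEC); /* 2. only now expose as RX  */
        return ptr;
}

int main(void)
{
        unsigned char *pack = alloc_new_pack_sketch(2UL << 20, fill_ill_insns);

        if (pack)
                printf("first byte of new pack: 0x%02x\n", pack[0]);
        return 0;
}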
From patchwork Mon May 16 05:40:48 2022
From: Song Liu
Subject: [PATCH bpf-next 2/5] x86/alternative: introduce text_poke_set
Date: Sun, 15 May 2022 22:40:48 -0700
Message-ID: <20220516054051.114490-3-song@kernel.org>
In-Reply-To: <20220516054051.114490-1-song@kernel.org>

Introduce a memset-like API for text_poke. This will be used to fill
unused RX memory with illegal instructions.

Suggested-by: Peter Zijlstra
Signed-off-by: Song Liu
Acked-by: Peter Zijlstra (Intel)
---
 arch/x86/include/asm/text-patching.h |  1 +
 arch/x86/kernel/alternative.c        | 70 ++++++++++++++++++++++++----
 2 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index d20ab0921480..1cc15528ce29 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -45,6 +45,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void text_poke_sync(void);
 extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
 extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
+extern void *text_poke_set(void *addr, int c, size_t len);
 extern int poke_int3_handler(struct pt_regs *regs);
 extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
 
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index d374cb3cf024..732814065389 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -994,7 +994,21 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
 __ro_after_init struct mm_struct *poking_mm;
 __ro_after_init unsigned long poking_addr;
 
-static void *__text_poke(void *addr, const void *opcode, size_t len)
+static void text_poke_memcpy(void *dst, const void *src, size_t len)
+{
+	memcpy(dst, src, len);
+}
+
+static void text_poke_memset(void *dst, const void *src, size_t len)
+{
+	int c = *(int *)src;
+
+	memset(dst, c, len);
+}
+
+typedef void text_poke_f(void *dst, const void *src, size_t len);
+
+static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t len)
 {
 	bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
 	struct page *pages[2] = {NULL};
@@ -1059,7 +1073,7 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
 	prev = use_temporary_mm(poking_mm);
 
 	kasan_disable_current();
-	memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
+	func((u8 *)poking_addr + offset_in_page(addr), src, len);
 	kasan_enable_current();
 
 	/*
@@ -1087,11 +1101,13 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
 			   (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
 			   PAGE_SHIFT, false);
 
-	/*
-	 * If the text does not match what we just wrote then something is
-	 * fundamentally screwy; there's nothing we can really do about that.
-	 */
-	BUG_ON(memcmp(addr, opcode, len));
+	if (func == text_poke_memcpy) {
+		/*
+		 * If the text does not match what we just wrote then something is
+		 * fundamentally screwy; there's nothing we can really do about that.
+		 */
+		BUG_ON(memcmp(addr, src, len));
+	}
 
 	local_irq_restore(flags);
 	pte_unmap_unlock(ptep, ptl);
@@ -1118,7 +1134,7 @@ void *text_poke(void *addr, const void *opcode, size_t len)
 {
 	lockdep_assert_held(&text_mutex);
 
-	return __text_poke(addr, opcode, len);
+	return __text_poke(text_poke_memcpy, addr, opcode, len);
 }
 
 /**
@@ -1137,7 +1153,7 @@ void *text_poke(void *addr, const void *opcode, size_t len)
  */
 void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
 {
-	return __text_poke(addr, opcode, len);
+	return __text_poke(text_poke_memcpy, addr, opcode, len);
 }
 
 /**
@@ -1167,7 +1183,41 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len)
 
 		s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);
 
-		__text_poke((void *)ptr, opcode + patched, s);
+		__text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s);
+		patched += s;
+	}
+	mutex_unlock(&text_mutex);
+	return addr;
+}
+
+/**
+ * text_poke_set - memset into (an unused part of) RX memory
+ * @addr: address to modify
+ * @c: the byte to fill the area with
+ * @len: length to copy, could be more than 2x PAGE_SIZE
+ *
+ * Not safe against concurrent execution; useful for JITs to dump
+ * new code blocks into unused regions of RX memory. Can be used in
+ * conjunction with synchronize_rcu_tasks() to wait for existing
+ * execution to quiesce after having made sure no existing functions
+ * pointers are live.
+ */
+void *text_poke_set(void *addr, int c, size_t len)
+{
+	unsigned long start = (unsigned long)addr;
+	size_t patched = 0;
+
+	if (WARN_ON_ONCE(core_kernel_text(start)))
+		return NULL;
+
+	mutex_lock(&text_mutex);
+	while (patched < len) {
+		unsigned long ptr = start + patched;
+		size_t s;
+
+		s = min_t(size_t, PAGE_SIZE * 2 - offset_in_page(ptr), len - patched);
+
+		__text_poke(text_poke_memset, (void *)ptr, (void *)&c, s);
 		patched += s;
 	}
 	mutex_unlock(&text_mutex);
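
To see why the loop splits on PAGE_SIZE * 2 - offset_in_page(ptr), here is a
small user-space model of the chunking shared by text_poke_copy() and
text_poke_set(): __text_poke() can map at most two pages at a time, so each
chunk may cross at most one page boundary. poke_one() and the constants are
illustrative stand-ins, not kernel APIs:

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

static size_t offset_in_page(unsigned long addr)
{
        return addr & (PAGE_SIZE - 1);
}

/* stand-in for a single __text_poke() call */
static void poke_one(unsigned long addr, int c, size_t len)
{
        printf("poke %#lx..%#lx with 0x%02x\n", addr, addr + len - 1, c);
}

static void poke_set(unsigned long start, int c, size_t len)
{
        size_t patched = 0;

        while (patched < len) {
                unsigned long ptr = start + patched;
                size_t s = len - patched;
                size_t room = PAGE_SIZE * 2 - offset_in_page(ptr);

                if (s > room)          /* min_t(size_t, room, remaining) */
                        s = room;
                poke_one(ptr, c, s);
                patched += s;
        }
}

int main(void)
{
        /* 3 pages + 100 bytes, starting 256 bytes into a page */
        poke_set(0x100100, 0xcc, 3 * PAGE_SIZE + 100);
        return 0;
}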

From patchwork Mon May 16 05:40:49 2022
From: Song Liu
Subject: [PATCH bpf-next 3/5] bpf: introduce bpf_arch_text_invalidate for bpf_prog_pack
Date: Sun, 15 May 2022 22:40:49 -0700
Message-ID: <20220516054051.114490-4-song@kernel.org>
In-Reply-To: <20220516054051.114490-1-song@kernel.org>

Introduce bpf_arch_text_invalidate and use it to fill the unused part of
bpf_prog_pack with illegal instructions when a BPF program is freed.
Signed-off-by: Song Liu
---
 arch/x86/net/bpf_jit_comp.c | 5 +++++
 include/linux/bpf.h         | 1 +
 kernel/bpf/core.c           | 8 ++++++++
 3 files changed, 14 insertions(+)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a2b6d197c226..f298b18a9a3d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -228,6 +228,11 @@ static void jit_fill_hole(void *area, unsigned int size)
 	memset(area, 0xcc, size);
 }
 
+int bpf_arch_text_invalidate(void *dst, size_t len)
+{
+	return IS_ERR_OR_NULL(text_poke_set(dst, 0xcc, len));
+}
+
 struct jit_context {
 	int cleanup_addr; /* Epilogue code offset */
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5061ccd8b2dc..0288a6464236 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2362,6 +2362,7 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1,
 		       void *addr2);
 
 void *bpf_arch_text_copy(void *dst, void *src, size_t len);
+int bpf_arch_text_invalidate(void *dst, size_t len);
 
 struct btf_id_set;
 bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 2d0c9d4696ad..cacd8684c3c4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -968,6 +968,9 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
 	nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
 	pos = ((unsigned long)hdr - (unsigned long)pack_ptr) >> BPF_PROG_CHUNK_SHIFT;
 
+	WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size),
+		  "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
+
 	bitmap_clear(pack->bitmap, pos, nbits);
 	if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
 				       bpf_prog_chunk_count(), 0) == 0) {
@@ -2740,6 +2743,11 @@ void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len)
 	return ERR_PTR(-ENOTSUPP);
 }
 
+int __weak bpf_arch_text_invalidate(void *dst, size_t len)
+{
+	return -ENOTSUPP;
+}
+
 DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 EXPORT_SYMBOL(bpf_stats_enabled_key);
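
A hypothetical user-space model of what the new call buys in
bpf_prog_pack_free(): the freed chunks are overwritten with the trap byte
before their bits go back into the pack bitmap, so stale JITed code is never
reachable through reused space. The toy 64-bit bitmap, the constants, and
prog_pack_free() below are simplifications, not the kernel data structures:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CHUNK_SIZE      64                      /* stand-in for BPF_PROG_CHUNK_SIZE */
#define PACK_SIZE       (64 * CHUNK_SIZE)       /* toy pack: 64 chunks, one bit each */

static uint8_t pack[PACK_SIZE];
static uint64_t used_bitmap;                    /* toy bitmap; assumes nbits < 64 */

static void prog_pack_free(size_t offset, size_t size)
{
        size_t pos = offset / CHUNK_SIZE;
        size_t nbits = (size + CHUNK_SIZE - 1) / CHUNK_SIZE;

        /* the step this patch adds: invalidate before the chunks can be reused */
        memset(&pack[offset], 0xcc, size);      /* 0xcc = int3 on x86 */

        used_bitmap &= ~(((1ULL << nbits) - 1) << pos); /* ~bitmap_clear() */
}

int main(void)
{
        used_bitmap = 0x7;                      /* pretend chunks 0-2 hold a program */
        prog_pack_free(0, 3 * CHUNK_SIZE);
        printf("bitmap=%#llx byte0=%#x\n",
               (unsigned long long)used_bitmap, pack[0]);
        return 0;
}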

From patchwork Mon May 16 05:40:50 2022
From: Song Liu
Subject: [PATCH bpf-next 4/5] module: introduce module_alloc_huge
Date: Sun, 15 May 2022 22:40:50 -0700
Message-ID: <20220516054051.114490-5-song@kernel.org>
In-Reply-To: <20220516054051.114490-1-song@kernel.org>

Introduce module_alloc_huge, which allocates huge page backed memory in
module memory space. The primary user of this memory is bpf_prog_pack
(multiple BPF programs sharing a huge page).

Cc: Stephen Rothwell
Cc: Luis Chamberlain
Cc: Linus Torvalds
Signed-off-by: Song Liu
---
Note: This conflicts with the module.c => module/ split in modules-next.
Current patch is for module.c in the bpf-next tree. After the split,
__weak module_alloc_huge() should be added to kernel/module/main.c.
---
 arch/x86/kernel/module.c     | 21 +++++++++++++++++++++
 include/linux/moduleloader.h |  5 +++++
 kernel/module.c              |  8 ++++++++
 3 files changed, 34 insertions(+)

diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index b98ffcf4d250..63f6a16c70dc 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -86,6 +86,27 @@ void *module_alloc(unsigned long size)
 	return p;
 }
 
+void *module_alloc_huge(unsigned long size)
+{
+	gfp_t gfp_mask = GFP_KERNEL;
+	void *p;
+
+	if (PAGE_ALIGN(size) > MODULES_LEN)
+		return NULL;
+
+	p = __vmalloc_node_range(size, MODULE_ALIGN,
+				 MODULES_VADDR + get_module_load_offset(),
+				 MODULES_END, gfp_mask, PAGE_KERNEL,
+				 VM_DEFER_KMEMLEAK | VM_ALLOW_HUGE_VMAP,
+				 NUMA_NO_NODE, __builtin_return_address(0));
+	if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
+		vfree(p);
+		return NULL;
+	}
+
+	return p;
+}
+
 #ifdef CONFIG_X86_32
 int apply_relocate(Elf32_Shdr *sechdrs,
 		   const char *strtab,
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index 9e09d11ffe5b..d34743a88938 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -26,6 +26,11 @@ unsigned int arch_mod_section_prepend(struct module *mod, unsigned int section)
    sections.  Returns NULL on failure. */
 void *module_alloc(unsigned long size);
 
+/* Allocator used for allocating memory in module memory space. If size is
+ * greater than PMD_SIZE, allow using huge pages. Returns NULL on failure.
+ */
+void *module_alloc_huge(unsigned long size);
+
 /* Free memory returned from module_alloc. */
 void module_memfree(void *module_region);
 
diff --git a/kernel/module.c b/kernel/module.c
index 6cea788fd965..2af20ac3209c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2839,6 +2839,14 @@ void * __weak module_alloc(unsigned long size)
 			NUMA_NO_NODE, __builtin_return_address(0));
 }
 
+void * __weak module_alloc_huge(unsigned long size)
+{
+	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+				    GFP_KERNEL, PAGE_KERNEL_EXEC,
+				    VM_FLUSH_RESET_PERMS | VM_ALLOW_HUGE_VMAP,
+				    NUMA_NO_NODE, __builtin_return_address(0));
+}
+
 bool __weak module_init_section(const char *name)
 {
 	return strstarts(name, ".init");
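
module_alloc_huge() only allows huge mappings (VM_ALLOW_HUGE_VMAP); whether an
allocation is actually PMD-backed still depends on size and availability, so
callers keep a fallback path (see select_bpf_prog_pack_size() in the next
patch). A rough user-space analogue of that try-huge-then-fall-back pattern,
using mmap() purely for illustration and a hypothetical alloc_prog_region()
helper:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

static void *alloc_prog_region(size_t size)
{
        /* prefer a huge-page backed region ... */
        void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

        if (p != MAP_FAILED)
                return p;

        /* ... else fall back to base pages, as bpf_prog_pack does */
        p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        return p == MAP_FAILED ? NULL : p;
}

int main(void)
{
        void *p = alloc_prog_region(2UL << 20); /* 2 MiB, one x86-64 PMD */

        printf("allocated at %p\n", p);
        return 0;
}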

From patchwork Mon May 16 05:40:51 2022
From: Song Liu
Subject: [PATCH bpf-next 5/5] bpf: use module_alloc_huge for bpf_prog_pack
Date: Sun, 15 May 2022 22:40:51 -0700
Message-ID: <20220516054051.114490-6-song@kernel.org>
In-Reply-To: <20220516054051.114490-1-song@kernel.org>

Use module_alloc_huge for bpf_prog_pack so that BPF programs sit on
PMD_SIZE pages. This benefits system performance by reducing the iTLB
miss rate.

A benchmark of a real web service workload shows this change gives
another ~0.2% performance boost on top of PAGE_SIZE bpf_prog_pack
(which improved system throughput by ~0.5%).

Also, remove set_vm_flush_reset_perms() from alloc_new_pack() and use
set_memory_[nx|rw] in bpf_prog_pack_free(). This is because
VM_FLUSH_RESET_PERMS does not work with huge pages yet. [1]

[1] https://lore.kernel.org/bpf/aeeeaf0b7ec63fdba55d4834d2f524d8bf05b71b.camel@intel.com/

Suggested-by: Rick Edgecombe
Signed-off-by: Song Liu
---
 kernel/bpf/core.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index cacd8684c3c4..b64d91fcb0ba 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -857,7 +857,7 @@ static size_t select_bpf_prog_pack_size(void)
 	void *ptr;
 
 	size = BPF_HPAGE_SIZE * num_online_nodes();
-	ptr = module_alloc(size);
+	ptr = module_alloc_huge(size);
 
 	/* Test whether we can get huge pages. If not just use PAGE_SIZE
 	 * packs.
@@ -881,7 +881,7 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns)
 			       GFP_KERNEL);
 	if (!pack)
 		return NULL;
-	pack->ptr = module_alloc(bpf_prog_pack_size);
+	pack->ptr = module_alloc_huge(bpf_prog_pack_size);
 	if (!pack->ptr) {
 		kfree(pack);
 		return NULL;
@@ -890,7 +890,6 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns)
 	bitmap_zero(pack->bitmap, bpf_prog_pack_size / BPF_PROG_CHUNK_SIZE);
 	list_add_tail(&pack->list, &pack_list);
 
-	set_vm_flush_reset_perms(pack->ptr);
 	set_memory_ro((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
 	set_memory_x((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
 	return pack;
@@ -909,10 +908,9 @@ static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
 
 	if (size > bpf_prog_pack_size) {
 		size = round_up(size, PAGE_SIZE);
-		ptr = module_alloc(size);
+		ptr = module_alloc_huge(size);
 		if (ptr) {
 			bpf_fill_ill_insns(ptr, size);
-			set_vm_flush_reset_perms(ptr);
 			set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
 			set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
 		}
@@ -949,6 +947,8 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
 
 	mutex_lock(&pack_mutex);
 	if (hdr->size > bpf_prog_pack_size) {
+		set_memory_nx((unsigned long)hdr, hdr->size / PAGE_SIZE);
+		set_memory_rw((unsigned long)hdr, hdr->size / PAGE_SIZE);
 		module_memfree(hdr);
 		goto out;
 	}
@@ -975,6 +975,8 @@ static void bpf_prog_pack_free(struct bpf_binary_header *hdr)
 	if (bitmap_find_next_zero_area(pack->bitmap, bpf_prog_chunk_count(), 0,
 				       bpf_prog_chunk_count(), 0) == 0) {
 		list_del(&pack->list);
+		set_memory_nx((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
+		set_memory_rw((unsigned long)pack->ptr, bpf_prog_pack_size / PAGE_SIZE);
 		module_memfree(pack->ptr);
 		kfree(pack);
 	}
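
Because set_vm_flush_reset_perms() is dropped, the free paths above must undo
the permissions by hand before module_memfree(). A rough user-space analogue
of that ordering, with mprotect()/munmap() standing in for
set_memory_nx()/set_memory_rw()/module_memfree(); this only illustrates the
sequence, not the kernel's page-table handling:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        size_t size = 2UL << 20;
        void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (p == MAP_FAILED)
                return 1;

        /* JIT side: fill, then make the region read-only + executable */
        mprotect(p, size, PROT_READ | PROT_EXEC);

        /* free side after this patch: drop X, restore W, then release */
        mprotect(p, size, PROT_READ | PROT_WRITE);      /* ~set_memory_nx()/rw() */
        munmap(p, size);                                /* ~module_memfree()     */

        printf("released %zu bytes after resetting permissions\n", size);
        return 0;
}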