From patchwork Thu Apr 21 14:03:38 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 12821690 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from bombadil.infradead.org (bombadil.infradead.org [198.137.202.133]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 6690DC433EF for ; Thu, 21 Apr 2022 14:05:16 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=lists.infradead.org; s=bombadil.20210309; h=Sender: Content-Transfer-Encoding:Content-Type:List-Subscribe:List-Help:List-Post: List-Archive:List-Unsubscribe:List-Id:MIME-Version:References:In-Reply-To: Message-Id:Date:Subject:Cc:To:From:Reply-To:Content-ID:Content-Description: Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc:Resent-Message-ID: List-Owner; bh=hcn8AQ9rfQc9w/n2GREtdvUXaxpYQZAPbdG5TRAtxlM=; b=QrCWFGN9irQxcb 5DPs4Tk9AFNZ3O0N3cb4kzk97EVMT84DhT9beRlGNkayZQpxZ/UN9IxlYmdEbrE74yPYuj31gMsu3 6AEkLvyPWZQFU78EZ8hG6sJpO2CG58NR48BvPvVahalDGcEKBryQgGvQEEDVV0yqVZjpm4LXhLVQ4 gpHpcjH7ypAiHlk5wjN3ZUHvqSWyVKlz39wRTR8ZJuHlEkwLblv78NAAco6i4YQTa4mfGuuhHT8i6 Ikw6us2gcv7hREfOJZ9NgeICWGzDU94EeKVtajoSAL76YgMDb8caaYTVkAjPXF3dtBpGXAche08KM mMyIpbccMj29K3Hxw+1w==; Received: from localhost ([::1] helo=bombadil.infradead.org) by bombadil.infradead.org with esmtp (Exim 4.94.2 #2 (Red Hat Linux)) id 1nhXPg-00Dfaj-VB; Thu, 21 Apr 2022 14:04:05 +0000 Received: from dfw.source.kernel.org ([2604:1380:4641:c500::1]) by bombadil.infradead.org with esmtps (Exim 4.94.2 #2 (Red Hat Linux)) id 1nhXPW-00DfWq-TU for linux-arm-kernel@lists.infradead.org; Thu, 21 Apr 2022 14:03:56 +0000 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client 
certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 56D3361D60; Thu, 21 Apr 2022 14:03:53 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 31D8AC385AC; Thu, 21 Apr 2022 14:03:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1650549832; bh=UVCjV3GWPRpvAoW7QHeTFw1uLq+FlqrS+VoKJHF8vFE=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Ny4/6YL7j82GLH6kg4G2OeinTiEofLR+2DSIl6S24Vmr+X2pnHM0/mPipx4QjA396 /jr/oGe/M2hIFsr28G3WkpAtinN2fZhC2erRWwqSHycCiEFXa4fXzY1TLVlU872EgN ZhGFZAuqBrTD3hNuDDlWrzbIBW1ltK4zHK2ikJ1qoATHa0z/egu9MNZRD5GofJOlaK TbLVSQZaBTD6rxh48hZ3Qg7zBVbKpw0lrby809hfgUdoW/eFgeOpPtO9yU4y5V83EY EtQHnzSJ0sLGXNdFdk1cuLZbERUwsTMworY15hnuC/F/C0CzMFhj8yU8EQhAQFFcCM +JkSeixfFLFgA== From: Ard Biesheuvel To: linux-arm-kernel@lists.infradead.org Cc: Ard Biesheuvel , will@kernel.org, maz@kernel.org, mark.rutland@arm.com, catalin.marinas@arm.com, keescook@chromium.org Subject: [PATCH v3 1/2] arm64: kpti-ng: simplify page table traversal logic Date: Thu, 21 Apr 2022 16:03:38 +0200 Message-Id: <20220421140339.1329019-2-ardb@kernel.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220421140339.1329019-1-ardb@kernel.org> References: <20220421140339.1329019-1-ardb@kernel.org> MIME-Version: 1.0 X-Developer-Signature: v=1; a=openpgp-sha256; l=5060; h=from:subject; bh=UVCjV3GWPRpvAoW7QHeTFw1uLq+FlqrS+VoKJHF8vFE=; b=owEB7QES/pANAwAKAcNPIjmS2Y8kAcsmYgBiYWQ5RTCFNSemZJKJrxvSDDt11TmzKHiEPKD0qC4F UQuh3TOJAbMEAAEKAB0WIQT72WJ8QGnJQhU3VynDTyI5ktmPJAUCYmFkOQAKCRDDTyI5ktmPJCj9C/ 9ibqz+CSIz1bDHyj9YFGgcB3kSjg0l4u+a9WQAIPa1GYIg5NvpAuibvaOGy4LZHiq3PNWz3cEj/U6j 5GuC8WN08u21jWg0M0AX3jnAQDhCfrfKXeTbBpFiQOzIcz+PTWyDEdr0zMgdMVJZ32KZY81UVcEdjn f6+X+0cqHkvlFLOAylvDhvaC37ea+M+zmRjc3CJEISeLJRdveEAQBYA8zMjYTGLlUSk6j2TwcvZ74S +J11hnaI2tgveRuclozhwdIwLsPc15gm0hbFV6u9plhiwDlsWA3Zjwjri1jGzG837wFS/aIDY+HpCa u8Wt5sE0ijXpG4RjsrcTsyvLbJ9IniT9SQ5/TN7+K80BmbXYn3d6+tgMGSxARWcpcL0j2WUtFD15uh 
VG9HQal1+h+goLItn53cus6n0kBW/YxguzEGvUBUjIgwTO9PooxAqbxisPRYeMxWheMUTOzHxzBdlb KmkekCEbgEm6MmciSoiuWmexK0VV1BGodLghmWW1AcNXc= X-Developer-Key: i=ardb@kernel.org; a=openpgp; fpr=F43D03328115A198C90016883D200E9CA6329909 X-CRM114-Version: 20100106-BlameMichelson ( TRE 0.8.0 (BSD) ) MR-646709E3 X-CRM114-CacheID: sfid-20220421_070355_106461_976FDD77 X-CRM114-Status: GOOD ( 15.43 ) X-BeenThere: linux-arm-kernel@lists.infradead.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: "linux-arm-kernel" Errors-To: linux-arm-kernel-bounces+linux-arm-kernel=archiver.kernel.org@lists.infradead.org Simplify the KPTI G-to-nG asm helper code by: - pulling the 'table bit' test into the get/put macros so we can combine them and incorporate the entire loop; - moving the 'table bit' test after the update of bit #11 so we no longer need separate next_xxx and skip_xxx labels; - redefining the pmd/pud register aliases and the next_pmd/next_pud labels instead of branching to them if the number of configured page table levels is less than 3 or 4, respectively; - folding the descriptor pointer increment into the LDR instructions. No functional change intended, except for the fact that we now descend into a next level table after setting bit #11 on its descriptor but this should make no difference in practice. While at it, switch to .L prefixed local labels so they don't clutter up the symbol tables, kallsyms, etc, and clean up the indentation for legibility. 
Signed-off-by: Ard Biesheuvel Reviewed-by: Mark Rutland Tested-by: Mark Rutland --- arch/arm64/mm/proc.S | 97 +++++++------------- 1 file changed, 34 insertions(+), 63 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 50bbed947bec..5619c00f8cd4 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -202,19 +202,24 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 .pushsection ".idmap.text", "awx" - .macro __idmap_kpti_get_pgtable_ent, type + .macro kpti_mk_tbl_ng, type, num_entries + add end_\type\()p, cur_\type\()p, #\num_entries * 8 +.Ldo_\type: dc cvac, cur_\()\type\()p // Ensure any existing dirty dmb sy // lines are written back before - ldr \type, [cur_\()\type\()p] // loading the entry - tbz \type, #0, skip_\()\type // Skip invalid and - tbnz \type, #11, skip_\()\type // non-global entries - .endm - - .macro __idmap_kpti_put_pgtable_ent_ng, type + ldr \type, [cur_\type\()p], #8 // loading the entry + tbz \type, #0, .Lnext_\type // Skip invalid and + tbnz \type, #11, .Lnext_\type // non-global entries orr \type, \type, #PTE_NG // Same bit for blocks and pages - str \type, [cur_\()\type\()p] // Update the entry and ensure + str \type, [cur_\type\()p, #-8] // Update the entry and ensure dmb sy // that it is visible to all dc civac, cur_\()\type\()p // CPUs. + .ifnc \type, pte + tbnz \type, #1, .Lderef_\type + .endif +.Lnext_\type: + cmp cur_\type\()p, end_\type\()p + b.ne .Ldo_\type .endm /* @@ -235,10 +240,8 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) pgd .req x7 cur_pudp .req x8 end_pudp .req x9 - pud .req x10 cur_pmdp .req x11 end_pmdp .req x12 - pmd .req x13 cur_ptep .req x14 end_ptep .req x15 pte .req x16 @@ -265,16 +268,8 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) /* Everybody is enjoying the idmap, so we can rewrite swapper. 
*/ /* PGD */ - mov cur_pgdp, swapper_pa - add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8) -do_pgd: __idmap_kpti_get_pgtable_ent pgd - tbnz pgd, #1, walk_puds -next_pgd: - __idmap_kpti_put_pgtable_ent_ng pgd -skip_pgd: - add cur_pgdp, cur_pgdp, #8 - cmp cur_pgdp, end_pgdp - b.ne do_pgd + mov cur_pgdp, swapper_pa + kpti_mk_tbl_ng pgd, PTRS_PER_PGD /* Publish the updated tables and nuke all the TLBs */ dsb sy @@ -291,59 +286,35 @@ skip_pgd: str wzr, [flag_ptr] ret +.Lderef_pgd: /* PUD */ -walk_puds: - .if CONFIG_PGTABLE_LEVELS > 3 + .if CONFIG_PGTABLE_LEVELS > 3 + pud .req x10 pte_to_phys cur_pudp, pgd - add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8) -do_pud: __idmap_kpti_get_pgtable_ent pud - tbnz pud, #1, walk_pmds -next_pud: - __idmap_kpti_put_pgtable_ent_ng pud -skip_pud: - add cur_pudp, cur_pudp, 8 - cmp cur_pudp, end_pudp - b.ne do_pud - b next_pgd - .else /* CONFIG_PGTABLE_LEVELS <= 3 */ - mov pud, pgd - b walk_pmds -next_pud: - b next_pgd + kpti_mk_tbl_ng pud, PTRS_PER_PUD + b .Lnext_pgd + .else /* CONFIG_PGTABLE_LEVELS <= 3 */ + pud .req pgd + .set .Lnext_pud, .Lnext_pgd .endif +.Lderef_pud: /* PMD */ -walk_pmds: - .if CONFIG_PGTABLE_LEVELS > 2 + .if CONFIG_PGTABLE_LEVELS > 2 + pmd .req x13 pte_to_phys cur_pmdp, pud - add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8) -do_pmd: __idmap_kpti_get_pgtable_ent pmd - tbnz pmd, #1, walk_ptes -next_pmd: - __idmap_kpti_put_pgtable_ent_ng pmd -skip_pmd: - add cur_pmdp, cur_pmdp, #8 - cmp cur_pmdp, end_pmdp - b.ne do_pmd - b next_pud - .else /* CONFIG_PGTABLE_LEVELS <= 2 */ - mov pmd, pud - b walk_ptes -next_pmd: - b next_pud + kpti_mk_tbl_ng pmd, PTRS_PER_PMD + b .Lnext_pud + .else /* CONFIG_PGTABLE_LEVELS <= 2 */ + pmd .req pgd + .set .Lnext_pmd, .Lnext_pgd .endif +.Lderef_pmd: /* PTE */ -walk_ptes: pte_to_phys cur_ptep, pmd - add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8) -do_pte: __idmap_kpti_get_pgtable_ent pte - __idmap_kpti_put_pgtable_ent_ng pte -skip_pte: - add cur_ptep, cur_ptep, #8 - cmp cur_ptep, end_ptep - b.ne do_pte - b 
next_pmd + kpti_mk_tbl_ng pte, PTRS_PER_PTE + b .Lnext_pmd .unreq cpu .unreq num_cpus From patchwork Thu Apr 21 14:03:39 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ard Biesheuvel X-Patchwork-Id: 12821689 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from bombadil.infradead.org (bombadil.infradead.org [198.137.202.133]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id E5515C433F5 for ; Thu, 21 Apr 2022 14:05:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; d=lists.infradead.org; s=bombadil.20210309; h=Sender: Content-Transfer-Encoding:Content-Type:List-Subscribe:List-Help:List-Post: List-Archive:List-Unsubscribe:List-Id:MIME-Version:References:In-Reply-To: Message-Id:Date:Subject:Cc:To:From:Reply-To:Content-ID:Content-Description: Resent-Date:Resent-From:Resent-Sender:Resent-To:Resent-Cc:Resent-Message-ID: List-Owner; bh=QtsOFNBXD2O97L+/rMq2UKlb5vKTvbQCX3psBYEUI9g=; b=jsv+7y8tWdnhRd uKFhOcYag7WSbHZ9FNlzbbTfB7iDrSG1mj/MrCoJIWCrnxyY0eRt4JQQB5YTQPTpkHDitprZAx8fO crDTc2ziyzX6g3ewNGhvwlPr4E04Mu2dEfelfCMkZoNue0WDzJI5sIxzcwx6auEHmJy/sYcXItpbp VmyTaDFFmLbppc1o1IUr7YiwPkGiQ2iW4PmtIrgczpWu0X9J9QBPnLhKP/6LlGOjCkvufQRkiXlRx 7pzZugDXu5JQjtiMIg3CbWlvsO5URM+sKNAZcbuVTCvxV/7zz5nl1mZbYpBxiIUWRdbOWXObI+B5j 6/XkBhnL+86br2pze8Yg==; Received: from localhost ([::1] helo=bombadil.infradead.org) by bombadil.infradead.org with esmtp (Exim 4.94.2 #2 (Red Hat Linux)) id 1nhXPr-00DfeQ-LH; Thu, 21 Apr 2022 14:04:15 +0000 Received: from ams.source.kernel.org ([145.40.68.75]) by bombadil.infradead.org with esmtps (Exim 4.94.2 #2 (Red Hat Linux)) id 1nhXPZ-00DfXx-O8 for linux-arm-kernel@lists.infradead.org; Thu, 21 Apr 2022 14:04:01 +0000 Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) 
(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ams.source.kernel.org (Postfix) with ESMTPS id 00016B824BC; Thu, 21 Apr 2022 14:03:55 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 36A9AC385A5; Thu, 21 Apr 2022 14:03:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1650549834; bh=oot1NilvJAbEwQu+EA7qFvymoFgNb9KANbZg0sRyZ0I=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LLJkspqH5PHtaGZdDUdmFdImK+L127jAgn/fYi3gbBsymaOPmN6iilzWEDr81eqAZ Q6+zhCH5OT9wmXXSMrZJrxif0Dxh93O/hJs71M9OV2/SYadDISkFi9TMaLdyRymo3o Ga4WfCAtcTP2DoaO1W9FkNA/kTdtN4f+piWqdd+BzYRpZEptVXkYaYvifogPxosKxG WcZa6bO4QGJ7Ym/iktBMbB013Woi50IViYo3fx/Mk3BvLRzb1BbHejwI9OyCvNAc49 wIdXf5zVttgZSl4YSTF7pLqtKhd8FKePwoAAO/b+8p+k9SbtFHAGJrdzU9xAM3e8Q/ JkD5MrJ3NqH5A== From: Ard Biesheuvel To: linux-arm-kernel@lists.infradead.org Cc: Ard Biesheuvel , will@kernel.org, maz@kernel.org, mark.rutland@arm.com, catalin.marinas@arm.com, keescook@chromium.org Subject: [PATCH v3 2/2] arm64: mm: install KPTI nG mappings with MMU enabled Date: Thu, 21 Apr 2022 16:03:39 +0200 Message-Id: <20220421140339.1329019-3-ardb@kernel.org> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20220421140339.1329019-1-ardb@kernel.org> References: <20220421140339.1329019-1-ardb@kernel.org> MIME-Version: 1.0 X-Developer-Signature: v=1; a=openpgp-sha256; l=13128; h=from:subject; bh=oot1NilvJAbEwQu+EA7qFvymoFgNb9KANbZg0sRyZ0I=; b=owEB7QES/pANAwAKAcNPIjmS2Y8kAcsmYgBiYWQ65v0IAX6alpQY6L34E/VSHICUTZcwmfv4UL/s g4eQ1WKJAbMEAAEKAB0WIQT72WJ8QGnJQhU3VynDTyI5ktmPJAUCYmFkOgAKCRDDTyI5ktmPJMwKDA CJzoyUGUdY0xNki9j+buV7ygsdp0SV3HLW86a9bKXBBYU5tMOA/cXg1Xd8h8tTNDCEcXVpuAfmLc1E mkGpNHWkjXqOvnShcNjrO+aHIT3eG0eCHEc8gfGALfP37Z+A2w48t82WaCwFX2jYUeh4n4Y3uzllAJ eqIjD6KTlsvgDR2arMO3uiALTWvMlnvMLYp0lr5WpR7+7ABi+IFhXoHqaNLqDfAJDBjBCnM9Ow9B13 IfrNmpso+T6ggknt1Ks+eWbpkf3NANe4Jt++k5jG4y6YNtneOWzijDdJ/AqYX6bIXIuykcT0/3o9gc 
b9JEcIZRvuFVqRGzuYKP6LcVOLhpskZpgwKA5j4nzxq0gOEANn9EogBhtGUcf0NAl74ybgfuEW9aI5 bf/rkElFDRUhC48c5TWVDZmi7o3WjJGZEDGbq74j4I0EiJl/Am8BBA4JTrNDtCYgdRYwcFw4u18E9i U352dYqlCKBI7URm+ApJO4gUgOE8HV6SvZepTsbBPzzbc= X-Developer-Key: i=ardb@kernel.org; a=openpgp; fpr=F43D03328115A198C90016883D200E9CA6329909 X-CRM114-Version: 20100106-BlameMichelson ( TRE 0.8.0 (BSD) ) MR-646709E3 X-CRM114-CacheID: sfid-20220421_070358_123798_8D20B17E X-CRM114-Status: GOOD ( 33.05 ) X-BeenThere: linux-arm-kernel@lists.infradead.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: "linux-arm-kernel" Errors-To: linux-arm-kernel-bounces+linux-arm-kernel=archiver.kernel.org@lists.infradead.org In cases where we unmap the kernel while running in user space, we rely on ASIDs to distinguish the minimal trampoline from the full kernel mapping, and this means we must use non-global attributes for those mappings, to ensure they are scoped by ASID and will not hit in the TLB inadvertently. We only do this when needed, as this is generally more costly in terms of TLB pressure, and so we boot without these non-global attributes, and apply them to all existing kernel mappings once all CPUs are up and we know whether or not the non-global attributes are needed. At this point, we cannot simply unmap and remap the entire address space, so we have to update all existing block and page descriptors in place. Currently, we go through a lot of trouble to perform these updates with the MMU and caches off, to avoid violating break before make (BBM) rules imposed by the architecture. Since we make changes to page tables that are not covered by the ID map, we gain access to those descriptors by disabling translations altogether. This means that the stores to memory are issued with device attributes, and require extra care in terms of coherency, which is costly. 
We also rely on the ID map to access a shared flag, which requires the ID map to be executable and writable at the same time, which is another thing we'd prefer to avoid. So let's switch to an approach where we replace the kernel mapping with a minimal mapping of a few pages that can be used for the shared flag, as well as a minimal, ad-hoc fixmap that we can use to map each page table in turn as we traverse the hierarchy. This requires one PTE per level, and an associated page worth of VA space in the temporary mapping. Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/mmu.h | 4 + arch/arm64/kernel/cpufeature.c | 65 +++++++++++- arch/arm64/mm/mmu.c | 8 +- arch/arm64/mm/proc.S | 107 ++++++++++++-------- 4 files changed, 134 insertions(+), 50 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 48f8466a4be9..b896f0ac4985 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -68,6 +68,10 @@ extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only); +extern void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags); extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot); extern void mark_linear_text_alias_ro(void); extern bool kaslr_requires_kpti(void); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d72c4b4d389c..f0688e812e19 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1596,14 +1596,31 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, } #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) + +static phys_addr_t kpti_ng_temp_alloc; + +static phys_addr_t kpti_ng_pgd_alloc(int shift) +{ + kpti_ng_temp_alloc -= PAGE_SIZE; + 
return kpti_ng_temp_alloc; +} + static void __nocfi kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { - typedef void (kpti_remap_fn)(int, int, phys_addr_t); + static atomic_t flag = ATOMIC_INIT(0); + static pgd_t *kpti_ng_temp_pgd; + static u64 alloc; + + typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); extern kpti_remap_fn idmap_kpti_install_ng_mappings; kpti_remap_fn *remap_fn; - int cpu = smp_processor_id(); + int levels = CONFIG_PGTABLE_LEVELS; + int order = order_base_2(levels + 1); + int num_cpus = num_online_cpus(); + int primary = 0; if (__this_cpu_read(this_cpu_vector) == vectors) { const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); @@ -1619,14 +1636,54 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) if (arm64_use_ng_mappings) return; + // First CPU to arrive here gets the job + if (atomic_inc_return(&flag) == 1) { + alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); + kpti_ng_temp_pgd = (pgd_t *)(alloc + levels * PAGE_SIZE); + kpti_ng_temp_alloc = __pa(kpti_ng_temp_pgd); + primary = 1; + + // + // Create a minimal page table hierarchy that permits us to + // store a shared variable that secondaries will poll, and to + // map the swapper page tables temporarily as we traverse them. + // + // The physical pages are laid out as follows: + // + // +---------+--------+-/-------+-/------ +-\\--------+ + // : data : PTE[] : | PMD[] : | PUD[] : || PGD[] : + // +---------+--------+-\-------+-\------ +-//--------+ + // ^ ^ + // The first two pages are mapped consecutively into this + // hierarchy at a PMD_SHIFT aligned virtual address, so that we + // have a place to store the shared variable, and so that we + // can manipulate the PTE level entries while the mapping is + // active. The first two entries cover the data page and the + // PTE[] page itself, the remaining entries are free to be used + // as an ad-hoc fixmap. 
+ // + __create_pgd_mapping(kpti_ng_temp_pgd, __pa(alloc), + KPTI_NG_TEMP_VA, 2 * PAGE_SIZE, + PAGE_KERNEL, kpti_ng_pgd_alloc, 0); + + // Increment flag again to signal other CPUs to proceed as well + atomic_inc_return_release(&flag); + } else { + // Wait for the primary CPU to set up the temporary page tables + while (atomic_read(&flag) <= num_cpus) + cpu_relax(); + } remap_fn = (void *)__pa_symbol(function_nocfi(idmap_kpti_install_ng_mappings)); cpu_install_idmap(); - remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir)); + remap_fn(!primary, num_cpus - 1, __pa(kpti_ng_temp_pgd), KPTI_NG_TEMP_VA); cpu_uninstall_idmap(); - if (!cpu) + // Last CPU to leave frees the pages + if (atomic_dec_return(&flag) == 1) { + free_pages(alloc, order); arm64_use_ng_mappings = true; + } } #else static void diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 626ec32873c6..1c7299dfaa84 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -360,11 +360,9 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, mutex_unlock(&fixmap_lock); } -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - pgprot_t prot, - phys_addr_t (*pgtable_alloc)(int), - int flags) +void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + phys_addr_t (*pgtable_alloc)(int), int flags) { unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_pgd(pgdir, virt); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 5619c00f8cd4..20d726207db5 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -200,20 +201,21 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) .popsection #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + +#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) + .pushsection ".idmap.text", "awx" .macro kpti_mk_tbl_ng, type, num_entries add 
end_\type\()p, cur_\type\()p, #\num_entries * 8 .Ldo_\type: - dc cvac, cur_\()\type\()p // Ensure any existing dirty - dmb sy // lines are written back before - ldr \type, [cur_\type\()p], #8 // loading the entry - tbz \type, #0, .Lnext_\type // Skip invalid and - tbnz \type, #11, .Lnext_\type // non-global entries - orr \type, \type, #PTE_NG // Same bit for blocks and pages - str \type, [cur_\type\()p, #-8] // Update the entry and ensure - dmb sy // that it is visible to all - dc civac, cur_\()\type\()p // CPUs. + ldr \type, [cur_\type\()p], #8 // Load the entry + .ifnc \type, pte + tbnz \type, #11, .Lnext_\type // Skip visited entries + .endif + and valid, \type, #1 + orr \type, \type, valid, lsl #11 // nG |= valid + str \type, [cur_\type\()p, #-8] // Update the entry .ifnc \type, pte tbnz \type, #1, .Lderef_\type .endif @@ -222,19 +224,42 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) b.ne .Ldo_\type .endm + /* + * Dereference the current table entry and map it into the temporary + * fixmap slot associated with the current level. The ad-hoc fixmap + * is a set of PTEs that are located above the PTEs that cover the + * level 3 page table and the scratch page that precedes it. + */ + .macro kpti_map_pgtbl, type, level + str xzr, [temp_pte, #8 * (\level + 2)] // break before make + dsb ishst + add pte, flag_ptr, #PAGE_SIZE * (\level + 2) + lsr pte, pte, #12 + tlbi vaae1is, pte + dsb ish + isb + + phys_to_pte pte, cur_\type\()p + add cur_\type\()p, flag_ptr, #PAGE_SIZE * (\level + 2) + orr pte, pte, pte_flags + str pte, [temp_pte, #8 * (\level + 2)] + dsb ishst + .endm + /* - * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper) + * void __kpti_install_ng_mappings(int cpu, int num_secondaries, phys_addr_t temp_pgd, + * unsigned long kpti_ng_temp_va) * * Called exactly once from stop_machine context by each CPU found during boot. 
*/ -__idmap_kpti_flag: - .long 1 SYM_FUNC_START(idmap_kpti_install_ng_mappings) - cpu .req w0 + cpu .req w0 // at entry + pte_flags .req x0 num_cpus .req w1 - swapper_pa .req x2 - swapper_ttb .req x3 - flag_ptr .req x4 + temp_pgd_phys .req x2 // at entry + temp_pte .req x2 + flag_ptr .req x3 + swapper_ttb .req x4 cur_pgdp .req x5 end_pgdp .req x6 pgd .req x7 @@ -245,10 +270,15 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) cur_ptep .req x14 end_ptep .req x15 pte .req x16 + valid .req x17 mrs swapper_ttb, ttbr1_el1 - restore_ttbr1 swapper_ttb - adr flag_ptr, __idmap_kpti_flag + + /* Uninstall swapper before surgery begins */ + __idmap_cpu_set_reserved_ttbr1 x8, x9 + offset_ttbr1 temp_pgd_phys, x8 + msr ttbr1_el1, temp_pgd_phys + isb cbnz cpu, __idmap_kpti_secondary @@ -259,31 +289,24 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) eor w17, w17, num_cpus cbnz w17, 1b - /* We need to walk swapper, so turn off the MMU. */ - pre_disable_mmu_workaround - mrs x17, sctlr_el1 - bic x17, x17, #SCTLR_ELx_M - msr sctlr_el1, x17 - isb + mov pte_flags, #KPTI_NG_PTE_FLAGS + + /* Advance temp_pte to the fixmap page */ + add temp_pte, flag_ptr, #PAGE_SIZE /* Everybody is enjoying the idmap, so we can rewrite swapper. 
*/ /* PGD */ - mov cur_pgdp, swapper_pa + adrp cur_pgdp, swapper_pg_dir + kpti_map_pgtbl pgd, 0 kpti_mk_tbl_ng pgd, PTRS_PER_PGD - /* Publish the updated tables and nuke all the TLBs */ - dsb sy - tlbi vmalle1is - dsb ish - isb - - /* We're done: fire up the MMU again */ - mrs x17, sctlr_el1 - orr x17, x17, #SCTLR_ELx_M - set_sctlr_el1 x17 - /* Set the flag to zero to indicate that we're all done */ str wzr, [flag_ptr] + + /* We're done: fire up swapper again */ + __idmap_cpu_set_reserved_ttbr1 x8, x9 + msr ttbr1_el1, swapper_ttb + isb ret .Lderef_pgd: @@ -291,6 +314,7 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) .if CONFIG_PGTABLE_LEVELS > 3 pud .req x10 pte_to_phys cur_pudp, pgd + kpti_map_pgtbl pud, 1 kpti_mk_tbl_ng pud, PTRS_PER_PUD b .Lnext_pgd .else /* CONFIG_PGTABLE_LEVELS <= 3 */ @@ -303,6 +327,7 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) .if CONFIG_PGTABLE_LEVELS > 2 pmd .req x13 pte_to_phys cur_pmdp, pud + kpti_map_pgtbl pmd, 2 kpti_mk_tbl_ng pmd, PTRS_PER_PMD b .Lnext_pud .else /* CONFIG_PGTABLE_LEVELS <= 2 */ @@ -313,12 +338,14 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) .Lderef_pmd: /* PTE */ pte_to_phys cur_ptep, pmd + kpti_map_pgtbl pte, 3 kpti_mk_tbl_ng pte, PTRS_PER_PTE b .Lnext_pmd .unreq cpu + .unreq pte_flags .unreq num_cpus - .unreq swapper_pa + .unreq temp_pgd_phys .unreq cur_pgdp .unreq end_pgdp .unreq pgd @@ -331,12 +358,10 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings) .unreq cur_ptep .unreq end_ptep .unreq pte + .unreq valid /* Secondary CPUs end up here */ __idmap_kpti_secondary: - /* Uninstall swapper before surgery begins */ - __idmap_cpu_set_reserved_ttbr1 x16, x17 - /* Increment the flag to let the boot CPU we're ready */ 1: ldxr w16, [flag_ptr] add w16, w16, #1 @@ -350,7 +375,7 @@ __idmap_kpti_secondary: cbnz w16, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb, x16 + __idmap_cpu_set_reserved_ttbr1 x8, x9 msr ttbr1_el1, swapper_ttb isb ret