
mm/mmap: Define index macros for protection_map[]

Message ID: 1632712920-8171-1-git-send-email-anshuman.khandual@arm.com
State: New
Series: mm/mmap: Define index macros for protection_map[]

Commit Message

Anshuman Khandual Sept. 27, 2021, 3:22 a.m. UTC
protection_map[] maps the lower four bits of vm_flags to a platform page
protection mask. The default initialization (and possible re-initialization
by the platform) does not make it clear that these indices are simply derived
from the vm_flags protection bits (VM_SHARED, VM_READ, VM_WRITE, VM_EXEC).
This defines macros for the protection_map[] indices, named by concatenating
the vm_flags attributes, making the mapping clear and explicit.

Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mips@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
This applies to v5.15-rc3, on top of the following patch.

https://lore.kernel.org/all/20210924060821.1138281-1-guoren@kernel.org/

 arch/mips/mm/cache.c    | 34 +++++++++++++++++-----------------
 arch/sparc/mm/init_64.c | 32 ++++++++++++++++----------------
 include/linux/mm.h      | 31 +++++++++++++++++++++++++++++++
 mm/debug_vm_pgtable.c   |  8 ++++----
 mm/mmap.c               | 18 ++++++++++++++++--
 5 files changed, 84 insertions(+), 39 deletions(-)
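
For context, the lookup these index macros annotate is the existing one in
mm/mmap.c, quoted here unmodified (the same code shows up as the removed
hunk in the WIP diff further down):

	pgprot_t vm_get_page_prot(unsigned long vm_flags)
	{
		pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
					(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
				pgprot_val(arch_vm_get_page_prot(vm_flags)));

		return arch_filter_pgprot(ret);
	}

The array index is just the masked vm_flags value, which is exactly what the
proposed PROTMAP_IDX_* macros spell out.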

Comments

Christoph Hellwig Sept. 27, 2021, 2:58 p.m. UTC | #1
On Mon, Sep 27, 2021 at 08:52:00AM +0530, Anshuman Khandual wrote:
> protection_map[] maps the lower four bits of vm_flags to a platform page
> protection mask. The default initialization (and possible re-initialization
> by the platform) does not make it clear that these indices are simply derived
> from the vm_flags protection bits (VM_SHARED, VM_READ, VM_WRITE, VM_EXEC).
> This defines macros for the protection_map[] indices, named by concatenating
> the vm_flags attributes, making the mapping clear and explicit.

I don't think this is all that helpful.  The main issue here is that
protection_map is a pointless obfuscation and should be replaced with a
simple switch statement provided by each architecture.  See the WIP below,
which so far works only for x86 and without page table debugging, for where
I think we should be going.

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ab83c22d274e7..70d8ae60a416f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -74,8 +74,8 @@ config X86
 	select ARCH_HAS_EARLY_DEBUG		if KGDB
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
-	select ARCH_HAS_FILTER_PGPROT
 	select ARCH_HAS_FORTIFY_SOURCE
+	select ARCH_HAS_GET_PAGE_PROT
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64 && STACK_VALIDATION
 	select ARCH_HAS_MEM_ENCRYPT
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 448cd01eb3ecb..a0cc70b056385 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -646,11 +646,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
-static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
-{
-	return canon_pgprot(prot);
-}
-
 static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
 					 enum page_cache_mode pcm,
 					 enum page_cache_mode new_pcm)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 40497a9020c6e..1a9dd933088e6 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -228,25 +228,6 @@ enum page_cache_mode {
 
 #endif	/* __ASSEMBLY__ */
 
-/*         xwr */
-#define __P000	PAGE_NONE
-#define __P001	PAGE_READONLY
-#define __P010	PAGE_COPY
-#define __P011	PAGE_COPY
-#define __P100	PAGE_READONLY_EXEC
-#define __P101	PAGE_READONLY_EXEC
-#define __P110	PAGE_COPY_EXEC
-#define __P111	PAGE_COPY_EXEC
-
-#define __S000	PAGE_NONE
-#define __S001	PAGE_READONLY
-#define __S010	PAGE_SHARED
-#define __S011	PAGE_SHARED
-#define __S100	PAGE_READONLY_EXEC
-#define __S101	PAGE_READONLY_EXEC
-#define __S110	PAGE_SHARED_EXEC
-#define __S111	PAGE_SHARED_EXEC
-
 /*
  * early identity mapping  pte attrib macros.
  */
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index d4a8d0424bfbf..775dbd3aff736 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -5,20 +5,6 @@
 #define MAP_32BIT	0x40		/* only give out 32bit addresses */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-/*
- * Take the 4 protection key bits out of the vma->vm_flags
- * value and turn them in to the bits that we can put in
- * to a pte.
- *
- * Only override these if Protection Keys are available
- * (which is only on 64-bit).
- */
-#define arch_vm_get_page_prot(vm_flags)	__pgprot(	\
-		((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) |	\
-		((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) |	\
-		((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) |	\
-		((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0))
-
 #define arch_calc_vm_prot_bits(prot, key) (		\
 		((key) & 0x1 ? VM_PKEY_BIT0 : 0) |      \
 		((key) & 0x2 ? VM_PKEY_BIT1 : 0) |      \
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 5864219221ca8..b44806c5b3de8 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -16,8 +16,10 @@ CFLAGS_REMOVE_mem_encrypt.o		= -pg
 CFLAGS_REMOVE_mem_encrypt_identity.o	= -pg
 endif
 
-obj-y				:=  init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \
-				    pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
+obj-y				:=  init.o init_$(BITS).o fault.o ioremap.o \
+				    extable.o mmap.o pgtable.o physaddr.o \
+				    setup_nx.o tlb.o cpu_entry_area.o \
+				    maccess.o pgprot.o
 
 obj-y				+= pat/
 
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index ff08dc4636347..e1d1168ed25e8 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -189,10 +189,6 @@ void __init sme_early_init(void)
 
 	__supported_pte_mask = __sme_set(__supported_pte_mask);
 
-	/* Update the protection map with memory encryption mask */
-	for (i = 0; i < ARRAY_SIZE(protection_map); i++)
-		protection_map[i] = pgprot_encrypted(protection_map[i]);
-
 	if (sev_active())
 		swiotlb_force = SWIOTLB_FORCE;
 }
diff --git a/arch/x86/mm/pgprot.c b/arch/x86/mm/pgprot.c
new file mode 100644
index 0000000000000..4d8e9dbcce993
--- /dev/null
+++ b/arch/x86/mm/pgprot.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+
+static inline pgprot_t __vm_get_page_prot(unsigned long vm_flags)
+{
+	switch (vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)) {
+	case 0:
+		return PAGE_NONE;
+	case VM_READ:
+		return PAGE_READONLY;
+	case VM_WRITE:
+		return PAGE_COPY;
+	case VM_READ | VM_WRITE:
+		return PAGE_COPY;
+	case VM_EXEC:
+	case VM_EXEC | VM_READ:
+		return PAGE_READONLY_EXEC;
+	case VM_EXEC | VM_WRITE:
+	case VM_EXEC | VM_READ | VM_WRITE:
+		return PAGE_COPY_EXEC;
+	case VM_SHARED:
+		return PAGE_NONE;
+	case VM_SHARED | VM_READ:
+		return PAGE_READONLY;
+	case VM_SHARED | VM_WRITE:
+	case VM_SHARED | VM_READ | VM_WRITE:
+		return PAGE_SHARED;
+	case VM_SHARED | VM_EXEC:
+	case VM_SHARED | VM_READ | VM_EXEC:
+		return PAGE_READONLY_EXEC;
+	case VM_SHARED | VM_WRITE | VM_EXEC:
+	case VM_SHARED | VM_READ | VM_WRITE | VM_EXEC:
+		return PAGE_SHARED_EXEC;
+	default:
+		BUILD_BUG();
+		return PAGE_NONE;
+	}
+}
+
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+	unsigned long val = pgprot_val(__vm_get_page_prot(vm_flags));
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+	/*
+	 * Take the 4 protection key bits out of the vma->vm_flags value and
+	 * turn them in to the bits that we can put in to a pte.
+	 *
+	 * Only override these if Protection Keys are available (which is only
+	 * on 64-bit).
+	 */
+	if (vm_flags & VM_PKEY_BIT0)
+		val |= _PAGE_PKEY_BIT0;
+	if (vm_flags & VM_PKEY_BIT1)
+		val |= _PAGE_PKEY_BIT1;
+	if (vm_flags & VM_PKEY_BIT2)
+		val |= _PAGE_PKEY_BIT2;
+	if (vm_flags & VM_PKEY_BIT3)
+		val |= _PAGE_PKEY_BIT3;
+#endif
+
+	val = __sme_set(val);
+	if (val & _PAGE_PRESENT)
+		val &= __supported_pte_mask;
+	return __pgprot(val);
+}
+EXPORT_SYMBOL(vm_get_page_prot);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f9..def17c5fb6afc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -428,12 +428,6 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 #define VM_FLAGS_CLEAR	(ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR)
 
-/*
- * mapping from the currently active vm_flags protection bits (the
- * low four bits) to a page protection mask..
- */
-extern pgprot_t protection_map[16];
-
 /**
  * enum fault_flag - Fault flag definitions.
  * @FAULT_FLAG_WRITE: Fault was a write fault.
diff --git a/mm/Kconfig b/mm/Kconfig
index d16ba9249bc53..d9fa0bac189b4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -894,6 +894,9 @@ config IO_MAPPING
 config SECRETMEM
 	def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
 
+config ARCH_HAS_GET_PAGE_PROT
+	bool
+
 source "mm/damon/Kconfig"
 
 endmenu
diff --git a/mm/mmap.c b/mm/mmap.c
index 88dcc5c252255..c6031dfcedd18 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -80,6 +80,7 @@ static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
 
+#ifndef CONFIG_ARCH_HAS_GET_PAGE_PROT
 /* description of effects of mapping type and prot in current implementation.
  * this is due to the limited x86 page protection hardware.  The expected
  * behavior is in parens:
@@ -100,11 +101,6 @@ static void unmap_region(struct mm_struct *mm,
  *								w: (no) no
  *								x: (yes) yes
  */
-pgprot_t protection_map[16] __ro_after_init = {
-	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
-	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
-};
-
 #ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
 static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
 {
@@ -112,15 +108,57 @@ static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
 }
 #endif
 
+static pgprot_t __vm_get_page_prot(unsigned long vm_flags)
+{
+	switch (vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)) {
+	case 0:
+		return __P000;
+	case VM_READ:
+		return __P001;
+	case VM_WRITE:
+		return __P010;
+	case VM_READ | VM_WRITE:
+		return __P011;
+	case VM_EXEC:
+		return __P100;
+	case VM_EXEC | VM_READ:
+		return __P101;
+	case VM_EXEC | VM_WRITE:
+		return __P110;
+	case VM_EXEC | VM_READ | VM_WRITE:
+		return __P111;
+	case VM_SHARED:
+		return __S000;
+	case VM_SHARED | VM_READ:
+		return __S001;
+	case VM_SHARED | VM_WRITE:
+		return __S010;
+	case VM_SHARED | VM_READ | VM_WRITE:
+		return __S011;
+	case VM_SHARED | VM_EXEC:
+		return __S100;
+	case VM_SHARED | VM_READ | VM_EXEC:
+		return __S101;
+	case VM_SHARED | VM_WRITE | VM_EXEC:
+		return __S110;
+	case VM_SHARED | VM_READ | VM_WRITE | VM_EXEC:
+		return __S111;
+	default:
+		BUG();
+	}
+}
+
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
-	pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
-				(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
-			pgprot_val(arch_vm_get_page_prot(vm_flags)));
+	pgprot_t ret;
+
+	ret = __pgprot(pgprot_val(__vm_get_page_prot(vm_flags)) |
+		       pgprot_val(arch_vm_get_page_prot(vm_flags)));
 
 	return arch_filter_pgprot(ret);
 }
 EXPORT_SYMBOL(vm_get_page_prot);
+#endif /* CONFIG_ARCH_HAS_GET_PAGE_PROT */
 
 static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags)
 {
@@ -1660,10 +1698,9 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 #endif /* __ARCH_WANT_SYS_OLD_MMAP */
 
 /*
- * Some shared mappings will want the pages marked read-only
- * to track write events. If so, we'll downgrade vm_page_prot
- * to the private version (using protection_map[] without the
- * VM_SHARED bit).
+ * Some shared mappings will want the pages marked read-only to track write
+ * events.  If so, we'll downgrade vm_page_prot to the private version without
+ * the VM_SHARED bit.
  */
 int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
 {
Anshuman Khandual Sept. 28, 2021, 2:54 a.m. UTC | #2
On 9/27/21 8:28 PM, Christoph Hellwig wrote:
> On Mon, Sep 27, 2021 at 08:52:00AM +0530, Anshuman Khandual wrote:
>> protection_map[] maps the lower four bits of vm_flags to a platform page
>> protection mask. The default initialization (and possible re-initialization
>> by the platform) does not make it clear that these indices are simply derived
>> from the vm_flags protection bits (VM_SHARED, VM_READ, VM_WRITE, VM_EXEC).
>> This defines macros for the protection_map[] indices, named by concatenating
>> the vm_flags attributes, making the mapping clear and explicit.
> 
> I don't think this is all that helpful.  The main issue here is that
> protection_map is a pointless obfuscation and should be replaced with a

Agreed, protection_map[] is an additional abstraction which could be dropped.

> simple switch statement provided by each architecture.  See the WIP below,
> which so far works only for x86 and without page table debugging, for where
> I think we should be going.

Sure, this will work as well, but all platforms need to be changed at once.
Is there any platform that would not subscribe to ARCH_HAS_GET_PAGE_PROT and
export its own vm_get_page_prot()? AFAICS all platforms are currently
required to export the __PXXX and __SXXX elements.

This seems to be a better idea than the current proposal. Probably all the
vm_flags combinations used in those switch statements can be converted into
macros just to improve readability. Are you planning to send this as a
proper patch soon?
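
As a purely illustrative sketch of that suggestion (the helper names here
are invented for the example, not taken from any posted patch), the case
labels in such a switch could be shortened along these lines:

	/* Hypothetical readability helpers; names are illustrative only. */
	#define VM_PROT_MASK	(VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)
	#define VM_RW		(VM_READ | VM_WRITE)

	switch (vm_flags & VM_PROT_MASK) {
	case VM_RW:				/* private read-write */
		return PAGE_COPY;
	case VM_SHARED | VM_RW:			/* shared read-write */
		return PAGE_SHARED;
	/* ... remaining combinations exactly as in the WIP patch ... */
	}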

> [ full WIP patch from #1 quoted verbatim; snipped ]
Christoph Hellwig Sept. 28, 2021, 4:42 a.m. UTC | #3
On Tue, Sep 28, 2021 at 08:24:43AM +0530, Anshuman Khandual wrote:
> > simple switch statement provided by each architecture.  See the WIP below,
> > which so far works only for x86 and without page table debugging, for where
> > I think we should be going.
> 
> Sure, this will work as well, but all platforms need to be changed at once.
> Is there any platform that would not subscribe to ARCH_HAS_GET_PAGE_PROT and
> export its own vm_get_page_prot()? AFAICS all platforms are currently
> required to export the __PXXX and __SXXX elements.
> 
> This seems to be a better idea than the current proposal. Probably all the
> vm_flags combinations used in those switch statements can be converted into
> macros just to improve readability. Are you planning to send this as a
> proper patch soon?

This was just a quick WIP patch.  If you have some spare time to tackle
it for real I'd suggest the following approach:

 1) Remove the direct references to protection_map in debug_vm_pgtable.c
 2) add the ARCH_HAS_GET_PAGE_PROT symbol that lets architectures
    provide vm_get_page_prot themselves and not define protection_map at
    all in this case
 3) convert all architectures that touch protection_map to provide
    vm_get_page_prot themselves
 4) mark protection_map static
 5) convert all architectures that provide arch_filter_pgprot and/or
    arch_vm_get_page_prot to provide vm_get_page_prot directly and
    remove those hooks
 6) remove the __S???/__P??? macros and the generic vm_get_page_prot
    after providing an arch implementation for every architecture.
    This can maybe be simplified with a new generic version that directly
    looks at PAGE_* macros, but that will need further investigation
    first.
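
A minimal sketch of how steps 2) and 4) could fit together once steps 3)
and 5) have moved every architecture off the protection_map references and
the arch hooks (this just reuses the CONFIG_ARCH_HAS_GET_PAGE_PROT guard
from the WIP patch above; an outline, not final code):

	/* mm/Kconfig */
	config ARCH_HAS_GET_PAGE_PROT
		bool

	/* mm/mmap.c */
	#ifndef CONFIG_ARCH_HAS_GET_PAGE_PROT
	/* step 4: the table is no longer visible outside mm/mmap.c */
	static pgprot_t protection_map[16] __ro_after_init = {
		__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
		__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
	};

	pgprot_t vm_get_page_prot(unsigned long vm_flags)
	{
		return protection_map[vm_flags &
				(VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
	}
	EXPORT_SYMBOL(vm_get_page_prot);
	#endif /* CONFIG_ARCH_HAS_GET_PAGE_PROT */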
Anshuman Khandual Sept. 29, 2021, 2:57 a.m. UTC | #4
On 9/28/21 10:12 AM, Christoph Hellwig wrote:
> On Tue, Sep 28, 2021 at 08:24:43AM +0530, Anshuman Khandual wrote:
>>> simple switch statement provided by each architecture.  See the WIP below,
>>> which so far works only for x86 and without page table debugging, for where
>>> I think we should be going.
>>
>> Sure, this will work as well, but all platforms need to be changed at once.
>> Is there any platform that would not subscribe to ARCH_HAS_GET_PAGE_PROT and
>> export its own vm_get_page_prot()? AFAICS all platforms are currently
>> required to export the __PXXX and __SXXX elements.
>>
>> This seems to be a better idea than the current proposal. Probably all the
>> vm_flags combinations used in those switch statements can be converted into
>> macros just to improve readability. Are you planning to send this as a
>> proper patch soon?
> 
> This was just a quick WIP patch.  If you have some spare time to tackle
> it for real I'd suggest the following approach:

Sure, will try and get this working.

> 
>  1) Remove the direct references to protection_map in debug_vm_pgtable.c
>  2) add the ARCH_HAS_GET_PAGE_PROT symbol that lets architectures
>     provide vm_get_page_prot themselves and not define protection_map at
>     all in this case
>  3) convert all architectures that touch protection_map to provide
>     vm_get_page_prot themselves
>  4) mark protection_map static
>  5) convert all architectures that provide arch_filter_pgprot and/or
>     arch_vm_get_page_prot to provide vm_get_page_prot directly and
>     remove those hooks
>  6) remove the __S???/__P??? macros and the generic vm_get_page_prot
>     after providing an arch implementation for every architecture.
>     This can maybe be simplified with a new generic version that directly
>     looks at PAGE_* macros, but that will need further investigation
>     first.
>

Patch

diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 830ab91e574f..d0197cc1fb5a 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -161,24 +161,24 @@  EXPORT_SYMBOL(_page_cachable_default);
 
 static inline void setup_protection_map(void)
 {
-	protection_map[0]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-	protection_map[1]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
-	protection_map[2]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-	protection_map[3]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
-	protection_map[4]  = PM(_PAGE_PRESENT);
-	protection_map[5]  = PM(_PAGE_PRESENT);
-	protection_map[6]  = PM(_PAGE_PRESENT);
-	protection_map[7]  = PM(_PAGE_PRESENT);
-
-	protection_map[8]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-	protection_map[9]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
-	protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
+	protection_map[PROTMAP_IDX_XXXX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
+	protection_map[PROTMAP_IDX_XRXX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+	protection_map[PROTMAP_IDX_XXWX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
+	protection_map[PROTMAP_IDX_XRWX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+	protection_map[PROTMAP_IDX_XXXE] = PM(_PAGE_PRESENT);
+	protection_map[PROTMAP_IDX_XRXE] = PM(_PAGE_PRESENT);
+	protection_map[PROTMAP_IDX_XXWE] = PM(_PAGE_PRESENT);
+	protection_map[PROTMAP_IDX_XRWE] = PM(_PAGE_PRESENT);
+
+	protection_map[PROTMAP_IDX_SXXX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
+	protection_map[PROTMAP_IDX_SRXX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
+	protection_map[PROTMAP_IDX_SXWX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
 				_PAGE_NO_READ);
-	protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
-	protection_map[12] = PM(_PAGE_PRESENT);
-	protection_map[13] = PM(_PAGE_PRESENT);
-	protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE);
-	protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE);
+	protection_map[PROTMAP_IDX_SRWX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
+	protection_map[PROTMAP_IDX_SXXE] = PM(_PAGE_PRESENT);
+	protection_map[PROTMAP_IDX_SRXE] = PM(_PAGE_PRESENT);
+	protection_map[PROTMAP_IDX_SXWE] = PM(_PAGE_PRESENT | _PAGE_WRITE);
+	protection_map[PROTMAP_IDX_SRWE] = PM(_PAGE_PRESENT | _PAGE_WRITE);
 }
 
 #undef PM
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 1b23639e2fcd..1a7fe97c8167 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2642,22 +2642,22 @@  static void prot_init_common(unsigned long page_none,
 	PAGE_COPY = __pgprot(page_copy);
 	PAGE_SHARED = __pgprot(page_shared);
 
-	protection_map[0x0] = __pgprot(page_none);
-	protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit);
-	protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit);
-	protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit);
-	protection_map[0x4] = __pgprot(page_readonly);
-	protection_map[0x5] = __pgprot(page_readonly);
-	protection_map[0x6] = __pgprot(page_copy);
-	protection_map[0x7] = __pgprot(page_copy);
-	protection_map[0x8] = __pgprot(page_none);
-	protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit);
-	protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit);
-	protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit);
-	protection_map[0xc] = __pgprot(page_readonly);
-	protection_map[0xd] = __pgprot(page_readonly);
-	protection_map[0xe] = __pgprot(page_shared);
-	protection_map[0xf] = __pgprot(page_shared);
+	protection_map[PROTMAP_IDX_XXXX] = __pgprot(page_none);
+	protection_map[PROTMAP_IDX_XRXX] = __pgprot(page_readonly & ~page_exec_bit);
+	protection_map[PROTMAP_IDX_XXWX] = __pgprot(page_copy & ~page_exec_bit);
+	protection_map[PROTMAP_IDX_XRWX] = __pgprot(page_copy & ~page_exec_bit);
+	protection_map[PROTMAP_IDX_XXXE] = __pgprot(page_readonly);
+	protection_map[PROTMAP_IDX_XRXE] = __pgprot(page_readonly);
+	protection_map[PROTMAP_IDX_XXWE] = __pgprot(page_copy);
+	protection_map[PROTMAP_IDX_XRWE] = __pgprot(page_copy);
+	protection_map[PROTMAP_IDX_SXXX] = __pgprot(page_none);
+	protection_map[PROTMAP_IDX_SRXX] = __pgprot(page_readonly & ~page_exec_bit);
+	protection_map[PROTMAP_IDX_SXWX] = __pgprot(page_shared & ~page_exec_bit);
+	protection_map[PROTMAP_IDX_SRWX] = __pgprot(page_shared & ~page_exec_bit);
+	protection_map[PROTMAP_IDX_SXXE] = __pgprot(page_readonly);
+	protection_map[PROTMAP_IDX_SRXE] = __pgprot(page_readonly);
+	protection_map[PROTMAP_IDX_SXWE] = __pgprot(page_shared);
+	protection_map[PROTMAP_IDX_SRWE] = __pgprot(page_shared);
 }
 
 static void __init sun4u_pgprot_init(void)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..4f99a49749a5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -431,9 +431,40 @@  extern unsigned int kobjsize(const void *objp);
 /*
  * mapping from the currently active vm_flags protection bits (the
  * low four bits) to a page protection mask..
+ *
+ * VM_EXEC ---------------------|
+ *                              |
+ * VM_WRITE ---------------|    |
+ *                         |    |
+ * VM_READ -----------|    |    |
+ *                    |    |    |
+ * VM_SHARED ----|    |    |    |
+ *               |    |    |    |
+ *               v    v    v    v
+ * PROTMAP_IDX_(S|X)(R|X)(W|X)(E|X)
+ *
+ * X - The protection flag is absent
+ *
  */
 extern pgprot_t protection_map[16];
 
+#define PROTMAP_IDX_XXXX (VM_NONE)
+#define PROTMAP_IDX_XRXX (VM_READ)
+#define PROTMAP_IDX_XXWX (VM_WRITE)
+#define PROTMAP_IDX_XRWX (VM_READ | VM_WRITE)
+#define PROTMAP_IDX_XXXE (VM_EXEC)
+#define PROTMAP_IDX_XRXE (VM_READ | VM_EXEC)
+#define PROTMAP_IDX_XXWE (VM_WRITE | VM_EXEC)
+#define PROTMAP_IDX_XRWE (VM_READ | VM_WRITE | VM_EXEC)
+#define PROTMAP_IDX_SXXX (VM_SHARED | VM_NONE)
+#define PROTMAP_IDX_SRXX (VM_SHARED | VM_READ)
+#define PROTMAP_IDX_SXWX (VM_SHARED | VM_WRITE)
+#define PROTMAP_IDX_SRWX (VM_SHARED | VM_READ | VM_WRITE)
+#define PROTMAP_IDX_SXXE (VM_SHARED | VM_EXEC)
+#define PROTMAP_IDX_SRXE (VM_SHARED | VM_READ | VM_EXEC)
+#define PROTMAP_IDX_SXWE (VM_SHARED | VM_WRITE | VM_EXEC)
+#define PROTMAP_IDX_SRWE (VM_SHARED | VM_READ | VM_WRITE | VM_EXEC)
+
 /**
  * enum fault_flag - Fault flag definitions.
  * @FAULT_FLAG_WRITE: Fault was a write fault.
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 228e3954b90c..2e01d0d395bb 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -1104,14 +1104,14 @@  static int __init init_args(struct pgtable_debug_args *args)
 	/*
 	 * Initialize the debugging data.
 	 *
-	 * protection_map[0] (or even protection_map[8]) will help create
-	 * page table entries with PROT_NONE permission as required for
-	 * pxx_protnone_tests().
+	 * protection_map[PROTMAP_IDX_XXXX] (or even protection_map[PROTMAP_IDX_SXXX])
+	 * will help create page table entries with PROT_NONE permission as required
+	 * for pxx_protnone_tests().
 	 */
 	memset(args, 0, sizeof(*args));
 	args->vaddr              = get_random_vaddr();
 	args->page_prot          = vm_get_page_prot(VMFLAGS);
-	args->page_prot_none     = protection_map[0];
+	args->page_prot_none     = protection_map[PROTMAP_IDX_XXXX];
 	args->is_contiguous_page = false;
 	args->pud_pfn            = ULONG_MAX;
 	args->pmd_pfn            = ULONG_MAX;
diff --git a/mm/mmap.c b/mm/mmap.c
index 88dcc5c25225..d38bd4e305f9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -101,8 +101,22 @@  static void unmap_region(struct mm_struct *mm,
  *								x: (yes) yes
  */
 pgprot_t protection_map[16] __ro_after_init = {
-	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
-	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
+	[PROTMAP_IDX_XXXX] = __P000,
+	[PROTMAP_IDX_XRXX] = __P001,
+	[PROTMAP_IDX_XXWX] = __P010,
+	[PROTMAP_IDX_XRWX] = __P011,
+	[PROTMAP_IDX_XXXE] = __P100,
+	[PROTMAP_IDX_XRXE] = __P101,
+	[PROTMAP_IDX_XXWE] = __P110,
+	[PROTMAP_IDX_XRWE] = __P111,
+	[PROTMAP_IDX_SXXX] = __S000,
+	[PROTMAP_IDX_SRXX] = __S001,
+	[PROTMAP_IDX_SXWX] = __S010,
+	[PROTMAP_IDX_SRWX] = __S011,
+	[PROTMAP_IDX_SXXE] = __S100,
+	[PROTMAP_IDX_SRXE] = __S101,
+	[PROTMAP_IDX_SXWE] = __S110,
+	[PROTMAP_IDX_SRWE] = __S111
 };
 
 #ifndef CONFIG_ARCH_HAS_FILTER_PGPROT