Message ID | 1632712920-8171-1-git-send-email-anshuman.khandual@arm.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/mmap: Define index macros for protection_map[] | expand |
On Mon, Sep 27, 2021 at 08:52:00AM +0530, Anshuman Khandual wrote: > protection_map[] maps the lower four bits from vm_flags into platform page > protection mask. Default initialization (and possible re-initialization in > the platform) does not make it clear that these indices are just derived > from various vm_flags protections (VM_SHARED, VM_READ, VM_WRITE, VM_EXEC). > This defines macros for protection_map[] indices which concatenate various > vm_flag attributes, making it clear and explicit. I dont think this is all that helpful. The main issue here is that protection_map is a pointless obsfucation ad should be replaced with a simple switch statement provided by each architecture. See the below WIP which just works for x86 and without pagetable debugging for where I think we should be going. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ab83c22d274e7..70d8ae60a416f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -74,8 +74,8 @@ config X86 select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER - select ARCH_HAS_FILTER_PGPROT select ARCH_HAS_FORTIFY_SOURCE + select ARCH_HAS_GET_PAGE_PROT select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_KCOV if X86_64 && STACK_VALIDATION select ARCH_HAS_MEM_ENCRYPT diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 448cd01eb3ecb..a0cc70b056385 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -646,11 +646,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(massage_pgprot(p)) -static inline pgprot_t arch_filter_pgprot(pgprot_t prot) -{ - return canon_pgprot(prot); -} - static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, enum page_cache_mode pcm, enum page_cache_mode new_pcm) diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 40497a9020c6e..1a9dd933088e6 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -228,25 +228,6 @@ enum page_cache_mode { #endif /* __ASSEMBLY__ */ -/* xwr */ -#define __P000 PAGE_NONE -#define __P001 PAGE_READONLY -#define __P010 PAGE_COPY -#define __P011 PAGE_COPY -#define __P100 PAGE_READONLY_EXEC -#define __P101 PAGE_READONLY_EXEC -#define __P110 PAGE_COPY_EXEC -#define __P111 PAGE_COPY_EXEC - -#define __S000 PAGE_NONE -#define __S001 PAGE_READONLY -#define __S010 PAGE_SHARED -#define __S011 PAGE_SHARED -#define __S100 PAGE_READONLY_EXEC -#define __S101 PAGE_READONLY_EXEC -#define __S110 PAGE_SHARED_EXEC -#define __S111 PAGE_SHARED_EXEC - /* * early identity mapping pte attrib macros. */ diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h index d4a8d0424bfbf..775dbd3aff736 100644 --- a/arch/x86/include/uapi/asm/mman.h +++ b/arch/x86/include/uapi/asm/mman.h @@ -5,20 +5,6 @@ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -/* - * Take the 4 protection key bits out of the vma->vm_flags - * value and turn them in to the bits that we can put in - * to a pte. - * - * Only override these if Protection Keys are available - * (which is only on 64-bit). - */ -#define arch_vm_get_page_prot(vm_flags) __pgprot( \ - ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \ - ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \ - ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \ - ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0)) - #define arch_calc_vm_prot_bits(prot, key) ( \ ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 5864219221ca8..b44806c5b3de8 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -16,8 +16,10 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif -obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \ - pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o +obj-y := init.o init_$(BITS).o fault.o ioremap.o \ + extable.o mmap.o pgtable.o physaddr.o \ + setup_nx.o tlb.o cpu_entry_area.o \ + maccess.o pgprot.o obj-y += pat/ diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index ff08dc4636347..e1d1168ed25e8 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -189,10 +189,6 @@ void __init sme_early_init(void) __supported_pte_mask = __sme_set(__supported_pte_mask); - /* Update the protection map with memory encryption mask */ - for (i = 0; i < ARRAY_SIZE(protection_map); i++) - protection_map[i] = pgprot_encrypted(protection_map[i]); - if (sev_active()) swiotlb_force = SWIOTLB_FORCE; } diff --git a/arch/x86/mm/pgprot.c b/arch/x86/mm/pgprot.c new file mode 100644 index 0000000000000..4d8e9dbcce993 --- /dev/null +++ b/arch/x86/mm/pgprot.c @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/export.h> +#include <linux/mm.h> +#include <asm/pgtable.h> + +static inline pgprot_t __vm_get_page_prot(unsigned long vm_flags) +{ + switch (vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)) { + case 0: + return PAGE_NONE; + case VM_READ: + return PAGE_READONLY; + case VM_WRITE: + return PAGE_COPY; + case VM_READ | VM_WRITE: + return PAGE_COPY; + case VM_EXEC: + case VM_EXEC | VM_READ: + return PAGE_READONLY_EXEC; + case VM_EXEC | VM_WRITE: + case VM_EXEC | VM_READ | VM_WRITE: + return PAGE_COPY_EXEC; + case VM_SHARED: + return PAGE_NONE; + case VM_SHARED | VM_READ: + return PAGE_READONLY; + case VM_SHARED | VM_WRITE: + case VM_SHARED | VM_READ | VM_WRITE: + return PAGE_SHARED; + case VM_SHARED | VM_EXEC: + case VM_SHARED | VM_READ | VM_EXEC: + return PAGE_READONLY_EXEC; + case VM_SHARED | VM_WRITE | VM_EXEC: + case VM_SHARED | VM_READ | VM_WRITE | VM_EXEC: + return PAGE_SHARED_EXEC; + default: + BUILD_BUG(); + return PAGE_NONE; + } +} + +pgprot_t vm_get_page_prot(unsigned long vm_flags) +{ + unsigned long val = pgprot_val(__vm_get_page_prot(vm_flags)); + +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + /* + * Take the 4 protection key bits out of the vma->vm_flags value and + * turn them in to the bits that we can put in to a pte. + * + * Only override these if Protection Keys are available (which is only + * on 64-bit). + */ + if (vm_flags & VM_PKEY_BIT0) + val |= _PAGE_PKEY_BIT0; + if (vm_flags & VM_PKEY_BIT1) + val |= _PAGE_PKEY_BIT1; + if (vm_flags & VM_PKEY_BIT2) + val |= _PAGE_PKEY_BIT2; + if (vm_flags & VM_PKEY_BIT3) + val |= _PAGE_PKEY_BIT3; +#endif + + val = __sme_set(val); + if (val & _PAGE_PRESENT) + val &= __supported_pte_mask; + return __pgprot(val); +} +EXPORT_SYMBOL(vm_get_page_prot); diff --git a/include/linux/mm.h b/include/linux/mm.h index 73a52aba448f9..def17c5fb6afc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -428,12 +428,6 @@ extern unsigned int kobjsize(const void *objp); #endif #define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR) -/* - * mapping from the currently active vm_flags protection bits (the - * low four bits) to a page protection mask.. - */ -extern pgprot_t protection_map[16]; - /** * enum fault_flag - Fault flag definitions. * @FAULT_FLAG_WRITE: Fault was a write fault. diff --git a/mm/Kconfig b/mm/Kconfig index d16ba9249bc53..d9fa0bac189b4 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -894,6 +894,9 @@ config IO_MAPPING config SECRETMEM def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED +config ARCH_HAS_GET_PAGE_PROT + bool + source "mm/damon/Kconfig" endmenu diff --git a/mm/mmap.c b/mm/mmap.c index 88dcc5c252255..c6031dfcedd18 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -80,6 +80,7 @@ static void unmap_region(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, unsigned long start, unsigned long end); +#ifndef CONFIG_ARCH_HAS_GET_PAGE_PROT /* description of effects of mapping type and prot in current implementation. * this is due to the limited x86 page protection hardware. The expected * behavior is in parens: @@ -100,11 +101,6 @@ static void unmap_region(struct mm_struct *mm, * w: (no) no * x: (yes) yes */ -pgprot_t protection_map[16] __ro_after_init = { - __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, - __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 -}; - #ifndef CONFIG_ARCH_HAS_FILTER_PGPROT static inline pgprot_t arch_filter_pgprot(pgprot_t prot) { @@ -112,15 +108,57 @@ static inline pgprot_t arch_filter_pgprot(pgprot_t prot) } #endif +static pgprot_t __vm_get_page_prot(unsigned long vm_flags) +{ + switch (vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)) { + case 0: + return __P000; + case VM_READ: + return __P001; + case VM_WRITE: + return __P010; + case VM_READ | VM_WRITE: + return __P011; + case VM_EXEC: + return __P100; + case VM_EXEC | VM_READ: + return __P101; + case VM_EXEC | VM_WRITE: + return __P110; + case VM_EXEC | VM_READ | VM_WRITE: + return __P111; + case VM_SHARED: + return __S000; + case VM_SHARED | VM_READ: + return __S001; + case VM_SHARED | VM_WRITE: + return __S010; + case VM_SHARED | VM_READ | VM_WRITE: + return __S011; + case VM_SHARED | VM_EXEC: + return __S100; + case VM_SHARED | VM_READ | VM_EXEC: + return __S101; + case VM_SHARED | VM_WRITE | VM_EXEC: + return __S110; + case VM_SHARED | VM_READ | VM_WRITE | VM_EXEC: + return __S111; + default: + BUG(); + } +} + pgprot_t vm_get_page_prot(unsigned long vm_flags) { - pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags & - (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) | - pgprot_val(arch_vm_get_page_prot(vm_flags))); + pgprot_t ret; + + ret = __pgprot(pgprot_val(__vm_get_page_prot(vm_flags)) | + pgprot_val(arch_vm_get_page_prot(vm_flags))); return arch_filter_pgprot(ret); } EXPORT_SYMBOL(vm_get_page_prot); +#endif /* CONFIG_ARCH_HAS_GET_PAGE_PROT */ static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags) { @@ -1660,10 +1698,9 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) #endif /* __ARCH_WANT_SYS_OLD_MMAP */ /* - * Some shared mappings will want the pages marked read-only - * to track write events. If so, we'll downgrade vm_page_prot - * to the private version (using protection_map[] without the - * VM_SHARED bit). + * Some shared mappings will want the pages marked read-only to track write + * events. If so, we'll downgrade vm_page_prot to the private version without + * the VM_SHARED bit. */ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot) {
On 9/27/21 8:28 PM, Christoph Hellwig wrote: > On Mon, Sep 27, 2021 at 08:52:00AM +0530, Anshuman Khandual wrote: >> protection_map[] maps the lower four bits from vm_flags into platform page >> protection mask. Default initialization (and possible re-initialization in >> the platform) does not make it clear that these indices are just derived >> from various vm_flags protections (VM_SHARED, VM_READ, VM_WRITE, VM_EXEC). >> This defines macros for protection_map[] indices which concatenate various >> vm_flag attributes, making it clear and explicit. > > I dont think this is all that helpful. The main issue here is that > protection_map is a pointless obsfucation ad should be replaced with a Agreed, protection_map[] is an additional abstraction which could be dropped. > simple switch statement provided by each architecture. See the below > WIP which just works for x86 and without pagetable debugging for where I > think we should be going. Sure, this will work as well but all platforms need to be changed at once. Is there any platform that would not subscribe ARCH_HAS_GET_PAGE_PROT and export its own vm_get_page_prot() ? AFAICS all platforms are required to export __PXXX and __SXXX elements currently. This seems to be a better idea than the current proposal. Probably all the vm_flags combinations, which will be used in those switch statements can be converted into macros just to improve readability. Are you planning to send this as a proper patch soon ? > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > index ab83c22d274e7..70d8ae60a416f 100644 > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -74,8 +74,8 @@ config X86 > select ARCH_HAS_EARLY_DEBUG if KGDB > select ARCH_HAS_ELF_RANDOMIZE > select ARCH_HAS_FAST_MULTIPLIER > - select ARCH_HAS_FILTER_PGPROT > select ARCH_HAS_FORTIFY_SOURCE > + select ARCH_HAS_GET_PAGE_PROT > select ARCH_HAS_GCOV_PROFILE_ALL > select ARCH_HAS_KCOV if X86_64 && STACK_VALIDATION > select ARCH_HAS_MEM_ENCRYPT > diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h > index 448cd01eb3ecb..a0cc70b056385 100644 > --- a/arch/x86/include/asm/pgtable.h > +++ b/arch/x86/include/asm/pgtable.h > @@ -646,11 +646,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) > > #define canon_pgprot(p) __pgprot(massage_pgprot(p)) > > -static inline pgprot_t arch_filter_pgprot(pgprot_t prot) > -{ > - return canon_pgprot(prot); > -} > - > static inline int is_new_memtype_allowed(u64 paddr, unsigned long size, > enum page_cache_mode pcm, > enum page_cache_mode new_pcm) > diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h > index 40497a9020c6e..1a9dd933088e6 100644 > --- a/arch/x86/include/asm/pgtable_types.h > +++ b/arch/x86/include/asm/pgtable_types.h > @@ -228,25 +228,6 @@ enum page_cache_mode { > > #endif /* __ASSEMBLY__ */ > > -/* xwr */ > -#define __P000 PAGE_NONE > -#define __P001 PAGE_READONLY > -#define __P010 PAGE_COPY > -#define __P011 PAGE_COPY > -#define __P100 PAGE_READONLY_EXEC > -#define __P101 PAGE_READONLY_EXEC > -#define __P110 PAGE_COPY_EXEC > -#define __P111 PAGE_COPY_EXEC > - > -#define __S000 PAGE_NONE > -#define __S001 PAGE_READONLY > -#define __S010 PAGE_SHARED > -#define __S011 PAGE_SHARED > -#define __S100 PAGE_READONLY_EXEC > -#define __S101 PAGE_READONLY_EXEC > -#define __S110 PAGE_SHARED_EXEC > -#define __S111 PAGE_SHARED_EXEC > - > /* > * early identity mapping pte attrib macros. > */ > diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h > index d4a8d0424bfbf..775dbd3aff736 100644 > --- a/arch/x86/include/uapi/asm/mman.h > +++ b/arch/x86/include/uapi/asm/mman.h > @@ -5,20 +5,6 @@ > #define MAP_32BIT 0x40 /* only give out 32bit addresses */ > > #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS > -/* > - * Take the 4 protection key bits out of the vma->vm_flags > - * value and turn them in to the bits that we can put in > - * to a pte. > - * > - * Only override these if Protection Keys are available > - * (which is only on 64-bit). > - */ > -#define arch_vm_get_page_prot(vm_flags) __pgprot( \ > - ((vm_flags) & VM_PKEY_BIT0 ? _PAGE_PKEY_BIT0 : 0) | \ > - ((vm_flags) & VM_PKEY_BIT1 ? _PAGE_PKEY_BIT1 : 0) | \ > - ((vm_flags) & VM_PKEY_BIT2 ? _PAGE_PKEY_BIT2 : 0) | \ > - ((vm_flags) & VM_PKEY_BIT3 ? _PAGE_PKEY_BIT3 : 0)) > - > #define arch_calc_vm_prot_bits(prot, key) ( \ > ((key) & 0x1 ? VM_PKEY_BIT0 : 0) | \ > ((key) & 0x2 ? VM_PKEY_BIT1 : 0) | \ > diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile > index 5864219221ca8..b44806c5b3de8 100644 > --- a/arch/x86/mm/Makefile > +++ b/arch/x86/mm/Makefile > @@ -16,8 +16,10 @@ CFLAGS_REMOVE_mem_encrypt.o = -pg > CFLAGS_REMOVE_mem_encrypt_identity.o = -pg > endif > > -obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o mmap.o \ > - pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o > +obj-y := init.o init_$(BITS).o fault.o ioremap.o \ > + extable.o mmap.o pgtable.o physaddr.o \ > + setup_nx.o tlb.o cpu_entry_area.o \ > + maccess.o pgprot.o > > obj-y += pat/ > > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c > index ff08dc4636347..e1d1168ed25e8 100644 > --- a/arch/x86/mm/mem_encrypt.c > +++ b/arch/x86/mm/mem_encrypt.c > @@ -189,10 +189,6 @@ void __init sme_early_init(void) > > __supported_pte_mask = __sme_set(__supported_pte_mask); > > - /* Update the protection map with memory encryption mask */ > - for (i = 0; i < ARRAY_SIZE(protection_map); i++) > - protection_map[i] = pgprot_encrypted(protection_map[i]); > - > if (sev_active()) > swiotlb_force = SWIOTLB_FORCE; > } > diff --git a/arch/x86/mm/pgprot.c b/arch/x86/mm/pgprot.c > new file mode 100644 > index 0000000000000..4d8e9dbcce993 > --- /dev/null > +++ b/arch/x86/mm/pgprot.c > @@ -0,0 +1,70 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#include <linux/export.h> > +#include <linux/mm.h> > +#include <asm/pgtable.h> > + > +static inline pgprot_t __vm_get_page_prot(unsigned long vm_flags) > +{ > + switch (vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)) { > + case 0: > + return PAGE_NONE; > + case VM_READ: > + return PAGE_READONLY; > + case VM_WRITE: > + return PAGE_COPY; > + case VM_READ | VM_WRITE: > + return PAGE_COPY; > + case VM_EXEC: > + case VM_EXEC | VM_READ: > + return PAGE_READONLY_EXEC; > + case VM_EXEC | VM_WRITE: > + case VM_EXEC | VM_READ | VM_WRITE: > + return PAGE_COPY_EXEC; > + case VM_SHARED: > + return PAGE_NONE; > + case VM_SHARED | VM_READ: > + return PAGE_READONLY; > + case VM_SHARED | VM_WRITE: > + case VM_SHARED | VM_READ | VM_WRITE: > + return PAGE_SHARED; > + case VM_SHARED | VM_EXEC: > + case VM_SHARED | VM_READ | VM_EXEC: > + return PAGE_READONLY_EXEC; > + case VM_SHARED | VM_WRITE | VM_EXEC: > + case VM_SHARED | VM_READ | VM_WRITE | VM_EXEC: > + return PAGE_SHARED_EXEC; > + default: > + BUILD_BUG(); > + return PAGE_NONE; > + } > +} > + > +pgprot_t vm_get_page_prot(unsigned long vm_flags) > +{ > + unsigned long val = pgprot_val(__vm_get_page_prot(vm_flags)); > + > +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS > + /* > + * Take the 4 protection key bits out of the vma->vm_flags value and > + * turn them in to the bits that we can put in to a pte. > + * > + * Only override these if Protection Keys are available (which is only > + * on 64-bit). > + */ > + if (vm_flags & VM_PKEY_BIT0) > + val |= _PAGE_PKEY_BIT0; > + if (vm_flags & VM_PKEY_BIT1) > + val |= _PAGE_PKEY_BIT1; > + if (vm_flags & VM_PKEY_BIT2) > + val |= _PAGE_PKEY_BIT2; > + if (vm_flags & VM_PKEY_BIT3) > + val |= _PAGE_PKEY_BIT3; > +#endif > + > + val = __sme_set(val); > + if (val & _PAGE_PRESENT) > + val &= __supported_pte_mask; > + return __pgprot(val); > +} > +EXPORT_SYMBOL(vm_get_page_prot); > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 73a52aba448f9..def17c5fb6afc 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -428,12 +428,6 @@ extern unsigned int kobjsize(const void *objp); > #endif > #define VM_FLAGS_CLEAR (ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR) > > -/* > - * mapping from the currently active vm_flags protection bits (the > - * low four bits) to a page protection mask.. > - */ > -extern pgprot_t protection_map[16]; > - > /** > * enum fault_flag - Fault flag definitions. > * @FAULT_FLAG_WRITE: Fault was a write fault. > diff --git a/mm/Kconfig b/mm/Kconfig > index d16ba9249bc53..d9fa0bac189b4 100644 > --- a/mm/Kconfig > +++ b/mm/Kconfig > @@ -894,6 +894,9 @@ config IO_MAPPING > config SECRETMEM > def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED > > +config ARCH_HAS_GET_PAGE_PROT > + bool > + > source "mm/damon/Kconfig" > > endmenu > diff --git a/mm/mmap.c b/mm/mmap.c > index 88dcc5c252255..c6031dfcedd18 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -80,6 +80,7 @@ static void unmap_region(struct mm_struct *mm, > struct vm_area_struct *vma, struct vm_area_struct *prev, > unsigned long start, unsigned long end); > > +#ifndef CONFIG_ARCH_HAS_GET_PAGE_PROT > /* description of effects of mapping type and prot in current implementation. > * this is due to the limited x86 page protection hardware. The expected > * behavior is in parens: > @@ -100,11 +101,6 @@ static void unmap_region(struct mm_struct *mm, > * w: (no) no > * x: (yes) yes > */ > -pgprot_t protection_map[16] __ro_after_init = { > - __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, > - __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 > -}; > - > #ifndef CONFIG_ARCH_HAS_FILTER_PGPROT > static inline pgprot_t arch_filter_pgprot(pgprot_t prot) > { > @@ -112,15 +108,57 @@ static inline pgprot_t arch_filter_pgprot(pgprot_t prot) > } > #endif > > +static pgprot_t __vm_get_page_prot(unsigned long vm_flags) > +{ > + switch (vm_flags & (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)) { > + case 0: > + return __P000; > + case VM_READ: > + return __P001; > + case VM_WRITE: > + return __P010; > + case VM_READ | VM_WRITE: > + return __P011; > + case VM_EXEC: > + return __P100; > + case VM_EXEC | VM_READ: > + return __P101; > + case VM_EXEC | VM_WRITE: > + return __P110; > + case VM_EXEC | VM_READ | VM_WRITE: > + return __P111; > + case VM_SHARED: > + return __S000; > + case VM_SHARED | VM_READ: > + return __S001; > + case VM_SHARED | VM_WRITE: > + return __S010; > + case VM_SHARED | VM_READ | VM_WRITE: > + return __S011; > + case VM_SHARED | VM_EXEC: > + return __S100; > + case VM_SHARED | VM_READ | VM_EXEC: > + return __S101; > + case VM_SHARED | VM_WRITE | VM_EXEC: > + return __S110; > + case VM_SHARED | VM_READ | VM_WRITE | VM_EXEC: > + return __S111; > + default: > + BUG(); > + } > +} > + > pgprot_t vm_get_page_prot(unsigned long vm_flags) > { > - pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags & > - (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) | > - pgprot_val(arch_vm_get_page_prot(vm_flags))); > + pgprot_t ret; > + > + ret = __pgprot(pgprot_val(__vm_get_page_prot(vm_flags)) | > + pgprot_val(arch_vm_get_page_prot(vm_flags))); > > return arch_filter_pgprot(ret); > } > EXPORT_SYMBOL(vm_get_page_prot); > +#endif /* CONFIG_ARCH_HAS_GET_PAGE_PROT */ > > static pgprot_t vm_pgprot_modify(pgprot_t oldprot, unsigned long vm_flags) > { > @@ -1660,10 +1698,9 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) > #endif /* __ARCH_WANT_SYS_OLD_MMAP */ > > /* > - * Some shared mappings will want the pages marked read-only > - * to track write events. If so, we'll downgrade vm_page_prot > - * to the private version (using protection_map[] without the > - * VM_SHARED bit). > + * Some shared mappings will want the pages marked read-only to track write > + * events. If so, we'll downgrade vm_page_prot to the private version without > + * the VM_SHARED bit. > */ > int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot) > { >
On Tue, Sep 28, 2021 at 08:24:43AM +0530, Anshuman Khandual wrote: > > simple switch statement provided by each architecture. See the below > > WIP which just works for x86 and without pagetable debugging for where I > > think we should be going. > > Sure, this will work as well but all platforms need to be changed at once. > Is there any platform that would not subscribe ARCH_HAS_GET_PAGE_PROT and > export its own vm_get_page_prot() ? AFAICS all platforms are required to > export __PXXX and __SXXX elements currently. > > This seems to be a better idea than the current proposal. Probably all the > vm_flags combinations, which will be used in those switch statements can be > converted into macros just to improve readability. Are you planning to send > this as a proper patch soon ? This was just a quіck WIP patch. If you have some spare time to tackle it for real I'd sugget the following approach: 1) Remove the direct references to protection_map in debug_vm_pgtable.c 2) add the ARCH_HAS_GET_PAGE_PROT symbol that lets architectures provide vm_get_page_prot itself and not define protection_map at all in this case 3) convert all architectures that touch protection_map to provide vm_get_page_prot themselves 4) mark protection_map static 5) convert all architectures that provide arch_filter_pgprot and/or arch_vm_get_page_prot to provide vm_get_page_prot directly and remove those hooks 6) remove the __S???/__P??? macros and the generic vm_get_page_prot after providing an arch implementation for every architecture. This can maybe simplified with a new generic version that directly looks at PAGE_* macros, but that will need further investigation first.
On 9/28/21 10:12 AM, Christoph Hellwig wrote: > On Tue, Sep 28, 2021 at 08:24:43AM +0530, Anshuman Khandual wrote: >>> simple switch statement provided by each architecture. See the below >>> WIP which just works for x86 and without pagetable debugging for where I >>> think we should be going. >> >> Sure, this will work as well but all platforms need to be changed at once. >> Is there any platform that would not subscribe ARCH_HAS_GET_PAGE_PROT and >> export its own vm_get_page_prot() ? AFAICS all platforms are required to >> export __PXXX and __SXXX elements currently. >> >> This seems to be a better idea than the current proposal. Probably all the >> vm_flags combinations, which will be used in those switch statements can be >> converted into macros just to improve readability. Are you planning to send >> this as a proper patch soon ? > > This was just a quіck WIP patch. If you have some spare time to tackle > it for real I'd sugget the following approach: Sure, will try and get this working. > > 1) Remove the direct references to protection_map in debug_vm_pgtable.c > 2) add the ARCH_HAS_GET_PAGE_PROT symbol that lets architectures > provide vm_get_page_prot itself and not define protection_map at all > in this case > 3) convert all architectures that touch protection_map to provide > vm_get_page_prot themselves > 4) mark protection_map static > 5) convert all architectures that provide arch_filter_pgprot and/or > arch_vm_get_page_prot to provide vm_get_page_prot directly and > remove those hooks > 6) remove the __S???/__P??? macros and the generic vm_get_page_prot > after providing an arch implementation for every architecture. > This can maybe simplified with a new generic version that directly > looks at PAGE_* macros, but that will need further investigation > first. >
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 830ab91e574f..d0197cc1fb5a 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -161,24 +161,24 @@ EXPORT_SYMBOL(_page_cachable_default); static inline void setup_protection_map(void) { - protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[4] = PM(_PAGE_PRESENT); - protection_map[5] = PM(_PAGE_PRESENT); - protection_map[6] = PM(_PAGE_PRESENT); - protection_map[7] = PM(_PAGE_PRESENT); - - protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); - protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); - protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | + protection_map[PROTMAP_IDX_XXXX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[PROTMAP_IDX_XRXX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[PROTMAP_IDX_XXWX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[PROTMAP_IDX_XRWX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[PROTMAP_IDX_XXXE] = PM(_PAGE_PRESENT); + protection_map[PROTMAP_IDX_XRXE] = PM(_PAGE_PRESENT); + protection_map[PROTMAP_IDX_XXWE] = PM(_PAGE_PRESENT); + protection_map[PROTMAP_IDX_XRWE] = PM(_PAGE_PRESENT); + + protection_map[PROTMAP_IDX_SXXX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ); + protection_map[PROTMAP_IDX_SRXX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC); + protection_map[PROTMAP_IDX_SXWX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE | _PAGE_NO_READ); - protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); - protection_map[12] = PM(_PAGE_PRESENT); - protection_map[13] = PM(_PAGE_PRESENT); - protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE); - protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE); + protection_map[PROTMAP_IDX_SRWX] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE); + protection_map[PROTMAP_IDX_SXXE] = PM(_PAGE_PRESENT); + protection_map[PROTMAP_IDX_SRXE] = PM(_PAGE_PRESENT); + protection_map[PROTMAP_IDX_SXWE] = PM(_PAGE_PRESENT | _PAGE_WRITE); + protection_map[PROTMAP_IDX_SRWE] = PM(_PAGE_PRESENT | _PAGE_WRITE); } #undef PM diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 1b23639e2fcd..1a7fe97c8167 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2642,22 +2642,22 @@ static void prot_init_common(unsigned long page_none, PAGE_COPY = __pgprot(page_copy); PAGE_SHARED = __pgprot(page_shared); - protection_map[0x0] = __pgprot(page_none); - protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit); - protection_map[0x2] = __pgprot(page_copy & ~page_exec_bit); - protection_map[0x3] = __pgprot(page_copy & ~page_exec_bit); - protection_map[0x4] = __pgprot(page_readonly); - protection_map[0x5] = __pgprot(page_readonly); - protection_map[0x6] = __pgprot(page_copy); - protection_map[0x7] = __pgprot(page_copy); - protection_map[0x8] = __pgprot(page_none); - protection_map[0x9] = __pgprot(page_readonly & ~page_exec_bit); - protection_map[0xa] = __pgprot(page_shared & ~page_exec_bit); - protection_map[0xb] = __pgprot(page_shared & ~page_exec_bit); - protection_map[0xc] = __pgprot(page_readonly); - protection_map[0xd] = __pgprot(page_readonly); - protection_map[0xe] = __pgprot(page_shared); - protection_map[0xf] = __pgprot(page_shared); + protection_map[PROTMAP_IDX_XXXX] = __pgprot(page_none); + protection_map[PROTMAP_IDX_XRXX] = __pgprot(page_readonly & ~page_exec_bit); + protection_map[PROTMAP_IDX_XXWX] = __pgprot(page_copy & ~page_exec_bit); + protection_map[PROTMAP_IDX_XRWX] = __pgprot(page_copy & ~page_exec_bit); + protection_map[PROTMAP_IDX_XXXE] = __pgprot(page_readonly); + protection_map[PROTMAP_IDX_XRXE] = __pgprot(page_readonly); + protection_map[PROTMAP_IDX_XXWE] = __pgprot(page_copy); + protection_map[PROTMAP_IDX_XRWE] = __pgprot(page_copy); + protection_map[PROTMAP_IDX_SXXX] = __pgprot(page_none); + protection_map[PROTMAP_IDX_SRXX] = __pgprot(page_readonly & ~page_exec_bit); + protection_map[PROTMAP_IDX_SXWX] = __pgprot(page_shared & ~page_exec_bit); + protection_map[PROTMAP_IDX_SRWX] = __pgprot(page_shared & ~page_exec_bit); + protection_map[PROTMAP_IDX_SXXE] = __pgprot(page_readonly); + protection_map[PROTMAP_IDX_SRXE] = __pgprot(page_readonly); + protection_map[PROTMAP_IDX_SXWE] = __pgprot(page_shared); + protection_map[PROTMAP_IDX_SRWE] = __pgprot(page_shared); } static void __init sun4u_pgprot_init(void) diff --git a/include/linux/mm.h b/include/linux/mm.h index 73a52aba448f..4f99a49749a5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -431,9 +431,40 @@ extern unsigned int kobjsize(const void *objp); /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. + * + * VM_EXEC ---------------------| + * | + * VM_WRITE ---------------| | + * | | + * VM_READ -----------| | | + * | | | + * VM_SHARED ----| | | | + * | | | | + * v v v v + * PROTMAP_IDX_(S|X)(R|X)(W|X)(E|X) + * + * X - The protection flag is absent + * */ extern pgprot_t protection_map[16]; +#define PROTMAP_IDX_XXXX (VM_NONE) +#define PROTMAP_IDX_XRXX (VM_READ) +#define PROTMAP_IDX_XXWX (VM_WRITE) +#define PROTMAP_IDX_XRWX (VM_READ | VM_WRITE) +#define PROTMAP_IDX_XXXE (VM_EXEC) +#define PROTMAP_IDX_XRXE (VM_READ | VM_EXEC) +#define PROTMAP_IDX_XXWE (VM_WRITE | VM_EXEC) +#define PROTMAP_IDX_XRWE (VM_READ | VM_WRITE | VM_EXEC) +#define PROTMAP_IDX_SXXX (VM_SHARED | VM_NONE) +#define PROTMAP_IDX_SRXX (VM_SHARED | VM_READ) +#define PROTMAP_IDX_SXWX (VM_SHARED | VM_WRITE) +#define PROTMAP_IDX_SRWX (VM_SHARED | VM_READ | VM_WRITE) +#define PROTMAP_IDX_SXXE (VM_SHARED | VM_EXEC) +#define PROTMAP_IDX_SRXE (VM_SHARED | VM_READ | VM_EXEC) +#define PROTMAP_IDX_SXWE (VM_SHARED | VM_WRITE | VM_EXEC) +#define PROTMAP_IDX_SRWE (VM_SHARED | VM_READ | VM_WRITE | VM_EXEC) + /** * enum fault_flag - Fault flag definitions. * @FAULT_FLAG_WRITE: Fault was a write fault. diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 228e3954b90c..2e01d0d395bb 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -1104,14 +1104,14 @@ static int __init init_args(struct pgtable_debug_args *args) /* * Initialize the debugging data. * - * protection_map[0] (or even protection_map[8]) will help create - * page table entries with PROT_NONE permission as required for - * pxx_protnone_tests(). + * protection_map[PROTMAP_IDX_XXXX] (or even protection_map[PROTMAP_IDX_SXXX]) + * will help create page table entries with PROT_NONE permission as required + * for pxx_protnone_tests(). */ memset(args, 0, sizeof(*args)); args->vaddr = get_random_vaddr(); args->page_prot = vm_get_page_prot(VMFLAGS); - args->page_prot_none = protection_map[0]; + args->page_prot_none = protection_map[PROTMAP_IDX_XXXX]; args->is_contiguous_page = false; args->pud_pfn = ULONG_MAX; args->pmd_pfn = ULONG_MAX; diff --git a/mm/mmap.c b/mm/mmap.c index 88dcc5c25225..d38bd4e305f9 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -101,8 +101,22 @@ static void unmap_region(struct mm_struct *mm, * x: (yes) yes */ pgprot_t protection_map[16] __ro_after_init = { - __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, - __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111 + [PROTMAP_IDX_XXXX] = __P000, + [PROTMAP_IDX_XRXX] = __P001, + [PROTMAP_IDX_XXWX] = __P010, + [PROTMAP_IDX_XRWX] = __P011, + [PROTMAP_IDX_XXXE] = __P100, + [PROTMAP_IDX_XRXE] = __P101, + [PROTMAP_IDX_XXWE] = __P110, + [PROTMAP_IDX_XRWE] = __P111, + [PROTMAP_IDX_SXXX] = __S000, + [PROTMAP_IDX_SRXX] = __S001, + [PROTMAP_IDX_SXWX] = __S010, + [PROTMAP_IDX_SRWX] = __S011, + [PROTMAP_IDX_SXXE] = __S100, + [PROTMAP_IDX_SRXE] = __S101, + [PROTMAP_IDX_SXWE] = __S110, + [PROTMAP_IDX_SRWE] = __S111 }; #ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
protection_map[] maps the lower four bits from vm_flags into platform page protection mask. Default initialization (and possible re-initialization in the platform) does not make it clear that these indices are just derived from various vm_flags protections (VM_SHARED, VM_READ, VM_WRITE, VM_EXEC). This defines macros for protection_map[] indices which concatenate various vm_flag attributes, making it clear and explicit. Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: linux-mips@vger.kernel.org Cc: sparclinux@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com> --- This applies on v5.15-rc3 after the following patch. https://lore.kernel.org/all/20210924060821.1138281-1-guoren@kernel.org/ arch/mips/mm/cache.c | 34 +++++++++++++++++----------------- arch/sparc/mm/init_64.c | 32 ++++++++++++++++---------------- include/linux/mm.h | 31 +++++++++++++++++++++++++++++++ mm/debug_vm_pgtable.c | 8 ++++---- mm/mmap.c | 18 ++++++++++++++++-- 5 files changed, 84 insertions(+), 39 deletions(-)