diff mbox series

[hotfix,6.12,v4,4/5] mm: refactor arch_calc_vm_flag_bits() and arm64 MTE handling

Message ID ec251b20ba1964fb64cf1607d2ad80c47f3873df.1730224667.git.lorenzo.stoakes@oracle.com (mailing list archive)
State New
Headers show
Series fix error handling in mmap_region() and refactor (hotfixes) | expand

Commit Message

Lorenzo Stoakes Oct. 29, 2024, 6:11 p.m. UTC
Currently MTE is permitted in two circumstances (desiring to use MTE having
been specified by the VM_MTE flag) - where MAP_ANONYMOUS is specified, as
checked by arch_calc_vm_flag_bits() and actualised by setting the
VM_MTE_ALLOWED flag, or if the file backing the mapping is shmem, in which
case we set VM_MTE_ALLOWED in shmem_mmap() when the mmap hook is activated
in mmap_region().

The function that checks that, if VM_MTE is set, VM_MTE_ALLOWED is also set
is the arm64 implementation of arch_validate_flags().

Unfortunately, we intend to refactor mmap_region() to perform this check
earlier, meaning that in the case of a shmem backing we will not have
invoked shmem_mmap() yet, causing the mapping to fail spuriously.

It is inappropriate to set this architecture-specific flag in general mm
code anyway, so a sensible resolution of this issue is to instead move the
check somewhere else.

We resolve this by setting VM_MTE_ALLOWED much earlier in do_mmap(), via
the arch_calc_vm_flag_bits() call.

This is an appropriate place to do this as we already check for the
MAP_ANONYMOUS case here, and the shmem file case is simply a variant of the
same idea - we permit RAM-backed memory.

This requires a modification to the arch_calc_vm_flag_bits() signature to
pass in a pointer to the struct file associated with the mapping, however
this is not too egregious as this is only used by two architectures anyway
- arm64 and parisc.

So this patch performs this adjustment and removes the unnecessary
assignment of VM_MTE_ALLOWED in shmem_mmap().

Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Jann Horn <jannh@google.com>
Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
Cc: stable <stable@kernel.org>
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 arch/arm64/include/asm/mman.h  | 10 +++++++---
 arch/parisc/include/asm/mman.h |  5 +++--
 include/linux/mman.h           |  7 ++++---
 mm/mmap.c                      |  2 +-
 mm/nommu.c                     |  2 +-
 mm/shmem.c                     |  3 ---
 6 files changed, 16 insertions(+), 13 deletions(-)

--
2.47.0

Comments

Vlastimil Babka Oct. 30, 2024, 9:18 a.m. UTC | #1
On 10/29/24 19:11, Lorenzo Stoakes wrote:
> Currently MTE is permitted in two circumstances (desiring to use MTE having
> been specified by the VM_MTE flag) - where MAP_ANONYMOUS is specified, as
> checked by arch_calc_vm_flag_bits() and actualised by setting the
> VM_MTE_ALLOWED flag, or if the file backing the mapping is shmem, in which
> case we set VM_MTE_ALLOWED in shmem_mmap() when the mmap hook is activated
> in mmap_region().
> 
> The function that checks that, if VM_MTE is set, VM_MTE_ALLOWED is also set
> is the arm64 implementation of arch_validate_flags().
> 
> Unfortunately, we intend to refactor mmap_region() to perform this check
> earlier, meaning that in the case of a shmem backing we will not have
> invoked shmem_mmap() yet, causing the mapping to fail spuriously.
> 
> It is inappropriate to set this architecture-specific flag in general mm
> code anyway, so a sensible resolution of this issue is to instead move the
> check somewhere else.
> 
> We resolve this by setting VM_MTE_ALLOWED much earlier in do_mmap(), via
> the arch_calc_vm_flag_bits() call.
> 
> This is an appropriate place to do this as we already check for the
> MAP_ANONYMOUS case here, and the shmem file case is simply a variant of the
> same idea - we permit RAM-backed memory.
> 
> This requires a modification to the arch_calc_vm_flag_bits() signature to
> pass in a pointer to the struct file associated with the mapping, however
> this is not too egregious as this is only used by two architectures anyway
> - arm64 and parisc.
> 
> So this patch performs this adjustment and removes the unnecessary
> assignment of VM_MTE_ALLOWED in shmem_mmap().
> 
> Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
> Reported-by: Jann Horn <jannh@google.com>
> Fixes: deb0f6562884 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
> Cc: stable <stable@kernel.org>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>

> --- a/arch/arm64/include/asm/mman.h
> +++ b/arch/arm64/include/asm/mman.h
> @@ -6,6 +6,8 @@
> 
>  #ifndef BUILD_VDSO
>  #include <linux/compiler.h>
> +#include <linux/fs.h>
> +#include <linux/shmem_fs.h>
>  #include <linux/types.h>
> 
>  static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
> @@ -31,19 +33,21 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
>  }
>  #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
> 
> -static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
> +static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
> +						   unsigned long flags)
>  {
>  	/*
>  	 * Only allow MTE on anonymous mappings as these are guaranteed to be
>  	 * backed by tags-capable memory. The vm_flags may be overridden by a
>  	 * filesystem supporting MTE (RAM-based).

We should also eventually remove the last sentence or even replace it with
its negation, or somebody might try reintroducing the pattern that won't
work anymore (wasn't there such a hugetlbfs thing in -next?).

>  	 */
> -	if (system_supports_mte() && (flags & MAP_ANONYMOUS))
> +	if (system_supports_mte() &&
> +	    ((flags & MAP_ANONYMOUS) || shmem_file(file)))
>  		return VM_MTE_ALLOWED;
> 
>  	return 0;
>  }
Catalin Marinas Oct. 30, 2024, 10:58 a.m. UTC | #2
On Wed, Oct 30, 2024 at 10:18:27AM +0100, Vlastimil Babka wrote:
> On 10/29/24 19:11, Lorenzo Stoakes wrote:
> > --- a/arch/arm64/include/asm/mman.h
> > +++ b/arch/arm64/include/asm/mman.h
> > @@ -6,6 +6,8 @@
> > 
> >  #ifndef BUILD_VDSO
> >  #include <linux/compiler.h>
> > +#include <linux/fs.h>
> > +#include <linux/shmem_fs.h>
> >  #include <linux/types.h>
> > 
> >  static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
> > @@ -31,19 +33,21 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
> >  }
> >  #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
> > 
> > -static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
> > +static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
> > +						   unsigned long flags)
> >  {
> >  	/*
> >  	 * Only allow MTE on anonymous mappings as these are guaranteed to be
> >  	 * backed by tags-capable memory. The vm_flags may be overridden by a
> >  	 * filesystem supporting MTE (RAM-based).
> 
> We should also eventually remove the last sentence or even replace it with
> its negation, or somebody might try reintroducing the pattern that won't
> work anymore (wasn't there such a hugetlbfs thing in -next?).

I agree, we should update this comment as well though as a fix this
patch is fine for now.

There is indeed a hugetlbfs change in -next adding VM_MTE_ALLOWED. It
should still work after the above change but we'd need to move it over
here (and fix the comment at the same time). We'll probably do it around
-rc1 or maybe earlier once this fix hits mainline. I don't think we have
an equivalent of shmem_file() for hugetlbfs, we'll need to figure
something out.

> >  	 */
> > -	if (system_supports_mte() && (flags & MAP_ANONYMOUS))
> > +	if (system_supports_mte() &&
> > +	    ((flags & MAP_ANONYMOUS) || shmem_file(file)))
> >  		return VM_MTE_ALLOWED;
> > 
> >  	return 0;
> >  }

This will conflict with the arm64 for-next/core tree as it's adding
a MAP_HUGETLB check. Trivial resolution though, Stephen will handle it.
Vlastimil Babka Oct. 30, 2024, 11:09 a.m. UTC | #3
On 10/30/24 11:58, Catalin Marinas wrote:
> On Wed, Oct 30, 2024 at 10:18:27AM +0100, Vlastimil Babka wrote:
>> On 10/29/24 19:11, Lorenzo Stoakes wrote:
>> > --- a/arch/arm64/include/asm/mman.h
>> > +++ b/arch/arm64/include/asm/mman.h
>> > @@ -6,6 +6,8 @@
>> > 
>> >  #ifndef BUILD_VDSO
>> >  #include <linux/compiler.h>
>> > +#include <linux/fs.h>
>> > +#include <linux/shmem_fs.h>
>> >  #include <linux/types.h>
>> > 
>> >  static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
>> > @@ -31,19 +33,21 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
>> >  }
>> >  #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
>> > 
>> > -static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
>> > +static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
>> > +						   unsigned long flags)
>> >  {
>> >  	/*
>> >  	 * Only allow MTE on anonymous mappings as these are guaranteed to be
>> >  	 * backed by tags-capable memory. The vm_flags may be overridden by a
>> >  	 * filesystem supporting MTE (RAM-based).
>> 
>> We should also eventually remove the last sentence or even replace it with
>> its negation, or somebody might try reintroducing the pattern that won't
>> work anymore (wasn't there such a hugetlbfs thing in -next?).
> 
> I agree, we should update this comment as well though as a fix this
> patch is fine for now.
> 
> There is indeed a hugetlbfs change in -next adding VM_MTE_ALLOWED. It
> should still work after the above change but we'd need to move it over

I guess it will work after the above change, but not after 5/5?

> here (and fix the comment at the same time). We'll probably do it around
> -rc1 or maybe earlier once this fix hits mainline.

I assume this will hopefully go to rc7.

> I don't think we have
> an equivalent of shmem_file() for hugetlbfs, we'll need to figure
> something out.

I've found is_file_hugepages(), could work? And while adding the hugetlbfs
change here, the comment could be adjusted too, right?

> 
>> >  	 */
>> > -	if (system_supports_mte() && (flags & MAP_ANONYMOUS))
>> > +	if (system_supports_mte() &&
>> > +	    ((flags & MAP_ANONYMOUS) || shmem_file(file)))
>> >  		return VM_MTE_ALLOWED;
>> > 
>> >  	return 0;
>> >  }
> 
> This will conflict with the arm64 for-next/core tree as it's adding
> a MAP_HUGETLB check. Trivial resolution though, Stephen will handle it.
>
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h
index 9e39217b4afb..798d965760d4 100644
--- a/arch/arm64/include/asm/mman.h
+++ b/arch/arm64/include/asm/mman.h
@@ -6,6 +6,8 @@ 

 #ifndef BUILD_VDSO
 #include <linux/compiler.h>
+#include <linux/fs.h>
+#include <linux/shmem_fs.h>
 #include <linux/types.h>

 static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
@@ -31,19 +33,21 @@  static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
 }
 #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)

-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
+static inline unsigned long arch_calc_vm_flag_bits(struct file *file,
+						   unsigned long flags)
 {
 	/*
 	 * Only allow MTE on anonymous mappings as these are guaranteed to be
 	 * backed by tags-capable memory. The vm_flags may be overridden by a
 	 * filesystem supporting MTE (RAM-based).
 	 */
-	if (system_supports_mte() && (flags & MAP_ANONYMOUS))
+	if (system_supports_mte() &&
+	    ((flags & MAP_ANONYMOUS) || shmem_file(file)))
 		return VM_MTE_ALLOWED;

 	return 0;
 }
-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)

 static inline bool arch_validate_prot(unsigned long prot,
 	unsigned long addr __always_unused)
diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h
index 89b6beeda0b8..663f587dc789 100644
--- a/arch/parisc/include/asm/mman.h
+++ b/arch/parisc/include/asm/mman.h
@@ -2,6 +2,7 @@ 
 #ifndef __ASM_MMAN_H__
 #define __ASM_MMAN_H__

+#include <linux/fs.h>
 #include <uapi/asm/mman.h>

 /* PARISC cannot allow mdwe as it needs writable stacks */
@@ -11,7 +12,7 @@  static inline bool arch_memory_deny_write_exec_supported(void)
 }
 #define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported

-static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)
+static inline unsigned long arch_calc_vm_flag_bits(struct file *file, unsigned long flags)
 {
 	/*
 	 * The stack on parisc grows upwards, so if userspace requests memory
@@ -23,6 +24,6 @@  static inline unsigned long arch_calc_vm_flag_bits(unsigned long flags)

 	return 0;
 }
-#define arch_calc_vm_flag_bits(flags) arch_calc_vm_flag_bits(flags)
+#define arch_calc_vm_flag_bits(file, flags) arch_calc_vm_flag_bits(file, flags)

 #endif /* __ASM_MMAN_H__ */
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 8ddca62d6460..bd70af0321e8 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -2,6 +2,7 @@ 
 #ifndef _LINUX_MMAN_H
 #define _LINUX_MMAN_H

+#include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/percpu_counter.h>

@@ -94,7 +95,7 @@  static inline void vm_unacct_memory(long pages)
 #endif

 #ifndef arch_calc_vm_flag_bits
-#define arch_calc_vm_flag_bits(flags) 0
+#define arch_calc_vm_flag_bits(file, flags) 0
 #endif

 #ifndef arch_validate_prot
@@ -151,13 +152,13 @@  calc_vm_prot_bits(unsigned long prot, unsigned long pkey)
  * Combine the mmap "flags" argument into "vm_flags" used internally.
  */
 static inline unsigned long
-calc_vm_flag_bits(unsigned long flags)
+calc_vm_flag_bits(struct file *file, unsigned long flags)
 {
 	return _calc_vm_trans(flags, MAP_GROWSDOWN,  VM_GROWSDOWN ) |
 	       _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    ) |
 	       _calc_vm_trans(flags, MAP_SYNC,	     VM_SYNC      ) |
 	       _calc_vm_trans(flags, MAP_STACK,	     VM_NOHUGEPAGE) |
-	       arch_calc_vm_flag_bits(flags);
+	       arch_calc_vm_flag_bits(file, flags);
 }

 unsigned long vm_commit_limit(void);
diff --git a/mm/mmap.c b/mm/mmap.c
index ab71d4c3464c..aee5fa08ae5d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -344,7 +344,7 @@  unsigned long do_mmap(struct file *file, unsigned long addr,
 	 * to. we assume access permissions have been handled by the open
 	 * of the memory object, so we don't do any here.
 	 */
-	vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
+	vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(file, flags) |
 			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

 	/* Obtain the address to map to. we verify (or select) it and ensure
diff --git a/mm/nommu.c b/mm/nommu.c
index 635d028d647b..e9b5f527ab5b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -842,7 +842,7 @@  static unsigned long determine_vm_flags(struct file *file,
 {
 	unsigned long vm_flags;

-	vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(flags);
+	vm_flags = calc_vm_prot_bits(prot, 0) | calc_vm_flag_bits(file, flags);

 	if (!file) {
 		/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 4ba1d00fabda..e87f5d6799a7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2733,9 +2733,6 @@  static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 	if (ret)
 		return ret;

-	/* arm64 - allow memory tagging on RAM-based files */
-	vm_flags_set(vma, VM_MTE_ALLOWED);
-
 	file_accessed(file);
 	/* This is anonymous shared memory if it is unlinked at the time of mmap */
 	if (inode->i_nlink)