diff mbox series

[2/2] mm: make mmap_region() internal

Message ID de5a3c574d35c26237edf20a1d8652d7305709c9.1735819274.git.lorenzo.stoakes@oracle.com (mailing list archive)
State New
Headers show
Series mm: update mips to use do_mmap(), make mmap_region() internal | expand

Commit Message

Lorenzo Stoakes Jan. 2, 2025, 12:10 p.m. UTC
Now that we have removed the one user of mmap_region() outside of mm, make
it internal and add it to vma.c so it can be userland tested.

This ensures that all external memory mappings are performed using the
appropriate interfaces and allows us to modify memory mapping logic as we
see fit.

Additionally expand test stubs to allow for the mmap_region() code to
compile and be userland testable.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 include/linux/mm.h               |  3 --
 mm/mmap.c                        | 59 -----------------------------
 mm/vma.c                         | 61 +++++++++++++++++++++++++++++-
 mm/vma.h                         |  2 +-
 tools/testing/vma/vma_internal.h | 65 ++++++++++++++++++++++++++++++++
 5 files changed, 126 insertions(+), 64 deletions(-)

Comments

Liam R. Howlett Jan. 3, 2025, 4:22 p.m. UTC | #1
* Lorenzo Stoakes <lorenzo.stoakes@oracle.com> [250102 07:11]:
> Now that we have removed the one user of mmap_region() outside of mm, make
> it internal and add it to vma.c so it can be userland tested.
> 
> This ensures that all external memory mappings are performed using the
> appropriate interfaces and allows us to modify memory mapping logic as we
> see fit.
> 
> Additionally expand test stubs to allow for the mmap_region() code to
> compile and be userland testable.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>

> ---
>  include/linux/mm.h               |  3 --
>  mm/mmap.c                        | 59 -----------------------------
>  mm/vma.c                         | 61 +++++++++++++++++++++++++++++-
>  mm/vma.h                         |  2 +-
>  tools/testing/vma/vma_internal.h | 65 ++++++++++++++++++++++++++++++++
>  5 files changed, 126 insertions(+), 64 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 6939c2a8d90f..1a11f9df5c2d 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3446,9 +3446,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
>  	return __get_unmapped_area(file, addr, len, pgoff, flags, 0);
>  }
>  
> -extern unsigned long mmap_region(struct file *file, unsigned long addr,
> -	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> -	struct list_head *uf);
>  extern unsigned long do_mmap(struct file *file, unsigned long addr,
>  	unsigned long len, unsigned long prot, unsigned long flags,
>  	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 7fdc4207fe98..7aa36216ecc0 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1072,65 +1072,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
>  	return do_vmi_munmap(&vmi, mm, start, len, uf, false);
>  }
>  
> -/**
> - * mmap_region() - Actually perform the userland mapping of a VMA into
> - * current->mm with known, aligned and overflow-checked @addr and @len, and
> - * correctly determined VMA flags @vm_flags and page offset @pgoff.
> - *
> - * This is an internal memory management function, and should not be used
> - * directly.
> - *
> - * The caller must write-lock current->mm->mmap_lock.
> - *
> - * @file: If a file-backed mapping, a pointer to the struct file describing the
> - * file to be mapped, otherwise NULL.
> - * @addr: The page-aligned address at which to perform the mapping.
> - * @len: The page-aligned, non-zero, length of the mapping.
> - * @vm_flags: The VMA flags which should be applied to the mapping.
> - * @pgoff: If @file is specified, the page offset into the file, if not then
> - * the virtual page offset in memory of the anonymous mapping.
> - * @uf: Optionally, a pointer to a list head used for tracking userfaultfd unmap
> - * events.
> - *
> - * Returns: Either an error, or the address at which the requested mapping has
> - * been performed.
> - */
> -unsigned long mmap_region(struct file *file, unsigned long addr,
> -			  unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> -			  struct list_head *uf)
> -{
> -	unsigned long ret;
> -	bool writable_file_mapping = false;
> -
> -	mmap_assert_write_locked(current->mm);
> -
> -	/* Check to see if MDWE is applicable. */
> -	if (map_deny_write_exec(vm_flags, vm_flags))
> -		return -EACCES;
> -
> -	/* Allow architectures to sanity-check the vm_flags. */
> -	if (!arch_validate_flags(vm_flags))
> -		return -EINVAL;
> -
> -	/* Map writable and ensure this isn't a sealed memfd. */
> -	if (file && is_shared_maywrite(vm_flags)) {
> -		int error = mapping_map_writable(file->f_mapping);
> -
> -		if (error)
> -			return error;
> -		writable_file_mapping = true;
> -	}
> -
> -	ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
> -
> -	/* Clear our write mapping regardless of error. */
> -	if (writable_file_mapping)
> -		mapping_unmap_writable(file->f_mapping);
> -
> -	validate_mm(current->mm);
> -	return ret;
> -}
> -
>  int vm_munmap(unsigned long start, size_t len)
>  {
>  	return __vm_munmap(start, len, false);
> diff --git a/mm/vma.c b/mm/vma.c
> index e37eb384d118..4cf2acc378ba 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -2427,7 +2427,7 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
>  	vma_set_page_prot(vma);
>  }
>  
> -unsigned long __mmap_region(struct file *file, unsigned long addr,
> +static unsigned long __mmap_region(struct file *file, unsigned long addr,
>  		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
>  		struct list_head *uf)
>  {
> @@ -2479,6 +2479,65 @@ unsigned long __mmap_region(struct file *file, unsigned long addr,
>  	return error;
>  }
>  
> +/**
> + * mmap_region() - Actually perform the userland mapping of a VMA into
> + * current->mm with known, aligned and overflow-checked @addr and @len, and
> + * correctly determined VMA flags @vm_flags and page offset @pgoff.
> + *
> + * This is an internal memory management function, and should not be used
> + * directly.
> + *
> + * The caller must write-lock current->mm->mmap_lock.
> + *
> + * @file: If a file-backed mapping, a pointer to the struct file describing the
> + * file to be mapped, otherwise NULL.
> + * @addr: The page-aligned address at which to perform the mapping.
> + * @len: The page-aligned, non-zero, length of the mapping.
> + * @vm_flags: The VMA flags which should be applied to the mapping.
> + * @pgoff: If @file is specified, the page offset into the file, if not then
> + * the virtual page offset in memory of the anonymous mapping.
> + * @uf: Optionally, a pointer to a list head used for tracking userfaultfd unmap
> + * events.
> + *
> + * Returns: Either an error, or the address at which the requested mapping has
> + * been performed.
> + */
> +unsigned long mmap_region(struct file *file, unsigned long addr,
> +			  unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
> +			  struct list_head *uf)
> +{
> +	unsigned long ret;
> +	bool writable_file_mapping = false;
> +
> +	mmap_assert_write_locked(current->mm);
> +
> +	/* Check to see if MDWE is applicable. */
> +	if (map_deny_write_exec(vm_flags, vm_flags))
> +		return -EACCES;
> +
> +	/* Allow architectures to sanity-check the vm_flags. */
> +	if (!arch_validate_flags(vm_flags))
> +		return -EINVAL;
> +
> +	/* Map writable and ensure this isn't a sealed memfd. */
> +	if (file && is_shared_maywrite(vm_flags)) {
> +		int error = mapping_map_writable(file->f_mapping);
> +
> +		if (error)
> +			return error;
> +		writable_file_mapping = true;
> +	}
> +
> +	ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
> +
> +	/* Clear our write mapping regardless of error. */
> +	if (writable_file_mapping)
> +		mapping_unmap_writable(file->f_mapping);
> +
> +	validate_mm(current->mm);
> +	return ret;
> +}
> +
>  /*
>   * do_brk_flags() - Increase the brk vma if the flags match.
>   * @vmi: The vma iterator
> diff --git a/mm/vma.h b/mm/vma.h
> index d6803626151d..41bb52594ffd 100644
> --- a/mm/vma.h
> +++ b/mm/vma.h
> @@ -245,7 +245,7 @@ bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
>  int mm_take_all_locks(struct mm_struct *mm);
>  void mm_drop_all_locks(struct mm_struct *mm);
>  
> -unsigned long __mmap_region(struct file *file, unsigned long addr,
> +unsigned long mmap_region(struct file *file, unsigned long addr,
>  		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
>  		struct list_head *uf);
>  
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index c7c580ec9a2d..49a85ce0d45a 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -41,6 +41,8 @@ extern unsigned long dac_mmap_min_addr;
>  #define VM_BUG_ON(_expr) (BUG_ON(_expr))
>  #define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
>  
> +#define MMF_HAS_MDWE	28
> +
>  #define VM_NONE		0x00000000
>  #define VM_READ		0x00000001
>  #define VM_WRITE	0x00000002
> @@ -222,6 +224,8 @@ struct mm_struct {
>  	unsigned long stack_vm;	   /* VM_STACK */
>  
>  	unsigned long def_flags;
> +
> +	unsigned long flags; /* Must use atomic bitops to access */
>  };
>  
>  struct file {
> @@ -1176,4 +1180,65 @@ static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
>  {
>  }
>  
> +/*
> + * Denies creating a writable executable mapping or gaining executable permissions.
> + *
> + * This denies the following:
> + *
> + *     a)      mmap(PROT_WRITE | PROT_EXEC)
> + *
> + *     b)      mmap(PROT_WRITE)
> + *             mprotect(PROT_EXEC)
> + *
> + *     c)      mmap(PROT_WRITE)
> + *             mprotect(PROT_READ)
> + *             mprotect(PROT_EXEC)
> + *
> + * But allows the following:
> + *
> + *     d)      mmap(PROT_READ | PROT_EXEC)
> + *             mmap(PROT_READ | PROT_EXEC | PROT_BTI)
> + *
> + * This is only applicable if the user has set the Memory-Deny-Write-Execute
> + * (MDWE) protection mask for the current process.
> + *
> + * @old specifies the VMA flags the VMA originally possessed, and @new the ones
> + * we propose to set.
> + *
> + * Return: false if proposed change is OK, true if not ok and should be denied.
> + */
> +static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
> +{
> +	/* If MDWE is disabled, we have nothing to deny. */
> +	if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
> +		return false;
> +
> +	/* If the new VMA is not executable, we have nothing to deny. */
> +	if (!(new & VM_EXEC))
> +		return false;
> +
> +	/* Under MDWE we do not accept newly writably executable VMAs... */
> +	if (new & VM_WRITE)
> +		return true;
> +
> +	/* ...nor previously non-executable VMAs becoming executable. */
> +	if (!(old & VM_EXEC))
> +		return true;
> +
> +	return false;
> +}
> +
> +static inline int mapping_map_writable(struct address_space *mapping)
> +{
> +	int c = atomic_read(&mapping->i_mmap_writable);
> +
> +	/* Derived from the raw_atomic_inc_unless_negative() implementation. */
> +	do {
> +		if (c < 0)
> +			return -EPERM;
> +	} while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1));
> +
> +	return 0;
> +}
> +
>  #endif	/* __MM_VMA_INTERNAL_H */
> -- 
> 2.47.1
>
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6939c2a8d90f..1a11f9df5c2d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3446,9 +3446,6 @@  get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 	return __get_unmapped_area(file, addr, len, pgoff, flags, 0);
 }
 
-extern unsigned long mmap_region(struct file *file, unsigned long addr,
-	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-	struct list_head *uf);
 extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
diff --git a/mm/mmap.c b/mm/mmap.c
index 7fdc4207fe98..7aa36216ecc0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1072,65 +1072,6 @@  int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	return do_vmi_munmap(&vmi, mm, start, len, uf, false);
 }
 
-/**
- * mmap_region() - Actually perform the userland mapping of a VMA into
- * current->mm with known, aligned and overflow-checked @addr and @len, and
- * correctly determined VMA flags @vm_flags and page offset @pgoff.
- *
- * This is an internal memory management function, and should not be used
- * directly.
- *
- * The caller must write-lock current->mm->mmap_lock.
- *
- * @file: If a file-backed mapping, a pointer to the struct file describing the
- * file to be mapped, otherwise NULL.
- * @addr: The page-aligned address at which to perform the mapping.
- * @len: The page-aligned, non-zero, length of the mapping.
- * @vm_flags: The VMA flags which should be applied to the mapping.
- * @pgoff: If @file is specified, the page offset into the file, if not then
- * the virtual page offset in memory of the anonymous mapping.
- * @uf: Optionally, a pointer to a list head used for tracking userfaultfd unmap
- * events.
- *
- * Returns: Either an error, or the address at which the requested mapping has
- * been performed.
- */
-unsigned long mmap_region(struct file *file, unsigned long addr,
-			  unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-			  struct list_head *uf)
-{
-	unsigned long ret;
-	bool writable_file_mapping = false;
-
-	mmap_assert_write_locked(current->mm);
-
-	/* Check to see if MDWE is applicable. */
-	if (map_deny_write_exec(vm_flags, vm_flags))
-		return -EACCES;
-
-	/* Allow architectures to sanity-check the vm_flags. */
-	if (!arch_validate_flags(vm_flags))
-		return -EINVAL;
-
-	/* Map writable and ensure this isn't a sealed memfd. */
-	if (file && is_shared_maywrite(vm_flags)) {
-		int error = mapping_map_writable(file->f_mapping);
-
-		if (error)
-			return error;
-		writable_file_mapping = true;
-	}
-
-	ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
-
-	/* Clear our write mapping regardless of error. */
-	if (writable_file_mapping)
-		mapping_unmap_writable(file->f_mapping);
-
-	validate_mm(current->mm);
-	return ret;
-}
-
 int vm_munmap(unsigned long start, size_t len)
 {
 	return __vm_munmap(start, len, false);
diff --git a/mm/vma.c b/mm/vma.c
index e37eb384d118..4cf2acc378ba 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2427,7 +2427,7 @@  static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
 	vma_set_page_prot(vma);
 }
 
-unsigned long __mmap_region(struct file *file, unsigned long addr,
+static unsigned long __mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
 		struct list_head *uf)
 {
@@ -2479,6 +2479,65 @@  unsigned long __mmap_region(struct file *file, unsigned long addr,
 	return error;
 }
 
+/**
+ * mmap_region() - Actually perform the userland mapping of a VMA into
+ * current->mm with known, aligned and overflow-checked @addr and @len, and
+ * correctly determined VMA flags @vm_flags and page offset @pgoff.
+ *
+ * This is an internal memory management function, and should not be used
+ * directly.
+ *
+ * The caller must write-lock current->mm->mmap_lock.
+ *
+ * @file: If a file-backed mapping, a pointer to the struct file describing the
+ * file to be mapped, otherwise NULL.
+ * @addr: The page-aligned address at which to perform the mapping.
+ * @len: The page-aligned, non-zero, length of the mapping.
+ * @vm_flags: The VMA flags which should be applied to the mapping.
+ * @pgoff: If @file is specified, the page offset into the file, if not then
+ * the virtual page offset in memory of the anonymous mapping.
+ * @uf: Optionally, a pointer to a list head used for tracking userfaultfd unmap
+ * events.
+ *
+ * Returns: Either an error, or the address at which the requested mapping has
+ * been performed.
+ */
+unsigned long mmap_region(struct file *file, unsigned long addr,
+			  unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+			  struct list_head *uf)
+{
+	unsigned long ret;
+	bool writable_file_mapping = false;
+
+	mmap_assert_write_locked(current->mm);
+
+	/* Check to see if MDWE is applicable. */
+	if (map_deny_write_exec(vm_flags, vm_flags))
+		return -EACCES;
+
+	/* Allow architectures to sanity-check the vm_flags. */
+	if (!arch_validate_flags(vm_flags))
+		return -EINVAL;
+
+	/* Map writable and ensure this isn't a sealed memfd. */
+	if (file && is_shared_maywrite(vm_flags)) {
+		int error = mapping_map_writable(file->f_mapping);
+
+		if (error)
+			return error;
+		writable_file_mapping = true;
+	}
+
+	ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
+
+	/* Clear our write mapping regardless of error. */
+	if (writable_file_mapping)
+		mapping_unmap_writable(file->f_mapping);
+
+	validate_mm(current->mm);
+	return ret;
+}
+
 /*
  * do_brk_flags() - Increase the brk vma if the flags match.
  * @vmi: The vma iterator
diff --git a/mm/vma.h b/mm/vma.h
index d6803626151d..41bb52594ffd 100644
--- a/mm/vma.h
+++ b/mm/vma.h
@@ -245,7 +245,7 @@  bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
 int mm_take_all_locks(struct mm_struct *mm);
 void mm_drop_all_locks(struct mm_struct *mm);
 
-unsigned long __mmap_region(struct file *file, unsigned long addr,
+unsigned long mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
 		struct list_head *uf);
 
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index c7c580ec9a2d..49a85ce0d45a 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -41,6 +41,8 @@  extern unsigned long dac_mmap_min_addr;
 #define VM_BUG_ON(_expr) (BUG_ON(_expr))
 #define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr))
 
+#define MMF_HAS_MDWE	28
+
 #define VM_NONE		0x00000000
 #define VM_READ		0x00000001
 #define VM_WRITE	0x00000002
@@ -222,6 +224,8 @@  struct mm_struct {
 	unsigned long stack_vm;	   /* VM_STACK */
 
 	unsigned long def_flags;
+
+	unsigned long flags; /* Must use atomic bitops to access */
 };
 
 struct file {
@@ -1176,4 +1180,65 @@  static inline void userfaultfd_unmap_complete(struct mm_struct *mm,
 {
 }
 
+/*
+ * Denies creating a writable executable mapping or gaining executable permissions.
+ *
+ * This denies the following:
+ *
+ *     a)      mmap(PROT_WRITE | PROT_EXEC)
+ *
+ *     b)      mmap(PROT_WRITE)
+ *             mprotect(PROT_EXEC)
+ *
+ *     c)      mmap(PROT_WRITE)
+ *             mprotect(PROT_READ)
+ *             mprotect(PROT_EXEC)
+ *
+ * But allows the following:
+ *
+ *     d)      mmap(PROT_READ | PROT_EXEC)
+ *             mmap(PROT_READ | PROT_EXEC | PROT_BTI)
+ *
+ * This is only applicable if the user has set the Memory-Deny-Write-Execute
+ * (MDWE) protection mask for the current process.
+ *
+ * @old specifies the VMA flags the VMA originally possessed, and @new the ones
+ * we propose to set.
+ *
+ * Return: false if proposed change is OK, true if not ok and should be denied.
+ */
+static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
+{
+	/* If MDWE is disabled, we have nothing to deny. */
+	if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
+		return false;
+
+	/* If the new VMA is not executable, we have nothing to deny. */
+	if (!(new & VM_EXEC))
+		return false;
+
+	/* Under MDWE we do not accept newly writably executable VMAs... */
+	if (new & VM_WRITE)
+		return true;
+
+	/* ...nor previously non-executable VMAs becoming executable. */
+	if (!(old & VM_EXEC))
+		return true;
+
+	return false;
+}
+
+static inline int mapping_map_writable(struct address_space *mapping)
+{
+	int c = atomic_read(&mapping->i_mmap_writable);
+
+	/* Derived from the raw_atomic_inc_unless_negative() implementation. */
+	do {
+		if (c < 0)
+			return -EPERM;
+	} while (!__sync_bool_compare_and_swap(&mapping->i_mmap_writable, c, c+1));
+
+	return 0;
+}
+
 #endif	/* __MM_VMA_INTERNAL_H */