diff mbox series

[3/6] mremap: Don't allow MREMAP_DONTUNMAP on special_mappings and aio

Message ID 20201013013416.390574-4-dima@arista.com (mailing list archive)
State New, archived
Headers show
Series mremap: move_vma() fixes | expand

Commit Message

Dmitry Safonov Oct. 13, 2020, 1:34 a.m. UTC
As the kernel expects to see only one of such mappings, any further
operations on the VMA copy may be unexpected by the kernel.
Maybe this is just being on the safe side, but there doesn't seem to be any
expected use case for this, so restrict it now.

Fixes: e346b3813067 ("mm/mremap: add MREMAP_DONTUNMAP to mremap()")
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
 arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 2 +-
 fs/aio.c                                  | 5 ++++-
 include/linux/mm.h                        | 2 +-
 mm/mmap.c                                 | 6 +++++-
 mm/mremap.c                               | 2 +-
 5 files changed, 12 insertions(+), 5 deletions(-)

Comments

Brian Geffon Dec. 28, 2020, 6:03 p.m. UTC | #1
I don't think this situation can ever happen: MREMAP_DONTUNMAP is
already restricted to anonymous mappings (defined as not having
vm_ops), and vma_to_resize() checks that the mapping is anonymous before
move_vma() is called.



On Mon, Oct 12, 2020 at 6:34 PM Dmitry Safonov <dima@arista.com> wrote:
>
> As kernel expect to see only one of such mappings, any further
> operations on the VMA-copy may be unexpected by the kernel.
> Maybe it's being on the safe side, but there doesn't seem to be any
> expected use-case for this, so restrict it now.
>
> Fixes: commit e346b3813067 ("mm/mremap: add MREMAP_DONTUNMAP to mremap()")
> Signed-off-by: Dmitry Safonov <dima@arista.com>
> ---
>  arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 2 +-
>  fs/aio.c                                  | 5 ++++-
>  include/linux/mm.h                        | 2 +-
>  mm/mmap.c                                 | 6 +++++-
>  mm/mremap.c                               | 2 +-
>  5 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> index 0daf2f1cf7a8..e916646adc69 100644
> --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
> @@ -1458,7 +1458,7 @@ static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
>         return 0;
>  }
>
> -static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
> +static int pseudo_lock_dev_mremap(struct vm_area_struct *area, unsigned long flags)
>  {
>         /* Not supported */
>         return -EINVAL;
> diff --git a/fs/aio.c b/fs/aio.c
> index d5ec30385566..3be3c0f77548 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -324,13 +324,16 @@ static void aio_free_ring(struct kioctx *ctx)
>         }
>  }
>
> -static int aio_ring_mremap(struct vm_area_struct *vma)
> +static int aio_ring_mremap(struct vm_area_struct *vma, unsigned long flags)
>  {
>         struct file *file = vma->vm_file;
>         struct mm_struct *mm = vma->vm_mm;
>         struct kioctx_table *table;
>         int i, res = -EINVAL;
>
> +       if (flags & MREMAP_DONTUNMAP)
> +               return -EINVAL;
> +
>         spin_lock(&mm->ioctx_lock);
>         rcu_read_lock();
>         table = rcu_dereference(mm->ioctx_table);
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 16b799a0522c..fd51a4a1f722 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -550,7 +550,7 @@ struct vm_operations_struct {
>         void (*open)(struct vm_area_struct * area);
>         void (*close)(struct vm_area_struct * area);
>         int (*split)(struct vm_area_struct * area, unsigned long addr);
> -       int (*mremap)(struct vm_area_struct * area);
> +       int (*mremap)(struct vm_area_struct *area, unsigned long flags);
>         vm_fault_t (*fault)(struct vm_fault *vmf);
>         vm_fault_t (*huge_fault)(struct vm_fault *vmf,
>                         enum page_entry_size pe_size);
> diff --git a/mm/mmap.c b/mm/mmap.c
> index bdd19f5b994e..50f853b0ec39 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -3372,10 +3372,14 @@ static const char *special_mapping_name(struct vm_area_struct *vma)
>         return ((struct vm_special_mapping *)vma->vm_private_data)->name;
>  }
>
> -static int special_mapping_mremap(struct vm_area_struct *new_vma)
> +static int special_mapping_mremap(struct vm_area_struct *new_vma,
> +                                 unsigned long flags)
>  {
>         struct vm_special_mapping *sm = new_vma->vm_private_data;
>
> +       if (flags & MREMAP_DONTUNMAP)
> +               return -EINVAL;
> +
>         if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
>                 return -EFAULT;
>
> diff --git a/mm/mremap.c b/mm/mremap.c
> index c248f9a52125..898e9818ba6d 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -384,7 +384,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
>         if (moved_len < old_len) {
>                 err = -ENOMEM;
>         } else if (vma->vm_ops && vma->vm_ops->mremap) {
> -               err = vma->vm_ops->mremap(new_vma);
> +               err = vma->vm_ops->mremap(new_vma, flags);
>         }
>
>         if (unlikely(err)) {
> --
> 2.28.0
>
Dmitry Safonov Dec. 28, 2020, 7:33 p.m. UTC | #2
[I moved your reply to avoid top-posting]

On 12/28/20 6:03 PM, Brian Geffon wrote:
> On Mon, Oct 12, 2020 at 6:34 PM Dmitry Safonov <dima@arista.com> wrote:
>>
>> As kernel expect to see only one of such mappings, any further
>> operations on the VMA-copy may be unexpected by the kernel.
>> Maybe it's being on the safe side, but there doesn't seem to be any
>> expected use-case for this, so restrict it now.
>>
>> Fixes: commit e346b3813067 ("mm/mremap: add MREMAP_DONTUNMAP to mremap()")
>> Signed-off-by: Dmitry Safonov <dima@arista.com>
>
> I don't think this situation can ever happen MREMAP_DONTUNMAP is
> already restricted to anonymous mappings (defined as not having
> vm_ops) and vma_to_resize checks that the mapping is anonymous before
> move_vma is called.

I've looked again now, and I think it is possible. One can call mremap()
with MREMAP_DONTUNMAP but without MREMAP_FIXED and without resizing, so
that the old VMA is copied to some free address.

The calltrace would be: mremap()=>move_vma()
[under if (flags & MREMAP_MAYMOVE)].

On the other hand, I agree with you that the fix could have been better
if I had realized that MREMAP_DONTUNMAP is only meant to work with
anonymous mappings.

Probably, a better fix would be to move
:       if (flags & MREMAP_DONTUNMAP && (!vma_is_anonymous(vma) ||
:                       vma->vm_flags & VM_SHARED))
:               return ERR_PTR(-EINVAL);

from vma_to_resize() into the mremap() syscall directly.
What do you think?
Dmitry Safonov Dec. 28, 2020, 7:43 p.m. UTC | #3
On 12/28/20 7:33 PM, Dmitry Safonov wrote:
> [I moved your reply to avoid top-posting]
> 
> On 12/28/20 6:03 PM, Brian Geffon wrote:
>> On Mon, Oct 12, 2020 at 6:34 PM Dmitry Safonov <dima@arista.com> wrote:
>>>
>>> As kernel expect to see only one of such mappings, any further
>>> operations on the VMA-copy may be unexpected by the kernel.
>>> Maybe it's being on the safe side, but there doesn't seem to be any
>>> expected use-case for this, so restrict it now.
>>>
>>> Fixes: commit e346b3813067 ("mm/mremap: add MREMAP_DONTUNMAP to mremap()")
>>> Signed-off-by: Dmitry Safonov <dima@arista.com>
>>
>> I don't think this situation can ever happen MREMAP_DONTUNMAP is
>> already restricted to anonymous mappings (defined as not having
>> vm_ops) and vma_to_resize checks that the mapping is anonymous before
>> move_vma is called.
> 
> I've looked again now, I think it is possible. One can call
> MREMAP_DONTUNMAP without MREMAP_FIXED and without resizing. So that the
> old VMA is copied at some free address.
> 
> The calltrace would be: mremap()=>move_vma()
> [under if (flags & MREMAP_MAYMOVE)].
> 
> On the other side I agree with you that the fix could have been better
> if I realized the semantics that MREMAP_DONTUNMAP should only work with
> anonymous mappings.
> 
> Probably, a better fix would be to move
> :       if (flags & MREMAP_DONTUNMAP && (!vma_is_anonymous(vma) ||
> :                       vma->vm_flags & VM_SHARED))
> :               return ERR_PTR(-EINVAL);
> 
> from vma_to_resize() into the mremap() syscall directly.
> What do you think?

OK, I misread the code: vma_to_resize() is called before move_vma().
I'll send a revert of this one.

Thanks for noticing,
          Dima
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index 0daf2f1cf7a8..e916646adc69 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1458,7 +1458,7 @@  static int pseudo_lock_dev_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static int pseudo_lock_dev_mremap(struct vm_area_struct *area)
+static int pseudo_lock_dev_mremap(struct vm_area_struct *area, unsigned long flags)
 {
 	/* Not supported */
 	return -EINVAL;
diff --git a/fs/aio.c b/fs/aio.c
index d5ec30385566..3be3c0f77548 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -324,13 +324,16 @@  static void aio_free_ring(struct kioctx *ctx)
 	}
 }
 
-static int aio_ring_mremap(struct vm_area_struct *vma)
+static int aio_ring_mremap(struct vm_area_struct *vma, unsigned long flags)
 {
 	struct file *file = vma->vm_file;
 	struct mm_struct *mm = vma->vm_mm;
 	struct kioctx_table *table;
 	int i, res = -EINVAL;
 
+	if (flags & MREMAP_DONTUNMAP)
+		return -EINVAL;
+
 	spin_lock(&mm->ioctx_lock);
 	rcu_read_lock();
 	table = rcu_dereference(mm->ioctx_table);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 16b799a0522c..fd51a4a1f722 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -550,7 +550,7 @@  struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
 	int (*split)(struct vm_area_struct * area, unsigned long addr);
-	int (*mremap)(struct vm_area_struct * area);
+	int (*mremap)(struct vm_area_struct *area, unsigned long flags);
 	vm_fault_t (*fault)(struct vm_fault *vmf);
 	vm_fault_t (*huge_fault)(struct vm_fault *vmf,
 			enum page_entry_size pe_size);
diff --git a/mm/mmap.c b/mm/mmap.c
index bdd19f5b994e..50f853b0ec39 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3372,10 +3372,14 @@  static const char *special_mapping_name(struct vm_area_struct *vma)
 	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
 }
 
-static int special_mapping_mremap(struct vm_area_struct *new_vma)
+static int special_mapping_mremap(struct vm_area_struct *new_vma,
+				  unsigned long flags)
 {
 	struct vm_special_mapping *sm = new_vma->vm_private_data;
 
+	if (flags & MREMAP_DONTUNMAP)
+		return -EINVAL;
+
 	if (WARN_ON_ONCE(current->mm != new_vma->vm_mm))
 		return -EFAULT;
 
diff --git a/mm/mremap.c b/mm/mremap.c
index c248f9a52125..898e9818ba6d 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -384,7 +384,7 @@  static unsigned long move_vma(struct vm_area_struct *vma,
 	if (moved_len < old_len) {
 		err = -ENOMEM;
 	} else if (vma->vm_ops && vma->vm_ops->mremap) {
-		err = vma->vm_ops->mremap(new_vma);
+		err = vma->vm_ops->mremap(new_vma, flags);
 	}
 
 	if (unlikely(err)) {