
[v2,2/7] mm: Extend copy_vma()

Message ID 155836081252.2441.9024100415314519956.stgit@localhost.localdomain (mailing list archive)
State New, archived
Series mm: process_vm_mmap() -- syscall for duplicating a process mapping

Commit Message

Kirill Tkhai May 20, 2019, 2 p.m. UTC
This prepares the function to copy a vma between
two processes. Two new arguments are introduced.
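
For example, the existing mremap path keeps its current behaviour by
passing the vma's own mm and clear_flags_ctx == false, while a
cross-process user (such as the process_vm_mmap() path this series
introduces) is expected to pass the destination mm and true. A minimal
sketch of the old and new calling conventions (the remote_* caller
below is purely illustrative, not part of this patch):

  /* Old signature: the destination mm was always vma->vm_mm. */
  new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
                     &need_rmap_locks);

  /* New signature: destination mm and clear_flags_ctx are explicit.
   * mremap keeps today's semantics: same mm, nothing cleared. */
  new_vma = copy_vma(&vma, mm, new_addr, new_len, new_pgoff,
                     &need_rmap_locks, false);

  /* Hypothetical cross-process caller: duplicate the mapping into
   * another process and drop the uffd context and flags that must
   * not leak into the destination mm. */
  new_vma = copy_vma(&vma, remote_mm, remote_addr, new_len, new_pgoff,
                     &need_rmap_locks, true);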

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 include/linux/mm.h |    4 ++--
 mm/mmap.c          |   33 ++++++++++++++++++++++++---------
 mm/mremap.c        |    4 ++--
 3 files changed, 28 insertions(+), 13 deletions(-)

Comments

Kirill A. Shutemov May 21, 2019, 8:18 a.m. UTC | #1
On Mon, May 20, 2019 at 05:00:12PM +0300, Kirill Tkhai wrote:
> This prepares the function to copy a vma between
> two processes. Two new arguments are introduced.

This kind of change requires a lot more explanation in the commit message,
describing all possible corner cases.

For instance, I would really like to see a story on why logic around
need_rmap_locks is safe after the change.
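
(For context, need_rmap_locks is produced by copy_vma() and consumed by
move_ptes() roughly as follows; paraphrased sketch, not the exact
mm/mmap.c / mm/mremap.c code:)

  /* copy_vma(): only the "merged back into the source" case may need
   * the rmap locks; a freshly duplicated vma is not yet visible to
   * rmap walkers. */
  if (new_vma) {
          ...
          *need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
  } else {
          new_vma = vm_area_dup(vma);
          ...
          *need_rmap_locks = false;
  }

  /* move_ptes(): take the file/anon rmap locks before moving PTEs
   * when copy_vma() asked for it. */
  if (need_rmap_locks)
          take_rmap_locks(vma);  /* i_mmap_lock_write() + anon_vma_lock_write() */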

> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  include/linux/mm.h |    4 ++--
>  mm/mmap.c          |   33 ++++++++++++++++++++++++---------
>  mm/mremap.c        |    4 ++--
>  3 files changed, 28 insertions(+), 13 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 0e8834ac32b7..afe07e4a76f8 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2329,8 +2329,8 @@ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
>  	struct rb_node **, struct rb_node *);
>  extern void unlink_file_vma(struct vm_area_struct *);
>  extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
> -	unsigned long addr, unsigned long len, pgoff_t pgoff,
> -	bool *need_rmap_locks);
> +	struct mm_struct *, unsigned long addr, unsigned long len,
> +	pgoff_t pgoff, bool *need_rmap_locks, bool clear_flags_ctx);
>  extern void exit_mmap(struct mm_struct *);
>  
>  static inline int check_data_rlimit(unsigned long rlim,
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 57803a0a3a5c..99778e724ad1 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -3195,19 +3195,21 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
>  }
>  
>  /*
> - * Copy the vma structure to a new location in the same mm,
> - * prior to moving page table entries, to effect an mremap move.
> + * Copy the vma structure to a new location in the given mm,
> + * prior to moving page table entries, to effect an mremap move.
>   */
>  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
> -	unsigned long addr, unsigned long len, pgoff_t pgoff,
> -	bool *need_rmap_locks)
> +				struct mm_struct *mm, unsigned long addr,
> +				unsigned long len, pgoff_t pgoff,
> +				bool *need_rmap_locks, bool clear_flags_ctx)
>  {
>  	struct vm_area_struct *vma = *vmap;
>  	unsigned long vma_start = vma->vm_start;
> -	struct mm_struct *mm = vma->vm_mm;
> +	struct vm_userfaultfd_ctx uctx;
>  	struct vm_area_struct *new_vma, *prev;
>  	struct rb_node **rb_link, *rb_parent;
>  	bool faulted_in_anon_vma = true;
> +	unsigned long flags;
>  
>  	/*
>  	 * If anonymous vma has not yet been faulted, update new pgoff
> @@ -3220,15 +3222,25 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>  
>  	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
>  		return NULL;	/* should never get here */
> -	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
> -			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
> -			    vma->vm_userfaultfd_ctx);
> +
> +	uctx = vma->vm_userfaultfd_ctx;
> +	flags = vma->vm_flags;
> +	if (clear_flags_ctx) {
> +		uctx = NULL_VM_UFFD_CTX;
> +		flags &= ~(VM_UFFD_MISSING | VM_UFFD_WP | VM_MERGEABLE |
> +			   VM_LOCKED | VM_LOCKONFAULT | VM_WIPEONFORK |
> +			   VM_DONTCOPY);
> +	}

Why is the new logic required? No justification given.

> +
> +	new_vma = vma_merge(mm, prev, addr, addr + len, flags, vma->anon_vma,
> +			    vma->vm_file, pgoff, vma_policy(vma), uctx);
>  	if (new_vma) {
>  		/*
>  		 * Source vma may have been merged into new_vma
>  		 */
>  		if (unlikely(vma_start >= new_vma->vm_start &&
> -			     vma_start < new_vma->vm_end)) {
> +			     vma_start < new_vma->vm_end) &&
> +			     vma->vm_mm == mm) {

How can vma_merge() succeed if vma->vm_mm != mm?

>  			/*
>  			 * The only way we can get a vma_merge with
>  			 * self during an mremap is if the vma hasn't
> @@ -3249,6 +3261,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>  		new_vma = vm_area_dup(vma);
>  		if (!new_vma)
>  			goto out;
> +		new_vma->vm_mm = mm;
> +		new_vma->vm_flags = flags;
> +		new_vma->vm_userfaultfd_ctx = uctx;
>  		new_vma->vm_start = addr;
>  		new_vma->vm_end = addr + len;
>  		new_vma->vm_pgoff = pgoff;
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 37b5b2ad91be..9a96cfc28675 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -352,8 +352,8 @@ static unsigned long move_vma(struct vm_area_struct *vma,
>  		return err;
>  
>  	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
> -	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
> -			   &need_rmap_locks);
> +	new_vma = copy_vma(&vma, mm, new_addr, new_len, new_pgoff,
> +			   &need_rmap_locks, false);
>  	if (!new_vma)
>  		return -ENOMEM;
>  
>
Kirill Tkhai May 21, 2019, 8:48 a.m. UTC | #2
Hi, Kirill,

On 21.05.2019 11:18, Kirill A. Shutemov wrote:
> On Mon, May 20, 2019 at 05:00:12PM +0300, Kirill Tkhai wrote:
>> This prepares the function to copy a vma between
>> two processes. Two new arguments are introduced.
> 
> This kind of change requires a lot more explanation in the commit message,
> describing all possible corner cases.
> 
> For instance, I would really like to see a story on why logic around
> need_rmap_locks is safe after the change.

Let me first give a quick answer to the question below; I'll write a more
detailed explanation later, since that requires much more time.
 
>>
>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>> ---
>>  include/linux/mm.h |    4 ++--
>>  mm/mmap.c          |   33 ++++++++++++++++++++++++---------
>>  mm/mremap.c        |    4 ++--
>>  3 files changed, 28 insertions(+), 13 deletions(-)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 0e8834ac32b7..afe07e4a76f8 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -2329,8 +2329,8 @@ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
>>  	struct rb_node **, struct rb_node *);
>>  extern void unlink_file_vma(struct vm_area_struct *);
>>  extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
>> -	unsigned long addr, unsigned long len, pgoff_t pgoff,
>> -	bool *need_rmap_locks);
>> +	struct mm_struct *, unsigned long addr, unsigned long len,
>> +	pgoff_t pgoff, bool *need_rmap_locks, bool clear_flags_ctx);
>>  extern void exit_mmap(struct mm_struct *);
>>  
>>  static inline int check_data_rlimit(unsigned long rlim,
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index 57803a0a3a5c..99778e724ad1 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -3195,19 +3195,21 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
>>  }
>>  
>>  /*
>> - * Copy the vma structure to a new location in the same mm,
>> - * prior to moving page table entries, to effect an mremap move.
>> + * Copy the vma structure to a new location in the given mm,
>> + * prior to moving page table entries, to effect an mremap move.
>>   */
>>  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>> -	unsigned long addr, unsigned long len, pgoff_t pgoff,
>> -	bool *need_rmap_locks)
>> +				struct mm_struct *mm, unsigned long addr,
>> +				unsigned long len, pgoff_t pgoff,
>> +				bool *need_rmap_locks, bool clear_flags_ctx)
>>  {
>>  	struct vm_area_struct *vma = *vmap;
>>  	unsigned long vma_start = vma->vm_start;
>> -	struct mm_struct *mm = vma->vm_mm;
>> +	struct vm_userfaultfd_ctx uctx;
>>  	struct vm_area_struct *new_vma, *prev;
>>  	struct rb_node **rb_link, *rb_parent;
>>  	bool faulted_in_anon_vma = true;
>> +	unsigned long flags;
>>  
>>  	/*
>>  	 * If anonymous vma has not yet been faulted, update new pgoff
>> @@ -3220,15 +3222,25 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
>>  
>>  	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
>>  		return NULL;	/* should never get here */
>> -	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
>> -			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
>> -			    vma->vm_userfaultfd_ctx);
>> +
>> +	uctx = vma->vm_userfaultfd_ctx;
>> +	flags = vma->vm_flags;
>> +	if (clear_flags_ctx) {
>> +		uctx = NULL_VM_UFFD_CTX;
>> +		flags &= ~(VM_UFFD_MISSING | VM_UFFD_WP | VM_MERGEABLE |
>> +			   VM_LOCKED | VM_LOCKONFAULT | VM_WIPEONFORK |
>> +			   VM_DONTCOPY);
>> +	}
> 
> Why is the new logic required? No justification given.

Ditto.

>> +
>> +	new_vma = vma_merge(mm, prev, addr, addr + len, flags, vma->anon_vma,
>> +			    vma->vm_file, pgoff, vma_policy(vma), uctx);
>>  	if (new_vma) {
>>  		/*
>>  		 * Source vma may have been merged into new_vma
>>  		 */
>>  		if (unlikely(vma_start >= new_vma->vm_start &&
>> -			     vma_start < new_vma->vm_end)) {
>> +			     vma_start < new_vma->vm_end) &&
>> +			     vma->vm_mm == mm) {
> 
> How can vma_merge() succeed if vma->vm_mm != mm?

We don't pass vma itself as an argument to vma_merge(). We only use vma as
a source of vma->anon_vma, vma->vm_file and vma_policy().

We search mm for some new_vma with the same characteristics as vma has in
vma->vm_mm, and on success vma_merge() returns it to us. For example, it may
succeed when vma->vm_mm is the mm_struct of a forked process, while mm is the
mm_struct of its parent.
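
In other words, annotating the hunk above (sketch only):

  /* prev/rb_link/rb_parent come from the *destination* mm's tree... */
  if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
          return NULL;
  ...
  /* ...while vma only donates attributes: vma itself (and vma->vm_mm)
   * is never passed to vma_merge(), so a compatible neighbour in mm
   * can be found even when vma->vm_mm != mm. */
  new_vma = vma_merge(mm, prev, addr, addr + len, flags, vma->anon_vma,
                      vma->vm_file, pgoff, vma_policy(vma), uctx);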

[...]

Kirill

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e8834ac32b7..afe07e4a76f8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2329,8 +2329,8 @@  extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
 extern void unlink_file_vma(struct vm_area_struct *);
 extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
-	unsigned long addr, unsigned long len, pgoff_t pgoff,
-	bool *need_rmap_locks);
+	struct mm_struct *, unsigned long addr, unsigned long len,
+	pgoff_t pgoff, bool *need_rmap_locks, bool clear_flags_ctx);
 extern void exit_mmap(struct mm_struct *);
 
 static inline int check_data_rlimit(unsigned long rlim,
diff --git a/mm/mmap.c b/mm/mmap.c
index 57803a0a3a5c..99778e724ad1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3195,19 +3195,21 @@  int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 }
 
 /*
- * Copy the vma structure to a new location in the same mm,
- * prior to moving page table entries, to effect an mremap move.
+ * Copy the vma structure to a new location in the given mm,
+ * prior to moving page table entries, to effect an mremap move.
  */
 struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
-	unsigned long addr, unsigned long len, pgoff_t pgoff,
-	bool *need_rmap_locks)
+				struct mm_struct *mm, unsigned long addr,
+				unsigned long len, pgoff_t pgoff,
+				bool *need_rmap_locks, bool clear_flags_ctx)
 {
 	struct vm_area_struct *vma = *vmap;
 	unsigned long vma_start = vma->vm_start;
-	struct mm_struct *mm = vma->vm_mm;
+	struct vm_userfaultfd_ctx uctx;
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
 	bool faulted_in_anon_vma = true;
+	unsigned long flags;
 
 	/*
 	 * If anonymous vma has not yet been faulted, update new pgoff
@@ -3220,15 +3222,25 @@  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 
 	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
 		return NULL;	/* should never get here */
-	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
-			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-			    vma->vm_userfaultfd_ctx);
+
+	uctx = vma->vm_userfaultfd_ctx;
+	flags = vma->vm_flags;
+	if (clear_flags_ctx) {
+		uctx = NULL_VM_UFFD_CTX;
+		flags &= ~(VM_UFFD_MISSING | VM_UFFD_WP | VM_MERGEABLE |
+			   VM_LOCKED | VM_LOCKONFAULT | VM_WIPEONFORK |
+			   VM_DONTCOPY);
+	}
+
+	new_vma = vma_merge(mm, prev, addr, addr + len, flags, vma->anon_vma,
+			    vma->vm_file, pgoff, vma_policy(vma), uctx);
 	if (new_vma) {
 		/*
 		 * Source vma may have been merged into new_vma
 		 */
 		if (unlikely(vma_start >= new_vma->vm_start &&
-			     vma_start < new_vma->vm_end)) {
+			     vma_start < new_vma->vm_end) &&
+			     vma->vm_mm == mm) {
 			/*
 			 * The only way we can get a vma_merge with
 			 * self during an mremap is if the vma hasn't
@@ -3249,6 +3261,9 @@  struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		new_vma = vm_area_dup(vma);
 		if (!new_vma)
 			goto out;
+		new_vma->vm_mm = mm;
+		new_vma->vm_flags = flags;
+		new_vma->vm_userfaultfd_ctx = uctx;
 		new_vma->vm_start = addr;
 		new_vma->vm_end = addr + len;
 		new_vma->vm_pgoff = pgoff;
diff --git a/mm/mremap.c b/mm/mremap.c
index 37b5b2ad91be..9a96cfc28675 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -352,8 +352,8 @@  static unsigned long move_vma(struct vm_area_struct *vma,
 		return err;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
-	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
-			   &need_rmap_locks);
+	new_vma = copy_vma(&vma, mm, new_addr, new_len, new_pgoff,
+			   &need_rmap_locks, false);
 	if (!new_vma)
 		return -ENOMEM;