diff mbox series

[3/5] mm/hmm: Use mm_get_hmm() in hmm_range_register()

Message ID 20190506232942.12623-4-rcampbell@nvidia.com (mailing list archive)
State New, archived
Headers show
Series mm/hmm: HMM documentation updates and code fixes | expand

Commit Message

Ralph Campbell May 6, 2019, 11:29 p.m. UTC
From: Ralph Campbell <rcampbell@nvidia.com>

In hmm_range_register(), the call to hmm_get_or_create() implies that
hmm_range_register() could be called before hmm_mirror_register() when
in fact, that would violate the HMM API.

Use mm_get_hmm() instead of hmm_get_or_create() to get the HMM structure.

Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hmm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Comments

Jason Gunthorpe May 23, 2019, 12:51 p.m. UTC | #1
On Mon, May 06, 2019 at 04:29:40PM -0700, rcampbell@nvidia.com wrote:
> From: Ralph Campbell <rcampbell@nvidia.com>
> 
> In hmm_range_register(), the call to hmm_get_or_create() implies that
> hmm_range_register() could be called before hmm_mirror_register() when
> in fact, that would violate the HMM API.
> 
> Use mm_get_hmm() instead of hmm_get_or_create() to get the HMM structure.
> 
> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Cc: Balbir Singh <bsingharora@gmail.com>
> Cc: Dan Carpenter <dan.carpenter@oracle.com>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Souptick Joarder <jrdr.linux@gmail.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
>  mm/hmm.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/mm/hmm.c b/mm/hmm.c
> index f6c4c8633db9..2aa75dbed04a 100644
> +++ b/mm/hmm.c
> @@ -936,7 +936,7 @@ int hmm_range_register(struct hmm_range *range,
>  	range->start = start;
>  	range->end = end;
>  
> -	range->hmm = hmm_get_or_create(mm);
> +	range->hmm = mm_get_hmm(mm);
>  	if (!range->hmm)
>  		return -EFAULT;

I looked for documentation saying that hmm_range_register should only
be done inside a hmm_mirror_register and didn't see it. Did I miss it?
Can you add a comment? 

It is really good to fix this because it means we can rely on mmap sem
to manage mm->hmm!

If this is true then I also think we should change the signature of
the function to make this dependency relationship clear, and remove
some possible confusing edge cases.

What do you think about something like this? (unfinished)

commit 29098bd59cf481ad1915db40aefc8435dabb8b28
Author: Jason Gunthorpe <jgg@mellanox.com>
Date:   Thu May 23 09:41:19 2019 -0300

    mm/hmm: Use hmm_mirror not mm as an argument for hmm_register_range
    
    Ralf observes that hmm_register_range() can only be called by a driver
    while a mirror is registered. Make this clear in the API by passing
    in the mirror structure as a parameter.
    
    This also simplifies understanding the lifetime model for struct hmm,
    as the hmm pointer must be valid as part of a registered mirror
    so all we need in hmm_register_range() is a simple kref_get.
    
    Suggested-by: Ralph Campbell <rcampbell@nvidia.com>
    Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 8b91c90d3b88cb..87d29e085a69f7 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -503,7 +503,7 @@ static inline bool hmm_mirror_mm_is_alive(struct hmm_mirror *mirror)
  * Please see Documentation/vm/hmm.rst for how to use the range API.
  */
 int hmm_range_register(struct hmm_range *range,
-		       struct mm_struct *mm,
+		       struct hmm_mirror *mirror,
 		       unsigned long start,
 		       unsigned long end,
 		       unsigned page_shift);
@@ -539,7 +539,8 @@ static inline bool hmm_vma_range_done(struct hmm_range *range)
 }
 
 /* This is a temporary helper to avoid merge conflict between trees. */
-static inline int hmm_vma_fault(struct hmm_range *range, bool block)
+static inline int hmm_vma_fault(struct hmm_mirror *mirror,
+				struct hmm_range *range, bool block)
 {
 	long ret;
 
@@ -552,7 +553,7 @@ static inline int hmm_vma_fault(struct hmm_range *range, bool block)
 	range->default_flags = 0;
 	range->pfn_flags_mask = -1UL;
 
-	ret = hmm_range_register(range, range->vma->vm_mm,
+	ret = hmm_range_register(range, mirror,
 				 range->start, range->end,
 				 PAGE_SHIFT);
 	if (ret)
diff --git a/mm/hmm.c b/mm/hmm.c
index 824e7e160d8167..fa1b04fcfc2549 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -927,7 +927,7 @@ static void hmm_pfns_clear(struct hmm_range *range,
  * Track updates to the CPU page table see include/linux/hmm.h
  */
 int hmm_range_register(struct hmm_range *range,
-		       struct mm_struct *mm,
+		       struct hmm_mirror *mirror,
 		       unsigned long start,
 		       unsigned long end,
 		       unsigned page_shift)
@@ -935,7 +935,6 @@ int hmm_range_register(struct hmm_range *range,
 	unsigned long mask = ((1UL << page_shift) - 1UL);
 
 	range->valid = false;
-	range->hmm = NULL;
 
 	if ((start & mask) || (end & mask))
 		return -EINVAL;
@@ -946,15 +945,12 @@ int hmm_range_register(struct hmm_range *range,
 	range->start = start;
 	range->end = end;
 
-	range->hmm = hmm_get_or_create(mm);
-	if (!range->hmm)
-		return -EFAULT;
-
 	/* Check if hmm_mm_destroy() was call. */
-	if (range->hmm->mm == NULL || range->hmm->dead) {
-		hmm_put(range->hmm);
+	if (mirror->hmm->mm == NULL || mirror->hmm->dead)
 		return -EFAULT;
-	}
+
+	range->hmm = mirror->hmm;
+	kref_get(&range->hmm->kref);
 
 	/* Initialize range to track CPU page table update */
 	mutex_lock(&range->hmm->lock);
Ralph Campbell May 23, 2019, 6:19 p.m. UTC | #2
On 5/23/19 5:51 AM, Jason Gunthorpe wrote:
> On Mon, May 06, 2019 at 04:29:40PM -0700, rcampbell@nvidia.com wrote:
>> From: Ralph Campbell <rcampbell@nvidia.com>
>>
>> In hmm_range_register(), the call to hmm_get_or_create() implies that
>> hmm_range_register() could be called before hmm_mirror_register() when
>> in fact, that would violate the HMM API.
>>
>> Use mm_get_hmm() instead of hmm_get_or_create() to get the HMM structure.
>>
>> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
>> Cc: John Hubbard <jhubbard@nvidia.com>
>> Cc: Ira Weiny <ira.weiny@intel.com>
>> Cc: Dan Williams <dan.j.williams@intel.com>
>> Cc: Arnd Bergmann <arnd@arndb.de>
>> Cc: Balbir Singh <bsingharora@gmail.com>
>> Cc: Dan Carpenter <dan.carpenter@oracle.com>
>> Cc: Matthew Wilcox <willy@infradead.org>
>> Cc: Souptick Joarder <jrdr.linux@gmail.com>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>   mm/hmm.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/mm/hmm.c b/mm/hmm.c
>> index f6c4c8633db9..2aa75dbed04a 100644
>> +++ b/mm/hmm.c
>> @@ -936,7 +936,7 @@ int hmm_range_register(struct hmm_range *range,
>>   	range->start = start;
>>   	range->end = end;
>>   
>> -	range->hmm = hmm_get_or_create(mm);
>> +	range->hmm = mm_get_hmm(mm);
>>   	if (!range->hmm)
>>   		return -EFAULT;
> 
> I looked for documentation saying that hmm_range_register should only
> be done inside a hmm_mirror_register and didn't see it. Did I miss it?

Yes, hmm_mirror_register() has to be called before hmm_range_register().
Look at Documentation/vm/hmm.rst for "Address space mirroring 
implementation and API".

> Can you add a comment?

Sure, although I think your idea to pass hmm_mirror to 
hmm_range_register() makes sense and makes it clear.

> It is really good to fix this because it means we can rely on mmap sem
> to manage mm->hmm!
> 
> If this is true then I also think we should change the signature of
> the function to make this dependency relationship clear, and remove
> some possible confusing edge cases.
> 
> What do you think about something like this? (unfinished)
> 
> commit 29098bd59cf481ad1915db40aefc8435dabb8b28
> Author: Jason Gunthorpe <jgg@mellanox.com>
> Date:   Thu May 23 09:41:19 2019 -0300
> 
>      mm/hmm: Use hmm_mirror not mm as an argument for hmm_register_range
>      
>      Ralf observes that hmm_register_range() can only be called by a driver
>      while a mirror is registered. Make this clear in the API by passing
>      in the mirror structure as a parameter.
>      
>      This also simplifies understanding the lifetime model for struct hmm,
>      as the hmm pointer must be valid as part of a registered mirror
>      so all we need in hmm_register_range() is a simple kref_get.
>      
>      Suggested-by: Ralph Campbell <rcampbell@nvidia.com>
>      Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
> 
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 8b91c90d3b88cb..87d29e085a69f7 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -503,7 +503,7 @@ static inline bool hmm_mirror_mm_is_alive(struct hmm_mirror *mirror)
>    * Please see Documentation/vm/hmm.rst for how to use the range API.
>    */
>   int hmm_range_register(struct hmm_range *range,
> -		       struct mm_struct *mm,
> +		       struct hmm_mirror *mirror,
>   		       unsigned long start,
>   		       unsigned long end,
>   		       unsigned page_shift);
> @@ -539,7 +539,8 @@ static inline bool hmm_vma_range_done(struct hmm_range *range)
>   }
>   
>   /* This is a temporary helper to avoid merge conflict between trees. */
> -static inline int hmm_vma_fault(struct hmm_range *range, bool block)
> +static inline int hmm_vma_fault(struct hmm_mirror *mirror,
> +				struct hmm_range *range, bool block)
>   {
>   	long ret;
>   
> @@ -552,7 +553,7 @@ static inline int hmm_vma_fault(struct hmm_range *range, bool block)
>   	range->default_flags = 0;
>   	range->pfn_flags_mask = -1UL;
>   
> -	ret = hmm_range_register(range, range->vma->vm_mm,
> +	ret = hmm_range_register(range, mirror,
>   				 range->start, range->end,
>   				 PAGE_SHIFT);
>   	if (ret)
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 824e7e160d8167..fa1b04fcfc2549 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -927,7 +927,7 @@ static void hmm_pfns_clear(struct hmm_range *range,
>    * Track updates to the CPU page table see include/linux/hmm.h
>    */
>   int hmm_range_register(struct hmm_range *range,
> -		       struct mm_struct *mm,
> +		       struct hmm_mirror *mirror,
>   		       unsigned long start,
>   		       unsigned long end,
>   		       unsigned page_shift)
> @@ -935,7 +935,6 @@ int hmm_range_register(struct hmm_range *range,
>   	unsigned long mask = ((1UL << page_shift) - 1UL);
>   
>   	range->valid = false;
> -	range->hmm = NULL;
>   
>   	if ((start & mask) || (end & mask))
>   		return -EINVAL;
> @@ -946,15 +945,12 @@ int hmm_range_register(struct hmm_range *range,
>   	range->start = start;
>   	range->end = end;
>   
> -	range->hmm = hmm_get_or_create(mm);
> -	if (!range->hmm)
> -		return -EFAULT;
> -
>   	/* Check if hmm_mm_destroy() was call. */
> -	if (range->hmm->mm == NULL || range->hmm->dead) {
> -		hmm_put(range->hmm);
> +	if (mirror->hmm->mm == NULL || mirror->hmm->dead)
>   		return -EFAULT;
> -	}
> +
> +	range->hmm = mirror->hmm;
> +	kref_get(&range->hmm->kref);
>   
>   	/* Initialize range to track CPU page table update */
>   	mutex_lock(&range->hmm->lock);
>
John Hubbard May 23, 2019, 6:46 p.m. UTC | #3
On 5/23/19 5:51 AM, Jason Gunthorpe wrote:
> On Mon, May 06, 2019 at 04:29:40PM -0700, rcampbell@nvidia.com wrote:
>> From: Ralph Campbell <rcampbell@nvidia.com>
>>
>> In hmm_range_register(), the call to hmm_get_or_create() implies that
>> hmm_range_register() could be called before hmm_mirror_register() when
>> in fact, that would violate the HMM API.
>>
>> Use mm_get_hmm() instead of hmm_get_or_create() to get the HMM structure.
>>
>> Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
>> Cc: John Hubbard <jhubbard@nvidia.com>
>> Cc: Ira Weiny <ira.weiny@intel.com>
>> Cc: Dan Williams <dan.j.williams@intel.com>
>> Cc: Arnd Bergmann <arnd@arndb.de>
>> Cc: Balbir Singh <bsingharora@gmail.com>
>> Cc: Dan Carpenter <dan.carpenter@oracle.com>
>> Cc: Matthew Wilcox <willy@infradead.org>
>> Cc: Souptick Joarder <jrdr.linux@gmail.com>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>   mm/hmm.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/mm/hmm.c b/mm/hmm.c
>> index f6c4c8633db9..2aa75dbed04a 100644
>> +++ b/mm/hmm.c
>> @@ -936,7 +936,7 @@ int hmm_range_register(struct hmm_range *range,
>>   	range->start = start;
>>   	range->end = end;
>>   
>> -	range->hmm = hmm_get_or_create(mm);
>> +	range->hmm = mm_get_hmm(mm);
>>   	if (!range->hmm)
>>   		return -EFAULT;
> 
> I looked for documentation saying that hmm_range_register should only
> be done inside a hmm_mirror_register and didn't see it. Did I miss it?
> Can you add a comment?
> 
> It is really good to fix this because it means we can rely on mmap sem
> to manage mm->hmm!
> 
> If this is true then I also think we should change the signature of
> the function to make this dependency relationship clear, and remove
> some possible confusing edge cases.
> 
> What do you think about something like this? (unfinished)

I like it...

> 
> commit 29098bd59cf481ad1915db40aefc8435dabb8b28
> Author: Jason Gunthorpe <jgg@mellanox.com>
> Date:   Thu May 23 09:41:19 2019 -0300
> 
>      mm/hmm: Use hmm_mirror not mm as an argument for hmm_register_range
>      
>      Ralf observes that hmm_register_range() can only be called by a driver

       ^Ralph
:)

>      while a mirror is registered. Make this clear in the API by passing
>      in the mirror structure as a parameter.
>      
>      This also simplifies understanding the lifetime model for struct hmm,
>      as the hmm pointer must be valid as part of a registered mirror
>      so all we need in hmm_register_range() is a simple kref_get.
>      
>      Suggested-by: Ralph Campbell <rcampbell@nvidia.com>
>      Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
> 
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 8b91c90d3b88cb..87d29e085a69f7 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -503,7 +503,7 @@ static inline bool hmm_mirror_mm_is_alive(struct hmm_mirror *mirror)
>    * Please see Documentation/vm/hmm.rst for how to use the range API.
>    */
>   int hmm_range_register(struct hmm_range *range,
> -		       struct mm_struct *mm,
> +		       struct hmm_mirror *mirror,
>   		       unsigned long start,
>   		       unsigned long end,
>   		       unsigned page_shift);
> @@ -539,7 +539,8 @@ static inline bool hmm_vma_range_done(struct hmm_range *range)
>   }
>   
>   /* This is a temporary helper to avoid merge conflict between trees. */
> -static inline int hmm_vma_fault(struct hmm_range *range, bool block)
> +static inline int hmm_vma_fault(struct hmm_mirror *mirror,
> +				struct hmm_range *range, bool block)
>   {
>   	long ret;
>   
> @@ -552,7 +553,7 @@ static inline int hmm_vma_fault(struct hmm_range *range, bool block)
>   	range->default_flags = 0;
>   	range->pfn_flags_mask = -1UL;
>   
> -	ret = hmm_range_register(range, range->vma->vm_mm,
> +	ret = hmm_range_register(range, mirror,
>   				 range->start, range->end,
>   				 PAGE_SHIFT);
>   	if (ret)
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 824e7e160d8167..fa1b04fcfc2549 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -927,7 +927,7 @@ static void hmm_pfns_clear(struct hmm_range *range,
>    * Track updates to the CPU page table see include/linux/hmm.h
>    */
>   int hmm_range_register(struct hmm_range *range,
> -		       struct mm_struct *mm,
> +		       struct hmm_mirror *mirror,
>   		       unsigned long start,
>   		       unsigned long end,
>   		       unsigned page_shift)
> @@ -935,7 +935,6 @@ int hmm_range_register(struct hmm_range *range,
>   	unsigned long mask = ((1UL << page_shift) - 1UL);
>   
>   	range->valid = false;
> -	range->hmm = NULL;
>   
>   	if ((start & mask) || (end & mask))
>   		return -EINVAL;
> @@ -946,15 +945,12 @@ int hmm_range_register(struct hmm_range *range,
>   	range->start = start;
>   	range->end = end;
>   
> -	range->hmm = hmm_get_or_create(mm);
> -	if (!range->hmm)
> -		return -EFAULT;
> -
>   	/* Check if hmm_mm_destroy() was call. */
> -	if (range->hmm->mm == NULL || range->hmm->dead) {
> -		hmm_put(range->hmm);
> +	if (mirror->hmm->mm == NULL || mirror->hmm->dead)
>   		return -EFAULT;
> -	}
> +
> +	range->hmm = mirror->hmm;
> +	kref_get(&range->hmm->kref);
>   
>   	/* Initialize range to track CPU page table update */
>   	mutex_lock(&range->hmm->lock);
> 

So far, this looks very good to me. Passing the mirror around is an
elegant API solution to the "we must have a valid mirror in order to
call this function" constraint.


thanks,
diff mbox series

Patch

diff --git a/mm/hmm.c b/mm/hmm.c
index f6c4c8633db9..2aa75dbed04a 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -936,7 +936,7 @@  int hmm_range_register(struct hmm_range *range,
 	range->start = start;
 	range->end = end;
 
-	range->hmm = hmm_get_or_create(mm);
+	range->hmm = mm_get_hmm(mm);
 	if (!range->hmm)
 		return -EFAULT;