[hmm,6/8] mm/hmm: reorganize how !pte_present is handled in hmm_vma_handle_pte()

Message ID 20200311183506.3997-7-jgg@ziepe.ca (mailing list archive)
State New, archived
Series Various error case bug fixes for hmm_range_fault()

Commit Message

Jason Gunthorpe March 11, 2020, 6:35 p.m. UTC
From: Jason Gunthorpe <jgg@mellanox.com>

The intent of this code is to determine whether the caller requires the
pages to be valid, and if so, to take some action to make them valid.
The action varies depending on the page type.

In all cases, if the caller doesn't ask for the page, then
hmm_range_fault() should not return an error.

Revise the implementation to be clearer, and fix some bugs:

 - hmm_pte_need_fault() must always be called before testing fault or
   write_fault, otherwise the defaults of false apply and the if()s
   don't work. This was missed in the is_migration_entry() branch.

 - -EFAULT should not be returned unless hmm_pte_need_fault() indicates
   a fault is required, i.e. snapshotting should not fail.

 - For !pte_present() the cpu_flags are always 0, except in the special
   case of is_device_private_entry(), so calling pte_to_hmm_pfn_flags()
   is confusing.

Reorganize the flow so that it always follows the pattern of calling
hmm_pte_need_fault() and then checking fault || write_fault.
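
The resulting shape, common to every branch, is roughly the following
sketch (fault and write_fault are locals of hmm_vma_handle_pte() that
default to false, which is why hmm_pte_need_fault() must run first):

	bool fault = false, write_fault = false;

	/* Work out whether the caller asked for this page first ... */
	hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
			   &fault, &write_fault);

	/*
	 * ... otherwise the defaults of false leave this test a no-op
	 * and the fault path is never taken
	 */
	if (fault || write_fault)
		goto fault;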

Fixes: 2aee09d8c116 ("mm/hmm: change hmm_vma_fault() to allow write fault on page basis")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 mm/hmm.c | 35 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 20 deletions(-)

Comments

Ralph Campbell March 12, 2020, 1:36 a.m. UTC
On 3/11/20 11:35 AM, Jason Gunthorpe wrote:
> [...]

Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>

Christoph Hellwig March 16, 2020, 9:11 a.m. UTC
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

Patch

diff --git a/mm/hmm.c b/mm/hmm.c
index e10cd0adba7b37..bf676cfef3e8ee 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -282,15 +282,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	if (!pte_present(pte)) {
 		swp_entry_t entry = pte_to_swp_entry(pte);
 
-		if (!non_swap_entry(entry)) {
-			cpu_flags = pte_to_hmm_pfn_flags(range, pte);
-			hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
-					   &fault, &write_fault);
-			if (fault || write_fault)
-				goto fault;
-			return 0;
-		}
-
 		/*
 		 * This is a special swap entry, ignore migration, use
 		 * device and report anything else as error.
@@ -310,26 +301,30 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 			return 0;
 		}
 
-		if (is_migration_entry(entry)) {
-			if (fault || write_fault) {
-				pte_unmap(ptep);
-				hmm_vma_walk->last = addr;
-				migration_entry_wait(walk->mm, pmdp, addr);
-				return -EBUSY;
-			}
+		hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, &fault,
+				   &write_fault);
+		if (!fault && !write_fault)
 			return 0;
+
+		if (!non_swap_entry(entry))
+			goto fault;
+
+		if (is_migration_entry(entry)) {
+			pte_unmap(ptep);
+			hmm_vma_walk->last = addr;
+			migration_entry_wait(walk->mm, pmdp, addr);
+			return -EBUSY;
 		}
 
 		/* Report error for everything else */
 		pte_unmap(ptep);
 		*pfn = range->values[HMM_PFN_ERROR];
 		return -EFAULT;
-	} else {
-		cpu_flags = pte_to_hmm_pfn_flags(range, pte);
-		hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
-				   &fault, &write_fault);
 	}
 
+	cpu_flags = pte_to_hmm_pfn_flags(range, pte);
+	hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, &fault,
+			   &write_fault);
 	if (fault || write_fault)
 		goto fault;
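
For reference, the !pte_present() path of hmm_vma_handle_pte() after
this patch reads roughly as follows (a sketch reconstructed from the
hunks above; the is_device_private_entry() handling between the two
hunks is unchanged by this patch and elided here):

	if (!pte_present(pte)) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		/* ... is_device_private_entry() handling, unchanged ... */

		/* Every remaining !pte_present() case has cpu_flags == 0 */
		hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, &fault,
				   &write_fault);
		if (!fault && !write_fault)
			return 0;	/* pure snapshot, never an error */

		if (!non_swap_entry(entry))
			goto fault;	/* ordinary swap entry */

		if (is_migration_entry(entry)) {
			pte_unmap(ptep);
			hmm_vma_walk->last = addr;
			migration_entry_wait(walk->mm, pmdp, addr);
			return -EBUSY;
		}

		/* Report error for everything else */
		pte_unmap(ptep);
		*pfn = range->values[HMM_PFN_ERROR];
		return -EFAULT;
	}

	cpu_flags = pte_to_hmm_pfn_flags(range, pte);
	hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, &fault,
			   &write_fault);
	if (fault || write_fault)
		goto fault;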