
[rfc,v2,04/10] s390: mm: use try_vma_locked_page_fault()

Message ID 20230821123056.2109942-5-wangkefeng.wang@huawei.com (mailing list archive)
State New, archived
Series mm: convert to generic VMA lock-based page fault

Commit Message

Kefeng Wang Aug. 21, 2023, 12:30 p.m. UTC
Use new try_vma_locked_page_fault() helper to simplify code.
No functional change intended.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/s390/mm/fault.c | 66 ++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 39 deletions(-)
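
For context, a minimal sketch of what the new helper is assumed to do, reconstructed from the open-coded VMA-lock block this patch removes. The real helper is introduced earlier in this RFC series, so its exact signature and details (for example how regs reaches handle_mm_fault(), or whether the access check goes through an arch hook) may differ:

static vm_fault_t try_vma_locked_page_fault(struct vm_fault *vmf)
{
	vm_fault_t fault = VM_FAULT_NONE;	/* 0: per-VMA lock path not used */
	struct vm_area_struct *vma;

	/* Only user-mode faults may try the per-VMA lock fast path. */
	if (!(vmf->flags & FAULT_FLAG_USER))
		return fault;

	vma = lock_vma_under_rcu(current->mm, vmf->real_address);
	if (!vma)
		return fault;

	/* Same access check as the removed "vma->vm_flags & access" test. */
	if (!(vma->vm_flags & vmf->vm_flags)) {
		vma_end_read(vma);
		return fault;
	}

	fault = handle_mm_fault(vma, vmf->real_address,
				vmf->flags | FAULT_FLAG_VMA_LOCK,
				NULL /* regs omitted in this sketch */);
	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
		vma_end_read(vma);

	if (fault & VM_FAULT_RETRY)
		count_vm_vma_lock_event(VMA_LOCK_RETRY);
	else
		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);

	return fault;
}

Note that the old per-arch "fault = 0" fixup for the success case is not part of this sketch, which is exactly what the first review comment below points out.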

Comments

Alexander Gordeev Aug. 24, 2023, 8:16 a.m. UTC | #1
On Mon, Aug 21, 2023 at 08:30:50PM +0800, Kefeng Wang wrote:
> Use new try_vma_locked_page_fault() helper to simplify code.
> No functional change intended.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  arch/s390/mm/fault.c | 66 ++++++++++++++++++--------------------------
>  1 file changed, 27 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
> index 099c4824dd8a..fbbdebde6ea7 100644
> --- a/arch/s390/mm/fault.c
> +++ b/arch/s390/mm/fault.c
> @@ -357,16 +357,18 @@ static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
>  static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  {
>  	struct gmap *gmap;
> -	struct task_struct *tsk;
> -	struct mm_struct *mm;
>  	struct vm_area_struct *vma;
>  	enum fault_type type;
> -	unsigned long address;
> -	unsigned int flags;
> +	struct mm_struct *mm = current->mm;
> +	unsigned long address = get_fault_address(regs);
>  	vm_fault_t fault;
>  	bool is_write;
> +	struct vm_fault vmf = {
> +		.real_address = address,
> +		.flags = FAULT_FLAG_DEFAULT,
> +		.vm_flags = access,
> +	};
>  
> -	tsk = current;
>  	/*
>  	 * The instruction that caused the program check has
>  	 * been nullified. Don't signal single step via SIGTRAP.
> @@ -376,8 +378,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  	if (kprobe_page_fault(regs, 14))
>  		return 0;
>  
> -	mm = tsk->mm;
> -	address = get_fault_address(regs);
>  	is_write = fault_is_write(regs);
>  
>  	/*
> @@ -398,45 +398,33 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  	}
>  
>  	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
> -	flags = FAULT_FLAG_DEFAULT;
>  	if (user_mode(regs))
> -		flags |= FAULT_FLAG_USER;
> +		vmf.flags |= FAULT_FLAG_USER;
>  	if (is_write)
> -		access = VM_WRITE;
> -	if (access == VM_WRITE)
> -		flags |= FAULT_FLAG_WRITE;
> -	if (!(flags & FAULT_FLAG_USER))
> -		goto lock_mmap;
> -	vma = lock_vma_under_rcu(mm, address);
> -	if (!vma)
> -		goto lock_mmap;
> -	if (!(vma->vm_flags & access)) {
> -		vma_end_read(vma);
> -		goto lock_mmap;
> -	}
> -	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
> -	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> -		vma_end_read(vma);
> -	if (!(fault & VM_FAULT_RETRY)) {
> -		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> -		if (likely(!(fault & VM_FAULT_ERROR)))
> -			fault = 0;

This fault fixup is removed in the new version.

> +		vmf.vm_flags = VM_WRITE;
> +	if (vmf.vm_flags == VM_WRITE)
> +		vmf.flags |= FAULT_FLAG_WRITE;
> +
> +	fault = try_vma_locked_page_fault(&vmf);
> +	if (fault == VM_FAULT_NONE)
> +		goto lock_mm;

Because VM_FAULT_NONE is defined as 0, it gets confused with
the success code of 0 returned by a fault handler. In the
former case we want to continue, while in the latter we want
to return successfully. I think this applies to all archs.
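
To illustrate with the lines from this patch (a minimal sketch; VM_FAULT_NONE and try_vma_locked_page_fault() are the names introduced by this series):

	fault = try_vma_locked_page_fault(&vmf);
	/*
	 * fault == 0 here can mean two different things:
	 * 1) VM_FAULT_NONE: the helper did not take (or gave up on) the
	 *    per-VMA lock path, so we must fall back to mmap_lock;
	 * 2) handle_mm_fault() handled the fault and returned 0 for
	 *    success, so we should simply return.
	 * The comparison below cannot tell the two apart, and a
	 * successfully handled fault would be redone under mmap_lock.
	 */
	if (fault == VM_FAULT_NONE)
		goto lock_mm;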

> +	if (!(fault & VM_FAULT_RETRY))
>  		goto out;
> -	}
> -	count_vm_vma_lock_event(VMA_LOCK_RETRY);
> +
>  	/* Quick path to respond to signals */
>  	if (fault_signal_pending(fault, regs)) {
>  		fault = VM_FAULT_SIGNAL;
>  		goto out;
>  	}
> -lock_mmap:
> +
> +lock_mm:
>  	mmap_read_lock(mm);
>  
>  	gmap = NULL;
>  	if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
>  		gmap = (struct gmap *) S390_lowcore.gmap;
>  		current->thread.gmap_addr = address;
> -		current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
> +		current->thread.gmap_write_flag = !!(vmf.flags & FAULT_FLAG_WRITE);
>  		current->thread.gmap_int_code = regs->int_code & 0xffff;
>  		address = __gmap_translate(gmap, address);
>  		if (address == -EFAULT) {
> @@ -444,7 +432,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  			goto out_up;
>  		}
>  		if (gmap->pfault_enabled)
> -			flags |= FAULT_FLAG_RETRY_NOWAIT;
> +			vmf.flags |= FAULT_FLAG_RETRY_NOWAIT;
>  	}
>  
>  retry:
> @@ -466,7 +454,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  	 * we can handle it..
>  	 */
>  	fault = VM_FAULT_BADACCESS;
> -	if (unlikely(!(vma->vm_flags & access)))
> +	if (unlikely(!(vma->vm_flags & vmf.vm_flags)))
>  		goto out_up;
>  
>  	/*
> @@ -474,10 +462,10 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  	 * make sure we exit gracefully rather than endlessly redo
>  	 * the fault.
>  	 */
> -	fault = handle_mm_fault(vma, address, flags, regs);
> +	fault = handle_mm_fault(vma, address, vmf.flags, regs);
>  	if (fault_signal_pending(fault, regs)) {
>  		fault = VM_FAULT_SIGNAL;
> -		if (flags & FAULT_FLAG_RETRY_NOWAIT)
> +		if (vmf.flags & FAULT_FLAG_RETRY_NOWAIT)
>  			goto out_up;
>  		goto out;
>  	}
> @@ -497,7 +485,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  
>  	if (fault & VM_FAULT_RETRY) {
>  		if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
> -			(flags & FAULT_FLAG_RETRY_NOWAIT)) {
> +			(vmf.flags & FAULT_FLAG_RETRY_NOWAIT)) {
>  			/*
>  			 * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has
>  			 * not been released
> @@ -506,8 +494,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  			fault = VM_FAULT_PFAULT;
>  			goto out_up;
>  		}
> -		flags &= ~FAULT_FLAG_RETRY_NOWAIT;
> -		flags |= FAULT_FLAG_TRIED;
> +		vmf.flags &= ~FAULT_FLAG_RETRY_NOWAIT;
> +		vmf.flags |= FAULT_FLAG_TRIED;
>  		mmap_read_lock(mm);
>  		goto retry;
>  	}

FWIW, this series ends up with kernel BUG at arch/s390/mm/fault.c:341!

Thanks!
Heiko Carstens Aug. 24, 2023, 8:32 a.m. UTC | #2
On Thu, Aug 24, 2023 at 10:16:33AM +0200, Alexander Gordeev wrote:
> On Mon, Aug 21, 2023 at 08:30:50PM +0800, Kefeng Wang wrote:
> > Use new try_vma_locked_page_fault() helper to simplify code.
> > No functional change intended.
> > 
> > Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> > ---
> >  arch/s390/mm/fault.c | 66 ++++++++++++++++++--------------------------
> >  1 file changed, 27 insertions(+), 39 deletions(-)
...
> > -	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
> > -	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> > -		vma_end_read(vma);
> > -	if (!(fault & VM_FAULT_RETRY)) {
> > -		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> > -		if (likely(!(fault & VM_FAULT_ERROR)))
> > -			fault = 0;
> 
> This fault fixup is removed in the new version.
...

> > +		vmf.vm_flags = VM_WRITE;
> > +	if (vmf.vm_flags == VM_WRITE)
> > +		vmf.flags |= FAULT_FLAG_WRITE;
> > +
> > +	fault = try_vma_locked_page_fault(&vmf);
> > +	if (fault == VM_FAULT_NONE)
> > +		goto lock_mm;
> 
> Because VM_FAULT_NONE is defined as 0, it gets confused with
> the success code of 0 returned by a fault handler. In the
> former case we want to continue, while in the latter we want
> to return successfully. I think this applies to all archs.
...
> FWIW, this series ends up with kernel BUG at arch/s390/mm/fault.c:341!

Without having looked in detail into this patch: all of this is likely
because s390's fault handling is quite odd. Not only because fault is set
to 0, but also because of the private VM_FAULT values like
VM_FAULT_BADCONTEXT. I'm just cleaning up all of this, but it won't make it
for the next merge window.

Therefore I'd like to ask to drop the s390 conversion of this series, and
if this series is supposed to be merged the s390 conversion needs to be
done later. Let's not waste more time on the current implementation,
please.
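
For reference, a rough sketch of the s390-private codes being referred to; the names are the ones used in arch/s390/mm/fault.c (and in the diff above), while the values shown here are illustrative and should be checked against the actual file:

#define VM_FAULT_BADCONTEXT	((__force vm_fault_t)0x010000)
#define VM_FAULT_BADACCESS	((__force vm_fault_t)0x040000)
#define VM_FAULT_SIGNAL		((__force vm_fault_t)0x080000)
#define VM_FAULT_PFAULT		((__force vm_fault_t)0x100000)

These live outside the generic VM_FAULT_* bit space and are decoded by s390's do_fault_error(), which is part of why a generic VMA-lock helper that returns raw handle_mm_fault() results does not mesh cleanly with this arch's fault path yet.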
Kefeng Wang Aug. 26, 2023, 1:07 a.m. UTC | #3
On 2023/8/24 16:32, Heiko Carstens wrote:
> On Thu, Aug 24, 2023 at 10:16:33AM +0200, Alexander Gordeev wrote:
>> On Mon, Aug 21, 2023 at 08:30:50PM +0800, Kefeng Wang wrote:
>>> Use new try_vma_locked_page_fault() helper to simplify code.
>>> No functional change intended.
>>>
>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>> ---
>>>   arch/s390/mm/fault.c | 66 ++++++++++++++++++--------------------------
>>>   1 file changed, 27 insertions(+), 39 deletions(-)
> ...
>>> -	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
>>> -	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>>> -		vma_end_read(vma);
>>> -	if (!(fault & VM_FAULT_RETRY)) {
>>> -		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>>> -		if (likely(!(fault & VM_FAULT_ERROR)))
>>> -			fault = 0;
>>
>> This fault fixup is removed in the new version.
> ...
> 
>>> +		vmf.vm_flags = VM_WRITE;
>>> +	if (vmf.vm_flags == VM_WRITE)
>>> +		vmf.flags |= FAULT_FLAG_WRITE;
>>> +
>>> +	fault = try_vma_locked_page_fault(&vmf);
>>> +	if (fault == VM_FAULT_NONE)
>>> +		goto lock_mm;
>>
>> Because VM_FAULT_NONE is defined as 0, it gets confused with
>> the success code of 0 returned by a fault handler. In the
>> former case we want to continue, while in the latter we want
>> to return successfully. I think this applies to all archs.
> ...
>> FWIW, this series ends up with kernel BUG at arch/s390/mm/fault.c:341!
> 

I only built this and did not test it; the RFC is meant to find out
whether adding three more members to vmf and using vmf in each arch's
page fault handler is a feasible approach.

> Without having looked in detail into this patch: all of this is likely
> because s390's fault handling is quite odd. Not only because fault is set
> to 0, but also because of the private VM_FAULT values like
> VM_FAULT_BADCONTEXT. I'm just cleaning up all of this, but it won't make it
> for the next merge window.

Sure, if I re-post I will drop the s390 change, but as mentioned above,
the abstraction of the generic VMA-locked fault path and the related
changes may not be perfect, so let's wait for more responses.

Thanks all.

> 
> Therefore I'd like to ask to drop the s390 conversion of this series, and
> if this series is supposed to be merged the s390 conversion needs to be
> done later. Let's not waste more time on the current implementation,
> please.

Patch

diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 099c4824dd8a..fbbdebde6ea7 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -357,16 +357,18 @@  static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
 static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 {
 	struct gmap *gmap;
-	struct task_struct *tsk;
-	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	enum fault_type type;
-	unsigned long address;
-	unsigned int flags;
+	struct mm_struct *mm = current->mm;
+	unsigned long address = get_fault_address(regs);
 	vm_fault_t fault;
 	bool is_write;
+	struct vm_fault vmf = {
+		.real_address = address,
+		.flags = FAULT_FLAG_DEFAULT,
+		.vm_flags = access,
+	};
 
-	tsk = current;
 	/*
 	 * The instruction that caused the program check has
 	 * been nullified. Don't signal single step via SIGTRAP.
@@ -376,8 +378,6 @@  static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 	if (kprobe_page_fault(regs, 14))
 		return 0;
 
-	mm = tsk->mm;
-	address = get_fault_address(regs);
 	is_write = fault_is_write(regs);
 
 	/*
@@ -398,45 +398,33 @@  static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 	}
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-	flags = FAULT_FLAG_DEFAULT;
 	if (user_mode(regs))
-		flags |= FAULT_FLAG_USER;
+		vmf.flags |= FAULT_FLAG_USER;
 	if (is_write)
-		access = VM_WRITE;
-	if (access == VM_WRITE)
-		flags |= FAULT_FLAG_WRITE;
-	if (!(flags & FAULT_FLAG_USER))
-		goto lock_mmap;
-	vma = lock_vma_under_rcu(mm, address);
-	if (!vma)
-		goto lock_mmap;
-	if (!(vma->vm_flags & access)) {
-		vma_end_read(vma);
-		goto lock_mmap;
-	}
-	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
-		vma_end_read(vma);
-	if (!(fault & VM_FAULT_RETRY)) {
-		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
-		if (likely(!(fault & VM_FAULT_ERROR)))
-			fault = 0;
+		vmf.vm_flags = VM_WRITE;
+	if (vmf.vm_flags == VM_WRITE)
+		vmf.flags |= FAULT_FLAG_WRITE;
+
+	fault = try_vma_locked_page_fault(&vmf);
+	if (fault == VM_FAULT_NONE)
+		goto lock_mm;
+	if (!(fault & VM_FAULT_RETRY))
 		goto out;
-	}
-	count_vm_vma_lock_event(VMA_LOCK_RETRY);
+
 	/* Quick path to respond to signals */
 	if (fault_signal_pending(fault, regs)) {
 		fault = VM_FAULT_SIGNAL;
 		goto out;
 	}
-lock_mmap:
+
+lock_mm:
 	mmap_read_lock(mm);
 
 	gmap = NULL;
 	if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) {
 		gmap = (struct gmap *) S390_lowcore.gmap;
 		current->thread.gmap_addr = address;
-		current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE);
+		current->thread.gmap_write_flag = !!(vmf.flags & FAULT_FLAG_WRITE);
 		current->thread.gmap_int_code = regs->int_code & 0xffff;
 		address = __gmap_translate(gmap, address);
 		if (address == -EFAULT) {
@@ -444,7 +432,7 @@  static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 			goto out_up;
 		}
 		if (gmap->pfault_enabled)
-			flags |= FAULT_FLAG_RETRY_NOWAIT;
+			vmf.flags |= FAULT_FLAG_RETRY_NOWAIT;
 	}
 
 retry:
@@ -466,7 +454,7 @@  static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 	 * we can handle it..
 	 */
 	fault = VM_FAULT_BADACCESS;
-	if (unlikely(!(vma->vm_flags & access)))
+	if (unlikely(!(vma->vm_flags & vmf.vm_flags)))
 		goto out_up;
 
 	/*
@@ -474,10 +462,10 @@  static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags, regs);
+	fault = handle_mm_fault(vma, address, vmf.flags, regs);
 	if (fault_signal_pending(fault, regs)) {
 		fault = VM_FAULT_SIGNAL;
-		if (flags & FAULT_FLAG_RETRY_NOWAIT)
+		if (vmf.flags & FAULT_FLAG_RETRY_NOWAIT)
 			goto out_up;
 		goto out;
 	}
@@ -497,7 +485,7 @@  static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 
 	if (fault & VM_FAULT_RETRY) {
 		if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
-			(flags & FAULT_FLAG_RETRY_NOWAIT)) {
+			(vmf.flags & FAULT_FLAG_RETRY_NOWAIT)) {
 			/*
 			 * FAULT_FLAG_RETRY_NOWAIT has been set, mmap_lock has
 			 * not been released
@@ -506,8 +494,8 @@  static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
 			fault = VM_FAULT_PFAULT;
 			goto out_up;
 		}
-		flags &= ~FAULT_FLAG_RETRY_NOWAIT;
-		flags |= FAULT_FLAG_TRIED;
+		vmf.flags &= ~FAULT_FLAG_RETRY_NOWAIT;
+		vmf.flags |= FAULT_FLAG_TRIED;
 		mmap_read_lock(mm);
 		goto retry;
 	}