Message ID | 20230821123056.2109942-6-wangkefeng.wang@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Series | mm: convert to generic VMA lock-based page fault |
On 21/08/2023 at 14:30, Kefeng Wang wrote:
> Use new try_vma_locked_page_fault() helper to simplify code.
> No functional change intended.

Does it really simplify the code? It's 32 insertions versus 34 deletions, so it only removes 2 lines.

I don't like the struct vm_fault you are adding, because when it was four independent variables it was handled through local registers. Now that it is a struct it has to go via the stack, leading to unnecessary memory reads and writes. And going back and forth between architecture code and generic code may also hurt performance.

Did you make any performance analysis? Page faults are really a hot path when dealing with minor faults.

Thanks
Christophe

> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  arch/powerpc/mm/fault.c | 66 ++++++++++++++++++++---------------------
>  1 file changed, 32 insertions(+), 34 deletions(-)

On 2023/8/22 17:38, Christophe Leroy wrote:
>
> On 21/08/2023 at 14:30, Kefeng Wang wrote:
>> Use new try_vma_locked_page_fault() helper to simplify code.
>> No functional change intended.
>
> Does it really simplify the code? It's 32 insertions versus 34 deletions,
> so it only removes 2 lines.

Yes, it is unfriendly for powerpc, as the arch's vma access check is more
complex than on other architectures.

>
> I don't like the struct vm_fault you are adding, because when it was four
> independent variables it was handled through local registers. Now that
> it is a struct it has to go via the stack, leading to unnecessary memory
> reads and writes. And going back and forth between architecture code and
> generic code may also hurt performance.

Because different architectures need different variables to check vma
access, the easy way is to put them into vmf; I don't find a better way.

>
> Did you make any performance analysis? Page faults are really a hot
> path when dealing with minor faults.

No, this is only build-tested; it is an RFC to get feedback on the
conversion. Thanks.

>
> Thanks
> Christophe
>
>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>> ---
>>  arch/powerpc/mm/fault.c | 66 ++++++++++++++++++++---------------------
>>  1 file changed, 32 insertions(+), 34 deletions(-)

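The generic try_vma_locked_page_fault() helper that this patch calls is introduced by an earlier patch in the series and is not shown here. As a rough, hypothetical sketch (inferred from the open-coded powerpc per-VMA-lock path that the diff below removes, not taken from the series' actual generic code), it is expected to do something like the following:

/*
 * Hypothetical sketch only: inferred from the powerpc code removed in the
 * diff below, not copied from the series' generic implementation, which
 * may differ in naming and details.  VM_FAULT_NONE means the per-VMA-lock
 * path was not usable and the caller should fall back to mmap_lock.
 */
vm_fault_t try_vma_locked_page_fault(struct vm_fault *vmf)
{
        vm_fault_t fault = VM_FAULT_NONE;
        struct vm_area_struct *vma;

        /* Only user faults are tried under the per-VMA lock. */
        if (!(vmf->flags & FAULT_FLAG_USER))
                return fault;

        vma = lock_vma_under_rcu(current->mm, vmf->real_address);
        if (!vma)
                return fault;

        /* Per-arch access check, e.g. the powerpc arch_vma_access_error() below. */
        if (arch_vma_access_error(vma, vmf)) {
                vma_end_read(vma);
                return fault;
        }

        fault = handle_mm_fault(vma, vmf->real_address,
                                vmf->flags | FAULT_FLAG_VMA_LOCK, vmf->regs);
        if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
                vma_end_read(vma);

        if (fault & VM_FAULT_RETRY)
                count_vm_vma_lock_event(VMA_LOCK_RETRY);
        else
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);

        return fault;
}

Under that assumed convention, a VM_FAULT_NONE return means the per-VMA-lock path was not taken, so the caller's "if (fault == VM_FAULT_NONE) goto retry;" presumably resumes at the existing mmap_lock slow path, with vmf.flags carrying FAULT_FLAG_TRIED across retries just as the old local flags variable did.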
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b1723094d464..52f9546e020e 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -391,6 +391,22 @@ static int page_fault_is_bad(unsigned long err)
 #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
 #endif
 
+#ifdef CONFIG_PER_VMA_LOCK
+bool arch_vma_access_error(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+        int is_exec = TRAP(vmf->regs) == INTERRUPT_INST_STORAGE;
+        int is_write = page_fault_is_write(vmf->fault_code);
+
+        if (unlikely(access_pkey_error(is_write, is_exec,
+                                       (vmf->fault_code & DSISR_KEYFAULT), vma)))
+                return true;
+
+        if (unlikely(access_error(is_write, is_exec, vma)))
+                return true;
+        return false;
+}
+#endif
+
 /*
  * For 600- and 800-family processors, the error_code parameter is DSISR
  * for a data fault, SRR1 for an instruction fault.
@@ -407,12 +423,18 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
 {
        struct vm_area_struct * vma;
        struct mm_struct *mm = current->mm;
-       unsigned int flags = FAULT_FLAG_DEFAULT;
        int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
        int is_user = user_mode(regs);
        int is_write = page_fault_is_write(error_code);
        vm_fault_t fault, major = 0;
        bool kprobe_fault = kprobe_page_fault(regs, 11);
+       struct vm_fault vmf = {
+               .real_address = address,
+               .fault_code = error_code,
+               .regs = regs,
+               .flags = FAULT_FLAG_DEFAULT,
+       };
+
 
        if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
                return 0;
@@ -463,45 +485,21 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
         * mmap_lock held
         */
        if (is_user)
-               flags |= FAULT_FLAG_USER;
+               vmf.flags |= FAULT_FLAG_USER;
        if (is_write)
-               flags |= FAULT_FLAG_WRITE;
+               vmf.flags |= FAULT_FLAG_WRITE;
        if (is_exec)
-               flags |= FAULT_FLAG_INSTRUCTION;
+               vmf.flags |= FAULT_FLAG_INSTRUCTION;
 
-       if (!(flags & FAULT_FLAG_USER))
-               goto lock_mmap;
-
-       vma = lock_vma_under_rcu(mm, address);
-       if (!vma)
-               goto lock_mmap;
-
-       if (unlikely(access_pkey_error(is_write, is_exec,
-                                      (error_code & DSISR_KEYFAULT), vma))) {
-               vma_end_read(vma);
-               goto lock_mmap;
-       }
-
-       if (unlikely(access_error(is_write, is_exec, vma))) {
-               vma_end_read(vma);
-               goto lock_mmap;
-       }
-
-       fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
-       if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
-               vma_end_read(vma);
-
-       if (!(fault & VM_FAULT_RETRY)) {
-               count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+       fault = try_vma_locked_page_fault(&vmf);
+       if (fault == VM_FAULT_NONE)
+               goto retry;
+       if (!(fault & VM_FAULT_RETRY))
                goto done;
-       }
-       count_vm_vma_lock_event(VMA_LOCK_RETRY);
 
        if (fault_signal_pending(fault, regs))
                return user_mode(regs) ? 0 : SIGBUS;
 
-lock_mmap:
-
        /* When running in the kernel we expect faults to occur only to
         * addresses in user space. All other faults represent errors in the
         * kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -528,7 +526,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
-       fault = handle_mm_fault(vma, address, flags, regs);
+       fault = handle_mm_fault(vma, address, vmf.flags, regs);
 
        major |= fault & VM_FAULT_MAJOR;
 
@@ -544,7 +542,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
         * case.
         */
        if (unlikely(fault & VM_FAULT_RETRY)) {
-               flags |= FAULT_FLAG_TRIED;
+               vmf.flags |= FAULT_FLAG_TRIED;
                goto retry;
        }
 
Use new try_vma_locked_page_fault() helper to simplify code.
No functional change intended.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/powerpc/mm/fault.c | 66 ++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 34 deletions(-)