@@ -345,7 +345,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned long gpa, gfn, hva, pfn;
struct kvm_memory_slot *memslot;
struct page *page = NULL, *pages[1];
- long ret, npages, ok;
+ long ret, npages;
unsigned int writing;
struct vm_area_struct *vma;
unsigned long flags;
@@ -397,43 +397,29 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (dsisr & DSISR_SET_RC) {
/*
* Need to set an R or C bit in the 2nd-level tables;
- * if the relevant bits aren't already set in the linux
- * page tables, fall through to do the gup_fast to
- * set them in the linux page tables too.
+ * since we are just helping out the hardware here,
+ * it is sufficient to do what the hardware does.
*/
- ok = 0;
pgflags = _PAGE_ACCESSED;
if (writing)
pgflags |= _PAGE_DIRTY;
- local_irq_save(flags);
- ptep = find_current_mm_pte(current->mm->pgd, hva, NULL, NULL);
- if (ptep) {
- pte = READ_ONCE(*ptep);
- if (pte_present(pte) &&
- (pte_val(pte) & pgflags) == pgflags)
- ok = 1;
- }
- local_irq_restore(flags);
- if (ok) {
- spin_lock(&kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
- spin_unlock(&kvm->mmu_lock);
- return RESUME_GUEST;
- }
- /*
- * We are walking the secondary page table here. We can do this
- * without disabling irq.
- */
- ptep = __find_linux_pte(kvm->arch.pgtable,
- gpa, NULL, &shift);
- if (ptep && pte_present(*ptep)) {
- kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
- gpa, shift);
- spin_unlock(&kvm->mmu_lock);
- return RESUME_GUEST;
- }
- spin_unlock(&kvm->mmu_lock);
+ /*
+ * We are walking the secondary page table here. We can do this
+ * without disabling irq.
+ */
+ spin_lock(&kvm->mmu_lock);
+ ptep = __find_linux_pte(kvm->arch.pgtable,
+ gpa, NULL, &shift);
+ if (ptep && pte_present(*ptep) &&
+ (!writing || pte_write(*ptep))) {
+ kvmppc_radix_update_pte(kvm, ptep, 0, pgflags,
+ gpa, shift);
+ dsisr &= ~DSISR_SET_RC;
}
+ spin_unlock(&kvm->mmu_lock);
+ if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+ DSISR_PROTFAULT | DSISR_SET_RC)))
+ return RESUME_GUEST;
}
ret = -EFAULT;
When using the radix MMU, we can get hypervisor page fault interrupts with the DSISR_SET_RC bit set in DSISR/HSRR1, indicating that an attempt to set the R (reference) or C (change) bit in a PTE atomically failed. Previously we would find the corresponding Linux PTE and check the permission and dirty bits there, but this is not really necessary since we only need to do what the hardware was trying to do, namely set R or C atomically. This removes the code that reads the Linux PTE and just update the partition-scoped PTE, having first checked that it is still present, and if the access is a write, that the PTE still has write permission. Furthermore, we now check whether any other relevant bits are set in DSISR, and if there are, then we proceed with the rest of the function in order to handle whatever condition they represent, instead of returning to the guest as we did previously. Signed-off-by: Paul Mackerras <paulus@ozlabs.org> --- arch/powerpc/kvm/book3s_64_mmu_radix.c | 52 +++++++++++++--------------------- 1 file changed, 19 insertions(+), 33 deletions(-)