@@ -998,63 +998,73 @@ int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *vm)
return ret;
}
-int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
+static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa)
{
- struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
- u64 ipa = hyp_pfn_to_phys(gfn);
u64 phys;
int ret;
- if (prot & ~KVM_PGTABLE_PROT_RWX)
- return -EINVAL;
+ if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG))
+ return;
host_lock_component();
guest_lock_component(vm);
ret = __check_host_shared_guest(vm, &phys, ipa);
- if (!ret)
- ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
guest_unlock_component(vm);
host_unlock_component();
- return ret;
+ WARN_ON(ret && ret != -ENOENT);
}
-int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
+int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot)
{
+ struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
u64 ipa = hyp_pfn_to_phys(gfn);
- u64 phys;
int ret;
- host_lock_component();
- guest_lock_component(vm);
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
- ret = __check_host_shared_guest(vm, &phys, ipa);
- if (!ret)
- ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
+ if (prot & ~KVM_PGTABLE_PROT_RWX)
+ return -EINVAL;
+ assert_host_shared_guest(vm, ipa);
+ guest_lock_component(vm);
+ ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0);
guest_unlock_component(vm);
- host_unlock_component();
return ret;
}
-int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
+int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm)
{
u64 ipa = hyp_pfn_to_phys(gfn);
- u64 phys;
int ret;
- host_lock_component();
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
+
+ assert_host_shared_guest(vm, ipa);
guest_lock_component(vm);
+ ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE);
+ guest_unlock_component(vm);
- ret = __check_host_shared_guest(vm, &phys, ipa);
- if (!ret)
- ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
+ return ret;
+}
+int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm)
+{
+ u64 ipa = hyp_pfn_to_phys(gfn);
+ int ret;
+
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
+
+ assert_host_shared_guest(vm, ipa);
+ guest_lock_component(vm);
+ ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold);
guest_unlock_component(vm);
- host_unlock_component();
return ret;
}
@@ -1063,18 +1073,15 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu)
{
struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu);
u64 ipa = hyp_pfn_to_phys(gfn);
- u64 phys;
int ret;
- host_lock_component();
- guest_lock_component(vm);
-
- ret = __check_host_shared_guest(vm, &phys, ipa);
- if (!ret)
- kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
+ if (pkvm_hyp_vm_is_protected(vm))
+ return -EPERM;
+ assert_host_shared_guest(vm, ipa);
+ guest_lock_component(vm);
+ kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0);
guest_unlock_component(vm);
- host_unlock_component();
return ret;
}
When the handling of a guest stage-2 permission fault races with an MMU notifier, the faulting page might be gone from the guest's stage-2 by the point we attempt to call (p)kvm_pgtable_stage2_relax_perms(). In the normal KVM case, this leads to returning -EAGAIN which user_mem_abort() handles correctly by simply re-entering the guest. However, the pKVM hypercall implementation has additional logic to check the page state using __check_host_shared_guest() which gets confused with absence of a page mapped at the requested IPA and returns -ENOENT, hence breaking user_mem_abort() and hilarity ensues. Luckily, several of the hypercalls for managing the stage-2 page-table of NP guests have no effect on the pKVM ownership tracking (wrprotect, test_clear_young, mkyoung, and crucially relax_perms), so the extra state checking logic is in fact not strictly necessary. So, to fix the discrepancy between standard KVM and pKVM, let's just drop the superfluous __check_host_shared_guest() logic from those hypercalls and make the extra state checking a debug assertion dependent on CONFIG_NVHE_EL2_DEBUG as we already do for other transitions. Signed-off-by: Quentin Perret <qperret@google.com> --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 69 +++++++++++++++------------ 1 file changed, 38 insertions(+), 31 deletions(-)