@@ -6179,7 +6179,7 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
vcpu->stat.pf_spurious++;
if (r != RET_PF_EMULATE)
- return 1;
+ return r;
emulate:
return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
@@ -1705,8 +1705,9 @@ int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
err = tdh_mem_sept_add(to_kvm_tdx(kvm)->tdr_pa, gpa, tdx_level, hpa, &entry,
&level_state);
- if (unlikely(err == TDX_ERROR_SEPT_BUSY))
- return -EAGAIN;
+ if (unlikely(err & TDX_OPERAND_BUSY))
+ return -EBUSY;
+
if (KVM_BUG_ON(err, kvm)) {
pr_tdx_error_2(TDH_MEM_SEPT_ADD, err, entry, level_state);
return -EIO;
@@ -1855,6 +1856,8 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
{
gpa_t gpa = tdexit_gpa(vcpu);
unsigned long exit_qual;
+ bool local_retry = false;
+ int ret;
if (vt_is_tdx_private_gpa(vcpu->kvm, gpa)) {
if (tdx_is_sept_violation_unexpected_pending(vcpu)) {
@@ -1873,6 +1876,23 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
* due to aliasing a single HPA to multiple GPAs.
*/
exit_qual = EPT_VIOLATION_ACC_WRITE;
+
+ /*
+	 * Mapping of private memory may hit RET_PF_RETRY due to
+	 * SEAMCALL contention, e.g.
+ * - TDH.MEM.PAGE.AUG/TDH.MEM.SEPT.ADD on local vCPU vs
+ * TDH.VP.ENTER with 0-step mitigation on a remote vCPU.
+ * - TDH.MEM.PAGE.AUG/TDH.MEM.SEPT.ADD on local vCPU vs
+ * TDG.MEM.PAGE.ACCEPT on a remote vCPU.
+ *
+	 * Retry internally in TDX to avoid exacerbating the
+	 * activation of 0-step mitigation on the local vCPU.
+	 * Despite these retries, 0-step mitigation on the local
+	 * vCPU may still be triggered due to:
+ * - Exiting on signals, interrupts.
+ * - KVM_EXIT_MEMORY_FAULT.
+ */
+ local_retry = true;
} else {
exit_qual = tdexit_exit_qual(vcpu);
/*
@@ -1885,7 +1905,24 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
}
trace_kvm_page_fault(vcpu, tdexit_gpa(vcpu), exit_qual);
- return __vmx_handle_ept_violation(vcpu, tdexit_gpa(vcpu), exit_qual);
+
+ while (1) {
+ ret = __vmx_handle_ept_violation(vcpu, gpa, exit_qual);
+
+ if (ret != RET_PF_RETRY || !local_retry)
+ break;
+
+ /*
+		 * Break out and keep the original return value.
+		 * Signal and IRQ handling will be done later in vcpu_run().
+ */
+ if (signal_pending(current) || pi_has_pending_interrupt(vcpu) ||
+ kvm_test_request(KVM_REQ_NMI, vcpu) || vcpu->arch.nmi_pending)
+ break;
+
+ cond_resched();
+ }
+ return ret;
}
int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
@@ -3028,13 +3065,11 @@ static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
}
ret = 0;
- do {
- err = tdh_mem_page_add(kvm_tdx->tdr_pa, gpa, pfn_to_hpa(pfn),
- pfn_to_hpa(page_to_pfn(page)),
- &entry, &level_state);
- } while (err == TDX_ERROR_SEPT_BUSY);
+ err = tdh_mem_page_add(kvm_tdx->tdr_pa, gpa, pfn_to_hpa(pfn),
+ pfn_to_hpa(page_to_pfn(page)),
+ &entry, &level_state);
if (err) {
- ret = -EIO;
+ ret = unlikely(err & TDX_OPERAND_BUSY) ? -EBUSY : -EIO;
goto out;
}
For tdh_mem_page_add(), just return an error when TDX_OPERAND_BUSY is found.

For tdh_mem_sept_add() and tdh_mem_page_aug():
- Return -EBUSY in KVM for TDX_OPERAND_BUSY to cause RET_PF_RETRY to be
  returned in kvm_mmu_do_page_fault()/kvm_mmu_page_fault().
- Inside TDX's EPT violation handler, retry on RET_PF_RETRY as long as
  there are no pending signals/interrupts.

Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
 arch/x86/kvm/mmu/mmu.c |  2 +-
 arch/x86/kvm/vmx/tdx.c | 53 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 45 insertions(+), 10 deletions(-)
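As a rough, self-contained sketch of the retry policy described above, the
standalone C program below models the chain this patch sets up: a busy
SEAMCALL becomes -EBUSY, -EBUSY becomes a retryable fault result, and the
EPT-violation handler retries locally unless other work is pending. All
identifiers in the sketch are illustrative stand-ins, not kernel or
TDX-module APIs.

/*
 * Illustrative sketch only; names below are stand-ins, not kernel APIs.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

enum fault_ret { RET_FIXED, RET_RETRY, RET_ERROR };

/* Stand-in for tdh_mem_page_aug()/tdh_mem_sept_add(): pretend the first
 * two attempts lose the SEAMCALL contention. */
static int fake_seamcall(int *attempts)
{
	return (*attempts)++ < 2 ? -EBUSY : 0;
}

/* Stand-in for the fault path: translate -EBUSY into a retryable result. */
static enum fault_ret handle_ept_violation(int *attempts)
{
	int err = fake_seamcall(attempts);

	if (err == -EBUSY)
		return RET_RETRY;
	return err ? RET_ERROR : RET_FIXED;
}

/* Stand-in for the signal_pending()/pending-interrupt checks. */
static bool work_pending(void)
{
	return false;
}

int main(void)
{
	int attempts = 0;
	enum fault_ret ret;

	while (1) {
		ret = handle_ept_violation(&attempts);

		/* Only contention is retried locally; anything else is final. */
		if (ret != RET_RETRY)
			break;

		/* Defer signal/interrupt handling to the outer run loop. */
		if (work_pending())
			break;
	}

	printf("resolved after %d attempt(s), ret=%d\n", attempts, (int)ret);
	return 0;
}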