@@ -1078,6 +1078,10 @@ struct kvm_arch {
*/
spinlock_t mmu_unsync_pages_lock;
+ bool enable_mmio_caching;
+ u64 shadow_mmio_value;
+ u64 shadow_mmio_mask;
+
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
bool iommu_noncoherent;
@@ -509,6 +509,7 @@ enum vmcs_field {
#define VMX_EPT_IPAT_BIT (1ull << 6)
#define VMX_EPT_ACCESS_BIT (1ull << 8)
#define VMX_EPT_DIRTY_BIT (1ull << 9)
+#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63)
#define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \
VMX_EPT_WRITABLE_MASK | \
VMX_EPT_EXECUTABLE_MASK)
@@ -108,7 +108,9 @@ static inline u8 kvm_get_shadow_phys_bits(void)
return boot_cpu_data.x86_phys_bits;
}
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
+void kvm_mmu_set_mmio_spte_mask(struct kvm *kvm, u64 mmio_value, u64 mmio_mask,
+ u64 access_mask);
+void kvm_mmu_set_default_mmio_spte_mask(u64 mask);
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
@@ -2310,7 +2310,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
return kvm_mmu_prepare_zap_page(kvm, child,
invalid_list);
}
- } else if (is_mmio_spte(pte)) {
+ } else if (is_mmio_spte(kvm, pte)) {
mmu_spte_clear_no_track(spte);
}
return 0;
@@ -3092,8 +3092,13 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fa
* by L0 userspace (you can observe gfn > L1.MAXPHYADDR if
* and only if L1's MAXPHYADDR is inaccurate with respect to
* the hardware's).
+ *
+ * Excludes the INTEL TD guest. Because TD memory is
+ * protected, the instruction can't be emulated. Instead, use
+ * SPTE value without #VE suppress bit cleared
+ * (kvm->arch.shadow_mmio_value = 0).
*/
- if (unlikely(!enable_mmio_caching) ||
+ if (unlikely(!vcpu->kvm->arch.enable_mmio_caching) ||
unlikely(fault->gfn > kvm_mmu_max_gfn())) {
*ret_val = RET_PF_EMULATE;
return true;
@@ -3221,7 +3226,8 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
else
sptep = fast_pf_get_last_sptep(vcpu, fault->addr, &spte);
- if (!is_shadow_present_pte(spte) || is_mmio_spte(spte))
+ if (!is_shadow_present_pte(spte) ||
+ is_mmio_spte(vcpu->kvm, spte))
break;
sp = sptep_to_sp(sptep);
@@ -3914,7 +3920,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
if (WARN_ON(reserved))
return -EINVAL;
- if (is_mmio_spte(spte)) {
+ if (is_mmio_spte(vcpu->kvm, spte)) {
gfn_t gfn = get_mmio_spte_gfn(spte);
unsigned int access = get_mmio_spte_access(spte);
@@ -4341,7 +4347,7 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
unsigned int access)
{
- if (unlikely(is_mmio_spte(*sptep))) {
+ if (unlikely(is_mmio_spte(vcpu->kvm, *sptep))) {
if (gfn != get_mmio_spte_gfn(*sptep)) {
mmu_spte_clear_no_track(sptep);
return true;
@@ -5851,6 +5857,10 @@ int kvm_mmu_init_vm(struct kvm *kvm)
node->track_write = kvm_mmu_pte_write;
node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
kvm_page_track_register_notifier(kvm, node);
+ kvm_mmu_set_mmio_spte_mask(kvm, shadow_default_mmio_mask,
+ shadow_default_mmio_mask,
+ ACC_WRITE_MASK | ACC_USER_MASK);
+
return 0;
}
@@ -1031,7 +1031,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
gfn_t gfn;
if (!is_shadow_present_pte(sp->spt[i]) &&
- !is_mmio_spte(sp->spt[i]))
+ !is_mmio_spte(vcpu->kvm, sp->spt[i]))
continue;
pte_gpa = first_pte_gpa + i * sizeof(pt_element_t);
@@ -29,8 +29,7 @@ u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
u64 __read_mostly shadow_user_mask;
u64 __read_mostly shadow_accessed_mask;
u64 __read_mostly shadow_dirty_mask;
-u64 __read_mostly shadow_mmio_value;
-u64 __read_mostly shadow_mmio_mask;
+u64 __read_mostly shadow_default_mmio_mask;
u64 __read_mostly shadow_mmio_access_mask;
u64 __read_mostly shadow_present_mask;
u64 __read_mostly shadow_me_value;
@@ -62,10 +61,11 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
u64 spte = generation_mmio_spte_mask(gen);
u64 gpa = gfn << PAGE_SHIFT;
- WARN_ON_ONCE(!shadow_mmio_value);
+ WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value &&
+ !kvm_gfn_shared_mask(vcpu->kvm));
access &= shadow_mmio_access_mask;
- spte |= shadow_mmio_value | access;
+ spte |= vcpu->kvm->arch.shadow_mmio_value | access;
spte |= gpa | shadow_nonpresent_or_rsvd_mask;
spte |= (gpa & shadow_nonpresent_or_rsvd_mask)
<< SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
@@ -307,7 +307,8 @@ u64 mark_spte_for_access_track(u64 spte)
return spte;
}
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
+void kvm_mmu_set_mmio_spte_mask(struct kvm *kvm, u64 mmio_value, u64 mmio_mask,
+ u64 access_mask)
{
BUG_ON((u64)(unsigned)access_mask != access_mask);
WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
@@ -336,11 +337,9 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
WARN_ON(mmio_value && (__REMOVED_SPTE & mmio_mask) == mmio_value))
mmio_value = 0;
- if (!mmio_value)
- enable_mmio_caching = false;
-
- shadow_mmio_value = mmio_value;
- shadow_mmio_mask = mmio_mask;
+ kvm->arch.enable_mmio_caching = !!mmio_value;
+ kvm->arch.shadow_mmio_value = mmio_value;
+ kvm->arch.shadow_mmio_mask = mmio_mask;
shadow_mmio_access_mask = access_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
@@ -363,24 +362,18 @@ void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
shadow_dirty_mask = has_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull;
shadow_nx_mask = 0ull;
shadow_x_mask = VMX_EPT_EXECUTABLE_MASK;
- shadow_present_mask = has_exec_only ? 0ull : VMX_EPT_READABLE_MASK;
+ /* VMX_EPT_SUPPRESS_VE_BIT is needed for W or X violation. */
+ shadow_present_mask =
+ (has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | VMX_EPT_SUPPRESS_VE_BIT;
shadow_acc_track_mask = VMX_EPT_RWX_MASK;
shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE;
shadow_mmu_writable_mask = EPT_SPTE_MMU_WRITABLE;
-
- /*
- * EPT Misconfigurations are generated if the value of bits 2:0
- * of an EPT paging-structure entry is 110b (write/execute).
- */
- kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE,
- VMX_EPT_RWX_MASK, 0);
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_masks);
void kvm_mmu_reset_all_pte_masks(void)
{
u8 low_phys_bits;
- u64 mask;
shadow_phys_bits = kvm_get_shadow_phys_bits();
@@ -429,9 +422,13 @@ void kvm_mmu_reset_all_pte_masks(void)
* PTEs and so the reserved PA approach must be disabled.
*/
if (shadow_phys_bits < 52)
- mask = BIT_ULL(51) | PT_PRESENT_MASK;
+ shadow_default_mmio_mask = BIT_ULL(51) | PT_PRESENT_MASK;
else
- mask = 0;
+ shadow_default_mmio_mask = 0;
+}
- kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
+void kvm_mmu_set_default_mmio_spte_mask(u64 mask)
+{
+ shadow_default_mmio_mask = mask;
}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_default_mmio_spte_mask);
@@ -5,8 +5,6 @@
#include "mmu_internal.h"
-extern bool __read_mostly enable_mmio_caching;
-
/*
* A MMU present SPTE is backed by actual memory and may or may not be present
* in hardware. E.g. MMIO SPTEs are not considered present. Use bit 11, as it
@@ -160,8 +158,7 @@ extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
extern u64 __read_mostly shadow_user_mask;
extern u64 __read_mostly shadow_accessed_mask;
extern u64 __read_mostly shadow_dirty_mask;
-extern u64 __read_mostly shadow_mmio_value;
-extern u64 __read_mostly shadow_mmio_mask;
+extern u64 __read_mostly shadow_default_mmio_mask;
extern u64 __read_mostly shadow_mmio_access_mask;
extern u64 __read_mostly shadow_present_mask;
extern u64 __read_mostly shadow_me_value;
@@ -233,10 +230,10 @@ static inline bool is_removed_spte(u64 spte)
*/
extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
-static inline bool is_mmio_spte(u64 spte)
+static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
{
- return (spte & shadow_mmio_mask) == shadow_mmio_value &&
- likely(enable_mmio_caching);
+ return (spte & kvm->arch.shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
+ likely(kvm->arch.enable_mmio_caching || kvm_gfn_shared_mask(kvm));
}
static inline bool is_shadow_present_pte(u64 pte)
@@ -549,8 +549,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
* impact the guest since both the former and current SPTEs
* are nonpresent.
*/
- if (WARN_ON(!is_mmio_spte(old_spte) &&
- !is_mmio_spte(new_spte) &&
+ if (WARN_ON(!is_mmio_spte(kvm, old_spte) &&
+ !is_mmio_spte(kvm, new_spte) &&
!is_removed_spte(new_spte)))
pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
"should not be replaced with another,\n"
@@ -1084,7 +1084,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
}
/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
- if (unlikely(is_mmio_spte(new_spte))) {
+ if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) {
trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
new_spte);
ret = RET_PF_EMULATE;
@@ -4770,7 +4770,7 @@ static __init void svm_adjust_mmio_mask(void)
*/
mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
- kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
+ kvm_mmu_set_default_mmio_spte_mask(mask);
}
static __init void svm_set_cpu_caps(void)
@@ -7181,6 +7181,14 @@ int vmx_vm_init(struct kvm *kvm)
if (!ple_gap)
kvm->arch.pause_in_guest = true;
+ /*
+ * EPT Misconfigurations can be generated if the value of bits 2:0
+ * of an EPT paging-structure entry is 110b (write/execute).
+ */
+ if (enable_ept)
+ kvm_mmu_set_mmio_spte_mask(kvm, VMX_EPT_MISCONFIG_WX_VALUE,
+ VMX_EPT_RWX_MASK, 0);
+
if (boot_cpu_has(X86_BUG_L1TF) && enable_ept) {
switch (l1tf_mitigation) {
case L1TF_MITIGATION_OFF: