From 4f7263b9dd12aad3c0ab3eab832933963e570dc5 Mon Sep 17 00:00:00 2001
From: Haozhong Zhang <haozhong.zhang@intel.com>
Date: Thu, 19 Oct 2017 08:58:27 +0800
Subject: [PATCH] KVM: MMU: check MMIO pages with host cache type

kvm_is_mmio_pfn() currently treats every valid, reserved pfn as MMIO,
so get_mt_mask() is told such pages are MMIO and (on VMX) forces an
uncacheable EPT memory type even when the host itself maps the page
with a cacheable type.

Pass the host page protection of the faulting pfn down to set_spte()
and treat a reserved pfn as MMIO only when no host cache type is
available or the host maps the pfn uncacheable. The host cache type is
obtained by the new helper kvm_vcpu_gfn_to_pgprot(), which looks up the
VMA covering the gfn and lets track_pfn_insert() refine the cache mode
for the pfn; track_pfn_insert() is exported for this purpose.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
arch/x86/kvm/mmu.c | 59 +++++++++++++++++++++++++++++++++++-------------
arch/x86/mm/pat.c | 1 +
include/linux/kvm_host.h | 4 ++++
virt/kvm/kvm_main.c | 31 +++++++++++++++++++++++++
4 files changed, 79 insertions(+), 16 deletions(-)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2705,18 +2705,20 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
return false;
}
-static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
+static bool kvm_is_mmio_pfn(kvm_pfn_t pfn, pgprot_t *prot)
{
if (pfn_valid(pfn))
- return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
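+ /*
+ * Treat a reserved pfn as MMIO only if the host cache type is
+ * unknown or the host maps the pfn uncacheable.
+ */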
+ return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn)) &&
+ (!prot || pgprot2cachemode(*prot) == _PAGE_CACHE_MODE_UC);
return true;
}
-static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
+static int set_spte_prot(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int level,
gfn_t gfn, kvm_pfn_t pfn, bool speculative,
- bool can_unsync, bool host_writable)
+ bool can_unsync, bool host_writable,
+ pgprot_t *pfn_prot)
{
u64 spte = 0;
int ret = 0;
@@ -2751,7 +2753,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= PT_PAGE_SIZE_MASK;
if (tdp_enabled)
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
- kvm_is_mmio_pfn(pfn));
+ kvm_is_mmio_pfn(pfn, pfn_prot));
if (host_writable)
spte |= SPTE_HOST_WRITEABLE;
@@ -2808,9 +2810,18 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
return ret;
}
-static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
- int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn,
- bool speculative, bool host_writable)
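+/*
+ * Wrapper that keeps the original set_spte() behaviour for callers with
+ * no host page protection information.
+ */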
+static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
+ unsigned pte_access, int level,
+ gfn_t gfn, kvm_pfn_t pfn, bool speculative,
+ bool can_unsync, bool host_writable)
+{
+ return set_spte_prot(vcpu, sptep, pte_access, level, gfn, pfn,
+ speculative, can_unsync, host_writable, NULL);
+}
+
+static int mmu_set_spte_prot(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
+ int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn,
+ bool speculative, bool host_writable, pgprot_t *pfn_prot)
{
int was_rmapped = 0;
int rmap_count;
@@ -2841,8 +2852,8 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
was_rmapped = 1;
}
- if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
- true, host_writable)) {
+ if (set_spte_prot(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
+ true, host_writable, pfn_prot)) {
if (write_fault)
ret = RET_PF_EMULATE;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -2872,6 +2883,14 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
return ret;
}
+static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
+ int write_fault, int level, gfn_t gfn, kvm_pfn_t pfn,
+ bool speculative, bool host_writable)
+{
+ return mmu_set_spte_prot(vcpu, sptep, pte_access, write_fault, level,
+ gfn, pfn, speculative, host_writable, NULL);
+}
+
static kvm_pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
bool no_dirty_log)
{
@@ -2954,7 +2973,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
}
static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
- int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
+ int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault,
+ pgprot_t *pfn_prot)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp;
@@ -2966,10 +2986,11 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) {
- emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
+ emulate = mmu_set_spte_prot(vcpu, iterator.sptep, ACC_ALL,
write, level, gfn, pfn, prefault,
- map_writable);
- direct_pte_prefetch(vcpu, iterator.sptep);
+ map_writable, pfn_prot);
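+ /*
+ * Prefetched sptes are installed without host cache type
+ * information, so skip prefetching when an explicit host
+ * cache type is provided.
+ */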
+ if (!pfn_prot)
+ direct_pte_prefetch(vcpu, iterator.sptep);
++vcpu->stat.pf_fixed;
break;
}
@@ -3317,7 +3338,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+ r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault,
+ NULL);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
@@ -3868,6 +3890,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
unsigned long mmu_seq;
int write = error_code & PFERR_WRITE_MASK;
bool map_writable;
+ pgprot_t prot;
+ bool prot_valid;
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -3900,6 +3924,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
return r;
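+ /*
+ * Look up the host page protection before taking mmu_lock; the
+ * lookup needs mmap_sem and may sleep.
+ */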
+ prot_valid = kvm_vcpu_gfn_to_pgprot(vcpu, gfn, pfn, &prot);
+
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
@@ -3907,7 +3933,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
+ r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault,
+ prot_valid ? &prot : NULL);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -998,6 +998,7 @@ void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn)
*prot = __pgprot((pgprot_val(*prot) & (~_PAGE_CACHE_MASK)) |
cachemode2protval(pcm));
}
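+/* Exported so that KVM can query the host cache type of a pfn. */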
+EXPORT_SYMBOL_GPL(track_pfn_insert);
/*
* untrack_pfn is called while unmapping a pfnmap for a region.
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -67,6 +67,8 @@
#define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1)
#define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2)
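+/* Wrap a kvm_pfn_t in the pfn_t type expected by track_pfn_insert(). */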
+#define kvm_pfn_to_pfn(x) ((pfn_t){ .val = (x) })
+
/*
* error pfns indicate that the gfn is in slot but faild to
* translate it to pfn on host.
@@ -702,6 +704,8 @@ kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
+bool kvm_vcpu_gfn_to_pgprot(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn,
+ pgprot_t *prot);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
int len);
int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1315,6 +1315,37 @@ unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *w
return gfn_to_hva_memslot_prot(slot, gfn, writable);
}
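+/*
+ * kvm_vcpu_gfn_to_pgprot - get the host page protection backing a gfn
+ *
+ * Returns true and fills @prot with the protection (including the cache
+ * type refined by track_pfn_insert()) that the host uses for @pfn, if the
+ * gfn still translates to @pfn and a covering VMA is found; returns false
+ * otherwise.
+ */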
+bool kvm_vcpu_gfn_to_pgprot(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn,
+ pgprot_t *prot)
+{
+ kvm_pfn_t _pfn;
+ unsigned long addr;
+ struct vm_area_struct *vma;
+ bool prot_valid = false;
+
+ _pfn = kvm_vcpu_gfn_to_pfn(vcpu, gfn);
+ /* Drop the reference taken by the lookup; only the value is compared. */
+ kvm_release_pfn_clean(_pfn);
+ if (_pfn != pfn)
+ return false;
+
+ addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
+ if (kvm_is_error_hva(addr))
+ return false;
+
+ down_read(&vcpu->kvm->mm->mmap_sem);
+ vma = find_vma(vcpu->kvm->mm, addr);
+ if (!vma)
+ goto out;
+
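+ /*
+ * Start from the VMA's page protection and let track_pfn_insert()
+ * refine the cache mode for this particular pfn.
+ */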
+ *prot = vma->vm_page_prot;
+ track_pfn_insert(vma, prot, kvm_pfn_to_pfn(pfn));
+ prot_valid = true;
+
+ out:
+ up_read(&vcpu->kvm->mm->mmap_sem);
+
+ return prot_valid;
+}
+
static int get_user_page_nowait(unsigned long start, int write,
struct page **page)
{
--
2.11.0