
[v2,8/9] KVM: MMU: fully check zero bits for sptes

Message ID 1438747467-25425-9-git-send-email-guangrong.xiao@linux.intel.com (mailing list archive)
State New, archived

Commit Message

Xiao Guangrong Aug. 5, 2015, 4:04 a.m. UTC
The #PF with PFEC.RSV = 1 is designed to speed up MMIO emulation; however, it
is possible that the RSV #PF is caused by a real BUG, i.e. by mis-configured
shadow page table entries.

This patch enables a full check of the zero bits on shadow page table
entries, covering not only the bits reserved by hardware but also the bits
the spte format never uses, and dumps the shadow page table hierarchy
if a real bug is detected.

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/mmu.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)
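For readers not following the whole series: "fully check the zero bits" means
testing every walked spte against a per-level mask of bits that must never be
set (hardware-reserved bits plus bits the spte format simply never uses).
A minimal, self-contained sketch of that idea, with made-up names (the actual
patch relies on the rsvd_check helpers introduced earlier in the series):

#include <stdbool.h>
#include <stdint.h>

#define PT64_ROOT_LEVEL 4

/* One "must be zero" mask per paging level; index 0 is level 1. */
struct spte_zero_check {
	uint64_t zero_bits[PT64_ROOT_LEVEL];
};

/*
 * Illustrative stand-in for is_shadow_zero_bits_set(): any overlap with
 * the per-level mask means KVM itself mis-configured the spte.
 */
static bool spte_zero_bits_set(const struct spte_zero_check *check,
			       uint64_t spte, int level)
{
	return (spte & check->zero_bits[level - 1]) != 0;
}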

Comments

Paolo Bonzini Aug. 5, 2015, 10:12 a.m. UTC | #1
On 05/08/2015 06:04, Xiao Guangrong wrote:
> -	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
> +	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
> +		leaf = iterator.level;
> +
> +		if (!root)
> +			root = leaf;
> +
> +		sptes[leaf - 1] = spte;
> +

I'm a bit undecided between this and open-coding the macro:

        for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level;
             shadow_walk_okay(&iterator);
             __shadow_walk_next(&iterator, spte)) {
                leaf = iterator.level;
                spte = mmu_spte_get_lockless(iterator.sptep);

Any second opinions?

Paolo
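For comparison, the macro being weighed against is already an init/okay/next
loop; roughly (quoted from memory, the exact definition in mmu.c may differ
slightly):

#define for_each_shadow_entry_lockless(_vcpu, _addr, _walker, spte)	\
	for (shadow_walk_init(&(_walker), _vcpu, _addr);		\
	     shadow_walk_okay(&(_walker)) &&				\
		({ spte = mmu_spte_get_lockless(_walker.sptep); 1; });	\
	     __shadow_walk_next(&(_walker), spte))

Open-coding it would mainly give a natural place to record
root = iterator.level right after shadow_walk_init(), which is what the new
loop body achieves with the !root check, at the cost of exposing the walker
internals at the call site.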
Xiao Guangrong Aug. 6, 2015, 2:53 a.m. UTC | #2
On 08/05/2015 06:12 PM, Paolo Bonzini wrote:
>
>
> On 05/08/2015 06:04, Xiao Guangrong wrote:
>> -	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
>> +	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
>> +		leaf = iterator.level;
>> +
>> +		if (!root)
>> +			root = leaf;
>> +
>> +		sptes[leaf - 1] = spte;
>> +
>
> I'm a bit undecided between this and open-coding the macro:
>
>          for (shadow_walk_init(&iterator, vcpu, addr), root = iterator.level;
>               shadow_walk_okay(&iterator);
>               __shadow_walk_next(&iterator, spte)) {
>                  leaf = iterator.level;
>                  spte = mmu_spte_get_lockless(iterator.sptep);
>
> Any second opinions?

Your adjustment looks good to me, I do not have other ideas... :)

Patch

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e6a7ed0..1393317 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3295,31 +3295,60 @@  static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 	return vcpu_match_mmio_gva(vcpu, addr);
 }
 
-static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
+/* return true if reserved bit is detected on spte. */
+static bool
+walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
 {
 	struct kvm_shadow_walk_iterator iterator;
-	u64 spte = 0ull;
+	u64 sptes[PT64_ROOT_LEVEL], spte = 0ull;
+	int root = 0, leaf;
+	bool reserved = false;
 
 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
-		return spte;
+		goto exit;
 
 	walk_shadow_page_lockless_begin(vcpu);
-	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
+	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
+		leaf = iterator.level;
+
+		if (!root)
+			root = leaf;
+
+		sptes[leaf - 1] = spte;
+
 		if (!is_shadow_present_pte(spte))
 			break;
+
+		reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+						    leaf);
+	}
 	walk_shadow_page_lockless_end(vcpu);
 
-	return spte;
+	if (reserved) {
+		pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
+		       __func__, addr);
+		while (root >= leaf) {
+			pr_err("------ spte 0x%llx level %d.\n",
+			       sptes[root - 1], root);
+			root--;
+		}
+	}
+exit:
+	*sptep = spte;
+	return reserved;
 }
 
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 {
 	u64 spte;
+	bool reserved;
 
 	if (quickly_check_mmio_pf(vcpu, addr, direct))
 		return RET_MMIO_PF_EMULATE;
 
-	spte = walk_shadow_page_get_mmio_spte(vcpu, addr);
+	reserved = walk_shadow_page_get_mmio_spte(vcpu, addr, &spte);
+	if (unlikely(reserved))
+		return RET_MMIO_PF_BUG;
 
 	if (is_mmio_spte(spte)) {
 		gfn_t gfn = get_mmio_spte_gfn(spte);
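
The hunk is cut off above, but the caller-visible change is the new
RET_MMIO_PF_BUG return value; a caller (such as the EPT misconfig path) can
then tell a genuine MMIO spte apart from a KVM bug along these lines
(illustrative sketch only, with hypothetical helpers, not code from this
patch):

	ret = handle_mmio_page_fault_common(vcpu, addr, true);
	if (ret == RET_MMIO_PF_EMULATE)
		return emulate_mmio_access(vcpu, addr);	/* hypothetical helper */
	if (unlikely(ret == RET_MMIO_PF_BUG)) {
		/* Zero bits set by KVM itself: report the bug, do not emulate. */
		WARN_ON(1);
		return -EINVAL;
	}
	/* Otherwise fall back to the regular page fault path. */
	return handle_regular_fault(vcpu, addr);	/* hypothetical helper */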