diff mbox

[v3,09/10] KVM: MMU: fix MTRR update

Message ID 1431499348-25188-10-git-send-email-guangrong.xiao@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Xiao Guangrong May 13, 2015, 6:42 a.m. UTC
Currently, whenever guest MTRR registers are changed
kvm_mmu_reset_context is called to switch to the new root shadow page
table, however, it's useless since:
1) the cache type is not cached into shadow page's attribute so that
   the original root shadow page will be reused

2) the cache type is set on the last spte, that means we should sync
   the last sptes when MTRR is changed

This patch fixes this issue by dropping all the sptes in the gfn range which
is being updated by MTRR

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/kvm/x86.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

Comments

Wanpeng Li May 13, 2015, 8:43 a.m. UTC | #1
Hi Xiao,
On Wed, May 13, 2015 at 02:42:27PM +0800, Xiao Guangrong wrote:
>Currently, whenever guest MTRR registers are changed
>kvm_mmu_reset_context is called to switch to the new root shadow page
>table, however, it's useless since:
>1) the cache type is not cached into shadow page's attribute so that
>   the original root shadow page will be reused

kvm_mmu_reset_context
  kvm_mmu_unload
    mmu_free_roots

The original root shadow page will be freed in mmu_free_roots, where I
miss?

Another question maybe not related to this patch:

If kvm_mmu_reset_context is just called to destroy the original root 
shadow page, will all the sptes remain valid? 

Regards,
Wanpeng Li 

>
>2) the cache type is set on the last spte, that means we should sync
>   the last sptes when MTRR is changed
>
>This patch fixs this issue by drop all the spte in the gfn range which
>is being updated by MTRR
>
>Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>---
> arch/x86/kvm/x86.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 58 insertions(+), 1 deletion(-)
>
>diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>index cde5d61..bbe184f 100644
>--- a/arch/x86/kvm/x86.c
>+++ b/arch/x86/kvm/x86.c
>@@ -1852,6 +1852,63 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> }
> EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
> 
>+static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
>+{
>+	struct mtrr_state_type *mtrr_state = &vcpu->arch.mtrr_state;
>+	unsigned char mtrr_enabled = mtrr_state->enabled;
>+	gfn_t start, end, mask;
>+	int index;
>+	bool is_fixed = true;
>+
>+	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
>+	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
>+		return;
>+
>+	if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
>+		return;
>+
>+	switch (msr) {
>+	case MSR_MTRRfix64K_00000:
>+		start = 0x0;
>+		end = 0x80000;
>+		break;
>+	case MSR_MTRRfix16K_80000:
>+		start = 0x80000;
>+		end = 0xa0000;
>+		break;
>+	case MSR_MTRRfix16K_A0000:
>+		start = 0xa0000;
>+		end = 0xc0000;
>+		break;
>+	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
>+		index = msr - MSR_MTRRfix4K_C0000;
>+		start = 0xc0000 + index * (32 << 10);
>+		end = start + (32 << 10);
>+		break;
>+	case MSR_MTRRdefType:
>+		is_fixed = false;
>+		start = 0x0;
>+		end = ~0ULL;
>+		break;
>+	default:
>+		/* variable range MTRRs. */
>+		is_fixed = false;
>+		index = (msr - 0x200) / 2;
>+		start = (((u64)mtrr_state->var_ranges[index].base_hi) << 32) +
>+		       (mtrr_state->var_ranges[index].base_lo & PAGE_MASK);
>+		mask = (((u64)mtrr_state->var_ranges[index].mask_hi) << 32) +
>+		       (mtrr_state->var_ranges[index].mask_lo & PAGE_MASK);
>+		mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
>+
>+		end = ((start & mask) | ~mask) + 1;
>+	}
>+
>+	if (is_fixed && !(mtrr_enabled & 0x1))
>+		return;
>+
>+	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
>+}
>+
> static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> {
> 	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
>@@ -1885,7 +1942,7 @@ static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
> 		*pt = data;
> 	}
> 
>-	kvm_mmu_reset_context(vcpu);
>+	update_mtrr(vcpu, msr);
> 	return 0;
> }
> 
>-- 
>2.1.0
>
>--
>To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
>Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini May 13, 2015, 2:10 p.m. UTC | #2
On 13/05/2015 10:43, Wanpeng Li wrote:
> kvm_mmu_reset_context
>   kvm_mmu_unload
>     mmu_free_roots
> 
> The original root shadow page will be freed in mmu_free_roots, where I
> miss?
> 
> Another question maybe not related to this patch:
> 
> If kvm_mmu_reset_context is just called to destroy the original root 
> shadow page and all the sptes will remain valid? 

SPTEs are kept around and cached.  The "role" field is used as the hash
key; if the role doesn't change, SPTEs are reused, so you have to zap
the SPTEs explicitly.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wanpeng Li May 14, 2015, 12:16 a.m. UTC | #3
On Wed, May 13, 2015 at 04:10:08PM +0200, Paolo Bonzini wrote:
>
>
>On 13/05/2015 10:43, Wanpeng Li wrote:
>> kvm_mmu_reset_context
>>   kvm_mmu_unload
>>     mmu_free_roots
>> 
>> The original root shadow page will be freed in mmu_free_roots, where I
>> miss?
>> 
>> Another question maybe not related to this patch:
>> 
>> If kvm_mmu_reset_context is just called to destroy the original root 
>> shadow page and all the sptes will remain valid? 
>
>SPTEs are kept around and cached.  The "role" field is used as the hash
>key; if the role doesn't change, SPTEs are reused, so you have to zap
>the SPTEs explicitly.

Thanks for your explanation. :)

Btw, why does the patch changelog mention that the root shadow page will be
reused? I think it will be zapped in mmu_free_roots. 

Regards,
Wanpeng Li 

>
>Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini May 14, 2015, 8:43 a.m. UTC | #4
On 14/05/2015 02:16, Wanpeng Li wrote:
> >SPTEs are kept around and cached.  The "role" field is used as the hash
> >key; if the role doesn't change, SPTEs are reused, so you have to zap
> >the SPTEs explicitly.
> 
> Btw, why the patch changelog mentioned that the root shadow page will be
> reused, I think it will be zapped in mmu_free_roots. 

Who has set sp->role.invalid on it?  If no one has, it's not zapped and
it will be found again in the hash table.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cde5d61..bbe184f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1852,6 +1852,63 @@  bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 }
 EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
 
+static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
+{
+	struct mtrr_state_type *mtrr_state = &vcpu->arch.mtrr_state;
+	unsigned char mtrr_enabled = mtrr_state->enabled;
+	gfn_t start, end, mask;
+	int index;
+	bool is_fixed = true;
+
+	if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
+	      !kvm_arch_has_noncoherent_dma(vcpu->kvm))
+		return;
+
+	if (!(mtrr_enabled & 0x2) && msr != MSR_MTRRdefType)
+		return;
+
+	switch (msr) {
+	case MSR_MTRRfix64K_00000:
+		start = 0x0;
+		end = 0x80000;
+		break;
+	case MSR_MTRRfix16K_80000:
+		start = 0x80000;
+		end = 0xa0000;
+		break;
+	case MSR_MTRRfix16K_A0000:
+		start = 0xa0000;
+		end = 0xc0000;
+		break;
+	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
+		index = msr - MSR_MTRRfix4K_C0000;
+		start = 0xc0000 + index * (32 << 10);
+		end = start + (32 << 10);
+		break;
+	case MSR_MTRRdefType:
+		is_fixed = false;
+		start = 0x0;
+		end = ~0ULL;
+		break;
+	default:
+		/* variable range MTRRs. */
+		is_fixed = false;
+		index = (msr - 0x200) / 2;
+		start = (((u64)mtrr_state->var_ranges[index].base_hi) << 32) +
+		       (mtrr_state->var_ranges[index].base_lo & PAGE_MASK);
+		mask = (((u64)mtrr_state->var_ranges[index].mask_hi) << 32) +
+		       (mtrr_state->var_ranges[index].mask_lo & PAGE_MASK);
+		mask |= ~0ULL << cpuid_maxphyaddr(vcpu);
+
+		end = ((start & mask) | ~mask) + 1;
+	}
+
+	if (is_fixed && !(mtrr_enabled & 0x1))
+		return;
+
+	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
+}
+
 static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
@@ -1885,7 +1942,7 @@  static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		*pt = data;
 	}
 
-	kvm_mmu_reset_context(vcpu);
+	update_mtrr(vcpu, msr);
 	return 0;
 }