diff mbox

[v2,2/3] KVM: MMU: fix SMAP virtualization

Message ID 5550ABE5.3010503@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Xiao Guangrong May 11, 2015, 1:17 p.m. UTC
From: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Date: Mon, 11 May 2015 21:09:15 +0800
Subject: [PATCH] KVM: MMU: fix SMAP virtualization

KVM may turn a user page into a kernel page when the kernel writes to a
read-only user page while CR0.WP = 0. The resulting shadow page entry is
reused after SMAP is enabled, so the kernel is wrongly allowed to access
this user page even though SMAP should forbid it.

Fix it by adding SMAP && !CR0.WP to the shadow page's role and resetting
the MMU whenever CR4.SMAP is updated.

Changelog in v2:
- rebase the patch against the commit 31fd9880a1c5 (Paolo Bonzini:
KVM: MMU: fix CR4.SMEP=1, CR0.WP=0 with shadow pages)
- do not prefetch the spte if its smap_andnot_wp does not match that of
   the current vcpu

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
  arch/x86/include/asm/kvm_host.h |  1 +
  arch/x86/kvm/mmu.c              | 16 ++++++++++++----
  arch/x86/kvm/mmu.h              |  2 --
  arch/x86/kvm/x86.c              |  8 +++-----
  4 files changed, 16 insertions(+), 11 deletions(-)

Comments

Paolo Bonzini May 11, 2015, 1:41 p.m. UTC | #1
On 11/05/2015 15:17, Xiao Guangrong wrote:
> 
> From: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> Date: Mon, 11 May 2015 21:09:15 +0800
> Subject: [PATCH] KVM: MMU: fix SMAP virtualization
> 
> KVM may turn a user page to a kernel page when kernel writes a readonly
> user page if CR0.WP = 1. This shadow page entry will be reused after
> SMAP is enabled so that kernel is allowed to access this user page
> 
> Fix it by setting SMAP && !CR0.WP into shadow page's role and reset mmu
> once CR4.SMAP is updated
> 
> Changelog in v2:
> - rebase the patch against the commit 31fd9880a1c5 (Paolo Bonzini:
> KVM: MMU: fix CR4.SMEP=1, CR0.WP=0 with shadow pages)
> - do not prefetch the spte if it does not match smap_andnot_wp with
>   current vcpu
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>

Sorry, whitespace is mangled in the patch.

Paolo

> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/mmu.c              | 16 ++++++++++++----
>  arch/x86/kvm/mmu.h              |  2 --
>  arch/x86/kvm/x86.c              |  8 +++-----
>  4 files changed, 16 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index 8b661d1..bbb8f4e 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -207,6 +207,7 @@ union kvm_mmu_page_role {
>          unsigned nxe:1;
>          unsigned cr0_wp:1;
>          unsigned smep_andnot_wp:1;
> +        unsigned smap_andnot_wp:1;
>      };
>  };
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 3711095..4058a6b 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3737,8 +3737,8 @@ static void reset_rsvds_bits_mask_ept(struct
> kvm_vcpu *vcpu,
>      }
>  }
> 
> -void update_permission_bitmask(struct kvm_vcpu *vcpu,
> -        struct kvm_mmu *mmu, bool ept)
> +static void update_permission_bitmask(struct kvm_vcpu *vcpu,
> +                      struct kvm_mmu *mmu, bool ept)
>  {
>      unsigned bit, byte, pfec;
>      u8 map;
> @@ -3919,6 +3919,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
>  void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
>  {
>      bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
> +    bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
>      struct kvm_mmu *context = &vcpu->arch.mmu;
> 
>      MMU_WARN_ON(VALID_PAGE(context->root_hpa));
> @@ -3937,6 +3938,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
>      context->base_role.cr0_wp  = is_write_protection(vcpu);
>      context->base_role.smep_andnot_wp
>          = smep && !is_write_protection(vcpu);
> +    context->base_role.smap_andnot_wp
> +        = smap && !is_write_protection(vcpu);
>  }
>  EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
> 
> @@ -4208,12 +4211,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu,
> gpa_t gpa,
>                 const u8 *new, int bytes)
>  {
>      gfn_t gfn = gpa >> PAGE_SHIFT;
> -    union kvm_mmu_page_role mask = { .word = 0 };
>      struct kvm_mmu_page *sp;
>      LIST_HEAD(invalid_list);
>      u64 entry, gentry, *spte;
>      int npte;
>      bool remote_flush, local_flush, zap_page;
> +    union kvm_mmu_page_role mask = (union kvm_mmu_page_role) {
> +        .cr0_wp = 1,
> +        .cr4_pae = 1,
> +        .nxe = 1,
> +        .smep_andnot_wp = 1,
> +        .smap_andnot_wp    = 1,
> +    };
> 
>      /*
>       * If we don't have indirect shadow pages, it means no page is
> @@ -4239,7 +4248,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu,
> gpa_t gpa,
>      ++vcpu->kvm->stat.mmu_pte_write;
>      kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
> 
> -    mask.cr0_wp = mask.cr4_pae = mask.nxe = mask.smep_andnot_wp = 1;
>      for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
>          if (detect_write_misaligned(sp, gpa, bytes) ||
>                detect_write_flooding(sp)) {
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 06eb2fc..0ada65e 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -71,8 +71,6 @@ enum {
>  int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool
> direct);
>  void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
>  void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
> -void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> -        bool ept);
> 
>  static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
>  {
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index cdccbe1..cde5d61 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
>  int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
>  {
>      unsigned long old_cr4 = kvm_read_cr4(vcpu);
> -    unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
> -                   X86_CR4_PAE | X86_CR4_SMEP;
> +    unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
> +                   X86_CR4_SMEP | X86_CR4_SMAP;
> +
>      if (cr4 & CR4_RESERVED_BITS)
>          return 1;
> 
> @@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long
> cr4)
>          (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
>          kvm_mmu_reset_context(vcpu);
> 
> -    if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
> -        update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
> -
>      if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
>          kvm_update_cpuid(vcpu);
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8b661d1..bbb8f4e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -207,6 +207,7 @@  union kvm_mmu_page_role {
  		unsigned nxe:1;
  		unsigned cr0_wp:1;
  		unsigned smep_andnot_wp:1;
+		unsigned smap_andnot_wp:1;
  	};
  };

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3711095..4058a6b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3737,8 +3737,8 @@  static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
  	}
  }

-void update_permission_bitmask(struct kvm_vcpu *vcpu,
-		struct kvm_mmu *mmu, bool ept)
+static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+				      struct kvm_mmu *mmu, bool ept)
  {
  	unsigned bit, byte, pfec;
  	u8 map;
@@ -3919,6 +3919,7 @@  static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
  void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
  {
  	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
  	struct kvm_mmu *context = &vcpu->arch.mmu;

  	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
@@ -3937,6 +3938,8 @@  void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
  	context->base_role.cr0_wp  = is_write_protection(vcpu);
  	context->base_role.smep_andnot_wp
  		= smep && !is_write_protection(vcpu);
+	context->base_role.smap_andnot_wp
+		= smap && !is_write_protection(vcpu);
  }
  EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);

@@ -4208,12 +4211,18 @@  void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
  		       const u8 *new, int bytes)
  {
  	gfn_t gfn = gpa >> PAGE_SHIFT;
-	union kvm_mmu_page_role mask = { .word = 0 };
  	struct kvm_mmu_page *sp;
  	LIST_HEAD(invalid_list);
  	u64 entry, gentry, *spte;
  	int npte;
  	bool remote_flush, local_flush, zap_page;
+	union kvm_mmu_page_role mask = (union kvm_mmu_page_role) {
+		.cr0_wp = 1,
+		.cr4_pae = 1,
+		.nxe = 1,
+		.smep_andnot_wp = 1,
+		.smap_andnot_wp	= 1,
+	};

  	/*
  	 * If we don't have indirect shadow pages, it means no page is
@@ -4239,7 +4248,6 @@  void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
  	++vcpu->kvm->stat.mmu_pte_write;
  	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);

-	mask.cr0_wp = mask.cr4_pae = mask.nxe = mask.smep_andnot_wp = 1;
  	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
  		if (detect_write_misaligned(sp, gpa, bytes) ||
  		      detect_write_flooding(sp)) {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 06eb2fc..0ada65e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -71,8 +71,6 @@  enum {
  int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
  void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
  void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
-void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-		bool ept);

  static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
  {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cdccbe1..cde5d61 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -702,8 +702,9 @@  EXPORT_SYMBOL_GPL(kvm_set_xcr);
  int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  {
  	unsigned long old_cr4 = kvm_read_cr4(vcpu);
-	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
-				   X86_CR4_PAE | X86_CR4_SMEP;
+	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+				   X86_CR4_SMEP | X86_CR4_SMAP;
+
  	if (cr4 & CR4_RESERVED_BITS)
  		return 1;

@@ -744,9 +745,6 @@  int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
  		kvm_mmu_reset_context(vcpu);

-	if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
-		update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
-
  	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
  		kvm_update_cpuid(vcpu);