[06/10] KVM: x86: MMU: Consolidate WARN_ON/BUG_ON checks for reverse-mapped sptes

Message ID 20151112205343.61fbcc0a911e891b1ddc8f19@lab.ntt.co.jp (mailing list archive)
State New, archived

Commit Message

Takuya Yoshikawa Nov. 12, 2015, 11:53 a.m. UTC
At some call sites of rmap_get_first() and rmap_get_next(), a BUG_ON is
placed right after the call to detect unrelated sptes that must not be
found in the reverse-mapping list.

Move this check into rmap_get_first/next() so that all call sites, not
just the users of the for_each_rmap_spte() macro, are checked in the
same way.  In addition, change the BUG_ON to a WARN_ON, since killing
the whole host is the last thing that KVM should do.

One thing to keep in mind is that kvm_mmu_unlink_parents() also uses
rmap_get_first() to handle parent sptes.  The change does not break it
because parent sptes are present, not mmio-sptes, at least until
drop_parent_pte() actually unlinks them.

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
---
 Documentation/virtual/kvm/mmu.txt |  4 ++--
 arch/x86/kvm/mmu.c                | 26 +++++++++++++++++---------
 2 files changed, 19 insertions(+), 11 deletions(-)
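
For context, the rmap word that rmap_get_first() decodes packs either a
single spte pointer or a pointer to a pte_list_desc, with bit 0 telling the
two cases apart.  The sketch below (simplified stand-in types, not the patch
itself) shows why putting one presence check at the point of decoding covers
every caller at once:

/* Illustrative sketch only; simplified stand-ins for the real KVM types. */
struct pte_list_desc_sketch {
        u64 *sptes[3];                          /* the real PTE_LIST_EXT is larger */
        struct pte_list_desc_sketch *more;      /* next descriptor in the chain */
};

static u64 *rmap_first_sketch(unsigned long rmap)
{
        u64 *sptep;

        if (!rmap)
                return NULL;

        if (!(rmap & 1))        /* bit 0 clear: rmap is the lone spte pointer */
                sptep = (u64 *)rmap;
        else                    /* bit 0 set: rmap points to a descriptor list */
                sptep = ((struct pte_list_desc_sketch *)(rmap & ~1ul))->sptes[0];

        /* One check at the decode point covers every caller the same way. */
        WARN_ON(!is_shadow_present_pte(*sptep));
        return sptep;
}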

Comments

Marcelo Tosatti Nov. 13, 2015, 10:08 p.m. UTC | #1
On Thu, Nov 12, 2015 at 08:53:43PM +0900, Takuya Yoshikawa wrote:
> At some call sites of rmap_get_first() and rmap_get_next(), a BUG_ON is
> placed right after the call to detect unrelated sptes that must not be
> found in the reverse-mapping list.
> 
> Move this check into rmap_get_first/next() so that all call sites, not
> just the users of the for_each_rmap_spte() macro, are checked in the
> same way.  In addition, change the BUG_ON to a WARN_ON, since killing
> the whole host is the last thing that KVM should do.

It should be a BUG_ON: if KVM continues, it will corrupt (more) memory.

> One thing to keep in mind is that kvm_mmu_unlink_parents() also uses
> rmap_get_first() to handle parent sptes.  The change does not break it
> because parent sptes are present, not mmio-sptes, at least until
> drop_parent_pte() actually unlinks them.
> 
> Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Takuya Yoshikawa Nov. 16, 2015, 3:34 a.m. UTC | #2
On 2015/11/14 7:08, Marcelo Tosatti wrote:
> On Thu, Nov 12, 2015 at 08:53:43PM +0900, Takuya Yoshikawa wrote:
>> At some call sites of rmap_get_first() and rmap_get_next(), a BUG_ON is
>> placed right after the call to detect unrelated sptes that must not be
>> found in the reverse-mapping list.
>>
>> Move this check into rmap_get_first/next() so that all call sites, not
>> just the users of the for_each_rmap_spte() macro, are checked in the
>> same way.  In addition, change the BUG_ON to a WARN_ON, since killing
>> the whole host is the last thing that KVM should do.
>
> It should be a BUG_ON: if KVM continues, it will corrupt (more) memory.

In the sense that we cannot predict what kind of corruption it will
cause, I agree with you.

But if it can only corrupt that guest's memory, it is a bit sad to
kill unrelated guests, and the host, too.  Anyway, since we cannot say
for sure what a possible bug might cause, I agree with you now.
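
For reference, the trade-off being discussed comes down to the semantics of
the two macros: BUG_ON() oopses the kernel on a bad spte, which is what the
thread means by killing the whole host, while WARN_ON() prints a backtrace
and evaluates to its condition, so execution continues and a caller could
even choose to back off.  A hypothetical illustration (not part of the patch):

/* Hypothetical sketch only, not taken from the patch. */
static void handle_spte_example(u64 *sptep)
{
        /*
         * WARN_ON() logs a backtrace and returns the truth value of its
         * condition, so the caller may skip the bad entry and carry on;
         * BUG_ON() would stop here and never return.
         */
        if (WARN_ON(!is_shadow_present_pte(*sptep)))
                return;

        /* ... normal handling of a present spte would go here ... */
}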

Thanks,
   Takuya


Patch

diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index 3a4d681..daf9c0f 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -203,10 +203,10 @@  Shadow pages contain the following information:
     page cannot be destroyed.  See role.invalid.
   parent_ptes:
     The reverse mapping for the pte/ptes pointing at this page's spt. If
-    parent_ptes bit 0 is zero, only one spte points at this pages and
+    parent_ptes bit 0 is zero, only one spte points at this page and
     parent_ptes points at this single spte, otherwise, there exists multiple
     sptes pointing at this page and (parent_ptes & ~0x1) points at a data
-    structure with a list of parent_ptes.
+    structure with a list of parent sptes.
   unsync:
     If true, then the translations in this page may not match the guest's
     translation.  This is equivalent to the state of the tlb when a pte is
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 1691171..ee7b101 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1079,17 +1079,23 @@  struct rmap_iterator {
  */
 static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
 {
+	u64 *sptep;
+
 	if (!rmap)
 		return NULL;
 
 	if (!(rmap & 1)) {
 		iter->desc = NULL;
-		return (u64 *)rmap;
+		sptep = (u64 *)rmap;
+		goto out;
 	}
 
 	iter->desc = (struct pte_list_desc *)(rmap & ~1ul);
 	iter->pos = 0;
-	return iter->desc->sptes[iter->pos];
+	sptep = iter->desc->sptes[iter->pos];
+out:
+	WARN_ON(!is_shadow_present_pte(*sptep));
+	return sptep;
 }
 
 /*
@@ -1099,14 +1105,14 @@  static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter)
  */
 static u64 *rmap_get_next(struct rmap_iterator *iter)
 {
+	u64 *sptep;
+
 	if (iter->desc) {
 		if (iter->pos < PTE_LIST_EXT - 1) {
-			u64 *sptep;
-
 			++iter->pos;
 			sptep = iter->desc->sptes[iter->pos];
 			if (sptep)
-				return sptep;
+				goto out;
 		}
 
 		iter->desc = iter->desc->more;
@@ -1114,17 +1120,20 @@  static u64 *rmap_get_next(struct rmap_iterator *iter)
 		if (iter->desc) {
 			iter->pos = 0;
 			/* desc->sptes[0] cannot be NULL */
-			return iter->desc->sptes[iter->pos];
+			sptep = iter->desc->sptes[iter->pos];
+			goto out;
 		}
 	}
 
 	return NULL;
+out:
+	WARN_ON(!is_shadow_present_pte(*sptep));
+	return sptep;
 }
 
 #define for_each_rmap_spte(_rmap_, _iter_, _spte_)			    \
 	   for (_spte_ = rmap_get_first(*_rmap_, _iter_);		    \
-		_spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;});  \
-			_spte_ = rmap_get_next(_iter_))
+		_spte_; _spte_ = rmap_get_next(_iter_))
 
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
@@ -1338,7 +1347,6 @@  static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 	bool flush = false;
 
 	while ((sptep = rmap_get_first(*rmapp, &iter))) {
-		BUG_ON(!(*sptep & PT_PRESENT_MASK));
 		rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
 
 		drop_spte(kvm, sptep);
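
For illustration, with the presence check folded into rmap_get_first/next(),
a walker built on the simplified macro no longer needs a per-iteration BUG_ON
of its own.  A minimal, hypothetical caller (not taken from the patch) might
look like:

/* Hypothetical example caller, for illustration only. */
static unsigned int count_rmap_sptes(unsigned long *rmapp)
{
        struct rmap_iterator iter;
        u64 *sptep;
        unsigned int count = 0;

        /* Presence is now sanity-checked inside rmap_get_first/next(). */
        for_each_rmap_spte(rmapp, &iter, sptep)
                ++count;

        return count;
}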