[01/11] kvm: x86: Make sync_page() flush remote TLBs once only
diff mbox

Message ID 20180504183720.134909-2-junaids@google.com
State New
Headers show

Commit Message

Junaid Shahid May 4, 2018, 6:37 p.m. UTC
sync_page() calls set_spte() from a loop across a page table. It would
work better if set_spte() left the TLB flushing to its callers, so that
sync_page() can aggregate into a single call.

Signed-off-by: Junaid Shahid <junaids@google.com>
---
 arch/x86/kvm/mmu.c         | 16 ++++++++++++----
 arch/x86/kvm/paging_tmpl.h | 12 ++++++++----
 2 files changed, 20 insertions(+), 8 deletions(-)

Comments

David Hildenbrand May 7, 2018, 9:22 a.m. UTC | #1
On 04.05.2018 20:37, Junaid Shahid wrote:
> sync_page() calls set_spte() from a loop across a page table. It would
> work better if set_spte() left the TLB flushing to its callers, so that
> sync_page() can aggregate into a single call.
> 
> Signed-off-by: Junaid Shahid <junaids@google.com>
> ---
>  arch/x86/kvm/mmu.c         | 16 ++++++++++++----
>  arch/x86/kvm/paging_tmpl.h | 12 ++++++++----
>  2 files changed, 20 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 8494dbae41b9..07f517fd6e95 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2725,6 +2725,10 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
>  	return true;
>  }
>  
> +/* Bits which may be returned by set_spte() */
> +#define WRPROT_SHADOW_PT	BIT(0)
> +#define NEED_FLUSH_REMOTE_TLBS	BIT(1)

Not sure if I really like returning flags. Especially as the naming does
not suggest that or that these values are somehow linked together.

What about a flag &flush?

> +
>  static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>  		    unsigned pte_access, int level,
>  		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
> @@ -2801,7 +2805,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>  		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
>  			pgprintk("%s: found shadow page for %llx, marking ro\n",
>  				 __func__, gfn);
> -			ret = 1;
> +			ret |= WRPROT_SHADOW_PT;
>  			pte_access &= ~ACC_WRITE_MASK;
>  			spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
>  		}
> @@ -2817,7 +2821,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>  
>  set_pte:
>  	if (mmu_spte_update(sptep, spte))
> -		kvm_flush_remote_tlbs(vcpu->kvm);
> +		ret |= NEED_FLUSH_REMOTE_TLBS;
>  done:
>  	return ret;
>  }
> @@ -2828,6 +2832,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
>  {
>  	int was_rmapped = 0;
>  	int rmap_count;
> +	int set_spte_ret;
>  	int ret = RET_PF_RETRY;
>  
>  	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
> @@ -2855,12 +2860,15 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
>  			was_rmapped = 1;
>  	}
>  
> -	if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
> -	      true, host_writable)) {
> +	set_spte_ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn,
> +				speculative, true, host_writable);
> +	if (set_spte_ret & WRPROT_SHADOW_PT) {
>  		if (write_fault)
>  			ret = RET_PF_EMULATE;
>  		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
>  	}
> +	if (set_spte_ret & NEED_FLUSH_REMOTE_TLBS)
> +		kvm_flush_remote_tlbs(vcpu->kvm);
>  
>  	if (unlikely(is_mmio_spte(*sptep)))
>  		ret = RET_PF_EMULATE;
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 6288e9d7068e..f176b85767ec 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -968,6 +968,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
>  	int i, nr_present = 0;
>  	bool host_writable;
>  	gpa_t first_pte_gpa;
> +	int set_spte_ret = 0;
>  
>  	/* direct kvm_mmu_page can not be unsync. */
>  	BUG_ON(sp->role.direct);
> @@ -1024,12 +1025,15 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
>  
>  		host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
>  
> -		set_spte(vcpu, &sp->spt[i], pte_access,
> -			 PT_PAGE_TABLE_LEVEL, gfn,
> -			 spte_to_pfn(sp->spt[i]), true, false,
> -			 host_writable);
> +		set_spte_ret |= set_spte(vcpu, &sp->spt[i],
> +					 pte_access, PT_PAGE_TABLE_LEVEL,
> +					 gfn, spte_to_pfn(sp->spt[i]),
> +					 true, false, host_writable);
>  	}
>  
> +	if (set_spte_ret & NEED_FLUSH_REMOTE_TLBS)
> +		kvm_flush_remote_tlbs(vcpu->kvm);
> +
>  	return nr_present;
>  }
>  
> 

Logic seems to be fine from what I can tell.
Paolo Bonzini May 7, 2018, 4:19 p.m. UTC | #2
On 07/05/2018 11:22, David Hildenbrand wrote:
>>  
>> +/* Bits which may be returned by set_spte() */
>> +#define WRPROT_SHADOW_PT	BIT(0)
>> +#define NEED_FLUSH_REMOTE_TLBS	BIT(1)
> Not sure if I really like returning flags. Especially as the naming does
> not suggest that or that these values are somehow linked together.
> 
> What about a flag &flush?
> 

I think the flags are okay (I dislike bool return values in general),
but the naming can be improved, for example SET_SPTE_WRITE_PROTECTED and
SET_SPTE_NEED_REMOTE_FLUSH.

Paolo
Junaid Shahid May 7, 2018, 7:19 p.m. UTC | #3
On 05/07/2018 09:19 AM, Paolo Bonzini wrote:
> On 07/05/2018 11:22, David Hildenbrand wrote:
>>>  
>>> +/* Bits which may be returned by set_spte() */
>>> +#define WRPROT_SHADOW_PT	BIT(0)
>>> +#define NEED_FLUSH_REMOTE_TLBS	BIT(1)
>> Not sure if I really like returning flags. Especially as the naming does
>> not suggest that or that these values are somehow linked together.
>>
>> What about a flag &flush?
>>
> 
> I think the flags are okay (I dislike bool return values in general),
> but the naming can be improved, for example SET_SPTE_WRITE_PROTECTED and
> SET_SPTE_NEED_REMOTE_FLUSH.
> 
> Paolo
> 

Sure, I'll rename the flags.

Thanks,
Junaid
David Hildenbrand May 7, 2018, 7:40 p.m. UTC | #4
On 07.05.2018 18:19, Paolo Bonzini wrote:
> On 07/05/2018 11:22, David Hildenbrand wrote:
>>>  
>>> +/* Bits which may be returned by set_spte() */
>>> +#define WRPROT_SHADOW_PT	BIT(0)
>>> +#define NEED_FLUSH_REMOTE_TLBS	BIT(1)
>> Not sure if I really like returning flags. Especially as the naming does
>> not suggest that or that these values are somehow linked together.
>>
>> What about a flag &flush?
>>
> 
> I think the flags are okay (I dislike bool return values in general),
> but the naming can be improved, for example SET_SPTE_WRITE_PROTECTED and
> SET_SPTE_NEED_REMOTE_FLUSH.

The thing I don't like about flags is that you cannot properly handle
error scenarios anymore (return -EWHATEVER). But as there are no error
scenarios to handle, fine with me :)

> 
> Paolo
>
Paolo Bonzini May 11, 2018, 9:11 a.m. UTC | #5
On 07/05/2018 21:40, David Hildenbrand wrote:
> On 07.05.2018 18:19, Paolo Bonzini wrote:
>> On 07/05/2018 11:22, David Hildenbrand wrote:
>>>>  
>>>> +/* Bits which may be returned by set_spte() */
>>>> +#define WRPROT_SHADOW_PT	BIT(0)
>>>> +#define NEED_FLUSH_REMOTE_TLBS	BIT(1)
>>> Not sure if I really like returning flags. Especially as the naming does
>>> not suggest that or that these values are somehow linked together.
>>>
>>> What about a flag &flush?
>>>
>>
>> I think the flags are okay (I dislike bool return values in general),
>> but the naming can be improved, for example SET_SPTE_WRITE_PROTECTED and
>> SET_SPTE_NEED_REMOTE_FLUSH.
> 
> The thing I don't like about flags is that you cannot properly handle
> error scenarios anymore (return -EWHATEVER). But as there are no error
> scenarios to handle, fine with me :)

If the errors are negative and the flags are <64 they can be handled
too, can't they?

Paolo
David Hildenbrand May 11, 2018, 9:59 a.m. UTC | #6
On 11.05.2018 11:11, Paolo Bonzini wrote:
> On 07/05/2018 21:40, David Hildenbrand wrote:
>> On 07.05.2018 18:19, Paolo Bonzini wrote:
>>> On 07/05/2018 11:22, David Hildenbrand wrote:
>>>>>  
>>>>> +/* Bits which may be returned by set_spte() */
>>>>> +#define WRPROT_SHADOW_PT	BIT(0)
>>>>> +#define NEED_FLUSH_REMOTE_TLBS	BIT(1)
>>>> Not sure if I really like returning flags. Especially as the naming does
>>>> not suggest that or that these values are somehow linked together.
>>>>
>>>> What about a flag &flush?
>>>>
>>>
>>> I think the flags are okay (I dislike bool return values in general),
>>> but the naming can be improved, for example SET_SPTE_WRITE_PROTECTED and
>>> SET_SPTE_NEED_REMOTE_FLUSH.
>>
>> The thing I don't like about flags is that you cannot properly handle
>> error scenarios anymore (return -EWHATEVER). But as there are no error
>> scenarios to handle, fine with me :)
> 
> If the errors are negative and the flags are <64 they can be handled
> too, can't they?

I see what you mean. e.g. -1 -> 0xfffff.. -> all flags set.
But if the caller always checks for < 0 it's not a problem. So it's all
about the check for bit 64 in the caller, a.k.a. < 0.

> 
> Paolo
>

Patch
diff mbox

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8494dbae41b9..07f517fd6e95 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2725,6 +2725,10 @@  static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
 	return true;
 }
 
+/* Bits which may be returned by set_spte() */
+#define WRPROT_SHADOW_PT	BIT(0)
+#define NEED_FLUSH_REMOTE_TLBS	BIT(1)
+
 static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		    unsigned pte_access, int level,
 		    gfn_t gfn, kvm_pfn_t pfn, bool speculative,
@@ -2801,7 +2805,7 @@  static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
 			pgprintk("%s: found shadow page for %llx, marking ro\n",
 				 __func__, gfn);
-			ret = 1;
+			ret |= WRPROT_SHADOW_PT;
 			pte_access &= ~ACC_WRITE_MASK;
 			spte &= ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE);
 		}
@@ -2817,7 +2821,7 @@  static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 set_pte:
 	if (mmu_spte_update(sptep, spte))
-		kvm_flush_remote_tlbs(vcpu->kvm);
+		ret |= NEED_FLUSH_REMOTE_TLBS;
 done:
 	return ret;
 }
@@ -2828,6 +2832,7 @@  static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 {
 	int was_rmapped = 0;
 	int rmap_count;
+	int set_spte_ret;
 	int ret = RET_PF_RETRY;
 
 	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
@@ -2855,12 +2860,15 @@  static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
 			was_rmapped = 1;
 	}
 
-	if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative,
-	      true, host_writable)) {
+	set_spte_ret = set_spte(vcpu, sptep, pte_access, level, gfn, pfn,
+				speculative, true, host_writable);
+	if (set_spte_ret & WRPROT_SHADOW_PT) {
 		if (write_fault)
 			ret = RET_PF_EMULATE;
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	}
+	if (set_spte_ret & NEED_FLUSH_REMOTE_TLBS)
+		kvm_flush_remote_tlbs(vcpu->kvm);
 
 	if (unlikely(is_mmio_spte(*sptep)))
 		ret = RET_PF_EMULATE;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6288e9d7068e..f176b85767ec 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -968,6 +968,7 @@  static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	int i, nr_present = 0;
 	bool host_writable;
 	gpa_t first_pte_gpa;
+	int set_spte_ret = 0;
 
 	/* direct kvm_mmu_page can not be unsync. */
 	BUG_ON(sp->role.direct);
@@ -1024,12 +1025,15 @@  static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
 
-		set_spte(vcpu, &sp->spt[i], pte_access,
-			 PT_PAGE_TABLE_LEVEL, gfn,
-			 spte_to_pfn(sp->spt[i]), true, false,
-			 host_writable);
+		set_spte_ret |= set_spte(vcpu, &sp->spt[i],
+					 pte_access, PT_PAGE_TABLE_LEVEL,
+					 gfn, spte_to_pfn(sp->spt[i]),
+					 true, false, host_writable);
 	}
 
+	if (set_spte_ret & NEED_FLUSH_REMOTE_TLBS)
+		kvm_flush_remote_tlbs(vcpu->kvm);
+
 	return nr_present;
 }