[08/11] KVM: MMU: use page track for non-leaf shadow pages
diff mbox

Message ID 1448907973-36066-9-git-send-email-guangrong.xiao@linux.intel.com
State New
Headers show

Commit Message

Xiao Guangrong Nov. 30, 2015, 6:26 p.m. UTC
Non-leaf shadow pages are always write-protected, so they can be users
of page tracking.

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 arch/x86/include/asm/kvm_page_track.h |  8 +++++
 arch/x86/kvm/mmu.c                    | 26 +++++++++++++---
 arch/x86/kvm/page_track.c             | 58 +++++++++++++++++++++++------------
 3 files changed, 67 insertions(+), 25 deletions(-)

Comments

Kai Huang Dec. 15, 2015, 7:52 a.m. UTC | #1
On 12/01/2015 02:26 AM, Xiao Guangrong wrote:
> non-leaf shadow pages are always write protected, it can be the user
> of page track
>
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>   arch/x86/include/asm/kvm_page_track.h |  8 +++++
>   arch/x86/kvm/mmu.c                    | 26 +++++++++++++---
>   arch/x86/kvm/page_track.c             | 58 +++++++++++++++++++++++------------
>   3 files changed, 67 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
> index 6744234..3447dac 100644
> --- a/arch/x86/include/asm/kvm_page_track.h
> +++ b/arch/x86/include/asm/kvm_page_track.h
> @@ -41,8 +41,16 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
>   void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
>   				 struct kvm_memory_slot *dont);
>   
> +void
> +kvm_slot_page_track_add_page_nolock(struct kvm *kvm,
> +				    struct kvm_memory_slot *slot, gfn_t gfn,
> +				    enum kvm_page_track_mode mode);
>   void kvm_page_track_add_page(struct kvm *kvm, gfn_t gfn,
>   			     enum kvm_page_track_mode mode);
> +void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm,
> +					    struct kvm_memory_slot *slot,
> +					    gfn_t gfn,
> +					    enum kvm_page_track_mode mode);
>   void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn,
>   				enum kvm_page_track_mode mode);
>   bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn,
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index b23f9fc..5a2ca73 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -806,11 +806,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
>   	struct kvm_memory_slot *slot;
>   	gfn_t gfn;
>   
> +	kvm->arch.indirect_shadow_pages++;
>   	gfn = sp->gfn;
>   	slots = kvm_memslots_for_spte_role(kvm, sp->role);
>   	slot = __gfn_to_memslot(slots, gfn);
> +
> +	/* the non-leaf shadow pages are keeping readonly. */
> +	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
> +		return kvm_slot_page_track_add_page_nolock(kvm, slot, gfn,
> +							KVM_PAGE_TRACK_WRITE);
> +
>   	kvm_mmu_gfn_disallow_lpage(slot, gfn);
> -	kvm->arch.indirect_shadow_pages++;
>   }
>   
>   static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
> @@ -819,11 +825,15 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
>   	struct kvm_memory_slot *slot;
>   	gfn_t gfn;
>   
> +	kvm->arch.indirect_shadow_pages--;
>   	gfn = sp->gfn;
>   	slots = kvm_memslots_for_spte_role(kvm, sp->role);
>   	slot = __gfn_to_memslot(slots, gfn);
> +	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
> +		return kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn,
> +							KVM_PAGE_TRACK_WRITE);
> +
>   	kvm_mmu_gfn_allow_lpage(slot, gfn);
> -	kvm->arch.indirect_shadow_pages--;
>   }
>   
>   static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>   	hlist_add_head(&sp->hash_link,
>   		&vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
>   	if (!direct) {
> -		if (rmap_write_protect(vcpu, gfn))
> +		/*
> +		 * we should do write protection before syncing pages
> +		 * otherwise the content of the synced shadow page may
> +		 * be inconsistent with guest page table.
> +		 */
> +		account_shadowed(vcpu->kvm, sp);
> +
> +		if (level == PT_PAGE_TABLE_LEVEL &&
> +		      rmap_write_protect(vcpu, gfn))
>   			kvm_flush_remote_tlbs(vcpu->kvm);
I think your modification is good, but I am a little bit confused here. In 
account_shadowed, if sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn 
is write-protected, and this is reasonable. So why write-protect the 
gfn of PT_PAGE_TABLE_LEVEL here?

>   		if (level > PT_PAGE_TABLE_LEVEL && need_sync)
>   			kvm_sync_pages(vcpu, gfn);
> -
> -		account_shadowed(vcpu->kvm, sp);
>   	}
>   	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
>   	init_shadow_page_table(sp);
> diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
> index 84420df..87554d3 100644
> --- a/arch/x86/kvm/page_track.c
> +++ b/arch/x86/kvm/page_track.c
> @@ -77,6 +77,26 @@ static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
>   	WARN_ON(val < 0);
>   }
>   
> +void
> +kvm_slot_page_track_add_page_nolock(struct kvm *kvm,
> +				    struct kvm_memory_slot *slot, gfn_t gfn,
> +				    enum kvm_page_track_mode mode)
> +{
> +	WARN_ON(!check_mode(mode));
> +
> +	update_gfn_track(slot, gfn, mode, 1);
> +
> +	/*
> +	 * new track stops large page mapping for the
> +	 * tracked page.
> +	 */
> +	kvm_mmu_gfn_disallow_lpage(slot, gfn);
> +
> +	if (mode == KVM_PAGE_TRACK_WRITE)
> +		if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
> +			kvm_flush_remote_tlbs(kvm);
> +}
> +
>   /*
>    * add guest page to the tracking pool so that corresponding access on that
>    * page will be intercepted.
> @@ -101,21 +121,27 @@ void kvm_page_track_add_page(struct kvm *kvm, gfn_t gfn,
>   		slot = __gfn_to_memslot(slots, gfn);
>   
>   		spin_lock(&kvm->mmu_lock);
> -		update_gfn_track(slot, gfn, mode, 1);
> -
> -		/*
> -		 * new track stops large page mapping for the
> -		 * tracked page.
> -		 */
> -		kvm_mmu_gfn_disallow_lpage(slot, gfn);
> -
> -		if (mode == KVM_PAGE_TRACK_WRITE)
> -			if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
> -				kvm_flush_remote_tlbs(kvm);
> +		kvm_slot_page_track_add_page_nolock(kvm, slot, gfn, mode);
>   		spin_unlock(&kvm->mmu_lock);
>   	}
>   }
>   
> +void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm,
> +					    struct kvm_memory_slot *slot,
> +					    gfn_t gfn,
> +					    enum kvm_page_track_mode mode)
> +{
> +	WARN_ON(!check_mode(mode));
> +
> +	update_gfn_track(slot, gfn, mode, -1);
> +
> +	/*
> +	 * allow large page mapping for the tracked page
> +	 * after the tracker is gone.
> +	 */
> +	kvm_mmu_gfn_allow_lpage(slot, gfn);
> +}
> +
>   /*
>    * remove the guest page from the tracking pool which stops the interception
>    * of corresponding access on that page. It is the opposed operation of
> @@ -134,20 +160,12 @@ void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn,
>   	struct kvm_memory_slot *slot;
>   	int i;
>   
> -	WARN_ON(!check_mode(mode));
> -
>   	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
>   		slots = __kvm_memslots(kvm, i);
>   		slot = __gfn_to_memslot(slots, gfn);
>   
>   		spin_lock(&kvm->mmu_lock);
> -		update_gfn_track(slot, gfn, mode, -1);
> -
> -		/*
> -		 * allow large page mapping for the tracked page
> -		 * after the tracker is gone.
> -		 */
> -		kvm_mmu_gfn_allow_lpage(slot, gfn);
> +		kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, mode);
It looks like you need to merge this part with patch 1, as you are modifying 
kvm_page_track_{add,remove}_page here, which were introduced in your patch 1.

Thanks,
-Kai
>   		spin_unlock(&kvm->mmu_lock);
>   	}
>   }

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kai Huang Dec. 15, 2015, 7:59 a.m. UTC | #2
On 12/15/2015 03:52 PM, Kai Huang wrote:
>
>
> On 12/01/2015 02:26 AM, Xiao Guangrong wrote:
>> non-leaf shadow pages are always write protected, it can be the user
>> of page track
>>
>> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
>> ---
>>   arch/x86/include/asm/kvm_page_track.h |  8 +++++
>>   arch/x86/kvm/mmu.c                    | 26 +++++++++++++---
>>   arch/x86/kvm/page_track.c             | 58 
>> +++++++++++++++++++++++------------
>>   3 files changed, 67 insertions(+), 25 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_page_track.h 
>> b/arch/x86/include/asm/kvm_page_track.h
>> index 6744234..3447dac 100644
>> --- a/arch/x86/include/asm/kvm_page_track.h
>> +++ b/arch/x86/include/asm/kvm_page_track.h
>> @@ -41,8 +41,16 @@ int kvm_page_track_create_memslot(struct 
>> kvm_memory_slot *slot,
>>   void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
>>                    struct kvm_memory_slot *dont);
>>   +void
>> +kvm_slot_page_track_add_page_nolock(struct kvm *kvm,
>> +                    struct kvm_memory_slot *slot, gfn_t gfn,
>> +                    enum kvm_page_track_mode mode);
>>   void kvm_page_track_add_page(struct kvm *kvm, gfn_t gfn,
>>                    enum kvm_page_track_mode mode);
>> +void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm,
>> +                        struct kvm_memory_slot *slot,
>> +                        gfn_t gfn,
>> +                        enum kvm_page_track_mode mode);
>>   void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn,
>>                   enum kvm_page_track_mode mode);
>>   bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn,
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index b23f9fc..5a2ca73 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -806,11 +806,17 @@ static void account_shadowed(struct kvm *kvm, 
>> struct kvm_mmu_page *sp)
>>       struct kvm_memory_slot *slot;
>>       gfn_t gfn;
>>   +    kvm->arch.indirect_shadow_pages++;
>>       gfn = sp->gfn;
>>       slots = kvm_memslots_for_spte_role(kvm, sp->role);
>>       slot = __gfn_to_memslot(slots, gfn);
>> +
>> +    /* the non-leaf shadow pages are keeping readonly. */
>> +    if (sp->role.level > PT_PAGE_TABLE_LEVEL)
>> +        return kvm_slot_page_track_add_page_nolock(kvm, slot, gfn,
>> +                            KVM_PAGE_TRACK_WRITE);
>> +
>>       kvm_mmu_gfn_disallow_lpage(slot, gfn);
>> -    kvm->arch.indirect_shadow_pages++;
>>   }
>>     static void unaccount_shadowed(struct kvm *kvm, struct 
>> kvm_mmu_page *sp)
>> @@ -819,11 +825,15 @@ static void unaccount_shadowed(struct kvm *kvm, 
>> struct kvm_mmu_page *sp)
>>       struct kvm_memory_slot *slot;
>>       gfn_t gfn;
>>   +    kvm->arch.indirect_shadow_pages--;
>>       gfn = sp->gfn;
>>       slots = kvm_memslots_for_spte_role(kvm, sp->role);
>>       slot = __gfn_to_memslot(slots, gfn);
>> +    if (sp->role.level > PT_PAGE_TABLE_LEVEL)
>> +        return kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn,
>> +                            KVM_PAGE_TRACK_WRITE);
>> +
>>       kvm_mmu_gfn_allow_lpage(slot, gfn);
>> -    kvm->arch.indirect_shadow_pages--;
>>   }
>>     static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
>> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page 
>> *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>>       hlist_add_head(&sp->hash_link,
>> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
>>       if (!direct) {
>> -        if (rmap_write_protect(vcpu, gfn))
>> +        /*
>> +         * we should do write protection before syncing pages
>> +         * otherwise the content of the synced shadow page may
>> +         * be inconsistent with guest page table.
>> +         */
>> +        account_shadowed(vcpu->kvm, sp);
>> +
>> +        if (level == PT_PAGE_TABLE_LEVEL &&
>> +              rmap_write_protect(vcpu, gfn))
>>               kvm_flush_remote_tlbs(vcpu->kvm);
> I think your modification is good but I am little bit confused here. 
> In account_shadowed, if sp->role.level > PT_PAGE_TABLE_LEVEL, the 
> sp->gfn is write protected, and this is reasonable. So why write 
> protecting the gfn of PT_PAGE_TABLE_LEVEL here?
>
>>           if (level > PT_PAGE_TABLE_LEVEL && need_sync)
>>               kvm_sync_pages(vcpu, gfn);
>> -
>> -        account_shadowed(vcpu->kvm, sp);
>>       }
>>       sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
>>       init_shadow_page_table(sp);
>> diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
>> index 84420df..87554d3 100644
>> --- a/arch/x86/kvm/page_track.c
>> +++ b/arch/x86/kvm/page_track.c
>> @@ -77,6 +77,26 @@ static void update_gfn_track(struct 
>> kvm_memory_slot *slot, gfn_t gfn,
>>       WARN_ON(val < 0);
>>   }
>>   +void
>> +kvm_slot_page_track_add_page_nolock(struct kvm *kvm,
>> +                    struct kvm_memory_slot *slot, gfn_t gfn,
>> +                    enum kvm_page_track_mode mode)
>> +{
>> +    WARN_ON(!check_mode(mode));
>> +
>> +    update_gfn_track(slot, gfn, mode, 1);
>> +
>> +    /*
>> +     * new track stops large page mapping for the
>> +     * tracked page.
>> +     */
>> +    kvm_mmu_gfn_disallow_lpage(slot, gfn);
>> +
>> +    if (mode == KVM_PAGE_TRACK_WRITE)
>> +        if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
>> +            kvm_flush_remote_tlbs(kvm);
>> +}
>> +
>>   /*
>>    * add guest page to the tracking pool so that corresponding access 
>> on that
>>    * page will be intercepted.
>> @@ -101,21 +121,27 @@ void kvm_page_track_add_page(struct kvm *kvm, 
>> gfn_t gfn,
>>           slot = __gfn_to_memslot(slots, gfn);
>>             spin_lock(&kvm->mmu_lock);
>> -        update_gfn_track(slot, gfn, mode, 1);
>> -
>> -        /*
>> -         * new track stops large page mapping for the
>> -         * tracked page.
>> -         */
>> -        kvm_mmu_gfn_disallow_lpage(slot, gfn);
>> -
>> -        if (mode == KVM_PAGE_TRACK_WRITE)
>> -            if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
>> -                kvm_flush_remote_tlbs(kvm);
>> +        kvm_slot_page_track_add_page_nolock(kvm, slot, gfn, mode);
>>           spin_unlock(&kvm->mmu_lock);
>>       }
>>   }
>>   +void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm,
>> +                        struct kvm_memory_slot *slot,
>> +                        gfn_t gfn,
>> +                        enum kvm_page_track_mode mode)
>> +{
>> +    WARN_ON(!check_mode(mode));
>> +
>> +    update_gfn_track(slot, gfn, mode, -1);
>> +
>> +    /*
>> +     * allow large page mapping for the tracked page
>> +     * after the tracker is gone.
>> +     */
>> +    kvm_mmu_gfn_allow_lpage(slot, gfn);
>> +}
>> +
>>   /*
>>    * remove the guest page from the tracking pool which stops the 
>> interception
>>    * of corresponding access on that page. It is the opposed 
>> operation of
>> @@ -134,20 +160,12 @@ void kvm_page_track_remove_page(struct kvm 
>> *kvm, gfn_t gfn,
>>       struct kvm_memory_slot *slot;
>>       int i;
>>   -    WARN_ON(!check_mode(mode));
>> -
>>       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
>>           slots = __kvm_memslots(kvm, i);
>>           slot = __gfn_to_memslot(slots, gfn);
>>             spin_lock(&kvm->mmu_lock);
>> -        update_gfn_track(slot, gfn, mode, -1);
>> -
>> -        /*
>> -         * allow large page mapping for the tracked page
>> -         * after the tracker is gone.
>> -         */
>> -        kvm_mmu_gfn_allow_lpage(slot, gfn);
>> +        kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, mode);
> Looks you need to merge this part with patch 1, as you are modifying 
> kvm_page_track_{add,remove}_page here, which are introduced in your 
> patch 1.
Should be patch 5. Sorry again.

Thanks,
-Kai
>
> Thanks,
> -Kai
>> spin_unlock(&kvm->mmu_lock);
>>       }
>>   }
>
> -- 
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Dec. 15, 2015, 9:10 a.m. UTC | #3
On 12/15/2015 03:52 PM, Kai Huang wrote:

>>   static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
>> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>>       hlist_add_head(&sp->hash_link,
>>           &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
>>       if (!direct) {
>> -        if (rmap_write_protect(vcpu, gfn))
>> +        /*
>> +         * we should do write protection before syncing pages
>> +         * otherwise the content of the synced shadow page may
>> +         * be inconsistent with guest page table.
>> +         */
>> +        account_shadowed(vcpu->kvm, sp);
>> +
>> +        if (level == PT_PAGE_TABLE_LEVEL &&
>> +              rmap_write_protect(vcpu, gfn))
>>               kvm_flush_remote_tlbs(vcpu->kvm);
> I think your modification is good but I am little bit confused here. In account_shadowed, if
> sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write protected, and this is reasonable. So why
> write protecting the gfn of PT_PAGE_TABLE_LEVEL here?

Because the shadow page will become 'sync', which means the shadow page will be synced
with the page table in the guest. So the shadow page needs to be write-protected to prevent
the guest page table from being changed while we do the 'sync' thing.

The shadow page needs to be write-protected so that the guest page table cannot be changed
while we are syncing the shadow page table. See kvm_sync_pages() after doing
rmap_write_protect().

>>   /*
>>    * remove the guest page from the tracking pool which stops the interception
>>    * of corresponding access on that page. It is the opposed operation of
>> @@ -134,20 +160,12 @@ void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn,
>>       struct kvm_memory_slot *slot;
>>       int i;
>> -    WARN_ON(!check_mode(mode));
>> -
>>       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
>>           slots = __kvm_memslots(kvm, i);
>>           slot = __gfn_to_memslot(slots, gfn);
>>           spin_lock(&kvm->mmu_lock);
>> -        update_gfn_track(slot, gfn, mode, -1);
>> -
>> -        /*
>> -         * allow large page mapping for the tracked page
>> -         * after the tracker is gone.
>> -         */
>> -        kvm_mmu_gfn_allow_lpage(slot, gfn);
>> +        kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, mode);
> Looks you need to merge this part with patch 1, as you are modifying
> kvm_page_track_{add,remove}_page here, which are introduced in your patch 1.

Indeed, it is better.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kai Huang Dec. 16, 2015, 7:51 a.m. UTC | #4
On 12/15/2015 05:10 PM, Xiao Guangrong wrote:
>
>
> On 12/15/2015 03:52 PM, Kai Huang wrote:
>
>>>   static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
>>> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page 
>>> *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>>>       hlist_add_head(&sp->hash_link,
>>> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
>>>       if (!direct) {
>>> -        if (rmap_write_protect(vcpu, gfn))
>>> +        /*
>>> +         * we should do write protection before syncing pages
>>> +         * otherwise the content of the synced shadow page may
>>> +         * be inconsistent with guest page table.
>>> +         */
>>> +        account_shadowed(vcpu->kvm, sp);
>>> +
>>> +        if (level == PT_PAGE_TABLE_LEVEL &&
>>> +              rmap_write_protect(vcpu, gfn))
>>>               kvm_flush_remote_tlbs(vcpu->kvm);
>> I think your modification is good but I am little bit confused here. 
>> In account_shadowed, if
>> sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write protected, 
>> and this is reasonable. So why
>> write protecting the gfn of PT_PAGE_TABLE_LEVEL here?
>
> Because the shadow page will become 'sync' that means the shadow page 
> will be synced
> with the page table in guest. So the shadow page need to be 
> write-protected to avoid
> the guest page table is changed when we do the 'sync' thing.
>
> The shadow page need to be write-protected to avoid that guest page 
> table is changed
> when we are syncing the shadow page table. See kvm_sync_pages() after 
> doing
> rmap_write_protect().
I see. So why are you treating the PT_PAGE_TABLE_LEVEL gfn separately here? Why 
can this not be done in account_shadowed, as you did for the upper-level sp? 
Actually, I am wondering whether account_shadowed is overdoing things. 
From its name it should only *account* the shadow sp, but now it also does 
write protection and disables large page mapping.

Thanks,
-Kai
>
>>>   /*
>>>    * remove the guest page from the tracking pool which stops the 
>>> interception
>>>    * of corresponding access on that page. It is the opposed 
>>> operation of
>>> @@ -134,20 +160,12 @@ void kvm_page_track_remove_page(struct kvm 
>>> *kvm, gfn_t gfn,
>>>       struct kvm_memory_slot *slot;
>>>       int i;
>>> -    WARN_ON(!check_mode(mode));
>>> -
>>>       for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
>>>           slots = __kvm_memslots(kvm, i);
>>>           slot = __gfn_to_memslot(slots, gfn);
>>>           spin_lock(&kvm->mmu_lock);
>>> -        update_gfn_track(slot, gfn, mode, -1);
>>> -
>>> -        /*
>>> -         * allow large page mapping for the tracked page
>>> -         * after the tracker is gone.
>>> -         */
>>> -        kvm_mmu_gfn_allow_lpage(slot, gfn);
>>> +        kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, mode);
>> Looks you need to merge this part with patch 1, as you are modifying
>> kvm_page_track_{add,remove}_page here, which are introduced in your 
>> patch 1.
>
> Indeed, it is better.
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Dec. 16, 2015, 8:39 a.m. UTC | #5
On 12/16/2015 03:51 PM, Kai Huang wrote:
>
>
> On 12/15/2015 05:10 PM, Xiao Guangrong wrote:
>>
>>
>> On 12/15/2015 03:52 PM, Kai Huang wrote:
>>
>>>>   static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
>>>> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>>>>       hlist_add_head(&sp->hash_link,
>>>> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
>>>>       if (!direct) {
>>>> -        if (rmap_write_protect(vcpu, gfn))
>>>> +        /*
>>>> +         * we should do write protection before syncing pages
>>>> +         * otherwise the content of the synced shadow page may
>>>> +         * be inconsistent with guest page table.
>>>> +         */
>>>> +        account_shadowed(vcpu->kvm, sp);
>>>> +
>>>> +        if (level == PT_PAGE_TABLE_LEVEL &&
>>>> +              rmap_write_protect(vcpu, gfn))
>>>>               kvm_flush_remote_tlbs(vcpu->kvm);
>>> I think your modification is good but I am little bit confused here. In account_shadowed, if
>>> sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write protected, and this is reasonable. So why
>>> write protecting the gfn of PT_PAGE_TABLE_LEVEL here?
>>
>> Because the shadow page will become 'sync' that means the shadow page will be synced
>> with the page table in guest. So the shadow page need to be write-protected to avoid
>> the guest page table is changed when we do the 'sync' thing.
>>
>> The shadow page need to be write-protected to avoid that guest page table is changed
>> when we are syncing the shadow page table. See kvm_sync_pages() after doing
>> rmap_write_protect().
> I see. So why are you treat PT_PAGE_TABLE_LEVEL gfn separately here? why this cannot be done in
> account_shadowed, as you did for upper level sp?

Non-leaf shadow pages are kept write-protected, so the page fault handler cannot fix write
accesses to them. Leaf shadow pages are not.

> Actually I am thinking whether account_shadowed is
> overdoing things. From it's name it should only *account* shadow sp, but now it also does write
> protection and disable large page mapping.
>

Hmm... disabling large page mapping is already in the current code. I think account_shadowed() can
be understood as taking a new page into account, so the protection things are needed there.

But I am not good at naming functions and my English is not good enough either, so any better name
is welcome. ;)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kai Huang Dec. 17, 2015, 2:44 a.m. UTC | #6
On 12/16/2015 04:39 PM, Xiao Guangrong wrote:
>
>
> On 12/16/2015 03:51 PM, Kai Huang wrote:
>>
>>
>> On 12/15/2015 05:10 PM, Xiao Guangrong wrote:
>>>
>>>
>>> On 12/15/2015 03:52 PM, Kai Huang wrote:
>>>
>>>>>   static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
>>>>> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page 
>>>>> *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>>>>>       hlist_add_head(&sp->hash_link,
>>>>> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
>>>>>       if (!direct) {
>>>>> -        if (rmap_write_protect(vcpu, gfn))
>>>>> +        /*
>>>>> +         * we should do write protection before syncing pages
>>>>> +         * otherwise the content of the synced shadow page may
>>>>> +         * be inconsistent with guest page table.
>>>>> +         */
>>>>> +        account_shadowed(vcpu->kvm, sp);
>>>>> +
>>>>> +        if (level == PT_PAGE_TABLE_LEVEL &&
>>>>> +              rmap_write_protect(vcpu, gfn))
>>>>>               kvm_flush_remote_tlbs(vcpu->kvm);
>>>> I think your modification is good but I am little bit confused 
>>>> here. In account_shadowed, if
>>>> sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write 
>>>> protected, and this is reasonable. So why
>>>> write protecting the gfn of PT_PAGE_TABLE_LEVEL here?
>>>
>>> Because the shadow page will become 'sync' that means the shadow 
>>> page will be synced
>>> with the page table in guest. So the shadow page need to be 
>>> write-protected to avoid
>>> the guest page table is changed when we do the 'sync' thing.
>>>
>>> The shadow page need to be write-protected to avoid that guest page 
>>> table is changed
>>> when we are syncing the shadow page table. See kvm_sync_pages() 
>>> after doing
>>> rmap_write_protect().
>> I see. So why are you treat PT_PAGE_TABLE_LEVEL gfn separately here? 
>> why this cannot be done in
>> account_shadowed, as you did for upper level sp?
>
> non-leaf shadow pages are keepking write-protected which page fault 
> handler can not fix write
> access on it. And leaf shadow pages are not.
My point is that the original code didn't separate the two cases, so I am not 
sure why you need to separate them. Perhaps you want to make account_shadowed 
imply that the non-leaf guest page table is write-protected while the leaf page 
table is not.

Thanks,
-Kai
>> Actually I am thinking whether account_shadowed is
>> overdoing things. From it's name it should only *account* shadow sp, 
>> but now it also does write
>> protection and disable large page mapping.
>>
>
> Hmm.. disable large page mapping is already in current code... i think 
> account_shadowed() can
> be understood as new page is taken into account, so protection things 
> are needed there.
>
> But I am not good at naming function and also my english is not good 
> enough, any other better name
> is welcome. ;)
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Dec. 17, 2015, 4:07 a.m. UTC | #7
On 12/17/2015 10:44 AM, Kai Huang wrote:
>
>
> On 12/16/2015 04:39 PM, Xiao Guangrong wrote:
>>
>>
>> On 12/16/2015 03:51 PM, Kai Huang wrote:
>>>
>>>
>>> On 12/15/2015 05:10 PM, Xiao Guangrong wrote:
>>>>
>>>>
>>>> On 12/15/2015 03:52 PM, Kai Huang wrote:
>>>>
>>>>>>   static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
>>>>>> @@ -2140,12 +2150,18 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
>>>>>>       hlist_add_head(&sp->hash_link,
>>>>>> &vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
>>>>>>       if (!direct) {
>>>>>> -        if (rmap_write_protect(vcpu, gfn))
>>>>>> +        /*
>>>>>> +         * we should do write protection before syncing pages
>>>>>> +         * otherwise the content of the synced shadow page may
>>>>>> +         * be inconsistent with guest page table.
>>>>>> +         */
>>>>>> +        account_shadowed(vcpu->kvm, sp);
>>>>>> +
>>>>>> +        if (level == PT_PAGE_TABLE_LEVEL &&
>>>>>> +              rmap_write_protect(vcpu, gfn))
>>>>>>               kvm_flush_remote_tlbs(vcpu->kvm);
>>>>> I think your modification is good but I am little bit confused here. In account_shadowed, if
>>>>> sp->role.level > PT_PAGE_TABLE_LEVEL, the sp->gfn is write protected, and this is reasonable.
>>>>> So why
>>>>> write protecting the gfn of PT_PAGE_TABLE_LEVEL here?
>>>>
>>>> Because the shadow page will become 'sync' that means the shadow page will be synced
>>>> with the page table in guest. So the shadow page need to be write-protected to avoid
>>>> the guest page table is changed when we do the 'sync' thing.
>>>>
>>>> The shadow page need to be write-protected to avoid that guest page table is changed
>>>> when we are syncing the shadow page table. See kvm_sync_pages() after doing
>>>> rmap_write_protect().
>>> I see. So why are you treat PT_PAGE_TABLE_LEVEL gfn separately here? why this cannot be done in
>>> account_shadowed, as you did for upper level sp?
>>
>> non-leaf shadow pages are keepking write-protected which page fault handler can not fix write
>> access on it. And leaf shadow pages are not.
> My point is the original code didn't separate the two cases so I am not sure why you need to
> separate. Perhaps you want to make account_shadowed imply the non-leaf guest page table is
> write-protected while leaf page table is not.

That is why we get an improvement after this patchset: we speed up the case where write accesses
happen on non-leaf page tables. ;)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index 6744234..3447dac 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -41,8 +41,16 @@  int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
 void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
 				 struct kvm_memory_slot *dont);
 
+void
+kvm_slot_page_track_add_page_nolock(struct kvm *kvm,
+				    struct kvm_memory_slot *slot, gfn_t gfn,
+				    enum kvm_page_track_mode mode);
 void kvm_page_track_add_page(struct kvm *kvm, gfn_t gfn,
 			     enum kvm_page_track_mode mode);
+void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm,
+					    struct kvm_memory_slot *slot,
+					    gfn_t gfn,
+					    enum kvm_page_track_mode mode);
 void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn,
 				enum kvm_page_track_mode mode);
 bool kvm_page_track_check_mode(struct kvm_vcpu *vcpu, gfn_t gfn,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index b23f9fc..5a2ca73 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -806,11 +806,17 @@  static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	struct kvm_memory_slot *slot;
 	gfn_t gfn;
 
+	kvm->arch.indirect_shadow_pages++;
 	gfn = sp->gfn;
 	slots = kvm_memslots_for_spte_role(kvm, sp->role);
 	slot = __gfn_to_memslot(slots, gfn);
+
+	/* the non-leaf shadow pages are keeping readonly. */
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		return kvm_slot_page_track_add_page_nolock(kvm, slot, gfn,
+							KVM_PAGE_TRACK_WRITE);
+
 	kvm_mmu_gfn_disallow_lpage(slot, gfn);
-	kvm->arch.indirect_shadow_pages++;
 }
 
 static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -819,11 +825,15 @@  static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	struct kvm_memory_slot *slot;
 	gfn_t gfn;
 
+	kvm->arch.indirect_shadow_pages--;
 	gfn = sp->gfn;
 	slots = kvm_memslots_for_spte_role(kvm, sp->role);
 	slot = __gfn_to_memslot(slots, gfn);
+	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
+		return kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn,
+							KVM_PAGE_TRACK_WRITE);
+
 	kvm_mmu_gfn_allow_lpage(slot, gfn);
-	kvm->arch.indirect_shadow_pages--;
 }
 
 static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
@@ -2140,12 +2150,18 @@  static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 	hlist_add_head(&sp->hash_link,
 		&vcpu->kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
 	if (!direct) {
-		if (rmap_write_protect(vcpu, gfn))
+		/*
+		 * we should do write protection before syncing pages
+		 * otherwise the content of the synced shadow page may
+		 * be inconsistent with guest page table.
+		 */
+		account_shadowed(vcpu->kvm, sp);
+
+		if (level == PT_PAGE_TABLE_LEVEL &&
+		      rmap_write_protect(vcpu, gfn))
 			kvm_flush_remote_tlbs(vcpu->kvm);
 		if (level > PT_PAGE_TABLE_LEVEL && need_sync)
 			kvm_sync_pages(vcpu, gfn);
-
-		account_shadowed(vcpu->kvm, sp);
 	}
 	sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen;
 	init_shadow_page_table(sp);
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index 84420df..87554d3 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -77,6 +77,26 @@  static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
 	WARN_ON(val < 0);
 }
 
+void
+kvm_slot_page_track_add_page_nolock(struct kvm *kvm,
+				    struct kvm_memory_slot *slot, gfn_t gfn,
+				    enum kvm_page_track_mode mode)
+{
+	WARN_ON(!check_mode(mode));
+
+	update_gfn_track(slot, gfn, mode, 1);
+
+	/*
+	 * new track stops large page mapping for the
+	 * tracked page.
+	 */
+	kvm_mmu_gfn_disallow_lpage(slot, gfn);
+
+	if (mode == KVM_PAGE_TRACK_WRITE)
+		if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
+			kvm_flush_remote_tlbs(kvm);
+}
+
 /*
  * add guest page to the tracking pool so that corresponding access on that
  * page will be intercepted.
@@ -101,21 +121,27 @@  void kvm_page_track_add_page(struct kvm *kvm, gfn_t gfn,
 		slot = __gfn_to_memslot(slots, gfn);
 
 		spin_lock(&kvm->mmu_lock);
-		update_gfn_track(slot, gfn, mode, 1);
-
-		/*
-		 * new track stops large page mapping for the
-		 * tracked page.
-		 */
-		kvm_mmu_gfn_disallow_lpage(slot, gfn);
-
-		if (mode == KVM_PAGE_TRACK_WRITE)
-			if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
-				kvm_flush_remote_tlbs(kvm);
+		kvm_slot_page_track_add_page_nolock(kvm, slot, gfn, mode);
 		spin_unlock(&kvm->mmu_lock);
 	}
 }
 
+void kvm_slot_page_track_remove_page_nolock(struct kvm *kvm,
+					    struct kvm_memory_slot *slot,
+					    gfn_t gfn,
+					    enum kvm_page_track_mode mode)
+{
+	WARN_ON(!check_mode(mode));
+
+	update_gfn_track(slot, gfn, mode, -1);
+
+	/*
+	 * allow large page mapping for the tracked page
+	 * after the tracker is gone.
+	 */
+	kvm_mmu_gfn_allow_lpage(slot, gfn);
+}
+
 /*
  * remove the guest page from the tracking pool which stops the interception
  * of corresponding access on that page. It is the opposed operation of
@@ -134,20 +160,12 @@  void kvm_page_track_remove_page(struct kvm *kvm, gfn_t gfn,
 	struct kvm_memory_slot *slot;
 	int i;
 
-	WARN_ON(!check_mode(mode));
-
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
 		slots = __kvm_memslots(kvm, i);
 		slot = __gfn_to_memslot(slots, gfn);
 
 		spin_lock(&kvm->mmu_lock);
-		update_gfn_track(slot, gfn, mode, -1);
-
-		/*
-		 * allow large page mapping for the tracked page
-		 * after the tracker is gone.
-		 */
-		kvm_mmu_gfn_allow_lpage(slot, gfn);
+		kvm_slot_page_track_remove_page_nolock(kvm, slot, gfn, mode);
 		spin_unlock(&kvm->mmu_lock);
 	}
 }