diff mbox series

[04/10] KVM: x86: Set PVCLOCK_GUEST_STOPPED only for kvmclock, not for Xen PV clock

Message ID 20250118005552.2626804-5-seanjc@google.com (mailing list archive)
State New
Headers show
Series KVM: x86: pvclock fixes and cleanups | expand

Commit Message

Sean Christopherson Jan. 18, 2025, 12:55 a.m. UTC
Handle "guest stopped" propagation only for kvmclock, as the flag is set
if and only if kvmclock is "active", i.e. can only be set for Xen PV clock
if kvmclock *and* Xen PV clock are in-use by the guest, which creates very
bizarre behavior for the guest.

Simply restrict the flag to kvmclock, e.g. instead of trying to handle
Xen PV clock, as propagation of PVCLOCK_GUEST_STOPPED was unintentionally
added during a refactoring, and while Xen proper defines
XEN_PVCLOCK_GUEST_STOPPED, there's no evidence that Xen guests actually
support the flag.

Check and clear pvclock_set_guest_stopped_request if and only if kvmclock
is active to preserve the original behavior, i.e. keep the flag pending
if kvmclock happens to be disabled when KVM processes the initial request.

Fixes: aa096aa0a05f ("KVM: x86/xen: setup pvclock updates")
Cc: Paul Durrant <pdurrant@amazon.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/x86.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

Comments

Paul Durrant Jan. 21, 2025, 4:42 p.m. UTC | #1
On 18/01/2025 00:55, Sean Christopherson wrote:
> Handle "guest stopped" propagation only for kvmclock, as the flag is set
> if and only if kvmclock is "active", i.e. can only be set for Xen PV clock
> if kvmclock *and* Xen PV clock are in-use by the guest, which creates very
> bizarre behavior for the guest.
> 
> Simply restrict the flag to kvmclock, e.g. instead of trying to handle
> Xen PV clock, as propagation of PVCLOCK_GUEST_STOPPED was unintentionally
> added during a refactoring, and while Xen proper defines
> XEN_PVCLOCK_GUEST_STOPPED, there's no evidence that Xen guests actually
> support the flag.

Indeed. I can find no consumers of the flag.

> 
> Check and clear pvclock_set_guest_stopped_request if and only if kvmclock
> is active to preserve the original behavior, i.e. keep the flag pending
> if kvmclock happens to be disabled when KVM processes the initial request.
> 
> Fixes: aa096aa0a05f ("KVM: x86/xen: setup pvclock updates")
> Cc: Paul Durrant <pdurrant@amazon.com>
> Cc: David Woodhouse <dwmw@amazon.co.uk>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>   arch/x86/kvm/x86.c | 20 ++++++++++++++------
>   1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index d8ee37dd2b57..3c4d210e8a9e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3150,11 +3150,6 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
>   	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
>   	vcpu->hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
>   
> -	if (vcpu->pvclock_set_guest_stopped_request) {
> -		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> -		vcpu->pvclock_set_guest_stopped_request = false;
> -	}
> -
>   	memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
>   
>   	if (force_tsc_unstable)
> @@ -3264,8 +3259,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>   	if (use_master_clock)
>   		vcpu->hv_clock.flags |= PVCLOCK_TSC_STABLE_BIT;
>   
> -	if (vcpu->pv_time.active)
> +	if (vcpu->pv_time.active) {
> +		/*
> +		 * GUEST_STOPPED is only supported by kvmclock, and KVM's
> +		 * historic behavior is to only process the request if kvmclock
> +		 * is active/enabled.
> +		 */
> +		if (vcpu->pvclock_set_guest_stopped_request) {
> +			vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> +			vcpu->pvclock_set_guest_stopped_request = false;
> +		}
>   		kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
> +
> +		vcpu->hv_clock.flags &= ~PVCLOCK_GUEST_STOPPED;

Is this intentional? The line above your change in 
kvm_setup_guest_pvclock() clearly keeps the flag enabled if it already 
set and, without this patch, I don't see anything clearing it.

> +	}
> +
>   #ifdef CONFIG_KVM_XEN
>   	if (vcpu->xen.vcpu_info_cache.active)
>   		kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
Sean Christopherson Jan. 21, 2025, 5:09 p.m. UTC | #2
On Tue, Jan 21, 2025, Paul Durrant wrote:
> > ---
> >   arch/x86/kvm/x86.c | 20 ++++++++++++++------
> >   1 file changed, 14 insertions(+), 6 deletions(-)
> > 
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index d8ee37dd2b57..3c4d210e8a9e 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -3150,11 +3150,6 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
> >   	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
> >   	vcpu->hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
> > -	if (vcpu->pvclock_set_guest_stopped_request) {
> > -		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> > -		vcpu->pvclock_set_guest_stopped_request = false;
> > -	}
> > -
> >   	memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
> >   	if (force_tsc_unstable)
> > @@ -3264,8 +3259,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> >   	if (use_master_clock)
> >   		vcpu->hv_clock.flags |= PVCLOCK_TSC_STABLE_BIT;
> > -	if (vcpu->pv_time.active)
> > +	if (vcpu->pv_time.active) {
> > +		/*
> > +		 * GUEST_STOPPED is only supported by kvmclock, and KVM's
> > +		 * historic behavior is to only process the request if kvmclock
> > +		 * is active/enabled.
> > +		 */
> > +		if (vcpu->pvclock_set_guest_stopped_request) {
> > +			vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> > +			vcpu->pvclock_set_guest_stopped_request = false;
> > +		}
> >   		kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
> > +
> > +		vcpu->hv_clock.flags &= ~PVCLOCK_GUEST_STOPPED;
> 
> Is this intentional? The line above your change in kvm_setup_guest_pvclock()
> clearly keeps the flag enabled if it already set and, without this patch, I
> don't see anything clearing it.

Oh, I see what you're getting at.  Hrm.  Yes, clearing the flag is intentional,
otherwise the patch wouldn't do what it claims to do (set PVCLOCK_GUEST_STOPPED
only for kvmclock).

Swapping the order of this patch and the next patch ("don't bleed ...") doesn't
break the cycle because that would result in PVCLOCK_GUEST_STOPPED only being
applied to the first active clock (kvmclock).

The only way I can think of to fully isolate the changes would be to split this
into two patches: (4a) hoist pvclock_set_guest_stopped_request processing into
kvm_guest_time_update() and (4b) apply it only to kvmclock, and then make the
ordering 4a, 5, 4b, i.e. "hoist", "don't bleed", "only kvmclock".

4a would be quite ugly, because to avoid introducing a functional change, it
would need to be:

	if (vcpu->pv_time.active || vcpu->xen.vcpu_info_cache.active ||
	    vcpu->xen.vcpu_time_info_cache.active) {
		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
		vcpu->pvclock_set_guest_stopped_request = false;
	}

But it's not the worst intermediate code, so I'm not opposed to going that
route.

> > +	}
> > +
> >   #ifdef CONFIG_KVM_XEN
> >   	if (vcpu->xen.vcpu_info_cache.active)
> >   		kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
>
Paul Durrant Jan. 21, 2025, 5:15 p.m. UTC | #3
On 21/01/2025 17:09, Sean Christopherson wrote:
> On Tue, Jan 21, 2025, Paul Durrant wrote:
>>> ---
>>>    arch/x86/kvm/x86.c | 20 ++++++++++++++------
>>>    1 file changed, 14 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index d8ee37dd2b57..3c4d210e8a9e 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -3150,11 +3150,6 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
>>>    	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
>>>    	vcpu->hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
>>> -	if (vcpu->pvclock_set_guest_stopped_request) {
>>> -		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
>>> -		vcpu->pvclock_set_guest_stopped_request = false;
>>> -	}
>>> -
>>>    	memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
>>>    	if (force_tsc_unstable)
>>> @@ -3264,8 +3259,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
>>>    	if (use_master_clock)
>>>    		vcpu->hv_clock.flags |= PVCLOCK_TSC_STABLE_BIT;
>>> -	if (vcpu->pv_time.active)
>>> +	if (vcpu->pv_time.active) {
>>> +		/*
>>> +		 * GUEST_STOPPED is only supported by kvmclock, and KVM's
>>> +		 * historic behavior is to only process the request if kvmclock
>>> +		 * is active/enabled.
>>> +		 */
>>> +		if (vcpu->pvclock_set_guest_stopped_request) {
>>> +			vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
>>> +			vcpu->pvclock_set_guest_stopped_request = false;
>>> +		}
>>>    		kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
>>> +
>>> +		vcpu->hv_clock.flags &= ~PVCLOCK_GUEST_STOPPED;
>>
>> Is this intentional? The line above your change in kvm_setup_guest_pvclock()
>> clearly keeps the flag enabled if it already set and, without this patch, I
>> don't see anything clearing it.
> 
> Oh, I see what you're getting at.  Hrm.  Yes, clearing the flag is intentional,
> otherwise the patch wouldn't do what it claims to do (set PVCLOCK_GUEST_STOPPED
> only for kvmclock).
> 
> Swapping the order of this patch and the next patch ("don't bleed ...") doesn't
> break the cycle because that would result in PVCLOCK_GUEST_STOPPED only being
> applied to the first active clock (kvmclock).
> 
> The only way I can think of to fully isolate the changes would be to split this
> into two patches: (4a) hoist pvclock_set_guest_stopped_request processing into
> kvm_guest_time_update() and (4b) apply it only to kvmclock, and then make the
> ordering 4a, 5, 4b, i.e. "hoist", "don't bleed", "only kvmclock".
> 
> 4a would be quite ugly, because to avoid introducing a functional change, it
> would need to be:
> 
> 	if (vcpu->pv_time.active || vcpu->xen.vcpu_info_cache.active ||
> 	    vcpu->xen.vcpu_time_info_cache.active) {
> 		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> 		vcpu->pvclock_set_guest_stopped_request = false;
> 	}
> 
> But it's not the worst intermediate code, so I'm not opposed to going that
> route.
> 

What about putting this change after patch 7. Then you could take a 
local copy of hv_clock in which you could set PVCLOCK_GUEST_STOPPED and 
so avoid bleeding the flag that way?

>>> +	}
>>> +
>>>    #ifdef CONFIG_KVM_XEN
>>>    	if (vcpu->xen.vcpu_info_cache.active)
>>>    		kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
>>
Sean Christopherson Jan. 21, 2025, 6:32 p.m. UTC | #4
On Tue, Jan 21, 2025, Paul Durrant wrote:
> On 21/01/2025 17:09, Sean Christopherson wrote:
> > On Tue, Jan 21, 2025, Paul Durrant wrote:
> > > > ---
> > > >    arch/x86/kvm/x86.c | 20 ++++++++++++++------
> > > >    1 file changed, 14 insertions(+), 6 deletions(-)
> > > > 
> > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > > index d8ee37dd2b57..3c4d210e8a9e 100644
> > > > --- a/arch/x86/kvm/x86.c
> > > > +++ b/arch/x86/kvm/x86.c
> > > > @@ -3150,11 +3150,6 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
> > > >    	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
> > > >    	vcpu->hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
> > > > -	if (vcpu->pvclock_set_guest_stopped_request) {
> > > > -		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> > > > -		vcpu->pvclock_set_guest_stopped_request = false;
> > > > -	}
> > > > -
> > > >    	memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
> > > >    	if (force_tsc_unstable)
> > > > @@ -3264,8 +3259,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
> > > >    	if (use_master_clock)
> > > >    		vcpu->hv_clock.flags |= PVCLOCK_TSC_STABLE_BIT;
> > > > -	if (vcpu->pv_time.active)
> > > > +	if (vcpu->pv_time.active) {
> > > > +		/*
> > > > +		 * GUEST_STOPPED is only supported by kvmclock, and KVM's
> > > > +		 * historic behavior is to only process the request if kvmclock
> > > > +		 * is active/enabled.
> > > > +		 */
> > > > +		if (vcpu->pvclock_set_guest_stopped_request) {
> > > > +			vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> > > > +			vcpu->pvclock_set_guest_stopped_request = false;
> > > > +		}
> > > >    		kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
> > > > +
> > > > +		vcpu->hv_clock.flags &= ~PVCLOCK_GUEST_STOPPED;
> > > 
> > > Is this intentional? The line above your change in kvm_setup_guest_pvclock()
> > > clearly keeps the flag enabled if it already set and, without this patch, I
> > > don't see anything clearing it.
> > 
> > Oh, I see what you're getting at.  Hrm.  Yes, clearing the flag is intentional,
> > otherwise the patch wouldn't do what it claims to do (set PVCLOCK_GUEST_STOPPED
> > only for kvmclock).
> > 
> > Swapping the order of this patch and the next patch ("don't bleed ...") doesn't
> > break the cycle because that would result in PVCLOCK_GUEST_STOPPED only being
> > applied to the first active clock (kvmclock).
> > 
> > The only way I can think of to fully isolate the changes would be to split this
> > into two patches: (4a) hoist pvclock_set_guest_stopped_request processing into
> > kvm_guest_time_update() and (4b) apply it only to kvmclock, and then make the
> > ordering 4a, 5, 4b, i.e. "hoist", "don't bleed", "only kvmclock".
> > 
> > 4a would be quite ugly, because to avoid introducing a functional change, it
> > would need to be:
> > 
> > 	if (vcpu->pv_time.active || vcpu->xen.vcpu_info_cache.active ||
> > 	    vcpu->xen.vcpu_time_info_cache.active) {
> > 		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
> > 		vcpu->pvclock_set_guest_stopped_request = false;
> > 	}
> > 
> > But it's not the worst intermediate code, so I'm not opposed to going that
> > route.
> > 
> 
> What about putting this change after patch 7. Then you could take a local
> copy of hv_clock in which you could set PVCLOCK_GUEST_STOPPED and so avoid
> bleeding the flag that way?

But to preserve the current behavior of setting PVCLOCK_GUEST_STOPPED for all
clocks, processing pvclock_set_guest_stopped_request needs to be moved out of
kvm_setup_guest_pvclock() before said helper can make a copy of the reference.
diff mbox series

Patch

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d8ee37dd2b57..3c4d210e8a9e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3150,11 +3150,6 @@  static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
 	vcpu->hv_clock.flags |= (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
 
-	if (vcpu->pvclock_set_guest_stopped_request) {
-		vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
-		vcpu->pvclock_set_guest_stopped_request = false;
-	}
-
 	memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
 
 	if (force_tsc_unstable)
@@ -3264,8 +3259,21 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 	if (use_master_clock)
 		vcpu->hv_clock.flags |= PVCLOCK_TSC_STABLE_BIT;
 
-	if (vcpu->pv_time.active)
+	if (vcpu->pv_time.active) {
+		/*
+		 * GUEST_STOPPED is only supported by kvmclock, and KVM's
+		 * historic behavior is to only process the request if kvmclock
+		 * is active/enabled.
+		 */
+		if (vcpu->pvclock_set_guest_stopped_request) {
+			vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
+			vcpu->pvclock_set_guest_stopped_request = false;
+		}
 		kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
+
+		vcpu->hv_clock.flags &= ~PVCLOCK_GUEST_STOPPED;
+	}
+
 #ifdef CONFIG_KVM_XEN
 	if (vcpu->xen.vcpu_info_cache.active)
 		kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,