[v2] x86/vpmu: Fix race-condition in vpmu_load

Message ID 86f8a095ff18e4dc41ecb9cef5153438158b91ce.1663878942.git.tamas.lengyel@intel.com (mailing list archive)
State New, archived
Series [v2] x86/vpmu: Fix race-condition in vpmu_load

Commit Message

Tamas K Lengyel Sept. 22, 2022, 8:48 p.m. UTC
The vPMU code attempts to optimize saving/reloading of the PMU context by
keeping track of which vCPU ran on each pCPU. When a pCPU is getting
scheduled, it checks whether the previous vCPU is different from the current
one and, if so, attempts a call to vpmu_save_force. Unfortunately, if the
previous vCPU is already being scheduled to run on another pCPU, its state
will already be runnable, which results in an ASSERT failure.

Fix this by always performing a PMU context save in vpmu_save when called
from vpmu_switch_from, and a vpmu_load when called from vpmu_switch_to.

While this introduces a minimal overhead when the same vCPU is rescheduled
on the same pCPU, the ASSERT failure is avoided and the code is a lot easier
to reason about.

Signed-off-by: Tamas K Lengyel <tamas.lengyel@intel.com>
---
 xen/arch/x86/cpu/vpmu.c | 43 +++++------------------------------------
 1 file changed, 5 insertions(+), 38 deletions(-)
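
For context, vpmu_save() and vpmu_load() are reached from the scheduler's
context-switch path through the vpmu_switch_from()/vpmu_switch_to() inline
helpers. A minimal sketch of those helpers is below; the vpmu_mode guard
shown is an assumption for illustration, and the authoritative definitions
live in Xen's asm/vpmu.h header:

    /*
     * Illustrative sketch only -- how the context-switch path reaches
     * vpmu_save()/vpmu_load().  With this patch, vpmu_save() always saves
     * and vpmu_load() always (re)loads the PMU context.
     */
    static inline void vpmu_switch_from(struct vcpu *prev)
    {
        if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
            vpmu_save(prev);
    }

    static inline void vpmu_switch_to(struct vcpu *next)
    {
        if ( vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV) )
            vpmu_load(next, 0);
    }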

Comments

Jan Beulich Sept. 26, 2022, 2:12 p.m. UTC | #1
On 22.09.2022 22:48, Tamas K Lengyel wrote:
> --- a/xen/arch/x86/cpu/vpmu.c
> +++ b/xen/arch/x86/cpu/vpmu.c
> @@ -376,57 +376,24 @@ void vpmu_save(struct vcpu *v)
>      vpmu->last_pcpu = pcpu;
>      per_cpu(last_vcpu, pcpu) = v;
>  
> +    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> +
>      if ( alternative_call(vpmu_ops.arch_vpmu_save, v, 0) )
>          vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>  
> +    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
> +
>      apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
>  }
>  
>  int vpmu_load(struct vcpu *v, bool_t from_guest)
>  {
>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> -    int pcpu = smp_processor_id(), ret;
> -    struct vcpu *prev = NULL;
> +    int ret;
>  
>      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
>          return 0;
>  
> -    /* First time this VCPU is running here */
> -    if ( vpmu->last_pcpu != pcpu )
> -    {
> -        /*
> -         * Get the context from last pcpu that we ran on. Note that if another
> -         * VCPU is running there it must have saved this VPCU's context before
> -         * startig to run (see below).
> -         * There should be no race since remote pcpu will disable interrupts
> -         * before saving the context.
> -         */
> -        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> -        {
> -            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
> -                             vpmu_save_force, (void *)v, 1);
> -            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> -        }
> -    } 
> -
> -    /* Prevent forced context save from remote CPU */
> -    local_irq_disable();
> -
> -    prev = per_cpu(last_vcpu, pcpu);
> -
> -    if ( prev != v && prev )
> -    {
> -        vpmu = vcpu_vpmu(prev);
> -
> -        /* Someone ran here before us */
> -        vpmu_save_force(prev);
> -        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> -
> -        vpmu = vcpu_vpmu(v);
> -    }
> -
> -    local_irq_enable();
> -
>      /* Only when PMU is counting, we load PMU context immediately. */
>      if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
>           (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&

What about the other two uses of vpmu_save_force() in this file? It looks
to me as if only the use in mem_sharing.c needs to be retained.

Also, going forward, please Cc Boris directly on new iterations of this fix
(if any; I'm not going to exclude that I'm wrong with the above and all is
fine with this version).

Jan
Tamas K Lengyel Sept. 26, 2022, 2:22 p.m. UTC | #2
On Mon, Sep 26, 2022 at 10:12 AM Jan Beulich <jbeulich@suse.com> wrote:

> On 22.09.2022 22:48, Tamas K Lengyel wrote:
> > --- a/xen/arch/x86/cpu/vpmu.c
> > +++ b/xen/arch/x86/cpu/vpmu.c
> > @@ -376,57 +376,24 @@ void vpmu_save(struct vcpu *v)
> >      vpmu->last_pcpu = pcpu;
> >      per_cpu(last_vcpu, pcpu) = v;
> >
> > +    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> > +
> >      if ( alternative_call(vpmu_ops.arch_vpmu_save, v, 0) )
> >          vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> >
> > +    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
> > +
> >      apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
> >  }
> >
> >  int vpmu_load(struct vcpu *v, bool_t from_guest)
> >  {
> >      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> > -    int pcpu = smp_processor_id(), ret;
> > -    struct vcpu *prev = NULL;
> > +    int ret;
> >
> >      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
> >          return 0;
> >
> > -    /* First time this VCPU is running here */
> > -    if ( vpmu->last_pcpu != pcpu )
> > -    {
> > -        /*
> > -         * Get the context from last pcpu that we ran on. Note that if
> another
> > -         * VCPU is running there it must have saved this VPCU's context
> before
> > -         * startig to run (see below).
> > -         * There should be no race since remote pcpu will disable
> interrupts
> > -         * before saving the context.
> > -         */
> > -        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> > -        {
> > -            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
> > -                             vpmu_save_force, (void *)v, 1);
> > -            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> > -        }
> > -    }
> > -
> > -    /* Prevent forced context save from remote CPU */
> > -    local_irq_disable();
> > -
> > -    prev = per_cpu(last_vcpu, pcpu);
> > -
> > -    if ( prev != v && prev )
> > -    {
> > -        vpmu = vcpu_vpmu(prev);
> > -
> > -        /* Someone ran here before us */
> > -        vpmu_save_force(prev);
> > -        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> > -
> > -        vpmu = vcpu_vpmu(v);
> > -    }
> > -
> > -    local_irq_enable();
> > -
> >      /* Only when PMU is counting, we load PMU context immediately. */
> >      if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
> >           (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
>
> What about the other two uses of vpmu_save_force() in this file? I looks
> to me as if only the use in mem_sharing.c needs to be retained.
>

I don't know, maybe. I'd rather focus this patch only on the issue and its
fix, as I don't want to introduce unintended side effects by doing a
cleanup/consolidation of other code paths when not strictly necessary.

Tamas
Jan Beulich Sept. 29, 2022, 1:07 p.m. UTC | #3
On 26.09.2022 16:22, Tamas K Lengyel wrote:
> On Mon, Sep 26, 2022 at 10:12 AM Jan Beulich <jbeulich@suse.com> wrote:
>> On 22.09.2022 22:48, Tamas K Lengyel wrote:
>>> --- a/xen/arch/x86/cpu/vpmu.c
>>> +++ b/xen/arch/x86/cpu/vpmu.c
>>> @@ -376,57 +376,24 @@ void vpmu_save(struct vcpu *v)
>>>      vpmu->last_pcpu = pcpu;
>>>      per_cpu(last_vcpu, pcpu) = v;
>>>
>>> +    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
>>> +
>>>      if ( alternative_call(vpmu_ops.arch_vpmu_save, v, 0) )
>>>          vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>>>
>>> +    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
>>> +
>>>      apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
>>>  }
>>>
>>>  int vpmu_load(struct vcpu *v, bool_t from_guest)
>>>  {
>>>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>>> -    int pcpu = smp_processor_id(), ret;
>>> -    struct vcpu *prev = NULL;
>>> +    int ret;
>>>
>>>      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
>>>          return 0;
>>>
>>> -    /* First time this VCPU is running here */
>>> -    if ( vpmu->last_pcpu != pcpu )
>>> -    {
>>> -        /*
>>> -         * Get the context from last pcpu that we ran on. Note that if
>> another
>>> -         * VCPU is running there it must have saved this VPCU's context
>> before
>>> -         * startig to run (see below).
>>> -         * There should be no race since remote pcpu will disable
>> interrupts
>>> -         * before saving the context.
>>> -         */
>>> -        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
>>> -        {
>>> -            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
>>> -                             vpmu_save_force, (void *)v, 1);
>>> -            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>>> -        }
>>> -    }
>>> -
>>> -    /* Prevent forced context save from remote CPU */
>>> -    local_irq_disable();
>>> -
>>> -    prev = per_cpu(last_vcpu, pcpu);
>>> -
>>> -    if ( prev != v && prev )
>>> -    {
>>> -        vpmu = vcpu_vpmu(prev);
>>> -
>>> -        /* Someone ran here before us */
>>> -        vpmu_save_force(prev);
>>> -        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>>> -
>>> -        vpmu = vcpu_vpmu(v);
>>> -    }
>>> -
>>> -    local_irq_enable();
>>> -
>>>      /* Only when PMU is counting, we load PMU context immediately. */
>>>      if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
>>>           (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
>>
>> What about the other two uses of vpmu_save_force() in this file? I looks
>> to me as if only the use in mem_sharing.c needs to be retained.
> 
> I don't know, maybe. I rather focus this patch only on the issue and its
> fix as I don't want to introduce unintended side effects by doing a
> cleanup/consolidation at other code-paths when not strictly necessary.

While I see your point, I'm afraid I don't think I can ack this
change without knowing whether the other uses don't expose a similar
issue. It would feel wrong to fix only one half of a problem. I may,
somewhat hesitantly, give an ack if e.g. Boris offered his R-b.
Else the only other option I see is that some other maintainer give
their ack.

Jan
Tamas K Lengyel Sept. 29, 2022, 2:28 p.m. UTC | #4
On Thu, Sep 29, 2022 at 9:07 AM Jan Beulich <jbeulich@suse.com> wrote:

> On 26.09.2022 16:22, Tamas K Lengyel wrote:
> > On Mon, Sep 26, 2022 at 10:12 AM Jan Beulich <jbeulich@suse.com> wrote:
> >> On 22.09.2022 22:48, Tamas K Lengyel wrote:
> >>> --- a/xen/arch/x86/cpu/vpmu.c
> >>> +++ b/xen/arch/x86/cpu/vpmu.c
> >>> @@ -376,57 +376,24 @@ void vpmu_save(struct vcpu *v)
> >>>      vpmu->last_pcpu = pcpu;
> >>>      per_cpu(last_vcpu, pcpu) = v;
> >>>
> >>> +    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
> >>> +
> >>>      if ( alternative_call(vpmu_ops.arch_vpmu_save, v, 0) )
> >>>          vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> >>>
> >>> +    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
> >>> +
> >>>      apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
> >>>  }
> >>>
> >>>  int vpmu_load(struct vcpu *v, bool_t from_guest)
> >>>  {
> >>>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
> >>> -    int pcpu = smp_processor_id(), ret;
> >>> -    struct vcpu *prev = NULL;
> >>> +    int ret;
> >>>
> >>>      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
> >>>          return 0;
> >>>
> >>> -    /* First time this VCPU is running here */
> >>> -    if ( vpmu->last_pcpu != pcpu )
> >>> -    {
> >>> -        /*
> >>> -         * Get the context from last pcpu that we ran on. Note that if
> >> another
> >>> -         * VCPU is running there it must have saved this VPCU's
> context
> >> before
> >>> -         * startig to run (see below).
> >>> -         * There should be no race since remote pcpu will disable
> >> interrupts
> >>> -         * before saving the context.
> >>> -         */
> >>> -        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
> >>> -        {
> >>> -            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
> >>> -                             vpmu_save_force, (void *)v, 1);
> >>> -            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> >>> -        }
> >>> -    }
> >>> -
> >>> -    /* Prevent forced context save from remote CPU */
> >>> -    local_irq_disable();
> >>> -
> >>> -    prev = per_cpu(last_vcpu, pcpu);
> >>> -
> >>> -    if ( prev != v && prev )
> >>> -    {
> >>> -        vpmu = vcpu_vpmu(prev);
> >>> -
> >>> -        /* Someone ran here before us */
> >>> -        vpmu_save_force(prev);
> >>> -        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
> >>> -
> >>> -        vpmu = vcpu_vpmu(v);
> >>> -    }
> >>> -
> >>> -    local_irq_enable();
> >>> -
> >>>      /* Only when PMU is counting, we load PMU context immediately. */
> >>>      if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
> >>>           (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
> >>
> >> What about the other two uses of vpmu_save_force() in this file? I looks
> >> to me as if only the use in mem_sharing.c needs to be retained.
> >
> > I don't know, maybe. I rather focus this patch only on the issue and its
> > fix as I don't want to introduce unintended side effects by doing a
> > cleanup/consolidation at other code-paths when not strictly necessary.
>
> While I see your point, I'm afraid I don't think I can ack this
> change without knowing whether the other uses don't expose a similar
> issue. It would feel wrong to fix only one half of a problem. I may,
> somewhat hesitantly, give an ack if e.g. Boris offered his R-b.
> Else the only other option I see is that some other maintainer give
> their ack.
>

I may have misunderstood what you are asking. I thought you were asking if
the other two remaining users of vpmu_save_force could be switched over to
vpmu_save as a generic cleanup, to which my answer is still maybe. From the
perspective of this particular bug those use-cases are safe. One is acting
on the current vcpu and doesn't try to run vpmu_save_force on a remote vcpu;
the other one is called when the domain is being shut down, so the vcpu
cannot be in a runnable state.

Tamas
Jan Beulich Sept. 29, 2022, 2:46 p.m. UTC | #5
On 29.09.2022 16:28, Tamas K Lengyel wrote:
> On Thu, Sep 29, 2022 at 9:07 AM Jan Beulich <jbeulich@suse.com> wrote:
> 
>> On 26.09.2022 16:22, Tamas K Lengyel wrote:
>>> On Mon, Sep 26, 2022 at 10:12 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>> On 22.09.2022 22:48, Tamas K Lengyel wrote:
>>>>> --- a/xen/arch/x86/cpu/vpmu.c
>>>>> +++ b/xen/arch/x86/cpu/vpmu.c
>>>>> @@ -376,57 +376,24 @@ void vpmu_save(struct vcpu *v)
>>>>>      vpmu->last_pcpu = pcpu;
>>>>>      per_cpu(last_vcpu, pcpu) = v;
>>>>>
>>>>> +    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
>>>>> +
>>>>>      if ( alternative_call(vpmu_ops.arch_vpmu_save, v, 0) )
>>>>>          vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>>>>>
>>>>> +    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
>>>>> +
>>>>>      apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
>>>>>  }
>>>>>
>>>>>  int vpmu_load(struct vcpu *v, bool_t from_guest)
>>>>>  {
>>>>>      struct vpmu_struct *vpmu = vcpu_vpmu(v);
>>>>> -    int pcpu = smp_processor_id(), ret;
>>>>> -    struct vcpu *prev = NULL;
>>>>> +    int ret;
>>>>>
>>>>>      if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
>>>>>          return 0;
>>>>>
>>>>> -    /* First time this VCPU is running here */
>>>>> -    if ( vpmu->last_pcpu != pcpu )
>>>>> -    {
>>>>> -        /*
>>>>> -         * Get the context from last pcpu that we ran on. Note that if
>>>> another
>>>>> -         * VCPU is running there it must have saved this VPCU's
>> context
>>>> before
>>>>> -         * startig to run (see below).
>>>>> -         * There should be no race since remote pcpu will disable
>>>> interrupts
>>>>> -         * before saving the context.
>>>>> -         */
>>>>> -        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
>>>>> -        {
>>>>> -            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
>>>>> -                             vpmu_save_force, (void *)v, 1);
>>>>> -            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>>>>> -        }
>>>>> -    }
>>>>> -
>>>>> -    /* Prevent forced context save from remote CPU */
>>>>> -    local_irq_disable();
>>>>> -
>>>>> -    prev = per_cpu(last_vcpu, pcpu);
>>>>> -
>>>>> -    if ( prev != v && prev )
>>>>> -    {
>>>>> -        vpmu = vcpu_vpmu(prev);
>>>>> -
>>>>> -        /* Someone ran here before us */
>>>>> -        vpmu_save_force(prev);
>>>>> -        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
>>>>> -
>>>>> -        vpmu = vcpu_vpmu(v);
>>>>> -    }
>>>>> -
>>>>> -    local_irq_enable();
>>>>> -
>>>>>      /* Only when PMU is counting, we load PMU context immediately. */
>>>>>      if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
>>>>>           (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
>>>>
>>>> What about the other two uses of vpmu_save_force() in this file? I looks
>>>> to me as if only the use in mem_sharing.c needs to be retained.
>>>
>>> I don't know, maybe. I rather focus this patch only on the issue and its
>>> fix as I don't want to introduce unintended side effects by doing a
>>> cleanup/consolidation at other code-paths when not strictly necessary.
>>
>> While I see your point, I'm afraid I don't think I can ack this
>> change without knowing whether the other uses don't expose a similar
>> issue. It would feel wrong to fix only one half of a problem. I may,
>> somewhat hesitantly, give an ack if e.g. Boris offered his R-b.
>> Else the only other option I see is that some other maintainer give
>> their ack.
>>
> 
> I may have misunderstood what you are asking. I thought you were asking if
> the other two remaining users of vpmu_save_force could be switched over to
> vpmu_save as a generic cleanup, to which my answer is still maybe. From the
> perspective of this particular bug those use-cases are safe. On is acting
> on the current vcpu and doesn't try to run vpmu_save_force on a remote
> vcpu, the other one is being called when the domain is being shut down so
> the vcpu cannot be in a runnable state.

Hmm, yes - I can accept that. Thanks for the clarification.

Acked-by: Jan Beulich <jbeulich@suse.com>

Jan
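
To illustrate the argument above about the remaining vpmu_save_force()
callers (a hypothetical sketch; the function names below are made up and do
not quote the real call sites in vpmu.c), the two shapes of caller described
as safe look roughly like this:

    /*
     * Sketch 1: save the state of the vCPU currently running on this pCPU.
     * Since it is executing right here, it cannot simultaneously be made
     * runnable by another pCPU's vpmu_load(), so no race is possible.
     */
    static void save_current_vcpu_sketch(void)
    {
        vpmu_save_force(current);
    }

    /*
     * Sketch 2: save during domain teardown.  The vCPU can no longer become
     * runnable elsewhere while the forced save is in flight.
     */
    static void teardown_save_sketch(struct vcpu *v)
    {
        vpmu_save_force(v);
    }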

Patch

diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c
index cacc24a30f..64cdbfc48c 100644
--- a/xen/arch/x86/cpu/vpmu.c
+++ b/xen/arch/x86/cpu/vpmu.c
@@ -376,57 +376,24 @@  void vpmu_save(struct vcpu *v)
     vpmu->last_pcpu = pcpu;
     per_cpu(last_vcpu, pcpu) = v;
 
+    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
+
     if ( alternative_call(vpmu_ops.arch_vpmu_save, v, 0) )
         vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
 
+    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);
+
     apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
 }
 
 int vpmu_load(struct vcpu *v, bool_t from_guest)
 {
     struct vpmu_struct *vpmu = vcpu_vpmu(v);
-    int pcpu = smp_processor_id(), ret;
-    struct vcpu *prev = NULL;
+    int ret;
 
     if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
         return 0;
 
-    /* First time this VCPU is running here */
-    if ( vpmu->last_pcpu != pcpu )
-    {
-        /*
-         * Get the context from last pcpu that we ran on. Note that if another
-         * VCPU is running there it must have saved this VPCU's context before
-         * startig to run (see below).
-         * There should be no race since remote pcpu will disable interrupts
-         * before saving the context.
-         */
-        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
-        {
-            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
-                             vpmu_save_force, (void *)v, 1);
-            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
-        }
-    } 
-
-    /* Prevent forced context save from remote CPU */
-    local_irq_disable();
-
-    prev = per_cpu(last_vcpu, pcpu);
-
-    if ( prev != v && prev )
-    {
-        vpmu = vcpu_vpmu(prev);
-
-        /* Someone ran here before us */
-        vpmu_save_force(prev);
-        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
-
-        vpmu = vcpu_vpmu(v);
-    }
-
-    local_irq_enable();
-
     /* Only when PMU is counting, we load PMU context immediately. */
     if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
          (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
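
A note on the VPMU_CONTEXT_SAVE flag being set and reset around the
arch_vpmu_save hook above: the vendor-specific save handlers only perform a
full context save when that flag is set (previously it was only set by
vpmu_save_force()); otherwise they take a cheaper path, e.g. only stopping
the counters. A sketch of the expected handler shape follows -- the precise
checks in the real Intel/AMD handlers may differ:

    /* Hedged sketch of an arch_vpmu_save handler, not the real code. */
    static int sketch_arch_vpmu_save(struct vcpu *v, bool to_guest)
    {
        struct vpmu_struct *vpmu = vcpu_vpmu(v);

        /* Without VPMU_CONTEXT_SAVE set, skip the full save. */
        if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
            return 0;

        /* ... save counter and control MSRs into vpmu->context ... */

        /* Non-zero tells vpmu_save() it may clear VPMU_CONTEXT_LOADED. */
        return 1;
    }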