diff mbox series

[v9,11/15] microcode: unify loading update during CPU resuming and AP wakeup

Message ID 1566177928-19114-12-git-send-email-chao.gao@intel.com (mailing list archive)
State Superseded
Headers show
Series improve late microcode loading | expand

Commit Message

Chao Gao Aug. 19, 2019, 1:25 a.m. UTC
Both are loading the cached patch. Since APs call the unified function,
microcode_update_one(), during wakeup, the 'start_update' parameter
which originally used to distinguish BSP and APs is redundant. So remove
this parameter.

Signed-off-by: Chao Gao <chao.gao@intel.com>
---
Note that here is a functional change: resuming a CPU would call
->end_update() now while previously it wasn't. Not quite sure
whether it is correct.

Changes in v9:
 - return -EOPNOTSUPP rather than 0 if microcode_ops is NULL in
   microcode_update_one()
 - rebase and fix conflicts.

Changes in v8:
 - split out from the previous patch
---
 xen/arch/x86/acpi/power.c       |  2 +-
 xen/arch/x86/microcode.c        | 90 ++++++++++++++++++-----------------------
 xen/arch/x86/smpboot.c          |  5 +--
 xen/include/asm-x86/processor.h |  4 +-
 4 files changed, 44 insertions(+), 57 deletions(-)

Comments

Roger Pau Monné Aug. 22, 2019, 2:10 p.m. UTC | #1
On Mon, Aug 19, 2019 at 09:25:24AM +0800, Chao Gao wrote:
> Both are loading the cached patch. Since APs call the unified function,
> microcode_update_one(), during wakeup, the 'start_update' parameter
> which originally used to distinguish BSP and APs is redundant. So remove
> this parameter.
> 
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> ---
> Note that here is a functional change: resuming a CPU would call
> ->end_update() now while previously it wasn't. Not quite sure
> whether it is correct.

I guess that's required if it called start_update prior to calling
end_update?

> 
> Changes in v9:
>  - return -EOPNOTSUPP rather than 0 if microcode_ops is NULL in
>    microcode_update_one()
>  - rebase and fix conflicts.
> 
> Changes in v8:
>  - split out from the previous patch
> ---
>  xen/arch/x86/acpi/power.c       |  2 +-
>  xen/arch/x86/microcode.c        | 90 ++++++++++++++++++-----------------------
>  xen/arch/x86/smpboot.c          |  5 +--
>  xen/include/asm-x86/processor.h |  4 +-
>  4 files changed, 44 insertions(+), 57 deletions(-)
> 
> diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
> index 4f21903..24798d5 100644
> --- a/xen/arch/x86/acpi/power.c
> +++ b/xen/arch/x86/acpi/power.c
> @@ -253,7 +253,7 @@ static int enter_state(u32 state)
>  
>      console_end_sync();
>  
> -    microcode_resume_cpu();
> +    microcode_update_one();
>  
>      if ( !recheck_cpu_features(0) )
>          panic("Missing previously available feature(s)\n");
> diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
> index a2febc7..bdd9c9f 100644
> --- a/xen/arch/x86/microcode.c
> +++ b/xen/arch/x86/microcode.c
> @@ -203,24 +203,6 @@ static struct microcode_patch *parse_blob(const char *buf, uint32_t len)
>      return NULL;
>  }
>  
> -int microcode_resume_cpu(void)
> -{
> -    int err;
> -    struct cpu_signature *sig = &this_cpu(cpu_sig);
> -
> -    if ( !microcode_ops )
> -        return 0;
> -
> -    spin_lock(&microcode_mutex);
> -
> -    err = microcode_ops->collect_cpu_info(sig);
> -    if ( likely(!err) )
> -        err = microcode_ops->apply_microcode(microcode_cache);
> -    spin_unlock(&microcode_mutex);
> -
> -    return err;
> -}
> -
>  void microcode_free_patch(struct microcode_patch *microcode_patch)
>  {
>      microcode_ops->free_patch(microcode_patch->mc);
> @@ -384,11 +366,29 @@ static int __init microcode_init(void)
>  }
>  __initcall(microcode_init);
>  
> -int __init early_microcode_update_cpu(bool start_update)
> +/* Load a cached update to current cpu */
> +int microcode_update_one(void)
> +{
> +    int rc;
> +
> +    if ( !microcode_ops )
> +        return -EOPNOTSUPP;
> +
> +    rc = microcode_update_cpu(NULL);
> +
> +    if ( microcode_ops->end_update )
> +        microcode_ops->end_update();

Don't you need to call start_update before calling
microcode_update_cpu?

It would be nice to have paired calls to start_update/end_update in
the same context (ie: function) or else this is very hard to follow,
and very easy to get out of sync.

Thanks, Roger.
Chao Gao Aug. 22, 2019, 4:44 p.m. UTC | #2
On Thu, Aug 22, 2019 at 04:10:46PM +0200, Roger Pau Monné wrote:
>On Mon, Aug 19, 2019 at 09:25:24AM +0800, Chao Gao wrote:
>> Both are loading the cached patch. Since APs call the unified function,
>> microcode_update_one(), during wakeup, the 'start_update' parameter
>> which originally used to distinguish BSP and APs is redundant. So remove
>> this parameter.
>> 
>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>> ---
>> Note that here is a functional change: resuming a CPU would call
>> ->end_update() now while previously it wasn't. Not quite sure
>> whether it is correct.
>
>I guess that's required if it called start_update prior to calling
>end_update?
>
>> 
>> Changes in v9:
>>  - return -EOPNOTSUPP rather than 0 if microcode_ops is NULL in
>>    microcode_update_one()
>>  - rebase and fix conflicts.
>> 
>> Changes in v8:
>>  - split out from the previous patch
>> ---
>>  xen/arch/x86/acpi/power.c       |  2 +-
>>  xen/arch/x86/microcode.c        | 90 ++++++++++++++++++-----------------------
>>  xen/arch/x86/smpboot.c          |  5 +--
>>  xen/include/asm-x86/processor.h |  4 +-
>>  4 files changed, 44 insertions(+), 57 deletions(-)
>> 
>> diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
>> index 4f21903..24798d5 100644
>> --- a/xen/arch/x86/acpi/power.c
>> +++ b/xen/arch/x86/acpi/power.c
>> @@ -253,7 +253,7 @@ static int enter_state(u32 state)
>>  
>>      console_end_sync();
>>  
>> -    microcode_resume_cpu();
>> +    microcode_update_one();
>>  
>>      if ( !recheck_cpu_features(0) )
>>          panic("Missing previously available feature(s)\n");
>> diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
>> index a2febc7..bdd9c9f 100644
>> --- a/xen/arch/x86/microcode.c
>> +++ b/xen/arch/x86/microcode.c
>> @@ -203,24 +203,6 @@ static struct microcode_patch *parse_blob(const char *buf, uint32_t len)
>>      return NULL;
>>  }
>>  
>> -int microcode_resume_cpu(void)
>> -{
>> -    int err;
>> -    struct cpu_signature *sig = &this_cpu(cpu_sig);
>> -
>> -    if ( !microcode_ops )
>> -        return 0;
>> -
>> -    spin_lock(&microcode_mutex);
>> -
>> -    err = microcode_ops->collect_cpu_info(sig);
>> -    if ( likely(!err) )
>> -        err = microcode_ops->apply_microcode(microcode_cache);
>> -    spin_unlock(&microcode_mutex);
>> -
>> -    return err;
>> -}
>> -
>>  void microcode_free_patch(struct microcode_patch *microcode_patch)
>>  {
>>      microcode_ops->free_patch(microcode_patch->mc);
>> @@ -384,11 +366,29 @@ static int __init microcode_init(void)
>>  }
>>  __initcall(microcode_init);
>>  
>> -int __init early_microcode_update_cpu(bool start_update)
>> +/* Load a cached update to current cpu */
>> +int microcode_update_one(void)
>> +{
>> +    int rc;
>> +
>> +    if ( !microcode_ops )
>> +        return -EOPNOTSUPP;
>> +
>> +    rc = microcode_update_cpu(NULL);
>> +
>> +    if ( microcode_ops->end_update )
>> +        microcode_ops->end_update();
>
>Don't you need to call start_update before calling
>microcode_update_cpu?

No. On AMD side, osvw_status records the hardware erratum in the system.
As we don't assume all CPUs have the same erratum, each cpu calls
end_update to update osvw_status after ucode loading.
start_update just resets osvw_status to 0. And it is called once prior
to ucode loading on any CPU so that osvw_status can be recomputed.

Thanks
Chao
Roger Pau Monné Aug. 23, 2019, 9:09 a.m. UTC | #3
On Fri, Aug 23, 2019 at 12:44:34AM +0800, Chao Gao wrote:
> On Thu, Aug 22, 2019 at 04:10:46PM +0200, Roger Pau Monné wrote:
> >On Mon, Aug 19, 2019 at 09:25:24AM +0800, Chao Gao wrote:
> >> Both are loading the cached patch. Since APs call the unified function,
> >> microcode_update_one(), during wakeup, the 'start_update' parameter
> >> which originally used to distinguish BSP and APs is redundant. So remove
> >> this parameter.
> >> 
> >> Signed-off-by: Chao Gao <chao.gao@intel.com>
> >> ---
> >> Note that here is a functional change: resuming a CPU would call
> >> ->end_update() now while previously it wasn't. Not quite sure
> >> whether it is correct.
> >
> >I guess that's required if it called start_update prior to calling
> >end_update?
> >
> >> 
> >> Changes in v9:
> >>  - return -EOPNOTSUPP rather than 0 if microcode_ops is NULL in
> >>    microcode_update_one()
> >>  - rebase and fix conflicts.
> >> 
> >> Changes in v8:
> >>  - split out from the previous patch
> >> ---
> >>  xen/arch/x86/acpi/power.c       |  2 +-
> >>  xen/arch/x86/microcode.c        | 90 ++++++++++++++++++-----------------------
> >>  xen/arch/x86/smpboot.c          |  5 +--
> >>  xen/include/asm-x86/processor.h |  4 +-
> >>  4 files changed, 44 insertions(+), 57 deletions(-)
> >> 
> >> diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
> >> index 4f21903..24798d5 100644
> >> --- a/xen/arch/x86/acpi/power.c
> >> +++ b/xen/arch/x86/acpi/power.c
> >> @@ -253,7 +253,7 @@ static int enter_state(u32 state)
> >>  
> >>      console_end_sync();
> >>  
> >> -    microcode_resume_cpu();
> >> +    microcode_update_one();
> >>  
> >>      if ( !recheck_cpu_features(0) )
> >>          panic("Missing previously available feature(s)\n");
> >> diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
> >> index a2febc7..bdd9c9f 100644
> >> --- a/xen/arch/x86/microcode.c
> >> +++ b/xen/arch/x86/microcode.c
> >> @@ -203,24 +203,6 @@ static struct microcode_patch *parse_blob(const char *buf, uint32_t len)
> >>      return NULL;
> >>  }
> >>  
> >> -int microcode_resume_cpu(void)
> >> -{
> >> -    int err;
> >> -    struct cpu_signature *sig = &this_cpu(cpu_sig);
> >> -
> >> -    if ( !microcode_ops )
> >> -        return 0;
> >> -
> >> -    spin_lock(&microcode_mutex);
> >> -
> >> -    err = microcode_ops->collect_cpu_info(sig);
> >> -    if ( likely(!err) )
> >> -        err = microcode_ops->apply_microcode(microcode_cache);
> >> -    spin_unlock(&microcode_mutex);
> >> -
> >> -    return err;
> >> -}
> >> -
> >>  void microcode_free_patch(struct microcode_patch *microcode_patch)
> >>  {
> >>      microcode_ops->free_patch(microcode_patch->mc);
> >> @@ -384,11 +366,29 @@ static int __init microcode_init(void)
> >>  }
> >>  __initcall(microcode_init);
> >>  
> >> -int __init early_microcode_update_cpu(bool start_update)
> >> +/* Load a cached update to current cpu */
> >> +int microcode_update_one(void)
> >> +{
> >> +    int rc;
> >> +
> >> +    if ( !microcode_ops )
> >> +        return -EOPNOTSUPP;
> >> +
> >> +    rc = microcode_update_cpu(NULL);
> >> +
> >> +    if ( microcode_ops->end_update )
> >> +        microcode_ops->end_update();
> >
> >Don't you need to call start_update before calling
> >microcode_update_cpu?
> 
> No. On AMD side, osvw_status records the hardware erratum in the system.
> As we don't assume all CPUs have the same erratum, each cpu calls
> end_update to update osvw_status after ucode loading.
> start_update just resets osvw_status to 0. And it is called once prior
> to ucode loading on any CPU so that osvw_status can be recomputed.

Oh, I think I understand it. start_update must only be called once
_before_ the sequence to update the microcode on all CPUs is
performed, while end_update needs to be called on _each_ CPU after the
update has been completed in order to account for any erratas.

The name for those hooks should be improved, I guess renaming
end_update to end_update_each or end_update_percpu would be clearer in
order to make it clear that start_update is global, while end_update
is percpu. Anyway, I don't want to delay this series for a naming nit.

I'm still unsure where start_update is called for the resume from
suspension case, I don't seem to see any call to start_update neither
in enter_state or microcode_update_one, hence I think this is missing?

I would expect you need to clean osvw_status also on resume from
suspension, in case microcode loading fails? Or else you will be
carrying a stale osvw_status.

Thanks, Roger.
Chao Gao Aug. 29, 2019, 7:37 a.m. UTC | #4
On Fri, Aug 23, 2019 at 11:09:07AM +0200, Roger Pau Monné wrote:
>On Fri, Aug 23, 2019 at 12:44:34AM +0800, Chao Gao wrote:
>> On Thu, Aug 22, 2019 at 04:10:46PM +0200, Roger Pau Monné wrote:
>> >On Mon, Aug 19, 2019 at 09:25:24AM +0800, Chao Gao wrote:
>> >> Both are loading the cached patch. Since APs call the unified function,
>> >> microcode_update_one(), during wakeup, the 'start_update' parameter
>> >> which originally used to distinguish BSP and APs is redundant. So remove
>> >> this parameter.
>> >> 
>> >> Signed-off-by: Chao Gao <chao.gao@intel.com>
>> >> ---
>> >> Note that here is a functional change: resuming a CPU would call
>> >> ->end_update() now while previously it wasn't. Not quite sure
>> >> whether it is correct.
>> >
>> >I guess that's required if it called start_update prior to calling
>> >end_update?
>> >
>> >> 
>> >> Changes in v9:
>> >>  - return -EOPNOTSUPP rather than 0 if microcode_ops is NULL in
>> >>    microcode_update_one()
>> >>  - rebase and fix conflicts.
>> >> 
>> >> Changes in v8:
>> >>  - split out from the previous patch
>> >> ---
>> >>  xen/arch/x86/acpi/power.c       |  2 +-
>> >>  xen/arch/x86/microcode.c        | 90 ++++++++++++++++++-----------------------
>> >>  xen/arch/x86/smpboot.c          |  5 +--
>> >>  xen/include/asm-x86/processor.h |  4 +-
>> >>  4 files changed, 44 insertions(+), 57 deletions(-)
>> >> 
>> >> diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
>> >> index 4f21903..24798d5 100644
>> >> --- a/xen/arch/x86/acpi/power.c
>> >> +++ b/xen/arch/x86/acpi/power.c
>> >> @@ -253,7 +253,7 @@ static int enter_state(u32 state)
>> >>  
>> >>      console_end_sync();
>> >>  
>> >> -    microcode_resume_cpu();
>> >> +    microcode_update_one();
>> >>  
>> >>      if ( !recheck_cpu_features(0) )
>> >>          panic("Missing previously available feature(s)\n");
>> >> diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
>> >> index a2febc7..bdd9c9f 100644
>> >> --- a/xen/arch/x86/microcode.c
>> >> +++ b/xen/arch/x86/microcode.c
>> >> @@ -203,24 +203,6 @@ static struct microcode_patch *parse_blob(const char *buf, uint32_t len)
>> >>      return NULL;
>> >>  }
>> >>  
>> >> -int microcode_resume_cpu(void)
>> >> -{
>> >> -    int err;
>> >> -    struct cpu_signature *sig = &this_cpu(cpu_sig);
>> >> -
>> >> -    if ( !microcode_ops )
>> >> -        return 0;
>> >> -
>> >> -    spin_lock(&microcode_mutex);
>> >> -
>> >> -    err = microcode_ops->collect_cpu_info(sig);
>> >> -    if ( likely(!err) )
>> >> -        err = microcode_ops->apply_microcode(microcode_cache);
>> >> -    spin_unlock(&microcode_mutex);
>> >> -
>> >> -    return err;
>> >> -}
>> >> -
>> >>  void microcode_free_patch(struct microcode_patch *microcode_patch)
>> >>  {
>> >>      microcode_ops->free_patch(microcode_patch->mc);
>> >> @@ -384,11 +366,29 @@ static int __init microcode_init(void)
>> >>  }
>> >>  __initcall(microcode_init);
>> >>  
>> >> -int __init early_microcode_update_cpu(bool start_update)
>> >> +/* Load a cached update to current cpu */
>> >> +int microcode_update_one(void)
>> >> +{
>> >> +    int rc;
>> >> +
>> >> +    if ( !microcode_ops )
>> >> +        return -EOPNOTSUPP;
>> >> +
>> >> +    rc = microcode_update_cpu(NULL);
>> >> +
>> >> +    if ( microcode_ops->end_update )
>> >> +        microcode_ops->end_update();
>> >
>> >Don't you need to call start_update before calling
>> >microcode_update_cpu?
>> 
>> No. On AMD side, osvw_status records the hardware erratum in the system.
>> As we don't assume all CPUs have the same erratum, each cpu calls
>> end_update to update osvw_status after ucode loading.
>> start_update just resets osvw_status to 0. And it is called once prior
>> to ucode loading on any CPU so that osvw_status can be recomputed.
>
>Oh, I think I understand it. start_update must only be called once
>_before_ the sequence to update the microcode on all CPUs is
>performed, while end_update needs to be called on _each_ CPU after the
>update has been completed in order to account for any erratas.
>
>The name for those hooks should be improved, I guess renaming
>end_update to end_update_each or end_update_percpu would be clearer in
>order to make it clear that start_update is global, while end_update
>is percpu. Anyway, I don't want to delay this series for a naming nit.
>
>I'm still unsure where start_update is called for the resume from
>suspension case, I don't seem to see any call to start_update neither
>in enter_state or microcode_update_one, hence I think this is missing?

No. Actually, no call of start_update for resume case.

>
>I would expect you need to clean osvw_status also on resume from
>suspension, in case microcode loading fails? Or else you will be
>carrying a stale osvw_status.

Then we need to send IPI to all other CPUs to recompute osvw_state. But
I think it is not necessary. If ucode cache isn't changed during the
CPU's suspension period, there is not stale osvw bit (assuming OSVW on
the resuming CPU won't change). If the ucode cache is updated (there
must be a late ucode loading), osvw_status should have been cleaned
before late ucode loading.

Thanks
Chao
Roger Pau Monné Aug. 29, 2019, 8:16 a.m. UTC | #5
On Thu, Aug 29, 2019 at 03:37:47PM +0800, Chao Gao wrote:
> On Fri, Aug 23, 2019 at 11:09:07AM +0200, Roger Pau Monné wrote:
> >On Fri, Aug 23, 2019 at 12:44:34AM +0800, Chao Gao wrote:
> >> On Thu, Aug 22, 2019 at 04:10:46PM +0200, Roger Pau Monné wrote:
> >> >On Mon, Aug 19, 2019 at 09:25:24AM +0800, Chao Gao wrote:
> >> >> Both are loading the cached patch. Since APs call the unified function,
> >> >> microcode_update_one(), during wakeup, the 'start_update' parameter
> >> >> which originally used to distinguish BSP and APs is redundant. So remove
> >> >> this parameter.
> >> >> 
> >> >> Signed-off-by: Chao Gao <chao.gao@intel.com>
> >> >> ---
> >> >> Note that here is a functional change: resuming a CPU would call
> >> >> ->end_update() now while previously it wasn't. Not quite sure
> >> >> whether it is correct.
> >> >
> >> >I guess that's required if it called start_update prior to calling
> >> >end_update?
> >> >
> >> >> 
> >> >> Changes in v9:
> >> >>  - return -EOPNOTSUPP rather than 0 if microcode_ops is NULL in
> >> >>    microcode_update_one()
> >> >>  - rebase and fix conflicts.
> >> >> 
> >> >> Changes in v8:
> >> >>  - split out from the previous patch
> >> >> ---
> >> >>  xen/arch/x86/acpi/power.c       |  2 +-
> >> >>  xen/arch/x86/microcode.c        | 90 ++++++++++++++++++-----------------------
> >> >>  xen/arch/x86/smpboot.c          |  5 +--
> >> >>  xen/include/asm-x86/processor.h |  4 +-
> >> >>  4 files changed, 44 insertions(+), 57 deletions(-)
> >> >> 
> >> >> diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
> >> >> index 4f21903..24798d5 100644
> >> >> --- a/xen/arch/x86/acpi/power.c
> >> >> +++ b/xen/arch/x86/acpi/power.c
> >> >> @@ -253,7 +253,7 @@ static int enter_state(u32 state)
> >> >>  
> >> >>      console_end_sync();
> >> >>  
> >> >> -    microcode_resume_cpu();
> >> >> +    microcode_update_one();
> >> >>  
> >> >>      if ( !recheck_cpu_features(0) )
> >> >>          panic("Missing previously available feature(s)\n");
> >> >> diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
> >> >> index a2febc7..bdd9c9f 100644
> >> >> --- a/xen/arch/x86/microcode.c
> >> >> +++ b/xen/arch/x86/microcode.c
> >> >> @@ -203,24 +203,6 @@ static struct microcode_patch *parse_blob(const char *buf, uint32_t len)
> >> >>      return NULL;
> >> >>  }
> >> >>  
> >> >> -int microcode_resume_cpu(void)
> >> >> -{
> >> >> -    int err;
> >> >> -    struct cpu_signature *sig = &this_cpu(cpu_sig);
> >> >> -
> >> >> -    if ( !microcode_ops )
> >> >> -        return 0;
> >> >> -
> >> >> -    spin_lock(&microcode_mutex);
> >> >> -
> >> >> -    err = microcode_ops->collect_cpu_info(sig);
> >> >> -    if ( likely(!err) )
> >> >> -        err = microcode_ops->apply_microcode(microcode_cache);
> >> >> -    spin_unlock(&microcode_mutex);
> >> >> -
> >> >> -    return err;
> >> >> -}
> >> >> -
> >> >>  void microcode_free_patch(struct microcode_patch *microcode_patch)
> >> >>  {
> >> >>      microcode_ops->free_patch(microcode_patch->mc);
> >> >> @@ -384,11 +366,29 @@ static int __init microcode_init(void)
> >> >>  }
> >> >>  __initcall(microcode_init);
> >> >>  
> >> >> -int __init early_microcode_update_cpu(bool start_update)
> >> >> +/* Load a cached update to current cpu */
> >> >> +int microcode_update_one(void)
> >> >> +{
> >> >> +    int rc;
> >> >> +
> >> >> +    if ( !microcode_ops )
> >> >> +        return -EOPNOTSUPP;
> >> >> +
> >> >> +    rc = microcode_update_cpu(NULL);
> >> >> +
> >> >> +    if ( microcode_ops->end_update )
> >> >> +        microcode_ops->end_update();
> >> >
> >> >Don't you need to call start_update before calling
> >> >microcode_update_cpu?
> >> 
> >> No. On AMD side, osvw_status records the hardware erratum in the system.
> >> As we don't assume all CPUs have the same erratum, each cpu calls
> >> end_update to update osvw_status after ucode loading.
> >> start_update just resets osvw_status to 0. And it is called once prior
> >> to ucode loading on any CPU so that osvw_status can be recomputed.
> >
> >Oh, I think I understand it. start_update must only be called once
> >_before_ the sequence to update the microcode on all CPUs is
> >performed, while end_update needs to be called on _each_ CPU after the
> >update has been completed in order to account for any erratas.
> >
> >The name for those hooks should be improved, I guess renaming
> >end_update to end_update_each or end_update_percpu would be clearer in
> >order to make it clear that start_update is global, while end_update
> >is percpu. Anyway, I don't want to delay this series for a naming nit.
> >
> >I'm still unsure where start_update is called for the resume from
> >suspension case, I don't seem to see any call to start_update neither
> >in enter_state or microcode_update_one, hence I think this is missing?
> 
> No. Actually, no call of start_update for resume case.
> 
> >
> >I would expect you need to clean osvw_status also on resume from
> >suspension, in case microcode loading fails? Or else you will be
> >carrying a stale osvw_status.
> 
> Then we need to send IPI to all other CPUs to recompute osvw_state.

Why would you need to send an IPI? Aren't other CPUs going to update
the microcode, and hence call end_update?

AFAICT you only need to call start_update after returning from
suspension and before any CPU updates it's microcode. Then osvw_status
will be updated by each CPU as the microcode gets loaded?

> But
> I think it is not necessary. If ucode cache isn't changed during the
> CPU's suspension period, there is not stale osvw bit (assuming OSVW on
> the resuming CPU won't change). If the ucode cache is updated (there
> must be a late ucode loading), osvw_status should have been cleaned
> before late ucode loading.

It could be possible that an ucode that previously loaded fine throw
an error, but I agree that's quite unlikely. Anyway the fix seemed
trivial to me, but maybe I'm missing something.

Thanks, Roger.
Jan Beulich Aug. 29, 2019, 10:26 a.m. UTC | #6
On 29.08.2019 09:37, Chao Gao wrote:
> On Fri, Aug 23, 2019 at 11:09:07AM +0200, Roger Pau Monné wrote:
>> On Fri, Aug 23, 2019 at 12:44:34AM +0800, Chao Gao wrote:
>>> On Thu, Aug 22, 2019 at 04:10:46PM +0200, Roger Pau Monné wrote:
>>>> On Mon, Aug 19, 2019 at 09:25:24AM +0800, Chao Gao wrote:
>>>>> Both are loading the cached patch. Since APs call the unified function,
>>>>> microcode_update_one(), during wakeup, the 'start_update' parameter
>>>>> which originally used to distinguish BSP and APs is redundant. So remove
>>>>> this parameter.
>>>>>
>>>>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>>>>> ---
>>>>> Note that here is a functional change: resuming a CPU would call
>>>>> ->end_update() now while previously it wasn't. Not quite sure
>>>>> whether it is correct.
>>>>
>>>> I guess that's required if it called start_update prior to calling
>>>> end_update?
>>>>
>>>>>
>>>>> Changes in v9:
>>>>>  - return -EOPNOTSUPP rather than 0 if microcode_ops is NULL in
>>>>>    microcode_update_one()
>>>>>  - rebase and fix conflicts.
>>>>>
>>>>> Changes in v8:
>>>>>  - split out from the previous patch
>>>>> ---
>>>>>  xen/arch/x86/acpi/power.c       |  2 +-
>>>>>  xen/arch/x86/microcode.c        | 90 ++++++++++++++++++-----------------------
>>>>>  xen/arch/x86/smpboot.c          |  5 +--
>>>>>  xen/include/asm-x86/processor.h |  4 +-
>>>>>  4 files changed, 44 insertions(+), 57 deletions(-)
>>>>>
>>>>> diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
>>>>> index 4f21903..24798d5 100644
>>>>> --- a/xen/arch/x86/acpi/power.c
>>>>> +++ b/xen/arch/x86/acpi/power.c
>>>>> @@ -253,7 +253,7 @@ static int enter_state(u32 state)
>>>>>  
>>>>>      console_end_sync();
>>>>>  
>>>>> -    microcode_resume_cpu();
>>>>> +    microcode_update_one();
>>>>>  
>>>>>      if ( !recheck_cpu_features(0) )
>>>>>          panic("Missing previously available feature(s)\n");
>>>>> diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
>>>>> index a2febc7..bdd9c9f 100644
>>>>> --- a/xen/arch/x86/microcode.c
>>>>> +++ b/xen/arch/x86/microcode.c
>>>>> @@ -203,24 +203,6 @@ static struct microcode_patch *parse_blob(const char *buf, uint32_t len)
>>>>>      return NULL;
>>>>>  }
>>>>>  
>>>>> -int microcode_resume_cpu(void)
>>>>> -{
>>>>> -    int err;
>>>>> -    struct cpu_signature *sig = &this_cpu(cpu_sig);
>>>>> -
>>>>> -    if ( !microcode_ops )
>>>>> -        return 0;
>>>>> -
>>>>> -    spin_lock(&microcode_mutex);
>>>>> -
>>>>> -    err = microcode_ops->collect_cpu_info(sig);
>>>>> -    if ( likely(!err) )
>>>>> -        err = microcode_ops->apply_microcode(microcode_cache);
>>>>> -    spin_unlock(&microcode_mutex);
>>>>> -
>>>>> -    return err;
>>>>> -}
>>>>> -
>>>>>  void microcode_free_patch(struct microcode_patch *microcode_patch)
>>>>>  {
>>>>>      microcode_ops->free_patch(microcode_patch->mc);
>>>>> @@ -384,11 +366,29 @@ static int __init microcode_init(void)
>>>>>  }
>>>>>  __initcall(microcode_init);
>>>>>  
>>>>> -int __init early_microcode_update_cpu(bool start_update)
>>>>> +/* Load a cached update to current cpu */
>>>>> +int microcode_update_one(void)
>>>>> +{
>>>>> +    int rc;
>>>>> +
>>>>> +    if ( !microcode_ops )
>>>>> +        return -EOPNOTSUPP;
>>>>> +
>>>>> +    rc = microcode_update_cpu(NULL);
>>>>> +
>>>>> +    if ( microcode_ops->end_update )
>>>>> +        microcode_ops->end_update();
>>>>
>>>> Don't you need to call start_update before calling
>>>> microcode_update_cpu?
>>>
>>> No. On AMD side, osvw_status records the hardware erratum in the system.
>>> As we don't assume all CPUs have the same erratum, each cpu calls
>>> end_update to update osvw_status after ucode loading.
>>> start_update just resets osvw_status to 0. And it is called once prior
>>> to ucode loading on any CPU so that osvw_status can be recomputed.
>>
>> Oh, I think I understand it. start_update must only be called once
>> _before_ the sequence to update the microcode on all CPUs is
>> performed, while end_update needs to be called on _each_ CPU after the
>> update has been completed in order to account for any erratas.
>>
>> The name for those hooks should be improved, I guess renaming
>> end_update to end_update_each or end_update_percpu would be clearer in
>> order to make it clear that start_update is global, while end_update
>> is percpu. Anyway, I don't want to delay this series for a naming nit.
>>
>> I'm still unsure where start_update is called for the resume from
>> suspension case, I don't seem to see any call to start_update neither
>> in enter_state or microcode_update_one, hence I think this is missing?
> 
> No. Actually, no call of start_update for resume case.
> 
>>
>> I would expect you need to clean osvw_status also on resume from
>> suspension, in case microcode loading fails? Or else you will be
>> carrying a stale osvw_status.
> 
> Then we need to send IPI to all other CPUs to recompute osvw_state. But
> I think it is not necessary. If ucode cache isn't changed during the
> CPU's suspension period, there is not stale osvw bit (assuming OSVW on
> the resuming CPU won't change). If the ucode cache is updated (there
> must be a late ucode loading), osvw_status should have been cleaned
> before late ucode loading.

I'd actually expect firmware to load whatever ucode it has available,
in which case the OSVW state can very well change across resume. I
agree though that after a successful load of the ucode Xen has
cached that state should be the pre-suspend one again. Yet I guess it
would be more consistent if a proper start-update, ucode-load, end-
update cycle was done even in this case.

Jan
Jan Beulich Aug. 29, 2019, 10:29 a.m. UTC | #7
On 19.08.2019 03:25, Chao Gao wrote:
> Both are loading the cached patch. Since APs call the unified function,
> microcode_update_one(), during wakeup, the 'start_update' parameter
> which originally used to distinguish BSP and APs is redundant. So remove
> this parameter.
> 
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> ---
> Note that here is a functional change: resuming a CPU would call
> ->end_update() now while previously it wasn't. Not quite sure
> whether it is correct.

I think it is, as implied by the other response I've sent. But it
should then (as also said) include calling ->start_update() as well.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
index 4f21903..24798d5 100644
--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -253,7 +253,7 @@  static int enter_state(u32 state)
 
     console_end_sync();
 
-    microcode_resume_cpu();
+    microcode_update_one();
 
     if ( !recheck_cpu_features(0) )
         panic("Missing previously available feature(s)\n");
diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
index a2febc7..bdd9c9f 100644
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -203,24 +203,6 @@  static struct microcode_patch *parse_blob(const char *buf, uint32_t len)
     return NULL;
 }
 
-int microcode_resume_cpu(void)
-{
-    int err;
-    struct cpu_signature *sig = &this_cpu(cpu_sig);
-
-    if ( !microcode_ops )
-        return 0;
-
-    spin_lock(&microcode_mutex);
-
-    err = microcode_ops->collect_cpu_info(sig);
-    if ( likely(!err) )
-        err = microcode_ops->apply_microcode(microcode_cache);
-    spin_unlock(&microcode_mutex);
-
-    return err;
-}
-
 void microcode_free_patch(struct microcode_patch *microcode_patch)
 {
     microcode_ops->free_patch(microcode_patch->mc);
@@ -384,11 +366,29 @@  static int __init microcode_init(void)
 }
 __initcall(microcode_init);
 
-int __init early_microcode_update_cpu(bool start_update)
+/* Load a cached update to current cpu */
+int microcode_update_one(void)
+{
+    int rc;
+
+    if ( !microcode_ops )
+        return -EOPNOTSUPP;
+
+    rc = microcode_update_cpu(NULL);
+
+    if ( microcode_ops->end_update )
+        microcode_ops->end_update();
+
+    return rc;
+}
+
+/* BSP calls this function to parse ucode blob and then apply an update. */
+int __init early_microcode_update_cpu(void)
 {
     int rc = 0;
     void *data = NULL;
     size_t len;
+    struct microcode_patch *patch;
 
     if ( !microcode_ops )
         return -ENOSYS;
@@ -409,43 +409,33 @@  int __init early_microcode_update_cpu(bool start_update)
     if ( !data )
         return -ENOMEM;
 
-    if ( start_update )
-    {
-        struct microcode_patch *patch;
-
-        if ( microcode_ops->start_update )
-            rc = microcode_ops->start_update();
-
-        if ( rc )
-            return rc;
-
-        patch = parse_blob(data, len);
-        if ( IS_ERR(patch) )
-        {
-            printk(XENLOG_INFO "Parsing microcode blob error %ld\n",
-                   PTR_ERR(patch));
-            return PTR_ERR(patch);
-        }
+    if ( microcode_ops->start_update )
+        rc = microcode_ops->start_update();
 
-        if ( !patch )
-        {
-            printk(XENLOG_INFO "No ucode found. Update aborted!\n");
-            return -EINVAL;
-        }
+    if ( rc )
+        return rc;
 
-        spin_lock(&microcode_mutex);
-        rc = microcode_update_cache(patch);
-        spin_unlock(&microcode_mutex);
+    patch = parse_blob(data, len);
+    if ( IS_ERR(patch) )
+    {
+        printk(XENLOG_INFO "Parsing microcode blob error %ld\n",
+               PTR_ERR(patch));
+        return PTR_ERR(patch);
+    }
 
-        ASSERT(rc);
+    if ( !patch )
+    {
+        printk(XENLOG_INFO "No ucode found. Update aborted!\n");
+        return -EINVAL;
     }
 
-    rc = microcode_update_cpu(NULL);
+    spin_lock(&microcode_mutex);
+    rc = microcode_update_cache(patch);
+    spin_unlock(&microcode_mutex);
 
-    if ( microcode_ops->end_update )
-        microcode_ops->end_update();
+    ASSERT(rc);
 
-    return rc;
+    return microcode_update_one();
 }
 
 int __init early_microcode_init(void)
@@ -465,7 +455,7 @@  int __init early_microcode_init(void)
         microcode_ops->collect_cpu_info(&this_cpu(cpu_sig));
 
         if ( ucode_mod.mod_end || ucode_blob.size )
-            rc = early_microcode_update_cpu(true);
+            rc = early_microcode_update_cpu();
     }
 
     return rc;
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index c818cfc..e62a1ca 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -361,10 +361,7 @@  void start_secondary(void *unused)
 
     initialize_cpu_data(cpu);
 
-    if ( system_state <= SYS_STATE_smp_boot )
-        early_microcode_update_cpu(false);
-    else
-        microcode_resume_cpu();
+    microcode_update_one();
 
     /*
      * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 104faa9..2a76d90 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -568,9 +568,9 @@  int guest_wrmsr_xen(struct vcpu *v, uint32_t idx, uint64_t val);
 
 void microcode_set_module(unsigned int);
 int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void), unsigned long len);
-int microcode_resume_cpu(void);
-int early_microcode_update_cpu(bool start_update);
+int early_microcode_update_cpu(void);
 int early_microcode_init(void);
+int microcode_update_one(void);
 int microcode_init_intel(void);
 int microcode_init_amd(void);