diff mbox series

[v2,1/6] spapr/xive: Introduce a StoreEOI capability

Message ID 20201005165147.526426-2-clg@kaod.org
State New, archived
Headers show
Series spapr/xive: Activate StoreEOI in P10 compat guests | expand

Commit Message

Cédric Le Goater Oct. 5, 2020, 4:51 p.m. UTC
When an interrupt has been handled, the OS notifies the interrupt
controller with an EOI sequence. On the XIVE interrupt controller
(POWER9 and POWER10), this can be done with a load or a store
operation on the ESB interrupt management page of the interrupt. The
StoreEOI operation has less latency and improves interrupt handling
performance but it was deactivated during the POWER9 DD2.0 time-frame
because of ordering issues. POWER9 systems use the LoadEOI instead.
POWER10 has fixed the issue with a special load command which enforces
Load-after-Store ordering and StoreEOI can be safely used.

The new StoreEOI capability adds StoreEOI support to the flags
returned by the hcall H_INT_GET_SOURCE_INFO. When the machine is using
an emulated interrupt controller, TCG or without kernel IRQ chip,
there are no limitations and activating StoreEOI is not an issue.
However, when running with a kernel IRQ chip, some verification needs
to be done on the host. This is done through the DT, which tells us
that firmware has configured the HW for StoreEOI, but a new KVM
capability would be cleaner.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ppc/spapr.h |  4 +++-
 target/ppc/kvm_ppc.h   |  6 ++++++
 hw/ppc/spapr.c         |  1 +
 hw/ppc/spapr_caps.c    | 30 ++++++++++++++++++++++++++++++
 target/ppc/kvm.c       | 18 ++++++++++++++++++
 5 files changed, 58 insertions(+), 1 deletion(-)

Comments

Greg Kurz Oct. 6, 2020, 4:42 p.m. UTC | #1
On Mon, 5 Oct 2020 18:51:42 +0200
Cédric Le Goater <clg@kaod.org> wrote:

> When an interrupt has been handled, the OS notifies the interrupt
> controller with an EOI sequence. On the XIVE interrupt controller
> (POWER9 and POWER10), this can be done with a load or a store
> operation on the ESB interrupt management page of the interrupt. The
> StoreEOI operation has less latency and improves interrupt handling
> performance but it was deactivated during the POWER9 DD2.0 time-frame
> because of ordering issues. POWER9 systems use the LoadEOI instead.
> POWER10 has fixed the issue with a special load command which enforces
> Load-after-Store ordering and StoreEOI can be safely used.
> 
> The new StoreEOI capability adds StoreEOI support to the flags
> returned by the hcall H_INT_GET_SOURCE_INFO. When the machine is using
> an emulated interrupt controller, TCG or without kernel IRQ chip,
> there are no limitations and activating StoreEOI is not an issue.
> However, when running with a kernel IRQ chip, some verification needs
> to be done on the host. This is done through the DT, which tells us
> that firmware has configured the HW for StoreEOI, but a new KVM
> capability would be cleaner.
> 

Cleaner and even required... a user could possibly run an older
KVM that doesn't know about StoreEOI on a POWER10 host and QEMU
would wrongly assume the feature is supported. Also, I guess this
should rather be an attribute of the XIVE KVM device rather than a
plain KVM property.

> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/spapr.h |  4 +++-
>  target/ppc/kvm_ppc.h   |  6 ++++++
>  hw/ppc/spapr.c         |  1 +
>  hw/ppc/spapr_caps.c    | 30 ++++++++++++++++++++++++++++++
>  target/ppc/kvm.c       | 18 ++++++++++++++++++
>  5 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index bba8736269f4..b701c14b4e09 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -74,8 +74,10 @@ typedef enum {
>  #define SPAPR_CAP_CCF_ASSIST            0x09
>  /* Implements PAPR FWNMI option */
>  #define SPAPR_CAP_FWNMI                 0x0A
> +/* Implements XIVE StoreEOI feature */
> +#define SPAPR_CAP_STOREEOI              0x0B

The name should mention XIVE, ie. SPAPR_CAP_XIVE_STOREEOI

>  /* Num Caps */
> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI + 1)
> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_STOREEOI + 1)
>  
>  /*
>   * Capability Values
> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> index 72e05f1cd2fc..c5a487dbba13 100644
> --- a/target/ppc/kvm_ppc.h
> +++ b/target/ppc/kvm_ppc.h
> @@ -64,6 +64,7 @@ bool kvmppc_has_cap_htm(void);
>  bool kvmppc_has_cap_mmu_radix(void);
>  bool kvmppc_has_cap_mmu_hash_v3(void);
>  bool kvmppc_has_cap_xive(void);
> +bool kvmppc_has_cap_xive_storeeoi(void);
>  int kvmppc_get_cap_safe_cache(void);
>  int kvmppc_get_cap_safe_bounds_check(void);
>  int kvmppc_get_cap_safe_indirect_branch(void);
> @@ -346,6 +347,11 @@ static inline bool kvmppc_has_cap_xive(void)
>      return false;
>  }
>  
> +static inline bool kvmppc_has_cap_xive_storeeoi(void)
> +{
> +    return false;
> +}
> +
>  static inline int kvmppc_get_cap_safe_cache(void)
>  {
>      return 0;
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 4256794f3bed..e83de0580142 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4447,6 +4447,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
> +    smc->default_caps.caps[SPAPR_CAP_STOREEOI] = SPAPR_CAP_OFF;
>      spapr_caps_add_properties(smc);
>      smc->irq = &spapr_irq_dual;
>      smc->dr_phb_enabled = true;
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 9341e9782a3f..57c62c22e4cc 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -524,6 +524,26 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
>      }
>  }
>  
> +static void cap_storeeoi_apply(SpaprMachineState *spapr, uint8_t val,
> +                               Error **errp)
> +{
> +    ERRP_GUARD();

From "qapi/error.h":

 * = Why, when and how to use ERRP_GUARD() =
 *
 * Without ERRP_GUARD(), use of the @errp parameter is restricted:
 * - It must not be dereferenced, because it may be null.
 * - It should not be passed to error_prepend() or
 *   error_append_hint(), because that doesn't work with &error_fatal.
 * ERRP_GUARD() lifts these restrictions.
 *
 * To use ERRP_GUARD(), add it right at the beginning of the function.
 * @errp can then be used without worrying about the argument being
 * NULL or &error_fatal.
 *
 * Using it when it's not needed is safe, but please avoid cluttering
 * the source with useless code.
 *

So for this ERRP_GUARD() to be justified, you should come up with
a hint, otherwise you should drop it.

> +    MachineState *machine = MACHINE(spapr);
> +    bool kvm_storeeoi = kvmppc_has_cap_xive_storeeoi();
> +
> +    if (!val) {
> +        return; /* Disabled by default */
> +    }
> +
> +    /* Check host support when the KVM device is in use */
> +    if (kvm_irqchip_in_kernel()) {

Hmm... checking kvm_irqchip_in_kernel() is imprecise because
it returns true if either the XIVE or XICS KVM device has
been open, regardless of the flavor we're going to use. This
really depends on the ic-mode setting:

1) xics: we really don't care whether StoreEOI is available or not.
   This is very similar to the case of POWER8 in patch 2. Spit a
   warning and return.

2) xive: at this point the XIVE KVM device is open and we can check
   the availability of StoreEOI with kvm_device_check_attr().

3) dual: this one is problematic because at this point the XICS KVM
   device is open but XIVE KVM won't be open until CAS.

So I think we can only do something sensible for cases 1) and 2),
eg:

    if (!spapr->irq->xive) {
        warn_report(...);
        return;
    }

    if (spapr_xive_in_kernel(spapr->xive)) {
        !kvm_device_check_attr(spapr->xive->fd, ...) {
        error_setg(errp, "StoreEOI not supported by XIVE KVM");
        return;
    }

Case 3) requires a similar check in CAS if the guest asked for XIVE
and cap-xive-storeeoi=on.

> +        if (!kvm_storeeoi) {
> +            error_setg(errp, "StoreEOI not supported by KVM");
> +            return;
> +        }
> +    }
> +}
> +
>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>      [SPAPR_CAP_HTM] = {
>          .name = "htm",
> @@ -632,6 +652,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>          .type = "bool",
>          .apply = cap_fwnmi_apply,
>      },
> +    [SPAPR_CAP_STOREEOI] = {
> +        .name = "storeeoi",
> +        .description = "Implements XIVE StoreEOI feature",
> +        .index = SPAPR_CAP_STOREEOI,
> +        .get = spapr_cap_get_bool,
> +        .set = spapr_cap_set_bool,
> +        .type = "bool",
> +        .apply = cap_storeeoi_apply,
> +    },
>  };
>  
>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> @@ -772,6 +801,7 @@ SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>  SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
> +SPAPR_CAP_MIG_STATE(storeeoi, SPAPR_CAP_STOREEOI);
>  
>  void spapr_caps_init(SpaprMachineState *spapr)
>  {
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index d85ba8ffe00b..9ad637151070 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -2448,6 +2448,24 @@ bool kvmppc_has_cap_xive(void)
>      return cap_xive;
>  }
>  
> +/*
> + * TODO: Introduce a new KVM capability
> + */

Is there anything that prevents to add such a capability
or KVM device attribute before modifying QEMU ?

> +bool kvmppc_has_cap_xive_storeeoi(void)
> +{
> +    static const char *compat = "ibm,opal-xive-pe";
> +    void *host_fdt;
> +    int xive_node;
> +
> +    host_fdt = load_device_tree_from_sysfs();
> +    xive_node = fdt_node_offset_by_compatible(host_fdt, -1, compat);
> +    if (xive_node < 0) {
> +        return false;
> +    }
> +
> +    return !!fdt_getprop(host_fdt, xive_node, "store-eoi-support", NULL);
> +}
> +
>  static void kvmppc_get_cpu_characteristics(KVMState *s)
>  {
>      struct kvm_ppc_cpu_char c;
Cédric Le Goater Oct. 7, 2020, 5:59 a.m. UTC | #2
On 10/6/20 6:42 PM, Greg Kurz wrote:
> On Mon, 5 Oct 2020 18:51:42 +0200
> Cédric Le Goater <clg@kaod.org> wrote:
> 
>> When an interrupt has been handled, the OS notifies the interrupt
>> controller with an EOI sequence. On the XIVE interrupt controller
>> (POWER9 and POWER10), this can be done with a load or a store
>> operation on the ESB interrupt management page of the interrupt. The
>> StoreEOI operation has less latency and improves interrupt handling
>> performance but it was deactivated during the POWER9 DD2.0 time-frame
>> because of ordering issues. POWER9 systems use the LoadEOI instead.
>> POWER10 has fixed the issue with a special load command which enforces
>> Load-after-Store ordering and StoreEOI can be safely used.
>>
>> The new StoreEOI capability adds StoreEOI support to the flags
>> returned by the hcall H_INT_GET_SOURCE_INFO. When the machine is using
>> an emulated interrupt controller, TCG or without kernel IRQ chip,
>> there are no limitations and activating StoreEOI is not an issue.
>> However, when running with a kernel IRQ chip, some verification needs
>> to be done on the host. This is done through the DT, which tells us
>> that firmware has configured the HW for StoreEOI, but a new KVM
>> capability would be cleaner.
>>
> 
> Cleaner and even required... a user could possibly run an older
> KVM that doesn't know about StoreEOI on a POWER10 host and QEMU
> would wrongly assume the feature is supported. 

Well, no, because the ESB pages of the interrupts being passthrough 
in the guest, it should be safe to use StoreEOI in a guest even if 
the host kernel is not aware of it. As long as HW is correctly 
configured by FW of course, which is what the DT property says.

I agree it's a bit of shortcut but it works. 

> Also, I guess this
> should rather be an attribute of the XIVE KVM device rather than a
> plain KVM property.

It should even be a source attribute in theory. Since QEMU does not, 
an attribute of the XIVE KVM device is fine but we would just be
looking at the device tree from KVM. So may be, it's easier to make
it global to VM without relying on the XIVE device.

>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  include/hw/ppc/spapr.h |  4 +++-
>>  target/ppc/kvm_ppc.h   |  6 ++++++
>>  hw/ppc/spapr.c         |  1 +
>>  hw/ppc/spapr_caps.c    | 30 ++++++++++++++++++++++++++++++
>>  target/ppc/kvm.c       | 18 ++++++++++++++++++
>>  5 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index bba8736269f4..b701c14b4e09 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -74,8 +74,10 @@ typedef enum {
>>  #define SPAPR_CAP_CCF_ASSIST            0x09
>>  /* Implements PAPR FWNMI option */
>>  #define SPAPR_CAP_FWNMI                 0x0A
>> +/* Implements XIVE StoreEOI feature */
>> +#define SPAPR_CAP_STOREEOI              0x0B
> 
> The name should mention XIVE, ie. SPAPR_CAP_XIVE_STOREEOI

ok.

> 
>>  /* Num Caps */
>> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI + 1)
>> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_STOREEOI + 1)
>>  
>>  /*
>>   * Capability Values
>> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
>> index 72e05f1cd2fc..c5a487dbba13 100644
>> --- a/target/ppc/kvm_ppc.h
>> +++ b/target/ppc/kvm_ppc.h
>> @@ -64,6 +64,7 @@ bool kvmppc_has_cap_htm(void);
>>  bool kvmppc_has_cap_mmu_radix(void);
>>  bool kvmppc_has_cap_mmu_hash_v3(void);
>>  bool kvmppc_has_cap_xive(void);
>> +bool kvmppc_has_cap_xive_storeeoi(void);
>>  int kvmppc_get_cap_safe_cache(void);
>>  int kvmppc_get_cap_safe_bounds_check(void);
>>  int kvmppc_get_cap_safe_indirect_branch(void);
>> @@ -346,6 +347,11 @@ static inline bool kvmppc_has_cap_xive(void)
>>      return false;
>>  }
>>  
>> +static inline bool kvmppc_has_cap_xive_storeeoi(void)
>> +{
>> +    return false;
>> +}
>> +
>>  static inline int kvmppc_get_cap_safe_cache(void)
>>  {
>>      return 0;
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 4256794f3bed..e83de0580142 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -4447,6 +4447,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
>>      smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
>> +    smc->default_caps.caps[SPAPR_CAP_STOREEOI] = SPAPR_CAP_OFF;
>>      spapr_caps_add_properties(smc);
>>      smc->irq = &spapr_irq_dual;
>>      smc->dr_phb_enabled = true;
>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>> index 9341e9782a3f..57c62c22e4cc 100644
>> --- a/hw/ppc/spapr_caps.c
>> +++ b/hw/ppc/spapr_caps.c
>> @@ -524,6 +524,26 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
>>      }
>>  }
>>  
>> +static void cap_storeeoi_apply(SpaprMachineState *spapr, uint8_t val,
>> +                               Error **errp)
>> +{
>> +    ERRP_GUARD();
> 
> From "qapi/error.h":
> 
>  * = Why, when and how to use ERRP_GUARD() =
>  *
>  * Without ERRP_GUARD(), use of the @errp parameter is restricted:
>  * - It must not be dereferenced, because it may be null.
>  * - It should not be passed to error_prepend() or
>  *   error_append_hint(), because that doesn't work with &error_fatal.
>  * ERRP_GUARD() lifts these restrictions.
>  *
>  * To use ERRP_GUARD(), add it right at the beginning of the function.
>  * @errp can then be used without worrying about the argument being
>  * NULL or &error_fatal.
>  *
>  * Using it when it's not needed is safe, but please avoid cluttering
>  * the source with useless code.
>  *
> 
> So for this ERRP_GUARD() to be justified, you should come up with
> a hint, otherwise you should drop it.

OK. 

>> +    MachineState *machine = MACHINE(spapr);
>> +    bool kvm_storeeoi = kvmppc_has_cap_xive_storeeoi();
>> +
>> +    if (!val) {
>> +        return; /* Disabled by default */
>> +    }
>> +
>> +    /* Check host support when the KVM device is in use */
>> +    if (kvm_irqchip_in_kernel()) {
> 
> Hmm... checking kvm_irqchip_in_kernel() is imprecise because
> it returns true if either the XIVE or XICS KVM device has
> been open, regardless of the flavor we're going to use. This
> really depends on the ic-mode setting:
True.
> 1) xics: we really don't care whether StoreEOI is available or not.
>    This is very similar to the case of POWER8 in patch 2. Spit a
>    warning and return.

yes

> 2) xive: at this point the XIVE KVM device is open and we can check
>    the availability of StoreEOI with kvm_device_check_attr().

yes and similar to kvmppc_has_cap_xive_storeeoi() in a sense. 

> 3) dual: this one is problematic because at this point the XICS KVM
>    device is open but XIVE KVM won't be open until CAS.
> 
> So I think we can only do something sensible for cases 1) and 2),
> eg:
> 
>     if (!spapr->irq->xive) {
>         warn_report(...);
>         return;
>     }
> 
>     if (spapr_xive_in_kernel(spapr->xive)) {
>         !kvm_device_check_attr(spapr->xive->fd, ...) {
>         error_setg(errp, "StoreEOI not supported by XIVE KVM");
>         return;
>     }
> 
> Case 3) requires a similar check in CAS if the guest asked for XIVE
> and cap-xive-storeeoi=on.

Yes. dual is more complex because we could be using the XIVE emulated
device which does not have limitations on P9. I need to take a closer 
look.


>> +        if (!kvm_storeeoi) {
>> +            error_setg(errp, "StoreEOI not supported by KVM");
>> +            return;
>> +        }
>> +    }
>> +}
>> +
>>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>>      [SPAPR_CAP_HTM] = {
>>          .name = "htm",
>> @@ -632,6 +652,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>>          .type = "bool",
>>          .apply = cap_fwnmi_apply,
>>      },
>> +    [SPAPR_CAP_STOREEOI] = {
>> +        .name = "storeeoi",
>> +        .description = "Implements XIVE StoreEOI feature",
>> +        .index = SPAPR_CAP_STOREEOI,
>> +        .get = spapr_cap_get_bool,
>> +        .set = spapr_cap_set_bool,
>> +        .type = "bool",
>> +        .apply = cap_storeeoi_apply,
>> +    },
>>  };
>>  
>>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
>> @@ -772,6 +801,7 @@ SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>>  SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
>> +SPAPR_CAP_MIG_STATE(storeeoi, SPAPR_CAP_STOREEOI);
>>  
>>  void spapr_caps_init(SpaprMachineState *spapr)
>>  {
>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>> index d85ba8ffe00b..9ad637151070 100644
>> --- a/target/ppc/kvm.c
>> +++ b/target/ppc/kvm.c
>> @@ -2448,6 +2448,24 @@ bool kvmppc_has_cap_xive(void)
>>      return cap_xive;
>>  }
>>  
>> +/*
>> + * TODO: Introduce a new KVM capability
>> + */
> 
> Is there anything that prevents to add such a capability
> or KVM device attribute before modifying QEMU ?

no. I was just lazy as the device tree check is good enough.

Thanks for the review,

C. 


>> +bool kvmppc_has_cap_xive_storeeoi(void)
>> +{
>> +    static const char *compat = "ibm,opal-xive-pe";
>> +    void *host_fdt;
>> +    int xive_node;
>> +
>> +    host_fdt = load_device_tree_from_sysfs();
>> +    xive_node = fdt_node_offset_by_compatible(host_fdt, -1, compat);
>> +    if (xive_node < 0) {
>> +        return false;
>> +    }
>> +
>> +    return !!fdt_getprop(host_fdt, xive_node, "store-eoi-support", NULL);
>> +}
>> +
>>  static void kvmppc_get_cpu_characteristics(KVMState *s)
>>  {
>>      struct kvm_ppc_cpu_char c;
>
Greg Kurz Oct. 7, 2020, 7:24 a.m. UTC | #3
On Wed, 7 Oct 2020 07:59:26 +0200
Cédric Le Goater <clg@kaod.org> wrote:

> 
> 
> On 10/6/20 6:42 PM, Greg Kurz wrote:
> > On Mon, 5 Oct 2020 18:51:42 +0200
> > Cédric Le Goater <clg@kaod.org> wrote:
> > 
> >> When an interrupt has been handled, the OS notifies the interrupt
> >> controller with an EOI sequence. On the XIVE interrupt controller
> >> (POWER9 and POWER10), this can be done with a load or a store
> >> operation on the ESB interrupt management page of the interrupt. The
> >> StoreEOI operation has less latency and improves interrupt handling
> >> performance but it was deactivated during the POWER9 DD2.0 time-frame
> >> because of ordering issues. POWER9 systems use the LoadEOI instead.
> >> POWER10 has fixed the issue with a special load command which enforces
> >> Load-after-Store ordering and StoreEOI can be safely used.
> >>
> >> The new StoreEOI capability adds StoreEOI support to the flags
> >> returned by the hcall H_INT_GET_SOURCE_INFO. When the machine is using
> >> an emulated interrupt controller, TCG or without kernel IRQ chip,
> >> there are no limitations and activating StoreEOI is not an issue.
> >> However, when running with a kernel IRQ chip, some verification needs
> >> to be done on the host. This is done through the DT, which tells us
> >> that firmware has configured the HW for StoreEOI, but a new KVM
> >> capability would be cleaner.
> >>
> > 
> > Cleaner and even required... a user could possibly run an older
> > KVM that doesn't know about StoreEOI on a POWER10 host and QEMU
> > would wrongly assume the feature is supported. 
> 
> Well, no, because the ESB pages of the interrupts being passthrough 
> in the guest, it should be safe to use StoreEOI in a guest even if 
> the host kernel is not aware of it. As long as HW is correctly 
> configured by FW of course, which is what the DT property says.
> 
> I agree it's a bit of shortcut but it works. 
> 

Ok this makes sense. I suggest you write this down in a comment in
kvmppc_has_cap_xive_storeeoi() then because it is only _obvious_ to
the very few people that know about XIVE internals.

> > Also, I guess this
> > should rather be an attribute of the XIVE KVM device rather than a
> > plain KVM property.
> 
> It should even be a source attribute in theory. Since QEMU does not, 
> an attribute of the XIVE KVM device is fine but we would just be
> looking at the device tree from KVM. So may be, it's easier to make
> it global to VM without relying on the XIVE device.
> 

Well if KVM doesn't explicitly exploit StoreEOI, not sure this
would bring much to expose this through a KVM interface at all
in the end... 

> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  include/hw/ppc/spapr.h |  4 +++-
> >>  target/ppc/kvm_ppc.h   |  6 ++++++
> >>  hw/ppc/spapr.c         |  1 +
> >>  hw/ppc/spapr_caps.c    | 30 ++++++++++++++++++++++++++++++
> >>  target/ppc/kvm.c       | 18 ++++++++++++++++++
> >>  5 files changed, 58 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >> index bba8736269f4..b701c14b4e09 100644
> >> --- a/include/hw/ppc/spapr.h
> >> +++ b/include/hw/ppc/spapr.h
> >> @@ -74,8 +74,10 @@ typedef enum {
> >>  #define SPAPR_CAP_CCF_ASSIST            0x09
> >>  /* Implements PAPR FWNMI option */
> >>  #define SPAPR_CAP_FWNMI                 0x0A
> >> +/* Implements XIVE StoreEOI feature */
> >> +#define SPAPR_CAP_STOREEOI              0x0B
> > 
> > The name should mention XIVE, ie. SPAPR_CAP_XIVE_STOREEOI
> 
> ok.
> 
> > 
> >>  /* Num Caps */
> >> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI + 1)
> >> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_STOREEOI + 1)
> >>  
> >>  /*
> >>   * Capability Values
> >> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> >> index 72e05f1cd2fc..c5a487dbba13 100644
> >> --- a/target/ppc/kvm_ppc.h
> >> +++ b/target/ppc/kvm_ppc.h
> >> @@ -64,6 +64,7 @@ bool kvmppc_has_cap_htm(void);
> >>  bool kvmppc_has_cap_mmu_radix(void);
> >>  bool kvmppc_has_cap_mmu_hash_v3(void);
> >>  bool kvmppc_has_cap_xive(void);
> >> +bool kvmppc_has_cap_xive_storeeoi(void);
> >>  int kvmppc_get_cap_safe_cache(void);
> >>  int kvmppc_get_cap_safe_bounds_check(void);
> >>  int kvmppc_get_cap_safe_indirect_branch(void);
> >> @@ -346,6 +347,11 @@ static inline bool kvmppc_has_cap_xive(void)
> >>      return false;
> >>  }
> >>  
> >> +static inline bool kvmppc_has_cap_xive_storeeoi(void)
> >> +{
> >> +    return false;
> >> +}
> >> +
> >>  static inline int kvmppc_get_cap_safe_cache(void)
> >>  {
> >>      return 0;
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 4256794f3bed..e83de0580142 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -4447,6 +4447,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
> >>      smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
> >> +    smc->default_caps.caps[SPAPR_CAP_STOREEOI] = SPAPR_CAP_OFF;
> >>      spapr_caps_add_properties(smc);
> >>      smc->irq = &spapr_irq_dual;
> >>      smc->dr_phb_enabled = true;
> >> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> >> index 9341e9782a3f..57c62c22e4cc 100644
> >> --- a/hw/ppc/spapr_caps.c
> >> +++ b/hw/ppc/spapr_caps.c
> >> @@ -524,6 +524,26 @@ static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
> >>      }
> >>  }
> >>  
> >> +static void cap_storeeoi_apply(SpaprMachineState *spapr, uint8_t val,
> >> +                               Error **errp)
> >> +{
> >> +    ERRP_GUARD();
> > 
> > From "qapi/error.h":
> > 
> >  * = Why, when and how to use ERRP_GUARD() =
> >  *
> >  * Without ERRP_GUARD(), use of the @errp parameter is restricted:
> >  * - It must not be dereferenced, because it may be null.
> >  * - It should not be passed to error_prepend() or
> >  *   error_append_hint(), because that doesn't work with &error_fatal.
> >  * ERRP_GUARD() lifts these restrictions.
> >  *
> >  * To use ERRP_GUARD(), add it right at the beginning of the function.
> >  * @errp can then be used without worrying about the argument being
> >  * NULL or &error_fatal.
> >  *
> >  * Using it when it's not needed is safe, but please avoid cluttering
> >  * the source with useless code.
> >  *
> > 
> > So for this ERRP_GUARD() to be justified, you should come up with
> > a hint, otherwise you should drop it.
> 
> OK. 
> 
> >> +    MachineState *machine = MACHINE(spapr);
> >> +    bool kvm_storeeoi = kvmppc_has_cap_xive_storeeoi();
> >> +
> >> +    if (!val) {
> >> +        return; /* Disabled by default */
> >> +    }
> >> +
> >> +    /* Check host support when the KVM device is in use */
> >> +    if (kvm_irqchip_in_kernel()) {
> > 
> > Hmm... checking kvm_irqchip_in_kernel() is imprecise because
> > it returns true if either the XIVE or XICS KVM device has
> > been open, regardless of the flavor we're going to use. This
> > really depends on the ic-mode setting:
> True.
> > 1) xics: we really don't care whether StoreEOI is available or not.
> >    This is very similar to the case of POWER8 in patch 2. Spit a
> >    warning and return.
> 
> yes
> 
> > 2) xive: at this point the XIVE KVM device is open and we can check
> >    the availability of StoreEOI with kvm_device_check_attr().
> 
> yes and similar to kvmppc_has_cap_xive_storeeoi() in a sense. 
> 
> > 3) dual: this one is problematic because at this point the XICS KVM
> >    device is open but XIVE KVM won't be open until CAS.
> > 
> > So I think we can only do something sensible for cases 1) and 2),
> > eg:
> > 
> >     if (!spapr->irq->xive) {
> >         warn_report(...);
> >         return;
> >     }
> > 
> >     if (spapr_xive_in_kernel(spapr->xive)) {
> >         !kvm_device_check_attr(spapr->xive->fd, ...) {
> >         error_setg(errp, "StoreEOI not supported by XIVE KVM");
> >         return;
> >     }
> > 
> > Case 3) requires a similar check in CAS if the guest asked for XIVE
> > and cap-xive-storeeoi=on.
> 
> Yes. dual is more complex because we could be using the XIVE emulated
> device which does not have limitations on P9. I need to take a closer 
> look.
> 

Yeah, we should take kernel-irqchip into account as well...

> 
> >> +        if (!kvm_storeeoi) {
> >> +            error_setg(errp, "StoreEOI not supported by KVM");
> >> +            return;
> >> +        }
> >> +    }
> >> +}
> >> +
> >>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> >>      [SPAPR_CAP_HTM] = {
> >>          .name = "htm",
> >> @@ -632,6 +652,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> >>          .type = "bool",
> >>          .apply = cap_fwnmi_apply,
> >>      },
> >> +    [SPAPR_CAP_STOREEOI] = {
> >> +        .name = "storeeoi",
> >> +        .description = "Implements XIVE StoreEOI feature",
> >> +        .index = SPAPR_CAP_STOREEOI,
> >> +        .get = spapr_cap_get_bool,
> >> +        .set = spapr_cap_set_bool,
> >> +        .type = "bool",
> >> +        .apply = cap_storeeoi_apply,
> >> +    },
> >>  };
> >>  
> >>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> >> @@ -772,6 +801,7 @@ SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> >>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
> >>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> >>  SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
> >> +SPAPR_CAP_MIG_STATE(storeeoi, SPAPR_CAP_STOREEOI);
> >>  
> >>  void spapr_caps_init(SpaprMachineState *spapr)
> >>  {
> >> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> >> index d85ba8ffe00b..9ad637151070 100644
> >> --- a/target/ppc/kvm.c
> >> +++ b/target/ppc/kvm.c
> >> @@ -2448,6 +2448,24 @@ bool kvmppc_has_cap_xive(void)
> >>      return cap_xive;
> >>  }
> >>  
> >> +/*
> >> + * TODO: Introduce a new KVM capability
> >> + */
> > 
> > Is there anything that prevents to add such a capability
> > or KVM device attribute before modifying QEMU ?
> 
> no. I was just lazy as the device tree check is good enough.
> 
> Thanks for the review,
> 
> C. 
> 
> 
> >> +bool kvmppc_has_cap_xive_storeeoi(void)
> >> +{
> >> +    static const char *compat = "ibm,opal-xive-pe";
> >> +    void *host_fdt;
> >> +    int xive_node;
> >> +
> >> +    host_fdt = load_device_tree_from_sysfs();
> >> +    xive_node = fdt_node_offset_by_compatible(host_fdt, -1, compat);
> >> +    if (xive_node < 0) {
> >> +        return false;
> >> +    }
> >> +
> >> +    return !!fdt_getprop(host_fdt, xive_node, "store-eoi-support", NULL);
> >> +}
> >> +
> >>  static void kvmppc_get_cpu_characteristics(KVMState *s)
> >>  {
> >>      struct kvm_ppc_cpu_char c;
> >
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index bba8736269f4..b701c14b4e09 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -74,8 +74,10 @@  typedef enum {
 #define SPAPR_CAP_CCF_ASSIST            0x09
 /* Implements PAPR FWNMI option */
 #define SPAPR_CAP_FWNMI                 0x0A
+/* Implements XIVE StoreEOI feature */
+#define SPAPR_CAP_STOREEOI              0x0B
 /* Num Caps */
-#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI + 1)
+#define SPAPR_CAP_NUM                   (SPAPR_CAP_STOREEOI + 1)
 
 /*
  * Capability Values
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 72e05f1cd2fc..c5a487dbba13 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -64,6 +64,7 @@  bool kvmppc_has_cap_htm(void);
 bool kvmppc_has_cap_mmu_radix(void);
 bool kvmppc_has_cap_mmu_hash_v3(void);
 bool kvmppc_has_cap_xive(void);
+bool kvmppc_has_cap_xive_storeeoi(void);
 int kvmppc_get_cap_safe_cache(void);
 int kvmppc_get_cap_safe_bounds_check(void);
 int kvmppc_get_cap_safe_indirect_branch(void);
@@ -346,6 +347,11 @@  static inline bool kvmppc_has_cap_xive(void)
     return false;
 }
 
+static inline bool kvmppc_has_cap_xive_storeeoi(void)
+{
+    return false;
+}
+
 static inline int kvmppc_get_cap_safe_cache(void)
 {
     return 0;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4256794f3bed..e83de0580142 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4447,6 +4447,7 @@  static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
+    smc->default_caps.caps[SPAPR_CAP_STOREEOI] = SPAPR_CAP_OFF;
     spapr_caps_add_properties(smc);
     smc->irq = &spapr_irq_dual;
     smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 9341e9782a3f..57c62c22e4cc 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -524,6 +524,26 @@  static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
     }
 }
 
+static void cap_storeeoi_apply(SpaprMachineState *spapr, uint8_t val,
+                               Error **errp)
+{
+    ERRP_GUARD();
+    MachineState *machine = MACHINE(spapr);
+    bool kvm_storeeoi = kvmppc_has_cap_xive_storeeoi();
+
+    if (!val) {
+        return; /* Disabled by default */
+    }
+
+    /* Check host support when the KVM device is in use */
+    if (kvm_irqchip_in_kernel()) {
+        if (!kvm_storeeoi) {
+            error_setg(errp, "StoreEOI not supported by KVM");
+            return;
+        }
+    }
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
     [SPAPR_CAP_HTM] = {
         .name = "htm",
@@ -632,6 +652,15 @@  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "bool",
         .apply = cap_fwnmi_apply,
     },
+    [SPAPR_CAP_STOREEOI] = {
+        .name = "storeeoi",
+        .description = "Implements XIVE StoreEOI feature",
+        .index = SPAPR_CAP_STOREEOI,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_storeeoi_apply,
+    },
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -772,6 +801,7 @@  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
 SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
+SPAPR_CAP_MIG_STATE(storeeoi, SPAPR_CAP_STOREEOI);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index d85ba8ffe00b..9ad637151070 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2448,6 +2448,24 @@  bool kvmppc_has_cap_xive(void)
     return cap_xive;
 }
 
+/*
+ * TODO: Introduce a new KVM capability
+ */
+bool kvmppc_has_cap_xive_storeeoi(void)
+{
+    static const char *compat = "ibm,opal-xive-pe";
+    void *host_fdt;
+    int xive_node;
+
+    host_fdt = load_device_tree_from_sysfs();
+    xive_node = fdt_node_offset_by_compatible(host_fdt, -1, compat);
+    if (xive_node < 0) {
+        return false;
+    }
+
+    return !!fdt_getprop(host_fdt, xive_node, "store-eoi-support", NULL);
+}
+
 static void kvmppc_get_cpu_characteristics(KVMState *s)
 {
     struct kvm_ppc_cpu_char c;