diff mbox series

[v4,17/19] spapr: Remove last pieces of SpaprIrq

Message ID 20191009060818.29719-18-david@gibson.dropbear.id.au (mailing list archive)
State New, archived
Headers show
Series spapr: IRQ subsystem cleanup | expand

Commit Message

David Gibson Oct. 9, 2019, 6:08 a.m. UTC
The only things remaining in this structure are the flags to allow either
XICS or XIVE to be present.  These actually make more sense as spapr
capabilities - that way they can take advantage of the existing
infrastructure to sanity check capability states across migration and so
forth.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr.c             | 40 ++++++++++--------
 hw/ppc/spapr_caps.c        | 64 +++++++++++++++++++++++++++++
 hw/ppc/spapr_hcall.c       |  7 ++--
 hw/ppc/spapr_irq.c         | 84 ++------------------------------------
 include/hw/ppc/spapr.h     | 10 +++--
 include/hw/ppc/spapr_irq.h | 10 -----
 6 files changed, 103 insertions(+), 112 deletions(-)

Comments

Cédric Le Goater Oct. 9, 2019, 4:44 p.m. UTC | #1
On 09/10/2019 08:08, David Gibson wrote:
> The only thing remaining in this structure are the flags to allow either
> XICS or XIVE to be present.  These actually make more sense as spapr
> capabilities - that way they can take advantage of the existing
> infrastructure to sanity check capability states across migration and so
> forth.
> 
> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

Nice. That is what you had in mind :) Let's make sure we use your
cap framework for the next major change, power10. ic-mode should
be deprecated one day I suppose.


Reviewed-by: Cédric Le Goater <clg@kaod.org>

C.


> ---
>  hw/ppc/spapr.c             | 40 ++++++++++--------
>  hw/ppc/spapr_caps.c        | 64 +++++++++++++++++++++++++++++
>  hw/ppc/spapr_hcall.c       |  7 ++--
>  hw/ppc/spapr_irq.c         | 84 ++------------------------------------
>  include/hw/ppc/spapr.h     | 10 +++--
>  include/hw/ppc/spapr_irq.h | 10 -----
>  6 files changed, 103 insertions(+), 112 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index e1ff03152e..bf9fdb1693 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1072,12 +1072,13 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
>          26, 0x40, /* Radix options: GTSE == yes. */
>      };
>  
> -    if (spapr->irq->xics && spapr->irq->xive) {
> +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> +        && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          val[1] = SPAPR_OV5_XIVE_BOTH;
> -    } else if (spapr->irq->xive) {
> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          val[1] = SPAPR_OV5_XIVE_EXPLOIT;
>      } else {
> -        assert(spapr->irq->xics);
> +        assert(spapr_get_cap(spapr, SPAPR_CAP_XICS));
>          val[1] = SPAPR_OV5_XIVE_LEGACY;
>      }
>  
> @@ -2075,6 +2076,8 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_dtb,
>          &vmstate_spapr_cap_large_decr,
>          &vmstate_spapr_cap_ccf_assist,
> +        &vmstate_spapr_cap_xics,
> +        &vmstate_spapr_cap_xive,
>          NULL
>      }
>  };
> @@ -2775,7 +2778,7 @@ static void spapr_machine_init(MachineState *machine)
>      spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
>  
>      /* advertise XIVE on POWER9 machines */
> -    if (spapr->irq->xive) {
> +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
>      }
>  
> @@ -3242,14 +3245,18 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
>  static char *spapr_get_ic_mode(Object *obj, Error **errp)
>  {
>      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>  
> -    if (spapr->irq == &spapr_irq_xics_legacy) {
> +    if (smc->legacy_irq_allocation) {
>          return g_strdup("legacy");
> -    } else if (spapr->irq == &spapr_irq_xics) {
> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> +               && !spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          return g_strdup("xics");
> -    } else if (spapr->irq == &spapr_irq_xive) {
> +    } else if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)
> +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          return g_strdup("xive");
> -    } else if (spapr->irq == &spapr_irq_dual) {
> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          return g_strdup("dual");
>      }
>      g_assert_not_reached();
> @@ -3266,11 +3273,14 @@ static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
>  
>      /* The legacy IRQ backend can not be set */
>      if (strcmp(value, "xics") == 0) {
> -        spapr->irq = &spapr_irq_xics;
> +        object_property_set_bool(obj, true, "cap-xics", errp);
> +        object_property_set_bool(obj, false, "cap-xive", errp);
>      } else if (strcmp(value, "xive") == 0) {
> -        spapr->irq = &spapr_irq_xive;
> +        object_property_set_bool(obj, false, "cap-xics", errp);
> +        object_property_set_bool(obj, true, "cap-xive", errp);
>      } else if (strcmp(value, "dual") == 0) {
> -        spapr->irq = &spapr_irq_dual;
> +        object_property_set_bool(obj, true, "cap-xics", errp);
> +        object_property_set_bool(obj, true, "cap-xive", errp);
>      } else {
>          error_setg(errp, "Bad value for \"ic-mode\" property");
>      }
> @@ -3309,7 +3319,6 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
>  static void spapr_instance_init(Object *obj)
>  {
>      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> -    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>  
>      spapr->htab_fd = -1;
>      spapr->use_hotplug_event_source = true;
> @@ -3345,7 +3354,6 @@ static void spapr_instance_init(Object *obj)
>                               spapr_get_msix_emulation, NULL, NULL);
>  
>      /* The machine class defines the default interrupt controller mode */
> -    spapr->irq = smc->irq;
>      object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
>                              spapr_set_ic_mode, NULL);
>      object_property_set_description(obj, "ic-mode",
> @@ -4439,8 +4447,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_XICS] = SPAPR_CAP_ON;
> +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_ON;
>      spapr_caps_add_properties(smc, &error_abort);
> -    smc->irq = &spapr_irq_dual;
>      smc->dr_phb_enabled = true;
>      smc->linux_pci_probe = true;
>      smc->nr_xirqs = SPAPR_NR_XIRQS;
> @@ -4539,7 +4548,7 @@ static void spapr_machine_4_0_class_options(MachineClass *mc)
>      spapr_machine_4_1_class_options(mc);
>      compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
>      smc->phb_placement = phb_placement_4_0;
> -    smc->irq = &spapr_irq_xics;
> +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
>      smc->pre_4_1_migration = true;
>  }
>  
> @@ -4580,7 +4589,6 @@ static void spapr_machine_3_0_class_options(MachineClass *mc)
>  
>      smc->legacy_irq_allocation = true;
>      smc->nr_xirqs = 0x400;
> -    smc->irq = &spapr_irq_xics_legacy;
>  }
>  
>  DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 481dfd2a27..e06fd386f6 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -496,6 +496,42 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>      }
>  }
>  
> +static void cap_xics_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> +{
> +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> +
> +    if (!val) {
> +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> +            error_setg(errp,
> +"No interrupt controllers enabled, try cap-xics=on or cap-xive=on");
> +            return;
> +        }
> +
> +        if (smc->legacy_irq_allocation) {
> +            error_setg(errp, "This machine version requires XICS support");
> +            return;
> +        }
> +    }
> +}
> +
> +static void cap_xive_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> +{
> +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> +
> +    if (val) {
> +        if (smc->legacy_irq_allocation) {
> +            error_setg(errp, "This machine version cannot support XIVE");
> +            return;
> +        }
> +        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
> +                              spapr->max_compat_pvr)) {
> +            error_setg(errp, "XIVE requires POWER9 CPU");
> +            return;
> +        }
> +    }
> +}
> +
>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>      [SPAPR_CAP_HTM] = {
>          .name = "htm",
> @@ -595,6 +631,24 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>          .type = "bool",
>          .apply = cap_ccf_assist_apply,
>      },
> +    [SPAPR_CAP_XICS] = {
> +        .name = "xics",
> +        .description = "Allow XICS interrupt controller",
> +        .index = SPAPR_CAP_XICS,
> +        .get = spapr_cap_get_bool,
> +        .set = spapr_cap_set_bool,
> +        .type = "bool",
> +        .apply = cap_xics_apply,
> +    },
> +    [SPAPR_CAP_XIVE] = {
> +        .name = "xive",
> +        .description = "Allow XIVE interrupt controller",
> +        .index = SPAPR_CAP_XIVE,
> +        .get = spapr_cap_get_bool,
> +        .set = spapr_cap_set_bool,
> +        .type = "bool",
> +        .apply = cap_xive_apply,
> +    },
>  };
>  
>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> @@ -641,6 +695,14 @@ static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
>          caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps;
>      }
>  
> +    /*
> +     * POWER8 machines don't have XIVE
> +     */
> +    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
> +                               0, spapr->max_compat_pvr)) {
> +        caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
> +    }
> +
>      return caps;
>  }
>  
> @@ -734,6 +796,8 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> +SPAPR_CAP_MIG_STATE(xics, SPAPR_CAP_XICS);
> +SPAPR_CAP_MIG_STATE(xive, SPAPR_CAP_XIVE);
>  
>  void spapr_caps_init(SpaprMachineState *spapr)
>  {
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 140f05c1c6..cb4c6edf63 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1784,13 +1784,13 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
>       * terminate the boot.
>       */
>      if (guest_xive) {
> -        if (!spapr->irq->xive) {
> +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>              error_report(
>  "Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
>              exit(EXIT_FAILURE);
>          }
>      } else {
> -        if (!spapr->irq->xics) {
> +        if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
>              error_report(
>  "Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
>              exit(EXIT_FAILURE);
> @@ -1804,7 +1804,8 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
>       */
>      if (!spapr->cas_reboot) {
>          spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT)
> -            && spapr->irq->xics && spapr->irq->xive;
> +            && spapr_get_cap(spapr, SPAPR_CAP_XICS)
> +            && spapr_get_cap(spapr, SPAPR_CAP_XIVE);
>      }
>  
>      spapr_ovec_cleanup(ov5_updates);
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 2768f9a765..473fc8780a 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -101,90 +101,19 @@ int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **),
>      return 0;
>  }
>  
> -/*
> - * XICS IRQ backend.
> - */
> -
> -SpaprIrq spapr_irq_xics = {
> -    .xics        = true,
> -    .xive        = false,
> -};
> -
> -/*
> - * XIVE IRQ backend.
> - */
> -
> -SpaprIrq spapr_irq_xive = {
> -    .xics        = false,
> -    .xive        = true,
> -};
> -
> -/*
> - * Dual XIVE and XICS IRQ backend.
> - *
> - * Both interrupt mode, XIVE and XICS, objects are created but the
> - * machine starts in legacy interrupt mode (XICS). It can be changed
> - * by the CAS negotiation process and, in that case, the new mode is
> - * activated after an extra machine reset.
> - */
> -
> -/*
> - * Define values in sync with the XIVE and XICS backend
> - */
> -SpaprIrq spapr_irq_dual = {
> -    .xics        = true,
> -    .xive        = true,
> -};
> -
> -
>  static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
>  {
>      MachineState *machine = MACHINE(spapr);
>  
> -    /*
> -     * Sanity checks on non-P9 machines. On these, XIVE is not
> -     * advertised, see spapr_dt_ov5_platform_support()
> -     */
> -    if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
> -                               0, spapr->max_compat_pvr)) {
> -        /*
> -         * If the 'dual' interrupt mode is selected, force XICS as CAS
> -         * negotiation is useless.
> -         */
> -        if (spapr->irq == &spapr_irq_dual) {
> -            spapr->irq = &spapr_irq_xics;
> -            return 0;
> -        }
> -
> -        /*
> -         * Non-P9 machines using only XIVE is a bogus setup. We have two
> -         * scenarios to take into account because of the compat mode:
> -         *
> -         * 1. POWER7/8 machines should fail to init later on when creating
> -         *    the XIVE interrupt presenters because a POWER9 exception
> -         *    model is required.
> -
> -         * 2. POWER9 machines using the POWER8 compat mode won't fail and
> -         *    will let the OS boot with a partial XIVE setup : DT
> -         *    properties but no hcalls.
> -         *
> -         * To cover both and not confuse the OS, add an early failure in
> -         * QEMU.
> -         */
> -        if (spapr->irq == &spapr_irq_xive) {
> -            error_setg(errp, "XIVE-only machines require a POWER9 CPU");
> -            return -1;
> -        }
> -    }
> -
>      /*
>       * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
>       * re-created. Detect that early to avoid QEMU to exit later when the
>       * guest reboots.
>       */
>      if (kvm_enabled() &&
> -        spapr->irq == &spapr_irq_dual &&
>          machine_kernel_irqchip_required(machine) &&
> +        spapr_get_cap(spapr, SPAPR_CAP_XICS) &&
> +        spapr_get_cap(spapr, SPAPR_CAP_XIVE) &&
>          xics_kvm_has_broken_disconnect(spapr)) {
>          error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on");
>          return -1;
> @@ -280,7 +209,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
>      /* Initialize the MSI IRQ allocator. */
>      spapr_irq_msi_init(spapr);
>  
> -    if (spapr->irq->xics) {
> +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
>          Error *local_err = NULL;
>          Object *obj;
>  
> @@ -313,7 +242,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
>          spapr->ics = ICS_SPAPR(obj);
>      }
>  
> -    if (spapr->irq->xive) {
> +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          uint32_t nr_servers = spapr_max_server_number(spapr);
>          DeviceState *dev;
>          int i;
> @@ -558,11 +487,6 @@ int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
>      return first + ics->offset;
>  }
>  
> -SpaprIrq spapr_irq_xics_legacy = {
> -    .xics        = true,
> -    .xive        = false,
> -};
> -
>  static void spapr_irq_register_types(void)
>  {
>      type_register_static(&spapr_intc_info);
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 623e8e3f93..d3b4dd7de3 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -79,8 +79,12 @@ typedef enum {
>  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
>  /* Count Cache Flush Assist HW Instruction */
>  #define SPAPR_CAP_CCF_ASSIST            0x09
> +/* XICS interrupt controller */
> +#define SPAPR_CAP_XICS                  0x0a
> +/* XIVE interrupt controller */
> +#define SPAPR_CAP_XIVE                  0x0b
>  /* Num Caps */
> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_XIVE + 1)
>  
>  /*
>   * Capability Values
> @@ -131,7 +135,6 @@ struct SpaprMachineClass {
>                            hwaddr *nv2atsd, Error **errp);
>      SpaprResizeHpt resize_hpt_default;
>      SpaprCapabilities default_caps;
> -    SpaprIrq *irq;
>  };
>  
>  /**
> @@ -195,7 +198,6 @@ struct SpaprMachineState {
>  
>      int32_t irq_map_nr;
>      unsigned long *irq_map;
> -    SpaprIrq *irq;
>      qemu_irq *qirqs;
>      SpaprInterruptController *active_intc;
>      ICSState *ics;
> @@ -870,6 +872,8 @@ extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
>  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
>  extern const VMStateDescription vmstate_spapr_cap_large_decr;
>  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
> +extern const VMStateDescription vmstate_spapr_cap_xics;
> +extern const VMStateDescription vmstate_spapr_cap_xive;
>  
>  static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
>  {
> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> index 5e150a6679..71aee13743 100644
> --- a/include/hw/ppc/spapr_irq.h
> +++ b/include/hw/ppc/spapr_irq.h
> @@ -77,16 +77,6 @@ int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
>                          Error **errp);
>  void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num);
>  
> -typedef struct SpaprIrq {
> -    bool        xics;
> -    bool        xive;
> -} SpaprIrq;
> -
> -extern SpaprIrq spapr_irq_xics;
> -extern SpaprIrq spapr_irq_xics_legacy;
> -extern SpaprIrq spapr_irq_xive;
> -extern SpaprIrq spapr_irq_dual;
> -
>  void spapr_irq_init(SpaprMachineState *spapr, Error **errp);
>  int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp);
>  void spapr_irq_free(SpaprMachineState *spapr, int irq, int num);
>
Greg Kurz Oct. 9, 2019, 5:02 p.m. UTC | #2
On Wed,  9 Oct 2019 17:08:16 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> The only thing remaining in this structure are the flags to allow either
> XICS or XIVE to be present.  These actually make more sense as spapr
> capabilities - that way they can take advantage of the existing
> infrastructure to sanity check capability states across migration and so
> forth.
> 

The user can now choose the interrupt controller mode either through
ic-mode or through cap-xics/cap-xive. I guess it doesn't break anything
to expose another API to do the same thing but it raises some questions.

We should at least document somewhere that ic-mode is an alias for these
caps, and maybe state which is the preferred method (I personally vote
for the caps).

Also, we must keep ic-mode for the moment to stay compatible with the
existing pseries-4.0 and pseries-4.1 machine types, but will we
keep ic-mode forever? If not, maybe start by not allowing it for
pseries-4.2?

> Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> ---
>  hw/ppc/spapr.c             | 40 ++++++++++--------
>  hw/ppc/spapr_caps.c        | 64 +++++++++++++++++++++++++++++
>  hw/ppc/spapr_hcall.c       |  7 ++--
>  hw/ppc/spapr_irq.c         | 84 ++------------------------------------
>  include/hw/ppc/spapr.h     | 10 +++--
>  include/hw/ppc/spapr_irq.h | 10 -----
>  6 files changed, 103 insertions(+), 112 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index e1ff03152e..bf9fdb1693 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1072,12 +1072,13 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
>          26, 0x40, /* Radix options: GTSE == yes. */
>      };
>  
> -    if (spapr->irq->xics && spapr->irq->xive) {
> +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> +        && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          val[1] = SPAPR_OV5_XIVE_BOTH;
> -    } else if (spapr->irq->xive) {
> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          val[1] = SPAPR_OV5_XIVE_EXPLOIT;
>      } else {
> -        assert(spapr->irq->xics);
> +        assert(spapr_get_cap(spapr, SPAPR_CAP_XICS));
>          val[1] = SPAPR_OV5_XIVE_LEGACY;
>      }
>  
> @@ -2075,6 +2076,8 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_dtb,
>          &vmstate_spapr_cap_large_decr,
>          &vmstate_spapr_cap_ccf_assist,
> +        &vmstate_spapr_cap_xics,
> +        &vmstate_spapr_cap_xive,
>          NULL
>      }
>  };
> @@ -2775,7 +2778,7 @@ static void spapr_machine_init(MachineState *machine)
>      spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
>  
>      /* advertise XIVE on POWER9 machines */
> -    if (spapr->irq->xive) {
> +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
>      }
>  
> @@ -3242,14 +3245,18 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
>  static char *spapr_get_ic_mode(Object *obj, Error **errp)
>  {
>      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>  
> -    if (spapr->irq == &spapr_irq_xics_legacy) {
> +    if (smc->legacy_irq_allocation) {
>          return g_strdup("legacy");
> -    } else if (spapr->irq == &spapr_irq_xics) {
> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> +               && !spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          return g_strdup("xics");
> -    } else if (spapr->irq == &spapr_irq_xive) {
> +    } else if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)
> +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          return g_strdup("xive");
> -    } else if (spapr->irq == &spapr_irq_dual) {
> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          return g_strdup("dual");
>      }
>      g_assert_not_reached();
> @@ -3266,11 +3273,14 @@ static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
>  
>      /* The legacy IRQ backend can not be set */
>      if (strcmp(value, "xics") == 0) {
> -        spapr->irq = &spapr_irq_xics;
> +        object_property_set_bool(obj, true, "cap-xics", errp);
> +        object_property_set_bool(obj, false, "cap-xive", errp);
>      } else if (strcmp(value, "xive") == 0) {
> -        spapr->irq = &spapr_irq_xive;
> +        object_property_set_bool(obj, false, "cap-xics", errp);
> +        object_property_set_bool(obj, true, "cap-xive", errp);
>      } else if (strcmp(value, "dual") == 0) {
> -        spapr->irq = &spapr_irq_dual;
> +        object_property_set_bool(obj, true, "cap-xics", errp);
> +        object_property_set_bool(obj, true, "cap-xive", errp);
>      } else {
>          error_setg(errp, "Bad value for \"ic-mode\" property");
>      }
> @@ -3309,7 +3319,6 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
>  static void spapr_instance_init(Object *obj)
>  {
>      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> -    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
>  
>      spapr->htab_fd = -1;
>      spapr->use_hotplug_event_source = true;
> @@ -3345,7 +3354,6 @@ static void spapr_instance_init(Object *obj)
>                               spapr_get_msix_emulation, NULL, NULL);
>  
>      /* The machine class defines the default interrupt controller mode */
> -    spapr->irq = smc->irq;
>      object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
>                              spapr_set_ic_mode, NULL);
>      object_property_set_description(obj, "ic-mode",
> @@ -4439,8 +4447,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_XICS] = SPAPR_CAP_ON;
> +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_ON;
>      spapr_caps_add_properties(smc, &error_abort);
> -    smc->irq = &spapr_irq_dual;
>      smc->dr_phb_enabled = true;
>      smc->linux_pci_probe = true;
>      smc->nr_xirqs = SPAPR_NR_XIRQS;
> @@ -4539,7 +4548,7 @@ static void spapr_machine_4_0_class_options(MachineClass *mc)
>      spapr_machine_4_1_class_options(mc);
>      compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
>      smc->phb_placement = phb_placement_4_0;
> -    smc->irq = &spapr_irq_xics;
> +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
>      smc->pre_4_1_migration = true;
>  }
>  
> @@ -4580,7 +4589,6 @@ static void spapr_machine_3_0_class_options(MachineClass *mc)
>  
>      smc->legacy_irq_allocation = true;
>      smc->nr_xirqs = 0x400;
> -    smc->irq = &spapr_irq_xics_legacy;
>  }
>  
>  DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 481dfd2a27..e06fd386f6 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -496,6 +496,42 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>      }
>  }
>  
> +static void cap_xics_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> +{
> +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> +
> +    if (!val) {
> +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> +            error_setg(errp,
> +"No interrupt controllers enabled, try cap-xics=on or cap-xive=on");
> +            return;
> +        }
> +
> +        if (smc->legacy_irq_allocation) {
> +            error_setg(errp, "This machine version requires XICS support");
> +            return;
> +        }
> +    }
> +}
> +
> +static void cap_xive_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> +{
> +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> +
> +    if (val) {
> +        if (smc->legacy_irq_allocation) {
> +            error_setg(errp, "This machine version cannot support XIVE");
> +            return;
> +        }
> +        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
> +                              spapr->max_compat_pvr)) {
> +            error_setg(errp, "XIVE requires POWER9 CPU");
> +            return;
> +        }
> +    }
> +}
> +
>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>      [SPAPR_CAP_HTM] = {
>          .name = "htm",
> @@ -595,6 +631,24 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>          .type = "bool",
>          .apply = cap_ccf_assist_apply,
>      },
> +    [SPAPR_CAP_XICS] = {
> +        .name = "xics",
> +        .description = "Allow XICS interrupt controller",
> +        .index = SPAPR_CAP_XICS,
> +        .get = spapr_cap_get_bool,
> +        .set = spapr_cap_set_bool,
> +        .type = "bool",
> +        .apply = cap_xics_apply,
> +    },
> +    [SPAPR_CAP_XIVE] = {
> +        .name = "xive",
> +        .description = "Allow XIVE interrupt controller",
> +        .index = SPAPR_CAP_XIVE,
> +        .get = spapr_cap_get_bool,
> +        .set = spapr_cap_set_bool,
> +        .type = "bool",
> +        .apply = cap_xive_apply,
> +    },
>  };
>  
>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> @@ -641,6 +695,14 @@ static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
>          caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps;
>      }
>  
> +    /*
> +     * POWER8 machines don't have XIVE
> +     */
> +    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
> +                               0, spapr->max_compat_pvr)) {
> +        caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
> +    }
> +
>      return caps;
>  }
>  
> @@ -734,6 +796,8 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> +SPAPR_CAP_MIG_STATE(xics, SPAPR_CAP_XICS);
> +SPAPR_CAP_MIG_STATE(xive, SPAPR_CAP_XIVE);
>  
>  void spapr_caps_init(SpaprMachineState *spapr)
>  {
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 140f05c1c6..cb4c6edf63 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1784,13 +1784,13 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
>       * terminate the boot.
>       */
>      if (guest_xive) {
> -        if (!spapr->irq->xive) {
> +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>              error_report(
>  "Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
>              exit(EXIT_FAILURE);
>          }
>      } else {
> -        if (!spapr->irq->xics) {
> +        if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
>              error_report(
>  "Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
>              exit(EXIT_FAILURE);
> @@ -1804,7 +1804,8 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
>       */
>      if (!spapr->cas_reboot) {
>          spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT)
> -            && spapr->irq->xics && spapr->irq->xive;
> +            && spapr_get_cap(spapr, SPAPR_CAP_XICS)
> +            && spapr_get_cap(spapr, SPAPR_CAP_XIVE);
>      }
>  
>      spapr_ovec_cleanup(ov5_updates);
> diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> index 2768f9a765..473fc8780a 100644
> --- a/hw/ppc/spapr_irq.c
> +++ b/hw/ppc/spapr_irq.c
> @@ -101,90 +101,19 @@ int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **),
>      return 0;
>  }
>  
> -/*
> - * XICS IRQ backend.
> - */
> -
> -SpaprIrq spapr_irq_xics = {
> -    .xics        = true,
> -    .xive        = false,
> -};
> -
> -/*
> - * XIVE IRQ backend.
> - */
> -
> -SpaprIrq spapr_irq_xive = {
> -    .xics        = false,
> -    .xive        = true,
> -};
> -
> -/*
> - * Dual XIVE and XICS IRQ backend.
> - *
> - * Both interrupt mode, XIVE and XICS, objects are created but the
> - * machine starts in legacy interrupt mode (XICS). It can be changed
> - * by the CAS negotiation process and, in that case, the new mode is
> - * activated after an extra machine reset.
> - */
> -
> -/*
> - * Define values in sync with the XIVE and XICS backend
> - */
> -SpaprIrq spapr_irq_dual = {
> -    .xics        = true,
> -    .xive        = true,
> -};
> -
> -
>  static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
>  {
>      MachineState *machine = MACHINE(spapr);
>  
> -    /*
> -     * Sanity checks on non-P9 machines. On these, XIVE is not
> -     * advertised, see spapr_dt_ov5_platform_support()
> -     */
> -    if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
> -                               0, spapr->max_compat_pvr)) {
> -        /*
> -         * If the 'dual' interrupt mode is selected, force XICS as CAS
> -         * negotiation is useless.
> -         */
> -        if (spapr->irq == &spapr_irq_dual) {
> -            spapr->irq = &spapr_irq_xics;
> -            return 0;
> -        }
> -
> -        /*
> -         * Non-P9 machines using only XIVE is a bogus setup. We have two
> -         * scenarios to take into account because of the compat mode:
> -         *
> -         * 1. POWER7/8 machines should fail to init later on when creating
> -         *    the XIVE interrupt presenters because a POWER9 exception
> -         *    model is required.
> -
> -         * 2. POWER9 machines using the POWER8 compat mode won't fail and
> -         *    will let the OS boot with a partial XIVE setup : DT
> -         *    properties but no hcalls.
> -         *
> -         * To cover both and not confuse the OS, add an early failure in
> -         * QEMU.
> -         */
> -        if (spapr->irq == &spapr_irq_xive) {
> -            error_setg(errp, "XIVE-only machines require a POWER9 CPU");
> -            return -1;
> -        }
> -    }
> -
>      /*
>       * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
>       * re-created. Detect that early to avoid QEMU to exit later when the
>       * guest reboots.
>       */
>      if (kvm_enabled() &&
> -        spapr->irq == &spapr_irq_dual &&
>          machine_kernel_irqchip_required(machine) &&
> +        spapr_get_cap(spapr, SPAPR_CAP_XICS) &&
> +        spapr_get_cap(spapr, SPAPR_CAP_XIVE) &&
>          xics_kvm_has_broken_disconnect(spapr)) {
>          error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on");
>          return -1;
> @@ -280,7 +209,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
>      /* Initialize the MSI IRQ allocator. */
>      spapr_irq_msi_init(spapr);
>  
> -    if (spapr->irq->xics) {
> +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
>          Error *local_err = NULL;
>          Object *obj;
>  
> @@ -313,7 +242,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
>          spapr->ics = ICS_SPAPR(obj);
>      }
>  
> -    if (spapr->irq->xive) {
> +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
>          uint32_t nr_servers = spapr_max_server_number(spapr);
>          DeviceState *dev;
>          int i;
> @@ -558,11 +487,6 @@ int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
>      return first + ics->offset;
>  }
>  
> -SpaprIrq spapr_irq_xics_legacy = {
> -    .xics        = true,
> -    .xive        = false,
> -};
> -
>  static void spapr_irq_register_types(void)
>  {
>      type_register_static(&spapr_intc_info);
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 623e8e3f93..d3b4dd7de3 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -79,8 +79,12 @@ typedef enum {
>  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
>  /* Count Cache Flush Assist HW Instruction */
>  #define SPAPR_CAP_CCF_ASSIST            0x09
> +/* XICS interrupt controller */
> +#define SPAPR_CAP_XICS                  0x0a
> +/* XIVE interrupt controller */
> +#define SPAPR_CAP_XIVE                  0x0b
>  /* Num Caps */
> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_XIVE + 1)
>  
>  /*
>   * Capability Values
> @@ -131,7 +135,6 @@ struct SpaprMachineClass {
>                            hwaddr *nv2atsd, Error **errp);
>      SpaprResizeHpt resize_hpt_default;
>      SpaprCapabilities default_caps;
> -    SpaprIrq *irq;
>  };
>  
>  /**
> @@ -195,7 +198,6 @@ struct SpaprMachineState {
>  
>      int32_t irq_map_nr;
>      unsigned long *irq_map;
> -    SpaprIrq *irq;
>      qemu_irq *qirqs;
>      SpaprInterruptController *active_intc;
>      ICSState *ics;
> @@ -870,6 +872,8 @@ extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
>  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
>  extern const VMStateDescription vmstate_spapr_cap_large_decr;
>  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
> +extern const VMStateDescription vmstate_spapr_cap_xics;
> +extern const VMStateDescription vmstate_spapr_cap_xive;
>  
>  static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
>  {
> diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> index 5e150a6679..71aee13743 100644
> --- a/include/hw/ppc/spapr_irq.h
> +++ b/include/hw/ppc/spapr_irq.h
> @@ -77,16 +77,6 @@ int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
>                          Error **errp);
>  void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num);
>  
> -typedef struct SpaprIrq {
> -    bool        xics;
> -    bool        xive;
> -} SpaprIrq;
> -
> -extern SpaprIrq spapr_irq_xics;
> -extern SpaprIrq spapr_irq_xics_legacy;
> -extern SpaprIrq spapr_irq_xive;
> -extern SpaprIrq spapr_irq_dual;
> -
>  void spapr_irq_init(SpaprMachineState *spapr, Error **errp);
>  int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp);
>  void spapr_irq_free(SpaprMachineState *spapr, int irq, int num);
David Gibson Oct. 10, 2019, 1:59 a.m. UTC | #3
On Wed, Oct 09, 2019 at 06:44:53PM +0200, Cédric Le Goater wrote:
> On 09/10/2019 08:08, David Gibson wrote:
> > The only thing remaining in this structure are the flags to allow either
> > XICS or XIVE to be present.  These actually make more sense as spapr
> > capabilities - that way they can take advantage of the existing
> > infrastructure to sanity check capability states across migration and so
> > forth.
> > 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> 
> Nice. That is what you had in mind :) Let's make sure we use your
> cap framework for the next major change, power10.

Yeah, adding a cap-xive2 should be pretty straightforward.

> ic-mode should
> be deprecated one day I suppose.

I guess.  I'm not in a great rush, since the compat code for it is
pretty simple and localized.

> Reviewed-by: Cédric Le Goater <clg@kaod.org>
> 
> C.
> 
> 
> > ---
> >  hw/ppc/spapr.c             | 40 ++++++++++--------
> >  hw/ppc/spapr_caps.c        | 64 +++++++++++++++++++++++++++++
> >  hw/ppc/spapr_hcall.c       |  7 ++--
> >  hw/ppc/spapr_irq.c         | 84 ++------------------------------------
> >  include/hw/ppc/spapr.h     | 10 +++--
> >  include/hw/ppc/spapr_irq.h | 10 -----
> >  6 files changed, 103 insertions(+), 112 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index e1ff03152e..bf9fdb1693 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1072,12 +1072,13 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
> >          26, 0x40, /* Radix options: GTSE == yes. */
> >      };
> >  
> > -    if (spapr->irq->xics && spapr->irq->xive) {
> > +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > +        && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          val[1] = SPAPR_OV5_XIVE_BOTH;
> > -    } else if (spapr->irq->xive) {
> > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          val[1] = SPAPR_OV5_XIVE_EXPLOIT;
> >      } else {
> > -        assert(spapr->irq->xics);
> > +        assert(spapr_get_cap(spapr, SPAPR_CAP_XICS));
> >          val[1] = SPAPR_OV5_XIVE_LEGACY;
> >      }
> >  
> > @@ -2075,6 +2076,8 @@ static const VMStateDescription vmstate_spapr = {
> >          &vmstate_spapr_dtb,
> >          &vmstate_spapr_cap_large_decr,
> >          &vmstate_spapr_cap_ccf_assist,
> > +        &vmstate_spapr_cap_xics,
> > +        &vmstate_spapr_cap_xive,
> >          NULL
> >      }
> >  };
> > @@ -2775,7 +2778,7 @@ static void spapr_machine_init(MachineState *machine)
> >      spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
> >  
> >      /* advertise XIVE on POWER9 machines */
> > -    if (spapr->irq->xive) {
> > +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
> >      }
> >  
> > @@ -3242,14 +3245,18 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
> >  static char *spapr_get_ic_mode(Object *obj, Error **errp)
> >  {
> >      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> >  
> > -    if (spapr->irq == &spapr_irq_xics_legacy) {
> > +    if (smc->legacy_irq_allocation) {
> >          return g_strdup("legacy");
> > -    } else if (spapr->irq == &spapr_irq_xics) {
> > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > +               && !spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          return g_strdup("xics");
> > -    } else if (spapr->irq == &spapr_irq_xive) {
> > +    } else if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          return g_strdup("xive");
> > -    } else if (spapr->irq == &spapr_irq_dual) {
> > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          return g_strdup("dual");
> >      }
> >      g_assert_not_reached();
> > @@ -3266,11 +3273,14 @@ static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
> >  
> >      /* The legacy IRQ backend can not be set */
> >      if (strcmp(value, "xics") == 0) {
> > -        spapr->irq = &spapr_irq_xics;
> > +        object_property_set_bool(obj, true, "cap-xics", errp);
> > +        object_property_set_bool(obj, false, "cap-xive", errp);
> >      } else if (strcmp(value, "xive") == 0) {
> > -        spapr->irq = &spapr_irq_xive;
> > +        object_property_set_bool(obj, false, "cap-xics", errp);
> > +        object_property_set_bool(obj, true, "cap-xive", errp);
> >      } else if (strcmp(value, "dual") == 0) {
> > -        spapr->irq = &spapr_irq_dual;
> > +        object_property_set_bool(obj, true, "cap-xics", errp);
> > +        object_property_set_bool(obj, true, "cap-xive", errp);
> >      } else {
> >          error_setg(errp, "Bad value for \"ic-mode\" property");
> >      }
> > @@ -3309,7 +3319,6 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
> >  static void spapr_instance_init(Object *obj)
> >  {
> >      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> > -    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> >  
> >      spapr->htab_fd = -1;
> >      spapr->use_hotplug_event_source = true;
> > @@ -3345,7 +3354,6 @@ static void spapr_instance_init(Object *obj)
> >                               spapr_get_msix_emulation, NULL, NULL);
> >  
> >      /* The machine class defines the default interrupt controller mode */
> > -    spapr->irq = smc->irq;
> >      object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
> >                              spapr_set_ic_mode, NULL);
> >      object_property_set_description(obj, "ic-mode",
> > @@ -4439,8 +4447,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> > +    smc->default_caps.caps[SPAPR_CAP_XICS] = SPAPR_CAP_ON;
> > +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_ON;
> >      spapr_caps_add_properties(smc, &error_abort);
> > -    smc->irq = &spapr_irq_dual;
> >      smc->dr_phb_enabled = true;
> >      smc->linux_pci_probe = true;
> >      smc->nr_xirqs = SPAPR_NR_XIRQS;
> > @@ -4539,7 +4548,7 @@ static void spapr_machine_4_0_class_options(MachineClass *mc)
> >      spapr_machine_4_1_class_options(mc);
> >      compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
> >      smc->phb_placement = phb_placement_4_0;
> > -    smc->irq = &spapr_irq_xics;
> > +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
> >      smc->pre_4_1_migration = true;
> >  }
> >  
> > @@ -4580,7 +4589,6 @@ static void spapr_machine_3_0_class_options(MachineClass *mc)
> >  
> >      smc->legacy_irq_allocation = true;
> >      smc->nr_xirqs = 0x400;
> > -    smc->irq = &spapr_irq_xics_legacy;
> >  }
> >  
> >  DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
> > diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> > index 481dfd2a27..e06fd386f6 100644
> > --- a/hw/ppc/spapr_caps.c
> > +++ b/hw/ppc/spapr_caps.c
> > @@ -496,6 +496,42 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
> >      }
> >  }
> >  
> > +static void cap_xics_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> > +{
> > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > +
> > +    if (!val) {
> > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > +            error_setg(errp,
> > +"No interrupt controllers enabled, try cap-xics=on or cap-xive=on");
> > +            return;
> > +        }
> > +
> > +        if (smc->legacy_irq_allocation) {
> > +            error_setg(errp, "This machine version requires XICS support");
> > +            return;
> > +        }
> > +    }
> > +}
> > +
> > +static void cap_xive_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> > +{
> > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> > +
> > +    if (val) {
> > +        if (smc->legacy_irq_allocation) {
> > +            error_setg(errp, "This machine version cannot support XIVE");
> > +            return;
> > +        }
> > +        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
> > +                              spapr->max_compat_pvr)) {
> > +            error_setg(errp, "XIVE requires POWER9 CPU");
> > +            return;
> > +        }
> > +    }
> > +}
> > +
> >  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> >      [SPAPR_CAP_HTM] = {
> >          .name = "htm",
> > @@ -595,6 +631,24 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> >          .type = "bool",
> >          .apply = cap_ccf_assist_apply,
> >      },
> > +    [SPAPR_CAP_XICS] = {
> > +        .name = "xics",
> > +        .description = "Allow XICS interrupt controller",
> > +        .index = SPAPR_CAP_XICS,
> > +        .get = spapr_cap_get_bool,
> > +        .set = spapr_cap_set_bool,
> > +        .type = "bool",
> > +        .apply = cap_xics_apply,
> > +    },
> > +    [SPAPR_CAP_XIVE] = {
> > +        .name = "xive",
> > +        .description = "Allow XIVE interrupt controller",
> > +        .index = SPAPR_CAP_XIVE,
> > +        .get = spapr_cap_get_bool,
> > +        .set = spapr_cap_set_bool,
> > +        .type = "bool",
> > +        .apply = cap_xive_apply,
> > +    },
> >  };
> >  
> >  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> > @@ -641,6 +695,14 @@ static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> >          caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps;
> >      }
> >  
> > +    /*
> > +     * POWER8 machines don't have XIVE
> > +     */
> > +    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
> > +                               0, spapr->max_compat_pvr)) {
> > +        caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
> > +    }
> > +
> >      return caps;
> >  }
> >  
> > @@ -734,6 +796,8 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
> >  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> >  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
> >  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> > +SPAPR_CAP_MIG_STATE(xics, SPAPR_CAP_XICS);
> > +SPAPR_CAP_MIG_STATE(xive, SPAPR_CAP_XIVE);
> >  
> >  void spapr_caps_init(SpaprMachineState *spapr)
> >  {
> > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > index 140f05c1c6..cb4c6edf63 100644
> > --- a/hw/ppc/spapr_hcall.c
> > +++ b/hw/ppc/spapr_hcall.c
> > @@ -1784,13 +1784,13 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
> >       * terminate the boot.
> >       */
> >      if (guest_xive) {
> > -        if (!spapr->irq->xive) {
> > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >              error_report(
> >  "Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
> >              exit(EXIT_FAILURE);
> >          }
> >      } else {
> > -        if (!spapr->irq->xics) {
> > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
> >              error_report(
> >  "Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
> >              exit(EXIT_FAILURE);
> > @@ -1804,7 +1804,8 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
> >       */
> >      if (!spapr->cas_reboot) {
> >          spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT)
> > -            && spapr->irq->xics && spapr->irq->xive;
> > +            && spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > +            && spapr_get_cap(spapr, SPAPR_CAP_XIVE);
> >      }
> >  
> >      spapr_ovec_cleanup(ov5_updates);
> > diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> > index 2768f9a765..473fc8780a 100644
> > --- a/hw/ppc/spapr_irq.c
> > +++ b/hw/ppc/spapr_irq.c
> > @@ -101,90 +101,19 @@ int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **),
> >      return 0;
> >  }
> >  
> > -/*
> > - * XICS IRQ backend.
> > - */
> > -
> > -SpaprIrq spapr_irq_xics = {
> > -    .xics        = true,
> > -    .xive        = false,
> > -};
> > -
> > -/*
> > - * XIVE IRQ backend.
> > - */
> > -
> > -SpaprIrq spapr_irq_xive = {
> > -    .xics        = false,
> > -    .xive        = true,
> > -};
> > -
> > -/*
> > - * Dual XIVE and XICS IRQ backend.
> > - *
> > - * Both interrupt mode, XIVE and XICS, objects are created but the
> > - * machine starts in legacy interrupt mode (XICS). It can be changed
> > - * by the CAS negotiation process and, in that case, the new mode is
> > - * activated after an extra machine reset.
> > - */
> > -
> > -/*
> > - * Define values in sync with the XIVE and XICS backend
> > - */
> > -SpaprIrq spapr_irq_dual = {
> > -    .xics        = true,
> > -    .xive        = true,
> > -};
> > -
> > -
> >  static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
> >  {
> >      MachineState *machine = MACHINE(spapr);
> >  
> > -    /*
> > -     * Sanity checks on non-P9 machines. On these, XIVE is not
> > -     * advertised, see spapr_dt_ov5_platform_support()
> > -     */
> > -    if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
> > -                               0, spapr->max_compat_pvr)) {
> > -        /*
> > -         * If the 'dual' interrupt mode is selected, force XICS as CAS
> > -         * negotiation is useless.
> > -         */
> > -        if (spapr->irq == &spapr_irq_dual) {
> > -            spapr->irq = &spapr_irq_xics;
> > -            return 0;
> > -        }
> > -
> > -        /*
> > -         * Non-P9 machines using only XIVE is a bogus setup. We have two
> > -         * scenarios to take into account because of the compat mode:
> > -         *
> > -         * 1. POWER7/8 machines should fail to init later on when creating
> > -         *    the XIVE interrupt presenters because a POWER9 exception
> > -         *    model is required.
> > -
> > -         * 2. POWER9 machines using the POWER8 compat mode won't fail and
> > -         *    will let the OS boot with a partial XIVE setup : DT
> > -         *    properties but no hcalls.
> > -         *
> > -         * To cover both and not confuse the OS, add an early failure in
> > -         * QEMU.
> > -         */
> > -        if (spapr->irq == &spapr_irq_xive) {
> > -            error_setg(errp, "XIVE-only machines require a POWER9 CPU");
> > -            return -1;
> > -        }
> > -    }
> > -
> >      /*
> >       * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
> >       * re-created. Detect that early to avoid QEMU to exit later when the
> >       * guest reboots.
> >       */
> >      if (kvm_enabled() &&
> > -        spapr->irq == &spapr_irq_dual &&
> >          machine_kernel_irqchip_required(machine) &&
> > +        spapr_get_cap(spapr, SPAPR_CAP_XICS) &&
> > +        spapr_get_cap(spapr, SPAPR_CAP_XIVE) &&
> >          xics_kvm_has_broken_disconnect(spapr)) {
> >          error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on");
> >          return -1;
> > @@ -280,7 +209,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
> >      /* Initialize the MSI IRQ allocator. */
> >      spapr_irq_msi_init(spapr);
> >  
> > -    if (spapr->irq->xics) {
> > +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
> >          Error *local_err = NULL;
> >          Object *obj;
> >  
> > @@ -313,7 +242,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
> >          spapr->ics = ICS_SPAPR(obj);
> >      }
> >  
> > -    if (spapr->irq->xive) {
> > +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          uint32_t nr_servers = spapr_max_server_number(spapr);
> >          DeviceState *dev;
> >          int i;
> > @@ -558,11 +487,6 @@ int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
> >      return first + ics->offset;
> >  }
> >  
> > -SpaprIrq spapr_irq_xics_legacy = {
> > -    .xics        = true,
> > -    .xive        = false,
> > -};
> > -
> >  static void spapr_irq_register_types(void)
> >  {
> >      type_register_static(&spapr_intc_info);
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index 623e8e3f93..d3b4dd7de3 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -79,8 +79,12 @@ typedef enum {
> >  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
> >  /* Count Cache Flush Assist HW Instruction */
> >  #define SPAPR_CAP_CCF_ASSIST            0x09
> > +/* XICS interrupt controller */
> > +#define SPAPR_CAP_XICS                  0x0a
> > +/* XIVE interrupt controller */
> > +#define SPAPR_CAP_XIVE                  0x0b
> >  /* Num Caps */
> > -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
> > +#define SPAPR_CAP_NUM                   (SPAPR_CAP_XIVE + 1)
> >  
> >  /*
> >   * Capability Values
> > @@ -131,7 +135,6 @@ struct SpaprMachineClass {
> >                            hwaddr *nv2atsd, Error **errp);
> >      SpaprResizeHpt resize_hpt_default;
> >      SpaprCapabilities default_caps;
> > -    SpaprIrq *irq;
> >  };
> >  
> >  /**
> > @@ -195,7 +198,6 @@ struct SpaprMachineState {
> >  
> >      int32_t irq_map_nr;
> >      unsigned long *irq_map;
> > -    SpaprIrq *irq;
> >      qemu_irq *qirqs;
> >      SpaprInterruptController *active_intc;
> >      ICSState *ics;
> > @@ -870,6 +872,8 @@ extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
> >  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
> >  extern const VMStateDescription vmstate_spapr_cap_large_decr;
> >  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
> > +extern const VMStateDescription vmstate_spapr_cap_xics;
> > +extern const VMStateDescription vmstate_spapr_cap_xive;
> >  
> >  static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
> >  {
> > diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> > index 5e150a6679..71aee13743 100644
> > --- a/include/hw/ppc/spapr_irq.h
> > +++ b/include/hw/ppc/spapr_irq.h
> > @@ -77,16 +77,6 @@ int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
> >                          Error **errp);
> >  void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num);
> >  
> > -typedef struct SpaprIrq {
> > -    bool        xics;
> > -    bool        xive;
> > -} SpaprIrq;
> > -
> > -extern SpaprIrq spapr_irq_xics;
> > -extern SpaprIrq spapr_irq_xics_legacy;
> > -extern SpaprIrq spapr_irq_xive;
> > -extern SpaprIrq spapr_irq_dual;
> > -
> >  void spapr_irq_init(SpaprMachineState *spapr, Error **errp);
> >  int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp);
> >  void spapr_irq_free(SpaprMachineState *spapr, int irq, int num);
> > 
>
David Gibson Oct. 10, 2019, 2:02 a.m. UTC | #4
On Wed, Oct 09, 2019 at 07:02:15PM +0200, Greg Kurz wrote:
> On Wed,  9 Oct 2019 17:08:16 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > The only thing remaining in this structure are the flags to allow either
> > XICS or XIVE to be present.  These actually make more sense as spapr
> > capabilities - that way they can take advantage of the existing
> > infrastructure to sanity check capability states across migration and so
> > forth.
> > 
> 
> The user can now choose the interrupt controller mode either through
> ic-mode or through cap-xics/cap-xive. I guess it doesn't break anything
> to expose another API to do the same thing but it raises some questions.
> 
> We should at least document somewhere that ic-mode is an alias to these
> caps, and maybe state which is the preferred method (I personally vote
> for the caps).
> 
> Also, we must keep ic-mode for the moment to stay compatible with the
> existing pseries-4.0 and pseries-4.1 machine types, but will we
> keep ic-mode forever ? If no, maybe start by not allowing it for
> pseries-4.2 ?

I'm actually inclined to keep it for now, maybe even leave it as the
suggested way to configure this.  The caps are nice from an internal
organization point of view, but ic-mode is arguably a more user
friendly way of configuring it.  The conversion of one to the other is
straightforward, isolated and small, so I'm not especially bothered by
keeping it around.

> 
> > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > ---
> >  hw/ppc/spapr.c             | 40 ++++++++++--------
> >  hw/ppc/spapr_caps.c        | 64 +++++++++++++++++++++++++++++
> >  hw/ppc/spapr_hcall.c       |  7 ++--
> >  hw/ppc/spapr_irq.c         | 84 ++------------------------------------
> >  include/hw/ppc/spapr.h     | 10 +++--
> >  include/hw/ppc/spapr_irq.h | 10 -----
> >  6 files changed, 103 insertions(+), 112 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index e1ff03152e..bf9fdb1693 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1072,12 +1072,13 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
> >          26, 0x40, /* Radix options: GTSE == yes. */
> >      };
> >  
> > -    if (spapr->irq->xics && spapr->irq->xive) {
> > +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > +        && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          val[1] = SPAPR_OV5_XIVE_BOTH;
> > -    } else if (spapr->irq->xive) {
> > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          val[1] = SPAPR_OV5_XIVE_EXPLOIT;
> >      } else {
> > -        assert(spapr->irq->xics);
> > +        assert(spapr_get_cap(spapr, SPAPR_CAP_XICS));
> >          val[1] = SPAPR_OV5_XIVE_LEGACY;
> >      }
> >  
> > @@ -2075,6 +2076,8 @@ static const VMStateDescription vmstate_spapr = {
> >          &vmstate_spapr_dtb,
> >          &vmstate_spapr_cap_large_decr,
> >          &vmstate_spapr_cap_ccf_assist,
> > +        &vmstate_spapr_cap_xics,
> > +        &vmstate_spapr_cap_xive,
> >          NULL
> >      }
> >  };
> > @@ -2775,7 +2778,7 @@ static void spapr_machine_init(MachineState *machine)
> >      spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
> >  
> >      /* advertise XIVE on POWER9 machines */
> > -    if (spapr->irq->xive) {
> > +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
> >      }
> >  
> > @@ -3242,14 +3245,18 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
> >  static char *spapr_get_ic_mode(Object *obj, Error **errp)
> >  {
> >      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> >  
> > -    if (spapr->irq == &spapr_irq_xics_legacy) {
> > +    if (smc->legacy_irq_allocation) {
> >          return g_strdup("legacy");
> > -    } else if (spapr->irq == &spapr_irq_xics) {
> > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > +               && !spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          return g_strdup("xics");
> > -    } else if (spapr->irq == &spapr_irq_xive) {
> > +    } else if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          return g_strdup("xive");
> > -    } else if (spapr->irq == &spapr_irq_dual) {
> > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          return g_strdup("dual");
> >      }
> >      g_assert_not_reached();
> > @@ -3266,11 +3273,14 @@ static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
> >  
> >      /* The legacy IRQ backend can not be set */
> >      if (strcmp(value, "xics") == 0) {
> > -        spapr->irq = &spapr_irq_xics;
> > +        object_property_set_bool(obj, true, "cap-xics", errp);
> > +        object_property_set_bool(obj, false, "cap-xive", errp);
> >      } else if (strcmp(value, "xive") == 0) {
> > -        spapr->irq = &spapr_irq_xive;
> > +        object_property_set_bool(obj, false, "cap-xics", errp);
> > +        object_property_set_bool(obj, true, "cap-xive", errp);
> >      } else if (strcmp(value, "dual") == 0) {
> > -        spapr->irq = &spapr_irq_dual;
> > +        object_property_set_bool(obj, true, "cap-xics", errp);
> > +        object_property_set_bool(obj, true, "cap-xive", errp);
> >      } else {
> >          error_setg(errp, "Bad value for \"ic-mode\" property");
> >      }
> > @@ -3309,7 +3319,6 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
> >  static void spapr_instance_init(Object *obj)
> >  {
> >      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> > -    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> >  
> >      spapr->htab_fd = -1;
> >      spapr->use_hotplug_event_source = true;
> > @@ -3345,7 +3354,6 @@ static void spapr_instance_init(Object *obj)
> >                               spapr_get_msix_emulation, NULL, NULL);
> >  
> >      /* The machine class defines the default interrupt controller mode */
> > -    spapr->irq = smc->irq;
> >      object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
> >                              spapr_set_ic_mode, NULL);
> >      object_property_set_description(obj, "ic-mode",
> > @@ -4439,8 +4447,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> > +    smc->default_caps.caps[SPAPR_CAP_XICS] = SPAPR_CAP_ON;
> > +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_ON;
> >      spapr_caps_add_properties(smc, &error_abort);
> > -    smc->irq = &spapr_irq_dual;
> >      smc->dr_phb_enabled = true;
> >      smc->linux_pci_probe = true;
> >      smc->nr_xirqs = SPAPR_NR_XIRQS;
> > @@ -4539,7 +4548,7 @@ static void spapr_machine_4_0_class_options(MachineClass *mc)
> >      spapr_machine_4_1_class_options(mc);
> >      compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
> >      smc->phb_placement = phb_placement_4_0;
> > -    smc->irq = &spapr_irq_xics;
> > +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
> >      smc->pre_4_1_migration = true;
> >  }
> >  
> > @@ -4580,7 +4589,6 @@ static void spapr_machine_3_0_class_options(MachineClass *mc)
> >  
> >      smc->legacy_irq_allocation = true;
> >      smc->nr_xirqs = 0x400;
> > -    smc->irq = &spapr_irq_xics_legacy;
> >  }
> >  
> >  DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
> > diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> > index 481dfd2a27..e06fd386f6 100644
> > --- a/hw/ppc/spapr_caps.c
> > +++ b/hw/ppc/spapr_caps.c
> > @@ -496,6 +496,42 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
> >      }
> >  }
> >  
> > +static void cap_xics_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> > +{
> > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > +
> > +    if (!val) {
> > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > +            error_setg(errp,
> > +"No interrupt controllers enabled, try cap-xics=on or cap-xive=on");
> > +            return;
> > +        }
> > +
> > +        if (smc->legacy_irq_allocation) {
> > +            error_setg(errp, "This machine version requires XICS support");
> > +            return;
> > +        }
> > +    }
> > +}
> > +
> > +static void cap_xive_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> > +{
> > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> > +
> > +    if (val) {
> > +        if (smc->legacy_irq_allocation) {
> > +            error_setg(errp, "This machine version cannot support XIVE");
> > +            return;
> > +        }
> > +        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
> > +                              spapr->max_compat_pvr)) {
> > +            error_setg(errp, "XIVE requires POWER9 CPU");
> > +            return;
> > +        }
> > +    }
> > +}
> > +
> >  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> >      [SPAPR_CAP_HTM] = {
> >          .name = "htm",
> > @@ -595,6 +631,24 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> >          .type = "bool",
> >          .apply = cap_ccf_assist_apply,
> >      },
> > +    [SPAPR_CAP_XICS] = {
> > +        .name = "xics",
> > +        .description = "Allow XICS interrupt controller",
> > +        .index = SPAPR_CAP_XICS,
> > +        .get = spapr_cap_get_bool,
> > +        .set = spapr_cap_set_bool,
> > +        .type = "bool",
> > +        .apply = cap_xics_apply,
> > +    },
> > +    [SPAPR_CAP_XIVE] = {
> > +        .name = "xive",
> > +        .description = "Allow XIVE interrupt controller",
> > +        .index = SPAPR_CAP_XIVE,
> > +        .get = spapr_cap_get_bool,
> > +        .set = spapr_cap_set_bool,
> > +        .type = "bool",
> > +        .apply = cap_xive_apply,
> > +    },
> >  };
> >  
> >  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> > @@ -641,6 +695,14 @@ static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> >          caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps;
> >      }
> >  
> > +    /*
> > +     * POWER8 machines don't have XIVE
> > +     */
> > +    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
> > +                               0, spapr->max_compat_pvr)) {
> > +        caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
> > +    }
> > +
> >      return caps;
> >  }
> >  
> > @@ -734,6 +796,8 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
> >  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> >  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
> >  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> > +SPAPR_CAP_MIG_STATE(xics, SPAPR_CAP_XICS);
> > +SPAPR_CAP_MIG_STATE(xive, SPAPR_CAP_XIVE);
> >  
> >  void spapr_caps_init(SpaprMachineState *spapr)
> >  {
> > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > index 140f05c1c6..cb4c6edf63 100644
> > --- a/hw/ppc/spapr_hcall.c
> > +++ b/hw/ppc/spapr_hcall.c
> > @@ -1784,13 +1784,13 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
> >       * terminate the boot.
> >       */
> >      if (guest_xive) {
> > -        if (!spapr->irq->xive) {
> > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >              error_report(
> >  "Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
> >              exit(EXIT_FAILURE);
> >          }
> >      } else {
> > -        if (!spapr->irq->xics) {
> > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
> >              error_report(
> >  "Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
> >              exit(EXIT_FAILURE);
> > @@ -1804,7 +1804,8 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
> >       */
> >      if (!spapr->cas_reboot) {
> >          spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT)
> > -            && spapr->irq->xics && spapr->irq->xive;
> > +            && spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > +            && spapr_get_cap(spapr, SPAPR_CAP_XIVE);
> >      }
> >  
> >      spapr_ovec_cleanup(ov5_updates);
> > diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> > index 2768f9a765..473fc8780a 100644
> > --- a/hw/ppc/spapr_irq.c
> > +++ b/hw/ppc/spapr_irq.c
> > @@ -101,90 +101,19 @@ int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **),
> >      return 0;
> >  }
> >  
> > -/*
> > - * XICS IRQ backend.
> > - */
> > -
> > -SpaprIrq spapr_irq_xics = {
> > -    .xics        = true,
> > -    .xive        = false,
> > -};
> > -
> > -/*
> > - * XIVE IRQ backend.
> > - */
> > -
> > -SpaprIrq spapr_irq_xive = {
> > -    .xics        = false,
> > -    .xive        = true,
> > -};
> > -
> > -/*
> > - * Dual XIVE and XICS IRQ backend.
> > - *
> > - * Both interrupt mode, XIVE and XICS, objects are created but the
> > - * machine starts in legacy interrupt mode (XICS). It can be changed
> > - * by the CAS negotiation process and, in that case, the new mode is
> > - * activated after an extra machine reset.
> > - */
> > -
> > -/*
> > - * Define values in sync with the XIVE and XICS backend
> > - */
> > -SpaprIrq spapr_irq_dual = {
> > -    .xics        = true,
> > -    .xive        = true,
> > -};
> > -
> > -
> >  static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
> >  {
> >      MachineState *machine = MACHINE(spapr);
> >  
> > -    /*
> > -     * Sanity checks on non-P9 machines. On these, XIVE is not
> > -     * advertised, see spapr_dt_ov5_platform_support()
> > -     */
> > -    if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
> > -                               0, spapr->max_compat_pvr)) {
> > -        /*
> > -         * If the 'dual' interrupt mode is selected, force XICS as CAS
> > -         * negotiation is useless.
> > -         */
> > -        if (spapr->irq == &spapr_irq_dual) {
> > -            spapr->irq = &spapr_irq_xics;
> > -            return 0;
> > -        }
> > -
> > -        /*
> > -         * Non-P9 machines using only XIVE is a bogus setup. We have two
> > -         * scenarios to take into account because of the compat mode:
> > -         *
> > -         * 1. POWER7/8 machines should fail to init later on when creating
> > -         *    the XIVE interrupt presenters because a POWER9 exception
> > -         *    model is required.
> > -
> > -         * 2. POWER9 machines using the POWER8 compat mode won't fail and
> > -         *    will let the OS boot with a partial XIVE setup : DT
> > -         *    properties but no hcalls.
> > -         *
> > -         * To cover both and not confuse the OS, add an early failure in
> > -         * QEMU.
> > -         */
> > -        if (spapr->irq == &spapr_irq_xive) {
> > -            error_setg(errp, "XIVE-only machines require a POWER9 CPU");
> > -            return -1;
> > -        }
> > -    }
> > -
> >      /*
> >       * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
> >       * re-created. Detect that early to avoid QEMU to exit later when the
> >       * guest reboots.
> >       */
> >      if (kvm_enabled() &&
> > -        spapr->irq == &spapr_irq_dual &&
> >          machine_kernel_irqchip_required(machine) &&
> > +        spapr_get_cap(spapr, SPAPR_CAP_XICS) &&
> > +        spapr_get_cap(spapr, SPAPR_CAP_XIVE) &&
> >          xics_kvm_has_broken_disconnect(spapr)) {
> >          error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on");
> >          return -1;
> > @@ -280,7 +209,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
> >      /* Initialize the MSI IRQ allocator. */
> >      spapr_irq_msi_init(spapr);
> >  
> > -    if (spapr->irq->xics) {
> > +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
> >          Error *local_err = NULL;
> >          Object *obj;
> >  
> > @@ -313,7 +242,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
> >          spapr->ics = ICS_SPAPR(obj);
> >      }
> >  
> > -    if (spapr->irq->xive) {
> > +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> >          uint32_t nr_servers = spapr_max_server_number(spapr);
> >          DeviceState *dev;
> >          int i;
> > @@ -558,11 +487,6 @@ int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
> >      return first + ics->offset;
> >  }
> >  
> > -SpaprIrq spapr_irq_xics_legacy = {
> > -    .xics        = true,
> > -    .xive        = false,
> > -};
> > -
> >  static void spapr_irq_register_types(void)
> >  {
> >      type_register_static(&spapr_intc_info);
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index 623e8e3f93..d3b4dd7de3 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -79,8 +79,12 @@ typedef enum {
> >  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
> >  /* Count Cache Flush Assist HW Instruction */
> >  #define SPAPR_CAP_CCF_ASSIST            0x09
> > +/* XICS interrupt controller */
> > +#define SPAPR_CAP_XICS                  0x0a
> > +/* XIVE interrupt controller */
> > +#define SPAPR_CAP_XIVE                  0x0b
> >  /* Num Caps */
> > -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
> > +#define SPAPR_CAP_NUM                   (SPAPR_CAP_XIVE + 1)
> >  
> >  /*
> >   * Capability Values
> > @@ -131,7 +135,6 @@ struct SpaprMachineClass {
> >                            hwaddr *nv2atsd, Error **errp);
> >      SpaprResizeHpt resize_hpt_default;
> >      SpaprCapabilities default_caps;
> > -    SpaprIrq *irq;
> >  };
> >  
> >  /**
> > @@ -195,7 +198,6 @@ struct SpaprMachineState {
> >  
> >      int32_t irq_map_nr;
> >      unsigned long *irq_map;
> > -    SpaprIrq *irq;
> >      qemu_irq *qirqs;
> >      SpaprInterruptController *active_intc;
> >      ICSState *ics;
> > @@ -870,6 +872,8 @@ extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
> >  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
> >  extern const VMStateDescription vmstate_spapr_cap_large_decr;
> >  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
> > +extern const VMStateDescription vmstate_spapr_cap_xics;
> > +extern const VMStateDescription vmstate_spapr_cap_xive;
> >  
> >  static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
> >  {
> > diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> > index 5e150a6679..71aee13743 100644
> > --- a/include/hw/ppc/spapr_irq.h
> > +++ b/include/hw/ppc/spapr_irq.h
> > @@ -77,16 +77,6 @@ int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
> >                          Error **errp);
> >  void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num);
> >  
> > -typedef struct SpaprIrq {
> > -    bool        xics;
> > -    bool        xive;
> > -} SpaprIrq;
> > -
> > -extern SpaprIrq spapr_irq_xics;
> > -extern SpaprIrq spapr_irq_xics_legacy;
> > -extern SpaprIrq spapr_irq_xive;
> > -extern SpaprIrq spapr_irq_dual;
> > -
> >  void spapr_irq_init(SpaprMachineState *spapr, Error **errp);
> >  int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp);
> >  void spapr_irq_free(SpaprMachineState *spapr, int irq, int num);
>
Greg Kurz Oct. 10, 2019, 6:29 a.m. UTC | #5
On Thu, 10 Oct 2019 13:02:09 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Wed, Oct 09, 2019 at 07:02:15PM +0200, Greg Kurz wrote:
> > On Wed,  9 Oct 2019 17:08:16 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> > 
> > > The only thing remaining in this structure are the flags to allow either
> > > XICS or XIVE to be present.  These actually make more sense as spapr
> > > capabilities - that way they can take advantage of the existing
> > > infrastructure to sanity check capability states across migration and so
> > > forth.
> > > 
> > 
> > The user can now choose the interrupt controller mode either through
> > ic-mode or through cap-xics/cap-xive. I guess it doesn't break anything
> > to expose another API to do the same thing but it raises some questions.
> > 
> > We should at least document somewhere that ic-mode is an alias to these
> > caps, and maybe state which is the preferred method (I personally vote
> > for the caps).
> > 
> > Also, we must keep ic-mode for the moment to stay compatible with the
> > existing pseries-4.0 and pseries-4.1 machine types, but will we
> > > keep ic-mode forever? If not, maybe start by not allowing it for
> > pseries-4.2 ?
> 
> I'm actually inclined to keep it for now, maybe even leave it as the
> suggested way to configure this.  The caps are nice from an internal
> organization point of view, but ic-mode is arguably a more user
> friendly way of configuring it.  The conversion of one to the other is
> straightforward, isolated and small, so I'm not especially bothered by
> keeping it around.
> 

Fair enough.

Reviewed-by: Greg Kurz <groug@kaod.org>

> > 
> > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > ---
> > >  hw/ppc/spapr.c             | 40 ++++++++++--------
> > >  hw/ppc/spapr_caps.c        | 64 +++++++++++++++++++++++++++++
> > >  hw/ppc/spapr_hcall.c       |  7 ++--
> > >  hw/ppc/spapr_irq.c         | 84 ++------------------------------------
> > >  include/hw/ppc/spapr.h     | 10 +++--
> > >  include/hw/ppc/spapr_irq.h | 10 -----
> > >  6 files changed, 103 insertions(+), 112 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index e1ff03152e..bf9fdb1693 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -1072,12 +1072,13 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
> > >          26, 0x40, /* Radix options: GTSE == yes. */
> > >      };
> > >  
> > > -    if (spapr->irq->xics && spapr->irq->xive) {
> > > +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > > +        && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > >          val[1] = SPAPR_OV5_XIVE_BOTH;
> > > -    } else if (spapr->irq->xive) {
> > > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > >          val[1] = SPAPR_OV5_XIVE_EXPLOIT;
> > >      } else {
> > > -        assert(spapr->irq->xics);
> > > +        assert(spapr_get_cap(spapr, SPAPR_CAP_XICS));
> > >          val[1] = SPAPR_OV5_XIVE_LEGACY;
> > >      }
> > >  
> > > @@ -2075,6 +2076,8 @@ static const VMStateDescription vmstate_spapr = {
> > >          &vmstate_spapr_dtb,
> > >          &vmstate_spapr_cap_large_decr,
> > >          &vmstate_spapr_cap_ccf_assist,
> > > +        &vmstate_spapr_cap_xics,
> > > +        &vmstate_spapr_cap_xive,
> > >          NULL
> > >      }
> > >  };
> > > @@ -2775,7 +2778,7 @@ static void spapr_machine_init(MachineState *machine)
> > >      spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
> > >  
> > >      /* advertise XIVE on POWER9 machines */
> > > -    if (spapr->irq->xive) {
> > > +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > >          spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
> > >      }
> > >  
> > > @@ -3242,14 +3245,18 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
> > >  static char *spapr_get_ic_mode(Object *obj, Error **errp)
> > >  {
> > >      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> > > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > >  
> > > -    if (spapr->irq == &spapr_irq_xics_legacy) {
> > > +    if (smc->legacy_irq_allocation) {
> > >          return g_strdup("legacy");
> > > -    } else if (spapr->irq == &spapr_irq_xics) {
> > > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > > +               && !spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > >          return g_strdup("xics");
> > > -    } else if (spapr->irq == &spapr_irq_xive) {
> > > +    } else if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > > +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > >          return g_strdup("xive");
> > > -    } else if (spapr->irq == &spapr_irq_dual) {
> > > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > > +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > >          return g_strdup("dual");
> > >      }
> > >      g_assert_not_reached();
> > > @@ -3266,11 +3273,14 @@ static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
> > >  
> > >      /* The legacy IRQ backend can not be set */
> > >      if (strcmp(value, "xics") == 0) {
> > > -        spapr->irq = &spapr_irq_xics;
> > > +        object_property_set_bool(obj, true, "cap-xics", errp);
> > > +        object_property_set_bool(obj, false, "cap-xive", errp);
> > >      } else if (strcmp(value, "xive") == 0) {
> > > -        spapr->irq = &spapr_irq_xive;
> > > +        object_property_set_bool(obj, false, "cap-xics", errp);
> > > +        object_property_set_bool(obj, true, "cap-xive", errp);
> > >      } else if (strcmp(value, "dual") == 0) {
> > > -        spapr->irq = &spapr_irq_dual;
> > > +        object_property_set_bool(obj, true, "cap-xics", errp);
> > > +        object_property_set_bool(obj, true, "cap-xive", errp);
> > >      } else {
> > >          error_setg(errp, "Bad value for \"ic-mode\" property");
> > >      }
> > > @@ -3309,7 +3319,6 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
> > >  static void spapr_instance_init(Object *obj)
> > >  {
> > >      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> > > -    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > >  
> > >      spapr->htab_fd = -1;
> > >      spapr->use_hotplug_event_source = true;
> > > @@ -3345,7 +3354,6 @@ static void spapr_instance_init(Object *obj)
> > >                               spapr_get_msix_emulation, NULL, NULL);
> > >  
> > >      /* The machine class defines the default interrupt controller mode */
> > > -    spapr->irq = smc->irq;
> > >      object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
> > >                              spapr_set_ic_mode, NULL);
> > >      object_property_set_description(obj, "ic-mode",
> > > @@ -4439,8 +4447,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> > >      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> > >      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> > >      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> > > +    smc->default_caps.caps[SPAPR_CAP_XICS] = SPAPR_CAP_ON;
> > > +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_ON;
> > >      spapr_caps_add_properties(smc, &error_abort);
> > > -    smc->irq = &spapr_irq_dual;
> > >      smc->dr_phb_enabled = true;
> > >      smc->linux_pci_probe = true;
> > >      smc->nr_xirqs = SPAPR_NR_XIRQS;
> > > @@ -4539,7 +4548,7 @@ static void spapr_machine_4_0_class_options(MachineClass *mc)
> > >      spapr_machine_4_1_class_options(mc);
> > >      compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
> > >      smc->phb_placement = phb_placement_4_0;
> > > -    smc->irq = &spapr_irq_xics;
> > > +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
> > >      smc->pre_4_1_migration = true;
> > >  }
> > >  
> > > @@ -4580,7 +4589,6 @@ static void spapr_machine_3_0_class_options(MachineClass *mc)
> > >  
> > >      smc->legacy_irq_allocation = true;
> > >      smc->nr_xirqs = 0x400;
> > > -    smc->irq = &spapr_irq_xics_legacy;
> > >  }
> > >  
> > >  DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
> > > diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> > > index 481dfd2a27..e06fd386f6 100644
> > > --- a/hw/ppc/spapr_caps.c
> > > +++ b/hw/ppc/spapr_caps.c
> > > @@ -496,6 +496,42 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
> > >      }
> > >  }
> > >  
> > > +static void cap_xics_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> > > +{
> > > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > > +
> > > +    if (!val) {
> > > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > > +            error_setg(errp,
> > > +"No interrupt controllers enabled, try cap-xics=on or cap-xive=on");
> > > +            return;
> > > +        }
> > > +
> > > +        if (smc->legacy_irq_allocation) {
> > > +            error_setg(errp, "This machine version requires XICS support");
> > > +            return;
> > > +        }
> > > +    }
> > > +}
> > > +
> > > +static void cap_xive_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> > > +{
> > > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > > +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> > > +
> > > +    if (val) {
> > > +        if (smc->legacy_irq_allocation) {
> > > +            error_setg(errp, "This machine version cannot support XIVE");
> > > +            return;
> > > +        }
> > > +        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
> > > +                              spapr->max_compat_pvr)) {
> > > +            error_setg(errp, "XIVE requires POWER9 CPU");
> > > +            return;
> > > +        }
> > > +    }
> > > +}
> > > +
> > >  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> > >      [SPAPR_CAP_HTM] = {
> > >          .name = "htm",
> > > @@ -595,6 +631,24 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> > >          .type = "bool",
> > >          .apply = cap_ccf_assist_apply,
> > >      },
> > > +    [SPAPR_CAP_XICS] = {
> > > +        .name = "xics",
> > > +        .description = "Allow XICS interrupt controller",
> > > +        .index = SPAPR_CAP_XICS,
> > > +        .get = spapr_cap_get_bool,
> > > +        .set = spapr_cap_set_bool,
> > > +        .type = "bool",
> > > +        .apply = cap_xics_apply,
> > > +    },
> > > +    [SPAPR_CAP_XIVE] = {
> > > +        .name = "xive",
> > > +        .description = "Allow XIVE interrupt controller",
> > > +        .index = SPAPR_CAP_XIVE,
> > > +        .get = spapr_cap_get_bool,
> > > +        .set = spapr_cap_set_bool,
> > > +        .type = "bool",
> > > +        .apply = cap_xive_apply,
> > > +    },
> > >  };
> > >  
> > >  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> > > @@ -641,6 +695,14 @@ static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> > >          caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps;
> > >      }
> > >  
> > > +    /*
> > > +     * POWER8 machines don't have XIVE
> > > +     */
> > > +    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
> > > +                               0, spapr->max_compat_pvr)) {
> > > +        caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
> > > +    }
> > > +
> > >      return caps;
> > >  }
> > >  
> > > @@ -734,6 +796,8 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
> > >  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> > >  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
> > >  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> > > +SPAPR_CAP_MIG_STATE(xics, SPAPR_CAP_XICS);
> > > +SPAPR_CAP_MIG_STATE(xive, SPAPR_CAP_XIVE);
> > >  
> > >  void spapr_caps_init(SpaprMachineState *spapr)
> > >  {
> > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > > index 140f05c1c6..cb4c6edf63 100644
> > > --- a/hw/ppc/spapr_hcall.c
> > > +++ b/hw/ppc/spapr_hcall.c
> > > @@ -1784,13 +1784,13 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
> > >       * terminate the boot.
> > >       */
> > >      if (guest_xive) {
> > > -        if (!spapr->irq->xive) {
> > > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > >              error_report(
> > >  "Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
> > >              exit(EXIT_FAILURE);
> > >          }
> > >      } else {
> > > -        if (!spapr->irq->xics) {
> > > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
> > >              error_report(
> > >  "Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
> > >              exit(EXIT_FAILURE);
> > > @@ -1804,7 +1804,8 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
> > >       */
> > >      if (!spapr->cas_reboot) {
> > >          spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT)
> > > -            && spapr->irq->xics && spapr->irq->xive;
> > > +            && spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > > +            && spapr_get_cap(spapr, SPAPR_CAP_XIVE);
> > >      }
> > >  
> > >      spapr_ovec_cleanup(ov5_updates);
> > > diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> > > index 2768f9a765..473fc8780a 100644
> > > --- a/hw/ppc/spapr_irq.c
> > > +++ b/hw/ppc/spapr_irq.c
> > > @@ -101,90 +101,19 @@ int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **),
> > >      return 0;
> > >  }
> > >  
> > > -/*
> > > - * XICS IRQ backend.
> > > - */
> > > -
> > > -SpaprIrq spapr_irq_xics = {
> > > -    .xics        = true,
> > > -    .xive        = false,
> > > -};
> > > -
> > > -/*
> > > - * XIVE IRQ backend.
> > > - */
> > > -
> > > -SpaprIrq spapr_irq_xive = {
> > > -    .xics        = false,
> > > -    .xive        = true,
> > > -};
> > > -
> > > -/*
> > > - * Dual XIVE and XICS IRQ backend.
> > > - *
> > > - * Both interrupt mode, XIVE and XICS, objects are created but the
> > > - * machine starts in legacy interrupt mode (XICS). It can be changed
> > > - * by the CAS negotiation process and, in that case, the new mode is
> > > - * activated after an extra machine reset.
> > > - */
> > > -
> > > -/*
> > > - * Define values in sync with the XIVE and XICS backend
> > > - */
> > > -SpaprIrq spapr_irq_dual = {
> > > -    .xics        = true,
> > > -    .xive        = true,
> > > -};
> > > -
> > > -
> > >  static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
> > >  {
> > >      MachineState *machine = MACHINE(spapr);
> > >  
> > > -    /*
> > > -     * Sanity checks on non-P9 machines. On these, XIVE is not
> > > -     * advertised, see spapr_dt_ov5_platform_support()
> > > -     */
> > > -    if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
> > > -                               0, spapr->max_compat_pvr)) {
> > > -        /*
> > > -         * If the 'dual' interrupt mode is selected, force XICS as CAS
> > > -         * negotiation is useless.
> > > -         */
> > > -        if (spapr->irq == &spapr_irq_dual) {
> > > -            spapr->irq = &spapr_irq_xics;
> > > -            return 0;
> > > -        }
> > > -
> > > -        /*
> > > -         * Non-P9 machines using only XIVE is a bogus setup. We have two
> > > -         * scenarios to take into account because of the compat mode:
> > > -         *
> > > -         * 1. POWER7/8 machines should fail to init later on when creating
> > > -         *    the XIVE interrupt presenters because a POWER9 exception
> > > -         *    model is required.
> > > -
> > > -         * 2. POWER9 machines using the POWER8 compat mode won't fail and
> > > -         *    will let the OS boot with a partial XIVE setup : DT
> > > -         *    properties but no hcalls.
> > > -         *
> > > -         * To cover both and not confuse the OS, add an early failure in
> > > -         * QEMU.
> > > -         */
> > > -        if (spapr->irq == &spapr_irq_xive) {
> > > -            error_setg(errp, "XIVE-only machines require a POWER9 CPU");
> > > -            return -1;
> > > -        }
> > > -    }
> > > -
> > >      /*
> > >       * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
> > >       * re-created. Detect that early to avoid QEMU to exit later when the
> > >       * guest reboots.
> > >       */
> > >      if (kvm_enabled() &&
> > > -        spapr->irq == &spapr_irq_dual &&
> > >          machine_kernel_irqchip_required(machine) &&
> > > +        spapr_get_cap(spapr, SPAPR_CAP_XICS) &&
> > > +        spapr_get_cap(spapr, SPAPR_CAP_XIVE) &&
> > >          xics_kvm_has_broken_disconnect(spapr)) {
> > >          error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on");
> > >          return -1;
> > > @@ -280,7 +209,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
> > >      /* Initialize the MSI IRQ allocator. */
> > >      spapr_irq_msi_init(spapr);
> > >  
> > > -    if (spapr->irq->xics) {
> > > +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
> > >          Error *local_err = NULL;
> > >          Object *obj;
> > >  
> > > @@ -313,7 +242,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
> > >          spapr->ics = ICS_SPAPR(obj);
> > >      }
> > >  
> > > -    if (spapr->irq->xive) {
> > > +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > >          uint32_t nr_servers = spapr_max_server_number(spapr);
> > >          DeviceState *dev;
> > >          int i;
> > > @@ -558,11 +487,6 @@ int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
> > >      return first + ics->offset;
> > >  }
> > >  
> > > -SpaprIrq spapr_irq_xics_legacy = {
> > > -    .xics        = true,
> > > -    .xive        = false,
> > > -};
> > > -
> > >  static void spapr_irq_register_types(void)
> > >  {
> > >      type_register_static(&spapr_intc_info);
> > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > > index 623e8e3f93..d3b4dd7de3 100644
> > > --- a/include/hw/ppc/spapr.h
> > > +++ b/include/hw/ppc/spapr.h
> > > @@ -79,8 +79,12 @@ typedef enum {
> > >  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
> > >  /* Count Cache Flush Assist HW Instruction */
> > >  #define SPAPR_CAP_CCF_ASSIST            0x09
> > > +/* XICS interrupt controller */
> > > +#define SPAPR_CAP_XICS                  0x0a
> > > +/* XIVE interrupt controller */
> > > +#define SPAPR_CAP_XIVE                  0x0b
> > >  /* Num Caps */
> > > -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
> > > +#define SPAPR_CAP_NUM                   (SPAPR_CAP_XIVE + 1)
> > >  
> > >  /*
> > >   * Capability Values
> > > @@ -131,7 +135,6 @@ struct SpaprMachineClass {
> > >                            hwaddr *nv2atsd, Error **errp);
> > >      SpaprResizeHpt resize_hpt_default;
> > >      SpaprCapabilities default_caps;
> > > -    SpaprIrq *irq;
> > >  };
> > >  
> > >  /**
> > > @@ -195,7 +198,6 @@ struct SpaprMachineState {
> > >  
> > >      int32_t irq_map_nr;
> > >      unsigned long *irq_map;
> > > -    SpaprIrq *irq;
> > >      qemu_irq *qirqs;
> > >      SpaprInterruptController *active_intc;
> > >      ICSState *ics;
> > > @@ -870,6 +872,8 @@ extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
> > >  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
> > >  extern const VMStateDescription vmstate_spapr_cap_large_decr;
> > >  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
> > > +extern const VMStateDescription vmstate_spapr_cap_xics;
> > > +extern const VMStateDescription vmstate_spapr_cap_xive;
> > >  
> > >  static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
> > >  {
> > > diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> > > index 5e150a6679..71aee13743 100644
> > > --- a/include/hw/ppc/spapr_irq.h
> > > +++ b/include/hw/ppc/spapr_irq.h
> > > @@ -77,16 +77,6 @@ int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
> > >                          Error **errp);
> > >  void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num);
> > >  
> > > -typedef struct SpaprIrq {
> > > -    bool        xics;
> > > -    bool        xive;
> > > -} SpaprIrq;
> > > -
> > > -extern SpaprIrq spapr_irq_xics;
> > > -extern SpaprIrq spapr_irq_xics_legacy;
> > > -extern SpaprIrq spapr_irq_xive;
> > > -extern SpaprIrq spapr_irq_dual;
> > > -
> > >  void spapr_irq_init(SpaprMachineState *spapr, Error **errp);
> > >  int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp);
> > >  void spapr_irq_free(SpaprMachineState *spapr, int irq, int num);
> > 
>
Greg Kurz Oct. 10, 2019, 8:33 p.m. UTC | #6
On Thu, 10 Oct 2019 08:29:58 +0200
Greg Kurz <groug@kaod.org> wrote:

> On Thu, 10 Oct 2019 13:02:09 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Wed, Oct 09, 2019 at 07:02:15PM +0200, Greg Kurz wrote:
> > > On Wed,  9 Oct 2019 17:08:16 +1100
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > 
> > > > The only thing remaining in this structure are the flags to allow either
> > > > XICS or XIVE to be present.  These actually make more sense as spapr
> > > > capabilities - that way they can take advantage of the existing
> > > > infrastructure to sanity check capability states across migration and so
> > > > forth.
> > > > 
> > > 
> > > The user can now choose the interrupt controller mode either through
> > > ic-mode or through cap-xics/cap-xive. I guess it doesn't break anything
> > > to expose another API to do the same thing but it raises some questions.
> > > 
> > > We should at least document somewhere that ic-mode is an alias to these
> > > caps, and maybe state which is the preferred method (I personally vote
> > > for the caps).
> > > 
> > > Also, we must keep ic-mode for the moment to stay compatible with the
> > > existing pseries-4.0 and pseries-4.1 machine types, but will we
> > > keep ic-mode forever ? If no, maybe start by not allowing it for
> > > pseries-4.2 ?
> > 
> > I'm actually inclined to keep it for now, maybe even leave it as the
> > suggested way to configure this.  The caps are nice from an internal
> > organization point of view, but ic-mode is arguably a more user
> > friendly way of configuring it.  The conversion of one to the other is
> > straightforward, isolated ans small, so I'm not especially bothered by
> > keeping it around.
> > 
> 
> Fair enough.
> 
> Reviewed-by: Greg Kurz <groug@kaod.org>
> 

But unfortunately this still requires care :-\

qemu-system-ppc64: cap-xive higher level (1) in incoming stream than on destination (0)
qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
qemu-system-ppc64: load of migration failed: Invalid argument

or

qemu-system-ppc64: cap-xics higher level (1) in incoming stream than on destination (0)
qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
qemu-system-ppc64: load of migration failed: Invalid argument

when migrating from QEMU 4.1 with ic-mode=xics and ic-mode=xive respectively.

This happens because the existing pseries-4.1 machine type doesn't send the
new caps and the logic in spapr_caps_post_migration() wrongly assumes that
the source has both caps set:

    srccaps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type);
    for (i = 0; i < SPAPR_CAP_NUM; i++) {
        /* If not default value then assume came in with the migration */
        if (spapr->mig.caps[i] != spapr->def.caps[i]) {

spapr->mig.caps[SPAPR_CAP_XICS] = 0
spapr->mig.caps[SPAPR_CAP_XIVE] = 0

            srccaps.caps[i] = spapr->mig.caps[i];

srccaps.caps[SPAPR_CAP_XICS] = 1
srccaps.caps[SPAPR_CAP_XIVE] = 1

        }
    }

and breaks

    for (i = 0; i < SPAPR_CAP_NUM; i++) {
        SpaprCapabilityInfo *info = &capability_table[i];

        if (srccaps.caps[i] > dstcaps.caps[i]) {

dstcaps.caps[SPAPR_CAP_XICS] = 0 when ic-mode=xive
dstcaps.caps[SPAPR_CAP_XIVE] = 0 when ic-mode=xics

            error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)",
                         info->name, srccaps.caps[i], dstcaps.caps[i]);
            ok = false;
        }

Maybe we shouldn't check capabilities that we know the source
isn't supposed to send, eg. by having a smc->max_cap ?

> > > 
> > > > Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
> > > > ---
> > > >  hw/ppc/spapr.c             | 40 ++++++++++--------
> > > >  hw/ppc/spapr_caps.c        | 64 +++++++++++++++++++++++++++++
> > > >  hw/ppc/spapr_hcall.c       |  7 ++--
> > > >  hw/ppc/spapr_irq.c         | 84 ++------------------------------------
> > > >  include/hw/ppc/spapr.h     | 10 +++--
> > > >  include/hw/ppc/spapr_irq.h | 10 -----
> > > >  6 files changed, 103 insertions(+), 112 deletions(-)
> > > > 
> > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > index e1ff03152e..bf9fdb1693 100644
> > > > --- a/hw/ppc/spapr.c
> > > > +++ b/hw/ppc/spapr.c
> > > > @@ -1072,12 +1072,13 @@ static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
> > > >          26, 0x40, /* Radix options: GTSE == yes. */
> > > >      };
> > > >  
> > > > -    if (spapr->irq->xics && spapr->irq->xive) {
> > > > +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > > > +        && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > > >          val[1] = SPAPR_OV5_XIVE_BOTH;
> > > > -    } else if (spapr->irq->xive) {
> > > > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > > >          val[1] = SPAPR_OV5_XIVE_EXPLOIT;
> > > >      } else {
> > > > -        assert(spapr->irq->xics);
> > > > +        assert(spapr_get_cap(spapr, SPAPR_CAP_XICS));
> > > >          val[1] = SPAPR_OV5_XIVE_LEGACY;
> > > >      }
> > > >  
> > > > @@ -2075,6 +2076,8 @@ static const VMStateDescription vmstate_spapr = {
> > > >          &vmstate_spapr_dtb,
> > > >          &vmstate_spapr_cap_large_decr,
> > > >          &vmstate_spapr_cap_ccf_assist,
> > > > +        &vmstate_spapr_cap_xics,
> > > > +        &vmstate_spapr_cap_xive,
> > > >          NULL
> > > >      }
> > > >  };
> > > > @@ -2775,7 +2778,7 @@ static void spapr_machine_init(MachineState *machine)
> > > >      spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
> > > >  
> > > >      /* advertise XIVE on POWER9 machines */
> > > > -    if (spapr->irq->xive) {
> > > > +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > > >          spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
> > > >      }
> > > >  
> > > > @@ -3242,14 +3245,18 @@ static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
> > > >  static char *spapr_get_ic_mode(Object *obj, Error **errp)
> > > >  {
> > > >      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> > > > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > > >  
> > > > -    if (spapr->irq == &spapr_irq_xics_legacy) {
> > > > +    if (smc->legacy_irq_allocation) {
> > > >          return g_strdup("legacy");
> > > > -    } else if (spapr->irq == &spapr_irq_xics) {
> > > > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > > > +               && !spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > > >          return g_strdup("xics");
> > > > -    } else if (spapr->irq == &spapr_irq_xive) {
> > > > +    } else if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > > > +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > > >          return g_strdup("xive");
> > > > -    } else if (spapr->irq == &spapr_irq_dual) {
> > > > +    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > > > +               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > > >          return g_strdup("dual");
> > > >      }
> > > >      g_assert_not_reached();
> > > > @@ -3266,11 +3273,14 @@ static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
> > > >  
> > > >      /* The legacy IRQ backend can not be set */
> > > >      if (strcmp(value, "xics") == 0) {
> > > > -        spapr->irq = &spapr_irq_xics;
> > > > +        object_property_set_bool(obj, true, "cap-xics", errp);
> > > > +        object_property_set_bool(obj, false, "cap-xive", errp);
> > > >      } else if (strcmp(value, "xive") == 0) {
> > > > -        spapr->irq = &spapr_irq_xive;
> > > > +        object_property_set_bool(obj, false, "cap-xics", errp);
> > > > +        object_property_set_bool(obj, true, "cap-xive", errp);
> > > >      } else if (strcmp(value, "dual") == 0) {
> > > > -        spapr->irq = &spapr_irq_dual;
> > > > +        object_property_set_bool(obj, true, "cap-xics", errp);
> > > > +        object_property_set_bool(obj, true, "cap-xive", errp);
> > > >      } else {
> > > >          error_setg(errp, "Bad value for \"ic-mode\" property");
> > > >      }
> > > > @@ -3309,7 +3319,6 @@ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
> > > >  static void spapr_instance_init(Object *obj)
> > > >  {
> > > >      SpaprMachineState *spapr = SPAPR_MACHINE(obj);
> > > > -    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > > >  
> > > >      spapr->htab_fd = -1;
> > > >      spapr->use_hotplug_event_source = true;
> > > > @@ -3345,7 +3354,6 @@ static void spapr_instance_init(Object *obj)
> > > >                               spapr_get_msix_emulation, NULL, NULL);
> > > >  
> > > >      /* The machine class defines the default interrupt controller mode */
> > > > -    spapr->irq = smc->irq;
> > > >      object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
> > > >                              spapr_set_ic_mode, NULL);
> > > >      object_property_set_description(obj, "ic-mode",
> > > > @@ -4439,8 +4447,9 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> > > >      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> > > >      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> > > >      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> > > > +    smc->default_caps.caps[SPAPR_CAP_XICS] = SPAPR_CAP_ON;
> > > > +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_ON;
> > > >      spapr_caps_add_properties(smc, &error_abort);
> > > > -    smc->irq = &spapr_irq_dual;
> > > >      smc->dr_phb_enabled = true;
> > > >      smc->linux_pci_probe = true;
> > > >      smc->nr_xirqs = SPAPR_NR_XIRQS;
> > > > @@ -4539,7 +4548,7 @@ static void spapr_machine_4_0_class_options(MachineClass *mc)
> > > >      spapr_machine_4_1_class_options(mc);
> > > >      compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
> > > >      smc->phb_placement = phb_placement_4_0;
> > > > -    smc->irq = &spapr_irq_xics;
> > > > +    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
> > > >      smc->pre_4_1_migration = true;
> > > >  }
> > > >  
> > > > @@ -4580,7 +4589,6 @@ static void spapr_machine_3_0_class_options(MachineClass *mc)
> > > >  
> > > >      smc->legacy_irq_allocation = true;
> > > >      smc->nr_xirqs = 0x400;
> > > > -    smc->irq = &spapr_irq_xics_legacy;
> > > >  }
> > > >  
> > > >  DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
> > > > diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> > > > index 481dfd2a27..e06fd386f6 100644
> > > > --- a/hw/ppc/spapr_caps.c
> > > > +++ b/hw/ppc/spapr_caps.c
> > > > @@ -496,6 +496,42 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
> > > >      }
> > > >  }
> > > >  
> > > > +static void cap_xics_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> > > > +{
> > > > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > > > +
> > > > +    if (!val) {
> > > > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > > > +            error_setg(errp,
> > > > +"No interrupt controllers enabled, try cap-xics=on or cap-xive=on");
> > > > +            return;
> > > > +        }
> > > > +
> > > > +        if (smc->legacy_irq_allocation) {
> > > > +            error_setg(errp, "This machine version requires XICS support");
> > > > +            return;
> > > > +        }
> > > > +    }
> > > > +}
> > > > +
> > > > +static void cap_xive_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
> > > > +{
> > > > +    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> > > > +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> > > > +
> > > > +    if (val) {
> > > > +        if (smc->legacy_irq_allocation) {
> > > > +            error_setg(errp, "This machine version cannot support XIVE");
> > > > +            return;
> > > > +        }
> > > > +        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
> > > > +                              spapr->max_compat_pvr)) {
> > > > +            error_setg(errp, "XIVE requires POWER9 CPU");
> > > > +            return;
> > > > +        }
> > > > +    }
> > > > +}
> > > > +
> > > >  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> > > >      [SPAPR_CAP_HTM] = {
> > > >          .name = "htm",
> > > > @@ -595,6 +631,24 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
> > > >          .type = "bool",
> > > >          .apply = cap_ccf_assist_apply,
> > > >      },
> > > > +    [SPAPR_CAP_XICS] = {
> > > > +        .name = "xics",
> > > > +        .description = "Allow XICS interrupt controller",
> > > > +        .index = SPAPR_CAP_XICS,
> > > > +        .get = spapr_cap_get_bool,
> > > > +        .set = spapr_cap_set_bool,
> > > > +        .type = "bool",
> > > > +        .apply = cap_xics_apply,
> > > > +    },
> > > > +    [SPAPR_CAP_XIVE] = {
> > > > +        .name = "xive",
> > > > +        .description = "Allow XIVE interrupt controller",
> > > > +        .index = SPAPR_CAP_XIVE,
> > > > +        .get = spapr_cap_get_bool,
> > > > +        .set = spapr_cap_set_bool,
> > > > +        .type = "bool",
> > > > +        .apply = cap_xive_apply,
> > > > +    },
> > > >  };
> > > >  
> > > >  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> > > > @@ -641,6 +695,14 @@ static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> > > >          caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps;
> > > >      }
> > > >  
> > > > +    /*
> > > > +     * POWER8 machines don't have XIVE
> > > > +     */
> > > > +    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
> > > > +                               0, spapr->max_compat_pvr)) {
> > > > +        caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
> > > > +    }
> > > > +
> > > >      return caps;
> > > >  }
> > > >  
> > > > @@ -734,6 +796,8 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
> > > >  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> > > >  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
> > > >  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> > > > +SPAPR_CAP_MIG_STATE(xics, SPAPR_CAP_XICS);
> > > > +SPAPR_CAP_MIG_STATE(xive, SPAPR_CAP_XIVE);
> > > >  
> > > >  void spapr_caps_init(SpaprMachineState *spapr)
> > > >  {
> > > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > > > index 140f05c1c6..cb4c6edf63 100644
> > > > --- a/hw/ppc/spapr_hcall.c
> > > > +++ b/hw/ppc/spapr_hcall.c
> > > > @@ -1784,13 +1784,13 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
> > > >       * terminate the boot.
> > > >       */
> > > >      if (guest_xive) {
> > > > -        if (!spapr->irq->xive) {
> > > > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > > >              error_report(
> > > >  "Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
> > > >              exit(EXIT_FAILURE);
> > > >          }
> > > >      } else {
> > > > -        if (!spapr->irq->xics) {
> > > > +        if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
> > > >              error_report(
> > > >  "Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
> > > >              exit(EXIT_FAILURE);
> > > > @@ -1804,7 +1804,8 @@ static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
> > > >       */
> > > >      if (!spapr->cas_reboot) {
> > > >          spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT)
> > > > -            && spapr->irq->xics && spapr->irq->xive;
> > > > +            && spapr_get_cap(spapr, SPAPR_CAP_XICS)
> > > > +            && spapr_get_cap(spapr, SPAPR_CAP_XIVE);
> > > >      }
> > > >  
> > > >      spapr_ovec_cleanup(ov5_updates);
> > > > diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
> > > > index 2768f9a765..473fc8780a 100644
> > > > --- a/hw/ppc/spapr_irq.c
> > > > +++ b/hw/ppc/spapr_irq.c
> > > > @@ -101,90 +101,19 @@ int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **),
> > > >      return 0;
> > > >  }
> > > >  
> > > > -/*
> > > > - * XICS IRQ backend.
> > > > - */
> > > > -
> > > > -SpaprIrq spapr_irq_xics = {
> > > > -    .xics        = true,
> > > > -    .xive        = false,
> > > > -};
> > > > -
> > > > -/*
> > > > - * XIVE IRQ backend.
> > > > - */
> > > > -
> > > > -SpaprIrq spapr_irq_xive = {
> > > > -    .xics        = false,
> > > > -    .xive        = true,
> > > > -};
> > > > -
> > > > -/*
> > > > - * Dual XIVE and XICS IRQ backend.
> > > > - *
> > > > - * Both interrupt mode, XIVE and XICS, objects are created but the
> > > > - * machine starts in legacy interrupt mode (XICS). It can be changed
> > > > - * by the CAS negotiation process and, in that case, the new mode is
> > > > - * activated after an extra machine reset.
> > > > - */
> > > > -
> > > > -/*
> > > > - * Define values in sync with the XIVE and XICS backend
> > > > - */
> > > > -SpaprIrq spapr_irq_dual = {
> > > > -    .xics        = true,
> > > > -    .xive        = true,
> > > > -};
> > > > -
> > > > -
> > > >  static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
> > > >  {
> > > >      MachineState *machine = MACHINE(spapr);
> > > >  
> > > > -    /*
> > > > -     * Sanity checks on non-P9 machines. On these, XIVE is not
> > > > -     * advertised, see spapr_dt_ov5_platform_support()
> > > > -     */
> > > > -    if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
> > > > -                               0, spapr->max_compat_pvr)) {
> > > > -        /*
> > > > -         * If the 'dual' interrupt mode is selected, force XICS as CAS
> > > > -         * negotiation is useless.
> > > > -         */
> > > > -        if (spapr->irq == &spapr_irq_dual) {
> > > > -            spapr->irq = &spapr_irq_xics;
> > > > -            return 0;
> > > > -        }
> > > > -
> > > > -        /*
> > > > -         * Non-P9 machines using only XIVE is a bogus setup. We have two
> > > > -         * scenarios to take into account because of the compat mode:
> > > > -         *
> > > > -         * 1. POWER7/8 machines should fail to init later on when creating
> > > > -         *    the XIVE interrupt presenters because a POWER9 exception
> > > > -         *    model is required.
> > > > -
> > > > -         * 2. POWER9 machines using the POWER8 compat mode won't fail and
> > > > -         *    will let the OS boot with a partial XIVE setup : DT
> > > > -         *    properties but no hcalls.
> > > > -         *
> > > > -         * To cover both and not confuse the OS, add an early failure in
> > > > -         * QEMU.
> > > > -         */
> > > > -        if (spapr->irq == &spapr_irq_xive) {
> > > > -            error_setg(errp, "XIVE-only machines require a POWER9 CPU");
> > > > -            return -1;
> > > > -        }
> > > > -    }
> > > > -
> > > >      /*
> > > >       * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
> > > >       * re-created. Detect that early to avoid QEMU to exit later when the
> > > >       * guest reboots.
> > > >       */
> > > >      if (kvm_enabled() &&
> > > > -        spapr->irq == &spapr_irq_dual &&
> > > >          machine_kernel_irqchip_required(machine) &&
> > > > +        spapr_get_cap(spapr, SPAPR_CAP_XICS) &&
> > > > +        spapr_get_cap(spapr, SPAPR_CAP_XIVE) &&
> > > >          xics_kvm_has_broken_disconnect(spapr)) {
> > > >          error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on");
> > > >          return -1;
> > > > @@ -280,7 +209,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
> > > >      /* Initialize the MSI IRQ allocator. */
> > > >      spapr_irq_msi_init(spapr);
> > > >  
> > > > -    if (spapr->irq->xics) {
> > > > +    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
> > > >          Error *local_err = NULL;
> > > >          Object *obj;
> > > >  
> > > > @@ -313,7 +242,7 @@ void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
> > > >          spapr->ics = ICS_SPAPR(obj);
> > > >      }
> > > >  
> > > > -    if (spapr->irq->xive) {
> > > > +    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
> > > >          uint32_t nr_servers = spapr_max_server_number(spapr);
> > > >          DeviceState *dev;
> > > >          int i;
> > > > @@ -558,11 +487,6 @@ int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
> > > >      return first + ics->offset;
> > > >  }
> > > >  
> > > > -SpaprIrq spapr_irq_xics_legacy = {
> > > > -    .xics        = true,
> > > > -    .xive        = false,
> > > > -};
> > > > -
> > > >  static void spapr_irq_register_types(void)
> > > >  {
> > > >      type_register_static(&spapr_intc_info);
> > > > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > > > index 623e8e3f93..d3b4dd7de3 100644
> > > > --- a/include/hw/ppc/spapr.h
> > > > +++ b/include/hw/ppc/spapr.h
> > > > @@ -79,8 +79,12 @@ typedef enum {
> > > >  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
> > > >  /* Count Cache Flush Assist HW Instruction */
> > > >  #define SPAPR_CAP_CCF_ASSIST            0x09
> > > > +/* XICS interrupt controller */
> > > > +#define SPAPR_CAP_XICS                  0x0a
> > > > +/* XIVE interrupt controller */
> > > > +#define SPAPR_CAP_XIVE                  0x0b
> > > >  /* Num Caps */
> > > > -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
> > > > +#define SPAPR_CAP_NUM                   (SPAPR_CAP_XIVE + 1)
> > > >  
> > > >  /*
> > > >   * Capability Values
> > > > @@ -131,7 +135,6 @@ struct SpaprMachineClass {
> > > >                            hwaddr *nv2atsd, Error **errp);
> > > >      SpaprResizeHpt resize_hpt_default;
> > > >      SpaprCapabilities default_caps;
> > > > -    SpaprIrq *irq;
> > > >  };
> > > >  
> > > >  /**
> > > > @@ -195,7 +198,6 @@ struct SpaprMachineState {
> > > >  
> > > >      int32_t irq_map_nr;
> > > >      unsigned long *irq_map;
> > > > -    SpaprIrq *irq;
> > > >      qemu_irq *qirqs;
> > > >      SpaprInterruptController *active_intc;
> > > >      ICSState *ics;
> > > > @@ -870,6 +872,8 @@ extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
> > > >  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
> > > >  extern const VMStateDescription vmstate_spapr_cap_large_decr;
> > > >  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
> > > > +extern const VMStateDescription vmstate_spapr_cap_xics;
> > > > +extern const VMStateDescription vmstate_spapr_cap_xive;
> > > >  
> > > >  static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
> > > >  {
> > > > diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
> > > > index 5e150a6679..71aee13743 100644
> > > > --- a/include/hw/ppc/spapr_irq.h
> > > > +++ b/include/hw/ppc/spapr_irq.h
> > > > @@ -77,16 +77,6 @@ int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
> > > >                          Error **errp);
> > > >  void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num);
> > > >  
> > > > -typedef struct SpaprIrq {
> > > > -    bool        xics;
> > > > -    bool        xive;
> > > > -} SpaprIrq;
> > > > -
> > > > -extern SpaprIrq spapr_irq_xics;
> > > > -extern SpaprIrq spapr_irq_xics_legacy;
> > > > -extern SpaprIrq spapr_irq_xive;
> > > > -extern SpaprIrq spapr_irq_dual;
> > > > -
> > > >  void spapr_irq_init(SpaprMachineState *spapr, Error **errp);
> > > >  int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp);
> > > >  void spapr_irq_free(SpaprMachineState *spapr, int irq, int num);
> > > 
> > 
>
David Gibson Oct. 11, 2019, 5:07 a.m. UTC | #7
On Thu, Oct 10, 2019 at 10:33:04PM +0200, Greg Kurz wrote:
> On Thu, 10 Oct 2019 08:29:58 +0200
> Greg Kurz <groug@kaod.org> wrote:
> 
> > On Thu, 10 Oct 2019 13:02:09 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> > 
> > > On Wed, Oct 09, 2019 at 07:02:15PM +0200, Greg Kurz wrote:
> > > > On Wed,  9 Oct 2019 17:08:16 +1100
> > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > 
> > > > > The only thing remaining in this structure are the flags to allow either
> > > > > XICS or XIVE to be present.  These actually make more sense as spapr
> > > > > capabilities - that way they can take advantage of the existing
> > > > > infrastructure to sanity check capability states across migration and so
> > > > > forth.
> > > > > 
> > > > 
> > > > The user can now choose the interrupt controller mode either through
> > > > ic-mode or through cap-xics/cap-xive. I guess it doesn't break anything
> > > > to expose another API to do the same thing but it raises some questions.
> > > > 
> > > > We should at least document somewhere that ic-mode is an alias to these
> > > > caps, and maybe state which is the preferred method (I personally vote
> > > > for the caps).
> > > > 
> > > > Also, we must keep ic-mode for the moment to stay compatible with the
> > > > existing pseries-4.0 and pseries-4.1 machine types, but will we
> > > > keep ic-mode forever ? If no, maybe start by not allowing it for
> > > > pseries-4.2 ?
> > > 
> > > I'm actually inclined to keep it for now, maybe even leave it as the
> > > suggested way to configure this.  The caps are nice from an internal
> > > organization point of view, but ic-mode is arguably a more user
> > > friendly way of configuring it.  The conversion of one to the other is
> > > straightforward, isolated ans small, so I'm not especially bothered by
> > > keeping it around.
> > > 
> > 
> > Fair enough.
> > 
> > Reviewed-by: Greg Kurz <groug@kaod.org>
> > 
> 
> But unfortunately this still requires care :-\
> 
> qemu-system-ppc64: cap-xive higher level (1) in incoming stream than on destination (0)
> qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> qemu-system-ppc64: load of migration failed: Invalid argument
> 
> or
> 
> qemu-system-ppc64: cap-xics higher level (1) in incoming stream than on destination (0)
> qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> qemu-system-ppc64: load of migration failed: Invalid argument
> 
> when migrating from QEMU 4.1 with ic-mode=xics and ic-mode=xive respectively.
> 
> This happens because the existing pseries-4.1 machine type doesn't send the
> new caps and the logic in spapr_caps_post_migration() wrongly assumes that
> the source has both caps set:
> 
>     srccaps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type);
>     for (i = 0; i < SPAPR_CAP_NUM; i++) {
>         /* If not default value then assume came in with the migration */
>         if (spapr->mig.caps[i] != spapr->def.caps[i]) {
> 
> spapr->mig.caps[SPAPR_CAP_XICS] = 0
> spapr->mig.caps[SPAPR_CAP_XIVE] = 0
> 
>             srccaps.caps[i] = spapr->mig.caps[i];
> 
> srcaps.caps[SPAPR_CAP_XICS] = 1
> srcaps.caps[SPAPR_CAP_XIVE] = 1
> 
>         }
>     }
> 
> and breaks
> 
>     for (i = 0; i < SPAPR_CAP_NUM; i++) {
>         SpaprCapabilityInfo *info = &capability_table[i];
> 
>         if (srccaps.caps[i] > dstcaps.caps[i]) {
> 
> srcaps.caps[SPAPR_CAP_XICS] = 0 when ic-mode=xive
> srcaps.caps[SPAPR_CAP_XIVE] = 0 when ic-mode=xics
> 
>             error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)",
>                          info->name, srccaps.caps[i], dstcaps.caps[i]);
>             ok = false;
>         }

Ah.. right.  I thought there would be problems with backwards
migration, but I didn't think of this problem even with forward
migration.

> Maybe we shouldn't check capabilities that we know the source
> isn't supposed to send, eg. by having a smc->max_cap ?

Uh.. I'm not really sure what exactly you're suggesting here.

I think what we need here is a custom migrate_needed function, like we
already have for cap_hpt_maxpagesize, to exclude it from the migration
stream for machine versions before 4.2.
Greg Kurz Oct. 11, 2019, 6:13 a.m. UTC | #8
On Fri, 11 Oct 2019 16:07:58 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Thu, Oct 10, 2019 at 10:33:04PM +0200, Greg Kurz wrote:
> > On Thu, 10 Oct 2019 08:29:58 +0200
> > Greg Kurz <groug@kaod.org> wrote:
> > 
> > > On Thu, 10 Oct 2019 13:02:09 +1100
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > 
> > > > On Wed, Oct 09, 2019 at 07:02:15PM +0200, Greg Kurz wrote:
> > > > > On Wed,  9 Oct 2019 17:08:16 +1100
> > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > 
> > > > > > The only thing remaining in this structure are the flags to allow either
> > > > > > XICS or XIVE to be present.  These actually make more sense as spapr
> > > > > > capabilities - that way they can take advantage of the existing
> > > > > > infrastructure to sanity check capability states across migration and so
> > > > > > forth.
> > > > > > 
> > > > > 
> > > > > The user can now choose the interrupt controller mode either through
> > > > > ic-mode or through cap-xics/cap-xive. I guess it doesn't break anything
> > > > > to expose another API to do the same thing but it raises some questions.
> > > > > 
> > > > > We should at least document somewhere that ic-mode is an alias to these
> > > > > caps, and maybe state which is the preferred method (I personally vote
> > > > > for the caps).
> > > > > 
> > > > > Also, we must keep ic-mode for the moment to stay compatible with the
> > > > > existing pseries-4.0 and pseries-4.1 machine types, but will we
> > > > > keep ic-mode forever ? If no, maybe start by not allowing it for
> > > > > pseries-4.2 ?
> > > > 
> > > > I'm actually inclined to keep it for now, maybe even leave it as the
> > > > suggested way to configure this.  The caps are nice from an internal
> > > > organization point of view, but ic-mode is arguably a more user
> > > > friendly way of configuring it.  The conversion of one to the other is
> > > > straightforward, isolated ans small, so I'm not especially bothered by
> > > > keeping it around.
> > > > 
> > > 
> > > Fair enough.
> > > 
> > > Reviewed-by: Greg Kurz <groug@kaod.org>
> > > 
> > 
> > But unfortunately this still requires care :-\
> > 
> > qemu-system-ppc64: cap-xive higher level (1) in incoming stream than on destination (0)
> > qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> > qemu-system-ppc64: load of migration failed: Invalid argument
> > 
> > or
> > 
> > qemu-system-ppc64: cap-xics higher level (1) in incoming stream than on destination (0)
> > qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> > qemu-system-ppc64: load of migration failed: Invalid argument
> > 
> > when migrating from QEMU 4.1 with ic-mode=xics and ic-mode=xive respectively.
> > 
> > This happens because the existing pseries-4.1 machine type doesn't send the
> > new caps and the logic in spapr_caps_post_migration() wrongly assumes that
> > the source has both caps set:
> > 
> >     srccaps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type);
> >     for (i = 0; i < SPAPR_CAP_NUM; i++) {
> >         /* If not default value then assume came in with the migration */
> >         if (spapr->mig.caps[i] != spapr->def.caps[i]) {
> > 
> > spapr->mig.caps[SPAPR_CAP_XICS] = 0
> > spapr->mig.caps[SPAPR_CAP_XIVE] = 0
> > 
> >             srccaps.caps[i] = spapr->mig.caps[i];
> > 
> > srcaps.caps[SPAPR_CAP_XICS] = 1
> > srcaps.caps[SPAPR_CAP_XIVE] = 1
> > 
> >         }
> >     }
> > 
> > and breaks
> > 
> >     for (i = 0; i < SPAPR_CAP_NUM; i++) {
> >         SpaprCapabilityInfo *info = &capability_table[i];
> > 
> >         if (srccaps.caps[i] > dstcaps.caps[i]) {
> > 
> > srcaps.caps[SPAPR_CAP_XICS] = 0 when ic-mode=xive
> > srcaps.caps[SPAPR_CAP_XIVE] = 0 when ic-mode=xics
> > 
> >             error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)",
> >                          info->name, srccaps.caps[i], dstcaps.caps[i]);
> >             ok = false;
> >         }
> 
> Ah.. right.  I thought there would be problems with backwards
> migration, but I didn't think of this problem even with forward
> migration.
> 
> > Maybe we shouldn't check capabilities that we know the source
> > isn't supposed to send, eg. by having a smc->max_cap ?
> 
> Uh.. I'm not really sure what exactly you're suggesting here.
> 

I'm suggesting to have a per-machine version smc->max_cap that
contains the highest supported cap index, to be used instead of
SPAPR_CAP_NUM in this function, i.e.

for (i = 0; i <= smc->max_cap; i++) {
    ...
}

where we would have

smc->max_cap = SPAPR_CAP_CCF_ASSIST for pseries-4.1

and

smc->max_cap = SPAPR_CAP_XIVE for pseries-4.2

> I think what we need here is a custom migrate_needed function, like we
> already have for cap_hpt_maxpagesize, to exclude it from the migration
> stream for machine versions before 4.2.
> 

No, VMState needed() hooks are for outgoing migration only.

bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque)
{
    if (vmsd->needed && !vmsd->needed(opaque)) {
        /* optional section not needed */
        return false;
    }
    return true;
}
Greg Kurz Oct. 11, 2019, 8:33 a.m. UTC | #9
On Fri, 11 Oct 2019 08:13:33 +0200
Greg Kurz <groug@kaod.org> wrote:

> On Fri, 11 Oct 2019 16:07:58 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Thu, Oct 10, 2019 at 10:33:04PM +0200, Greg Kurz wrote:
> > > On Thu, 10 Oct 2019 08:29:58 +0200
> > > Greg Kurz <groug@kaod.org> wrote:
> > > 
> > > > On Thu, 10 Oct 2019 13:02:09 +1100
> > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > 
> > > > > On Wed, Oct 09, 2019 at 07:02:15PM +0200, Greg Kurz wrote:
> > > > > > On Wed,  9 Oct 2019 17:08:16 +1100
> > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > > 
> > > > > > > The only thing remaining in this structure are the flags to allow either
> > > > > > > XICS or XIVE to be present.  These actually make more sense as spapr
> > > > > > > capabilities - that way they can take advantage of the existing
> > > > > > > infrastructure to sanity check capability states across migration and so
> > > > > > > forth.
> > > > > > > 
> > > > > > 
> > > > > > The user can now choose the interrupt controller mode either through
> > > > > > ic-mode or through cap-xics/cap-xive. I guess it doesn't break anything
> > > > > > to expose another API to do the same thing but it raises some questions.
> > > > > > 
> > > > > > We should at least document somewhere that ic-mode is an alias to these
> > > > > > caps, and maybe state which is the preferred method (I personally vote
> > > > > > for the caps).
> > > > > > 
> > > > > > Also, we must keep ic-mode for the moment to stay compatible with the
> > > > > > existing pseries-4.0 and pseries-4.1 machine types, but will we
> > > > > > keep ic-mode forever ? If no, maybe start by not allowing it for
> > > > > > pseries-4.2 ?
> > > > > 
> > > > > I'm actually inclined to keep it for now, maybe even leave it as the
> > > > > suggested way to configure this.  The caps are nice from an internal
> > > > > organization point of view, but ic-mode is arguably a more user
> > > > > friendly way of configuring it.  The conversion of one to the other is
> > > > > straightforward, isolated ans small, so I'm not especially bothered by
> > > > > keeping it around.
> > > > > 
> > > > 
> > > > Fair enough.
> > > > 
> > > > Reviewed-by: Greg Kurz <groug@kaod.org>
> > > > 
> > > 
> > > But unfortunately this still requires care :-\
> > > 
> > > qemu-system-ppc64: cap-xive higher level (1) in incoming stream than on destination (0)
> > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> > > qemu-system-ppc64: load of migration failed: Invalid argument
> > > 
> > > or
> > > 
> > > qemu-system-ppc64: cap-xics higher level (1) in incoming stream than on destination (0)
> > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> > > qemu-system-ppc64: load of migration failed: Invalid argument
> > > 
> > > when migrating from QEMU 4.1 with ic-mode=xics and ic-mode=xive respectively.
> > > 
> > > This happens because the existing pseries-4.1 machine type doesn't send the
> > > new caps and the logic in spapr_caps_post_migration() wrongly assumes that
> > > the source has both caps set:
> > > 
> > >     srccaps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type);
> > >     for (i = 0; i < SPAPR_CAP_NUM; i++) {
> > >         /* If not default value then assume came in with the migration */
> > >         if (spapr->mig.caps[i] != spapr->def.caps[i]) {
> > > 
> > > spapr->mig.caps[SPAPR_CAP_XICS] = 0
> > > spapr->mig.caps[SPAPR_CAP_XIVE] = 0
> > > 
> > >             srccaps.caps[i] = spapr->mig.caps[i];
> > > 
> > > srcaps.caps[SPAPR_CAP_XICS] = 1
> > > srcaps.caps[SPAPR_CAP_XIVE] = 1
> > > 
> > >         }
> > >     }
> > > 
> > > and breaks
> > > 
> > >     for (i = 0; i < SPAPR_CAP_NUM; i++) {
> > >         SpaprCapabilityInfo *info = &capability_table[i];
> > > 
> > >         if (srccaps.caps[i] > dstcaps.caps[i]) {
> > > 
> > > srcaps.caps[SPAPR_CAP_XICS] = 0 when ic-mode=xive
> > > srcaps.caps[SPAPR_CAP_XIVE] = 0 when ic-mode=xics
> > > 
> > >             error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)",
> > >                          info->name, srccaps.caps[i], dstcaps.caps[i]);
> > >             ok = false;
> > >         }
> > 
> > Ah.. right.  I thought there would be problems with backwards
> > migration, but I didn't think of this problem even with forward
> > migration.
> > 
> > > Maybe we shouldn't check capabilities that we know the source
> > > isn't supposed to send, eg. by having a smc->max_cap ?
> > 
> > Uh.. I'm not really sure what exactly you're suggesting here.
> > 
> 
> I'm suggesting to have a per-machine version smc->max_cap that
> contains the highest supported cap index, to be used instead of
> SPAPR_CAP_NUM in this functions, ie.
> 
> for (i = 0; i <= smc->max_cap; i++) {
>     ...
> }
> 
> where we would have
> 
> smc->max_cap = SPAPR_CAP_CCF_ASSIST for pseries-4.1
> 
> and
> 
> smc->max_cap = SPAPR_CAP_XIVE for psereis-4.2
> 
> > I think what we need here is a custom migrate_needed function, like we
> > already have for cap_hpt_maxpagesize, to exclude it from the migration
> > stream for machine versions before 4.2.
> > 
> 
> No, VMState needed() hooks are for outgoing migration only.
> 

Well we actually do need a needed() function to fix backward
migration, but it doesn't solve anything with forward migration.

I'm thinking about something like this to address both:

=======================================================================
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 66b68fdd5ef5..1342058c1aae 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -83,7 +83,12 @@ typedef enum {
 #define SPAPR_CAP_XICS                  0x0a
 /* XIVE interrupt controller */
 #define SPAPR_CAP_XIVE                  0x0b
-/* Num Caps */
+/*
+ * Num Caps.
+ *
+ * CAUTION: when new caps are being added, older machine types should
+ * set smc->mig_cap_num to the previous value of SPAPR_CAP_NUM.
+ */
 #define SPAPR_CAP_NUM                   (SPAPR_CAP_XIVE + 1)
 
 /*
@@ -135,6 +140,7 @@ struct SpaprMachineClass {
                           hwaddr *nv2atsd, Error **errp);
     SpaprResizeHpt resize_hpt_default;
     SpaprCapabilities default_caps;
+    int mig_cap_num; /* don't migrate newer capabilities */
 };
 
 /**
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index bf9fdb169303..fa81cedfbcc5 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4453,6 +4453,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->dr_phb_enabled = true;
     smc->linux_pci_probe = true;
     smc->nr_xirqs = SPAPR_NR_XIRQS;
+    smc->mig_cap_num = SPAPR_CAP_NUM;
 }
 
 static const TypeInfo spapr_machine_info = {
@@ -4520,6 +4521,7 @@ static void spapr_machine_4_1_class_options(MachineClass *mc)
 
     spapr_machine_4_2_class_options(mc);
     smc->linux_pci_probe = false;
+    smc->mig_cap_num = SPAPR_CAP_CCF_ASSIST + 1;
     compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
     compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index e06fd386f6ac..ba079f46e084 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -532,6 +532,13 @@ static void cap_xive_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
     }
 }
 
+static bool cap_xics_xive_migrate_needed(void *opaque)
+{
+    int mig_cap_num = SPAPR_MACHINE_GET_CLASS(opaque)->mig_cap_num;
+
+    return mig_cap_num > SPAPR_CAP_XIVE && mig_cap_num > SPAPR_CAP_XICS;
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
     [SPAPR_CAP_HTM] = {
         .name = "htm",
@@ -639,6 +646,7 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .set = spapr_cap_set_bool,
         .type = "bool",
         .apply = cap_xics_apply,
+        .migrate_needed = cap_xics_xive_migrate_needed,
     },
     [SPAPR_CAP_XIVE] = {
         .name = "xive",
@@ -648,6 +656,7 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .set = spapr_cap_set_bool,
         .type = "bool",
         .apply = cap_xive_apply,
+        .migrate_needed = cap_xics_xive_migrate_needed,
     },
 };
 
@@ -729,20 +738,21 @@ int spapr_caps_pre_save(void *opaque)
  * caps on the destination */
 int spapr_caps_post_migration(SpaprMachineState *spapr)
 {
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
     int i;
     bool ok = true;
     SpaprCapabilities dstcaps = spapr->eff;
     SpaprCapabilities srccaps;
 
     srccaps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type);
-    for (i = 0; i < SPAPR_CAP_NUM; i++) {
+    for (i = 0; i < smc->mig_cap_num; i++) {
         /* If not default value then assume came in with the migration */
         if (spapr->mig.caps[i] != spapr->def.caps[i]) {
             srccaps.caps[i] = spapr->mig.caps[i];
         }
     }
 
-    for (i = 0; i < SPAPR_CAP_NUM; i++) {
+    for (i = 0; i < smc->mig_cap_num; i++) {
         SpaprCapabilityInfo *info = &capability_table[i];
 
         if (srccaps.caps[i] > dstcaps.caps[i]) {
=======================================================================

> bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque)
> {
>     if (vmsd->needed && !vmsd->needed(opaque)) {
>         /* optional section not needed */
>         return false;
>     }
>     return true;
> }
David Gibson Oct. 12, 2019, midnight UTC | #10
On Fri, Oct 11, 2019 at 08:13:33AM +0200, Greg Kurz wrote:
> On Fri, 11 Oct 2019 16:07:58 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Thu, Oct 10, 2019 at 10:33:04PM +0200, Greg Kurz wrote:
> > > On Thu, 10 Oct 2019 08:29:58 +0200
> > > Greg Kurz <groug@kaod.org> wrote:
> > > 
> > > > On Thu, 10 Oct 2019 13:02:09 +1100
> > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > 
> > > > > On Wed, Oct 09, 2019 at 07:02:15PM +0200, Greg Kurz wrote:
> > > > > > On Wed,  9 Oct 2019 17:08:16 +1100
> > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > > 
> > > > > > > The only thing remaining in this structure are the flags to allow either
> > > > > > > XICS or XIVE to be present.  These actually make more sense as spapr
> > > > > > > capabilities - that way they can take advantage of the existing
> > > > > > > infrastructure to sanity check capability states across migration and so
> > > > > > > forth.
> > > > > > > 
> > > > > > 
> > > > > > The user can now choose the interrupt controller mode either through
> > > > > > ic-mode or through cap-xics/cap-xive. I guess it doesn't break anything
> > > > > > to expose another API to do the same thing but it raises some questions.
> > > > > > 
> > > > > > We should at least document somewhere that ic-mode is an alias to these
> > > > > > caps, and maybe state which is the preferred method (I personally vote
> > > > > > for the caps).
> > > > > > 
> > > > > > Also, we must keep ic-mode for the moment to stay compatible with the
> > > > > > existing pseries-4.0 and pseries-4.1 machine types, but will we
> > > > > > keep ic-mode forever ? If no, maybe start by not allowing it for
> > > > > > pseries-4.2 ?
> > > > > 
> > > > > I'm actually inclined to keep it for now, maybe even leave it as the
> > > > > suggested way to configure this.  The caps are nice from an internal
> > > > > organization point of view, but ic-mode is arguably a more user
> > > > > friendly way of configuring it.  The conversion of one to the other is
> > > > > straightforward, isolated ans small, so I'm not especially bothered by
> > > > > keeping it around.
> > > > > 
> > > > 
> > > > Fair enough.
> > > > 
> > > > Reviewed-by: Greg Kurz <groug@kaod.org>
> > > > 
> > > 
> > > But unfortunately this still requires care :-\
> > > 
> > > qemu-system-ppc64: cap-xive higher level (1) in incoming stream than on destination (0)
> > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> > > qemu-system-ppc64: load of migration failed: Invalid argument
> > > 
> > > or
> > > 
> > > qemu-system-ppc64: cap-xics higher level (1) in incoming stream than on destination (0)
> > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> > > qemu-system-ppc64: load of migration failed: Invalid argument
> > > 
> > > when migrating from QEMU 4.1 with ic-mode=xics and ic-mode=xive respectively.
> > > 
> > > This happens because the existing pseries-4.1 machine type doesn't send the
> > > new caps and the logic in spapr_caps_post_migration() wrongly assumes that
> > > the source has both caps set:
> > > 
> > >     srccaps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type);
> > >     for (i = 0; i < SPAPR_CAP_NUM; i++) {
> > >         /* If not default value then assume came in with the migration */
> > >         if (spapr->mig.caps[i] != spapr->def.caps[i]) {
> > > 
> > > spapr->mig.caps[SPAPR_CAP_XICS] = 0
> > > spapr->mig.caps[SPAPR_CAP_XIVE] = 0
> > > 
> > >             srccaps.caps[i] = spapr->mig.caps[i];
> > > 
> > > srcaps.caps[SPAPR_CAP_XICS] = 1
> > > srcaps.caps[SPAPR_CAP_XIVE] = 1
> > > 
> > >         }
> > >     }
> > > 
> > > and breaks
> > > 
> > >     for (i = 0; i < SPAPR_CAP_NUM; i++) {
> > >         SpaprCapabilityInfo *info = &capability_table[i];
> > > 
> > >         if (srccaps.caps[i] > dstcaps.caps[i]) {
> > > 
> > > srcaps.caps[SPAPR_CAP_XICS] = 0 when ic-mode=xive
> > > srcaps.caps[SPAPR_CAP_XIVE] = 0 when ic-mode=xics
> > > 
> > >             error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)",
> > >                          info->name, srccaps.caps[i], dstcaps.caps[i]);
> > >             ok = false;
> > >         }
> > 
> > Ah.. right.  I thought there would be problems with backwards
> > migration, but I didn't think of this problem even with forward
> > migration.
> > 
> > > Maybe we shouldn't check capabilities that we know the source
> > > isn't supposed to send, eg. by having a smc->max_cap ?
> > 
> > Uh.. I'm not really sure what exactly you're suggesting here.
> > 
> 
> I'm suggesting to have a per-machine version smc->max_cap that
> contains the highest supported cap index, to be used instead of
> SPAPR_CAP_NUM in this functions, ie.
> 
> for (i = 0; i <= smc->max_cap; i++) {
>     ...
> }
> 
> where we would have
> 
> smc->max_cap = SPAPR_CAP_CCF_ASSIST for pseries-4.1
> 
> and
> 
> smc->max_cap = SPAPR_CAP_XIVE for psereis-4.2

Oh, I see, a max cap index.  I think that sounds fragile if we ever
deprecate any caps, and it also might be problematic for downstream
where we've sometimes selectively backported caps.

> > I think what we need here is a custom migrate_needed function, like we
> > already have for cap_hpt_maxpagesize, to exclude it from the migration
> > stream for machine versions before 4.2.
> > 
> 
> No, VMState needed() hooks are for outgoing migration only.

Ah, yeah, right.  Essentially the problem is that in the absence of
caps, the new qemu assumes they're in the default state, but if an old
source had ic-mode set, then they effectively aren't.  Or looked at
another way, it's now trying to check that the ends match w.r.t. intc
selection, but doesn't have enough information supplied by old sources
to do so correctly.

Ugh, that's a bit trickier to work around.

> bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque)
> {
>     if (vmsd->needed && !vmsd->needed(opaque)) {
>         /* optional section not needed */
>         return false;
>     }
>     return true;
> }
Greg Kurz Oct. 14, 2019, 9:15 a.m. UTC | #11
On Sat, 12 Oct 2019 11:00:41 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Fri, Oct 11, 2019 at 08:13:33AM +0200, Greg Kurz wrote:
> > On Fri, 11 Oct 2019 16:07:58 +1100
> > David Gibson <david@gibson.dropbear.id.au> wrote:
> > 
> > > On Thu, Oct 10, 2019 at 10:33:04PM +0200, Greg Kurz wrote:
> > > > On Thu, 10 Oct 2019 08:29:58 +0200
> > > > Greg Kurz <groug@kaod.org> wrote:
> > > > 
> > > > > On Thu, 10 Oct 2019 13:02:09 +1100
> > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > 
> > > > > > On Wed, Oct 09, 2019 at 07:02:15PM +0200, Greg Kurz wrote:
> > > > > > > On Wed,  9 Oct 2019 17:08:16 +1100
> > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > > > 
> > > > > > > > The only thing remaining in this structure are the flags to allow either
> > > > > > > > XICS or XIVE to be present.  These actually make more sense as spapr
> > > > > > > > capabilities - that way they can take advantage of the existing
> > > > > > > > infrastructure to sanity check capability states across migration and so
> > > > > > > > forth.
> > > > > > > > 
> > > > > > > 
> > > > > > > The user can now choose the interrupt controller mode either through
> > > > > > > ic-mode or through cap-xics/cap-xive. I guess it doesn't break anything
> > > > > > > to expose another API to do the same thing but it raises some questions.
> > > > > > > 
> > > > > > > We should at least document somewhere that ic-mode is an alias to these
> > > > > > > caps, and maybe state which is the preferred method (I personally vote
> > > > > > > for the caps).
> > > > > > > 
> > > > > > > Also, we must keep ic-mode for the moment to stay compatible with the
> > > > > > > existing pseries-4.0 and pseries-4.1 machine types, but will we
> > > > > > > keep ic-mode forever ? If no, maybe start by not allowing it for
> > > > > > > pseries-4.2 ?
> > > > > > 
> > > > > > I'm actually inclined to keep it for now, maybe even leave it as the
> > > > > > suggested way to configure this.  The caps are nice from an internal
> > > > > > organization point of view, but ic-mode is arguably a more user
> > > > > > friendly way of configuring it.  The conversion of one to the other is
> > > > > > straightforward, isolated ans small, so I'm not especially bothered by
> > > > > > keeping it around.
> > > > > > 
> > > > > 
> > > > > Fair enough.
> > > > > 
> > > > > Reviewed-by: Greg Kurz <groug@kaod.org>
> > > > > 
> > > > 
> > > > But unfortunately this still requires care :-\
> > > > 
> > > > qemu-system-ppc64: cap-xive higher level (1) in incoming stream than on destination (0)
> > > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> > > > qemu-system-ppc64: load of migration failed: Invalid argument
> > > > 
> > > > or
> > > > 
> > > > qemu-system-ppc64: cap-xics higher level (1) in incoming stream than on destination (0)
> > > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> > > > qemu-system-ppc64: load of migration failed: Invalid argument
> > > > 
> > > > when migrating from QEMU 4.1 with ic-mode=xics and ic-mode=xive respectively.
> > > > 
> > > > This happens because the existing pseries-4.1 machine type doesn't send the
> > > > new caps and the logic in spapr_caps_post_migration() wrongly assumes that
> > > > the source has both caps set:
> > > > 
> > > >     srccaps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type);
> > > >     for (i = 0; i < SPAPR_CAP_NUM; i++) {
> > > >         /* If not default value then assume came in with the migration */
> > > >         if (spapr->mig.caps[i] != spapr->def.caps[i]) {
> > > > 
> > > > spapr->mig.caps[SPAPR_CAP_XICS] = 0
> > > > spapr->mig.caps[SPAPR_CAP_XIVE] = 0
> > > > 
> > > >             srccaps.caps[i] = spapr->mig.caps[i];
> > > > 
> > > > srcaps.caps[SPAPR_CAP_XICS] = 1
> > > > srcaps.caps[SPAPR_CAP_XIVE] = 1
> > > > 
> > > >         }
> > > >     }
> > > > 
> > > > and breaks
> > > > 
> > > >     for (i = 0; i < SPAPR_CAP_NUM; i++) {
> > > >         SpaprCapabilityInfo *info = &capability_table[i];
> > > > 
> > > >         if (srccaps.caps[i] > dstcaps.caps[i]) {
> > > > 
> > > > srcaps.caps[SPAPR_CAP_XICS] = 0 when ic-mode=xive
> > > > srcaps.caps[SPAPR_CAP_XIVE] = 0 when ic-mode=xics
> > > > 
> > > >             error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)",
> > > >                          info->name, srccaps.caps[i], dstcaps.caps[i]);
> > > >             ok = false;
> > > >         }
> > > 
> > > Ah.. right.  I thought there would be problems with backwards
> > > migration, but I didn't think of this problem even with forward
> > > migration.
> > > 
> > > > Maybe we shouldn't check capabilities that we know the source
> > > > isn't supposed to send, eg. by having a smc->max_cap ?
> > > 
> > > Uh.. I'm not really sure what exactly you're suggesting here.
> > > 
> > 
> > I'm suggesting to have a per-machine version smc->max_cap that
> > contains the highest supported cap index, to be used instead of
> > SPAPR_CAP_NUM in this functions, ie.
> > 
> > for (i = 0; i <= smc->max_cap; i++) {
> >     ...
> > }
> > 
> > where we would have
> > 
> > smc->max_cap = SPAPR_CAP_CCF_ASSIST for pseries-4.1
> > 
> > and
> > 
> > smc->max_cap = SPAPR_CAP_XIVE for psereis-4.2
> 
> Oh, I see, a max cap index.  I think that sounds fragile if we ever
> deprecate any caps, 

Hmmm... I had the impression that capability numbers would stay
forever, even if at some point we may decide to not support some
of them for newer machine types... Can you elaborate on a
deprecating scenario that would break ?

> and it also might be problematic for downstream
> where we've sometimes selectively backported caps.
> 

Do you mean that capability numbers defined in spapr.h differ
from the ones in upstream QEMU ?

> > > I think what we need here is a custom migrate_needed function, like we
> > > already have for cap_hpt_maxpagesize, to exclude it from the migration
> > > stream for machine versions before 4.2.
> > > 
> > 
> > No, VMState needed() hooks are for outgoing migration only.
> 
> Ah, yeah, right.  Essentially the problem is that in the absence of
> caps, the new qemu assumes they're in the default state, but if an old
> source had ic-mode set, then they effectively aren't.  Or looked at
> another way, it's now trying to check that the ends match w.r.t. intc
> selection, but doesn't have enough information supplied by old sources
> to do so correctly.
> 

Yes, but do we really need to perform strict checks on ic-mode in the first
place ? I mean that migrating the state of XICS and/or XIVE entities _only_
requires the destination to have instantiated them, ie:

SOURCE/DEST | xics | xive | dual
------------+------+------+-------
xics        | ok   | fail | ok (*)
xive        | fail | ok   | ok (*)
dual        | fail | fail | ok

(*) missing migrated state for xics/xive means that the corresponding
    objects will have reset state, like after CAS.

> Ugh, that's a bit trickier to work around.
> 

Maybe have a migrate_needed() hook like this:

static bool cap_xics_xive_migrate_needed(void *opaque)
{
    return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_2_migration;
}

and also use it in spapr_caps_post_migration() ?

> > bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque)
> > {
> >     if (vmsd->needed && !vmsd->needed(opaque)) {
> >         /* optional section not needed */
> >         return false;
> >     }
> >     return true;
> > }
> 
> 
>
David Gibson Nov. 20, 2019, 5:38 a.m. UTC | #12
On Mon, Oct 14, 2019 at 11:15:16AM +0200, Greg Kurz wrote:
> On Sat, 12 Oct 2019 11:00:41 +1100
> David Gibson <david@gibson.dropbear.id.au> wrote:
> 
> > On Fri, Oct 11, 2019 at 08:13:33AM +0200, Greg Kurz wrote:
> > > On Fri, 11 Oct 2019 16:07:58 +1100
> > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > 
> > > > On Thu, Oct 10, 2019 at 10:33:04PM +0200, Greg Kurz wrote:
> > > > > On Thu, 10 Oct 2019 08:29:58 +0200
> > > > > Greg Kurz <groug@kaod.org> wrote:
> > > > > 
> > > > > > On Thu, 10 Oct 2019 13:02:09 +1100
> > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > > 
> > > > > > > On Wed, Oct 09, 2019 at 07:02:15PM +0200, Greg Kurz wrote:
> > > > > > > > On Wed,  9 Oct 2019 17:08:16 +1100
> > > > > > > > David Gibson <david@gibson.dropbear.id.au> wrote:
> > > > > > > > 
> > > > > > > > > The only thing remaining in this structure are the flags to allow either
> > > > > > > > > XICS or XIVE to be present.  These actually make more sense as spapr
> > > > > > > > > capabilities - that way they can take advantage of the existing
> > > > > > > > > infrastructure to sanity check capability states across migration and so
> > > > > > > > > forth.
> > > > > > > > > 
> > > > > > > > 
> > > > > > > > The user can now choose the interrupt controller mode either through
> > > > > > > > ic-mode or through cap-xics/cap-xive. I guess it doesn't break anything
> > > > > > > > to expose another API to do the same thing but it raises some questions.
> > > > > > > > 
> > > > > > > > We should at least document somewhere that ic-mode is an alias to these
> > > > > > > > caps, and maybe state which is the preferred method (I personally vote
> > > > > > > > for the caps).
> > > > > > > > 
> > > > > > > > Also, we must keep ic-mode for the moment to stay compatible with the
> > > > > > > > existing pseries-4.0 and pseries-4.1 machine types, but will we
> > > > > > > > keep ic-mode forever ? If no, maybe start by not allowing it for
> > > > > > > > pseries-4.2 ?
> > > > > > > 
> > > > > > > I'm actually inclined to keep it for now, maybe even leave it as the
> > > > > > > suggested way to configure this.  The caps are nice from an internal
> > > > > > > organization point of view, but ic-mode is arguably a more user
> > > > > > > friendly way of configuring it.  The conversion of one to the other is
> > > > > > > straightforward, isolated ans small, so I'm not especially bothered by
> > > > > > > keeping it around.
> > > > > > > 
> > > > > > 
> > > > > > Fair enough.
> > > > > > 
> > > > > > Reviewed-by: Greg Kurz <groug@kaod.org>
> > > > > > 
> > > > > 
> > > > > But unfortunately this still requires care :-\
> > > > > 
> > > > > qemu-system-ppc64: cap-xive higher level (1) in incoming stream than on destination (0)
> > > > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> > > > > qemu-system-ppc64: load of migration failed: Invalid argument
> > > > > 
> > > > > or
> > > > > 
> > > > > qemu-system-ppc64: cap-xics higher level (1) in incoming stream than on destination (0)
> > > > > qemu-system-ppc64: error while loading state for instance 0x0 of device 'spapr'
> > > > > qemu-system-ppc64: load of migration failed: Invalid argument
> > > > > 
> > > > > when migrating from QEMU 4.1 with ic-mode=xics and ic-mode=xive respectively.
> > > > > 
> > > > > This happens because the existing pseries-4.1 machine type doesn't send the
> > > > > new caps and the logic in spapr_caps_post_migration() wrongly assumes that
> > > > > the source has both caps set:
> > > > > 
> > > > >     srccaps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type);
> > > > >     for (i = 0; i < SPAPR_CAP_NUM; i++) {
> > > > >         /* If not default value then assume came in with the migration */
> > > > >         if (spapr->mig.caps[i] != spapr->def.caps[i]) {
> > > > > 
> > > > > spapr->mig.caps[SPAPR_CAP_XICS] = 0
> > > > > spapr->mig.caps[SPAPR_CAP_XIVE] = 0
> > > > > 
> > > > >             srccaps.caps[i] = spapr->mig.caps[i];
> > > > > 
> > > > > srcaps.caps[SPAPR_CAP_XICS] = 1
> > > > > srcaps.caps[SPAPR_CAP_XIVE] = 1
> > > > > 
> > > > >         }
> > > > >     }
> > > > > 
> > > > > and breaks
> > > > > 
> > > > >     for (i = 0; i < SPAPR_CAP_NUM; i++) {
> > > > >         SpaprCapabilityInfo *info = &capability_table[i];
> > > > > 
> > > > >         if (srccaps.caps[i] > dstcaps.caps[i]) {
> > > > > 
> > > > > srcaps.caps[SPAPR_CAP_XICS] = 0 when ic-mode=xive
> > > > > srcaps.caps[SPAPR_CAP_XIVE] = 0 when ic-mode=xics
> > > > > 
> > > > >             error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)",
> > > > >                          info->name, srccaps.caps[i], dstcaps.caps[i]);
> > > > >             ok = false;
> > > > >         }
> > > > 
> > > > Ah.. right.  I thought there would be problems with backwards
> > > > migration, but I didn't think of this problem even with forward
> > > > migration.
> > > > 
> > > > > Maybe we shouldn't check capabilities that we know the source
> > > > > isn't supposed to send, eg. by having a smc->max_cap ?
> > > > 
> > > > Uh.. I'm not really sure what exactly you're suggesting here.
> > > > 
> > > 
> > > I'm suggesting to have a per-machine version smc->max_cap that
> > > contains the highest supported cap index, to be used instead of
> > > SPAPR_CAP_NUM in this functions, ie.
> > > 
> > > for (i = 0; i <= smc->max_cap; i++) {
> > >     ...
> > > }
> > > 
> > > where we would have
> > > 
> > > smc->max_cap = SPAPR_CAP_CCF_ASSIST for pseries-4.1
> > > 
> > > and
> > > 
> > > smc->max_cap = SPAPR_CAP_XIVE for psereis-4.2
> > 
> > Oh, I see, a max cap index.  I think that sounds fragile if we ever
> > deprecate any caps, 
> 
> Hmmm... I had the impression that capability numbers would stay
> forever, even if at some point we may decide to not support some
> of them for newer machine types... Can you elaborate on a
> deprecating scenario that would break ?

Uhh... good point, I don't think that could break it.  Even if we
deprecated a capability we could still retain enough awareness of the
old number to sanity check this.

> > and it also might be problematic for downstream
> > where we've sometimes selectively backported caps.
> 
> Do you mean that capability numbers defined in spapr.h differ
> from the ones in upstream QEMU ?

No, they don't, but that's actually the problem.  The point is that we
might backport some later caps without necessarily backporting all the
earlier ones - that means that the "max cap index" no longer implies
that all the lower indexed caps are present.

> 
> > > > I think what we need here is a custom migrate_needed function, like we
> > > > already have for cap_hpt_maxpagesize, to exclude it from the migration
> > > > stream for machine versions before 4.2.
> > > > 
> > > 
> > > No, VMState needed() hooks are for outgoing migration only.
> > 
> > Ah, yeah, right.  Essentially the problem is that in the absence of
> > caps, the new qemu assumes they're in the default state, but if an old
> > source had ic-mode set, then they effectively aren't.  Or looked at
> > another way, it's now trying to check that the ends match w.r.t. intc
> > selection, but doesn't have enough information supplied by old sources
> > to do so correctly.
> 
> Yes, but do we really need to perform strict checks on ic-mode in the first
> place ? I mean that migrating the state of XICS and/or XIVE entities _only_
> requires the destination to have instantiated them, ie:
> 
> SOURCE/DEST | xics | xive | dual
> ------------+------+------+-------
> xics        | ok   | fail | ok (*)
> xive        | fail | ok   | ok (*)
> dual        | fail | fail | ok
> 
> (*) missing migrated state for xics/xive means that the corresponding
>     objects will have reset state, like after CAS.

Yes... I don't really see where you're going with that thought.

> > Ugh, that's a bit trickier to work around.
> > 
> 
> Maybe have a migrate_needed() hook like this:
> 
> static bool cap_xics_xive_migrate_needed(void *opaque)
> {
>     return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_2_migration;
> }
> 
> and also use it in spapr_caps_post_migration() ?

Yeah, maybe.  I think we have a hack like this for one of the other
caps already.
Greg Kurz Nov. 20, 2019, 8:36 a.m. UTC | #13
On Wed, 20 Nov 2019 16:38:37 +1100
David Gibson <david@gibson.dropbear.id.au> wrote:

[...]
> > 
> > Hmmm... I had the impression that capability numbers would stay
> > forever, even if at some point we may decide to not support some
> > of them for newer machine types... Can you elaborate on a
> > deprecating scenario that would break ?
> 
> Uhh... good point, I don't think that could break it.  Even if we
> deprecated a capability we could still retain enough awareness of the
> old number to sanity check this.
> 
> > > and it also might be problematic for downstream
> > > where we've sometimes selectively backported caps.
> > 
> > Do you mean that capability numbers defined in spapr.h differ
> > from the ones in upstream QEMU ?
> 
> No, they don't but that's actually the problem.  The point is that we
> might backport some later caps without necessarily backporting all the
> earlier ones - that means that the "max cap index" no longer implies
> that all the lower indexed caps are present.
> 

The idea with "max cap index" isn't that all the lower-indexed caps are
present, but rather that higher-indexed caps are absent. Maybe rename it to
something like "ignore higher cap index" or any better naming you can
think of ?

> > 
> > > > > I think what we need here is a custom migrate_needed function, like we
> > > > > already have for cap_hpt_maxpagesize, to exclude it from the migration
> > > > > stream for machine versions before 4.2.
> > > > > 
> > > > 
> > > > No, VMState needed() hooks are for outgoing migration only.
> > > 
> > > Ah, yeah, right.  Essentially the problem is that in the absence of
> > > caps, the new qemu assumes they're in the default state, but if an old
> > > source had ic-mode set, then they effectively aren't.  Or looked at
> > > another way, it's now trying to check that the ends match w.r.t. intc
> > > selection, but doesn't have enough information supplied by old sources
> > > to do so correctly.
> > 
> > Yes, but do we really need to perform strict checks on ic-mode in the first
> > place ? I mean that migrating the state of XICS and/or XIVE entities _only_
> > requires the destination to have instantiated them, ie:
> > 
> > SOURCE/DEST | xics | xive | dual
> > ------------+------+------+-------
> > xics        | ok   | fail | ok (*)
> > xive        | fail | ok   | ok (*)
> > dual        | fail | fail | ok
> > 
> > (*) missing migrated state for xics/xive means that the corresponding
> >     objects will have reset state, like after CAS.
> 
> Yes... I don't really see where you're goig with that thought.
> 

I mean that if we didn't check the XICS and XIVE capabilities, we
would still fail migration when it is really needed, ie. migrating
from ic-mode=xics to ic-mode=xive or the other way round. This
would make it possible to migrate anything to ic-mode=dual, though,
but I don't think this is a problem since it doesn't break anything.

> > > Ugh, that's a bit trickier to work around.
> > > 
> > 
> > Maybe have a migrate_needed() hook like this:
> > 
> > static bool cap_xics_xive_migrate_needed(void *opaque)
> > {
> >     return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_2_migration;
> > }
> > 
> > and also use it in spapr_caps_post_migration() ?
> 
> Yeah, maybe.  I think we have a hack like this for one of the other
> caps already.
>
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e1ff03152e..bf9fdb1693 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1072,12 +1072,13 @@  static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
         26, 0x40, /* Radix options: GTSE == yes. */
     };
 
-    if (spapr->irq->xics && spapr->irq->xive) {
+    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
+        && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
         val[1] = SPAPR_OV5_XIVE_BOTH;
-    } else if (spapr->irq->xive) {
+    } else if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
         val[1] = SPAPR_OV5_XIVE_EXPLOIT;
     } else {
-        assert(spapr->irq->xics);
+        assert(spapr_get_cap(spapr, SPAPR_CAP_XICS));
         val[1] = SPAPR_OV5_XIVE_LEGACY;
     }
 
@@ -2075,6 +2076,8 @@  static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_dtb,
         &vmstate_spapr_cap_large_decr,
         &vmstate_spapr_cap_ccf_assist,
+        &vmstate_spapr_cap_xics,
+        &vmstate_spapr_cap_xive,
         NULL
     }
 };
@@ -2775,7 +2778,7 @@  static void spapr_machine_init(MachineState *machine)
     spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
 
     /* advertise XIVE on POWER9 machines */
-    if (spapr->irq->xive) {
+    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
         spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
     }
 
@@ -3242,14 +3245,18 @@  static void spapr_set_vsmt(Object *obj, Visitor *v, const char *name,
 static char *spapr_get_ic_mode(Object *obj, Error **errp)
 {
     SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 
-    if (spapr->irq == &spapr_irq_xics_legacy) {
+    if (smc->legacy_irq_allocation) {
         return g_strdup("legacy");
-    } else if (spapr->irq == &spapr_irq_xics) {
+    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
+               && !spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
         return g_strdup("xics");
-    } else if (spapr->irq == &spapr_irq_xive) {
+    } else if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)
+               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
         return g_strdup("xive");
-    } else if (spapr->irq == &spapr_irq_dual) {
+    } else if (spapr_get_cap(spapr, SPAPR_CAP_XICS)
+               && spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
         return g_strdup("dual");
     }
     g_assert_not_reached();
@@ -3266,11 +3273,14 @@  static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
 
     /* The legacy IRQ backend can not be set */
     if (strcmp(value, "xics") == 0) {
-        spapr->irq = &spapr_irq_xics;
+        object_property_set_bool(obj, true, "cap-xics", errp);
+        object_property_set_bool(obj, false, "cap-xive", errp);
     } else if (strcmp(value, "xive") == 0) {
-        spapr->irq = &spapr_irq_xive;
+        object_property_set_bool(obj, false, "cap-xics", errp);
+        object_property_set_bool(obj, true, "cap-xive", errp);
     } else if (strcmp(value, "dual") == 0) {
-        spapr->irq = &spapr_irq_dual;
+        object_property_set_bool(obj, true, "cap-xics", errp);
+        object_property_set_bool(obj, true, "cap-xive", errp);
     } else {
         error_setg(errp, "Bad value for \"ic-mode\" property");
     }
@@ -3309,7 +3319,6 @@  static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
 static void spapr_instance_init(Object *obj)
 {
     SpaprMachineState *spapr = SPAPR_MACHINE(obj);
-    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
 
     spapr->htab_fd = -1;
     spapr->use_hotplug_event_source = true;
@@ -3345,7 +3354,6 @@  static void spapr_instance_init(Object *obj)
                              spapr_get_msix_emulation, NULL, NULL);
 
     /* The machine class defines the default interrupt controller mode */
-    spapr->irq = smc->irq;
     object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
                             spapr_set_ic_mode, NULL);
     object_property_set_description(obj, "ic-mode",
@@ -4439,8 +4447,9 @@  static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_XICS] = SPAPR_CAP_ON;
+    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_ON;
     spapr_caps_add_properties(smc, &error_abort);
-    smc->irq = &spapr_irq_dual;
     smc->dr_phb_enabled = true;
     smc->linux_pci_probe = true;
     smc->nr_xirqs = SPAPR_NR_XIRQS;
@@ -4539,7 +4548,7 @@  static void spapr_machine_4_0_class_options(MachineClass *mc)
     spapr_machine_4_1_class_options(mc);
     compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
     smc->phb_placement = phb_placement_4_0;
-    smc->irq = &spapr_irq_xics;
+    smc->default_caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
     smc->pre_4_1_migration = true;
 }
 
@@ -4580,7 +4589,6 @@  static void spapr_machine_3_0_class_options(MachineClass *mc)
 
     smc->legacy_irq_allocation = true;
     smc->nr_xirqs = 0x400;
-    smc->irq = &spapr_irq_xics_legacy;
 }
 
 DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2a27..e06fd386f6 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -496,6 +496,42 @@  static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
     }
 }
 
+static void cap_xics_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+
+    if (!val) {
+        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
+            error_setg(errp,
+"No interrupt controllers enabled, try cap-xics=on or cap-xive=on");
+            return;
+        }
+
+        if (smc->legacy_irq_allocation) {
+            error_setg(errp, "This machine version requires XICS support");
+            return;
+        }
+    }
+}
+
+static void cap_xive_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+
+    if (val) {
+        if (smc->legacy_irq_allocation) {
+            error_setg(errp, "This machine version cannot support XIVE");
+            return;
+        }
+        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
+                              spapr->max_compat_pvr)) {
+            error_setg(errp, "XIVE requires POWER9 CPU");
+            return;
+        }
+    }
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
     [SPAPR_CAP_HTM] = {
         .name = "htm",
@@ -595,6 +631,24 @@  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "bool",
         .apply = cap_ccf_assist_apply,
     },
+    [SPAPR_CAP_XICS] = {
+        .name = "xics",
+        .description = "Allow XICS interrupt controller",
+        .index = SPAPR_CAP_XICS,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_xics_apply,
+    },
+    [SPAPR_CAP_XIVE] = {
+        .name = "xive",
+        .description = "Allow XIVE interrupt controller",
+        .index = SPAPR_CAP_XIVE,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_xive_apply,
+    },
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -641,6 +695,14 @@  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
         caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps;
     }
 
+    /*
+     * POWER8 machines don't have XIVE
+     */
+    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
+                               0, spapr->max_compat_pvr)) {
+        caps.caps[SPAPR_CAP_XIVE] = SPAPR_CAP_OFF;
+    }
+
     return caps;
 }
 
@@ -734,6 +796,8 @@  SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(xics, SPAPR_CAP_XICS);
+SPAPR_CAP_MIG_STATE(xive, SPAPR_CAP_XIVE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 140f05c1c6..cb4c6edf63 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1784,13 +1784,13 @@  static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
      * terminate the boot.
      */
     if (guest_xive) {
-        if (!spapr->irq->xive) {
+        if (!spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
             error_report(
 "Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
             exit(EXIT_FAILURE);
         }
     } else {
-        if (!spapr->irq->xics) {
+        if (!spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
             error_report(
 "Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
             exit(EXIT_FAILURE);
@@ -1804,7 +1804,8 @@  static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
      */
     if (!spapr->cas_reboot) {
         spapr->cas_reboot = spapr_ovec_test(ov5_updates, OV5_XIVE_EXPLOIT)
-            && spapr->irq->xics && spapr->irq->xive;
+            && spapr_get_cap(spapr, SPAPR_CAP_XICS)
+            && spapr_get_cap(spapr, SPAPR_CAP_XIVE);
     }
 
     spapr_ovec_cleanup(ov5_updates);
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
index 2768f9a765..473fc8780a 100644
--- a/hw/ppc/spapr_irq.c
+++ b/hw/ppc/spapr_irq.c
@@ -101,90 +101,19 @@  int spapr_irq_init_kvm(int (*fn)(SpaprInterruptController *, Error **),
     return 0;
 }
 
-/*
- * XICS IRQ backend.
- */
-
-SpaprIrq spapr_irq_xics = {
-    .xics        = true,
-    .xive        = false,
-};
-
-/*
- * XIVE IRQ backend.
- */
-
-SpaprIrq spapr_irq_xive = {
-    .xics        = false,
-    .xive        = true,
-};
-
-/*
- * Dual XIVE and XICS IRQ backend.
- *
- * Both interrupt mode, XIVE and XICS, objects are created but the
- * machine starts in legacy interrupt mode (XICS). It can be changed
- * by the CAS negotiation process and, in that case, the new mode is
- * activated after an extra machine reset.
- */
-
-/*
- * Define values in sync with the XIVE and XICS backend
- */
-SpaprIrq spapr_irq_dual = {
-    .xics        = true,
-    .xive        = true,
-};
-
-
 static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
 {
     MachineState *machine = MACHINE(spapr);
 
-    /*
-     * Sanity checks on non-P9 machines. On these, XIVE is not
-     * advertised, see spapr_dt_ov5_platform_support()
-     */
-    if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
-                               0, spapr->max_compat_pvr)) {
-        /*
-         * If the 'dual' interrupt mode is selected, force XICS as CAS
-         * negotiation is useless.
-         */
-        if (spapr->irq == &spapr_irq_dual) {
-            spapr->irq = &spapr_irq_xics;
-            return 0;
-        }
-
-        /*
-         * Non-P9 machines using only XIVE is a bogus setup. We have two
-         * scenarios to take into account because of the compat mode:
-         *
-         * 1. POWER7/8 machines should fail to init later on when creating
-         *    the XIVE interrupt presenters because a POWER9 exception
-         *    model is required.
-
-         * 2. POWER9 machines using the POWER8 compat mode won't fail and
-         *    will let the OS boot with a partial XIVE setup : DT
-         *    properties but no hcalls.
-         *
-         * To cover both and not confuse the OS, add an early failure in
-         * QEMU.
-         */
-        if (spapr->irq == &spapr_irq_xive) {
-            error_setg(errp, "XIVE-only machines require a POWER9 CPU");
-            return -1;
-        }
-    }
-
     /*
      * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
      * re-created. Detect that early to avoid QEMU to exit later when the
      * guest reboots.
      */
     if (kvm_enabled() &&
-        spapr->irq == &spapr_irq_dual &&
         machine_kernel_irqchip_required(machine) &&
+        spapr_get_cap(spapr, SPAPR_CAP_XICS) &&
+        spapr_get_cap(spapr, SPAPR_CAP_XIVE) &&
         xics_kvm_has_broken_disconnect(spapr)) {
         error_setg(errp, "KVM is too old to support ic-mode=dual,kernel-irqchip=on");
         return -1;
@@ -280,7 +209,7 @@  void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
     /* Initialize the MSI IRQ allocator. */
     spapr_irq_msi_init(spapr);
 
-    if (spapr->irq->xics) {
+    if (spapr_get_cap(spapr, SPAPR_CAP_XICS)) {
         Error *local_err = NULL;
         Object *obj;
 
@@ -313,7 +242,7 @@  void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
         spapr->ics = ICS_SPAPR(obj);
     }
 
-    if (spapr->irq->xive) {
+    if (spapr_get_cap(spapr, SPAPR_CAP_XIVE)) {
         uint32_t nr_servers = spapr_max_server_number(spapr);
         DeviceState *dev;
         int i;
@@ -558,11 +487,6 @@  int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
     return first + ics->offset;
 }
 
-SpaprIrq spapr_irq_xics_legacy = {
-    .xics        = true,
-    .xive        = false,
-};
-
 static void spapr_irq_register_types(void)
 {
     type_register_static(&spapr_intc_info);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 623e8e3f93..d3b4dd7de3 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -79,8 +79,12 @@  typedef enum {
 #define SPAPR_CAP_LARGE_DECREMENTER     0x08
 /* Count Cache Flush Assist HW Instruction */
 #define SPAPR_CAP_CCF_ASSIST            0x09
+/* XICS interrupt controller */
+#define SPAPR_CAP_XICS                  0x0a
+/* XIVE interrupt controller */
+#define SPAPR_CAP_XIVE                  0x0b
 /* Num Caps */
-#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM                   (SPAPR_CAP_XIVE + 1)
 
 /*
  * Capability Values
@@ -131,7 +135,6 @@  struct SpaprMachineClass {
                           hwaddr *nv2atsd, Error **errp);
     SpaprResizeHpt resize_hpt_default;
     SpaprCapabilities default_caps;
-    SpaprIrq *irq;
 };
 
 /**
@@ -195,7 +198,6 @@  struct SpaprMachineState {
 
     int32_t irq_map_nr;
     unsigned long *irq_map;
-    SpaprIrq *irq;
     qemu_irq *qirqs;
     SpaprInterruptController *active_intc;
     ICSState *ics;
@@ -870,6 +872,8 @@  extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
 extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
+extern const VMStateDescription vmstate_spapr_cap_xics;
+extern const VMStateDescription vmstate_spapr_cap_xive;
 
 static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
 {
diff --git a/include/hw/ppc/spapr_irq.h b/include/hw/ppc/spapr_irq.h
index 5e150a6679..71aee13743 100644
--- a/include/hw/ppc/spapr_irq.h
+++ b/include/hw/ppc/spapr_irq.h
@@ -77,16 +77,6 @@  int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
                         Error **errp);
 void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num);
 
-typedef struct SpaprIrq {
-    bool        xics;
-    bool        xive;
-} SpaprIrq;
-
-extern SpaprIrq spapr_irq_xics;
-extern SpaprIrq spapr_irq_xics_legacy;
-extern SpaprIrq spapr_irq_xive;
-extern SpaprIrq spapr_irq_dual;
-
 void spapr_irq_init(SpaprMachineState *spapr, Error **errp);
 int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp);
 void spapr_irq_free(SpaprMachineState *spapr, int irq, int num);