Message ID | 20201120174646.619395-3-groug@kaod.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | spapr: Address the confusion between IPI numbers and vCPU ids | expand |
On Fri, Nov 20, 2020 at 06:46:40PM +0100, Greg Kurz wrote: > We're going to kill the "nr_ends" field in a subsequent patch. > Prepare ground by using an helper instead of peeking into > the sPAPR XIVE structure directly. > > Signed-off-by: Greg Kurz <groug@kaod.org> Applied to ppc-for-6.0, thanks. > --- > include/hw/ppc/spapr_xive.h | 1 + > hw/intc/spapr_xive.c | 23 ++++++++++++++--------- > hw/intc/spapr_xive_kvm.c | 4 ++-- > 3 files changed, 17 insertions(+), 11 deletions(-) > > diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h > index 26c8d90d7196..4b967f13c030 100644 > --- a/include/hw/ppc/spapr_xive.h > +++ b/include/hw/ppc/spapr_xive.h > @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive); > > int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, > uint32_t *out_server, uint8_t *out_prio); > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive); > > /* > * KVM XIVE device helpers > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c > index 60e0d5769dcc..f473ad9cba47 100644 > --- a/hw/intc/spapr_xive.c > +++ b/hw/intc/spapr_xive.c > @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) > uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); > XiveEND *end; > > - assert(end_idx < xive->nr_ends); > + assert(end_idx < spapr_xive_nr_ends(xive)); > end = &xive->endt[end_idx]; > > if (xive_end_is_valid(end)) { > @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev) > } > > /* Clear all ENDs */ > - for (i = 0; i < xive->nr_ends; i++) { > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > spapr_xive_end_reset(&xive->endt[i]); > } > } > @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj) > xive->fd = -1; > } > > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive) > +{ > + return xive->nr_ends; > +} > + > static void spapr_xive_realize(DeviceState *dev, Error **errp) > { > SpaprXive *xive = SPAPR_XIVE(dev); > @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) > * Allocate the routing tables > */ > xive->eat = g_new0(XiveEAS, xive->nr_irqs); > - xive->endt = g_new0(XiveEND, xive->nr_ends); > + xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive)); > > xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64, > xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); > @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr, > { > SpaprXive *xive = SPAPR_XIVE(xrtr); > > - if (end_idx >= xive->nr_ends) { > + if (end_idx >= spapr_xive_nr_ends(xive)) { > return -1; > } > > @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk, > { > SpaprXive *xive = SPAPR_XIVE(xrtr); > > - if (end_idx >= xive->nr_ends) { > + if (end_idx >= spapr_xive_nr_ends(xive)) { > return -1; > } > > @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu, > /* EAS_END_BLOCK is unused on sPAPR */ > end_idx = xive_get_field64(EAS_END_INDEX, eas.w); > > - assert(end_idx < xive->nr_ends); > + assert(end_idx < spapr_xive_nr_ends(xive)); > end = &xive->endt[end_idx]; > > nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6); > @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu, > return H_P2; > } > > - assert(end_idx < xive->nr_ends); > + assert(end_idx < spapr_xive_nr_ends(xive)); > end = &xive->endt[end_idx]; > > args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx; > @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, > return H_P2; > } > > - assert(end_idx < xive->nr_ends); > + assert(end_idx < spapr_xive_nr_ends(xive)); > memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND)); > > switch (qsize) { > @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu, > return H_P2; > } > > - assert(end_idx < xive->nr_ends); > + assert(end_idx < spapr_xive_nr_ends(xive)); > end = &xive->endt[end_idx]; > > args[0] = 0; > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c > index 66bf4c06fe55..1566016f0e28 100644 > --- a/hw/intc/spapr_xive_kvm.c > +++ b/hw/intc/spapr_xive_kvm.c > @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) > int i; > int ret; > > - for (i = 0; i < xive->nr_ends; i++) { > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > if (!xive_end_is_valid(&xive->endt[i])) { > continue; > } > @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) > assert(xive->fd != -1); > > /* Restore the ENDT first. The targetting depends on it. */ > - for (i = 0; i < xive->nr_ends; i++) { > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > if (!xive_end_is_valid(&xive->endt[i])) { > continue; > }
On 11/20/20 6:46 PM, Greg Kurz wrote: > We're going to kill the "nr_ends" field in a subsequent patch. why ? it is one of the tables of the controller and its part of the main XIVE concepts. Conceptually, we could let the machine dimension it with an arbitrary value as OPAL does. The controller would fail when the table is fully used. > Prepare ground by using an helper instead of peeking into > the sPAPR XIVE structure directly. I am not against the helper though but we should introduce a prio_shift value which would let us define the number of available priorities. To be linked with "hv-prio" C. > Signed-off-by: Greg Kurz <groug@kaod.org> > --- > include/hw/ppc/spapr_xive.h | 1 + > hw/intc/spapr_xive.c | 23 ++++++++++++++--------- > hw/intc/spapr_xive_kvm.c | 4 ++-- > 3 files changed, 17 insertions(+), 11 deletions(-) > > diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h > index 26c8d90d7196..4b967f13c030 100644 > --- a/include/hw/ppc/spapr_xive.h > +++ b/include/hw/ppc/spapr_xive.h > @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive); > > int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, > uint32_t *out_server, uint8_t *out_prio); > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive); > > /* > * KVM XIVE device helpers > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c > index 60e0d5769dcc..f473ad9cba47 100644 > --- a/hw/intc/spapr_xive.c > +++ b/hw/intc/spapr_xive.c > @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) > uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); > XiveEND *end; > > - assert(end_idx < xive->nr_ends); > + assert(end_idx < spapr_xive_nr_ends(xive)); > end = &xive->endt[end_idx]; > > if (xive_end_is_valid(end)) { > @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev) > } > > /* Clear all ENDs */ > - for (i = 0; i < xive->nr_ends; i++) { > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > spapr_xive_end_reset(&xive->endt[i]); > } > } > @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj) > xive->fd = -1; > } > > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive) > +{ > + return xive->nr_ends; > +} > + > static void spapr_xive_realize(DeviceState *dev, Error **errp) > { > SpaprXive *xive = SPAPR_XIVE(dev); > @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) > * Allocate the routing tables > */ > xive->eat = g_new0(XiveEAS, xive->nr_irqs); > - xive->endt = g_new0(XiveEND, xive->nr_ends); > + xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive)); > > xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64, > xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); > @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr, > { > SpaprXive *xive = SPAPR_XIVE(xrtr); > > - if (end_idx >= xive->nr_ends) { > + if (end_idx >= spapr_xive_nr_ends(xive)) { > return -1; > } > > @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk, > { > SpaprXive *xive = SPAPR_XIVE(xrtr); > > - if (end_idx >= xive->nr_ends) { > + if (end_idx >= spapr_xive_nr_ends(xive)) { > return -1; > } > > @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu, > /* EAS_END_BLOCK is unused on sPAPR */ > end_idx = xive_get_field64(EAS_END_INDEX, eas.w); > > - assert(end_idx < xive->nr_ends); > + assert(end_idx < spapr_xive_nr_ends(xive)); > end = &xive->endt[end_idx]; > > nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6); > @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu, > return H_P2; > } > > - assert(end_idx < xive->nr_ends); > + assert(end_idx < spapr_xive_nr_ends(xive)); > end = &xive->endt[end_idx]; > > args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx; > @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, > return H_P2; > } > > - assert(end_idx < xive->nr_ends); > + assert(end_idx < spapr_xive_nr_ends(xive)); > memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND)); > > switch (qsize) { > @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu, > return H_P2; > } > > - assert(end_idx < xive->nr_ends); > + assert(end_idx < spapr_xive_nr_ends(xive)); > end = &xive->endt[end_idx]; > > args[0] = 0; > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c > index 66bf4c06fe55..1566016f0e28 100644 > --- a/hw/intc/spapr_xive_kvm.c > +++ b/hw/intc/spapr_xive_kvm.c > @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) > int i; > int ret; > > - for (i = 0; i < xive->nr_ends; i++) { > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > if (!xive_end_is_valid(&xive->endt[i])) { > continue; > } > @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) > assert(xive->fd != -1); > > /* Restore the ENDT first. The targetting depends on it. */ > - for (i = 0; i < xive->nr_ends; i++) { > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > if (!xive_end_is_valid(&xive->endt[i])) { > continue; > } >
On Mon, 23 Nov 2020 10:46:38 +0100 Cédric Le Goater <clg@kaod.org> wrote: > On 11/20/20 6:46 PM, Greg Kurz wrote: > > We're going to kill the "nr_ends" field in a subsequent patch. > > why ? it is one of the tables of the controller and its part of > the main XIVE concepts. Conceptually, we could let the machine > dimension it with an arbitrary value as OPAL does. The controller > would fail when the table is fully used. > The idea is that the sPAPR machine only true need is to create a controller that can accommodate up to a certain number of vCPU ids. It doesn't really to know about the END itself IMHO. This being said, if we decide to pass both spapr_max_server_number() and smp.max_cpus down to the backends as function arguments, we won't have to change "nr_ends" at all. > > > Prepare ground by using an helper instead of peeking into > > the sPAPR XIVE structure directly. > > > I am not against the helper though but we should introduce a > prio_shift value which would let us define the number of > available priorities. To be linked with "hv-prio" > > C. > > > > Signed-off-by: Greg Kurz <groug@kaod.org> > > --- > > include/hw/ppc/spapr_xive.h | 1 + > > hw/intc/spapr_xive.c | 23 ++++++++++++++--------- > > hw/intc/spapr_xive_kvm.c | 4 ++-- > > 3 files changed, 17 insertions(+), 11 deletions(-) > > > > diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h > > index 26c8d90d7196..4b967f13c030 100644 > > --- a/include/hw/ppc/spapr_xive.h > > +++ b/include/hw/ppc/spapr_xive.h > > @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive); > > > > int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, > > uint32_t *out_server, uint8_t *out_prio); > > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive); > > > > /* > > * KVM XIVE device helpers > > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c > > index 60e0d5769dcc..f473ad9cba47 100644 > > --- a/hw/intc/spapr_xive.c > > +++ b/hw/intc/spapr_xive.c > > @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) > > uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); > > XiveEND *end; > > > > - assert(end_idx < xive->nr_ends); > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > end = &xive->endt[end_idx]; > > > > if (xive_end_is_valid(end)) { > > @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev) > > } > > > > /* Clear all ENDs */ > > - for (i = 0; i < xive->nr_ends; i++) { > > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > > spapr_xive_end_reset(&xive->endt[i]); > > } > > } > > @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj) > > xive->fd = -1; > > } > > > > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive) > > +{ > > + return xive->nr_ends; > > +} > > + > > static void spapr_xive_realize(DeviceState *dev, Error **errp) > > { > > SpaprXive *xive = SPAPR_XIVE(dev); > > @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) > > * Allocate the routing tables > > */ > > xive->eat = g_new0(XiveEAS, xive->nr_irqs); > > - xive->endt = g_new0(XiveEND, xive->nr_ends); > > + xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive)); > > > > xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64, > > xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); > > @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr, > > { > > SpaprXive *xive = SPAPR_XIVE(xrtr); > > > > - if (end_idx >= xive->nr_ends) { > > + if (end_idx >= spapr_xive_nr_ends(xive)) { > > return -1; > > } > > > > @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk, > > { > > SpaprXive *xive = SPAPR_XIVE(xrtr); > > > > - if (end_idx >= xive->nr_ends) { > > + if (end_idx >= spapr_xive_nr_ends(xive)) { > > return -1; > > } > > > > @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu, > > /* EAS_END_BLOCK is unused on sPAPR */ > > end_idx = xive_get_field64(EAS_END_INDEX, eas.w); > > > > - assert(end_idx < xive->nr_ends); > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > end = &xive->endt[end_idx]; > > > > nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6); > > @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu, > > return H_P2; > > } > > > > - assert(end_idx < xive->nr_ends); > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > end = &xive->endt[end_idx]; > > > > args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx; > > @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, > > return H_P2; > > } > > > > - assert(end_idx < xive->nr_ends); > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND)); > > > > switch (qsize) { > > @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu, > > return H_P2; > > } > > > > - assert(end_idx < xive->nr_ends); > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > end = &xive->endt[end_idx]; > > > > args[0] = 0; > > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c > > index 66bf4c06fe55..1566016f0e28 100644 > > --- a/hw/intc/spapr_xive_kvm.c > > +++ b/hw/intc/spapr_xive_kvm.c > > @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) > > int i; > > int ret; > > > > - for (i = 0; i < xive->nr_ends; i++) { > > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > > if (!xive_end_is_valid(&xive->endt[i])) { > > continue; > > } > > @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) > > assert(xive->fd != -1); > > > > /* Restore the ENDT first. The targetting depends on it. */ > > - for (i = 0; i < xive->nr_ends; i++) { > > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > > if (!xive_end_is_valid(&xive->endt[i])) { > > continue; > > } > > >
On 11/23/20 12:16 PM, Greg Kurz wrote: > On Mon, 23 Nov 2020 10:46:38 +0100 > Cédric Le Goater <clg@kaod.org> wrote: > >> On 11/20/20 6:46 PM, Greg Kurz wrote: >>> We're going to kill the "nr_ends" field in a subsequent patch. >> >> why ? it is one of the tables of the controller and its part of >> the main XIVE concepts. Conceptually, we could let the machine >> dimension it with an arbitrary value as OPAL does. The controller >> would fail when the table is fully used. >> > > The idea is that the sPAPR machine only true need is to create a > controller that can accommodate up to a certain number of vCPU ids. > It doesn't really to know about the END itself IMHO.> > This being said, if we decide to pass both spapr_max_server_number() > and smp.max_cpus down to the backends as function arguments, we won't > have to change "nr_ends" at all. I would prefer that but I am still not sure what they represent. Looking at the sPAPR XIVE code, we deal with numbers/ranges in the following places today. * spapr_xive_dt() It defines a range of interrupt numbers to be used by the guest for the threads/vCPUs IPIs. It's a subset of interrupt numbers in : [ SPAPR_IRQ_IPI - SPAPR_IRQ_EPOW [ These are not vCPU ids. Since these interrupt numbers will be considered as free to use by the OS, it makes sense to pre-claim them. But claiming an interrupt number in the guest can potentially set up, through the KVM device, a mapping on the host and in HW. See below why this can be a problem. * kvmppc_xive_cpu_connect() This sizes the NVT tables in OPAL for the guest. This is the max number of vCPUs of the guest (not vCPU ids) * spapr_irq_init() This is where the IPI interrupt numbers are claimed today. Directly in QEMU and KVM, if the machine is running XIVE only, indirectly if it's dual, first in QEMU and then in KVM when the machine switches of interrupt mode in CAS. The problem is that the underlying XIVE resources in HW are allocated where the QEMU process is running. Which is not the best option when the vCPUs are pinned on different chips. My patchset was trying to improve that by claiming the IPI on demand when the vCPU is connected to the KVM device. But it was using the vCPU id as the IPI interrupt number which is utterly wrong, the guest OS could use any number in the range exposed in the DT. The last patch you sent was going in the right direction I think. That is to claim the IPI when the guest OS is requesting for it. http://patchwork.ozlabs.org/project/qemu-devel/patch/160528045027.804522.6161091782230763832.stgit@bahia.lan/ But I don't understand why it was so complex. It should be like the MSIs claimed by PCI devices. All this to say, that we need to size better the range in the "ibm,xive-lisn-ranges" property if that's broken for vSMT. Then, I think the IPIs can be treated just like the PCI MSIs but they need to be claimed first. That's the ugly part. Should we add a special check in h_int_set_source_config to deal with unclaimed IPIs that are being configured ? C.
On Tue, 24 Nov 2020 14:54:38 +0100 Cédric Le Goater <clg@kaod.org> wrote: > On 11/23/20 12:16 PM, Greg Kurz wrote: > > On Mon, 23 Nov 2020 10:46:38 +0100 > > Cédric Le Goater <clg@kaod.org> wrote: > > > >> On 11/20/20 6:46 PM, Greg Kurz wrote: > >>> We're going to kill the "nr_ends" field in a subsequent patch. > >> > >> why ? it is one of the tables of the controller and its part of > >> the main XIVE concepts. Conceptually, we could let the machine > >> dimension it with an arbitrary value as OPAL does. The controller > >> would fail when the table is fully used. > >> > > > > The idea is that the sPAPR machine only true need is to create a > > controller that can accommodate up to a certain number of vCPU ids. > > It doesn't really to know about the END itself IMHO.> > > This being said, if we decide to pass both spapr_max_server_number() > > and smp.max_cpus down to the backends as function arguments, we won't > > have to change "nr_ends" at all. > > I would prefer that but I am still not sure what they represent. > > Looking at the sPAPR XIVE code, we deal with numbers/ranges in the > following places today. > > * spapr_xive_dt() > > It defines a range of interrupt numbers to be used by the guest > for the threads/vCPUs IPIs. It's a subset of interrupt numbers > in : > > [ SPAPR_IRQ_IPI - SPAPR_IRQ_EPOW [ > > These are not vCPU ids. > > Since these interrupt numbers will be considered as free to use > by the OS, it makes sense to pre-claim them. But claiming an > interrupt number in the guest can potentially set up, through > the KVM device, a mapping on the host and in HW. See below why > this can be a problem. > > * kvmppc_xive_cpu_connect() > > This sizes the NVT tables in OPAL for the guest. This is the > max number of vCPUs of the guest (not vCPU ids) > I guess you're talking about KVM_DEV_XIVE_NR_SERVERS in kvmppc_xive_connect() actually. We're currently passing spapr_max_server_number() (vCPU id) but you might be right. I need to re-read the story around VSMT and XIVE. commit 1e175d2e07c71d9574f5b1c74523abca54e2654f Author: Sam Bobroff <sam.bobroff@au1.ibm.com> Date: Wed Jul 25 16:12:02 2018 +1000 KVM: PPC: Book3S HV: Pack VCORE IDs to access full VCPU ID space > * spapr_irq_init() > > This is where the IPI interrupt numbers are claimed today. > Directly in QEMU and KVM, if the machine is running XIVE only, > indirectly if it's dual, first in QEMU and then in KVM when > the machine switches of interrupt mode in CAS. > > The problem is that the underlying XIVE resources in HW are > allocated where the QEMU process is running. Which is not the > best option when the vCPUs are pinned on different chips. > > My patchset was trying to improve that by claiming the IPI on > demand when the vCPU is connected to the KVM device. But it > was using the vCPU id as the IPI interrupt number which is > utterly wrong, the guest OS could use any number in the range > exposed in the DT. > > The last patch you sent was going in the right direction I think. > That is to claim the IPI when the guest OS is requesting for it. > > http://patchwork.ozlabs.org/project/qemu-devel/patch/160528045027.804522.6161091782230763832.stgit@bahia.lan/ > > But I don't understand why it was so complex. It should be like > the MSIs claimed by PCI devices. > The difference here is that the guest doesn't claim IPIs. They are supposedly pre-claimed in "ibm,xive-lisn-ranges". And this is actually the case in QEMU. The IPI setup sequence in the guest is basically: 1) grab a free irq from the bitmap, ie. "ibm,xive-lisn-ranges" 2) calls H_INT_GET_SOURCE_INFO, ie. populate_irq_data() 3) calls H_INT_SET_SOURCE_CONFIG, ie, configure_irq()) If we want an IPI to be claimed by the appropriate vCPU, we can only do this from under H_INT_SET_SOURCE_CONFIG. And until the guest eventually configures the IPI, KVM and QEMU are out of sync. This complexifies migration because we have to guess at post load if we should claim the IPI in KVM or not. The simple presence of the vCPU isn't enough : we need to guess if the guest actually configured the IPI or not. > > All this to say, that we need to size better the range in the > "ibm,xive-lisn-ranges" property if that's broken for vSMT. > Sizing the range to smp.max_cpus as proposed in this series is fine, no matter what the VSMT is. > Then, I think the IPIs can be treated just like the PCI MSIs > but they need to be claimed first. That's the ugly part. > Yeah that's the big difference. For PCI MSIs, QEMU owns the bitmap and the guest can claim (or release) a number of MSIs the "ibm,change-msi" RTAS interface. There's no such thing for IPIs : they are supposedly already claimed. > Should we add a special check in h_int_set_source_config to > deal with unclaimed IPIs that are being configured ? > This is what my tentative fix does. > > C.
On 11/24/20 6:01 PM, Greg Kurz wrote: > On Tue, 24 Nov 2020 14:54:38 +0100 > Cédric Le Goater <clg@kaod.org> wrote: > >> On 11/23/20 12:16 PM, Greg Kurz wrote: >>> On Mon, 23 Nov 2020 10:46:38 +0100 >>> Cédric Le Goater <clg@kaod.org> wrote: >>> >>>> On 11/20/20 6:46 PM, Greg Kurz wrote: >>>>> We're going to kill the "nr_ends" field in a subsequent patch. >>>> >>>> why ? it is one of the tables of the controller and its part of >>>> the main XIVE concepts. Conceptually, we could let the machine >>>> dimension it with an arbitrary value as OPAL does. The controller >>>> would fail when the table is fully used. >>>> >>> >>> The idea is that the sPAPR machine only true need is to create a >>> controller that can accommodate up to a certain number of vCPU ids. >>> It doesn't really to know about the END itself IMHO.> >>> This being said, if we decide to pass both spapr_max_server_number() >>> and smp.max_cpus down to the backends as function arguments, we won't >>> have to change "nr_ends" at all. >> >> I would prefer that but I am still not sure what they represent. >> >> Looking at the sPAPR XIVE code, we deal with numbers/ranges in the >> following places today. >> >> * spapr_xive_dt() >> >> It defines a range of interrupt numbers to be used by the guest >> for the threads/vCPUs IPIs. It's a subset of interrupt numbers >> in : >> >> [ SPAPR_IRQ_IPI - SPAPR_IRQ_EPOW [ >> >> These are not vCPU ids. >> >> Since these interrupt numbers will be considered as free to use >> by the OS, it makes sense to pre-claim them. But claiming an >> interrupt number in the guest can potentially set up, through >> the KVM device, a mapping on the host and in HW. See below why >> this can be a problem. >> >> * kvmppc_xive_cpu_connect() >> >> This sizes the NVT tables in OPAL for the guest. This is the >> max number of vCPUs of the guest (not vCPU ids) >> > > I guess you're talking about KVM_DEV_XIVE_NR_SERVERS in > kvmppc_xive_connect() actually. We're currently passing > spapr_max_server_number() (vCPU id) but you might be > right. > > I need to re-read the story around VSMT and XIVE. ok. What we care about here, is a size to allocate the NVT block representing the vCPUs in HW. NVT ids are pushed in the thread contexts when the vCPUs are scheduled to run and looked for by the presenter when an interrupt is to be delivered. > commit 1e175d2e07c71d9574f5b1c74523abca54e2654f > Author: Sam Bobroff <sam.bobroff@au1.ibm.com> > Date: Wed Jul 25 16:12:02 2018 +1000 > > KVM: PPC: Book3S HV: Pack VCORE IDs to access full VCPU ID space > >> * spapr_irq_init() >> >> This is where the IPI interrupt numbers are claimed today. >> Directly in QEMU and KVM, if the machine is running XIVE only, >> indirectly if it's dual, first in QEMU and then in KVM when >> the machine switches of interrupt mode in CAS. >> >> The problem is that the underlying XIVE resources in HW are >> allocated where the QEMU process is running. Which is not the >> best option when the vCPUs are pinned on different chips. >> >> My patchset was trying to improve that by claiming the IPI on >> demand when the vCPU is connected to the KVM device. But it >> was using the vCPU id as the IPI interrupt number which is >> utterly wrong, the guest OS could use any number in the range >> exposed in the DT. >> >> The last patch you sent was going in the right direction I think. >> That is to claim the IPI when the guest OS is requesting for it. >> >> http://patchwork.ozlabs.org/project/qemu-devel/patch/160528045027.804522.6161091782230763832.stgit@bahia.lan/ >> >> But I don't understand why it was so complex. It should be like >> the MSIs claimed by PCI devices. >> > > The difference here is that the guest doesn't claim IPIs. They are > supposedly pre-claimed in "ibm,xive-lisn-ranges". And this is actually > the case in QEMU. yes. That's what I want to change (for performance) > The IPI setup sequence in the guest is basically: > 1) grab a free irq from the bitmap, ie. "ibm,xive-lisn-ranges" > 2) calls H_INT_GET_SOURCE_INFO, ie. populate_irq_data() > 3) calls H_INT_SET_SOURCE_CONFIG, ie, configure_irq()) > > If we want an IPI to be claimed by the appropriate vCPU, we > can only do this from under H_INT_SET_SOURCE_CONFIG. And > until the guest eventually configures the IPI, KVM and QEMU > are out of sync. Well, KVM doesn't know either how much PCI MSIs will be claimed. It all depends on the guest OS. I don't think this is a problem to expose unclaimed interrupt numbers to the guest if they are IPIs. We can detect that easily with the range and claim the source at KVM level when it's configured or in h_int_get_source_info(). Talking of which, it might be good to have a KVM command to query the source characteristics on the host. I sent a patchset a while ago in that sense. > This complexifies migration because we have to guess at > post load if we should claim the IPI in KVM or not. The > simple presence of the vCPU isn't enough : we need to > guess if the guest actually configured the IPI or not. The EAT will be transferred from the source and the call to kvmppc_xive_source_reset_one() should initialize the KVM device correctly on the target for all interrupts. >> All this to say, that we need to size better the range in the >> "ibm,xive-lisn-ranges" property if that's broken for vSMT. >> > > Sizing the range to smp.max_cpus as proposed in this series > is fine, no matter what the VSMT is. ok. That's a fix for spapr_irq_dt() then. And possibly, there is a similar one for KVM_DEV_XIVE_NR_SERVERS. >> Then, I think the IPIs can be treated just like the PCI MSIs >> but they need to be claimed first. That's the ugly part. >> > > Yeah that's the big difference. For PCI MSIs, QEMU owns the > bitmap and the guest can claim (or release) a number of > MSIs the "ibm,change-msi" RTAS interface. There's no > such thing for IPIs : they are supposedly already claimed. IPIs are a bit special because there are no I/O devices to claim them. We could consider the vCPU has being the device. That was my first attempt but it was wrong since the OS is in charge of choosing an interrupt number for the IPI. >> Should we add a special check in h_int_set_source_config to >> deal with unclaimed IPIs that are being configured ? >> > > This is what my tentative fix does. I didn't understand the complexity of it, may be due to my patchset. You should try again :) C.
On Tue, 24 Nov 2020 18:56:02 +0100 Cédric Le Goater <clg@kaod.org> wrote: [...] > > > > I guess you're talking about KVM_DEV_XIVE_NR_SERVERS in > > kvmppc_xive_connect() actually. We're currently passing > > spapr_max_server_number() (vCPU id) but you might be > > right. > > > > I need to re-read the story around VSMT and XIVE. > > ok. What we care about here, is a size to allocate the NVT block > representing the vCPUs in HW. NVT ids are pushed in the thread > contexts when the vCPUs are scheduled to run and looked for by > the presenter when an interrupt is to be delivered. > Yeah, looking at the code again, I realize there was a confusion when we added the possibility to size the NVT. This should not depend on the vCPU id limnit as it is done today, it should just be the maximum number of possible vCPUs (ie. smp.max_cpus). [...] > > > > The difference here is that the guest doesn't claim IPIs. They are > > supposedly pre-claimed in "ibm,xive-lisn-ranges". And this is actually > > the case in QEMU. > > yes. That's what I want to change (for performance) > I understand the purpose of claiming the IPI from its associated vCPU context. But this can only be done on a path where we have both the vCPU id and the IPI ; kvmppc_xive_set_source_config() looks like a good candidate to handle this for runtime and post-load. > > The IPI setup sequence in the guest is basically: > > 1) grab a free irq from the bitmap, ie. "ibm,xive-lisn-ranges" > > 2) calls H_INT_GET_SOURCE_INFO, ie. populate_irq_data() > > 3) calls H_INT_SET_SOURCE_CONFIG, ie, configure_irq()) > > > > If we want an IPI to be claimed by the appropriate vCPU, we > > can only do this from under H_INT_SET_SOURCE_CONFIG. And > > until the guest eventually configures the IPI, KVM and QEMU > > are out of sync. > > Well, KVM doesn't know either how much PCI MSIs will be claimed. > It all depends on the guest OS. > Yes but QEMU and KVM are always in sync for them. When the guest calls the "ibm,change-msi" RTAS interface to get some MSIs for a device, they are immediately claimed both in QEMU and KVM. > I don't think this is a problem to expose unclaimed interrupt > numbers to the guest if they are IPIs. We can detect that > easily with the range and claim the source at KVM level when > it's configured or in h_int_get_source_info(). Talking of which, We cannot claim the source at the KVM level from H_INT_GET_SOURCE_INFO because we don't know about the vCPU id here => we can't do the run_on_cpu() optimization. > it might be good to have a KVM command to query the source > characteristics on the host. I sent a patchset a while ago in > that sense. > > > This complexifies migration because we have to guess at > > post load if we should claim the IPI in KVM or not. The > > simple presence of the vCPU isn't enough : we need to > > guess if the guest actually configured the IPI or not. > > The EAT will be transferred from the source and the call to > kvmppc_xive_source_reset_one() should initialize the KVM > device correctly on the target for all interrupts. > Except that the EAS appears as valid for all IPIs, even though the source didn't claim them at the KVM level. It looks wrong to blindly restore all of them in post-load. > >> All this to say, that we need to size better the range in the > >> "ibm,xive-lisn-ranges" property if that's broken for vSMT. > >> > > > > Sizing the range to smp.max_cpus as proposed in this series > > is fine, no matter what the VSMT is. > > ok. That's a fix for spapr_irq_dt() then. And possibly, there > is a similar one for KVM_DEV_XIVE_NR_SERVERS. > Yup. > >> Then, I think the IPIs can be treated just like the PCI MSIs > >> but they need to be claimed first. That's the ugly part. > >> > > > > Yeah that's the big difference. For PCI MSIs, QEMU owns the > > bitmap and the guest can claim (or release) a number of > > MSIs the "ibm,change-msi" RTAS interface. There's no > > such thing for IPIs : they are supposedly already claimed. > > IPIs are a bit special because there are no I/O devices to > claim them. We could consider the vCPU has being the device. > That was my first attempt but it was wrong since the OS is > in charge of choosing an interrupt number for the IPI. > > >> Should we add a special check in h_int_set_source_config to > >> deal with unclaimed IPIs that are being configured ? > >> > > > > This is what my tentative fix does. > > I didn't understand the complexity of it, may be due to my > patchset. > > You should try again :) > Will do when I've fixed the misuses of spapr_max_server_number(). > C.
>>> This complexifies migration because we have to guess at >>> post load if we should claim the IPI in KVM or not. The >>> simple presence of the vCPU isn't enough : we need to >>> guess if the guest actually configured the IPI or not. >> >> The EAT will be transferred from the source and the call to >> kvmppc_xive_source_reset_one() should initialize the KVM >> device correctly on the target for all interrupts. >> > > Except that the EAS appears as valid for all IPIs, even > though the source didn't claim them at the KVM level. why ? we would stop claiming IPIs in spapr_irq_init() and so they won't appear as being valid anymore, at boot time or restore time. C.
On Wed, 25 Nov 2020 12:34:25 +0100 Cédric Le Goater <clg@kaod.org> wrote: > > >>> This complexifies migration because we have to guess at > >>> post load if we should claim the IPI in KVM or not. The > >>> simple presence of the vCPU isn't enough : we need to > >>> guess if the guest actually configured the IPI or not. > >> > >> The EAT will be transferred from the source and the call to > >> kvmppc_xive_source_reset_one() should initialize the KVM > >> device correctly on the target for all interrupts. > >> > > > > Except that the EAS appears as valid for all IPIs, even > > though the source didn't claim them at the KVM level. > > why ? we would stop claiming IPIs in spapr_irq_init() and so > they won't appear as being valid anymore, at boot time or > restore time. > If we don't claim the IPIs in spapr_irq_init() anymore then we must at least claim them on the path of H_INT_GET_SOURCE_INFO otherwise it will fail with H_P2 and the guest won't even try to setup the IPI. Even if we do that, we still have a window where the source is valid in QEMU but not yet at the KVM level. > > C.
On Mon, 23 Nov 2020 14:33:55 +1100 David Gibson <david@gibson.dropbear.id.au> wrote: > On Fri, Nov 20, 2020 at 06:46:40PM +0100, Greg Kurz wrote: > > We're going to kill the "nr_ends" field in a subsequent patch. > > Prepare ground by using an helper instead of peeking into > > the sPAPR XIVE structure directly. > > > > Signed-off-by: Greg Kurz <groug@kaod.org> > > Applied to ppc-for-6.0, thanks. > I'm working on a new approach that doesn't need this change. Especially the new approach doesn't kill the "nr_ends" fields, which makes the changelog of this patch slightly wrong. Since it doesn't bring much in the end, maybe you can just drop it from ppc-for-6.0 ? > > > --- > > include/hw/ppc/spapr_xive.h | 1 + > > hw/intc/spapr_xive.c | 23 ++++++++++++++--------- > > hw/intc/spapr_xive_kvm.c | 4 ++-- > > 3 files changed, 17 insertions(+), 11 deletions(-) > > > > diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h > > index 26c8d90d7196..4b967f13c030 100644 > > --- a/include/hw/ppc/spapr_xive.h > > +++ b/include/hw/ppc/spapr_xive.h > > @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive); > > > > int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, > > uint32_t *out_server, uint8_t *out_prio); > > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive); > > > > /* > > * KVM XIVE device helpers > > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c > > index 60e0d5769dcc..f473ad9cba47 100644 > > --- a/hw/intc/spapr_xive.c > > +++ b/hw/intc/spapr_xive.c > > @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) > > uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); > > XiveEND *end; > > > > - assert(end_idx < xive->nr_ends); > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > end = &xive->endt[end_idx]; > > > > if (xive_end_is_valid(end)) { > > @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev) > > } > > > > /* Clear all ENDs */ > > - for (i = 0; i < xive->nr_ends; i++) { > > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > > spapr_xive_end_reset(&xive->endt[i]); > > } > > } > > @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj) > > xive->fd = -1; > > } > > > > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive) > > +{ > > + return xive->nr_ends; > > +} > > + > > static void spapr_xive_realize(DeviceState *dev, Error **errp) > > { > > SpaprXive *xive = SPAPR_XIVE(dev); > > @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) > > * Allocate the routing tables > > */ > > xive->eat = g_new0(XiveEAS, xive->nr_irqs); > > - xive->endt = g_new0(XiveEND, xive->nr_ends); > > + xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive)); > > > > xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64, > > xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); > > @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr, > > { > > SpaprXive *xive = SPAPR_XIVE(xrtr); > > > > - if (end_idx >= xive->nr_ends) { > > + if (end_idx >= spapr_xive_nr_ends(xive)) { > > return -1; > > } > > > > @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk, > > { > > SpaprXive *xive = SPAPR_XIVE(xrtr); > > > > - if (end_idx >= xive->nr_ends) { > > + if (end_idx >= spapr_xive_nr_ends(xive)) { > > return -1; > > } > > > > @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu, > > /* EAS_END_BLOCK is unused on sPAPR */ > > end_idx = xive_get_field64(EAS_END_INDEX, eas.w); > > > > - assert(end_idx < xive->nr_ends); > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > end = &xive->endt[end_idx]; > > > > nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6); > > @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu, > > return H_P2; > > } > > > > - assert(end_idx < xive->nr_ends); > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > end = &xive->endt[end_idx]; > > > > args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx; > > @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, > > return H_P2; > > } > > > > - assert(end_idx < xive->nr_ends); > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND)); > > > > switch (qsize) { > > @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu, > > return H_P2; > > } > > > > - assert(end_idx < xive->nr_ends); > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > end = &xive->endt[end_idx]; > > > > args[0] = 0; > > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c > > index 66bf4c06fe55..1566016f0e28 100644 > > --- a/hw/intc/spapr_xive_kvm.c > > +++ b/hw/intc/spapr_xive_kvm.c > > @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) > > int i; > > int ret; > > > > - for (i = 0; i < xive->nr_ends; i++) { > > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > > if (!xive_end_is_valid(&xive->endt[i])) { > > continue; > > } > > @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) > > assert(xive->fd != -1); > > > > /* Restore the ENDT first. The targetting depends on it. */ > > - for (i = 0; i < xive->nr_ends; i++) { > > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > > if (!xive_end_is_valid(&xive->endt[i])) { > > continue; > > } >
On Wed, Nov 25, 2020 at 11:43:26PM +0100, Greg Kurz wrote: > On Mon, 23 Nov 2020 14:33:55 +1100 > David Gibson <david@gibson.dropbear.id.au> wrote: > > > On Fri, Nov 20, 2020 at 06:46:40PM +0100, Greg Kurz wrote: > > > We're going to kill the "nr_ends" field in a subsequent patch. > > > Prepare ground by using an helper instead of peeking into > > > the sPAPR XIVE structure directly. > > > > > > Signed-off-by: Greg Kurz <groug@kaod.org> > > > > Applied to ppc-for-6.0, thanks. > > > > I'm working on a new approach that doesn't need this change. Especially the > new approach doesn't kill the "nr_ends" fields, which makes the changelog of > this patch slightly wrong. Since it doesn't bring much in the end, maybe you > can just drop it from ppc-for-6.0 ? Done. > > > > > > --- > > > include/hw/ppc/spapr_xive.h | 1 + > > > hw/intc/spapr_xive.c | 23 ++++++++++++++--------- > > > hw/intc/spapr_xive_kvm.c | 4 ++-- > > > 3 files changed, 17 insertions(+), 11 deletions(-) > > > > > > diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h > > > index 26c8d90d7196..4b967f13c030 100644 > > > --- a/include/hw/ppc/spapr_xive.h > > > +++ b/include/hw/ppc/spapr_xive.h > > > @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive); > > > > > > int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, > > > uint32_t *out_server, uint8_t *out_prio); > > > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive); > > > > > > /* > > > * KVM XIVE device helpers > > > diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c > > > index 60e0d5769dcc..f473ad9cba47 100644 > > > --- a/hw/intc/spapr_xive.c > > > +++ b/hw/intc/spapr_xive.c > > > @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) > > > uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); > > > XiveEND *end; > > > > > > - assert(end_idx < xive->nr_ends); > > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > > end = &xive->endt[end_idx]; > > > > > > if (xive_end_is_valid(end)) { > > > @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev) > > > } > > > > > > /* Clear all ENDs */ > > > - for (i = 0; i < xive->nr_ends; i++) { > > > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > > > spapr_xive_end_reset(&xive->endt[i]); > > > } > > > } > > > @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj) > > > xive->fd = -1; > > > } > > > > > > +uint32_t spapr_xive_nr_ends(const SpaprXive *xive) > > > +{ > > > + return xive->nr_ends; > > > +} > > > + > > > static void spapr_xive_realize(DeviceState *dev, Error **errp) > > > { > > > SpaprXive *xive = SPAPR_XIVE(dev); > > > @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) > > > * Allocate the routing tables > > > */ > > > xive->eat = g_new0(XiveEAS, xive->nr_irqs); > > > - xive->endt = g_new0(XiveEND, xive->nr_ends); > > > + xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive)); > > > > > > xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64, > > > xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); > > > @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr, > > > { > > > SpaprXive *xive = SPAPR_XIVE(xrtr); > > > > > > - if (end_idx >= xive->nr_ends) { > > > + if (end_idx >= spapr_xive_nr_ends(xive)) { > > > return -1; > > > } > > > > > > @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk, > > > { > > > SpaprXive *xive = SPAPR_XIVE(xrtr); > > > > > > - if (end_idx >= xive->nr_ends) { > > > + if (end_idx >= spapr_xive_nr_ends(xive)) { > > > return -1; > > > } > > > > > > @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu, > > > /* EAS_END_BLOCK is unused on sPAPR */ > > > end_idx = xive_get_field64(EAS_END_INDEX, eas.w); > > > > > > - assert(end_idx < xive->nr_ends); > > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > > end = &xive->endt[end_idx]; > > > > > > nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6); > > > @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu, > > > return H_P2; > > > } > > > > > > - assert(end_idx < xive->nr_ends); > > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > > end = &xive->endt[end_idx]; > > > > > > args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx; > > > @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, > > > return H_P2; > > > } > > > > > > - assert(end_idx < xive->nr_ends); > > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > > memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND)); > > > > > > switch (qsize) { > > > @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu, > > > return H_P2; > > > } > > > > > > - assert(end_idx < xive->nr_ends); > > > + assert(end_idx < spapr_xive_nr_ends(xive)); > > > end = &xive->endt[end_idx]; > > > > > > args[0] = 0; > > > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c > > > index 66bf4c06fe55..1566016f0e28 100644 > > > --- a/hw/intc/spapr_xive_kvm.c > > > +++ b/hw/intc/spapr_xive_kvm.c > > > @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) > > > int i; > > > int ret; > > > > > > - for (i = 0; i < xive->nr_ends; i++) { > > > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > > > if (!xive_end_is_valid(&xive->endt[i])) { > > > continue; > > > } > > > @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) > > > assert(xive->fd != -1); > > > > > > /* Restore the ENDT first. The targetting depends on it. */ > > > - for (i = 0; i < xive->nr_ends; i++) { > > > + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { > > > if (!xive_end_is_valid(&xive->endt[i])) { > > > continue; > > > } > > >
>> why ? we would stop claiming IPIs in spapr_irq_init() and so >> they won't appear as being valid anymore, at boot time or >> restore time. >> > > If we don't claim the IPIs in spapr_irq_init() anymore then > we must at least claim them on the path of H_INT_GET_SOURCE_INFO > otherwise it will fail with H_P2 and the guest won't even > try to setup the IPI. IPIs could be handled a special case as we know in which range they are. > Even if we do that, we still have a > window where the source is valid in QEMU but not yet at > the KVM level. May be some kind of lazy binding with KVM would help us. The problem is that the guest ESB pages won't be backed by HW pages which could lead to a crash. We could overlap the emulated one until we are bound to KVM. Just a thought. C.
diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h index 26c8d90d7196..4b967f13c030 100644 --- a/include/hw/ppc/spapr_xive.h +++ b/include/hw/ppc/spapr_xive.h @@ -75,6 +75,7 @@ void spapr_xive_map_mmio(SpaprXive *xive); int spapr_xive_end_to_target(uint8_t end_blk, uint32_t end_idx, uint32_t *out_server, uint8_t *out_prio); +uint32_t spapr_xive_nr_ends(const SpaprXive *xive); /* * KVM XIVE device helpers diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c index 60e0d5769dcc..f473ad9cba47 100644 --- a/hw/intc/spapr_xive.c +++ b/hw/intc/spapr_xive.c @@ -192,7 +192,7 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon) uint32_t end_idx = xive_get_field64(EAS_END_INDEX, eas->w); XiveEND *end; - assert(end_idx < xive->nr_ends); + assert(end_idx < spapr_xive_nr_ends(xive)); end = &xive->endt[end_idx]; if (xive_end_is_valid(end)) { @@ -270,7 +270,7 @@ static void spapr_xive_reset(void *dev) } /* Clear all ENDs */ - for (i = 0; i < xive->nr_ends; i++) { + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { spapr_xive_end_reset(&xive->endt[i]); } } @@ -288,6 +288,11 @@ static void spapr_xive_instance_init(Object *obj) xive->fd = -1; } +uint32_t spapr_xive_nr_ends(const SpaprXive *xive) +{ + return xive->nr_ends; +} + static void spapr_xive_realize(DeviceState *dev, Error **errp) { SpaprXive *xive = SPAPR_XIVE(dev); @@ -336,7 +341,7 @@ static void spapr_xive_realize(DeviceState *dev, Error **errp) * Allocate the routing tables */ xive->eat = g_new0(XiveEAS, xive->nr_irqs); - xive->endt = g_new0(XiveEND, xive->nr_ends); + xive->endt = g_new0(XiveEND, spapr_xive_nr_ends(xive)); xive->nodename = g_strdup_printf("interrupt-controller@%" PRIx64, xive->tm_base + XIVE_TM_USER_PAGE * (1 << TM_SHIFT)); @@ -375,7 +380,7 @@ static int spapr_xive_get_end(XiveRouter *xrtr, { SpaprXive *xive = SPAPR_XIVE(xrtr); - if (end_idx >= xive->nr_ends) { + if (end_idx >= spapr_xive_nr_ends(xive)) { return -1; } @@ -389,7 +394,7 @@ static int spapr_xive_write_end(XiveRouter *xrtr, uint8_t end_blk, { SpaprXive *xive = SPAPR_XIVE(xrtr); - if (end_idx >= xive->nr_ends) { + if (end_idx >= spapr_xive_nr_ends(xive)) { return -1; } @@ -1138,7 +1143,7 @@ static target_ulong h_int_get_source_config(PowerPCCPU *cpu, /* EAS_END_BLOCK is unused on sPAPR */ end_idx = xive_get_field64(EAS_END_INDEX, eas.w); - assert(end_idx < xive->nr_ends); + assert(end_idx < spapr_xive_nr_ends(xive)); end = &xive->endt[end_idx]; nvt_blk = xive_get_field32(END_W6_NVT_BLOCK, end->w6); @@ -1216,7 +1221,7 @@ static target_ulong h_int_get_queue_info(PowerPCCPU *cpu, return H_P2; } - assert(end_idx < xive->nr_ends); + assert(end_idx < spapr_xive_nr_ends(xive)); end = &xive->endt[end_idx]; args[0] = xive->end_base + (1ull << (end_xsrc->esb_shift + 1)) * end_idx; @@ -1304,7 +1309,7 @@ static target_ulong h_int_set_queue_config(PowerPCCPU *cpu, return H_P2; } - assert(end_idx < xive->nr_ends); + assert(end_idx < spapr_xive_nr_ends(xive)); memcpy(&end, &xive->endt[end_idx], sizeof(XiveEND)); switch (qsize) { @@ -1470,7 +1475,7 @@ static target_ulong h_int_get_queue_config(PowerPCCPU *cpu, return H_P2; } - assert(end_idx < xive->nr_ends); + assert(end_idx < spapr_xive_nr_ends(xive)); end = &xive->endt[end_idx]; args[0] = 0; diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c index 66bf4c06fe55..1566016f0e28 100644 --- a/hw/intc/spapr_xive_kvm.c +++ b/hw/intc/spapr_xive_kvm.c @@ -531,7 +531,7 @@ static int kvmppc_xive_get_queues(SpaprXive *xive, Error **errp) int i; int ret; - for (i = 0; i < xive->nr_ends; i++) { + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { if (!xive_end_is_valid(&xive->endt[i])) { continue; } @@ -701,7 +701,7 @@ int kvmppc_xive_post_load(SpaprXive *xive, int version_id) assert(xive->fd != -1); /* Restore the ENDT first. The targetting depends on it. */ - for (i = 0; i < xive->nr_ends; i++) { + for (i = 0; i < spapr_xive_nr_ends(xive); i++) { if (!xive_end_is_valid(&xive->endt[i])) { continue; }
We're going to kill the "nr_ends" field in a subsequent patch. Prepare ground by using an helper instead of peeking into the sPAPR XIVE structure directly. Signed-off-by: Greg Kurz <groug@kaod.org> --- include/hw/ppc/spapr_xive.h | 1 + hw/intc/spapr_xive.c | 23 ++++++++++++++--------- hw/intc/spapr_xive_kvm.c | 4 ++-- 3 files changed, 17 insertions(+), 11 deletions(-)