Message ID | 20181116105729.23240-6-clg@kaod.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ppc: support for the XIVE interrupt controller (POWER9) | expand |
On Fri, Nov 16, 2018 at 11:56:58AM +0100, Cédric Le Goater wrote: > To complete the event routing, the IVRE sub-engine uses an internal > table containing Event Notification Descriptor (END) structures. > > An END specifies on which Event Queue (EQ) the event notification > data, defined in the associated EAS, should be posted when an > exception occurs. It also defines which Notification Virtual Target > (NVT) should be notified. > > The Event Queue is a memory page provided by the O/S defining a > circular buffer, one per server and priority couple, containing Event > Queue entries. These are 4 bytes long, the first bit being a > 'generation' bit and the 31 following bits the END Data field. They > are pulled by the O/S when the exception occurs. > > The END Data field is a way to set an invariant logical event source > number for an IRQ. It is set with the H_INT_SET_SOURCE_CONFIG hcall > when the EISN flag is used. > > Signed-off-by: Cédric Le Goater <clg@kaod.org> > --- > include/hw/ppc/xive.h | 18 ++++ > include/hw/ppc/xive_regs.h | 48 ++++++++++ > hw/intc/xive.c | 185 ++++++++++++++++++++++++++++++++++++- > 3 files changed, 248 insertions(+), 3 deletions(-) > > diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h > index 5a0696366577..ce62aaf28343 100644 > --- a/include/hw/ppc/xive.h > +++ b/include/hw/ppc/xive.h > @@ -193,11 +193,29 @@ typedef struct XiveRouterClass { > /* XIVE table accessors */ > int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); > int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); > + int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > + XiveEND *end); > + int (*set_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > + XiveEND *end); Hrm. So unlike the EAS, which is basically just a word, the END is a pretty large structure. It's unclear here if get/set are expected to copy the whole thing out and in, or if get get give you a pointer into a "live" structure and set just does any necessary barriers after an update. Really, for a non-atomic value like this, I'm not sure get/set is the right model. Also as I understand it nearly all the indices in XIVE are broken into block/index. Is there a reason those are folded together into lisn for the EAS, but not for the END? > } XiveRouterClass; > > void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon); > > int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); > int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); > +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > + XiveEND *end); > +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > + XiveEND *end); > + > +/* > + * For legacy compatibility, the exceptions define up to 256 different > + * priorities. P9 implements only 9 levels : 8 active levels [0 - 7] > + * and the least favored level 0xFF. > + */ > +#define XIVE_PRIORITY_MAX 7 > + > +void xive_end_reset(XiveEND *end); > +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon); > > #endif /* PPC_XIVE_H */ > diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h > index 12499b33614c..f97fb2b90bee 100644 > --- a/include/hw/ppc/xive_regs.h > +++ b/include/hw/ppc/xive_regs.h > @@ -28,4 +28,52 @@ typedef struct XiveEAS { > #define EAS_END_DATA PPC_BITMASK(33, 63) /* Data written to the END */ > } XiveEAS; > > +/* Event Notification Descriptor (END) */ > +typedef struct XiveEND { > + uint32_t w0; > +#define END_W0_VALID PPC_BIT32(0) /* "v" bit */ > +#define END_W0_ENQUEUE PPC_BIT32(1) /* "q" bit */ > +#define END_W0_UCOND_NOTIFY PPC_BIT32(2) /* "n" bit */ > +#define END_W0_BACKLOG PPC_BIT32(3) /* "b" bit */ > +#define END_W0_PRECL_ESC_CTL PPC_BIT32(4) /* "p" bit */ > +#define END_W0_ESCALATE_CTL PPC_BIT32(5) /* "e" bit */ > +#define END_W0_UNCOND_ESCALATE PPC_BIT32(6) /* "u" bit - DD2.0 */ > +#define END_W0_SILENT_ESCALATE PPC_BIT32(7) /* "s" bit - DD2.0 */ > +#define END_W0_QSIZE PPC_BITMASK32(12, 15) > +#define END_W0_SW0 PPC_BIT32(16) > +#define END_W0_FIRMWARE END_W0_SW0 /* Owned by FW */ > +#define END_QSIZE_4K 0 > +#define END_QSIZE_64K 4 > +#define END_W0_HWDEP PPC_BITMASK32(24, 31) > + uint32_t w1; > +#define END_W1_ESn PPC_BITMASK32(0, 1) > +#define END_W1_ESn_P PPC_BIT32(0) > +#define END_W1_ESn_Q PPC_BIT32(1) > +#define END_W1_ESe PPC_BITMASK32(2, 3) > +#define END_W1_ESe_P PPC_BIT32(2) > +#define END_W1_ESe_Q PPC_BIT32(3) > +#define END_W1_GENERATION PPC_BIT32(9) > +#define END_W1_PAGE_OFF PPC_BITMASK32(10, 31) > + uint32_t w2; > +#define END_W2_MIGRATION_REG PPC_BITMASK32(0, 3) > +#define END_W2_OP_DESC_HI PPC_BITMASK32(4, 31) > + uint32_t w3; > +#define END_W3_OP_DESC_LO PPC_BITMASK32(0, 31) > + uint32_t w4; > +#define END_W4_ESC_END_BLOCK PPC_BITMASK32(4, 7) > +#define END_W4_ESC_END_INDEX PPC_BITMASK32(8, 31) > + uint32_t w5; > +#define END_W5_ESC_END_DATA PPC_BITMASK32(1, 31) > + uint32_t w6; > +#define END_W6_FORMAT_BIT PPC_BIT32(8) > +#define END_W6_NVT_BLOCK PPC_BITMASK32(9, 12) > +#define END_W6_NVT_INDEX PPC_BITMASK32(13, 31) > + uint32_t w7; > +#define END_W7_F0_IGNORE PPC_BIT32(0) > +#define END_W7_F0_BLK_GROUPING PPC_BIT32(1) > +#define END_W7_F0_PRIORITY PPC_BITMASK32(8, 15) > +#define END_W7_F1_WAKEZ PPC_BIT32(0) > +#define END_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) > +} XiveEND; > + > #endif /* PPC_XIVE_REGS_H */ > diff --git a/hw/intc/xive.c b/hw/intc/xive.c > index c4c90a25758e..9cb001e7b540 100644 > --- a/hw/intc/xive.c > +++ b/hw/intc/xive.c > @@ -442,6 +442,101 @@ static const TypeInfo xive_source_info = { > .class_init = xive_source_class_init, > }; > > +/* > + * XiveEND helpers > + */ > + > +void xive_end_reset(XiveEND *end) > +{ > + memset(end, 0, sizeof(*end)); > + > + /* switch off the escalation and notification ESBs */ > + end->w1 = END_W1_ESe_Q | END_W1_ESn_Q; It's not obvious to me what circumstances this would be called under. Since the ENDs are in system memory, a memset() seems like an odd thing for (virtual) hardware to be doing to it. > +} > + > +static void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, > + Monitor *mon) > +{ > + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; > + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); > + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); > + uint32_t qentries = 1 << (qsize + 10); > + int i; > + > + /* > + * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window > + */ > + monitor_printf(mon, " [ "); > + qindex = (qindex - (width - 1)) & (qentries - 1); > + for (i = 0; i < width; i++) { > + uint64_t qaddr = qaddr_base + (qindex << 2); > + uint32_t qdata = -1; > + > + if (dma_memory_read(&address_space_memory, qaddr, &qdata, > + sizeof(qdata))) { > + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%" > + HWADDR_PRIx "\n", qaddr); > + return; > + } > + monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "", > + be32_to_cpu(qdata)); > + qindex = (qindex + 1) & (qentries - 1); > + } > + monitor_printf(mon, "]\n"); > +} > + > +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon) > +{ > + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; > + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); > + uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1); > + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); > + uint32_t qentries = 1 << (qsize + 10); > + > + uint32_t nvt = GETFIELD(END_W6_NVT_INDEX, end->w6); > + uint8_t priority = GETFIELD(END_W7_F0_PRIORITY, end->w7); > + > + if (!(end->w0 & END_W0_VALID)) { > + return; > + } > + > + monitor_printf(mon, " %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64 > + "% 6d/%5d ^%d", end_idx, > + end->w0 & END_W0_VALID ? 'v' : '-', > + end->w0 & END_W0_ENQUEUE ? 'q' : '-', > + end->w0 & END_W0_UCOND_NOTIFY ? 'n' : '-', > + end->w0 & END_W0_BACKLOG ? 'b' : '-', > + end->w0 & END_W0_ESCALATE_CTL ? 'e' : '-', > + priority, nvt, qaddr_base, qindex, qentries, qgen); > + > + xive_end_queue_pic_print_info(end, 6, mon); > +} > + > +static void xive_end_push(XiveEND *end, uint32_t data) s/push/enqueue/ please, "push" suggests a stack. (Not to mention that "push" and "pull" are used as terms elsewhere in XIVE). > +{ > + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; > + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); > + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); > + uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1); > + > + uint64_t qaddr = qaddr_base + (qindex << 2); > + uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff)); > + uint32_t qentries = 1 << (qsize + 10); > + > + if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) { > + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%" > + HWADDR_PRIx "\n", qaddr); > + return; > + } > + > + qindex = (qindex + 1) & (qentries - 1); > + if (qindex == 0) { > + qgen ^= 1; > + end->w1 = SETFIELD(END_W1_GENERATION, end->w1, qgen); > + } > + end->w1 = SETFIELD(END_W1_PAGE_OFF, end->w1, qindex); > +} > + > /* > * XIVE Router (aka. Virtualization Controller or IVRE) > */ > @@ -460,6 +555,82 @@ int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas) > return xrc->set_eas(xrtr, lisn, eas); > } > > +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > + XiveEND *end) > +{ > + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); > + > + return xrc->get_end(xrtr, end_blk, end_idx, end); > +} > + > +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > + XiveEND *end) > +{ > + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); > + > + return xrc->set_end(xrtr, end_blk, end_idx, end); > +} > + > +/* > + * An END trigger can come from an event trigger (IPI or HW) or from > + * another chip. We don't model the PowerBus but the END trigger > + * message has the same parameters than in the function below. > + */ > +static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, > + uint32_t end_idx, uint32_t end_data) > +{ > + XiveEND end; > + uint8_t priority; > + uint8_t format; > + > + /* END cache lookup */ > + if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) { > + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk, > + end_idx); > + return; > + } > + > + if (!(end.w0 & END_W0_VALID)) { > + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n", > + end_blk, end_idx); > + return; > + } > + > + if (end.w0 & END_W0_ENQUEUE) { > + xive_end_push(&end, end_data); > + xive_router_set_end(xrtr, end_blk, end_idx, &end); > + } > + > + /* > + * The W7 format depends on the F bit in W6. It defines the type > + * of the notification : > + * > + * F=0 : single or multiple NVT notification > + * F=1 : User level Event-Based Branch (EBB) notification, no > + * priority > + */ > + format = GETFIELD(END_W6_FORMAT_BIT, end.w6); > + priority = GETFIELD(END_W7_F0_PRIORITY, end.w7); > + > + /* The END is masked */ > + if (format == 0 && priority == 0xff) { > + return; > + } > + > + /* > + * Check the END ESn (Event State Buffer for notification) for > + * even futher coalescing in the Router > + */ > + if (!(end.w0 & END_W0_UCOND_NOTIFY)) { > + qemu_log_mask(LOG_UNIMP, "XIVE: !UCOND_NOTIFY not implemented\n"); > + return; > + } > + > + /* > + * Follows IVPE notification > + */ > +} > + > static void xive_router_notify(XiveFabric *xf, uint32_t lisn) > { > XiveRouter *xrtr = XIVE_ROUTER(xf); > @@ -471,9 +642,9 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn) > return; > } > > - /* The IVRE has a State Bit Cache for its internal sources which > - * is also involed at this point. We skip the SBC lookup because > - * the state bits of the sources are modeled internally in QEMU. > + /* The IVRE checks the State Bit Cache at this point. We skip the > + * SBC lookup because the state bits of the sources are modeled > + * internally in QEMU. Replacing a comment about something we're not doing with a different comment about something we're not doing doesn't seem very useful. Maybe fold these together into one patch or the other. > */ > > if (!(eas.w & EAS_VALID)) { > @@ -485,6 +656,14 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn) > /* Notification completed */ > return; > } > + > + /* > + * The event trigger becomes an END trigger > + */ > + xive_router_end_notify(xrtr, > + GETFIELD(EAS_END_BLOCK, eas.w), > + GETFIELD(EAS_END_INDEX, eas.w), > + GETFIELD(EAS_END_DATA, eas.w)); > } > > static Property xive_router_properties[] = {
On Thu, 2018-11-22 at 15:41 +1100, David Gibson wrote: > > > +void xive_end_reset(XiveEND *end) > > +{ > > + memset(end, 0, sizeof(*end)); > > + > > + /* switch off the escalation and notification ESBs */ > > + end->w1 = END_W1_ESe_Q | END_W1_ESn_Q; > > It's not obvious to me what circumstances this would be called under. > Since the ENDs are in system memory, a memset() seems like an odd > thing for (virtual) hardware to be doing to it. > > > +} Not on PAPR ... Ben.
On 11/22/18 5:41 AM, David Gibson wrote: > On Fri, Nov 16, 2018 at 11:56:58AM +0100, Cédric Le Goater wrote: >> To complete the event routing, the IVRE sub-engine uses an internal >> table containing Event Notification Descriptor (END) structures. >> >> An END specifies on which Event Queue (EQ) the event notification >> data, defined in the associated EAS, should be posted when an >> exception occurs. It also defines which Notification Virtual Target >> (NVT) should be notified. >> >> The Event Queue is a memory page provided by the O/S defining a >> circular buffer, one per server and priority couple, containing Event >> Queue entries. These are 4 bytes long, the first bit being a >> 'generation' bit and the 31 following bits the END Data field. They >> are pulled by the O/S when the exception occurs. >> >> The END Data field is a way to set an invariant logical event source >> number for an IRQ. It is set with the H_INT_SET_SOURCE_CONFIG hcall >> when the EISN flag is used. >> >> Signed-off-by: Cédric Le Goater <clg@kaod.org> >> --- >> include/hw/ppc/xive.h | 18 ++++ >> include/hw/ppc/xive_regs.h | 48 ++++++++++ >> hw/intc/xive.c | 185 ++++++++++++++++++++++++++++++++++++- >> 3 files changed, 248 insertions(+), 3 deletions(-) >> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h >> index 5a0696366577..ce62aaf28343 100644 >> --- a/include/hw/ppc/xive.h >> +++ b/include/hw/ppc/xive.h >> @@ -193,11 +193,29 @@ typedef struct XiveRouterClass { >> /* XIVE table accessors */ >> int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); >> int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); >> + int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >> + XiveEND *end); >> + int (*set_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >> + XiveEND *end); > > Hrm. So unlike the EAS, which is basically just a word, the END is a > pretty large structure. yes. and so will be the NVT. > It's unclear here if get/set are expected to copy the whole thing out > and in, That's the plan. What I had in mind are memory accessors to the XIVE structures, which are local to QEMU for sPAPR and in the guest RAM for PowerNV (Please take a look at the XIVE PowerNV model). > or if get give you a pointer into a "live" structure no > and set just does any necessary barriers after an update. that would be too complex for the PowerNV model I think. There is a cache in between the software running on the (QEMU) machine and the XIVE HW but it would be hard to handle. > Really, for a non-atomic value like this, I'm not sure get/set is the > right model. ok. we need something to get them out and in. > Also as I understand it nearly all the indices in XIVE are broken into > block/index. Is there a reason those are folded together into lisn > for the EAS, but not for the END? The indexing of the EAT is global to the sytem and the index defines which blk to use. The IRQ source numbers on the powerbus are architected to be : #define XIVE_SRCNO(blk, idx) ((uint32_t)(blk) << 28 | (idx)) and XIVE can use different strategies to identify the XIVE IC in charge of routing. It can be a one-to-one chip to block relation as skiboot does. Using a block scope table is possible also. Our model only supports one block per chip and some shortcuts are taken but not that much in fact. Remote access to the XIVE structures of another chip are done through MMIO (not modeled in PowerNV) and the blkid is used to partition the MMIO regions. Being local is better for performance because the END and NVT tables have a strong relation with the XIVE subengines using them (VC and PC). May be, Ben can clarified it this is badly explained. >> } XiveRouterClass; >> >> void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon); >> >> int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); >> int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); >> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >> + XiveEND *end); >> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >> + XiveEND *end); >> + >> +/* >> + * For legacy compatibility, the exceptions define up to 256 different >> + * priorities. P9 implements only 9 levels : 8 active levels [0 - 7] >> + * and the least favored level 0xFF. >> + */ >> +#define XIVE_PRIORITY_MAX 7 >> + >> +void xive_end_reset(XiveEND *end); >> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon); >> >> #endif /* PPC_XIVE_H */ >> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h >> index 12499b33614c..f97fb2b90bee 100644 >> --- a/include/hw/ppc/xive_regs.h >> +++ b/include/hw/ppc/xive_regs.h >> @@ -28,4 +28,52 @@ typedef struct XiveEAS { >> #define EAS_END_DATA PPC_BITMASK(33, 63) /* Data written to the END */ >> } XiveEAS; >> >> +/* Event Notification Descriptor (END) */ >> +typedef struct XiveEND { >> + uint32_t w0; >> +#define END_W0_VALID PPC_BIT32(0) /* "v" bit */ >> +#define END_W0_ENQUEUE PPC_BIT32(1) /* "q" bit */ >> +#define END_W0_UCOND_NOTIFY PPC_BIT32(2) /* "n" bit */ >> +#define END_W0_BACKLOG PPC_BIT32(3) /* "b" bit */ >> +#define END_W0_PRECL_ESC_CTL PPC_BIT32(4) /* "p" bit */ >> +#define END_W0_ESCALATE_CTL PPC_BIT32(5) /* "e" bit */ >> +#define END_W0_UNCOND_ESCALATE PPC_BIT32(6) /* "u" bit - DD2.0 */ >> +#define END_W0_SILENT_ESCALATE PPC_BIT32(7) /* "s" bit - DD2.0 */ >> +#define END_W0_QSIZE PPC_BITMASK32(12, 15) >> +#define END_W0_SW0 PPC_BIT32(16) >> +#define END_W0_FIRMWARE END_W0_SW0 /* Owned by FW */ >> +#define END_QSIZE_4K 0 >> +#define END_QSIZE_64K 4 >> +#define END_W0_HWDEP PPC_BITMASK32(24, 31) >> + uint32_t w1; >> +#define END_W1_ESn PPC_BITMASK32(0, 1) >> +#define END_W1_ESn_P PPC_BIT32(0) >> +#define END_W1_ESn_Q PPC_BIT32(1) >> +#define END_W1_ESe PPC_BITMASK32(2, 3) >> +#define END_W1_ESe_P PPC_BIT32(2) >> +#define END_W1_ESe_Q PPC_BIT32(3) >> +#define END_W1_GENERATION PPC_BIT32(9) >> +#define END_W1_PAGE_OFF PPC_BITMASK32(10, 31) >> + uint32_t w2; >> +#define END_W2_MIGRATION_REG PPC_BITMASK32(0, 3) >> +#define END_W2_OP_DESC_HI PPC_BITMASK32(4, 31) >> + uint32_t w3; >> +#define END_W3_OP_DESC_LO PPC_BITMASK32(0, 31) >> + uint32_t w4; >> +#define END_W4_ESC_END_BLOCK PPC_BITMASK32(4, 7) >> +#define END_W4_ESC_END_INDEX PPC_BITMASK32(8, 31) >> + uint32_t w5; >> +#define END_W5_ESC_END_DATA PPC_BITMASK32(1, 31) >> + uint32_t w6; >> +#define END_W6_FORMAT_BIT PPC_BIT32(8) >> +#define END_W6_NVT_BLOCK PPC_BITMASK32(9, 12) >> +#define END_W6_NVT_INDEX PPC_BITMASK32(13, 31) >> + uint32_t w7; >> +#define END_W7_F0_IGNORE PPC_BIT32(0) >> +#define END_W7_F0_BLK_GROUPING PPC_BIT32(1) >> +#define END_W7_F0_PRIORITY PPC_BITMASK32(8, 15) >> +#define END_W7_F1_WAKEZ PPC_BIT32(0) >> +#define END_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) >> +} XiveEND; >> + >> #endif /* PPC_XIVE_REGS_H */ >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c >> index c4c90a25758e..9cb001e7b540 100644 >> --- a/hw/intc/xive.c >> +++ b/hw/intc/xive.c >> @@ -442,6 +442,101 @@ static const TypeInfo xive_source_info = { >> .class_init = xive_source_class_init, >> }; >> >> +/* >> + * XiveEND helpers >> + */ >> + >> +void xive_end_reset(XiveEND *end) >> +{ >> + memset(end, 0, sizeof(*end)); >> + >> + /* switch off the escalation and notification ESBs */ >> + end->w1 = END_W1_ESe_Q | END_W1_ESn_Q; > > It's not obvious to me what circumstances this would be called under. > Since the ENDs are in system memory, a memset() seems like an odd > thing for (virtual) hardware to be doing to it. It makes sense on sPAPR if one day some OS starts using the END ESBs for further coalescing of the events. None does for now but I have added the model though. >> +} >> + >> +static void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, >> + Monitor *mon) >> +{ >> + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; >> + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); >> + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); >> + uint32_t qentries = 1 << (qsize + 10); >> + int i; >> + >> + /* >> + * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window >> + */ >> + monitor_printf(mon, " [ "); >> + qindex = (qindex - (width - 1)) & (qentries - 1); >> + for (i = 0; i < width; i++) { >> + uint64_t qaddr = qaddr_base + (qindex << 2); >> + uint32_t qdata = -1; >> + >> + if (dma_memory_read(&address_space_memory, qaddr, &qdata, >> + sizeof(qdata))) { >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%" >> + HWADDR_PRIx "\n", qaddr); >> + return; >> + } >> + monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "", >> + be32_to_cpu(qdata)); >> + qindex = (qindex + 1) & (qentries - 1); >> + } >> + monitor_printf(mon, "]\n"); >> +} >> + >> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon) >> +{ >> + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; >> + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); >> + uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1); >> + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); >> + uint32_t qentries = 1 << (qsize + 10); >> + >> + uint32_t nvt = GETFIELD(END_W6_NVT_INDEX, end->w6); >> + uint8_t priority = GETFIELD(END_W7_F0_PRIORITY, end->w7); >> + >> + if (!(end->w0 & END_W0_VALID)) { >> + return; >> + } >> + >> + monitor_printf(mon, " %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64 >> + "% 6d/%5d ^%d", end_idx, >> + end->w0 & END_W0_VALID ? 'v' : '-', >> + end->w0 & END_W0_ENQUEUE ? 'q' : '-', >> + end->w0 & END_W0_UCOND_NOTIFY ? 'n' : '-', >> + end->w0 & END_W0_BACKLOG ? 'b' : '-', >> + end->w0 & END_W0_ESCALATE_CTL ? 'e' : '-', >> + priority, nvt, qaddr_base, qindex, qentries, qgen); >> + >> + xive_end_queue_pic_print_info(end, 6, mon); >> +} >> + >> +static void xive_end_push(XiveEND *end, uint32_t data) > > s/push/enqueue/ please, "push" suggests a stack. (Not to mention that > "push" and "pull" are used as terms elsewhere in XIVE). yes. you are right. I will change. >> +{ >> + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; >> + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); >> + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); >> + uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1); >> + >> + uint64_t qaddr = qaddr_base + (qindex << 2); >> + uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff)); >> + uint32_t qentries = 1 << (qsize + 10); >> + >> + if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) { >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%" >> + HWADDR_PRIx "\n", qaddr); >> + return; >> + } >> + >> + qindex = (qindex + 1) & (qentries - 1); >> + if (qindex == 0) { >> + qgen ^= 1; >> + end->w1 = SETFIELD(END_W1_GENERATION, end->w1, qgen); >> + } >> + end->w1 = SETFIELD(END_W1_PAGE_OFF, end->w1, qindex); >> +} >> + >> /* >> * XIVE Router (aka. Virtualization Controller or IVRE) >> */ >> @@ -460,6 +555,82 @@ int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas) >> return xrc->set_eas(xrtr, lisn, eas); >> } >> >> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >> + XiveEND *end) >> +{ >> + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); >> + >> + return xrc->get_end(xrtr, end_blk, end_idx, end); >> +} >> + >> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >> + XiveEND *end) >> +{ >> + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); >> + >> + return xrc->set_end(xrtr, end_blk, end_idx, end); >> +} >> + >> +/* >> + * An END trigger can come from an event trigger (IPI or HW) or from >> + * another chip. We don't model the PowerBus but the END trigger >> + * message has the same parameters than in the function below. >> + */ >> +static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, >> + uint32_t end_idx, uint32_t end_data) >> +{ >> + XiveEND end; >> + uint8_t priority; >> + uint8_t format; >> + >> + /* END cache lookup */ >> + if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) { >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk, >> + end_idx); >> + return; >> + } >> + >> + if (!(end.w0 & END_W0_VALID)) { >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n", >> + end_blk, end_idx); >> + return; >> + } >> + >> + if (end.w0 & END_W0_ENQUEUE) { >> + xive_end_push(&end, end_data); >> + xive_router_set_end(xrtr, end_blk, end_idx, &end); >> + } >> + >> + /* >> + * The W7 format depends on the F bit in W6. It defines the type >> + * of the notification : >> + * >> + * F=0 : single or multiple NVT notification >> + * F=1 : User level Event-Based Branch (EBB) notification, no >> + * priority >> + */ >> + format = GETFIELD(END_W6_FORMAT_BIT, end.w6); >> + priority = GETFIELD(END_W7_F0_PRIORITY, end.w7); >> + >> + /* The END is masked */ >> + if (format == 0 && priority == 0xff) { >> + return; >> + } >> + >> + /* >> + * Check the END ESn (Event State Buffer for notification) for >> + * even futher coalescing in the Router >> + */ >> + if (!(end.w0 & END_W0_UCOND_NOTIFY)) { >> + qemu_log_mask(LOG_UNIMP, "XIVE: !UCOND_NOTIFY not implemented\n"); >> + return; >> + } >> + >> + /* >> + * Follows IVPE notification >> + */ >> +} >> + >> static void xive_router_notify(XiveFabric *xf, uint32_t lisn) >> { >> XiveRouter *xrtr = XIVE_ROUTER(xf); >> @@ -471,9 +642,9 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn) >> return; >> } >> >> - /* The IVRE has a State Bit Cache for its internal sources which >> - * is also involed at this point. We skip the SBC lookup because >> - * the state bits of the sources are modeled internally in QEMU. >> + /* The IVRE checks the State Bit Cache at this point. We skip the >> + * SBC lookup because the state bits of the sources are modeled >> + * internally in QEMU. > > Replacing a comment about something we're not doing with a different > comment about something we're not doing doesn't seem very useful. > Maybe fold these together into one patch or the other. That's me rephrasing. it should be indeed in the previous patch Thanks, C. >> */ >> >> if (!(eas.w & EAS_VALID)) { >> @@ -485,6 +656,14 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn) >> /* Notification completed */ >> return; >> } >> + >> + /* >> + * The event trigger becomes an END trigger >> + */ >> + xive_router_end_notify(xrtr, >> + GETFIELD(EAS_END_BLOCK, eas.w), >> + GETFIELD(EAS_END_INDEX, eas.w), >> + GETFIELD(EAS_END_DATA, eas.w)); >> } >> >> static Property xive_router_properties[] = { >
On Thu, Nov 22, 2018 at 05:49:09PM +1100, Benjamin Herrenschmidt wrote: > On Thu, 2018-11-22 at 15:41 +1100, David Gibson wrote: > > > > > +void xive_end_reset(XiveEND *end) > > > +{ > > > + memset(end, 0, sizeof(*end)); > > > + > > > + /* switch off the escalation and notification ESBs */ > > > + end->w1 = END_W1_ESe_Q | END_W1_ESn_Q; > > > > It's not obvious to me what circumstances this would be called under. > > Since the ENDs are in system memory, a memset() seems like an odd > > thing for (virtual) hardware to be doing to it. > > > > > +} > > Not on PAPR ... Right, so the memset() can go in PAPR specific code.
On Thu, Nov 22, 2018 at 10:47:44PM +0100, Cédric Le Goater wrote: > On 11/22/18 5:41 AM, David Gibson wrote: > > On Fri, Nov 16, 2018 at 11:56:58AM +0100, Cédric Le Goater wrote: > >> To complete the event routing, the IVRE sub-engine uses an internal > >> table containing Event Notification Descriptor (END) structures. > >> > >> An END specifies on which Event Queue (EQ) the event notification > >> data, defined in the associated EAS, should be posted when an > >> exception occurs. It also defines which Notification Virtual Target > >> (NVT) should be notified. > >> > >> The Event Queue is a memory page provided by the O/S defining a > >> circular buffer, one per server and priority couple, containing Event > >> Queue entries. These are 4 bytes long, the first bit being a > >> 'generation' bit and the 31 following bits the END Data field. They > >> are pulled by the O/S when the exception occurs. > >> > >> The END Data field is a way to set an invariant logical event source > >> number for an IRQ. It is set with the H_INT_SET_SOURCE_CONFIG hcall > >> when the EISN flag is used. > >> > >> Signed-off-by: Cédric Le Goater <clg@kaod.org> > >> --- > >> include/hw/ppc/xive.h | 18 ++++ > >> include/hw/ppc/xive_regs.h | 48 ++++++++++ > >> hw/intc/xive.c | 185 ++++++++++++++++++++++++++++++++++++- > >> 3 files changed, 248 insertions(+), 3 deletions(-) > >> > >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h > >> index 5a0696366577..ce62aaf28343 100644 > >> --- a/include/hw/ppc/xive.h > >> +++ b/include/hw/ppc/xive.h > >> @@ -193,11 +193,29 @@ typedef struct XiveRouterClass { > >> /* XIVE table accessors */ > >> int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); > >> int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); > >> + int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > >> + XiveEND *end); > >> + int (*set_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > >> + XiveEND *end); > > > > Hrm. So unlike the EAS, which is basically just a word, the END is a > > pretty large structure. > > yes. and so will be the NVT. > > > It's unclear here if get/set are expected to copy the whole thing out > > and in, > > That's the plan. Yeah, I don't think that's a good idea. In some cases the updates are on hot paths, so the extra copy isn't good, and more importantly it makes it look like an atomic update, but it's not really. Well... I guess it probably is because of the BQL, but I'd prefer not to rely on that excessively. > What I had in mind are memory accessors to the XIVE structures, which > are local to QEMU for sPAPR and in the guest RAM for PowerNV (Please > take a look at the XIVE PowerNV model). > > > or if get give you a pointer into a "live" structure > > no > > > and set just does any necessary barriers after an update. > that would be too complex for the PowerNV model I think. There is a cache > in between the software running on the (QEMU) machine and the XIVE HW but > it would be hard to handle. > > > Really, for a non-atomic value like this, I'm not sure get/set is the > > right model. > > ok. we need something to get them out and in. I've thought about this a bit more. What I think might work is "end_read" and "end_write" callbacks, which take a word number in addition to the parameters you have already > > Also as I understand it nearly all the indices in XIVE are broken into > > block/index. Is there a reason those are folded together into lisn > > for the EAS, but not for the END? > > The indexing of the EAT is global to the sytem and the index defines > which blk to use. The IRQ source numbers on the powerbus are architected > to be : > > #define XIVE_SRCNO(blk, idx) ((uint32_t)(blk) << 28 | (idx)) > > and XIVE can use different strategies to identify the XIVE IC in charge > of routing. It can be a one-to-one chip to block relation as skiboot does. > Using a block scope table is possible also. Our model only supports one > block per chip and some shortcuts are taken but not that much in fact. > > Remote access to the XIVE structures of another chip are done through > MMIO (not modeled in PowerNV) and the blkid is used to partition the MMIO > regions. Being local is better for performance because the END and NVT > tables have a strong relation with the XIVE subengines using them > (VC and PC). > > May be, Ben can clarified it this is badly explained. Right.. I think I understand what the blocks are all about. But my question is, why encode the block and index together for the EAS, but separately for the END? > > >> } XiveRouterClass; > >> > >> void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon); > >> > >> int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); > >> int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); > >> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > >> + XiveEND *end); > >> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > >> + XiveEND *end); > >> + > >> +/* > >> + * For legacy compatibility, the exceptions define up to 256 different > >> + * priorities. P9 implements only 9 levels : 8 active levels [0 - 7] > >> + * and the least favored level 0xFF. > >> + */ > >> +#define XIVE_PRIORITY_MAX 7 > >> + > >> +void xive_end_reset(XiveEND *end); > >> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon); > >> > >> #endif /* PPC_XIVE_H */ > >> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h > >> index 12499b33614c..f97fb2b90bee 100644 > >> --- a/include/hw/ppc/xive_regs.h > >> +++ b/include/hw/ppc/xive_regs.h > >> @@ -28,4 +28,52 @@ typedef struct XiveEAS { > >> #define EAS_END_DATA PPC_BITMASK(33, 63) /* Data written to the END */ > >> } XiveEAS; > >> > >> +/* Event Notification Descriptor (END) */ > >> +typedef struct XiveEND { > >> + uint32_t w0; > >> +#define END_W0_VALID PPC_BIT32(0) /* "v" bit */ > >> +#define END_W0_ENQUEUE PPC_BIT32(1) /* "q" bit */ > >> +#define END_W0_UCOND_NOTIFY PPC_BIT32(2) /* "n" bit */ > >> +#define END_W0_BACKLOG PPC_BIT32(3) /* "b" bit */ > >> +#define END_W0_PRECL_ESC_CTL PPC_BIT32(4) /* "p" bit */ > >> +#define END_W0_ESCALATE_CTL PPC_BIT32(5) /* "e" bit */ > >> +#define END_W0_UNCOND_ESCALATE PPC_BIT32(6) /* "u" bit - DD2.0 */ > >> +#define END_W0_SILENT_ESCALATE PPC_BIT32(7) /* "s" bit - DD2.0 */ > >> +#define END_W0_QSIZE PPC_BITMASK32(12, 15) > >> +#define END_W0_SW0 PPC_BIT32(16) > >> +#define END_W0_FIRMWARE END_W0_SW0 /* Owned by FW */ > >> +#define END_QSIZE_4K 0 > >> +#define END_QSIZE_64K 4 > >> +#define END_W0_HWDEP PPC_BITMASK32(24, 31) > >> + uint32_t w1; > >> +#define END_W1_ESn PPC_BITMASK32(0, 1) > >> +#define END_W1_ESn_P PPC_BIT32(0) > >> +#define END_W1_ESn_Q PPC_BIT32(1) > >> +#define END_W1_ESe PPC_BITMASK32(2, 3) > >> +#define END_W1_ESe_P PPC_BIT32(2) > >> +#define END_W1_ESe_Q PPC_BIT32(3) > >> +#define END_W1_GENERATION PPC_BIT32(9) > >> +#define END_W1_PAGE_OFF PPC_BITMASK32(10, 31) > >> + uint32_t w2; > >> +#define END_W2_MIGRATION_REG PPC_BITMASK32(0, 3) > >> +#define END_W2_OP_DESC_HI PPC_BITMASK32(4, 31) > >> + uint32_t w3; > >> +#define END_W3_OP_DESC_LO PPC_BITMASK32(0, 31) > >> + uint32_t w4; > >> +#define END_W4_ESC_END_BLOCK PPC_BITMASK32(4, 7) > >> +#define END_W4_ESC_END_INDEX PPC_BITMASK32(8, 31) > >> + uint32_t w5; > >> +#define END_W5_ESC_END_DATA PPC_BITMASK32(1, 31) > >> + uint32_t w6; > >> +#define END_W6_FORMAT_BIT PPC_BIT32(8) > >> +#define END_W6_NVT_BLOCK PPC_BITMASK32(9, 12) > >> +#define END_W6_NVT_INDEX PPC_BITMASK32(13, 31) > >> + uint32_t w7; > >> +#define END_W7_F0_IGNORE PPC_BIT32(0) > >> +#define END_W7_F0_BLK_GROUPING PPC_BIT32(1) > >> +#define END_W7_F0_PRIORITY PPC_BITMASK32(8, 15) > >> +#define END_W7_F1_WAKEZ PPC_BIT32(0) > >> +#define END_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) > >> +} XiveEND; > >> + > >> #endif /* PPC_XIVE_REGS_H */ > >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c > >> index c4c90a25758e..9cb001e7b540 100644 > >> --- a/hw/intc/xive.c > >> +++ b/hw/intc/xive.c > >> @@ -442,6 +442,101 @@ static const TypeInfo xive_source_info = { > >> .class_init = xive_source_class_init, > >> }; > >> > >> +/* > >> + * XiveEND helpers > >> + */ > >> + > >> +void xive_end_reset(XiveEND *end) > >> +{ > >> + memset(end, 0, sizeof(*end)); > >> + > >> + /* switch off the escalation and notification ESBs */ > >> + end->w1 = END_W1_ESe_Q | END_W1_ESn_Q; > > > > It's not obvious to me what circumstances this would be called under. > > Since the ENDs are in system memory, a memset() seems like an odd > > thing for (virtual) hardware to be doing to it. > > It makes sense on sPAPR if one day some OS starts using the END ESBs for > further coalescing of the events. None does for now but I have added the > model though. Hrm, I think that belongs in PAPR specific code. It's not really part of the router model - it's the PAPR stuff configuring the router at reset time (much as firmware would configure it at reset time for bare metal). > > >> +} > >> + > >> +static void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, > >> + Monitor *mon) > >> +{ > >> + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; > >> + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); > >> + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); > >> + uint32_t qentries = 1 << (qsize + 10); > >> + int i; > >> + > >> + /* > >> + * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window > >> + */ > >> + monitor_printf(mon, " [ "); > >> + qindex = (qindex - (width - 1)) & (qentries - 1); > >> + for (i = 0; i < width; i++) { > >> + uint64_t qaddr = qaddr_base + (qindex << 2); > >> + uint32_t qdata = -1; > >> + > >> + if (dma_memory_read(&address_space_memory, qaddr, &qdata, > >> + sizeof(qdata))) { > >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%" > >> + HWADDR_PRIx "\n", qaddr); > >> + return; > >> + } > >> + monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "", > >> + be32_to_cpu(qdata)); > >> + qindex = (qindex + 1) & (qentries - 1); > >> + } > >> + monitor_printf(mon, "]\n"); > >> +} > >> + > >> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon) > >> +{ > >> + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; > >> + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); > >> + uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1); > >> + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); > >> + uint32_t qentries = 1 << (qsize + 10); > >> + > >> + uint32_t nvt = GETFIELD(END_W6_NVT_INDEX, end->w6); > >> + uint8_t priority = GETFIELD(END_W7_F0_PRIORITY, end->w7); > >> + > >> + if (!(end->w0 & END_W0_VALID)) { > >> + return; > >> + } > >> + > >> + monitor_printf(mon, " %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64 > >> + "% 6d/%5d ^%d", end_idx, > >> + end->w0 & END_W0_VALID ? 'v' : '-', > >> + end->w0 & END_W0_ENQUEUE ? 'q' : '-', > >> + end->w0 & END_W0_UCOND_NOTIFY ? 'n' : '-', > >> + end->w0 & END_W0_BACKLOG ? 'b' : '-', > >> + end->w0 & END_W0_ESCALATE_CTL ? 'e' : '-', > >> + priority, nvt, qaddr_base, qindex, qentries, qgen); > >> + > >> + xive_end_queue_pic_print_info(end, 6, mon); > >> +} > >> + > >> +static void xive_end_push(XiveEND *end, uint32_t data) > > > > s/push/enqueue/ please, "push" suggests a stack. (Not to mention that > > "push" and "pull" are used as terms elsewhere in XIVE). > > yes. you are right. I will change. > > >> +{ > >> + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; > >> + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); > >> + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); > >> + uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1); > >> + > >> + uint64_t qaddr = qaddr_base + (qindex << 2); > >> + uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff)); > >> + uint32_t qentries = 1 << (qsize + 10); > >> + > >> + if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) { > >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%" > >> + HWADDR_PRIx "\n", qaddr); > >> + return; > >> + } > >> + > >> + qindex = (qindex + 1) & (qentries - 1); > >> + if (qindex == 0) { > >> + qgen ^= 1; > >> + end->w1 = SETFIELD(END_W1_GENERATION, end->w1, qgen); > >> + } > >> + end->w1 = SETFIELD(END_W1_PAGE_OFF, end->w1, qindex); > >> +} > >> + > >> /* > >> * XIVE Router (aka. Virtualization Controller or IVRE) > >> */ > >> @@ -460,6 +555,82 @@ int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas) > >> return xrc->set_eas(xrtr, lisn, eas); > >> } > >> > >> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > >> + XiveEND *end) > >> +{ > >> + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); > >> + > >> + return xrc->get_end(xrtr, end_blk, end_idx, end); > >> +} > >> + > >> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, > >> + XiveEND *end) > >> +{ > >> + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); > >> + > >> + return xrc->set_end(xrtr, end_blk, end_idx, end); > >> +} > >> + > >> +/* > >> + * An END trigger can come from an event trigger (IPI or HW) or from > >> + * another chip. We don't model the PowerBus but the END trigger > >> + * message has the same parameters than in the function below. > >> + */ > >> +static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, > >> + uint32_t end_idx, uint32_t end_data) > >> +{ > >> + XiveEND end; > >> + uint8_t priority; > >> + uint8_t format; > >> + > >> + /* END cache lookup */ > >> + if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) { > >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk, > >> + end_idx); > >> + return; > >> + } > >> + > >> + if (!(end.w0 & END_W0_VALID)) { > >> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n", > >> + end_blk, end_idx); > >> + return; > >> + } > >> + > >> + if (end.w0 & END_W0_ENQUEUE) { > >> + xive_end_push(&end, end_data); > >> + xive_router_set_end(xrtr, end_blk, end_idx, &end); > >> + } > >> + > >> + /* > >> + * The W7 format depends on the F bit in W6. It defines the type > >> + * of the notification : > >> + * > >> + * F=0 : single or multiple NVT notification > >> + * F=1 : User level Event-Based Branch (EBB) notification, no > >> + * priority > >> + */ > >> + format = GETFIELD(END_W6_FORMAT_BIT, end.w6); > >> + priority = GETFIELD(END_W7_F0_PRIORITY, end.w7); > >> + > >> + /* The END is masked */ > >> + if (format == 0 && priority == 0xff) { > >> + return; > >> + } > >> + > >> + /* > >> + * Check the END ESn (Event State Buffer for notification) for > >> + * even futher coalescing in the Router > >> + */ > >> + if (!(end.w0 & END_W0_UCOND_NOTIFY)) { > >> + qemu_log_mask(LOG_UNIMP, "XIVE: !UCOND_NOTIFY not implemented\n"); > >> + return; > >> + } > >> + > >> + /* > >> + * Follows IVPE notification > >> + */ > >> +} > >> + > >> static void xive_router_notify(XiveFabric *xf, uint32_t lisn) > >> { > >> XiveRouter *xrtr = XIVE_ROUTER(xf); > >> @@ -471,9 +642,9 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn) > >> return; > >> } > >> > >> - /* The IVRE has a State Bit Cache for its internal sources which > >> - * is also involed at this point. We skip the SBC lookup because > >> - * the state bits of the sources are modeled internally in QEMU. > >> + /* The IVRE checks the State Bit Cache at this point. We skip the > >> + * SBC lookup because the state bits of the sources are modeled > >> + * internally in QEMU. > > > > Replacing a comment about something we're not doing with a different > > comment about something we're not doing doesn't seem very useful. > > Maybe fold these together into one patch or the other. > > That's me rephrasing. it should be indeed in the previous patch > > Thanks, > > C. > > >> */ > >> > >> if (!(eas.w & EAS_VALID)) { > >> @@ -485,6 +656,14 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn) > >> /* Notification completed */ > >> return; > >> } > >> + > >> + /* > >> + * The event trigger becomes an END trigger > >> + */ > >> + xive_router_end_notify(xrtr, > >> + GETFIELD(EAS_END_BLOCK, eas.w), > >> + GETFIELD(EAS_END_INDEX, eas.w), > >> + GETFIELD(EAS_END_DATA, eas.w)); > >> } > >> > >> static Property xive_router_properties[] = { > > >
On 11/23/18 5:35 AM, David Gibson wrote: > On Thu, Nov 22, 2018 at 10:47:44PM +0100, Cédric Le Goater wrote: >> On 11/22/18 5:41 AM, David Gibson wrote: >>> On Fri, Nov 16, 2018 at 11:56:58AM +0100, Cédric Le Goater wrote: >>>> To complete the event routing, the IVRE sub-engine uses an internal >>>> table containing Event Notification Descriptor (END) structures. >>>> >>>> An END specifies on which Event Queue (EQ) the event notification >>>> data, defined in the associated EAS, should be posted when an >>>> exception occurs. It also defines which Notification Virtual Target >>>> (NVT) should be notified. >>>> >>>> The Event Queue is a memory page provided by the O/S defining a >>>> circular buffer, one per server and priority couple, containing Event >>>> Queue entries. These are 4 bytes long, the first bit being a >>>> 'generation' bit and the 31 following bits the END Data field. They >>>> are pulled by the O/S when the exception occurs. >>>> >>>> The END Data field is a way to set an invariant logical event source >>>> number for an IRQ. It is set with the H_INT_SET_SOURCE_CONFIG hcall >>>> when the EISN flag is used. >>>> >>>> Signed-off-by: Cédric Le Goater <clg@kaod.org> >>>> --- >>>> include/hw/ppc/xive.h | 18 ++++ >>>> include/hw/ppc/xive_regs.h | 48 ++++++++++ >>>> hw/intc/xive.c | 185 ++++++++++++++++++++++++++++++++++++- >>>> 3 files changed, 248 insertions(+), 3 deletions(-) >>>> >>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h >>>> index 5a0696366577..ce62aaf28343 100644 >>>> --- a/include/hw/ppc/xive.h >>>> +++ b/include/hw/ppc/xive.h >>>> @@ -193,11 +193,29 @@ typedef struct XiveRouterClass { >>>> /* XIVE table accessors */ >>>> int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); >>>> int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); >>>> + int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >>>> + XiveEND *end); >>>> + int (*set_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >>>> + XiveEND *end); >>> >>> Hrm. So unlike the EAS, which is basically just a word, the END is a >>> pretty large structure. >> >> yes. and so will be the NVT. >> >>> It's unclear here if get/set are expected to copy the whole thing out >>> and in, >> >> That's the plan. > > Yeah, I don't think that's a good idea. In some cases the updates are > on hot paths, so the extra copy isn't good, and more importantly it > makes it look like an atomic update, but it's not really. > > Well... I guess it probably is because of the BQL, but I'd prefer not > to rely on that excessively. > >> What I had in mind are memory accessors to the XIVE structures, which >> are local to QEMU for sPAPR and in the guest RAM for PowerNV (Please >> take a look at the XIVE PowerNV model). >> >>> or if get give you a pointer into a "live" structure >> >> no >> >>> and set just does any necessary barriers after an update. >> that would be too complex for the PowerNV model I think. There is a cache >> in between the software running on the (QEMU) machine and the XIVE HW but >> it would be hard to handle. >> >>> Really, for a non-atomic value like this, I'm not sure get/set is the >>> right model. >> >> ok. we need something to get them out and in. > > I've thought about this a bit more. What I think might work is > "end_read" and "end_write" callbacks, which take a word number in > addition to the parameters you have already ouch. This is not going to simplify the routing algo where all the get and set are done. The whole END is needed in the END trigger. So we will need routines to get and set the whole END. The NVT is not used too much for the moment. >>> Also as I understand it nearly all the indices in XIVE are broken into >>> block/index. Is there a reason those are folded together into lisn >>> for the EAS, but not for the END? >> >> The indexing of the EAT is global to the sytem and the index defines >> which blk to use. The IRQ source numbers on the powerbus are architected >> to be : >> >> #define XIVE_SRCNO(blk, idx) ((uint32_t)(blk) << 28 | (idx)) >> >> and XIVE can use different strategies to identify the XIVE IC in charge >> of routing. It can be a one-to-one chip to block relation as skiboot does. >> Using a block scope table is possible also. Our model only supports one >> block per chip and some shortcuts are taken but not that much in fact. >> >> Remote access to the XIVE structures of another chip are done through >> MMIO (not modeled in PowerNV) and the blkid is used to partition the MMIO >> regions. Being local is better for performance because the END and NVT >> tables have a strong relation with the XIVE subengines using them >> (VC and PC). >> >> May be, Ben can clarified it this is badly explained. > > Right.. I think I understand what the blocks are all about. > > But my question is, why encode the block and index together for the > EAS, but separately for the END? mostly because when the PowerNV devices forward an notification event, they do so by writing the full IRQ number on a MMIO notify port and this number reaches the XiveRouter routing algo without being modified. On sPAPR it's even simpler. But we could decode the full IRQ number in the router before querying the associated EAS. The EAS accessor would then use the same interface. I will look into it. Not a big change. I think. > >> >>>> } XiveRouterClass; >>>> >>>> void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon); >>>> >>>> int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); >>>> int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); >>>> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >>>> + XiveEND *end); >>>> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >>>> + XiveEND *end); >>>> + >>>> +/* >>>> + * For legacy compatibility, the exceptions define up to 256 different >>>> + * priorities. P9 implements only 9 levels : 8 active levels [0 - 7] >>>> + * and the least favored level 0xFF. >>>> + */ >>>> +#define XIVE_PRIORITY_MAX 7 >>>> + >>>> +void xive_end_reset(XiveEND *end); >>>> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon); >>>> >>>> #endif /* PPC_XIVE_H */ >>>> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h >>>> index 12499b33614c..f97fb2b90bee 100644 >>>> --- a/include/hw/ppc/xive_regs.h >>>> +++ b/include/hw/ppc/xive_regs.h >>>> @@ -28,4 +28,52 @@ typedef struct XiveEAS { >>>> #define EAS_END_DATA PPC_BITMASK(33, 63) /* Data written to the END */ >>>> } XiveEAS; >>>> >>>> +/* Event Notification Descriptor (END) */ >>>> +typedef struct XiveEND { >>>> + uint32_t w0; >>>> +#define END_W0_VALID PPC_BIT32(0) /* "v" bit */ >>>> +#define END_W0_ENQUEUE PPC_BIT32(1) /* "q" bit */ >>>> +#define END_W0_UCOND_NOTIFY PPC_BIT32(2) /* "n" bit */ >>>> +#define END_W0_BACKLOG PPC_BIT32(3) /* "b" bit */ >>>> +#define END_W0_PRECL_ESC_CTL PPC_BIT32(4) /* "p" bit */ >>>> +#define END_W0_ESCALATE_CTL PPC_BIT32(5) /* "e" bit */ >>>> +#define END_W0_UNCOND_ESCALATE PPC_BIT32(6) /* "u" bit - DD2.0 */ >>>> +#define END_W0_SILENT_ESCALATE PPC_BIT32(7) /* "s" bit - DD2.0 */ >>>> +#define END_W0_QSIZE PPC_BITMASK32(12, 15) >>>> +#define END_W0_SW0 PPC_BIT32(16) >>>> +#define END_W0_FIRMWARE END_W0_SW0 /* Owned by FW */ >>>> +#define END_QSIZE_4K 0 >>>> +#define END_QSIZE_64K 4 >>>> +#define END_W0_HWDEP PPC_BITMASK32(24, 31) >>>> + uint32_t w1; >>>> +#define END_W1_ESn PPC_BITMASK32(0, 1) >>>> +#define END_W1_ESn_P PPC_BIT32(0) >>>> +#define END_W1_ESn_Q PPC_BIT32(1) >>>> +#define END_W1_ESe PPC_BITMASK32(2, 3) >>>> +#define END_W1_ESe_P PPC_BIT32(2) >>>> +#define END_W1_ESe_Q PPC_BIT32(3) >>>> +#define END_W1_GENERATION PPC_BIT32(9) >>>> +#define END_W1_PAGE_OFF PPC_BITMASK32(10, 31) >>>> + uint32_t w2; >>>> +#define END_W2_MIGRATION_REG PPC_BITMASK32(0, 3) >>>> +#define END_W2_OP_DESC_HI PPC_BITMASK32(4, 31) >>>> + uint32_t w3; >>>> +#define END_W3_OP_DESC_LO PPC_BITMASK32(0, 31) >>>> + uint32_t w4; >>>> +#define END_W4_ESC_END_BLOCK PPC_BITMASK32(4, 7) >>>> +#define END_W4_ESC_END_INDEX PPC_BITMASK32(8, 31) >>>> + uint32_t w5; >>>> +#define END_W5_ESC_END_DATA PPC_BITMASK32(1, 31) >>>> + uint32_t w6; >>>> +#define END_W6_FORMAT_BIT PPC_BIT32(8) >>>> +#define END_W6_NVT_BLOCK PPC_BITMASK32(9, 12) >>>> +#define END_W6_NVT_INDEX PPC_BITMASK32(13, 31) >>>> + uint32_t w7; >>>> +#define END_W7_F0_IGNORE PPC_BIT32(0) >>>> +#define END_W7_F0_BLK_GROUPING PPC_BIT32(1) >>>> +#define END_W7_F0_PRIORITY PPC_BITMASK32(8, 15) >>>> +#define END_W7_F1_WAKEZ PPC_BIT32(0) >>>> +#define END_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) >>>> +} XiveEND; >>>> + >>>> #endif /* PPC_XIVE_REGS_H */ >>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c >>>> index c4c90a25758e..9cb001e7b540 100644 >>>> --- a/hw/intc/xive.c >>>> +++ b/hw/intc/xive.c >>>> @@ -442,6 +442,101 @@ static const TypeInfo xive_source_info = { >>>> .class_init = xive_source_class_init, >>>> }; >>>> >>>> +/* >>>> + * XiveEND helpers >>>> + */ >>>> + >>>> +void xive_end_reset(XiveEND *end) >>>> +{ >>>> + memset(end, 0, sizeof(*end)); >>>> + >>>> + /* switch off the escalation and notification ESBs */ >>>> + end->w1 = END_W1_ESe_Q | END_W1_ESn_Q; >>> >>> It's not obvious to me what circumstances this would be called under. >>> Since the ENDs are in system memory, a memset() seems like an odd >>> thing for (virtual) hardware to be doing to it. >> >> It makes sense on sPAPR if one day some OS starts using the END ESBs for >> further coalescing of the events. None does for now but I have added the >> model though. > > Hrm, I think that belongs in PAPR specific code. It's not really part > of the router model - it's the PAPR stuff configuring the router at > reset time (much as firmware would configure it at reset time for bare > metal). This is true this routine is only used by the H_INT_RESET hcall and by the reset handler of the sPAPR controller model. But it made sense to put this END helper routine with the other END routines. Don't you think so ? C. > >> >>>> +} >>>> + >>>> +static void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, >>>> + Monitor *mon) >>>> +{ >>>> + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; >>>> + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); >>>> + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); >>>> + uint32_t qentries = 1 << (qsize + 10); >>>> + int i; >>>> + >>>> + /* >>>> + * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window >>>> + */ >>>> + monitor_printf(mon, " [ "); >>>> + qindex = (qindex - (width - 1)) & (qentries - 1); >>>> + for (i = 0; i < width; i++) { >>>> + uint64_t qaddr = qaddr_base + (qindex << 2); >>>> + uint32_t qdata = -1; >>>> + >>>> + if (dma_memory_read(&address_space_memory, qaddr, &qdata, >>>> + sizeof(qdata))) { >>>> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%" >>>> + HWADDR_PRIx "\n", qaddr); >>>> + return; >>>> + } >>>> + monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "", >>>> + be32_to_cpu(qdata)); >>>> + qindex = (qindex + 1) & (qentries - 1); >>>> + } >>>> + monitor_printf(mon, "]\n"); >>>> +} >>>> + >>>> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon) >>>> +{ >>>> + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; >>>> + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); >>>> + uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1); >>>> + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); >>>> + uint32_t qentries = 1 << (qsize + 10); >>>> + >>>> + uint32_t nvt = GETFIELD(END_W6_NVT_INDEX, end->w6); >>>> + uint8_t priority = GETFIELD(END_W7_F0_PRIORITY, end->w7); >>>> + >>>> + if (!(end->w0 & END_W0_VALID)) { >>>> + return; >>>> + } >>>> + >>>> + monitor_printf(mon, " %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64 >>>> + "% 6d/%5d ^%d", end_idx, >>>> + end->w0 & END_W0_VALID ? 'v' : '-', >>>> + end->w0 & END_W0_ENQUEUE ? 'q' : '-', >>>> + end->w0 & END_W0_UCOND_NOTIFY ? 'n' : '-', >>>> + end->w0 & END_W0_BACKLOG ? 'b' : '-', >>>> + end->w0 & END_W0_ESCALATE_CTL ? 'e' : '-', >>>> + priority, nvt, qaddr_base, qindex, qentries, qgen); >>>> + >>>> + xive_end_queue_pic_print_info(end, 6, mon); >>>> +} >>>> + >>>> +static void xive_end_push(XiveEND *end, uint32_t data) >>> >>> s/push/enqueue/ please, "push" suggests a stack. (Not to mention that >>> "push" and "pull" are used as terms elsewhere in XIVE). >> >> yes. you are right. I will change. >> >>>> +{ >>>> + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; >>>> + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); >>>> + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); >>>> + uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1); >>>> + >>>> + uint64_t qaddr = qaddr_base + (qindex << 2); >>>> + uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff)); >>>> + uint32_t qentries = 1 << (qsize + 10); >>>> + >>>> + if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) { >>>> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%" >>>> + HWADDR_PRIx "\n", qaddr); >>>> + return; >>>> + } >>>> + >>>> + qindex = (qindex + 1) & (qentries - 1); >>>> + if (qindex == 0) { >>>> + qgen ^= 1; >>>> + end->w1 = SETFIELD(END_W1_GENERATION, end->w1, qgen); >>>> + } >>>> + end->w1 = SETFIELD(END_W1_PAGE_OFF, end->w1, qindex); >>>> +} >>>> + >>>> /* >>>> * XIVE Router (aka. Virtualization Controller or IVRE) >>>> */ >>>> @@ -460,6 +555,82 @@ int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas) >>>> return xrc->set_eas(xrtr, lisn, eas); >>>> } >>>> >>>> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >>>> + XiveEND *end) >>>> +{ >>>> + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); >>>> + >>>> + return xrc->get_end(xrtr, end_blk, end_idx, end); >>>> +} >>>> + >>>> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, >>>> + XiveEND *end) >>>> +{ >>>> + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); >>>> + >>>> + return xrc->set_end(xrtr, end_blk, end_idx, end); >>>> +} >>>> + >>>> +/* >>>> + * An END trigger can come from an event trigger (IPI or HW) or from >>>> + * another chip. We don't model the PowerBus but the END trigger >>>> + * message has the same parameters than in the function below. >>>> + */ >>>> +static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, >>>> + uint32_t end_idx, uint32_t end_data) >>>> +{ >>>> + XiveEND end; >>>> + uint8_t priority; >>>> + uint8_t format; >>>> + >>>> + /* END cache lookup */ >>>> + if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) { >>>> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk, >>>> + end_idx); >>>> + return; >>>> + } >>>> + >>>> + if (!(end.w0 & END_W0_VALID)) { >>>> + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n", >>>> + end_blk, end_idx); >>>> + return; >>>> + } >>>> + >>>> + if (end.w0 & END_W0_ENQUEUE) { >>>> + xive_end_push(&end, end_data); >>>> + xive_router_set_end(xrtr, end_blk, end_idx, &end); >>>> + } >>>> + >>>> + /* >>>> + * The W7 format depends on the F bit in W6. It defines the type >>>> + * of the notification : >>>> + * >>>> + * F=0 : single or multiple NVT notification >>>> + * F=1 : User level Event-Based Branch (EBB) notification, no >>>> + * priority >>>> + */ >>>> + format = GETFIELD(END_W6_FORMAT_BIT, end.w6); >>>> + priority = GETFIELD(END_W7_F0_PRIORITY, end.w7); >>>> + >>>> + /* The END is masked */ >>>> + if (format == 0 && priority == 0xff) { >>>> + return; >>>> + } >>>> + >>>> + /* >>>> + * Check the END ESn (Event State Buffer for notification) for >>>> + * even futher coalescing in the Router >>>> + */ >>>> + if (!(end.w0 & END_W0_UCOND_NOTIFY)) { >>>> + qemu_log_mask(LOG_UNIMP, "XIVE: !UCOND_NOTIFY not implemented\n"); >>>> + return; >>>> + } >>>> + >>>> + /* >>>> + * Follows IVPE notification >>>> + */ >>>> +} >>>> + >>>> static void xive_router_notify(XiveFabric *xf, uint32_t lisn) >>>> { >>>> XiveRouter *xrtr = XIVE_ROUTER(xf); >>>> @@ -471,9 +642,9 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn) >>>> return; >>>> } >>>> >>>> - /* The IVRE has a State Bit Cache for its internal sources which >>>> - * is also involed at this point. We skip the SBC lookup because >>>> - * the state bits of the sources are modeled internally in QEMU. >>>> + /* The IVRE checks the State Bit Cache at this point. We skip the >>>> + * SBC lookup because the state bits of the sources are modeled >>>> + * internally in QEMU. >>> >>> Replacing a comment about something we're not doing with a different >>> comment about something we're not doing doesn't seem very useful. >>> Maybe fold these together into one patch or the other. >> >> That's me rephrasing. it should be indeed in the previous patch >> >> Thanks, >> >> C. >> >>>> */ >>>> >>>> if (!(eas.w & EAS_VALID)) { >>>> @@ -485,6 +656,14 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn) >>>> /* Notification completed */ >>>> return; >>>> } >>>> + >>>> + /* >>>> + * The event trigger becomes an END trigger >>>> + */ >>>> + xive_router_end_notify(xrtr, >>>> + GETFIELD(EAS_END_BLOCK, eas.w), >>>> + GETFIELD(EAS_END_INDEX, eas.w), >>>> + GETFIELD(EAS_END_DATA, eas.w)); >>>> } >>>> >>>> static Property xive_router_properties[] = { >>> >> >
On Fri, Nov 23, 2018 at 12:01:27PM +0100, Cédric Le Goater wrote: > On 11/23/18 5:35 AM, David Gibson wrote: > > On Thu, Nov 22, 2018 at 10:47:44PM +0100, Cédric Le Goater wrote: > >> On 11/22/18 5:41 AM, David Gibson wrote: > >>> On Fri, Nov 16, 2018 at 11:56:58AM +0100, Cédric Le Goater > wrote: [snip] > >>>> +/* > >>>> + * XiveEND helpers > >>>> + */ > >>>> + > >>>> +void xive_end_reset(XiveEND *end) > >>>> +{ > >>>> + memset(end, 0, sizeof(*end)); > >>>> + > >>>> + /* switch off the escalation and notification ESBs */ > >>>> + end->w1 = END_W1_ESe_Q | END_W1_ESn_Q; > >>> > >>> It's not obvious to me what circumstances this would be called under. > >>> Since the ENDs are in system memory, a memset() seems like an odd > >>> thing for (virtual) hardware to be doing to it. > >> > >> It makes sense on sPAPR if one day some OS starts using the END ESBs for > >> further coalescing of the events. None does for now but I have added the > >> model though. > > > > Hrm, I think that belongs in PAPR specific code. It's not really part > > of the router model - it's the PAPR stuff configuring the router at > > reset time (much as firmware would configure it at reset time for bare > > metal). > > This is true this routine is only used by the H_INT_RESET hcall and by > the reset handler of the sPAPR controller model. But it made sense to put > this END helper routine with the other END routines. Don't you think so ? Actually, no. In real hardware this would be handled by a different component - the system firmware would do this setup of the ENDs as it configures the XIVE. So it makes sense to do this in a separate component for PAPR as well. In this case that's another piece of qemu (the spapr stuff) rather than being within the VM, but the difference isn't important to the END handling itself.
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h index 5a0696366577..ce62aaf28343 100644 --- a/include/hw/ppc/xive.h +++ b/include/hw/ppc/xive.h @@ -193,11 +193,29 @@ typedef struct XiveRouterClass { /* XIVE table accessors */ int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); + int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end); + int (*set_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end); } XiveRouterClass; void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon); int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas); +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end); +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end); + +/* + * For legacy compatibility, the exceptions define up to 256 different + * priorities. P9 implements only 9 levels : 8 active levels [0 - 7] + * and the least favored level 0xFF. + */ +#define XIVE_PRIORITY_MAX 7 + +void xive_end_reset(XiveEND *end); +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon); #endif /* PPC_XIVE_H */ diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h index 12499b33614c..f97fb2b90bee 100644 --- a/include/hw/ppc/xive_regs.h +++ b/include/hw/ppc/xive_regs.h @@ -28,4 +28,52 @@ typedef struct XiveEAS { #define EAS_END_DATA PPC_BITMASK(33, 63) /* Data written to the END */ } XiveEAS; +/* Event Notification Descriptor (END) */ +typedef struct XiveEND { + uint32_t w0; +#define END_W0_VALID PPC_BIT32(0) /* "v" bit */ +#define END_W0_ENQUEUE PPC_BIT32(1) /* "q" bit */ +#define END_W0_UCOND_NOTIFY PPC_BIT32(2) /* "n" bit */ +#define END_W0_BACKLOG PPC_BIT32(3) /* "b" bit */ +#define END_W0_PRECL_ESC_CTL PPC_BIT32(4) /* "p" bit */ +#define END_W0_ESCALATE_CTL PPC_BIT32(5) /* "e" bit */ +#define END_W0_UNCOND_ESCALATE PPC_BIT32(6) /* "u" bit - DD2.0 */ +#define END_W0_SILENT_ESCALATE PPC_BIT32(7) /* "s" bit - DD2.0 */ +#define END_W0_QSIZE PPC_BITMASK32(12, 15) +#define END_W0_SW0 PPC_BIT32(16) +#define END_W0_FIRMWARE END_W0_SW0 /* Owned by FW */ +#define END_QSIZE_4K 0 +#define END_QSIZE_64K 4 +#define END_W0_HWDEP PPC_BITMASK32(24, 31) + uint32_t w1; +#define END_W1_ESn PPC_BITMASK32(0, 1) +#define END_W1_ESn_P PPC_BIT32(0) +#define END_W1_ESn_Q PPC_BIT32(1) +#define END_W1_ESe PPC_BITMASK32(2, 3) +#define END_W1_ESe_P PPC_BIT32(2) +#define END_W1_ESe_Q PPC_BIT32(3) +#define END_W1_GENERATION PPC_BIT32(9) +#define END_W1_PAGE_OFF PPC_BITMASK32(10, 31) + uint32_t w2; +#define END_W2_MIGRATION_REG PPC_BITMASK32(0, 3) +#define END_W2_OP_DESC_HI PPC_BITMASK32(4, 31) + uint32_t w3; +#define END_W3_OP_DESC_LO PPC_BITMASK32(0, 31) + uint32_t w4; +#define END_W4_ESC_END_BLOCK PPC_BITMASK32(4, 7) +#define END_W4_ESC_END_INDEX PPC_BITMASK32(8, 31) + uint32_t w5; +#define END_W5_ESC_END_DATA PPC_BITMASK32(1, 31) + uint32_t w6; +#define END_W6_FORMAT_BIT PPC_BIT32(8) +#define END_W6_NVT_BLOCK PPC_BITMASK32(9, 12) +#define END_W6_NVT_INDEX PPC_BITMASK32(13, 31) + uint32_t w7; +#define END_W7_F0_IGNORE PPC_BIT32(0) +#define END_W7_F0_BLK_GROUPING PPC_BIT32(1) +#define END_W7_F0_PRIORITY PPC_BITMASK32(8, 15) +#define END_W7_F1_WAKEZ PPC_BIT32(0) +#define END_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1, 31) +} XiveEND; + #endif /* PPC_XIVE_REGS_H */ diff --git a/hw/intc/xive.c b/hw/intc/xive.c index c4c90a25758e..9cb001e7b540 100644 --- a/hw/intc/xive.c +++ b/hw/intc/xive.c @@ -442,6 +442,101 @@ static const TypeInfo xive_source_info = { .class_init = xive_source_class_init, }; +/* + * XiveEND helpers + */ + +void xive_end_reset(XiveEND *end) +{ + memset(end, 0, sizeof(*end)); + + /* switch off the escalation and notification ESBs */ + end->w1 = END_W1_ESe_Q | END_W1_ESn_Q; +} + +static void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width, + Monitor *mon) +{ + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); + uint32_t qentries = 1 << (qsize + 10); + int i; + + /* + * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window + */ + monitor_printf(mon, " [ "); + qindex = (qindex - (width - 1)) & (qentries - 1); + for (i = 0; i < width; i++) { + uint64_t qaddr = qaddr_base + (qindex << 2); + uint32_t qdata = -1; + + if (dma_memory_read(&address_space_memory, qaddr, &qdata, + sizeof(qdata))) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%" + HWADDR_PRIx "\n", qaddr); + return; + } + monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "", + be32_to_cpu(qdata)); + qindex = (qindex + 1) & (qentries - 1); + } + monitor_printf(mon, "]\n"); +} + +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon) +{ + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); + uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1); + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); + uint32_t qentries = 1 << (qsize + 10); + + uint32_t nvt = GETFIELD(END_W6_NVT_INDEX, end->w6); + uint8_t priority = GETFIELD(END_W7_F0_PRIORITY, end->w7); + + if (!(end->w0 & END_W0_VALID)) { + return; + } + + monitor_printf(mon, " %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64 + "% 6d/%5d ^%d", end_idx, + end->w0 & END_W0_VALID ? 'v' : '-', + end->w0 & END_W0_ENQUEUE ? 'q' : '-', + end->w0 & END_W0_UCOND_NOTIFY ? 'n' : '-', + end->w0 & END_W0_BACKLOG ? 'b' : '-', + end->w0 & END_W0_ESCALATE_CTL ? 'e' : '-', + priority, nvt, qaddr_base, qindex, qentries, qgen); + + xive_end_queue_pic_print_info(end, 6, mon); +} + +static void xive_end_push(XiveEND *end, uint32_t data) +{ + uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3; + uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0); + uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1); + uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1); + + uint64_t qaddr = qaddr_base + (qindex << 2); + uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff)); + uint32_t qentries = 1 << (qsize + 10); + + if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%" + HWADDR_PRIx "\n", qaddr); + return; + } + + qindex = (qindex + 1) & (qentries - 1); + if (qindex == 0) { + qgen ^= 1; + end->w1 = SETFIELD(END_W1_GENERATION, end->w1, qgen); + } + end->w1 = SETFIELD(END_W1_PAGE_OFF, end->w1, qindex); +} + /* * XIVE Router (aka. Virtualization Controller or IVRE) */ @@ -460,6 +555,82 @@ int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas) return xrc->set_eas(xrtr, lisn, eas); } +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end) +{ + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); + + return xrc->get_end(xrtr, end_blk, end_idx, end); +} + +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx, + XiveEND *end) +{ + XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr); + + return xrc->set_end(xrtr, end_blk, end_idx, end); +} + +/* + * An END trigger can come from an event trigger (IPI or HW) or from + * another chip. We don't model the PowerBus but the END trigger + * message has the same parameters than in the function below. + */ +static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk, + uint32_t end_idx, uint32_t end_data) +{ + XiveEND end; + uint8_t priority; + uint8_t format; + + /* END cache lookup */ + if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk, + end_idx); + return; + } + + if (!(end.w0 & END_W0_VALID)) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n", + end_blk, end_idx); + return; + } + + if (end.w0 & END_W0_ENQUEUE) { + xive_end_push(&end, end_data); + xive_router_set_end(xrtr, end_blk, end_idx, &end); + } + + /* + * The W7 format depends on the F bit in W6. It defines the type + * of the notification : + * + * F=0 : single or multiple NVT notification + * F=1 : User level Event-Based Branch (EBB) notification, no + * priority + */ + format = GETFIELD(END_W6_FORMAT_BIT, end.w6); + priority = GETFIELD(END_W7_F0_PRIORITY, end.w7); + + /* The END is masked */ + if (format == 0 && priority == 0xff) { + return; + } + + /* + * Check the END ESn (Event State Buffer for notification) for + * even futher coalescing in the Router + */ + if (!(end.w0 & END_W0_UCOND_NOTIFY)) { + qemu_log_mask(LOG_UNIMP, "XIVE: !UCOND_NOTIFY not implemented\n"); + return; + } + + /* + * Follows IVPE notification + */ +} + static void xive_router_notify(XiveFabric *xf, uint32_t lisn) { XiveRouter *xrtr = XIVE_ROUTER(xf); @@ -471,9 +642,9 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn) return; } - /* The IVRE has a State Bit Cache for its internal sources which - * is also involed at this point. We skip the SBC lookup because - * the state bits of the sources are modeled internally in QEMU. + /* The IVRE checks the State Bit Cache at this point. We skip the + * SBC lookup because the state bits of the sources are modeled + * internally in QEMU. */ if (!(eas.w & EAS_VALID)) { @@ -485,6 +656,14 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn) /* Notification completed */ return; } + + /* + * The event trigger becomes an END trigger + */ + xive_router_end_notify(xrtr, + GETFIELD(EAS_END_BLOCK, eas.w), + GETFIELD(EAS_END_INDEX, eas.w), + GETFIELD(EAS_END_DATA, eas.w)); } static Property xive_router_properties[] = {
To complete the event routing, the IVRE sub-engine uses an internal table containing Event Notification Descriptor (END) structures. An END specifies on which Event Queue (EQ) the event notification data, defined in the associated EAS, should be posted when an exception occurs. It also defines which Notification Virtual Target (NVT) should be notified. The Event Queue is a memory page provided by the O/S defining a circular buffer, one per server and priority couple, containing Event Queue entries. These are 4 bytes long, the first bit being a 'generation' bit and the 31 following bits the END Data field. They are pulled by the O/S when the exception occurs. The END Data field is a way to set an invariant logical event source number for an IRQ. It is set with the H_INT_SET_SOURCE_CONFIG hcall when the EISN flag is used. Signed-off-by: Cédric Le Goater <clg@kaod.org> --- include/hw/ppc/xive.h | 18 ++++ include/hw/ppc/xive_regs.h | 48 ++++++++++ hw/intc/xive.c | 185 ++++++++++++++++++++++++++++++++++++- 3 files changed, 248 insertions(+), 3 deletions(-)