diff mbox series

[v5,05/36] ppc/xive: introduce the XIVE Event Notification Descriptors

Message ID 20181116105729.23240-6-clg@kaod.org (mailing list archive)
State New, archived
Headers show
Series ppc: support for the XIVE interrupt controller (POWER9) | expand

Commit Message

Cédric Le Goater Nov. 16, 2018, 10:56 a.m. UTC
To complete the event routing, the IVRE sub-engine uses an internal
table containing Event Notification Descriptor (END) structures.

An END specifies on which Event Queue (EQ) the event notification
data, defined in the associated EAS, should be posted when an
exception occurs. It also defines which Notification Virtual Target
(NVT) should be notified.

The Event Queue is a memory page provided by the O/S defining a
circular buffer, one per server and priority couple, containing Event
Queue entries. These are 4 bytes long, the first bit being a
'generation' bit and the 31 following bits the END Data field. They
are pulled by the O/S when the exception occurs.

The END Data field is a way to set an invariant logical event source
number for an IRQ. It is set with the H_INT_SET_SOURCE_CONFIG hcall
when the EISN flag is used.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ppc/xive.h      |  18 ++++
 include/hw/ppc/xive_regs.h |  48 ++++++++++
 hw/intc/xive.c             | 185 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 248 insertions(+), 3 deletions(-)

Comments

David Gibson Nov. 22, 2018, 4:41 a.m. UTC | #1
On Fri, Nov 16, 2018 at 11:56:58AM +0100, Cédric Le Goater wrote:
> To complete the event routing, the IVRE sub-engine uses an internal
> table containing Event Notification Descriptor (END) structures.
> 
> An END specifies on which Event Queue (EQ) the event notification
> data, defined in the associated EAS, should be posted when an
> exception occurs. It also defines which Notification Virtual Target
> (NVT) should be notified.
> 
> The Event Queue is a memory page provided by the O/S defining a
> circular buffer, one per server and priority couple, containing Event
> Queue entries. These are 4 bytes long, the first bit being a
> 'generation' bit and the 31 following bits the END Data field. They
> are pulled by the O/S when the exception occurs.
> 
> The END Data field is a way to set an invariant logical event source
> number for an IRQ. It is set with the H_INT_SET_SOURCE_CONFIG hcall
> when the EISN flag is used.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/xive.h      |  18 ++++
>  include/hw/ppc/xive_regs.h |  48 ++++++++++
>  hw/intc/xive.c             | 185 ++++++++++++++++++++++++++++++++++++-
>  3 files changed, 248 insertions(+), 3 deletions(-)
> 
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index 5a0696366577..ce62aaf28343 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -193,11 +193,29 @@ typedef struct XiveRouterClass {
>      /* XIVE table accessors */
>      int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>      int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> +    int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> +                   XiveEND *end);
> +    int (*set_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> +                   XiveEND *end);

Hrm.  So unlike the EAS, which is basically just a word, the END is a
pretty large structure.  It's unclear here if get/set are expected to
copy the whole thing out and in, or if get get give you a pointer into
a "live" structure and set just does any necessary barriers after an
update.

Really, for a non-atomic value like this, I'm not sure get/set is the
right model.

Also as I understand it nearly all the indices in XIVE are broken into
block/index.  Is there a reason those are folded together into lisn
for the EAS, but not for the END?

>  } XiveRouterClass;
>  
>  void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
>  
>  int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>  int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> +                        XiveEND *end);
> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> +                        XiveEND *end);
> +
> +/*
> + * For legacy compatibility, the exceptions define up to 256 different
> + * priorities. P9 implements only 9 levels : 8 active levels [0 - 7]
> + * and the least favored level 0xFF.
> + */
> +#define XIVE_PRIORITY_MAX  7
> +
> +void xive_end_reset(XiveEND *end);
> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon);
>  
>  #endif /* PPC_XIVE_H */
> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
> index 12499b33614c..f97fb2b90bee 100644
> --- a/include/hw/ppc/xive_regs.h
> +++ b/include/hw/ppc/xive_regs.h
> @@ -28,4 +28,52 @@ typedef struct XiveEAS {
>  #define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
>  } XiveEAS;
>  
> +/* Event Notification Descriptor (END) */
> +typedef struct XiveEND {
> +        uint32_t        w0;
> +#define END_W0_VALID             PPC_BIT32(0) /* "v" bit */
> +#define END_W0_ENQUEUE           PPC_BIT32(1) /* "q" bit */
> +#define END_W0_UCOND_NOTIFY      PPC_BIT32(2) /* "n" bit */
> +#define END_W0_BACKLOG           PPC_BIT32(3) /* "b" bit */
> +#define END_W0_PRECL_ESC_CTL     PPC_BIT32(4) /* "p" bit */
> +#define END_W0_ESCALATE_CTL      PPC_BIT32(5) /* "e" bit */
> +#define END_W0_UNCOND_ESCALATE   PPC_BIT32(6) /* "u" bit - DD2.0 */
> +#define END_W0_SILENT_ESCALATE   PPC_BIT32(7) /* "s" bit - DD2.0 */
> +#define END_W0_QSIZE             PPC_BITMASK32(12, 15)
> +#define END_W0_SW0               PPC_BIT32(16)
> +#define END_W0_FIRMWARE          END_W0_SW0 /* Owned by FW */
> +#define END_QSIZE_4K             0
> +#define END_QSIZE_64K            4
> +#define END_W0_HWDEP             PPC_BITMASK32(24, 31)
> +        uint32_t        w1;
> +#define END_W1_ESn               PPC_BITMASK32(0, 1)
> +#define END_W1_ESn_P             PPC_BIT32(0)
> +#define END_W1_ESn_Q             PPC_BIT32(1)
> +#define END_W1_ESe               PPC_BITMASK32(2, 3)
> +#define END_W1_ESe_P             PPC_BIT32(2)
> +#define END_W1_ESe_Q             PPC_BIT32(3)
> +#define END_W1_GENERATION        PPC_BIT32(9)
> +#define END_W1_PAGE_OFF          PPC_BITMASK32(10, 31)
> +        uint32_t        w2;
> +#define END_W2_MIGRATION_REG     PPC_BITMASK32(0, 3)
> +#define END_W2_OP_DESC_HI        PPC_BITMASK32(4, 31)
> +        uint32_t        w3;
> +#define END_W3_OP_DESC_LO        PPC_BITMASK32(0, 31)
> +        uint32_t        w4;
> +#define END_W4_ESC_END_BLOCK     PPC_BITMASK32(4, 7)
> +#define END_W4_ESC_END_INDEX     PPC_BITMASK32(8, 31)
> +        uint32_t        w5;
> +#define END_W5_ESC_END_DATA      PPC_BITMASK32(1, 31)
> +        uint32_t        w6;
> +#define END_W6_FORMAT_BIT        PPC_BIT32(8)
> +#define END_W6_NVT_BLOCK         PPC_BITMASK32(9, 12)
> +#define END_W6_NVT_INDEX         PPC_BITMASK32(13, 31)
> +        uint32_t        w7;
> +#define END_W7_F0_IGNORE         PPC_BIT32(0)
> +#define END_W7_F0_BLK_GROUPING   PPC_BIT32(1)
> +#define END_W7_F0_PRIORITY       PPC_BITMASK32(8, 15)
> +#define END_W7_F1_WAKEZ          PPC_BIT32(0)
> +#define END_W7_F1_LOG_SERVER_ID  PPC_BITMASK32(1, 31)
> +} XiveEND;
> +
>  #endif /* PPC_XIVE_REGS_H */
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index c4c90a25758e..9cb001e7b540 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -442,6 +442,101 @@ static const TypeInfo xive_source_info = {
>      .class_init    = xive_source_class_init,
>  };
>  
> +/*
> + * XiveEND helpers
> + */
> +
> +void xive_end_reset(XiveEND *end)
> +{
> +    memset(end, 0, sizeof(*end));
> +
> +    /* switch off the escalation and notification ESBs */
> +    end->w1 = END_W1_ESe_Q | END_W1_ESn_Q;

It's not obvious to me what circumstances this would be called under.
Since the ENDs are in system memory, a memset() seems like an odd
thing for (virtual) hardware to be doing to it.

> +}
> +
> +static void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width,
> +                                          Monitor *mon)
> +{
> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
> +    uint32_t qentries = 1 << (qsize + 10);
> +    int i;
> +
> +    /*
> +     * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
> +     */
> +    monitor_printf(mon, " [ ");
> +    qindex = (qindex - (width - 1)) & (qentries - 1);
> +    for (i = 0; i < width; i++) {
> +        uint64_t qaddr = qaddr_base + (qindex << 2);
> +        uint32_t qdata = -1;
> +
> +        if (dma_memory_read(&address_space_memory, qaddr, &qdata,
> +                            sizeof(qdata))) {
> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
> +                          HWADDR_PRIx "\n", qaddr);
> +            return;
> +        }
> +        monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
> +                       be32_to_cpu(qdata));
> +        qindex = (qindex + 1) & (qentries - 1);
> +    }
> +    monitor_printf(mon, "]\n");
> +}
> +
> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
> +{
> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
> +    uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1);
> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
> +    uint32_t qentries = 1 << (qsize + 10);
> +
> +    uint32_t nvt = GETFIELD(END_W6_NVT_INDEX, end->w6);
> +    uint8_t priority = GETFIELD(END_W7_F0_PRIORITY, end->w7);
> +
> +    if (!(end->w0 & END_W0_VALID)) {
> +        return;
> +    }
> +
> +    monitor_printf(mon, "  %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64
> +                   "% 6d/%5d ^%d", end_idx,
> +                   end->w0 & END_W0_VALID ? 'v' : '-',
> +                   end->w0 & END_W0_ENQUEUE ? 'q' : '-',
> +                   end->w0 & END_W0_UCOND_NOTIFY ? 'n' : '-',
> +                   end->w0 & END_W0_BACKLOG ? 'b' : '-',
> +                   end->w0 & END_W0_ESCALATE_CTL ? 'e' : '-',
> +                   priority, nvt, qaddr_base, qindex, qentries, qgen);
> +
> +    xive_end_queue_pic_print_info(end, 6, mon);
> +}
> +
> +static void xive_end_push(XiveEND *end, uint32_t data)

s/push/enqueue/ please, "push" suggests a stack.  (Not to mention that
"push" and "pull" are used as terms elsewhere in XIVE).

> +{
> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
> +    uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1);
> +
> +    uint64_t qaddr = qaddr_base + (qindex << 2);
> +    uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
> +    uint32_t qentries = 1 << (qsize + 10);
> +
> +    if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
> +                      HWADDR_PRIx "\n", qaddr);
> +        return;
> +    }
> +
> +    qindex = (qindex + 1) & (qentries - 1);
> +    if (qindex == 0) {
> +        qgen ^= 1;
> +        end->w1 = SETFIELD(END_W1_GENERATION, end->w1, qgen);
> +    }
> +    end->w1 = SETFIELD(END_W1_PAGE_OFF, end->w1, qindex);
> +}
> +
>  /*
>   * XIVE Router (aka. Virtualization Controller or IVRE)
>   */
> @@ -460,6 +555,82 @@ int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
>      return xrc->set_eas(xrtr, lisn, eas);
>  }
>  
> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> +                        XiveEND *end)
> +{
> +   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
> +
> +   return xrc->get_end(xrtr, end_blk, end_idx, end);
> +}
> +
> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> +                        XiveEND *end)
> +{
> +   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
> +
> +   return xrc->set_end(xrtr, end_blk, end_idx, end);
> +}
> +
> +/*
> + * An END trigger can come from an event trigger (IPI or HW) or from
> + * another chip. We don't model the PowerBus but the END trigger
> + * message has the same parameters than in the function below.
> + */
> +static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
> +                                   uint32_t end_idx, uint32_t end_data)
> +{
> +    XiveEND end;
> +    uint8_t priority;
> +    uint8_t format;
> +
> +    /* END cache lookup */
> +    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
> +                      end_idx);
> +        return;
> +    }
> +
> +    if (!(end.w0 & END_W0_VALID)) {
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
> +                      end_blk, end_idx);
> +        return;
> +    }
> +
> +    if (end.w0 & END_W0_ENQUEUE) {
> +        xive_end_push(&end, end_data);
> +        xive_router_set_end(xrtr, end_blk, end_idx, &end);
> +    }
> +
> +    /*
> +     * The W7 format depends on the F bit in W6. It defines the type
> +     * of the notification :
> +     *
> +     *   F=0 : single or multiple NVT notification
> +     *   F=1 : User level Event-Based Branch (EBB) notification, no
> +     *         priority
> +     */
> +    format = GETFIELD(END_W6_FORMAT_BIT, end.w6);
> +    priority = GETFIELD(END_W7_F0_PRIORITY, end.w7);
> +
> +    /* The END is masked */
> +    if (format == 0 && priority == 0xff) {
> +        return;
> +    }
> +
> +    /*
> +     * Check the END ESn (Event State Buffer for notification) for
> +     * even futher coalescing in the Router
> +     */
> +    if (!(end.w0 & END_W0_UCOND_NOTIFY)) {
> +        qemu_log_mask(LOG_UNIMP, "XIVE: !UCOND_NOTIFY not implemented\n");
> +        return;
> +    }
> +
> +    /*
> +     * Follows IVPE notification
> +     */
> +}
> +
>  static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>  {
>      XiveRouter *xrtr = XIVE_ROUTER(xf);
> @@ -471,9 +642,9 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>          return;
>      }
>  
> -    /* The IVRE has a State Bit Cache for its internal sources which
> -     * is also involed at this point. We skip the SBC lookup because
> -     * the state bits of the sources are modeled internally in QEMU.
> +    /* The IVRE checks the State Bit Cache at this point. We skip the
> +     * SBC lookup because the state bits of the sources are modeled
> +     * internally in QEMU.

Replacing a comment about something we're not doing with a different
comment about something we're not doing doesn't seem very useful.
Maybe fold these together into one patch or the other.

>       */
>  
>      if (!(eas.w & EAS_VALID)) {
> @@ -485,6 +656,14 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>          /* Notification completed */
>          return;
>      }
> +
> +    /*
> +     * The event trigger becomes an END trigger
> +     */
> +    xive_router_end_notify(xrtr,
> +                           GETFIELD(EAS_END_BLOCK, eas.w),
> +                           GETFIELD(EAS_END_INDEX, eas.w),
> +                           GETFIELD(EAS_END_DATA,  eas.w));
>  }
>  
>  static Property xive_router_properties[] = {
Benjamin Herrenschmidt Nov. 22, 2018, 6:49 a.m. UTC | #2
On Thu, 2018-11-22 at 15:41 +1100, David Gibson wrote:
> 
> > +void xive_end_reset(XiveEND *end)
> > +{
> > +    memset(end, 0, sizeof(*end));
> > +
> > +    /* switch off the escalation and notification ESBs */
> > +    end->w1 = END_W1_ESe_Q | END_W1_ESn_Q;
> 
> It's not obvious to me what circumstances this would be called under.
> Since the ENDs are in system memory, a memset() seems like an odd
> thing for (virtual) hardware to be doing to it.
> 
> > +}

Not on PAPR ...

Ben.
Cédric Le Goater Nov. 22, 2018, 9:47 p.m. UTC | #3
On 11/22/18 5:41 AM, David Gibson wrote:
> On Fri, Nov 16, 2018 at 11:56:58AM +0100, Cédric Le Goater wrote:
>> To complete the event routing, the IVRE sub-engine uses an internal
>> table containing Event Notification Descriptor (END) structures.
>>
>> An END specifies on which Event Queue (EQ) the event notification
>> data, defined in the associated EAS, should be posted when an
>> exception occurs. It also defines which Notification Virtual Target
>> (NVT) should be notified.
>>
>> The Event Queue is a memory page provided by the O/S defining a
>> circular buffer, one per server and priority couple, containing Event
>> Queue entries. These are 4 bytes long, the first bit being a
>> 'generation' bit and the 31 following bits the END Data field. They
>> are pulled by the O/S when the exception occurs.
>>
>> The END Data field is a way to set an invariant logical event source
>> number for an IRQ. It is set with the H_INT_SET_SOURCE_CONFIG hcall
>> when the EISN flag is used.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  include/hw/ppc/xive.h      |  18 ++++
>>  include/hw/ppc/xive_regs.h |  48 ++++++++++
>>  hw/intc/xive.c             | 185 ++++++++++++++++++++++++++++++++++++-
>>  3 files changed, 248 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index 5a0696366577..ce62aaf28343 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -193,11 +193,29 @@ typedef struct XiveRouterClass {
>>      /* XIVE table accessors */
>>      int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>>      int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>> +    int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>> +                   XiveEND *end);
>> +    int (*set_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>> +                   XiveEND *end);
> 
> Hrm.  So unlike the EAS, which is basically just a word, the END is a
> pretty large structure.  

yes. and so will be the NVT.

> It's unclear here if get/set are expected to copy the whole thing out 
> and in, 

That's the plan. 

What I had in mind are memory accessors to the XIVE structures, which 
are local to QEMU for sPAPR and in the guest RAM for PowerNV (Please
take a look at the XIVE PowerNV model).

> or if get give you a pointer into a "live" structure 

no

> and set just does any necessary barriers after an update.
that would be too complex for the PowerNV model I think. There is a cache
in between the software running on the (QEMU) machine and the XIVE HW but
it would be hard to handle. 
 
> Really, for a non-atomic value like this, I'm not sure get/set is the
> right model.

ok. we need something to get them out and in.

> Also as I understand it nearly all the indices in XIVE are broken into
> block/index.  Is there a reason those are folded together into lisn
> for the EAS, but not for the END?

The indexing of the EAT is global to the sytem and the index defines
which blk to use. The IRQ source numbers on the powerbus are architected 
to be :

    #define XIVE_SRCNO(blk, idx)      ((uint32_t)(blk) << 28 | (idx))

and XIVE can use different strategies to identify the XIVE IC in charge 
of routing. It can be a one-to-one chip to block relation as skiboot does. 
Using a block scope table is possible also. Our model only supports one 
block per chip and some shortcuts are taken but not that much in fact.
 
Remote access to the XIVE structures of another chip are done through 
MMIO (not modeled in PowerNV) and the blkid is used to partition the MMIO 
regions. Being local is better for performance because the END and NVT 
tables have a strong relation with the XIVE subengines using them 
(VC and PC). 

May be, Ben can clarified it this is badly explained. 

>>  } XiveRouterClass;
>>  
>>  void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
>>  
>>  int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>>  int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>> +                        XiveEND *end);
>> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>> +                        XiveEND *end);
>> +
>> +/*
>> + * For legacy compatibility, the exceptions define up to 256 different
>> + * priorities. P9 implements only 9 levels : 8 active levels [0 - 7]
>> + * and the least favored level 0xFF.
>> + */
>> +#define XIVE_PRIORITY_MAX  7
>> +
>> +void xive_end_reset(XiveEND *end);
>> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon);
>>  
>>  #endif /* PPC_XIVE_H */
>> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
>> index 12499b33614c..f97fb2b90bee 100644
>> --- a/include/hw/ppc/xive_regs.h
>> +++ b/include/hw/ppc/xive_regs.h
>> @@ -28,4 +28,52 @@ typedef struct XiveEAS {
>>  #define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
>>  } XiveEAS;
>>  
>> +/* Event Notification Descriptor (END) */
>> +typedef struct XiveEND {
>> +        uint32_t        w0;
>> +#define END_W0_VALID             PPC_BIT32(0) /* "v" bit */
>> +#define END_W0_ENQUEUE           PPC_BIT32(1) /* "q" bit */
>> +#define END_W0_UCOND_NOTIFY      PPC_BIT32(2) /* "n" bit */
>> +#define END_W0_BACKLOG           PPC_BIT32(3) /* "b" bit */
>> +#define END_W0_PRECL_ESC_CTL     PPC_BIT32(4) /* "p" bit */
>> +#define END_W0_ESCALATE_CTL      PPC_BIT32(5) /* "e" bit */
>> +#define END_W0_UNCOND_ESCALATE   PPC_BIT32(6) /* "u" bit - DD2.0 */
>> +#define END_W0_SILENT_ESCALATE   PPC_BIT32(7) /* "s" bit - DD2.0 */
>> +#define END_W0_QSIZE             PPC_BITMASK32(12, 15)
>> +#define END_W0_SW0               PPC_BIT32(16)
>> +#define END_W0_FIRMWARE          END_W0_SW0 /* Owned by FW */
>> +#define END_QSIZE_4K             0
>> +#define END_QSIZE_64K            4
>> +#define END_W0_HWDEP             PPC_BITMASK32(24, 31)
>> +        uint32_t        w1;
>> +#define END_W1_ESn               PPC_BITMASK32(0, 1)
>> +#define END_W1_ESn_P             PPC_BIT32(0)
>> +#define END_W1_ESn_Q             PPC_BIT32(1)
>> +#define END_W1_ESe               PPC_BITMASK32(2, 3)
>> +#define END_W1_ESe_P             PPC_BIT32(2)
>> +#define END_W1_ESe_Q             PPC_BIT32(3)
>> +#define END_W1_GENERATION        PPC_BIT32(9)
>> +#define END_W1_PAGE_OFF          PPC_BITMASK32(10, 31)
>> +        uint32_t        w2;
>> +#define END_W2_MIGRATION_REG     PPC_BITMASK32(0, 3)
>> +#define END_W2_OP_DESC_HI        PPC_BITMASK32(4, 31)
>> +        uint32_t        w3;
>> +#define END_W3_OP_DESC_LO        PPC_BITMASK32(0, 31)
>> +        uint32_t        w4;
>> +#define END_W4_ESC_END_BLOCK     PPC_BITMASK32(4, 7)
>> +#define END_W4_ESC_END_INDEX     PPC_BITMASK32(8, 31)
>> +        uint32_t        w5;
>> +#define END_W5_ESC_END_DATA      PPC_BITMASK32(1, 31)
>> +        uint32_t        w6;
>> +#define END_W6_FORMAT_BIT        PPC_BIT32(8)
>> +#define END_W6_NVT_BLOCK         PPC_BITMASK32(9, 12)
>> +#define END_W6_NVT_INDEX         PPC_BITMASK32(13, 31)
>> +        uint32_t        w7;
>> +#define END_W7_F0_IGNORE         PPC_BIT32(0)
>> +#define END_W7_F0_BLK_GROUPING   PPC_BIT32(1)
>> +#define END_W7_F0_PRIORITY       PPC_BITMASK32(8, 15)
>> +#define END_W7_F1_WAKEZ          PPC_BIT32(0)
>> +#define END_W7_F1_LOG_SERVER_ID  PPC_BITMASK32(1, 31)
>> +} XiveEND;
>> +
>>  #endif /* PPC_XIVE_REGS_H */
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index c4c90a25758e..9cb001e7b540 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -442,6 +442,101 @@ static const TypeInfo xive_source_info = {
>>      .class_init    = xive_source_class_init,
>>  };
>>  
>> +/*
>> + * XiveEND helpers
>> + */
>> +
>> +void xive_end_reset(XiveEND *end)
>> +{
>> +    memset(end, 0, sizeof(*end));
>> +
>> +    /* switch off the escalation and notification ESBs */
>> +    end->w1 = END_W1_ESe_Q | END_W1_ESn_Q;
> 
> It's not obvious to me what circumstances this would be called under.
> Since the ENDs are in system memory, a memset() seems like an odd
> thing for (virtual) hardware to be doing to it.

It makes sense on sPAPR if one day some OS starts using the END ESBs for 
further coalescing of the events. None does for now but I have added the 
model though. 

>> +}
>> +
>> +static void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width,
>> +                                          Monitor *mon)
>> +{
>> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
>> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
>> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
>> +    uint32_t qentries = 1 << (qsize + 10);
>> +    int i;
>> +
>> +    /*
>> +     * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
>> +     */
>> +    monitor_printf(mon, " [ ");
>> +    qindex = (qindex - (width - 1)) & (qentries - 1);
>> +    for (i = 0; i < width; i++) {
>> +        uint64_t qaddr = qaddr_base + (qindex << 2);
>> +        uint32_t qdata = -1;
>> +
>> +        if (dma_memory_read(&address_space_memory, qaddr, &qdata,
>> +                            sizeof(qdata))) {
>> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
>> +                          HWADDR_PRIx "\n", qaddr);
>> +            return;
>> +        }
>> +        monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
>> +                       be32_to_cpu(qdata));
>> +        qindex = (qindex + 1) & (qentries - 1);
>> +    }
>> +    monitor_printf(mon, "]\n");
>> +}
>> +
>> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
>> +{
>> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
>> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
>> +    uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1);
>> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
>> +    uint32_t qentries = 1 << (qsize + 10);
>> +
>> +    uint32_t nvt = GETFIELD(END_W6_NVT_INDEX, end->w6);
>> +    uint8_t priority = GETFIELD(END_W7_F0_PRIORITY, end->w7);
>> +
>> +    if (!(end->w0 & END_W0_VALID)) {
>> +        return;
>> +    }
>> +
>> +    monitor_printf(mon, "  %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64
>> +                   "% 6d/%5d ^%d", end_idx,
>> +                   end->w0 & END_W0_VALID ? 'v' : '-',
>> +                   end->w0 & END_W0_ENQUEUE ? 'q' : '-',
>> +                   end->w0 & END_W0_UCOND_NOTIFY ? 'n' : '-',
>> +                   end->w0 & END_W0_BACKLOG ? 'b' : '-',
>> +                   end->w0 & END_W0_ESCALATE_CTL ? 'e' : '-',
>> +                   priority, nvt, qaddr_base, qindex, qentries, qgen);
>> +
>> +    xive_end_queue_pic_print_info(end, 6, mon);
>> +}
>> +
>> +static void xive_end_push(XiveEND *end, uint32_t data)
> 
> s/push/enqueue/ please, "push" suggests a stack.  (Not to mention that
> "push" and "pull" are used as terms elsewhere in XIVE).

yes. you are right. I will change.

>> +{
>> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
>> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
>> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
>> +    uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1);
>> +
>> +    uint64_t qaddr = qaddr_base + (qindex << 2);
>> +    uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
>> +    uint32_t qentries = 1 << (qsize + 10);
>> +
>> +    if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
>> +                      HWADDR_PRIx "\n", qaddr);
>> +        return;
>> +    }
>> +
>> +    qindex = (qindex + 1) & (qentries - 1);
>> +    if (qindex == 0) {
>> +        qgen ^= 1;
>> +        end->w1 = SETFIELD(END_W1_GENERATION, end->w1, qgen);
>> +    }
>> +    end->w1 = SETFIELD(END_W1_PAGE_OFF, end->w1, qindex);
>> +}
>> +
>>  /*
>>   * XIVE Router (aka. Virtualization Controller or IVRE)
>>   */
>> @@ -460,6 +555,82 @@ int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
>>      return xrc->set_eas(xrtr, lisn, eas);
>>  }
>>  
>> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>> +                        XiveEND *end)
>> +{
>> +   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
>> +
>> +   return xrc->get_end(xrtr, end_blk, end_idx, end);
>> +}
>> +
>> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>> +                        XiveEND *end)
>> +{
>> +   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
>> +
>> +   return xrc->set_end(xrtr, end_blk, end_idx, end);
>> +}
>> +
>> +/*
>> + * An END trigger can come from an event trigger (IPI or HW) or from
>> + * another chip. We don't model the PowerBus but the END trigger
>> + * message has the same parameters than in the function below.
>> + */
>> +static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
>> +                                   uint32_t end_idx, uint32_t end_data)
>> +{
>> +    XiveEND end;
>> +    uint8_t priority;
>> +    uint8_t format;
>> +
>> +    /* END cache lookup */
>> +    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
>> +                      end_idx);
>> +        return;
>> +    }
>> +
>> +    if (!(end.w0 & END_W0_VALID)) {
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
>> +                      end_blk, end_idx);
>> +        return;
>> +    }
>> +
>> +    if (end.w0 & END_W0_ENQUEUE) {
>> +        xive_end_push(&end, end_data);
>> +        xive_router_set_end(xrtr, end_blk, end_idx, &end);
>> +    }
>> +
>> +    /*
>> +     * The W7 format depends on the F bit in W6. It defines the type
>> +     * of the notification :
>> +     *
>> +     *   F=0 : single or multiple NVT notification
>> +     *   F=1 : User level Event-Based Branch (EBB) notification, no
>> +     *         priority
>> +     */
>> +    format = GETFIELD(END_W6_FORMAT_BIT, end.w6);
>> +    priority = GETFIELD(END_W7_F0_PRIORITY, end.w7);
>> +
>> +    /* The END is masked */
>> +    if (format == 0 && priority == 0xff) {
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * Check the END ESn (Event State Buffer for notification) for
>> +     * even futher coalescing in the Router
>> +     */
>> +    if (!(end.w0 & END_W0_UCOND_NOTIFY)) {
>> +        qemu_log_mask(LOG_UNIMP, "XIVE: !UCOND_NOTIFY not implemented\n");
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * Follows IVPE notification
>> +     */
>> +}
>> +
>>  static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>>  {
>>      XiveRouter *xrtr = XIVE_ROUTER(xf);
>> @@ -471,9 +642,9 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>>          return;
>>      }
>>  
>> -    /* The IVRE has a State Bit Cache for its internal sources which
>> -     * is also involed at this point. We skip the SBC lookup because
>> -     * the state bits of the sources are modeled internally in QEMU.
>> +    /* The IVRE checks the State Bit Cache at this point. We skip the
>> +     * SBC lookup because the state bits of the sources are modeled
>> +     * internally in QEMU.
> 
> Replacing a comment about something we're not doing with a different
> comment about something we're not doing doesn't seem very useful.
> Maybe fold these together into one patch or the other.

That's me rephrasing. it should be indeed in the previous patch

Thanks,

C.

>>       */
>>  
>>      if (!(eas.w & EAS_VALID)) {
>> @@ -485,6 +656,14 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>>          /* Notification completed */
>>          return;
>>      }
>> +
>> +    /*
>> +     * The event trigger becomes an END trigger
>> +     */
>> +    xive_router_end_notify(xrtr,
>> +                           GETFIELD(EAS_END_BLOCK, eas.w),
>> +                           GETFIELD(EAS_END_INDEX, eas.w),
>> +                           GETFIELD(EAS_END_DATA,  eas.w));
>>  }
>>  
>>  static Property xive_router_properties[] = {
>
David Gibson Nov. 23, 2018, 3:51 a.m. UTC | #4
On Thu, Nov 22, 2018 at 05:49:09PM +1100, Benjamin Herrenschmidt wrote:
> On Thu, 2018-11-22 at 15:41 +1100, David Gibson wrote:
> > 
> > > +void xive_end_reset(XiveEND *end)
> > > +{
> > > +    memset(end, 0, sizeof(*end));
> > > +
> > > +    /* switch off the escalation and notification ESBs */
> > > +    end->w1 = END_W1_ESe_Q | END_W1_ESn_Q;
> > 
> > It's not obvious to me what circumstances this would be called under.
> > Since the ENDs are in system memory, a memset() seems like an odd
> > thing for (virtual) hardware to be doing to it.
> > 
> > > +}
> 
> Not on PAPR ...

Right, so the memset() can go in PAPR specific code.
David Gibson Nov. 23, 2018, 4:35 a.m. UTC | #5
On Thu, Nov 22, 2018 at 10:47:44PM +0100, Cédric Le Goater wrote:
> On 11/22/18 5:41 AM, David Gibson wrote:
> > On Fri, Nov 16, 2018 at 11:56:58AM +0100, Cédric Le Goater wrote:
> >> To complete the event routing, the IVRE sub-engine uses an internal
> >> table containing Event Notification Descriptor (END) structures.
> >>
> >> An END specifies on which Event Queue (EQ) the event notification
> >> data, defined in the associated EAS, should be posted when an
> >> exception occurs. It also defines which Notification Virtual Target
> >> (NVT) should be notified.
> >>
> >> The Event Queue is a memory page provided by the O/S defining a
> >> circular buffer, one per server and priority couple, containing Event
> >> Queue entries. These are 4 bytes long, the first bit being a
> >> 'generation' bit and the 31 following bits the END Data field. They
> >> are pulled by the O/S when the exception occurs.
> >>
> >> The END Data field is a way to set an invariant logical event source
> >> number for an IRQ. It is set with the H_INT_SET_SOURCE_CONFIG hcall
> >> when the EISN flag is used.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  include/hw/ppc/xive.h      |  18 ++++
> >>  include/hw/ppc/xive_regs.h |  48 ++++++++++
> >>  hw/intc/xive.c             | 185 ++++++++++++++++++++++++++++++++++++-
> >>  3 files changed, 248 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> index 5a0696366577..ce62aaf28343 100644
> >> --- a/include/hw/ppc/xive.h
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -193,11 +193,29 @@ typedef struct XiveRouterClass {
> >>      /* XIVE table accessors */
> >>      int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> >>      int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> >> +    int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> >> +                   XiveEND *end);
> >> +    int (*set_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> >> +                   XiveEND *end);
> > 
> > Hrm.  So unlike the EAS, which is basically just a word, the END is a
> > pretty large structure.  
> 
> yes. and so will be the NVT.
> 
> > It's unclear here if get/set are expected to copy the whole thing out 
> > and in, 
> 
> That's the plan. 

Yeah, I don't think that's a good idea.  In some cases the updates are
on hot paths, so the extra copy isn't good, and more importantly it
makes it look like an atomic update, but it's not really.

Well... I guess it probably is because of the BQL, but I'd prefer not
to rely on that excessively.

> What I had in mind are memory accessors to the XIVE structures, which 
> are local to QEMU for sPAPR and in the guest RAM for PowerNV (Please
> take a look at the XIVE PowerNV model).
> 
> > or if get give you a pointer into a "live" structure 
> 
> no
> 
> > and set just does any necessary barriers after an update.
> that would be too complex for the PowerNV model I think. There is a cache
> in between the software running on the (QEMU) machine and the XIVE HW but
> it would be hard to handle. 
>  
> > Really, for a non-atomic value like this, I'm not sure get/set is the
> > right model.
> 
> ok. we need something to get them out and in.

I've thought about this a bit more.  What I think might work is
"end_read" and "end_write" callbacks, which take a word number in
addition to the parameters you have already

> > Also as I understand it nearly all the indices in XIVE are broken into
> > block/index.  Is there a reason those are folded together into lisn
> > for the EAS, but not for the END?
> 
> The indexing of the EAT is global to the sytem and the index defines
> which blk to use. The IRQ source numbers on the powerbus are architected 
> to be :
> 
>     #define XIVE_SRCNO(blk, idx)      ((uint32_t)(blk) << 28 | (idx))
> 
> and XIVE can use different strategies to identify the XIVE IC in charge 
> of routing. It can be a one-to-one chip to block relation as skiboot does. 
> Using a block scope table is possible also. Our model only supports one 
> block per chip and some shortcuts are taken but not that much in fact.
>  
> Remote access to the XIVE structures of another chip are done through 
> MMIO (not modeled in PowerNV) and the blkid is used to partition the MMIO 
> regions. Being local is better for performance because the END and NVT 
> tables have a strong relation with the XIVE subengines using them 
> (VC and PC). 
> 
> May be, Ben can clarified it this is badly explained.

Right.. I think I understand what the blocks are all about.

But my question is, why encode the block and index together for the
EAS, but separately for the END?

> 
> >>  } XiveRouterClass;
> >>  
> >>  void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
> >>  
> >>  int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> >>  int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
> >> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> >> +                        XiveEND *end);
> >> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> >> +                        XiveEND *end);
> >> +
> >> +/*
> >> + * For legacy compatibility, the exceptions define up to 256 different
> >> + * priorities. P9 implements only 9 levels : 8 active levels [0 - 7]
> >> + * and the least favored level 0xFF.
> >> + */
> >> +#define XIVE_PRIORITY_MAX  7
> >> +
> >> +void xive_end_reset(XiveEND *end);
> >> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon);
> >>  
> >>  #endif /* PPC_XIVE_H */
> >> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
> >> index 12499b33614c..f97fb2b90bee 100644
> >> --- a/include/hw/ppc/xive_regs.h
> >> +++ b/include/hw/ppc/xive_regs.h
> >> @@ -28,4 +28,52 @@ typedef struct XiveEAS {
> >>  #define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
> >>  } XiveEAS;
> >>  
> >> +/* Event Notification Descriptor (END) */
> >> +typedef struct XiveEND {
> >> +        uint32_t        w0;
> >> +#define END_W0_VALID             PPC_BIT32(0) /* "v" bit */
> >> +#define END_W0_ENQUEUE           PPC_BIT32(1) /* "q" bit */
> >> +#define END_W0_UCOND_NOTIFY      PPC_BIT32(2) /* "n" bit */
> >> +#define END_W0_BACKLOG           PPC_BIT32(3) /* "b" bit */
> >> +#define END_W0_PRECL_ESC_CTL     PPC_BIT32(4) /* "p" bit */
> >> +#define END_W0_ESCALATE_CTL      PPC_BIT32(5) /* "e" bit */
> >> +#define END_W0_UNCOND_ESCALATE   PPC_BIT32(6) /* "u" bit - DD2.0 */
> >> +#define END_W0_SILENT_ESCALATE   PPC_BIT32(7) /* "s" bit - DD2.0 */
> >> +#define END_W0_QSIZE             PPC_BITMASK32(12, 15)
> >> +#define END_W0_SW0               PPC_BIT32(16)
> >> +#define END_W0_FIRMWARE          END_W0_SW0 /* Owned by FW */
> >> +#define END_QSIZE_4K             0
> >> +#define END_QSIZE_64K            4
> >> +#define END_W0_HWDEP             PPC_BITMASK32(24, 31)
> >> +        uint32_t        w1;
> >> +#define END_W1_ESn               PPC_BITMASK32(0, 1)
> >> +#define END_W1_ESn_P             PPC_BIT32(0)
> >> +#define END_W1_ESn_Q             PPC_BIT32(1)
> >> +#define END_W1_ESe               PPC_BITMASK32(2, 3)
> >> +#define END_W1_ESe_P             PPC_BIT32(2)
> >> +#define END_W1_ESe_Q             PPC_BIT32(3)
> >> +#define END_W1_GENERATION        PPC_BIT32(9)
> >> +#define END_W1_PAGE_OFF          PPC_BITMASK32(10, 31)
> >> +        uint32_t        w2;
> >> +#define END_W2_MIGRATION_REG     PPC_BITMASK32(0, 3)
> >> +#define END_W2_OP_DESC_HI        PPC_BITMASK32(4, 31)
> >> +        uint32_t        w3;
> >> +#define END_W3_OP_DESC_LO        PPC_BITMASK32(0, 31)
> >> +        uint32_t        w4;
> >> +#define END_W4_ESC_END_BLOCK     PPC_BITMASK32(4, 7)
> >> +#define END_W4_ESC_END_INDEX     PPC_BITMASK32(8, 31)
> >> +        uint32_t        w5;
> >> +#define END_W5_ESC_END_DATA      PPC_BITMASK32(1, 31)
> >> +        uint32_t        w6;
> >> +#define END_W6_FORMAT_BIT        PPC_BIT32(8)
> >> +#define END_W6_NVT_BLOCK         PPC_BITMASK32(9, 12)
> >> +#define END_W6_NVT_INDEX         PPC_BITMASK32(13, 31)
> >> +        uint32_t        w7;
> >> +#define END_W7_F0_IGNORE         PPC_BIT32(0)
> >> +#define END_W7_F0_BLK_GROUPING   PPC_BIT32(1)
> >> +#define END_W7_F0_PRIORITY       PPC_BITMASK32(8, 15)
> >> +#define END_W7_F1_WAKEZ          PPC_BIT32(0)
> >> +#define END_W7_F1_LOG_SERVER_ID  PPC_BITMASK32(1, 31)
> >> +} XiveEND;
> >> +
> >>  #endif /* PPC_XIVE_REGS_H */
> >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >> index c4c90a25758e..9cb001e7b540 100644
> >> --- a/hw/intc/xive.c
> >> +++ b/hw/intc/xive.c
> >> @@ -442,6 +442,101 @@ static const TypeInfo xive_source_info = {
> >>      .class_init    = xive_source_class_init,
> >>  };
> >>  
> >> +/*
> >> + * XiveEND helpers
> >> + */
> >> +
> >> +void xive_end_reset(XiveEND *end)
> >> +{
> >> +    memset(end, 0, sizeof(*end));
> >> +
> >> +    /* switch off the escalation and notification ESBs */
> >> +    end->w1 = END_W1_ESe_Q | END_W1_ESn_Q;
> > 
> > It's not obvious to me what circumstances this would be called under.
> > Since the ENDs are in system memory, a memset() seems like an odd
> > thing for (virtual) hardware to be doing to it.
> 
> It makes sense on sPAPR if one day some OS starts using the END ESBs for 
> further coalescing of the events. None does for now but I have added the 
> model though.

Hrm, I think that belongs in PAPR specific code.  It's not really part
of the router model - it's the PAPR stuff configuring the router at
reset time (much as firmware would configure it at reset time for bare
metal).

> 
> >> +}
> >> +
> >> +static void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width,
> >> +                                          Monitor *mon)
> >> +{
> >> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
> >> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
> >> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
> >> +    uint32_t qentries = 1 << (qsize + 10);
> >> +    int i;
> >> +
> >> +    /*
> >> +     * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
> >> +     */
> >> +    monitor_printf(mon, " [ ");
> >> +    qindex = (qindex - (width - 1)) & (qentries - 1);
> >> +    for (i = 0; i < width; i++) {
> >> +        uint64_t qaddr = qaddr_base + (qindex << 2);
> >> +        uint32_t qdata = -1;
> >> +
> >> +        if (dma_memory_read(&address_space_memory, qaddr, &qdata,
> >> +                            sizeof(qdata))) {
> >> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
> >> +                          HWADDR_PRIx "\n", qaddr);
> >> +            return;
> >> +        }
> >> +        monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
> >> +                       be32_to_cpu(qdata));
> >> +        qindex = (qindex + 1) & (qentries - 1);
> >> +    }
> >> +    monitor_printf(mon, "]\n");
> >> +}
> >> +
> >> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
> >> +{
> >> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
> >> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
> >> +    uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1);
> >> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
> >> +    uint32_t qentries = 1 << (qsize + 10);
> >> +
> >> +    uint32_t nvt = GETFIELD(END_W6_NVT_INDEX, end->w6);
> >> +    uint8_t priority = GETFIELD(END_W7_F0_PRIORITY, end->w7);
> >> +
> >> +    if (!(end->w0 & END_W0_VALID)) {
> >> +        return;
> >> +    }
> >> +
> >> +    monitor_printf(mon, "  %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64
> >> +                   "% 6d/%5d ^%d", end_idx,
> >> +                   end->w0 & END_W0_VALID ? 'v' : '-',
> >> +                   end->w0 & END_W0_ENQUEUE ? 'q' : '-',
> >> +                   end->w0 & END_W0_UCOND_NOTIFY ? 'n' : '-',
> >> +                   end->w0 & END_W0_BACKLOG ? 'b' : '-',
> >> +                   end->w0 & END_W0_ESCALATE_CTL ? 'e' : '-',
> >> +                   priority, nvt, qaddr_base, qindex, qentries, qgen);
> >> +
> >> +    xive_end_queue_pic_print_info(end, 6, mon);
> >> +}
> >> +
> >> +static void xive_end_push(XiveEND *end, uint32_t data)
> > 
> > s/push/enqueue/ please, "push" suggests a stack.  (Not to mention that
> > "push" and "pull" are used as terms elsewhere in XIVE).
> 
> yes. you are right. I will change.
> 
> >> +{
> >> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
> >> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
> >> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
> >> +    uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1);
> >> +
> >> +    uint64_t qaddr = qaddr_base + (qindex << 2);
> >> +    uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
> >> +    uint32_t qentries = 1 << (qsize + 10);
> >> +
> >> +    if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) {
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
> >> +                      HWADDR_PRIx "\n", qaddr);
> >> +        return;
> >> +    }
> >> +
> >> +    qindex = (qindex + 1) & (qentries - 1);
> >> +    if (qindex == 0) {
> >> +        qgen ^= 1;
> >> +        end->w1 = SETFIELD(END_W1_GENERATION, end->w1, qgen);
> >> +    }
> >> +    end->w1 = SETFIELD(END_W1_PAGE_OFF, end->w1, qindex);
> >> +}
> >> +
> >>  /*
> >>   * XIVE Router (aka. Virtualization Controller or IVRE)
> >>   */
> >> @@ -460,6 +555,82 @@ int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
> >>      return xrc->set_eas(xrtr, lisn, eas);
> >>  }
> >>  
> >> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> >> +                        XiveEND *end)
> >> +{
> >> +   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
> >> +
> >> +   return xrc->get_end(xrtr, end_blk, end_idx, end);
> >> +}
> >> +
> >> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
> >> +                        XiveEND *end)
> >> +{
> >> +   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
> >> +
> >> +   return xrc->set_end(xrtr, end_blk, end_idx, end);
> >> +}
> >> +
> >> +/*
> >> + * An END trigger can come from an event trigger (IPI or HW) or from
> >> + * another chip. We don't model the PowerBus but the END trigger
> >> + * message has the same parameters than in the function below.
> >> + */
> >> +static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
> >> +                                   uint32_t end_idx, uint32_t end_data)
> >> +{
> >> +    XiveEND end;
> >> +    uint8_t priority;
> >> +    uint8_t format;
> >> +
> >> +    /* END cache lookup */
> >> +    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
> >> +                      end_idx);
> >> +        return;
> >> +    }
> >> +
> >> +    if (!(end.w0 & END_W0_VALID)) {
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
> >> +                      end_blk, end_idx);
> >> +        return;
> >> +    }
> >> +
> >> +    if (end.w0 & END_W0_ENQUEUE) {
> >> +        xive_end_push(&end, end_data);
> >> +        xive_router_set_end(xrtr, end_blk, end_idx, &end);
> >> +    }
> >> +
> >> +    /*
> >> +     * The W7 format depends on the F bit in W6. It defines the type
> >> +     * of the notification :
> >> +     *
> >> +     *   F=0 : single or multiple NVT notification
> >> +     *   F=1 : User level Event-Based Branch (EBB) notification, no
> >> +     *         priority
> >> +     */
> >> +    format = GETFIELD(END_W6_FORMAT_BIT, end.w6);
> >> +    priority = GETFIELD(END_W7_F0_PRIORITY, end.w7);
> >> +
> >> +    /* The END is masked */
> >> +    if (format == 0 && priority == 0xff) {
> >> +        return;
> >> +    }
> >> +
> >> +    /*
> >> +     * Check the END ESn (Event State Buffer for notification) for
> >> +     * even futher coalescing in the Router
> >> +     */
> >> +    if (!(end.w0 & END_W0_UCOND_NOTIFY)) {
> >> +        qemu_log_mask(LOG_UNIMP, "XIVE: !UCOND_NOTIFY not implemented\n");
> >> +        return;
> >> +    }
> >> +
> >> +    /*
> >> +     * Follows IVPE notification
> >> +     */
> >> +}
> >> +
> >>  static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
> >>  {
> >>      XiveRouter *xrtr = XIVE_ROUTER(xf);
> >> @@ -471,9 +642,9 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
> >>          return;
> >>      }
> >>  
> >> -    /* The IVRE has a State Bit Cache for its internal sources which
> >> -     * is also involed at this point. We skip the SBC lookup because
> >> -     * the state bits of the sources are modeled internally in QEMU.
> >> +    /* The IVRE checks the State Bit Cache at this point. We skip the
> >> +     * SBC lookup because the state bits of the sources are modeled
> >> +     * internally in QEMU.
> > 
> > Replacing a comment about something we're not doing with a different
> > comment about something we're not doing doesn't seem very useful.
> > Maybe fold these together into one patch or the other.
> 
> That's me rephrasing. it should be indeed in the previous patch
> 
> Thanks,
> 
> C.
> 
> >>       */
> >>  
> >>      if (!(eas.w & EAS_VALID)) {
> >> @@ -485,6 +656,14 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
> >>          /* Notification completed */
> >>          return;
> >>      }
> >> +
> >> +    /*
> >> +     * The event trigger becomes an END trigger
> >> +     */
> >> +    xive_router_end_notify(xrtr,
> >> +                           GETFIELD(EAS_END_BLOCK, eas.w),
> >> +                           GETFIELD(EAS_END_INDEX, eas.w),
> >> +                           GETFIELD(EAS_END_DATA,  eas.w));
> >>  }
> >>  
> >>  static Property xive_router_properties[] = {
> > 
>
Cédric Le Goater Nov. 23, 2018, 11:01 a.m. UTC | #6
On 11/23/18 5:35 AM, David Gibson wrote:
> On Thu, Nov 22, 2018 at 10:47:44PM +0100, Cédric Le Goater wrote:
>> On 11/22/18 5:41 AM, David Gibson wrote:
>>> On Fri, Nov 16, 2018 at 11:56:58AM +0100, Cédric Le Goater wrote:
>>>> To complete the event routing, the IVRE sub-engine uses an internal
>>>> table containing Event Notification Descriptor (END) structures.
>>>>
>>>> An END specifies on which Event Queue (EQ) the event notification
>>>> data, defined in the associated EAS, should be posted when an
>>>> exception occurs. It also defines which Notification Virtual Target
>>>> (NVT) should be notified.
>>>>
>>>> The Event Queue is a memory page provided by the O/S defining a
>>>> circular buffer, one per server and priority couple, containing Event
>>>> Queue entries. These are 4 bytes long, the first bit being a
>>>> 'generation' bit and the 31 following bits the END Data field. They
>>>> are pulled by the O/S when the exception occurs.
>>>>
>>>> The END Data field is a way to set an invariant logical event source
>>>> number for an IRQ. It is set with the H_INT_SET_SOURCE_CONFIG hcall
>>>> when the EISN flag is used.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>  include/hw/ppc/xive.h      |  18 ++++
>>>>  include/hw/ppc/xive_regs.h |  48 ++++++++++
>>>>  hw/intc/xive.c             | 185 ++++++++++++++++++++++++++++++++++++-
>>>>  3 files changed, 248 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>> index 5a0696366577..ce62aaf28343 100644
>>>> --- a/include/hw/ppc/xive.h
>>>> +++ b/include/hw/ppc/xive.h
>>>> @@ -193,11 +193,29 @@ typedef struct XiveRouterClass {
>>>>      /* XIVE table accessors */
>>>>      int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>>>>      int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>>>> +    int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>>>> +                   XiveEND *end);
>>>> +    int (*set_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>>>> +                   XiveEND *end);
>>>
>>> Hrm.  So unlike the EAS, which is basically just a word, the END is a
>>> pretty large structure.  
>>
>> yes. and so will be the NVT.
>>
>>> It's unclear here if get/set are expected to copy the whole thing out 
>>> and in, 
>>
>> That's the plan. 
> 
> Yeah, I don't think that's a good idea.  In some cases the updates are
> on hot paths, so the extra copy isn't good, and more importantly it
> makes it look like an atomic update, but it's not really.
> 
> Well... I guess it probably is because of the BQL, but I'd prefer not
> to rely on that excessively.
> 
>> What I had in mind are memory accessors to the XIVE structures, which 
>> are local to QEMU for sPAPR and in the guest RAM for PowerNV (Please
>> take a look at the XIVE PowerNV model).
>>
>>> or if get give you a pointer into a "live" structure 
>>
>> no
>>
>>> and set just does any necessary barriers after an update.
>> that would be too complex for the PowerNV model I think. There is a cache
>> in between the software running on the (QEMU) machine and the XIVE HW but
>> it would be hard to handle. 
>>  
>>> Really, for a non-atomic value like this, I'm not sure get/set is the
>>> right model.
>>
>> ok. we need something to get them out and in.
> 
> I've thought about this a bit more.  What I think might work is
> "end_read" and "end_write" callbacks, which take a word number in
> addition to the parameters you have already

ouch. This is not going to simplify the routing algo where all the get and
set are done. The whole END is needed in the END trigger. So we will need
routines to get and set the whole END.

The NVT is not used too much for the moment. 

>>> Also as I understand it nearly all the indices in XIVE are broken into
>>> block/index.  Is there a reason those are folded together into lisn
>>> for the EAS, but not for the END?
>>
>> The indexing of the EAT is global to the sytem and the index defines
>> which blk to use. The IRQ source numbers on the powerbus are architected 
>> to be :
>>
>>     #define XIVE_SRCNO(blk, idx)      ((uint32_t)(blk) << 28 | (idx))
>>
>> and XIVE can use different strategies to identify the XIVE IC in charge 
>> of routing. It can be a one-to-one chip to block relation as skiboot does. 
>> Using a block scope table is possible also. Our model only supports one 
>> block per chip and some shortcuts are taken but not that much in fact.
>>  
>> Remote access to the XIVE structures of another chip are done through 
>> MMIO (not modeled in PowerNV) and the blkid is used to partition the MMIO 
>> regions. Being local is better for performance because the END and NVT 
>> tables have a strong relation with the XIVE subengines using them 
>> (VC and PC).  
>>
>> May be, Ben can clarified it this is badly explained.
> 
> Right.. I think I understand what the blocks are all about.
> 
> But my question is, why encode the block and index together for the
> EAS, but separately for the END?

mostly because when the PowerNV devices forward an notification event, they 
do so by writing the full IRQ number on a MMIO notify port and this number
reaches the XiveRouter routing algo without being modified. On sPAPR it's
even simpler.

But we could decode the full IRQ number in the router before querying the 
associated EAS. The EAS accessor would then use the same interface. 
I will look into it. Not a big change. I think. 

> 
>>
>>>>  } XiveRouterClass;
>>>>  
>>>>  void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
>>>>  
>>>>  int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>>>>  int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
>>>> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>>>> +                        XiveEND *end);
>>>> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>>>> +                        XiveEND *end);
>>>> +
>>>> +/*
>>>> + * For legacy compatibility, the exceptions define up to 256 different
>>>> + * priorities. P9 implements only 9 levels : 8 active levels [0 - 7]
>>>> + * and the least favored level 0xFF.
>>>> + */
>>>> +#define XIVE_PRIORITY_MAX  7
>>>> +
>>>> +void xive_end_reset(XiveEND *end);
>>>> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon);
>>>>  
>>>>  #endif /* PPC_XIVE_H */
>>>> diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
>>>> index 12499b33614c..f97fb2b90bee 100644
>>>> --- a/include/hw/ppc/xive_regs.h
>>>> +++ b/include/hw/ppc/xive_regs.h
>>>> @@ -28,4 +28,52 @@ typedef struct XiveEAS {
>>>>  #define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
>>>>  } XiveEAS;
>>>>  
>>>> +/* Event Notification Descriptor (END) */
>>>> +typedef struct XiveEND {
>>>> +        uint32_t        w0;
>>>> +#define END_W0_VALID             PPC_BIT32(0) /* "v" bit */
>>>> +#define END_W0_ENQUEUE           PPC_BIT32(1) /* "q" bit */
>>>> +#define END_W0_UCOND_NOTIFY      PPC_BIT32(2) /* "n" bit */
>>>> +#define END_W0_BACKLOG           PPC_BIT32(3) /* "b" bit */
>>>> +#define END_W0_PRECL_ESC_CTL     PPC_BIT32(4) /* "p" bit */
>>>> +#define END_W0_ESCALATE_CTL      PPC_BIT32(5) /* "e" bit */
>>>> +#define END_W0_UNCOND_ESCALATE   PPC_BIT32(6) /* "u" bit - DD2.0 */
>>>> +#define END_W0_SILENT_ESCALATE   PPC_BIT32(7) /* "s" bit - DD2.0 */
>>>> +#define END_W0_QSIZE             PPC_BITMASK32(12, 15)
>>>> +#define END_W0_SW0               PPC_BIT32(16)
>>>> +#define END_W0_FIRMWARE          END_W0_SW0 /* Owned by FW */
>>>> +#define END_QSIZE_4K             0
>>>> +#define END_QSIZE_64K            4
>>>> +#define END_W0_HWDEP             PPC_BITMASK32(24, 31)
>>>> +        uint32_t        w1;
>>>> +#define END_W1_ESn               PPC_BITMASK32(0, 1)
>>>> +#define END_W1_ESn_P             PPC_BIT32(0)
>>>> +#define END_W1_ESn_Q             PPC_BIT32(1)
>>>> +#define END_W1_ESe               PPC_BITMASK32(2, 3)
>>>> +#define END_W1_ESe_P             PPC_BIT32(2)
>>>> +#define END_W1_ESe_Q             PPC_BIT32(3)
>>>> +#define END_W1_GENERATION        PPC_BIT32(9)
>>>> +#define END_W1_PAGE_OFF          PPC_BITMASK32(10, 31)
>>>> +        uint32_t        w2;
>>>> +#define END_W2_MIGRATION_REG     PPC_BITMASK32(0, 3)
>>>> +#define END_W2_OP_DESC_HI        PPC_BITMASK32(4, 31)
>>>> +        uint32_t        w3;
>>>> +#define END_W3_OP_DESC_LO        PPC_BITMASK32(0, 31)
>>>> +        uint32_t        w4;
>>>> +#define END_W4_ESC_END_BLOCK     PPC_BITMASK32(4, 7)
>>>> +#define END_W4_ESC_END_INDEX     PPC_BITMASK32(8, 31)
>>>> +        uint32_t        w5;
>>>> +#define END_W5_ESC_END_DATA      PPC_BITMASK32(1, 31)
>>>> +        uint32_t        w6;
>>>> +#define END_W6_FORMAT_BIT        PPC_BIT32(8)
>>>> +#define END_W6_NVT_BLOCK         PPC_BITMASK32(9, 12)
>>>> +#define END_W6_NVT_INDEX         PPC_BITMASK32(13, 31)
>>>> +        uint32_t        w7;
>>>> +#define END_W7_F0_IGNORE         PPC_BIT32(0)
>>>> +#define END_W7_F0_BLK_GROUPING   PPC_BIT32(1)
>>>> +#define END_W7_F0_PRIORITY       PPC_BITMASK32(8, 15)
>>>> +#define END_W7_F1_WAKEZ          PPC_BIT32(0)
>>>> +#define END_W7_F1_LOG_SERVER_ID  PPC_BITMASK32(1, 31)
>>>> +} XiveEND;
>>>> +
>>>>  #endif /* PPC_XIVE_REGS_H */
>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>>>> index c4c90a25758e..9cb001e7b540 100644
>>>> --- a/hw/intc/xive.c
>>>> +++ b/hw/intc/xive.c
>>>> @@ -442,6 +442,101 @@ static const TypeInfo xive_source_info = {
>>>>      .class_init    = xive_source_class_init,
>>>>  };
>>>>  
>>>> +/*
>>>> + * XiveEND helpers
>>>> + */
>>>> +
>>>> +void xive_end_reset(XiveEND *end)
>>>> +{
>>>> +    memset(end, 0, sizeof(*end));
>>>> +
>>>> +    /* switch off the escalation and notification ESBs */
>>>> +    end->w1 = END_W1_ESe_Q | END_W1_ESn_Q;
>>>
>>> It's not obvious to me what circumstances this would be called under.
>>> Since the ENDs are in system memory, a memset() seems like an odd
>>> thing for (virtual) hardware to be doing to it.
>>
>> It makes sense on sPAPR if one day some OS starts using the END ESBs for 
>> further coalescing of the events. None does for now but I have added the 
>> model though.
> 
> Hrm, I think that belongs in PAPR specific code.  It's not really part
> of the router model - it's the PAPR stuff configuring the router at
> reset time (much as firmware would configure it at reset time for bare
> metal).

This is true this routine is only used by the H_INT_RESET hcall and by 
the reset handler of the sPAPR controller model. But it made sense to put 
this END helper routine with the other END routines. Don't you think so ? 

C.

> 
>>
>>>> +}
>>>> +
>>>> +static void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width,
>>>> +                                          Monitor *mon)
>>>> +{
>>>> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
>>>> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
>>>> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
>>>> +    uint32_t qentries = 1 << (qsize + 10);
>>>> +    int i;
>>>> +
>>>> +    /*
>>>> +     * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
>>>> +     */
>>>> +    monitor_printf(mon, " [ ");
>>>> +    qindex = (qindex - (width - 1)) & (qentries - 1);
>>>> +    for (i = 0; i < width; i++) {
>>>> +        uint64_t qaddr = qaddr_base + (qindex << 2);
>>>> +        uint32_t qdata = -1;
>>>> +
>>>> +        if (dma_memory_read(&address_space_memory, qaddr, &qdata,
>>>> +                            sizeof(qdata))) {
>>>> +            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
>>>> +                          HWADDR_PRIx "\n", qaddr);
>>>> +            return;
>>>> +        }
>>>> +        monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
>>>> +                       be32_to_cpu(qdata));
>>>> +        qindex = (qindex + 1) & (qentries - 1);
>>>> +    }
>>>> +    monitor_printf(mon, "]\n");
>>>> +}
>>>> +
>>>> +void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
>>>> +{
>>>> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
>>>> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
>>>> +    uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1);
>>>> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
>>>> +    uint32_t qentries = 1 << (qsize + 10);
>>>> +
>>>> +    uint32_t nvt = GETFIELD(END_W6_NVT_INDEX, end->w6);
>>>> +    uint8_t priority = GETFIELD(END_W7_F0_PRIORITY, end->w7);
>>>> +
>>>> +    if (!(end->w0 & END_W0_VALID)) {
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    monitor_printf(mon, "  %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64
>>>> +                   "% 6d/%5d ^%d", end_idx,
>>>> +                   end->w0 & END_W0_VALID ? 'v' : '-',
>>>> +                   end->w0 & END_W0_ENQUEUE ? 'q' : '-',
>>>> +                   end->w0 & END_W0_UCOND_NOTIFY ? 'n' : '-',
>>>> +                   end->w0 & END_W0_BACKLOG ? 'b' : '-',
>>>> +                   end->w0 & END_W0_ESCALATE_CTL ? 'e' : '-',
>>>> +                   priority, nvt, qaddr_base, qindex, qentries, qgen);
>>>> +
>>>> +    xive_end_queue_pic_print_info(end, 6, mon);
>>>> +}
>>>> +
>>>> +static void xive_end_push(XiveEND *end, uint32_t data)
>>>
>>> s/push/enqueue/ please, "push" suggests a stack.  (Not to mention that
>>> "push" and "pull" are used as terms elsewhere in XIVE).
>>
>> yes. you are right. I will change.
>>
>>>> +{
>>>> +    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
>>>> +    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
>>>> +    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
>>>> +    uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1);
>>>> +
>>>> +    uint64_t qaddr = qaddr_base + (qindex << 2);
>>>> +    uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
>>>> +    uint32_t qentries = 1 << (qsize + 10);
>>>> +
>>>> +    if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
>>>> +                      HWADDR_PRIx "\n", qaddr);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    qindex = (qindex + 1) & (qentries - 1);
>>>> +    if (qindex == 0) {
>>>> +        qgen ^= 1;
>>>> +        end->w1 = SETFIELD(END_W1_GENERATION, end->w1, qgen);
>>>> +    }
>>>> +    end->w1 = SETFIELD(END_W1_PAGE_OFF, end->w1, qindex);
>>>> +}
>>>> +
>>>>  /*
>>>>   * XIVE Router (aka. Virtualization Controller or IVRE)
>>>>   */
>>>> @@ -460,6 +555,82 @@ int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
>>>>      return xrc->set_eas(xrtr, lisn, eas);
>>>>  }
>>>>  
>>>> +int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>>>> +                        XiveEND *end)
>>>> +{
>>>> +   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
>>>> +
>>>> +   return xrc->get_end(xrtr, end_blk, end_idx, end);
>>>> +}
>>>> +
>>>> +int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
>>>> +                        XiveEND *end)
>>>> +{
>>>> +   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
>>>> +
>>>> +   return xrc->set_end(xrtr, end_blk, end_idx, end);
>>>> +}
>>>> +
>>>> +/*
>>>> + * An END trigger can come from an event trigger (IPI or HW) or from
>>>> + * another chip. We don't model the PowerBus but the END trigger
>>>> + * message has the same parameters than in the function below.
>>>> + */
>>>> +static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
>>>> +                                   uint32_t end_idx, uint32_t end_data)
>>>> +{
>>>> +    XiveEND end;
>>>> +    uint8_t priority;
>>>> +    uint8_t format;
>>>> +
>>>> +    /* END cache lookup */
>>>> +    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
>>>> +                      end_idx);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (!(end.w0 & END_W0_VALID)) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
>>>> +                      end_blk, end_idx);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (end.w0 & END_W0_ENQUEUE) {
>>>> +        xive_end_push(&end, end_data);
>>>> +        xive_router_set_end(xrtr, end_blk, end_idx, &end);
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * The W7 format depends on the F bit in W6. It defines the type
>>>> +     * of the notification :
>>>> +     *
>>>> +     *   F=0 : single or multiple NVT notification
>>>> +     *   F=1 : User level Event-Based Branch (EBB) notification, no
>>>> +     *         priority
>>>> +     */
>>>> +    format = GETFIELD(END_W6_FORMAT_BIT, end.w6);
>>>> +    priority = GETFIELD(END_W7_F0_PRIORITY, end.w7);
>>>> +
>>>> +    /* The END is masked */
>>>> +    if (format == 0 && priority == 0xff) {
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * Check the END ESn (Event State Buffer for notification) for
>>>> +     * even futher coalescing in the Router
>>>> +     */
>>>> +    if (!(end.w0 & END_W0_UCOND_NOTIFY)) {
>>>> +        qemu_log_mask(LOG_UNIMP, "XIVE: !UCOND_NOTIFY not implemented\n");
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * Follows IVPE notification
>>>> +     */
>>>> +}
>>>> +
>>>>  static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>>>>  {
>>>>      XiveRouter *xrtr = XIVE_ROUTER(xf);
>>>> @@ -471,9 +642,9 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>>>>          return;
>>>>      }
>>>>  
>>>> -    /* The IVRE has a State Bit Cache for its internal sources which
>>>> -     * is also involed at this point. We skip the SBC lookup because
>>>> -     * the state bits of the sources are modeled internally in QEMU.
>>>> +    /* The IVRE checks the State Bit Cache at this point. We skip the
>>>> +     * SBC lookup because the state bits of the sources are modeled
>>>> +     * internally in QEMU.
>>>
>>> Replacing a comment about something we're not doing with a different
>>> comment about something we're not doing doesn't seem very useful.
>>> Maybe fold these together into one patch or the other.
>>
>> That's me rephrasing. it should be indeed in the previous patch
>>
>> Thanks,
>>
>> C.
>>
>>>>       */
>>>>  
>>>>      if (!(eas.w & EAS_VALID)) {
>>>> @@ -485,6 +656,14 @@ static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
>>>>          /* Notification completed */
>>>>          return;
>>>>      }
>>>> +
>>>> +    /*
>>>> +     * The event trigger becomes an END trigger
>>>> +     */
>>>> +    xive_router_end_notify(xrtr,
>>>> +                           GETFIELD(EAS_END_BLOCK, eas.w),
>>>> +                           GETFIELD(EAS_END_INDEX, eas.w),
>>>> +                           GETFIELD(EAS_END_DATA,  eas.w));
>>>>  }
>>>>  
>>>>  static Property xive_router_properties[] = {
>>>
>>
>
David Gibson Nov. 29, 2018, 4:46 a.m. UTC | #7
On Fri, Nov 23, 2018 at 12:01:27PM +0100, Cédric Le Goater wrote:
> On 11/23/18 5:35 AM, David Gibson wrote:
> > On Thu, Nov 22, 2018 at 10:47:44PM +0100, Cédric Le Goater wrote:
> >> On 11/22/18 5:41 AM, David Gibson wrote:
> >>> On Fri, Nov 16, 2018 at 11:56:58AM +0100, Cédric Le Goater
> wrote:
[snip]
> >>>> +/*
> >>>> + * XiveEND helpers
> >>>> + */
> >>>> +
> >>>> +void xive_end_reset(XiveEND *end)
> >>>> +{
> >>>> +    memset(end, 0, sizeof(*end));
> >>>> +
> >>>> +    /* switch off the escalation and notification ESBs */
> >>>> +    end->w1 = END_W1_ESe_Q | END_W1_ESn_Q;
> >>>
> >>> It's not obvious to me what circumstances this would be called under.
> >>> Since the ENDs are in system memory, a memset() seems like an odd
> >>> thing for (virtual) hardware to be doing to it.
> >>
> >> It makes sense on sPAPR if one day some OS starts using the END ESBs for 
> >> further coalescing of the events. None does for now but I have added the 
> >> model though.
> > 
> > Hrm, I think that belongs in PAPR specific code.  It's not really part
> > of the router model - it's the PAPR stuff configuring the router at
> > reset time (much as firmware would configure it at reset time for bare
> > metal).
> 
> This is true this routine is only used by the H_INT_RESET hcall and by 
> the reset handler of the sPAPR controller model. But it made sense to put 
> this END helper routine with the other END routines. Don't you think so ? 

Actually, no.  In real hardware this would be handled by a different
component - the system firmware would do this setup of the ENDs as it
configures the XIVE.  So it makes sense to do this in a separate
component for PAPR as well.  In this case that's another piece of qemu
(the spapr stuff) rather than being within the VM, but the difference
isn't important to the END handling itself.
diff mbox series

Patch

diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index 5a0696366577..ce62aaf28343 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -193,11 +193,29 @@  typedef struct XiveRouterClass {
     /* XIVE table accessors */
     int (*get_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
     int (*set_eas)(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
+    int (*get_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                   XiveEND *end);
+    int (*set_end)(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                   XiveEND *end);
 } XiveRouterClass;
 
 void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
 
 int xive_router_get_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
 int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas);
+int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                        XiveEND *end);
+int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                        XiveEND *end);
+
+/*
+ * For legacy compatibility, the exceptions define up to 256 different
+ * priorities. P9 implements only 9 levels : 8 active levels [0 - 7]
+ * and the least favored level 0xFF.
+ */
+#define XIVE_PRIORITY_MAX  7
+
+void xive_end_reset(XiveEND *end);
+void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon);
 
 #endif /* PPC_XIVE_H */
diff --git a/include/hw/ppc/xive_regs.h b/include/hw/ppc/xive_regs.h
index 12499b33614c..f97fb2b90bee 100644
--- a/include/hw/ppc/xive_regs.h
+++ b/include/hw/ppc/xive_regs.h
@@ -28,4 +28,52 @@  typedef struct XiveEAS {
 #define EAS_END_DATA    PPC_BITMASK(33, 63)      /* Data written to the END */
 } XiveEAS;
 
+/* Event Notification Descriptor (END) */
+typedef struct XiveEND {
+        uint32_t        w0;
+#define END_W0_VALID             PPC_BIT32(0) /* "v" bit */
+#define END_W0_ENQUEUE           PPC_BIT32(1) /* "q" bit */
+#define END_W0_UCOND_NOTIFY      PPC_BIT32(2) /* "n" bit */
+#define END_W0_BACKLOG           PPC_BIT32(3) /* "b" bit */
+#define END_W0_PRECL_ESC_CTL     PPC_BIT32(4) /* "p" bit */
+#define END_W0_ESCALATE_CTL      PPC_BIT32(5) /* "e" bit */
+#define END_W0_UNCOND_ESCALATE   PPC_BIT32(6) /* "u" bit - DD2.0 */
+#define END_W0_SILENT_ESCALATE   PPC_BIT32(7) /* "s" bit - DD2.0 */
+#define END_W0_QSIZE             PPC_BITMASK32(12, 15)
+#define END_W0_SW0               PPC_BIT32(16)
+#define END_W0_FIRMWARE          END_W0_SW0 /* Owned by FW */
+#define END_QSIZE_4K             0
+#define END_QSIZE_64K            4
+#define END_W0_HWDEP             PPC_BITMASK32(24, 31)
+        uint32_t        w1;
+#define END_W1_ESn               PPC_BITMASK32(0, 1)
+#define END_W1_ESn_P             PPC_BIT32(0)
+#define END_W1_ESn_Q             PPC_BIT32(1)
+#define END_W1_ESe               PPC_BITMASK32(2, 3)
+#define END_W1_ESe_P             PPC_BIT32(2)
+#define END_W1_ESe_Q             PPC_BIT32(3)
+#define END_W1_GENERATION        PPC_BIT32(9)
+#define END_W1_PAGE_OFF          PPC_BITMASK32(10, 31)
+        uint32_t        w2;
+#define END_W2_MIGRATION_REG     PPC_BITMASK32(0, 3)
+#define END_W2_OP_DESC_HI        PPC_BITMASK32(4, 31)
+        uint32_t        w3;
+#define END_W3_OP_DESC_LO        PPC_BITMASK32(0, 31)
+        uint32_t        w4;
+#define END_W4_ESC_END_BLOCK     PPC_BITMASK32(4, 7)
+#define END_W4_ESC_END_INDEX     PPC_BITMASK32(8, 31)
+        uint32_t        w5;
+#define END_W5_ESC_END_DATA      PPC_BITMASK32(1, 31)
+        uint32_t        w6;
+#define END_W6_FORMAT_BIT        PPC_BIT32(8)
+#define END_W6_NVT_BLOCK         PPC_BITMASK32(9, 12)
+#define END_W6_NVT_INDEX         PPC_BITMASK32(13, 31)
+        uint32_t        w7;
+#define END_W7_F0_IGNORE         PPC_BIT32(0)
+#define END_W7_F0_BLK_GROUPING   PPC_BIT32(1)
+#define END_W7_F0_PRIORITY       PPC_BITMASK32(8, 15)
+#define END_W7_F1_WAKEZ          PPC_BIT32(0)
+#define END_W7_F1_LOG_SERVER_ID  PPC_BITMASK32(1, 31)
+} XiveEND;
+
 #endif /* PPC_XIVE_REGS_H */
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index c4c90a25758e..9cb001e7b540 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -442,6 +442,101 @@  static const TypeInfo xive_source_info = {
     .class_init    = xive_source_class_init,
 };
 
+/*
+ * XiveEND helpers
+ */
+
+void xive_end_reset(XiveEND *end)
+{
+    memset(end, 0, sizeof(*end));
+
+    /* switch off the escalation and notification ESBs */
+    end->w1 = END_W1_ESe_Q | END_W1_ESn_Q;
+}
+
+static void xive_end_queue_pic_print_info(XiveEND *end, uint32_t width,
+                                          Monitor *mon)
+{
+    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
+    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
+    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
+    uint32_t qentries = 1 << (qsize + 10);
+    int i;
+
+    /*
+     * print out the [ (qindex - (width - 1)) .. (qindex + 1)] window
+     */
+    monitor_printf(mon, " [ ");
+    qindex = (qindex - (width - 1)) & (qentries - 1);
+    for (i = 0; i < width; i++) {
+        uint64_t qaddr = qaddr_base + (qindex << 2);
+        uint32_t qdata = -1;
+
+        if (dma_memory_read(&address_space_memory, qaddr, &qdata,
+                            sizeof(qdata))) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to read EQ @0x%"
+                          HWADDR_PRIx "\n", qaddr);
+            return;
+        }
+        monitor_printf(mon, "%s%08x ", i == width - 1 ? "^" : "",
+                       be32_to_cpu(qdata));
+        qindex = (qindex + 1) & (qentries - 1);
+    }
+    monitor_printf(mon, "]\n");
+}
+
+void xive_end_pic_print_info(XiveEND *end, uint32_t end_idx, Monitor *mon)
+{
+    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
+    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
+    uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1);
+    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
+    uint32_t qentries = 1 << (qsize + 10);
+
+    uint32_t nvt = GETFIELD(END_W6_NVT_INDEX, end->w6);
+    uint8_t priority = GETFIELD(END_W7_F0_PRIORITY, end->w7);
+
+    if (!(end->w0 & END_W0_VALID)) {
+        return;
+    }
+
+    monitor_printf(mon, "  %08x %c%c%c%c%c prio:%d nvt:%04x eq:@%08"PRIx64
+                   "% 6d/%5d ^%d", end_idx,
+                   end->w0 & END_W0_VALID ? 'v' : '-',
+                   end->w0 & END_W0_ENQUEUE ? 'q' : '-',
+                   end->w0 & END_W0_UCOND_NOTIFY ? 'n' : '-',
+                   end->w0 & END_W0_BACKLOG ? 'b' : '-',
+                   end->w0 & END_W0_ESCALATE_CTL ? 'e' : '-',
+                   priority, nvt, qaddr_base, qindex, qentries, qgen);
+
+    xive_end_queue_pic_print_info(end, 6, mon);
+}
+
+static void xive_end_push(XiveEND *end, uint32_t data)
+{
+    uint64_t qaddr_base = (((uint64_t)(end->w2 & 0x0fffffff)) << 32) | end->w3;
+    uint32_t qsize = GETFIELD(END_W0_QSIZE, end->w0);
+    uint32_t qindex = GETFIELD(END_W1_PAGE_OFF, end->w1);
+    uint32_t qgen = GETFIELD(END_W1_GENERATION, end->w1);
+
+    uint64_t qaddr = qaddr_base + (qindex << 2);
+    uint32_t qdata = cpu_to_be32((qgen << 31) | (data & 0x7fffffff));
+    uint32_t qentries = 1 << (qsize + 10);
+
+    if (dma_memory_write(&address_space_memory, qaddr, &qdata, sizeof(qdata))) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: failed to write END data @0x%"
+                      HWADDR_PRIx "\n", qaddr);
+        return;
+    }
+
+    qindex = (qindex + 1) & (qentries - 1);
+    if (qindex == 0) {
+        qgen ^= 1;
+        end->w1 = SETFIELD(END_W1_GENERATION, end->w1, qgen);
+    }
+    end->w1 = SETFIELD(END_W1_PAGE_OFF, end->w1, qindex);
+}
+
 /*
  * XIVE Router (aka. Virtualization Controller or IVRE)
  */
@@ -460,6 +555,82 @@  int xive_router_set_eas(XiveRouter *xrtr, uint32_t lisn, XiveEAS *eas)
     return xrc->set_eas(xrtr, lisn, eas);
 }
 
+int xive_router_get_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                        XiveEND *end)
+{
+   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+   return xrc->get_end(xrtr, end_blk, end_idx, end);
+}
+
+int xive_router_set_end(XiveRouter *xrtr, uint8_t end_blk, uint32_t end_idx,
+                        XiveEND *end)
+{
+   XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(xrtr);
+
+   return xrc->set_end(xrtr, end_blk, end_idx, end);
+}
+
+/*
+ * An END trigger can come from an event trigger (IPI or HW) or from
+ * another chip. We don't model the PowerBus but the END trigger
+ * message has the same parameters than in the function below.
+ */
+static void xive_router_end_notify(XiveRouter *xrtr, uint8_t end_blk,
+                                   uint32_t end_idx, uint32_t end_data)
+{
+    XiveEND end;
+    uint8_t priority;
+    uint8_t format;
+
+    /* END cache lookup */
+    if (xive_router_get_end(xrtr, end_blk, end_idx, &end)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: No END %x/%x\n", end_blk,
+                      end_idx);
+        return;
+    }
+
+    if (!(end.w0 & END_W0_VALID)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: END %x/%x is invalid\n",
+                      end_blk, end_idx);
+        return;
+    }
+
+    if (end.w0 & END_W0_ENQUEUE) {
+        xive_end_push(&end, end_data);
+        xive_router_set_end(xrtr, end_blk, end_idx, &end);
+    }
+
+    /*
+     * The W7 format depends on the F bit in W6. It defines the type
+     * of the notification :
+     *
+     *   F=0 : single or multiple NVT notification
+     *   F=1 : User level Event-Based Branch (EBB) notification, no
+     *         priority
+     */
+    format = GETFIELD(END_W6_FORMAT_BIT, end.w6);
+    priority = GETFIELD(END_W7_F0_PRIORITY, end.w7);
+
+    /* The END is masked */
+    if (format == 0 && priority == 0xff) {
+        return;
+    }
+
+    /*
+     * Check the END ESn (Event State Buffer for notification) for
+     * even futher coalescing in the Router
+     */
+    if (!(end.w0 & END_W0_UCOND_NOTIFY)) {
+        qemu_log_mask(LOG_UNIMP, "XIVE: !UCOND_NOTIFY not implemented\n");
+        return;
+    }
+
+    /*
+     * Follows IVPE notification
+     */
+}
+
 static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
 {
     XiveRouter *xrtr = XIVE_ROUTER(xf);
@@ -471,9 +642,9 @@  static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
         return;
     }
 
-    /* The IVRE has a State Bit Cache for its internal sources which
-     * is also involed at this point. We skip the SBC lookup because
-     * the state bits of the sources are modeled internally in QEMU.
+    /* The IVRE checks the State Bit Cache at this point. We skip the
+     * SBC lookup because the state bits of the sources are modeled
+     * internally in QEMU.
      */
 
     if (!(eas.w & EAS_VALID)) {
@@ -485,6 +656,14 @@  static void xive_router_notify(XiveFabric *xf, uint32_t lisn)
         /* Notification completed */
         return;
     }
+
+    /*
+     * The event trigger becomes an END trigger
+     */
+    xive_router_end_notify(xrtr,
+                           GETFIELD(EAS_END_BLOCK, eas.w),
+                           GETFIELD(EAS_END_INDEX, eas.w),
+                           GETFIELD(EAS_END_DATA,  eas.w));
 }
 
 static Property xive_router_properties[] = {