diff mbox series

[v5,01/36] ppc/xive: introduce a XIVE interrupt source model

Message ID 20181116105729.23240-2-clg@kaod.org (mailing list archive)
State New, archived
Headers show
Series ppc: support for the XIVE interrupt controller (POWER9) | expand

Commit Message

Cédric Le Goater Nov. 16, 2018, 10:56 a.m. UTC
The first sub-engine of the overall XIVE architecture is the Interrupt
Virtualization Source Engine (IVSE). An IVSE can be integrated into
another logic, like in a PCI PHB or in the main interrupt controller
to manage IPIs.

Each IVSE instance is associated with an Event State Buffer (ESB) that
contains a two bit state entry for each possible event source. When an
event is signaled to the IVSE, by MMIO or some other means, the
associated interrupt state bits are fetched from the ESB and
modified. Depending on the resulting ESB state, the event is forwarded
to the IVRE sub-engine of the controller doing the routing.

Each supported ESB entry is associated with either a single page or an
even/odd pair of pages which provide commands to manage the source:
to EOI or to turn off the source, for instance.

On a sPAPR machine, the O/S will obtain the page address of the ESB
entry associated with a source and its characteristic using the
H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is used.

The xive_source_notify() routine is in charge of forwarding the source
event notification to the routing engine. It will be filled in later on.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 default-configs/ppc64-softmmu.mak |   1 +
 include/hw/ppc/xive.h             | 130 ++++++++++
 hw/intc/xive.c                    | 379 ++++++++++++++++++++++++++++++
 hw/intc/Makefile.objs             |   1 +
 4 files changed, 511 insertions(+)
 create mode 100644 include/hw/ppc/xive.h
 create mode 100644 hw/intc/xive.c

Comments

David Gibson Nov. 22, 2018, 3:05 a.m. UTC | #1
On Fri, Nov 16, 2018 at 11:56:54AM +0100, Cédric Le Goater wrote:
> The first sub-engine of the overall XIVE architecture is the Interrupt
> Virtualization Source Engine (IVSE). An IVSE can be integrated into
> another logic, like in a PCI PHB or in the main interrupt controller
> to manage IPIs.
> 
> Each IVSE instance is associated with an Event State Buffer (ESB) that
> contains a two bit state entry for each possible event source. When an
> event is signaled to the IVSE, by MMIO or some other means, the
> associated interrupt state bits are fetched from the ESB and
> modified. Depending on the resulting ESB state, the event is forwarded
> to the IVRE sub-engine of the controller doing the routing.
> 
> Each supported ESB entry is associated with either a single or a
> even/odd pair of pages which provides commands to manage the source:
> to EOI, to turn off the source for instance.
> 
> On a sPAPR machine, the O/S will obtain the page address of the ESB
> entry associated with a source and its characteristic using the
> H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is used.
> 
> The xive_source_notify() routine is in charge forwarding the source
> event notification to the routing engine. It will be filled later on.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>

Ok, this is looking basically pretty good.  Few details to query
below.


> ---
>  default-configs/ppc64-softmmu.mak |   1 +
>  include/hw/ppc/xive.h             | 130 ++++++++++
>  hw/intc/xive.c                    | 379 ++++++++++++++++++++++++++++++
>  hw/intc/Makefile.objs             |   1 +
>  4 files changed, 511 insertions(+)
>  create mode 100644 include/hw/ppc/xive.h
>  create mode 100644 hw/intc/xive.c
> 
> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> index aec2855750d6..2d1e7c5c4668 100644
> --- a/default-configs/ppc64-softmmu.mak
> +++ b/default-configs/ppc64-softmmu.mak
> @@ -16,6 +16,7 @@ CONFIG_VIRTIO_VGA=y
>  CONFIG_XICS=$(CONFIG_PSERIES)
>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
> +CONFIG_XIVE=$(CONFIG_PSERIES)
>  CONFIG_MEM_DEVICE=y
>  CONFIG_DIMM=y
>  CONFIG_SPAPR_RNG=y
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> new file mode 100644
> index 000000000000..5fec4b08705d
> --- /dev/null
> +++ b/include/hw/ppc/xive.h
> @@ -0,0 +1,130 @@
> +/*
> + * QEMU PowerPC XIVE interrupt controller model
> + *
> + * Copyright (c) 2017-2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.

A cheat sheet at the top of this header with the old and new XIVE
terms would be quite nice to have.

> + */
> +
> +#ifndef PPC_XIVE_H
> +#define PPC_XIVE_H
> +
> +#include "hw/sysbus.h"

So, I'm a bit dubious about making the XiveSource a SysBus device -
I'm concerned it won't play well with tying it into the other devices
like PHB that "own" it in real hardware.

I think we'd be better off making it a direct descendant of
TYPE_DEVICE which constructs the MMIO region, but doesn't map it.
Then we can have a SysBusDevice (and/or other) wrapper which
instantiates the XiveSource core and maps it into somewhere
accessible.

> +
> +/*
> + * XIVE Interrupt Source
> + */
> +
> +#define TYPE_XIVE_SOURCE "xive-source"
> +#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
> +
> +/*
> + * XIVE Interrupt Source characteristics, which define how the ESB are
> + * controlled.
> + */
> +#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
> +#define XIVE_SRC_STORE_EOI     0x2 /* Store EOI supported */
> +
> +typedef struct XiveSource {
> +    SysBusDevice parent;
> +
> +    /* IRQs */
> +    uint32_t        nr_irqs;
> +    qemu_irq        *qirqs;
> +
> +    /* PQ bits */
> +    uint8_t         *status;
> +
> +    /* ESB memory region */
> +    uint64_t        esb_flags;
> +    uint32_t        esb_shift;
> +    MemoryRegion    esb_mmio;
> +} XiveSource;
> +
> +/*
> + * ESB MMIO setting. Can be one page, for both source triggering and
> + * source management, or two different pages. See below for magic
> + * values.
> + */
> +#define XIVE_ESB_4K          12 /* PSI HB only */
> +#define XIVE_ESB_4K_2PAGE    13
> +#define XIVE_ESB_64K         16
> +#define XIVE_ESB_64K_2PAGE   17
> +
> +static inline bool xive_source_esb_has_2page(XiveSource *xsrc)
> +{
> +    return xsrc->esb_shift == XIVE_ESB_64K_2PAGE ||
> +        xsrc->esb_shift == XIVE_ESB_4K_2PAGE;
> +}
> +
> +/* The trigger page is always the first/even page */
> +static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno)

This function doesn't appear to be used anywhere except..

> +{
> +    assert(srcno < xsrc->nr_irqs);
> +    return (1ull << xsrc->esb_shift) * srcno;
> +}
> +
> +/* In a two pages ESB MMIO setting, the odd page is for management */
> +static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)


..here, and this function doesn't appear to be used anywhere.

> +{
> +    hwaddr addr = xive_source_esb_page(xsrc, srcno);
> +
> +    if (xive_source_esb_has_2page(xsrc)) {
> +        addr += (1 << (xsrc->esb_shift - 1));
> +    }
> +
> +    return addr;
> +}
> +
> +/*
> + * Each interrupt source has a 2-bit state machine which can be
> + * controlled by MMIO. P indicates that an interrupt is pending (has
> + * been sent to a queue and is waiting for an EOI). Q indicates that
> + * the interrupt has been triggered while pending.
> + *
> + * This acts as a coalescing mechanism in order to guarantee that a
> + * given interrupt only occurs at most once in a queue.
> + *
> + * When doing an EOI, the Q bit will indicate if the interrupt
> + * needs to be re-triggered.
> + */
> +#define XIVE_ESB_VAL_P        0x2
> +#define XIVE_ESB_VAL_Q        0x1
> +
> +#define XIVE_ESB_RESET        0x0
> +#define XIVE_ESB_PENDING      XIVE_ESB_VAL_P
> +#define XIVE_ESB_QUEUED       (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q)
> +#define XIVE_ESB_OFF          XIVE_ESB_VAL_Q
> +
> +/*
> + * "magic" Event State Buffer (ESB) MMIO offsets.
> + *
> + * The following offsets into the ESB MMIO allow to read or manipulate
> + * the PQ bits. They must be used with an 8-byte load instruction.
> + * They all return the previous state of the interrupt (atomically).
> + *
> + * Additionally, some ESB pages support doing an EOI via a store and
> + * some ESBs support doing a trigger via a separate trigger page.
> + */
> +#define XIVE_ESB_STORE_EOI      0x400 /* Store */
> +#define XIVE_ESB_LOAD_EOI       0x000 /* Load */
> +#define XIVE_ESB_GET            0x800 /* Load */
> +#define XIVE_ESB_SET_PQ_00      0xc00 /* Load */
> +#define XIVE_ESB_SET_PQ_01      0xd00 /* Load */
> +#define XIVE_ESB_SET_PQ_10      0xe00 /* Load */
> +#define XIVE_ESB_SET_PQ_11      0xf00 /* Load */
> +
> +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno);
> +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq);
> +
> +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset,
> +                                Monitor *mon);
> +
> +static inline qemu_irq xive_source_qirq(XiveSource *xsrc, uint32_t srcno)
> +{
> +    assert(srcno < xsrc->nr_irqs);
> +    return xsrc->qirqs[srcno];
> +}
> +
> +#endif /* PPC_XIVE_H */
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> new file mode 100644
> index 000000000000..f7621f84828c
> --- /dev/null
> +++ b/hw/intc/xive.c
> @@ -0,0 +1,379 @@
> +/*
> + * QEMU PowerPC XIVE interrupt controller model
> + *
> + * Copyright (c) 2017-2018, IBM Corporation.
> + *
> + * This code is licensed under the GPL version 2 or later. See the
> + * COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu/log.h"
> +#include "qapi/error.h"
> +#include "target/ppc/cpu.h"
> +#include "sysemu/cpus.h"
> +#include "sysemu/dma.h"
> +#include "monitor/monitor.h"
> +#include "hw/ppc/xive.h"
> +
> +/*
> + * XIVE ESB helpers
> + */
> +
> +static uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
> +{
> +    uint8_t old_pq = *pq & 0x3;
> +
> +    *pq &= ~0x3;
> +    *pq |= value & 0x3;
> +
> +    return old_pq;
> +}
> +
> +static bool xive_esb_trigger(uint8_t *pq)
> +{
> +    uint8_t old_pq = *pq & 0x3;
> +
> +    switch (old_pq) {
> +    case XIVE_ESB_RESET:
> +        xive_esb_set(pq, XIVE_ESB_PENDING);
> +        return true;
> +    case XIVE_ESB_PENDING:
> +    case XIVE_ESB_QUEUED:
> +        xive_esb_set(pq, XIVE_ESB_QUEUED);
> +        return false;
> +    case XIVE_ESB_OFF:
> +        xive_esb_set(pq, XIVE_ESB_OFF);
> +        return false;
> +    default:
> +         g_assert_not_reached();
> +    }
> +}
> +
> +static bool xive_esb_eoi(uint8_t *pq)
> +{
> +    uint8_t old_pq = *pq & 0x3;
> +
> +    switch (old_pq) {
> +    case XIVE_ESB_RESET:
> +    case XIVE_ESB_PENDING:
> +        xive_esb_set(pq, XIVE_ESB_RESET);
> +        return false;
> +    case XIVE_ESB_QUEUED:
> +        xive_esb_set(pq, XIVE_ESB_PENDING);
> +        return true;
> +    case XIVE_ESB_OFF:
> +        xive_esb_set(pq, XIVE_ESB_OFF);
> +        return false;
> +    default:
> +         g_assert_not_reached();
> +    }
> +}
> +
> +/*
> + * XIVE Interrupt Source (or IVSE)
> + */
> +
> +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
> +{
> +    assert(srcno < xsrc->nr_irqs);
> +
> +    return xsrc->status[srcno] & 0x3;
> +}
> +
> +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
> +{
> +    assert(srcno < xsrc->nr_irqs);
> +
> +    return xive_esb_set(&xsrc->status[srcno], pq);
> +}
> +
> +/*
> + * Returns whether the event notification should be forwarded.
> + */
> +static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
> +{
> +    assert(srcno < xsrc->nr_irqs);
> +
> +    return xive_esb_trigger(&xsrc->status[srcno]);
> +}
> +
> +/*
> + * Returns whether the event notification should be forwarded.
> + */
> +static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
> +{
> +    assert(srcno < xsrc->nr_irqs);
> +
> +    return xive_esb_eoi(&xsrc->status[srcno]);
> +}
> +
> +/*
> + * Forward the source event notification to the Router
> + */
> +static void xive_source_notify(XiveSource *xsrc, int srcno)
> +{
> +
> +}
> +
> +/*
> + * In a two pages ESB MMIO setting, even page is the trigger page, odd
> + * page is for management
> + */
> +static inline bool addr_is_even(hwaddr addr, uint32_t shift)
> +{
> +    return !((addr >> shift) & 1);
> +}
> +
> +static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
> +{
> +    return xive_source_esb_has_2page(xsrc) &&
> +        addr_is_even(addr, xsrc->esb_shift - 1);
> +}
> +
> +/*
> + * ESB MMIO loads
> + *                      Trigger page    Management/EOI page
> + * 2 pages setting      even            odd
> + *
> + * 0x000 .. 0x3FF       -1              EOI and return 0|1
> + * 0x400 .. 0x7FF       -1              EOI and return 0|1
> + * 0x800 .. 0xBFF       -1              return PQ
> + * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
> + * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
> + * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
> + * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
> + */

I can't quite make sense of this table.  What do the -1s represent,
and how does it relate to the non-2page case?

> +static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> +    uint32_t offset = addr & 0xFFF;
> +    uint32_t srcno = addr >> xsrc->esb_shift;
> +    uint64_t ret = -1;
> +
> +    /* In a two pages ESB MMIO setting, trigger page should not be read */
> +    if (xive_source_is_trigger_page(xsrc, addr)) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "XIVE: invalid load on IRQ %d trigger page at "
> +                      "0x%"HWADDR_PRIx"\n", srcno, addr);
> +        return -1;
> +    }
> +
> +    switch (offset) {
> +    case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
> +        ret = xive_source_esb_eoi(xsrc, srcno);
> +
> +        /* Forward the source event notification for routing */
> +        if (ret) {
> +            xive_source_notify(xsrc, srcno);
> +        }
> +        break;
> +
> +    case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
> +        ret = xive_source_esb_get(xsrc, srcno);
> +        break;
> +
> +    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
> +    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
> +    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
> +    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
> +        ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
> +        break;
> +    default:
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
> +                      offset);
> +    }
> +
> +    return ret;
> +}
> +
> +/*
> + * ESB MMIO stores
> + *                      Trigger page    Management/EOI page
> + * 2 pages setting      even            odd

As with the previous table, I don't quite understand what the headings
above mean.

> + * 0x000 .. 0x3FF       Trigger         Trigger
> + * 0x400 .. 0x7FF       Trigger         EOI
> + * 0x800 .. 0xBFF       Trigger         undefined
> + * 0xC00 .. 0xCFF       Trigger         PQ=00
> + * 0xD00 .. 0xDFF       Trigger         PQ=01
> + * 0xE00 .. 0xEFF       Trigger         PQ=10
> + * 0xF00 .. 0xFFF       Trigger         PQ=11
> + */
> +static void xive_source_esb_write(void *opaque, hwaddr addr,
> +                                  uint64_t value, unsigned size)
> +{
> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> +    uint32_t offset = addr & 0xFFF;
> +    uint32_t srcno = addr >> xsrc->esb_shift;
> +    bool notify = false;
> +
> +    /* In a two pages ESB MMIO setting, trigger page only triggers */
> +    if (xive_source_is_trigger_page(xsrc, addr)) {
> +        notify = xive_source_esb_trigger(xsrc, srcno);
> +        goto out;
> +    }
> +
> +    switch (offset) {
> +    case 0 ... 0x3FF:
> +        notify = xive_source_esb_trigger(xsrc, srcno);
> +        break;
> +
> +    case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
> +        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "XIVE: invalid Store EOI for IRQ %d\n", srcno);
> +            return;
> +        }
> +
> +        notify = xive_source_esb_eoi(xsrc, srcno);
> +        break;
> +
> +    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
> +    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
> +    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
> +    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
> +        xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
> +        break;
> +
> +    default:
> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
> +                      offset);
> +        return;
> +    }
> +
> +out:
> +    /* Forward the source event notification for routing */
> +    if (notify) {
> +        xive_source_notify(xsrc, srcno);
> +    }
> +}
> +
> +static const MemoryRegionOps xive_source_esb_ops = {
> +    .read = xive_source_esb_read,
> +    .write = xive_source_esb_write,
> +    .endianness = DEVICE_BIG_ENDIAN,
> +    .valid = {
> +        .min_access_size = 8,
> +        .max_access_size = 8,
> +    },
> +    .impl = {
> +        .min_access_size = 8,
> +        .max_access_size = 8,
> +    },
> +};
> +
> +static void xive_source_set_irq(void *opaque, int srcno, int val)
> +{
> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> +    bool notify = false;
> +
> +    if (val) {
> +        notify = xive_source_esb_trigger(xsrc, srcno);
> +    }
> +
> +    /* Forward the source event notification for routing */
> +    if (notify) {
> +        xive_source_notify(xsrc, srcno);
> +    }
> +}
> +
> +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
> +{
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        uint8_t pq = xive_source_esb_get(xsrc, i);
> +
> +        if (pq == XIVE_ESB_OFF) {
> +            continue;
> +        }
> +
> +        monitor_printf(mon, "  %08x %c%c\n", i + offset,
> +                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
> +                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-');
> +    }
> +}
> +
> +static void xive_source_reset(DeviceState *dev)
> +{
> +    XiveSource *xsrc = XIVE_SOURCE(dev);
> +
> +    /* PQs are initialized to 0b01 which corresponds to "ints off" */
> +    memset(xsrc->status, 0x1, xsrc->nr_irqs);

You've already got XIVE_ESB_OFF defined to make this a little clearer.

> +}
> +
> +static void xive_source_realize(DeviceState *dev, Error **errp)
> +{
> +    XiveSource *xsrc = XIVE_SOURCE(dev);
> +
> +    if (!xsrc->nr_irqs) {
> +        error_setg(errp, "Number of interrupt needs to be greater than 0");
> +        return;
> +    }
> +
> +    if (xsrc->esb_shift != XIVE_ESB_4K &&
> +        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
> +        xsrc->esb_shift != XIVE_ESB_64K &&
> +        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
> +        error_setg(errp, "Invalid ESB shift setting");
> +        return;
> +    }
> +
> +    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
> +                                     xsrc->nr_irqs);
> +
> +    xsrc->status = g_malloc0(xsrc->nr_irqs);
> +
> +    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
> +                          &xive_source_esb_ops, xsrc, "xive.esb",
> +                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio);
> +}
> +
> +static const VMStateDescription vmstate_xive_source = {
> +    .name = TYPE_XIVE_SOURCE,
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
> +        VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
> +/*
> + * The default XIVE interrupt source setting for the ESB MMIOs is two
> + * 64k pages without Store EOI, to be in sync with KVM.
> + */
> +static Property xive_source_properties[] = {
> +    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
> +    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
> +    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void xive_source_class_init(ObjectClass *klass, void *data)
> +{
> +    DeviceClass *dc = DEVICE_CLASS(klass);
> +
> +    dc->desc    = "XIVE Interrupt Source";
> +    dc->props   = xive_source_properties;
> +    dc->realize = xive_source_realize;
> +    dc->reset   = xive_source_reset;
> +    dc->vmsd    = &vmstate_xive_source;
> +}
> +
> +static const TypeInfo xive_source_info = {
> +    .name          = TYPE_XIVE_SOURCE,
> +    .parent        = TYPE_SYS_BUS_DEVICE,
> +    .instance_size = sizeof(XiveSource),
> +    .class_init    = xive_source_class_init,
> +};
> +
> +static void xive_register_types(void)
> +{
> +    type_register_static(&xive_source_info);
> +}
> +
> +type_init(xive_register_types)
> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> index 0e9963f5eecc..72a46ed91c31 100644
> --- a/hw/intc/Makefile.objs
> +++ b/hw/intc/Makefile.objs
> @@ -37,6 +37,7 @@ obj-$(CONFIG_SH4) += sh_intc.o
>  obj-$(CONFIG_XICS) += xics.o
>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
> +obj-$(CONFIG_XIVE) += xive.o
>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
Cédric Le Goater Nov. 22, 2018, 7:25 a.m. UTC | #2
On 11/22/18 4:05 AM, David Gibson wrote:
> On Fri, Nov 16, 2018 at 11:56:54AM +0100, Cédric Le Goater wrote:
>> The first sub-engine of the overall XIVE architecture is the Interrupt
>> Virtualization Source Engine (IVSE). An IVSE can be integrated into
>> another logic, like in a PCI PHB or in the main interrupt controller
>> to manage IPIs.
>>
>> Each IVSE instance is associated with an Event State Buffer (ESB) that
>> contains a two bit state entry for each possible event source. When an
>> event is signaled to the IVSE, by MMIO or some other means, the
>> associated interrupt state bits are fetched from the ESB and
>> modified. Depending on the resulting ESB state, the event is forwarded
>> to the IVRE sub-engine of the controller doing the routing.
>>
>> Each supported ESB entry is associated with either a single or a
>> even/odd pair of pages which provides commands to manage the source:
>> to EOI, to turn off the source for instance.
>>
>> On a sPAPR machine, the O/S will obtain the page address of the ESB
>> entry associated with a source and its characteristic using the
>> H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is used.
>>
>> The xive_source_notify() routine is in charge forwarding the source
>> event notification to the routing engine. It will be filled later on.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> 
> Ok, this is looking basically pretty good.  Few details to query
> below.
> 
> 
>> ---
>>  default-configs/ppc64-softmmu.mak |   1 +
>>  include/hw/ppc/xive.h             | 130 ++++++++++
>>  hw/intc/xive.c                    | 379 ++++++++++++++++++++++++++++++
>>  hw/intc/Makefile.objs             |   1 +
>>  4 files changed, 511 insertions(+)
>>  create mode 100644 include/hw/ppc/xive.h
>>  create mode 100644 hw/intc/xive.c
>>
>> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
>> index aec2855750d6..2d1e7c5c4668 100644
>> --- a/default-configs/ppc64-softmmu.mak
>> +++ b/default-configs/ppc64-softmmu.mak
>> @@ -16,6 +16,7 @@ CONFIG_VIRTIO_VGA=y
>>  CONFIG_XICS=$(CONFIG_PSERIES)
>>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
>> +CONFIG_XIVE=$(CONFIG_PSERIES)
>>  CONFIG_MEM_DEVICE=y
>>  CONFIG_DIMM=y
>>  CONFIG_SPAPR_RNG=y
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> new file mode 100644
>> index 000000000000..5fec4b08705d
>> --- /dev/null
>> +++ b/include/hw/ppc/xive.h
>> @@ -0,0 +1,130 @@
>> +/*
>> + * QEMU PowerPC XIVE interrupt controller model
>> + *
>> + * Copyright (c) 2017-2018, IBM Corporation.
>> + *
>> + * This code is licensed under the GPL version 2 or later. See the
>> + * COPYING file in the top-level directory.
> 
> A cheat sheet in the top of this header with the old and new XIVE
> terms would quite nice to have.

Yes. It's a good place. I will put the XIVE acronyms here :
     
     EA		Event Assignment
     EISN	Effective Interrupt Source Number
     END	Event Notification Descriptor
     ESB	Event State Buffer
     EQ		Event Queue
     LISN	Logical Interrupt Source Number
     NVT	Notification Virtual Target
     TIMA	Thread Interrupt Management Area
     ...


>> + */
>> +
>> +#ifndef PPC_XIVE_H
>> +#define PPC_XIVE_H
>> +
>> +#include "hw/sysbus.h"
> 
> So, I'm a bit dubious about making the XiveSource a SysBus device -
> I'm concerned it won't play well with tying it into the other devices
> like PHB that "own" it in real hardware.

It does but I can take a look at changing it to a DeviceState. The 
reset handlers might be a concern.

> I think we'd be better off making it a direct descendent of
> TYPE_DEVICE which constructs the MMIO region, but doesn't map it.

At one point, I started working on a XiveESB object doing what I think
you are suggesting, and then I removed it. I am reluctant to add more
complexity now; the patchset is just growing and growing ...

But I agree there are fundamentals to get right for KVM. Let's talk 
about it after you have looked at the overall patchset, at least up 
to KVM initial support.

> Then we can havea SysBusDevice (and/or other) wrapper which
> instantiates the XiveSource core and maps it into somewhere
> accessible.

The XIVE controller model does the mapping of the source currently.
In the case of sPAPR, the controller model controls the TIMA and
for PowerNV, there are quite a few other MMIO regions to handle.

> 
>> +
>> +/*
>> + * XIVE Interrupt Source
>> + */
>> +
>> +#define TYPE_XIVE_SOURCE "xive-source"
>> +#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
>> +
>> +/*
>> + * XIVE Interrupt Source characteristics, which define how the ESB are
>> + * controlled.
>> + */
>> +#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
>> +#define XIVE_SRC_STORE_EOI     0x2 /* Store EOI supported */
>> +
>> +typedef struct XiveSource {
>> +    SysBusDevice parent;
>> +
>> +    /* IRQs */
>> +    uint32_t        nr_irqs;
>> +    qemu_irq        *qirqs;
>> +
>> +    /* PQ bits */
>> +    uint8_t         *status;
>> +
>> +    /* ESB memory region */
>> +    uint64_t        esb_flags;
>> +    uint32_t        esb_shift;
>> +    MemoryRegion    esb_mmio;
>> +} XiveSource;
>> +
>> +/*
>> + * ESB MMIO setting. Can be one page, for both source triggering and
>> + * source management, or two different pages. See below for magic
>> + * values.
>> + */
>> +#define XIVE_ESB_4K          12 /* PSI HB only */
>> +#define XIVE_ESB_4K_2PAGE    13
>> +#define XIVE_ESB_64K         16
>> +#define XIVE_ESB_64K_2PAGE   17
>> +
>> +static inline bool xive_source_esb_has_2page(XiveSource *xsrc)
>> +{
>> +    return xsrc->esb_shift == XIVE_ESB_64K_2PAGE ||
>> +        xsrc->esb_shift == XIVE_ESB_4K_2PAGE;
>> +}
>> +
>> +/* The trigger page is always the first/even page */
>> +static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno)
> 
> This function doesn't appear to be used anywhere except..

It's used in patch 16 adding the hcalls also.

>> +{
>> +    assert(srcno < xsrc->nr_irqs);
>> +    return (1ull << xsrc->esb_shift) * srcno;
>> +}
>> +
>> +/* In a two pages ESB MMIO setting, the odd page is for management */
>> +static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
> 
> 
> ..here, and this function doesn't appear to be used anywhere.

It's used in patch 16 adding the hcalls and patch 23 for KVM.

This is basic ESB support which I thought belonged in the patch on sources.
 
> 
>> +{
>> +    hwaddr addr = xive_source_esb_page(xsrc, srcno);
>> +
>> +    if (xive_source_esb_has_2page(xsrc)) {
>> +        addr += (1 << (xsrc->esb_shift - 1));
>> +    }
>> +
>> +    return addr;
>> +}
>> +
>> +/*
>> + * Each interrupt source has a 2-bit state machine which can be
>> + * controlled by MMIO. P indicates that an interrupt is pending (has
>> + * been sent to a queue and is waiting for an EOI). Q indicates that
>> + * the interrupt has been triggered while pending.
>> + *
>> + * This acts as a coalescing mechanism in order to guarantee that a
>> + * given interrupt only occurs at most once in a queue.
>> + *
>> + * When doing an EOI, the Q bit will indicate if the interrupt
>> + * needs to be re-triggered.
>> + */
>> +#define XIVE_ESB_VAL_P        0x2
>> +#define XIVE_ESB_VAL_Q        0x1
>> +
>> +#define XIVE_ESB_RESET        0x0
>> +#define XIVE_ESB_PENDING      XIVE_ESB_VAL_P
>> +#define XIVE_ESB_QUEUED       (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q)
>> +#define XIVE_ESB_OFF          XIVE_ESB_VAL_Q
>> +
>> +/*
>> + * "magic" Event State Buffer (ESB) MMIO offsets.
>> + *
>> + * The following offsets into the ESB MMIO allow to read or manipulate
>> + * the PQ bits. They must be used with an 8-byte load instruction.
>> + * They all return the previous state of the interrupt (atomically).
>> + *
>> + * Additionally, some ESB pages support doing an EOI via a store and
>> + * some ESBs support doing a trigger via a separate trigger page.
>> + */
>> +#define XIVE_ESB_STORE_EOI      0x400 /* Store */
>> +#define XIVE_ESB_LOAD_EOI       0x000 /* Load */
>> +#define XIVE_ESB_GET            0x800 /* Load */
>> +#define XIVE_ESB_SET_PQ_00      0xc00 /* Load */
>> +#define XIVE_ESB_SET_PQ_01      0xd00 /* Load */
>> +#define XIVE_ESB_SET_PQ_10      0xe00 /* Load */
>> +#define XIVE_ESB_SET_PQ_11      0xf00 /* Load */
>> +
>> +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno);
>> +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq);
>> +
>> +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset,
>> +                                Monitor *mon);
>> +
>> +static inline qemu_irq xive_source_qirq(XiveSource *xsrc, uint32_t srcno)
>> +{
>> +    assert(srcno < xsrc->nr_irqs);
>> +    return xsrc->qirqs[srcno];
>> +}
>> +
>> +#endif /* PPC_XIVE_H */
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> new file mode 100644
>> index 000000000000..f7621f84828c
>> --- /dev/null
>> +++ b/hw/intc/xive.c
>> @@ -0,0 +1,379 @@
>> +/*
>> + * QEMU PowerPC XIVE interrupt controller model
>> + *
>> + * Copyright (c) 2017-2018, IBM Corporation.
>> + *
>> + * This code is licensed under the GPL version 2 or later. See the
>> + * COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/log.h"
>> +#include "qapi/error.h"
>> +#include "target/ppc/cpu.h"
>> +#include "sysemu/cpus.h"
>> +#include "sysemu/dma.h"
>> +#include "monitor/monitor.h"
>> +#include "hw/ppc/xive.h"
>> +
>> +/*
>> + * XIVE ESB helpers
>> + */
>> +
>> +static uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
>> +{
>> +    uint8_t old_pq = *pq & 0x3;
>> +
>> +    *pq &= ~0x3;
>> +    *pq |= value & 0x3;
>> +
>> +    return old_pq;
>> +}
>> +
>> +static bool xive_esb_trigger(uint8_t *pq)
>> +{
>> +    uint8_t old_pq = *pq & 0x3;
>> +
>> +    switch (old_pq) {
>> +    case XIVE_ESB_RESET:
>> +        xive_esb_set(pq, XIVE_ESB_PENDING);
>> +        return true;
>> +    case XIVE_ESB_PENDING:
>> +    case XIVE_ESB_QUEUED:
>> +        xive_esb_set(pq, XIVE_ESB_QUEUED);
>> +        return false;
>> +    case XIVE_ESB_OFF:
>> +        xive_esb_set(pq, XIVE_ESB_OFF);
>> +        return false;
>> +    default:
>> +         g_assert_not_reached();
>> +    }
>> +}
>> +
>> +static bool xive_esb_eoi(uint8_t *pq)
>> +{
>> +    uint8_t old_pq = *pq & 0x3;
>> +
>> +    switch (old_pq) {
>> +    case XIVE_ESB_RESET:
>> +    case XIVE_ESB_PENDING:
>> +        xive_esb_set(pq, XIVE_ESB_RESET);
>> +        return false;
>> +    case XIVE_ESB_QUEUED:
>> +        xive_esb_set(pq, XIVE_ESB_PENDING);
>> +        return true;
>> +    case XIVE_ESB_OFF:
>> +        xive_esb_set(pq, XIVE_ESB_OFF);
>> +        return false;
>> +    default:
>> +         g_assert_not_reached();
>> +    }
>> +}
>> +
>> +/*
>> + * XIVE Interrupt Source (or IVSE)
>> + */
>> +
>> +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
>> +{
>> +    assert(srcno < xsrc->nr_irqs);
>> +
>> +    return xsrc->status[srcno] & 0x3;
>> +}
>> +
>> +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
>> +{
>> +    assert(srcno < xsrc->nr_irqs);
>> +
>> +    return xive_esb_set(&xsrc->status[srcno], pq);
>> +}
>> +
>> +/*
>> + * Returns whether the event notification should be forwarded.
>> + */
>> +static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
>> +{
>> +    assert(srcno < xsrc->nr_irqs);
>> +
>> +    return xive_esb_trigger(&xsrc->status[srcno]);
>> +}
>> +
>> +/*
>> + * Returns whether the event notification should be forwarded.
>> + */
>> +static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
>> +{
>> +    assert(srcno < xsrc->nr_irqs);
>> +
>> +    return xive_esb_eoi(&xsrc->status[srcno]);
>> +}
>> +
>> +/*
>> + * Forward the source event notification to the Router
>> + */
>> +static void xive_source_notify(XiveSource *xsrc, int srcno)
>> +{
>> +
>> +}
>> +
>> +/*
>> + * In a two pages ESB MMIO setting, even page is the trigger page, odd
>> + * page is for management
>> + */
>> +static inline bool addr_is_even(hwaddr addr, uint32_t shift)
>> +{
>> +    return !((addr >> shift) & 1);
>> +}
>> +
>> +static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
>> +{
>> +    return xive_source_esb_has_2page(xsrc) &&
>> +        addr_is_even(addr, xsrc->esb_shift - 1);
>> +}
>> +
>> +/*
>> + * ESB MMIO loads
>> + *                      Trigger page    Management/EOI page
>> + * 2 pages setting      even            odd
>> + *
>> + * 0x000 .. 0x3FF       -1              EOI and return 0|1
>> + * 0x400 .. 0x7FF       -1              EOI and return 0|1
>> + * 0x800 .. 0xBFF       -1              return PQ
>> + * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
>> + * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
>> + * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
>> + * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
>> + */
> 
> I can't quite make sense of this table.  What do the -1s represent,

the value returned by the load.

> and how does it relate to the non-2page case?

A one-page ESB supports both trigger and management on the same page, so for
loads, the odd page behavior applies.

>> +static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
>> +{
>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>> +    uint32_t offset = addr & 0xFFF;
>> +    uint32_t srcno = addr >> xsrc->esb_shift;
>> +    uint64_t ret = -1;
>> +
>> +    /* In a two pages ESB MMIO setting, trigger page should not be read */
>> +    if (xive_source_is_trigger_page(xsrc, addr)) {
>> +        qemu_log_mask(LOG_GUEST_ERROR,
>> +                      "XIVE: invalid load on IRQ %d trigger page at "
>> +                      "0x%"HWADDR_PRIx"\n", srcno, addr);
>> +        return -1;
>> +    }
>> +
>> +    switch (offset) {
>> +    case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
>> +        ret = xive_source_esb_eoi(xsrc, srcno);
>> +
>> +        /* Forward the source event notification for routing */
>> +        if (ret) {
>> +            xive_source_notify(xsrc, srcno);
>> +        }
>> +        break;
>> +
>> +    case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
>> +        ret = xive_source_esb_get(xsrc, srcno);
>> +        break;
>> +
>> +    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
>> +    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
>> +    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
>> +    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
>> +        ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
>> +        break;
>> +    default:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
>> +                      offset);
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +/*
>> + * ESB MMIO stores
>> + *                      Trigger page    Management/EOI page
>> + * 2 pages setting      even            odd
> 
> As with the previous table, I don't quite understand what the headings
> above mean.

A one-page ESB supports both trigger and management on the same page, so for
stores, the odd page behavior applies.

The headings can be improved. I will think of something.

>> + * 0x000 .. 0x3FF       Trigger         Trigger
>> + * 0x400 .. 0x7FF       Trigger         EOI
>> + * 0x800 .. 0xBFF       Trigger         undefined
>> + * 0xC00 .. 0xCFF       Trigger         PQ=00
>> + * 0xD00 .. 0xDFF       Trigger         PQ=01
>> + * 0xE00 .. 0xEFF       Trigger         PQ=10
>> + * 0xF00 .. 0xFFF       Trigger         PQ=11
>> + */
>> +static void xive_source_esb_write(void *opaque, hwaddr addr,
>> +                                  uint64_t value, unsigned size)
>> +{
>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>> +    uint32_t offset = addr & 0xFFF;
>> +    uint32_t srcno = addr >> xsrc->esb_shift;
>> +    bool notify = false;
>> +
>> +    /* In a two pages ESB MMIO setting, trigger page only triggers */
>> +    if (xive_source_is_trigger_page(xsrc, addr)) {
>> +        notify = xive_source_esb_trigger(xsrc, srcno);
>> +        goto out;
>> +    }
>> +
>> +    switch (offset) {
>> +    case 0 ... 0x3FF:
>> +        notify = xive_source_esb_trigger(xsrc, srcno);
>> +        break;
>> +
>> +    case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
>> +        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
>> +            qemu_log_mask(LOG_GUEST_ERROR,
>> +                          "XIVE: invalid Store EOI for IRQ %d\n", srcno);
>> +            return;
>> +        }
>> +
>> +        notify = xive_source_esb_eoi(xsrc, srcno);
>> +        break;
>> +
>> +    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
>> +    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
>> +    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
>> +    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
>> +        xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
>> +        break;
>> +
>> +    default:
>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
>> +                      offset);
>> +        return;
>> +    }
>> +
>> +out:
>> +    /* Forward the source event notification for routing */
>> +    if (notify) {
>> +        xive_source_notify(xsrc, srcno);
>> +    }
>> +}
>> +
>> +static const MemoryRegionOps xive_source_esb_ops = {
>> +    .read = xive_source_esb_read,
>> +    .write = xive_source_esb_write,
>> +    .endianness = DEVICE_BIG_ENDIAN,
>> +    .valid = {
>> +        .min_access_size = 8,
>> +        .max_access_size = 8,
>> +    },
>> +    .impl = {
>> +        .min_access_size = 8,
>> +        .max_access_size = 8,
>> +    },
>> +};
>> +
>> +static void xive_source_set_irq(void *opaque, int srcno, int val)
>> +{
>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>> +    bool notify = false;
>> +
>> +    if (val) {
>> +        notify = xive_source_esb_trigger(xsrc, srcno);
>> +    }
>> +
>> +    /* Forward the source event notification for routing */
>> +    if (notify) {
>> +        xive_source_notify(xsrc, srcno);
>> +    }
>> +}
>> +
>> +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        uint8_t pq = xive_source_esb_get(xsrc, i);
>> +
>> +        if (pq == XIVE_ESB_OFF) {
>> +            continue;
>> +        }
>> +
>> +        monitor_printf(mon, "  %08x %c%c\n", i + offset,
>> +                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
>> +                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-');
>> +    }
>> +}
>> +
>> +static void xive_source_reset(DeviceState *dev)
>> +{
>> +    XiveSource *xsrc = XIVE_SOURCE(dev);
>> +
>> +    /* PQs are initialized to 0b01 which corresponds to "ints off" */
>> +    memset(xsrc->status, 0x1, xsrc->nr_irqs);
> 
> You've already got XIVE_ESB_OFF defined to make this a little clearer.

Sure.

Thanks,

C. 


> 
>> +}
>> +
>> +static void xive_source_realize(DeviceState *dev, Error **errp)
>> +{
>> +    XiveSource *xsrc = XIVE_SOURCE(dev);
>> +
>> +    if (!xsrc->nr_irqs) {
>> +        error_setg(errp, "Number of interrupt needs to be greater than 0");
>> +        return;
>> +    }
>> +
>> +    if (xsrc->esb_shift != XIVE_ESB_4K &&
>> +        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
>> +        xsrc->esb_shift != XIVE_ESB_64K &&
>> +        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
>> +        error_setg(errp, "Invalid ESB shift setting");
>> +        return;
>> +    }
>> +
>> +    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
>> +                                     xsrc->nr_irqs);
>> +
>> +    xsrc->status = g_malloc0(xsrc->nr_irqs);
>> +
>> +    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
>> +                          &xive_source_esb_ops, xsrc, "xive.esb",
>> +                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
>> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio);
>> +}
>> +
>> +static const VMStateDescription vmstate_xive_source = {
>> +    .name = TYPE_XIVE_SOURCE,
>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
>> +        VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
>> +        VMSTATE_END_OF_LIST()
>> +    },
>> +};
>> +
>> +/*
>> + * The default XIVE interrupt source setting for the ESB MMIOs is two
>> + * 64k pages without Store EOI, to be in sync with KVM.
>> + */
>> +static Property xive_source_properties[] = {
>> +    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
>> +    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
>> +    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
>> +    DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>> +static void xive_source_class_init(ObjectClass *klass, void *data)
>> +{
>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>> +
>> +    dc->desc    = "XIVE Interrupt Source";
>> +    dc->props   = xive_source_properties;
>> +    dc->realize = xive_source_realize;
>> +    dc->reset   = xive_source_reset;
>> +    dc->vmsd    = &vmstate_xive_source;
>> +}
>> +
>> +static const TypeInfo xive_source_info = {
>> +    .name          = TYPE_XIVE_SOURCE,
>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>> +    .instance_size = sizeof(XiveSource),
>> +    .class_init    = xive_source_class_init,
>> +};
>> +
>> +static void xive_register_types(void)
>> +{
>> +    type_register_static(&xive_source_info);
>> +}
>> +
>> +type_init(xive_register_types)
>> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
>> index 0e9963f5eecc..72a46ed91c31 100644
>> --- a/hw/intc/Makefile.objs
>> +++ b/hw/intc/Makefile.objs
>> @@ -37,6 +37,7 @@ obj-$(CONFIG_SH4) += sh_intc.o
>>  obj-$(CONFIG_XICS) += xics.o
>>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>> +obj-$(CONFIG_XIVE) += xive.o
>>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
>
David Gibson Nov. 23, 2018, 12:31 a.m. UTC | #3
On Thu, Nov 22, 2018 at 08:25:06AM +0100, Cédric Le Goater wrote:
> On 11/22/18 4:05 AM, David Gibson wrote:
> > On Fri, Nov 16, 2018 at 11:56:54AM +0100, Cédric Le Goater wrote:
> >> The first sub-engine of the overall XIVE architecture is the Interrupt
> >> Virtualization Source Engine (IVSE). An IVSE can be integrated into
> >> another logic, like in a PCI PHB or in the main interrupt controller
> >> to manage IPIs.
> >>
> >> Each IVSE instance is associated with an Event State Buffer (ESB) that
> >> contains a two bit state entry for each possible event source. When an
> >> event is signaled to the IVSE, by MMIO or some other means, the
> >> associated interrupt state bits are fetched from the ESB and
> >> modified. Depending on the resulting ESB state, the event is forwarded
> >> to the IVRE sub-engine of the controller doing the routing.
> >>
> >> Each supported ESB entry is associated with either a single or a
> >> even/odd pair of pages which provides commands to manage the source:
> >> to EOI, to turn off the source for instance.
> >>
> >> On a sPAPR machine, the O/S will obtain the page address of the ESB
> >> entry associated with a source and its characteristic using the
> >> H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is used.
> >>
> >> The xive_source_notify() routine is in charge forwarding the source
> >> event notification to the routing engine. It will be filled later on.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> > 
> > Ok, this is looking basically pretty good.  Few details to query
> > below.
> > 
> > 
> >> ---
> >>  default-configs/ppc64-softmmu.mak |   1 +
> >>  include/hw/ppc/xive.h             | 130 ++++++++++
> >>  hw/intc/xive.c                    | 379 ++++++++++++++++++++++++++++++
> >>  hw/intc/Makefile.objs             |   1 +
> >>  4 files changed, 511 insertions(+)
> >>  create mode 100644 include/hw/ppc/xive.h
> >>  create mode 100644 hw/intc/xive.c
> >>
> >> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
> >> index aec2855750d6..2d1e7c5c4668 100644
> >> --- a/default-configs/ppc64-softmmu.mak
> >> +++ b/default-configs/ppc64-softmmu.mak
> >> @@ -16,6 +16,7 @@ CONFIG_VIRTIO_VGA=y
> >>  CONFIG_XICS=$(CONFIG_PSERIES)
> >>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
> >>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
> >> +CONFIG_XIVE=$(CONFIG_PSERIES)
> >>  CONFIG_MEM_DEVICE=y
> >>  CONFIG_DIMM=y
> >>  CONFIG_SPAPR_RNG=y
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> new file mode 100644
> >> index 000000000000..5fec4b08705d
> >> --- /dev/null
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -0,0 +1,130 @@
> >> +/*
> >> + * QEMU PowerPC XIVE interrupt controller model
> >> + *
> >> + * Copyright (c) 2017-2018, IBM Corporation.
> >> + *
> >> + * This code is licensed under the GPL version 2 or later. See the
> >> + * COPYING file in the top-level directory.
> > 
> > A cheat sheet in the top of this header with the old and new XIVE
> > terms would quite nice to have.
> 
> Yes. It's a good place. I will put the XIVE acronyms here :
>      
>      EA		Event Assignment
>      EISN	Effective Interrupt Source Number
>      END	Event Notification Descriptor
>      ESB	Event State Buffer
>      EQ		Event Queue
>      LISN	Logical Interrupt Source Number
>      NVT	Notification Virtual Target
>      TIMA	Thread Interrupt Management Area
>      ...

That sounds good, but what I'd also like is showing that NVT == VP and
EAS == IVT and so forth.

> >> + */
> >> +
> >> +#ifndef PPC_XIVE_H
> >> +#define PPC_XIVE_H
> >> +
> >> +#include "hw/sysbus.h"
> > 
> > So, I'm a bit dubious about making the XiveSource a SysBus device -
> > I'm concerned it won't play well with tying it into the other devices
> > like PHB that "own" it in real hardware.
> 
> It does but I can take a look at changing it to a DeviceState. The 
> reset handlers might be a concern.

As a "non bus" device I think you'd need to register your own reset
handler rather than just setting dc->reset.  Otherwise, I think that
should work.

> > I think we'd be better off making it a direct descendent of
> > TYPE_DEVICE which constructs the MMIO region, but doesn't map it.
> 
> At a moment, I started working on a XiveESB object doing what I think 
> you are suggesting and I removed it. I am reluctant adding more 
> complexity now, the patchset is just growing and growing ... 
> 
> But I agree there are fundamentals to get right for KVM. Let's talk 
> about it after you have looked at the overall patchset, at least up 
> to KVM initial support.

Hm, ok.

> > Then we can have a SysBusDevice (and/or other) wrapper which
> > instantiates the XiveSource core and maps it into somewhere
> > accessible.
> 
> The XIVE controller model does the mapping of the source currently.

I'm.. I'm not sure what you mean by that.   We have a
sysbus_init_mmio() right here which effectively maps in the MMIO
region AFAICT.

> In the case of sPAPR, the controller model controls the TIMA and 
> for PowerNV, there are quite few others MMIO regions to handle.
> 
> > 
> >> +
> >> +/*
> >> + * XIVE Interrupt Source
> >> + */
> >> +
> >> +#define TYPE_XIVE_SOURCE "xive-source"
> >> +#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
> >> +
> >> +/*
> >> + * XIVE Interrupt Source characteristics, which define how the ESB are
> >> + * controlled.
> >> + */
> >> +#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
> >> +#define XIVE_SRC_STORE_EOI     0x2 /* Store EOI supported */
> >> +
> >> +typedef struct XiveSource {
> >> +    SysBusDevice parent;
> >> +
> >> +    /* IRQs */
> >> +    uint32_t        nr_irqs;
> >> +    qemu_irq        *qirqs;
> >> +
> >> +    /* PQ bits */
> >> +    uint8_t         *status;
> >> +
> >> +    /* ESB memory region */
> >> +    uint64_t        esb_flags;
> >> +    uint32_t        esb_shift;
> >> +    MemoryRegion    esb_mmio;
> >> +} XiveSource;
> >> +
> >> +/*
> >> + * ESB MMIO setting. Can be one page, for both source triggering and
> >> + * source management, or two different pages. See below for magic
> >> + * values.
> >> + */
> >> +#define XIVE_ESB_4K          12 /* PSI HB only */
> >> +#define XIVE_ESB_4K_2PAGE    13
> >> +#define XIVE_ESB_64K         16
> >> +#define XIVE_ESB_64K_2PAGE   17
> >> +
> >> +static inline bool xive_source_esb_has_2page(XiveSource *xsrc)
> >> +{
> >> +    return xsrc->esb_shift == XIVE_ESB_64K_2PAGE ||
> >> +        xsrc->esb_shift == XIVE_ESB_4K_2PAGE;
> >> +}
> >> +
> >> +/* The trigger page is always the first/even page */
> >> +static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno)
> > 
> > This function doesn't appear to be used anywhere except..
> 
> It's used in patch 16 adding the hcalls also.
> 
> >> +{
> >> +    assert(srcno < xsrc->nr_irqs);
> >> +    return (1ull << xsrc->esb_shift) * srcno;
> >> +}
> >> +
> >> +/* In a two pages ESB MMIO setting, the odd page is for management */
> >> +static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
> > 
> > 
> > ..here, and this function doesn't appear to be used anywhere.
> 
> It's used in patch 16 adding the hcalls and patch 23 for KVM.
> 
> This is basic ESB support which I thought belong to the patch on sources.
>  
> > 
> >> +{
> >> +    hwaddr addr = xive_source_esb_page(xsrc, srcno);
> >> +
> >> +    if (xive_source_esb_has_2page(xsrc)) {
> >> +        addr += (1 << (xsrc->esb_shift - 1));
> >> +    }
> >> +
> >> +    return addr;
> >> +}
> >> +
> >> +/*
> >> + * Each interrupt source has a 2-bit state machine which can be
> >> + * controlled by MMIO. P indicates that an interrupt is pending (has
> >> + * been sent to a queue and is waiting for an EOI). Q indicates that
> >> + * the interrupt has been triggered while pending.
> >> + *
> >> + * This acts as a coalescing mechanism in order to guarantee that a
> >> + * given interrupt only occurs at most once in a queue.
> >> + *
> >> + * When doing an EOI, the Q bit will indicate if the interrupt
> >> + * needs to be re-triggered.
> >> + */
> >> +#define XIVE_ESB_VAL_P        0x2
> >> +#define XIVE_ESB_VAL_Q        0x1
> >> +
> >> +#define XIVE_ESB_RESET        0x0
> >> +#define XIVE_ESB_PENDING      XIVE_ESB_VAL_P
> >> +#define XIVE_ESB_QUEUED       (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q)
> >> +#define XIVE_ESB_OFF          XIVE_ESB_VAL_Q
> >> +
> >> +/*
> >> + * "magic" Event State Buffer (ESB) MMIO offsets.
> >> + *
> >> + * The following offsets into the ESB MMIO allow to read or manipulate
> >> + * the PQ bits. They must be used with an 8-byte load instruction.
> >> + * They all return the previous state of the interrupt (atomically).
> >> + *
> >> + * Additionally, some ESB pages support doing an EOI via a store and
> >> + * some ESBs support doing a trigger via a separate trigger page.
> >> + */
> >> +#define XIVE_ESB_STORE_EOI      0x400 /* Store */
> >> +#define XIVE_ESB_LOAD_EOI       0x000 /* Load */
> >> +#define XIVE_ESB_GET            0x800 /* Load */
> >> +#define XIVE_ESB_SET_PQ_00      0xc00 /* Load */
> >> +#define XIVE_ESB_SET_PQ_01      0xd00 /* Load */
> >> +#define XIVE_ESB_SET_PQ_10      0xe00 /* Load */
> >> +#define XIVE_ESB_SET_PQ_11      0xf00 /* Load */
> >> +
> >> +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno);
> >> +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq);
> >> +
> >> +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset,
> >> +                                Monitor *mon);
> >> +
> >> +static inline qemu_irq xive_source_qirq(XiveSource *xsrc, uint32_t srcno)
> >> +{
> >> +    assert(srcno < xsrc->nr_irqs);
> >> +    return xsrc->qirqs[srcno];
> >> +}
> >> +
> >> +#endif /* PPC_XIVE_H */
> >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >> new file mode 100644
> >> index 000000000000..f7621f84828c
> >> --- /dev/null
> >> +++ b/hw/intc/xive.c
> >> @@ -0,0 +1,379 @@
> >> +/*
> >> + * QEMU PowerPC XIVE interrupt controller model
> >> + *
> >> + * Copyright (c) 2017-2018, IBM Corporation.
> >> + *
> >> + * This code is licensed under the GPL version 2 or later. See the
> >> + * COPYING file in the top-level directory.
> >> + */
> >> +
> >> +#include "qemu/osdep.h"
> >> +#include "qemu/log.h"
> >> +#include "qapi/error.h"
> >> +#include "target/ppc/cpu.h"
> >> +#include "sysemu/cpus.h"
> >> +#include "sysemu/dma.h"
> >> +#include "monitor/monitor.h"
> >> +#include "hw/ppc/xive.h"
> >> +
> >> +/*
> >> + * XIVE ESB helpers
> >> + */
> >> +
> >> +static uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
> >> +{
> >> +    uint8_t old_pq = *pq & 0x3;
> >> +
> >> +    *pq &= ~0x3;
> >> +    *pq |= value & 0x3;
> >> +
> >> +    return old_pq;
> >> +}
> >> +
> >> +static bool xive_esb_trigger(uint8_t *pq)
> >> +{
> >> +    uint8_t old_pq = *pq & 0x3;
> >> +
> >> +    switch (old_pq) {
> >> +    case XIVE_ESB_RESET:
> >> +        xive_esb_set(pq, XIVE_ESB_PENDING);
> >> +        return true;
> >> +    case XIVE_ESB_PENDING:
> >> +    case XIVE_ESB_QUEUED:
> >> +        xive_esb_set(pq, XIVE_ESB_QUEUED);
> >> +        return false;
> >> +    case XIVE_ESB_OFF:
> >> +        xive_esb_set(pq, XIVE_ESB_OFF);
> >> +        return false;
> >> +    default:
> >> +         g_assert_not_reached();
> >> +    }
> >> +}
> >> +
> >> +static bool xive_esb_eoi(uint8_t *pq)
> >> +{
> >> +    uint8_t old_pq = *pq & 0x3;
> >> +
> >> +    switch (old_pq) {
> >> +    case XIVE_ESB_RESET:
> >> +    case XIVE_ESB_PENDING:
> >> +        xive_esb_set(pq, XIVE_ESB_RESET);
> >> +        return false;
> >> +    case XIVE_ESB_QUEUED:
> >> +        xive_esb_set(pq, XIVE_ESB_PENDING);
> >> +        return true;
> >> +    case XIVE_ESB_OFF:
> >> +        xive_esb_set(pq, XIVE_ESB_OFF);
> >> +        return false;
> >> +    default:
> >> +         g_assert_not_reached();
> >> +    }
> >> +}
> >> +
> >> +/*
> >> + * XIVE Interrupt Source (or IVSE)
> >> + */
> >> +
> >> +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
> >> +{
> >> +    assert(srcno < xsrc->nr_irqs);
> >> +
> >> +    return xsrc->status[srcno] & 0x3;
> >> +}
> >> +
> >> +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
> >> +{
> >> +    assert(srcno < xsrc->nr_irqs);
> >> +
> >> +    return xive_esb_set(&xsrc->status[srcno], pq);
> >> +}
> >> +
> >> +/*
> >> + * Returns whether the event notification should be forwarded.
> >> + */
> >> +static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
> >> +{
> >> +    assert(srcno < xsrc->nr_irqs);
> >> +
> >> +    return xive_esb_trigger(&xsrc->status[srcno]);
> >> +}
> >> +
> >> +/*
> >> + * Returns whether the event notification should be forwarded.
> >> + */
> >> +static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
> >> +{
> >> +    assert(srcno < xsrc->nr_irqs);
> >> +
> >> +    return xive_esb_eoi(&xsrc->status[srcno]);
> >> +}
> >> +
> >> +/*
> >> + * Forward the source event notification to the Router
> >> + */
> >> +static void xive_source_notify(XiveSource *xsrc, int srcno)
> >> +{
> >> +
> >> +}
> >> +
> >> +/*
> >> + * In a two pages ESB MMIO setting, even page is the trigger page, odd
> >> + * page is for management
> >> + */
> >> +static inline bool addr_is_even(hwaddr addr, uint32_t shift)
> >> +{
> >> +    return !((addr >> shift) & 1);
> >> +}
> >> +
> >> +static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
> >> +{
> >> +    return xive_source_esb_has_2page(xsrc) &&
> >> +        addr_is_even(addr, xsrc->esb_shift - 1);
> >> +}
> >> +
> >> +/*
> >> + * ESB MMIO loads
> >> + *                      Trigger page    Management/EOI page
> >> + * 2 pages setting      even            odd
> >> + *
> >> + * 0x000 .. 0x3FF       -1              EOI and return 0|1
> >> + * 0x400 .. 0x7FF       -1              EOI and return 0|1
> >> + * 0x800 .. 0xBFF       -1              return PQ
> >> + * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
> >> + * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
> >> + * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
> >> + * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
> >> + */
> > 
> > I can't quite make sense of this table.  What do the -1s represent,
> 
> the value returned by the load.
> 
> > and how does it relate to the non-2page case?
> 
> one page ESB support trigger and management on the same page. So for loads,
> the odd page behavior applies.  
> 
> >> +static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
> >> +{
> >> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> >> +    uint32_t offset = addr & 0xFFF;
> >> +    uint32_t srcno = addr >> xsrc->esb_shift;
> >> +    uint64_t ret = -1;
> >> +
> >> +    /* In a two pages ESB MMIO setting, trigger page should not be read */
> >> +    if (xive_source_is_trigger_page(xsrc, addr)) {
> >> +        qemu_log_mask(LOG_GUEST_ERROR,
> >> +                      "XIVE: invalid load on IRQ %d trigger page at "
> >> +                      "0x%"HWADDR_PRIx"\n", srcno, addr);
> >> +        return -1;
> >> +    }
> >> +
> >> +    switch (offset) {
> >> +    case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
> >> +        ret = xive_source_esb_eoi(xsrc, srcno);
> >> +
> >> +        /* Forward the source event notification for routing */
> >> +        if (ret) {
> >> +            xive_source_notify(xsrc, srcno);
> >> +        }
> >> +        break;
> >> +
> >> +    case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
> >> +        ret = xive_source_esb_get(xsrc, srcno);
> >> +        break;
> >> +
> >> +    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
> >> +    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
> >> +    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
> >> +    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
> >> +        ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
> >> +        break;
> >> +    default:
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
> >> +                      offset);
> >> +    }
> >> +
> >> +    return ret;
> >> +}
> >> +
> >> +/*
> >> + * ESB MMIO stores
> >> + *                      Trigger page    Management/EOI page
> >> + * 2 pages setting      even            odd
> > 
> > As with the previous table, I don't quite understand what the headings
> > above mean.
> 
> one page ESB support trigger and management on the same page. So for stores,
> the odd page behavior applies.
> 
> The headings can be improved. I will think of something.
> 
> >> + * 0x000 .. 0x3FF       Trigger         Trigger
> >> + * 0x400 .. 0x7FF       Trigger         EOI
> >> + * 0x800 .. 0xBFF       Trigger         undefined
> >> + * 0xC00 .. 0xCFF       Trigger         PQ=00
> >> + * 0xD00 .. 0xDFF       Trigger         PQ=01
> >> + * 0xE00 .. 0xEFF       Trigger         PQ=10
> >> + * 0xF00 .. 0xFFF       Trigger         PQ=11
> >> + */
> >> +static void xive_source_esb_write(void *opaque, hwaddr addr,
> >> +                                  uint64_t value, unsigned size)
> >> +{
> >> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> >> +    uint32_t offset = addr & 0xFFF;
> >> +    uint32_t srcno = addr >> xsrc->esb_shift;
> >> +    bool notify = false;
> >> +
> >> +    /* In a two pages ESB MMIO setting, trigger page only triggers */
> >> +    if (xive_source_is_trigger_page(xsrc, addr)) {
> >> +        notify = xive_source_esb_trigger(xsrc, srcno);
> >> +        goto out;
> >> +    }
> >> +
> >> +    switch (offset) {
> >> +    case 0 ... 0x3FF:
> >> +        notify = xive_source_esb_trigger(xsrc, srcno);
> >> +        break;
> >> +
> >> +    case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
> >> +        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
> >> +            qemu_log_mask(LOG_GUEST_ERROR,
> >> +                          "XIVE: invalid Store EOI for IRQ %d\n", srcno);
> >> +            return;
> >> +        }
> >> +
> >> +        notify = xive_source_esb_eoi(xsrc, srcno);
> >> +        break;
> >> +
> >> +    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
> >> +    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
> >> +    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
> >> +    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
> >> +        xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
> >> +        break;
> >> +
> >> +    default:
> >> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
> >> +                      offset);
> >> +        return;
> >> +    }
> >> +
> >> +out:
> >> +    /* Forward the source event notification for routing */
> >> +    if (notify) {
> >> +        xive_source_notify(xsrc, srcno);
> >> +    }
> >> +}
> >> +
> >> +static const MemoryRegionOps xive_source_esb_ops = {
> >> +    .read = xive_source_esb_read,
> >> +    .write = xive_source_esb_write,
> >> +    .endianness = DEVICE_BIG_ENDIAN,
> >> +    .valid = {
> >> +        .min_access_size = 8,
> >> +        .max_access_size = 8,
> >> +    },
> >> +    .impl = {
> >> +        .min_access_size = 8,
> >> +        .max_access_size = 8,
> >> +    },
> >> +};
> >> +
> >> +static void xive_source_set_irq(void *opaque, int srcno, int val)
> >> +{
> >> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
> >> +    bool notify = false;
> >> +
> >> +    if (val) {
> >> +        notify = xive_source_esb_trigger(xsrc, srcno);
> >> +    }
> >> +
> >> +    /* Forward the source event notification for routing */
> >> +    if (notify) {
> >> +        xive_source_notify(xsrc, srcno);
> >> +    }
> >> +}
> >> +
> >> +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
> >> +{
> >> +    int i;
> >> +
> >> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> >> +        uint8_t pq = xive_source_esb_get(xsrc, i);
> >> +
> >> +        if (pq == XIVE_ESB_OFF) {
> >> +            continue;
> >> +        }
> >> +
> >> +        monitor_printf(mon, "  %08x %c%c\n", i + offset,
> >> +                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
> >> +                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-');
> >> +    }
> >> +}
> >> +
> >> +static void xive_source_reset(DeviceState *dev)
> >> +{
> >> +    XiveSource *xsrc = XIVE_SOURCE(dev);
> >> +
> >> +    /* PQs are initialized to 0b01 which corresponds to "ints off" */
> >> +    memset(xsrc->status, 0x1, xsrc->nr_irqs);
> > 
> > You've already got XIVE_ESB_OFF defined to make this a little clearer.
> 
> Sure.
> 
> Thanks,
> 
> C. 
> 
> 
> > 
> >> +}
> >> +
> >> +static void xive_source_realize(DeviceState *dev, Error **errp)
> >> +{
> >> +    XiveSource *xsrc = XIVE_SOURCE(dev);
> >> +
> >> +    if (!xsrc->nr_irqs) {
> >> +        error_setg(errp, "Number of interrupt needs to be greater than 0");
> >> +        return;
> >> +    }
> >> +
> >> +    if (xsrc->esb_shift != XIVE_ESB_4K &&
> >> +        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
> >> +        xsrc->esb_shift != XIVE_ESB_64K &&
> >> +        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
> >> +        error_setg(errp, "Invalid ESB shift setting");
> >> +        return;
> >> +    }
> >> +
> >> +    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
> >> +                                     xsrc->nr_irqs);
> >> +
> >> +    xsrc->status = g_malloc0(xsrc->nr_irqs);
> >> +
> >> +    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
> >> +                          &xive_source_esb_ops, xsrc, "xive.esb",
> >> +                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
> >> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio);
> >> +}
> >> +
> >> +static const VMStateDescription vmstate_xive_source = {
> >> +    .name = TYPE_XIVE_SOURCE,
> >> +    .version_id = 1,
> >> +    .minimum_version_id = 1,
> >> +    .fields = (VMStateField[]) {
> >> +        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
> >> +        VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
> >> +        VMSTATE_END_OF_LIST()
> >> +    },
> >> +};
> >> +
> >> +/*
> >> + * The default XIVE interrupt source setting for the ESB MMIOs is two
> >> + * 64k pages without Store EOI, to be in sync with KVM.
> >> + */
> >> +static Property xive_source_properties[] = {
> >> +    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
> >> +    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
> >> +    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
> >> +    DEFINE_PROP_END_OF_LIST(),
> >> +};
> >> +
> >> +static void xive_source_class_init(ObjectClass *klass, void *data)
> >> +{
> >> +    DeviceClass *dc = DEVICE_CLASS(klass);
> >> +
> >> +    dc->desc    = "XIVE Interrupt Source";
> >> +    dc->props   = xive_source_properties;
> >> +    dc->realize = xive_source_realize;
> >> +    dc->reset   = xive_source_reset;
> >> +    dc->vmsd    = &vmstate_xive_source;
> >> +}
> >> +
> >> +static const TypeInfo xive_source_info = {
> >> +    .name          = TYPE_XIVE_SOURCE,
> >> +    .parent        = TYPE_SYS_BUS_DEVICE,
> >> +    .instance_size = sizeof(XiveSource),
> >> +    .class_init    = xive_source_class_init,
> >> +};
> >> +
> >> +static void xive_register_types(void)
> >> +{
> >> +    type_register_static(&xive_source_info);
> >> +}
> >> +
> >> +type_init(xive_register_types)
> >> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> >> index 0e9963f5eecc..72a46ed91c31 100644
> >> --- a/hw/intc/Makefile.objs
> >> +++ b/hw/intc/Makefile.objs
> >> @@ -37,6 +37,7 @@ obj-$(CONFIG_SH4) += sh_intc.o
> >>  obj-$(CONFIG_XICS) += xics.o
> >>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
> >>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
> >> +obj-$(CONFIG_XIVE) += xive.o
> >>  obj-$(CONFIG_POWERNV) += xics_pnv.o
> >>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
> >>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
> > 
>
Cédric Le Goater Nov. 23, 2018, 8:21 a.m. UTC | #4
On 11/23/18 1:31 AM, David Gibson wrote:
> On Thu, Nov 22, 2018 at 08:25:06AM +0100, Cédric Le Goater wrote:
>> On 11/22/18 4:05 AM, David Gibson wrote:
>>> On Fri, Nov 16, 2018 at 11:56:54AM +0100, Cédric Le Goater wrote:
>>>> The first sub-engine of the overall XIVE architecture is the Interrupt
>>>> Virtualization Source Engine (IVSE). An IVSE can be integrated into
>>>> another logic, like in a PCI PHB or in the main interrupt controller
>>>> to manage IPIs.
>>>>
>>>> Each IVSE instance is associated with an Event State Buffer (ESB) that
>>>> contains a two bit state entry for each possible event source. When an
>>>> event is signaled to the IVSE, by MMIO or some other means, the
>>>> associated interrupt state bits are fetched from the ESB and
>>>> modified. Depending on the resulting ESB state, the event is forwarded
>>>> to the IVRE sub-engine of the controller doing the routing.
>>>>
>>>> Each supported ESB entry is associated with either a single or a
>>>> even/odd pair of pages which provides commands to manage the source:
>>>> to EOI, to turn off the source for instance.
>>>>
>>>> On a sPAPR machine, the O/S will obtain the page address of the ESB
>>>> entry associated with a source and its characteristic using the
>>>> H_INT_GET_SOURCE_INFO hcall. On PowerNV, a similar OPAL call is used.
>>>>
>>>> The xive_source_notify() routine is in charge forwarding the source
>>>> event notification to the routing engine. It will be filled later on.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>
>>> Ok, this is looking basically pretty good.  Few details to query
>>> below.
>>>
>>>
>>>> ---
>>>>  default-configs/ppc64-softmmu.mak |   1 +
>>>>  include/hw/ppc/xive.h             | 130 ++++++++++
>>>>  hw/intc/xive.c                    | 379 ++++++++++++++++++++++++++++++
>>>>  hw/intc/Makefile.objs             |   1 +
>>>>  4 files changed, 511 insertions(+)
>>>>  create mode 100644 include/hw/ppc/xive.h
>>>>  create mode 100644 hw/intc/xive.c
>>>>
>>>> diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
>>>> index aec2855750d6..2d1e7c5c4668 100644
>>>> --- a/default-configs/ppc64-softmmu.mak
>>>> +++ b/default-configs/ppc64-softmmu.mak
>>>> @@ -16,6 +16,7 @@ CONFIG_VIRTIO_VGA=y
>>>>  CONFIG_XICS=$(CONFIG_PSERIES)
>>>>  CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
>>>>  CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
>>>> +CONFIG_XIVE=$(CONFIG_PSERIES)
>>>>  CONFIG_MEM_DEVICE=y
>>>>  CONFIG_DIMM=y
>>>>  CONFIG_SPAPR_RNG=y
>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>> new file mode 100644
>>>> index 000000000000..5fec4b08705d
>>>> --- /dev/null
>>>> +++ b/include/hw/ppc/xive.h
>>>> @@ -0,0 +1,130 @@
>>>> +/*
>>>> + * QEMU PowerPC XIVE interrupt controller model
>>>> + *
>>>> + * Copyright (c) 2017-2018, IBM Corporation.
>>>> + *
>>>> + * This code is licensed under the GPL version 2 or later. See the
>>>> + * COPYING file in the top-level directory.
>>>
>>> A cheat sheet in the top of this header with the old and new XIVE
>>> terms would quite nice to have.
>>
>> Yes. It's a good place. I will put the XIVE acronyms here :
>>      
>>      EA		Event Assignment
>>      EISN	Effective Interrupt Source Number
>>      END	Event Notification Descriptor
>>      ESB	Event State Buffer
>>      EQ		Event Queue
>>      LISN	Logical Interrupt Source Number
>>      NVT	Notification Virtual Target
>>      TIMA	Thread Interrupt Management Area
>>      ...
> 
> That sounds good, but what I'd also like is showing that NVT == VP and
> EAS == IVT and so forth.

sure. I will add that. 

skiboot and PowerNV Linux use a mix of the old and new terms, and I
wonder whether I should clarify that as well, at least for skiboot.

>>>> + */
>>>> +
>>>> +#ifndef PPC_XIVE_H
>>>> +#define PPC_XIVE_H
>>>> +
>>>> +#include "hw/sysbus.h"
>>>
>>> So, I'm a bit dubious about making the XiveSource a SysBus device -
>>> I'm concerned it won't play well with tying it into the other devices
>>> like PHB that "own" it in real hardware.
>>
>> It does but I can take a look at changing it to a DeviceState. The 
>> reset handlers might be a concern.
> 
> As "non bus" device I think you'd need to register your own reset
> handler rather than just setting dc->reset.  Otherwise, I think that
> should work.

Yes. I will take a look and give it a try; it might not be such a problem.

>>> I think we'd be better off making it a direct descendent of
>>> TYPE_DEVICE which constructs the MMIO region, but doesn't map it.
>>
>> At a moment, I started working on a XiveESB object doing what I think 
>> you are suggesting and I removed it. I am reluctant adding more 
>> complexity now, the patchset is just growing and growing ... 
>>
>> But I agree there are fundamentals to get right for KVM. Let's talk 
>> about it after you have looked at the overall patchset, at least up 
>> to KVM initial support.
> 
> Hm, ok.
> 
>>> Then we can havea SysBusDevice (and/or other) wrapper which
>>> instantiates the XiveSource core and maps it into somewhere
>>> accessible.
>>
>> The XIVE controller model does the mapping of the source currently.
> 
> I'm.. I'm not sure what you mean by that.   We have a
> sysbus_init_mmio() right here which effectively maps in the MMIO
> region AFAICT.

yes. what I meant is that the XIVE controller model does all the 
mapping, for the TIMA and for the ESB pages of the XiveSource. 

This is a 'critical' part of the XIVE model because the regions have
a different nature under KVM, which requires the KVM device to be
created before the regions are, and the regions to be destroyed when
the device is.

It took me a while to get everything in place to support all aspects
of the model: KVM and not KVM, switch of interrupt controller, machine
reset, post_load, PowerNV devices, etc. So expect some resistance
from me on that topic.

Thanks,

C.

>> In the case of sPAPR, the controller model controls the TIMA and 
>> for PowerNV, there are quite few others MMIO regions to handle.
>>
>>>
>>>> +
>>>> +/*
>>>> + * XIVE Interrupt Source
>>>> + */
>>>> +
>>>> +#define TYPE_XIVE_SOURCE "xive-source"
>>>> +#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
>>>> +
>>>> +/*
>>>> + * XIVE Interrupt Source characteristics, which define how the ESB are
>>>> + * controlled.
>>>> + */
>>>> +#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
>>>> +#define XIVE_SRC_STORE_EOI     0x2 /* Store EOI supported */
>>>> +
>>>> +typedef struct XiveSource {
>>>> +    SysBusDevice parent;
>>>> +
>>>> +    /* IRQs */
>>>> +    uint32_t        nr_irqs;
>>>> +    qemu_irq        *qirqs;
>>>> +
>>>> +    /* PQ bits */
>>>> +    uint8_t         *status;
>>>> +
>>>> +    /* ESB memory region */
>>>> +    uint64_t        esb_flags;
>>>> +    uint32_t        esb_shift;
>>>> +    MemoryRegion    esb_mmio;
>>>> +} XiveSource;
>>>> +
>>>> +/*
>>>> + * ESB MMIO setting. Can be one page, for both source triggering and
>>>> + * source management, or two different pages. See below for magic
>>>> + * values.
>>>> + */
>>>> +#define XIVE_ESB_4K          12 /* PSI HB only */
>>>> +#define XIVE_ESB_4K_2PAGE    13
>>>> +#define XIVE_ESB_64K         16
>>>> +#define XIVE_ESB_64K_2PAGE   17
>>>> +
>>>> +static inline bool xive_source_esb_has_2page(XiveSource *xsrc)
>>>> +{
>>>> +    return xsrc->esb_shift == XIVE_ESB_64K_2PAGE ||
>>>> +        xsrc->esb_shift == XIVE_ESB_4K_2PAGE;
>>>> +}
>>>> +
>>>> +/* The trigger page is always the first/even page */
>>>> +static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno)
>>>
>>> This function doesn't appear to be used anywhere except..
>>
>> It's used in patch 16 adding the hcalls also.
>>
>>>> +{
>>>> +    assert(srcno < xsrc->nr_irqs);
>>>> +    return (1ull << xsrc->esb_shift) * srcno;
>>>> +}
>>>> +
>>>> +/* In a two pages ESB MMIO setting, the odd page is for management */
>>>> +static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
>>>
>>>
>>> ..here, and this function doesn't appear to be used anywhere.
>>
>> It's used in patch 16 adding the hcalls and patch 23 for KVM.
>>
>> This is basic ESB support which I thought belong to the patch on sources.
>>  
>>>
>>>> +{
>>>> +    hwaddr addr = xive_source_esb_page(xsrc, srcno);
>>>> +
>>>> +    if (xive_source_esb_has_2page(xsrc)) {
>>>> +        addr += (1 << (xsrc->esb_shift - 1));
>>>> +    }
>>>> +
>>>> +    return addr;
>>>> +}
>>>> +
>>>> +/*
>>>> + * Each interrupt source has a 2-bit state machine which can be
>>>> + * controlled by MMIO. P indicates that an interrupt is pending (has
>>>> + * been sent to a queue and is waiting for an EOI). Q indicates that
>>>> + * the interrupt has been triggered while pending.
>>>> + *
>>>> + * This acts as a coalescing mechanism in order to guarantee that a
>>>> + * given interrupt only occurs at most once in a queue.
>>>> + *
>>>> + * When doing an EOI, the Q bit will indicate if the interrupt
>>>> + * needs to be re-triggered.
>>>> + */
>>>> +#define XIVE_ESB_VAL_P        0x2
>>>> +#define XIVE_ESB_VAL_Q        0x1
>>>> +
>>>> +#define XIVE_ESB_RESET        0x0
>>>> +#define XIVE_ESB_PENDING      XIVE_ESB_VAL_P
>>>> +#define XIVE_ESB_QUEUED       (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q)
>>>> +#define XIVE_ESB_OFF          XIVE_ESB_VAL_Q
>>>> +
>>>> +/*
>>>> + * "magic" Event State Buffer (ESB) MMIO offsets.
>>>> + *
>>>> + * The following offsets into the ESB MMIO allow to read or manipulate
>>>> + * the PQ bits. They must be used with an 8-byte load instruction.
>>>> + * They all return the previous state of the interrupt (atomically).
>>>> + *
>>>> + * Additionally, some ESB pages support doing an EOI via a store and
>>>> + * some ESBs support doing a trigger via a separate trigger page.
>>>> + */
>>>> +#define XIVE_ESB_STORE_EOI      0x400 /* Store */
>>>> +#define XIVE_ESB_LOAD_EOI       0x000 /* Load */
>>>> +#define XIVE_ESB_GET            0x800 /* Load */
>>>> +#define XIVE_ESB_SET_PQ_00      0xc00 /* Load */
>>>> +#define XIVE_ESB_SET_PQ_01      0xd00 /* Load */
>>>> +#define XIVE_ESB_SET_PQ_10      0xe00 /* Load */
>>>> +#define XIVE_ESB_SET_PQ_11      0xf00 /* Load */
>>>> +
>>>> +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno);
>>>> +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq);
>>>> +
>>>> +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset,
>>>> +                                Monitor *mon);
>>>> +
>>>> +static inline qemu_irq xive_source_qirq(XiveSource *xsrc, uint32_t srcno)
>>>> +{
>>>> +    assert(srcno < xsrc->nr_irqs);
>>>> +    return xsrc->qirqs[srcno];
>>>> +}
>>>> +
>>>> +#endif /* PPC_XIVE_H */
>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>>>> new file mode 100644
>>>> index 000000000000..f7621f84828c
>>>> --- /dev/null
>>>> +++ b/hw/intc/xive.c
>>>> @@ -0,0 +1,379 @@
>>>> +/*
>>>> + * QEMU PowerPC XIVE interrupt controller model
>>>> + *
>>>> + * Copyright (c) 2017-2018, IBM Corporation.
>>>> + *
>>>> + * This code is licensed under the GPL version 2 or later. See the
>>>> + * COPYING file in the top-level directory.
>>>> + */
>>>> +
>>>> +#include "qemu/osdep.h"
>>>> +#include "qemu/log.h"
>>>> +#include "qapi/error.h"
>>>> +#include "target/ppc/cpu.h"
>>>> +#include "sysemu/cpus.h"
>>>> +#include "sysemu/dma.h"
>>>> +#include "monitor/monitor.h"
>>>> +#include "hw/ppc/xive.h"
>>>> +
>>>> +/*
>>>> + * XIVE ESB helpers
>>>> + */
>>>> +
>>>> +static uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
>>>> +{
>>>> +    uint8_t old_pq = *pq & 0x3;
>>>> +
>>>> +    *pq &= ~0x3;
>>>> +    *pq |= value & 0x3;
>>>> +
>>>> +    return old_pq;
>>>> +}
>>>> +
>>>> +static bool xive_esb_trigger(uint8_t *pq)
>>>> +{
>>>> +    uint8_t old_pq = *pq & 0x3;
>>>> +
>>>> +    switch (old_pq) {
>>>> +    case XIVE_ESB_RESET:
>>>> +        xive_esb_set(pq, XIVE_ESB_PENDING);
>>>> +        return true;
>>>> +    case XIVE_ESB_PENDING:
>>>> +    case XIVE_ESB_QUEUED:
>>>> +        xive_esb_set(pq, XIVE_ESB_QUEUED);
>>>> +        return false;
>>>> +    case XIVE_ESB_OFF:
>>>> +        xive_esb_set(pq, XIVE_ESB_OFF);
>>>> +        return false;
>>>> +    default:
>>>> +         g_assert_not_reached();
>>>> +    }
>>>> +}
>>>> +
>>>> +static bool xive_esb_eoi(uint8_t *pq)
>>>> +{
>>>> +    uint8_t old_pq = *pq & 0x3;
>>>> +
>>>> +    switch (old_pq) {
>>>> +    case XIVE_ESB_RESET:
>>>> +    case XIVE_ESB_PENDING:
>>>> +        xive_esb_set(pq, XIVE_ESB_RESET);
>>>> +        return false;
>>>> +    case XIVE_ESB_QUEUED:
>>>> +        xive_esb_set(pq, XIVE_ESB_PENDING);
>>>> +        return true;
>>>> +    case XIVE_ESB_OFF:
>>>> +        xive_esb_set(pq, XIVE_ESB_OFF);
>>>> +        return false;
>>>> +    default:
>>>> +         g_assert_not_reached();
>>>> +    }
>>>> +}
>>>> +
>>>> +/*
>>>> + * XIVE Interrupt Source (or IVSE)
>>>> + */
>>>> +
>>>> +uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
>>>> +{
>>>> +    assert(srcno < xsrc->nr_irqs);
>>>> +
>>>> +    return xsrc->status[srcno] & 0x3;
>>>> +}
>>>> +
>>>> +uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
>>>> +{
>>>> +    assert(srcno < xsrc->nr_irqs);
>>>> +
>>>> +    return xive_esb_set(&xsrc->status[srcno], pq);
>>>> +}
>>>> +
>>>> +/*
>>>> + * Returns whether the event notification should be forwarded.
>>>> + */
>>>> +static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
>>>> +{
>>>> +    assert(srcno < xsrc->nr_irqs);
>>>> +
>>>> +    return xive_esb_trigger(&xsrc->status[srcno]);
>>>> +}
>>>> +
>>>> +/*
>>>> + * Returns whether the event notification should be forwarded.
>>>> + */
>>>> +static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
>>>> +{
>>>> +    assert(srcno < xsrc->nr_irqs);
>>>> +
>>>> +    return xive_esb_eoi(&xsrc->status[srcno]);
>>>> +}
>>>> +
>>>> +/*
>>>> + * Forward the source event notification to the Router
>>>> + */
>>>> +static void xive_source_notify(XiveSource *xsrc, int srcno)
>>>> +{
>>>> +
>>>> +}
>>>> +
>>>> +/*
>>>> + * In a two pages ESB MMIO setting, even page is the trigger page, odd
>>>> + * page is for management
>>>> + */
>>>> +static inline bool addr_is_even(hwaddr addr, uint32_t shift)
>>>> +{
>>>> +    return !((addr >> shift) & 1);
>>>> +}
>>>> +
>>>> +static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
>>>> +{
>>>> +    return xive_source_esb_has_2page(xsrc) &&
>>>> +        addr_is_even(addr, xsrc->esb_shift - 1);
>>>> +}
>>>> +
>>>> +/*
>>>> + * ESB MMIO loads
>>>> + *                      Trigger page    Management/EOI page
>>>> + * 2 pages setting      even            odd
>>>> + *
>>>> + * 0x000 .. 0x3FF       -1              EOI and return 0|1
>>>> + * 0x400 .. 0x7FF       -1              EOI and return 0|1
>>>> + * 0x800 .. 0xBFF       -1              return PQ
>>>> + * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
>>>> + * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
>>>> + * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
>>>> + * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
>>>> + */
>>>
>>> I can't quite make sense of this table.  What do the -1s represent,
>>
>> the value returned by the load.
>>
>>> and how does it relate to the non-2page case?
>>
>> one page ESB support trigger and management on the same page. So for loads,
>> the odd page behavior applies.  
>>
>>>> +static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
>>>> +{
>>>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>>>> +    uint32_t offset = addr & 0xFFF;
>>>> +    uint32_t srcno = addr >> xsrc->esb_shift;
>>>> +    uint64_t ret = -1;
>>>> +
>>>> +    /* In a two pages ESB MMIO setting, trigger page should not be read */
>>>> +    if (xive_source_is_trigger_page(xsrc, addr)) {
>>>> +        qemu_log_mask(LOG_GUEST_ERROR,
>>>> +                      "XIVE: invalid load on IRQ %d trigger page at "
>>>> +                      "0x%"HWADDR_PRIx"\n", srcno, addr);
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    switch (offset) {
>>>> +    case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
>>>> +        ret = xive_source_esb_eoi(xsrc, srcno);
>>>> +
>>>> +        /* Forward the source event notification for routing */
>>>> +        if (ret) {
>>>> +            xive_source_notify(xsrc, srcno);
>>>> +        }
>>>> +        break;
>>>> +
>>>> +    case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
>>>> +        ret = xive_source_esb_get(xsrc, srcno);
>>>> +        break;
>>>> +
>>>> +    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
>>>> +    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
>>>> +    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
>>>> +    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
>>>> +        ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
>>>> +        break;
>>>> +    default:
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
>>>> +                      offset);
>>>> +    }
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +/*
>>>> + * ESB MMIO stores
>>>> + *                      Trigger page    Management/EOI page
>>>> + * 2 pages setting      even            odd
>>>
>>> As with the previous table, I don't quite understand what the headings
>>> above mean.
>>
>> one page ESB support trigger and management on the same page. So for stores,
>> the odd page behavior applies.
>>
>> The headings can be improved. I will think of something.
>>
>>>> + * 0x000 .. 0x3FF       Trigger         Trigger
>>>> + * 0x400 .. 0x7FF       Trigger         EOI
>>>> + * 0x800 .. 0xBFF       Trigger         undefined
>>>> + * 0xC00 .. 0xCFF       Trigger         PQ=00
>>>> + * 0xD00 .. 0xDFF       Trigger         PQ=01
>>>> + * 0xE00 .. 0xEFF       Trigger         PQ=10
>>>> + * 0xF00 .. 0xFFF       Trigger         PQ=11
>>>> + */
>>>> +static void xive_source_esb_write(void *opaque, hwaddr addr,
>>>> +                                  uint64_t value, unsigned size)
>>>> +{
>>>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>>>> +    uint32_t offset = addr & 0xFFF;
>>>> +    uint32_t srcno = addr >> xsrc->esb_shift;
>>>> +    bool notify = false;
>>>> +
>>>> +    /* In a two pages ESB MMIO setting, trigger page only triggers */
>>>> +    if (xive_source_is_trigger_page(xsrc, addr)) {
>>>> +        notify = xive_source_esb_trigger(xsrc, srcno);
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    switch (offset) {
>>>> +    case 0 ... 0x3FF:
>>>> +        notify = xive_source_esb_trigger(xsrc, srcno);
>>>> +        break;
>>>> +
>>>> +    case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
>>>> +        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) {
>>>> +            qemu_log_mask(LOG_GUEST_ERROR,
>>>> +                          "XIVE: invalid Store EOI for IRQ %d\n", srcno);
>>>> +            return;
>>>> +        }
>>>> +
>>>> +        notify = xive_source_esb_eoi(xsrc, srcno);
>>>> +        break;
>>>> +
>>>> +    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
>>>> +    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
>>>> +    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
>>>> +    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
>>>> +        xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
>>>> +        break;
>>>> +
>>>> +    default:
>>>> +        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
>>>> +                      offset);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +out:
>>>> +    /* Forward the source event notification for routing */
>>>> +    if (notify) {
>>>> +        xive_source_notify(xsrc, srcno);
>>>> +    }
>>>> +}
>>>> +
>>>> +static const MemoryRegionOps xive_source_esb_ops = {
>>>> +    .read = xive_source_esb_read,
>>>> +    .write = xive_source_esb_write,
>>>> +    .endianness = DEVICE_BIG_ENDIAN,
>>>> +    .valid = {
>>>> +        .min_access_size = 8,
>>>> +        .max_access_size = 8,
>>>> +    },
>>>> +    .impl = {
>>>> +        .min_access_size = 8,
>>>> +        .max_access_size = 8,
>>>> +    },
>>>> +};
>>>> +
>>>> +static void xive_source_set_irq(void *opaque, int srcno, int val)
>>>> +{
>>>> +    XiveSource *xsrc = XIVE_SOURCE(opaque);
>>>> +    bool notify = false;
>>>> +
>>>> +    if (val) {
>>>> +        notify = xive_source_esb_trigger(xsrc, srcno);
>>>> +    }
>>>> +
>>>> +    /* Forward the source event notification for routing */
>>>> +    if (notify) {
>>>> +        xive_source_notify(xsrc, srcno);
>>>> +    }
>>>> +}
>>>> +
>>>> +void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
>>>> +{
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>>>> +        uint8_t pq = xive_source_esb_get(xsrc, i);
>>>> +
>>>> +        if (pq == XIVE_ESB_OFF) {
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        monitor_printf(mon, "  %08x %c%c\n", i + offset,
>>>> +                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
>>>> +                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-');
>>>> +    }
>>>> +}
>>>> +
>>>> +static void xive_source_reset(DeviceState *dev)
>>>> +{
>>>> +    XiveSource *xsrc = XIVE_SOURCE(dev);
>>>> +
>>>> +    /* PQs are initialized to 0b01 which corresponds to "ints off" */
>>>> +    memset(xsrc->status, 0x1, xsrc->nr_irqs);
>>>
>>> You've already got XIVE_ESB_OFF defined to make this a little clearer.
>>
>> Sure.
>>
>> Thanks,
>>
>> C. 
>>
>>
>>>
>>>> +}
>>>> +
>>>> +static void xive_source_realize(DeviceState *dev, Error **errp)
>>>> +{
>>>> +    XiveSource *xsrc = XIVE_SOURCE(dev);
>>>> +
>>>> +    if (!xsrc->nr_irqs) {
>>>> +        error_setg(errp, "Number of interrupt needs to be greater than 0");
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (xsrc->esb_shift != XIVE_ESB_4K &&
>>>> +        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
>>>> +        xsrc->esb_shift != XIVE_ESB_64K &&
>>>> +        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) {
>>>> +        error_setg(errp, "Invalid ESB shift setting");
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
>>>> +                                     xsrc->nr_irqs);
>>>> +
>>>> +    xsrc->status = g_malloc0(xsrc->nr_irqs);
>>>> +
>>>> +    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
>>>> +                          &xive_source_esb_ops, xsrc, "xive.esb",
>>>> +                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
>>>> +    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio);
>>>> +}
>>>> +
>>>> +static const VMStateDescription vmstate_xive_source = {
>>>> +    .name = TYPE_XIVE_SOURCE,
>>>> +    .version_id = 1,
>>>> +    .minimum_version_id = 1,
>>>> +    .fields = (VMStateField[]) {
>>>> +        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
>>>> +        VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
>>>> +        VMSTATE_END_OF_LIST()
>>>> +    },
>>>> +};
>>>> +
>>>> +/*
>>>> + * The default XIVE interrupt source setting for the ESB MMIOs is two
>>>> + * 64k pages without Store EOI, to be in sync with KVM.
>>>> + */
>>>> +static Property xive_source_properties[] = {
>>>> +    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0),
>>>> +    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0),
>>>> +    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
>>>> +    DEFINE_PROP_END_OF_LIST(),
>>>> +};
>>>> +
>>>> +static void xive_source_class_init(ObjectClass *klass, void *data)
>>>> +{
>>>> +    DeviceClass *dc = DEVICE_CLASS(klass);
>>>> +
>>>> +    dc->desc    = "XIVE Interrupt Source";
>>>> +    dc->props   = xive_source_properties;
>>>> +    dc->realize = xive_source_realize;
>>>> +    dc->reset   = xive_source_reset;
>>>> +    dc->vmsd    = &vmstate_xive_source;
>>>> +}
>>>> +
>>>> +static const TypeInfo xive_source_info = {
>>>> +    .name          = TYPE_XIVE_SOURCE,
>>>> +    .parent        = TYPE_SYS_BUS_DEVICE,
>>>> +    .instance_size = sizeof(XiveSource),
>>>> +    .class_init    = xive_source_class_init,
>>>> +};
>>>> +
>>>> +static void xive_register_types(void)
>>>> +{
>>>> +    type_register_static(&xive_source_info);
>>>> +}
>>>> +
>>>> +type_init(xive_register_types)
>>>> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
>>>> index 0e9963f5eecc..72a46ed91c31 100644
>>>> --- a/hw/intc/Makefile.objs
>>>> +++ b/hw/intc/Makefile.objs
>>>> @@ -37,6 +37,7 @@ obj-$(CONFIG_SH4) += sh_intc.o
>>>>  obj-$(CONFIG_XICS) += xics.o
>>>>  obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
>>>>  obj-$(CONFIG_XICS_KVM) += xics_kvm.o
>>>> +obj-$(CONFIG_XIVE) += xive.o
>>>>  obj-$(CONFIG_POWERNV) += xics_pnv.o
>>>>  obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
>>>>  obj-$(CONFIG_S390_FLIC) += s390_flic.o
>>>
>>
>
Cédric Le Goater Nov. 26, 2018, 8:14 a.m. UTC | #5
>>>> + */
>>>> +
>>>> +#ifndef PPC_XIVE_H
>>>> +#define PPC_XIVE_H
>>>> +
>>>> +#include "hw/sysbus.h"
>>>
>>> So, I'm a bit dubious about making the XiveSource a SysBus device -
>>> I'm concerned it won't play well with tying it into the other devices
>>> like PHB that "own" it in real hardware.
>>
>> It does but I can take a look at changing it to a DeviceState. The 
>> reset handlers might be a concern.
> 
> As "non bus" device I think you'd need to register your own reset
> handler rather than just setting dc->reset.  Otherwise, I think that
> should work.

I removed the SysBus dependencies from XIVE, and indeed it's better
not to rely on the default reset and mapping behavior of sysbus.

I am addressing your comments in a WIP v6 branch on github.   

C.
diff mbox series

Patch

diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index aec2855750d6..2d1e7c5c4668 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -16,6 +16,7 @@  CONFIG_VIRTIO_VGA=y
 CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_SPAPR=$(CONFIG_PSERIES)
 CONFIG_XICS_KVM=$(call land,$(CONFIG_PSERIES),$(CONFIG_KVM))
+CONFIG_XIVE=$(CONFIG_PSERIES)
 CONFIG_MEM_DEVICE=y
 CONFIG_DIMM=y
 CONFIG_SPAPR_RNG=y
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
new file mode 100644
index 000000000000..5fec4b08705d
--- /dev/null
+++ b/include/hw/ppc/xive.h
@@ -0,0 +1,130 @@ 
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#ifndef PPC_XIVE_H
+#define PPC_XIVE_H
+
+#include "hw/sysbus.h"
+
+/*
+ * XIVE Interrupt Source
+ */
+
+#define TYPE_XIVE_SOURCE "xive-source"
+#define XIVE_SOURCE(obj) OBJECT_CHECK(XiveSource, (obj), TYPE_XIVE_SOURCE)
+
+/*
+ * XIVE Interrupt Source characteristics, which define how the ESB are
+ * controlled.
+ */
+#define XIVE_SRC_H_INT_ESB     0x1 /* ESB managed with hcall H_INT_ESB */
+#define XIVE_SRC_STORE_EOI     0x2 /* Store EOI supported */
+
+typedef struct XiveSource {
+    SysBusDevice parent;
+
+    /* IRQs */
+    uint32_t        nr_irqs;
+    qemu_irq        *qirqs;
+
+    /* PQ bits */
+    uint8_t         *status;
+
+    /* ESB memory region */
+    uint64_t        esb_flags;
+    uint32_t        esb_shift;
+    MemoryRegion    esb_mmio;
+} XiveSource;
+
+/*
+ * ESB MMIO setting. Can be one page, for both source triggering and
+ * source management, or two different pages. See below for magic
+ * values.
+ */
+#define XIVE_ESB_4K          12 /* PSI HB only */
+#define XIVE_ESB_4K_2PAGE    13
+#define XIVE_ESB_64K         16
+#define XIVE_ESB_64K_2PAGE   17
+
+static inline bool xive_source_esb_has_2page(XiveSource *xsrc)
+{
+    return xsrc->esb_shift == XIVE_ESB_64K_2PAGE ||
+        xsrc->esb_shift == XIVE_ESB_4K_2PAGE;
+}
+
+/* The trigger page is always the first/even page */
+static inline hwaddr xive_source_esb_page(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs);
+    return (1ull << xsrc->esb_shift) * srcno;
+}
+
+/* In a two pages ESB MMIO setting, the odd page is for management */
+static inline hwaddr xive_source_esb_mgmt(XiveSource *xsrc, int srcno)
+{
+    hwaddr addr = xive_source_esb_page(xsrc, srcno);
+
+    if (xive_source_esb_has_2page(xsrc)) {
+        addr += (1 << (xsrc->esb_shift - 1));
+    }
+
+    return addr;
+}
+
+/*
+ * Each interrupt source has a 2-bit state machine which can be
+ * controlled by MMIO. P indicates that an interrupt is pending (has
+ * been sent to a queue and is waiting for an EOI). Q indicates that
+ * the interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee that a
+ * given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ */
+#define XIVE_ESB_VAL_P        0x2
+#define XIVE_ESB_VAL_Q        0x1
+
+#define XIVE_ESB_RESET        0x0
+#define XIVE_ESB_PENDING      XIVE_ESB_VAL_P
+#define XIVE_ESB_QUEUED       (XIVE_ESB_VAL_P | XIVE_ESB_VAL_Q)
+#define XIVE_ESB_OFF          XIVE_ESB_VAL_Q
+
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * The following offsets into the ESB MMIO allow to read or manipulate
+ * the PQ bits. They must be used with an 8-byte load instruction.
+ * They all return the previous state of the interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a store and
+ * some ESBs support doing a trigger via a separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI      0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI       0x000 /* Load */
+#define XIVE_ESB_GET            0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00      0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01      0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10      0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11      0xf00 /* Load */
+
+uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno);
+uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq);
+
+void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset,
+                                Monitor *mon);
+
+static inline qemu_irq xive_source_qirq(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs);
+    return xsrc->qirqs[srcno];
+}
+
+#endif /* PPC_XIVE_H */
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
new file mode 100644
index 000000000000..f7621f84828c
--- /dev/null
+++ b/hw/intc/xive.c
@@ -0,0 +1,379 @@ 
+/*
+ * QEMU PowerPC XIVE interrupt controller model
+ *
+ * Copyright (c) 2017-2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "sysemu/cpus.h"
+#include "sysemu/dma.h"
+#include "monitor/monitor.h"
+#include "hw/ppc/xive.h"
+
+/*
+ * XIVE ESB helpers
+ */
+
+static uint8_t xive_esb_set(uint8_t *pq, uint8_t value)
+{
+    uint8_t old_pq = *pq & 0x3; /* previous PQ state, returned to the caller */
+
+    *pq &= ~0x3;        /* clear the two PQ bits, keep the other bits */
+    *pq |= value & 0x3; /* install the new PQ state */
+
+    return old_pq;
+}
+
+static bool xive_esb_trigger(uint8_t *pq)
+{
+    uint8_t old_pq = *pq & 0x3;
+
+    switch (old_pq) {
+    case XIVE_ESB_RESET: /* idle: becomes pending, true is returned */
+        xive_esb_set(pq, XIVE_ESB_PENDING);
+        return true;
+    case XIVE_ESB_PENDING:
+    case XIVE_ESB_QUEUED: /* already pending: only record the extra trigger */
+        xive_esb_set(pq, XIVE_ESB_QUEUED);
+        return false;
+    case XIVE_ESB_OFF: /* source is off: state is left at OFF */
+        xive_esb_set(pq, XIVE_ESB_OFF);
+        return false;
+    default:
+         g_assert_not_reached(); /* all four 2-bit values are handled above */
+    }
+}
+
+static bool xive_esb_eoi(uint8_t *pq)
+{
+    uint8_t old_pq = *pq & 0x3;
+
+    switch (old_pq) {
+    case XIVE_ESB_RESET:
+    case XIVE_ESB_PENDING: /* nothing was queued: back to reset */
+        xive_esb_set(pq, XIVE_ESB_RESET);
+        return false;
+    case XIVE_ESB_QUEUED: /* Q was set: stay pending, true asks for a re-trigger */
+        xive_esb_set(pq, XIVE_ESB_PENDING);
+        return true;
+    case XIVE_ESB_OFF: /* source is off: state is left at OFF */
+        xive_esb_set(pq, XIVE_ESB_OFF);
+        return false;
+    default:
+         g_assert_not_reached(); /* all four 2-bit values are handled above */
+    }
+}
+
+/*
+ * XIVE Interrupt Source (or IVSE)
+ */
+
+uint8_t xive_source_esb_get(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs); /* status[] has nr_irqs entries */
+
+    return xsrc->status[srcno] & 0x3; /* PQ is kept in the two low bits */
+}
+
+uint8_t xive_source_esb_set(XiveSource *xsrc, uint32_t srcno, uint8_t pq)
+{
+    assert(srcno < xsrc->nr_irqs); /* status[] has nr_irqs entries */
+
+    return xive_esb_set(&xsrc->status[srcno], pq); /* returns the previous PQ */
+}
+
+/*
+ * Trigger source srcno. Returns whether the event should be forwarded.
+ */
+static bool xive_source_esb_trigger(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs); /* status[] has nr_irqs entries */
+
+    return xive_esb_trigger(&xsrc->status[srcno]);
+}
+
+/*
+ * EOI source srcno. Returns whether a queued event should be forwarded.
+ */
+static bool xive_source_esb_eoi(XiveSource *xsrc, uint32_t srcno)
+{
+    assert(srcno < xsrc->nr_irqs); /* status[] has nr_irqs entries */
+
+    return xive_esb_eoi(&xsrc->status[srcno]);
+}
+
+/*
+ * Forward the source event notification to the Router
+ */
+static void xive_source_notify(XiveSource *xsrc, int srcno)
+{
+    /* Intentionally empty for now: no forwarding is implemented here yet */
+}
+
+/*
+ * In a two-page ESB MMIO setting, the even page is the trigger page and
+ * the odd page is for management
+ */
+static inline bool addr_is_even(hwaddr addr, uint32_t shift)
+{
+    return !((addr >> shift) & 1); /* true when bit 'shift' of addr is clear */
+}
+
+static inline bool xive_source_is_trigger_page(XiveSource *xsrc, hwaddr addr)
+{
+    return xive_source_esb_has_2page(xsrc) && /* never true with a single page */
+        addr_is_even(addr, xsrc->esb_shift - 1); /* lower half of the window */
+}
+
+/*
+ * ESB MMIO loads
+ *                      Trigger page    Management/EOI page
+ * 2 pages setting      even            odd
+ *
+ * 0x000 .. 0x3FF       -1              EOI and return 0|1
+ * 0x400 .. 0x7FF       -1              EOI and return 0|1
+ * 0x800 .. 0xBFF       -1              return PQ
+ * 0xC00 .. 0xCFF       -1              return PQ and atomically PQ=00
+ * 0xD00 .. 0xDFF       -1              return PQ and atomically PQ=01
+ * 0xE00 .. 0xEFF       -1              return PQ and atomically PQ=10
+ * 0xF00 .. 0xFFF       -1              return PQ and atomically PQ=11
+ */
+static uint64_t xive_source_esb_read(void *opaque, hwaddr addr, unsigned size)
+{
+    XiveSource *xsrc = XIVE_SOURCE(opaque);
+    uint32_t offset = addr & 0xFFF;           /* command offset in a 4K range */
+    uint32_t srcno = addr >> xsrc->esb_shift; /* one ESB window per source */
+    uint64_t ret = -1;                        /* value returned on bad loads */
+
+    /* In a two pages ESB MMIO setting, trigger page should not be read */
+    if (xive_source_is_trigger_page(xsrc, addr)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "XIVE: invalid load on IRQ %u trigger page at "
+                      "0x%"HWADDR_PRIx"\n", srcno, addr);
+        return -1;
+    }
+
+    switch (offset) {
+    case XIVE_ESB_LOAD_EOI ... XIVE_ESB_LOAD_EOI + 0x7FF:
+        ret = xive_source_esb_eoi(xsrc, srcno); /* 1 if an event was queued */
+
+        /* Forward the source event notification for routing */
+        if (ret) {
+            xive_source_notify(xsrc, srcno);
+        }
+        break;
+
+    case XIVE_ESB_GET ... XIVE_ESB_GET + 0x3FF:
+        ret = xive_source_esb_get(xsrc, srcno); /* plain read of PQ */
+        break;
+
+    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+        /* Bits 9:8 of the offset (0xc..0xf) encode the new PQ; old PQ returned */
+        ret = xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3);
+        break;
+    default: /* the ranges above cover 0x000..0xFFF; kept as a safety net */
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB load addr %x\n",
+                      offset);
+    }
+
+    return ret;
+}
+
+/*
+ * ESB MMIO stores
+ *                      Trigger page    Management/EOI page
+ * 2 pages setting      even            odd
+ *
+ * 0x000 .. 0x3FF       Trigger         Trigger
+ * 0x400 .. 0x7FF       Trigger         EOI
+ * 0x800 .. 0xBFF       Trigger         undefined
+ * 0xC00 .. 0xCFF       Trigger         PQ=00
+ * 0xD00 .. 0xDFF       Trigger         PQ=01
+ * 0xE00 .. 0xEFF       Trigger         PQ=10
+ * 0xF00 .. 0xFFF       Trigger         PQ=11
+ */
+static void xive_source_esb_write(void *opaque, hwaddr addr,
+                                  uint64_t value, unsigned size)
+{
+    XiveSource *xsrc = XIVE_SOURCE(opaque);
+    uint32_t offset = addr & 0xFFF;           /* command offset in a 4K range */
+    uint32_t srcno = addr >> xsrc->esb_shift; /* one ESB window per source */
+    bool notify = false;                      /* set when an event is forwarded */
+
+    /* In a two pages ESB MMIO setting, trigger page only triggers */
+    if (xive_source_is_trigger_page(xsrc, addr)) {
+        notify = xive_source_esb_trigger(xsrc, srcno);
+        goto out;
+    }
+
+    switch (offset) {
+    case 0 ... 0x3FF: /* a store here triggers, whatever the stored value */
+        notify = xive_source_esb_trigger(xsrc, srcno);
+        break;
+
+    case XIVE_ESB_STORE_EOI ... XIVE_ESB_STORE_EOI + 0x3FF:
+        if (!(xsrc->esb_flags & XIVE_SRC_STORE_EOI)) { /* opt-in per source */
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "XIVE: invalid Store EOI for IRQ %u\n", srcno);
+            return;
+        }
+
+        notify = xive_source_esb_eoi(xsrc, srcno);
+        break;
+
+    case XIVE_ESB_SET_PQ_00 ... XIVE_ESB_SET_PQ_00 + 0x0FF:
+    case XIVE_ESB_SET_PQ_01 ... XIVE_ESB_SET_PQ_01 + 0x0FF:
+    case XIVE_ESB_SET_PQ_10 ... XIVE_ESB_SET_PQ_10 + 0x0FF:
+    case XIVE_ESB_SET_PQ_11 ... XIVE_ESB_SET_PQ_11 + 0x0FF:
+        xive_source_esb_set(xsrc, srcno, (offset >> 8) & 0x3); /* bits 9:8 = PQ */
+        break;
+
+    default: /* reached for 0x800..0xBFF, which has no store semantics here */
+        qemu_log_mask(LOG_GUEST_ERROR, "XIVE: invalid ESB write addr %x\n",
+                      offset);
+        return;
+    }
+
+out:
+    /* Forward the source event notification for routing */
+    if (notify) {
+        xive_source_notify(xsrc, srcno);
+    }
+}
+
+static const MemoryRegionOps xive_source_esb_ops = {
+    .read = xive_source_esb_read,
+    .write = xive_source_esb_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8, /* only 8-byte accesses are declared valid */
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8, /* handlers are likewise declared for 8 bytes */
+        .max_access_size = 8,
+    },
+};
+
+static void xive_source_set_irq(void *opaque, int srcno, int val)
+{
+    XiveSource *xsrc = XIVE_SOURCE(opaque);
+    bool notify = false;
+
+    if (val) { /* only a non-zero value triggers; val == 0 changes nothing */
+        notify = xive_source_esb_trigger(xsrc, srcno);
+    }
+
+    /* Forward the source event notification for routing */
+    if (notify) {
+        xive_source_notify(xsrc, srcno);
+    }
+}
+
+void xive_source_pic_print_info(XiveSource *xsrc, uint32_t offset, Monitor *mon)
+{
+    uint32_t i; /* unsigned like nr_irqs: no signed/unsigned comparison */
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        uint8_t pq = xive_source_esb_get(xsrc, i);
+
+        if (pq == XIVE_ESB_OFF) { /* sources that are off are not listed */
+            continue;
+        }
+
+        monitor_printf(mon, "  %08x %c%c\n", i + offset, /* offset + index */
+                       pq & XIVE_ESB_VAL_P ? 'P' : '-',
+                       pq & XIVE_ESB_VAL_Q ? 'Q' : '-');
+    }
+}
+
+static void xive_source_reset(DeviceState *dev)
+{
+    XiveSource *xsrc = XIVE_SOURCE(dev);
+
+    /* PQs are initialized to 0b01 which corresponds to "ints off" */
+    memset(xsrc->status, XIVE_ESB_OFF, xsrc->nr_irqs);
+}
+
+static void xive_source_realize(DeviceState *dev, Error **errp)
+{
+    XiveSource *xsrc = XIVE_SOURCE(dev);
+
+    if (!xsrc->nr_irqs) { /* "nr-irqs" defaults to 0, so it must be set */
+        error_setg(errp, "Number of interrupt needs to be greater than 0");
+        return;
+    }
+
+    if (xsrc->esb_shift != XIVE_ESB_4K &&
+        xsrc->esb_shift != XIVE_ESB_4K_2PAGE &&
+        xsrc->esb_shift != XIVE_ESB_64K &&
+        xsrc->esb_shift != XIVE_ESB_64K_2PAGE) { /* only these four values */
+        error_setg(errp, "Invalid ESB shift setting");
+        return;
+    }
+
+    xsrc->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc,
+                                     xsrc->nr_irqs); /* one qemu_irq per source */
+
+    xsrc->status = g_malloc0(xsrc->nr_irqs); /* one status byte per source */
+    /* MMIO region size: one (1 << esb_shift)-byte window per source */
+    memory_region_init_io(&xsrc->esb_mmio, OBJECT(xsrc),
+                          &xive_source_esb_ops, xsrc, "xive.esb",
+                          (1ull << xsrc->esb_shift) * xsrc->nr_irqs);
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &xsrc->esb_mmio);
+}
+
+static const VMStateDescription vmstate_xive_source = {
+    .name = TYPE_XIVE_SOURCE,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        /* NOTE(review): presumably checks nr_irqs matches on load - confirm */
+        VMSTATE_UINT32_EQUAL(nr_irqs, XiveSource, NULL),
+        VMSTATE_VBUFFER_UINT32(status, XiveSource, 1, NULL, nr_irqs),
+        VMSTATE_END_OF_LIST() /* status[] is sized by nr_irqs above */
+    },
+};
+
+/*
+ * The default XIVE interrupt source setting for the ESB MMIOs is two
+ * 64k pages without Store EOI, to be in sync with KVM.
+ */
+static Property xive_source_properties[] = {
+    DEFINE_PROP_UINT64("flags", XiveSource, esb_flags, 0), /* no flag set */
+    DEFINE_PROP_UINT32("nr-irqs", XiveSource, nr_irqs, 0), /* 0 fails realize */
+    DEFINE_PROP_UINT32("shift", XiveSource, esb_shift, XIVE_ESB_64K_2PAGE),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xive_source_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc    = "XIVE Interrupt Source";
+    dc->props   = xive_source_properties; /* "flags", "nr-irqs", "shift" */
+    dc->realize = xive_source_realize;    /* validates props, allocates state */
+    dc->reset   = xive_source_reset;      /* sets every source to PQ=01 (off) */
+    dc->vmsd    = &vmstate_xive_source;   /* describes nr_irqs and status[] */
+}
+
+static const TypeInfo xive_source_info = {
+    .name          = TYPE_XIVE_SOURCE,
+    .parent        = TYPE_SYS_BUS_DEVICE, /* realize calls sysbus_init_mmio() */
+    .instance_size = sizeof(XiveSource),
+    .class_init    = xive_source_class_init,
+};
+
+static void xive_register_types(void)
+{
+    type_register_static(&xive_source_info); /* the only type defined here */
+}
+
+type_init(xive_register_types)
diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 0e9963f5eecc..72a46ed91c31 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -37,6 +37,7 @@  obj-$(CONFIG_SH4) += sh_intc.o
 obj-$(CONFIG_XICS) += xics.o
 obj-$(CONFIG_XICS_SPAPR) += xics_spapr.o
 obj-$(CONFIG_XICS_KVM) += xics_kvm.o
+obj-$(CONFIG_XIVE) += xive.o
 obj-$(CONFIG_POWERNV) += xics_pnv.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o