
[v5,11/36] spapr/xive: use the VCPU id as a NVT identifier

Message ID 20181116105729.23240-12-clg@kaod.org (mailing list archive)
State New, archived
Series ppc: support for the XIVE interrupt controller (POWER9)

Commit Message

Cédric Le Goater Nov. 16, 2018, 10:57 a.m. UTC
The IVPE scans the O/S CAM line of the XIVE thread interrupt contexts
to find a matching Notification Virtual Target (NVT) among the NVTs
dispatched on the HW processor threads.

On a real system, the thread interrupt contexts are updated by the
hypervisor when a Virtual Processor is scheduled to run on a HW
thread. Under QEMU, the model emulates the same behavior by hardwiring
the NVT identifier in the thread context registers at reset.

The NVT identifier used by the sPAPRXive model is the VCPU id. The END
identifier is also derived from the VCPU id. A set of helpers doing
the conversion between identifiers is provided for the hcalls
configuring the sources and the ENDs.

The model does not need an NVT table, but the XiveRouter NVT operations
are provided to perform some extra checks in the routing algorithm.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/hw/ppc/spapr_xive.h |  17 +++++
 include/hw/ppc/xive.h       |   3 +
 hw/intc/spapr_xive.c        | 136 ++++++++++++++++++++++++++++++++++++
 hw/intc/xive.c              |   9 +++
 4 files changed, 165 insertions(+)
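
[Editor's note: a minimal sketch of the vcpu_id based indexing the helpers
in this patch implement. It is not part of the patch; the function names
are made up, the arithmetic mirrors spapr_xive_cpu_to_nvt() and
spapr_xive_cpu_to_end() with the block handling left out.]

#include <stdint.h>

#define SPAPR_XIVE_VP_BASE 0x400   /* same base as in the patch */

/* NVT index for a vCPU: a plain offset of the vcpu_id */
static uint32_t sketch_vcpu_to_nvt_idx(uint32_t vcpu_id)
{
    return SPAPR_XIVE_VP_BASE + vcpu_id;
}

/* END index for a (vCPU, priority) pair: 8 priorities per vCPU */
static uint32_t sketch_vcpu_prio_to_end_idx(uint32_t vcpu_id, uint8_t prio)
{
    return (vcpu_id << 3) + prio;
}

/* ... and back: server (vcpu_id) and priority from an END index */
static void sketch_end_idx_to_target(uint32_t end_idx,
                                     uint32_t *server, uint8_t *prio)
{
    *server = end_idx >> 3;
    *prio   = end_idx & 0x7;
}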

Comments

David Gibson Nov. 28, 2018, 2:39 a.m. UTC | #1
On Fri, Nov 16, 2018 at 11:57:04AM +0100, Cédric Le Goater wrote:
> The IVPE scans the O/S CAM line of the XIVE thread interrupt contexts
> to find a matching Notification Virtual Target (NVT) among the NVTs
> dispatched on the HW processor threads.
> 
> On a real system, the thread interrupt contexts are updated by the
> hypervisor when a Virtual Processor is scheduled to run on a HW
> thread. Under QEMU, the model emulates the same behavior by hardwiring
> the NVT identifier in the thread context registers at reset.
> 
> The NVT identifier used by the sPAPRXive model is the VCPU id. The END
> identifier is also derived from the VCPU id. A set of helpers doing
> the conversion between identifiers are provided for the hcalls
> configuring the sources and the ENDs.
> 
> The model does not need a NVT table but The XiveRouter NVT operations
> are provided to perform some extra checks in the routing algorithm.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  include/hw/ppc/spapr_xive.h |  17 +++++
>  include/hw/ppc/xive.h       |   3 +
>  hw/intc/spapr_xive.c        | 136 ++++++++++++++++++++++++++++++++++++
>  hw/intc/xive.c              |   9 +++
>  4 files changed, 165 insertions(+)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 06727bd86aa9..3f65b8f485fd 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -43,4 +43,21 @@ bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>  qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn);
>  
> +/*
> + * sPAPR NVT and END indexing helpers
> + */
> +uint32_t spapr_xive_nvt_to_target(sPAPRXive *xive, uint8_t nvt_blk,
> +                                  uint32_t nvt_idx);
> +int spapr_xive_target_to_nvt(sPAPRXive *xive, uint32_t target,
> +                            uint8_t *out_nvt_blk, uint32_t *out_nvt_idx);
> +int spapr_xive_cpu_to_nvt(sPAPRXive *xive, PowerPCCPU *cpu,
> +                          uint8_t *out_nvt_blk, uint32_t *out_nvt_idx);
> +
> +int spapr_xive_end_to_target(sPAPRXive *xive, uint8_t end_blk, uint32_t end_idx,
> +                             uint32_t *out_server, uint8_t *out_prio);
> +int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t target, uint8_t prio,
> +                             uint8_t *out_end_blk, uint32_t *out_end_idx);
> +int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
> +                          uint8_t *out_end_blk, uint32_t *out_end_idx);
> +
>  #endif /* PPC_SPAPR_XIVE_H */
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index e715a6c6923d..e6931ddaa83f 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -187,6 +187,8 @@ typedef struct XiveRouter {
>  #define XIVE_ROUTER_GET_CLASS(obj)                              \
>      OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
>  
> +typedef struct XiveTCTX XiveTCTX;
> +
>  typedef struct XiveRouterClass {
>      SysBusDeviceClass parent;
>  
> @@ -201,6 +203,7 @@ typedef struct XiveRouterClass {
>                     XiveNVT *nvt);
>      int (*set_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
>                     XiveNVT *nvt);
> +    void (*reset_tctx)(XiveRouter *xrtr, XiveTCTX *tctx);
>  } XiveRouterClass;
>  
>  void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index 5d038146c08e..3bf77ace11a2 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -199,6 +199,139 @@ static int spapr_xive_set_end(XiveRouter *xrtr,
>      return 0;
>  }
>  
> +static int spapr_xive_get_nvt(XiveRouter *xrtr,
> +                              uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
> +{
> +    sPAPRXive *xive = SPAPR_XIVE(xrtr);
> +    uint32_t vcpu_id = spapr_xive_nvt_to_target(xive, nvt_blk, nvt_idx);
> +    PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
> +
> +    if (!cpu) {
> +        return -1;
> +    }
> +
> +    /*
> +     * sPAPR does not maintain a NVT table. Return that the NVT is
> +     * valid if we have found a matching CPU
> +     */
> +    nvt->w0 = NVT_W0_VALID;
> +    return 0;
> +}
> +
> +static int spapr_xive_set_nvt(XiveRouter *xrtr,
> +                              uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
> +{
> +    /* no NVT table */
> +    return 0;
> +}
> +
> +/*
> + * When a Virtual Processor is scheduled to run on a HW thread, the
> + * hypervisor pushes its identifier in the OS CAM line. Under QEMU, we
> + * need to emulate the same behavior.
> + */
> +static void spapr_xive_reset_tctx(XiveRouter *xrtr, XiveTCTX *tctx)
> +{
> +    uint8_t  nvt_blk;
> +    uint32_t nvt_idx;
> +    uint32_t nvt_cam;
> +
> +    spapr_xive_cpu_to_nvt(SPAPR_XIVE(xrtr), POWERPC_CPU(tctx->cs),
> +                          &nvt_blk, &nvt_idx);
> +
> +    nvt_cam = cpu_to_be32(TM_QW1W2_VO | xive_tctx_cam_line(nvt_blk, nvt_idx));
> +    memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &nvt_cam, 4);
> +}
> +
> +/*
> + * The allocation of VP blocks is a complex operation in OPAL and the
> + * VP identifiers have a relation with the number of HW chips, the
> + * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE
> + * controller model does not have the same constraints and can use a
> + * simple mapping scheme of the CPU vcpu_id
> + *
> + * These identifiers are never returned to the OS.
> + */
> +
> +#define SPAPR_XIVE_VP_BASE 0x400

0x400 == 1024.  Could we ever have the possibility of needing to
consider both physical NVTs and PAPR NVTs at the same time?  If so,
does this base leave enough space for the physical ones?

> +uint32_t spapr_xive_nvt_to_target(sPAPRXive *xive, uint8_t nvt_blk,
> +                                  uint32_t nvt_idx)
> +{
> +    return nvt_idx - SPAPR_XIVE_VP_BASE;
> +}
> +
> +int spapr_xive_cpu_to_nvt(sPAPRXive *xive, PowerPCCPU *cpu,
> +                          uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)

A number of these conversions will come out a bit simpler if we pass
the block and index around as a single word in most places.

> +{
> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> +
> +    if (!cpu) {
> +        return -1;
> +    }
> +
> +    if (out_nvt_blk) {
> +        /* For testing purpose, we could use 0 for nvt_blk */
> +        *out_nvt_blk = xrtr->chip_id;

I don't see any point using the chip_id here, which is currently
always set to 0 for PAPR anyway.  If we just hardwire this to 0 it
removes the only use here of xrtr, which will allow some further
simplifications in the caller, I think.
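
[Editor's note: a sketch of what the helper could look like with the block
hardwired to 0, per the comment above. This is not the code in the patch;
also note the second NULL check is written against out_nvt_idx here, which
appears to be the intent of the original.]

static int spapr_xive_cpu_to_nvt(sPAPRXive *xive, PowerPCCPU *cpu,
                                 uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
{
    if (!cpu) {
        return -1;
    }

    if (out_nvt_blk) {
        *out_nvt_blk = 0;    /* single block, no chip_id lookup needed */
    }

    if (out_nvt_idx) {
        *out_nvt_idx = SPAPR_XIVE_VP_BASE + cpu->vcpu_id;
    }
    return 0;
}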

> +    }
> +
> +    if (out_nvt_blk) {
> +        *out_nvt_idx = SPAPR_XIVE_VP_BASE + cpu->vcpu_id;
> +    }
> +    return 0;
> +}
> +
> +int spapr_xive_target_to_nvt(sPAPRXive *xive, uint32_t target,
> +                             uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)

I suspect some, maybe most of these conversion functions could be static.

> +{
> +    return spapr_xive_cpu_to_nvt(xive, spapr_find_cpu(target), out_nvt_blk,
> +                                 out_nvt_idx);
> +}
> +
> +/*
> + * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
> + * priorities per CPU
> + */
> +int spapr_xive_end_to_target(sPAPRXive *xive, uint8_t end_blk, uint32_t end_idx,
> +                             uint32_t *out_server, uint8_t *out_prio)
> +{
> +    if (out_server) {
> +        *out_server = end_idx >> 3;
> +    }
> +
> +    if (out_prio) {
> +        *out_prio = end_idx & 0x7;
> +    }
> +    return 0;
> +}
> +
> +int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
> +                          uint8_t *out_end_blk, uint32_t *out_end_idx)
> +{
> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> +
> +    if (!cpu) {
> +        return -1;

Is there ever a reason this would be called with cpu == NULL?  If not
might as well just assert() here rather than pushing the error
handling back to the caller.

> +    }
> +
> +    if (out_end_blk) {
> +        /* For testing purpose, we could use 0 for nvt_blk */
> +        *out_end_blk = xrtr->chip_id;

Again, I don't see any point to using the chip_id, which is pretty
meaningless for PAPR.

> +    }
> +
> +    if (out_end_idx) {
> +        *out_end_idx = (cpu->vcpu_id << 3) + prio;
> +    }
> +    return 0;
> +}
> +
> +int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t target, uint8_t prio,
> +                             uint8_t *out_end_blk, uint32_t *out_end_idx)
> +{
> +    return spapr_xive_cpu_to_end(xive, spapr_find_cpu(target), prio,
> +                                 out_end_blk, out_end_idx);
> +}
> +
>  static const VMStateDescription vmstate_spapr_xive_end = {
>      .name = TYPE_SPAPR_XIVE "/end",
>      .version_id = 1,
> @@ -263,6 +396,9 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data)
>      xrc->set_eas = spapr_xive_set_eas;
>      xrc->get_end = spapr_xive_get_end;
>      xrc->set_end = spapr_xive_set_end;
> +    xrc->get_nvt = spapr_xive_get_nvt;
> +    xrc->set_nvt = spapr_xive_set_nvt;
> +    xrc->reset_tctx = spapr_xive_reset_tctx;
>  }
>  
>  static const TypeInfo spapr_xive_info = {
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index c49932d2b799..fc6ef5895e6d 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -481,6 +481,7 @@ static uint32_t xive_tctx_hw_cam_line(XiveTCTX *tctx, bool block_group)
>  static void xive_tctx_reset(void *dev)
>  {
>      XiveTCTX *tctx = XIVE_TCTX(dev);
> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(tctx->xrtr);
>  
>      memset(tctx->regs, 0, sizeof(tctx->regs));
>  
> @@ -495,6 +496,14 @@ static void xive_tctx_reset(void *dev)
>       */
>      tctx->regs[TM_QW1_OS + TM_PIPR] =
>          ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
> +
> +    /*
> +     * QEMU sPAPR XIVE only. To let the controller model reset the OS
> +     * CAM line with the VP identifier.
> +     */
> +    if (xrc->reset_tctx) {
> +        xrc->reset_tctx(tctx->xrtr, tctx);
> +    }

AFAICT this whole function is only used from PAPR, so you can just
move the whole thing to the papr code and avoid the hook function.

>  }
>  
>  static void xive_tctx_realize(DeviceState *dev, Error **errp)
Cédric Le Goater Nov. 28, 2018, 4:48 p.m. UTC | #2
On 11/28/18 3:39 AM, David Gibson wrote:
> On Fri, Nov 16, 2018 at 11:57:04AM +0100, Cédric Le Goater wrote:
>> The IVPE scans the O/S CAM line of the XIVE thread interrupt contexts
>> to find a matching Notification Virtual Target (NVT) among the NVTs
>> dispatched on the HW processor threads.
>>
>> On a real system, the thread interrupt contexts are updated by the
>> hypervisor when a Virtual Processor is scheduled to run on a HW
>> thread. Under QEMU, the model emulates the same behavior by hardwiring
>> the NVT identifier in the thread context registers at reset.
>>
>> The NVT identifier used by the sPAPRXive model is the VCPU id. The END
>> identifier is also derived from the VCPU id. A set of helpers doing
>> the conversion between identifiers are provided for the hcalls
>> configuring the sources and the ENDs.
>>
>> The model does not need a NVT table but The XiveRouter NVT operations
>> are provided to perform some extra checks in the routing algorithm.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>  include/hw/ppc/spapr_xive.h |  17 +++++
>>  include/hw/ppc/xive.h       |   3 +
>>  hw/intc/spapr_xive.c        | 136 ++++++++++++++++++++++++++++++++++++
>>  hw/intc/xive.c              |   9 +++
>>  4 files changed, 165 insertions(+)
>>
>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>> index 06727bd86aa9..3f65b8f485fd 100644
>> --- a/include/hw/ppc/spapr_xive.h
>> +++ b/include/hw/ppc/spapr_xive.h
>> @@ -43,4 +43,21 @@ bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
>>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
>>  qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn);
>>  
>> +/*
>> + * sPAPR NVT and END indexing helpers
>> + */
>> +uint32_t spapr_xive_nvt_to_target(sPAPRXive *xive, uint8_t nvt_blk,
>> +                                  uint32_t nvt_idx);
>> +int spapr_xive_target_to_nvt(sPAPRXive *xive, uint32_t target,
>> +                            uint8_t *out_nvt_blk, uint32_t *out_nvt_idx);
>> +int spapr_xive_cpu_to_nvt(sPAPRXive *xive, PowerPCCPU *cpu,
>> +                          uint8_t *out_nvt_blk, uint32_t *out_nvt_idx);
>> +
>> +int spapr_xive_end_to_target(sPAPRXive *xive, uint8_t end_blk, uint32_t end_idx,
>> +                             uint32_t *out_server, uint8_t *out_prio);
>> +int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t target, uint8_t prio,
>> +                             uint8_t *out_end_blk, uint32_t *out_end_idx);
>> +int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
>> +                          uint8_t *out_end_blk, uint32_t *out_end_idx);
>> +
>>  #endif /* PPC_SPAPR_XIVE_H */
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index e715a6c6923d..e6931ddaa83f 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -187,6 +187,8 @@ typedef struct XiveRouter {
>>  #define XIVE_ROUTER_GET_CLASS(obj)                              \
>>      OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
>>  
>> +typedef struct XiveTCTX XiveTCTX;
>> +
>>  typedef struct XiveRouterClass {
>>      SysBusDeviceClass parent;
>>  
>> @@ -201,6 +203,7 @@ typedef struct XiveRouterClass {
>>                     XiveNVT *nvt);
>>      int (*set_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
>>                     XiveNVT *nvt);
>> +    void (*reset_tctx)(XiveRouter *xrtr, XiveTCTX *tctx);
>>  } XiveRouterClass;
>>  
>>  void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>> index 5d038146c08e..3bf77ace11a2 100644
>> --- a/hw/intc/spapr_xive.c
>> +++ b/hw/intc/spapr_xive.c
>> @@ -199,6 +199,139 @@ static int spapr_xive_set_end(XiveRouter *xrtr,
>>      return 0;
>>  }
>>  
>> +static int spapr_xive_get_nvt(XiveRouter *xrtr,
>> +                              uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
>> +{
>> +    sPAPRXive *xive = SPAPR_XIVE(xrtr);
>> +    uint32_t vcpu_id = spapr_xive_nvt_to_target(xive, nvt_blk, nvt_idx);
>> +    PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
>> +
>> +    if (!cpu) {
>> +        return -1;
>> +    }
>> +
>> +    /*
>> +     * sPAPR does not maintain a NVT table. Return that the NVT is
>> +     * valid if we have found a matching CPU
>> +     */
>> +    nvt->w0 = NVT_W0_VALID;
>> +    return 0;
>> +}
>> +
>> +static int spapr_xive_set_nvt(XiveRouter *xrtr,
>> +                              uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
>> +{
>> +    /* no NVT table */
>> +    return 0;
>> +}
>> +
>> +/*
>> + * When a Virtual Processor is scheduled to run on a HW thread, the
>> + * hypervisor pushes its identifier in the OS CAM line. Under QEMU, we
>> + * need to emulate the same behavior.
>> + */
>> +static void spapr_xive_reset_tctx(XiveRouter *xrtr, XiveTCTX *tctx)
>> +{
>> +    uint8_t  nvt_blk;
>> +    uint32_t nvt_idx;
>> +    uint32_t nvt_cam;
>> +
>> +    spapr_xive_cpu_to_nvt(SPAPR_XIVE(xrtr), POWERPC_CPU(tctx->cs),
>> +                          &nvt_blk, &nvt_idx);
>> +
>> +    nvt_cam = cpu_to_be32(TM_QW1W2_VO | xive_tctx_cam_line(nvt_blk, nvt_idx));
>> +    memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &nvt_cam, 4);
>> +}
>> +
>> +/*
>> + * The allocation of VP blocks is a complex operation in OPAL and the
>> + * VP identifiers have a relation with the number of HW chips, the
>> + * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE
>> + * controller model does not have the same constraints and can use a
>> + * simple mapping scheme of the CPU vcpu_id
>> + *
>> + * These identifiers are never returned to the OS.
>> + */
>> +
>> +#define SPAPR_XIVE_VP_BASE 0x400
> 
> 0x400 == 1024.  Could we ever have the possibility of needing to
> consider both physical NVTs and PAPR NVTs at the same time?  

They would not be in the same CAM line: OS ring vs. PHYS ring. 

> If so, does this base leave enough space for the physical ones?

I only used 0x400 to map the VP identifiers onto the ones allocated by KVM.
0x0 would be fine, but to exercise the model it's better to have a
different base.

>> +uint32_t spapr_xive_nvt_to_target(sPAPRXive *xive, uint8_t nvt_blk,
>> +                                  uint32_t nvt_idx)
>> +{
>> +    return nvt_idx - SPAPR_XIVE_VP_BASE;
>> +}
>> +
>> +int spapr_xive_cpu_to_nvt(sPAPRXive *xive, PowerPCCPU *cpu,
>> +                          uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
> 
> A number of these conversions will come out a bit simpler if we pass
> the block and index around as a single word in most places.

Yes, I have to check the whole patchset first. These prototype changes
are not too difficult in terms of code complexity, but they do break
how the patches apply, and PowerNV uses the idx and blk much more
explicitly. The block has a meaning on bare metal. So I am a bit
reluctant to do so. I will check.
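
[Editor's note: for reference, the suggestion would amount to a packed
identifier along these lines. Purely hypothetical; the macro names and
field widths are made up and were not settled in this thread.]

#define SKETCH_NVT(blk, idx)    (((uint32_t)(blk) << 24) | ((idx) & 0xffffff))
#define SKETCH_NVT_BLOCK(nvt)   (((nvt) >> 24) & 0xff)
#define SKETCH_NVT_INDEX(nvt)   ((nvt) & 0xffffff)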

>> +{
>> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
>> +
>> +    if (!cpu) {
>> +        return -1;
>> +    }
>> +
>> +    if (out_nvt_blk) {
>> +        /* For testing purpose, we could use 0 for nvt_blk */
>> +        *out_nvt_blk = xrtr->chip_id;
> 
> I don't see any point using the chip_id here, which is currently
> always set to 0 for PAPR anyway.  If we just hardwire this to 0 it
> removes the only use here of xrtr, which will allow some further
> simplifications in the caller, I think.

You are right about the simplification. It was one way to exercise 
the router model and remove any shortcuts in the indexing. I kept 
it to be sure I was not tempted to invent new ones. I think we can
remove it before merging. 

> 
>> +    }
>> +
>> +    if (out_nvt_blk) {
>> +        *out_nvt_idx = SPAPR_XIVE_VP_BASE + cpu->vcpu_id;
>> +    }
>> +    return 0;
>> +}
>> +
>> +int spapr_xive_target_to_nvt(sPAPRXive *xive, uint32_t target,
>> +                             uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
> 
> I suspect some, maybe most of these conversion functions could be static.

static inline?

> 
>> +{
>> +    return spapr_xive_cpu_to_nvt(xive, spapr_find_cpu(target), out_nvt_blk,
>> +                                 out_nvt_idx);
>> +}
>> +
>> +/*
>> + * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
>> + * priorities per CPU
>> + */
>> +int spapr_xive_end_to_target(sPAPRXive *xive, uint8_t end_blk, uint32_t end_idx,
>> +                             uint32_t *out_server, uint8_t *out_prio)
>> +{
>> +    if (out_server) {
>> +        *out_server = end_idx >> 3;
>> +    }
>> +
>> +    if (out_prio) {
>> +        *out_prio = end_idx & 0x7;
>> +    }
>> +    return 0;
>> +}
>> +
>> +int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
>> +                          uint8_t *out_end_blk, uint32_t *out_end_idx)
>> +{
>> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
>> +
>> +    if (!cpu) {
>> +        return -1;
> 
> Is there ever a reason this would be called with cpu == NULL?  If not
> might as well just assert() here rather than pushing the error
> handling back to the caller.

ok. yes.

> 
>> +    }
>> +
>> +    if (out_end_blk) {
>> +        /* For testing purpose, we could use 0 for nvt_blk */
>> +        *out_end_blk = xrtr->chip_id;
> 
> Again, I don't see any point to using the chip_id, which is pretty
> meaningless for PAPR.
> 
>> +    }
>> +
>> +    if (out_end_idx) {
>> +        *out_end_idx = (cpu->vcpu_id << 3) + prio;
>> +    }
>> +    return 0;
>> +}
>> +
>> +int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t target, uint8_t prio,
>> +                             uint8_t *out_end_blk, uint32_t *out_end_idx)
>> +{
>> +    return spapr_xive_cpu_to_end(xive, spapr_find_cpu(target), prio,
>> +                                 out_end_blk, out_end_idx);
>> +}
>> +
>>  static const VMStateDescription vmstate_spapr_xive_end = {
>>      .name = TYPE_SPAPR_XIVE "/end",
>>      .version_id = 1,
>> @@ -263,6 +396,9 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data)
>>      xrc->set_eas = spapr_xive_set_eas;
>>      xrc->get_end = spapr_xive_get_end;
>>      xrc->set_end = spapr_xive_set_end;
>> +    xrc->get_nvt = spapr_xive_get_nvt;
>> +    xrc->set_nvt = spapr_xive_set_nvt;
>> +    xrc->reset_tctx = spapr_xive_reset_tctx;
>>  }
>>  
>>  static const TypeInfo spapr_xive_info = {
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index c49932d2b799..fc6ef5895e6d 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -481,6 +481,7 @@ static uint32_t xive_tctx_hw_cam_line(XiveTCTX *tctx, bool block_group)
>>  static void xive_tctx_reset(void *dev)
>>  {
>>      XiveTCTX *tctx = XIVE_TCTX(dev);
>> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(tctx->xrtr);
>>  
>>      memset(tctx->regs, 0, sizeof(tctx->regs));
>>  
>> @@ -495,6 +496,14 @@ static void xive_tctx_reset(void *dev)
>>       */
>>      tctx->regs[TM_QW1_OS + TM_PIPR] =
>>          ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
>> +
>> +    /*
>> +     * QEMU sPAPR XIVE only. To let the controller model reset the OS
>> +     * CAM line with the VP identifier.
>> +     */
>> +    if (xrc->reset_tctx) {
>> +        xrc->reset_tctx(tctx->xrtr, tctx);
>> +    }
> 
> AFAICT this whole function is only used from PAPR, so you can just
> move the whole thing to the papr code and avoid the hook function.

Yes, we could add a loop over all CPUs and reset all the XiveTCTX from
the machine or from a spapr_irq->reset handler. We will need that at
some point anyhow.
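
[Editor's note: a hypothetical sketch of such a reset path. CPU_FOREACH and
POWERPC_CPU are the usual QEMU helpers; spapr_xive_reset_tctx_for_cpu() is a
made-up name for the per-vCPU OS CAM line reset, nothing here is in the patch.]

static void spapr_xive_reset_all_tctx(sPAPRXive *xive)
{
    CPUState *cs;

    /* re-push the OS CAM line of every vCPU at machine reset time */
    CPU_FOREACH(cs) {
        PowerPCCPU *cpu = POWERPC_CPU(cs);

        spapr_xive_reset_tctx_for_cpu(xive, cpu);   /* hypothetical helper */
    }
}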

Thanks,

C.


> 
>>  }
>>  
>>  static void xive_tctx_realize(DeviceState *dev, Error **errp)
>
David Gibson Nov. 29, 2018, 1 a.m. UTC | #3
On Wed, Nov 28, 2018 at 05:48:32PM +0100, Cédric Le Goater wrote:
> On 11/28/18 3:39 AM, David Gibson wrote:
> > On Fri, Nov 16, 2018 at 11:57:04AM +0100, Cédric Le Goater wrote:
> >> The IVPE scans the O/S CAM line of the XIVE thread interrupt contexts
> >> to find a matching Notification Virtual Target (NVT) among the NVTs
> >> dispatched on the HW processor threads.
> >>
> >> On a real system, the thread interrupt contexts are updated by the
> >> hypervisor when a Virtual Processor is scheduled to run on a HW
> >> thread. Under QEMU, the model emulates the same behavior by hardwiring
> >> the NVT identifier in the thread context registers at reset.
> >>
> >> The NVT identifier used by the sPAPRXive model is the VCPU id. The END
> >> identifier is also derived from the VCPU id. A set of helpers doing
> >> the conversion between identifiers are provided for the hcalls
> >> configuring the sources and the ENDs.
> >>
> >> The model does not need a NVT table but The XiveRouter NVT operations
> >> are provided to perform some extra checks in the routing algorithm.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>  include/hw/ppc/spapr_xive.h |  17 +++++
> >>  include/hw/ppc/xive.h       |   3 +
> >>  hw/intc/spapr_xive.c        | 136 ++++++++++++++++++++++++++++++++++++
> >>  hw/intc/xive.c              |   9 +++
> >>  4 files changed, 165 insertions(+)
> >>
> >> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> >> index 06727bd86aa9..3f65b8f485fd 100644
> >> --- a/include/hw/ppc/spapr_xive.h
> >> +++ b/include/hw/ppc/spapr_xive.h
> >> @@ -43,4 +43,21 @@ bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
> >>  void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
> >>  qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn);
> >>  
> >> +/*
> >> + * sPAPR NVT and END indexing helpers
> >> + */
> >> +uint32_t spapr_xive_nvt_to_target(sPAPRXive *xive, uint8_t nvt_blk,
> >> +                                  uint32_t nvt_idx);
> >> +int spapr_xive_target_to_nvt(sPAPRXive *xive, uint32_t target,
> >> +                            uint8_t *out_nvt_blk, uint32_t *out_nvt_idx);
> >> +int spapr_xive_cpu_to_nvt(sPAPRXive *xive, PowerPCCPU *cpu,
> >> +                          uint8_t *out_nvt_blk, uint32_t *out_nvt_idx);
> >> +
> >> +int spapr_xive_end_to_target(sPAPRXive *xive, uint8_t end_blk, uint32_t end_idx,
> >> +                             uint32_t *out_server, uint8_t *out_prio);
> >> +int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t target, uint8_t prio,
> >> +                             uint8_t *out_end_blk, uint32_t *out_end_idx);
> >> +int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
> >> +                          uint8_t *out_end_blk, uint32_t *out_end_idx);
> >> +
> >>  #endif /* PPC_SPAPR_XIVE_H */
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> index e715a6c6923d..e6931ddaa83f 100644
> >> --- a/include/hw/ppc/xive.h
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -187,6 +187,8 @@ typedef struct XiveRouter {
> >>  #define XIVE_ROUTER_GET_CLASS(obj)                              \
> >>      OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
> >>  
> >> +typedef struct XiveTCTX XiveTCTX;
> >> +
> >>  typedef struct XiveRouterClass {
> >>      SysBusDeviceClass parent;
> >>  
> >> @@ -201,6 +203,7 @@ typedef struct XiveRouterClass {
> >>                     XiveNVT *nvt);
> >>      int (*set_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
> >>                     XiveNVT *nvt);
> >> +    void (*reset_tctx)(XiveRouter *xrtr, XiveTCTX *tctx);
> >>  } XiveRouterClass;
> >>  
> >>  void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
> >> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> >> index 5d038146c08e..3bf77ace11a2 100644
> >> --- a/hw/intc/spapr_xive.c
> >> +++ b/hw/intc/spapr_xive.c
> >> @@ -199,6 +199,139 @@ static int spapr_xive_set_end(XiveRouter *xrtr,
> >>      return 0;
> >>  }
> >>  
> >> +static int spapr_xive_get_nvt(XiveRouter *xrtr,
> >> +                              uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
> >> +{
> >> +    sPAPRXive *xive = SPAPR_XIVE(xrtr);
> >> +    uint32_t vcpu_id = spapr_xive_nvt_to_target(xive, nvt_blk, nvt_idx);
> >> +    PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
> >> +
> >> +    if (!cpu) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    /*
> >> +     * sPAPR does not maintain a NVT table. Return that the NVT is
> >> +     * valid if we have found a matching CPU
> >> +     */
> >> +    nvt->w0 = NVT_W0_VALID;
> >> +    return 0;
> >> +}
> >> +
> >> +static int spapr_xive_set_nvt(XiveRouter *xrtr,
> >> +                              uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
> >> +{
> >> +    /* no NVT table */
> >> +    return 0;
> >> +}
> >> +
> >> +/*
> >> + * When a Virtual Processor is scheduled to run on a HW thread, the
> >> + * hypervisor pushes its identifier in the OS CAM line. Under QEMU, we
> >> + * need to emulate the same behavior.
> >> + */
> >> +static void spapr_xive_reset_tctx(XiveRouter *xrtr, XiveTCTX *tctx)
> >> +{
> >> +    uint8_t  nvt_blk;
> >> +    uint32_t nvt_idx;
> >> +    uint32_t nvt_cam;
> >> +
> >> +    spapr_xive_cpu_to_nvt(SPAPR_XIVE(xrtr), POWERPC_CPU(tctx->cs),
> >> +                          &nvt_blk, &nvt_idx);
> >> +
> >> +    nvt_cam = cpu_to_be32(TM_QW1W2_VO | xive_tctx_cam_line(nvt_blk, nvt_idx));
> >> +    memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &nvt_cam, 4);
> >> +}
> >> +
> >> +/*
> >> + * The allocation of VP blocks is a complex operation in OPAL and the
> >> + * VP identifiers have a relation with the number of HW chips, the
> >> + * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE
> >> + * controller model does not have the same constraints and can use a
> >> + * simple mapping scheme of the CPU vcpu_id
> >> + *
> >> + * These identifiers are never returned to the OS.
> >> + */
> >> +
> >> +#define SPAPR_XIVE_VP_BASE 0x400
> > 
> > 0x400 == 1024.  Could we ever have the possibility of needing to
> > consider both physical NVTs and PAPR NVTs at the same time?  
> 
> They would not be in the same CAM line: OS ring vs. PHYS ring. 

Hm.  They still inhabit the same NVT number space though, don't they?
I'm thinking about the END->NVT stage of the process here, rather than
the NVT->TCTX stage.

Oh, also, you're using "VP" here which IIUC == "NVT".  Can we
standardize on one, please.

> > If so, does this base leave enough space for the physical ones?
> 
> I only used 0x400 to map the VP identifier to the ones allocated by KVM. 
> 0x0 would be fine but to exercise the model, it's better having a different 
> base. 
> 
> >> +uint32_t spapr_xive_nvt_to_target(sPAPRXive *xive, uint8_t nvt_blk,
> >> +                                  uint32_t nvt_idx)
> >> +{
> >> +    return nvt_idx - SPAPR_XIVE_VP_BASE;
> >> +}
> >> +
> >> +int spapr_xive_cpu_to_nvt(sPAPRXive *xive, PowerPCCPU *cpu,
> >> +                          uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
> > 
> > A number of these conversions will come out a bit simpler if we pass
> > the block and index around as a single word in most places.
> 
> Yes I have to check the whole patchset first. These prototype changes
> are not too difficult in terms of code complexity but they do break
> how patches apply and PowerNV is also using the idx and blk much more 
> explicitly. the block has a meaning on bare metal. So I am a bit 
> reluctant to do so. I will check.

Yeah, based on your comments here and earlier, I'm not sure that's a
good idea any more either.

> >> +{
> >> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> >> +
> >> +    if (!cpu) {
> >> +        return -1;
> >> +    }
> >> +
> >> +    if (out_nvt_blk) {
> >> +        /* For testing purpose, we could use 0 for nvt_blk */
> >> +        *out_nvt_blk = xrtr->chip_id;
> > 
> > I don't see any point using the chip_id here, which is currently
> > always set to 0 for PAPR anyway.  If we just hardwire this to 0 it
> > removes the only use here of xrtr, which will allow some further
> > simplifications in the caller, I think.
> 
> You are right about the simplification. It was one way to exercise 
> the router model and remove any shortcuts in the indexing. I kept 
> it to be sure I was not tempted to invent new ones. I think we can
> remove it before merging. 
> 
> > 
> >> +    }
> >> +
> >> +    if (out_nvt_blk) {
> >> +        *out_nvt_idx = SPAPR_XIVE_VP_BASE + cpu->vcpu_id;
> >> +    }
> >> +    return 0;
> >> +}
> >> +
> >> +int spapr_xive_target_to_nvt(sPAPRXive *xive, uint32_t target,
> >> +                             uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
> > 
> > I suspect some, maybe most of these conversion functions could be static.
> 
> static inline ? 

It's in a .c file so you don't need the "inline" - the compiler can
work out whether it's a good idea to inline on its own.

> > 
> >> +{
> >> +    return spapr_xive_cpu_to_nvt(xive, spapr_find_cpu(target), out_nvt_blk,
> >> +                                 out_nvt_idx);
> >> +}
> >> +
> >> +/*
> >> + * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
> >> + * priorities per CPU
> >> + */
> >> +int spapr_xive_end_to_target(sPAPRXive *xive, uint8_t end_blk, uint32_t end_idx,
> >> +                             uint32_t *out_server, uint8_t *out_prio)
> >> +{
> >> +    if (out_server) {
> >> +        *out_server = end_idx >> 3;
> >> +    }
> >> +
> >> +    if (out_prio) {
> >> +        *out_prio = end_idx & 0x7;
> >> +    }
> >> +    return 0;
> >> +}
> >> +
> >> +int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
> >> +                          uint8_t *out_end_blk, uint32_t *out_end_idx)
> >> +{
> >> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
> >> +
> >> +    if (!cpu) {
> >> +        return -1;
> > 
> > Is there ever a reason this would be called with cpu == NULL?  If not
> > might as well just assert() here rather than pushing the error
> > handling back to the caller.
> 
> ok. yes.
> 
> > 
> >> +    }
> >> +
> >> +    if (out_end_blk) {
> >> +        /* For testing purpose, we could use 0 for nvt_blk */
> >> +        *out_end_blk = xrtr->chip_id;
> > 
> > Again, I don't see any point to using the chip_id, which is pretty
> > meaningless for PAPR.
> > 
> >> +    }
> >> +
> >> +    if (out_end_idx) {
> >> +        *out_end_idx = (cpu->vcpu_id << 3) + prio;
> >> +    }
> >> +    return 0;
> >> +}
> >> +
> >> +int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t target, uint8_t prio,
> >> +                             uint8_t *out_end_blk, uint32_t *out_end_idx)
> >> +{
> >> +    return spapr_xive_cpu_to_end(xive, spapr_find_cpu(target), prio,
> >> +                                 out_end_blk, out_end_idx);
> >> +}
> >> +
> >>  static const VMStateDescription vmstate_spapr_xive_end = {
> >>      .name = TYPE_SPAPR_XIVE "/end",
> >>      .version_id = 1,
> >> @@ -263,6 +396,9 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data)
> >>      xrc->set_eas = spapr_xive_set_eas;
> >>      xrc->get_end = spapr_xive_get_end;
> >>      xrc->set_end = spapr_xive_set_end;
> >> +    xrc->get_nvt = spapr_xive_get_nvt;
> >> +    xrc->set_nvt = spapr_xive_set_nvt;
> >> +    xrc->reset_tctx = spapr_xive_reset_tctx;
> >>  }
> >>  
> >>  static const TypeInfo spapr_xive_info = {
> >> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> >> index c49932d2b799..fc6ef5895e6d 100644
> >> --- a/hw/intc/xive.c
> >> +++ b/hw/intc/xive.c
> >> @@ -481,6 +481,7 @@ static uint32_t xive_tctx_hw_cam_line(XiveTCTX *tctx, bool block_group)
> >>  static void xive_tctx_reset(void *dev)
> >>  {
> >>      XiveTCTX *tctx = XIVE_TCTX(dev);
> >> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(tctx->xrtr);
> >>  
> >>      memset(tctx->regs, 0, sizeof(tctx->regs));
> >>  
> >> @@ -495,6 +496,14 @@ static void xive_tctx_reset(void *dev)
> >>       */
> >>      tctx->regs[TM_QW1_OS + TM_PIPR] =
> >>          ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
> >> +
> >> +    /*
> >> +     * QEMU sPAPR XIVE only. To let the controller model reset the OS
> >> +     * CAM line with the VP identifier.
> >> +     */
> >> +    if (xrc->reset_tctx) {
> >> +        xrc->reset_tctx(tctx->xrtr, tctx);
> >> +    }
> > 
> > AFAICT this whole function is only used from PAPR, so you can just
> > move the whole thing to the papr code and avoid the hook function.
> 
> Yes we could add a loop on all CPUs and reset all the XiveTCTX from
> the machine or a spapr_irq->reset handler. We will need at some time
> anyhow.
> 
> Thanks,
> 
> C.
> 
> 
> > 
> >>  }
> >>  
> >>  static void xive_tctx_realize(DeviceState *dev, Error **errp)
> > 
>
Cédric Le Goater Nov. 29, 2018, 3:27 p.m. UTC | #4
[ ... ] 

>>>> +/*
>>>> + * The allocation of VP blocks is a complex operation in OPAL and the
>>>> + * VP identifiers have a relation with the number of HW chips, the
>>>> + * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE
>>>> + * controller model does not have the same constraints and can use a
>>>> + * simple mapping scheme of the CPU vcpu_id
>>>> + *
>>>> + * These identifiers are never returned to the OS.
>>>> + */
>>>> +
>>>> +#define SPAPR_XIVE_VP_BASE 0x400
>>>
>>> 0x400 == 1024.  Could we ever have the possibility of needing to
>>> consider both physical NVTs and PAPR NVTs at the same time?  
>>
>> They would not be in the same CAM line: OS ring vs. PHYS ring. 
> 
> Hm.  They still inhabit the same NVT number space though, don't they?

No. skiboot reserves the range of VPs for the HW at init.

https://github.com/open-power/skiboot/blob/master/hw/xive.c#L1093

> I'm thinking about the END->NVT stage of the process here, rather than
> the NVT->TCTX stage.
>
> Oh, also, you're using "VP" here which IIUC == "NVT".  Can we
> standardize on one, please.

VP is used in Linux/KVM, Linux/Native and skiboot. Yes, it's a mess.
Let's have consistent naming in QEMU and use NVT.

>>> If so, does this base leave enough space for the physical ones?
>>
>> I only used 0x400 to map the VP identifier to the ones allocated by KVM. 
>> 0x0 would be fine but to exercise the model, it's better having a different 
>> base. 
>>
>>>> +uint32_t spapr_xive_nvt_to_target(sPAPRXive *xive, uint8_t nvt_blk,
>>>> +                                  uint32_t nvt_idx)
>>>> +{
>>>> +    return nvt_idx - SPAPR_XIVE_VP_BASE;
>>>> +}
>>>> +
>>>> +int spapr_xive_cpu_to_nvt(sPAPRXive *xive, PowerPCCPU *cpu,
>>>> +                          uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
>>>
>>> A number of these conversions will come out a bit simpler if we pass
>>> the block and index around as a single word in most places.
>>
>> Yes I have to check the whole patchset first. These prototype changes
>> are not too difficult in terms of code complexity but they do break
>> how patches apply and PowerNV is also using the idx and blk much more 
>> explicitly. the block has a meaning on bare metal. So I am a bit 
>> reluctant to do so. I will check.
> 
> Yeah, based on your comments here and earlier, I'm not sure that's a
> good idea any more either.
> 
>>>> +{
>>>> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
>>>> +
>>>> +    if (!cpu) {
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    if (out_nvt_blk) {
>>>> +        /* For testing purpose, we could use 0 for nvt_blk */
>>>> +        *out_nvt_blk = xrtr->chip_id;
>>>
>>> I don't see any point using the chip_id here, which is currently
>>> always set to 0 for PAPR anyway.  If we just hardwire this to 0 it
>>> removes the only use here of xrtr, which will allow some further
>>> simplifications in the caller, I think.
>>
>> You are right about the simplification. It was one way to exercise 
>> the router model and remove any shortcuts in the indexing. I kept 
>> it to be sure I was not tempted to invent new ones. I think we can
>> remove it before merging. 
>>
>>>
>>>> +    }
>>>> +
>>>> +    if (out_nvt_blk) {
>>>> +        *out_nvt_idx = SPAPR_XIVE_VP_BASE + cpu->vcpu_id;
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +int spapr_xive_target_to_nvt(sPAPRXive *xive, uint32_t target,
>>>> +                             uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
>>>
>>> I suspect some, maybe most of these conversion functions could be static.
>>
>> static inline ? 
> 
> It's in a .c file so you don't need the "inline" - the compiler can
> work out whether it's a good idea to inline on its own.

It is used in the hcall file also. But we are going to change that.

Thanks,

C.

> 
>>>
>>>> +{
>>>> +    return spapr_xive_cpu_to_nvt(xive, spapr_find_cpu(target), out_nvt_blk,
>>>> +                                 out_nvt_idx);
>>>> +}
>>>> +
>>>> +/*
>>>> + * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
>>>> + * priorities per CPU
>>>> + */
>>>> +int spapr_xive_end_to_target(sPAPRXive *xive, uint8_t end_blk, uint32_t end_idx,
>>>> +                             uint32_t *out_server, uint8_t *out_prio)
>>>> +{
>>>> +    if (out_server) {
>>>> +        *out_server = end_idx >> 3;
>>>> +    }
>>>> +
>>>> +    if (out_prio) {
>>>> +        *out_prio = end_idx & 0x7;
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
>>>> +                          uint8_t *out_end_blk, uint32_t *out_end_idx)
>>>> +{
>>>> +    XiveRouter *xrtr = XIVE_ROUTER(xive);
>>>> +
>>>> +    if (!cpu) {
>>>> +        return -1;
>>>
>>> Is there ever a reason this would be called with cpu == NULL?  If not
>>> might as well just assert() here rather than pushing the error
>>> handling back to the caller.
>>
>> ok. yes.
>>
>>>
>>>> +    }
>>>> +
>>>> +    if (out_end_blk) {
>>>> +        /* For testing purpose, we could use 0 for nvt_blk */
>>>> +        *out_end_blk = xrtr->chip_id;
>>>
>>> Again, I don't see any point to using the chip_id, which is pretty
>>> meaningless for PAPR.
>>>
>>>> +    }
>>>> +
>>>> +    if (out_end_idx) {
>>>> +        *out_end_idx = (cpu->vcpu_id << 3) + prio;
>>>> +    }
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t target, uint8_t prio,
>>>> +                             uint8_t *out_end_blk, uint32_t *out_end_idx)
>>>> +{
>>>> +    return spapr_xive_cpu_to_end(xive, spapr_find_cpu(target), prio,
>>>> +                                 out_end_blk, out_end_idx);
>>>> +}
>>>> +
>>>>  static const VMStateDescription vmstate_spapr_xive_end = {
>>>>      .name = TYPE_SPAPR_XIVE "/end",
>>>>      .version_id = 1,
>>>> @@ -263,6 +396,9 @@ static void spapr_xive_class_init(ObjectClass *klass, void *data)
>>>>      xrc->set_eas = spapr_xive_set_eas;
>>>>      xrc->get_end = spapr_xive_get_end;
>>>>      xrc->set_end = spapr_xive_set_end;
>>>> +    xrc->get_nvt = spapr_xive_get_nvt;
>>>> +    xrc->set_nvt = spapr_xive_set_nvt;
>>>> +    xrc->reset_tctx = spapr_xive_reset_tctx;
>>>>  }
>>>>  
>>>>  static const TypeInfo spapr_xive_info = {
>>>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>>>> index c49932d2b799..fc6ef5895e6d 100644
>>>> --- a/hw/intc/xive.c
>>>> +++ b/hw/intc/xive.c
>>>> @@ -481,6 +481,7 @@ static uint32_t xive_tctx_hw_cam_line(XiveTCTX *tctx, bool block_group)
>>>>  static void xive_tctx_reset(void *dev)
>>>>  {
>>>>      XiveTCTX *tctx = XIVE_TCTX(dev);
>>>> +    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(tctx->xrtr);
>>>>  
>>>>      memset(tctx->regs, 0, sizeof(tctx->regs));
>>>>  
>>>> @@ -495,6 +496,14 @@ static void xive_tctx_reset(void *dev)
>>>>       */
>>>>      tctx->regs[TM_QW1_OS + TM_PIPR] =
>>>>          ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
>>>> +
>>>> +    /*
>>>> +     * QEMU sPAPR XIVE only. To let the controller model reset the OS
>>>> +     * CAM line with the VP identifier.
>>>> +     */
>>>> +    if (xrc->reset_tctx) {
>>>> +        xrc->reset_tctx(tctx->xrtr, tctx);
>>>> +    }
>>>
>>> AFAICT this whole function is only used from PAPR, so you can just
>>> move the whole thing to the papr code and avoid the hook function.
>>
>> Yes we could add a loop on all CPUs and reset all the XiveTCTX from
>> the machine or a spapr_irq->reset handler. We will need at some time
>> anyhow.
>>
>> Thanks,
>>
>> C.
>>
>>
>>>
>>>>  }
>>>>  
>>>>  static void xive_tctx_realize(DeviceState *dev, Error **errp)
>>>
>>
>
David Gibson Nov. 30, 2018, 1:11 a.m. UTC | #5
On Thu, Nov 29, 2018 at 04:27:31PM +0100, Cédric Le Goater wrote:
> [ ... ] 
> 
> >>>> +/*
> >>>> + * The allocation of VP blocks is a complex operation in OPAL and the
> >>>> + * VP identifiers have a relation with the number of HW chips, the
> >>>> + * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE
> >>>> + * controller model does not have the same constraints and can use a
> >>>> + * simple mapping scheme of the CPU vcpu_id
> >>>> + *
> >>>> + * These identifiers are never returned to the OS.
> >>>> + */
> >>>> +
> >>>> +#define SPAPR_XIVE_VP_BASE 0x400
> >>>
> >>> 0x400 == 1024.  Could we ever have the possibility of needing to
> >>> consider both physical NVTs and PAPR NVTs at the same time?  
> >>
> >> They would not be in the same CAM line: OS ring vs. PHYS ring. 
> > 
> > Hm.  They still inhabit the same NVT number space though, don't they?
> 
> No. skiboot reserves the range of VPs for the HW at init.
> 
> https://github.com/open-power/skiboot/blob/master/hw/xive.c#L1093

Uh.. I don't see how the way they're reserved is relevant.

What I mean is that the ENDs address the NVTs for HW endpoints by the
same (block, index) tuples as the NVTs for virtualized endpoints, yes?

> > I'm thinking about the END->NVT stage of the process here, rather than
> > the NVT->TCTX stage.
> >
> > Oh, also, you're using "VP" here which IIUC == "NVT".  Can we
> > standardize on one, please.
> 
> VP is used in Linux/KVM Linux/Native and skiboot. Yes. it's a mess. 
> Let's have consistent naming in QEMU and use NVT. 

Right.  And to cover any inevitable missed ones is why I'd like to see
a cheatsheet giving both terms in the header comments somewhere.

[snip]
Cédric Le Goater Nov. 30, 2018, 6:56 a.m. UTC | #6
On 11/30/18 2:11 AM, David Gibson wrote:
> On Thu, Nov 29, 2018 at 04:27:31PM +0100, Cédric Le Goater wrote:
>> [ ... ] 
>>
>>>>>> +/*
>>>>>> + * The allocation of VP blocks is a complex operation in OPAL and the
>>>>>> + * VP identifiers have a relation with the number of HW chips, the
>>>>>> + * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE
>>>>>> + * controller model does not have the same constraints and can use a
>>>>>> + * simple mapping scheme of the CPU vcpu_id
>>>>>> + *
>>>>>> + * These identifiers are never returned to the OS.
>>>>>> + */
>>>>>> +
>>>>>> +#define SPAPR_XIVE_VP_BASE 0x400
>>>>>
>>>>> 0x400 == 1024.  Could we ever have the possibility of needing to
>>>>> consider both physical NVTs and PAPR NVTs at the same time?  
>>>>
>>>> They would not be in the same CAM line: OS ring vs. PHYS ring. 
>>>
>>> Hm.  They still inhabit the same NVT number space though, don't they?
>>
>> No. skiboot reserves the range of VPs for the HW at init.
>>
>> https://github.com/open-power/skiboot/blob/master/hw/xive.c#L1093
> 
> Uh.. I don't see how they're reserved is relevant.
> 
> What I mean is that the ENDs address the NVTs for HW endpoints by the
> same (block, index) tuples as the NVTs for virtualized endpoints, yes?

Ah. Yes. The (block, index) tuples, fields END_W6_NVT_BLOCK and 
END_W6_NVT_INDEX in the END structure, are all in the same number space.

skiboot defines some ranges though.


>>> I'm thinking about the END->NVT stage of the process here, rather than
>>> the NVT->TCTX stage.
>>>
>>> Oh, also, you're using "VP" here which IIUC == "NVT".  Can we
>>> standardize on one, please.
>>
>> VP is used in Linux/KVM Linux/Native and skiboot. Yes. it's a mess. 
>> Let's have consistent naming in QEMU and use NVT. 
> 
> Right.  And to cover any inevitable missed ones is why I'd like to see
> a cheatsheet giving both terms in the header comments somewhere.

Yes, I have added a list of names in xive.h.

I was wondering if I should put the diagram below somewhere in a .h file 
or under doc/specs/.

Thanks,

C.  


= XIVE =================================================================

The POWER9 processor comes with a new interrupt controller called XIVE,
for "eXternal Interrupt Virtualization Engine".


* Overall architecture


             XIVE Interrupt Controller
             +------------------------------------+      IPIs
             | +---------+ +---------+ +--------+ |    +-------+
             | |VC       | |CQ       | |PC      |----> | CORES |
             | |     esb | |         | |        |----> |       |
             | |     eas | |  Bridge | |   tctx |----> |       |
             | |SC   end | |         | |    nvt | |    |       |
 +------+    | +---------+ +----+----+ +--------+ |    +-+-+-+-+
 | RAM  |    +------------------|-----------------+      | | |
 |      |                       |                        | | |
 |      |                       |                        | | |
 |      |  +--------------------v------------------------v-v-v--+    other
 |      <--+                     Power Bus                      +--> chips
 |  esb |  +---------+-----------------------+------------------+
 |  eas |            |                       |
 |  end |        +---|-----+                 |
 |  nvt |       +----+----+|            +----+----+
 +------+       |SC       ||            |SC       |
                |         ||            |         |
                | PQ-bits ||            | PQ-bits |
                | local   |+            |  in VC  |
                +---------+             +---------+
                   PCIe                 NX,NPU,CAPI

                  SC: Source Controller (aka. IVSE)
                  VC: Virtualization Controller (aka. IVRE)
                  PC: Presentation Controller (aka. IVPE)
                  CQ: Common Queue (Bridge)

             PQ-bits: 2 bits source state machine (P:pending Q:queued)
                 esb: Event State Buffer (Array of PQ bits in an IVSE)
                 eas: Event Assignment Structure
                 end: Event Notification Descriptor
                 nvt: Notification Virtual Target
                tctx: Thread interrupt Context


The XIVE IC is composed of three sub-engines:

  - Interrupt Virtualization Source Engine (IVSE), or Source
    Controller (SC). These are found in PCI PHBs, in the PSI host
    bridge controller, but also inside the main controller for the
    core IPIs and other sub-chips (NX, CAP, NPU) of the
    chip/processor. They are configured to feed the IVRE with events.

  - Interrupt Virtualization Routing Engine (IVRE) or Virtualization
    Controller (VC). Its job is to match an event source with an Event
    Notification Descriptor (END).

  - Interrupt Virtualization Presentation Engine (IVPE) or Presentation
    Controller (PC). It maintains the interrupt context state of each
    thread and handles the delivery of the external exception to the
    thread.


* XIVE internal tables

Each of the sub-engines uses a set of tables to redirect exceptions
from event sources to CPU threads.

                                          +-------+
  User or OS                              |  EQ   |
      or                          +------>|entries|
  Hypervisor                      |       |  ..   |
    Memory                        |       +-------+
                                  |           ^
                                  |           |
             +-------------------------------------------------+
                                  |           |
  Hypervisor      +------+    +---+--+    +---+--+   +------+
    Memory        | ESB  |    | EAT  |    | ENDT |   | NVTT |
   (skiboot)      +----+-+    +----+-+    +----+-+   +------+
                    ^  |        ^  |        ^  |       ^
                    |  |        |  |        |  |       |
             +-------------------------------------------------+
                    |  |        |  |        |  |       |
                    |  |        |  |        |  |       |
               +----|--|--------|--|--------|--|-+   +-|-----+    +------+
               |    |  |        |  |        |  | |   | | tctx|    |Thread|
   IPI or   ---+    +  v        +  v        +  v |---| +  .. |----->     |
  HW events    |                                 |   |       |    |      |
               |             IVRE                |   | IVPE  |    +------+
               +---------------------------------+   +-------+
            


The IVSE has a 2-bit state machine for each source, P for pending and
Q for queued, which gates whether an event is let through. These bits
are stored in an array, the Event State Buffer (ESB), and controlled
by MMIOs.
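
[Editor's note: a rough illustration of the trigger side of that state
machine. A sketch only; the names and exact masking rules are simplified
and are not the QEMU code.]

#include <stdbool.h>
#include <stdint.h>

/* 2-bit ESB state per source: P (pending) and Q (queued) */
enum {
    PQ_RESET   = 0x0,   /* P=0 Q=0: ready, the next event notifies     */
    PQ_OFF     = 0x1,   /* P=0 Q=1: source is masked                   */
    PQ_PENDING = 0x2,   /* P=1 Q=0: an event is already being handled  */
    PQ_QUEUED  = 0x3,   /* P=1 Q=1: a further event was coalesced      */
};

/* Returns true when the event should be forwarded to the IVRE */
static bool pq_trigger(uint8_t *pq)
{
    switch (*pq) {
    case PQ_RESET:
        *pq = PQ_PENDING;
        return true;
    case PQ_PENDING:
    case PQ_QUEUED:
        *pq = PQ_QUEUED;    /* remember that a second event occurred */
        return false;
    case PQ_OFF:
    default:
        return false;       /* masked: drop the event */
    }
}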

If the event is let through, the IVRE looks up the Event Assignment
Structure (EAS) table to find the Event Notification Descriptor (END)
configured for the source. Each Event Notification Descriptor defines
a notification path to a CPU and an in-memory Event Queue, into which
event data is pushed for the OS to pull.

The IVPE determines if a Notification Virtual Target (NVT) can handle
the event by scanning the thread contexts of the NVTs dispatched on
the processor HW threads. It maintains the interrupt context state of
each thread in an NVT table.
David Gibson Dec. 3, 2018, 1:18 a.m. UTC | #7
On Fri, Nov 30, 2018 at 07:56:02AM +0100, Cédric Le Goater wrote:
> On 11/30/18 2:11 AM, David Gibson wrote:
> > On Thu, Nov 29, 2018 at 04:27:31PM +0100, Cédric Le Goater wrote:
> >> [ ... ] 
> >>
> >>>>>> +/*
> >>>>>> + * The allocation of VP blocks is a complex operation in OPAL and the
> >>>>>> + * VP identifiers have a relation with the number of HW chips, the
> >>>>>> + * size of the VP blocks, VP grouping, etc. The QEMU sPAPR XIVE
> >>>>>> + * controller model does not have the same constraints and can use a
> >>>>>> + * simple mapping scheme of the CPU vcpu_id
> >>>>>> + *
> >>>>>> + * These identifiers are never returned to the OS.
> >>>>>> + */
> >>>>>> +
> >>>>>> +#define SPAPR_XIVE_VP_BASE 0x400
> >>>>>
> >>>>> 0x400 == 1024.  Could we ever have the possibility of needing to
> >>>>> consider both physical NVTs and PAPR NVTs at the same time?  
> >>>>
> >>>> They would not be in the same CAM line: OS ring vs. PHYS ring. 
> >>>
> >>> Hm.  They still inhabit the same NVT number space though, don't they?
> >>
> >> No. skiboot reserves the range of VPs for the HW at init.
> >>
> >> https://github.com/open-power/skiboot/blob/master/hw/xive.c#L1093
> > 
> > Uh.. I don't see how they're reserved is relevant.
> > 
> > What I mean is that the ENDs address the NVTs for HW endpoints by the
> > same (block, index) tuples as the NVTs for virtualized endpoints, yes?
> 
> Ah. Yes. The (block, index) tuples, fields END_W6_NVT_BLOCK and 
> END_W6_NVT_INDEX in the END structure, are all in the same number space.

Right.

> skiboot defines some ranges though.

Ok.  I guess we can rely on that for PAPR, but not for PowerNV.

> >>> I'm thinking about the END->NVT stage of the process here, rather than
> >>> the NVT->TCTX stage.
> >>>
> >>> Oh, also, you're using "VP" here which IIUC == "NVT".  Can we
> >>> standardize on one, please.
> >>
> >> VP is used in Linux/KVM Linux/Native and skiboot. Yes. it's a mess. 
> >> Let's have consistent naming in QEMU and use NVT. 
> > 
> > Right.  And to cover any inevitable missed ones is why I'd like to see
> > a cheatsheet giving both terms in the header comments somewhere.
> 
> yes. I have added a list of names in xive.h. 

Great.  Oh BTW - this is getting big enough that I wonder if it makes
sense to create a hw/intc/xive subdir to put things in, then splitting
the IVSE, IVRE and IVPE related code into separate .c files (I'd still
expect a common .h though).

> I was wondering if I should put the diagram below somewhere in a .h file 
> or under doc/specs/.

I'd prefer it in the .h file.

> 
> Thanks,
> 
> C.  
> 
> 
> = XIVE =================================================================
> 
> The POWER9 processor comes with a new interrupt controller, called
> XIVE as "eXternal Interrupt Virtualization Engine".
> 
> 
> * Overall architecture
> 
> 
>              XIVE Interrupt Controller
>              +------------------------------------+      IPIs
>              | +---------+ +---------+ +--------+ |    +-------+
>              | |VC       | |CQ       | |PC      |----> | CORES |
>              | |     esb | |         | |        |----> |       |
>              | |     eas | |  Bridge | |   tctx |----> |       |
>              | |SC   end | |         | |    nvt | |    |       |
>  +------+    | +---------+ +----+----+ +--------+ |    +-+-+-+-+
>  | RAM  |    +------------------|-----------------+      | | |
>  |      |                       |                        | | |
>  |      |                       |                        | | |
>  |      |  +--------------------v------------------------v-v-v--+    other
>  |      <--+                     Power Bus                      +--> chips
>  |  esb |  +---------+-----------------------+------------------+
>  |  eas |            |                       |
>  |  end |        +---|-----+                 |
>  |  nvt |       +----+----+|            +----+----+
>  +------+       |SC       ||            |SC       |
>                 |         ||            |         |
>                 | PQ-bits ||            | PQ-bits |
>                 | local   |+            |  in VC  |
>                 +---------+             +---------+
>                    PCIe                 NX,NPU,CAPI
> 
>                   SC: Source Controller (aka. IVSE)
>                   VC: Virtualization Controller (aka. IVRE)
>                   PC: Presentation Controller (aka. IVPE)
>                   CQ: Common Queue (Bridge)
> 
>              PQ-bits: 2 bits source state machine (P:pending Q:queued)
>                  esb: Event State Buffer (Array of PQ bits in an IVSE)
>                  eas: Event Assignment Structure
>                  end: Event Notification Descriptor
>                  nvt: Notification Virtual Target
>                 tctx: Thread interrupt Context
> 
> 
> The XIVE IC is composed of three sub-engines:
> 
>   - Interrupt Virtualization Source Engine (IVSE), or Source
>     Controller (SC). These are found in PCI PHBs, in the PSI host
>     bridge controller, but also inside the main controller for the
>     core IPIs and other sub-chips (NX, CAPI, NPU) of the
>     chip/processor. They are configured to feed the IVRE with events.
> 
>   - Interrupt Virtualization Routing Engine (IVRE) or Virtualization
>     Controller (VC). Its job is to match an event source with an Event
>     Notification Descriptor (END).
> 
>   - Interrupt Virtualization Presentation Engine (IVPE) or Presentation
>     Controller (PC). It maintains the interrupt context state of each
>     thread and handles the delivery of the external exception to the
>     thread.
> 
> 
> * XIVE internal tables
> 
> Each of the sub-engines uses a set of tables to redirect exceptions
> from event sources to CPU threads.
> 
>                                           +-------+
>   User or OS                              |  EQ   |
>       or                          +------>|entries|
>   Hypervisor                      |       |  ..   |
>     Memory                        |       +-------+
>                                   |           ^
>                                   |           |
>              +-------------------------------------------------+
>                                   |           |
>   Hypervisor      +------+    +---+--+    +---+--+   +------+
>     Memory        | ESB  |    | EAT  |    | ENDT |   | NVTT |
>    (skiboot)      +----+-+    +----+-+    +----+-+   +------+
>                     ^  |        ^  |        ^  |       ^
>                     |  |        |  |        |  |       |
>              +-------------------------------------------------+
>                     |  |        |  |        |  |       |
>                     |  |        |  |        |  |       |
>                +----|--|--------|--|--------|--|-+   +-|-----+    +------+
>                |    |  |        |  |        |  | |   | | tctx|    |Thread|
>    IPI or   ---+    +  v        +  v        +  v |---| +  .. |----->     |
>   HW events    |                                 |   |       |    |      |
>                |             IVRE                |   | IVPE  |    +------+
>                +---------------------------------+   +-------+
>             
> 
> 
> The IVSEs maintain a 2-bit state machine, P for pending and Q for
> queued, for each source, which gates event triggering. These bits are
> stored in an array, the Event State Buffer (ESB), and are controlled
> by MMIOs.
> 
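
As an aside, the PQ logic described above can be summarized with a
small sketch. This is an illustration only, not code from this series;
the names and the encoding below follow the usual description of the
XIVE ESB states:

#include <stdbool.h>
#include <stdint.h>

#define ESB_RESET    0x0   /* P=0 Q=0: ready to take an event       */
#define ESB_OFF      0x1   /* P=0 Q=1: source is masked             */
#define ESB_PENDING  0x2   /* P=1 Q=0: an event has been forwarded  */
#define ESB_QUEUED   0x3   /* P=1 Q=1: further events are coalesced */

/* Returns true when the trigger should be forwarded to the IVRE */
static bool esb_trigger(uint8_t *pq)
{
    switch (*pq) {
    case ESB_RESET:
        *pq = ESB_PENDING;      /* first event: notify the IVRE */
        return true;
    case ESB_PENDING:
    case ESB_QUEUED:
        *pq = ESB_QUEUED;       /* coalesce while an event is pending */
        return false;
    case ESB_OFF:
        return false;           /* masked source: drop the event */
    default:
        return false;
    }
}
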
> If the event is let through, the IVRE looks up the Event Assignment
> Structure (EAS) table to find the Event Notification Descriptor (END)
> configured for the source. Each Event Notification Descriptor defines
> a notification path to a CPU and an in-memory Event Queue, into which
> event data is pushed for the OS to pull.
> 
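
The EAS -> END -> EQ path can also be pictured with a rough sketch.
The structures and names below are purely illustrative and are not the
XiveRouter API of this series:

#include <stdbool.h>
#include <stdint.h>

typedef struct {
    bool     valid;      /* source is assigned to an END   */
    uint32_t end_idx;    /* END configured for this source */
    uint32_t data;       /* EQ data to push for the OS     */
} DemoEAS;

typedef struct {
    uint32_t *eq;        /* in-memory Event Queue          */
    uint32_t  eq_size;   /* number of EQ entries           */
    uint32_t  eq_head;
    uint32_t  nvt_idx;   /* notification target            */
    uint8_t   priority;
} DemoEND;

/* IVRE step: EAS lookup, push to the EQ, then notify the IVPE */
static void demo_route(DemoEAS *eat, DemoEND *endt, uint32_t lisn)
{
    DemoEAS *eas = &eat[lisn];
    DemoEND *end;

    if (!eas->valid) {
        return;                            /* unassigned source */
    }

    end = &endt[eas->end_idx];
    end->eq[end->eq_head] = eas->data;     /* push EQ data for the OS */
    end->eq_head = (end->eq_head + 1) % end->eq_size;

    /*
     * The IVPE then presents an exception to the NVT 'end->nvt_idx'
     * at priority 'end->priority' (not shown here).
     */
}
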
> The IVPE determines whether a Notification Virtual Target (NVT) can
> handle the event by scanning the thread contexts of the NVTs (VPs)
> dispatched on the processor HW threads. It maintains the interrupt
> context state of each thread in an NVT table.
>
Cédric Le Goater Dec. 3, 2018, 4:30 p.m. UTC | #8
>>>>> I'm thinking about the END->NVT stage of the process here, rather than
>>>>> the NVT->TCTX stage.
>>>>>
>>>>> Oh, also, you're using "VP" here which IIUC == "NVT".  Can we
>>>>> standardize on one, please.
>>>>
>>>> VP is used in Linux/KVM Linux/Native and skiboot. Yes. it's a mess. 
>>>> Let's have consistent naming in QEMU and use NVT. 
>>>
>>> Right.  And to cover any inevitable missed ones is why I'd like to see
>>> a cheatsheet giving both terms in the header comments somewhere.
>>
>> yes. I have added a list of names in xive.h. 
> 
> Great.  Oh BTW - this is getting big enough, that I wonder if it makes
> sense to create a hw/intc/xive subdir to put things in, then splitting
> IVSE, IVRE, IVPE related code into separate .c files (I'd still expect
> a common .h though).

Here is what I have for now, for the new files only :

   190 include/hw/ppc/xive_regs.h
   343 include/hw/ppc/xive.h
    78 include/hw/ppc/spapr_xive.h
  1453 hw/intc/spapr_xive.c (sPAPRXive + hcalls)
  1678 hw/intc/xive.c       (XiveSource, XiveRouter, XiveENDSource, XiveTCTX, 
			     END helpers)
   864 hw/intc/spapr_xive_kvm.c
  4606 total

I am putting the KVM export definitions in spapr_xive.h and xive.h but all 
the code is in spapr_xive_kvm.c.

So you would rather have something like :
 
   include/hw/ppc/xive_regs.h
   include/hw/ppc/xive.h
   include/hw/ppc/spapr_xive.h

   hw/intc/xive/spapr.c 
   hw/intc/xive/spapr_kvm.c 
   hw/intc/xive/source.c 
   hw/intc/xive/router.c 
   hw/intc/xive/presenter.c 
   hw/intc/xive/tcontext.c 

>> I was wondering if I should put the diagram below somewhere in a .h file 
>> or under doc/specs/.
> 
> I'd prefer it in the .h file.

OK. will do in  xive.h
 
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index 06727bd86aa9..3f65b8f485fd 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -43,4 +43,21 @@  bool spapr_xive_irq_disable(sPAPRXive *xive, uint32_t lisn);
 void spapr_xive_pic_print_info(sPAPRXive *xive, Monitor *mon);
 qemu_irq spapr_xive_qirq(sPAPRXive *xive, uint32_t lisn);
 
+/*
+ * sPAPR NVT and END indexing helpers
+ */
+uint32_t spapr_xive_nvt_to_target(sPAPRXive *xive, uint8_t nvt_blk,
+                                  uint32_t nvt_idx);
+int spapr_xive_target_to_nvt(sPAPRXive *xive, uint32_t target,
+                            uint8_t *out_nvt_blk, uint32_t *out_nvt_idx);
+int spapr_xive_cpu_to_nvt(sPAPRXive *xive, PowerPCCPU *cpu,
+                          uint8_t *out_nvt_blk, uint32_t *out_nvt_idx);
+
+int spapr_xive_end_to_target(sPAPRXive *xive, uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio);
+int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t target, uint8_t prio,
+                             uint8_t *out_end_blk, uint32_t *out_end_idx);
+int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
+                          uint8_t *out_end_blk, uint32_t *out_end_idx);
+
 #endif /* PPC_SPAPR_XIVE_H */
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index e715a6c6923d..e6931ddaa83f 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -187,6 +187,8 @@  typedef struct XiveRouter {
 #define XIVE_ROUTER_GET_CLASS(obj)                              \
     OBJECT_GET_CLASS(XiveRouterClass, (obj), TYPE_XIVE_ROUTER)
 
+typedef struct XiveTCTX XiveTCTX;
+
 typedef struct XiveRouterClass {
     SysBusDeviceClass parent;
 
@@ -201,6 +203,7 @@  typedef struct XiveRouterClass {
                    XiveNVT *nvt);
     int (*set_nvt)(XiveRouter *xrtr, uint8_t nvt_blk, uint32_t nvt_idx,
                    XiveNVT *nvt);
+    void (*reset_tctx)(XiveRouter *xrtr, XiveTCTX *tctx);
 } XiveRouterClass;
 
 void xive_eas_pic_print_info(XiveEAS *eas, uint32_t lisn, Monitor *mon);
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 5d038146c08e..3bf77ace11a2 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -199,6 +199,139 @@  static int spapr_xive_set_end(XiveRouter *xrtr,
     return 0;
 }
 
+static int spapr_xive_get_nvt(XiveRouter *xrtr,
+                              uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
+{
+    sPAPRXive *xive = SPAPR_XIVE(xrtr);
+    uint32_t vcpu_id = spapr_xive_nvt_to_target(xive, nvt_blk, nvt_idx);
+    PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
+
+    if (!cpu) {
+        return -1;
+    }
+
+    /*
+     * sPAPR does not maintain a NVT table. Return that the NVT is
+     * valid if we have found a matching CPU
+     */
+    nvt->w0 = NVT_W0_VALID;
+    return 0;
+}
+
+static int spapr_xive_set_nvt(XiveRouter *xrtr,
+                              uint8_t nvt_blk, uint32_t nvt_idx, XiveNVT *nvt)
+{
+    /* no NVT table */
+    return 0;
+}
+
+/*
+ * When a Virtual Processor is scheduled to run on a HW thread, the
+ * hypervisor pushes its identifier in the OS CAM line. Under QEMU, we
+ * need to emulate the same behavior.
+ */
+static void spapr_xive_reset_tctx(XiveRouter *xrtr, XiveTCTX *tctx)
+{
+    uint8_t  nvt_blk;
+    uint32_t nvt_idx;
+    uint32_t nvt_cam;
+
+    spapr_xive_cpu_to_nvt(SPAPR_XIVE(xrtr), POWERPC_CPU(tctx->cs),
+                          &nvt_blk, &nvt_idx);
+
+    nvt_cam = cpu_to_be32(TM_QW1W2_VO | xive_tctx_cam_line(nvt_blk, nvt_idx));
+    memcpy(&tctx->regs[TM_QW1_OS + TM_WORD2], &nvt_cam, 4);
+}
+
+/*
+ * The allocation of VP blocks is a complex operation in OPAL and the
+ * VP identifiers depend on the number of HW chips, the size of the
+ * VP blocks, VP grouping, etc. The QEMU sPAPR XIVE controller model
+ * does not have the same constraints and can use a simple mapping
+ * scheme based on the CPU vcpu_id.
+ *
+ * These identifiers are never returned to the OS.
+ */
+
+#define SPAPR_XIVE_VP_BASE 0x400
+
+uint32_t spapr_xive_nvt_to_target(sPAPRXive *xive, uint8_t nvt_blk,
+                                  uint32_t nvt_idx)
+{
+    return nvt_idx - SPAPR_XIVE_VP_BASE;
+}
+
+int spapr_xive_cpu_to_nvt(sPAPRXive *xive, PowerPCCPU *cpu,
+                          uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
+{
+    XiveRouter *xrtr = XIVE_ROUTER(xive);
+
+    if (!cpu) {
+        return -1;
+    }
+
+    if (out_nvt_blk) {
+        /* For testing purposes, we could use 0 for nvt_blk */
+        *out_nvt_blk = xrtr->chip_id;
+    }
+
+    if (out_nvt_idx) {
+        *out_nvt_idx = SPAPR_XIVE_VP_BASE + cpu->vcpu_id;
+    }
+    return 0;
+}
+
+int spapr_xive_target_to_nvt(sPAPRXive *xive, uint32_t target,
+                             uint8_t *out_nvt_blk, uint32_t *out_nvt_idx)
+{
+    return spapr_xive_cpu_to_nvt(xive, spapr_find_cpu(target), out_nvt_blk,
+                                 out_nvt_idx);
+}
+
+/*
+ * sPAPR END indexing uses a simple mapping of the CPU vcpu_id, 8
+ * priorities per CPU
+ */
+int spapr_xive_end_to_target(sPAPRXive *xive, uint8_t end_blk, uint32_t end_idx,
+                             uint32_t *out_server, uint8_t *out_prio)
+{
+    if (out_server) {
+        *out_server = end_idx >> 3;
+    }
+
+    if (out_prio) {
+        *out_prio = end_idx & 0x7;
+    }
+    return 0;
+}
+
+int spapr_xive_cpu_to_end(sPAPRXive *xive, PowerPCCPU *cpu, uint8_t prio,
+                          uint8_t *out_end_blk, uint32_t *out_end_idx)
+{
+    XiveRouter *xrtr = XIVE_ROUTER(xive);
+
+    if (!cpu) {
+        return -1;
+    }
+
+    if (out_end_blk) {
+        /* For testing purposes, we could use 0 for end_blk */
+        *out_end_blk = xrtr->chip_id;
+    }
+
+    if (out_end_idx) {
+        *out_end_idx = (cpu->vcpu_id << 3) + prio;
+    }
+    return 0;
+}
+
+int spapr_xive_target_to_end(sPAPRXive *xive, uint32_t target, uint8_t prio,
+                             uint8_t *out_end_blk, uint32_t *out_end_idx)
+{
+    return spapr_xive_cpu_to_end(xive, spapr_find_cpu(target), prio,
+                                 out_end_blk, out_end_idx);
+}
+
 static const VMStateDescription vmstate_spapr_xive_end = {
     .name = TYPE_SPAPR_XIVE "/end",
     .version_id = 1,
@@ -263,6 +396,9 @@  static void spapr_xive_class_init(ObjectClass *klass, void *data)
     xrc->set_eas = spapr_xive_set_eas;
     xrc->get_end = spapr_xive_get_end;
     xrc->set_end = spapr_xive_set_end;
+    xrc->get_nvt = spapr_xive_get_nvt;
+    xrc->set_nvt = spapr_xive_set_nvt;
+    xrc->reset_tctx = spapr_xive_reset_tctx;
 }
 
 static const TypeInfo spapr_xive_info = {
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index c49932d2b799..fc6ef5895e6d 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -481,6 +481,7 @@  static uint32_t xive_tctx_hw_cam_line(XiveTCTX *tctx, bool block_group)
 static void xive_tctx_reset(void *dev)
 {
     XiveTCTX *tctx = XIVE_TCTX(dev);
+    XiveRouterClass *xrc = XIVE_ROUTER_GET_CLASS(tctx->xrtr);
 
     memset(tctx->regs, 0, sizeof(tctx->regs));
 
@@ -495,6 +496,14 @@  static void xive_tctx_reset(void *dev)
      */
     tctx->regs[TM_QW1_OS + TM_PIPR] =
         ipb_to_pipr(tctx->regs[TM_QW1_OS + TM_IPB]);
+
+    /*
+     * QEMU sPAPR XIVE only. This lets the controller model reset the
+     * OS CAM line with the VP identifier.
+     */
+    if (xrc->reset_tctx) {
+        xrc->reset_tctx(tctx->xrtr, tctx);
+    }
 }
 
 static void xive_tctx_realize(DeviceState *dev, Error **errp)