diff mbox series

[v3,04/15] spapr/xive: add state synchronization with KVM

Message ID 20190321144914.19934-5-clg@kaod.org (mailing list archive)
State New, archived
Headers show
Series spapr: add KVM support to the XIVE interrupt mode | expand

Commit Message

Cédric Le Goater March 21, 2019, 2:49 p.m. UTC
This extends the KVM XIVE device backend with 'synchronize_state'
methods used to retrieve the state from KVM. The HW state of the
sources, the KVM device and the thread interrupt contexts are
collected for the monitor usage and also migration.

These get operations rely on their KVM counterpart in the host kernel
which acts as a proxy for OPAL, the host firmware. The set operations
will be added for migration support later.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---

 Changes since v2:

 - removed the capture of the OS CAM line value from KVM
 - added xive_end_is_valid() check

 include/hw/ppc/spapr_xive.h |  8 ++++
 include/hw/ppc/xive.h       |  1 +
 hw/intc/spapr_xive.c        | 17 +++++---
 hw/intc/spapr_xive_kvm.c    | 87 +++++++++++++++++++++++++++++++++++++
 hw/intc/xive.c              | 10 +++++
 5 files changed, 116 insertions(+), 7 deletions(-)

Comments

David Gibson March 22, 2019, 1 a.m. UTC | #1
On Thu, Mar 21, 2019 at 03:49:03PM +0100, Cédric Le Goater wrote:
> This extends the KVM XIVE device backend with 'synchronize_state'
> methods used to retrieve the state from KVM. The HW state of the
> sources, the KVM device and the thread interrupt contexts are
> collected for the monitor usage and also migration.
> 
> These get operations rely on their KVM counterpart in the host kernel
> which acts as a proxy for OPAL, the host firmware. The set operations
> will be added for migration support later.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
> 
>  Changes since v2:
> 
>  - removed the capture of the OS CAM line value from KVM
>  - added xive_end_is_valid() check
> 
>  include/hw/ppc/spapr_xive.h |  8 ++++
>  include/hw/ppc/xive.h       |  1 +
>  hw/intc/spapr_xive.c        | 17 +++++---
>  hw/intc/spapr_xive_kvm.c    | 87 +++++++++++++++++++++++++++++++++++++
>  hw/intc/xive.c              | 10 +++++
>  5 files changed, 116 insertions(+), 7 deletions(-)
> 
> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> index 03685910e76b..7e49badd8c9a 100644
> --- a/include/hw/ppc/spapr_xive.h
> +++ b/include/hw/ppc/spapr_xive.h
> @@ -44,6 +44,13 @@ typedef struct SpaprXive {
>      void          *tm_mmap;
>  } SpaprXive;
>  
> +/*
> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
> + * to the controller block id value. It can nevertheless be changed
> + * for testing purpose.
> + */
> +#define SPAPR_XIVE_BLOCK_ID 0x0
> +
>  bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
>  bool spapr_xive_irq_free(SpaprXive *xive, uint32_t lisn);
>  void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon);
> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
>  void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
>                                   uint32_t end_idx, XiveEND *end,
>                                   Error **errp);
> +void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp);
>  
>  #endif /* PPC_SPAPR_XIVE_H */
> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> index dd115da30ebc..78c919c4a590 100644
> --- a/include/hw/ppc/xive.h
> +++ b/include/hw/ppc/xive.h
> @@ -435,5 +435,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
>  
>  #endif /* PPC_XIVE_H */
> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> index cde2fd7c8997..4d07140f1078 100644
> --- a/hw/intc/spapr_xive.c
> +++ b/hw/intc/spapr_xive.c
> @@ -40,13 +40,6 @@
>  
>  #define SPAPR_XIVE_NVT_BASE 0x400
>  
> -/*
> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
> - * to the controller block id value. It can nevertheless be changed
> - * for testing purpose.
> - */
> -#define SPAPR_XIVE_BLOCK_ID 0x0
> -
>  /*
>   * sPAPR NVT and END indexing helpers
>   */
> @@ -156,6 +149,16 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
>      XiveSource *xsrc = &xive->source;
>      int i;
>  
> +    if (kvm_irqchip_in_kernel()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_synchronize_state(xive, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return;
> +        }
> +    }
> +
>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
>  
>      for (i = 0; i < xive->nr_irqs; i++) {
> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> index 2714f8e4702e..2e46661cb3ad 100644
> --- a/hw/intc/spapr_xive_kvm.c
> +++ b/hw/intc/spapr_xive_kvm.c
> @@ -60,6 +60,51 @@ static void kvm_cpu_enable(CPUState *cs)
>  /*
>   * XIVE Thread Interrupt Management context (KVM)
>   */
> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
> +{
> +    uint64_t state[2] = { 0 };
> +    int ret;
> +
> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
> +    if (ret != 0) {
> +        error_setg_errno(errp, errno,
> +                         "XIVE: could not capture KVM state of CPU %ld",
> +                         kvm_arch_vcpu_id(tctx->cs));
> +        return;
> +    }
> +
> +    /* word0 and word1 of the OS ring. */
> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
> +}
> +
> +typedef struct {
> +    XiveTCTX *tctx;
> +    Error *err;
> +} XiveCpuGetState;
> +
> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
> +                                                 run_on_cpu_data arg)
> +{
> +    XiveCpuGetState *s = arg.host_ptr;
> +
> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
> +}
> +
> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
> +{
> +    XiveCpuGetState s = {
> +        .tctx = tctx,
> +        .err = NULL,
> +    };
> +
> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
> +               RUN_ON_CPU_HOST_PTR(&s));

I think I brought this up earlier, but I'm still confused.

Retreiving information with GET_ONE_REG doesn't usually need dancing
around to run on a particular thread.  Why is it necessary here?

> +    if (s.err) {
> +        error_propagate(errp, s.err);
> +        return;
> +    }
> +}
>  
>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>  {
> @@ -227,6 +272,19 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
>      }
>  }
>  
> +static void kvmppc_xive_source_get_state(XiveSource *xsrc)
> +{
> +    int i;
> +
> +    for (i = 0; i < xsrc->nr_irqs; i++) {
> +        /* Perform a load without side effect to retrieve the PQ bits */
> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
> +
> +        /* and save PQ locally */
> +        xive_source_esb_set(xsrc, i, pq);
> +    }
> +}
> +
>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>  {
>      XiveSource *xsrc = opaque;
> @@ -354,6 +412,35 @@ void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
>                        NULL, true, errp);
>  }
>  
> +static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
> +{
> +    Error *local_err = NULL;
> +    int i;
> +
> +    for (i = 0; i < xive->nr_ends; i++) {
> +        if (!xive_end_is_valid(&xive->endt[i])) {
> +            continue;
> +        }
> +
> +        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
> +                                     &xive->endt[i], &local_err);
> +        if (local_err) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }
> +    }
> +}
> +
> +void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
> +{
> +    kvmppc_xive_source_get_state(&xive->source);
> +
> +    /* EAT: there is no extra state to query from KVM */
> +
> +    /* ENDT */
> +    kvmppc_xive_get_queues(xive, errp);
> +}
> +
>  static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
>                                Error **errp)
>  {
> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
> index a0662fd33174..65cb772676a5 100644
> --- a/hw/intc/xive.c
> +++ b/hw/intc/xive.c
> @@ -493,6 +493,16 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
>      int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
>      int i;
>  
> +    if (kvm_irqchip_in_kernel()) {
> +        Error *local_err = NULL;
> +
> +        kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
> +        if (local_err) {
> +            error_report_err(local_err);
> +            return;
> +        }
> +    }
> +
>      monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
>                     "  W2\n", cpu_index);
>
Cédric Le Goater March 22, 2019, 7:53 a.m. UTC | #2
On 3/22/19 2:00 AM, David Gibson wrote:
> On Thu, Mar 21, 2019 at 03:49:03PM +0100, Cédric Le Goater wrote:
>> This extends the KVM XIVE device backend with 'synchronize_state'
>> methods used to retrieve the state from KVM. The HW state of the
>> sources, the KVM device and the thread interrupt contexts are
>> collected for the monitor usage and also migration.
>>
>> These get operations rely on their KVM counterpart in the host kernel
>> which acts as a proxy for OPAL, the host firmware. The set operations
>> will be added for migration support later.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>> ---
>>
>>  Changes since v2:
>>
>>  - removed the capture of the OS CAM line value from KVM
>>  - added xive_end_is_valid() check
>>
>>  include/hw/ppc/spapr_xive.h |  8 ++++
>>  include/hw/ppc/xive.h       |  1 +
>>  hw/intc/spapr_xive.c        | 17 +++++---
>>  hw/intc/spapr_xive_kvm.c    | 87 +++++++++++++++++++++++++++++++++++++
>>  hw/intc/xive.c              | 10 +++++
>>  5 files changed, 116 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>> index 03685910e76b..7e49badd8c9a 100644
>> --- a/include/hw/ppc/spapr_xive.h
>> +++ b/include/hw/ppc/spapr_xive.h
>> @@ -44,6 +44,13 @@ typedef struct SpaprXive {
>>      void          *tm_mmap;
>>  } SpaprXive;
>>  
>> +/*
>> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>> + * to the controller block id value. It can nevertheless be changed
>> + * for testing purpose.
>> + */
>> +#define SPAPR_XIVE_BLOCK_ID 0x0
>> +
>>  bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
>>  bool spapr_xive_irq_free(SpaprXive *xive, uint32_t lisn);
>>  void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon);
>> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
>>  void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
>>                                   uint32_t end_idx, XiveEND *end,
>>                                   Error **errp);
>> +void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp);
>>  
>>  #endif /* PPC_SPAPR_XIVE_H */
>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>> index dd115da30ebc..78c919c4a590 100644
>> --- a/include/hw/ppc/xive.h
>> +++ b/include/hw/ppc/xive.h
>> @@ -435,5 +435,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
>>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
>>  
>>  #endif /* PPC_XIVE_H */
>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>> index cde2fd7c8997..4d07140f1078 100644
>> --- a/hw/intc/spapr_xive.c
>> +++ b/hw/intc/spapr_xive.c
>> @@ -40,13 +40,6 @@
>>  
>>  #define SPAPR_XIVE_NVT_BASE 0x400
>>  
>> -/*
>> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>> - * to the controller block id value. It can nevertheless be changed
>> - * for testing purpose.
>> - */
>> -#define SPAPR_XIVE_BLOCK_ID 0x0
>> -
>>  /*
>>   * sPAPR NVT and END indexing helpers
>>   */
>> @@ -156,6 +149,16 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
>>      XiveSource *xsrc = &xive->source;
>>      int i;
>>  
>> +    if (kvm_irqchip_in_kernel()) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_synchronize_state(xive, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return;
>> +        }
>> +    }
>> +
>>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
>>  
>>      for (i = 0; i < xive->nr_irqs; i++) {
>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
>> index 2714f8e4702e..2e46661cb3ad 100644
>> --- a/hw/intc/spapr_xive_kvm.c
>> +++ b/hw/intc/spapr_xive_kvm.c
>> @@ -60,6 +60,51 @@ static void kvm_cpu_enable(CPUState *cs)
>>  /*
>>   * XIVE Thread Interrupt Management context (KVM)
>>   */
>> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
>> +{
>> +    uint64_t state[2] = { 0 };
>> +    int ret;
>> +
>> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
>> +    if (ret != 0) {
>> +        error_setg_errno(errp, errno,
>> +                         "XIVE: could not capture KVM state of CPU %ld",
>> +                         kvm_arch_vcpu_id(tctx->cs));
>> +        return;
>> +    }
>> +
>> +    /* word0 and word1 of the OS ring. */
>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
>> +}
>> +
>> +typedef struct {
>> +    XiveTCTX *tctx;
>> +    Error *err;
>> +} XiveCpuGetState;
>> +
>> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
>> +                                                 run_on_cpu_data arg)
>> +{
>> +    XiveCpuGetState *s = arg.host_ptr;
>> +
>> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
>> +}
>> +
>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
>> +{
>> +    XiveCpuGetState s = {
>> +        .tctx = tctx,
>> +        .err = NULL,
>> +    };
>> +
>> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
>> +               RUN_ON_CPU_HOST_PTR(&s));
> 
> I think I brought this up earlier, but I'm still confused.
> 
> Retreiving information with GET_ONE_REG doesn't usually need dancing
> around to run on a particular thread.  Why is it necessary here?

The thread can be scheduled, so we need to kick it to get the thread
interrupt context registers. This is very much like XICS getting the 
ICP state.

May be there is something I am not getting from your question ? I am
not an expert of the VCPU scheduling area either.

Thanks,

C.


>> +    if (s.err) {
>> +        error_propagate(errp, s.err);
>> +        return;
>> +    }
>> +}
>>  
>>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
>>  {
>> @@ -227,6 +272,19 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
>>      }
>>  }
>>  
>> +static void kvmppc_xive_source_get_state(XiveSource *xsrc)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < xsrc->nr_irqs; i++) {
>> +        /* Perform a load without side effect to retrieve the PQ bits */
>> +        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
>> +
>> +        /* and save PQ locally */
>> +        xive_source_esb_set(xsrc, i, pq);
>> +    }
>> +}
>> +
>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
>>  {
>>      XiveSource *xsrc = opaque;
>> @@ -354,6 +412,35 @@ void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
>>                        NULL, true, errp);
>>  }
>>  
>> +static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
>> +{
>> +    Error *local_err = NULL;
>> +    int i;
>> +
>> +    for (i = 0; i < xive->nr_ends; i++) {
>> +        if (!xive_end_is_valid(&xive->endt[i])) {
>> +            continue;
>> +        }
>> +
>> +        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
>> +                                     &xive->endt[i], &local_err);
>> +        if (local_err) {
>> +            error_propagate(errp, local_err);
>> +            return;
>> +        }
>> +    }
>> +}
>> +
>> +void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
>> +{
>> +    kvmppc_xive_source_get_state(&xive->source);
>> +
>> +    /* EAT: there is no extra state to query from KVM */
>> +
>> +    /* ENDT */
>> +    kvmppc_xive_get_queues(xive, errp);
>> +}
>> +
>>  static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
>>                                Error **errp)
>>  {
>> diff --git a/hw/intc/xive.c b/hw/intc/xive.c
>> index a0662fd33174..65cb772676a5 100644
>> --- a/hw/intc/xive.c
>> +++ b/hw/intc/xive.c
>> @@ -493,6 +493,16 @@ void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
>>      int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
>>      int i;
>>  
>> +    if (kvm_irqchip_in_kernel()) {
>> +        Error *local_err = NULL;
>> +
>> +        kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
>> +        if (local_err) {
>> +            error_report_err(local_err);
>> +            return;
>> +        }
>> +    }
>> +
>>      monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
>>                     "  W2\n", cpu_index);
>>  
>
David Gibson March 26, 2019, 4:25 a.m. UTC | #3
On Fri, Mar 22, 2019 at 08:53:21AM +0100, Cédric Le Goater wrote:
> On 3/22/19 2:00 AM, David Gibson wrote:
> > On Thu, Mar 21, 2019 at 03:49:03PM +0100, Cédric Le Goater wrote:
> >> This extends the KVM XIVE device backend with 'synchronize_state'
> >> methods used to retrieve the state from KVM. The HW state of the
> >> sources, the KVM device and the thread interrupt contexts are
> >> collected for the monitor usage and also migration.
> >>
> >> These get operations rely on their KVM counterpart in the host kernel
> >> which acts as a proxy for OPAL, the host firmware. The set operations
> >> will be added for migration support later.
> >>
> >> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >> ---
> >>
> >>  Changes since v2:
> >>
> >>  - removed the capture of the OS CAM line value from KVM
> >>  - added xive_end_is_valid() check
> >>
> >>  include/hw/ppc/spapr_xive.h |  8 ++++
> >>  include/hw/ppc/xive.h       |  1 +
> >>  hw/intc/spapr_xive.c        | 17 +++++---
> >>  hw/intc/spapr_xive_kvm.c    | 87 +++++++++++++++++++++++++++++++++++++
> >>  hw/intc/xive.c              | 10 +++++
> >>  5 files changed, 116 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
> >> index 03685910e76b..7e49badd8c9a 100644
> >> --- a/include/hw/ppc/spapr_xive.h
> >> +++ b/include/hw/ppc/spapr_xive.h
> >> @@ -44,6 +44,13 @@ typedef struct SpaprXive {
> >>      void          *tm_mmap;
> >>  } SpaprXive;
> >>  
> >> +/*
> >> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
> >> + * to the controller block id value. It can nevertheless be changed
> >> + * for testing purpose.
> >> + */
> >> +#define SPAPR_XIVE_BLOCK_ID 0x0
> >> +
> >>  bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
> >>  bool spapr_xive_irq_free(SpaprXive *xive, uint32_t lisn);
> >>  void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon);
> >> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
> >>  void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
> >>                                   uint32_t end_idx, XiveEND *end,
> >>                                   Error **errp);
> >> +void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp);
> >>  
> >>  #endif /* PPC_SPAPR_XIVE_H */
> >> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
> >> index dd115da30ebc..78c919c4a590 100644
> >> --- a/include/hw/ppc/xive.h
> >> +++ b/include/hw/ppc/xive.h
> >> @@ -435,5 +435,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
> >>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
> >>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
> >>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
> >> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
> >>  
> >>  #endif /* PPC_XIVE_H */
> >> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
> >> index cde2fd7c8997..4d07140f1078 100644
> >> --- a/hw/intc/spapr_xive.c
> >> +++ b/hw/intc/spapr_xive.c
> >> @@ -40,13 +40,6 @@
> >>  
> >>  #define SPAPR_XIVE_NVT_BASE 0x400
> >>  
> >> -/*
> >> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
> >> - * to the controller block id value. It can nevertheless be changed
> >> - * for testing purpose.
> >> - */
> >> -#define SPAPR_XIVE_BLOCK_ID 0x0
> >> -
> >>  /*
> >>   * sPAPR NVT and END indexing helpers
> >>   */
> >> @@ -156,6 +149,16 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
> >>      XiveSource *xsrc = &xive->source;
> >>      int i;
> >>  
> >> +    if (kvm_irqchip_in_kernel()) {
> >> +        Error *local_err = NULL;
> >> +
> >> +        kvmppc_xive_synchronize_state(xive, &local_err);
> >> +        if (local_err) {
> >> +            error_report_err(local_err);
> >> +            return;
> >> +        }
> >> +    }
> >> +
> >>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
> >>  
> >>      for (i = 0; i < xive->nr_irqs; i++) {
> >> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> >> index 2714f8e4702e..2e46661cb3ad 100644
> >> --- a/hw/intc/spapr_xive_kvm.c
> >> +++ b/hw/intc/spapr_xive_kvm.c
> >> @@ -60,6 +60,51 @@ static void kvm_cpu_enable(CPUState *cs)
> >>  /*
> >>   * XIVE Thread Interrupt Management context (KVM)
> >>   */
> >> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
> >> +{
> >> +    uint64_t state[2] = { 0 };
> >> +    int ret;
> >> +
> >> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
> >> +    if (ret != 0) {
> >> +        error_setg_errno(errp, errno,
> >> +                         "XIVE: could not capture KVM state of CPU %ld",
> >> +                         kvm_arch_vcpu_id(tctx->cs));
> >> +        return;
> >> +    }
> >> +
> >> +    /* word0 and word1 of the OS ring. */
> >> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
> >> +}
> >> +
> >> +typedef struct {
> >> +    XiveTCTX *tctx;
> >> +    Error *err;
> >> +} XiveCpuGetState;
> >> +
> >> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
> >> +                                                 run_on_cpu_data arg)
> >> +{
> >> +    XiveCpuGetState *s = arg.host_ptr;
> >> +
> >> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
> >> +}
> >> +
> >> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
> >> +{
> >> +    XiveCpuGetState s = {
> >> +        .tctx = tctx,
> >> +        .err = NULL,
> >> +    };
> >> +
> >> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
> >> +               RUN_ON_CPU_HOST_PTR(&s));
> > 
> > I think I brought this up earlier, but I'm still confused.
> > 
> > Retreiving information with GET_ONE_REG doesn't usually need dancing
> > around to run on a particular thread.  Why is it necessary here?
> 
> The thread can be scheduled, so we need to kick it to get the thread
> interrupt context registers. This is very much like XICS getting the 
> ICP state.
> 
> May be there is something I am not getting from your question ? I am
> not an expert of the VCPU scheduling area either.

Ah, right I see.  We need to make sure the vcpu is not running so we
don't get stale information.  Ok, I think I'll remember that for the
next review round, should be ok as it is.
Cédric Le Goater March 26, 2019, 7:07 a.m. UTC | #4
On 3/26/19 5:25 AM, David Gibson wrote:
> On Fri, Mar 22, 2019 at 08:53:21AM +0100, Cédric Le Goater wrote:
>> On 3/22/19 2:00 AM, David Gibson wrote:
>>> On Thu, Mar 21, 2019 at 03:49:03PM +0100, Cédric Le Goater wrote:
>>>> This extends the KVM XIVE device backend with 'synchronize_state'
>>>> methods used to retrieve the state from KVM. The HW state of the
>>>> sources, the KVM device and the thread interrupt contexts are
>>>> collected for the monitor usage and also migration.
>>>>
>>>> These get operations rely on their KVM counterpart in the host kernel
>>>> which acts as a proxy for OPAL, the host firmware. The set operations
>>>> will be added for migration support later.
>>>>
>>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>>> ---
>>>>
>>>>  Changes since v2:
>>>>
>>>>  - removed the capture of the OS CAM line value from KVM
>>>>  - added xive_end_is_valid() check
>>>>
>>>>  include/hw/ppc/spapr_xive.h |  8 ++++
>>>>  include/hw/ppc/xive.h       |  1 +
>>>>  hw/intc/spapr_xive.c        | 17 +++++---
>>>>  hw/intc/spapr_xive_kvm.c    | 87 +++++++++++++++++++++++++++++++++++++
>>>>  hw/intc/xive.c              | 10 +++++
>>>>  5 files changed, 116 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
>>>> index 03685910e76b..7e49badd8c9a 100644
>>>> --- a/include/hw/ppc/spapr_xive.h
>>>> +++ b/include/hw/ppc/spapr_xive.h
>>>> @@ -44,6 +44,13 @@ typedef struct SpaprXive {
>>>>      void          *tm_mmap;
>>>>  } SpaprXive;
>>>>  
>>>> +/*
>>>> + * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>>>> + * to the controller block id value. It can nevertheless be changed
>>>> + * for testing purpose.
>>>> + */
>>>> +#define SPAPR_XIVE_BLOCK_ID 0x0
>>>> +
>>>>  bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
>>>>  bool spapr_xive_irq_free(SpaprXive *xive, uint32_t lisn);
>>>>  void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon);
>>>> @@ -74,5 +81,6 @@ void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
>>>>  void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
>>>>                                   uint32_t end_idx, XiveEND *end,
>>>>                                   Error **errp);
>>>> +void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp);
>>>>  
>>>>  #endif /* PPC_SPAPR_XIVE_H */
>>>> diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
>>>> index dd115da30ebc..78c919c4a590 100644
>>>> --- a/include/hw/ppc/xive.h
>>>> +++ b/include/hw/ppc/xive.h
>>>> @@ -435,5 +435,6 @@ void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
>>>>  void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
>>>>  void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
>>>>  void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
>>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
>>>>  
>>>>  #endif /* PPC_XIVE_H */
>>>> diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
>>>> index cde2fd7c8997..4d07140f1078 100644
>>>> --- a/hw/intc/spapr_xive.c
>>>> +++ b/hw/intc/spapr_xive.c
>>>> @@ -40,13 +40,6 @@
>>>>  
>>>>  #define SPAPR_XIVE_NVT_BASE 0x400
>>>>  
>>>> -/*
>>>> - * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
>>>> - * to the controller block id value. It can nevertheless be changed
>>>> - * for testing purpose.
>>>> - */
>>>> -#define SPAPR_XIVE_BLOCK_ID 0x0
>>>> -
>>>>  /*
>>>>   * sPAPR NVT and END indexing helpers
>>>>   */
>>>> @@ -156,6 +149,16 @@ void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
>>>>      XiveSource *xsrc = &xive->source;
>>>>      int i;
>>>>  
>>>> +    if (kvm_irqchip_in_kernel()) {
>>>> +        Error *local_err = NULL;
>>>> +
>>>> +        kvmppc_xive_synchronize_state(xive, &local_err);
>>>> +        if (local_err) {
>>>> +            error_report_err(local_err);
>>>> +            return;
>>>> +        }
>>>> +    }
>>>> +
>>>>      monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
>>>>  
>>>>      for (i = 0; i < xive->nr_irqs; i++) {
>>>> diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
>>>> index 2714f8e4702e..2e46661cb3ad 100644
>>>> --- a/hw/intc/spapr_xive_kvm.c
>>>> +++ b/hw/intc/spapr_xive_kvm.c
>>>> @@ -60,6 +60,51 @@ static void kvm_cpu_enable(CPUState *cs)
>>>>  /*
>>>>   * XIVE Thread Interrupt Management context (KVM)
>>>>   */
>>>> +static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
>>>> +{
>>>> +    uint64_t state[2] = { 0 };
>>>> +    int ret;
>>>> +
>>>> +    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
>>>> +    if (ret != 0) {
>>>> +        error_setg_errno(errp, errno,
>>>> +                         "XIVE: could not capture KVM state of CPU %ld",
>>>> +                         kvm_arch_vcpu_id(tctx->cs));
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /* word0 and word1 of the OS ring. */
>>>> +    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
>>>> +}
>>>> +
>>>> +typedef struct {
>>>> +    XiveTCTX *tctx;
>>>> +    Error *err;
>>>> +} XiveCpuGetState;
>>>> +
>>>> +static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
>>>> +                                                 run_on_cpu_data arg)
>>>> +{
>>>> +    XiveCpuGetState *s = arg.host_ptr;
>>>> +
>>>> +    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
>>>> +}
>>>> +
>>>> +void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
>>>> +{
>>>> +    XiveCpuGetState s = {
>>>> +        .tctx = tctx,
>>>> +        .err = NULL,
>>>> +    };
>>>> +
>>>> +    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
>>>> +               RUN_ON_CPU_HOST_PTR(&s));
>>>
>>> I think I brought this up earlier, but I'm still confused.
>>>
>>> Retreiving information with GET_ONE_REG doesn't usually need dancing
>>> around to run on a particular thread.  Why is it necessary here?
>>
>> The thread can be scheduled, so we need to kick it to get the thread
>> interrupt context registers. This is very much like XICS getting the 
>> ICP state.
>>
>> May be there is something I am not getting from your question ? I am
>> not an expert of the VCPU scheduling area either.
> 
> Ah, right I see.  We need to make sure the vcpu is not running so we
> don't get stale information.  Ok, I think I'll remember that for the
> next review round, should be ok as it is.

It's not only getting stale information, it can also result in a QEMU 
CPU stall.

I will add a comment for next around. 

Thanks,

C.
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr_xive.h b/include/hw/ppc/spapr_xive.h
index 03685910e76b..7e49badd8c9a 100644
--- a/include/hw/ppc/spapr_xive.h
+++ b/include/hw/ppc/spapr_xive.h
@@ -44,6 +44,13 @@  typedef struct SpaprXive {
     void          *tm_mmap;
 } SpaprXive;
 
+/*
+ * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
+ * to the controller block id value. It can nevertheless be changed
+ * for testing purpose.
+ */
+#define SPAPR_XIVE_BLOCK_ID 0x0
+
 bool spapr_xive_irq_claim(SpaprXive *xive, uint32_t lisn, bool lsi);
 bool spapr_xive_irq_free(SpaprXive *xive, uint32_t lisn);
 void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon);
@@ -74,5 +81,6 @@  void kvmppc_xive_set_queue_config(SpaprXive *xive, uint8_t end_blk,
 void kvmppc_xive_get_queue_config(SpaprXive *xive, uint8_t end_blk,
                                  uint32_t end_idx, XiveEND *end,
                                  Error **errp);
+void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp);
 
 #endif /* PPC_SPAPR_XIVE_H */
diff --git a/include/hw/ppc/xive.h b/include/hw/ppc/xive.h
index dd115da30ebc..78c919c4a590 100644
--- a/include/hw/ppc/xive.h
+++ b/include/hw/ppc/xive.h
@@ -435,5 +435,6 @@  void kvmppc_xive_source_reset_one(XiveSource *xsrc, int srcno, Error **errp);
 void kvmppc_xive_source_reset(XiveSource *xsrc, Error **errp);
 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val);
 void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp);
+void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp);
 
 #endif /* PPC_XIVE_H */
diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index cde2fd7c8997..4d07140f1078 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -40,13 +40,6 @@ 
 
 #define SPAPR_XIVE_NVT_BASE 0x400
 
-/*
- * The sPAPR machine has a unique XIVE IC device. Assign a fixed value
- * to the controller block id value. It can nevertheless be changed
- * for testing purpose.
- */
-#define SPAPR_XIVE_BLOCK_ID 0x0
-
 /*
  * sPAPR NVT and END indexing helpers
  */
@@ -156,6 +149,16 @@  void spapr_xive_pic_print_info(SpaprXive *xive, Monitor *mon)
     XiveSource *xsrc = &xive->source;
     int i;
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_synchronize_state(xive, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return;
+        }
+    }
+
     monitor_printf(mon, "  LSIN         PQ    EISN     CPU/PRIO EQ\n");
 
     for (i = 0; i < xive->nr_irqs; i++) {
diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
index 2714f8e4702e..2e46661cb3ad 100644
--- a/hw/intc/spapr_xive_kvm.c
+++ b/hw/intc/spapr_xive_kvm.c
@@ -60,6 +60,51 @@  static void kvm_cpu_enable(CPUState *cs)
 /*
  * XIVE Thread Interrupt Management context (KVM)
  */
+static void kvmppc_xive_cpu_get_state(XiveTCTX *tctx, Error **errp)
+{
+    uint64_t state[2] = { 0 };
+    int ret;
+
+    ret = kvm_get_one_reg(tctx->cs, KVM_REG_PPC_VP_STATE, state);
+    if (ret != 0) {
+        error_setg_errno(errp, errno,
+                         "XIVE: could not capture KVM state of CPU %ld",
+                         kvm_arch_vcpu_id(tctx->cs));
+        return;
+    }
+
+    /* word0 and word1 of the OS ring. */
+    *((uint64_t *) &tctx->regs[TM_QW1_OS]) = state[0];
+}
+
+typedef struct {
+    XiveTCTX *tctx;
+    Error *err;
+} XiveCpuGetState;
+
+static void kvmppc_xive_cpu_do_synchronize_state(CPUState *cpu,
+                                                 run_on_cpu_data arg)
+{
+    XiveCpuGetState *s = arg.host_ptr;
+
+    kvmppc_xive_cpu_get_state(s->tctx, &s->err);
+}
+
+void kvmppc_xive_cpu_synchronize_state(XiveTCTX *tctx, Error **errp)
+{
+    XiveCpuGetState s = {
+        .tctx = tctx,
+        .err = NULL,
+    };
+
+    run_on_cpu(tctx->cs, kvmppc_xive_cpu_do_synchronize_state,
+               RUN_ON_CPU_HOST_PTR(&s));
+
+    if (s.err) {
+        error_propagate(errp, s.err);
+        return;
+    }
+}
 
 void kvmppc_xive_cpu_connect(XiveTCTX *tctx, Error **errp)
 {
@@ -227,6 +272,19 @@  uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int srcno, uint32_t offset,
     }
 }
 
+static void kvmppc_xive_source_get_state(XiveSource *xsrc)
+{
+    int i;
+
+    for (i = 0; i < xsrc->nr_irqs; i++) {
+        /* Perform a load without side effect to retrieve the PQ bits */
+        uint8_t pq = xive_esb_read(xsrc, i, XIVE_ESB_GET);
+
+        /* and save PQ locally */
+        xive_source_esb_set(xsrc, i, pq);
+    }
+}
+
 void kvmppc_xive_source_set_irq(void *opaque, int srcno, int val)
 {
     XiveSource *xsrc = opaque;
@@ -354,6 +412,35 @@  void kvmppc_xive_reset(SpaprXive *xive, Error **errp)
                       NULL, true, errp);
 }
 
+static void kvmppc_xive_get_queues(SpaprXive *xive, Error **errp)
+{
+    Error *local_err = NULL;
+    int i;
+
+    for (i = 0; i < xive->nr_ends; i++) {
+        if (!xive_end_is_valid(&xive->endt[i])) {
+            continue;
+        }
+
+        kvmppc_xive_get_queue_config(xive, SPAPR_XIVE_BLOCK_ID, i,
+                                     &xive->endt[i], &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
+
+void kvmppc_xive_synchronize_state(SpaprXive *xive, Error **errp)
+{
+    kvmppc_xive_source_get_state(&xive->source);
+
+    /* EAT: there is no extra state to query from KVM */
+
+    /* ENDT */
+    kvmppc_xive_get_queues(xive, errp);
+}
+
 static void *kvmppc_xive_mmap(SpaprXive *xive, int pgoff, size_t len,
                               Error **errp)
 {
diff --git a/hw/intc/xive.c b/hw/intc/xive.c
index a0662fd33174..65cb772676a5 100644
--- a/hw/intc/xive.c
+++ b/hw/intc/xive.c
@@ -493,6 +493,16 @@  void xive_tctx_pic_print_info(XiveTCTX *tctx, Monitor *mon)
     int cpu_index = tctx->cs ? tctx->cs->cpu_index : -1;
     int i;
 
+    if (kvm_irqchip_in_kernel()) {
+        Error *local_err = NULL;
+
+        kvmppc_xive_cpu_synchronize_state(tctx, &local_err);
+        if (local_err) {
+            error_report_err(local_err);
+            return;
+        }
+    }
+
     monitor_printf(mon, "CPU[%04x]:   QW   NSR CPPR IPB LSMFB ACK# INC AGE PIPR"
                    "  W2\n", cpu_index);