diff mbox series

[v5,14/14] spapr: nested: Introduce cap-nested-papr for Nested PAPR API

Message ID 20240308111940.1617660-15-harshpb@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series Nested PAPR API (KVM on PowerVM) | expand

Commit Message

Harsh Prateek Bora March 8, 2024, 11:19 a.m. UTC
Introduce a SPAPR capability, cap-nested-papr, which enables the nested PAPR
API for nested guests. This new API enables support for KVM on PowerVM, and
support for it in the Linux kernel has already been merged upstream.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
---
 include/hw/ppc/spapr.h |  6 +++-
 hw/ppc/spapr.c         |  2 ++
 hw/ppc/spapr_caps.c    | 62 ++++++++++++++++++++++++++++++++++++++++++
 hw/ppc/spapr_nested.c  |  8 ++++--
 4 files changed, 74 insertions(+), 4 deletions(-)

Comments

Nicholas Piggin March 12, 2024, 11:45 a.m. UTC | #1
On Fri Mar 8, 2024 at 9:19 PM AEST, Harsh Prateek Bora wrote:
> Introduce a SPAPR capability cap-nested-papr which enables nested PAPR
> API for nested guests. This new API is to enable support for KVM on PowerVM
> and the support in Linux kernel has already merged upstream.
>
> Signed-off-by: Michael Neuling <mikey@neuling.org>
> Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
> ---
>  include/hw/ppc/spapr.h |  6 +++-
>  hw/ppc/spapr.c         |  2 ++
>  hw/ppc/spapr_caps.c    | 62 ++++++++++++++++++++++++++++++++++++++++++
>  hw/ppc/spapr_nested.c  |  8 ++++--
>  4 files changed, 74 insertions(+), 4 deletions(-)
>
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 6223873641..4aaf23d28f 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -81,8 +81,10 @@ typedef enum {
>  #define SPAPR_CAP_RPT_INVALIDATE        0x0B
>  /* Support for AIL modes */
>  #define SPAPR_CAP_AIL_MODE_3            0x0C
> +/* Nested PAPR */
> +#define SPAPR_CAP_NESTED_PAPR           0x0D
>  /* Num Caps */
> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
>  
>  /*
>   * Capability Values
> @@ -592,6 +594,7 @@ struct SpaprMachineState {
>  #define H_GUEST_CREATE_VCPU      0x474
>  #define H_GUEST_GET_STATE        0x478
>  #define H_GUEST_SET_STATE        0x47C
> +#define H_GUEST_RUN_VCPU         0x480
>  #define H_GUEST_DELETE           0x488
>  
>  #define MAX_HCALL_OPCODE         H_GUEST_DELETE
> @@ -996,6 +999,7 @@ extern const VMStateDescription vmstate_spapr_cap_sbbc;
>  extern const VMStateDescription vmstate_spapr_cap_ibs;
>  extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
>  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
> +extern const VMStateDescription vmstate_spapr_cap_nested_papr;
>  extern const VMStateDescription vmstate_spapr_cap_large_decr;
>  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>  extern const VMStateDescription vmstate_spapr_cap_fwnmi;
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 54fc01e462..beb23fae8f 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2121,6 +2121,7 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_cap_fwnmi,
>          &vmstate_spapr_fwnmi,
>          &vmstate_spapr_cap_rpt_invalidate,
> +        &vmstate_spapr_cap_nested_papr,
>          NULL
>      }
>  };
> @@ -4687,6 +4688,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
>      smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index e889244e52..d6d5a6b8df 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -487,6 +487,58 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
>              error_append_hint(errp, "Try appending -machine cap-nested-hv=off "
>                                      "or use threads=1 with -smp\n");
>          }
> +        if (spapr_nested_api(spapr) &&
> +            spapr_nested_api(spapr) != NESTED_API_KVM_HV) {
> +            error_setg(errp, "Nested-HV APIs are mutually exclusive/incompatible");
> +            error_append_hint(errp, "Please use either cap-nested-hv or "
> +                                    "cap-nested-papr to proceed.\n");
> +            return;
> +        } else {
> +            spapr->nested.api = NESTED_API_KVM_HV;
> +        }
> +    }
> +}
> +
> +static void cap_nested_papr_apply(SpaprMachineState *spapr,
> +                                    uint8_t val, Error **errp)
> +{
> +    ERRP_GUARD();
> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> +    CPUPPCState *env = &cpu->env;
> +
> +    if (!val) {
> +        /* capability disabled by default */
> +        return;
> +    }
> +
> +    if (tcg_enabled()) {
> +        if (!(env->insns_flags2 & PPC2_ISA300)) {
> +            error_setg(errp, "Nested-PAPR only supported on POWER9 and later");
> +            error_append_hint(errp,
> +                              "Try appending -machine cap-nested-papr=off\n");
> +            return;
> +        }
> +        if (spapr_nested_api(spapr) &&
> +            spapr_nested_api(spapr) != NESTED_API_PAPR) {
> +            error_setg(errp, "Nested-HV APIs are mutually exclusive/incompatible");
> +            error_append_hint(errp, "Please use either cap-nested-hv or "
> +                                    "cap-nested-papr to proceed.\n");
> +            return;
> +        } else {
> +            spapr->nested.api = NESTED_API_PAPR;
> +        }
> +
> +    } else if (kvm_enabled()) {
> +        /*
> +         * this gets executed in L1 qemu when L2 is launched,
> +         * needs kvm-hv support in L1 kernel.
> +         */
> +        if (!kvmppc_has_cap_nested_kvm_hv()) {
> +            error_setg(errp,
> +                       "KVM implementation does not support Nested-HV");
> +        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
> +            error_setg(errp, "Error enabling Nested-HV with KVM");
> +        }

Hmm, if KVM is enabled then we don't support PAPR nested at all, do we?
Because the KVM cap is only for v1 nested.

Thanks,
Nick

>      }
>  }
>  
> @@ -735,6 +787,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>          .type = "bool",
>          .apply = cap_nested_kvm_hv_apply,
>      },
> +    [SPAPR_CAP_NESTED_PAPR] = {
> +        .name = "nested-papr",
> +        .description = "Allow Nested HV (PAPR API)",
> +        .index = SPAPR_CAP_NESTED_PAPR,
> +        .get = spapr_cap_get_bool,
> +        .set = spapr_cap_set_bool,
> +        .type = "bool",
> +        .apply = cap_nested_papr_apply,
> +    },
>      [SPAPR_CAP_LARGE_DECREMENTER] = {
>          .name = "large-decr",
>          .description = "Allow Large Decrementer",
> @@ -919,6 +980,7 @@ SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
>  SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
>  SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> +SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR);
>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>  SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
> diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
> index 597dba7fdc..8db9dc19e3 100644
> --- a/hw/ppc/spapr_nested.c
> +++ b/hw/ppc/spapr_nested.c
> @@ -13,13 +13,15 @@
>  void spapr_nested_reset(SpaprMachineState *spapr)
>  {
>      if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
> -        spapr->nested.api = NESTED_API_KVM_HV;
>          spapr_unregister_nested_hv();
>          spapr_register_nested_hv();
> -    } else {
> -        spapr->nested.api = 0;
> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_PAPR)) {
>          spapr->nested.capabilities_set = false;
> +        spapr_unregister_nested_papr();
> +        spapr_register_nested_papr();
>          spapr_nested_gsb_init();
> +    } else {
> +        spapr->nested.api = 0;
>      }
>  }
>
Nicholas Piggin March 12, 2024, 11:51 a.m. UTC | #2
On Fri Mar 8, 2024 at 9:19 PM AEST, Harsh Prateek Bora wrote:
> Introduce a SPAPR capability cap-nested-papr which enables nested PAPR
> API for nested guests. This new API is to enable support for KVM on PowerVM
> and the support in Linux kernel has already merged upstream.
>
> Signed-off-by: Michael Neuling <mikey@neuling.org>
> Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
> ---
>  include/hw/ppc/spapr.h |  6 +++-
>  hw/ppc/spapr.c         |  2 ++
>  hw/ppc/spapr_caps.c    | 62 ++++++++++++++++++++++++++++++++++++++++++
>  hw/ppc/spapr_nested.c  |  8 ++++--
>  4 files changed, 74 insertions(+), 4 deletions(-)
>
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 6223873641..4aaf23d28f 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -81,8 +81,10 @@ typedef enum {
>  #define SPAPR_CAP_RPT_INVALIDATE        0x0B
>  /* Support for AIL modes */
>  #define SPAPR_CAP_AIL_MODE_3            0x0C
> +/* Nested PAPR */
> +#define SPAPR_CAP_NESTED_PAPR           0x0D
>  /* Num Caps */
> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
>  
>  /*
>   * Capability Values
> @@ -592,6 +594,7 @@ struct SpaprMachineState {
>  #define H_GUEST_CREATE_VCPU      0x474
>  #define H_GUEST_GET_STATE        0x478
>  #define H_GUEST_SET_STATE        0x47C
> +#define H_GUEST_RUN_VCPU         0x480
>  #define H_GUEST_DELETE           0x488
>  
>  #define MAX_HCALL_OPCODE         H_GUEST_DELETE
> @@ -996,6 +999,7 @@ extern const VMStateDescription vmstate_spapr_cap_sbbc;
>  extern const VMStateDescription vmstate_spapr_cap_ibs;
>  extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
>  extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
> +extern const VMStateDescription vmstate_spapr_cap_nested_papr;
>  extern const VMStateDescription vmstate_spapr_cap_large_decr;
>  extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>  extern const VMStateDescription vmstate_spapr_cap_fwnmi;
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 54fc01e462..beb23fae8f 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2121,6 +2121,7 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_cap_fwnmi,
>          &vmstate_spapr_fwnmi,
>          &vmstate_spapr_cap_rpt_invalidate,
> +        &vmstate_spapr_cap_nested_papr,
>          NULL
>      }
>  };
> @@ -4687,6 +4688,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
>      smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index e889244e52..d6d5a6b8df 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -487,6 +487,58 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
>              error_append_hint(errp, "Try appending -machine cap-nested-hv=off "
>                                      "or use threads=1 with -smp\n");
>          }
> +        if (spapr_nested_api(spapr) &&
> +            spapr_nested_api(spapr) != NESTED_API_KVM_HV) {
> +            error_setg(errp, "Nested-HV APIs are mutually exclusive/incompatible");
> +            error_append_hint(errp, "Please use either cap-nested-hv or "
> +                                    "cap-nested-papr to proceed.\n");
> +            return;
> +        } else {
> +            spapr->nested.api = NESTED_API_KVM_HV;
> +        }
> +    }
> +}
> +
> +static void cap_nested_papr_apply(SpaprMachineState *spapr,
> +                                    uint8_t val, Error **errp)
> +{
> +    ERRP_GUARD();
> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> +    CPUPPCState *env = &cpu->env;
> +
> +    if (!val) {
> +        /* capability disabled by default */
> +        return;
> +    }
> +
> +    if (tcg_enabled()) {
> +        if (!(env->insns_flags2 & PPC2_ISA300)) {
> +            error_setg(errp, "Nested-PAPR only supported on POWER9 and later");
> +            error_append_hint(errp,
> +                              "Try appending -machine cap-nested-papr=off\n");
> +            return;
> +        }
> +        if (spapr_nested_api(spapr) &&
> +            spapr_nested_api(spapr) != NESTED_API_PAPR) {
> +            error_setg(errp, "Nested-HV APIs are mutually exclusive/incompatible");
> +            error_append_hint(errp, "Please use either cap-nested-hv or "
> +                                    "cap-nested-papr to proceed.\n");
> +            return;
> +        } else {
> +            spapr->nested.api = NESTED_API_PAPR;
> +        }
> +
> +    } else if (kvm_enabled()) {
> +        /*
> +         * this gets executed in L1 qemu when L2 is launched,
> +         * needs kvm-hv support in L1 kernel.
> +         */
> +        if (!kvmppc_has_cap_nested_kvm_hv()) {
> +            error_setg(errp,
> +                       "KVM implementation does not support Nested-HV");
> +        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
> +            error_setg(errp, "Error enabling Nested-HV with KVM");
> +        }

I'll just disable this on KVM for now. With that changed,

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

>      }
>  }
>  
> @@ -735,6 +787,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>          .type = "bool",
>          .apply = cap_nested_kvm_hv_apply,
>      },
> +    [SPAPR_CAP_NESTED_PAPR] = {
> +        .name = "nested-papr",
> +        .description = "Allow Nested HV (PAPR API)",
> +        .index = SPAPR_CAP_NESTED_PAPR,
> +        .get = spapr_cap_get_bool,
> +        .set = spapr_cap_set_bool,
> +        .type = "bool",
> +        .apply = cap_nested_papr_apply,
> +    },
>      [SPAPR_CAP_LARGE_DECREMENTER] = {
>          .name = "large-decr",
>          .description = "Allow Large Decrementer",
> @@ -919,6 +980,7 @@ SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
>  SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
>  SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
> +SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR);
>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>  SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
> diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
> index 597dba7fdc..8db9dc19e3 100644
> --- a/hw/ppc/spapr_nested.c
> +++ b/hw/ppc/spapr_nested.c
> @@ -13,13 +13,15 @@
>  void spapr_nested_reset(SpaprMachineState *spapr)
>  {
>      if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
> -        spapr->nested.api = NESTED_API_KVM_HV;
>          spapr_unregister_nested_hv();
>          spapr_register_nested_hv();
> -    } else {
> -        spapr->nested.api = 0;
> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_PAPR)) {
>          spapr->nested.capabilities_set = false;
> +        spapr_unregister_nested_papr();
> +        spapr_register_nested_papr();
>          spapr_nested_gsb_init();
> +    } else {
> +        spapr->nested.api = 0;
>      }
>  }
>
Harsh Prateek Bora March 12, 2024, 12:11 p.m. UTC | #3
Hi Nick,

On 3/12/24 17:21, Nicholas Piggin wrote:
> On Fri Mar 8, 2024 at 9:19 PM AEST, Harsh Prateek Bora wrote:
>> Introduce a SPAPR capability cap-nested-papr which enables nested PAPR
>> API for nested guests. This new API is to enable support for KVM on PowerVM
>> and the support in Linux kernel has already merged upstream.
>>
>> Signed-off-by: Michael Neuling <mikey@neuling.org>
>> Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
>> ---
>>   include/hw/ppc/spapr.h |  6 +++-
>>   hw/ppc/spapr.c         |  2 ++
>>   hw/ppc/spapr_caps.c    | 62 ++++++++++++++++++++++++++++++++++++++++++
>>   hw/ppc/spapr_nested.c  |  8 ++++--
>>   4 files changed, 74 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 6223873641..4aaf23d28f 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -81,8 +81,10 @@ typedef enum {
>>   #define SPAPR_CAP_RPT_INVALIDATE        0x0B
>>   /* Support for AIL modes */
>>   #define SPAPR_CAP_AIL_MODE_3            0x0C
>> +/* Nested PAPR */
>> +#define SPAPR_CAP_NESTED_PAPR           0x0D
>>   /* Num Caps */
>> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
>> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
>>   
>>   /*
>>    * Capability Values
>> @@ -592,6 +594,7 @@ struct SpaprMachineState {
>>   #define H_GUEST_CREATE_VCPU      0x474
>>   #define H_GUEST_GET_STATE        0x478
>>   #define H_GUEST_SET_STATE        0x47C
>> +#define H_GUEST_RUN_VCPU         0x480
>>   #define H_GUEST_DELETE           0x488
>>   
>>   #define MAX_HCALL_OPCODE         H_GUEST_DELETE
>> @@ -996,6 +999,7 @@ extern const VMStateDescription vmstate_spapr_cap_sbbc;
>>   extern const VMStateDescription vmstate_spapr_cap_ibs;
>>   extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
>>   extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
>> +extern const VMStateDescription vmstate_spapr_cap_nested_papr;
>>   extern const VMStateDescription vmstate_spapr_cap_large_decr;
>>   extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>>   extern const VMStateDescription vmstate_spapr_cap_fwnmi;
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 54fc01e462..beb23fae8f 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2121,6 +2121,7 @@ static const VMStateDescription vmstate_spapr = {
>>           &vmstate_spapr_cap_fwnmi,
>>           &vmstate_spapr_fwnmi,
>>           &vmstate_spapr_cap_rpt_invalidate,
>> +        &vmstate_spapr_cap_nested_papr,
>>           NULL
>>       }
>>   };
>> @@ -4687,6 +4688,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>       smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
>>       smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
>>       smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>> +    smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF;
>>       smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>       smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
>>       smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>> index e889244e52..d6d5a6b8df 100644
>> --- a/hw/ppc/spapr_caps.c
>> +++ b/hw/ppc/spapr_caps.c
>> @@ -487,6 +487,58 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
>>               error_append_hint(errp, "Try appending -machine cap-nested-hv=off "
>>                                       "or use threads=1 with -smp\n");
>>           }
>> +        if (spapr_nested_api(spapr) &&
>> +            spapr_nested_api(spapr) != NESTED_API_KVM_HV) {
>> +            error_setg(errp, "Nested-HV APIs are mutually exclusive/incompatible");
>> +            error_append_hint(errp, "Please use either cap-nested-hv or "
>> +                                    "cap-nested-papr to proceed.\n");
>> +            return;
>> +        } else {
>> +            spapr->nested.api = NESTED_API_KVM_HV;
>> +        }
>> +    }
>> +}
>> +
>> +static void cap_nested_papr_apply(SpaprMachineState *spapr,
>> +                                    uint8_t val, Error **errp)
>> +{
>> +    ERRP_GUARD();
>> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
>> +    CPUPPCState *env = &cpu->env;
>> +
>> +    if (!val) {
>> +        /* capability disabled by default */
>> +        return;
>> +    }
>> +
>> +    if (tcg_enabled()) {
>> +        if (!(env->insns_flags2 & PPC2_ISA300)) {
>> +            error_setg(errp, "Nested-PAPR only supported on POWER9 and later");
>> +            error_append_hint(errp,
>> +                              "Try appending -machine cap-nested-papr=off\n");
>> +            return;
>> +        }
>> +        if (spapr_nested_api(spapr) &&
>> +            spapr_nested_api(spapr) != NESTED_API_PAPR) {
>> +            error_setg(errp, "Nested-HV APIs are mutually exclusive/incompatible");
>> +            error_append_hint(errp, "Please use either cap-nested-hv or "
>> +                                    "cap-nested-papr to proceed.\n");
>> +            return;
>> +        } else {
>> +            spapr->nested.api = NESTED_API_PAPR;
>> +        }
>> +
>> +    } else if (kvm_enabled()) {
>> +        /*
>> +         * this gets executed in L1 qemu when L2 is launched,
>> +         * needs kvm-hv support in L1 kernel.
>> +         */
>> +        if (!kvmppc_has_cap_nested_kvm_hv()) {
>> +            error_setg(errp,
>> +                       "KVM implementation does not support Nested-HV");
>> +        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
>> +            error_setg(errp, "Error enabling Nested-HV with KVM");
>> +        }
> 
> I'll just disable this on KVM for now. With that changed,
> 
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> 

AFAIK, the v2 API also expects this capability to be enabled in the L1 kernel.
I guess the reason is that the L1 implementation reused the same capability
and extended it for use with the v2 API. So, this check is needed in L1 QEMU
for now. We may revisit the L1 implementation later to see if a change is
appropriate.

regards,
Harsh

>>       }
>>   }
>>   
>> @@ -735,6 +787,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>>           .type = "bool",
>>           .apply = cap_nested_kvm_hv_apply,
>>       },
>> +    [SPAPR_CAP_NESTED_PAPR] = {
>> +        .name = "nested-papr",
>> +        .description = "Allow Nested HV (PAPR API)",
>> +        .index = SPAPR_CAP_NESTED_PAPR,
>> +        .get = spapr_cap_get_bool,
>> +        .set = spapr_cap_set_bool,
>> +        .type = "bool",
>> +        .apply = cap_nested_papr_apply,
>> +    },
>>       [SPAPR_CAP_LARGE_DECREMENTER] = {
>>           .name = "large-decr",
>>           .description = "Allow Large Decrementer",
>> @@ -919,6 +980,7 @@ SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
>>   SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
>>   SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>>   SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>> +SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR);
>>   SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>>   SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>>   SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
>> diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
>> index 597dba7fdc..8db9dc19e3 100644
>> --- a/hw/ppc/spapr_nested.c
>> +++ b/hw/ppc/spapr_nested.c
>> @@ -13,13 +13,15 @@
>>   void spapr_nested_reset(SpaprMachineState *spapr)
>>   {
>>       if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
>> -        spapr->nested.api = NESTED_API_KVM_HV;
>>           spapr_unregister_nested_hv();
>>           spapr_register_nested_hv();
>> -    } else {
>> -        spapr->nested.api = 0;
>> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_PAPR)) {
>>           spapr->nested.capabilities_set = false;
>> +        spapr_unregister_nested_papr();
>> +        spapr_register_nested_papr();
>>           spapr_nested_gsb_init();
>> +    } else {
>> +        spapr->nested.api = 0;
>>       }
>>   }
>>   
>
Nicholas Piggin March 12, 2024, 12:46 p.m. UTC | #4
On Tue Mar 12, 2024 at 10:11 PM AEST, Harsh Prateek Bora wrote:
> Hi Nick,
>
> On 3/12/24 17:21, Nicholas Piggin wrote:
> > On Fri Mar 8, 2024 at 9:19 PM AEST, Harsh Prateek Bora wrote:
> >> Introduce a SPAPR capability cap-nested-papr which enables nested PAPR
> >> API for nested guests. This new API is to enable support for KVM on PowerVM
> >> and the support in Linux kernel has already merged upstream.
> >>
> >> Signed-off-by: Michael Neuling <mikey@neuling.org>
> >> Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
> >> ---
> >>   include/hw/ppc/spapr.h |  6 +++-
> >>   hw/ppc/spapr.c         |  2 ++
> >>   hw/ppc/spapr_caps.c    | 62 ++++++++++++++++++++++++++++++++++++++++++
> >>   hw/ppc/spapr_nested.c  |  8 ++++--
> >>   4 files changed, 74 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >> index 6223873641..4aaf23d28f 100644
> >> --- a/include/hw/ppc/spapr.h
> >> +++ b/include/hw/ppc/spapr.h
> >> @@ -81,8 +81,10 @@ typedef enum {
> >>   #define SPAPR_CAP_RPT_INVALIDATE        0x0B
> >>   /* Support for AIL modes */
> >>   #define SPAPR_CAP_AIL_MODE_3            0x0C
> >> +/* Nested PAPR */
> >> +#define SPAPR_CAP_NESTED_PAPR           0x0D
> >>   /* Num Caps */
> >> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
> >> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
> >>   
> >>   /*
> >>    * Capability Values
> >> @@ -592,6 +594,7 @@ struct SpaprMachineState {
> >>   #define H_GUEST_CREATE_VCPU      0x474
> >>   #define H_GUEST_GET_STATE        0x478
> >>   #define H_GUEST_SET_STATE        0x47C
> >> +#define H_GUEST_RUN_VCPU         0x480
> >>   #define H_GUEST_DELETE           0x488
> >>   
> >>   #define MAX_HCALL_OPCODE         H_GUEST_DELETE
> >> @@ -996,6 +999,7 @@ extern const VMStateDescription vmstate_spapr_cap_sbbc;
> >>   extern const VMStateDescription vmstate_spapr_cap_ibs;
> >>   extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
> >>   extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
> >> +extern const VMStateDescription vmstate_spapr_cap_nested_papr;
> >>   extern const VMStateDescription vmstate_spapr_cap_large_decr;
> >>   extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
> >>   extern const VMStateDescription vmstate_spapr_cap_fwnmi;
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 54fc01e462..beb23fae8f 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -2121,6 +2121,7 @@ static const VMStateDescription vmstate_spapr = {
> >>           &vmstate_spapr_cap_fwnmi,
> >>           &vmstate_spapr_fwnmi,
> >>           &vmstate_spapr_cap_rpt_invalidate,
> >> +        &vmstate_spapr_cap_nested_papr,
> >>           NULL
> >>       }
> >>   };
> >> @@ -4687,6 +4688,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>       smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
> >>       smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
> >>       smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >> +    smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF;
> >>       smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>       smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
> >>       smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
> >> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> >> index e889244e52..d6d5a6b8df 100644
> >> --- a/hw/ppc/spapr_caps.c
> >> +++ b/hw/ppc/spapr_caps.c
> >> @@ -487,6 +487,58 @@ static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
> >>               error_append_hint(errp, "Try appending -machine cap-nested-hv=off "
> >>                                       "or use threads=1 with -smp\n");
> >>           }
> >> +        if (spapr_nested_api(spapr) &&
> >> +            spapr_nested_api(spapr) != NESTED_API_KVM_HV) {
> >> +            error_setg(errp, "Nested-HV APIs are mutually exclusive/incompatible");
> >> +            error_append_hint(errp, "Please use either cap-nested-hv or "
> >> +                                    "cap-nested-papr to proceed.\n");
> >> +            return;
> >> +        } else {
> >> +            spapr->nested.api = NESTED_API_KVM_HV;
> >> +        }
> >> +    }
> >> +}
> >> +
> >> +static void cap_nested_papr_apply(SpaprMachineState *spapr,
> >> +                                    uint8_t val, Error **errp)
> >> +{
> >> +    ERRP_GUARD();
> >> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
> >> +    CPUPPCState *env = &cpu->env;
> >> +
> >> +    if (!val) {
> >> +        /* capability disabled by default */
> >> +        return;
> >> +    }
> >> +
> >> +    if (tcg_enabled()) {
> >> +        if (!(env->insns_flags2 & PPC2_ISA300)) {
> >> +            error_setg(errp, "Nested-PAPR only supported on POWER9 and later");
> >> +            error_append_hint(errp,
> >> +                              "Try appending -machine cap-nested-papr=off\n");
> >> +            return;
> >> +        }
> >> +        if (spapr_nested_api(spapr) &&
> >> +            spapr_nested_api(spapr) != NESTED_API_PAPR) {
> >> +            error_setg(errp, "Nested-HV APIs are mutually exclusive/incompatible");
> >> +            error_append_hint(errp, "Please use either cap-nested-hv or "
> >> +                                    "cap-nested-papr to proceed.\n");
> >> +            return;
> >> +        } else {
> >> +            spapr->nested.api = NESTED_API_PAPR;
> >> +        }
> >> +
> >> +    } else if (kvm_enabled()) {
> >> +        /*
> >> +         * this gets executed in L1 qemu when L2 is launched,
> >> +         * needs kvm-hv support in L1 kernel.
> >> +         */
> >> +        if (!kvmppc_has_cap_nested_kvm_hv()) {
> >> +            error_setg(errp,
> >> +                       "KVM implementation does not support Nested-HV");
> >> +        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
> >> +            error_setg(errp, "Error enabling Nested-HV with KVM");
> >> +        }
> > 
> > I'll just disable this on KVM for now. With that changed,
> > 
> > Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> > 
>
> AFAIK, v2 api also expects this capability to be enabled on L1 kernel.
> I guess the reason is the L1 implementation has used the same capab and
> extended to be used with v2 api. So, this check is needed in L1 Qemu for
> now. We may revisit L1 implementation later to see if a change is
> appropriate.

The capability is what the pseries machine provides to its software. That is,
it says whether the OS running under QEMU can use the hypervisor v1 or v2
hypercalls to run KVM. KVM does not implement the PAPR API (yet), so it does
not support this cap.

KVM can *consume* the PAPR API when it's running as an L1 on top of an
L0 that provides it. That side of it is queried via the hcalls.

Thanks,
Nick
Harsh Prateek Bora March 12, 2024, 12:47 p.m. UTC | #5
Hi Nick,

On 3/12/24 17:41, Harsh Prateek Bora wrote:
> Hi Nick,
> 
> On 3/12/24 17:21, Nicholas Piggin wrote:
>> On Fri Mar 8, 2024 at 9:19 PM AEST, Harsh Prateek Bora wrote:
>>> Introduce a SPAPR capability cap-nested-papr which enables nested PAPR
>>> API for nested guests. This new API is to enable support for KVM on 
>>> PowerVM
>>> and the support in Linux kernel has already merged upstream.
>>>
>>> Signed-off-by: Michael Neuling <mikey@neuling.org>
>>> Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
>>> ---
>>>   include/hw/ppc/spapr.h |  6 +++-
>>>   hw/ppc/spapr.c         |  2 ++
>>>   hw/ppc/spapr_caps.c    | 62 ++++++++++++++++++++++++++++++++++++++++++
>>>   hw/ppc/spapr_nested.c  |  8 ++++--
>>>   4 files changed, 74 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>> index 6223873641..4aaf23d28f 100644
>>> --- a/include/hw/ppc/spapr.h
>>> +++ b/include/hw/ppc/spapr.h
>>> @@ -81,8 +81,10 @@ typedef enum {
>>>   #define SPAPR_CAP_RPT_INVALIDATE        0x0B
>>>   /* Support for AIL modes */
>>>   #define SPAPR_CAP_AIL_MODE_3            0x0C
>>> +/* Nested PAPR */
>>> +#define SPAPR_CAP_NESTED_PAPR           0x0D
>>>   /* Num Caps */
>>> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
>>> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
>>>   /*
>>>    * Capability Values
>>> @@ -592,6 +594,7 @@ struct SpaprMachineState {
>>>   #define H_GUEST_CREATE_VCPU      0x474
>>>   #define H_GUEST_GET_STATE        0x478
>>>   #define H_GUEST_SET_STATE        0x47C
>>> +#define H_GUEST_RUN_VCPU         0x480
>>>   #define H_GUEST_DELETE           0x488
>>>   #define MAX_HCALL_OPCODE         H_GUEST_DELETE
>>> @@ -996,6 +999,7 @@ extern const VMStateDescription 
>>> vmstate_spapr_cap_sbbc;
>>>   extern const VMStateDescription vmstate_spapr_cap_ibs;
>>>   extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
>>>   extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
>>> +extern const VMStateDescription vmstate_spapr_cap_nested_papr;
>>>   extern const VMStateDescription vmstate_spapr_cap_large_decr;
>>>   extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>>>   extern const VMStateDescription vmstate_spapr_cap_fwnmi;
>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>> index 54fc01e462..beb23fae8f 100644
>>> --- a/hw/ppc/spapr.c
>>> +++ b/hw/ppc/spapr.c
>>> @@ -2121,6 +2121,7 @@ static const VMStateDescription vmstate_spapr = {
>>>           &vmstate_spapr_cap_fwnmi,
>>>           &vmstate_spapr_fwnmi,
>>>           &vmstate_spapr_cap_rpt_invalidate,
>>> +        &vmstate_spapr_cap_nested_papr,
>>>           NULL
>>>       }
>>>   };
>>> @@ -4687,6 +4688,7 @@ static void 
>>> spapr_machine_class_init(ObjectClass *oc, void *data)
>>>       smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
>>>       smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 
>>> 64kiB */
>>>       smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>> +    smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF;
>>>       smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = 
>>> SPAPR_CAP_ON;
>>>       smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
>>>       smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
>>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>>> index e889244e52..d6d5a6b8df 100644
>>> --- a/hw/ppc/spapr_caps.c
>>> +++ b/hw/ppc/spapr_caps.c
>>> @@ -487,6 +487,58 @@ static void 
>>> cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
>>>               error_append_hint(errp, "Try appending -machine 
>>> cap-nested-hv=off "
>>>                                       "or use threads=1 with -smp\n");
>>>           }
>>> +        if (spapr_nested_api(spapr) &&
>>> +            spapr_nested_api(spapr) != NESTED_API_KVM_HV) {
>>> +            error_setg(errp, "Nested-HV APIs are mutually 
>>> exclusive/incompatible");
>>> +            error_append_hint(errp, "Please use either cap-nested-hv 
>>> or "
>>> +                                    "cap-nested-papr to proceed.\n");
>>> +            return;
>>> +        } else {
>>> +            spapr->nested.api = NESTED_API_KVM_HV;
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +static void cap_nested_papr_apply(SpaprMachineState *spapr,
>>> +                                    uint8_t val, Error **errp)
>>> +{
>>> +    ERRP_GUARD();
>>> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
>>> +    CPUPPCState *env = &cpu->env;
>>> +
>>> +    if (!val) {
>>> +        /* capability disabled by default */
>>> +        return;
>>> +    }
>>> +
>>> +    if (tcg_enabled()) {
>>> +        if (!(env->insns_flags2 & PPC2_ISA300)) {
>>> +            error_setg(errp, "Nested-PAPR only supported on POWER9 
>>> and later");
>>> +            error_append_hint(errp,
>>> +                              "Try appending -machine 
>>> cap-nested-papr=off\n");
>>> +            return;
>>> +        }
>>> +        if (spapr_nested_api(spapr) &&
>>> +            spapr_nested_api(spapr) != NESTED_API_PAPR) {
>>> +            error_setg(errp, "Nested-HV APIs are mutually 
>>> exclusive/incompatible");
>>> +            error_append_hint(errp, "Please use either cap-nested-hv 
>>> or "
>>> +                                    "cap-nested-papr to proceed.\n");
>>> +            return;
>>> +        } else {
>>> +            spapr->nested.api = NESTED_API_PAPR;
>>> +        }
>>> +
>>> +    } else if (kvm_enabled()) {
>>> +        /*
>>> +         * this gets executed in L1 qemu when L2 is launched,
>>> +         * needs kvm-hv support in L1 kernel.
>>> +         */
>>> +        if (!kvmppc_has_cap_nested_kvm_hv()) {
>>> +            error_setg(errp,
>>> +                       "KVM implementation does not support 
>>> Nested-HV");
>>> +        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
>>> +            error_setg(errp, "Error enabling Nested-HV with KVM");
>>> +        }
>>
>> I'll just disable this on KVM for now. With that changed,
>>
>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>>
> 
> AFAIK, v2 api also expects this capability to be enabled on L1 kernel.
> I guess the reason is the L1 implementation has used the same capab and
> extended to be used with v2 api. So, this check is needed in L1 Qemu for
> now. We may revisit L1 implementation later to see if a change is
> appropriate.

Please ignore the above response. I think my observation was based on an older
version of the L1 implementation. This doesn't seem to be an issue with
upstream L1. You may disable the kvm_enabled() path for now. I just
tested and it works fine.

regards,
Harsh

> 
> regards,
> Harsh
> 
>>>       }
>>>   }
>>> @@ -735,6 +787,15 @@ SpaprCapabilityInfo 
>>> capability_table[SPAPR_CAP_NUM] = {
>>>           .type = "bool",
>>>           .apply = cap_nested_kvm_hv_apply,
>>>       },
>>> +    [SPAPR_CAP_NESTED_PAPR] = {
>>> +        .name = "nested-papr",
>>> +        .description = "Allow Nested HV (PAPR API)",
>>> +        .index = SPAPR_CAP_NESTED_PAPR,
>>> +        .get = spapr_cap_get_bool,
>>> +        .set = spapr_cap_set_bool,
>>> +        .type = "bool",
>>> +        .apply = cap_nested_papr_apply,
>>> +    },
>>>       [SPAPR_CAP_LARGE_DECREMENTER] = {
>>>           .name = "large-decr",
>>>           .description = "Allow Large Decrementer",
>>> @@ -919,6 +980,7 @@ SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
>>>   SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
>>>   SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>>>   SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>>> +SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR);
>>>   SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>>>   SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>>>   SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
>>> diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
>>> index 597dba7fdc..8db9dc19e3 100644
>>> --- a/hw/ppc/spapr_nested.c
>>> +++ b/hw/ppc/spapr_nested.c
>>> @@ -13,13 +13,15 @@
>>>   void spapr_nested_reset(SpaprMachineState *spapr)
>>>   {
>>>       if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
>>> -        spapr->nested.api = NESTED_API_KVM_HV;
>>>           spapr_unregister_nested_hv();
>>>           spapr_register_nested_hv();
>>> -    } else {
>>> -        spapr->nested.api = 0;
>>> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_PAPR)) {
>>>           spapr->nested.capabilities_set = false;
>>> +        spapr_unregister_nested_papr();
>>> +        spapr_register_nested_papr();
>>>           spapr_nested_gsb_init();
>>> +    } else {
>>> +        spapr->nested.api = 0;
>>>       }
>>>   }
>>
Harsh Prateek Bora March 12, 2024, 12:51 p.m. UTC | #6
On 3/12/24 18:17, Harsh Prateek Bora wrote:
> Hi Nick,
> 
> On 3/12/24 17:41, Harsh Prateek Bora wrote:
>> Hi Nick,
>>
>> On 3/12/24 17:21, Nicholas Piggin wrote:
>>> On Fri Mar 8, 2024 at 9:19 PM AEST, Harsh Prateek Bora wrote:
>>>> Introduce a SPAPR capability cap-nested-papr which enables nested PAPR
>>>> API for nested guests. This new API is to enable support for KVM on 
>>>> PowerVM
>>>> and the support in Linux kernel has already merged upstream.
>>>>
>>>> Signed-off-by: Michael Neuling <mikey@neuling.org>
>>>> Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
>>>> ---
>>>>   include/hw/ppc/spapr.h |  6 +++-
>>>>   hw/ppc/spapr.c         |  2 ++
>>>>   hw/ppc/spapr_caps.c    | 62 
>>>> ++++++++++++++++++++++++++++++++++++++++++
>>>>   hw/ppc/spapr_nested.c  |  8 ++++--
>>>>   4 files changed, 74 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>>> index 6223873641..4aaf23d28f 100644
>>>> --- a/include/hw/ppc/spapr.h
>>>> +++ b/include/hw/ppc/spapr.h
>>>> @@ -81,8 +81,10 @@ typedef enum {
>>>>   #define SPAPR_CAP_RPT_INVALIDATE        0x0B
>>>>   /* Support for AIL modes */
>>>>   #define SPAPR_CAP_AIL_MODE_3            0x0C
>>>> +/* Nested PAPR */
>>>> +#define SPAPR_CAP_NESTED_PAPR           0x0D
>>>>   /* Num Caps */
>>>> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
>>>> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
>>>>   /*
>>>>    * Capability Values
>>>> @@ -592,6 +594,7 @@ struct SpaprMachineState {
>>>>   #define H_GUEST_CREATE_VCPU      0x474
>>>>   #define H_GUEST_GET_STATE        0x478
>>>>   #define H_GUEST_SET_STATE        0x47C
>>>> +#define H_GUEST_RUN_VCPU         0x480
>>>>   #define H_GUEST_DELETE           0x488
>>>>   #define MAX_HCALL_OPCODE         H_GUEST_DELETE
>>>> @@ -996,6 +999,7 @@ extern const VMStateDescription 
>>>> vmstate_spapr_cap_sbbc;
>>>>   extern const VMStateDescription vmstate_spapr_cap_ibs;
>>>>   extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
>>>>   extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
>>>> +extern const VMStateDescription vmstate_spapr_cap_nested_papr;
>>>>   extern const VMStateDescription vmstate_spapr_cap_large_decr;
>>>>   extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
>>>>   extern const VMStateDescription vmstate_spapr_cap_fwnmi;
>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>> index 54fc01e462..beb23fae8f 100644
>>>> --- a/hw/ppc/spapr.c
>>>> +++ b/hw/ppc/spapr.c
>>>> @@ -2121,6 +2121,7 @@ static const VMStateDescription vmstate_spapr = {
>>>>           &vmstate_spapr_cap_fwnmi,
>>>>           &vmstate_spapr_fwnmi,
>>>>           &vmstate_spapr_cap_rpt_invalidate,
>>>> +        &vmstate_spapr_cap_nested_papr,
>>>>           NULL
>>>>       }
>>>>   };
>>>> @@ -4687,6 +4688,7 @@ static void 
>>>> spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>       smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
>>>>       smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 
>>>> 64kiB */
>>>>       smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>>> +    smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF;
>>>>       smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = 
>>>> SPAPR_CAP_ON;
>>>>       smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
>>>>       smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
>>>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>>>> index e889244e52..d6d5a6b8df 100644
>>>> --- a/hw/ppc/spapr_caps.c
>>>> +++ b/hw/ppc/spapr_caps.c
>>>> @@ -487,6 +487,58 @@ static void 
>>>> cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
>>>>               error_append_hint(errp, "Try appending -machine 
>>>> cap-nested-hv=off "
>>>>                                       "or use threads=1 with -smp\n");
>>>>           }
>>>> +        if (spapr_nested_api(spapr) &&
>>>> +            spapr_nested_api(spapr) != NESTED_API_KVM_HV) {
>>>> +            error_setg(errp, "Nested-HV APIs are mutually 
>>>> exclusive/incompatible");
>>>> +            error_append_hint(errp, "Please use either 
>>>> cap-nested-hv or "
>>>> +                                    "cap-nested-papr to proceed.\n");
>>>> +            return;
>>>> +        } else {
>>>> +            spapr->nested.api = NESTED_API_KVM_HV;
>>>> +        }
>>>> +    }
>>>> +}
>>>> +
>>>> +static void cap_nested_papr_apply(SpaprMachineState *spapr,
>>>> +                                    uint8_t val, Error **errp)
>>>> +{
>>>> +    ERRP_GUARD();
>>>> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
>>>> +    CPUPPCState *env = &cpu->env;
>>>> +
>>>> +    if (!val) {
>>>> +        /* capability disabled by default */
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (tcg_enabled()) {
>>>> +        if (!(env->insns_flags2 & PPC2_ISA300)) {
>>>> +            error_setg(errp, "Nested-PAPR only supported on POWER9 
>>>> and later");
>>>> +            error_append_hint(errp,
>>>> +                              "Try appending -machine 
>>>> cap-nested-papr=off\n");
>>>> +            return;
>>>> +        }
>>>> +        if (spapr_nested_api(spapr) &&
>>>> +            spapr_nested_api(spapr) != NESTED_API_PAPR) {
>>>> +            error_setg(errp, "Nested-HV APIs are mutually 
>>>> exclusive/incompatible");
>>>> +            error_append_hint(errp, "Please use either 
>>>> cap-nested-hv or "
>>>> +                                    "cap-nested-papr to proceed.\n");
>>>> +            return;
>>>> +        } else {
>>>> +            spapr->nested.api = NESTED_API_PAPR;
>>>> +        }
>>>> +
>>>> +    } else if (kvm_enabled()) {
>>>> +        /*
>>>> +         * this gets executed in L1 qemu when L2 is launched,
>>>> +         * needs kvm-hv support in L1 kernel.
>>>> +         */
>>>> +        if (!kvmppc_has_cap_nested_kvm_hv()) {
>>>> +            error_setg(errp,
>>>> +                       "KVM implementation does not support 
>>>> Nested-HV");
>>>> +        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
>>>> +            error_setg(errp, "Error enabling Nested-HV with KVM");
>>>> +        }
>>>
>>> I'll just disable this on KVM for now. With that changed,
>>>
>>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>>>
>>
>> AFAIK, v2 api also expects this capability to be enabled on L1 kernel.
>> I guess the reason is the L1 implementation has used the same capab and
>> extended to be used with v2 api. So, this check is needed in L1 Qemu for
>> now. We may revisit L1 implementation later to see if a change is
>> appropriate.
> 
> Please ignore above response. I think my observation was based on older 
> version of L1 implementation. This doesnt seem to be an issue with 
> upstream L1. You may disable the kvm_enabled() path for now. I just 
> tested and it works fine.

Here's the incremental fix:

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index d6d5a6b8df..c14fdd32f5 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -528,17 +528,6 @@ static void cap_nested_papr_apply(SpaprMachineState 
*spapr,
              spapr->nested.api = NESTED_API_PAPR;
          }

-    } else if (kvm_enabled()) {
-        /*
-         * this gets executed in L1 qemu when L2 is launched,
-         * needs kvm-hv support in L1 kernel.
-         */
-        if (!kvmppc_has_cap_nested_kvm_hv()) {
-            error_setg(errp,
-                       "KVM implementation does not support Nested-HV");
-        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
-            error_setg(errp, "Error enabling Nested-HV with KVM");
-        }
      }
  }

regards,
Harsh
> 
> regards,
> Harsh
> 
>>
>> regards,
>> Harsh
>>
>>>>       }
>>>>   }
>>>> @@ -735,6 +787,15 @@ SpaprCapabilityInfo 
>>>> capability_table[SPAPR_CAP_NUM] = {
>>>>           .type = "bool",
>>>>           .apply = cap_nested_kvm_hv_apply,
>>>>       },
>>>> +    [SPAPR_CAP_NESTED_PAPR] = {
>>>> +        .name = "nested-papr",
>>>> +        .description = "Allow Nested HV (PAPR API)",
>>>> +        .index = SPAPR_CAP_NESTED_PAPR,
>>>> +        .get = spapr_cap_get_bool,
>>>> +        .set = spapr_cap_set_bool,
>>>> +        .type = "bool",
>>>> +        .apply = cap_nested_papr_apply,
>>>> +    },
>>>>       [SPAPR_CAP_LARGE_DECREMENTER] = {
>>>>           .name = "large-decr",
>>>>           .description = "Allow Large Decrementer",
>>>> @@ -919,6 +980,7 @@ SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
>>>>   SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
>>>>   SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>>>>   SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>>>> +SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR);
>>>>   SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>>>>   SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>>>>   SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
>>>> diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
>>>> index 597dba7fdc..8db9dc19e3 100644
>>>> --- a/hw/ppc/spapr_nested.c
>>>> +++ b/hw/ppc/spapr_nested.c
>>>> @@ -13,13 +13,15 @@
>>>>   void spapr_nested_reset(SpaprMachineState *spapr)
>>>>   {
>>>>       if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
>>>> -        spapr->nested.api = NESTED_API_KVM_HV;
>>>>           spapr_unregister_nested_hv();
>>>>           spapr_register_nested_hv();
>>>> -    } else {
>>>> -        spapr->nested.api = 0;
>>>> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_PAPR)) {
>>>>           spapr->nested.capabilities_set = false;
>>>> +        spapr_unregister_nested_papr();
>>>> +        spapr_register_nested_papr();
>>>>           spapr_nested_gsb_init();
>>>> +    } else {
>>>> +        spapr->nested.api = 0;
>>>>       }
>>>>   }
>>>
Harsh Prateek Bora March 12, 2024, 1:18 p.m. UTC | #7
Hi Nick,

Updated incremental fix below:

On 3/12/24 18:21, Harsh Prateek Bora wrote:
> 
> 
> On 3/12/24 18:17, Harsh Prateek Bora wrote:
>> Hi Nick,
>>
>> On 3/12/24 17:41, Harsh Prateek Bora wrote:
>>> Hi Nick,
>>>
>>> On 3/12/24 17:21, Nicholas Piggin wrote:
>>>> On Fri Mar 8, 2024 at 9:19 PM AEST, Harsh Prateek Bora wrote:
>>>>> Introduce a SPAPR capability cap-nested-papr which enables nested PAPR
>>>>> API for nested guests. This new API is to enable support for KVM on 
>>>>> PowerVM
>>>>> and the support in Linux kernel has already merged upstream.
>>>>>
>>>>> Signed-off-by: Michael Neuling <mikey@neuling.org>
>>>>> Signed-off-by: Harsh Prateek Bora <harshpb@linux.ibm.com>
>>>>> ---
>>>>>   include/hw/ppc/spapr.h |  6 +++-
>>>>>   hw/ppc/spapr.c         |  2 ++
>>>>>   hw/ppc/spapr_caps.c    | 62 
>>>>> ++++++++++++++++++++++++++++++++++++++++++
>>>>>   hw/ppc/spapr_nested.c  |  8 ++++--
>>>>>   4 files changed, 74 insertions(+), 4 deletions(-)
>>>>>

<snip>

>>>>> +static void cap_nested_papr_apply(SpaprMachineState *spapr,
>>>>> +                                    uint8_t val, Error **errp)
>>>>> +{
>>>>> +    ERRP_GUARD();
>>>>> +    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
>>>>> +    CPUPPCState *env = &cpu->env;
>>>>> +
>>>>> +    if (!val) {
>>>>> +        /* capability disabled by default */
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    if (tcg_enabled()) {
>>>>> +        if (!(env->insns_flags2 & PPC2_ISA300)) {
>>>>> +            error_setg(errp, "Nested-PAPR only supported on POWER9 
>>>>> and later");
>>>>> +            error_append_hint(errp,
>>>>> +                              "Try appending -machine 
>>>>> cap-nested-papr=off\n");
>>>>> +            return;
>>>>> +        }
>>>>> +        if (spapr_nested_api(spapr) &&
>>>>> +            spapr_nested_api(spapr) != NESTED_API_PAPR) {
>>>>> +            error_setg(errp, "Nested-HV APIs are mutually 
>>>>> exclusive/incompatible");
>>>>> +            error_append_hint(errp, "Please use either 
>>>>> cap-nested-hv or "
>>>>> +                                    "cap-nested-papr to proceed.\n");
>>>>> +            return;
>>>>> +        } else {
>>>>> +            spapr->nested.api = NESTED_API_PAPR;
>>>>> +        }
>>>>> +
>>>>> +    } else if (kvm_enabled()) {
>>>>> +        /*
>>>>> +         * this gets executed in L1 qemu when L2 is launched,
>>>>> +         * needs kvm-hv support in L1 kernel.
>>>>> +         */
>>>>> +        if (!kvmppc_has_cap_nested_kvm_hv()) {
>>>>> +            error_setg(errp,
>>>>> +                       "KVM implementation does not support 
>>>>> Nested-HV");
>>>>> +        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
>>>>> +            error_setg(errp, "Error enabling Nested-HV with KVM");
>>>>> +        }
>>>>
>>>> I'll just disable this on KVM for now. With that changed,
>>>>
>>>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
>>>>
>>>
>>> AFAIK, v2 api also expects this capability to be enabled on L1 kernel.
>>> I guess the reason is the L1 implementation has used the same capab and
>>> extended to be used with v2 api. So, this check is needed in L1 Qemu for
>>> now. We may revisit L1 implementation later to see if a change is
>>> appropriate.
>>
>> Please ignore above response. I think my observation was based on 
>> older version of L1 implementation. This doesnt seem to be an issue 
>> with upstream L1. You may disable the kvm_enabled() path for now. I 
>> just tested and it works fine.
> 
> Here's the incremental fix:
> 

Updated to keep an error_setg for the kvm_enabled() case:

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index d6d5a6b8df..92d8966d60 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -527,18 +527,9 @@ static void cap_nested_papr_apply(SpaprMachineState 
*spapr,
          } else {
              spapr->nested.api = NESTED_API_PAPR;
          }
-
      } else if (kvm_enabled()) {
-        /*
-         * this gets executed in L1 qemu when L2 is launched,
-         * needs kvm-hv support in L1 kernel.
-         */
-        if (!kvmppc_has_cap_nested_kvm_hv()) {
              error_setg(errp,
-                       "KVM implementation does not support Nested-HV");
-        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
-            error_setg(errp, "Error enabling Nested-HV with KVM");
-        }
+                       "KVM implementation does not support Nested-PAPR");
      }
  }



> 
> regards,
> Harsh
>>
>> regards,
>> Harsh
>>
>>>
>>> regards,
>>> Harsh
>>>
>>>>>       }
>>>>>   }
>>>>> @@ -735,6 +787,15 @@ SpaprCapabilityInfo 
>>>>> capability_table[SPAPR_CAP_NUM] = {
>>>>>           .type = "bool",
>>>>>           .apply = cap_nested_kvm_hv_apply,
>>>>>       },
>>>>> +    [SPAPR_CAP_NESTED_PAPR] = {
>>>>> +        .name = "nested-papr",
>>>>> +        .description = "Allow Nested HV (PAPR API)",
>>>>> +        .index = SPAPR_CAP_NESTED_PAPR,
>>>>> +        .get = spapr_cap_get_bool,
>>>>> +        .set = spapr_cap_set_bool,
>>>>> +        .type = "bool",
>>>>> +        .apply = cap_nested_papr_apply,
>>>>> +    },
>>>>>       [SPAPR_CAP_LARGE_DECREMENTER] = {
>>>>>           .name = "large-decr",
>>>>>           .description = "Allow Large Decrementer",
>>>>> @@ -919,6 +980,7 @@ SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
>>>>>   SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
>>>>>   SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>>>>>   SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>>>>> +SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR);
>>>>>   SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>>>>>   SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>>>>>   SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
>>>>> diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
>>>>> index 597dba7fdc..8db9dc19e3 100644
>>>>> --- a/hw/ppc/spapr_nested.c
>>>>> +++ b/hw/ppc/spapr_nested.c
>>>>> @@ -13,13 +13,15 @@
>>>>>   void spapr_nested_reset(SpaprMachineState *spapr)
>>>>>   {
>>>>>       if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
>>>>> -        spapr->nested.api = NESTED_API_KVM_HV;
>>>>>           spapr_unregister_nested_hv();
>>>>>           spapr_register_nested_hv();
>>>>> -    } else {
>>>>> -        spapr->nested.api = 0;
>>>>> +    } else if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_PAPR)) {
>>>>>           spapr->nested.capabilities_set = false;
>>>>> +        spapr_unregister_nested_papr();
>>>>> +        spapr_register_nested_papr();
>>>>>           spapr_nested_gsb_init();
>>>>> +    } else {
>>>>> +        spapr->nested.api = 0;
>>>>>       }
>>>>>   }
>>>>
>
diff mbox series

Patch

diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 6223873641..4aaf23d28f 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -81,8 +81,10 @@  typedef enum {
 #define SPAPR_CAP_RPT_INVALIDATE        0x0B
 /* Support for AIL modes */
 #define SPAPR_CAP_AIL_MODE_3            0x0C
+/* Nested PAPR */
+#define SPAPR_CAP_NESTED_PAPR           0x0D
 /* Num Caps */
-#define SPAPR_CAP_NUM                   (SPAPR_CAP_AIL_MODE_3 + 1)
+#define SPAPR_CAP_NUM                   (SPAPR_CAP_NESTED_PAPR + 1)
 
 /*
  * Capability Values
@@ -592,6 +594,7 @@  struct SpaprMachineState {
 #define H_GUEST_CREATE_VCPU      0x474
 #define H_GUEST_GET_STATE        0x478
 #define H_GUEST_SET_STATE        0x47C
+#define H_GUEST_RUN_VCPU         0x480
 #define H_GUEST_DELETE           0x488
 
 #define MAX_HCALL_OPCODE         H_GUEST_DELETE
@@ -996,6 +999,7 @@  extern const VMStateDescription vmstate_spapr_cap_sbbc;
 extern const VMStateDescription vmstate_spapr_cap_ibs;
 extern const VMStateDescription vmstate_spapr_cap_hpt_maxpagesize;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
+extern const VMStateDescription vmstate_spapr_cap_nested_papr;
 extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
 extern const VMStateDescription vmstate_spapr_cap_fwnmi;
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 54fc01e462..beb23fae8f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2121,6 +2121,7 @@  static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_cap_fwnmi,
         &vmstate_spapr_fwnmi,
         &vmstate_spapr_cap_rpt_invalidate,
+        &vmstate_spapr_cap_nested_papr,
         NULL
     }
 };
@@ -4687,6 +4688,7 @@  static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
     smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
     smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_NESTED_PAPR] = SPAPR_CAP_OFF;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index e889244e52..d6d5a6b8df 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -487,6 +487,58 @@  static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
             error_append_hint(errp, "Try appending -machine cap-nested-hv=off "
                                     "or use threads=1 with -smp\n");
         }
+        if (spapr_nested_api(spapr) &&
+            spapr_nested_api(spapr) != NESTED_API_KVM_HV) {
+            error_setg(errp, "Nested-HV APIs are mutually exclusive/incompatible");
+            error_append_hint(errp, "Please use either cap-nested-hv or "
+                                    "cap-nested-papr to proceed.\n");
+            return;
+        } else {
+            spapr->nested.api = NESTED_API_KVM_HV;
+        }
+    }
+}
+
+static void cap_nested_papr_apply(SpaprMachineState *spapr,
+                                    uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+    CPUPPCState *env = &cpu->env;
+
+    if (!val) {
+        /* capability disabled by default */
+        return;
+    }
+
+    if (tcg_enabled()) {
+        if (!(env->insns_flags2 & PPC2_ISA300)) {
+            error_setg(errp, "Nested-PAPR only supported on POWER9 and later");
+            error_append_hint(errp,
+                              "Try appending -machine cap-nested-papr=off\n");
+            return;
+        }
+        if (spapr_nested_api(spapr) &&
+            spapr_nested_api(spapr) != NESTED_API_PAPR) {
+            error_setg(errp, "Nested-HV APIs are mutually exclusive/incompatible");
+            error_append_hint(errp, "Please use either cap-nested-hv or "
+                                    "cap-nested-papr to proceed.\n");
+            return;
+        } else {
+            spapr->nested.api = NESTED_API_PAPR;
+        }
+
+    } else if (kvm_enabled()) {
+        /*
+         * this gets executed in L1 qemu when L2 is launched,
+         * needs kvm-hv support in L1 kernel.
+         */
+        if (!kvmppc_has_cap_nested_kvm_hv()) {
+            error_setg(errp,
+                       "KVM implementation does not support Nested-HV");
+        } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
+            error_setg(errp, "Error enabling Nested-HV with KVM");
+        }
     }
 }
 
@@ -735,6 +787,15 @@  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "bool",
         .apply = cap_nested_kvm_hv_apply,
     },
+    [SPAPR_CAP_NESTED_PAPR] = {
+        .name = "nested-papr",
+        .description = "Allow Nested HV (PAPR API)",
+        .index = SPAPR_CAP_NESTED_PAPR,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_nested_papr_apply,
+    },
     [SPAPR_CAP_LARGE_DECREMENTER] = {
         .name = "large-decr",
         .description = "Allow Large Decrementer",
@@ -919,6 +980,7 @@  SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
 SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
 SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
+SPAPR_CAP_MIG_STATE(nested_papr, SPAPR_CAP_NESTED_PAPR);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
 SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c
index 597dba7fdc..8db9dc19e3 100644
--- a/hw/ppc/spapr_nested.c
+++ b/hw/ppc/spapr_nested.c
@@ -13,13 +13,15 @@ 
 void spapr_nested_reset(SpaprMachineState *spapr)
 {
     if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_KVM_HV)) {
-        spapr->nested.api = NESTED_API_KVM_HV;
         spapr_unregister_nested_hv();
         spapr_register_nested_hv();
-    } else {
-        spapr->nested.api = 0;
+    } else if (spapr_get_cap(spapr, SPAPR_CAP_NESTED_PAPR)) {
         spapr->nested.capabilities_set = false;
+        spapr_unregister_nested_papr();
+        spapr_register_nested_papr();
         spapr_nested_gsb_init();
+    } else {
+        spapr->nested.api = 0;
     }
 }