diff mbox

[qemu,v4,3/4] nvdimm, acpi: support NFIT platform capabilities

Message ID 20180521163203.26590-4-ross.zwisler@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ross Zwisler May 21, 2018, 4:32 p.m. UTC
Add a machine command line option to allow the user to control the Platform
Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
Structure was added in ACPI 6.2 Errata A.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 docs/nvdimm.txt         | 27 +++++++++++++++++++++++++++
 hw/acpi/nvdimm.c        | 45 +++++++++++++++++++++++++++++++++++++++++----
 hw/i386/pc.c            | 31 +++++++++++++++++++++++++++++++
 include/hw/i386/pc.h    |  1 +
 include/hw/mem/nvdimm.h |  5 +++++
 5 files changed, 105 insertions(+), 4 deletions(-)

Comments

Michael S. Tsirkin June 5, 2018, 3:25 p.m. UTC | #1
On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
> Add a machine command line option to allow the user to control the Platform
> Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> Structure was added in ACPI 6.2 Errata A.
> 
> Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>

I tried playing with it and encoding the capabilities is
quite awkward.

Can we add bits for specific capabilities instead of nvdimm-cap?

How about:

"cpu-flush-on-power-loss-cap"
"memory-flush-on-power-loss-cap"
"byte-addressable-mirroring-cap"

?



> ---
>  docs/nvdimm.txt         | 27 +++++++++++++++++++++++++++
>  hw/acpi/nvdimm.c        | 45 +++++++++++++++++++++++++++++++++++++++++----
>  hw/i386/pc.c            | 31 +++++++++++++++++++++++++++++++
>  include/hw/i386/pc.h    |  1 +
>  include/hw/mem/nvdimm.h |  5 +++++
>  5 files changed, 105 insertions(+), 4 deletions(-)
> 
> diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
> index e903d8bb09..8b48fb4633 100644
> --- a/docs/nvdimm.txt
> +++ b/docs/nvdimm.txt
> @@ -153,3 +153,30 @@ guest NVDIMM region mapping structure.  This unarmed flag indicates
>  guest software that this vNVDIMM device contains a region that cannot
>  accept persistent writes. In result, for example, the guest Linux
>  NVDIMM driver, marks such vNVDIMM device as read-only.
> +
> +Platform Capabilities
> +---------------------
> +
> +ACPI 6.2 Errata A added support for a new Platform Capabilities Structure
> +which allows the platform to communicate what features it supports related to
> +NVDIMM data durability.  Users can provide a capabilities value to a guest via
> +the optional "nvdimm-cap" machine command line option:
> +
> +    -machine pc,accel=kvm,nvdimm,nvdimm-cap=2
> +
> +This "nvdimm-cap" field is an integer, and is the combined value of the
> +various capability bits defined in table 5-137 of the ACPI 6.2 Errata A spec.
> +
> +Here is a quick summary of the three bits that are defined as of that spec:
> +
> +Bit[0] - CPU Cache Flush to NVDIMM Durability on Power Loss Capable.
> +Bit[1] - Memory Controller Flush to NVDIMM Durability on Power Loss Capable.
> +         Note: If bit 0 is set to 1 then this bit shall be set to 1 as well.
> +Bit[2] - Byte Addressable Persistent Memory Hardware Mirroring Capable.
> +
> +So, a "nvdimm-cap" value of 2 would mean that the platform supports Memory
> +Controller Flush on Power Loss, a value of 3 would mean that the platform
> +supports CPU Cache Flush and Memory Controller Flush on Power Loss, etc.
> +
> +For a complete list of the flags available and for more detailed descriptions,
> +please consult the ACPI spec.
> diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
> index 59d6e4254c..87e4280c71 100644
> --- a/hw/acpi/nvdimm.c
> +++ b/hw/acpi/nvdimm.c
> @@ -169,6 +169,21 @@ struct NvdimmNfitControlRegion {
>  } QEMU_PACKED;
>  typedef struct NvdimmNfitControlRegion NvdimmNfitControlRegion;
>  
> +/*
> + * NVDIMM Platform Capabilities Structure
> + *
> + * Defined in section 5.2.25.9 of ACPI 6.2 Errata A, September 2017
> + */
> +struct NvdimmNfitPlatformCaps {
> +    uint16_t type;
> +    uint16_t length;
> +    uint8_t highest_cap;
> +    uint8_t reserved[3];
> +    uint32_t capabilities;
> +    uint8_t reserved2[4];
> +} QEMU_PACKED;
> +typedef struct NvdimmNfitPlatformCaps NvdimmNfitPlatformCaps;
> +
>  /*
>   * Module serial number is a unique number for each device. We use the
>   * slot id of NVDIMM device to generate this number so that each device
> @@ -351,7 +366,23 @@ static void nvdimm_build_structure_dcr(GArray *structures, DeviceState *dev)
>                                           JEDEC Annex L Release 3. */);
>  }
>  
> -static GArray *nvdimm_build_device_structure(void)
> +/*
> + * ACPI 6.2 Errata A: 5.2.25.9 NVDIMM Platform Capabilities Structure
> + */
> +static void
> +nvdimm_build_structure_caps(GArray *structures, uint32_t capabilities)
> +{
> +    NvdimmNfitPlatformCaps *nfit_caps;
> +
> +    nfit_caps = acpi_data_push(structures, sizeof(*nfit_caps));
> +
> +    nfit_caps->type = cpu_to_le16(7 /* NVDIMM Platform Capabilities */);
> +    nfit_caps->length = cpu_to_le16(sizeof(*nfit_caps));
> +    nfit_caps->highest_cap = 31 - clz32(capabilities);
> +    nfit_caps->capabilities = cpu_to_le32(capabilities);
> +}
> +
> +static GArray *nvdimm_build_device_structure(AcpiNVDIMMState *state)
>  {
>      GSList *device_list = nvdimm_get_device_list();
>      GArray *structures = g_array_new(false, true /* clear */, 1);
> @@ -373,6 +404,10 @@ static GArray *nvdimm_build_device_structure(void)
>      }
>      g_slist_free(device_list);
>  
> +    if (state->capabilities) {
> +        nvdimm_build_structure_caps(structures, state->capabilities);
> +    }
> +
>      return structures;
>  }
>  
> @@ -381,16 +416,18 @@ static void nvdimm_init_fit_buffer(NvdimmFitBuffer *fit_buf)
>      fit_buf->fit = g_array_new(false, true /* clear */, 1);
>  }
>  
> -static void nvdimm_build_fit_buffer(NvdimmFitBuffer *fit_buf)
> +static void nvdimm_build_fit_buffer(AcpiNVDIMMState *state)
>  {
> +    NvdimmFitBuffer *fit_buf = &state->fit_buf;
> +
>      g_array_free(fit_buf->fit, true);
> -    fit_buf->fit = nvdimm_build_device_structure();
> +    fit_buf->fit = nvdimm_build_device_structure(state);
>      fit_buf->dirty = true;
>  }
>  
>  void nvdimm_plug(AcpiNVDIMMState *state)
>  {
> -    nvdimm_build_fit_buffer(&state->fit_buf);
> +    nvdimm_build_fit_buffer(state);
>  }
>  
>  static void nvdimm_build_nfit(AcpiNVDIMMState *state, GArray *table_offsets,
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index d768930d02..1b2684c549 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -2182,6 +2182,33 @@ static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
>      pcms->acpi_nvdimm_state.is_enabled = value;
>  }
>  
> +static void pc_machine_get_nvdimm_capabilities(Object *obj, Visitor *v,
> +                                               const char *name, void *opaque,
> +                                               Error **errp)
> +{
> +    PCMachineState *pcms = PC_MACHINE(obj);
> +    uint32_t value = pcms->acpi_nvdimm_state.capabilities;
> +
> +    visit_type_uint32(v, name, &value, errp);
> +}
> +
> +static void pc_machine_set_nvdimm_capabilities(Object *obj, Visitor *v,
> +                                               const char *name, void *opaque,
> +                                               Error **errp)
> +{
> +    PCMachineState *pcms = PC_MACHINE(obj);
> +    Error *error = NULL;
> +    uint32_t value;
> +
> +    visit_type_uint32(v, name, &value, &error);
> +    if (error) {
> +        error_propagate(errp, error);
> +        return;
> +    }
> +
> +    pcms->acpi_nvdimm_state.capabilities = value;
> +}
> +
>  static bool pc_machine_get_smbus(Object *obj, Error **errp)
>  {
>      PCMachineState *pcms = PC_MACHINE(obj);
> @@ -2395,6 +2422,10 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
>      object_class_property_add_bool(oc, PC_MACHINE_NVDIMM,
>          pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort);
>  
> +    object_class_property_add(oc, PC_MACHINE_NVDIMM_CAP, "uint32",
> +        pc_machine_get_nvdimm_capabilities,
> +        pc_machine_set_nvdimm_capabilities, NULL, NULL, &error_abort);
> +
>      object_class_property_add_bool(oc, PC_MACHINE_SMBUS,
>          pc_machine_get_smbus, pc_machine_set_smbus, &error_abort);
>  
> diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
> index 2a98e3ad68..e9b22f929c 100644
> --- a/include/hw/i386/pc.h
> +++ b/include/hw/i386/pc.h
> @@ -76,6 +76,7 @@ struct PCMachineState {
>  #define PC_MACHINE_VMPORT           "vmport"
>  #define PC_MACHINE_SMM              "smm"
>  #define PC_MACHINE_NVDIMM           "nvdimm"
> +#define PC_MACHINE_NVDIMM_CAP       "nvdimm-cap"
>  #define PC_MACHINE_SMBUS            "smbus"
>  #define PC_MACHINE_SATA             "sata"
>  #define PC_MACHINE_PIT              "pit"
> diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
> index 74c60332e1..3c82751bab 100644
> --- a/include/hw/mem/nvdimm.h
> +++ b/include/hw/mem/nvdimm.h
> @@ -134,6 +134,11 @@ struct AcpiNVDIMMState {
>  
>      /* the IO region used by OSPM to transfer control to QEMU. */
>      MemoryRegion io_mr;
> +
> +    /*
> +     * Platform capabilities, section 5.2.25.9 of ACPI 6.2 Errata A
> +     */
> +    int32_t capabilities;
>  };
>  typedef struct AcpiNVDIMMState AcpiNVDIMMState;
>  
> -- 
> 2.14.3
Ross Zwisler June 5, 2018, 4:42 p.m. UTC | #2
On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
> > Add a machine command line option to allow the user to control the Platform
> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> > Structure was added in ACPI 6.2 Errata A.
> > 
> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> 
> I tried playing with it and encoding the capabilities is
> quite awkward.
> 
> Can we add bits for specific capabilities instead of nvdimm-cap?
> 
> How about:
> 
> "cpu-flush-on-power-loss-cap"
> "memory-flush-on-power-loss-cap"
> "byte-addressable-mirroring-cap"

Hmmm...I don't like that as much because:

a) It's very verbose.  Looking at my current qemu command line few other
   options require that many characters, and you'd commonly be defining more
   than one of these for a given VM.

b) It means that the QEMU will need to be updated if/when new flags are added,
   because we'll have to have new options for each flag.  The current
   implementation is more future-proof because you can specify any flags
   value you want.

However, if you feel strongly about this, I'll make the change.
Dan Williams June 5, 2018, 6:15 p.m. UTC | #3
On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
<ross.zwisler@linux.intel.com> wrote:
> On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
>> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
>> > Add a machine command line option to allow the user to control the Platform
>> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
>> > Structure was added in ACPI 6.2 Errata A.
>> >
>> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
>>
>> I tried playing with it and encoding the capabilities is
>> quite awkward.
>>
>> Can we add bits for specific capabilities instead of nvdimm-cap?
>>
>> How about:
>>
>> "cpu-flush-on-power-loss-cap"
>> "memory-flush-on-power-loss-cap"
>> "byte-addressable-mirroring-cap"
>
> Hmmm...I don't like that as much because:
>
> a) It's very verbose.  Looking at my current qemu command line few other
>    options require that many characters, and you'd commonly be defining more
>    than one of these for a given VM.
>
> b) It means that the QEMU will need to be updated if/when new flags are added,
>    because we'll have to have new options for each flag.  The current
>    implementation is more future-proof because you can specify any flags
>    value you want.
>
> However, if you feel strongly about this, I'll make the change.

Straw-man: Could we do something similar with what we are doing in ndctl?

enum ndctl_persistence_domain {
        PERSISTENCE_NONE = 0,
        PERSISTENCE_MEM_CTRL = 10,
        PERSISTENCE_CPU_CACHE = 20,
        PERSISTENCE_UNKNOWN = INT_MAX,
};

...and have the command line take a number where "10" and "20" are
supported today, but allows us to adapt to new persistence domains in
the future.
Michael S. Tsirkin June 5, 2018, 6:37 p.m. UTC | #4
On Tue, Jun 05, 2018 at 11:15:00AM -0700, Dan Williams wrote:
> On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
> <ross.zwisler@linux.intel.com> wrote:
> > On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
> >> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
> >> > Add a machine command line option to allow the user to control the Platform
> >> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> >> > Structure was added in ACPI 6.2 Errata A.
> >> >
> >> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> >>
> >> I tried playing with it and encoding the capabilities is
> >> quite awkward.
> >>
> >> Can we add bits for specific capabilities instead of nvdimm-cap?
> >>
> >> How about:
> >>
> >> "cpu-flush-on-power-loss-cap"
> >> "memory-flush-on-power-loss-cap"
> >> "byte-addressable-mirroring-cap"
> >
> > Hmmm...I don't like that as much because:
> >
> > a) It's very verbose.  Looking at my current qemu command line few other
> >    options require that many characters, and you'd commonly be defining more
> >    than one of these for a given VM.
> >
> > b) It means that the QEMU will need to be updated if/when new flags are added,
> >    because we'll have to have new options for each flag.  The current
> >    implementation is more future-proof because you can specify any flags
> >    value you want.
> >
> > However, if you feel strongly about this, I'll make the change.
> 
> Straw-man: Could we do something similar with what we are doing in ndctl?
> 
> enum ndctl_persistence_domain {
>         PERSISTENCE_NONE = 0,
>         PERSISTENCE_MEM_CTRL = 10,
>         PERSISTENCE_CPU_CACHE = 20,
>         PERSISTENCE_UNKNOWN = INT_MAX,
> };
> 
> ...and have the command line take a number where "10" and "20" are
> supported today, but allows us to adapt to new persistence domains in
> the future.

I'm fine with that except can we have symbolic names instead of numbers
on command line?
Ross Zwisler June 5, 2018, 10:07 p.m. UTC | #5
On Tue, Jun 05, 2018 at 09:37:25PM +0300, Michael S. Tsirkin wrote:
> On Tue, Jun 05, 2018 at 11:15:00AM -0700, Dan Williams wrote:
> > On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
> > <ross.zwisler@linux.intel.com> wrote:
> > > On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
> > >> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
> > >> > Add a machine command line option to allow the user to control the Platform
> > >> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> > >> > Structure was added in ACPI 6.2 Errata A.
> > >> >
> > >> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> > >>
> > >> I tried playing with it and encoding the capabilities is
> > >> quite awkward.
> > >>
> > >> Can we add bits for specific capabilities instead of nvdimm-cap?
> > >>
> > >> How about:
> > >>
> > >> "cpu-flush-on-power-loss-cap"
> > >> "memory-flush-on-power-loss-cap"
> > >> "byte-addressable-mirroring-cap"
> > >
> > > Hmmm...I don't like that as much because:
> > >
> > > a) It's very verbose.  Looking at my current qemu command line few other
> > >    options require that many characters, and you'd commonly be defining more
> > >    than one of these for a given VM.
> > >
> > > b) It means that the QEMU will need to be updated if/when new flags are added,
> > >    because we'll have to have new options for each flag.  The current
> > >    implementation is more future-proof because you can specify any flags
> > >    value you want.
> > >
> > > However, if you feel strongly about this, I'll make the change.
> > 
> > Straw-man: Could we do something similar with what we are doing in ndctl?
> > 
> > enum ndctl_persistence_domain {
> >         PERSISTENCE_NONE = 0,
> >         PERSISTENCE_MEM_CTRL = 10,
> >         PERSISTENCE_CPU_CACHE = 20,
> >         PERSISTENCE_UNKNOWN = INT_MAX,
> > };
> > 
> > ...and have the command line take a number where "10" and "20" are
> > supported today, but allows us to adapt to new persistence domains in
> > the future.
> 
> I'm fine with that except can we have symbolic names instead of numbers
> on command line?
> 
> -- 
> MST

Okay, we can move to the symbolic names.  Do you want them to be that long, or
would:

nvdimm-cap-cpu
nvdimm-cap-mem-ctrl
nvdimm-cap-mirroring

or something be better?
Dan Williams June 5, 2018, 10:21 p.m. UTC | #6
On Tue, Jun 5, 2018 at 3:07 PM, Ross Zwisler
<ross.zwisler@linux.intel.com> wrote:
> On Tue, Jun 05, 2018 at 09:37:25PM +0300, Michael S. Tsirkin wrote:
>> On Tue, Jun 05, 2018 at 11:15:00AM -0700, Dan Williams wrote:
>> > On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
>> > <ross.zwisler@linux.intel.com> wrote:
>> > > On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
>> > >> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
>> > >> > Add a machine command line option to allow the user to control the Platform
>> > >> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
>> > >> > Structure was added in ACPI 6.2 Errata A.
>> > >> >
>> > >> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
>> > >>
>> > >> I tried playing with it and encoding the capabilities is
>> > >> quite awkward.
>> > >>
>> > >> Can we add bits for specific capabilities instead of nvdimm-cap?
>> > >>
>> > >> How about:
>> > >>
>> > >> "cpu-flush-on-power-loss-cap"
>> > >> "memory-flush-on-power-loss-cap"
>> > >> "byte-addressable-mirroring-cap"
>> > >
>> > > Hmmm...I don't like that as much because:
>> > >
>> > > a) It's very verbose.  Looking at my current qemu command line few other
>> > >    options require that many characters, and you'd commonly be defining more
>> > >    than one of these for a given VM.
>> > >
>> > > b) It means that the QEMU will need to be updated if/when new flags are added,
>> > >    because we'll have to have new options for each flag.  The current
>> > >    implementation is more future-proof because you can specify any flags
>> > >    value you want.
>> > >
>> > > However, if you feel strongly about this, I'll make the change.
>> >
>> > Straw-man: Could we do something similar with what we are doing in ndctl?
>> >
>> > enum ndctl_persistence_domain {
>> >         PERSISTENCE_NONE = 0,
>> >         PERSISTENCE_MEM_CTRL = 10,
>> >         PERSISTENCE_CPU_CACHE = 20,
>> >         PERSISTENCE_UNKNOWN = INT_MAX,
>> > };
>> >
>> > ...and have the command line take a number where "10" and "20" are
>> > supported today, but allows us to adapt to new persistence domains in
>> > the future.
>>
>> I'm fine with that except can we have symbolic names instead of numbers
>> on command line?
>>
>> --
>> MST
>
> Okay, we can move to the symbolic names.  Do you want them to be that long, or
> would:
>
> nvdimm-cap-cpu
> nvdimm-cap-mem-ctrl
> nvdimm-cap-mirroring

Wait, why is mirroring part of this?

I was thinking this option would be:

    --persistence-domain={cpu,mem-ctrl}

...and try not to let ACPI specifics leak into the qemu command line
interface. For example PowerPC qemu could have a persistence domain
communicated via Open Firmware or some other mechanism.
>
> or something be better?
Ross Zwisler June 6, 2018, 4:50 p.m. UTC | #7
On Tue, Jun 05, 2018 at 03:21:30PM -0700, Dan Williams wrote:
> On Tue, Jun 5, 2018 at 3:07 PM, Ross Zwisler
> <ross.zwisler@linux.intel.com> wrote:
> > On Tue, Jun 05, 2018 at 09:37:25PM +0300, Michael S. Tsirkin wrote:
> >> On Tue, Jun 05, 2018 at 11:15:00AM -0700, Dan Williams wrote:
> >> > On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
> >> > <ross.zwisler@linux.intel.com> wrote:
> >> > > On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
> >> > >> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
> >> > >> > Add a machine command line option to allow the user to control the Platform
> >> > >> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> >> > >> > Structure was added in ACPI 6.2 Errata A.
> >> > >> >
> >> > >> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> >> > >>
> >> > >> I tried playing with it and encoding the capabilities is
> >> > >> quite awkward.
> >> > >>
> >> > >> Can we add bits for specific capabilities instead of nvdimm-cap?
> >> > >>
> >> > >> How about:
> >> > >>
> >> > >> "cpu-flush-on-power-loss-cap"
> >> > >> "memory-flush-on-power-loss-cap"
> >> > >> "byte-addressable-mirroring-cap"
> >> > >
> >> > > Hmmm...I don't like that as much because:
> >> > >
> >> > > a) It's very verbose.  Looking at my current qemu command line few other
> >> > >    options require that many characters, and you'd commonly be defining more
> >> > >    than one of these for a given VM.
> >> > >
> >> > > b) It means that the QEMU will need to be updated if/when new flags are added,
> >> > >    because we'll have to have new options for each flag.  The current
> >> > >    implementation is more future-proof because you can specify any flags
> >> > >    value you want.
> >> > >
> >> > > However, if you feel strongly about this, I'll make the change.
> >> >
> >> > Straw-man: Could we do something similar with what we are doing in ndctl?
> >> >
> >> > enum ndctl_persistence_domain {
> >> >         PERSISTENCE_NONE = 0,
> >> >         PERSISTENCE_MEM_CTRL = 10,
> >> >         PERSISTENCE_CPU_CACHE = 20,
> >> >         PERSISTENCE_UNKNOWN = INT_MAX,
> >> > };
> >> >
> >> > ...and have the command line take a number where "10" and "20" are
> >> > supported today, but allows us to adapt to new persistence domains in
> >> > the future.
> >>
> >> I'm fine with that except can we have symbolic names instead of numbers
> >> on command line?
> >>
> >> --
> >> MST
> >
> > Okay, we can move to the symbolic names.  Do you want them to be that long, or
> > would:
> >
> > nvdimm-cap-cpu
> > nvdimm-cap-mem-ctrl
> > nvdimm-cap-mirroring
> 
> Wait, why is mirroring part of this?
> 
> I was thinking this option would be:
> 
>     --persistence-domain={cpu,mem-ctrl}
> 
> ...and try not to let ACPI specifics leak into the qemu command line
> interface. For example PowerPC qemu could have a persistence domain
> communicated via Open Firmware or some other mechanism.

Sure, this seems fine, though we may want to throw an "nvdimm" in the name
somewhere so it's clear what the option affects.

nvdimm-persistence={cpu,mem-ctrl} maybe?

Michael, does this work for you?
Ross Zwisler June 6, 2018, 5 p.m. UTC | #8
On Wed, Jun 06, 2018 at 10:50:51AM -0600, Ross Zwisler wrote:
> On Tue, Jun 05, 2018 at 03:21:30PM -0700, Dan Williams wrote:
> > On Tue, Jun 5, 2018 at 3:07 PM, Ross Zwisler
> > <ross.zwisler@linux.intel.com> wrote:
> > > On Tue, Jun 05, 2018 at 09:37:25PM +0300, Michael S. Tsirkin wrote:
> > >> On Tue, Jun 05, 2018 at 11:15:00AM -0700, Dan Williams wrote:
> > >> > On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
> > >> > <ross.zwisler@linux.intel.com> wrote:
> > >> > > On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
> > >> > >> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
> > >> > >> > Add a machine command line option to allow the user to control the Platform
> > >> > >> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> > >> > >> > Structure was added in ACPI 6.2 Errata A.
> > >> > >> >
> > >> > >> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> > >> > >>
> > >> > >> I tried playing with it and encoding the capabilities is
> > >> > >> quite awkward.
> > >> > >>
> > >> > >> Can we add bits for specific capabilities instead of nvdimm-cap?
> > >> > >>
> > >> > >> How about:
> > >> > >>
> > >> > >> "cpu-flush-on-power-loss-cap"
> > >> > >> "memory-flush-on-power-loss-cap"
> > >> > >> "byte-addressable-mirroring-cap"
> > >> > >
> > >> > > Hmmm...I don't like that as much because:
> > >> > >
> > >> > > a) It's very verbose.  Looking at my current qemu command line few other
> > >> > >    options require that many characters, and you'd commonly be defining more
> > >> > >    than one of these for a given VM.
> > >> > >
> > >> > > b) It means that the QEMU will need to be updated if/when new flags are added,
> > >> > >    because we'll have to have new options for each flag.  The current
> > >> > >    implementation is more future-proof because you can specify any flags
> > >> > >    value you want.
> > >> > >
> > >> > > However, if you feel strongly about this, I'll make the change.
> > >> >
> > >> > Straw-man: Could we do something similar with what we are doing in ndctl?
> > >> >
> > >> > enum ndctl_persistence_domain {
> > >> >         PERSISTENCE_NONE = 0,
> > >> >         PERSISTENCE_MEM_CTRL = 10,
> > >> >         PERSISTENCE_CPU_CACHE = 20,
> > >> >         PERSISTENCE_UNKNOWN = INT_MAX,
> > >> > };
> > >> >
> > >> > ...and have the command line take a number where "10" and "20" are
> > >> > supported today, but allows us to adapt to new persistence domains in
> > >> > the future.
> > >>
> > >> I'm fine with that except can we have symbolic names instead of numbers
> > >> on command line?
> > >>
> > >> --
> > >> MST
> > >
> > > Okay, we can move to the symbolic names.  Do you want them to be that long, or
> > > would:
> > >
> > > nvdimm-cap-cpu
> > > nvdimm-cap-mem-ctrl
> > > nvdimm-cap-mirroring
> > 
> > Wait, why is mirroring part of this?
> > 
> > I was thinking this option would be:
> > 
> >     --persistence-domain={cpu,mem-ctrl}
> > 
> > ...and try not to let ACPI specifics leak into the qemu command line
> > interface. For example PowerPC qemu could have a persistence domain
> > communicated via Open Firmware or some other mechanism.
> 
> Sure, this seems fine, though we may want to throw an "nvdimm" in the name
> somewhere so it's clear what the option affects.
> 
> nvdimm-persistence={cpu,mem-ctrl} maybe?
> 
> Michael, does this work for you?

Hmm...also, the version of these patches that used numeric values did go
upstream in QEMU.  So, do we need to leave that interface intact, and just add
a new one that uses symbolic names?  Or did you still just want to replace the
numeric one since it hasn't appeared in a QEMU release yet?
Ross Zwisler June 6, 2018, 5:08 p.m. UTC | #9
On Wed, Jun 06, 2018 at 11:00:33AM -0600, Ross Zwisler wrote:
> On Wed, Jun 06, 2018 at 10:50:51AM -0600, Ross Zwisler wrote:
> > On Tue, Jun 05, 2018 at 03:21:30PM -0700, Dan Williams wrote:
> > > On Tue, Jun 5, 2018 at 3:07 PM, Ross Zwisler
> > > <ross.zwisler@linux.intel.com> wrote:
> > > > On Tue, Jun 05, 2018 at 09:37:25PM +0300, Michael S. Tsirkin wrote:
> > > >> On Tue, Jun 05, 2018 at 11:15:00AM -0700, Dan Williams wrote:
> > > >> > On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
> > > >> > <ross.zwisler@linux.intel.com> wrote:
> > > >> > > On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
> > > >> > >> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
> > > >> > >> > Add a machine command line option to allow the user to control the Platform
> > > >> > >> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> > > >> > >> > Structure was added in ACPI 6.2 Errata A.
> > > >> > >> >
> > > >> > >> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> > > >> > >>
> > > >> > >> I tried playing with it and encoding the capabilities is
> > > >> > >> quite awkward.
> > > >> > >>
> > > >> > >> Can we add bits for specific capabilities instead of nvdimm-cap?
> > > >> > >>
> > > >> > >> How about:
> > > >> > >>
> > > >> > >> "cpu-flush-on-power-loss-cap"
> > > >> > >> "memory-flush-on-power-loss-cap"
> > > >> > >> "byte-addressable-mirroring-cap"
> > > >> > >
> > > >> > > Hmmm...I don't like that as much because:
> > > >> > >
> > > >> > > a) It's very verbose.  Looking at my current qemu command line few other
> > > >> > >    options require that many characters, and you'd commonly be defining more
> > > >> > >    than one of these for a given VM.
> > > >> > >
> > > >> > > b) It means that the QEMU will need to be updated if/when new flags are added,
> > > >> > >    because we'll have to have new options for each flag.  The current
> > > >> > >    implementation is more future-proof because you can specify any flags
> > > >> > >    value you want.
> > > >> > >
> > > >> > > However, if you feel strongly about this, I'll make the change.
> > > >> >
> > > >> > Straw-man: Could we do something similar with what we are doing in ndctl?
> > > >> >
> > > >> > enum ndctl_persistence_domain {
> > > >> >         PERSISTENCE_NONE = 0,
> > > >> >         PERSISTENCE_MEM_CTRL = 10,
> > > >> >         PERSISTENCE_CPU_CACHE = 20,
> > > >> >         PERSISTENCE_UNKNOWN = INT_MAX,
> > > >> > };
> > > >> >
> > > >> > ...and have the command line take a number where "10" and "20" are
> > > >> > supported today, but allows us to adapt to new persistence domains in
> > > >> > the future.
> > > >>
> > > >> I'm fine with that except can we have symbolic names instead of numbers
> > > >> on command line?
> > > >>
> > > >> --
> > > >> MST
> > > >
> > > > Okay, we can move to the symbolic names.  Do you want them to be that long, or
> > > > would:
> > > >
> > > > nvdimm-cap-cpu
> > > > nvdimm-cap-mem-ctrl
> > > > nvdimm-cap-mirroring
> > > 
> > > Wait, why is mirroring part of this?
> > > 
> > > I was thinking this option would be:
> > > 
> > >     --persistence-domain={cpu,mem-ctrl}
> > > 
> > > ...and try not to let ACPI specifics leak into the qemu command line
> > > interface. For example PowerPC qemu could have a persistence domain
> > > communicated via Open Firmware or some other mechanism.
> > 
> > Sure, this seems fine, though we may want to throw an "nvdimm" in the name
> > somewhere so it's clear what the option affects.
> > 
> > nvdimm-persistence={cpu,mem-ctrl} maybe?
> > 
> > Michael, does this work for you?
> 
> Hmm...also, the version of these patches that used numeric values did go
> upstream in QEMU.  So, do we need to leave that interface intact, and just add
> a new one that uses symbolic names?  Or did you still just want to replace the
> numeric one since it hasn't appeared in a QEMU release yet?

I guess another alternative would be to just add symbolic name options to the
existing command line option, i.e. allow all of these:

nvdimm-cap=3		# CPU cache
nvdimm-cap=cpu		# CPU cache
nvdimm-cap=2		# memory controller
nvdimm-cap=mem-ctrl	# memory controller

And just have them as aliases.  This retains backwards compatibility with
what is already there, allows for other numeric values without QEMU updates if
other bits are defined (though we are still tightly tied to ACPI in this
case), and provides a symbolic name which is easier to use.

Thoughts?
Michael S. Tsirkin June 6, 2018, 7:04 p.m. UTC | #10
On Wed, Jun 06, 2018 at 11:00:33AM -0600, Ross Zwisler wrote:
> On Wed, Jun 06, 2018 at 10:50:51AM -0600, Ross Zwisler wrote:
> > On Tue, Jun 05, 2018 at 03:21:30PM -0700, Dan Williams wrote:
> > > On Tue, Jun 5, 2018 at 3:07 PM, Ross Zwisler
> > > <ross.zwisler@linux.intel.com> wrote:
> > > > On Tue, Jun 05, 2018 at 09:37:25PM +0300, Michael S. Tsirkin wrote:
> > > >> On Tue, Jun 05, 2018 at 11:15:00AM -0700, Dan Williams wrote:
> > > >> > On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
> > > >> > <ross.zwisler@linux.intel.com> wrote:
> > > >> > > On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
> > > >> > >> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
> > > >> > >> > Add a machine command line option to allow the user to control the Platform
> > > >> > >> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> > > >> > >> > Structure was added in ACPI 6.2 Errata A.
> > > >> > >> >
> > > >> > >> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> > > >> > >>
> > > >> > >> I tried playing with it and encoding the capabilities is
> > > >> > >> quite awkward.
> > > >> > >>
> > > >> > >> Can we add bits for specific capabilities instead of nvdimm-cap?
> > > >> > >>
> > > >> > >> How about:
> > > >> > >>
> > > >> > >> "cpu-flush-on-power-loss-cap"
> > > >> > >> "memory-flush-on-power-loss-cap"
> > > >> > >> "byte-addressable-mirroring-cap"
> > > >> > >
> > > >> > > Hmmm...I don't like that as much because:
> > > >> > >
> > > >> > > a) It's very verbose.  Looking at my current qemu command line few other
> > > >> > >    options require that many characters, and you'd commonly be defining more
> > > >> > >    than one of these for a given VM.
> > > >> > >
> > > >> > > b) It means that the QEMU will need to be updated if/when new flags are added,
> > > >> > >    because we'll have to have new options for each flag.  The current
> > > >> > >    implementation is more future-proof because you can specify any flags
> > > >> > >    value you want.
> > > >> > >
> > > >> > > However, if you feel strongly about this, I'll make the change.
> > > >> >
> > > >> > Straw-man: Could we do something similar with what we are doing in ndctl?
> > > >> >
> > > >> > enum ndctl_persistence_domain {
> > > >> >         PERSISTENCE_NONE = 0,
> > > >> >         PERSISTENCE_MEM_CTRL = 10,
> > > >> >         PERSISTENCE_CPU_CACHE = 20,
> > > >> >         PERSISTENCE_UNKNOWN = INT_MAX,
> > > >> > };
> > > >> >
> > > >> > ...and have the command line take a number where "10" and "20" are
> > > >> > supported today, but allows us to adapt to new persistence domains in
> > > >> > the future.
> > > >>
> > > >> I'm fine with that except can we have symbolic names instead of numbers
> > > >> on command line?
> > > >>
> > > >> --
> > > >> MST
> > > >
> > > > Okay, we can move to the symbolic names.  Do you want them to be that long, or
> > > > would:
> > > >
> > > > nvdimm-cap-cpu
> > > > nvdimm-cap-mem-ctrl
> > > > nvdimm-cap-mirroring
> > > 
> > > Wait, why is mirroring part of this?
> > > 
> > > I was thinking this option would be:
> > > 
> > >     --persistence-domain={cpu,mem-ctrl}
> > > 
> > > ...and try not to let ACPI specifics leak into the qemu command line
> > > interface. For example PowerPC qemu could have a persistence domain
> > > communicated via Open Firmware or some other mechanism.
> > 
> > Sure, this seems fine, though we may want to throw an "nvdimm" in the name
> > somewhere so it's clear what the option affects.
> > 
> > nvdimm-persistence={cpu,mem-ctrl} maybe?
> > 
> > Michael, does this work for you?
> 
> Hmm...also, the version of these patches that used numeric values did go
> upstream in QEMU.  So, do we need to leave that interface intact, and just add
> a new one that uses symbolic names?  Or did you still just want to replace the
> numeric one since it hasn't appeared in a QEMU release yet?

The later. No release => no stable APIs.
Michael S. Tsirkin June 6, 2018, 7:06 p.m. UTC | #11
On Wed, Jun 06, 2018 at 11:08:49AM -0600, Ross Zwisler wrote:
> On Wed, Jun 06, 2018 at 11:00:33AM -0600, Ross Zwisler wrote:
> > On Wed, Jun 06, 2018 at 10:50:51AM -0600, Ross Zwisler wrote:
> > > On Tue, Jun 05, 2018 at 03:21:30PM -0700, Dan Williams wrote:
> > > > On Tue, Jun 5, 2018 at 3:07 PM, Ross Zwisler
> > > > <ross.zwisler@linux.intel.com> wrote:
> > > > > On Tue, Jun 05, 2018 at 09:37:25PM +0300, Michael S. Tsirkin wrote:
> > > > >> On Tue, Jun 05, 2018 at 11:15:00AM -0700, Dan Williams wrote:
> > > > >> > On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
> > > > >> > <ross.zwisler@linux.intel.com> wrote:
> > > > >> > > On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
> > > > >> > >> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
> > > > >> > >> > Add a machine command line option to allow the user to control the Platform
> > > > >> > >> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> > > > >> > >> > Structure was added in ACPI 6.2 Errata A.
> > > > >> > >> >
> > > > >> > >> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> > > > >> > >>
> > > > >> > >> I tried playing with it and encoding the capabilities is
> > > > >> > >> quite awkward.
> > > > >> > >>
> > > > >> > >> Can we add bits for specific capabilities instead of nvdimm-cap?
> > > > >> > >>
> > > > >> > >> How about:
> > > > >> > >>
> > > > >> > >> "cpu-flush-on-power-loss-cap"
> > > > >> > >> "memory-flush-on-power-loss-cap"
> > > > >> > >> "byte-addressable-mirroring-cap"
> > > > >> > >
> > > > >> > > Hmmm...I don't like that as much because:
> > > > >> > >
> > > > >> > > a) It's very verbose.  Looking at my current qemu command line few other
> > > > >> > >    options require that many characters, and you'd commonly be defining more
> > > > >> > >    than one of these for a given VM.
> > > > >> > >
> > > > >> > > b) It means that the QEMU will need to be updated if/when new flags are added,
> > > > >> > >    because we'll have to have new options for each flag.  The current
> > > > >> > >    implementation is more future-proof because you can specify any flags
> > > > >> > >    value you want.
> > > > >> > >
> > > > >> > > However, if you feel strongly about this, I'll make the change.
> > > > >> >
> > > > >> > Straw-man: Could we do something similar with what we are doing in ndctl?
> > > > >> >
> > > > >> > enum ndctl_persistence_domain {
> > > > >> >         PERSISTENCE_NONE = 0,
> > > > >> >         PERSISTENCE_MEM_CTRL = 10,
> > > > >> >         PERSISTENCE_CPU_CACHE = 20,
> > > > >> >         PERSISTENCE_UNKNOWN = INT_MAX,
> > > > >> > };
> > > > >> >
> > > > >> > ...and have the command line take a number where "10" and "20" are
> > > > >> > supported today, but allows us to adapt to new persistence domains in
> > > > >> > the future.
> > > > >>
> > > > >> I'm fine with that except can we have symbolic names instead of numbers
> > > > >> on command line?
> > > > >>
> > > > >> --
> > > > >> MST
> > > > >
> > > > > Okay, we can move to the symbolic names.  Do you want them to be that long, or
> > > > > would:
> > > > >
> > > > > nvdimm-cap-cpu
> > > > > nvdimm-cap-mem-ctrl
> > > > > nvdimm-cap-mirroring
> > > > 
> > > > Wait, why is mirroring part of this?
> > > > 
> > > > I was thinking this option would be:
> > > > 
> > > >     --persistence-domain={cpu,mem-ctrl}
> > > > 
> > > > ...and try not to let ACPI specifics leak into the qemu command line
> > > > interface. For example PowerPC qemu could have a persistence domain
> > > > communicated via Open Firmware or some other mechanism.
> > > 
> > > Sure, this seems fine, though we may want to throw an "nvdimm" in the name
> > > somewhere so it's clear what the option affects.
> > > 
> > > nvdimm-persistence={cpu,mem-ctrl} maybe?
> > > 
> > > Michael, does this work for you?
> > 
> > Hmm...also, the version of these patches that used numeric values did go
> > upstream in QEMU.  So, do we need to leave that interface intact, and just add
> > a new one that uses symbolic names?  Or did you still just want to replace the
> > numeric one since it hasn't appeared in a QEMU release yet?
> 
> I guess another alternative would be to just add symbolic name options to the
> existing command line option, i.e. allow all of these:
> 
> nvdimm-cap=3		# CPU cache
> nvdimm-cap=cpu		# CPU cache
> nvdimm-cap=2		# memory controller
> nvdimm-cap=mem-ctrl	# memory controller
> 
> And just have them as aliases.  This retains backwards compatibility with
> what is already there, allows for other numeric values without QEMU updates if
> other bits are defined (though we are still tightly tied to ACPI in this
> case), and provides a symbolic name which is easier to use.
> 
> Thoughts?

I'm inclined to say let's just keep the symbolic names.
Less of a chance users configure something incorrectly,
it somehow kind of works and then we get stuck with working
around these bugs.
Michael S. Tsirkin June 6, 2018, 7:07 p.m. UTC | #12
On Wed, Jun 06, 2018 at 10:50:51AM -0600, Ross Zwisler wrote:
> On Tue, Jun 05, 2018 at 03:21:30PM -0700, Dan Williams wrote:
> > On Tue, Jun 5, 2018 at 3:07 PM, Ross Zwisler
> > <ross.zwisler@linux.intel.com> wrote:
> > > On Tue, Jun 05, 2018 at 09:37:25PM +0300, Michael S. Tsirkin wrote:
> > >> On Tue, Jun 05, 2018 at 11:15:00AM -0700, Dan Williams wrote:
> > >> > On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
> > >> > <ross.zwisler@linux.intel.com> wrote:
> > >> > > On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:
> > >> > >> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:
> > >> > >> > Add a machine command line option to allow the user to control the Platform
> > >> > >> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> > >> > >> > Structure was added in ACPI 6.2 Errata A.
> > >> > >> >
> > >> > >> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> > >> > >>
> > >> > >> I tried playing with it and encoding the capabilities is
> > >> > >> quite awkward.
> > >> > >>
> > >> > >> Can we add bits for specific capabilities instead of nvdimm-cap?
> > >> > >>
> > >> > >> How about:
> > >> > >>
> > >> > >> "cpu-flush-on-power-loss-cap"
> > >> > >> "memory-flush-on-power-loss-cap"
> > >> > >> "byte-addressable-mirroring-cap"
> > >> > >
> > >> > > Hmmm...I don't like that as much because:
> > >> > >
> > >> > > a) It's very verbose.  Looking at my current qemu command line few other
> > >> > >    options require that many characters, and you'd commonly be defining more
> > >> > >    than one of these for a given VM.
> > >> > >
> > >> > > b) It means that the QEMU will need to be updated if/when new flags are added,
> > >> > >    because we'll have to have new options for each flag.  The current
> > >> > >    implementation is more future-proof because you can specify any flags
> > >> > >    value you want.
> > >> > >
> > >> > > However, if you feel strongly about this, I'll make the change.
> > >> >
> > >> > Straw-man: Could we do something similar with what we are doing in ndctl?
> > >> >
> > >> > enum ndctl_persistence_domain {
> > >> >         PERSISTENCE_NONE = 0,
> > >> >         PERSISTENCE_MEM_CTRL = 10,
> > >> >         PERSISTENCE_CPU_CACHE = 20,
> > >> >         PERSISTENCE_UNKNOWN = INT_MAX,
> > >> > };
> > >> >
> > >> > ...and have the command line take a number where "10" and "20" are
> > >> > supported today, but allows us to adapt to new persistence domains in
> > >> > the future.
> > >>
> > >> I'm fine with that except can we have symbolic names instead of numbers
> > >> on command line?
> > >>
> > >> --
> > >> MST
> > >
> > > Okay, we can move to the symbolic names.  Do you want them to be that long, or
> > > would:
> > >
> > > nvdimm-cap-cpu
> > > nvdimm-cap-mem-ctrl
> > > nvdimm-cap-mirroring
> > 
> > Wait, why is mirroring part of this?
> > 
> > I was thinking this option would be:
> > 
> >     --persistence-domain={cpu,mem-ctrl}
> > 
> > ...and try not to let ACPI specifics leak into the qemu command line
> > interface. For example PowerPC qemu could have a persistence domain
> > communicated via Open Firmware or some other mechanism.
> 
> Sure, this seems fine, though we may want to throw an "nvdimm" in the name
> somewhere so it's clear what the option affects.
> 
> nvdimm-persistence={cpu,mem-ctrl} maybe?
> 
> Michael, does this work for you?

Sounds good to me.
Elliott, Robert (Servers) June 6, 2018, 11:20 p.m. UTC | #13
> > Okay, we can move to the symbolic names.  Do you want them to be that
> long, or
> > would:
> >
> > nvdimm-cap-cpu
> > nvdimm-cap-mem-ctrl
> > nvdimm-cap-mirroring
> 
> Wait, why is mirroring part of this?

This data structure is intended to report any kind of platform capability, not 
just platform persistence capabilities.

We could add a short symbolic name to the definitions in ACPI that matches
the ones selected for this command line option, if that'll help people
find the right names to use.

I recommend mc rather than mem-ctrl to keep dashes as special.
Dan Williams June 6, 2018, 11:40 p.m. UTC | #14
On Wed, Jun 6, 2018 at 4:20 PM, Elliott, Robert (Persistent Memory)
<elliott@hpe.com> wrote:
>
>> > Okay, we can move to the symbolic names.  Do you want them to be that
>> long, or
>> > would:
>> >
>> > nvdimm-cap-cpu
>> > nvdimm-cap-mem-ctrl
>> > nvdimm-cap-mirroring
>>
>> Wait, why is mirroring part of this?
>
> This data structure is intended to report any kind of platform capability, not
> just platform persistence capabilities.

Yes, but here's nothing actionable that a qemu guest OS can do with
that mirroring information, so there's no need at this time to add cli
cruft and code to support it.
Michael S. Tsirkin June 7, 2018, 3:29 p.m. UTC | #15
On Wed, Jun 06, 2018 at 04:40:31PM -0700, Dan Williams wrote:
> On Wed, Jun 6, 2018 at 4:20 PM, Elliott, Robert (Persistent Memory)
> <elliott@hpe.com> wrote:
> >
> >> > Okay, we can move to the symbolic names.  Do you want them to be that
> >> long, or
> >> > would:
> >> >
> >> > nvdimm-cap-cpu
> >> > nvdimm-cap-mem-ctrl
> >> > nvdimm-cap-mirroring
> >>
> >> Wait, why is mirroring part of this?
> >
> > This data structure is intended to report any kind of platform capability, not
> > just platform persistence capabilities.
> 
> Yes, but here's nothing actionable that a qemu guest OS can do with
> that mirroring information, so there's no need at this time to add cli
> cruft and code to support it.

I agree.
Michael S. Tsirkin June 7, 2018, 3:30 p.m. UTC | #16
On Wed, Jun 06, 2018 at 11:20:58PM +0000, Elliott, Robert (Persistent Memory) wrote:
> 
> > > Okay, we can move to the symbolic names.  Do you want them to be that
> > long, or
> > > would:
> > >
> > > nvdimm-cap-cpu
> > > nvdimm-cap-mem-ctrl
> > > nvdimm-cap-mirroring
> > 
> > Wait, why is mirroring part of this?
> 
> This data structure is intended to report any kind of platform capability, not 
> just platform persistence capabilities.
> 
> We could add a short symbolic name to the definitions in ACPI that matches
> the ones selected for this command line option, if that'll help people
> find the right names to use.
> 
> I recommend mc rather than mem-ctrl to keep dashes as special.
> 

I'm not sure it's a good idea:
dashes aren't special in qemu, and mc's way too brief.
Igor Mammedov June 12, 2018, 11:55 a.m. UTC | #17
On Wed, 6 Jun 2018 22:06:17 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Wed, Jun 06, 2018 at 11:08:49AM -0600, Ross Zwisler wrote:
> > On Wed, Jun 06, 2018 at 11:00:33AM -0600, Ross Zwisler wrote:  
> > > On Wed, Jun 06, 2018 at 10:50:51AM -0600, Ross Zwisler wrote:  
> > > > On Tue, Jun 05, 2018 at 03:21:30PM -0700, Dan Williams wrote:  
> > > > > On Tue, Jun 5, 2018 at 3:07 PM, Ross Zwisler
> > > > > <ross.zwisler@linux.intel.com> wrote:  
> > > > > > On Tue, Jun 05, 2018 at 09:37:25PM +0300, Michael S. Tsirkin wrote:  
> > > > > >> On Tue, Jun 05, 2018 at 11:15:00AM -0700, Dan Williams wrote:  
> > > > > >> > On Tue, Jun 5, 2018 at 9:42 AM, Ross Zwisler
> > > > > >> > <ross.zwisler@linux.intel.com> wrote:  
> > > > > >> > > On Tue, Jun 05, 2018 at 06:25:27PM +0300, Michael S. Tsirkin wrote:  
> > > > > >> > >> On Mon, May 21, 2018 at 10:32:02AM -0600, Ross Zwisler wrote:  
> > > > > >> > >> > Add a machine command line option to allow the user to control the Platform
> > > > > >> > >> > Capabilities Structure in the virtualized NFIT.  This Platform Capabilities
> > > > > >> > >> > Structure was added in ACPI 6.2 Errata A.
> > > > > >> > >> >
> > > > > >> > >> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>  
> > > > > >> > >>
> > > > > >> > >> I tried playing with it and encoding the capabilities is
> > > > > >> > >> quite awkward.
> > > > > >> > >>
> > > > > >> > >> Can we add bits for specific capabilities instead of nvdimm-cap?
> > > > > >> > >>
> > > > > >> > >> How about:
> > > > > >> > >>
> > > > > >> > >> "cpu-flush-on-power-loss-cap"
> > > > > >> > >> "memory-flush-on-power-loss-cap"
> > > > > >> > >> "byte-addressable-mirroring-cap"  
> > > > > >> > >
> > > > > >> > > Hmmm...I don't like that as much because:
> > > > > >> > >
> > > > > >> > > a) It's very verbose.  Looking at my current qemu command line few other
> > > > > >> > >    options require that many characters, and you'd commonly be defining more
> > > > > >> > >    than one of these for a given VM.
> > > > > >> > >
> > > > > >> > > b) It means that the QEMU will need to be updated if/when new flags are added,
> > > > > >> > >    because we'll have to have new options for each flag.  The current
> > > > > >> > >    implementation is more future-proof because you can specify any flags
> > > > > >> > >    value you want.
> > > > > >> > >
> > > > > >> > > However, if you feel strongly about this, I'll make the change.  
> > > > > >> >
> > > > > >> > Straw-man: Could we do something similar with what we are doing in ndctl?
> > > > > >> >
> > > > > >> > enum ndctl_persistence_domain {
> > > > > >> >         PERSISTENCE_NONE = 0,
> > > > > >> >         PERSISTENCE_MEM_CTRL = 10,
> > > > > >> >         PERSISTENCE_CPU_CACHE = 20,
> > > > > >> >         PERSISTENCE_UNKNOWN = INT_MAX,
> > > > > >> > };
> > > > > >> >
> > > > > >> > ...and have the command line take a number where "10" and "20" are
> > > > > >> > supported today, but allows us to adapt to new persistence domains in
> > > > > >> > the future.  
> > > > > >>
> > > > > >> I'm fine with that except can we have symbolic names instead of numbers
> > > > > >> on command line?
> > > > > >>
> > > > > >> --
> > > > > >> MST  
> > > > > >
> > > > > > Okay, we can move to the symbolic names.  Do you want them to be that long, or
> > > > > > would:
> > > > > >
> > > > > > nvdimm-cap-cpu
> > > > > > nvdimm-cap-mem-ctrl
> > > > > > nvdimm-cap-mirroring  
> > > > > 
> > > > > Wait, why is mirroring part of this?
> > > > > 
> > > > > I was thinking this option would be:
> > > > > 
> > > > >     --persistence-domain={cpu,mem-ctrl}
> > > > > 
> > > > > ...and try not to let ACPI specifics leak into the qemu command line
> > > > > interface. For example PowerPC qemu could have a persistence domain
> > > > > communicated via Open Firmware or some other mechanism.  
> > > > 
> > > > Sure, this seems fine, though we may want to throw an "nvdimm" in the name
> > > > somewhere so it's clear what the option affects.
> > > > 
> > > > nvdimm-persistence={cpu,mem-ctrl} maybe?
> > > > 
> > > > Michael, does this work for you?  
> > > 
> > > Hmm...also, the version of these patches that used numeric values did go
> > > upstream in QEMU.  So, do we need to leave that interface intact, and just add
> > > a new one that uses symbolic names?  Or did you still just want to replace the
> > > numeric one since it hasn't appeared in a QEMU release yet?  
> > 
> > I guess another alternative would be to just add symbolic name options to the
> > existing command line option, i.e. allow all of these:
> > 
> > nvdimm-cap=3		# CPU cache
> > nvdimm-cap=cpu		# CPU cache
> > nvdimm-cap=2		# memory controller
> > nvdimm-cap=mem-ctrl	# memory controller
> > 
> > And just have them as aliases.  This retains backwards compatibility with
> > what is already there, allows for other numeric values without QEMU updates if
> > other bits are defined (though we are still tightly tied to ACPI in this
> > case), and provides a symbolic name which is easier to use.
> > 
> > Thoughts?  
> 
> I'm inclined to say let's just keep the symbolic names.
> Less of a chance users configure something incorrectly,
> it somehow kind of works and then we get stuck with working
> around these bugs.
We used numeric values before (OSPM status) in QMP which is ACPI spec defined,
so we probably should do so in this case as well, as far as there is clear
explanation for user where values come from. We can have symbolic aliases,
but it would be just extra burden to maintain (so I'd prefer not to have it).
diff mbox

Patch

diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
index e903d8bb09..8b48fb4633 100644
--- a/docs/nvdimm.txt
+++ b/docs/nvdimm.txt
@@ -153,3 +153,30 @@  guest NVDIMM region mapping structure.  This unarmed flag indicates
 guest software that this vNVDIMM device contains a region that cannot
 accept persistent writes. In result, for example, the guest Linux
 NVDIMM driver, marks such vNVDIMM device as read-only.
+
+Platform Capabilities
+---------------------
+
+ACPI 6.2 Errata A added support for a new Platform Capabilities Structure
+which allows the platform to communicate what features it supports related to
+NVDIMM data durability.  Users can provide a capabilities value to a guest via
+the optional "nvdimm-cap" machine command line option:
+
+    -machine pc,accel=kvm,nvdimm,nvdimm-cap=2
+
+This "nvdimm-cap" field is an integer, and is the combined value of the
+various capability bits defined in table 5-137 of the ACPI 6.2 Errata A spec.
+
+Here is a quick summary of the three bits that are defined as of that spec:
+
+Bit[0] - CPU Cache Flush to NVDIMM Durability on Power Loss Capable.
+Bit[1] - Memory Controller Flush to NVDIMM Durability on Power Loss Capable.
+         Note: If bit 0 is set to 1 then this bit shall be set to 1 as well.
+Bit[2] - Byte Addressable Persistent Memory Hardware Mirroring Capable.
+
+So, a "nvdimm-cap" value of 2 would mean that the platform supports Memory
+Controller Flush on Power Loss, a value of 3 would mean that the platform
+supports CPU Cache Flush and Memory Controller Flush on Power Loss, etc.
+
+For a complete list of the flags available and for more detailed descriptions,
+please consult the ACPI spec.
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 59d6e4254c..87e4280c71 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -169,6 +169,21 @@  struct NvdimmNfitControlRegion {
 } QEMU_PACKED;
 typedef struct NvdimmNfitControlRegion NvdimmNfitControlRegion;
 
+/*
+ * NVDIMM Platform Capabilities Structure
+ *
+ * Defined in section 5.2.25.9 of ACPI 6.2 Errata A, September 2017
+ */
+struct NvdimmNfitPlatformCaps {
+    uint16_t type;
+    uint16_t length;
+    uint8_t highest_cap;
+    uint8_t reserved[3];
+    uint32_t capabilities;
+    uint8_t reserved2[4];
+} QEMU_PACKED;
+typedef struct NvdimmNfitPlatformCaps NvdimmNfitPlatformCaps;
+
 /*
  * Module serial number is a unique number for each device. We use the
  * slot id of NVDIMM device to generate this number so that each device
@@ -351,7 +366,23 @@  static void nvdimm_build_structure_dcr(GArray *structures, DeviceState *dev)
                                          JEDEC Annex L Release 3. */);
 }
 
-static GArray *nvdimm_build_device_structure(void)
+/*
+ * ACPI 6.2 Errata A: 5.2.25.9 NVDIMM Platform Capabilities Structure
+ */
+static void
+nvdimm_build_structure_caps(GArray *structures, uint32_t capabilities)
+{
+    NvdimmNfitPlatformCaps *nfit_caps;
+
+    nfit_caps = acpi_data_push(structures, sizeof(*nfit_caps));
+
+    nfit_caps->type = cpu_to_le16(7 /* NVDIMM Platform Capabilities */);
+    nfit_caps->length = cpu_to_le16(sizeof(*nfit_caps));
+    nfit_caps->highest_cap = 31 - clz32(capabilities);
+    nfit_caps->capabilities = cpu_to_le32(capabilities);
+}
+
+static GArray *nvdimm_build_device_structure(AcpiNVDIMMState *state)
 {
     GSList *device_list = nvdimm_get_device_list();
     GArray *structures = g_array_new(false, true /* clear */, 1);
@@ -373,6 +404,10 @@  static GArray *nvdimm_build_device_structure(void)
     }
     g_slist_free(device_list);
 
+    if (state->capabilities) {
+        nvdimm_build_structure_caps(structures, state->capabilities);
+    }
+
     return structures;
 }
 
@@ -381,16 +416,18 @@  static void nvdimm_init_fit_buffer(NvdimmFitBuffer *fit_buf)
     fit_buf->fit = g_array_new(false, true /* clear */, 1);
 }
 
-static void nvdimm_build_fit_buffer(NvdimmFitBuffer *fit_buf)
+static void nvdimm_build_fit_buffer(AcpiNVDIMMState *state)
 {
+    NvdimmFitBuffer *fit_buf = &state->fit_buf;
+
     g_array_free(fit_buf->fit, true);
-    fit_buf->fit = nvdimm_build_device_structure();
+    fit_buf->fit = nvdimm_build_device_structure(state);
     fit_buf->dirty = true;
 }
 
 void nvdimm_plug(AcpiNVDIMMState *state)
 {
-    nvdimm_build_fit_buffer(&state->fit_buf);
+    nvdimm_build_fit_buffer(state);
 }
 
 static void nvdimm_build_nfit(AcpiNVDIMMState *state, GArray *table_offsets,
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index d768930d02..1b2684c549 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2182,6 +2182,33 @@  static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
     pcms->acpi_nvdimm_state.is_enabled = value;
 }
 
+static void pc_machine_get_nvdimm_capabilities(Object *obj, Visitor *v,
+                                               const char *name, void *opaque,
+                                               Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+    uint32_t value = pcms->acpi_nvdimm_state.capabilities;
+
+    visit_type_uint32(v, name, &value, errp);
+}
+
+static void pc_machine_set_nvdimm_capabilities(Object *obj, Visitor *v,
+                                               const char *name, void *opaque,
+                                               Error **errp)
+{
+    PCMachineState *pcms = PC_MACHINE(obj);
+    Error *error = NULL;
+    uint32_t value;
+
+    visit_type_uint32(v, name, &value, &error);
+    if (error) {
+        error_propagate(errp, error);
+        return;
+    }
+
+    pcms->acpi_nvdimm_state.capabilities = value;
+}
+
 static bool pc_machine_get_smbus(Object *obj, Error **errp)
 {
     PCMachineState *pcms = PC_MACHINE(obj);
@@ -2395,6 +2422,10 @@  static void pc_machine_class_init(ObjectClass *oc, void *data)
     object_class_property_add_bool(oc, PC_MACHINE_NVDIMM,
         pc_machine_get_nvdimm, pc_machine_set_nvdimm, &error_abort);
 
+    object_class_property_add(oc, PC_MACHINE_NVDIMM_CAP, "uint32",
+        pc_machine_get_nvdimm_capabilities,
+        pc_machine_set_nvdimm_capabilities, NULL, NULL, &error_abort);
+
     object_class_property_add_bool(oc, PC_MACHINE_SMBUS,
         pc_machine_get_smbus, pc_machine_set_smbus, &error_abort);
 
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 2a98e3ad68..e9b22f929c 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -76,6 +76,7 @@  struct PCMachineState {
 #define PC_MACHINE_VMPORT           "vmport"
 #define PC_MACHINE_SMM              "smm"
 #define PC_MACHINE_NVDIMM           "nvdimm"
+#define PC_MACHINE_NVDIMM_CAP       "nvdimm-cap"
 #define PC_MACHINE_SMBUS            "smbus"
 #define PC_MACHINE_SATA             "sata"
 #define PC_MACHINE_PIT              "pit"
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index 74c60332e1..3c82751bab 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -134,6 +134,11 @@  struct AcpiNVDIMMState {
 
     /* the IO region used by OSPM to transfer control to QEMU. */
     MemoryRegion io_mr;
+
+    /*
+     * Platform capabilities, section 5.2.25.9 of ACPI 6.2 Errata A
+     */
+    int32_t capabilities;
 };
 typedef struct AcpiNVDIMMState AcpiNVDIMMState;