diff mbox series

[REBASED,v5,2/2] spapr: nvdimm: Introduce spapr-nvdimm device

Message ID 162571304881.1030381.2406869533148471546.stgit@lep8c.aus.stglabs.ibm.com (mailing list archive)
State New
Headers show
Series spapr: nvdimm: Introduce spapr-nvdimm device | expand

Commit Message

Shivaprasad G Bhat July 8, 2021, 2:57 a.m. UTC
If the device backend is not persistent memory for the nvdimm, there is
need for explicit IO flushes on the backend to ensure persistence.

On SPAPR, the issue is addressed by adding a new hcall to request for
an explicit flush from the guest when the backend is not pmem. So, the
approach here is to convey when the hcall flush is required in a device
tree property. The guest once it knows the device backend is not pmem,
makes the hcall whenever flush is required.

To set the device tree property, the patch introduces a new papr specific
device type inheriting the nvdimm device. When the backend doesn't have
pmem="yes", the device tree property "ibm,hcall-flush-required" is set,
and the guest makes hcall H_SCM_FLUSH requesting for an explicit flush.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
---
 hw/ppc/spapr_nvdimm.c         |   46 +++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr_nvdimm.h |    4 ++++
 2 files changed, 50 insertions(+)

Comments

David Gibson Sept. 21, 2021, 6:32 a.m. UTC | #1
On Wed, Jul 07, 2021 at 09:57:31PM -0500, Shivaprasad G Bhat wrote:
> If the device backend is not persistent memory for the nvdimm, there is
> need for explicit IO flushes on the backend to ensure persistence.
> 
> On SPAPR, the issue is addressed by adding a new hcall to request for
> an explicit flush from the guest when the backend is not pmem. So, the
> approach here is to convey when the hcall flush is required in a device
> tree property. The guest once it knows the device backend is not pmem,
> makes the hcall whenever flush is required.
> 
> To set the device tree property, the patch introduces a new papr specific
> device type inheriting the nvdimm device. When the backend doesn't have
> pmem="yes", the device tree property "ibm,hcall-flush-required" is set,
> and the guest makes hcall H_SCM_FLUSH requesting for an explicit flush.
> 
> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
> ---
>  hw/ppc/spapr_nvdimm.c         |   46 +++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr_nvdimm.h |    4 ++++
>  2 files changed, 50 insertions(+)
> 
> diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
> index 4f8931ab15..4dc7c3f147 100644
> --- a/hw/ppc/spapr_nvdimm.c
> +++ b/hw/ppc/spapr_nvdimm.c
> @@ -54,6 +54,8 @@ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
>  {
>      const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
>      const MachineState *ms = MACHINE(hotplug_dev);
> +    PCDIMMDevice *dimm = PC_DIMM(nvdimm);
> +    MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem);
>      g_autofree char *uuidstr = NULL;
>      QemuUUID uuid;
>      int ret;
> @@ -91,6 +93,14 @@ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
>          return false;
>      }
>  
> +    if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM) &&
> +        (memory_region_get_fd(mr) < 0)) {
> +        error_setg(errp, "spapr-nvdimm device requires the "
> +                   "memdev %s to be of memory-backend-file type",
> +                   object_get_canonical_path_component(OBJECT(dimm->hostmem)));

It's not obvious to me why the spapr nvdimm device has an additional
restriction here over the regular nvdimm device.

> +        return false;
> +    }
> +
>      return true;
>  }
>  
> @@ -162,6 +172,21 @@ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
>                               "operating-system")));
>      _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
>  
> +    if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) {
> +        bool is_pmem = false;
> +#ifdef CONFIG_LIBPMEM
> +        PCDIMMDevice *dimm = PC_DIMM(nvdimm);
> +        HostMemoryBackend *hostmem = dimm->hostmem;
> +
> +        is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem",
> +                                           &error_abort);

Presenting to the guest a property of the backend worries me
slightly.  How the backends are synchronized between the source and
destination is out of scope for qemu: is there any possibility that we
could migrate from a host where the backend is pmem to one where it is
not (or the reverse).

I think at the least we want a property on the spapr-nvdimm object
which will override what's presented to the guest (which, yes, might
mean lying to the guest).  I think that could be important for
testing, if nothing else.

> +#endif
> +        if (!is_pmem) {
> +            _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
> +                             NULL, 0));
> +        }
> +    }
> +
>      return child_offset;
>  }
>  
> @@ -585,7 +610,16 @@ static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
>      }
>  
>      dimm = PC_DIMM(drc->dev);
> +    if (!object_dynamic_cast(OBJECT(dimm), TYPE_SPAPR_NVDIMM)) {
> +        return H_PARAMETER;
> +    }

Hmm.  If you're going to make flushes specific to spapr nvdimms, you
could put the queue of pending flushes into the spapr-nvdimm object,
rather than having a global list in the machine.

> +
>      backend = MEMORY_BACKEND(dimm->hostmem);
> +#ifdef CONFIG_LIBPMEM
> +    if (object_property_get_bool(OBJECT(backend), "pmem", &error_abort)) {
> +        return H_UNSUPPORTED;

Could you make this not be UNSUPPORTED, but instead fake the flush for
the pmem device?  Either as a no-op, or simulating the guest invoking
the right cpu cache flushes?  That seems like it would be more useful:
that way users who don't care too much about performance could just
always do a flush hcall and not have to have another path for the
"real" pmem case.

> +    }
> +#endif
>      fd = memory_region_get_fd(&backend->mr);
>  
>      if (fd < 0) {
> @@ -766,3 +800,15 @@ static void spapr_scm_register_types(void)
>  }
>  
>  type_init(spapr_scm_register_types)
> +
> +static TypeInfo spapr_nvdimm_info = {
> +    .name          = TYPE_SPAPR_NVDIMM,
> +    .parent        = TYPE_NVDIMM,
> +};
> +
> +static void spapr_nvdimm_register_types(void)
> +{
> +    type_register_static(&spapr_nvdimm_info);
> +}
> +
> +type_init(spapr_nvdimm_register_types)
> diff --git a/include/hw/ppc/spapr_nvdimm.h b/include/hw/ppc/spapr_nvdimm.h
> index 24d8e37b33..fb4e56418e 100644
> --- a/include/hw/ppc/spapr_nvdimm.h
> +++ b/include/hw/ppc/spapr_nvdimm.h
> @@ -13,6 +13,10 @@
>  #include "hw/mem/nvdimm.h"
>  #include "migration/vmstate.h"
>  
> +#define TYPE_SPAPR_NVDIMM "spapr-nvdimm"
> +OBJECT_DECLARE_SIMPLE_TYPE(SpaprNVDIMMDevice, SPAPR_NVDIMM)
> +
> +typedef struct SpaprNVDIMMDevice  SpaprNVDIMMDevice;
>  typedef struct SpaprDrc SpaprDrc;
>  typedef struct SpaprMachineState SpaprMachineState;
>  
> 
>
diff mbox series

Patch

diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
index 4f8931ab15..4dc7c3f147 100644
--- a/hw/ppc/spapr_nvdimm.c
+++ b/hw/ppc/spapr_nvdimm.c
@@ -54,6 +54,8 @@  bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
 {
     const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
     const MachineState *ms = MACHINE(hotplug_dev);
+    PCDIMMDevice *dimm = PC_DIMM(nvdimm);
+    MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem);
     g_autofree char *uuidstr = NULL;
     QemuUUID uuid;
     int ret;
@@ -91,6 +93,14 @@  bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
         return false;
     }
 
+    if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM) &&
+        (memory_region_get_fd(mr) < 0)) {
+        error_setg(errp, "spapr-nvdimm device requires the "
+                   "memdev %s to be of memory-backend-file type",
+                   object_get_canonical_path_component(OBJECT(dimm->hostmem)));
+        return false;
+    }
+
     return true;
 }
 
@@ -162,6 +172,21 @@  static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
                              "operating-system")));
     _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
 
+    if (object_dynamic_cast(OBJECT(nvdimm), TYPE_SPAPR_NVDIMM)) {
+        bool is_pmem = false;
+#ifdef CONFIG_LIBPMEM
+        PCDIMMDevice *dimm = PC_DIMM(nvdimm);
+        HostMemoryBackend *hostmem = dimm->hostmem;
+
+        is_pmem = object_property_get_bool(OBJECT(hostmem), "pmem",
+                                           &error_abort);
+#endif
+        if (!is_pmem) {
+            _FDT(fdt_setprop(fdt, child_offset, "ibm,hcall-flush-required",
+                             NULL, 0));
+        }
+    }
+
     return child_offset;
 }
 
@@ -585,7 +610,16 @@  static target_ulong h_scm_flush(PowerPCCPU *cpu, SpaprMachineState *spapr,
     }
 
     dimm = PC_DIMM(drc->dev);
+    if (!object_dynamic_cast(OBJECT(dimm), TYPE_SPAPR_NVDIMM)) {
+        return H_PARAMETER;
+    }
+
     backend = MEMORY_BACKEND(dimm->hostmem);
+#ifdef CONFIG_LIBPMEM
+    if (object_property_get_bool(OBJECT(backend), "pmem", &error_abort)) {
+        return H_UNSUPPORTED;
+    }
+#endif
     fd = memory_region_get_fd(&backend->mr);
 
     if (fd < 0) {
@@ -766,3 +800,15 @@  static void spapr_scm_register_types(void)
 }
 
 type_init(spapr_scm_register_types)
+
+static TypeInfo spapr_nvdimm_info = {
+    .name          = TYPE_SPAPR_NVDIMM,
+    .parent        = TYPE_NVDIMM,
+};
+
+static void spapr_nvdimm_register_types(void)
+{
+    type_register_static(&spapr_nvdimm_info);
+}
+
+type_init(spapr_nvdimm_register_types)
diff --git a/include/hw/ppc/spapr_nvdimm.h b/include/hw/ppc/spapr_nvdimm.h
index 24d8e37b33..fb4e56418e 100644
--- a/include/hw/ppc/spapr_nvdimm.h
+++ b/include/hw/ppc/spapr_nvdimm.h
@@ -13,6 +13,10 @@ 
 #include "hw/mem/nvdimm.h"
 #include "migration/vmstate.h"
 
+#define TYPE_SPAPR_NVDIMM "spapr-nvdimm"
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprNVDIMMDevice, SPAPR_NVDIMM)
+
+typedef struct SpaprNVDIMMDevice  SpaprNVDIMMDevice;
 typedef struct SpaprDrc SpaprDrc;
 typedef struct SpaprMachineState SpaprMachineState;