[04/15] nvdimm: support nvdimm label
diff mbox

Message ID 1458203581-59143-5-git-send-email-guangrong.xiao@linux.intel.com
State New
Headers show

Commit Message

Xiao Guangrong March 17, 2016, 8:32 a.m. UTC
Introduce a parameter, 'reserve-label', which is false by default. If
it is set, we reserve 128K of memory at the end of the backend memory
as the NVDIMM label area; 128K is the minimum namespace label size
required by the NVDIMM Namespace Spec.

Two callbacks, read_label_data() and write_label_data(), are used to
operate on the label area.

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 hw/mem/nvdimm.c         | 95 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/mem/nvdimm.h | 61 ++++++++++++++++++++++++++++++-
 2 files changed, 155 insertions(+), 1 deletion(-)

Comments

Stefan Hajnoczi March 17, 2016, 10:28 a.m. UTC | #1
On Thu, Mar 17, 2016 at 04:32:50PM +0800, Xiao Guangrong wrote:
> +static void nvdimm_init(Object *obj)
> +{
> +    object_property_add_bool(obj, "reserve-label", nvdimm_get_reserve_label,
> +                             nvdimm_set_reserve_label, NULL);

In the future users may wish for larger namespace label sizes.  This
bool option will not allow that.

Perhaps the option should be an integer called "label-size"?

> +static void nvdimm_assert_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size,
> +                                        uint64_t offset)
> +{
> +    assert(nvdimm->reserve_label &&
> +           (nvdimm->label_size >= size + offset) && (offset + size > offset));
> +}

It's not clear from this patch alone, but QEMU is not allowed to assert
due to invalid inputs from the guest.  So if input validation is
necessary here because the values may be invalid, please write if
statements and error returns.

This is important so guests cannot cause QEMU to core dump (SIGABRT
default behavior) and so that nested virtualization doesn't allow a
nested guest to DoS its parent guest.
Xiao Guangrong March 23, 2016, 3:40 a.m. UTC | #2
On 03/17/2016 06:28 PM, Stefan Hajnoczi wrote:
> On Thu, Mar 17, 2016 at 04:32:50PM +0800, Xiao Guangrong wrote:
>> +static void nvdimm_init(Object *obj)
>> +{
>> +    object_property_add_bool(obj, "reserve-label", nvdimm_get_reserve_label,
>> +                             nvdimm_set_reserve_label, NULL);
>
> In the future users may wish for larger namespace label sizes.  This
> bool option will not allow that.
>
> Perhaps the option should be an integer called "label-size"?

Yes, good to me.

>
>> +static void nvdimm_assert_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size,
>> +                                        uint64_t offset)
>> +{
>> +    assert(nvdimm->reserve_label &&
>> +           (nvdimm->label_size >= size + offset) && (offset + size > offset));
>> +}
>
> It's not clear from this patch alone, but QEMU is not allowed to assert
> due to invalid inputs from the guest.  So if input validation is
> necessary here because the values may be invalid, please write if
> statements and error returns.

The caller should check it before calling these callbacks; in our case, we
do it in nvdimm_rw_label_data_check() in patch 13.

So if that happens, it really is a QEMU internal bug.

>
> This is important so guests cannot cause QEMU to core dump (SIGABRT
> default behavior) and so that nested virtualization doesn't allow a
> nested guest to DoS its parent guest.


Yes, I understand, but that is not the case in this patch, as the assert()
cannot be triggered by the guest.

Maybe I should mention it in the changelog to make this fact clearer.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c
index 0a602f2..921e6a1 100644
--- a/hw/mem/nvdimm.c
+++ b/hw/mem/nvdimm.c
@@ -25,18 +25,113 @@ 
 #include "qemu/osdep.h"
 #include "hw/mem/nvdimm.h"
 
+/* Getter for the "reserve-label" property; @errp is never set. */
+static bool nvdimm_get_reserve_label(Object *obj, Error **errp)
+{
+    const NVDIMMDevice *dev = NVDIMM(obj);
+    return dev->reserve_label;
+}
+
+/* Setter for the "reserve-label" property; @errp is never set. */
+static void nvdimm_set_reserve_label(Object *obj, bool value, Error **errp)
+{
+    NVDIMMDevice *dev = NVDIMM(obj);
+    dev->reserve_label = value;
+}
+
+static void nvdimm_init(Object *obj)
+{
+    object_property_add_bool(obj, "reserve-label", /* label area switch */
+        nvdimm_get_reserve_label, nvdimm_set_reserve_label, NULL);
+}
+
+/* Returns nvdimm_mr, the alias that nvdimm_realize() sets up. */
+static MemoryRegion *nvdimm_get_memory_region(PCDIMMDevice *dimm)
+{
+    NVDIMMDevice *dev = NVDIMM(dimm);
+    return &dev->nvdimm_mr;
+}
+
+/* Splits the backend into the nvdimm_mr alias and a trailing label area. */
+static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp)
+{
+    MemoryRegion *mr = host_memory_backend_get_memory(dimm->hostmem, errp);
+    NVDIMMDevice *nvdimm = NVDIMM(dimm);
+    uint64_t size = memory_region_size(mr);
+
+    nvdimm->label_size = nvdimm->reserve_label ? MIN_NAMESPACE_LABEL_SIZE : 0;
+    if (size <= nvdimm->label_size) {
+        char *path = object_get_canonical_path_component(OBJECT(dimm->hostmem));
+        /* The trailing space matters: adjacent literals are concatenated. */
+        error_setg(errp, "the size of memdev %s (0x%" PRIx64 ") is too "
+                   "small to contain nvdimm label (0x%" PRIx64 ")",
+                   path, size, nvdimm->label_size);
+        g_free(path);
+        return;
+    }
+
+    /* Guest memory is [0, size); the label bytes follow it in the backend. */
+    size -= nvdimm->label_size;
+    memory_region_init_alias(&nvdimm->nvdimm_mr, OBJECT(dimm),
+                             "nvdimm-memory", mr, 0, size);
+    nvdimm->nvdimm_mr.align = memory_region_get_alignment(mr);
+    nvdimm->label_data = memory_region_get_ram_ptr(mr) + size;
+}
+
+static void nvdimm_assert_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size,
+                                        uint64_t offset)
+{
+    uint64_t end = offset + size; /* wraps (or equals offset) on bad input */
+    assert(nvdimm->reserve_label && end > offset && end <= nvdimm->label_size);
+}
+
+static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf,
+                                   uint64_t size, uint64_t offset)
+{
+    const uint8_t *src = nvdimm->label_data;
+    nvdimm_assert_rw_label_data(nvdimm, size, offset); /* aborts if bad */
+    memcpy(buf, src + offset, size);
+}
+
+static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf,
+                                    uint64_t size, uint64_t offset)
+{
+    PCDIMMDevice *dimm = PC_DIMM(nvdimm);
+    uint8_t *dst = nvdimm->label_data;
+    MemoryRegion *backend;
+
+    nvdimm_assert_rw_label_data(nvdimm, size, offset);
+    memcpy(dst + offset, buf, size);
+
+    /* The label area is the tail of the backend; flag the written bytes. */
+    backend = host_memory_backend_get_memory(dimm->hostmem, &error_abort);
+    memory_region_set_dirty(backend, memory_region_size(backend) -
+                            nvdimm->label_size + offset, size);
+}
+
 static void nvdimm_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
+    NVDIMMClass *nvc = NVDIMM_CLASS(oc);
+    PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc);
 
     /* nvdimm hotplug has not been supported yet. */
     dc->hotpluggable = false;
+
+    /* Label accessors, then the PC-DIMM hooks this device overrides. */
+    nvc->read_label_data = nvdimm_read_label_data;
+    nvc->write_label_data = nvdimm_write_label_data;
+    ddc->get_memory_region = nvdimm_get_memory_region;
+    ddc->realize = nvdimm_realize;
 }
 
 static TypeInfo nvdimm_info = {
     .name          = TYPE_NVDIMM,
     .parent        = TYPE_PC_DIMM,
+    .class_size    = sizeof(NVDIMMClass),  /* class carries label callbacks */
     .class_init    = nvdimm_class_init,
+    .instance_size = sizeof(NVDIMMDevice), /* per-device label state */
+    .instance_init = nvdimm_init,          /* adds "reserve-label" */
 };
 
 static void nvdimm_register_types(void)
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index 517de9c..bb5c74a 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -33,7 +33,66 @@ 
         }                                                     \
     } while (0)
 
-#define TYPE_NVDIMM             "nvdimm"
+/*
+ * Minimum size of the label area, as required by the NVDIMM Namespace
+ * Specification; see chapter 2, "Namespaces":
+ *   "NVDIMMs following the NVDIMM Block Mode Specification use an area
+ *    at least 128KB in size, which holds around 1000 labels."
+ */
+#define MIN_NAMESPACE_LABEL_SIZE      (128UL << 10)
+
+#define TYPE_NVDIMM      "nvdimm"
+#define NVDIMM(obj)      OBJECT_CHECK(NVDIMMDevice, (obj), TYPE_NVDIMM)
+#define NVDIMM_CLASS(oc) OBJECT_CLASS_CHECK(NVDIMMClass, (oc), TYPE_NVDIMM)
+#define NVDIMM_GET_CLASS(obj) OBJECT_GET_CLASS(NVDIMMClass, (obj), \
+                                               TYPE_NVDIMM)
+struct NVDIMMDevice {
+    /* private */
+    PCDIMMDevice parent_obj;
+
+    /* public */
+
+    /*
+     * whether a region at the end of the backend memory is reserved
+     * for the NVDIMM label area.
+     */
+    bool reserve_label;
+
+    /*
+     * the size of the label area (0 if none is reserved); presented to
+     * the guest via the _DSM "Get Namespace Label Size" function.
+     */
+    uint64_t label_size;
+
+    /*
+     * the address of the label data, which is read by the _DSM "Get
+     * Namespace Label Data" function and written by the _DSM "Set
+     * Namespace Label Data" function.
+     */
+    void *label_data;
+
+    /*
+     * the PMEM region of the NVDIMM device, presented to the guest via
+     * the ACPI NFIT, and via _FIT if NVDIMM hotplug is supported.
+     */
+    MemoryRegion nvdimm_mr;
+};
+typedef struct NVDIMMDevice NVDIMMDevice;
+
+struct NVDIMMClass {
+    /* private */
+    PCDIMMDeviceClass parent_class;
+
+    /* public */
+    /* Both callbacks require @offset + @size to be within the label area. */
+    /* copy @size bytes of label data, starting at @offset, into @buf. */
+    void (*read_label_data)(NVDIMMDevice *nvdimm, void *buf,
+                            uint64_t size, uint64_t offset);
+    /* copy @size bytes from @buf into the label data at @offset. */
+    void (*write_label_data)(NVDIMMDevice *nvdimm, const void *buf,
+                             uint64_t size, uint64_t offset);
+};
+typedef struct NVDIMMClass NVDIMMClass;
 
 #define NVDIMM_DSM_MEM_FILE     "etc/acpi/nvdimm-mem"