diff mbox

[v2,14/18] nvdimm: support NFIT_CMD_IMPLEMENTED function

Message ID 1439563931-12352-15-git-send-email-guangrong.xiao@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Xiao Guangrong Aug. 14, 2015, 2:52 p.m. UTC
__DSM is defined in ACPI 6.0: 9.14.1 _DSM (Device Specific Method)

Function 0 is a query function. We do not support any function on root
device and only 3 functions are support for NVDIMM device,
NFIT_CMD_GET_CONFIG_SIZE, NFIT_CMD_GET_CONFIG_DATA and
NFIT_CMD_SET_CONFIG_DATA, that means we currently only allow to access
device's Label Namespace

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 hw/mem/nvdimm/acpi.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 152 insertions(+)

Comments

Stefan Hajnoczi Aug. 25, 2015, 4:23 p.m. UTC | #1
On Fri, Aug 14, 2015 at 10:52:07PM +0800, Xiao Guangrong wrote:
> @@ -306,6 +354,18 @@ struct dsm_buffer {
>  static ram_addr_t dsm_addr;
>  static size_t dsm_size;
>  
> +struct cmd_out_implemented {

QEMU coding style uses typedef struct {} CamelCase.  Please follow this
convention in all user-defined structs (see ./CODING_STYLE).

>  static void dsm_write(void *opaque, hwaddr addr,
>                        uint64_t val, unsigned size)
>  {
> +    struct MemoryRegion *dsm_ram_mr = opaque;
> +    struct dsm_buffer *dsm;
> +    struct dsm_out *out;
> +    void *buf;
> +
>      assert(val == NOTIFY_VALUE);

The guest should not be able to cause an abort(3).  If val !=
NOTIFY_VALUE we can do nvdebug() and then return.

> +
> +    buf = memory_region_get_ram_ptr(dsm_ram_mr);
> +    dsm = buf;
> +    out = buf;
> +
> +    le32_to_cpus(&dsm->handle);
> +    le32_to_cpus(&dsm->arg1);
> +    le32_to_cpus(&dsm->arg2);

Can SMP guests modify DSM RAM while this thread is running?

We must avoid race conditions.  It's probably better to copy in data
before byte-swapping or checking input values.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Aug. 26, 2015, 10:46 a.m. UTC | #2
On 08/26/2015 12:23 AM, Stefan Hajnoczi wrote:
> On Fri, Aug 14, 2015 at 10:52:07PM +0800, Xiao Guangrong wrote:
>> @@ -306,6 +354,18 @@ struct dsm_buffer {
>>   static ram_addr_t dsm_addr;
>>   static size_t dsm_size;
>>
>> +struct cmd_out_implemented {
>
> QEMU coding style uses typedef struct {} CamelCase.  Please follow this
> convention in all user-defined structs (see ./CODING_STYLE).
>

Okay, will adjust all the defines in the next version.

>>   static void dsm_write(void *opaque, hwaddr addr,
>>                         uint64_t val, unsigned size)
>>   {
>> +    struct MemoryRegion *dsm_ram_mr = opaque;
>> +    struct dsm_buffer *dsm;
>> +    struct dsm_out *out;
>> +    void *buf;
>> +
>>       assert(val == NOTIFY_VALUE);
>
> The guest should not be able to cause an abort(3).  If val !=
> NOTIFY_VALUE we can do nvdebug() and then return.

The ACPI code and emulation code both are from qemu, if that happens,
it's really a bug, aborting the VM is better than throwing a debug
message under this case to avoid potential data corruption.

>
>> +
>> +    buf = memory_region_get_ram_ptr(dsm_ram_mr);
>> +    dsm = buf;
>> +    out = buf;
>> +
>> +    le32_to_cpus(&dsm->handle);
>> +    le32_to_cpus(&dsm->arg1);
>> +    le32_to_cpus(&dsm->arg2);
>
> Can SMP guests modify DSM RAM while this thread is running?
>
> We must avoid race conditions.  It's probably better to copy in data
> before byte-swapping or checking input values.

Yes, my mistake, will fix.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stefan Hajnoczi Aug. 28, 2015, 12:01 p.m. UTC | #3
On Wed, Aug 26, 2015 at 06:46:35PM +0800, Xiao Guangrong wrote:
> On 08/26/2015 12:23 AM, Stefan Hajnoczi wrote:
> >On Fri, Aug 14, 2015 at 10:52:07PM +0800, Xiao Guangrong wrote:
> >>  static void dsm_write(void *opaque, hwaddr addr,
> >>                        uint64_t val, unsigned size)
> >>  {
> >>+    struct MemoryRegion *dsm_ram_mr = opaque;
> >>+    struct dsm_buffer *dsm;
> >>+    struct dsm_out *out;
> >>+    void *buf;
> >>+
> >>      assert(val == NOTIFY_VALUE);
> >
> >The guest should not be able to cause an abort(3).  If val !=
> >NOTIFY_VALUE we can do nvdebug() and then return.
> 
> The ACPI code and emulation code both are from qemu, if that happens,
> it's really a bug, aborting the VM is better than throwing a debug
> message under this case to avoid potential data corruption.

abort(3) is dangerous because it can create a core dump.  If a malicious
guest triggers this repeatedly it could consume a lot of disk space and
I/O or CPU while performing the core dumps.

We cannot trust anything inside the guest, even if the guest code comes
from QEMU because a malicious guest can still read/write to the same
hardware registers.

Stefan
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Aug. 31, 2015, 6:51 a.m. UTC | #4
On 08/28/2015 08:01 PM, Stefan Hajnoczi wrote:
> On Wed, Aug 26, 2015 at 06:46:35PM +0800, Xiao Guangrong wrote:
>> On 08/26/2015 12:23 AM, Stefan Hajnoczi wrote:
>>> On Fri, Aug 14, 2015 at 10:52:07PM +0800, Xiao Guangrong wrote:
>>>>   static void dsm_write(void *opaque, hwaddr addr,
>>>>                         uint64_t val, unsigned size)
>>>>   {
>>>> +    struct MemoryRegion *dsm_ram_mr = opaque;
>>>> +    struct dsm_buffer *dsm;
>>>> +    struct dsm_out *out;
>>>> +    void *buf;
>>>> +
>>>>       assert(val == NOTIFY_VALUE);
>>>
>>> The guest should not be able to cause an abort(3).  If val !=
>>> NOTIFY_VALUE we can do nvdebug() and then return.
>>
>> The ACPI code and emulation code both are from qemu, if that happens,
>> it's really a bug, aborting the VM is better than throwing a debug
>> message under this case to avoid potential data corruption.
>
> abort(3) is dangerous because it can create a core dump.  If a malicious
> guest triggers this repeatedly it could consume a lot of disk space and
> I/O or CPU while performing the core dumps.
>
> We cannot trust anything inside the guest, even if the guest code comes
> from QEMU because a malicious guest can still read/write to the same
> hardware registers.
>

Completely agree with you. :)

How about use exit{1} instead of abort() to kill the VM?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stefan Hajnoczi Sept. 1, 2015, 9:16 a.m. UTC | #5
On Mon, Aug 31, 2015 at 02:51:50PM +0800, Xiao Guangrong wrote:
> 
> 
> On 08/28/2015 08:01 PM, Stefan Hajnoczi wrote:
> >On Wed, Aug 26, 2015 at 06:46:35PM +0800, Xiao Guangrong wrote:
> >>On 08/26/2015 12:23 AM, Stefan Hajnoczi wrote:
> >>>On Fri, Aug 14, 2015 at 10:52:07PM +0800, Xiao Guangrong wrote:
> >>>>  static void dsm_write(void *opaque, hwaddr addr,
> >>>>                        uint64_t val, unsigned size)
> >>>>  {
> >>>>+    struct MemoryRegion *dsm_ram_mr = opaque;
> >>>>+    struct dsm_buffer *dsm;
> >>>>+    struct dsm_out *out;
> >>>>+    void *buf;
> >>>>+
> >>>>      assert(val == NOTIFY_VALUE);
> >>>
> >>>The guest should not be able to cause an abort(3).  If val !=
> >>>NOTIFY_VALUE we can do nvdebug() and then return.
> >>
> >>The ACPI code and emulation code both are from qemu, if that happens,
> >>it's really a bug, aborting the VM is better than throwing a debug
> >>message under this case to avoid potential data corruption.
> >
> >abort(3) is dangerous because it can create a core dump.  If a malicious
> >guest triggers this repeatedly it could consume a lot of disk space and
> >I/O or CPU while performing the core dumps.
> >
> >We cannot trust anything inside the guest, even if the guest code comes
> >from QEMU because a malicious guest can still read/write to the same
> >hardware registers.
> >
> 
> Completely agree with you. :)
> 
> How about use exit{1} instead of abort() to kill the VM?

Most devices on a physical machine do not power off or reset the machine
in case of error.

I think it's good to follow that model and avoid killing the VM.
Otherwise nested virtualization or userspace drivers can take down the
whole VM.

Stefan
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/hw/mem/nvdimm/acpi.c b/hw/mem/nvdimm/acpi.c
index c773954..20aefce 100644
--- a/hw/mem/nvdimm/acpi.c
+++ b/hw/mem/nvdimm/acpi.c
@@ -31,6 +31,7 @@ 
 #include "exec/address-spaces.h"
 #include "hw/acpi/aml-build.h"
 #include "hw/mem/pc-nvdimm.h"
+#include "sysemu/sysemu.h"
 
 #include "internal.h"
 
@@ -41,6 +42,22 @@  static void nfit_spa_uuid_pm(void *uuid)
     memcpy(uuid, &uuid_pm, sizeof(uuid_pm));
 }
 
+static bool dsm_is_root_uuid(uint8_t *uuid)
+{
+    uuid_le uuid_root = UUID_LE(0x2f10e7a4, 0x9e91, 0x11e4, 0x89,
+                                0xd3, 0x12, 0x3b, 0x93, 0xf7, 0x5c, 0xba);
+
+    return !memcmp(uuid, &uuid_root, sizeof(uuid_root));
+}
+
+static bool dsm_is_dimm_uuid(uint8_t *uuid)
+{
+    uuid_le uuid_dimm = UUID_LE(0x4309ac30, 0x0d11, 0x11e4, 0x91,
+                                0x91, 0x08, 0x00, 0x20, 0x0c, 0x9a, 0x66);
+
+    return !memcmp(uuid, &uuid_dimm, sizeof(uuid_dimm));
+}
+
 enum {
     NFIT_TABLE_SPA = 0,
     NFIT_TABLE_MEM = 1,
@@ -162,6 +179,20 @@  static uint32_t nvdimm_index_to_handle(int index)
     return index + 1;
 }
 
+static PCNVDIMMDevice
+*get_nvdimm_device_by_handle(GSList *list, uint32_t handle)
+{
+    for (; list; list = list->next) {
+        PCNVDIMMDevice *nvdimm = list->data;
+
+        if (nvdimm_index_to_handle(nvdimm->device_index) == handle) {
+            return nvdimm;
+        }
+    }
+
+    return NULL;
+}
+
 static size_t get_nfit_total_size(int nr)
 {
     /* each nvdimm has 3 tables. */
@@ -286,6 +317,23 @@  enum {
     NFIT_CMD_VENDOR = 9,
 };
 
+enum {
+    NFIT_STATUS_SUCCESS = 0,
+    NFIT_STATUS_NOT_SUPPORTED = 1,
+    NFIT_STATUS_NON_EXISTING_MEM_DEV = 2,
+    NFIT_STATUS_INVALID_PARAS = 3,
+    NFIT_STATUS_VENDOR_SPECIFIC_ERROR = 4,
+};
+
+#define DSM_REVISION        (1)
+
+/* do not support any command except NFIT_CMD_IMPLEMENTED on root. */
+#define ROOT_SUPPORT_CMD    (1 << NFIT_CMD_IMPLEMENTED)
+/* support NFIT_CMD_SET_CONFIG_DATA iif nvdimm->configdata is true. */
+#define DIMM_SUPPORT_CMD    ((1 << NFIT_CMD_IMPLEMENTED)        \
+                           | (1 << NFIT_CMD_GET_CONFIG_SIZE)    \
+                           | (1 << NFIT_CMD_GET_CONFIG_DATA))
+
 struct dsm_buffer {
     /* RAM page. */
     uint32_t handle;
@@ -306,6 +354,18 @@  struct dsm_buffer {
 static ram_addr_t dsm_addr;
 static size_t dsm_size;
 
+struct cmd_out_implemented {
+    uint64_t cmd_list;
+};
+
+struct dsm_out {
+    union {
+        uint32_t status;
+        struct cmd_out_implemented cmd_implemented;
+        uint8_t data[PAGE_SIZE];
+    };
+};
+
 static uint64_t dsm_read(void *opaque, hwaddr addr,
                          unsigned size)
 {
@@ -314,10 +374,102 @@  static uint64_t dsm_read(void *opaque, hwaddr addr,
     return 0;
 }
 
+static void dsm_write_root(struct dsm_buffer *in, struct dsm_out *out)
+{
+    uint32_t function = in->arg2;
+
+    if (function == NFIT_CMD_IMPLEMENTED) {
+        out->cmd_implemented.cmd_list = cpu_to_le64(ROOT_SUPPORT_CMD);
+        return;
+    }
+
+    out->status = cpu_to_le32(NFIT_STATUS_NOT_SUPPORTED);
+    nvdebug("Return status %#x.\n", out->status);
+}
+
+static void dsm_write_nvdimm(struct dsm_buffer *in, struct dsm_out *out)
+{
+    GSList *list = get_nvdimm_built_list();
+    PCNVDIMMDevice *nvdimm = get_nvdimm_device_by_handle(list, in->handle);
+    uint32_t function = in->arg2;
+    uint32_t status = NFIT_STATUS_NON_EXISTING_MEM_DEV;
+    uint64_t cmd_list;
+
+    if (!nvdimm) {
+        goto set_status_free;
+    }
+
+    switch (function) {
+    case NFIT_CMD_IMPLEMENTED:
+        cmd_list = DIMM_SUPPORT_CMD;
+        if (nvdimm->configdata) {
+            cmd_list |= 1 << NFIT_CMD_SET_CONFIG_DATA;
+        }
+
+        out->cmd_implemented.cmd_list = cpu_to_le64(cmd_list);
+        goto free;
+    default:
+        status = NFIT_STATUS_NOT_SUPPORTED;
+    };
+
+    nvdebug("Return status %#x.\n", status);
+
+set_status_free:
+    out->status = cpu_to_le32(status);
+free:
+    g_slist_free(list);
+}
+
 static void dsm_write(void *opaque, hwaddr addr,
                       uint64_t val, unsigned size)
 {
+    struct MemoryRegion *dsm_ram_mr = opaque;
+    struct dsm_buffer *dsm;
+    struct dsm_out *out;
+    void *buf;
+
     assert(val == NOTIFY_VALUE);
+
+    buf = memory_region_get_ram_ptr(dsm_ram_mr);
+    dsm = buf;
+    out = buf;
+
+    le32_to_cpus(&dsm->handle);
+    le32_to_cpus(&dsm->arg1);
+    le32_to_cpus(&dsm->arg2);
+
+    nvdebug("Arg0 " UUID_FMT ".\n", dsm->arg0[0], dsm->arg0[1], dsm->arg0[2],
+            dsm->arg0[3], dsm->arg0[4], dsm->arg0[5], dsm->arg0[6],
+            dsm->arg0[7], dsm->arg0[8], dsm->arg0[9], dsm->arg0[10],
+            dsm->arg0[11], dsm->arg0[12], dsm->arg0[13], dsm->arg0[14],
+            dsm->arg0[15]);
+    nvdebug("Handler %#x, Arg1 %#x, Arg2 %#x.\n", dsm->handle, dsm->arg1,
+            dsm->arg2);
+
+    if (dsm->arg1 != DSM_REVISION) {
+        nvdebug("Revision %#x is not supported, expect %#x.\n",
+                dsm->arg1, DSM_REVISION);
+        goto exit;
+    }
+
+    if (!dsm->handle) {
+        if (!dsm_is_root_uuid(dsm->arg0)) {
+            nvdebug("Root UUID does not match.\n");
+            goto exit;
+        }
+
+        return dsm_write_root(dsm, out);
+    }
+
+    if (!dsm_is_dimm_uuid(dsm->arg0)) {
+        nvdebug("DIMM UUID does not match.\n");
+        goto exit;
+    }
+
+    return dsm_write_nvdimm(dsm, out);
+
+exit:
+    out->status = cpu_to_le32(NFIT_STATUS_NOT_SUPPORTED);
 }
 
 static const MemoryRegionOps dsm_ops = {