diff mbox

[8/9] nvdimm acpi: emulate dsm method

Message ID 1456829771-71553-9-git-send-email-guangrong.xiao@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Xiao Guangrong March 1, 2016, 10:56 a.m. UTC
Emulate dsm method after IO VM-exit

Currently, we only introduce the framework and no function is actually
supported

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 hw/acpi/aml-build.c         |  2 +-
 hw/acpi/nvdimm.c            | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/hw/acpi/aml-build.h |  1 +
 include/hw/mem/nvdimm.h     |  8 ++++++++
 4 files changed, 54 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin March 1, 2016, 5:09 p.m. UTC | #1
On Tue, Mar 01, 2016 at 06:56:10PM +0800, Xiao Guangrong wrote:
> Emulate dsm method after IO VM-exit
> 
> Currently, we only introduce the framework and no function is actually
> supported
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  hw/acpi/aml-build.c         |  2 +-
>  hw/acpi/nvdimm.c            | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/acpi/aml-build.h |  1 +
>  include/hw/mem/nvdimm.h     |  8 ++++++++
>  4 files changed, 54 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index ab89ca6..da11bf8 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -227,7 +227,7 @@ static void build_extop_package(GArray *package, uint8_t op)
>      build_prepend_byte(package, 0x5B); /* ExtOpPrefix */
>  }
>  
> -static void build_append_int_noprefix(GArray *table, uint64_t value, int size)
> +void build_append_int_noprefix(GArray *table, uint64_t value, int size)
>  {
>      int i;
>  
> diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
> index 781f6c1..e0b483a 100644
> --- a/hw/acpi/nvdimm.c
> +++ b/hw/acpi/nvdimm.c
> @@ -393,12 +393,56 @@ typedef struct NvdimmDsmOut NvdimmDsmOut;
>  static uint64_t
>  nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
>  {
> +    fprintf(stderr, "BUG: we never read _DSM IO Port.\n");
>      return 0;
>  }

Can't guest trigger this?
If yes, don't put such code in production please:
this will fill up disk on the host.


>  
>  static void
>  nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
>  {
> +    NvdimmDsmIn *in;
> +    GArray *out;
> +    uint32_t buf_size;
> +    hwaddr dsm_mem_addr = val;
> +
> +    nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
> +
> +    /*
> +     * The DSM memory is mapped to guest address space so an evil guest
> +     * can change its content while we are doing DSM emulation. Avoid
> +     * this by copying DSM memory to QEMU local memory.
> +     */
> +    in = g_malloc(TARGET_PAGE_SIZE);
> +    cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
> +
> +    le32_to_cpus(&in->revision);
> +    le32_to_cpus(&in->function);
> +    le32_to_cpus(&in->handle);
> +
> +    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
> +                 in->handle, in->function);
> +
> +    out = g_array_new(false, true /* clear */, 1);
> +
> +    /*
> +     * function 0 is called to inquire what functions are supported by
> +     * OSPM
> +     */
> +    if (in->function == 0) {
> +        build_append_int_noprefix(out, 0 /* No function Supported */,
> +                                  sizeof(uint8_t));
> +    } else {
> +        /* No function is supported yet. */
> +        build_append_int_noprefix(out, 1 /* Not Supported */,
> +                                  sizeof(uint8_t));
> +    }
> +
> +    buf_size = cpu_to_le32(out->len);
> +    cpu_physical_memory_write(dsm_mem_addr, &buf_size, sizeof(buf_size));

is there a race here?
can guest read this before data is written?

> +    cpu_physical_memory_write(dsm_mem_addr + sizeof(buf_size), out->data,
> +                              out->len);

What is this doing?
Is this actually writing AML bytecode into guest memory?


> +    g_free(in);
> +    g_array_free(out, true);
>  }
>  
>  static const MemoryRegionOps nvdimm_dsm_ops = {
> diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
> index 7404e2a..b0826f0 100644
> --- a/include/hw/acpi/aml-build.h
> +++ b/include/hw/acpi/aml-build.h
> @@ -357,6 +357,7 @@ Aml *aml_derefof(Aml *arg);
>  Aml *aml_sizeof(Aml *arg);
>  Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target);
>  
> +void build_append_int_noprefix(GArray *table, uint64_t value, int size);
>  void
>  build_header(GArray *linker, GArray *table_data,
>               AcpiTableHeader *h, const char *sig, int len, uint8_t rev,
> diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
> index 634c60b..aaa2608 100644
> --- a/include/hw/mem/nvdimm.h
> +++ b/include/hw/mem/nvdimm.h
> @@ -25,6 +25,14 @@
>  
>  #include "hw/mem/pc-dimm.h"
>  
> +#define NVDIMM_DEBUG 0
> +#define nvdimm_debug(fmt, ...)                                \
> +    do {                                                      \
> +        if (NVDIMM_DEBUG) {                                   \
> +            fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__);  \
> +        }                                                     \
> +    } while (0)
> +
>  #define TYPE_NVDIMM             "nvdimm"
>  
>  #define NVDIMM_DSM_MEM_FILE     "etc/acpi/nvdimm-mem"
> -- 
> 1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin March 1, 2016, 5:12 p.m. UTC | #2
On Tue, Mar 01, 2016 at 06:56:10PM +0800, Xiao Guangrong wrote:
> Emulate dsm method after IO VM-exit
> 
> Currently, we only introduce the framework and no function is actually
> supported
> 
> Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
> ---
>  hw/acpi/aml-build.c         |  2 +-
>  hw/acpi/nvdimm.c            | 44 ++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/acpi/aml-build.h |  1 +
>  include/hw/mem/nvdimm.h     |  8 ++++++++
>  4 files changed, 54 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index ab89ca6..da11bf8 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -227,7 +227,7 @@ static void build_extop_package(GArray *package, uint8_t op)
>      build_prepend_byte(package, 0x5B); /* ExtOpPrefix */
>  }
>  
> -static void build_append_int_noprefix(GArray *table, uint64_t value, int size)
> +void build_append_int_noprefix(GArray *table, uint64_t value, int size)
>  {
>      int i;
>  
> diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
> index 781f6c1..e0b483a 100644
> --- a/hw/acpi/nvdimm.c
> +++ b/hw/acpi/nvdimm.c
> @@ -393,12 +393,56 @@ typedef struct NvdimmDsmOut NvdimmDsmOut;
>  static uint64_t
>  nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
>  {
> +    fprintf(stderr, "BUG: we never read _DSM IO Port.\n");
>      return 0;
>  }
>  
>  static void
>  nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
>  {
> +    NvdimmDsmIn *in;
> +    GArray *out;
> +    uint32_t buf_size;
> +    hwaddr dsm_mem_addr = val;
> +
> +    nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
> +
> +    /*
> +     * The DSM memory is mapped to guest address space so an evil guest
> +     * can change its content while we are doing DSM emulation. Avoid
> +     * this by copying DSM memory to QEMU local memory.
> +     */
> +    in = g_malloc(TARGET_PAGE_SIZE);
> +    cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
> +
> +    le32_to_cpus(&in->revision);
> +    le32_to_cpus(&in->function);
> +    le32_to_cpus(&in->handle);
> +
> +    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
> +                 in->handle, in->function);
> +
> +    out = g_array_new(false, true /* clear */, 1);
> +
> +    /*
> +     * function 0 is called to inquire what functions are supported by
> +     * OSPM
> +     */
> +    if (in->function == 0) {
> +        build_append_int_noprefix(out, 0 /* No function Supported */,
> +                                  sizeof(uint8_t));
> +    } else {
> +        /* No function is supported yet. */
> +        build_append_int_noprefix(out, 1 /* Not Supported */,
> +                                  sizeof(uint8_t));
> +    }
> +
> +    buf_size = cpu_to_le32(out->len);
> +    cpu_physical_memory_write(dsm_mem_addr, &buf_size, sizeof(buf_size));
> +    cpu_physical_memory_write(dsm_mem_addr + sizeof(buf_size), out->data,
> +                              out->len);

BTW, how do we know buffer is big enough? Add assert here?

Also, you have a packed structure with the layout, correct?
Can't you use that instead of open-coding it?

> +    g_free(in);
> +    g_array_free(out, true);
>  }
>  
>  static const MemoryRegionOps nvdimm_dsm_ops = {
> diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
> index 7404e2a..b0826f0 100644
> --- a/include/hw/acpi/aml-build.h
> +++ b/include/hw/acpi/aml-build.h
> @@ -357,6 +357,7 @@ Aml *aml_derefof(Aml *arg);
>  Aml *aml_sizeof(Aml *arg);
>  Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target);
>  
> +void build_append_int_noprefix(GArray *table, uint64_t value, int size);
>  void
>  build_header(GArray *linker, GArray *table_data,
>               AcpiTableHeader *h, const char *sig, int len, uint8_t rev,
> diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
> index 634c60b..aaa2608 100644
> --- a/include/hw/mem/nvdimm.h
> +++ b/include/hw/mem/nvdimm.h
> @@ -25,6 +25,14 @@
>  
>  #include "hw/mem/pc-dimm.h"
>  
> +#define NVDIMM_DEBUG 0
> +#define nvdimm_debug(fmt, ...)                                \
> +    do {                                                      \
> +        if (NVDIMM_DEBUG) {                                   \
> +            fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__);  \
> +        }                                                     \
> +    } while (0)
> +
>  #define TYPE_NVDIMM             "nvdimm"
>  
>  #define NVDIMM_DSM_MEM_FILE     "etc/acpi/nvdimm-mem"
> -- 
> 1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong March 2, 2016, 3:30 a.m. UTC | #3
On 03/02/2016 01:09 AM, Michael S. Tsirkin wrote:

>
> Can't guest trigger this?
> If yes, don't put such code in production please:
> this will fill up disk on the host.
>

Okay, the evil guest can read the IO port freely. I will use nvdimm_debug() instead.

>
>>
>>   static void
>>   nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
>>   {
>> +    NvdimmDsmIn *in;
>> +    GArray *out;
>> +    uint32_t buf_size;
>> +    hwaddr dsm_mem_addr = val;
>> +
>> +    nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
>> +
>> +    /*
>> +     * The DSM memory is mapped to guest address space so an evil guest
>> +     * can change its content while we are doing DSM emulation. Avoid
>> +     * this by copying DSM memory to QEMU local memory.
>> +     */
>> +    in = g_malloc(TARGET_PAGE_SIZE);
>> +    cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
>> +
>> +    le32_to_cpus(&in->revision);
>> +    le32_to_cpus(&in->function);
>> +    le32_to_cpus(&in->handle);
>> +
>> +    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
>> +                 in->handle, in->function);
>> +
>> +    out = g_array_new(false, true /* clear */, 1);
>> +
>> +    /*
>> +     * function 0 is called to inquire what functions are supported by
>> +     * OSPM
>> +     */
>> +    if (in->function == 0) {
>> +        build_append_int_noprefix(out, 0 /* No function Supported */,
>> +                                  sizeof(uint8_t));
>> +    } else {
>> +        /* No function is supported yet. */
>> +        build_append_int_noprefix(out, 1 /* Not Supported */,
>> +                                  sizeof(uint8_t));
>> +    }
>> +
>> +    buf_size = cpu_to_le32(out->len);
>> +    cpu_physical_memory_write(dsm_mem_addr, &buf_size, sizeof(buf_size));
>
> is there a race here?
> can guest read this before data is written?

I think no.

It is the SERIALIZED DSM so there is no race in guest. And the CPU has exited
from guest mode when we fill the buffer in the same CPU-context so the guest
can not read the buffer at this point also memory-barrier is not needed here.

>
>> +    cpu_physical_memory_write(dsm_mem_addr + sizeof(buf_size), out->data,
>> +                              out->len);
>
> What is this doing?
> Is this actually writing AML bytecode into guest memory?

The layout of result written into the buffer is like this:
struct NvdimmDsmOut {
     /* the size of buffer filled by QEMU. */
     uint32_t len;
     uint8_t data[0];
} QEMU_PACKED;
typedef struct NvdimmDsmOut NvdimmDsmOut;

So the first cpu_physical_memory_write() writes the @len and the second one you
pointed out writes the real payload.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin March 2, 2016, 6:36 a.m. UTC | #4
On Wed, Mar 02, 2016 at 11:30:10AM +0800, Xiao Guangrong wrote:
> 
> 
> On 03/02/2016 01:09 AM, Michael S. Tsirkin wrote:
> 
> >
> >Can't guest trigger this?
> >If yes, don't put such code in production please:
> >this will fill up disk on the host.
> >
> 
> Okay, the evil guest can read the IO port freely. I will use nvdimm_debug() instead.
> 
> >
> >>
> >>  static void
> >>  nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
> >>  {
> >>+    NvdimmDsmIn *in;
> >>+    GArray *out;
> >>+    uint32_t buf_size;
> >>+    hwaddr dsm_mem_addr = val;
> >>+
> >>+    nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
> >>+
> >>+    /*
> >>+     * The DSM memory is mapped to guest address space so an evil guest
> >>+     * can change its content while we are doing DSM emulation. Avoid
> >>+     * this by copying DSM memory to QEMU local memory.
> >>+     */
> >>+    in = g_malloc(TARGET_PAGE_SIZE);

ugh. manual memory management :(

> >>+    cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);

is there a requirement address is aligned?
if not this might cross page and crash qemu.
better read just what you need.

> >>+
> >>+    le32_to_cpus(&in->revision);
> >>+    le32_to_cpus(&in->function);
> >>+    le32_to_cpus(&in->handle);
> >>+
> >>+    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
> >>+                 in->handle, in->function);
> >>+
> >>+    out = g_array_new(false, true /* clear */, 1);

export build_alloc_array then, and reuse?

> >>+
> >>+    /*
> >>+     * function 0 is called to inquire what functions are supported by
> >>+     * OSPM
> >>+     */
> >>+    if (in->function == 0) {
> >>+        build_append_int_noprefix(out, 0 /* No function Supported */,
> >>+                                  sizeof(uint8_t));

What does this mean? Same comment here and below ...


> >>+    } else {
> >>+        /* No function is supported yet. */
> >>+        build_append_int_noprefix(out, 1 /* Not Supported */,
> >>+                                  sizeof(uint8_t));
> >>+    }
> >>+
> >>+    buf_size = cpu_to_le32(out->len);
> >>+    cpu_physical_memory_write(dsm_mem_addr, &buf_size, sizeof(buf_size));
> >
> >is there a race here?
> >can guest read this before data is written?
> 
> I think no.
> 
> It is the SERIALIZED DSM so there is no race in guest. And the CPU has exited
> from guest mode when we fill the buffer in the same CPU-context so the guest
> can not read the buffer at this point also memory-barrier is not needed here.
> 
> >
> >>+    cpu_physical_memory_write(dsm_mem_addr + sizeof(buf_size), out->data,
> >>+                              out->len);
> >
> >What is this doing?
> >Is this actually writing AML bytecode into guest memory?
> 
> The layout of result written into the buffer is like this:
> struct NvdimmDsmOut {
>     /* the size of buffer filled by QEMU. */
>     uint32_t len;
>     uint8_t data[0];
> } QEMU_PACKED;
> typedef struct NvdimmDsmOut NvdimmDsmOut;
> 
> So the first cpu_physical_memory_write() writes the @len and the second one you
> pointed out writes the real payload.


So either write a function that gets parameters and formats
buffer, or use a structure to do this.
Do not open-code formatting and don't mess with
offsets.

E.g.

struct NvdimmDsmFunc0Out {
     /* the size of buffer filled by QEMU. */
     uint32_t len;
     uint8_t supported;
} QEMU_PACKED;
typedef struct NvdimmDsmFunc0Out NvdimmDsmFunc0Out;


And now

NvdimmDsmFunc0Out func0 = { .len = cpu_to_le32(sizeof(func0)); suppported = func == 0; };

cpu_physical_memory_write(dsm_mem_addr, &func0, sizeof func0);


Or if you really insist on using GArray:

build_dsm_out_func0(int function...)
{
    uint32_t len;
    uint8_t result;

    len = sizeof result;
    if (function == 0) {
        result = 0 /* No function Supported */;
   } else {
        /* No function is supported yet. */
        result = 1 /* Not Supported */;
   }

    build_append_int_noprefix(out, len, sizeof len);
    build_append_int_noprefix(out, result, sizeof result);

    assert(out->len < PAGE_SIZE); - is this right?
    cpu_physical_memory_write(dsm_mem_addr, out->data,
                              out->len);
}


but I prefer the former ...
Xiao Guangrong March 2, 2016, 7:15 a.m. UTC | #5
On 03/02/2016 02:36 PM, Michael S. Tsirkin wrote:
> On Wed, Mar 02, 2016 at 11:30:10AM +0800, Xiao Guangrong wrote:
>>
>>
>> On 03/02/2016 01:09 AM, Michael S. Tsirkin wrote:
>>
>>>
>>> Can't guest trigger this?
>>> If yes, don't put such code in production please:
>>> this will fill up disk on the host.
>>>
>>
>> Okay, the evil guest can read the IO port freely. I will use nvdimm_debug() instead.
>>
>>>
>>>>
>>>>   static void
>>>>   nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
>>>>   {
>>>> +    NvdimmDsmIn *in;
>>>> +    GArray *out;
>>>> +    uint32_t buf_size;
>>>> +    hwaddr dsm_mem_addr = val;
>>>> +
>>>> +    nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
>>>> +
>>>> +    /*
>>>> +     * The DSM memory is mapped to guest address space so an evil guest
>>>> +     * can change its content while we are doing DSM emulation. Avoid
>>>> +     * this by copying DSM memory to QEMU local memory.
>>>> +     */
>>>> +    in = g_malloc(TARGET_PAGE_SIZE);
>
> ugh. manual memory management :(
>

Hmm... Or use GArray? But it is :)

>>>> +    cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
>
> is there a requirement address is aligned?
> if not this might cross page and crash qemu.
> better read just what you need.
>

Yes, this memory is allocated by BIOS and we asked it to align the memory
with PAGE_SIZE:

     bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE,
                              false /* high memory */);

>>>> +
>>>> +    le32_to_cpus(&in->revision);
>>>> +    le32_to_cpus(&in->function);
>>>> +    le32_to_cpus(&in->handle);
>>>> +
>>>> +    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
>>>> +                 in->handle, in->function);
>>>> +
>>>> +    out = g_array_new(false, true /* clear */, 1);
>
> export build_alloc_array then, and reuse?

It is good to me, but as your suggestions, this code will be removed.

>
>>>> +
>>>> +    /*
>>>> +     * function 0 is called to inquire what functions are supported by
>>>> +     * OSPM
>>>> +     */
>>>> +    if (in->function == 0) {
>>>> +        build_append_int_noprefix(out, 0 /* No function Supported */,
>>>> +                                  sizeof(uint8_t));
>
> What does this mean? Same comment here and below ...

If its the function 0, we return 0 that indicates no command is supported yet.
Other wise, it is a command request from a evil guest regardless of the result
returned by function 0, we return the status code 1 to indicates this command
is not supported.

>
>
>>>> +    } else {
>>>> +        /* No function is supported yet. */
>>>> +        build_append_int_noprefix(out, 1 /* Not Supported */,
>>>> +                                  sizeof(uint8_t));
>>>> +    }
>>>> +
>>>> +    buf_size = cpu_to_le32(out->len);
>>>> +    cpu_physical_memory_write(dsm_mem_addr, &buf_size, sizeof(buf_size));
>>>
>>> is there a race here?
>>> can guest read this before data is written?
>>
>> I think no.
>>
>> It is the SERIALIZED DSM so there is no race in guest. And the CPU has exited
>> from guest mode when we fill the buffer in the same CPU-context so the guest
>> can not read the buffer at this point also memory-barrier is not needed here.
>>
>>>
>>>> +    cpu_physical_memory_write(dsm_mem_addr + sizeof(buf_size), out->data,
>>>> +                              out->len);
>>>
>>> What is this doing?
>>> Is this actually writing AML bytecode into guest memory?
>>
>> The layout of result written into the buffer is like this:
>> struct NvdimmDsmOut {
>>      /* the size of buffer filled by QEMU. */
>>      uint32_t len;
>>      uint8_t data[0];
>> } QEMU_PACKED;
>> typedef struct NvdimmDsmOut NvdimmDsmOut;
>>
>> So the first cpu_physical_memory_write() writes the @len and the second one you
>> pointed out writes the real payload.
>
>
> So either write a function that gets parameters and formats
> buffer, or use a structure to do this.
> Do not open-code formatting and don't mess with
> offsets.
>
> E.g.
>
> struct NvdimmDsmFunc0Out {
>       /* the size of buffer filled by QEMU. */
>       uint32_t len;
>       uint8_t supported;
> } QEMU_PACKED;
> typedef struct NvdimmDsmFunc0Out NvdimmDsmFunc0Out;
>
>
> And now
>
> NvdimmDsmFunc0Out func0 = { .len = cpu_to_le32(sizeof(func0)); suppported = func == 0; };
>
> cpu_physical_memory_write(dsm_mem_addr, &func0, sizeof func0);
>
>
> Or if you really insist on using GArray:
>
> build_dsm_out_func0(int function...)
> {
>      uint32_t len;
>      uint8_t result;
>
>      len = sizeof result;
>      if (function == 0) {
>          result = 0 /* No function Supported */;
>     } else {
>          /* No function is supported yet. */
>          result = 1 /* Not Supported */;
>     }
>
>      build_append_int_noprefix(out, len, sizeof len);
>      build_append_int_noprefix(out, result, sizeof result);
>
>      assert(out->len < PAGE_SIZE); - is this right?
>      cpu_physical_memory_write(dsm_mem_addr, out->data,
>                                out->len);
> }
>
>
> but I prefer the former ...
>

Okay, i prefer the former too ;).

Thank you, Michael!


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin March 2, 2016, 7:20 a.m. UTC | #6
On Wed, Mar 02, 2016 at 03:15:19PM +0800, Xiao Guangrong wrote:
> 
> 
> On 03/02/2016 02:36 PM, Michael S. Tsirkin wrote:
> >On Wed, Mar 02, 2016 at 11:30:10AM +0800, Xiao Guangrong wrote:
> >>
> >>
> >>On 03/02/2016 01:09 AM, Michael S. Tsirkin wrote:
> >>
> >>>
> >>>Can't guest trigger this?
> >>>If yes, don't put such code in production please:
> >>>this will fill up disk on the host.
> >>>
> >>
> >>Okay, the evil guest can read the IO port freely. I will use nvdimm_debug() instead.
> >>
> >>>
> >>>>
> >>>>  static void
> >>>>  nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
> >>>>  {
> >>>>+    NvdimmDsmIn *in;
> >>>>+    GArray *out;
> >>>>+    uint32_t buf_size;
> >>>>+    hwaddr dsm_mem_addr = val;
> >>>>+
> >>>>+    nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
> >>>>+
> >>>>+    /*
> >>>>+     * The DSM memory is mapped to guest address space so an evil guest
> >>>>+     * can change its content while we are doing DSM emulation. Avoid
> >>>>+     * this by copying DSM memory to QEMU local memory.
> >>>>+     */
> >>>>+    in = g_malloc(TARGET_PAGE_SIZE);
> >
> >ugh. manual memory management :(
> >
> 
> Hmm... Or use GArray? But it is :)
> 
> >>>>+    cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
> >
> >is there a requirement address is aligned?
> >if not this might cross page and crash qemu.
> >better read just what you need.
> >
> 
> Yes, this memory is allocated by BIOS and we asked it to align the memory
> with PAGE_SIZE:
> 
>     bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE,
>                              false /* high memory */);
> 
> >>>>+
> >>>>+    le32_to_cpus(&in->revision);
> >>>>+    le32_to_cpus(&in->function);
> >>>>+    le32_to_cpus(&in->handle);
> >>>>+
> >>>>+    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
> >>>>+                 in->handle, in->function);
> >>>>+
> >>>>+    out = g_array_new(false, true /* clear */, 1);
> >
> >export build_alloc_array then, and reuse?
> 
> It is good to me, but as your suggestions, this code will be removed.
> 
> >
> >>>>+
> >>>>+    /*
> >>>>+     * function 0 is called to inquire what functions are supported by
> >>>>+     * OSPM
> >>>>+     */
> >>>>+    if (in->function == 0) {
> >>>>+        build_append_int_noprefix(out, 0 /* No function Supported */,
> >>>>+                                  sizeof(uint8_t));
> >
> >What does this mean? Same comment here and below ...
> 
> If its the function 0, we return 0 that indicates no command is supported yet.

first comment says no function supported.
clearly function 0 is supported, is it not?
how exactly does 0 indicate no command is supported?
is it a bitmask of supported commands?

> Other wise, it is a command request from a evil guest regardless of the result
> returned by function 0, we return the status code 1 to indicates this command
> is not supported.

is command same as function?

> 
> >
> >
> >>>>+    } else {
> >>>>+        /* No function is supported yet. */
> >>>>+        build_append_int_noprefix(out, 1 /* Not Supported */,
> >>>>+                                  sizeof(uint8_t));
> >>>>+    }
> >>>>+
> >>>>+    buf_size = cpu_to_le32(out->len);
> >>>>+    cpu_physical_memory_write(dsm_mem_addr, &buf_size, sizeof(buf_size));
> >>>
> >>>is there a race here?
> >>>can guest read this before data is written?
> >>
> >>I think no.
> >>
> >>It is the SERIALIZED DSM so there is no race in guest. And the CPU has exited
> >>from guest mode when we fill the buffer in the same CPU-context so the guest
> >>can not read the buffer at this point also memory-barrier is not needed here.
> >>
> >>>
> >>>>+    cpu_physical_memory_write(dsm_mem_addr + sizeof(buf_size), out->data,
> >>>>+                              out->len);
> >>>
> >>>What is this doing?
> >>>Is this actually writing AML bytecode into guest memory?
> >>
> >>The layout of result written into the buffer is like this:
> >>struct NvdimmDsmOut {
> >>     /* the size of buffer filled by QEMU. */
> >>     uint32_t len;
> >>     uint8_t data[0];
> >>} QEMU_PACKED;
> >>typedef struct NvdimmDsmOut NvdimmDsmOut;
> >>
> >>So the first cpu_physical_memory_write() writes the @len and the second one you
> >>pointed out writes the real payload.
> >
> >
> >So either write a function that gets parameters and formats
> >buffer, or use a structure to do this.
> >Do not open-code formatting and don't mess with
> >offsets.
> >
> >E.g.
> >
> >struct NvdimmDsmFunc0Out {
> >      /* the size of buffer filled by QEMU. */
> >      uint32_t len;
> >      uint8_t supported;
> >} QEMU_PACKED;
> >typedef struct NvdimmDsmFunc0Out NvdimmDsmFunc0Out;
> >
> >
> >And now
> >
> >NvdimmDsmFunc0Out func0 = { .len = cpu_to_le32(sizeof(func0)); suppported = func == 0; };
> >
> >cpu_physical_memory_write(dsm_mem_addr, &func0, sizeof func0);
> >
> >
> >Or if you really insist on using GArray:
> >
> >build_dsm_out_func0(int function...)
> >{
> >     uint32_t len;
> >     uint8_t result;
> >
> >     len = sizeof result;
> >     if (function == 0) {
> >         result = 0 /* No function Supported */;
> >    } else {
> >         /* No function is supported yet. */
> >         result = 1 /* Not Supported */;
> >    }
> >
> >     build_append_int_noprefix(out, len, sizeof len);
> >     build_append_int_noprefix(out, result, sizeof result);
> >
> >     assert(out->len < PAGE_SIZE); - is this right?
> >     cpu_physical_memory_write(dsm_mem_addr, out->data,
> >                               out->len);
> >}
> >
> >
> >but I prefer the former ...
> >
> 
> Okay, i prefer the former too ;).
> 
> Thank you, Michael!
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong March 2, 2016, 7:21 a.m. UTC | #7
On 03/02/2016 03:15 PM, Xiao Guangrong wrote:

>>>>> +    in = g_malloc(TARGET_PAGE_SIZE);
>>
>> ugh. manual memory management :(
>>
>
> Hmm... Or use GArray? But it is :)

Sorry, typo.

But it is the static size and we should read all memory out to get
the consistent data to avid guest changes it at anytime. It is no
much room to improve it i think...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong March 2, 2016, 7:29 a.m. UTC | #8
On 03/02/2016 03:20 PM, Michael S. Tsirkin wrote:
> On Wed, Mar 02, 2016 at 03:15:19PM +0800, Xiao Guangrong wrote:
>>
>>
>> On 03/02/2016 02:36 PM, Michael S. Tsirkin wrote:
>>> On Wed, Mar 02, 2016 at 11:30:10AM +0800, Xiao Guangrong wrote:
>>>>
>>>>
>>>> On 03/02/2016 01:09 AM, Michael S. Tsirkin wrote:
>>>>
>>>>>
>>>>> Can't guest trigger this?
>>>>> If yes, don't put such code in production please:
>>>>> this will fill up disk on the host.
>>>>>
>>>>
>>>> Okay, the evil guest can read the IO port freely. I will use nvdimm_debug() instead.
>>>>
>>>>>
>>>>>>
>>>>>>   static void
>>>>>>   nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
>>>>>>   {
>>>>>> +    NvdimmDsmIn *in;
>>>>>> +    GArray *out;
>>>>>> +    uint32_t buf_size;
>>>>>> +    hwaddr dsm_mem_addr = val;
>>>>>> +
>>>>>> +    nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
>>>>>> +
>>>>>> +    /*
>>>>>> +     * The DSM memory is mapped to guest address space so an evil guest
>>>>>> +     * can change its content while we are doing DSM emulation. Avoid
>>>>>> +     * this by copying DSM memory to QEMU local memory.
>>>>>> +     */
>>>>>> +    in = g_malloc(TARGET_PAGE_SIZE);
>>>
>>> ugh. manual memory management :(
>>>
>>
>> Hmm... Or use GArray? But it is :)
>>
>>>>>> +    cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
>>>
>>> is there a requirement address is aligned?
>>> if not this might cross page and crash qemu.
>>> better read just what you need.
>>>
>>
>> Yes, this memory is allocated by BIOS and we asked it to align the memory
>> with PAGE_SIZE:
>>
>>      bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE,
>>                               false /* high memory */);
>>
>>>>>> +
>>>>>> +    le32_to_cpus(&in->revision);
>>>>>> +    le32_to_cpus(&in->function);
>>>>>> +    le32_to_cpus(&in->handle);
>>>>>> +
>>>>>> +    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
>>>>>> +                 in->handle, in->function);
>>>>>> +
>>>>>> +    out = g_array_new(false, true /* clear */, 1);
>>>
>>> export build_alloc_array then, and reuse?
>>
>> It is good to me, but as your suggestions, this code will be removed.
>>
>>>
>>>>>> +
>>>>>> +    /*
>>>>>> +     * function 0 is called to inquire what functions are supported by
>>>>>> +     * OSPM
>>>>>> +     */
>>>>>> +    if (in->function == 0) {
>>>>>> +        build_append_int_noprefix(out, 0 /* No function Supported */,
>>>>>> +                                  sizeof(uint8_t));
>>>
>>> What does this mean? Same comment here and below ...
>>
>> If its the function 0, we return 0 that indicates no command is supported yet.
>
> first comment says no function supported.
> clearly function 0 is supported, is it not?

Yep, the comment is not clear here. It should be "No function Supported other
than function 0 "

Function 0 is the common function supported by all DSMs to inquire what functions are
supported by this DSM.

> how exactly does 0 indicate no command is supported?
> is it a bitmask of supported commands?

It is a bitmask. The spec said:

If Function Index is zero, the return is a buffer containing one bit for each function
index, starting with zero. Bit 0 indicates whether there is support for any functions other
than function 0 for the specified UUID and Revision ID. If set to zero, no functions are
supported (other than function zero) for the specified UUID and Revision ID.

>
>> Other wise, it is a command request from a evil guest regardless of the result
>> returned by function 0, we return the status code 1 to indicates this command
>> is not supported.
>
> is command same as function?

Yes.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin March 2, 2016, 8:44 a.m. UTC | #9
On Wed, Mar 02, 2016 at 03:29:33PM +0800, Xiao Guangrong wrote:
> 
> 
> On 03/02/2016 03:20 PM, Michael S. Tsirkin wrote:
> >On Wed, Mar 02, 2016 at 03:15:19PM +0800, Xiao Guangrong wrote:
> >>
> >>
> >>On 03/02/2016 02:36 PM, Michael S. Tsirkin wrote:
> >>>On Wed, Mar 02, 2016 at 11:30:10AM +0800, Xiao Guangrong wrote:
> >>>>
> >>>>
> >>>>On 03/02/2016 01:09 AM, Michael S. Tsirkin wrote:
> >>>>
> >>>>>
> >>>>>Can't guest trigger this?
> >>>>>If yes, don't put such code in production please:
> >>>>>this will fill up disk on the host.
> >>>>>
> >>>>
> >>>>Okay, the evil guest can read the IO port freely. I will use nvdimm_debug() instead.
> >>>>
> >>>>>
> >>>>>>
> >>>>>>  static void
> >>>>>>  nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
> >>>>>>  {
> >>>>>>+    NvdimmDsmIn *in;
> >>>>>>+    GArray *out;
> >>>>>>+    uint32_t buf_size;
> >>>>>>+    hwaddr dsm_mem_addr = val;
> >>>>>>+
> >>>>>>+    nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
> >>>>>>+
> >>>>>>+    /*
> >>>>>>+     * The DSM memory is mapped to guest address space so an evil guest
> >>>>>>+     * can change its content while we are doing DSM emulation. Avoid
> >>>>>>+     * this by copying DSM memory to QEMU local memory.
> >>>>>>+     */
> >>>>>>+    in = g_malloc(TARGET_PAGE_SIZE);
> >>>
> >>>ugh. manual memory management :(
> >>>
> >>
> >>Hmm... Or use GArray? But it is :)
> >>
> >>>>>>+    cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
> >>>
> >>>is there a requirement address is aligned?
> >>>if not this might cross page and crash qemu.
> >>>better read just what you need.
> >>>
> >>
> >>Yes, this memory is allocated by BIOS and we asked it to align the memory
> >>with PAGE_SIZE:
> >>
> >>     bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE,
> >>                              false /* high memory */);
> >>
> >>>>>>+
> >>>>>>+    le32_to_cpus(&in->revision);
> >>>>>>+    le32_to_cpus(&in->function);
> >>>>>>+    le32_to_cpus(&in->handle);
> >>>>>>+
> >>>>>>+    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
> >>>>>>+                 in->handle, in->function);
> >>>>>>+
> >>>>>>+    out = g_array_new(false, true /* clear */, 1);
> >>>
> >>>export build_alloc_array then, and reuse?
> >>
> >>It is good to me, but as your suggestions, this code will be removed.
> >>
> >>>
> >>>>>>+
> >>>>>>+    /*
> >>>>>>+     * function 0 is called to inquire what functions are supported by
> >>>>>>+     * OSPM
> >>>>>>+     */
> >>>>>>+    if (in->function == 0) {
> >>>>>>+        build_append_int_noprefix(out, 0 /* No function Supported */,
> >>>>>>+                                  sizeof(uint8_t));
> >>>
> >>>What does this mean? Same comment here and below ...
> >>
> >>If its the function 0, we return 0 that indicates no command is supported yet.
> >
> >first comment says no function supported.
> >clearly function 0 is supported, is it not?
> 
> Yep, the comment is not clear here. It should be "No function Supported other
> than function 0 "
> 
> Function 0 is the common function supported by all DSMs to inquire what functions are
> supported by this DSM.
> 
> >how exactly does 0 indicate no command is supported?
> >is it a bitmask of supported commands?
> 
> It is a bitmask. The spec said:
> 
> If Function Index is zero, the return is a buffer containing one bit for each function
> index, starting with zero.

Why not start from 1?
So 0x1 - function 1 supported, 0x2 - function 2, 0x4 - function 3 etc.

> Bit 0 indicates whether there is support for any functions other
> than function 0 for the specified UUID and Revision ID. If set to zero, no functions are
> supported (other than function zero) for the specified UUID and Revision ID.

> >
> >>Other wise, it is a command request from a evil guest regardless of the result
> >>returned by function 0, we return the status code 1 to indicates this command
> >>is not supported.
> >
> >is command same as function?
> 
> Yes.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong March 2, 2016, 9:05 a.m. UTC | #10
On 03/02/2016 04:44 PM, Michael S. Tsirkin wrote:
> On Wed, Mar 02, 2016 at 03:29:33PM +0800, Xiao Guangrong wrote:
>>
>>
>> On 03/02/2016 03:20 PM, Michael S. Tsirkin wrote:
>>> On Wed, Mar 02, 2016 at 03:15:19PM +0800, Xiao Guangrong wrote:
>>>>
>>>>
>>>> On 03/02/2016 02:36 PM, Michael S. Tsirkin wrote:
>>>>> On Wed, Mar 02, 2016 at 11:30:10AM +0800, Xiao Guangrong wrote:
>>>>>>
>>>>>>
>>>>>> On 03/02/2016 01:09 AM, Michael S. Tsirkin wrote:
>>>>>>
>>>>>>>
>>>>>>> Can't guest trigger this?
>>>>>>> If yes, don't put such code in production please:
>>>>>>> this will fill up disk on the host.
>>>>>>>
>>>>>>
>>>>>> Okay, the evil guest can read the IO port freely. I will use nvdimm_debug() instead.
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>>   static void
>>>>>>>>   nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
>>>>>>>>   {
>>>>>>>> +    NvdimmDsmIn *in;
>>>>>>>> +    GArray *out;
>>>>>>>> +    uint32_t buf_size;
>>>>>>>> +    hwaddr dsm_mem_addr = val;
>>>>>>>> +
>>>>>>>> +    nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
>>>>>>>> +
>>>>>>>> +    /*
>>>>>>>> +     * The DSM memory is mapped to guest address space so an evil guest
>>>>>>>> +     * can change its content while we are doing DSM emulation. Avoid
>>>>>>>> +     * this by copying DSM memory to QEMU local memory.
>>>>>>>> +     */
>>>>>>>> +    in = g_malloc(TARGET_PAGE_SIZE);
>>>>>
>>>>> ugh. manual memory management :(
>>>>>
>>>>
>>>> Hmm... Or use GArray? But it is :)
>>>>
>>>>>>>> +    cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
>>>>>
>>>>> is there a requirement address is aligned?
>>>>> if not this might cross page and crash qemu.
>>>>> better read just what you need.
>>>>>
>>>>
>>>> Yes, this memory is allocated by BIOS and we asked it to align the memory
>>>> with PAGE_SIZE:
>>>>
>>>>      bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE,
>>>>                               false /* high memory */);
>>>>
>>>>>>>> +
>>>>>>>> +    le32_to_cpus(&in->revision);
>>>>>>>> +    le32_to_cpus(&in->function);
>>>>>>>> +    le32_to_cpus(&in->handle);
>>>>>>>> +
>>>>>>>> +    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
>>>>>>>> +                 in->handle, in->function);
>>>>>>>> +
>>>>>>>> +    out = g_array_new(false, true /* clear */, 1);
>>>>>
>>>>> export build_alloc_array then, and reuse?
>>>>
>>>> It is good to me, but as your suggestions, this code will be removed.
>>>>
>>>>>
>>>>>>>> +
>>>>>>>> +    /*
>>>>>>>> +     * function 0 is called to inquire what functions are supported by
>>>>>>>> +     * OSPM
>>>>>>>> +     */
>>>>>>>> +    if (in->function == 0) {
>>>>>>>> +        build_append_int_noprefix(out, 0 /* No function Supported */,
>>>>>>>> +                                  sizeof(uint8_t));
>>>>>
>>>>> What does this mean? Same comment here and below ...
>>>>
>>>> If its the function 0, we return 0 that indicates no command is supported yet.
>>>
>>> first comment says no function supported.
>>> clearly function 0 is supported, is it not?
>>
>> Yep, the comment is not clear here. It should be "No function Supported other
>> than function 0 "
>>
>> Function 0 is the common function supported by all DSMs to inquire what functions are
>> supported by this DSM.
>>
>>> how exactly does 0 indicate no command is supported?
>>> is it a bitmask of supported commands?
>>
>> It is a bitmask. The spec said:
>>
>> If Function Index is zero, the return is a buffer containing one bit for each function
>> index, starting with zero.
>
> Why not start from 1?
> So 0x1 - function 1 supported, 0x2 - function 2, 0x4 - function 3 etc.

It is defined by the spec in ACPI 6.0 "9.14.1 _DSM (Device Specific Method)" on page 532:

If set to zero, no functions are supported (other than function zero) for the specified UUID and
Revision ID. If set to one, at least one additional function is supported.

I audaciously guess the ACPI guys just want to reserve 0 to quickly identify if any function is
valid other than walking the bits one by one. :) but who knows...
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index ab89ca6..da11bf8 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -227,7 +227,7 @@  static void build_extop_package(GArray *package, uint8_t op)
     build_prepend_byte(package, 0x5B); /* ExtOpPrefix */
 }
 
-static void build_append_int_noprefix(GArray *table, uint64_t value, int size)
+void build_append_int_noprefix(GArray *table, uint64_t value, int size)
 {
     int i;
 
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 781f6c1..e0b483a 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -393,12 +393,56 @@  typedef struct NvdimmDsmOut NvdimmDsmOut;
 static uint64_t
 nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
 {
+    fprintf(stderr, "BUG: we never read _DSM IO Port.\n");
     return 0;
 }
 
 static void
 nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
 {
+    NvdimmDsmIn *in;
+    GArray *out;
+    uint32_t buf_size;
+    hwaddr dsm_mem_addr = val;
+
+    nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
+
+    /*
+     * The DSM memory is mapped to guest address space so an evil guest
+     * can change its content while we are doing DSM emulation. Avoid
+     * this by copying DSM memory to QEMU local memory.
+     */
+    in = g_malloc(TARGET_PAGE_SIZE);
+    cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
+
+    le32_to_cpus(&in->revision);
+    le32_to_cpus(&in->function);
+    le32_to_cpus(&in->handle);
+
+    nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
+                 in->handle, in->function);
+
+    out = g_array_new(false, true /* clear */, 1);
+
+    /*
+     * function 0 is called to inquire what functions are supported by
+     * OSPM
+     */
+    if (in->function == 0) {
+        build_append_int_noprefix(out, 0 /* No function Supported */,
+                                  sizeof(uint8_t));
+    } else {
+        /* No function is supported yet. */
+        build_append_int_noprefix(out, 1 /* Not Supported */,
+                                  sizeof(uint8_t));
+    }
+
+    buf_size = cpu_to_le32(out->len);
+    cpu_physical_memory_write(dsm_mem_addr, &buf_size, sizeof(buf_size));
+    cpu_physical_memory_write(dsm_mem_addr + sizeof(buf_size), out->data,
+                              out->len);
+    g_free(in);
+    g_array_free(out, true);
 }
 
 static const MemoryRegionOps nvdimm_dsm_ops = {
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 7404e2a..b0826f0 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -357,6 +357,7 @@  Aml *aml_derefof(Aml *arg);
 Aml *aml_sizeof(Aml *arg);
 Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target);
 
+void build_append_int_noprefix(GArray *table, uint64_t value, int size);
 void
 build_header(GArray *linker, GArray *table_data,
              AcpiTableHeader *h, const char *sig, int len, uint8_t rev,
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index 634c60b..aaa2608 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -25,6 +25,14 @@ 
 
 #include "hw/mem/pc-dimm.h"
 
+#define NVDIMM_DEBUG 0
+#define nvdimm_debug(fmt, ...)                                \
+    do {                                                      \
+        if (NVDIMM_DEBUG) {                                   \
+            fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__);  \
+        }                                                     \
+    } while (0)
+
 #define TYPE_NVDIMM             "nvdimm"
 
 #define NVDIMM_DSM_MEM_FILE     "etc/acpi/nvdimm-mem"