diff mbox

[v2,13/18] nvdimm: build namespace config data

Message ID 1439563931-12352-14-git-send-email-guangrong.xiao@linux.intel.com
State New, archived
Headers show

Commit Message

Xiao Guangrong Aug. 14, 2015, 2:52 p.m. UTC
If @configdata is false, QEMU will build a static, read-only
namespace in memory and use it to serve
DSM GET_CONFIG_SIZE/GET_CONFIG_DATA requests.

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
---
 hw/mem/Makefile.objs       |   3 +-
 hw/mem/nvdimm/acpi.c       |  10 ++
 hw/mem/nvdimm/internal.h   |  12 ++
 hw/mem/nvdimm/namespace.c  | 307 +++++++++++++++++++++++++++++++++++++++++++++
 include/hw/mem/pc-nvdimm.h |   2 +
 5 files changed, 333 insertions(+), 1 deletion(-)
 create mode 100644 hw/mem/nvdimm/namespace.c

Comments

Stefan Hajnoczi Aug. 25, 2015, 4:16 p.m. UTC | #1
On Fri, Aug 14, 2015 at 10:52:06PM +0800, Xiao Guangrong wrote:
> +#ifdef NVDIMM_DEBUG
> +#define nvdebug(fmt, ...) fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__)
> +#else
> +#define nvdebug(...)
> +#endif

The following allows the compiler to check format strings and syntax
check the argument expressions:

#define NVDIMM_DEBUG 0  /* set to 1 for debug output */
#define nvdebug(fmt, ...) \
    if (NVDIMM_DEBUG) { \
        fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__); \
    }

This approach avoids bitrot (e.g. debug format string arguments have
become outdated).
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Aug. 26, 2015, 10:42 a.m. UTC | #2
On 08/26/2015 12:16 AM, Stefan Hajnoczi wrote:
> On Fri, Aug 14, 2015 at 10:52:06PM +0800, Xiao Guangrong wrote:
>> +#ifdef NVDIMM_DEBUG
>> +#define nvdebug(fmt, ...) fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__)
>> +#else
>> +#define nvdebug(...)
>> +#endif
>
> The following allows the compiler to check format strings and syntax
> check the argument expressions:
>
> #define NVDIMM_DEBUG 0  /* set to 1 for debug output */
> #define nvdebug(fmt, ...) \
>      if (NVDIMM_DEBUG) { \
>          fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__); \
>      }
>
> This approach avoids bitrot (e.g. debug format string arguments have
> become outdated).
>

Really good tips, thanks for sharing.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stefan Hajnoczi Aug. 28, 2015, 11:59 a.m. UTC | #3
On Wed, Aug 26, 2015 at 06:42:01PM +0800, Xiao Guangrong wrote:
> 
> 
> On 08/26/2015 12:16 AM, Stefan Hajnoczi wrote:
> >On Fri, Aug 14, 2015 at 10:52:06PM +0800, Xiao Guangrong wrote:
> >>+#ifdef NVDIMM_DEBUG
> >>+#define nvdebug(fmt, ...) fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__)
> >>+#else
> >>+#define nvdebug(...)
> >>+#endif
> >
> >The following allows the compiler to check format strings and syntax
> >check the argument expressions:
> >
> >#define NVDIMM_DEBUG 0  /* set to 1 for debug output */
> >#define nvdebug(fmt, ...) \
> >     if (NVDIMM_DEBUG) { \
> >         fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__); \
> >     }
> >
> >This approach avoids bitrot (e.g. debug format string arguments have
> >become outdated).
> >
> 
> Really good tips, thanks for sharing.

I forgot the do { ... } while (0) in the macro to make nvdebug("hello
world"); work like a normal C statement.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Xiao Guangrong Aug. 31, 2015, 6:25 a.m. UTC | #4
On 08/28/2015 07:59 PM, Stefan Hajnoczi wrote:
> On Wed, Aug 26, 2015 at 06:42:01PM +0800, Xiao Guangrong wrote:
>>
>>
>> On 08/26/2015 12:16 AM, Stefan Hajnoczi wrote:
>>> On Fri, Aug 14, 2015 at 10:52:06PM +0800, Xiao Guangrong wrote:
>>>> +#ifdef NVDIMM_DEBUG
>>>> +#define nvdebug(fmt, ...) fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__)
>>>> +#else
>>>> +#define nvdebug(...)
>>>> +#endif
>>>
>>> The following allows the compiler to check format strings and syntax
>>> check the argument expressions:
>>>
>>> #define NVDIMM_DEBUG 0  /* set to 1 for debug output */
>>> #define nvdebug(fmt, ...) \
>>>      if (NVDIMM_DEBUG) { \
>>>          fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__); \
>>>      }
>>>
>>> This approach avoids bitrot (e.g. debug format string arguments have
>>> become outdated).
>>>
>>
>> Really good tips, thanks for sharing.
>
> I forgot the do { ... } while (0) in the macro to make nvdebug("hello
> world"); work like a normal C statement.
>

Got it, I will keep it in mind.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/hw/mem/Makefile.objs b/hw/mem/Makefile.objs
index 7a6948d..7f3fab2 100644
--- a/hw/mem/Makefile.objs
+++ b/hw/mem/Makefile.objs
@@ -1,2 +1,3 @@ 
 common-obj-$(CONFIG_MEM_HOTPLUG) += pc-dimm.o
-common-obj-$(CONFIG_NVDIMM) += nvdimm/pc-nvdimm.o nvdimm/acpi.o
+common-obj-$(CONFIG_NVDIMM) += nvdimm/pc-nvdimm.o nvdimm/acpi.o	\
+			       nvdimm/namespace.o
diff --git a/hw/mem/nvdimm/acpi.c b/hw/mem/nvdimm/acpi.c
index 0b09efa..c773954 100644
--- a/hw/mem/nvdimm/acpi.c
+++ b/hw/mem/nvdimm/acpi.c
@@ -240,6 +240,8 @@  static void build_nfit_table(GSList *device_list, char *buf)
 
     for (; device_list; device_list = device_list->next) {
         PCNVDIMMDevice *nvdimm = device_list->data;
+        struct nfit_memdev *nfit_memdev;
+        struct nfit_dcr *nfit_dcr;
         int spa_index, dcr_index;
 
         spa_index = ++index;
@@ -252,10 +254,15 @@  static void build_nfit_table(GSList *device_list, char *buf)
          * build Memory Device to System Physical Address Range Mapping
          * Table.
          */
+        nfit_memdev = (struct nfit_memdev *)buf;
         buf += build_memdev_table(buf, nvdimm, spa_index, dcr_index);
 
         /* build Control Region Descriptor Table. */
+        nfit_dcr = (struct nfit_dcr *)buf;
         buf += build_dcr_table(buf, nvdimm, dcr_index);
+
+        calculate_nvdimm_isetcookie(nvdimm, nfit_memdev->region_spa_offset,
+                                    nfit_dcr->serial_number);
     }
 }
 
@@ -382,6 +389,9 @@  void pc_nvdimm_build_nfit_table(GArray *table_offsets, GArray *table_data,
 
     build_header(linker, table_data, (void *)(table_data->data + nfit_start),
                  "NFIT", table_data->len - nfit_start, 1);
+
+    build_nvdimm_configdata(list);
+
 exit:
     g_slist_free(list);
 }
diff --git a/hw/mem/nvdimm/internal.h b/hw/mem/nvdimm/internal.h
index 90d54dc..b1f3f16 100644
--- a/hw/mem/nvdimm/internal.h
+++ b/hw/mem/nvdimm/internal.h
@@ -13,6 +13,14 @@ 
 #ifndef __NVDIMM_INTERNAL_H
 #define __NVDIMM_INTERNAL_H
 
+/* #define NVDIMM_DEBUG */
+
+#ifdef NVDIMM_DEBUG
+#define nvdebug(fmt, ...) fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__)
+#else
+#define nvdebug(...)
+#endif
+
 #define PAGE_SIZE               (1UL << 12)
 
 typedef struct {
@@ -27,4 +35,8 @@  typedef struct {
 
 GSList *get_nvdimm_built_list(void);
 ram_addr_t reserved_range_push(uint64_t size);
+
+void calculate_nvdimm_isetcookie(PCNVDIMMDevice *nvdimm, uint64_t spa,
+                                 uint32_t sn);
+void build_nvdimm_configdata(GSList *device_list);
 #endif
diff --git a/hw/mem/nvdimm/namespace.c b/hw/mem/nvdimm/namespace.c
new file mode 100644
index 0000000..04626da
--- /dev/null
+++ b/hw/mem/nvdimm/namespace.c
@@ -0,0 +1,307 @@ 
+/*
+ * NVDIMM  Namespace Support
+ *
+ * Copyright(C) 2015 Intel Corporation.
+ *
+ * Author:
+ *  Xiao Guangrong <guangrong.xiao@linux.intel.com>
+ *
+ * NVDIMM namespace specification can be found at:
+ *      http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "hw/mem/pc-nvdimm.h"
+
+#include "internal.h"
+
+static uint64_t fletcher64(void *addr, size_t len)
+{
+    uint32_t *buf = addr;
+    uint32_t lo32 = 0;
+    uint64_t hi32 = 0;
+    int i;
+
+    for (i = 0; i < len / sizeof(uint32_t); i++) {
+        lo32 += cpu_to_le32(buf[i]);
+        hi32 += lo32;
+    }
+
+    return hi32 << 32 | lo32;
+}
+
+struct interleave_set_info {
+    struct interleave_set_info_map {
+        uint64_t region_spa_offset;
+        uint32_t serial_number;
+        uint32_t zero;
+    } mapping[1];
+};
+
+void calculate_nvdimm_isetcookie(PCNVDIMMDevice *nvdimm, uint64_t spa,
+                                 uint32_t sn)
+{
+    struct interleave_set_info info;
+
+    info.mapping[0].region_spa_offset = spa;
+    info.mapping[0].serial_number = sn;
+    info.mapping[0].zero = 0;
+
+    nvdimm->isetcookie = fletcher64(&info, sizeof(info));
+}
+
+#define NSINDEX_SIGNATURE      "NAMESPACE_INDEX\0"
+
+enum {
+    NSINDEX_SIG_LEN = 16,
+    NSINDEX_ALIGN = 256,
+    NSINDEX_SEQ_MASK = 0x3,
+    NSINDEX_MAJOR = 0x1,
+    NSINDEX_MINOR = 0x1,
+
+    NSLABEL_UUID_LEN = 16,
+    NSLABEL_NAME_LEN = 64,
+    NSLABEL_FLAG_ROLABEL = 0x1,  /* read-only label */
+    NSLABEL_FLAG_LOCAL = 0x2,    /* DIMM-local namespace */
+    NSLABEL_FLAG_BTT = 0x4,      /* namespace contains a BTT */
+    NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */
+};
+
+/*
+ * struct namespace_label_index_block - label set superblock
+ * @sig: NAMESPACE_INDEX\0
+ * @flags: placeholder
+ * @seq: sequence number for this index
+ * @myoff: offset of this index in label area
+ * @mysize: size of this index struct
+ * @otheroff: offset of other index
+ * @labeloff: offset of first label slot
+ * @nlabel: total number of label slots
+ * @major: label area major version
+ * @minor: label area minor version
+ * @checksum: fletcher64 of all fields
+ * @free[0]: bitmap, nlabel bits
+ *
+ * The size of free[] is rounded up so the total struct size is a
+ * multiple of NSINDEX_ALIGN bytes.  Any bits this allocates beyond
+ * nlabel bits must be zero.
+ */
+struct namespace_label_index_block {
+    uint8_t sig[NSINDEX_SIG_LEN];
+    uint32_t flags;
+    uint32_t seq;
+    uint64_t myoff;
+    uint64_t mysize;
+    uint64_t otheroff;
+    uint64_t labeloff;
+    uint32_t nlabel;
+    uint16_t major;
+    uint16_t minor;
+    uint64_t checksum;
+    uint8_t free[0];
+} QEMU_PACKED;
+
+/*
+ * struct namespace_label - namespace superblock
+ * @uuid: UUID per RFC 4122
+ * @name: optional name (NUL-terminated)
+ * @flags: see NSLABEL_FLAG_*
+ * @nlabel: num labels to describe this ns
+ * @position: labels position in set
+ * @isetcookie: interleave set cookie
+ * @lbasize: LBA size in bytes or 0 for pmem
+ * @dpa: DPA of NVM range on this DIMM
+ * @rawsize: size of namespace
+ * @slot: slot of this label in label area
+ * @unused: must be zero
+ */
+struct namespace_label {
+    uint8_t uuid[NSLABEL_UUID_LEN];
+    uint8_t name[NSLABEL_NAME_LEN];
+    uint32_t flags;
+    uint16_t nlabel;
+    uint16_t position;
+    uint64_t isetcookie;
+    uint64_t lbasize;
+    uint64_t dpa;
+    uint64_t rawsize;
+    uint32_t slot;
+    uint32_t unused;
+} QEMU_PACKED;
+
+/* Calculate the number of labels that fit in the whole config space. */
+static int config_space_max_label_nr(PCNVDIMMDevice *nvdimm, size_t block_size)
+{
+    /* there are two namespace label index blocks in total. */
+    if (block_size * 2 >= nvdimm->config_data_size) {
+        return 0;
+    }
+
+    return (nvdimm->config_data_size - block_size * 2) /
+            sizeof(struct namespace_label);
+}
+
+/* Calculate the number of labels that one index block can track. */
+static int label_index_block_max_label_nr(size_t block_size)
+{
+    int free_size;
+
+    free_size = block_size - sizeof(struct namespace_label_index_block);
+
+    return free_size * BITS_PER_BYTE;
+}
+
+static int calculate_max_label_nr(PCNVDIMMDevice *nvdimm, size_t block_size)
+{
+    return MIN(label_index_block_max_label_nr(block_size),
+        config_space_max_label_nr(nvdimm, block_size));
+}
+
+/*
+ * check if we can increase the size of namespace_label_index_block to
+ * contain more labels.
+ */
+static bool can_increase_index_block(PCNVDIMMDevice *nvdimm,
+                                     size_t block_size, int label_nr)
+{
+    size_t remaining;
+
+    remaining = nvdimm->config_data_size - block_size * 2 -
+                label_nr * sizeof(struct namespace_label);
+
+    assert((int64_t)remaining >= 0);
+
+    /* can contain 1 label at least. */
+    return remaining >=  NSINDEX_ALIGN * 2 + sizeof(struct namespace_label);
+}
+
+static void count_label_nr(PCNVDIMMDevice *nvdimm, size_t *label_block_size,
+                           int *label_nr)
+{
+    *label_block_size = 0;
+
+    do {
+        /*
+          * The minimum size of an index block is 256 bytes and the size must
+          * be a multiple of 256 bytes.
+          */
+        *label_block_size += NSINDEX_ALIGN;
+
+        *label_nr = calculate_max_label_nr(nvdimm, *label_block_size);
+    } while (can_increase_index_block(nvdimm, *label_block_size, *label_nr));
+}
+
+static void namespace_label_uuid(PCNVDIMMDevice *nvdimm, void *uuid)
+{
+    uuid_le label_uuid_init = UUID_LE(0x137e67a9, 0x7dcb, 0x4c66, 0xb2,
+                                      0xe6, 0x05, 0x06, 0x5b, 0xeb,
+                                      0x6a, 0x00);
+
+    assert(nvdimm->device_index <= 0xff);
+
+    label_uuid_init.b[0] += nvdimm->device_index;
+    memcpy(uuid, &label_uuid_init, sizeof(label_uuid_init));
+}
+
+static void init_namespace(PCNVDIMMDevice *nvdimm)
+{
+    struct namespace_label_index_block *index1, *index2;
+    struct namespace_label *label;
+    int i;
+
+    size_t label_block_size;
+    int label_nr;
+
+    assert(!nvdimm->configdata);
+
+    count_label_nr(nvdimm, &label_block_size, &label_nr);
+    nvdebug("nvdimm%d: label_block_size 0x%lx label_nr %d.\n",
+            nvdimm->device_index, label_block_size, label_nr);
+
+    index1 = nvdimm->config_data_addr;
+
+    /*
+     * init the first namespace label index block, except @otheroff
+     * and @checksum. we will do it later.
+     */
+    memcpy(index1->sig, NSINDEX_SIGNATURE, sizeof(NSINDEX_SIGNATURE));
+    index1->flags = cpu_to_le32(0);
+    index1->seq = cpu_to_le32(0x1);
+    index1->myoff = cpu_to_le64(0);
+    index1->mysize = cpu_to_le64(label_block_size);
+    index1->labeloff = cpu_to_le64(label_block_size * 2);
+    index1->nlabel = cpu_to_le32(label_nr);
+    index1->major = cpu_to_le16(NSINDEX_MAJOR);
+    index1->minor = cpu_to_le16(NSINDEX_MINOR);
+    index1->checksum = cpu_to_le64(0);
+    memset(index1->free, 0,
+           label_block_size - sizeof(struct namespace_label_index_block));
+
+    /*
+     * the label slot with the lowest offset in the label storage area is
+     * tracked by the least significant bit of the first byte of the free
+     * array.
+     *
+     * the first label is used.
+     */
+    for (i = 1; i < index1->nlabel; i++) {
+        set_bit(i, (unsigned long *)index1->free);
+    }
+
+    /* init the second namespace label index block. */
+    index2 = (void *)index1 + label_block_size;
+    memcpy(index2, index1, label_block_size);
+    index2->seq = cpu_to_le32(0x2);
+    index2->myoff = cpu_to_le64(label_block_size);
+
+    /* init @otheroff and @checksum. */
+    index1->otheroff = cpu_to_le64(index2->myoff);
+    index2->otheroff = cpu_to_le64(index1->myoff);
+    index1->checksum = cpu_to_le64(fletcher64(index1, label_block_size));
+    index2->checksum = cpu_to_le64(fletcher64(index2, label_block_size));
+
+    /* only one label is used which is the first label and is readonly. */
+    label = nvdimm->config_data_addr + label_block_size * 2;
+    namespace_label_uuid(nvdimm, label->uuid);
+    sprintf((char *)label->name, "QEMU NS%d", nvdimm->device_index);
+    label->flags = cpu_to_le32(NSLABEL_FLAG_ROLABEL);
+    label->nlabel = cpu_to_le16(1);
+    label->position = cpu_to_le16(0);
+    label->isetcookie = cpu_to_le64(nvdimm->isetcookie);
+    label->lbasize = cpu_to_le64(0);
+    label->dpa = cpu_to_le64(object_property_get_int(OBJECT(&nvdimm->mr),
+                                                     "addr", NULL));
+    label->rawsize = cpu_to_le64(memory_region_size(&nvdimm->mr));
+    label->slot = cpu_to_le32(0);
+    label->unused = cpu_to_le32(0);
+
+    nvdebug("nvdimm%d, checksum1 0x%lx checksum2 0x%lx isetcookie 0x%lx.\n",
+            nvdimm->device_index, index1->checksum, index2->checksum,
+            label->isetcookie);
+}
+
+void build_nvdimm_configdata(GSList *device_list)
+{
+    for (; device_list; device_list = device_list->next) {
+        PCNVDIMMDevice *nvdimm = device_list->data;
+
+        if (nvdimm->config_data_addr) {
+            return;
+        }
+
+        nvdimm->config_data_addr = g_malloc(nvdimm->config_data_size);
+        init_namespace(nvdimm);
+    }
+}
diff --git a/include/hw/mem/pc-nvdimm.h b/include/hw/mem/pc-nvdimm.h
index b7faec3..8aa7086 100644
--- a/include/hw/mem/pc-nvdimm.h
+++ b/include/hw/mem/pc-nvdimm.h
@@ -28,6 +28,8 @@  typedef struct PCNVDIMMDevice {
     uint64_t config_data_size;
     void *config_data_addr;
 
+    uint64_t isetcookie;
+
     MemoryRegion mr;
 } PCNVDIMMDevice;