diff mbox series

[5/8] nvme: add support for metadata

Message ID 20190517084234.26923-6-klaus@birkelund.eu (mailing list archive)
State Superseded, archived
Headers show
Series nvme: v1.3, sgls, metadata and new 'ocssd' device | expand

Commit Message

Klaus Jensen May 17, 2019, 8:42 a.m. UTC
The new `ms` parameter may be used to indicate the number of metadata
bytes provided per LBA.

Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
---
 hw/block/nvme.c | 31 +++++++++++++++++++++++++++++--
 hw/block/nvme.h | 11 ++++++++++-
 2 files changed, 39 insertions(+), 3 deletions(-)

Comments

Klaus Jensen May 22, 2019, 6:12 a.m. UTC | #1
On Fri, May 17, 2019 at 10:42:31AM +0200, Klaus Birkelund Jensen wrote:
> The new `ms` parameter may be used to indicate the number of metadata
> bytes provided per LBA.
> 
> Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
> ---
>  hw/block/nvme.c | 31 +++++++++++++++++++++++++++++--
>  hw/block/nvme.h | 11 ++++++++++-
>  2 files changed, 39 insertions(+), 3 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index c514f93f3867..675967a596d1 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -33,6 +33,8 @@
>   *   num_ns=<int>          : Namespaces to make out of the backing storage,
>   *                           Default:1
>   *   num_queues=<int>      : Number of possible IO Queues, Default:64
> + *   ms=<int>              : Number of metadata bytes provided per LBA,
> + *                           Default:0
>   *   cmb_size_mb=<int>     : Size of CMB in MBs, Default:0
>   *
>   * Parameters will be verified against conflicting capabilities and attributes
> @@ -386,6 +388,8 @@ static uint16_t nvme_blk_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>  
>      uint32_t unit_len = nvme_ns_lbads_bytes(ns);
>      uint32_t len = req->nlb * unit_len;
> +    uint32_t meta_unit_len = nvme_ns_ms(ns);
> +    uint32_t meta_len = req->nlb * meta_unit_len;
>      uint64_t prp1 = le64_to_cpu(cmd->prp1);
>      uint64_t prp2 = le64_to_cpu(cmd->prp2);
>  
> @@ -399,6 +403,19 @@ static uint16_t nvme_blk_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
>          return err;
>      }
>  
> +    qsg.nsg = 0;
> +    qsg.size = 0;
> +
> +    if (cmd->mptr && n->params.ms) {
> +        qemu_sglist_add(&qsg, le64_to_cpu(cmd->mptr), meta_len);
> +
> +        err = nvme_blk_setup(n, ns, &qsg, ns->blk_offset_md, meta_unit_len,
> +            req);
> +        if (err) {
> +            return err;
> +        }
> +    }
> +
>      qemu_sglist_destroy(&qsg);
>  
>      return NVME_SUCCESS;
> @@ -1902,6 +1919,11 @@ static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
>          return 1;
>      }
>  
> +    if (params->ms && !is_power_of_2(params->ms)) {
> +        error_setg(errp, "nvme: invalid metadata configuration");
> +        return 1;
> +    }
> +
>      return 0;
>  }
>  
> @@ -2066,17 +2088,20 @@ static void nvme_init_ctrl(NvmeCtrl *n)
>  
>  static uint64_t nvme_ns_calc_blks(NvmeCtrl *n, NvmeNamespace *ns)
>  {
> -    return n->ns_size / nvme_ns_lbads_bytes(ns);
> +    return n->ns_size / (nvme_ns_lbads_bytes(ns) + nvme_ns_ms(ns));
>  }
>  
>  static void nvme_ns_init_identify(NvmeCtrl *n, NvmeIdNs *id_ns)
>  {
> +    NvmeParams *params = &n->params;
> +
>      id_ns->nlbaf = 0;
>      id_ns->flbas = 0;
> -    id_ns->mc = 0;
> +    id_ns->mc = params->ms ? 0x2 : 0;
>      id_ns->dpc = 0;
>      id_ns->dps = 0;
>      id_ns->lbaf[0].lbads = BDRV_SECTOR_BITS;
> +    id_ns->lbaf[0].ms = params->ms;
>  }
>  
>  static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
> @@ -2086,6 +2111,8 @@ static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
>      nvme_ns_init_identify(n, id_ns);
>  
>      ns->ns_blks = nvme_ns_calc_blks(n, ns);
> +    ns->blk_offset_md = ns->blk_offset + nvme_ns_lbads_bytes(ns) * ns->ns_blks;
> +
>      id_ns->nuse = id_ns->ncap = id_ns->nsze = cpu_to_le64(ns->ns_blks);
>  
>      return 0;
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index 711ca249eac5..81ee0c5173d5 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -8,13 +8,15 @@
>      DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \
>      DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \
>      DEFINE_PROP_UINT32("num_ns", _state, _props.num_ns, 1), \
> -    DEFINE_PROP_UINT8("mdts", _state, _props.mdts, 7)
> +    DEFINE_PROP_UINT8("mdts", _state, _props.mdts, 7), \
> +    DEFINE_PROP_UINT8("ms", _state, _props.ms, 0)
>  
>  typedef struct NvmeParams {
>      char     *serial;
>      uint32_t num_queues;
>      uint32_t num_ns;
>      uint8_t  mdts;
> +    uint8_t  ms;
>      uint32_t cmb_size_mb;
>  } NvmeParams;
>  
> @@ -91,6 +93,7 @@ typedef struct NvmeNamespace {
>      uint32_t        id;
>      uint64_t        ns_blks;
>      uint64_t        blk_offset;
> +    uint64_t        blk_offset_md;
>  } NvmeNamespace;
>  
>  #define TYPE_NVME "nvme"
> @@ -154,4 +157,10 @@ static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
>      return 1 << nvme_ns_lbads(ns);
>  }
>  
> +static inline uint16_t nvme_ns_ms(NvmeNamespace *ns)
> +{
> +    NvmeIdNs *id = &ns->id_ns;
> +    return le16_to_cpu(id->lbaf[NVME_ID_NS_FLBAS_INDEX(id->flbas)].ms);
> +}
> +
>  #endif /* HW_NVME_H */
> -- 
> 2.21.0
> 

Hi Kevin,

I feel this patch that has nothing to do with ocssd also touches upon
the core issue we are discussing in the ocssd thread.

As I understand it, QEMU does not support metadata pr LBA in the block
layer. Thus, the approach in this patch is also flawed because it breaks
the assumptions of other devices?

The approach suggested in this patch is to split the image into a large
data part and a trailing part for storing metadata bytes per data
sector.

  [                      data                        ][ meta ]

How about I drop this patch and the ocssd patch from this series and
just move forward with the other changes to the nvme device (i.e. v1.3
and sgls) for now and we can continue the discussion on how to get
metadata and ocssd support merged in another thread?
diff mbox series

Patch

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index c514f93f3867..675967a596d1 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -33,6 +33,8 @@ 
  *   num_ns=<int>          : Namespaces to make out of the backing storage,
  *                           Default:1
  *   num_queues=<int>      : Number of possible IO Queues, Default:64
+ *   ms=<int>              : Number of metadata bytes provided per LBA,
+ *                           Default:0
  *   cmb_size_mb=<int>     : Size of CMB in MBs, Default:0
  *
  * Parameters will be verified against conflicting capabilities and attributes
@@ -386,6 +388,8 @@  static uint16_t nvme_blk_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
 
     uint32_t unit_len = nvme_ns_lbads_bytes(ns);
     uint32_t len = req->nlb * unit_len;
+    uint32_t meta_unit_len = nvme_ns_ms(ns);
+    uint32_t meta_len = req->nlb * meta_unit_len;
     uint64_t prp1 = le64_to_cpu(cmd->prp1);
     uint64_t prp2 = le64_to_cpu(cmd->prp2);
 
@@ -399,6 +403,19 @@  static uint16_t nvme_blk_map(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
         return err;
     }
 
+    qsg.nsg = 0;
+    qsg.size = 0;
+
+    if (cmd->mptr && n->params.ms) {
+        qemu_sglist_add(&qsg, le64_to_cpu(cmd->mptr), meta_len);
+
+        err = nvme_blk_setup(n, ns, &qsg, ns->blk_offset_md, meta_unit_len,
+            req);
+        if (err) {
+            return err;
+        }
+    }
+
     qemu_sglist_destroy(&qsg);
 
     return NVME_SUCCESS;
@@ -1902,6 +1919,11 @@  static int nvme_check_constraints(NvmeCtrl *n, Error **errp)
         return 1;
     }
 
+    if (params->ms && !is_power_of_2(params->ms)) {
+        error_setg(errp, "nvme: invalid metadata configuration");
+        return 1;
+    }
+
     return 0;
 }
 
@@ -2066,17 +2088,20 @@  static void nvme_init_ctrl(NvmeCtrl *n)
 
 static uint64_t nvme_ns_calc_blks(NvmeCtrl *n, NvmeNamespace *ns)
 {
-    return n->ns_size / nvme_ns_lbads_bytes(ns);
+    return n->ns_size / (nvme_ns_lbads_bytes(ns) + nvme_ns_ms(ns));
 }
 
 static void nvme_ns_init_identify(NvmeCtrl *n, NvmeIdNs *id_ns)
 {
+    NvmeParams *params = &n->params;
+
     id_ns->nlbaf = 0;
     id_ns->flbas = 0;
-    id_ns->mc = 0;
+    id_ns->mc = params->ms ? 0x2 : 0;
     id_ns->dpc = 0;
     id_ns->dps = 0;
     id_ns->lbaf[0].lbads = BDRV_SECTOR_BITS;
+    id_ns->lbaf[0].ms = params->ms;
 }
 
 static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
@@ -2086,6 +2111,8 @@  static int nvme_init_namespace(NvmeCtrl *n, NvmeNamespace *ns, Error **errp)
     nvme_ns_init_identify(n, id_ns);
 
     ns->ns_blks = nvme_ns_calc_blks(n, ns);
+    ns->blk_offset_md = ns->blk_offset + nvme_ns_lbads_bytes(ns) * ns->ns_blks;
+
     id_ns->nuse = id_ns->ncap = id_ns->nsze = cpu_to_le64(ns->ns_blks);
 
     return 0;
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 711ca249eac5..81ee0c5173d5 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -8,13 +8,15 @@ 
     DEFINE_PROP_UINT32("cmb_size_mb", _state, _props.cmb_size_mb, 0), \
     DEFINE_PROP_UINT32("num_queues", _state, _props.num_queues, 64), \
     DEFINE_PROP_UINT32("num_ns", _state, _props.num_ns, 1), \
-    DEFINE_PROP_UINT8("mdts", _state, _props.mdts, 7)
+    DEFINE_PROP_UINT8("mdts", _state, _props.mdts, 7), \
+    DEFINE_PROP_UINT8("ms", _state, _props.ms, 0)
 
 typedef struct NvmeParams {
     char     *serial;
     uint32_t num_queues;
     uint32_t num_ns;
     uint8_t  mdts;
+    uint8_t  ms;
     uint32_t cmb_size_mb;
 } NvmeParams;
 
@@ -91,6 +93,7 @@  typedef struct NvmeNamespace {
     uint32_t        id;
     uint64_t        ns_blks;
     uint64_t        blk_offset;
+    uint64_t        blk_offset_md;
 } NvmeNamespace;
 
 #define TYPE_NVME "nvme"
@@ -154,4 +157,10 @@  static inline size_t nvme_ns_lbads_bytes(NvmeNamespace *ns)
     return 1 << nvme_ns_lbads(ns);
 }
 
+static inline uint16_t nvme_ns_ms(NvmeNamespace *ns)
+{
+    NvmeIdNs *id = &ns->id_ns;
+    return le16_to_cpu(id->lbaf[NVME_ID_NS_FLBAS_INDEX(id->flbas)].ms);
+}
+
 #endif /* HW_NVME_H */