diff mbox series

[v1,1/1] hw/nvme: Add OCP SMART / Health Information Extended Log Page

Message ID Zz-yyOolT5iWH7E7@snoc-pinewood (mailing list archive)
State New, archived
Headers show
Series [v1,1/1] hw/nvme: Add OCP SMART / Health Information Extended Log Page | expand

Commit Message

Stephen Bates Nov. 21, 2024, 10:23 p.m. UTC
The Open Compute Project [1] includes a Datacenter NVMe
SSD Specification [2]. The most recent version of this specification
(as of November 2024) is 2.6.1. This specification layers on top of
the NVM Express specifications [3] to provide additional
functionality. A key part of this is the 512 Byte OCP SMART / Health
Information Extended log page that is defined in Section 4.8.6 of the
specification.

We add a controller argument (ocp) that toggles on/off the SMART log
extended structure.  To accommodate different vendor specific specifications
like OCP, we add a multiplexing function (nvme_vendor_specific_log) which
will route to the different log functions based on arguments and log ids.
We only return the OCP extended SMART log when the log identifier is 0xC0
and ocp has been turned on in the nvme arguments.

Though we add the whole nvme SMART log extended structure, we only populate
the physical_media_units_{read,written}, log_page_version and
log_page_guid.

This patch is based on work done by Joel but has been modified enough
that he requested a co-developed-by tag rather than a signed-off-by.

[1]: https://www.opencompute.org/
[2]: https://www.opencompute.org/documents/datacenter-nvme-ssd-specification-v2-6-1-pdf
[3]: https://nvmexpress.org/specifications/

Signed-off-by: Stephen Bates <sbates@raithlin.com>
Co-developed-by: Joel Granados <j.granados@samsung.com>
---
 docs/system/devices/nvme.rst |  7 +++++
 hw/nvme/ctrl.c               | 59 ++++++++++++++++++++++++++++++++++++
 hw/nvme/nvme.h               |  1 +
 include/block/nvme.h         | 36 ++++++++++++++++++++++
 4 files changed, 103 insertions(+)

Comments

Klaus Jensen Nov. 27, 2024, 8:16 a.m. UTC | #1
Hi Stephen,

On Nov 21 15:23, Stephen Bates wrote:
> The Open Compute Project [1] includes a Datacenter NVMe
> SSD Specification [2]. The most recent version of this specification
> (as of November 2024) is 2.6.1. This specification layers on top of
> the NVM Express specifications [3] to provide additional
> functionality. A key part of of this is the 512 Byte OCP SMART / Health
> Information Extended log page that is defined in Section 4.8.6 of the
> specification.
> 
> We add a controller argument (ocp) that toggles on/off the SMART log
> extended structure.  To accommodate different vendor specific specifications
> like OCP, we add a multiplexing function (nvme_vendor_specific_log) which
> will route to the different log functions based on arguments and log ids.
> We only return the OCP extended SMART log when the command is 0xC0 and ocp
> has been turned on in the nvme argumants.
> 
> Though we add the whole nvme SMART log extended structure, we only populate
> the physical_media_units_{read,written}, log_page_version and
> log_page_uuid.
> 
> This patch is based on work done by Joel but has been modified enough
> that he requested a co-developed-by tag rather than a signed-off-by.
> 
> [1]: https://www.opencompute.org/
> [2]: https://www.opencompute.org/documents/datacenter-nvme-ssd-specification-v2-6-1-pdf
> [3]: https://nvmexpress.org/specifications/
> 
> Signed-off-by: Stephen Bates <sbates@raithlin.com>
> Co-developed-by: Joel Granados <j.granados@samsung.com>

LGTM, but see a few comments below.

> diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
> index d2b1ca9645..6509b35fcb 100644
> --- a/docs/system/devices/nvme.rst
> +++ b/docs/system/devices/nvme.rst
> @@ -53,6 +53,13 @@ parameters.
>    Vendor ID. Set this to ``on`` to revert to the unallocated Intel ID
>    previously used.
>  
> +``ocp`` (default: ``off``)
> +  The Open Compute Project defines the Datacenter NVMe SSD Specification that
> +  sits on top of NVMe. It describes additional commands and NVMe behaviors
> +  specific for the Datacenter. When this option is ``on`` OCP features such as
> +  the SMART / Health information extended log become available in the
> +  controller. We emulate version 5 of this log page.
> +

Do you think we would need to differentiate between versions of the
specification?

> @@ -5113,6 +5152,23 @@ static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
>      return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
>  }
>  
> +static uint16_t nvme_vendor_specific_log(NvmeCtrl *n, uint8_t rae,
> +                                         uint32_t buf_len, uint64_t off,
> +                                         NvmeRequest *req, uint8_t lid)
> +{
> +    switch (lid) {
> +    case 0xc0:

Let's do an enum for these log pages.

>  typedef struct NvmeCtrl {
> diff --git a/include/block/nvme.h b/include/block/nvme.h
> index 5298bc4a28..df8e45e396 100644
> --- a/include/block/nvme.h
> +++ b/include/block/nvme.h
> @@ -1015,6 +1015,40 @@ typedef struct QEMU_PACKED NvmeSmartLog {
>      uint8_t     reserved2[320];
>  } NvmeSmartLog;
>  
> +typedef struct QEMU_PACKED NvmeSmartLogExtended {
> +    uint64_t    physical_media_units_written[2];
> +    uint64_t    physical_media_units_read[2];
> +    uint64_t    bad_user_blocks;
> +    uint64_t    bad_system_nand_blocks;
> +    uint64_t    xor_recovery_count;
> +    uint64_t    uncorrectable_read_error_count;
> +    uint64_t    soft_ecc_error_count;
> +    uint64_t    end2end_correction_counts;
> +    uint8_t     system_data_percent_used;
> +    uint8_t     refresh_counts[7];
> +    uint64_t    user_data_erase_counts;
> +    uint16_t    thermal_throttling_stat_and_count;
> +    uint16_t    dssd_spec_version[3];
> +    uint64_t    pcie_correctable_error_count;
> +    uint32_t    incomplete_shutdowns;
> +    uint32_t    rsvd116;
> +    uint8_t     percent_free_blocks;
> +    uint8_t     rsvd121[7];
> +    uint16_t    capacity_health;
> +    uint8_t     nvme_errata_ver;
> +    uint8_t     rsvd131[5];
> +    uint64_t    unaligned_io;
> +    uint64_t    security_ver_num;
> +    uint64_t    total_nuse;
> +    uint64_t    plp_start_count[2];
> +    uint64_t    endurance_estimate[2];
> +    uint64_t    pcie_retraining_count;
> +    uint64_t    power_state_change_count;
> +    uint8_t     rsvd208[286];
> +    uint16_t    log_page_version;
> +    uint64_t    log_page_guid[2];
> +} NvmeSmartLogExtended;

Please add a QEMU_BUILD_BUG_ON to check the size.
diff mbox series

Patch

diff --git a/docs/system/devices/nvme.rst b/docs/system/devices/nvme.rst
index d2b1ca9645..6509b35fcb 100644
--- a/docs/system/devices/nvme.rst
+++ b/docs/system/devices/nvme.rst
@@ -53,6 +53,13 @@  parameters.
   Vendor ID. Set this to ``on`` to revert to the unallocated Intel ID
   previously used.
 
+``ocp`` (default: ``off``)
+  The Open Compute Project defines the Datacenter NVMe SSD Specification that
+  sits on top of NVMe. It describes additional commands and NVMe behaviors
+  specific for the Datacenter. When this option is ``on`` OCP features such as
+  the SMART / Health information extended log become available in the
+  controller. We emulate version 5 of this log page.
+
 Additional Namespaces
 ---------------------
 
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 9f277b81d8..42a7bbf945 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4884,6 +4884,45 @@  static void nvme_set_blk_stats(NvmeNamespace *ns, struct nvme_stats *stats)
     stats->write_commands += s->nr_ops[BLOCK_ACCT_WRITE];
 }
 
+static uint16_t nvme_ocp_extended_smart_info(NvmeCtrl *n, uint8_t rae,
+                                             uint32_t buf_len, uint64_t off,
+                                             NvmeRequest *req)
+{
+    NvmeNamespace *ns = NULL;
+    NvmeSmartLogExtended smart_l = { 0 };
+    struct nvme_stats stats = { 0 };
+    uint32_t trans_len;
+
+    if (off >= sizeof(smart_l)) {
+        return NVME_INVALID_FIELD | NVME_DNR;
+    }
+
+    /* accumulate all stats from all namespaces */
+    for (int i = 1; i <= NVME_MAX_NAMESPACES; i++) {
+        ns = nvme_ns(n, i);
+        if (ns) {
+            nvme_set_blk_stats(ns, &stats);
+        }
+    }
+
+    smart_l.physical_media_units_written[0] = cpu_to_le64(stats.units_written);
+    smart_l.physical_media_units_read[0] = cpu_to_le64(stats.units_read);
+    smart_l.log_page_version = 0x0005;
+
+    static const uint8_t guid[16] = {
+        0xC5, 0xAF, 0x10, 0x28, 0xEA, 0xBF, 0xF2, 0xA4,
+        0x9C, 0x4F, 0x6F, 0x7C, 0xC9, 0x14, 0xD5, 0xAF
+    };
+    memcpy(smart_l.log_page_guid, guid, sizeof(smart_l.log_page_guid));
+
+    if (!rae) {
+        nvme_clear_events(n, NVME_AER_TYPE_SMART);
+    }
+
+    trans_len = MIN(sizeof(smart_l) - off, buf_len);
+    return nvme_c2h(n, (uint8_t *) &smart_l + off, trans_len, req);
+}
+
 static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
                                 uint64_t off, NvmeRequest *req)
 {
@@ -5113,6 +5152,23 @@  static uint16_t nvme_cmd_effects(NvmeCtrl *n, uint8_t csi, uint32_t buf_len,
     return nvme_c2h(n, ((uint8_t *)&log) + off, trans_len, req);
 }
 
+static uint16_t nvme_vendor_specific_log(NvmeCtrl *n, uint8_t rae,
+                                         uint32_t buf_len, uint64_t off,
+                                         NvmeRequest *req, uint8_t lid)
+{
+    switch (lid) {
+    case 0xc0:
+        if (n->params.ocp) {
+            return nvme_ocp_extended_smart_info(n, rae, buf_len, off, req);
+        }
+        break;
+        /* add a case for each additional vendor specific log id */
+    }
+
+    trace_pci_nvme_err_invalid_log_page(nvme_cid(req), lid);
+    return NVME_INVALID_FIELD | NVME_DNR;
+}
+
 static size_t sizeof_fdp_conf_descr(size_t nruh, size_t vss)
 {
     size_t entry_siz = sizeof(NvmeFdpDescrHdr) + nruh * sizeof(NvmeRuhDescr)
@@ -5363,6 +5419,8 @@  static uint16_t nvme_get_log(NvmeCtrl *n, NvmeRequest *req)
         return nvme_smart_info(n, rae, len, off, req);
     case NVME_LOG_FW_SLOT_INFO:
         return nvme_fw_log_info(n, len, off, req);
+    case NVME_LOG_VENDOR_START...NVME_LOG_VENDOR_END:
+        return nvme_vendor_specific_log(n, rae, len, off, req, lid);
     case NVME_LOG_CHANGED_NSLIST:
         return nvme_changed_nslist(n, rae, len, off, req);
     case NVME_LOG_CMD_EFFECTS:
@@ -8734,6 +8792,7 @@  static Property nvme_props[] = {
                      false),
     DEFINE_PROP_UINT16("mqes", NvmeCtrl, params.mqes, 0x7ff),
     DEFINE_PROP_UINT16("spdm_port", PCIDevice, spdm_port, 0),
+    DEFINE_PROP_BOOL("ocp", NvmeCtrl, params.ocp, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 781985754d..099f40f3e9 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -538,6 +538,7 @@  typedef struct NvmeParams {
     uint32_t  sriov_max_vq_per_vf;
     uint32_t  sriov_max_vi_per_vf;
     bool     msix_exclusive_bar;
+    bool     ocp;
 } NvmeParams;
 
 typedef struct NvmeCtrl {
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 5298bc4a28..df8e45e396 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1015,6 +1015,40 @@  typedef struct QEMU_PACKED NvmeSmartLog {
     uint8_t     reserved2[320];
 } NvmeSmartLog;
 
+typedef struct QEMU_PACKED NvmeSmartLogExtended {
+    uint64_t    physical_media_units_written[2];
+    uint64_t    physical_media_units_read[2];
+    uint64_t    bad_user_blocks;
+    uint64_t    bad_system_nand_blocks;
+    uint64_t    xor_recovery_count;
+    uint64_t    uncorrectable_read_error_count;
+    uint64_t    soft_ecc_error_count;
+    uint64_t    end2end_correction_counts;
+    uint8_t     system_data_percent_used;
+    uint8_t     refresh_counts[7];
+    uint64_t    user_data_erase_counts;
+    uint16_t    thermal_throttling_stat_and_count;
+    uint16_t    dssd_spec_version[3];
+    uint64_t    pcie_correctable_error_count;
+    uint32_t    incomplete_shutdowns;
+    uint32_t    rsvd116;
+    uint8_t     percent_free_blocks;
+    uint8_t     rsvd121[7];
+    uint16_t    capacity_health;
+    uint8_t     nvme_errata_ver;
+    uint8_t     rsvd131[5];
+    uint64_t    unaligned_io;
+    uint64_t    security_ver_num;
+    uint64_t    total_nuse;
+    uint64_t    plp_start_count[2];
+    uint64_t    endurance_estimate[2];
+    uint64_t    pcie_retraining_count;
+    uint64_t    power_state_change_count;
+    uint8_t     rsvd208[286];
+    uint16_t    log_page_version;
+    uint64_t    log_page_guid[2];
+} NvmeSmartLogExtended;
+
 #define NVME_SMART_WARN_MAX     6
 enum NvmeSmartWarn {
     NVME_SMART_SPARE                  = 1 << 0,
@@ -1052,6 +1086,8 @@  enum NvmeLogIdentifier {
     NVME_LOG_FDP_RUH_USAGE              = 0x21,
     NVME_LOG_FDP_STATS                  = 0x22,
     NVME_LOG_FDP_EVENTS                 = 0x23,
+    NVME_LOG_VENDOR_START               = 0xc0,
+    NVME_LOG_VENDOR_END                 = 0xff,
 };
 
 typedef struct QEMU_PACKED NvmePSD {