diff mbox series

[v2,1/1] hw/block/nvme: add smart_critical_warning property

Message ID 20210112074924.217862-2-pizhenwei@bytedance.com (mailing list archive)
State New, archived
Headers show
Series add smart_critical_warning property for nvme | expand

Commit Message

zhenwei pi Jan. 12, 2021, 7:49 a.m. UTC
There is a very low probability that hitting physical NVMe disk
hardware critical warning case, it's hard to write & test a monitor
agent service.

For debugging purposes, add a new 'smart_critical_warning' property
to emulate this situation.

The orignal version of this change is implemented by adding a fixed
property which could be initialized by QEMU command line. Suggested
by Philippe & Klaus, rework like current version.

Test with this patch:
1, change smart_critical_warning property for a running VM:
 #virsh qemu-monitor-command nvme-upstream '{ "execute": "qom-set",
  "arguments": { "path": "/machine/peripheral-anon/device[0]",
  "property": "smart_critical_warning", "value":16 } }'
2, run smartctl in guest
 #smartctl -H -l error /dev/nvme0n1

  === START OF SMART DATA SECTION ===
  SMART overall-health self-assessment test result: FAILED!
  - volatile memory backup device has failed

Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
---
 hw/block/nvme.c | 28 ++++++++++++++++++++++++++++
 hw/block/nvme.h |  1 +
 2 files changed, 29 insertions(+)

Comments

Klaus Jensen Jan. 12, 2021, 10:39 a.m. UTC | #1
On Jan 12 15:49, zhenwei pi wrote:
> There is a very low probability that hitting physical NVMe disk
> hardware critical warning case, it's hard to write & test a monitor
> agent service.
> 
> For debugging purposes, add a new 'smart_critical_warning' property
> to emulate this situation.
> 
> The orignal version of this change is implemented by adding a fixed
> property which could be initialized by QEMU command line. Suggested
> by Philippe & Klaus, rework like current version.
> 
> Test with this patch:
> 1, change smart_critical_warning property for a running VM:
>  #virsh qemu-monitor-command nvme-upstream '{ "execute": "qom-set",
>   "arguments": { "path": "/machine/peripheral-anon/device[0]",
>   "property": "smart_critical_warning", "value":16 } }'
> 2, run smartctl in guest
>  #smartctl -H -l error /dev/nvme0n1
> 
>   === START OF SMART DATA SECTION ===
>   SMART overall-health self-assessment test result: FAILED!
>   - volatile memory backup device has failed
> 
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>

I think we also need to check the asynchronous event configuration and
issue an AEN if required like we do when the temperature threshold
changes.

Philippe, what are the locking semantics here? This runs under the big
lock?

> ---
>  hw/block/nvme.c | 28 ++++++++++++++++++++++++++++
>  hw/block/nvme.h |  1 +
>  2 files changed, 29 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 27d2c72716..a98757b6a1 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1214,6 +1214,7 @@ static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
>      }
>  
>      trans_len = MIN(sizeof(smart) - off, buf_len);
> +    smart.critical_warning = n->smart_critical_warning;
>  
>      smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read,
>                                                          1000));
> @@ -2827,6 +2828,29 @@ static Property nvme_props[] = {
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> +
> +static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name,
> +                                   void *opaque, Error **errp)
> +{
> +    NvmeCtrl *s = NVME(obj);
> +    uint8_t value = s->smart_critical_warning;
> +
> +    visit_type_uint8(v, name, &value, errp);
> +}
> +
> +static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name,
> +                                   void *opaque, Error **errp)
> +{
> +    NvmeCtrl *s = NVME(obj);
> +    uint8_t value;
> +
> +    if (!visit_type_uint8(v, name, &value, errp)) {
> +        return;
> +    }
> +
> +    s->smart_critical_warning = value;
> +}
> +
>  static const VMStateDescription nvme_vmstate = {
>      .name = "nvme",
>      .unmigratable = 1,
> @@ -2857,6 +2881,10 @@ static void nvme_instance_init(Object *obj)
>                                        "bootindex", "/namespace@1,0",
>                                        DEVICE(obj));
>      }
> +
> +    object_property_add(obj, "smart_critical_warning", "uint8",
> +                        nvme_get_smart_warning,
> +                        nvme_set_smart_warning, NULL, NULL);
>  }
>  
>  static const TypeInfo nvme_info = {
> diff --git a/hw/block/nvme.h b/hw/block/nvme.h
> index e080a2318a..64e3497244 100644
> --- a/hw/block/nvme.h
> +++ b/hw/block/nvme.h
> @@ -139,6 +139,7 @@ typedef struct NvmeCtrl {
>      uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
>      uint64_t    starttime_ms;
>      uint16_t    temperature;
> +    uint8_t     smart_critical_warning;
>  
>      HostMemoryBackend *pmrdev;
>  
> -- 
> 2.25.1
> 
>
diff mbox series

Patch

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 27d2c72716..a98757b6a1 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1214,6 +1214,7 @@  static uint16_t nvme_smart_info(NvmeCtrl *n, uint8_t rae, uint32_t buf_len,
     }
 
     trans_len = MIN(sizeof(smart) - off, buf_len);
+    smart.critical_warning = n->smart_critical_warning;
 
     smart.data_units_read[0] = cpu_to_le64(DIV_ROUND_UP(stats.units_read,
                                                         1000));
@@ -2827,6 +2828,29 @@  static Property nvme_props[] = {
     DEFINE_PROP_END_OF_LIST(),
 };
 
+
+static void nvme_get_smart_warning(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    NvmeCtrl *s = NVME(obj);
+    uint8_t value = s->smart_critical_warning;
+
+    visit_type_uint8(v, name, &value, errp);
+}
+
+static void nvme_set_smart_warning(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    NvmeCtrl *s = NVME(obj);
+    uint8_t value;
+
+    if (!visit_type_uint8(v, name, &value, errp)) {
+        return;
+    }
+
+    s->smart_critical_warning = value;
+}
+
 static const VMStateDescription nvme_vmstate = {
     .name = "nvme",
     .unmigratable = 1,
@@ -2857,6 +2881,10 @@  static void nvme_instance_init(Object *obj)
                                       "bootindex", "/namespace@1,0",
                                       DEVICE(obj));
     }
+
+    object_property_add(obj, "smart_critical_warning", "uint8",
+                        nvme_get_smart_warning,
+                        nvme_set_smart_warning, NULL, NULL);
 }
 
 static const TypeInfo nvme_info = {
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index e080a2318a..64e3497244 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -139,6 +139,7 @@  typedef struct NvmeCtrl {
     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
     uint64_t    starttime_ms;
     uint16_t    temperature;
+    uint8_t     smart_critical_warning;
 
     HostMemoryBackend *pmrdev;