diff mbox series

[v5,7/7] hw/cxl/events: Add injection of Memory Module Events

Message ID 20230423165140.16833-8-Jonathan.Cameron@huawei.com (mailing list archive)
State New, archived
Headers show
Series QEMU CXL Provide mock CXL events and irq support | expand

Commit Message

Jonathan Cameron April 23, 2023, 4:51 p.m. UTC
These events include a copy of the device health information at the
time of the event. Actually using the emulated device health would
require a lot of controls to manipulate that state.  Given the aim
of this injection code is to just test the flows when events occur,
inject the contents of the device health state as well.

Future work may add more sophisticate device health emulation
including direct generation of these records when events occur
(such as a temperature threshold being crossed).  That does not
reduce the usefulness of this more basic generation of the events.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

--
v5:
* Rebase
* Update Since entries for QMP.
---
 hw/mem/cxl_type3.c          | 62 +++++++++++++++++++++++++++++++++++++
 hw/mem/cxl_type3_stubs.c    | 12 +++++++
 include/hw/cxl/cxl_events.h | 19 ++++++++++++
 qapi/cxl.json               | 35 +++++++++++++++++++++
 4 files changed, 128 insertions(+)

Comments

Michael S. Tsirkin May 19, 2023, 7:06 a.m. UTC | #1
On Sun, Apr 23, 2023 at 05:51:40PM +0100, Jonathan Cameron wrote:
> These events include a copy of the device health information at the
> time of the event. Actually using the emulated device health would
> require a lot of controls to manipulate that state.  Given the aim
> of this injection code is to just test the flows when events occur,
> inject the contents of the device health state as well.
> 
> Future work may add more sophisticate device health emulation
> including direct generation of these records when events occur
> (such as a temperature threshold being crossed).  That does not
> reduce the usefulness of this more basic generation of the events.
> 
> Reviewed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> 
> --

This should be --- not --, otherwise git am does not cut at this point.


> v5:
> * Rebase
> * Update Since entries for QMP.
> ---
>  hw/mem/cxl_type3.c          | 62 +++++++++++++++++++++++++++++++++++++
>  hw/mem/cxl_type3_stubs.c    | 12 +++++++
>  include/hw/cxl/cxl_events.h | 19 ++++++++++++
>  qapi/cxl.json               | 35 +++++++++++++++++++++
>  4 files changed, 128 insertions(+)
> 
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 71df31917c..5d1b3a5b9b 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -1201,6 +1201,11 @@ static const QemuUUID dram_uuid = {
>                   0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
>  };
>  
> +static const QemuUUID memory_module_uuid = {
> +    .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86,
> +                 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74),
> +};
> +
>  #define CXL_GMER_VALID_CHANNEL                          BIT(0)
>  #define CXL_GMER_VALID_RANK                             BIT(1)
>  #define CXL_GMER_VALID_DEVICE                           BIT(2)
> @@ -1408,6 +1413,63 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
>      return;
>  }
>  
> +void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
> +                                        uint8_t flags, uint8_t type,
> +                                        uint8_t health_status,
> +                                        uint8_t media_status,
> +                                        uint8_t additional_status,
> +                                        uint8_t life_used,
> +                                        int16_t temperature,
> +                                        uint32_t dirty_shutdown_count,
> +                                        uint32_t corrected_volatile_error_count,
> +                                        uint32_t corrected_persistent_error_count,
> +                                        Error **errp)
> +{
> +    Object *obj = object_resolve_path(path, NULL);
> +    CXLEventMemoryModule module;
> +    CXLEventRecordHdr *hdr = &module.hdr;
> +    CXLDeviceState *cxlds;
> +    CXLType3Dev *ct3d;
> +    uint8_t enc_log;
> +    int rc;
> +
> +    if (!obj) {
> +        error_setg(errp, "Unable to resolve path");
> +        return;
> +    }
> +    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
> +        error_setg(errp, "Path does not point to a CXL type 3 device");
> +        return;
> +    }
> +    ct3d = CXL_TYPE3(obj);
> +    cxlds = &ct3d->cxl_dstate;
> +
> +    rc = ct3d_qmp_cxl_event_log_enc(log);
> +    if (rc < 0) {
> +        error_setg(errp, "Unhandled error log type");
> +        return;
> +    }
> +    enc_log = rc;
> +
> +    memset(&module, 0, sizeof(module));
> +    cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module),
> +                            cxl_device_get_timestamp(&ct3d->cxl_dstate));
> +
> +    module.type = type;
> +    module.health_status = health_status;
> +    module.media_status = media_status;
> +    module.additional_status = additional_status;
> +    module.life_used = life_used;
> +    stw_le_p(&module.temperature, temperature);
> +    stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count);
> +    stl_le_p(&module.corrected_volatile_error_count, corrected_volatile_error_count);
> +    stl_le_p(&module.corrected_persistent_error_count, corrected_persistent_error_count);
> +
> +    if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) {
> +        cxl_event_irq_assert(ct3d);
> +    }
> +}
> +
>  static void ct3_class_init(ObjectClass *oc, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(oc);
> diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
> index 235c171264..2196bd841c 100644
> --- a/hw/mem/cxl_type3_stubs.c
> +++ b/hw/mem/cxl_type3_stubs.c
> @@ -26,6 +26,18 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
>                                 bool has_correction_mask, uint64List *correction_mask,
>                                 Error **errp) {}
>  
> +void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
> +                                        uint8_t flags, uint8_t type,
> +                                        uint8_t health_status,
> +                                        uint8_t media_status,
> +                                        uint8_t additional_status,
> +                                        uint8_t life_used,
> +                                        int16_t temperature,
> +                                        uint32_t dirty_shutdown_count,
> +                                        uint32_t corrected_volatile_error_count,
> +                                        uint32_t corrected_persistent_error_count,
> +                                        Error **errp) {}
> +
>  void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
>                             Error **errp)
>  {
> diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
> index a39e30d973..089ba2091f 100644
> --- a/include/hw/cxl/cxl_events.h
> +++ b/include/hw/cxl/cxl_events.h
> @@ -146,4 +146,23 @@ typedef struct CXLEventDram {
>      uint8_t reserved[0x17];
>  } QEMU_PACKED CXLEventDram;
>  
> +/*
> + * Memory Module Event Record
> + * CXL Rev 3.0 Section 8.2.9.2.1.3: Table 8-45
> + * All fields little endian.
> + */
> +typedef struct CXLEventMemoryModule {
> +    CXLEventRecordHdr hdr;
> +    uint8_t type;
> +    uint8_t health_status;
> +    uint8_t media_status;
> +    uint8_t additional_status;
> +    uint8_t life_used;
> +    int16_t temperature;
> +    uint32_t dirty_shutdown_count;
> +    uint32_t corrected_volatile_error_count;
> +    uint32_t corrected_persistent_error_count;
> +    uint8_t reserved[0x3d];
> +} QEMU_PACKED CXLEventMemoryModule;
> +
>  #endif /* CXL_EVENTS_H */
> diff --git a/qapi/cxl.json b/qapi/cxl.json
> index 190db58385..aae70667c2 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl.json
> @@ -90,6 +90,41 @@
>              '*column': 'uint16', '*correction-mask': [ 'uint64' ]
>             }}
>  
> +##
> +# @cxl-inject-memory-module-event:
> +#
> +# Inject an event record for a Memory Module Event (CXL r3.0 8.2.9.2.1.3)
> +# This event includes a copy of the Device Health info at the time of
> +# the event.
> +#
> +# @path: CXL type 3 device canonical QOM path
> +# @log: Event Log to add the event to
> +# @flags: header flags
> +# @type: Device Event Type (see spec for permitted values)
> +# @health-status: Overall health summary bitmap (see spec for permitted bits)
> +# @media-status: Overall media health summary (see spec for permitted values)
> +# @additional-status: Complex field (see spec for meaning)
> +# @life-used: Percentage (0-100) of factory expected life span
> +# @temperature: Device temperature in degrees Celsius
> +# @dirty-shutdown-count: Counter incremented whenever device is unable
> +#                        to determine if data loss may have occurred.
> +# @corrected-volatile-error-count: Total number of correctable errors in
> +#                                  volatile memory
> +# @corrected-persistent-error-count: Total number correctable errors in
> +#                                    persistent memory
> +#
> +# Since: 8.1
> +##
> +{ 'command': 'cxl-inject-memory-module-event',
> +  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8',
> +            'type': 'uint8', 'health-status': 'uint8',
> +            'media-status': 'uint8', 'additional-status': 'uint8',
> +            'life-used': 'uint8', 'temperature' : 'int16',
> +            'dirty-shutdown-count': 'uint32',
> +            'corrected-volatile-error-count': 'uint32',
> +            'corrected-persistent-error-count': 'uint32'
> +            }}
> +
>  ##
>  # @cxl-inject-poison:
>  #
> -- 
> 2.37.2
Markus Armbruster May 22, 2023, 7:47 a.m. UTC | #2
Jonathan Cameron <Jonathan.Cameron@huawei.com> writes:

> These events include a copy of the device health information at the
> time of the event. Actually using the emulated device health would
> require a lot of controls to manipulate that state.  Given the aim
> of this injection code is to just test the flows when events occur,
> inject the contents of the device health state as well.
>
> Future work may add more sophisticate device health emulation
> including direct generation of these records when events occur
> (such as a temperature threshold being crossed).  That does not
> reduce the usefulness of this more basic generation of the events.
>
> Reviewed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

[...]

> diff --git a/qapi/cxl.json b/qapi/cxl.json
> index 190db58385..aae70667c2 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl.json
> @@ -90,6 +90,41 @@
>              '*column': 'uint16', '*correction-mask': [ 'uint64' ]
>             }}
>  
> +##
> +# @cxl-inject-memory-module-event:
> +#
> +# Inject an event record for a Memory Module Event (CXL r3.0 8.2.9.2.1.3)

End the sentence with a period, please.

> +# This event includes a copy of the Device Health info at the time of
> +# the event.
> +#
> +# @path: CXL type 3 device canonical QOM path
> +# @log: Event Log to add the event to
> +# @flags: header flags
> +# @type: Device Event Type (see spec for permitted values)
> +# @health-status: Overall health summary bitmap (see spec for permitted bits)
> +# @media-status: Overall media health summary (see spec for permitted values)
> +# @additional-status: Complex field (see spec for meaning)

"spec" is not a word.  Yes, typing out references gets tedious, but your
readers will appreciate tediously unambiguous references.

> +# @life-used: Percentage (0-100) of factory expected life span
> +# @temperature: Device temperature in degrees Celsius
> +# @dirty-shutdown-count: Counter incremented whenever device is unable

Suggest something like "number of times the device has been unable to
determine whether data loss ..."

> +#                        to determine if data loss may have occurred.
> +# @corrected-volatile-error-count: Total number of correctable errors in
> +#                                  volatile memory
> +# @corrected-persistent-error-count: Total number correctable errors in
> +#                                    persistent memory
> +#
> +# Since: 8.1
> +##


Please format like

   ##
   # @cxl-inject-memory-module-event:
   #
   # Inject an event record for a Memory Module Event (CXL r3.0
   # 8.2.9.2.1.3).  This event includes a copy of the Device Health info
   # at the time of the event.
   #
   # @path: CXL type 3 device canonical QOM path
   #
   # @log: Event Log to add the event to
   #
   # @flags: header flags
   #
   # @type: Device Event Type (see spec for permitted values)
   #
   # @health-status: Overall health summary bitmap (see spec for
   #     permitted bits)
   #
   # @media-status: Overall media health summary (see spec for permitted
   #     values)
   #
   # @additional-status: Complex field (see spec for meaning)
   #
   # @life-used: Percentage (0-100) of factory expected life span
   #
   # @temperature: Device temperature in degrees Celsius
   #
   # @dirty-shutdown-count: Counter incremented whenever device is unable
   #     to determine if data loss may have occurred.
   #
   # @corrected-volatile-error-count: Total number of correctable errors
   #     in volatile memory
   #
   # @corrected-persistent-error-count: Total number correctable errors
   #     in persistent memory
   #
   # Since: 8.1
   ##

to blend in with recent commit a937b6aa739 (qapi: Reformat doc comments
to conform to current conventions).

> +{ 'command': 'cxl-inject-memory-module-event',
> +  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8',
> +            'type': 'uint8', 'health-status': 'uint8',
> +            'media-status': 'uint8', 'additional-status': 'uint8',
> +            'life-used': 'uint8', 'temperature' : 'int16',
> +            'dirty-shutdown-count': 'uint32',
> +            'corrected-volatile-error-count': 'uint32',
> +            'corrected-persistent-error-count': 'uint32'
> +            }}
> +
>  ##
>  # @cxl-inject-poison:
>  #
diff mbox series

Patch

diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 71df31917c..5d1b3a5b9b 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -1201,6 +1201,11 @@  static const QemuUUID dram_uuid = {
                  0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24),
 };
 
+static const QemuUUID memory_module_uuid = {
+    .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86,
+                 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74),
+};
+
 #define CXL_GMER_VALID_CHANNEL                          BIT(0)
 #define CXL_GMER_VALID_RANK                             BIT(1)
 #define CXL_GMER_VALID_DEVICE                           BIT(2)
@@ -1408,6 +1413,63 @@  void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
     return;
 }
 
+void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
+                                        uint8_t flags, uint8_t type,
+                                        uint8_t health_status,
+                                        uint8_t media_status,
+                                        uint8_t additional_status,
+                                        uint8_t life_used,
+                                        int16_t temperature,
+                                        uint32_t dirty_shutdown_count,
+                                        uint32_t corrected_volatile_error_count,
+                                        uint32_t corrected_persistent_error_count,
+                                        Error **errp)
+{
+    Object *obj = object_resolve_path(path, NULL);
+    CXLEventMemoryModule module;
+    CXLEventRecordHdr *hdr = &module.hdr;
+    CXLDeviceState *cxlds;
+    CXLType3Dev *ct3d;
+    uint8_t enc_log;
+    int rc;
+
+    if (!obj) {
+        error_setg(errp, "Unable to resolve path");
+        return;
+    }
+    if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) {
+        error_setg(errp, "Path does not point to a CXL type 3 device");
+        return;
+    }
+    ct3d = CXL_TYPE3(obj);
+    cxlds = &ct3d->cxl_dstate;
+
+    rc = ct3d_qmp_cxl_event_log_enc(log);
+    if (rc < 0) {
+        error_setg(errp, "Unhandled error log type");
+        return;
+    }
+    enc_log = rc;
+
+    memset(&module, 0, sizeof(module));
+    cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module),
+                            cxl_device_get_timestamp(&ct3d->cxl_dstate));
+
+    module.type = type;
+    module.health_status = health_status;
+    module.media_status = media_status;
+    module.additional_status = additional_status;
+    module.life_used = life_used;
+    stw_le_p(&module.temperature, temperature);
+    stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count);
+    stl_le_p(&module.corrected_volatile_error_count, corrected_volatile_error_count);
+    stl_le_p(&module.corrected_persistent_error_count, corrected_persistent_error_count);
+
+    if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) {
+        cxl_event_irq_assert(ct3d);
+    }
+}
+
 static void ct3_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c
index 235c171264..2196bd841c 100644
--- a/hw/mem/cxl_type3_stubs.c
+++ b/hw/mem/cxl_type3_stubs.c
@@ -26,6 +26,18 @@  void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags,
                                bool has_correction_mask, uint64List *correction_mask,
                                Error **errp) {}
 
+void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log,
+                                        uint8_t flags, uint8_t type,
+                                        uint8_t health_status,
+                                        uint8_t media_status,
+                                        uint8_t additional_status,
+                                        uint8_t life_used,
+                                        int16_t temperature,
+                                        uint32_t dirty_shutdown_count,
+                                        uint32_t corrected_volatile_error_count,
+                                        uint32_t corrected_persistent_error_count,
+                                        Error **errp) {}
+
 void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length,
                            Error **errp)
 {
diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h
index a39e30d973..089ba2091f 100644
--- a/include/hw/cxl/cxl_events.h
+++ b/include/hw/cxl/cxl_events.h
@@ -146,4 +146,23 @@  typedef struct CXLEventDram {
     uint8_t reserved[0x17];
 } QEMU_PACKED CXLEventDram;
 
+/*
+ * Memory Module Event Record
+ * CXL Rev 3.0 Section 8.2.9.2.1.3: Table 8-45
+ * All fields little endian.
+ */
+typedef struct CXLEventMemoryModule {
+    CXLEventRecordHdr hdr;
+    uint8_t type;
+    uint8_t health_status;
+    uint8_t media_status;
+    uint8_t additional_status;
+    uint8_t life_used;
+    int16_t temperature;
+    uint32_t dirty_shutdown_count;
+    uint32_t corrected_volatile_error_count;
+    uint32_t corrected_persistent_error_count;
+    uint8_t reserved[0x3d];
+} QEMU_PACKED CXLEventMemoryModule;
+
 #endif /* CXL_EVENTS_H */
diff --git a/qapi/cxl.json b/qapi/cxl.json
index 190db58385..aae70667c2 100644
--- a/qapi/cxl.json
+++ b/qapi/cxl.json
@@ -90,6 +90,41 @@ 
             '*column': 'uint16', '*correction-mask': [ 'uint64' ]
            }}
 
+##
+# @cxl-inject-memory-module-event:
+#
+# Inject an event record for a Memory Module Event (CXL r3.0 8.2.9.2.1.3)
+# This event includes a copy of the Device Health info at the time of
+# the event.
+#
+# @path: CXL type 3 device canonical QOM path
+# @log: Event Log to add the event to
+# @flags: header flags
+# @type: Device Event Type (see spec for permitted values)
+# @health-status: Overall health summary bitmap (see spec for permitted bits)
+# @media-status: Overall media health summary (see spec for permitted values)
+# @additional-status: Complex field (see spec for meaning)
+# @life-used: Percentage (0-100) of factory expected life span
+# @temperature: Device temperature in degrees Celsius
+# @dirty-shutdown-count: Counter incremented whenever device is unable
+#                        to determine if data loss may have occurred.
+# @corrected-volatile-error-count: Total number of correctable errors in
+#                                  volatile memory
+# @corrected-persistent-error-count: Total number correctable errors in
+#                                    persistent memory
+#
+# Since: 8.1
+##
+{ 'command': 'cxl-inject-memory-module-event',
+  'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8',
+            'type': 'uint8', 'health-status': 'uint8',
+            'media-status': 'uint8', 'additional-status': 'uint8',
+            'life-used': 'uint8', 'temperature' : 'int16',
+            'dirty-shutdown-count': 'uint32',
+            'corrected-volatile-error-count': 'uint32',
+            'corrected-persistent-error-count': 'uint32'
+            }}
+
 ##
 # @cxl-inject-poison:
 #