Message ID | 20230423165140.16833-8-Jonathan.Cameron@huawei.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | QEMU CXL Provide mock CXL events and irq support | expand |
On Sun, Apr 23, 2023 at 05:51:40PM +0100, Jonathan Cameron wrote: > These events include a copy of the device health information at the > time of the event. Actually using the emulated device health would > require a lot of controls to manipulate that state. Given the aim > of this injection code is to just test the flows when events occur, > inject the contents of the device health state as well. > > Future work may add more sophisticate device health emulation > including direct generation of these records when events occur > (such as a temperature threshold being crossed). That does not > reduce the usefulness of this more basic generation of the events. > > Reviewed-by: Ira Weiny <ira.weiny@intel.com> > Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > > -- This should be --- not --, otherwise git am does not cut at this point. > v5: > * Rebase > * Update Since entries for QMP. > --- > hw/mem/cxl_type3.c | 62 +++++++++++++++++++++++++++++++++++++ > hw/mem/cxl_type3_stubs.c | 12 +++++++ > include/hw/cxl/cxl_events.h | 19 ++++++++++++ > qapi/cxl.json | 35 +++++++++++++++++++++ > 4 files changed, 128 insertions(+) > > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c > index 71df31917c..5d1b3a5b9b 100644 > --- a/hw/mem/cxl_type3.c > +++ b/hw/mem/cxl_type3.c > @@ -1201,6 +1201,11 @@ static const QemuUUID dram_uuid = { > 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24), > }; > > +static const QemuUUID memory_module_uuid = { > + .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86, > + 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74), > +}; > + > #define CXL_GMER_VALID_CHANNEL BIT(0) > #define CXL_GMER_VALID_RANK BIT(1) > #define CXL_GMER_VALID_DEVICE BIT(2) > @@ -1408,6 +1413,63 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags, > return; > } > > +void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, > + uint8_t flags, uint8_t type, > + uint8_t health_status, > + uint8_t media_status, > + uint8_t additional_status, > + 
uint8_t life_used, > + int16_t temperature, > + uint32_t dirty_shutdown_count, > + uint32_t corrected_volatile_error_count, > + uint32_t corrected_persistent_error_count, > + Error **errp) > +{ > + Object *obj = object_resolve_path(path, NULL); > + CXLEventMemoryModule module; > + CXLEventRecordHdr *hdr = &module.hdr; > + CXLDeviceState *cxlds; > + CXLType3Dev *ct3d; > + uint8_t enc_log; > + int rc; > + > + if (!obj) { > + error_setg(errp, "Unable to resolve path"); > + return; > + } > + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { > + error_setg(errp, "Path does not point to a CXL type 3 device"); > + return; > + } > + ct3d = CXL_TYPE3(obj); > + cxlds = &ct3d->cxl_dstate; > + > + rc = ct3d_qmp_cxl_event_log_enc(log); > + if (rc < 0) { > + error_setg(errp, "Unhandled error log type"); > + return; > + } > + enc_log = rc; > + > + memset(&module, 0, sizeof(module)); > + cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module), > + cxl_device_get_timestamp(&ct3d->cxl_dstate)); > + > + module.type = type; > + module.health_status = health_status; > + module.media_status = media_status; > + module.additional_status = additional_status; > + module.life_used = life_used; > + stw_le_p(&module.temperature, temperature); > + stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count); > + stl_le_p(&module.corrected_volatile_error_count, corrected_volatile_error_count); > + stl_le_p(&module.corrected_persistent_error_count, corrected_persistent_error_count); > + > + if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) { > + cxl_event_irq_assert(ct3d); > + } > +} > + > static void ct3_class_init(ObjectClass *oc, void *data) > { > DeviceClass *dc = DEVICE_CLASS(oc); > diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c > index 235c171264..2196bd841c 100644 > --- a/hw/mem/cxl_type3_stubs.c > +++ b/hw/mem/cxl_type3_stubs.c > @@ -26,6 +26,18 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags, > bool 
has_correction_mask, uint64List *correction_mask, > Error **errp) {} > > +void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, > + uint8_t flags, uint8_t type, > + uint8_t health_status, > + uint8_t media_status, > + uint8_t additional_status, > + uint8_t life_used, > + int16_t temperature, > + uint32_t dirty_shutdown_count, > + uint32_t corrected_volatile_error_count, > + uint32_t corrected_persistent_error_count, > + Error **errp) {} > + > void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, > Error **errp) > { > diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h > index a39e30d973..089ba2091f 100644 > --- a/include/hw/cxl/cxl_events.h > +++ b/include/hw/cxl/cxl_events.h > @@ -146,4 +146,23 @@ typedef struct CXLEventDram { > uint8_t reserved[0x17]; > } QEMU_PACKED CXLEventDram; > > +/* > + * Memory Module Event Record > + * CXL Rev 3.0 Section 8.2.9.2.1.3: Table 8-45 > + * All fields little endian. > + */ > +typedef struct CXLEventMemoryModule { > + CXLEventRecordHdr hdr; > + uint8_t type; > + uint8_t health_status; > + uint8_t media_status; > + uint8_t additional_status; > + uint8_t life_used; > + int16_t temperature; > + uint32_t dirty_shutdown_count; > + uint32_t corrected_volatile_error_count; > + uint32_t corrected_persistent_error_count; > + uint8_t reserved[0x3d]; > +} QEMU_PACKED CXLEventMemoryModule; > + > #endif /* CXL_EVENTS_H */ > diff --git a/qapi/cxl.json b/qapi/cxl.json > index 190db58385..aae70667c2 100644 > --- a/qapi/cxl.json > +++ b/qapi/cxl.json > @@ -90,6 +90,41 @@ > '*column': 'uint16', '*correction-mask': [ 'uint64' ] > }} > > +## > +# @cxl-inject-memory-module-event: > +# > +# Inject an event record for a Memory Module Event (CXL r3.0 8.2.9.2.1.3) > +# This event includes a copy of the Device Health info at the time of > +# the event. 
> +# > +# @path: CXL type 3 device canonical QOM path > +# @log: Event Log to add the event to > +# @flags: header flags > +# @type: Device Event Type (see spec for permitted values) > +# @health-status: Overall health summary bitmap (see spec for permitted bits) > +# @media-status: Overall media health summary (see spec for permitted values) > +# @additional-status: Complex field (see spec for meaning) > +# @life-used: Percentage (0-100) of factory expected life span > +# @temperature: Device temperature in degrees Celsius > +# @dirty-shutdown-count: Counter incremented whenever device is unable > +# to determine if data loss may have occurred. > +# @corrected-volatile-error-count: Total number of correctable errors in > +# volatile memory > +# @corrected-persistent-error-count: Total number correctable errors in > +# persistent memory > +# > +# Since: 8.1 > +## > +{ 'command': 'cxl-inject-memory-module-event', > + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8', > + 'type': 'uint8', 'health-status': 'uint8', > + 'media-status': 'uint8', 'additional-status': 'uint8', > + 'life-used': 'uint8', 'temperature' : 'int16', > + 'dirty-shutdown-count': 'uint32', > + 'corrected-volatile-error-count': 'uint32', > + 'corrected-persistent-error-count': 'uint32' > + }} > + > ## > # @cxl-inject-poison: > # > -- > 2.37.2
Jonathan Cameron <Jonathan.Cameron@huawei.com> writes:

> These events include a copy of the device health information at the
> time of the event. Actually using the emulated device health would
> require a lot of controls to manipulate that state. Given the aim
> of this injection code is to just test the flows when events occur,
> inject the contents of the device health state as well.
>
> Future work may add more sophisticated device health emulation
> including direct generation of these records when events occur
> (such as a temperature threshold being crossed). That does not
> reduce the usefulness of this more basic generation of the events.
>
> Reviewed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

[...]

> diff --git a/qapi/cxl.json b/qapi/cxl.json
> index 190db58385..aae70667c2 100644
> --- a/qapi/cxl.json
> +++ b/qapi/cxl.json
> @@ -90,6 +90,41 @@
> '*column': 'uint16', '*correction-mask': [ 'uint64' ]
> }}
>
> +##
> +# @cxl-inject-memory-module-event:
> +#
> +# Inject an event record for a Memory Module Event (CXL r3.0 8.2.9.2.1.3)

End the sentence with a period, please.

> +# This event includes a copy of the Device Health info at the time of
> +# the event.
> +#
> +# @path: CXL type 3 device canonical QOM path
> +# @log: Event Log to add the event to
> +# @flags: header flags
> +# @type: Device Event Type (see spec for permitted values)
> +# @health-status: Overall health summary bitmap (see spec for permitted bits)
> +# @media-status: Overall media health summary (see spec for permitted values)
> +# @additional-status: Complex field (see spec for meaning)

"spec" is not a word. Yes, typing out references gets tedious, but your
readers will appreciate tediously unambiguous references.

> +# @life-used: Percentage (0-100) of factory expected life span
> +# @temperature: Device temperature in degrees Celsius
> +# @dirty-shutdown-count: Counter incremented whenever device is unable

Suggest something like "number of times the device has been unable to
determine whether data loss ..."

> +# to determine if data loss may have occurred.
> +# @corrected-volatile-error-count: Total number of correctable errors in
> +# volatile memory
> +# @corrected-persistent-error-count: Total number of correctable errors in
> +# persistent memory
> +#
> +# Since: 8.1
> +##

Please format like

    ##
    # @cxl-inject-memory-module-event:
    #
    # Inject an event record for a Memory Module Event (CXL r3.0
    # 8.2.9.2.1.3). This event includes a copy of the Device Health info
    # at the time of the event.
    #
    # @path: CXL type 3 device canonical QOM path
    #
    # @log: Event Log to add the event to
    #
    # @flags: header flags
    #
    # @type: Device Event Type (see spec for permitted values)
    #
    # @health-status: Overall health summary bitmap (see spec for
    #     permitted bits)
    #
    # @media-status: Overall media health summary (see spec for permitted
    #     values)
    #
    # @additional-status: Complex field (see spec for meaning)
    #
    # @life-used: Percentage (0-100) of factory expected life span
    #
    # @temperature: Device temperature in degrees Celsius
    #
    # @dirty-shutdown-count: Counter incremented whenever device is unable
    #     to determine if data loss may have occurred.
    #
    # @corrected-volatile-error-count: Total number of correctable errors
    #     in volatile memory
    #
    # @corrected-persistent-error-count: Total number of correctable errors
    #     in persistent memory
    #
    # Since: 8.1
    ##

to blend in with recent commit a937b6aa739 (qapi: Reformat doc comments
to conform to current conventions).

> +{ 'command': 'cxl-inject-memory-module-event',
> + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8',
> + 'type': 'uint8', 'health-status': 'uint8',
> + 'media-status': 'uint8', 'additional-status': 'uint8',
> + 'life-used': 'uint8', 'temperature' : 'int16',
> + 'dirty-shutdown-count': 'uint32',
> + 'corrected-volatile-error-count': 'uint32',
> + 'corrected-persistent-error-count': 'uint32'
> + }}
> +
> ##
> # @cxl-inject-poison:
> #
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 71df31917c..5d1b3a5b9b 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -1201,6 +1201,11 @@ static const QemuUUID dram_uuid = { 0x4e, 0x9b, 0xfb, 0x5c, 0x96, 0x24), }; +static const QemuUUID memory_module_uuid = { + .data = UUID(0xfe927475, 0xdd59, 0x4339, 0xa5, 0x86, + 0x79, 0xba, 0xb1, 0x13, 0xb7, 0x74), +}; + #define CXL_GMER_VALID_CHANNEL BIT(0) #define CXL_GMER_VALID_RANK BIT(1) #define CXL_GMER_VALID_DEVICE BIT(2) @@ -1408,6 +1413,63 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags, return; } +void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, + uint8_t flags, uint8_t type, + uint8_t health_status, + uint8_t media_status, + uint8_t additional_status, + uint8_t life_used, + int16_t temperature, + uint32_t dirty_shutdown_count, + uint32_t corrected_volatile_error_count, + uint32_t corrected_persistent_error_count, + Error **errp) +{ + Object *obj = object_resolve_path(path, NULL); + CXLEventMemoryModule module; + CXLEventRecordHdr *hdr = &module.hdr; + CXLDeviceState *cxlds; + CXLType3Dev *ct3d; + uint8_t enc_log; + int rc; + + if (!obj) { + error_setg(errp, "Unable to resolve path"); + return; + } + if (!object_dynamic_cast(obj, TYPE_CXL_TYPE3)) { + error_setg(errp, "Path does not point to a CXL type 3 device"); + return; + } + ct3d = CXL_TYPE3(obj); + cxlds = &ct3d->cxl_dstate; + + rc = ct3d_qmp_cxl_event_log_enc(log); + if (rc < 0) { + error_setg(errp, "Unhandled error log type"); + return; + } + enc_log = rc; + + memset(&module, 0, sizeof(module)); + cxl_assign_event_header(hdr, &memory_module_uuid, flags, sizeof(module), + cxl_device_get_timestamp(&ct3d->cxl_dstate)); + + module.type = type; + module.health_status = health_status; + module.media_status = media_status; + module.additional_status = additional_status; + module.life_used = life_used; + stw_le_p(&module.temperature, temperature); + 
stl_le_p(&module.dirty_shutdown_count, dirty_shutdown_count); + stl_le_p(&module.corrected_volatile_error_count, corrected_volatile_error_count); + stl_le_p(&module.corrected_persistent_error_count, corrected_persistent_error_count); + + if (cxl_event_insert(cxlds, enc_log, (CXLEventRecordRaw *)&module)) { + cxl_event_irq_assert(ct3d); + } +} + static void ct3_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); diff --git a/hw/mem/cxl_type3_stubs.c b/hw/mem/cxl_type3_stubs.c index 235c171264..2196bd841c 100644 --- a/hw/mem/cxl_type3_stubs.c +++ b/hw/mem/cxl_type3_stubs.c @@ -26,6 +26,18 @@ void qmp_cxl_inject_dram_event(const char *path, CxlEventLog log, uint8_t flags, bool has_correction_mask, uint64List *correction_mask, Error **errp) {} +void qmp_cxl_inject_memory_module_event(const char *path, CxlEventLog log, + uint8_t flags, uint8_t type, + uint8_t health_status, + uint8_t media_status, + uint8_t additional_status, + uint8_t life_used, + int16_t temperature, + uint32_t dirty_shutdown_count, + uint32_t corrected_volatile_error_count, + uint32_t corrected_persistent_error_count, + Error **errp) {} + void qmp_cxl_inject_poison(const char *path, uint64_t start, uint64_t length, Error **errp) { diff --git a/include/hw/cxl/cxl_events.h b/include/hw/cxl/cxl_events.h index a39e30d973..089ba2091f 100644 --- a/include/hw/cxl/cxl_events.h +++ b/include/hw/cxl/cxl_events.h @@ -146,4 +146,23 @@ typedef struct CXLEventDram { uint8_t reserved[0x17]; } QEMU_PACKED CXLEventDram; +/* + * Memory Module Event Record + * CXL Rev 3.0 Section 8.2.9.2.1.3: Table 8-45 + * All fields little endian. 
+ */ +typedef struct CXLEventMemoryModule { + CXLEventRecordHdr hdr; + uint8_t type; + uint8_t health_status; + uint8_t media_status; + uint8_t additional_status; + uint8_t life_used; + int16_t temperature; + uint32_t dirty_shutdown_count; + uint32_t corrected_volatile_error_count; + uint32_t corrected_persistent_error_count; + uint8_t reserved[0x3d]; +} QEMU_PACKED CXLEventMemoryModule; + #endif /* CXL_EVENTS_H */ diff --git a/qapi/cxl.json b/qapi/cxl.json index 190db58385..aae70667c2 100644 --- a/qapi/cxl.json +++ b/qapi/cxl.json @@ -90,6 +90,41 @@ '*column': 'uint16', '*correction-mask': [ 'uint64' ] }} +## +# @cxl-inject-memory-module-event: +# +# Inject an event record for a Memory Module Event (CXL r3.0 8.2.9.2.1.3) +# This event includes a copy of the Device Health info at the time of +# the event. +# +# @path: CXL type 3 device canonical QOM path +# @log: Event Log to add the event to +# @flags: header flags +# @type: Device Event Type (see spec for permitted values) +# @health-status: Overall health summary bitmap (see spec for permitted bits) +# @media-status: Overall media health summary (see spec for permitted values) +# @additional-status: Complex field (see spec for meaning) +# @life-used: Percentage (0-100) of factory expected life span +# @temperature: Device temperature in degrees Celsius +# @dirty-shutdown-count: Counter incremented whenever device is unable +# to determine if data loss may have occurred. 
+# @corrected-volatile-error-count: Total number of correctable errors in +# volatile memory +# @corrected-persistent-error-count: Total number correctable errors in +# persistent memory +# +# Since: 8.1 +## +{ 'command': 'cxl-inject-memory-module-event', + 'data': { 'path': 'str', 'log': 'CxlEventLog', 'flags' : 'uint8', + 'type': 'uint8', 'health-status': 'uint8', + 'media-status': 'uint8', 'additional-status': 'uint8', + 'life-used': 'uint8', 'temperature' : 'int16', + 'dirty-shutdown-count': 'uint32', + 'corrected-volatile-error-count': 'uint32', + 'corrected-persistent-error-count': 'uint32' + }} + ## # @cxl-inject-poison: #