diff mbox series

[v2] CXL CCI Get/Set Alert Configuration commands implmented as per CXL Specification 3.2 section 8.2.10.9.3

Message ID 20250214132211.528019-1-s5.kumari@samsung.com (mailing list archive)
State New
Headers show
Series [v2] CXL CCI Get/Set Alert Configuration commands implmented as per CXL Specification 3.2 section 8.2.10.9.3 | expand

Commit Message

Sweta Kumari Feb. 14, 2025, 1:22 p.m. UTC
1)get alert configuration(Opcode 4201h)
2)set alert configuration(Opcode 4202h)

This v2 patch addresses the feedback from the v1 patch and include some minor new changes.

Changes in V2:
- Removed cover letter as it's a single patch
- Added latest spec reference
- Fixed alignment issues
- Updated shorter variable names to be more descriptive
- Replaced field-by-field initialization in 'init_alert_config' with structured initialization for improved readability.
- Replaced bit fields with 'uint8_t' and added defines for individual bits.

The patch is generated against the Johnathan's tree
https://gitlab.com/jic23/qemu.git and branch cxl-2024-11-27.

Signed-off-by: Sweta Kumari <s5.kumari@samsung.com>
---
 hw/cxl/cxl-mailbox-utils.c  | 116 ++++++++++++++++++++++++++++++++++++
 hw/mem/cxl_type3.c          |  16 +++++
 include/hw/cxl/cxl_device.h |  15 +++++
 3 files changed, 147 insertions(+)

Comments

Jonathan Cameron Feb. 14, 2025, 5:43 p.m. UTC | #1
On Fri, 14 Feb 2025 18:52:11 +0530
Sweta Kumari <s5.kumari@samsung.com> wrote:

> 1)get alert configuration(Opcode 4201h)
> 2)set alert configuration(Opcode 4202h)

Move the change log to below the ---
The key thing being git then doesn't pick it up whilst applying the patch.
Whilst changed logs are very useful during the review process we don't
typically want to keep them in the git history for ever!

Otherwise, main comment here is shorten more names.

Jonathan

> 
> This v2 patch addresses the feedback from the v1 patch and include some minor new changes.
> 
> Changes in V2:
> - Removed cover letter as it's a single patch
> - Added latest spec reference
> - Fixed alignment issues
> - Updated shorter variable names to be more descriptive
> - Replaced field-by-field initialization in 'init_alert_config' with structured initialization for improved readability.
> - Replaced bit fields with 'uint8_t' and added defines for individual bits.
> 
> The patch is generated against the Johnathan's tree
> https://gitlab.com/jic23/qemu.git and branch cxl-2024-11-27.
> 
> Signed-off-by: Sweta Kumari <s5.kumari@samsung.com>
> ---
>  hw/cxl/cxl-mailbox-utils.c  | 116 ++++++++++++++++++++++++++++++++++++
>  hw/mem/cxl_type3.c          |  16 +++++
>  include/hw/cxl/cxl_device.h |  15 +++++
>  3 files changed, 147 insertions(+)
> 
> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
> index 9c7ea5bc35..105c63fdec 100644
> --- a/hw/cxl/cxl-mailbox-utils.c
> +++ b/hw/cxl/cxl-mailbox-utils.c
> @@ -28,6 +28,11 @@
>  #define CXL_DC_EVENT_LOG_SIZE 8
>  #define CXL_NUM_EXTENTS_SUPPORTED 512
>  #define CXL_NUM_TAGS_SUPPORTED 0
> +#define CXL_ALERTS_LIFE_USED_WARNING_THRESHOLD (1 << 0)
> +#define CXL_ALERTS_DEVICE_OVER_TEMP_WARNING_THRESHOLD (1 << 1)
> +#define CXL_ALERTS_DEVICE_UNDER_TEMP_WARNING_THRESHOLD (1 << 2)
> +#define CXL_ALERTS_CORRECTED_VOLATILE_MEMORY_ERROR_WARNING_THRESHOLD (1 << 3)
> +#define CXL_ALERTS_CORRECTED_PERSISTENT_MEMORY_ERROR_WARNING_THRESHOLD (1 << 4)
Let's shorten these as they are very ugly to use when a line long!

#define CXL_ALERTS_OVER_TEMP_WARN_THRESH
etc. Similar to comments below.

>  
>  /*
>   * How to add a new command, example. The command set FOO, with cmd BAR.
> @@ -86,6 +91,9 @@ enum {
>          #define GET_PARTITION_INFO     0x0
>          #define GET_LSA       0x2
>          #define SET_LSA       0x3
> +    HEALTH_INFO_ALERTS = 0x42,
> +        #define GET_ALERT_CONFIGURATION 0x1
> +        #define SET_ALERT_CONFIGURATION 0x2
CONFIG maybe enough?

>      SANITIZE    = 0x44,
>          #define OVERWRITE     0x0
>          #define SECURE_ERASE  0x1
> @@ -1625,6 +1633,110 @@ static CXLRetCode cmd_ccls_set_lsa(const struct cxl_cmd *cmd,
>      return CXL_MBOX_SUCCESS;
>  }
>  
> +/* CXL r3.2 Section 8.2.10.9.3.2 Get Alert Configuration (Opcode 4201h) */
> +static CXLRetCode cmd_get_alert_config(const struct cxl_cmd *cmd,
> +                                       uint8_t *payload_in,
> +                                       size_t len_in,
> +                                       uint8_t *payload_out,
> +                                       size_t *len_out,
> +                                       CXLCCI *cci)
> +{
> +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> +    CXLAlertConfig *out = (void *)payload_out;

In this case we can cast to the right type (can't do that if we don't
name that type which happens in a lot these structures). So prefer
    CXLAlertConfig *out = (CXLAlertConfig *)payload_out;
 

> +
> +    memcpy(out, &ct3d->alert_config, sizeof(ct3d->alert_config));
> +    *len_out = sizeof(ct3d->alert_config);
> +
> +    return CXL_MBOX_SUCCESS;
> +}
> +
> +/* CXL r3.2 Section 8.2.10.9.3.3 Set Alert Configuration (Opcode 4202h) */
> +static CXLRetCode cmd_set_alert_config(const struct cxl_cmd *cmd,
> +                                       uint8_t *payload_in,
> +                                       size_t len_in,
> +                                       uint8_t *payload_out,
> +                                       size_t *len_out,
> +                                       CXLCCI *cci)
> +{
> +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> +    CXLAlertConfig *alert_config = &ct3d->alert_config;
> +    struct {
> +        uint8_t valid_alert_actions;
> +        uint8_t enable_alert_actions;
> +        uint8_t life_used_warning_threshold;
> +        uint8_t rsvd;
> +        uint16_t device_over_temperature_warning_threshold;
> +        uint16_t device_under_temperature_warning_threshold;
> +        uint16_t Corrected_volatile_memory_error_warning_threshold;
> +        uint16_t Corrected_persistent_memory_error_warning_threshold;

Shorten these as well. Similar to suggestions below.  They are
just too long to make for nice code!

> +    } QEMU_PACKED *in = (void *)payload_in;
> +
> +    if (in->valid_alert_actions & CXL_ALERTS_LIFE_USED_WARNING_THRESHOLD) {
> +        /*
> +         * CXL 3.2 Table 8-149 The life used warning threshold shall be
> +         * less than the life used critical alert value.
> +         */
> +        if (in->life_used_warning_threshold >=
> +            alert_config->life_used_critical_alert_threshold) {
> +            return CXL_MBOX_INVALID_INPUT;
> +        }
> +        alert_config->life_used_warning_threshold =
> +            in->life_used_warning_threshold;
> +        alert_config->enable_alerts |= CXL_ALERTS_LIFE_USED_WARNING_THRESHOLD;
> +    }
> +
> +    if (in->valid_alert_actions &
> +        CXL_ALERTS_DEVICE_OVER_TEMP_WARNING_THRESHOLD) {
> +        /*
> +         * CXL 3.2 Table 8-149 The Device Over-Temperature Warning Threshold
> +         * shall be less than the the Device Over-Temperature Critical
> +         * Alert Threshold.
> +         */
> +        if (in->device_over_temperature_warning_threshold >=
> +            alert_config->device_over_temperature_critical_alert_threshold) {
> +            return CXL_MBOX_INVALID_INPUT;
> +        }
> +        alert_config->device_over_temperature_warning_threshold =
> +            in->device_over_temperature_warning_threshold;
> +        alert_config->enable_alerts |=
> +            CXL_ALERTS_DEVICE_OVER_TEMP_WARNING_THRESHOLD;
> +    }
> +
> +    if (in->valid_alert_actions &
> +        CXL_ALERTS_DEVICE_UNDER_TEMP_WARNING_THRESHOLD) {
> +        /*
> +         * CXL 3.2 Table 8-149 The Device Under-Temperature Warning Threshold
> +         * shall be higher than the the Device Under-Temperature Critical
> +         * Alert Threshold.
> +         */
> +        if (in->device_under_temperature_warning_threshold <=
> +            alert_config->device_under_temperature_critical_alert_threshold) {
> +            return CXL_MBOX_INVALID_INPUT;
> +        }
> +        alert_config->device_under_temperature_warning_threshold =
> +            in->device_under_temperature_warning_threshold;
> +        alert_config->enable_alerts |=
> +            CXL_ALERTS_DEVICE_UNDER_TEMP_WARNING_THRESHOLD;
> +    }
> +
> +    if (in->valid_alert_actions &
> +        CXL_ALERTS_CORRECTED_VOLATILE_MEMORY_ERROR_WARNING_THRESHOLD) {
> +        alert_config->Corrected_volatile_memory_error_warning_threshold =
> +            in->Corrected_volatile_memory_error_warning_threshold;
> +        alert_config->enable_alerts |=
> +            CXL_ALERTS_CORRECTED_VOLATILE_MEMORY_ERROR_WARNING_THRESHOLD;
> +    }
> +
> +    if (in->valid_alert_actions &
> +        CXL_ALERTS_CORRECTED_PERSISTENT_MEMORY_ERROR_WARNING_THRESHOLD) {
> +        alert_config->Corrected_persistent_memory_error_warning_threshold =
> +            in->Corrected_persistent_memory_error_warning_threshold;
> +        alert_config->enable_alerts |=
> +            CXL_ALERTS_CORRECTED_PERSISTENT_MEMORY_ERROR_WARNING_THRESHOLD;
> +    }
> +    return CXL_MBOX_SUCCESS;
> +}
> +
>  /* Perform the actual device zeroing */
>  static void __do_sanitization(CXLType3Dev *ct3d)
>  {
> @@ -2859,6 +2971,10 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
>      [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
>      [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
>          ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | CXL_MBOX_IMMEDIATE_DATA_CHANGE },
> +    [HEALTH_INFO_ALERTS][GET_ALERT_CONFIGURATION] = {"GET_ALERT_CONFIGURATION",

Space after { to match local style.

> +        cmd_get_alert_config, 0, 0 },
> +    [HEALTH_INFO_ALERTS][SET_ALERT_CONFIGURATION] = {"SET_ALERT_CONFIGURATION",
> +        cmd_set_alert_config, 12, CXL_MBOX_IMMEDIATE_POLICY_CHANGE },
>      [SANITIZE][OVERWRITE] = { "SANITIZE_OVERWRITE", cmd_sanitize_overwrite, 0,
>          (CXL_MBOX_IMMEDIATE_DATA_CHANGE |
>           CXL_MBOX_SECURITY_STATE_CHANGE |

> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
> index a64739be25..1da23bf553 100644
> --- a/include/hw/cxl/cxl_device.h
> +++ b/include/hw/cxl/cxl_device.h
> @@ -581,6 +581,19 @@ typedef struct CXLSetFeatureInfo {
>      size_t data_size;
>  } CXLSetFeatureInfo;
>  
> +typedef struct CXLAlertConfig {
> +    uint8_t valid_alerts;
> +    uint8_t enable_alerts;
> +    uint8_t life_used_critical_alert_threshold;
> +    uint8_t life_used_warning_threshold;
> +    uint16_t device_over_temperature_critical_alert_threshold;
I think we can shorten these at least a bit without lost of meaning!
It's on a device so can drop that entirely. Perhaps

    uint8_t life_used_crit_alert_thresh;
    uint8_t life_used_warn_thresh;
    uint16_t over_temp_crit_alert_thresh;
    uint16_t under_temp_crit_alert_thresh;
    uint16_t over_temp_warn_thresh;
    uint16_t under_temp_warn_thresh;
    uint16_t cor_volatile_mem_err_warn_thresh;
    uint16_t cor_persistent_mem_err_warn_thresh;

> +    uint16_t device_under_temperature_critical_alert_threshold;
> +    uint16_t device_over_temperature_warning_threshold;
> +    uint16_t device_under_temperature_warning_threshold;
> +    uint16_t Corrected_volatile_memory_error_warning_threshold;
Capital in just this one doesn't make much sense.
> +    uint16_t Corrected_persistent_memory_error_warning_threshold;
> +} QEMU_PACKED CXLAlertConfig;
Sweta Kumari Feb. 17, 2025, 9:37 a.m. UTC | #2
On 14/02/25 05:43PM, Jonathan Cameron wrote:
>On Fri, 14 Feb 2025 18:52:11 +0530
>Sweta Kumari <s5.kumari@samsung.com> wrote:
>
>> 1)get alert configuration(Opcode 4201h)
>> 2)set alert configuration(Opcode 4202h)
>
>Move the change log to below the ---
>The key thing being git then doesn't pick it up whilst applying the patch.
>Whilst changed logs are very useful during the review process we don't
>typically want to keep them in the git history for ever!
>
>Otherwise, main comment here is shorten more names.
>
>Jonathan
>
Thank you for the feedback, will make the changes accordingly in the v3
patch.
>>
>> This v2 patch addresses the feedback from the v1 patch and include some minor new changes.
>>
>> Changes in V2:
>> - Removed cover letter as it's a single patch
>> - Added latest spec reference
>> - Fixed alignment issues
>> - Updated shorter variable names to be more descriptive
>> - Replaced field-by-field initialization in 'init_alert_config' with structured initialization for improved readability.
>> - Replaced bit fields with 'uint8_t' and added defines for individual bits.
>>
>> The patch is generated against the Johnathan's tree
>> https://gitlab.com/jic23/qemu.git and branch cxl-2024-11-27.
>>
>> Signed-off-by: Sweta Kumari <s5.kumari@samsung.com>
>> ---
>>  hw/cxl/cxl-mailbox-utils.c  | 116 ++++++++++++++++++++++++++++++++++++
>>  hw/mem/cxl_type3.c          |  16 +++++
>>  include/hw/cxl/cxl_device.h |  15 +++++
>>  3 files changed, 147 insertions(+)
>>
>> diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
>> index 9c7ea5bc35..105c63fdec 100644
>> --- a/hw/cxl/cxl-mailbox-utils.c
>> +++ b/hw/cxl/cxl-mailbox-utils.c
>> @@ -28,6 +28,11 @@
>>  #define CXL_DC_EVENT_LOG_SIZE 8
>>  #define CXL_NUM_EXTENTS_SUPPORTED 512
>>  #define CXL_NUM_TAGS_SUPPORTED 0
>> +#define CXL_ALERTS_LIFE_USED_WARNING_THRESHOLD (1 << 0)
>> +#define CXL_ALERTS_DEVICE_OVER_TEMP_WARNING_THRESHOLD (1 << 1)
>> +#define CXL_ALERTS_DEVICE_UNDER_TEMP_WARNING_THRESHOLD (1 << 2)
>> +#define CXL_ALERTS_CORRECTED_VOLATILE_MEMORY_ERROR_WARNING_THRESHOLD (1 << 3)
>> +#define CXL_ALERTS_CORRECTED_PERSISTENT_MEMORY_ERROR_WARNING_THRESHOLD (1 << 4)
>Let's shorten these as they are very ugly to use when a line long!
>
>#define CXL_ALERTS_OVER_TEMP_WARN_THRESH
>etc. Similar to comments below.
>
Ok, will shorten the macro names as suggested in v3 patch.
>>
>>  /*
>>   * How to add a new command, example. The command set FOO, with cmd BAR.
>> @@ -86,6 +91,9 @@ enum {
>>          #define GET_PARTITION_INFO     0x0
>>          #define GET_LSA       0x2
>>          #define SET_LSA       0x3
>> +    HEALTH_INFO_ALERTS = 0x42,
>> +        #define GET_ALERT_CONFIGURATION 0x1
>> +        #define SET_ALERT_CONFIGURATION 0x2
>CONFIG maybe enough?
>
Okay.
>>      SANITIZE    = 0x44,
>>          #define OVERWRITE     0x0
>>          #define SECURE_ERASE  0x1
>> @@ -1625,6 +1633,110 @@ static CXLRetCode cmd_ccls_set_lsa(const struct cxl_cmd *cmd,
>>      return CXL_MBOX_SUCCESS;
>>  }
>>
>> +/* CXL r3.2 Section 8.2.10.9.3.2 Get Alert Configuration (Opcode 4201h) */
>> +static CXLRetCode cmd_get_alert_config(const struct cxl_cmd *cmd,
>> +                                       uint8_t *payload_in,
>> +                                       size_t len_in,
>> +                                       uint8_t *payload_out,
>> +                                       size_t *len_out,
>> +                                       CXLCCI *cci)
>> +{
>> +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
>> +    CXLAlertConfig *out = (void *)payload_out;
>
>In this case we can cast to the right type (can't do that if we don't
>name that type which happens in a lot these structures). So prefer
>    CXLAlertConfig *out = (CXLAlertConfig *)payload_out;
>
>
Okay, will fix this in v3 patch
>> +
>> +    memcpy(out, &ct3d->alert_config, sizeof(ct3d->alert_config));
>> +    *len_out = sizeof(ct3d->alert_config);
>> +
>> +    return CXL_MBOX_SUCCESS;
>> +}
>> +
>> +/* CXL r3.2 Section 8.2.10.9.3.3 Set Alert Configuration (Opcode 4202h) */
>> +static CXLRetCode cmd_set_alert_config(const struct cxl_cmd *cmd,
>> +                                       uint8_t *payload_in,
>> +                                       size_t len_in,
>> +                                       uint8_t *payload_out,
>> +                                       size_t *len_out,
>> +                                       CXLCCI *cci)
>> +{
>> +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
>> +    CXLAlertConfig *alert_config = &ct3d->alert_config;
>> +    struct {
>> +        uint8_t valid_alert_actions;
>> +        uint8_t enable_alert_actions;
>> +        uint8_t life_used_warning_threshold;
>> +        uint8_t rsvd;
>> +        uint16_t device_over_temperature_warning_threshold;
>> +        uint16_t device_under_temperature_warning_threshold;
>> +        uint16_t Corrected_volatile_memory_error_warning_threshold;
>> +        uint16_t Corrected_persistent_memory_error_warning_threshold;
>
>Shorten these as well. Similar to suggestions below.  They are
>just too long to make for nice code!
>
Got it.
>> +    } QEMU_PACKED *in = (void *)payload_in;
>> +
>> +    if (in->valid_alert_actions & CXL_ALERTS_LIFE_USED_WARNING_THRESHOLD) {
>> +        /*
>> +         * CXL 3.2 Table 8-149 The life used warning threshold shall be
>> +         * less than the life used critical alert value.
>> +         */
>> +        if (in->life_used_warning_threshold >=
>> +            alert_config->life_used_critical_alert_threshold) {
>> +            return CXL_MBOX_INVALID_INPUT;
>> +        }
>> +        alert_config->life_used_warning_threshold =
>> +            in->life_used_warning_threshold;
>> +        alert_config->enable_alerts |= CXL_ALERTS_LIFE_USED_WARNING_THRESHOLD;
>> +    }
>> +
>> +    if (in->valid_alert_actions &
>> +        CXL_ALERTS_DEVICE_OVER_TEMP_WARNING_THRESHOLD) {
>> +        /*
>> +         * CXL 3.2 Table 8-149 The Device Over-Temperature Warning Threshold
>> +         * shall be less than the the Device Over-Temperature Critical
>> +         * Alert Threshold.
>> +         */
>> +        if (in->device_over_temperature_warning_threshold >=
>> +            alert_config->device_over_temperature_critical_alert_threshold) {
>> +            return CXL_MBOX_INVALID_INPUT;
>> +        }
>> +        alert_config->device_over_temperature_warning_threshold =
>> +            in->device_over_temperature_warning_threshold;
>> +        alert_config->enable_alerts |=
>> +            CXL_ALERTS_DEVICE_OVER_TEMP_WARNING_THRESHOLD;
>> +    }
>> +
>> +    if (in->valid_alert_actions &
>> +        CXL_ALERTS_DEVICE_UNDER_TEMP_WARNING_THRESHOLD) {
>> +        /*
>> +         * CXL 3.2 Table 8-149 The Device Under-Temperature Warning Threshold
>> +         * shall be higher than the the Device Under-Temperature Critical
>> +         * Alert Threshold.
>> +         */
>> +        if (in->device_under_temperature_warning_threshold <=
>> +            alert_config->device_under_temperature_critical_alert_threshold) {
>> +            return CXL_MBOX_INVALID_INPUT;
>> +        }
>> +        alert_config->device_under_temperature_warning_threshold =
>> +            in->device_under_temperature_warning_threshold;
>> +        alert_config->enable_alerts |=
>> +            CXL_ALERTS_DEVICE_UNDER_TEMP_WARNING_THRESHOLD;
>> +    }
>> +
>> +    if (in->valid_alert_actions &
>> +        CXL_ALERTS_CORRECTED_VOLATILE_MEMORY_ERROR_WARNING_THRESHOLD) {
>> +        alert_config->Corrected_volatile_memory_error_warning_threshold =
>> +            in->Corrected_volatile_memory_error_warning_threshold;
>> +        alert_config->enable_alerts |=
>> +            CXL_ALERTS_CORRECTED_VOLATILE_MEMORY_ERROR_WARNING_THRESHOLD;
>> +    }
>> +
>> +    if (in->valid_alert_actions &
>> +        CXL_ALERTS_CORRECTED_PERSISTENT_MEMORY_ERROR_WARNING_THRESHOLD) {
>> +        alert_config->Corrected_persistent_memory_error_warning_threshold =
>> +            in->Corrected_persistent_memory_error_warning_threshold;
>> +        alert_config->enable_alerts |=
>> +            CXL_ALERTS_CORRECTED_PERSISTENT_MEMORY_ERROR_WARNING_THRESHOLD;
>> +    }
>> +    return CXL_MBOX_SUCCESS;
>> +}
>> +
>>  /* Perform the actual device zeroing */
>>  static void __do_sanitization(CXLType3Dev *ct3d)
>>  {
>> @@ -2859,6 +2971,10 @@ static const struct cxl_cmd cxl_cmd_set[256][256] = {
>>      [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
>>      [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
>>          ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | CXL_MBOX_IMMEDIATE_DATA_CHANGE },
>> +    [HEALTH_INFO_ALERTS][GET_ALERT_CONFIGURATION] = {"GET_ALERT_CONFIGURATION",
>
>Space after { to match local style.
>
Ok, will address this in v3 patch.
>> +        cmd_get_alert_config, 0, 0 },
>> +    [HEALTH_INFO_ALERTS][SET_ALERT_CONFIGURATION] = {"SET_ALERT_CONFIGURATION",
>> +        cmd_set_alert_config, 12, CXL_MBOX_IMMEDIATE_POLICY_CHANGE },
>>      [SANITIZE][OVERWRITE] = { "SANITIZE_OVERWRITE", cmd_sanitize_overwrite, 0,
>>          (CXL_MBOX_IMMEDIATE_DATA_CHANGE |
>>           CXL_MBOX_SECURITY_STATE_CHANGE |
>
>> diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
>> index a64739be25..1da23bf553 100644
>> --- a/include/hw/cxl/cxl_device.h
>> +++ b/include/hw/cxl/cxl_device.h
>> @@ -581,6 +581,19 @@ typedef struct CXLSetFeatureInfo {
>>      size_t data_size;
>>  } CXLSetFeatureInfo;
>>
>> +typedef struct CXLAlertConfig {
>> +    uint8_t valid_alerts;
>> +    uint8_t enable_alerts;
>> +    uint8_t life_used_critical_alert_threshold;
>> +    uint8_t life_used_warning_threshold;
>> +    uint16_t device_over_temperature_critical_alert_threshold;
>I think we can shorten these at least a bit without lost of meaning!
>It's on a device so can drop that entirely. Perhaps
>
>    uint8_t life_used_crit_alert_thresh;
>    uint8_t life_used_warn_thresh;
>    uint16_t over_temp_crit_alert_thresh;
>    uint16_t under_temp_crit_alert_thresh;
>    uint16_t over_temp_warn_thresh;
>    uint16_t under_temp_warn_thresh;
>    uint16_t cor_volatile_mem_err_warn_thresh;
>    uint16_t cor_persistent_mem_err_warn_thresh;
>
Ok, will shorten the field names as suggested.
>> +    uint16_t device_under_temperature_critical_alert_threshold;
>> +    uint16_t device_over_temperature_warning_threshold;
>> +    uint16_t device_under_temperature_warning_threshold;
>> +    uint16_t Corrected_volatile_memory_error_warning_threshold;
>Capital in just this one doesn't make much sense.
>> +    uint16_t Corrected_persistent_memory_error_warning_threshold;
>> +} QEMU_PACKED CXLAlertConfig;
>
>
diff mbox series

Patch

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 9c7ea5bc35..105c63fdec 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -28,6 +28,11 @@ 
 #define CXL_DC_EVENT_LOG_SIZE 8
 #define CXL_NUM_EXTENTS_SUPPORTED 512
 #define CXL_NUM_TAGS_SUPPORTED 0
+#define CXL_ALERTS_LIFE_USED_WARNING_THRESHOLD (1 << 0)
+#define CXL_ALERTS_DEVICE_OVER_TEMP_WARNING_THRESHOLD (1 << 1)
+#define CXL_ALERTS_DEVICE_UNDER_TEMP_WARNING_THRESHOLD (1 << 2)
+#define CXL_ALERTS_CORRECTED_VOLATILE_MEMORY_ERROR_WARNING_THRESHOLD (1 << 3)
+#define CXL_ALERTS_CORRECTED_PERSISTENT_MEMORY_ERROR_WARNING_THRESHOLD (1 << 4)
 
 /*
  * How to add a new command, example. The command set FOO, with cmd BAR.
@@ -86,6 +91,9 @@  enum {
         #define GET_PARTITION_INFO     0x0
         #define GET_LSA       0x2
         #define SET_LSA       0x3
+    HEALTH_INFO_ALERTS = 0x42,
+        #define GET_ALERT_CONFIGURATION 0x1
+        #define SET_ALERT_CONFIGURATION 0x2
     SANITIZE    = 0x44,
         #define OVERWRITE     0x0
         #define SECURE_ERASE  0x1
@@ -1625,6 +1633,110 @@  static CXLRetCode cmd_ccls_set_lsa(const struct cxl_cmd *cmd,
     return CXL_MBOX_SUCCESS;
 }
 
+/* CXL r3.2 Section 8.2.10.9.3.2 Get Alert Configuration (Opcode 4201h) */
+static CXLRetCode cmd_get_alert_config(const struct cxl_cmd *cmd,
+                                       uint8_t *payload_in,
+                                       size_t len_in,
+                                       uint8_t *payload_out,
+                                       size_t *len_out,
+                                       CXLCCI *cci)
+{
+    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+    CXLAlertConfig *out = (void *)payload_out;
+
+    memcpy(out, &ct3d->alert_config, sizeof(ct3d->alert_config));
+    *len_out = sizeof(ct3d->alert_config);
+
+    return CXL_MBOX_SUCCESS;
+}
+
+/* CXL r3.2 Section 8.2.10.9.3.3 Set Alert Configuration (Opcode 4202h) */
+static CXLRetCode cmd_set_alert_config(const struct cxl_cmd *cmd,
+                                       uint8_t *payload_in,
+                                       size_t len_in,
+                                       uint8_t *payload_out,
+                                       size_t *len_out,
+                                       CXLCCI *cci)
+{
+    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+    CXLAlertConfig *alert_config = &ct3d->alert_config;
+    struct {
+        uint8_t valid_alert_actions;
+        uint8_t enable_alert_actions;
+        uint8_t life_used_warning_threshold;
+        uint8_t rsvd;
+        uint16_t device_over_temperature_warning_threshold;
+        uint16_t device_under_temperature_warning_threshold;
+        uint16_t Corrected_volatile_memory_error_warning_threshold;
+        uint16_t Corrected_persistent_memory_error_warning_threshold;
+    } QEMU_PACKED *in = (void *)payload_in;
+
+    if (in->valid_alert_actions & CXL_ALERTS_LIFE_USED_WARNING_THRESHOLD) {
+        /*
+         * CXL 3.2 Table 8-149 The life used warning threshold shall be
+         * less than the life used critical alert value.
+         */
+        if (in->life_used_warning_threshold >=
+            alert_config->life_used_critical_alert_threshold) {
+            return CXL_MBOX_INVALID_INPUT;
+        }
+        alert_config->life_used_warning_threshold =
+            in->life_used_warning_threshold;
+        alert_config->enable_alerts |= CXL_ALERTS_LIFE_USED_WARNING_THRESHOLD;
+    }
+
+    if (in->valid_alert_actions &
+        CXL_ALERTS_DEVICE_OVER_TEMP_WARNING_THRESHOLD) {
+        /*
+         * CXL 3.2 Table 8-149 The Device Over-Temperature Warning Threshold
+         * shall be less than the the Device Over-Temperature Critical
+         * Alert Threshold.
+         */
+        if (in->device_over_temperature_warning_threshold >=
+            alert_config->device_over_temperature_critical_alert_threshold) {
+            return CXL_MBOX_INVALID_INPUT;
+        }
+        alert_config->device_over_temperature_warning_threshold =
+            in->device_over_temperature_warning_threshold;
+        alert_config->enable_alerts |=
+            CXL_ALERTS_DEVICE_OVER_TEMP_WARNING_THRESHOLD;
+    }
+
+    if (in->valid_alert_actions &
+        CXL_ALERTS_DEVICE_UNDER_TEMP_WARNING_THRESHOLD) {
+        /*
+         * CXL 3.2 Table 8-149 The Device Under-Temperature Warning Threshold
+         * shall be higher than the the Device Under-Temperature Critical
+         * Alert Threshold.
+         */
+        if (in->device_under_temperature_warning_threshold <=
+            alert_config->device_under_temperature_critical_alert_threshold) {
+            return CXL_MBOX_INVALID_INPUT;
+        }
+        alert_config->device_under_temperature_warning_threshold =
+            in->device_under_temperature_warning_threshold;
+        alert_config->enable_alerts |=
+            CXL_ALERTS_DEVICE_UNDER_TEMP_WARNING_THRESHOLD;
+    }
+
+    if (in->valid_alert_actions &
+        CXL_ALERTS_CORRECTED_VOLATILE_MEMORY_ERROR_WARNING_THRESHOLD) {
+        alert_config->Corrected_volatile_memory_error_warning_threshold =
+            in->Corrected_volatile_memory_error_warning_threshold;
+        alert_config->enable_alerts |=
+            CXL_ALERTS_CORRECTED_VOLATILE_MEMORY_ERROR_WARNING_THRESHOLD;
+    }
+
+    if (in->valid_alert_actions &
+        CXL_ALERTS_CORRECTED_PERSISTENT_MEMORY_ERROR_WARNING_THRESHOLD) {
+        alert_config->Corrected_persistent_memory_error_warning_threshold =
+            in->Corrected_persistent_memory_error_warning_threshold;
+        alert_config->enable_alerts |=
+            CXL_ALERTS_CORRECTED_PERSISTENT_MEMORY_ERROR_WARNING_THRESHOLD;
+    }
+    return CXL_MBOX_SUCCESS;
+}
+
 /* Perform the actual device zeroing */
 static void __do_sanitization(CXLType3Dev *ct3d)
 {
@@ -2859,6 +2971,10 @@  static const struct cxl_cmd cxl_cmd_set[256][256] = {
     [CCLS][GET_LSA] = { "CCLS_GET_LSA", cmd_ccls_get_lsa, 8, 0 },
     [CCLS][SET_LSA] = { "CCLS_SET_LSA", cmd_ccls_set_lsa,
         ~0, CXL_MBOX_IMMEDIATE_CONFIG_CHANGE | CXL_MBOX_IMMEDIATE_DATA_CHANGE },
+    [HEALTH_INFO_ALERTS][GET_ALERT_CONFIGURATION] = {"GET_ALERT_CONFIGURATION",
+        cmd_get_alert_config, 0, 0 },
+    [HEALTH_INFO_ALERTS][SET_ALERT_CONFIGURATION] = {"SET_ALERT_CONFIGURATION",
+        cmd_set_alert_config, 12, CXL_MBOX_IMMEDIATE_POLICY_CHANGE },
     [SANITIZE][OVERWRITE] = { "SANITIZE_OVERWRITE", cmd_sanitize_overwrite, 0,
         (CXL_MBOX_IMMEDIATE_DATA_CHANGE |
          CXL_MBOX_SECURITY_STATE_CHANGE |
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 5f365afb4d..5f01684ffc 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -956,6 +956,21 @@  static DOEProtocol doe_comp_prot[] = {
     { }
 };
 
+/*
+ * Initialize CXL device alerts with default threshold values.
+ */
+static void init_alert_config(CXLType3Dev *ct3d)
+{
+    ct3d->alert_config = (CXLAlertConfig) {
+        .life_used_critical_alert_threshold = 75,
+        .life_used_warning_threshold = 40,
+        .device_over_temperature_critical_alert_threshold = 35,
+        .device_under_temperature_critical_alert_threshold = 10,
+        .device_over_temperature_warning_threshold = 25,
+        .device_under_temperature_warning_threshold = 20
+    };
+}
+
 void ct3_realize(PCIDevice *pci_dev, Error **errp)
 {
     ERRP_GUARD();
@@ -1030,6 +1045,7 @@  void ct3_realize(PCIDevice *pci_dev, Error **errp)
         goto err_free_special_ops;
     }
 
+    init_alert_config(ct3d);
     pcie_cap_deverr_init(pci_dev);
     /* Leave a bit of room for expansion */
     rc = pcie_aer_init(pci_dev, PCI_ERR_VER, 0x200, PCI_ERR_SIZEOF, NULL);
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index a64739be25..1da23bf553 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -581,6 +581,19 @@  typedef struct CXLSetFeatureInfo {
     size_t data_size;
 } CXLSetFeatureInfo;
 
+typedef struct CXLAlertConfig {
+    uint8_t valid_alerts;
+    uint8_t enable_alerts;
+    uint8_t life_used_critical_alert_threshold;
+    uint8_t life_used_warning_threshold;
+    uint16_t device_over_temperature_critical_alert_threshold;
+    uint16_t device_under_temperature_critical_alert_threshold;
+    uint16_t device_over_temperature_warning_threshold;
+    uint16_t device_under_temperature_warning_threshold;
+    uint16_t Corrected_volatile_memory_error_warning_threshold;
+    uint16_t Corrected_persistent_memory_error_warning_threshold;
+} QEMU_PACKED CXLAlertConfig;
+
 struct CXLType3Dev {
     /* Private */
     PCIDevice parent_obj;
@@ -605,6 +618,8 @@  struct CXLType3Dev {
     CXLCCI vdm_fm_owned_ld_mctp_cci;
     CXLCCI ld0_cci;
 
+    CXLAlertConfig alert_config;
+
     /* PCIe link characteristics */
     PCIExpLinkSpeed speed;
     PCIExpLinkWidth width;